7 files changed, 1112 insertions, 0 deletions
diff --git a/tools/testing/selftests/bpf/benchs/bench_count.c b/tools/testing/selftests/bpf/benchs/bench_count.c
new file mode 100644
index 000000000..befba7a82
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_count.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bench.h"
+
+/* COUNT-GLOBAL benchmark */
+
+static struct count_global_ctx {
+	struct counter hits;
+} count_global_ctx;
+
+static void *count_global_producer(void *input)
+{
+	struct count_global_ctx *ctx = &count_global_ctx;
+
+	while (true) {
+		atomic_inc(&ctx->hits.value);
+	}
+	return NULL;
+}
+
+static void *count_global_consumer(void *input)
+{
+	return NULL;
+}
+
+static void count_global_measure(struct bench_res *res)
+{
+	struct count_global_ctx *ctx = &count_global_ctx;
+
+	res->hits = atomic_swap(&ctx->hits.value, 0);
+}
+
+/* COUNT-local benchmark */
+
+static struct count_local_ctx {
+	struct counter *hits;
+} count_local_ctx;
+
+static void count_local_setup()
+{
+	struct count_local_ctx *ctx = &count_local_ctx;
+
+	ctx->hits = calloc(env.consumer_cnt, sizeof(*ctx->hits));
+	if (!ctx->hits)
+		exit(1);
+}
+
+static void *count_local_producer(void *input)
+{
+	struct count_local_ctx *ctx = &count_local_ctx;
+	int idx = (long)input;
+
+	while (true) {
+		atomic_inc(&ctx->hits[idx].value);
+	}
+	return NULL;
+}
+
+static void *count_local_consumer(void *input)
+{
+	return NULL;
+}
+
+static void count_local_measure(struct bench_res *res)
+{
+	struct count_local_ctx *ctx = &count_local_ctx;
+	int i;
+
+	for (i = 0; i < env.producer_cnt; i++) {
+		res->hits += atomic_swap(&ctx->hits[i].value, 0);
+	}
+}
+
+const struct bench bench_count_global = {
+	.name = "count-global",
+	.producer_thread = count_global_producer,
+	.consumer_thread = count_global_consumer,
+	.measure = count_global_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_count_local = {
+	.name = "count-local",
+	.setup = count_local_setup,
+	.producer_thread = count_local_producer,
+	.consumer_thread = count_local_consumer,
+	.measure = count_local_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c
new file mode 100644
index 000000000..a96767409
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_rename.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <fcntl.h>
+#include "bench.h"
+#include "test_overhead.skel.h"
+
+/* BPF triggering benchmarks */
+static struct ctx {
+	struct test_overhead *skel;
+	struct counter hits;
+	int fd;
+} ctx;
+
+static void validate()
+{
+	if (env.producer_cnt != 1) {
+		fprintf(stderr, "benchmark doesn't support multi-producer!\n");
+		exit(1);
+	}
+	if (env.consumer_cnt != 1) {
+		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+		exit(1);
+	}
+}
+
+static void *producer(void *input)
+{
+	char buf[] = "test_overhead";
+	int err;
+
+	while (true) {
+		err = write(ctx.fd, buf, sizeof(buf));
+		if (err < 0) {
+			fprintf(stderr, "write failed\n");
+			exit(1);
+		}
+		atomic_inc(&ctx.hits.value);
+	}
+}
+
+static void measure(struct bench_res *res)
+{
+	res->hits = atomic_swap(&ctx.hits.value, 0);
+}
+
+static void setup_ctx()
+{
+	setup_libbpf();
+
+	ctx.skel = test_overhead__open_and_load();
+	if (!ctx.skel) {
+		fprintf(stderr, "failed to open skeleton\n");
+		exit(1);
+	}
+
+	ctx.fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
+	if (ctx.fd < 0) {
+		fprintf(stderr, "failed to open /proc/self/comm: %d\n", -errno);
+		exit(1);
+	}
+}
+
+static void attach_bpf(struct bpf_program *prog)
+{
+	struct bpf_link *link;
+
+	link = bpf_program__attach(prog);
+	if (IS_ERR(link)) {
+		fprintf(stderr, "failed to attach program!\n");
+		exit(1);
+	}
+}
+
+static void setup_base()
+{
+	setup_ctx();
+}
+
+static void setup_kprobe()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.prog1);
+}
+
+static void setup_kretprobe()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.prog2);
+}
+
+static void setup_rawtp()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.prog3);
+}
+
+static void setup_fentry()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.prog4);
+}
+
+static void setup_fexit()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.prog5);
+}
+
+static void *consumer(void *input)
+{
+	return NULL;
+}
+
+const struct bench bench_rename_base = {
+	.name = "rename-base",
+	.validate = validate,
+	.setup = setup_base,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_kprobe = {
+	.name = "rename-kprobe",
+	.validate = validate,
+	.setup = setup_kprobe,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_kretprobe = {
+	.name = "rename-kretprobe",
+	.validate = validate,
+	.setup = setup_kretprobe,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_rawtp = {
+	.name = "rename-rawtp",
+	.validate = validate,
+	.setup = setup_rawtp,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_fentry = {
+	.name = "rename-fentry",
+	.validate = validate,
+	.setup = setup_fentry,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_fexit = {
+	.name = "rename-fexit",
+	.validate = validate,
+	.setup = setup_fexit,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
new file mode 100644
index 000000000..da87c7f31
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -0,0 +1,566 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <asm/barrier.h>
+#include <linux/perf_event.h>
+#include <linux/ring_buffer.h>
+#include <sys/epoll.h>
+#include <sys/mman.h>
+#include <argp.h>
+#include <stdlib.h>
+#include "bench.h"
+#include "ringbuf_bench.skel.h"
+#include "perfbuf_bench.skel.h"
+
+static struct {
+	bool back2back;
+	int batch_cnt;
+	bool sampled;
+	int sample_rate;
+	int ringbuf_sz; /* per-ringbuf, in bytes */
+	bool ringbuf_use_output; /* use slower output API */
+	int perfbuf_sz; /* per-CPU size, in pages */
+} args = {
+	.back2back = false,
+	.batch_cnt = 500,
+	.sampled = false,
+	.sample_rate = 500,
+	.ringbuf_sz = 512 * 1024,
+	.ringbuf_use_output = false,
+	.perfbuf_sz = 128,
+};
+
+enum {
+	ARG_RB_BACK2BACK = 2000,
+	ARG_RB_USE_OUTPUT = 2001,
+	ARG_RB_BATCH_CNT = 2002,
+	ARG_RB_SAMPLED = 2003,
+	ARG_RB_SAMPLE_RATE = 2004,
+};
+
+static const struct argp_option opts[] = {
+	{ "rb-b2b", ARG_RB_BACK2BACK, NULL, 0, "Back-to-back mode"},
+	{ "rb-use-output", ARG_RB_USE_OUTPUT, NULL, 0, "Use bpf_ringbuf_output() instead of bpf_ringbuf_reserve()"},
+	{ "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
+	{ "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
+	{ "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
+	{},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	switch (key) {
+	case ARG_RB_BACK2BACK:
+		args.back2back = true;
+		break;
+	case ARG_RB_USE_OUTPUT:
+		args.ringbuf_use_output = true;
+		break;
+	case ARG_RB_BATCH_CNT:
+		args.batch_cnt = strtol(arg, NULL, 10);
+		if (args.batch_cnt < 0) {
+			fprintf(stderr, "Invalid batch count.");
+			argp_usage(state);
+		}
+		break;
+	case ARG_RB_SAMPLED:
+		args.sampled = true;
+		break;
+	case ARG_RB_SAMPLE_RATE:
+		args.sample_rate = strtol(arg, NULL, 10);
+		if (args.sample_rate < 0) {
+			fprintf(stderr, "Invalid perfbuf sample rate.");
+			argp_usage(state);
+		}
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+	return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_ringbufs_argp = {
+	.options = opts,
+	.parser = parse_arg,
+};
+
+/* RINGBUF-LIBBPF benchmark */
+
+static struct counter buf_hits;
+
+static inline void bufs_trigger_batch()
+{
+	(void)syscall(__NR_getpgid);
+}
+
+static void bufs_validate()
+{
+	if (env.consumer_cnt != 1) {
+		fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n");
+		exit(1);
+	}
+
+	if (args.back2back && env.producer_cnt > 1) {
+		fprintf(stderr, "back-to-back mode makes sense only for single-producer case!\n");
+		exit(1);
+	}
+}
+
+static void *bufs_sample_producer(void *input)
+{
+	if (args.back2back) {
+		/* initial batch to get everything started */
+		bufs_trigger_batch();
+		return NULL;
+	}
+
+	while (true)
+		bufs_trigger_batch();
+	return NULL;
+}
+
+static struct ringbuf_libbpf_ctx {
+	struct ringbuf_bench *skel;
+	struct ring_buffer *ringbuf;
+} ringbuf_libbpf_ctx;
+
+static void ringbuf_libbpf_measure(struct bench_res *res)
+{
+	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
+
+	res->hits = atomic_swap(&buf_hits.value, 0);
+	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static struct ringbuf_bench *ringbuf_setup_skeleton()
+{
+	struct ringbuf_bench *skel;
+
+	setup_libbpf();
+
+	skel = ringbuf_bench__open();
+	if (!skel) {
+		fprintf(stderr, "failed to open skeleton\n");
+		exit(1);
+	}
+
+	skel->rodata->batch_cnt = args.batch_cnt;
+	skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;
+
+	if (args.sampled)
+		/* record data + header take 16 bytes */
+		skel->rodata->wakeup_data_size = args.sample_rate * 16;
+
+	bpf_map__resize(skel->maps.ringbuf, args.ringbuf_sz);
+
+	if (ringbuf_bench__load(skel)) {
+		fprintf(stderr, "failed to load skeleton\n");
+		exit(1);
+	}
+
+	return skel;
+}
+
+static int buf_process_sample(void *ctx, void *data, size_t len)
+{
+	atomic_inc(&buf_hits.value);
+	return 0;
+}
+
+static void ringbuf_libbpf_setup()
+{
+	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
+	struct bpf_link *link;
+
+	ctx->skel = ringbuf_setup_skeleton();
+	ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf),
+					buf_process_sample, NULL, NULL);
+	if (!ctx->ringbuf) {
+		fprintf(stderr, "failed to create ringbuf\n");
+		exit(1);
+	}
+
+	link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
+	if (IS_ERR(link)) {
+		fprintf(stderr, "failed to attach program!\n");
+		exit(1);
+	}
+}
+
+static void *ringbuf_libbpf_consumer(void *input)
+{
+	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
+
+	while (ring_buffer__poll(ctx->ringbuf, -1) >= 0) {
+		if (args.back2back)
+			bufs_trigger_batch();
+	}
+	fprintf(stderr, "ringbuf polling failed!\n");
+	return NULL;
+}
+
+/* RINGBUF-CUSTOM benchmark */
+struct ringbuf_custom {
+	__u64 *consumer_pos;
+	__u64 *producer_pos;
+	__u64 mask;
+	void *data;
+	int map_fd;
+};
+
+static struct ringbuf_custom_ctx {
+	struct ringbuf_bench *skel;
+	struct ringbuf_custom ringbuf;
+	int epoll_fd;
+	struct epoll_event event;
+} ringbuf_custom_ctx;
+
+static void ringbuf_custom_measure(struct bench_res *res)
+{
+	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
+
+	res->hits = atomic_swap(&buf_hits.value, 0);
+	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static void ringbuf_custom_setup()
+{
+	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
+	const size_t page_size = getpagesize();
+	struct bpf_link *link;
+	struct ringbuf_custom *r;
+	void *tmp;
+	int err;
+
+	ctx->skel = ringbuf_setup_skeleton();
+
+	ctx->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+	if (ctx->epoll_fd < 0) {
+		fprintf(stderr, "failed to create epoll fd: %d\n", -errno);
+		exit(1);
+	}
+
+	r = &ctx->ringbuf;
+	r->map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
+	r->mask = args.ringbuf_sz - 1;
+
+	/* Map writable consumer page */
+	tmp = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+		   r->map_fd, 0);
+	if (tmp == MAP_FAILED) {
+		fprintf(stderr, "failed to mmap consumer page: %d\n", -errno);
+		exit(1);
+	}
+	r->consumer_pos = tmp;
+
+	/* Map read-only producer page and data pages. */
+	tmp = mmap(NULL, page_size + 2 * args.ringbuf_sz, PROT_READ, MAP_SHARED,
+		   r->map_fd, page_size);
+	if (tmp == MAP_FAILED) {
+		fprintf(stderr, "failed to mmap data pages: %d\n", -errno);
+		exit(1);
+	}
+	r->producer_pos = tmp;
+	r->data = tmp + page_size;
+
+	ctx->event.events = EPOLLIN;
+	err = epoll_ctl(ctx->epoll_fd, EPOLL_CTL_ADD, r->map_fd, &ctx->event);
+	if (err < 0) {
+		fprintf(stderr, "failed to epoll add ringbuf: %d\n", -errno);
+		exit(1);
+	}
+
+	link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
+	if (IS_ERR(link)) {
+		fprintf(stderr, "failed to attach program\n");
+		exit(1);
+	}
+}
+
+#define RINGBUF_BUSY_BIT (1 << 31)
+#define RINGBUF_DISCARD_BIT (1 << 30)
+#define RINGBUF_META_LEN 8
+
+static inline int roundup_len(__u32 len)
+{
+	/* clear out top 2 bits */
+	len <<= 2;
+	len >>= 2;
+	/* add length prefix */
+	len += RINGBUF_META_LEN;
+	/* round up to 8 byte alignment */
+	return (len + 7) / 8 * 8;
+}
+
+static void ringbuf_custom_process_ring(struct ringbuf_custom *r)
+{
+	unsigned long cons_pos, prod_pos;
+	int *len_ptr, len;
+	bool got_new_data;
+
+	cons_pos = smp_load_acquire(r->consumer_pos);
+	while (true) {
+		got_new_data = false;
+		prod_pos = smp_load_acquire(r->producer_pos);
+		while (cons_pos < prod_pos) {
+			len_ptr = r->data + (cons_pos & r->mask);
+			len = smp_load_acquire(len_ptr);
+
+			/* sample not committed yet, bail out for now */
+			if (len & RINGBUF_BUSY_BIT)
+				return;
+
+			got_new_data = true;
+			cons_pos += roundup_len(len);
+
+			atomic_inc(&buf_hits.value);
+		}
+		if (got_new_data)
+			smp_store_release(r->consumer_pos, cons_pos);
+		else
+			break;
+	};
+}
+
+static void *ringbuf_custom_consumer(void *input)
+{
+	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
+	int cnt;
+
+	do {
+		if (args.back2back)
+			bufs_trigger_batch();
+		cnt = epoll_wait(ctx->epoll_fd, &ctx->event, 1, -1);
+		if (cnt > 0)
+			ringbuf_custom_process_ring(&ctx->ringbuf);
+	} while (cnt >= 0);
+	fprintf(stderr, "ringbuf polling failed!\n");
+	return 0;
+}
+
+/* PERFBUF-LIBBPF benchmark */
+static struct perfbuf_libbpf_ctx {
+	struct perfbuf_bench *skel;
+	struct perf_buffer *perfbuf;
+} perfbuf_libbpf_ctx;
+
+static void perfbuf_measure(struct bench_res *res)
+{
+	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+
+	res->hits = atomic_swap(&buf_hits.value, 0);
+	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static struct perfbuf_bench *perfbuf_setup_skeleton()
+{
+	struct perfbuf_bench *skel;
+
+	setup_libbpf();
+
+	skel = perfbuf_bench__open();
+	if (!skel) {
+		fprintf(stderr, "failed to open skeleton\n");
+		exit(1);
+	}
+
+	skel->rodata->batch_cnt = args.batch_cnt;
+
+	if (perfbuf_bench__load(skel)) {
+		fprintf(stderr, "failed to load skeleton\n");
+		exit(1);
+	}
+
+	return skel;
+}
+
+static enum bpf_perf_event_ret
+perfbuf_process_sample_raw(void *input_ctx, int cpu,
+			   struct perf_event_header *e)
+{
+	switch (e->type) {
+	case PERF_RECORD_SAMPLE:
+		atomic_inc(&buf_hits.value);
+		break;
+	case PERF_RECORD_LOST:
+		break;
+	default:
+		return LIBBPF_PERF_EVENT_ERROR;
+	}
+	return LIBBPF_PERF_EVENT_CONT;
+}
+
+static void perfbuf_libbpf_setup()
+{
+	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+	struct perf_event_attr attr;
+	struct perf_buffer_raw_opts pb_opts = {
+		.event_cb = perfbuf_process_sample_raw,
+		.ctx = (void *)(long)0,
+		.attr = &attr,
+	};
+	struct bpf_link *link;
+
+	ctx->skel = perfbuf_setup_skeleton();
+
+	memset(&attr, 0, sizeof(attr));
+	attr.config = PERF_COUNT_SW_BPF_OUTPUT,
+	attr.type = PERF_TYPE_SOFTWARE;
+	attr.sample_type = PERF_SAMPLE_RAW;
+	/* notify only every Nth sample */
+	if (args.sampled) {
+		attr.sample_period = args.sample_rate;
+		attr.wakeup_events = args.sample_rate;
+	} else {
+		attr.sample_period = 1;
+		attr.wakeup_events = 1;
+	}
+
+	if (args.sample_rate > args.batch_cnt) {
+		fprintf(stderr, "sample rate %d is too high for given batch count %d\n",
+			args.sample_rate, args.batch_cnt);
+		exit(1);
+	}
+
+	ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf),
+					    args.perfbuf_sz, &pb_opts);
+	if (!ctx->perfbuf) {
+		fprintf(stderr, "failed to create perfbuf\n");
+		exit(1);
+	}
+
+	link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
+	if (IS_ERR(link)) {
+		fprintf(stderr, "failed to attach program\n");
+		exit(1);
+	}
+}
+
+static void *perfbuf_libbpf_consumer(void *input)
+{
+	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+
+	while (perf_buffer__poll(ctx->perfbuf, -1) >= 0) {
+		if (args.back2back)
+			bufs_trigger_batch();
+	}
+	fprintf(stderr, "perfbuf polling failed!\n");
+	return NULL;
+}
+
+/* PERFBUF-CUSTOM benchmark */
+
+/* copies of internal libbpf definitions */
+struct perf_cpu_buf {
+	struct perf_buffer *pb;
+	void *base; /* mmap()'ed memory */
+	void *buf; /* for reconstructing segmented data */
+	size_t buf_size;
+	int fd;
+	int cpu;
+	int map_key;
+};
+
+struct perf_buffer {
+	perf_buffer_event_fn event_cb;
+	perf_buffer_sample_fn sample_cb;
+	perf_buffer_lost_fn lost_cb;
+	void *ctx; /* passed into callbacks */
+
+	size_t page_size;
+	size_t mmap_size;
+	struct perf_cpu_buf **cpu_bufs;
+	struct epoll_event *events;
+	int cpu_cnt; /* number of allocated CPU buffers */
+	int epoll_fd; /* perf event FD */
+	int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
+};
+
+static void *perfbuf_custom_consumer(void *input)
+{
+	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+	struct perf_buffer *pb = ctx->perfbuf;
+	struct perf_cpu_buf *cpu_buf;
+	struct perf_event_mmap_page *header;
+	size_t mmap_mask = pb->mmap_size - 1;
+	struct perf_event_header *ehdr;
+	__u64 data_head, data_tail;
+	size_t ehdr_size;
+	void *base;
+	int i, cnt;
+
+	while (true) {
+		if (args.back2back)
+			bufs_trigger_batch();
+		cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, -1);
+		if (cnt <= 0) {
+			fprintf(stderr, "perf epoll failed: %d\n", -errno);
+			exit(1);
+		}
+
+		for (i = 0; i < cnt; ++i) {
+			cpu_buf = pb->events[i].data.ptr;
+			header = cpu_buf->base;
+			base = ((void *)header) + pb->page_size;
+
+			data_head = ring_buffer_read_head(header);
+			data_tail = header->data_tail;
+			while (data_head != data_tail) {
+				ehdr = base + (data_tail & mmap_mask);
+				ehdr_size = ehdr->size;
+
+				if (ehdr->type == PERF_RECORD_SAMPLE)
+					atomic_inc(&buf_hits.value);
+
+				data_tail += ehdr_size;
+			}
+			ring_buffer_write_tail(header, data_tail);
+		}
+	}
+	return NULL;
+}
+
+const struct bench bench_rb_libbpf = {
+	.name = "rb-libbpf",
+	.validate = bufs_validate,
+	.setup = ringbuf_libbpf_setup,
+	.producer_thread = bufs_sample_producer,
+	.consumer_thread = ringbuf_libbpf_consumer,
+	.measure = ringbuf_libbpf_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rb_custom = {
+	.name = "rb-custom",
+	.validate = bufs_validate,
+	.setup = ringbuf_custom_setup,
+	.producer_thread = bufs_sample_producer,
+	.consumer_thread = ringbuf_custom_consumer,
+	.measure = ringbuf_custom_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_pb_libbpf = {
+	.name = "pb-libbpf",
+	.validate = bufs_validate,
+	.setup = perfbuf_libbpf_setup,
+	.producer_thread = bufs_sample_producer,
+	.consumer_thread = perfbuf_libbpf_consumer,
+	.measure = perfbuf_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_pb_custom = {
+	.name = "pb-custom",
+	.validate = bufs_validate,
+	.setup = perfbuf_libbpf_setup,
+	.producer_thread = bufs_sample_producer,
+	.consumer_thread = perfbuf_custom_consumer,
+	.measure = perfbuf_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
new file mode 100644
index 000000000..2a0b6c988
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bench.h"
+#include "trigger_bench.skel.h"
+
+/* BPF triggering benchmarks */
+static struct trigger_ctx {
+	struct trigger_bench *skel;
+} ctx;
+
+static struct counter base_hits;
+
+static void trigger_validate()
+{
+	if (env.consumer_cnt != 1) {
+		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+		exit(1);
+	}
+}
+
+static void *trigger_base_producer(void *input)
+{
+	while (true) {
+		(void)syscall(__NR_getpgid);
+		atomic_inc(&base_hits.value);
+	}
+	return NULL;
+}
+
+static void trigger_base_measure(struct bench_res *res)
+{
+	res->hits = atomic_swap(&base_hits.value, 0);
+}
+
+static void *trigger_producer(void *input)
+{
+	while (true)
+		(void)syscall(__NR_getpgid);
+	return NULL;
+}
+
+static void trigger_measure(struct bench_res *res)
+{
+	res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+}
+
+static void setup_ctx()
+{
+	setup_libbpf();
+
+	ctx.skel = trigger_bench__open_and_load();
+	if (!ctx.skel) {
+		fprintf(stderr, "failed to open skeleton\n");
+		exit(1);
+	}
+}
+
+static void attach_bpf(struct bpf_program *prog)
+{
+	struct bpf_link *link;
+
+	link = bpf_program__attach(prog);
+	if (IS_ERR(link)) {
+		fprintf(stderr, "failed to attach program!\n");
+		exit(1);
+	}
+}
+
+static void trigger_tp_setup()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.bench_trigger_tp);
+}
+
+static void trigger_rawtp_setup()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.bench_trigger_raw_tp);
+}
+
+static void trigger_kprobe_setup()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
+}
+
+static void trigger_fentry_setup()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.bench_trigger_fentry);
+}
+
+static void trigger_fentry_sleep_setup()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep);
+}
+
+static void trigger_fmodret_setup()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
+}
+
+static void *trigger_consumer(void *input)
+{
+	return NULL;
+}
+
+const struct bench bench_trig_base = {
+	.name = "trig-base",
+	.validate = trigger_validate,
+	.producer_thread = trigger_base_producer,
+	.consumer_thread = trigger_consumer,
+	.measure = trigger_base_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_tp = {
+	.name = "trig-tp",
+	.validate = trigger_validate,
+	.setup = trigger_tp_setup,
+	.producer_thread = trigger_producer,
+	.consumer_thread = trigger_consumer,
+	.measure = trigger_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_rawtp = {
+	.name = "trig-rawtp",
+	.validate = trigger_validate,
+	.setup = trigger_rawtp_setup,
+	.producer_thread = trigger_producer,
+	.consumer_thread = trigger_consumer,
+	.measure = trigger_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_kprobe = {
+	.name = "trig-kprobe",
+	.validate = trigger_validate,
+	.setup = trigger_kprobe_setup,
+	.producer_thread = trigger_producer,
+	.consumer_thread = trigger_consumer,
+	.measure = trigger_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_fentry = {
+	.name = "trig-fentry",
+	.validate = trigger_validate,
+	.setup = trigger_fentry_setup,
+	.producer_thread = trigger_producer,
+	.consumer_thread = trigger_consumer,
+	.measure = trigger_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_fentry_sleep = {
+	.name = "trig-fentry-sleep",
+	.validate = trigger_validate,
+	.setup = trigger_fentry_sleep_setup,
+	.producer_thread = trigger_producer,
+	.consumer_thread = trigger_consumer,
+	.measure = trigger_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_fmodret = {
+	.name = "trig-fmodret",
+	.validate = trigger_validate,
+	.setup = trigger_fmodret_setup,
+	.producer_thread = trigger_producer,
+	.consumer_thread = trigger_consumer,
+	.measure = trigger_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_rename.sh b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
new file mode 100755
index 000000000..7b281dbe4
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -eufo pipefail
+
+for i in base kprobe kretprobe rawtp fentry fexit
+do
+	summary=$(sudo ./bench -w2 -d5 -a rename-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
+	printf "%-10s: %s\n" $i "$summary"
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
new file mode 100755
index 000000000..af4aa04ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+set -eufo pipefail
+
+RUN_BENCH="sudo ./bench -w3 -d10 -a"
+
+function hits()
+{
+	echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function drops()
+{
+	echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function header()
+{
+	local len=${#1}
+
+	printf "\n%s\n" "$1"
+	for i in $(seq 1 $len); do printf '='; done
+	printf '\n'
+}
+
+function summarize()
+{
+	bench="$1"
+	summary=$(echo $2 | tail -n1)
+	printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)"
+}
+
+header "Single-producer, parallel producer"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+	summarize $b "$($RUN_BENCH $b)"
+done
+
+header "Single-producer, parallel producer, sampled notification"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+	summarize $b "$($RUN_BENCH --rb-sampled $b)"
+done
+
+header "Single-producer, back-to-back mode"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+	summarize $b "$($RUN_BENCH --rb-b2b $b)"
+	summarize $b-sampled "$($RUN_BENCH --rb-sampled --rb-b2b $b)"
+done
+
+header "Ringbuf back-to-back, effect of sample rate"
+for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
+	summarize "rb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)"
+done
+header "Perfbuf back-to-back, effect of sample rate"
+for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
+	summarize "pb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)"
+done
+
+header "Ringbuf back-to-back, reserve+commit vs output"
+summarize "reserve" "$($RUN_BENCH --rb-b2b                 rb-custom)"
+summarize "output"  "$($RUN_BENCH --rb-b2b --rb-use-output rb-custom)"
+
+header "Ringbuf sampled, reserve+commit vs output"
+summarize "reserve-sampled" "$($RUN_BENCH --rb-sampled                 rb-custom)"
+summarize "output-sampled"  "$($RUN_BENCH --rb-sampled --rb-use-output rb-custom)"
+
+header "Single-producer, consumer/producer competing on the same CPU, low batch count"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+	summarize $b "$($RUN_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)"
+done
+
+header "Ringbuf, multi-producer contention"
+for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
+	summarize "rb-libbpf nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
+done
+
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
new file mode 100755
index 000000000..78e83f243
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -eufo pipefail
+
+for i in base tp rawtp kprobe fentry fmodret
+do
+	summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
+	printf "%-10s: %s\n" $i "$summary"
+done