diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:27:49 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:27:49 +0000 |
commit | ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch) | |
tree | b2d64bc10158fdd5497876388cd68142ca374ed3 /tools/testing/selftests/bpf/benchs | |
parent | Initial commit. (diff) | |
download | linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip |
Adding upstream version 6.6.15.upstream/6.6.15
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tools/testing/selftests/bpf/benchs')
24 files changed, 3694 insertions, 0 deletions
diff --git a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c new file mode 100644 index 0000000000..e289dd1a14 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c @@ -0,0 +1,477 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <argp.h> +#include <linux/log2.h> +#include <pthread.h> +#include "bench.h" +#include "bloom_filter_bench.skel.h" +#include "bpf_util.h" + +static struct ctx { + bool use_array_map; + bool use_hashmap; + bool hashmap_use_bloom; + bool count_false_hits; + + struct bloom_filter_bench *skel; + + int bloom_fd; + int hashmap_fd; + int array_map_fd; + + pthread_mutex_t map_done_mtx; + pthread_cond_t map_done_cv; + bool map_done; + bool map_prepare_err; + + __u32 next_map_idx; +} ctx = { + .map_done_mtx = PTHREAD_MUTEX_INITIALIZER, + .map_done_cv = PTHREAD_COND_INITIALIZER, +}; + +struct stat { + __u32 stats[3]; +}; + +static struct { + __u32 nr_entries; + __u8 nr_hash_funcs; + __u8 value_size; +} args = { + .nr_entries = 1000, + .nr_hash_funcs = 3, + .value_size = 8, +}; + +enum { + ARG_NR_ENTRIES = 3000, + ARG_NR_HASH_FUNCS = 3001, + ARG_VALUE_SIZE = 3002, +}; + +static const struct argp_option opts[] = { + { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0, + "Set number of expected unique entries in the bloom filter"}, + { "nr_hash_funcs", ARG_NR_HASH_FUNCS, "NR_HASH_FUNCS", 0, + "Set number of hash functions in the bloom filter"}, + { "value_size", ARG_VALUE_SIZE, "VALUE_SIZE", 0, + "Set value size (in bytes) of bloom filter entries"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_NR_ENTRIES: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "Invalid nr_entries count."); + argp_usage(state); + } + args.nr_entries = ret; + break; + case ARG_NR_HASH_FUNCS: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > 15) { + fprintf(stderr, + "The bloom filter must use 1 to 15 hash functions."); + argp_usage(state); + } + args.nr_hash_funcs = ret; + break; + case ARG_VALUE_SIZE: + ret = strtol(arg, NULL, 10); + if (ret < 2 || ret > 256) { + fprintf(stderr, + "Invalid value size. Must be between 2 and 256 bytes"); + argp_usage(state); + } + args.value_size = ret; + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +/* exported into benchmark runner */ +const struct argp bench_bloom_map_argp = { + .options = opts, + .parser = parse_arg, +}; + +static void validate(void) +{ + if (env.consumer_cnt != 0) { + fprintf(stderr, + "The bloom filter benchmarks do not support consumer\n"); + exit(1); + } +} + +static inline void trigger_bpf_program(void) +{ + syscall(__NR_getpgid); +} + +static void *producer(void *input) +{ + while (true) + trigger_bpf_program(); + + return NULL; +} + +static void *map_prepare_thread(void *arg) +{ + __u32 val_size, i; + void *val = NULL; + int err; + + val_size = args.value_size; + val = malloc(val_size); + if (!val) { + ctx.map_prepare_err = true; + goto done; + } + + while (true) { + i = __atomic_add_fetch(&ctx.next_map_idx, 1, __ATOMIC_RELAXED); + if (i > args.nr_entries) + break; + +again: + /* Populate hashmap, bloom filter map, and array map with the same + * random values + */ + err = syscall(__NR_getrandom, val, val_size, 0); + if (err != val_size) { + ctx.map_prepare_err = true; + fprintf(stderr, "failed to get random value: %d\n", -errno); + break; + } + + if (ctx.use_hashmap) { + err = bpf_map_update_elem(ctx.hashmap_fd, val, val, BPF_NOEXIST); + if (err) { + if (err != -EEXIST) { + ctx.map_prepare_err = true; + fprintf(stderr, "failed to add elem to hashmap: %d\n", + -errno); + break; + } + goto again; + } + } + + i--; + + if (ctx.use_array_map) { + err = bpf_map_update_elem(ctx.array_map_fd, &i, val, 0); + if (err) { + ctx.map_prepare_err = true; + fprintf(stderr, "failed to add elem to array map: %d\n", -errno); + break; + } + } + + if (ctx.use_hashmap && !ctx.hashmap_use_bloom) + continue; + + err = bpf_map_update_elem(ctx.bloom_fd, NULL, val, 0); + if (err) { + ctx.map_prepare_err = true; + fprintf(stderr, + "failed to add elem to bloom filter map: %d\n", -errno); + break; + } + } +done: + pthread_mutex_lock(&ctx.map_done_mtx); + ctx.map_done = true; + pthread_cond_signal(&ctx.map_done_cv); + pthread_mutex_unlock(&ctx.map_done_mtx); + + if (val) + free(val); + + return NULL; +} + +static void populate_maps(void) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + pthread_t map_thread; + int i, err, nr_rand_bytes; + + ctx.bloom_fd = bpf_map__fd(ctx.skel->maps.bloom_map); + ctx.hashmap_fd = bpf_map__fd(ctx.skel->maps.hashmap); + ctx.array_map_fd = bpf_map__fd(ctx.skel->maps.array_map); + + for (i = 0; i < nr_cpus; i++) { + err = pthread_create(&map_thread, NULL, map_prepare_thread, + NULL); + if (err) { + fprintf(stderr, "failed to create pthread: %d\n", -errno); + exit(1); + } + } + + pthread_mutex_lock(&ctx.map_done_mtx); + while (!ctx.map_done) + pthread_cond_wait(&ctx.map_done_cv, &ctx.map_done_mtx); + pthread_mutex_unlock(&ctx.map_done_mtx); + + if (ctx.map_prepare_err) + exit(1); + + nr_rand_bytes = syscall(__NR_getrandom, ctx.skel->bss->rand_vals, + ctx.skel->rodata->nr_rand_bytes, 0); + if (nr_rand_bytes != ctx.skel->rodata->nr_rand_bytes) { + fprintf(stderr, "failed to get random bytes\n"); + exit(1); + } +} + +static void check_args(void) +{ + if (args.value_size < 8) { + __u64 nr_unique_entries = 1ULL << (args.value_size * 8); + + if (args.nr_entries > nr_unique_entries) { + fprintf(stderr, + "Not enough unique values for the nr_entries requested\n"); + exit(1); + } + } +} + +static struct bloom_filter_bench *setup_skeleton(void) +{ + struct bloom_filter_bench *skel; + + check_args(); + + setup_libbpf(); + + skel = bloom_filter_bench__open(); + if (!skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + skel->rodata->hashmap_use_bloom = ctx.hashmap_use_bloom; + skel->rodata->count_false_hits = ctx.count_false_hits; + + /* Resize number of entries */ + bpf_map__set_max_entries(skel->maps.hashmap, args.nr_entries); + + bpf_map__set_max_entries(skel->maps.array_map, args.nr_entries); + + bpf_map__set_max_entries(skel->maps.bloom_map, args.nr_entries); + + /* Set value size */ + bpf_map__set_value_size(skel->maps.array_map, args.value_size); + + bpf_map__set_value_size(skel->maps.bloom_map, args.value_size); + + bpf_map__set_value_size(skel->maps.hashmap, args.value_size); + + /* For the hashmap, we use the value as the key as well */ + bpf_map__set_key_size(skel->maps.hashmap, args.value_size); + + skel->bss->value_size = args.value_size; + + /* Set number of hash functions */ + bpf_map__set_map_extra(skel->maps.bloom_map, args.nr_hash_funcs); + + if (bloom_filter_bench__load(skel)) { + fprintf(stderr, "failed to load skeleton\n"); + exit(1); + } + + return skel; +} + +static void bloom_lookup_setup(void) +{ + struct bpf_link *link; + + ctx.use_array_map = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void bloom_update_setup(void) +{ + struct bpf_link *link; + + ctx.use_array_map = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_update); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void false_positive_setup(void) +{ + struct bpf_link *link; + + ctx.use_hashmap = true; + ctx.hashmap_use_bloom = true; + ctx.count_false_hits = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void hashmap_with_bloom_setup(void) +{ + struct bpf_link *link; + + ctx.use_hashmap = true; + ctx.hashmap_use_bloom = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void hashmap_no_bloom_setup(void) +{ + struct bpf_link *link; + + ctx.use_hashmap = true; + + ctx.skel = setup_skeleton(); + + populate_maps(); + + link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void measure(struct bench_res *res) +{ + unsigned long total_hits = 0, total_drops = 0, total_false_hits = 0; + static unsigned long last_hits, last_drops, last_false_hits; + unsigned int nr_cpus = bpf_num_possible_cpus(); + int hit_key, drop_key, false_hit_key; + int i; + + hit_key = ctx.skel->rodata->hit_key; + drop_key = ctx.skel->rodata->drop_key; + false_hit_key = ctx.skel->rodata->false_hit_key; + + if (ctx.skel->bss->error != 0) { + fprintf(stderr, "error (%d) when searching the bloom filter\n", + ctx.skel->bss->error); + exit(1); + } + + for (i = 0; i < nr_cpus; i++) { + struct stat *s = (void *)&ctx.skel->bss->percpu_stats[i]; + + total_hits += s->stats[hit_key]; + total_drops += s->stats[drop_key]; + total_false_hits += s->stats[false_hit_key]; + } + + res->hits = total_hits - last_hits; + res->drops = total_drops - last_drops; + res->false_hits = total_false_hits - last_false_hits; + + last_hits = total_hits; + last_drops = total_drops; + last_false_hits = total_false_hits; +} + +const struct bench bench_bloom_lookup = { + .name = "bloom-lookup", + .argp = &bench_bloom_map_argp, + .validate = validate, + .setup = bloom_lookup_setup, + .producer_thread = producer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_bloom_update = { + .name = "bloom-update", + .argp = &bench_bloom_map_argp, + .validate = validate, + .setup = bloom_update_setup, + .producer_thread = producer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_bloom_false_positive = { + .name = "bloom-false-positive", + .argp = &bench_bloom_map_argp, + .validate = validate, + .setup = false_positive_setup, + .producer_thread = producer, + .measure = measure, + .report_progress = false_hits_report_progress, + .report_final = false_hits_report_final, +}; + +const struct bench bench_hashmap_without_bloom = { + .name = "hashmap-without-bloom", + .argp = &bench_bloom_map_argp, + .validate = validate, + .setup = hashmap_no_bloom_setup, + .producer_thread = producer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_hashmap_with_bloom = { + .name = "hashmap-with-bloom", + .argp = &bench_bloom_map_argp, + .validate = validate, + .setup = hashmap_with_bloom_setup, + .producer_thread = producer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c new file mode 100644 index 0000000000..ee1dc12c5e --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Bytedance */ + +#include "bench.h" +#include "bpf_hashmap_full_update_bench.skel.h" +#include "bpf_util.h" + +/* BPF triggering benchmarks */ +static struct ctx { + struct bpf_hashmap_full_update_bench *skel; +} ctx; + +#define MAX_LOOP_NUM 10000 + +static void validate(void) +{ + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); + exit(1); + } +} + +static void *producer(void *input) +{ + while (true) { + /* trigger the bpf program */ + syscall(__NR_getpgid); + } + + return NULL; +} + +static void measure(struct bench_res *res) +{ +} + +static void setup(void) +{ + struct bpf_link *link; + int map_fd, i, max_entries; + + setup_libbpf(); + + ctx.skel = bpf_hashmap_full_update_bench__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + ctx.skel->bss->nr_loops = MAX_LOOP_NUM; + + link = bpf_program__attach(ctx.skel->progs.benchmark); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } + + /* fill hash_map */ + map_fd = bpf_map__fd(ctx.skel->maps.hash_map_bench); + max_entries = bpf_map__max_entries(ctx.skel->maps.hash_map_bench); + for (i = 0; i < max_entries; i++) + bpf_map_update_elem(map_fd, &i, &i, BPF_ANY); +} + +static void hashmap_report_final(struct bench_res res[], int res_cnt) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + int i; + + for (i = 0; i < nr_cpus; i++) { + u64 time = ctx.skel->bss->percpu_time[i]; + + if (!time) + continue; + + printf("%d:hash_map_full_perf %lld events per sec\n", + i, ctx.skel->bss->nr_loops * 1000000000ll / time); + } +} + +const struct bench bench_bpf_hashmap_full_update = { + .name = "bpf-hashmap-full-update", + .validate = validate, + .setup = setup, + .producer_thread = producer, + .measure = measure, + .report_progress = NULL, + .report_final = hashmap_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c new file mode 100644 index 0000000000..279ff1b8b5 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c @@ -0,0 +1,277 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Isovalent */ + +#include <sys/random.h> +#include <argp.h> +#include "bench.h" +#include "bpf_hashmap_lookup.skel.h" +#include "bpf_util.h" + +/* BPF triggering benchmarks */ +static struct ctx { + struct bpf_hashmap_lookup *skel; +} ctx; + +/* only available to kernel, so define it here */ +#define BPF_MAX_LOOPS (1<<23) + +#define MAX_KEY_SIZE 1024 /* the size of the key map */ + +static struct { + __u32 key_size; + __u32 map_flags; + __u32 max_entries; + __u32 nr_entries; + __u32 nr_loops; +} args = { + .key_size = 4, + .map_flags = 0, + .max_entries = 1000, + .nr_entries = 500, + .nr_loops = 1000000, +}; + +enum { + ARG_KEY_SIZE = 8001, + ARG_MAP_FLAGS, + ARG_MAX_ENTRIES, + ARG_NR_ENTRIES, + ARG_NR_LOOPS, +}; + +static const struct argp_option opts[] = { + { "key_size", ARG_KEY_SIZE, "KEY_SIZE", 0, + "The hashmap key size (max 1024)"}, + { "map_flags", ARG_MAP_FLAGS, "MAP_FLAGS", 0, + "The hashmap flags passed to BPF_MAP_CREATE"}, + { "max_entries", ARG_MAX_ENTRIES, "MAX_ENTRIES", 0, + "The hashmap max entries"}, + { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0, + "The number of entries to insert/lookup"}, + { "nr_loops", ARG_NR_LOOPS, "NR_LOOPS", 0, + "The number of loops for the benchmark"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_KEY_SIZE: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > MAX_KEY_SIZE) { + fprintf(stderr, "invalid key_size"); + argp_usage(state); + } + args.key_size = ret; + break; + case ARG_MAP_FLAGS: + ret = strtol(arg, NULL, 0); + if (ret < 0 || ret > UINT_MAX) { + fprintf(stderr, "invalid map_flags"); + argp_usage(state); + } + args.map_flags = ret; + break; + case ARG_MAX_ENTRIES: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid max_entries"); + argp_usage(state); + } + args.max_entries = ret; + break; + case ARG_NR_ENTRIES: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid nr_entries"); + argp_usage(state); + } + args.nr_entries = ret; + break; + case ARG_NR_LOOPS: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > BPF_MAX_LOOPS) { + fprintf(stderr, "invalid nr_loops: %ld (min=1 max=%u)\n", + ret, BPF_MAX_LOOPS); + argp_usage(state); + } + args.nr_loops = ret; + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_hashmap_lookup_argp = { + .options = opts, + .parser = parse_arg, +}; + +static void validate(void) +{ + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); + exit(1); + } + + if (args.nr_entries > args.max_entries) { + fprintf(stderr, "args.nr_entries is too big! (max %u, got %u)\n", + args.max_entries, args.nr_entries); + exit(1); + } +} + +static void *producer(void *input) +{ + while (true) { + /* trigger the bpf program */ + syscall(__NR_getpgid); + } + return NULL; +} + +static void measure(struct bench_res *res) +{ +} + +static inline void patch_key(u32 i, u32 *key) +{ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + *key = i + 1; +#else + *key = __builtin_bswap32(i + 1); +#endif + /* the rest of key is random */ +} + +static void setup(void) +{ + struct bpf_link *link; + int map_fd; + int ret; + int i; + + setup_libbpf(); + + ctx.skel = bpf_hashmap_lookup__open(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + bpf_map__set_max_entries(ctx.skel->maps.hash_map_bench, args.max_entries); + bpf_map__set_key_size(ctx.skel->maps.hash_map_bench, args.key_size); + bpf_map__set_value_size(ctx.skel->maps.hash_map_bench, 8); + bpf_map__set_map_flags(ctx.skel->maps.hash_map_bench, args.map_flags); + + ctx.skel->bss->nr_entries = args.nr_entries; + ctx.skel->bss->nr_loops = args.nr_loops / args.nr_entries; + + if (args.key_size > 4) { + for (i = 1; i < args.key_size/4; i++) + ctx.skel->bss->key[i] = 2654435761 * i; + } + + ret = bpf_hashmap_lookup__load(ctx.skel); + if (ret) { + bpf_hashmap_lookup__destroy(ctx.skel); + fprintf(stderr, "failed to load map: %s", strerror(-ret)); + exit(1); + } + + /* fill in the hash_map */ + map_fd = bpf_map__fd(ctx.skel->maps.hash_map_bench); + for (u64 i = 0; i < args.nr_entries; i++) { + patch_key(i, ctx.skel->bss->key); + bpf_map_update_elem(map_fd, ctx.skel->bss->key, &i, BPF_ANY); + } + + link = bpf_program__attach(ctx.skel->progs.benchmark); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static inline double events_from_time(u64 time) +{ + if (time) + return args.nr_loops * 1000000000llu / time / 1000000.0L; + + return 0; +} + +static int compute_events(u64 *times, double *events_mean, double *events_stddev, u64 *mean_time) +{ + int i, n = 0; + + *events_mean = 0; + *events_stddev = 0; + *mean_time = 0; + + for (i = 0; i < 32; i++) { + if (!times[i]) + break; + *mean_time += times[i]; + *events_mean += events_from_time(times[i]); + n += 1; + } + if (!n) + return 0; + + *mean_time /= n; + *events_mean /= n; + + if (n > 1) { + for (i = 0; i < n; i++) { + double events_i = *events_mean - events_from_time(times[i]); + *events_stddev += events_i * events_i / (n - 1); + } + *events_stddev = sqrt(*events_stddev); + } + + return n; +} + +static void hashmap_report_final(struct bench_res res[], int res_cnt) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + double events_mean, events_stddev; + u64 mean_time; + int i, n; + + for (i = 0; i < nr_cpus; i++) { + n = compute_events(ctx.skel->bss->percpu_times[i], &events_mean, + &events_stddev, &mean_time); + if (n == 0) + continue; + + if (env.quiet) { + /* we expect only one cpu to be present */ + if (env.affinity) + printf("%.3lf\n", events_mean); + else + printf("cpu%02d %.3lf\n", i, events_mean); + } else { + printf("cpu%02d: lookup %.3lfM ± %.3lfM events/sec" + " (approximated from %d samples of ~%lums)\n", + i, events_mean, 2*events_stddev, + n, mean_time / 1000000); + } + } +} + +const struct bench bench_bpf_hashmap_lookup = { + .name = "bpf-hashmap-lookup", + .argp = &bench_hashmap_lookup_argp, + .validate = validate, + .setup = setup, + .producer_thread = producer, + .measure = measure, + .report_progress = NULL, + .report_final = hashmap_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c new file mode 100644 index 0000000000..a705cfb2bc --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ + +#include <argp.h> +#include "bench.h" +#include "bpf_loop_bench.skel.h" + +/* BPF triggering benchmarks */ +static struct ctx { + struct bpf_loop_bench *skel; +} ctx; + +static struct { + __u32 nr_loops; +} args = { + .nr_loops = 10, +}; + +enum { + ARG_NR_LOOPS = 4000, +}; + +static const struct argp_option opts[] = { + { "nr_loops", ARG_NR_LOOPS, "nr_loops", 0, + "Set number of loops for the bpf_loop helper"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case ARG_NR_LOOPS: + args.nr_loops = strtol(arg, NULL, 10); + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +/* exported into benchmark runner */ +const struct argp bench_bpf_loop_argp = { + .options = opts, + .parser = parse_arg, +}; + +static void validate(void) +{ + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); + exit(1); + } +} + +static void *producer(void *input) +{ + while (true) + /* trigger the bpf program */ + syscall(__NR_getpgid); + + return NULL; +} + +static void measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.skel->bss->hits, 0); +} + +static void setup(void) +{ + struct bpf_link *link; + + setup_libbpf(); + + ctx.skel = bpf_loop_bench__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + link = bpf_program__attach(ctx.skel->progs.benchmark); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } + + ctx.skel->bss->nr_loops = args.nr_loops; +} + +const struct bench bench_bpf_loop = { + .name = "bpf-loop", + .argp = &bench_bpf_loop_argp, + .validate = validate, + .setup = setup, + .producer_thread = producer, + .measure = measure, + .report_progress = ops_report_progress, + .report_final = ops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_count.c b/tools/testing/selftests/bpf/benchs/bench_count.c new file mode 100644 index 0000000000..ba89ed3936 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_count.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bench.h" + +/* COUNT-GLOBAL benchmark */ + +static struct count_global_ctx { + struct counter hits; +} count_global_ctx; + +static void *count_global_producer(void *input) +{ + struct count_global_ctx *ctx = &count_global_ctx; + + while (true) { + atomic_inc(&ctx->hits.value); + } + return NULL; +} + +static void count_global_measure(struct bench_res *res) +{ + struct count_global_ctx *ctx = &count_global_ctx; + + res->hits = atomic_swap(&ctx->hits.value, 0); +} + +/* COUNT-local benchmark */ + +static struct count_local_ctx { + struct counter *hits; +} count_local_ctx; + +static void count_local_setup(void) +{ + struct count_local_ctx *ctx = &count_local_ctx; + + ctx->hits = calloc(env.producer_cnt, sizeof(*ctx->hits)); + if (!ctx->hits) + exit(1); +} + +static void *count_local_producer(void *input) +{ + struct count_local_ctx *ctx = &count_local_ctx; + int idx = (long)input; + + while (true) { + atomic_inc(&ctx->hits[idx].value); + } + return NULL; +} + +static void count_local_measure(struct bench_res *res) +{ + struct count_local_ctx *ctx = &count_local_ctx; + int i; + + for (i = 0; i < env.producer_cnt; i++) { + res->hits += atomic_swap(&ctx->hits[i].value, 0); + } +} + +const struct bench bench_count_global = { + .name = "count-global", + .producer_thread = count_global_producer, + .measure = count_global_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_count_local = { + .name = "count-local", + .setup = count_local_setup, + .producer_thread = count_local_producer, + .measure = count_local_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c new file mode 100644 index 0000000000..9146d3f414 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include <argp.h> +#include <stdbool.h> +#include <pthread.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/param.h> +#include <fcntl.h> + +#include "bench.h" +#include "bpf_util.h" +#include "cgroup_helpers.h" +#include "htab_mem_bench.skel.h" + +struct htab_mem_use_case { + const char *name; + const char **progs; + /* Do synchronization between addition thread and deletion thread */ + bool need_sync; +}; + +static struct htab_mem_ctx { + const struct htab_mem_use_case *uc; + struct htab_mem_bench *skel; + pthread_barrier_t *notify; + int fd; +} ctx; + +const char *ow_progs[] = {"overwrite", NULL}; +const char *batch_progs[] = {"batch_add_batch_del", NULL}; +const char *add_del_progs[] = {"add_only", "del_only", NULL}; +const static struct htab_mem_use_case use_cases[] = { + { .name = "overwrite", .progs = ow_progs }, + { .name = "batch_add_batch_del", .progs = batch_progs }, + { .name = "add_del_on_diff_cpu", .progs = add_del_progs, .need_sync = true }, +}; + +static struct htab_mem_args { + u32 value_size; + const char *use_case; + bool preallocated; +} args = { + .value_size = 8, + .use_case = "overwrite", + .preallocated = false, +}; + +enum { + ARG_VALUE_SIZE = 10000, + ARG_USE_CASE = 10001, + ARG_PREALLOCATED = 10002, +}; + +static const struct argp_option opts[] = { + { "value-size", ARG_VALUE_SIZE, "VALUE_SIZE", 0, + "Set the value size of hash map (default 8)" }, + { "use-case", ARG_USE_CASE, "USE_CASE", 0, + "Set the use case of hash map: overwrite|batch_add_batch_del|add_del_on_diff_cpu" }, + { "preallocated", ARG_PREALLOCATED, NULL, 0, "use preallocated hash map" }, + {}, +}; + +static error_t htab_mem_parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case ARG_VALUE_SIZE: + args.value_size = strtoul(arg, NULL, 10); + if (args.value_size > 4096) { + fprintf(stderr, "too big value size %u\n", args.value_size); + argp_usage(state); + } + break; + case ARG_USE_CASE: + args.use_case = strdup(arg); + if (!args.use_case) { + fprintf(stderr, "no mem for use-case\n"); + argp_usage(state); + } + break; + case ARG_PREALLOCATED: + args.preallocated = true; + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_htab_mem_argp = { + .options = opts, + .parser = htab_mem_parse_arg, +}; + +static void htab_mem_validate(void) +{ + if (!strcmp(use_cases[2].name, args.use_case) && env.producer_cnt % 2) { + fprintf(stderr, "%s needs an even number of producers\n", args.use_case); + exit(1); + } +} + +static int htab_mem_bench_init_barriers(void) +{ + pthread_barrier_t *barriers; + unsigned int i, nr; + + if (!ctx.uc->need_sync) + return 0; + + nr = (env.producer_cnt + 1) / 2; + barriers = calloc(nr, sizeof(*barriers)); + if (!barriers) + return -1; + + /* Used for synchronization between two threads */ + for (i = 0; i < nr; i++) + pthread_barrier_init(&barriers[i], NULL, 2); + + ctx.notify = barriers; + return 0; +} + +static void htab_mem_bench_exit_barriers(void) +{ + unsigned int i, nr; + + if (!ctx.notify) + return; + + nr = (env.producer_cnt + 1) / 2; + for (i = 0; i < nr; i++) + pthread_barrier_destroy(&ctx.notify[i]); + free(ctx.notify); +} + +static const struct htab_mem_use_case *htab_mem_find_use_case_or_exit(const char *name) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(use_cases); i++) { + if (!strcmp(name, use_cases[i].name)) + return &use_cases[i]; + } + + fprintf(stderr, "no such use-case: %s\n", name); + fprintf(stderr, "available use case:"); + for (i = 0; i < ARRAY_SIZE(use_cases); i++) + fprintf(stderr, " %s", use_cases[i].name); + fprintf(stderr, "\n"); + exit(1); +} + +static void htab_mem_setup(void) +{ + struct bpf_map *map; + const char **names; + int err; + + setup_libbpf(); + + ctx.uc = htab_mem_find_use_case_or_exit(args.use_case); + err = htab_mem_bench_init_barriers(); + if (err) { + fprintf(stderr, "failed to init barrier\n"); + exit(1); + } + + ctx.fd = cgroup_setup_and_join("/htab_mem"); + if (ctx.fd < 0) + goto cleanup; + + ctx.skel = htab_mem_bench__open(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + goto cleanup; + } + + map = ctx.skel->maps.htab; + bpf_map__set_value_size(map, args.value_size); + /* Ensure that different CPUs can operate on different subset */ + bpf_map__set_max_entries(map, MAX(8192, 64 * env.nr_cpus)); + if (args.preallocated) + bpf_map__set_map_flags(map, bpf_map__map_flags(map) & ~BPF_F_NO_PREALLOC); + + names = ctx.uc->progs; + while (*names) { + struct bpf_program *prog; + + prog = bpf_object__find_program_by_name(ctx.skel->obj, *names); + if (!prog) { + fprintf(stderr, "no such program %s\n", *names); + goto cleanup; + } + bpf_program__set_autoload(prog, true); + names++; + } + ctx.skel->bss->nr_thread = env.producer_cnt; + + err = htab_mem_bench__load(ctx.skel); + if (err) { + fprintf(stderr, "failed to load skeleton\n"); + goto cleanup; + } + err = htab_mem_bench__attach(ctx.skel); + if (err) { + fprintf(stderr, "failed to attach skeleton\n"); + goto cleanup; + } + return; + +cleanup: + htab_mem_bench__destroy(ctx.skel); + htab_mem_bench_exit_barriers(); + if (ctx.fd >= 0) { + close(ctx.fd); + cleanup_cgroup_environment(); + } + exit(1); +} + +static void htab_mem_add_fn(pthread_barrier_t *notify) +{ + while (true) { + /* Do addition */ + (void)syscall(__NR_getpgid, 0); + /* Notify deletion thread to do deletion */ + pthread_barrier_wait(notify); + /* Wait for deletion to complete */ + pthread_barrier_wait(notify); + } +} + +static void htab_mem_delete_fn(pthread_barrier_t *notify) +{ + while (true) { + /* Wait for addition to complete */ + pthread_barrier_wait(notify); + /* Do deletion */ + (void)syscall(__NR_getppid); + /* Notify addition thread to do addition */ + pthread_barrier_wait(notify); + } +} + +static void *htab_mem_producer(void *arg) +{ + pthread_barrier_t *notify; + int seq; + + if (!ctx.uc->need_sync) { + while (true) + (void)syscall(__NR_getpgid, 0); + return NULL; + } + + seq = (long)arg; + notify = &ctx.notify[seq / 2]; + if (seq & 1) + htab_mem_delete_fn(notify); + else + htab_mem_add_fn(notify); + return NULL; +} + +static void htab_mem_read_mem_cgrp_file(const char *name, unsigned long *value) +{ + char buf[32]; + ssize_t got; + int fd; + + fd = openat(ctx.fd, name, O_RDONLY); + if (fd < 0) { + /* cgroup v1 ? */ + fprintf(stderr, "no %s\n", name); + *value = 0; + return; + } + + got = read(fd, buf, sizeof(buf) - 1); + if (got <= 0) { + *value = 0; + return; + } + buf[got] = 0; + + *value = strtoull(buf, NULL, 0); + + close(fd); +} + +static void htab_mem_measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.skel->bss->op_cnt, 0) / env.producer_cnt; + htab_mem_read_mem_cgrp_file("memory.current", &res->gp_ct); +} + +static void htab_mem_report_progress(int iter, struct bench_res *res, long delta_ns) +{ + double loop, mem; + + loop = res->hits / 1000.0 / (delta_ns / 1000000000.0); + mem = res->gp_ct / 1048576.0; + printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0); + printf("per-prod-op %7.2lfk/s, memory usage %7.2lfMiB\n", loop, mem); +} + +static void htab_mem_report_final(struct bench_res res[], int res_cnt) +{ + double mem_mean = 0.0, mem_stddev = 0.0; + double loop_mean = 0.0, loop_stddev = 0.0; + unsigned long peak_mem; + int i; + + for (i = 0; i < res_cnt; i++) { + loop_mean += res[i].hits / 1000.0 / (0.0 + res_cnt); + mem_mean += res[i].gp_ct / 1048576.0 / (0.0 + res_cnt); + } + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) { + loop_stddev += (loop_mean - res[i].hits / 1000.0) * + (loop_mean - res[i].hits / 1000.0) / + (res_cnt - 1.0); + mem_stddev += (mem_mean - res[i].gp_ct / 1048576.0) * + (mem_mean - res[i].gp_ct / 1048576.0) / + (res_cnt - 1.0); + } + loop_stddev = sqrt(loop_stddev); + mem_stddev = sqrt(mem_stddev); + } + + htab_mem_read_mem_cgrp_file("memory.peak", &peak_mem); + printf("Summary: per-prod-op %7.2lf \u00B1 %7.2lfk/s, memory usage %7.2lf \u00B1 %7.2lfMiB," + " peak memory usage %7.2lfMiB\n", + loop_mean, loop_stddev, mem_mean, mem_stddev, peak_mem / 1048576.0); + + cleanup_cgroup_environment(); +} + +const struct bench bench_htab_mem = { + .name = "htab-mem", + .argp = &bench_htab_mem_argp, + .validate = htab_mem_validate, + .setup = htab_mem_setup, + .producer_thread = htab_mem_producer, + .measure = htab_mem_measure, + .report_progress = htab_mem_report_progress, + .report_final = htab_mem_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage.c b/tools/testing/selftests/bpf/benchs/bench_local_storage.c new file mode 100644 index 0000000000..452499428c --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage.c @@ -0,0 +1,282 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <argp.h> +#include <linux/btf.h> + +#include "local_storage_bench.skel.h" +#include "bench.h" + +#include <test_btf.h> + +static struct { + __u32 nr_maps; + __u32 hashmap_nr_keys_used; +} args = { + .nr_maps = 1000, + .hashmap_nr_keys_used = 1000, +}; + +enum { + ARG_NR_MAPS = 6000, + ARG_HASHMAP_NR_KEYS_USED = 6001, +}; + +static const struct argp_option opts[] = { + { "nr_maps", ARG_NR_MAPS, "NR_MAPS", 0, + "Set number of local_storage maps"}, + { "hashmap_nr_keys_used", ARG_HASHMAP_NR_KEYS_USED, "NR_KEYS", + 0, "When doing hashmap test, set number of hashmap keys test uses"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_NR_MAPS: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid nr_maps"); + argp_usage(state); + } + args.nr_maps = ret; + break; + case ARG_HASHMAP_NR_KEYS_USED: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid hashmap_nr_keys_used"); + argp_usage(state); + } + args.hashmap_nr_keys_used = ret; + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_local_storage_argp = { + .options = opts, + .parser = parse_arg, +}; + +/* Keep in sync w/ array of maps in bpf */ +#define MAX_NR_MAPS 1000 +/* keep in sync w/ same define in bpf */ +#define HASHMAP_SZ 4194304 + +static void validate(void) +{ + if (env.producer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-producer!\n"); + exit(1); + } + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); + exit(1); + } + + if (args.nr_maps > MAX_NR_MAPS) { + fprintf(stderr, "nr_maps must be <= 1000\n"); + exit(1); + } + + if (args.hashmap_nr_keys_used > HASHMAP_SZ) { + fprintf(stderr, "hashmap_nr_keys_used must be <= %u\n", HASHMAP_SZ); + exit(1); + } +} + +static struct { + struct local_storage_bench *skel; + void *bpf_obj; + struct bpf_map *array_of_maps; +} ctx; + +static void prepopulate_hashmap(int fd) +{ + int i, key, val; + + /* local_storage gets will have BPF_LOCAL_STORAGE_GET_F_CREATE flag set, so + * populate the hashmap for a similar comparison + */ + for (i = 0; i < HASHMAP_SZ; i++) { + key = val = i; + if (bpf_map_update_elem(fd, &key, &val, 0)) { + fprintf(stderr, "Error prepopulating hashmap (key %d)\n", key); + exit(1); + } + } +} + +static void __setup(struct bpf_program *prog, bool hashmap) +{ + struct bpf_map *inner_map; + int i, fd, mim_fd, err; + + LIBBPF_OPTS(bpf_map_create_opts, create_opts); + + if (!hashmap) + create_opts.map_flags = BPF_F_NO_PREALLOC; + + ctx.skel->rodata->num_maps = args.nr_maps; + ctx.skel->rodata->hashmap_num_keys = args.hashmap_nr_keys_used; + inner_map = bpf_map__inner_map(ctx.array_of_maps); + create_opts.btf_key_type_id = bpf_map__btf_key_type_id(inner_map); + create_opts.btf_value_type_id = bpf_map__btf_value_type_id(inner_map); + + err = local_storage_bench__load(ctx.skel); + if (err) { + fprintf(stderr, "Error loading skeleton\n"); + goto err_out; + } + + create_opts.btf_fd = bpf_object__btf_fd(ctx.skel->obj); + + mim_fd = bpf_map__fd(ctx.array_of_maps); + if (mim_fd < 0) { + fprintf(stderr, "Error getting map_in_map fd\n"); + goto err_out; + } + + for (i = 0; i < args.nr_maps; i++) { + if (hashmap) + fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int), + sizeof(int), HASHMAP_SZ, &create_opts); + else + fd = bpf_map_create(BPF_MAP_TYPE_TASK_STORAGE, NULL, sizeof(int), + sizeof(int), 0, &create_opts); + if (fd < 0) { + fprintf(stderr, "Error creating map %d: %d\n", i, fd); + goto err_out; + } + + if (hashmap) + prepopulate_hashmap(fd); + + err = bpf_map_update_elem(mim_fd, &i, &fd, 0); + if (err) { + fprintf(stderr, "Error updating array-of-maps w/ map %d\n", i); + goto err_out; + } + } + + if (!bpf_program__attach(prog)) { + fprintf(stderr, "Error attaching bpf program\n"); + goto err_out; + } + + return; +err_out: + exit(1); +} + +static void hashmap_setup(void) +{ + struct local_storage_bench *skel; + + setup_libbpf(); + + skel = local_storage_bench__open(); + ctx.skel = skel; + ctx.array_of_maps = skel->maps.array_of_hash_maps; + skel->rodata->use_hashmap = 1; + skel->rodata->interleave = 0; + + __setup(skel->progs.get_local, true); +} + +static void local_storage_cache_get_setup(void) +{ + struct local_storage_bench *skel; + + setup_libbpf(); + + skel = local_storage_bench__open(); + ctx.skel = skel; + ctx.array_of_maps = skel->maps.array_of_local_storage_maps; + skel->rodata->use_hashmap = 0; + skel->rodata->interleave = 0; + + __setup(skel->progs.get_local, false); +} + +static void local_storage_cache_get_interleaved_setup(void) +{ + struct local_storage_bench *skel; + + setup_libbpf(); + + skel = local_storage_bench__open(); + ctx.skel = skel; + ctx.array_of_maps = skel->maps.array_of_local_storage_maps; + skel->rodata->use_hashmap = 0; + skel->rodata->interleave = 1; + + __setup(skel->progs.get_local, false); +} + +static void measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.skel->bss->hits, 0); + res->important_hits = atomic_swap(&ctx.skel->bss->important_hits, 0); +} + +static inline void trigger_bpf_program(void) +{ + syscall(__NR_getpgid); +} + +static void *producer(void *input) +{ + while (true) + trigger_bpf_program(); + + return NULL; +} + +/* cache sequential and interleaved get benchs test local_storage get + * performance, specifically they demonstrate performance cliff of + * current list-plus-cache local_storage model. + * + * cache sequential get: call bpf_task_storage_get on n maps in order + * cache interleaved get: like "sequential get", but interleave 4 calls to the + * 'important' map (idx 0 in array_of_maps) for every 10 calls. Goal + * is to mimic environment where many progs are accessing their local_storage + * maps, with 'our' prog needing to access its map more often than others + */ +const struct bench bench_local_storage_cache_seq_get = { + .name = "local-storage-cache-seq-get", + .argp = &bench_local_storage_argp, + .validate = validate, + .setup = local_storage_cache_get_setup, + .producer_thread = producer, + .measure = measure, + .report_progress = local_storage_report_progress, + .report_final = local_storage_report_final, +}; + +const struct bench bench_local_storage_cache_interleaved_get = { + .name = "local-storage-cache-int-get", + .argp = &bench_local_storage_argp, + .validate = validate, + .setup = local_storage_cache_get_interleaved_setup, + .producer_thread = producer, + .measure = measure, + .report_progress = local_storage_report_progress, + .report_final = local_storage_report_final, +}; + +const struct bench bench_local_storage_cache_hashmap_control = { + .name = "local-storage-cache-hashmap-control", + .argp = &bench_local_storage_argp, + .validate = validate, + .setup = hashmap_setup, + .producer_thread = producer, + .measure = measure, + .report_progress = local_storage_report_progress, + .report_final = local_storage_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c new file mode 100644 index 0000000000..b36de42ee4 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include <sys/types.h> +#include <sys/socket.h> +#include <pthread.h> +#include <argp.h> + +#include "bench.h" +#include "bench_local_storage_create.skel.h" + +struct thread { + int *fds; + pthread_t *pthds; + int *pthd_results; +}; + +static struct bench_local_storage_create *skel; +static struct thread *threads; +static long create_owner_errs; +static int storage_type = BPF_MAP_TYPE_SK_STORAGE; +static int batch_sz = 32; + +enum { + ARG_BATCH_SZ = 9000, + ARG_STORAGE_TYPE = 9001, +}; + +static const struct argp_option opts[] = { + { "batch-size", ARG_BATCH_SZ, "BATCH_SIZE", 0, + "The number of storage creations in each batch" }, + { "storage-type", ARG_STORAGE_TYPE, "STORAGE_TYPE", 0, + "The type of local storage to test (socket or task)" }, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + int ret; + + switch (key) { + case ARG_BATCH_SZ: + ret = atoi(arg); + if (ret < 1) { + fprintf(stderr, "invalid batch-size\n"); + argp_usage(state); + } + batch_sz = ret; + break; + case ARG_STORAGE_TYPE: + if (!strcmp(arg, "task")) { + storage_type = BPF_MAP_TYPE_TASK_STORAGE; + } else if (!strcmp(arg, "socket")) { + storage_type = BPF_MAP_TYPE_SK_STORAGE; + } else { + fprintf(stderr, "invalid storage-type (socket or task)\n"); + argp_usage(state); + } + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_local_storage_create_argp = { + .options = opts, + .parser = parse_arg, +}; + +static void validate(void) +{ + if (env.consumer_cnt != 0) { + fprintf(stderr, + "local-storage-create benchmark does not need consumer\n"); + exit(1); + } +} + +static void setup(void) +{ + int i; + + skel = bench_local_storage_create__open_and_load(); + if (!skel) { + fprintf(stderr, "error loading skel\n"); + exit(1); + } + + skel->bss->bench_pid = getpid(); + if (storage_type == BPF_MAP_TYPE_SK_STORAGE) { + if (!bpf_program__attach(skel->progs.socket_post_create)) { + fprintf(stderr, "Error attaching bpf program\n"); + exit(1); + } + } else { + if (!bpf_program__attach(skel->progs.sched_process_fork)) { + fprintf(stderr, "Error attaching bpf program\n"); + exit(1); + } + } + + if (!bpf_program__attach(skel->progs.kmalloc)) { + fprintf(stderr, "Error attaching bpf program\n"); + exit(1); + } + + threads = calloc(env.producer_cnt, sizeof(*threads)); + + if (!threads) { + fprintf(stderr, "cannot alloc thread_res\n"); + exit(1); + } + + for (i = 0; i < env.producer_cnt; i++) { + struct thread *t = &threads[i]; + + if (storage_type == BPF_MAP_TYPE_SK_STORAGE) { + t->fds = malloc(batch_sz * sizeof(*t->fds)); + if (!t->fds) { + fprintf(stderr, "cannot alloc t->fds\n"); + exit(1); + } + } else { + t->pthds = malloc(batch_sz * sizeof(*t->pthds)); + if (!t->pthds) { + fprintf(stderr, "cannot alloc t->pthds\n"); + exit(1); + } + t->pthd_results = malloc(batch_sz * sizeof(*t->pthd_results)); + if (!t->pthd_results) { + fprintf(stderr, "cannot alloc t->pthd_results\n"); + exit(1); + } + } + } +} + +static void measure(struct bench_res *res) +{ + res->hits = atomic_swap(&skel->bss->create_cnts, 0); + res->drops = atomic_swap(&skel->bss->kmalloc_cnts, 0); +} + +static void *sk_producer(void *input) +{ + struct thread *t = &threads[(long)(input)]; + int *fds = t->fds; + int i; + + while (true) { + for (i = 0; i < batch_sz; i++) { + fds[i] = socket(AF_INET6, SOCK_DGRAM, 0); + if (fds[i] == -1) + atomic_inc(&create_owner_errs); + } + + for (i = 0; i < batch_sz; i++) { + if (fds[i] != -1) + close(fds[i]); + } + } + + return NULL; +} + +static void *thread_func(void *arg) +{ + return NULL; +} + +static void *task_producer(void *input) +{ + struct thread *t = &threads[(long)(input)]; + pthread_t *pthds = t->pthds; + int *pthd_results = t->pthd_results; + int i; + + while (true) { + for (i = 0; i < batch_sz; i++) { + pthd_results[i] = pthread_create(&pthds[i], NULL, thread_func, NULL); + if (pthd_results[i]) + atomic_inc(&create_owner_errs); + } + + for (i = 0; i < batch_sz; i++) { + if (!pthd_results[i]) + pthread_join(pthds[i], NULL);; + } + } + + return NULL; +} + +static void *producer(void *input) +{ + if (storage_type == BPF_MAP_TYPE_SK_STORAGE) + return sk_producer(input); + else + return task_producer(input); +} + +static void report_progress(int iter, struct bench_res *res, long delta_ns) +{ + double creates_per_sec, kmallocs_per_create; + + creates_per_sec = res->hits / 1000.0 / (delta_ns / 1000000000.0); + kmallocs_per_create = (double)res->drops / res->hits; + + printf("Iter %3d (%7.3lfus): ", + iter, (delta_ns - 1000000000) / 1000.0); + printf("creates %8.3lfk/s (%7.3lfk/prod), ", + creates_per_sec, creates_per_sec / env.producer_cnt); + printf("%3.2lf kmallocs/create\n", kmallocs_per_create); +} + +static void report_final(struct bench_res res[], int res_cnt) +{ + double creates_mean = 0.0, creates_stddev = 0.0; + long total_creates = 0, total_kmallocs = 0; + int i; + + for (i = 0; i < res_cnt; i++) { + creates_mean += res[i].hits / 1000.0 / (0.0 + res_cnt); + total_creates += res[i].hits; + total_kmallocs += res[i].drops; + } + + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) + creates_stddev += (creates_mean - res[i].hits / 1000.0) * + (creates_mean - res[i].hits / 1000.0) / + (res_cnt - 1.0); + creates_stddev = sqrt(creates_stddev); + } + printf("Summary: creates %8.3lf \u00B1 %5.3lfk/s (%7.3lfk/prod), ", + creates_mean, creates_stddev, creates_mean / env.producer_cnt); + printf("%4.2lf kmallocs/create\n", (double)total_kmallocs / total_creates); + if (create_owner_errs || skel->bss->create_errs) + printf("%s() errors %ld create_errs %ld\n", + storage_type == BPF_MAP_TYPE_SK_STORAGE ? + "socket" : "pthread_create", + create_owner_errs, + skel->bss->create_errs); +} + +/* Benchmark performance of creating bpf local storage */ +const struct bench bench_local_storage_create = { + .name = "local-storage-create", + .argp = &bench_local_storage_create_argp, + .validate = validate, + .setup = setup, + .producer_thread = producer, + .measure = measure, + .report_progress = report_progress, + .report_final = report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c new file mode 100644 index 0000000000..edf0b00418 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <argp.h> + +#include <sys/prctl.h> +#include "local_storage_rcu_tasks_trace_bench.skel.h" +#include "bench.h" + +#include <signal.h> + +static struct { + __u32 nr_procs; + __u32 kthread_pid; +} args = { + .nr_procs = 1000, + .kthread_pid = 0, +}; + +enum { + ARG_NR_PROCS = 7000, + ARG_KTHREAD_PID = 7001, +}; + +static const struct argp_option opts[] = { + { "nr_procs", ARG_NR_PROCS, "NR_PROCS", 0, + "Set number of user processes to spin up"}, + { "kthread_pid", ARG_KTHREAD_PID, "PID", 0, + "Pid of rcu_tasks_trace kthread for ticks tracking"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_NR_PROCS: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid nr_procs\n"); + argp_usage(state); + } + args.nr_procs = ret; + break; + case ARG_KTHREAD_PID: + ret = strtol(arg, NULL, 10); + if (ret < 1) { + fprintf(stderr, "invalid kthread_pid\n"); + argp_usage(state); + } + args.kthread_pid = ret; + break; +break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_local_storage_rcu_tasks_trace_argp = { + .options = opts, + .parser = parse_arg, +}; + +#define MAX_SLEEP_PROCS 150000 + +static void validate(void) +{ + if (env.producer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-producer!\n"); + exit(1); + } + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); + exit(1); + } + + if (args.nr_procs > MAX_SLEEP_PROCS) { + fprintf(stderr, "benchmark supports up to %u sleeper procs!\n", + MAX_SLEEP_PROCS); + exit(1); + } +} + +static long kthread_pid_ticks(void) +{ + char procfs_path[100]; + long stime; + FILE *f; + + if (!args.kthread_pid) + return -1; + + sprintf(procfs_path, "/proc/%u/stat", args.kthread_pid); + f = fopen(procfs_path, "r"); + if (!f) { + fprintf(stderr, "couldn't open %s, exiting\n", procfs_path); + goto err_out; + } + if (fscanf(f, "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %ld", &stime) != 1) { + fprintf(stderr, "fscanf of %s failed, exiting\n", procfs_path); + goto err_out; + } + fclose(f); + return stime; + +err_out: + if (f) + fclose(f); + exit(1); + return 0; +} + +static struct { + struct local_storage_rcu_tasks_trace_bench *skel; + long prev_kthread_stime; +} ctx; + +static void sleep_and_loop(void) +{ + while (true) { + sleep(rand() % 4); + syscall(__NR_getpgid); + } +} + +static void local_storage_tasks_trace_setup(void) +{ + int i, err, forkret, runner_pid; + + runner_pid = getpid(); + + for (i = 0; i < args.nr_procs; i++) { + forkret = fork(); + if (forkret < 0) { + fprintf(stderr, "Error forking sleeper proc %u of %u, exiting\n", i, + args.nr_procs); + goto err_out; + } + + if (!forkret) { + err = prctl(PR_SET_PDEATHSIG, SIGKILL); + if (err < 0) { + fprintf(stderr, "prctl failed with err %d, exiting\n", errno); + goto err_out; + } + + if (getppid() != runner_pid) { + fprintf(stderr, "Runner died while spinning up procs, exiting\n"); + goto err_out; + } + sleep_and_loop(); + } + } + printf("Spun up %u procs (our pid %d)\n", args.nr_procs, runner_pid); + + setup_libbpf(); + + ctx.skel = local_storage_rcu_tasks_trace_bench__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "Error doing open_and_load, exiting\n"); + goto err_out; + } + + ctx.prev_kthread_stime = kthread_pid_ticks(); + + if (!bpf_program__attach(ctx.skel->progs.get_local)) { + fprintf(stderr, "Error attaching bpf program\n"); + goto err_out; + } + + if (!bpf_program__attach(ctx.skel->progs.pregp_step)) { + fprintf(stderr, "Error attaching bpf program\n"); + goto err_out; + } + + if (!bpf_program__attach(ctx.skel->progs.postgp)) { + fprintf(stderr, "Error attaching bpf program\n"); + goto err_out; + } + + return; +err_out: + exit(1); +} + +static void measure(struct bench_res *res) +{ + long ticks; + + res->gp_ct = atomic_swap(&ctx.skel->bss->gp_hits, 0); + res->gp_ns = atomic_swap(&ctx.skel->bss->gp_times, 0); + ticks = kthread_pid_ticks(); + res->stime = ticks - ctx.prev_kthread_stime; + ctx.prev_kthread_stime = ticks; +} + +static void *producer(void *input) +{ + while (true) + syscall(__NR_getpgid); + return NULL; +} + +static void report_progress(int iter, struct bench_res *res, long delta_ns) +{ + if (ctx.skel->bss->unexpected) { + fprintf(stderr, "Error: Unexpected order of bpf prog calls (postgp after pregp)."); + fprintf(stderr, "Data can't be trusted, exiting\n"); + exit(1); + } + + if (env.quiet) + return; + + printf("Iter %d\t avg tasks_trace grace period latency\t%lf ns\n", + iter, res->gp_ns / (double)res->gp_ct); + printf("Iter %d\t avg ticks per tasks_trace grace period\t%lf\n", + iter, res->stime / (double)res->gp_ct); +} + +static void report_final(struct bench_res res[], int res_cnt) +{ + struct basic_stats gp_stat; + + grace_period_latency_basic_stats(res, res_cnt, &gp_stat); + printf("SUMMARY tasks_trace grace period latency"); + printf("\tavg %.3lf us\tstddev %.3lf us\n", gp_stat.mean, gp_stat.stddev); + grace_period_ticks_basic_stats(res, res_cnt, &gp_stat); + printf("SUMMARY ticks per tasks_trace grace period"); + printf("\tavg %.3lf\tstddev %.3lf\n", gp_stat.mean, gp_stat.stddev); +} + +/* local-storage-tasks-trace: Benchmark performance of BPF local_storage's use + * of RCU Tasks-Trace. + * + * Stress RCU Tasks Trace by forking many tasks, all of which do no work aside + * from sleep() loop, and creating/destroying BPF task-local storage on wakeup. + * The number of forked tasks is configurable. + * + * exercising code paths which call call_rcu_tasks_trace while there are many + * thousands of tasks on the system should result in RCU Tasks-Trace having to + * do a noticeable amount of work. + * + * This should be observable by measuring rcu_tasks_trace_kthread CPU usage + * after the grace period has ended, or by measuring grace period latency. + * + * This benchmark uses both approaches, attaching to rcu_tasks_trace_pregp_step + * and rcu_tasks_trace_postgp functions to measure grace period latency and + * using /proc/PID/stat to measure rcu_tasks_trace_kthread kernel ticks + */ +const struct bench bench_local_storage_tasks_trace = { + .name = "local-storage-tasks-trace", + .argp = &bench_local_storage_rcu_tasks_trace_argp, + .validate = validate, + .setup = local_storage_tasks_trace_setup, + .producer_thread = producer, + .measure = measure, + .report_progress = report_progress, + .report_final = report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c new file mode 100644 index 0000000000..bf66893c7a --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_rename.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <fcntl.h> +#include "bench.h" +#include "test_overhead.skel.h" + +/* BPF triggering benchmarks */ +static struct ctx { + struct test_overhead *skel; + struct counter hits; + int fd; +} ctx; + +static void validate(void) +{ + if (env.producer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-producer!\n"); + exit(1); + } + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); + exit(1); + } +} + +static void *producer(void *input) +{ + char buf[] = "test_overhead"; + int err; + + while (true) { + err = write(ctx.fd, buf, sizeof(buf)); + if (err < 0) { + fprintf(stderr, "write failed\n"); + exit(1); + } + atomic_inc(&ctx.hits.value); + } +} + +static void measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.hits.value, 0); +} + +static void setup_ctx(void) +{ + setup_libbpf(); + + ctx.skel = test_overhead__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + ctx.fd = open("/proc/self/comm", O_WRONLY|O_TRUNC); + if (ctx.fd < 0) { + fprintf(stderr, "failed to open /proc/self/comm: %d\n", -errno); + exit(1); + } +} + +static void attach_bpf(struct bpf_program *prog) +{ + struct bpf_link *link; + + link = bpf_program__attach(prog); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void setup_base(void) +{ + setup_ctx(); +} + +static void setup_kprobe(void) +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.prog1); +} + +static void setup_kretprobe(void) +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.prog2); +} + +static void setup_rawtp(void) +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.prog3); +} + +static void setup_fentry(void) +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.prog4); +} + +static void setup_fexit(void) +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.prog5); +} + +const struct bench bench_rename_base = { + .name = "rename-base", + .validate = validate, + .setup = setup_base, + .producer_thread = producer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_rename_kprobe = { + .name = "rename-kprobe", + .validate = validate, + .setup = setup_kprobe, + .producer_thread = producer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_rename_kretprobe = { + .name = "rename-kretprobe", + .validate = validate, + .setup = setup_kretprobe, + .producer_thread = producer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_rename_rawtp = { + .name = "rename-rawtp", + .validate = validate, + .setup = setup_rawtp, + .producer_thread = producer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_rename_fentry = { + .name = "rename-fentry", + .validate = validate, + .setup = setup_fentry, + .producer_thread = producer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_rename_fexit = { + .name = "rename-fexit", + .validate = validate, + .setup = setup_fexit, + .producer_thread = producer, + .measure = measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c new file mode 100644 index 0000000000..e1ee979e6a --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c @@ -0,0 +1,566 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <asm/barrier.h> +#include <linux/perf_event.h> +#include <linux/ring_buffer.h> +#include <sys/epoll.h> +#include <sys/mman.h> +#include <argp.h> +#include <stdlib.h> +#include "bench.h" +#include "ringbuf_bench.skel.h" +#include "perfbuf_bench.skel.h" + +static struct { + bool back2back; + int batch_cnt; + bool sampled; + int sample_rate; + int ringbuf_sz; /* per-ringbuf, in bytes */ + bool ringbuf_use_output; /* use slower output API */ + int perfbuf_sz; /* per-CPU size, in pages */ +} args = { + .back2back = false, + .batch_cnt = 500, + .sampled = false, + .sample_rate = 500, + .ringbuf_sz = 512 * 1024, + .ringbuf_use_output = false, + .perfbuf_sz = 128, +}; + +enum { + ARG_RB_BACK2BACK = 2000, + ARG_RB_USE_OUTPUT = 2001, + ARG_RB_BATCH_CNT = 2002, + ARG_RB_SAMPLED = 2003, + ARG_RB_SAMPLE_RATE = 2004, +}; + +static const struct argp_option opts[] = { + { "rb-b2b", ARG_RB_BACK2BACK, NULL, 0, "Back-to-back mode"}, + { "rb-use-output", ARG_RB_USE_OUTPUT, NULL, 0, "Use bpf_ringbuf_output() instead of bpf_ringbuf_reserve()"}, + { "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"}, + { "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"}, + { "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case ARG_RB_BACK2BACK: + args.back2back = true; + break; + case ARG_RB_USE_OUTPUT: + args.ringbuf_use_output = true; + break; + case ARG_RB_BATCH_CNT: + args.batch_cnt = strtol(arg, NULL, 10); + if (args.batch_cnt < 0) { + fprintf(stderr, "Invalid batch count."); + argp_usage(state); + } + break; + case ARG_RB_SAMPLED: + args.sampled = true; + break; + case ARG_RB_SAMPLE_RATE: + args.sample_rate = strtol(arg, NULL, 10); + if (args.sample_rate < 0) { + fprintf(stderr, "Invalid perfbuf sample rate."); + argp_usage(state); + } + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +/* exported into benchmark runner */ +const struct argp bench_ringbufs_argp = { + .options = opts, + .parser = parse_arg, +}; + +/* RINGBUF-LIBBPF benchmark */ + +static struct counter buf_hits; + +static inline void bufs_trigger_batch(void) +{ + (void)syscall(__NR_getpgid); +} + +static void bufs_validate(void) +{ + if (env.consumer_cnt != 1) { + fprintf(stderr, "rb-libbpf benchmark needs one consumer!\n"); + exit(1); + } + + if (args.back2back && env.producer_cnt > 1) { + fprintf(stderr, "back-to-back mode makes sense only for single-producer case!\n"); + exit(1); + } +} + +static void *bufs_sample_producer(void *input) +{ + if (args.back2back) { + /* initial batch to get everything started */ + bufs_trigger_batch(); + return NULL; + } + + while (true) + bufs_trigger_batch(); + return NULL; +} + +static struct ringbuf_libbpf_ctx { + struct ringbuf_bench *skel; + struct ring_buffer *ringbuf; +} ringbuf_libbpf_ctx; + +static void ringbuf_libbpf_measure(struct bench_res *res) +{ + struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx; + + res->hits = atomic_swap(&buf_hits.value, 0); + res->drops = atomic_swap(&ctx->skel->bss->dropped, 0); +} + +static struct ringbuf_bench *ringbuf_setup_skeleton(void) +{ + struct ringbuf_bench *skel; + + setup_libbpf(); + + skel = ringbuf_bench__open(); + if (!skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + skel->rodata->batch_cnt = args.batch_cnt; + skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0; + + if (args.sampled) + /* record data + header take 16 bytes */ + skel->rodata->wakeup_data_size = args.sample_rate * 16; + + bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz); + + if (ringbuf_bench__load(skel)) { + fprintf(stderr, "failed to load skeleton\n"); + exit(1); + } + + return skel; +} + +static int buf_process_sample(void *ctx, void *data, size_t len) +{ + atomic_inc(&buf_hits.value); + return 0; +} + +static void ringbuf_libbpf_setup(void) +{ + struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx; + struct bpf_link *link; + + ctx->skel = ringbuf_setup_skeleton(); + ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf), + buf_process_sample, NULL, NULL); + if (!ctx->ringbuf) { + fprintf(stderr, "failed to create ringbuf\n"); + exit(1); + } + + link = bpf_program__attach(ctx->skel->progs.bench_ringbuf); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void *ringbuf_libbpf_consumer(void *input) +{ + struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx; + + while (ring_buffer__poll(ctx->ringbuf, -1) >= 0) { + if (args.back2back) + bufs_trigger_batch(); + } + fprintf(stderr, "ringbuf polling failed!\n"); + return NULL; +} + +/* RINGBUF-CUSTOM benchmark */ +struct ringbuf_custom { + __u64 *consumer_pos; + __u64 *producer_pos; + __u64 mask; + void *data; + int map_fd; +}; + +static struct ringbuf_custom_ctx { + struct ringbuf_bench *skel; + struct ringbuf_custom ringbuf; + int epoll_fd; + struct epoll_event event; +} ringbuf_custom_ctx; + +static void ringbuf_custom_measure(struct bench_res *res) +{ + struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx; + + res->hits = atomic_swap(&buf_hits.value, 0); + res->drops = atomic_swap(&ctx->skel->bss->dropped, 0); +} + +static void ringbuf_custom_setup(void) +{ + struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx; + const size_t page_size = getpagesize(); + struct bpf_link *link; + struct ringbuf_custom *r; + void *tmp; + int err; + + ctx->skel = ringbuf_setup_skeleton(); + + ctx->epoll_fd = epoll_create1(EPOLL_CLOEXEC); + if (ctx->epoll_fd < 0) { + fprintf(stderr, "failed to create epoll fd: %d\n", -errno); + exit(1); + } + + r = &ctx->ringbuf; + r->map_fd = bpf_map__fd(ctx->skel->maps.ringbuf); + r->mask = args.ringbuf_sz - 1; + + /* Map writable consumer page */ + tmp = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, + r->map_fd, 0); + if (tmp == MAP_FAILED) { + fprintf(stderr, "failed to mmap consumer page: %d\n", -errno); + exit(1); + } + r->consumer_pos = tmp; + + /* Map read-only producer page and data pages. */ + tmp = mmap(NULL, page_size + 2 * args.ringbuf_sz, PROT_READ, MAP_SHARED, + r->map_fd, page_size); + if (tmp == MAP_FAILED) { + fprintf(stderr, "failed to mmap data pages: %d\n", -errno); + exit(1); + } + r->producer_pos = tmp; + r->data = tmp + page_size; + + ctx->event.events = EPOLLIN; + err = epoll_ctl(ctx->epoll_fd, EPOLL_CTL_ADD, r->map_fd, &ctx->event); + if (err < 0) { + fprintf(stderr, "failed to epoll add ringbuf: %d\n", -errno); + exit(1); + } + + link = bpf_program__attach(ctx->skel->progs.bench_ringbuf); + if (!link) { + fprintf(stderr, "failed to attach program\n"); + exit(1); + } +} + +#define RINGBUF_BUSY_BIT (1 << 31) +#define RINGBUF_DISCARD_BIT (1 << 30) +#define RINGBUF_META_LEN 8 + +static inline int roundup_len(__u32 len) +{ + /* clear out top 2 bits */ + len <<= 2; + len >>= 2; + /* add length prefix */ + len += RINGBUF_META_LEN; + /* round up to 8 byte alignment */ + return (len + 7) / 8 * 8; +} + +static void ringbuf_custom_process_ring(struct ringbuf_custom *r) +{ + unsigned long cons_pos, prod_pos; + int *len_ptr, len; + bool got_new_data; + + cons_pos = smp_load_acquire(r->consumer_pos); + while (true) { + got_new_data = false; + prod_pos = smp_load_acquire(r->producer_pos); + while (cons_pos < prod_pos) { + len_ptr = r->data + (cons_pos & r->mask); + len = smp_load_acquire(len_ptr); + + /* sample not committed yet, bail out for now */ + if (len & RINGBUF_BUSY_BIT) + return; + + got_new_data = true; + cons_pos += roundup_len(len); + + atomic_inc(&buf_hits.value); + } + if (got_new_data) + smp_store_release(r->consumer_pos, cons_pos); + else + break; + } +} + +static void *ringbuf_custom_consumer(void *input) +{ + struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx; + int cnt; + + do { + if (args.back2back) + bufs_trigger_batch(); + cnt = epoll_wait(ctx->epoll_fd, &ctx->event, 1, -1); + if (cnt > 0) + ringbuf_custom_process_ring(&ctx->ringbuf); + } while (cnt >= 0); + fprintf(stderr, "ringbuf polling failed!\n"); + return 0; +} + +/* PERFBUF-LIBBPF benchmark */ +static struct perfbuf_libbpf_ctx { + struct perfbuf_bench *skel; + struct perf_buffer *perfbuf; +} perfbuf_libbpf_ctx; + +static void perfbuf_measure(struct bench_res *res) +{ + struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx; + + res->hits = atomic_swap(&buf_hits.value, 0); + res->drops = atomic_swap(&ctx->skel->bss->dropped, 0); +} + +static struct perfbuf_bench *perfbuf_setup_skeleton(void) +{ + struct perfbuf_bench *skel; + + setup_libbpf(); + + skel = perfbuf_bench__open(); + if (!skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + skel->rodata->batch_cnt = args.batch_cnt; + + if (perfbuf_bench__load(skel)) { + fprintf(stderr, "failed to load skeleton\n"); + exit(1); + } + + return skel; +} + +static enum bpf_perf_event_ret +perfbuf_process_sample_raw(void *input_ctx, int cpu, + struct perf_event_header *e) +{ + switch (e->type) { + case PERF_RECORD_SAMPLE: + atomic_inc(&buf_hits.value); + break; + case PERF_RECORD_LOST: + break; + default: + return LIBBPF_PERF_EVENT_ERROR; + } + return LIBBPF_PERF_EVENT_CONT; +} + +static void perfbuf_libbpf_setup(void) +{ + struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx; + struct perf_event_attr attr; + struct bpf_link *link; + + ctx->skel = perfbuf_setup_skeleton(); + + memset(&attr, 0, sizeof(attr)); + attr.config = PERF_COUNT_SW_BPF_OUTPUT; + attr.type = PERF_TYPE_SOFTWARE; + attr.sample_type = PERF_SAMPLE_RAW; + /* notify only every Nth sample */ + if (args.sampled) { + attr.sample_period = args.sample_rate; + attr.wakeup_events = args.sample_rate; + } else { + attr.sample_period = 1; + attr.wakeup_events = 1; + } + + if (args.sample_rate > args.batch_cnt) { + fprintf(stderr, "sample rate %d is too high for given batch count %d\n", + args.sample_rate, args.batch_cnt); + exit(1); + } + + ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf), + args.perfbuf_sz, &attr, + perfbuf_process_sample_raw, NULL, NULL); + if (!ctx->perfbuf) { + fprintf(stderr, "failed to create perfbuf\n"); + exit(1); + } + + link = bpf_program__attach(ctx->skel->progs.bench_perfbuf); + if (!link) { + fprintf(stderr, "failed to attach program\n"); + exit(1); + } +} + +static void *perfbuf_libbpf_consumer(void *input) +{ + struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx; + + while (perf_buffer__poll(ctx->perfbuf, -1) >= 0) { + if (args.back2back) + bufs_trigger_batch(); + } + fprintf(stderr, "perfbuf polling failed!\n"); + return NULL; +} + +/* PERFBUF-CUSTOM benchmark */ + +/* copies of internal libbpf definitions */ +struct perf_cpu_buf { + struct perf_buffer *pb; + void *base; /* mmap()'ed memory */ + void *buf; /* for reconstructing segmented data */ + size_t buf_size; + int fd; + int cpu; + int map_key; +}; + +struct perf_buffer { + perf_buffer_event_fn event_cb; + perf_buffer_sample_fn sample_cb; + perf_buffer_lost_fn lost_cb; + void *ctx; /* passed into callbacks */ + + size_t page_size; + size_t mmap_size; + struct perf_cpu_buf **cpu_bufs; + struct epoll_event *events; + int cpu_cnt; /* number of allocated CPU buffers */ + int epoll_fd; /* perf event FD */ + int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */ +}; + +static void *perfbuf_custom_consumer(void *input) +{ + struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx; + struct perf_buffer *pb = ctx->perfbuf; + struct perf_cpu_buf *cpu_buf; + struct perf_event_mmap_page *header; + size_t mmap_mask = pb->mmap_size - 1; + struct perf_event_header *ehdr; + __u64 data_head, data_tail; + size_t ehdr_size; + void *base; + int i, cnt; + + while (true) { + if (args.back2back) + bufs_trigger_batch(); + cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, -1); + if (cnt <= 0) { + fprintf(stderr, "perf epoll failed: %d\n", -errno); + exit(1); + } + + for (i = 0; i < cnt; ++i) { + cpu_buf = pb->events[i].data.ptr; + header = cpu_buf->base; + base = ((void *)header) + pb->page_size; + + data_head = ring_buffer_read_head(header); + data_tail = header->data_tail; + while (data_head != data_tail) { + ehdr = base + (data_tail & mmap_mask); + ehdr_size = ehdr->size; + + if (ehdr->type == PERF_RECORD_SAMPLE) + atomic_inc(&buf_hits.value); + + data_tail += ehdr_size; + } + ring_buffer_write_tail(header, data_tail); + } + } + return NULL; +} + +const struct bench bench_rb_libbpf = { + .name = "rb-libbpf", + .argp = &bench_ringbufs_argp, + .validate = bufs_validate, + .setup = ringbuf_libbpf_setup, + .producer_thread = bufs_sample_producer, + .consumer_thread = ringbuf_libbpf_consumer, + .measure = ringbuf_libbpf_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_rb_custom = { + .name = "rb-custom", + .argp = &bench_ringbufs_argp, + .validate = bufs_validate, + .setup = ringbuf_custom_setup, + .producer_thread = bufs_sample_producer, + .consumer_thread = ringbuf_custom_consumer, + .measure = ringbuf_custom_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_pb_libbpf = { + .name = "pb-libbpf", + .argp = &bench_ringbufs_argp, + .validate = bufs_validate, + .setup = perfbuf_libbpf_setup, + .producer_thread = bufs_sample_producer, + .consumer_thread = perfbuf_libbpf_consumer, + .measure = perfbuf_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_pb_custom = { + .name = "pb-custom", + .argp = &bench_ringbufs_argp, + .validate = bufs_validate, + .setup = perfbuf_libbpf_setup, + .producer_thread = bufs_sample_producer, + .consumer_thread = perfbuf_custom_consumer, + .measure = perfbuf_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + diff --git a/tools/testing/selftests/bpf/benchs/bench_strncmp.c b/tools/testing/selftests/bpf/benchs/bench_strncmp.c new file mode 100644 index 0000000000..a5e1428fd7 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_strncmp.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2021. Huawei Technologies Co., Ltd */ +#include <argp.h> +#include "bench.h" +#include "strncmp_bench.skel.h" + +static struct strncmp_ctx { + struct strncmp_bench *skel; +} ctx; + +static struct strncmp_args { + u32 cmp_str_len; +} args = { + .cmp_str_len = 32, +}; + +enum { + ARG_CMP_STR_LEN = 5000, +}; + +static const struct argp_option opts[] = { + { "cmp-str-len", ARG_CMP_STR_LEN, "CMP_STR_LEN", 0, + "Set the length of compared string" }, + {}, +}; + +static error_t strncmp_parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case ARG_CMP_STR_LEN: + args.cmp_str_len = strtoul(arg, NULL, 10); + if (!args.cmp_str_len || + args.cmp_str_len >= sizeof(ctx.skel->bss->str)) { + fprintf(stderr, "Invalid cmp str len (limit %zu)\n", + sizeof(ctx.skel->bss->str)); + argp_usage(state); + } + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_strncmp_argp = { + .options = opts, + .parser = strncmp_parse_arg, +}; + +static void strncmp_validate(void) +{ + if (env.consumer_cnt != 0) { + fprintf(stderr, "strncmp benchmark doesn't support consumer!\n"); + exit(1); + } +} + +static void strncmp_setup(void) +{ + int err; + char *target; + size_t i, sz; + + sz = sizeof(ctx.skel->rodata->target); + if (!sz || sz < sizeof(ctx.skel->bss->str)) { + fprintf(stderr, "invalid string size (target %zu, src %zu)\n", + sz, sizeof(ctx.skel->bss->str)); + exit(1); + } + + setup_libbpf(); + + ctx.skel = strncmp_bench__open(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + srandom(time(NULL)); + target = ctx.skel->rodata->target; + for (i = 0; i < sz - 1; i++) + target[i] = '1' + random() % 9; + target[sz - 1] = '\0'; + + ctx.skel->rodata->cmp_str_len = args.cmp_str_len; + + memcpy(ctx.skel->bss->str, target, args.cmp_str_len); + ctx.skel->bss->str[args.cmp_str_len] = '\0'; + /* Make bss->str < rodata->target */ + ctx.skel->bss->str[args.cmp_str_len - 1] -= 1; + + err = strncmp_bench__load(ctx.skel); + if (err) { + fprintf(stderr, "failed to load skeleton\n"); + strncmp_bench__destroy(ctx.skel); + exit(1); + } +} + +static void strncmp_attach_prog(struct bpf_program *prog) +{ + struct bpf_link *link; + + link = bpf_program__attach(prog); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void strncmp_no_helper_setup(void) +{ + strncmp_setup(); + strncmp_attach_prog(ctx.skel->progs.strncmp_no_helper); +} + +static void strncmp_helper_setup(void) +{ + strncmp_setup(); + strncmp_attach_prog(ctx.skel->progs.strncmp_helper); +} + +static void *strncmp_producer(void *ctx) +{ + while (true) + (void)syscall(__NR_getpgid); + return NULL; +} + +static void strncmp_measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.skel->bss->hits, 0); +} + +const struct bench bench_strncmp_no_helper = { + .name = "strncmp-no-helper", + .argp = &bench_strncmp_argp, + .validate = strncmp_validate, + .setup = strncmp_no_helper_setup, + .producer_thread = strncmp_producer, + .measure = strncmp_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_strncmp_helper = { + .name = "strncmp-helper", + .argp = &bench_strncmp_argp, + .validate = strncmp_validate, + .setup = strncmp_helper_setup, + .producer_thread = strncmp_producer, + .measure = strncmp_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c new file mode 100644 index 0000000000..dbd362771d --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -0,0 +1,311 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bench.h" +#include "trigger_bench.skel.h" +#include "trace_helpers.h" + +/* BPF triggering benchmarks */ +static struct trigger_ctx { + struct trigger_bench *skel; +} ctx; + +static struct counter base_hits; + +static void trigger_validate(void) +{ + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); + exit(1); + } +} + +static void *trigger_base_producer(void *input) +{ + while (true) { + (void)syscall(__NR_getpgid); + atomic_inc(&base_hits.value); + } + return NULL; +} + +static void trigger_base_measure(struct bench_res *res) +{ + res->hits = atomic_swap(&base_hits.value, 0); +} + +static void *trigger_producer(void *input) +{ + while (true) + (void)syscall(__NR_getpgid); + return NULL; +} + +static void trigger_measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.skel->bss->hits, 0); +} + +static void setup_ctx(void) +{ + setup_libbpf(); + + ctx.skel = trigger_bench__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } +} + +static void attach_bpf(struct bpf_program *prog) +{ + struct bpf_link *link; + + link = bpf_program__attach(prog); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } +} + +static void trigger_tp_setup(void) +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_tp); +} + +static void trigger_rawtp_setup(void) +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_raw_tp); +} + +static void trigger_kprobe_setup(void) +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_kprobe); +} + +static void trigger_fentry_setup(void) +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_fentry); +} + +static void trigger_fentry_sleep_setup(void) +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep); +} + +static void trigger_fmodret_setup(void) +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_fmodret); +} + +/* make sure call is not inlined and not avoided by compiler, so __weak and + * inline asm volatile in the body of the function + * + * There is a performance difference between uprobing at nop location vs other + * instructions. So use two different targets, one of which starts with nop + * and another doesn't. + * + * GCC doesn't generate stack setup preample for these functions due to them + * having no input arguments and doing nothing in the body. + */ +__weak void uprobe_target_with_nop(void) +{ + asm volatile ("nop"); +} + +__weak void uprobe_target_without_nop(void) +{ + asm volatile (""); +} + +static void *uprobe_base_producer(void *input) +{ + while (true) { + uprobe_target_with_nop(); + atomic_inc(&base_hits.value); + } + return NULL; +} + +static void *uprobe_producer_with_nop(void *input) +{ + while (true) + uprobe_target_with_nop(); + return NULL; +} + +static void *uprobe_producer_without_nop(void *input) +{ + while (true) + uprobe_target_without_nop(); + return NULL; +} + +static void usetup(bool use_retprobe, bool use_nop) +{ + size_t uprobe_offset; + struct bpf_link *link; + + setup_libbpf(); + + ctx.skel = trigger_bench__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + if (use_nop) + uprobe_offset = get_uprobe_offset(&uprobe_target_with_nop); + else + uprobe_offset = get_uprobe_offset(&uprobe_target_without_nop); + + link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe, + use_retprobe, + -1 /* all PIDs */, + "/proc/self/exe", + uprobe_offset); + if (!link) { + fprintf(stderr, "failed to attach uprobe!\n"); + exit(1); + } + ctx.skel->links.bench_trigger_uprobe = link; +} + +static void uprobe_setup_with_nop(void) +{ + usetup(false, true); +} + +static void uretprobe_setup_with_nop(void) +{ + usetup(true, true); +} + +static void uprobe_setup_without_nop(void) +{ + usetup(false, false); +} + +static void uretprobe_setup_without_nop(void) +{ + usetup(true, false); +} + +const struct bench bench_trig_base = { + .name = "trig-base", + .validate = trigger_validate, + .producer_thread = trigger_base_producer, + .measure = trigger_base_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_tp = { + .name = "trig-tp", + .validate = trigger_validate, + .setup = trigger_tp_setup, + .producer_thread = trigger_producer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_rawtp = { + .name = "trig-rawtp", + .validate = trigger_validate, + .setup = trigger_rawtp_setup, + .producer_thread = trigger_producer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_kprobe = { + .name = "trig-kprobe", + .validate = trigger_validate, + .setup = trigger_kprobe_setup, + .producer_thread = trigger_producer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_fentry = { + .name = "trig-fentry", + .validate = trigger_validate, + .setup = trigger_fentry_setup, + .producer_thread = trigger_producer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_fentry_sleep = { + .name = "trig-fentry-sleep", + .validate = trigger_validate, + .setup = trigger_fentry_sleep_setup, + .producer_thread = trigger_producer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_fmodret = { + .name = "trig-fmodret", + .validate = trigger_validate, + .setup = trigger_fmodret_setup, + .producer_thread = trigger_producer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_uprobe_base = { + .name = "trig-uprobe-base", + .setup = NULL, /* no uprobe/uretprobe is attached */ + .producer_thread = uprobe_base_producer, + .measure = trigger_base_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_uprobe_with_nop = { + .name = "trig-uprobe-with-nop", + .setup = uprobe_setup_with_nop, + .producer_thread = uprobe_producer_with_nop, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_uretprobe_with_nop = { + .name = "trig-uretprobe-with-nop", + .setup = uretprobe_setup_with_nop, + .producer_thread = uprobe_producer_with_nop, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_uprobe_without_nop = { + .name = "trig-uprobe-without-nop", + .setup = uprobe_setup_without_nop, + .producer_thread = uprobe_producer_without_nop, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_trig_uretprobe_without_nop = { + .name = "trig-uretprobe-without-nop", + .setup = uretprobe_setup_without_nop, + .producer_thread = uprobe_producer_without_nop, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh new file mode 100755 index 0000000000..8ffd385ab2 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +header "Bloom filter map" +for v in 2 4 8 16 40; do +for t in 1 4 8 12 16; do +for h in {1..10}; do +subtitle "value_size: $v bytes, # threads: $t, # hashes: $h" + for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do + printf "%'d entries -\n" $e + printf "\t" + summarize "Lookups, total operations: " \ + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-lookup)" + printf "\t" + summarize "Updates, total operations: " \ + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-update)" + printf "\t" + summarize_percentage "False positive rate: " \ + "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-false-positive)" + done + printf "\n" +done +done +done + +header "Hashmap without bloom filter vs. hashmap with bloom filter (throughput, 8 threads)" +for v in 2 4 8 16 40; do +for h in {1..10}; do +subtitle "value_size: $v, # hashes: $h" + for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do + printf "%'d entries -\n" $e + printf "\t" + summarize_total "Hashmap without bloom filter: " \ + "$($RUN_BENCH --nr_hash_funcs $h --nr_entries $e --value_size $v -p 8 hashmap-without-bloom)" + printf "\t" + summarize_total "Hashmap with bloom filter: " \ + "$($RUN_BENCH --nr_hash_funcs $h --nr_entries $e --value_size $v -p 8 hashmap-with-bloom)" + done + printf "\n" +done +done diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh new file mode 100755 index 0000000000..cd2efd3fde --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +nr_threads=`expr $(cat /proc/cpuinfo | grep "processor"| wc -l) - 1` +summary=$($RUN_BENCH -p $nr_threads bpf-hashmap-full-update) +printf "$summary" +printf "\n" diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh new file mode 100755 index 0000000000..d4f5f73b35 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +for t in 1 4 8 12 16; do +for i in 10 100 500 1000 5000 10000 50000 100000 500000 1000000; do +subtitle "nr_loops: $i, nr_threads: $t" + summarize_ops "bpf_loop: " \ + "$($RUN_BENCH -p $t --nr_loops $i bpf-loop)" + printf "\n" +done +done diff --git a/tools/testing/selftests/bpf/benchs/run_bench_htab_mem.sh b/tools/testing/selftests/bpf/benchs/run_bench_htab_mem.sh new file mode 100755 index 0000000000..9ff5832463 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_htab_mem.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +htab_mem() +{ + echo -n "per-prod-op: " + echo -n "$*" | sed -E "s/.* per-prod-op\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+k\/s).*/\1/" + echo -n -e ", avg mem: " + echo -n "$*" | sed -E "s/.* memory usage\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+MiB).*/\1/" + echo -n ", peak mem: " + echo "$*" | sed -E "s/.* peak memory usage\s+([0-9]+\.[0-9]+MiB).*/\1/" +} + +summarize_htab_mem() +{ + local bench="$1" + local summary=$(echo $2 | tail -n1) + + printf "%-20s %s\n" "$bench" "$(htab_mem $summary)" +} + +htab_mem_bench() +{ + local name + + for name in overwrite batch_add_batch_del add_del_on_diff_cpu + do + summarize_htab_mem "$name" "$($RUN_BENCH htab-mem --use-case $name -p8 "$@")" + done +} + +header "preallocated" +htab_mem_bench "--preallocated" + +header "normal bpf ma" +htab_mem_bench diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh new file mode 100755 index 0000000000..2eb2b513a1 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +header "Hashmap Control" +for i in 10 1000 10000 100000 4194304; do +subtitle "num keys: $i" + summarize_local_storage "hashmap (control) sequential get: "\ + "$(./bench --nr_maps 1 --hashmap_nr_keys_used=$i local-storage-cache-hashmap-control)" + printf "\n" +done + +header "Local Storage" +for i in 1 10 16 17 24 32 100 1000; do +subtitle "num_maps: $i" + summarize_local_storage "local_storage cache sequential get: "\ + "$(./bench --nr_maps $i local-storage-cache-seq-get)" + summarize_local_storage "local_storage cache interleaved get: "\ + "$(./bench --nr_maps $i local-storage-cache-int-get)" + printf "\n" +done diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh new file mode 100755 index 0000000000..3e8a969f20 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +kthread_pid=`pgrep rcu_tasks_trace_kthread` + +if [ -z $kthread_pid ]; then + echo "error: Couldn't find rcu_tasks_trace_kthread" + exit 1 +fi + +./bench --nr_procs 15000 --kthread_pid $kthread_pid -d 600 --quiet local-storage-tasks-trace diff --git a/tools/testing/selftests/bpf/benchs/run_bench_rename.sh b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh new file mode 100755 index 0000000000..7b281dbe41 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -eufo pipefail + +for i in base kprobe kretprobe rawtp fentry fexit +do + summary=$(sudo ./bench -w2 -d5 -a rename-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-) + printf "%-10s: %s\n" $i "$summary" +done diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh new file mode 100755 index 0000000000..91e3567962 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh @@ -0,0 +1,51 @@ +#!/bin/bash + +source ./benchs/run_common.sh + +set -eufo pipefail + +RUN_RB_BENCH="$RUN_BENCH -c1" + +header "Single-producer, parallel producer" +for b in rb-libbpf rb-custom pb-libbpf pb-custom; do + summarize $b "$($RUN_RB_BENCH $b)" +done + +header "Single-producer, parallel producer, sampled notification" +for b in rb-libbpf rb-custom pb-libbpf pb-custom; do + summarize $b "$($RUN_RB_BENCH --rb-sampled $b)" +done + +header "Single-producer, back-to-back mode" +for b in rb-libbpf rb-custom pb-libbpf pb-custom; do + summarize $b "$($RUN_RB_BENCH --rb-b2b $b)" + summarize $b-sampled "$($RUN_RB_BENCH --rb-sampled --rb-b2b $b)" +done + +header "Ringbuf back-to-back, effect of sample rate" +for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do + summarize "rb-sampled-$b" "$($RUN_RB_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)" +done +header "Perfbuf back-to-back, effect of sample rate" +for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do + summarize "pb-sampled-$b" "$($RUN_RB_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)" +done + +header "Ringbuf back-to-back, reserve+commit vs output" +summarize "reserve" "$($RUN_RB_BENCH --rb-b2b rb-custom)" +summarize "output" "$($RUN_RB_BENCH --rb-b2b --rb-use-output rb-custom)" + +header "Ringbuf sampled, reserve+commit vs output" +summarize "reserve-sampled" "$($RUN_RB_BENCH --rb-sampled rb-custom)" +summarize "output-sampled" "$($RUN_RB_BENCH --rb-sampled --rb-use-output rb-custom)" + +header "Single-producer, consumer/producer competing on the same CPU, low batch count" +for b in rb-libbpf rb-custom pb-libbpf pb-custom; do + summarize $b "$($RUN_RB_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)" +done + +header "Ringbuf, multi-producer contention" +for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do + summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)" +done + diff --git a/tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh b/tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh new file mode 100755 index 0000000000..142697284b --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +for s in 1 8 64 512 2048 4095; do + for b in no-helper helper; do + summarize ${b}-${s} "$($RUN_BENCH --cmp-str-len=$s strncmp-${b})" + done +done diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh new file mode 100755 index 0000000000..78e83f2432 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -eufo pipefail + +for i in base tp rawtp kprobe fentry fmodret +do + summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-) + printf "%-10s: %s\n" $i "$summary" +done diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh new file mode 100644 index 0000000000..d9f40af820 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_common.sh @@ -0,0 +1,92 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +RUN_BENCH="sudo ./bench -w3 -d10 -a" + +function header() +{ + local len=${#1} + + printf "\n%s\n" "$1" + for i in $(seq 1 $len); do printf '='; done + printf '\n' +} + +function subtitle() +{ + local len=${#1} + printf "\t%s\n" "$1" +} + +function hits() +{ + echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + +function drops() +{ + echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + +function percentage() +{ + echo "$*" | sed -E "s/.*Percentage\s=\s+([0-9]+\.[0-9]+).*/\1/" +} + +function ops() +{ + echo -n "throughput: " + echo -n "$*" | sed -E "s/.*throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/" + echo -n -e ", latency: " + echo "$*" | sed -E "s/.*latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/" +} + +function local_storage() +{ + echo -n "hits throughput: " + echo -n "$*" | sed -E "s/.* hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/" + echo -n -e ", hits latency: " + echo -n "$*" | sed -E "s/.* hits latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/" + echo -n ", important_hits throughput: " + echo "$*" | sed -E "s/.*important_hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/" +} + +function total() +{ + echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" +} + +function summarize() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)" +} + +function summarize_percentage() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s%%\n" "$bench" "$(percentage $summary)" +} + +function summarize_ops() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s\n" "$bench" "$(ops $summary)" +} + +function summarize_local_storage() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s\n" "$bench" "$(local_storage $summary)" +} + +function summarize_total() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s\n" "$bench" "$(total $summary)" +} |