summaryrefslogtreecommitdiffstats
path: root/lib/util
diff options
context:
space:
mode:
Diffstat (limited to 'lib/util')
-rw-r--r--lib/util/Makefile34
-rw-r--r--lib/util/compat.h12
-rw-r--r--lib/util/logging.c96
-rw-r--r--lib/util/logging.h35
-rw-r--r--lib/util/params.c808
-rw-r--r--lib/util/params.h149
-rw-r--r--lib/util/stats.c292
-rw-r--r--lib/util/stats.h26
-rw-r--r--lib/util/util.c946
-rw-r--r--lib/util/util.h97
-rw-r--r--lib/util/util.mk2
-rw-r--r--lib/util/xdp_sample.bpf.c18
-rw-r--r--lib/util/xdp_sample.c1643
-rw-r--r--lib/util/xdp_sample.h133
-rw-r--r--lib/util/xpcapng.c635
-rw-r--r--lib/util/xpcapng.h58
16 files changed, 4984 insertions, 0 deletions
diff --git a/lib/util/Makefile b/lib/util/Makefile
new file mode 100644
index 0000000..24070f0
--- /dev/null
+++ b/lib/util/Makefile
@@ -0,0 +1,34 @@
+# Build rules for the utility objects in lib/util and for the skeleton headers
+# generated from the BPF objects.
+# NOTE(review): UTIL_OBJS and UTIL_BPF_OBJS are not assigned in this file;
+# presumably they come from util.mk - confirm.
+include util.mk
+
+LIB_DIR ?= ..
+
+include $(LIB_DIR)/defines.mk
+include $(LIBXDP_DIR)/libxdp.mk
+
+all: $(UTIL_OBJS)
+
+# One skeleton header per BPF object: foo.bpf.o -> foo.skel.h
+UTIL_SKEL_H = $(UTIL_BPF_OBJS:.bpf.o=.skel.h)
+
+# Each userspace object depends on its own .c/.h pair and on every skeleton
+# header, so the skeletons are generated before any object is compiled.
+$(UTIL_OBJS): %.o: %.c %.h $(UTIL_SKEL_H) $(LIBMK)
+ $(QUIET_CC)$(CC) $(CFLAGS) $(CPPFLAGS) -Wall -I../../headers -c -o $@ $<
+
+clean:
+ $(Q)rm -f $(UTIL_OBJS) $(UTIL_BPF_OBJS) $(UTIL_SKEL_H)
+
+BPF_CFLAGS += -I$(HEADER_DIR) $(ARCH_INCLUDES)
+
+# BPF objects are built in two steps: clang emits LLVM IR (.ll), then llc turns
+# that IR into the final object file.
+$(UTIL_BPF_OBJS): %.o: %.c $(KERN_USER_H) $(BPF_HEADERS) $(LIBMK)
+ $(QUIET_CLANG)$(CLANG) -S \
+ -target $(BPF_TARGET) \
+ -D __BPF_TRACING__ \
+ $(BPF_CFLAGS) \
+ -Wall \
+ -Wno-unused-value \
+ -Wno-pointer-sign \
+ -Wno-compare-distinct-pointer-types \
+ -Werror \
+ -O2 -emit-llvm -c -g -o ${@:.o=.ll} $<
+ $(QUIET_LLC)$(LLC) -march=$(BPF_TARGET) -filetype=obj -o $@ ${@:.o=.ll}
+
+# Generate the skeleton header from the BPF object; the skeleton name is the
+# header file name with the .skel.h suffix removed.
+$(UTIL_SKEL_H): %.skel.h: %.bpf.o
+ $(QUIET_GEN)$(BPFTOOL) gen skeleton $< name ${@:.skel.h=} > $@
diff --git a/lib/util/compat.h b/lib/util/compat.h
new file mode 100644
index 0000000..f058e9c
--- /dev/null
+++ b/lib/util/compat.h
@@ -0,0 +1,12 @@
+#ifndef __COMPAT_H
+#define __COMPAT_H
+
+#ifndef HAVE_LIBBPF_BTF__TYPE_CNT
+/* Fallback used when HAVE_LIBBPF_BTF__TYPE_CNT is not defined: emulate
+ * btf__type_cnt() on top of btf__get_nr_types().
+ *
+ * Declared inline so that translation units which include this header without
+ * calling the function do not get an unused-function warning.
+ */
+static inline __u32 btf__type_cnt(const struct btf *btf)
+{
+	/* old function didn't include 'void' type in count */
+	return btf__get_nr_types(btf) + 1;
+}
+#endif
+
+#endif
diff --git a/lib/util/logging.c b/lib/util/logging.c
new file mode 100644
index 0000000..7ad21d7
--- /dev/null
+++ b/lib/util/logging.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <stdio.h>
+#include <stdarg.h>
+
+#include <bpf/libbpf.h>
+#include <xdp/libxdp.h>
+
+#include "logging.h"
+#include "util.h"
+
+/* Current verbosity threshold; print_func() drops messages above this level. */
+static enum logging_print_level log_level = LOG_INFO;
+
+/* Common printer behind all logging front-ends in this file.
+ *
+ * A message whose level is above the current log_level is dropped and 0 is
+ * returned. Otherwise one space per indent step is written to stderr, followed
+ * by the formatted message; the return value is that of vfprintf().
+ */
+static int print_func(enum logging_print_level level, int indent,
+		      const char *format, va_list args)
+{
+	int remaining = indent;
+
+	if (level > log_level)
+		return 0;
+
+	while (remaining-- > 0)
+		fprintf(stderr, " ");
+
+	return vfprintf(stderr, format, args);
+}
+
+/* Print callback handed to libbpf_set_print(): forwards the message to
+ * print_func() one level lower in priority, with an indent of 2.
+ */
+static int libbpf_print_func(enum libbpf_print_level level, const char *format,
+			     va_list args)
+{
+	enum logging_print_level demoted = level + 1;
+
+	return print_func(demoted, 2, format, args);
+}
+
+/* Print callback that discards every message and reports 0; installed by
+ * silence_libbpf_logging().
+ */
+static int libbpf_silent_func(__unused enum libbpf_print_level level,
+ __unused const char *format,
+ __unused va_list args)
+{
+ return 0;
+}
+
+/* Print callback handed to libxdp_set_print(): forwards the message to
+ * print_func() one level lower in priority, with an indent of 1.
+ */
+static int libxdp_print_func(enum libxdp_print_level level, const char *format,
+			     va_list args)
+{
+	enum logging_print_level demoted = level + 1;
+
+	return print_func(demoted, 1, format, args);
+}
+
+/* Print callback that discards every message and reports 0; installed by
+ * silence_libxdp_logging().
+ */
+static int libxdp_silent_func(__unused enum libxdp_print_level level,
+ __unused const char *format,
+ __unused va_list args)
+{
+ return 0;
+}
+
+#define __printf(a, b) __attribute__((format(printf, a, b)))
+
+/* printf-style entry point used by the pr_warn/pr_info/pr_debug macros.
+ * The message is emitted without indentation, subject to the log_level filter
+ * applied by print_func().
+ */
+__printf(2, 3) void logging_print(enum logging_print_level level,
+				  const char *format, ...)
+{
+	va_list ap;
+
+	va_start(ap, format);
+	print_func(level, 0, format, ap);
+	va_end(ap);
+}
+
+/* Route libbpf and libxdp log output through this file's print callbacks. */
+void init_lib_logging(void)
+{
+ libbpf_set_print(libbpf_print_func);
+ libxdp_set_print(libxdp_print_func);
+}
+
+/* Suppress libbpf output, unless the log level has been raised to
+ * LOG_VERBOSE, in which case the current callback is left in place.
+ */
+void silence_libbpf_logging(void)
+{
+	if (log_level >= LOG_VERBOSE)
+		return;
+
+	libbpf_set_print(libbpf_silent_func);
+}
+
+/* Suppress libxdp output, unless the log level has been raised to
+ * LOG_VERBOSE, in which case the current callback is left in place.
+ */
+void silence_libxdp_logging(void)
+{
+	if (log_level >= LOG_VERBOSE)
+		return;
+
+	libxdp_set_print(libxdp_silent_func);
+}
+
+/* Replace the current log level with `level` and return the level that was in
+ * effect before the call.
+ */
+enum logging_print_level set_log_level(enum logging_print_level level)
+{
+	enum logging_print_level previous;
+
+	previous = log_level;
+	log_level = level;
+
+	return previous;
+}
+
+/* Raise the log level by one step, saturating at LOG_VERBOSE, and return the
+ * resulting level.
+ */
+enum logging_print_level increase_log_level(void)
+{
+	if (log_level >= LOG_VERBOSE)
+		return log_level;
+
+	log_level++;
+	return log_level;
+}
diff --git a/lib/util/logging.h b/lib/util/logging.h
new file mode 100644
index 0000000..16c4e74
--- /dev/null
+++ b/lib/util/logging.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __LOGGING_H
+#define __LOGGING_H
+
+/* This matches the libbpf logging levels, but with an additional VERBOSE level;
+ * we demote all libbpf messages by one level so debug messages only show up on
+ * VERBOSE.
+ *
+ * Lower values are more important: a message is printed only when its level is
+ * not above the currently configured level.
+ */
+enum logging_print_level {
+ LOG_WARN,
+ LOG_INFO,
+ LOG_DEBUG,
+ LOG_VERBOSE,
+};
+
+/* printf-style logging entry point; `level` decides whether the message is
+ * shown at the current log level.
+ */
+extern void logging_print(enum logging_print_level level, const char *format,
+ ...) __attribute__((format(printf, 2, 3)));
+
+/* Statement-safe wrapper around logging_print(). */
+#define __pr(level, fmt, ...) \
+ do { \
+ logging_print(level, fmt, ##__VA_ARGS__); \
+ } while (0)
+
+/* Convenience macros, one per level (there is none for LOG_VERBOSE). */
+#define pr_warn(fmt, ...) __pr(LOG_WARN, fmt, ##__VA_ARGS__)
+#define pr_info(fmt, ...) __pr(LOG_INFO, fmt, ##__VA_ARGS__)
+#define pr_debug(fmt, ...) __pr(LOG_DEBUG, fmt, ##__VA_ARGS__)
+
+/* Install the print callbacks for libbpf and libxdp. */
+void init_lib_logging(void);
+/* Silence the respective library unless the log level is LOG_VERBOSE. */
+void silence_libbpf_logging(void);
+void silence_libxdp_logging(void);
+/* Set the log level; returns the previous level. */
+enum logging_print_level set_log_level(enum logging_print_level level);
+/* Raise the log level by one step (capped at LOG_VERBOSE); returns the new
+ * level. Declared with (void) so that this is a real prototype matching the
+ * definition.
+ */
+enum logging_print_level increase_log_level(void);
+
+#endif
diff --git a/lib/util/params.c b/lib/util/params.c
new file mode 100644
index 0000000..838a520
--- /dev/null
+++ b/lib/util/params.c
@@ -0,0 +1,808 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <getopt.h>
+#include <errno.h>
+
+#include <net/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_link.h> /* XDP_FLAGS_* depend on kernel-headers installed */
+#include <linux/if_xdp.h>
+#include <arpa/inet.h>
+
+#include "params.h"
+#include "logging.h"
+#include "util.h"
+
+/* Size of the scratch buffer used to format "--name METAVAR" in help output */
+#define BUFSIZE 30
+/* Short-option values below this are not printed as "-c" in help output; they
+ * are also the range used for auto-numbered options without a short opt.
+ */
+#define FIRST_PRINTABLE 65 /* ord('A') = 65 */
+/* getopt value reserved for --version, which has no short form */
+#define VERSION_SHORT_OPT 0
+
+/* True for non-positional options whose type comes after OPT_BOOL in
+ * enum option_type, i.e. options that take a value on the command line.
+ */
+static bool opt_needs_arg(const struct prog_option *opt)
+{
+	if (opt->positional)
+		return false;
+
+	return opt->type > OPT_BOOL;
+}
+
+/* True for the option types that accumulate several values. */
+static bool opt_is_multi(const struct prog_option *opt)
+{
+	switch (opt->type) {
+	case OPT_MULTISTRING:
+	case OPT_IFNAME_MULTI:
+	case OPT_U32_MULTI:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* OPT_BOOL handler: the presence of the option sets the target bool to true.
+ * Always returns 0.
+ */
+static int handle_bool(__unused char *optarg, void *tgt, __unused struct prog_option *opt)
+{
+	*(bool *)tgt = true;
+
+	return 0;
+}
+
+/* OPT_STRING handler: store the argument pointer itself (no copy is made) in
+ * the target char pointer. Always returns 0.
+ */
+static int handle_string(char *optarg, void *tgt, __unused struct prog_option *opt)
+{
+	*(char **)tgt = optarg;
+
+	return 0;
+}
+
+/* OPT_MULTISTRING handler: append the argument pointer to the growing string
+ * array in the target struct multistring.
+ *
+ * Returns 0 on success, -ENOMEM if the new element count would overflow the
+ * allocation size, or -errno if realloc() fails (the old array stays intact).
+ */
+static int handle_multistring(char *optarg, void *tgt, __unused struct prog_option *opt)
+{
+	struct multistring *ms = tgt;
+	size_t new_count = ms->num_strings + 1;
+	void *grown;
+
+	if (new_count > SIZE_MAX / sizeof(*ms->strings))
+		return -ENOMEM;
+
+	grown = realloc(ms->strings, sizeof(*ms->strings) * new_count);
+	if (!grown)
+		return -errno;
+
+	ms->strings = grown;
+	ms->strings[ms->num_strings] = optarg;
+	ms->num_strings = new_count;
+	return 0;
+}
+
+/* OPT_U32 handler: parse optarg as a base-10 unsigned number and store it in
+ * the __u32 at tgt.
+ *
+ * Returns 0 on success, or -EINVAL if optarg contains no digits, has trailing
+ * non-numeric characters, contains a minus sign, or does not fit in 32 bits.
+ * The target is left untouched on failure.
+ */
+static int handle_u32(char *optarg, void *tgt, __unused struct prog_option *opt)
+{
+	__u32 *opt_set = tgt;
+	unsigned long val;
+	char *end;
+
+	/* strtoul() accepts "-1" and silently wraps it; never valid here */
+	if (strchr(optarg, '-'))
+		return -EINVAL;
+
+	errno = 0;
+	val = strtoul(optarg, &end, 10);
+	/* end == optarg: no digits at all; *end set: trailing junk ("12abc") */
+	if (errno || end == optarg || *end != '\0' || val > 0xffffffff)
+		return -EINVAL;
+
+	*opt_set = val;
+	return 0;
+}
+
+/* OPT_U32_MULTI handler: parse one value with handle_u32() and append it to
+ * the growing array in the target struct u32_multi.
+ *
+ * Returns 0 on success, -ENOMEM if the new element count would overflow the
+ * allocation size, the handle_u32() error if parsing fails, or -errno if
+ * realloc() fails (the old array stays intact).
+ */
+static int handle_u32_multi(char *optarg, void *tgt, struct prog_option *opt)
+{
+	struct u32_multi *multi = tgt;
+	size_t new_count = multi->num_vals + 1;
+	void *grown;
+	__u32 parsed;
+	int err;
+
+	if (new_count > SIZE_MAX / sizeof(*multi->vals))
+		return -ENOMEM;
+
+	err = handle_u32(optarg, &parsed, opt);
+	if (err)
+		return err;
+
+	grown = realloc(multi->vals, sizeof(*multi->vals) * new_count);
+	if (!grown)
+		return -errno;
+
+	multi->vals = grown;
+	multi->vals[multi->num_vals] = parsed;
+	multi->num_vals = new_count;
+	return 0;
+}
+
+/* OPT_U16 handler: parse optarg as a base-10 unsigned number and store it in
+ * the __u16 at tgt.
+ *
+ * Returns 0 on success, or -EINVAL if optarg contains no digits, has trailing
+ * non-numeric characters, contains a minus sign, or does not fit in 16 bits.
+ * The target is left untouched on failure.
+ */
+static int handle_u16(char *optarg, void *tgt, __unused struct prog_option *opt)
+{
+	__u16 *opt_set = tgt;
+	unsigned long val;
+	char *end;
+
+	/* strtoul() accepts "-1" and silently wraps it; never valid here */
+	if (strchr(optarg, '-'))
+		return -EINVAL;
+
+	errno = 0;
+	val = strtoul(optarg, &end, 10);
+	/* end == optarg: no digits at all; *end set: trailing junk ("12abc") */
+	if (errno || end == optarg || *end != '\0' || val > 0xffff)
+		return -EINVAL;
+
+	*opt_set = val;
+	return 0;
+}
+
+/* Parse a colon-separated hexadecimal MAC address from str into mac.
+ *
+ * Returns 0 on success, or -EINVAL when fewer than ETH_ALEN fields were
+ * scanned or a field is larger than 0xFF. Bytes preceding an out-of-range
+ * field have already been written to mac when that error is returned.
+ */
+static int parse_mac(char *str, unsigned char mac[ETH_ALEN])
+{
+	unsigned int octet[ETH_ALEN];
+	int matched;
+	int idx = 0;
+
+	/* Based on https://stackoverflow.com/a/20553913 */
+	matched = sscanf(str, "%x:%x:%x:%x:%x:%x%*c",
+			 &octet[0], &octet[1], &octet[2],
+			 &octet[3], &octet[4], &octet[5]);
+	if (matched != ETH_ALEN)
+		return -EINVAL;
+
+	while (idx < ETH_ALEN) {
+		if (octet[idx] > 0xFF)
+			return -EINVAL;
+		mac[idx] = octet[idx];
+		idx++;
+	}
+
+	return 0;
+}
+
+/* OPT_MACADDR handler: parse optarg into the target struct mac_addr.
+ * Logs a warning and returns the parse_mac() error on failure, 0 on success.
+ */
+static int handle_macaddr(char *optarg, void *tgt, __unused struct prog_option *opt)
+{
+	struct mac_addr *mac = tgt;
+	int err = parse_mac(optarg, mac->addr);
+
+	if (!err)
+		return 0;
+
+	pr_warn("Invalid MAC address: %s\n", optarg);
+	return err;
+}
+
+/* Format addr as colon-separated lowercase hex ("aa:bb:cc:dd:ee:ff") into buf.
+ *
+ * @buf:     destination buffer
+ * @buf_len: size of buf in bytes; 18 bytes are needed for a full address
+ * @addr:    address to format
+ *
+ * The result is always NUL-terminated within buf_len bytes. If the buffer is
+ * too small the output is truncated; if buf_len is 0 nothing is written.
+ */
+void print_macaddr(char *buf, size_t buf_len, const struct mac_addr *addr)
+{
+	int i, len;
+
+	if (!buf_len)
+		return;
+
+	/* Invariant: at least one byte is always left for the final NUL */
+	for (i = 0; i < ETH_ALEN; i++) {
+		len = snprintf(buf, buf_len, "%02x", addr->addr[i]);
+		if (len < 0 || (size_t)len >= buf_len)
+			break;
+
+		buf += len;
+		buf_len -= len;
+
+		if (i < ETH_ALEN - 1) {
+			/* need room for ':' and the terminating NUL */
+			if (buf_len < 2)
+				break;
+			*buf++ = ':';
+			buf_len -= 1;
+		}
+	}
+
+	*buf = '\0';
+}
+
+/* Return true when every byte of *addr is zero. */
+bool macaddr_is_null(const struct mac_addr *addr)
+{
+	static struct mac_addr zero_addr = {};
+	int diff = memcmp(addr, &zero_addr, sizeof(zero_addr));
+
+	return diff == 0;
+}
+
+/* Look up chr in the flag table, which ends with an entry whose flagstring is
+ * NULL. Returns the matching entry, or NULL if there is none.
+ */
+static const struct flag_val *find_flag(const struct flag_val *flag_vals,
+					const char *chr)
+{
+	const struct flag_val *fv;
+
+	for (fv = flag_vals; fv->flagstring; fv++) {
+		if (!strcmp(chr, fv->flagstring))
+			return fv;
+	}
+
+	return NULL;
+}
+
+/* OPT_FLAGS handler: parse a comma-separated list of flag names and store the
+ * OR of their values in the unsigned int at tgt.
+ *
+ * The flag table comes from opt->typearg. Note that optarg is modified in
+ * place: each ',' is overwritten with a NUL while tokenising.
+ *
+ * Returns 0 on success, or -EINVAL if the option has no flag table or a name
+ * is not in it; the target is left untouched in that case.
+ */
+static int handle_flags(char *optarg, void *tgt, struct prog_option *opt)
+{
+	const struct flag_val *flag, *flag_vals = opt->typearg;
+	unsigned int *opt_set = tgt;
+	unsigned int flagval = 0;
+	char *c = NULL;
+
+	/* An option defined without a flag table cannot match anything */
+	if (!flag_vals)
+		return -EINVAL;
+
+	while (*optarg) {
+		c = strchr(optarg, ',');
+		if (c)
+			*c = '\0';
+		flag = find_flag(flag_vals, optarg);
+		if (!flag)
+			return -EINVAL;
+		flagval |= flag->flagval;
+
+		if (!c)
+			break;
+		optarg = c + 1;
+	}
+	*opt_set = flagval;
+	return 0;
+}
+
+/* Resolve an interface name with if_nametoindex().
+ * Returns the index, or logs a warning and returns -ENOENT when the lookup
+ * yields 0.
+ */
+static int get_ifindex(const char *ifname)
+{
+	int idx = if_nametoindex(ifname);
+
+	if (idx)
+		return idx;
+
+	pr_warn("Couldn't find network interface '%s'.\n", ifname);
+	return -ENOENT;
+}
+
+/* OPT_IFNAME handler: resolve optarg to an interface index and record both the
+ * name pointer (not copied) and the index in the target struct iface.
+ * Returns 0 on success or the get_ifindex() error.
+ */
+static int handle_ifname(char *optarg, void *tgt, __unused struct prog_option *opt)
+{
+	struct iface *dst = tgt;
+	int idx = get_ifindex(optarg);
+
+	if (idx < 0)
+		return idx;
+
+	dst->ifindex = idx;
+	dst->ifname = optarg;
+	return 0;
+}
+
+/* OPT_IFNAME_MULTI handler: resolve optarg to an interface index and append a
+ * newly allocated struct iface to the end of the singly linked list whose head
+ * pointer is stored at tgt.
+ *
+ * Returns 0 on success, the get_ifindex() error if the interface is unknown,
+ * or -ENOMEM if the node cannot be allocated.
+ */
+static int handle_ifname_multi(char *optarg, void *tgt, __unused struct prog_option *opt)
+{
+	struct iface **tail = tgt;
+	struct iface *node;
+	int idx;
+
+	idx = get_ifindex(optarg);
+	if (idx < 0)
+		return idx;
+
+	node = calloc(sizeof(*node), 1);
+	if (!node)
+		return -ENOMEM;
+
+	node->ifname = optarg;
+	node->ifindex = idx;
+
+	/* Walk to the first NULL link (the head itself if the list is empty) */
+	while (*tail)
+		tail = &(*tail)->next;
+
+	*tail = node;
+	return 0;
+}
+
+/* Format addr into buf (of buf_len bytes) using inet_ntop() with the address
+ * family stored in addr->af.
+ *
+ * NOTE(review): the inet_ntop() result is ignored, so on failure (e.g. an
+ * unset address family) buf is presumably left unmodified - callers should
+ * not rely on buf holding a valid string in that case; confirm.
+ */
+void print_addr(char *buf, size_t buf_len, const struct ip_addr *addr)
+{
+ inet_ntop(addr->af, &addr->addr, buf, buf_len);
+}
+
+/* Return true when *addr compares bytewise equal to an all-zero ip_addr. */
+bool ipaddr_is_null(const struct ip_addr *addr)
+{
+	static struct ip_addr zero_addr = {};
+	int diff = memcmp(addr, &zero_addr, sizeof(zero_addr));
+
+	return diff == 0;
+}
+
+/* OPT_IPADDR handler: parse optarg as an IPv6 address if it contains a ':',
+ * otherwise as IPv4, and store address and family in the target struct
+ * ip_addr.
+ *
+ * Returns 0 on success. On failure a warning is logged here and -ENOENT is
+ * returned, which keeps the caller from printing a second error.
+ */
+static int handle_ipaddr(char *optarg, void *tgt, __unused struct prog_option *opt)
+{
+	struct ip_addr *ip = tgt;
+	int family = AF_INET;
+
+	if (strchr(optarg, ':'))
+		family = AF_INET6;
+
+	if (inet_pton(family, optarg, &ip->addr) == 1) {
+		ip->af = family;
+		return 0;
+	}
+
+	pr_warn("Invalid IP address: %s\n", optarg);
+	return -ENOENT; /* caller won't print error on ENOENT */
+}
+
+/* Look up chr in the enum table, which ends with an entry whose name is NULL.
+ * Returns the matching entry, or NULL if there is none.
+ */
+static const struct enum_val *find_enum(const struct enum_val *enum_vals,
+					const char *chr)
+{
+	const struct enum_val *ev;
+
+	for (ev = enum_vals; ev->name; ev++) {
+		if (!strcmp(chr, ev->name))
+			return ev;
+	}
+
+	return NULL;
+}
+
+/* OPT_ENUM handler: look optarg up in the enum table given by opt->typearg
+ * and store the matching value in the unsigned int at tgt.
+ *
+ * Returns 0 on success, or -EINVAL if the option has no enum table or the
+ * name is not in it; the target is left untouched in that case.
+ */
+static int handle_enum(char *optarg, void *tgt, struct prog_option *opt)
+{
+	const struct enum_val *val, *all_vals = opt->typearg;
+	unsigned int *opt_set = tgt;
+
+	/* An option defined without an enum table cannot match anything */
+	if (!all_vals)
+		return -EINVAL;
+
+	val = find_enum(all_vals, optarg);
+	if (!val)
+		return -EINVAL;
+	*opt_set = val->value;
+	return 0;
+}
+
+/* Write the names of all entries in vals into buf as a comma-separated list.
+ *
+ * @buf:     destination buffer
+ * @buf_len: size of buf in bytes
+ * @vals:    table terminated by an entry with a NULL name
+ *
+ * The result is always NUL-terminated within buf_len bytes and is truncated
+ * if the buffer is too small; if buf_len is 0 nothing is written.
+ */
+static void print_enum_vals(char *buf, size_t buf_len,
+			    const struct enum_val *vals)
+{
+	const struct enum_val *val;
+	bool first = true;
+
+	if (!buf_len)
+		return;
+
+	/* Invariant: at least one byte is always left for the final NUL */
+	for (val = vals; val->name; val++) {
+		int len;
+
+		if (!first) {
+			/* need room for ',' and the terminating NUL */
+			if (buf_len < 2)
+				break;
+			*buf++ = ',';
+			buf_len--;
+		}
+		first = false;
+
+		len = snprintf(buf, buf_len, "%s", val->name);
+		if (len < 0 || (size_t)len >= buf_len)
+			break;
+		buf += len;
+		buf_len -= len;
+	}
+	*buf = '\0';
+}
+
+/* Reverse lookup: return the name of the first entry in vals whose value
+ * equals `value`, or NULL if no entry matches. The table ends with an entry
+ * whose name is NULL.
+ */
+const char *get_enum_name(const struct enum_val *vals, unsigned int value)
+{
+	const struct enum_val *cur = vals;
+
+	while (cur->name) {
+		if (cur->value == value)
+			return cur->name;
+		cur++;
+	}
+
+	return NULL;
+}
+
+static const struct opthandler {
+ int (*func)(char *optarg, void *tgt, struct prog_option *opt);
+} handlers[__OPT_MAX] = {
+ {NULL},
+ {handle_bool},
+ {handle_flags},
+ {handle_string},
+ {handle_u16},
+ {handle_u32},
+ {handle_u32_multi},
+ {handle_macaddr},
+ {handle_ifname},
+ {handle_ifname_multi},
+ {handle_ipaddr},
+ {handle_enum},
+ {handle_multistring}
+};
+
+/* Write the names of all flags from `flags` that are set in flags_set into buf
+ * as a comma-separated list.
+ *
+ * @buf:       destination buffer
+ * @buf_len:   size of buf in bytes
+ * @flags:     table terminated by an entry with a NULL flagstring
+ * @flags_set: bitmask selecting which table entries to print
+ *
+ * The result is always NUL-terminated within buf_len bytes and is truncated
+ * if the buffer is too small; if buf_len is 0 nothing is written.
+ */
+void print_flags(char *buf, size_t buf_len, const struct flag_val *flags,
+		 unsigned long flags_set)
+{
+	const struct flag_val *flag;
+	bool first = true;
+
+	if (!buf_len)
+		return;
+
+	/* Invariant: at least one byte is always left for the final NUL */
+	for (flag = flags; flag->flagstring; flag++) {
+		int len;
+
+		if (!(flag->flagval & flags_set))
+			continue;
+
+		if (!first) {
+			/* need room for ',' and the terminating NUL */
+			if (buf_len < 2)
+				break;
+			*buf++ = ',';
+			buf_len--;
+		}
+		first = false;
+		len = snprintf(buf, buf_len, "%s", flag->flagstring);
+		if (len < 0 || (size_t)len >= buf_len)
+			break;
+		buf += len;
+		buf_len -= len;
+	}
+	*buf = '\0';
+}
+
+/* Help printer for OPT_FLAGS: print the help text followed by the list of all
+ * flag names from opt->typearg. A missing table is reported with a warning and
+ * the list is printed empty.
+ */
+static void print_help_flags(const struct prog_option *opt)
+{
+	char names[100] = {};
+
+	if (opt->typearg)
+		print_flags(names, sizeof(names), opt->typearg, -1);
+	else
+		pr_warn("Missing typearg for opt %s\n", opt->name);
+
+	printf(" %s (valid values: %s)", opt->help, names);
+}
+
+/* Help printer for OPT_ENUM: print the help text followed by the list of all
+ * value names from opt->typearg. A missing table is reported with a warning
+ * and the list is printed empty.
+ */
+static void print_help_enum(const struct prog_option *opt)
+{
+	char names[100] = {};
+
+	if (opt->typearg)
+		print_enum_vals(names, sizeof(names), opt->typearg);
+	else
+		pr_warn("Missing typearg for opt %s\n", opt->name);
+
+	printf(" %s (valid values: %s)", opt->help, names);
+}
+
+static const struct helprinter {
+ void (*func)(const struct prog_option *opt);
+} help_printers[__OPT_MAX] = {
+ {NULL},
+ {NULL},
+ {print_help_flags},
+ {NULL},
+ {NULL},
+ {NULL},
+ {NULL},
+ {NULL},
+ {NULL},
+ {NULL},
+ {NULL},
+ {print_help_enum},
+ {NULL}
+};
+
+
+/* Print every positional option on the usage line, each preceded by a space,
+ * using its metavar when set and its name otherwise.
+ */
+static void _print_positional(const struct prog_option *long_options)
+{
+	const struct prog_option *opt;
+
+	FOR_EACH_OPTION (long_options, opt) {
+		const char *label;
+
+		if (!opt->positional)
+			continue;
+
+		label = opt->metavar ? opt->metavar : opt->name;
+		printf(" %s", label);
+	}
+}
+
+/* Print one help line for every option whose `required` flag equals the
+ * `required` argument.
+ *
+ * Positional options are shown by metavar (or name). Other options are shown
+ * as "-c, --name METAVAR"; the short form is printed only when short_opt is at
+ * least FIRST_PRINTABLE. The description comes from the type-specific help
+ * printer if there is one, otherwise from opt->help.
+ */
+static void _print_options(const struct prog_option *poptions, bool required)
+{
+ const struct prog_option *opt;
+
+ FOR_EACH_OPTION (poptions, opt) {
+ if (opt->required != required)
+ continue;
+
+ if (opt->positional) {
+ printf(" %-30s", opt->metavar ?: opt->name);
+ } else {
+ char buf[BUFSIZE];
+ int pos;
+
+ if (opt->short_opt >= FIRST_PRINTABLE)
+ printf(" -%c,", opt->short_opt);
+ else
+ printf(" ");
+ pos = snprintf(buf, BUFSIZE, " --%s", opt->name);
+ /* Skip the option entirely if even its name does not fit */
+ if (pos < 0 || pos >= BUFSIZE) {
+ pr_warn("opt name too long: %s\n", opt->name);
+ continue;
+ }
+ /* The metavar is appended best-effort; truncation is not reported */
+ if (opt->metavar)
+ snprintf(&buf[pos], BUFSIZE - pos, " %s",
+ opt->metavar);
+ printf("%-28s", buf);
+ }
+
+ if (help_printers[opt->type].func != NULL)
+ help_printers[opt->type].func(opt);
+ else if (opt->help)
+ printf(" %s", opt->help);
+ printf("\n");
+ }
+}
+
+/* Return true if str starts with pfx. A NULL pfx never matches; an empty pfx
+ * matches every string.
+ */
+bool is_prefix(const char *pfx, const char *str)
+{
+	size_t pfx_len;
+
+	if (!pfx)
+		return false;
+
+	pfx_len = strlen(pfx);
+	if (strlen(str) < pfx_len)
+		return false;
+
+	return memcmp(str, pfx, pfx_len) == 0;
+}
+
+/* Print usage information to stdout.
+ *
+ * @prog_name: name shown on the "Usage:" line
+ * @doc:       description printed in the full help
+ * @poptions:  option table of the program/command
+ * @full:      when false, only the usage line and a hint to use --help are
+ *             printed; when true, the description and all options follow
+ *
+ * The built-in --verbose, --version and --help entries are always appended to
+ * the option list in the full help.
+ */
+void usage(const char *prog_name, const char *doc,
+ const struct prog_option *poptions, bool full)
+{
+ const struct prog_option *opt;
+ int num_req = 0;
+
+ printf("\nUsage: %s [options]", prog_name);
+ _print_positional(poptions);
+ printf("\n");
+
+ if (!full) {
+ printf("Use --help (or -h) to see full option list.\n");
+ return;
+ }
+
+ /* The "Required parameters" section is only shown when non-empty */
+ FOR_EACH_OPTION (poptions, opt)
+ if (opt->required)
+ num_req++;
+
+ printf("\n %s\n\n", doc);
+ if (num_req) {
+ printf("Required parameters:\n");
+ _print_options(poptions, true);
+ printf("\n");
+ }
+ printf("Options:\n");
+ _print_options(poptions, false);
+ printf(" -v, --verbose Enable verbose logging (-vv: more verbose)\n");
+ printf(" --version Display version information\n");
+ printf(" -h, --help Show this help\n");
+ printf("\n");
+}
+
+/* Translate a prog_option table into the arguments getopt_long() needs.
+ *
+ * @poptions:  option table; entries without a short_opt get a sequential,
+ *             non-printable value assigned to short_opt as a side effect
+ * @options:   out: newly allocated, zero-terminated struct option array that
+ *             starts with the built-in help/verbose/version entries
+ * @optstring: out: newly allocated short-option string
+ *
+ * On success returns 0 and the caller owns (and must free) both outputs.
+ * Returns -ENOMEM on allocation failure, or -EINVAL if there are too many
+ * options to number sequentially or to fit into the short-option string.
+ */
+static int prog_options_to_options(struct prog_option *poptions,
+				   struct option **options, char **optstring)
+{
+	int num = 0, num_cmn = 0, n_sopt = VERSION_SHORT_OPT + 1;
+	struct option *new_options, *nopt;
+	struct prog_option *opt;
+	char buf[100], *c = buf;
+	int ret = -EINVAL;
+
+	struct option common_opts[] = {
+		{"help", no_argument, NULL, 'h'},
+		{"verbose", no_argument, NULL, 'v'},
+		{"version", no_argument, NULL, VERSION_SHORT_OPT},
+		{}
+	};
+
+	for (nopt = common_opts; nopt->name; nopt++) {
+		num++;
+		num_cmn++;
+		if (nopt->val != VERSION_SHORT_OPT)
+			*c++ = nopt->val;
+	}
+
+	FOR_EACH_OPTION (poptions, opt)
+		if (!opt->positional)
+			num++;
+
+	new_options = calloc(num + 1, sizeof(struct option));
+	if (!new_options)
+		return -ENOMEM;
+
+	memcpy(new_options, &common_opts, sizeof(struct option) * num_cmn);
+	nopt = new_options + num_cmn;
+
+	FOR_EACH_OPTION (poptions, opt) {
+		if (opt->positional)
+			continue;
+		if (opt->short_opt) {
+			/* Worst case this entry adds the letter and a ':';
+			 * one more byte must stay free for the final NUL.
+			 */
+			if ((size_t)(c - buf) + 3 > sizeof(buf)) {
+				pr_warn("Too many options with a short opt\n");
+				goto err;
+			}
+			*(c++) = opt->short_opt;
+			if (opt_needs_arg(opt))
+				*(c++) = ':';
+		} else {
+			/* getopt expects options to have unique values in the
+			 * 'val' field, however we want to be able to define
+			 * options that don't have a short opt. So get around
+			 * that, just number such options sequentially.
+			 */
+			if (n_sopt >= FIRST_PRINTABLE) {
+				pr_warn("Too many options with no short opt\n");
+				goto err;
+			}
+			opt->short_opt = n_sopt++;
+		}
+		nopt->has_arg = opt_needs_arg(opt) ? required_argument : no_argument;
+		nopt->name = opt->name;
+		nopt->val = opt->short_opt;
+		nopt->flag = NULL;
+		nopt++;
+	}
+	*(c++) = '\0';
+
+	*optstring = strdup(buf);
+	if (!*optstring) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/* Make sure we clear the last option, or else we crash. */
+	memset(new_options + num, 0, sizeof(struct option));
+
+	*options = new_options;
+	return 0;
+
+err:
+	free(new_options);
+	return ret;
+}
+
+/* Return the first option in all_opts whose short_opt equals optchar, or NULL
+ * if there is none.
+ */
+static struct prog_option *find_opt(struct prog_option *all_opts, int optchar)
+{
+	struct prog_option *cur;
+
+	FOR_EACH_OPTION (all_opts, cur) {
+		if (cur->short_opt == optchar)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/* Apply one occurrence of `opt` with argument `optarg` to the config struct.
+ *
+ * Rejects the value with -E2BIG when the option has a max_num and has already
+ * been set that many times. Otherwise the type handler is run on the config
+ * member at opt->cfg_offset; on success num_set is incremented. Handler errors
+ * are returned as-is and logged here, except -ENOENT, which stays silent.
+ */
+static int _set_opt(void *cfg, struct prog_option *opt, char *optarg)
+{
+	void *target = cfg + opt->cfg_offset;
+	int err;
+
+	if (opt->max_num && opt->num_set + 1 > opt->max_num) {
+		pr_warn("Too many parameters for %s (max %u)\n",
+			opt->metavar ?: opt->name, opt->max_num);
+		return -E2BIG;
+	}
+
+	err = handlers[opt->type].func(optarg, target, opt);
+	if (!err) {
+		opt->num_set++;
+		return 0;
+	}
+
+	if (err != -ENOENT)
+		pr_warn("Couldn't parse option %s: %s.\n", opt->name, strerror(-err));
+	return err;
+}
+
+/* Handle one getopt result: find the option matching optchar and apply optarg
+ * to it. Returns -EFAULT without a config struct, -ENOENT when no option
+ * matches, otherwise the result of _set_opt().
+ */
+static int set_opt(void *cfg, struct prog_option *all_opts, int optchar,
+		   char *optarg)
+{
+	struct prog_option *match;
+
+	if (!cfg)
+		return -EFAULT;
+
+	match = find_opt(all_opts, optchar);
+
+	return match ? _set_opt(cfg, match, optarg) : -ENOENT;
+}
+
+/* Assign a positional argument to the first positional option that is either
+ * still unset or accepts multiple values. Returns -ENOENT when no such option
+ * exists, otherwise the result of _set_opt().
+ */
+static int set_pos_opt(void *cfg, struct prog_option *all_opts, char *optarg)
+{
+	struct prog_option *cand;
+
+	FOR_EACH_OPTION (all_opts, cand) {
+		if (!cand->positional)
+			continue;
+		if (cand->num_set && !opt_is_multi(cand))
+			continue;
+
+		return _set_opt(cfg, cand, optarg);
+	}
+
+	return -ENOENT;
+}
+
+/* Parse the command line into the config struct `cfg`.
+ *
+ * @argc, @argv: arguments as passed to getopt_long()
+ * @poptions:    option table; per-option state (num_set, short_opt) is updated
+ * @cfg:         config struct the option handlers write into
+ * @prog:        program name, used for --version and some usage output
+ * @usage_cmd:   name shown on the usage line for -h and positional errors
+ * @doc:         description shown in the full help
+ * @defaults:    optional struct with the same layout as cfg; members of
+ *               non-required options that were not set (or not set often
+ *               enough) are copied from it
+ *
+ * Returns 0 on success, -ENOMEM if the getopt tables cannot be built, and
+ * EXIT_FAILURE on any parse error as well as after printing help or version
+ * output.
+ */
+int parse_cmdline_args(int argc, char **argv, struct prog_option *poptions,
+ void *cfg, const char *prog, const char *usage_cmd,
+ const char *doc, const void *defaults)
+{
+ struct prog_option *opt_iter;
+ struct option *long_options;
+ /* Never set to true below, so error paths print the short usage only */
+ bool full_help = false;
+ int i, opt, err = 0;
+ int longindex = 0;
+ char *optstring;
+
+ if (prog_options_to_options(poptions, &long_options, &optstring)) {
+ pr_warn("Unable to malloc()\n");
+ return -ENOMEM;
+ }
+
+ /* Parse commands line args */
+ while ((opt = getopt_long(argc, argv, optstring,
+ long_options, &longindex)) != -1) {
+ switch (opt) {
+ case 'h':
+ usage(usage_cmd, doc, poptions, true);
+ err = EXIT_FAILURE;
+ goto out;
+ case 'v':
+ increase_log_level();
+ break;
+ case VERSION_SHORT_OPT:
+ printf("%s version %s using libbpf version %s\n",
+ prog,
+ TOOLS_VERSION,
+ get_libbpf_version());
+ err = EXIT_FAILURE;
+ goto out;
+ default:
+ /* NOTE(review): this path passes `prog` to usage() while the
+ * positional path below passes `usage_cmd` - confirm intent.
+ */
+ if (set_opt(cfg, poptions, opt, optarg)) {
+ usage(prog, doc, poptions, full_help);
+ err = EXIT_FAILURE;
+ goto out;
+ }
+ break;
+ }
+ }
+
+ /* Everything getopt left over is treated as a positional argument */
+ for (i = optind; i < argc; i++) {
+ if (set_pos_opt(cfg, poptions, argv[i])) {
+ usage(usage_cmd, doc, poptions, full_help);
+ err = EXIT_FAILURE;
+ goto out;
+ }
+ }
+
+ FOR_EACH_OPTION (poptions, opt_iter) {
+ /* Option was given, and often enough if a minimum is set */
+ if (opt_iter->num_set && (!opt_iter->min_num ||
+ opt_iter->num_set >= opt_iter->min_num))
+ continue;
+
+ if (opt_iter->required) {
+ if (opt_iter->positional)
+ pr_warn("Missing required parameter %s\n",
+ opt_iter->metavar ?: opt_iter->name);
+ else
+ pr_warn("Missing required option '--%s'\n",
+ opt_iter->name);
+ usage(prog, doc, poptions, full_help);
+ err = EXIT_FAILURE;
+ goto out;
+ } else if (defaults) {
+ /* Copy the member's default from the same offset in `defaults` */
+ void *dst = cfg + opt_iter->cfg_offset;
+ const void *src = defaults + opt_iter->cfg_offset;
+
+ memcpy(dst, src, opt_iter->opt_size);
+ }
+ }
+out:
+ free(long_options);
+ free(optstring);
+
+ return err;
+}
+
+/* Select and run a sub-command.
+ *
+ * @argv0:       command word given by the user; the first entry in cmds whose
+ *               name starts with it is chosen
+ * @argc, @argv: arguments forwarded to parse_cmdline_args()
+ * @cmds:        command table terminated by an entry with a NULL name
+ * @cfg_size:    size of the zero-initialised config struct to allocate
+ * @prog_name:   program name used in messages, for the pin path and the lock
+ * @needs_bpffs: when true, failing to obtain the BPF root dir is fatal
+ *
+ * Commands flagged no_cfg are invoked immediately with NULL arguments. For all
+ * others the command line is parsed, the environment is checked and the
+ * program lock is taken before the command function runs.
+ *
+ * Returns the command function's result, or EXIT_FAILURE if any earlier step
+ * fails.
+ */
+int dispatch_commands(const char *argv0, int argc, char **argv,
+ const struct prog_command *cmds, size_t cfg_size,
+ const char *prog_name, bool needs_bpffs)
+{
+ const struct prog_command *c, *cmd = NULL;
+ int ret = EXIT_FAILURE, err, len;
+ char pin_root_path[PATH_MAX];
+ char usagebuf[100];
+ void *cfg;
+
+ for (c = cmds; c->name; c++) {
+ if (is_prefix(argv0, c->name)) {
+ cmd = c;
+ break;
+ }
+ }
+
+ if (!cmd) {
+ pr_warn("Command '%s' is unknown, try '%s help'.\n",
+ argv0, prog_name);
+ return EXIT_FAILURE;
+ }
+
+ if (cmd->no_cfg)
+ return cmd->func(NULL, NULL);
+
+ cfg = calloc(1, cfg_size);
+ if (!cfg) {
+ pr_warn("Couldn't allocate memory\n");
+ return EXIT_FAILURE;
+ }
+
+ /* "<prog> <command>" is what the usage line shows for this command */
+ len = snprintf(usagebuf, sizeof(usagebuf), "%s %s", prog_name, cmd->name);
+ if (len < 0 || (size_t)len >= sizeof(usagebuf))
+ goto out;
+
+ err = parse_cmdline_args(argc, argv, cmd->options, cfg, prog_name, usagebuf,
+ cmd->doc, cmd->default_cfg);
+ if (err)
+ goto out;
+
+ /* An error here is only fatal when the command needs bpffs */
+ err = get_bpf_root_dir(pin_root_path, sizeof(pin_root_path), prog_name,
+ needs_bpffs);
+ if (err && needs_bpffs)
+ goto out;
+
+ err = check_bpf_environ();
+ if (err)
+ goto out;
+
+ if (prog_lock_get(prog_name))
+ goto out;
+
+ ret = cmd->func(cfg, pin_root_path);
+ prog_lock_release(0);
+out:
+ free(cfg);
+ return ret;
+}
diff --git a/lib/util/params.h b/lib/util/params.h
new file mode 100644
index 0000000..fa77964
--- /dev/null
+++ b/lib/util/params.h
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __PARAMS_H
+#define __PARAMS_H
+
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/if_ether.h>
+#include <bpf/libbpf.h>
+
+/* Kind of value an option takes. The values are used as indexes into the
+ * handler and help-printer tables in params.c; OPT_NONE (0) marks the end of
+ * an option table and __OPT_MAX is the table size.
+ */
+enum option_type {
+ OPT_NONE,
+ OPT_BOOL,
+ OPT_FLAGS,
+ OPT_STRING,
+ OPT_U16,
+ OPT_U32,
+ OPT_U32_MULTI,
+ OPT_MACADDR,
+ OPT_IFNAME,
+ OPT_IFNAME_MULTI,
+ OPT_IPADDR,
+ OPT_ENUM,
+ OPT_MULTISTRING,
+ __OPT_MAX
+};
+
+/* Description of one command-line option and its parse state. */
+struct prog_option {
+ enum option_type type; /* selects the parser for the value */
+ size_t cfg_size; /* sizeof the config struct (set by DEFINE_OPTION) */
+ size_t cfg_offset; /* offset of the target member in the config struct */
+ size_t opt_size; /* size of the target member; used to copy defaults */
+ char *name; /* long option name, without the leading "--" */
+ char short_opt; /* short option character; auto-numbered when 0 */
+ char *help; /* description shown in the help output */
+ char *metavar; /* placeholder for the value in the help output */
+ void *typearg; /* flag_val / enum_val table for OPT_FLAGS / OPT_ENUM */
+ bool required; /* parsing fails if the option is not given */
+ bool positional; /* taken from the non-option arguments */
+ unsigned int min_num; /* minimum number of occurrences, 0 = no minimum */
+ unsigned int max_num; /* maximum number of occurrences, 0 = no limit */
+ unsigned int num_set; /* how many times the option has been set so far */
+};
+
+/* Entry of a flag table; tables end with an entry whose flagstring is NULL. */
+struct flag_val {
+ const char *flagstring;
+ unsigned int flagval;
+};
+
+/* Entry of an enum table; tables end with an entry whose name is NULL. */
+struct enum_val {
+ const char *name;
+ unsigned int value;
+};
+
+/* Target of OPT_MULTISTRING: heap-allocated array of argument pointers. */
+struct multistring {
+ const char **strings;
+ size_t num_strings;
+};
+
+/* Target of OPT_U32_MULTI: heap-allocated array of parsed values. */
+struct u32_multi {
+ __u32 *vals;
+ size_t num_vals;
+};
+
+/* Target of OPT_IFNAME; OPT_IFNAME_MULTI chains heap-allocated nodes through
+ * `next`.
+ */
+struct iface {
+ struct iface *next;
+ char *ifname;
+ int ifindex;
+};
+
+/* Target of OPT_IPADDR: address family (AF_INET or AF_INET6) plus address. */
+struct ip_addr {
+ int af;
+ union {
+ struct in_addr addr4;
+ struct in6_addr addr6;
+ } addr;
+};
+
+/* Target of OPT_MACADDR. */
+struct mac_addr {
+ unsigned char addr[ETH_ALEN];
+};
+
+/* Size of a struct member without needing an instance of the struct. */
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+
+/* Initialiser for one struct prog_option entry that stores its value in
+ * member _cfgmember of config struct _cfgtype. Any further designated
+ * initialisers (e.g. .short_opt, .help) are passed through the variadic part.
+ */
+#define DEFINE_OPTION(_name, _type, _cfgtype, _cfgmember, ...) \
+ { \
+ .cfg_size = sizeof(_cfgtype), \
+ .opt_size = sizeof_field(_cfgtype, _cfgmember), \
+ .cfg_offset = offsetof(_cfgtype, _cfgmember), .name = _name, \
+ .type = _type, __VA_ARGS__ \
+ }
+
+/* All-zero sentinel (type == OPT_NONE) that terminates an option table. */
+#define END_OPTIONS \
+ { \
+ }
+
+/* Iterate _opt over the option table _options until the terminating entry
+ * with type OPT_NONE is reached. The arguments are parenthesised so that
+ * arbitrary expressions can be passed safely.
+ */
+#define FOR_EACH_OPTION(_options, _opt) \
+	for ((_opt) = (_options); (_opt)->type != OPT_NONE; (_opt)++)
+
+/* Description of one sub-command handled by dispatch_commands(). */
+struct prog_command {
+ const char *name; /* command word matched against the user's input */
+ int (*func)(const void *cfg, const char *pin_root_path); /* implementation */
+ struct prog_option *options; /* option table for this command */
+ const void *default_cfg; /* optional defaults, same layout as the config */
+ char *doc; /* description shown in the full help */
+ bool no_cfg; /* if set, func is called with NULL arguments, no parsing */
+};
+
+/* Command entry wired to do_<_func>, <_func>_options and defaults_<_func>. */
+#define DEFINE_COMMAND_NAME(_name, _func, _doc) \
+ { \
+ .name = _name, .func = do_##_func, \
+ .options = _func##_options, .default_cfg = &defaults_##_func, \
+ .doc = _doc \
+ }
+/* Same, with the command name derived from the identifier via textify().
+ * NOTE(review): textify is not defined in this header; presumably a
+ * stringification macro from util.h - confirm.
+ */
+#define DEFINE_COMMAND(_name, _doc) DEFINE_COMMAND_NAME(textify(_name), _name, _doc)
+
+/* Command entry without a default config struct. */
+#define DEFINE_COMMAND_NODEF(_name, _doc) \
+ { \
+ .name = textify(_name), .func = do_##_name, \
+ .options = _name##_options, .doc = _doc \
+ }
+
+/* All-zero sentinel (name == NULL) that terminates a command table. */
+#define END_COMMANDS \
+ { \
+ }
+
+/* Name of the enum_val entry with the given value, or NULL if none matches. */
+const char *get_enum_name(const struct enum_val *vals, unsigned int value);
+/* Comma-separated names of the flags set in flags_val, written into buf. */
+void print_flags(char *buf, size_t buf_len, const struct flag_val *flags,
+ unsigned long flags_val);
+/* Textual form of an IP / MAC address, written into buf. */
+void print_addr(char *buf, size_t buf_len, const struct ip_addr *addr);
+void print_macaddr(char *buf, size_t buf_len, const struct mac_addr *addr);
+/* True if the address consists of zero bytes only. */
+bool macaddr_is_null(const struct mac_addr *addr);
+bool ipaddr_is_null(const struct ip_addr *addr);
+/* True if `string` starts with `prefix`; a NULL prefix never matches. */
+bool is_prefix(const char *prefix, const char *string);
+/* Print the short (full == false) or complete usage text to stdout. */
+void usage(const char *prog_name, const char *doc,
+ const struct prog_option *long_options, bool full);
+
+/* Parse argv into cfg according to long_options; returns 0 on success and a
+ * non-zero value on error or after printing help/version output.
+ */
+int parse_cmdline_args(int argc, char **argv, struct prog_option *long_options,
+ void *cfg, const char *prog, const char *usage_cmd,
+ const char *doc, const void *defaults);
+
+/* Find the command matching argv0 in cmds, parse its options and run it. */
+int dispatch_commands(const char *argv0, int argc, char **argv,
+ const struct prog_command *cmds, size_t cfg_size,
+ const char *prog_name, bool needs_bpffs);
+
+#endif /* __PARAMS_H */
diff --git a/lib/util/stats.c b/lib/util/stats.c
new file mode 100644
index 0000000..f04c968
--- /dev/null
+++ b/lib/util/stats.c
@@ -0,0 +1,292 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <locale.h>
+#include <unistd.h>
+#include <time.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "stats.h"
+#include "util.h"
+#include "logging.h"
+
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+/* Read CLOCK_MONOTONIC and store it in *nstime as nanoseconds.
+ *
+ * Returns 0 on success. On failure a warning carrying the errno value is
+ * logged, *nstime is left untouched and the (negative) clock_gettime() result
+ * is returned.
+ */
+static int gettime(__u64 *nstime)
+{
+	struct timespec t;
+	int res;
+
+	res = clock_gettime(CLOCK_MONOTONIC, &t);
+	if (res < 0) {
+		/* res is always -1 here; errno carries the actual reason */
+		pr_warn("Error with clock_gettime! (%i)\n", errno);
+		return res;
+	}
+
+	*nstime = (__u64)t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
+	return 0;
+}
+
+/* Time between the two records in seconds, computed from their nanosecond
+ * timestamps. Returns 0 when both timestamps are equal.
+ */
+static double calc_period(struct record *r, struct record *p)
+{
+	__u64 delta_ns = r->timestamp - p->timestamp;
+
+	if (delta_ns == 0)
+		return 0;
+
+	return (double)delta_ns / NANOSEC_PER_SEC;
+}
+
+/* Print the accumulated packet and KiB totals of every enabled XDP action in
+ * stats_rec, one line per action.
+ *
+ * Returns 0 on success, or the negative printf() result if writing fails.
+ */
+int stats_print_one(struct stats_record *stats_rec)
+{
+	__u64 packets, bytes;
+	struct record *rec;
+	int i, err;
+
+	/* Print for each XDP actions stats */
+	for (i = 0; i < XDP_ACTION_MAX; i++) {
+		/* The counters are unsigned, so print them with %llu */
+		const char *fmt = " %-35s %'11llu pkts %'11llu KiB\n";
+		const char *action = action2str(i);
+
+		rec = &stats_rec->stats[i];
+		packets = rec->total.rx_packets;
+		bytes = rec->total.rx_bytes;
+
+		if (rec->enabled) {
+			err = printf(fmt, action,
+				     (unsigned long long)packets,
+				     (unsigned long long)(bytes / 1024));
+			if (err < 0)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+/* Print totals and rates for every enabled XDP action.
+ *
+ * @stats_rec:  the current reading
+ * @stats_prev: the previous reading; rates are computed from the difference
+ *
+ * A header line with the period length and the wall-clock end time precedes
+ * the first action. If the period of an enabled action is 0, the function
+ * returns right away without printing anything further.
+ *
+ * Returns 0 on success, or the clock_gettime() result if reading the
+ * wall-clock time fails.
+ */
+int stats_print(struct stats_record *stats_rec, struct stats_record *stats_prev)
+{
+	struct record *rec, *prev;
+	__u64 packets, bytes;
+	struct timespec t;
+	bool first = true;
+	double period;
+	double pps; /* packets per sec */
+	double bps; /* bits per sec */
+	int i, err;
+
+	err = clock_gettime(CLOCK_REALTIME, &t);
+	if (err < 0) {
+		/* err is always -1 here; errno carries the actual reason */
+		pr_warn("Error with clock_gettime! (%i)\n", errno);
+		return err;
+	}
+
+	/* Print for each XDP actions stats */
+	for (i = 0; i < XDP_ACTION_MAX; i++) {
+		/* The counters are unsigned, so print them with %llu */
+		const char *fmt = "%-12s %'11llu pkts (%'10.0f pps)"
+				  " %'11llu KiB (%'6.0f Mbits/s)\n";
+		const char *action = action2str(i);
+
+		rec = &stats_rec->stats[i];
+		prev = &stats_prev->stats[i];
+
+		if (!rec->enabled)
+			continue;
+
+		packets = rec->total.rx_packets - prev->total.rx_packets;
+		bytes = rec->total.rx_bytes - prev->total.rx_bytes;
+
+		period = calc_period(rec, prev);
+		if (period == 0)
+			return 0;
+
+		if (first) {
+			printf("Period of %fs ending at %ld.%06ld\n", period,
+			       (long) t.tv_sec, (long) t.tv_nsec / 1000);
+			first = false;
+		}
+
+		pps = packets / period;
+
+		bps = (bytes * 8) / period / 1000000;
+
+		/* Exactly one argument per conversion in fmt */
+		printf(fmt, action,
+		       (unsigned long long)rec->total.rx_packets, pps,
+		       (unsigned long long)(rec->total.rx_bytes / 1024), bps);
+	}
+	printf("\n");
+
+	return 0;
+}
+
+/* BPF_MAP_TYPE_ARRAY */
+/* Read the element stored under `key` into *value with a single
+ * bpf_map_lookup_elem() call. Returns the lookup result; failures are logged
+ * at debug level.
+ */
+static int map_get_value_array(int fd, __u32 key, struct xdp_stats_record *value)
+{
+	int err = bpf_map_lookup_elem(fd, &key, value);
+
+	if (err)
+		pr_debug("bpf_map_lookup_elem failed key:0x%X\n", key);
+
+	return err;
+}
+
+/* BPF_MAP_TYPE_PERCPU_ARRAY */
+/* Read the per-CPU values stored under `key` and store their sum in *value.
+ *
+ * Returns 0 on success, the (negative) libbpf_num_possible_cpus() result if
+ * the CPU count is unavailable, -ENOMEM if the temporary array cannot be
+ * allocated, or the bpf_map_lookup_elem() error (logged at debug level). On
+ * error *value is left untouched.
+ */
+static int map_get_value_percpu_array(int fd, __u32 key, struct xdp_stats_record *value)
+{
+	/* For percpu maps, userspace gets a value per possible CPU */
+	int nr_cpus = libbpf_num_possible_cpus();
+	struct xdp_stats_record *percpu;
+	__u64 pkts = 0, bytes = 0;
+	int cpu, err;
+
+	if (nr_cpus < 0)
+		return nr_cpus;
+
+	percpu = calloc(nr_cpus, sizeof(*percpu));
+	if (!percpu)
+		return -ENOMEM;
+
+	err = bpf_map_lookup_elem(fd, &key, percpu);
+	if (!err) {
+		/* Sum values from each CPU */
+		for (cpu = 0; cpu < nr_cpus; cpu++) {
+			pkts += percpu[cpu].rx_packets;
+			bytes += percpu[cpu].rx_bytes;
+		}
+		value->rx_packets = pkts;
+		value->rx_bytes = bytes;
+	} else {
+		pr_debug("bpf_map_lookup_elem failed key:0x%X\n", key);
+	}
+
+	free(percpu);
+	return err;
+}
+
+/* Take one timestamped reading of the counters stored under `key`.
+ *
+ * The timestamp is recorded first, then the value is read with the helper
+ * matching map_type. Returns 0 on success, the gettime() or lookup error, or
+ * -EINVAL for an unsupported map type. rec->total is only updated on success.
+ */
+static int map_collect(int fd, __u32 map_type, __u32 key, struct record *rec)
+{
+	struct xdp_stats_record value = {};
+	int err;
+
+	/* Get time as close as possible to reading map contents */
+	err = gettime(&rec->timestamp);
+	if (err)
+		return err;
+
+	if (map_type == BPF_MAP_TYPE_ARRAY) {
+		err = map_get_value_array(fd, key, &value);
+	} else if (map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
+		err = map_get_value_percpu_array(fd, key, &value);
+	} else {
+		pr_warn("Unknown map_type: %u cannot handle\n", map_type);
+		err = -EINVAL;
+	}
+
+	if (err)
+		return err;
+
+	rec->total.rx_bytes = value.rx_bytes;
+	rec->total.rx_packets = value.rx_packets;
+	return 0;
+}
+
+/* Refresh every enabled entry of stats_rec from the stats map, using the XDP
+ * action number as map key. Stops at, and returns, the first map_collect()
+ * error; returns 0 if all enabled entries were read.
+ */
+int stats_collect(int map_fd, __u32 map_type, struct stats_record *stats_rec)
+{
+	__u32 action;
+
+	/* Collect all XDP actions stats */
+	for (action = 0; action < XDP_ACTION_MAX; action++) {
+		struct record *rec = &stats_rec->stats[action];
+		int err;
+
+		if (!rec->enabled)
+			continue;
+
+		err = map_collect(map_fd, map_type, action, rec);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* Verify that the map pinned as map_name under pin_dir still has the ID
+ * map_id.
+ *
+ * Returns 0 if it does, the get_pinned_map_fd() error if the pinned map cannot
+ * be opened, or -EINVAL if a map with a different ID is pinned there. A
+ * warning is logged in each failure case.
+ */
+static int check_map_pin(__u32 map_id, const char *pin_dir, const char *map_name)
+{
+	struct bpf_map_info info = {};
+	int pinned_fd;
+	int ret = 0;
+
+	pinned_fd = get_pinned_map_fd(pin_dir, map_name, &info);
+	if (pinned_fd < 0) {
+		if (pinned_fd != -ENOENT)
+			pr_warn("Unable to re-open stats map\n");
+		else
+			pr_warn("Stats map disappeared while polling\n");
+		return pinned_fd;
+	}
+
+	if (info.id != map_id) {
+		pr_warn("Stats map ID changed while polling\n");
+		ret = -EINVAL;
+	}
+
+	close(pinned_fd);
+	return ret;
+}
+
+/* Repeatedly read the stats map and print per-period statistics until *exit
+ * becomes true.
+ *
+ * @map_fd:   file descriptor of the stats map
+ * @interval: value multiplied by 1000 and passed to usleep() between
+ *            readings; must be non-zero
+ * @exit:     loop terminates once *exit is true
+ * @pin_dir:  if non-NULL, the pinned map is re-checked before every reading
+ *            and polling stops when it is gone or its ID has changed
+ * @map_name: name of the pinned map inside pin_dir
+ *
+ * Statistics are gathered for the DROP, PASS, REDIRECT and TX actions.
+ *
+ * Returns 0 when stopped through *exit, -EINVAL for a zero interval, -errno if
+ * the map info cannot be read, or the error from check_map_pin() or
+ * stats_print().
+ */
+int stats_poll(int map_fd, int interval, bool *exit,
+ const char *pin_dir, const char *map_name)
+{
+ struct bpf_map_info info = {};
+ struct stats_record prev, record = { 0 };
+ __u32 info_len = sizeof(info);
+ __u32 map_type, map_id;
+ int err;
+
+ record.stats[XDP_DROP].enabled = true;
+ record.stats[XDP_PASS].enabled = true;
+ record.stats[XDP_REDIRECT].enabled = true;
+ record.stats[XDP_TX].enabled = true;
+
+ if (!interval)
+ return -EINVAL;
+
+ err = bpf_obj_get_info_by_fd(map_fd, &info, &info_len);
+ if (err)
+ return -errno;
+ map_type = info.type;
+ map_id = info.id;
+
+ /* Get initial reading quickly */
+ /* NOTE(review): the stats_collect() result is ignored here and in the
+ * loop below; presumably best-effort on purpose - confirm.
+ */
+ stats_collect(map_fd, map_type, &record);
+
+ usleep(1000000 / 4);
+
+ while (!*exit) {
+ if (pin_dir) {
+ err = check_map_pin(map_id, pin_dir, map_name);
+ if (err)
+ return err;
+ }
+
+ /* info is not read again after this point in the function */
+ memset(&info, 0, sizeof(info));
+ prev = record; /* struct copy */
+ stats_collect(map_fd, map_type, &record);
+ err = stats_print(&record, &prev);
+ if (err)
+ return err;
+ usleep(interval * 1000);
+ }
+
+ return 0;
+}
diff --git a/lib/util/stats.h b/lib/util/stats.h
new file mode 100644
index 0000000..9ee0cad
--- /dev/null
+++ b/lib/util/stats.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __STATS_H
+#define __STATS_H
+
+#include <bpf/libbpf.h>
+
+#include "xdp/xdp_stats_kern_user.h"
+
+/* One statistics slot; struct stats_record holds one of these per XDP action. */
+struct record {
+ __u64 timestamp; /* NOTE(review): presumably the time of the last sample - confirm in stats.c */
+ bool enabled; /* stats_collect() skips slots where this is false */
+ struct xdp_stats_record total; /* NOTE(review): presumably declared in xdp/xdp_stats_kern_user.h (included above) - confirm */
+};
+
+/* Statistics for all XDP actions, indexed by action code (0..XDP_ACTION_MAX-1). */
+struct stats_record {
+ struct record stats[XDP_ACTION_MAX];
+};
+
+int stats_print_one(struct stats_record *stats_rec);
+int stats_print(struct stats_record *stats_rec,
+ struct stats_record *stats_prev);
+int stats_collect(int map_fd, __u32 map_type, struct stats_record *stats_rec);
+int stats_poll(int map_fd, int interval, bool *exit, const char *pin_dir, const char *map_name);
+
+#endif
diff --git a/lib/util/util.c b/lib/util/util.c
new file mode 100644
index 0000000..70c5d18
--- /dev/null
+++ b/lib/util/util.c
@@ -0,0 +1,946 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+#include <sys/vfs.h>
+#include <sys/stat.h>
+#include <linux/if_link.h> /* Need XDP flags */
+#include <linux/magic.h> /* BPF FS magic */
+#include <linux/err.h> /* ERR_PTR */
+#include <bpf/bpf.h>
+#include <dirent.h>
+#include <net/if.h>
+
+#include "util.h"
+#include "logging.h"
+
+/* Table pairing a printable name with each XDP attach mode. The list ends
+ * with a {NULL, 0} entry. It is passed to get_enum_name() when interface
+ * status is printed.
+ */
+static struct enum_val xdp_modes[] = {
+ {"native", XDP_MODE_NATIVE},
+ {"skb", XDP_MODE_SKB},
+ {"hw", XDP_MODE_HW},
+ {"unspecified", XDP_MODE_UNSPEC},
+ {NULL, 0}
+};
+
+/*
+ * snprintf() wrapper that reports truncation as an error.
+ *
+ * Returns 0 if the formatted string (including its terminator) fits into
+ * @buf, -EINVAL if vsnprintf() reports an error, and -ENAMETOOLONG if the
+ * output would not fit into @buf_len bytes.
+ */
+int try_snprintf(char *buf, size_t buf_len, const char *format, ...)
+{
+	va_list ap;
+	int written;
+
+	va_start(ap, format);
+	written = vsnprintf(buf, buf_len, format, ap);
+	va_end(ap);
+
+	if (written < 0)
+		return -EINVAL;
+
+	return (size_t)written < buf_len ? 0 : -ENAMETOOLONG;
+}
+
+/*
+ * Raise the RLIMIT_MEMLOCK soft limit.
+ *
+ * With a non-zero @min_limit the soft limit is set to @min_limit unless it is
+ * already at least that large; with @min_limit == 0 the current soft limit is
+ * doubled. The hard limit is raised along with the soft limit when needed.
+ *
+ * Returns 0 on success (or if nothing had to be done), -ENOMEM if the current
+ * limit is 0 or unlimited (nothing is raised in that case), or -errno from
+ * getrlimit()/setrlimit().
+ */
+static int set_rlimit(unsigned int min_limit)
+{
+	struct rlimit rl;
+	int ret;
+
+	if (getrlimit(RLIMIT_MEMLOCK, &rl)) {
+		ret = -errno;
+		pr_warn("Couldn't get current rlimit\n");
+		return ret;
+	}
+
+	if (rl.rlim_cur == 0 || rl.rlim_cur == RLIM_INFINITY) {
+		pr_debug("Current rlimit is infinity or 0. Not raising\n");
+		return -ENOMEM;
+	}
+
+	if (!min_limit) {
+		pr_debug("Doubling current rlimit of %ju\n", (uintmax_t)rl.rlim_cur);
+		rl.rlim_cur <<= 1;
+	} else if (rl.rlim_cur >= min_limit) {
+		pr_debug("Current rlimit %ju already >= minimum %u\n",
+			 (uintmax_t)rl.rlim_cur, min_limit);
+		return 0;
+	} else {
+		pr_debug("Setting rlimit to minimum %u\n", min_limit);
+		rl.rlim_cur = min_limit;
+	}
+
+	/* The hard limit must never be below the soft limit */
+	if (rl.rlim_max < rl.rlim_cur)
+		rl.rlim_max = rl.rlim_cur;
+
+	if (setrlimit(RLIMIT_MEMLOCK, &rl)) {
+		ret = -errno;
+		pr_warn("Couldn't raise rlimit: %s\n", strerror(-ret));
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Double the current RLIMIT_MEMLOCK soft limit.
+ *
+ * Logs a debug message about retrying after a permission error and returns
+ * whatever set_rlimit(0) returns.
+ */
+int double_rlimit(void)
+{
+	int ret;
+
+	pr_debug("Permission denied when loading eBPF object; "
+		 "raising rlimit and retrying\n");
+
+	ret = set_rlimit(0);
+	return ret;
+}
+
+/* Version string baked in at build time, and the buffer handed out to callers */
+static const char *_libbpf_compile_version = LIBBPF_VERSION;
+static char _libbpf_version[10] = {};
+
+/*
+ * Return the libbpf version as a string.
+ *
+ * The compile-time LIBBPF_VERSION is always copied into the static buffer
+ * first. When built with LIBBPF_DYNAMIC, /proc/self/maps is then scanned for
+ * a mapping whose path contains "libbpf.so."; the text following that marker
+ * replaces the compile-time value, and a warning is logged if no such mapping
+ * is found.
+ *
+ * The returned pointer refers to a static buffer of 10 bytes, so at most 9
+ * characters of the version are kept, and every call rewrites the buffer.
+ */
+const char *get_libbpf_version(void)
+{
+ /* Start by copying compile-time version into buffer so we have a
+ * fallback value in case we are dynamically linked, or can't find a
+ * version in /proc/self/maps below.
+ */
+ strncpy(_libbpf_version, _libbpf_compile_version,
+ sizeof(_libbpf_version)-1);
+
+#ifdef LIBBPF_DYNAMIC
+ char path[PATH_MAX], buf[PATH_MAX], *s;
+ bool found = false;
+ FILE *fp;
+
+ /* When dynamically linking against libbpf, we can't be sure that the
+ * version we discovered at compile time is actually the one we are
+ * using at runtime. This can lead to hard-to-debug errors, so we try to
+ * discover the correct version at runtime.
+ *
+ * The simple solution to this would be if libbpf itself exported a
+ * version in its API. But since it doesn't, we work around this by
+ * parsing the mappings of the binary at runtime, looking for the full
+ * filename of libbpf.so and using that.
+ */
+ fp = fopen("/proc/self/maps", "r");
+ if (fp == NULL)
+ goto out;
+
+ while ((s = fgets(buf, sizeof(buf), fp)) != NULL) {
+ /* We are looking for a line like:
+ * 7f63c2105000-7f63c2106000 rw-p 00032000 fe:02 4200947 /usr/lib/libbpf.so.0.1.0
+ */
+ if (sscanf(s, "%*x-%*x %*4c %*x %*5c %*d %s\n", path) == 1 &&
+ (s = strstr(path, "libbpf.so.")) != NULL) {
+ /* s+10 skips the "libbpf.so." marker itself */
+ strncpy(_libbpf_version, s+10, sizeof(_libbpf_version)-1);
+ found = true;
+ break;
+ }
+ }
+
+ fclose(fp);
+out:
+ if (!found)
+ pr_warn("Couldn't find runtime libbpf version - falling back to compile-time value!\n");
+
+#endif
+ /* strncpy() does not terminate on truncation, so do it explicitly */
+ _libbpf_version[sizeof(_libbpf_version)-1] = '\0';
+ return _libbpf_version;
+}
+
+/*
+ * Locate the BPF object file @progname in the known object directories.
+ *
+ * The search order is "." (DEBUG builds only) followed by BPF_OBJECT_PATH.
+ * On success the full path of the first existing candidate is left in @buf
+ * and 0 is returned. Returns the try_snprintf() error if a candidate path
+ * does not fit into @buf, or -ENOENT if no candidate exists.
+ */
+int find_bpf_file(char *buf, size_t buf_size, const char *progname)
+{
+	static char *bpf_obj_paths[] = {
+#ifdef DEBUG
+		".",
+#endif
+		BPF_OBJECT_PATH,
+		NULL
+	};
+	struct stat st = {};
+	size_t i;
+
+	for (i = 0; bpf_obj_paths[i]; i++) {
+		int ret = try_snprintf(buf, buf_size, "%s/%s",
+				       bpf_obj_paths[i], progname);
+
+		if (ret)
+			return ret;
+
+		pr_debug("Looking for '%s'\n", buf);
+		if (stat(buf, &st) == 0)
+			return 0;
+	}
+
+	pr_warn("Couldn't find a BPF file with name %s\n", progname);
+	return -ENOENT;
+}
+
+/*
+ * Find the BPF object file @progname with find_bpf_file() and open it with
+ * bpf_object__open_file(), passing @opts through.
+ *
+ * Returns the result of bpf_object__open_file(), or ERR_PTR(err) when the
+ * file cannot be located.
+ */
+struct bpf_object *open_bpf_file(const char *progname,
+				 struct bpf_object_open_opts *opts)
+{
+	char obj_path[PATH_MAX];
+	int ret = find_bpf_file(obj_path, sizeof(obj_path), progname);
+
+	if (ret)
+		return ERR_PTR(ret);
+
+	pr_debug("Loading bpf file '%s' from '%s'\n", progname, obj_path);
+	return bpf_object__open_file(obj_path, opts);
+}
+
+/*
+ * Open the BPF object pinned at @path and optionally fetch its info.
+ *
+ * @path:     bpffs path of the pinned object
+ * @info:     if non-NULL, filled in through bpf_obj_get_info_by_fd()
+ * @info_len: in/out size of @info; only used when @info is non-NULL
+ *
+ * Returns the new file descriptor (owned by the caller) on success, or
+ * -errno on failure. No descriptor is left open on failure.
+ */
+static int get_pinned_object_fd(const char *path, void *info, __u32 *info_len)
+{
+	char errmsg[STRERR_BUFSIZE];
+	int pin_fd, err;
+
+	pin_fd = bpf_obj_get(path);
+	if (pin_fd < 0) {
+		err = -errno;
+		libbpf_strerror(-err, errmsg, sizeof(errmsg));
+		pr_debug("Couldn't retrieve pinned object '%s': %s\n", path, errmsg);
+		return err;
+	}
+
+	if (info) {
+		err = bpf_obj_get_info_by_fd(pin_fd, info, info_len);
+		if (err) {
+			/* Save errno first: close() and logging may change it */
+			err = -errno;
+			close(pin_fd);
+			libbpf_strerror(-err, errmsg, sizeof(errmsg));
+			pr_debug("Couldn't retrieve object info: %s\n", errmsg);
+			return err;
+		}
+	}
+
+	return pin_fd;
+}
+
+/*
+ * Create directory @parent and, inside it, directory @dir (both mode 0700).
+ *
+ * Directories that already exist are not treated as an error. Returns 0 on
+ * success, the try_snprintf() error if "@parent/@dir" does not fit into
+ * PATH_MAX, or -errno from mkdir().
+ */
+int make_dir_subdir(const char *parent, const char *dir)
+{
+	char subdir[PATH_MAX];
+	const char *to_create[2];
+	size_t i;
+	int ret;
+
+	ret = try_snprintf(subdir, sizeof(subdir), "%s/%s", parent, dir);
+	if (ret)
+		return ret;
+
+	/* The parent has to exist before the subdirectory can be made */
+	to_create[0] = parent;
+	to_create[1] = subdir;
+
+	for (i = 0; i < 2; i++) {
+		if (mkdir(to_create[i], S_IRWXU) && errno != EEXIST)
+			return -errno;
+	}
+
+	return 0;
+}
+
+/*
+ * Attach @prog to @iface in @mode and pin it below @pin_root_path.
+ *
+ * The pin location is "<pin_root_path>/programs/<ifname>/<program name>".
+ * If pinning fails after a successful attach, the program is detached again.
+ * If attaching fails with anything but -EEXIST, the pin path is unlinked.
+ *
+ * Returns 0 on success, -EINVAL if @prog or @pin_root_path is NULL, or the
+ * error from the failing step.
+ */
+int attach_xdp_program(struct xdp_program *prog, const struct iface *iface,
+		       enum xdp_attach_mode mode, const char *pin_root_path)
+{
+	char pin_path[PATH_MAX];
+	int ret;
+
+	if (!pin_root_path || !prog)
+		return -EINVAL;
+
+	ret = make_dir_subdir(pin_root_path, "programs");
+	if (ret) {
+		pr_warn("Unable to create pin directory: %s\n", strerror(-ret));
+		return ret;
+	}
+
+	ret = try_snprintf(pin_path, sizeof(pin_path), "%s/programs/%s/%s",
+			   pin_root_path, iface->ifname,
+			   xdp_program__name(prog));
+	if (ret)
+		return ret;
+
+	ret = xdp_program__attach(prog, iface->ifindex, mode, 0);
+	if (ret) {
+		/* pin_root_path is known to be non-NULL at this point */
+		if (ret != -EEXIST)
+			unlink(pin_path);
+		return ret;
+	}
+
+	pr_debug("Program '%s' loaded on interface '%s'%s\n",
+		 xdp_program__name(prog), iface->ifname,
+		 mode == XDP_MODE_SKB ? " in skb mode" : "");
+
+	ret = xdp_program__pin(prog, pin_path);
+	if (!ret) {
+		pr_debug("XDP program pinned at %s\n", pin_path);
+		return ret;
+	}
+
+	pr_warn("Unable to pin XDP program at %s: %s\n",
+		pin_path, strerror(-ret));
+
+	/* Roll back the attach so no unpinned program is left behind */
+	xdp_program__detach(prog, iface->ifindex, mode, 0);
+	return ret;
+}
+
+/*
+ * Detach @prog from @iface and remove its pin below @pin_root_path.
+ *
+ * After detaching, "<pin_root_path>/programs/<ifname>/<program name>" is
+ * unlinked and the per-interface directory is removed. A pin or directory
+ * that is already gone is not an error.
+ *
+ * Returns 0 on success, the error from xdp_program__detach() or
+ * try_snprintf(), or -errno from unlink()/rmdir().
+ */
+int detach_xdp_program(struct xdp_program *prog, const struct iface *iface,
+		       enum xdp_attach_mode mode, const char *pin_root_path)
+{
+	char pin_path[PATH_MAX];
+	int err;
+
+	err = xdp_program__detach(prog, iface->ifindex, mode, 0);
+	if (err)
+		return err;
+
+	err = try_snprintf(pin_path, sizeof(pin_path), "%s/programs/%s/%s",
+			   pin_root_path, iface->ifname,
+			   xdp_program__name(prog));
+	if (err)
+		return err;
+
+	/* Report the real errno; unlink() itself only returns -1 */
+	if (unlink(pin_path) && errno != ENOENT)
+		return -errno;
+
+	err = try_snprintf(pin_path, sizeof(pin_path), "%s/programs/%s",
+			   pin_root_path, iface->ifname);
+	if (err)
+		return err;
+
+	if (rmdir(pin_path) && errno != ENOENT)
+		return -errno;
+
+	return 0;
+}
+
+/*
+ * Find the pinned XDP program that is attached to @iface.
+ *
+ * Scans "<pin_root_path>/programs/<ifname>/". Pins whose program can no
+ * longer be opened or is not attached to the interface are unlinked. If the
+ * interface itself no longer exists, every pin in the directory is unlinked.
+ *
+ * @iface:         interface to look up; ifindex is resolved from ifname if 0
+ * @pin_root_path: root of the pin hierarchy
+ * @mode:          optional out parameter receiving the attach mode
+ * @xdp_prog:      out parameter receiving the program; owned by the caller
+ *
+ * Returns 0 and sets *@xdp_prog on success. Otherwise returns -ENOENT if no
+ * attached program was found, -ENODEV if the interface is gone, -EFAULT if
+ * the pinned name differs from the kernel's program name, or a negative
+ * errno from a failing path/directory/unlink operation.
+ */
+int get_pinned_program(const struct iface *iface, const char *pin_root_path,
+		       enum xdp_attach_mode *mode,
+		       struct xdp_program **xdp_prog)
+{
+	int ret = -ENOENT, err, ifindex = iface->ifindex;
+	char pin_path[PATH_MAX];
+	bool remove_all = false;
+	enum xdp_attach_mode m;
+	struct dirent *de;
+	DIR *dr;
+
+	err = try_snprintf(pin_path, sizeof(pin_path), "%s/programs/%s",
+			   pin_root_path, iface->ifname);
+	if (err)
+		return err;
+
+	dr = opendir(pin_path);
+	if (!dr) {
+		err = -errno;
+		pr_debug("Couldn't open pin directory %s: %s\n",
+			 pin_path, strerror(-err));
+		return err;
+	}
+
+	if (!ifindex)
+		ifindex = if_nametoindex(iface->ifname);
+	if (!ifindex) {
+		pr_debug("Interface %s no longer exists\n", iface->ifname);
+		remove_all = true;
+		ret = -ENODEV;
+	}
+
+	while ((de = readdir(dr)) != NULL) {
+		DECLARE_LIBXDP_OPTS(xdp_program_opts, opts, 0);
+		struct xdp_program *prog;
+
+		if (!strcmp(".", de->d_name) || !strcmp("..", de->d_name))
+			continue;
+
+		err = try_snprintf(pin_path, sizeof(pin_path),
+				   "%s/programs/%s/%s", pin_root_path,
+				   iface->ifname, de->d_name);
+		if (err) {
+			/* Do not hide the formatting error behind 'ret' */
+			ret = err;
+			goto out;
+		}
+
+		if (remove_all) {
+			if (unlink(pin_path))
+				ret = -errno;
+			continue;
+		}
+
+		opts.pin_path = pin_path;
+		prog = xdp_program__create(&opts);
+		err = libxdp_get_error(prog);
+		/* Check against the resolved ifindex; iface->ifindex may be 0 */
+		if (err || !(m = xdp_program__is_attached(prog, ifindex))) {
+			ret = err ? err : -ENOENT;
+			pr_debug("Program %s no longer loaded on %s: %s\n",
+				 de->d_name, iface->ifname, strerror(-ret));
+			if (unlink(pin_path))
+				ret = -errno;
+			/* An error pointer is not a program and must not be closed */
+			if (prog && !err)
+				xdp_program__close(prog);
+		} else {
+			if (strcmp(xdp_program__name(prog), de->d_name)) {
+				pr_warn("Pinned and kernel prog names differ: %s/%s\n",
+					xdp_program__name(prog), de->d_name);
+				ret = -EFAULT;
+				xdp_program__close(prog);
+			} else {
+				ret = 0;
+				*xdp_prog = prog;
+				if (mode)
+					*mode = m;
+			}
+			break;
+		}
+	}
+out:
+	closedir(dr);
+	return ret;
+}
+
+/*
+ * Call @cb for every interface that has a pinned, attached XDP program.
+ *
+ * Each entry of "<pin_root_path>/programs" is treated as an interface name.
+ * Interfaces for which get_pinned_program() reports -ENOENT or -ENODEV have
+ * their (stale) pin directory removed and are skipped. The program passed to
+ * @cb is closed again after the callback returns.
+ *
+ * Returns 0 when all entries were processed, -ENOENT if the programs
+ * directory cannot be opened, -errno if a stale directory cannot be removed,
+ * or the first error from try_snprintf(), get_pinned_program() or @cb.
+ */
+int iterate_pinned_programs(const char *pin_root_path, program_callback cb,
+			    void *arg)
+{
+	char pin_path[PATH_MAX];
+	struct dirent *de;
+	int err = 0;
+	DIR *dr;
+
+	err = try_snprintf(pin_path, sizeof(pin_path), "%s/programs",
+			   pin_root_path);
+	if (err)
+		return err;
+
+	dr = opendir(pin_path);
+	if (!dr)
+		return -ENOENT;
+
+	while ((de = readdir(dr)) != NULL) {
+		enum xdp_attach_mode mode = XDP_MODE_UNSPEC;
+		struct xdp_program *prog = NULL;
+		struct iface iface = {};
+
+		if (!strcmp(".", de->d_name) || !strcmp("..", de->d_name))
+			continue;
+
+		iface.ifname = de->d_name;
+		iface.ifindex = if_nametoindex(iface.ifname);
+
+		err = try_snprintf(pin_path, sizeof(pin_path), "%s/programs/%s",
+				   pin_root_path, iface.ifname);
+		if (err)
+			goto out;
+
+		err = get_pinned_program(&iface, pin_root_path, &mode, &prog);
+		if (err == -ENOENT || err == -ENODEV) {
+			/* Nothing attached any more: drop the stale directory */
+			if (rmdir(pin_path)) {
+				err = -errno;
+				goto out;
+			}
+			err = 0;
+			continue;
+		} else if (err) {
+			goto out;
+		}
+
+		err = cb(&iface, prog, mode, arg);
+		xdp_program__close(prog);
+		if (err)
+			goto out;
+	}
+
+out:
+	closedir(dr);
+	return err;
+}
+
+/*
+ * Call @cb once for every network interface on the system.
+ *
+ * The callback receives the interface and its multiprog as returned by
+ * xdp_multiprog__get_from_ifindex(); a lookup that fails with -ENOENT is
+ * passed on as a NULL multiprog. The multiprog is closed after the callback.
+ *
+ * Returns 0 after visiting all interfaces, -errno if the interface list
+ * cannot be obtained, the lookup error for any other lookup failure, or the
+ * first non-zero value returned by @cb.
+ */
+int iterate_iface_multiprogs(multiprog_callback cb, void *arg)
+{
+	struct if_nameindex *cur, *list;
+	int ret = 0;
+
+	list = if_nameindex();
+	if (!list) {
+		ret = -errno;
+		pr_warn("Couldn't get list of interfaces: %s\n", strerror(-ret));
+		return ret;
+	}
+
+	cur = list;
+	while (cur->if_index) {
+		struct iface iface = {
+			.ifindex = cur->if_index,
+			.ifname = cur->if_name,
+		};
+		struct xdp_multiprog *mp;
+
+		mp = xdp_multiprog__get_from_ifindex(iface.ifindex);
+		if (IS_ERR_OR_NULL(mp)) {
+			if (PTR_ERR(mp) != -ENOENT) {
+				ret = PTR_ERR(mp);
+				pr_warn("Error getting XDP status for interface %s: %s\n",
+					cur->if_name, strerror(-ret));
+				break;
+			}
+			/* No XDP program on this interface */
+			mp = NULL;
+		}
+
+		ret = cb(&iface, mp, arg);
+		xdp_multiprog__close(mp);
+		if (ret)
+			break;
+
+		cur++;
+	}
+
+	if_freenameindex(list);
+	return ret;
+}
+
+/*
+ * Return true if statfs() succeeds on @mnt and reports a filesystem type
+ * equal to @magic, false otherwise.
+ */
+static bool bpf_is_valid_mntpt(const char *mnt, unsigned long magic)
+{
+	struct statfs fs_info;
+
+	return statfs(mnt, &fs_info) >= 0 &&
+	       (unsigned long)fs_info.f_type == magic;
+}
+
+/*
+ * If @mntpt is a mount point of the filesystem identified by @magic, copy it
+ * (truncated to @len - 1 characters and NUL-terminated) into @mnt and return
+ * @mnt; otherwise return NULL and leave @mnt untouched.
+ */
+static const char *bpf_find_mntpt_single(unsigned long magic, char *mnt,
+					 int len, const char *mntpt)
+{
+	if (!bpf_is_valid_mntpt(mntpt, magic))
+		return NULL;
+
+	strncpy(mnt, mntpt, len - 1);
+	mnt[len - 1] = '\0';
+
+	return mnt;
+}
+
+/*
+ * Locate a mount point of filesystem type @fstype / @magic.
+ *
+ * The candidates in the NULL-terminated list @known_mnts are probed first.
+ * If none matches and @len equals PATH_MAX, /proc/mounts is scanned for the
+ * first entry whose type string equals @fstype.
+ *
+ * @mnt/@len: output buffer and its size
+ *
+ * Returns @mnt holding the mount point on success, NULL if nothing was found
+ * (in which case @mnt is not modified by the /proc/mounts scan).
+ */
+static const char *bpf_find_mntpt(const char *fstype, unsigned long magic,
+				  char *mnt, int len,
+				  const char * const *known_mnts)
+{
+	const char * const *ptr;
+	/* "%4096s" stores up to PATH_MAX characters plus the terminator */
+	char entry[PATH_MAX + 1];
+	char type[100];
+	bool found = false;
+	size_t entry_len;
+	FILE *fp;
+
+	if (known_mnts) {
+		for (ptr = known_mnts; *ptr; ptr++) {
+			if (bpf_find_mntpt_single(magic, mnt, len, *ptr))
+				return mnt;
+		}
+	}
+
+	if (len != PATH_MAX)
+		return NULL;
+
+	fp = fopen("/proc/mounts", "r");
+	if (fp == NULL)
+		return NULL;
+
+	while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n", entry,
+		      type) == 2) {
+		if (strcmp(type, fstype) == 0) {
+			found = true;
+			break;
+		}
+	}
+
+	fclose(fp);
+
+	/* 'type' holds no valid string if the very first fscanf() failed, so
+	 * rely on the flag instead of comparing it again.
+	 */
+	if (!found)
+		return NULL;
+
+	entry_len = strlen(entry);
+	if (entry_len >= (size_t)len)
+		return NULL;
+
+	memcpy(mnt, entry, entry_len + 1);
+	return mnt;
+}
+
+/*
+ * Make sure directory @target exists, creating it with mode 0700 if needed.
+ *
+ * Returns 0 if the directory was created or already exists, otherwise logs a
+ * warning and returns -errno from mkdir().
+ */
+static int bpf_mnt_check_target(const char *target)
+{
+	int saved;
+
+	if (mkdir(target, S_IRWXU) == 0 || errno == EEXIST)
+		return 0;
+
+	saved = -errno;
+	pr_warn("mkdir %s failed: %s\n", target, strerror(-saved));
+	return saved;
+}
+/*
+ * Simplified version of code from iproute2.
+ *
+ * Determine the bpffs mount point once and cache the answer (including a
+ * negative one) in static storage. The lookup tries the known mount points
+ * and /proc/mounts first; failing that, BPF_DIR_MNT is created if necessary
+ * and accepted only if it really is a bpffs mount.
+ *
+ * Returns a pointer to a static buffer holding the mount point, or NULL if
+ * no bpffs mount could be found.
+ */
+static const char *bpf_get_work_dir()
+{
+	static char bpf_tmp[PATH_MAX] = BPF_DIR_MNT;
+	static char bpf_wrk_dir[PATH_MAX];
+	static const char *mnt;
+	static bool bpf_mnt_cached;
+	static const char *const bpf_known_mnts[] = {
+		BPF_DIR_MNT,
+		"/bpf",
+		0,
+	};
+
+	if (bpf_mnt_cached)
+		return mnt;
+
+	mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_tmp, sizeof(bpf_tmp),
+			     bpf_known_mnts);
+	if (!mnt) {
+		/* Fall back to the default location */
+		mnt = BPF_DIR_MNT;
+		if (bpf_mnt_check_target(mnt) ||
+		    !bpf_is_valid_mntpt(mnt, BPF_FS_MAGIC))
+			mnt = NULL;
+	}
+
+	if (mnt) {
+		strncpy(bpf_wrk_dir, mnt, sizeof(bpf_wrk_dir));
+		bpf_wrk_dir[sizeof(bpf_wrk_dir) - 1] = '\0';
+		mnt = bpf_wrk_dir;
+	}
+
+	bpf_mnt_cached = true;
+	return mnt;
+}
+
+/*
+ * Write the bpffs working directory, optionally extended by "/@subdir",
+ * into @buf.
+ *
+ * @fatal only selects the log level (LOG_WARN vs. LOG_DEBUG) of the message
+ * printed when no bpffs mount is found.
+ *
+ * Returns 0 on success, -ENOENT if bpffs is not available, or the
+ * try_snprintf() error if the path does not fit into @buf_len.
+ */
+int get_bpf_root_dir(char *buf, size_t buf_len, const char *subdir, bool fatal)
+{
+	const char *root = bpf_get_work_dir();
+
+	if (!root) {
+		logging_print(fatal ? LOG_WARN : LOG_DEBUG,
+			      "Could not find BPF working dir - bpffs not mounted?\n");
+		return -ENOENT;
+	}
+
+	if (!subdir)
+		return try_snprintf(buf, buf_len, "%s", root);
+
+	return try_snprintf(buf, buf_len, "%s/%s", root, subdir);
+}
+
+/*
+ * Open the map pinned at "@bpf_root/@map_name".
+ *
+ * The path and @info are handed to get_pinned_object_fd(), together with
+ * sizeof(*info) as the info length.
+ *
+ * Returns the result of get_pinned_object_fd(), or the try_snprintf() error
+ * if the path does not fit into PATH_MAX.
+ */
+int get_pinned_map_fd(const char *bpf_root, const char *map_name,
+		      struct bpf_map_info *info)
+{
+	char map_path[PATH_MAX];
+	__u32 len = sizeof(*info);
+	int ret;
+
+	ret = try_snprintf(map_path, sizeof(map_path), "%s/%s", bpf_root, map_name);
+	if (ret)
+		return ret;
+
+	pr_debug("Getting pinned object from %s\n", map_path);
+	return get_pinned_object_fd(map_path, info, &len);
+}
+
+/*
+ * Remove the pinned map @map_name from the directory referred to by @dir_fd.
+ *
+ * A map that is not pinned is not an error. Returns 0 on success (or when
+ * there was nothing to remove), otherwise -errno from fstatat()/unlinkat().
+ */
+int unlink_pinned_map(int dir_fd, const char *map_name)
+{
+	struct stat statbuf = {};
+	int err;
+
+	err = fstatat(dir_fd, map_name, &statbuf, 0);
+	if (err && errno == ENOENT) {
+		pr_debug("Map name %s not pinned\n", map_name);
+		return 0;
+	} else if (err) {
+		err = -errno;
+		pr_warn("Couldn't stat pinned map %s: %s\n",
+			map_name, strerror(-err));
+		return err;
+	}
+
+	pr_debug("Unlinking pinned map %s\n", map_name);
+	err = unlinkat(dir_fd, map_name, 0);
+	if (err) {
+		err = -errno;
+		pr_warn("Couldn't unlink pinned map %s: %s\n",
+			map_name, strerror(-err));
+		/* errno may have been changed by the logging call above */
+		return err;
+	}
+
+	return 0;
+}
+
+/* Extra pseudo-action placed directly after XDP_REDIRECT */
+#define XDP_UNKNOWN (XDP_REDIRECT + 1)
+/* Number of action slots, unless a header included earlier already defined it */
+#ifndef XDP_ACTION_MAX
+#define XDP_ACTION_MAX (XDP_UNKNOWN + 1)
+#endif
+
+/* Printable name for each XDP action code, indexed by the action value */
+static const char *xdp_action_names[XDP_ACTION_MAX] = {
+ [XDP_ABORTED] = "XDP_ABORTED",
+ [XDP_DROP] = "XDP_DROP",
+ [XDP_PASS] = "XDP_PASS",
+ [XDP_TX] = "XDP_TX",
+ [XDP_REDIRECT] = "XDP_REDIRECT",
+ [XDP_UNKNOWN] = "XDP_UNKNOWN",
+};
+
+/*
+ * Map an XDP action code to its name from xdp_action_names[].
+ *
+ * Returns NULL for codes that are not below XDP_ACTION_MAX.
+ */
+const char *action2str(__u32 action)
+{
+	return action < XDP_ACTION_MAX ? xdp_action_names[action] : NULL;
+}
+
+/*
+ * Prepare the process for working with BPF: initialise library logging,
+ * require an effective UID of 0 and try to raise the memlock rlimit.
+ *
+ * Returns 0 when running as root, 1 otherwise.
+ */
+int check_bpf_environ(void)
+{
+	init_lib_logging();
+
+	if (geteuid() == 0) {
+		/* Try to avoid probing errors due to rlimit exhaustion by
+		 * starting out with an rlimit of 1 MiB. This is not going to
+		 * solve all issues, but it will at least make things work
+		 * when there is nothing else loaded.
+		 *
+		 * Ignore return code because an error shouldn't abort running.
+		 */
+		set_rlimit(1024 * 1024);
+		return 0;
+	}
+
+	pr_warn("This program must be run as root.\n");
+	return 1;
+}
+
+/* State of the program lock: directory for lock files, path and fd of the
+ * lock file currently held (NULL / -1 when not held), and the PID recorded
+ * by prog_lock_get().
+ */
+static const char *lock_dir = RUNDIR;
+static char *prog_lock_file = NULL;
+static int prog_lock_fd = -1;
+static pid_t prog_pid = 0;
+
+/*
+ * Release the program lock taken by prog_lock_get().
+ *
+ * Also installed by prog_lock_get() as the handler for SIGHUP, SIGINT,
+ * SIGSEGV, SIGFPE and SIGTERM, in which case @signal is the signal number;
+ * direct callers pass 0. Does nothing if no lock is held.
+ *
+ * When @signal is non-zero, the signal is re-sent to the recorded PID after
+ * the cleanup (or exit(@signal) is called if no PID was recorded).
+ *
+ * NOTE(review): this runs in signal context but calls pr_warn()/pr_debug(),
+ * free() and exit(), which are not on the POSIX async-signal-safe list -
+ * confirm this is acceptable.
+ */
+void prog_lock_release(int signal)
+{
+ /* sa_handler is left zero-initialised here */
+ struct sigaction sigact = { .sa_flags = SA_RESETHAND };
+ int err;
+
+ if (prog_lock_fd < 0 || !prog_lock_file)
+ return;
+
+ /* Replace the handlers installed by prog_lock_get() */
+ sigaction(SIGHUP, &sigact, NULL);
+ sigaction(SIGINT, &sigact, NULL);
+ sigaction(SIGSEGV, &sigact, NULL);
+ sigaction(SIGFPE, &sigact, NULL);
+ sigaction(SIGTERM, &sigact, NULL);
+
+ err = unlink(prog_lock_file);
+ if (err) {
+ err = -errno;
+ pr_warn("Unable to unlink lock file: %s\n", strerror(-err));
+ /* the fd and the file name are kept when the unlink fails */
+ goto out;
+ }
+
+ close(prog_lock_fd);
+ free(prog_lock_file);
+ prog_lock_fd = -1;
+ prog_lock_file = NULL;
+
+out:
+ if (signal) {
+ pr_debug("Exiting on signal %d\n", signal);
+ if (prog_pid)
+ kill(prog_pid, signal);
+ else
+ exit(signal);
+ }
+}
+
+/*
+ * Take the exclusive program lock "<lock_dir>/<progname>.lck".
+ *
+ * The lock file is created with O_CREAT | O_EXCL and the PID of the calling
+ * process is written into it. prog_lock_release() is installed as handler
+ * for SIGHUP, SIGINT, SIGSEGV, SIGFPE and SIGTERM.
+ *
+ * Returns 0 when the lock was acquired. Otherwise returns:
+ *   -EFAULT  if this process already holds the lock
+ *   -EEXIST  if another process holds the lock (its PID is logged)
+ *   -EINVAL  if the lock file exists but no PID could be parsed from it
+ *   -ENOMEM  if the lock file name cannot be allocated
+ *   or another negative errno from the failing system call.
+ */
+int prog_lock_get(const char *progname)
+{
+	char buf[PATH_MAX];
+	int err;
+	struct sigaction sigact = { .sa_handler = prog_lock_release };
+
+	if (prog_lock_fd >= 0) {
+		pr_warn("Attempt to get prog_lock twice.\n");
+		return -EFAULT;
+	}
+
+	if (!prog_lock_file) {
+		err = try_snprintf(buf, sizeof(buf), "%s/%s.lck", lock_dir,
+				   progname);
+		if (err)
+			return err;
+
+		prog_lock_file = strdup(buf);
+		if (!prog_lock_file)
+			return -ENOMEM;
+	}
+
+	prog_pid = getpid();
+
+	if (sigaction(SIGHUP, &sigact, NULL) ||
+	    sigaction(SIGINT, &sigact, NULL) ||
+	    sigaction(SIGSEGV, &sigact, NULL) ||
+	    sigaction(SIGFPE, &sigact, NULL) ||
+	    sigaction(SIGTERM, &sigact, NULL)) {
+		err = -errno;
+		pr_warn("Unable to install signal handler: %s\n", strerror(-err));
+		return err;
+	}
+
+	prog_lock_fd = open(prog_lock_file, O_WRONLY | O_CREAT | O_EXCL, 0644);
+	if (prog_lock_fd < 0) {
+		err = -errno;
+		if (err == -EEXIST) {
+			pid_t pid = 0;
+			char pidbuf[100];
+			ssize_t len;
+			int fd;
+
+			fd = open(prog_lock_file, O_RDONLY);
+			if (fd < 0) {
+				err = -errno;
+				pr_warn("Unable to open lockfile for reading: %s\n",
+					strerror(-err));
+				return err;
+			}
+
+			len = read(fd, pidbuf, sizeof(pidbuf) - 1);
+			/* errno is only meaningful if read() actually failed */
+			err = len < 0 ? -errno : 0;
+			close(fd);
+			if (len > 0) {
+				pidbuf[len] = '\0';
+				pid = strtoul(pidbuf, NULL, 10);
+			}
+			if (!pid) {
+				/* Readable, but empty or without a valid PID */
+				if (!err)
+					err = -EINVAL;
+				pr_warn("Unable to read PID from lockfile: %s\n",
+					strerror(-err));
+				return err;
+			}
+			pr_warn("Unable to get program lock: Already held by pid %d\n",
+				pid);
+			/* Never report success while someone else holds the lock */
+			return -EEXIST;
+		}
+
+		pr_warn("Unable to get program lock: %s\n", strerror(-err));
+		return err;
+	}
+
+	err = dprintf(prog_lock_fd, "%d\n", prog_pid);
+	if (err < 0) {
+		err = -errno;
+		pr_warn("Unable to write pid to lock file: %s\n", strerror(-err));
+		goto out_err;
+	}
+
+	err = fsync(prog_lock_fd);
+	if (err) {
+		err = -errno;
+		pr_warn("Unable fsync() lock file: %s\n", strerror(-err));
+		goto out_err;
+	}
+
+	return 0;
+out_err:
+	unlink(prog_lock_file);
+	close(prog_lock_fd);
+	free(prog_lock_file);
+	prog_lock_file = NULL;
+	prog_lock_fd = -1;
+	return err;
+}
+
+/*
+ * Format the BPF_TAG_SIZE bytes of @tag as lower-case hex digits into @buf
+ * (two characters per byte plus the terminating NUL) and return @buf.
+ */
+static char *print_bpf_tag(char buf[BPF_TAG_SIZE * 2 + 1],
+			   const unsigned char tag[BPF_TAG_SIZE])
+{
+	static const char hex_digits[] = "0123456789abcdef";
+	char *out = buf;
+	int i;
+
+	for (i = 0; i < BPF_TAG_SIZE; i++) {
+		*out++ = hex_digits[tag[i] >> 4];
+		*out++ = hex_digits[tag[i] & 0x0f];
+	}
+	*out = '\0';
+
+	return buf;
+}
+
+/*
+ * Print the status lines for one interface.
+ *
+ * With no multiprog (@mp == NULL) a single "no program" line is printed.
+ * Otherwise one line is printed for the hardware-offloaded program (if any),
+ * one for the main program (if any), and one line per component program
+ * returned by xdp_multiprog__next_prog(), including its chain call actions.
+ *
+ * @arg is unused. Returns 0 on success or the error from
+ * xdp_program__print_chain_call_actions().
+ */
+static int print_iface_status(const struct iface *iface,
+			      const struct xdp_multiprog *mp,
+			      __unused void *arg)
+{
+	struct xdp_program *hw, *main_prog, *cur;
+	char tag[BPF_TAG_SIZE * 2 + 1];
+	char actions[STRERR_BUFSIZE];
+	int ret;
+
+	if (!mp) {
+		printf("%-22s <No XDP program loaded!>\n", iface->ifname);
+		return 0;
+	}
+
+	hw = xdp_multiprog__hw_prog(mp);
+	if (hw)
+		printf("%-16s %-5s %-17s %-8s %-4d %-17s\n",
+		       iface->ifname,
+		       "",
+		       xdp_program__name(hw),
+		       get_enum_name(xdp_modes, XDP_MODE_HW),
+		       xdp_program__id(hw),
+		       print_bpf_tag(tag, xdp_program__tag(hw)));
+
+	main_prog = xdp_multiprog__main_prog(mp);
+	if (!main_prog)
+		return 0;
+
+	printf("%-16s %-5s %-17s %-8s %-4d %-17s\n",
+	       iface->ifname,
+	       "",
+	       xdp_program__name(main_prog),
+	       get_enum_name(xdp_modes, xdp_multiprog__attach_mode(mp)),
+	       xdp_program__id(main_prog),
+	       print_bpf_tag(tag, xdp_program__tag(main_prog)));
+
+	/* Component programs, printed below the main program's line */
+	cur = xdp_multiprog__next_prog(NULL, mp);
+	while (cur) {
+		ret = xdp_program__print_chain_call_actions(cur, actions,
+							    sizeof(actions));
+		if (ret)
+			return ret;
+
+		printf("%-16s %-5d %-16s %-8s %-4u %-17s %s\n",
+		       " =>", xdp_program__run_prio(cur),
+		       xdp_program__name(cur),
+		       "", xdp_program__id(cur),
+		       print_bpf_tag(tag, xdp_program__tag(cur)),
+		       actions);
+
+		cur = xdp_multiprog__next_prog(cur, mp);
+	}
+
+	return 0;
+}
+
+/*
+ * Print a table with the XDP status of @iface, or of every interface on the
+ * system when @iface is NULL.
+ *
+ * Returns 0 on success, the lookup error if the XDP status of @iface cannot
+ * be retrieved (a lookup failing with -ENOENT is shown as "no program"), or
+ * the error reported while printing.
+ */
+int iface_print_status(const struct iface *iface)
+{
+	int err = 0;
+
+	printf("%-16s %-5s %-17s Mode ID %-17s %s\n",
+	       "Interface", "Prio", "Program name", "Tag", "Chain actions");
+	printf("--------------------------------------------------------------------------------------\n");
+
+	if (iface) {
+		struct xdp_multiprog *mp;
+
+		mp = xdp_multiprog__get_from_ifindex(iface->ifindex);
+		if (IS_ERR_OR_NULL(mp)) {
+			if (PTR_ERR(mp) != -ENOENT) {
+				err = PTR_ERR(mp);
+				pr_warn("Error getting XDP status for interface %s: %s\n",
+					iface->ifname, strerror(-err));
+				goto out;
+			}
+			mp = NULL;
+		}
+		err = print_iface_status(iface, mp, NULL);
+		/* Release the handle, as iterate_iface_multiprogs() does
+		 * after its callback (which also passes NULL to close).
+		 */
+		xdp_multiprog__close(mp);
+	} else {
+		err = iterate_iface_multiprogs(print_iface_status, NULL);
+	}
+	printf("\n");
+out:
+	return err;
+}
diff --git a/lib/util/util.h b/lib/util/util.h
new file mode 100644
index 0000000..8848e41
--- /dev/null
+++ b/lib/util/util.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __UTIL_H
+#define __UTIL_H
+
+#include <bpf/libbpf.h>
+#include <xdp/libxdp.h>
+#include "params.h"
+
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+#define STRERR_BUFSIZE 1024
+#define _textify(x) #x
+#define textify(x) _textify(x)
+
+#define __unused __attribute__((unused))
+
+#ifndef BPF_DIR_MNT
+#define BPF_DIR_MNT "/sys/fs/bpf"
+#endif
+
+#ifndef BPF_OBJECT_PATH
+#define BPF_OBJECT_PATH "/usr/lib/bpf"
+#endif
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+
+/*
+ * Loop header that walks all keys of the BPF map @_map_fd using
+ * bpf_map_get_next_key(). On each iteration @_map_key holds the current key;
+ * @_prev_key is scratch storage for the previous key. The loop ends when
+ * bpf_map_get_next_key() returns non-zero, and that value is left in @_err.
+ */
+#define FOR_EACH_MAP_KEY(_err, _map_fd, _map_key, _prev_key) \
+ for (_err = bpf_map_get_next_key(_map_fd, NULL, &_map_key); \
+ !_err; \
+ _prev_key = _map_key, \
+ _err = bpf_map_get_next_key(_map_fd, &_prev_key, &_map_key))
+
+/*
+ * Smaller / larger of two values. Every use of an argument is parenthesised
+ * so that expressions such as min(a, b ? c : d) or max(p | q, r) expand as
+ * intended. The selected argument is still evaluated twice, so do not pass
+ * expressions with side effects.
+ */
+#define min(x, y) ((x) < (y) ? (x) : (y))
+#define max(x, y) ((x) > (y) ? (x) : (y))
+
+/* Byte offset of @member inside @type (fallback if no header provides it) */
+#ifndef offsetof
+#define offsetof(type, member) ((size_t) & ((type *)0)->member)
+#endif
+
+/*
+ * Given @ptr pointing at the @member field of a @type object, yield a pointer
+ * to the enclosing object. The temporary __mptr makes the compiler check that
+ * @ptr has the member's type.
+ */
+#ifndef container_of
+#define container_of(ptr, type, member) \
+ ({ \
+ const typeof(((type *)0)->member) *__mptr = (ptr); \
+ (type *)((char *)__mptr - offsetof(type, member)); \
+ })
+#endif
+
+/* Round @x up to the next multiple of @y; @y is evaluated only once */
+#ifndef roundup
+#define roundup(x, y) \
+ ({ \
+ typeof(y) __y = y; \
+ (((x) + (__y - 1)) / __y) * __y; \
+ })
+#endif
+
+int try_snprintf(char *buf, size_t buf_len, const char *format, ...);
+int make_dir_subdir(const char *parent, const char *dir);
+
+int check_bpf_environ(void);
+int double_rlimit(void);
+
+int attach_xdp_program(struct xdp_program *prog, const struct iface *iface,
+ enum xdp_attach_mode mode, const char *pin_root_dir);
+int detach_xdp_program(struct xdp_program *prog, const struct iface *iface,
+ enum xdp_attach_mode mode, const char *pin_root_dir);
+
+int find_bpf_file(char *buf, size_t buf_size, const char *progname);
+struct bpf_object *open_bpf_file(const char *progname,
+ struct bpf_object_open_opts *opts);
+
+typedef int (*program_callback)(const struct iface *iface,
+ struct xdp_program *prog,
+ enum xdp_attach_mode mode, void *arg);
+typedef int (*multiprog_callback)(const struct iface *iface,
+ const struct xdp_multiprog *mp, void *arg);
+int get_pinned_program(const struct iface *iface, const char *pin_root_path,
+ enum xdp_attach_mode *mode, struct xdp_program **prog);
+int iterate_pinned_programs(const char *pin_root_path, program_callback cb,
+ void *arg);
+int iterate_iface_multiprogs(multiprog_callback cb, void *arg);
+
+int get_bpf_root_dir(char *buf, size_t buf_len, const char *subdir, bool fatal);
+int get_pinned_map_fd(const char *bpf_root, const char *map_name,
+ struct bpf_map_info *info);
+int unlink_pinned_map(int dir_fd, const char *map_name);
+
+const char *action2str(__u32 action);
+
+int prog_lock_get(const char *progname);
+void prog_lock_release(int signal);
+
+const char *get_libbpf_version(void);
+int iface_print_status(const struct iface *iface);
+
+#endif
diff --git a/lib/util/util.mk b/lib/util/util.mk
new file mode 100644
index 0000000..7fc3b43
--- /dev/null
+++ b/lib/util/util.mk
@@ -0,0 +1,2 @@
+UTIL_OBJS := params.o logging.o util.o stats.o xpcapng.o xdp_sample.o
+UTIL_BPF_OBJS := xdp_sample.bpf.o
diff --git a/lib/util/xdp_sample.bpf.c b/lib/util/xdp_sample.bpf.c
new file mode 100644
index 0000000..3e301bb
--- /dev/null
+++ b/lib/util/xdp_sample.bpf.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <bpf/vmlinux.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+/*
+ * BPF program placed in the "tp_btf/xdp_cpumap_kthread" section. It only
+ * prints its arguments (map ID, processed and dropped counts, the sched
+ * value and xdp_stats->pass) through bpf_printk() and returns 0.
+ */
+SEC("tp_btf/xdp_cpumap_kthread")
+int BPF_PROG(tp_xdp_cpumap_kthread, int map_id, unsigned int processed,
+ unsigned int drops, int sched, struct xdp_cpumap_stats *xdp_stats)
+{
+ bpf_printk("Stats: %d %u %u %d %d\n",
+ map_id, processed, drops, sched, xdp_stats->pass);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/lib/util/xdp_sample.c b/lib/util/xdp_sample.c
new file mode 100644
index 0000000..4385ea5
--- /dev/null
+++ b/lib/util/xdp_sample.c
@@ -0,0 +1,1643 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+
+#include <math.h>
+#include <poll.h>
+#include <time.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <locale.h>
+#include <net/if.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+#include <stdbool.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <arpa/inet.h>
+#include <sys/ioctl.h>
+#include <bpf/libbpf.h>
+#include <sys/sysinfo.h>
+#include <sys/timerfd.h>
+#include <sys/utsname.h>
+#include <linux/limits.h>
+#include <sys/resource.h>
+#include <sys/signalfd.h>
+#include <linux/ethtool.h>
+#include <linux/if_link.h>
+#include <linux/sockios.h>
+#include <linux/hashtable.h>
+
+#include "xdp_sample.h"
+#include "logging.h"
+
+#include "xdp_sample.skel.h"
+
+/* Call printf(fmt, ...) only when @cond is true. */
+#define __sample_print(fmt, cond, ...) \
+ ({ \
+ if (cond) \
+ printf(fmt, ##__VA_ARGS__); \
+ })
+
+/* print_always: unconditional output. */
+#define print_always(fmt, ...) __sample_print(fmt, 1, ##__VA_ARGS__)
+/* print_default: output only when LL_DEFAULT is set in sample_log_level. */
+#define print_default(fmt, ...) \
+ __sample_print(fmt, sample_log_level & LL_DEFAULT, ##__VA_ARGS__)
+/*
+ * Print when @err is positive or LL_DEFAULT is set, and latch sample_err_exp
+ * to true once a positive @err has been seen (it is never reset to false
+ * here). Note that @err is expanded more than once.
+ */
+#define __print_err(err, fmt, ...) \
+ ({ \
+ __sample_print(fmt, err > 0 || sample_log_level & LL_DEFAULT, \
+ ##__VA_ARGS__); \
+ sample_err_exp = sample_err_exp ? true : err > 0; \
+ })
+#define print_err(err, fmt, ...) __print_err(err, fmt, ##__VA_ARGS__)
+
+/*
+ * __COLUMN(x) builds the printf format for one output column: a value of
+ * conversion @x in a 10-wide field using the ' (grouping) flag, followed by
+ * a left-justified 13-wide string label.
+ */
+#define __COLUMN(x) "%'10" x " %-13s"
+#define FMT_COLUMNf __COLUMN(".0f")
+#define FMT_COLUMNd __COLUMN("d")
+#define FMT_COLUMNl __COLUMN(PRIu64)
+/* Each helper below expands to the "value, label" argument pair of a column. */
+#define RX(rx) rx, "rx/s"
+#define PPS(pps) pps, "pkt/s"
+#define DROP(drop) drop, "drop/s"
+#define ERR(err) err, "error/s"
+#define HITS(hits) hits, "hit/s"
+#define XMIT(xmit) xmit, "xmit/s"
+#define PASS(pass) pass, "pass/s"
+#define REDIR(redir) redir, "redir/s"
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+
+/* Extra slot after the last known XDP action, used for unknown actions. */
+#define XDP_UNKNOWN (XDP_REDIRECT + 1)
+#define XDP_ACTION_MAX (XDP_UNKNOWN + 1)
+/* Number of redirect result slots; matches xdp_redirect_err_names[]. */
+#define XDP_REDIRECT_ERR_MAX 7
+
+/*
+ * Index of each statistics map in sample_map[], sample_mmap[] and
+ * sample_map_count[]; NUM_MAP is the size of those arrays.
+ */
+enum map_type {
+ MAP_RX,
+ MAP_RXQ,
+ MAP_REDIRECT_ERR,
+ MAP_CPUMAP_ENQUEUE,
+ MAP_CPUMAP_KTHREAD,
+ MAP_EXCEPTION,
+ MAP_DEVMAP_XMIT,
+ MAP_DEVMAP_XMIT_MULTI,
+ NUM_MAP,
+};
+
+/* Bit flags tested against sample_log_level by the print macros. */
+enum log_level {
+ LL_DEFAULT = 1U << 0,
+ LL_SIMPLE = 1U << 1,
+ LL_DEBUG = 1U << 2,
+};
+
+/*
+ * One snapshot of a counter set: the time it was taken (from gettime()),
+ * the sum over all slots, and the individual slots. The slot array is
+ * indexed per CPU or per RX queue depending on which collector filled it.
+ */
+struct record {
+ __u64 timestamp;
+ struct datarec total;
+ union {
+ struct datarec *cpu;
+ struct datarec *rxq;
+ };
+};
+
+/*
+ * Entry of stats_record.xmit_map. @pair is the hash key; the printing code
+ * decodes it as (from ifindex << 32) | to ifindex.
+ */
+struct map_entry {
+ struct hlist_node node;
+ __u64 pair;
+ struct record val;
+};
+
+/*
+ * All counters collected in one sampling round. Which members have their
+ * arrays allocated depends on sample_mask (see alloc_stats_record()).
+ */
+struct stats_record {
+ struct record rx_cnt;
+ struct record rxq_cnt;
+ /* Index 0 is the success counter, 1.. are the error cases */
+ struct record redir_err[XDP_REDIRECT_ERR_MAX];
+ struct record kthread;
+ struct record exception[XDP_ACTION_MAX];
+ struct record devmap_xmit;
+ /* struct map_entry objects keyed by their pair value */
+ DECLARE_HASHTABLE(xmit_map, 5);
+ /* Flexible array; alloc_stats_record() sizes it to sample_n_cpus */
+ struct record enq[];
+};
+
+/*
+ * Aggregated rates produced by the stats_get_*() helpers when they are
+ * given a non-NULL output pointer.
+ */
+struct sample_output {
+ /* Sums that the stats_get_*() helpers add their totals into */
+ struct {
+ uint64_t rx;
+ uint64_t redir;
+ uint64_t drop;
+ uint64_t drop_xmit;
+ uint64_t err;
+ uint64_t xmit;
+ } totals;
+ struct {
+ /* pps and num share storage; sample_summary_print() reads num */
+ union {
+ uint64_t pps;
+ uint64_t num;
+ };
+ uint64_t drop;
+ uint64_t err;
+ } rx_cnt;
+ struct {
+ uint64_t suc;
+ uint64_t err;
+ } redir_cnt;
+ struct {
+ uint64_t hits;
+ } except_cnt;
+ struct {
+ uint64_t pps;
+ uint64_t drop;
+ uint64_t err;
+ /* Average bulk size */
+ double bavg;
+ } xmit_cnt;
+};
+
+/* Per-map state, indexed by enum map_type */
+struct datarec *sample_mmap[NUM_MAP];
+struct bpf_map *sample_map[NUM_MAP];
+size_t sample_map_count[NUM_MAP];
+/* Bitmask of enum log_level flags consulted by the print macros */
+enum log_level sample_log_level;
+/* Running totals printed by sample_summary_print() */
+struct sample_output sample_out;
+unsigned long sample_interval;
+/* Latched by print_err() when a positive error value was printed */
+bool sample_err_exp;
+int sample_xdp_cnt;
+/* Set from libbpf_num_possible_cpus() in sample_setup_maps() */
+int sample_n_cpus;
+/* Set from get_num_rxqs() in sample_setup_maps() */
+int sample_n_rxqs;
+/* signalfd created in __sample_init() */
+int sample_sig_fd;
+/* SAMPLE_* feature mask stored by __sample_init() */
+int sample_mask;
+/* [0] = ifindex_from, [1] = ifindex_to as passed to __sample_init() */
+int ifindex[2];
+
+/*
+ * Cached results of kernel compatibility probes, indexed by
+ * enum sample_compat: @checked records that the probe ran, @compat its
+ * result.
+ */
+static struct {
+ bool checked;
+ bool compat;
+} sample_compat[SAMPLE_COMPAT_MAX] = {};
+
+/*
+ * Return the cached result of the compatibility probe @compat_value.
+ *
+ * Values outside the sample_compat[] table yield false instead of reading
+ * past the end of the array. A probe that has not run yet also reports
+ * false, because the table is zero-initialised.
+ */
+bool sample_is_compat(enum sample_compat compat_value)
+{
+	/* The unsigned cast also rejects negative values */
+	if ((unsigned int)compat_value >= SAMPLE_COMPAT_MAX)
+		return false;
+
+	return sample_compat[compat_value].compat;
+}
+
+/*
+ * Try to open and load the xdp_sample skeleton and report whether that
+ * worked. The skeleton is destroyed again before returning.
+ */
+bool sample_probe_cpumap_compat(void)
+{
+	struct xdp_sample *probe = xdp_sample__open_and_load();
+	bool loaded = probe != NULL;
+
+	xdp_sample__destroy(probe);
+	return loaded;
+}
+
+/*
+ * Disable autoload for whichever of @prog / @prog_compat does not match the
+ * running kernel. The probe runs at most once; afterwards the result cached
+ * in sample_compat[SAMPLE_COMPAT_CPUMAP_KTHREAD] is reused.
+ */
+void sample_check_cpumap_compat(struct bpf_program *prog,
+				struct bpf_program *prog_compat)
+{
+	bool supported;
+
+	if (sample_compat[SAMPLE_COMPAT_CPUMAP_KTHREAD].checked) {
+		supported = sample_compat[SAMPLE_COMPAT_CPUMAP_KTHREAD].compat;
+	} else {
+		supported = sample_probe_cpumap_compat();
+
+		sample_compat[SAMPLE_COMPAT_CPUMAP_KTHREAD].checked = true;
+		sample_compat[SAMPLE_COMPAT_CPUMAP_KTHREAD].compat = supported;
+	}
+
+	if (!supported) {
+		pr_debug("Kernel does not support 5-arg xdp_cpumap_kthread tracepoint, using compat version\n");
+		bpf_program__set_autoload(prog, false);
+		return;
+	}
+
+	pr_debug("Kernel supports 5-arg xdp_cpumap_kthread tracepoint\n");
+	bpf_program__set_autoload(prog_compat, false);
+}
+
+/*
+ * Display names for the redirect result slots, indexed like
+ * stats_record.redir_err[]: slot 0 counts successes, the remaining slots
+ * count errors.
+ */
+static const char *xdp_redirect_err_names[XDP_REDIRECT_ERR_MAX] = {
+ /* Key=1 keeps unknown errors */
+ "Success",
+ "Unknown",
+ "EINVAL",
+ "ENETDOWN",
+ "EMSGSIZE",
+ "EOPNOTSUPP",
+ "ENOSPC",
+};
+
+/* Display names for XDP actions, including the extra XDP_UNKNOWN slot. */
+static const char *xdp_action_names[XDP_ACTION_MAX] = {
+ [XDP_ABORTED] = "XDP_ABORTED",
+ [XDP_DROP] = "XDP_DROP",
+ [XDP_PASS] = "XDP_PASS",
+ [XDP_TX] = "XDP_TX",
+ [XDP_REDIRECT] = "XDP_REDIRECT",
+ [XDP_UNKNOWN] = "XDP_UNKNOWN",
+};
+
+/*
+ * Return the current CLOCK_MONOTONIC time in nanoseconds.
+ *
+ * On failure a warning naming the errno reason is logged and UINT64_MAX is
+ * returned.
+ */
+static __u64 gettime(void)
+{
+	struct timespec t;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &t) < 0) {
+		/* The call returns only -1; errno holds the actual cause */
+		pr_warn("Error with clock_gettime! (%s)\n", strerror(errno));
+		return UINT64_MAX;
+	}
+	return (__u64)t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
+}
+
+/*
+ * Map an XDP action code to its display name.
+ *
+ * Returns NULL when @action is negative or not below XDP_ACTION_MAX, so the
+ * name table is never indexed out of bounds.
+ */
+static const char *xdp_action2str(int action)
+{
+	if (action >= 0 && action < XDP_ACTION_MAX)
+		return xdp_action_names[action];
+	return NULL;
+}
+
+/*
+ * Allocate a zero-initialised array of @nr_entries struct datarec.
+ *
+ * Returns NULL when @nr_entries is not positive or when the allocation
+ * fails (a warning is logged in the latter case). The caller owns the
+ * array and releases it with free().
+ */
+static struct datarec *alloc_records(int nr_entries)
+{
+	struct datarec *array;
+
+	if (nr_entries <= 0)
+		return NULL;
+
+	array = calloc(nr_entries, sizeof(*array));
+	if (!array) {
+		/* nr_entries is an int, so print it with %d */
+		pr_warn("Failed to allocate memory (nr_entries: %d)\n", nr_entries);
+		return NULL;
+	}
+	return array;
+}
+
+/*
+ * Prepare a freshly allocated hash entry for key @pair: initialise the list
+ * node, stamp the record with the current time and allocate its per-CPU
+ * array. Returns 0 on success or -ENOMEM if the array cannot be allocated.
+ */
+static int map_entry_init(struct map_entry *e, __u64 pair)
+{
+	struct datarec *percpu;
+
+	e->pair = pair;
+	INIT_HLIST_NODE(&e->node);
+	e->val.timestamp = gettime();
+
+	percpu = alloc_records(libbpf_num_possible_cpus());
+	e->val.cpu = percpu;
+
+	return percpu ? 0 : -ENOMEM;
+}
+
+/*
+ * Copy the counters of each of the sample_n_rxqs RX queues from @values
+ * into @rec->rxq[] and timestamp the record. No total is computed here.
+ */
+static void map_collect_rxqs(struct datarec *values, struct record *rec)
+{
+	int q;
+
+	/* Take the timestamp right before the counters are read */
+	rec->timestamp = gettime();
+
+	for (q = 0; q < sample_n_rxqs; q++) {
+		struct datarec *dst = &rec->rxq[q];
+		struct datarec *src = &values[q];
+
+		pr_debug("%d: %lx %lx\n", q, (unsigned long)dst, (unsigned long)src);
+		dst->processed = READ_ONCE(src->processed);
+		dst->dropped = READ_ONCE(src->dropped);
+		dst->issue = READ_ONCE(src->issue);
+		dst->xdp_pass = READ_ONCE(src->xdp_pass);
+		dst->xdp_drop = READ_ONCE(src->xdp_drop);
+		dst->xdp_redirect = READ_ONCE(src->xdp_redirect);
+	}
+}
+
+/*
+ * Snapshot one per-CPU counter set.
+ *
+ * Copies the counters of every possible CPU from @values into @rec->cpu[],
+ * stores their sums in @rec->total and timestamps the record. The info
+ * counter is collected as well: calc_info_pps() reads it to derive the
+ * bulk averages, which would otherwise always come out as zero.
+ */
+static void map_collect_percpu(struct datarec *values, struct record *rec)
+{
+	/* For percpu maps, userspace gets a value per possible CPU */
+	int nr_cpus = libbpf_num_possible_cpus();
+	__u64 sum_xdp_redirect = 0;
+	__u64 sum_processed = 0;
+	__u64 sum_xdp_pass = 0;
+	__u64 sum_xdp_drop = 0;
+	__u64 sum_dropped = 0;
+	__u64 sum_issue = 0;
+	__u64 sum_info = 0;
+	int i;
+
+	/* Get time as close as possible to reading map contents */
+	rec->timestamp = gettime();
+
+	/* Record and sum values from each CPU */
+	for (i = 0; i < nr_cpus; i++) {
+		rec->cpu[i].processed = READ_ONCE(values[i].processed);
+		rec->cpu[i].dropped = READ_ONCE(values[i].dropped);
+		rec->cpu[i].issue = READ_ONCE(values[i].issue);
+		rec->cpu[i].info = READ_ONCE(values[i].info);
+		rec->cpu[i].xdp_pass = READ_ONCE(values[i].xdp_pass);
+		rec->cpu[i].xdp_drop = READ_ONCE(values[i].xdp_drop);
+		rec->cpu[i].xdp_redirect = READ_ONCE(values[i].xdp_redirect);
+
+		sum_processed += rec->cpu[i].processed;
+		sum_dropped += rec->cpu[i].dropped;
+		sum_issue += rec->cpu[i].issue;
+		sum_info += rec->cpu[i].info;
+		sum_xdp_pass += rec->cpu[i].xdp_pass;
+		sum_xdp_drop += rec->cpu[i].xdp_drop;
+		sum_xdp_redirect += rec->cpu[i].xdp_redirect;
+	}
+
+	rec->total.processed = sum_processed;
+	rec->total.dropped = sum_dropped;
+	rec->total.issue = sum_issue;
+	rec->total.info = sum_info;
+	rec->total.xdp_pass = sum_xdp_pass;
+	rec->total.xdp_drop = sum_xdp_drop;
+	rec->total.xdp_redirect = sum_xdp_redirect;
+}
+
+/*
+ * Read the whole per-CPU hash map behind @map_fd in batches and store a
+ * snapshot of every key in @rec->xmit_map, creating entries for keys seen
+ * for the first time.
+ *
+ * Returns 0 on success and -ENOMEM if memory for the batch buffers or a new
+ * entry cannot be allocated. A batch lookup failure other than ENOENT ends
+ * the walk early but still returns 0, as before.
+ */
+static int map_collect_percpu_devmap(int map_fd, struct stats_record *rec)
+{
+	int nr_cpus = libbpf_num_possible_cpus();
+	const __u32 batch_sz = 32;
+	__u32 count = batch_sz;
+	struct datarec *values;
+	int err = -ENOMEM;
+	bool init = false;
+	__u32 batch, i;
+	__u64 *keys;
+	int ret;
+
+	keys = calloc(batch_sz, sizeof(*keys));
+	if (!keys)
+		return -ENOMEM;
+	values = calloc((size_t)batch_sz * nr_cpus, sizeof(*values));
+	if (!values)
+		goto out;
+
+	for (;;) {
+		bool last = false;
+
+		ret = bpf_map_lookup_batch(map_fd, init ? &batch : NULL, &batch,
+					   keys, values, &count, NULL);
+		if (ret < 0) {
+			if (errno != ENOENT)
+				break;
+			/* ENOENT marks the final (possibly partial) batch */
+			last = true;
+		}
+		/*
+		 * errno is only meaningful after a failed call; a stale
+		 * ENOENT must not end the walk after a successful batch.
+		 */
+
+		init = true;
+		for (i = 0; i < count; i++) {
+			struct map_entry *e, *x = NULL;
+			__u64 pair = keys[i];
+
+			hash_for_each_possible(rec->xmit_map, e, node, pair) {
+				if (e->pair == pair) {
+					x = e;
+					break;
+				}
+			}
+			if (!x) {
+				x = calloc(1, sizeof(*x));
+				if (!x)
+					goto out;
+				if (map_entry_init(x, pair) < 0) {
+					free(x);
+					goto out;
+				}
+				hash_add(rec->xmit_map, &x->node, pair);
+			}
+			/* Each key owns nr_cpus consecutive values */
+			map_collect_percpu(&values[(size_t)i * nr_cpus], &x->val);
+		}
+
+		if (last)
+			break;
+		/* count is in/out: reset it to the buffer capacity */
+		count = batch_sz;
+	}
+
+	err = 0;
+out:
+	free(values);
+	free(keys);
+	return err;
+}
+
+/*
+ * Allocate a zeroed stats_record with sample_n_cpus trailing enq[] slots,
+ * plus the counter arrays for every feature enabled in sample_mask.
+ *
+ * Returns the record, or NULL after logging a warning; on failure
+ * everything allocated so far is released through the goto chain at the
+ * end. Release a successful result with free_stats_record().
+ */
+static struct stats_record *alloc_stats_record(void)
+{
+ struct stats_record *rec;
+ int i;
+
+ rec = calloc(1, sizeof(*rec) + sample_n_cpus * sizeof(struct record));
+ if (!rec) {
+ pr_warn("Failed to allocate memory\n");
+ return NULL;
+ }
+
+ if (sample_mask & SAMPLE_RX_CNT) {
+ rec->rx_cnt.cpu = alloc_records(libbpf_num_possible_cpus());
+ if (!rec->rx_cnt.cpu) {
+ pr_warn("Failed to allocate rx_cnt per-CPU array\n");
+ goto end_rec;
+ }
+ }
+ if (sample_mask & SAMPLE_RXQ_STATS) {
+ if (sample_n_rxqs <= 0) {
+ pr_warn("Invalid number of RXQs: %d\n", sample_n_rxqs);
+ goto end_rx_cnt;
+ }
+
+ rec->rxq_cnt.rxq = alloc_records(sample_n_rxqs);
+ if (!rec->rxq_cnt.rxq) {
+ pr_warn("Failed to allocate rxq_cnt per RXQ array\n");
+ goto end_rx_cnt;
+ }
+ }
+ if (sample_mask & (SAMPLE_REDIRECT_CNT | SAMPLE_REDIRECT_ERR_CNT)) {
+ for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++) {
+ rec->redir_err[i].cpu = alloc_records(libbpf_num_possible_cpus());
+ if (!rec->redir_err[i].cpu) {
+ pr_warn("Failed to allocate redir_err per-CPU array for \"%s\" case\n",
+ xdp_redirect_err_names[i]);
+ /* Undo the slots already allocated in this loop */
+ while (i--)
+ free(rec->redir_err[i].cpu);
+ goto end_rxq_cnt;
+ }
+ }
+ }
+ if (sample_mask & SAMPLE_CPUMAP_KTHREAD_CNT) {
+ rec->kthread.cpu = alloc_records(libbpf_num_possible_cpus());
+ if (!rec->kthread.cpu) {
+ pr_warn("Failed to allocate kthread per-CPU array\n");
+ goto end_redir;
+ }
+ }
+ if (sample_mask & SAMPLE_EXCEPTION_CNT) {
+ for (i = 0; i < XDP_ACTION_MAX; i++) {
+ rec->exception[i].cpu = alloc_records(libbpf_num_possible_cpus());
+ if (!rec->exception[i].cpu) {
+ pr_warn("Failed to allocate exception per-CPU array for \"%s\" case\n",
+ xdp_action2str(i));
+ /* Undo the slots already allocated in this loop */
+ while (i--)
+ free(rec->exception[i].cpu);
+ goto end_kthread;
+ }
+ }
+ }
+ if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT) {
+ rec->devmap_xmit.cpu = alloc_records(libbpf_num_possible_cpus());
+ if (!rec->devmap_xmit.cpu) {
+ pr_warn("Failed to allocate devmap_xmit per-CPU array\n");
+ goto end_exception;
+ }
+ }
+ if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+ hash_init(rec->xmit_map);
+ if (sample_mask & SAMPLE_CPUMAP_ENQUEUE_CNT) {
+ for (i = 0; i < sample_n_cpus; i++) {
+ rec->enq[i].cpu = alloc_records(libbpf_num_possible_cpus());
+ if (!rec->enq[i].cpu) {
+ pr_warn("Failed to allocate enqueue per-CPU array for CPU %d\n", i);
+ /* Undo the slots already allocated in this loop */
+ while (i--)
+ free(rec->enq[i].cpu);
+ goto end_devmap_xmit;
+ }
+ }
+ }
+
+ return rec;
+
+ /*
+ * Unwind in reverse allocation order. Members that were never allocated
+ * are still NULL from calloc(), so the free() calls below are safe.
+ */
+end_devmap_xmit:
+ free(rec->devmap_xmit.cpu);
+end_exception:
+ for (i = 0; i < XDP_ACTION_MAX; i++)
+ free(rec->exception[i].cpu);
+end_kthread:
+ free(rec->kthread.cpu);
+end_redir:
+ for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++)
+ free(rec->redir_err[i].cpu);
+end_rxq_cnt:
+ free(rec->rxq_cnt.rxq);
+end_rx_cnt:
+ free(rec->rx_cnt.cpu);
+end_rec:
+ free(rec);
+ return NULL;
+}
+
+/*
+ * Release a record obtained from alloc_stats_record(), including every
+ * counter array and all entries of the xmit_map hash table. Passing NULL
+ * is a no-op.
+ */
+static void free_stats_record(struct stats_record *r)
+{
+	struct hlist_node *tmp;
+	struct map_entry *e;
+	unsigned int bkt;
+	int i;
+
+	if (!r)
+		return;
+
+	for (i = 0; i < sample_n_cpus; i++)
+		free(r->enq[i].cpu);
+	hash_for_each_safe(r->xmit_map, bkt, tmp, e, node) {
+		hash_del(&e->node);
+		free(e->val.cpu);
+		free(e);
+	}
+	free(r->devmap_xmit.cpu);
+	for (i = 0; i < XDP_ACTION_MAX; i++)
+		free(r->exception[i].cpu);
+	free(r->kthread.cpu);
+	for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++)
+		free(r->redir_err[i].cpu);
+	/* The per-RXQ array is allocated alongside the others; free it too */
+	free(r->rxq_cnt.rxq);
+	free(r->rx_cnt.cpu);
+	free(r);
+}
+
+/*
+ * Time elapsed between snapshot @p and the later snapshot @r, in seconds.
+ * Returns 0 when both timestamps are identical.
+ */
+static double calc_period(struct record *r, struct record *p)
+{
+	__u64 elapsed_ns = r->timestamp - p->timestamp;
+
+	if (!elapsed_ns)
+		return 0;
+
+	return (double)elapsed_ns / NANOSEC_PER_SEC;
+}
+
+/*
+ * Round @val to a whole number: fractions below one half go down to
+ * floor(val), everything else goes up to ceil(val).
+ */
+static double sample_round(double val)
+{
+	double lower = floor(val);
+
+	return (val - lower < 0.5) ? lower : ceil(val);
+}
+
+/*
+ * Rounded per-second rate of the processed counter between snapshots @p
+ * and @r; 0 when @period_ is not positive.
+ */
+static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)
+{
+	__u64 delta;
+
+	if (!(period_ > 0))
+		return 0;
+
+	delta = r->processed - p->processed;
+	return sample_round(delta / period_);
+}
+
+/*
+ * Rounded per-second rate of the dropped counter between snapshots @p and
+ * @r; 0 when @period_ is not positive.
+ */
+static __u64 calc_drop_pps(struct datarec *r, struct datarec *p, double period_)
+{
+	__u64 delta;
+
+	if (!(period_ > 0))
+		return 0;
+
+	delta = r->dropped - p->dropped;
+	return sample_round(delta / period_);
+}
+
+/*
+ * Rounded per-second rate of the issue counter between snapshots @p and
+ * @r; 0 when @period_ is not positive.
+ */
+static __u64 calc_errs_pps(struct datarec *r, struct datarec *p, double period_)
+{
+	__u64 delta;
+
+	if (!(period_ > 0))
+		return 0;
+
+	delta = r->issue - p->issue;
+	return sample_round(delta / period_);
+}
+
+/*
+ * Rounded per-second rate of the info counter between snapshots @p and @r;
+ * 0 when @period_ is not positive.
+ */
+static __u64 calc_info_pps(struct datarec *r, struct datarec *p, double period_)
+{
+	__u64 delta;
+
+	if (!(period_ > 0))
+		return 0;
+
+	delta = r->info - p->info;
+	return sample_round(delta / period_);
+}
+
+/*
+ * Compute the unrounded per-second rates of the xdp_pass, xdp_drop and
+ * xdp_redirect counters between snapshots @p and @r. All three outputs are
+ * set to 0 when @period_ is not positive.
+ */
+static void calc_xdp_pps(struct datarec *r, struct datarec *p, double *xdp_pass,
+			 double *xdp_drop, double *xdp_redirect, double period_)
+{
+	if (!(period_ > 0)) {
+		*xdp_pass = 0;
+		*xdp_drop = 0;
+		*xdp_redirect = 0;
+		return;
+	}
+
+	*xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_;
+	*xdp_pass = (r->xdp_pass - p->xdp_pass) / period_;
+	*xdp_drop = (r->xdp_drop - p->xdp_drop) / period_;
+}
+
+/*
+ * Print one line per CPU with non-zero receive activity and, when @out is
+ * given, store the overall receive rates there and add them to its totals.
+ */
+static void stats_get_rx_cnt(struct stats_record *stats_rec,
+			     struct stats_record *stats_prev,
+			     int nr_cpus, struct sample_output *out)
+{
+	struct record *cur = &stats_rec->rx_cnt;
+	struct record *old = &stats_prev->rx_cnt;
+	double period = calc_period(cur, old);
+	double rate, dropped, errors;
+	int cpu;
+
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		struct datarec *now = &cur->cpu[cpu];
+		struct datarec *then = &old->cpu[cpu];
+		char label[64];
+
+		rate = calc_pps(now, then, period);
+		dropped = calc_drop_pps(now, then, period);
+		errors = calc_errs_pps(now, then, period);
+
+		/* Idle CPUs are not listed */
+		if (rate || dropped || errors) {
+			snprintf(label, sizeof(label), "cpu:%d", cpu);
+			print_default(" %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+				      "\n",
+				      label, PPS(rate), DROP(dropped), ERR(errors));
+		}
+	}
+
+	if (!out)
+		return;
+
+	rate = calc_pps(&cur->total, &old->total, period);
+	dropped = calc_drop_pps(&cur->total, &old->total, period);
+	errors = calc_errs_pps(&cur->total, &old->total, period);
+
+	out->rx_cnt.pps = rate;
+	out->rx_cnt.drop = dropped;
+	out->rx_cnt.err = errors;
+	out->totals.rx += rate;
+	out->totals.drop += dropped;
+	out->totals.err += errors;
+}
+
+/*
+ * Print a blank line followed by one line for each of the sample_n_rxqs RX
+ * queues that shows non-zero activity.
+ */
+static void stats_get_rxq_cnt(struct stats_record *stats_rec,
+			      struct stats_record *stats_prev)
+{
+	struct record *cur = &stats_rec->rxq_cnt;
+	struct record *old = &stats_prev->rxq_cnt;
+	double period = calc_period(cur, old);
+	int q;
+
+	print_default("\n");
+	for (q = 0; q < sample_n_rxqs; q++) {
+		struct datarec *now = &cur->rxq[q];
+		struct datarec *then = &old->rxq[q];
+		double rate = calc_pps(now, then, period);
+		double dropped = calc_drop_pps(now, then, period);
+		double errors = calc_errs_pps(now, then, period);
+		char label[64];
+
+		/* Idle queues are not listed */
+		if (rate || dropped || errors) {
+			snprintf(label, sizeof(label), "rxq:%d", q);
+			print_default(" %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+				      "\n",
+				      label, PPS(rate), DROP(dropped), ERR(errors));
+		}
+	}
+}
+
+/*
+ * Print cpumap enqueue statistics: for every destination CPU a total line
+ * (only when packets or drops were seen) followed by one line per source
+ * CPU with activity. The rate of the issue counter is used as the divisor
+ * that turns pkt/s into an average bulk size.
+ */
+static void stats_get_cpumap_enqueue(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ int nr_cpus)
+{
+ struct record *rec, *prev;
+ double t, pps, drop, err;
+ int i, to_cpu;
+
+ /* cpumap enqueue stats */
+ for (to_cpu = 0; to_cpu < sample_n_cpus; to_cpu++) {
+ rec = &stats_rec->enq[to_cpu];
+ prev = &stats_prev->enq[to_cpu];
+ t = calc_period(rec, prev);
+
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+
+ if (pps > 0 || drop > 0) {
+ char str[64];
+
+ snprintf(str, sizeof(str), "enqueue to cpu %d", to_cpu);
+
+ if (err > 0)
+ err = pps / err; /* calc average bulk size */
+
+ print_err(drop,
+ " %-20s " FMT_COLUMNf FMT_COLUMNf __COLUMN(
+ ".2f") "\n",
+ str, PPS(pps), DROP(drop), err, "bulk-avg");
+ }
+
+ /* Breakdown by source CPU for this destination */
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d->%d", i, to_cpu);
+ if (err > 0)
+ err = pps / err; /* calc average bulk size */
+ print_default(
+ " %-18s " FMT_COLUMNf FMT_COLUMNf __COLUMN(
+ ".2f") "\n",
+ str, PPS(pps), DROP(drop), err, "bulk-avg");
+ }
+ }
+}
+
+/*
+ * Print the XDP verdict rates (pass/drop/redirect) taken from the kthread
+ * record: a total line when any rate is non-zero, then one line per CPU
+ * with activity.
+ */
+static void stats_get_cpumap_remote(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ int nr_cpus)
+{
+ double xdp_pass, xdp_drop, xdp_redirect;
+ struct record *rec, *prev;
+ double t;
+ int i;
+
+ rec = &stats_rec->kthread;
+ prev = &stats_prev->kthread;
+ t = calc_period(rec, prev);
+
+ calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop,
+ &xdp_redirect, t);
+ if (xdp_pass || xdp_drop || xdp_redirect) {
+ print_err(xdp_drop,
+ " %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf "\n",
+ "xdp_stats", PASS(xdp_pass), DROP(xdp_drop),
+ REDIR(xdp_redirect));
+ }
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ calc_xdp_pps(r, p, &xdp_pass, &xdp_drop, &xdp_redirect, t);
+ if (!xdp_pass && !xdp_drop && !xdp_redirect)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-16s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ "\n",
+ str, PASS(xdp_pass), DROP(xdp_drop),
+ REDIR(xdp_redirect));
+ }
+}
+
+/*
+ * Print the cpumap kthread statistics: a total line (always passed to
+ * print_err) followed by one line per CPU with activity. The rate of the
+ * issue counter is shown in the "sched" column.
+ */
+static void stats_get_cpumap_kthread(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ int nr_cpus)
+{
+ struct record *rec, *prev;
+ double t, pps, drop, err;
+ int i;
+
+ rec = &stats_rec->kthread;
+ prev = &stats_prev->kthread;
+ t = calc_period(rec, prev);
+
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+
+ print_err(drop, " %-20s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf "\n",
+ pps ? "kthread total" : "kthread", PPS(pps), DROP(drop), err,
+ "sched");
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ "\n",
+ str, PPS(pps), DROP(drop), err, "sched");
+ }
+}
+
+/*
+ * Print the successful-redirect rate of every CPU with activity (slot 0 of
+ * redir_err[]) and, when @out is given, store the overall rate there and
+ * add it to the redirect total.
+ */
+static void stats_get_redirect_cnt(struct stats_record *stats_rec,
+				   struct stats_record *stats_prev,
+				   int nr_cpus,
+				   struct sample_output *out)
+{
+	struct record *cur = &stats_rec->redir_err[0];
+	struct record *old = &stats_prev->redir_err[0];
+	double period = calc_period(cur, old);
+	double rate;
+	int cpu;
+
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		char label[64];
+
+		rate = calc_pps(&cur->cpu[cpu], &old->cpu[cpu], period);
+		if (rate) {
+			snprintf(label, sizeof(label), "cpu:%d", cpu);
+			print_default(" %-18s " FMT_COLUMNf "\n", label, REDIR(rate));
+		}
+	}
+
+	if (!out)
+		return;
+
+	rate = calc_pps(&cur->total, &old->total, period);
+	out->redir_cnt.suc = rate;
+	out->totals.redir += rate;
+}
+
+/*
+ * Walk the redirect error slots (index 1 and up). Without @out, print a
+ * line for each error kind with a non-zero rate; the per-CPU lines are
+ * emitted through print_default regardless of @out. With @out, store the
+ * summed error rate there and add it to the error total.
+ */
+static void stats_get_redirect_err_cnt(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ int nr_cpus,
+ struct sample_output *out)
+{
+ struct record *rec, *prev;
+ double t, drop, sum = 0;
+ int rec_i, i;
+
+ for (rec_i = 1; rec_i < XDP_REDIRECT_ERR_MAX; rec_i++) {
+ char str[64];
+
+ rec = &stats_rec->redir_err[rec_i];
+ prev = &stats_prev->redir_err[rec_i];
+ t = calc_period(rec, prev);
+
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ if (drop > 0 && !out) {
+ snprintf(str, sizeof(str),
+ sample_log_level & LL_DEFAULT ? "%s total" :
+ "%s",
+ xdp_redirect_err_names[rec_i]);
+ print_err(drop, " %-18s " FMT_COLUMNf "\n", str,
+ ERR(drop));
+ }
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ /* Shadows the outer drop, which keeps the total */
+ double drop;
+
+ drop = calc_drop_pps(r, p, t);
+ if (!drop)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-16s" FMT_COLUMNf "\n", str,
+ ERR(drop));
+ }
+
+ /* Outer drop: the total rate for this error kind */
+ sum += drop;
+ }
+
+ if (out) {
+ out->redir_cnt.err = sum;
+ out->totals.err += sum;
+ }
+}
+
+/*
+ * Walk the exception counters of every XDP action. Without @out, print a
+ * line per action with a non-zero rate followed by its per-CPU breakdown.
+ * With @out, store the summed rate as the exception hit count and add it
+ * to the error total.
+ */
+static void stats_get_exception_cnt(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ int nr_cpus,
+ struct sample_output *out)
+{
+ double t, drop, sum = 0;
+ struct record *rec, *prev;
+ int rec_i, i;
+
+ for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) {
+ rec = &stats_rec->exception[rec_i];
+ prev = &stats_prev->exception[rec_i];
+ t = calc_period(rec, prev);
+
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ /* Fold out errors after heading */
+ sum += drop;
+
+ if (drop > 0 && !out) {
+ print_always(" %-18s " FMT_COLUMNf "\n",
+ xdp_action2str(rec_i), ERR(drop));
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+ /* Shadows the outer drop inside this loop only */
+ double drop;
+
+ drop = calc_drop_pps(r, p, t);
+ if (!drop)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ print_default(" %-16s" FMT_COLUMNf "\n",
+ str, ERR(drop));
+ }
+ }
+ }
+
+ if (out) {
+ out->except_cnt.hits = sum;
+ out->totals.err += sum;
+ }
+}
+
+/*
+ * Print the devmap transmit statistics of every CPU with activity and,
+ * when @out is given, store the overall rates there and add them to its
+ * totals. The bulk average is (xmit + drop) divided by the rate of the
+ * info counter.
+ */
+static void stats_get_devmap_xmit(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ int nr_cpus,
+ struct sample_output *out)
+{
+ double pps, drop, info, err;
+ struct record *rec, *prev;
+ double t;
+ int i;
+
+ rec = &stats_rec->devmap_xmit;
+ prev = &stats_prev->devmap_xmit;
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+ char str[64];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ err = calc_errs_pps(r, p, t);
+
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ info = calc_info_pps(r, p, t);
+ if (info > 0)
+ info = (pps + drop) / info; /* calc avg bulk */
+ print_default(" %-18s" FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ __COLUMN(".2f") "\n",
+ str, XMIT(pps), DROP(drop), err, "drv_err/s",
+ info, "bulk-avg");
+ }
+ if (out) {
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ info = calc_info_pps(&rec->total, &prev->total, t);
+ if (info > 0)
+ info = (pps + drop) / info; /* calc avg bulk */
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+
+ out->xmit_cnt.pps = pps;
+ out->xmit_cnt.drop = drop;
+ out->xmit_cnt.bavg = info;
+ out->xmit_cnt.err = err;
+ out->totals.xmit += pps;
+ out->totals.drop_xmit += drop;
+ out->totals.err += err;
+ }
+}
+
+/*
+ * Handle the per-(from, to) interface pair transmit statistics held in
+ * xmit_map. With @out, only add each pair's rates to the totals. Without
+ * @out, print a line per active pair followed by its per-CPU breakdown.
+ *
+ * A pair with no matching entry in @stats_prev is compared against an
+ * all-zero record dated sample_interval seconds before the current one.
+ */
+static void stats_get_devmap_xmit_multi(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ int nr_cpus,
+ struct sample_output *out)
+{
+ double pps, drop, info, err;
+ struct map_entry *entry;
+ struct record *r, *p;
+ unsigned int bkt;
+ double t;
+
+ hash_for_each(stats_rec->xmit_map, bkt, entry, node) {
+ struct map_entry *e, *x = NULL;
+ char ifname_from[IFNAMSIZ];
+ char ifname_to[IFNAMSIZ];
+ const char *fstr, *tstr;
+ unsigned long prev_time;
+ struct record beg = {};
+ __u32 from_idx, to_idx;
+ char str[128];
+ __u64 pair;
+ int i;
+
+ prev_time = sample_interval * NANOSEC_PER_SEC;
+
+ /* The key packs the source ifindex above the destination ifindex */
+ pair = entry->pair;
+ from_idx = pair >> 32;
+ to_idx = pair & 0xFFFFFFFF;
+
+ r = &entry->val;
+ beg.timestamp = r->timestamp - prev_time;
+
+ /* Find matching entry from stats_prev map */
+ hash_for_each_possible(stats_prev->xmit_map, e, node, pair) {
+ if (e->pair == pair) {
+ x = e;
+ break;
+ }
+ }
+ if (x)
+ p = &x->val;
+ else
+ p = &beg;
+ t = calc_period(r, p);
+ pps = calc_pps(&r->total, &p->total, t);
+ drop = calc_drop_pps(&r->total, &p->total, t);
+ info = calc_info_pps(&r->total, &p->total, t);
+ if (info > 0)
+ info = (pps + drop) / info; /* calc avg bulk */
+ err = calc_errs_pps(&r->total, &p->total, t);
+
+ if (out) {
+ /* We are responsible for filling out totals */
+ out->totals.xmit += pps;
+ out->totals.drop_xmit += drop;
+ out->totals.err += err;
+ continue;
+ }
+
+ /* Interfaces whose name cannot be resolved are shown as "?" */
+ fstr = tstr = NULL;
+ if (if_indextoname(from_idx, ifname_from))
+ fstr = ifname_from;
+ if (if_indextoname(to_idx, ifname_to))
+ tstr = ifname_to;
+
+ snprintf(str, sizeof(str), "xmit %s->%s", fstr ?: "?",
+ tstr ?: "?");
+ /* Skip idle streams of redirection */
+ if (pps || drop || err) {
+ print_err(drop * !(sample_mask & SAMPLE_DROP_OK),
+ " %-20s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ __COLUMN(".2f") "\n", str, XMIT(pps), DROP(drop),
+ err, "drv_err/s", info, "bulk-avg");
+ }
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *rc = &r->cpu[i];
+ struct datarec *pc, p_beg = {};
+ char str[64];
+
+ /* The synthetic record has no per-CPU array; use zeros */
+ pc = p == &beg ? &p_beg : &p->cpu[i];
+
+ pps = calc_pps(rc, pc, t);
+ drop = calc_drop_pps(rc, pc, t);
+ err = calc_errs_pps(rc, pc, t);
+
+ if (!pps && !drop && !err)
+ continue;
+
+ snprintf(str, sizeof(str), "cpu:%d", i);
+ info = calc_info_pps(rc, pc, t);
+ if (info > 0)
+ info = (pps + drop) / info; /* calc avg bulk */
+
+ print_default(" %-18s" FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf
+ __COLUMN(".2f") "\n", str, XMIT(pps),
+ DROP(drop), err, "drv_err/s", info, "bulk-avg");
+ }
+ }
+}
+
+/*
+ * Print one sampling round: a summary line built from @out->totals, then
+ * one section for every feature enabled in @mask. The sections call the
+ * stats_get_*() helpers with a NULL output pointer, so they only print.
+ *
+ * @prefix: label of the summary line ("Summary" when NULL)
+ * @r, @p:  current and previous snapshot
+ * @out:    aggregated rates for this round; dereferenced unconditionally
+ */
+static void stats_print(const char *prefix, int mask, struct stats_record *r,
+ struct stats_record *p, struct sample_output *out)
+{
+ int nr_cpus = libbpf_num_possible_cpus();
+ const char *str;
+
+ print_always("%-23s", prefix ?: "Summary");
+ if (mask & SAMPLE_RX_CNT)
+ print_always(FMT_COLUMNl, RX(out->totals.rx));
+ if (mask & SAMPLE_REDIRECT_CNT)
+ print_always(FMT_COLUMNl, REDIR(out->totals.redir));
+ /* Drops count towards this column unless SAMPLE_DROP_OK is set */
+ printf(FMT_COLUMNl,
+ out->totals.err + ((out->totals.drop_xmit + out->totals.drop) * !(mask & SAMPLE_DROP_OK)),
+ (mask & SAMPLE_DROP_OK) ? "err/s" : "err,drop/s");
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT ||
+ mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+ printf(FMT_COLUMNl, XMIT(out->totals.xmit));
+ printf("\n");
+
+ if (mask & SAMPLE_RX_CNT) {
+ str = (sample_log_level & LL_DEFAULT) && out->rx_cnt.pps ?
+ "receive total" :
+ "receive";
+ print_err((out->rx_cnt.err || (out->rx_cnt.drop && !(mask & SAMPLE_DROP_OK))),
+ " %-20s " FMT_COLUMNl FMT_COLUMNl FMT_COLUMNl "\n",
+ str, PPS(out->rx_cnt.pps), DROP(out->rx_cnt.drop),
+ ERR(out->rx_cnt.err));
+
+ stats_get_rx_cnt(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_RXQ_STATS)
+ stats_get_rxq_cnt(r, p);
+
+ if (mask & SAMPLE_CPUMAP_ENQUEUE_CNT)
+ stats_get_cpumap_enqueue(r, p, nr_cpus);
+
+ if (mask & SAMPLE_CPUMAP_KTHREAD_CNT) {
+ stats_get_cpumap_kthread(r, p, nr_cpus);
+ stats_get_cpumap_remote(r, p, nr_cpus);
+ }
+
+ if (mask & SAMPLE_REDIRECT_CNT) {
+ str = out->redir_cnt.suc ? "redirect total" : "redirect";
+ print_default(" %-20s " FMT_COLUMNl "\n", str,
+ REDIR(out->redir_cnt.suc));
+
+ stats_get_redirect_cnt(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_REDIRECT_ERR_CNT) {
+ str = (sample_log_level & LL_DEFAULT) && out->redir_cnt.err ?
+ "redirect_err total" :
+ "redirect_err";
+ print_err(out->redir_cnt.err, " %-20s " FMT_COLUMNl "\n", str,
+ ERR(out->redir_cnt.err));
+
+ stats_get_redirect_err_cnt(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_EXCEPTION_CNT) {
+ str = out->except_cnt.hits ? "xdp_exception total" :
+ "xdp_exception";
+
+ print_err(out->except_cnt.hits, " %-20s " FMT_COLUMNl "\n", str,
+ HITS(out->except_cnt.hits));
+
+ stats_get_exception_cnt(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT) {
+ str = (sample_log_level & LL_DEFAULT) && out->xmit_cnt.pps ?
+ "devmap_xmit total" :
+ "devmap_xmit";
+
+ print_err(out->xmit_cnt.err || out->xmit_cnt.drop,
+ " %-20s " FMT_COLUMNl FMT_COLUMNl FMT_COLUMNl
+ __COLUMN(".2f") "\n",
+ str, XMIT(out->xmit_cnt.pps),
+ DROP(out->xmit_cnt.drop), (uint64_t)out->xmit_cnt.err,
+ "drv_err/s", out->xmit_cnt.bavg, "bulk-avg");
+
+ stats_get_devmap_xmit(r, p, nr_cpus, NULL);
+ }
+
+ if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
+ stats_get_devmap_xmit_multi(r, p, nr_cpus, NULL);
+
+ /* End the round with a blank line and clear the latched error flag */
+ if (sample_log_level & LL_DEFAULT ||
+ ((sample_log_level & LL_SIMPLE) && sample_err_exp)) {
+ sample_err_exp = false;
+ printf("\n");
+ }
+}
+
+/*
+ * Query the number of RX queues of interface @ifname through the
+ * ETHTOOL_GCHANNELS ioctl.
+ *
+ * Returns rx_count + combined_count on success, 0 when @ifname is NULL or
+ * too long for struct ifreq, and a negative errno value if the socket
+ * cannot be created or the ioctl fails.
+ */
+static int get_num_rxqs(const char *ifname)
+{
+	struct ethtool_channels ch = {
+		.cmd = ETHTOOL_GCHANNELS,
+	};
+
+	struct ifreq ifr = {
+		.ifr_data = (void *)&ch,
+	};
+	int fd, ret;
+
+	if (!ifname || strlen(ifname) > sizeof(ifr.ifr_name) - 1)
+		return 0;
+
+	/* Length was validated above, so the copy cannot overflow */
+	strcpy(ifr.ifr_name, ifname);
+	fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+	if (fd < 0) {
+		ret = -errno;
+		pr_warn("Couldn't open socket: %s\n", strerror(-ret));
+		return ret;
+	}
+
+	ret = ioctl(fd, SIOCETHTOOL, &ifr);
+	if (ret < 0) {
+		ret = -errno;
+		pr_debug("Error in ethtool ioctl: %s\n", strerror(-ret));
+		goto out;
+	}
+
+	ret = ch.rx_count + ch.combined_count;
+out:
+	close(fd);
+	pr_debug("Got %d queues for ifname %s\n", ret, ifname);
+	return ret;
+}
+
+
+/*
+ * Register the statistics maps and size them for this system.
+ *
+ * @maps:   array of NUM_MAP map pointers, indexed by enum map_type
+ * @ifname: interface whose RX queue count sizes the MAP_RXQ map
+ *
+ * Every map except MAP_DEVMAP_XMIT_MULTI gets its max_entries set; that
+ * last map is only recorded. Also initialises sample_n_cpus and
+ * sample_n_rxqs.
+ *
+ * Returns 0 on success or a negative error code: the libbpf error if the
+ * CPU count cannot be determined, -EOVERFLOW if the CPU count squared does
+ * not fit in an int, or -errno if resizing a map fails.
+ */
+int sample_setup_maps(struct bpf_map **maps, const char *ifname)
+{
+	int possible_cpus = libbpf_num_possible_cpus();
+
+	/* A negative count is an error code, not a usable map size */
+	if (possible_cpus < 0)
+		return possible_cpus;
+	sample_n_cpus = possible_cpus;
+
+	for (int i = 0; i < MAP_DEVMAP_XMIT_MULTI; i++) {
+		int enq_entries;
+
+		sample_map[i] = maps[i];
+
+		switch (i) {
+		case MAP_RX:
+		case MAP_CPUMAP_KTHREAD:
+		case MAP_DEVMAP_XMIT:
+			sample_map_count[i] = sample_n_cpus;
+			break;
+		case MAP_RXQ:
+			sample_n_rxqs = get_num_rxqs(ifname);
+			/* Keep at least one entry when the count is unknown */
+			sample_map_count[i] = sample_n_rxqs > 0 ? sample_n_rxqs : 1;
+			break;
+		case MAP_REDIRECT_ERR:
+			sample_map_count[i] =
+				XDP_REDIRECT_ERR_MAX * sample_n_cpus;
+			break;
+		case MAP_EXCEPTION:
+			sample_map_count[i] = XDP_ACTION_MAX * sample_n_cpus;
+			break;
+		case MAP_CPUMAP_ENQUEUE:
+			/* One row of per-CPU counters for each destination CPU */
+			if (__builtin_mul_overflow(sample_n_cpus, sample_n_cpus, &enq_entries))
+				return -EOVERFLOW;
+			sample_map_count[i] = enq_entries;
+			break;
+		default:
+			return -EINVAL;
+		}
+		if (bpf_map__set_max_entries(sample_map[i], sample_map_count[i]) < 0)
+			return -errno;
+	}
+	sample_map[MAP_DEVMAP_XMIT_MULTI] = maps[MAP_DEVMAP_XMIT_MULTI];
+	return 0;
+}
+
+/*
+ * mmap() every statistics map except MAP_DEVMAP_XMIT_MULTI into
+ * sample_mmap[], sized from sample_map_count[]. Returns 0 on success or
+ * -errno for the first mapping that fails.
+ */
+static int sample_setup_maps_mappings(void)
+{
+	int idx = 0;
+
+	while (idx < MAP_DEVMAP_XMIT_MULTI) {
+		size_t len = sample_map_count[idx] * sizeof(struct datarec);
+		int fd = bpf_map__fd(sample_map[idx]);
+
+		sample_mmap[idx] = mmap(NULL, len, PROT_READ | PROT_WRITE,
+					MAP_SHARED, fd, 0);
+		if (sample_mmap[idx] == MAP_FAILED)
+			return -errno;
+		idx++;
+	}
+	return 0;
+}
+
+/*
+ * Initialise the sampling state: block SIGQUIT, SIGINT and SIGTERM and
+ * route them to a signalfd, remember @mask and the two ifindexes, then
+ * mmap the statistics maps.
+ *
+ * Returns 0 on success, -EINVAL when RXQ stats are requested but the queue
+ * count is unknown, or a negative errno value from the failing call.
+ */
+int __sample_init(int mask, int ifindex_from, int ifindex_to)
+{
+	sigset_t sigs;
+
+	if ((mask & SAMPLE_RXQ_STATS) && sample_n_rxqs <= 0) {
+		pr_warn("Couldn't retrieve the number of RXQs, so can't enable RXQ stats\n");
+		return -EINVAL;
+	}
+
+	sigemptyset(&sigs);
+	sigaddset(&sigs, SIGQUIT);
+	sigaddset(&sigs, SIGINT);
+	sigaddset(&sigs, SIGTERM);
+
+	/* The signals are blocked so they are only delivered via the fd */
+	if (sigprocmask(SIG_BLOCK, &sigs, NULL) < 0)
+		return -errno;
+
+	sample_sig_fd = signalfd(-1, &sigs, SFD_CLOEXEC | SFD_NONBLOCK);
+	if (sample_sig_fd < 0)
+		return -errno;
+
+	sample_mask = mask;
+	ifindex[0] = ifindex_from;
+	ifindex[1] = ifindex_to;
+
+	return sample_setup_maps_mappings();
+}
+
+/*
+ * Print the final summary from the running totals in sample_out. Lines
+ * whose total is zero are omitted. The averages divide a total by
+ * sample_out.rx_cnt.num.
+ * NOTE(review): num shares storage with rx_cnt.pps and is presumably the
+ * accumulated sampling time - confirm against the code that updates
+ * sample_out.
+ */
+static void sample_summary_print(void)
+{
+ double num = sample_out.rx_cnt.num;
+
+ if (sample_out.totals.rx) {
+ double pkts = sample_out.totals.rx;
+
+ print_always(" Packets received : %'-10" PRIu64 "\n",
+ (uint64_t)sample_out.totals.rx);
+ print_always(" Average packets/s : %'-10.0f\n",
+ sample_round(pkts / num));
+ }
+ if (sample_out.totals.redir) {
+ double pkts = sample_out.totals.redir;
+
+ print_always(" Packets redirected : %'-10" PRIu64 "\n",
+ sample_out.totals.redir);
+ print_always(" Average redir/s : %'-10.0f\n",
+ sample_round(pkts / num));
+ }
+ if (sample_out.totals.drop)
+ print_always(" Rx dropped : %'-10" PRIu64 "\n",
+ sample_out.totals.drop);
+ if (sample_out.totals.drop_xmit)
+ print_always(" Tx dropped : %'-10" PRIu64 "\n",
+ sample_out.totals.drop_xmit);
+ if (sample_out.totals.err)
+ print_always(" Errors recorded : %'-10" PRIu64 "\n",
+ sample_out.totals.err);
+ if (sample_out.totals.xmit) {
+ double pkts = sample_out.totals.xmit;
+
+ print_always(" Packets transmitted : %'-10" PRIu64 "\n",
+ sample_out.totals.xmit);
+ print_always(" Average transmit/s : %'-10.0f\n",
+ sample_round(pkts / num));
+ }
+}
+
+/*
+ * Undo __sample_init(): unmap every slot of sample_mmap[], print the final
+ * summary and close the signalfd.
+ */
+void sample_teardown(void)
+{
+	int idx;
+
+	for (idx = 0; idx < NUM_MAP; idx++)
+		munmap(sample_mmap[idx],
+		       sample_map_count[idx] * sizeof(**sample_mmap));
+
+	sample_summary_print();
+	close(sample_sig_fd);
+}
+
+/*
+ * Fill rec with a snapshot of every statistic enabled in sample_mask.
+ *
+ * Maps that hold several rows (redirect errors, cpumap enqueue, exceptions)
+ * are laid out as consecutive groups of sample_n_cpus entries, one group per
+ * row. Row 0 of the redirect-error map is collected under
+ * SAMPLE_REDIRECT_CNT, the remaining rows under SAMPLE_REDIRECT_ERR_CNT.
+ *
+ * Returns 0, or -EINVAL when the multi-devmap collection fails.
+ */
+static int sample_stats_collect(struct stats_record *rec)
+{
+	int row;
+
+	if (sample_mask & SAMPLE_RX_CNT)
+		map_collect_percpu(sample_mmap[MAP_RX], &rec->rx_cnt);
+
+	if (sample_mask & SAMPLE_RXQ_STATS)
+		map_collect_rxqs(sample_mmap[MAP_RXQ], &rec->rxq_cnt);
+
+	if (sample_mask & SAMPLE_REDIRECT_CNT)
+		map_collect_percpu(sample_mmap[MAP_REDIRECT_ERR],
+				   &rec->redir_err[0]);
+
+	if (sample_mask & SAMPLE_REDIRECT_ERR_CNT) {
+		for (row = 1; row < XDP_REDIRECT_ERR_MAX; row++) {
+			map_collect_percpu(sample_mmap[MAP_REDIRECT_ERR] +
+						   row * sample_n_cpus,
+					   &rec->redir_err[row]);
+		}
+	}
+
+	if (sample_mask & SAMPLE_CPUMAP_ENQUEUE_CNT) {
+		for (row = 0; row < sample_n_cpus; row++) {
+			map_collect_percpu(sample_mmap[MAP_CPUMAP_ENQUEUE] +
+						   row * sample_n_cpus,
+					   &rec->enq[row]);
+		}
+	}
+
+	if (sample_mask & SAMPLE_CPUMAP_KTHREAD_CNT)
+		map_collect_percpu(sample_mmap[MAP_CPUMAP_KTHREAD],
+				   &rec->kthread);
+
+	if (sample_mask & SAMPLE_EXCEPTION_CNT) {
+		for (row = 0; row < XDP_ACTION_MAX; row++) {
+			map_collect_percpu(sample_mmap[MAP_EXCEPTION] +
+						   row * sample_n_cpus,
+					   &rec->exception[row]);
+		}
+	}
+
+	if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT)
+		map_collect_percpu(sample_mmap[MAP_DEVMAP_XMIT],
+				   &rec->devmap_xmit);
+
+	if (!(sample_mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI))
+		return 0;
+
+	/* The multi-devmap statistics are read through the map fd, not mmap. */
+	if (map_collect_percpu_devmap(bpf_map__fd(sample_map[MAP_DEVMAP_XMIT_MULTI]), rec) < 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Fold one period's totals from out into the running totals in sample_out and
+ * count the period, so sample_summary_print() can compute averages.
+ */
+static void sample_summary_update(struct sample_output *out)
+{
+	sample_out.rx_cnt.num++;
+
+	sample_out.totals.rx = sample_out.totals.rx + out->totals.rx;
+	sample_out.totals.redir = sample_out.totals.redir + out->totals.redir;
+	sample_out.totals.xmit = sample_out.totals.xmit + out->totals.xmit;
+	sample_out.totals.drop = sample_out.totals.drop + out->totals.drop;
+	sample_out.totals.drop_xmit =
+		sample_out.totals.drop_xmit + out->totals.drop_xmit;
+	sample_out.totals.err = sample_out.totals.err + out->totals.err;
+}
+
+/*
+ * Compute the per-period figures for every statistic enabled in mask, add
+ * them to the running summary and print them under the heading prog_name.
+ * The plain devmap-xmit statistic takes precedence over the multi variant.
+ */
+static void sample_stats_print(int mask, struct stats_record *cur,
+			       struct stats_record *prev, char *prog_name)
+{
+	struct sample_output period = {};
+
+	if (mask & SAMPLE_RX_CNT) {
+		stats_get_rx_cnt(cur, prev, 0, &period);
+	}
+	if (mask & SAMPLE_REDIRECT_CNT) {
+		stats_get_redirect_cnt(cur, prev, 0, &period);
+	}
+	if (mask & SAMPLE_REDIRECT_ERR_CNT) {
+		stats_get_redirect_err_cnt(cur, prev, 0, &period);
+	}
+	if (mask & SAMPLE_EXCEPTION_CNT) {
+		stats_get_exception_cnt(cur, prev, 0, &period);
+	}
+	if (mask & SAMPLE_DEVMAP_XMIT_CNT) {
+		stats_get_devmap_xmit(cur, prev, 0, &period);
+	} else if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) {
+		stats_get_devmap_xmit_multi(cur, prev, 0, &period);
+	}
+
+	sample_summary_update(&period);
+	stats_print(prog_name, mask, cur, prev, &period);
+}
+
+/*
+ * Toggle the output verbosity: flips the bits of (LL_DEBUG - 1) in
+ * sample_log_level, so calling it twice restores the previous level.
+ */
+void sample_switch_mode(void)
+{
+	sample_log_level = sample_log_level ^ (LL_DEBUG - 1);
+}
+
+/*
+ * Consume one pending signal from the signalfd.
+ *
+ * SIGQUIT toggles the output mode and lets the loop continue (returns 0); any
+ * other signal asks the caller to stop (returns 1). A newline is printed in
+ * both cases. Returns -errno when the read fails.
+ */
+static int sample_signal_cb(void)
+{
+	struct signalfd_siginfo info;
+	int nread = read(sample_sig_fd, &info, sizeof(info));
+
+	if (nread < 0)
+		return -errno;
+
+	if (info.ssi_signo != SIGQUIT) {
+		printf("\n");
+		return 1;
+	}
+
+	sample_switch_mode();
+	printf("\n");
+	return 0;
+}
+
+/*
+ * Exchange two stats_record pointers so the current and previous records
+ * trade places without copying their contents.
+ */
+static void swap(struct stats_record **a, struct stats_record **b)
+{
+	struct stats_record *first = *a;
+	struct stats_record *second = *b;
+
+	*a = second;
+	*b = first;
+}
+
+/*
+ * Handle one timer expiry: acknowledge the timerfd, swap the current and
+ * previous records, collect a fresh snapshot into *rec and print it.
+ *
+ * The heading is "Summary" unless ifindex[0] is set and SAMPLE_SKIP_HEADING
+ * is clear, in which case it becomes "<from>->" followed by "<to>", with "?"
+ * standing in for an interface whose name cannot be resolved.
+ *
+ * Returns 0 on success, -errno when the read fails, or the negative result of
+ * sample_stats_collect().
+ */
+static int sample_timer_cb(int timerfd, struct stats_record **rec,
+			   struct stats_record **prev)
+{
+	char heading[64] = "Summary";
+	__u64 expirations;
+	int err;
+
+	err = read(timerfd, &expirations, sizeof(expirations));
+	if (err < 0)
+		return -errno;
+
+	swap(prev, rec);
+	err = sample_stats_collect(*rec);
+	if (err < 0)
+		return err;
+
+	if (ifindex[0] && !(sample_mask & SAMPLE_SKIP_HEADING)) {
+		char from_buf[IFNAMSIZ];
+		char to_buf[IFNAMSIZ];
+		const char *from_name = "?";
+		const char *to_name = "?";
+
+		if (if_indextoname(ifindex[0], from_buf))
+			from_name = from_buf;
+		if (if_indextoname(ifindex[1], to_buf))
+			to_name = to_buf;
+
+		snprintf(heading, sizeof(heading), "%s->%s", from_name, to_name);
+	}
+
+	sample_stats_print(sample_mask, *rec, *prev, heading);
+	return 0;
+}
+
+/*
+ * Main loop of the sample helpers.
+ *
+ * Arms a periodic timer of interval seconds, then polls the signalfd created
+ * by __sample_init() and the timerfd. Each timer expiry collects and prints
+ * statistics; SIGQUIT toggles the output mode; any other handled signal ends
+ * the loop. post_cb(ctx), when given, runs after every handled event.
+ *
+ * When the environment variable XDP_SAMPLE_IMMEDIATE_EXIT is exactly "1" the
+ * function returns right after the first statistics collection.
+ *
+ * Returns a negative errno on failure (-EINVAL for a zero interval, -ENOMEM
+ * when a record cannot be allocated, the error from timerfd_create(),
+ * timerfd_settime() or poll(), or the callbacks' error), 0 for the immediate
+ * exit path, and a positive value when a signal requested termination.
+ */
+int sample_run(int interval, void (*post_cb)(void *), void *ctx)
+{
+	struct timespec ts = { interval, 0 };
+	struct itimerspec its = { ts, ts };
+	struct stats_record *rec, *prev;
+	struct pollfd pfd[2] = {};
+	bool imm_exit = false;
+	const char *envval;
+	int timerfd, ret;
+
+	envval = secure_getenv("XDP_SAMPLE_IMMEDIATE_EXIT");
+	if (envval && envval[0] == '1' && envval[1] == '\0') {
+		pr_debug("XDP_SAMPLE_IMMEDIATE_EXIT envvar set, exiting immediately after setup\n");
+		imm_exit = true;
+	}
+
+	if (!interval) {
+		pr_warn("Incorrect interval 0\n");
+		return -EINVAL;
+	}
+	sample_interval = interval;
+	/* Pretty print numbers */
+	setlocale(LC_NUMERIC, "en_US.UTF-8");
+
+	timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK);
+	if (timerfd < 0)
+		return -errno;
+
+	/*
+	 * An unarmed timer would leave the loop below waiting forever without
+	 * printing anything (e.g. a negative interval is rejected here), so
+	 * treat a failure to arm it as fatal.
+	 */
+	if (timerfd_settime(timerfd, 0, &its, NULL) < 0) {
+		ret = -errno;
+		goto end;
+	}
+
+	pfd[0].fd = sample_sig_fd;
+	pfd[0].events = POLLIN;
+
+	pfd[1].fd = timerfd;
+	pfd[1].events = POLLIN;
+
+	ret = -ENOMEM;
+	rec = alloc_stats_record();
+	if (!rec)
+		goto end;
+	prev = alloc_stats_record();
+	if (!prev)
+		goto end_rec;
+
+	/* Baseline snapshot so the first period has something to diff against. */
+	ret = sample_stats_collect(rec);
+	if (ret < 0)
+		goto end_rec_prev;
+
+	if (imm_exit)
+		goto end_rec_prev;
+
+	for (;;) {
+		ret = poll(pfd, 2, -1);
+		if (ret < 0) {
+			if (errno == EINTR)
+				continue;
+			/* Report the real cause, like every other error path. */
+			ret = -errno;
+			break;
+		}
+
+		if (pfd[0].revents & POLLIN)
+			ret = sample_signal_cb();
+		else if (pfd[1].revents & POLLIN)
+			ret = sample_timer_cb(timerfd, &rec, &prev);
+
+		if (ret)
+			break;
+
+		if (post_cb)
+			post_cb(ctx);
+	}
+
+end_rec_prev:
+	free_stats_record(prev);
+end_rec:
+	free_stats_record(rec);
+end:
+	close(timerfd);
+
+	return ret;
+}
+
+/*
+ * Look up the kernel driver name of the interface ifindex through the
+ * ETHTOOL_GDRVINFO ioctl.
+ *
+ * Returns a pointer to a static buffer holding the driver name (overwritten
+ * by the next call), "loopback" when the lookup fails with EOPNOTSUPP, or
+ * "[error]" for any other failure.
+ */
+const char *get_driver_name(int ifindex)
+{
+	static char drvname[32];
+	struct ethtool_drvinfo info = {};
+	struct ifreq req = {};
+	char name[IF_NAMESIZE];
+	int saved_errno;
+	int sock;
+
+	sock = socket(AF_INET, SOCK_DGRAM, 0);
+	if (sock < 0)
+		return "[error]";
+
+	if (if_indextoname(ifindex, name)) {
+		info.cmd = ETHTOOL_GDRVINFO;
+		safe_strncpy(req.ifr_name, name, sizeof(req.ifr_name));
+		req.ifr_data = (void *)&info;
+
+		if (ioctl(sock, SIOCETHTOOL, &req) == 0) {
+			safe_strncpy(drvname, info.driver, sizeof(drvname));
+			close(sock);
+			return drvname;
+		}
+	}
+
+	/* Capture errno before close() can overwrite it. */
+	saved_errno = errno;
+	close(sock);
+	return saved_errno == EOPNOTSUPP ? "loopback" : "[error]";
+}
+
+/*
+ * Copy the 6-byte hardware address of interface ifindex into mac_addr, which
+ * must have room for at least 6 bytes.
+ *
+ * Returns 0 on success or a negative errno from socket(), if_indextoname()
+ * or the SIOCGIFHWADDR ioctl.
+ */
+int get_mac_addr(int ifindex, void *mac_addr)
+{
+	struct ifreq req = {};
+	char name[IF_NAMESIZE];
+	int sock;
+	int ret = 0;
+
+	sock = socket(AF_INET, SOCK_DGRAM, 0);
+	if (sock < 0)
+		return -errno;
+
+	if (!if_indextoname(ifindex, name)) {
+		ret = -errno;
+	} else {
+		safe_strncpy(req.ifr_name, name, sizeof(req.ifr_name));
+
+		if (ioctl(sock, SIOCGIFHWADDR, &req))
+			ret = -errno;
+		else
+			memcpy(mac_addr, req.ifr_hwaddr.sa_data,
+			       6 * sizeof(char));
+	}
+
+	close(sock);
+	return ret;
+}
diff --git a/lib/util/xdp_sample.h b/lib/util/xdp_sample.h
new file mode 100644
index 0000000..701f83f
--- /dev/null
+++ b/lib/util/xdp_sample.h
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef XDP_SAMPLE_USER_H
+#define XDP_SAMPLE_USER_H
+
+#include <bpf/libbpf.h>
+#include <getopt.h>
+
+#include <xdp/xdp_sample_shared.h>
+
+/*
+ * Bit flags selecting which statistics the sample helpers collect and print.
+ * They are OR-ed together into the mask handed to __sample_init().
+ *
+ * SAMPLE_REDIRECT_MAP_CNT and SAMPLE_REDIRECT_ERR_MAP_CNT are two-bit
+ * combinations of a base flag with _SAMPLE_REDIRECT_MAP, so a plain
+ * `mask & X` test on them is also true when only one of the two bits is set.
+ */
+enum stats_mask {
+ _SAMPLE_REDIRECT_MAP = 1U << 0,
+ SAMPLE_RX_CNT = 1U << 1,
+ SAMPLE_REDIRECT_ERR_CNT = 1U << 2,
+ SAMPLE_CPUMAP_ENQUEUE_CNT = 1U << 3,
+ SAMPLE_CPUMAP_KTHREAD_CNT = 1U << 4,
+ SAMPLE_EXCEPTION_CNT = 1U << 5,
+ SAMPLE_DEVMAP_XMIT_CNT = 1U << 6,
+ SAMPLE_REDIRECT_CNT = 1U << 7,
+ SAMPLE_REDIRECT_MAP_CNT = SAMPLE_REDIRECT_CNT | _SAMPLE_REDIRECT_MAP,
+ SAMPLE_REDIRECT_ERR_MAP_CNT = SAMPLE_REDIRECT_ERR_CNT | _SAMPLE_REDIRECT_MAP,
+ SAMPLE_DEVMAP_XMIT_CNT_MULTI = 1U << 8,
+ SAMPLE_SKIP_HEADING = 1U << 9,
+ SAMPLE_RXQ_STATS = 1U << 10,
+ SAMPLE_DROP_OK = 1U << 11,
+};
+
+/*
+ * Identifiers passed to sample_is_compat(). __SAMPLE_COMPAT_MAX is the number
+ * of identifiers and is exported as SAMPLE_COMPAT_MAX.
+ */
+enum sample_compat {
+ SAMPLE_COMPAT_CPUMAP_KTHREAD,
+ __SAMPLE_COMPAT_MAX
+};
+#define SAMPLE_COMPAT_MAX __SAMPLE_COMPAT_MAX
+
+/* Exit return codes */
+#define EXIT_OK 0
+#define EXIT_FAIL 1
+#define EXIT_FAIL_OPTION 2
+#define EXIT_FAIL_XDP 3
+#define EXIT_FAIL_BPF 4
+#define EXIT_FAIL_MEM 5
+
+int sample_setup_maps(struct bpf_map **maps, const char *ifname);
+int __sample_init(int mask, int ifindex_from, int ifindex_to);
+void sample_teardown(void);
+int sample_run(int interval, void (*post_cb)(void *), void *ctx);
+bool sample_is_compat(enum sample_compat compat_value);
+bool sample_probe_cpumap_compat(void);
+void sample_check_cpumap_compat(struct bpf_program *prog,
+ struct bpf_program *prog_compat);
+
+void sample_switch_mode(void);
+
+const char *get_driver_name(int ifindex);
+int get_mac_addr(int ifindex, void *mac_addr);
+
+#pragma GCC diagnostic push
+#ifndef __clang__
+#pragma GCC diagnostic ignored "-Wstringop-truncation"
+#endif
+/*
+ * Bounded string copy that always NUL-terminates when size is non-zero.
+ * Copies at most size - 1 bytes of src into dst and returns dst; a size of
+ * zero leaves dst untouched.
+ */
+__attribute__((unused))
+static inline char *safe_strncpy(char *dst, const char *src, size_t size)
+{
+	if (size > 0) {
+		const size_t last = size - 1;
+
+		strncpy(dst, src, last);
+		dst[last] = '\0';
+	}
+
+	return dst;
+}
+#pragma GCC diagnostic pop
+
+/*
+ * Attach the skeleton program `name` and store the resulting link in
+ * skel->links.name.
+ *
+ * Expects a variable called `skel` at the expansion site. The macro contains
+ * `return` statements, so it must be expanded inside a function returning
+ * int: that function returns -EINVAL when the program is not of type
+ * BPF_PROG_TYPE_TRACING and -errno when the attach fails.
+ */
+#define __attach_tp(name) \
+ ({ \
+ if (bpf_program__type(skel->progs.name) != BPF_PROG_TYPE_TRACING)\
+ return -EINVAL; \
+ skel->links.name = bpf_program__attach(skel->progs.name); \
+ if (!skel->links.name) \
+ return -errno; \
+ })
+
+/*
+ * Attach `name` when sample_is_compat(SAMPLE_COMPAT_<_compat>) is true and
+ * `name_compat` otherwise. Inherits the requirements and embedded returns of
+ * __attach_tp().
+ */
+#define __attach_tp_compat(name, name_compat, _compat) \
+ ({ \
+ if (sample_is_compat(SAMPLE_COMPAT_ ## _compat)) \
+ __attach_tp(name); \
+ else \
+ __attach_tp(name_compat); \
+ })
+
+/*
+ * Pre-load setup for a sample skeleton: stores the possible-CPU count in the
+ * skeleton's rodata, runs the cpumap compatibility check on the two kthread
+ * tracepoint programs, and sizes the statistics maps.
+ *
+ * The statement expression evaluates to the return value of
+ * sample_setup_maps().
+ * NOTE(review): the map array order presumably has to match the MAP_* index
+ * enum used by sample_setup_maps() -- confirm before reordering.
+ */
+#define sample_init_pre_load(skel, ifname) \
+ ({ \
+ skel->rodata->nr_cpus = libbpf_num_possible_cpus(); \
+ sample_check_cpumap_compat(skel->progs.tp_xdp_cpumap_kthread, \
+ skel->progs.tp_xdp_cpumap_compat); \
+ sample_setup_maps((struct bpf_map *[]){ \
+ skel->maps.rx_cnt, skel->maps.rxq_cnt, \
+ skel->maps.redir_err_cnt, \
+ skel->maps.cpumap_enqueue_cnt, \
+ skel->maps.cpumap_kthread_cnt, \
+ skel->maps.exception_cnt, skel->maps.devmap_xmit_cnt, \
+ skel->maps.devmap_xmit_cnt_multi}, ifname); \
+ })
+
+/*
+ * Define `static int sample_init(struct name *skel, int sample_mask,
+ * int ifindex_from, int ifindex_to)` for the skeleton type `name`.
+ *
+ * The generated function calls __sample_init() and then attaches one
+ * tracepoint program per statistic selected in sample_mask. It returns the
+ * first negative error (from __sample_init() or from the returns embedded in
+ * __attach_tp()), or 0 when everything attached.
+ *
+ * NOTE(review): SAMPLE_REDIRECT_MAP_CNT and SAMPLE_REDIRECT_ERR_MAP_CNT are
+ * two-bit values, so their `sample_mask & X` tests are also true when only
+ * the plain SAMPLE_REDIRECT_CNT / SAMPLE_REDIRECT_ERR_CNT bit is set --
+ * confirm this is intended.
+ */
+#define DEFINE_SAMPLE_INIT(name) \
+ static int sample_init(struct name *skel, int sample_mask, \
+ int ifindex_from, int ifindex_to) \
+ { \
+ int ret; \
+ ret = __sample_init(sample_mask, ifindex_from, \
+ ifindex_to); \
+ if (ret < 0) \
+ return ret; \
+ if (sample_mask & SAMPLE_REDIRECT_MAP_CNT) \
+ __attach_tp(tp_xdp_redirect_map); \
+ if (sample_mask & SAMPLE_REDIRECT_CNT) \
+ __attach_tp(tp_xdp_redirect); \
+ if (sample_mask & SAMPLE_REDIRECT_ERR_MAP_CNT) \
+ __attach_tp(tp_xdp_redirect_map_err); \
+ if (sample_mask & SAMPLE_REDIRECT_ERR_CNT) \
+ __attach_tp(tp_xdp_redirect_err); \
+ if (sample_mask & SAMPLE_CPUMAP_ENQUEUE_CNT) \
+ __attach_tp(tp_xdp_cpumap_enqueue); \
+ if (sample_mask & SAMPLE_CPUMAP_KTHREAD_CNT) \
+ __attach_tp_compat(tp_xdp_cpumap_kthread, \
+ tp_xdp_cpumap_compat, \
+ CPUMAP_KTHREAD); \
+ if (sample_mask & SAMPLE_EXCEPTION_CNT) \
+ __attach_tp(tp_xdp_exception); \
+ if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT) \
+ __attach_tp(tp_xdp_devmap_xmit); \
+ if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) \
+ __attach_tp(tp_xdp_devmap_xmit_multi); \
+ return 0; \
+ }
+
+#endif
diff --git a/lib/util/xpcapng.c b/lib/util/xpcapng.c
new file mode 100644
index 0000000..e453b88
--- /dev/null
+++ b/lib/util/xpcapng.c
@@ -0,0 +1,635 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Description:
+ * Simple PcapNG library developed from scratch as no library existed that
+ * met the requirements for xdpdump. It can also be used by other XDP
+ * applications that would like to capture packets for debugging purposes.
+ */
+
+/*****************************************************************************
+ * Include files
+ *****************************************************************************/
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/uio.h>
+
+#include "xpcapng.h"
+
+/*****************************************************************************
+ * Simple roundup() macro
+ *****************************************************************************/
+/*
+ * Round x up to the next multiple of y. Only defined when no roundup macro
+ * is already visible. y is evaluated once (captured in __y); x is used
+ * directly in the expression.
+ */
+#ifndef roundup
+#define roundup(x, y) ( \
+{ \
+ typeof(y) __y = y; \
+ (((x) + (__y - 1)) / __y) * __y; \
+} \
+)
+#endif
+
+/*****************************************************************************
+ * pcapng_dumper structure
+ *****************************************************************************/
+/* State of one open PcapNG output; created by xpcapng_dump_open(). */
+struct xpcapng_dumper {
+ int pd_fd; /* output descriptor: STDOUT_FILENO for "-", else the opened file */
+ uint32_t pd_interfaces; /* interfaces added so far; the next interface gets this value as its id */
+};
+
+/*****************************************************************************
+ * general pcapng block and option definitions
+ *****************************************************************************/
+/*
+ * Block type codes written into the first field of each block. The
+ * enumerators after PCAPNG_INTERFACE_BLOCK count up from 1, so
+ * PCAPNG_ENHANCED_PACKET_BLOCK is 6.
+ */
+enum pcapng_block_types {
+ PCAPNG_SECTION_BLOCK = 0x0A0D0D0A,
+ PCAPNG_INTERFACE_BLOCK = 1,
+ PCAPNG_PACKET_BLOCK,
+ PCAPNG_SIMPLE_PACKET_BLOCK,
+ PCAPNG_NAME_RESOLUTION_BLOCK,
+ PCAPNG_INTERFACE_STATS_BLOCK,
+ PCAPNG_ENHANCED_PACKET_BLOCK
+};
+
+/*
+ * Header of one option as laid out in the file: a 16-bit type, a 16-bit
+ * length of the value in bytes (padding excluded), then the value itself.
+ * pcapng_get_option_length() gives the padded size of a complete option.
+ */
+struct pcapng_option {
+ uint16_t po_type;
+ uint16_t po_length;
+ uint8_t po_data[];
+} __attribute__((__packed__));
+
+/*
+ * Option codes that are valid in every block type.
+ *
+ * The four custom option codes are 2988, 2989, 19372 and 19373; each
+ * enumerator's value matches the number in its name.
+ */
+enum pcapng_opt {
+	PCAPNG_OPT_END = 0,
+	PCAPNG_OPT_COMMENT = 1,
+	PCAPNG_OPT_CUSTOME_2988 = 2988,
+	PCAPNG_OPT_CUSTOME_2989 = 2989,
+	PCAPNG_OPT_CUSTOME_19372 = 19372,
+	PCAPNG_OPT_CUSTOME_19373 = 19373
+};
+
+/*****************************************************************************
+ * pcapng section header block definitions
+ *****************************************************************************/
+/*
+ * Fixed part of a Section Header Block as written by pcapng_write_shb();
+ * the options and a second copy of the block length follow in the file.
+ */
+struct pcapng_section_header_block {
+ uint32_t shb_block_type;
+ uint32_t shb_block_length;
+ uint32_t shb_byte_order_magic;
+ uint16_t shb_major_version;
+ uint16_t shb_minor_version;
+ uint64_t shb_section_length;
+ uint8_t shb_options[];
+ /* The options are followed by another:
+ * uint32_t shb_block_length;
+ */
+} __attribute__((__packed__));
+
+#define PCAPNG_BYTE_ORDER_MAGIC 0x1A2B3C4D
+#define PCAPNG_MAJOR_VERSION 1
+#define PCAPNG_MINOR_VERSION 0
+
+/* Section Header Block option codes; the values run from 2 to 4. */
+enum pcapng_opt_shb {
+ PCAPNG_OPT_SHB_HARDWARE = 2,
+ PCAPNG_OPT_SHB_OS,
+ PCAPNG_OPT_SHB_USERAPPL
+};
+
+/*****************************************************************************
+ * pcapng interface description block definitions
+ *****************************************************************************/
+/*
+ * Fixed part of an Interface Description Block as written by
+ * pcapng_write_idb(); the options and a second copy of the block length
+ * follow in the file.
+ */
+struct pcapng_interface_description_block {
+ uint32_t idb_block_type;
+ uint32_t idb_block_length;
+ uint16_t idb_link_type;
+ uint16_t idb_reserved;
+ uint32_t idb_snap_len;
+ uint8_t idb_options[];
+ /* The options are followed by another:
+ * uint32_t idb_block_length;
+ */
+} __attribute__((__packed__));
+
+/* Interface Description Block option codes; the values run from 2 to 15. */
+enum pcapng_opt_idb {
+ PCAPNG_OPT_IDB_IF_NAME = 2,
+ PCAPNG_OPT_IDB_IF_DESCRIPTION,
+ PCAPNG_OPT_IDB_IF_IPV4_ADDR,
+ PCAPNG_OPT_IDB_IF_IPV6_ADDR,
+ PCAPNG_OPT_IDB_IF_MAC_ADDR,
+ PCAPNG_OPT_IDB_IF_EUI_ADDR,
+ PCAPNG_OPT_IDB_IF_SPEED,
+ PCAPNG_OPT_IDB_IF_TSRESOL,
+ PCAPNG_OPT_IDB_IF_TZONE,
+ PCAPNG_OPT_IDB_IF_FILTER,
+ PCAPNG_OPT_IDB_IF_OS,
+ PCAPNG_OPT_IDB_IF_FCSLEN,
+ PCAPNG_OPT_IDB_IF_TOFFSET,
+ PCAPNG_OPT_IDB_IF_HARDWARE
+};
+
+/*****************************************************************************
+ * pcapng enhanced packet block definitions
+ *****************************************************************************/
+/*
+ * Fixed part of an Enhanced Packet Block as written by pcapng_write_epb().
+ * The 64-bit timestamp is split into its upper and lower 32 bits.
+ */
+struct pcapng_enhanced_packet_block {
+ uint32_t epb_block_type;
+ uint32_t epb_block_length;
+ uint32_t epb_interface_id;
+ uint32_t epb_timestamp_hi;
+ uint32_t epb_timestamp_low;
+ uint32_t epb_captured_length;
+ uint32_t epb_original_length;
+ uint8_t epb_packet_data[];
+ /* The packet data is followed by:
+ * uint8_t epb_options[];
+ * uint32_t epb_block_length;
+ */
+} __attribute__((__packed__));
+
+/* Enhanced Packet Block option codes; the values run from 2 to 7. */
+enum pcapng_opt_epb {
+ PCAPNG_OPT_EPB_FLAGS = 2,
+ PCAPNG_OPT_EPB_HASH,
+ PCAPNG_OPT_EPB_DROPCOUNT,
+ PCAPNG_OPT_EPB_PACKETID,
+ PCAPNG_OPT_EPB_QUEUE,
+ PCAPNG_OPT_EPB_VERDICT
+};
+
+/*
+ * First byte of the value of a PCAPNG_OPT_EPB_VERDICT option.
+ * pcapng_write_epb() only ever writes PCAPNG_EPB_VEDRICT_TYPE_EBPF_XDP.
+ */
+enum pcapng_epb_vedict_type {
+ PCAPNG_EPB_VEDRICT_TYPE_HARDWARE = 0,
+ PCAPNG_EPB_VEDRICT_TYPE_EBPF_TC,
+ PCAPNG_EPB_VEDRICT_TYPE_EBPF_XDP
+};
+
+/*****************************************************************************
+ * pcapng_get_option_length()
+ *****************************************************************************/
+/*
+ * Size in bytes that an option with a len-byte value occupies in the file:
+ * the option header plus the value, rounded up to a multiple of 4.
+ */
+static size_t pcapng_get_option_length(size_t len)
+{
+	size_t unpadded = sizeof(struct pcapng_option) + len;
+
+	return roundup(unpadded, sizeof(uint32_t));
+}
+
+/*****************************************************************************
+ * pcapng_add_option()
+ *****************************************************************************/
+/*
+ * Write one option at opt and return the position of the next one.
+ *
+ * Stores type and length in the header and, when data is non-NULL, copies
+ * length bytes of it into the value area. Padding bytes are not written, so
+ * the caller's buffer decides their content. The returned pointer lies
+ * pcapng_get_option_length(length) bytes past opt. Returns NULL when opt is
+ * NULL.
+ */
+static struct pcapng_option *pcapng_add_option(struct pcapng_option *opt,
+					       uint16_t type, uint16_t length,
+					       const void *data)
+{
+	uint8_t *next;
+
+	if (!opt)
+		return NULL;
+
+	opt->po_length = length;
+	opt->po_type = type;
+	if (data != NULL)
+		memcpy(opt->po_data, data, length);
+
+	next = (uint8_t *)opt + pcapng_get_option_length(length);
+	return (struct pcapng_option *)next;
+}
+
+/*****************************************************************************
+ * pcapng_write_shb()
+ *****************************************************************************/
+/*
+ * Build a Section Header Block in a temporary heap buffer and write it to
+ * pd->pd_fd with a single write().
+ *
+ * Every non-NULL string becomes one option whose value is the string without
+ * its NUL terminator. The option list is closed by an end-of-options entry
+ * and followed by the trailing copy of the block length.
+ *
+ * Returns true when write() reports the whole block as written. Returns false
+ * with errno set to EINVAL for a NULL pd or ENOMEM when the buffer cannot be
+ * allocated, and false for a failed or short write.
+ *
+ * NOTE(review): option lengths are passed to pcapng_add_option() as uint16_t
+ * while the size calculation uses the full strlen(); a string longer than
+ * 65535 bytes would make the two disagree.
+ */
+static bool pcapng_write_shb(struct xpcapng_dumper *pd, const char *comment,
+ const char *hardware, const char *os,
+ const char *user_application)
+{
+ int rc;
+ size_t shb_length;
+ struct pcapng_section_header_block *shb;
+ struct pcapng_option *opt;
+
+ if (pd == NULL) {
+ errno = EINVAL;
+ return false;
+ }
+
+ /* First calculate the total length of the SHB. */
+ shb_length = sizeof(*shb);
+
+ if (comment)
+ shb_length += pcapng_get_option_length(strlen(comment));
+
+ if (hardware)
+ shb_length += pcapng_get_option_length(strlen(hardware));
+
+ if (os)
+ shb_length += pcapng_get_option_length(strlen(os));
+
+ if (user_application)
+ shb_length += pcapng_get_option_length(
+ strlen(user_application));
+
+ /* End-of-options entry plus the trailing block length field. */
+ shb_length += pcapng_get_option_length(0);
+ shb_length += sizeof(uint32_t);
+
+ /* Allocate the SHB and fill it. */
+ shb = calloc(shb_length, 1);
+ if (shb == NULL) {
+ errno = ENOMEM;
+ return false;
+ }
+
+ shb->shb_block_type = PCAPNG_SECTION_BLOCK;
+ shb->shb_block_length = shb_length;
+ shb->shb_byte_order_magic = PCAPNG_BYTE_ORDER_MAGIC;
+ shb->shb_major_version = PCAPNG_MAJOR_VERSION;
+ shb->shb_minor_version = PCAPNG_MINOR_VERSION;
+ shb->shb_section_length = UINT64_MAX;
+
+ /* Add the options and block_length value */
+ opt = (struct pcapng_option *) &shb->shb_options;
+
+ if (comment)
+ opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT,
+ strlen(comment), comment);
+
+ if (hardware)
+ opt = pcapng_add_option(opt, PCAPNG_OPT_SHB_HARDWARE,
+ strlen(hardware), hardware);
+
+ if (os)
+ opt = pcapng_add_option(opt, PCAPNG_OPT_SHB_OS,
+ strlen(os), os);
+
+ if (user_application)
+ opt = pcapng_add_option(opt, PCAPNG_OPT_SHB_USERAPPL,
+ strlen(user_application),
+ user_application);
+ /* WARNING: If a new option is added, make sure the length calculation
+ * above is also updated!
+ */
+
+ opt = pcapng_add_option(opt, PCAPNG_OPT_END, 0, NULL);
+ memcpy(opt, &shb->shb_block_length, sizeof(shb->shb_block_length));
+
+ /* Write the SHB, and free its memory. */
+ rc = write(pd->pd_fd, shb, shb_length);
+ free(shb);
+
+ /* A failed write returns -1, which never equals the block length. */
+ if ((size_t)rc != shb_length)
+ return false;
+
+ return true;
+}
+
+/*****************************************************************************
+ * pcapng_write_idb()
+ *****************************************************************************/
+/*
+ * Build an Interface Description Block in a temporary heap buffer and write
+ * it to pd->pd_fd with a single write(). The link type is always 1
+ * (Ethernet).
+ *
+ * Options are emitted only for arguments that carry information: non-NULL
+ * strings and MAC address (6 bytes), a non-zero speed, and a ts_resolution
+ * other than 0 or 6.
+ *
+ * Returns true when write() reports the whole block as written. Returns false
+ * with errno set to EINVAL for a NULL pd or ENOMEM when the buffer cannot be
+ * allocated, and false for a failed or short write.
+ *
+ * NOTE(review): option lengths are passed to pcapng_add_option() as uint16_t
+ * while the size calculation uses the full strlen(); a string longer than
+ * 65535 bytes would make the two disagree.
+ */
+static bool pcapng_write_idb(struct xpcapng_dumper *pd, const char *name,
+ uint16_t snap_len, const char *description,
+ const uint8_t *mac, uint64_t speed,
+ uint8_t ts_resolution, const char *hardware)
+{
+ int rc;
+ size_t idb_length;
+ struct pcapng_interface_description_block *idb;
+ struct pcapng_option *opt;
+
+ if (pd == NULL) {
+ errno = EINVAL;
+ return false;
+ }
+
+ /* First calculate the total length of the IDB. */
+ idb_length = sizeof(*idb);
+
+ if (name)
+ idb_length += pcapng_get_option_length(strlen(name));
+
+ if (description)
+ idb_length += pcapng_get_option_length(strlen(description));
+
+ if (mac)
+ idb_length += pcapng_get_option_length(6);
+
+ if (speed)
+ idb_length += pcapng_get_option_length(sizeof(uint64_t));
+
+ if (ts_resolution != 6 && ts_resolution != 0)
+ idb_length += pcapng_get_option_length(1);
+
+ if (hardware)
+ idb_length += pcapng_get_option_length(strlen(hardware));
+
+ /* End-of-options entry plus the trailing block length field. */
+ idb_length += pcapng_get_option_length(0);
+ idb_length += sizeof(uint32_t);
+
+ /* Allocate the IDB and fill it. */
+ idb = calloc(idb_length, 1);
+ if (idb == NULL) {
+ errno = ENOMEM;
+ return false;
+ }
+
+ idb->idb_block_type = PCAPNG_INTERFACE_BLOCK;
+ idb->idb_block_length = idb_length;
+ idb->idb_link_type = 1; /* Ethernet */
+ idb->idb_snap_len = snap_len;
+
+ /* Add the options and block_length value */
+ opt = (struct pcapng_option *) &idb->idb_options;
+
+ if (name)
+ opt = pcapng_add_option(opt, PCAPNG_OPT_IDB_IF_NAME,
+ strlen(name), name);
+
+ if (description)
+ opt = pcapng_add_option(opt, PCAPNG_OPT_IDB_IF_DESCRIPTION,
+ strlen(description), description);
+
+ if (mac)
+ opt = pcapng_add_option(opt, PCAPNG_OPT_IDB_IF_MAC_ADDR, 6,
+ mac);
+
+ if (speed)
+ opt = pcapng_add_option(opt, PCAPNG_OPT_IDB_IF_SPEED,
+ sizeof(uint64_t), &speed);
+
+ if (ts_resolution != 6 && ts_resolution != 0)
+ opt = pcapng_add_option(opt, PCAPNG_OPT_IDB_IF_TSRESOL,
+ sizeof(uint8_t), &ts_resolution);
+
+ if (hardware)
+ opt = pcapng_add_option(opt, PCAPNG_OPT_IDB_IF_HARDWARE,
+ strlen(hardware), hardware);
+ /* WARNING: If a new option is added, make sure the length calculation
+ * above is also updated!
+ */
+
+ opt = pcapng_add_option(opt, PCAPNG_OPT_END, 0, NULL);
+ memcpy(opt, &idb->idb_block_length, sizeof(idb->idb_block_length));
+
+ /* Write the IDB, and free its memory. */
+ rc = write(pd->pd_fd, idb, idb_length);
+ free(idb);
+
+ /* A failed write returns -1, which never equals the block length. */
+ if ((size_t)rc != idb_length)
+ return false;
+
+ return true;
+}
+
+/*****************************************************************************
+ * pcapng_write_epb()
+ *****************************************************************************/
+/*
+ * Write one Enhanced Packet Block to pd->pd_fd with a single writev().
+ *
+ * The block is assembled from up to seven pieces without copying the packet:
+ * the fixed header, the packet data, its padding, the optional comment
+ * option (header, text, padding) and a small on-stack buffer holding the
+ * remaining options, the end-of-options entry and the trailing block length.
+ *
+ * pd          - dumper to write to.
+ * ifid        - interface id stored in the block.
+ * pkt, caplen - captured bytes and their count.
+ * len         - original packet length.
+ * timestamp   - 64-bit timestamp, stored as two 32-bit halves.
+ * epb_options - options to attach; must not be NULL. Zero values and NULL
+ *               pointers mean "omit this option".
+ *
+ * Returns true when writev() reports the whole block as written. Returns
+ * false with errno set to EINVAL for a NULL pd or for a comment longer than
+ * an option length field can express (65535 bytes), and false for a failed
+ * or short write.
+ */
+static bool pcapng_write_epb(struct xpcapng_dumper *pd, uint32_t ifid,
+			     const uint8_t *pkt, uint32_t len,
+			     uint32_t caplen, uint64_t timestamp,
+			     struct xpcapng_epb_options_s *epb_options)
+{
+	int i = 0;
+	ssize_t rc;
+	size_t pad_length;
+	size_t com_length = 0;
+	size_t epb_length;
+	struct pcapng_enhanced_packet_block epb;
+	struct pcapng_option *opt;
+	struct iovec iov[7];
+	/* Header of the comment option. It is referenced by iov[] and must
+	 * therefore stay alive until writev() has returned.
+	 */
+	uint16_t com_hdr[2];
+	static uint8_t pad[4] = {0, 0, 0, 0};
+	uint8_t options[8 + 12 + 12 + 8 + 16 + 4 + 4];
+	/* PCAPNG_OPT_EPB_FLAGS[8] +
+	 * PCAPNG_OPT_EPB_DROPCOUNT[12] +
+	 * PCAPNG_OPT_EPB_PACKETID[12] +
+	 * PCAPNG_OPT_EPB_QUEUE[8] +
+	 * PCAPNG_OPT_EPB_VERDICT[16] +
+	 * PCAPNG_OPT_END[4] +
+	 * epb_block_length
+	 */
+	/* Value of the verdict option; copied into options[] right away, so
+	 * it needs no storage beyond this call.
+	 */
+	struct xdp_verdict {
+		uint8_t type;
+		int64_t verdict;
+	} __attribute__((__packed__)) verdict = {
+		PCAPNG_EPB_VEDRICT_TYPE_EBPF_XDP, 0 };
+
+	if (pd == NULL) {
+		errno = EINVAL;
+		return false;
+	}
+
+	/* First calculate the total length of the EPB. */
+	pad_length = roundup(caplen, sizeof(uint32_t)) - caplen;
+
+	epb_length = sizeof(epb);
+	epb_length += caplen + pad_length;
+
+	if (epb_options->flags)
+		epb_length += pcapng_get_option_length(sizeof(uint32_t));
+
+	if (epb_options->dropcount)
+		epb_length += pcapng_get_option_length(sizeof(uint64_t));
+
+	if (epb_options->packetid)
+		epb_length += pcapng_get_option_length(sizeof(uint64_t));
+
+	if (epb_options->queue)
+		epb_length += pcapng_get_option_length(sizeof(uint32_t));
+
+	if (epb_options->xdp_verdict)
+		epb_length += pcapng_get_option_length(sizeof(verdict));
+
+	if (epb_options->comment) {
+		com_length = strlen(epb_options->comment);
+		/* The option length field is 16 bits wide; a longer comment
+		 * would be written with a truncated length and corrupt the
+		 * file.
+		 */
+		if (com_length > UINT16_MAX) {
+			errno = EINVAL;
+			return false;
+		}
+		epb_length += pcapng_get_option_length(com_length);
+	}
+
+	epb_length += pcapng_get_option_length(0);
+	epb_length += sizeof(uint32_t);
+
+	/* Fill in the EPB. */
+	epb.epb_block_type = PCAPNG_ENHANCED_PACKET_BLOCK;
+	epb.epb_block_length = epb_length;
+	epb.epb_interface_id = ifid;
+	epb.epb_timestamp_hi = timestamp >> 32;
+	epb.epb_timestamp_low = (uint32_t) timestamp;
+	epb.epb_captured_length = caplen;
+	epb.epb_original_length = len;
+
+	/* Add the flag/end option and block_length value */
+	opt = (struct pcapng_option *) options;
+
+	if (epb_options->flags)
+		opt = pcapng_add_option(opt, PCAPNG_OPT_EPB_FLAGS,
+					sizeof(uint32_t), &epb_options->flags);
+
+	if (epb_options->dropcount)
+		opt = pcapng_add_option(opt, PCAPNG_OPT_EPB_DROPCOUNT,
+					sizeof(uint64_t),
+					&epb_options->dropcount);
+
+	if (epb_options->packetid)
+		opt = pcapng_add_option(opt, PCAPNG_OPT_EPB_PACKETID,
+					sizeof(uint64_t),
+					epb_options->packetid);
+
+	if (epb_options->queue)
+		opt = pcapng_add_option(opt, PCAPNG_OPT_EPB_QUEUE,
+					sizeof(uint32_t), epb_options->queue);
+
+	if (epb_options->xdp_verdict) {
+		verdict.verdict = *epb_options->xdp_verdict;
+		opt = pcapng_add_option(opt, PCAPNG_OPT_EPB_VERDICT,
+					sizeof(verdict), &verdict);
+	}
+	/* WARNING: If a new option is added, make sure the length calculation
+	 * and the options[] variable above are also updated!
+	 */
+
+	opt = pcapng_add_option(opt, PCAPNG_OPT_END, 0, NULL);
+	memcpy(opt, &epb.epb_block_length, sizeof(epb.epb_block_length));
+
+	/* Write the EPB in parts, including the options, this looks not as
+	 * straightforward as pcapng_write_idb() but here we would like to
+	 * avoid as many memcopy's as possible.
+	 */
+
+	/* Add base EPB structure. */
+	iov[i].iov_base = &epb;
+	iov[i++].iov_len = sizeof(epb);
+
+	/* Add Packet Data. */
+	iov[i].iov_base = (void *)pkt;
+	iov[i++].iov_len = caplen;
+
+	/* Add Packet Data padding if needed. */
+	if (pad_length > 0) {
+		iov[i].iov_base = pad;
+		iov[i++].iov_len = pad_length;
+	}
+
+	/* Add comment if supplied */
+	if (epb_options->comment) {
+		size_t opt_pad = roundup(com_length,
+					 sizeof(uint32_t)) - com_length;
+
+		/* Add option header. */
+		com_hdr[0] = PCAPNG_OPT_COMMENT;
+		com_hdr[1] = (uint16_t)com_length;
+		iov[i].iov_base = com_hdr;
+		iov[i++].iov_len = sizeof(com_hdr);
+
+		/* Add actual comment string. */
+		iov[i].iov_base = (void *)epb_options->comment;
+		iov[i++].iov_len = com_length;
+
+		/* Add padding to uint32_t if needed. */
+		if (opt_pad) {
+			iov[i].iov_base = pad;
+			iov[i++].iov_len = opt_pad;
+		}
+	}
+
+	/* Write other options and final EPB size. */
+	iov[i].iov_base = options;
+	iov[i++].iov_len = 8 + (epb_options->flags ? 8 : 0) +
+			   (epb_options->dropcount ? 12 : 0) +
+			   (epb_options->packetid ? 12 : 0) +
+			   (epb_options->queue ? 8 : 0) +
+			   (epb_options->xdp_verdict ? 16 : 0);
+	rc = writev(pd->pd_fd, iov, i);
+	if ((size_t)rc != epb_length)
+		return false;
+
+	return true;
+}
+
+/*****************************************************************************
+ * xpcapng_dump_open()
+ *****************************************************************************/
+/*
+ * Open a PcapNG output and write its Section Header Block.
+ *
+ * file is a path that is created or truncated with mode 0600, or "-" for
+ * standard output. The remaining strings are optional (NULL to omit) and end
+ * up as options of the section header.
+ *
+ * Returns a dumper to be released with xpcapng_dump_close(), or NULL on
+ * failure: errno is EINVAL for a NULL file and ENOMEM when the dumper cannot
+ * be allocated. A descriptor opened here is closed again on failure.
+ */
+struct xpcapng_dumper *xpcapng_dump_open(const char *file,
+					 const char *comment,
+					 const char *hardware,
+					 const char *os,
+					 const char *user_application)
+{
+	struct xpcapng_dumper *dumper;
+
+	if (!file) {
+		errno = EINVAL;
+		return NULL;
+	}
+
+	dumper = calloc(sizeof(*dumper), 1);
+	if (!dumper) {
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	if (strcmp(file, "-") == 0)
+		dumper->pd_fd = STDOUT_FILENO;
+	else
+		dumper->pd_fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, 0600);
+
+	if (dumper->pd_fd >= 0 &&
+	    pcapng_write_shb(dumper, comment, hardware, os, user_application))
+		return dumper;
+
+	/* Failure: release what was acquired, but never close stdout. */
+	if (dumper->pd_fd >= 0 && dumper->pd_fd != STDOUT_FILENO)
+		close(dumper->pd_fd);
+
+	free(dumper);
+	return NULL;
+}
+
+/*****************************************************************************
+ * xpcapng_dump_close()
+ *****************************************************************************/
+/*
+ * Release a dumper returned by xpcapng_dump_open(). Passing NULL is a no-op.
+ *
+ * The output descriptor is closed unless it is standard output, mirroring
+ * the cleanup path of xpcapng_dump_open().
+ */
+void xpcapng_dump_close(struct xpcapng_dumper *pd)
+{
+	if (pd == NULL)
+		return;
+
+	/* Close only valid descriptors this library opened itself. */
+	if (pd->pd_fd >= 0 && pd->pd_fd != STDOUT_FILENO)
+		close(pd->pd_fd);
+
+	free(pd);
+}
+
+/*****************************************************************************
+ * xpcapng_dump_flush()
+ *****************************************************************************/
+/*
+ * Flush the dumper's output with fsync(). Returns the fsync() result, or -1
+ * with errno set to EINVAL when pd is NULL.
+ */
+int xpcapng_dump_flush(struct xpcapng_dumper *pd)
+{
+	if (pd == NULL) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	return fsync(pd->pd_fd);
+}
+
+/*****************************************************************************
+ * xpcapng_dump_add_interface()
+ *****************************************************************************/
+/*
+ * Write an Interface Description Block for a new interface.
+ *
+ * Returns the id of the interface, which is the number of interfaces added
+ * to this dumper before it, or -1 when the block could not be written
+ * (including a NULL pd).
+ */
+int xpcapng_dump_add_interface(struct xpcapng_dumper *pd, uint16_t snap_len,
+			       const char *name, const char *description,
+			       const uint8_t *mac, uint64_t speed,
+			       uint8_t ts_resolution, const char *hardware)
+{
+	uint32_t new_id;
+
+	if (!pcapng_write_idb(pd, name, snap_len, description, mac, speed,
+			      ts_resolution, hardware))
+		return -1;
+
+	new_id = pd->pd_interfaces;
+	pd->pd_interfaces = new_id + 1;
+	return new_id;
+}
+
+/*****************************************************************************
+ * xpcapng_dump_enhanced_pkt()
+ *****************************************************************************/
+/*
+ * Write one captured packet as an Enhanced Packet Block. options may be NULL,
+ * in which case an all-zero option set (no options emitted) is used.
+ * Returns the result of pcapng_write_epb().
+ */
+bool xpcapng_dump_enhanced_pkt(struct xpcapng_dumper *pd, uint32_t ifid,
+			       const uint8_t *pkt, uint32_t len,
+			       uint32_t caplen, uint64_t timestamp,
+			       struct xpcapng_epb_options_s *options)
+{
+	struct xpcapng_epb_options_s no_options = {};
+
+	if (options == NULL)
+		options = &no_options;
+
+	return pcapng_write_epb(pd, ifid, pkt, len, caplen, timestamp, options);
+}
diff --git a/lib/util/xpcapng.h b/lib/util/xpcapng.h
new file mode 100644
index 0000000..1590a4c
--- /dev/null
+++ b/lib/util/xpcapng.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*****************************************************************************
+ * Multiple include protection
+ *****************************************************************************/
+#ifndef __XPCAPNG_H_
+#define __XPCAPNG_H_
+
+/*****************************************************************************
+ * Handle
+ *****************************************************************************/
+struct xpcapng_dumper; /* forward declaration only: the layout is not exposed by this header */
+
+/*****************************************************************************
+ * Flag variables
+ *****************************************************************************/
+enum xpcapng_epb_flags { /* values for the flags member of struct xpcapng_epb_options_s */
+ PCAPNG_EPB_FLAG_INBOUND = 0x1, /* NOTE(review): presumably marks a received packet - confirm in pcapng_write_epb() */
+ PCAPNG_EPB_FLAG_OUTBOUND = 0x2 /* NOTE(review): presumably marks a transmitted packet - confirm in pcapng_write_epb() */
+};
+
+/*****************************************************************************
+ * EPB options structure
+ *****************************************************************************/
+struct xpcapng_epb_options_s { /* optional per-packet data passed to xpcapng_dump_enhanced_pkt() */
+ enum xpcapng_epb_flags flags; /* PCAPNG_EPB_FLAG_* value */
+ uint64_t dropcount; /* NOTE(review): presumably a dropped-packet counter - confirm semantics */
+ uint64_t *packetid; /* passed by pointer; NOTE(review): presumably NULL means "not provided" - confirm */
+ uint32_t *queue; /* passed by pointer; NOTE(review): presumably NULL means "not provided" - confirm */
+ int64_t *xdp_verdict; /* passed by pointer; NOTE(review): presumably NULL means "not provided" - confirm */
+ const char *comment; /* NOTE(review): presumably an optional NUL-terminated comment string - confirm */
+};
+
+/*****************************************************************************
+ * APIs
+ *****************************************************************************/
+extern struct xpcapng_dumper *xpcapng_dump_open(const char *file,
+ const char *comment,
+ const char *hardware,
+ const char *os,
+ const char *user_application); /* NOTE(review): presumably returns NULL on failure - confirm in xpcapng.c */
+extern void xpcapng_dump_close(struct xpcapng_dumper *pd); /* releases the handle; a NULL pd is ignored */
+extern int xpcapng_dump_flush(struct xpcapng_dumper *pd); /* fsync() result; -1 with errno EINVAL if pd is NULL */
+extern int xpcapng_dump_add_interface(struct xpcapng_dumper *pd,
+ uint16_t snap_len,
+ const char *name, const char *description,
+ const uint8_t *mac, uint64_t speed,
+ uint8_t ts_resolution,
+ const char *hardware); /* -1 on failure, otherwise the interface counter value before it was incremented */
+extern bool xpcapng_dump_enhanced_pkt(struct xpcapng_dumper *pd, uint32_t ifid,
+ const uint8_t *pkt, uint32_t len,
+ uint32_t caplen, uint64_t timestamp,
+ struct xpcapng_epb_options_s *options); /* NULL options selects zero-initialised defaults */
+
+/*****************************************************************************
+ * End of include file
+ *****************************************************************************/
+#endif /* __XPCAPNG_H_ */