diff options
Diffstat (limited to 'lib/util')
-rw-r--r-- | lib/util/Makefile | 34 | ||||
-rw-r--r-- | lib/util/compat.h | 12 | ||||
-rw-r--r-- | lib/util/logging.c | 96 | ||||
-rw-r--r-- | lib/util/logging.h | 35 | ||||
-rw-r--r-- | lib/util/params.c | 808 | ||||
-rw-r--r-- | lib/util/params.h | 149 | ||||
-rw-r--r-- | lib/util/stats.c | 292 | ||||
-rw-r--r-- | lib/util/stats.h | 26 | ||||
-rw-r--r-- | lib/util/util.c | 946 | ||||
-rw-r--r-- | lib/util/util.h | 97 | ||||
-rw-r--r-- | lib/util/util.mk | 2 | ||||
-rw-r--r-- | lib/util/xdp_sample.bpf.c | 18 | ||||
-rw-r--r-- | lib/util/xdp_sample.c | 1643 | ||||
-rw-r--r-- | lib/util/xdp_sample.h | 133 | ||||
-rw-r--r-- | lib/util/xpcapng.c | 635 | ||||
-rw-r--r-- | lib/util/xpcapng.h | 58 |
16 files changed, 4984 insertions, 0 deletions
diff --git a/lib/util/Makefile b/lib/util/Makefile
new file mode 100644
index 0000000..24070f0
--- /dev/null
+++ b/lib/util/Makefile
@@ -0,0 +1,34 @@
+include util.mk
+
+LIB_DIR ?= ..
+
+include $(LIB_DIR)/defines.mk
+include $(LIBXDP_DIR)/libxdp.mk
+
+.PHONY: all clean
+
+all: $(UTIL_OBJS)
+
+UTIL_SKEL_H = $(UTIL_BPF_OBJS:.bpf.o=.skel.h)
+
+# Userspace objects depend on the generated BPF skeleton headers.
+$(UTIL_OBJS): %.o: %.c %.h $(UTIL_SKEL_H) $(LIBMK)
+	$(QUIET_CC)$(CC) $(CFLAGS) $(CPPFLAGS) -Wall -I../../headers -c -o $@ $<
+
+# Also remove the intermediate LLVM IR (.ll) files emitted by the BPF rule.
+clean:
+	$(Q)rm -f $(UTIL_OBJS) $(UTIL_BPF_OBJS) $(UTIL_BPF_OBJS:.o=.ll) $(UTIL_SKEL_H)
+
+BPF_CFLAGS += -I$(HEADER_DIR) $(ARCH_INCLUDES)
+
+# BPF objects are built in two steps: clang emits LLVM IR, llc emits the object.
+$(UTIL_BPF_OBJS): %.o: %.c $(KERN_USER_H) $(BPF_HEADERS) $(LIBMK)
+	$(QUIET_CLANG)$(CLANG) -S \
+	    -target $(BPF_TARGET) \
+	    -D __BPF_TRACING__ \
+	    $(BPF_CFLAGS) \
+	    -Wall \
+	    -Wno-unused-value \
+	    -Wno-pointer-sign \
+	    -Wno-compare-distinct-pointer-types \
+	    -Werror \
+	    -O2 -emit-llvm -c -g -o ${@:.o=.ll} $<
+	$(QUIET_LLC)$(LLC) -march=$(BPF_TARGET) -filetype=obj -o $@ ${@:.o=.ll}
+
+$(UTIL_SKEL_H): %.skel.h: %.bpf.o
+	$(QUIET_GEN)$(BPFTOOL) gen skeleton $< name ${@:.skel.h=} > $@
diff --git a/lib/util/compat.h b/lib/util/compat.h
new file mode 100644
index 0000000..f058e9c
--- /dev/null
+++ b/lib/util/compat.h
@@ -0,0 +1,12 @@
+#ifndef __COMPAT_H
+#define __COMPAT_H
+
+#ifndef HAVE_LIBBPF_BTF__TYPE_CNT
+/* Fallback for libbpf versions without btf__type_cnt(). Marked inline so that
+ * translation units including this header without calling it do not get an
+ * unused-function warning.
+ */
+static inline __u32 btf__type_cnt(const struct btf *btf)
+{
+	/* old function didn't include 'void' type in count */
+	return btf__get_nr_types(btf) + 1;
+}
+#endif
+
+#endif
diff --git a/lib/util/logging.c b/lib/util/logging.c
new file mode 100644
index 0000000..7ad21d7
--- /dev/null
+++ b/lib/util/logging.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <stdio.h>
+#include <stdarg.h>
+
+#include <bpf/libbpf.h>
+#include <xdp/libxdp.h>
+
+#include "logging.h"
+#include "util.h"
+
+/* Messages with a level above this threshold are dropped. */
+static enum logging_print_level log_level = LOG_INFO;
+
+/* Common sink: write one message to stderr, preceded by @indent spaces, if
+ * @level passes the current threshold. Returns the vfprintf() result, or 0
+ * when the message is filtered out.
+ */
+static int print_func(enum logging_print_level level, int indent,
+		      const char *format, va_list args)
+{
+	int i;
+
+	if (level > log_level)
+		return 0;
+
+	for (i = 0; i < indent; i++)
+		fprintf(stderr, " ");
+
+	return vfprintf(stderr, format, args);
+}
+
+/* libbpf print callback; library levels are demoted by one (see logging.h). */
+static int libbpf_print_func(enum libbpf_print_level level, const char *format,
+			     va_list args)
+{
+	return print_func(level + 1, 2, format, args);
+}
+
+/* libbpf print callback that discards everything. */
+static int libbpf_silent_func(__unused enum libbpf_print_level level,
+			      __unused const char *format,
+			      __unused va_list args)
+{
+	return 0;
+}
+
+/* libxdp print callback; library levels are demoted by one (see logging.h). */
+static int libxdp_print_func(enum libxdp_print_level level, const char *format,
+			     va_list args)
+{
+	return print_func(level + 1, 1, format, args);
+}
+
+/* libxdp print callback that discards everything. */
+static int libxdp_silent_func(__unused enum libxdp_print_level level,
+			      __unused const char *format,
+			      __unused va_list args)
+{
+	return 0;
+}
+
+#define __printf(a, b) __attribute__((format(printf, a, b)))
+
+/* Print a printf-style message at @level with no indentation. */
+__printf(2, 3) void logging_print(enum logging_print_level level,
+				  const char *format, ...)
+{
+	va_list args;
+
+	va_start(args, format);
+	print_func(level, 0, format, args);
+	va_end(args);
+}
+
+/* Route libbpf and libxdp messages through this module's filter. */
+void init_lib_logging(void)
+{
+	libbpf_set_print(libbpf_print_func);
+	libxdp_set_print(libxdp_print_func);
+}
+
+/* Silence libbpf unless the user asked for the most verbose level. */
+void silence_libbpf_logging(void)
+{
+	if (log_level < LOG_VERBOSE)
+		libbpf_set_print(libbpf_silent_func);
+}
+
+/* Silence libxdp unless the user asked for the most verbose level. */
+void silence_libxdp_logging(void)
+{
+	if (log_level < LOG_VERBOSE)
+		libxdp_set_print(libxdp_silent_func);
+}
+
+/* Set the threshold to @level and return the previous threshold. */
+enum logging_print_level set_log_level(enum logging_print_level level)
+{
+	enum logging_print_level old_level = log_level;
+
+	log_level = level;
+	return old_level;
+}
+
+/* Raise the threshold by one step, saturating at LOG_VERBOSE; returns the
+ * resulting threshold.
+ */
+enum logging_print_level increase_log_level(void)
+{
+	if (log_level < LOG_VERBOSE)
+		log_level++;
+	return log_level;
+}
diff --git a/lib/util/logging.h b/lib/util/logging.h
new file mode 100644
index 0000000..16c4e74
--- /dev/null
+++ b/lib/util/logging.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __LOGGING_H
+#define __LOGGING_H
+
+/* This matches the libbpf logging levels, but with an additional VERBOSE level;
+ * we demote all libbpf messages by one level so debug messages only show up on
+ * VERBOSE.
+ */
+enum logging_print_level {
+	LOG_WARN,
+	LOG_INFO,
+	LOG_DEBUG,
+	LOG_VERBOSE,
+};
+
+extern void logging_print(enum logging_print_level level, const char *format,
+			  ...) __attribute__((format(printf, 2, 3)));
+
+#define __pr(level, fmt, ...) \
+	do { \
+		logging_print(level, fmt, ##__VA_ARGS__); \
+	} while (0)
+
+#define pr_warn(fmt, ...) __pr(LOG_WARN, fmt, ##__VA_ARGS__)
+#define pr_info(fmt, ...) __pr(LOG_INFO, fmt, ##__VA_ARGS__)
+#define pr_debug(fmt, ...) __pr(LOG_DEBUG, fmt, ##__VA_ARGS__)
+
+void init_lib_logging(void);
+void silence_libbpf_logging(void);
+void silence_libxdp_logging(void);
+enum logging_print_level set_log_level(enum logging_print_level level);
+/* (void): an empty parameter list would declare an unprototyped function. */
+enum logging_print_level increase_log_level(void);
+
+#endif
diff --git a/lib/util/params.c b/lib/util/params.c
new file mode 100644
index 0000000..838a520
--- /dev/null
+++ b/lib/util/params.c
@@ -0,0 +1,808 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <getopt.h>
+#include <errno.h>
+
+#include <net/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_link.h> /* XDP_FLAGS_* depend on kernel-headers installed */
+#include <linux/if_xdp.h>
+#include <arpa/inet.h>
+
+#include "params.h"
+#include "logging.h"
+#include "util.h"
+
+#define BUFSIZE 30
+#define FIRST_PRINTABLE 65 /* ord('A') = 65 */
+#define VERSION_SHORT_OPT 0
+
+/* Every non-positional option other than OPT_BOOL takes an argument. */
+static bool opt_needs_arg(const struct prog_option *opt)
+{
+	return opt->type > OPT_BOOL && !opt->positional;
+}
+
+/* True for option types that accumulate a value each time they are given. */
+static bool opt_is_multi(const struct prog_option *opt)
+{
+	return opt->type == OPT_MULTISTRING || opt->type == OPT_IFNAME_MULTI ||
+	       opt->type == OPT_U32_MULTI;
+}
+
+/*
+ * Option handlers. Each one parses @optarg into the config field at @tgt and
+ * returns 0 on success or a negative errno value.
+ */
+
+static int handle_bool(__unused char *optarg, void *tgt, __unused struct prog_option *opt)
+{
+	bool *opt_set = tgt;
+
+	*opt_set = true;
+	return 0;
+}
+
+/* Stores the @optarg pointer itself; the string is not copied. */
+static int handle_string(char *optarg, void *tgt, __unused struct prog_option *opt)
+{
+	char **opt_set = tgt;
+
+	*opt_set = optarg;
+	return 0;
+}
+
+/* Appends the @optarg pointer to the growing array in struct multistring. */
+static int handle_multistring(char *optarg, void *tgt, __unused struct prog_option *opt)
+{
+	struct multistring *opt_set = tgt;
+	void *ptr;
+
+	/* Guard the size computation below against overflow. */
+	if (opt_set->num_strings + 1 > SIZE_MAX / sizeof(*opt_set->strings))
+		return -ENOMEM;
+
+	ptr = realloc(opt_set->strings,
+		      sizeof(*opt_set->strings) * (opt_set->num_strings + 1));
+	if (!ptr)
+		return -ENOMEM; /* errno is not guaranteed to be set here */
+
+	opt_set->strings = ptr;
+	opt_set->strings[opt_set->num_strings++] = optarg;
+	return 0;
+}
+
+/* Parse @str as a plain base-10 unsigned number no larger than @max.
+ *
+ * strtoul() on its own accepts an empty string (as 0), trailing garbage,
+ * leading whitespace and a sign (negating the value), so require a leading
+ * digit and check that the whole string was consumed.
+ */
+static int parse_uint(const char *str, unsigned long max, unsigned long *res)
+{
+	unsigned long val;
+	char *end;
+
+	if (*str < '0' || *str > '9')
+		return -EINVAL;
+
+	errno = 0;
+	val = strtoul(str, &end, 10);
+	if (errno || *end != '\0' || val > max)
+		return -EINVAL;
+
+	*res = val;
+	return 0;
+}
+
+static int handle_u32(char *optarg, void *tgt, __unused struct prog_option *opt)
+{
+	__u32 *opt_set = tgt;
+	unsigned long val;
+	int ret;
+
+	ret = parse_uint(optarg, 0xffffffff, &val);
+	if (ret)
+		return ret;
+
+	*opt_set = val;
+	return 0;
+}
+
+/* Parses one u32 and appends it to the growing array in struct u32_multi. */
+static int handle_u32_multi(char *optarg, void *tgt, struct prog_option *opt)
+{
+	struct u32_multi *opt_set = tgt;
+	__u32 val;
+	void *ptr;
+	int ret;
+
+	/* Guard the size computation below against overflow. */
+	if (opt_set->num_vals + 1 > SIZE_MAX / sizeof(*opt_set->vals))
+		return -ENOMEM;
+
+	ret = handle_u32(optarg, &val, opt);
+	if (ret)
+		return ret;
+
+	ptr = realloc(opt_set->vals,
+		      sizeof(*opt_set->vals) * (opt_set->num_vals + 1));
+	if (!ptr)
+		return -ENOMEM; /* errno is not guaranteed to be set here */
+
+	opt_set->vals = ptr;
+	opt_set->vals[opt_set->num_vals++] = val;
+	return 0;
+}
+
+static int handle_u16(char *optarg, void *tgt, __unused struct prog_option *opt)
+{
+	__u16 *opt_set = tgt;
+	unsigned long val;
+	int ret;
+
+	ret = parse_uint(optarg, 0xffff, &val);
+	if (ret)
+		return ret;
+
+	*opt_set = val;
+	return 0;
+}
+
+/* Parse "xx:xx:xx:xx:xx:xx" into @mac; returns 0 or -EINVAL. */
+static int parse_mac(char *str, unsigned char mac[ETH_ALEN])
+{
+	unsigned int v[ETH_ALEN];
+	char extra;
+	int len, i;
+
+	/* Based on https://stackoverflow.com/a/20553913
+	 *
+	 * The trailing %c only converts if something follows the sixth octet,
+	 * which makes the conversion count 7 so the input is rejected. (A
+	 * suppressed "%*c" is not counted and therefore detects nothing.)
+	 */
+	len = sscanf(str, "%x:%x:%x:%x:%x:%x%c",
+		     &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &extra);
+
+	if (len != ETH_ALEN)
+		return -EINVAL;
+
+	for (i = 0; i < ETH_ALEN; i++) {
+		if (v[i] > 0xFF)
+			return -EINVAL;
+		mac[i] = v[i];
+	}
+	return 0;
+}
+
+static int handle_macaddr(char *optarg, void *tgt, __unused struct prog_option *opt)
+{
+	struct mac_addr *opt_set = tgt;
+	int err;
+
+	err = parse_mac(optarg, opt_set->addr);
+	if (err)
+		pr_warn("Invalid MAC address: %s\n", optarg);
+
+	return err;
+}
+
+/* Format @addr as colon-separated hex octets into @buf.
+ *
+ * Never writes more than @buf_len bytes and always NUL-terminates when
+ * @buf_len > 0. If the buffer is too small the output stops after the last
+ * octet that fits completely.
+ */
+void print_macaddr(char *buf, size_t buf_len, const struct mac_addr *addr)
+{
+	int i, len;
+
+	if (!buf_len)
+		return;
+
+	for (i = 0; i < ETH_ALEN; i++) {
+		/* Separator and octet are written together so that at least
+		 * one byte always stays available for the terminator.
+		 */
+		len = snprintf(buf, buf_len, "%s%02x", i ? ":" : "",
+			       addr->addr[i]);
+		if (len < 0 || (size_t)len >= buf_len)
+			break;
+
+		buf += len;
+		buf_len -= len;
+	}
+
+	*buf = '\0';
+}
+
+bool macaddr_is_null(const struct mac_addr *addr)
+{
+	static const struct mac_addr nulladdr = {};
+
+	return memcmp(addr, &nulladdr, sizeof(nulladdr)) == 0;
+}
+
+/* Look up @chr in the table @flag_vals, which ends at a NULL flagstring. */
+static const struct flag_val *find_flag(const struct flag_val *flag_vals,
+					const char *chr)
+{
+	while (flag_vals->flagstring) {
+		if (strcmp(chr, flag_vals->flagstring) == 0)
+			return flag_vals;
+		flag_vals++;
+	}
+	return NULL;
+}
+
+/* Parse a comma-separated list of flag names (table in opt->typearg) and
+ * store the OR of their values. Note that @optarg is modified in place: each
+ * comma is overwritten with a NUL. The target is only written on success.
+ */
+static int handle_flags(char *optarg, void *tgt, struct prog_option *opt)
+{
+	const struct flag_val *flag, *flag_vals = opt->typearg;
+	unsigned int *opt_set = tgt;
+	unsigned int flagval = 0;
+	char *c = NULL;
+
+	while (*optarg) {
+		c = strchr(optarg, ',');
+		if (c)
+			*c = '\0';
+		flag = find_flag(flag_vals, optarg);
+		if (!flag)
+			return -EINVAL;
+		flagval |= flag->flagval;
+
+		if (!c)
+			break;
+		optarg = c + 1;
+	}
+	*opt_set = flagval;
+	return 0;
+}
+
+/* Resolve @ifname to an interface index; warns and returns -ENOENT if the
+ * interface does not exist.
+ */
+static int get_ifindex(const char *ifname)
+{
+	int ifindex;
+
+	ifindex = if_nametoindex(ifname);
+	if (!ifindex) {
+		pr_warn("Couldn't find network interface '%s'.\n", ifname);
+		return -ENOENT;
+	}
+	return ifindex;
+}
+
+static int handle_ifname(char *optarg, void *tgt, __unused struct prog_option *opt)
+{
+	struct iface *iface = tgt;
+	int ifindex;
+
+	ifindex = get_ifindex(optarg);
+	if (ifindex < 0)
+		return ifindex;
+
+	iface->ifname = optarg;
+	iface->ifindex = ifindex;
+	return 0;
+}
+
+/* Allocate a struct iface for @optarg and append it to the singly linked list
+ * whose head pointer is stored at @tgt.
+ */
+static int handle_ifname_multi(char *optarg, void *tgt, __unused struct prog_option *opt)
+{
+	struct iface **ifaces = tgt;
+	struct iface *iface, *tmp;
+	int ifindex;
+
+	ifindex = get_ifindex(optarg);
+	if (ifindex < 0)
+		return ifindex;
+
+	iface = calloc(1, sizeof(*iface));
+	if (!iface)
+		return -ENOMEM;
+
+	iface->ifname = optarg;
+	iface->ifindex = ifindex;
+
+	if (!*ifaces) {
+		*ifaces = iface;
+		return 0;
+	}
+
+	tmp = *ifaces;
+	while (tmp->next)
+		tmp = tmp->next;
+
+	tmp->next = iface;
+	return 0;
+}
+
+/* Format @addr into @buf using inet_ntop(); its result is not checked. */
+void print_addr(char *buf, size_t buf_len, const struct ip_addr *addr)
+{
+	inet_ntop(addr->af, &addr->addr, buf, buf_len);
+}
+
+bool ipaddr_is_null(const struct ip_addr *addr)
+{
+	static const struct ip_addr nulladdr = {};
+
+	return memcmp(addr, &nulladdr, sizeof(nulladdr)) == 0;
+}
+
+/* A ':' anywhere in the argument selects IPv6, otherwise IPv4 is assumed. */
+static int handle_ipaddr(char *optarg, void *tgt, __unused struct prog_option *opt)
+{
+	struct ip_addr *addr = tgt;
+	int af;
+
+	af = strchr(optarg, ':') ? AF_INET6 : AF_INET;
+
+	if (inet_pton(af, optarg, &addr->addr) != 1) {
+		pr_warn("Invalid IP address: %s\n", optarg);
+		return -ENOENT; /* caller won't print error on ENOENT */
+	}
+
+	addr->af = af;
+	return 0;
+}
+
+/* Look up @chr in the table @enum_vals, which ends at a NULL name. */
+static const struct enum_val *find_enum(const struct enum_val *enum_vals,
+					const char *chr)
+{
+	while (enum_vals->name) {
+		if (strcmp(chr, enum_vals->name) == 0)
+			return enum_vals;
+		enum_vals++;
+	}
+	return NULL;
+}
+
+static int handle_enum(char *optarg, void *tgt, struct prog_option *opt)
+{
+	const struct enum_val *val, *all_vals = opt->typearg;
+	unsigned int *opt_set = tgt;
+
+	val = find_enum(all_vals, optarg);
+	if (!val)
+		return -EINVAL;
+	*opt_set = val->value;
+	return 0;
+}
+
+/* Write the names in @vals to @buf as a comma-separated list.
+ *
+ * Never writes more than @buf_len bytes and always NUL-terminates when
+ * @buf_len > 0; a name that does not fit completely is left out together
+ * with its separator.
+ */
+static void print_enum_vals(char *buf, size_t buf_len,
+			    const struct enum_val *vals)
+{
+	const struct enum_val *val;
+	bool first = true;
+
+	if (!buf_len)
+		return;
+
+	for (val = vals; val->name; val++) {
+		int len;
+
+		/* Separator and name are written together so that at least
+		 * one byte always stays available for the terminator.
+		 */
+		len = snprintf(buf, buf_len, "%s%s", first ? "" : ",",
+			       val->name);
+		if (len < 0 || (size_t)len >= buf_len)
+			break;
+		first = false;
+		buf += len;
+		buf_len -= len;
+	}
+	*buf = '\0';
+}
+
+/* Return the name of the first entry in @vals equal to @value, or NULL. */
+const char *get_enum_name(const struct enum_val *vals, unsigned int value)
+{
+	const struct enum_val *val;
+
+	for (val = vals; val->name; val++)
+		if (val->value == value)
+			return val->name;
+	return NULL;
+}
+
+/* Parser for each option type; indexed by enum option_type so the table
+ * cannot silently go out of step with the enum.
+ */
+static const struct opthandler {
+	int (*func)(char *optarg, void *tgt, struct prog_option *opt);
+} handlers[__OPT_MAX] = {
+	[OPT_NONE] = {NULL},
+	[OPT_BOOL] = {handle_bool},
+	[OPT_FLAGS] = {handle_flags},
+	[OPT_STRING] = {handle_string},
+	[OPT_U16] = {handle_u16},
+	[OPT_U32] = {handle_u32},
+	[OPT_U32_MULTI] = {handle_u32_multi},
+	[OPT_MACADDR] = {handle_macaddr},
+	[OPT_IFNAME] = {handle_ifname},
+	[OPT_IFNAME_MULTI] = {handle_ifname_multi},
+	[OPT_IPADDR] = {handle_ipaddr},
+	[OPT_ENUM] = {handle_enum},
+	[OPT_MULTISTRING] = {handle_multistring}
+};
+
+/* Write the names of all flags in @flags whose bits are set in @flags_set to
+ * @buf as a comma-separated list.
+ *
+ * Never writes more than @buf_len bytes and always NUL-terminates when
+ * @buf_len > 0; a name that does not fit completely is left out together
+ * with its separator.
+ */
+void print_flags(char *buf, size_t buf_len, const struct flag_val *flags,
+		 unsigned long flags_set)
+{
+	const struct flag_val *flag;
+	bool first = true;
+
+	if (!buf_len)
+		return;
+
+	for (flag = flags; flag->flagstring; flag++) {
+		int len;
+
+		if (!(flag->flagval & flags_set))
+			continue;
+
+		/* Separator and name are written together so that at least
+		 * one byte always stays available for the terminator.
+		 */
+		len = snprintf(buf, buf_len, "%s%s", first ? "" : ",",
+			       flag->flagstring);
+		if (len < 0 || (size_t)len >= buf_len)
+			break;
+		first = false;
+		buf += len;
+		buf_len -= len;
+	}
+	*buf = '\0';
+}
+
+/* Help text for OPT_FLAGS: the help string plus every known flag name. */
+static void print_help_flags(const struct prog_option *opt)
+{
+	char buf[100] = {};
+
+	if (!opt->typearg)
+		pr_warn("Missing typearg for opt %s\n", opt->name);
+	else
+		print_flags(buf, sizeof(buf), opt->typearg, -1);
+
+	printf(" %s (valid values: %s)", opt->help, buf);
+}
+
+/* Help text for OPT_ENUM: the help string plus every known value name. */
+static void print_help_enum(const struct prog_option *opt)
+{
+	char buf[100] = {};
+
+	if (!opt->typearg)
+		pr_warn("Missing typearg for opt %s\n", opt->name);
+	else
+		print_enum_vals(buf, sizeof(buf), opt->typearg);
+
+	printf(" %s (valid values: %s)", opt->help, buf);
+}
+
+/* Custom help printers, indexed by enum option_type; types without an entry
+ * just get their plain help string printed.
+ */
+static const struct helprinter {
+	void (*func)(const struct prog_option *opt);
+} help_printers[__OPT_MAX] = {
+	[OPT_FLAGS] = {print_help_flags},
+	[OPT_ENUM] = {print_help_enum},
+};
+
+
+/* Print the name (or metavar) of every positional option on the usage line. */
+static void _print_positional(const struct prog_option *long_options)
+{
+	const struct prog_option *opt;
+
+	FOR_EACH_OPTION (long_options, opt) {
+		if (!opt->positional)
+			continue;
+
+		printf(" %s", opt->metavar ?: opt->name);
+	}
+}
+
+/* Print one help line for each option whose 'required' field equals
+ * @required.
+ */
+static void _print_options(const struct prog_option *poptions, bool required)
+{
+	const struct prog_option *opt;
+
+	FOR_EACH_OPTION (poptions, opt) {
+		if (opt->required != required)
+			continue;
+
+		if (opt->positional) {
+			printf(" %-30s", opt->metavar ?: opt->name);
+		} else {
+			char buf[BUFSIZE];
+			int pos;
+
+			/* Auto-numbered short opts (below FIRST_PRINTABLE)
+			 * are internal and not shown.
+			 */
+			if (opt->short_opt >= FIRST_PRINTABLE)
+				printf(" -%c,", opt->short_opt);
+			else
+				printf(" ");
+			pos = snprintf(buf, BUFSIZE, " --%s", opt->name);
+			if (pos < 0 || pos >= BUFSIZE) {
+				pr_warn("opt name too long: %s\n", opt->name);
+				continue;
+			}
+			if (opt->metavar)
+				snprintf(&buf[pos], BUFSIZE - pos, " %s",
+					 opt->metavar);
+			printf("%-28s", buf);
+		}
+
+		if (help_printers[opt->type].func != NULL)
+			help_printers[opt->type].func(opt);
+		else if (opt->help)
+			printf(" %s", opt->help);
+		printf("\n");
+	}
+}
+
+/* Return true if @str starts with @pfx. A NULL argument never matches; an
+ * empty @pfx matches every string.
+ */
+bool is_prefix(const char *pfx, const char *str)
+{
+	if (!pfx || !str)
+		return false;
+
+	/* strncmp() stops at the end of a shorter @str, so no separate length
+	 * comparison is needed.
+	 */
+	return strncmp(str, pfx, strlen(pfx)) == 0;
+}
+
+/* Print the usage line for @prog_name; when @full is set also print @doc and
+ * the list of all options.
+ */
+void usage(const char *prog_name, const char *doc,
+	   const struct prog_option *poptions, bool full)
+{
+	const struct prog_option *opt;
+	int num_req = 0;
+
+	printf("\nUsage: %s [options]", prog_name);
+	_print_positional(poptions);
+	printf("\n");
+
+	if (!full) {
+		printf("Use --help (or -h) to see full option list.\n");
+		return;
+	}
+
+	FOR_EACH_OPTION (poptions, opt)
+		if (opt->required)
+			num_req++;
+
+	printf("\n %s\n\n", doc);
+	if (num_req) {
+		printf("Required parameters:\n");
+		_print_options(poptions, true);
+		printf("\n");
+	}
+	printf("Options:\n");
+	_print_options(poptions, false);
+	printf(" -v, --verbose Enable verbose logging (-vv: more verbose)\n");
+	printf(" --version Display version information\n");
+	printf(" -h, --help Show this help\n");
+	printf("\n");
+}
+
+/* Build the getopt_long() inputs for @poptions.
+ *
+ * On success *@options is a calloc()ed, zero-terminated option array and
+ * *@optstring a strdup()ed short-option string; the caller frees both.
+ * Options without a short opt get a sequential number stored in their
+ * short_opt field (so @poptions is modified). Returns 0 or a negative errno.
+ */
+static int prog_options_to_options(struct prog_option *poptions,
+				   struct option **options, char **optstring)
+{
+	int num = 0, num_cmn = 0, n_sopt = VERSION_SHORT_OPT + 1;
+	struct option *new_options, *nopt;
+	struct prog_option *opt;
+	char buf[100], *c = buf;
+	const char *buf_end = buf + sizeof(buf);
+
+	struct option common_opts[] = {
+		{"help", no_argument, NULL, 'h'},
+		{"verbose", no_argument, NULL, 'v'},
+		{"version", no_argument, NULL, VERSION_SHORT_OPT},
+		{}
+	};
+
+	for (nopt = common_opts; nopt->name; nopt++) {
+		num++;
+		num_cmn++;
+		if (nopt->val != VERSION_SHORT_OPT)
+			*c++ = nopt->val;
+	}
+
+	FOR_EACH_OPTION (poptions, opt)
+		if (!opt->positional)
+			num++;
+
+	/* One extra, zeroed entry terminates the array for getopt_long(). */
+	new_options = calloc(num + 1, sizeof(struct option));
+	if (!new_options)
+		return -ENOMEM;
+
+	memcpy(new_options, &common_opts, sizeof(struct option) * num_cmn);
+	nopt = new_options + num_cmn;
+
+	FOR_EACH_OPTION (poptions, opt) {
+		if (opt->positional)
+			continue;
+		if (opt->short_opt) {
+			/* Need room for the option character, an optional ':'
+			 * and the terminating NUL.
+			 */
+			if (buf_end - c < 3) {
+				pr_warn("Too many options with a short opt\n");
+				goto err;
+			}
+			*(c++) = opt->short_opt;
+			if (opt_needs_arg(opt))
+				*(c++) = ':';
+		} else {
+			/* getopt expects options to have unique values in the
+			 * 'val' field, however we want to be able to define
+			 * options that don't have a short opt. So get around
+			 * that, just number such options sequentially.
+			 */
+			if (n_sopt >= FIRST_PRINTABLE) {
+				pr_warn("Too many options with no short opt\n");
+				goto err;
+			}
+			opt->short_opt = n_sopt++;
+		}
+		nopt->has_arg = opt_needs_arg(opt) ? required_argument : no_argument;
+		nopt->name = opt->name;
+		nopt->val = opt->short_opt;
+		nopt->flag = NULL;
+		nopt++;
+	}
+	*(c++) = '\0';
+
+	*optstring = strdup(buf);
+	if (!*optstring)
+		goto err;
+
+	*options = new_options;
+	return 0;
+
+err:
+	free(new_options);
+	return -EINVAL;
+}
+
+/* Find the option whose short_opt equals @optchar, or NULL. */
+static struct prog_option *find_opt(struct prog_option *all_opts, int optchar)
+{
+	struct prog_option *opt;
+
+	FOR_EACH_OPTION (all_opts, opt)
+		if (opt->short_opt == optchar)
+			return opt;
+	return NULL;
+}
+
+/* Run the type handler of @opt on @optarg, storing the result in @cfg at the
+ * option's offset, and count the occurrence. -ENOENT from a handler is passed
+ * on without a warning because those handlers print their own message.
+ */
+static int _set_opt(void *cfg, struct prog_option *opt, char *optarg)
+{
+	int ret;
+
+	if (opt->max_num && opt->num_set + 1 > opt->max_num) {
+		pr_warn("Too many parameters for %s (max %u)\n",
+			opt->metavar ?: opt->name, opt->max_num);
+		return -E2BIG;
+	}
+
+	/* Don't index or call through the handler table with a bogus type. */
+	if (opt->type >= __OPT_MAX || !handlers[opt->type].func)
+		return -EINVAL;
+
+	ret = handlers[opt->type].func(optarg, (cfg + opt->cfg_offset), opt);
+	if (!ret)
+		opt->num_set++;
+	else if (ret != -ENOENT)
+		pr_warn("Couldn't parse option %s: %s.\n", opt->name, strerror(-ret));
+	return ret;
+}
+
+/* Set the option identified by the getopt value @optchar. */
+static int set_opt(void *cfg, struct prog_option *all_opts, int optchar,
+		   char *optarg)
+{
+	struct prog_option *opt;
+
+	if (!cfg)
+		return -EFAULT;
+
+	opt = find_opt(all_opts, optchar);
+	if (!opt)
+		return -ENOENT;
+
+	return _set_opt(cfg, opt, optarg);
+}
+
+/* Assign @optarg to the first positional option that is still unset or that
+ * accepts multiple values.
+ */
+static int set_pos_opt(void *cfg, struct prog_option *all_opts, char *optarg)
+{
+	struct prog_option *o, *opt = NULL;
+
+	FOR_EACH_OPTION (all_opts, o) {
+		if (o->positional && (!o->num_set || opt_is_multi(o))) {
+			opt = o;
+			break;
+		}
+	}
+
+	if (!opt)
+		return -ENOENT;
+
+	return _set_opt(cfg, opt, optarg);
+}
+
+/* Parse @argv according to @poptions into the config structure @cfg.
+ *
+ * @prog is the program name shown by --version, @usage_cmd the command shown
+ * on every usage line. For each non-required option that was not given often
+ * enough, the option's field is copied from @defaults (if non-NULL).
+ *
+ * Returns 0 on success, -ENOMEM if the getopt tables could not be built and
+ * EXIT_FAILURE otherwise. Note that --help and --version also return
+ * EXIT_FAILURE after printing their output.
+ */
+int parse_cmdline_args(int argc, char **argv, struct prog_option *poptions,
+		       void *cfg, const char *prog, const char *usage_cmd,
+		       const char *doc, const void *defaults)
+{
+	struct prog_option *opt_iter;
+	struct option *long_options;
+	bool full_help = false;
+	int i, opt, err = 0;
+	int longindex = 0;
+	char *optstring;
+
+	if (prog_options_to_options(poptions, &long_options, &optstring)) {
+		pr_warn("Unable to malloc()\n");
+		return -ENOMEM;
+	}
+
+	/* Parse command line args */
+	while ((opt = getopt_long(argc, argv, optstring,
+				  long_options, &longindex)) != -1) {
+		switch (opt) {
+		case 'h':
+			usage(usage_cmd, doc, poptions, true);
+			err = EXIT_FAILURE;
+			goto out;
+		case 'v':
+			increase_log_level();
+			break;
+		case VERSION_SHORT_OPT:
+			printf("%s version %s using libbpf version %s\n",
+			       prog,
+			       TOOLS_VERSION,
+			       get_libbpf_version());
+			err = EXIT_FAILURE;
+			goto out;
+		default:
+			if (set_opt(cfg, poptions, opt, optarg)) {
+				usage(usage_cmd, doc, poptions, full_help);
+				err = EXIT_FAILURE;
+				goto out;
+			}
+			break;
+		}
+	}
+
+	/* Everything getopt left over is a positional argument. */
+	for (i = optind; i < argc; i++) {
+		if (set_pos_opt(cfg, poptions, argv[i])) {
+			usage(usage_cmd, doc, poptions, full_help);
+			err = EXIT_FAILURE;
+			goto out;
+		}
+	}
+
+	FOR_EACH_OPTION (poptions, opt_iter) {
+		/* Given, and often enough: nothing more to do. */
+		if (opt_iter->num_set && (!opt_iter->min_num ||
+					  opt_iter->num_set >= opt_iter->min_num))
+			continue;
+
+		if (opt_iter->required) {
+			if (opt_iter->positional)
+				pr_warn("Missing required parameter %s\n",
+					opt_iter->metavar ?: opt_iter->name);
+			else
+				pr_warn("Missing required option '--%s'\n",
+					opt_iter->name);
+			usage(usage_cmd, doc, poptions, full_help);
+			err = EXIT_FAILURE;
+			goto out;
+		} else if (defaults) {
+			void *dst = cfg + opt_iter->cfg_offset;
+			const void *src = defaults + opt_iter->cfg_offset;
+
+			memcpy(dst, src, opt_iter->opt_size);
+		}
+	}
+out:
+	free(long_options);
+	free(optstring);
+
+	return err;
+}
+
+/* Run the first command in @cmds whose name starts with @argv0.
+ *
+ * Commands flagged no_cfg are called directly with NULL arguments. For all
+ * others a zeroed config of @cfg_size bytes is allocated and filled from the
+ * command line, the BPF environment is checked and the program lock is held
+ * while the command function runs. Returns the command's return value, or
+ * EXIT_FAILURE if any step before the call fails.
+ */
+int dispatch_commands(const char *argv0, int argc, char **argv,
+		      const struct prog_command *cmds, size_t cfg_size,
+		      const char *prog_name, bool needs_bpffs)
+{
+	const struct prog_command *c, *cmd = NULL;
+	int ret = EXIT_FAILURE, err, len;
+	char pin_root_path[PATH_MAX];
+	char usagebuf[100];
+	void *cfg;
+
+	for (c = cmds; c->name; c++) {
+		if (is_prefix(argv0, c->name)) {
+			cmd = c;
+			break;
+		}
+	}
+
+	if (!cmd) {
+		pr_warn("Command '%s' is unknown, try '%s help'.\n",
+			argv0, prog_name);
+		return EXIT_FAILURE;
+	}
+
+	if (cmd->no_cfg)
+		return cmd->func(NULL, NULL);
+
+	cfg = calloc(1, cfg_size);
+	if (!cfg) {
+		pr_warn("Couldn't allocate memory\n");
+		return EXIT_FAILURE;
+	}
+
+	len = snprintf(usagebuf, sizeof(usagebuf), "%s %s", prog_name, cmd->name);
+	if (len < 0 || (size_t)len >= sizeof(usagebuf))
+		goto out;
+
+	err = parse_cmdline_args(argc, argv, cmd->options, cfg, prog_name, usagebuf,
+				 cmd->doc, cmd->default_cfg);
+	if (err)
+		goto out;
+
+	/* A failure here only matters when the command needs bpffs. */
+	err = get_bpf_root_dir(pin_root_path, sizeof(pin_root_path), prog_name,
+			       needs_bpffs);
+	if (err && needs_bpffs)
+		goto out;
+
+	err = check_bpf_environ();
+	if (err)
+		goto out;
+
+	if (prog_lock_get(prog_name))
+		goto out;
+
+	ret = cmd->func(cfg, pin_root_path);
+	prog_lock_release(0);
+out:
+	free(cfg);
+	return ret;
+}
diff --git a/lib/util/params.h b/lib/util/params.h
new file mode 100644
index 0000000..fa77964
--- /dev/null
+++ b/lib/util/params.h
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __PARAMS_H
+#define __PARAMS_H
+
+#include <getopt.h>
+#include <stdbool.h>
+#include <stddef.h> /* offsetof(), used by DEFINE_OPTION */
+#include <stdlib.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/if_ether.h>
+#include <bpf/libbpf.h>
+
+/* Option value types. OPT_NONE terminates an option array; the handler and
+ * help-printer tables in params.c are indexed by these values.
+ */
+enum option_type {
+	OPT_NONE,
+	OPT_BOOL,
+	OPT_FLAGS,
+	OPT_STRING,
+	OPT_U16,
+	OPT_U32,
+	OPT_U32_MULTI,
+	OPT_MACADDR,
+	OPT_IFNAME,
+	OPT_IFNAME_MULTI,
+	OPT_IPADDR,
+	OPT_ENUM,
+	OPT_MULTISTRING,
+	__OPT_MAX
+};
+
+/* Description of one command line option; normally built with DEFINE_OPTION. */
+struct prog_option {
+	enum option_type type;
+	size_t cfg_size;	/* sizeof the config struct */
+	size_t cfg_offset;	/* offset of the target member in the config */
+	size_t opt_size;	/* sizeof the target member */
+	char *name;		/* long option name, without the "--" */
+	char short_opt;		/* short option char; 0 = assigned at parse time */
+	char *help;
+	char *metavar;		/* argument placeholder shown in help output */
+	void *typearg;		/* flag_val/enum_val table for OPT_FLAGS/OPT_ENUM */
+	bool required;
+	bool positional;
+	unsigned int min_num;	/* minimum number of occurrences; 0 = no minimum */
+	unsigned int max_num;	/* maximum number of occurrences; 0 = no maximum */
+	unsigned int num_set;	/* occurrences seen so far (updated by the parser) */
+};
+
+/* Entry of an OPT_FLAGS table; the table ends at a NULL flagstring. */
+struct flag_val {
+	const char *flagstring;
+	unsigned int flagval;
+};
+
+/* Entry of an OPT_ENUM table; the table ends at a NULL name. */
+struct enum_val {
+	const char *name;
+	unsigned int value;
+};
+
+/* Target of OPT_MULTISTRING: realloc()ed array of argument pointers. */
+struct multistring {
+	const char **strings;
+	size_t num_strings;
+};
+
+/* Target of OPT_U32_MULTI: realloc()ed array of values. */
+struct u32_multi {
+	__u32 *vals;
+	size_t num_vals;
+};
+
+/* Target of OPT_IFNAME; OPT_IFNAME_MULTI builds a list linked through next. */
+struct iface {
+	struct iface *next;
+	char *ifname;
+	int ifindex;
+};
+
+/* Target of OPT_IPADDR; af selects which union member is valid. */
+struct ip_addr {
+	int af;
+	union {
+		struct in_addr addr4;
+		struct in6_addr addr6;
+	} addr;
+};
+
+/* Target of OPT_MACADDR. */
+struct mac_addr {
+	unsigned char addr[ETH_ALEN];
+};
+
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+
+#define DEFINE_OPTION(_name, _type, _cfgtype, _cfgmember, ...) \
+	{ \
+		.cfg_size = sizeof(_cfgtype), \
+		.opt_size = sizeof_field(_cfgtype, _cfgmember), \
+		.cfg_offset = offsetof(_cfgtype, _cfgmember), .name = _name, \
+		.type = _type, __VA_ARGS__ \
+	}
+
+/* Zero-initialised terminator entry (type == OPT_NONE). */
+#define END_OPTIONS \
+	{ \
+	}
+
+#define FOR_EACH_OPTION(_options, _opt) \
+	for ((_opt) = (_options); (_opt)->type != OPT_NONE; (_opt)++)
+
+/* Description of one subcommand, as consumed by dispatch_commands(). */
+struct prog_command {
+	const char *name;
+	int (*func)(const void *cfg, const char *pin_root_path);
+	struct prog_option *options;
+	const void *default_cfg;
+	char *doc;
+	bool no_cfg;	/* call func(NULL, NULL) without parsing any options */
+};
+
+#define DEFINE_COMMAND_NAME(_name, _func, _doc) \
+	{ \
+		.name = _name, .func = do_##_func, \
+		.options = _func##_options, .default_cfg = &defaults_##_func, \
+		.doc = _doc \
+	}
+#define DEFINE_COMMAND(_name, _doc) DEFINE_COMMAND_NAME(textify(_name), _name, _doc)
+
+#define DEFINE_COMMAND_NODEF(_name, _doc) \
+	{ \
+		.name = textify(_name), .func = do_##_name, \
+		.options = _name##_options, .doc = _doc \
+	}
+
+/* Zero-initialised terminator entry (name == NULL). */
+#define END_COMMANDS \
+	{ \
+	}
+
+const char *get_enum_name(const struct enum_val *vals, unsigned int value);
+void print_flags(char *buf, size_t buf_len, const struct flag_val *flags,
+		 unsigned long flags_val);
+void print_addr(char *buf, size_t buf_len, const struct ip_addr *addr);
+void print_macaddr(char *buf, size_t buf_len, const struct mac_addr *addr);
+bool macaddr_is_null(const struct mac_addr *addr);
+bool ipaddr_is_null(const struct ip_addr *addr);
+bool is_prefix(const char *prefix, const char *string);
+void usage(const char *prog_name, const char *doc,
+	   const struct prog_option *long_options, bool full);
+
+int parse_cmdline_args(int argc, char **argv, struct prog_option *long_options,
+		       void *cfg, const char *prog, const char *usage_cmd,
+		       const char *doc, const void *defaults);
+
+int dispatch_commands(const char *argv0, int argc, char **argv,
+		      const struct prog_command *cmds, size_t cfg_size,
+		      const char *prog_name, bool needs_bpffs);
+
+#endif /* __PARAMS_H */
diff --git a/lib/util/stats.c b/lib/util/stats.c
new file mode 100644
index 0000000..f04c968
--- /dev/null
+++ b/lib/util/stats.c
@@ -0,0 +1,292 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <locale.h>
+#include <unistd.h>
+#include <time.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "stats.h"
+#include "util.h"
+#include "logging.h"
+
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+
+/* Store the current CLOCK_MONOTONIC time in nanoseconds in *@nstime.
+ * Returns 0 on success or the (negative) clock_gettime() result.
+ */
+static int gettime(__u64 *nstime)
+{
+	struct timespec t;
+	int res;
+
+	res = clock_gettime(CLOCK_MONOTONIC, &t);
+	if (res < 0) {
+		/* The return value is always -1; errno holds the reason. */
+		pr_warn("Error with clock_gettime! (%s)\n", strerror(errno));
+		return res;
+	}
+
+	*nstime = (__u64)t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
+	return 0;
+}
+
+/* Seconds elapsed between the timestamps of @p and @r; 0 if they are equal. */
+static double calc_period(struct record *r, struct record *p)
+{
+	double period_ = 0;
+	__u64 period = 0;
+
+	period = r->timestamp - p->timestamp;
+	if (period > 0)
+		period_ = ((double)period / NANOSEC_PER_SEC);
+
+	return period_;
+}
+
+/* Print total packets and KiB for every enabled XDP action in @stats_rec.
+ * Returns 0, or the negative printf() result if printing fails.
+ */
+int stats_print_one(struct stats_record *stats_rec)
+{
+	__u64 packets, bytes;
+	struct record *rec;
+	int i, err;
+
+	/* Print for each XDP actions stats */
+	for (i = 0; i < XDP_ACTION_MAX; i++) {
+		const char *action = action2str(i);
+
+		rec = &stats_rec->stats[i];
+		packets = rec->total.rx_packets;
+		bytes = rec->total.rx_bytes;
+
+		if (rec->enabled) {
+			/* The counters are unsigned 64-bit values, so print
+			 * them with %llu and matching casts; keeping the
+			 * format as a literal lets the compiler check it.
+			 */
+			err = printf(" %-35s %'11llu pkts %'11llu KiB\n",
+				     action, (unsigned long long)packets,
+				     (unsigned long long)(bytes / 1024));
+			if (err < 0)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+int stats_print(struct stats_record *stats_rec, struct stats_record *stats_prev)
+{
+	struct record *rec, *prev;
+	__u64 packets, bytes;
+	struct timespec t;
+	bool first = true;
+	double period;
+	double pps; /* packets per sec */
+	double bps; /* bits per sec */
+	int i, err;
+
+	err = 
clock_gettime(CLOCK_REALTIME, &t); + if (err < 0) { + pr_warn("Error with gettimeofday! (%i)\n", err); + return err; + } + + /* Print for each XDP actions stats */ + for (i = 0; i < XDP_ACTION_MAX; i++) { + char *fmt = "%-12s %'11lld pkts (%'10.0f pps)" + " %'11lld KiB (%'6.0f Mbits/s)\n"; + const char *action = action2str(i); + + rec = &stats_rec->stats[i]; + prev = &stats_prev->stats[i]; + + if (!rec->enabled) + continue; + + packets = rec->total.rx_packets - prev->total.rx_packets; + bytes = rec->total.rx_bytes - prev->total.rx_bytes; + + period = calc_period(rec, prev); + if (period == 0) + return 0; + + if (first) { + printf("Period of %fs ending at %ld.%06ld\n", period, + (long) t.tv_sec, (long) t.tv_nsec / 1000); + first = false; + } + + pps = packets / period; + + bps = (bytes * 8) / period / 1000000; + + printf(fmt, action, rec->total.rx_packets, pps, + rec->total.rx_bytes / 1024, bps, period); + } + printf("\n"); + + return 0; +} + +/* BPF_MAP_TYPE_ARRAY */ +static int map_get_value_array(int fd, __u32 key, struct xdp_stats_record *value) +{ + int err = 0; + + err = bpf_map_lookup_elem(fd, &key, value); + if (err) + pr_debug("bpf_map_lookup_elem failed key:0x%X\n", key); + + return err; +} + +/* BPF_MAP_TYPE_PERCPU_ARRAY */ +static int map_get_value_percpu_array(int fd, __u32 key, struct xdp_stats_record *value) +{ + /* For percpu maps, userspace gets a value per possible CPU */ + int nr_cpus = libbpf_num_possible_cpus(); + struct xdp_stats_record *values; + __u64 sum_bytes = 0; + __u64 sum_pkts = 0; + int i, err; + + if (nr_cpus < 0) + return nr_cpus; + + values = calloc(nr_cpus, sizeof(*values)); + if (!values) + return -ENOMEM; + + err = bpf_map_lookup_elem(fd, &key, values); + if (err) { + pr_debug("bpf_map_lookup_elem failed key:0x%X\n", key); + goto out; + } + + /* Sum values from each CPU */ + for (i = 0; i < nr_cpus; i++) { + sum_pkts += values[i].rx_packets; + sum_bytes += values[i].rx_bytes; + } + value->rx_packets = sum_pkts; + value->rx_bytes 
= sum_bytes; +out: + free(values); + return err; +} + +static int map_collect(int fd, __u32 map_type, __u32 key, struct record *rec) +{ + struct xdp_stats_record value = {}; + int err; + + /* Get time as close as possible to reading map contents */ + err = gettime(&rec->timestamp); + if (err) + return err; + + switch (map_type) { + case BPF_MAP_TYPE_ARRAY: + err = map_get_value_array(fd, key, &value); + break; + case BPF_MAP_TYPE_PERCPU_ARRAY: + err = map_get_value_percpu_array(fd, key, &value); + break; + default: + pr_warn("Unknown map_type: %u cannot handle\n", map_type); + err = -EINVAL; + break; + } + + if (err) + return err; + + rec->total.rx_packets = value.rx_packets; + rec->total.rx_bytes = value.rx_bytes; + return 0; +} + +int stats_collect(int map_fd, __u32 map_type, struct stats_record *stats_rec) +{ + /* Collect all XDP actions stats */ + __u32 key; + int err; + + for (key = 0; key < XDP_ACTION_MAX; key++) { + if (!stats_rec->stats[key].enabled) + continue; + + err = map_collect(map_fd, map_type, key, + &stats_rec->stats[key]); + if (err) + return err; + } + + return 0; +} + +static int check_map_pin(__u32 map_id, const char *pin_dir, const char *map_name) +{ + struct bpf_map_info info = {}; + int fd, ret = 0; + + fd = get_pinned_map_fd(pin_dir, map_name, &info); + if (fd < 0) { + if (fd == -ENOENT) + pr_warn("Stats map disappeared while polling\n"); + else + pr_warn("Unable to re-open stats map\n"); + return fd; + } + + if (info.id != map_id) { + pr_warn("Stats map ID changed while polling\n"); + ret = -EINVAL; + } + close(fd); + + return ret; +} + +int stats_poll(int map_fd, int interval, bool *exit, + const char *pin_dir, const char *map_name) +{ + struct bpf_map_info info = {}; + struct stats_record prev, record = { 0 }; + __u32 info_len = sizeof(info); + __u32 map_type, map_id; + int err; + + record.stats[XDP_DROP].enabled = true; + record.stats[XDP_PASS].enabled = true; + record.stats[XDP_REDIRECT].enabled = true; + record.stats[XDP_TX].enabled = 
true; + + if (!interval) + return -EINVAL; + + err = bpf_obj_get_info_by_fd(map_fd, &info, &info_len); + if (err) + return -errno; + map_type = info.type; + map_id = info.id; + + /* Get initial reading quickly */ + stats_collect(map_fd, map_type, &record); + + usleep(1000000 / 4); + + while (!*exit) { + if (pin_dir) { + err = check_map_pin(map_id, pin_dir, map_name); + if (err) + return err; + } + + memset(&info, 0, sizeof(info)); + prev = record; /* struct copy */ + stats_collect(map_fd, map_type, &record); + err = stats_print(&record, &prev); + if (err) + return err; + usleep(interval * 1000); + } + + return 0; +} diff --git a/lib/util/stats.h b/lib/util/stats.h new file mode 100644 index 0000000..9ee0cad --- /dev/null +++ b/lib/util/stats.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __STATS_H +#define __STATS_H + +#include <bpf/libbpf.h> + +#include "xdp/xdp_stats_kern_user.h" + +struct record { + __u64 timestamp; + bool enabled; + struct xdp_stats_record total; /* defined in common_kern_user.h */ +}; + +struct stats_record { + struct record stats[XDP_ACTION_MAX]; +}; + +int stats_print_one(struct stats_record *stats_rec); +int stats_print(struct stats_record *stats_rec, + struct stats_record *stats_prev); +int stats_collect(int map_fd, __u32 map_type, struct stats_record *stats_rec); +int stats_poll(int map_fd, int interval, bool *exit, const char *pin_dir, const char *map_name); + +#endif diff --git a/lib/util/util.c b/lib/util/util.c new file mode 100644 index 0000000..70c5d18 --- /dev/null +++ b/lib/util/util.c @@ -0,0 +1,946 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <errno.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <signal.h> +#include <sys/types.h> +#include <sys/resource.h> +#include <sys/vfs.h> +#include <sys/stat.h> +#include <linux/if_link.h> /* Need XDP flags */ +#include <linux/magic.h> /* BPF FS magic */ +#include <linux/err.h> /* ERR_PTR */ +#include 
<bpf/bpf.h> +#include <dirent.h> +#include <net/if.h> + +#include "util.h" +#include "logging.h" + +static struct enum_val xdp_modes[] = { + {"native", XDP_MODE_NATIVE}, + {"skb", XDP_MODE_SKB}, + {"hw", XDP_MODE_HW}, + {"unspecified", XDP_MODE_UNSPEC}, + {NULL, 0} +}; + +int try_snprintf(char *buf, size_t buf_len, const char *format, ...) +{ + va_list args; + int len; + + va_start(args, format); + len = vsnprintf(buf, buf_len, format, args); + va_end(args); + + if (len < 0) + return -EINVAL; + else if ((size_t)len >= buf_len) + return -ENAMETOOLONG; + + return 0; +} + +static int set_rlimit(unsigned int min_limit) +{ + struct rlimit limit; + int err = 0; + + err = getrlimit(RLIMIT_MEMLOCK, &limit); + if (err) { + err = -errno; + pr_warn("Couldn't get current rlimit\n"); + return err; + } + + if (limit.rlim_cur == RLIM_INFINITY || limit.rlim_cur == 0) { + pr_debug("Current rlimit is infinity or 0. Not raising\n"); + return -ENOMEM; + } + + if (min_limit) { + if (limit.rlim_cur >= min_limit) { + pr_debug("Current rlimit %ju already >= minimum %u\n", + (uintmax_t)limit.rlim_cur, min_limit); + return 0; + } + pr_debug("Setting rlimit to minimum %u\n", min_limit); + limit.rlim_cur = min_limit; + } else { + pr_debug("Doubling current rlimit of %ju\n", (uintmax_t)limit.rlim_cur); + limit.rlim_cur <<= 1; + } + limit.rlim_max = max(limit.rlim_cur, limit.rlim_max); + + err = setrlimit(RLIMIT_MEMLOCK, &limit); + if (err) { + err = -errno; + pr_warn("Couldn't raise rlimit: %s\n", strerror(-err)); + return err; + } + + return 0; +} + +int double_rlimit(void) +{ + pr_debug("Permission denied when loading eBPF object; " + "raising rlimit and retrying\n"); + + return set_rlimit(0); +} + +static const char *_libbpf_compile_version = LIBBPF_VERSION; +static char _libbpf_version[10] = {}; + +const char *get_libbpf_version(void) +{ + /* Start by copying compile-time version into buffer so we have a + * fallback value in case we are dynamically linked, or can't find a + * version in 
/proc/self/maps below. + */ + strncpy(_libbpf_version, _libbpf_compile_version, + sizeof(_libbpf_version)-1); + +#ifdef LIBBPF_DYNAMIC + char path[PATH_MAX], buf[PATH_MAX], *s; + bool found = false; + FILE *fp; + + /* When dynamically linking against libbpf, we can't be sure that the + * version we discovered at compile time is actually the one we are + * using at runtime. This can lead to hard-to-debug errors, so we try to + * discover the correct version at runtime. + * + * The simple solution to this would be if libbpf itself exported a + * version in its API. But since it doesn't, we work around this by + * parsing the mappings of the binary at runtime, looking for the full + * filename of libbpf.so and using that. + */ + fp = fopen("/proc/self/maps", "r"); + if (fp == NULL) + goto out; + + while ((s = fgets(buf, sizeof(buf), fp)) != NULL) { + /* We are looking for a line like: + * 7f63c2105000-7f63c2106000 rw-p 00032000 fe:02 4200947 /usr/lib/libbpf.so.0.1.0 + */ + if (sscanf(s, "%*x-%*x %*4c %*x %*5c %*d %s\n", path) == 1 && + (s = strstr(path, "libbpf.so.")) != NULL) { + strncpy(_libbpf_version, s+10, sizeof(_libbpf_version)-1); + found = true; + break; + } + } + + fclose(fp); +out: + if (!found) + pr_warn("Couldn't find runtime libbpf version - falling back to compile-time value!\n"); + +#endif + _libbpf_version[sizeof(_libbpf_version)-1] = '\0'; + return _libbpf_version; +} + +int find_bpf_file(char *buf, size_t buf_size, const char *progname) +{ + static char *bpf_obj_paths[] = { +#ifdef DEBUG + ".", +#endif + BPF_OBJECT_PATH, + NULL + }; + struct stat sb = {}; + char **path; + int err; + + for (path = bpf_obj_paths; *path; path++) { + err = try_snprintf(buf, buf_size, "%s/%s", *path, progname); + if (err) + return err; + + pr_debug("Looking for '%s'\n", buf); + err = stat(buf, &sb); + if (err) + continue; + + return 0; + } + + pr_warn("Couldn't find a BPF file with name %s\n", progname); + return -ENOENT; +} + +struct bpf_object *open_bpf_file(const char 
*progname, + struct bpf_object_open_opts *opts) +{ + char buf[PATH_MAX]; + int err; + + err = find_bpf_file(buf, sizeof(buf), progname); + if (err) + return ERR_PTR(err); + + pr_debug("Loading bpf file '%s' from '%s'\n", progname, buf); + return bpf_object__open_file(buf, opts); +} + +static int get_pinned_object_fd(const char *path, void *info, __u32 *info_len) +{ + char errmsg[STRERR_BUFSIZE]; + int pin_fd, err; + + pin_fd = bpf_obj_get(path); + if (pin_fd < 0) { + err = -errno; + libbpf_strerror(-err, errmsg, sizeof(errmsg)); + pr_debug("Couldn't retrieve pinned object '%s': %s\n", path, errmsg); + return err; + } + + if (info) { + err = bpf_obj_get_info_by_fd(pin_fd, info, info_len); + if (err) { + err = -errno; + libbpf_strerror(-err, errmsg, sizeof(errmsg)); + pr_debug("Couldn't retrieve object info: %s\n", errmsg); + return err; + } + } + + return pin_fd; +} + +int make_dir_subdir(const char *parent, const char *dir) +{ + char path[PATH_MAX]; + int err; + + err = try_snprintf(path, sizeof(path), "%s/%s", parent, dir); + if (err) + return err; + + err = mkdir(parent, S_IRWXU); + if (err && errno != EEXIST) { + err = -errno; + return err; + } + + err = mkdir(path, S_IRWXU); + if (err && errno != EEXIST) { + err = -errno; + return err; + } + + return 0; +} + +int attach_xdp_program(struct xdp_program *prog, const struct iface *iface, + enum xdp_attach_mode mode, const char *pin_root_path) +{ + char pin_path[PATH_MAX]; + int err = 0; + + if (!prog || !pin_root_path) + return -EINVAL; + + err = make_dir_subdir(pin_root_path, "programs"); + if (err) { + pr_warn("Unable to create pin directory: %s\n", strerror(-err)); + return err; + } + + err = try_snprintf(pin_path, sizeof(pin_path), "%s/programs/%s/%s", + pin_root_path, iface->ifname, + xdp_program__name(prog)); + if (err) + return err; + + err = xdp_program__attach(prog, iface->ifindex, mode, 0); + if (err) { + if (pin_root_path && err != -EEXIST) + unlink(pin_path); + return err; + } + + pr_debug("Program '%s' 
loaded on interface '%s'%s\n", + xdp_program__name(prog), iface->ifname, + mode == XDP_MODE_SKB ? " in skb mode" : ""); + + err = xdp_program__pin(prog, pin_path); + if (err) { + pr_warn("Unable to pin XDP program at %s: %s\n", + pin_path, strerror(-err)); + goto unload; + } + pr_debug("XDP program pinned at %s\n", pin_path); + return err; + +unload: + xdp_program__detach(prog, iface->ifindex, mode, 0); + return err; +} + +int detach_xdp_program(struct xdp_program *prog, const struct iface *iface, + enum xdp_attach_mode mode, const char *pin_root_path) +{ + char pin_path[PATH_MAX]; + int err; + + err = xdp_program__detach(prog, iface->ifindex, mode, 0); + if (err) + goto out; + + err = try_snprintf(pin_path, sizeof(pin_path), "%s/programs/%s/%s", + pin_root_path, iface->ifname, + xdp_program__name(prog)); + if (err) + return err; + + err = unlink(pin_path); + if (err && errno != ENOENT) + goto out; + + err = try_snprintf(pin_path, sizeof(pin_path), "%s/programs/%s", + pin_root_path, iface->ifname); + if (err) + goto out; + + err = rmdir(pin_path); + if (err && errno == ENOENT) + err = 0; + else if (err) + err = -errno; +out: + return err; +} + +int get_pinned_program(const struct iface *iface, const char *pin_root_path, + enum xdp_attach_mode *mode, + struct xdp_program **xdp_prog) +{ + int ret = -ENOENT, err, ifindex = iface->ifindex; + char pin_path[PATH_MAX]; + bool remove_all = false; + enum xdp_attach_mode m; + struct dirent *de; + DIR *dr; + + err = try_snprintf(pin_path, sizeof(pin_path), "%s/programs/%s", + pin_root_path, iface->ifname); + if (err) + return err; + + dr = opendir(pin_path); + if (!dr) { + err = -errno; + pr_debug("Couldn't open pin directory %s: %s\n", + pin_path, strerror(-err)); + return err; + } + + if (!ifindex) + ifindex = if_nametoindex(iface->ifname); + if (!ifindex) { + pr_debug("Interface %s no longer exists\n", iface->ifname); + remove_all = true; + ret = -ENODEV; + } + + while ((de = readdir(dr)) != NULL) { + 
DECLARE_LIBXDP_OPTS(xdp_program_opts, opts, 0); + struct xdp_program *prog; + + if (!strcmp(".", de->d_name) || !strcmp("..", de->d_name)) + continue; + + err = try_snprintf(pin_path, sizeof(pin_path), + "%s/programs/%s/%s", pin_root_path, + iface->ifname, de->d_name); + if (err) + goto out; + + if (remove_all) { + err = unlink(pin_path); + if (err) + ret = err; + continue; + } + + opts.pin_path = pin_path; + prog = xdp_program__create(&opts); + if (libxdp_get_error(prog) || + !(m = xdp_program__is_attached(prog, iface->ifindex))) { + ret = libxdp_get_error(prog) ?: -ENOENT; + pr_debug("Program %s no longer loaded on %s: %s\n", + de->d_name, iface->ifname, strerror(-ret)); + err = unlink(pin_path); + if (err) + ret = err; + if (prog) + xdp_program__close(prog); + } else { + if (strcmp(xdp_program__name(prog), de->d_name)) { + pr_warn("Pinned and kernel prog names differ: %s/%s\n", + xdp_program__name(prog), de->d_name); + ret = -EFAULT; + xdp_program__close(prog); + } else { + ret = 0; + *xdp_prog = prog; + if (mode) + *mode = m; + } + break; + } + } +out: + closedir(dr); + return ret; +} + +int iterate_pinned_programs(const char *pin_root_path, program_callback cb, + void *arg) +{ + char pin_path[PATH_MAX]; + struct dirent *de; + int err = 0; + DIR *dr; + + err = try_snprintf(pin_path, sizeof(pin_path), "%s/programs", + pin_root_path); + if (err) + return err; + + dr = opendir(pin_path); + if (!dr) + return -ENOENT; + + while ((de = readdir(dr)) != NULL) { + enum xdp_attach_mode mode = XDP_MODE_UNSPEC; + struct xdp_program *prog = NULL; + struct iface iface = {}; + + if (!strcmp(".", de->d_name) || !strcmp("..", de->d_name)) + continue; + + iface.ifname = de->d_name; + iface.ifindex = if_nametoindex(iface.ifname); + + err = try_snprintf(pin_path, sizeof(pin_path), "%s/programs/%s", + pin_root_path, iface.ifname); + if (err) + goto out; + + err = get_pinned_program(&iface, pin_root_path, &mode, &prog); + if (err == -ENOENT || err == -ENODEV) { + err = 
rmdir(pin_path); + if (err) + goto out; + continue; + } else if (err) { + goto out; + } + + err = cb(&iface, prog, mode, arg); + xdp_program__close(prog); + if (err) + goto out; + } + +out: + closedir(dr); + return err; +} + +int iterate_iface_multiprogs(multiprog_callback cb, void *arg) +{ + struct if_nameindex *idx, *indexes = NULL; + int err = 0; + + indexes = if_nameindex(); + if (!indexes) { + err = -errno; + pr_warn("Couldn't get list of interfaces: %s\n", strerror(-err)); + return err; + } + + for (idx = indexes; idx->if_index; idx++) { + struct xdp_multiprog *mp; + struct iface iface = { + .ifindex = idx->if_index, + .ifname = idx->if_name, + }; + + mp = xdp_multiprog__get_from_ifindex(iface.ifindex); + if (IS_ERR_OR_NULL(mp)) { + if (PTR_ERR(mp) != -ENOENT) { + err = PTR_ERR(mp); + pr_warn("Error getting XDP status for interface %s: %s\n", + idx->if_name, strerror(-err)); + goto out; + } + mp = NULL; + } + + err = cb(&iface, mp, arg); + xdp_multiprog__close(mp); + if (err) + goto out; + } + +out: + if_freenameindex(indexes); + return err; +} + +static bool bpf_is_valid_mntpt(const char *mnt, unsigned long magic) +{ + struct statfs st_fs; + + if (statfs(mnt, &st_fs) < 0) + return false; + if ((unsigned long)st_fs.f_type != magic) + return false; + + return true; +} + +static const char *bpf_find_mntpt_single(unsigned long magic, char *mnt, + int len, const char *mntpt) +{ + if (bpf_is_valid_mntpt(mntpt, magic)) { + strncpy(mnt, mntpt, len - 1); + mnt[len - 1] = '\0'; + return mnt; + } + + return NULL; +} + +static const char *bpf_find_mntpt(const char *fstype, unsigned long magic, + char *mnt, int len, + const char * const *known_mnts) +{ + const char * const *ptr; + char type[100]; + FILE *fp; + + if (known_mnts) { + ptr = known_mnts; + while (*ptr) { + if (bpf_find_mntpt_single(magic, mnt, len, *ptr)) + return mnt; + ptr++; + } + } + + if (len != PATH_MAX) + return NULL; + + fp = fopen("/proc/mounts", "r"); + if (fp == NULL) + return NULL; + + while 
(fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n", mnt,
+		      type) == 2) {
+		/* Return from inside the loop on a match, so that 'type' is
+		 * never compared when fscanf() did not fill it in (e.g. when
+		 * the very first line fails to parse).
+		 */
+		if (strcmp(type, fstype) == 0) {
+			fclose(fp);
+			return mnt;
+		}
+	}
+
+	/* NOTE(review): the %s width above is PATH_MAX characters, which with
+	 * the terminating NUL is one byte more than a PATH_MAX-sized 'mnt'
+	 * holds - worth confirming and tightening.
+	 */
+
+	/* No mount of the requested type was found */
+	fclose(fp);
+	return NULL;
+}
+
+static int bpf_mnt_check_target(const char *target)
+{
+	int ret;
+
+	ret = mkdir(target, S_IRWXU);
+	if (ret && errno != EEXIST) {
+		ret = -errno;
+		pr_warn("mkdir %s failed: %s\n", target, strerror(-ret));
+		return ret;
+	}
+
+	return 0;
+}
+/* simplified version of code from iproute2 */
+static const char *bpf_get_work_dir()
+{
+	static char bpf_tmp[PATH_MAX] = BPF_DIR_MNT;
+	static char bpf_wrk_dir[PATH_MAX];
+	static const char *mnt;
+	static bool bpf_mnt_cached;
+	static const char *const bpf_known_mnts[] = {
+		BPF_DIR_MNT,
+		"/bpf",
+		0,
+	};
+	int ret;
+
+	if (bpf_mnt_cached)
+		return mnt;
+
+	mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_tmp, sizeof(bpf_tmp),
+			     bpf_known_mnts);
+	if (!mnt) {
+		mnt = BPF_DIR_MNT;
+		ret = bpf_mnt_check_target(mnt);
+		if (ret || !bpf_is_valid_mntpt(mnt, BPF_FS_MAGIC)) {
+			mnt = NULL;
+			goto out;
+		}
+	}
+
+	strncpy(bpf_wrk_dir, mnt, sizeof(bpf_wrk_dir));
+	bpf_wrk_dir[sizeof(bpf_wrk_dir) - 1] = '\0';
+	mnt = bpf_wrk_dir;
+out:
+	bpf_mnt_cached = true;
+	return mnt;
+}
+
+int get_bpf_root_dir(char *buf, size_t buf_len, const char *subdir, bool fatal)
+{
+	const char *bpf_dir;
+
+	bpf_dir = bpf_get_work_dir();
+	if (!bpf_dir) {
+		logging_print(fatal ?
LOG_WARN : LOG_DEBUG, + "Could not find BPF working dir - bpffs not mounted?\n"); + return -ENOENT; + } + + if (subdir) + return try_snprintf(buf, buf_len, "%s/%s", bpf_dir, subdir); + else + return try_snprintf(buf, buf_len, "%s", bpf_dir); +} + +int get_pinned_map_fd(const char *bpf_root, const char *map_name, + struct bpf_map_info *info) +{ + __u32 info_len = sizeof(*info); + char buf[PATH_MAX]; + int err; + + err = try_snprintf(buf, sizeof(buf), "%s/%s", bpf_root, map_name); + if (err) + return err; + + pr_debug("Getting pinned object from %s\n", buf); + return get_pinned_object_fd(buf, info, &info_len); +} + +int unlink_pinned_map(int dir_fd, const char *map_name) +{ + struct stat statbuf = {}; + int err; + + err = fstatat(dir_fd, map_name, &statbuf, 0); + if (err && errno == ENOENT) { + pr_debug("Map name %s not pinned\n", map_name); + return 0; + } else if (err) { + err = -errno; + pr_warn("Couldn't stat pinned map %s: %s\n", + map_name, strerror(-err)); + return err; + } + + pr_debug("Unlinking pinned map %s\n", map_name); + err = unlinkat(dir_fd, map_name, 0); + if (err) { + err = -errno; + pr_warn("Couldn't unlink pinned map %s: %s\n", + map_name, strerror(-err)); + return -errno; + } + + return 0; +} + +#define XDP_UNKNOWN (XDP_REDIRECT + 1) +#ifndef XDP_ACTION_MAX +#define XDP_ACTION_MAX (XDP_UNKNOWN + 1) +#endif + +static const char *xdp_action_names[XDP_ACTION_MAX] = { + [XDP_ABORTED] = "XDP_ABORTED", + [XDP_DROP] = "XDP_DROP", + [XDP_PASS] = "XDP_PASS", + [XDP_TX] = "XDP_TX", + [XDP_REDIRECT] = "XDP_REDIRECT", + [XDP_UNKNOWN] = "XDP_UNKNOWN", +}; + +const char *action2str(__u32 action) +{ + if (action < XDP_ACTION_MAX) + return xdp_action_names[action]; + return NULL; +} + +int check_bpf_environ(void) +{ + init_lib_logging(); + + if (geteuid() != 0) { + pr_warn("This program must be run as root.\n"); + return 1; + } + + /* Try to avoid probing errors due to rlimit exhaustion by starting out + * with an rlimit of 1 MiB. 
This is not going to solve all issues, but + * it will at least make things work when there is nothing else loaded. + * + * Ignore return code because an error shouldn't abort running. + */ + set_rlimit(1024 * 1024); + + return 0; +} + +static const char *lock_dir = RUNDIR; +static char *prog_lock_file = NULL; +static int prog_lock_fd = -1; +static pid_t prog_pid = 0; + +void prog_lock_release(int signal) +{ + struct sigaction sigact = { .sa_flags = SA_RESETHAND }; + int err; + + if (prog_lock_fd < 0 || !prog_lock_file) + return; + + sigaction(SIGHUP, &sigact, NULL); + sigaction(SIGINT, &sigact, NULL); + sigaction(SIGSEGV, &sigact, NULL); + sigaction(SIGFPE, &sigact, NULL); + sigaction(SIGTERM, &sigact, NULL); + + err = unlink(prog_lock_file); + if (err) { + err = -errno; + pr_warn("Unable to unlink lock file: %s\n", strerror(-err)); + goto out; + } + + close(prog_lock_fd); + free(prog_lock_file); + prog_lock_fd = -1; + prog_lock_file = NULL; + +out: + if (signal) { + pr_debug("Exiting on signal %d\n", signal); + if (prog_pid) + kill(prog_pid, signal); + else + exit(signal); + } +} + +int prog_lock_get(const char *progname) +{ + char buf[PATH_MAX]; + int err; + struct sigaction sigact = { .sa_handler = prog_lock_release }; + + if (prog_lock_fd >= 0) { + pr_warn("Attempt to get prog_lock twice.\n"); + return -EFAULT; + } + + if (!prog_lock_file) { + err = try_snprintf(buf, sizeof(buf), "%s/%s.lck", lock_dir, + progname); + if (err) + return err; + + prog_lock_file = strdup(buf); + if (!prog_lock_file) + return -ENOMEM; + } + + prog_pid = getpid(); + + if (sigaction(SIGHUP, &sigact, NULL) || + sigaction(SIGINT, &sigact, NULL) || + sigaction(SIGSEGV, &sigact, NULL) || + sigaction(SIGFPE, &sigact, NULL) || + sigaction(SIGTERM, &sigact, NULL)) { + err = -errno; + pr_warn("Unable to install signal handler: %s\n", strerror(-err)); + return err; + } + + prog_lock_fd = open(prog_lock_file, O_WRONLY | O_CREAT | O_EXCL, 0644); + if (prog_lock_fd < 0) { + err = -errno; + if 
(err == -EEXIST) {
+			pid_t pid = 0;
+			char buf[100];
+			ssize_t len;
+			int fd;
+
+			fd = open(prog_lock_file, O_RDONLY);
+			if (fd < 0) {
+				err = -errno;
+				pr_warn("Unable to open lockfile for reading: %s\n",
+					strerror(-err));
+				return err;
+			}
+
+			len = read(fd, buf, sizeof(buf) - 1);
+			/* errno is only meaningful if read() itself failed;
+			 * after a successful read it may still hold a stale
+			 * value (such as the EEXIST from the failed open()
+			 * above), which must not be mistaken for a read error.
+			 */
+			err = len < 0 ? -errno : 0;
+			close(fd);
+			if (len > 0) {
+				buf[len] = '\0';
+				pid = strtoul(buf, NULL, 10);
+			}
+			if (err || !pid) {
+				/* Readable, but empty or without a usable PID */
+				if (!err)
+					err = -EINVAL;
+				pr_warn("Unable to read PID from lockfile: %s\n",
+					strerror(-err));
+				return err;
+			}
+			pr_warn("Unable to get program lock: Already held by pid %d\n",
+				pid);
+			/* The lock belongs to someone else: still a failure */
+			err = -EEXIST;
+		} else {
+			pr_warn("Unable to get program lock: %s\n", strerror(-err));
+		}
+		return err;
+	}
+
+	err = dprintf(prog_lock_fd, "%d\n", prog_pid);
+	if (err < 0) {
+		err = -errno;
+		pr_warn("Unable to write pid to lock file: %s\n", strerror(-err));
+		goto out_err;
+	}
+
+	err = fsync(prog_lock_fd);
+	if (err) {
+		err = -errno;
+		pr_warn("Unable fsync() lock file: %s\n", strerror(-err));
+		goto out_err;
+	}
+
+	return 0;
+out_err:
+	unlink(prog_lock_file);
+	close(prog_lock_fd);
+	free(prog_lock_file);
+	prog_lock_file = NULL;
+	prog_lock_fd = -1;
+	return err;
+}
+
+static char *print_bpf_tag(char buf[BPF_TAG_SIZE * 2 + 1],
+			   const unsigned char tag[BPF_TAG_SIZE])
+{
+	int i;
+
+	for (i = 0; i < BPF_TAG_SIZE; i++)
+		sprintf(&buf[i * 2], "%02x", tag[i]);
+	buf[BPF_TAG_SIZE * 2] = '\0';
+	return buf;
+}
+
+static int print_iface_status(const struct iface *iface,
+			      const struct xdp_multiprog *mp,
+			      __unused void *arg)
+{
+	struct xdp_program *prog, *dispatcher, *hw_prog;
+	char tag[BPF_TAG_SIZE * 2 + 1];
+	char buf[STRERR_BUFSIZE];
+	int err;
+
+	if (!mp) {
+		printf("%-22s <No XDP program loaded!>\n", iface->ifname);
+		return 0;
+	}
+
+	hw_prog = xdp_multiprog__hw_prog(mp);
+	if (hw_prog) {
+		printf("%-16s %-5s %-17s %-8s %-4d %-17s\n",
+		       iface->ifname,
+		       "",
+		       xdp_program__name(hw_prog),
+		       get_enum_name(xdp_modes, XDP_MODE_HW),
+		       xdp_program__id(hw_prog),
+		       print_bpf_tag(tag, xdp_program__tag(hw_prog)));
+	}
+ + dispatcher = xdp_multiprog__main_prog(mp); + if (dispatcher) { + printf("%-16s %-5s %-17s %-8s %-4d %-17s\n", + iface->ifname, + "", + xdp_program__name(dispatcher), + get_enum_name(xdp_modes, xdp_multiprog__attach_mode(mp)), + xdp_program__id(dispatcher), + print_bpf_tag(tag, xdp_program__tag(dispatcher))); + + + for (prog = xdp_multiprog__next_prog(NULL, mp); + prog; + prog = xdp_multiprog__next_prog(prog, mp)) { + + err = xdp_program__print_chain_call_actions(prog, buf, + sizeof(buf)); + if (err) + return err; + + printf("%-16s %-5d %-16s %-8s %-4u %-17s %s\n", + " =>", xdp_program__run_prio(prog), + xdp_program__name(prog), + "", xdp_program__id(prog), + print_bpf_tag(tag, xdp_program__tag(prog)), + buf); + } + } + + return 0; +} + +int iface_print_status(const struct iface *iface) +{ + int err = 0; + + printf("%-16s %-5s %-17s Mode ID %-17s %s\n", + "Interface", "Prio", "Program name", "Tag", "Chain actions"); + printf("--------------------------------------------------------------------------------------\n"); + + if (iface) { + struct xdp_multiprog *mp; + + mp = xdp_multiprog__get_from_ifindex(iface->ifindex); + if (IS_ERR_OR_NULL(mp)) { + if (PTR_ERR(mp) != -ENOENT) { + err = PTR_ERR(mp); + pr_warn("Error getting XDP status for interface %s: %s\n", + iface->ifname, strerror(-err)); + goto out; + } + mp = NULL; + } + print_iface_status(iface, mp, NULL); + } else { + err = iterate_iface_multiprogs(print_iface_status, NULL); + } + printf("\n"); +out: + return err; +} diff --git a/lib/util/util.h b/lib/util/util.h new file mode 100644 index 0000000..8848e41 --- /dev/null +++ b/lib/util/util.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __UTIL_H +#define __UTIL_H + +#include <bpf/libbpf.h> +#include <xdp/libxdp.h> +#include "params.h" + +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif +#define STRERR_BUFSIZE 1024 +#define _textify(x) #x +#define textify(x) _textify(x) + +#define __unused __attribute__((unused)) + +#ifndef BPF_DIR_MNT 
+#define BPF_DIR_MNT "/sys/fs/bpf" +#endif + +#ifndef BPF_OBJECT_PATH +#define BPF_OBJECT_PATH "/usr/lib/bpf" +#endif + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) + +#define FOR_EACH_MAP_KEY(_err, _map_fd, _map_key, _prev_key) \ + for (_err = bpf_map_get_next_key(_map_fd, NULL, &_map_key); \ + !_err; \ + _prev_key = _map_key, \ + _err = bpf_map_get_next_key(_map_fd, &_prev_key, &_map_key)) + +#define min(x, y) ((x) < (y) ? x : y) +#define max(x, y) ((x) > (y) ? x : y) + +#ifndef offsetof +#define offsetof(type, member) ((size_t) & ((type *)0)->member) +#endif + +#ifndef container_of +#define container_of(ptr, type, member) \ + ({ \ + const typeof(((type *)0)->member) *__mptr = (ptr); \ + (type *)((char *)__mptr - offsetof(type, member)); \ + }) +#endif + +#ifndef roundup +#define roundup(x, y) \ + ({ \ + typeof(y) __y = y; \ + (((x) + (__y - 1)) / __y) * __y; \ + }) +#endif + +int try_snprintf(char *buf, size_t buf_len, const char *format, ...); +int make_dir_subdir(const char *parent, const char *dir); + +int check_bpf_environ(void); +int double_rlimit(void); + +int attach_xdp_program(struct xdp_program *prog, const struct iface *iface, + enum xdp_attach_mode mode, const char *pin_root_dir); +int detach_xdp_program(struct xdp_program *prog, const struct iface *iface, + enum xdp_attach_mode mode, const char *pin_root_dir); + +int find_bpf_file(char *buf, size_t buf_size, const char *progname); +struct bpf_object *open_bpf_file(const char *progname, + struct bpf_object_open_opts *opts); + +typedef int (*program_callback)(const struct iface *iface, + struct xdp_program *prog, + enum xdp_attach_mode mode, void *arg); +typedef int (*multiprog_callback)(const struct iface *iface, + const struct xdp_multiprog *mp, void *arg); +int get_pinned_program(const struct iface *iface, const char *pin_root_path, + enum xdp_attach_mode *mode, struct xdp_program **prog); +int iterate_pinned_programs(const char *pin_root_path, program_callback cb, + void *arg); +int 
iterate_iface_multiprogs(multiprog_callback cb, void *arg); + +int get_bpf_root_dir(char *buf, size_t buf_len, const char *subdir, bool fatal); +int get_pinned_map_fd(const char *bpf_root, const char *map_name, + struct bpf_map_info *info); +int unlink_pinned_map(int dir_fd, const char *map_name); + +const char *action2str(__u32 action); + +int prog_lock_get(const char *progname); +void prog_lock_release(int signal); + +const char *get_libbpf_version(void); +int iface_print_status(const struct iface *iface); + +#endif diff --git a/lib/util/util.mk b/lib/util/util.mk new file mode 100644 index 0000000..7fc3b43 --- /dev/null +++ b/lib/util/util.mk @@ -0,0 +1,2 @@ +UTIL_OBJS := params.o logging.o util.o stats.o xpcapng.o xdp_sample.o +UTIL_BPF_OBJS := xdp_sample.bpf.o diff --git a/lib/util/xdp_sample.bpf.c b/lib/util/xdp_sample.bpf.c new file mode 100644 index 0000000..3e301bb --- /dev/null +++ b/lib/util/xdp_sample.bpf.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <bpf/vmlinux.h> +#include <linux/bpf.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_helpers.h> + +SEC("tp_btf/xdp_cpumap_kthread") +int BPF_PROG(tp_xdp_cpumap_kthread, int map_id, unsigned int processed, + unsigned int drops, int sched, struct xdp_cpumap_stats *xdp_stats) +{ + bpf_printk("Stats: %d %u %u %d %d\n", + map_id, processed, drops, sched, xdp_stats->pass); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/lib/util/xdp_sample.c b/lib/util/xdp_sample.c new file mode 100644 index 0000000..4385ea5 --- /dev/null +++ b/lib/util/xdp_sample.c @@ -0,0 +1,1643 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define _GNU_SOURCE + +#include <math.h> +#include <poll.h> +#include <time.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <getopt.h> +#include <locale.h> +#include <net/if.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <bpf/bpf.h> +#include <stdbool.h> 
+#include <inttypes.h> +#include <sys/mman.h> +#include <arpa/inet.h> +#include <sys/ioctl.h> +#include <bpf/libbpf.h> +#include <sys/sysinfo.h> +#include <sys/timerfd.h> +#include <sys/utsname.h> +#include <linux/limits.h> +#include <sys/resource.h> +#include <sys/signalfd.h> +#include <linux/ethtool.h> +#include <linux/if_link.h> +#include <linux/sockios.h> +#include <linux/hashtable.h> + +#include "xdp_sample.h" +#include "logging.h" + +#include "xdp_sample.skel.h" + +#define __sample_print(fmt, cond, ...) \ + ({ \ + if (cond) \ + printf(fmt, ##__VA_ARGS__); \ + }) + +#define print_always(fmt, ...) __sample_print(fmt, 1, ##__VA_ARGS__) +#define print_default(fmt, ...) \ + __sample_print(fmt, sample_log_level & LL_DEFAULT, ##__VA_ARGS__) +#define __print_err(err, fmt, ...) \ + ({ \ + __sample_print(fmt, err > 0 || sample_log_level & LL_DEFAULT, \ + ##__VA_ARGS__); \ + sample_err_exp = sample_err_exp ? true : err > 0; \ + }) +#define print_err(err, fmt, ...) __print_err(err, fmt, ##__VA_ARGS__) + +#define __COLUMN(x) "%'10" x " %-13s" +#define FMT_COLUMNf __COLUMN(".0f") +#define FMT_COLUMNd __COLUMN("d") +#define FMT_COLUMNl __COLUMN(PRIu64) +#define RX(rx) rx, "rx/s" +#define PPS(pps) pps, "pkt/s" +#define DROP(drop) drop, "drop/s" +#define ERR(err) err, "error/s" +#define HITS(hits) hits, "hit/s" +#define XMIT(xmit) xmit, "xmit/s" +#define PASS(pass) pass, "pass/s" +#define REDIR(redir) redir, "redir/s" +#define NANOSEC_PER_SEC 1000000000 /* 10^9 */ + +#define XDP_UNKNOWN (XDP_REDIRECT + 1) +#define XDP_ACTION_MAX (XDP_UNKNOWN + 1) +#define XDP_REDIRECT_ERR_MAX 7 + +enum map_type { + MAP_RX, + MAP_RXQ, + MAP_REDIRECT_ERR, + MAP_CPUMAP_ENQUEUE, + MAP_CPUMAP_KTHREAD, + MAP_EXCEPTION, + MAP_DEVMAP_XMIT, + MAP_DEVMAP_XMIT_MULTI, + NUM_MAP, +}; + +enum log_level { + LL_DEFAULT = 1U << 0, + LL_SIMPLE = 1U << 1, + LL_DEBUG = 1U << 2, +}; + +struct record { + __u64 timestamp; + struct datarec total; + union { + struct datarec *cpu; + struct datarec *rxq; + }; +}; + 
+struct map_entry { + struct hlist_node node; + __u64 pair; + struct record val; +}; + +struct stats_record { + struct record rx_cnt; + struct record rxq_cnt; + struct record redir_err[XDP_REDIRECT_ERR_MAX]; + struct record kthread; + struct record exception[XDP_ACTION_MAX]; + struct record devmap_xmit; + DECLARE_HASHTABLE(xmit_map, 5); + struct record enq[]; +}; + +struct sample_output { + struct { + uint64_t rx; + uint64_t redir; + uint64_t drop; + uint64_t drop_xmit; + uint64_t err; + uint64_t xmit; + } totals; + struct { + union { + uint64_t pps; + uint64_t num; + }; + uint64_t drop; + uint64_t err; + } rx_cnt; + struct { + uint64_t suc; + uint64_t err; + } redir_cnt; + struct { + uint64_t hits; + } except_cnt; + struct { + uint64_t pps; + uint64_t drop; + uint64_t err; + double bavg; + } xmit_cnt; +}; + +struct datarec *sample_mmap[NUM_MAP]; +struct bpf_map *sample_map[NUM_MAP]; +size_t sample_map_count[NUM_MAP]; +enum log_level sample_log_level; +struct sample_output sample_out; +unsigned long sample_interval; +bool sample_err_exp; +int sample_xdp_cnt; +int sample_n_cpus; +int sample_n_rxqs; +int sample_sig_fd; +int sample_mask; +int ifindex[2]; + +static struct { + bool checked; + bool compat; +} sample_compat[SAMPLE_COMPAT_MAX] = {}; + +bool sample_is_compat(enum sample_compat compat_value) +{ + return sample_compat[compat_value].compat; +} + +bool sample_probe_cpumap_compat(void) +{ + struct xdp_sample *skel; + bool res; + + skel = xdp_sample__open_and_load(); + res = !!skel; + xdp_sample__destroy(skel); + + return res; +} + +void sample_check_cpumap_compat(struct bpf_program *prog, + struct bpf_program *prog_compat) +{ + bool res = sample_compat[SAMPLE_COMPAT_CPUMAP_KTHREAD].compat; + + if (!sample_compat[SAMPLE_COMPAT_CPUMAP_KTHREAD].checked) { + res = sample_probe_cpumap_compat(); + + sample_compat[SAMPLE_COMPAT_CPUMAP_KTHREAD].checked = true; + sample_compat[SAMPLE_COMPAT_CPUMAP_KTHREAD].compat = res; + } + + if (res) { + pr_debug("Kernel supports 
5-arg xdp_cpumap_kthread tracepoint\n"); + bpf_program__set_autoload(prog_compat, false); + } else { + pr_debug("Kernel does not support 5-arg xdp_cpumap_kthread tracepoint, using compat version\n"); + bpf_program__set_autoload(prog, false); + } +} + +static const char *xdp_redirect_err_names[XDP_REDIRECT_ERR_MAX] = { + /* Key=1 keeps unknown errors */ + "Success", + "Unknown", + "EINVAL", + "ENETDOWN", + "EMSGSIZE", + "EOPNOTSUPP", + "ENOSPC", +}; + +static const char *xdp_action_names[XDP_ACTION_MAX] = { + [XDP_ABORTED] = "XDP_ABORTED", + [XDP_DROP] = "XDP_DROP", + [XDP_PASS] = "XDP_PASS", + [XDP_TX] = "XDP_TX", + [XDP_REDIRECT] = "XDP_REDIRECT", + [XDP_UNKNOWN] = "XDP_UNKNOWN", +}; + +static __u64 gettime(void) +{ + struct timespec t; + int res; + + res = clock_gettime(CLOCK_MONOTONIC, &t); + if (res < 0) { + pr_warn("Error with gettimeofday! (%i)\n", res); + return UINT64_MAX; + } + return (__u64)t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; +} + +static const char *xdp_action2str(int action) +{ + if (action < XDP_ACTION_MAX) + return xdp_action_names[action]; + return NULL; +} + +static struct datarec *alloc_records(int nr_entries) +{ + struct datarec *array; + + if (nr_entries <= 0) + return NULL; + + array = calloc(nr_entries, sizeof(*array)); + if (!array) { + pr_warn("Failed to allocate memory (nr_entries: %u)\n", nr_entries); + return NULL; + } + return array; +} + +static int map_entry_init(struct map_entry *e, __u64 pair) +{ + e->pair = pair; + INIT_HLIST_NODE(&e->node); + e->val.timestamp = gettime(); + e->val.cpu = alloc_records(libbpf_num_possible_cpus()); + if (!e->val.cpu) + return -ENOMEM; + return 0; +} + +static void map_collect_rxqs(struct datarec *values, struct record *rec) +{ + int i; + + /* Get time as close as possible to reading map contents */ + rec->timestamp = gettime(); + + /* Record and sum values from each RXQ */ + for (i = 0; i < sample_n_rxqs; i++) { + pr_debug("%d: %lx %lx\n", i, (unsigned long)&rec->rxq[i], (unsigned long)&values[i]); 
+ rec->rxq[i].processed = READ_ONCE(values[i].processed); + rec->rxq[i].dropped = READ_ONCE(values[i].dropped); + rec->rxq[i].issue = READ_ONCE(values[i].issue); + rec->rxq[i].xdp_pass = READ_ONCE(values[i].xdp_pass); + rec->rxq[i].xdp_drop = READ_ONCE(values[i].xdp_drop); + rec->rxq[i].xdp_redirect = READ_ONCE(values[i].xdp_redirect); + } +} + +static void map_collect_percpu(struct datarec *values, struct record *rec) +{ + /* For percpu maps, userspace gets a value per possible CPU */ + int nr_cpus = libbpf_num_possible_cpus(); + __u64 sum_xdp_redirect = 0; + __u64 sum_processed = 0; + __u64 sum_xdp_pass = 0; + __u64 sum_xdp_drop = 0; + __u64 sum_dropped = 0; + __u64 sum_issue = 0; + int i; + + /* Get time as close as possible to reading map contents */ + rec->timestamp = gettime(); + + /* Record and sum values from each CPU */ + for (i = 0; i < nr_cpus; i++) { + rec->cpu[i].processed = READ_ONCE(values[i].processed); + rec->cpu[i].dropped = READ_ONCE(values[i].dropped); + rec->cpu[i].issue = READ_ONCE(values[i].issue); + rec->cpu[i].xdp_pass = READ_ONCE(values[i].xdp_pass); + rec->cpu[i].xdp_drop = READ_ONCE(values[i].xdp_drop); + rec->cpu[i].xdp_redirect = READ_ONCE(values[i].xdp_redirect); + + sum_processed += rec->cpu[i].processed; + sum_dropped += rec->cpu[i].dropped; + sum_issue += rec->cpu[i].issue; + sum_xdp_pass += rec->cpu[i].xdp_pass; + sum_xdp_drop += rec->cpu[i].xdp_drop; + sum_xdp_redirect += rec->cpu[i].xdp_redirect; + } + + rec->total.processed = sum_processed; + rec->total.dropped = sum_dropped; + rec->total.issue = sum_issue; + rec->total.xdp_pass = sum_xdp_pass; + rec->total.xdp_drop = sum_xdp_drop; + rec->total.xdp_redirect = sum_xdp_redirect; +} + +static int map_collect_percpu_devmap(int map_fd, struct stats_record *rec) +{ + int nr_cpus = libbpf_num_possible_cpus(); + int i, ret, count = 32; + struct datarec *values; + bool init = false; + __u32 batch; + __u64 *keys; + + keys = calloc(count, sizeof(__u64)); + if (!keys) + return -ENOMEM; + 
values = calloc(count * nr_cpus, sizeof(struct datarec)); + if (!values) { + free(keys); + return -ENOMEM; + } + + for (;;) { + bool exit = false; + + ret = bpf_map_lookup_batch(map_fd, init ? &batch : NULL, &batch, + keys, values, (__u32 *)&count, NULL); + if (ret < 0 && errno != ENOENT) + break; + if (errno == ENOENT) + exit = true; + + init = true; + for (i = 0; i < count; i++) { + struct map_entry *e, *x = NULL; + __u64 pair = keys[i]; + struct datarec *arr; + + arr = &values[i * nr_cpus]; + hash_for_each_possible(rec->xmit_map, e, node, pair) { + if (e->pair == pair) { + x = e; + break; + } + } + if (!x) { + x = calloc(1, sizeof(*x)); + if (!x) + goto cleanup; + if (map_entry_init(x, pair) < 0) { + free(x); + goto cleanup; + } + hash_add(rec->xmit_map, &x->node, pair); + } + map_collect_percpu(arr, &x->val); + } + + if (exit) + break; + count = 32; + } + + free(values); + free(keys); + return 0; +cleanup: + free(values); + free(keys); + return -ENOMEM; +} + +static struct stats_record *alloc_stats_record(void) +{ + struct stats_record *rec; + int i; + + rec = calloc(1, sizeof(*rec) + sample_n_cpus * sizeof(struct record)); + if (!rec) { + pr_warn("Failed to allocate memory\n"); + return NULL; + } + + if (sample_mask & SAMPLE_RX_CNT) { + rec->rx_cnt.cpu = alloc_records(libbpf_num_possible_cpus()); + if (!rec->rx_cnt.cpu) { + pr_warn("Failed to allocate rx_cnt per-CPU array\n"); + goto end_rec; + } + } + if (sample_mask & SAMPLE_RXQ_STATS) { + if (sample_n_rxqs <= 0) { + pr_warn("Invalid number of RXQs: %d\n", sample_n_rxqs); + goto end_rx_cnt; + } + + rec->rxq_cnt.rxq = alloc_records(sample_n_rxqs); + if (!rec->rxq_cnt.rxq) { + pr_warn("Failed to allocate rxq_cnt per RXQ array\n"); + goto end_rx_cnt; + } + } + if (sample_mask & (SAMPLE_REDIRECT_CNT | SAMPLE_REDIRECT_ERR_CNT)) { + for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++) { + rec->redir_err[i].cpu = alloc_records(libbpf_num_possible_cpus()); + if (!rec->redir_err[i].cpu) { + pr_warn("Failed to allocate 
redir_err per-CPU array for \"%s\" case\n", + xdp_redirect_err_names[i]); + while (i--) + free(rec->redir_err[i].cpu); + goto end_rxq_cnt; + } + } + } + if (sample_mask & SAMPLE_CPUMAP_KTHREAD_CNT) { + rec->kthread.cpu = alloc_records(libbpf_num_possible_cpus()); + if (!rec->kthread.cpu) { + pr_warn("Failed to allocate kthread per-CPU array\n"); + goto end_redir; + } + } + if (sample_mask & SAMPLE_EXCEPTION_CNT) { + for (i = 0; i < XDP_ACTION_MAX; i++) { + rec->exception[i].cpu = alloc_records(libbpf_num_possible_cpus()); + if (!rec->exception[i].cpu) { + pr_warn("Failed to allocate exception per-CPU array for \"%s\" case\n", + xdp_action2str(i)); + while (i--) + free(rec->exception[i].cpu); + goto end_kthread; + } + } + } + if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT) { + rec->devmap_xmit.cpu = alloc_records(libbpf_num_possible_cpus()); + if (!rec->devmap_xmit.cpu) { + pr_warn("Failed to allocate devmap_xmit per-CPU array\n"); + goto end_exception; + } + } + if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) + hash_init(rec->xmit_map); + if (sample_mask & SAMPLE_CPUMAP_ENQUEUE_CNT) { + for (i = 0; i < sample_n_cpus; i++) { + rec->enq[i].cpu = alloc_records(libbpf_num_possible_cpus()); + if (!rec->enq[i].cpu) { + pr_warn("Failed to allocate enqueue per-CPU array for CPU %d\n", i); + while (i--) + free(rec->enq[i].cpu); + goto end_devmap_xmit; + } + } + } + + return rec; + +end_devmap_xmit: + free(rec->devmap_xmit.cpu); +end_exception: + for (i = 0; i < XDP_ACTION_MAX; i++) + free(rec->exception[i].cpu); +end_kthread: + free(rec->kthread.cpu); +end_redir: + for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++) + free(rec->redir_err[i].cpu); +end_rxq_cnt: + free(rec->rxq_cnt.rxq); +end_rx_cnt: + free(rec->rx_cnt.cpu); +end_rec: + free(rec); + return NULL; +} + +static void free_stats_record(struct stats_record *r) +{ + struct hlist_node *tmp; + struct map_entry *e; + unsigned int bkt; + int i; + + for (i = 0; i < sample_n_cpus; i++) + free(r->enq[i].cpu); + 
hash_for_each_safe(r->xmit_map, bkt, tmp, e, node) { + hash_del(&e->node); + free(e->val.cpu); + free(e); + } + free(r->devmap_xmit.cpu); + for (i = 0; i < XDP_ACTION_MAX; i++) + free(r->exception[i].cpu); + free(r->kthread.cpu); + for (i = 0; i < XDP_REDIRECT_ERR_MAX; i++) + free(r->redir_err[i].cpu); + free(r->rx_cnt.cpu); + free(r); +} + +static double calc_period(struct record *r, struct record *p) +{ + double period_ = 0; + __u64 period = 0; + + period = r->timestamp - p->timestamp; + if (period > 0) + period_ = ((double)period / NANOSEC_PER_SEC); + + return period_; +} + +static double sample_round(double val) +{ + if (val - floor(val) < 0.5) + return floor(val); + return ceil(val); +} + +static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_) +{ + __u64 packets = 0; + __u64 pps = 0; + + if (period_ > 0) { + packets = r->processed - p->processed; + pps = sample_round(packets / period_); + } + return pps; +} + +static __u64 calc_drop_pps(struct datarec *r, struct datarec *p, double period_) +{ + __u64 packets = 0; + __u64 pps = 0; + + if (period_ > 0) { + packets = r->dropped - p->dropped; + pps = sample_round(packets / period_); + } + return pps; +} + +static __u64 calc_errs_pps(struct datarec *r, struct datarec *p, double period_) +{ + __u64 packets = 0; + __u64 pps = 0; + + if (period_ > 0) { + packets = r->issue - p->issue; + pps = sample_round(packets / period_); + } + return pps; +} + +static __u64 calc_info_pps(struct datarec *r, struct datarec *p, double period_) +{ + __u64 packets = 0; + __u64 pps = 0; + + if (period_ > 0) { + packets = r->info - p->info; + pps = sample_round(packets / period_); + } + return pps; +} + +static void calc_xdp_pps(struct datarec *r, struct datarec *p, double *xdp_pass, + double *xdp_drop, double *xdp_redirect, double period_) +{ + *xdp_pass = 0, *xdp_drop = 0, *xdp_redirect = 0; + if (period_ > 0) { + *xdp_redirect = (r->xdp_redirect - p->xdp_redirect) / period_; + *xdp_pass = (r->xdp_pass - 
p->xdp_pass) / period_; + *xdp_drop = (r->xdp_drop - p->xdp_drop) / period_; + } +} + +static void stats_get_rx_cnt(struct stats_record *stats_rec, + struct stats_record *stats_prev, + int nr_cpus, struct sample_output *out) +{ + struct record *rec, *prev; + double t, pps, drop, err; + int i; + + rec = &stats_rec->rx_cnt; + prev = &stats_prev->rx_cnt; + t = calc_period(rec, prev); + + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + char str[64]; + + pps = calc_pps(r, p, t); + drop = calc_drop_pps(r, p, t); + err = calc_errs_pps(r, p, t); + if (!pps && !drop && !err) + continue; + + snprintf(str, sizeof(str), "cpu:%d", i); + print_default(" %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf + "\n", + str, PPS(pps), DROP(drop), ERR(err)); + } + + if (out) { + pps = calc_pps(&rec->total, &prev->total, t); + drop = calc_drop_pps(&rec->total, &prev->total, t); + err = calc_errs_pps(&rec->total, &prev->total, t); + + out->rx_cnt.pps = pps; + out->rx_cnt.drop = drop; + out->rx_cnt.err = err; + out->totals.rx += pps; + out->totals.drop += drop; + out->totals.err += err; + } +} + +static void stats_get_rxq_cnt(struct stats_record *stats_rec, + struct stats_record *stats_prev) +{ + struct record *rec, *prev; + double t, pps, drop, err; + int i; + + rec = &stats_rec->rxq_cnt; + prev = &stats_prev->rxq_cnt; + t = calc_period(rec, prev); + + print_default("\n"); + for (i = 0; i < sample_n_rxqs; i++) { + struct datarec *r = &rec->rxq[i]; + struct datarec *p = &prev->rxq[i]; + char str[64]; + + pps = calc_pps(r, p, t); + drop = calc_drop_pps(r, p, t); + err = calc_errs_pps(r, p, t); + if (!pps && !drop && !err) + continue; + + snprintf(str, sizeof(str), "rxq:%d", i); + print_default(" %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf + "\n", + str, PPS(pps), DROP(drop), ERR(err)); + } +} + +static void stats_get_cpumap_enqueue(struct stats_record *stats_rec, + struct stats_record *stats_prev, + int nr_cpus) +{ + struct record *rec, 
*prev; + double t, pps, drop, err; + int i, to_cpu; + + /* cpumap enqueue stats */ + for (to_cpu = 0; to_cpu < sample_n_cpus; to_cpu++) { + rec = &stats_rec->enq[to_cpu]; + prev = &stats_prev->enq[to_cpu]; + t = calc_period(rec, prev); + + pps = calc_pps(&rec->total, &prev->total, t); + drop = calc_drop_pps(&rec->total, &prev->total, t); + err = calc_errs_pps(&rec->total, &prev->total, t); + + if (pps > 0 || drop > 0) { + char str[64]; + + snprintf(str, sizeof(str), "enqueue to cpu %d", to_cpu); + + if (err > 0) + err = pps / err; /* calc average bulk size */ + + print_err(drop, + " %-20s " FMT_COLUMNf FMT_COLUMNf __COLUMN( + ".2f") "\n", + str, PPS(pps), DROP(drop), err, "bulk-avg"); + } + + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + char str[64]; + + pps = calc_pps(r, p, t); + drop = calc_drop_pps(r, p, t); + err = calc_errs_pps(r, p, t); + if (!pps && !drop && !err) + continue; + + snprintf(str, sizeof(str), "cpu:%d->%d", i, to_cpu); + if (err > 0) + err = pps / err; /* calc average bulk size */ + print_default( + " %-18s " FMT_COLUMNf FMT_COLUMNf __COLUMN( + ".2f") "\n", + str, PPS(pps), DROP(drop), err, "bulk-avg"); + } + } +} + +static void stats_get_cpumap_remote(struct stats_record *stats_rec, + struct stats_record *stats_prev, + int nr_cpus) +{ + double xdp_pass, xdp_drop, xdp_redirect; + struct record *rec, *prev; + double t; + int i; + + rec = &stats_rec->kthread; + prev = &stats_prev->kthread; + t = calc_period(rec, prev); + + calc_xdp_pps(&rec->total, &prev->total, &xdp_pass, &xdp_drop, + &xdp_redirect, t); + if (xdp_pass || xdp_drop || xdp_redirect) { + print_err(xdp_drop, + " %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf "\n", + "xdp_stats", PASS(xdp_pass), DROP(xdp_drop), + REDIR(xdp_redirect)); + } + + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + char str[64]; + + calc_xdp_pps(r, p, &xdp_pass, &xdp_drop, &xdp_redirect, t); + 
if (!xdp_pass && !xdp_drop && !xdp_redirect) + continue; + + snprintf(str, sizeof(str), "cpu:%d", i); + print_default(" %-16s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf + "\n", + str, PASS(xdp_pass), DROP(xdp_drop), + REDIR(xdp_redirect)); + } +} + +static void stats_get_cpumap_kthread(struct stats_record *stats_rec, + struct stats_record *stats_prev, + int nr_cpus) +{ + struct record *rec, *prev; + double t, pps, drop, err; + int i; + + rec = &stats_rec->kthread; + prev = &stats_prev->kthread; + t = calc_period(rec, prev); + + pps = calc_pps(&rec->total, &prev->total, t); + drop = calc_drop_pps(&rec->total, &prev->total, t); + err = calc_errs_pps(&rec->total, &prev->total, t); + + print_err(drop, " %-20s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf "\n", + pps ? "kthread total" : "kthread", PPS(pps), DROP(drop), err, + "sched"); + + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + char str[64]; + + pps = calc_pps(r, p, t); + drop = calc_drop_pps(r, p, t); + err = calc_errs_pps(r, p, t); + if (!pps && !drop && !err) + continue; + + snprintf(str, sizeof(str), "cpu:%d", i); + print_default(" %-18s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf + "\n", + str, PPS(pps), DROP(drop), err, "sched"); + } +} + +static void stats_get_redirect_cnt(struct stats_record *stats_rec, + struct stats_record *stats_prev, + int nr_cpus, + struct sample_output *out) +{ + struct record *rec, *prev; + double t, pps; + int i; + + rec = &stats_rec->redir_err[0]; + prev = &stats_prev->redir_err[0]; + t = calc_period(rec, prev); + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + char str[64]; + + pps = calc_pps(r, p, t); + if (!pps) + continue; + + snprintf(str, sizeof(str), "cpu:%d", i); + print_default(" %-18s " FMT_COLUMNf "\n", str, REDIR(pps)); + } + + if (out) { + pps = calc_pps(&rec->total, &prev->total, t); + out->redir_cnt.suc = pps; + out->totals.redir += pps; + } +} + +static void 
stats_get_redirect_err_cnt(struct stats_record *stats_rec, + struct stats_record *stats_prev, + int nr_cpus, + struct sample_output *out) +{ + struct record *rec, *prev; + double t, drop, sum = 0; + int rec_i, i; + + for (rec_i = 1; rec_i < XDP_REDIRECT_ERR_MAX; rec_i++) { + char str[64]; + + rec = &stats_rec->redir_err[rec_i]; + prev = &stats_prev->redir_err[rec_i]; + t = calc_period(rec, prev); + + drop = calc_drop_pps(&rec->total, &prev->total, t); + if (drop > 0 && !out) { + snprintf(str, sizeof(str), + sample_log_level & LL_DEFAULT ? "%s total" : + "%s", + xdp_redirect_err_names[rec_i]); + print_err(drop, " %-18s " FMT_COLUMNf "\n", str, + ERR(drop)); + } + + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + double drop; + + drop = calc_drop_pps(r, p, t); + if (!drop) + continue; + + snprintf(str, sizeof(str), "cpu:%d", i); + print_default(" %-16s" FMT_COLUMNf "\n", str, + ERR(drop)); + } + + sum += drop; + } + + if (out) { + out->redir_cnt.err = sum; + out->totals.err += sum; + } +} + +static void stats_get_exception_cnt(struct stats_record *stats_rec, + struct stats_record *stats_prev, + int nr_cpus, + struct sample_output *out) +{ + double t, drop, sum = 0; + struct record *rec, *prev; + int rec_i, i; + + for (rec_i = 0; rec_i < XDP_ACTION_MAX; rec_i++) { + rec = &stats_rec->exception[rec_i]; + prev = &stats_prev->exception[rec_i]; + t = calc_period(rec, prev); + + drop = calc_drop_pps(&rec->total, &prev->total, t); + /* Fold out errors after heading */ + sum += drop; + + if (drop > 0 && !out) { + print_always(" %-18s " FMT_COLUMNf "\n", + xdp_action2str(rec_i), ERR(drop)); + + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + char str[64]; + double drop; + + drop = calc_drop_pps(r, p, t); + if (!drop) + continue; + + snprintf(str, sizeof(str), "cpu:%d", i); + print_default(" %-16s" FMT_COLUMNf "\n", + str, ERR(drop)); + } + } + } + + if (out) { 
+ out->except_cnt.hits = sum; + out->totals.err += sum; + } +} + +static void stats_get_devmap_xmit(struct stats_record *stats_rec, + struct stats_record *stats_prev, + int nr_cpus, + struct sample_output *out) +{ + double pps, drop, info, err; + struct record *rec, *prev; + double t; + int i; + + rec = &stats_rec->devmap_xmit; + prev = &stats_prev->devmap_xmit; + t = calc_period(rec, prev); + for (i = 0; i < nr_cpus; i++) { + struct datarec *r = &rec->cpu[i]; + struct datarec *p = &prev->cpu[i]; + char str[64]; + + pps = calc_pps(r, p, t); + drop = calc_drop_pps(r, p, t); + err = calc_errs_pps(r, p, t); + + if (!pps && !drop && !err) + continue; + + snprintf(str, sizeof(str), "cpu:%d", i); + info = calc_info_pps(r, p, t); + if (info > 0) + info = (pps + drop) / info; /* calc avg bulk */ + print_default(" %-18s" FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf + __COLUMN(".2f") "\n", + str, XMIT(pps), DROP(drop), err, "drv_err/s", + info, "bulk-avg"); + } + if (out) { + pps = calc_pps(&rec->total, &prev->total, t); + drop = calc_drop_pps(&rec->total, &prev->total, t); + info = calc_info_pps(&rec->total, &prev->total, t); + if (info > 0) + info = (pps + drop) / info; /* calc avg bulk */ + err = calc_errs_pps(&rec->total, &prev->total, t); + + out->xmit_cnt.pps = pps; + out->xmit_cnt.drop = drop; + out->xmit_cnt.bavg = info; + out->xmit_cnt.err = err; + out->totals.xmit += pps; + out->totals.drop_xmit += drop; + out->totals.err += err; + } +} + +static void stats_get_devmap_xmit_multi(struct stats_record *stats_rec, + struct stats_record *stats_prev, + int nr_cpus, + struct sample_output *out) +{ + double pps, drop, info, err; + struct map_entry *entry; + struct record *r, *p; + unsigned int bkt; + double t; + + hash_for_each(stats_rec->xmit_map, bkt, entry, node) { + struct map_entry *e, *x = NULL; + char ifname_from[IFNAMSIZ]; + char ifname_to[IFNAMSIZ]; + const char *fstr, *tstr; + unsigned long prev_time; + struct record beg = {}; + __u32 from_idx, to_idx; + char str[128]; + 
__u64 pair; + int i; + + prev_time = sample_interval * NANOSEC_PER_SEC; + + pair = entry->pair; + from_idx = pair >> 32; + to_idx = pair & 0xFFFFFFFF; + + r = &entry->val; + beg.timestamp = r->timestamp - prev_time; + + /* Find matching entry from stats_prev map */ + hash_for_each_possible(stats_prev->xmit_map, e, node, pair) { + if (e->pair == pair) { + x = e; + break; + } + } + if (x) + p = &x->val; + else + p = &beg; + t = calc_period(r, p); + pps = calc_pps(&r->total, &p->total, t); + drop = calc_drop_pps(&r->total, &p->total, t); + info = calc_info_pps(&r->total, &p->total, t); + if (info > 0) + info = (pps + drop) / info; /* calc avg bulk */ + err = calc_errs_pps(&r->total, &p->total, t); + + if (out) { + /* We are responsible for filling out totals */ + out->totals.xmit += pps; + out->totals.drop_xmit += drop; + out->totals.err += err; + continue; + } + + fstr = tstr = NULL; + if (if_indextoname(from_idx, ifname_from)) + fstr = ifname_from; + if (if_indextoname(to_idx, ifname_to)) + tstr = ifname_to; + + snprintf(str, sizeof(str), "xmit %s->%s", fstr ?: "?", + tstr ?: "?"); + /* Skip idle streams of redirection */ + if (pps || drop || err) { + print_err(drop * !(sample_mask & SAMPLE_DROP_OK), + " %-20s " FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf + __COLUMN(".2f") "\n", str, XMIT(pps), DROP(drop), + err, "drv_err/s", info, "bulk-avg"); + } + + for (i = 0; i < nr_cpus; i++) { + struct datarec *rc = &r->cpu[i]; + struct datarec *pc, p_beg = {}; + char str[64]; + + pc = p == &beg ? 
&p_beg : &p->cpu[i]; + + pps = calc_pps(rc, pc, t); + drop = calc_drop_pps(rc, pc, t); + err = calc_errs_pps(rc, pc, t); + + if (!pps && !drop && !err) + continue; + + snprintf(str, sizeof(str), "cpu:%d", i); + info = calc_info_pps(rc, pc, t); + if (info > 0) + info = (pps + drop) / info; /* calc avg bulk */ + + print_default(" %-18s" FMT_COLUMNf FMT_COLUMNf FMT_COLUMNf + __COLUMN(".2f") "\n", str, XMIT(pps), + DROP(drop), err, "drv_err/s", info, "bulk-avg"); + } + } +} + +static void stats_print(const char *prefix, int mask, struct stats_record *r, + struct stats_record *p, struct sample_output *out) +{ + int nr_cpus = libbpf_num_possible_cpus(); + const char *str; + + print_always("%-23s", prefix ?: "Summary"); + if (mask & SAMPLE_RX_CNT) + print_always(FMT_COLUMNl, RX(out->totals.rx)); + if (mask & SAMPLE_REDIRECT_CNT) + print_always(FMT_COLUMNl, REDIR(out->totals.redir)); + printf(FMT_COLUMNl, + out->totals.err + ((out->totals.drop_xmit + out->totals.drop) * !(mask & SAMPLE_DROP_OK)), + (mask & SAMPLE_DROP_OK) ? "err/s" : "err,drop/s"); + if (mask & SAMPLE_DEVMAP_XMIT_CNT || + mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) + printf(FMT_COLUMNl, XMIT(out->totals.xmit)); + printf("\n"); + + if (mask & SAMPLE_RX_CNT) { + str = (sample_log_level & LL_DEFAULT) && out->rx_cnt.pps ? + "receive total" : + "receive"; + print_err((out->rx_cnt.err || (out->rx_cnt.drop && !(mask & SAMPLE_DROP_OK))), + " %-20s " FMT_COLUMNl FMT_COLUMNl FMT_COLUMNl "\n", + str, PPS(out->rx_cnt.pps), DROP(out->rx_cnt.drop), + ERR(out->rx_cnt.err)); + + stats_get_rx_cnt(r, p, nr_cpus, NULL); + } + + if (mask & SAMPLE_RXQ_STATS) + stats_get_rxq_cnt(r, p); + + if (mask & SAMPLE_CPUMAP_ENQUEUE_CNT) + stats_get_cpumap_enqueue(r, p, nr_cpus); + + if (mask & SAMPLE_CPUMAP_KTHREAD_CNT) { + stats_get_cpumap_kthread(r, p, nr_cpus); + stats_get_cpumap_remote(r, p, nr_cpus); + } + + if (mask & SAMPLE_REDIRECT_CNT) { + str = out->redir_cnt.suc ? 
"redirect total" : "redirect"; + print_default(" %-20s " FMT_COLUMNl "\n", str, + REDIR(out->redir_cnt.suc)); + + stats_get_redirect_cnt(r, p, nr_cpus, NULL); + } + + if (mask & SAMPLE_REDIRECT_ERR_CNT) { + str = (sample_log_level & LL_DEFAULT) && out->redir_cnt.err ? + "redirect_err total" : + "redirect_err"; + print_err(out->redir_cnt.err, " %-20s " FMT_COLUMNl "\n", str, + ERR(out->redir_cnt.err)); + + stats_get_redirect_err_cnt(r, p, nr_cpus, NULL); + } + + if (mask & SAMPLE_EXCEPTION_CNT) { + str = out->except_cnt.hits ? "xdp_exception total" : + "xdp_exception"; + + print_err(out->except_cnt.hits, " %-20s " FMT_COLUMNl "\n", str, + HITS(out->except_cnt.hits)); + + stats_get_exception_cnt(r, p, nr_cpus, NULL); + } + + if (mask & SAMPLE_DEVMAP_XMIT_CNT) { + str = (sample_log_level & LL_DEFAULT) && out->xmit_cnt.pps ? + "devmap_xmit total" : + "devmap_xmit"; + + print_err(out->xmit_cnt.err || out->xmit_cnt.drop, + " %-20s " FMT_COLUMNl FMT_COLUMNl FMT_COLUMNl + __COLUMN(".2f") "\n", + str, XMIT(out->xmit_cnt.pps), + DROP(out->xmit_cnt.drop), (uint64_t)out->xmit_cnt.err, + "drv_err/s", out->xmit_cnt.bavg, "bulk-avg"); + + stats_get_devmap_xmit(r, p, nr_cpus, NULL); + } + + if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) + stats_get_devmap_xmit_multi(r, p, nr_cpus, NULL); + + if (sample_log_level & LL_DEFAULT || + ((sample_log_level & LL_SIMPLE) && sample_err_exp)) { + sample_err_exp = false; + printf("\n"); + } +} + +static int get_num_rxqs(const char *ifname) +{ + struct ethtool_channels ch = { + .cmd = ETHTOOL_GCHANNELS, + }; + + struct ifreq ifr = { + .ifr_data = (void *)&ch, + }; + int fd, ret; + + if (!ifname || strlen(ifname) > sizeof(ifr.ifr_name) - 1) + return 0; + + strcpy(ifr.ifr_name, ifname); + fd = socket(AF_UNIX, SOCK_DGRAM, 0); + if (fd < 0) { + ret = -errno; + pr_warn("Couldn't open socket socket: %s\n", strerror(-ret)); + return ret; + } + + ret = ioctl(fd, SIOCETHTOOL, &ifr); + if (ret < 0) { + ret = -errno; + pr_debug("Error in ethtool ioctl: %s\n", 
strerror(-ret)); + goto out; + } + + ret = ch.rx_count + ch.combined_count; +out: + close(fd); + pr_debug("Got %d queues for ifname %s\n", ret, ifname); + return ret; +} + + +int sample_setup_maps(struct bpf_map **maps, const char *ifname) +{ + sample_n_cpus = libbpf_num_possible_cpus(); + + for (int i = 0; i < MAP_DEVMAP_XMIT_MULTI; i++) { + sample_map[i] = maps[i]; + int n_cpus; + + switch (i) { + case MAP_RX: + case MAP_CPUMAP_KTHREAD: + case MAP_DEVMAP_XMIT: + sample_map_count[i] = sample_n_cpus; + break; + case MAP_RXQ: + sample_n_rxqs = get_num_rxqs(ifname); + sample_map_count[i] = sample_n_rxqs > 0 ? sample_n_rxqs : 1; + break; + case MAP_REDIRECT_ERR: + sample_map_count[i] = + XDP_REDIRECT_ERR_MAX * sample_n_cpus; + break; + case MAP_EXCEPTION: + sample_map_count[i] = XDP_ACTION_MAX * sample_n_cpus; + break; + case MAP_CPUMAP_ENQUEUE: + if (__builtin_mul_overflow(sample_n_cpus, sample_n_cpus, &n_cpus)) + return -EOVERFLOW; + sample_map_count[i] = n_cpus; + break; + default: + return -EINVAL; + } + if (bpf_map__set_max_entries(sample_map[i], sample_map_count[i]) < 0) + return -errno; + } + sample_map[MAP_DEVMAP_XMIT_MULTI] = maps[MAP_DEVMAP_XMIT_MULTI]; + return 0; +} + +static int sample_setup_maps_mappings(void) +{ + for (int i = 0; i < MAP_DEVMAP_XMIT_MULTI; i++) { + size_t size = sample_map_count[i] * sizeof(struct datarec); + + sample_mmap[i] = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_SHARED, bpf_map__fd(sample_map[i]), 0); + if (sample_mmap[i] == MAP_FAILED) + return -errno; + } + return 0; +} + +int __sample_init(int mask, int ifindex_from, int ifindex_to) +{ + sigset_t st; + + if (mask & SAMPLE_RXQ_STATS && sample_n_rxqs <= 0) { + pr_warn("Couldn't retrieve the number of RXQs, so can't enable RXQ stats\n"); + return -EINVAL; + } + + sigemptyset(&st); + sigaddset(&st, SIGQUIT); + sigaddset(&st, SIGINT); + sigaddset(&st, SIGTERM); + + if (sigprocmask(SIG_BLOCK, &st, NULL) < 0) + return -errno; + + sample_sig_fd = signalfd(-1, &st, SFD_CLOEXEC | 
SFD_NONBLOCK); + if (sample_sig_fd < 0) + return -errno; + + sample_mask = mask; + ifindex[0] = ifindex_from; + ifindex[1] = ifindex_to; + + return sample_setup_maps_mappings(); +} + +static void sample_summary_print(void) +{ + double num = sample_out.rx_cnt.num; + + if (sample_out.totals.rx) { + double pkts = sample_out.totals.rx; + + print_always(" Packets received : %'-10" PRIu64 "\n", + (uint64_t)sample_out.totals.rx); + print_always(" Average packets/s : %'-10.0f\n", + sample_round(pkts / num)); + } + if (sample_out.totals.redir) { + double pkts = sample_out.totals.redir; + + print_always(" Packets redirected : %'-10" PRIu64 "\n", + sample_out.totals.redir); + print_always(" Average redir/s : %'-10.0f\n", + sample_round(pkts / num)); + } + if (sample_out.totals.drop) + print_always(" Rx dropped : %'-10" PRIu64 "\n", + sample_out.totals.drop); + if (sample_out.totals.drop_xmit) + print_always(" Tx dropped : %'-10" PRIu64 "\n", + sample_out.totals.drop_xmit); + if (sample_out.totals.err) + print_always(" Errors recorded : %'-10" PRIu64 "\n", + sample_out.totals.err); + if (sample_out.totals.xmit) { + double pkts = sample_out.totals.xmit; + + print_always(" Packets transmitted : %'-10" PRIu64 "\n", + sample_out.totals.xmit); + print_always(" Average transmit/s : %'-10.0f\n", + sample_round(pkts / num)); + } +} + +void sample_teardown(void) +{ + size_t size; + + for (int i = 0; i < NUM_MAP; i++) { + size = sample_map_count[i] * sizeof(**sample_mmap); + munmap(sample_mmap[i], size); + } + sample_summary_print(); + close(sample_sig_fd); +} + +static int sample_stats_collect(struct stats_record *rec) +{ + int i; + + if (sample_mask & SAMPLE_RX_CNT) + map_collect_percpu(sample_mmap[MAP_RX], &rec->rx_cnt); + + if (sample_mask & SAMPLE_RXQ_STATS) + map_collect_rxqs(sample_mmap[MAP_RXQ], &rec->rxq_cnt); + + if (sample_mask & SAMPLE_REDIRECT_CNT) + map_collect_percpu(sample_mmap[MAP_REDIRECT_ERR], &rec->redir_err[0]); + + if (sample_mask & SAMPLE_REDIRECT_ERR_CNT) { + for 
(i = 1; i < XDP_REDIRECT_ERR_MAX; i++) + map_collect_percpu(&sample_mmap[MAP_REDIRECT_ERR][i * sample_n_cpus], + &rec->redir_err[i]); + } + + if (sample_mask & SAMPLE_CPUMAP_ENQUEUE_CNT) + for (i = 0; i < sample_n_cpus; i++) + map_collect_percpu(&sample_mmap[MAP_CPUMAP_ENQUEUE][i * sample_n_cpus], + &rec->enq[i]); + + if (sample_mask & SAMPLE_CPUMAP_KTHREAD_CNT) + map_collect_percpu(sample_mmap[MAP_CPUMAP_KTHREAD], + &rec->kthread); + + if (sample_mask & SAMPLE_EXCEPTION_CNT) + for (i = 0; i < XDP_ACTION_MAX; i++) + map_collect_percpu(&sample_mmap[MAP_EXCEPTION][i * sample_n_cpus], + &rec->exception[i]); + + if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT) + map_collect_percpu(sample_mmap[MAP_DEVMAP_XMIT], &rec->devmap_xmit); + + if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) { + if (map_collect_percpu_devmap(bpf_map__fd(sample_map[MAP_DEVMAP_XMIT_MULTI]), rec) < 0) + return -EINVAL; + } + return 0; +} + +static void sample_summary_update(struct sample_output *out) +{ + sample_out.totals.rx += out->totals.rx; + sample_out.totals.redir += out->totals.redir; + sample_out.totals.drop += out->totals.drop; + sample_out.totals.drop_xmit += out->totals.drop_xmit; + sample_out.totals.err += out->totals.err; + sample_out.totals.xmit += out->totals.xmit; + sample_out.rx_cnt.num++; +} + +static void sample_stats_print(int mask, struct stats_record *cur, + struct stats_record *prev, char *prog_name) +{ + struct sample_output out = {}; + + if (mask & SAMPLE_RX_CNT) + stats_get_rx_cnt(cur, prev, 0, &out); + if (mask & SAMPLE_REDIRECT_CNT) + stats_get_redirect_cnt(cur, prev, 0, &out); + if (mask & SAMPLE_REDIRECT_ERR_CNT) + stats_get_redirect_err_cnt(cur, prev, 0, &out); + if (mask & SAMPLE_EXCEPTION_CNT) + stats_get_exception_cnt(cur, prev, 0, &out); + if (mask & SAMPLE_DEVMAP_XMIT_CNT) + stats_get_devmap_xmit(cur, prev, 0, &out); + else if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) + stats_get_devmap_xmit_multi(cur, prev, 0, &out); + sample_summary_update(&out); + + stats_print(prog_name, 
mask, cur, prev, &out); +} + +void sample_switch_mode(void) +{ + sample_log_level ^= LL_DEBUG - 1; +} + +static int sample_signal_cb(void) +{ + struct signalfd_siginfo si; + int r; + + r = read(sample_sig_fd, &si, sizeof(si)); + if (r < 0) + return -errno; + + switch (si.ssi_signo) { + case SIGQUIT: + sample_switch_mode(); + printf("\n"); + break; + default: + printf("\n"); + return 1; + } + + return 0; +} + +/* Pointer swap trick */ +static void swap(struct stats_record **a, struct stats_record **b) +{ + struct stats_record *tmp; + + tmp = *a; + *a = *b; + *b = tmp; +} + +static int sample_timer_cb(int timerfd, struct stats_record **rec, + struct stats_record **prev) +{ + char line[64] = "Summary"; + int ret; + __u64 t; + + ret = read(timerfd, &t, sizeof(t)); + if (ret < 0) + return -errno; + + swap(prev, rec); + ret = sample_stats_collect(*rec); + if (ret < 0) + return ret; + + if (ifindex[0] && !(sample_mask & SAMPLE_SKIP_HEADING)) { + char fi[IFNAMSIZ]; + char to[IFNAMSIZ]; + const char *f, *t; + + f = t = NULL; + if (if_indextoname(ifindex[0], fi)) + f = fi; + if (if_indextoname(ifindex[1], to)) + t = to; + + snprintf(line, sizeof(line), "%s->%s", f ?: "?", t ?: "?"); + } + + sample_stats_print(sample_mask, *rec, *prev, line); + return 0; +} + +int sample_run(int interval, void (*post_cb)(void *), void *ctx) +{ + struct timespec ts = { interval, 0 }; + struct itimerspec its = { ts, ts }; + struct stats_record *rec, *prev; + struct pollfd pfd[2] = {}; + bool imm_exit = false; + const char *envval; + int timerfd, ret; + + envval = secure_getenv("XDP_SAMPLE_IMMEDIATE_EXIT"); + if (envval && envval[0] == '1' && envval[1] == '\0') { + pr_debug("XDP_SAMPLE_IMMEDIATE_EXIT envvar set, exiting immediately after setup\n"); + imm_exit = true; + } + + if (!interval) { + pr_warn("Incorrect interval 0\n"); + return -EINVAL; + } + sample_interval = interval; + /* Pretty print numbers */ + setlocale(LC_NUMERIC, "en_US.UTF-8"); + + timerfd = timerfd_create(CLOCK_MONOTONIC, 
TFD_CLOEXEC | TFD_NONBLOCK); + if (timerfd < 0) + return -errno; + timerfd_settime(timerfd, 0, &its, NULL); + + pfd[0].fd = sample_sig_fd; + pfd[0].events = POLLIN; + + pfd[1].fd = timerfd; + pfd[1].events = POLLIN; + + ret = -ENOMEM; + rec = alloc_stats_record(); + if (!rec) + goto end; + prev = alloc_stats_record(); + if (!prev) + goto end_rec; + + ret = sample_stats_collect(rec); + if (ret < 0) + goto end_rec_prev; + + if (imm_exit) + goto end_rec_prev; + + for (;;) { + ret = poll(pfd, 2, -1); + if (ret < 0) { + if (errno == EINTR) + continue; + else + break; + } + + if (pfd[0].revents & POLLIN) + ret = sample_signal_cb(); + else if (pfd[1].revents & POLLIN) + ret = sample_timer_cb(timerfd, &rec, &prev); + + if (ret) + break; + + if (post_cb) + post_cb(ctx); + } + +end_rec_prev: + free_stats_record(prev); +end_rec: + free_stats_record(rec); +end: + close(timerfd); + + return ret; +} + +const char *get_driver_name(int ifindex) +{ + struct ethtool_drvinfo drv = {}; + char ifname[IF_NAMESIZE]; + static char drvname[32]; + struct ifreq ifr = {}; + int fd, r = 0; + + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) + return "[error]"; + + if (!if_indextoname(ifindex, ifname)) + goto end; + + drv.cmd = ETHTOOL_GDRVINFO; + safe_strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); + ifr.ifr_data = (void *)&drv; + + r = ioctl(fd, SIOCETHTOOL, &ifr); + if (r) + goto end; + + safe_strncpy(drvname, drv.driver, sizeof(drvname)); + + close(fd); + return drvname; + +end: + r = errno; + close(fd); + return r == EOPNOTSUPP ? 
"loopback" : "[error]"; +} + +int get_mac_addr(int ifindex, void *mac_addr) +{ + char ifname[IF_NAMESIZE]; + struct ifreq ifr = {}; + int fd, r; + + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) + return -errno; + + if (!if_indextoname(ifindex, ifname)) { + r = -errno; + goto end; + } + + safe_strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); + + r = ioctl(fd, SIOCGIFHWADDR, &ifr); + if (r) { + r = -errno; + goto end; + } + + memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char)); + +end: + close(fd); + return r; +} diff --git a/lib/util/xdp_sample.h b/lib/util/xdp_sample.h new file mode 100644 index 0000000..701f83f --- /dev/null +++ b/lib/util/xdp_sample.h @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0-only +#ifndef XDP_SAMPLE_USER_H +#define XDP_SAMPLE_USER_H + +#include <bpf/libbpf.h> +#include <getopt.h> + +#include <xdp/xdp_sample_shared.h> + +enum stats_mask { + _SAMPLE_REDIRECT_MAP = 1U << 0, + SAMPLE_RX_CNT = 1U << 1, + SAMPLE_REDIRECT_ERR_CNT = 1U << 2, + SAMPLE_CPUMAP_ENQUEUE_CNT = 1U << 3, + SAMPLE_CPUMAP_KTHREAD_CNT = 1U << 4, + SAMPLE_EXCEPTION_CNT = 1U << 5, + SAMPLE_DEVMAP_XMIT_CNT = 1U << 6, + SAMPLE_REDIRECT_CNT = 1U << 7, + SAMPLE_REDIRECT_MAP_CNT = SAMPLE_REDIRECT_CNT | _SAMPLE_REDIRECT_MAP, + SAMPLE_REDIRECT_ERR_MAP_CNT = SAMPLE_REDIRECT_ERR_CNT | _SAMPLE_REDIRECT_MAP, + SAMPLE_DEVMAP_XMIT_CNT_MULTI = 1U << 8, + SAMPLE_SKIP_HEADING = 1U << 9, + SAMPLE_RXQ_STATS = 1U << 10, + SAMPLE_DROP_OK = 1U << 11, +}; + +enum sample_compat { + SAMPLE_COMPAT_CPUMAP_KTHREAD, + __SAMPLE_COMPAT_MAX +}; +#define SAMPLE_COMPAT_MAX __SAMPLE_COMPAT_MAX + +/* Exit return codes */ +#define EXIT_OK 0 +#define EXIT_FAIL 1 +#define EXIT_FAIL_OPTION 2 +#define EXIT_FAIL_XDP 3 +#define EXIT_FAIL_BPF 4 +#define EXIT_FAIL_MEM 5 + +int sample_setup_maps(struct bpf_map **maps, const char *ifname); +int __sample_init(int mask, int ifindex_from, int ifindex_to); +void sample_teardown(void); +int sample_run(int interval, void (*post_cb)(void *), void *ctx); 
+bool sample_is_compat(enum sample_compat compat_value); +bool sample_probe_cpumap_compat(void); +void sample_check_cpumap_compat(struct bpf_program *prog, + struct bpf_program *prog_compat); + +void sample_switch_mode(void); + +const char *get_driver_name(int ifindex); +int get_mac_addr(int ifindex, void *mac_addr); + +#pragma GCC diagnostic push +#ifndef __clang__ +#pragma GCC diagnostic ignored "-Wstringop-truncation" +#endif +__attribute__((unused)) +static inline char *safe_strncpy(char *dst, const char *src, size_t size) +{ + if (!size) + return dst; + strncpy(dst, src, size - 1); + dst[size - 1] = '\0'; + return dst; +} +#pragma GCC diagnostic pop + +#define __attach_tp(name) \ + ({ \ + if (bpf_program__type(skel->progs.name) != BPF_PROG_TYPE_TRACING)\ + return -EINVAL; \ + skel->links.name = bpf_program__attach(skel->progs.name); \ + if (!skel->links.name) \ + return -errno; \ + }) + +#define __attach_tp_compat(name, name_compat, _compat) \ + ({ \ + if (sample_is_compat(SAMPLE_COMPAT_ ## _compat)) \ + __attach_tp(name); \ + else \ + __attach_tp(name_compat); \ + }) + +#define sample_init_pre_load(skel, ifname) \ + ({ \ + skel->rodata->nr_cpus = libbpf_num_possible_cpus(); \ + sample_check_cpumap_compat(skel->progs.tp_xdp_cpumap_kthread, \ + skel->progs.tp_xdp_cpumap_compat); \ + sample_setup_maps((struct bpf_map *[]){ \ + skel->maps.rx_cnt, skel->maps.rxq_cnt, \ + skel->maps.redir_err_cnt, \ + skel->maps.cpumap_enqueue_cnt, \ + skel->maps.cpumap_kthread_cnt, \ + skel->maps.exception_cnt, skel->maps.devmap_xmit_cnt, \ + skel->maps.devmap_xmit_cnt_multi}, ifname); \ + }) + +#define DEFINE_SAMPLE_INIT(name) \ + static int sample_init(struct name *skel, int sample_mask, \ + int ifindex_from, int ifindex_to) \ + { \ + int ret; \ + ret = __sample_init(sample_mask, ifindex_from, \ + ifindex_to); \ + if (ret < 0) \ + return ret; \ + if (sample_mask & SAMPLE_REDIRECT_MAP_CNT) \ + __attach_tp(tp_xdp_redirect_map); \ + if (sample_mask & SAMPLE_REDIRECT_CNT) \ + 
__attach_tp(tp_xdp_redirect); \ + if (sample_mask & SAMPLE_REDIRECT_ERR_MAP_CNT) \ + __attach_tp(tp_xdp_redirect_map_err); \ + if (sample_mask & SAMPLE_REDIRECT_ERR_CNT) \ + __attach_tp(tp_xdp_redirect_err); \ + if (sample_mask & SAMPLE_CPUMAP_ENQUEUE_CNT) \ + __attach_tp(tp_xdp_cpumap_enqueue); \ + if (sample_mask & SAMPLE_CPUMAP_KTHREAD_CNT) \ + __attach_tp_compat(tp_xdp_cpumap_kthread, \ + tp_xdp_cpumap_compat, \ + CPUMAP_KTHREAD); \ + if (sample_mask & SAMPLE_EXCEPTION_CNT) \ + __attach_tp(tp_xdp_exception); \ + if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT) \ + __attach_tp(tp_xdp_devmap_xmit); \ + if (sample_mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI) \ + __attach_tp(tp_xdp_devmap_xmit_multi); \ + return 0; \ + } + +#endif diff --git a/lib/util/xpcapng.c b/lib/util/xpcapng.c new file mode 100644 index 0000000..e453b88 --- /dev/null +++ b/lib/util/xpcapng.c @@ -0,0 +1,635 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Description: + * Simple PcapNG library developed from scratch as no library existed that + * met the requirements for xdpdump. It can also be used by other XDP + * applications that would like to capture packets for debugging purposes. 
+ */ + +/***************************************************************************** + * Include files + *****************************************************************************/ +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <inttypes.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <sys/uio.h> + +#include "xpcapng.h" + +/***************************************************************************** + * Simple roundup() macro + *****************************************************************************/ +#ifndef roundup +#define roundup(x, y) ( \ +{ \ + typeof(y) __y = y; \ + (((x) + (__y - 1)) / __y) * __y; \ +} \ +) +#endif + +/***************************************************************************** + * pcapng_dumper structure + *****************************************************************************/ +struct xpcapng_dumper { + int pd_fd; + uint32_t pd_interfaces; +}; + +/***************************************************************************** + * general pcapng block and option definitions + *****************************************************************************/ +enum pcapng_block_types { + PCAPNG_SECTION_BLOCK = 0x0A0D0D0A, + PCAPNG_INTERFACE_BLOCK = 1, + PCAPNG_PACKET_BLOCK, + PCAPNG_SIMPLE_PACKET_BLOCK, + PCAPNG_NAME_RESOLUTION_BLOCK, + PCAPNG_INTERFACE_STATS_BLOCK, + PCAPNG_ENHANCED_PACKET_BLOCK +}; + +struct pcapng_option { + uint16_t po_type; + uint16_t po_length; + uint8_t po_data[]; +} __attribute__((__packed__)); + +enum pcapng_opt { + PCAPNG_OPT_END = 0, + PCAPNG_OPT_COMMENT = 1, + /* Each custom option constant carries its numeric option code in its name. */ + PCAPNG_OPT_CUSTOME_2988 = 2988, + PCAPNG_OPT_CUSTOME_2989 = 2989, + PCAPNG_OPT_CUSTOME_19372 = 19372, + PCAPNG_OPT_CUSTOME_19373 = 19373 +}; + +/***************************************************************************** + * pcapng section header block definitions + *****************************************************************************/
+struct pcapng_section_header_block { + uint32_t shb_block_type; + uint32_t shb_block_length; + uint32_t shb_byte_order_magic; + uint16_t shb_major_version; + uint16_t shb_minor_version; + uint64_t shb_section_length; + uint8_t shb_options[]; + /* The options are followed by another: + * uint32_t shb_block_length; + */ +} __attribute__((__packed__)); + +#define PCAPNG_BYTE_ORDER_MAGIC 0x1A2B3C4D +#define PCAPNG_MAJOR_VERSION 1 +#define PCAPNG_MINOR_VERSION 0 + +enum pcapng_opt_shb { + PCAPNG_OPT_SHB_HARDWARE = 2, + PCAPNG_OPT_SHB_OS, + PCAPNG_OPT_SHB_USERAPPL +}; + +/***************************************************************************** + * pcapng interface description block definitions + *****************************************************************************/ +struct pcapng_interface_description_block { + uint32_t idb_block_type; + uint32_t idb_block_length; + uint16_t idb_link_type; + uint16_t idb_reserved; + uint32_t idb_snap_len; + uint8_t idb_options[]; + /* The options are followed by another: + * uint32_t idb_block_length; + */ +} __attribute__((__packed__)); + +enum pcapng_opt_idb { + PCAPNG_OPT_IDB_IF_NAME = 2, + PCAPNG_OPT_IDB_IF_DESCRIPTION, + PCAPNG_OPT_IDB_IF_IPV4_ADDR, + PCAPNG_OPT_IDB_IF_IPV6_ADDR, + PCAPNG_OPT_IDB_IF_MAC_ADDR, + PCAPNG_OPT_IDB_IF_EUI_ADDR, + PCAPNG_OPT_IDB_IF_SPEED, + PCAPNG_OPT_IDB_IF_TSRESOL, + PCAPNG_OPT_IDB_IF_TZONE, + PCAPNG_OPT_IDB_IF_FILTER, + PCAPNG_OPT_IDB_IF_OS, + PCAPNG_OPT_IDB_IF_FCSLEN, + PCAPNG_OPT_IDB_IF_TOFFSET, + PCAPNG_OPT_IDB_IF_HARDWARE +}; + +/***************************************************************************** + * pcapng enhanced packet block definitions + *****************************************************************************/ +struct pcapng_enhanced_packet_block { + uint32_t epb_block_type; + uint32_t epb_block_length; + uint32_t epb_interface_id; + uint32_t epb_timestamp_hi; + uint32_t epb_timestamp_low; + uint32_t epb_captured_length; + uint32_t epb_original_length; +
uint8_t epb_packet_data[]; + /* The packet data is followed by: + * uint8_t epb_options[]; + * uint32_t epb_block_length; + */ +} __attribute__((__packed__)); + +enum pcapng_opt_epb { + PCAPNG_OPT_EPB_FLAGS = 2, + PCAPNG_OPT_EPB_HASH, + PCAPNG_OPT_EPB_DROPCOUNT, + PCAPNG_OPT_EPB_PACKETID, + PCAPNG_OPT_EPB_QUEUE, + PCAPNG_OPT_EPB_VERDICT +}; + +enum pcapng_epb_vedict_type { + PCAPNG_EPB_VEDRICT_TYPE_HARDWARE = 0, + PCAPNG_EPB_VEDRICT_TYPE_EBPF_TC, + PCAPNG_EPB_VEDRICT_TYPE_EBPF_XDP +}; + +/***************************************************************************** + * pcapng_get_option_length() + *****************************************************************************/ +static size_t pcapng_get_option_length(size_t len) +{ + return roundup(sizeof(struct pcapng_option) + len, sizeof(uint32_t)); +} + +/***************************************************************************** + * pcapng_add_option() + *****************************************************************************/ +static struct pcapng_option *pcapng_add_option(struct pcapng_option *opt, + uint16_t type, uint16_t length, + const void *data) +{ + if (opt == NULL) + return NULL; + + opt->po_type = type; + opt->po_length = length; + if (data) + memcpy(opt->po_data, data, length); + + return (struct pcapng_option *) + ((uint8_t *)opt + pcapng_get_option_length(length)); +} + +/***************************************************************************** + * pcapng_write_shb() + *****************************************************************************/ +static bool pcapng_write_shb(struct xpcapng_dumper *pd, const char *comment, + const char *hardware, const char *os, + const char *user_application) +{ + int rc; + size_t shb_length; + struct pcapng_section_header_block *shb; + struct pcapng_option *opt; + + if (pd == NULL) { + errno = EINVAL; + return false; + } + + /* First calculate the total length of the SHB. 
*/ + shb_length = sizeof(*shb); + + if (comment) + shb_length += pcapng_get_option_length(strlen(comment)); + + if (hardware) + shb_length += pcapng_get_option_length(strlen(hardware)); + + if (os) + shb_length += pcapng_get_option_length(strlen(os)); + + if (user_application) + shb_length += pcapng_get_option_length( + strlen(user_application)); + + shb_length += pcapng_get_option_length(0); + shb_length += sizeof(uint32_t); + + /* Allocate the SHB and fill it. */ + shb = calloc(shb_length, 1); + if (shb == NULL) { + errno = ENOMEM; + return false; + } + + shb->shb_block_type = PCAPNG_SECTION_BLOCK; + shb->shb_block_length = shb_length; + shb->shb_byte_order_magic = PCAPNG_BYTE_ORDER_MAGIC; + shb->shb_major_version = PCAPNG_MAJOR_VERSION; + shb->shb_minor_version = PCAPNG_MINOR_VERSION; + shb->shb_section_length = UINT64_MAX; + + /* Add the options and block_length value */ + opt = (struct pcapng_option *) &shb->shb_options; + + if (comment) + opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT, + strlen(comment), comment); + + if (hardware) + opt = pcapng_add_option(opt, PCAPNG_OPT_SHB_HARDWARE, + strlen(hardware), hardware); + + if (os) + opt = pcapng_add_option(opt, PCAPNG_OPT_SHB_OS, + strlen(os), os); + + if (user_application) + opt = pcapng_add_option(opt, PCAPNG_OPT_SHB_USERAPPL, + strlen(user_application), + user_application); + /* WARNING: If a new option is added, make sure the length calculation + * above is also updated! + */ + + opt = pcapng_add_option(opt, PCAPNG_OPT_END, 0, NULL); + memcpy(opt, &shb->shb_block_length, sizeof(shb->shb_block_length)); + + /* Write the SHB, and free its memory. 
*/ + rc = write(pd->pd_fd, shb, shb_length); + free(shb); + + if ((size_t)rc != shb_length) + return false; + + return true; +} + +/***************************************************************************** + * pcapng_write_idb() + *****************************************************************************/ +static bool pcapng_write_idb(struct xpcapng_dumper *pd, const char *name, + uint16_t snap_len, const char *description, + const uint8_t *mac, uint64_t speed, + uint8_t ts_resolution, const char *hardware) +{ + int rc; + size_t idb_length; + struct pcapng_interface_description_block *idb; + struct pcapng_option *opt; + + if (pd == NULL) { + errno = EINVAL; + return false; + } + + /* First calculate the total length of the IDB. */ + idb_length = sizeof(*idb); + + if (name) + idb_length += pcapng_get_option_length(strlen(name)); + + if (description) + idb_length += pcapng_get_option_length(strlen(description)); + + if (mac) + idb_length += pcapng_get_option_length(6); + + if (speed) + idb_length += pcapng_get_option_length(sizeof(uint64_t)); + + if (ts_resolution != 6 && ts_resolution != 0) + idb_length += pcapng_get_option_length(1); + + if (hardware) + idb_length += pcapng_get_option_length(strlen(hardware)); + + idb_length += pcapng_get_option_length(0); + idb_length += sizeof(uint32_t); + + /* Allocate the IDB and fill it. 
*/ + idb = calloc(idb_length, 1); + if (idb == NULL) { + errno = ENOMEM; + return false; + } + + idb->idb_block_type = PCAPNG_INTERFACE_BLOCK; + idb->idb_block_length = idb_length; + idb->idb_link_type = 1; /* Ethernet */ + idb->idb_snap_len = snap_len; + + /* Add the options and block_length value */ + opt = (struct pcapng_option *) &idb->idb_options; + + if (name) + opt = pcapng_add_option(opt, PCAPNG_OPT_IDB_IF_NAME, + strlen(name), name); + + if (description) + opt = pcapng_add_option(opt, PCAPNG_OPT_IDB_IF_DESCRIPTION, + strlen(description), description); + + if (mac) + opt = pcapng_add_option(opt, PCAPNG_OPT_IDB_IF_MAC_ADDR, 6, + mac); + + if (speed) + opt = pcapng_add_option(opt, PCAPNG_OPT_IDB_IF_SPEED, + sizeof(uint64_t), &speed); + + if (ts_resolution != 6 && ts_resolution != 0) + opt = pcapng_add_option(opt, PCAPNG_OPT_IDB_IF_TSRESOL, + sizeof(uint8_t), &ts_resolution); + + if (hardware) + opt = pcapng_add_option(opt, PCAPNG_OPT_IDB_IF_HARDWARE, + strlen(hardware), hardware); + /* WARNING: If a new option is added, make sure the length calculation + * above is also updated! + */ + + opt = pcapng_add_option(opt, PCAPNG_OPT_END, 0, NULL); + memcpy(opt, &idb->idb_block_length, sizeof(idb->idb_block_length)); + + /* Write the IDB, and free it's memory. 
*/ + rc = write(pd->pd_fd, idb, idb_length); + free(idb); + + if ((size_t)rc != idb_length) + return false; + + return true; +} + +/***************************************************************************** + * pcapng_write_epb() + *****************************************************************************/ +static bool pcapng_write_epb(struct xpcapng_dumper *pd, uint32_t ifid, + const uint8_t *pkt, uint32_t len, + uint32_t caplen, uint64_t timestamp, + struct xpcapng_epb_options_s *epb_options) +{ + int i = 0; + int rc; + size_t pad_length; + size_t com_length = 0; + size_t epb_length; + struct pcapng_enhanced_packet_block epb; + struct pcapng_option *opt; + struct iovec iov[7]; + static uint8_t pad[4] = {0, 0, 0, 0}; + uint8_t options[8 + 12 + 12 + 8 + 16 + 4 + 4]; + /* PCAPNG_OPT_EPB_FLAGS[8] + + * PCAPNG_OPT_EPB_DROPCOUNT[12] + + * PCAPNG_OPT_EPB_PACKETID[12] + + * PCAPNG_OPT_EPB_QUEUE[8] + + * PCAPNG_OPT_EPB_VERDICT[16] + + * PCAPNG_OPT_END[4] + + * epb_block_length + */ + static struct xdp_verdict { + uint8_t type; + int64_t verdict; + }__attribute__((__packed__)) verdict = { + PCAPNG_EPB_VEDRICT_TYPE_EBPF_XDP, 0 }; + + if (pd == NULL) { + errno = EINVAL; + return false; + } + + /* First calculate the total length of the EPB. 
*/ + pad_length = roundup(caplen, sizeof(uint32_t)) - caplen; + + epb_length = sizeof(epb); + epb_length += caplen + pad_length; + + if (epb_options->flags) + epb_length += pcapng_get_option_length(sizeof(uint32_t)); + + if (epb_options->dropcount) + epb_length += pcapng_get_option_length(sizeof(uint64_t)); + + if (epb_options->packetid) + epb_length += pcapng_get_option_length(sizeof(uint64_t)); + + if (epb_options->queue) + epb_length += pcapng_get_option_length(sizeof(uint32_t)); + + if (epb_options->xdp_verdict) + epb_length += pcapng_get_option_length(sizeof(verdict)); + + if (epb_options->comment) { + com_length = strlen(epb_options->comment); + epb_length += pcapng_get_option_length(com_length); + } + + epb_length += pcapng_get_option_length(0); + epb_length += sizeof(uint32_t); + + /* Fill in the EPB. */ + epb.epb_block_type = PCAPNG_ENHANCED_PACKET_BLOCK; + epb.epb_block_length = epb_length; + epb.epb_interface_id = ifid; + epb.epb_timestamp_hi = timestamp >> 32; + epb.epb_timestamp_low = (uint32_t) timestamp; + epb.epb_captured_length = caplen; + epb.epb_original_length = len; + + /* Add the flag/end option and block_length value */ + opt = (struct pcapng_option *) options; + + if (epb_options->flags) + opt = pcapng_add_option(opt, PCAPNG_OPT_EPB_FLAGS, + sizeof(uint32_t), &epb_options->flags); + + if (epb_options->dropcount) + opt = pcapng_add_option(opt, PCAPNG_OPT_EPB_DROPCOUNT, + sizeof(uint64_t), + &epb_options->dropcount); + + if (epb_options->packetid) + opt = pcapng_add_option(opt, PCAPNG_OPT_EPB_PACKETID, + sizeof(uint64_t), + epb_options->packetid); + + if (epb_options->queue) + opt = pcapng_add_option(opt, PCAPNG_OPT_EPB_QUEUE, + sizeof(uint32_t), epb_options->queue); + + if (epb_options->xdp_verdict) { + verdict.verdict = *epb_options->xdp_verdict; + opt = pcapng_add_option(opt, PCAPNG_OPT_EPB_VERDICT, + sizeof(verdict), &verdict); + } + /* WARNING: If a new option is added, make sure the length calculation + * and the options[] variable above 
are also updated! + */ + + opt = pcapng_add_option(opt, PCAPNG_OPT_END, 0, NULL); + memcpy(opt, &epb.epb_block_length, sizeof(epb.epb_block_length)); + + /* Write the EPB in parts, including the options, this looks not as + * straightforward as pcapng_write_idb() but here we would like to + * avoid as many memcopy's as possible. + */ + + /* Add base EPB structure. */ + iov[i].iov_base = &epb; + iov[i++].iov_len = sizeof(epb); + + /* Add Packet Data. */ + iov[i].iov_base = (void *)pkt; + iov[i++].iov_len = caplen; + + /* Add Packet Data padding if needed. */ + if (pad_length > 0) { + iov[i].iov_base = pad; + iov[i++].iov_len = pad_length; + } + + /* Add comment if supplied */ + if (epb_options->comment) { + uint16_t opt[2] = {PCAPNG_OPT_COMMENT, com_length}; + size_t opt_pad = roundup(com_length, + sizeof(uint32_t)) - com_length; + /* Add option header. */ + iov[i].iov_base = opt; + iov[i++].iov_len = sizeof(opt); + + /* Add actual comment string. */ + iov[i].iov_base = (void *)epb_options->comment; + iov[i++].iov_len = com_length; + + /* Add padding to uint32_t if needed. */ + if (opt_pad) { + iov[i].iov_base = pad; + iov[i++].iov_len = opt_pad; + } + } + + /* Write other options and final EPB size. */ + iov[i].iov_base = options; + iov[i++].iov_len = 8 + (epb_options->flags ? 8 : 0) + + (epb_options->dropcount ? 12 : 0) + + (epb_options->packetid ? 12 : 0) + + (epb_options->queue ? 8 : 0) + + (epb_options->xdp_verdict ? 
16 : 0); + rc = writev(pd->pd_fd, iov, i); + if ((size_t)rc != epb_length) + return false; + + return true; +} + +/***************************************************************************** + * xpcapng_dump_open() + *****************************************************************************/ +struct xpcapng_dumper *xpcapng_dump_open(const char *file, + const char *comment, + const char *hardware, + const char *os, + const char *user_application) +{ + struct xpcapng_dumper *pd = NULL; + + if (file == NULL) { + errno = EINVAL; + goto error_exit; + } + + pd = calloc(sizeof(*pd), 1); + if (pd == NULL) { + errno = ENOMEM; + goto error_exit; + } + pd->pd_fd = -1; + + if (strcmp(file, "-") == 0) { + pd->pd_fd = STDOUT_FILENO; + } else { + pd->pd_fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, 0600); + if (pd->pd_fd < 0) + goto error_exit; + } + + if (!pcapng_write_shb(pd, comment, hardware, os, user_application)) + goto error_exit; + + return pd; + +error_exit: + if (pd) { + if (pd->pd_fd >= 0 && pd->pd_fd != STDOUT_FILENO) + close(pd->pd_fd); + + free(pd); + } + return NULL; +} + +/***************************************************************************** + * xpcapng_dump_close() + *****************************************************************************/ +void xpcapng_dump_close(struct xpcapng_dumper *pd) +{ + if (pd == NULL) + return; + + if (pd->pd_fd < 0 && pd->pd_fd != STDOUT_FILENO) + close(pd->pd_fd); + + free(pd); +} + +/***************************************************************************** + * xpcapng_dump_flush() + *****************************************************************************/ +int xpcapng_dump_flush(struct xpcapng_dumper *pd) +{ + if (pd != NULL) + return fsync(pd->pd_fd); + + errno = EINVAL; + return -1; +} + +/***************************************************************************** + * pcapng_dump_add_interface() + *****************************************************************************/ +int 
xpcapng_dump_add_interface(struct xpcapng_dumper *pd, uint16_t snap_len, + const char *name, const char *description, + const uint8_t *mac, uint64_t speed, + uint8_t ts_resolution, const char *hardware) +{ + if (!pcapng_write_idb(pd, name, snap_len, description, mac, speed, + ts_resolution, hardware)) + return -1; + + return pd->pd_interfaces++; +} + +/***************************************************************************** + * xpcapng_dump_enhanced_pkt() + *****************************************************************************/ +bool xpcapng_dump_enhanced_pkt(struct xpcapng_dumper *pd, uint32_t ifid, + const uint8_t *pkt, uint32_t len, + uint32_t caplen, uint64_t timestamp, + struct xpcapng_epb_options_s *options) +{ + struct xpcapng_epb_options_s default_options = {}; + + return pcapng_write_epb(pd, ifid, pkt, len, caplen, timestamp, + options ?: &default_options); +} diff --git a/lib/util/xpcapng.h b/lib/util/xpcapng.h new file mode 100644 index 0000000..1590a4c --- /dev/null +++ b/lib/util/xpcapng.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/***************************************************************************** + * Multiple include protection + *****************************************************************************/ +#ifndef __XPCAPNG_H_ +#define __XPCAPNG_H_ + +/***************************************************************************** + * Handle + *****************************************************************************/ +struct xpcapng_dumper; + +/***************************************************************************** + * Flag variables + *****************************************************************************/ +enum xpcapng_epb_flags { + PCAPNG_EPB_FLAG_INBOUND = 0x1, + PCAPNG_EPB_FLAG_OUTBOUND = 0x2 +}; + +/***************************************************************************** + * EPB options structure + *****************************************************************************/ +struct 
xpcapng_epb_options_s { + enum xpcapng_epb_flags flags; + uint64_t dropcount; + uint64_t *packetid; + uint32_t *queue; + int64_t *xdp_verdict; + const char *comment; +}; + +/***************************************************************************** + * APIs + *****************************************************************************/ +extern struct xpcapng_dumper *xpcapng_dump_open(const char *file, + const char *comment, + const char *hardware, + const char *os, + const char *user_application); +extern void xpcapng_dump_close(struct xpcapng_dumper *pd); +extern int xpcapng_dump_flush(struct xpcapng_dumper *pd); +extern int xpcapng_dump_add_interface(struct xpcapng_dumper *pd, + uint16_t snap_len, + const char *name, const char *description, + const uint8_t *mac, uint64_t speed, + uint8_t ts_resolution, + const char *hardware); +extern bool xpcapng_dump_enhanced_pkt(struct xpcapng_dumper *pd, uint32_t ifid, + const uint8_t *pkt, uint32_t len, + uint32_t caplen, uint64_t timestamp, + struct xpcapng_epb_options_s *options); + +/***************************************************************************** + * End-of include file + *****************************************************************************/ +#endif /* __XPCAPNG_H_ */ |