diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 14:18:53 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 14:18:53 +0000 |
commit | a0e0018c9a7ef5ce7f6d2c3ae16aecbbd16a8f67 (patch) | |
tree | 8feaf1a1932871b139b3b30be4c09c66489918be /lib | |
parent | Initial commit. (diff) | |
download | iproute2-a0e0018c9a7ef5ce7f6d2c3ae16aecbbd16a8f67.tar.xz iproute2-a0e0018c9a7ef5ce7f6d2c3ae16aecbbd16a8f67.zip |
Adding upstream version 6.1.0. (refs: upstream/6.1.0, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Makefile | 32 | ||||
-rw-r--r-- | lib/ax25_ntop.c | 82 | ||||
-rw-r--r-- | lib/bpf_glue.c | 91 | ||||
-rw-r--r-- | lib/bpf_legacy.c | 3360 | ||||
-rw-r--r-- | lib/bpf_libbpf.c | 383 | ||||
-rw-r--r-- | lib/cg_map.c | 134 | ||||
-rw-r--r-- | lib/color.c | 183 | ||||
-rw-r--r-- | lib/coverity_model.c | 17 | ||||
-rw-r--r-- | lib/exec.c | 46 | ||||
-rw-r--r-- | lib/fs.c | 369 | ||||
-rw-r--r-- | lib/inet_proto.c | 69 | ||||
-rw-r--r-- | lib/json_print.c | 361 | ||||
-rw-r--r-- | lib/json_print_math.c | 37 | ||||
-rw-r--r-- | lib/json_writer.c | 386 | ||||
-rw-r--r-- | lib/libgenl.c | 159 | ||||
-rw-r--r-- | lib/libnetlink.c | 1657 | ||||
-rw-r--r-- | lib/ll_addr.c | 95 | ||||
-rw-r--r-- | lib/ll_map.c | 410 | ||||
-rw-r--r-- | lib/ll_proto.c | 103 | ||||
-rw-r--r-- | lib/ll_types.c | 122 | ||||
-rw-r--r-- | lib/mnl_utils.c | 254 | ||||
-rw-r--r-- | lib/mpls_ntop.c | 52 | ||||
-rw-r--r-- | lib/mpls_pton.c | 63 | ||||
-rw-r--r-- | lib/names.c | 152 | ||||
-rw-r--r-- | lib/namespace.c | 145 | ||||
-rw-r--r-- | lib/netrom_ntop.c | 23 | ||||
-rw-r--r-- | lib/ppp_proto.c | 52 | ||||
-rw-r--r-- | lib/rose_ntop.c | 56 | ||||
-rw-r--r-- | lib/rt_names.c | 788 | ||||
-rw-r--r-- | lib/utils.c | 1961 | ||||
-rw-r--r-- | lib/utils_math.c | 123 |
31 files changed, 11765 insertions, 0 deletions
diff --git a/lib/Makefile b/lib/Makefile new file mode 100644 index 0000000..ddedd37 --- /dev/null +++ b/lib/Makefile @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0 +include ../config.mk + +CFLAGS += -fPIC + +UTILOBJ = utils.o utils_math.o rt_names.o ll_map.o ll_types.o ll_proto.o ll_addr.o \ + inet_proto.o namespace.o json_writer.o json_print.o json_print_math.o \ + names.o color.o bpf_legacy.o bpf_glue.o exec.o fs.o cg_map.o ppp_proto.o + +ifeq ($(HAVE_ELF),y) +ifeq ($(HAVE_LIBBPF),y) +UTILOBJ += bpf_libbpf.o +endif +endif + +NLOBJ=libgenl.o libnetlink.o +ifeq ($(HAVE_MNL),y) +NLOBJ += mnl_utils.o +endif + +all: libnetlink.a libutil.a + +libnetlink.a: $(NLOBJ) + $(QUIET_AR)$(AR) rcs $@ $^ + +libutil.a: $(UTILOBJ) $(ADDLIB) + $(QUIET_AR)$(AR) rcs $@ $^ + +install: + +clean: + rm -f $(NLOBJ) $(UTILOBJ) $(ADDLIB) libnetlink.a libutil.a diff --git a/lib/ax25_ntop.c b/lib/ax25_ntop.c new file mode 100644 index 0000000..3a72a43 --- /dev/null +++ b/lib/ax25_ntop.c @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#include <errno.h> +#include <sys/socket.h> +#include <linux/ax25.h> + +#include "utils.h" + +const char *ax25_ntop1(const ax25_address *src, char *dst, socklen_t size); + +/* + * AX.25 addresses are based on Amateur radio callsigns followed by an SSID + * like XXXXXX-SS where the callsign consists of up to 6 ASCII characters + * which are either letters or digits and the SSID is a decimal number in the + * range 0..15. + * Amateur radio callsigns are assigned by a country's relevant authorities + * and are 3..6 characters though a few countries have assigned callsigns + * longer than that. AX.25 is not able to handle such longer callsigns. + * There are further restrictions on the format of valid callsigns by + * applicable national and international law. 
Linux doesn't need to care and + * will happily accept anything that consists of 6 ASCII characters in the + * range of A-Z and 0-9 for a callsign such as the default AX.25 MAC address + * LINUX-1 and the default broadcast address QST-0. + * The SSID is just a number and not encoded in ASCII digits. + * + * Being based on HDLC AX.25 encodes addresses by shifting them one bit left + * thus zeroing bit 0, the HDLC extension bit for all but the last bit of + * a packet's address field but for our purposes here we're not considering + * the HDLC extension bit that is it will always be zero. + * + * Linux' internal representation of AX.25 addresses in Linux is very similar + * to this on the on-air or on-the-wire format. The callsign is padded to + * 6 octets by adding spaces, followed by the SSID octet then all 7 octets + * are left-shifted by one bit. + * + * For example, for the address "LINUX-1" the callsign is LINUX and SSID is 1 + * the internal format is 98:92:9c:aa:b0:40:02. + */ + +const char *ax25_ntop1(const ax25_address *src, char *dst, socklen_t size) +{ + char c, *s; + int n; + + for (n = 0, s = dst; n < 6; n++) { + c = (src->ax25_call[n] >> 1) & 0x7f; + if (c != ' ') + *s++ = c; + } + + *s++ = '-'; + + n = ((src->ax25_call[6] >> 1) & 0x0f); + if (n > 9) { + *s++ = '1'; + n -= 10; + } + + *s++ = n + '0'; + *s++ = '\0'; + + if (*dst == '\0' || *dst == '-') { + dst[0] = '*'; + dst[1] = '\0'; + } + + return dst; +} + +const char *ax25_ntop(int af, const void *addr, char *buf, socklen_t buflen) +{ + switch (af) { + case AF_AX25: + errno = 0; + return ax25_ntop1((ax25_address *)addr, buf, buflen); + + default: + errno = EAFNOSUPPORT; + } + + return NULL; +} diff --git a/lib/bpf_glue.c b/lib/bpf_glue.c new file mode 100644 index 0000000..88a2475 --- /dev/null +++ b/lib/bpf_glue.c @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * bpf_glue.c: BPF code to call both legacy and libbpf code + * Authors: Hangbin Liu <haliu@redhat.com> + * + */ +#include 
<sys/syscall.h> +#include <limits.h> +#include <unistd.h> +#include <errno.h> + +#include "bpf_util.h" +#ifdef HAVE_LIBBPF +#include <bpf/bpf.h> +#endif + +int bpf(int cmd, union bpf_attr *attr, unsigned int size) +{ +#ifdef __NR_bpf + return syscall(__NR_bpf, cmd, attr, size); +#else + fprintf(stderr, "No bpf syscall, kernel headers too old?\n"); + errno = ENOSYS; + return -1; +#endif +} + +int bpf_program_attach(int prog_fd, int target_fd, enum bpf_attach_type type) +{ +#ifdef HAVE_LIBBPF + return bpf_prog_attach(prog_fd, target_fd, type, 0); +#else + return bpf_prog_attach_fd(prog_fd, target_fd, type); +#endif +} + +#ifdef HAVE_LIBBPF +static const char *_libbpf_compile_version = LIBBPF_VERSION; +static char _libbpf_version[10] = {}; + +const char *get_libbpf_version(void) +{ + /* Start by copying compile-time version into buffer so we have a + * fallback value in case we are dynamically linked, or can't find a + * version in /proc/self/maps below. + */ + strncpy(_libbpf_version, _libbpf_compile_version, + sizeof(_libbpf_version)-1); +#ifdef LIBBPF_DYNAMIC + char buf[PATH_MAX], *s; + bool found = false; + FILE *fp; + + /* When dynamically linking against libbpf, we can't be sure that the + * version we discovered at compile time is actually the one we are + * using at runtime. This can lead to hard-to-debug errors, so we try to + * discover the correct version at runtime. + * + * The simple solution to this would be if libbpf itself exported a + * version in its API. But since it doesn't, we work around this by + * parsing the mappings of the binary at runtime, looking for the full + * filename of libbpf.so and using that. 
+ */ + fp = fopen("/proc/self/maps", "r"); + if (fp == NULL) + goto out; + + while (fgets(buf, sizeof(buf), fp) != NULL) { + if ((s = strstr(buf, "libbpf.so.")) != NULL) { + strncpy(_libbpf_version, s+10, sizeof(_libbpf_version)-1); + strtok(_libbpf_version, "\n"); + found = true; + break; + } + } + + fclose(fp); +out: + if (!found) + fprintf(stderr, "Couldn't find runtime libbpf version - falling back to compile-time value!\n"); +#endif /* LIBBPF_DYNAMIC */ + + _libbpf_version[sizeof(_libbpf_version)-1] = '\0'; + return _libbpf_version; +} +#else +const char *get_libbpf_version(void) +{ + return NULL; +} +#endif /* HAVE_LIBBPF */ diff --git a/lib/bpf_legacy.c b/lib/bpf_legacy.c new file mode 100644 index 0000000..4fabdcc --- /dev/null +++ b/lib/bpf_legacy.c @@ -0,0 +1,3360 @@ +/* + * bpf.c BPF common code + * + * This program is free software; you can distribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Daniel Borkmann <daniel@iogearbox.net> + * Jiri Pirko <jiri@resnulli.us> + * Alexei Starovoitov <ast@kernel.org> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <stdbool.h> +#include <stdint.h> +#include <errno.h> +#include <fcntl.h> +#include <stdarg.h> +#include <limits.h> +#include <assert.h> + +#ifdef HAVE_ELF +#include <libelf.h> +#include <gelf.h> +#endif + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/un.h> +#include <sys/vfs.h> +#include <sys/mount.h> +#include <sys/sendfile.h> +#include <sys/resource.h> + +#include <arpa/inet.h> + +#include "utils.h" +#include "json_print.h" + +#include "bpf_util.h" +#include "bpf_elf.h" +#include "bpf_scm.h" + +struct bpf_prog_meta { + const char *type; + const char *subdir; + const char *section; + bool may_uds_export; +}; + +static const enum bpf_prog_type __bpf_types[] = { + BPF_PROG_TYPE_SCHED_CLS, + BPF_PROG_TYPE_SCHED_ACT, + BPF_PROG_TYPE_XDP, + BPF_PROG_TYPE_LWT_IN, + BPF_PROG_TYPE_LWT_OUT, + BPF_PROG_TYPE_LWT_XMIT, +}; + +static const struct bpf_prog_meta __bpf_prog_meta[] = { + [BPF_PROG_TYPE_SCHED_CLS] = { + .type = "cls", + .subdir = "tc", + .section = ELF_SECTION_CLASSIFIER, + .may_uds_export = true, + }, + [BPF_PROG_TYPE_SCHED_ACT] = { + .type = "act", + .subdir = "tc", + .section = ELF_SECTION_ACTION, + .may_uds_export = true, + }, + [BPF_PROG_TYPE_XDP] = { + .type = "xdp", + .subdir = "xdp", + .section = ELF_SECTION_PROG, + }, + [BPF_PROG_TYPE_LWT_IN] = { + .type = "lwt_in", + .subdir = "ip", + .section = ELF_SECTION_PROG, + }, + [BPF_PROG_TYPE_LWT_OUT] = { + .type = "lwt_out", + .subdir = "ip", + .section = ELF_SECTION_PROG, + }, + [BPF_PROG_TYPE_LWT_XMIT] = { + .type = "lwt_xmit", + .subdir = "ip", + .section = ELF_SECTION_PROG, + }, + [BPF_PROG_TYPE_LWT_SEG6LOCAL] = { + .type = "lwt_seg6local", + .subdir = "ip", + .section = ELF_SECTION_PROG, + }, +}; + +static const char *bpf_prog_to_subdir(enum bpf_prog_type type) +{ + 
assert(type < ARRAY_SIZE(__bpf_prog_meta) && + __bpf_prog_meta[type].subdir); + return __bpf_prog_meta[type].subdir; +} + +const char *bpf_prog_to_default_section(enum bpf_prog_type type) +{ + assert(type < ARRAY_SIZE(__bpf_prog_meta) && + __bpf_prog_meta[type].section); + return __bpf_prog_meta[type].section; +} + +#ifdef HAVE_ELF +static int bpf_obj_open(const char *path, enum bpf_prog_type type, + const char *sec, __u32 ifindex, bool verbose); +#else +static int bpf_obj_open(const char *path, enum bpf_prog_type type, + const char *sec, __u32 ifindex, bool verbose) +{ + fprintf(stderr, "No ELF library support compiled in.\n"); + errno = ENOSYS; + return -1; +} +#endif + +static inline __u64 bpf_ptr_to_u64(const void *ptr) +{ + return (__u64)(unsigned long)ptr; +} + +static int bpf_map_update(int fd, const void *key, const void *value, + uint64_t flags) +{ + union bpf_attr attr = {}; + + attr.map_fd = fd; + attr.key = bpf_ptr_to_u64(key); + attr.value = bpf_ptr_to_u64(value); + attr.flags = flags; + + return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); +} + +static int bpf_prog_fd_by_id(uint32_t id) +{ + union bpf_attr attr = {}; + + attr.prog_id = id; + + return bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); +} + +static int bpf_prog_info_by_fd(int fd, struct bpf_prog_info *info, + uint32_t *info_len) +{ + union bpf_attr attr = {}; + int ret; + + attr.info.bpf_fd = fd; + attr.info.info = bpf_ptr_to_u64(info); + attr.info.info_len = *info_len; + + *info_len = 0; + ret = bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)); + if (!ret) + *info_len = attr.info.info_len; + + return ret; +} + +int bpf_dump_prog_info(FILE *f, uint32_t id) +{ + struct bpf_prog_info info = {}; + uint32_t len = sizeof(info); + int fd, ret, dump_ok = 0; + SPRINT_BUF(tmp); + + open_json_object("prog"); + print_uint(PRINT_ANY, "id", "id %u ", id); + + fd = bpf_prog_fd_by_id(id); + if (fd < 0) + goto out; + + ret = bpf_prog_info_by_fd(fd, &info, &len); + if (!ret && len) { + int jited = 
!!info.jited_prog_len; + + print_string(PRINT_ANY, "name", "name %s ", info.name); + print_string(PRINT_ANY, "tag", "tag %s ", + hexstring_n2a(info.tag, sizeof(info.tag), + tmp, sizeof(tmp))); + print_uint(PRINT_JSON, "jited", NULL, jited); + if (jited && !is_json_context()) + fprintf(f, "jited "); + + if (show_details) { + if (info.load_time) { + /* ns since boottime */ + print_lluint(PRINT_ANY, "load_time", + "load_time %llu ", info.load_time); + + print_luint(PRINT_ANY, "created_by_uid", + "created_by_uid %lu ", + info.created_by_uid); + } + + if (info.btf_id) + print_luint(PRINT_ANY, "btf_id", "btf_id %lu ", + info.btf_id); + } + + dump_ok = 1; + } + + close(fd); +out: + close_json_object(); + return dump_ok; +} + +static int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len, + char **bpf_string, bool *need_release, + const char separator) +{ + char sp; + + if (from_file) { + size_t tmp_len, op_len = sizeof("65535 255 255 4294967295,"); + char *tmp_string, *pos, c_prev = ' '; + FILE *fp; + int c; + + tmp_len = sizeof("4096,") + BPF_MAXINSNS * op_len; + tmp_string = pos = calloc(1, tmp_len); + if (tmp_string == NULL) + return -ENOMEM; + + fp = fopen(arg, "r"); + if (fp == NULL) { + perror("Cannot fopen"); + free(tmp_string); + return -ENOENT; + } + + while ((c = fgetc(fp)) != EOF) { + switch (c) { + case '\n': + if (c_prev != ',') + *(pos++) = ','; + c_prev = ','; + break; + case ' ': + case '\t': + if (c_prev != ' ') + *(pos++) = c; + c_prev = ' '; + break; + default: + *(pos++) = c; + c_prev = c; + } + if (pos - tmp_string == tmp_len) + break; + } + + if (!feof(fp)) { + free(tmp_string); + fclose(fp); + return -E2BIG; + } + + fclose(fp); + *pos = 0; + + *need_release = true; + *bpf_string = tmp_string; + } else { + *need_release = false; + *bpf_string = arg; + } + + if (sscanf(*bpf_string, "%hu%c", bpf_len, &sp) != 2 || + sp != separator) { + if (*need_release) + free(*bpf_string); + return -EINVAL; + } + + return 0; +} + +static int bpf_ops_parse(int 
argc, char **argv, struct sock_filter *bpf_ops, + bool from_file) +{ + char *bpf_string, *token, separator = ','; + int ret = 0, i = 0; + bool need_release; + __u16 bpf_len = 0; + + if (argc < 1) + return -EINVAL; + if (bpf_parse_string(argv[0], from_file, &bpf_len, &bpf_string, + &need_release, separator)) + return -EINVAL; + if (bpf_len == 0 || bpf_len > BPF_MAXINSNS) { + ret = -EINVAL; + goto out; + } + + token = bpf_string; + while ((token = strchr(token, separator)) && (++token)[0]) { + if (i >= bpf_len) { + fprintf(stderr, "Real program length exceeds encoded length parameter!\n"); + ret = -EINVAL; + goto out; + } + + if (sscanf(token, "%hu %hhu %hhu %u,", + &bpf_ops[i].code, &bpf_ops[i].jt, + &bpf_ops[i].jf, &bpf_ops[i].k) != 4) { + fprintf(stderr, "Error at instruction %d!\n", i); + ret = -EINVAL; + goto out; + } + + i++; + } + + if (i != bpf_len) { + fprintf(stderr, "Parsed program length is less than encoded length parameter!\n"); + ret = -EINVAL; + goto out; + } + ret = bpf_len; +out: + if (need_release) + free(bpf_string); + + return ret; +} + +void bpf_print_ops(struct rtattr *bpf_ops, __u16 len) +{ + struct sock_filter *ops = RTA_DATA(bpf_ops); + int i; + + if (len == 0) + return; + + open_json_object("bytecode"); + print_uint(PRINT_ANY, "length", "bytecode \'%u,", len); + open_json_array(PRINT_JSON, "insns"); + + for (i = 0; i < len; i++) { + open_json_object(NULL); + print_hu(PRINT_ANY, "code", "%hu ", ops[i].code); + print_hhu(PRINT_ANY, "jt", "%hhu ", ops[i].jt); + print_hhu(PRINT_ANY, "jf", "%hhu ", ops[i].jf); + if (i == len - 1) + print_uint(PRINT_ANY, "k", "%u\'", ops[i].k); + else + print_uint(PRINT_ANY, "k", "%u,", ops[i].k); + close_json_object(); + } + + close_json_array(PRINT_JSON, NULL); + close_json_object(); +} + +static void bpf_map_pin_report(const struct bpf_elf_map *pin, + const struct bpf_elf_map *obj) +{ + fprintf(stderr, "Map specification differs from pinned file!\n"); + + if (obj->type != pin->type) + fprintf(stderr, " - Type: 
%u (obj) != %u (pin)\n", + obj->type, pin->type); + if (obj->size_key != pin->size_key) + fprintf(stderr, " - Size key: %u (obj) != %u (pin)\n", + obj->size_key, pin->size_key); + if (obj->size_value != pin->size_value) + fprintf(stderr, " - Size value: %u (obj) != %u (pin)\n", + obj->size_value, pin->size_value); + if (obj->max_elem != pin->max_elem) + fprintf(stderr, " - Max elems: %u (obj) != %u (pin)\n", + obj->max_elem, pin->max_elem); + if (obj->flags != pin->flags) + fprintf(stderr, " - Flags: %#x (obj) != %#x (pin)\n", + obj->flags, pin->flags); + + fprintf(stderr, "\n"); +} + +struct bpf_prog_data { + unsigned int type; + unsigned int jited; +}; + +struct bpf_map_ext { + struct bpf_prog_data owner; + unsigned int btf_id_key; + unsigned int btf_id_val; +}; + +static int bpf_derive_elf_map_from_fdinfo(int fd, struct bpf_elf_map *map, + struct bpf_map_ext *ext) +{ + unsigned int val, owner_type = 0, owner_jited = 0; + char file[PATH_MAX], buff[4096]; + FILE *fp; + + snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); + memset(map, 0, sizeof(*map)); + + fp = fopen(file, "r"); + if (!fp) { + fprintf(stderr, "No procfs support?!\n"); + return -EIO; + } + + while (fgets(buff, sizeof(buff), fp)) { + if (sscanf(buff, "map_type:\t%u", &val) == 1) + map->type = val; + else if (sscanf(buff, "key_size:\t%u", &val) == 1) + map->size_key = val; + else if (sscanf(buff, "value_size:\t%u", &val) == 1) + map->size_value = val; + else if (sscanf(buff, "max_entries:\t%u", &val) == 1) + map->max_elem = val; + else if (sscanf(buff, "map_flags:\t%i", &val) == 1) + map->flags = val; + else if (sscanf(buff, "owner_prog_type:\t%i", &val) == 1) + owner_type = val; + else if (sscanf(buff, "owner_jited:\t%i", &val) == 1) + owner_jited = val; + } + + fclose(fp); + if (ext) { + memset(ext, 0, sizeof(*ext)); + ext->owner.type = owner_type; + ext->owner.jited = owner_jited; + } + + return 0; +} + +static int bpf_map_selfcheck_pinned(int fd, const struct bpf_elf_map *map, + 
struct bpf_map_ext *ext, int length, + enum bpf_prog_type type) +{ + struct bpf_elf_map tmp, zero = {}; + int ret; + + ret = bpf_derive_elf_map_from_fdinfo(fd, &tmp, ext); + if (ret < 0) + return ret; + + /* The decision to reject this is on kernel side eventually, but + * at least give the user a chance to know what's wrong. + */ + if (ext->owner.type && ext->owner.type != type) + fprintf(stderr, "Program array map owner types differ: %u (obj) != %u (pin)\n", + type, ext->owner.type); + + if (!memcmp(&tmp, map, length)) { + return 0; + } else { + /* If kernel doesn't have eBPF-related fdinfo, we cannot do much, + * so just accept it. We know we do have an eBPF fd and in this + * case, everything is 0. It is guaranteed that no such map exists + * since map type of 0 is unloadable BPF_MAP_TYPE_UNSPEC. + */ + if (!memcmp(&tmp, &zero, length)) + return 0; + + bpf_map_pin_report(&tmp, map); + return -EINVAL; + } +} + +static int bpf_mnt_fs(const char *target) +{ + bool bind_done = false; + + while (mount("", target, "none", MS_PRIVATE | MS_REC, NULL)) { + if (errno != EINVAL || bind_done) { + fprintf(stderr, "mount --make-private %s failed: %s\n", + target, strerror(errno)); + return -1; + } + + if (mount(target, target, "none", MS_BIND, NULL)) { + fprintf(stderr, "mount --bind %s %s failed: %s\n", + target, target, strerror(errno)); + return -1; + } + + bind_done = true; + } + + if (mount("bpf", target, "bpf", 0, "mode=0700")) { + fprintf(stderr, "mount -t bpf bpf %s failed: %s\n", + target, strerror(errno)); + return -1; + } + + return 0; +} + +static int bpf_mnt_check_target(const char *target) +{ + int ret; + + ret = mkdir(target, S_IRWXU); + if (ret) { + if (errno == EEXIST) + return 0; + fprintf(stderr, "mkdir %s failed: %s\n", target, + strerror(errno)); + } + + return ret; +} + +static int bpf_valid_mntpt(const char *mnt, unsigned long magic) +{ + struct statfs st_fs; + + if (statfs(mnt, &st_fs) < 0) + return -ENOENT; + if ((unsigned long)st_fs.f_type != magic) 
+ return -ENOENT; + + return 0; +} + +static const char *bpf_find_mntpt_single(unsigned long magic, char *mnt, + int len, const char *mntpt) +{ + int ret; + + ret = bpf_valid_mntpt(mntpt, magic); + if (!ret) { + strlcpy(mnt, mntpt, len); + return mnt; + } + + return NULL; +} + +static const char *bpf_find_mntpt(const char *fstype, unsigned long magic, + char *mnt, int len, + const char * const *known_mnts) +{ + const char * const *ptr; + char type[100]; + FILE *fp; + + if (known_mnts) { + ptr = known_mnts; + while (*ptr) { + if (bpf_find_mntpt_single(magic, mnt, len, *ptr)) + return mnt; + ptr++; + } + } + + if (len != PATH_MAX) + return NULL; + + fp = fopen("/proc/mounts", "r"); + if (fp == NULL) + return NULL; + + while (fscanf(fp, "%*s %" textify(PATH_MAX) "s %99s %*s %*d %*d\n", + mnt, type) == 2) { + if (strcmp(type, fstype) == 0) + break; + } + + fclose(fp); + if (strcmp(type, fstype) != 0) + return NULL; + + return mnt; +} + +int bpf_trace_pipe(void) +{ + char tracefs_mnt[PATH_MAX] = TRACE_DIR_MNT; + static const char * const tracefs_known_mnts[] = { + TRACE_DIR_MNT, + "/sys/kernel/debug/tracing", + "/tracing", + "/trace", + 0, + }; + int fd_in, fd_out = STDERR_FILENO; + char tpipe[PATH_MAX]; + const char *mnt; + + mnt = bpf_find_mntpt("tracefs", TRACEFS_MAGIC, tracefs_mnt, + sizeof(tracefs_mnt), tracefs_known_mnts); + if (!mnt) { + fprintf(stderr, "tracefs not mounted?\n"); + return -1; + } + + snprintf(tpipe, sizeof(tpipe), "%s/trace_pipe", mnt); + + fd_in = open(tpipe, O_RDONLY); + if (fd_in < 0) + return -1; + + fprintf(stderr, "Running! 
Hang up with ^C!\n\n"); + while (1) { + static char buff[4096]; + ssize_t ret; + + ret = read(fd_in, buff, sizeof(buff)); + if (ret > 0 && write(fd_out, buff, ret) == ret) + continue; + break; + } + + close(fd_in); + return -1; +} + +static int bpf_gen_global(const char *bpf_sub_dir) +{ + char bpf_glo_dir[PATH_MAX]; + int ret; + + snprintf(bpf_glo_dir, sizeof(bpf_glo_dir), "%s/%s/", + bpf_sub_dir, BPF_DIR_GLOBALS); + + ret = mkdir(bpf_glo_dir, S_IRWXU); + if (ret && errno != EEXIST) { + fprintf(stderr, "mkdir %s failed: %s\n", bpf_glo_dir, + strerror(errno)); + return ret; + } + + return 0; +} + +static int bpf_gen_master(const char *base, const char *name) +{ + char bpf_sub_dir[PATH_MAX + NAME_MAX + 1]; + int ret; + + snprintf(bpf_sub_dir, sizeof(bpf_sub_dir), "%s%s/", base, name); + + ret = mkdir(bpf_sub_dir, S_IRWXU); + if (ret && errno != EEXIST) { + fprintf(stderr, "mkdir %s failed: %s\n", bpf_sub_dir, + strerror(errno)); + return ret; + } + + return bpf_gen_global(bpf_sub_dir); +} + +static int bpf_slave_via_bind_mnt(const char *full_name, + const char *full_link) +{ + int ret; + + ret = mkdir(full_name, S_IRWXU); + if (ret) { + assert(errno != EEXIST); + fprintf(stderr, "mkdir %s failed: %s\n", full_name, + strerror(errno)); + return ret; + } + + ret = mount(full_link, full_name, "none", MS_BIND, NULL); + if (ret) { + rmdir(full_name); + fprintf(stderr, "mount --bind %s %s failed: %s\n", + full_link, full_name, strerror(errno)); + } + + return ret; +} + +static int bpf_gen_slave(const char *base, const char *name, + const char *link) +{ + char bpf_lnk_dir[PATH_MAX + NAME_MAX + 1]; + char bpf_sub_dir[PATH_MAX + NAME_MAX]; + struct stat sb = {}; + int ret; + + snprintf(bpf_lnk_dir, sizeof(bpf_lnk_dir), "%s%s/", base, link); + snprintf(bpf_sub_dir, sizeof(bpf_sub_dir), "%s%s", base, name); + + ret = symlink(bpf_lnk_dir, bpf_sub_dir); + if (ret) { + if (errno != EEXIST) { + if (errno != EPERM) { + fprintf(stderr, "symlink %s failed: %s\n", + bpf_sub_dir, 
strerror(errno)); + return ret; + } + + return bpf_slave_via_bind_mnt(bpf_sub_dir, + bpf_lnk_dir); + } + + ret = lstat(bpf_sub_dir, &sb); + if (ret) { + fprintf(stderr, "lstat %s failed: %s\n", + bpf_sub_dir, strerror(errno)); + return ret; + } + + if ((sb.st_mode & S_IFMT) != S_IFLNK) + return bpf_gen_global(bpf_sub_dir); + } + + return 0; +} + +static int bpf_gen_hierarchy(const char *base) +{ + int ret, i; + + ret = bpf_gen_master(base, bpf_prog_to_subdir(__bpf_types[0])); + for (i = 1; i < ARRAY_SIZE(__bpf_types) && !ret; i++) + ret = bpf_gen_slave(base, + bpf_prog_to_subdir(__bpf_types[i]), + bpf_prog_to_subdir(__bpf_types[0])); + return ret; +} + +static const char *bpf_get_work_dir(enum bpf_prog_type type) +{ + static char bpf_tmp[PATH_MAX] = BPF_DIR_MNT; + static char bpf_wrk_dir[PATH_MAX]; + static const char *mnt; + static bool bpf_mnt_cached; + const char *mnt_env = getenv(BPF_ENV_MNT); + static const char * const bpf_known_mnts[] = { + BPF_DIR_MNT, + "/bpf", + 0, + }; + int ret; + + if (bpf_mnt_cached) { + const char *out = mnt; + + if (out && type) { + snprintf(bpf_tmp, sizeof(bpf_tmp), "%s%s/", + out, bpf_prog_to_subdir(type)); + out = bpf_tmp; + } + return out; + } + + if (mnt_env) + mnt = bpf_find_mntpt_single(BPF_FS_MAGIC, bpf_tmp, + sizeof(bpf_tmp), mnt_env); + else + mnt = bpf_find_mntpt("bpf", BPF_FS_MAGIC, bpf_tmp, + sizeof(bpf_tmp), bpf_known_mnts); + if (!mnt) { + mnt = mnt_env ? 
: BPF_DIR_MNT; + ret = bpf_mnt_check_target(mnt); + if (!ret) + ret = bpf_mnt_fs(mnt); + if (ret) { + mnt = NULL; + goto out; + } + } + + ret = snprintf(bpf_wrk_dir, sizeof(bpf_wrk_dir), "%s/", mnt); + if (ret < 0 || ret >= sizeof(bpf_wrk_dir)) { + mnt = NULL; + goto out; + } + + ret = bpf_gen_hierarchy(bpf_wrk_dir); + if (ret) { + mnt = NULL; + goto out; + } + + mnt = bpf_wrk_dir; +out: + bpf_mnt_cached = true; + return mnt; +} + +static int bpf_obj_get(const char *pathname, enum bpf_prog_type type) +{ + union bpf_attr attr = {}; + char tmp[PATH_MAX]; + + if (strlen(pathname) > 2 && pathname[0] == 'm' && + pathname[1] == ':' && bpf_get_work_dir(type)) { + snprintf(tmp, sizeof(tmp), "%s/%s", + bpf_get_work_dir(type), pathname + 2); + pathname = tmp; + } + + attr.pathname = bpf_ptr_to_u64(pathname); + + return bpf(BPF_OBJ_GET, &attr, sizeof(attr)); +} + +static int bpf_obj_pinned(const char *pathname, enum bpf_prog_type type) +{ + int prog_fd = bpf_obj_get(pathname, type); + + if (prog_fd < 0) + fprintf(stderr, "Couldn\'t retrieve pinned program \'%s\': %s\n", + pathname, strerror(errno)); + return prog_fd; +} + +static int bpf_do_parse(struct bpf_cfg_in *cfg, const bool *opt_tbl) +{ + const char *file, *section, *uds_name, *prog_name; + bool verbose = false; + int i, ret, argc; + char **argv; + + argv = cfg->argv; + argc = cfg->argc; + + if (opt_tbl[CBPF_BYTECODE] && + (matches(*argv, "bytecode") == 0 || + strcmp(*argv, "bc") == 0)) { + cfg->mode = CBPF_BYTECODE; + } else if (opt_tbl[CBPF_FILE] && + (matches(*argv, "bytecode-file") == 0 || + strcmp(*argv, "bcf") == 0)) { + cfg->mode = CBPF_FILE; + } else if (opt_tbl[EBPF_OBJECT] && + (matches(*argv, "object-file") == 0 || + strcmp(*argv, "obj") == 0)) { + cfg->mode = EBPF_OBJECT; + } else if (opt_tbl[EBPF_PINNED] && + (matches(*argv, "object-pinned") == 0 || + matches(*argv, "pinned") == 0 || + matches(*argv, "fd") == 0)) { + cfg->mode = EBPF_PINNED; + } else { + fprintf(stderr, "What mode is \"%s\"?\n", *argv); + 
return -1; + } + + NEXT_ARG(); + file = section = uds_name = prog_name = NULL; + if (cfg->mode == EBPF_OBJECT || cfg->mode == EBPF_PINNED) { + file = *argv; + NEXT_ARG_FWD(); + + if (cfg->type == BPF_PROG_TYPE_UNSPEC) { + if (argc > 0 && matches(*argv, "type") == 0) { + NEXT_ARG(); + for (i = 0; i < ARRAY_SIZE(__bpf_prog_meta); + i++) { + if (!__bpf_prog_meta[i].type) + continue; + if (!matches(*argv, + __bpf_prog_meta[i].type)) { + cfg->type = i; + break; + } + } + + if (cfg->type == BPF_PROG_TYPE_UNSPEC) { + fprintf(stderr, "What type is \"%s\"?\n", + *argv); + return -1; + } + NEXT_ARG_FWD(); + } else { + cfg->type = BPF_PROG_TYPE_SCHED_CLS; + } + } + + section = bpf_prog_to_default_section(cfg->type); + if (argc > 0 && matches(*argv, "section") == 0) { + NEXT_ARG(); + section = *argv; + NEXT_ARG_FWD(); + } + + if (argc > 0 && strcmp(*argv, "program") == 0) { + NEXT_ARG(); + prog_name = *argv; + NEXT_ARG_FWD(); + } + + if (__bpf_prog_meta[cfg->type].may_uds_export) { + uds_name = getenv(BPF_ENV_UDS); + if (argc > 0 && !uds_name && + matches(*argv, "export") == 0) { + NEXT_ARG(); + uds_name = *argv; + NEXT_ARG_FWD(); + } + } + + if (argc > 0 && matches(*argv, "verbose") == 0) { + verbose = true; + NEXT_ARG_FWD(); + } + + PREV_ARG(); + } + + if (cfg->mode == CBPF_BYTECODE || cfg->mode == CBPF_FILE) { + ret = bpf_ops_parse(argc, argv, cfg->opcodes, + cfg->mode == CBPF_FILE); + cfg->n_opcodes = ret; + } else if (cfg->mode == EBPF_OBJECT) { + ret = 0; /* program will be loaded by load stage */ + } else if (cfg->mode == EBPF_PINNED) { + ret = bpf_obj_pinned(file, cfg->type); + cfg->prog_fd = ret; + } else { + return -1; + } + + cfg->object = file; + cfg->section = section; + cfg->uds = uds_name; + cfg->argc = argc; + cfg->argv = argv; + cfg->verbose = verbose; + cfg->prog_name = prog_name; + + return ret; +} + +static int bpf_do_load(struct bpf_cfg_in *cfg) +{ + if (cfg->mode == EBPF_OBJECT) { +#ifdef HAVE_LIBBPF + return iproute2_load_libbpf(cfg); +#endif + 
cfg->prog_fd = bpf_obj_open(cfg->object, cfg->type, + cfg->section, cfg->ifindex, + cfg->verbose); + return cfg->prog_fd; + } + return 0; +} + +int bpf_load_common(struct bpf_cfg_in *cfg, const struct bpf_cfg_ops *ops, + void *nl) +{ + char annotation[256]; + int ret; + + ret = bpf_do_load(cfg); + if (ret < 0) + return ret; + + if (cfg->mode == CBPF_BYTECODE || cfg->mode == CBPF_FILE) + ops->cbpf_cb(nl, cfg->opcodes, cfg->n_opcodes); + if (cfg->mode == EBPF_OBJECT || cfg->mode == EBPF_PINNED) { + snprintf(annotation, sizeof(annotation), "%s:[%s]", + basename(cfg->object), cfg->mode == EBPF_PINNED ? + "*fsobj" : cfg->section); + ops->ebpf_cb(nl, cfg->prog_fd, annotation); + } + + return 0; +} + +int bpf_parse_common(struct bpf_cfg_in *cfg, const struct bpf_cfg_ops *ops) +{ + bool opt_tbl[BPF_MODE_MAX] = {}; + + if (ops->cbpf_cb) { + opt_tbl[CBPF_BYTECODE] = true; + opt_tbl[CBPF_FILE] = true; + } + + if (ops->ebpf_cb) { + opt_tbl[EBPF_OBJECT] = true; + opt_tbl[EBPF_PINNED] = true; + } + + return bpf_do_parse(cfg, opt_tbl); +} + +int bpf_parse_and_load_common(struct bpf_cfg_in *cfg, + const struct bpf_cfg_ops *ops, void *nl) +{ + int ret; + + ret = bpf_parse_common(cfg, ops); + if (ret < 0) + return ret; + + return bpf_load_common(cfg, ops, nl); +} + +int bpf_graft_map(const char *map_path, uint32_t *key, int argc, char **argv) +{ + const bool opt_tbl[BPF_MODE_MAX] = { + [EBPF_OBJECT] = true, + [EBPF_PINNED] = true, + }; + const struct bpf_elf_map test = { + .type = BPF_MAP_TYPE_PROG_ARRAY, + .size_key = sizeof(int), + .size_value = sizeof(int), + }; + struct bpf_cfg_in cfg = { + .type = BPF_PROG_TYPE_UNSPEC, + .argc = argc, + .argv = argv, + }; + struct bpf_map_ext ext = {}; + int ret, prog_fd, map_fd; + uint32_t map_key; + + ret = bpf_do_parse(&cfg, opt_tbl); + if (ret < 0) + return ret; + + ret = bpf_do_load(&cfg); + if (ret < 0) + return ret; + + prog_fd = cfg.prog_fd; + + if (key) { + map_key = *key; + } else { + ret = sscanf(cfg.section, "%*i/%i", &map_key); + 
if (ret != 1) { + fprintf(stderr, "Couldn\'t infer map key from section name! Please provide \'key\' argument!\n"); + ret = -EINVAL; + goto out_prog; + } + } + + map_fd = bpf_obj_get(map_path, cfg.type); + if (map_fd < 0) { + fprintf(stderr, "Couldn\'t retrieve pinned map \'%s\': %s\n", + map_path, strerror(errno)); + ret = map_fd; + goto out_prog; + } + + ret = bpf_map_selfcheck_pinned(map_fd, &test, &ext, + offsetof(struct bpf_elf_map, max_elem), + cfg.type); + if (ret < 0) { + fprintf(stderr, "Map \'%s\' self-check failed!\n", map_path); + goto out_map; + } + + ret = bpf_map_update(map_fd, &map_key, &prog_fd, BPF_ANY); + if (ret < 0) + fprintf(stderr, "Map update failed: %s\n", strerror(errno)); +out_map: + close(map_fd); +out_prog: + close(prog_fd); + return ret; +} + +int bpf_prog_attach_fd(int prog_fd, int target_fd, enum bpf_attach_type type) +{ + union bpf_attr attr = {}; + + attr.target_fd = target_fd; + attr.attach_bpf_fd = prog_fd; + attr.attach_type = type; + + return bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); +} + +int bpf_prog_detach_fd(int target_fd, enum bpf_attach_type type) +{ + union bpf_attr attr = {}; + + attr.target_fd = target_fd; + attr.attach_type = type; + + return bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); +} + +int bpf_prog_load_dev(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t size_insns, const char *license, __u32 ifindex, + char *log, size_t size_log) +{ + union bpf_attr attr = {}; + + attr.prog_type = type; + attr.insns = bpf_ptr_to_u64(insns); + attr.insn_cnt = size_insns / sizeof(struct bpf_insn); + attr.license = bpf_ptr_to_u64(license); + attr.prog_ifindex = ifindex; + + if (size_log > 0) { + attr.log_buf = bpf_ptr_to_u64(log); + attr.log_size = size_log; + attr.log_level = 1; + } + + return bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); +} + +int bpf_program_load(enum bpf_prog_type type, const struct bpf_insn *insns, + size_t size_insns, const char *license, char *log, + size_t size_log) +{ + return 
bpf_prog_load_dev(type, insns, size_insns, license, 0, log, size_log); +} + +#ifdef HAVE_ELF +struct bpf_elf_prog { + enum bpf_prog_type type; + struct bpf_insn *insns; + unsigned int insns_num; + size_t size; + const char *license; +}; + +struct bpf_hash_entry { + unsigned int pinning; + const char *subpath; + struct bpf_hash_entry *next; +}; + +struct bpf_config { + unsigned int jit_enabled; +}; + +struct bpf_btf { + const struct btf_header *hdr; + const void *raw; + const char *strings; + const struct btf_type **types; + int types_num; +}; + +struct bpf_elf_ctx { + struct bpf_config cfg; + Elf *elf_fd; + GElf_Ehdr elf_hdr; + Elf_Data *sym_tab; + Elf_Data *str_tab; + Elf_Data *btf_data; + char obj_uid[64]; + int obj_fd; + int btf_fd; + int map_fds[ELF_MAX_MAPS]; + struct bpf_elf_map maps[ELF_MAX_MAPS]; + struct bpf_map_ext maps_ext[ELF_MAX_MAPS]; + struct bpf_elf_prog prog_text; + struct bpf_btf btf; + int sym_num; + int map_num; + int map_len; + bool *sec_done; + int sec_maps; + int sec_text; + int sec_btf; + char license[ELF_MAX_LICENSE_LEN]; + enum bpf_prog_type type; + __u32 ifindex; + bool verbose; + bool noafalg; + struct bpf_elf_st stat; + struct bpf_hash_entry *ht[256]; + char *log; + size_t log_size; +}; + +struct bpf_elf_sec_data { + GElf_Shdr sec_hdr; + Elf_Data *sec_data; + const char *sec_name; +}; + +struct bpf_map_data { + int *fds; + const char *obj; + struct bpf_elf_st *st; + struct bpf_elf_map *ent; +}; + +static bool bpf_log_has_data(struct bpf_elf_ctx *ctx) +{ + return ctx->log && ctx->log[0]; +} + +static __check_format_string(2, 3) void +bpf_dump_error(struct bpf_elf_ctx *ctx, const char *format, ...) 
+{ + va_list vl; + + va_start(vl, format); + vfprintf(stderr, format, vl); + va_end(vl); + + if (bpf_log_has_data(ctx)) { + if (ctx->verbose) { + fprintf(stderr, "%s\n", ctx->log); + } else { + unsigned int off = 0, len = strlen(ctx->log); + + if (len > BPF_MAX_LOG) { + off = len - BPF_MAX_LOG; + fprintf(stderr, "Skipped %u bytes, use \'verb\' option for the full verbose log.\n[...]\n", + off); + } + fprintf(stderr, "%s\n", ctx->log + off); + } + + memset(ctx->log, 0, ctx->log_size); + } +} + +static int bpf_log_realloc(struct bpf_elf_ctx *ctx) +{ + const size_t log_max = UINT_MAX >> 8; + size_t log_size = ctx->log_size; + char *ptr; + + if (!ctx->log) { + log_size = 65536; + } else if (log_size < log_max) { + log_size <<= 1; + if (log_size > log_max) + log_size = log_max; + } else { + return -EINVAL; + } + + ptr = realloc(ctx->log, log_size); + if (!ptr) + return -ENOMEM; + + ptr[0] = 0; + ctx->log = ptr; + ctx->log_size = log_size; + + return 0; +} + +static int bpf_map_create(enum bpf_map_type type, uint32_t size_key, + uint32_t size_value, uint32_t max_elem, + uint32_t flags, int inner_fd, int btf_fd, + uint32_t ifindex, uint32_t btf_id_key, + uint32_t btf_id_val) +{ + union bpf_attr attr = {}; + + attr.map_type = type; + attr.key_size = size_key; + attr.value_size = inner_fd ? 
sizeof(int) : size_value; + attr.max_entries = max_elem; + attr.map_flags = flags; + attr.inner_map_fd = inner_fd; + attr.map_ifindex = ifindex; + attr.btf_fd = btf_fd; + attr.btf_key_type_id = btf_id_key; + attr.btf_value_type_id = btf_id_val; + + return bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); +} + +static int bpf_btf_load(void *btf, size_t size_btf, + char *log, size_t size_log) +{ + union bpf_attr attr = {}; + + attr.btf = bpf_ptr_to_u64(btf); + attr.btf_size = size_btf; + + if (size_log > 0) { + attr.btf_log_buf = bpf_ptr_to_u64(log); + attr.btf_log_size = size_log; + attr.btf_log_level = 1; + } + + return bpf(BPF_BTF_LOAD, &attr, sizeof(attr)); +} + +static int bpf_obj_pin(int fd, const char *pathname) +{ + union bpf_attr attr = {}; + + attr.pathname = bpf_ptr_to_u64(pathname); + attr.bpf_fd = fd; + + return bpf(BPF_OBJ_PIN, &attr, sizeof(attr)); +} + +static int bpf_obj_hash(const char *object, uint8_t *out, size_t len) +{ + struct sockaddr_alg alg = { + .salg_family = AF_ALG, + .salg_type = "hash", + .salg_name = "sha1", + }; + int ret, cfd, ofd, ffd; + struct stat stbuff; + ssize_t size; + + if (!object || len != 20) + return -EINVAL; + + cfd = socket(AF_ALG, SOCK_SEQPACKET, 0); + if (cfd < 0) + return cfd; + + ret = bind(cfd, (struct sockaddr *)&alg, sizeof(alg)); + if (ret < 0) + goto out_cfd; + + ofd = accept(cfd, NULL, 0); + if (ofd < 0) { + ret = ofd; + goto out_cfd; + } + + ffd = open(object, O_RDONLY); + if (ffd < 0) { + fprintf(stderr, "Error opening object %s: %s\n", + object, strerror(errno)); + ret = ffd; + goto out_ofd; + } + + ret = fstat(ffd, &stbuff); + if (ret < 0) { + fprintf(stderr, "Error doing fstat: %s\n", + strerror(errno)); + goto out_ffd; + } + + size = sendfile(ofd, ffd, NULL, stbuff.st_size); + if (size != stbuff.st_size) { + fprintf(stderr, "Error from sendfile (%zd vs %zu bytes): %s\n", + size, stbuff.st_size, strerror(errno)); + ret = -1; + goto out_ffd; + } + + size = read(ofd, out, len); + if (size != len) { + 
fprintf(stderr, "Error from read (%zd vs %zu bytes): %s\n", + size, len, strerror(errno)); + ret = -1; + } else { + ret = 0; + } +out_ffd: + close(ffd); +out_ofd: + close(ofd); +out_cfd: + close(cfd); + return ret; +} + +static void bpf_init_env(void) +{ + struct rlimit limit = { + .rlim_cur = RLIM_INFINITY, + .rlim_max = RLIM_INFINITY, + }; + + /* Don't bother in case we fail! */ + setrlimit(RLIMIT_MEMLOCK, &limit); + + if (!bpf_get_work_dir(BPF_PROG_TYPE_UNSPEC)) + fprintf(stderr, "Continuing without mounted eBPF fs. Too old kernel?\n"); +} + +static const char *bpf_custom_pinning(const struct bpf_elf_ctx *ctx, + uint32_t pinning) +{ + struct bpf_hash_entry *entry; + + entry = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)]; + while (entry && entry->pinning != pinning) + entry = entry->next; + + return entry ? entry->subpath : NULL; +} + +static bool bpf_no_pinning(const struct bpf_elf_ctx *ctx, + uint32_t pinning) +{ + switch (pinning) { + case PIN_OBJECT_NS: + case PIN_GLOBAL_NS: + return false; + case PIN_NONE: + return true; + default: + return !bpf_custom_pinning(ctx, pinning); + } +} + +static void bpf_make_pathname(char *pathname, size_t len, const char *name, + const struct bpf_elf_ctx *ctx, uint32_t pinning) +{ + switch (pinning) { + case PIN_OBJECT_NS: + snprintf(pathname, len, "%s/%s/%s", + bpf_get_work_dir(ctx->type), + ctx->obj_uid, name); + break; + case PIN_GLOBAL_NS: + snprintf(pathname, len, "%s/%s/%s", + bpf_get_work_dir(ctx->type), + BPF_DIR_GLOBALS, name); + break; + default: + snprintf(pathname, len, "%s/../%s/%s", + bpf_get_work_dir(ctx->type), + bpf_custom_pinning(ctx, pinning), name); + break; + } +} + +static int bpf_probe_pinned(const char *name, const struct bpf_elf_ctx *ctx, + uint32_t pinning) +{ + char pathname[PATH_MAX]; + + if (bpf_no_pinning(ctx, pinning) || !bpf_get_work_dir(ctx->type)) + return 0; + + bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning); + return bpf_obj_get(pathname, ctx->type); +} + +static int 
bpf_make_obj_path(const struct bpf_elf_ctx *ctx) +{ + char tmp[PATH_MAX]; + int ret; + + snprintf(tmp, sizeof(tmp), "%s/%s", bpf_get_work_dir(ctx->type), + ctx->obj_uid); + + ret = mkdir(tmp, S_IRWXU); + if (ret && errno != EEXIST) { + fprintf(stderr, "mkdir %s failed: %s\n", tmp, strerror(errno)); + return ret; + } + + return 0; +} + +static int bpf_make_custom_path(const struct bpf_elf_ctx *ctx, + const char *todo) +{ + char tmp[PATH_MAX], rem[PATH_MAX], *sub; + int ret; + + snprintf(tmp, sizeof(tmp), "%s/../", bpf_get_work_dir(ctx->type)); + snprintf(rem, sizeof(rem), "%s/", todo); + sub = strtok(rem, "/"); + + while (sub) { + if (strlen(tmp) + strlen(sub) + 2 > PATH_MAX) + return -EINVAL; + + strcat(tmp, sub); + strcat(tmp, "/"); + + ret = mkdir(tmp, S_IRWXU); + if (ret && errno != EEXIST) { + fprintf(stderr, "mkdir %s failed: %s\n", tmp, + strerror(errno)); + return ret; + } + + sub = strtok(NULL, "/"); + } + + return 0; +} + +static int bpf_place_pinned(int fd, const char *name, + const struct bpf_elf_ctx *ctx, uint32_t pinning) +{ + char pathname[PATH_MAX]; + const char *tmp; + int ret = 0; + + if (bpf_no_pinning(ctx, pinning) || !bpf_get_work_dir(ctx->type)) + return 0; + + if (pinning == PIN_OBJECT_NS) + ret = bpf_make_obj_path(ctx); + else if ((tmp = bpf_custom_pinning(ctx, pinning))) + ret = bpf_make_custom_path(ctx, tmp); + if (ret < 0) + return ret; + + bpf_make_pathname(pathname, sizeof(pathname), name, ctx, pinning); + return bpf_obj_pin(fd, pathname); +} + +static void bpf_prog_report(int fd, const char *section, + const struct bpf_elf_prog *prog, + struct bpf_elf_ctx *ctx) +{ + unsigned int insns = prog->size / sizeof(struct bpf_insn); + + fprintf(stderr, "\nProg section \'%s\' %s%s (%d)!\n", section, + fd < 0 ? "rejected: " : "loaded", + fd < 0 ? strerror(errno) : "", + fd < 0 ? errno : fd); + + fprintf(stderr, " - Type: %u\n", prog->type); + fprintf(stderr, " - Instructions: %u (%u over limit)\n", + insns, insns > BPF_MAXINSNS ? 
insns - BPF_MAXINSNS : 0); + fprintf(stderr, " - License: %s\n\n", prog->license); + + bpf_dump_error(ctx, "Verifier analysis:\n\n"); +} + +static int bpf_prog_attach(const char *section, + const struct bpf_elf_prog *prog, + struct bpf_elf_ctx *ctx) +{ + int tries = 0, fd; +retry: + errno = 0; + fd = bpf_prog_load_dev(prog->type, prog->insns, prog->size, + prog->license, ctx->ifindex, + ctx->log, ctx->log_size); + if (fd < 0 || ctx->verbose) { + /* The verifier log is pretty chatty, sometimes so chatty + * on larger programs, that we could fail to dump everything + * into our buffer. Still, try to give a debuggable error + * log for the user, so enlarge it and re-fail. + */ + if (fd < 0 && errno == ENOSPC) { + if (tries++ < 10 && !bpf_log_realloc(ctx)) + goto retry; + + fprintf(stderr, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n", + ctx->log_size, tries); + return fd; + } + + bpf_prog_report(fd, section, prog, ctx); + } + + return fd; +} + +static void bpf_map_report(int fd, const char *name, + const struct bpf_elf_map *map, + struct bpf_elf_ctx *ctx, int inner_fd) +{ + fprintf(stderr, "Map object \'%s\' %s%s (%d)!\n", name, + fd < 0 ? "rejected: " : "loaded", + fd < 0 ? strerror(errno) : "", + fd < 0 ? errno : fd); + + fprintf(stderr, " - Type: %u\n", map->type); + fprintf(stderr, " - Identifier: %u\n", map->id); + fprintf(stderr, " - Pinning: %u\n", map->pinning); + fprintf(stderr, " - Size key: %u\n", map->size_key); + fprintf(stderr, " - Size value: %u\n", + inner_fd ? 
(int)sizeof(int) : map->size_value); + fprintf(stderr, " - Max elems: %u\n", map->max_elem); + fprintf(stderr, " - Flags: %#x\n\n", map->flags); +} + +static int bpf_find_map_id(const struct bpf_elf_ctx *ctx, uint32_t id) +{ + int i; + + for (i = 0; i < ctx->map_num; i++) { + if (ctx->maps[i].id != id) + continue; + if (ctx->map_fds[i] < 0) + return -EINVAL; + + return ctx->map_fds[i]; + } + + return -ENOENT; +} + +static void bpf_report_map_in_map(int outer_fd, uint32_t idx) +{ + struct bpf_elf_map outer_map; + int ret; + + fprintf(stderr, "Cannot insert map into map! "); + + ret = bpf_derive_elf_map_from_fdinfo(outer_fd, &outer_map, NULL); + if (!ret) { + if (idx >= outer_map.max_elem && + outer_map.type == BPF_MAP_TYPE_ARRAY_OF_MAPS) { + fprintf(stderr, "Outer map has %u elements, index %u is invalid!\n", + outer_map.max_elem, idx); + return; + } + } + + fprintf(stderr, "Different map specs used for outer and inner map?\n"); +} + +static bool bpf_is_map_in_map_type(const struct bpf_elf_map *map) +{ + return map->type == BPF_MAP_TYPE_ARRAY_OF_MAPS || + map->type == BPF_MAP_TYPE_HASH_OF_MAPS; +} + +static bool bpf_map_offload_neutral(enum bpf_map_type type) +{ + return type == BPF_MAP_TYPE_PERF_EVENT_ARRAY; +} + +static int bpf_map_attach(const char *name, struct bpf_elf_ctx *ctx, + const struct bpf_elf_map *map, struct bpf_map_ext *ext, + int *have_map_in_map) +{ + int fd, ifindex, ret, map_inner_fd = 0; + bool retried = false; + +probe: + fd = bpf_probe_pinned(name, ctx, map->pinning); + if (fd > 0) { + ret = bpf_map_selfcheck_pinned(fd, map, ext, + offsetof(struct bpf_elf_map, + id), ctx->type); + if (ret < 0) { + close(fd); + fprintf(stderr, "Map \'%s\' self-check failed!\n", + name); + return ret; + } + if (ctx->verbose) + fprintf(stderr, "Map \'%s\' loaded as pinned!\n", + name); + return fd; + } + + if (have_map_in_map && bpf_is_map_in_map_type(map)) { + (*have_map_in_map)++; + if (map->inner_id) + return 0; + fprintf(stderr, "Map \'%s\' cannot be created 
since no inner map ID defined!\n", + name); + return -EINVAL; + } + + if (!have_map_in_map && bpf_is_map_in_map_type(map)) { + map_inner_fd = bpf_find_map_id(ctx, map->inner_id); + if (map_inner_fd < 0) { + fprintf(stderr, "Map \'%s\' cannot be loaded. Inner map with ID %u not found!\n", + name, map->inner_id); + return -EINVAL; + } + } + + ifindex = bpf_map_offload_neutral(map->type) ? 0 : ctx->ifindex; + errno = 0; + fd = bpf_map_create(map->type, map->size_key, map->size_value, + map->max_elem, map->flags, map_inner_fd, ctx->btf_fd, + ifindex, ext->btf_id_key, ext->btf_id_val); + + if (fd < 0 || ctx->verbose) { + bpf_map_report(fd, name, map, ctx, map_inner_fd); + if (fd < 0) + return fd; + } + + ret = bpf_place_pinned(fd, name, ctx, map->pinning); + if (ret < 0) { + close(fd); + if (!retried && errno == EEXIST) { + retried = true; + goto probe; + } + fprintf(stderr, "Could not pin %s map: %s\n", name, + strerror(errno)); + return ret; + } + + return fd; +} + +static const char *bpf_str_tab_name(const struct bpf_elf_ctx *ctx, + const GElf_Sym *sym) +{ + return ctx->str_tab->d_buf + sym->st_name; +} + +static int bpf_btf_find(struct bpf_elf_ctx *ctx, const char *name) +{ + const struct btf_type *type; + const char *res; + int id; + + for (id = 1; id < ctx->btf.types_num; id++) { + type = ctx->btf.types[id]; + if (type->name_off >= ctx->btf.hdr->str_len) + continue; + res = &ctx->btf.strings[type->name_off]; + if (!strcmp(res, name)) + return id; + } + + return -ENOENT; +} + +static int bpf_btf_find_kv(struct bpf_elf_ctx *ctx, const struct bpf_elf_map *map, + const char *name, uint32_t *id_key, uint32_t *id_val) +{ + const struct btf_member *key, *val; + const struct btf_type *type; + char btf_name[512]; + const char *res; + int id; + + snprintf(btf_name, sizeof(btf_name), "____btf_map_%s", name); + id = bpf_btf_find(ctx, btf_name); + if (id < 0) + return id; + + type = ctx->btf.types[id]; + if (BTF_INFO_KIND(type->info) != BTF_KIND_STRUCT) + return -EINVAL; + if 
(BTF_INFO_VLEN(type->info) != 2) + return -EINVAL; + + key = ((void *) type) + sizeof(*type); + val = key + 1; + if (!key->type || key->type >= ctx->btf.types_num || + !val->type || val->type >= ctx->btf.types_num) + return -EINVAL; + + if (key->name_off >= ctx->btf.hdr->str_len || + val->name_off >= ctx->btf.hdr->str_len) + return -EINVAL; + + res = &ctx->btf.strings[key->name_off]; + if (strcmp(res, "key")) + return -EINVAL; + + res = &ctx->btf.strings[val->name_off]; + if (strcmp(res, "value")) + return -EINVAL; + + *id_key = key->type; + *id_val = val->type; + return 0; +} + +static void bpf_btf_annotate(struct bpf_elf_ctx *ctx, int which, const char *name) +{ + uint32_t id_key = 0, id_val = 0; + + if (!bpf_btf_find_kv(ctx, &ctx->maps[which], name, &id_key, &id_val)) { + ctx->maps_ext[which].btf_id_key = id_key; + ctx->maps_ext[which].btf_id_val = id_val; + } +} + +static const char *bpf_map_fetch_name(struct bpf_elf_ctx *ctx, int which) +{ + const char *name; + GElf_Sym sym; + int i; + + for (i = 0; i < ctx->sym_num; i++) { + int type; + + if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym) + continue; + + type = GELF_ST_TYPE(sym.st_info); + if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || + (type != STT_NOTYPE && type != STT_OBJECT) || + sym.st_shndx != ctx->sec_maps || + sym.st_value / ctx->map_len != which) + continue; + + name = bpf_str_tab_name(ctx, &sym); + bpf_btf_annotate(ctx, which, name); + return name; + } + + return NULL; +} + +static int bpf_maps_attach_all(struct bpf_elf_ctx *ctx) +{ + int i, j, ret, fd, inner_fd, inner_idx, have_map_in_map = 0; + const char *map_name; + + for (i = 0; i < ctx->map_num; i++) { + if (ctx->maps[i].pinning == PIN_OBJECT_NS && + ctx->noafalg) { + fprintf(stderr, "Missing kernel AF_ALG support for PIN_OBJECT_NS!\n"); + return -ENOTSUP; + } + + map_name = bpf_map_fetch_name(ctx, i); + if (!map_name) + return -EIO; + + fd = bpf_map_attach(map_name, ctx, &ctx->maps[i], + &ctx->maps_ext[i], &have_map_in_map); + if (fd < 0) + 
return fd; + + ctx->map_fds[i] = !fd ? -1 : fd; + } + + for (i = 0; have_map_in_map && i < ctx->map_num; i++) { + if (ctx->map_fds[i] >= 0) + continue; + + map_name = bpf_map_fetch_name(ctx, i); + if (!map_name) + return -EIO; + + fd = bpf_map_attach(map_name, ctx, &ctx->maps[i], + &ctx->maps_ext[i], NULL); + if (fd < 0) + return fd; + + ctx->map_fds[i] = fd; + } + + for (i = 0; have_map_in_map && i < ctx->map_num; i++) { + if (!ctx->maps[i].id || + ctx->maps[i].inner_id || + ctx->maps[i].inner_idx == -1) + continue; + + inner_fd = ctx->map_fds[i]; + inner_idx = ctx->maps[i].inner_idx; + + for (j = 0; j < ctx->map_num; j++) { + if (!bpf_is_map_in_map_type(&ctx->maps[j])) + continue; + if (ctx->maps[j].inner_id != ctx->maps[i].id) + continue; + + ret = bpf_map_update(ctx->map_fds[j], &inner_idx, + &inner_fd, BPF_ANY); + if (ret < 0) { + bpf_report_map_in_map(ctx->map_fds[j], + inner_idx); + return ret; + } + } + } + + return 0; +} + +static int bpf_map_num_sym(struct bpf_elf_ctx *ctx) +{ + int i, num = 0; + GElf_Sym sym; + + for (i = 0; i < ctx->sym_num; i++) { + int type; + + if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym) + continue; + + type = GELF_ST_TYPE(sym.st_info); + if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || + (type != STT_NOTYPE && type != STT_OBJECT) || + sym.st_shndx != ctx->sec_maps) + continue; + num++; + } + + return num; +} + +static int bpf_fill_section_data(struct bpf_elf_ctx *ctx, int section, + struct bpf_elf_sec_data *data) +{ + Elf_Data *sec_edata; + GElf_Shdr sec_hdr; + Elf_Scn *sec_fd; + char *sec_name; + + memset(data, 0, sizeof(*data)); + + sec_fd = elf_getscn(ctx->elf_fd, section); + if (!sec_fd) + return -EINVAL; + if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr) + return -EIO; + + sec_name = elf_strptr(ctx->elf_fd, ctx->elf_hdr.e_shstrndx, + sec_hdr.sh_name); + if (!sec_name || !sec_hdr.sh_size) + return -ENOENT; + + sec_edata = elf_getdata(sec_fd, NULL); + if (!sec_edata || elf_getdata(sec_fd, sec_edata)) + return -EIO; + + 
memcpy(&data->sec_hdr, &sec_hdr, sizeof(sec_hdr)); + + data->sec_name = sec_name; + data->sec_data = sec_edata; + return 0; +} + +struct bpf_elf_map_min { + __u32 type; + __u32 size_key; + __u32 size_value; + __u32 max_elem; +}; + +static int bpf_fetch_maps_begin(struct bpf_elf_ctx *ctx, int section, + struct bpf_elf_sec_data *data) +{ + ctx->map_num = data->sec_data->d_size; + ctx->sec_maps = section; + ctx->sec_done[section] = true; + + if (ctx->map_num > sizeof(ctx->maps)) { + fprintf(stderr, "Too many BPF maps in ELF section!\n"); + return -ENOMEM; + } + + memcpy(ctx->maps, data->sec_data->d_buf, ctx->map_num); + return 0; +} + +static int bpf_map_verify_all_offs(struct bpf_elf_ctx *ctx, int end) +{ + GElf_Sym sym; + int off, i; + + for (off = 0; off < end; off += ctx->map_len) { + /* Order doesn't need to be linear here, hence we walk + * the table again. + */ + for (i = 0; i < ctx->sym_num; i++) { + int type; + + if (gelf_getsym(ctx->sym_tab, i, &sym) != &sym) + continue; + + type = GELF_ST_TYPE(sym.st_info); + if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL || + (type != STT_NOTYPE && type != STT_OBJECT) || + sym.st_shndx != ctx->sec_maps) + continue; + if (sym.st_value == off) + break; + if (i == ctx->sym_num - 1) + return -1; + } + } + + return off == end ? 
0 : -1; +} + +static int bpf_fetch_maps_end(struct bpf_elf_ctx *ctx) +{ + struct bpf_elf_map fixup[ARRAY_SIZE(ctx->maps)] = {}; + int i, sym_num = bpf_map_num_sym(ctx); + __u8 *buff; + + if (sym_num == 0 || sym_num > ARRAY_SIZE(ctx->maps)) { + fprintf(stderr, "%u maps not supported in current map section!\n", + sym_num); + return -EINVAL; + } + + if (ctx->map_num % sym_num != 0 || + ctx->map_num % sizeof(__u32) != 0) { + fprintf(stderr, "Number BPF map symbols are not multiple of struct bpf_elf_map!\n"); + return -EINVAL; + } + + ctx->map_len = ctx->map_num / sym_num; + if (bpf_map_verify_all_offs(ctx, ctx->map_num)) { + fprintf(stderr, "Different struct bpf_elf_map in use!\n"); + return -EINVAL; + } + + if (ctx->map_len == sizeof(struct bpf_elf_map)) { + ctx->map_num = sym_num; + return 0; + } else if (ctx->map_len > sizeof(struct bpf_elf_map)) { + fprintf(stderr, "struct bpf_elf_map not supported, coming from future version?\n"); + return -EINVAL; + } else if (ctx->map_len < sizeof(struct bpf_elf_map_min)) { + fprintf(stderr, "struct bpf_elf_map too small, not supported!\n"); + return -EINVAL; + } + + ctx->map_num = sym_num; + for (i = 0, buff = (void *)ctx->maps; i < ctx->map_num; + i++, buff += ctx->map_len) { + /* The fixup leaves the rest of the members as zero, which + * is fine currently, but option exist to set some other + * default value as well when needed in future. 
+ */ + memcpy(&fixup[i], buff, ctx->map_len); + } + + memcpy(ctx->maps, fixup, sizeof(fixup)); + if (ctx->verbose) + printf("%zu bytes struct bpf_elf_map fixup performed due to size mismatch!\n", + sizeof(struct bpf_elf_map) - ctx->map_len); + return 0; +} + +static int bpf_fetch_license(struct bpf_elf_ctx *ctx, int section, + struct bpf_elf_sec_data *data) +{ + if (data->sec_data->d_size > sizeof(ctx->license)) + return -ENOMEM; + + memcpy(ctx->license, data->sec_data->d_buf, data->sec_data->d_size); + ctx->sec_done[section] = true; + return 0; +} + +static int bpf_fetch_symtab(struct bpf_elf_ctx *ctx, int section, + struct bpf_elf_sec_data *data) +{ + ctx->sym_tab = data->sec_data; + ctx->sym_num = data->sec_hdr.sh_size / data->sec_hdr.sh_entsize; + ctx->sec_done[section] = true; + return 0; +} + +static int bpf_fetch_strtab(struct bpf_elf_ctx *ctx, int section, + struct bpf_elf_sec_data *data) +{ + ctx->str_tab = data->sec_data; + ctx->sec_done[section] = true; + return 0; +} + +static int bpf_fetch_text(struct bpf_elf_ctx *ctx, int section, + struct bpf_elf_sec_data *data) +{ + ctx->sec_text = section; + ctx->sec_done[section] = true; + return 0; +} + +static void bpf_btf_report(int fd, struct bpf_elf_ctx *ctx) +{ + fprintf(stderr, "\nBTF debug data section \'.BTF\' %s%s (%d)!\n", + fd < 0 ? "rejected: " : "loaded", + fd < 0 ? strerror(errno) : "", + fd < 0 ? 
errno : fd); + + fprintf(stderr, " - Length: %zu\n", ctx->btf_data->d_size); + + bpf_dump_error(ctx, "Verifier analysis:\n\n"); +} + +static int bpf_btf_attach(struct bpf_elf_ctx *ctx) +{ + int tries = 0, fd; +retry: + errno = 0; + fd = bpf_btf_load(ctx->btf_data->d_buf, ctx->btf_data->d_size, + ctx->log, ctx->log_size); + if (fd < 0 || ctx->verbose) { + if (fd < 0 && errno == ENOSPC) { + if (tries++ < 10 && !bpf_log_realloc(ctx)) + goto retry; + + fprintf(stderr, "Log buffer too small to dump verifier log %zu bytes (%d tries)!\n", + ctx->log_size, tries); + return fd; + } + + if (bpf_log_has_data(ctx)) + bpf_btf_report(fd, ctx); + } + + return fd; +} + +static int bpf_fetch_btf_begin(struct bpf_elf_ctx *ctx, int section, + struct bpf_elf_sec_data *data) +{ + ctx->btf_data = data->sec_data; + ctx->sec_btf = section; + ctx->sec_done[section] = true; + return 0; +} + +static int bpf_btf_check_header(struct bpf_elf_ctx *ctx) +{ + const struct btf_header *hdr = ctx->btf_data->d_buf; + const char *str_start, *str_end; + unsigned int data_len; + + if (hdr->magic != BTF_MAGIC) { + fprintf(stderr, "Object has wrong BTF magic: %x, expected: %x!\n", + hdr->magic, BTF_MAGIC); + return -EINVAL; + } + + if (hdr->version != BTF_VERSION) { + fprintf(stderr, "Object has wrong BTF version: %u, expected: %u!\n", + hdr->version, BTF_VERSION); + return -EINVAL; + } + + if (hdr->flags) { + fprintf(stderr, "Object has unsupported BTF flags %x!\n", + hdr->flags); + return -EINVAL; + } + + data_len = ctx->btf_data->d_size - sizeof(*hdr); + if (data_len < hdr->type_off || + data_len < hdr->str_off || + data_len < hdr->type_len + hdr->str_len || + hdr->type_off >= hdr->str_off || + hdr->type_off + hdr->type_len != hdr->str_off || + hdr->str_off + hdr->str_len != data_len || + (hdr->type_off & (sizeof(uint32_t) - 1))) { + fprintf(stderr, "Object has malformed BTF data!\n"); + return -EINVAL; + } + + ctx->btf.hdr = hdr; + ctx->btf.raw = hdr + 1; + + str_start = ctx->btf.raw + hdr->str_off; + 
str_end = str_start + hdr->str_len; + if (!hdr->str_len || + hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || + str_start[0] || str_end[-1]) { + fprintf(stderr, "Object has malformed BTF string data!\n"); + return -EINVAL; + } + + ctx->btf.strings = str_start; + return 0; +} + +static int bpf_btf_register_type(struct bpf_elf_ctx *ctx, + const struct btf_type *type) +{ + int cur = ctx->btf.types_num, num = cur + 1; + const struct btf_type **types; + + types = realloc(ctx->btf.types, num * sizeof(type)); + if (!types) { + free(ctx->btf.types); + ctx->btf.types = NULL; + ctx->btf.types_num = 0; + return -ENOMEM; + } + + ctx->btf.types = types; + ctx->btf.types[cur] = type; + ctx->btf.types_num = num; + return 0; +} + +static struct btf_type btf_type_void; + +static int bpf_btf_prep_type_data(struct bpf_elf_ctx *ctx) +{ + const void *type_cur = ctx->btf.raw + ctx->btf.hdr->type_off; + const void *type_end = ctx->btf.raw + ctx->btf.hdr->str_off; + const struct btf_type *type; + uint16_t var_len; + int ret, kind; + + ret = bpf_btf_register_type(ctx, &btf_type_void); + if (ret < 0) + return ret; + + while (type_cur < type_end) { + type = type_cur; + type_cur += sizeof(*type); + + var_len = BTF_INFO_VLEN(type->info); + kind = BTF_INFO_KIND(type->info); + + switch (kind) { + case BTF_KIND_INT: + type_cur += sizeof(int); + break; + case BTF_KIND_ARRAY: + type_cur += sizeof(struct btf_array); + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + type_cur += var_len * sizeof(struct btf_member); + break; + case BTF_KIND_ENUM: + type_cur += var_len * sizeof(struct btf_enum); + break; + case BTF_KIND_FUNC_PROTO: + type_cur += var_len * sizeof(struct btf_param); + break; + case BTF_KIND_TYPEDEF: + case BTF_KIND_PTR: + case BTF_KIND_FWD: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_FUNC: + break; + default: + fprintf(stderr, "Object has unknown BTF type: %u!\n", kind); + return -EINVAL; + } + + ret = bpf_btf_register_type(ctx, type); + if 
(ret < 0) + return ret; + } + + return 0; +} + +static int bpf_btf_prep_data(struct bpf_elf_ctx *ctx) +{ + int ret = bpf_btf_check_header(ctx); + + if (!ret) + return bpf_btf_prep_type_data(ctx); + return ret; +} + +static void bpf_fetch_btf_end(struct bpf_elf_ctx *ctx) +{ + int fd = bpf_btf_attach(ctx); + + if (fd < 0) + return; + ctx->btf_fd = fd; + if (bpf_btf_prep_data(ctx) < 0) { + close(ctx->btf_fd); + ctx->btf_fd = 0; + } +} + +static bool bpf_has_map_data(const struct bpf_elf_ctx *ctx) +{ + return ctx->sym_tab && ctx->str_tab && ctx->sec_maps; +} + +static bool bpf_has_btf_data(const struct bpf_elf_ctx *ctx) +{ + return ctx->sec_btf; +} + +static bool bpf_has_call_data(const struct bpf_elf_ctx *ctx) +{ + return ctx->sec_text; +} + +static int bpf_fetch_ancillary(struct bpf_elf_ctx *ctx, bool check_text_sec) +{ + struct bpf_elf_sec_data data; + int i, ret = -1; + + for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { + ret = bpf_fill_section_data(ctx, i, &data); + if (ret < 0) + continue; + + if (data.sec_hdr.sh_type == SHT_PROGBITS && + !strcmp(data.sec_name, ELF_SECTION_MAPS)) + ret = bpf_fetch_maps_begin(ctx, i, &data); + else if (data.sec_hdr.sh_type == SHT_PROGBITS && + !strcmp(data.sec_name, ELF_SECTION_LICENSE)) + ret = bpf_fetch_license(ctx, i, &data); + else if (data.sec_hdr.sh_type == SHT_PROGBITS && + (data.sec_hdr.sh_flags & SHF_EXECINSTR) && + !strcmp(data.sec_name, ".text") && + check_text_sec) + ret = bpf_fetch_text(ctx, i, &data); + else if (data.sec_hdr.sh_type == SHT_SYMTAB && + !strcmp(data.sec_name, ".symtab")) + ret = bpf_fetch_symtab(ctx, i, &data); + else if (data.sec_hdr.sh_type == SHT_STRTAB && + !strcmp(data.sec_name, ".strtab")) + ret = bpf_fetch_strtab(ctx, i, &data); + else if (data.sec_hdr.sh_type == SHT_PROGBITS && + !strcmp(data.sec_name, ".BTF")) + ret = bpf_fetch_btf_begin(ctx, i, &data); + if (ret < 0) { + fprintf(stderr, "Error parsing section %d! 
Perhaps check with readelf -a?\n", + i); + return ret; + } + } + + if (bpf_has_btf_data(ctx)) + bpf_fetch_btf_end(ctx); + if (bpf_has_map_data(ctx)) { + ret = bpf_fetch_maps_end(ctx); + if (ret < 0) { + fprintf(stderr, "Error fixing up map structure, incompatible struct bpf_elf_map used?\n"); + return ret; + } + + ret = bpf_maps_attach_all(ctx); + if (ret < 0) { + fprintf(stderr, "Error loading maps into kernel!\n"); + return ret; + } + } + + return ret; +} + +static int bpf_fetch_prog(struct bpf_elf_ctx *ctx, const char *section, + bool *sseen) +{ + struct bpf_elf_sec_data data; + struct bpf_elf_prog prog; + int ret, i, fd = -1; + + for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { + if (ctx->sec_done[i]) + continue; + + ret = bpf_fill_section_data(ctx, i, &data); + if (ret < 0 || + !(data.sec_hdr.sh_type == SHT_PROGBITS && + (data.sec_hdr.sh_flags & SHF_EXECINSTR) && + !strcmp(data.sec_name, section))) + continue; + + *sseen = true; + + memset(&prog, 0, sizeof(prog)); + prog.type = ctx->type; + prog.license = ctx->license; + prog.size = data.sec_data->d_size; + prog.insns_num = prog.size / sizeof(struct bpf_insn); + prog.insns = data.sec_data->d_buf; + + fd = bpf_prog_attach(section, &prog, ctx); + if (fd < 0) + return fd; + + ctx->sec_done[i] = true; + break; + } + + return fd; +} + +struct bpf_relo_props { + struct bpf_tail_call { + unsigned int total; + unsigned int jited; + } tc; + int main_num; +}; + +static int bpf_apply_relo_map(struct bpf_elf_ctx *ctx, struct bpf_elf_prog *prog, + GElf_Rel *relo, GElf_Sym *sym, + struct bpf_relo_props *props) +{ + unsigned int insn_off = relo->r_offset / sizeof(struct bpf_insn); + unsigned int map_idx = sym->st_value / ctx->map_len; + + if (insn_off >= prog->insns_num) + return -EINVAL; + if (prog->insns[insn_off].code != (BPF_LD | BPF_IMM | BPF_DW)) { + fprintf(stderr, "ELF contains relo data for non ld64 instruction at offset %u! 
Compiler bug?!\n", + insn_off); + return -EINVAL; + } + + if (map_idx >= ARRAY_SIZE(ctx->map_fds)) + return -EINVAL; + if (!ctx->map_fds[map_idx]) + return -EINVAL; + if (ctx->maps[map_idx].type == BPF_MAP_TYPE_PROG_ARRAY) { + props->tc.total++; + if (ctx->maps_ext[map_idx].owner.jited || + (ctx->maps_ext[map_idx].owner.type == 0 && + ctx->cfg.jit_enabled)) + props->tc.jited++; + } + + prog->insns[insn_off].src_reg = BPF_PSEUDO_MAP_FD; + prog->insns[insn_off].imm = ctx->map_fds[map_idx]; + return 0; +} + +static int bpf_apply_relo_call(struct bpf_elf_ctx *ctx, struct bpf_elf_prog *prog, + GElf_Rel *relo, GElf_Sym *sym, + struct bpf_relo_props *props) +{ + unsigned int insn_off = relo->r_offset / sizeof(struct bpf_insn); + struct bpf_elf_prog *prog_text = &ctx->prog_text; + + if (insn_off >= prog->insns_num) + return -EINVAL; + if (prog->insns[insn_off].code != (BPF_JMP | BPF_CALL) && + prog->insns[insn_off].src_reg != BPF_PSEUDO_CALL) { + fprintf(stderr, "ELF contains relo data for non call instruction at offset %u! 
Compiler bug?!\n", + insn_off); + return -EINVAL; + } + + if (!props->main_num) { + struct bpf_insn *insns = realloc(prog->insns, + prog->size + prog_text->size); + if (!insns) + return -ENOMEM; + + memcpy(insns + prog->insns_num, prog_text->insns, + prog_text->size); + props->main_num = prog->insns_num; + prog->insns = insns; + prog->insns_num += prog_text->insns_num; + prog->size += prog_text->size; + } + + prog->insns[insn_off].imm += props->main_num - insn_off; + return 0; +} + +static int bpf_apply_relo_data(struct bpf_elf_ctx *ctx, + struct bpf_elf_sec_data *data_relo, + struct bpf_elf_prog *prog, + struct bpf_relo_props *props) +{ + GElf_Shdr *rhdr = &data_relo->sec_hdr; + int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize; + + for (relo_ent = 0; relo_ent < relo_num; relo_ent++) { + GElf_Rel relo; + GElf_Sym sym; + int ret = -EIO; + + if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo) + return -EIO; + if (gelf_getsym(ctx->sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym) + return -EIO; + + if (sym.st_shndx == ctx->sec_maps) + ret = bpf_apply_relo_map(ctx, prog, &relo, &sym, props); + else if (sym.st_shndx == ctx->sec_text) + ret = bpf_apply_relo_call(ctx, prog, &relo, &sym, props); + else + fprintf(stderr, "ELF contains non-{map,call} related relo data in entry %u pointing to section %u! 
Compiler bug?!\n", + relo_ent, sym.st_shndx); + if (ret < 0) + return ret; + } + + return 0; +} + +static int bpf_fetch_prog_relo(struct bpf_elf_ctx *ctx, const char *section, + bool *lderr, bool *sseen, struct bpf_elf_prog *prog) +{ + struct bpf_elf_sec_data data_relo, data_insn; + int ret, idx, i, fd = -1; + + for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { + struct bpf_relo_props props = {}; + + ret = bpf_fill_section_data(ctx, i, &data_relo); + if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL) + continue; + + idx = data_relo.sec_hdr.sh_info; + + ret = bpf_fill_section_data(ctx, idx, &data_insn); + if (ret < 0 || + !(data_insn.sec_hdr.sh_type == SHT_PROGBITS && + (data_insn.sec_hdr.sh_flags & SHF_EXECINSTR) && + !strcmp(data_insn.sec_name, section))) + continue; + if (sseen) + *sseen = true; + + memset(prog, 0, sizeof(*prog)); + prog->type = ctx->type; + prog->license = ctx->license; + prog->size = data_insn.sec_data->d_size; + prog->insns_num = prog->size / sizeof(struct bpf_insn); + prog->insns = malloc(prog->size); + if (!prog->insns) { + *lderr = true; + return -ENOMEM; + } + + memcpy(prog->insns, data_insn.sec_data->d_buf, prog->size); + + ret = bpf_apply_relo_data(ctx, &data_relo, prog, &props); + if (ret < 0) { + *lderr = true; + if (ctx->sec_text != idx) + free(prog->insns); + return ret; + } + if (ctx->sec_text == idx) { + fd = 0; + goto out; + } + + fd = bpf_prog_attach(section, prog, ctx); + free(prog->insns); + if (fd < 0) { + *lderr = true; + if (props.tc.total) { + if (ctx->cfg.jit_enabled && + props.tc.total != props.tc.jited) + fprintf(stderr, "JIT enabled, but only %u/%u tail call maps in the program have JITed owner!\n", + props.tc.jited, props.tc.total); + if (!ctx->cfg.jit_enabled && + props.tc.jited) + fprintf(stderr, "JIT disabled, but %u/%u tail call maps in the program have JITed owner!\n", + props.tc.jited, props.tc.total); + } + return fd; + } +out: + ctx->sec_done[i] = true; + ctx->sec_done[idx] = true; + break; + } + + return fd; +} + 
+static int bpf_fetch_prog_sec(struct bpf_elf_ctx *ctx, const char *section) +{ + bool lderr = false, sseen = false; + struct bpf_elf_prog prog; + int ret = -1; + + if (bpf_has_call_data(ctx)) { + ret = bpf_fetch_prog_relo(ctx, ".text", &lderr, NULL, + &ctx->prog_text); + if (ret < 0) + return ret; + } + + if (bpf_has_map_data(ctx) || bpf_has_call_data(ctx)) + ret = bpf_fetch_prog_relo(ctx, section, &lderr, &sseen, &prog); + if (ret < 0 && !lderr) + ret = bpf_fetch_prog(ctx, section, &sseen); + if (ret < 0 && !sseen) + fprintf(stderr, "Program section \'%s\' not found in ELF file!\n", + section); + return ret; +} + +static int bpf_find_map_by_id(struct bpf_elf_ctx *ctx, uint32_t id) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) + if (ctx->map_fds[i] && ctx->maps[i].id == id && + ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY) + return i; + return -1; +} + +struct bpf_jited_aux { + int prog_fd; + int map_fd; + struct bpf_prog_data prog; + struct bpf_map_ext map; +}; + +static int bpf_derive_prog_from_fdinfo(int fd, struct bpf_prog_data *prog) +{ + char file[PATH_MAX], buff[4096]; + unsigned int val; + FILE *fp; + + snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); + memset(prog, 0, sizeof(*prog)); + + fp = fopen(file, "r"); + if (!fp) { + fprintf(stderr, "No procfs support?!\n"); + return -EIO; + } + + while (fgets(buff, sizeof(buff), fp)) { + if (sscanf(buff, "prog_type:\t%u", &val) == 1) + prog->type = val; + else if (sscanf(buff, "prog_jited:\t%u", &val) == 1) + prog->jited = val; + } + + fclose(fp); + return 0; +} + +static int bpf_tail_call_get_aux(struct bpf_jited_aux *aux) +{ + struct bpf_elf_map tmp; + int ret; + + ret = bpf_derive_elf_map_from_fdinfo(aux->map_fd, &tmp, &aux->map); + if (!ret) + ret = bpf_derive_prog_from_fdinfo(aux->prog_fd, &aux->prog); + + return ret; +} + +static int bpf_fill_prog_arrays(struct bpf_elf_ctx *ctx) +{ + struct bpf_elf_sec_data data; + uint32_t map_id, key_id; + int fd, i, ret, idx; + + for (i 
= 1; i < ctx->elf_hdr.e_shnum; i++) { + if (ctx->sec_done[i]) + continue; + + ret = bpf_fill_section_data(ctx, i, &data); + if (ret < 0) + continue; + + ret = sscanf(data.sec_name, "%i/%i", &map_id, &key_id); + if (ret != 2) + continue; + + idx = bpf_find_map_by_id(ctx, map_id); + if (idx < 0) + continue; + + fd = bpf_fetch_prog_sec(ctx, data.sec_name); + if (fd < 0) + return -EIO; + + ret = bpf_map_update(ctx->map_fds[idx], &key_id, + &fd, BPF_ANY); + if (ret < 0) { + struct bpf_jited_aux aux = {}; + + ret = -errno; + if (errno == E2BIG) { + fprintf(stderr, "Tail call key %u for map %u out of bounds?\n", + key_id, map_id); + return ret; + } + + aux.map_fd = ctx->map_fds[idx]; + aux.prog_fd = fd; + + if (bpf_tail_call_get_aux(&aux)) + return ret; + if (!aux.map.owner.type) + return ret; + + if (aux.prog.type != aux.map.owner.type) + fprintf(stderr, "Tail call map owned by prog type %u, but prog type is %u!\n", + aux.map.owner.type, aux.prog.type); + if (aux.prog.jited != aux.map.owner.jited) + fprintf(stderr, "Tail call map %s jited, but prog %s!\n", + aux.map.owner.jited ? "is" : "not", + aux.prog.jited ? 
"is" : "not"); + return ret; + } + + ctx->sec_done[i] = true; + } + + return 0; +} + +static void bpf_save_finfo(struct bpf_elf_ctx *ctx) +{ + struct stat st; + int ret; + + memset(&ctx->stat, 0, sizeof(ctx->stat)); + + ret = fstat(ctx->obj_fd, &st); + if (ret < 0) { + fprintf(stderr, "Stat of elf file failed: %s\n", + strerror(errno)); + return; + } + + ctx->stat.st_dev = st.st_dev; + ctx->stat.st_ino = st.st_ino; +} + +static int bpf_read_pin_mapping(FILE *fp, uint32_t *id, char *path) +{ + char buff[PATH_MAX]; + + while (fgets(buff, sizeof(buff), fp)) { + char *ptr = buff; + + while (*ptr == ' ' || *ptr == '\t') + ptr++; + + if (*ptr == '#' || *ptr == '\n' || *ptr == 0) + continue; + + if (sscanf(ptr, "%i %s\n", id, path) != 2 && + sscanf(ptr, "%i %s #", id, path) != 2) { + strcpy(path, ptr); + return -1; + } + + return 1; + } + + return 0; +} + +static bool bpf_pinning_reserved(uint32_t pinning) +{ + switch (pinning) { + case PIN_NONE: + case PIN_OBJECT_NS: + case PIN_GLOBAL_NS: + return true; + default: + return false; + } +} + +static void bpf_hash_init(struct bpf_elf_ctx *ctx, const char *db_file) +{ + struct bpf_hash_entry *entry; + char subpath[PATH_MAX] = {}; + uint32_t pinning; + FILE *fp; + int ret; + + fp = fopen(db_file, "r"); + if (!fp) + return; + + while ((ret = bpf_read_pin_mapping(fp, &pinning, subpath))) { + if (ret == -1) { + fprintf(stderr, "Database %s is corrupted at: %s\n", + db_file, subpath); + fclose(fp); + return; + } + + if (bpf_pinning_reserved(pinning)) { + fprintf(stderr, "Database %s, id %u is reserved - ignoring!\n", + db_file, pinning); + continue; + } + + entry = malloc(sizeof(*entry)); + if (!entry) { + fprintf(stderr, "No memory left for db entry!\n"); + continue; + } + + entry->pinning = pinning; + entry->subpath = strdup(subpath); + if (!entry->subpath) { + fprintf(stderr, "No memory left for db entry!\n"); + free(entry); + continue; + } + + entry->next = ctx->ht[pinning & (ARRAY_SIZE(ctx->ht) - 1)]; + ctx->ht[pinning & 
(ARRAY_SIZE(ctx->ht) - 1)] = entry; + } + + fclose(fp); +} + +static void bpf_hash_destroy(struct bpf_elf_ctx *ctx) +{ + struct bpf_hash_entry *entry; + int i; + + for (i = 0; i < ARRAY_SIZE(ctx->ht); i++) { + while ((entry = ctx->ht[i]) != NULL) { + ctx->ht[i] = entry->next; + free((char *)entry->subpath); + free(entry); + } + } +} + +static int bpf_elf_check_ehdr(const struct bpf_elf_ctx *ctx) +{ + if (ctx->elf_hdr.e_type != ET_REL || + (ctx->elf_hdr.e_machine != EM_NONE && + ctx->elf_hdr.e_machine != EM_BPF) || + ctx->elf_hdr.e_version != EV_CURRENT) { + fprintf(stderr, "ELF format error, ELF file not for eBPF?\n"); + return -EINVAL; + } + + switch (ctx->elf_hdr.e_ident[EI_DATA]) { + default: + fprintf(stderr, "ELF format error, wrong endianness info?\n"); + return -EINVAL; + case ELFDATA2LSB: + if (htons(1) == 1) { + fprintf(stderr, + "We are big endian, eBPF object is little endian!\n"); + return -EIO; + } + break; + case ELFDATA2MSB: + if (htons(1) != 1) { + fprintf(stderr, + "We are little endian, eBPF object is big endian!\n"); + return -EIO; + } + break; + } + + return 0; +} + +static void bpf_get_cfg(struct bpf_elf_ctx *ctx) +{ + static const char *path_jit = "/proc/sys/net/core/bpf_jit_enable"; + int fd; + + fd = open(path_jit, O_RDONLY); + if (fd >= 0) { + char tmp[16] = {}; + + if (read(fd, tmp, sizeof(tmp)) > 0) + ctx->cfg.jit_enabled = atoi(tmp); + close(fd); + } +} + +static int bpf_elf_ctx_init(struct bpf_elf_ctx *ctx, const char *pathname, + enum bpf_prog_type type, __u32 ifindex, + bool verbose) +{ + uint8_t tmp[20]; + int ret; + + if (elf_version(EV_CURRENT) == EV_NONE) + return -EINVAL; + + bpf_init_env(); + + memset(ctx, 0, sizeof(*ctx)); + bpf_get_cfg(ctx); + + ret = bpf_obj_hash(pathname, tmp, sizeof(tmp)); + if (ret) + ctx->noafalg = true; + else + hexstring_n2a(tmp, sizeof(tmp), ctx->obj_uid, + sizeof(ctx->obj_uid)); + + ctx->verbose = verbose; + ctx->type = type; + ctx->ifindex = ifindex; + + ctx->obj_fd = open(pathname, O_RDONLY); + if 
(ctx->obj_fd < 0) + return ctx->obj_fd; + + ctx->elf_fd = elf_begin(ctx->obj_fd, ELF_C_READ, NULL); + if (!ctx->elf_fd) { + ret = -EINVAL; + goto out_fd; + } + + if (elf_kind(ctx->elf_fd) != ELF_K_ELF) { + ret = -EINVAL; + goto out_fd; + } + + if (gelf_getehdr(ctx->elf_fd, &ctx->elf_hdr) != + &ctx->elf_hdr) { + ret = -EIO; + goto out_elf; + } + + ret = bpf_elf_check_ehdr(ctx); + if (ret < 0) + goto out_elf; + + ctx->sec_done = calloc(ctx->elf_hdr.e_shnum, + sizeof(*(ctx->sec_done))); + if (!ctx->sec_done) { + ret = -ENOMEM; + goto out_elf; + } + + if (ctx->verbose && bpf_log_realloc(ctx)) { + ret = -ENOMEM; + goto out_free; + } + + bpf_save_finfo(ctx); + bpf_hash_init(ctx, CONFDIR "/bpf_pinning"); + + return 0; +out_free: + free(ctx->sec_done); +out_elf: + elf_end(ctx->elf_fd); +out_fd: + close(ctx->obj_fd); + return ret; +} + +static int bpf_maps_count(struct bpf_elf_ctx *ctx) +{ + int i, count = 0; + + for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) { + if (!ctx->map_fds[i]) + break; + count++; + } + + return count; +} + +static void bpf_maps_teardown(struct bpf_elf_ctx *ctx) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ctx->map_fds); i++) { + if (ctx->map_fds[i]) + close(ctx->map_fds[i]); + } + + if (ctx->btf_fd) + close(ctx->btf_fd); + free(ctx->btf.types); +} + +static void bpf_elf_ctx_destroy(struct bpf_elf_ctx *ctx, bool failure) +{ + if (failure) + bpf_maps_teardown(ctx); + + bpf_hash_destroy(ctx); + + free(ctx->prog_text.insns); + free(ctx->sec_done); + free(ctx->log); + + elf_end(ctx->elf_fd); + close(ctx->obj_fd); +} + +static struct bpf_elf_ctx __ctx; + +static int bpf_obj_open(const char *pathname, enum bpf_prog_type type, + const char *section, __u32 ifindex, bool verbose) +{ + struct bpf_elf_ctx *ctx = &__ctx; + int fd = 0, ret; + + ret = bpf_elf_ctx_init(ctx, pathname, type, ifindex, verbose); + if (ret < 0) { + fprintf(stderr, "Cannot initialize ELF context!\n"); + return ret; + } + + ret = bpf_fetch_ancillary(ctx, strcmp(section, ".text")); + if (ret 
< 0) { + fprintf(stderr, "Error fetching ELF ancillary data!\n"); + goto out; + } + + fd = bpf_fetch_prog_sec(ctx, section); + if (fd < 0) { + fprintf(stderr, "Error fetching program/map!\n"); + ret = fd; + goto out; + } + + ret = bpf_fill_prog_arrays(ctx); + if (ret < 0) + fprintf(stderr, "Error filling program arrays!\n"); +out: + bpf_elf_ctx_destroy(ctx, ret < 0); + if (ret < 0) { + if (fd >= 0) + close(fd); + return ret; + } + + return fd; +} + +static int +bpf_map_set_send(int fd, struct sockaddr_un *addr, unsigned int addr_len, + const struct bpf_map_data *aux, unsigned int entries) +{ + struct bpf_map_set_msg msg = { + .aux.uds_ver = BPF_SCM_AUX_VER, + .aux.num_ent = entries, + }; + int *cmsg_buf, min_fd; + char *amsg_buf; + int i; + + strlcpy(msg.aux.obj_name, aux->obj, sizeof(msg.aux.obj_name)); + memcpy(&msg.aux.obj_st, aux->st, sizeof(msg.aux.obj_st)); + + cmsg_buf = bpf_map_set_init(&msg, addr, addr_len); + amsg_buf = (char *)msg.aux.ent; + + for (i = 0; i < entries; i += min_fd) { + int ret; + + min_fd = min(BPF_SCM_MAX_FDS * 1U, entries - i); + bpf_map_set_init_single(&msg, min_fd); + + memcpy(cmsg_buf, &aux->fds[i], sizeof(aux->fds[0]) * min_fd); + memcpy(amsg_buf, &aux->ent[i], sizeof(aux->ent[0]) * min_fd); + + ret = sendmsg(fd, &msg.hdr, 0); + if (ret <= 0) + return ret ? : -1; + } + + return 0; +} + +static int +bpf_map_set_recv(int fd, int *fds, struct bpf_map_aux *aux, + unsigned int entries) +{ + struct bpf_map_set_msg msg; + int *cmsg_buf, min_fd; + char *amsg_buf, *mmsg_buf; + unsigned int needed = 1; + int i; + + cmsg_buf = bpf_map_set_init(&msg, NULL, 0); + amsg_buf = (char *)msg.aux.ent; + mmsg_buf = (char *)&msg.aux; + + for (i = 0; i < min(entries, needed); i += min_fd) { + struct cmsghdr *cmsg; + int ret; + + min_fd = min(entries, entries - i); + bpf_map_set_init_single(&msg, min_fd); + + ret = recvmsg(fd, &msg.hdr, 0); + if (ret <= 0) + return ret ? 
: -1; + + cmsg = CMSG_FIRSTHDR(&msg.hdr); + if (!cmsg || cmsg->cmsg_type != SCM_RIGHTS) + return -EINVAL; + if (msg.hdr.msg_flags & MSG_CTRUNC) + return -EIO; + if (msg.aux.uds_ver != BPF_SCM_AUX_VER) + return -ENOSYS; + + min_fd = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof(fd); + if (min_fd > entries || min_fd <= 0) + return -EINVAL; + + memcpy(&fds[i], cmsg_buf, sizeof(fds[0]) * min_fd); + memcpy(&aux->ent[i], amsg_buf, sizeof(aux->ent[0]) * min_fd); + memcpy(aux, mmsg_buf, offsetof(struct bpf_map_aux, ent)); + + needed = aux->num_ent; + } + + return 0; +} + +int bpf_send_map_fds(const char *path, const char *obj) +{ + struct bpf_elf_ctx *ctx = &__ctx; + struct sockaddr_un addr = { .sun_family = AF_UNIX }; + struct bpf_map_data bpf_aux = { + .fds = ctx->map_fds, + .ent = ctx->maps, + .st = &ctx->stat, + .obj = obj, + }; + int fd, ret = -1; + + fd = socket(AF_UNIX, SOCK_DGRAM, 0); + if (fd < 0) { + fprintf(stderr, "Cannot open socket: %s\n", + strerror(errno)); + goto out; + } + + strlcpy(addr.sun_path, path, sizeof(addr.sun_path)); + + ret = connect(fd, (struct sockaddr *)&addr, sizeof(addr)); + if (ret < 0) { + fprintf(stderr, "Cannot connect to %s: %s\n", + path, strerror(errno)); + goto out; + } + + ret = bpf_map_set_send(fd, &addr, sizeof(addr), &bpf_aux, + bpf_maps_count(ctx)); + if (ret < 0) + fprintf(stderr, "Cannot send fds to %s: %s\n", + path, strerror(errno)); + + bpf_maps_teardown(ctx); +out: + if (fd >= 0) + close(fd); + return ret; +} + +int bpf_recv_map_fds(const char *path, int *fds, struct bpf_map_aux *aux, + unsigned int entries) +{ + struct sockaddr_un addr = { .sun_family = AF_UNIX }; + int fd, ret = -1; + + fd = socket(AF_UNIX, SOCK_DGRAM, 0); + if (fd < 0) { + fprintf(stderr, "Cannot open socket: %s\n", + strerror(errno)); + goto out; + } + + strlcpy(addr.sun_path, path, sizeof(addr.sun_path)); + + ret = bind(fd, (struct sockaddr *)&addr, sizeof(addr)); + if (ret < 0) { + fprintf(stderr, "Cannot bind to socket: %s\n", + strerror(errno)); + 
goto out; + } + + ret = bpf_map_set_recv(fd, fds, aux, entries); + if (ret < 0) + fprintf(stderr, "Cannot recv fds from %s: %s\n", + path, strerror(errno)); + + unlink(addr.sun_path); + +out: + if (fd >= 0) + close(fd); + return ret; +} + +#ifdef HAVE_LIBBPF +/* The following functions are wrapper functions for libbpf code to be + * compatible with the legacy format. So all the functions have prefix + * with iproute2_ + */ +int iproute2_bpf_elf_ctx_init(struct bpf_cfg_in *cfg) +{ + struct bpf_elf_ctx *ctx = &__ctx; + + return bpf_elf_ctx_init(ctx, cfg->object, cfg->type, cfg->ifindex, cfg->verbose); +} + +int iproute2_bpf_fetch_ancillary(void) +{ + struct bpf_elf_ctx *ctx = &__ctx; + struct bpf_elf_sec_data data; + int i, ret = 0; + + for (i = 1; i < ctx->elf_hdr.e_shnum; i++) { + ret = bpf_fill_section_data(ctx, i, &data); + if (ret < 0) + continue; + + if (data.sec_hdr.sh_type == SHT_PROGBITS && + !strcmp(data.sec_name, ELF_SECTION_MAPS)) + ret = bpf_fetch_maps_begin(ctx, i, &data); + else if (data.sec_hdr.sh_type == SHT_SYMTAB && + !strcmp(data.sec_name, ".symtab")) + ret = bpf_fetch_symtab(ctx, i, &data); + else if (data.sec_hdr.sh_type == SHT_STRTAB && + !strcmp(data.sec_name, ".strtab")) + ret = bpf_fetch_strtab(ctx, i, &data); + if (ret < 0) { + fprintf(stderr, "Error parsing section %d! 
Perhaps check with readelf -a?\n", + i); + return ret; + } + } + + if (bpf_has_map_data(ctx)) { + ret = bpf_fetch_maps_end(ctx); + if (ret < 0) { + fprintf(stderr, "Error fixing up map structure, incompatible struct bpf_elf_map used?\n"); + return ret; + } + } + + return ret; +} + +int iproute2_get_root_path(char *root_path, size_t len) +{ + struct bpf_elf_ctx *ctx = &__ctx; + int ret = 0; + + snprintf(root_path, len, "%s/%s", + bpf_get_work_dir(ctx->type), BPF_DIR_GLOBALS); + + ret = mkdir(root_path, S_IRWXU); + if (ret && errno != EEXIST) { + fprintf(stderr, "mkdir %s failed: %s\n", root_path, strerror(errno)); + return ret; + } + + return 0; +} + +bool iproute2_is_pin_map(const char *libbpf_map_name, char *pathname) +{ + struct bpf_elf_ctx *ctx = &__ctx; + const char *map_name, *tmp; + unsigned int pinning; + int i, ret = 0; + + for (i = 0; i < ctx->map_num; i++) { + if (ctx->maps[i].pinning == PIN_OBJECT_NS && + ctx->noafalg) { + fprintf(stderr, "Missing kernel AF_ALG support for PIN_OBJECT_NS!\n"); + return false; + } + + map_name = bpf_map_fetch_name(ctx, i); + if (!map_name) { + return false; + } + + if (strcmp(libbpf_map_name, map_name)) + continue; + + pinning = ctx->maps[i].pinning; + + if (bpf_no_pinning(ctx, pinning) || !bpf_get_work_dir(ctx->type)) + return false; + + if (pinning == PIN_OBJECT_NS) + ret = bpf_make_obj_path(ctx); + else if ((tmp = bpf_custom_pinning(ctx, pinning))) + ret = bpf_make_custom_path(ctx, tmp); + if (ret < 0) + return false; + + bpf_make_pathname(pathname, PATH_MAX, map_name, ctx, pinning); + + return true; + } + + return false; +} + +bool iproute2_is_map_in_map(const char *libbpf_map_name, struct bpf_elf_map *imap, + struct bpf_elf_map *omap, char *omap_name) +{ + struct bpf_elf_ctx *ctx = &__ctx; + const char *inner_map_name, *outer_map_name; + int i, j; + + for (i = 0; i < ctx->map_num; i++) { + inner_map_name = bpf_map_fetch_name(ctx, i); + if (!inner_map_name) { + return false; + } + + if (strcmp(libbpf_map_name, 
inner_map_name)) + continue; + + if (!ctx->maps[i].id || + ctx->maps[i].inner_id) + continue; + + *imap = ctx->maps[i]; + + for (j = 0; j < ctx->map_num; j++) { + if (!bpf_is_map_in_map_type(&ctx->maps[j])) + continue; + if (ctx->maps[j].inner_id != ctx->maps[i].id) + continue; + + *omap = ctx->maps[j]; + outer_map_name = bpf_map_fetch_name(ctx, j); + if (!outer_map_name) + return false; + + memcpy(omap_name, outer_map_name, strlen(outer_map_name) + 1); + + return true; + } + } + + return false; +} + +int iproute2_find_map_name_by_id(unsigned int map_id, char *name) +{ + struct bpf_elf_ctx *ctx = &__ctx; + const char *map_name; + int i, idx = -1; + + for (i = 0; i < ctx->map_num; i++) { + if (ctx->maps[i].id == map_id && + ctx->maps[i].type == BPF_MAP_TYPE_PROG_ARRAY) { + idx = i; + break; + } + } + + if (idx < 0) + return -1; + + map_name = bpf_map_fetch_name(ctx, idx); + if (!map_name) + return -1; + + memcpy(name, map_name, strlen(map_name) + 1); + return 0; +} +#endif /* HAVE_LIBBPF */ +#endif /* HAVE_ELF */ diff --git a/lib/bpf_libbpf.c b/lib/bpf_libbpf.c new file mode 100644 index 0000000..e1c211a --- /dev/null +++ b/lib/bpf_libbpf.c @@ -0,0 +1,383 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * bpf_libbpf.c BPF code relay on libbpf + * Authors: Hangbin Liu <haliu@redhat.com> + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <stdbool.h> +#include <stdint.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> + +#include <libelf.h> +#include <gelf.h> + +#include <bpf/libbpf.h> +#include <bpf/bpf.h> + +#include "bpf_util.h" + +static int __attribute__((format(printf, 2, 0))) +verbose_print(enum libbpf_print_level level, const char *format, va_list args) +{ + return vfprintf(stderr, format, args); +} + +static int __attribute__((format(printf, 2, 0))) +silent_print(enum libbpf_print_level level, const char *format, va_list args) +{ + if (level > LIBBPF_WARN) + return 0; + + /* Skip warning from 
bpf_object__init_user_maps() for legacy maps */ + if (strstr(format, "has unrecognized, non-zero options")) + return 0; + + return vfprintf(stderr, format, args); +} + +static const char *get_bpf_program__section_name(const struct bpf_program *prog) +{ +#ifdef HAVE_LIBBPF_SECTION_NAME + return bpf_program__section_name(prog); +#else + return bpf_program__title(prog, false); +#endif +} + +static int create_map(const char *name, struct bpf_elf_map *map, + __u32 ifindex, int inner_fd) +{ + union bpf_attr attr = {}; + + attr.map_type = map->type; + strlcpy(attr.map_name, name, sizeof(attr.map_name)); + attr.map_flags = map->flags; + attr.key_size = map->size_key; + attr.value_size = map->size_value; + attr.max_entries = map->max_elem; + attr.map_ifindex = ifindex; + attr.inner_map_fd = inner_fd; + + return bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); +} + +static int create_map_in_map(struct bpf_object *obj, struct bpf_map *map, + struct bpf_elf_map *elf_map, int inner_fd, + bool *reuse_pin_map) +{ + char pathname[PATH_MAX]; + const char *map_name; + bool pin_map = false; + int map_fd, ret = 0; + + map_name = bpf_map__name(map); + + if (iproute2_is_pin_map(map_name, pathname)) { + pin_map = true; + + /* Check if there already has a pinned map */ + map_fd = bpf_obj_get(pathname); + if (map_fd > 0) { + if (reuse_pin_map) + *reuse_pin_map = true; + close(map_fd); + return bpf_map__set_pin_path(map, pathname); + } + } + + map_fd = create_map(map_name, elf_map, bpf_map__ifindex(map), inner_fd); + if (map_fd < 0) { + fprintf(stderr, "create map %s failed\n", map_name); + return map_fd; + } + + ret = bpf_map__reuse_fd(map, map_fd); + if (ret < 0) { + fprintf(stderr, "map %s reuse fd failed\n", map_name); + goto err_out; + } + + if (pin_map) { + ret = bpf_map__set_pin_path(map, pathname); + if (ret < 0) + goto err_out; + } + + return 0; +err_out: + close(map_fd); + return ret; +} + +static int +handle_legacy_map_in_map(struct bpf_object *obj, struct bpf_map *inner_map, + const 
char *inner_map_name) +{ + int inner_fd, outer_fd, inner_idx, ret = 0; + struct bpf_elf_map imap, omap; + struct bpf_map *outer_map; + /* What's the size limit of map name? */ + char outer_map_name[128]; + bool reuse_pin_map = false; + + /* Deal with map-in-map */ + if (iproute2_is_map_in_map(inner_map_name, &imap, &omap, outer_map_name)) { + ret = create_map_in_map(obj, inner_map, &imap, -1, NULL); + if (ret < 0) + return ret; + + inner_fd = bpf_map__fd(inner_map); + outer_map = bpf_object__find_map_by_name(obj, outer_map_name); + ret = create_map_in_map(obj, outer_map, &omap, inner_fd, &reuse_pin_map); + if (ret < 0) + return ret; + + if (!reuse_pin_map) { + inner_idx = imap.inner_idx; + outer_fd = bpf_map__fd(outer_map); + ret = bpf_map_update_elem(outer_fd, &inner_idx, &inner_fd, 0); + if (ret < 0) + fprintf(stderr, "Cannot update inner_idx into outer_map\n"); + } + } + + return ret; +} + +static int find_legacy_tail_calls(struct bpf_program *prog, struct bpf_object *obj, + struct bpf_map **pmap) +{ + unsigned int map_id, key_id; + const char *sec_name; + struct bpf_map *map; + char map_name[128]; + int ret; + + /* Handle iproute2 tail call */ + sec_name = get_bpf_program__section_name(prog); + ret = sscanf(sec_name, "%i/%i", &map_id, &key_id); + if (ret != 2) + return -1; + + ret = iproute2_find_map_name_by_id(map_id, map_name); + if (ret < 0) { + fprintf(stderr, "unable to find map id %u for tail call\n", map_id); + return ret; + } + + map = bpf_object__find_map_by_name(obj, map_name); + if (!map) + return -1; + + if (pmap) + *pmap = map; + + return 0; +} + +static int update_legacy_tail_call_maps(struct bpf_object *obj) +{ + int prog_fd, map_fd, ret = 0; + unsigned int map_id, key_id; + struct bpf_program *prog; + const char *sec_name; + struct bpf_map *map; + + bpf_object__for_each_program(prog, obj) { + /* load_bpf_object has already verified find_legacy_tail_calls + * succeeds when it should + */ + if (find_legacy_tail_calls(prog, obj, &map) < 0) + 
continue; + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) + continue; + + sec_name = get_bpf_program__section_name(prog); + ret = sscanf(sec_name, "%i/%i", &map_id, &key_id); + if (ret != 2) + continue; + + map_fd = bpf_map__fd(map); + ret = bpf_map_update_elem(map_fd, &key_id, &prog_fd, 0); + if (ret < 0) { + fprintf(stderr, "Cannot update map key for tail call!\n"); + return ret; + } + } + + return 0; +} + +static int handle_legacy_maps(struct bpf_object *obj) +{ + char pathname[PATH_MAX]; + struct bpf_map *map; + const char *map_name; + int map_fd, ret = 0; + + bpf_object__for_each_map(map, obj) { + map_name = bpf_map__name(map); + + ret = handle_legacy_map_in_map(obj, map, map_name); + if (ret) + return ret; + + /* If it is a iproute2 legacy pin maps, just set pin path + * and let bpf_object__load() to deal with the map creation. + * We need to ignore map-in-maps which have pinned maps manually + */ + map_fd = bpf_map__fd(map); + if (map_fd < 0 && iproute2_is_pin_map(map_name, pathname)) { + ret = bpf_map__set_pin_path(map, pathname); + if (ret) { + fprintf(stderr, "map '%s': couldn't set pin path.\n", map_name); + break; + } + } + + } + + return ret; +} + +static bool bpf_map_is_offload_neutral(const struct bpf_map *map) +{ + return bpf_map__type(map) == BPF_MAP_TYPE_PERF_EVENT_ARRAY; +} + +static bool find_prog_to_attach(struct bpf_program *prog, + struct bpf_program *exist_prog, + const char *section, const char *prog_name) +{ + if (exist_prog) + return false; + + /* We have default section name 'prog'. So do not check + * section name if there already has program name. 
+ */ + if (prog_name) + return !strcmp(bpf_program__name(prog), prog_name); + else + return !strcmp(get_bpf_program__section_name(prog), section); +} + +static int load_bpf_object(struct bpf_cfg_in *cfg) +{ + struct bpf_program *p, *prog = NULL; + struct bpf_object *obj; + char root_path[PATH_MAX]; + struct bpf_map *map; + int prog_fd, ret = 0; + + ret = iproute2_get_root_path(root_path, PATH_MAX); + if (ret) + return ret; + + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts, + .relaxed_maps = true, + .pin_root_path = root_path, + ); + + obj = bpf_object__open_file(cfg->object, &open_opts); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return -ENOENT; + } + + bpf_object__for_each_program(p, obj) { + bool prog_to_attach = find_prog_to_attach(p, prog, + cfg->section, + cfg->prog_name); + + /* Only load the programs that will either be subsequently + * attached or inserted into a tail call map */ + if (find_legacy_tail_calls(p, obj, NULL) < 0 && + !prog_to_attach) { + ret = bpf_program__set_autoload(p, false); + if (ret) + return -EINVAL; + continue; + } + + bpf_program__set_type(p, cfg->type); + bpf_program__set_ifindex(p, cfg->ifindex); + + if (prog_to_attach) + prog = p; + } + + bpf_object__for_each_map(map, obj) { + if (!bpf_map_is_offload_neutral(map)) + bpf_map__set_ifindex(map, cfg->ifindex); + } + + if (!prog) { + if (cfg->prog_name) + fprintf(stderr, "object file doesn't contain prog %s\n", cfg->prog_name); + else + fprintf(stderr, "object file doesn't contain sec %s\n", cfg->section); + return -ENOENT; + } + + /* Handle iproute2 legacy pin maps and map-in-maps */ + ret = handle_legacy_maps(obj); + if (ret) + goto unload_obj; + + ret = bpf_object__load(obj); + if (ret) + goto unload_obj; + + ret = update_legacy_tail_call_maps(obj); + if (ret) + goto unload_obj; + + prog_fd = fcntl(bpf_program__fd(prog), F_DUPFD_CLOEXEC, 1); + if (prog_fd < 0) + ret = -errno; + else + cfg->prog_fd = prog_fd; + +unload_obj: + 
/* Close obj as we don't need it */ + bpf_object__close(obj); + return ret; +} + +/* Load ebpf and return prog fd */ +int iproute2_load_libbpf(struct bpf_cfg_in *cfg) +{ + int ret = 0; + + if (cfg->verbose) + libbpf_set_print(verbose_print); + else + libbpf_set_print(silent_print); + + ret = iproute2_bpf_elf_ctx_init(cfg); + if (ret < 0) { + fprintf(stderr, "Cannot initialize ELF context!\n"); + return ret; + } + + ret = iproute2_bpf_fetch_ancillary(); + if (ret < 0) { + fprintf(stderr, "Error fetching ELF ancillary data!\n"); + return ret; + } + + ret = load_bpf_object(cfg); + if (ret) + return ret; + + return cfg->prog_fd; +} diff --git a/lib/cg_map.c b/lib/cg_map.c new file mode 100644 index 0000000..39f244d --- /dev/null +++ b/lib/cg_map.c @@ -0,0 +1,134 @@ +/* + * cg_map.c cgroup v2 cache + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Dmitry Yakunin <zeil@yandex-team.ru> + */ + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <stdbool.h> +#include <linux/types.h> +#include <linux/limits.h> +#include <ftw.h> + +#include "cg_map.h" +#include "list.h" +#include "utils.h" + +struct cg_cache { + struct hlist_node id_hash; + __u64 id; + char path[]; +}; + +#define IDMAP_SIZE 1024 +static struct hlist_head id_head[IDMAP_SIZE]; + +static struct cg_cache *cg_get_by_id(__u64 id) +{ + unsigned int h = id & (IDMAP_SIZE - 1); + struct hlist_node *n; + + hlist_for_each(n, &id_head[h]) { + struct cg_cache *cg; + + cg = container_of(n, struct cg_cache, id_hash); + if (cg->id == id) + return cg; + } + + return NULL; +} + +static struct cg_cache *cg_entry_create(__u64 id, const char *path) +{ + unsigned int h = id & (IDMAP_SIZE - 1); + struct cg_cache *cg; + + cg = malloc(sizeof(*cg) + strlen(path) + 1); + if (!cg) { + fprintf(stderr, + "Failed to allocate memory for cgroup2 cache entry"); + return NULL; + } + cg->id = id; + strcpy(cg->path, path); + + hlist_add_head(&cg->id_hash, &id_head[h]); + + return cg; +} + +static int mntlen; + +static int nftw_fn(const char *fpath, const struct stat *sb, + int typeflag, struct FTW *ftw) +{ + const char *path; + __u64 id; + + if (typeflag != FTW_D) + return 0; + + id = get_cgroup2_id(fpath); + if (!id) + return -1; + + path = fpath + mntlen; + if (*path == '\0') + /* root cgroup */ + path = "/"; + if (!cg_entry_create(id, path)) + return -1; + + return 0; +} + +static void cg_init_map(void) +{ + char *mnt; + + mnt = find_cgroup2_mount(false); + if (!mnt) + return; + + mntlen = strlen(mnt); + (void) nftw(mnt, nftw_fn, 1024, FTW_MOUNT); + + free(mnt); +} + +const char *cg_id_to_path(__u64 id) +{ + static int initialized; + static char buf[64]; + + const struct cg_cache *cg; + char *path; + + if (!initialized) { + cg_init_map(); + initialized = 1; + } + + cg = cg_get_by_id(id); + if (cg) + return cg->path; + + path = 
get_cgroup2_path(id, false); + if (path) { + cg = cg_entry_create(id, path); + free(path); + if (cg) + return cg->path; + } + + snprintf(buf, sizeof(buf), "unreachable:%llx", id); + return buf; +} diff --git a/lib/color.c b/lib/color.c new file mode 100644 index 0000000..5997684 --- /dev/null +++ b/lib/color.c @@ -0,0 +1,183 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <linux/if.h> + +#include "color.h" +#include "utils.h" + +static void set_color_palette(void); + +enum color { + C_RED, + C_GREEN, + C_YELLOW, + C_BLUE, + C_MAGENTA, + C_CYAN, + C_WHITE, + C_BOLD_RED, + C_BOLD_GREEN, + C_BOLD_YELLOW, + C_BOLD_BLUE, + C_BOLD_MAGENTA, + C_BOLD_CYAN, + C_BOLD_WHITE, + C_CLEAR +}; + +static const char * const color_codes[] = { + "\e[31m", + "\e[32m", + "\e[33m", + "\e[34m", + "\e[35m", + "\e[36m", + "\e[37m", + "\e[1;31m", + "\e[1;32m", + "\e[1;33m", + "\e[1;34m", + "\e[1;35m", + "\e[1;36m", + "\e[1;37m", + "\e[0m", + NULL, +}; + +/* light background */ +static enum color attr_colors_light[] = { + C_CYAN, + C_YELLOW, + C_MAGENTA, + C_BLUE, + C_GREEN, + C_RED, + C_CLEAR, +}; + +/* dark background */ +static enum color attr_colors_dark[] = { + C_BOLD_CYAN, + C_BOLD_YELLOW, + C_BOLD_MAGENTA, + C_BOLD_BLUE, + C_BOLD_GREEN, + C_BOLD_RED, + C_CLEAR +}; + +static int is_dark_bg; +static int color_is_enabled; + +static void enable_color(void) +{ + color_is_enabled = 1; + set_color_palette(); +} + +bool check_enable_color(int color, int json) +{ + if (json || color == COLOR_OPT_NEVER) + return false; + + if (color == COLOR_OPT_ALWAYS || isatty(fileno(stdout))) { + enable_color(); + return true; + } + return false; +} + +bool matches_color(const char *arg, int *val) +{ + char *dup, *p; + + if (!val) + return false; + + dup = strdupa(arg); + p = strchrnul(dup, '='); + if (*p) + *(p++) = '\0'; + + if (matches(dup, "-color")) 
+ return false; + + if (*p == '\0' || !strcmp(p, "always")) + *val = COLOR_OPT_ALWAYS; + else if (!strcmp(p, "auto")) + *val = COLOR_OPT_AUTO; + else if (!strcmp(p, "never")) + *val = COLOR_OPT_NEVER; + else + return false; + return true; +} + +static void set_color_palette(void) +{ + char *p = getenv("COLORFGBG"); + + /* + * COLORFGBG environment variable usually contains either two or three + * values separated by semicolons; we want the last value in either case. + * If this value is 0-6 or 8, background is dark. + */ + if (p && (p = strrchr(p, ';')) != NULL + && ((p[1] >= '0' && p[1] <= '6') || p[1] == '8') + && p[2] == '\0') + is_dark_bg = 1; +} + +__attribute__((format(printf, 3, 4))) +int color_fprintf(FILE *fp, enum color_attr attr, const char *fmt, ...) +{ + int ret = 0; + va_list args; + + va_start(args, fmt); + + if (!color_is_enabled || attr == COLOR_NONE) { + ret = vfprintf(fp, fmt, args); + goto end; + } + + ret += fprintf(fp, "%s", color_codes[is_dark_bg ? + attr_colors_dark[attr] : attr_colors_light[attr]]); + + ret += vfprintf(fp, fmt, args); + ret += fprintf(fp, "%s", color_codes[C_CLEAR]); + +end: + va_end(args); + return ret; +} + +enum color_attr ifa_family_color(__u8 ifa_family) +{ + switch (ifa_family) { + case AF_INET: + return COLOR_INET; + case AF_INET6: + return COLOR_INET6; + default: + return COLOR_NONE; + } +} + +enum color_attr oper_state_color(__u8 state) +{ + switch (state) { + case IF_OPER_UP: + return COLOR_OPERSTATE_UP; + case IF_OPER_DOWN: + return COLOR_OPERSTATE_DOWN; + default: + return COLOR_NONE; + } +} diff --git a/lib/coverity_model.c b/lib/coverity_model.c new file mode 100644 index 0000000..1321fe8 --- /dev/null +++ b/lib/coverity_model.c @@ -0,0 +1,17 @@ +/* + * Coverity Scan model + * + * This is a modeling file for Coverity Scan. Modeling helps to avoid false + * positives. + * + * - A model file can't import any header files. 
+ * - Therefore only some built-in primitives like int, char and void are + * available but not wchar_t, NULL etc. + * - Modeling doesn't need full structs and typedefs. Rudimentary structs + * and similar types are sufficient. + * - An uninitialized local pointer is not an error. It signifies that the + * variable could be either NULL or have some data. + * + * Coverity Scan doesn't pick up modifications automatically. The model file + * must be uploaded by an admin. + */ diff --git a/lib/exec.c b/lib/exec.c new file mode 100644 index 0000000..9b1c8f4 --- /dev/null +++ b/lib/exec.c @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <sys/wait.h> +#include <stdio.h> +#include <errno.h> +#include <unistd.h> + +#include "utils.h" +#include "namespace.h" + +int cmd_exec(const char *cmd, char **argv, bool do_fork, + int (*setup)(void *), void *arg) +{ + fflush(stdout); + if (do_fork) { + int status; + pid_t pid; + + pid = fork(); + if (pid < 0) { + perror("fork"); + exit(1); + } + + if (pid != 0) { + /* Parent */ + if (waitpid(pid, &status, 0) < 0) { + perror("waitpid"); + exit(1); + } + + if (WIFEXITED(status)) { + return WEXITSTATUS(status); + } + + exit(1); + } + } + + if (setup && setup(arg)) + return -1; + + if (execvp(cmd, argv) < 0) + fprintf(stderr, "exec of \"%s\" failed: %s\n", + cmd, strerror(errno)); + _exit(1); +} diff --git a/lib/fs.c b/lib/fs.c new file mode 100644 index 0000000..3752931 --- /dev/null +++ b/lib/fs.c @@ -0,0 +1,369 @@ +/* + * fs.c filesystem APIs + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: David Ahern <dsa@cumulusnetworks.com> + * + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/mount.h> +#include <ctype.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <limits.h> + +#include "utils.h" + +#ifndef HAVE_HANDLE_AT +# include <sys/syscall.h> +#endif + +#define CGROUP2_FS_NAME "cgroup2" + +/* if not already mounted cgroup2 is mounted here for iproute2's use */ +#define MNT_CGRP2_PATH "/var/run/cgroup2" + + +#ifndef HAVE_HANDLE_AT +struct file_handle { + unsigned handle_bytes; + int handle_type; + unsigned char f_handle[]; +}; + +static int name_to_handle_at(int dirfd, const char *pathname, + struct file_handle *handle, int *mount_id, int flags) +{ + return syscall(__NR_name_to_handle_at, dirfd, pathname, handle, + mount_id, flags); +} + +static int open_by_handle_at(int mount_fd, struct file_handle *handle, int flags) +{ + return syscall(__NR_open_by_handle_at, mount_fd, handle, flags); +} +#endif + +/* return mount path of first occurrence of given fstype */ +static char *find_fs_mount(const char *fs_to_find) +{ + char path[4096]; + char fstype[128]; /* max length of any filesystem name */ + char *mnt = NULL; + FILE *fp; + + fp = fopen("/proc/mounts", "r"); + if (!fp) { + fprintf(stderr, + "Failed to open mounts file: %s\n", strerror(errno)); + return NULL; + } + + while (fscanf(fp, "%*s %4095s %127s %*s %*d %*d\n", + path, fstype) == 2) { + if (strcmp(fstype, fs_to_find) == 0) { + mnt = strdup(path); + break; + } + } + + fclose(fp); + + return mnt; +} + +/* caller needs to free string returned */ +char *find_cgroup2_mount(bool do_mount) +{ + char *mnt = find_fs_mount(CGROUP2_FS_NAME); + + if (mnt) + return mnt; + + if (!do_mount) { + fprintf(stderr, "Failed to find cgroup2 mount\n"); + return NULL; + } + + mnt = strdup(MNT_CGRP2_PATH); + if (!mnt) { + fprintf(stderr, "Failed to allocate memory for cgroup2 
path\n"); + return NULL; + + } + + if (make_path(mnt, 0755)) { + fprintf(stderr, "Failed to setup cgroup2 directory\n"); + free(mnt); + return NULL; + } + + if (mount("none", mnt, CGROUP2_FS_NAME, 0, NULL)) { + /* EBUSY means already mounted */ + if (errno == EBUSY) + goto out; + + if (errno == ENODEV) { + fprintf(stderr, + "Failed to mount cgroup2. Are CGROUPS enabled in your kernel?\n"); + } else { + fprintf(stderr, + "Failed to mount cgroup2: %s\n", + strerror(errno)); + } + free(mnt); + return NULL; + } +out: + return mnt; +} + +__u64 get_cgroup2_id(const char *path) +{ + char fh_buf[sizeof(struct file_handle) + sizeof(__u64)] = { 0 }; + struct file_handle *fhp = (struct file_handle *)fh_buf; + union { + __u64 id; + unsigned char bytes[sizeof(__u64)]; + } cg_id = { .id = 0 }; + char *mnt = NULL; + int mnt_fd = -1; + int mnt_id; + + if (!path) { + fprintf(stderr, "Invalid cgroup2 path\n"); + return 0; + } + + fhp->handle_bytes = sizeof(__u64); + if (name_to_handle_at(AT_FDCWD, path, fhp, &mnt_id, 0) < 0) { + /* try at cgroup2 mount */ + + while (*path == '/') + path++; + if (*path == '\0') { + fprintf(stderr, "Invalid cgroup2 path\n"); + goto out; + } + + mnt = find_cgroup2_mount(false); + if (!mnt) + goto out; + + mnt_fd = open(mnt, O_RDONLY); + if (mnt_fd < 0) { + fprintf(stderr, "Failed to open cgroup2 mount\n"); + goto out; + } + + fhp->handle_bytes = sizeof(__u64); + if (name_to_handle_at(mnt_fd, path, fhp, &mnt_id, 0) < 0) { + fprintf(stderr, "Failed to get cgroup2 ID: %s\n", + strerror(errno)); + goto out; + } + } + if (fhp->handle_bytes != sizeof(__u64)) { + fprintf(stderr, "Invalid size of cgroup2 ID\n"); + goto out; + } + + memcpy(cg_id.bytes, fhp->f_handle, sizeof(__u64)); + +out: + if (mnt_fd >= 0) + close(mnt_fd); + free(mnt); + + return cg_id.id; +} + +#define FILEID_INO32_GEN 1 + +/* caller needs to free string returned */ +char *get_cgroup2_path(__u64 id, bool full) +{ + char fh_buf[sizeof(struct file_handle) + sizeof(__u64)] = { 0 }; + struct 
file_handle *fhp = (struct file_handle *)fh_buf; + union { + __u64 id; + unsigned char bytes[sizeof(__u64)]; + } cg_id = { .id = id }; + int mnt_fd = -1, fd = -1; + char link_buf[PATH_MAX]; + char *path = NULL; + char fd_path[64]; + int link_len; + char *mnt = NULL; + + if (!id) { + fprintf(stderr, "Invalid cgroup2 ID\n"); + goto out; + } + + mnt = find_cgroup2_mount(false); + if (!mnt) + goto out; + + mnt_fd = open(mnt, O_RDONLY); + if (mnt_fd < 0) { + fprintf(stderr, "Failed to open cgroup2 mount\n"); + goto out; + } + + fhp->handle_bytes = sizeof(__u64); + fhp->handle_type = FILEID_INO32_GEN; + memcpy(fhp->f_handle, cg_id.bytes, sizeof(__u64)); + + fd = open_by_handle_at(mnt_fd, fhp, 0); + if (fd < 0) { + fprintf(stderr, "Failed to open cgroup2 by ID\n"); + goto out; + } + + snprintf(fd_path, sizeof(fd_path), "/proc/self/fd/%d", fd); + link_len = readlink(fd_path, link_buf, sizeof(link_buf) - 1); + if (link_len < 0) { + fprintf(stderr, + "Failed to read value of symbolic link %s\n", + fd_path); + goto out; + } + link_buf[link_len] = '\0'; + + if (full) + path = strdup(link_buf); + else + path = strdup(link_buf + strlen(mnt)); + if (!path) + fprintf(stderr, + "Failed to allocate memory for cgroup2 path\n"); + +out: + if (fd >= 0) + close(fd); + if (mnt_fd >= 0) + close(mnt_fd); + free(mnt); + + return path; +} + +int make_path(const char *path, mode_t mode) +{ + char *dir, *delim; + int rc = -1; + + delim = dir = strdup(path); + if (dir == NULL) { + fprintf(stderr, "strdup failed copying path"); + return -1; + } + + /* skip '/' -- it had better exist */ + if (*delim == '/') + delim++; + + while (1) { + delim = strchr(delim, '/'); + if (delim) + *delim = '\0'; + + rc = mkdir(dir, mode); + if (rc && errno != EEXIST) { + fprintf(stderr, "mkdir failed for %s: %s\n", + dir, strerror(errno)); + goto out; + } + + if (delim == NULL) + break; + + *delim = '/'; + delim++; + if (*delim == '\0') + break; + } + rc = 0; +out: + free(dir); + + return rc; +} + +int 
get_command_name(const char *pid, char *comm, size_t len) +{ + char path[PATH_MAX]; + char line[128]; + FILE *fp; + + if (snprintf(path, sizeof(path), + "/proc/%s/status", pid) >= sizeof(path)) { + return -1; + } + + fp = fopen(path, "r"); + if (!fp) + return -1; + + comm[0] = '\0'; + while (fgets(line, sizeof(line), fp)) { + char *nl, *name; + + name = strstr(line, "Name:"); + if (!name) + continue; + + name += 5; + while (isspace(*name)) + name++; + + nl = strchr(name, '\n'); + if (nl) + *nl = '\0'; + + strlcpy(comm, name, len); + break; + } + + fclose(fp); + + return 0; +} + +int get_task_name(pid_t pid, char *name, size_t len) +{ + char path[PATH_MAX]; + FILE *f; + + if (!pid) + return -1; + + if (snprintf(path, sizeof(path), "/proc/%d/comm", pid) >= sizeof(path)) + return -1; + + f = fopen(path, "r"); + if (!f) + return -1; + + if (!fgets(name, len, f)) + return -1; + + /* comm ends in \n, get rid of it */ + name[strcspn(name, "\n")] = '\0'; + + fclose(f); + + return 0; +} diff --git a/lib/inet_proto.c b/lib/inet_proto.c new file mode 100644 index 0000000..41e2e8b --- /dev/null +++ b/lib/inet_proto.c @@ -0,0 +1,69 @@ +/* + * inet_proto.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <netdb.h> +#include <string.h> + +#include "rt_names.h" +#include "utils.h" + +const char *inet_proto_n2a(int proto, char *buf, int len) +{ + static char *ncache; + static int icache = -1; + struct protoent *pe; + + if (proto == icache) + return ncache; + + pe = getprotobynumber(proto); + if (pe && !numeric) { + if (icache != -1) + free(ncache); + icache = proto; + ncache = strdup(pe->p_name); + strlcpy(buf, pe->p_name, len); + return buf; + } + snprintf(buf, len, "ipproto-%d", proto); + return buf; +} + +int inet_proto_a2n(const char *buf) +{ + static char *ncache; + static int icache = -1; + struct protoent *pe; + __u8 ret; + + if (icache != -1 && strcmp(ncache, buf) == 0) + return icache; + + if (!get_u8(&ret, buf, 10)) + return ret; + + pe = getprotobyname(buf); + if (pe) { + if (icache != -1) + free(ncache); + icache = pe->p_proto; + ncache = strdup(pe->p_name); + return pe->p_proto; + } + return -1; +} diff --git a/lib/json_print.c b/lib/json_print.c new file mode 100644 index 0000000..741acdc --- /dev/null +++ b/lib/json_print.c @@ -0,0 +1,361 @@ +/* + * json_print.c "print regular or json output, based on json_writer". + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Julien Fortin, <julien@cumulusnetworks.com> + */ + +#include <stdarg.h> +#include <stdio.h> + +#include "utils.h" +#include "json_print.h" + +static json_writer_t *_jw; + +static void __new_json_obj(int json, bool have_array) +{ + if (json) { + _jw = jsonw_new(stdout); + if (!_jw) { + perror("json object"); + exit(1); + } + if (pretty) + jsonw_pretty(_jw, true); + if (have_array) + jsonw_start_array(_jw); + } +} + +static void __delete_json_obj(bool have_array) +{ + if (_jw) { + if (have_array) + jsonw_end_array(_jw); + jsonw_destroy(&_jw); + } +} + +void new_json_obj(int json) +{ + __new_json_obj(json, true); +} + +void delete_json_obj(void) +{ + __delete_json_obj(true); +} + +void new_json_obj_plain(int json) +{ + __new_json_obj(json, false); +} + +void delete_json_obj_plain(void) +{ + __delete_json_obj(false); +} + +bool is_json_context(void) +{ + return _jw != NULL; +} + +json_writer_t *get_json_writer(void) +{ + return _jw; +} + +void open_json_object(const char *str) +{ + if (_IS_JSON_CONTEXT(PRINT_JSON)) { + if (str) + jsonw_name(_jw, str); + jsonw_start_object(_jw); + } +} + +void close_json_object(void) +{ + if (_IS_JSON_CONTEXT(PRINT_JSON)) + jsonw_end_object(_jw); +} + +/* + * Start json array or string array using + * the provided string as json key (if not null) + * or as array delimiter in non-json context. 
+ */ +void open_json_array(enum output_type type, const char *str) +{ + if (_IS_JSON_CONTEXT(type)) { + if (str) + jsonw_name(_jw, str); + jsonw_start_array(_jw); + } else if (_IS_FP_CONTEXT(type)) { + printf("%s", str); + } +} + +/* + * End json array or string array + */ +void close_json_array(enum output_type type, const char *str) +{ + if (_IS_JSON_CONTEXT(type)) { + jsonw_end_array(_jw); + } else if (_IS_FP_CONTEXT(type)) { + printf("%s", str); + } +} + +/* + * pre-processor directive to generate similar + * functions handling different types + */ +#define _PRINT_FUNC(type_name, type) \ + __attribute__((format(printf, 4, 0))) \ + int print_color_##type_name(enum output_type t, \ + enum color_attr color, \ + const char *key, \ + const char *fmt, \ + type value) \ + { \ + int ret = 0; \ + if (_IS_JSON_CONTEXT(t)) { \ + if (!key) \ + jsonw_##type_name(_jw, value); \ + else \ + jsonw_##type_name##_field(_jw, key, value); \ + } else if (_IS_FP_CONTEXT(t)) { \ + ret = color_fprintf(stdout, color, fmt, value); \ + } \ + return ret; \ + } +_PRINT_FUNC(int, int); +_PRINT_FUNC(s64, int64_t); +_PRINT_FUNC(hhu, unsigned char); +_PRINT_FUNC(hu, unsigned short); +_PRINT_FUNC(uint, unsigned int); +_PRINT_FUNC(u64, uint64_t); +_PRINT_FUNC(luint, unsigned long); +_PRINT_FUNC(lluint, unsigned long long); +_PRINT_FUNC(float, double); +#undef _PRINT_FUNC + +#define _PRINT_NAME_VALUE_FUNC(type_name, type, format_char) \ + void print_##type_name##_name_value(const char *name, type value)\ + { \ + SPRINT_BUF(format); \ + \ + snprintf(format, SPRINT_BSIZE, \ + "%s %%"#format_char, name); \ + print_##type_name(PRINT_ANY, name, format, value); \ + } +_PRINT_NAME_VALUE_FUNC(uint, unsigned int, u); +_PRINT_NAME_VALUE_FUNC(string, const char*, s); +#undef _PRINT_NAME_VALUE_FUNC + +int print_color_string(enum output_type type, + enum color_attr color, + const char *key, + const char *fmt, + const char *value) +{ + int ret = 0; + + if (_IS_JSON_CONTEXT(type)) { + if (key && !value) + 
jsonw_name(_jw, key); + else if (!key && value) + jsonw_string(_jw, value); + else + jsonw_string_field(_jw, key, value); + } else if (_IS_FP_CONTEXT(type)) { + ret = color_fprintf(stdout, color, fmt, value); + } + + return ret; +} + +/* + * value's type is bool. When using this function in FP context you can't pass + * a value to it, you will need to use "is_json_context()" to have different + * branch for json and regular output. grep -r "print_bool" for example + */ +static int __print_color_bool(enum output_type type, + enum color_attr color, + const char *key, + const char *fmt, + bool value, + const char *str) +{ + int ret = 0; + + if (_IS_JSON_CONTEXT(type)) { + if (key) + jsonw_bool_field(_jw, key, value); + else + jsonw_bool(_jw, value); + } else if (_IS_FP_CONTEXT(type)) { + ret = color_fprintf(stdout, color, fmt, str); + } + + return ret; +} + +int print_color_bool(enum output_type type, + enum color_attr color, + const char *key, + const char *fmt, + bool value) +{ + return __print_color_bool(type, color, key, fmt, value, + value ? "true" : "false"); +} + +int print_color_on_off(enum output_type type, + enum color_attr color, + const char *key, + const char *fmt, + bool value) +{ + return __print_color_bool(type, color, key, fmt, value, + value ? 
"on" : "off"); +} + +/* + * In JSON context uses hardcode %#x format: 42 -> 0x2a + */ +int print_color_0xhex(enum output_type type, + enum color_attr color, + const char *key, + const char *fmt, + unsigned long long hex) +{ + int ret = 0; + + if (_IS_JSON_CONTEXT(type)) { + SPRINT_BUF(b1); + + snprintf(b1, sizeof(b1), "%#llx", hex); + print_string(PRINT_JSON, key, NULL, b1); + } else if (_IS_FP_CONTEXT(type)) { + ret = color_fprintf(stdout, color, fmt, hex); + } + + return ret; +} + +int print_color_hex(enum output_type type, + enum color_attr color, + const char *key, + const char *fmt, + unsigned int hex) +{ + int ret = 0; + + if (_IS_JSON_CONTEXT(type)) { + SPRINT_BUF(b1); + + snprintf(b1, sizeof(b1), "%x", hex); + if (key) + jsonw_string_field(_jw, key, b1); + else + jsonw_string(_jw, b1); + } else if (_IS_FP_CONTEXT(type)) { + ret = color_fprintf(stdout, color, fmt, hex); + } + + return ret; +} + +/* + * In JSON context we don't use the argument "value" we simply call jsonw_null + * whereas FP context can use "value" to output anything + */ +int print_color_null(enum output_type type, + enum color_attr color, + const char *key, + const char *fmt, + const char *value) +{ + int ret = 0; + + if (_IS_JSON_CONTEXT(type)) { + if (key) + jsonw_null_field(_jw, key); + else + jsonw_null(_jw); + } else if (_IS_FP_CONTEXT(type)) { + ret = color_fprintf(stdout, color, fmt, value); + } + + return ret; +} + +/* + * This function does take printf style argument but applying + * format attribute to causes more warnings since the print_XXX + * functions are used with NULL for format if unused. 
+ */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wformat-nonliteral" +int print_color_tv(enum output_type type, + enum color_attr color, + const char *key, + const char *fmt, + const struct timeval *tv) +{ + double usecs = tv->tv_usec; + double secs = tv->tv_sec; + double time = secs + usecs / 1000000; + + return print_color_float(type, color, key, fmt, time); +} +#pragma GCC diagnostic pop + +/* Print line separator (if not in JSON mode) */ +void print_nl(void) +{ + if (!_jw) + printf("%s", _SL_); +} + +int print_color_rate(bool use_iec, enum output_type type, enum color_attr color, + const char *key, const char *fmt, unsigned long long rate) +{ + unsigned long kilo = use_iec ? 1024 : 1000; + const char *str = use_iec ? "i" : ""; + static char *units[5] = {"", "K", "M", "G", "T"}; + char *buf; + int rc; + int i; + + if (_IS_JSON_CONTEXT(type)) + return print_color_lluint(type, color, key, "%llu", rate); + + rate <<= 3; /* bytes/sec -> bits/sec */ + + for (i = 0; i < ARRAY_SIZE(units) - 1; i++) { + if (rate < kilo) + break; + if (((rate % kilo) != 0) && rate < 1000*kilo) + break; + rate /= kilo; + } + + rc = asprintf(&buf, "%.0f%s%sbit", (double)rate, units[i], + i > 0 ? 
str : ""); + if (rc < 0) + return -1; + + rc = print_color_string(type, color, key, fmt, buf); + free(buf); + return rc; +} diff --git a/lib/json_print_math.c b/lib/json_print_math.c new file mode 100644 index 0000000..f4d5049 --- /dev/null +++ b/lib/json_print_math.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <stdarg.h> +#include <stdio.h> +#include <math.h> + +#include "utils.h" +#include "json_print.h" + +char *sprint_size(__u32 sz, char *buf) +{ + long kilo = 1024; + long mega = kilo * kilo; + size_t len = SPRINT_BSIZE - 1; + double tmp = sz; + + if (sz >= mega && fabs(mega * rint(tmp / mega) - sz) < 1024) + snprintf(buf, len, "%gMb", rint(tmp / mega)); + else if (sz >= kilo && fabs(kilo * rint(tmp / kilo) - sz) < 16) + snprintf(buf, len, "%gKb", rint(tmp / kilo)); + else + snprintf(buf, len, "%ub", sz); + + return buf; +} + +int print_color_size(enum output_type type, enum color_attr color, + const char *key, const char *fmt, __u32 sz) +{ + SPRINT_BUF(buf); + + if (_IS_JSON_CONTEXT(type)) + return print_color_uint(type, color, key, "%u", sz); + + sprint_size(sz, buf); + return print_color_string(type, color, key, fmt, buf); +} diff --git a/lib/json_writer.c b/lib/json_writer.c new file mode 100644 index 0000000..2f3936c --- /dev/null +++ b/lib/json_writer.c @@ -0,0 +1,386 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ +/* + * Simple streaming JSON writer + * + * This takes care of the annoying bits of JSON syntax like the commas + * after elements + * + * Authors: Stephen Hemminger <stephen@networkplumber.org> + */ + +#include <stdio.h> +#include <stdbool.h> +#include <stdarg.h> +#include <assert.h> +#include <malloc.h> +#include <inttypes.h> +#include <stdint.h> + +#include "json_writer.h" + +struct json_writer { + FILE *out; /* output file */ + unsigned depth; /* nesting */ + bool pretty; /* optional whitepace */ + char sep; /* either nul or comma */ +}; + +/* indentation for pretty print */ +static void 
jsonw_indent(json_writer_t *self) +{ + unsigned i; + for (i = 0; i < self->depth; ++i) + fputs(" ", self->out); +} + +/* end current line and indent if pretty printing */ +static void jsonw_eol(json_writer_t *self) +{ + if (!self->pretty) + return; + + putc('\n', self->out); + jsonw_indent(self); +} + +/* If current object is not empty print a comma */ +static void jsonw_eor(json_writer_t *self) +{ + if (self->sep != '\0') + putc(self->sep, self->out); + self->sep = ','; +} + + +/* Output JSON encoded string */ +/* Handles C escapes, does not do Unicode */ +static void jsonw_puts(json_writer_t *self, const char *str) +{ + putc('"', self->out); + for (; *str; ++str) + switch (*str) { + case '\t': + fputs("\\t", self->out); + break; + case '\n': + fputs("\\n", self->out); + break; + case '\r': + fputs("\\r", self->out); + break; + case '\f': + fputs("\\f", self->out); + break; + case '\b': + fputs("\\b", self->out); + break; + case '\\': + fputs("\\\\", self->out); + break; + case '"': + fputs("\\\"", self->out); + break; + default: + putc(*str, self->out); + } + putc('"', self->out); +} + +/* Create a new JSON stream */ +json_writer_t *jsonw_new(FILE *f) +{ + json_writer_t *self = malloc(sizeof(*self)); + if (self) { + self->out = f; + self->depth = 0; + self->pretty = false; + self->sep = '\0'; + } + return self; +} + +/* End output to JSON stream */ +void jsonw_destroy(json_writer_t **self_p) +{ + json_writer_t *self = *self_p; + + assert(self->depth == 0); + fputs("\n", self->out); + fflush(self->out); + free(self); + *self_p = NULL; +} + +void jsonw_pretty(json_writer_t *self, bool on) +{ + self->pretty = on; +} + +/* Basic blocks */ +static void jsonw_begin(json_writer_t *self, int c) +{ + jsonw_eor(self); + putc(c, self->out); + ++self->depth; + self->sep = '\0'; +} + +static void jsonw_end(json_writer_t *self, int c) +{ + assert(self->depth > 0); + + --self->depth; + if (self->sep != '\0') + jsonw_eol(self); + putc(c, self->out); + self->sep = ','; +} + + +/* 
Add a JSON property name */ +void jsonw_name(json_writer_t *self, const char *name) +{ + jsonw_eor(self); + jsonw_eol(self); + self->sep = '\0'; + jsonw_puts(self, name); + putc(':', self->out); + if (self->pretty) + putc(' ', self->out); +} + +__attribute__((format(printf, 2, 3))) +void jsonw_printf(json_writer_t *self, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + jsonw_eor(self); + vfprintf(self->out, fmt, ap); + va_end(ap); +} + +/* Collections */ +void jsonw_start_object(json_writer_t *self) +{ + jsonw_begin(self, '{'); +} + +void jsonw_end_object(json_writer_t *self) +{ + jsonw_end(self, '}'); +} + +void jsonw_start_array(json_writer_t *self) +{ + jsonw_begin(self, '['); + if (self->pretty) + putc(' ', self->out); +} + +void jsonw_end_array(json_writer_t *self) +{ + if (self->pretty && self->sep) + putc(' ', self->out); + self->sep = '\0'; + jsonw_end(self, ']'); +} + +/* JSON value types */ +void jsonw_string(json_writer_t *self, const char *value) +{ + jsonw_eor(self); + jsonw_puts(self, value); +} + +void jsonw_bool(json_writer_t *self, bool val) +{ + jsonw_printf(self, "%s", val ? 
"true" : "false"); +} + +void jsonw_null(json_writer_t *self) +{ + jsonw_printf(self, "null"); +} + +void jsonw_float(json_writer_t *self, double num) +{ + jsonw_printf(self, "%g", num); +} + +void jsonw_hhu(json_writer_t *self, unsigned char num) +{ + jsonw_printf(self, "%hhu", num); +} + +void jsonw_hu(json_writer_t *self, unsigned short num) +{ + jsonw_printf(self, "%hu", num); +} + +void jsonw_uint(json_writer_t *self, unsigned int num) +{ + jsonw_printf(self, "%u", num); +} + +void jsonw_u64(json_writer_t *self, uint64_t num) +{ + jsonw_printf(self, "%"PRIu64, num); +} + +void jsonw_xint(json_writer_t *self, uint64_t num) +{ + jsonw_printf(self, "%"PRIx64, num); +} + +void jsonw_luint(json_writer_t *self, unsigned long num) +{ + jsonw_printf(self, "%lu", num); +} + +void jsonw_lluint(json_writer_t *self, unsigned long long num) +{ + jsonw_printf(self, "%llu", num); +} + +void jsonw_int(json_writer_t *self, int num) +{ + jsonw_printf(self, "%d", num); +} + +void jsonw_s64(json_writer_t *self, int64_t num) +{ + jsonw_printf(self, "%"PRId64, num); +} + +/* Basic name/value objects */ +void jsonw_string_field(json_writer_t *self, const char *prop, const char *val) +{ + jsonw_name(self, prop); + jsonw_string(self, val); +} + +void jsonw_bool_field(json_writer_t *self, const char *prop, bool val) +{ + jsonw_name(self, prop); + jsonw_bool(self, val); +} + +void jsonw_float_field(json_writer_t *self, const char *prop, double val) +{ + jsonw_name(self, prop); + jsonw_float(self, val); +} + +void jsonw_uint_field(json_writer_t *self, const char *prop, unsigned int num) +{ + jsonw_name(self, prop); + jsonw_uint(self, num); +} + +void jsonw_u64_field(json_writer_t *self, const char *prop, uint64_t num) +{ + jsonw_name(self, prop); + jsonw_u64(self, num); +} + +void jsonw_xint_field(json_writer_t *self, const char *prop, uint64_t num) +{ + jsonw_name(self, prop); + jsonw_xint(self, num); +} + +void jsonw_hhu_field(json_writer_t *self, const char *prop, unsigned char num) 
+{ + jsonw_name(self, prop); + jsonw_hhu(self, num); +} + +void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num) +{ + jsonw_name(self, prop); + jsonw_hu(self, num); +} + +void jsonw_luint_field(json_writer_t *self, + const char *prop, + unsigned long num) +{ + jsonw_name(self, prop); + jsonw_luint(self, num); +} + +void jsonw_lluint_field(json_writer_t *self, + const char *prop, + unsigned long long num) +{ + jsonw_name(self, prop); + jsonw_lluint(self, num); +} + +void jsonw_int_field(json_writer_t *self, const char *prop, int num) +{ + jsonw_name(self, prop); + jsonw_int(self, num); +} + +void jsonw_s64_field(json_writer_t *self, const char *prop, int64_t num) +{ + jsonw_name(self, prop); + jsonw_s64(self, num); +} + +void jsonw_null_field(json_writer_t *self, const char *prop) +{ + jsonw_name(self, prop); + jsonw_null(self); +} + +#ifdef TEST +int main(int argc, char **argv) +{ + json_writer_t *wr = jsonw_new(stdout); + + jsonw_start_object(wr); + jsonw_pretty(wr, true); + jsonw_name(wr, "Vyatta"); + jsonw_start_object(wr); + jsonw_string_field(wr, "url", "http://vyatta.com"); + jsonw_uint_field(wr, "downloads", 2000000ul); + jsonw_float_field(wr, "stock", 8.16); + + jsonw_name(wr, "ARGV"); + jsonw_start_array(wr); + while (--argc) + jsonw_string(wr, *++argv); + jsonw_end_array(wr); + + jsonw_name(wr, "empty"); + jsonw_start_array(wr); + jsonw_end_array(wr); + + jsonw_name(wr, "NIL"); + jsonw_start_object(wr); + jsonw_end_object(wr); + + jsonw_null_field(wr, "my_null"); + + jsonw_name(wr, "special chars"); + jsonw_start_array(wr); + jsonw_string_field(wr, "slash", "/"); + jsonw_string_field(wr, "newline", "\n"); + jsonw_string_field(wr, "tab", "\t"); + jsonw_string_field(wr, "ff", "\f"); + jsonw_string_field(wr, "quote", "\""); + jsonw_string_field(wr, "tick", "\'"); + jsonw_string_field(wr, "backslash", "\\"); + jsonw_end_array(wr); + + jsonw_end_object(wr); + + jsonw_end_object(wr); + jsonw_destroy(&wr); + return 0; +} + +#endif diff 
--git a/lib/libgenl.c b/lib/libgenl.c new file mode 100644 index 0000000..fca07f9 --- /dev/null +++ b/lib/libgenl.c @@ -0,0 +1,159 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * libgenl.c GENL library + */ + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <linux/genetlink.h> +#include "libgenl.h" + +static int genl_parse_getfamily(struct nlmsghdr *nlh) +{ + struct rtattr *tb[CTRL_ATTR_MAX + 1]; + struct genlmsghdr *ghdr = NLMSG_DATA(nlh); + int len = nlh->nlmsg_len; + struct rtattr *attrs; + + if (nlh->nlmsg_type != GENL_ID_CTRL) { + fprintf(stderr, "Not a controller message, nlmsg_len=%d " + "nlmsg_type=0x%x\n", nlh->nlmsg_len, nlh->nlmsg_type); + return -1; + } + + len -= NLMSG_LENGTH(GENL_HDRLEN); + + if (len < 0) { + fprintf(stderr, "wrong controller message len %d\n", len); + return -1; + } + + if (ghdr->cmd != CTRL_CMD_NEWFAMILY) { + fprintf(stderr, "Unknown controller command %d\n", ghdr->cmd); + return -1; + } + + attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN); + parse_rtattr(tb, CTRL_ATTR_MAX, attrs, len); + + if (tb[CTRL_ATTR_FAMILY_ID] == NULL) { + fprintf(stderr, "Missing family id TLV\n"); + return -1; + } + + return rta_getattr_u16(tb[CTRL_ATTR_FAMILY_ID]); +} + +int genl_resolve_family(struct rtnl_handle *grth, const char *family) +{ + GENL_REQUEST(req, 1024, GENL_ID_CTRL, 0, 0, CTRL_CMD_GETFAMILY, + NLM_F_REQUEST); + struct nlmsghdr *answer; + int fnum; + + addattr_l(&req.n, sizeof(req), CTRL_ATTR_FAMILY_NAME, + family, strlen(family) + 1); + + if (rtnl_talk(grth, &req.n, &answer) < 0) { + fprintf(stderr, "Error talking to the kernel\n"); + return -2; + } + + fnum = genl_parse_getfamily(answer); + free(answer); + + return fnum; +} + +static int genl_parse_grps(struct rtattr *attr, const char *name, unsigned int *id) +{ + const struct rtattr *pos; + + rtattr_for_each_nested(pos, attr) { + struct rtattr *tb[CTRL_ATTR_MCAST_GRP_MAX + 1]; + + parse_rtattr_nested(tb, CTRL_ATTR_MCAST_GRP_MAX, 
pos); + + if (tb[CTRL_ATTR_MCAST_GRP_NAME] && tb[CTRL_ATTR_MCAST_GRP_ID]) { + if (strcmp(name, rta_getattr_str(tb[CTRL_ATTR_MCAST_GRP_NAME])) == 0) { + *id = rta_getattr_u32(tb[CTRL_ATTR_MCAST_GRP_ID]); + return 0; + } + } + } + + errno = ENOENT; + return -1; +} + +int genl_add_mcast_grp(struct rtnl_handle *grth, __u16 fnum, const char *group) +{ + GENL_REQUEST(req, 1024, GENL_ID_CTRL, 0, 0, CTRL_CMD_GETFAMILY, + NLM_F_REQUEST); + struct rtattr *tb[CTRL_ATTR_MAX + 1]; + struct nlmsghdr *answer = NULL; + struct genlmsghdr *ghdr; + struct rtattr *attrs; + int len, ret = -1; + unsigned int id; + + addattr16(&req.n, sizeof(req), CTRL_ATTR_FAMILY_ID, fnum); + + if (rtnl_talk(grth, &req.n, &answer) < 0) { + fprintf(stderr, "Error talking to the kernel\n"); + return -2; + } + + ghdr = NLMSG_DATA(answer); + len = answer->nlmsg_len; + + if (answer->nlmsg_type != GENL_ID_CTRL) { + errno = EINVAL; + goto err_free; + } + + len -= NLMSG_LENGTH(GENL_HDRLEN); + if (len < 0) { + errno = EINVAL; + goto err_free; + } + + attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN); + parse_rtattr(tb, CTRL_ATTR_MAX, attrs, len); + + if (tb[CTRL_ATTR_MCAST_GROUPS] == NULL) { + errno = ENOENT; + fprintf(stderr, "Missing mcast groups TLV\n"); + goto err_free; + } + + if (genl_parse_grps(tb[CTRL_ATTR_MCAST_GROUPS], group, &id) < 0) + goto err_free; + + ret = rtnl_add_nl_group(grth, id); + +err_free: + free(answer); + return ret; +} + +int genl_init_handle(struct rtnl_handle *grth, const char *family, + int *genl_family) +{ + if (*genl_family >= 0) + return 0; + + if (rtnl_open_byproto(grth, 0, NETLINK_GENERIC) < 0) { + fprintf(stderr, "Cannot open generic netlink socket\n"); + return -1; + } + + *genl_family = genl_resolve_family(grth, family); + if (*genl_family < 0) + return -1; + + return 0; +} diff --git a/lib/libnetlink.c b/lib/libnetlink.c new file mode 100644 index 0000000..001efc1 --- /dev/null +++ b/lib/libnetlink.c @@ -0,0 +1,1657 @@ +/* + * libnetlink.c RTnetlink service routines. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <unistd.h> +#include <fcntl.h> +#include <net/if_arp.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <string.h> +#include <errno.h> +#include <time.h> +#include <sys/uio.h> +#include <linux/fib_rules.h> +#include <linux/if_addrlabel.h> +#include <linux/if_bridge.h> +#include <linux/nexthop.h> + +#include "libnetlink.h" +#include "utils.h" + +#ifndef __aligned +#define __aligned(x) __attribute__((aligned(x))) +#endif + +#ifndef SOL_NETLINK +#define SOL_NETLINK 270 +#endif + +#ifndef MIN +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif + +int rcvbuf = 1024 * 1024; + +#ifdef HAVE_LIBMNL +#include <libmnl/libmnl.h> + +static const enum mnl_attr_data_type extack_policy[NLMSGERR_ATTR_MAX + 1] = { + [NLMSGERR_ATTR_MSG] = MNL_TYPE_NUL_STRING, + [NLMSGERR_ATTR_OFFS] = MNL_TYPE_U32, +}; + +static int err_attr_cb(const struct nlattr *attr, void *data) +{ + const struct nlattr **tb = data; + uint16_t type; + + if (mnl_attr_type_valid(attr, NLMSGERR_ATTR_MAX) < 0) { + fprintf(stderr, "Invalid extack attribute\n"); + return MNL_CB_ERROR; + } + + type = mnl_attr_get_type(attr); + if (mnl_attr_validate(attr, extack_policy[type]) < 0) { + fprintf(stderr, "extack attribute %d failed validation\n", + type); + return MNL_CB_ERROR; + } + + tb[type] = attr; + return MNL_CB_OK; +} + +static void print_ext_ack_msg(bool is_err, const char *msg) +{ + fprintf(stderr, "%s: %s", is_err ? 
"Error" : "Warning", msg); + if (msg[strlen(msg) - 1] != '.') + fprintf(stderr, "."); + fprintf(stderr, "\n"); +} + +/* dump netlink extended ack error message */ +int nl_dump_ext_ack(const struct nlmsghdr *nlh, nl_ext_ack_fn_t errfn) +{ + struct nlattr *tb[NLMSGERR_ATTR_MAX + 1] = {}; + const struct nlmsgerr *err = mnl_nlmsg_get_payload(nlh); + const struct nlmsghdr *err_nlh = NULL; + unsigned int hlen = sizeof(*err); + const char *msg = NULL; + uint32_t off = 0; + + /* no TLVs, nothing to do here */ + if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) + return 0; + + /* if NLM_F_CAPPED is set then the inner err msg was capped */ + if (!(nlh->nlmsg_flags & NLM_F_CAPPED)) + hlen += mnl_nlmsg_get_payload_len(&err->msg); + + if (mnl_attr_parse(nlh, hlen, err_attr_cb, tb) != MNL_CB_OK) + return 0; + + if (tb[NLMSGERR_ATTR_MSG]) + msg = mnl_attr_get_str(tb[NLMSGERR_ATTR_MSG]); + + if (tb[NLMSGERR_ATTR_OFFS]) { + off = mnl_attr_get_u32(tb[NLMSGERR_ATTR_OFFS]); + + if (off > nlh->nlmsg_len) { + fprintf(stderr, + "Invalid offset for NLMSGERR_ATTR_OFFS\n"); + off = 0; + } else if (!(nlh->nlmsg_flags & NLM_F_CAPPED)) + err_nlh = &err->msg; + } + + if (errfn) + return errfn(msg, off, err_nlh); + + if (msg && *msg != '\0') { + bool is_err = !!err->error; + + print_ext_ack_msg(is_err, msg); + return is_err ? 1 : 0; + } + + return 0; +} + +int nl_dump_ext_ack_done(const struct nlmsghdr *nlh, unsigned int offset, int error) +{ + struct nlattr *tb[NLMSGERR_ATTR_MAX + 1] = {}; + const char *msg = NULL; + + if (mnl_attr_parse(nlh, offset, err_attr_cb, tb) != MNL_CB_OK) + return 0; + + if (tb[NLMSGERR_ATTR_MSG]) + msg = mnl_attr_get_str(tb[NLMSGERR_ATTR_MSG]); + + if (msg && *msg != '\0') { + bool is_err = !!error; + + print_ext_ack_msg(is_err, msg); + return is_err ? 
1 : 0; + } + + return 0; +} +#else +#warning "libmnl required for error support" + +/* No extended error ack without libmnl */ +int nl_dump_ext_ack(const struct nlmsghdr *nlh, nl_ext_ack_fn_t errfn) +{ + return 0; +} + +int nl_dump_ext_ack_done(const struct nlmsghdr *nlh, unsigned int offset, int error) +{ + return 0; +} +#endif + +/* Older kernels may not support strict dump and filtering */ +void rtnl_set_strict_dump(struct rtnl_handle *rth) +{ + int one = 1; + + if (setsockopt(rth->fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, + &one, sizeof(one)) < 0) + return; + + rth->flags |= RTNL_HANDLE_F_STRICT_CHK; +} + +int rtnl_add_nl_group(struct rtnl_handle *rth, unsigned int group) +{ + return setsockopt(rth->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, + &group, sizeof(group)); +} + +void rtnl_close(struct rtnl_handle *rth) +{ + if (rth->fd >= 0) { + close(rth->fd); + rth->fd = -1; + } +} + +int rtnl_open_byproto(struct rtnl_handle *rth, unsigned int subscriptions, + int protocol) +{ + socklen_t addr_len; + int sndbuf = 32768; + int one = 1; + + memset(rth, 0, sizeof(*rth)); + + rth->proto = protocol; + rth->fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol); + if (rth->fd < 0) { + perror("Cannot open netlink socket"); + return -1; + } + + if (setsockopt(rth->fd, SOL_SOCKET, SO_SNDBUF, + &sndbuf, sizeof(sndbuf)) < 0) { + perror("SO_SNDBUF"); + goto err; + } + + if (setsockopt(rth->fd, SOL_SOCKET, SO_RCVBUF, + &rcvbuf, sizeof(rcvbuf)) < 0) { + perror("SO_RCVBUF"); + goto err; + } + + /* Older kernels may no support extended ACK reporting */ + setsockopt(rth->fd, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one)); + + memset(&rth->local, 0, sizeof(rth->local)); + rth->local.nl_family = AF_NETLINK; + rth->local.nl_groups = subscriptions; + + if (bind(rth->fd, (struct sockaddr *)&rth->local, + sizeof(rth->local)) < 0) { + perror("Cannot bind netlink socket"); + goto err; + } + addr_len = sizeof(rth->local); + if (getsockname(rth->fd, (struct sockaddr *)&rth->local, + 
&addr_len) < 0) { + perror("Cannot getsockname"); + goto err; + } + if (addr_len != sizeof(rth->local)) { + fprintf(stderr, "Wrong address length %d\n", addr_len); + goto err; + } + if (rth->local.nl_family != AF_NETLINK) { + fprintf(stderr, "Wrong address family %d\n", + rth->local.nl_family); + goto err; + } + rth->seq = time(NULL); + return 0; +err: + rtnl_close(rth); + return -1; +} + +int rtnl_open(struct rtnl_handle *rth, unsigned int subscriptions) +{ + return rtnl_open_byproto(rth, subscriptions, NETLINK_ROUTE); +} + +int rtnl_nexthopdump_req(struct rtnl_handle *rth, int family, + req_filter_fn_t filter_fn) +{ + struct { + struct nlmsghdr nlh; + struct nhmsg nhm; + char buf[128]; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)), + .nlh.nlmsg_type = RTM_GETNEXTHOP, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .nlh.nlmsg_seq = rth->dump = ++rth->seq, + .nhm.nh_family = family, + }; + + if (filter_fn) { + int err; + + err = filter_fn(&req.nlh, sizeof(req)); + if (err) + return err; + } + + return send(rth->fd, &req, sizeof(req), 0); +} + +int rtnl_nexthop_bucket_dump_req(struct rtnl_handle *rth, int family, + req_filter_fn_t filter_fn) +{ + struct { + struct nlmsghdr nlh; + struct nhmsg nhm; + char buf[128]; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)), + .nlh.nlmsg_type = RTM_GETNEXTHOPBUCKET, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .nlh.nlmsg_seq = rth->dump = ++rth->seq, + .nhm.nh_family = family, + }; + + if (filter_fn) { + int err; + + err = filter_fn(&req.nlh, sizeof(req)); + if (err) + return err; + } + + return send(rth->fd, &req, sizeof(req), 0); +} + +int rtnl_addrdump_req(struct rtnl_handle *rth, int family, + req_filter_fn_t filter_fn) +{ + struct { + struct nlmsghdr nlh; + struct ifaddrmsg ifm; + char buf[128]; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), + .nlh.nlmsg_type = RTM_GETADDR, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .nlh.nlmsg_seq = rth->dump = 
++rth->seq, + .ifm.ifa_family = family, + }; + + if (filter_fn) { + int err; + + err = filter_fn(&req.nlh, sizeof(req)); + if (err) + return err; + } + + return send(rth->fd, &req, sizeof(req), 0); +} + +int rtnl_addrlbldump_req(struct rtnl_handle *rth, int family) +{ + struct { + struct nlmsghdr nlh; + struct ifaddrlblmsg ifal; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrlblmsg)), + .nlh.nlmsg_type = RTM_GETADDRLABEL, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .nlh.nlmsg_seq = rth->dump = ++rth->seq, + .ifal.ifal_family = family, + }; + + return send(rth->fd, &req, sizeof(req), 0); +} + +int rtnl_routedump_req(struct rtnl_handle *rth, int family, + req_filter_fn_t filter_fn) +{ + struct { + struct nlmsghdr nlh; + struct rtmsg rtm; + char buf[128]; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)), + .nlh.nlmsg_type = RTM_GETROUTE, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .nlh.nlmsg_seq = rth->dump = ++rth->seq, + .rtm.rtm_family = family, + }; + + if (filter_fn) { + int err; + + err = filter_fn(&req.nlh, sizeof(req)); + if (err) + return err; + } + + return send(rth->fd, &req, sizeof(req), 0); +} + +int rtnl_ruledump_req(struct rtnl_handle *rth, int family) +{ + struct { + struct nlmsghdr nlh; + struct fib_rule_hdr frh; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)), + .nlh.nlmsg_type = RTM_GETRULE, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .nlh.nlmsg_seq = rth->dump = ++rth->seq, + .frh.family = family + }; + + return send(rth->fd, &req, sizeof(req), 0); +} + +int rtnl_neighdump_req(struct rtnl_handle *rth, int family, + req_filter_fn_t filter_fn) +{ + struct { + struct nlmsghdr nlh; + struct ndmsg ndm; + char buf[256]; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)), + .nlh.nlmsg_type = RTM_GETNEIGH, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .nlh.nlmsg_seq = rth->dump = ++rth->seq, + .ndm.ndm_family = family, + }; + + if (filter_fn) { + int err; + 
+ err = filter_fn(&req.nlh, sizeof(req)); + if (err) + return err; + } + + return send(rth->fd, &req, sizeof(req), 0); +} + +int rtnl_neightbldump_req(struct rtnl_handle *rth, int family) +{ + struct { + struct nlmsghdr nlh; + struct ndtmsg ndtmsg; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndtmsg)), + .nlh.nlmsg_type = RTM_GETNEIGHTBL, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .nlh.nlmsg_seq = rth->dump = ++rth->seq, + .ndtmsg.ndtm_family = family, + }; + + return send(rth->fd, &req, sizeof(req), 0); +} + +int rtnl_mdbdump_req(struct rtnl_handle *rth, int family) +{ + struct { + struct nlmsghdr nlh; + struct br_port_msg bpm; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct br_port_msg)), + .nlh.nlmsg_type = RTM_GETMDB, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .nlh.nlmsg_seq = rth->dump = ++rth->seq, + .bpm.family = family, + }; + + return send(rth->fd, &req, sizeof(req), 0); +} + +int rtnl_brvlandump_req(struct rtnl_handle *rth, int family, __u32 dump_flags) +{ + struct { + struct nlmsghdr nlh; + struct br_vlan_msg bvm; + char buf[256]; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct br_vlan_msg)), + .nlh.nlmsg_type = RTM_GETVLAN, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .nlh.nlmsg_seq = rth->dump = ++rth->seq, + .bvm.family = family, + }; + + addattr32(&req.nlh, sizeof(req), BRIDGE_VLANDB_DUMP_FLAGS, dump_flags); + + return send(rth->fd, &req, sizeof(req), 0); +} + +int rtnl_netconfdump_req(struct rtnl_handle *rth, int family) +{ + struct { + struct nlmsghdr nlh; + struct netconfmsg ncm; + char buf[0] __aligned(NLMSG_ALIGNTO); + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(NLMSG_ALIGN(sizeof(struct netconfmsg))), + .nlh.nlmsg_type = RTM_GETNETCONF, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .nlh.nlmsg_seq = rth->dump = ++rth->seq, + .ncm.ncm_family = family, + }; + + return send(rth->fd, &req, sizeof(req), 0); +} + +int rtnl_nsiddump_req_filter_fn(struct rtnl_handle *rth, int family, + 
req_filter_fn_t filter_fn) +{ + struct { + struct nlmsghdr nlh; + struct rtgenmsg rtm; + char buf[1024]; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(NLMSG_ALIGN(sizeof(struct rtgenmsg))), + .nlh.nlmsg_type = RTM_GETNSID, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .nlh.nlmsg_seq = rth->dump = ++rth->seq, + .rtm.rtgen_family = family, + }; + int err; + + if (!filter_fn) + return -EINVAL; + + err = filter_fn(&req.nlh, sizeof(req)); + if (err) + return err; + + return send(rth->fd, &req, req.nlh.nlmsg_len, 0); +} + +static int __rtnl_linkdump_req(struct rtnl_handle *rth, int family) +{ + struct { + struct nlmsghdr nlh; + struct ifinfomsg ifm; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + .nlh.nlmsg_type = RTM_GETLINK, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .nlh.nlmsg_seq = rth->dump = ++rth->seq, + .ifm.ifi_family = family, + }; + + return send(rth->fd, &req, sizeof(req), 0); +} + +int rtnl_linkdump_req(struct rtnl_handle *rth, int family) +{ + if (family == AF_UNSPEC) + return rtnl_linkdump_req_filter(rth, family, RTEXT_FILTER_VF); + + return __rtnl_linkdump_req(rth, family); +} + +int rtnl_linkdump_req_filter(struct rtnl_handle *rth, int family, + __u32 filt_mask) +{ + if (family == AF_UNSPEC || family == AF_BRIDGE) { + struct { + struct nlmsghdr nlh; + struct ifinfomsg ifm; + /* attribute has to be NLMSG aligned */ + struct rtattr ext_req __aligned(NLMSG_ALIGNTO); + __u32 ext_filter_mask; + } req = { + .nlh.nlmsg_len = sizeof(req), + .nlh.nlmsg_type = RTM_GETLINK, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .nlh.nlmsg_seq = rth->dump = ++rth->seq, + .ifm.ifi_family = family, + .ext_req.rta_type = IFLA_EXT_MASK, + .ext_req.rta_len = RTA_LENGTH(sizeof(__u32)), + .ext_filter_mask = filt_mask, + }; + + return send(rth->fd, &req, sizeof(req), 0); + } + + return __rtnl_linkdump_req(rth, family); +} + +int rtnl_linkdump_req_filter_fn(struct rtnl_handle *rth, int family, + req_filter_fn_t filter_fn) +{ + if (family == 
AF_UNSPEC || family == AF_PACKET) { + struct { + struct nlmsghdr nlh; + struct ifinfomsg ifm; + char buf[1024]; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + .nlh.nlmsg_type = RTM_GETLINK, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .nlh.nlmsg_seq = rth->dump = ++rth->seq, + .ifm.ifi_family = family, + }; + int err; + + if (!filter_fn) + return -EINVAL; + + err = filter_fn(&req.nlh, sizeof(req)); + if (err) + return err; + + return send(rth->fd, &req, req.nlh.nlmsg_len, 0); + } + + return __rtnl_linkdump_req(rth, family); +} + +int rtnl_fdb_linkdump_req_filter_fn(struct rtnl_handle *rth, + req_filter_fn_t filter_fn) +{ + struct { + struct nlmsghdr nlh; + struct ifinfomsg ifm; + char buf[128]; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + .nlh.nlmsg_type = RTM_GETNEIGH, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .nlh.nlmsg_seq = rth->dump = ++rth->seq, + .ifm.ifi_family = PF_BRIDGE, + }; + int err; + + err = filter_fn(&req.nlh, sizeof(req)); + if (err) + return err; + + return send(rth->fd, &req, sizeof(req), 0); +} + +int rtnl_statsdump_req_filter(struct rtnl_handle *rth, int fam, + __u32 filt_mask, + int (*filter_fn)(struct ipstats_req *req, + void *data), + void *filter_data) +{ + struct ipstats_req req; + + memset(&req, 0, sizeof(req)); + req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct if_stats_msg)); + req.nlh.nlmsg_type = RTM_GETSTATS; + req.nlh.nlmsg_flags = NLM_F_DUMP|NLM_F_REQUEST; + req.nlh.nlmsg_pid = 0; + req.nlh.nlmsg_seq = rth->dump = ++rth->seq; + req.ifsm.family = fam; + req.ifsm.filter_mask = filt_mask; + + if (filter_fn) { + int err; + + err = filter_fn(&req, filter_data); + if (err) + return err; + } + + return send(rth->fd, &req, sizeof(req), 0); +} + +int rtnl_send(struct rtnl_handle *rth, const void *buf, int len) +{ + return send(rth->fd, buf, len, 0); +} + +int rtnl_send_check(struct rtnl_handle *rth, const void *buf, int len) +{ + struct nlmsghdr *h; + int status; + char 
resp[1024]; + + status = send(rth->fd, buf, len, 0); + if (status < 0) + return status; + + /* Check for immediate errors */ + status = recv(rth->fd, resp, sizeof(resp), MSG_DONTWAIT|MSG_PEEK); + if (status < 0) { + if (errno == EAGAIN) + return 0; + return -1; + } + + for (h = (struct nlmsghdr *)resp; NLMSG_OK(h, status); + h = NLMSG_NEXT(h, status)) { + if (h->nlmsg_type == NLMSG_ERROR) { + struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h); + + if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) + fprintf(stderr, "ERROR truncated\n"); + else + errno = -err->error; + return -1; + } + } + + return 0; +} + +int rtnl_dump_request(struct rtnl_handle *rth, int type, void *req, int len) +{ + struct nlmsghdr nlh = { + .nlmsg_len = NLMSG_LENGTH(len), + .nlmsg_type = type, + .nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .nlmsg_seq = rth->dump = ++rth->seq, + }; + struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; + struct iovec iov[2] = { + { .iov_base = &nlh, .iov_len = sizeof(nlh) }, + { .iov_base = req, .iov_len = len } + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = iov, + .msg_iovlen = 2, + }; + + return sendmsg(rth->fd, &msg, 0); +} + +int rtnl_dump_request_n(struct rtnl_handle *rth, struct nlmsghdr *n) +{ + struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; + struct iovec iov = { + .iov_base = n, + .iov_len = n->nlmsg_len + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + n->nlmsg_flags = NLM_F_DUMP|NLM_F_REQUEST; + n->nlmsg_pid = 0; + n->nlmsg_seq = rth->dump = ++rth->seq; + + return sendmsg(rth->fd, &msg, 0); +} + +static int rtnl_dump_done(struct nlmsghdr *h, + const struct rtnl_dump_filter_arg *a) +{ + int len = *(int *)NLMSG_DATA(h); + + if (h->nlmsg_len < NLMSG_LENGTH(sizeof(int))) { + fprintf(stderr, "DONE truncated\n"); + return -1; + } + + if (len < 0) { + errno = -len; + + if (a->errhndlr && 
(a->errhndlr(h, a->arg2) & RTNL_SUPPRESS_NLMSG_DONE_NLERR)) + return 0; + + /* check for any messages returned from kernel */ + if (nl_dump_ext_ack_done(h, sizeof(int), len)) + return len; + + switch (errno) { + case ENOENT: + case EOPNOTSUPP: + return -1; + case EMSGSIZE: + fprintf(stderr, + "Error: Buffer too small for object.\n"); + break; + default: + perror("RTNETLINK answers"); + } + return len; + } + + /* check for any messages returned from kernel */ + nl_dump_ext_ack(h, NULL); + + return 0; +} + +static int rtnl_dump_error(const struct rtnl_handle *rth, + struct nlmsghdr *h, + const struct rtnl_dump_filter_arg *a) +{ + + if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) { + fprintf(stderr, "ERROR truncated\n"); + } else { + const struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h); + + errno = -err->error; + if (rth->proto == NETLINK_SOCK_DIAG && + (errno == ENOENT || + errno == EOPNOTSUPP)) + return -1; + + if (a->errhndlr && (a->errhndlr(h, a->arg2) & RTNL_SUPPRESS_NLMSG_ERROR_NLERR)) + return 0; + + if (!(rth->flags & RTNL_HANDLE_F_SUPPRESS_NLERR)) + perror("RTNETLINK answers"); + } + + return -1; +} + +static int __rtnl_recvmsg(int fd, struct msghdr *msg, int flags) +{ + int len; + + do { + len = recvmsg(fd, msg, flags); + } while (len < 0 && (errno == EINTR || errno == EAGAIN)); + + if (len < 0) { + fprintf(stderr, "netlink receive error %s (%d)\n", + strerror(errno), errno); + return -errno; + } + + if (len == 0) { + fprintf(stderr, "EOF on netlink\n"); + return -ENODATA; + } + + return len; +} + +static int rtnl_recvmsg(int fd, struct msghdr *msg, char **answer) +{ + struct iovec *iov = msg->msg_iov; + char *buf; + int len; + + iov->iov_base = NULL; + iov->iov_len = 0; + + len = __rtnl_recvmsg(fd, msg, MSG_PEEK | MSG_TRUNC); + if (len < 0) + return len; + + if (len < 32768) + len = 32768; + buf = malloc(len); + if (!buf) { + fprintf(stderr, "malloc error: not enough buffer\n"); + return -ENOMEM; + } + + iov->iov_base = buf; + 
iov->iov_len = len; + + len = __rtnl_recvmsg(fd, msg, 0); + if (len < 0) { + free(buf); + return len; + } + + if (answer) + *answer = buf; + else + free(buf); + + return len; +} + +static int rtnl_dump_filter_l(struct rtnl_handle *rth, + const struct rtnl_dump_filter_arg *arg) +{ + struct sockaddr_nl nladdr; + struct iovec iov; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + char *buf; + int dump_intr = 0; + + while (1) { + int status; + const struct rtnl_dump_filter_arg *a; + int found_done = 0; + int msglen = 0; + + status = rtnl_recvmsg(rth->fd, &msg, &buf); + if (status < 0) + return status; + + if (rth->dump_fp) + fwrite(buf, 1, NLMSG_ALIGN(status), rth->dump_fp); + + for (a = arg; a->filter; a++) { + struct nlmsghdr *h = (struct nlmsghdr *)buf; + + msglen = status; + + while (NLMSG_OK(h, msglen)) { + int err = 0; + + h->nlmsg_flags &= ~a->nc_flags; + + if (nladdr.nl_pid != 0 || + h->nlmsg_pid != rth->local.nl_pid || + h->nlmsg_seq != rth->dump) + goto skip_it; + + if (h->nlmsg_flags & NLM_F_DUMP_INTR) + dump_intr = 1; + + if (h->nlmsg_type == NLMSG_DONE) { + err = rtnl_dump_done(h, a); + if (err < 0) { + free(buf); + return -1; + } + + found_done = 1; + break; /* process next filter */ + } + + if (h->nlmsg_type == NLMSG_ERROR) { + err = rtnl_dump_error(rth, h, a); + if (err < 0) { + free(buf); + return -1; + } + + goto skip_it; + } + + if (!rth->dump_fp) { + err = a->filter(h, a->arg1); + if (err < 0) { + free(buf); + return err; + } + } + +skip_it: + h = NLMSG_NEXT(h, msglen); + } + } + free(buf); + + if (found_done) { + if (dump_intr) + fprintf(stderr, + "Dump was interrupted and may be inconsistent.\n"); + return 0; + } + + if (msg.msg_flags & MSG_TRUNC) { + fprintf(stderr, "Message truncated\n"); + continue; + } + if (msglen) { + fprintf(stderr, "!!!Remnant of size %d\n", msglen); + exit(1); + } + } +} + +int rtnl_dump_filter_nc(struct rtnl_handle *rth, + rtnl_filter_t filter, + 
void *arg1, __u16 nc_flags) +{ + const struct rtnl_dump_filter_arg a[] = { + { + .filter = filter, .arg1 = arg1, + .nc_flags = nc_flags, + }, + { }, + }; + + return rtnl_dump_filter_l(rth, a); +} + +int rtnl_dump_filter_errhndlr_nc(struct rtnl_handle *rth, + rtnl_filter_t filter, + void *arg1, + rtnl_err_hndlr_t errhndlr, + void *arg2, + __u16 nc_flags) +{ + const struct rtnl_dump_filter_arg a[] = { + { + .filter = filter, .arg1 = arg1, + .errhndlr = errhndlr, .arg2 = arg2, + .nc_flags = nc_flags, + }, + { }, + }; + + return rtnl_dump_filter_l(rth, a); +} + +static void rtnl_talk_error(struct nlmsghdr *h, struct nlmsgerr *err, + nl_ext_ack_fn_t errfn) +{ + if (nl_dump_ext_ack(h, errfn)) + return; + + fprintf(stderr, "RTNETLINK answers: %s\n", + strerror(-err->error)); +} + + +static int __rtnl_talk_iov(struct rtnl_handle *rtnl, struct iovec *iov, + size_t iovlen, struct nlmsghdr **answer, + bool show_rtnl_err, nl_ext_ack_fn_t errfn) +{ + struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; + struct iovec riov; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = iov, + .msg_iovlen = iovlen, + }; + unsigned int seq = 0; + struct nlmsghdr *h; + int i, status; + char *buf; + + for (i = 0; i < iovlen; i++) { + h = iov[i].iov_base; + h->nlmsg_seq = seq = ++rtnl->seq; + if (answer == NULL) + h->nlmsg_flags |= NLM_F_ACK; + } + + status = sendmsg(rtnl->fd, &msg, 0); + if (status < 0) { + perror("Cannot talk to rtnetlink"); + return -1; + } + + /* change msg to use the response iov */ + msg.msg_iov = &riov; + msg.msg_iovlen = 1; + i = 0; + while (1) { +next: + status = rtnl_recvmsg(rtnl->fd, &msg, &buf); + ++i; + + if (status < 0) + return status; + + if (msg.msg_namelen != sizeof(nladdr)) { + fprintf(stderr, + "sender address length == %d\n", + msg.msg_namelen); + exit(1); + } + for (h = (struct nlmsghdr *)buf; status >= sizeof(*h); ) { + int len = h->nlmsg_len; + int l = len - sizeof(*h); + + if (l < 0 || len > status) { + if 
(msg.msg_flags & MSG_TRUNC) { + fprintf(stderr, "Truncated message\n"); + free(buf); + return -1; + } + fprintf(stderr, + "!!!malformed message: len=%d\n", + len); + exit(1); + } + + if (nladdr.nl_pid != 0 || + h->nlmsg_pid != rtnl->local.nl_pid || + h->nlmsg_seq > seq || h->nlmsg_seq < seq - iovlen) { + /* Don't forget to skip that message. */ + status -= NLMSG_ALIGN(len); + h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len)); + continue; + } + + if (h->nlmsg_type == NLMSG_ERROR) { + struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h); + int error = err->error; + + if (l < sizeof(struct nlmsgerr)) { + fprintf(stderr, "ERROR truncated\n"); + free(buf); + return -1; + } + + if (!error) { + /* check messages from kernel */ + nl_dump_ext_ack(h, errfn); + } else { + errno = -error; + + if (rtnl->proto != NETLINK_SOCK_DIAG && + show_rtnl_err) + rtnl_talk_error(h, err, errfn); + } + + if (i < iovlen) { + free(buf); + goto next; + } + + if (error) { + free(buf); + return -i; + } + + if (answer) + *answer = (struct nlmsghdr *)buf; + return 0; + } + + if (answer) { + *answer = (struct nlmsghdr *)buf; + return 0; + } + + fprintf(stderr, "Unexpected reply!!!\n"); + + status -= NLMSG_ALIGN(len); + h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len)); + } + free(buf); + + if (msg.msg_flags & MSG_TRUNC) { + fprintf(stderr, "Message truncated\n"); + continue; + } + + if (status) { + fprintf(stderr, "!!!Remnant of size %d\n", status); + exit(1); + } + } +} + +static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, + struct nlmsghdr **answer, + bool show_rtnl_err, nl_ext_ack_fn_t errfn) +{ + struct iovec iov = { + .iov_base = n, + .iov_len = n->nlmsg_len + }; + + return __rtnl_talk_iov(rtnl, &iov, 1, answer, show_rtnl_err, errfn); +} + +int rtnl_echo_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, int json, + int (*print_info)(struct nlmsghdr *n, void *arg)) +{ + struct nlmsghdr *answer; + int ret; + + n->nlmsg_flags |= NLM_F_ECHO | NLM_F_ACK; + + ret = 
rtnl_talk(rtnl, n, &answer); + if (ret) + return ret; + + new_json_obj(json); + open_json_object(NULL); + print_info(answer, stdout); + close_json_object(); + delete_json_obj(); + free(answer); + + return 0; +} + +int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, + struct nlmsghdr **answer) +{ + return __rtnl_talk(rtnl, n, answer, true, NULL); +} + +int rtnl_talk_iov(struct rtnl_handle *rtnl, struct iovec *iovec, size_t iovlen, + struct nlmsghdr **answer) +{ + return __rtnl_talk_iov(rtnl, iovec, iovlen, answer, true, NULL); +} + +int rtnl_talk_suppress_rtnl_errmsg(struct rtnl_handle *rtnl, struct nlmsghdr *n, + struct nlmsghdr **answer) +{ + return __rtnl_talk(rtnl, n, answer, false, NULL); +} + +int rtnl_listen_all_nsid(struct rtnl_handle *rth) +{ + unsigned int on = 1; + + if (setsockopt(rth->fd, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, &on, + sizeof(on)) < 0) { + perror("NETLINK_LISTEN_ALL_NSID"); + return -1; + } + rth->flags |= RTNL_HANDLE_F_LISTEN_ALL_NSID; + return 0; +} + +int rtnl_listen(struct rtnl_handle *rtnl, + rtnl_listen_filter_t handler, + void *jarg) +{ + int status; + struct nlmsghdr *h; + struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; + struct iovec iov; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + char buf[16384]; + char cmsgbuf[BUFSIZ]; + + iov.iov_base = buf; + while (1) { + struct rtnl_ctrl_data ctrl; + struct cmsghdr *cmsg; + + if (rtnl->flags & RTNL_HANDLE_F_LISTEN_ALL_NSID) { + msg.msg_control = &cmsgbuf; + msg.msg_controllen = sizeof(cmsgbuf); + } + + iov.iov_len = sizeof(buf); + status = recvmsg(rtnl->fd, &msg, 0); + + if (status < 0) { + if (errno == EINTR || errno == EAGAIN) + continue; + fprintf(stderr, "netlink receive error %s (%d)\n", + strerror(errno), errno); + if (errno == ENOBUFS) + continue; + return -1; + } + if (status == 0) { + fprintf(stderr, "EOF on netlink\n"); + return -1; + } + if (msg.msg_namelen != sizeof(nladdr)) { + 
fprintf(stderr, + "Sender address length == %d\n", + msg.msg_namelen); + exit(1); + } + + if (rtnl->flags & RTNL_HANDLE_F_LISTEN_ALL_NSID) { + memset(&ctrl, 0, sizeof(ctrl)); + ctrl.nsid = -1; + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; + cmsg = CMSG_NXTHDR(&msg, cmsg)) + if (cmsg->cmsg_level == SOL_NETLINK && + cmsg->cmsg_type == NETLINK_LISTEN_ALL_NSID && + cmsg->cmsg_len == CMSG_LEN(sizeof(int))) { + int *data = (int *)CMSG_DATA(cmsg); + + ctrl.nsid = *data; + } + } + + for (h = (struct nlmsghdr *)buf; status >= sizeof(*h); ) { + int err; + int len = h->nlmsg_len; + int l = len - sizeof(*h); + + if (l < 0 || len > status) { + if (msg.msg_flags & MSG_TRUNC) { + fprintf(stderr, "Truncated message\n"); + return -1; + } + fprintf(stderr, + "!!!malformed message: len=%d\n", + len); + exit(1); + } + + err = handler(&ctrl, h, jarg); + if (err < 0) + return err; + + status -= NLMSG_ALIGN(len); + h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len)); + } + if (msg.msg_flags & MSG_TRUNC) { + fprintf(stderr, "Message truncated\n"); + continue; + } + if (status) { + fprintf(stderr, "!!!Remnant of size %d\n", status); + exit(1); + } + } +} + +int rtnl_from_file(FILE *rtnl, rtnl_listen_filter_t handler, + void *jarg) +{ + size_t status; + char buf[16384]; + struct nlmsghdr *h = (struct nlmsghdr *)buf; + + while (1) { + int err, len; + int l; + + status = fread(&buf, 1, sizeof(*h), rtnl); + + if (status == 0 && feof(rtnl)) + return 0; + if (status != sizeof(*h)) { + if (ferror(rtnl)) + perror("rtnl_from_file: fread"); + if (feof(rtnl)) + fprintf(stderr, "rtnl-from_file: truncated message\n"); + return -1; + } + + len = h->nlmsg_len; + l = len - sizeof(*h); + + if (l < 0 || len > sizeof(buf)) { + fprintf(stderr, "!!!malformed message: len=%d @%lu\n", + len, ftell(rtnl)); + return -1; + } + + status = fread(NLMSG_DATA(h), 1, NLMSG_ALIGN(l), rtnl); + + if (status != NLMSG_ALIGN(l)) { + if (ferror(rtnl)) + perror("rtnl_from_file: fread"); + if (feof(rtnl)) + fprintf(stderr, 
"rtnl-from_file: truncated message\n"); + return -1; + } + + err = handler(NULL, h, jarg); + if (err < 0) + return err; + } +} + +int addattr(struct nlmsghdr *n, int maxlen, int type) +{ + return addattr_l(n, maxlen, type, NULL, 0); +} + +int addattr8(struct nlmsghdr *n, int maxlen, int type, __u8 data) +{ + return addattr_l(n, maxlen, type, &data, sizeof(__u8)); +} + +int addattr16(struct nlmsghdr *n, int maxlen, int type, __u16 data) +{ + return addattr_l(n, maxlen, type, &data, sizeof(__u16)); +} + +int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data) +{ + return addattr_l(n, maxlen, type, &data, sizeof(__u32)); +} + +int addattr64(struct nlmsghdr *n, int maxlen, int type, __u64 data) +{ + return addattr_l(n, maxlen, type, &data, sizeof(__u64)); +} + +int addattrstrz(struct nlmsghdr *n, int maxlen, int type, const char *str) +{ + return addattr_l(n, maxlen, type, str, strlen(str)+1); +} + +int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, + int alen) +{ + int len = RTA_LENGTH(alen); + struct rtattr *rta; + + if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen) { + fprintf(stderr, + "addattr_l ERROR: message exceeded bound of %d\n", + maxlen); + return -1; + } + rta = NLMSG_TAIL(n); + rta->rta_type = type; + rta->rta_len = len; + if (alen) + memcpy(RTA_DATA(rta), data, alen); + n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len); + return 0; +} + +int addraw_l(struct nlmsghdr *n, int maxlen, const void *data, int len) +{ + if (NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len) > maxlen) { + fprintf(stderr, + "addraw_l ERROR: message exceeded bound of %d\n", + maxlen); + return -1; + } + + memcpy(NLMSG_TAIL(n), data, len); + memset((void *) NLMSG_TAIL(n) + len, 0, NLMSG_ALIGN(len) - len); + n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len); + return 0; +} + +struct rtattr *addattr_nest(struct nlmsghdr *n, int maxlen, int type) +{ + struct rtattr *nest = NLMSG_TAIL(n); + + addattr_l(n, maxlen, type, NULL, 0); + 
return nest; +} + +int addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest) +{ + nest->rta_len = (void *)NLMSG_TAIL(n) - (void *)nest; + return n->nlmsg_len; +} + +struct rtattr *addattr_nest_compat(struct nlmsghdr *n, int maxlen, int type, + const void *data, int len) +{ + struct rtattr *start = NLMSG_TAIL(n); + + addattr_l(n, maxlen, type, data, len); + addattr_nest(n, maxlen, type); + return start; +} + +int addattr_nest_compat_end(struct nlmsghdr *n, struct rtattr *start) +{ + struct rtattr *nest = (void *)start + NLMSG_ALIGN(start->rta_len); + + start->rta_len = (void *)NLMSG_TAIL(n) - (void *)start; + addattr_nest_end(n, nest); + return n->nlmsg_len; +} + +int rta_addattr32(struct rtattr *rta, int maxlen, int type, __u32 data) +{ + int len = RTA_LENGTH(4); + struct rtattr *subrta; + + if (RTA_ALIGN(rta->rta_len) + len > maxlen) { + fprintf(stderr, + "rta_addattr32: Error! max allowed bound %d exceeded\n", + maxlen); + return -1; + } + subrta = (struct rtattr *)(((char *)rta) + RTA_ALIGN(rta->rta_len)); + subrta->rta_type = type; + subrta->rta_len = len; + memcpy(RTA_DATA(subrta), &data, 4); + rta->rta_len = NLMSG_ALIGN(rta->rta_len) + len; + return 0; +} + +int rta_addattr_l(struct rtattr *rta, int maxlen, int type, + const void *data, int alen) +{ + struct rtattr *subrta; + int len = RTA_LENGTH(alen); + + if (RTA_ALIGN(rta->rta_len) + RTA_ALIGN(len) > maxlen) { + fprintf(stderr, + "rta_addattr_l: Error! 
max allowed bound %d exceeded\n", + maxlen); + return -1; + } + subrta = (struct rtattr *)(((char *)rta) + RTA_ALIGN(rta->rta_len)); + subrta->rta_type = type; + subrta->rta_len = len; + if (alen) + memcpy(RTA_DATA(subrta), data, alen); + rta->rta_len = NLMSG_ALIGN(rta->rta_len) + RTA_ALIGN(len); + return 0; +} + +int rta_addattr8(struct rtattr *rta, int maxlen, int type, __u8 data) +{ + return rta_addattr_l(rta, maxlen, type, &data, sizeof(__u8)); +} + +int rta_addattr16(struct rtattr *rta, int maxlen, int type, __u16 data) +{ + return rta_addattr_l(rta, maxlen, type, &data, sizeof(__u16)); +} + +int rta_addattr64(struct rtattr *rta, int maxlen, int type, __u64 data) +{ + return rta_addattr_l(rta, maxlen, type, &data, sizeof(__u64)); +} + +struct rtattr *rta_nest(struct rtattr *rta, int maxlen, int type) +{ + struct rtattr *nest = RTA_TAIL(rta); + + rta_addattr_l(rta, maxlen, type, NULL, 0); + nest->rta_type |= NLA_F_NESTED; + + return nest; +} + +int rta_nest_end(struct rtattr *rta, struct rtattr *nest) +{ + nest->rta_len = (void *)RTA_TAIL(rta) - (void *)nest; + + return rta->rta_len; +} + +int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len) +{ + return parse_rtattr_flags(tb, max, rta, len, 0); +} + +int parse_rtattr_flags(struct rtattr *tb[], int max, struct rtattr *rta, + int len, unsigned short flags) +{ + unsigned short type; + + memset(tb, 0, sizeof(struct rtattr *) * (max + 1)); + while (RTA_OK(rta, len)) { + type = rta->rta_type & ~flags; + if ((type <= max) && (!tb[type])) + tb[type] = rta; + rta = RTA_NEXT(rta, len); + } + if (len) + fprintf(stderr, "!!!Deficit %d, rta_len=%d\n", + len, rta->rta_len); + return 0; +} + +struct rtattr *parse_rtattr_one(int type, struct rtattr *rta, int len) +{ + while (RTA_OK(rta, len)) { + if (rta->rta_type == type) + return rta; + rta = RTA_NEXT(rta, len); + } + + if (len) + fprintf(stderr, "!!!Deficit %d, rta_len=%d\n", + len, rta->rta_len); + return NULL; +} + +int 
__parse_rtattr_nested_compat(struct rtattr *tb[], int max, + struct rtattr *rta, + int len) +{ + if (RTA_PAYLOAD(rta) < len) + return -1; + if (RTA_PAYLOAD(rta) >= RTA_ALIGN(len) + sizeof(struct rtattr)) { + rta = RTA_DATA(rta) + RTA_ALIGN(len); + return parse_rtattr_nested(tb, max, rta); + } + memset(tb, 0, sizeof(struct rtattr *) * (max + 1)); + return 0; +} + +static const char *get_nla_type_str(unsigned int attr) +{ + switch (attr) { +#define C(x) case NL_ATTR_TYPE_ ## x: return #x + C(U8); + C(U16); + C(U32); + C(U64); + C(STRING); + C(FLAG); + C(NESTED); + C(NESTED_ARRAY); + C(NUL_STRING); + C(BINARY); + C(S8); + C(S16); + C(S32); + C(S64); + C(BITFIELD32); + default: + return "unknown"; + } +} + +void nl_print_policy(const struct rtattr *attr, FILE *fp) +{ + const struct rtattr *pos; + + rtattr_for_each_nested(pos, attr) { + const struct rtattr *attr; + + fprintf(fp, " policy[%u]:", pos->rta_type & ~NLA_F_NESTED); + + rtattr_for_each_nested(attr, pos) { + struct rtattr *tp[NL_POLICY_TYPE_ATTR_MAX + 1]; + + parse_rtattr_nested(tp, ARRAY_SIZE(tp) - 1, attr); + + if (tp[NL_POLICY_TYPE_ATTR_TYPE]) + fprintf(fp, "attr[%u]: type=%s", + attr->rta_type & ~NLA_F_NESTED, + get_nla_type_str(rta_getattr_u32(tp[NL_POLICY_TYPE_ATTR_TYPE]))); + + if (tp[NL_POLICY_TYPE_ATTR_POLICY_IDX]) + fprintf(fp, " policy:%u", + rta_getattr_u32(tp[NL_POLICY_TYPE_ATTR_POLICY_IDX])); + + if (tp[NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE]) + fprintf(fp, " maxattr:%u", + rta_getattr_u32(tp[NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE])); + + if (tp[NL_POLICY_TYPE_ATTR_MIN_VALUE_S] && tp[NL_POLICY_TYPE_ATTR_MAX_VALUE_S]) + fprintf(fp, " range:[%lld,%lld]", + (signed long long)rta_getattr_u64(tp[NL_POLICY_TYPE_ATTR_MIN_VALUE_S]), + (signed long long)rta_getattr_u64(tp[NL_POLICY_TYPE_ATTR_MAX_VALUE_S])); + + if (tp[NL_POLICY_TYPE_ATTR_MIN_VALUE_U] && tp[NL_POLICY_TYPE_ATTR_MAX_VALUE_U]) + fprintf(fp, " range:[%llu,%llu]", + (unsigned long long)rta_getattr_u64(tp[NL_POLICY_TYPE_ATTR_MIN_VALUE_U]), + (unsigned 
long long)rta_getattr_u64(tp[NL_POLICY_TYPE_ATTR_MAX_VALUE_U])); + + if (tp[NL_POLICY_TYPE_ATTR_MIN_LENGTH]) + fprintf(fp, " min len:%u", + rta_getattr_u32(tp[NL_POLICY_TYPE_ATTR_MIN_LENGTH])); + + if (tp[NL_POLICY_TYPE_ATTR_MAX_LENGTH]) + fprintf(fp, " max len:%u", + rta_getattr_u32(tp[NL_POLICY_TYPE_ATTR_MAX_LENGTH])); + } + } +} + +int rtnl_tunneldump_req(struct rtnl_handle *rth, int family, int ifindex, + __u8 flags) +{ + struct { + struct nlmsghdr nlh; + struct tunnel_msg tmsg; + char buf[256]; + } req = { + .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tunnel_msg)), + .nlh.nlmsg_type = RTM_GETTUNNEL, + .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, + .nlh.nlmsg_seq = rth->dump = ++rth->seq, + .tmsg.family = family, + .tmsg.flags = flags, + .tmsg.ifindex = ifindex, + }; + + return send(rth->fd, &req, sizeof(req), 0); +} diff --git a/lib/ll_addr.c b/lib/ll_addr.c new file mode 100644 index 0000000..d6fd736 --- /dev/null +++ b/lib/ll_addr.c @@ -0,0 +1,95 @@ +/* + * ll_addr.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <string.h> + +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/sockios.h> + +#include "rt_names.h" +#include "utils.h" + +const char *ll_addr_n2a(const unsigned char *addr, int alen, int type, + char *buf, int blen) +{ + int i; + int l; + + if (alen == 4 && + (type == ARPHRD_TUNNEL || type == ARPHRD_SIT + || type == ARPHRD_IPGRE)) + return inet_ntop(AF_INET, addr, buf, blen); + + if (alen == 16 && (type == ARPHRD_TUNNEL6 || type == ARPHRD_IP6GRE)) + return inet_ntop(AF_INET6, addr, buf, blen); + if (alen == 7 && type == ARPHRD_AX25) + return ax25_ntop(AF_AX25, addr, buf, blen); + if (alen == 7 && type == ARPHRD_NETROM) + return netrom_ntop(AF_NETROM, addr, buf, blen); + if (alen == 5 && type == ARPHRD_ROSE) + return rose_ntop(AF_ROSE, addr, buf, blen); + + snprintf(buf, blen, "%02x", addr[0]); + for (i = 1, l = 2; i < alen && l < blen; i++, l += 3) + snprintf(buf + l, blen - l, ":%02x", addr[i]); + return buf; +} + +/*NB: lladdr is char * (rather than u8 *) because sa_data is char * (1003.1g) */ +int ll_addr_a2n(char *lladdr, int len, const char *arg) +{ + if (strchr(arg, '.')) { + inet_prefix pfx; + if (get_addr_1(&pfx, arg, AF_INET)) { + fprintf(stderr, "\"%s\" is invalid lladdr.\n", arg); + return -1; + } + if (len < 4) + return -1; + memcpy(lladdr, pfx.data, 4); + return 4; + } else { + int i; + + for (i = 0; i < len; i++) { + int temp; + char *cp = strchr(arg, ':'); + if (cp) { + *cp = 0; + cp++; + } + if (sscanf(arg, "%x", &temp) != 1) { + fprintf(stderr, "\"%s\" is invalid lladdr.\n", + arg); + return -1; + } + if (temp < 0 || temp > 255) { + fprintf(stderr, "\"%s\" is invalid lladdr.\n", + arg); + return -1; + } + lladdr[i] = temp; + if (!cp) + break; + arg = cp; + } + return i + 1; 
+ } +} diff --git a/lib/ll_map.c b/lib/ll_map.c new file mode 100644 index 0000000..70ea3d4 --- /dev/null +++ b/lib/ll_map.c @@ -0,0 +1,410 @@ +/* + * ll_map.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <string.h> +#include <net/if.h> + +#include "libnetlink.h" +#include "ll_map.h" +#include "list.h" +#include "utils.h" + +struct ll_cache { + struct hlist_node idx_hash; + struct hlist_node name_hash; + unsigned flags; + unsigned index; + unsigned short type; + struct list_head altnames_list; + char name[]; +}; + +#define IDXMAP_SIZE 1024 +static struct hlist_head idx_head[IDXMAP_SIZE]; +static struct hlist_head name_head[IDXMAP_SIZE]; + +static struct ll_cache *ll_get_by_index(unsigned index) +{ + struct hlist_node *n; + unsigned h = index & (IDXMAP_SIZE - 1); + + hlist_for_each(n, &idx_head[h]) { + struct ll_cache *im + = container_of(n, struct ll_cache, idx_hash); + if (im->index == index) + return im; + } + + return NULL; +} + +unsigned namehash(const char *str) +{ + unsigned hash = 5381; + + while (*str) + hash = ((hash << 5) + hash) + *str++; /* hash * 33 + c */ + + return hash; +} + +static struct ll_cache *ll_get_by_name(const char *name) +{ + struct hlist_node *n; + unsigned h = namehash(name) & (IDXMAP_SIZE - 1); + + hlist_for_each(n, &name_head[h]) { + struct ll_cache *im + = container_of(n, struct ll_cache, name_hash); + + if (strcmp(im->name, name) == 0) + return im; + } + + return NULL; +} + +static struct ll_cache *ll_entry_create(struct ifinfomsg *ifi, + const char *ifname, + struct ll_cache *parent_im) +{ + struct ll_cache *im; + 
unsigned int h; + + im = malloc(sizeof(*im) + strlen(ifname) + 1); + if (!im) + return NULL; + im->index = ifi->ifi_index; + strcpy(im->name, ifname); + im->type = ifi->ifi_type; + im->flags = ifi->ifi_flags; + + if (parent_im) { + list_add_tail(&im->altnames_list, &parent_im->altnames_list); + } else { + /* This is parent, insert to index hash. */ + h = ifi->ifi_index & (IDXMAP_SIZE - 1); + hlist_add_head(&im->idx_hash, &idx_head[h]); + INIT_LIST_HEAD(&im->altnames_list); + } + + h = namehash(ifname) & (IDXMAP_SIZE - 1); + hlist_add_head(&im->name_hash, &name_head[h]); + return im; +} + +static void ll_entry_destroy(struct ll_cache *im, bool im_is_parent) +{ + hlist_del(&im->name_hash); + if (im_is_parent) + hlist_del(&im->idx_hash); + else + list_del(&im->altnames_list); + free(im); +} + +static void ll_entry_update(struct ll_cache *im, struct ifinfomsg *ifi, + const char *ifname) +{ + unsigned int h; + + im->flags = ifi->ifi_flags; + if (!strcmp(im->name, ifname)) + return; + hlist_del(&im->name_hash); + h = namehash(ifname) & (IDXMAP_SIZE - 1); + hlist_add_head(&im->name_hash, &name_head[h]); +} + +static void ll_altname_entries_create(struct ll_cache *parent_im, + struct ifinfomsg *ifi, struct rtattr **tb) +{ + struct rtattr *i, *proplist = tb[IFLA_PROP_LIST]; + int rem; + + if (!proplist) + return; + rem = RTA_PAYLOAD(proplist); + for (i = RTA_DATA(proplist); RTA_OK(i, rem); + i = RTA_NEXT(i, rem)) { + if (i->rta_type != IFLA_ALT_IFNAME) + continue; + ll_entry_create(ifi, rta_getattr_str(i), parent_im); + } +} + +static void ll_altname_entries_destroy(struct ll_cache *parent_im) +{ + struct ll_cache *im, *tmp; + + list_for_each_entry_safe(im, tmp, &parent_im->altnames_list, + altnames_list) + ll_entry_destroy(im, false); +} + +static void ll_altname_entries_update(struct ll_cache *parent_im, + struct ifinfomsg *ifi, struct rtattr **tb) +{ + struct rtattr *i, *proplist = tb[IFLA_PROP_LIST]; + struct ll_cache *im; + int rem; + + if (!proplist) { + 
ll_altname_entries_destroy(parent_im); + return; + } + + /* Simply compare the altname list with the cached one + * and if it does not fit 1:1, recreate the cached list + * from scratch. + */ + im = list_first_entry(&parent_im->altnames_list, typeof(*im), + altnames_list); + rem = RTA_PAYLOAD(proplist); + for (i = RTA_DATA(proplist); RTA_OK(i, rem); + i = RTA_NEXT(i, rem)) { + if (i->rta_type != IFLA_ALT_IFNAME) + continue; + if (!im || strcmp(rta_getattr_str(i), im->name)) + goto recreate_altname_entries; + im = list_next_entry(im, altnames_list); + } + if (list_next_entry(im, altnames_list)) + goto recreate_altname_entries; + return; + +recreate_altname_entries: + ll_altname_entries_destroy(parent_im); + ll_altname_entries_create(parent_im, ifi, tb); +} + +static void ll_entries_create(struct ifinfomsg *ifi, struct rtattr **tb) +{ + struct ll_cache *parent_im; + + if (!tb[IFLA_IFNAME]) + return; + parent_im = ll_entry_create(ifi, rta_getattr_str(tb[IFLA_IFNAME]), + NULL); + if (!parent_im) + return; + ll_altname_entries_create(parent_im, ifi, tb); +} + +static void ll_entries_destroy(struct ll_cache *parent_im) +{ + ll_altname_entries_destroy(parent_im); + ll_entry_destroy(parent_im, true); +} + +static void ll_entries_update(struct ll_cache *parent_im, + struct ifinfomsg *ifi, struct rtattr **tb) +{ + if (tb[IFLA_IFNAME]) + ll_entry_update(parent_im, ifi, + rta_getattr_str(tb[IFLA_IFNAME])); + ll_altname_entries_update(parent_im, ifi, tb); +} + +int ll_remember_index(struct nlmsghdr *n, void *arg) +{ + struct ifinfomsg *ifi = NLMSG_DATA(n); + struct ll_cache *im; + struct rtattr *tb[IFLA_MAX+1]; + + if (n->nlmsg_type != RTM_NEWLINK && n->nlmsg_type != RTM_DELLINK) + return 0; + + if (n->nlmsg_len < NLMSG_LENGTH(sizeof(*ifi))) + return -1; + + im = ll_get_by_index(ifi->ifi_index); + if (n->nlmsg_type == RTM_DELLINK) { + if (im) + ll_entries_destroy(im); + return 0; + } + + parse_rtattr_flags(tb, IFLA_MAX, IFLA_RTA(ifi), + IFLA_PAYLOAD(n), NLA_F_NESTED); + if (im) 
+ ll_entries_update(im, ifi, tb); + else + ll_entries_create(ifi, tb); + return 0; +} + +const char *ll_idx_n2a(unsigned int idx) +{ + static char buf[IFNAMSIZ]; + + snprintf(buf, sizeof(buf), "if%u", idx); + return buf; +} + +static unsigned int ll_idx_a2n(const char *name) +{ + unsigned int idx; + + if (sscanf(name, "if%u", &idx) != 1) + return 0; + return idx; +} + +static int ll_link_get(const char *name, int index) +{ + struct { + struct nlmsghdr n; + struct ifinfomsg ifm; + char buf[1024]; + } req = { + .n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), + .n.nlmsg_flags = NLM_F_REQUEST, + .n.nlmsg_type = RTM_GETLINK, + .ifm.ifi_index = index, + }; + __u32 filt_mask = RTEXT_FILTER_VF | RTEXT_FILTER_SKIP_STATS; + struct rtnl_handle rth = {}; + struct nlmsghdr *answer; + int rc = 0; + + if (rtnl_open(&rth, 0) < 0) + return 0; + + addattr32(&req.n, sizeof(req), IFLA_EXT_MASK, filt_mask); + if (name) + addattr_l(&req.n, sizeof(req), + !check_ifname(name) ? IFLA_IFNAME : IFLA_ALT_IFNAME, + name, strlen(name) + 1); + + if (rtnl_talk_suppress_rtnl_errmsg(&rth, &req.n, &answer) < 0) + goto out; + + /* add entry to cache */ + rc = ll_remember_index(answer, NULL); + if (!rc) { + struct ifinfomsg *ifm = NLMSG_DATA(answer); + + rc = ifm->ifi_index; + } + + free(answer); +out: + rtnl_close(&rth); + return rc; +} + +const char *ll_index_to_name(unsigned int idx) +{ + static char buf[IFNAMSIZ]; + const struct ll_cache *im; + + if (idx == 0) + return "*"; + + im = ll_get_by_index(idx); + if (im) + return im->name; + + if (ll_link_get(NULL, idx) == idx) { + im = ll_get_by_index(idx); + if (im) + return im->name; + } + + if (if_indextoname(idx, buf) == NULL) + snprintf(buf, IFNAMSIZ, "if%u", idx); + + return buf; +} + +int ll_index_to_type(unsigned idx) +{ + const struct ll_cache *im; + + if (idx == 0) + return -1; + + im = ll_get_by_index(idx); + return im ? 
im->type : -1; +} + +int ll_index_to_flags(unsigned idx) +{ + const struct ll_cache *im; + + if (idx == 0) + return 0; + + im = ll_get_by_index(idx); + return im ? im->flags : -1; +} + +unsigned ll_name_to_index(const char *name) +{ + const struct ll_cache *im; + unsigned idx; + + if (name == NULL) + return 0; + + im = ll_get_by_name(name); + if (im) + return im->index; + + idx = ll_link_get(name, 0); + if (idx == 0) + idx = if_nametoindex(name); + if (idx == 0) + idx = ll_idx_a2n(name); + return idx; +} + +void ll_drop_by_index(unsigned index) +{ + struct ll_cache *im; + + im = ll_get_by_index(index); + if (!im) + return; + + hlist_del(&im->idx_hash); + hlist_del(&im->name_hash); + + free(im); +} + +void ll_init_map(struct rtnl_handle *rth) +{ + static int initialized; + + if (initialized) + return; + + if (rtnl_linkdump_req(rth, AF_UNSPEC) < 0) { + perror("Cannot send dump request"); + exit(1); + } + + if (rtnl_dump_filter(rth, ll_remember_index, NULL) < 0) { + fprintf(stderr, "Dump terminated\n"); + exit(1); + } + + initialized = 1; +} diff --git a/lib/ll_proto.c b/lib/ll_proto.c new file mode 100644 index 0000000..925e2ca --- /dev/null +++ b/lib/ll_proto.c @@ -0,0 +1,103 @@ +/* + * ll_proto.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <string.h> + +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/sockios.h> + +#include "utils.h" +#include "rt_names.h" + + +#define __PF(f,n) { ETH_P_##f, #n }, + +static const struct proto llproto_names[] = { +__PF(LOOP,loop) +__PF(PUP,pup) +__PF(PUPAT,pupat) +__PF(IP,ip) +__PF(X25,x25) +__PF(ARP,arp) +__PF(BPQ,bpq) +__PF(IEEEPUP,ieeepup) +__PF(IEEEPUPAT,ieeepupat) +__PF(DEC,dec) +__PF(DNA_DL,dna_dl) +__PF(DNA_RC,dna_rc) +__PF(DNA_RT,dna_rt) +__PF(LAT,lat) +__PF(DIAG,diag) +__PF(CUST,cust) +__PF(SCA,sca) +__PF(RARP,rarp) +__PF(ATALK,atalk) +__PF(AARP,aarp) +__PF(IPX,ipx) +__PF(IPV6,ipv6) +__PF(PPP_DISC,ppp_disc) +__PF(PPP_SES,ppp_ses) +__PF(ATMMPOA,atmmpoa) +__PF(ATMFATE,atmfate) +__PF(802_3,802_3) +__PF(AX25,ax25) +__PF(ALL,all) +__PF(802_2,802_2) +__PF(SNAP,snap) +__PF(DDCMP,ddcmp) +__PF(WAN_PPP,wan_ppp) +__PF(PPP_MP,ppp_mp) +__PF(LOCALTALK,localtalk) +__PF(CAN,can) +__PF(PPPTALK,ppptalk) +__PF(TR_802_2,tr_802_2) +__PF(MOBITEX,mobitex) +__PF(CONTROL,control) +__PF(IRDA,irda) +__PF(ECONET,econet) +__PF(TIPC,tipc) +__PF(PROFINET,profinet) +__PF(AOE,aoe) +__PF(ETHERCAT,ethercat) +__PF(8021Q,802.1Q) +__PF(8021AD,802.1ad) +__PF(MPLS_UC,mpls_uc) +__PF(MPLS_MC,mpls_mc) +__PF(TEB,teb) + +{ 0x8100, "802.1Q" }, +{ 0x88cc, "LLDP" }, +{ ETH_P_IP, "ipv4" }, +}; +#undef __PF + +const char *ll_proto_n2a(unsigned short id, char *buf, int len) +{ + size_t len_tb = ARRAY_SIZE(llproto_names); + + return proto_n2a(id, buf, len, llproto_names, len_tb); +} + +int ll_proto_a2n(unsigned short *id, const char *buf) +{ + size_t len_tb = ARRAY_SIZE(llproto_names); + + return proto_a2n(id, buf, llproto_names, len_tb); +} diff --git a/lib/ll_types.c b/lib/ll_types.c new file mode 100644 index 0000000..49da15d --- 
/dev/null +++ b/lib/ll_types.c @@ -0,0 +1,122 @@ +/* + * ll_types.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <string.h> + +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/sockios.h> + +#include "rt_names.h" +#include "utils.h" + +const char * ll_type_n2a(int type, char *buf, int len) +{ +#define __PF(f,n) { ARPHRD_##f, #n }, +static const struct { + int type; + const char *name; +} arphrd_names[] = { +__PF(NETROM,netrom) +__PF(ETHER,ether) +__PF(EETHER,eether) +__PF(AX25,ax25) +__PF(PRONET,pronet) +__PF(CHAOS,chaos) +__PF(IEEE802,ieee802) +__PF(ARCNET,arcnet) +__PF(APPLETLK,atalk) +__PF(DLCI,dlci) +__PF(ATM,atm) +__PF(METRICOM,metricom) +__PF(IEEE1394,ieee1394) +__PF(INFINIBAND,infiniband) +__PF(SLIP,slip) +__PF(CSLIP,cslip) +__PF(SLIP6,slip6) +__PF(CSLIP6,cslip6) +__PF(RSRVD,rsrvd) +__PF(ADAPT,adapt) +__PF(ROSE,rose) +__PF(X25,x25) +__PF(HWX25,hwx25) +__PF(CAN,can) +__PF(PPP,ppp) +__PF(HDLC,hdlc) +__PF(LAPB,lapb) +__PF(DDCMP,ddcmp) +__PF(RAWHDLC,rawhdlc) +__PF(TUNNEL,ipip) +__PF(TUNNEL6,tunnel6) +__PF(FRAD,frad) +__PF(SKIP,skip) +__PF(LOOPBACK,loopback) +__PF(LOCALTLK,ltalk) +__PF(FDDI,fddi) +__PF(BIF,bif) +__PF(SIT,sit) +__PF(IPDDP,ip/ddp) +__PF(IPGRE,gre) +__PF(PIMREG,pimreg) +__PF(HIPPI,hippi) +__PF(ASH,ash) +__PF(ECONET,econet) +__PF(IRDA,irda) +__PF(FCPP,fcpp) +__PF(FCAL,fcal) +__PF(FCPL,fcpl) +__PF(FCFABRIC,fcfb0) +__PF(FCFABRIC+1,fcfb1) +__PF(FCFABRIC+2,fcfb2) +__PF(FCFABRIC+3,fcfb3) +__PF(FCFABRIC+4,fcfb4) +__PF(FCFABRIC+5,fcfb5) +__PF(FCFABRIC+6,fcfb6) 
+__PF(FCFABRIC+7,fcfb7) +__PF(FCFABRIC+8,fcfb8) +__PF(FCFABRIC+9,fcfb9) +__PF(FCFABRIC+10,fcfb10) +__PF(FCFABRIC+11,fcfb11) +__PF(FCFABRIC+12,fcfb12) +__PF(IEEE802_TR,tr) +__PF(IEEE80211,ieee802.11) +__PF(IEEE80211_PRISM,ieee802.11/prism) +__PF(IEEE80211_RADIOTAP,ieee802.11/radiotap) +__PF(IEEE802154, ieee802.15.4) +__PF(IEEE802154_MONITOR, ieee802.15.4/monitor) +__PF(PHONET, phonet) +__PF(PHONET_PIPE, phonet_pipe) +__PF(CAIF, caif) +__PF(IP6GRE, gre6) +__PF(NETLINK, netlink) +__PF(6LOWPAN, 6lowpan) + +__PF(NONE, none) +__PF(VOID,void) +}; +#undef __PF + + int i; + for (i=0; !numeric && i<sizeof(arphrd_names)/sizeof(arphrd_names[0]); i++) { + if (arphrd_names[i].type == type) + return arphrd_names[i].name; + } + snprintf(buf, len, "[%d]", type); + return buf; +} diff --git a/lib/mnl_utils.c b/lib/mnl_utils.c new file mode 100644 index 0000000..f8e07d2 --- /dev/null +++ b/lib/mnl_utils.c @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * mnl_utils.c Helpers for working with libmnl. 
+ */ + +#include <errno.h> +#include <string.h> +#include <time.h> +#include <libmnl/libmnl.h> +#include <linux/genetlink.h> + +#include "libnetlink.h" +#include "mnl_utils.h" +#include "utils.h" + +struct mnl_socket *mnlu_socket_open(int bus) +{ + struct mnl_socket *nl; + int one = 1; + + nl = mnl_socket_open(bus); + if (nl == NULL) + return NULL; + + mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &one, sizeof(one)); + mnl_socket_setsockopt(nl, NETLINK_EXT_ACK, &one, sizeof(one)); + + if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) + goto err_bind; + + return nl; + +err_bind: + mnl_socket_close(nl); + return NULL; +} + +struct nlmsghdr *mnlu_msg_prepare(void *buf, uint32_t nlmsg_type, uint16_t flags, + void *extra_header, size_t extra_header_size) +{ + struct nlmsghdr *nlh; + void *eh; + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = nlmsg_type; + nlh->nlmsg_flags = flags; + nlh->nlmsg_seq = time(NULL); + + eh = mnl_nlmsg_put_extra_header(nlh, extra_header_size); + memcpy(eh, extra_header, extra_header_size); + + return nlh; +} + +static int mnlu_cb_noop(const struct nlmsghdr *nlh, void *data) +{ + return MNL_CB_OK; +} + +static int mnlu_cb_error(const struct nlmsghdr *nlh, void *data) +{ + const struct nlmsgerr *err = mnl_nlmsg_get_payload(nlh); + + /* Netlink subsystems returns the errno value with different signess */ + if (err->error < 0) + errno = -err->error; + else + errno = err->error; + + if (nl_dump_ext_ack(nlh, NULL)) + return MNL_CB_ERROR; + + return err->error == 0 ? 
MNL_CB_STOP : MNL_CB_ERROR; +} + +static int mnlu_cb_stop(const struct nlmsghdr *nlh, void *data) +{ + int len = *(int *)NLMSG_DATA(nlh); + + if (len < 0) { + errno = -len; + nl_dump_ext_ack_done(nlh, sizeof(int), len); + return MNL_CB_ERROR; + } + return MNL_CB_STOP; +} + +static mnl_cb_t mnlu_cb_array[NLMSG_MIN_TYPE] = { + [NLMSG_NOOP] = mnlu_cb_noop, + [NLMSG_ERROR] = mnlu_cb_error, + [NLMSG_DONE] = mnlu_cb_stop, + [NLMSG_OVERRUN] = mnlu_cb_noop, +}; + +int mnlu_socket_recv_run(struct mnl_socket *nl, unsigned int seq, void *buf, size_t buf_size, + mnl_cb_t cb, void *data) +{ + unsigned int portid = mnl_socket_get_portid(nl); + int err; + + do { + err = mnl_socket_recvfrom(nl, buf, buf_size); + if (err <= 0) + break; + err = mnl_cb_run2(buf, err, seq, portid, + cb, data, mnlu_cb_array, + ARRAY_SIZE(mnlu_cb_array)); + } while (err > 0); + + return err; +} + +static int get_family_attrs_cb(const struct nlattr *attr, void *data) +{ + int type = mnl_attr_get_type(attr); + const struct nlattr **tb = data; + + if (mnl_attr_type_valid(attr, CTRL_ATTR_MAX) < 0) + return MNL_CB_ERROR; + + if (type == CTRL_ATTR_FAMILY_ID && + mnl_attr_validate(attr, MNL_TYPE_U16) < 0) + return MNL_CB_ERROR; + if (type == CTRL_ATTR_MAXATTR && + mnl_attr_validate(attr, MNL_TYPE_U32) < 0) + return MNL_CB_ERROR; + tb[type] = attr; + return MNL_CB_OK; +} + +static int get_family_cb(const struct nlmsghdr *nlh, void *data) +{ + struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); + struct nlattr *tb[CTRL_ATTR_MAX + 1] = {}; + struct mnlu_gen_socket *nlg = data; + + mnl_attr_parse(nlh, sizeof(*genl), get_family_attrs_cb, tb); + if (!tb[CTRL_ATTR_FAMILY_ID]) + return MNL_CB_ERROR; + if (!tb[CTRL_ATTR_MAXATTR]) + return MNL_CB_ERROR; + nlg->family = mnl_attr_get_u16(tb[CTRL_ATTR_FAMILY_ID]); + nlg->maxattr = mnl_attr_get_u32(tb[CTRL_ATTR_MAXATTR]); + return MNL_CB_OK; +} + +static int family_get(struct mnlu_gen_socket *nlg, const char *family_name) +{ + struct genlmsghdr hdr = {}; + struct nlmsghdr 
*nlh; + int err; + + hdr.cmd = CTRL_CMD_GETFAMILY; + hdr.version = 0x1; + + nlh = mnlu_msg_prepare(nlg->buf, GENL_ID_CTRL, + NLM_F_REQUEST | NLM_F_ACK, + &hdr, sizeof(hdr)); + + mnl_attr_put_strz(nlh, CTRL_ATTR_FAMILY_NAME, family_name); + + err = mnl_socket_sendto(nlg->nl, nlh, nlh->nlmsg_len); + if (err < 0) + return err; + + err = mnlu_socket_recv_run(nlg->nl, nlh->nlmsg_seq, nlg->buf, + MNL_SOCKET_BUFFER_SIZE, + get_family_cb, nlg); + return err; +} + +int mnlu_gen_socket_open(struct mnlu_gen_socket *nlg, const char *family_name, + uint8_t version) +{ + int err; + + nlg->buf = malloc(MNL_SOCKET_BUFFER_SIZE); + if (!nlg->buf) + goto err_buf_alloc; + + nlg->nl = mnlu_socket_open(NETLINK_GENERIC); + if (!nlg->nl) + goto err_socket_open; + + err = family_get(nlg, family_name); + if (err) + goto err_socket; + + return 0; + +err_socket: + mnl_socket_close(nlg->nl); +err_socket_open: + free(nlg->buf); +err_buf_alloc: + return -1; +} + +void mnlu_gen_socket_close(struct mnlu_gen_socket *nlg) +{ + mnl_socket_close(nlg->nl); + free(nlg->buf); +} + +struct nlmsghdr * +_mnlu_gen_socket_cmd_prepare(struct mnlu_gen_socket *nlg, + uint8_t cmd, uint16_t flags, + uint32_t id, uint8_t version) +{ + struct genlmsghdr hdr = {}; + struct nlmsghdr *nlh; + + hdr.cmd = cmd; + hdr.version = version; + nlh = mnlu_msg_prepare(nlg->buf, id, flags, &hdr, sizeof(hdr)); + nlg->seq = nlh->nlmsg_seq; + return nlh; +} + +struct nlmsghdr *mnlu_gen_socket_cmd_prepare(struct mnlu_gen_socket *nlg, + uint8_t cmd, uint16_t flags) +{ + return _mnlu_gen_socket_cmd_prepare(nlg, cmd, flags, nlg->family, + nlg->version); +} + +int mnlu_gen_socket_sndrcv(struct mnlu_gen_socket *nlg, const struct nlmsghdr *nlh, + mnl_cb_t data_cb, void *data) +{ + int err; + + err = mnl_socket_sendto(nlg->nl, nlh, nlh->nlmsg_len); + if (err < 0) { + perror("Failed to send data"); + return -errno; + } + + err = mnlu_socket_recv_run(nlg->nl, nlh->nlmsg_seq, nlg->buf, + MNL_SOCKET_BUFFER_SIZE, + data_cb, data); + if (err < 0) 
{ + fprintf(stderr, "kernel answers: %s\n", strerror(errno)); + return -errno; + } + return 0; +} + +int mnlu_gen_socket_recv_run(struct mnlu_gen_socket *nlg, mnl_cb_t cb, + void *data) +{ + return mnlu_socket_recv_run(nlg->nl, nlg->seq, nlg->buf, + MNL_SOCKET_BUFFER_SIZE, + cb, data); +} diff --git a/lib/mpls_ntop.c b/lib/mpls_ntop.c new file mode 100644 index 0000000..f8d89f4 --- /dev/null +++ b/lib/mpls_ntop.c @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <errno.h> +#include <string.h> +#include <sys/types.h> +#include <netinet/in.h> +#include <linux/mpls.h> + +#include "utils.h" + +static const char *mpls_ntop1(const struct mpls_label *addr, char *buf, size_t buflen) +{ + size_t destlen = buflen; + char *dest = buf; + int count = 0; + + while (1) { + uint32_t entry = ntohl(addr[count++].entry); + uint32_t label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT; + int len = snprintf(dest, destlen, "%u", label); + + if (len >= destlen) + break; + + /* Is this the end? 
*/ + if (entry & MPLS_LS_S_MASK) + return buf; + + dest += len; + destlen -= len; + if (destlen) { + *dest = '/'; + dest++; + destlen--; + } + } + errno = -E2BIG; + return NULL; +} + +const char *mpls_ntop(int af, const void *addr, char *buf, size_t buflen) +{ + switch(af) { + case AF_MPLS: + errno = 0; + return mpls_ntop1((struct mpls_label *)addr, buf, buflen); + default: + errno = EAFNOSUPPORT; + } + + return NULL; +} diff --git a/lib/mpls_pton.c b/lib/mpls_pton.c new file mode 100644 index 0000000..065374e --- /dev/null +++ b/lib/mpls_pton.c @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <errno.h> +#include <string.h> +#include <sys/types.h> +#include <netinet/in.h> +#include <linux/mpls.h> + +#include "utils.h" + + +static int mpls_pton1(const char *name, struct mpls_label *addr, + unsigned int maxlabels) +{ + char *endp; + unsigned count; + + for (count = 0; count < maxlabels; count++) { + unsigned long label; + + label = strtoul(name, &endp, 0); + /* Fail when the label value is out or range */ + if (label >= (1 << 20)) + return 0; + + if (endp == name) /* no digits */ + return 0; + + addr->entry = htonl(label << MPLS_LS_LABEL_SHIFT); + if (*endp == '\0') { + addr->entry |= htonl(1 << MPLS_LS_S_SHIFT); + return 1; + } + + /* Bad character in the address */ + if (*endp != '/') + return 0; + + name = endp + 1; + addr += 1; + } + /* The address was too long */ + fprintf(stderr, "Error: too many labels.\n"); + return 0; +} + +int mpls_pton(int af, const char *src, void *addr, size_t alen) +{ + unsigned int maxlabels = alen / sizeof(struct mpls_label); + int err; + + switch(af) { + case AF_MPLS: + errno = 0; + err = mpls_pton1(src, (struct mpls_label *)addr, maxlabels); + break; + default: + errno = EAFNOSUPPORT; + err = -1; + } + + return err; +} diff --git a/lib/names.c b/lib/names.c new file mode 100644 index 0000000..b46ea79 --- /dev/null +++ b/lib/names.c @@ -0,0 +1,152 @@ +/* + * names.c db names + * + * This program is free software; 
you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> + +#include "names.h" +#include "utils.h" + +#define MAX_ENTRIES 256 +#define NAME_MAX_LEN 512 + +static int read_id_name(FILE *fp, int *id, char *name) +{ + char buf[NAME_MAX_LEN]; + int min, maj; + + while (fgets(buf, sizeof(buf), fp)) { + char *p = buf; + + while (*p == ' ' || *p == '\t') + p++; + + if (*p == '#' || *p == '\n' || *p == 0) + continue; + + if (sscanf(p, "%x:%x %s\n", &maj, &min, name) == 3) { + *id = (maj << 16) | min; + } else if (sscanf(p, "%x:%x %s #", &maj, &min, name) == 3) { + *id = (maj << 16) | min; + } else if (sscanf(p, "0x%x %s\n", id, name) != 2 && + sscanf(p, "0x%x %s #", id, name) != 2 && + sscanf(p, "%d %s\n", id, name) != 2 && + sscanf(p, "%d %s #", id, name) != 2) { + strcpy(name, p); + return -1; + } + return 1; + } + + return 0; +} + +struct db_names *db_names_alloc(void) +{ + struct db_names *db; + + db = calloc(1, sizeof(*db)); + if (!db) + return NULL; + + db->size = MAX_ENTRIES; + db->hash = calloc(db->size, sizeof(struct db_entry *)); + + return db; +} + +int db_names_load(struct db_names *db, const char *path) +{ + struct db_entry *entry; + FILE *fp; + int id; + char namebuf[NAME_MAX_LEN] = {0}; + int ret = -1; + + fp = fopen(path, "r"); + if (!fp) + return -ENOENT; + + while ((ret = read_id_name(fp, &id, &namebuf[0]))) { + if (ret == -1) { + fprintf(stderr, "Database %s is corrupted at %s\n", + path, namebuf); + goto Exit; + } + ret = -1; + + if (id < 0) + continue; + + entry = malloc(sizeof(*entry)); + if (!entry) + goto Exit; + + entry->name = strdup(namebuf); + if (!entry->name) { + free(entry); + goto Exit; + } + + entry->id = id; + entry->next = db->hash[id & (db->size - 1)]; + db->hash[id & (db->size - 1)] = 
entry; + } + ret = 0; + +Exit: + fclose(fp); + return ret; +} + +void db_names_free(struct db_names *db) +{ + int i; + + if (!db) + return; + + for (i = 0; i < db->size; i++) { + struct db_entry *entry = db->hash[i]; + + while (entry) { + struct db_entry *next = entry->next; + + free(entry->name); + free(entry); + entry = next; + } + } + + free(db->hash); + free(db); +} + +char *id_to_name(struct db_names *db, int id, char *name) +{ + struct db_entry *entry; + + if (!db) + return NULL; + + entry = db->hash[id & (db->size - 1)]; + while (entry && entry->id != id) + entry = entry->next; + + if (entry) { + strncpy(name, entry->name, IDNAME_MAX); + return name; + } + + snprintf(name, IDNAME_MAX, "%d", id); + return NULL; +} diff --git a/lib/namespace.c b/lib/namespace.c new file mode 100644 index 0000000..45a7ded --- /dev/null +++ b/lib/namespace.c @@ -0,0 +1,145 @@ +/* + * namespace.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <sys/statvfs.h> +#include <fcntl.h> +#include <dirent.h> +#include <limits.h> + +#include "utils.h" +#include "namespace.h" + +static void bind_etc(const char *name) +{ + char etc_netns_path[sizeof(NETNS_ETC_DIR) + NAME_MAX]; + char netns_name[PATH_MAX]; + char etc_name[PATH_MAX]; + struct dirent *entry; + DIR *dir; + + if (strlen(name) >= NAME_MAX) + return; + + snprintf(etc_netns_path, sizeof(etc_netns_path), "%s/%s", NETNS_ETC_DIR, name); + dir = opendir(etc_netns_path); + if (!dir) + return; + + while ((entry = readdir(dir)) != NULL) { + if (strcmp(entry->d_name, ".") == 0) + continue; + if (strcmp(entry->d_name, "..") == 0) + continue; + snprintf(netns_name, sizeof(netns_name), "%s/%s", etc_netns_path, entry->d_name); + snprintf(etc_name, sizeof(etc_name), "/etc/%s", entry->d_name); + if (mount(netns_name, etc_name, "none", MS_BIND, NULL) < 0) { + fprintf(stderr, "Bind %s -> %s failed: %s\n", + netns_name, etc_name, strerror(errno)); + } + } + closedir(dir); +} + +int netns_switch(char *name) +{ + char net_path[PATH_MAX]; + int netns; + unsigned long mountflags = 0; + struct statvfs fsstat; + + snprintf(net_path, sizeof(net_path), "%s/%s", NETNS_RUN_DIR, name); + netns = open(net_path, O_RDONLY | O_CLOEXEC); + if (netns < 0) { + fprintf(stderr, "Cannot open network namespace \"%s\": %s\n", + name, strerror(errno)); + return -1; + } + + if (setns(netns, CLONE_NEWNET) < 0) { + fprintf(stderr, "setting the network namespace \"%s\" failed: %s\n", + name, strerror(errno)); + close(netns); + return -1; + } + close(netns); + + if (unshare(CLONE_NEWNS) < 0) { + fprintf(stderr, "unshare failed: %s\n", strerror(errno)); + return -1; + } + /* Don't let any mounts propagate back to the parent */ + if (mount("", "/", "none", MS_SLAVE | MS_REC, NULL)) { + fprintf(stderr, "\"mount --make-rslave /\" failed: %s\n", + strerror(errno)); + return -1; + } + + /* Mount a version of /sys that describes the network namespace */ + + if (umount2("/sys", MNT_DETACH) < 
0) { + /* If this fails, perhaps there wasn't a sysfs instance mounted. Good. */ + if (statvfs("/sys", &fsstat) == 0) { + /* We couldn't umount the sysfs, we'll attempt to overlay it. + * A read-only instance can't be shadowed with a read-write one. */ + if (fsstat.f_flag & ST_RDONLY) + mountflags = MS_RDONLY; + } + } + if (mount(name, "/sys", "sysfs", mountflags, NULL) < 0) { + fprintf(stderr, "mount of /sys failed: %s\n",strerror(errno)); + return -1; + } + + /* Setup bind mounts for config files in /etc */ + bind_etc(name); + return 0; +} + +int netns_get_fd(const char *name) +{ + char pathbuf[PATH_MAX]; + const char *path, *ptr; + + path = name; + ptr = strchr(name, '/'); + if (!ptr) { + snprintf(pathbuf, sizeof(pathbuf), "%s/%s", + NETNS_RUN_DIR, name ); + path = pathbuf; + } + return open(path, O_RDONLY); +} + +int netns_foreach(int (*func)(char *nsname, void *arg), void *arg) +{ + DIR *dir; + struct dirent *entry; + + dir = opendir(NETNS_RUN_DIR); + if (!dir) { + if (errno == ENOENT) + return 0; + + fprintf(stderr, "Failed to open directory %s: %s\n", + NETNS_RUN_DIR, strerror(errno)); + return -1; + } + + while ((entry = readdir(dir)) != NULL) { + if (strcmp(entry->d_name, ".") == 0) + continue; + if (strcmp(entry->d_name, "..") == 0) + continue; + if (func(entry->d_name, arg)) + break; + } + + closedir(dir); + return 0; +} diff --git a/lib/netrom_ntop.c b/lib/netrom_ntop.c new file mode 100644 index 0000000..3dd6cb0 --- /dev/null +++ b/lib/netrom_ntop.c @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <sys/socket.h> +#include <errno.h> +#include <linux/ax25.h> + +#include "utils.h" + +const char *ax25_ntop1(const ax25_address *src, char *dst, socklen_t size); + +const char *netrom_ntop(int af, const void *addr, char *buf, socklen_t buflen) +{ + switch (af) { + case AF_NETROM: + errno = 0; + return ax25_ntop1((ax25_address *)addr, buf, buflen); + + default: + errno = EAFNOSUPPORT; + } + + return NULL; +} diff --git a/lib/ppp_proto.c 
b/lib/ppp_proto.c new file mode 100644 index 0000000..a634664 --- /dev/null +++ b/lib/ppp_proto.c @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Utilities for translating PPP protocols from strings + * and vice versa. + * + * Authors: Wojciech Drewek <wojciech.drewek@intel.com> + */ + +#include <linux/ppp_defs.h> +#include <linux/if_ether.h> +#include "utils.h" +#include "rt_names.h" + +static const struct proto ppp_proto_names[] = { + {PPP_IP, "ip"}, + {PPP_AT, "at"}, + {PPP_IPX, "ipx"}, + {PPP_VJC_COMP, "vjc_comp"}, + {PPP_VJC_UNCOMP, "vjc_uncomp"}, + {PPP_MP, "mp"}, + {PPP_IPV6, "ipv6"}, + {PPP_COMPFRAG, "compfrag"}, + {PPP_COMP, "comp"}, + {PPP_MPLS_UC, "mpls_uc"}, + {PPP_MPLS_MC, "mpls_mc"}, + {PPP_IPCP, "ipcp"}, + {PPP_ATCP, "atcp"}, + {PPP_IPXCP, "ipxcp"}, + {PPP_IPV6CP, "ipv6cp"}, + {PPP_CCPFRAG, "ccpfrag"}, + {PPP_CCP, "ccp"}, + {PPP_MPLSCP, "mplscp"}, + {PPP_LCP, "lcp"}, + {PPP_PAP, "pap"}, + {PPP_LQR, "lqr"}, + {PPP_CHAP, "chap"}, + {PPP_CBCP, "cbcp"}, +}; + +const char *ppp_proto_n2a(unsigned short id, char *buf, int len) +{ + size_t len_tb = ARRAY_SIZE(ppp_proto_names); + + return proto_n2a(id, buf, len, ppp_proto_names, len_tb); +} + +int ppp_proto_a2n(unsigned short *id, const char *buf) +{ + size_t len_tb = ARRAY_SIZE(ppp_proto_names); + + return proto_a2n(id, buf, ppp_proto_names, len_tb); +} diff --git a/lib/rose_ntop.c b/lib/rose_ntop.c new file mode 100644 index 0000000..c9ba712 --- /dev/null +++ b/lib/rose_ntop.c @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <string.h> +#include <errno.h> + +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/sockios.h> +#include <linux/rose.h> + +#include "rt_names.h" +#include "utils.h" + +static const char *rose_ntop1(const rose_address *src, char *dst, + socklen_t 
size) +{ + char *p = dst; + int i; + + if (size < 10) + return NULL; + + for (i = 0; i < 5; i++) { + *p++ = '0' + ((src->rose_addr[i] >> 4) & 0xf); + *p++ = '0' + ((src->rose_addr[i] ) & 0xf); + } + + if (size == 10) + return dst; + + *p = '\0'; + + return dst; +} + +const char *rose_ntop(int af, const void *addr, char *buf, socklen_t buflen) +{ + switch (af) { + case AF_ROSE: + errno = 0; + return rose_ntop1((rose_address *)addr, buf, buflen); + + default: + errno = EAFNOSUPPORT; + } + + return NULL; +} diff --git a/lib/rt_names.c b/lib/rt_names.c new file mode 100644 index 0000000..b976471 --- /dev/null +++ b/lib/rt_names.c @@ -0,0 +1,788 @@ +/* + * rt_names.c rtnetlink names DB. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <sys/time.h> +#include <sys/socket.h> +#include <dirent.h> +#include <limits.h> + +#include <asm/types.h> +#include <linux/rtnetlink.h> + +#include "rt_names.h" +#include "utils.h" + +#define NAME_MAX_LEN 512 + +int numeric; + +struct rtnl_hash_entry { + struct rtnl_hash_entry *next; + const char *name; + unsigned int id; +}; + +static int fread_id_name(FILE *fp, int *id, char *namebuf) +{ + char buf[NAME_MAX_LEN]; + + while (fgets(buf, sizeof(buf), fp)) { + char *p = buf; + + while (*p == ' ' || *p == '\t') + p++; + + if (*p == '#' || *p == '\n' || *p == 0) + continue; + + if (sscanf(p, "0x%x %s\n", id, namebuf) != 2 && + sscanf(p, "0x%x %s #", id, namebuf) != 2 && + sscanf(p, "%d %s\n", id, namebuf) != 2 && + sscanf(p, "%d %s #", id, namebuf) != 2) { + strcpy(namebuf, p); + return -1; + } + return 1; + } + return 0; +} + +static void 
+rtnl_hash_initialize(const char *file, struct rtnl_hash_entry **hash, int size) +{ + struct rtnl_hash_entry *entry; + FILE *fp; + int id; + char namebuf[NAME_MAX_LEN] = {0}; + int ret; + + fp = fopen(file, "r"); + if (!fp) + return; + + while ((ret = fread_id_name(fp, &id, &namebuf[0]))) { + if (ret == -1) { + fprintf(stderr, "Database %s is corrupted at %s\n", + file, namebuf); + fclose(fp); + return; + } + + if (id < 0) + continue; + + entry = malloc(sizeof(*entry)); + entry->id = id; + entry->name = strdup(namebuf); + entry->next = hash[id & (size - 1)]; + hash[id & (size - 1)] = entry; + } + fclose(fp); +} + +static void rtnl_tab_initialize(const char *file, char **tab, int size) +{ + FILE *fp; + int id; + char namebuf[NAME_MAX_LEN] = {0}; + int ret; + + fp = fopen(file, "r"); + if (!fp) + return; + + while ((ret = fread_id_name(fp, &id, &namebuf[0]))) { + if (ret == -1) { + fprintf(stderr, "Database %s is corrupted at %s\n", + file, namebuf); + fclose(fp); + return; + } + if (id < 0 || id > size) + continue; + + tab[id] = strdup(namebuf); + } + fclose(fp); +} + +static char *rtnl_rtprot_tab[256] = { + [RTPROT_UNSPEC] = "unspec", + [RTPROT_REDIRECT] = "redirect", + [RTPROT_KERNEL] = "kernel", + [RTPROT_BOOT] = "boot", + [RTPROT_STATIC] = "static", + + [RTPROT_GATED] = "gated", + [RTPROT_RA] = "ra", + [RTPROT_MRT] = "mrt", + [RTPROT_ZEBRA] = "zebra", + [RTPROT_BIRD] = "bird", + [RTPROT_BABEL] = "babel", + [RTPROT_DNROUTED] = "dnrouted", + [RTPROT_XORP] = "xorp", + [RTPROT_NTK] = "ntk", + [RTPROT_DHCP] = "dhcp", + [RTPROT_KEEPALIVED] = "keepalived", + [RTPROT_BGP] = "bgp", + [RTPROT_ISIS] = "isis", + [RTPROT_OSPF] = "ospf", + [RTPROT_RIP] = "rip", + [RTPROT_EIGRP] = "eigrp", +}; + + +static int rtnl_rtprot_init; + +static void rtnl_rtprot_initialize(void) +{ + struct dirent *de; + DIR *d; + + rtnl_rtprot_init = 1; + rtnl_tab_initialize(CONFDIR "/rt_protos", + rtnl_rtprot_tab, 256); + + d = opendir(CONFDIR "/rt_protos.d"); + if (!d) + return; + + while ((de = 
readdir(d)) != NULL) { + char path[PATH_MAX]; + size_t len; + + if (*de->d_name == '.') + continue; + + /* only consider filenames ending in '.conf' */ + len = strlen(de->d_name); + if (len <= 5) + continue; + if (strcmp(de->d_name + len - 5, ".conf")) + continue; + + snprintf(path, sizeof(path), CONFDIR "/rt_protos.d/%s", + de->d_name); + rtnl_tab_initialize(path, rtnl_rtprot_tab, 256); + } + closedir(d); +} + +const char *rtnl_rtprot_n2a(int id, char *buf, int len) +{ + if (id < 0 || id >= 256 || numeric) { + snprintf(buf, len, "%u", id); + return buf; + } + if (!rtnl_rtprot_tab[id]) { + if (!rtnl_rtprot_init) + rtnl_rtprot_initialize(); + } + if (rtnl_rtprot_tab[id]) + return rtnl_rtprot_tab[id]; + snprintf(buf, len, "%u", id); + return buf; +} + +int rtnl_rtprot_a2n(__u32 *id, const char *arg) +{ + static char *cache; + static unsigned long res; + char *end; + int i; + + if (cache && strcmp(cache, arg) == 0) { + *id = res; + return 0; + } + + if (!rtnl_rtprot_init) + rtnl_rtprot_initialize(); + + for (i = 0; i < 256; i++) { + if (rtnl_rtprot_tab[i] && + strcmp(rtnl_rtprot_tab[i], arg) == 0) { + cache = rtnl_rtprot_tab[i]; + res = i; + *id = res; + return 0; + } + } + + res = strtoul(arg, &end, 0); + if (!end || end == arg || *end || res > 255) + return -1; + *id = res; + return 0; +} + + +static char *rtnl_rtscope_tab[256] = { + [RT_SCOPE_UNIVERSE] = "global", + [RT_SCOPE_NOWHERE] = "nowhere", + [RT_SCOPE_HOST] = "host", + [RT_SCOPE_LINK] = "link", + [RT_SCOPE_SITE] = "site", +}; + +static int rtnl_rtscope_init; + +static void rtnl_rtscope_initialize(void) +{ + rtnl_rtscope_init = 1; + rtnl_tab_initialize(CONFDIR "/rt_scopes", + rtnl_rtscope_tab, 256); +} + +const char *rtnl_rtscope_n2a(int id, char *buf, int len) +{ + if (id < 0 || id >= 256 || numeric) { + snprintf(buf, len, "%d", id); + return buf; + } + + if (!rtnl_rtscope_tab[id]) { + if (!rtnl_rtscope_init) + rtnl_rtscope_initialize(); + } + + if (rtnl_rtscope_tab[id]) + return rtnl_rtscope_tab[id]; + + 
snprintf(buf, len, "%d", id); + return buf; +} + +int rtnl_rtscope_a2n(__u32 *id, const char *arg) +{ + static const char *cache; + static unsigned long res; + char *end; + int i; + + if (cache && strcmp(cache, arg) == 0) { + *id = res; + return 0; + } + + if (!rtnl_rtscope_init) + rtnl_rtscope_initialize(); + + for (i = 0; i < 256; i++) { + if (rtnl_rtscope_tab[i] && + strcmp(rtnl_rtscope_tab[i], arg) == 0) { + cache = rtnl_rtscope_tab[i]; + res = i; + *id = res; + return 0; + } + } + + res = strtoul(arg, &end, 0); + if (!end || end == arg || *end || res > 255) + return -1; + *id = res; + return 0; +} + + +static char *rtnl_rtrealm_tab[256] = { + "unknown", +}; + +static int rtnl_rtrealm_init; + +static void rtnl_rtrealm_initialize(void) +{ + rtnl_rtrealm_init = 1; + rtnl_tab_initialize(CONFDIR "/rt_realms", + rtnl_rtrealm_tab, 256); +} + +const char *rtnl_rtrealm_n2a(int id, char *buf, int len) +{ + if (id < 0 || id >= 256 || numeric) { + snprintf(buf, len, "%d", id); + return buf; + } + if (!rtnl_rtrealm_tab[id]) { + if (!rtnl_rtrealm_init) + rtnl_rtrealm_initialize(); + } + if (rtnl_rtrealm_tab[id]) + return rtnl_rtrealm_tab[id]; + snprintf(buf, len, "%d", id); + return buf; +} + + +int rtnl_rtrealm_a2n(__u32 *id, const char *arg) +{ + static char *cache; + static unsigned long res; + char *end; + int i; + + if (cache && strcmp(cache, arg) == 0) { + *id = res; + return 0; + } + + if (!rtnl_rtrealm_init) + rtnl_rtrealm_initialize(); + + for (i = 0; i < 256; i++) { + if (rtnl_rtrealm_tab[i] && + strcmp(rtnl_rtrealm_tab[i], arg) == 0) { + cache = rtnl_rtrealm_tab[i]; + res = i; + *id = res; + return 0; + } + } + + res = strtoul(arg, &end, 0); + if (!end || end == arg || *end || res > 255) + return -1; + *id = res; + return 0; +} + + +static struct rtnl_hash_entry dflt_table_entry = { .name = "default" }; +static struct rtnl_hash_entry main_table_entry = { .name = "main" }; +static struct rtnl_hash_entry local_table_entry = { .name = "local" }; + +static struct 
rtnl_hash_entry *rtnl_rttable_hash[256] = { + [RT_TABLE_DEFAULT] = &dflt_table_entry, + [RT_TABLE_MAIN] = &main_table_entry, + [RT_TABLE_LOCAL] = &local_table_entry, +}; + +static int rtnl_rttable_init; + +static void rtnl_rttable_initialize(void) +{ + struct dirent *de; + DIR *d; + int i; + + rtnl_rttable_init = 1; + for (i = 0; i < 256; i++) { + if (rtnl_rttable_hash[i]) + rtnl_rttable_hash[i]->id = i; + } + rtnl_hash_initialize(CONFDIR "/rt_tables", + rtnl_rttable_hash, 256); + + d = opendir(CONFDIR "/rt_tables.d"); + if (!d) + return; + + while ((de = readdir(d)) != NULL) { + char path[PATH_MAX]; + size_t len; + + if (*de->d_name == '.') + continue; + + /* only consider filenames ending in '.conf' */ + len = strlen(de->d_name); + if (len <= 5) + continue; + if (strcmp(de->d_name + len - 5, ".conf")) + continue; + + snprintf(path, sizeof(path), + CONFDIR "/rt_tables.d/%s", de->d_name); + rtnl_hash_initialize(path, rtnl_rttable_hash, 256); + } + closedir(d); +} + +const char *rtnl_rttable_n2a(__u32 id, char *buf, int len) +{ + struct rtnl_hash_entry *entry; + + if (!rtnl_rttable_init) + rtnl_rttable_initialize(); + entry = rtnl_rttable_hash[id & 255]; + while (entry && entry->id != id) + entry = entry->next; + if (!numeric && entry) + return entry->name; + snprintf(buf, len, "%u", id); + return buf; +} + +int rtnl_rttable_a2n(__u32 *id, const char *arg) +{ + static const char *cache; + static unsigned long res; + struct rtnl_hash_entry *entry; + char *end; + unsigned long i; + + if (cache && strcmp(cache, arg) == 0) { + *id = res; + return 0; + } + + if (!rtnl_rttable_init) + rtnl_rttable_initialize(); + + for (i = 0; i < 256; i++) { + entry = rtnl_rttable_hash[i]; + while (entry && strcmp(entry->name, arg)) + entry = entry->next; + if (entry) { + cache = entry->name; + res = entry->id; + *id = res; + return 0; + } + } + + i = strtoul(arg, &end, 0); + if (!end || end == arg || *end || i > RT_TABLE_MAX) + return -1; + *id = i; + return 0; +} + + +static char 
*rtnl_rtdsfield_tab[256] = { + "0", +}; + +static int rtnl_rtdsfield_init; + +static void rtnl_rtdsfield_initialize(void) +{ + rtnl_rtdsfield_init = 1; + rtnl_tab_initialize(CONFDIR "/rt_dsfield", + rtnl_rtdsfield_tab, 256); +} + +const char *rtnl_dsfield_n2a(int id, char *buf, int len) +{ + const char *name; + + if (id < 0 || id >= 256) { + snprintf(buf, len, "%d", id); + return buf; + } + if (!numeric) { + name = rtnl_dsfield_get_name(id); + if (name != NULL) + return name; + } + snprintf(buf, len, "0x%02x", id); + return buf; +} + +const char *rtnl_dsfield_get_name(int id) +{ + if (id < 0 || id >= 256) + return NULL; + if (!rtnl_rtdsfield_tab[id]) { + if (!rtnl_rtdsfield_init) + rtnl_rtdsfield_initialize(); + } + return rtnl_rtdsfield_tab[id]; +} + + +int rtnl_dsfield_a2n(__u32 *id, const char *arg) +{ + static char *cache; + static unsigned long res; + char *end; + int i; + + if (cache && strcmp(cache, arg) == 0) { + *id = res; + return 0; + } + + if (!rtnl_rtdsfield_init) + rtnl_rtdsfield_initialize(); + + for (i = 0; i < 256; i++) { + if (rtnl_rtdsfield_tab[i] && + strcmp(rtnl_rtdsfield_tab[i], arg) == 0) { + cache = rtnl_rtdsfield_tab[i]; + res = i; + *id = res; + return 0; + } + } + + res = strtoul(arg, &end, 16); + if (!end || end == arg || *end || res > 255) + return -1; + *id = res; + return 0; +} + + +static struct rtnl_hash_entry dflt_group_entry = { + .id = 0, .name = "default" +}; + +static struct rtnl_hash_entry *rtnl_group_hash[256] = { + [0] = &dflt_group_entry, +}; + +static int rtnl_group_init; + +static void rtnl_group_initialize(void) +{ + rtnl_group_init = 1; + rtnl_hash_initialize(CONFDIR "/group", + rtnl_group_hash, 256); +} + +int rtnl_group_a2n(int *id, const char *arg) +{ + static const char *cache; + static unsigned long res; + struct rtnl_hash_entry *entry; + char *end; + int i; + + if (cache && strcmp(cache, arg) == 0) { + *id = res; + return 0; + } + + if (!rtnl_group_init) + rtnl_group_initialize(); + + for (i = 0; i < 256; i++) { + 
entry = rtnl_group_hash[i]; + while (entry && strcmp(entry->name, arg)) + entry = entry->next; + if (entry) { + cache = entry->name; + res = entry->id; + *id = res; + return 0; + } + } + + i = strtol(arg, &end, 0); + if (!end || end == arg || *end || i < 0) + return -1; + *id = i; + return 0; +} + +const char *rtnl_group_n2a(int id, char *buf, int len) +{ + struct rtnl_hash_entry *entry; + int i; + + if (!rtnl_group_init) + rtnl_group_initialize(); + + for (i = 0; !numeric && i < 256; i++) { + entry = rtnl_group_hash[i]; + + while (entry) { + if (entry->id == id) + return entry->name; + entry = entry->next; + } + } + + snprintf(buf, len, "%d", id); + return buf; +} + +static char *nl_proto_tab[256] = { + [NETLINK_ROUTE] = "rtnl", + [NETLINK_UNUSED] = "unused", + [NETLINK_USERSOCK] = "usersock", + [NETLINK_FIREWALL] = "fw", + [NETLINK_SOCK_DIAG] = "tcpdiag", + [NETLINK_NFLOG] = "nflog", + [NETLINK_XFRM] = "xfrm", + [NETLINK_SELINUX] = "selinux", + [NETLINK_ISCSI] = "iscsi", + [NETLINK_AUDIT] = "audit", + [NETLINK_FIB_LOOKUP] = "fiblookup", + [NETLINK_CONNECTOR] = "connector", + [NETLINK_NETFILTER] = "nft", + [NETLINK_IP6_FW] = "ip6fw", + [NETLINK_DNRTMSG] = "dec-rt", + [NETLINK_KOBJECT_UEVENT] = "uevent", + [NETLINK_GENERIC] = "genl", + [NETLINK_SCSITRANSPORT] = "scsi-trans", + [NETLINK_ECRYPTFS] = "ecryptfs", + [NETLINK_RDMA] = "rdma", + [NETLINK_CRYPTO] = "crypto", +}; + +static int nl_proto_init; + +static void nl_proto_initialize(void) +{ + nl_proto_init = 1; + rtnl_tab_initialize(CONFDIR "/nl_protos", + nl_proto_tab, 256); +} + +const char *nl_proto_n2a(int id, char *buf, int len) +{ + if (id < 0 || id >= 256 || numeric) { + snprintf(buf, len, "%d", id); + return buf; + } + + if (!nl_proto_init) + nl_proto_initialize(); + + if (nl_proto_tab[id]) + return nl_proto_tab[id]; + + snprintf(buf, len, "%u", id); + return buf; +} + +int nl_proto_a2n(__u32 *id, const char *arg) +{ + static char *cache; + static unsigned long res; + char *end; + int i; + + if (cache && 
strcmp(cache, arg) == 0) { + *id = res; + return 0; + } + + if (!nl_proto_init) + nl_proto_initialize(); + + for (i = 0; i < 256; i++) { + if (nl_proto_tab[i] && + strcmp(nl_proto_tab[i], arg) == 0) { + cache = nl_proto_tab[i]; + res = i; + *id = res; + return 0; + } + } + + res = strtoul(arg, &end, 0); + if (!end || end == arg || *end || res > 255) + return -1; + *id = res; + return 0; +} + +#define PROTODOWN_REASON_NUM_BITS 32 +static char *protodown_reason_tab[PROTODOWN_REASON_NUM_BITS] = { +}; + +static int protodown_reason_init; + +static void protodown_reason_initialize(void) +{ + struct dirent *de; + DIR *d; + + protodown_reason_init = 1; + + d = opendir(CONFDIR "/protodown_reasons.d"); + if (!d) + return; + + while ((de = readdir(d)) != NULL) { + char path[PATH_MAX]; + size_t len; + + if (*de->d_name == '.') + continue; + + /* only consider filenames ending in '.conf' */ + len = strlen(de->d_name); + if (len <= 5) + continue; + if (strcmp(de->d_name + len - 5, ".conf")) + continue; + + snprintf(path, sizeof(path), CONFDIR "/protodown_reasons.d/%s", + de->d_name); + rtnl_tab_initialize(path, protodown_reason_tab, + PROTODOWN_REASON_NUM_BITS); + } + closedir(d); +} + +int protodown_reason_n2a(int id, char *buf, int len) +{ + if (id < 0 || id >= PROTODOWN_REASON_NUM_BITS) + return -1; + + if (numeric) { + snprintf(buf, len, "%d", id); + return 0; + } + + if (!protodown_reason_init) + protodown_reason_initialize(); + + if (protodown_reason_tab[id]) + snprintf(buf, len, "%s", protodown_reason_tab[id]); + else + snprintf(buf, len, "%d", id); + + return 0; +} + +int protodown_reason_a2n(__u32 *id, const char *arg) +{ + static char *cache; + static unsigned long res; + char *end; + int i; + + if (cache && strcmp(cache, arg) == 0) { + *id = res; + return 0; + } + + if (!protodown_reason_init) + protodown_reason_initialize(); + + for (i = 0; i < PROTODOWN_REASON_NUM_BITS; i++) { + if (protodown_reason_tab[i] && + strcmp(protodown_reason_tab[i], arg) == 0) { + cache = 
protodown_reason_tab[i]; + res = i; + *id = res; + return 0; + } + } + + res = strtoul(arg, &end, 0); + if (!end || end == arg || *end || res >= PROTODOWN_REASON_NUM_BITS) + return -1; + *id = res; + return 0; +} diff --git a/lib/utils.c b/lib/utils.c new file mode 100644 index 0000000..dd3cdb3 --- /dev/null +++ b/lib/utils.c @@ -0,0 +1,1961 @@ +/* + * utils.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <unistd.h> +#include <fcntl.h> +#include <limits.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <string.h> +#include <ctype.h> +#include <netdb.h> +#include <arpa/inet.h> +#include <asm/types.h> +#include <linux/pkt_sched.h> +#include <linux/param.h> +#include <linux/if_arp.h> +#include <linux/mpls.h> +#include <linux/snmp.h> +#include <time.h> +#include <sys/time.h> +#include <errno.h> +#ifdef HAVE_LIBCAP +#include <sys/capability.h> +#endif + +#include "rt_names.h" +#include "utils.h" +#include "ll_map.h" +#include "namespace.h" + +int resolve_hosts; +int timestamp_short; +int pretty; +const char *_SL_ = "\n"; + +static int af_byte_len(int af); +static void print_time(char *buf, int len, __u32 time); +static void print_time64(char *buf, int len, __s64 time); + +int read_prop(const char *dev, char *prop, long *value) +{ + char fname[128], buf[80], *endp, *nl; + FILE *fp; + long result; + int ret; + + ret = snprintf(fname, sizeof(fname), "/sys/class/net/%s/%s", + dev, prop); + + if (ret <= 0 || ret >= sizeof(fname)) { + fprintf(stderr, "could not build pathname for property\n"); + return -1; + } + + fp = fopen(fname, "r"); + if (fp == NULL) { + fprintf(stderr, "fopen %s: %s\n", fname, strerror(errno)); 
+ return -1; + } + + if (!fgets(buf, sizeof(buf), fp)) { + fprintf(stderr, "property \"%s\" in file %s is currently unknown\n", prop, fname); + fclose(fp); + goto out; + } + + nl = strchr(buf, '\n'); + if (nl) + *nl = '\0'; + + fclose(fp); + result = strtol(buf, &endp, 0); + + if (*endp || buf == endp) { + fprintf(stderr, "value \"%s\" in file %s is not a number\n", + buf, fname); + goto out; + } + + if ((result == LONG_MAX || result == LONG_MIN) && errno == ERANGE) { + fprintf(stderr, "strtol %s: %s", fname, strerror(errno)); + goto out; + } + + *value = result; + return 0; +out: + fprintf(stderr, "Failed to parse %s\n", fname); + return -1; +} + +int get_hex(char c) +{ + if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + if (c >= '0' && c <= '9') + return c - '0'; + + return -1; +} + +int get_integer(int *val, const char *arg, int base) +{ + long res; + char *ptr; + + if (!arg || !*arg) + return -1; + + res = strtol(arg, &ptr, base); + + /* If there were no digits at all, strtol() stores + * the original value of nptr in *endptr (and returns 0). + * In particular, if *nptr is not '\0' but **endptr is '\0' on return, + * the entire string is valid. + */ + if (!ptr || ptr == arg || *ptr) + return -1; + + /* If an underflow occurs, strtol() returns LONG_MIN. + * If an overflow occurs, strtol() returns LONG_MAX. + * In both cases, errno is set to ERANGE. 
+ */ + if ((res == LONG_MAX || res == LONG_MIN) && errno == ERANGE) + return -1; + + /* Outside range of int */ + if (res < INT_MIN || res > INT_MAX) + return -1; + + *val = res; + return 0; +} + +int mask2bits(__u32 netmask) +{ + unsigned int bits = 0; + __u32 mask = ntohl(netmask); + __u32 host = ~mask; + + /* a valid netmask must be 2^n - 1 */ + if ((host & (host + 1)) != 0) + return -1; + + for (; mask; mask <<= 1) + ++bits; + return bits; +} + +static int get_netmask(unsigned int *val, const char *arg, int base) +{ + inet_prefix addr; + + if (!get_unsigned(val, arg, base)) + return 0; + + /* try converting dotted quad to CIDR */ + if (!get_addr_1(&addr, arg, AF_INET) && addr.family == AF_INET) { + int b = mask2bits(addr.data[0]); + + if (b >= 0) { + *val = b; + return 0; + } + } + + return -1; +} + +int get_unsigned(unsigned int *val, const char *arg, int base) +{ + unsigned long res; + char *ptr; + + if (!arg || !*arg) + return -1; + + res = strtoul(arg, &ptr, base); + + /* empty string or trailing non-digits */ + if (!ptr || ptr == arg || *ptr) + return -1; + + /* overflow */ + if (res == ULONG_MAX && errno == ERANGE) + return -1; + + /* out side range of unsigned */ + if (res > UINT_MAX) + return -1; + + *val = res; + return 0; +} + +/* + * get_time_rtt is "translated" from a similar routine "get_time" in + * tc_util.c. We don't use the exact same routine because tc passes + * microseconds to the kernel and the callers of get_time_rtt want to + * pass milliseconds (standard unit for rtt values since 2.6.27), and + * have a different assumption for the units of a "raw" number. + */ +int get_time_rtt(unsigned int *val, const char *arg, int *raw) +{ + double t; + unsigned long res; + char *p; + + if (strchr(arg, '.') != NULL) { + t = strtod(arg, &p); + if (t < 0.0) + return -1; + + /* no digits? 
*/ + if (!p || p == arg) + return -1; + + /* over/underflow */ + if ((t == HUGE_VALF || t == HUGE_VALL) && errno == ERANGE) + return -1; + } else { + res = strtoul(arg, &p, 0); + + /* empty string? */ + if (!p || p == arg) + return -1; + + /* overflow */ + if (res == ULONG_MAX && errno == ERANGE) + return -1; + + t = (double)res; + } + + if (p == arg) + return -1; + *raw = 1; + + if (*p) { + *raw = 0; + if (strcasecmp(p, "s") == 0 || + strcasecmp(p, "sec") == 0 || + strcasecmp(p, "secs") == 0) + t *= 1000; + else if (strcasecmp(p, "ms") == 0 || + strcasecmp(p, "msec") == 0 || + strcasecmp(p, "msecs") == 0) + t *= 1.0; /* allow suffix, do nothing */ + else + return -1; + } + + /* emulate ceil() without having to bring-in -lm and always be >= 1 */ + *val = t; + if (*val < t) + *val += 1; + + return 0; + +} + +int get_u64(__u64 *val, const char *arg, int base) +{ + unsigned long long res; + char *ptr; + + if (!arg || !*arg) + return -1; + + res = strtoull(arg, &ptr, base); + + /* empty string or trailing non-digits */ + if (!ptr || ptr == arg || *ptr) + return -1; + + /* overflow */ + if (res == ULLONG_MAX && errno == ERANGE) + return -1; + + /* in case ULL is 128 bits */ + if (res > 0xFFFFFFFFFFFFFFFFULL) + return -1; + + *val = res; + return 0; +} + +int get_u32(__u32 *val, const char *arg, int base) +{ + unsigned long res; + char *ptr; + + if (!arg || !*arg) + return -1; + res = strtoul(arg, &ptr, base); + + /* empty string or trailing non-digits */ + if (!ptr || ptr == arg || *ptr) + return -1; + + /* overflow */ + if (res == ULONG_MAX && errno == ERANGE) + return -1; + + /* in case UL > 32 bits */ + if (res > 0xFFFFFFFFUL) + return -1; + + *val = res; + return 0; +} + +int get_u16(__u16 *val, const char *arg, int base) +{ + unsigned long res; + char *ptr; + + if (!arg || !*arg) + return -1; + res = strtoul(arg, &ptr, base); + + /* empty string or trailing non-digits */ + if (!ptr || ptr == arg || *ptr) + return -1; + + /* overflow */ + if (res == ULONG_MAX && 
errno == ERANGE) + return -1; + + if (res > 0xFFFFUL) + return -1; + + *val = res; + return 0; +} + +int get_u8(__u8 *val, const char *arg, int base) +{ + unsigned long res; + char *ptr; + + if (!arg || !*arg) + return -1; + + res = strtoul(arg, &ptr, base); + /* empty string or trailing non-digits */ + if (!ptr || ptr == arg || *ptr) + return -1; + + /* overflow */ + if (res == ULONG_MAX && errno == ERANGE) + return -1; + + if (res > 0xFFUL) + return -1; + + *val = res; + return 0; +} + +int get_s64(__s64 *val, const char *arg, int base) +{ + long long res; + char *ptr; + + errno = 0; + + if (!arg || !*arg) + return -1; + res = strtoll(arg, &ptr, base); + if (!ptr || ptr == arg || *ptr) + return -1; + if ((res == LLONG_MIN || res == LLONG_MAX) && errno == ERANGE) + return -1; + if (res > INT64_MAX || res < INT64_MIN) + return -1; + + *val = res; + return 0; +} + +int get_s32(__s32 *val, const char *arg, int base) +{ + long res; + char *ptr; + + errno = 0; + + if (!arg || !*arg) + return -1; + res = strtol(arg, &ptr, base); + if (!ptr || ptr == arg || *ptr) + return -1; + if ((res == LONG_MIN || res == LONG_MAX) && errno == ERANGE) + return -1; + if (res > INT32_MAX || res < INT32_MIN) + return -1; + + *val = res; + return 0; +} + +int get_be64(__be64 *val, const char *arg, int base) +{ + __u64 v; + int ret = get_u64(&v, arg, base); + + if (!ret) + *val = htonll(v); + + return ret; +} + +int get_be32(__be32 *val, const char *arg, int base) +{ + __u32 v; + int ret = get_u32(&v, arg, base); + + if (!ret) + *val = htonl(v); + + return ret; +} + +int get_be16(__be16 *val, const char *arg, int base) +{ + __u16 v; + int ret = get_u16(&v, arg, base); + + if (!ret) + *val = htons(v); + + return ret; +} + +/* This uses a non-standard parsing (ie not inet_aton, or inet_pton) + * because of legacy choice to parse 10.8 as 10.8.0.0 not 10.0.0.8 + */ +static int get_addr_ipv4(__u8 *ap, const char *cp) +{ + int i; + + for (i = 0; i < 4; i++) { + unsigned long n; + char *endp; + + 
/*
 * Parse up to four ':'-separated hexadecimal 16-bit groups from @cp and
 * store them, each in network byte order, into *@ap.  Groups that are not
 * given are stored as zero (so "1:2" is read as "1:2:0:0").
 *
 * Returns 1 on success and -1 on a group larger than 0xffff, a group
 * without digits, more than four groups or any other trailing characters.
 */
int get_addr64(__u64 *ap, const char *cp)
{
	int i;

	union {
		__u16 v16[4];
		__u64 v64;
	} val;

	/* groups missing from a short address must read back as zero */
	val.v64 = 0;

	for (i = 0; i < 4; i++) {
		unsigned long n;
		char *endp;

		n = strtoul(cp, &endp, 16);
		if (n > 0xffff)
			return -1;	/* bogus network value */

		if (endp == cp) /* no digits */
			return -1;

		val.v16[i] = htons(n);

		if (*endp == '\0')
			break;

		if (i == 3 || *endp != ':')
			return -1;	/* extra characters */
		cp = endp + 1;
	}

	*ap = val.v64;

	return 1;
}
/*
 * Width in bits of an address of family @af.
 * Only AF_INET, AF_INET6 and AF_MPLS are known; anything else yields 0.
 */
int af_bit_len(int af)
{
	if (af == AF_INET)
		return 32;
	if (af == AF_INET6)
		return 128;
	if (af == AF_MPLS)
		return 20;

	return 0;
}
flags; + dst->bitlen = bitlen; + + return 0; +} + +static const char *family_name_verbose(int family) +{ + if (family == AF_UNSPEC) + return "any valid"; + return family_name(family); +} + +int get_addr(inet_prefix *dst, const char *arg, int family) +{ + if (get_addr_1(dst, arg, family)) { + fprintf(stderr, + "Error: %s address is expected rather than \"%s\".\n", + family_name_verbose(family), arg); + exit(1); + } + return 0; +} + +int get_addr_rta(inet_prefix *dst, const struct rtattr *rta, int family) +{ + const int len = RTA_PAYLOAD(rta); + const void *data = RTA_DATA(rta); + + switch (len) { + case 4: + dst->family = AF_INET; + dst->bytelen = 4; + memcpy(dst->data, data, 4); + break; + case 16: + dst->family = AF_INET6; + dst->bytelen = 16; + memcpy(dst->data, data, 16); + break; + default: + return -1; + } + + if (family != AF_UNSPEC && family != dst->family) + return -2; + + dst->bitlen = -1; + dst->flags = 0; + + set_address_type(dst); + return 0; +} + +int get_prefix(inet_prefix *dst, char *arg, int family) +{ + if (family == AF_PACKET) { + fprintf(stderr, + "Error: \"%s\" may be inet prefix, but it is not allowed in this context.\n", + arg); + exit(1); + } + + if (get_prefix_1(dst, arg, family)) { + fprintf(stderr, + "Error: %s prefix is expected rather than \"%s\".\n", + family_name_verbose(family), arg); + exit(1); + } + return 0; +} + +__u32 get_addr32(const char *name) +{ + inet_prefix addr; + + if (get_addr_1(&addr, name, AF_INET)) { + fprintf(stderr, + "Error: an IP address is expected rather than \"%s\"\n", + name); + exit(1); + } + return addr.data[0]; +} + +void incomplete_command(void) +{ + fprintf(stderr, "Command line is not complete. 
/*
 * Character-level validation shared by check_ifname() and
 * check_altifname(): the name must be non-empty and contain neither '/'
 * nor whitespace.
 *
 * Returns 0 if @name is acceptable, -1 otherwise.
 */
static int __check_ifname(const char *name)
{
	const unsigned char *p = (const unsigned char *)name;

	if (*p == '\0')
		return -1;

	for (; *p; p++) {
		/*
		 * <ctype.h> classifiers are only defined for unsigned char
		 * values and EOF; a plain (possibly negative) char is not
		 * a valid argument.
		 */
		if (*p == '/' || isspace(*p))
			return -1;
	}
	return 0;
}
/*
 * strcmp()-style abbreviation test: returns false when 'prefix' is a
 * non-empty prefix of 'string' (i.e. it matches), and true otherwise.
 * An empty 'prefix' never matches.
 */
bool matches(const char *prefix, const char *string)
{
	size_t i = 0;

	if (prefix[0] == '\0')
		return true;

	while (string[i] != '\0' && prefix[i] == string[i])
		i++;

	/* anything left over in prefix means it was not fully consumed */
	return prefix[i] != '\0';
}
/*
 * Translate a family keyword ("inet", "inet6", "link", "mpls", "bridge")
 * into its AF_* constant.  Unknown keywords give AF_UNSPEC.
 */
int read_family(const char *name)
{
	static const struct {
		const char *keyword;
		int family;
	} known[] = {
		{ "inet",   AF_INET },
		{ "inet6",  AF_INET6 },
		{ "link",   AF_PACKET },
		{ "mpls",   AF_MPLS },
		{ "bridge", AF_BRIDGE },
	};
	size_t i;

	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
		if (strcmp(name, known[i].keyword) == 0)
			return known[i].family;
	}

	return AF_UNSPEC;
}
== AF_MPLS) + return "mpls"; + if (family == AF_BRIDGE) + return "bridge"; + return "???"; +} + +#ifdef RESOLVE_HOSTNAMES +struct namerec { + struct namerec *next; + const char *name; + inet_prefix addr; +}; + +#define NHASH 257 +static struct namerec *nht[NHASH]; + +static const char *resolve_address(const void *addr, int len, int af) +{ + struct namerec *n; + struct hostent *h_ent; + unsigned int hash; + static int notfirst; + + + if (af == AF_INET6 && ((__u32 *)addr)[0] == 0 && + ((__u32 *)addr)[1] == 0 && ((__u32 *)addr)[2] == htonl(0xffff)) { + af = AF_INET; + addr += 12; + len = 4; + } + + hash = *(__u32 *)(addr + len - 4) % NHASH; + + for (n = nht[hash]; n; n = n->next) { + if (n->addr.family == af && + n->addr.bytelen == len && + memcmp(n->addr.data, addr, len) == 0) + return n->name; + } + n = malloc(sizeof(*n)); + if (n == NULL) + return NULL; + n->addr.family = af; + n->addr.bytelen = len; + n->name = NULL; + memcpy(n->addr.data, addr, len); + n->next = nht[hash]; + nht[hash] = n; + if (++notfirst == 1) + sethostent(1); + fflush(stdout); + + h_ent = gethostbyaddr(addr, len, af); + if (h_ent != NULL) + n->name = strdup(h_ent->h_name); + + /* Even if we fail, "negative" entry is remembered. */ + return n->name; +} +#endif + +const char *format_host_r(int af, int len, const void *addr, + char *buf, int buflen) +{ +#ifdef RESOLVE_HOSTNAMES + if (resolve_hosts) { + const char *n; + + len = len <= 0 ? 
/*
 * Format the 64-bit address @addr (four 16-bit groups held in network byte
 * order) as "x:x:x:x" lower-case hex into @buff, which is @len bytes long.
 *
 * Returns the number of characters written, excluding the terminating NUL,
 * or a negative value if formatting fails or the text does not fit.
 */
int addr64_n2a(__u64 addr, char *buff, size_t len)
{
	__u16 words[4];
	__u16 v;
	int i, ret;
	size_t written = 0;
	char *sep = ":";

	/* copy instead of casting &addr: avoids aliasing a __u64 as __u16 */
	memcpy(words, &addr, sizeof(words));

	for (i = 0; i < 4; i++) {
		v = ntohs(words[i]);

		if (i == 3)
			sep = "";

		ret = snprintf(&buff[written], len - written, "%x%s", v, sep);
		if (ret < 0)
			return ret;

		/*
		 * Truncated: stop here, otherwise "len - written" would
		 * wrap around and the next snprintf() would be unbounded.
		 */
		if ((size_t)ret >= len - written)
			return -1;

		written += ret;
	}

	return written;
}
/* Number of input lines consumed so far by getcmdline() */
int cmdlineno;

/*
 * Like glibc getline() but handle continuation lines and comments.
 *
 * Reads one logical command from @in into the malloc'ed buffer *@linep
 * (capacity *@lenp, both updated as with getline()).  Everything from the
 * first '#' on a physical line is dropped, and a line ending in
 * backslash-newline is joined with the following line.
 *
 * Returns the length of the result, or a negative value on EOF, read error,
 * missing continuation line or allocation failure.  On failure *@linep
 * still points to a valid buffer (or NULL) that the caller may free().
 */
ssize_t getcmdline(char **linep, size_t *lenp, FILE *in)
{
	ssize_t cc;
	char *cp;

	cc = getline(linep, lenp, in);
	if (cc < 0)
		return cc;	/* eof or error */
	++cmdlineno;

	cp = strchr(*linep, '#');
	if (cp)
		*cp = '\0';

	while ((cp = strstr(*linep, "\\\n")) != NULL) {
		char *line1 = NULL;
		char *joined;
		size_t len1 = 0;
		size_t need;
		ssize_t cc1;

		cc1 = getline(&line1, &len1, in);
		if (cc1 < 0) {
			fprintf(stderr, "Missing continuation line\n");
			/* getline() may have allocated even though it failed */
			free(line1);
			return cc1;
		}

		++cmdlineno;
		*cp = 0;

		cp = strchr(line1, '#');
		if (cp)
			*cp = '\0';

		need = strlen(*linep) + strlen(line1) + 1;
		/* keep the old buffer reachable if realloc() fails */
		joined = realloc(*linep, need);
		if (!joined) {
			fprintf(stderr, "Out of memory\n");
			free(line1);
			return -1;
		}
		*linep = joined;
		*lenp = need;

		cc += cc1 - 2;
		strcat(*linep, line1);
		free(line1);
	}
	return cc;
}
&endptr, 16); + if (endptr != arg + i * 3 + 2) + return -1; + + if (tmp > 255) + return -1; + + *guid |= tmp << (56 - 8 * i); + } + + return 0; +} + +/* This is a necessary workaround for multicast route dumps */ +int get_real_family(int rtm_type, int rtm_family) +{ + if (rtm_type != RTN_MULTICAST) + return rtm_family; + + if (rtm_family == RTNL_FAMILY_IPMR) + return AF_INET; + + if (rtm_family == RTNL_FAMILY_IP6MR) + return AF_INET6; + + return rtm_family; +} + +/* Based on copy_rtnl_link_stats() from kernel at net/core/rtnetlink.c */ +static void copy_rtnl_link_stats64(struct rtnl_link_stats64 *stats64, + const struct rtnl_link_stats *stats) +{ + __u64 *a = (__u64 *)stats64; + const __u32 *b = (const __u32 *)stats; + const __u32 *e = b + sizeof(*stats) / sizeof(*b); + + while (b < e) + *a++ = *b++; +} + +#define IPSTATS_MIB_MAX_LEN (__IPSTATS_MIB_MAX * sizeof(__u64)) +static void get_snmp_counters(struct rtnl_link_stats64 *stats64, + struct rtattr *s) +{ + __u64 *mib = (__u64 *)RTA_DATA(s); + + memset(stats64, 0, sizeof(*stats64)); + + stats64->rx_packets = mib[IPSTATS_MIB_INPKTS]; + stats64->rx_bytes = mib[IPSTATS_MIB_INOCTETS]; + stats64->tx_packets = mib[IPSTATS_MIB_OUTPKTS]; + stats64->tx_bytes = mib[IPSTATS_MIB_OUTOCTETS]; + stats64->rx_errors = mib[IPSTATS_MIB_INDISCARDS]; + stats64->tx_errors = mib[IPSTATS_MIB_OUTDISCARDS]; + stats64->multicast = mib[IPSTATS_MIB_INMCASTPKTS]; + stats64->rx_frame_errors = mib[IPSTATS_MIB_CSUMERRORS]; +} + +int get_rtnl_link_stats_rta(struct rtnl_link_stats64 *stats64, + struct rtattr *tb[]) +{ + struct rtnl_link_stats stats; + void *s; + struct rtattr *rta; + int size, len; + + if (tb[IFLA_STATS64]) { + rta = tb[IFLA_STATS64]; + size = sizeof(struct rtnl_link_stats64); + s = stats64; + } else if (tb[IFLA_STATS]) { + rta = tb[IFLA_STATS]; + size = sizeof(struct rtnl_link_stats); + s = &stats; + } else if (tb[IFLA_PROTINFO]) { + struct rtattr *ptb[IPSTATS_MIB_MAX_LEN + 1]; + + parse_rtattr_nested(ptb, IPSTATS_MIB_MAX_LEN, + 
#ifdef NEED_STRLCPY
/*
 * Copy @src into the @size byte buffer @dst, truncating if necessary and
 * always NUL-terminating when @size is non-zero.
 *
 * Returns strlen(@src); a result >= @size means the copy was truncated.
 */
size_t strlcpy(char *dst, const char *src, size_t size)
{
	size_t srclen = strlen(src);

	if (size) {
		size_t n = srclen < size - 1 ? srclen : size - 1;

		memcpy(dst, src, n);
		dst[n] = '\0';
	}
	return srclen;
}

/*
 * Append @src to the string in the @size byte buffer @dst, truncating if
 * necessary.  @dst is never examined beyond @size bytes; if it holds no
 * NUL within that range nothing is appended.
 *
 * Returns the length the combined string would have had (taking the length
 * of @dst as at most @size); a result >= @size means truncation.
 */
size_t strlcat(char *dst, const char *src, size_t size)
{
	const char *nul = memchr(dst, '\0', size);
	size_t dlen;

	/* unterminated within the buffer: report truncation, touch nothing */
	if (!nul)
		return size + strlen(src);

	dlen = nul - dst;
	return dlen + strlcpy(dst + dlen, src, size - dlen);
}
#endif
0) + t *= TIME_UNITS_PER_SEC/1000000; + else + return -1; + } + + *time = t; + return 0; +} + +static void print_time(char *buf, int len, __u32 time) +{ + double tmp = time; + + if (tmp >= TIME_UNITS_PER_SEC) + snprintf(buf, len, "%.3gs", tmp/TIME_UNITS_PER_SEC); + else if (tmp >= TIME_UNITS_PER_SEC/1000) + snprintf(buf, len, "%.3gms", tmp/(TIME_UNITS_PER_SEC/1000)); + else + snprintf(buf, len, "%uus", time); +} + +char *sprint_time(__u32 time, char *buf) +{ + print_time(buf, SPRINT_BSIZE-1, time); + return buf; +} + +/* 64 bit times are represented internally in nanoseconds */ +int get_time64(__s64 *time, const char *str) +{ + double nsec; + char *p; + + nsec = strtod(str, &p); + if (p == str) + return -1; + + if (*p) { + if (strcasecmp(p, "s") == 0 || + strcasecmp(p, "sec") == 0 || + strcasecmp(p, "secs") == 0) + nsec *= NSEC_PER_SEC; + else if (strcasecmp(p, "ms") == 0 || + strcasecmp(p, "msec") == 0 || + strcasecmp(p, "msecs") == 0) + nsec *= NSEC_PER_MSEC; + else if (strcasecmp(p, "us") == 0 || + strcasecmp(p, "usec") == 0 || + strcasecmp(p, "usecs") == 0) + nsec *= NSEC_PER_USEC; + else if (strcasecmp(p, "ns") == 0 || + strcasecmp(p, "nsec") == 0 || + strcasecmp(p, "nsecs") == 0) + nsec *= 1; + else + return -1; + } + + *time = nsec; + return 0; +} + +static void print_time64(char *buf, int len, __s64 time) +{ + double nsec = time; + + if (time >= NSEC_PER_SEC) + snprintf(buf, len, "%.3gs", nsec/NSEC_PER_SEC); + else if (time >= NSEC_PER_MSEC) + snprintf(buf, len, "%.3gms", nsec/NSEC_PER_MSEC); + else if (time >= NSEC_PER_USEC) + snprintf(buf, len, "%.3gus", nsec/NSEC_PER_USEC); + else + snprintf(buf, len, "%lldns", time); +} + +char *sprint_time64(__s64 time, char *buf) +{ + print_time64(buf, SPRINT_BSIZE-1, time); + return buf; +} + +int do_batch(const char *name, bool force, + int (*cmd)(int argc, char *argv[], void *data), void *data) +{ + char *line = NULL; + size_t len = 0; + int ret = EXIT_SUCCESS; + + if (name && strcmp(name, "-") != 0) { + if 
/*
 * Consume leading "KEY:VALUE" arguments from the vector described by
 * *@argcp / *@argvp.  Each argument is split in place at its first ':';
 * the key text is converted by @key_cb and the pair is handed to
 * @mapping_cb together with @mapping_cb_data.
 *
 * Processing stops at the first argument without a ':' or when either
 * callback returns non-zero.  *@argcp and *@argvp are advanced past the
 * arguments that were fully processed.
 *
 * Returns 0 normally, 1 if a callback reported failure.
 */
int parse_mapping_gen(int *argcp, char ***argvp,
		      int (*key_cb)(__u32 *keyp, const char *key),
		      int (*mapping_cb)(__u32 key, char *value, void *data),
		      void *mapping_cb_data)
{
	char **cur = *argvp;
	int left = *argcp;
	int failed = 0;

	for (; left > 0; left--, cur++) {
		char *sep = strchr(*cur, ':');
		__u32 key;

		if (sep == NULL)
			break;
		*sep = '\0';

		if (key_cb(&key, *cur) ||
		    mapping_cb(key, sep + 1, mapping_cb_data)) {
			failed = 1;
			break;
		}
	}

	*argcp = left;
	*argvp = cur;
	return failed;
}
/*
 * Count how many times the character @match occurs in @str.
 * A NULL @str, or a @match of '\0', gives 0.
 */
unsigned int get_str_char_count(const char *str, int match)
{
	unsigned int count = 0;
	const char *pos = str;

	/*
	 * strchr() also locates the terminating NUL; advancing past that
	 * hit would walk off the end of the string.
	 */
	if (!str || (char)match == '\0')
		return 0;

	while ((pos = strchr(pos, match))) {
		count++;
		pos++;
	}
	return count;
}
+ mem->indent_level = 0; + return mem; +} + +void free_indent_mem(struct indent_mem *mem) +{ + free(mem); +} + +#define INDENT_STR_STEP 2 + +void inc_indent(struct indent_mem *mem) +{ + if (mem->indent_level + INDENT_STR_STEP > INDENT_STR_MAXLEN) + return; + mem->indent_level += INDENT_STR_STEP; + memset(mem->indent_str, ' ', sizeof(mem->indent_str)); + mem->indent_str[mem->indent_level] = '\0'; +} + +void dec_indent(struct indent_mem *mem) +{ + if (mem->indent_level - INDENT_STR_STEP < 0) + return; + mem->indent_level -= INDENT_STR_STEP; + mem->indent_str[mem->indent_level] = '\0'; +} + +void print_indent(struct indent_mem *mem) +{ + if (mem->indent_level) + printf("%s", mem->indent_str); +} + +const char *proto_n2a(unsigned short id, char *buf, int len, + const struct proto *proto_tb, size_t tb_len) +{ + int i; + + id = ntohs(id); + + for (i = 0; !numeric && i < tb_len; i++) { + if (proto_tb[i].id == id) + return proto_tb[i].name; + } + + snprintf(buf, len, "[%d]", id); + + return buf; +} + +int proto_a2n(unsigned short *id, const char *buf, + const struct proto *proto_tb, size_t tb_len) +{ + int i; + + for (i = 0; i < tb_len; i++) { + if (strcasecmp(proto_tb[i].name, buf) == 0) { + *id = htons(proto_tb[i].id); + return 0; + } + } + if (get_be16(id, buf, 0)) + return -1; + + return 0; +} diff --git a/lib/utils_math.c b/lib/utils_math.c new file mode 100644 index 0000000..9ef3dd6 --- /dev/null +++ b/lib/utils_math.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <asm/types.h> + +#include "utils.h" + +/* See http://physics.nist.gov/cuu/Units/binary.html */ +static const struct rate_suffix { + const char *name; + double scale; +} suffixes[] = { + { "bit", 1. }, + { "Kibit", 1024. }, + { "kbit", 1000. }, + { "mibit", 1024.*1024. }, + { "mbit", 1000000. }, + { "gibit", 1024.*1024.*1024. }, + { "gbit", 1000000000. }, + { "tibit", 1024.*1024.*1024.*1024. 
/*
 * Parse a size such as "1500", "1.5kb" or "2mbit" into a byte count.
 *
 * The number may be fractional and may carry one of these suffixes,
 * compared without regard to case: "b" (bytes), "k"/"kb", "m"/"mb",
 * "g"/"gb" (powers of 1024 bytes) or "kbit", "mbit", "gbit" (powers of
 * 1024 bits).  The result is truncated to a whole number of bytes.
 *
 * Returns 0 on success; -1, leaving *@size untouched, if @str does not
 * start with a number, the suffix is unknown, or the value is negative,
 * NaN or too large for an unsigned int.
 */
int get_size(unsigned int *size, const char *str)
{
	/* first value that no longer fits in an unsigned int */
	const double limit = (double)(~0U) + 1.0;
	double sz;
	char *p;

	sz = strtod(str, &p);
	if (p == str)
		return -1;

	if (*p) {
		if (strcasecmp(p, "kb") == 0 || strcasecmp(p, "k") == 0)
			sz *= 1024;
		else if (strcasecmp(p, "gb") == 0 || strcasecmp(p, "g") == 0)
			sz *= 1024*1024*1024;
		else if (strcasecmp(p, "gbit") == 0)
			sz *= 1024*1024*1024/8;
		else if (strcasecmp(p, "mb") == 0 || strcasecmp(p, "m") == 0)
			sz *= 1024*1024;
		else if (strcasecmp(p, "mbit") == 0)
			sz *= 1024*1024/8;
		else if (strcasecmp(p, "kbit") == 0)
			sz *= 1024/8;
		else if (strcasecmp(p, "b") != 0)
			return -1;
	}

	/*
	 * Range-check before converting: turning a negative, NaN or too
	 * large double into an unsigned int is undefined.  The negated
	 * comparison also rejects NaN.
	 */
	if (!(sz >= 0 && sz < limit))
		return -1;

	*size = sz;

	return 0;
}