diff options
Diffstat (limited to 'tools/lib/bpf')
-rw-r--r-- | tools/lib/bpf/bpf.c | 16 | ||||
-rw-r--r-- | tools/lib/bpf/bpf.h | 5 | ||||
-rw-r--r-- | tools/lib/bpf/bpf_helpers.h | 1 | ||||
-rw-r--r-- | tools/lib/bpf/btf.c | 160 | ||||
-rw-r--r-- | tools/lib/bpf/elf.c | 143 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.c | 239 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.h | 88 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf.map | 8 | ||||
-rw-r--r-- | tools/lib/bpf/libbpf_common.h | 13 | ||||
-rw-r--r-- | tools/lib/bpf/ringbuf.c | 85 |
10 files changed, 698 insertions, 60 deletions
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index b0f1913763..9dc9625651 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -810,6 +810,22 @@ int bpf_link_create(int prog_fd, int target_fd, if (!OPTS_ZEROED(opts, tcx)) return libbpf_err(-EINVAL); break; + case BPF_NETKIT_PRIMARY: + case BPF_NETKIT_PEER: + relative_fd = OPTS_GET(opts, netkit.relative_fd, 0); + relative_id = OPTS_GET(opts, netkit.relative_id, 0); + if (relative_fd && relative_id) + return libbpf_err(-EINVAL); + if (relative_id) { + attr.link_create.netkit.relative_id = relative_id; + attr.link_create.flags |= BPF_F_ID; + } else { + attr.link_create.netkit.relative_fd = relative_fd; + } + attr.link_create.netkit.expected_revision = OPTS_GET(opts, netkit.expected_revision, 0); + if (!OPTS_ZEROED(opts, netkit)) + return libbpf_err(-EINVAL); + break; default: if (!OPTS_ZEROED(opts, flags)) return libbpf_err(-EINVAL); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 74c2887cfd..d0f53772bd 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -415,6 +415,11 @@ struct bpf_link_create_opts { __u32 relative_id; __u64 expected_revision; } tcx; + struct { + __u32 relative_fd; + __u32 relative_id; + __u64 expected_revision; + } netkit; }; size_t :0; }; diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index bbab9ad9dc..77ceea575d 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -181,6 +181,7 @@ enum libbpf_tristate { #define __ksym __attribute__((section(".ksyms"))) #define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted"))) #define __kptr __attribute__((btf_type_tag("kptr"))) +#define __percpu_kptr __attribute__((btf_type_tag("percpu_kptr"))) #define bpf_ksym_exists(sym) ({ \ _Static_assert(!__builtin_constant_p(!!sym), #sym " should be marked as __weak"); \ diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 8484b563b5..ee95fd379d 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -448,6 +448,165 @@ static int btf_parse_type_sec(struct btf *btf) return 0; } +static int btf_validate_str(const struct btf *btf, __u32 str_off, const char *what, __u32 type_id) +{ + const char *s; + + s = btf__str_by_offset(btf, str_off); + if (!s) { + pr_warn("btf: type [%u]: invalid %s (string offset %u)\n", type_id, what, str_off); + return -EINVAL; + } + + return 0; +} + +static int btf_validate_id(const struct btf *btf, __u32 id, __u32 ctx_id) +{ + const struct btf_type *t; + + t = btf__type_by_id(btf, id); + if (!t) { + pr_warn("btf: type [%u]: invalid referenced type ID %u\n", ctx_id, id); + return -EINVAL; + } + + return 0; +} + +static int btf_validate_type(const struct btf *btf, const struct btf_type *t, __u32 id) +{ + __u32 kind = btf_kind(t); + int err, i, n; + + err = btf_validate_str(btf, t->name_off, "type name", id); + if (err) + return err; + + switch (kind) { + case BTF_KIND_UNKN: + case BTF_KIND_INT: + case BTF_KIND_FWD: + case BTF_KIND_FLOAT: + break; + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + case BTF_KIND_VAR: + case BTF_KIND_DECL_TAG: + case BTF_KIND_TYPE_TAG: + err = btf_validate_id(btf, t->type, id); + if (err) + return err; + break; + case BTF_KIND_ARRAY: { + const struct btf_array *a = btf_array(t); + + err = btf_validate_id(btf, a->type, id); + err = err ?: btf_validate_id(btf, a->index_type, id); + if (err) + return err; + break; + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m = btf_members(t); + + n = btf_vlen(t); + for (i = 0; i < n; i++, m++) { + err = btf_validate_str(btf, m->name_off, "field name", id); + err = err ?: btf_validate_id(btf, m->type, id); + if (err) + return err; + } + break; + } + case BTF_KIND_ENUM: { + const struct btf_enum *m = btf_enum(t); + + n = btf_vlen(t); + for (i = 0; i < n; i++, m++) { + err = btf_validate_str(btf, m->name_off, "enum name", id); + if (err) + return err; + } + break; + } + case BTF_KIND_ENUM64: { + const struct btf_enum64 *m = btf_enum64(t); + + n = btf_vlen(t); + for (i = 0; i < n; i++, m++) { + err = btf_validate_str(btf, m->name_off, "enum name", id); + if (err) + return err; + } + break; + } + case BTF_KIND_FUNC: { + const struct btf_type *ft; + + err = btf_validate_id(btf, t->type, id); + if (err) + return err; + ft = btf__type_by_id(btf, t->type); + if (btf_kind(ft) != BTF_KIND_FUNC_PROTO) { + pr_warn("btf: type [%u]: referenced type [%u] is not FUNC_PROTO\n", id, t->type); + return -EINVAL; + } + break; + } + case BTF_KIND_FUNC_PROTO: { + const struct btf_param *m = btf_params(t); + + n = btf_vlen(t); + for (i = 0; i < n; i++, m++) { + err = btf_validate_str(btf, m->name_off, "param name", id); + err = err ?: btf_validate_id(btf, m->type, id); + if (err) + return err; + } + break; + } + case BTF_KIND_DATASEC: { + const struct btf_var_secinfo *m = btf_var_secinfos(t); + + n = btf_vlen(t); + for (i = 0; i < n; i++, m++) { + err = btf_validate_id(btf, m->type, id); + if (err) + return err; + } + break; + } + default: + pr_warn("btf: type [%u]: unrecognized kind %u\n", id, kind); + return -EINVAL; + } + return 0; +} + +/* Validate basic sanity of BTF. It's intentionally less thorough than + * kernel's validation and validates only properties of BTF that libbpf relies + * on to be correct (e.g., valid type IDs, valid string offsets, etc) + */ +static int btf_sanity_check(const struct btf *btf) +{ + const struct btf_type *t; + __u32 i, n = btf__type_cnt(btf); + int err; + + for (i = 1; i < n; i++) { + t = btf_type_by_id(btf, i); + err = btf_validate_type(btf, t, i); + if (err) + return err; + } + return 0; +} + __u32 btf__type_cnt(const struct btf *btf) { return btf->start_id + btf->nr_types; @@ -902,6 +1061,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) err = btf_parse_str_sec(btf); err = err ?: btf_parse_type_sec(btf); + err = err ?: btf_sanity_check(btf); if (err) goto done; diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index 9d0296c172..2a62bf411b 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -1,5 +1,8 @@ // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif #include <libelf.h> #include <gelf.h> #include <fcntl.h> @@ -10,6 +13,17 @@ #define STRERR_BUFSIZE 128 +/* A SHT_GNU_versym section holds 16-bit words. This bit is set if + * the symbol is hidden and can only be seen when referenced using an + * explicit version number. This is a GNU extension. + */ +#define VERSYM_HIDDEN 0x8000 + +/* This is the mask for the rest of the data in a word read from a + * SHT_GNU_versym section. + */ +#define VERSYM_VERSION 0x7fff + int elf_open(const char *binary_path, struct elf_fd *elf_fd) { char errmsg[STRERR_BUFSIZE]; @@ -64,13 +78,18 @@ struct elf_sym { const char *name; GElf_Sym sym; GElf_Shdr sh; + int ver; + bool hidden; }; struct elf_sym_iter { Elf *elf; Elf_Data *syms; + Elf_Data *versyms; + Elf_Data *verdefs; size_t nr_syms; size_t strtabidx; + size_t verdef_strtabidx; size_t next_sym_idx; struct elf_sym sym; int st_type; @@ -111,6 +130,27 @@ static int elf_sym_iter_new(struct elf_sym_iter *iter, iter->nr_syms = iter->syms->d_size / sh.sh_entsize; iter->elf = elf; iter->st_type = st_type; + + /* Version symbol table is meaningful to dynsym only */ + if (sh_type != SHT_DYNSYM) + return 0; + + scn = elf_find_next_scn_by_type(elf, SHT_GNU_versym, NULL); + if (!scn) + return 0; + iter->versyms = elf_getdata(scn, 0); + + scn = elf_find_next_scn_by_type(elf, SHT_GNU_verdef, NULL); + if (!scn) + return 0; + + iter->verdefs = elf_getdata(scn, 0); + if (!iter->verdefs || !gelf_getshdr(scn, &sh)) { + pr_warn("elf: failed to get verdef ELF section in '%s'\n", binary_path); + return -EINVAL; + } + iter->verdef_strtabidx = sh.sh_link; + return 0; } @@ -119,6 +159,7 @@ static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter) struct elf_sym *ret = &iter->sym; GElf_Sym *sym = &ret->sym; const char *name = NULL; + GElf_Versym versym; Elf_Scn *sym_scn; size_t idx; @@ -138,12 +179,83 @@ static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter) iter->next_sym_idx = idx + 1; ret->name = name; + ret->ver = 0; + ret->hidden = false; + + if (iter->versyms) { + if (!gelf_getversym(iter->versyms, idx, &versym)) + continue; + ret->ver = versym & VERSYM_VERSION; + ret->hidden = versym & VERSYM_HIDDEN; + } return ret; } return NULL; } +static const char *elf_get_vername(struct elf_sym_iter *iter, int ver) +{ + GElf_Verdaux verdaux; + GElf_Verdef verdef; + int offset; + + if (!iter->verdefs) + return NULL; + + offset = 0; + while (gelf_getverdef(iter->verdefs, offset, &verdef)) { + if (verdef.vd_ndx != ver) { + if (!verdef.vd_next) + break; + + offset += verdef.vd_next; + continue; + } + + if (!gelf_getverdaux(iter->verdefs, offset + verdef.vd_aux, &verdaux)) + break; + + return elf_strptr(iter->elf, iter->verdef_strtabidx, verdaux.vda_name); + + } + return NULL; +} + +static bool symbol_match(struct elf_sym_iter *iter, int sh_type, struct elf_sym *sym, + const char *name, size_t name_len, const char *lib_ver) +{ + const char *ver_name; + + /* Symbols are in forms of func, func@LIB_VER or func@@LIB_VER + * make sure the func part matches the user specified name + */ + if (strncmp(sym->name, name, name_len) != 0) + return false; + + /* ...but we don't want a search for "foo" to match 'foo2" also, so any + * additional characters in sname should be of the form "@@LIB". + */ + if (sym->name[name_len] != '\0' && sym->name[name_len] != '@') + return false; + + /* If user does not specify symbol version, then we got a match */ + if (!lib_ver) + return true; + + /* If user specifies symbol version, for dynamic symbols, + * get version name from ELF verdef section for comparison. + */ + if (sh_type == SHT_DYNSYM) { + ver_name = elf_get_vername(iter, sym->ver); + if (!ver_name) + return false; + return strcmp(ver_name, lib_ver) == 0; + } + + /* For normal symbols, it is already in form of func@LIB_VER */ + return strcmp(sym->name, name) == 0; +} /* Transform symbol's virtual address (absolute for binaries and relative * for shared libs) into file offset, which is what kernel is expecting @@ -166,7 +278,8 @@ static unsigned long elf_sym_offset(struct elf_sym *sym) long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) { int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; - bool is_shared_lib, is_name_qualified; + const char *at_symbol, *lib_ver; + bool is_shared_lib; long ret = -ENOENT; size_t name_len; GElf_Ehdr ehdr; @@ -179,9 +292,18 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) /* for shared lib case, we do not need to calculate relative offset */ is_shared_lib = ehdr.e_type == ET_DYN; - name_len = strlen(name); - /* Does name specify "@@LIB"? */ - is_name_qualified = strstr(name, "@@") != NULL; + /* Does name specify "@@LIB_VER" or "@LIB_VER" ? */ + at_symbol = strchr(name, '@'); + if (at_symbol) { + name_len = at_symbol - name; + /* skip second @ if it's @@LIB_VER case */ + if (at_symbol[1] == '@') + at_symbol++; + lib_ver = at_symbol + 1; + } else { + name_len = strlen(name); + lib_ver = NULL; + } /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically @@ -201,20 +323,17 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) goto out; while ((sym = elf_sym_iter_next(&iter))) { - /* User can specify func, func@@LIB or func@@LIB_VERSION. */ - if (strncmp(sym->name, name, name_len) != 0) - continue; - /* ...but we don't want a search for "foo" to match 'foo2" also, so any - * additional characters in sname should be of the form "@@LIB". - */ - if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@') + if (!symbol_match(&iter, sh_types[i], sym, name, name_len, lib_ver)) continue; cur_bind = GELF_ST_BIND(sym->sym.st_info); if (ret > 0) { /* handle multiple matches */ - if (last_bind != STB_WEAK && cur_bind != STB_WEAK) { + if (elf_sym_offset(sym) == ret) { + /* same offset, no problem */ + continue; + } else if (last_bind != STB_WEAK && cur_bind != STB_WEAK) { /* Only accept one non-weak bind. */ pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n", sym->name, name, binary_path); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 96ff1aa4bf..df1b550f74 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -82,17 +82,22 @@ static const char * const attach_type_name[] = { [BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind", [BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect", [BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect", + [BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect", [BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind", [BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind", [BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername", [BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername", + [BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername", [BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname", [BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname", + [BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname", [BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg", [BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg", + [BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg", [BPF_CGROUP_SYSCTL] = "cgroup_sysctl", [BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg", [BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg", + [BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg", [BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt", [BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt", [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", @@ -121,6 +126,8 @@ static const char * const attach_type_name[] = { [BPF_TCX_INGRESS] = "tcx_ingress", [BPF_TCX_EGRESS] = "tcx_egress", [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi", + [BPF_NETKIT_PRIMARY] = "netkit_primary", + [BPF_NETKIT_PEER] = "netkit_peer", }; static const char * const link_type_name[] = { @@ -137,6 +144,7 @@ static const char * const link_type_name[] = { [BPF_LINK_TYPE_NETFILTER] = "netfilter", [BPF_LINK_TYPE_TCX] = "tcx", [BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi", + [BPF_LINK_TYPE_NETKIT] = "netkit", }; static const char * const map_type_name[] = { @@ -436,9 +444,11 @@ struct bpf_program { int fd; bool autoload; bool autoattach; + bool sym_global; bool mark_btf_static; enum bpf_prog_type type; enum bpf_attach_type expected_attach_type; + int exception_cb_idx; int prog_ifindex; __u32 attach_btf_obj_fd; @@ -765,6 +775,7 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, prog->type = BPF_PROG_TYPE_UNSPEC; prog->fd = -1; + prog->exception_cb_idx = -1; /* libbpf's convention for SEC("?abc...") is that it's just like * SEC("abc...") but the corresponding bpf_program starts out with @@ -871,14 +882,16 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, if (err) return err; + if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL) + prog->sym_global = true; + /* if function is a global/weak symbol, but has restricted * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC * as static to enable more permissive BPF verification mode * with more outside context available to BPF verifier */ - if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL - && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN - || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)) + if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN + || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)) prog->mark_btf_static = true; nr_progs++; @@ -3142,6 +3155,86 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) } } + if (!kernel_supports(obj, FEAT_BTF_DECL_TAG)) + goto skip_exception_cb; + for (i = 0; i < obj->nr_programs; i++) { + struct bpf_program *prog = &obj->programs[i]; + int j, k, n; + + if (prog_is_subprog(obj, prog)) + continue; + n = btf__type_cnt(obj->btf); + for (j = 1; j < n; j++) { + const char *str = "exception_callback:", *name; + size_t len = strlen(str); + struct btf_type *t; + + t = btf_type_by_id(obj->btf, j); + if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1) + continue; + + name = btf__str_by_offset(obj->btf, t->name_off); + if (strncmp(name, str, len)) + continue; + + t = btf_type_by_id(obj->btf, t->type); + if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) { + pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n", + prog->name); + return -EINVAL; + } + if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off))) + continue; + /* Multiple callbacks are specified for the same prog, + * the verifier will eventually return an error for this + * case, hence simply skip appending a subprog. + */ + if (prog->exception_cb_idx >= 0) { + prog->exception_cb_idx = -1; + break; + } + + name += len; + if (str_is_empty(name)) { + pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n", + prog->name); + return -EINVAL; + } + + for (k = 0; k < obj->nr_programs; k++) { + struct bpf_program *subprog = &obj->programs[k]; + + if (!prog_is_subprog(obj, subprog)) + continue; + if (strcmp(name, subprog->name)) + continue; + /* Enforce non-hidden, as from verifier point of + * view it expects global functions, whereas the + * mark_btf_static fixes up linkage as static. + */ + if (!subprog->sym_global || subprog->mark_btf_static) { + pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n", + prog->name, subprog->name); + return -EINVAL; + } + /* Let's see if we already saw a static exception callback with the same name */ + if (prog->exception_cb_idx >= 0) { + pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n", + prog->name, subprog->name); + return -EINVAL; + } + prog->exception_cb_idx = k; + break; + } + + if (prog->exception_cb_idx >= 0) + continue; + pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name); + return -ENOENT; + } + } +skip_exception_cb: + sanitize = btf_needs_sanitization(obj); if (sanitize) { const void *raw_data; @@ -4251,6 +4344,8 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat scn = elf_sec_by_idx(obj, sec_idx); scn_data = elf_sec_data(obj, scn); + if (!scn_data) + return -LIBBPF_ERRNO__FORMAT; relo_sec_name = elf_sec_str(obj, shdr->sh_name); sec_name = elf_sec_name(obj, scn); @@ -6235,13 +6330,45 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra } static int +bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog, + struct bpf_program *subprog) +{ + struct bpf_insn *insns; + size_t new_cnt; + int err; + + subprog->sub_insn_off = main_prog->insns_cnt; + + new_cnt = main_prog->insns_cnt + subprog->insns_cnt; + insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); + if (!insns) { + pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); + return -ENOMEM; + } + main_prog->insns = insns; + main_prog->insns_cnt = new_cnt; + + memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, + subprog->insns_cnt * sizeof(*insns)); + + pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", + main_prog->name, subprog->insns_cnt, subprog->name); + + /* The subprog insns are now appended. Append its relos too. */ + err = append_subprog_relos(main_prog, subprog); + if (err) + return err; + return 0; +} + +static int bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, struct bpf_program *prog) { - size_t sub_insn_idx, insn_idx, new_cnt; + size_t sub_insn_idx, insn_idx; struct bpf_program *subprog; - struct bpf_insn *insns, *insn; struct reloc_desc *relo; + struct bpf_insn *insn; int err; err = reloc_prog_func_and_line_info(obj, main_prog, prog); @@ -6316,25 +6443,7 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog, * and relocate. */ if (subprog->sub_insn_off == 0) { - subprog->sub_insn_off = main_prog->insns_cnt; - - new_cnt = main_prog->insns_cnt + subprog->insns_cnt; - insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns)); - if (!insns) { - pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name); - return -ENOMEM; - } - main_prog->insns = insns; - main_prog->insns_cnt = new_cnt; - - memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns, - subprog->insns_cnt * sizeof(*insns)); - - pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n", - main_prog->name, subprog->insns_cnt, subprog->name); - - /* The subprog insns are now appended. Append its relos too. */ - err = append_subprog_relos(main_prog, subprog); + err = bpf_object__append_subprog_code(obj, main_prog, subprog); if (err) return err; err = bpf_object__reloc_code(obj, main_prog, subprog); @@ -6568,6 +6677,25 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) prog->name, err); return err; } + + /* Now, also append exception callback if it has not been done already. */ + if (prog->exception_cb_idx >= 0) { + struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx]; + + /* Calling exception callback directly is disallowed, which the + * verifier will reject later. In case it was processed already, + * we can skip this step, otherwise for all other valid cases we + * have to append exception callback now. + */ + if (subprog->sub_insn_off == 0) { + err = bpf_object__append_subprog_code(obj, prog, subprog); + if (err) + return err; + err = bpf_object__reloc_code(obj, prog, subprog); + if (err) + return err; + } + } } /* Process data relos for main programs */ for (i = 0; i < obj->nr_programs; i++) { @@ -8792,6 +8920,8 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */ SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */ SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */ + SEC_DEF("netkit/primary", SCHED_CLS, BPF_NETKIT_PRIMARY, SEC_NONE), + SEC_DEF("netkit/peer", SCHED_CLS, BPF_NETKIT_PEER, SEC_NONE), SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp), SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp), SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp), @@ -8842,14 +8972,19 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE), SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE), SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE), + SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE), SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE), SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE), SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE), SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE), + SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE), SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE), SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE), SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE), SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE), + SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE), SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE), SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE), SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE), @@ -10996,7 +11131,7 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru *link = NULL; - n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%ms", + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", &probe_type, &binary_path, &func_name); switch (n) { case 1: @@ -11506,14 +11641,14 @@ err_out: static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); - char *probe_type = NULL, *binary_path = NULL, *func_name = NULL; - int n, ret = -EINVAL; + char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off; + int n, c, ret = -EINVAL; long offset = 0; *link = NULL; - n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li", - &probe_type, &binary_path, &func_name, &offset); + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]", + &probe_type, &binary_path, &func_name); switch (n) { case 1: /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */ @@ -11524,7 +11659,17 @@ static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf prog->name, prog->sec_name); break; case 3: - case 4: + /* check if user specifies `+offset`, if yes, this should be + * the last part of the string, make sure sscanf read to EOL + */ + func_off = strrchr(func_name, '+'); + if (func_off) { + n = sscanf(func_off, "+%li%n", &offset, &c); + if (n == 1 && *(func_off + c) == '\0') + func_off[0] = '\0'; + else + offset = 0; + } opts.retprobe = strcmp(probe_type, "uretprobe") == 0 || strcmp(probe_type, "uretprobe.s") == 0; if (opts.retprobe && offset != 0) { @@ -11988,6 +12133,40 @@ bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex, return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts); } +struct bpf_link * +bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex, + const struct bpf_netkit_opts *opts) +{ + LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); + __u32 relative_id; + int relative_fd; + + if (!OPTS_VALID(opts, bpf_netkit_opts)) + return libbpf_err_ptr(-EINVAL); + + relative_id = OPTS_GET(opts, relative_id, 0); + relative_fd = OPTS_GET(opts, relative_fd, 0); + + /* validate we don't have unexpected combinations of non-zero fields */ + if (!ifindex) { + pr_warn("prog '%s': target netdevice ifindex cannot be zero\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + if (relative_fd && relative_id) { + pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + link_create_opts.netkit.expected_revision = OPTS_GET(opts, expected_revision, 0); + link_create_opts.netkit.relative_fd = relative_fd; + link_create_opts.netkit.relative_id = relative_id; + link_create_opts.flags = OPTS_GET(opts, flags, 0); + + return bpf_program_attach_fd(prog, ifindex, "netkit", &link_create_opts); +} + struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, int target_fd, const char *attach_func_name) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 0e52621cba..6cd9c50162 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -800,6 +800,21 @@ LIBBPF_API struct bpf_link * bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex, const struct bpf_tcx_opts *opts); +struct bpf_netkit_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + __u32 flags; + __u32 relative_fd; + __u32 relative_id; + __u64 expected_revision; + size_t :0; +}; +#define bpf_netkit_opts__last_field expected_revision + +LIBBPF_API struct bpf_link * +bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex, + const struct bpf_netkit_opts *opts); + struct bpf_map; LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map); @@ -1229,6 +1244,7 @@ LIBBPF_API int bpf_tc_query(const struct bpf_tc_hook *hook, /* Ring buffer APIs */ struct ring_buffer; +struct ring; struct user_ring_buffer; typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size); @@ -1249,6 +1265,78 @@ LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms); LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb); LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb); +/** + * @brief **ring_buffer__ring()** returns the ringbuffer object inside a given + * ringbuffer manager representing a single BPF_MAP_TYPE_RINGBUF map instance. + * + * @param rb A ringbuffer manager object. + * @param idx An index into the ringbuffers contained within the ringbuffer + * manager object. The index is 0-based and corresponds to the order in which + * ring_buffer__add was called. + * @return A ringbuffer object on success; NULL and errno set if the index is + * invalid. + */ +LIBBPF_API struct ring *ring_buffer__ring(struct ring_buffer *rb, + unsigned int idx); + +/** + * @brief **ring__consumer_pos()** returns the current consumer position in the + * given ringbuffer. + * + * @param r A ringbuffer object. + * @return The current consumer position. + */ +LIBBPF_API unsigned long ring__consumer_pos(const struct ring *r); + +/** + * @brief **ring__producer_pos()** returns the current producer position in the + * given ringbuffer. + * + * @param r A ringbuffer object. + * @return The current producer position. + */ +LIBBPF_API unsigned long ring__producer_pos(const struct ring *r); + +/** + * @brief **ring__avail_data_size()** returns the number of bytes in the + * ringbuffer not yet consumed. This has no locking associated with it, so it + * can be inaccurate if operations are ongoing while this is called. However, it + * should still show the correct trend over the long-term. + * + * @param r A ringbuffer object. + * @return The number of bytes not yet consumed. + */ +LIBBPF_API size_t ring__avail_data_size(const struct ring *r); + +/** + * @brief **ring__size()** returns the total size of the ringbuffer's map data + * area (excluding special producer/consumer pages). Effectively this gives the + * amount of usable bytes of data inside the ringbuffer. + * + * @param r A ringbuffer object. + * @return The total size of the ringbuffer map data area. + */ +LIBBPF_API size_t ring__size(const struct ring *r); + +/** + * @brief **ring__map_fd()** returns the file descriptor underlying the given + * ringbuffer. + * + * @param r A ringbuffer object. + * @return The underlying ringbuffer file descriptor + */ +LIBBPF_API int ring__map_fd(const struct ring *r); + +/** + * @brief **ring__consume()** consumes available ringbuffer data without event + * polling. + * + * @param r A ringbuffer object. + * @return The number of records consumed (or INT_MAX, whichever is less), or + * a negative number if any of the callbacks return an error. + */ +LIBBPF_API int ring__consume(struct ring *r); + struct user_ring_buffer_opts { size_t sz; /* size of this struct, for forward/backward compatibility */ }; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 5771232149..b52dc28dc8 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -398,6 +398,14 @@ LIBBPF_1.3.0 { bpf_object__unpin; bpf_prog_detach_opts; bpf_program__attach_netfilter; + bpf_program__attach_netkit; bpf_program__attach_tcx; bpf_program__attach_uprobe_multi; + ring__avail_data_size; + ring__consume; + ring__consumer_pos; + ring__map_fd; + ring__producer_pos; + ring__size; + ring_buffer__ring; } LIBBPF_1.2.0; diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h index b7060f2544..8fe248e14e 100644 --- a/tools/lib/bpf/libbpf_common.h +++ b/tools/lib/bpf/libbpf_common.h @@ -79,11 +79,14 @@ */ #define LIBBPF_OPTS_RESET(NAME, ...) \ do { \ - memset(&NAME, 0, sizeof(NAME)); \ - NAME = (typeof(NAME)) { \ - .sz = sizeof(NAME), \ - __VA_ARGS__ \ - }; \ + typeof(NAME) ___##NAME = ({ \ + memset(&___##NAME, 0, sizeof(NAME)); \ + (typeof(NAME)) { \ + .sz = sizeof(NAME), \ + __VA_ARGS__ \ + }; \ + }); \ + memcpy(&NAME, &___##NAME, sizeof(NAME)); \ } while (0) #endif /* __LIBBPF_LIBBPF_COMMON_H */ diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 02199364db..aacb64278a 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -34,7 +34,7 @@ struct ring { struct ring_buffer { struct epoll_event *events; - struct ring *rings; + struct ring **rings; size_t page_size; int epoll_fd; int ring_cnt; @@ -57,7 +57,7 @@ struct ringbuf_hdr { __u32 pad; }; -static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r) +static void ringbuf_free_ring(struct ring_buffer *rb, struct ring *r) { if (r->consumer_pos) { munmap(r->consumer_pos, rb->page_size); @@ -67,6 +67,8 @@ static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r) munmap(r->producer_pos, rb->page_size + 2 * (r->mask + 1)); r->producer_pos = NULL; } + + free(r); } /* Add extra RINGBUF maps to this ring buffer manager */ @@ -107,8 +109,10 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, return libbpf_err(-ENOMEM); rb->events = tmp; - r = &rb->rings[rb->ring_cnt]; - memset(r, 0, sizeof(*r)); + r = calloc(1, sizeof(*r)); + if (!r) + return libbpf_err(-ENOMEM); + rb->rings[rb->ring_cnt] = r; r->map_fd = map_fd; r->sample_cb = sample_cb; @@ -121,7 +125,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, err = -errno; pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n", map_fd, err); - return libbpf_err(err); + goto err_out; } r->consumer_pos = tmp; @@ -131,16 +135,16 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, */ mmap_sz = rb->page_size + 2 * (__u64)info.max_entries; if (mmap_sz != (__u64)(size_t)mmap_sz) { + err = -E2BIG; pr_warn("ringbuf: ring buffer size (%u) is too big\n", info.max_entries); - return libbpf_err(-E2BIG); + goto err_out; } tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; - ringbuf_unmap_ring(rb, r); pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n", map_fd, err); - return libbpf_err(err); + goto err_out; } r->producer_pos = tmp; r->data = tmp + rb->page_size; @@ -152,14 +156,17 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, e->data.fd = rb->ring_cnt; if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, e) < 0) { err = -errno; - ringbuf_unmap_ring(rb, r); pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n", map_fd, err); - return libbpf_err(err); + goto err_out; } rb->ring_cnt++; return 0; + +err_out: + ringbuf_free_ring(rb, r); + return libbpf_err(err); } void ring_buffer__free(struct ring_buffer *rb) @@ -170,7 +177,7 @@ void ring_buffer__free(struct ring_buffer *rb) return; for (i = 0; i < rb->ring_cnt; ++i) - ringbuf_unmap_ring(rb, &rb->rings[i]); + ringbuf_free_ring(rb, rb->rings[i]); if (rb->epoll_fd >= 0) close(rb->epoll_fd); @@ -278,7 +285,7 @@ int ring_buffer__consume(struct ring_buffer *rb) int i; for (i = 0; i < rb->ring_cnt; i++) { - struct ring *ring = &rb->rings[i]; + struct ring *ring = rb->rings[i]; err = ringbuf_process_ring(ring); if (err < 0) @@ -305,7 +312,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms) for (i = 0; i < cnt; i++) { __u32 ring_id = rb->events[i].data.fd; - struct ring *ring = &rb->rings[ring_id]; + struct ring *ring = rb->rings[ring_id]; err = ringbuf_process_ring(ring); if (err < 0) @@ -323,6 +330,58 @@ int ring_buffer__epoll_fd(const struct ring_buffer *rb) return rb->epoll_fd; } +struct ring *ring_buffer__ring(struct ring_buffer *rb, unsigned int idx) +{ + if (idx >= rb->ring_cnt) + return errno = ERANGE, NULL; + + return rb->rings[idx]; +} + +unsigned long ring__consumer_pos(const struct ring *r) +{ + /* Synchronizes with smp_store_release() in ringbuf_process_ring(). */ + return smp_load_acquire(r->consumer_pos); +} + +unsigned long ring__producer_pos(const struct ring *r) +{ + /* Synchronizes with smp_store_release() in __bpf_ringbuf_reserve() in + * the kernel. + */ + return smp_load_acquire(r->producer_pos); +} + +size_t ring__avail_data_size(const struct ring *r) +{ + unsigned long cons_pos, prod_pos; + + cons_pos = ring__consumer_pos(r); + prod_pos = ring__producer_pos(r); + return prod_pos - cons_pos; +} + +size_t ring__size(const struct ring *r) +{ + return r->mask + 1; +} + +int ring__map_fd(const struct ring *r) +{ + return r->map_fd; +} + +int ring__consume(struct ring *r) +{ + int64_t res; + + res = ringbuf_process_ring(r); + if (res < 0) + return libbpf_err(res); + + return res > INT_MAX ? INT_MAX : res; +} + static void user_ringbuf_unmap_ring(struct user_ring_buffer *rb) { if (rb->consumer_pos) { |