summaryrefslogtreecommitdiffstats
path: root/src/contrib/libbpf
diff options
context:
space:
mode:
Diffstat (limited to 'src/contrib/libbpf')
-rw-r--r--src/contrib/libbpf/LICENSE1
-rw-r--r--src/contrib/libbpf/bpf/bpf.c710
-rw-r--r--src/contrib/libbpf/bpf/bpf.h184
-rw-r--r--src/contrib/libbpf/bpf/bpf_core_read.h263
-rw-r--r--src/contrib/libbpf/bpf/bpf_endian.h72
-rw-r--r--src/contrib/libbpf/bpf/bpf_helper_defs.h2759
-rw-r--r--src/contrib/libbpf/bpf/bpf_helpers.h47
-rw-r--r--src/contrib/libbpf/bpf/bpf_prog_linfo.c246
-rw-r--r--src/contrib/libbpf/bpf/bpf_tracing.h195
-rw-r--r--src/contrib/libbpf/bpf/btf.c2884
-rw-r--r--src/contrib/libbpf/bpf/btf.h311
-rw-r--r--src/contrib/libbpf/bpf/btf_dump.c1386
-rw-r--r--src/contrib/libbpf/bpf/hashmap.c229
-rw-r--r--src/contrib/libbpf/bpf/hashmap.h178
-rw-r--r--src/contrib/libbpf/bpf/libbpf.c6581
-rw-r--r--src/contrib/libbpf/bpf/libbpf.h637
-rw-r--r--src/contrib/libbpf/bpf/libbpf_errno.c63
-rw-r--r--src/contrib/libbpf/bpf/libbpf_internal.h217
-rw-r--r--src/contrib/libbpf/bpf/libbpf_probes.c323
-rw-r--r--src/contrib/libbpf/bpf/libbpf_util.h47
-rw-r--r--src/contrib/libbpf/bpf/netlink.c451
-rw-r--r--src/contrib/libbpf/bpf/nlattr.c195
-rw-r--r--src/contrib/libbpf/bpf/nlattr.h106
-rw-r--r--src/contrib/libbpf/bpf/str_error.c18
-rw-r--r--src/contrib/libbpf/bpf/str_error.h6
-rw-r--r--src/contrib/libbpf/bpf/xsk.c797
-rw-r--r--src/contrib/libbpf/bpf/xsk.h246
-rw-r--r--src/contrib/libbpf/include/asm/barrier.h7
-rw-r--r--src/contrib/libbpf/include/linux/compiler.h70
-rw-r--r--src/contrib/libbpf/include/linux/err.h38
-rw-r--r--src/contrib/libbpf/include/linux/filter.h118
-rw-r--r--src/contrib/libbpf/include/linux/kernel.h44
-rw-r--r--src/contrib/libbpf/include/linux/list.h82
-rw-r--r--src/contrib/libbpf/include/linux/overflow.h90
-rw-r--r--src/contrib/libbpf/include/linux/ring_buffer.h18
-rw-r--r--src/contrib/libbpf/include/linux/types.h31
-rw-r--r--src/contrib/libbpf/include/uapi/linux/bpf.h3692
-rw-r--r--src/contrib/libbpf/include/uapi/linux/bpf_common.h57
-rw-r--r--src/contrib/libbpf/include/uapi/linux/btf.h165
-rw-r--r--src/contrib/libbpf/include/uapi/linux/if_link.h1033
-rw-r--r--src/contrib/libbpf/include/uapi/linux/if_xdp.h108
-rw-r--r--src/contrib/libbpf/include/uapi/linux/netlink.h252
42 files changed, 24957 insertions, 0 deletions
diff --git a/src/contrib/libbpf/LICENSE b/src/contrib/libbpf/LICENSE
new file mode 100644
index 0000000..149c7b0
--- /dev/null
+++ b/src/contrib/libbpf/LICENSE
@@ -0,0 +1 @@
+../licenses/LGPL-2.1
\ No newline at end of file
diff --git a/src/contrib/libbpf/bpf/bpf.c b/src/contrib/libbpf/bpf/bpf.c
new file mode 100644
index 0000000..98596e1
--- /dev/null
+++ b/src/contrib/libbpf/bpf/bpf.c
@@ -0,0 +1,710 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * common eBPF ELF operations.
+ *
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <memory.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <errno.h>
+#include <linux/bpf.h>
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+
+/*
+ * When building perf, unistd.h is overridden. __NR_bpf is
+ * required to be defined explicitly.
+ */
+#ifndef __NR_bpf
+/* Fallback bpf syscall numbers, chosen by the compiler's target-architecture
+ * macro. Only used when no header has defined __NR_bpf already; an
+ * architecture that is not listed stops the build with #error.
+ */
+# if defined(__i386__)
+# define __NR_bpf 357
+# elif defined(__x86_64__)
+# define __NR_bpf 321
+# elif defined(__aarch64__)
+# define __NR_bpf 280
+# elif defined(__sparc__)
+# define __NR_bpf 349
+# elif defined(__s390__)
+# define __NR_bpf 351
+# elif defined(__arc__)
+# define __NR_bpf 280
+# else
+# error __NR_bpf not defined. libbpf does not support your arch.
+# endif
+#endif
+
+/* Convert a pointer into the __u64 form used for pointer-carrying fields of
+ * union bpf_attr. The pointer is first converted to unsigned long and only
+ * then widened to __u64.
+ */
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+	unsigned long addr = (unsigned long) ptr;
+
+	return (__u64) addr;
+}
+
+/* Invoke the bpf system call.
+ *
+ * cmd, attr and size are handed to syscall(__NR_bpf, ...) unchanged and the
+ * syscall() result is returned converted to int.
+ */
+static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
+			  unsigned int size)
+{
+	long rc = syscall(__NR_bpf, cmd, attr, size);
+
+	return rc;
+}
+
+/* Issue BPF_PROG_LOAD, repeating the call for as long as it fails with errno
+ * set to EAGAIN. Returns the value of the last sys_bpf() call.
+ */
+static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size)
+{
+	for (;;) {
+		int fd = sys_bpf(BPF_PROG_LOAD, attr, size);
+
+		/* stop on success or on any failure other than EAGAIN */
+		if (fd >= 0 || errno != EAGAIN)
+			return fd;
+	}
+}
+
+/* Create a map from the description in create_attr.
+ *
+ * Every field of create_attr is copied into a zero-filled union bpf_attr and
+ * BPF_MAP_CREATE is issued. When a name is given, at most
+ * BPF_OBJ_NAME_LEN - 1 bytes of it are copied into attr.map_name.
+ * Returns the sys_bpf() result.
+ */
+int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
+{
+	const char *map_name = create_attr->name;
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+
+	if (map_name)
+		memcpy(attr.map_name, map_name,
+		       min(strlen(map_name), BPF_OBJ_NAME_LEN - 1));
+
+	attr.map_type = create_attr->map_type;
+	attr.map_flags = create_attr->map_flags;
+	attr.key_size = create_attr->key_size;
+	attr.value_size = create_attr->value_size;
+	attr.max_entries = create_attr->max_entries;
+	attr.numa_node = create_attr->numa_node;
+	attr.map_ifindex = create_attr->map_ifindex;
+	attr.inner_map_fd = create_attr->inner_map_fd;
+	attr.btf_fd = create_attr->btf_fd;
+	attr.btf_key_type_id = create_attr->btf_key_type_id;
+	attr.btf_value_type_id = create_attr->btf_value_type_id;
+
+	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+
+/* Create a named map, optionally bound to a NUMA node.
+ *
+ * Fills a struct bpf_create_map_attr from the arguments and calls
+ * bpf_create_map_xattr(). A node >= 0 is stored in numa_node and
+ * BPF_F_NUMA_NODE is OR-ed into the flags; a negative node leaves both alone.
+ */
+int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
+			int key_size, int value_size, int max_entries,
+			__u32 map_flags, int node)
+{
+	struct bpf_create_map_attr map_attr = {
+		.name = name,
+		.map_type = map_type,
+		.map_flags = map_flags,
+		.key_size = key_size,
+		.value_size = value_size,
+		.max_entries = max_entries,
+	};
+
+	if (node >= 0) {
+		map_attr.map_flags |= BPF_F_NUMA_NODE;
+		map_attr.numa_node = node;
+	}
+
+	return bpf_create_map_xattr(&map_attr);
+}
+
+/* Create an unnamed map: packs the arguments into a struct
+ * bpf_create_map_attr (all other fields zero) and calls
+ * bpf_create_map_xattr().
+ */
+int bpf_create_map(enum bpf_map_type map_type, int key_size,
+		   int value_size, int max_entries, __u32 map_flags)
+{
+	struct bpf_create_map_attr map_attr = {
+		.map_type = map_type,
+		.map_flags = map_flags,
+		.key_size = key_size,
+		.value_size = value_size,
+		.max_entries = max_entries,
+	};
+
+	return bpf_create_map_xattr(&map_attr);
+}
+
+/* Create a named map: packs the arguments into a struct bpf_create_map_attr
+ * (all other fields zero) and calls bpf_create_map_xattr().
+ */
+int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
+			int key_size, int value_size, int max_entries,
+			__u32 map_flags)
+{
+	struct bpf_create_map_attr map_attr = {
+		.name = name,
+		.map_type = map_type,
+		.map_flags = map_flags,
+		.key_size = key_size,
+		.value_size = value_size,
+		.max_entries = max_entries,
+	};
+
+	return bpf_create_map_xattr(&map_attr);
+}
+
+/* Create a map whose attr carries an inner_map_fd, optionally bound to a
+ * NUMA node.
+ *
+ * value_size is always set to 4. When a name is given, at most
+ * BPF_OBJ_NAME_LEN - 1 bytes of it are copied into attr.map_name. A
+ * node >= 0 is stored in numa_node and BPF_F_NUMA_NODE is OR-ed into the
+ * flags. Returns the sys_bpf() result of BPF_MAP_CREATE.
+ */
+int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
+			       int key_size, int inner_map_fd, int max_entries,
+			       __u32 map_flags, int node)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+
+	if (name)
+		memcpy(attr.map_name, name,
+		       min(strlen(name), BPF_OBJ_NAME_LEN - 1));
+
+	attr.map_type = map_type;
+	attr.map_flags = map_flags;
+	attr.key_size = key_size;
+	attr.value_size = 4;
+	attr.max_entries = max_entries;
+	attr.inner_map_fd = inner_map_fd;
+
+	if (node >= 0) {
+		attr.numa_node = node;
+		attr.map_flags |= BPF_F_NUMA_NODE;
+	}
+
+	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+
+/* Same as bpf_create_map_in_map_node() with node fixed to -1. */
+int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
+			  int key_size, int inner_map_fd, int max_entries,
+			  __u32 map_flags)
+{
+	const int no_node = -1;
+
+	return bpf_create_map_in_map_node(map_type, name, key_size,
+					  inner_map_fd, max_entries,
+					  map_flags, no_node);
+}
+
+/* Build a heap copy of an array of cnt records in which the tail of every
+ * record is zeroed.
+ *
+ * orecord:           source array; each record is actual_rec_size bytes
+ * cnt:               number of records
+ * actual_rec_size:   record size used by the caller
+ * expected_rec_size: leading bytes of each record to keep; the remaining
+ *                    actual_rec_size - expected_rec_size bytes are set to 0
+ *
+ * Returns the new array (release with free()), or NULL when the allocation
+ * fails, when the total size does not fit in size_t, or when
+ * expected_rec_size exceeds actual_rec_size.
+ */
+static void *
+alloc_zero_tailing_info(const void *orecord, __u32 cnt,
+			__u32 actual_rec_size, __u32 expected_rec_size)
+{
+	__u64 info_len = (__u64)actual_rec_size * cnt;
+	const char *src = orecord;
+	char *info, *dst;
+	__u32 i;
+
+	/* the tail length computed below would wrap around otherwise */
+	if (expected_rec_size > actual_rec_size)
+		return NULL;
+
+	/* malloc() takes size_t; refuse lengths it would silently truncate */
+	if (info_len != (size_t)info_len)
+		return NULL;
+
+	info = malloc(info_len);
+	if (!info)
+		return NULL;
+
+	/* zero out bytes kernel does not understand */
+	dst = info;
+	for (i = 0; i < cnt; i++) {
+		memcpy(dst, src, expected_rec_size);
+		memset(dst + expected_rec_size, 0,
+		       actual_rec_size - expected_rec_size);
+		src += actual_rec_size;
+		dst += actual_rec_size;
+	}
+
+	return info;
+}
+
+/* Load a BPF program described by load_attr.
+ *
+ * load_attr:  program description; must not be NULL
+ * log_buf:    optional log buffer; log_buf and log_buf_sz must be both set
+ *             or both zero
+ * log_buf_sz: size of log_buf
+ *
+ * Returns -EINVAL directly when the arguments are inconsistent (see the
+ * checks at the top). Otherwise returns the result of the last
+ * BPF_PROG_LOAD attempt made by sys_bpf_prog_load().
+ *
+ * After a failed first attempt the function may retry: with func/line info
+ * records whose tails were zeroed, and finally with logging enabled when
+ * the caller supplied a buffer but log_level was 0.
+ */
+int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
+ char *log_buf, size_t log_buf_sz)
+{
+ void *finfo = NULL, *linfo = NULL;
+ union bpf_attr attr;
+ __u32 log_level;
+ int fd;
+
+ /* log_buf and log_buf_sz have to be given together or not at all */
+ if (!load_attr || !log_buf != !log_buf_sz)
+ return -EINVAL;
+
+ /* only the three low bits are accepted, and logging needs a buffer */
+ log_level = load_attr->log_level;
+ if (log_level > (4 | 2 | 1) || (log_level && !log_buf))
+ return -EINVAL;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = load_attr->prog_type;
+ attr.expected_attach_type = load_attr->expected_attach_type;
+ /* tracing programs pass attach_btf_id/attach_prog_fd; every other type
+  * passes prog_ifindex/kern_version
+  */
+ if (attr.prog_type == BPF_PROG_TYPE_TRACING) {
+ attr.attach_btf_id = load_attr->attach_btf_id;
+ attr.attach_prog_fd = load_attr->attach_prog_fd;
+ } else {
+ attr.prog_ifindex = load_attr->prog_ifindex;
+ attr.kern_version = load_attr->kern_version;
+ }
+ attr.insn_cnt = (__u32)load_attr->insns_cnt;
+ attr.insns = ptr_to_u64(load_attr->insns);
+ attr.license = ptr_to_u64(load_attr->license);
+
+ /* the first attempt logs only when the caller asked for a log level */
+ attr.log_level = log_level;
+ if (log_level) {
+ attr.log_buf = ptr_to_u64(log_buf);
+ attr.log_size = log_buf_sz;
+ } else {
+ attr.log_buf = ptr_to_u64(NULL);
+ attr.log_size = 0;
+ }
+
+ attr.prog_btf_fd = load_attr->prog_btf_fd;
+ attr.func_info_rec_size = load_attr->func_info_rec_size;
+ attr.func_info_cnt = load_attr->func_info_cnt;
+ attr.func_info = ptr_to_u64(load_attr->func_info);
+ attr.line_info_rec_size = load_attr->line_info_rec_size;
+ attr.line_info_cnt = load_attr->line_info_cnt;
+ attr.line_info = ptr_to_u64(load_attr->line_info);
+ /* copy at most BPF_OBJ_NAME_LEN - 1 bytes of the name */
+ if (load_attr->name)
+ memcpy(attr.prog_name, load_attr->name,
+ min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1));
+ attr.prog_flags = load_attr->prog_flags;
+
+ fd = sys_bpf_prog_load(&attr, sizeof(attr));
+ if (fd >= 0)
+ return fd;
+
+ /* After bpf_prog_load, the kernel may modify certain attributes
+ * to give user space a hint how to deal with loading failure.
+ * Check to see whether we can make some changes and load again.
+ */
+ /* Each of finfo/linfo is replaced at most once, so the loop runs at
+  * most twice. A record size in attr that is now smaller than the
+  * caller's is taken as the size the kernel expects.
+  */
+ while (errno == E2BIG && (!finfo || !linfo)) {
+ if (!finfo && attr.func_info_cnt &&
+ attr.func_info_rec_size < load_attr->func_info_rec_size) {
+ /* try with corrected func info records */
+ finfo = alloc_zero_tailing_info(load_attr->func_info,
+ load_attr->func_info_cnt,
+ load_attr->func_info_rec_size,
+ attr.func_info_rec_size);
+ if (!finfo)
+ goto done;
+
+ attr.func_info = ptr_to_u64(finfo);
+ attr.func_info_rec_size = load_attr->func_info_rec_size;
+ } else if (!linfo && attr.line_info_cnt &&
+ attr.line_info_rec_size <
+ load_attr->line_info_rec_size) {
+ /* same correction for line info records */
+ linfo = alloc_zero_tailing_info(load_attr->line_info,
+ load_attr->line_info_cnt,
+ load_attr->line_info_rec_size,
+ attr.line_info_rec_size);
+ if (!linfo)
+ goto done;
+
+ attr.line_info = ptr_to_u64(linfo);
+ attr.line_info_rec_size = load_attr->line_info_rec_size;
+ } else {
+ break;
+ }
+
+ fd = sys_bpf_prog_load(&attr, sizeof(attr));
+
+ if (fd >= 0)
+ goto done;
+ }
+
+ /* nothing more to try if logging was already on or there is no buffer */
+ if (log_level || !log_buf)
+ goto done;
+
+ /* Try again with log */
+ attr.log_buf = ptr_to_u64(log_buf);
+ attr.log_size = log_buf_sz;
+ attr.log_level = 1;
+ log_buf[0] = 0;
+ fd = sys_bpf_prog_load(&attr, sizeof(attr));
+done:
+ /* both pointers are NULL unless a corrected copy was allocated above */
+ free(finfo);
+ free(linfo);
+ return fd;
+}
+
+/* Convenience front end for bpf_load_program_xattr(): only the program type,
+ * instructions, license and kernel version are set; every other attribute is
+ * left zeroed (no name, expected_attach_type 0).
+ */
+int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+		     size_t insns_cnt, const char *license,
+		     __u32 kern_version, char *log_buf,
+		     size_t log_buf_sz)
+{
+	struct bpf_load_program_attr opts;
+
+	memset(&opts, 0, sizeof(opts));
+	opts.name = NULL;
+	opts.expected_attach_type = 0;
+	opts.prog_type = type;
+	opts.license = license;
+	opts.kern_version = kern_version;
+	opts.insns_cnt = insns_cnt;
+	opts.insns = insns;
+
+	return bpf_load_program_xattr(&opts, log_buf, log_buf_sz);
+}
+
+/* Issue a single BPF_PROG_LOAD with caller-chosen program flags and log
+ * level.
+ *
+ * type, insns, insns_cnt, license, kern_version and prog_flags describe the
+ * program. log_buf, log_buf_sz and log_level are forwarded to the kernel as
+ * given. When a non-empty log buffer is supplied its first byte is cleared
+ * before the call.
+ *
+ * Returns the result of sys_bpf_prog_load().
+ */
+int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+		       size_t insns_cnt, __u32 prog_flags, const char *license,
+		       __u32 kern_version, char *log_buf, size_t log_buf_sz,
+		       int log_level)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_type = type;
+	attr.insn_cnt = (__u32)insns_cnt;
+	attr.insns = ptr_to_u64(insns);
+	attr.license = ptr_to_u64(license);
+	attr.log_buf = ptr_to_u64(log_buf);
+	attr.log_size = log_buf_sz;
+	attr.log_level = log_level;
+	/* only touch the buffer when there is room for the terminator;
+	 * a NULL or zero-sized log buffer must not be written to
+	 */
+	if (log_buf && log_buf_sz)
+		log_buf[0] = 0;
+	attr.kern_version = kern_version;
+	attr.prog_flags = prog_flags;
+
+	return sys_bpf_prog_load(&attr, sizeof(attr));
+}
+
+/* Issue BPF_MAP_UPDATE_ELEM on map fd with the given key and value pointers;
+ * flags is forwarded unchanged. Returns the sys_bpf() result.
+ */
+int bpf_map_update_elem(int fd, const void *key, const void *value,
+			__u64 flags)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.flags = flags;
+	req.value = ptr_to_u64(value);
+	req.key = ptr_to_u64(key);
+	req.map_fd = fd;
+
+	return sys_bpf(BPF_MAP_UPDATE_ELEM, &req, sizeof(req));
+}
+
+/* Issue BPF_MAP_LOOKUP_ELEM on map fd with the given key and value pointers.
+ * Returns the sys_bpf() result.
+ */
+int bpf_map_lookup_elem(int fd, const void *key, void *value)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.value = ptr_to_u64(value);
+	req.key = ptr_to_u64(key);
+	req.map_fd = fd;
+
+	return sys_bpf(BPF_MAP_LOOKUP_ELEM, &req, sizeof(req));
+}
+
+/* Like bpf_map_lookup_elem(), but additionally forwards flags in the
+ * attribute passed to BPF_MAP_LOOKUP_ELEM. Returns the sys_bpf() result.
+ */
+int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.flags = flags;
+	req.value = ptr_to_u64(value);
+	req.key = ptr_to_u64(key);
+	req.map_fd = fd;
+
+	return sys_bpf(BPF_MAP_LOOKUP_ELEM, &req, sizeof(req));
+}
+
+/* Issue BPF_MAP_LOOKUP_AND_DELETE_ELEM on map fd with the given key and
+ * value pointers. Returns the sys_bpf() result.
+ */
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.value = ptr_to_u64(value);
+	req.key = ptr_to_u64(key);
+	req.map_fd = fd;
+
+	return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &req, sizeof(req));
+}
+
+/* Issue BPF_MAP_DELETE_ELEM on map fd for the given key pointer.
+ * Returns the sys_bpf() result.
+ */
+int bpf_map_delete_elem(int fd, const void *key)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.key = ptr_to_u64(key);
+	req.map_fd = fd;
+
+	return sys_bpf(BPF_MAP_DELETE_ELEM, &req, sizeof(req));
+}
+
+/* Issue BPF_MAP_GET_NEXT_KEY on map fd, passing key and the next_key output
+ * pointer. Returns the sys_bpf() result.
+ */
+int bpf_map_get_next_key(int fd, const void *key, void *next_key)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.next_key = ptr_to_u64(next_key);
+	req.key = ptr_to_u64(key);
+	req.map_fd = fd;
+
+	return sys_bpf(BPF_MAP_GET_NEXT_KEY, &req, sizeof(req));
+}
+
+/* Issue BPF_MAP_FREEZE on map fd. Returns the sys_bpf() result. */
+int bpf_map_freeze(int fd)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.map_fd = fd;
+
+	return sys_bpf(BPF_MAP_FREEZE, &req, sizeof(req));
+}
+
+/* Issue BPF_OBJ_PIN for descriptor fd with the given pathname.
+ * Returns the sys_bpf() result.
+ */
+int bpf_obj_pin(int fd, const char *pathname)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.bpf_fd = fd;
+	req.pathname = ptr_to_u64(pathname);
+
+	return sys_bpf(BPF_OBJ_PIN, &req, sizeof(req));
+}
+
+/* Issue BPF_OBJ_GET for pathname. Returns the sys_bpf() result. */
+int bpf_obj_get(const char *pathname)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.pathname = ptr_to_u64(pathname);
+
+	return sys_bpf(BPF_OBJ_GET, &req, sizeof(req));
+}
+
+/* Issue BPF_PROG_ATTACH: prog_fd goes into attach_bpf_fd, target_fd, type
+ * and flags into the matching attribute fields.
+ * Returns the sys_bpf() result.
+ */
+int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
+		    unsigned int flags)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.attach_bpf_fd = prog_fd;
+	req.target_fd = target_fd;
+	req.attach_flags = flags;
+	req.attach_type = type;
+
+	return sys_bpf(BPF_PROG_ATTACH, &req, sizeof(req));
+}
+
+/* Issue BPF_PROG_DETACH for target_fd and attach type, without naming a
+ * program fd. Returns the sys_bpf() result.
+ */
+int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.attach_type = type;
+	req.target_fd = target_fd;
+
+	return sys_bpf(BPF_PROG_DETACH, &req, sizeof(req));
+}
+
+/* Issue BPF_PROG_DETACH like bpf_prog_detach(), additionally passing prog_fd
+ * in attach_bpf_fd. Returns the sys_bpf() result.
+ */
+int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.attach_type = type;
+	req.attach_bpf_fd = prog_fd;
+	req.target_fd = target_fd;
+
+	return sys_bpf(BPF_PROG_DETACH, &req, sizeof(req));
+}
+
+/* Issue BPF_PROG_QUERY for target_fd and attach type.
+ *
+ * attach_flags: optional output; receives query.attach_flags when non-NULL
+ * prog_ids:     buffer pointer forwarded to the kernel
+ * prog_cnt:     in/out, must not be NULL; its value is passed in and it is
+ *               overwritten with query.prog_cnt after the call
+ *
+ * Both outputs are written whether or not the call succeeded.
+ * Returns the sys_bpf() result.
+ */
+int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
+		   __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
+{
+	union bpf_attr req;
+	int rc;
+
+	memset(&req, 0, sizeof(req));
+	req.query.prog_ids = ptr_to_u64(prog_ids);
+	req.query.prog_cnt = *prog_cnt;
+	req.query.query_flags = query_flags;
+	req.query.attach_type = type;
+	req.query.target_fd = target_fd;
+
+	rc = sys_bpf(BPF_PROG_QUERY, &req, sizeof(req));
+
+	if (attach_flags)
+		*attach_flags = req.query.attach_flags;
+	*prog_cnt = req.query.prog_cnt;
+
+	return rc;
+}
+
+/* Issue BPF_PROG_TEST_RUN for prog_fd.
+ *
+ * data/size describe the input, data_out is the output buffer pointer and
+ * repeat is forwarded as test.repeat. No output buffer size is passed to the
+ * kernel. size_out, retval and duration are optional outputs: each non-NULL
+ * one receives the matching attribute field after the call, regardless of
+ * the result. Returns the sys_bpf() result.
+ */
+int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
+		      void *data_out, __u32 *size_out, __u32 *retval,
+		      __u32 *duration)
+{
+	union bpf_attr req;
+	int rc;
+
+	memset(&req, 0, sizeof(req));
+	req.test.repeat = repeat;
+	req.test.prog_fd = prog_fd;
+	req.test.data_size_in = size;
+	req.test.data_in = ptr_to_u64(data);
+	req.test.data_out = ptr_to_u64(data_out);
+
+	rc = sys_bpf(BPF_PROG_TEST_RUN, &req, sizeof(req));
+
+	if (size_out)
+		*size_out = req.test.data_size_out;
+	if (retval)
+		*retval = req.test.retval;
+	if (duration)
+		*duration = req.test.duration;
+
+	return rc;
+}
+
+/* Issue BPF_PROG_TEST_RUN using the settings in test_attr.
+ *
+ * Returns -EINVAL without calling the kernel when data_size_out is non-zero
+ * but data_out is NULL. Otherwise the data and ctx buffers, their sizes and
+ * repeat are forwarded, and after the call data_size_out, ctx_size_out,
+ * retval and duration in test_attr are overwritten from the attribute,
+ * regardless of the result. Returns the sys_bpf() result.
+ */
+int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
+{
+	union bpf_attr req;
+	int rc;
+
+	if (test_attr->data_size_out > 0 && !test_attr->data_out)
+		return -EINVAL;
+
+	memset(&req, 0, sizeof(req));
+	req.test.prog_fd = test_attr->prog_fd;
+	req.test.repeat = test_attr->repeat;
+
+	req.test.data_in = ptr_to_u64(test_attr->data_in);
+	req.test.data_size_in = test_attr->data_size_in;
+	req.test.data_out = ptr_to_u64(test_attr->data_out);
+	req.test.data_size_out = test_attr->data_size_out;
+
+	req.test.ctx_in = ptr_to_u64(test_attr->ctx_in);
+	req.test.ctx_size_in = test_attr->ctx_size_in;
+	req.test.ctx_out = ptr_to_u64(test_attr->ctx_out);
+	req.test.ctx_size_out = test_attr->ctx_size_out;
+
+	rc = sys_bpf(BPF_PROG_TEST_RUN, &req, sizeof(req));
+
+	test_attr->data_size_out = req.test.data_size_out;
+	test_attr->ctx_size_out = req.test.ctx_size_out;
+	test_attr->retval = req.test.retval;
+	test_attr->duration = req.test.duration;
+
+	return rc;
+}
+
+/* Shared body of the *_get_next_id() helpers: issues cmd with start_id and,
+ * only when the call returns 0, stores the reported next_id through the
+ * output pointer. Returns the sys_bpf() result.
+ */
+static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
+{
+	union bpf_attr req;
+	int rc;
+
+	memset(&req, 0, sizeof(req));
+	req.start_id = start_id;
+
+	rc = sys_bpf(cmd, &req, sizeof(req));
+	if (rc)
+		return rc;
+
+	*next_id = req.next_id;
+	return rc;
+}
+
+/* bpf_obj_get_next_id() with the BPF_PROG_GET_NEXT_ID command. */
+int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
+{
+	const int cmd = BPF_PROG_GET_NEXT_ID;
+
+	return bpf_obj_get_next_id(start_id, next_id, cmd);
+}
+
+/* bpf_obj_get_next_id() with the BPF_MAP_GET_NEXT_ID command. */
+int bpf_map_get_next_id(__u32 start_id, __u32 *next_id)
+{
+	const int cmd = BPF_MAP_GET_NEXT_ID;
+
+	return bpf_obj_get_next_id(start_id, next_id, cmd);
+}
+
+/* bpf_obj_get_next_id() with the BPF_BTF_GET_NEXT_ID command. */
+int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id)
+{
+	const int cmd = BPF_BTF_GET_NEXT_ID;
+
+	return bpf_obj_get_next_id(start_id, next_id, cmd);
+}
+
+/* Issue BPF_PROG_GET_FD_BY_ID with id stored in attr.prog_id.
+ * Returns the sys_bpf() result.
+ */
+int bpf_prog_get_fd_by_id(__u32 id)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.prog_id = id;
+
+	return sys_bpf(BPF_PROG_GET_FD_BY_ID, &req, sizeof(req));
+}
+
+/* Issue BPF_MAP_GET_FD_BY_ID with id stored in attr.map_id.
+ * Returns the sys_bpf() result.
+ */
+int bpf_map_get_fd_by_id(__u32 id)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.map_id = id;
+
+	return sys_bpf(BPF_MAP_GET_FD_BY_ID, &req, sizeof(req));
+}
+
+/* Issue BPF_BTF_GET_FD_BY_ID with id stored in attr.btf_id.
+ * Returns the sys_bpf() result.
+ */
+int bpf_btf_get_fd_by_id(__u32 id)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.btf_id = id;
+
+	return sys_bpf(BPF_BTF_GET_FD_BY_ID, &req, sizeof(req));
+}
+
+/* Issue BPF_OBJ_GET_INFO_BY_FD for prog_fd.
+ *
+ * info is the output buffer pointer. info_len is in/out and must not be
+ * NULL: its value is passed as the buffer length and, only when the call
+ * returns 0, it is overwritten with the length reported back.
+ * Returns the sys_bpf() result.
+ */
+int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
+{
+	union bpf_attr req;
+	int rc;
+
+	memset(&req, 0, sizeof(req));
+	req.info.info = ptr_to_u64(info);
+	req.info.info_len = *info_len;
+	req.info.bpf_fd = prog_fd;
+
+	rc = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &req, sizeof(req));
+	if (rc)
+		return rc;
+
+	*info_len = req.info.info_len;
+	return rc;
+}
+
+/* Issue BPF_RAW_TRACEPOINT_OPEN with the tracepoint name pointer and
+ * prog_fd. Returns the sys_bpf() result.
+ */
+int bpf_raw_tracepoint_open(const char *name, int prog_fd)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.raw_tracepoint.prog_fd = prog_fd;
+	req.raw_tracepoint.name = ptr_to_u64(name);
+
+	return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &req, sizeof(req));
+}
+
+/* Issue BPF_BTF_LOAD for the btf_size bytes at btf.
+ *
+ * log_buf/log_buf_size: optional log buffer
+ * do_log:               request logging on the first attempt
+ *
+ * The log buffer is only passed to the kernel when do_log is set and a
+ * non-empty buffer was supplied. If the first attempt was made without
+ * logging and returned -1, the call is repeated once with logging enabled,
+ * provided a buffer is available. Returns the result of the last sys_bpf()
+ * call.
+ */
+int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
+		 bool do_log)
+{
+	union bpf_attr attr;
+	int fd;
+
+	/* memset, not "= {}": an initializer only covers the first union
+	 * member, and every other function in this file clears the whole
+	 * attribute the same way
+	 */
+	memset(&attr, 0, sizeof(attr));
+	attr.btf = ptr_to_u64(btf);
+	attr.btf_size = btf_size;
+
+retry:
+	if (do_log && log_buf && log_buf_size) {
+		attr.btf_log_level = 1;
+		attr.btf_log_size = log_buf_size;
+		attr.btf_log_buf = ptr_to_u64(log_buf);
+	}
+
+	fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
+	if (fd == -1 && !do_log && log_buf && log_buf_size) {
+		/* setting do_log makes this branch unreachable next time */
+		do_log = true;
+		goto retry;
+	}
+
+	return fd;
+}
+
+/* Issue BPF_TASK_FD_QUERY for descriptor fd of process pid.
+ *
+ * flags: forwarded unchanged
+ * buf:   buffer pointer forwarded to the kernel
+ * buf_len: in/out; its value is passed as the buffer length
+ * prog_id, fd_type, probe_offset, probe_addr: outputs
+ *
+ * All five output pointers must be non-NULL; they are overwritten from the
+ * attribute after the call whether or not it succeeded.
+ * Returns the sys_bpf() result.
+ */
+int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
+		      __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
+		      __u64 *probe_addr)
+{
+	union bpf_attr attr;
+	int err;
+
+	/* memset, not "= {}": an initializer only covers the first union
+	 * member, and every other function in this file clears the whole
+	 * attribute the same way
+	 */
+	memset(&attr, 0, sizeof(attr));
+	attr.task_fd_query.pid = pid;
+	attr.task_fd_query.fd = fd;
+	attr.task_fd_query.flags = flags;
+	attr.task_fd_query.buf = ptr_to_u64(buf);
+	attr.task_fd_query.buf_len = *buf_len;
+
+	err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
+	*buf_len = attr.task_fd_query.buf_len;
+	*prog_id = attr.task_fd_query.prog_id;
+	*fd_type = attr.task_fd_query.fd_type;
+	*probe_offset = attr.task_fd_query.probe_offset;
+	*probe_addr = attr.task_fd_query.probe_addr;
+
+	return err;
+}
diff --git a/src/contrib/libbpf/bpf/bpf.h b/src/contrib/libbpf/bpf/bpf.h
new file mode 100644
index 0000000..3c791fa
--- /dev/null
+++ b/src/contrib/libbpf/bpf/bpf.h
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * common eBPF ELF operations.
+ *
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ */
+#ifndef __LIBBPF_BPF_H
+#define __LIBBPF_BPF_H
+
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef LIBBPF_API
+/* Default when the build did not define it: mark every declaration tagged
+ * LIBBPF_API with default symbol visibility.
+ */
+#define LIBBPF_API __attribute__((visibility("default")))
+#endif
+
+/* Arguments for bpf_create_map_xattr(). Each member is copied into the
+ * union bpf_attr field of the same name, except name, which is copied into
+ * map_name.
+ */
+struct bpf_create_map_attr {
+ const char *name; /* optional; may be NULL */
+ enum bpf_map_type map_type;
+ __u32 map_flags;
+ __u32 key_size;
+ __u32 value_size;
+ __u32 max_entries;
+ __u32 numa_node;
+ __u32 btf_fd;
+ __u32 btf_key_type_id;
+ __u32 btf_value_type_id;
+ __u32 map_ifindex;
+ __u32 inner_map_fd;
+};
+
+/* Map creation helpers. Each one ends in a BPF_MAP_CREATE call and returns
+ * its result.
+ */
+
+/* create a map from a fully populated attribute structure */
+LIBBPF_API int
+bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
+/* named map; node >= 0 also sets numa_node and BPF_F_NUMA_NODE */
+LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
+ int key_size, int value_size,
+ int max_entries, __u32 map_flags, int node);
+/* named map without a NUMA node */
+LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
+ int key_size, int value_size,
+ int max_entries, __u32 map_flags);
+/* unnamed map without a NUMA node */
+LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size,
+ int value_size, int max_entries, __u32 map_flags);
+/* map carrying inner_map_fd; value size is fixed at 4; node as above */
+LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type,
+ const char *name, int key_size,
+ int inner_map_fd, int max_entries,
+ __u32 map_flags, int node);
+/* bpf_create_map_in_map_node() with node = -1 */
+LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type,
+ const char *name, int key_size,
+ int inner_map_fd, int max_entries,
+ __u32 map_flags);
+
+/* Arguments for bpf_load_program_xattr().
+ *
+ * The two anonymous unions overlay fields of which bpf_load_program_xattr()
+ * reads only one pair: attach_prog_fd/attach_btf_id when prog_type is
+ * BPF_PROG_TYPE_TRACING, kern_version/prog_ifindex for every other type.
+ */
+struct bpf_load_program_attr {
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
+ const char *name; /* optional; may be NULL */
+ const struct bpf_insn *insns;
+ size_t insns_cnt;
+ const char *license;
+ union {
+ __u32 kern_version;
+ __u32 attach_prog_fd;
+ };
+ union {
+ __u32 prog_ifindex;
+ __u32 attach_btf_id;
+ };
+ __u32 prog_btf_fd;
+ __u32 func_info_rec_size; /* size of one func_info record */
+ const void *func_info;
+ __u32 func_info_cnt; /* number of func_info records */
+ __u32 line_info_rec_size; /* size of one line_info record */
+ const void *line_info;
+ __u32 line_info_cnt; /* number of line_info records */
+ __u32 log_level; /* values above 7 are rejected with -EINVAL */
+ __u32 prog_flags;
+};
+
+/* Flags to direct loading requirements */
+#define MAPS_RELAX_COMPAT 0x01
+
+/* Recommend log buffer size */
+#define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */
+/* Load a program from load_attr. Returns -EINVAL when load_attr is NULL,
+ * when only one of log_buf/log_buf_sz is set, or when log_level is above 7
+ * or set without a buffer; otherwise the result of the last BPF_PROG_LOAD
+ * attempt.
+ */
+LIBBPF_API int
+bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
+ char *log_buf, size_t log_buf_sz);
+/* bpf_load_program_xattr() with only type, insns, license and kern_version
+ * set
+ */
+LIBBPF_API int bpf_load_program(enum bpf_prog_type type,
+ const struct bpf_insn *insns, size_t insns_cnt,
+ const char *license, __u32 kern_version,
+ char *log_buf, size_t log_buf_sz);
+/* single BPF_PROG_LOAD with caller-chosen prog_flags and log_level */
+LIBBPF_API int bpf_verify_program(enum bpf_prog_type type,
+ const struct bpf_insn *insns,
+ size_t insns_cnt, __u32 prog_flags,
+ const char *license, __u32 kern_version,
+ char *log_buf, size_t log_buf_sz,
+ int log_level);
+
+/* Map element operations. Each issues the bpf command named after it on
+ * map fd and returns the syscall result.
+ */
+LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value,
+ __u64 flags);
+
+LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value);
+LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void *value,
+ __u64 flags);
+LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
+ void *value);
+LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
+LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
+LIBBPF_API int bpf_map_freeze(int fd);
+/* BPF_OBJ_PIN / BPF_OBJ_GET on a path name */
+LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
+LIBBPF_API int bpf_obj_get(const char *pathname);
+/* BPF_PROG_ATTACH / BPF_PROG_DETACH; attachable_fd is passed as target_fd.
+ * bpf_prog_detach2() additionally passes the program fd.
+ */
+LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
+ enum bpf_attach_type type, unsigned int flags);
+LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
+LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
+ enum bpf_attach_type type);
+
+/* Arguments and results for bpf_prog_test_run_xattr(). The fields marked
+ * "out" are overwritten by that function after the BPF_PROG_TEST_RUN call.
+ */
+struct bpf_prog_test_run_attr {
+ int prog_fd;
+ int repeat;
+ const void *data_in;
+ __u32 data_size_in;
+ void *data_out; /* optional */
+ __u32 data_size_out; /* in: max length of data_out
+ * out: length of data_out */
+ __u32 retval; /* out: return code of the BPF program */
+ __u32 duration; /* out: average per repetition in ns */
+ const void *ctx_in; /* optional */
+ __u32 ctx_size_in;
+ void *ctx_out; /* optional */
+ __u32 ctx_size_out; /* in: max length of ctx_out
+ * out: length of ctx_out */
+};
+
+/* Returns -EINVAL when data_size_out is non-zero but data_out is NULL. */
+LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr);
+
+/*
+ * bpf_prog_test_run does not check that data_out is large enough. Consider
+ * using bpf_prog_test_run_xattr instead.
+ */
+LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data,
+ __u32 size, void *data_out, __u32 *size_out,
+ __u32 *retval, __u32 *duration);
+/* *next_id is written only when the call returns 0 */
+LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
+LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
+LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id);
+/* return the result of the matching BPF_*_GET_FD_BY_ID command */
+LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_map_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id);
+/* info_len is in/out and is updated only when the call returns 0 */
+LIBBPF_API int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
+/* attach_flags may be NULL; prog_cnt is in/out and must not be NULL */
+LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
+ __u32 query_flags, __u32 *attach_flags,
+ __u32 *prog_ids, __u32 *prog_cnt);
+LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd);
+/* retried once with logging if the first, unlogged attempt returns -1 and
+ * a log buffer was supplied
+ */
+LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf,
+ __u32 log_buf_size, bool do_log);
+/* every output pointer is written unconditionally and must not be NULL */
+LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
+ __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
+ __u64 *probe_offset, __u64 *probe_addr);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __LIBBPF_BPF_H */
diff --git a/src/contrib/libbpf/bpf/bpf_core_read.h b/src/contrib/libbpf/bpf/bpf_core_read.h
new file mode 100644
index 0000000..7009dc9
--- /dev/null
+++ b/src/contrib/libbpf/bpf/bpf_core_read.h
@@ -0,0 +1,263 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_CORE_READ_H__
+#define __BPF_CORE_READ_H__
+
+/*
+ * enum bpf_field_info_kind is passed as a second argument into
+ * __builtin_preserve_field_info() built-in to get a specific aspect of
+ * a field, captured as a first argument. __builtin_preserve_field_info(field,
+ * info_kind) returns __u32 integer and produces BTF field relocation, which
+ * is understood and processed by libbpf during BPF object loading. See
+ * selftests/bpf for examples.
+ */
+enum bpf_field_info_kind {
+ BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */
+ BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */
+ BPF_FIELD_EXISTS = 2, /* field existence in target kernel */
+ BPF_FIELD_SIGNED = 3, /* non-zero if field is signed */
+ BPF_FIELD_LSHIFT_U64 = 4, /* left shift applied to value loaded into u64 */
+ BPF_FIELD_RSHIFT_U64 = 5, /* right shift applied after the left shift */
+};
+
+#define __CORE_RELO(src, field, info) /* BPF_FIELD_<info> aspect of (src)->field */ \
+ __builtin_preserve_field_info((src)->field, BPF_FIELD_##info)
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \
+ bpf_probe_read((void *)dst, \
+ __CORE_RELO(src, fld, BYTE_SIZE), \
+ (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
+#else
+/* semantics of LSHIFT_64 assumes loading values into low-ordered bytes, so
+ * for big-endian we need to adjust destination pointer accordingly, based on
+ * field byte size
+ */
+#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \
+ bpf_probe_read((void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \
+ __CORE_RELO(src, fld, BYTE_SIZE), \
+ (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
+#endif
+
+/*
+ * Extract bitfield, identified by s->field, and return its value as u64.
+ * All this is done in relocatable manner, so bitfield changes such as
+ * signedness, bit size and offset changes are handled automatically.
+ * This version of macro is using bpf_probe_read() to read underlying integer
+ * storage. Macro functions as an expression that evaluates to the extracted
+ * value; bpf_probe_read()'s return code is not checked by the macro.
+ */
+#define BPF_CORE_READ_BITFIELD_PROBED(s, field) ({ \
+ unsigned long long val = 0; \
+ \
+ __CORE_BITFIELD_PROBE_READ(&val, s, field); \
+ val <<= __CORE_RELO(s, field, LSHIFT_U64); \
+ if (__CORE_RELO(s, field, SIGNED)) \
+ val = ((long long)val) >> __CORE_RELO(s, field, RSHIFT_U64); \
+ else \
+ val = val >> __CORE_RELO(s, field, RSHIFT_U64); \
+ val; \
+})
+
+/*
+ * Extract bitfield, identified by s->field, and return its value as u64.
+ * This version of macro is using direct memory reads and should be used from
+ * BPF program types that support such functionality (e.g., typed raw
+ * tracepoints). Exactly one load, sized by the field's byte size, is done.
+ */
+#define BPF_CORE_READ_BITFIELD(s, field) ({ \
+ const void *p = (const void *)(s) + __CORE_RELO(s, field, BYTE_OFFSET); \
+ unsigned long long val = 0; /* stays 0 if byte size is not 1/2/4/8 */ \
+ \
+ switch (__CORE_RELO(s, field, BYTE_SIZE)) { \
+ case 1: val = *(const unsigned char *)p; break; \
+ case 2: val = *(const unsigned short *)p; break; \
+ case 4: val = *(const unsigned int *)p; break; \
+ case 8: val = *(const unsigned long long *)p; break; \
+ } \
+ val <<= __CORE_RELO(s, field, LSHIFT_U64); \
+ if (__CORE_RELO(s, field, SIGNED)) \
+ val = ((long long)val) >> __CORE_RELO(s, field, RSHIFT_U64); \
+ else \
+ val = val >> __CORE_RELO(s, field, RSHIFT_U64); \
+ val; \
+})
+
+/*
+ * Convenience macro to check that field actually exists in target kernel.
+ * Returns:
+ * 1, if matching field is present in target kernel;
+ * 0, if no matching field found.
+ */
+#define bpf_core_field_exists(field) \
+ __builtin_preserve_field_info(field, BPF_FIELD_EXISTS)
+
+/*
+ * Convenience macro to get byte size of a field. Works for integers,
+ * struct/unions, pointers, arrays, and enums.
+ */
+#define bpf_core_field_size(field) \
+ __builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE)
+
+/*
+ * bpf_core_read() abstracts away bpf_probe_read() call and captures offset
+ * relocation for source address using __builtin_preserve_access_index()
+ * built-in, provided by Clang.
+ *
+ * __builtin_preserve_access_index() takes as an argument an expression of
+ * taking an address of a field within struct/union. It makes compiler emit
+ * a relocation, which records BTF type ID describing root struct/union and an
+ * accessor string which describes exact embedded field that was used to take
+ * an address. See detailed description of this relocation format and
+ * semantics in comments to struct bpf_field_reloc in libbpf_internal.h.
+ *
+ * This relocation allows libbpf to adjust BPF instruction to use correct
+ * actual field offset, based on target kernel BTF type that matches original
+ * (local) BTF, used to record relocation.
+ */
+#define bpf_core_read(dst, sz, src) \
+ bpf_probe_read(dst, sz, \
+ (const void *)__builtin_preserve_access_index(src))
+
+/*
+ * bpf_core_read_str() is a thin wrapper around bpf_probe_read_str()
+ * additionally emitting BPF CO-RE field relocation for specified source
+ * argument.
+ */
+#define bpf_core_read_str(dst, sz, src) \
+ bpf_probe_read_str(dst, sz, \
+ (const void *)__builtin_preserve_access_index(src))
+
+#define ___concat(a, b) a ## b
+#define ___apply(fn, n) ___concat(fn, n)
+#define ___nth(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, __11, N, ...) N
+
+/*
+ * return number of provided arguments; used for switch-based variadic macro
+ * definitions (see ___last, ___arrow, etc below)
+ */
+#define ___narg(...) ___nth(_, ##__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+/*
+ * return 0 if no arguments are passed, N - otherwise; used for
+ * recursively-defined macros to specify termination (0) case, and generic
+ * (N) case (e.g., ___read_ptrs, ___core_read)
+ */
+#define ___empty(...) ___nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0)
+
+#define ___last1(x) x
+#define ___last2(a, x) x
+#define ___last3(a, b, x) x
+#define ___last4(a, b, c, x) x
+#define ___last5(a, b, c, d, x) x
+#define ___last6(a, b, c, d, e, x) x
+#define ___last7(a, b, c, d, e, f, x) x
+#define ___last8(a, b, c, d, e, f, g, x) x
+#define ___last9(a, b, c, d, e, f, g, h, x) x
+#define ___last10(a, b, c, d, e, f, g, h, i, x) x
+#define ___last(...) ___apply(___last, ___narg(__VA_ARGS__))(__VA_ARGS__)
+
+#define ___nolast2(a, _) a
+#define ___nolast3(a, b, _) a, b
+#define ___nolast4(a, b, c, _) a, b, c
+#define ___nolast5(a, b, c, d, _) a, b, c, d
+#define ___nolast6(a, b, c, d, e, _) a, b, c, d, e
+#define ___nolast7(a, b, c, d, e, f, _) a, b, c, d, e, f
+#define ___nolast8(a, b, c, d, e, f, g, _) a, b, c, d, e, f, g
+#define ___nolast9(a, b, c, d, e, f, g, h, _) a, b, c, d, e, f, g, h
+#define ___nolast10(a, b, c, d, e, f, g, h, i, _) a, b, c, d, e, f, g, h, i
+#define ___nolast(...) ___apply(___nolast, ___narg(__VA_ARGS__))(__VA_ARGS__)
+
+#define ___arrow1(a) a
+#define ___arrow2(a, b) a->b
+#define ___arrow3(a, b, c) a->b->c
+#define ___arrow4(a, b, c, d) a->b->c->d
+#define ___arrow5(a, b, c, d, e) a->b->c->d->e
+#define ___arrow6(a, b, c, d, e, f) a->b->c->d->e->f
+#define ___arrow7(a, b, c, d, e, f, g) a->b->c->d->e->f->g
+#define ___arrow8(a, b, c, d, e, f, g, h) a->b->c->d->e->f->g->h
+#define ___arrow9(a, b, c, d, e, f, g, h, i) a->b->c->d->e->f->g->h->i
+#define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j
+#define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__)
+
+#define ___type(...) typeof(___arrow(__VA_ARGS__))
+
+#define ___read(read_fn, dst, src_type, src, accessor) \
+ read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor)
+
+/* "recursively" read a sequence of inner pointers using local __t var */
+#define ___rd_first(src, a) ___read(bpf_core_read, &__t, ___type(src), src, a);
+#define ___rd_last(...) \
+ ___read(bpf_core_read, &__t, \
+ ___type(___nolast(__VA_ARGS__)), __t, ___last(__VA_ARGS__));
+#define ___rd_p1(...) const void *__t; ___rd_first(__VA_ARGS__)
+#define ___rd_p2(...) ___rd_p1(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p3(...) ___rd_p2(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p4(...) ___rd_p3(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p5(...) ___rd_p4(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p6(...) ___rd_p5(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p7(...) ___rd_p6(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p8(...) ___rd_p7(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p9(...) ___rd_p8(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___read_ptrs(src, ...) \
+ ___apply(___rd_p, ___narg(__VA_ARGS__))(src, __VA_ARGS__)
+
+#define ___core_read0(fn, dst, src, a) \
+ ___read(fn, dst, ___type(src), src, a);
+#define ___core_readN(fn, dst, src, ...) \
+ ___read_ptrs(src, ___nolast(__VA_ARGS__)) \
+ ___read(fn, dst, ___type(src, ___nolast(__VA_ARGS__)), __t, \
+ ___last(__VA_ARGS__));
+#define ___core_read(fn, dst, src, a, ...) \
+ ___apply(___core_read, ___empty(__VA_ARGS__))(fn, dst, \
+ src, a, ##__VA_ARGS__)
+
+/*
+ * BPF_CORE_READ_INTO() is a more performance-conscious variant of
+ * BPF_CORE_READ(), in which final field is read into user-provided storage.
+ * See BPF_CORE_READ() below for more details on general usage.
+ */
+#define BPF_CORE_READ_INTO(dst, src, a, ...) \
+ ({ \
+ ___core_read(bpf_core_read, dst, src, a, ##__VA_ARGS__) \
+ })
+
+/*
+ * BPF_CORE_READ_STR_INTO() does same "pointer chasing" as
+ * BPF_CORE_READ() for intermediate pointers, but then executes (and returns
+ * corresponding error code) bpf_core_read_str() for final string read.
+ */
+#define BPF_CORE_READ_STR_INTO(dst, src, a, ...) \
+ ({ \
+ ___core_read(bpf_core_read_str, dst, src, a, ##__VA_ARGS__) \
+ })
+
+/*
+ * BPF_CORE_READ() is used to simplify BPF CO-RE relocatable read, especially
+ * when there are few pointer chasing steps.
+ * E.g., what in non-BPF world (or in BPF w/ BCC) would be something like:
+ * int x = s->a.b.c->d.e->f->g;
+ * can be succinctly achieved using BPF_CORE_READ as:
+ * int x = BPF_CORE_READ(s, a.b.c, d.e, f, g);
+ *
+ * BPF_CORE_READ will decompose above statement into 4 bpf_core_read (BPF
+ * CO-RE relocatable bpf_probe_read() wrapper) calls, logically equivalent to:
+ * 1. const void *__t = s->a.b.c;
+ * 2. __t = __t->d.e;
+ * 3. __t = __t->f;
+ * 4. return __t->g;
+ *
+ * Equivalence is logical, because there is a heavy type casting/preservation
+ * involved, as well as all the reads are happening through bpf_probe_read()
+ * calls using __builtin_preserve_access_index() to emit CO-RE relocations.
+ *
+ * N.B. Only up to 9 "field accessors" are supported, which should be more
+ * than enough for any practical purpose.
+ */
+#define BPF_CORE_READ(src, a, ...) \
+ ({ \
+ ___type(src, a, ##__VA_ARGS__) __r; \
+ BPF_CORE_READ_INTO(&__r, src, a, ##__VA_ARGS__); \
+ __r; \
+ })
+
+#endif
+
diff --git a/src/contrib/libbpf/bpf/bpf_endian.h b/src/contrib/libbpf/bpf/bpf_endian.h
new file mode 100644
index 0000000..fbe2800
--- /dev/null
+++ b/src/contrib/libbpf/bpf/bpf_endian.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_ENDIAN__
+#define __BPF_ENDIAN__
+
+#include <linux/stddef.h>
+#include <linux/swab.h>
+
+/* LLVM's BPF target selects the endianness of the CPU
+ * it compiles on, or the user specifies (bpfel/bpfeb),
+ * respectively. The used __BYTE_ORDER__ is defined by
+ * the compiler, we cannot rely on __BYTE_ORDER from
+ * libc headers, since it doesn't reflect the actual
+ * requested byte order.
+ *
+ * Note, LLVM's BPF target has different __builtin_bswapX()
+ * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE
+ * in bpfel and bpfeb case, which means below, that we map
+ * to cpu_to_be16(). We could use it unconditionally in BPF
+ * case, but better not rely on it, so that this header here
+ * can be used from application and BPF program side, which
+ * use different targets.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# define __bpf_ntohs(x) __builtin_bswap16(x)
+# define __bpf_htons(x) __builtin_bswap16(x)
+# define __bpf_constant_ntohs(x) ___constant_swab16(x)
+# define __bpf_constant_htons(x) ___constant_swab16(x)
+# define __bpf_ntohl(x) __builtin_bswap32(x)
+# define __bpf_htonl(x) __builtin_bswap32(x)
+# define __bpf_constant_ntohl(x) ___constant_swab32(x)
+# define __bpf_constant_htonl(x) ___constant_swab32(x)
+# define __bpf_be64_to_cpu(x) __builtin_bswap64(x)
+# define __bpf_cpu_to_be64(x) __builtin_bswap64(x)
+# define __bpf_constant_be64_to_cpu(x) ___constant_swab64(x)
+# define __bpf_constant_cpu_to_be64(x) ___constant_swab64(x)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+# define __bpf_ntohs(x) (x)
+# define __bpf_htons(x) (x)
+# define __bpf_constant_ntohs(x) (x)
+# define __bpf_constant_htons(x) (x)
+# define __bpf_ntohl(x) (x)
+# define __bpf_htonl(x) (x)
+# define __bpf_constant_ntohl(x) (x)
+# define __bpf_constant_htonl(x) (x)
+# define __bpf_be64_to_cpu(x) (x)
+# define __bpf_cpu_to_be64(x) (x)
+# define __bpf_constant_be64_to_cpu(x) (x)
+# define __bpf_constant_cpu_to_be64(x) (x)
+#else
+# error "Fix your compiler's __BYTE_ORDER__?!"
+#endif
+
+#define bpf_htons(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_htons(x) : __bpf_htons(x))
+#define bpf_ntohs(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_ntohs(x) : __bpf_ntohs(x))
+#define bpf_htonl(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_htonl(x) : __bpf_htonl(x))
+#define bpf_ntohl(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_ntohl(x) : __bpf_ntohl(x))
+#define bpf_cpu_to_be64(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_cpu_to_be64(x) : __bpf_cpu_to_be64(x))
+#define bpf_be64_to_cpu(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_be64_to_cpu(x) : __bpf_be64_to_cpu(x))
+
+#endif /* __BPF_ENDIAN__ */
diff --git a/src/contrib/libbpf/bpf/bpf_helper_defs.h b/src/contrib/libbpf/bpf/bpf_helper_defs.h
new file mode 100644
index 0000000..1f357f6
--- /dev/null
+++ b/src/contrib/libbpf/bpf/bpf_helper_defs.h
@@ -0,0 +1,2759 @@
+/* This is auto-generated file. See bpf_helpers_doc.py for details. */
+
+/* Forward declarations of BPF structs */
+struct bpf_fib_lookup;
+struct bpf_perf_event_data;
+struct bpf_perf_event_value;
+struct bpf_sock;
+struct bpf_sock_addr;
+struct bpf_sock_ops;
+struct bpf_sock_tuple;
+struct bpf_spin_lock;
+struct bpf_sysctl;
+struct bpf_tcp_sock;
+struct bpf_tunnel_key;
+struct bpf_xfrm_state;
+struct pt_regs;
+struct sk_reuseport_md;
+struct sockaddr;
+struct tcphdr;
+struct __sk_buff;
+struct sk_msg_md;
+struct xdp_md;
+
+/*
+ * bpf_map_lookup_elem
+ *
+ * Perform a lookup in *map* for an entry associated to *key*.
+ *
+ * Returns
+ * Map value associated to *key*, or **NULL** if no entry was
+ * found.
+ */
+static void *(*bpf_map_lookup_elem)(void *map, const void *key) = (void *) 1;
+
+/*
+ * bpf_map_update_elem
+ *
+ * Add or update the value of the entry associated to *key* in
+ * *map* with *value*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * Flag value **BPF_NOEXIST** cannot be used for maps of types
+ * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all
+ * elements always exist), the helper would return an error.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_map_update_elem)(void *map, const void *key, const void *value, __u64 flags) = (void *) 2;
+
+/*
+ * bpf_map_delete_elem
+ *
+ * Delete entry with *key* from *map*.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_map_delete_elem)(void *map, const void *key) = (void *) 3;
+
+/*
+ * bpf_probe_read
+ *
+ * For tracing programs, safely attempt to read *size* bytes from
+ * kernel space address *unsafe_ptr* and store the data in *dst*.
+ *
+ * Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
+ * instead.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_probe_read)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 4;
+
+/*
+ * bpf_ktime_get_ns
+ *
+ * Return the time elapsed since system boot, in nanoseconds.
+ *
+ * Returns
+ * Current *ktime*.
+ */
+static __u64 (*bpf_ktime_get_ns)(void) = (void *) 5;
+
+/*
+ * bpf_trace_printk
+ *
+ * This helper is a "printk()-like" facility for debugging. It
+ * prints a message defined by format *fmt* (of size *fmt_size*)
+ * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
+ * available. It can take up to three additional **u64**
+ * arguments (as for all eBPF helpers, the total number of arguments is
+ * limited to five).
+ *
+ * Each time the helper is called, it appends a line to the trace.
+ * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is
+ * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this.
+ * The format of the trace is customizable, and the exact output
+ * one will get depends on the options set in
+ * *\/sys/kernel/debug/tracing/trace_options* (see also the
+ * *README* file under the same directory). However, it usually
+ * defaults to something like:
+ *
+ * ::
+ *
+ * telnet-470 [001] .N.. 419421.045894: 0x00000001: <formatted msg>
+ *
+ * In the above:
+ *
+ * * ``telnet`` is the name of the current task.
+ * * ``470`` is the PID of the current task.
+ * * ``001`` is the CPU number on which the task is
+ * running.
+ * * In ``.N..``, each character refers to a set of
+ * options (whether irqs are enabled, scheduling
+ * options, whether hard/softirqs are running, level of
+ * preempt_disabled respectively). **N** means that
+ * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED**
+ * are set.
+ * * ``419421.045894`` is a timestamp.
+ * * ``0x00000001`` is a fake value used by BPF for the
+ * instruction pointer register.
+ * * ``<formatted msg>`` is the message formatted with
+ * *fmt*.
+ *
+ * The conversion specifiers supported by *fmt* are similar, but
+ * more limited than for printk(). They are **%d**, **%i**,
+ * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
+ * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
+ * of field, padding with zeroes, etc.) is available, and the
+ * helper will return **-EINVAL** (but print nothing) if it
+ * encounters an unknown specifier.
+ *
+ * Also, note that **bpf_trace_printk**\ () is slow, and should
+ * only be used for debugging purposes. For this reason, a notice
+ * block (spanning several lines) is printed to kernel logs and
+ * states that the helper should not be used "for production use"
+ * the first time this helper is used (or more precisely, when
+ * **trace_printk**\ () buffers are allocated). For passing values
+ * to user space, perf events should be preferred.
+ *
+ * Returns
+ * The number of bytes written to the buffer, or a negative error
+ * in case of failure.
+ */
+static int (*bpf_trace_printk)(const char *fmt, __u32 fmt_size, ...) = (void *) 6;
+
+/*
+ * bpf_get_prandom_u32
+ *
+ * Get a pseudo-random number.
+ *
+ * From a security point of view, this helper uses its own
+ * pseudo-random internal state, and cannot be used to infer the
+ * seed of other random functions in the kernel. However, it is
+ * essential to note that the generator used by the helper is not
+ * cryptographically secure.
+ *
+ * Returns
+ * A random 32-bit unsigned value.
+ */
+static __u32 (*bpf_get_prandom_u32)(void) = (void *) 7;
+
+/*
+ * bpf_get_smp_processor_id
+ *
+ * Get the SMP (symmetric multiprocessing) processor id. Note that
+ * all programs run with preemption disabled, which means that the
+ * SMP processor id is stable during all the execution of the
+ * program.
+ *
+ * Returns
+ * The SMP id of the processor running the program.
+ */
+static __u32 (*bpf_get_smp_processor_id)(void) = (void *) 8;
+
+/*
+ * bpf_skb_store_bytes
+ *
+ * Store *len* bytes from address *from* into the packet
+ * associated to *skb*, at *offset*. *flags* are a combination of
+ * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
+ * checksum for the packet after storing the bytes) and
+ * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
+ * **->swhash** and *skb*\ **->l4hash** to 0).
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_store_bytes)(struct __sk_buff *skb, __u32 offset, const void *from, __u32 len, __u64 flags) = (void *) 9;
+
+/*
+ * bpf_l3_csum_replace
+ *
+ * Recompute the layer 3 (e.g. IP) checksum for the packet
+ * associated to *skb*. Computation is incremental, so the helper
+ * must know the former value of the header field that was
+ * modified (*from*), the new value of this field (*to*), and the
+ * number of bytes (2 or 4) for this field, stored in *size*.
+ * Alternatively, it is possible to store the difference between
+ * the previous and the new values of the header field in *to*, by
+ * setting *from* and *size* to 0. For both methods, *offset*
+ * indicates the location of the IP checksum within the packet.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+ * flexibility and can handle sizes larger than 2 or 4 for the
+ * checksum to update.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_l3_csum_replace)(struct __sk_buff *skb, __u32 offset, __u64 from, __u64 to, __u64 size) = (void *) 10;
+
+/*
+ * bpf_l4_csum_replace
+ *
+ * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
+ * packet associated to *skb*. Computation is incremental, so the
+ * helper must know the former value of the header field that was
+ * modified (*from*), the new value of this field (*to*), and the
+ * number of bytes (2 or 4) for this field, stored on the lowest
+ * four bits of *flags*. Alternatively, it is possible to store
+ * the difference between the previous and the new values of the
+ * header field in *to*, by setting *from* and the four lowest
+ * bits of *flags* to 0. For both methods, *offset* indicates the
+ * location of the IP checksum within the packet. In addition to
+ * the size of the field, *flags* can be added (bitwise OR) actual
+ * flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left
+ * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
+ * for updates resulting in a null checksum the value is set to
+ * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
+ * the checksum is to be computed against a pseudo-header.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+ * flexibility and can handle sizes larger than 2 or 4 for the
+ * checksum to update.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_l4_csum_replace)(struct __sk_buff *skb, __u32 offset, __u64 from, __u64 to, __u64 flags) = (void *) 11;
+
+/*
+ * bpf_tail_call
+ *
+ * This special helper is used to trigger a "tail call", or in
+ * other words, to jump into another eBPF program. The same stack
+ * frame is used (but values on stack and in registers for the
+ * caller are not accessible to the callee). This mechanism allows
+ * for program chaining, either for raising the maximum number of
+ * available eBPF instructions, or to execute given programs in
+ * conditional blocks. For security reasons, there is an upper
+ * limit to the number of successive tail calls that can be
+ * performed.
+ *
+ * Upon call of this helper, the program attempts to jump into a
+ * program referenced at index *index* in *prog_array_map*, a
+ * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes
+ * *ctx*, a pointer to the context.
+ *
+ * If the call succeeds, the kernel immediately runs the first
+ * instruction of the new program. This is not a function call,
+ * and it never returns to the previous program. If the call
+ * fails, then the helper has no effect, and the caller continues
+ * to run its subsequent instructions. A call can fail if the
+ * destination program for the jump does not exist (i.e. *index*
+ * is superior to the number of entries in *prog_array_map*), or
+ * if the maximum number of tail calls has been reached for this
+ * chain of programs. This limit is defined in the kernel by the
+ * macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
+ * which is currently set to 32.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_tail_call)(void *ctx, void *prog_array_map, __u32 index) = (void *) 12;
+
+/*
+ * bpf_clone_redirect
+ *
+ * Clone and redirect the packet associated to *skb* to another
+ * net device of index *ifindex*. Both ingress and egress
+ * interfaces can be used for redirection. The **BPF_F_INGRESS**
+ * value in *flags* is used to make the distinction (ingress path
+ * is selected if the flag is present, egress path otherwise).
+ * This is the only flag supported for now.
+ *
+ * In comparison with **bpf_redirect**\ () helper,
+ * **bpf_clone_redirect**\ () has the associated cost of
+ * duplicating the packet buffer, but this can be executed out of
+ * the eBPF program. Conversely, **bpf_redirect**\ () is more
+ * efficient, but it is handled through an action code where the
+ * redirection happens only after the eBPF program has returned.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_clone_redirect)(struct __sk_buff *skb, __u32 ifindex, __u64 flags) = (void *) 13;
+
+/*
+ * bpf_get_current_pid_tgid
+ *
+ *
+ * Returns
+ * A 64-bit integer containing the current tgid and pid, and
+ * created as such:
+ * *current_task*\ **->tgid << 32 \|**
+ * *current_task*\ **->pid**.
+ */
+static __u64 (*bpf_get_current_pid_tgid)(void) = (void *) 14;
+
+/*
+ * bpf_get_current_uid_gid
+ *
+ *
+ * Returns
+ * A 64-bit integer containing the current GID and UID, and
+ * created as such: *current_gid* **<< 32 \|** *current_uid*.
+ */
+static __u64 (*bpf_get_current_uid_gid)(void) = (void *) 15;
+
+/*
+ * bpf_get_current_comm
+ *
+ * Copy the **comm** attribute of the current task into *buf* of
+ * *size_of_buf*. The **comm** attribute contains the name of
+ * the executable (excluding the path) for the current task. The
+ * *size_of_buf* must be strictly positive. On success, the
+ * helper makes sure that the *buf* is NUL-terminated. On failure,
+ * it is filled with zeroes.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_get_current_comm)(void *buf, __u32 size_of_buf) = (void *) 16;
+
+/*
+ * bpf_get_cgroup_classid
+ *
+ * Retrieve the classid for the current task, i.e. for the net_cls
+ * cgroup to which *skb* belongs.
+ *
+ * This helper can be used on TC egress path, but not on ingress.
+ *
+ * The net_cls cgroup provides an interface to tag network packets
+ * based on a user-provided identifier for all traffic coming from
+ * the tasks belonging to the related cgroup. See also the related
+ * kernel documentation, available from the Linux sources in file
+ * *Documentation/admin-guide/cgroup-v1/net_cls.rst*.
+ *
+ * The Linux kernel has two versions for cgroups: there are
+ * cgroups v1 and cgroups v2. Both are available to users, who can
+ * use a mixture of them, but note that the net_cls cgroup is for
+ * cgroup v1 only. This makes it incompatible with BPF programs
+ * run on cgroups, which is a cgroup-v2-only feature (a socket can
+ * only hold data for one version of cgroups at a time).
+ *
+ * This helper is only available if the kernel was compiled with
+ * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
+ * "**y**" or to "**m**".
+ *
+ * Returns
+ * The classid, or 0 for the default unconfigured classid.
+ */
+static __u32 (*bpf_get_cgroup_classid)(struct __sk_buff *skb) = (void *) 17;
+
+/*
+ * bpf_skb_vlan_push
+ *
+ * Push a *vlan_tci* (VLAN tag control information) of protocol
+ * *vlan_proto* to the packet associated to *skb*, then update
+ * the checksum. Note that if *vlan_proto* is different from
+ * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
+ * be **ETH_P_8021Q**.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_vlan_push)(struct __sk_buff *skb, __be16 vlan_proto, __u16 vlan_tci) = (void *) 18;
+
+/*
+ * bpf_skb_vlan_pop
+ *
+ * Pop a VLAN header from the packet associated to *skb*.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_vlan_pop)(struct __sk_buff *skb) = (void *) 19;
+
+/*
+ * bpf_skb_get_tunnel_key
+ *
+ * Get tunnel metadata. This helper takes a pointer *key* to an
+ * empty **struct bpf_tunnel_key** of **size**, that will be
+ * filled with tunnel metadata for the packet associated to *skb*.
+ * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which
+ * indicates that the tunnel is based on IPv6 protocol instead of
+ * IPv4.
+ *
+ * The **struct bpf_tunnel_key** is an object that generalizes the
+ * principal parameters used by various tunneling protocols into a
+ * single struct. This way, it can be used to easily make a
+ * decision based on the contents of the encapsulation header,
+ * "summarized" in this struct. In particular, it holds the IP
+ * address of the remote end (IPv4 or IPv6, depending on the case)
+ * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also,
+ * this struct exposes the *key*\ **->tunnel_id**, which is
+ * generally mapped to a VNI (Virtual Network Identifier), making
+ * it programmable together with the **bpf_skb_set_tunnel_key**\
+ * () helper.
+ *
+ * Let's imagine that the following code is part of a program
+ * attached to the TC ingress interface, on one end of a GRE
+ * tunnel, and is supposed to filter out all messages coming from
+ * remote ends with IPv4 address other than 10.0.0.1:
+ *
+ * ::
+ *
+ * int ret;
+ * struct bpf_tunnel_key key = {};
+ *
+ * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ * if (ret < 0)
+ * return TC_ACT_SHOT; // drop packet
+ *
+ * if (key.remote_ipv4 != 0x0a000001)
+ * return TC_ACT_SHOT; // drop packet
+ *
+ * return TC_ACT_OK; // accept packet
+ *
+ * This interface can also be used with all encapsulation devices
+ * that can operate in "collect metadata" mode: instead of having
+ * one network device per specific configuration, the "collect
+ * metadata" mode only requires a single device where the
+ * configuration can be extracted from this helper.
+ *
+ * This can be used together with various tunnels such as VXLan,
+ * Geneve, GRE or IP in IP (IPIP).
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_get_tunnel_key)(struct __sk_buff *skb, struct bpf_tunnel_key *key, __u32 size, __u64 flags) = (void *) 20;
+
+/*
+ * bpf_skb_set_tunnel_key
+ *
+ * Populate tunnel metadata for packet associated to *skb*. The
+ * tunnel metadata is set to the contents of *key*, of *size*. The
+ * *flags* can be set to a combination of the following values:
+ *
+ * **BPF_F_TUNINFO_IPV6**
+ * Indicate that the tunnel is based on IPv6 protocol
+ * instead of IPv4.
+ * **BPF_F_ZERO_CSUM_TX**
+ * For IPv4 packets, add a flag to tunnel metadata
+ * indicating that checksum computation should be skipped
+ * and checksum set to zeroes.
+ * **BPF_F_DONT_FRAGMENT**
+ * Add a flag to tunnel metadata indicating that the
+ * packet should not be fragmented.
+ * **BPF_F_SEQ_NUMBER**
+ * Add a flag to tunnel metadata indicating that a
+ * sequence number should be added to tunnel header before
+ * sending the packet. This flag was added for GRE
+ * encapsulation, but might be used with other protocols
+ * as well in the future.
+ *
+ * Here is a typical usage on the transmit path:
+ *
+ * ::
+ *
+ * struct bpf_tunnel_key key;
+ * populate key ...
+ * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
+ *
+ * See also the description of the **bpf_skb_get_tunnel_key**\ ()
+ * helper for additional information.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_set_tunnel_key)(struct __sk_buff *skb, struct bpf_tunnel_key *key, __u32 size, __u64 flags) = (void *) 21;
+
+/*
+ * bpf_perf_event_read
+ *
+ * Read the value of a perf event counter. This helper relies on a
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
+ * the perf event counter is selected when *map* is updated with
+ * perf event file descriptors. The *map* is an array whose size
+ * is the number of available CPUs, and each cell contains a value
+ * relative to one CPU. The value to retrieve is indicated by
+ * *flags*, that contains the index of the CPU to look up, masked
+ * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * **BPF_F_CURRENT_CPU** to indicate that the value for the
+ * current CPU should be retrieved.
+ *
+ * Note that before Linux 4.13, only hardware perf event can be
+ * retrieved.
+ *
+ * Also, be aware that the newer helper
+ * **bpf_perf_event_read_value**\ () is recommended over
+ * **bpf_perf_event_read**\ () in general. The latter has some ABI
+ * quirks where error and counter value are used as a return code
+ * (which is wrong to do since ranges may overlap). This issue is
+ * fixed with **bpf_perf_event_read_value**\ (), which at the same
+ * time provides more features over the **bpf_perf_event_read**\
+ * () interface. Please refer to the description of
+ * **bpf_perf_event_read_value**\ () for details.
+ *
+ * Returns
+ * The value of the perf event counter read from the map, or a
+ * negative error code in case of failure.
+ */
+static __u64 (*bpf_perf_event_read)(void *map, __u64 flags) = (void *) 22;
+
+/*
+ * bpf_redirect
+ *
+ * Redirect the packet to another net device of index *ifindex*.
+ * This helper is somewhat similar to **bpf_clone_redirect**\
+ * (), except that the packet is not cloned, which provides
+ * increased performance.
+ *
+ * Except for XDP, both ingress and egress interfaces can be used
+ * for redirection. The **BPF_F_INGRESS** value in *flags* is used
+ * to make the distinction (ingress path is selected if the flag
+ * is present, egress path otherwise). Currently, XDP only
+ * supports redirection to the egress interface, and accepts no
+ * flag at all.
+ *
+ * The same effect can be attained with the more generic
+ * **bpf_redirect_map**\ (), which requires specific maps to be
+ * used but offers better performance.
+ *
+ * Returns
+ * For XDP, the helper returns **XDP_REDIRECT** on success or
+ * **XDP_ABORTED** on error. For other program types, the values
+ * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on
+ * error.
+ */
+static int (*bpf_redirect)(__u32 ifindex, __u64 flags) = (void *) 23;
+
+/*
+ * bpf_get_route_realm
+ *
+ * Retrieve the realm of the route, that is to say the
+ * **tclassid** field of the destination for the *skb*. The
+ * identifier retrieved is a user-provided tag, similar to the
+ * one used with the net_cls cgroup (see description for
+ * **bpf_get_cgroup_classid**\ () helper), but here this tag is
+ * held by a route (a destination entry), not by a task.
+ *
+ * Retrieving this identifier works with the clsact TC egress hook
+ * (see also **tc-bpf(8)**), or alternatively on conventional
+ * classful egress qdiscs, but not on TC ingress path. In case of
+ * clsact TC egress hook, this has the advantage that, internally,
+ * the destination entry has not been dropped yet in the transmit
+ * path. Therefore, the destination entry does not need to be
+ * artificially held via **netif_keep_dst**\ () for a classful
+ * qdisc until the *skb* is freed.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_IP_ROUTE_CLASSID** configuration option.
+ *
+ * Returns
+ * The realm of the route for the packet associated to *skb*, or 0
+ * if none was found.
+ */
+static __u32 (*bpf_get_route_realm)(struct __sk_buff *skb) = (void *) 24;
+
+/*
+ * bpf_perf_event_output
+ *
+ * Write raw *data* blob into a special BPF perf event held by
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * event must have the following attributes: **PERF_SAMPLE_RAW**
+ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * The *flags* are used to indicate the index in *map* for which
+ * the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * to indicate that the index of the current CPU core should be
+ * used.
+ *
+ * The value to write, of *size*, is passed through eBPF stack and
+ * pointed by *data*.
+ *
+ * The context of the program *ctx* needs also be passed to the
+ * helper.
+ *
+ * On user space, a program willing to read the values needs to
+ * call **perf_event_open**\ () on the perf event (either for
+ * one or for all CPUs) and to store the file descriptor into the
+ * *map*. This must be done before the eBPF program can send data
+ * into it. An example is available in file
+ * *samples/bpf/trace_output_user.c* in the Linux kernel source
+ * tree (the eBPF program counterpart is in
+ * *samples/bpf/trace_output_kern.c*).
+ *
+ * **bpf_perf_event_output**\ () achieves better performance
+ * than **bpf_trace_printk**\ () for sharing data with user
+ * space, and is much better suitable for streaming data from eBPF
+ * programs.
+ *
+ * Note that this helper is not restricted to tracing use cases
+ * and can be used with programs attached to TC or XDP as well,
+ * where it allows for passing data to user space listeners. Data
+ * can be:
+ *
+ * * Only custom structs,
+ * * Only the packet payload, or
+ * * A combination of both.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_perf_event_output)(void *ctx, void *map, __u64 flags, void *data, __u64 size) = (void *) 25;
+
+/*
+ * bpf_skb_load_bytes
+ *
+ * This helper was provided as an easy way to load data from a
+ * packet. It can be used to load *len* bytes from *offset* from
+ * the packet associated to *skb*, into the buffer pointed by
+ * *to*.
+ *
+ * Since Linux 4.7, usage of this helper has mostly been replaced
+ * by "direct packet access", enabling packet data to be
+ * manipulated with *skb*\ **->data** and *skb*\ **->data_end**
+ * pointing respectively to the first byte of packet data and to
+ * the byte after the last byte of packet data. However, it
+ * remains useful if one wishes to read large quantities of data
+ * at once from a packet into the eBPF stack.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_load_bytes)(const void *skb, __u32 offset, void *to, __u32 len) = (void *) 26;
+
+/*
+ * bpf_get_stackid
+ *
+ * Walk a user or a kernel stack and return its id. To achieve
+ * this, the helper needs *ctx*, which is a pointer to the context
+ * on which the tracing program is executed, and a pointer to a
+ * *map* of type **BPF_MAP_TYPE_STACK_TRACE**.
+ *
+ * The last argument, *flags*, holds the number of stack frames to
+ * skip (from 0 to 255), masked with
+ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * a combination of the following flags:
+ *
+ * **BPF_F_USER_STACK**
+ * Collect a user space stack instead of a kernel stack.
+ * **BPF_F_FAST_STACK_CMP**
+ * Compare stacks by hash only.
+ * **BPF_F_REUSE_STACKID**
+ * If two different stacks hash into the same *stackid*,
+ * discard the old one.
+ *
+ * The stack id retrieved is a 32 bit long integer handle which
+ * can be further combined with other data (including other stack
+ * ids) and used as a key into maps. This can be useful for
+ * generating a variety of graphs (such as flame graphs or off-cpu
+ * graphs).
+ *
+ * For walking a stack, this helper is an improvement over
+ * **bpf_probe_read**\ (), which can be used with unrolled loops
+ * but is not efficient and consumes a lot of eBPF instructions.
+ * Instead, **bpf_get_stackid**\ () can collect up to
+ * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that
+ * this limit can be controlled with the **sysctl** program, and
+ * that it should be manually increased in order to profile long
+ * user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * ::
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ *
+ * Returns
+ * The positive or null stack id on success, or a negative error
+ * in case of failure.
+ */
+static int (*bpf_get_stackid)(void *ctx, void *map, __u64 flags) = (void *) 27;
+
+/*
+ * bpf_csum_diff
+ *
+ * Compute a checksum difference, from the raw buffer pointed by
+ * *from*, of length *from_size* (that must be a multiple of 4),
+ * towards the raw buffer pointed by *to*, of size *to_size*
+ * (same remark). An optional *seed* can be added to the value
+ * (this can be cascaded, the seed may come from a previous call
+ * to the helper).
+ *
+ * This is flexible enough to be used in several ways:
+ *
+ * * With *from_size* == 0, *to_size* > 0 and *seed* set to
+ * checksum, it can be used when pushing new data.
+ * * With *from_size* > 0, *to_size* == 0 and *seed* set to
+ * checksum, it can be used when removing data from a packet.
+ * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it
+ * can be used to compute a diff. Note that *from_size* and
+ * *to_size* do not need to be equal.
+ *
+ * This helper can be used in combination with
+ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to
+ * which one can feed in the difference computed with
+ * **bpf_csum_diff**\ ().
+ *
+ * Returns
+ * The checksum result, or a negative error code in case of
+ * failure.
+ */
+static __s64 (*bpf_csum_diff)(__be32 *from, __u32 from_size, __be32 *to, __u32 to_size, __wsum seed) = (void *) 28;
+
+/*
+ * bpf_skb_get_tunnel_opt
+ *
+ * Retrieve tunnel options metadata for the packet associated to
+ * *skb*, and store the raw tunnel option data to the buffer *opt*
+ * of *size*.
+ *
+ * This helper can be used with encapsulation devices that can
+ * operate in "collect metadata" mode (please refer to the related
+ * note in the description of **bpf_skb_get_tunnel_key**\ () for
+ * more details). A particular example where this can be used is
+ * in combination with the Geneve encapsulation protocol, where it
+ * allows for pushing (with **bpf_skb_set_tunnel_opt**\ () helper)
+ * and retrieving arbitrary TLVs (Type-Length-Value headers) from
+ * the eBPF program. This allows for full customization of these
+ * headers.
+ *
+ * Returns
+ * The size of the option data retrieved.
+ */
+static int (*bpf_skb_get_tunnel_opt)(struct __sk_buff *skb, void *opt, __u32 size) = (void *) 29;
+
+/*
+ * bpf_skb_set_tunnel_opt
+ *
+ * Set tunnel options metadata for the packet associated to *skb*
+ * to the option data contained in the raw buffer *opt* of *size*.
+ *
+ * See also the description of the **bpf_skb_get_tunnel_opt**\ ()
+ * helper for additional information.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_set_tunnel_opt)(struct __sk_buff *skb, void *opt, __u32 size) = (void *) 30;
+
+/*
+ * bpf_skb_change_proto
+ *
+ * Change the protocol of the *skb* to *proto*. Currently
+ * supported are transition from IPv4 to IPv6, and from IPv6 to
+ * IPv4. The helper takes care of the groundwork for the
+ * transition, including resizing the socket buffer. The eBPF
+ * program is expected to fill the new headers, if any, via
+ * **bpf_skb_store_bytes**\ () and to recompute the checksums with
+ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\
+ * (). The main case for this helper is to perform NAT64
+ * operations out of an eBPF program.
+ *
+ * Internally, the GSO type is marked as dodgy so that headers are
+ * checked and segments are recalculated by the GSO/GRO engine.
+ * The size for GSO target is adapted as well.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_change_proto)(struct __sk_buff *skb, __be16 proto, __u64 flags) = (void *) 31;
+
+/*
+ * bpf_skb_change_type
+ *
+ * Change the packet type for the packet associated to *skb*. This
+ * comes down to setting *skb*\ **->pkt_type** to *type*, except
+ * the eBPF program does not have a write access to *skb*\
+ * **->pkt_type** beside this helper. Using a helper here allows
+ * for graceful handling of errors.
+ *
+ * The major use case is to change incoming *skb*s to
+ * **PACKET_HOST** in a programmatic way instead of having to
+ * recirculate via **bpf_redirect**\ (..., **BPF_F_INGRESS**), for
+ * example.
+ *
+ * Note that *type* only allows certain values. At this time, they
+ * are:
+ *
+ * **PACKET_HOST**
+ * Packet is for us.
+ * **PACKET_BROADCAST**
+ * Send packet to all.
+ * **PACKET_MULTICAST**
+ * Send packet to group.
+ * **PACKET_OTHERHOST**
+ * Send packet to someone else.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_change_type)(struct __sk_buff *skb, __u32 type) = (void *) 32;
+
+/*
+ * bpf_skb_under_cgroup
+ *
+ * Check whether *skb* is a descendant of the cgroup2 held by
+ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ *
+ * Returns
+ * The return value depends on the result of the test, and can be:
+ *
+ * * 0, if the *skb* failed the cgroup2 descendant test.
+ * * 1, if the *skb* succeeded the cgroup2 descendant test.
+ * * A negative error code, if an error occurred.
+ */
+static int (*bpf_skb_under_cgroup)(struct __sk_buff *skb, void *map, __u32 index) = (void *) 33;
+
+/*
+ * bpf_get_hash_recalc
+ *
+ * Retrieve the hash of the packet, *skb*\ **->hash**. If it is
+ * not set, in particular if the hash was cleared due to mangling,
+ * recompute this hash. Later accesses to the hash can be done
+ * directly with *skb*\ **->hash**.
+ *
+ * Calling **bpf_set_hash_invalid**\ (), changing a packet
+ * prototype with **bpf_skb_change_proto**\ (), or calling
+ * **bpf_skb_store_bytes**\ () with the
+ * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear
+ * the hash and to trigger a new computation for the next call to
+ * **bpf_get_hash_recalc**\ ().
+ *
+ * Returns
+ * The 32-bit hash.
+ */
+static __u32 (*bpf_get_hash_recalc)(struct __sk_buff *skb) = (void *) 34;
+
+/*
+ * bpf_get_current_task
+ *
+ * Get a pointer to the task struct of the current task.
+ * Returns
+ * A pointer to the current task struct.
+ */
+static __u64 (*bpf_get_current_task)(void) = (void *) 35;
+
+/*
+ * bpf_probe_write_user
+ *
+ * Attempt in a safe way to write *len* bytes from the buffer
+ * *src* to *dst* in memory. It only works for threads that are in
+ * user context, and *dst* must be a valid user space address.
+ *
+ * This helper should not be used to implement any kind of
+ * security mechanism because of TOC-TOU attacks, but rather to
+ * debug, divert, and manipulate execution of semi-cooperative
+ * processes.
+ *
+ * Keep in mind that this feature is meant for experiments, and it
+ * has a risk of crashing the system and running programs.
+ * Therefore, when an eBPF program using this helper is attached,
+ * a warning including PID and process name is printed to kernel
+ * logs.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_probe_write_user)(void *dst, const void *src, __u32 len) = (void *) 36;
+
+/*
+ * bpf_current_task_under_cgroup
+ *
+ * Check whether the probe is being run in the context of a given
+ * subset of the cgroup2 hierarchy. The cgroup2 to test is held by
+ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ *
+ * Returns
+ * The return value depends on the result of the test, and can be:
+ *
+ * * 0, if current task does not belong to the cgroup2.
+ * * 1, if current task belongs to the cgroup2.
+ * * A negative error code, if an error occurred.
+ */
+static int (*bpf_current_task_under_cgroup)(void *map, __u32 index) = (void *) 37;
+
+/*
+ * bpf_skb_change_tail
+ *
+ * Resize (trim or grow) the packet associated to *skb* to the
+ * new *len*. The *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * The basic idea is that the helper performs the needed work to
+ * change the size of the packet, then the eBPF program rewrites
+ * the rest via helpers like **bpf_skb_store_bytes**\ (),
+ * **bpf_l3_csum_replace**\ (), **bpf_l4_csum_replace**\ ()
+ * and others. This helper is a slow path utility intended for
+ * replies with control messages. And because it is targeted for
+ * slow path, the helper itself can afford to be slow: it
+ * implicitly linearizes, unclones and drops offloads from the
+ * *skb*.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_change_tail)(struct __sk_buff *skb, __u32 len, __u64 flags) = (void *) 38;
+
+/*
+ * bpf_skb_pull_data
+ *
+ * Pull in non-linear data in case the *skb* is non-linear and not
+ * all of *len* are part of the linear section. Make *len* bytes
+ * from *skb* readable and writable. If a zero value is passed for
+ * *len*, then the whole length of the *skb* is pulled.
+ *
+ * This helper is only needed for reading and writing with direct
+ * packet access.
+ *
+ * For direct packet access, testing that offsets to access
+ * are within packet boundaries (test on *skb*\ **->data_end**) is
+ * susceptible to fail if offsets are invalid, or if the requested
+ * data is in non-linear parts of the *skb*. On failure the
+ * program can just bail out, or in the case of a non-linear
+ * buffer, use a helper to make the data available. The
+ * **bpf_skb_load_bytes**\ () helper is a first solution to access
+ * the data. Another one consists in using **bpf_skb_pull_data**\ ()
+ * to pull in once the non-linear parts, then retesting and
+ * eventually access the data.
+ *
+ * At the same time, this also makes sure the *skb* is uncloned,
+ * which is a necessary condition for direct write. As this needs
+ * to be an invariant for the write part only, the verifier
+ * detects writes and adds a prologue that is calling
+ * **bpf_skb_pull_data**\ () to effectively unclone the *skb* from
+ * the very beginning in case it is indeed cloned.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_pull_data)(struct __sk_buff *skb, __u32 len) = (void *) 39;
+
+/*
+ * bpf_csum_update
+ *
+ * Add the checksum *csum* into *skb*\ **->csum** in case the
+ * driver has supplied a checksum for the entire packet into that
+ * field. Return an error otherwise. This helper is intended to be
+ * used in combination with **bpf_csum_diff**\ (), in particular
+ * when the checksum needs to be updated after data has been
+ * written into the packet through direct packet access.
+ *
+ * Returns
+ * The checksum on success, or a negative error code in case of
+ * failure.
+ */
+static __s64 (*bpf_csum_update)(struct __sk_buff *skb, __wsum csum) = (void *) 40;
+
+/*
+ * bpf_set_hash_invalid
+ *
+ * Invalidate the current *skb*\ **->hash**. It can be used after
+ * mangling on headers through direct packet access, in order to
+ * indicate that the hash is outdated and to trigger a
+ * recalculation the next time the kernel tries to access this
+ * hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *
+ */
+static void (*bpf_set_hash_invalid)(struct __sk_buff *skb) = (void *) 41;
+
+/*
+ * bpf_get_numa_node_id
+ *
+ * Return the id of the current NUMA node. The primary use case
+ * for this helper is the selection of sockets for the local NUMA
+ * node, when the program is attached to sockets using the
+ * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**),
+ * but the helper is also available to other eBPF program types,
+ * similarly to **bpf_get_smp_processor_id**\ ().
+ *
+ * Returns
+ * The id of current NUMA node.
+ */
+static int (*bpf_get_numa_node_id)(void) = (void *) 42;
+
+/*
+ * bpf_skb_change_head
+ *
+ * Grows headroom of packet associated to *skb* and adjusts the
+ * offset of the MAC header accordingly, adding *len* bytes of
+ * space. It automatically extends and reallocates memory as
+ * required.
+ *
+ * This helper can be used on a layer 3 *skb* to push a MAC header
+ * for redirection into a layer 2 device.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_change_head)(struct __sk_buff *skb, __u32 len, __u64 flags) = (void *) 43;
+
+/*
+ * bpf_xdp_adjust_head
+ *
+ * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
+ * it is possible to use a negative value for *delta*. This helper
+ * can be used to prepare the packet for pushing or popping
+ * headers.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_xdp_adjust_head)(struct xdp_md *xdp_md, int delta) = (void *) 44;
+
+/*
+ * bpf_probe_read_str
+ *
+ * Copy a NUL terminated string from an unsafe kernel address
+ * *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
+ * more details.
+ *
+ * Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
+ * instead.
+ *
+ * Returns
+ * On success, the strictly positive length of the string,
+ * including the trailing NUL character. On error, a negative
+ * value.
+ */
+static int (*bpf_probe_read_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 45;
+
+/*
+ * bpf_get_socket_cookie
+ *
+ * If the **struct sk_buff** pointed by *skb* has a known socket,
+ * retrieve the cookie (generated by the kernel) of this socket.
+ * If no cookie has been set yet, generate a new cookie. Once
+ * generated, the socket cookie remains stable for the life of the
+ * socket. This helper can be useful for monitoring per socket
+ * networking traffic statistics as it provides a global socket
+ * identifier that can be assumed unique.
+ *
+ * Returns
+ * An 8-byte long non-decreasing number on success, or 0 if the
+ * socket field is missing inside *skb*.
+ */
+static __u64 (*bpf_get_socket_cookie)(void *ctx) = (void *) 46;
+
+/*
+ * bpf_get_socket_uid
+ *
+ * Get the owner UID of the socket associated to *skb*.
+ * Returns
+ * The owner UID of the socket associated to *skb*. If the socket
+ * is **NULL**, or if it is not a full socket (i.e. if it is a
+ * time-wait or a request socket instead), **overflowuid** value
+ * is returned (note that **overflowuid** might also be the actual
+ * UID value for the socket).
+ */
+static __u32 (*bpf_get_socket_uid)(struct __sk_buff *skb) = (void *) 47;
+
+/*
+ * bpf_set_hash
+ *
+ * Set the full hash for *skb* (set the field *skb*\ **->hash**)
+ * to value *hash*.
+ *
+ * Returns
+ * 0
+ */
+static __u32 (*bpf_set_hash)(struct __sk_buff *skb, __u32 hash) = (void *) 48;
+
+/*
+ * bpf_setsockopt
+ *
+ * Emulate a call to **setsockopt()** on the socket associated to
+ * *bpf_socket*, which must be a full socket. The *level* at
+ * which the option resides and the name *optname* of the option
+ * must be specified, see **setsockopt(2)** for more information.
+ * The option value of length *optlen* is pointed by *optval*.
+ *
+ * This helper actually implements a subset of **setsockopt()**.
+ * It supports the following *level*\ s:
+ *
+ * * **SOL_SOCKET**, which supports the following *optname*\ s:
+ * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
+ * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ * * **IPPROTO_TCP**, which supports the following *optname*\ s:
+ * **TCP_CONGESTION**, **TCP_BPF_IW**,
+ * **TCP_BPF_SNDCWND_CLAMP**.
+ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_setsockopt)(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) = (void *) 49;
+
+/*
+ * bpf_skb_adjust_room
+ *
+ * Grow or shrink the room for data in the packet associated to
+ * *skb* by *len_diff*, and according to the selected *mode*.
+ *
+ * There are two supported modes at this time:
+ *
+ * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
+ * (room space is added or removed below the layer 2 header).
+ *
+ * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
+ * (room space is added or removed below the layer 3 header).
+ *
+ * The following flags are supported at this time:
+ *
+ * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size.
+ * Adjusting mss in this way is not allowed for datagrams.
+ *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**,
+ * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**:
+ * Any new space is reserved to hold a tunnel header.
+ * Configure skb offsets and other fields accordingly.
+ *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**,
+ * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**:
+ * Use with ENCAP_L3 flags to further specify the tunnel type.
+ *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2**\ (*len*):
+ * Use with ENCAP_L3/L4 flags to further specify the tunnel
+ * type; *len* is the length of the inner MAC header.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_adjust_room)(struct __sk_buff *skb, __s32 len_diff, __u32 mode, __u64 flags) = (void *) 50;
+
+/*
+ * bpf_redirect_map
+ *
+ * Redirect the packet to the endpoint referenced by *map* at
+ * index *key*. Depending on its type, this *map* can contain
+ * references to net devices (for forwarding packets through other
+ * ports), or to CPUs (for redirecting XDP frames to another CPU;
+ * but this is only implemented for native XDP (with driver
+ * support) as of this writing).
+ *
+ * The lower two bits of *flags* are used as the return code if
+ * the map lookup fails. This is so that the return value can be
+ * one of the XDP program return codes up to XDP_TX, as chosen by
+ * the caller. Any higher bits in the *flags* argument must be
+ * unset.
+ *
+ * When used to redirect packets to net devices, this helper
+ * provides a high performance increase over **bpf_redirect**\ ().
+ * This is due to various implementation details of the underlying
+ * mechanisms, one of which is the fact that **bpf_redirect_map**\
+ * () tries to send packet as a "bulk" to the device.
+ *
+ * Returns
+ * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ */
+static int (*bpf_redirect_map)(void *map, __u32 key, __u64 flags) = (void *) 51;
+
+/*
+ * bpf_sk_redirect_map
+ *
+ * Redirect the packet to the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ *
+ * Returns
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ */
+static int (*bpf_sk_redirect_map)(struct __sk_buff *skb, void *map, __u32 key, __u64 flags) = (void *) 52;
+
+/*
+ * bpf_sock_map_update
+ *
+ * Add an entry to, or update a *map* referencing sockets. The
+ * *skops* is used as a new value for the entry associated to
+ * *key*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * If the *map* has eBPF programs (parser and verdict), those will
+ * be inherited by the socket being added. If the socket is
+ * already attached to eBPF programs, this results in an error.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_sock_map_update)(struct bpf_sock_ops *skops, void *map, void *key, __u64 flags) = (void *) 53;
+
+/*
+ * bpf_xdp_adjust_meta
+ *
+ * Adjust the address pointed by *xdp_md*\ **->data_meta** by
+ * *delta* (which can be positive or negative). Note that this
+ * operation modifies the address stored in *xdp_md*\ **->data**,
+ * so the latter must be loaded only after the helper has been
+ * called.
+ *
+ * The use of *xdp_md*\ **->data_meta** is optional and programs
+ * are not required to use it. The rationale is that when the
+ * packet is processed with XDP (e.g. as DoS filter), it is
+ * possible to push further meta data along with it before passing
+ * to the stack, and to give the guarantee that an ingress eBPF
+ * program attached as a TC classifier on the same device can pick
+ * this up for further post-processing. Since TC works with socket
+ * buffers, it remains possible to set from XDP the **mark** or
+ * **priority** pointers, or other pointers for the socket buffer.
+ * Having this scratch space generic and programmable allows for
+ * more flexibility as the user is free to store whatever meta
+ * data they need.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_xdp_adjust_meta)(struct xdp_md *xdp_md, int delta) = (void *) 54;
+
+/*
+ * bpf_perf_event_read_value
+ *
+ * Read the value of a perf event counter, and store it into *buf*
+ * of size *buf_size*. This helper relies on a *map* of type
+ * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event
+ * counter is selected when *map* is updated with perf event file
+ * descriptors. The *map* is an array whose size is the number of
+ * available CPUs, and each cell contains a value relative to one
+ * CPU. The value to retrieve is indicated by *flags*, that
+ * contains the index of the CPU to look up, masked with
+ * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * **BPF_F_CURRENT_CPU** to indicate that the value for the
+ * current CPU should be retrieved.
+ *
+ * This helper behaves in a way close to
+ * **bpf_perf_event_read**\ () helper, save that instead of
+ * just returning the value observed, it fills the *buf*
+ * structure. This allows for additional data to be retrieved: in
+ * particular, the enabled and running times (in *buf*\
+ * **->enabled** and *buf*\ **->running**, respectively) are
+ * copied. In general, **bpf_perf_event_read_value**\ () is
+ * recommended over **bpf_perf_event_read**\ (), which has some
+ * ABI issues and provides fewer functionalities.
+ *
+ * These values are interesting, because hardware PMU (Performance
+ * Monitoring Unit) counters are limited resources. When there are
+ * more PMU based perf events opened than available counters,
+ * kernel will multiplex these events so each event gets certain
+ * percentage (but not all) of the PMU time. In case that
+ * multiplexing happens, the number of samples or counter value
+ * will not reflect the case compared to when no multiplexing
+ * occurs. This makes comparison between different runs difficult.
+ * Typically, the counter value should be normalized before
+ * comparing to other experiments. The usual normalization is done
+ * as follows.
+ *
+ * ::
+ *
+ * normalized_counter = counter * t_enabled / t_running
+ *
+ * Where t_enabled is the time enabled for event and t_running is
+ * the time running for event since last normalization. The
+ * enabled and running times are accumulated since the perf event
+ * open. To achieve scaling factor between two invocations of an
+ * eBPF program, users can use CPU id as the key (which is
+ * typical for perf array usage model) to remember the previous
+ * value and do the calculation inside the eBPF program.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_perf_event_read_value)(void *map, __u64 flags, struct bpf_perf_event_value *buf, __u32 buf_size) = (void *) 55;
+
+/*
+ * bpf_perf_prog_read_value
+ *
+ * For an eBPF program attached to a perf event, retrieve the
+ * value of the event counter associated to *ctx* and store it in
+ * the structure pointed by *buf* and of size *buf_size*. Enabled
+ * and running times are also stored in the structure (see
+ * description of helper **bpf_perf_event_read_value**\ () for
+ * more details).
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_perf_prog_read_value)(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, __u32 buf_size) = (void *) 56;
+
+/*
+ * bpf_getsockopt
+ *
+ * Emulate a call to **getsockopt()** on the socket associated to
+ * *bpf_socket*, which must be a full socket. The *level* at
+ * which the option resides and the name *optname* of the option
+ * must be specified, see **getsockopt(2)** for more information.
+ * The retrieved value is stored in the structure pointed by
+ * *optval* and of length *optlen*.
+ *
+ * This helper actually implements a subset of **getsockopt()**.
+ * It supports the following *level*\ s:
+ *
+ * * **IPPROTO_TCP**, which supports *optname*
+ * **TCP_CONGESTION**.
+ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_getsockopt)(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) = (void *) 57;
+
+/*
+ * bpf_override_return
+ *
+ * Used for error injection, this helper uses kprobes to override
+ * the return value of the probed function, and to set it to *rc*.
+ * The first argument is the context *regs* on which the kprobe
+ * works.
+ *
+ * This helper works by setting the PC (program counter)
+ * to an override function which is run in place of the original
+ * probed function. This means the probed function is not run at
+ * all. The replacement function just returns with the required
+ * value.
+ *
+ * This helper has security implications, and thus is subject to
+ * restrictions. It is only available if the kernel was compiled
+ * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
+ * option, and in this case it only works on functions tagged with
+ * **ALLOW_ERROR_INJECTION** in the kernel code.
+ *
+ * Also, the helper is only available for the architectures having
+ * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing,
+ * x86 architecture is the only one to support this feature.
+ *
+ * Returns
+ * 0
+ */
+static int (*bpf_override_return)(struct pt_regs *regs, __u64 rc) = (void *) 58;
+
+/*
+ * bpf_sock_ops_cb_flags_set
+ *
+ * Attempt to set the value of the **bpf_sock_ops_cb_flags** field
+ * for the full TCP socket associated to *bpf_sock_ops* to
+ * *argval*.
+ *
+ * The primary use of this field is to determine if there should
+ * be calls to eBPF programs of type
+ * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP
+ * code. A program of the same type can change its value, per
+ * connection and as necessary, when the connection is
+ * established. This field is directly accessible for reading, but
+ * this helper must be used for updates in order to return an
+ * error if an eBPF program tries to set a callback that is not
+ * supported in the current kernel.
+ *
+ * *argval* is a flag array which can combine these flags:
+ *
+ * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
+ * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
+ * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT)
+ *
+ * Therefore, this function can be used to clear a callback flag by
+ * setting the appropriate bit to zero. e.g. to disable the RTO
+ * callback:
+ *
+ * **bpf_sock_ops_cb_flags_set(bpf_sock,**
+ * **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)**
+ *
+ * Here are some examples of where one could call such eBPF
+ * program:
+ *
+ * * When RTO fires.
+ * * When a packet is retransmitted.
+ * * When the connection terminates.
+ * * When a packet is sent.
+ * * When a packet is received.
+ *
+ * Returns
+ * Code **-EINVAL** if the socket is not a full TCP socket;
+ * otherwise, a positive number containing the bits that could not
+ * be set is returned (which comes down to 0 if all bits were set
+ * as required).
+ */
+static int (*bpf_sock_ops_cb_flags_set)(struct bpf_sock_ops *bpf_sock, int argval) = (void *) 59;
+
+/*
+ * bpf_msg_redirect_map
+ *
+ * This helper is used in programs implementing policies at the
+ * socket level. If the message *msg* is allowed to pass (i.e. if
+ * the verdict eBPF program returns **SK_PASS**), redirect it to
+ * the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ *
+ * Returns
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ */
+static int (*bpf_msg_redirect_map)(struct sk_msg_md *msg, void *map, __u32 key, __u64 flags) = (void *) 60;
+
+/*
+ * bpf_msg_apply_bytes
+ *
+ * For socket policies, apply the verdict of the eBPF program to
+ * the next *bytes* (number of bytes) of message *msg*.
+ *
+ * For example, this helper can be used in the following cases:
+ *
+ * * A single **sendmsg**\ () or **sendfile**\ () system call
+ * contains multiple logical messages that the eBPF program is
+ * supposed to read and for which it should apply a verdict.
+ * * An eBPF program only cares to read the first *bytes* of a
+ * *msg*. If the message has a large payload, then setting up
+ * and calling the eBPF program repeatedly for all bytes, even
+ * though the verdict is already known, would create unnecessary
+ * overhead.
+ *
+ * When called from within an eBPF program, the helper sets a
+ * counter internal to the BPF infrastructure, that is used to
+ * apply the last verdict to the next *bytes*. If *bytes* is
+ * smaller than the current data being processed from a
+ * **sendmsg**\ () or **sendfile**\ () system call, the first
+ * *bytes* will be sent and the eBPF program will be re-run with
+ * the pointer for start of data pointing to byte number *bytes*
+ * **+ 1**. If *bytes* is larger than the current data being
+ * processed, then the eBPF verdict will be applied to multiple
+ * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are
+ * consumed.
+ *
+ * Note that if a socket closes with the internal counter holding
+ * a non-zero value, this is not a problem because data is not
+ * being buffered for *bytes* and is sent as it is received.
+ *
+ * Returns
+ * 0
+ */
+static int (*bpf_msg_apply_bytes)(struct sk_msg_md *msg, __u32 bytes) = (void *) 61;
+
+/*
+ * bpf_msg_cork_bytes
+ *
+ * For socket policies, prevent the execution of the verdict eBPF
+ * program for message *msg* until *bytes* (byte number) have been
+ * accumulated.
+ *
+ * This can be used when one needs a specific number of bytes
+ * before a verdict can be assigned, even if the data spans
+ * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme
+ * case would be a user calling **sendmsg**\ () repeatedly with
+ * 1-byte long message segments. Obviously, this is bad for
+ * performance, but it is still valid. If the eBPF program needs
+ * *bytes* bytes to validate a header, this helper can be used to
+ * prevent the eBPF program from being called again until *bytes* have
+ * been accumulated.
+ *
+ * Returns
+ * 0
+ */
+static int (*bpf_msg_cork_bytes)(struct sk_msg_md *msg, __u32 bytes) = (void *) 62;
+
+/*
+ * bpf_msg_pull_data
+ *
+ * For socket policies, pull in non-linear data from user space
+ * for *msg* and set pointers *msg*\ **->data** and *msg*\
+ * **->data_end** to *start* and *end* bytes offsets into *msg*,
+ * respectively.
+ *
+ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ * *msg* it can only parse data that the (**data**, **data_end**)
+ * pointers have already consumed. For **sendmsg**\ () hooks this
+ * is likely the first scatterlist element. But for calls relying
+ * on the **sendpage** handler (e.g. **sendfile**\ ()) this will
+ * be the range (**0**, **0**) because the data is shared with
+ * user space and by default the objective is to avoid allowing
+ * user space to modify data while (or after) eBPF verdict is
+ * being decided. This helper can be used to pull in data and to
+ * set the start and end pointer to given values. Data will be
+ * copied if necessary (i.e. if data was not linear and if start
+ * and end pointers do not point to the same chunk).
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_msg_pull_data)(struct sk_msg_md *msg, __u32 start, __u32 end, __u64 flags) = (void *) 63;
+
+/*
+ * bpf_bind
+ *
+ * Bind the socket associated to *ctx* to the address pointed by
+ * *addr*, of length *addr_len*. This allows for making outgoing
+ * connection from the desired IP address, which can be useful for
+ * example when all processes inside a cgroup should use one
+ * single IP address on a host that has multiple IPs configured.
+ *
+ * This helper works for IPv4 and IPv6, TCP and UDP sockets. The
+ * domain (*addr*\ **->sa_family**) must be **AF_INET** (or
+ * **AF_INET6**). Looking for a free port to bind to can be
+ * expensive, therefore binding to port is not permitted by the
+ * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
+ * must be set to zero.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_bind)(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) = (void *) 64;
+
+/*
+ * bpf_xdp_adjust_tail
+ *
+ * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
+ * only possible to shrink the packet as of this writing,
+ * therefore *delta* must be a negative integer.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_xdp_adjust_tail)(struct xdp_md *xdp_md, int delta) = (void *) 65;
+
+/*
+ * bpf_skb_get_xfrm_state
+ *
+ * Retrieve the XFRM state (IP transform framework, see also
+ * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
+ *
+ * The retrieved value is stored in the **struct bpf_xfrm_state**
+ * pointed by *xfrm_state* and of length *size*.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_XFRM** configuration option.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_get_xfrm_state)(struct __sk_buff *skb, __u32 index, struct bpf_xfrm_state *xfrm_state, __u32 size, __u64 flags) = (void *) 66;
+
+/*
+ * bpf_get_stack
+ *
+ * Return a user or a kernel stack in bpf program provided buffer.
+ * To achieve this, the helper needs *ctx*, which is a pointer
+ * to the context on which the tracing program is executed.
+ * To store the stacktrace, the bpf program provides *buf* with
+ * a nonnegative *size*.
+ *
+ * The last argument, *flags*, holds the number of stack frames to
+ * skip (from 0 to 255), masked with
+ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * the following flags:
+ *
+ * **BPF_F_USER_STACK**
+ * Collect a user space stack instead of a kernel stack.
+ * **BPF_F_USER_BUILD_ID**
+ * Collect buildid+offset instead of ips for user stack,
+ * only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ * **bpf_get_stack**\ () can collect up to
+ * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ * to a sufficiently large buffer size. Note that
+ * this limit can be controlled with the **sysctl** program, and
+ * that it should be manually increased in order to profile long
+ * user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * ::
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ *
+ * Returns
+ * A non-negative value equal to or less than *size* on success,
+ * or a negative error in case of failure.
+ */
+static int (*bpf_get_stack)(void *ctx, void *buf, __u32 size, __u64 flags) = (void *) 67;
+
+/*
+ * bpf_skb_load_bytes_relative
+ *
+ * This helper is similar to **bpf_skb_load_bytes**\ () in that
+ * it provides an easy way to load *len* bytes from *offset*
+ * from the packet associated to *skb*, into the buffer pointed
+ * by *to*. The difference to **bpf_skb_load_bytes**\ () is that
+ * a fifth argument *start_header* exists in order to select a
+ * base offset to start from. *start_header* can be one of:
+ *
+ * **BPF_HDR_START_MAC**
+ * Base offset to load data from is *skb*'s mac header.
+ * **BPF_HDR_START_NET**
+ * Base offset to load data from is *skb*'s network header.
+ *
+ * In general, "direct packet access" is the preferred method to
+ * access packet data, however, this helper is in particular useful
+ * in socket filters where *skb*\ **->data** does not always point
+ * to the start of the mac header and where "direct packet access"
+ * is not available.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_load_bytes_relative)(const void *skb, __u32 offset, void *to, __u32 len, __u32 start_header) = (void *) 68;
+
+/*
+ * bpf_fib_lookup
+ *
+ * Do FIB lookup in kernel tables using parameters in *params*.
+ * If lookup is successful and result shows packet is to be
+ * forwarded, the neighbor tables are searched for the nexthop.
+ * If successful (i.e., FIB lookup shows forwarding and nexthop
+ * is resolved), the nexthop address is returned in ipv4_dst
+ * or ipv6_dst based on family, smac is set to mac address of
+ * egress device, dmac is set to nexthop mac address, rt_metric
+ * is set to metric from route (IPv4/IPv6 only), and ifindex
+ * is set to the device index of the nexthop from the FIB lookup.
+ *
+ * *plen* argument is the size of the passed in struct.
+ * *flags* argument can be a combination of one or more of the
+ * following values:
+ *
+ * **BPF_FIB_LOOKUP_DIRECT**
+ * Do a direct table lookup vs full lookup using FIB
+ * rules.
+ * **BPF_FIB_LOOKUP_OUTPUT**
+ * Perform lookup from an egress perspective (default is
+ * ingress).
+ *
+ * *ctx* is either **struct xdp_md** for XDP programs or
+ * **struct sk_buff** for tc cls_act programs.
+ *
+ * Returns
+ * * < 0 if any input argument is invalid
+ * * 0 on success (packet is forwarded, nexthop neighbor exists)
+ * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
+ * packet is not forwarded or needs assist from full stack
+ */
+static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params, int plen, __u32 flags) = (void *) 69;
+
+/*
+ * bpf_sock_hash_update
+ *
+ * Add an entry to, or update a sockhash *map* referencing sockets.
+ * The *skops* is used as a new value for the entry associated to
+ * *key*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * If the *map* has eBPF programs (parser and verdict), those will
+ * be inherited by the socket being added. If the socket is
+ * already attached to eBPF programs, this results in an error.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_sock_hash_update)(struct bpf_sock_ops *skops, void *map, void *key, __u64 flags) = (void *) 70;
+
+/*
+ * bpf_msg_redirect_hash
+ *
+ * This helper is used in programs implementing policies at the
+ * socket level. If the message *msg* is allowed to pass (i.e. if
+ * the verdict eBPF program returns **SK_PASS**), redirect it to
+ * the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ *
+ * Returns
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ */
+static int (*bpf_msg_redirect_hash)(struct sk_msg_md *msg, void *map, void *key, __u64 flags) = (void *) 71;
+
+/*
+ * bpf_sk_redirect_hash
+ *
+ * This helper is used in programs implementing policies at the
+ * skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
+ * if the verdict eBPF program returns **SK_PASS**), redirect it
+ * to the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress otherwise). This is the only flag supported for now.
+ *
+ * Returns
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ */
+static int (*bpf_sk_redirect_hash)(struct __sk_buff *skb, void *map, void *key, __u64 flags) = (void *) 72;
+
+/*
+ * bpf_lwt_push_encap
+ *
+ * Encapsulate the packet associated to *skb* within a Layer 3
+ * protocol header. This header is provided in the buffer at
+ * address *hdr*, with *len* its size in bytes. *type* indicates
+ * the protocol of the header and can be one of:
+ *
+ * **BPF_LWT_ENCAP_SEG6**
+ * IPv6 encapsulation with Segment Routing Header
+ * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
+ * the IPv6 header is computed by the kernel.
+ * **BPF_LWT_ENCAP_SEG6_INLINE**
+ * Only works if *skb* contains an IPv6 packet. Insert a
+ * Segment Routing Header (**struct ipv6_sr_hdr**) inside
+ * the IPv6 header.
+ * **BPF_LWT_ENCAP_IP**
+ * IP encapsulation (GRE/GUE/IPIP/etc). The outer header
+ * must be IPv4 or IPv6, followed by zero or more
+ * additional headers, up to **LWT_BPF_MAX_HEADROOM**
+ * total bytes in all prepended headers. Please note that
+ * if **skb_is_gso**\ (*skb*) is true, no more than two
+ * headers can be prepended, and the inner header, if
+ * present, should be either GRE or UDP/GUE.
+ *
+ * **BPF_LWT_ENCAP_SEG6**\ \* types can be called by BPF programs
+ * of type **BPF_PROG_TYPE_LWT_IN**; **BPF_LWT_ENCAP_IP** type can
+ * be called by bpf programs of types **BPF_PROG_TYPE_LWT_IN** and
+ * **BPF_PROG_TYPE_LWT_XMIT**.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_lwt_push_encap)(struct __sk_buff *skb, __u32 type, void *hdr, __u32 len) = (void *) 73;
+
+/*
+ * bpf_lwt_seg6_store_bytes
+ *
+ * Store *len* bytes from address *from* into the packet
+ * associated to *skb*, at *offset*. Only the flags, tag and TLVs
+ * inside the outermost IPv6 Segment Routing Header can be
+ * modified through this helper.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_lwt_seg6_store_bytes)(struct __sk_buff *skb, __u32 offset, const void *from, __u32 len) = (void *) 74;
+
+/*
+ * bpf_lwt_seg6_adjust_srh
+ *
+ * Adjust the size allocated to TLVs in the outermost IPv6
+ * Segment Routing Header contained in the packet associated to
+ * *skb*, at position *offset* by *delta* bytes. Only offsets
+ * after the segments are accepted. *delta* can be either
+ * positive (growing) or negative (shrinking).
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_lwt_seg6_adjust_srh)(struct __sk_buff *skb, __u32 offset, __s32 delta) = (void *) 75;
+
+/*
+ * bpf_lwt_seg6_action
+ *
+ * Apply an IPv6 Segment Routing action of type *action* to the
+ * packet associated to *skb*. Each action takes a parameter
+ * contained at address *param*, and of length *param_len* bytes.
+ * *action* can be one of:
+ *
+ * **SEG6_LOCAL_ACTION_END_X**
+ * End.X action: Endpoint with Layer-3 cross-connect.
+ * Type of *param*: **struct in6_addr**.
+ * **SEG6_LOCAL_ACTION_END_T**
+ * End.T action: Endpoint with specific IPv6 table lookup.
+ * Type of *param*: **int**.
+ * **SEG6_LOCAL_ACTION_END_B6**
+ * End.B6 action: Endpoint bound to an SRv6 policy.
+ * Type of *param*: **struct ipv6_sr_hdr**.
+ * **SEG6_LOCAL_ACTION_END_B6_ENCAP**
+ * End.B6.Encap action: Endpoint bound to an SRv6
+ * encapsulation policy.
+ * Type of *param*: **struct ipv6_sr_hdr**.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_lwt_seg6_action)(struct __sk_buff *skb, __u32 action, void *param, __u32 param_len) = (void *) 76;
+
+/*
+ * bpf_rc_repeat
+ *
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded repeat key message. This delays
+ * the generation of a key up event for previously generated
+ * key down event.
+ *
+ * Some IR protocols like NEC have a special IR message for
+ * repeating last button, for when a button is held down.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * This helper is only available if the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ *
+ * Returns
+ * 0
+ */
+static int (*bpf_rc_repeat)(void *ctx) = (void *) 77;
+
+/*
+ * bpf_rc_keydown
+ *
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded key press with *scancode*,
+ * *toggle* value in the given *protocol*. The scancode will be
+ * translated to a keycode using the rc keymap, and reported as
+ * an input key down event. After a period a key up event is
+ * generated. This period can be extended by calling either
+ * **bpf_rc_keydown**\ () again with the same values, or calling
+ * **bpf_rc_repeat**\ ().
+ *
+ * Some protocols include a toggle bit, in case the button was
+ * released and pressed again between consecutive scancodes.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * The *protocol* is the decoded protocol number (see
+ * **enum rc_proto** for some predefined values).
+ *
+ * This helper is only available if the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ *
+ * Returns
+ * 0
+ */
+static int (*bpf_rc_keydown)(void *ctx, __u32 protocol, __u64 scancode, __u32 toggle) = (void *) 78;
+
+/*
+ * bpf_skb_cgroup_id
+ *
+ * Return the cgroup v2 id of the socket associated with the *skb*.
+ * This is roughly similar to the **bpf_get_cgroup_classid**\ ()
+ * helper for cgroup v1 by providing a tag or identifier that
+ * can be matched on or used for map lookups e.g. to implement
+ * policy. The cgroup v2 id of a given path in the hierarchy is
+ * exposed in user space through the f_handle API in order to get
+ * to the same 64-bit id.
+ *
+ * This helper can be used on TC egress path, but not on ingress,
+ * and is available only if the kernel was compiled with the
+ * **CONFIG_SOCK_CGROUP_DATA** configuration option.
+ *
+ * Returns
+ * The id is returned or 0 in case the id could not be retrieved.
+ */
+static __u64 (*bpf_skb_cgroup_id)(struct __sk_buff *skb) = (void *) 79;
+
+/*
+ * bpf_get_current_cgroup_id
+ *
+ * Get the current cgroup id based on the cgroup within which the
+ * current task is running.
+ *
+ * Returns
+ * A 64-bit integer containing the current cgroup id based
+ * on the cgroup within which the current task is running.
+ */
+static __u64 (*bpf_get_current_cgroup_id)(void) = (void *) 80;
+
+/*
+ * bpf_get_local_storage
+ *
+ * Get the pointer to the local storage area.
+ * The type and the size of the local storage is defined
+ * by the *map* argument.
+ * The *flags* meaning is specific for each map type,
+ * and has to be 0 for cgroup local storage.
+ *
+ * Depending on the BPF program type, a local storage area
+ * can be shared between multiple instances of the BPF program,
+ * running simultaneously.
+ *
+ * Users must take care of the synchronization themselves.
+ * For example, by using the **BPF_STX_XADD** instruction to alter
+ * the shared data.
+ *
+ * Returns
+ * A pointer to the local storage area.
+ */
+static void *(*bpf_get_local_storage)(void *map, __u64 flags) = (void *) 81;
+
+/*
+ * bpf_sk_select_reuseport
+ *
+ * Select a **SO_REUSEPORT** socket from a
+ * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
+ * It checks that the selected socket matches the incoming
+ * request in the socket buffer.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_sk_select_reuseport)(struct sk_reuseport_md *reuse, void *map, void *key, __u64 flags) = (void *) 82;
+
+/*
+ * bpf_skb_ancestor_cgroup_id
+ *
+ * Return id of cgroup v2 that is ancestor of cgroup associated
+ * with the *skb* at the *ancestor_level*. The root cgroup is at
+ * *ancestor_level* zero and each step down the hierarchy
+ * increments the level. If *ancestor_level* == level of cgroup
+ * associated with *skb*, then return value will be same as that
+ * of **bpf_skb_cgroup_id**\ ().
+ *
+ * The helper is useful to implement policies based on cgroups
+ * that are upper in hierarchy than immediate cgroup associated
+ * with *skb*.
+ *
+ * The format of returned id and helper limitations are same as in
+ * **bpf_skb_cgroup_id**\ ().
+ *
+ * Returns
+ * The id is returned or 0 in case the id could not be retrieved.
+ */
+static __u64 (*bpf_skb_ancestor_cgroup_id)(struct __sk_buff *skb, int ancestor_level) = (void *) 83;
+
+/*
+ * bpf_sk_lookup_tcp
+ *
+ * Look for TCP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ * The *ctx* should point to the context of the program, such as
+ * the skb or socket (depending on the hook in use). This is used
+ * to determine the base network namespace for the lookup.
+ *
+ * *tuple_size* must be one of:
+ *
+ * **sizeof**\ (*tuple*\ **->ipv4**)
+ * Look for an IPv4 socket.
+ * **sizeof**\ (*tuple*\ **->ipv6**)
+ * Look for an IPv6 socket.
+ *
+ * If the *netns* is a negative signed 32-bit integer, then the
+ * socket lookup table in the netns associated with the *ctx* will
+ * be used. For the TC hooks, this is the netns of the device
+ * in the skb. For socket hooks, this is the netns of the socket.
+ * If *netns* is any other signed 32-bit value greater than or
+ * equal to zero then it specifies the ID of the netns relative to
+ * the netns associated with the *ctx*. *netns* values beyond the
+ * range of 32-bit integers are reserved for future use.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ *
+ * Returns
+ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock**
+ * result is from *reuse*\ **->socks**\ [] using the hash of the
+ * tuple.
+ */
+static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, struct bpf_sock_tuple *tuple, __u32 tuple_size, __u64 netns, __u64 flags) = (void *) 84;
+
+/*
+ * bpf_sk_lookup_udp
+ *
+ * Look for UDP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ * The *ctx* should point to the context of the program, such as
+ * the skb or socket (depending on the hook in use). This is used
+ * to determine the base network namespace for the lookup.
+ *
+ * *tuple_size* must be one of:
+ *
+ * **sizeof**\ (*tuple*\ **->ipv4**)
+ * Look for an IPv4 socket.
+ * **sizeof**\ (*tuple*\ **->ipv6**)
+ * Look for an IPv6 socket.
+ *
+ * If the *netns* is a negative signed 32-bit integer, then the
+ * socket lookup table in the netns associated with the *ctx* will
+ * be used. For the TC hooks, this is the netns of the device
+ * in the skb. For socket hooks, this is the netns of the socket.
+ * If *netns* is any other signed 32-bit value greater than or
+ * equal to zero then it specifies the ID of the netns relative to
+ * the netns associated with the *ctx*. *netns* values beyond the
+ * range of 32-bit integers are reserved for future use.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ *
+ * Returns
+ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock**
+ * result is from *reuse*\ **->socks**\ [] using the hash of the
+ * tuple.
+ */
+static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, struct bpf_sock_tuple *tuple, __u32 tuple_size, __u64 netns, __u64 flags) = (void *) 85;
+
+/*
+ * bpf_sk_release
+ *
+ * Release the reference held by *sock*. *sock* must be a
+ * non-**NULL** pointer that was returned from
+ * **bpf_sk_lookup_xxx**\ ().
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_sk_release)(struct bpf_sock *sock) = (void *) 86;
+
+/*
+ * bpf_map_push_elem
+ *
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is
+ * removed to make room for this.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_map_push_elem)(void *map, const void *value, __u64 flags) = (void *) 87;
+
+/*
+ * bpf_map_pop_elem
+ *
+ * Pop an element from *map*.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_map_pop_elem)(void *map, void *value) = (void *) 88;
+
+/*
+ * bpf_map_peek_elem
+ *
+ * Get an element from *map* without removing it.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_map_peek_elem)(void *map, void *value) = (void *) 89;
+
+/*
+ * bpf_msg_push_data
+ *
+ * For socket policies, insert *len* bytes into *msg* at offset
+ * *start*.
+ *
+ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ * *msg* it may want to insert metadata or options into the *msg*.
+ * This can later be read and used by any of the lower layer BPF
+ * hooks.
+ *
+ * This helper may fail under memory pressure (when a malloc
+ * fails). In these cases BPF programs will get an appropriate
+ * error and will need to handle it.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_msg_push_data)(struct sk_msg_md *msg, __u32 start, __u32 len, __u64 flags) = (void *) 90;
+
+/*
+ * bpf_msg_pop_data
+ *
+ * Will remove *len* bytes from a *msg* starting at byte *start*.
+ * This may result in **ENOMEM** errors under certain situations if
+ * an allocation and copy are required due to a full ring buffer.
+ * However, the helper will try to avoid doing the allocation
+ * if possible. Other errors can occur if input parameters are
+ * invalid, either due to the *start* byte not being a valid part of
+ * the *msg* payload and/or the *len* value being too large.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_msg_pop_data)(struct sk_msg_md *msg, __u32 start, __u32 len, __u64 flags) = (void *) 91;
+
+/*
+ * bpf_rc_pointer_rel
+ *
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded pointer movement.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * This helper is only available if the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ *
+ * Returns
+ * 0
+ */
+static int (*bpf_rc_pointer_rel)(void *ctx, __s32 rel_x, __s32 rel_y) = (void *) 92;
+
+/*
+ * bpf_spin_lock
+ *
+ * Acquire a spinlock represented by the pointer *lock*, which is
+ * stored as part of a value of a map. Taking the lock allows to
+ * safely update the rest of the fields in that value. The
+ * spinlock can (and must) later be released with a call to
+ * **bpf_spin_unlock**\ (\ *lock*\ ).
+ *
+ * Spinlocks in BPF programs come with a number of restrictions
+ * and constraints:
+ *
+ * * **bpf_spin_lock** objects are only allowed inside maps of
+ * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this
+ * list could be extended in the future).
+ * * BTF description of the map is mandatory.
+ * * The BPF program can take ONE lock at a time, since taking two
+ * or more could cause deadlocks.
+ * * Only one **struct bpf_spin_lock** is allowed per map element.
+ * * When the lock is taken, calls (either BPF to BPF or helpers)
+ * are not allowed.
+ * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not
+ * allowed inside a spinlock-ed region.
+ * * The BPF program MUST call **bpf_spin_unlock**\ () to release
+ * the lock, on all execution paths, before it returns.
+ * * The BPF program can access **struct bpf_spin_lock** only via
+ * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ ()
+ * helpers. Loading or storing data into the **struct
+ * bpf_spin_lock** *lock*\ **;** field of a map is not allowed.
+ * * To use the **bpf_spin_lock**\ () helper, the BTF description
+ * of the map value must be a struct and have **struct
+ * bpf_spin_lock** *anyname*\ **;** field at the top level.
+ * Nested lock inside another struct is not allowed.
+ * * The **struct bpf_spin_lock** *lock* field in a map value must
+ * be aligned on a multiple of 4 bytes in that value.
+ * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy
+ * the **bpf_spin_lock** field to user space.
+ * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from
+ * a BPF program, do not update the **bpf_spin_lock** field.
+ * * **bpf_spin_lock** cannot be on the stack or inside a
+ * networking packet (it can only be inside of a map value).
+ * * **bpf_spin_lock** is available to root only.
+ * * Tracing programs and socket filter programs cannot use
+ * **bpf_spin_lock**\ () due to insufficient preemption checks
+ * (but this may change in the future).
+ * * **bpf_spin_lock** is not allowed in inner maps of map-in-map.
+ *
+ * Returns
+ * 0
+ */
+static int (*bpf_spin_lock)(struct bpf_spin_lock *lock) = (void *) 93;
+
+/*
+ * bpf_spin_unlock
+ *
+ * Release the *lock* previously locked by a call to
+ * **bpf_spin_lock**\ (\ *lock*\ ).
+ *
+ * Returns
+ * 0
+ */
+static int (*bpf_spin_unlock)(struct bpf_spin_lock *lock) = (void *) 94;
+
+/*
+ * bpf_sk_fullsock
+ *
+ * This helper gets a **struct bpf_sock** pointer such
+ * that all the fields in this **bpf_sock** can be accessed.
+ *
+ * Returns
+ * A **struct bpf_sock** pointer on success, or **NULL** in
+ * case of failure.
+ */
+static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) = (void *) 95;
+
+/*
+ * bpf_tcp_sock
+ *
+ * This helper gets a **struct bpf_tcp_sock** pointer from a
+ * **struct bpf_sock** pointer.
+ *
+ * Returns
+ * A **struct bpf_tcp_sock** pointer on success, or **NULL** in
+ * case of failure.
+ */
+static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) = (void *) 96;
+
+/*
+ * bpf_skb_ecn_set_ce
+ *
+ * Set ECN (Explicit Congestion Notification) field of IP header
+ * to **CE** (Congestion Encountered) if current value is **ECT**
+ * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6
+ * and IPv4.
+ *
+ * Returns
+ * 1 if the **CE** flag is set (either by the current helper call
+ * or because it was already present), 0 if it is not set.
+ */
+static int (*bpf_skb_ecn_set_ce)(struct __sk_buff *skb) = (void *) 97;
+
+/*
+ * bpf_get_listener_sock
+ *
+ * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state.
+ * **bpf_sk_release**\ () is unnecessary and not allowed.
+ *
+ * Returns
+ * A **struct bpf_sock** pointer on success, or **NULL** in
+ * case of failure.
+ */
+static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) = (void *) 98;
+
+/*
+ * bpf_skc_lookup_tcp
+ *
+ * Look for TCP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ * This function is identical to **bpf_sk_lookup_tcp**\ (), except
+ * that it also returns timewait or request sockets. Use
+ * **bpf_sk_fullsock**\ () or **bpf_tcp_sock**\ () to access the
+ * full structure.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ *
+ * Returns
+ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock**
+ * result is from *reuse*\ **->socks**\ [] using the hash of the
+ * tuple.
+ */
+static struct bpf_sock *(*bpf_skc_lookup_tcp)(void *ctx, struct bpf_sock_tuple *tuple, __u32 tuple_size, __u64 netns, __u64 flags) = (void *) 99;
+
+/*
+ * bpf_tcp_check_syncookie
+ *
+ * Check whether *iph* and *th* contain a valid SYN cookie ACK for
+ * the listening socket in *sk*.
+ *
+ * *iph* points to the start of the IPv4 or IPv6 header, while
+ * *iph_len* contains **sizeof**\ (**struct iphdr**) or
+ * **sizeof**\ (**struct ip6hdr**).
+ *
+ * *th* points to the start of the TCP header, while *th_len*
+ * contains **sizeof**\ (**struct tcphdr**).
+ *
+ *
+ * Returns
+ * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
+ * error otherwise.
+ */
+static int (*bpf_tcp_check_syncookie)(struct bpf_sock *sk, void *iph, __u32 iph_len, struct tcphdr *th, __u32 th_len) = (void *) 100;
+
+/*
+ * bpf_sysctl_get_name
+ *
+ * Get the name of the sysctl in /proc/sys/ and copy it into the
+ * program-provided buffer *buf* of size *buf_len*.
+ *
+ * The buffer is always NUL terminated, unless it's zero-sized.
+ *
+ * If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is
+ * copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name
+ * only (e.g. "tcp_mem").
+ *
+ * Returns
+ * Number of characters copied (not including the trailing NUL).
+ *
+ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ * truncated name in this case).
+ */
+static int (*bpf_sysctl_get_name)(struct bpf_sysctl *ctx, char *buf, unsigned long buf_len, __u64 flags) = (void *) 101;
+
+/*
+ * bpf_sysctl_get_current_value
+ *
+ * Get the current value of the sysctl as it is presented in
+ * /proc/sys (incl. newline, etc), and copy it as a string into the
+ * program-provided buffer *buf* of size *buf_len*.
+ *
+ * The whole value is copied, no matter what file position user
+ * space issued e.g. sys_read at.
+ *
+ * The buffer is always NUL terminated, unless it's zero-sized.
+ *
+ * Returns
+ * Number of characters copied (not including the trailing NUL).
+ *
+ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ * the truncated value in this case).
+ *
+ * **-EINVAL** if current value was unavailable, e.g. because
+ * sysctl is uninitialized and read returns -EIO for it.
+ */
+static int (*bpf_sysctl_get_current_value)(struct bpf_sysctl *ctx, char *buf, unsigned long buf_len) = (void *) 102;
+
+/*
+ * bpf_sysctl_get_new_value
+ *
+ * Get the new value being written by user space to the sysctl
+ * (before the actual write happens) and copy it as a string into
+ * the program-provided buffer *buf* of size *buf_len*.
+ *
+ * User space may write new value at file position > 0.
+ *
+ * The buffer is always NUL terminated, unless it's zero-sized.
+ *
+ * Returns
+ * Number of characters copied (not including the trailing NUL).
+ *
+ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ * the truncated value in this case).
+ *
+ * **-EINVAL** if sysctl is being read.
+ */
+static int (*bpf_sysctl_get_new_value)(struct bpf_sysctl *ctx, char *buf, unsigned long buf_len) = (void *) 103;
+
+/*
+ * bpf_sysctl_set_new_value
+ *
+ * Override new value being written by user space to sysctl with
+ * value provided by program in buffer *buf* of size *buf_len*.
+ *
+ * *buf* should contain a string in same form as provided by user
+ * space on sysctl write.
+ *
+ * User space may write new value at file position > 0. To override
+ * the whole sysctl value file position should be set to zero.
+ *
+ * Returns
+ * 0 on success.
+ *
+ * **-E2BIG** if the *buf_len* is too big.
+ *
+ * **-EINVAL** if sysctl is being read.
+ */
+static int (*bpf_sysctl_set_new_value)(struct bpf_sysctl *ctx, const char *buf, unsigned long buf_len) = (void *) 104;
+
+/*
+ * bpf_strtol
+ *
+ * Convert the initial part of the string from buffer *buf* of
+ * size *buf_len* to a long integer according to the given base
+ * and save the result in *res*.
+ *
+ * The string may begin with an arbitrary amount of white space
+ * (as determined by **isspace**\ (3)) followed by a single
+ * optional '**-**' sign.
+ *
+ * Five least significant bits of *flags* encode base, other bits
+ * are currently unused.
+ *
+ * Base must be either 8, 10, 16 or 0 to detect it automatically
+ * similar to user space **strtol**\ (3).
+ *
+ * Returns
+ * Number of characters consumed on success. Must be positive but
+ * no more than *buf_len*.
+ *
+ * **-EINVAL** if no valid digits were found or unsupported base
+ * was provided.
+ *
+ * **-ERANGE** if resulting value was out of range.
+ */
+static int (*bpf_strtol)(const char *buf, unsigned long buf_len, __u64 flags, long *res) = (void *) 105;
+
+/*
+ * bpf_strtoul
+ *
+ * Convert the initial part of the string from buffer *buf* of
+ * size *buf_len* to an unsigned long integer according to the
+ * given base and save the result in *res*.
+ *
+ * The string may begin with an arbitrary amount of white space
+ * (as determined by **isspace**\ (3)).
+ *
+ * Five least significant bits of *flags* encode base, other bits
+ * are currently unused.
+ *
+ * Base must be either 8, 10, 16 or 0 to detect it automatically
+ * similar to user space **strtoul**\ (3).
+ *
+ * Returns
+ * Number of characters consumed on success. Must be positive but
+ * no more than *buf_len*.
+ *
+ * **-EINVAL** if no valid digits were found or unsupported base
+ * was provided.
+ *
+ * **-ERANGE** if resulting value was out of range.
+ */
+static int (*bpf_strtoul)(const char *buf, unsigned long buf_len, __u64 flags, unsigned long *res) = (void *) 106;
+
+/*
+ * bpf_sk_storage_get
+ *
+ * Get a bpf-local-storage from a *sk*.
+ *
+ * Logically, it could be thought of getting the value from
+ * a *map* with *sk* as the **key**. From this
+ * perspective, the usage is not much different from
+ * **bpf_map_lookup_elem**\ (*map*, **&**\ *sk*) except this
+ * helper enforces the key must be a full socket and the map must
+ * be a **BPF_MAP_TYPE_SK_STORAGE** also.
+ *
+ * Underneath, the value is stored locally at *sk* instead of
+ * the *map*. The *map* is used as the bpf-local-storage
+ * "type". The bpf-local-storage "type" (i.e. the *map*) is
+ * searched against all bpf-local-storages residing at *sk*.
+ *
+ * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be
+ * used such that a new bpf-local-storage will be
+ * created if one does not exist. *value* can be used
+ * together with **BPF_SK_STORAGE_GET_F_CREATE** to specify
+ * the initial value of a bpf-local-storage. If *value* is
+ * **NULL**, the new bpf-local-storage will be zero initialized.
+ *
+ * Returns
+ * A bpf-local-storage pointer is returned on success.
+ *
+ * **NULL** if not found or there was an error in adding
+ * a new bpf-local-storage.
+ */
+static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk, void *value, __u64 flags) = (void *) 107;
+
+/*
+ * bpf_sk_storage_delete
+ *
+ * Delete a bpf-local-storage from a *sk*.
+ *
+ * Returns
+ * 0 on success.
+ *
+ * **-ENOENT** if the bpf-local-storage cannot be found.
+ */
+static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) = (void *) 108;
+
+/*
+ * bpf_send_signal
+ *
+ * Send signal *sig* to the current task.
+ *
+ * Returns
+ * 0 on success or successfully queued.
+ *
+ * **-EBUSY** if work queue under nmi is full.
+ *
+ * **-EINVAL** if *sig* is invalid.
+ *
+ * **-EPERM** if no permission to send the *sig*.
+ *
+ * **-EAGAIN** if bpf program can try again.
+ */
+static int (*bpf_send_signal)(__u32 sig) = (void *) 109;
+
+/*
+ * bpf_tcp_gen_syncookie
+ *
+ * Try to issue a SYN cookie for the packet with corresponding
+ * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
+ *
+ * *iph* points to the start of the IPv4 or IPv6 header, while
+ * *iph_len* contains **sizeof**\ (**struct iphdr**) or
+ * **sizeof**\ (**struct ip6hdr**).
+ *
+ * *th* points to the start of the TCP header, while *th_len*
+ * contains the length of the TCP header.
+ *
+ *
+ * Returns
+ * On success, the lower 32 bits hold the generated SYN cookie,
+ * followed by 16 bits which hold the MSS value for that cookie,
+ * and the top 16 bits are unused.
+ *
+ * On failure, the returned value is one of the following:
+ *
+ * **-EINVAL** SYN cookie cannot be issued due to error
+ *
+ * **-ENOENT** SYN cookie should not be issued (no SYN flood)
+ *
+ * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies
+ *
+ * **-EPROTONOSUPPORT** IP packet version is not 4 or 6
+ */
+static __s64 (*bpf_tcp_gen_syncookie)(struct bpf_sock *sk, void *iph, __u32 iph_len, struct tcphdr *th, __u32 th_len) = (void *) 110;
+
+/*
+ * bpf_skb_output
+ *
+ * Write raw *data* blob into a special BPF perf event held by
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * event must have the following attributes: **PERF_SAMPLE_RAW**
+ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * The *flags* are used to indicate the index in *map* for which
+ * the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * to indicate that the index of the current CPU core should be
+ * used.
+ *
+ * The value to write, of *size*, is passed through eBPF stack and
+ * pointed by *data*.
+ *
+ * *ctx* is a pointer to in-kernel struct sk_buff.
+ *
+ * This helper is similar to **bpf_perf_event_output**\ () but
+ * restricted to raw_tracepoint bpf programs.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_output)(void *ctx, void *map, __u64 flags, void *data, __u64 size) = (void *) 111;
+
+/*
+ * bpf_probe_read_user
+ *
+ * Safely attempt to read *size* bytes from user space address
+ * *unsafe_ptr* and store the data in *dst*.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_probe_read_user)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 112;
+
+/*
+ * bpf_probe_read_kernel
+ *
+ * Safely attempt to read *size* bytes from kernel space address
+ * *unsafe_ptr* and store the data in *dst*.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_probe_read_kernel)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 113;
+
+/*
+ * bpf_probe_read_user_str
+ *
+ * Copy a NUL terminated string from an unsafe user address
+ * *unsafe_ptr* to *dst*. The *size* should include the
+ * terminating NUL byte. In case the string length is smaller than
+ * *size*, the target is not padded with further NUL bytes. If the
+ * string length is larger than *size*, just *size*-1 bytes are
+ * copied and the last byte is set to NUL.
+ *
+ * On success, the length of the copied string is returned. This
+ * makes this helper useful in tracing programs for reading
+ * strings, and more importantly to get its length at runtime. See
+ * the following snippet:
+ *
+ * ::
+ *
+ * SEC("kprobe/sys_open")
+ * void bpf_sys_open(struct pt_regs *ctx)
+ * {
+ * char buf[PATHLEN]; // PATHLEN is defined to 256
+ * int res = bpf_probe_read_user_str(buf, sizeof(buf),
+ * ctx->di);
+ *
+ * // Consume buf, for example push it to
+ * // userspace via bpf_perf_event_output(); we
+ * // can use res (the string length) as event
+ * // size, after checking its boundaries.
+ * }
+ *
+ * In comparison, using the **bpf_probe_read_user()** helper here
+ * instead to read the string would require estimating the length
+ * at compile time, and would often result in copying more memory
+ * than necessary.
+ *
+ * Another useful use case is when parsing individual process
+ * arguments or individual environment variables navigating
+ * *current*\ **->mm->arg_start** and *current*\
+ * **->mm->env_start**: using this helper and the return value,
+ * one can quickly iterate at the right offset of the memory area.
+ *
+ * Returns
+ * On success, the strictly positive length of the string,
+ * including the trailing NUL character. On error, a negative
+ * value.
+ */
+static int (*bpf_probe_read_user_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 114;
+
+/*
+ * bpf_probe_read_kernel_str
+ *
+ * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
+ * to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
+ *
+ * Returns
+ * On success, the strictly positive length of the string, including
+ * the trailing NUL character. On error, a negative value.
+ */
+static int (*bpf_probe_read_kernel_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 115;
+
+
diff --git a/src/contrib/libbpf/bpf/bpf_helpers.h b/src/contrib/libbpf/bpf/bpf_helpers.h
new file mode 100644
index 0000000..0c7d282
--- /dev/null
+++ b/src/contrib/libbpf/bpf/bpf_helpers.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_HELPERS__
+#define __BPF_HELPERS__
+
+#include "bpf_helper_defs.h"
+
+#define __uint(name, val) int (*name)[val] /* declares 'name' as a pointer to an array of 'val' ints */
+#define __type(name, val) typeof(val) *name /* declares 'name' as a pointer to the type of 'val' */
+
+/*
+ * Helper macro to print out debug messages.
+ *
+ * 'fmt' initializes a local char array, so it has to be something that
+ * can initialize a char[] (e.g. a string literal). That array, its
+ * sizeof (which includes the terminating NUL) and any further arguments
+ * are handed to bpf_trace_printk(); the macro evaluates to the value of
+ * that call.
+ */
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+/*
+ * Helper macro to place programs, maps, license in
+ * different sections in elf_bpf file. Section names
+ * are interpreted by elf_bpf loader.
+ *
+ * Expands to a section(NAME) attribute combined with the "used"
+ * attribute on the annotated declaration.
+ */
+#define SEC(NAME) __attribute__((section(NAME), used))
+
+#ifndef __always_inline /* only supply a definition when none exists yet */
+#define __always_inline __attribute__((always_inline))
+#endif
+
+/*
+ * Helper structure used by eBPF C program
+ * to describe BPF map attributes to libbpf loader.
+ *
+ * All five attributes are plain unsigned ints.
+ * NOTE(review): the accepted values for each field are not visible in
+ * this header - confirm against the loader before relying on them.
+ */
+struct bpf_map_def {
+ unsigned int type;
+ unsigned int key_size;
+ unsigned int value_size;
+ unsigned int max_entries;
+ unsigned int map_flags;
+};
+
+enum libbpf_pin_type {
+ LIBBPF_PIN_NONE, /* first enumerator, so its value is 0 */
+ /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
+ LIBBPF_PIN_BY_NAME,
+};
+
+#endif
diff --git a/src/contrib/libbpf/bpf/bpf_prog_linfo.c b/src/contrib/libbpf/bpf/bpf_prog_linfo.c
new file mode 100644
index 0000000..3ed1a27
--- /dev/null
+++ b/src/contrib/libbpf/bpf/bpf_prog_linfo.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2018 Facebook */
+
+#include <string.h>
+#include <stdlib.h>
+#include <linux/err.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "libbpf_internal.h"
+
+struct bpf_prog_linfo { /* private copies of the line info taken from a struct bpf_prog_info */
+ void *raw_linfo; /* malloc'ed copy of info->line_info: nr_linfo records of rec_size bytes */
+ void *raw_jited_linfo; /* malloc'ed copy of info->jited_line_info: nr_linfo records of jited_rec_size bytes; NULL if jited info was not usable */
+ __u32 *nr_jited_linfo_per_func; /* per jited func: how many records belong to it */
+ __u32 *jited_linfo_func_idx; /* per jited func: index of its first record in both raw arrays */
+ __u32 nr_linfo; /* number of records (info->nr_line_info) */
+ __u32 nr_jited_func; /* number of jited funcs (info->nr_jited_ksyms); 0 if jited info was not usable */
+ __u32 rec_size; /* byte size of one raw_linfo record */
+ __u32 jited_rec_size; /* byte size of one raw_jited_linfo record */
+};
+
+/*
+ * Split the jited line-info records among the jited functions.
+ *
+ * Walks raw_jited_linfo once. Each time a record's address equals the
+ * start address of the next function in *ksym_func*, that record index is
+ * stored in jited_linfo_func_idx[] and the record count of the function
+ * just finished is stored in nr_jited_linfo_per_func[].
+ *
+ * Returns 0 on success, or -EINVAL when the first record does not match
+ * ksym_func[0], when addresses do not strictly increase inside a
+ * function, when the last record of a function lies outside the length
+ * given by *ksym_len*, or when not every function start was found.
+ */
+static int dissect_jited_func(struct bpf_prog_linfo *prog_linfo,
+			      const __u64 *ksym_func, const __u32 *ksym_len)
+{
+	const void *rec = prog_linfo->raw_jited_linfo;
+	__u32 func_cnt, rec_cnt, stride;
+	__u64 prev_addr, cur_addr;
+	__u32 rec_idx;		/* index into raw_jited_linfo */
+	__u32 func_start = 0;	/* record index where the current func began */
+	__u32 func = 1;		/* next entry of ksym_func to look for */
+
+	cur_addr = *(const __u64 *)rec;
+	if (cur_addr != ksym_func[0])
+		return -EINVAL;
+
+	prog_linfo->jited_linfo_func_idx[0] = 0;
+	func_cnt = prog_linfo->nr_jited_func;
+	rec_cnt = prog_linfo->nr_linfo;
+	stride = prog_linfo->jited_rec_size;
+
+	for (rec_idx = 1; rec_idx < rec_cnt && func < func_cnt; rec_idx++) {
+		prev_addr = cur_addr;
+		rec += stride;
+		cur_addr = *(const __u64 *)rec;
+
+		if (cur_addr != ksym_func[func]) {
+			/* Still inside the same func: addr must keep increasing */
+			if (cur_addr <= prev_addr)
+				return -EINVAL;
+			continue;
+		}
+
+		/* This record opens function 'func' */
+		prog_linfo->jited_linfo_func_idx[func] = rec_idx;
+
+		/* Sanity check: previous func's last record is within its length */
+		if (prev_addr - ksym_func[func - 1] + 1 > ksym_len[func - 1])
+			return -EINVAL;
+
+		prog_linfo->nr_jited_linfo_per_func[func - 1] =
+			rec_idx - func_start;
+		func_start = rec_idx;
+		func++;
+	}
+
+	/* Every function start must have been seen */
+	if (func != func_cnt)
+		return -EINVAL;
+
+	/* The remaining records all belong to the last function */
+	prog_linfo->nr_jited_linfo_per_func[func_cnt - 1] =
+		rec_cnt - func_start;
+
+	return 0;
+}
+
+/*
+ * Release a bpf_prog_linfo together with the four buffers it owns.
+ * Passing NULL is allowed and does nothing.
+ */
+void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo)
+{
+	if (prog_linfo) {
+		/* Owned buffers first, then the container itself */
+		free(prog_linfo->jited_linfo_func_idx);
+		free(prog_linfo->nr_jited_linfo_per_func);
+		free(prog_linfo->raw_jited_linfo);
+		free(prog_linfo->raw_linfo);
+		free(prog_linfo);
+	}
+}
+
+/*
+ * Create a bpf_prog_linfo holding private copies of the line info that
+ * *info* points to.
+ *
+ * Returns NULL when *info* carries no line info (zero record count or no
+ * source buffer), when the record size is too small to reach the fields
+ * used for searching, when a buffer size does not fit in size_t, when an
+ * allocation fails, or when the jited line info cannot be matched to the
+ * jited ksyms.
+ *
+ * When the jited details in *info* are missing or inconsistent, the
+ * object is still returned with only the xlated line info filled in;
+ * nr_jited_func then stays 0.
+ *
+ * The caller releases the result with bpf_prog_linfo__free().
+ */
+struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)
+{
+	struct bpf_prog_linfo *prog_linfo;
+	__u32 nr_linfo, nr_jited_func;
+	__u64 data_sz;
+
+	nr_linfo = info->nr_line_info;
+
+	/* A record count without a source buffer would make memcpy() read NULL */
+	if (!nr_linfo || !info->line_info)
+		return NULL;
+
+	/*
+	 * The min size that bpf_prog_linfo has to access for
+	 * searching purpose.
+	 */
+	if (info->line_info_rec_size <
+	    offsetof(struct bpf_line_info, file_name_off))
+		return NULL;
+
+	prog_linfo = calloc(1, sizeof(*prog_linfo));
+	if (!prog_linfo)
+		return NULL;
+
+	/* Copy xlated line_info */
+	prog_linfo->nr_linfo = nr_linfo;
+	prog_linfo->rec_size = info->line_info_rec_size;
+	data_sz = (__u64)nr_linfo * prog_linfo->rec_size;
+	/* Reject sizes that would be truncated when converted to size_t */
+	if ((size_t)data_sz != data_sz)
+		goto err_free;
+	prog_linfo->raw_linfo = malloc(data_sz);
+	if (!prog_linfo->raw_linfo)
+		goto err_free;
+	memcpy(prog_linfo->raw_linfo, (void *)(long)info->line_info, data_sz);
+
+	nr_jited_func = info->nr_jited_ksyms;
+	if (!nr_jited_func ||
+	    !info->jited_line_info ||
+	    info->nr_jited_line_info != nr_linfo ||
+	    info->jited_line_info_rec_size < sizeof(__u64) ||
+	    info->nr_jited_func_lens != nr_jited_func ||
+	    !info->jited_ksyms ||
+	    !info->jited_func_lens)
+		/* Not enough info to provide jited_line_info */
+		return prog_linfo;
+
+	/* Copy jited_line_info */
+	prog_linfo->nr_jited_func = nr_jited_func;
+	prog_linfo->jited_rec_size = info->jited_line_info_rec_size;
+	data_sz = (__u64)nr_linfo * prog_linfo->jited_rec_size;
+	if ((size_t)data_sz != data_sz)
+		goto err_free;
+	prog_linfo->raw_jited_linfo = malloc(data_sz);
+	if (!prog_linfo->raw_jited_linfo)
+		goto err_free;
+	memcpy(prog_linfo->raw_jited_linfo,
+	       (void *)(long)info->jited_line_info, data_sz);
+
+	/*
+	 * Number of jited_line_info per jited func.
+	 * calloc() checks the count * size multiplication for overflow.
+	 */
+	prog_linfo->nr_jited_linfo_per_func = calloc(nr_jited_func,
+						     sizeof(__u32));
+	if (!prog_linfo->nr_jited_linfo_per_func)
+		goto err_free;
+
+	/*
+	 * For each jited func,
+	 * the start idx to the "linfo" and "jited_linfo" array,
+	 */
+	prog_linfo->jited_linfo_func_idx = calloc(nr_jited_func,
+						  sizeof(__u32));
+	if (!prog_linfo->jited_linfo_func_idx)
+		goto err_free;
+
+	if (dissect_jited_func(prog_linfo,
+			       (__u64 *)(long)info->jited_ksyms,
+			       (__u32 *)(long)info->jited_func_lens))
+		goto err_free;
+
+	return prog_linfo;
+
+err_free:
+	bpf_prog_linfo__free(prog_linfo);
+	return NULL;
+}
+
+/*
+ * Look up the line info for jited address *addr* inside jited function
+ * *func_idx*, ignoring the first *nr_skip* records of that function.
+ *
+ * Returns the record just before the first remaining record whose jited
+ * address is greater than *addr* (the function's last record if there is
+ * none). Returns NULL when *func_idx* is out of range, when *nr_skip*
+ * leaves no records, or when *addr* is below the first remaining record.
+ */
+const struct bpf_line_info *
+bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo,
+				__u64 addr, __u32 func_idx, __u32 nr_skip)
+{
+	const void *jited_rec, *line_rec;
+	__u32 jited_stride, line_stride;
+	__u32 per_func, first, left;
+
+	if (func_idx >= prog_linfo->nr_jited_func)
+		return NULL;
+
+	per_func = prog_linfo->nr_jited_linfo_per_func[func_idx];
+	if (nr_skip >= per_func)
+		return NULL;
+
+	first = prog_linfo->jited_linfo_func_idx[func_idx] + nr_skip;
+	jited_stride = prog_linfo->jited_rec_size;
+	jited_rec = prog_linfo->raw_jited_linfo + (first * jited_stride);
+	if (addr < *(const __u64 *)jited_rec)
+		return NULL;
+
+	line_stride = prog_linfo->rec_size;
+	line_rec = prog_linfo->raw_linfo + (first * line_stride);
+
+	/* Step both arrays together until a record starts beyond addr */
+	for (left = per_func - nr_skip; left; left--) {
+		if (addr < *(const __u64 *)jited_rec)
+			break;
+
+		line_rec += line_stride;
+		jited_rec += jited_stride;
+	}
+
+	/* line_rec is one past the match; the check above guarantees one step */
+	return line_rec - line_stride;
+}
+
+/*
+ * Look up the line info for instruction offset *insn_off*, ignoring the
+ * first *nr_skip* records.
+ *
+ * Returns the record just before the first remaining record whose
+ * insn_off is greater than *insn_off* (the last record if there is none).
+ * Returns NULL when *nr_skip* leaves no records or when *insn_off* is
+ * below the first remaining record.
+ */
+const struct bpf_line_info *
+bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo,
+		      __u32 insn_off, __u32 nr_skip)
+{
+	const __u32 total = prog_linfo->nr_linfo;
+	const void *rec;
+	__u32 stride, left;
+
+	if (nr_skip >= total)
+		return NULL;
+
+	stride = prog_linfo->rec_size;
+	rec = prog_linfo->raw_linfo + (nr_skip * stride);
+	if (insn_off < ((const struct bpf_line_info *)rec)->insn_off)
+		return NULL;
+
+	/* Advance until a record starts beyond insn_off */
+	for (left = total - nr_skip; left; left--) {
+		if (insn_off < ((const struct bpf_line_info *)rec)->insn_off)
+			break;
+
+		rec += stride;
+	}
+
+	/* rec is one past the match; the check above guarantees one step */
+	return rec - stride;
+}
diff --git a/src/contrib/libbpf/bpf/bpf_tracing.h b/src/contrib/libbpf/bpf/bpf_tracing.h
new file mode 100644
index 0000000..b0dafe8
--- /dev/null
+++ b/src/contrib/libbpf/bpf/bpf_tracing.h
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_TRACING_H__
+#define __BPF_TRACING_H__
+
+/* Scan the ARCH passed in from ARCH env variable (see Makefile) */
+/*
+ * An explicit __TARGET_ARCH_<arch> define takes precedence: it selects the
+ * matching bpf_target_<arch> macro and sets bpf_target_defined so that the
+ * compiler-based fallback below is skipped.
+ */
+#if defined(__TARGET_ARCH_x86)
+ #define bpf_target_x86
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_s390)
+ #define bpf_target_s390
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_arm)
+ #define bpf_target_arm
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_arm64)
+ #define bpf_target_arm64
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_mips)
+ #define bpf_target_mips
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_powerpc)
+ #define bpf_target_powerpc
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_sparc)
+ #define bpf_target_sparc
+ #define bpf_target_defined
+#else
+ #undef bpf_target_defined
+#endif
+
+/* Fall back to what the compiler says */
+/*
+ * Note: the x86 fallback only tests __x86_64__, so a build that defines
+ * neither __TARGET_ARCH_x86 nor __x86_64__ does not select bpf_target_x86
+ * here. If no branch matches, no bpf_target_<arch> macro is defined at all.
+ */
+#ifndef bpf_target_defined
+#if defined(__x86_64__)
+ #define bpf_target_x86
+#elif defined(__s390__)
+ #define bpf_target_s390
+#elif defined(__arm__)
+ #define bpf_target_arm
+#elif defined(__aarch64__)
+ #define bpf_target_arm64
+#elif defined(__mips__)
+ #define bpf_target_mips
+#elif defined(__powerpc__)
+ #define bpf_target_powerpc
+#elif defined(__sparc__)
+ #define bpf_target_sparc
+#endif
+#endif
+
+/*
+ * Per-architecture PT_REGS_*() accessors. Each branch maps the same macro
+ * names (PT_REGS_PARM1..5, PT_REGS_RET, PT_REGS_FP, PT_REGS_RC, PT_REGS_SP,
+ * PT_REGS_IP) onto that architecture's register-set field names.
+ *
+ * Not every branch defines every macro: the powerpc branch defines neither
+ * PT_REGS_RET nor PT_REGS_FP, and the sparc branch does not define
+ * PT_REGS_FP. When no bpf_target_<arch> macro is set, none are defined.
+ */
+#if defined(bpf_target_x86)
+
+/* With __KERNEL__ the unprefixed field names (di, si, ...) are used. */
+#ifdef __KERNEL__
+#define PT_REGS_PARM1(x) ((x)->di)
+#define PT_REGS_PARM2(x) ((x)->si)
+#define PT_REGS_PARM3(x) ((x)->dx)
+#define PT_REGS_PARM4(x) ((x)->cx)
+#define PT_REGS_PARM5(x) ((x)->r8)
+#define PT_REGS_RET(x) ((x)->sp)
+#define PT_REGS_FP(x) ((x)->bp)
+#define PT_REGS_RC(x) ((x)->ax)
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->ip)
+#else
+#ifdef __i386__
+/* i386 kernel is built with -mregparm=3 */
+/* Only three parameters are mapped; PT_REGS_PARM4/5 expand to constant 0. */
+#define PT_REGS_PARM1(x) ((x)->eax)
+#define PT_REGS_PARM2(x) ((x)->edx)
+#define PT_REGS_PARM3(x) ((x)->ecx)
+#define PT_REGS_PARM4(x) 0
+#define PT_REGS_PARM5(x) 0
+#define PT_REGS_RET(x) ((x)->esp)
+#define PT_REGS_FP(x) ((x)->ebp)
+#define PT_REGS_RC(x) ((x)->eax)
+#define PT_REGS_SP(x) ((x)->esp)
+#define PT_REGS_IP(x) ((x)->eip)
+#else
+#define PT_REGS_PARM1(x) ((x)->rdi)
+#define PT_REGS_PARM2(x) ((x)->rsi)
+#define PT_REGS_PARM3(x) ((x)->rdx)
+#define PT_REGS_PARM4(x) ((x)->rcx)
+#define PT_REGS_PARM5(x) ((x)->r8)
+#define PT_REGS_RET(x) ((x)->rsp)
+#define PT_REGS_FP(x) ((x)->rbp)
+#define PT_REGS_RC(x) ((x)->rax)
+#define PT_REGS_SP(x) ((x)->rsp)
+#define PT_REGS_IP(x) ((x)->rip)
+#endif
+#endif
+
+#elif defined(bpf_target_s390)
+
+/* s390 provides user_pt_regs instead of struct pt_regs to userspace */
+/* The argument is therefore cast to PT_REGS_S390 * before each access. */
+struct pt_regs;
+#define PT_REGS_S390 const volatile user_pt_regs
+#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2])
+#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3])
+#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4])
+#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5])
+#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6])
+#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14])
+/* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11])
+#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2])
+#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15])
+#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr)
+
+#elif defined(bpf_target_arm)
+
+#define PT_REGS_PARM1(x) ((x)->uregs[0])
+#define PT_REGS_PARM2(x) ((x)->uregs[1])
+#define PT_REGS_PARM3(x) ((x)->uregs[2])
+#define PT_REGS_PARM4(x) ((x)->uregs[3])
+#define PT_REGS_PARM5(x) ((x)->uregs[4])
+#define PT_REGS_RET(x) ((x)->uregs[14])
+#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->uregs[0])
+#define PT_REGS_SP(x) ((x)->uregs[13])
+#define PT_REGS_IP(x) ((x)->uregs[12])
+
+#elif defined(bpf_target_arm64)
+
+/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */
+/* The argument is therefore cast to PT_REGS_ARM64 * before each access. */
+struct pt_regs;
+#define PT_REGS_ARM64 const volatile struct user_pt_regs
+#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0])
+#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1])
+#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2])
+#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3])
+#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4])
+#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30])
+/* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29])
+#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0])
+#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp)
+#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc)
+
+#elif defined(bpf_target_mips)
+
+#define PT_REGS_PARM1(x) ((x)->regs[4])
+#define PT_REGS_PARM2(x) ((x)->regs[5])
+#define PT_REGS_PARM3(x) ((x)->regs[6])
+#define PT_REGS_PARM4(x) ((x)->regs[7])
+#define PT_REGS_PARM5(x) ((x)->regs[8])
+#define PT_REGS_RET(x) ((x)->regs[31])
+#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->regs[1])
+#define PT_REGS_SP(x) ((x)->regs[29])
+#define PT_REGS_IP(x) ((x)->cp0_epc)
+
+#elif defined(bpf_target_powerpc)
+
+/* No PT_REGS_RET or PT_REGS_FP is defined in this branch. */
+#define PT_REGS_PARM1(x) ((x)->gpr[3])
+#define PT_REGS_PARM2(x) ((x)->gpr[4])
+#define PT_REGS_PARM3(x) ((x)->gpr[5])
+#define PT_REGS_PARM4(x) ((x)->gpr[6])
+#define PT_REGS_PARM5(x) ((x)->gpr[7])
+#define PT_REGS_RC(x) ((x)->gpr[3])
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->nip)
+
+#elif defined(bpf_target_sparc)
+
+/* No PT_REGS_FP is defined in this branch. */
+#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0])
+#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1])
+#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2])
+#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3])
+#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4])
+#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7])
+#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0])
+#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP])
+
+/* Should this also be a bpf_target check for the sparc case? */
+#if defined(__arch64__)
+#define PT_REGS_IP(x) ((x)->tpc)
+#else
+#define PT_REGS_IP(x) ((x)->pc)
+#endif
+
+#endif
+
+/*
+ * BPF_KPROBE_READ_RET_IP(ip, ctx) / BPF_KRETPROBE_READ_RET_IP(ip, ctx):
+ * store a return address taken from register context @ctx into @ip.
+ *
+ *  - powerpc: copied directly from ctx->link.
+ *  - sparc:   copied directly from PT_REGS_RET(ctx).
+ *  - others:  read with bpf_probe_read() from the memory address held in
+ *             PT_REGS_RET(ctx) (kprobe) or at PT_REGS_FP(ctx) + sizeof(ip)
+ *             (kretprobe).
+ *
+ * On powerpc and sparc both macros are the same; elsewhere they differ.
+ */
+#if defined(bpf_target_powerpc)
+#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; })
+#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
+#elif defined(bpf_target_sparc)
+#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); })
+#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
+#else
+#define BPF_KPROBE_READ_RET_IP(ip, ctx) \
+ ({ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
+#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \
+ ({ bpf_probe_read(&(ip), sizeof(ip), \
+ (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
+#endif
+
+#endif
diff --git a/src/contrib/libbpf/bpf/btf.c b/src/contrib/libbpf/bpf/btf.c
new file mode 100644
index 0000000..88efa2b
--- /dev/null
+++ b/src/contrib/libbpf/bpf/btf.c
@@ -0,0 +1,2884 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2018 Facebook */
+
+#include <endian.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/err.h>
+#include <linux/btf.h>
+#include <gelf.h>
+#include "btf.h"
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+#include "hashmap.h"
+
+#define BTF_MAX_NR_TYPES 0x7fffffff
+#define BTF_MAX_STR_OFFSET 0x7fffffff
+
+/* Placeholder type stored at index 0 of every btf->types array. */
+static struct btf_type btf_void;
+
+/* In-memory representation of one parsed BTF blob. */
+struct btf {
+ /* Private copy of the raw blob; hdr and data alias the same memory. */
+ union {
+ struct btf_header *hdr;
+ void *data;
+ };
+ /* Type pointers indexed by type id; [0] is &btf_void, ids run 1..nr_types. */
+ struct btf_type **types;
+ /* Start of the string section inside data (set by btf_parse_str_sec()). */
+ const char *strings;
+ /* First byte after the header (hdr + 1); section offsets are relative to it. */
+ void *nohdr_data;
+ /* Number of types added so far, not counting the index-0 placeholder. */
+ __u32 nr_types;
+ /* Allocated capacity of types[], in elements. */
+ __u32 types_size;
+ /* Size of data in bytes. */
+ __u32 data_size;
+ /* FD returned by bpf_load_btf() in btf__load(); -1 until then. */
+ int fd;
+};
+
+/* Convert a pointer to a __u64 by way of unsigned long. */
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+	unsigned long addr = (unsigned long)ptr;
+
+	return (__u64)addr;
+}
+
+/*
+ * Append @t to btf->types, growing the array when fewer than two free slots
+ * remain. Slot 0 is filled with &btf_void the first time the array grows.
+ *
+ * Returns 0 on success, -E2BIG when the array is already at
+ * BTF_MAX_NR_TYPES entries, or -ENOMEM when realloc() fails.
+ */
+static int btf_add_type(struct btf *btf, struct btf_type *t)
+{
+	__u32 spare = btf->types_size - btf->nr_types;
+
+	if (spare < 2) {
+		struct btf_type **grown;
+		__u32 step, capacity;
+
+		if (btf->types_size == BTF_MAX_NR_TYPES)
+			return -E2BIG;
+
+		/* grow by a quarter of the current size, at least 16 slots */
+		step = max(btf->types_size >> 2, 16);
+		capacity = min(BTF_MAX_NR_TYPES, btf->types_size + step);
+
+		grown = realloc(btf->types, sizeof(*grown) * capacity);
+		if (!grown)
+			return -ENOMEM;
+
+		if (btf->nr_types == 0)
+			grown[0] = &btf_void;
+
+		btf->types_size = capacity;
+		btf->types = grown;
+	}
+
+	/* type ids start at 1, so bump the count before storing */
+	btf->nr_types++;
+	btf->types[btf->nr_types] = t;
+
+	return 0;
+}
+
+/*
+ * Validate the BTF header at the start of btf->data and set
+ * btf->nohdr_data to the first byte after it.
+ *
+ * Checks the magic, version and flags, and that the type and string section
+ * offsets lie inside the blob, that the type section precedes the string
+ * section, that the type section offset is 4-byte aligned, and that the
+ * whole string section (str_off + str_len) fits inside the blob.
+ *
+ * Returns 0 on success, -EINVAL for a malformed header, or -ENOTSUP for an
+ * unsupported version or non-zero flags.
+ */
+static int btf_parse_hdr(struct btf *btf)
+{
+	const struct btf_header *hdr = btf->hdr;
+	__u32 meta_left;
+
+	if (btf->data_size < sizeof(struct btf_header)) {
+		pr_debug("BTF header not found\n");
+		return -EINVAL;
+	}
+
+	if (hdr->magic != BTF_MAGIC) {
+		pr_debug("Invalid BTF magic:%x\n", hdr->magic);
+		return -EINVAL;
+	}
+
+	if (hdr->version != BTF_VERSION) {
+		pr_debug("Unsupported BTF version:%u\n", hdr->version);
+		return -ENOTSUP;
+	}
+
+	if (hdr->flags) {
+		pr_debug("Unsupported BTF flags:%x\n", hdr->flags);
+		return -ENOTSUP;
+	}
+
+	meta_left = btf->data_size - sizeof(*hdr);
+	if (!meta_left) {
+		pr_debug("BTF has no data\n");
+		return -EINVAL;
+	}
+
+	if (meta_left < hdr->type_off) {
+		pr_debug("Invalid BTF type section offset:%u\n", hdr->type_off);
+		return -EINVAL;
+	}
+
+	if (meta_left < hdr->str_off) {
+		pr_debug("Invalid BTF string section offset:%u\n", hdr->str_off);
+		return -EINVAL;
+	}
+
+	/*
+	 * The string section must end inside the blob; the sum is done in
+	 * 64 bits so that a huge str_len cannot wrap around.
+	 */
+	if ((__u64)hdr->str_off + hdr->str_len > meta_left) {
+		pr_debug("BTF string section (off:%u len:%u) is beyond the end of BTF data\n",
+			 hdr->str_off, hdr->str_len);
+		return -EINVAL;
+	}
+
+	if (hdr->type_off >= hdr->str_off) {
+		pr_debug("BTF type section offset >= string section offset. No type?\n");
+		return -EINVAL;
+	}
+
+	/* both low bits must be clear for 4-byte alignment */
+	if (hdr->type_off & 0x03) {
+		pr_debug("BTF type section is not aligned to 4 bytes\n");
+		return -EINVAL;
+	}
+
+	btf->nohdr_data = btf->hdr + 1;
+
+	return 0;
+}
+
+/*
+ * Validate the string section described by the header and remember its
+ * start in btf->strings. The section must be non-empty, must not exceed
+ * BTF_MAX_STR_OFFSET + 1 bytes, and must both start and end with a NUL byte.
+ *
+ * Returns 0 on success or -EINVAL otherwise.
+ */
+static int btf_parse_str_sec(struct btf *btf)
+{
+	const struct btf_header *hdr = btf->hdr;
+	const char *first = btf->nohdr_data + hdr->str_off;
+	const char *past = first + btf->hdr->str_len;
+
+	if (hdr->str_len == 0)
+		goto invalid;
+	if (hdr->str_len - 1 > BTF_MAX_STR_OFFSET)
+		goto invalid;
+	if (first[0] != '\0' || past[-1] != '\0')
+		goto invalid;
+
+	btf->strings = first;
+	return 0;
+
+invalid:
+	pr_debug("Invalid BTF string section\n");
+	return -EINVAL;
+}
+
+/*
+ * Return the size in bytes of the type record @t: the fixed
+ * struct btf_type plus whatever kind-specific data follows it.
+ * Returns -EINVAL for a kind this function does not know.
+ */
+static int btf_type_size(struct btf_type *t)
+{
+	const int base_size = sizeof(struct btf_type);
+	__u16 vlen = btf_vlen(t);
+	int extra;
+
+	switch (btf_kind(t)) {
+	case BTF_KIND_FWD:
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_PTR:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_FUNC:
+		/* nothing follows the common header */
+		extra = 0;
+		break;
+	case BTF_KIND_INT:
+		extra = sizeof(__u32);
+		break;
+	case BTF_KIND_ENUM:
+		extra = vlen * sizeof(struct btf_enum);
+		break;
+	case BTF_KIND_ARRAY:
+		extra = sizeof(struct btf_array);
+		break;
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+		extra = vlen * sizeof(struct btf_member);
+		break;
+	case BTF_KIND_FUNC_PROTO:
+		extra = vlen * sizeof(struct btf_param);
+		break;
+	case BTF_KIND_VAR:
+		extra = sizeof(struct btf_var);
+		break;
+	case BTF_KIND_DATASEC:
+		extra = vlen * sizeof(struct btf_var_secinfo);
+		break;
+	default:
+		pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t));
+		return -EINVAL;
+	}
+
+	return base_size + extra;
+}
+
+/*
+ * Walk the type section (from type_off up to str_off) and register every
+ * type record with btf_add_type().
+ *
+ * Each record is checked to lie entirely inside the type section before it
+ * is used, so a truncated or corrupted blob is rejected instead of being
+ * read past its end.
+ *
+ * Returns 0 on success, -EINVAL for a malformed record or unknown kind, or
+ * the error returned by btf_add_type().
+ */
+static int btf_parse_type_sec(struct btf *btf)
+{
+	struct btf_header *hdr = btf->hdr;
+	void *nohdr_data = btf->nohdr_data;
+	void *next_type = nohdr_data + hdr->type_off;
+	void *end_type = nohdr_data + hdr->str_off;
+
+	while (next_type < end_type) {
+		struct btf_type *t = next_type;
+		size_t left = end_type - next_type;
+		int type_size;
+		int err;
+
+		/* the fixed part must fit before kind/vlen can be read */
+		if (left < sizeof(*t)) {
+			pr_debug("BTF type [%u] is truncated\n",
+				 btf->nr_types + 1);
+			return -EINVAL;
+		}
+
+		type_size = btf_type_size(t);
+		if (type_size < 0)
+			return type_size;
+
+		/* the kind-specific tail must fit as well */
+		if (left < (size_t)type_size) {
+			pr_debug("BTF type [%u] is truncated\n",
+				 btf->nr_types + 1);
+			return -EINVAL;
+		}
+
+		next_type += type_size;
+		err = btf_add_type(btf, t);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* Return btf->nr_types, which is also the highest valid type id. */
+__u32 btf__get_nr_types(const struct btf *btf)
+{
+	const __u32 count = btf->nr_types;
+
+	return count;
+}
+
+/*
+ * Look up the type with id @type_id. Returns NULL when the id is larger
+ * than btf->nr_types.
+ */
+const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
+{
+	return type_id > btf->nr_types ? NULL : btf->types[type_id];
+}
+
+/* True for the btf_void placeholder and for forward declarations. */
+static bool btf_type_is_void(const struct btf_type *t)
+{
+	if (t == &btf_void)
+		return true;
+
+	return btf_is_fwd(t);
+}
+
+/* Like btf_type_is_void(), but also true for a NULL pointer. */
+static bool btf_type_is_void_or_null(const struct btf_type *t)
+{
+	if (!t)
+		return true;
+
+	return btf_type_is_void(t);
+}
+
+#define MAX_RESOLVE_DEPTH 32
+
+/*
+ * Compute the byte size of type @type_id, following typedefs, modifiers,
+ * variables and arrays for at most MAX_RESOLVE_DEPTH steps. Array element
+ * counts met on the way are multiplied together.
+ *
+ * Returns the size, -E2BIG when the element count or total size would
+ * exceed UINT32_MAX, or -EINVAL when no sized type is reached.
+ */
+__s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
+{
+	const struct btf_type *cur = btf__type_by_id(btf, type_id);
+	const struct btf_array *arr;
+	__s64 elem_size = -1;
+	__u32 count = 1;
+	int depth;
+
+	for (depth = 0; depth < MAX_RESOLVE_DEPTH; depth++) {
+		__u32 kind;
+
+		if (btf_type_is_void_or_null(cur))
+			break;
+
+		kind = btf_kind(cur);
+
+		/* kinds that carry their own size end the walk */
+		if (kind == BTF_KIND_INT || kind == BTF_KIND_STRUCT ||
+		    kind == BTF_KIND_UNION || kind == BTF_KIND_ENUM ||
+		    kind == BTF_KIND_DATASEC) {
+			elem_size = cur->size;
+			break;
+		}
+		if (kind == BTF_KIND_PTR) {
+			elem_size = sizeof(void *);
+			break;
+		}
+
+		if (kind == BTF_KIND_ARRAY) {
+			arr = btf_array(cur);
+			if (count && arr->nelems > UINT32_MAX / count)
+				return -E2BIG;
+			count *= arr->nelems;
+			type_id = arr->type;
+		} else if (kind == BTF_KIND_TYPEDEF ||
+			   kind == BTF_KIND_VOLATILE ||
+			   kind == BTF_KIND_CONST ||
+			   kind == BTF_KIND_RESTRICT ||
+			   kind == BTF_KIND_VAR) {
+			type_id = cur->type;
+		} else {
+			return -EINVAL;
+		}
+
+		cur = btf__type_by_id(btf, type_id);
+	}
+
+	if (elem_size < 0)
+		return -EINVAL;
+	if (count && elem_size > UINT32_MAX / count)
+		return -E2BIG;
+
+	return count * elem_size;
+}
+
+/*
+ * Follow modifiers, typedefs and variables starting at @type_id, for at
+ * most MAX_RESOLVE_DEPTH steps, and return the id of the type reached.
+ * Returns -EINVAL when the depth limit is hit or the walk ends on a
+ * void/NULL type.
+ */
+int btf__resolve_type(const struct btf *btf, __u32 type_id)
+{
+	const struct btf_type *t = btf__type_by_id(btf, type_id);
+	int hops;
+
+	for (hops = 0; hops < MAX_RESOLVE_DEPTH; hops++) {
+		if (btf_type_is_void_or_null(t))
+			break;
+		if (!btf_is_mod(t) && !btf_is_typedef(t) && !btf_is_var(t))
+			break;
+
+		type_id = t->type;
+		t = btf__type_by_id(btf, type_id);
+	}
+
+	if (hops == MAX_RESOLVE_DEPTH || btf_type_is_void_or_null(t))
+		return -EINVAL;
+
+	return type_id;
+}
+
+/*
+ * Return the id of the first type named @type_name, 0 for the name "void",
+ * or -ENOENT when no type has that name.
+ */
+__s32 btf__find_by_name(const struct btf *btf, const char *type_name)
+{
+	__u32 id;
+
+	if (strcmp(type_name, "void") == 0)
+		return 0;
+
+	for (id = 1; id <= btf->nr_types; id++) {
+		const char *name;
+
+		name = btf__name_by_offset(btf, btf->types[id]->name_off);
+		if (!name)
+			continue;
+		if (strcmp(type_name, name) == 0)
+			return id;
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Return the id of the first type of kind @kind named @type_name.
+ * Returns 0 when @kind is BTF_KIND_UNKN or the name is "void", and -ENOENT
+ * when nothing matches.
+ */
+__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
+			     __u32 kind)
+{
+	__u32 id;
+
+	if (kind == BTF_KIND_UNKN)
+		return 0;
+	if (strcmp(type_name, "void") == 0)
+		return 0;
+
+	for (id = 1; id <= btf->nr_types; id++) {
+		const struct btf_type *cand = btf->types[id];
+		const char *name;
+
+		if (btf_kind(cand) == kind) {
+			name = btf__name_by_offset(btf, cand->name_off);
+			if (name && strcmp(type_name, name) == 0)
+				return id;
+		}
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Release @btf: close its fd if one is set, then free the data copy, the
+ * type array and the object itself. A NULL @btf is ignored.
+ */
+void btf__free(struct btf *btf)
+{
+	if (btf) {
+		if (btf->fd != -1)
+			close(btf->fd);
+
+		free(btf->types);
+		free(btf->data);
+		free(btf);
+	}
+}
+
+/*
+ * Create a struct btf from @size bytes of raw BTF at @data. The bytes are
+ * copied, so the caller keeps ownership of @data.
+ *
+ * Returns the new object, or an ERR_PTR() carrying -ENOMEM or the error
+ * from header, string-section or type-section parsing.
+ */
+struct btf *btf__new(__u8 *data, __u32 size)
+{
+	struct btf *btf = calloc(1, sizeof(*btf));
+	int err;
+
+	if (!btf)
+		return ERR_PTR(-ENOMEM);
+
+	btf->fd = -1;
+
+	btf->data = malloc(size);
+	if (!btf->data) {
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	memcpy(btf->data, data, size);
+	btf->data_size = size;
+
+	/* each stage runs only if the previous one succeeded */
+	err = btf_parse_hdr(btf);
+	if (!err)
+		err = btf_parse_str_sec(btf);
+	if (!err)
+		err = btf_parse_type_sec(btf);
+	if (!err)
+		return btf;
+
+fail:
+	btf__free(btf);
+	return ERR_PTR(err);
+}
+
+/*
+ * Return true when the data encoding recorded in @ehdr (e_ident[EI_DATA])
+ * matches the byte order selected by __BYTE_ORDER at compile time.
+ */
+static bool btf_check_endianness(const GElf_Ehdr *ehdr)
+{
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ return ehdr->e_ident[EI_DATA] == ELFDATA2LSB;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ return ehdr->e_ident[EI_DATA] == ELFDATA2MSB;
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+}
+
+/*
+ * Parse the BTF_ELF_SEC section of the ELF file at @path into a new
+ * struct btf. When @btf_ext is non-NULL the BTF_EXT_ELF_SEC section is looked
+ * up as well: once BTF itself has parsed, *btf_ext is set to the parsed
+ * object if that section exists and to NULL if it does not.
+ *
+ * Returns the new btf, or an ERR_PTR() value: -LIBBPF_ERRNO__LIBELF,
+ * -errno from open(), -LIBBPF_ERRNO__FORMAT for ELF-level failures, -ENOENT
+ * when there is no BTF section, or the error from btf__new() /
+ * btf_ext__new(). On failures before the .BTF.ext step *btf_ext is not
+ * written; if btf_ext__new() fails, *btf_ext is left holding its ERR_PTR.
+ */
+struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
+{
+ Elf_Data *btf_data = NULL, *btf_ext_data = NULL;
+ int err = 0, fd = -1, idx = 0;
+ struct btf *btf = NULL;
+ Elf_Scn *scn = NULL;
+ Elf *elf = NULL;
+ GElf_Ehdr ehdr;
+
+ if (elf_version(EV_CURRENT) == EV_NONE) {
+ pr_warn("failed to init libelf for %s\n", path);
+ return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
+ }
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ err = -errno;
+ pr_warn("failed to open %s: %s\n", path, strerror(errno));
+ return ERR_PTR(err);
+ }
+
+ /* Every "goto done" until the section scan finishes reports a format error. */
+ err = -LIBBPF_ERRNO__FORMAT;
+
+ elf = elf_begin(fd, ELF_C_READ, NULL);
+ if (!elf) {
+ pr_warn("failed to open %s as ELF file\n", path);
+ goto done;
+ }
+ if (!gelf_getehdr(elf, &ehdr)) {
+ pr_warn("failed to get EHDR from %s\n", path);
+ goto done;
+ }
+ if (!btf_check_endianness(&ehdr)) {
+ pr_warn("non-native ELF endianness is not supported\n");
+ goto done;
+ }
+ if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) {
+ pr_warn("failed to get e_shstrndx from %s\n", path);
+ goto done;
+ }
+
+ /* Scan all sections, remembering the data of the BTF (and BTF.ext) one. */
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
+ GElf_Shdr sh;
+ char *name;
+
+ idx++;
+ if (gelf_getshdr(scn, &sh) != &sh) {
+ pr_warn("failed to get section(%d) header from %s\n",
+ idx, path);
+ goto done;
+ }
+ name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
+ if (!name) {
+ pr_warn("failed to get section(%d) name from %s\n",
+ idx, path);
+ goto done;
+ }
+ if (strcmp(name, BTF_ELF_SEC) == 0) {
+ btf_data = elf_getdata(scn, 0);
+ if (!btf_data) {
+ pr_warn("failed to get section(%d, %s) data from %s\n",
+ idx, name, path);
+ goto done;
+ }
+ continue;
+ } else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) {
+ btf_ext_data = elf_getdata(scn, 0);
+ if (!btf_ext_data) {
+ pr_warn("failed to get section(%d, %s) data from %s\n",
+ idx, name, path);
+ goto done;
+ }
+ continue;
+ }
+ }
+
+ /* ELF-level parsing succeeded; later failures travel in btf / *btf_ext. */
+ err = 0;
+
+ if (!btf_data) {
+ err = -ENOENT;
+ goto done;
+ }
+ btf = btf__new(btf_data->d_buf, btf_data->d_size);
+ if (IS_ERR(btf))
+ goto done;
+
+ if (btf_ext && btf_ext_data) {
+ *btf_ext = btf_ext__new(btf_ext_data->d_buf,
+ btf_ext_data->d_size);
+ if (IS_ERR(*btf_ext))
+ goto done;
+ } else if (btf_ext) {
+ *btf_ext = NULL;
+ }
+done:
+ if (elf)
+ elf_end(elf);
+ close(fd);
+
+ if (err)
+ return ERR_PTR(err);
+ /*
+ * btf is always parsed before btf_ext, so no need to clean up
+ * btf_ext, if btf loading failed
+ */
+ if (IS_ERR(btf))
+ return btf;
+ /* BTF parsed but BTF.ext did not: drop btf and report the btf_ext error. */
+ if (btf_ext && IS_ERR(*btf_ext)) {
+ btf__free(btf);
+ err = PTR_ERR(*btf_ext);
+ return ERR_PTR(err);
+ }
+ return btf;
+}
+
+/*
+ * qsort() comparator ordering struct btf_var_secinfo entries by ascending
+ * offset.
+ *
+ * The offsets are compared rather than subtracted: the difference of two
+ * unsigned 32-bit offsets converted to int has the wrong sign whenever the
+ * offsets are more than INT_MAX apart.
+ */
+static int compare_vsi_off(const void *_a, const void *_b)
+{
+	const struct btf_var_secinfo *a = _a;
+	const struct btf_var_secinfo *b = _b;
+
+	return (a->offset > b->offset) - (a->offset < b->offset);
+}
+
+/*
+ * Fill in what the compiler could not for DATASEC type @t: the section
+ * size (from bpf_object__section_size()) and the offset of every non-static
+ * variable (from bpf_object__variable_offset()). The variable entries are
+ * then sorted by offset.
+ *
+ * Returns 0 on success; -ENOENT when a name, the section size or a variable
+ * offset cannot be found (or the size disagrees with a size already present
+ * in @t); -EINVAL when an entry does not refer to a valid VAR type.
+ */
+static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
+			     struct btf_type *t)
+{
+	__u32 size = 0, off = 0, i, vars = btf_vlen(t);
+	const char *sec_name = btf__name_by_offset(btf, t->name_off);
+	const struct btf_type *t_var;
+	struct btf_var_secinfo *vsi;
+	const struct btf_var *var;
+	const char *var_name;
+	int ret;
+
+	if (!sec_name) {
+		pr_debug("No name found in string section for DATASEC kind.\n");
+		return -ENOENT;
+	}
+
+	ret = bpf_object__section_size(obj, sec_name, &size);
+	if (ret || !size || (t->size && t->size != size)) {
+		pr_debug("Invalid size for section %s: %u bytes\n", sec_name, size);
+		return -ENOENT;
+	}
+
+	t->size = size;
+
+	for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
+		t_var = btf__type_by_id(btf, vsi->type);
+
+		/* an out-of-range type id yields NULL; reject it before use */
+		if (!t_var || !btf_is_var(t_var)) {
+			pr_debug("Non-VAR type seen in section %s\n", sec_name);
+			return -EINVAL;
+		}
+
+		var = btf_var(t_var);
+		if (var->linkage == BTF_VAR_STATIC)
+			continue;
+
+		var_name = btf__name_by_offset(btf, t_var->name_off);
+		if (!var_name) {
+			pr_debug("No name found in string section for VAR kind\n");
+			return -ENOENT;
+		}
+
+		ret = bpf_object__variable_offset(obj, var_name, &off);
+		if (ret) {
+			pr_debug("No offset found in symbol table for VAR %s\n",
+				 var_name);
+			return -ENOENT;
+		}
+
+		vsi->offset = off;
+	}
+
+	/* the secinfo array starts right after the type header */
+	qsort(t + 1, vars, sizeof(*vsi), compare_vsi_off);
+	return 0;
+}
+
+/*
+ * Run btf_fixup_datasec() on every DATASEC type in @btf, stopping at the
+ * first failure. Returns 0 or that first error.
+ */
+int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
+{
+	__u32 id;
+
+	for (id = 1; id <= btf->nr_types; id++) {
+		struct btf_type *t = btf->types[id];
+		int err;
+
+		if (!btf_is_datasec(t))
+			continue;
+
+		/*
+		 * The compiler cannot know section sizes or global variable
+		 * offsets when it emits BTF, so the loader patches them in
+		 * here using information from the ELF itself.
+		 */
+		err = btf_fixup_datasec(obj, btf, t);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Load the raw BTF data with bpf_load_btf() and store the resulting fd in
+ * btf->fd.
+ *
+ * Returns 0 on success, -EEXIST when btf->fd is already non-negative,
+ * -ENOMEM when the log buffer cannot be allocated, or -errno when the load
+ * fails (the log buffer is printed if it is not empty).
+ */
+int btf__load(struct btf *btf)
+{
+	const __u32 buf_sz = BPF_LOG_BUF_SIZE;
+	char *buf;
+	int ret = 0;
+
+	if (btf->fd >= 0)
+		return -EEXIST;
+
+	buf = malloc(buf_sz);
+	if (!buf)
+		return -ENOMEM;
+
+	buf[0] = '\0';
+
+	btf->fd = bpf_load_btf(btf->data, btf->data_size,
+			       buf, buf_sz, false);
+	if (btf->fd < 0) {
+		ret = -errno;
+		pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno);
+		if (buf[0] != '\0')
+			pr_warn("%s\n", buf);
+	}
+
+	free(buf);
+	return ret;
+}
+
+/* Return btf->fd (btf__new() initialises it to -1). */
+int btf__fd(const struct btf *btf)
+{
+	const int fd = btf->fd;
+
+	return fd;
+}
+
+/*
+ * Return a pointer to the raw BTF data held by @btf and store its size in
+ * bytes in *size.
+ */
+const void *btf__get_raw_data(const struct btf *btf, __u32 *size)
+{
+	const void *raw = btf->data;
+
+	*size = btf->data_size;
+	return raw;
+}
+
+/*
+ * Return the string at @offset in the string section, or NULL when @offset
+ * is not below the section length.
+ */
+const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
+{
+	if (offset >= btf->hdr->str_len)
+		return NULL;
+
+	return btf->strings + offset;
+}
+
+/*
+ * Fetch the BTF object with id @id via bpf_btf_get_fd_by_id() and
+ * bpf_obj_get_info_by_fd(), and parse it into a new struct btf stored in *btf.
+ *
+ * *btf is reset to NULL first. If bpf_btf_get_fd_by_id() fails the function
+ * returns 0 with *btf still NULL, so callers need to check *btf as well as
+ * the return value.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, the (negative) error
+ * from btf__new(), or a positive errno value when fetching the info fails.
+ */
+int btf__get_from_id(__u32 id, struct btf **btf)
+{
+ struct bpf_btf_info btf_info = { 0 };
+ __u32 len = sizeof(btf_info);
+ __u32 last_size;
+ int btf_fd;
+ void *ptr;
+ int err;
+
+ err = 0;
+ *btf = NULL;
+ btf_fd = bpf_btf_get_fd_by_id(id);
+ if (btf_fd < 0)
+ return 0;
+
+ /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
+ * let's start with a sane default - 4KiB here - and resize it only if
+ * bpf_obj_get_info_by_fd() needs a bigger buffer.
+ */
+ btf_info.btf_size = 4096;
+ last_size = btf_info.btf_size;
+ ptr = malloc(last_size);
+ if (!ptr) {
+ err = -ENOMEM;
+ goto exit_free;
+ }
+
+ memset(ptr, 0, last_size);
+ btf_info.btf = ptr_to_u64(ptr);
+ err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+
+ /* The call reported a larger size than the buffer: grow it and retry once. */
+ if (!err && btf_info.btf_size > last_size) {
+ void *temp_ptr;
+
+ last_size = btf_info.btf_size;
+ temp_ptr = realloc(ptr, last_size);
+ if (!temp_ptr) {
+ err = -ENOMEM;
+ goto exit_free;
+ }
+ ptr = temp_ptr;
+ memset(ptr, 0, last_size);
+ btf_info.btf = ptr_to_u64(ptr);
+ err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+ }
+
+ /*
+  * NOTE(review): err is set to a positive errno here, unlike the negative
+  * codes used elsewhere in this function. When the call succeeded but the
+  * size is still too large, errno was not set by that call and may be
+  * stale or 0 - confirm what callers expect before changing.
+  */
+ if (err || btf_info.btf_size > last_size) {
+ err = errno;
+ goto exit_free;
+ }
+
+ *btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size);
+ if (IS_ERR(*btf)) {
+ err = PTR_ERR(*btf);
+ *btf = NULL;
+ }
+
+exit_free:
+ /* btf__new() copies the data, so the temporary buffer is always freed. */
+ close(btf_fd);
+ free(ptr);
+
+ return err;
+}
+
+/*
+ * Look up the key and value type ids of map @map_name through its
+ * "____btf_map_<map_name>" container struct.
+ *
+ * The container must be a struct with at least two members; the first is
+ * taken as the key and the second as the value. Their resolved sizes must
+ * equal @expected_key_size and @expected_value_size.
+ *
+ * On success stores the ids in *key_type_id / *value_type_id and returns 0.
+ * Returns -EINVAL when the container name does not fit the buffer, the
+ * container is invalid or a size does not match; otherwise the negative
+ * error from btf__find_by_name() or btf__resolve_size().
+ */
+int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
+			 __u32 expected_key_size, __u32 expected_value_size,
+			 __u32 *key_type_id, __u32 *value_type_id)
+{
+	const struct btf_type *container_type;
+	const struct btf_member *key, *value;
+	char container_name[256];
+	__s64 key_size, value_size;
+	__s32 container_id;
+	int n;
+
+	/*
+	 * snprintf() returns the length the full string would have had, so
+	 * any value >= the buffer size means truncation; a negative value
+	 * means an output error.
+	 */
+	n = snprintf(container_name, sizeof(container_name),
+		     "____btf_map_%s", map_name);
+	if (n < 0 || (size_t)n >= sizeof(container_name)) {
+		pr_warn("map:%s length of '____btf_map_%s' is too long\n",
+			map_name, map_name);
+		return -EINVAL;
+	}
+
+	container_id = btf__find_by_name(btf, container_name);
+	if (container_id < 0) {
+		pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
+			 map_name, container_name);
+		return container_id;
+	}
+
+	container_type = btf__type_by_id(btf, container_id);
+	if (!container_type) {
+		pr_warn("map:%s cannot find BTF type for container_id:%u\n",
+			map_name, container_id);
+		return -EINVAL;
+	}
+
+	if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) {
+		pr_warn("map:%s container_name:%s is an invalid container struct\n",
+			map_name, container_name);
+		return -EINVAL;
+	}
+
+	/* first member describes the key, second member the value */
+	key = btf_members(container_type);
+	value = key + 1;
+
+	key_size = btf__resolve_size(btf, key->type);
+	if (key_size < 0) {
+		pr_warn("map:%s invalid BTF key_type_size\n", map_name);
+		return key_size;
+	}
+
+	if (expected_key_size != key_size) {
+		pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
+			map_name, (__u32)key_size, expected_key_size);
+		return -EINVAL;
+	}
+
+	value_size = btf__resolve_size(btf, value->type);
+	if (value_size < 0) {
+		pr_warn("map:%s invalid BTF value_type_size\n", map_name);
+		return value_size;
+	}
+
+	if (expected_value_size != value_size) {
+		pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
+			map_name, (__u32)value_size, expected_value_size);
+		return -EINVAL;
+	}
+
+	*key_type_id = key->type;
+	*value_type_id = value->type;
+
+	return 0;
+}
+
+/* Describes one .BTF.ext info section for btf_ext_setup_info(). */
+struct btf_ext_sec_setup_param {
+ /* Offset of the section, relative to the end of the .BTF.ext header. */
+ __u32 off;
+ /* Length of the section in bytes; 0 means the section is absent. */
+ __u32 len;
+ /* Smallest record size accepted for this section. */
+ __u32 min_rec_size;
+ /* Filled in on success with the section's location and record size. */
+ struct btf_ext_info *ext_info;
+ /* Section name used in debug messages. */
+ const char *desc;
+};
+
+/*
+ * Validate the .BTF.ext info section described by @ext_sec and, if it is
+ * well formed, record its location and record size in ext_sec->ext_info.
+ *
+ * The section starts with a __u32 record size followed by one or more
+ * struct btf_ext_info_sec headers, each followed by num_info records.
+ *
+ * Returns 0 on success or when the section length is 0 (ext_info is then
+ * left untouched), and -EINVAL when the section is misaligned, out of
+ * bounds, has an invalid record size, or has inconsistent record counts.
+ */
+static int btf_ext_setup_info(struct btf_ext *btf_ext,
+ struct btf_ext_sec_setup_param *ext_sec)
+{
+ const struct btf_ext_info_sec *sinfo;
+ struct btf_ext_info *ext_info;
+ __u32 info_left, record_size;
+ /* The start of the info sec (including the __u32 record_size). */
+ void *info;
+
+ if (ext_sec->len == 0)
+ return 0;
+
+ if (ext_sec->off & 0x03) {
+ pr_debug(".BTF.ext %s section is not aligned to 4 bytes\n",
+ ext_sec->desc);
+ return -EINVAL;
+ }
+
+ info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off;
+ info_left = ext_sec->len;
+
+ if (btf_ext->data + btf_ext->data_size < info + ext_sec->len) {
+ pr_debug("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n",
+ ext_sec->desc, ext_sec->off, ext_sec->len);
+ return -EINVAL;
+ }
+
+ /* At least a record size */
+ if (info_left < sizeof(__u32)) {
+ pr_debug(".BTF.ext %s record size not found\n", ext_sec->desc);
+ return -EINVAL;
+ }
+
+ /* The record size needs to meet the minimum standard */
+ record_size = *(__u32 *)info;
+ if (record_size < ext_sec->min_rec_size ||
+ record_size & 0x03) {
+ pr_debug("%s section in .BTF.ext has invalid record size %u\n",
+ ext_sec->desc, record_size);
+ return -EINVAL;
+ }
+
+ sinfo = info + sizeof(__u32);
+ info_left -= sizeof(__u32);
+
+ /* If no records, return failure now so .BTF.ext won't be used. */
+ if (!info_left) {
+ pr_debug("%s section in .BTF.ext has no records", ext_sec->desc);
+ return -EINVAL;
+ }
+
+ /* Walk every per-section header and check that its records fit. */
+ while (info_left) {
+ unsigned int sec_hdrlen = sizeof(struct btf_ext_info_sec);
+ __u64 total_record_size;
+ __u32 num_records;
+
+ if (info_left < sec_hdrlen) {
+ pr_debug("%s section header is not found in .BTF.ext\n",
+ ext_sec->desc);
+ return -EINVAL;
+ }
+
+ num_records = sinfo->num_info;
+ if (num_records == 0) {
+ pr_debug("%s section has incorrect num_records in .BTF.ext\n",
+ ext_sec->desc);
+ return -EINVAL;
+ }
+
+ /* 64-bit arithmetic so num_records * record_size cannot wrap. */
+ total_record_size = sec_hdrlen +
+ (__u64)num_records * record_size;
+ if (info_left < total_record_size) {
+ pr_debug("%s section has incorrect num_records in .BTF.ext\n",
+ ext_sec->desc);
+ return -EINVAL;
+ }
+
+ info_left -= total_record_size;
+ sinfo = (void *)sinfo + total_record_size;
+ }
+
+ /* Publish the section, skipping the leading __u32 record size. */
+ ext_info = ext_sec->ext_info;
+ ext_info->len = ext_sec->len - sizeof(__u32);
+ ext_info->rec_size = record_size;
+ ext_info->info = info + sizeof(__u32);
+
+ return 0;
+}
+
+/* Validate the func_info section and publish it in btf_ext->func_info. */
+static int btf_ext_setup_func_info(struct btf_ext *btf_ext)
+{
+	struct btf_ext_sec_setup_param param = {
+		.desc = "func_info",
+		.ext_info = &btf_ext->func_info,
+		.min_rec_size = sizeof(struct bpf_func_info_min),
+		.len = btf_ext->hdr->func_info_len,
+		.off = btf_ext->hdr->func_info_off,
+	};
+	int rc;
+
+	rc = btf_ext_setup_info(btf_ext, &param);
+	return rc;
+}
+
+/* Validate the line_info section and publish it in btf_ext->line_info. */
+static int btf_ext_setup_line_info(struct btf_ext *btf_ext)
+{
+	struct btf_ext_sec_setup_param param = {
+		.desc = "line_info",
+		.ext_info = &btf_ext->line_info,
+		.min_rec_size = sizeof(struct bpf_line_info_min),
+		.len = btf_ext->hdr->line_info_len,
+		.off = btf_ext->hdr->line_info_off,
+	};
+	int rc;
+
+	rc = btf_ext_setup_info(btf_ext, &param);
+	return rc;
+}
+
+/*
+ * Validate the field_reloc section and publish it in
+ * btf_ext->field_reloc_info.
+ */
+static int btf_ext_setup_field_reloc(struct btf_ext *btf_ext)
+{
+	struct btf_ext_sec_setup_param param = {
+		.desc = "field_reloc",
+		.ext_info = &btf_ext->field_reloc_info,
+		.min_rec_size = sizeof(struct bpf_field_reloc),
+		.len = btf_ext->hdr->field_reloc_len,
+		.off = btf_ext->hdr->field_reloc_off,
+	};
+	int rc;
+
+	rc = btf_ext_setup_info(btf_ext, &param);
+	return rc;
+}
+
+/*
+ * Validate the .BTF.ext header at @data (@data_size bytes in total).
+ *
+ * Returns 0 when the header is acceptable, -EINVAL when it is truncated,
+ * has the wrong magic or is not followed by any data, and -ENOTSUP for an
+ * unsupported version or non-zero flags.
+ */
+static int btf_ext_parse_hdr(__u8 *data, __u32 data_size)
+{
+	const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
+
+	/* hdr_len may only be read once the buffer is known to contain it */
+	if (data_size < offsetofend(struct btf_ext_header, hdr_len)) {
+		pr_debug("BTF.ext header not found");
+		return -EINVAL;
+	}
+	if (data_size < hdr->hdr_len) {
+		pr_debug("BTF.ext header not found");
+		return -EINVAL;
+	}
+
+	if (hdr->magic != BTF_MAGIC) {
+		pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic);
+		return -EINVAL;
+	}
+
+	if (hdr->version != BTF_VERSION) {
+		pr_debug("Unsupported BTF.ext version:%u\n", hdr->version);
+		return -ENOTSUP;
+	}
+
+	if (hdr->flags != 0) {
+		pr_debug("Unsupported BTF.ext flags:%x\n", hdr->flags);
+		return -ENOTSUP;
+	}
+
+	if (hdr->hdr_len == data_size) {
+		pr_debug("BTF.ext has no data\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Free @btf_ext and its data copy. A NULL @btf_ext is ignored. */
+void btf_ext__free(struct btf_ext *btf_ext)
+{
+	if (btf_ext) {
+		free(btf_ext->data);
+		free(btf_ext);
+	}
+}
+
+/*
+ * Create a struct btf_ext from @size bytes of raw .BTF.ext at @data. The
+ * bytes are copied, so the caller keeps ownership of @data.
+ *
+ * func_info and line_info are set up only when the header is long enough
+ * to contain line_info_len; field_reloc only when it is long enough to
+ * contain field_reloc_len. A shorter header is not an error.
+ *
+ * Returns the new object, or an ERR_PTR() carrying the header validation
+ * error, -ENOMEM, or the error from one of the section setup steps.
+ */
+struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
+{
+	struct btf_ext *btf_ext;
+	__u32 hdr_len;
+	int err;
+
+	err = btf_ext_parse_hdr(data, size);
+	if (err)
+		return ERR_PTR(err);
+
+	btf_ext = calloc(1, sizeof(*btf_ext));
+	if (!btf_ext)
+		return ERR_PTR(-ENOMEM);
+
+	btf_ext->data = malloc(size);
+	if (!btf_ext->data) {
+		err = -ENOMEM;
+		goto fail;
+	}
+	btf_ext->data_size = size;
+	memcpy(btf_ext->data, data, size);
+
+	hdr_len = btf_ext->hdr->hdr_len;
+	if (hdr_len >= offsetofend(struct btf_ext_header, line_info_len)) {
+		err = btf_ext_setup_func_info(btf_ext);
+		if (!err)
+			err = btf_ext_setup_line_info(btf_ext);
+		if (!err &&
+		    hdr_len >= offsetofend(struct btf_ext_header,
+					   field_reloc_len))
+			err = btf_ext_setup_field_reloc(btf_ext);
+	}
+
+	if (!err)
+		return btf_ext;
+
+fail:
+	btf_ext__free(btf_ext);
+	return ERR_PTR(err);
+}
+
+/*
+ * Return a pointer to the raw .BTF.ext data held by @btf_ext and store its
+ * size in bytes in *size.
+ */
+const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size)
+{
+	const void *raw = btf_ext->data;
+
+	*size = btf_ext->data_size;
+	return raw;
+}
+
+/*
+ * Append the records that @ext_info holds for ELF section @sec_name to the
+ * array at *info, which currently holds *cnt records.
+ *
+ * The leading __u32 of every appended record (insn_off) is converted from
+ * a byte offset to an instruction index and shifted by @insns_cnt. On
+ * success *info points to the reallocated array and *cnt is increased.
+ *
+ * Returns 0 on success, -ENOMEM when realloc() fails (*info is left
+ * unchanged), or -ENOENT when no sub-section is named @sec_name. A
+ * sub-section whose name offset lies outside the string section cannot
+ * match and is skipped.
+ */
+static int btf_ext_reloc_info(const struct btf *btf,
+			      const struct btf_ext_info *ext_info,
+			      const char *sec_name, __u32 insns_cnt,
+			      void **info, __u32 *cnt)
+{
+	__u32 sec_hdrlen = sizeof(struct btf_ext_info_sec);
+	__u32 i, record_size, existing_len, records_len;
+	struct btf_ext_info_sec *sinfo;
+	const char *info_sec_name;
+	__u64 remain_len;
+	void *data;
+
+	record_size = ext_info->rec_size;
+	sinfo = ext_info->info;
+	remain_len = ext_info->len;
+	while (remain_len > 0) {
+		records_len = sinfo->num_info * record_size;
+		info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off);
+		/* a NULL name must never reach strcmp() */
+		if (!info_sec_name || strcmp(info_sec_name, sec_name)) {
+			remain_len -= sec_hdrlen + records_len;
+			sinfo = (void *)sinfo + sec_hdrlen + records_len;
+			continue;
+		}
+
+		existing_len = (*cnt) * record_size;
+		data = realloc(*info, existing_len + records_len);
+		if (!data)
+			return -ENOMEM;
+
+		memcpy(data + existing_len, sinfo->data, records_len);
+		/* adjust insn_off only, the rest data will be passed
+		 * to the kernel.
+		 */
+		for (i = 0; i < sinfo->num_info; i++) {
+			__u32 *insn_off;
+
+			insn_off = data + existing_len + (i * record_size);
+			*insn_off = *insn_off / sizeof(struct bpf_insn) +
+				    insns_cnt;
+		}
+		*info = data;
+		*cnt += sinfo->num_info;
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Append the func_info records for section @sec_name to *func_info; see
+ * btf_ext_reloc_info() for the details and return values.
+ */
+int btf_ext__reloc_func_info(const struct btf *btf,
+			     const struct btf_ext *btf_ext,
+			     const char *sec_name, __u32 insns_cnt,
+			     void **func_info, __u32 *cnt)
+{
+	const struct btf_ext_info *src = &btf_ext->func_info;
+
+	return btf_ext_reloc_info(btf, src, sec_name, insns_cnt,
+				  func_info, cnt);
+}
+
+/*
+ * Append the line_info records for section @sec_name to *line_info; see
+ * btf_ext_reloc_info() for the details and return values.
+ */
+int btf_ext__reloc_line_info(const struct btf *btf,
+			     const struct btf_ext *btf_ext,
+			     const char *sec_name, __u32 insns_cnt,
+			     void **line_info, __u32 *cnt)
+{
+	const struct btf_ext_info *src = &btf_ext->line_info;
+
+	return btf_ext_reloc_info(btf, src, sec_name, insns_cnt,
+				  line_info, cnt);
+}
+
+/* Return the record size stored for the func_info section. */
+__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext)
+{
+	const struct btf_ext_info *src = &btf_ext->func_info;
+
+	return src->rec_size;
+}
+
+/* Return the record size stored for the line_info section. */
+__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext)
+{
+	const struct btf_ext_info *src = &btf_ext->line_info;
+
+	return src->rec_size;
+}
+
+struct btf_dedup; /* dedup state; the full definition follows btf__dedup() */
+
+static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
+ const struct btf_dedup_opts *opts);
+static void btf_dedup_free(struct btf_dedup *d);
+static int btf_dedup_strings(struct btf_dedup *d); /* the passes below are called by btf__dedup() in this order */
+static int btf_dedup_prim_types(struct btf_dedup *d);
+static int btf_dedup_struct_types(struct btf_dedup *d);
+static int btf_dedup_ref_types(struct btf_dedup *d);
+static int btf_dedup_compact_types(struct btf_dedup *d);
+static int btf_dedup_remap_types(struct btf_dedup *d);
+
+/*
+ * Deduplicate BTF types and strings.
+ *
+ * BTF dedup algorithm takes as an input `struct btf` representing `.BTF` ELF
+ * section with all BTF type descriptors and string data. It overwrites that
+ * memory in-place with deduplicated types and strings without any loss of
+ * information. If optional `struct btf_ext` representing '.BTF.ext' ELF section
+ * is provided, all the strings referenced from .BTF.ext section are honored
+ * and updated to point to the right offsets after deduplication.
+ *
+ * If function returns with error, type/string data might be garbled and should
+ * be discarded.
+ *
+ * More verbose and detailed description of both problem btf_dedup is solving,
+ * as well as solution could be found at:
+ * https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html
+ *
+ * Problem description and justification
+ * =====================================
+ *
+ * BTF type information is typically emitted either as a result of conversion
+ * from DWARF to BTF or directly by compiler. In both cases, each compilation
+ * unit contains information about a subset of all the types that are used
+ * in an application. These subsets are frequently overlapping and contain a lot
+ * of duplicated information when later concatenated together into a single
+ * binary. This algorithm ensures that each unique type is represented by single
+ * BTF type descriptor, greatly reducing resulting size of BTF data.
+ *
+ * Compilation unit isolation and subsequent duplication of data is not the only
+ * problem. The same type hierarchy (e.g., struct and all the type that struct
+ * references) in different compilation units can be represented in BTF to
+ * various degrees of completeness (or, rather, incompleteness) due to
+ * struct/union forward declarations.
+ *
+ * Let's take a look at an example, that we'll use to better understand the
+ * problem (and solution). Suppose we have two compilation units, each using
+ * same `struct S`, but each of them having incomplete type information about
+ * struct's fields:
+ *
+ * // CU #1:
+ * struct S;
+ * struct A {
+ * int a;
+ * struct A* self;
+ * struct S* parent;
+ * };
+ * struct B;
+ * struct S {
+ * struct A* a_ptr;
+ * struct B* b_ptr;
+ * };
+ *
+ * // CU #2:
+ * struct S;
+ * struct A;
+ * struct B {
+ * int b;
+ * struct B* self;
+ * struct S* parent;
+ * };
+ * struct S {
+ * struct A* a_ptr;
+ * struct B* b_ptr;
+ * };
+ *
+ * In case of CU #1, BTF data will know only that `struct B` exist (but no
+ * more), but will know the complete type information about `struct A`. While
+ * for CU #2, it will know full type information about `struct B`, but will
+ * only know about forward declaration of `struct A` (in BTF terms, it will
+ * have `BTF_KIND_FWD` type descriptor with name `A`).
+ *
+ * This compilation unit isolation means that it's possible that there is no
+ * single CU with complete type information describing structs `S`, `A`, and
+ * `B`. Also, we might get tons of duplicated and redundant type information.
+ *
+ * Additional complication we need to keep in mind comes from the fact that
+ * types, in general, can form graphs containing cycles, not just DAGs.
+ *
+ * While algorithm does deduplication, it also merges and resolves type
+ * information (unless disabled through `struct btf_dedup_opts`), whenever possible.
+ * E.g., in the example above with two compilation units having partial type
+ * information for structs `A` and `B`, the output of algorithm will emit
+ * a single copy of each BTF type that describes structs `A`, `B`, and `S`
+ * (as well as type information for `int` and pointers), as if they were defined
+ * in a single compilation unit as:
+ *
+ * struct A {
+ * int a;
+ * struct A* self;
+ * struct S* parent;
+ * };
+ * struct B {
+ * int b;
+ * struct B* self;
+ * struct S* parent;
+ * };
+ * struct S {
+ * struct A* a_ptr;
+ * struct B* b_ptr;
+ * };
+ *
+ * Algorithm summary
+ * =================
+ *
+ * Algorithm completes its work in 6 separate passes:
+ *
+ * 1. Strings deduplication.
+ * 2. Primitive types deduplication (int, enum, fwd).
+ * 3. Struct/union types deduplication.
+ * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func
+ * protos, and const/volatile/restrict modifiers).
+ * 5. Types compaction.
+ * 6. Types remapping.
+ *
+ * Algorithm determines canonical type descriptor, which is a single
+ * representative type for each truly unique type. This canonical type is the
+ * one that will go into final deduplicated BTF type information. For
+ * struct/unions, it is also the type that algorithm will merge additional type
+ * information into (while resolving FWDs), as it discovers it from data in
+ * other CUs. Each input BTF type eventually gets either mapped to itself, if
+ * that type is canonical, or to some other type, if that type is equivalent
+ * and was chosen as canonical representative. This mapping is stored in
+ * `btf_dedup->map` array. This map is also used to record STRUCT/UNION that
+ * FWD type got resolved to.
+ *
+ * To facilitate fast discovery of canonical types, we also maintain canonical
+ * index (`btf_dedup->dedup_table`), which maps type descriptor's signature hash
+ * (i.e., hashed kind, name, size, fields, etc) into a list of canonical types
+ * that match that signature. With sufficiently good choice of type signature
+ * hashing function, we can limit number of canonical types for each unique type
+ * signature to a very small number, allowing to find canonical type for any
+ * duplicated type very quickly.
+ *
+ * Struct/union deduplication is the most critical part and algorithm for
+ * deduplicating structs/unions is described in greater details in comments for
+ * `btf_dedup_is_equiv` function.
+ */
+int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
+	       const struct btf_dedup_opts *opts)
+{
+	struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts);
+	int err;
+
+	if (IS_ERR(d)) {
+		/*
+		 * Propagate the real cause (e.g. -ENOMEM) instead of a
+		 * blanket -EINVAL; nothing was allocated that needs freeing.
+		 */
+		err = PTR_ERR(d);
+		pr_debug("btf_dedup_new failed: %d\n", err);
+		return err;
+	}
+
+	/*
+	 * Passes run strictly in this order; the first failing pass aborts
+	 * the sequence and its negative error code is returned. The dedup
+	 * state is released on every path past this point.
+	 */
+	err = btf_dedup_strings(d);
+	if (err < 0) {
+		pr_debug("btf_dedup_strings failed:%d\n", err);
+		goto done;
+	}
+	err = btf_dedup_prim_types(d);
+	if (err < 0) {
+		pr_debug("btf_dedup_prim_types failed:%d\n", err);
+		goto done;
+	}
+	err = btf_dedup_struct_types(d);
+	if (err < 0) {
+		pr_debug("btf_dedup_struct_types failed:%d\n", err);
+		goto done;
+	}
+	err = btf_dedup_ref_types(d);
+	if (err < 0) {
+		pr_debug("btf_dedup_ref_types failed:%d\n", err);
+		goto done;
+	}
+	err = btf_dedup_compact_types(d);
+	if (err < 0) {
+		pr_debug("btf_dedup_compact_types failed:%d\n", err);
+		goto done;
+	}
+	err = btf_dedup_remap_types(d);
+	if (err < 0)
+		pr_debug("btf_dedup_remap_types failed:%d\n", err);
+
+done:
+	btf_dedup_free(d);
+	return err;
+}
+
+#define BTF_UNPROCESSED_ID ((__u32)-1) /* initial value of map[] / hypot_map[] slots that have no mapping yet */
+#define BTF_IN_PROGRESS_ID ((__u32)-2) /* NOTE(review): not referenced in this part of the file; presumably marks a type being processed - confirm */
+
+struct btf_dedup {
+ /* .BTF section to be deduped in-place */
+ struct btf *btf;
+ /*
+ * Optional .BTF.ext section. When provided, any strings referenced
+ * from it will be taken into account when deduping strings
+ */
+ struct btf_ext *btf_ext;
+ /*
+ * This is a map from any type's signature hash to a list of possible
+ * canonical representative type candidates. Hash collisions are
+ * ignored, so even types of various kinds can share same list of
+ * candidates, which is fine because we rely on subsequent
+ * btf_xxx_equal() checks to authoritatively verify type equality.
+ */
+ struct hashmap *dedup_table;
+ /* Canonical types map, indexed by type ID (nr_types + 1 entries) */
+ __u32 *map;
+ /* Hypothetical mapping, used during type graph equivalence checks */
+ __u32 *hypot_map;
+ __u32 *hypot_list; /* type IDs whose hypot_map slot was set, so they can be reset cheaply */
+ size_t hypot_cnt; /* number of IDs currently recorded in hypot_list */
+ size_t hypot_cap; /* allocated capacity of hypot_list, in elements */
+ /* Various option modifying behavior of algorithm */
+ struct btf_dedup_opts opts;
+};
+
+struct btf_str_ptr { /* index entry for one string of the BTF string section */
+ const char *str; /* start of the string inside the string section */
+ __u32 new_off; /* offset assigned to the string's group in the deduplicated blob */
+ bool used; /* set once some string offset in .BTF/.BTF.ext points at this string */
+};
+
+struct btf_str_ptrs { /* growable index of all strings, built by btf_dedup_strings() */
+ struct btf_str_ptr *ptrs; /* heap-allocated array of entries */
+ const char *data; /* start of the string section; string offsets are relative to it */
+ __u32 cnt; /* number of entries in use */
+ __u32 cap; /* number of entries allocated */
+};
+
+/* Fold `value` into the running hash `h` (multiply-by-31-and-add step). */
+static long hash_combine(long h, long value)
+{
+	long mixed = h * 31;
+
+	mixed += value;
+	return mixed;
+}
+
+#define for_each_dedup_cand(d, node, hash) \
+ hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash) /* walks d->dedup_table using `hash` itself as the lookup key; `node` is the cursor */
+
+/*
+ * Record `type_id` as one more candidate under signature hash `hash` in the
+ * dedup table. Returns the result of hashmap__append().
+ */
+static int btf_dedup_table_add(struct btf_dedup *d, long hash, __u32 type_id)
+{
+	void *key = (void *)hash;
+	void *value = (void *)(long)type_id;
+
+	return hashmap__append(d->dedup_table, key, value);
+}
+
+/*
+ * Record hypothetical mapping `from_id` -> `to_id` and remember `from_id` in
+ * hypot_list so that btf_dedup_clear_hypot_map() can undo it later.
+ *
+ * Returns 0 on success, -ENOMEM if hypot_list could not be grown; in that
+ * case the list, its capacity and the map are left untouched.
+ */
+static int btf_dedup_hypot_map_add(struct btf_dedup *d,
+				   __u32 from_id, __u32 to_id)
+{
+	if (d->hypot_cnt == d->hypot_cap) {
+		/* grow by 50%, but by at least 16 entries */
+		size_t grow = d->hypot_cap / 2;
+		size_t new_cap;
+		__u32 *new_list;
+
+		if (grow < 16)
+			grow = 16;
+		new_cap = d->hypot_cap + grow;
+
+		new_list = realloc(d->hypot_list, sizeof(__u32) * new_cap);
+		if (!new_list)
+			return -ENOMEM;
+		/*
+		 * commit the capacity only after realloc() succeeded, so
+		 * hypot_cap never exceeds what is really allocated
+		 */
+		d->hypot_list = new_list;
+		d->hypot_cap = new_cap;
+	}
+	d->hypot_list[d->hypot_cnt++] = from_id;
+	d->hypot_map[from_id] = to_id;
+	return 0;
+}
+
+/*
+ * Reset every hypot_map slot recorded in hypot_list back to
+ * BTF_UNPROCESSED_ID and empty the list (its allocation is kept for reuse).
+ */
+static void btf_dedup_clear_hypot_map(struct btf_dedup *d)
+{
+	/* size_t index: hypot_cnt is a size_t, avoid signed/unsigned compare */
+	size_t i;
+
+	for (i = 0; i < d->hypot_cnt; i++)
+		d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID;
+	d->hypot_cnt = 0;
+}
+
+/*
+ * Release the dedup state: the candidate table, both mapping arrays, the
+ * hypothetical list and finally `d` itself. The btf/btf_ext objects that
+ * `d` points to are not touched here.
+ */
+static void btf_dedup_free(struct btf_dedup *d)
+{
+	free(d->hypot_list);
+	free(d->hypot_map);
+	free(d->map);
+	hashmap__free(d->dedup_table);
+
+	/* clear the stale pointers before the container goes away */
+	d->hypot_list = NULL;
+	d->hypot_map = NULL;
+	d->map = NULL;
+	d->dedup_table = NULL;
+
+	free(d);
+}
+
+/* Hash callback: the key, reinterpreted as an integer, is its own hash. */
+static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx)
+{
+	size_t hash = (size_t)key;
+
+	return hash;
+}
+
+/* Hash callback that maps every key to 0, i.e. all keys collide. */
+static size_t btf_dedup_collision_hash_fn(const void *key, void *ctx)
+{
+	const size_t same_for_all = 0;
+
+	return same_for_all;
+}
+
+/* Key equality callback: keys are equal only if the pointer values match. */
+static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx)
+{
+	if (k1 != k2)
+		return false;
+	return true;
+}
+
+/*
+ * Allocate and initialise dedup state for `btf` (and optional `btf_ext`).
+ *
+ * `opts` may be NULL. Returns the new state, or an ERR_PTR()-encoded negative
+ * error code; on failure everything allocated so far is released again.
+ */
+static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
+				       const struct btf_dedup_opts *opts)
+{
+	hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn;
+	struct btf_dedup *d;
+	int id, err;
+
+	d = calloc(1, sizeof(struct btf_dedup));
+	if (!d)
+		return ERR_PTR(-ENOMEM);
+
+	d->btf = btf;
+	d->btf_ext = btf_ext;
+	d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
+
+	/* dedup_table_size is now used only to force collisions in tests */
+	if (opts && opts->dedup_table_size == 1)
+		hash_fn = btf_dedup_collision_hash_fn;
+
+	d->dedup_table = hashmap__new(hash_fn, btf_dedup_equal_fn, NULL);
+	if (IS_ERR(d->dedup_table)) {
+		err = PTR_ERR(d->dedup_table);
+		/* don't let btf_dedup_free() see the error pointer */
+		d->dedup_table = NULL;
+		goto err_out;
+	}
+
+	d->map = malloc(sizeof(__u32) * (1 + btf->nr_types));
+	if (!d->map) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	/* special BTF "void" type is made canonical immediately */
+	d->map[0] = 0;
+	for (id = 1; id <= btf->nr_types; id++) {
+		struct btf_type *t = d->btf->types[id];
+
+		/* VAR and DATASEC are never deduped and are self-canonical */
+		if (!btf_is_var(t) && !btf_is_datasec(t))
+			d->map[id] = BTF_UNPROCESSED_ID;
+		else
+			d->map[id] = id;
+	}
+
+	d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types));
+	if (!d->hypot_map) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	for (id = 0; id <= btf->nr_types; id++)
+		d->hypot_map[id] = BTF_UNPROCESSED_ID;
+
+	return d;
+
+err_out:
+	btf_dedup_free(d);
+	return ERR_PTR(err);
+}
+
+typedef int (*str_off_fn_t)(__u32 *str_off_ptr, void *ctx); /* callback for btf_for_each_str_off(); a non-zero return stops the iteration */
+
+/*
+ * Visit every string offset stored in .BTF (type names plus struct/union
+ * member, enum value and func proto parameter names) and, when .BTF.ext is
+ * present, in its line info (section name, file name and line offsets).
+ *
+ * `fn` receives a pointer to each offset together with `ctx`. The first
+ * non-zero value returned by `fn` stops the walk and is returned; otherwise
+ * the result is 0.
+ */
+static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
+{
+	void *cur, *end;
+	int id, k, err, rec_size;
+	struct btf_type *t;
+
+	for (id = 1; id <= d->btf->nr_types; id++) {
+		t = d->btf->types[id];
+		err = fn(&t->name_off, ctx);
+		if (err)
+			return err;
+
+		switch (btf_kind(t)) {
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION: {
+			struct btf_member *member = btf_members(t);
+			__u16 n = btf_vlen(t);
+
+			for (k = 0; k < n; k++, member++) {
+				err = fn(&member->name_off, ctx);
+				if (err)
+					return err;
+			}
+			break;
+		}
+		case BTF_KIND_ENUM: {
+			struct btf_enum *value = btf_enum(t);
+			__u16 n = btf_vlen(t);
+
+			for (k = 0; k < n; k++, value++) {
+				err = fn(&value->name_off, ctx);
+				if (err)
+					return err;
+			}
+			break;
+		}
+		case BTF_KIND_FUNC_PROTO: {
+			struct btf_param *param = btf_params(t);
+			__u16 n = btf_vlen(t);
+
+			for (k = 0; k < n; k++, param++) {
+				err = fn(&param->name_off, ctx);
+				if (err)
+					return err;
+			}
+			break;
+		}
+		default:
+			break;
+		}
+	}
+
+	if (!d->btf_ext)
+		return 0;
+
+	rec_size = d->btf_ext->line_info.rec_size;
+	cur = d->btf_ext->line_info.info;
+	end = d->btf_ext->line_info.info + d->btf_ext->line_info.len;
+
+	/* each section: header, then num_info records of rec_size bytes */
+	while (cur < end) {
+		struct btf_ext_info_sec *sec = cur;
+		struct bpf_line_info_min *rec;
+		__u32 nr_recs = sec->num_info;
+
+		err = fn(&sec->sec_name_off, ctx);
+		if (err)
+			return err;
+
+		cur += sizeof(struct btf_ext_info_sec);
+		for (k = 0; k < nr_recs; k++) {
+			rec = cur;
+			err = fn(&rec->file_name_off, ctx);
+			if (err)
+				return err;
+			err = fn(&rec->line_off, ctx);
+			if (err)
+				return err;
+			cur += rec_size;
+		}
+	}
+
+	return 0;
+}
+
+/* qsort() comparator: order index entries by string contents (strcmp). */
+static int str_sort_by_content(const void *a1, const void *a2)
+{
+	const struct btf_str_ptr *lhs = a1, *rhs = a2;
+
+	return strcmp(lhs->str, rhs->str);
+}
+
+/* qsort() comparator: order index entries by the address of their string. */
+static int str_sort_by_offset(const void *a1, const void *a2)
+{
+	const struct btf_str_ptr *lhs = a1, *rhs = a2;
+
+	if (lhs->str == rhs->str)
+		return 0;
+	return lhs->str < rhs->str ? -1 : 1;
+}
+
+/*
+ * bsearch() comparator: `str_ptr` is the searched string address, `pelem` an
+ * index entry; compares the address against the entry's string address.
+ */
+static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem)
+{
+	const struct btf_str_ptr *entry = pelem;
+
+	if (str_ptr == entry->str)
+		return 0;
+	return (const char *)str_ptr < entry->str ? -1 : 1;
+}
+
+/*
+ * str_off_fn_t callback: flag the index entry of the string that
+ * `*str_off_ptr` refers to as used. Offset 0 is skipped. Returns -EINVAL if
+ * the offset does not land on the start of an indexed string.
+ */
+static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx)
+{
+	struct btf_str_ptrs *strs = ctx;
+	struct btf_str_ptr *found;
+	__u32 off = *str_off_ptr;
+
+	if (off == 0)
+		return 0;
+
+	found = bsearch(strs->data + off, strs->ptrs, strs->cnt,
+			sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
+	if (found == NULL)
+		return -EINVAL;
+
+	found->used = true;
+	return 0;
+}
+
+/*
+ * str_off_fn_t callback: replace `*str_off_ptr` with the new offset stored
+ * in the index entry of the string it refers to. Offset 0 is left alone.
+ * Returns -EINVAL if the offset does not land on the start of an indexed
+ * string.
+ */
+static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx)
+{
+	struct btf_str_ptrs *strs = ctx;
+	struct btf_str_ptr *found;
+	__u32 off = *str_off_ptr;
+
+	if (off == 0)
+		return 0;
+
+	found = bsearch(strs->data + off, strs->ptrs, strs->cnt,
+			sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
+	if (found == NULL)
+		return -EINVAL;
+
+	*str_off_ptr = found->new_off;
+	return 0;
+}
+
+/*
+ * Dedup string and filter out those that are not referenced from either .BTF
+ * or .BTF.ext (if provided) sections.
+ *
+ * This is done by building index of all strings in BTF's string section,
+ * then iterating over all entities that can reference strings (e.g., type
+ * names, struct field names, .BTF.ext line info, etc) and marking corresponding
+ * strings as used. After that all used strings are deduped and compacted into
+ * sequential blob of memory and new offsets are calculated. Then all the string
+ * references are iterated again and rewritten using new offsets.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure and -EINVAL if the
+ * string section is empty or not NUL-terminated, or if some string offset
+ * does not point at the start of a string. The string section and header
+ * str_len are rewritten in place before offsets are remapped, so after
+ * a remap failure the data must be discarded.
+ */
+static int btf_dedup_strings(struct btf_dedup *d)
+{
+	const struct btf_header *hdr = d->btf->hdr;
+	char *start = (char *)d->btf->nohdr_data + hdr->str_off;
+	char *end = start + d->btf->hdr->str_len;
+	char *p = start, *tmp_strs = NULL;
+	struct btf_str_ptrs strs = {
+		.cnt = 0,
+		.cap = 0,
+		.ptrs = NULL,
+		.data = start,
+	};
+	int i, j, err = 0, grp_idx;
+	bool grp_used;
+
+	/*
+	 * An empty section would leave strs.ptrs NULL for the ptrs[0]
+	 * accesses below, and a section without terminating NUL would make
+	 * strlen() run past `end`; reject both up front.
+	 */
+	if (start == end || end[-1] != '\0')
+		return -EINVAL;
+
+	/* build index of all strings */
+	while (p < end) {
+		if (strs.cnt + 1 > strs.cap) {
+			struct btf_str_ptr *new_ptrs;
+			__u32 grow = strs.cnt / 2;
+			__u32 new_cap;
+
+			if (grow < 16)
+				grow = 16;
+			new_cap = strs.cap + grow;
+			new_ptrs = realloc(strs.ptrs,
+					   sizeof(strs.ptrs[0]) * new_cap);
+			if (!new_ptrs) {
+				err = -ENOMEM;
+				goto done;
+			}
+			/* commit capacity only once the buffer really grew */
+			strs.ptrs = new_ptrs;
+			strs.cap = new_cap;
+		}
+
+		strs.ptrs[strs.cnt].str = p;
+		strs.ptrs[strs.cnt].used = false;
+
+		p += strlen(p) + 1;
+		strs.cnt++;
+	}
+
+	/* temporary storage for deduplicated strings */
+	tmp_strs = malloc(d->btf->hdr->str_len);
+	if (!tmp_strs) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	/* mark all used strings; the string at offset 0 is always kept */
+	strs.ptrs[0].used = true;
+	err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs);
+	if (err)
+		goto done;
+
+	/* sort strings by content, so that we can identify duplicates */
+	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content);
+
+	/*
+	 * iterate groups of equal strings and if any instance in a group was
+	 * referenced, emit single instance and remember new offset
+	 */
+	p = tmp_strs;
+	grp_idx = 0;
+	grp_used = strs.ptrs[0].used;
+	/* iterate past end to avoid code duplication after loop */
+	for (i = 1; i <= strs.cnt; i++) {
+		/*
+		 * when i == strs.cnt, we want to skip string comparison and go
+		 * straight to handling last group of strings (otherwise we'd
+		 * need to handle last group after the loop w/ duplicated code)
+		 */
+		if (i < strs.cnt &&
+		    !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) {
+			grp_used = grp_used || strs.ptrs[i].used;
+			continue;
+		}
+
+		/*
+		 * this check would have been required after the loop to handle
+		 * last group of strings, but due to <= condition in a loop
+		 * we avoid that duplication
+		 */
+		if (grp_used) {
+			int new_off = p - tmp_strs;
+			__u32 len = strlen(strs.ptrs[grp_idx].str);
+
+			memmove(p, strs.ptrs[grp_idx].str, len + 1);
+			for (j = grp_idx; j < i; j++)
+				strs.ptrs[j].new_off = new_off;
+			p += len + 1;
+		}
+
+		if (i < strs.cnt) {
+			grp_idx = i;
+			grp_used = strs.ptrs[i].used;
+		}
+	}
+
+	/* replace original strings with deduped ones */
+	d->btf->hdr->str_len = p - tmp_strs;
+	memmove(start, tmp_strs, d->btf->hdr->str_len);
+
+	/* restore original order for further binary search lookups */
+	qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset);
+
+	/* remap string offsets */
+	err = btf_for_each_str_off(d, btf_str_remap_offset, &strs);
+
+done:
+	free(tmp_strs);
+	free(strs.ptrs);
+	return err;
+}
+
+/* Hash the fields shared by all type kinds: name_off, info and size. */
+static long btf_hash_common(struct btf_type *t)
+{
+	long h = hash_combine(0, t->name_off);
+
+	h = hash_combine(h, t->info);
+	return hash_combine(h, t->size);
+}
+
+/* Compare the fields shared by all type kinds: name_off, info and size. */
+static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2)
+{
+	if (t1->name_off != t2->name_off)
+		return false;
+	if (t1->info != t2->info)
+		return false;
+	return t1->size == t2->size;
+}
+
+/*
+ * Signature hash of INT: common fields plus the 32-bit word stored right
+ * after the type descriptor.
+ */
+static long btf_hash_int(struct btf_type *t)
+{
+	const __u32 *int_data = (__u32 *)(t + 1);
+
+	return hash_combine(btf_hash_common(t), *int_data);
+}
+
+/*
+ * Two INTs are equal when their common fields match and the 32-bit word
+ * following each descriptor is the same.
+ */
+static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2)
+{
+	if (!btf_equal_common(t1, t2))
+		return false;
+
+	return *(__u32 *)(t1 + 1) == *(__u32 *)(t2 + 1);
+}
+
+/*
+ * Signature hash of ENUM. The low 16 bits of info (vlen) and the enum
+ * members are deliberately left out to support enum fwd resolving.
+ */
+static long btf_hash_enum(struct btf_type *t)
+{
+	long h = hash_combine(0, t->name_off);
+
+	h = hash_combine(h, t->info & ~0xffff);
+	return hash_combine(h, t->size);
+}
+
+/*
+ * Two ENUMs are equal when their common fields match and every enumerator
+ * has the same name offset and value, position by position.
+ */
+static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
+{
+	const struct btf_enum *e1, *e2;
+	__u16 n;
+	int k;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+
+	n = btf_vlen(t1);
+	e1 = btf_enum(t1);
+	e2 = btf_enum(t2);
+	for (k = 0; k < n; k++) {
+		if (e1[k].name_off != e2[k].name_off)
+			return false;
+		if (e1[k].val != e2[k].val)
+			return false;
+	}
+	return true;
+}
+
+/* An ENUM without any enumerators is treated as an enum forward declaration. */
+static inline bool btf_is_enum_fwd(struct btf_type *t)
+{
+	if (!btf_is_enum(t))
+		return false;
+	return btf_vlen(t) == 0;
+}
+
+/*
+ * Compatibility of two ENUMs: if neither is an enum fwd they must be fully
+ * equal; otherwise only name, size and info without vlen have to match.
+ */
+static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
+{
+	if (btf_is_enum_fwd(t1) || btf_is_enum_fwd(t2)) {
+		/* ignore vlen when comparing */
+		if (t1->name_off != t2->name_off)
+			return false;
+		if ((t1->info & ~0xffff) != (t2->info & ~0xffff))
+			return false;
+		return t1->size == t2->size;
+	}
+
+	return btf_equal_enum(t1, t2);
+}
+
+/*
+ * Signature hash of STRUCT/UNION: common fields plus name offset and offset
+ * of every member. Member type IDs are not hashed, since they may still be
+ * unresolved; their equivalence is established by the type graph check.
+ */
+static long btf_hash_struct(struct btf_type *t)
+{
+	const struct btf_member *members = btf_members(t);
+	__u32 n = btf_vlen(t);
+	long h = btf_hash_common(t);
+	int k;
+
+	for (k = 0; k < n; k++) {
+		h = hash_combine(h, members[k].name_off);
+		h = hash_combine(h, members[k].offset);
+	}
+	return h;
+}
+
+/*
+ * Check structural compatibility of two STRUCTs/UNIONs, ignoring referenced
+ * type IDs. This check is performed during type graph equivalence check and
+ * referenced types equivalence is checked separately.
+ */
+static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2)
+{
+	const struct btf_member *lhs, *rhs;
+	__u16 n;
+	int k;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+
+	/* members are compared by name and offset only, not by type ID */
+	n = btf_vlen(t1);
+	lhs = btf_members(t1);
+	rhs = btf_members(t2);
+	for (k = 0; k < n; k++) {
+		if (lhs[k].name_off != rhs[k].name_off)
+			return false;
+		if (lhs[k].offset != rhs[k].offset)
+			return false;
+	}
+	return true;
+}
+
+/*
+ * Signature hash of ARRAY: common fields plus element type ID, index type ID
+ * and element count. Assumes the referenced type IDs were already resolved
+ * to canonical IDs and will not change.
+ */
+static long btf_hash_array(struct btf_type *t)
+{
+	const struct btf_array *arr = btf_array(t);
+	long h;
+
+	h = hash_combine(btf_hash_common(t), arr->type);
+	h = hash_combine(h, arr->index_type);
+	return hash_combine(h, arr->nelems);
+}
+
+/*
+ * Exact equality of two ARRAYs, referenced type IDs included; assumes those
+ * IDs were already resolved to canonical IDs and will not change.
+ * Called during reference types deduplication to compare an ARRAY to
+ * a potential canonical representative.
+ */
+static bool btf_equal_array(struct btf_type *t1, struct btf_type *t2)
+{
+	const struct btf_array *a1, *a2;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+
+	a1 = btf_array(t1);
+	a2 = btf_array(t2);
+	if (a1->type != a2->type)
+		return false;
+	if (a1->index_type != a2->index_type)
+		return false;
+	return a1->nelems == a2->nelems;
+}
+
+/*
+ * Structural compatibility of two ARRAYs: common fields and element count
+ * must match, referenced type IDs are ignored. Used during type graph
+ * equivalence check, where referenced types are compared separately.
+ */
+static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2)
+{
+	const struct btf_array *a1, *a2;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+
+	a1 = btf_array(t1);
+	a2 = btf_array(t2);
+	return a1->nelems == a2->nelems;
+}
+
+/*
+ * Signature hash of FUNC_PROTO: common fields plus name offset and type ID
+ * of every parameter. Assumes the referenced type IDs were already resolved
+ * to canonical IDs and will not change.
+ */
+static long btf_hash_fnproto(struct btf_type *t)
+{
+	const struct btf_param *params = btf_params(t);
+	__u16 n = btf_vlen(t);
+	long h = btf_hash_common(t);
+	int k;
+
+	for (k = 0; k < n; k++) {
+		h = hash_combine(h, params[k].name_off);
+		h = hash_combine(h, params[k].type);
+	}
+	return h;
+}
+
+/*
+ * Exact equality of two FUNC_PROTOs, referenced type IDs included; assumes
+ * those IDs were already resolved to canonical IDs and will not change.
+ * Called during reference types deduplication to compare a FUNC_PROTO to
+ * a potential canonical representative.
+ */
+static bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2)
+{
+	const struct btf_param *p1, *p2;
+	__u16 n;
+	int k;
+
+	if (!btf_equal_common(t1, t2))
+		return false;
+
+	n = btf_vlen(t1);
+	p1 = btf_params(t1);
+	p2 = btf_params(t2);
+	for (k = 0; k < n; k++) {
+		if (p1[k].name_off != p2[k].name_off)
+			return false;
+		if (p1[k].type != p2[k].type)
+			return false;
+	}
+	return true;
+}
+
+/*
+ * Structural compatibility of two FUNC_PROTOs: name, info and parameter
+ * names must match, while the return type ID and parameter type IDs are
+ * ignored. Used during type graph equivalence check, where referenced types
+ * are compared separately.
+ */
+static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
+{
+	const struct btf_param *p1, *p2;
+	__u16 n;
+	int k;
+
+	/* skip return type ID */
+	if (t1->name_off != t2->name_off)
+		return false;
+	if (t1->info != t2->info)
+		return false;
+
+	n = btf_vlen(t1);
+	p1 = btf_params(t1);
+	p2 = btf_params(t2);
+	for (k = 0; k < n; k++) {
+		if (p1[k].name_off != p2[k].name_off)
+			return false;
+	}
+	return true;
+}
+
+/*
+ * Deduplicate one primitive type (INT, ENUM or FWD), i.e. a type that can't
+ * reference other types. Its signature hash is looked up in
+ * `btf_dedup->dedup_table` and the type is compared against each candidate.
+ * On a match the type is mapped to that candidate; otherwise it becomes
+ * canonical itself and is added to the table as a new candidate.
+ *
+ * All other known kinds are skipped with 0. Returns -EINVAL for an unknown
+ * kind and -ENOMEM if the table could not be extended.
+ */
+static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
+{
+	struct btf_type *t = d->btf->types[type_id];
+	struct hashmap_entry *entry;
+	struct btf_type *cand;
+	/* stays equal to type_id unless an equivalent candidate is found */
+	__u32 canon_id = type_id;
+	__u32 cand_id;
+	long h;
+
+	switch (btf_kind(t)) {
+	case BTF_KIND_INT:
+		h = btf_hash_int(t);
+		for_each_dedup_cand(d, entry, h) {
+			cand_id = (__u32)(long)entry->value;
+			cand = d->btf->types[cand_id];
+			if (btf_equal_int(t, cand)) {
+				canon_id = cand_id;
+				break;
+			}
+		}
+		break;
+
+	case BTF_KIND_ENUM:
+		h = btf_hash_enum(t);
+		for_each_dedup_cand(d, entry, h) {
+			cand_id = (__u32)(long)entry->value;
+			cand = d->btf->types[cand_id];
+			if (btf_equal_enum(t, cand)) {
+				canon_id = cand_id;
+				break;
+			}
+			if (d->opts.dont_resolve_fwds || !btf_compat_enum(t, cand))
+				continue;
+			if (btf_is_enum_fwd(t)) {
+				/* resolve fwd to full enum */
+				canon_id = cand_id;
+				break;
+			}
+			/* resolve canonical enum fwd to full enum */
+			d->map[cand_id] = type_id;
+		}
+		break;
+
+	case BTF_KIND_FWD:
+		h = btf_hash_common(t);
+		for_each_dedup_cand(d, entry, h) {
+			cand_id = (__u32)(long)entry->value;
+			cand = d->btf->types[cand_id];
+			if (btf_equal_common(t, cand)) {
+				canon_id = cand_id;
+				break;
+			}
+		}
+		break;
+
+	/* kinds handled by other passes (or never deduped) are skipped */
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_PTR:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_ARRAY:
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+	case BTF_KIND_FUNC:
+	case BTF_KIND_FUNC_PROTO:
+	case BTF_KIND_VAR:
+	case BTF_KIND_DATASEC:
+		return 0;
+
+	default:
+		return -EINVAL;
+	}
+
+	d->map[type_id] = canon_id;
+	if (canon_id != type_id)
+		return 0;
+
+	/* no equivalent found: register ourselves as a new candidate */
+	if (btf_dedup_table_add(d, h, type_id))
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * Run btf_dedup_prim_type() over type IDs 1..nr_types, stopping at and
+ * returning the first non-zero result; returns 0 if all of them succeed.
+ */
+static int btf_dedup_prim_types(struct btf_dedup *d)
+{
+	int id, err = 0;
+
+	for (id = 1; !err && id <= d->btf->nr_types; id++)
+		err = btf_dedup_prim_type(d, id);
+
+	return err;
+}
+
+/*
+ * Tell whether `type_id` already has a canonical mapping (possibly to
+ * itself), i.e. its map[] entry is not above BTF_MAX_NR_TYPES.
+ */
+static inline bool is_type_mapped(struct btf_dedup *d, uint32_t type_id)
+{
+	__u32 mapped = d->map[type_id];
+
+	return !(mapped > BTF_MAX_NR_TYPES);
+}
+
+/*
+ * Follow the map[] chain from `type_id` until reaching a type that is either
+ * not mapped yet or mapped to itself, and return that ID. Because a FWD
+ * resolved to STRUCT/UNION is recorded in the same map, such links are
+ * followed as well.
+ */
+static inline __u32 resolve_type_id(struct btf_dedup *d, __u32 type_id)
+{
+	for (;;) {
+		__u32 next;
+
+		if (!is_type_mapped(d, type_id))
+			break;
+		next = d->map[type_id];
+		if (next == type_id)
+			break;
+		type_id = next;
+	}
+	return type_id;
+}
+
+/*
+ * If `type_id` is a FWD whose map[] chain ends at a non-FWD type, return
+ * that type's ID; in every other case return `type_id` unchanged.
+ */
+static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id)
+{
+	__u32 cur = type_id;
+
+	if (!btf_is_fwd(d->btf->types[cur]))
+		return type_id;
+
+	for (;;) {
+		__u32 next;
+
+		if (!is_type_mapped(d, cur))
+			break;
+		next = d->map[cur];
+		if (next == cur)
+			break;
+		cur = next;
+	}
+
+	/* chain ended at another FWD: nothing was resolved */
+	if (btf_is_fwd(d->btf->types[cur]))
+		return type_id;
+
+	return cur;
+}
+
+
+/* Kind a FWD stands for: UNION when its kflag is set, STRUCT otherwise. */
+static inline __u16 btf_fwd_kind(struct btf_type *t)
+{
+	if (btf_kflag(t))
+		return BTF_KIND_UNION;
+	return BTF_KIND_STRUCT;
+}
+
+/*
+ * Check equivalence of BTF type graph formed by candidate struct/union (we'll
+ * call it "candidate graph" in this description for brevity) to a type graph
+ * formed by (potential) canonical struct/union ("canonical graph" for brevity
+ * here, though keep in mind that not all types in canonical graph are
+ * necessarily canonical representatives themselves, some of them might be
+ * duplicates or its uniqueness might not have been established yet).
+ * Returns:
+ * - >0, if type graphs are equivalent;
+ * - 0, if not equivalent;
+ * - <0, on error.
+ *
+ * Algorithm performs side-by-side DFS traversal of both type graphs and checks
+ * equivalence of BTF types at each step. If at any point BTF types in candidate
+ * and canonical graphs are not compatible structurally, whole graphs are
+ * incompatible. If types are structurally equivalent (i.e., all information
+ * except referenced type IDs is exactly the same), a mapping from `canon_id` to
+ * a `cand_id` is recorded in hypothetical mapping (`btf_dedup->hypot_map`).
+ * If a type references other types, then those referenced types are checked
+ * for equivalence recursively.
+ *
+ * During DFS traversal, if we find that for current `canon_id` type we
+ * already have some mapping in hypothetical map, we check for two possible
+ * situations:
+ * - `canon_id` is mapped to exactly the same type as `cand_id`. This will
+ * happen when type graphs have cycles. In this case we assume those two
+ * types are equivalent.
+ * - `canon_id` is mapped to different type. This is contradiction in our
+ * hypothetical mapping, because same type in canonical graph corresponds
+ * to two different types in candidate graph, which for equivalent type
+ * graphs shouldn't happen. This condition terminates equivalence check
+ * with negative result.
+ *
+ * If type graphs traversal exhausts types to check and find no contradiction,
+ * then type graphs are equivalent.
+ *
+ * When checking types for equivalence, there is one special case: FWD types.
+ * If FWD type resolution is allowed and one of the types (either from canonical
+ * or candidate graph) is FWD and other is STRUCT/UNION (depending on FWD's kind
+ * flag) and their names match, hypothetical mapping is updated to point from
+ * FWD to STRUCT/UNION. If graphs will be determined as equivalent successfully,
+ * this mapping will be used to record FWD -> STRUCT/UNION mapping permanently.
+ *
+ * Technically, this could lead to incorrect FWD to STRUCT/UNION resolution,
+ * if there are two exactly named (or anonymous) structs/unions that are
+ * compatible structurally, one of which has FWD field, while other is concrete
+ * STRUCT/UNION, but according to C sources they are different structs/unions
+ * that are referencing different types with the same name. This is extremely
+ * unlikely to happen, but btf_dedup API allows to disable FWD resolution if
+ * this logic is causing problems.
+ *
+ * Doing FWD resolution means that both candidate and/or canonical graphs can
+ * consist of portions of the graph that come from multiple compilation units.
+ * This is due to the fact that types within single compilation unit are always
+ * deduplicated and FWDs are already resolved, if referenced struct/union
+ * definition is available. So, if we had unresolved FWD and found corresponding
+ * STRUCT/UNION, they will be from different compilation units. This
+ * consequently means that when we "link" FWD to corresponding STRUCT/UNION,
+ * type graph will likely have at least two different BTF types that describe
+ * same type (e.g., most probably there will be two different BTF types for the
+ * same 'int' primitive type) and could even have "overlapping" parts of type
+ * graph that describe same subset of types.
+ *
+ * This in turn means that our assumption that each type in canonical graph
+ * must correspond to exactly one type in candidate graph might not hold
+ * anymore and will make it harder to detect contradictions using hypothetical
+ * map. To handle this problem, we allow to follow FWD -> STRUCT/UNION
+ * resolution only in canonical graph. FWDs in candidate graphs are never
+ * resolved. To see why it's OK, let's check all possible situations w.r.t. FWDs
+ * that can occur:
+ * - Both types in canonical and candidate graphs are FWDs. If they are
+ * structurally equivalent, then they can either be both resolved to the
+ * same STRUCT/UNION or not resolved at all. In both cases they are
+ * equivalent and there is no need to resolve FWD on candidate side.
+ * - Both types in canonical and candidate graphs are concrete STRUCT/UNION,
+ * so nothing to resolve as well, algorithm will check equivalence anyway.
+ * - Type in canonical graph is FWD, while type in candidate is concrete
+ * STRUCT/UNION. In this case candidate graph comes from single compilation
+ * unit, so there is exactly one BTF type for each unique C type. After
+ * resolving FWD into STRUCT/UNION, there might be more than one BTF type
+ * in canonical graph mapping to single BTF type in candidate graph, but
+ * because hypothetical mapping maps from canonical to candidate types, it's
+ * alright, and we still maintain the property of having single `canon_id`
+ * mapping to single `cand_id` (there could be two different `canon_id`
+ * mapped to the same `cand_id`, but it's not contradictory).
+ * - Type in canonical graph is concrete STRUCT/UNION, while type in candidate
+ * graph is FWD. In this case we are just going to check compatibility of
+ * STRUCT/UNION and corresponding FWD, and if they are compatible, we'll
+ * assume that whatever STRUCT/UNION FWD resolves to must be equivalent to
+ * a concrete STRUCT/UNION from canonical graph. If the rest of type graphs
+ * turn out equivalent, we'll re-resolve FWD to concrete STRUCT/UNION from
+ * canonical graph.
+ */
+static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
+ __u32 canon_id)
+{
+ /*
+ * Returns 1 if the two type graphs are equivalent, 0 if they are not,
+ * and a negative errno on failure (-ENOMEM when the hypothetical map
+ * cannot be extended, -EINVAL for a kind not handled by the switch).
+ */
+ struct btf_type *cand_type;
+ struct btf_type *canon_type;
+ __u32 hypot_type_id;
+ __u16 cand_kind;
+ __u16 canon_kind;
+ int i, eq;
+
+ /* if both resolve to the same canonical, they must be equivalent */
+ if (resolve_type_id(d, cand_id) == resolve_type_id(d, canon_id))
+ return 1;
+
+ /* FWDs are followed on the canonical side only, never on the candidate side */
+ canon_id = resolve_fwd_id(d, canon_id);
+
+ /*
+ * canon_id was already visited during this check: the graphs stay
+ * consistent only if it was paired with this very cand_id. Returning
+ * here instead of descending again is also what stops the recursion
+ * when the type graph loops back on itself.
+ */
+ hypot_type_id = d->hypot_map[canon_id];
+ if (hypot_type_id <= BTF_MAX_NR_TYPES)
+ return hypot_type_id == cand_id;
+
+ /* record the pair before descending so that recursive calls can see it */
+ if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
+ return -ENOMEM;
+
+ cand_type = d->btf->types[cand_id];
+ canon_type = d->btf->types[canon_id];
+ cand_kind = btf_kind(cand_type);
+ canon_kind = btf_kind(canon_type);
+
+ /*
+ * Names are compared by string offset, not by content.
+ * NOTE(review): presumably strings are already deduplicated at this
+ * point, so equal names share one offset - confirm.
+ */
+ if (cand_type->name_off != canon_type->name_off)
+ return 0;
+
+ /* FWD <--> STRUCT/UNION equivalence check, if enabled */
+ if (!d->opts.dont_resolve_fwds
+ && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD)
+ && cand_kind != canon_kind) {
+ __u16 real_kind;
+ __u16 fwd_kind;
+
+ /* pick whichever side is the FWD and compare it with the other side's kind */
+ if (cand_kind == BTF_KIND_FWD) {
+ real_kind = canon_kind;
+ fwd_kind = btf_fwd_kind(cand_type);
+ } else {
+ real_kind = cand_kind;
+ fwd_kind = btf_fwd_kind(canon_type);
+ }
+ return fwd_kind == real_kind;
+ }
+
+ if (cand_kind != canon_kind)
+ return 0;
+
+ switch (cand_kind) {
+ case BTF_KIND_INT:
+ return btf_equal_int(cand_type, canon_type);
+
+ case BTF_KIND_ENUM:
+ /* the looser btf_compat_enum() check is used only when FWD resolution is on */
+ if (d->opts.dont_resolve_fwds)
+ return btf_equal_enum(cand_type, canon_type);
+ else
+ return btf_compat_enum(cand_type, canon_type);
+
+ case BTF_KIND_FWD:
+ return btf_equal_common(cand_type, canon_type);
+
+ /* single-reference kinds: same info word, then compare the referenced types */
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FUNC:
+ if (cand_type->info != canon_type->info)
+ return 0;
+ return btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
+
+ case BTF_KIND_ARRAY: {
+ const struct btf_array *cand_arr, *canon_arr;
+
+ if (!btf_compat_array(cand_type, canon_type))
+ return 0;
+ cand_arr = btf_array(cand_type);
+ canon_arr = btf_array(canon_type);
+ eq = btf_dedup_is_equiv(d,
+ cand_arr->index_type, canon_arr->index_type);
+ /* propagate both "not equivalent" (0) and errors (< 0) */
+ if (eq <= 0)
+ return eq;
+ return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type);
+ }
+
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *cand_m, *canon_m;
+ __u16 vlen;
+
+ if (!btf_shallow_equal_struct(cand_type, canon_type))
+ return 0;
+ vlen = btf_vlen(cand_type);
+ cand_m = btf_members(cand_type);
+ canon_m = btf_members(canon_type);
+ /* walk both member arrays in lockstep, comparing member types pairwise */
+ for (i = 0; i < vlen; i++) {
+ eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type);
+ if (eq <= 0)
+ return eq;
+ cand_m++;
+ canon_m++;
+ }
+
+ return 1;
+ }
+
+ case BTF_KIND_FUNC_PROTO: {
+ const struct btf_param *cand_p, *canon_p;
+ __u16 vlen;
+
+ if (!btf_compat_fnproto(cand_type, canon_type))
+ return 0;
+ /* the type referenced by the proto itself first, then each parameter type */
+ eq = btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
+ if (eq <= 0)
+ return eq;
+ vlen = btf_vlen(cand_type);
+ cand_p = btf_params(cand_type);
+ canon_p = btf_params(canon_type);
+ for (i = 0; i < vlen; i++) {
+ eq = btf_dedup_is_equiv(d, cand_p->type, canon_p->type);
+ if (eq <= 0)
+ return eq;
+ cand_p++;
+ canon_p++;
+ }
+ return 1;
+ }
+
+ default:
+ /* any kind without a case above is rejected */
+ return -EINVAL;
+ }
+ /* not reached: every switch branch returns */
+ return 0;
+}
+
+/*
+ * Use hypothetical mapping, produced by successful type graph equivalence
+ * check, to augment existing struct/union canonical mapping, where possible.
+ *
+ * If BTF_KIND_FWD resolution is allowed, this mapping is also used to record
+ * FWD -> STRUCT/UNION correspondence as well. FWD resolution is bidirectional:
+ * it doesn't matter if FWD type was part of canonical graph or candidate one,
+ * we are recording the mapping anyway. As opposed to carefulness required
+ * for struct/union correspondence mapping (described below), for FWD resolution
+ * it's not important, as by the time that FWD type (reference type) will be
+ * deduplicated all structs/unions will be deduped already anyway.
+ *
+ * Recording STRUCT/UNION mapping is purely a performance optimization and is
+ * not required for correctness. It needs to be done carefully to ensure that
+ * struct/union from candidate's type graph is not mapped into corresponding
+ * struct/union from canonical type graph that itself hasn't been resolved into
+ * canonical representative. The only guarantee we have is that canonical
+ * struct/union was determined as canonical and that won't change. But any
+ * types referenced through that struct/union fields could have been not yet
+ * resolved, so in case like that it's too early to establish any kind of
+ * correspondence between structs/unions.
+ *
+ * No canonical correspondence is derived for primitive types (they are already
+ * deduplicated completely anyway) or reference types (they rely on
+ * stability of struct/union canonical relationship for equivalence checks).
+ */
+static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
+{
+	int n;
+
+	for (n = 0; n < d->hypot_cnt; n++) {
+		/* each list entry is a key of hypot_map; the value is its pair */
+		const __u32 key_id = d->hypot_list[n];
+		const __u32 val_id = d->hypot_map[key_id];
+		const __u32 t_id = resolve_type_id(d, val_id);
+		const __u32 c_id = resolve_type_id(d, key_id);
+		const __u16 t_kind = btf_kind(d->btf->types[t_id]);
+		const __u16 c_kind = btf_kind(d->btf->types[c_id]);
+		const int t_is_fwd = t_kind == BTF_KIND_FWD;
+		const int c_is_fwd = c_kind == BTF_KIND_FWD;
+
+		if (t_is_fwd != c_is_fwd) {
+			/*
+			 * Exactly one side is a FWD: point the FWD at the
+			 * other type. The target does not have to be mapped
+			 * to its canonical representative yet; once it is,
+			 * the FWD resolves through it automatically, and that
+			 * happens before reference types (which need stable
+			 * mappings) are deduplicated.
+			 */
+			if (c_is_fwd)
+				d->map[c_id] = t_id;
+			else
+				d->map[t_id] = c_id;
+			continue;
+		}
+
+		/* both FWD, or target is neither struct nor union: nothing to do */
+		if (t_kind != BTF_KIND_STRUCT && t_kind != BTF_KIND_UNION)
+			continue;
+
+		/*
+		 * Perf optimization only: a struct/union from the graph that
+		 * was just verified may inherit the mapping, but solely when
+		 * the other side already has a canonical representative and
+		 * this one does not.
+		 */
+		if (is_type_mapped(d, c_id) && !is_type_mapped(d, t_id))
+			d->map[t_id] = c_id;
+	}
+}
+
+/*
+ * Deduplicate struct/union types.
+ *
+ * For each struct/union type its type signature hash is calculated, taking
+ * into account type's name, size, number, order and names of fields, but
+ * ignoring type ID's referenced from fields, because they might not be deduped
+ * completely until after reference types deduplication phase. This type hash
+ * is used to iterate over all potential canonical types, sharing same hash.
+ * For each canonical candidate we check whether type graphs that they form
+ * (through referenced types in fields and so on) are equivalent using algorithm
+ * implemented in `btf_dedup_is_equiv`. If such equivalence is found and
+ * BTF_KIND_FWD resolution is allowed, then hypothetical mapping
+ * (btf_dedup->hypot_map) produced by aforementioned type graph equivalence
+ * algorithm is used to record FWD -> STRUCT/UNION mapping. It's also used to
+ * potentially map other structs/unions to their canonical representatives,
+ * if such relationship hasn't yet been established. This speeds up algorithm
+ * by eliminating some of the duplicate work.
+ *
+ * If no matching canonical representative was found, struct/union is marked
+ * as canonical for itself and is added into btf_dedup->dedup_table hash map
+ * for further look ups.
+ */
+static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
+{
+	struct hashmap_entry *hash_entry;
+	struct btf_type *self;
+	__u32 canon_id;
+	long h;
+
+	/* a valid map entry means: already deduped, or being deduped (loop) */
+	if (d->map[type_id] <= BTF_MAX_NR_TYPES)
+		return 0;
+
+	self = d->btf->types[type_id];
+	switch (btf_kind(self)) {
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION:
+		break;
+	default:
+		/* this pass handles structs and unions only */
+		return 0;
+	}
+
+	/* unless an equivalent type turns up, the type is its own canonical */
+	canon_id = type_id;
+	h = btf_hash_struct(self);
+	for_each_dedup_cand(d, hash_entry, h) {
+		__u32 cand_id = (__u32)(long)hash_entry->value;
+		int eq;
+
+		/*
+		 * btf_dedup_is_equiv() repeats this shallow check for two
+		 * structs/unions, but it must also be done up front: a hash
+		 * collision could offer a matching FWD as candidate, and
+		 * since FWD and a compatible STRUCT/UNION count as
+		 * equivalent, that could create a FWD -> STRUCT and
+		 * STRUCT -> FWD loop.
+		 */
+		if (!btf_shallow_equal_struct(self, d->btf->types[cand_id]))
+			continue;
+
+		btf_dedup_clear_hypot_map(d);
+		eq = btf_dedup_is_equiv(d, type_id, cand_id);
+		if (eq < 0)
+			return eq;
+		if (eq) {
+			canon_id = cand_id;
+			btf_dedup_merge_hypot_map(d);
+			break;
+		}
+	}
+
+	d->map[type_id] = canon_id;
+	if (canon_id != type_id)
+		return 0;
+
+	/* new canonical type: make it findable for later candidates */
+	return btf_dedup_table_add(d, h, type_id) ? -ENOMEM : 0;
+}
+
+/* Run struct/union deduplication over every type ID, stopping at the first non-zero result. */
+static int btf_dedup_struct_types(struct btf_dedup *d)
+{
+	int id = 1;
+	int rc = 0;
+
+	/* type IDs start at 1 and run up to nr_types inclusive */
+	while (rc == 0 && id <= d->btf->nr_types)
+		rc = btf_dedup_struct_type(d, id++);
+
+	return rc;
+}
+
+/*
+ * Deduplicate reference type.
+ *
+ * Once all primitive and struct/union types got deduplicated, we can easily
+ * deduplicate all other (reference) BTF types. This is done in two steps:
+ *
+ * 1. Resolve all referenced type IDs into their canonical type IDs. This
+ * resolution can be done either immediately for primitive or struct/union types
+ * (because they were deduped in previous two phases) or recursively for
+ * reference types. Recursion will always terminate at either primitive or
+ * struct/union type, at which point we can "unwind" chain of reference types
+ * one by one. There is no danger of encountering cycles because in C type
+ * system the only way to form type cycle is through struct/union, so any chain
+ * of reference types, even those taking part in a type cycle, will inevitably
+ * reach struct/union at some point.
+ *
+ * 2. Once all referenced type IDs are resolved into canonical ones, BTF type
+ * becomes "stable", in the sense that no further deduplication will cause
+ * any changes to it. With that, it's now possible to calculate type's signature
+ * hash (this time taking into account referenced type IDs) and loop over all
+ * potential canonical representatives. If no match was found, current type
+ * will become canonical representative of itself and will be added into
+ * btf_dedup->dedup_table as another possible canonical representative.
+ */
+static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
+{
+ /*
+ * Returns the canonical type ID for type_id (>= 0) or a negative errno:
+ * -ELOOP if type_id is currently being processed higher up the call
+ * chain, -EINVAL for a kind without a case in the switch below, -ENOMEM
+ * if adding to the dedup table fails.
+ */
+ struct hashmap_entry *hash_entry;
+ /* if we don't find equivalent type, then we are representative type */
+ __u32 new_id = type_id, cand_id;
+ struct btf_type *t, *cand;
+ int ref_type_id;
+ long h;
+
+ if (d->map[type_id] == BTF_IN_PROGRESS_ID)
+ return -ELOOP;
+ /* already has a valid mapping: just report what it resolves to */
+ if (d->map[type_id] <= BTF_MAX_NR_TYPES)
+ return resolve_type_id(d, type_id);
+
+ t = d->btf->types[type_id];
+ /* mark as in progress so that a reference cycle is reported as -ELOOP */
+ d->map[type_id] = BTF_IN_PROGRESS_ID;
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FUNC:
+ /* canonicalize the referenced type first and rewrite the reference in place */
+ ref_type_id = btf_dedup_ref_type(d, t->type);
+ if (ref_type_id < 0)
+ return ref_type_id;
+ t->type = ref_type_id;
+
+ /* with the reference rewritten, look for an already-seen identical type */
+ h = btf_hash_common(t);
+ for_each_dedup_cand(d, hash_entry, h) {
+ cand_id = (__u32)(long)hash_entry->value;
+ cand = d->btf->types[cand_id];
+ if (btf_equal_common(t, cand)) {
+ new_id = cand_id;
+ break;
+ }
+ }
+ break;
+
+ case BTF_KIND_ARRAY: {
+ struct btf_array *info = btf_array(t);
+
+ /* both the element type and the index type are rewritten */
+ ref_type_id = btf_dedup_ref_type(d, info->type);
+ if (ref_type_id < 0)
+ return ref_type_id;
+ info->type = ref_type_id;
+
+ ref_type_id = btf_dedup_ref_type(d, info->index_type);
+ if (ref_type_id < 0)
+ return ref_type_id;
+ info->index_type = ref_type_id;
+
+ h = btf_hash_array(t);
+ for_each_dedup_cand(d, hash_entry, h) {
+ cand_id = (__u32)(long)hash_entry->value;
+ cand = d->btf->types[cand_id];
+ if (btf_equal_array(t, cand)) {
+ new_id = cand_id;
+ break;
+ }
+ }
+ break;
+ }
+
+ case BTF_KIND_FUNC_PROTO: {
+ struct btf_param *param;
+ __u16 vlen;
+ int i;
+
+ /* t->type first, then the type of every parameter */
+ ref_type_id = btf_dedup_ref_type(d, t->type);
+ if (ref_type_id < 0)
+ return ref_type_id;
+ t->type = ref_type_id;
+
+ vlen = btf_vlen(t);
+ param = btf_params(t);
+ for (i = 0; i < vlen; i++) {
+ ref_type_id = btf_dedup_ref_type(d, param->type);
+ if (ref_type_id < 0)
+ return ref_type_id;
+ param->type = ref_type_id;
+ param++;
+ }
+
+ h = btf_hash_fnproto(t);
+ for_each_dedup_cand(d, hash_entry, h) {
+ cand_id = (__u32)(long)hash_entry->value;
+ cand = d->btf->types[cand_id];
+ if (btf_equal_fnproto(t, cand)) {
+ new_id = cand_id;
+ break;
+ }
+ }
+ break;
+ }
+
+ default:
+ /* kinds without a case above are not handled by this function */
+ return -EINVAL;
+ }
+
+ /* replaces the in-progress marker with the final mapping */
+ d->map[type_id] = new_id;
+ /* no equivalent found: register this type as a new canonical candidate */
+ if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
+ return -ENOMEM;
+
+ return new_id;
+}
+
+/*
+ * Deduplicate reference types for every type ID, then release the dedup
+ * table. Returns 0 on success or the first negative error encountered (in
+ * which case the table is left untouched).
+ */
+static int btf_dedup_ref_types(struct btf_dedup *d)
+{
+	int id;
+
+	for (id = 1; id <= d->btf->nr_types; id++) {
+		/* non-negative results are canonical IDs and are not needed here */
+		int ret = btf_dedup_ref_type(d, id);
+
+		if (ret < 0)
+			return ret;
+	}
+
+	/* the candidate table has no further use past this point */
+	hashmap__free(d->dedup_table);
+	d->dedup_table = NULL;
+
+	return 0;
+}
+
+/*
+ * Compact types.
+ *
+ * After we established for each type its corresponding canonical representative
+ * type, we now can eliminate types that are not canonical and leave only
+ * canonical ones laid out sequentially in memory by copying them over
+ * duplicates. During compaction btf_dedup->hypot_map array is reused to store
+ * a map from original type ID to a new compacted type ID, which will be used
+ * during next phase to "fix up" type IDs, referenced from struct/union and
+ * reference types.
+ */
+static int btf_dedup_compact_types(struct btf_dedup *d)
+{
+ /*
+ * Returns 0 on success, a negative value from btf_type_size() if a
+ * type's size cannot be determined, or -ENOMEM if the types index
+ * cannot be reallocated.
+ */
+ struct btf_type **new_types;
+ __u32 next_type_id = 1;
+ char *types_start, *p;
+ int i, len;
+
+ /* we are going to reuse hypot_map to store compaction remapping */
+ d->hypot_map[0] = 0;
+ for (i = 1; i <= d->btf->nr_types; i++)
+ d->hypot_map[i] = BTF_UNPROCESSED_ID;
+
+ /* p is the write cursor; it starts at the beginning of the type data */
+ types_start = d->btf->nohdr_data + d->btf->hdr->type_off;
+ p = types_start;
+
+ for (i = 1; i <= d->btf->nr_types; i++) {
+ /* keep only types that are their own canonical representative */
+ if (d->map[i] != i)
+ continue;
+
+ len = btf_type_size(d->btf->types[i]);
+ if (len < 0)
+ return len;
+
+ /* memmove, not memcpy: source and destination ranges may overlap */
+ memmove(p, d->btf->types[i], len);
+ d->hypot_map[i] = next_type_id;
+ /* the index is updated in place; next_type_id never exceeds i */
+ d->btf->types[next_type_id] = (struct btf_type *)p;
+ p += len;
+ next_type_id++;
+ }
+
+ /* shrink struct btf's internal types index and update btf_header */
+ d->btf->nr_types = next_type_id - 1;
+ d->btf->types_size = d->btf->nr_types;
+ d->btf->hdr->type_len = p - types_start;
+ /* result goes into a temporary so the old pointer survives a failed realloc */
+ new_types = realloc(d->btf->types,
+ (1 + d->btf->nr_types) * sizeof(struct btf_type *));
+ if (!new_types)
+ return -ENOMEM;
+ d->btf->types = new_types;
+
+ /* make sure string section follows type information without gaps */
+ d->btf->hdr->str_off = p - (char *)d->btf->nohdr_data;
+ memmove(p, d->btf->strings, d->btf->hdr->str_len);
+ d->btf->strings = p;
+ p += d->btf->hdr->str_len;
+
+ /* p now points just past the moved string section */
+ d->btf->data_size = p - (char *)d->btf->data;
+ return 0;
+}
+
+/*
+ * Figure out final (deduplicated and compacted) type ID for provided original
+ * `type_id` by first resolving it into corresponding canonical type ID and
+ * then mapping it to a deduplicated type ID, stored in btf_dedup->hypot_map,
+ * which is populated during compaction phase.
+ */
+static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)
+{
+	/* step 1: canonical representative; step 2: its post-compaction ID */
+	const __u32 canon_id = resolve_type_id(d, type_id);
+	const __u32 final_id = d->hypot_map[canon_id];
+
+	/* anything above BTF_MAX_NR_TYPES is a marker value, not a real type ID */
+	return final_id <= BTF_MAX_NR_TYPES ? (int)final_id : -EINVAL;
+}
+
+/*
+ * Remap referenced type IDs into deduped type IDs.
+ *
+ * After BTF types are deduplicated and compacted, their final type IDs may
+ * differ from original ones. The map from original to a corresponding
+ * deduped type ID is stored in btf_dedup->hypot_map and is populated during
+ * compaction phase. During remapping phase we are rewriting all type IDs
+ * referenced from any BTF type (e.g., struct fields, func proto args, etc) to
+ * their final deduped type IDs.
+ */
+/*
+ * Replace the type ID stored at *id with its final deduplicated ID.
+ * Returns 0 on success; on failure returns the negative error from
+ * btf_dedup_remap_type_id() and leaves *id untouched.
+ */
+static int btf_dedup_remap_id_inplace(struct btf_dedup *d, __u32 *id)
+{
+	int remapped = btf_dedup_remap_type_id(d, *id);
+
+	if (remapped < 0)
+		return remapped;
+	*id = remapped;
+	return 0;
+}
+
+/*
+ * Rewrite every type ID referenced by type `type_id` to its final
+ * deduplicated ID. Returns 0 on success, a negative error if any referenced
+ * ID cannot be remapped, or -EINVAL for a kind without a case below.
+ */
+static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
+{
+	struct btf_type *t = d->btf->types[type_id];
+	__u16 cnt;
+	int n, err;
+
+	switch (btf_kind(t)) {
+	case BTF_KIND_INT:
+	case BTF_KIND_ENUM:
+		/* nothing is remapped for these kinds */
+		return 0;
+
+	case BTF_KIND_FWD:
+	case BTF_KIND_CONST:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_RESTRICT:
+	case BTF_KIND_PTR:
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_FUNC:
+	case BTF_KIND_VAR:
+		return btf_dedup_remap_id_inplace(d, &t->type);
+
+	case BTF_KIND_ARRAY: {
+		struct btf_array *arr = btf_array(t);
+
+		err = btf_dedup_remap_id_inplace(d, &arr->type);
+		if (err)
+			return err;
+		return btf_dedup_remap_id_inplace(d, &arr->index_type);
+	}
+
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		struct btf_member *m = btf_members(t);
+
+		cnt = btf_vlen(t);
+		for (n = 0; n < cnt; n++, m++) {
+			err = btf_dedup_remap_id_inplace(d, &m->type);
+			if (err)
+				return err;
+		}
+		return 0;
+	}
+
+	case BTF_KIND_FUNC_PROTO: {
+		struct btf_param *p = btf_params(t);
+
+		/* t->type first, then every parameter type */
+		err = btf_dedup_remap_id_inplace(d, &t->type);
+		if (err)
+			return err;
+
+		cnt = btf_vlen(t);
+		for (n = 0; n < cnt; n++, p++) {
+			err = btf_dedup_remap_id_inplace(d, &p->type);
+			if (err)
+				return err;
+		}
+		return 0;
+	}
+
+	case BTF_KIND_DATASEC: {
+		struct btf_var_secinfo *vsi = btf_var_secinfos(t);
+
+		cnt = btf_vlen(t);
+		for (n = 0; n < cnt; n++, vsi++) {
+			err = btf_dedup_remap_id_inplace(d, &vsi->type);
+			if (err)
+				return err;
+		}
+		return 0;
+	}
+
+	default:
+		return -EINVAL;
+	}
+}
+
+/* Remap referenced type IDs in every type; stops at the first negative result. */
+static int btf_dedup_remap_types(struct btf_dedup *d)
+{
+	int id = 1;
+
+	while (id <= d->btf->nr_types) {
+		int rc = btf_dedup_remap_type(d, id);
+
+		if (rc < 0)
+			return rc;
+		id++;
+	}
+
+	return 0;
+}
diff --git a/src/contrib/libbpf/bpf/btf.h b/src/contrib/libbpf/bpf/btf.h
new file mode 100644
index 0000000..d9ac73a
--- /dev/null
+++ b/src/contrib/libbpf/bpf/btf.h
@@ -0,0 +1,311 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2018 Facebook */
+
+#ifndef __LIBBPF_BTF_H
+#define __LIBBPF_BTF_H
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <linux/btf.h>
+#include <linux/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef LIBBPF_API
+#define LIBBPF_API __attribute__((visibility("default")))
+#endif
+
+/* ELF section names, as the _ELF_SEC suffix indicates */
+#define BTF_ELF_SEC ".BTF"
+#define BTF_EXT_ELF_SEC ".BTF.ext"
+#define MAPS_ELF_SEC ".maps"
+
+struct btf;
+struct btf_ext;
+struct btf_type;
+
+struct bpf_object;
+
+/*
+ * The .BTF.ext ELF section layout defined as
+ * struct btf_ext_header
+ * func_info subsection
+ *
+ * The func_info subsection layout:
+ * record size for struct bpf_func_info in the func_info subsection
+ * struct btf_sec_func_info for section #1
+ * a list of bpf_func_info records for section #1
+ * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h
+ * but may not be identical
+ * struct btf_sec_func_info for section #2
+ * a list of bpf_func_info records for section #2
+ * ......
+ *
+ * Note that the bpf_func_info record size in .BTF.ext may not
+ * be the same as the one defined in include/uapi/linux/bpf.h.
+ * The loader should ensure that record_size meets minimum
+ * requirement and pass the record as is to the kernel. The
+ * kernel will handle the func_info properly based on its contents.
+ */
+/* Header found at the start of the .BTF.ext section (see layout comment above). */
+struct btf_ext_header {
+ __u16 magic;
+ __u8 version;
+ __u8 flags;
+ /* NOTE(review): presumably the size of this header in bytes - confirm */
+ __u32 hdr_len;
+
+ /* All offsets are in bytes relative to the end of this header */
+ /* func_info subsection: offset and length */
+ __u32 func_info_off;
+ __u32 func_info_len;
+ /* line_info subsection: offset and length */
+ __u32 line_info_off;
+ __u32 line_info_len;
+
+ /* optional part of .BTF.ext header */
+ /* field_reloc subsection: offset and length */
+ __u32 field_reloc_off;
+ __u32 field_reloc_len;
+};
+
+/*
+ * Exported (LIBBPF_API) functions operating on struct btf objects.
+ * NOTE(review): implementations are not visible from this header; the
+ * exact ownership and error-return conventions should be confirmed against
+ * btf.c before relying on them.
+ */
+LIBBPF_API void btf__free(struct btf *btf);
+LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size);
+LIBBPF_API struct btf *btf__parse_elf(const char *path,
+ struct btf_ext **btf_ext);
+LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
+LIBBPF_API int btf__load(struct btf *btf);
+LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
+ const char *type_name);
+LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
+ const char *type_name, __u32 kind);
+LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf);
+LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
+ __u32 id);
+LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
+LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
+LIBBPF_API int btf__fd(const struct btf *btf);
+LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
+LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
+LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
+LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
+ __u32 expected_key_size,
+ __u32 expected_value_size,
+ __u32 *key_type_id, __u32 *value_type_id);
+
+/*
+ * Exported (LIBBPF_API) functions operating on struct btf_ext objects, i.e.
+ * on .BTF.ext data (func_info and line_info records).
+ * NOTE(review): implementations are not visible from this header; confirm
+ * who owns the buffers returned through func_info / line_info.
+ */
+LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size);
+LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
+LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext,
+ __u32 *size);
+LIBBPF_API int btf_ext__reloc_func_info(const struct btf *btf,
+ const struct btf_ext *btf_ext,
+ const char *sec_name, __u32 insns_cnt,
+ void **func_info, __u32 *cnt);
+LIBBPF_API int btf_ext__reloc_line_info(const struct btf *btf,
+ const struct btf_ext *btf_ext,
+ const char *sec_name, __u32 insns_cnt,
+ void **line_info, __u32 *cnt);
+LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
+LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
+
+/* Options accepted by btf__dedup(). */
+struct btf_dedup_opts {
+ /* NOTE(review): presumably sizes the dedup hash table - confirm in btf.c */
+ unsigned int dedup_table_size;
+ /*
+ * When set, the equivalence check in btf.c does not treat a FWD as
+ * equivalent to a STRUCT/UNION, and compares enums with
+ * btf_equal_enum() instead of btf_compat_enum().
+ */
+ bool dont_resolve_fwds;
+};
+
+LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
+ const struct btf_dedup_opts *opts);
+
+struct btf_dump;
+
+/* Options passed to btf_dump__new(). */
+struct btf_dump_opts {
+ /* NOTE(review): presumably handed back as the ctx argument of printf_fn - confirm */
+ void *ctx;
+};
+
+/* Output callback type: receives a context pointer, a format string and a va_list. */
+typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args);
+
+LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf,
+ const struct btf_ext *btf_ext,
+ const struct btf_dump_opts *opts,
+ btf_dump_printf_fn_t printf_fn);
+LIBBPF_API void btf_dump__free(struct btf_dump *d);
+
+LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id);
+
+/*
+ * A set of helpers for easier BTF types handling
+ */
+/* Extract the kind from the type's info word. */
+static inline __u16 btf_kind(const struct btf_type *t)
+{
+	const __u16 kind = BTF_INFO_KIND(t->info);
+
+	return kind;
+}
+
+/* Extract the vlen field from the type's info word. */
+static inline __u16 btf_vlen(const struct btf_type *t)
+{
+	const __u16 vlen = BTF_INFO_VLEN(t->info);
+
+	return vlen;
+}
+
+/* Report whether the kflag bit of the type's info word is set. */
+static inline bool btf_kflag(const struct btf_type *t)
+{
+	return BTF_INFO_KFLAG(t->info) != 0;
+}
+
+/* True if t is of kind BTF_KIND_INT. */
+static inline bool btf_is_int(const struct btf_type *t)
+{
+	const __u16 kind = btf_kind(t);
+
+	return kind == BTF_KIND_INT;
+}
+
+/* True if t is of kind BTF_KIND_PTR. */
+static inline bool btf_is_ptr(const struct btf_type *t)
+{
+	const __u16 kind = btf_kind(t);
+
+	return kind == BTF_KIND_PTR;
+}
+
+/* True if t is of kind BTF_KIND_ARRAY. */
+static inline bool btf_is_array(const struct btf_type *t)
+{
+	const __u16 kind = btf_kind(t);
+
+	return kind == BTF_KIND_ARRAY;
+}
+
+/* True if t is of kind BTF_KIND_STRUCT. */
+static inline bool btf_is_struct(const struct btf_type *t)
+{
+	const __u16 kind = btf_kind(t);
+
+	return kind == BTF_KIND_STRUCT;
+}
+
+/* True if t is of kind BTF_KIND_UNION. */
+static inline bool btf_is_union(const struct btf_type *t)
+{
+	const __u16 kind = btf_kind(t);
+
+	return kind == BTF_KIND_UNION;
+}
+
+/* True if t is a struct or a union. */
+static inline bool btf_is_composite(const struct btf_type *t)
+{
+	const __u16 k = btf_kind(t);
+
+	return !(k != BTF_KIND_STRUCT && k != BTF_KIND_UNION);
+}
+
+/* True if t is of kind BTF_KIND_ENUM. */
+static inline bool btf_is_enum(const struct btf_type *t)
+{
+	const __u16 kind = btf_kind(t);
+
+	return kind == BTF_KIND_ENUM;
+}
+
+/* True if t is of kind BTF_KIND_FWD. */
+static inline bool btf_is_fwd(const struct btf_type *t)
+{
+	const __u16 kind = btf_kind(t);
+
+	return kind == BTF_KIND_FWD;
+}
+
+/* True if t is of kind BTF_KIND_TYPEDEF. */
+static inline bool btf_is_typedef(const struct btf_type *t)
+{
+	const __u16 kind = btf_kind(t);
+
+	return kind == BTF_KIND_TYPEDEF;
+}
+
+/* True if t is of kind BTF_KIND_VOLATILE. */
+static inline bool btf_is_volatile(const struct btf_type *t)
+{
+	const __u16 kind = btf_kind(t);
+
+	return kind == BTF_KIND_VOLATILE;
+}
+
+/* True if t is of kind BTF_KIND_CONST. */
+static inline bool btf_is_const(const struct btf_type *t)
+{
+	const __u16 kind = btf_kind(t);
+
+	return kind == BTF_KIND_CONST;
+}
+
+/* True if t is of kind BTF_KIND_RESTRICT. */
+static inline bool btf_is_restrict(const struct btf_type *t)
+{
+	const __u16 kind = btf_kind(t);
+
+	return kind == BTF_KIND_RESTRICT;
+}
+
+/* True if t is a modifier: volatile, const or restrict. */
+static inline bool btf_is_mod(const struct btf_type *t)
+{
+	const __u16 k = btf_kind(t);
+
+	return !(k != BTF_KIND_VOLATILE &&
+		 k != BTF_KIND_CONST &&
+		 k != BTF_KIND_RESTRICT);
+}
+
+/* True if t is of kind BTF_KIND_FUNC. */
+static inline bool btf_is_func(const struct btf_type *t)
+{
+	const __u16 kind = btf_kind(t);
+
+	return kind == BTF_KIND_FUNC;
+}
+
+/* True if t is of kind BTF_KIND_FUNC_PROTO. */
+static inline bool btf_is_func_proto(const struct btf_type *t)
+{
+	const __u16 kind = btf_kind(t);
+
+	return kind == BTF_KIND_FUNC_PROTO;
+}
+
+/* True if t is of kind BTF_KIND_VAR. */
+static inline bool btf_is_var(const struct btf_type *t)
+{
+	const __u16 kind = btf_kind(t);
+
+	return kind == BTF_KIND_VAR;
+}
+
+/* True if t is of kind BTF_KIND_DATASEC. */
+static inline bool btf_is_datasec(const struct btf_type *t)
+{
+	const __u16 kind = btf_kind(t);
+
+	return kind == BTF_KIND_DATASEC;
+}
+
+/* Encoding bits of the 32-bit word that directly follows struct btf_type. */
+static inline __u8 btf_int_encoding(const struct btf_type *t)
+{
+	const __u32 *int_data = (const __u32 *)(t + 1);
+
+	return BTF_INT_ENCODING(*int_data);
+}
+
+/* Offset field of the 32-bit word that directly follows struct btf_type. */
+static inline __u8 btf_int_offset(const struct btf_type *t)
+{
+	const __u32 *int_data = (const __u32 *)(t + 1);
+
+	return BTF_INT_OFFSET(*int_data);
+}
+
+/* Bits field of the 32-bit word that directly follows struct btf_type. */
+static inline __u8 btf_int_bits(const struct btf_type *t)
+{
+	const __u32 *int_data = (const __u32 *)(t + 1);
+
+	return BTF_INT_BITS(*int_data);
+}
+
+/* View the data directly following struct btf_type as a struct btf_array. */
+static inline struct btf_array *btf_array(const struct btf_type *t)
+{
+	const struct btf_type *trailer = t + 1;
+
+	return (struct btf_array *)trailer;
+}
+
+/* View the data directly following struct btf_type as struct btf_enum entries. */
+static inline struct btf_enum *btf_enum(const struct btf_type *t)
+{
+	const struct btf_type *trailer = t + 1;
+
+	return (struct btf_enum *)trailer;
+}
+
+/* View the data directly following struct btf_type as struct btf_member entries. */
+static inline struct btf_member *btf_members(const struct btf_type *t)
+{
+	const struct btf_type *trailer = t + 1;
+
+	return (struct btf_member *)trailer;
+}
+
+/*
+ * Bit offset of the member at index member_idx. With kflag set, the raw
+ * offset field is decoded through BTF_MEMBER_BIT_OFFSET(); otherwise it is
+ * returned as is.
+ */
+static inline __u32 btf_member_bit_offset(const struct btf_type *t,
+					  __u32 member_idx)
+{
+	const struct btf_member *member = &btf_members(t)[member_idx];
+
+	if (btf_kflag(t))
+		return BTF_MEMBER_BIT_OFFSET(member->offset);
+	return member->offset;
+}
+/*
+ * Bitfield size of the member at index member_idx; t is expected to be a
+ * BTF_KIND_STRUCT or BTF_KIND_UNION. Yields zero when the member is not a
+ * bitfield, which is always the case when kflag is clear.
+ */
+static inline __u32 btf_member_bitfield_size(const struct btf_type *t,
+					     __u32 member_idx)
+{
+	if (!btf_kflag(t))
+		return 0;
+
+	return BTF_MEMBER_BITFIELD_SIZE(btf_members(t)[member_idx].offset);
+}
+
+/* View the data directly following struct btf_type as struct btf_param entries. */
+static inline struct btf_param *btf_params(const struct btf_type *t)
+{
+	const struct btf_type *trailer = t + 1;
+
+	return (struct btf_param *)trailer;
+}
+
+/* View the data directly following struct btf_type as a struct btf_var. */
+static inline struct btf_var *btf_var(const struct btf_type *t)
+{
+	const struct btf_type *trailer = t + 1;
+
+	return (struct btf_var *)trailer;
+}
+
+/* View the data directly following struct btf_type as struct btf_var_secinfo entries. */
+static inline struct btf_var_secinfo *
+btf_var_secinfos(const struct btf_type *t)
+{
+	const struct btf_type *trailer = t + 1;
+
+	return (struct btf_var_secinfo *)trailer;
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __LIBBPF_BTF_H */
diff --git a/src/contrib/libbpf/bpf/btf_dump.c b/src/contrib/libbpf/bpf/btf_dump.c
new file mode 100644
index 0000000..cb126d8
--- /dev/null
+++ b/src/contrib/libbpf/bpf/btf_dump.c
@@ -0,0 +1,1386 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * BTF-to-C type converter.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <linux/err.h>
+#include <linux/btf.h>
+#include "btf.h"
+#include "hashmap.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+
+/* A run of tabs; pfx() hands out suffixes of it as indentation strings. */
+static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t";
+/* Number of tabs in PREFIXES (terminating NUL excluded). */
+static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1;
+
+/*
+ * Return a string of lvl tabs. Levels at or beyond PREFIX_CNT all get the
+ * full PREFIXES string.
+ */
+static const char *pfx(int lvl)
+{
+	if (lvl >= PREFIX_CNT)
+		return PREFIXES;
+
+	return PREFIXES + (PREFIX_CNT - lvl);
+}
+
+/*
+ * Progress of a type through the topological sort done by
+ * btf_dump_order_type(); stored in btf_dump_type_aux_state.order_state.
+ */
+enum btf_dump_type_order_state {
+ /* first enumerator, so it is the value of zero-initialized state */
+ NOT_ORDERED,
+ /* set on a struct/union while its members are being visited */
+ ORDERING,
+ /* ordering of this type is finished */
+ ORDERED,
+};
+
+/*
+ * Progress of a type through btf_dump_emit_type(); stored in
+ * btf_dump_type_aux_state.emit_state.
+ */
+enum btf_dump_type_emit_state {
+ /* first enumerator, so it is the value of zero-initialized state */
+ NOT_EMITTED,
+ /* set on a typedef/struct/union while the types it uses are emitted */
+ EMITTING,
+ /* nothing more to emit for this type */
+ EMITTED,
+};
+
+/*
+ * per-type auxiliary state; struct btf_dump keeps an array of these that is
+ * indexed by BTF type ID (see type_states)
+ */
+struct btf_dump_type_aux_state {
+ /* topological sorting state */
+ enum btf_dump_type_order_state order_state: 2;
+ /* emitting state used to determine the need for forward declaration */
+ enum btf_dump_type_emit_state emit_state: 2;
+ /* whether forward declaration was already emitted */
+ __u8 fwd_emitted: 1;
+ /* whether unique non-duplicate name was already assigned */
+ __u8 name_resolved: 1;
+ /* whether type is referenced from any other type */
+ __u8 referenced: 1;
+};
+
+/* State of one BTF-to-C dumper instance, created by btf_dump__new(). */
+struct btf_dump {
+ /* BTF object whose types are dumped; not freed by btf_dump__free() */
+ const struct btf *btf;
+ /* stored by btf_dump__new() as passed in by the caller */
+ const struct btf_ext *btf_ext;
+ /* output callback; receives opts.ctx, a format string and a va_list */
+ btf_dump_printf_fn_t printf_fn;
+ /* btf_dump__new() copies only the ctx field from the caller's opts */
+ struct btf_dump_opts opts;
+
+ /* per-type auxiliary state */
+ struct btf_dump_type_aux_state *type_states;
+ /* per-type optional cached unique name, must be freed, if present */
+ const char **cached_names;
+
+ /* topo-sorted list of dependent type definitions */
+ __u32 *emit_queue;
+ int emit_queue_cap;
+ int emit_queue_cnt;
+
+ /*
+ * stack of type declarations (e.g., chain of modifiers, arrays,
+ * funcs, etc)
+ */
+ __u32 *decl_stack;
+ int decl_stack_cap;
+ int decl_stack_cnt;
+
+ /* maps struct/union/enum name to a number of name occurrences */
+ struct hashmap *type_names;
+ /*
+ * maps typedef identifiers and enum value names to a number of such
+ * name occurrences
+ */
+ struct hashmap *ident_names;
+};
+
+/*
+ * Hash callback for NUL-terminated string keys: starting from 0, multiply
+ * the running hash by 31 and add the next character, for every character of
+ * the key. ctx is unused.
+ */
+static size_t str_hash_fn(const void *key, void *ctx)
+{
+	const char *p;
+	size_t hash = 0;
+
+	for (p = key; *p; p++)
+		hash = hash * 31 + *p;
+
+	return hash;
+}
+
+/* Key-equality callback: true when a and b are equal C strings. ctx is unused. */
+static bool str_equal_fn(const void *a, const void *b, void *ctx)
+{
+	const char *lhs = a;
+	const char *rhs = b;
+
+	return !strcmp(lhs, rhs);
+}
+
+/* Look up the string at name_off in the BTF object attached to d. */
+static const char *btf_name_of(const struct btf_dump *d, __u32 name_off)
+{
+	const struct btf *btf = d->btf;
+
+	return btf__name_by_offset(btf, name_off);
+}
+
+/*
+ * printf-style output helper: hands fmt and the variadic arguments (as a
+ * va_list) to the user callback d->printf_fn, together with d->opts.ctx.
+ */
+static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	d->printf_fn(d->opts.ctx, fmt, ap);
+	va_end(ap);
+}
+
+/*
+ * Allocate a dumper for the given BTF object.
+ *
+ * btf, btf_ext and printf_fn are stored as given; from opts (which may be
+ * NULL) only the ctx pointer is copied. Two empty name hashmaps are created.
+ *
+ * Returns the new dumper, or an ERR_PTR()-encoded negative error code if an
+ * allocation fails. Release the result with btf_dump__free().
+ */
+struct btf_dump *btf_dump__new(const struct btf *btf,
+			       const struct btf_ext *btf_ext,
+			       const struct btf_dump_opts *opts,
+			       btf_dump_printf_fn_t printf_fn)
+{
+	struct btf_dump *d;
+	int err;
+
+	d = calloc(1, sizeof(*d));
+	if (!d)
+		return ERR_PTR(-ENOMEM);
+
+	d->btf = btf;
+	d->btf_ext = btf_ext;
+	d->printf_fn = printf_fn;
+	d->opts.ctx = NULL;
+	if (opts)
+		d->opts.ctx = opts->ctx;
+
+	d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
+	if (IS_ERR(d->type_names)) {
+		err = PTR_ERR(d->type_names);
+		/* don't let btf_dump__free() see the error pointer */
+		d->type_names = NULL;
+		goto err_free;
+	}
+
+	d->ident_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
+	if (IS_ERR(d->ident_names)) {
+		err = PTR_ERR(d->ident_names);
+		/* don't let btf_dump__free() see the error pointer */
+		d->ident_names = NULL;
+		goto err_free;
+	}
+
+	return d;
+
+err_free:
+	btf_dump__free(d);
+	return ERR_PTR(err);
+}
+
+/*
+ * Release a dumper and everything it owns: per-type state, cached names,
+ * the emit queue, the declaration stack and both name hashmaps. The BTF
+ * object itself is not touched. A NULL dumper is accepted and ignored.
+ */
+void btf_dump__free(struct btf_dump *d)
+{
+	int idx, nr_types;
+
+	if (!d)
+		return;
+
+	free(d->type_states);
+	if (d->cached_names) {
+		/*
+		 * any set cached name is owned by us and should be freed;
+		 * unset slots are NULL, which free() accepts
+		 */
+		nr_types = btf__get_nr_types(d->btf);
+		for (idx = 0; idx <= nr_types; idx++)
+			free((void *)d->cached_names[idx]);
+	}
+	free(d->cached_names);
+	free(d->emit_queue);
+	free(d->decl_stack);
+	hashmap__free(d->type_names);
+	hashmap__free(d->ident_names);
+
+	free(d);
+}
+
+/* helpers used by btf_dump__dump_type(); they are defined further below */
+static int btf_dump_mark_referenced(struct btf_dump *d);
+static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr);
+static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id);
+
+/*
+ * Dump BTF type in a compilable C syntax, including all the dependent types
+ * that are necessary for compilation. If some of the dependent types were
+ * already emitted as part of a previous btf_dump__dump_type() invocation
+ * for another type, they won't be emitted again. This API allows callers to
+ * filter out BTF types according to user-defined criteria and emit only the
+ * minimal subset of types necessary to compile everything. Full struct/union
+ * definitions will still be emitted, even if the only usage is through
+ * pointer and could be satisfied with just a forward declaration.
+ *
+ * Dumping is done in two high-level passes:
+ * 1. Topologically sort type definitions to satisfy C rules of compilation.
+ * 2. Emit type definitions in C syntax.
+ *
+ * Per-type state is allocated lazily on the first call. If that one-time
+ * setup fails, everything allocated for it is released again, so that a
+ * later call repeats the setup from scratch instead of running on
+ * half-initialized state.
+ *
+ * Returns 0 on success; <0, otherwise (-EINVAL for an out-of-range id,
+ * -ENOMEM on allocation failure, or the error reported by the setup and
+ * ordering passes).
+ */
+int btf_dump__dump_type(struct btf_dump *d, __u32 id)
+{
+	int err, i;
+
+	if (id > btf__get_nr_types(d->btf))
+		return -EINVAL;
+
+	/* type states are lazily allocated, as they might not be needed */
+	if (!d->type_states) {
+		/* one slot per type ID, plus slot 0 for VOID */
+		int nr_slots = 1 + btf__get_nr_types(d->btf);
+		struct btf_dump_type_aux_state *states;
+		const char **names;
+
+		states = calloc(nr_slots, sizeof(states[0]));
+		names = calloc(nr_slots, sizeof(names[0]));
+		if (!states || !names) {
+			free(states);
+			free(names);
+			return -ENOMEM;
+		}
+		/* publish both arrays together, never just one of them */
+		d->type_states = states;
+		d->cached_names = names;
+
+		/* VOID is special */
+		d->type_states[0].order_state = ORDERED;
+		d->type_states[0].emit_state = EMITTED;
+
+		/* eagerly determine referenced types for anon enums */
+		err = btf_dump_mark_referenced(d);
+		if (err) {
+			/*
+			 * undo the setup: no name has been cached yet, so
+			 * freeing the arrays themselves is sufficient
+			 */
+			free(d->cached_names);
+			d->cached_names = NULL;
+			free(d->type_states);
+			d->type_states = NULL;
+			return err;
+		}
+	}
+
+	d->emit_queue_cnt = 0;
+	err = btf_dump_order_type(d, id, false);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i < d->emit_queue_cnt; i++)
+		btf_dump_emit_type(d, d->emit_queue[i], 0 /*top-level*/);
+
+	return 0;
+}
+
+/*
+ * Walk every type and flag each type it points at as "referenced".
+ *
+ * The flag exists for the sake of anonymous enums, which come in two
+ * flavors: embedded in a struct's field definition, in which case they have
+ * to be declared inline as part of the field type declaration; or top-level,
+ * typically used for declaring global constants, in which case they have to
+ * be emitted as independent declarations. The only way to tell the two
+ * apart is knowing whether the enum is referenced from another type: a
+ * top-level anonymous enum won't be referenced by anything, while an
+ * embedded one will.
+ *
+ * Returns 0 on success, -EINVAL if a type of an unknown kind is found.
+ */
+static int btf_dump_mark_referenced(struct btf_dump *d)
+{
+	int id, k, nr_types = btf__get_nr_types(d->btf);
+	const struct btf_type *t;
+	__u16 cnt;
+
+	for (id = 1; id <= nr_types; id++) {
+		t = btf__type_by_id(d->btf, id);
+		cnt = btf_vlen(t);
+
+		switch (btf_kind(t)) {
+		case BTF_KIND_INT:
+		case BTF_KIND_ENUM:
+		case BTF_KIND_FWD:
+			/* nothing is marked for these kinds */
+			break;
+
+		case BTF_KIND_VOLATILE:
+		case BTF_KIND_CONST:
+		case BTF_KIND_RESTRICT:
+		case BTF_KIND_PTR:
+		case BTF_KIND_TYPEDEF:
+		case BTF_KIND_FUNC:
+		case BTF_KIND_VAR:
+			/* a single target, stored in t->type */
+			d->type_states[t->type].referenced = 1;
+			break;
+
+		case BTF_KIND_ARRAY: {
+			const struct btf_array *arr = btf_array(t);
+
+			d->type_states[arr->index_type].referenced = 1;
+			d->type_states[arr->type].referenced = 1;
+			break;
+		}
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION: {
+			const struct btf_member *members = btf_members(t);
+
+			for (k = 0; k < cnt; k++)
+				d->type_states[members[k].type].referenced = 1;
+			break;
+		}
+		case BTF_KIND_FUNC_PROTO: {
+			const struct btf_param *params = btf_params(t);
+
+			for (k = 0; k < cnt; k++)
+				d->type_states[params[k].type].referenced = 1;
+			break;
+		}
+		case BTF_KIND_DATASEC: {
+			const struct btf_var_secinfo *vars = btf_var_secinfos(t);
+
+			for (k = 0; k < cnt; k++)
+				d->type_states[vars[k].type].referenced = 1;
+			break;
+		}
+		default:
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
+/*
+ * Append a type ID to d->emit_queue. A full queue is grown first, to 1.5x
+ * its capacity but to no fewer than 16 entries.
+ *
+ * Returns 0 on success, -ENOMEM if the queue could not be grown (the queue
+ * is left as it was in that case).
+ */
+static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
+{
+	if (d->emit_queue_cnt >= d->emit_queue_cap) {
+		size_t grown_cap = max(16, d->emit_queue_cap * 3 / 2);
+		__u32 *grown;
+
+		grown = realloc(d->emit_queue, grown_cap * sizeof(*grown));
+		if (!grown)
+			return -ENOMEM;
+
+		d->emit_queue = grown;
+		d->emit_queue_cap = grown_cap;
+	}
+
+	d->emit_queue[d->emit_queue_cnt] = id;
+	d->emit_queue_cnt++;
+	return 0;
+}
+
+/*
+ * Determine order of emitting dependent types and specified type to satisfy
+ * C compilation rules. This is done through topological sorting with an
+ * additional complication which comes from C rules. The main idea for C is
+ * that if some type is "embedded" into a struct/union, its size needs to be
+ * known at the time of definition of containing type. E.g., for:
+ *
+ * struct A {};
+ * struct B { struct A x; }
+ *
+ * struct A *HAS* to be defined before struct B, because it's "embedded",
+ * i.e., it is part of struct B layout. But in the following case:
+ *
+ * struct A;
+ * struct B { struct A *x; }
+ * struct A {};
+ *
+ * it's enough to just have a forward declaration of struct A at the time of
+ * struct B definition, as struct B has a pointer to struct A, so the size of
+ * field x is known without knowing struct A size: it's sizeof(void *).
+ *
+ * Unfortunately, there are some trickier cases we need to handle, e.g.:
+ *
+ * struct A {}; // if this was forward-declaration: compilation error
+ * struct B {
+ * struct { // anonymous struct
+ * struct A y;
+ * } *x;
+ * };
+ *
+ * In this case, struct B's field x is a pointer, so its size is known
+ * regardless of the size of (anonymous) struct it points to. But because this
+ * struct is anonymous and thus defined inline inside struct B, *and* it
+ * embeds struct A, compiler requires full definition of struct A to be known
+ * before struct B can be defined. This creates a transitive dependency
+ * between struct A and struct B. If struct A was forward-declared before
+ * struct B definition and fully defined after struct B definition, that would
+ * trigger compilation error.
+ *
+ * All this means that while we are doing topological sorting on BTF type
+ * graph, we need to determine relationships between different types (graph
+ * nodes):
+ * - weak link (relationship) between X and Y, if Y *CAN* be
+ * forward-declared at the point of X definition;
+ * - strong link, if Y *HAS* to be fully-defined before X can be defined.
+ *
+ * The rule is as follows. Given a chain of BTF types from X to Y, if there is
+ * BTF_KIND_PTR type in the chain and at least one non-anonymous type
+ * Z (excluding X, including Y), then link is weak. Otherwise, it's strong.
+ * Weak/strong relationship is determined recursively during DFS traversal and
+ * is returned as a result from btf_dump_order_type().
+ *
+ * btf_dump_order_type() is trying to avoid unnecessary forward declarations,
+ * but it is not guaranteeing that no extraneous forward declarations will be
+ * emitted.
+ *
+ * To avoid extra work, algorithm marks some of BTF types as ORDERED, when
+ * it's done with them, but not for all (e.g., VOLATILE, CONST, RESTRICT,
+ * ARRAY, FUNC_PROTO), as weak/strong semantics for those depends on the
+ * entire graph path, so depending where from one came to that BTF type, it
+ * might cause weak or strong ordering. For types like STRUCT/UNION/INT/ENUM,
+ * once they are processed, there is no need to do it again, so they are
+ * marked as ORDERED. We can mark PTR as ORDERED as well, as it semi-forces
+ * weak link, unless subsequent referenced STRUCT/UNION/ENUM is anonymous. But
+ * in any case, once those are processed, no need to do it again, as the
+ * result won't change.
+ *
+ * Returns:
+ * - 1, if type is part of strong link (so there is strong topological
+ * ordering requirements);
+ * - 0, if type is part of weak link (so can be satisfied through forward
+ * declaration);
+ * - <0, on error (e.g., unsatisfiable type loop detected).
+ */
+static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
+{
+ /*
+ * Order state is used to detect strong link cycles, but only for BTF
+ * kinds that are or could be an independent definition (i.e.,
+ * stand-alone fwd decl, enum, typedef, struct, union). Ptrs, arrays,
+ * func_protos, modifiers are just means to get to these definitions.
+ * Int/void don't need definitions, they are assumed to be always
+ * properly defined. We also ignore datasec, var, and funcs for now.
+ * So for all non-defining kinds, we never even set ordering state,
+ * for defining kinds we set ORDERING and subsequently ORDERED if it
+ * forms a strong link.
+ */
+ struct btf_dump_type_aux_state *tstate = &d->type_states[id];
+ const struct btf_type *t;
+ __u16 vlen;
+ int err, i;
+
+ /* return true, letting typedefs know that it's ok to be emitted */
+ if (tstate->order_state == ORDERED)
+ return 1;
+
+ t = btf__type_by_id(d->btf, id);
+
+ /* ORDERING here means we re-entered a type that is still in progress */
+ if (tstate->order_state == ORDERING) {
+ /* type loop, but resolvable through fwd declaration */
+ if (btf_is_composite(t) && through_ptr && t->name_off != 0)
+ return 0;
+ pr_warn("unsatisfiable type cycle, id:[%u]\n", id);
+ return -ELOOP;
+ }
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_INT:
+ /* ints are never queued for emission */
+ tstate->order_state = ORDERED;
+ return 0;
+
+ case BTF_KIND_PTR:
+ /* whatever the pointer leads to is visited with through_ptr set */
+ err = btf_dump_order_type(d, t->type, true);
+ tstate->order_state = ORDERED;
+ return err;
+
+ case BTF_KIND_ARRAY:
+ /* only the element type is visited; through_ptr is passed on as is */
+ return btf_dump_order_type(d, btf_array(t)->type, through_ptr);
+
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *m = btf_members(t);
+ /*
+ * struct/union is part of strong link, only if it's embedded
+ * (so no ptr in a path) or it's anonymous (so has to be
+ * defined inline, even if declared through ptr)
+ */
+ if (through_ptr && t->name_off != 0)
+ return 0;
+
+ /* in progress: re-entering this type is handled as a loop above */
+ tstate->order_state = ORDERING;
+
+ vlen = btf_vlen(t);
+ /* members are visited with through_ptr reset to false */
+ for (i = 0; i < vlen; i++, m++) {
+ err = btf_dump_order_type(d, m->type, false);
+ if (err < 0)
+ return err;
+ }
+
+ /* only a named struct/union is queued; its members went in first */
+ if (t->name_off != 0) {
+ err = btf_dump_add_emit_queue_id(d, id);
+ if (err < 0)
+ return err;
+ }
+
+ tstate->order_state = ORDERED;
+ return 1;
+ }
+ case BTF_KIND_ENUM:
+ case BTF_KIND_FWD:
+ /*
+ * non-anonymous or non-referenced enums are top-level
+ * declarations and should be emitted. Same logic can be
+ * applied to FWDs, it won't hurt anyways.
+ */
+ if (t->name_off != 0 || !tstate->referenced) {
+ err = btf_dump_add_emit_queue_id(d, id);
+ if (err)
+ return err;
+ }
+ tstate->order_state = ORDERED;
+ return 1;
+
+ case BTF_KIND_TYPEDEF: {
+ int is_strong;
+
+ /* order the aliased type first, keeping the current through_ptr */
+ is_strong = btf_dump_order_type(d, t->type, through_ptr);
+ if (is_strong < 0)
+ return is_strong;
+
+ /* typedef is similar to struct/union w.r.t. fwd-decls */
+ if (through_ptr && !is_strong)
+ return 0;
+
+ /* typedef is always a named definition */
+ err = btf_dump_add_emit_queue_id(d, id);
+ if (err)
+ return err;
+
+ d->type_states[id].order_state = ORDERED;
+ return 1;
+ }
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ /* modifiers just forward the result for the type they wrap */
+ return btf_dump_order_type(d, t->type, through_ptr);
+
+ case BTF_KIND_FUNC_PROTO: {
+ const struct btf_param *p = btf_params(t);
+ bool is_strong;
+
+ /* t->type is visited first, then every parameter type */
+ err = btf_dump_order_type(d, t->type, through_ptr);
+ if (err < 0)
+ return err;
+ is_strong = err > 0;
+
+ vlen = btf_vlen(t);
+ for (i = 0; i < vlen; i++, p++) {
+ err = btf_dump_order_type(d, p->type, through_ptr);
+ if (err < 0)
+ return err;
+ /* one strong link makes the whole prototype strong */
+ if (err > 0)
+ is_strong = true;
+ }
+ return is_strong;
+ }
+ case BTF_KIND_FUNC:
+ case BTF_KIND_VAR:
+ case BTF_KIND_DATASEC:
+ /* marked as done without being queued for emission */
+ d->type_states[id].order_state = ORDERED;
+ return 0;
+
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * Forward declarations of the per-kind emitters; the *_fwd/*_def functions
+ * are defined further below.
+ */
+static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
+ const struct btf_type *t);
+static void btf_dump_emit_struct_def(struct btf_dump *d, __u32 id,
+ const struct btf_type *t, int lvl);
+
+static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id,
+ const struct btf_type *t);
+static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
+ const struct btf_type *t, int lvl);
+
+static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id,
+ const struct btf_type *t);
+
+static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id,
+ const struct btf_type *t, int lvl);
+
+/*
+ * a local view into a shared stack: ids points into btf_dump->decl_stack and
+ * cnt is the number of entries that belong to this view
+ */
+struct id_stack {
+ const __u32 *ids;
+ int cnt;
+};
+
+/* emit a declaration of type id, optionally naming a field/variable fname */
+static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
+ const char *fname, int lvl);
+static void btf_dump_emit_type_chain(struct btf_dump *d,
+ struct id_stack *decl_stack,
+ const char *fname, int lvl);
+
+/* name resolution helpers */
+static const char *btf_dump_type_name(struct btf_dump *d, __u32 id);
+static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id);
+static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
+ const char *orig_name);
+
+/*
+ * Tell whether the type with the given ID must not be emitted.
+ *
+ * The only such type is the one named __builtin_va_list. It is a compiler
+ * built-in, which causes compilation errors when the generated header is
+ * compiled with a different compiler than the one used to compile the
+ * original code (e.g., GCC to compile kernel, Clang to use generated C
+ * header from BTF). As it is built-in, it should be already defined
+ * properly internally in compiler. Anonymous types are never blacklisted.
+ */
+static bool btf_dump_is_blacklisted(struct btf_dump *d, __u32 id)
+{
+	const struct btf_type *t = btf__type_by_id(d->btf, id);
+	const char *type_name;
+
+	if (!t->name_off)
+		return false;
+
+	type_name = btf_name_of(d, t->name_off);
+	return !strcmp(type_name, "__builtin_va_list");
+}
+
+/*
+ * Emit C-syntax definitions of types from chains of BTF types.
+ *
+ * High-level handling of determining necessary forward declarations are handled
+ * by btf_dump_emit_type() itself, but all nitty-gritty details of emitting type
+ * declarations/definitions in C syntax are handled by a combo of
+ * btf_dump_emit_type_decl()/btf_dump_emit_type_chain() w/ delegation to
+ * corresponding btf_dump_emit_*_{def,fwd}() functions.
+ *
+ * We also keep track of "containing struct/union type ID" to determine when
+ * we reference it from inside and thus can avoid emitting unnecessary forward
+ * declaration.
+ *
+ * This algorithm is designed in such a way, that even if some error occurs
+ * (either technical, e.g., out of memory, or logical, i.e., malformed BTF
+ * that doesn't comply to C rules completely), algorithm will try to proceed
+ * and produce as much meaningful output as possible.
+ */
+/*
+ * id is the type to emit. cont_id is 0 for a top-level definition;
+ * otherwise it is the ID of the struct/union/typedef that the recursion is
+ * currently inside of.
+ */
+static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
+{
+ struct btf_dump_type_aux_state *tstate = &d->type_states[id];
+ bool top_level_def = cont_id == 0;
+ const struct btf_type *t;
+ __u16 kind;
+
+ if (tstate->emit_state == EMITTED)
+ return;
+
+ t = btf__type_by_id(d->btf, id);
+ kind = btf_kind(t);
+
+ /* EMITTING: we came back to a type whose emission is still in progress */
+ if (tstate->emit_state == EMITTING) {
+ if (tstate->fwd_emitted)
+ return;
+
+ switch (kind) {
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ /*
+ * if we are referencing a struct/union that we are
+ * part of - then no need for fwd declaration
+ */
+ if (id == cont_id)
+ return;
+ if (t->name_off == 0) {
+ pr_warn("anonymous struct/union loop, id:[%u]\n",
+ id);
+ return;
+ }
+ btf_dump_emit_struct_fwd(d, id, t);
+ btf_dump_printf(d, ";\n\n");
+ tstate->fwd_emitted = 1;
+ break;
+ case BTF_KIND_TYPEDEF:
+ /*
+ * for typedef fwd_emitted means typedef definition
+ * was emitted, but it can be used only for "weak"
+ * references through pointer only, not for embedding
+ */
+ if (!btf_dump_is_blacklisted(d, id)) {
+ btf_dump_emit_typedef_def(d, id, t, 0);
+ btf_dump_printf(d, ";\n\n");
+ };
+ tstate->fwd_emitted = 1;
+ break;
+ default:
+ break;
+ }
+
+ return;
+ }
+
+ switch (kind) {
+ case BTF_KIND_INT:
+ /* nothing is printed for ints */
+ tstate->emit_state = EMITTED;
+ break;
+ case BTF_KIND_ENUM:
+ /* an enum reached from inside another type is not printed here */
+ if (top_level_def) {
+ btf_dump_emit_enum_def(d, id, t, 0);
+ btf_dump_printf(d, ";\n\n");
+ }
+ tstate->emit_state = EMITTED;
+ break;
+ case BTF_KIND_PTR:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ /* look through to the referenced type; no state is recorded here */
+ btf_dump_emit_type(d, t->type, cont_id);
+ break;
+ case BTF_KIND_ARRAY:
+ /* only the element type is followed */
+ btf_dump_emit_type(d, btf_array(t)->type, cont_id);
+ break;
+ case BTF_KIND_FWD:
+ btf_dump_emit_fwd_def(d, id, t);
+ btf_dump_printf(d, ";\n\n");
+ tstate->emit_state = EMITTED;
+ break;
+ case BTF_KIND_TYPEDEF:
+ tstate->emit_state = EMITTING;
+ /* the typedef itself becomes cont_id for the aliased type */
+ btf_dump_emit_type(d, t->type, id);
+ /*
+ * typedef can serve as both definition and forward
+ * declaration; at this stage someone depends on
+ * typedef as a forward declaration (refers to it
+ * through pointer), so unless we already did it,
+ * emit typedef as a forward declaration
+ */
+ if (!tstate->fwd_emitted && !btf_dump_is_blacklisted(d, id)) {
+ btf_dump_emit_typedef_def(d, id, t, 0);
+ btf_dump_printf(d, ";\n\n");
+ }
+ tstate->emit_state = EMITTED;
+ break;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ tstate->emit_state = EMITTING;
+ /* if it's a top-level struct/union definition or struct/union
+ * is anonymous, then in C we'll be emitting all fields and
+ * their types (as opposed to just `struct X`), so we need to
+ * make sure that all types, referenced from struct/union
+ * members have necessary forward-declarations, where
+ * applicable
+ */
+ if (top_level_def || t->name_off == 0) {
+ const struct btf_member *m = btf_members(t);
+ __u16 vlen = btf_vlen(t);
+ int i, new_cont_id;
+
+ /* an anonymous struct/union keeps the container of its parent */
+ new_cont_id = t->name_off == 0 ? cont_id : id;
+ for (i = 0; i < vlen; i++, m++)
+ btf_dump_emit_type(d, m->type, new_cont_id);
+ } else if (!tstate->fwd_emitted && id != cont_id) {
+ btf_dump_emit_struct_fwd(d, id, t);
+ btf_dump_printf(d, ";\n\n");
+ tstate->fwd_emitted = 1;
+ }
+
+ if (top_level_def) {
+ btf_dump_emit_struct_def(d, id, t, 0);
+ btf_dump_printf(d, ";\n\n");
+ tstate->emit_state = EMITTED;
+ } else {
+ /*
+ * no definition was printed: reset the state so that a
+ * later call is not cut short by the checks at the top
+ */
+ tstate->emit_state = NOT_EMITTED;
+ }
+ break;
+ case BTF_KIND_FUNC_PROTO: {
+ const struct btf_param *p = btf_params(t);
+ __u16 vlen = btf_vlen(t);
+ int i;
+
+ /* t->type first, then each parameter type */
+ btf_dump_emit_type(d, t->type, cont_id);
+ for (i = 0; i < vlen; i++, p++)
+ btf_dump_emit_type(d, p->type, cont_id);
+
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+/*
+ * Compute the alignment, in bytes, that this dumper assumes for a type:
+ *   - int/enum: its size, capped at sizeof(void *);
+ *   - pointer: sizeof(void *);
+ *   - typedef/modifier/array: alignment of the underlying/element type;
+ *   - struct/union: the largest alignment among its members (1 if empty).
+ * Any other kind triggers a warning and is treated as having alignment 1.
+ */
+static int btf_align_of(const struct btf *btf, __u32 id)
+{
+	const struct btf_type *t = btf__type_by_id(btf, id);
+	__u16 kind = btf_kind(t);
+
+	switch (kind) {
+	case BTF_KIND_INT:
+	case BTF_KIND_ENUM:
+		return min(sizeof(void *), t->size);
+	case BTF_KIND_PTR:
+		return sizeof(void *);
+	case BTF_KIND_TYPEDEF:
+	case BTF_KIND_VOLATILE:
+	case BTF_KIND_CONST:
+	case BTF_KIND_RESTRICT:
+		return btf_align_of(btf, t->type);
+	case BTF_KIND_ARRAY:
+		return btf_align_of(btf, btf_array(t)->type);
+	case BTF_KIND_STRUCT:
+	case BTF_KIND_UNION: {
+		const struct btf_member *m = btf_members(t);
+		__u16 vlen = btf_vlen(t);
+		int i, align = 1, m_align;
+
+		for (i = 0; i < vlen; i++, m++) {
+			/*
+			 * Recurse exactly once per member. With the call
+			 * placed directly inside max(), a plain macro
+			 * implementation of max() would evaluate it twice,
+			 * which compounds with every level of nesting.
+			 */
+			m_align = btf_align_of(btf, m->type);
+			align = max(align, m_align);
+		}
+
+		return align;
+	}
+	default:
+		pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t));
+		return 1;
+	}
+}
+
+/*
+ * Decide from the layout alone whether struct t (type ID id) has to be
+ * declared as packed. It is reported as packed when its size is not a
+ * multiple of its alignment, or when some non-bitfield member does not sit
+ * at a multiple of its own alignment.
+ *
+ * If the original struct was marked as packed, but its layout is naturally
+ * aligned, it is reported as not packed.
+ */
+static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
+				 const struct btf_type *t)
+{
+	const struct btf_member *members;
+	int struct_align, member_align, bitfield_bits, idx;
+	__u16 nr_members;
+
+	/* size of a non-packed struct has to be a multiple of its alignment */
+	struct_align = btf_align_of(btf, id);
+	if (t->size % struct_align)
+		return true;
+
+	members = btf_members(t);
+	nr_members = btf_vlen(t);
+	/* all non-bitfield fields have to be naturally aligned */
+	for (idx = 0; idx < nr_members; idx++) {
+		member_align = btf_align_of(btf, members[idx].type);
+		bitfield_bits = btf_member_bitfield_size(t, idx);
+		if (bitfield_bits != 0)
+			continue;
+		if (members[idx].offset % (8 * member_align) != 0)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Return total % at_most, except that a zero remainder is reported as
+ * at_most itself.
+ */
+static int chip_away_bits(int total, int at_most)
+{
+	int rem = total % at_most;
+
+	if (rem != 0)
+		return rem;
+	return at_most;
+}
+
+/*
+ * Print anonymous bitfields that fill the gap between bit offset cur_off
+ * and bit offset m_off.
+ *
+ * Nothing is printed when there is no gap, or when the next member is not a
+ * bitfield (m_bit_sz == 0) and the gap is smaller than align bytes. The gap
+ * is filled with "long", "int", "short" or "char" bitfields, picked by how
+ * many bits are left; chip_away_bits() sizes each one. lvl is the
+ * indentation level of the printed lines.
+ */
+static void btf_dump_emit_bit_padding(const struct btf_dump *d,
+				      int cur_off, int m_off, int m_bit_sz,
+				      int align, int lvl)
+{
+	int ptr_bits = sizeof(void *) * 8;
+	int gap = m_off - cur_off;
+
+	/* no gap */
+	if (gap <= 0)
+		return;
+	/* natural padding will take care of a gap */
+	if (m_bit_sz == 0 && gap < align * 8)
+		return;
+
+	/* gap is known to be positive here, so at least one field is printed */
+	do {
+		const char *pad_type;
+		int unit_bits, pad_bits;
+
+		if (ptr_bits > 32 && gap > 32) {
+			pad_type = "long";
+			unit_bits = ptr_bits;
+		} else if (gap > 16) {
+			pad_type = "int";
+			unit_bits = 32;
+		} else if (gap > 8) {
+			pad_type = "short";
+			unit_bits = 16;
+		} else {
+			pad_type = "char";
+			unit_bits = 8;
+		}
+
+		pad_bits = chip_away_bits(gap, unit_bits);
+		btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits);
+		gap -= pad_bits;
+	} while (gap > 0);
+}
+
+/*
+ * Print "struct <name>" or "union <name>" for type id; the caller supplies
+ * whatever follows.
+ */
+static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
+				     const struct btf_type *t)
+{
+	const char *keyword = "union";
+
+	if (btf_is_struct(t))
+		keyword = "struct";
+
+	btf_dump_printf(d, "%s %s", keyword, btf_dump_type_name(d, id));
+}
+
+/*
+ * Print the full definition of struct/union t (type ID id), from the
+ * "struct"/"union" keyword up to and including the closing brace and an
+ * optional packed attribute. Members are printed one level deeper than lvl.
+ * No trailing semicolon is printed.
+ */
+static void btf_dump_emit_struct_def(struct btf_dump *d,
+ __u32 id,
+ const struct btf_type *t,
+ int lvl)
+{
+ const struct btf_member *m = btf_members(t);
+ bool is_struct = btf_is_struct(t);
+ int align, i, packed, off = 0;
+ __u16 vlen = btf_vlen(t);
+
+ /* packedness is only checked for structs, never for unions */
+ packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0;
+
+ /* the separating space is printed only when the type has a name */
+ btf_dump_printf(d, "%s%s%s {",
+ is_struct ? "struct" : "union",
+ t->name_off ? " " : "",
+ btf_dump_type_name(d, id));
+
+ /* off tracks the bit offset at which the previous member ended */
+ for (i = 0; i < vlen; i++, m++) {
+ const char *fname;
+ int m_off, m_sz;
+
+ fname = btf_name_of(d, m->name_off);
+ m_sz = btf_member_bitfield_size(t, i);
+ m_off = btf_member_bit_offset(t, i);
+ align = packed ? 1 : btf_align_of(d->btf, m->type);
+
+ /* fill the gap, if any, between the previous member and this one */
+ btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1);
+ btf_dump_printf(d, "\n%s", pfx(lvl + 1));
+ btf_dump_emit_type_decl(d, m->type, fname, lvl + 1);
+
+ if (m_sz) {
+ /* bitfield: m_sz is already a size in bits */
+ btf_dump_printf(d, ": %d", m_sz);
+ off = m_off + m_sz;
+ } else {
+ /*
+ * a negative btf__resolve_size() result is treated as size 0
+ * NOTE(review): if max() is a plain macro, the call may be
+ * evaluated twice here - confirm
+ */
+ m_sz = max(0, btf__resolve_size(d->btf, m->type));
+ off = m_off + m_sz * 8;
+ }
+ btf_dump_printf(d, ";");
+ }
+
+ /* pad at the end, if necessary */
+ if (is_struct) {
+ align = packed ? 1 : btf_align_of(d->btf, id);
+ btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align,
+ lvl + 1);
+ }
+
+ /* an empty struct/union is printed with nothing between the braces */
+ if (vlen)
+ btf_dump_printf(d, "\n");
+ btf_dump_printf(d, "%s}", pfx(lvl));
+ if (packed)
+ btf_dump_printf(d, " __attribute__((packed))");
+}
+
+/* Print "enum <name>" for type id; t is not used. */
+static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id,
+				   const struct btf_type *t)
+{
+	const char *enum_name = btf_dump_type_name(d, id);
+
+	btf_dump_printf(d, "enum %s", enum_name);
+}
+
+/*
+ * Print the definition of enum t (type ID id): "enum", its name if it has
+ * one, and - unless the enum has no values - a brace-enclosed list of
+ * "NAME = value," lines indented one level deeper than lvl. No trailing
+ * semicolon is printed.
+ *
+ * Enumerator names are counted in d->ident_names; a name that has been seen
+ * before gets a "___<count>" suffix.
+ */
+static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
+				   const struct btf_type *t,
+				   int lvl)
+{
+	const struct btf_enum *vals = btf_enum(t);
+	__u16 nr_vals = btf_vlen(t);
+	int idx;
+
+	btf_dump_printf(d, "enum%s%s",
+			t->name_off ? " " : "",
+			btf_dump_type_name(d, id));
+
+	if (!nr_vals)
+		return;
+
+	btf_dump_printf(d, " {");
+	for (idx = 0; idx < nr_vals; idx++) {
+		const char *val_name = btf_name_of(d, vals[idx].name_off);
+		/* enumerators share namespace with typedef idents */
+		size_t seen = btf_dump_name_dups(d, d->ident_names, val_name);
+
+		if (seen > 1) {
+			btf_dump_printf(d, "\n%s%s___%zu = %d,",
+					pfx(lvl + 1), val_name, seen,
+					(__s32)vals[idx].val);
+		} else {
+			btf_dump_printf(d, "\n%s%s = %d,",
+					pfx(lvl + 1), val_name,
+					(__s32)vals[idx].val);
+		}
+	}
+	btf_dump_printf(d, "\n%s}", pfx(lvl));
+}
+
+/*
+ * Print a forward declaration for type id: "union <name>" when the kflag of
+ * t is set, "struct <name>" otherwise.
+ */
+static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id,
+				  const struct btf_type *t)
+{
+	const char *fwd_name = btf_dump_type_name(d, id);
+
+	if (!btf_kflag(t)) {
+		btf_dump_printf(d, "struct %s", fwd_name);
+		return;
+	}
+
+	btf_dump_printf(d, "union %s", fwd_name);
+}
+
+/*
+ * Print "typedef <declaration of the aliased type> <name>" for typedef t
+ * (type ID id), without a trailing semicolon.
+ *
+ * Old GCC versions are emitting invalid typedef for __gnuc_va_list pointing
+ * to VOID. This generates warnings from btf_dump() and results in
+ * uncompilable header file, so that one case is fixed up here with a valid
+ * typedef into __builtin_va_list.
+ */
+static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id,
+				      const struct btf_type *t, int lvl)
+{
+	const char *alias = btf_dump_ident_name(d, id);
+	bool void_va_list;
+
+	void_va_list = t->type == 0 && strcmp(alias, "__gnuc_va_list") == 0;
+	if (!void_va_list) {
+		btf_dump_printf(d, "typedef ");
+		btf_dump_emit_type_decl(d, t->type, alias, lvl);
+		return;
+	}
+
+	btf_dump_printf(d, "typedef __builtin_va_list __gnuc_va_list");
+}
+
+/*
+ * Push a type ID onto d->decl_stack. A full stack is grown first, to 1.5x
+ * its capacity but to no fewer than 16 entries.
+ *
+ * Returns 0 on success, -ENOMEM if the stack could not be grown (the stack
+ * is left as it was in that case).
+ */
+static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id)
+{
+	if (d->decl_stack_cnt >= d->decl_stack_cap) {
+		size_t grown_cap = max(16, d->decl_stack_cap * 3 / 2);
+		__u32 *grown;
+
+		grown = realloc(d->decl_stack, grown_cap * sizeof(*grown));
+		if (!grown)
+			return -ENOMEM;
+
+		d->decl_stack = grown;
+		d->decl_stack_cap = grown_cap;
+	}
+
+	d->decl_stack[d->decl_stack_cnt] = id;
+	d->decl_stack_cnt++;
+
+	return 0;
+}
+
+/*
+ * Emit type declaration (e.g., field type declaration in a struct or argument
+ * declaration in function prototype) in correct C syntax.
+ *
+ * For most types it's trivial, but there are few quirky type declaration
+ * cases worth mentioning:
+ * - function prototypes (especially nesting of function prototypes);
+ * - arrays;
+ * - const/volatile/restrict for pointers vs other types.
+ *
+ * For a good discussion of *PARSING* C syntax (as a human), see
+ * Peter van der Linden's "Expert C Programming: Deep C Secrets",
+ * Ch.3 "Unscrambling Declarations in C".
+ *
+ * It won't help with BTF to C conversion much, though, as it's an opposite
+ * problem. So we came up with this algorithm in reverse to van der Linden's
+ * parsing algorithm. It goes from structured BTF representation of type
+ * declaration to a valid compilable C syntax.
+ *
+ * For instance, consider this C typedef:
+ * typedef const int * const * arr_t[10];
+ * It will be represented in BTF with this chain of BTF types:
+ * [typedef] -> [array] -> [ptr] -> [const] -> [ptr] -> [const] -> [int]
+ *
+ * Notice how [const] modifier always goes before type it modifies in BTF type
+ * graph, but in C syntax, const/volatile/restrict modifiers are written to
+ * the right of pointers, but to the left of other types. There are also other
+ * quirks, like function pointers, arrays of them, functions returning other
+ * functions, etc.
+ *
+ * We handle that by pushing all the types to a stack, until we hit "terminal"
+ * type (int/enum/struct/union/fwd). Then depending on the kind of a type on
+ * top of a stack, modifiers are handled differently. Array/function pointers
+ * have also wildly different syntax and how nesting of them are done. See
+ * code for authoritative definition.
+ *
+ * To avoid allocating new stack for each independent chain of BTF types, we
+ * share one bigger stack, with each chain working only on its own local view
+ * of a stack frame. Some care is required to "pop" stack frames after
+ * processing type declaration chain.
+ */
+static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
+				    const char *fname, int lvl)
+{
+	struct id_stack decl_stack;
+	const struct btf_type *t;
+	int err, stack_start;
+
+	/* remember where our frame begins on the shared stack */
+	stack_start = d->decl_stack_cnt;
+	for (;;) {
+		err = btf_dump_push_decl_stack_id(d, id);
+		if (err < 0) {
+			/*
+			 * if we don't have enough memory for entire type decl
+			 * chain, restore stack, emit warning, and try to
+			 * proceed nevertheless
+			 */
+			pr_warn("not enough memory for decl stack:%d\n", err);
+			d->decl_stack_cnt = stack_start;
+			return;
+		}
+
+		/* VOID */
+		if (id == 0)
+			break;
+
+		t = btf__type_by_id(d->btf, id);
+		switch (btf_kind(t)) {
+		case BTF_KIND_PTR:
+		case BTF_KIND_VOLATILE:
+		case BTF_KIND_CONST:
+		case BTF_KIND_RESTRICT:
+		case BTF_KIND_FUNC_PROTO:
+			/* keep following the chain through t->type */
+			id = t->type;
+			break;
+		case BTF_KIND_ARRAY:
+			/* the chain continues with the element type */
+			id = btf_array(t)->type;
+			break;
+		case BTF_KIND_INT:
+		case BTF_KIND_ENUM:
+		case BTF_KIND_FWD:
+		case BTF_KIND_STRUCT:
+		case BTF_KIND_UNION:
+		case BTF_KIND_TYPEDEF:
+			/* these kinds end the chain */
+			goto done;
+		default:
+			pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n",
+				btf_kind(t), id);
+			goto done;
+		}
+	}
+done:
+	/*
+	 * We might be inside a chain of declarations (e.g., array of function
+	 * pointers returning anonymous (so inlined) structs, having another
+	 * array field). Each of those needs its own "stack frame" to handle
+	 * emitting of declarations. Those stack frames are non-overlapping
+	 * portions of shared btf_dump->decl_stack. To make it a bit nicer to
+	 * handle this set of nested stacks, we create a view corresponding to
+	 * our own "stack frame" and work with it as an independent stack.
+	 * We'll need to clean up after emit_type_chain() returns, though.
+	 */
+	decl_stack.ids = d->decl_stack + stack_start;
+	decl_stack.cnt = d->decl_stack_cnt - stack_start;
+	btf_dump_emit_type_chain(d, &decl_stack, fname, lvl);
+	/*
+	 * emit_type_chain() guarantees that it will pop its entire decl_stack
+	 * frame before returning. But it works with a read-only view into
+	 * decl_stack, so it doesn't actually pop anything from the
+	 * perspective of shared btf_dump->decl_stack, per se. We need to
+	 * reset decl_stack state to how it was before us to avoid it growing
+	 * all the time.
+	 */
+	d->decl_stack_cnt = stack_start;
+}
+
+/*
+ * Print, in prefix form ("volatile ", "const ", "restrict "), every modifier
+ * sitting on top of decl_stack and pop it. Stops at the first entry that is
+ * not one of these modifiers; that entry is left on the stack.
+ */
+static void btf_dump_emit_mods(struct btf_dump *d, struct id_stack *decl_stack)
+{
+	for (; decl_stack->cnt; decl_stack->cnt--) {
+		__u32 top_id = decl_stack->ids[decl_stack->cnt - 1];
+		const struct btf_type *top = btf__type_by_id(d->btf, top_id);
+		__u16 top_kind = btf_kind(top);
+
+		if (top_kind == BTF_KIND_VOLATILE)
+			btf_dump_printf(d, "volatile ");
+		else if (top_kind == BTF_KIND_CONST)
+			btf_dump_printf(d, "const ");
+		else if (top_kind == BTF_KIND_RESTRICT)
+			btf_dump_printf(d, "restrict ");
+		else
+			return;
+	}
+}
+
+/*
+ * Print an identifier. A single separating space is printed first when the
+ * name is non-empty and the previously emitted token was not a pointer.
+ */
+static void btf_dump_emit_name(const struct btf_dump *d,
+			       const char *name, bool last_was_ptr)
+{
+	const char *sep = "";
+
+	if (name[0] && !last_was_ptr)
+		sep = " ";
+
+	btf_dump_printf(d, "%s%s", sep, name);
+}
+
+static void btf_dump_emit_type_chain(struct btf_dump *d,
+ struct id_stack *decls,
+ const char *fname, int lvl)
+{
+ /*
+ * last_was_ptr is used to determine if we need to separate pointer
+ * asterisk (*) from previous part of type signature with space, so
+ * that we get `int ***`, instead of `int * * *`. We default to true
+ * for cases where we have single pointer in a chain. E.g., in ptr ->
+ * func_proto case. func_proto will start a new emit_type_chain call
+ * with just ptr, which should be emitted as (*) or (*<fname>), so we
+ * don't want to prepend space for that last pointer.
+ *
+ * Overall flow: type ids are popped from the top of decls one by one
+ * and printed. ARRAY and FUNC_PROTO recurse on whatever is left of
+ * decls and then return directly; for all other chains fname is
+ * emitted once the stack is empty.
+ */
+ bool last_was_ptr = true;
+ const struct btf_type *t;
+ const char *name;
+ __u16 kind;
+ __u32 id;
+
+ while (decls->cnt) {
+ id = decls->ids[--decls->cnt]; /* pop top of the local stack view */
+ if (id == 0) {
+ /* VOID is a special snowflake */
+ btf_dump_emit_mods(d, decls);
+ btf_dump_printf(d, "void");
+ last_was_ptr = false;
+ continue;
+ }
+
+ t = btf__type_by_id(d->btf, id);
+ kind = btf_kind(t);
+
+ switch (kind) {
+ case BTF_KIND_INT:
+ btf_dump_emit_mods(d, decls);
+ name = btf_name_of(d, t->name_off);
+ btf_dump_printf(d, "%s", name);
+ break;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ btf_dump_emit_mods(d, decls);
+ /* inline anonymous struct/union */
+ if (t->name_off == 0)
+ btf_dump_emit_struct_def(d, id, t, lvl);
+ else
+ btf_dump_emit_struct_fwd(d, id, t);
+ break;
+ case BTF_KIND_ENUM:
+ btf_dump_emit_mods(d, decls);
+ /* inline anonymous enum */
+ if (t->name_off == 0)
+ btf_dump_emit_enum_def(d, id, t, lvl);
+ else
+ btf_dump_emit_enum_fwd(d, id, t);
+ break;
+ case BTF_KIND_FWD:
+ btf_dump_emit_mods(d, decls);
+ btf_dump_emit_fwd_def(d, id, t);
+ break;
+ case BTF_KIND_TYPEDEF:
+ btf_dump_emit_mods(d, decls);
+ btf_dump_printf(d, "%s", btf_dump_ident_name(d, id));
+ break;
+ case BTF_KIND_PTR:
+ btf_dump_printf(d, "%s", last_was_ptr ? "*" : " *");
+ break;
+ case BTF_KIND_VOLATILE: /* modifier reached outside emit_mods(): printed in suffix form */
+ btf_dump_printf(d, " volatile");
+ break;
+ case BTF_KIND_CONST:
+ btf_dump_printf(d, " const");
+ break;
+ case BTF_KIND_RESTRICT:
+ btf_dump_printf(d, " restrict");
+ break;
+ case BTF_KIND_ARRAY: {
+ const struct btf_array *a = btf_array(t);
+ const struct btf_type *next_t;
+ __u32 next_id;
+ bool multidim;
+ /*
+ * GCC has a bug
+ * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=8354)
+ * which causes it to emit extra const/volatile
+ * modifiers for an array, if array's element type has
+ * const/volatile modifiers. Clang doesn't do that.
+ * In general, it doesn't seem very meaningful to have
+ * a const/volatile modifier for array, so we are
+ * going to silently skip them here.
+ */
+ while (decls->cnt) {
+ next_id = decls->ids[decls->cnt - 1];
+ next_t = btf__type_by_id(d->btf, next_id);
+ if (btf_is_mod(next_t))
+ decls->cnt--;
+ else
+ break;
+ }
+
+ if (decls->cnt == 0) { /* array is the last entry: emit `fname[N]` and stop */
+ btf_dump_emit_name(d, fname, last_was_ptr);
+ btf_dump_printf(d, "[%u]", a->nelems);
+ return;
+ }
+
+ next_id = decls->ids[decls->cnt - 1];
+ next_t = btf__type_by_id(d->btf, next_id);
+ multidim = btf_is_array(next_t);
+ /* we need space if we have named non-pointer */
+ if (fname[0] && !last_was_ptr)
+ btf_dump_printf(d, " ");
+ /* no parentheses for multi-dimensional array */
+ if (!multidim)
+ btf_dump_printf(d, "(");
+ btf_dump_emit_type_chain(d, decls, fname, lvl); /* rest of the chain (and fname) goes before our [N] */
+ if (!multidim)
+ btf_dump_printf(d, ")");
+ btf_dump_printf(d, "[%u]", a->nelems);
+ return;
+ }
+ case BTF_KIND_FUNC_PROTO: {
+ const struct btf_param *p = btf_params(t);
+ __u16 vlen = btf_vlen(t);
+ int i;
+
+ btf_dump_emit_mods(d, decls);
+ if (decls->cnt) { /* more entries left: they are emitted, with fname, inside parentheses */
+ btf_dump_printf(d, " (");
+ btf_dump_emit_type_chain(d, decls, fname, lvl);
+ btf_dump_printf(d, ")");
+ } else {
+ btf_dump_emit_name(d, fname, last_was_ptr);
+ }
+ btf_dump_printf(d, "(");
+ /*
+ * Clang for BPF target generates func_proto with no
+ * args as a func_proto with a single void arg (e.g.,
+ * `int (*f)(void)` vs just `int (*f)()`). We are
+ * going to pretend there are no args for such case.
+ */
+ if (vlen == 1 && p->type == 0) {
+ btf_dump_printf(d, ")");
+ return;
+ }
+
+ for (i = 0; i < vlen; i++, p++) {
+ if (i > 0)
+ btf_dump_printf(d, ", ");
+
+ /* last arg of type void is vararg */
+ if (i == vlen - 1 && p->type == 0) {
+ btf_dump_printf(d, "...");
+ break;
+ }
+
+ name = btf_name_of(d, p->name_off);
+ btf_dump_emit_type_decl(d, p->type, name, lvl); /* each parameter is a full declaration of its own */
+ }
+
+ btf_dump_printf(d, ")");
+ return;
+ }
+ default:
+ pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n",
+ kind, id);
+ return;
+ }
+
+ last_was_ptr = kind == BTF_KIND_PTR;
+ }
+
+ btf_dump_emit_name(d, fname, last_was_ptr); /* plain chain fully consumed: name goes last */
+}
+
+/*
+ * Bump the occurrence counter kept for orig_name in name_map and return the
+ * updated count (1 when the name is seen for the first time).
+ */
+static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
+				 const char *orig_name)
+{
+	size_t seen = 0;
+
+	/* the counter lives in the value slot itself; stays 0 if key is absent */
+	hashmap__find(name_map, orig_name, (void **)&seen);
+	seen += 1;
+	hashmap__set(name_map, orig_name, (void *)seen, NULL, NULL);
+
+	return seen;
+}
+
+/*
+ * Return the name to emit for type `id`, unique within name_map.
+ * Anonymous types yield "". The second and later users of a name get a
+ * "<name>___<N>" variant, which is strdup'ed into d->cached_names[id]; the
+ * decision is made once per type and remembered via name_resolved.
+ */
+static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id,
+					 struct hashmap *name_map)
+{
+	struct btf_dump_type_aux_state *state = &d->type_states[id];
+	const struct btf_type *type = btf__type_by_id(d->btf, id);
+	const char *base_name = btf_name_of(d, type->name_off);
+	const char **slot = &d->cached_names[id];
+
+	if (type->name_off == 0)
+		return "";
+
+	if (!state->name_resolved) {
+		size_t occurrence = btf_dump_name_dups(d, name_map, base_name);
+
+		if (occurrence > 1) {
+			char uniq[256];
+
+			snprintf(uniq, sizeof(uniq), "%s___%zu",
+				 base_name, occurrence);
+			*slot = strdup(uniq);
+		}
+		state->name_resolved = 1;
+	}
+
+	/* fall back to the original name when no unique variant was cached */
+	if (*slot)
+		return *slot;
+	return base_name;
+}
+
+/* Name of type `id`, resolved against the d->type_names map. */
+static const char *btf_dump_type_name(struct btf_dump *d, __u32 id)
+{
+	struct hashmap *names = d->type_names;
+
+	return btf_dump_resolve_name(d, id, names);
+}
+
+/* Name of type `id`, resolved against the d->ident_names map. */
+static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id)
+{
+	struct hashmap *names = d->ident_names;
+
+	return btf_dump_resolve_name(d, id, names);
+}
diff --git a/src/contrib/libbpf/bpf/hashmap.c b/src/contrib/libbpf/bpf/hashmap.c
new file mode 100644
index 0000000..6122272
--- /dev/null
+++ b/src/contrib/libbpf/bpf/hashmap.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Generic non-thread safe hash map implementation.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <linux/err.h>
+#include "hashmap.h"
+
+/* start with 4 buckets */
+#define HASHMAP_MIN_CAP_BITS 2
+
+/* Link entry in front of whatever *pprev currently points to. */
+static void hashmap_add_entry(struct hashmap_entry **pprev,
+			      struct hashmap_entry *entry)
+{
+	struct hashmap_entry *old_head = *pprev;
+
+	entry->next = old_head;
+	*pprev = entry;
+}
+
+/* Unlink entry from its chain; *pprev is the link that points at entry. */
+static void hashmap_del_entry(struct hashmap_entry **pprev,
+			      struct hashmap_entry *entry)
+{
+	struct hashmap_entry *successor = entry->next;
+
+	*pprev = successor;
+	entry->next = NULL;
+}
+
+/*
+ * Initialize a caller-provided map as empty. No bucket array is allocated
+ * here; the first insertion grows the map.
+ */
+void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
+		   hashmap_equal_fn equal_fn, void *ctx)
+{
+	/* empty storage */
+	map->buckets = NULL;
+	map->sz = 0;
+	map->cap = 0;
+	map->cap_bits = 0;
+
+	/* user callbacks and their context */
+	map->ctx = ctx;
+	map->equal_fn = equal_fn;
+	map->hash_fn = hash_fn;
+}
+
+/*
+ * Allocate and initialize an empty map.
+ * Returns the new map, or ERR_PTR(-ENOMEM) if allocation fails.
+ */
+struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
+			     hashmap_equal_fn equal_fn,
+			     void *ctx)
+{
+	struct hashmap *map;
+
+	map = malloc(sizeof(*map));
+	if (map)
+		hashmap__init(map, hash_fn, equal_fn, ctx);
+	else
+		map = ERR_PTR(-ENOMEM);
+
+	return map;
+}
+
+/*
+ * Drop all entries and the bucket array, leaving the map empty and reusable.
+ * Keys and values themselves are not freed.
+ */
+void hashmap__clear(struct hashmap *map)
+{
+	struct hashmap_entry *cur, *tmp;
+	size_t bkt;
+
+	/* every entry was malloc()'ed individually by hashmap__insert() */
+	hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
+		free(cur);
+	}
+
+	free(map->buckets);
+	/* no dangling pointer: the map may be cleared again or reused */
+	map->buckets = NULL;
+	map->cap = map->cap_bits = map->sz = 0;
+}
+
+/* Clear and free a map obtained from hashmap__new(); NULL is accepted. */
+void hashmap__free(struct hashmap *map)
+{
+	if (map) {
+		hashmap__clear(map);
+		free(map);
+	}
+}
+
+/* Number of entries currently stored in the map. */
+size_t hashmap__size(const struct hashmap *map)
+{
+	size_t nr_entries = map->sz;
+
+	return nr_entries;
+}
+
+/* Number of buckets currently allocated for the map. */
+size_t hashmap__capacity(const struct hashmap *map)
+{
+	size_t nr_buckets = map->cap;
+
+	return nr_buckets;
+}
+
+/* Tell whether the bucket array must grow before one more entry is added. */
+static bool hashmap_needs_to_grow(struct hashmap *map)
+{
+	/* an empty map has no bucket array yet */
+	if (map->cap == 0)
+		return true;
+
+	/* grow if more than 75% filled */
+	return (map->sz + 1) * 4 / 3 > map->cap;
+}
+
+/*
+ * Double the number of buckets (at least 1 << HASHMAP_MIN_CAP_BITS) and
+ * re-link all existing entries into the new bucket array.
+ * Returns 0 on success, -ENOMEM if the new array cannot be allocated; the
+ * map is left untouched in that case.
+ */
+static int hashmap_grow(struct hashmap *map)
+{
+	struct hashmap_entry **grown;
+	struct hashmap_entry *cur, *tmp;
+	size_t bits, nr_buckets, slot;
+	int bkt;
+
+	bits = map->cap_bits + 1;
+	if (bits < HASHMAP_MIN_CAP_BITS)
+		bits = HASHMAP_MIN_CAP_BITS;
+	nr_buckets = 1UL << bits;
+
+	grown = calloc(nr_buckets, sizeof(grown[0]));
+	if (!grown)
+		return -ENOMEM;
+
+	/* entries are reused as is; only their chain links change */
+	hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
+		slot = hash_bits(map->hash_fn(cur->key, map->ctx), bits);
+		hashmap_add_entry(&grown[slot], cur);
+	}
+
+	map->cap = nr_buckets;
+	map->cap_bits = bits;
+	free(map->buckets);
+	map->buckets = grown;
+
+	return 0;
+}
+
+/*
+ * Look for an entry equal to key in bucket number `hash`.
+ * On a hit, *entry is set to the entry and, if pprev is non-NULL, *pprev to
+ * the link pointing at it (usable with hashmap_del_entry()).
+ * Returns true if found; outputs are left untouched otherwise.
+ */
+static bool hashmap_find_entry(const struct hashmap *map,
+			       const void *key, size_t hash,
+			       struct hashmap_entry ***pprev,
+			       struct hashmap_entry **entry)
+{
+	struct hashmap_entry **link, *node;
+
+	if (!map->buckets)
+		return false;
+
+	link = &map->buckets[hash];
+	while ((node = *link) != NULL) {
+		if (map->equal_fn(node->key, key, map->ctx)) {
+			if (pprev)
+				*pprev = link;
+			*entry = node;
+			return true;
+		}
+		link = &node->next;
+	}
+
+	return false;
+}
+
+/*
+ * Insert key/value according to strategy.
+ * old_key/old_value (each optional) are reset to NULL and, when an existing
+ * entry with an equal key is found (never searched for with HASHMAP_APPEND),
+ * receive that entry's key and value.
+ * Returns 0 on success, -EEXIST (HASHMAP_ADD on an existing key), -ENOENT
+ * (HASHMAP_UPDATE on a missing key) or -ENOMEM.
+ */
+int hashmap__insert(struct hashmap *map, const void *key, void *value,
+		    enum hashmap_insert_strategy strategy,
+		    const void **old_key, void **old_value)
+{
+	struct hashmap_entry *entry;
+	size_t slot;
+	int err;
+
+	if (old_key)
+		*old_key = NULL;
+	if (old_value)
+		*old_value = NULL;
+
+	slot = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	if (strategy != HASHMAP_APPEND &&
+	    hashmap_find_entry(map, key, slot, NULL, &entry)) {
+		if (old_key)
+			*old_key = entry->key;
+		if (old_value)
+			*old_value = entry->value;
+
+		switch (strategy) {
+		case HASHMAP_SET:
+		case HASHMAP_UPDATE:
+			/* overwrite in place, key pointer included */
+			entry->key = key;
+			entry->value = value;
+			return 0;
+		case HASHMAP_ADD:
+			return -EEXIST;
+		default:
+			break;
+		}
+	}
+
+	if (strategy == HASHMAP_UPDATE)
+		return -ENOENT;
+
+	if (hashmap_needs_to_grow(map)) {
+		err = hashmap_grow(map);
+		if (err)
+			return err;
+		/* bucket count changed, so the slot has to be recomputed */
+		slot = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+	}
+
+	entry = malloc(sizeof(*entry));
+	if (!entry)
+		return -ENOMEM;
+
+	entry->key = key;
+	entry->value = value;
+	hashmap_add_entry(&map->buckets[slot], entry);
+	map->sz++;
+
+	return 0;
+}
+
+/*
+ * Look key up in the map. Returns true if present; in that case *value
+ * (when value is non-NULL) receives the stored value.
+ */
+bool hashmap__find(const struct hashmap *map, const void *key, void **value)
+{
+	struct hashmap_entry *hit;
+	size_t slot = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+
+	if (hashmap_find_entry(map, key, slot, NULL, &hit)) {
+		if (value)
+			*value = hit->value;
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Remove the entry matching key, if any. The removed entry's key and value
+ * are handed back through old_key/old_value (each optional).
+ * Returns true if an entry was removed.
+ */
+bool hashmap__delete(struct hashmap *map, const void *key,
+		     const void **old_key, void **old_value)
+{
+	struct hashmap_entry **link, *victim;
+	size_t slot = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+
+	if (!hashmap_find_entry(map, key, slot, &link, &victim))
+		return false;
+
+	if (old_key)
+		*old_key = victim->key;
+	if (old_value)
+		*old_value = victim->value;
+
+	hashmap_del_entry(link, victim);
+	free(victim);
+	map->sz--;
+
+	return true;
+}
+
+
diff --git a/src/contrib/libbpf/bpf/hashmap.h b/src/contrib/libbpf/bpf/hashmap.h
new file mode 100644
index 0000000..bae8879
--- /dev/null
+++ b/src/contrib/libbpf/bpf/hashmap.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Generic non-thread safe hash map implementation.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#ifndef __LIBBPF_HASHMAP_H
+#define __LIBBPF_HASHMAP_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#ifdef __GLIBC__
+#include <bits/wordsize.h>
+#else
+#include <bits/reg.h>
+#endif
+#include "libbpf_internal.h"
+
+/*
+ * Reduce a full-width hash value to a bucket index that is `bits` bits wide.
+ * bits == 0 (a map without buckets) always yields index 0.
+ */
+static inline size_t hash_bits(size_t h, int bits)
+{
+	/*
+	 * Shifting by the full word width is undefined behavior, so the
+	 * zero-bits case must not reach the shift below.
+	 */
+	if (bits == 0)
+		return 0;
+
+	/* shuffle bits and return requested number of upper bits */
+	return (h * 11400714819323198485llu) >> (__WORDSIZE - bits);
+}
+
+typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx); /* hashes a key; ctx is the map's ctx pointer */
+typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx); /* key equality test; ctx is the map's ctx pointer */
+
+struct hashmap_entry { /* one key/value pair, a node of a bucket's singly linked chain */
+ const void *key; /* stored as passed in; the map never frees it */
+ void *value; /* stored as passed in; the map never frees it */
+ struct hashmap_entry *next; /* next entry in the same bucket, or NULL */
+};
+
+struct hashmap {
+ hashmap_hash_fn hash_fn; /* user callback computing a key's hash */
+ hashmap_equal_fn equal_fn; /* user callback comparing two keys */
+ void *ctx; /* passed as last argument to both callbacks */
+
+ struct hashmap_entry **buckets; /* array of cap chain heads; NULL until the first insert */
+ size_t cap; /* number of buckets, 1 << cap_bits once allocated */
+ size_t cap_bits; /* number of hash bits used to select a bucket */
+ size_t sz; /* number of stored entries */
+};
+
+/*
+ * Static initializer for an empty struct hashmap.
+ *
+ * Macro parameters are deliberately spelled differently from the struct
+ * members: a parameter named like a member would be substituted inside the
+ * ".member =" designator as well and break the initializer for any argument
+ * that is not literally named like the member.
+ */
+#define HASHMAP_INIT(_hash_fn, _equal_fn, _ctx) {	\
+	.hash_fn = (_hash_fn),				\
+	.equal_fn = (_equal_fn),			\
+	.ctx = (_ctx),					\
+	.buckets = NULL,				\
+	.cap = 0,					\
+	.cap_bits = 0,					\
+	.sz = 0,					\
+}
+
+void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
+ hashmap_equal_fn equal_fn, void *ctx);
+struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
+ hashmap_equal_fn equal_fn,
+ void *ctx);
+void hashmap__clear(struct hashmap *map);
+void hashmap__free(struct hashmap *map);
+
+size_t hashmap__size(const struct hashmap *map);
+size_t hashmap__capacity(const struct hashmap *map);
+
+/*
+ * Hashmap insertion strategy:
+ * - HASHMAP_ADD - only add key/value if key doesn't exist yet;
+ * - HASHMAP_SET - add key/value pair if key doesn't exist yet; otherwise,
+ * update value;
+ * - HASHMAP_UPDATE - update value, if key already exists; otherwise, do
+ * nothing and return -ENOENT;
+ * - HASHMAP_APPEND - always add key/value pair, even if key already exists.
+ * This turns hashmap into a multimap by allowing multiple values to be
+ * associated with the same key. Most useful read API for such hashmap is
+ * hashmap__for_each_key_entry() iteration. If hashmap__find() is still
+ * used, it will return last inserted key/value entry (first in a bucket
+ * chain).
+ */
+enum hashmap_insert_strategy {
+ HASHMAP_ADD, /* fail with -EEXIST if key is already present */
+ HASHMAP_SET, /* insert, or overwrite an existing entry */
+ HASHMAP_UPDATE, /* overwrite only; -ENOENT if key is missing */
+ HASHMAP_APPEND, /* always insert, allowing duplicate keys */
+};
+
+/*
+ * hashmap__insert() adds key/value entry w/ various semantics, depending on
+ * provided strategy value. If a given key/value pair replaced an already
+ * existing key/value pair, both old key and old value will be returned
+ * through old_key and old_value to allow calling code to do proper memory
+ * management.
+ */
+int hashmap__insert(struct hashmap *map, const void *key, void *value,
+ enum hashmap_insert_strategy strategy,
+ const void **old_key, void **old_value);
+
+/* Insert key/value only if key is not present yet (HASHMAP_ADD). */
+static inline int hashmap__add(struct hashmap *map,
+			       const void *key, void *value)
+{
+	const enum hashmap_insert_strategy how = HASHMAP_ADD;
+
+	return hashmap__insert(map, key, value, how, NULL, NULL);
+}
+
+/* Insert key/value, replacing an existing entry for key (HASHMAP_SET). */
+static inline int hashmap__set(struct hashmap *map,
+			       const void *key, void *value,
+			       const void **old_key, void **old_value)
+{
+	const enum hashmap_insert_strategy how = HASHMAP_SET;
+
+	return hashmap__insert(map, key, value, how, old_key, old_value);
+}
+
+/* Replace the entry for an already present key (HASHMAP_UPDATE). */
+static inline int hashmap__update(struct hashmap *map,
+				  const void *key, void *value,
+				  const void **old_key, void **old_value)
+{
+	const enum hashmap_insert_strategy how = HASHMAP_UPDATE;
+
+	return hashmap__insert(map, key, value, how, old_key, old_value);
+}
+
+/* Always insert key/value, even if key is already present (HASHMAP_APPEND). */
+static inline int hashmap__append(struct hashmap *map,
+				  const void *key, void *value)
+{
+	const enum hashmap_insert_strategy how = HASHMAP_APPEND;
+
+	return hashmap__insert(map, key, value, how, NULL, NULL);
+}
+
+bool hashmap__delete(struct hashmap *map, const void *key,
+ const void **old_key, void **old_value);
+
+bool hashmap__find(const struct hashmap *map, const void *key, void **value);
+
+/*
+ * hashmap__for_each_entry - iterate over all entries in hashmap
+ * @map: hashmap to iterate (any pointer expression; it is evaluated
+ *       multiple times)
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @bkt: integer used as a bucket loop cursor
+ */
+#define hashmap__for_each_entry(map, cur, bkt)				\
+	for (bkt = 0; bkt < (map)->cap; bkt++)				\
+		for (cur = (map)->buckets[bkt]; cur; cur = cur->next)
+
+/*
+ * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe
+ * against removals
+ * @map: hashmap to iterate (any pointer expression; it is evaluated
+ *       multiple times)
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @tmp: struct hashmap_entry * used as a temporary next cursor storage
+ * @bkt: integer used as a bucket loop cursor
+ */
+#define hashmap__for_each_entry_safe(map, cur, tmp, bkt)		\
+	for (bkt = 0; bkt < (map)->cap; bkt++)				\
+		for (cur = (map)->buckets[bkt];				\
+		     cur && ({tmp = cur->next; true; });		\
+		     cur = tmp)
+
+/*
+ * hashmap__for_each_key_entry - iterate over entries associated with given key
+ * @map: hashmap to iterate (any pointer expression; it is evaluated
+ *       multiple times)
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @key: key to iterate entries for
+ *
+ * The key is hashed only when the map has buckets, so an empty map is
+ * handled without calling hash_bits() with zero bits.
+ */
+#define hashmap__for_each_key_entry(map, cur, _key)			\
+	for (cur = (map)->buckets					\
+		? (map)->buckets[hash_bits((map)->hash_fn((_key),	\
+							  (map)->ctx),	\
+					   (map)->cap_bits)]		\
+		: NULL;							\
+	     cur;							\
+	     cur = cur->next)						\
+		if ((map)->equal_fn(cur->key, (_key), (map)->ctx))
+
+/*
+ * hashmap__for_each_key_entry_safe - iterate over entries associated with
+ * given key, safe against removal of the current entry
+ * @map: hashmap to iterate (any pointer expression; it is evaluated
+ *       multiple times)
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @tmp: struct hashmap_entry * used as a temporary next cursor storage
+ * @key: key to iterate entries for
+ *
+ * The key is hashed only when the map has buckets.
+ */
+#define hashmap__for_each_key_entry_safe(map, cur, tmp, _key)		\
+	for (cur = (map)->buckets					\
+		? (map)->buckets[hash_bits((map)->hash_fn((_key),	\
+							  (map)->ctx),	\
+					   (map)->cap_bits)]		\
+		: NULL;							\
+	     cur && ({ tmp = cur->next; true; });			\
+	     cur = tmp)							\
+		if ((map)->equal_fn(cur->key, (_key), (map)->ctx))
+
+#endif /* __LIBBPF_HASHMAP_H */
diff --git a/src/contrib/libbpf/bpf/libbpf.c b/src/contrib/libbpf/bpf/libbpf.c
new file mode 100644
index 0000000..29d8d03
--- /dev/null
+++ b/src/contrib/libbpf/bpf/libbpf.c
@@ -0,0 +1,6581 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Common eBPF ELF object loading operations.
+ *
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ * Copyright (C) 2017 Nicira, Inc.
+ * Copyright (C) 2019 Isovalent, Inc.
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <libgen.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <endian.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <asm/unistd.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/filter.h>
+#include <linux/list.h>
+#include <linux/limits.h>
+#include <linux/perf_event.h>
+#include <linux/ring_buffer.h>
+#include <linux/version.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+#include <sys/utsname.h>
+#include <libelf.h>
+#include <gelf.h>
+
+#include "libbpf.h"
+#include "bpf.h"
+#include "btf.h"
+#include "str_error.h"
+#include "libbpf_internal.h"
+#include "hashmap.h"
+
+#ifndef EM_BPF
+#define EM_BPF 247
+#endif
+
+#ifndef BPF_FS_MAGIC
+#define BPF_FS_MAGIC 0xcafe4a11
+#endif
+
+/* vsprintf() in __base_pr() uses nonliteral format string. It may break
+ * compilation if user enables corresponding warning. Disable it explicitly.
+ */
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+
+#define __printf(a, b) __attribute__((format(printf, a, b)))
+
+/*
+ * Default print callback: debug-level messages are dropped (0 is returned),
+ * everything else is written to stderr and vfprintf()'s result is returned.
+ */
+static int __base_pr(enum libbpf_print_level level, const char *format,
+		     va_list args)
+{
+	return level == LIBBPF_DEBUG ? 0 : vfprintf(stderr, format, args);
+}
+
+static libbpf_print_fn_t __libbpf_pr = __base_pr; /* active print callback; replaced via libbpf_set_print(), may be NULL */
+
+/*
+ * Install fn as the print callback (NULL silences libbpf_print()) and
+ * return the callback that was active before.
+ */
+libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
+{
+	libbpf_print_fn_t previous;
+
+	previous = __libbpf_pr;
+	__libbpf_pr = fn;
+
+	return previous;
+}
+
+/*
+ * printf-style front end: forwards the message to the currently installed
+ * print callback, or does nothing when no callback is set.
+ */
+__printf(2, 3)
+void libbpf_print(enum libbpf_print_level level, const char *format, ...)
+{
+	va_list ap;
+
+	if (__libbpf_pr) {
+		va_start(ap, format);
+		__libbpf_pr(level, format, ap);
+		va_end(ap);
+	}
+}
+
+#define STRERR_BUFSIZE 128 /* size of the on-stack buffers handed to libbpf_strerror_r() */
+
+#define CHECK_ERR(action, err, out) do { /* run action, store its result in err, jump to label out if non-zero */ \
+ err = action; \
+ if (err) \
+ goto out; \
+} while (0)
+
+
+/*
+ * Copied from tools/perf/util/util.h
+ *
+ * zfree(ptr): free the memory *ptr points to and reset *ptr to NULL.
+ * The argument is parenthesized so that any pointer expression (e.g.
+ * `base + 1`) is dereferenced as a whole; it is evaluated twice.
+ */
+#ifndef zfree
+# define zfree(ptr) ({ free(*(ptr)); *(ptr) = NULL; })
+#endif
+
+#ifndef zclose
+# define zclose(fd) ({ /* close fd if it is >= 0, then set it to -1; yields close()'s result, or 0 if nothing was closed */ \
+ int ___err = 0; \
+ if ((fd) >= 0) \
+ ___err = close((fd)); \
+ fd = -1; \
+ ___err; })
+#endif
+
+#ifdef HAVE_LIBELF_MMAP_SUPPORT
+# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ_MMAP
+#else
+# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ
+#endif
+
+/* Convert a pointer to __u64, going through unsigned long. */
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+	unsigned long addr = (unsigned long) ptr;
+
+	return (__u64) addr;
+}
+
+struct bpf_capabilities { /* one-bit feature flags; each is described by the comment above it */
+ /* v4.14: kernel support for program & map names. */
+ __u32 name:1;
+ /* v5.2: kernel support for global data sections. */
+ __u32 global_data:1;
+ /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
+ __u32 btf_func:1;
+ /* BTF_KIND_VAR and BTF_KIND_DATASEC support */
+ __u32 btf_datasec:1;
+ /* BPF_F_MMAPABLE is supported for arrays */
+ __u32 array_mmap:1;
+};
+
+/*
+ * bpf_prog should be a better name but it has been used in
+ * linux/filter.h.
+ */
+struct bpf_program {
+ /* Index in elf obj file, for relocation use. */
+ int idx;
+ char *name; /* strdup'ed symbol name, set by bpf_object__init_prog_names() */
+ int prog_ifindex;
+ char *section_name; /* strdup'ed ELF section name, set by bpf_program__init() */
+ /* section_name with / replaced by _; makes recursive pinning
+ * in bpf_object__pin_programs easier
+ */
+ char *pin_name;
+ struct bpf_insn *insns; /* malloc'ed copy of the section's instructions */
+ size_t insns_cnt, main_prog_cnt; /* insns_cnt: number of elements in insns */
+ enum bpf_prog_type type; /* BPF_PROG_TYPE_UNSPEC right after init */
+
+ struct reloc_desc {
+ enum {
+ RELO_LD64,
+ RELO_CALL,
+ RELO_DATA,
+ } type;
+ int insn_idx;
+ int map_idx;
+ int sym_off;
+ } *reloc_desc;
+ int nr_reloc; /* NOTE(review): presumably the number of reloc_desc elements - confirm */
+ int log_level;
+
+ struct {
+ int nr; /* -1 while the program has not been loaded (see bpf_program__unload()) */
+ int *fds; /* nr file descriptors, closed by bpf_program__unload() */
+ } instances;
+ bpf_program_prep_t preprocessor;
+
+ struct bpf_object *obj; /* owning object, set by bpf_object__add_program() */
+ void *priv; /* user data, released through clear_priv in bpf_program__exit() */
+ bpf_program_clear_priv_t clear_priv;
+
+ enum bpf_attach_type expected_attach_type;
+ __u32 attach_btf_id;
+ __u32 attach_prog_fd;
+ void *func_info; /* freed by bpf_program__unload() */
+ __u32 func_info_rec_size;
+ __u32 func_info_cnt;
+
+ struct bpf_capabilities *caps; /* points at the owning object's caps */
+
+ void *line_info; /* freed by bpf_program__unload() */
+ __u32 line_info_rec_size;
+ __u32 line_info_cnt;
+ __u32 prog_flags;
+};
+
+enum libbpf_map_type { /* also used as index into libbpf_type_to_btf_name[] */
+ LIBBPF_MAP_UNSPEC, /* has no entry in libbpf_type_to_btf_name[] */
+ LIBBPF_MAP_DATA, /* ".data" */
+ LIBBPF_MAP_BSS, /* ".bss" */
+ LIBBPF_MAP_RODATA, /* ".rodata" */
+};
+
+static const char * const libbpf_type_to_btf_name[] = { /* indexed by enum libbpf_map_type; LIBBPF_MAP_UNSPEC slot is NULL */
+ [LIBBPF_MAP_DATA] = ".data",
+ [LIBBPF_MAP_BSS] = ".bss",
+ [LIBBPF_MAP_RODATA] = ".rodata",
+};
+
+struct bpf_map {
+ int fd;
+ char *name;
+ int sec_idx; /* primary sort key in compare_bpf_map() */
+ size_t sec_offset; /* secondary sort key in compare_bpf_map() */
+ int map_ifindex;
+ int inner_map_fd;
+ struct bpf_map_def def;
+ __u32 btf_key_type_id;
+ __u32 btf_value_type_id;
+ void *priv;
+ bpf_map_clear_priv_t clear_priv;
+ enum libbpf_map_type libbpf_type;
+ char *pin_path;
+ bool pinned;
+ bool reused;
+};
+
+struct bpf_secdata { /* NOTE(review): presumably holds contents of the .rodata/.data sections - confirm against users */
+ void *rodata;
+ void *data;
+};
+
+static LIST_HEAD(bpf_objects_list); /* bpf_object__new() links every object it creates into this list */
+
+struct bpf_object {
+ char name[BPF_OBJ_NAME_LEN]; /* obj_name, or basename of path cut at the first '.' (bpf_object__new()) */
+ char license[64]; /* filled by bpf_object__init_license() */
+ __u32 kern_version; /* running kernel's version unless overridden by bpf_object__init_kversion() */
+
+ struct bpf_program *programs; /* array grown by bpf_object__add_program() */
+ size_t nr_programs; /* number of elements in programs */
+ struct bpf_map *maps;
+ size_t nr_maps;
+ size_t maps_cap;
+ struct bpf_secdata sections;
+
+ bool loaded;
+ bool has_pseudo_calls;
+ bool relaxed_core_relocs;
+
+ /*
+ * Information when doing elf related work. Only valid if fd
+ * is valid.
+ */
+ struct {
+ int fd; /* -1 when no file is open */
+ const void *obj_buf; /* caller's in-memory ELF image; not copied (see bpf_object__new()) */
+ size_t obj_buf_sz; /* > 0 selects elf_memory() over opening path */
+ Elf *elf; /* non-NULL between bpf_object__elf_init() and bpf_object__elf_finish() */
+ GElf_Ehdr ehdr;
+ Elf_Data *symbols;
+ Elf_Data *data;
+ Elf_Data *rodata;
+ Elf_Data *bss;
+ size_t strtabidx;
+ struct {
+ GElf_Shdr shdr;
+ Elf_Data *data;
+ } *reloc_sects;
+ int nr_reloc_sects;
+ int maps_shndx; /* the *_shndx fields initialized in bpf_object__new() start as -1 */
+ int btf_maps_shndx;
+ int text_shndx;
+ int data_shndx;
+ int rodata_shndx;
+ int bss_shndx;
+ } efile;
+ /*
+ * All loaded bpf_object is linked in a list, which is
+ * hidden to caller. bpf_objects__<func> handlers deal with
+ * all objects.
+ */
+ struct list_head list;
+
+ struct btf *btf;
+ struct btf_ext *btf_ext;
+
+ void *priv;
+ bpf_object_clear_priv_t clear_priv;
+
+ struct bpf_capabilities caps;
+
+ char path[]; /* copy of the path given to bpf_object__new(), allocated together with the struct */
+};
+#define obj_elf_valid(o) ((o)->efile.elf) /* non-NULL while an Elf handle is held for the object */
+
+/*
+ * Close all instance fds of prog and free its instance, func_info and
+ * line_info storage. prog->instances.nr is left at -1. NULL is accepted.
+ */
+void bpf_program__unload(struct bpf_program *prog)
+{
+	int nr, i;
+
+	if (!prog)
+		return;
+
+	nr = prog->instances.nr;
+	/*
+	 * If the object is opened but the program was never loaded,
+	 * it is possible that prog->instances.nr == -1.
+	 */
+	if (nr > 0) {
+		for (i = 0; i < nr; i++)
+			zclose(prog->instances.fds[i]);
+	} else if (nr != -1) {
+		pr_warn("Internal error: instances.nr is %d\n",
+			prog->instances.nr);
+	}
+
+	prog->instances.nr = -1;
+	zfree(&prog->instances.fds);
+
+	zfree(&prog->func_info);
+	zfree(&prog->line_info);
+}
+
+/*
+ * Release everything owned by prog (private data, instances, names,
+ * instructions, relocation descriptors) and reset its counters.
+ * The struct itself is not freed. NULL is accepted.
+ */
+static void bpf_program__exit(struct bpf_program *prog)
+{
+	if (!prog)
+		return;
+
+	/* let the user release private data before dropping the reference */
+	if (prog->clear_priv)
+		prog->clear_priv(prog, prog->priv);
+	prog->clear_priv = NULL;
+	prog->priv = NULL;
+
+	bpf_program__unload(prog);
+
+	zfree(&prog->name);
+	zfree(&prog->section_name);
+	zfree(&prog->pin_name);
+	zfree(&prog->insns);
+	zfree(&prog->reloc_desc);
+
+	prog->idx = -1;
+	prog->insns_cnt = 0;
+	prog->nr_reloc = 0;
+}
+
+/*
+ * Build the pin name of prog: a newly allocated copy of its section name
+ * with every '/' replaced by '_'.
+ * Returns NULL if memory allocation fails; the caller owns the result.
+ */
+static char *__bpf_program__pin_name(struct bpf_program *prog)
+{
+	char *name, *p;
+
+	name = strdup(prog->section_name);
+	/* strchr() must not be handed a NULL string */
+	if (!name)
+		return NULL;
+
+	for (p = strchr(name, '/'); p; p = strchr(p + 1, '/'))
+		*p = '_';
+
+	return name;
+}
+
+/*
+ * Initialize *prog from the raw contents of an ELF program section.
+ * @data/@size: instruction bytes; size must be a non-zero multiple of
+ *              sizeof(struct bpf_insn)
+ * @section_name: name of the section, copied
+ * @idx: index of the section in the ELF file
+ * Returns 0 on success, -EINVAL for a bad size, -ENOMEM on allocation
+ * failure (prog is torn down again with bpf_program__exit() in that case).
+ */
+static int
+bpf_program__init(void *data, size_t size, char *section_name, int idx,
+		  struct bpf_program *prog)
+{
+	const size_t insn_sz = sizeof(struct bpf_insn);
+
+	if (size == 0 || size % insn_sz) {
+		pr_warn("corrupted section '%s', size: %zu\n",
+			section_name, size);
+		return -EINVAL;
+	}
+
+	memset(prog, 0, sizeof(*prog));
+
+	prog->section_name = strdup(section_name);
+	if (!prog->section_name) {
+		pr_warn("failed to alloc name for prog under section(%d) %s\n",
+			idx, section_name);
+		goto errout;
+	}
+
+	prog->pin_name = __bpf_program__pin_name(prog);
+	if (!prog->pin_name) {
+		pr_warn("failed to alloc pin name for prog under section(%d) %s\n",
+			idx, section_name);
+		goto errout;
+	}
+
+	prog->insns = malloc(size);
+	if (!prog->insns) {
+		pr_warn("failed to alloc insns for prog under section %s\n",
+			section_name);
+		goto errout;
+	}
+	memcpy(prog->insns, data, size);
+	prog->insns_cnt = size / insn_sz;
+
+	prog->idx = idx;
+	prog->type = BPF_PROG_TYPE_UNSPEC;
+	/* no loaded instances yet */
+	prog->instances.nr = -1;
+	prog->instances.fds = NULL;
+
+	return 0;
+errout:
+	bpf_program__exit(prog);
+	return -ENOMEM;
+}
+
+/*
+ * Create a program from section contents and append it to obj->programs.
+ * Returns 0 on success or a negative error; on failure obj->programs is
+ * left as it was.
+ */
+static int
+bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
+			char *section_name, int idx)
+{
+	struct bpf_program prog, *progs;
+	int nr_progs, err;
+
+	err = bpf_program__init(data, size, section_name, idx, &prog);
+	if (err)
+		return err;
+
+	nr_progs = obj->nr_programs;
+	prog.caps = &obj->caps;
+
+	progs = reallocarray(obj->programs, nr_progs + 1, sizeof(progs[0]));
+	if (!progs) {
+		/*
+		 * In this case the original obj->programs
+		 * is still valid, so don't need special treat for
+		 * bpf_close_object().
+		 */
+		pr_warn("failed to alloc a new program under section '%s'\n",
+			section_name);
+		bpf_program__exit(&prog);
+		return -ENOMEM;
+	}
+
+	pr_debug("found program %s\n", prog.section_name);
+	prog.obj = obj;
+	/* the array slot takes over everything the local prog owns */
+	progs[nr_progs] = prog;
+	obj->programs = progs;
+	obj->nr_programs = nr_progs + 1;
+	return 0;
+}
+
+/*
+ * Give every program of obj a name: the name of the first global symbol
+ * located in the program's section, or ".text" for the text section when no
+ * such symbol exists.
+ * Returns 0 on success, -LIBBPF_ERRNO__LIBELF if a symbol name cannot be
+ * read, -EINVAL if no name is found, -ENOMEM on allocation failure.
+ */
+static int
+bpf_object__init_prog_names(struct bpf_object *obj)
+{
+	Elf_Data *symbols = obj->efile.symbols;
+	struct bpf_program *prog;
+	size_t pi, si, nr_syms;
+
+	for (pi = 0; pi < obj->nr_programs; pi++) {
+		const char *name = NULL;
+
+		prog = &obj->programs[pi];
+
+		nr_syms = symbols->d_size / sizeof(GElf_Sym);
+		for (si = 0; si < nr_syms; si++) {
+			GElf_Sym sym;
+
+			if (!gelf_getsym(symbols, si, &sym))
+				continue;
+			if (sym.st_shndx != prog->idx)
+				continue;
+			if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL)
+				continue;
+
+			name = elf_strptr(obj->efile.elf,
+					  obj->efile.strtabidx,
+					  sym.st_name);
+			if (!name) {
+				pr_warn("failed to get sym name string for prog %s\n",
+					prog->section_name);
+				return -LIBBPF_ERRNO__LIBELF;
+			}
+			/* first matching symbol wins */
+			break;
+		}
+
+		if (!name && prog->idx == obj->efile.text_shndx)
+			name = ".text";
+
+		if (!name) {
+			pr_warn("failed to find sym for prog %s\n",
+				prog->section_name);
+			return -EINVAL;
+		}
+
+		prog->name = strdup(name);
+		if (!prog->name) {
+			pr_warn("failed to allocate memory for prog sym %s\n",
+				name);
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Return the running kernel's version encoded with KERNEL_VERSION(), or 0
+ * if it cannot be determined (uname() failure or a release string that does
+ * not start with "major.minor.patch").
+ */
+static __u32 get_kernel_version(void)
+{
+	__u32 major, minor, patch;
+	struct utsname info;
+
+	/* on failure info is not filled in, so it must not be parsed */
+	if (uname(&info) < 0)
+		return 0;
+	if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
+		return 0;
+	return KERNEL_VERSION(major, minor, patch);
+}
+
+/*
+ * Allocate a bpf_object for the ELF at path (or the in-memory image
+ * obj_buf/obj_buf_sz) and link it into bpf_objects_list.
+ * The object name is obj_name if given, otherwise the basename of path cut
+ * at its first '.'; both are truncated to fit obj->name.
+ * Returns the new object or ERR_PTR(-ENOMEM).
+ */
+static struct bpf_object *bpf_object__new(const char *path,
+					  const void *obj_buf,
+					  size_t obj_buf_sz,
+					  const char *obj_name)
+{
+	struct bpf_object *obj;
+	char *dot;
+
+	/* path is stored in the flexible array member at the end */
+	obj = calloc(1, sizeof(*obj) + strlen(path) + 1);
+	if (!obj) {
+		pr_warn("alloc memory failed for %s\n", path);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	strcpy(obj->path, path);
+	if (!obj_name) {
+		/* Using basename() GNU version which doesn't modify arg. */
+		strncpy(obj->name, basename((void *)path),
+			sizeof(obj->name) - 1);
+		dot = strchr(obj->name, '.');
+		if (dot)
+			*dot = 0;
+	} else {
+		strncpy(obj->name, obj_name, sizeof(obj->name) - 1);
+		obj->name[sizeof(obj->name) - 1] = 0;
+	}
+
+	/*
+	 * Caller of this function should also call
+	 * bpf_object__elf_finish() after data collection to return
+	 * obj_buf to user. If not, we should duplicate the buffer to
+	 * avoid user freeing them before elf finish.
+	 */
+	obj->efile.obj_buf = obj_buf;
+	obj->efile.obj_buf_sz = obj_buf_sz;
+
+	/* nothing opened or located yet */
+	obj->efile.fd = -1;
+	obj->efile.maps_shndx = -1;
+	obj->efile.btf_maps_shndx = -1;
+	obj->efile.data_shndx = -1;
+	obj->efile.rodata_shndx = -1;
+	obj->efile.bss_shndx = -1;
+
+	obj->kern_version = get_kernel_version();
+	obj->loaded = false;
+
+	INIT_LIST_HEAD(&obj->list);
+	list_add(&obj->list, &bpf_objects_list);
+	return obj;
+}
+
+/*
+ * Release the ELF handle of obj together with everything that is only
+ * meaningful while it is held: cached section data pointers, relocation
+ * section list, file descriptor and the reference to the caller's buffer.
+ * Does nothing when obj holds no ELF handle.
+ */
+static void bpf_object__elf_finish(struct bpf_object *obj)
+{
+	if (!obj_elf_valid(obj))
+		return;
+
+	elf_end(obj->efile.elf);
+	obj->efile.elf = NULL;
+
+	/* these pointed into data owned by the Elf handle */
+	obj->efile.symbols = NULL;
+	obj->efile.data = NULL;
+	obj->efile.rodata = NULL;
+	obj->efile.bss = NULL;
+
+	zfree(&obj->efile.reloc_sects);
+	obj->efile.nr_reloc_sects = 0;
+	zclose(obj->efile.fd);
+	obj->efile.obj_buf = NULL;
+	obj->efile.obj_buf_sz = 0;
+}
+
+/*
+ * Open the ELF behind obj (in-memory image if obj_buf_sz > 0, otherwise the
+ * file at obj->path), read its ELF header and verify that it is a
+ * relocatable eBPF object.
+ * Returns 0 on success; on failure a negative errno or -LIBBPF_ERRNO__* code
+ * is returned and neither an Elf handle nor an open fd is left behind.
+ */
+static int bpf_object__elf_init(struct bpf_object *obj)
+{
+	int err = 0;
+	GElf_Ehdr *ep;
+
+	if (obj_elf_valid(obj)) {
+		pr_warn("elf init: internal error\n");
+		return -LIBBPF_ERRNO__LIBELF;
+	}
+
+	if (obj->efile.obj_buf_sz > 0) {
+		/*
+		 * obj_buf should have been validated by
+		 * bpf_object__open_buffer().
+		 */
+		obj->efile.elf = elf_memory((char *)obj->efile.obj_buf,
+					    obj->efile.obj_buf_sz);
+	} else {
+		obj->efile.fd = open(obj->path, O_RDONLY);
+		if (obj->efile.fd < 0) {
+			char errmsg[STRERR_BUFSIZE], *cp;
+
+			err = -errno;
+			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+			pr_warn("failed to open %s: %s\n", obj->path, cp);
+			return err;
+		}
+
+		obj->efile.elf = elf_begin(obj->efile.fd,
+					   LIBBPF_ELF_C_READ_MMAP, NULL);
+	}
+
+	if (!obj->efile.elf) {
+		pr_warn("failed to open %s as ELF file\n", obj->path);
+		err = -LIBBPF_ERRNO__LIBELF;
+		goto errout;
+	}
+
+	if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
+		pr_warn("failed to get EHDR from %s\n", obj->path);
+		err = -LIBBPF_ERRNO__FORMAT;
+		goto errout;
+	}
+	ep = &obj->efile.ehdr;
+
+	/* Old LLVM set e_machine to EM_NONE */
+	if (ep->e_type != ET_REL ||
+	    (ep->e_machine && ep->e_machine != EM_BPF)) {
+		pr_warn("%s is not an eBPF object file\n", obj->path);
+		err = -LIBBPF_ERRNO__FORMAT;
+		goto errout;
+	}
+
+	return 0;
+errout:
+	bpf_object__elf_finish(obj);
+	/*
+	 * bpf_object__elf_finish() does nothing when no Elf handle exists
+	 * (e.g. elf_begin() failed), so the fd opened above has to be closed
+	 * here; this is a no-op if the fd is already -1.
+	 */
+	zclose(obj->efile.fd);
+	return err;
+}
+
+/*
+ * Check that the data encoding recorded in the object's ELF identification
+ * bytes matches the byte order this library was compiled for.
+ *
+ * Returns 0 on a match, -LIBBPF_ERRNO__ENDIAN otherwise.
+ */
+static int bpf_object__check_endianness(struct bpf_object *obj)
+{
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+	if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB)
+		return 0;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+	if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB)
+		return 0;
+#else
+	/* name the macro that is actually tested above */
+# error "Unrecognized __BYTE_ORDER"
+#endif
+	pr_warn("endianness mismatch.\n");
+	return -LIBBPF_ERRNO__ENDIAN;
+}
+
+/*
+ * Copy the contents of the "license" section into obj->license.
+ * At most sizeof(obj->license) - 1 bytes are copied, so the last byte of
+ * the destination is never written by this function. Always returns 0.
+ */
+static int
+bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
+{
+	const size_t room = sizeof(obj->license) - 1;
+	size_t len = size;
+
+	if (len > room)
+		len = room;
+
+	memcpy(obj->license, data, len);
+	pr_debug("license of %s is %s\n", obj->path, obj->license);
+	return 0;
+}
+
+/*
+ * Read the kernel version stored in the "version" section into
+ * obj->kern_version. The section must hold exactly one __u32.
+ *
+ * Returns 0 on success, -LIBBPF_ERRNO__FORMAT on a size mismatch.
+ */
+static int
+bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
+{
+	__u32 version;
+
+	if (size == sizeof(version)) {
+		/* memcpy: the section buffer carries no alignment guarantee here */
+		memcpy(&version, data, sizeof(version));
+		obj->kern_version = version;
+		pr_debug("kernel version of %s is %x\n", obj->path,
+			 obj->kern_version);
+		return 0;
+	}
+
+	pr_warn("invalid kver section in %s\n", obj->path);
+	return -LIBBPF_ERRNO__FORMAT;
+}
+
+/*
+ * qsort() comparator ordering maps by section index first and by offset
+ * within the section second.
+ *
+ * Explicit comparisons are used instead of returning a difference:
+ * sec_offset is an unsigned size, so a subtraction truncated to int could
+ * report the wrong sign (or equality) for offsets that are far apart.
+ */
+static int compare_bpf_map(const void *_a, const void *_b)
+{
+	const struct bpf_map *a = _a;
+	const struct bpf_map *b = _b;
+
+	if (a->sec_idx != b->sec_idx)
+		return a->sec_idx < b->sec_idx ? -1 : 1;
+	if (a->sec_offset != b->sec_offset)
+		return a->sec_offset < b->sec_offset ? -1 : 1;
+	return 0;
+}
+
+/* Tell whether the given map type is one of the two map-in-map flavours. */
+static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
+{
+	switch (type) {
+	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+	case BPF_MAP_TYPE_HASH_OF_MAPS:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Walk all ELF sections of the object looking for one called 'name' and
+ * store the size of its data in *d_size.
+ *
+ * Returns 0 when found, -ENOENT when no section has that name and -EIO when
+ * a section header, section name or section data cannot be retrieved.
+ */
+static int bpf_object_search_section_size(const struct bpf_object *obj,
+					  const char *name, size_t *d_size)
+{
+	const GElf_Ehdr *ehdr = &obj->efile.ehdr;
+	Elf *elf = obj->efile.elf;
+	Elf_Scn *scn = NULL;
+	int idx;
+
+	/* idx counts visited sections starting from 1, for diagnostics only */
+	for (idx = 1; (scn = elf_nextscn(elf, scn)) != NULL; idx++) {
+		const char *cur_name;
+		Elf_Data *sec_data;
+		GElf_Shdr shdr;
+
+		if (gelf_getshdr(scn, &shdr) != &shdr) {
+			pr_warn("failed to get section(%d) header from %s\n",
+				idx, obj->path);
+			return -EIO;
+		}
+
+		cur_name = elf_strptr(elf, ehdr->e_shstrndx, shdr.sh_name);
+		if (!cur_name) {
+			pr_warn("failed to get section(%d) name from %s\n",
+				idx, obj->path);
+			return -EIO;
+		}
+
+		if (strcmp(name, cur_name) != 0)
+			continue;
+
+		sec_data = elf_getdata(scn, 0);
+		if (!sec_data) {
+			pr_warn("failed to get section(%d) data from %s(%s)\n",
+				idx, name, obj->path);
+			return -EIO;
+		}
+
+		*d_size = sec_data->d_size;
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Report the size of the ELF section 'name' through *size.
+ *
+ * ".data", ".bss" and ".rodata" are answered from the Elf_Data pointers
+ * cached in obj->efile; any other name triggers a scan over all sections.
+ * *size is reset to 0 before anything else happens.
+ *
+ * Returns 0 on success, -EINVAL for a NULL name, -ENOENT when one of the
+ * cached sections is absent or empty, or the error returned by the scan.
+ */
+int bpf_object__section_size(const struct bpf_object *obj, const char *name,
+			     __u32 *size)
+{
+	Elf_Data *cached = NULL;
+
+	*size = 0;
+	if (!name)
+		return -EINVAL;
+
+	if (strcmp(name, ".data") == 0) {
+		cached = obj->efile.data;
+	} else if (strcmp(name, ".bss") == 0) {
+		cached = obj->efile.bss;
+	} else if (strcmp(name, ".rodata") == 0) {
+		cached = obj->efile.rodata;
+	} else {
+		size_t found;
+		int err;
+
+		err = bpf_object_search_section_size(obj, name, &found);
+		if (err)
+			return err;
+		/* a section that exists but is empty still counts as found */
+		*size = found;
+		return 0;
+	}
+
+	if (cached)
+		*size = cached->d_size;
+
+	return *size ? 0 : -ENOENT;
+}
+
+/*
+ * Look up the global object symbol called 'name' in the object's symbol
+ * table and store its st_value in *off.
+ *
+ * Only symbols with STB_GLOBAL binding and STT_OBJECT type are considered.
+ *
+ * Returns 0 on success, -EINVAL when an argument is NULL or no symbol table
+ * is currently attached to the object (obj->efile.symbols is cleared by
+ * bpf_object__elf_finish()), -EIO when a symbol name cannot be read and
+ * -ENOENT when no matching symbol exists.
+ */
+int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
+				__u32 *off)
+{
+	Elf_Data *symbols;
+	const char *sname;
+	size_t si, nr_syms;
+
+	if (!obj || !name || !off)
+		return -EINVAL;
+
+	/* without this check a missing symbol table is a NULL dereference */
+	symbols = obj->efile.symbols;
+	if (!symbols)
+		return -EINVAL;
+
+	nr_syms = symbols->d_size / sizeof(GElf_Sym);
+	for (si = 0; si < nr_syms; si++) {
+		GElf_Sym sym;
+
+		if (!gelf_getsym(symbols, si, &sym))
+			continue;
+		if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
+		    GELF_ST_TYPE(sym.st_info) != STT_OBJECT)
+			continue;
+
+		sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
+				   sym.st_name);
+		if (!sname) {
+			pr_warn("failed to get sym name string for var %s\n",
+				name);
+			return -EIO;
+		}
+		if (strcmp(name, sname) == 0) {
+			*off = sym.st_value;
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Hand out the next unused slot of obj->maps, growing the array first when
+ * it is full. Growth is by a factor of 1.5 with a floor of 4 slots; every
+ * newly created slot is zeroed and gets fd and inner_map_fd set to -1.
+ *
+ * Returns a pointer to the slot, or ERR_PTR(-ENOMEM) if the array could not
+ * be grown (obj->maps is left as it was in that case).
+ */
+static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
+{
+	if (obj->nr_maps >= obj->maps_cap) {
+		size_t grown_cap = obj->maps_cap * 3 / 2;
+		struct bpf_map *grown, *slot;
+
+		if (grown_cap < 4)
+			grown_cap = 4;
+
+		grown = realloc(obj->maps, grown_cap * sizeof(*obj->maps));
+		if (!grown) {
+			pr_warn("alloc maps for object failed\n");
+			return ERR_PTR(-ENOMEM);
+		}
+
+		obj->maps = grown;
+		obj->maps_cap = grown_cap;
+
+		/*
+		 * Zero each fresh slot, then mark its fds as -1 so that a
+		 * later cleanup can't close fd 0 (stdin) by accident
+		 * (zclose won't close a negative fd).
+		 */
+		for (slot = grown + obj->nr_maps; slot < grown + grown_cap;
+		     slot++) {
+			memset(slot, 0, sizeof(*slot));
+			slot->fd = -1;
+			slot->inner_map_fd = -1;
+		}
+	}
+
+	return &obj->maps[obj->nr_maps++];
+}
+
+/*
+ * Create the libbpf-internal map that backs one global data section.
+ *
+ * The map is a single-entry BPF_MAP_TYPE_ARRAY with an int key whose value
+ * is as large as the section data. Its name is built from up to 8
+ * characters of the object name followed by up to 7 characters of the
+ * per-type suffix. When data_buff is non-NULL, a heap copy of the section
+ * contents is stored in *data_buff.
+ *
+ * Returns 0 on success or a negative error code (-ENOMEM on allocation
+ * failure, or whatever bpf_object__add_map() reported).
+ */
+static int
+bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
+			      int sec_idx, Elf_Data *data, void **data_buff)
+{
+	char name_buf[BPF_OBJ_NAME_LEN];
+	struct bpf_map_def *def;
+	struct bpf_map *map;
+
+	map = bpf_object__add_map(obj);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+
+	map->sec_offset = 0;
+	map->sec_idx = sec_idx;
+	map->libbpf_type = type;
+
+	snprintf(name_buf, sizeof(name_buf), "%.8s%.7s", obj->name,
+		 libbpf_type_to_btf_name[type]);
+	map->name = strdup(name_buf);
+	if (!map->name) {
+		pr_warn("failed to alloc map name\n");
+		return -ENOMEM;
+	}
+
+	def = &map->def;
+	def->type = BPF_MAP_TYPE_ARRAY;
+	def->max_entries = 1;
+	def->key_size = sizeof(int);
+	def->value_size = data->d_size;
+
+	/* .rodata is made read-only from the program side */
+	if (type == LIBBPF_MAP_RODATA)
+		def->map_flags = BPF_F_RDONLY_PROG;
+	else
+		def->map_flags = 0;
+	if (obj->caps.array_mmap)
+		def->map_flags |= BPF_F_MMAPABLE;
+
+	pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
+		 name_buf, map->sec_idx, map->sec_offset, def->map_flags);
+
+	if (data_buff) {
+		*data_buff = malloc(data->d_size);
+		if (!*data_buff) {
+			zfree(&map->name);
+			pr_warn("failed to alloc map content buffer\n");
+			return -ENOMEM;
+		}
+		memcpy(*data_buff, data->d_buf, data->d_size);
+	}
+
+	pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
+	return 0;
+}
+
+/*
+ * Populate obj->maps with the libbpf-internal maps for .data, .rodata and
+ * .bss, in that order, for each of those sections that was found.
+ * Nothing is created when obj->caps.global_data is not set.
+ *
+ * Returns 0 on success or the first error from
+ * bpf_object__init_internal_map().
+ */
+static int bpf_object__init_global_data_maps(struct bpf_object *obj)
+{
+	int err = 0;
+
+	if (!obj->caps.global_data)
+		return 0;
+
+	if (obj->efile.data_shndx >= 0)
+		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
+						    obj->efile.data_shndx,
+						    obj->efile.data,
+						    &obj->sections.data);
+
+	if (!err && obj->efile.rodata_shndx >= 0)
+		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
+						    obj->efile.rodata_shndx,
+						    obj->efile.rodata,
+						    &obj->sections.rodata);
+
+	/* .bss has no initial contents worth copying, hence no buffer */
+	if (!err && obj->efile.bss_shndx >= 0)
+		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
+						    obj->efile.bss_shndx,
+						    obj->efile.bss, NULL);
+
+	return err;
+}
+
+/*
+ * Create one bpf_map per symbol that lives in the legacy "maps" ELF
+ * section, copying its struct bpf_map_def out of the section data.
+ *
+ * All definitions are assumed to have the same size, computed as section
+ * size divided by the number of symbols in the section. A definition that
+ * is larger than struct bpf_map_def is accepted only if the excess bytes
+ * are all zero; otherwise it is rejected in strict mode and truncated with
+ * a warning in relaxed mode.
+ *
+ * Returns 0 on success (including when there is no "maps" section) or a
+ * negative error code.
+ */
+static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
+{
+	Elf_Data *symbols = obj->efile.symbols;
+	int i, map_def_sz = 0, nr_maps = 0, nr_syms;
+	Elf_Data *data = NULL;
+	Elf_Scn *scn;
+
+	if (obj->efile.maps_shndx < 0)
+		return 0;
+
+	if (!symbols)
+		return -EINVAL;
+
+	scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx);
+	if (scn)
+		data = elf_getdata(scn, NULL);
+	if (!scn || !data) {
+		pr_warn("failed to get Elf_Data from map section %d\n",
+			obj->efile.maps_shndx);
+		return -EINVAL;
+	}
+
+	/*
+	 * Count number of maps. Each map has a name.
+	 * Array of maps is not supported: only the first element is
+	 * considered.
+	 *
+	 * TODO: Detect array of map and report error.
+	 */
+	nr_syms = symbols->d_size / sizeof(GElf_Sym);
+	for (i = 0; i < nr_syms; i++) {
+		GElf_Sym sym;
+
+		if (!gelf_getsym(symbols, i, &sym))
+			continue;
+		if (sym.st_shndx != obj->efile.maps_shndx)
+			continue;
+		nr_maps++;
+	}
+	/* Assume equally sized map definitions */
+	pr_debug("maps in %s: %d maps in %zd bytes\n",
+		 obj->path, nr_maps, data->d_size);
+
+	if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) {
+		pr_warn("unable to determine map definition size section %s, %d maps in %zd bytes\n",
+			obj->path, nr_maps, data->d_size);
+		return -EINVAL;
+	}
+	map_def_sz = data->d_size / nr_maps;
+
+	/* Fill obj->maps using data in "maps" section. */
+	for (i = 0; i < nr_syms; i++) {
+		GElf_Sym sym;
+		const char *map_name;
+		struct bpf_map_def *def;
+		struct bpf_map *map;
+
+		if (!gelf_getsym(symbols, i, &sym))
+			continue;
+		if (sym.st_shndx != obj->efile.maps_shndx)
+			continue;
+
+		map = bpf_object__add_map(obj);
+		if (IS_ERR(map))
+			return PTR_ERR(map);
+
+		map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
+				      sym.st_name);
+		if (!map_name) {
+			pr_warn("failed to get map #%d name sym string for obj %s\n",
+				i, obj->path);
+			return -LIBBPF_ERRNO__FORMAT;
+		}
+
+		map->libbpf_type = LIBBPF_MAP_UNSPEC;
+		map->sec_idx = sym.st_shndx;
+		map->sec_offset = sym.st_value;
+		pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
+			 map_name, map->sec_idx, map->sec_offset);
+		/*
+		 * Written as two comparisons so that a huge st_value coming
+		 * from a corrupted file cannot wrap the addition and slip
+		 * past the bound check.
+		 */
+		if (sym.st_value > data->d_size ||
+		    (size_t)map_def_sz > data->d_size - sym.st_value) {
+			pr_warn("corrupted maps section in %s: last map \"%s\" too small\n",
+				obj->path, map_name);
+			return -EINVAL;
+		}
+
+		map->name = strdup(map_name);
+		if (!map->name) {
+			pr_warn("failed to alloc map name\n");
+			return -ENOMEM;
+		}
+		pr_debug("map %d is \"%s\"\n", i, map->name);
+		/* byte-wise pointer arithmetic; d_buf is a void pointer */
+		def = (struct bpf_map_def *)((char *)data->d_buf + sym.st_value);
+		/*
+		 * If the definition of the map in the object file fits in
+		 * bpf_map_def, copy it. Any extra fields in our version
+		 * of bpf_map_def keep the value the map slot already has;
+		 * bpf_object__add_map() zeroes every slot it creates.
+		 */
+		if (map_def_sz <= sizeof(struct bpf_map_def)) {
+			memcpy(&map->def, def, map_def_sz);
+		} else {
+			/*
+			 * Here the map structure being read is bigger than what
+			 * we expect, truncate if the excess bits are all zero.
+			 * If they are not zero, reject this map as
+			 * incompatible.
+			 */
+			char *b;
+
+			for (b = ((char *)def) + sizeof(struct bpf_map_def);
+			     b < ((char *)def) + map_def_sz; b++) {
+				if (*b != 0) {
+					pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n",
+						obj->path, map_name);
+					if (strict)
+						return -EINVAL;
+					/* relaxed mode: one warning per map is enough */
+					break;
+				}
+			}
+			memcpy(&map->def, def, sizeof(struct bpf_map_def));
+		}
+	}
+	return 0;
+}
+
+/*
+ * Starting from type 'id', follow the chain of modifier and typedef types
+ * until a type of another kind is reached and return it. When res_id is
+ * non-NULL it receives the id of the returned type.
+ */
+static const struct btf_type *
+skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
+{
+	const struct btf_type *cur;
+
+	if (res_id)
+		*res_id = id;
+
+	for (cur = btf__type_by_id(btf, id);
+	     btf_is_mod(cur) || btf_is_typedef(cur);
+	     cur = btf__type_by_id(btf, cur->type)) {
+		/* record the id we are about to step to */
+		if (res_id)
+			*res_id = cur->type;
+	}
+
+	return cur;
+}
+
+/*
+ * Read an integer attribute out of a BTF-defined map. The value is not
+ * stored as data: the member is declared as a pointer to an array and the
+ * number of array elements *is* the value. For instance
+ * int (*type)[BPF_MAP_TYPE_ARRAY]; expresses `type => BPF_MAP_TYPE_ARRAY`
+ * purely through the BTF type description, while the member itself only
+ * occupies sizeof(void *) bytes in the ELF data section.
+ *
+ * On success stores the value in *res and returns true; returns false (after
+ * logging a warning) when the member is not a pointer to an array.
+ */
+static bool get_map_field_int(const char *map_name, const struct btf *btf,
+			      const struct btf_type *def,
+			      const struct btf_member *m, __u32 *res)
+{
+	const char *field = btf__name_by_offset(btf, m->name_off);
+	const struct btf_type *ptr_t, *pointee;
+
+	ptr_t = skip_mods_and_typedefs(btf, m->type, NULL);
+	if (!btf_is_ptr(ptr_t)) {
+		pr_warn("map '%s': attr '%s': expected PTR, got %u.\n",
+			map_name, field, btf_kind(ptr_t));
+		return false;
+	}
+
+	pointee = btf__type_by_id(btf, ptr_t->type);
+	if (!pointee) {
+		pr_warn("map '%s': attr '%s': type [%u] not found.\n",
+			map_name, field, ptr_t->type);
+		return false;
+	}
+
+	if (btf_is_array(pointee)) {
+		*res = btf_array(pointee)->nelems;
+		return true;
+	}
+
+	pr_warn("map '%s': attr '%s': expected ARRAY, got %u.\n",
+		map_name, field, btf_kind(pointee));
+	return false;
+}
+
+/*
+ * Compose "<path>/<map name>" and install it as the map's pin path.
+ * A NULL path selects the default root "/sys/fs/bpf".
+ *
+ * Returns 0 on success, -EINVAL if formatting failed, -ENAMETOOLONG if the
+ * result does not fit in PATH_MAX, or the error from
+ * bpf_map__set_pin_path().
+ */
+static int build_map_pin_path(struct bpf_map *map, const char *path)
+{
+	const char *root = path ? path : "/sys/fs/bpf";
+	char full[PATH_MAX];
+	int written;
+
+	written = snprintf(full, PATH_MAX, "%s/%s", root, bpf_map__name(map));
+	if (written < 0)
+		return -EINVAL;
+	if (written >= PATH_MAX)
+		return -ENAMETOOLONG;
+
+	return bpf_map__set_pin_path(map, full);
+}
+
+/*
+ * Create one map from the var_idx-th variable of the BTF DATASEC 'sec'.
+ *
+ * The variable must be a global or static VAR whose type resolves to a
+ * struct. Each struct member is interpreted by name: "type", "max_entries",
+ * "map_flags", "key_size", "value_size" and "pinning" carry an integer
+ * encoded as pointer-to-array (see get_map_field_int()); "key" and "value"
+ * are pointers whose pointee type gives both the size and the BTF type id.
+ * Unknown members are an error in strict mode and ignored otherwise.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int bpf_object__init_user_btf_map(struct bpf_object *obj,
+					 const struct btf_type *sec,
+					 int var_idx, int sec_idx,
+					 const Elf_Data *data, bool strict,
+					 const char *pin_root_path)
+{
+	const struct btf_type *var, *def, *t;
+	const struct btf_var_secinfo *vi;
+	const struct btf_var *var_extra;
+	const struct btf_member *m;
+	const char *map_name;
+	struct bpf_map *map;
+	int vlen, i;
+
+	vi = btf_var_secinfos(sec) + var_idx;
+	var = btf__type_by_id(obj->btf, vi->type);
+	var_extra = btf_var(var);
+	map_name = btf__name_by_offset(obj->btf, var->name_off);
+
+	if (map_name == NULL || map_name[0] == '\0') {
+		pr_warn("map #%d: empty name.\n", var_idx);
+		return -EINVAL;
+	}
+	/* widen before adding so offset + size cannot wrap in 32 bits */
+	if ((__u64)vi->offset + vi->size > data->d_size) {
+		pr_warn("map '%s' BTF data is corrupted.\n", map_name);
+		return -EINVAL;
+	}
+	if (!btf_is_var(var)) {
+		pr_warn("map '%s': unexpected var kind %u.\n",
+			map_name, btf_kind(var));
+		return -EINVAL;
+	}
+	if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
+	    var_extra->linkage != BTF_VAR_STATIC) {
+		pr_warn("map '%s': unsupported var linkage %u.\n",
+			map_name, var_extra->linkage);
+		return -EOPNOTSUPP;
+	}
+
+	def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
+	if (!btf_is_struct(def)) {
+		/* report the kind of the definition, not of the variable */
+		pr_warn("map '%s': unexpected def kind %u.\n",
+			map_name, btf_kind(def));
+		return -EINVAL;
+	}
+	if (def->size > vi->size) {
+		pr_warn("map '%s': invalid def size.\n", map_name);
+		return -EINVAL;
+	}
+
+	map = bpf_object__add_map(obj);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+	map->name = strdup(map_name);
+	if (!map->name) {
+		pr_warn("map '%s': failed to alloc map name.\n", map_name);
+		return -ENOMEM;
+	}
+	map->libbpf_type = LIBBPF_MAP_UNSPEC;
+	map->def.type = BPF_MAP_TYPE_UNSPEC;
+	map->sec_idx = sec_idx;
+	map->sec_offset = vi->offset;
+	pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
+		 map_name, map->sec_idx, map->sec_offset);
+
+	vlen = btf_vlen(def);
+	m = btf_members(def);
+	for (i = 0; i < vlen; i++, m++) {
+		const char *name = btf__name_by_offset(obj->btf, m->name_off);
+
+		if (!name) {
+			pr_warn("map '%s': invalid field #%d.\n", map_name, i);
+			return -EINVAL;
+		}
+		if (strcmp(name, "type") == 0) {
+			if (!get_map_field_int(map_name, obj->btf, def, m,
+					       &map->def.type))
+				return -EINVAL;
+			pr_debug("map '%s': found type = %u.\n",
+				 map_name, map->def.type);
+		} else if (strcmp(name, "max_entries") == 0) {
+			if (!get_map_field_int(map_name, obj->btf, def, m,
+					       &map->def.max_entries))
+				return -EINVAL;
+			pr_debug("map '%s': found max_entries = %u.\n",
+				 map_name, map->def.max_entries);
+		} else if (strcmp(name, "map_flags") == 0) {
+			if (!get_map_field_int(map_name, obj->btf, def, m,
+					       &map->def.map_flags))
+				return -EINVAL;
+			pr_debug("map '%s': found map_flags = %u.\n",
+				 map_name, map->def.map_flags);
+		} else if (strcmp(name, "key_size") == 0) {
+			__u32 sz;
+
+			if (!get_map_field_int(map_name, obj->btf, def, m,
+					       &sz))
+				return -EINVAL;
+			pr_debug("map '%s': found key_size = %u.\n",
+				 map_name, sz);
+			/* "key" and "key_size" may both appear but must agree */
+			if (map->def.key_size && map->def.key_size != sz) {
+				pr_warn("map '%s': conflicting key size %u != %u.\n",
+					map_name, map->def.key_size, sz);
+				return -EINVAL;
+			}
+			map->def.key_size = sz;
+		} else if (strcmp(name, "key") == 0) {
+			__s64 sz;
+
+			t = btf__type_by_id(obj->btf, m->type);
+			if (!t) {
+				pr_warn("map '%s': key type [%d] not found.\n",
+					map_name, m->type);
+				return -EINVAL;
+			}
+			if (!btf_is_ptr(t)) {
+				pr_warn("map '%s': key spec is not PTR: %u.\n",
+					map_name, btf_kind(t));
+				return -EINVAL;
+			}
+			sz = btf__resolve_size(obj->btf, t->type);
+			if (sz < 0) {
+				pr_warn("map '%s': can't determine key size for type [%u]: %lld.\n",
+					map_name, t->type, sz);
+				return sz;
+			}
+			pr_debug("map '%s': found key [%u], sz = %lld.\n",
+				 map_name, t->type, sz);
+			if (map->def.key_size && map->def.key_size != sz) {
+				pr_warn("map '%s': conflicting key size %u != %lld.\n",
+					map_name, map->def.key_size, sz);
+				return -EINVAL;
+			}
+			map->def.key_size = sz;
+			map->btf_key_type_id = t->type;
+		} else if (strcmp(name, "value_size") == 0) {
+			__u32 sz;
+
+			if (!get_map_field_int(map_name, obj->btf, def, m,
+					       &sz))
+				return -EINVAL;
+			pr_debug("map '%s': found value_size = %u.\n",
+				 map_name, sz);
+			/* "value" and "value_size" may both appear but must agree */
+			if (map->def.value_size && map->def.value_size != sz) {
+				pr_warn("map '%s': conflicting value size %u != %u.\n",
+					map_name, map->def.value_size, sz);
+				return -EINVAL;
+			}
+			map->def.value_size = sz;
+		} else if (strcmp(name, "value") == 0) {
+			__s64 sz;
+
+			t = btf__type_by_id(obj->btf, m->type);
+			if (!t) {
+				pr_warn("map '%s': value type [%d] not found.\n",
+					map_name, m->type);
+				return -EINVAL;
+			}
+			if (!btf_is_ptr(t)) {
+				pr_warn("map '%s': value spec is not PTR: %u.\n",
+					map_name, btf_kind(t));
+				return -EINVAL;
+			}
+			sz = btf__resolve_size(obj->btf, t->type);
+			if (sz < 0) {
+				pr_warn("map '%s': can't determine value size for type [%u]: %lld.\n",
+					map_name, t->type, sz);
+				return sz;
+			}
+			pr_debug("map '%s': found value [%u], sz = %lld.\n",
+				 map_name, t->type, sz);
+			if (map->def.value_size && map->def.value_size != sz) {
+				pr_warn("map '%s': conflicting value size %u != %lld.\n",
+					map_name, map->def.value_size, sz);
+				return -EINVAL;
+			}
+			map->def.value_size = sz;
+			map->btf_value_type_id = t->type;
+		} else if (strcmp(name, "pinning") == 0) {
+			__u32 val;
+			int err;
+
+			if (!get_map_field_int(map_name, obj->btf, def, m,
+					       &val))
+				return -EINVAL;
+			pr_debug("map '%s': found pinning = %u.\n",
+				 map_name, val);
+
+			if (val != LIBBPF_PIN_NONE &&
+			    val != LIBBPF_PIN_BY_NAME) {
+				pr_warn("map '%s': invalid pinning value %u.\n",
+					map_name, val);
+				return -EINVAL;
+			}
+			if (val == LIBBPF_PIN_BY_NAME) {
+				err = build_map_pin_path(map, pin_root_path);
+				if (err) {
+					pr_warn("map '%s': couldn't build pin path.\n",
+						map_name);
+					return err;
+				}
+			}
+		} else {
+			if (strict) {
+				pr_warn("map '%s': unknown field '%s'.\n",
+					map_name, name);
+				return -ENOTSUP;
+			}
+			pr_debug("map '%s': ignoring unknown field '%s'.\n",
+				 map_name, name);
+		}
+	}
+
+	/* a definition without a "type" member is unusable */
+	if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
+		pr_warn("map '%s': map type isn't specified.\n", map_name);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Create maps for every variable of the BTF DATASEC named MAPS_ELF_SEC.
+ *
+ * Does nothing when the object has no BTF-defined maps section. Otherwise
+ * the section's ELF data is fetched, the matching DATASEC is located by
+ * name among all BTF types, and bpf_object__init_user_btf_map() is run for
+ * each of its variables.
+ *
+ * Returns 0 on success, -EINVAL if the section data is unavailable,
+ * -ENOENT if no matching DATASEC exists, or the first per-map error.
+ */
+static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
+					  const char *pin_root_path)
+{
+	const struct btf_type *sec = NULL;
+	int nr_types, i, vlen, err;
+	const struct btf_type *t;
+	Elf_Data *data = NULL;
+	const char *name;
+	Elf_Scn *scn;
+
+	if (obj->efile.btf_maps_shndx < 0)
+		return 0;
+
+	scn = elf_getscn(obj->efile.elf, obj->efile.btf_maps_shndx);
+	if (scn)
+		data = elf_getdata(scn, NULL);
+	if (!scn || !data) {
+		/* report the section that was actually looked up */
+		pr_warn("failed to get Elf_Data from map section %d (%s)\n",
+			obj->efile.btf_maps_shndx, MAPS_ELF_SEC);
+		return -EINVAL;
+	}
+
+	/* BTF type ids start at 1 */
+	nr_types = btf__get_nr_types(obj->btf);
+	for (i = 1; i <= nr_types; i++) {
+		t = btf__type_by_id(obj->btf, i);
+		if (!btf_is_datasec(t))
+			continue;
+		name = btf__name_by_offset(obj->btf, t->name_off);
+		/* a DATASEC whose name can't be resolved simply doesn't match */
+		if (name && strcmp(name, MAPS_ELF_SEC) == 0) {
+			sec = t;
+			break;
+		}
+	}
+
+	if (!sec) {
+		pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
+		return -ENOENT;
+	}
+
+	vlen = btf_vlen(sec);
+	for (i = 0; i < vlen; i++) {
+		err = bpf_object__init_user_btf_map(obj, sec, i,
+						    obj->efile.btf_maps_shndx,
+						    data, strict,
+						    pin_root_path);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Build obj->maps from all three sources, in order: the legacy "maps"
+ * section, BTF-defined maps, and the internal global data maps. The
+ * resulting array is then sorted with compare_bpf_map().
+ *
+ * relaxed_maps turns off strict validation of map definitions.
+ * Returns 0 on success or the first error encountered.
+ */
+static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps,
+				 const char *pin_root_path)
+{
+	const bool strict = !relaxed_maps;
+	int err;
+
+	err = bpf_object__init_user_maps(obj, strict);
+	if (!err)
+		err = bpf_object__init_user_btf_maps(obj, strict,
+						     pin_root_path);
+	if (!err)
+		err = bpf_object__init_global_data_maps(obj);
+	if (err)
+		return err;
+
+	if (obj->nr_maps != 0)
+		qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]),
+		      compare_bpf_map);
+
+	return 0;
+}
+
+/*
+ * Tell whether ELF section number idx carries the SHF_EXECINSTR flag.
+ * A section that cannot be looked up, or whose header cannot be read,
+ * counts as not executable.
+ */
+static bool section_have_execinstr(struct bpf_object *obj, int idx)
+{
+	Elf_Scn *scn = elf_getscn(obj->efile.elf, idx);
+	GElf_Shdr shdr;
+
+	if (!scn || gelf_getshdr(scn, &shdr) != &shdr)
+		return false;
+
+	return (shdr.sh_flags & SHF_EXECINSTR) != 0;
+}
+
+/*
+ * Rewrite, in place, the BTF type kinds that obj->caps marks as
+ * unsupported into kinds with a compatible layout:
+ *   - without caps.btf_datasec: VAR -> INT, DATASEC -> STRUCT
+ *   - without caps.btf_func:    FUNC_PROTO -> ENUM, FUNC -> TYPEDEF
+ * Nothing is done when there is no BTF or when both capabilities are set.
+ */
+static void bpf_object__sanitize_btf(struct bpf_object *obj)
+{
+ bool has_datasec = obj->caps.btf_datasec;
+ bool has_func = obj->caps.btf_func;
+ struct btf *btf = obj->btf;
+ struct btf_type *t;
+ int i, j, vlen;
+
+ if (!obj->btf || (has_func && has_datasec))
+ return;
+
+ for (i = 1; i <= btf__get_nr_types(btf); i++) {
+ /* const is cast away: the type records are patched in place */
+ t = (struct btf_type *)btf__type_by_id(btf, i);
+
+ if (!has_datasec && btf_is_var(t)) {
+ /* replace VAR with INT */
+ t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
+ /*
+ * using size = 1 is the safest choice, 4 will be too
+ * big and cause kernel BTF validation failure if
+ * original variable took less than 4 bytes
+ */
+ t->size = 1;
+ /* the word right after the type header becomes the INT encoding */
+ *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
+ } else if (!has_datasec && btf_is_datasec(t)) {
+ /* replace DATASEC with STRUCT */
+ const struct btf_var_secinfo *v = btf_var_secinfos(t);
+ struct btf_member *m = btf_members(t);
+ struct btf_type *vt;
+ char *name;
+
+ /* turn every '.' of the section name into '_', in place */
+ name = (char *)btf__name_by_offset(btf, t->name_off);
+ while (*name) {
+ if (*name == '.')
+ *name = '_';
+ name++;
+ }
+
+ vlen = btf_vlen(t);
+ t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
+ /*
+  * NOTE(review): v and m both index the records that follow t, so
+  * each member presumably overwrites the secinfo it is built from;
+  * confirm the layouts before reordering any assignment below.
+  */
+ for (j = 0; j < vlen; j++, v++, m++) {
+ /* order of field assignments is important */
+ m->offset = v->offset * 8;
+ m->type = v->type;
+ /* preserve variable name as member name */
+ vt = (void *)btf__type_by_id(btf, v->type);
+ m->name_off = vt->name_off;
+ }
+ } else if (!has_func && btf_is_func_proto(t)) {
+ /* replace FUNC_PROTO with ENUM */
+ vlen = btf_vlen(t);
+ t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
+ t->size = sizeof(__u32); /* kernel enforced */
+ } else if (!has_func && btf_is_func(t)) {
+ /* replace FUNC with TYPEDEF */
+ t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
+ }
+ }
+}
+
+/*
+ * Discard the object's .BTF.ext data when obj->caps.btf_func is not set.
+ * Leaves everything untouched when there is no .BTF.ext or the capability
+ * is present.
+ */
+static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
+{
+	if (!obj->btf_ext || obj->caps.btf_func)
+		return;
+
+	btf_ext__free(obj->btf_ext);
+	obj->btf_ext = NULL;
+}
+
+/*
+ * BTF is mandatory exactly when the object has a BTF-defined maps section,
+ * i.e. when a section index was recorded for it.
+ */
+static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj)
+{
+	const int shndx = obj->efile.btf_maps_shndx;
+
+	return !(shndx < 0);
+}
+
+/*
+ * Parse the .BTF and .BTF.ext section data (either may be NULL) into
+ * obj->btf and obj->btf_ext.
+ *
+ * A broken .BTF.ext is always tolerated: it is dropped with a warning.
+ * A broken or missing .BTF is an error only when BTF is mandatory for this
+ * object (see bpf_object__is_btf_mandatory()); otherwise obj->btf ends up
+ * NULL and 0 is returned.
+ *
+ * Returns 0 on success, a negative error code when required BTF could not
+ * be set up (-ENOENT when it is simply absent).
+ */
+static int bpf_object__init_btf(struct bpf_object *obj,
+				Elf_Data *btf_data,
+				Elf_Data *btf_ext_data)
+{
+	bool btf_required = bpf_object__is_btf_mandatory(obj);
+	int err = 0;
+
+	if (btf_data) {
+		obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
+		if (IS_ERR(obj->btf)) {
+			/* pick up the real error so the warning doesn't say 0 */
+			err = PTR_ERR(obj->btf);
+			pr_warn("Error loading ELF section %s: %d.\n",
+				BTF_ELF_SEC, err);
+			goto out;
+		}
+		err = btf__finalize_data(obj, obj->btf);
+		if (err) {
+			pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
+			goto out;
+		}
+	}
+	if (btf_ext_data) {
+		if (!obj->btf) {
+			pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
+				 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
+			goto out;
+		}
+		obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
+					    btf_ext_data->d_size);
+		if (IS_ERR(obj->btf_ext)) {
+			pr_warn("Error loading ELF section %s: %ld. Ignored and continue.\n",
+				BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
+			obj->btf_ext = NULL;
+			goto out;
+		}
+	}
+out:
+	if (err || IS_ERR(obj->btf)) {
+		/* the failure only propagates when BTF is mandatory */
+		if (btf_required)
+			err = err ? : PTR_ERR(obj->btf);
+		else
+			err = 0;
+		/* an error pointer must not be handed to btf__free() */
+		if (!IS_ERR_OR_NULL(obj->btf))
+			btf__free(obj->btf);
+		obj->btf = NULL;
+	}
+	if (btf_required && !obj->btf) {
+		pr_warn("BTF is required, but is missing or corrupted.\n");
+		return err == 0 ? -ENOENT : err;
+	}
+	return 0;
+}
+
+/*
+ * Sanitize the object's BTF and BTF.ext and pass the BTF to btf__load().
+ *
+ * If loading fails, both obj->btf and obj->btf_ext are freed and reset to
+ * NULL. The failure is reported to the caller only when BTF is mandatory
+ * for this object; otherwise 0 is returned. No BTF at all is not an error.
+ */
+static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
+{
+	int err;
+
+	if (!obj->btf)
+		return 0;
+
+	bpf_object__sanitize_btf(obj);
+	bpf_object__sanitize_btf_ext(obj);
+
+	err = btf__load(obj->btf);
+	if (!err)
+		return 0;
+
+	pr_warn("Error loading %s into kernel: %d.\n",
+		BTF_ELF_SEC, err);
+	btf__free(obj->btf);
+	obj->btf = NULL;
+
+	/* btf_ext can't exist without btf, so free it as well */
+	if (obj->btf_ext) {
+		btf_ext__free(obj->btf_ext);
+		obj->btf_ext = NULL;
+	}
+
+	return bpf_object__is_btf_mandatory(obj) ? err : 0;
+}
+
+/*
+ * Walk every section of the object's ELF and record what it finds:
+ *   - "license" / "version" contents are copied into the object;
+ *   - the indices of "maps", MAPS_ELF_SEC, .data, .rodata and .bss are
+ *     remembered, together with the data of the latter three;
+ *   - the BTF and BTF.ext section data are kept for later parsing;
+ *   - the (single) symbol table and its string table index are remembered;
+ *   - every non-empty executable PROGBITS section becomes a program;
+ *   - REL sections that target an executable section are queued in
+ *     obj->efile.reloc_sects.
+ * Afterwards BTF is parsed, maps are created, BTF is sanitized and loaded,
+ * and program names are resolved, stopping at the first failure.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps,
+				   const char *pin_root_path)
+{
+	Elf *elf = obj->efile.elf;
+	GElf_Ehdr *ep = &obj->efile.ehdr;
+	Elf_Data *btf_ext_data = NULL;
+	Elf_Data *btf_data = NULL;
+	Elf_Scn *scn = NULL;
+	int idx = 0, err = 0;
+
+	/* Elf is corrupted/truncated, avoid calling elf_strptr. */
+	if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
+		pr_warn("failed to get e_shstrndx from %s\n", obj->path);
+		return -LIBBPF_ERRNO__FORMAT;
+	}
+
+	while ((scn = elf_nextscn(elf, scn)) != NULL) {
+		char *name;
+		GElf_Shdr sh;
+		Elf_Data *data;
+
+		/* idx counts visited sections, starting from 1 */
+		idx++;
+		if (gelf_getshdr(scn, &sh) != &sh) {
+			pr_warn("failed to get section(%d) header from %s\n",
+				idx, obj->path);
+			return -LIBBPF_ERRNO__FORMAT;
+		}
+
+		name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
+		if (!name) {
+			pr_warn("failed to get section(%d) name from %s\n",
+				idx, obj->path);
+			return -LIBBPF_ERRNO__FORMAT;
+		}
+
+		data = elf_getdata(scn, 0);
+		if (!data) {
+			pr_warn("failed to get section(%d) data from %s(%s)\n",
+				idx, name, obj->path);
+			return -LIBBPF_ERRNO__FORMAT;
+		}
+		pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
+			 idx, name, (unsigned long)data->d_size,
+			 (int)sh.sh_link, (unsigned long)sh.sh_flags,
+			 (int)sh.sh_type);
+
+		if (strcmp(name, "license") == 0) {
+			err = bpf_object__init_license(obj,
+						       data->d_buf,
+						       data->d_size);
+			if (err)
+				return err;
+		} else if (strcmp(name, "version") == 0) {
+			err = bpf_object__init_kversion(obj,
+							data->d_buf,
+							data->d_size);
+			if (err)
+				return err;
+		} else if (strcmp(name, "maps") == 0) {
+			obj->efile.maps_shndx = idx;
+		} else if (strcmp(name, MAPS_ELF_SEC) == 0) {
+			obj->efile.btf_maps_shndx = idx;
+		} else if (strcmp(name, BTF_ELF_SEC) == 0) {
+			btf_data = data;
+		} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
+			btf_ext_data = data;
+		} else if (sh.sh_type == SHT_SYMTAB) {
+			if (obj->efile.symbols) {
+				pr_warn("bpf: multiple SYMTAB in %s\n",
+					obj->path);
+				return -LIBBPF_ERRNO__FORMAT;
+			}
+			obj->efile.symbols = data;
+			obj->efile.strtabidx = sh.sh_link;
+		} else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
+			if (sh.sh_flags & SHF_EXECINSTR) {
+				if (strcmp(name, ".text") == 0)
+					obj->efile.text_shndx = idx;
+				err = bpf_object__add_program(obj, data->d_buf,
+							      data->d_size,
+							      name, idx);
+				if (err) {
+					char errmsg[STRERR_BUFSIZE];
+					char *cp;
+
+					cp = libbpf_strerror_r(-err, errmsg,
+							       sizeof(errmsg));
+					/* terminate the line like every other warning */
+					pr_warn("failed to alloc program %s (%s): %s\n",
+						name, obj->path, cp);
+					return err;
+				}
+			} else if (strcmp(name, ".data") == 0) {
+				obj->efile.data = data;
+				obj->efile.data_shndx = idx;
+			} else if (strcmp(name, ".rodata") == 0) {
+				obj->efile.rodata = data;
+				obj->efile.rodata_shndx = idx;
+			} else {
+				pr_debug("skip section(%d) %s\n", idx, name);
+			}
+		} else if (sh.sh_type == SHT_REL) {
+			int nr_sects = obj->efile.nr_reloc_sects;
+			void *sects = obj->efile.reloc_sects;
+			int sec = sh.sh_info; /* points to other section */
+
+			/* Only do relo for section with exec instructions */
+			if (!section_have_execinstr(obj, sec)) {
+				pr_debug("skip relo %s(%d) for section(%d)\n",
+					 name, idx, sec);
+				continue;
+			}
+
+			/*
+			 * Grow through a local so the existing array is still
+			 * reachable from obj if the reallocation fails.
+			 */
+			sects = reallocarray(sects, nr_sects + 1,
+					     sizeof(*obj->efile.reloc_sects));
+			if (!sects) {
+				pr_warn("reloc_sects realloc failed\n");
+				return -ENOMEM;
+			}
+
+			obj->efile.reloc_sects = sects;
+			obj->efile.nr_reloc_sects++;
+
+			obj->efile.reloc_sects[nr_sects].shdr = sh;
+			obj->efile.reloc_sects[nr_sects].data = data;
+		} else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) {
+			obj->efile.bss = data;
+			obj->efile.bss_shndx = idx;
+		} else {
+			pr_debug("skip section(%d) %s\n", idx, name);
+		}
+	}
+
+	/* idx now holds the number of sections visited */
+	if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
+		pr_warn("Corrupted ELF file: index of strtab invalid\n");
+		return -LIBBPF_ERRNO__FORMAT;
+	}
+	err = bpf_object__init_btf(obj, btf_data, btf_ext_data);
+	if (!err)
+		err = bpf_object__init_maps(obj, relaxed_maps, pin_root_path);
+	if (!err)
+		err = bpf_object__sanitize_and_load_btf(obj);
+	if (!err)
+		err = bpf_object__init_prog_names(obj);
+	return err;
+}
+
+/*
+ * Return the first program of the object whose idx field equals 'idx',
+ * or NULL when there is none.
+ */
+static struct bpf_program *
+bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
+{
+	size_t n;
+
+	for (n = 0; n < obj->nr_programs; n++) {
+		if (obj->programs[n].idx == idx)
+			return &obj->programs[n];
+	}
+
+	return NULL;
+}
+
+/*
+ * Return the first program of the object whose section name equals
+ * 'title', or NULL when no program matches. Programs without a section
+ * name are skipped.
+ */
+struct bpf_program *
+bpf_object__find_program_by_title(const struct bpf_object *obj,
+				  const char *title)
+{
+	struct bpf_program *prog;
+
+	bpf_object__for_each_program(prog, obj) {
+		if (!prog->section_name)
+			continue;
+		if (strcmp(prog->section_name, title) == 0)
+			return prog;
+	}
+
+	return NULL;
+}
+
+/*
+ * Tell whether shndx is the index recorded for one of the global data
+ * sections (.data, .bss or .rodata).
+ */
+static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
+				      int shndx)
+{
+	if (shndx == obj->efile.data_shndx)
+		return true;
+	if (shndx == obj->efile.bss_shndx)
+		return true;
+	return shndx == obj->efile.rodata_shndx;
+}
+
+/*
+ * Tell whether shndx is the index recorded for either map definition
+ * section: the legacy one or the BTF-defined one.
+ */
+static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
+				      int shndx)
+{
+	if (shndx == obj->efile.maps_shndx)
+		return true;
+	return shndx == obj->efile.btf_maps_shndx;
+}
+
+/*
+ * Map a section index to the internal map type that backs it:
+ * .data, .bss or .rodata. Any other index yields LIBBPF_MAP_UNSPEC.
+ */
+static enum libbpf_map_type
+bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
+{
+	/* checked in the same order as the original chain: data, bss, rodata */
+	if (shndx == obj->efile.data_shndx)
+		return LIBBPF_MAP_DATA;
+	if (shndx == obj->efile.bss_shndx)
+		return LIBBPF_MAP_BSS;
+	if (shndx == obj->efile.rodata_shndx)
+		return LIBBPF_MAP_RODATA;
+
+	return LIBBPF_MAP_UNSPEC;
+}
+
+/*
+ * Classify the relocation that applies to instruction insn_idx of prog and
+ * fill in *reloc_desc accordingly. Three kinds are recognized:
+ *   - RELO_CALL: a BPF_JMP|BPF_CALL instruction referring to the section
+ *     recorded as obj->efile.text_shndx;
+ *   - RELO_LD64: a 64-bit immediate load referring to a map defined in one
+ *     of the maps sections, matched by section index and offset;
+ *   - RELO_DATA: a 64-bit immediate load referring to .data/.bss/.rodata,
+ *     matched by internal map type.
+ * 'name' is only used in diagnostics; 'rel' is not used in this function.
+ *
+ * Returns 0 on success, -LIBBPF_ERRNO__RELOC when the relocation cannot be
+ * classified or resolved.
+ */
+static int bpf_program__record_reloc(struct bpf_program *prog,
+ struct reloc_desc *reloc_desc,
+ __u32 insn_idx, const char *name,
+ const GElf_Sym *sym, const GElf_Rel *rel)
+{
+ struct bpf_insn *insn = &prog->insns[insn_idx];
+ size_t map_idx, nr_maps = prog->obj->nr_maps;
+ struct bpf_object *obj = prog->obj;
+ __u32 shdr_idx = sym->st_shndx;
+ enum libbpf_map_type type;
+ struct bpf_map *map;
+
+ /* sub-program call relocation */
+ if (insn->code == (BPF_JMP | BPF_CALL)) {
+ if (insn->src_reg != BPF_PSEUDO_CALL) {
+ pr_warn("incorrect bpf_call opcode\n");
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ /* text_shndx can be 0, if no default "main" program exists */
+ if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
+ pr_warn("bad call relo against section %u\n", shdr_idx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ /* the target offset must be a multiple of 8 bytes */
+ if (sym->st_value % 8) {
+ pr_warn("bad call relo offset: %llu\n", (__u64)sym->st_value);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ reloc_desc->type = RELO_CALL;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->sym_off = sym->st_value;
+ obj->has_pseudo_calls = true;
+ return 0;
+ }
+
+ /* everything else must be a 64-bit immediate load */
+ if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
+ pr_warn("invalid relo for insns[%d].code 0x%x\n",
+ insn_idx, insn->code);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ /* reject the undefined section (0) and the reserved index range */
+ if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
+ pr_warn("invalid relo for \'%s\' in special section 0x%x; forgot to initialize global var?..\n",
+ name, shdr_idx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
+
+ /* generic map reference relocation */
+ if (type == LIBBPF_MAP_UNSPEC) {
+ if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
+ pr_warn("bad map relo against section %u\n",
+ shdr_idx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ /* find the user map defined at exactly this section and offset */
+ for (map_idx = 0; map_idx < nr_maps; map_idx++) {
+ map = &obj->maps[map_idx];
+ if (map->libbpf_type != type ||
+ map->sec_idx != sym->st_shndx ||
+ map->sec_offset != sym->st_value)
+ continue;
+ pr_debug("found map %zd (%s, sec %d, off %zu) for insn %u\n",
+ map_idx, map->name, map->sec_idx,
+ map->sec_offset, insn_idx);
+ break;
+ }
+ if (map_idx >= nr_maps) {
+ pr_warn("map relo failed to find map for sec %u, off %llu\n",
+ shdr_idx, (__u64)sym->st_value);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ reloc_desc->type = RELO_LD64;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->map_idx = map_idx;
+ reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
+ return 0;
+ }
+
+ /* global data map relocation */
+ if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
+ pr_warn("bad data relo against section %u\n", shdr_idx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ if (!obj->caps.global_data) {
+ pr_warn("relocation: kernel does not support global \'%s\' variable access in insns[%d]\n",
+ name, insn_idx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ /* the first map of the matching internal type is taken */
+ for (map_idx = 0; map_idx < nr_maps; map_idx++) {
+ map = &obj->maps[map_idx];
+ if (map->libbpf_type != type)
+ continue;
+ pr_debug("found data map %zd (%s, sec %d, off %zu) for insn %u\n",
+ map_idx, map->name, map->sec_idx, map->sec_offset,
+ insn_idx);
+ break;
+ }
+ if (map_idx >= nr_maps) {
+ pr_warn("data relo failed to find map for sec %u\n",
+ shdr_idx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ reloc_desc->type = RELO_DATA;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->map_idx = map_idx;
+ /* here the symbol value is kept as the offset within the data map */
+ reloc_desc->sym_off = sym->st_value;
+ return 0;
+}
+
+static int
+bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
+ Elf_Data *data, struct bpf_object *obj)
+{
+ Elf_Data *symbols = obj->efile.symbols;
+ int err, i, nrels;
+
+ pr_debug("collecting relocating info for: '%s'\n", prog->section_name);
+ nrels = shdr->sh_size / shdr->sh_entsize;
+
+ prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);
+ if (!prog->reloc_desc) {
+ pr_warn("failed to alloc memory in relocation\n");
+ return -ENOMEM;
+ }
+ prog->nr_reloc = nrels;
+
+ for (i = 0; i < nrels; i++) {
+ const char *name;
+ __u32 insn_idx;
+ GElf_Sym sym;
+ GElf_Rel rel;
+
+ if (!gelf_getrel(data, i, &rel)) {
+ pr_warn("relocation: failed to get %d reloc\n", i);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
+ pr_warn("relocation: symbol %"PRIx64" not found\n",
+ GELF_R_SYM(rel.r_info));
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ if (rel.r_offset % sizeof(struct bpf_insn))
+ return -LIBBPF_ERRNO__FORMAT;
+
+ insn_idx = rel.r_offset / sizeof(struct bpf_insn);
+ name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
+ sym.st_name) ? : "<?>";
+
+ pr_debug("relo for shdr %u, symb %llu, value %llu, type %d, bind %d, name %d (\'%s\'), insn %u\n",
+ (__u32)sym.st_shndx, (__u64)GELF_R_SYM(rel.r_info),
+ (__u64)sym.st_value, GELF_ST_TYPE(sym.st_info),
+ GELF_ST_BIND(sym.st_info), sym.st_name, name,
+ insn_idx);
+
+ err = bpf_program__record_reloc(prog, &prog->reloc_desc[i],
+ insn_idx, name, &sym, &rel);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
+{
+ struct bpf_map_def *def = &map->def;
+ __u32 key_type_id = 0, value_type_id = 0;
+ int ret;
+
+ /* if it's BTF-defined map, we don't need to search for type IDs */
+ if (map->sec_idx == obj->efile.btf_maps_shndx)
+ return 0;
+
+ if (!bpf_map__is_internal(map)) {
+ ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
+ def->value_size, &key_type_id,
+ &value_type_id);
+ } else {
+ /*
+ * LLVM annotates global data differently in BTF, that is,
+ * only as '.data', '.bss' or '.rodata'.
+ */
+ ret = btf__find_by_name(obj->btf,
+ libbpf_type_to_btf_name[map->libbpf_type]);
+ }
+ if (ret < 0)
+ return ret;
+
+ map->btf_key_type_id = key_type_id;
+ map->btf_value_type_id = bpf_map__is_internal(map) ?
+ ret : value_type_id;
+ return 0;
+}
+
+int bpf_map__reuse_fd(struct bpf_map *map, int fd)
+{
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ int new_fd, err;
+ char *new_name;
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err)
+ return err;
+
+ new_name = strdup(info.name);
+ if (!new_name)
+ return -errno;
+
+ new_fd = open("/", O_RDONLY | O_CLOEXEC);
+ if (new_fd < 0) {
+ err = -errno;
+ goto err_free_new_name;
+ }
+
+ new_fd = dup3(fd, new_fd, O_CLOEXEC);
+ if (new_fd < 0) {
+ err = -errno;
+ goto err_close_new_fd;
+ }
+
+ err = zclose(map->fd);
+ if (err) {
+ err = -errno;
+ goto err_close_new_fd;
+ }
+ free(map->name);
+
+ map->fd = new_fd;
+ map->name = new_name;
+ map->def.type = info.type;
+ map->def.key_size = info.key_size;
+ map->def.value_size = info.value_size;
+ map->def.max_entries = info.max_entries;
+ map->def.map_flags = info.map_flags;
+ map->btf_key_type_id = info.btf_key_type_id;
+ map->btf_value_type_id = info.btf_value_type_id;
+ map->reused = true;
+
+ return 0;
+
+err_close_new_fd:
+ close(new_fd);
+err_free_new_name:
+ free(new_name);
+ return err;
+}
+
+int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
+{
+ if (!map || !max_entries)
+ return -EINVAL;
+
+ /* If map already created, its attributes can't be changed. */
+ if (map->fd >= 0)
+ return -EBUSY;
+
+ map->def.max_entries = max_entries;
+
+ return 0;
+}
+
+static int
+bpf_object__probe_name(struct bpf_object *obj)
+{
+ struct bpf_load_program_attr attr;
+ char *cp, errmsg[STRERR_BUFSIZE];
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int ret;
+
+ /* make sure basic loading works */
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insns = insns;
+ attr.insns_cnt = ARRAY_SIZE(insns);
+ attr.license = "GPL";
+
+ ret = bpf_load_program_xattr(&attr, NULL, 0);
+ if (ret < 0) {
+ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+ pr_warn("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n",
+ __func__, cp, errno);
+ return -errno;
+ }
+ close(ret);
+
+ /* now try the same program, but with the name */
+
+ attr.name = "test";
+ ret = bpf_load_program_xattr(&attr, NULL, 0);
+ if (ret >= 0) {
+ obj->caps.name = 1;
+ close(ret);
+ }
+
+ return 0;
+}
+
+static int
+bpf_object__probe_global_data(struct bpf_object *obj)
+{
+ struct bpf_load_program_attr prg_attr;
+ struct bpf_create_map_attr map_attr;
+ char *cp, errmsg[STRERR_BUFSIZE];
+ struct bpf_insn insns[] = {
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int ret, map;
+
+ memset(&map_attr, 0, sizeof(map_attr));
+ map_attr.map_type = BPF_MAP_TYPE_ARRAY;
+ map_attr.key_size = sizeof(int);
+ map_attr.value_size = 32;
+ map_attr.max_entries = 1;
+
+ map = bpf_create_map_xattr(&map_attr);
+ if (map < 0) {
+ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+ pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
+ __func__, cp, errno);
+ return -errno;
+ }
+
+ insns[0].imm = map;
+
+ memset(&prg_attr, 0, sizeof(prg_attr));
+ prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ prg_attr.insns = insns;
+ prg_attr.insns_cnt = ARRAY_SIZE(insns);
+ prg_attr.license = "GPL";
+
+ ret = bpf_load_program_xattr(&prg_attr, NULL, 0);
+ if (ret >= 0) {
+ obj->caps.global_data = 1;
+ close(ret);
+ }
+
+ close(map);
+ return 0;
+}
+
+static int bpf_object__probe_btf_func(struct bpf_object *obj)
+{
+ static const char strs[] = "\0int\0x\0a";
+ /* void x(int a) {} */
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* FUNC_PROTO */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
+ BTF_PARAM_ENC(7, 1),
+ /* FUNC x */ /* [3] */
+ BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
+ };
+ int btf_fd;
+
+ btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs));
+ if (btf_fd >= 0) {
+ obj->caps.btf_func = 1;
+ close(btf_fd);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bpf_object__probe_btf_datasec(struct bpf_object *obj)
+{
+ static const char strs[] = "\0x\0.data";
+ /* static int a; */
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* DATASEC val */ /* [3] */
+ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ };
+ int btf_fd;
+
+ btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs));
+ if (btf_fd >= 0) {
+ obj->caps.btf_datasec = 1;
+ close(btf_fd);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bpf_object__probe_array_mmap(struct bpf_object *obj)
+{
+ struct bpf_create_map_attr attr = {
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_flags = BPF_F_MMAPABLE,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1,
+ };
+ int fd;
+
+ fd = bpf_create_map_xattr(&attr);
+ if (fd >= 0) {
+ obj->caps.array_mmap = 1;
+ close(fd);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int
+bpf_object__probe_caps(struct bpf_object *obj)
+{
+ int (*probe_fn[])(struct bpf_object *obj) = {
+ bpf_object__probe_name,
+ bpf_object__probe_global_data,
+ bpf_object__probe_btf_func,
+ bpf_object__probe_btf_datasec,
+ bpf_object__probe_array_mmap,
+ };
+ int i, ret;
+
+ for (i = 0; i < ARRAY_SIZE(probe_fn); i++) {
+ ret = probe_fn[i](obj);
+ if (ret < 0)
+ pr_debug("Probe #%d failed with %d.\n", i, ret);
+ }
+
+ return 0;
+}
+
+static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
+{
+ struct bpf_map_info map_info = {};
+ char msg[STRERR_BUFSIZE];
+ __u32 map_info_len;
+
+ map_info_len = sizeof(map_info);
+
+ if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) {
+ pr_warn("failed to get map info for map FD %d: %s\n",
+ map_fd, libbpf_strerror_r(errno, msg, sizeof(msg)));
+ return false;
+ }
+
+ return (map_info.type == map->def.type &&
+ map_info.key_size == map->def.key_size &&
+ map_info.value_size == map->def.value_size &&
+ map_info.max_entries == map->def.max_entries &&
+ map_info.map_flags == map->def.map_flags);
+}
+
+static int
+bpf_object__reuse_map(struct bpf_map *map)
+{
+ char *cp, errmsg[STRERR_BUFSIZE];
+ int err, pin_fd;
+
+ pin_fd = bpf_obj_get(map->pin_path);
+ if (pin_fd < 0) {
+ err = -errno;
+ if (err == -ENOENT) {
+ pr_debug("found no pinned map to reuse at '%s'\n",
+ map->pin_path);
+ return 0;
+ }
+
+ cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
+ pr_warn("couldn't retrieve pinned map '%s': %s\n",
+ map->pin_path, cp);
+ return err;
+ }
+
+ if (!map_is_reuse_compat(map, pin_fd)) {
+ pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
+ map->pin_path);
+ close(pin_fd);
+ return -EINVAL;
+ }
+
+ err = bpf_map__reuse_fd(map, pin_fd);
+ if (err) {
+ close(pin_fd);
+ return err;
+ }
+ map->pinned = true;
+ pr_debug("reused pinned map at '%s'\n", map->pin_path);
+
+ return 0;
+}
+
+static int
+bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
+{
+ char *cp, errmsg[STRERR_BUFSIZE];
+ int err, zero = 0;
+ __u8 *data;
+
+ /* Nothing to do here since kernel already zero-initializes .bss map. */
+ if (map->libbpf_type == LIBBPF_MAP_BSS)
+ return 0;
+
+ data = map->libbpf_type == LIBBPF_MAP_DATA ?
+ obj->sections.data : obj->sections.rodata;
+
+ err = bpf_map_update_elem(map->fd, &zero, data, 0);
+ /* Freeze .rodata map as read-only from syscall side. */
+ if (!err && map->libbpf_type == LIBBPF_MAP_RODATA) {
+ err = bpf_map_freeze(map->fd);
+ if (err) {
+ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+ pr_warn("Error freezing map(%s) as read-only: %s\n",
+ map->name, cp);
+ err = 0;
+ }
+ }
+ return err;
+}
+
+static int
+bpf_object__create_maps(struct bpf_object *obj)
+{
+ struct bpf_create_map_attr create_attr = {};
+ int nr_cpus = 0;
+ unsigned int i;
+ int err;
+
+ for (i = 0; i < obj->nr_maps; i++) {
+ struct bpf_map *map = &obj->maps[i];
+ struct bpf_map_def *def = &map->def;
+ char *cp, errmsg[STRERR_BUFSIZE];
+ int *pfd = &map->fd;
+
+ if (map->pin_path) {
+ err = bpf_object__reuse_map(map);
+ if (err) {
+ pr_warn("error reusing pinned map %s\n",
+ map->name);
+ return err;
+ }
+ }
+
+ if (map->fd >= 0) {
+ pr_debug("skip map create (preset) %s: fd=%d\n",
+ map->name, map->fd);
+ continue;
+ }
+
+ if (obj->caps.name)
+ create_attr.name = map->name;
+ create_attr.map_ifindex = map->map_ifindex;
+ create_attr.map_type = def->type;
+ create_attr.map_flags = def->map_flags;
+ create_attr.key_size = def->key_size;
+ create_attr.value_size = def->value_size;
+ if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
+ !def->max_entries) {
+ if (!nr_cpus)
+ nr_cpus = libbpf_num_possible_cpus();
+ if (nr_cpus < 0) {
+ pr_warn("failed to determine number of system CPUs: %d\n",
+ nr_cpus);
+ err = nr_cpus;
+ goto err_out;
+ }
+ pr_debug("map '%s': setting size to %d\n",
+ map->name, nr_cpus);
+ create_attr.max_entries = nr_cpus;
+ } else {
+ create_attr.max_entries = def->max_entries;
+ }
+ create_attr.btf_fd = 0;
+ create_attr.btf_key_type_id = 0;
+ create_attr.btf_value_type_id = 0;
+ if (bpf_map_type__is_map_in_map(def->type) &&
+ map->inner_map_fd >= 0)
+ create_attr.inner_map_fd = map->inner_map_fd;
+
+ if (obj->btf && !bpf_map_find_btf_info(obj, map)) {
+ create_attr.btf_fd = btf__fd(obj->btf);
+ create_attr.btf_key_type_id = map->btf_key_type_id;
+ create_attr.btf_value_type_id = map->btf_value_type_id;
+ }
+
+ *pfd = bpf_create_map_xattr(&create_attr);
+ if (*pfd < 0 && (create_attr.btf_key_type_id ||
+ create_attr.btf_value_type_id)) {
+ err = -errno;
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+ pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
+ map->name, cp, err);
+ create_attr.btf_fd = 0;
+ create_attr.btf_key_type_id = 0;
+ create_attr.btf_value_type_id = 0;
+ map->btf_key_type_id = 0;
+ map->btf_value_type_id = 0;
+ *pfd = bpf_create_map_xattr(&create_attr);
+ }
+
+ if (*pfd < 0) {
+ size_t j;
+
+ err = -errno;
+err_out:
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+ pr_warn("failed to create map (name: '%s'): %s(%d)\n",
+ map->name, cp, err);
+ for (j = 0; j < i; j++)
+ zclose(obj->maps[j].fd);
+ return err;
+ }
+
+ if (bpf_map__is_internal(map)) {
+ err = bpf_object__populate_internal_map(obj, map);
+ if (err < 0) {
+ zclose(*pfd);
+ goto err_out;
+ }
+ }
+
+ if (map->pin_path && !map->pinned) {
+ err = bpf_map__pin(map, NULL);
+ if (err) {
+ pr_warn("failed to auto-pin map name '%s' at '%s'\n",
+ map->name, map->pin_path);
+ return err;
+ }
+ }
+
+ pr_debug("created map %s: fd=%d\n", map->name, *pfd);
+ }
+
+ return 0;
+}
+
+static int
+check_btf_ext_reloc_err(struct bpf_program *prog, int err,
+ void *btf_prog_info, const char *info_name)
+{
+ if (err != -ENOENT) {
+ pr_warn("Error in loading %s for sec %s.\n",
+ info_name, prog->section_name);
+ return err;
+ }
+
+ /* err == -ENOENT (i.e. prog->section_name not found in btf_ext) */
+
+ if (btf_prog_info) {
+ /*
+ * Some info has already been found but has problem
+ * in the last btf_ext reloc. Must have to error out.
+ */
+ pr_warn("Error in relocating %s for sec %s.\n",
+ info_name, prog->section_name);
+ return err;
+ }
+
+ /* Have problem loading the very first info. Ignore the rest. */
+ pr_warn("Cannot find %s for main program sec %s. Ignore all %s.\n",
+ info_name, prog->section_name, info_name);
+ return 0;
+}
+
+static int
+bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj,
+ const char *section_name, __u32 insn_offset)
+{
+ int err;
+
+ if (!insn_offset || prog->func_info) {
+ /*
+ * !insn_offset => main program
+ *
+ * For sub prog, the main program's func_info has to
+ * be loaded first (i.e. prog->func_info != NULL)
+ */
+ err = btf_ext__reloc_func_info(obj->btf, obj->btf_ext,
+ section_name, insn_offset,
+ &prog->func_info,
+ &prog->func_info_cnt);
+ if (err)
+ return check_btf_ext_reloc_err(prog, err,
+ prog->func_info,
+ "bpf_func_info");
+
+ prog->func_info_rec_size = btf_ext__func_info_rec_size(obj->btf_ext);
+ }
+
+ if (!insn_offset || prog->line_info) {
+ err = btf_ext__reloc_line_info(obj->btf, obj->btf_ext,
+ section_name, insn_offset,
+ &prog->line_info,
+ &prog->line_info_cnt);
+ if (err)
+ return check_btf_ext_reloc_err(prog, err,
+ prog->line_info,
+ "bpf_line_info");
+
+ prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext);
+ }
+
+ return 0;
+}
+
+#define BPF_CORE_SPEC_MAX_LEN 64 /* capacity of spec[] and raw_spec[] in struct bpf_core_spec */
+
+/* represents BPF CO-RE field or array element accessor: one entry of the
+ * high-level spec kept in struct bpf_core_spec
+ */
+struct bpf_core_accessor {
+ __u32 type_id; /* struct/union type or array element type */
+ __u32 idx; /* field index or array index */
+ const char *name; /* field name or NULL for array accessor */
+};
+
+struct bpf_core_spec {
+ const struct btf *btf; /* BTF object the type IDs stored in this spec were resolved against */
+ /* high-level spec: named fields and array indices only */
+ struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
+ /* high-level spec length: number of used entries in spec[] */
+ int len;
+ /* raw, low-level spec: 1-to-1 with accessor spec string */
+ int raw_spec[BPF_CORE_SPEC_MAX_LEN];
+ /* raw spec length: number of used entries in raw_spec[] */
+ int raw_len;
+ /* field bit offset represented by spec */
+ __u32 bit_offset;
+};
+
/* Return true for a NULL pointer or a zero-length string. */
static bool str_is_empty(const char *s)
{
	if (s == NULL)
		return true;

	return *s == '\0';
}
+
+/*
+ * Turn bpf_field_reloc into a low- and high-level spec representation,
+ * validating correctness along the way, as well as calculating resulting
+ * field bit offset, specified by accessor string. Low-level spec captures
+ * every single level of nestedness, including traversing anonymous
+ * struct/union members. High-level one only captures semantically meaningful
+ * "turning points": named fields and array indicies.
+ * E.g., for this case:
+ *
+ * struct sample {
+ * int __unimportant;
+ * struct {
+ * int __1;
+ * int __2;
+ * int a[7];
+ * };
+ * };
+ *
+ * struct sample *s = ...;
+ *
+ * int x = &s->a[3]; // access string = '0:1:2:3'
+ *
+ * Low-level spec has 1:1 mapping with each element of access string (it's
+ * just a parsed access string representation): [0, 1, 2, 3].
+ *
+ * High-level spec will capture only 3 points:
+ * - intial zero-index access by pointer (&s->... is the same as &s[0]...);
+ * - field 'a' access (corresponds to '2' in low-level spec);
+ * - array element #3 access (corresponds to '3' in low-level spec).
+ *
+ */
+static int bpf_core_spec_parse(const struct btf *btf,
+ __u32 type_id,
+ const char *spec_str,
+ struct bpf_core_spec *spec)
+{
+ int access_idx, parsed_len, i;
+ const struct btf_type *t;
+ const char *name;
+ __u32 id;
+ __s64 sz;
+
+ if (str_is_empty(spec_str) || *spec_str == ':')
+ return -EINVAL;
+
+ memset(spec, 0, sizeof(*spec));
+ spec->btf = btf;
+
+ /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
+ while (*spec_str) {
+ if (*spec_str == ':')
+ ++spec_str;
+ if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
+ return -EINVAL;
+ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+ spec_str += parsed_len;
+ spec->raw_spec[spec->raw_len++] = access_idx;
+ }
+
+ if (spec->raw_len == 0)
+ return -EINVAL;
+
+ /* first spec value is always reloc type array index */
+ t = skip_mods_and_typedefs(btf, type_id, &id);
+ if (!t)
+ return -EINVAL;
+
+ access_idx = spec->raw_spec[0];
+ spec->spec[0].type_id = id;
+ spec->spec[0].idx = access_idx;
+ spec->len++;
+
+ sz = btf__resolve_size(btf, id);
+ if (sz < 0)
+ return sz;
+ spec->bit_offset = access_idx * sz * 8;
+
+ for (i = 1; i < spec->raw_len; i++) {
+ t = skip_mods_and_typedefs(btf, id, &id);
+ if (!t)
+ return -EINVAL;
+
+ access_idx = spec->raw_spec[i];
+
+ if (btf_is_composite(t)) {
+ const struct btf_member *m;
+ __u32 bit_offset;
+
+ if (access_idx >= btf_vlen(t))
+ return -EINVAL;
+
+ bit_offset = btf_member_bit_offset(t, access_idx);
+ spec->bit_offset += bit_offset;
+
+ m = btf_members(t) + access_idx;
+ if (m->name_off) {
+ name = btf__name_by_offset(btf, m->name_off);
+ if (str_is_empty(name))
+ return -EINVAL;
+
+ spec->spec[spec->len].type_id = id;
+ spec->spec[spec->len].idx = access_idx;
+ spec->spec[spec->len].name = name;
+ spec->len++;
+ }
+
+ id = m->type;
+ } else if (btf_is_array(t)) {
+ const struct btf_array *a = btf_array(t);
+
+ t = skip_mods_and_typedefs(btf, a->type, &id);
+ if (!t || access_idx >= a->nelems)
+ return -EINVAL;
+
+ spec->spec[spec->len].type_id = id;
+ spec->spec[spec->len].idx = access_idx;
+ spec->len++;
+
+ sz = btf__resolve_size(btf, id);
+ if (sz < 0)
+ return sz;
+ spec->bit_offset += access_idx * sz * 8;
+ } else {
+ pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n",
+ type_id, spec_str, i, id, btf_kind(t));
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
/*
 * Check whether @s starts with the X___Y pattern: exactly three underscores
 * with a non-underscore character on either side. Up to five characters
 * starting at @s are examined.
 */
static bool bpf_core_is_flavor_sep(const char *s)
{
	if (s[0] == '_')			/* X */
		return false;
	if (s[1] != '_' || s[2] != '_' || s[3] != '_')	/* ___ */
		return false;

	return s[4] != '_';			/* Y */
}

/* Given 'some_struct_name___with_flavor' return the length of a name prefix
 * before last triple underscore. Struct name part after last triple
 * underscore is ignored by BPF CO-RE relocation during relocation matching.
 * A name without such a separator yields its full length.
 */
static size_t bpf_core_essential_name_len(const char *name)
{
	size_t n = strlen(name);
	int pos = n - 5;

	/* scan backwards so that the last separator wins */
	while (pos >= 0) {
		if (bpf_core_is_flavor_sep(name + pos))
			return pos + 1;
		pos--;
	}

	return n;
}
+
+/* dynamically sized list of type IDs; both the struct and its data array
+ * are heap-allocated and released together by bpf_core_free_cands()
+ */
+struct ids_vec {
+ __u32 *data; /* array of type IDs, grown with realloc() */
+ int len; /* number of IDs stored in data */
+};
+
+static void bpf_core_free_cands(struct ids_vec *cand_ids)
+{
+ free(cand_ids->data);
+ free(cand_ids);
+}
+
+static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
+ __u32 local_type_id,
+ const struct btf *targ_btf)
+{
+ size_t local_essent_len, targ_essent_len;
+ const char *local_name, *targ_name;
+ const struct btf_type *t;
+ struct ids_vec *cand_ids;
+ __u32 *new_ids;
+ int i, err, n;
+
+ t = btf__type_by_id(local_btf, local_type_id);
+ if (!t)
+ return ERR_PTR(-EINVAL);
+
+ local_name = btf__name_by_offset(local_btf, t->name_off);
+ if (str_is_empty(local_name))
+ return ERR_PTR(-EINVAL);
+ local_essent_len = bpf_core_essential_name_len(local_name);
+
+ cand_ids = calloc(1, sizeof(*cand_ids));
+ if (!cand_ids)
+ return ERR_PTR(-ENOMEM);
+
+ n = btf__get_nr_types(targ_btf);
+ for (i = 1; i <= n; i++) {
+ t = btf__type_by_id(targ_btf, i);
+ targ_name = btf__name_by_offset(targ_btf, t->name_off);
+ if (str_is_empty(targ_name))
+ continue;
+
+ targ_essent_len = bpf_core_essential_name_len(targ_name);
+ if (targ_essent_len != local_essent_len)
+ continue;
+
+ if (strncmp(local_name, targ_name, local_essent_len) == 0) {
+ pr_debug("[%d] %s: found candidate [%d] %s\n",
+ local_type_id, local_name, i, targ_name);
+ new_ids = realloc(cand_ids->data, cand_ids->len + 1);
+ if (!new_ids) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ cand_ids->data = new_ids;
+ cand_ids->data[cand_ids->len++] = i;
+ }
+ }
+ return cand_ids;
+err_out:
+ bpf_core_free_cands(cand_ids);
+ return ERR_PTR(err);
+}
+
+/* Check two types for compatibility, skipping const/volatile/restrict and
+ * typedefs, to ensure we are relocating compatible entities:
+ * - any two STRUCTs/UNIONs are compatible and can be mixed;
+ * - any two FWDs are compatible, if their names match (modulo flavor suffix);
+ * - any two PTRs are always compatible;
+ * - for ENUMs, names should be the same (ignoring flavor suffix) or at
+ * least one of enums should be anonymous;
+ * - for ENUMs, check sizes, names are ignored;
+ * - for INT, size and signedness are ignored;
+ * - for ARRAY, dimensionality is ignored, element types are checked for
+ * compatibility recursively;
+ * - everything else shouldn't be ever a target of relocation.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations.
+ */
+static int bpf_core_fields_are_compat(const struct btf *local_btf,
+ __u32 local_id,
+ const struct btf *targ_btf,
+ __u32 targ_id)
+{
+ const struct btf_type *local_type, *targ_type;
+
+recur:
+ local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!local_type || !targ_type)
+ return -EINVAL;
+
+ if (btf_is_composite(local_type) && btf_is_composite(targ_type))
+ return 1;
+ if (btf_kind(local_type) != btf_kind(targ_type))
+ return 0;
+
+ switch (btf_kind(local_type)) {
+ case BTF_KIND_PTR:
+ return 1;
+ case BTF_KIND_FWD:
+ case BTF_KIND_ENUM: {
+ const char *local_name, *targ_name;
+ size_t local_len, targ_len;
+
+ local_name = btf__name_by_offset(local_btf,
+ local_type->name_off);
+ targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
+ local_len = bpf_core_essential_name_len(local_name);
+ targ_len = bpf_core_essential_name_len(targ_name);
+ /* one of them is anonymous or both w/ same flavor-less names */
+ return local_len == 0 || targ_len == 0 ||
+ (local_len == targ_len &&
+ strncmp(local_name, targ_name, local_len) == 0);
+ }
+ case BTF_KIND_INT:
+ /* just reject deprecated bitfield-like integers; all other
+ * integers are by default compatible between each other
+ */
+ return btf_int_offset(local_type) == 0 &&
+ btf_int_offset(targ_type) == 0;
+ case BTF_KIND_ARRAY:
+ local_id = btf_array(local_type)->type;
+ targ_id = btf_array(targ_type)->type;
+ goto recur;
+ default:
+ pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
+ btf_kind(local_type), local_id, targ_id);
+ return 0;
+ }
+}
+
+/*
+ * Given single high-level named field accessor in local type, find
+ * corresponding high-level accessor for a target type. Along the way,
+ * maintain low-level spec for target as well. Also keep updating target
+ * bit offset.
+ *
+ * Searching is performed through recursive exhaustive enumeration of all
+ * fields of a struct/union. If there are any anonymous (embedded)
+ * structs/unions, they are recursively searched as well. If field with
+ * desired name is found, check compatibility between local and target types,
+ * before returning result.
+ *
+ * local_acc identifies the local member (type_id + idx) whose name is
+ * searched for; targ_id is the target type to search in (a non-composite
+ * target simply yields 0); spec is the target spec being built.
+ * *next_targ_id is set to the type of the first member found with a matching
+ * name, even when the compatibility check then rejects it.
+ *
+ * 1 is returned, if field is found.
+ * 0 is returned if no compatible field is found.
+ * <0 is returned on error.
+ *
+ * NOTE(review): when a same-named member fails the compatibility check, the
+ * function returns 0 right away: only the high-level accessor is popped, the
+ * raw_spec entry and bit_offset pushed for that member are not undone.
+ * bpf_core_spec_match() returns immediately on a non-positive result.
+ */
+static int bpf_core_match_member(const struct btf *local_btf,
+ const struct bpf_core_accessor *local_acc,
+ const struct btf *targ_btf,
+ __u32 targ_id,
+ struct bpf_core_spec *spec,
+ __u32 *next_targ_id)
+{
+ const struct btf_type *local_type, *targ_type;
+ const struct btf_member *local_member, *m;
+ const char *local_name, *targ_name;
+ __u32 local_id;
+ int i, n, found;
+
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!targ_type)
+ return -EINVAL;
+ if (!btf_is_composite(targ_type))
+ return 0;
+
+ local_id = local_acc->type_id;
+ local_type = btf__type_by_id(local_btf, local_id);
+ local_member = btf_members(local_type) + local_acc->idx;
+ local_name = btf__name_by_offset(local_btf, local_member->name_off);
+
+ n = btf_vlen(targ_type);
+ m = btf_members(targ_type);
+ for (i = 0; i < n; i++, m++) {
+ __u32 bit_offset;
+
+ bit_offset = btf_member_bit_offset(targ_type, i);
+
+ /* too deep struct/union/array nesting */
+ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+
+ /* speculate this member will be the good one: push its index and
+ * bit offset now, pop them again below if it isn't
+ */
+ spec->bit_offset += bit_offset;
+ spec->raw_spec[spec->raw_len++] = i;
+
+ targ_name = btf__name_by_offset(targ_btf, m->name_off);
+ if (str_is_empty(targ_name)) {
+ /* embedded struct/union, we need to go deeper */
+ found = bpf_core_match_member(local_btf, local_acc,
+ targ_btf, m->type,
+ spec, next_targ_id);
+ if (found) /* either found or error */
+ return found;
+ } else if (strcmp(local_name, targ_name) == 0) {
+ /* matching named field */
+ struct bpf_core_accessor *targ_acc;
+
+ targ_acc = &spec->spec[spec->len++];
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = i;
+ targ_acc->name = targ_name;
+
+ *next_targ_id = m->type;
+ found = bpf_core_fields_are_compat(local_btf,
+ local_member->type,
+ targ_btf, m->type);
+ if (!found)
+ spec->len--; /* pop accessor */
+ return found;
+ }
+ /* member turned out not to be what we looked for */
+ spec->bit_offset -= bit_offset;
+ spec->raw_len--;
+ }
+
+ return 0;
+}
+
+/*
+ * Try to match local spec to a target type and, if successful, produce full
+ * target spec (high-level, low-level + bit offset).
+ *
+ * targ_spec is zeroed first and then built up by walking the high-level
+ * accessors of local_spec in order: named accessors are resolved with
+ * bpf_core_match_member(), unnamed (array index) accessors are copied over
+ * after checking the index against the target array.
+ *
+ * Returns 1 if the whole spec matched, 0 if the target type doesn't match,
+ * <0 on error (-EINVAL for unresolvable types, -E2BIG for too deep nesting,
+ * or a negative size from btf__resolve_size()).
+ */
+static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
+ const struct btf *targ_btf, __u32 targ_id,
+ struct bpf_core_spec *targ_spec)
+{
+ const struct btf_type *targ_type;
+ const struct bpf_core_accessor *local_acc;
+ struct bpf_core_accessor *targ_acc;
+ int i, sz, matched;
+
+ memset(targ_spec, 0, sizeof(*targ_spec));
+ targ_spec->btf = targ_btf;
+
+ local_acc = &local_spec->spec[0];
+ targ_acc = &targ_spec->spec[0];
+
+ for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
+ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
+ &targ_id);
+ if (!targ_type)
+ return -EINVAL;
+
+ if (local_acc->name) {
+ /* named field: the helper appends to targ_spec itself and
+ * hands back the member's type in targ_id
+ */
+ matched = bpf_core_match_member(local_spec->btf,
+ local_acc,
+ targ_btf, targ_id,
+ targ_spec, &targ_id);
+ if (matched <= 0)
+ return matched;
+ } else {
+ /* for i=0, targ_id is already treated as array element
+ * type (because it's the original struct), for others
+ * we should find array element type first
+ */
+ if (i > 0) {
+ const struct btf_array *a;
+
+ if (!btf_is_array(targ_type))
+ return 0;
+
+ a = btf_array(targ_type);
+ if (local_acc->idx >= a->nelems)
+ return 0;
+ if (!skip_mods_and_typedefs(targ_btf, a->type,
+ &targ_id))
+ return -EINVAL;
+ }
+
+ /* too deep struct/union/array nesting */
+ if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = local_acc->idx;
+ targ_acc->name = NULL;
+ targ_spec->len++;
+ targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+ targ_spec->raw_len++;
+
+ sz = btf__resolve_size(targ_btf, targ_id);
+ if (sz < 0)
+ return sz;
+ targ_spec->bit_offset += local_acc->idx * sz * 8;
+ }
+ }
+
+ return 1;
+}
+/*
+ * Compute the value a field relocation of kind relo->kind evaluates to for
+ * the field described by the last accessor of spec.
+ *
+ * The result is stored in *val. validate may be NULL; otherwise *validate is
+ * set to whether the computed value is reliable enough to be compared
+ * against the value found in the instruction (it is turned off for
+ * bitfields, except for the signedness and right-shift kinds).
+ *
+ * Returns 0 on success, -EINVAL for an unsupported kind or an unresolvable
+ * size, -E2BIG if a bitfield can't be covered by a single 8-byte load.
+ */
+static int bpf_core_calc_field_relo(const struct bpf_program *prog,
+ const struct bpf_field_reloc *relo,
+ const struct bpf_core_spec *spec,
+ __u32 *val, bool *validate)
+{
+ const struct bpf_core_accessor *acc = &spec->spec[spec->len - 1];
+ const struct btf_type *t = btf__type_by_id(spec->btf, acc->type_id);
+ __u32 byte_off, byte_sz, bit_off, bit_sz;
+ const struct btf_member *m;
+ const struct btf_type *mt;
+ bool bitfield;
+ __s64 sz;
+
+ /* a[n] accessor needs special handling: acc->type_id is the element
+ * type here, so only byte offset and byte size can be computed
+ */
+ if (!acc->name) {
+ if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
+ *val = spec->bit_offset / 8;
+ } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
+ sz = btf__resolve_size(spec->btf, acc->type_id);
+ if (sz < 0)
+ return -EINVAL;
+ *val = sz;
+ } else {
+ pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
+ bpf_program__title(prog, false),
+ relo->kind, relo->insn_off / 8);
+ return -EINVAL;
+ }
+ if (validate)
+ *validate = true;
+ return 0;
+ }
+
+ m = btf_members(t) + acc->idx;
+ mt = skip_mods_and_typedefs(spec->btf, m->type, NULL);
+ bit_off = spec->bit_offset;
+ bit_sz = btf_member_bitfield_size(t, acc->idx);
+
+ bitfield = bit_sz > 0;
+ if (bitfield) {
+ byte_sz = mt->size;
+ byte_off = bit_off / 8 / byte_sz * byte_sz;
+ /* figure out smallest int size necessary for bitfield load:
+ * keep doubling the load size (re-aligning its start) until the
+ * whole bitfield fits inside it
+ */
+ while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
+ if (byte_sz >= 8) {
+ /* bitfield can't be read with 64-bit read */
+ pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
+ bpf_program__title(prog, false),
+ relo->kind, relo->insn_off / 8);
+ return -E2BIG;
+ }
+ byte_sz *= 2;
+ byte_off = bit_off / 8 / byte_sz * byte_sz;
+ }
+ } else {
+ sz = btf__resolve_size(spec->btf, m->type);
+ if (sz < 0)
+ return -EINVAL;
+ byte_sz = sz;
+ byte_off = spec->bit_offset / 8;
+ bit_sz = byte_sz * 8;
+ }
+
+ /* for bitfields, all the relocatable aspects are ambiguous and we
+ * might disagree with compiler, so turn off validation of expected
+ * value, except for signedness
+ */
+ if (validate)
+ *validate = !bitfield;
+
+ switch (relo->kind) {
+ case BPF_FIELD_BYTE_OFFSET:
+ *val = byte_off;
+ break;
+ case BPF_FIELD_BYTE_SIZE:
+ *val = byte_sz;
+ break;
+ case BPF_FIELD_SIGNED:
+ /* NOTE: as written, any enum yields 1 here, i.e. enums are
+ * reported as signed; ints follow their BTF_INT_SIGNED encoding
+ */
+ *val = btf_is_enum(mt) ||
+ (btf_int_encoding(mt) & BTF_INT_SIGNED);
+ if (validate)
+ *validate = true; /* signedness is never ambiguous */
+ break;
+ case BPF_FIELD_LSHIFT_U64:
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ *val = 64 - (bit_off + bit_sz - byte_off * 8);
+#else
+ *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
+#endif
+ break;
+ case BPF_FIELD_RSHIFT_U64:
+ *val = 64 - bit_sz;
+ if (validate)
+ *validate = true; /* right shift is never ambiguous */
+ break;
+ case BPF_FIELD_EXISTS: /* not computed here, rejected like any unknown kind */
+ default:
+ pr_warn("prog '%s': unknown relo %d at insn #%d\n",
+ bpf_program__title(prog, false),
+ relo->kind, relo->insn_off / 8);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Patch relocatable BPF instruction.
+ *
+ * Patched value is determined by relocation kind and target specification.
+ * For field existence relocation target spec will be NULL if field is not
+ * found.
+ * Expected insn->imm value is determined using relocation kind and local
+ * spec, and is checked before patching instruction. If actual insn->imm value
+ * is wrong, bail out with error.
+ *
+ * Currently three kinds of BPF instructions are supported:
+ * 1. rX = <imm> (assignment with immediate operand);
+ * 2. rX += <imm> (arithmetic operations with immediate operand);
+ */
+static int bpf_core_reloc_insn(struct bpf_program *prog,
+ const struct bpf_field_reloc *relo,
+ const struct bpf_core_spec *local_spec,
+ const struct bpf_core_spec *targ_spec)
+{
+ bool failed = false, validate = true;
+ __u32 orig_val, new_val;
+ struct bpf_insn *insn;
+ int insn_idx, err;
+ __u8 class;
+
+ if (relo->insn_off % sizeof(struct bpf_insn))
+ return -EINVAL;
+ insn_idx = relo->insn_off / sizeof(struct bpf_insn);
+
+ if (relo->kind == BPF_FIELD_EXISTS) {
+ orig_val = 1; /* can't generate EXISTS relo w/o local field */
+ new_val = targ_spec ? 1 : 0;
+ } else if (!targ_spec) {
+ failed = true;
+ new_val = (__u32)-1;
+ } else {
+ err = bpf_core_calc_field_relo(prog, relo, local_spec,
+ &orig_val, &validate);
+ if (err)
+ return err;
+ err = bpf_core_calc_field_relo(prog, relo, targ_spec,
+ &new_val, NULL);
+ if (err)
+ return err;
+ }
+
+ insn = &prog->insns[insn_idx];
+ class = BPF_CLASS(insn->code);
+
+ if (class == BPF_ALU || class == BPF_ALU64) {
+ if (BPF_SRC(insn->code) != BPF_K)
+ return -EINVAL;
+ if (!failed && validate && insn->imm != orig_val) {
+ pr_warn("prog '%s': unexpected insn #%d value: got %u, exp %u -> %u\n",
+ bpf_program__title(prog, false), insn_idx,
+ insn->imm, orig_val, new_val);
+ return -EINVAL;
+ }
+ orig_val = insn->imm;
+ insn->imm = new_val;
+ pr_debug("prog '%s': patched insn #%d (ALU/ALU64)%s imm %u -> %u\n",
+ bpf_program__title(prog, false), insn_idx,
+ failed ? " w/ failed reloc" : "", orig_val, new_val);
+ } else {
+ pr_warn("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n",
+ bpf_program__title(prog, false),
+ insn_idx, insn->code, insn->src_reg, insn->dst_reg,
+ insn->off, insn->imm);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+ /*
+  * Read the entire file at 'path' into a temporary buffer and hand it to
+  * btf__new(). The buffer is always released before returning.
+  *
+  * Returns the result of btf__new(), or an ERR_PTR()-encoded negative errno
+  * if the file can't be stat'ed, opened or fully read, or on allocation
+  * failure.
+  */
+ static struct btf *btf_load_raw(const char *path)
+ {
+ 	struct btf *result;
+ 	struct stat sb;
+ 	size_t nread;
+ 	void *buf;
+ 	FILE *fp;
+
+ 	if (stat(path, &sb))
+ 		return ERR_PTR(-errno);
+
+ 	buf = malloc(sb.st_size);
+ 	if (!buf)
+ 		return ERR_PTR(-ENOMEM);
+
+ 	fp = fopen(path, "rb");
+ 	if (!fp) {
+ 		/* errno must be captured before free() below */
+ 		result = ERR_PTR(-errno);
+ 	} else {
+ 		nread = fread(buf, 1, sb.st_size, fp);
+ 		fclose(fp);
+ 		if (nread < sb.st_size)
+ 			result = ERR_PTR(-EBADF); /* short read */
+ 		else
+ 			result = btf__new(buf, nread);
+ 	}
+
+ 	free(buf);
+ 	return result;
+ }
+
+/*
+ * Probe few well-known locations for vmlinux kernel image and try to load BTF
+ * data out of it to use for target BTF.
+ */
+ static struct btf *bpf_core_find_kernel_btf(void)
+ {
+ 	struct {
+ 		const char *path_fmt;
+ 		bool raw_btf;
+ 	} locations[] = {
+ 		/* try canonical vmlinux BTF through sysfs first */
+ 		{ "/sys/kernel/btf/vmlinux", true /* raw BTF */ },
+ 		/* fall back to trying to find vmlinux ELF on disk otherwise */
+ 		{ "/boot/vmlinux-%1$s" },
+ 		{ "/lib/modules/%1$s/vmlinux-%1$s" },
+ 		{ "/lib/modules/%1$s/build/vmlinux" },
+ 		{ "/usr/lib/modules/%1$s/kernel/vmlinux" },
+ 		{ "/usr/lib/debug/boot/vmlinux-%1$s" },
+ 		{ "/usr/lib/debug/boot/vmlinux-%1$s.debug" },
+ 		{ "/usr/lib/debug/lib/modules/%1$s/vmlinux" },
+ 	};
+ 	char path[PATH_MAX + 1];
+ 	bool have_release = true;
+ 	struct utsname buf;
+ 	struct btf *btf;
+ 	int i;
+
+ 	/*
+ 	 * Kernel release is needed only for the on-disk vmlinux locations.
+ 	 * If uname() fails, buf is indeterminate, so make release an empty
+ 	 * string and skip every location that is not raw BTF (i.e. the
+ 	 * release-dependent ones), instead of formatting garbage into a path.
+ 	 */
+ 	if (uname(&buf)) {
+ 		pr_debug("failed to get kernel release: %d\n", -errno);
+ 		memset(&buf, 0, sizeof(buf));
+ 		have_release = false;
+ 	}
+
+ 	for (i = 0; i < ARRAY_SIZE(locations); i++) {
+ 		if (!locations[i].raw_btf && !have_release)
+ 			continue;
+
+ 		snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release);
+
+ 		/* silently skip locations that don't exist or aren't readable */
+ 		if (access(path, R_OK))
+ 			continue;
+
+ 		if (locations[i].raw_btf)
+ 			btf = btf_load_raw(path);
+ 		else
+ 			btf = btf__parse_elf(path, NULL);
+
+ 		pr_debug("loading kernel BTF '%s': %ld\n",
+ 			 path, IS_ERR(btf) ? PTR_ERR(btf) : 0);
+ 		if (IS_ERR(btf))
+ 			continue;
+
+ 		/* first location that yields valid BTF wins */
+ 		return btf;
+ 	}
+
+ 	pr_warn("failed to find valid kernel BTF\n");
+ 	return ERR_PTR(-ESRCH);
+ }
+
+/* Output spec definition in the format:
+ * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
+ * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
+ */
+ static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
+ {
+ 	__u32 root_id = spec->spec[0].type_id;
+ 	const struct btf_type *root = btf__type_by_id(spec->btf, root_id);
+ 	const char *root_name = btf__name_by_offset(spec->btf, root->name_off);
+ 	int n;
+
+ 	/* root type: "[<type-id>] <type-name> + " */
+ 	libbpf_print(level, "[%u] %s + ", root_id, root_name);
+
+ 	/* raw accessor indices, ':'-separated; the last one ends with " => " */
+ 	for (n = 0; n < spec->raw_len; n++) {
+ 		const char *sep = n == spec->raw_len - 1 ? " => " : ":";
+
+ 		libbpf_print(level, "%d%s", spec->raw_spec[n], sep);
+ 	}
+
+ 	/* resolved offset printed as <bytes>.<bits> */
+ 	libbpf_print(level, "%u.%u @ &x",
+ 		     spec->bit_offset / 8, spec->bit_offset % 8);
+
+ 	/* C-like accessor chain: named members as ".name", others as "[idx]" */
+ 	for (n = 0; n < spec->len; n++) {
+ 		if (!spec->spec[n].name)
+ 			libbpf_print(level, "[%u]", spec->spec[n].idx);
+ 		else
+ 			libbpf_print(level, ".%s", spec->spec[n].name);
+ 	}
+
+ }
+
+ /* Hash callback: the key pointer value itself is used as the hash. */
+ static size_t bpf_core_hash_fn(const void *key, void *ctx)
+ {
+ 	size_t hash = (size_t)key;
+
+ 	return hash;
+ }
+
+ /* Equality callback: keys are equal iff the pointer values are identical. */
+ static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
+ {
+ 	return !(k1 != k2);
+ }
+
+ /* Encode a 32-bit value directly in a pointer-sized hashmap key. */
+ static void *u32_as_hash_key(__u32 x)
+ {
+ 	uintptr_t widened = x;
+
+ 	return (void *)widened;
+ }
+
+/*
+ * CO-RE relocate single instruction.
+ *
+ * The outline and important points of the algorithm:
+ * 1. For given local type, find corresponding candidate target types.
+ * Candidate type is a type with the same "essential" name, ignoring
+ * everything after last triple underscore (___). E.g., `sample`,
+ * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
+ * for each other. Names with triple underscore are referred to as
+ * "flavors" and are useful, among other things, to allow to
+ * specify/support incompatible variations of the same kernel struct, which
+ * might differ between different kernel versions and/or build
+ * configurations.
+ *
+ * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
+ * converter, when deduplicated BTF of a kernel still contains more than
+ * one different types with the same name. In that case, ___2, ___3, etc
+ * are appended starting from second name conflict. But struct flavors are
+ * also useful to be defined "locally", in BPF program, to extract same
+ * data from incompatible changes between different kernel
+ * versions/configurations. For instance, to handle field renames between
+ * kernel versions, one can use two flavors of the struct name with the
+ * same common name and use conditional relocations to extract that field,
+ * depending on target kernel version.
+ * 2. For each candidate type, try to match local specification to this
+ * candidate target type. Matching involves finding corresponding
+ * high-level spec accessors, meaning that all named fields should match,
+ * as well as all array accesses should be within the actual bounds. Also,
+ * types should be compatible (see bpf_core_fields_are_compat for details).
+ * 3. It is supported and expected that there might be multiple flavors
+ * matching the spec. As long as all the specs resolve to the same set of
+ * offsets across all candidates, there is no error. If there is any
+ * ambiguity, CO-RE relocation will fail. This is necessary to accommodate
+ * imperfection of BTF deduplication, which can cause slight duplication of
+ * the same BTF type, if some directly or indirectly referenced (by
+ * pointer) type gets resolved to different actual types in different
+ * object files. If such situation occurs, deduplicated BTF will end up
+ * with two (or more) structurally identical types, which differ only in
+ * types they refer to through pointer. This should be OK in most cases and
+ * is not an error.
+ * 4. Candidate types search is performed by linearly scanning through all
+ * types in target BTF. It is anticipated that this is overall more
+ * efficient memory-wise and not significantly worse (if not better)
+ * CPU-wise compared to prebuilding a map from all local type names to
+ * a list of candidate type names. It's also sped up by caching resolved
+ * list of matching candidates per each local "root" type ID, that has at
+ * least one bpf_field_reloc associated with it. This list is shared
+ * between multiple relocations for the same type ID and is updated as some
+ * of the candidates are pruned due to structural incompatibility.
+ */
+ static int bpf_core_reloc_field(struct bpf_program *prog,
+ 				const struct bpf_field_reloc *relo,
+ 				int relo_idx,
+ 				const struct btf *local_btf,
+ 				const struct btf *targ_btf,
+ 				struct hashmap *cand_cache)
+ {
+ 	const char *prog_name = bpf_program__title(prog, false);
+ 	struct bpf_core_spec local_spec, cand_spec, targ_spec;
+ 	const void *type_key = u32_as_hash_key(relo->type_id);
+ 	const struct btf_type *local_type, *cand_type;
+ 	const char *local_name, *cand_name;
+ 	struct ids_vec *cand_ids;
+ 	__u32 local_id, cand_id;
+ 	const char *spec_str;
+ 	int i, j, err;
+
+ 	/* resolve and sanity-check the local root type and accessor string */
+ 	local_id = relo->type_id;
+ 	local_type = btf__type_by_id(local_btf, local_id);
+ 	if (!local_type)
+ 		return -EINVAL;
+
+ 	local_name = btf__name_by_offset(local_btf, local_type->name_off);
+ 	if (str_is_empty(local_name))
+ 		return -EINVAL;
+
+ 	spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
+ 	if (str_is_empty(spec_str))
+ 		return -EINVAL;
+
+ 	err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec);
+ 	if (err) {
+ 		pr_warn("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n",
+ 			prog_name, relo_idx, local_id, local_name, spec_str,
+ 			err);
+ 		return -EINVAL;
+ 	}
+
+ 	pr_debug("prog '%s': relo #%d: kind %d, spec is ", prog_name, relo_idx,
+ 		 relo->kind);
+ 	bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
+ 	libbpf_print(LIBBPF_DEBUG, "\n");
+
+ 	/* look up (or compute and cache) candidates for this local type ID */
+ 	if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {
+ 		cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
+ 		if (IS_ERR(cand_ids)) {
+ 			/* fixed: message used to lack the trailing newline */
+ 			pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld\n",
+ 				prog_name, relo_idx, local_id, local_name,
+ 				PTR_ERR(cand_ids));
+ 			return PTR_ERR(cand_ids);
+ 		}
+ 		err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL);
+ 		if (err) {
+ 			/* cache didn't take ownership, release the list here */
+ 			bpf_core_free_cands(cand_ids);
+ 			return err;
+ 		}
+ 	}
+
+ 	/*
+ 	 * Match local spec against every candidate; j counts matches and is
+ 	 * also the write position used to compact matching IDs in place.
+ 	 */
+ 	for (i = 0, j = 0; i < cand_ids->len; i++) {
+ 		cand_id = cand_ids->data[i];
+ 		cand_type = btf__type_by_id(targ_btf, cand_id);
+ 		cand_name = btf__name_by_offset(targ_btf, cand_type->name_off);
+
+ 		err = bpf_core_spec_match(&local_spec, targ_btf,
+ 					  cand_id, &cand_spec);
+ 		pr_debug("prog '%s': relo #%d: matching candidate #%d %s against spec ",
+ 			 prog_name, relo_idx, i, cand_name);
+ 		bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
+ 		libbpf_print(LIBBPF_DEBUG, ": %d\n", err);
+ 		if (err < 0) {
+ 			pr_warn("prog '%s': relo #%d: matching error: %d\n",
+ 				prog_name, relo_idx, err);
+ 			return err;
+ 		}
+ 		if (err == 0)
+ 			continue; /* candidate doesn't match, drop it */
+
+ 		if (j == 0) {
+ 			targ_spec = cand_spec;
+ 		} else if (cand_spec.bit_offset != targ_spec.bit_offset) {
+ 			/* if there are many candidates, they should all
+ 			 * resolve to the same bit offset
+ 			 */
+ 			pr_warn("prog '%s': relo #%d: offset ambiguity: %u != %u\n",
+ 				prog_name, relo_idx, cand_spec.bit_offset,
+ 				targ_spec.bit_offset);
+ 			return -EINVAL;
+ 		}
+
+ 		cand_ids->data[j++] = cand_spec.spec[0].type_id;
+ 	}
+
+ 	/*
+ 	 * For BPF_FIELD_EXISTS relo or when relaxed CO-RE reloc mode is
+ 	 * requested, it's expected that we might not find any candidates.
+ 	 * In this case, if field wasn't found in any candidate, the list of
+ 	 * candidates shouldn't change at all, we'll just handle relocating
+ 	 * appropriately, depending on relo's kind.
+ 	 */
+ 	if (j > 0)
+ 		cand_ids->len = j;
+
+ 	if (j == 0 && !prog->obj->relaxed_core_relocs &&
+ 	    relo->kind != BPF_FIELD_EXISTS) {
+ 		pr_warn("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n",
+ 			prog_name, relo_idx, local_id, local_name, spec_str);
+ 		return -ESRCH;
+ 	}
+
+ 	/* bpf_core_reloc_insn should know how to handle missing targ_spec */
+ 	err = bpf_core_reloc_insn(prog, relo, &local_spec,
+ 				  j ? &targ_spec : NULL);
+ 	if (err) {
+ 		pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
+ 			prog_name, relo_idx, relo->insn_off, err);
+ 		return -EINVAL;
+ 	}
+
+ 	return 0;
+ }
+
+ /*
+  * Apply all CO-RE field relocations recorded in obj->btf_ext.
+  *
+  * Target BTF is parsed from 'targ_btf_path' when given, otherwise it is
+  * looked up via bpf_core_find_kernel_btf(). A per-call candidate cache is
+  * shared across all relocations and released, along with the target BTF,
+  * before returning. Returns 0 on success, negative error otherwise.
+  */
+ static int
+ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
+ {
+ 	struct hashmap *cand_cache = NULL;
+ 	const struct btf_ext_info_sec *sec;
+ 	const struct bpf_field_reloc *rec;
+ 	const struct btf_ext_info *seg;
+ 	struct hashmap_entry *cur;
+ 	struct bpf_program *prog;
+ 	struct btf *targ_btf;
+ 	const char *sec_name;
+ 	int i, err = 0;
+
+ 	targ_btf = targ_btf_path ? btf__parse_elf(targ_btf_path, NULL)
+ 				 : bpf_core_find_kernel_btf();
+ 	if (IS_ERR(targ_btf)) {
+ 		pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf));
+ 		return PTR_ERR(targ_btf);
+ 	}
+
+ 	cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
+ 	if (IS_ERR(cand_cache)) {
+ 		err = PTR_ERR(cand_cache);
+ 		goto out;
+ 	}
+
+ 	seg = &obj->btf_ext->field_reloc_info;
+ 	/* one section of relocation records per program (ELF section) */
+ 	for_each_btf_ext_sec(seg, sec) {
+ 		sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
+ 		if (str_is_empty(sec_name)) {
+ 			err = -EINVAL;
+ 			goto out;
+ 		}
+
+ 		prog = bpf_object__find_program_by_title(obj, sec_name);
+ 		if (!prog) {
+ 			pr_warn("failed to find program '%s' for CO-RE offset relocation\n",
+ 				sec_name);
+ 			err = -EINVAL;
+ 			goto out;
+ 		}
+
+ 		pr_debug("prog '%s': performing %d CO-RE offset relocs\n",
+ 			 sec_name, sec->num_info);
+
+ 		for_each_btf_ext_rec(seg, sec, i, rec) {
+ 			err = bpf_core_reloc_field(prog, rec, i, obj->btf,
+ 						   targ_btf, cand_cache);
+ 			if (!err)
+ 				continue;
+
+ 			pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
+ 				sec_name, i, err);
+ 			goto out;
+ 		}
+ 	}
+
+ out:
+ 	btf__free(targ_btf);
+ 	/* cand_cache may be NULL or an ERR_PTR if creation failed */
+ 	if (!IS_ERR_OR_NULL(cand_cache)) {
+ 		hashmap__for_each_entry(cand_cache, cur, i) {
+ 			bpf_core_free_cands(cur->value);
+ 		}
+ 		hashmap__free(cand_cache);
+ 	}
+ 	return err;
+ }
+
+ /*
+  * Run CO-RE field relocations, but only when field_reloc_info of
+  * obj->btf_ext is non-empty; otherwise this is a no-op returning 0.
+  * Caller must ensure obj->btf_ext is non-NULL.
+  */
+ static int
+ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
+ {
+ 	if (!obj->btf_ext->field_reloc_info.len)
+ 		return 0;
+
+ 	return bpf_core_reloc_fields(obj, targ_btf_path);
+ }
+
+ /*
+  * Resolve a RELO_CALL relocation against code living in .text.
+  *
+  * On the first such relocation for 'prog' (main_prog_cnt == 0) the .text
+  * program's instructions are appended to prog->insns and main_prog_cnt
+  * records where the original instructions end. The call instruction's imm
+  * is then adjusted to point into the appended copy.
+  *
+  * Returns 0 on success, -LIBBPF_ERRNO__RELOC for invalid relocations,
+  * -ENOMEM on allocation failure, or an error from
+  * bpf_program_reloc_btf_ext().
+  */
+ static int
+ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
+ 			struct reloc_desc *relo)
+ {
+ 	struct bpf_insn *call_insn, *grown;
+ 	struct bpf_program *text_prog;
+ 	size_t total_cnt;
+ 	int err;
+
+ 	if (relo->type != RELO_CALL)
+ 		return -LIBBPF_ERRNO__RELOC;
+
+ 	/* .text itself can't have call relocations applied this way */
+ 	if (prog->idx == obj->efile.text_shndx) {
+ 		pr_warn("relo in .text insn %d into off %d (insn #%d)\n",
+ 			relo->insn_idx, relo->sym_off, relo->sym_off / 8);
+ 		return -LIBBPF_ERRNO__RELOC;
+ 	}
+
+ 	if (prog->main_prog_cnt == 0) {
+ 		text_prog = bpf_object__find_prog_by_idx(obj,
+ 							 obj->efile.text_shndx);
+ 		if (!text_prog) {
+ 			pr_warn("no .text section found yet relo into text exist\n");
+ 			return -LIBBPF_ERRNO__RELOC;
+ 		}
+
+ 		total_cnt = prog->insns_cnt + text_prog->insns_cnt;
+ 		grown = reallocarray(prog->insns, total_cnt,
+ 				     sizeof(*call_insn));
+ 		if (!grown) {
+ 			pr_warn("oom in prog realloc\n");
+ 			return -ENOMEM;
+ 		}
+ 		prog->insns = grown;
+
+ 		if (obj->btf_ext) {
+ 			err = bpf_program_reloc_btf_ext(prog, obj,
+ 							text_prog->section_name,
+ 							prog->insns_cnt);
+ 			if (err)
+ 				return err;
+ 		}
+
+ 		/* append .text right after the program's own instructions */
+ 		memcpy(grown + prog->insns_cnt, text_prog->insns,
+ 		       text_prog->insns_cnt * sizeof(*call_insn));
+ 		prog->main_prog_cnt = prog->insns_cnt;
+ 		prog->insns_cnt = total_cnt;
+ 		pr_debug("added %zd insn from %s to prog %s\n",
+ 			 text_prog->insns_cnt, text_prog->section_name,
+ 			 prog->section_name);
+ 	}
+
+ 	call_insn = &prog->insns[relo->insn_idx];
+ 	call_insn->imm += relo->sym_off / 8 + prog->main_prog_cnt - relo->insn_idx;
+ 	return 0;
+ }
+
+ /*
+  * Apply all collected relocations of a single program.
+  *
+  * .BTF.ext info is relocated first (when present), then each reloc_desc is
+  * processed: RELO_LD64/RELO_DATA patch a two-instruction load with the
+  * map's fd, RELO_CALL is delegated to bpf_program__reloc_text(); other
+  * types are ignored. On success the relocation descriptors are freed.
+  * A NULL 'prog' is accepted and treated as success.
+  */
+ static int
+ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
+ {
+ 	int i, err;
+
+ 	if (!prog)
+ 		return 0;
+
+ 	if (obj->btf_ext) {
+ 		err = bpf_program_reloc_btf_ext(prog, obj,
+ 						prog->section_name, 0);
+ 		if (err)
+ 			return err;
+ 	}
+
+ 	if (!prog->reloc_desc)
+ 		return 0;
+
+ 	for (i = 0; i < prog->nr_reloc; i++) {
+ 		struct reloc_desc *relo = &prog->reloc_desc[i];
+ 		struct bpf_insn *insn;
+
+ 		switch (relo->type) {
+ 		case RELO_LD64:
+ 		case RELO_DATA:
+ 			/* both halves of the 16-byte load must be in range */
+ 			if (relo->insn_idx + 1 >= (int)prog->insns_cnt) {
+ 				pr_warn("relocation out of range: '%s'\n",
+ 					prog->section_name);
+ 				return -LIBBPF_ERRNO__RELOC;
+ 			}
+
+ 			insn = &prog->insns[relo->insn_idx];
+ 			if (relo->type == RELO_DATA) {
+ 				insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ 				insn[1].imm = insn[0].imm + relo->sym_off;
+ 			} else {
+ 				insn[0].src_reg = BPF_PSEUDO_MAP_FD;
+ 			}
+ 			insn[0].imm = obj->maps[relo->map_idx].fd;
+ 			break;
+ 		case RELO_CALL:
+ 			err = bpf_program__reloc_text(prog, obj, relo);
+ 			if (err)
+ 				return err;
+ 			break;
+ 		default:
+ 			break;
+ 		}
+ 	}
+
+ 	zfree(&prog->reloc_desc);
+ 	prog->nr_reloc = 0;
+ 	return 0;
+ }
+
+ /*
+  * Relocate the whole object: CO-RE relocations first (only if .BTF.ext is
+  * present), then per-program relocations for every program in the object.
+  * Stops at and returns the first error.
+  */
+ static int
+ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
+ {
+ 	struct bpf_program *cur;
+ 	size_t n;
+ 	int err;
+
+ 	if (obj->btf_ext) {
+ 		err = bpf_object__relocate_core(obj, targ_btf_path);
+ 		if (err) {
+ 			pr_warn("failed to perform CO-RE relocations: %d\n",
+ 				err);
+ 			return err;
+ 		}
+ 	}
+
+ 	for (n = 0; n < obj->nr_programs; n++) {
+ 		cur = &obj->programs[n];
+ 		err = bpf_program__relocate(cur, obj);
+ 		if (!err)
+ 			continue;
+
+ 		pr_warn("failed to relocate '%s'\n", cur->section_name);
+ 		return err;
+ 	}
+ 	return 0;
+ }
+
+ /*
+  * Walk all relocation sections recorded in obj->efile and collect their
+  * relocations into the program each section refers to (via sh_info).
+  * Requires the ELF object to still be open.
+  */
+ static int bpf_object__collect_reloc(struct bpf_object *obj)
+ {
+ 	int sec_i, err;
+
+ 	if (!obj_elf_valid(obj)) {
+ 		pr_warn("Internal error: elf object is closed\n");
+ 		return -LIBBPF_ERRNO__INTERNAL;
+ 	}
+
+ 	for (sec_i = 0; sec_i < obj->efile.nr_reloc_sects; sec_i++) {
+ 		GElf_Shdr *shdr = &obj->efile.reloc_sects[sec_i].shdr;
+ 		Elf_Data *data = obj->efile.reloc_sects[sec_i].data;
+ 		/* sh_info is used as the index of the section being relocated */
+ 		int target_idx = shdr->sh_info;
+ 		struct bpf_program *prog;
+
+ 		if (shdr->sh_type != SHT_REL) {
+ 			pr_warn("internal error at %d\n", __LINE__);
+ 			return -LIBBPF_ERRNO__INTERNAL;
+ 		}
+
+ 		prog = bpf_object__find_prog_by_idx(obj, target_idx);
+ 		if (!prog) {
+ 			pr_warn("relocation failed: no section(%d)\n", target_idx);
+ 			return -LIBBPF_ERRNO__RELOC;
+ 		}
+
+ 		err = bpf_program__collect_reloc(prog, shdr, data, obj);
+ 		if (err)
+ 			return err;
+ 	}
+ 	return 0;
+ }
+
+ /*
+  * Load one set of instructions for 'prog' into the kernel.
+  *
+  * @insns/@insns_cnt: instructions to load; both must be non-zero
+  * @license, @kern_version: passed through in the load attributes
+  * @pfd: receives the new program fd on success
+  *
+  * The verifier log buffer is doubled and the load retried whenever the
+  * load fails with ENOSPC. Returns 0 on success. On failure returns
+  * -LIBBPF_ERRNO__VERIFY if a verifier log was produced,
+  * -LIBBPF_ERRNO__PROG2BIG if the instruction count reaches BPF_MAXINSNS,
+  * -LIBBPF_ERRNO__PROGTYPE if the same program loads fine as a kprobe
+  * (i.e. the program type looks wrong), and -errno otherwise.
+  */
+ static int
+ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
+ 	     char *license, __u32 kern_version, int *pfd)
+ {
+ 	struct bpf_load_program_attr load_attr;
+ 	char *cp, errmsg[STRERR_BUFSIZE];
+ 	int log_buf_size = BPF_LOG_BUF_SIZE;
+ 	char *log_buf;
+ 	int btf_fd, ret;
+
+ 	if (!insns || !insns_cnt)
+ 		return -EINVAL;
+
+ 	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
+ 	load_attr.prog_type = prog->type;
+ 	load_attr.expected_attach_type = prog->expected_attach_type;
+ 	if (prog->caps->name)
+ 		load_attr.name = prog->name;
+ 	load_attr.insns = insns;
+ 	load_attr.insns_cnt = insns_cnt;
+ 	load_attr.license = license;
+ 	if (prog->type == BPF_PROG_TYPE_TRACING) {
+ 		load_attr.attach_prog_fd = prog->attach_prog_fd;
+ 		load_attr.attach_btf_id = prog->attach_btf_id;
+ 	} else {
+ 		load_attr.kern_version = kern_version;
+ 		load_attr.prog_ifindex = prog->prog_ifindex;
+ 	}
+ 	/* if .BTF.ext was loaded, kernel supports associated BTF for prog */
+ 	if (prog->obj->btf_ext)
+ 		btf_fd = bpf_object__btf_fd(prog->obj);
+ 	else
+ 		btf_fd = -1;
+ 	load_attr.prog_btf_fd = btf_fd >= 0 ? btf_fd : 0;
+ 	load_attr.func_info = prog->func_info;
+ 	load_attr.func_info_rec_size = prog->func_info_rec_size;
+ 	load_attr.func_info_cnt = prog->func_info_cnt;
+ 	load_attr.line_info = prog->line_info;
+ 	load_attr.line_info_rec_size = prog->line_info_rec_size;
+ 	load_attr.line_info_cnt = prog->line_info_cnt;
+ 	load_attr.log_level = prog->log_level;
+ 	load_attr.prog_flags = prog->prog_flags;
+
+ retry_load:
+ 	log_buf = malloc(log_buf_size);
+ 	if (!log_buf)
+ 		pr_warn("Alloc log buffer for bpf loader error, continue without log\n");
+ 	else
+ 		/*
+ 		 * Nothing guarantees the buffer gets written on failure, yet
+ 		 * log_buf[0] is inspected below; start with an empty string
+ 		 * rather than reading uninitialized heap memory.
+ 		 */
+ 		log_buf[0] = '\0';
+
+ 	/*
+ 	 * Without a buffer, advertise zero size (same NULL/0 pairing as the
+ 	 * kprobe probe below) instead of a NULL pointer with non-zero size.
+ 	 */
+ 	ret = bpf_load_program_xattr(&load_attr, log_buf,
+ 				     log_buf ? log_buf_size : 0);
+
+ 	if (ret >= 0) {
+ 		/* log_buf may be NULL if allocation failed above */
+ 		if (load_attr.log_level && log_buf)
+ 			pr_debug("verifier log:\n%s", log_buf);
+ 		*pfd = ret;
+ 		ret = 0;
+ 		goto out;
+ 	}
+
+ 	if (errno == ENOSPC) {
+ 		/* log didn't fit: retry with twice as large a buffer */
+ 		log_buf_size <<= 1;
+ 		free(log_buf);
+ 		goto retry_load;
+ 	}
+ 	ret = -errno;
+ 	cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+ 	pr_warn("load bpf program failed: %s\n", cp);
+
+ 	if (log_buf && log_buf[0] != '\0') {
+ 		ret = -LIBBPF_ERRNO__VERIFY;
+ 		pr_warn("-- BEGIN DUMP LOG ---\n");
+ 		pr_warn("\n%s\n", log_buf);
+ 		pr_warn("-- END LOG --\n");
+ 	} else if (load_attr.insns_cnt >= BPF_MAXINSNS) {
+ 		pr_warn("Program too large (%zu insns), at most %d insns\n",
+ 			load_attr.insns_cnt, BPF_MAXINSNS);
+ 		ret = -LIBBPF_ERRNO__PROG2BIG;
+ 	} else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
+ 		/* Wrong program type? */
+ 		int fd;
+
+ 		load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
+ 		load_attr.expected_attach_type = 0;
+ 		fd = bpf_load_program_xattr(&load_attr, NULL, 0);
+ 		if (fd >= 0) {
+ 			close(fd);
+ 			ret = -LIBBPF_ERRNO__PROGTYPE;
+ 			goto out;
+ 		}
+ 	}
+
+ out:
+ 	free(log_buf);
+ 	return ret;
+ }
+
+ /*
+  * Load a program (all of its instances) into the kernel.
+  *
+  * Without a preprocessor exactly one instance is loaded from prog->insns.
+  * With a preprocessor, it is invoked once per instance and whatever
+  * instructions it produces are loaded; instances for which it yields no
+  * instructions are skipped with fd -1. Except for the early internal-error
+  * return, prog->insns is freed before returning regardless of outcome.
+  */
+ int
+ bpf_program__load(struct bpf_program *prog,
+ 		  char *license, __u32 kern_version)
+ {
+ 	int err = 0, fd, idx;
+
+ 	/* lazily set up single-instance fd storage */
+ 	if (prog->instances.nr < 0 || !prog->instances.fds) {
+ 		if (prog->preprocessor) {
+ 			pr_warn("Internal error: can't load program '%s'\n",
+ 				prog->section_name);
+ 			return -LIBBPF_ERRNO__INTERNAL;
+ 		}
+
+ 		prog->instances.fds = malloc(sizeof(int));
+ 		if (!prog->instances.fds) {
+ 			pr_warn("Not enough memory for BPF fds\n");
+ 			return -ENOMEM;
+ 		}
+ 		prog->instances.nr = 1;
+ 		prog->instances.fds[0] = -1;
+ 	}
+
+ 	if (!prog->preprocessor) {
+ 		if (prog->instances.nr != 1) {
+ 			pr_warn("Program '%s' is inconsistent: nr(%d) != 1\n",
+ 				prog->section_name, prog->instances.nr);
+ 		}
+ 		err = load_program(prog, prog->insns, prog->insns_cnt,
+ 				   license, kern_version, &fd);
+ 		if (err == 0)
+ 			prog->instances.fds[0] = fd;
+ 		goto out;
+ 	}
+
+ 	for (idx = 0; idx < prog->instances.nr; idx++) {
+ 		bpf_program_prep_t prep_fn = prog->preprocessor;
+ 		struct bpf_prog_prep_result prep;
+
+ 		memset(&prep, 0, sizeof(prep));
+ 		err = prep_fn(prog, idx, prog->insns,
+ 			      prog->insns_cnt, &prep);
+ 		if (err) {
+ 			pr_warn("Preprocessing the %dth instance of program '%s' failed\n",
+ 				idx, prog->section_name);
+ 			goto out;
+ 		}
+
+ 		/* preprocessor produced nothing: mark instance as not loaded */
+ 		if (!prep.new_insn_ptr || !prep.new_insn_cnt) {
+ 			pr_debug("Skip loading the %dth instance of program '%s'\n",
+ 				 idx, prog->section_name);
+ 			prog->instances.fds[idx] = -1;
+ 			if (prep.pfd)
+ 				*prep.pfd = -1;
+ 			continue;
+ 		}
+
+ 		err = load_program(prog, prep.new_insn_ptr,
+ 				   prep.new_insn_cnt,
+ 				   license, kern_version, &fd);
+ 		if (err) {
+ 			pr_warn("Loading the %dth instance of program '%s' failed\n",
+ 				idx, prog->section_name);
+ 			goto out;
+ 		}
+
+ 		if (prep.pfd)
+ 			*prep.pfd = fd;
+ 		prog->instances.fds[idx] = fd;
+ 	}
+ out:
+ 	if (err)
+ 		pr_warn("failed to load program '%s'\n", prog->section_name);
+ 	zfree(&prog->insns);
+ 	prog->insns_cnt = 0;
+ 	return err;
+ }
+
+ /*
+  * True when 'prog' is the .text section of an object that has pseudo
+  * calls; bpf_object__load_progs() skips loading such programs.
+  */
+ static bool bpf_program__is_function_storage(const struct bpf_program *prog,
+ 					     const struct bpf_object *obj)
+ {
+ 	if (prog->idx != obj->efile.text_shndx)
+ 		return false;
+
+ 	return obj->has_pseudo_calls;
+ }
+
+ /*
+  * Load every program of the object except function-storage (.text)
+  * programs. 'log_level' is OR-ed into each loaded program's own log level.
+  * Stops at and returns the first load error.
+  */
+ static int
+ bpf_object__load_progs(struct bpf_object *obj, int log_level)
+ {
+ 	struct bpf_program *cur;
+ 	size_t n;
+ 	int err;
+
+ 	for (n = 0; n < obj->nr_programs; n++) {
+ 		cur = &obj->programs[n];
+ 		if (bpf_program__is_function_storage(cur, obj))
+ 			continue;
+
+ 		cur->log_level |= log_level;
+ 		err = bpf_program__load(cur, obj->license, obj->kern_version);
+ 		if (err)
+ 			return err;
+ 	}
+ 	return 0;
+ }
+
+static int libbpf_find_attach_btf_id(const char *name,
+ enum bpf_attach_type attach_type,
+ __u32 attach_prog_fd);
+ /*
+  * Common implementation behind all bpf_object__open*() variants.
+  *
+  * Opens an object either from file 'path' or from memory buffer 'obj_buf'
+  * (in which case a name is taken from opts or synthesized from the buffer
+  * address and size), parses the ELF, collects relocations, and guesses
+  * program/attach types from section names.
+  *
+  * Returns the new object, or ERR_PTR(err) after closing the object on
+  * failure.
+  * NOTE(review): the tracing branch jumps to 'out' on err <= 0, so err == 0
+  * would produce ERR_PTR(0); verify libbpf_find_attach_btf_id() can't
+  * return 0.
+  */
+ static struct bpf_object *
+ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
+ 		   struct bpf_object_open_opts *opts)
+ {
+ 	const char *pin_root_path, *obj_name;
+ 	struct bpf_program *prog;
+ 	struct bpf_object *obj;
+ 	__u32 attach_prog_fd;
+ 	bool relaxed_maps;
+ 	char tmp_name[64];
+ 	int err;
+
+ 	if (elf_version(EV_CURRENT) == EV_NONE) {
+ 		pr_warn("failed to init libelf for %s\n",
+ 			path ? path : "(mem buf)");
+ 		return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
+ 	}
+
+ 	if (!OPTS_VALID(opts, bpf_object_open_opts))
+ 		return ERR_PTR(-EINVAL);
+
+ 	obj_name = OPTS_GET(opts, object_name, NULL);
+ 	if (obj_buf) {
+ 		/* in-memory object: make up a name if caller gave none */
+ 		if (!obj_name) {
+ 			snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
+ 				 (unsigned long)obj_buf,
+ 				 (unsigned long)obj_buf_sz);
+ 			obj_name = tmp_name;
+ 		}
+ 		path = obj_name;
+ 		pr_debug("loading object '%s' from buffer\n", obj_name);
+ 	}
+
+ 	obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
+ 	if (IS_ERR(obj))
+ 		return obj;
+
+ 	obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false);
+ 	relaxed_maps = OPTS_GET(opts, relaxed_maps, false);
+ 	pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
+ 	attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
+
+ 	CHECK_ERR(bpf_object__elf_init(obj), err, out);
+ 	CHECK_ERR(bpf_object__check_endianness(obj), err, out);
+ 	CHECK_ERR(bpf_object__probe_caps(obj), err, out);
+ 	CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps, pin_root_path),
+ 		  err, out);
+ 	CHECK_ERR(bpf_object__collect_reloc(obj), err, out);
+ 	bpf_object__elf_finish(obj);
+
+ 	bpf_object__for_each_program(prog, obj) {
+ 		enum bpf_attach_type attach_type;
+ 		enum bpf_prog_type prog_type;
+
+ 		err = libbpf_prog_type_by_name(prog->section_name, &prog_type,
+ 					       &attach_type);
+ 		if (err == -ESRCH)
+ 			/* couldn't guess, but user might manually specify */
+ 			continue;
+ 		if (err)
+ 			goto out;
+
+ 		bpf_program__set_type(prog, prog_type);
+ 		bpf_program__set_expected_attach_type(prog, attach_type);
+ 		if (prog_type != BPF_PROG_TYPE_TRACING)
+ 			continue;
+
+ 		/* tracing programs need the BTF ID of their attach target */
+ 		err = libbpf_find_attach_btf_id(prog->section_name,
+ 						attach_type,
+ 						attach_prog_fd);
+ 		if (err <= 0)
+ 			goto out;
+ 		prog->attach_btf_id = err;
+ 		prog->attach_prog_fd = attach_prog_fd;
+ 	}
+
+ 	return obj;
+ out:
+ 	bpf_object__close(obj);
+ 	return ERR_PTR(err);
+ }
+
+ /*
+  * Open an object file described by 'attr'; 'flags' may carry
+  * MAPS_RELAX_COMPAT, which is translated into the relaxed_maps open option.
+  * Returns NULL (not an ERR_PTR) when attr->file is not set.
+  */
+ static struct bpf_object *
+ __bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
+ {
+ 	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+ 		.relaxed_maps = flags & MAPS_RELAX_COMPAT,
+ 	);
+ 	const char *file = attr->file;
+
+ 	/* param validation */
+ 	if (!file)
+ 		return NULL;
+
+ 	pr_debug("loading %s\n", file);
+ 	return __bpf_object__open(file, NULL, 0, &opts);
+ }
+
+ /* Open an object file described by 'attr' with no extra flags. */
+ struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
+ {
+ 	const int no_flags = 0;
+
+ 	return __bpf_object__open_xattr(attr, no_flags);
+ }
+
+ /*
+  * Open an object file by path with an unspecified program type; thin
+  * wrapper around bpf_object__open_xattr().
+  */
+ struct bpf_object *bpf_object__open(const char *path)
+ {
+ 	struct bpf_object_open_attr open_attr = {
+ 		.prog_type	= BPF_PROG_TYPE_UNSPEC,
+ 		.file		= path,
+ 	};
+
+ 	return bpf_object__open_xattr(&open_attr);
+ }
+
+ /*
+  * Open an object from the file at 'path' using caller-provided options.
+  * Returns ERR_PTR(-EINVAL) when 'path' is NULL.
+  */
+ struct bpf_object *
+ bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts)
+ {
+ 	if (path) {
+ 		pr_debug("loading %s\n", path);
+ 		return __bpf_object__open(path, NULL, 0, opts);
+ 	}
+
+ 	return ERR_PTR(-EINVAL);
+ }
+
+ /*
+  * Open an object from a memory buffer using caller-provided options.
+  * Returns ERR_PTR(-EINVAL) for a NULL or empty buffer.
+  */
+ struct bpf_object *
+ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
+ 		     struct bpf_object_open_opts *opts)
+ {
+ 	bool have_buf = obj_buf && obj_buf_sz != 0;
+
+ 	if (!have_buf)
+ 		return ERR_PTR(-EINVAL);
+
+ 	return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts);
+ }
+
+ /*
+  * Legacy in-memory open: like bpf_object__open_mem() with the given object
+  * name and relaxed map compatibility forced on. Unlike the newer API it
+  * returns NULL, not an ERR_PTR, for a NULL or empty buffer.
+  */
+ struct bpf_object *
+ bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
+ 			const char *name)
+ {
+ 	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+ 		.object_name = name,
+ 		/* wrong default, but backwards-compatible */
+ 		.relaxed_maps = true,
+ 	);
+
+ 	/* returning NULL is wrong, but backwards-compatible */
+ 	if (obj_buf && obj_buf_sz != 0)
+ 		return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
+
+ 	return NULL;
+ }
+
+ /*
+  * Close every map fd and unload every program of the object.
+  * Returns -EINVAL for a NULL object, 0 otherwise.
+  */
+ int bpf_object__unload(struct bpf_object *obj)
+ {
+ 	size_t n;
+
+ 	if (!obj)
+ 		return -EINVAL;
+
+ 	n = 0;
+ 	while (n < obj->nr_maps) {
+ 		zclose(obj->maps[n].fd);
+ 		n++;
+ 	}
+
+ 	n = 0;
+ 	while (n < obj->nr_programs) {
+ 		bpf_program__unload(&obj->programs[n]);
+ 		n++;
+ 	}
+
+ 	return 0;
+ }
+
+ /*
+  * Load an opened object into the kernel: create maps, apply relocations,
+  * then load programs. An object may only be loaded once; the 'loaded'
+  * flag is set before any work starts, so a failed load can't be retried.
+  *
+  * On failure, maps that are pinned but not reused are unpinned and the
+  * object is unloaded before the error is returned.
+  */
+ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
+ {
+ 	struct bpf_object *obj;
+ 	int err, map_i;
+
+ 	if (!attr || !attr->obj)
+ 		return -EINVAL;
+ 	obj = attr->obj;
+
+ 	if (obj->loaded) {
+ 		pr_warn("object should not be loaded twice\n");
+ 		return -EINVAL;
+ 	}
+
+ 	obj->loaded = true;
+
+ 	CHECK_ERR(bpf_object__create_maps(obj), err, out);
+ 	CHECK_ERR(bpf_object__relocate(obj, attr->target_btf_path), err, out);
+ 	CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out);
+
+ 	return 0;
+ out:
+ 	/* unpin any maps that were auto-pinned during load */
+ 	for (map_i = 0; map_i < obj->nr_maps; map_i++) {
+ 		struct bpf_map *map = &obj->maps[map_i];
+
+ 		if (map->pinned && !map->reused)
+ 			bpf_map__unpin(map, NULL);
+ 	}
+
+ 	bpf_object__unload(obj);
+ 	pr_warn("failed to load object '%s'\n", obj->path);
+ 	return err;
+ }
+
+ /*
+  * Load an object with default load attributes (every attribute other than
+  * the object itself is zero-initialized).
+  */
+ int bpf_object__load(struct bpf_object *obj)
+ {
+ 	struct bpf_object_load_attr load_attr = { .obj = obj };
+
+ 	return bpf_object__load_xattr(&load_attr);
+ }
+
+ /*
+  * Create the directory containing 'path' (mode 0700); an already existing
+  * directory is not an error. Only the immediate parent is created.
+  * Returns 0 on success, -ENOMEM or the negated mkdir() errno on failure.
+  */
+ static int make_parent_dir(const char *path)
+ {
+ 	char errmsg[STRERR_BUFSIZE];
+ 	char *path_copy, *parent;
+ 	int err = 0;
+
+ 	/* dirname() may modify its argument, so work on a private copy */
+ 	path_copy = strdup(path);
+ 	if (!path_copy)
+ 		return -ENOMEM;
+
+ 	parent = dirname(path_copy);
+ 	if (mkdir(parent, 0700) && errno != EEXIST)
+ 		err = -errno;
+ 	free(path_copy);
+
+ 	if (!err)
+ 		return 0;
+
+ 	pr_warn("failed to mkdir %s: %s\n", path,
+ 		libbpf_strerror_r(-err, errmsg, sizeof(errmsg)));
+ 	return err;
+ }
+
+ /*
+  * Verify that the directory containing 'path' resides on a BPF filesystem.
+  *
+  * Returns 0 if so; -EINVAL if 'path' is NULL or its parent is not on BPF
+  * FS; -ENOMEM on allocation failure; the negated statfs() errno if the
+  * parent directory can't be queried.
+  */
+ static int check_path(const char *path)
+ {
+ 	char *cp, errmsg[STRERR_BUFSIZE];
+ 	struct statfs st_fs;
+ 	char *dname, *dir;
+ 	int err = 0;
+
+ 	if (path == NULL)
+ 		return -EINVAL;
+
+ 	/* dirname() may modify its argument, so work on a private copy */
+ 	dname = strdup(path);
+ 	if (dname == NULL)
+ 		return -ENOMEM;
+
+ 	dir = dirname(dname);
+ 	if (statfs(dir, &st_fs)) {
+ 		/*
+ 		 * Capture errno right away: the string conversion and the
+ 		 * logging below are free to clobber it.
+ 		 */
+ 		err = -errno;
+ 		cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
+ 		pr_warn("failed to statfs %s: %s\n", dir, cp);
+ 	}
+ 	free(dname);
+
+ 	/* st_fs is only valid when statfs() succeeded */
+ 	if (!err && st_fs.f_type != BPF_FS_MAGIC) {
+ 		pr_warn("specified path %s is not on BPF FS\n", path);
+ 		err = -EINVAL;
+ 	}
+
+ 	return err;
+ }
+
+ /*
+  * Pin one loaded instance of a program at 'path'.
+  *
+  * The parent directory of 'path' is created if needed and must be on a BPF
+  * filesystem. Returns 0 on success; -EINVAL for a NULL program or an
+  * out-of-range instance index; otherwise the error from
+  * make_parent_dir()/check_path() or the negated bpf_obj_pin() errno.
+  */
+ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
+ 			      int instance)
+ {
+ 	char *cp, errmsg[STRERR_BUFSIZE];
+ 	int err;
+
+ 	err = make_parent_dir(path);
+ 	if (err)
+ 		return err;
+
+ 	err = check_path(path);
+ 	if (err)
+ 		return err;
+
+ 	if (prog == NULL) {
+ 		pr_warn("invalid program pointer\n");
+ 		return -EINVAL;
+ 	}
+
+ 	if (instance < 0 || instance >= prog->instances.nr) {
+ 		pr_warn("invalid prog instance %d of prog %s (max %d)\n",
+ 			instance, prog->section_name, prog->instances.nr);
+ 		return -EINVAL;
+ 	}
+
+ 	if (bpf_obj_pin(prog->instances.fds[instance], path)) {
+ 		/* save errno before logging gets a chance to overwrite it */
+ 		err = -errno;
+ 		cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
+ 		pr_warn("failed to pin program: %s\n", cp);
+ 		return err;
+ 	}
+ 	pr_debug("pinned program '%s'\n", path);
+
+ 	return 0;
+ }
+
+ /*
+  * Remove the pin of one program instance by unlinking 'path'.
+  *
+  * 'path' must be on a BPF filesystem and 'instance' must be a valid
+  * instance index of 'prog'. Returns 0 on success, -EINVAL on bad arguments,
+  * the check_path() error, or the negated unlink() errno.
+  */
+ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
+ 				int instance)
+ {
+ 	int err = check_path(path);
+
+ 	if (err)
+ 		return err;
+
+ 	if (!prog) {
+ 		pr_warn("invalid program pointer\n");
+ 		return -EINVAL;
+ 	}
+
+ 	if (instance < 0 || instance >= prog->instances.nr) {
+ 		pr_warn("invalid prog instance %d of prog %s (max %d)\n",
+ 			instance, prog->section_name, prog->instances.nr);
+ 		return -EINVAL;
+ 	}
+
+ 	if (unlink(path))
+ 		return -errno;
+
+ 	pr_debug("unpinned program '%s'\n", path);
+ 	return 0;
+ }
+
+ /*
+  * Pin all instances of a program.
+  *
+  * A single-instance program is pinned directly at 'path'. Otherwise 'path'
+  * is used as a directory and instance i is pinned at "<path>/<i>"; if any
+  * instance fails, the ones already pinned are unpinned and the directory
+  * removal is attempted before the error is returned.
+  */
+ int bpf_program__pin(struct bpf_program *prog, const char *path)
+ {
+ 	char inst_path[PATH_MAX];
+ 	int i, len, err;
+
+ 	err = make_parent_dir(path);
+ 	if (err)
+ 		return err;
+
+ 	err = check_path(path);
+ 	if (err)
+ 		return err;
+
+ 	if (!prog) {
+ 		pr_warn("invalid program pointer\n");
+ 		return -EINVAL;
+ 	}
+
+ 	if (prog->instances.nr <= 0) {
+ 		pr_warn("no instances of prog %s to pin\n",
+ 			prog->section_name);
+ 		return -EINVAL;
+ 	}
+
+ 	/* don't create subdirs when pinning single instance */
+ 	if (prog->instances.nr == 1)
+ 		return bpf_program__pin_instance(prog, path, 0);
+
+ 	for (i = 0; i < prog->instances.nr; i++) {
+ 		len = snprintf(inst_path, PATH_MAX, "%s/%d", path, i);
+ 		if (len < 0) {
+ 			err = -EINVAL;
+ 			goto err_unpin;
+ 		}
+ 		if (len >= PATH_MAX) {
+ 			err = -ENAMETOOLONG;
+ 			goto err_unpin;
+ 		}
+
+ 		err = bpf_program__pin_instance(prog, inst_path, i);
+ 		if (err)
+ 			goto err_unpin;
+ 	}
+
+ 	return 0;
+
+ err_unpin:
+ 	/* roll back: instance i failed, so undo instances i-1 down to 0 */
+ 	while (--i >= 0) {
+ 		len = snprintf(inst_path, PATH_MAX, "%s/%d", path, i);
+ 		if (len < 0 || len >= PATH_MAX)
+ 			continue;
+
+ 		bpf_program__unpin_instance(prog, inst_path, i);
+ 	}
+
+ 	rmdir(path);
+
+ 	return err;
+ }
+
+ /*
+  * Remove the pins of all instances of prog that live under path.
+  *
+  * A single-instance program is expected to be pinned at path itself; with
+  * several instances, instance i is expected at "<path>/<i>" and path is
+  * removed with rmdir() once every instance has been unpinned.
+  *
+  * Returns 0 on success or a negative error: the check_path() or
+  * bpf_program__unpin_instance() error, -EINVAL for a NULL program, a program
+  * without instances or an snprintf() failure, -ENAMETOOLONG when a
+  * per-instance path does not fit in PATH_MAX, or -errno when rmdir() fails.
+  * The loop stops at the first instance that fails to unpin.
+  */
+ int bpf_program__unpin(struct bpf_program *prog, const char *path)
+ {
+ 	int i, err;
+
+ 	err = check_path(path);
+ 	if (err)
+ 		return err;
+
+ 	if (prog == NULL) {
+ 		pr_warn("invalid program pointer\n");
+ 		return -EINVAL;
+ 	}
+
+ 	if (prog->instances.nr <= 0) {
+ 		/* message used to say "to pin", copied from bpf_program__pin() */
+ 		pr_warn("no instances of prog %s to unpin\n",
+ 			prog->section_name);
+ 		return -EINVAL;
+ 	}
+
+ 	if (prog->instances.nr == 1) {
+ 		/* a single instance is pinned at path itself, not in a subdir */
+ 		return bpf_program__unpin_instance(prog, path, 0);
+ 	}
+
+ 	for (i = 0; i < prog->instances.nr; i++) {
+ 		char buf[PATH_MAX];
+ 		int len;
+
+ 		len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
+ 		if (len < 0)
+ 			return -EINVAL;
+ 		else if (len >= PATH_MAX)
+ 			return -ENAMETOOLONG;
+
+ 		err = bpf_program__unpin_instance(prog, buf, i);
+ 		if (err)
+ 			return err;
+ 	}
+
+ 	err = rmdir(path);
+ 	if (err)
+ 		return -errno;
+
+ 	return 0;
+ }
+
+ /*
+  * Pin map at path, or at the pin path already recorded in the map.
+  *
+  * - If the map has a pin_path, path must be NULL or equal to it; an already
+  *   pinned map is left alone and 0 is returned.
+  * - Otherwise path is mandatory and is duplicated into map->pin_path.
+  *
+  * Returns 0 on success, -EINVAL for a NULL map, a conflicting or missing
+  * path, -EEXIST when a map without pin_path is already marked pinned, or the
+  * negative error of strdup(), make_parent_dir(), check_path() or
+  * bpf_obj_pin(). Failures after argument validation are logged through the
+  * common out_err exit.
+  */
+ int bpf_map__pin(struct bpf_map *map, const char *path)
+ {
+ 	char *cp, errmsg[STRERR_BUFSIZE];
+ 	int err;
+
+ 	if (map == NULL) {
+ 		pr_warn("invalid map pointer\n");
+ 		return -EINVAL;
+ 	}
+
+ 	if (map->pin_path) {
+ 		if (path && strcmp(path, map->pin_path)) {
+ 			pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
+ 				bpf_map__name(map), map->pin_path, path);
+ 			return -EINVAL;
+ 		} else if (map->pinned) {
+ 			pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
+ 				 bpf_map__name(map), map->pin_path);
+ 			return 0;
+ 		}
+ 	} else {
+ 		if (!path) {
+ 			pr_warn("missing a path to pin map '%s' at\n",
+ 				bpf_map__name(map));
+ 			return -EINVAL;
+ 		} else if (map->pinned) {
+ 			pr_warn("map '%s' already pinned\n", bpf_map__name(map));
+ 			return -EEXIST;
+ 		}
+
+ 		map->pin_path = strdup(path);
+ 		if (!map->pin_path) {
+ 			err = -errno;
+ 			goto out_err;
+ 		}
+ 	}
+
+ 	/*
+ 	 * These two used to return directly, so a failure here was the only
+ 	 * one in this function that produced no "failed to pin map" warning.
+ 	 */
+ 	err = make_parent_dir(map->pin_path);
+ 	if (err)
+ 		goto out_err;
+
+ 	err = check_path(map->pin_path);
+ 	if (err)
+ 		goto out_err;
+
+ 	if (bpf_obj_pin(map->fd, map->pin_path)) {
+ 		err = -errno;
+ 		goto out_err;
+ 	}
+
+ 	map->pinned = true;
+ 	pr_debug("pinned map '%s'\n", map->pin_path);
+
+ 	return 0;
+
+ out_err:
+ 	cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
+ 	pr_warn("failed to pin map: %s\n", cp);
+ 	return err;
+ }
+
+ /*
+  * Remove the pin of map.
+  *
+  * When the map has a recorded pin_path, that path is used and path must be
+  * NULL or identical to it; otherwise path is mandatory. On success the map
+  * is marked as not pinned.
+  *
+  * Returns 0 on success, -EINVAL for a NULL map or a conflicting/missing
+  * path, the check_path() error, or -errno when unlink() fails.
+  */
+ int bpf_map__unpin(struct bpf_map *map, const char *path)
+ {
+ 	const char *target = path;
+ 	int ret;
+
+ 	if (!map) {
+ 		pr_warn("invalid map pointer\n");
+ 		return -EINVAL;
+ 	}
+
+ 	if (map->pin_path) {
+ 		if (path && strcmp(path, map->pin_path) != 0) {
+ 			pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
+ 				bpf_map__name(map), map->pin_path, path);
+ 			return -EINVAL;
+ 		}
+ 		/* the recorded pin path wins over a NULL/equal argument */
+ 		target = map->pin_path;
+ 	} else if (!path) {
+ 		pr_warn("no path to unpin map '%s' from\n",
+ 			bpf_map__name(map));
+ 		return -EINVAL;
+ 	}
+
+ 	ret = check_path(target);
+ 	if (ret)
+ 		return ret;
+
+ 	if (unlink(target) != 0)
+ 		return -errno;
+
+ 	map->pinned = false;
+ 	pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), target);
+
+ 	return 0;
+ }
+
+ /*
+  * Replace the pin path recorded in map with a private copy of path.
+  * A NULL path clears the recorded pin path. The previous string is freed
+  * only after the new copy has been allocated successfully.
+  *
+  * Returns 0 on success or -errno when strdup() fails.
+  */
+ int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
+ {
+ 	char *copy = NULL;
+
+ 	if (path != NULL) {
+ 		copy = strdup(path);
+ 		if (copy == NULL)
+ 			return -errno;
+ 	}
+
+ 	free(map->pin_path);
+ 	map->pin_path = copy;
+
+ 	return 0;
+ }
+
+ /* Return the pin path stored in map (no copy is made); map must not be NULL. */
+ const char *bpf_map__get_pin_path(const struct bpf_map *map)
+ {
+ return map->pin_path;
+ }
+
+ /* Return the map's pinned flag; map must not be NULL. */
+ bool bpf_map__is_pinned(const struct bpf_map *map)
+ {
+ return map->pinned;
+ }
+
+ /*
+  * Pin the maps of a loaded object.
+  *
+  * With a non-NULL path every map is pinned at "<path>/<map name>". With a
+  * NULL path only maps that already carry a pin_path are pinned, at that
+  * path. On failure the maps visited before the failing one are unpinned
+  * again (when they have a pin_path) and the error is returned.
+  *
+  * Returns 0 on success, -ENOENT for a NULL or not-yet-loaded object, -EINVAL
+  * or -ENAMETOOLONG for path formatting problems, or the bpf_map__pin() error.
+  */
+ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
+ {
+ struct bpf_map *map;
+ int err;
+
+ if (!obj)
+ return -ENOENT;
+
+ if (!obj->loaded) {
+ pr_warn("object not yet loaded; load it first\n");
+ return -ENOENT;
+ }
+
+ bpf_object__for_each_map(map, obj) {
+ char *pin_path = NULL;
+ char buf[PATH_MAX];
+
+ if (path) {
+ int len;
+
+ len = snprintf(buf, PATH_MAX, "%s/%s", path,
+ bpf_map__name(map));
+ if (len < 0) {
+ err = -EINVAL;
+ goto err_unpin_maps;
+ } else if (len >= PATH_MAX) {
+ err = -ENAMETOOLONG;
+ goto err_unpin_maps;
+ }
+ pin_path = buf;
+ } else if (!map->pin_path) {
+ /* no directory given and no pin path recorded: nothing to pin */
+ continue;
+ }
+
+ /* pin_path == NULL makes bpf_map__pin() use map->pin_path */
+ err = bpf_map__pin(map, pin_path);
+ if (err)
+ goto err_unpin_maps;
+ }
+
+ return 0;
+
+ err_unpin_maps:
+ /* walk backwards from the failing map; unpin results are ignored */
+ while ((map = bpf_map__prev(map, obj))) {
+ if (!map->pin_path)
+ continue;
+
+ bpf_map__unpin(map, NULL);
+ }
+
+ return err;
+ }
+
+ /*
+  * Unpin the maps of obj.
+  *
+  * With a non-NULL path each map is unpinned from "<path>/<map name>"; with a
+  * NULL path only maps that carry a recorded pin_path are unpinned, from that
+  * path. Processing stops at the first failure.
+  *
+  * Returns 0 on success, -ENOENT for a NULL object, -EINVAL or -ENAMETOOLONG
+  * for path formatting problems, or the bpf_map__unpin() error.
+  */
+ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
+ {
+ 	struct bpf_map *map;
+ 	int ret;
+
+ 	if (obj == NULL)
+ 		return -ENOENT;
+
+ 	bpf_object__for_each_map(map, obj) {
+ 		char full_path[PATH_MAX];
+ 		char *target;
+
+ 		if (path != NULL) {
+ 			int n = snprintf(full_path, PATH_MAX, "%s/%s", path,
+ 					 bpf_map__name(map));
+
+ 			if (n < 0)
+ 				return -EINVAL;
+ 			if (n >= PATH_MAX)
+ 				return -ENAMETOOLONG;
+ 			target = full_path;
+ 		} else {
+ 			if (map->pin_path == NULL)
+ 				continue;
+ 			/* NULL lets bpf_map__unpin() fall back to map->pin_path */
+ 			target = NULL;
+ 		}
+
+ 		ret = bpf_map__unpin(map, target);
+ 		if (ret)
+ 			return ret;
+ 	}
+
+ 	return 0;
+ }
+
+ /*
+  * Pin every program of a loaded object at "<path>/<prog->pin_name>".
+  *
+  * On failure the programs visited before the failing one are unpinned again
+  * (errors during that rollback are ignored) and the error is returned.
+  *
+  * Returns 0 on success, -ENOENT for a NULL or not-yet-loaded object, -EINVAL
+  * or -ENAMETOOLONG for path formatting problems, or the bpf_program__pin()
+  * error.
+  *
+  * NOTE(review): path is passed to "%s" without a NULL check — confirm callers
+  * never pass NULL here.
+  */
+ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
+ {
+ struct bpf_program *prog;
+ int err;
+
+ if (!obj)
+ return -ENOENT;
+
+ if (!obj->loaded) {
+ pr_warn("object not yet loaded; load it first\n");
+ return -ENOENT;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ char buf[PATH_MAX];
+ int len;
+
+ len = snprintf(buf, PATH_MAX, "%s/%s", path,
+ prog->pin_name);
+ if (len < 0) {
+ err = -EINVAL;
+ goto err_unpin_programs;
+ } else if (len >= PATH_MAX) {
+ err = -ENAMETOOLONG;
+ goto err_unpin_programs;
+ }
+
+ err = bpf_program__pin(prog, buf);
+ if (err)
+ goto err_unpin_programs;
+ }
+
+ return 0;
+
+ err_unpin_programs:
+ /* walk backwards from the failing program and undo earlier pins */
+ while ((prog = bpf_program__prev(prog, obj))) {
+ char buf[PATH_MAX];
+ int len;
+
+ len = snprintf(buf, PATH_MAX, "%s/%s", path,
+ prog->pin_name);
+ if (len < 0)
+ continue;
+ else if (len >= PATH_MAX)
+ continue;
+
+ bpf_program__unpin(prog, buf);
+ }
+
+ return err;
+ }
+
+ /*
+  * Unpin every program of obj from "<path>/<prog->pin_name>".
+  * Processing stops at the first failure.
+  *
+  * Returns 0 on success, -ENOENT for a NULL object, -EINVAL or -ENAMETOOLONG
+  * for path formatting problems, or the bpf_program__unpin() error.
+  */
+ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
+ {
+ 	struct bpf_program *prog;
+
+ 	if (obj == NULL)
+ 		return -ENOENT;
+
+ 	bpf_object__for_each_program(prog, obj) {
+ 		char full_path[PATH_MAX];
+ 		int n, ret;
+
+ 		n = snprintf(full_path, PATH_MAX, "%s/%s", path,
+ 			     prog->pin_name);
+ 		if (n < 0)
+ 			return -EINVAL;
+ 		if (n >= PATH_MAX)
+ 			return -ENAMETOOLONG;
+
+ 		ret = bpf_program__unpin(prog, full_path);
+ 		if (ret)
+ 			return ret;
+ 	}
+
+ 	return 0;
+ }
+
+ /*
+  * Pin all maps and then all programs of obj under path.
+  * If pinning the programs fails, the maps are unpinned again (the result of
+  * that rollback is ignored) and the program-pinning error is returned.
+  *
+  * Returns 0 on success or the first negative error encountered.
+  */
+ int bpf_object__pin(struct bpf_object *obj, const char *path)
+ {
+ 	int ret = bpf_object__pin_maps(obj, path);
+
+ 	if (ret)
+ 		return ret;
+
+ 	ret = bpf_object__pin_programs(obj, path);
+ 	if (ret)
+ 		bpf_object__unpin_maps(obj, path);
+
+ 	return ret;
+ }
+
+ /*
+  * Tear down obj and free it. A NULL obj is a no-op.
+  *
+  * Runs the object's clear_priv callback (if set), finishes ELF handling,
+  * unloads the object, frees BTF data, releases per-map and per-program
+  * state, unlinks the object from its list and finally frees obj itself.
+  */
+ void bpf_object__close(struct bpf_object *obj)
+ {
+ size_t i;
+
+ if (!obj)
+ return;
+
+ if (obj->clear_priv)
+ obj->clear_priv(obj, obj->priv);
+
+ bpf_object__elf_finish(obj);
+ bpf_object__unload(obj);
+ btf__free(obj->btf);
+ btf_ext__free(obj->btf_ext);
+
+ for (i = 0; i < obj->nr_maps; i++) {
+ zfree(&obj->maps[i].name);
+ zfree(&obj->maps[i].pin_path);
+ /* give the map's owner a chance to release its private data */
+ if (obj->maps[i].clear_priv)
+ obj->maps[i].clear_priv(&obj->maps[i],
+ obj->maps[i].priv);
+ obj->maps[i].priv = NULL;
+ obj->maps[i].clear_priv = NULL;
+ }
+
+ zfree(&obj->sections.rodata);
+ zfree(&obj->sections.data);
+ zfree(&obj->maps);
+ obj->nr_maps = 0;
+
+ if (obj->programs && obj->nr_programs) {
+ for (i = 0; i < obj->nr_programs; i++)
+ bpf_program__exit(&obj->programs[i]);
+ }
+ zfree(&obj->programs);
+
+ list_del(&obj->list);
+ free(obj);
+ }
+
+ /*
+  * Iterate over the objects linked on bpf_objects_list.
+  * A NULL prev yields the first object; otherwise the object following prev.
+  * Returns NULL once the list head is reached (which also covers an empty
+  * list).
+  */
+ struct bpf_object *
+ bpf_object__next(struct bpf_object *prev)
+ {
+ 	struct bpf_object *candidate;
+
+ 	candidate = prev ? list_next_entry(prev, list)
+ 			 : list_first_entry(&bpf_objects_list,
+ 					    struct bpf_object, list);
+
+ 	/* Landing on the list head means there is no further object. */
+ 	return &candidate->list == &bpf_objects_list ? NULL : candidate;
+ }
+
+ /* Return obj->name, or ERR_PTR(-EINVAL) when obj is NULL. */
+ const char *bpf_object__name(const struct bpf_object *obj)
+ {
+ 	if (!obj)
+ 		return ERR_PTR(-EINVAL);
+
+ 	return obj->name;
+ }
+
+ /* Return obj->kern_version, or 0 when obj is NULL. */
+ unsigned int bpf_object__kversion(const struct bpf_object *obj)
+ {
+ 	if (!obj)
+ 		return 0;
+
+ 	return obj->kern_version;
+ }
+
+ /* Return obj->btf, or NULL when obj is NULL. */
+ struct btf *bpf_object__btf(const struct bpf_object *obj)
+ {
+ 	if (!obj)
+ 		return NULL;
+
+ 	return obj->btf;
+ }
+
+ /*
+  * Return btf__fd() of the object's BTF, or -1 when the object has no BTF.
+  * Unlike the neighbouring accessors, obj itself is not checked for NULL.
+  */
+ int bpf_object__btf_fd(const struct bpf_object *obj)
+ {
+ 	if (!obj->btf)
+ 		return -1;
+
+ 	return btf__fd(obj->btf);
+ }
+
+ /*
+  * Attach private data and its cleanup callback to obj.
+  * Previously attached data is released through the old callback first, when
+  * both the data and the callback are set. Always returns 0.
+  */
+ int bpf_object__set_priv(struct bpf_object *obj, void *priv,
+ 			 bpf_object_clear_priv_t clear_priv)
+ {
+ 	if (obj->priv) {
+ 		if (obj->clear_priv)
+ 			obj->clear_priv(obj, obj->priv);
+ 	}
+
+ 	obj->clear_priv = clear_priv;
+ 	obj->priv = priv;
+
+ 	return 0;
+ }
+
+ /* Return the private data attached to obj, or ERR_PTR(-EINVAL) for NULL. */
+ void *bpf_object__priv(const struct bpf_object *obj)
+ {
+ 	if (!obj)
+ 		return ERR_PTR(-EINVAL);
+
+ 	return obj->priv;
+ }
+
+ /*
+  * Step from program p to its neighbour in obj->programs.
+  *
+  * p == NULL starts a walk: the first program when forward is true, the last
+  * one otherwise. Returns NULL when obj has no programs, when p belongs to a
+  * different object, or when the step leaves the array.
+  */
+ static struct bpf_program *
+ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
+ bool forward)
+ {
+ size_t nr_programs = obj->nr_programs;
+ ssize_t idx;
+
+ if (!nr_programs)
+ return NULL;
+
+ if (!p)
+ /* Iter from the beginning */
+ return forward ? &obj->programs[0] :
+ &obj->programs[nr_programs - 1];
+
+ if (p->obj != obj) {
+ pr_warn("error: program handler doesn't match object\n");
+ return NULL;
+ }
+
+ /* index of p within the array, moved one slot in the chosen direction */
+ idx = (p - obj->programs) + (forward ? 1 : -1);
+ /*
+  * NOTE(review): idx is ssize_t while nr_programs is size_t, so the first
+  * comparison is presumably done unsigned and already rejects idx == -1;
+  * confirm before reordering these tests.
+  */
+ if (idx >= obj->nr_programs || idx < 0)
+ return NULL;
+ return &obj->programs[idx];
+ }
+
+ /*
+  * Return the program following prev in obj (the first one for prev == NULL),
+  * skipping entries for which bpf_program__is_function_storage() is true.
+  * Returns NULL when no further program exists.
+  */
+ struct bpf_program *
+ bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
+ {
+ 	struct bpf_program *cur = prev;
+
+ 	for (;;) {
+ 		cur = __bpf_program__iter(cur, obj, true);
+ 		if (!cur || !bpf_program__is_function_storage(cur, obj))
+ 			break;
+ 	}
+
+ 	return cur;
+ }
+
+ /*
+  * Return the program preceding next in obj (the last one for next == NULL),
+  * skipping entries for which bpf_program__is_function_storage() is true.
+  * Returns NULL when no earlier program exists.
+  */
+ struct bpf_program *
+ bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
+ {
+ 	struct bpf_program *cur = next;
+
+ 	for (;;) {
+ 		cur = __bpf_program__iter(cur, obj, false);
+ 		if (!cur || !bpf_program__is_function_storage(cur, obj))
+ 			break;
+ 	}
+
+ 	return cur;
+ }
+
+ /*
+  * Attach private data and its cleanup callback to prog.
+  * Previously attached data is released through the old callback first, when
+  * both the data and the callback are set. Always returns 0.
+  */
+ int bpf_program__set_priv(struct bpf_program *prog, void *priv,
+ 			  bpf_program_clear_priv_t clear_priv)
+ {
+ 	if (prog->priv) {
+ 		if (prog->clear_priv)
+ 			prog->clear_priv(prog, prog->priv);
+ 	}
+
+ 	prog->clear_priv = clear_priv;
+ 	prog->priv = priv;
+
+ 	return 0;
+ }
+
+ /* Return the private data attached to prog, or ERR_PTR(-EINVAL) for NULL. */
+ void *bpf_program__priv(const struct bpf_program *prog)
+ {
+ 	if (!prog)
+ 		return ERR_PTR(-EINVAL);
+
+ 	return prog->priv;
+ }
+
+ /* Store ifindex in prog->prog_ifindex; prog must not be NULL. */
+ void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
+ {
+ prog->prog_ifindex = ifindex;
+ }
+
+ /*
+  * Return the section name of prog.
+  * With needs_copy set, a strdup()'ed copy is returned instead of the
+  * internal string; ERR_PTR(-ENOMEM) is returned if that allocation fails.
+  */
+ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
+ {
+ 	const char *name = prog->section_name;
+
+ 	if (!needs_copy)
+ 		return name;
+
+ 	name = strdup(name);
+ 	if (name == NULL) {
+ 		pr_warn("failed to strdup program title\n");
+ 		return ERR_PTR(-ENOMEM);
+ 	}
+
+ 	return name;
+ }
+
+ /* Return the FD of instance 0 of prog, as reported by bpf_program__nth_fd(). */
+ int bpf_program__fd(const struct bpf_program *prog)
+ {
+ return bpf_program__nth_fd(prog, 0);
+ }
+
+ /* Return the size in bytes of prog's instructions (insns_cnt instructions). */
+ size_t bpf_program__size(const struct bpf_program *prog)
+ {
+ return prog->insns_cnt * sizeof(struct bpf_insn);
+ }
+
+ /*
+  * Register a pre-processor for prog and reserve nr_instances FD slots.
+  * Every slot starts out as -1. The call is rejected once the program
+  * already has instances or an FD array.
+  *
+  * Returns 0 on success, -EINVAL for bad arguments or when instances already
+  * exist, -ENOMEM when the FD array cannot be allocated.
+  */
+ int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
+ 			  bpf_program_prep_t prep)
+ {
+ 	int *fds;
+ 	int i;
+
+ 	if (!prep || nr_instances <= 0)
+ 		return -EINVAL;
+
+ 	if (prog->instances.fds || prog->instances.nr > 0) {
+ 		pr_warn("Can't set pre-processor after loading\n");
+ 		return -EINVAL;
+ 	}
+
+ 	fds = malloc(sizeof(int) * nr_instances);
+ 	if (fds == NULL) {
+ 		pr_warn("alloc memory failed for fds\n");
+ 		return -ENOMEM;
+ 	}
+
+ 	/* mark every instance as "no FD yet" */
+ 	for (i = 0; i < nr_instances; i++)
+ 		fds[i] = -1;
+
+ 	prog->preprocessor = prep;
+ 	prog->instances.fds = fds;
+ 	prog->instances.nr = nr_instances;
+
+ 	return 0;
+ }
+
+ /*
+  * Return the FD of the n-th instance of prog.
+  *
+  * Returns -EINVAL for a NULL program or an out-of-range n, and -ENOENT when
+  * the slot holds a negative (invalid) FD.
+  */
+ int bpf_program__nth_fd(const struct bpf_program *prog, int n)
+ {
+ 	int fd;
+
+ 	if (prog == NULL)
+ 		return -EINVAL;
+
+ 	if (n < 0 || n >= prog->instances.nr) {
+ 		pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
+ 			n, prog->section_name, prog->instances.nr);
+ 		return -EINVAL;
+ 	}
+
+ 	fd = prog->instances.fds[n];
+ 	if (fd >= 0)
+ 		return fd;
+
+ 	pr_warn("%dth instance of program '%s' is invalid\n",
+ 		n, prog->section_name);
+ 	return -ENOENT;
+ }
+
+ /* Return prog->type; prog must not be NULL. */
+ enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog)
+ {
+ return prog->type;
+ }
+
+ /* Set prog->type to type; prog must not be NULL. */
+ void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
+ {
+ prog->type = type;
+ }
+
+ /* True when prog is non-NULL and its type equals type. */
+ static bool bpf_program__is_type(const struct bpf_program *prog,
+ 				 enum bpf_prog_type type)
+ {
+ 	return prog && prog->type == type;
+ }
+
+ /*
+  * Generate a setter/predicate pair for one program type:
+  *   bpf_program__set_<NAME>() sets the type to TYPE and returns 0, or
+  *                             -EINVAL when prog is NULL;
+  *   bpf_program__is_<NAME>()  reports whether prog currently has type TYPE
+  *                             (false for a NULL prog).
+  */
+ #define BPF_PROG_TYPE_FNS(NAME, TYPE) \
+ int bpf_program__set_##NAME(struct bpf_program *prog) \
+ { \
+ if (!prog) \
+ return -EINVAL; \
+ bpf_program__set_type(prog, TYPE); \
+ return 0; \
+ } \
+ \
+ bool bpf_program__is_##NAME(const struct bpf_program *prog) \
+ { \
+ return bpf_program__is_type(prog, TYPE); \
+ } \
+
+ /* One setter/predicate pair per program type listed below. */
+ BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
+ BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
+ BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
+ BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
+ BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
+ BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
+ BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
+ BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
+ BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
+
+ /* Return prog->expected_attach_type; prog must not be NULL. */
+ enum bpf_attach_type
+ bpf_program__get_expected_attach_type(struct bpf_program *prog)
+ {
+ return prog->expected_attach_type;
+ }
+
+ /* Set prog->expected_attach_type to type; prog must not be NULL. */
+ void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+ enum bpf_attach_type type)
+ {
+ prog->expected_attach_type = type;
+ }
+
+ /*
+  * Build one section_names[] initializer. The positional fields are: section
+  * name prefix, its length without the terminating NUL, program type,
+  * expected attach type, is_attachable flag, is_attach_btf flag and attach
+  * type.
+  */
+ #define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, btf, atype) \
+ { string, sizeof(string) - 1, ptype, eatype, is_attachable, btf, atype }
+
+ /* Programs that can NOT be attached. */
+ #define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0)
+
+ /* Programs that can be attached. */
+ #define BPF_APROG_SEC(string, ptype, atype) \
+ BPF_PROG_SEC_IMPL(string, ptype, 0, 1, 0, atype)
+
+ /* Programs that must specify expected attach type at load time. */
+ #define BPF_EAPROG_SEC(string, ptype, eatype) \
+ BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, 0, eatype)
+
+ /* Programs that use BTF to identify attach point */
+ #define BPF_PROG_BTF(string, ptype, eatype) \
+ BPF_PROG_SEC_IMPL(string, ptype, eatype, 0, 1, 0)
+
+ /* Programs that can be attached but attach type can't be identified by section
+ * name. Kept for backward compatibility.
+ */
+ #define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype)
+
+ /*
+  * Table mapping ELF section name prefixes to program/attach types.
+  * Lookups in this file compare only the first len bytes of a name against
+  * sec and take the first matching entry, so entry order matters where one
+  * prefix is a prefix of another.
+  */
+ static const struct {
+ /* section name prefix */
+ const char *sec;
+ /* strlen(sec), precomputed by BPF_PROG_SEC_IMPL */
+ size_t len;
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
+ /* entry provides a usable attach_type */
+ bool is_attachable;
+ /* attach point is identified through BTF */
+ bool is_attach_btf;
+ enum bpf_attach_type attach_type;
+ } section_names[] = {
+ BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
+ BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE),
+ BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE),
+ BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE),
+ BPF_PROG_SEC("uretprobe/", BPF_PROG_TYPE_KPROBE),
+ BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS),
+ BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT),
+ BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT),
+ BPF_PROG_SEC("tp/", BPF_PROG_TYPE_TRACEPOINT),
+ BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT),
+ BPF_PROG_SEC("raw_tp/", BPF_PROG_TYPE_RAW_TRACEPOINT),
+ BPF_PROG_BTF("tp_btf/", BPF_PROG_TYPE_TRACING,
+ BPF_TRACE_RAW_TP),
+ BPF_PROG_BTF("fentry/", BPF_PROG_TYPE_TRACING,
+ BPF_TRACE_FENTRY),
+ BPF_PROG_BTF("fexit/", BPF_PROG_TYPE_TRACING,
+ BPF_TRACE_FEXIT),
+ BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
+ BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
+ BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
+ BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT),
+ BPF_PROG_SEC("lwt_xmit", BPF_PROG_TYPE_LWT_XMIT),
+ BPF_PROG_SEC("lwt_seg6local", BPF_PROG_TYPE_LWT_SEG6LOCAL),
+ BPF_APROG_SEC("cgroup_skb/ingress", BPF_PROG_TYPE_CGROUP_SKB,
+ BPF_CGROUP_INET_INGRESS),
+ BPF_APROG_SEC("cgroup_skb/egress", BPF_PROG_TYPE_CGROUP_SKB,
+ BPF_CGROUP_INET_EGRESS),
+ BPF_APROG_COMPAT("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB),
+ BPF_APROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK,
+ BPF_CGROUP_INET_SOCK_CREATE),
+ BPF_EAPROG_SEC("cgroup/post_bind4", BPF_PROG_TYPE_CGROUP_SOCK,
+ BPF_CGROUP_INET4_POST_BIND),
+ BPF_EAPROG_SEC("cgroup/post_bind6", BPF_PROG_TYPE_CGROUP_SOCK,
+ BPF_CGROUP_INET6_POST_BIND),
+ BPF_APROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE,
+ BPF_CGROUP_DEVICE),
+ BPF_APROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS,
+ BPF_CGROUP_SOCK_OPS),
+ BPF_APROG_SEC("sk_skb/stream_parser", BPF_PROG_TYPE_SK_SKB,
+ BPF_SK_SKB_STREAM_PARSER),
+ BPF_APROG_SEC("sk_skb/stream_verdict", BPF_PROG_TYPE_SK_SKB,
+ BPF_SK_SKB_STREAM_VERDICT),
+ BPF_APROG_COMPAT("sk_skb", BPF_PROG_TYPE_SK_SKB),
+ BPF_APROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG,
+ BPF_SK_MSG_VERDICT),
+ BPF_APROG_SEC("lirc_mode2", BPF_PROG_TYPE_LIRC_MODE2,
+ BPF_LIRC_MODE2),
+ BPF_APROG_SEC("flow_dissector", BPF_PROG_TYPE_FLOW_DISSECTOR,
+ BPF_FLOW_DISSECTOR),
+ BPF_EAPROG_SEC("cgroup/bind4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_INET4_BIND),
+ BPF_EAPROG_SEC("cgroup/bind6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_INET6_BIND),
+ BPF_EAPROG_SEC("cgroup/connect4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_INET4_CONNECT),
+ BPF_EAPROG_SEC("cgroup/connect6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_INET6_CONNECT),
+ BPF_EAPROG_SEC("cgroup/sendmsg4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_UDP4_SENDMSG),
+ BPF_EAPROG_SEC("cgroup/sendmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_UDP6_SENDMSG),
+ BPF_EAPROG_SEC("cgroup/recvmsg4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_UDP4_RECVMSG),
+ BPF_EAPROG_SEC("cgroup/recvmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_UDP6_RECVMSG),
+ BPF_EAPROG_SEC("cgroup/sysctl", BPF_PROG_TYPE_CGROUP_SYSCTL,
+ BPF_CGROUP_SYSCTL),
+ BPF_EAPROG_SEC("cgroup/getsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT,
+ BPF_CGROUP_GETSOCKOPT),
+ BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT,
+ BPF_CGROUP_SETSOCKOPT),
+ };
+
+ /*
+  * The table-building helpers are only meant for the initializer above; drop
+  * all of them, including BPF_PROG_BTF, which was previously left defined.
+  */
+ #undef BPF_PROG_SEC_IMPL
+ #undef BPF_PROG_SEC
+ #undef BPF_APROG_SEC
+ #undef BPF_EAPROG_SEC
+ #undef BPF_PROG_BTF
+ #undef BPF_APROG_COMPAT
+
+ /* Per-entry space budget used to size the buffer of all section names. */
+ #define MAX_TYPE_NAME_SIZE 32
+
+ /*
+  * Build a heap-allocated string listing section names from section_names[],
+  * each preceded by a single space. With attach_type set, only attachable
+  * entries are listed.
+  *
+  * Returns the string (caller frees it), or NULL when allocation fails or the
+  * names do not fit into the budgeted buffer.
+  */
+ static char *libbpf_get_type_names(bool attach_type)
+ {
+ 	size_t cap = ARRAY_SIZE(section_names) * MAX_TYPE_NAME_SIZE;
+ 	size_t used = 0;
+ 	char *names;
+ 	int i;
+
+ 	names = malloc(cap);
+ 	if (names == NULL)
+ 		return NULL;
+
+ 	names[0] = '\0';
+ 	/* Append " <name>" for every entry that qualifies. */
+ 	for (i = 0; i < ARRAY_SIZE(section_names); i++) {
+ 		const char *sec = section_names[i].sec;
+ 		size_t sec_len;
+
+ 		if (attach_type && !section_names[i].is_attachable)
+ 			continue;
+
+ 		sec_len = strlen(sec);
+ 		/* room needed: current text + ' ' + name + NUL */
+ 		if (used + sec_len + 2 > cap) {
+ 			free(names);
+ 			return NULL;
+ 		}
+
+ 		names[used++] = ' ';
+ 		memcpy(names + used, sec, sec_len + 1);
+ 		used += sec_len;
+ 	}
+
+ 	return names;
+ }
+
+ /*
+  * Derive program type and expected attach type from an ELF section name.
+  * The first section_names[] entry whose prefix matches name wins.
+  *
+  * Returns 0 and fills both output parameters on a match, -EINVAL for a NULL
+  * name, or -ESRCH (after logging the supported names) when nothing matches.
+  */
+ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
+ 			     enum bpf_attach_type *expected_attach_type)
+ {
+ 	char *known;
+ 	int i;
+
+ 	if (name == NULL)
+ 		return -EINVAL;
+
+ 	for (i = 0; i < ARRAY_SIZE(section_names); i++) {
+ 		if (strncmp(name, section_names[i].sec,
+ 			    section_names[i].len) == 0) {
+ 			*prog_type = section_names[i].prog_type;
+ 			*expected_attach_type =
+ 				section_names[i].expected_attach_type;
+ 			return 0;
+ 		}
+ 	}
+
+ 	pr_warn("failed to guess program type from ELF section '%s'\n", name);
+ 	known = libbpf_get_type_names(false);
+ 	if (known) {
+ 		pr_info("supported section(type) names are:%s\n", known);
+ 		free(known);
+ 	}
+
+ 	return -ESRCH;
+ }
+
+ /* Prefix put in front of the name when looking up raw tracepoint typedefs. */
+ #define BTF_PREFIX "btf_trace_"
+ /*
+  * Look up name in the kernel BTF returned by bpf_core_find_kernel_btf().
+  *
+  * For BPF_TRACE_RAW_TP the lookup is for a typedef called
+  * "btf_trace_<name>"; for every other attach type it is for a function
+  * called name.
+  *
+  * Returns the result of btf__find_by_name_kind(), or -EINVAL when the kernel
+  * BTF could not be obtained.
+  */
+ int libbpf_find_vmlinux_btf_id(const char *name,
+ enum bpf_attach_type attach_type)
+ {
+ struct btf *btf = bpf_core_find_kernel_btf();
+ char raw_tp_btf[128] = BTF_PREFIX;
+ /* dst points at the NUL right after the prefix inside raw_tp_btf */
+ char *dst = raw_tp_btf + sizeof(BTF_PREFIX) - 1;
+ const char *btf_name;
+ int err = -EINVAL;
+ __u32 kind;
+
+ if (IS_ERR(btf)) {
+ pr_warn("vmlinux BTF is not found\n");
+ return -EINVAL;
+ }
+
+ if (attach_type == BPF_TRACE_RAW_TP) {
+ /* prepend "btf_trace_" prefix per kernel convention */
+ /* the count leaves room for the prefix and the terminating NUL */
+ strncat(dst, name, sizeof(raw_tp_btf) - sizeof(BTF_PREFIX));
+ btf_name = raw_tp_btf;
+ kind = BTF_KIND_TYPEDEF;
+ } else {
+ btf_name = name;
+ kind = BTF_KIND_FUNC;
+ }
+ err = btf__find_by_name_kind(btf, btf_name, kind);
+ btf__free(btf);
+ return err;
+ }
+
+ /*
+  * Look up function name in the BTF of the program behind attach_prog_fd.
+  *
+  * Returns the result of btf__find_by_name_kind() when the lookup is
+  * performed, or -EINVAL when the program info or its BTF cannot be
+  * obtained. The info buffer is freed on every path past its allocation.
+  */
+ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
+ {
+ struct bpf_prog_info_linear *info_linear;
+ struct bpf_prog_info *info;
+ struct btf *btf = NULL;
+ /* stays -EINVAL for the early "goto out" exits below */
+ int err = -EINVAL;
+
+ info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
+ if (IS_ERR_OR_NULL(info_linear)) {
+ pr_warn("failed get_prog_info_linear for FD %d\n",
+ attach_prog_fd);
+ return -EINVAL;
+ }
+ info = &info_linear->info;
+ if (!info->btf_id) {
+ pr_warn("The target program doesn't have BTF\n");
+ goto out;
+ }
+ if (btf__get_from_id(info->btf_id, &btf)) {
+ pr_warn("Failed to get BTF of the program\n");
+ goto out;
+ }
+ err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
+ btf__free(btf);
+ /* a result <= 0 is treated as "not found" and only logged here */
+ if (err <= 0) {
+ pr_warn("%s is not found in prog's BTF\n", name);
+ goto out;
+ }
+ out:
+ free(info_linear);
+ return err;
+ }
+
+ /*
+  * Resolve the BTF id for a BTF-attached program from its section name.
+  *
+  * The part of name after the matching BTF section prefix is looked up
+  * either in the BTF of the program behind attach_prog_fd (when non-zero) or
+  * in the kernel BTF.
+  *
+  * Returns the lookup result of the first matching entry, -EINVAL for a NULL
+  * name, or -ESRCH when no BTF section prefix matches.
+  */
+ static int libbpf_find_attach_btf_id(const char *name,
+ 				     enum bpf_attach_type attach_type,
+ 				     __u32 attach_prog_fd)
+ {
+ 	int i;
+
+ 	if (name == NULL)
+ 		return -EINVAL;
+
+ 	for (i = 0; i < ARRAY_SIZE(section_names); i++) {
+ 		const char *target;
+ 		int id;
+
+ 		if (!section_names[i].is_attach_btf)
+ 			continue;
+ 		if (strncmp(name, section_names[i].sec,
+ 			    section_names[i].len) != 0)
+ 			continue;
+
+ 		/* what follows the section prefix is the attach target */
+ 		target = name + section_names[i].len;
+ 		id = attach_prog_fd
+ 			? libbpf_find_prog_btf_id(target, attach_prog_fd)
+ 			: libbpf_find_vmlinux_btf_id(target, attach_type);
+ 		if (id <= 0)
+ 			pr_warn("%s is not found in vmlinux BTF\n", name);
+ 		return id;
+ 	}
+
+ 	pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name);
+ 	return -ESRCH;
+ }
+
+ /*
+  * Derive the attach type from an ELF section name.
+  * The first section_names[] entry whose prefix matches name decides.
+  *
+  * Returns 0 and fills *attach_type for an attachable match; -EINVAL for a
+  * NULL name, for a match that is not attachable, or (after logging the
+  * attachable names) when nothing matches.
+  */
+ int libbpf_attach_type_by_name(const char *name,
+ 			       enum bpf_attach_type *attach_type)
+ {
+ 	char *known;
+ 	int i;
+
+ 	if (name == NULL)
+ 		return -EINVAL;
+
+ 	for (i = 0; i < ARRAY_SIZE(section_names); i++) {
+ 		if (strncmp(name, section_names[i].sec,
+ 			    section_names[i].len) != 0)
+ 			continue;
+
+ 		if (section_names[i].is_attachable) {
+ 			*attach_type = section_names[i].attach_type;
+ 			return 0;
+ 		}
+ 		return -EINVAL;
+ 	}
+
+ 	pr_warn("failed to guess attach type based on ELF section name '%s'\n", name);
+ 	known = libbpf_get_type_names(true);
+ 	if (known) {
+ 		pr_info("attachable section(type) names are:%s\n", known);
+ 		free(known);
+ 	}
+
+ 	return -EINVAL;
+ }
+
+ /* Return map->fd, or -EINVAL when map is NULL. */
+ int bpf_map__fd(const struct bpf_map *map)
+ {
+ 	if (!map)
+ 		return -EINVAL;
+
+ 	return map->fd;
+ }
+
+ /* Return a pointer to the map's definition, or ERR_PTR(-EINVAL) for NULL. */
+ const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
+ {
+ 	if (!map)
+ 		return ERR_PTR(-EINVAL);
+
+ 	return &map->def;
+ }
+
+ /* Return map->name, or NULL when map is NULL. */
+ const char *bpf_map__name(const struct bpf_map *map)
+ {
+ 	if (!map)
+ 		return NULL;
+
+ 	return map->name;
+ }
+
+ /* Return the BTF type id of the map's key, or 0 when map is NULL. */
+ __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
+ {
+ 	if (!map)
+ 		return 0;
+
+ 	return map->btf_key_type_id;
+ }
+
+ /* Return the BTF type id of the map's value, or 0 when map is NULL. */
+ __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
+ {
+ 	if (!map)
+ 		return 0;
+
+ 	return map->btf_value_type_id;
+ }
+
+ /*
+  * Attach private data and its cleanup callback to map.
+  * Previously attached data is released through the old callback first, when
+  * both the data and the callback are set.
+  *
+  * Returns 0 on success or -EINVAL when map is NULL.
+  */
+ int bpf_map__set_priv(struct bpf_map *map, void *priv,
+ 		      bpf_map_clear_priv_t clear_priv)
+ {
+ 	if (map == NULL)
+ 		return -EINVAL;
+
+ 	if (map->priv && map->clear_priv)
+ 		map->clear_priv(map, map->priv);
+
+ 	map->clear_priv = clear_priv;
+ 	map->priv = priv;
+
+ 	return 0;
+ }
+
+ /* Return the private data attached to map, or ERR_PTR(-EINVAL) for NULL. */
+ void *bpf_map__priv(const struct bpf_map *map)
+ {
+ 	if (!map)
+ 		return ERR_PTR(-EINVAL);
+
+ 	return map->priv;
+ }
+
+ /* True when the map is of type BPF_MAP_TYPE_PERF_EVENT_ARRAY; map must not be NULL. */
+ bool bpf_map__is_offload_neutral(const struct bpf_map *map)
+ {
+ return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
+ }
+
+ /* True when the map's libbpf_type is anything but LIBBPF_MAP_UNSPEC; map must not be NULL. */
+ bool bpf_map__is_internal(const struct bpf_map *map)
+ {
+ return map->libbpf_type != LIBBPF_MAP_UNSPEC;
+ }
+
+ /* Store ifindex in map->map_ifindex; map must not be NULL. */
+ void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
+ {
+ map->map_ifindex = ifindex;
+ }
+
+ /*
+  * Record fd as the inner map FD of a map-in-map.
+  *
+  * Returns 0 on success, or -EINVAL when the map's type is not a map-in-map
+  * type or an inner map FD has already been set (anything other than -1).
+  */
+ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
+ {
+ if (!bpf_map_type__is_map_in_map(map->def.type)) {
+ pr_warn("error: unsupported map type\n");
+ return -EINVAL;
+ }
+ /* -1 is the "not set yet" marker */
+ if (map->inner_map_fd != -1) {
+ pr_warn("error: inner_map_fd already specified\n");
+ return -EINVAL;
+ }
+ map->inner_map_fd = fd;
+ return 0;
+ }
+
+ /*
+  * Return the map i slots away from m inside obj->maps.
+  *
+  * m must point into obj's map array; the callers in view pass i = 1 or -1.
+  * Returns NULL when obj or its map array is NULL, when m does not belong to
+  * obj, or when the resulting index falls outside the array.
+  */
+ static struct bpf_map *
+ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
+ {
+ ssize_t idx;
+ struct bpf_map *s, *e;
+
+ if (!obj || !obj->maps)
+ return NULL;
+
+ /* [s, e) is the address range of the object's map array */
+ s = obj->maps;
+ e = obj->maps + obj->nr_maps;
+
+ if ((m < s) || (m >= e)) {
+ pr_warn("error in %s: map handler doesn't belong to object\n",
+ __func__);
+ return NULL;
+ }
+
+ idx = (m - obj->maps) + i;
+ if (idx >= obj->nr_maps || idx < 0)
+ return NULL;
+ return &obj->maps[idx];
+ }
+
+ /*
+  * Return the map following prev in obj. A NULL prev yields obj->maps, i.e.
+  * the start of the map array. Returns NULL past the last map.
+  */
+ struct bpf_map *
+ bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
+ {
+ 	return prev ? __bpf_map__iter(prev, obj, 1) : obj->maps;
+ }
+
+ /*
+  * Return the map preceding next in obj. A NULL next yields the last map, or
+  * NULL when the object has no maps. Returns NULL before the first map.
+  */
+ struct bpf_map *
+ bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)
+ {
+ 	if (next)
+ 		return __bpf_map__iter(next, obj, -1);
+
+ 	return obj->nr_maps ? obj->maps + obj->nr_maps - 1 : NULL;
+ }
+
+ /*
+  * Return the first map of obj whose name equals name, or NULL when there is
+  * none. Maps without a name are skipped.
+  */
+ struct bpf_map *
+ bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
+ {
+ 	struct bpf_map *map;
+
+ 	bpf_object__for_each_map(map, obj) {
+ 		if (!map->name)
+ 			continue;
+ 		if (strcmp(map->name, name) == 0)
+ 			return map;
+ 	}
+
+ 	return NULL;
+ }
+
+ /*
+  * Return the FD of the map called name in obj. When no such map exists the
+  * lookup yields NULL and bpf_map__fd() turns that into -EINVAL.
+  */
+ int
+ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
+ {
+ return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
+ }
+
+ /* Not supported: always returns ERR_PTR(-ENOTSUP); both arguments are ignored. */
+ struct bpf_map *
+ bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
+ {
+ return ERR_PTR(-ENOTSUP);
+ }
+
+ /* Return PTR_ERR_OR_ZERO(ptr) for a pointer returned by this library. */
+ long libbpf_get_error(const void *ptr)
+ {
+ return PTR_ERR_OR_ZERO(ptr);
+ }
+
+ /*
+  * Convenience wrapper around bpf_prog_load_xattr(): load file with program
+  * type type, expected attach type 0 and every other attribute zeroed.
+  */
+ int bpf_prog_load(const char *file, enum bpf_prog_type type,
+ 		  struct bpf_object **pobj, int *prog_fd)
+ {
+ 	struct bpf_prog_load_attr attr;
+
+ 	memset(&attr, 0, sizeof(attr));
+ 	attr.expected_attach_type = 0;
+ 	attr.prog_type = type;
+ 	attr.file = file;
+
+ 	return bpf_prog_load_xattr(&attr, pobj, prog_fd);
+ }
+
+ /*
+  * Open the object file named in attr, apply the attributes to all of its
+  * programs and maps, and load it.
+  *
+  * On success *pobj receives the object and *prog_fd the FD of the first
+  * program. On any failure after the object was opened, the object is closed
+  * again.
+  *
+  * Returns 0 on success; -EINVAL for a NULL attr or file, when a program's
+  * type is still unspecified, or when loading fails; -ENOENT when the object
+  * cannot be opened or contains no program.
+  */
+ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
+ struct bpf_object **pobj, int *prog_fd)
+ {
+ struct bpf_object_open_attr open_attr = {};
+ struct bpf_program *prog, *first_prog = NULL;
+ struct bpf_object *obj;
+ struct bpf_map *map;
+ int err;
+
+ if (!attr)
+ return -EINVAL;
+ if (!attr->file)
+ return -EINVAL;
+
+ open_attr.file = attr->file;
+ open_attr.prog_type = attr->prog_type;
+
+ obj = bpf_object__open_xattr(&open_attr);
+ if (IS_ERR_OR_NULL(obj))
+ return -ENOENT;
+
+ bpf_object__for_each_program(prog, obj) {
+ enum bpf_attach_type attach_type = attr->expected_attach_type;
+ /*
+ * to preserve backwards compatibility, bpf_prog_load treats
+ * attr->prog_type, if specified, as an override to whatever
+ * bpf_object__open guessed
+ */
+ if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) {
+ bpf_program__set_type(prog, attr->prog_type);
+ bpf_program__set_expected_attach_type(prog,
+ attach_type);
+ }
+ if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) {
+ /*
+ * we haven't guessed from section name and user
+ * didn't provide a fallback type, too bad...
+ */
+ bpf_object__close(obj);
+ return -EINVAL;
+ }
+
+ prog->prog_ifindex = attr->ifindex;
+ prog->log_level = attr->log_level;
+ prog->prog_flags = attr->prog_flags;
+ /* remember the first program; its FD is what the caller gets */
+ if (!first_prog)
+ first_prog = prog;
+ }
+
+ /* offload-neutral maps keep their ifindex untouched */
+ bpf_object__for_each_map(map, obj) {
+ if (!bpf_map__is_offload_neutral(map))
+ map->map_ifindex = attr->ifindex;
+ }
+
+ if (!first_prog) {
+ pr_warn("object file doesn't contain bpf program\n");
+ bpf_object__close(obj);
+ return -ENOENT;
+ }
+
+ /* the specific load error is not propagated; callers see -EINVAL */
+ err = bpf_object__load(obj);
+ if (err) {
+ bpf_object__close(obj);
+ return -EINVAL;
+ }
+
+ *pobj = obj;
+ *prog_fd = bpf_program__fd(first_prog);
+ return 0;
+ }
+
+ /* Base of every link type: carries the callback that undoes the attachment. */
+ struct bpf_link {
+ /* invoked by bpf_link__destroy() before the link memory is freed */
+ int (*destroy)(struct bpf_link *link);
+ };
+
+ /*
+  * Detach and free link. A NULL link is accepted and yields 0.
+  * Returns whatever the link's destroy callback returned; the link is freed
+  * regardless of that result.
+  */
+ int bpf_link__destroy(struct bpf_link *link)
+ {
+ 	int ret;
+
+ 	if (link == NULL)
+ 		return 0;
+
+ 	ret = link->destroy(link);
+ 	free(link);
+
+ 	return ret;
+ }
+
+ /*
+  * Link backed by a single file descriptor. Code in this file casts between
+  * struct bpf_link * and struct bpf_link_fd *, which relies on link being the
+  * first member.
+  */
+ struct bpf_link_fd {
+ struct bpf_link link; /* has to be at the top of struct */
+ int fd; /* hook FD */
+ };
+
+ /*
+  * Destroy callback for perf-event links: disable the event and close its FD.
+  * Returns 0, or -errno when the PERF_EVENT_IOC_DISABLE ioctl fails; the FD is
+  * closed in either case.
+  */
+ static int bpf_link__destroy_perf_event(struct bpf_link *link)
+ {
+ 	struct bpf_link_fd *fd_link = (void *)link;
+ 	int ret = 0;
+
+ 	if (ioctl(fd_link->fd, PERF_EVENT_IOC_DISABLE, 0))
+ 		ret = -errno;
+
+ 	close(fd_link->fd);
+
+ 	return ret;
+ }
+
+ /*
+  * Attach prog to the perf event behind pfd and enable the event.
+  *
+  * Returns a link that stores pfd, or ERR_PTR(): -EINVAL for a negative pfd or
+  * a program without FD, -ENOMEM when the link cannot be allocated, -errno
+  * when one of the ioctls fails. On failure pfd itself is not closed here;
+  * that is left to the caller.
+  */
+ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
+ int pfd)
+ {
+ char errmsg[STRERR_BUFSIZE];
+ struct bpf_link_fd *link;
+ int prog_fd, err;
+
+ if (pfd < 0) {
+ pr_warn("program '%s': invalid perf event FD %d\n",
+ bpf_program__title(prog, false), pfd);
+ return ERR_PTR(-EINVAL);
+ }
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0) {
+ pr_warn("program '%s': can't attach BPF program w/o FD (did you load it?)\n",
+ bpf_program__title(prog, false));
+ return ERR_PTR(-EINVAL);
+ }
+
+ link = malloc(sizeof(*link));
+ if (!link)
+ return ERR_PTR(-ENOMEM);
+ link->link.destroy = &bpf_link__destroy_perf_event;
+ link->fd = pfd;
+
+ /* first bind the program to the event, then switch the event on */
+ if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
+ err = -errno;
+ free(link);
+ pr_warn("program '%s': failed to attach to pfd %d: %s\n",
+ bpf_program__title(prog, false), pfd,
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ return ERR_PTR(err);
+ }
+ if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
+ err = -errno;
+ free(link);
+ pr_warn("program '%s': failed to enable pfd %d: %s\n",
+ bpf_program__title(prog, false), pfd,
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ return ERR_PTR(err);
+ }
+ return (struct bpf_link *)link;
+ }
+
+/*
+ * this function is expected to parse integer in the range of [0, 2^31-1] from
+ * given file using scanf format string fmt. If actual parsed value is
+ * negative, the result might be indistinguishable from error
+ */
+static int parse_uint_from_file(const char *file, const char *fmt)
+{
+ char buf[STRERR_BUFSIZE];
+ int err, ret;
+ FILE *f;
+
+ f = fopen(file, "r");
+ if (!f) {
+ err = -errno;
+ pr_debug("failed to open '%s': %s\n", file,
+ libbpf_strerror_r(err, buf, sizeof(buf)));
+ return err;
+ }
+ err = fscanf(f, fmt, &ret);
+ if (err != 1) {
+ err = err == EOF ? -EIO : -errno;
+ pr_debug("failed to parse '%s': %s\n", file,
+ libbpf_strerror_r(err, buf, sizeof(buf)));
+ fclose(f);
+ return err;
+ }
+ fclose(f);
+ return ret;
+}
+
+static int determine_kprobe_perf_type(void)
+{
+ const char *file = "/sys/bus/event_source/devices/kprobe/type";
+
+ return parse_uint_from_file(file, "%d\n");
+}
+
+static int determine_uprobe_perf_type(void)
+{
+ const char *file = "/sys/bus/event_source/devices/uprobe/type";
+
+ return parse_uint_from_file(file, "%d\n");
+}
+
+static int determine_kprobe_retprobe_bit(void)
+{
+ const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
+
+ return parse_uint_from_file(file, "config:%d\n");
+}
+
+static int determine_uprobe_retprobe_bit(void)
+{
+ const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
+
+ return parse_uint_from_file(file, "config:%d\n");
+}
+
+ /*
+  * Open a perf event for a kprobe or uprobe.
+  *
+  * uprobe   - open a uprobe event instead of a kprobe event
+  * retprobe - set the PMU's "retprobe" config bit
+  * name     - kernel function name (kprobe) or binary path (uprobe)
+  * offset   - kprobe address or uprobe offset, passed through config2
+  * pid      - process to filter on; a negative value is passed as -1, and
+  *            exactly -1 additionally selects CPU 0 instead of "any CPU"
+  *
+  * Returns the perf event FD on success or a negative error code when the
+  * PMU type or retprobe bit cannot be determined, the bit is out of range, or
+  * perf_event_open() fails.
+  */
+ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
+ 				 uint64_t offset, int pid)
+ {
+ 	struct perf_event_attr attr = {};
+ 	char errmsg[STRERR_BUFSIZE];
+ 	int type, pfd, err;
+
+ 	type = uprobe ? determine_uprobe_perf_type()
+ 		      : determine_kprobe_perf_type();
+ 	if (type < 0) {
+ 		pr_warn("failed to determine %s perf type: %s\n",
+ 			uprobe ? "uprobe" : "kprobe",
+ 			libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
+ 		return type;
+ 	}
+ 	if (retprobe) {
+ 		int bit = uprobe ? determine_uprobe_retprobe_bit()
+ 				 : determine_kprobe_retprobe_bit();
+
+ 		if (bit < 0) {
+ 			pr_warn("failed to determine %s retprobe bit: %s\n",
+ 				uprobe ? "uprobe" : "kprobe",
+ 				libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
+ 			return bit;
+ 		}
+ 		/* a shift by the full width of attr.config or more is undefined */
+ 		if (bit >= (int)(sizeof(attr.config) * 8)) {
+ 			pr_warn("invalid %s retprobe bit %d\n",
+ 				uprobe ? "uprobe" : "kprobe", bit);
+ 			return -EINVAL;
+ 		}
+ 		/*
+ 		 * Shift in 64 bits: a plain "1 << bit" is an int shift and is
+ 		 * undefined for bit >= 31 although attr.config is wider.
+ 		 */
+ 		attr.config |= 1ULL << bit;
+ 	}
+ 	attr.size = sizeof(attr);
+ 	attr.type = type;
+ 	attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
+ 	attr.config2 = offset;		 /* kprobe_addr or probe_offset */
+
+ 	/* pid filter is meaningful only for uprobes */
+ 	pfd = syscall(__NR_perf_event_open, &attr,
+ 		      pid < 0 ? -1 : pid /* pid */,
+ 		      pid == -1 ? 0 : -1 /* cpu */,
+ 		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
+ 	if (pfd < 0) {
+ 		err = -errno;
+ 		pr_warn("%s perf_event_open() failed: %s\n",
+ 			uprobe ? "uprobe" : "kprobe",
+ 			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ 		return err;
+ 	}
+ 	return pfd;
+ }
+
+/*
+ * Attach @prog to the kernel function @func_name as a kprobe, or as a
+ * kretprobe when @retprobe is set.
+ *
+ * Returns the link produced by bpf_program__attach_perf_event(), or an
+ * ERR_PTR-encoded error. The perf event FD is closed if attaching fails.
+ */
+struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
+					    bool retprobe,
+					    const char *func_name)
+{
+	const char *probe_kind = retprobe ? "kretprobe" : "kprobe";
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int pfd, err;
+
+	pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
+				    0 /* offset */, -1 /* pid */);
+	if (pfd < 0) {
+		pr_warn("program '%s': failed to create %s '%s' perf event: %s\n",
+			bpf_program__title(prog, false),
+			probe_kind, func_name,
+			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(pfd);
+	}
+
+	link = bpf_program__attach_perf_event(prog, pfd);
+	if (!IS_ERR(link))
+		return link;
+
+	/* attach failed: the perf event FD is still ours to release */
+	close(pfd);
+	err = PTR_ERR(link);
+	pr_warn("program '%s': failed to attach to %s '%s': %s\n",
+		bpf_program__title(prog, false),
+		probe_kind, func_name,
+		libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+	return link;
+}
+
+/*
+ * Attach @prog as a uprobe (or uretprobe when @retprobe is set) at
+ * @func_offset inside @binary_path, using @pid as the perf pid filter.
+ *
+ * Returns the link produced by bpf_program__attach_perf_event(), or an
+ * ERR_PTR-encoded error. The perf event FD is closed if attaching fails.
+ */
+struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
+					    bool retprobe, pid_t pid,
+					    const char *binary_path,
+					    size_t func_offset)
+{
+	const char *probe_kind = retprobe ? "uretprobe" : "uprobe";
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int pfd, err;
+
+	pfd = perf_event_open_probe(true /* uprobe */, retprobe,
+				    binary_path, func_offset, pid);
+	if (pfd < 0) {
+		pr_warn("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
+			bpf_program__title(prog, false),
+			probe_kind,
+			binary_path, func_offset,
+			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(pfd);
+	}
+
+	link = bpf_program__attach_perf_event(prog, pfd);
+	if (!IS_ERR(link))
+		return link;
+
+	/* attach failed: the perf event FD is still ours to release */
+	close(pfd);
+	err = PTR_ERR(link);
+	pr_warn("program '%s': failed to attach to %s '%s:0x%zx': %s\n",
+		bpf_program__title(prog, false),
+		probe_kind,
+		binary_path, func_offset,
+		libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+	return link;
+}
+
+/*
+ * Build the debugfs path of the "id" file for tracepoint
+ * @tp_category/@tp_name and parse the integer it contains.
+ *
+ * Returns the parsed value, -E2BIG if the path does not fit in PATH_MAX,
+ * or another negative error code.
+ */
+static int determine_tracepoint_id(const char *tp_category,
+				   const char *tp_name)
+{
+	char path[PATH_MAX];
+	int n;
+
+	n = snprintf(path, sizeof(path),
+		     "/sys/kernel/debug/tracing/events/%s/%s/id",
+		     tp_category, tp_name);
+	if (n < 0)
+		return -errno;
+	/* snprintf() reports the untruncated length, so >= means truncation */
+	if ((size_t)n >= sizeof(path)) {
+		pr_debug("tracepoint %s/%s path is too long\n",
+			 tp_category, tp_name);
+		return -E2BIG;
+	}
+
+	return parse_uint_from_file(path, "%d\n");
+}
+
+/*
+ * Open a PERF_TYPE_TRACEPOINT perf event for @tp_category/@tp_name
+ * (pid -1, cpu 0, close-on-exec).
+ *
+ * Returns the perf event FD on success, or a negative error code.
+ */
+static int perf_event_open_tracepoint(const char *tp_category,
+				      const char *tp_name)
+{
+	struct perf_event_attr attr = {
+		.type = PERF_TYPE_TRACEPOINT,
+		.size = sizeof(attr),
+	};
+	char errmsg[STRERR_BUFSIZE];
+	int tp_id, pfd, err;
+
+	tp_id = determine_tracepoint_id(tp_category, tp_name);
+	if (tp_id < 0) {
+		pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
+			tp_category, tp_name,
+			libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
+		return tp_id;
+	}
+	attr.config = tp_id;
+
+	pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
+		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
+	if (pfd >= 0)
+		return pfd;
+
+	err = -errno;
+	pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
+		tp_category, tp_name,
+		libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+	return err;
+}
+
+/*
+ * Attach @prog to the tracepoint @tp_category/@tp_name.
+ *
+ * Returns the link produced by bpf_program__attach_perf_event(), or an
+ * ERR_PTR-encoded error. The perf event FD is closed if attaching fails.
+ */
+struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
+						const char *tp_category,
+						const char *tp_name)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int pfd, err;
+
+	pfd = perf_event_open_tracepoint(tp_category, tp_name);
+	if (pfd < 0) {
+		pr_warn("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
+			bpf_program__title(prog, false),
+			tp_category, tp_name,
+			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(pfd);
+	}
+
+	link = bpf_program__attach_perf_event(prog, pfd);
+	if (!IS_ERR(link))
+		return link;
+
+	/* attach failed: the perf event FD is still ours to release */
+	close(pfd);
+	err = PTR_ERR(link);
+	pr_warn("program '%s': failed to attach to tracepoint '%s/%s': %s\n",
+		bpf_program__title(prog, false),
+		tp_category, tp_name,
+		libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+	return link;
+}
+
+/*
+ * destroy callback for FD-backed links: @link is treated as a
+ * struct bpf_link_fd and its fd is closed. Returns the result of close().
+ */
+static int bpf_link__destroy_fd(struct bpf_link *link)
+{
+	struct bpf_link_fd *fd_link = (struct bpf_link_fd *)link;
+	int fd = fd_link->fd;
+
+	return close(fd);
+}
+
+/*
+ * Attach the loaded program @prog to the raw tracepoint @tp_name.
+ *
+ * Returns a heap-allocated FD-backed link whose destroy callback closes the
+ * FD, or an ERR_PTR-encoded error (-EINVAL if @prog has no FD yet, -ENOMEM,
+ * or the negated errno of bpf_raw_tracepoint_open()).
+ */
+struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
+						    const char *tp_name)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link_fd *fd_link;
+	int prog_fd, pfd;
+
+	prog_fd = bpf_program__fd(prog);
+	if (prog_fd < 0) {
+		pr_warn("program '%s': can't attach before loaded\n",
+			bpf_program__title(prog, false));
+		return ERR_PTR(-EINVAL);
+	}
+
+	fd_link = malloc(sizeof(*fd_link));
+	if (!fd_link)
+		return ERR_PTR(-ENOMEM);
+	fd_link->link.destroy = &bpf_link__destroy_fd;
+
+	pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
+	if (pfd >= 0) {
+		fd_link->fd = pfd;
+		return (struct bpf_link *)fd_link;
+	}
+
+	/* capture errno first; free() and logging may overwrite it */
+	pfd = -errno;
+	free(fd_link);
+	pr_warn("program '%s': failed to attach to raw tracepoint '%s': %s\n",
+		bpf_program__title(prog, false), tp_name,
+		libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+	return ERR_PTR(pfd);
+}
+
+/*
+ * Attach the loaded program @prog by calling bpf_raw_tracepoint_open() with
+ * a NULL tracepoint name.
+ *
+ * Returns a heap-allocated FD-backed link whose destroy callback closes the
+ * FD, or an ERR_PTR-encoded error (-EINVAL if @prog has no FD yet, -ENOMEM,
+ * or the negated errno of bpf_raw_tracepoint_open()).
+ */
+struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link_fd *fd_link;
+	int prog_fd, pfd;
+
+	prog_fd = bpf_program__fd(prog);
+	if (prog_fd < 0) {
+		pr_warn("program '%s': can't attach before loaded\n",
+			bpf_program__title(prog, false));
+		return ERR_PTR(-EINVAL);
+	}
+
+	fd_link = malloc(sizeof(*fd_link));
+	if (!fd_link)
+		return ERR_PTR(-ENOMEM);
+	fd_link->link.destroy = &bpf_link__destroy_fd;
+
+	pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
+	if (pfd >= 0) {
+		fd_link->fd = pfd;
+		return (struct bpf_link *)fd_link;
+	}
+
+	/* capture errno first; free() and logging may overwrite it */
+	pfd = -errno;
+	free(fd_link);
+	pr_warn("program '%s': failed to attach to trace: %s\n",
+		bpf_program__title(prog, false),
+		libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+	return ERR_PTR(pfd);
+}
+
+/*
+ * Drain the records currently available in an mmap()'ed perf ring buffer.
+ *
+ * @mmap_mem points at the control page; the data area starts @page_size
+ * bytes later and is @mmap_size bytes long (offsets are reduced with
+ * 'mmap_size - 1'). @fn is called once per record with @private_data.
+ * A record that runs past the end of the data area is first copied into
+ * *@copy_mem, which is (re)allocated as needed and tracked by *@copy_size.
+ *
+ * Processing stops when @fn returns anything other than
+ * LIBBPF_PERF_EVENT_CONT or when the copy buffer cannot be allocated
+ * (LIBBPF_PERF_EVENT_ERROR). The consumer tail is written back in every
+ * case. Returns the last status.
+ */
+enum bpf_perf_event_ret
+bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
+			   void **copy_mem, size_t *copy_size,
+			   bpf_perf_event_print_t fn, void *private_data)
+{
+	struct perf_event_mmap_page *header = mmap_mem;
+	/* the head is read before the tail, as in the original ordering */
+	__u64 head = ring_buffer_read_head(header);
+	__u64 tail = header->data_tail;
+	void *ring = ((__u8 *)header) + page_size;
+	void *ring_end = ring + mmap_size;
+	int ret = LIBBPF_PERF_EVENT_CONT;
+
+	while (tail != head) {
+		struct perf_event_header *rec;
+		size_t rec_len;
+
+		rec = ring + (tail & (mmap_size - 1));
+		rec_len = rec->size;
+
+		if (((void *)rec) + rec_len > ring_end) {
+			/* record wraps: stitch both pieces into *copy_mem */
+			void *wrap_start = rec;
+			size_t first_len = ring_end - wrap_start;
+			size_t second_len = rec_len - first_len;
+
+			if (*copy_size < rec_len) {
+				free(*copy_mem);
+				*copy_mem = malloc(rec_len);
+				if (!*copy_mem) {
+					*copy_size = 0;
+					ret = LIBBPF_PERF_EVENT_ERROR;
+					break;
+				}
+				*copy_size = rec_len;
+			}
+
+			memcpy(*copy_mem, wrap_start, first_len);
+			memcpy(*copy_mem + first_len, ring, second_len);
+			rec = *copy_mem;
+		}
+
+		ret = fn(rec, private_data);
+		/* the record counts as consumed even if fn asks to stop */
+		tail += rec_len;
+		if (ret != LIBBPF_PERF_EVENT_CONT)
+			break;
+	}
+
+	ring_buffer_write_tail(header, tail);
+	return ret;
+}
+
+struct perf_buffer; /* forward declaration; the full definition follows below */
+
+struct perf_buffer_params { /* creation arguments consumed by __perf_buffer__new() */
+ struct perf_event_attr *attr; /* attributes used to open each per-CPU perf event */
+ /* if event_cb is specified, it takes precedence */
+ perf_buffer_event_fn event_cb;
+ /* sample_cb and lost_cb are higher-level common-case callbacks */
+ perf_buffer_sample_fn sample_cb;
+ perf_buffer_lost_fn lost_cb;
+ void *ctx; /* copied into perf_buffer.ctx and passed into callbacks */
+ int cpu_cnt; /* > 0: use cpus[] and map_keys[]; otherwise CPU and key are the loop index */
+ int *cpus; /* CPU per buffer; read only when cpu_cnt > 0 */
+ int *map_keys; /* BPF map key per buffer; read only when cpu_cnt > 0 */
+};
+
+struct perf_cpu_buf { /* state of one perf ring buffer; one per perf_buffer.cpu_bufs entry */
+ struct perf_buffer *pb; /* owning perf_buffer */
+ void *base; /* mmap()'ed memory */
+ void *buf; /* for reconstructing segmented data */
+ size_t buf_size; /* current allocation size of buf */
+ int fd; /* FD returned by perf_event_open for this buffer */
+ int cpu; /* CPU the perf event was opened on */
+ int map_key; /* key under which fd is stored in the BPF map */
+};
+
+struct perf_buffer { /* a set of perf ring buffers polled through one epoll instance */
+ perf_buffer_event_fn event_cb; /* raw-record callback; when set, sample_cb/lost_cb are not used */
+ perf_buffer_sample_fn sample_cb; /* called for PERF_RECORD_SAMPLE records */
+ perf_buffer_lost_fn lost_cb; /* called for PERF_RECORD_LOST records */
+ void *ctx; /* passed into callbacks */
+
+ size_t page_size; /* getpagesize() at creation time */
+ size_t mmap_size; /* data area size: page_size * page_cnt */
+ struct perf_cpu_buf **cpu_bufs; /* array of cpu_cnt buffer pointers */
+ struct epoll_event *events; /* array of cpu_cnt entries, also used as epoll_wait() output */
+ int cpu_cnt; /* number of ring buffers */
+ int epoll_fd; /* epoll instance FD */
+ int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
+};
+
+/*
+ * Release everything owned by @cpu_buf: unmap the ring (a failure is only
+ * logged), disable and close the perf event FD, free the scratch buffer and
+ * finally the structure itself. A NULL @cpu_buf is ignored.
+ */
+static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
+				      struct perf_cpu_buf *cpu_buf)
+{
+	if (!cpu_buf)
+		return;
+
+	if (cpu_buf->base) {
+		/* the mapping covers the data area plus one control page */
+		size_t map_len = pb->mmap_size + pb->page_size;
+
+		if (munmap(cpu_buf->base, map_len) != 0)
+			pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
+	}
+
+	if (cpu_buf->fd >= 0) {
+		ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
+		close(cpu_buf->fd);
+	}
+
+	free(cpu_buf->buf);
+	free(cpu_buf);
+}
+
+/*
+ * Tear down a perf buffer: for every populated per-CPU slot (stopping at the
+ * first empty one) delete its key from the BPF map and free the buffer, then
+ * close the epoll FD and release the arrays and @pb itself.
+ * A NULL @pb is ignored.
+ */
+void perf_buffer__free(struct perf_buffer *pb)
+{
+	int idx;
+
+	if (!pb)
+		return;
+
+	if (pb->cpu_bufs) {
+		for (idx = 0; idx < pb->cpu_cnt; idx++) {
+			struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[idx];
+
+			/* slots are filled in order; the first gap ends the list */
+			if (!cpu_buf)
+				break;
+
+			bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
+			perf_buffer__free_cpu_buf(pb, cpu_buf);
+		}
+		free(pb->cpu_bufs);
+	}
+
+	if (pb->epoll_fd >= 0)
+		close(pb->epoll_fd);
+
+	free(pb->events);
+	free(pb);
+}
+
+/*
+ * Create one ring buffer for @pb: open a perf event on @cpu with @attr,
+ * mmap its control page plus data area, and enable the event.
+ *
+ * Returns the new buffer or an ERR_PTR-encoded negative error; on failure
+ * everything acquired so far is released.
+ */
+static struct perf_cpu_buf *
+perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
+			  int cpu, int map_key)
+{
+	size_t map_len = pb->mmap_size + pb->page_size;
+	struct perf_cpu_buf *cpu_buf;
+	char msg[STRERR_BUFSIZE];
+	int err;
+
+	cpu_buf = calloc(1, sizeof(*cpu_buf));
+	if (!cpu_buf)
+		return ERR_PTR(-ENOMEM);
+
+	cpu_buf->pb = pb;
+	cpu_buf->cpu = cpu;
+	cpu_buf->map_key = map_key;
+
+	cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
+			      -1, PERF_FLAG_FD_CLOEXEC);
+	if (cpu_buf->fd < 0) {
+		err = -errno;
+		pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
+			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
+		goto err_free;
+	}
+
+	cpu_buf->base = mmap(NULL, map_len, PROT_READ | PROT_WRITE,
+			     MAP_SHARED, cpu_buf->fd, 0);
+	if (cpu_buf->base == MAP_FAILED) {
+		err = -errno;
+		/* keep the cleanup path from trying to unmap MAP_FAILED */
+		cpu_buf->base = NULL;
+		pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
+			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
+		goto err_free;
+	}
+
+	if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
+		err = -errno;
+		pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
+			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
+		goto err_free;
+	}
+
+	return cpu_buf;
+
+err_free:
+	perf_buffer__free_cpu_buf(pb, cpu_buf);
+	return (struct perf_cpu_buf *)ERR_PTR(err);
+}
+
+static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
+					      struct perf_buffer_params *p);
+
+/*
+ * Create a perf buffer for the common case: a software
+ * PERF_COUNT_SW_BPF_OUTPUT event sampling raw data, with a wakeup on every
+ * event.
+ *
+ * @map_fd:   FD of the BPF map the per-CPU perf FDs are stored into
+ * @page_cnt: number of data pages per ring buffer
+ * @opts:     optional sample/lost callbacks and their context; may be NULL,
+ *            in which case no callbacks are installed
+ *
+ * Returns the new perf buffer or an ERR_PTR-encoded error.
+ */
+struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
+				     const struct perf_buffer_opts *opts)
+{
+	struct perf_buffer_params p = {};
+	struct perf_event_attr attr = { 0, };
+
+	/* set the size explicitly, as the other perf_event_open callers do */
+	attr.size = sizeof(attr);
+	attr.config = PERF_COUNT_SW_BPF_OUTPUT;
+	attr.type = PERF_TYPE_SOFTWARE;
+	attr.sample_type = PERF_SAMPLE_RAW;
+	attr.sample_period = 1;
+	attr.wakeup_events = 1;
+
+	p.attr = &attr;
+	p.sample_cb = opts ? opts->sample_cb : NULL;
+	p.lost_cb = opts ? opts->lost_cb : NULL;
+	p.ctx = opts ? opts->ctx : NULL;
+
+	return __perf_buffer__new(map_fd, page_cnt, &p);
+}
+
+/*
+ * Create a perf buffer with caller-supplied perf event attributes, a raw
+ * event callback and, optionally, an explicit list of CPUs and map keys.
+ *
+ * @map_fd:   FD of the BPF map the per-CPU perf FDs are stored into
+ * @page_cnt: number of data pages per ring buffer
+ * @opts:     raw options; required, since it carries the event attributes
+ *
+ * Returns the new perf buffer or an ERR_PTR-encoded error (-EINVAL when
+ * @opts is NULL).
+ */
+struct perf_buffer *
+perf_buffer__new_raw(int map_fd, size_t page_cnt,
+		     const struct perf_buffer_raw_opts *opts)
+{
+	struct perf_buffer_params p = {};
+
+	/* fail cleanly instead of dereferencing a NULL options pointer */
+	if (!opts)
+		return ERR_PTR(-EINVAL);
+
+	p.attr = opts->attr;
+	p.event_cb = opts->event_cb;
+	p.ctx = opts->ctx;
+	p.cpu_cnt = opts->cpu_cnt;
+	p.cpus = opts->cpus;
+	p.map_keys = opts->map_keys;
+
+	return __perf_buffer__new(map_fd, page_cnt, &p);
+}
+
+/*
+ * Common constructor behind perf_buffer__new() and perf_buffer__new_raw().
+ *
+ * Validates @page_cnt (a non-zero power of two) and that @map_fd refers to a
+ * BPF_MAP_TYPE_PERF_EVENT_ARRAY map, then for each buffer opens a perf
+ * event, stores its FD in the map under the chosen key and registers it
+ * with an epoll instance.
+ *
+ * When p->cpu_cnt > 0 the CPUs and map keys come from p->cpus[] and
+ * p->map_keys[]; otherwise buffers 0..N-1 are created, where N is the number
+ * of possible CPUs capped by the map's max_entries.
+ *
+ * Returns the new perf buffer or an ERR_PTR-encoded negative error; on
+ * failure everything created so far is released.
+ */
+static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
+					      struct perf_buffer_params *p)
+{
+	struct bpf_map_info map = {};
+	char msg[STRERR_BUFSIZE];
+	struct perf_buffer *pb;
+	__u32 map_info_len;
+	int err, i;
+
+	/*
+	 * Zero also satisfies 'x & (x - 1) == 0' but would give an empty
+	 * ring, so it is rejected explicitly.
+	 */
+	if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
+		pr_warn("page count should be power of two, but is %zu\n",
+			page_cnt);
+		return ERR_PTR(-EINVAL);
+	}
+
+	map_info_len = sizeof(map);
+	err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
+	if (err) {
+		err = -errno;
+		pr_warn("failed to get map info for map FD %d: %s\n",
+			map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
+		return ERR_PTR(err);
+	}
+
+	if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+		pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
+			map.name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	pb = calloc(1, sizeof(*pb));
+	if (!pb)
+		return ERR_PTR(-ENOMEM);
+
+	pb->event_cb = p->event_cb;
+	pb->sample_cb = p->sample_cb;
+	pb->lost_cb = p->lost_cb;
+	pb->ctx = p->ctx;
+
+	pb->page_size = getpagesize();
+	pb->mmap_size = pb->page_size * page_cnt;
+	pb->map_fd = map_fd;
+
+	/* set before any 'goto error' so cleanup sees a real FD or -1 */
+	pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+	if (pb->epoll_fd < 0) {
+		err = -errno;
+		pr_warn("failed to create epoll instance: %s\n",
+			libbpf_strerror_r(err, msg, sizeof(msg)));
+		goto error;
+	}
+
+	if (p->cpu_cnt > 0) {
+		pb->cpu_cnt = p->cpu_cnt;
+	} else {
+		pb->cpu_cnt = libbpf_num_possible_cpus();
+		if (pb->cpu_cnt < 0) {
+			err = pb->cpu_cnt;
+			goto error;
+		}
+		/* never create more buffers than the map has slots */
+		if (map.max_entries < pb->cpu_cnt)
+			pb->cpu_cnt = map.max_entries;
+	}
+
+	pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
+	if (!pb->events) {
+		err = -ENOMEM;
+		pr_warn("failed to allocate events: out of memory\n");
+		goto error;
+	}
+	pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
+	if (!pb->cpu_bufs) {
+		err = -ENOMEM;
+		pr_warn("failed to allocate buffers: out of memory\n");
+		goto error;
+	}
+
+	for (i = 0; i < pb->cpu_cnt; i++) {
+		struct perf_cpu_buf *cpu_buf;
+		int cpu, map_key;
+
+		cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
+		map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
+
+		cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
+		if (IS_ERR(cpu_buf)) {
+			err = PTR_ERR(cpu_buf);
+			goto error;
+		}
+
+		/* publish the slot first so the error path can free it */
+		pb->cpu_bufs[i] = cpu_buf;
+
+		err = bpf_map_update_elem(pb->map_fd, &map_key,
+					  &cpu_buf->fd, 0);
+		if (err) {
+			err = -errno;
+			pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
+				cpu, map_key, cpu_buf->fd,
+				libbpf_strerror_r(err, msg, sizeof(msg)));
+			goto error;
+		}
+
+		pb->events[i].events = EPOLLIN;
+		pb->events[i].data.ptr = cpu_buf;
+		if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
+			      &pb->events[i]) < 0) {
+			err = -errno;
+			pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
+				cpu, cpu_buf->fd,
+				libbpf_strerror_r(err, msg, sizeof(msg)));
+			goto error;
+		}
+	}
+
+	return pb;
+
+error:
+	/* perf_buffer__free() copes with a partially built perf buffer */
+	perf_buffer__free(pb);
+	return ERR_PTR(err);
+}
+
+struct perf_sample_raw { /* layout used to read a PERF_RECORD_SAMPLE record */
+ struct perf_event_header header;
+ uint32_t size; /* passed to sample_cb as the data size */
+ char data[0]; /* start of the payload passed to sample_cb */
+};
+
+struct perf_sample_lost { /* layout used to read a PERF_RECORD_LOST record */
+ struct perf_event_header header;
+ uint64_t id;
+ uint64_t lost; /* passed to lost_cb */
+ uint64_t sample_id;
+};
+
+/*
+ * Per-record callback handed to bpf_perf_event_read_simple(); @ctx is the
+ * struct perf_cpu_buf the record came from.
+ *
+ * If the perf buffer has an event_cb, the record is forwarded to it untouched
+ * and its return value is returned. Otherwise sample records go to sample_cb
+ * and lost records to lost_cb (each only if set); any other record type is
+ * reported and yields LIBBPF_PERF_EVENT_ERROR.
+ */
+static enum bpf_perf_event_ret
+perf_buffer__process_record(struct perf_event_header *e, void *ctx)
+{
+	struct perf_cpu_buf *cpu_buf = ctx;
+	struct perf_buffer *pb = cpu_buf->pb;
+
+	/* user wants full control over parsing perf event */
+	if (pb->event_cb)
+		return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
+
+	if (e->type == PERF_RECORD_SAMPLE) {
+		struct perf_sample_raw *sample = (void *)e;
+
+		if (pb->sample_cb)
+			pb->sample_cb(pb->ctx, cpu_buf->cpu,
+				      sample->data, sample->size);
+	} else if (e->type == PERF_RECORD_LOST) {
+		struct perf_sample_lost *lost = (void *)e;
+
+		if (pb->lost_cb)
+			pb->lost_cb(pb->ctx, cpu_buf->cpu, lost->lost);
+	} else {
+		pr_warn("unknown perf sample type %d\n", e->type);
+		return LIBBPF_PERF_EVENT_ERROR;
+	}
+
+	return LIBBPF_PERF_EVENT_CONT;
+}
+
+/*
+ * Drain one ring buffer through perf_buffer__process_record().
+ * Returns 0 when every record was handled with LIBBPF_PERF_EVENT_CONT,
+ * otherwise the status that stopped processing.
+ */
+static int perf_buffer__process_records(struct perf_buffer *pb,
+					struct perf_cpu_buf *cpu_buf)
+{
+	enum bpf_perf_event_ret status;
+
+	status = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
+					    pb->page_size, &cpu_buf->buf,
+					    &cpu_buf->buf_size,
+					    perf_buffer__process_record,
+					    cpu_buf);
+
+	return status == LIBBPF_PERF_EVENT_CONT ? 0 : status;
+}
+
+/*
+ * Wait up to @timeout_ms on the perf buffer's epoll instance and process the
+ * records of every ring buffer that became readable.
+ *
+ * Returns the number of ready buffers, -errno if epoll_wait() failed, or the
+ * status of the first buffer whose processing did not complete.
+ */
+int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
+{
+	int idx, nready, err;
+
+	nready = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
+	if (nready < 0)
+		return -errno;
+
+	for (idx = 0; idx < nready; idx++) {
+		struct perf_cpu_buf *cpu_buf = pb->events[idx].data.ptr;
+
+		err = perf_buffer__process_records(pb, cpu_buf);
+		if (err) {
+			pr_warn("error while processing records: %d\n", err);
+			return err;
+		}
+	}
+
+	return nready;
+}
+
+struct bpf_prog_info_array_desc { /* locates one variable-length array inside struct bpf_prog_info */
+ int array_offset; /* e.g. offset of jited_prog_insns */
+ int count_offset; /* e.g. offset of jited_prog_len */
+ int size_offset; /* > 0: offset of rec size,
+ * < 0: fixed size of -size_offset
+ */
+};
+
+/*
+ * Layout table indexed by BPF_PROG_INFO_* array id: where each array's
+ * pointer and element count live inside struct bpf_prog_info, and either
+ * where its record size lives (positive size_offset) or its fixed record
+ * size (negative size_offset).
+ */
+static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = {
+	[BPF_PROG_INFO_JITED_INSNS] = {
+		.array_offset = offsetof(struct bpf_prog_info, jited_prog_insns),
+		.count_offset = offsetof(struct bpf_prog_info, jited_prog_len),
+		.size_offset = -1,
+	},
+	[BPF_PROG_INFO_XLATED_INSNS] = {
+		.array_offset = offsetof(struct bpf_prog_info, xlated_prog_insns),
+		.count_offset = offsetof(struct bpf_prog_info, xlated_prog_len),
+		.size_offset = -1,
+	},
+	[BPF_PROG_INFO_MAP_IDS] = {
+		.array_offset = offsetof(struct bpf_prog_info, map_ids),
+		.count_offset = offsetof(struct bpf_prog_info, nr_map_ids),
+		.size_offset = -(int)sizeof(__u32),
+	},
+	[BPF_PROG_INFO_JITED_KSYMS] = {
+		.array_offset = offsetof(struct bpf_prog_info, jited_ksyms),
+		.count_offset = offsetof(struct bpf_prog_info, nr_jited_ksyms),
+		.size_offset = -(int)sizeof(__u64),
+	},
+	[BPF_PROG_INFO_JITED_FUNC_LENS] = {
+		.array_offset = offsetof(struct bpf_prog_info, jited_func_lens),
+		.count_offset = offsetof(struct bpf_prog_info, nr_jited_func_lens),
+		.size_offset = -(int)sizeof(__u32),
+	},
+	[BPF_PROG_INFO_FUNC_INFO] = {
+		.array_offset = offsetof(struct bpf_prog_info, func_info),
+		.count_offset = offsetof(struct bpf_prog_info, nr_func_info),
+		.size_offset = offsetof(struct bpf_prog_info, func_info_rec_size),
+	},
+	[BPF_PROG_INFO_LINE_INFO] = {
+		.array_offset = offsetof(struct bpf_prog_info, line_info),
+		.count_offset = offsetof(struct bpf_prog_info, nr_line_info),
+		.size_offset = offsetof(struct bpf_prog_info, line_info_rec_size),
+	},
+	[BPF_PROG_INFO_JITED_LINE_INFO] = {
+		.array_offset = offsetof(struct bpf_prog_info, jited_line_info),
+		.count_offset = offsetof(struct bpf_prog_info, nr_jited_line_info),
+		.size_offset = offsetof(struct bpf_prog_info, jited_line_info_rec_size),
+	},
+	[BPF_PROG_INFO_PROG_TAGS] = {
+		.array_offset = offsetof(struct bpf_prog_info, prog_tags),
+		.count_offset = offsetof(struct bpf_prog_info, nr_prog_tags),
+		.size_offset = -(int)sizeof(__u8) * BPF_TAG_SIZE,
+	},
+};
+
+/*
+ * Read the __u32 located @offset bytes into @info. A negative @offset is not
+ * a location but an encoded constant: its negation is returned instead.
+ */
+static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info,
+					   int offset)
+{
+	if (offset < 0)
+		return -offset;
+
+	return ((__u32 *)info)[offset / sizeof(__u32)];
+}
+
+/*
+ * Read the __u64 located @offset bytes into @info. A negative @offset is not
+ * a location but an encoded constant: its negation is returned instead.
+ */
+static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info,
+					   int offset)
+{
+	if (offset < 0)
+		return -offset;
+
+	return ((__u64 *)info)[offset / sizeof(__u64)];
+}
+
+/*
+ * Store @val as the __u32 located @offset bytes into @info.
+ * A negative @offset does not name a field, so nothing is written.
+ */
+static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
+					 __u32 val)
+{
+	if (offset < 0)
+		return;
+
+	((__u32 *)info)[offset / sizeof(__u32)] = val;
+}
+
+/*
+ * Store @val as the __u64 located @offset bytes into @info.
+ * A negative @offset does not name a field, so nothing is written.
+ */
+static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
+					 __u64 val)
+{
+	if (offset < 0)
+		return;
+
+	((__u64 *)info)[offset / sizeof(__u64)] = val;
+}
+
+/*
+ * Fetch struct bpf_prog_info for program @fd together with the arrays
+ * selected by the @arrays bitmask (bit i selects BPF_PROG_INFO_* array i),
+ * all packed into one malloc()'ed block.
+ *
+ * The info is queried twice: first to learn the array dimensions, then again
+ * with the array pointers aimed into the data area that follows the header.
+ * Arrays whose fields lie beyond the info length reported by the kernel are
+ * silently dropped from the mask.
+ *
+ * Returns the block (the caller frees it with free()) or ERR_PTR(-EINVAL)
+ * for unknown bits in @arrays, ERR_PTR(-EFAULT) if a query fails and
+ * ERR_PTR(-ENOMEM) if the allocation fails.
+ */
+struct bpf_prog_info_linear *
+bpf_program__get_prog_info_linear(int fd, __u64 arrays)
+{
+	struct bpf_prog_info_linear *info_linear;
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	__u32 data_len = 0;
+	int i, err;
+	void *ptr;
+
+	if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
+		return ERR_PTR(-EINVAL);
+
+	/* step 1: get array dimensions */
+	err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+	if (err) {
+		pr_debug("can't get prog info: %s\n", strerror(errno));
+		return ERR_PTR(-EFAULT);
+	}
+
+	/* step 2: calculate total size of all arrays */
+	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
+		bool include_array = (arrays & (1UL << i)) > 0;
+		struct bpf_prog_info_array_desc *desc;
+		__u32 count, size;
+
+		desc = bpf_prog_info_array_desc + i;
+
+		/*
+		 * kernel is too old to support this field: every field that
+		 * is read must lie completely inside the reported info_len
+		 */
+		if (info_len < desc->array_offset + sizeof(__u32) ||
+		    info_len < desc->count_offset + sizeof(__u32) ||
+		    (desc->size_offset > 0 &&
+		     info_len < desc->size_offset + sizeof(__u32)))
+			include_array = false;
+
+		if (!include_array) {
+			arrays &= ~(1UL << i);	/* clear the bit */
+			continue;
+		}
+
+		count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+		size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+
+		data_len += count * size;
+	}
+
+	/* step 3: allocate continuous memory */
+	data_len = roundup(data_len, sizeof(__u64));
+	info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
+	if (!info_linear)
+		return ERR_PTR(-ENOMEM);
+
+	/* step 4: fill data to info_linear->info */
+	info_linear->arrays = arrays;
+	memset(&info_linear->info, 0, sizeof(info));
+	ptr = info_linear->data;
+
+	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
+		struct bpf_prog_info_array_desc *desc;
+		__u32 count, size;
+
+		if ((arrays & (1UL << i)) == 0)
+			continue;
+
+		desc  = bpf_prog_info_array_desc + i;
+		count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+		size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+		bpf_prog_info_set_offset_u32(&info_linear->info,
+					     desc->count_offset, count);
+		bpf_prog_info_set_offset_u32(&info_linear->info,
+					     desc->size_offset, size);
+		bpf_prog_info_set_offset_u64(&info_linear->info,
+					     desc->array_offset,
+					     ptr_to_u64(ptr));
+		/* the next array starts right after this one */
+		ptr += count * size;
+	}
+
+	/* step 5: call syscall again to get required arrays */
+	err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
+	if (err) {
+		pr_debug("can't get prog info: %s\n", strerror(errno));
+		free(info_linear);
+		return ERR_PTR(-EFAULT);
+	}
+
+	/* step 6: verify the data */
+	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
+		struct bpf_prog_info_array_desc *desc;
+		__u32 v1, v2;
+
+		if ((arrays & (1UL << i)) == 0)
+			continue;
+
+		desc = bpf_prog_info_array_desc + i;
+		v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+		v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
+						   desc->count_offset);
+		if (v1 != v2)
+			pr_warn("%s: mismatch in element count\n", __func__);
+
+		v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+		v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
+						   desc->size_offset);
+		if (v1 != v2)
+			pr_warn("%s: mismatch in rec size\n", __func__);
+	}
+
+	/* step 7: update info_len and data_len */
+	info_linear->info_len = sizeof(struct bpf_prog_info);
+	info_linear->data_len = data_len;
+
+	return info_linear;
+}
+
+/*
+ * For every array selected in info_linear->arrays, replace the address
+ * stored in the info structure by its offset from the start of
+ * info_linear->data.
+ */
+void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear)
+{
+	int idx;
+
+	for (idx = BPF_PROG_INFO_FIRST_ARRAY;
+	     idx < BPF_PROG_INFO_LAST_ARRAY; ++idx) {
+		struct bpf_prog_info_array_desc *desc;
+		__u64 address;
+
+		if (!(info_linear->arrays & (1UL << idx)))
+			continue;
+
+		desc = &bpf_prog_info_array_desc[idx];
+		address = bpf_prog_info_read_offset_u64(&info_linear->info,
+							desc->array_offset);
+		bpf_prog_info_set_offset_u64(&info_linear->info,
+					     desc->array_offset,
+					     address - ptr_to_u64(info_linear->data));
+	}
+}
+
+/*
+ * Inverse of bpf_program__bpil_addr_to_offs(): for every array selected in
+ * info_linear->arrays, replace the offset stored in the info structure by
+ * the address obtained by adding the start of info_linear->data.
+ */
+void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
+{
+	int idx;
+
+	for (idx = BPF_PROG_INFO_FIRST_ARRAY;
+	     idx < BPF_PROG_INFO_LAST_ARRAY; ++idx) {
+		struct bpf_prog_info_array_desc *desc;
+		__u64 offset;
+
+		if (!(info_linear->arrays & (1UL << idx)))
+			continue;
+
+		desc = &bpf_prog_info_array_desc[idx];
+		offset = bpf_prog_info_read_offset_u64(&info_linear->info,
+						       desc->array_offset);
+		bpf_prog_info_set_offset_u64(&info_linear->info,
+					     desc->array_offset,
+					     offset + ptr_to_u64(info_linear->data));
+	}
+}
+
+/*
+ * Count the CPUs listed in /sys/devices/system/cpu/possible.
+ *
+ * The file holds a comma-separated list of single CPU numbers and
+ * "first-last" ranges. A successful result is cached in a static variable
+ * and returned directly by later calls.
+ *
+ * Returns the number of possible CPUs (> 0), or a negative error code:
+ * the negated errno of a failed open()/read(), -EINVAL for an empty or
+ * unparsable file, -EOVERFLOW if the contents do not fit the local buffer.
+ */
+int libbpf_num_possible_cpus(void)
+{
+	static const char *fcpu = "/sys/devices/system/cpu/possible";
+	int len = 0, n = 0, il = 0, ir = 0;
+	unsigned int start = 0, end = 0;
+	int tmp_cpus = 0;
+	static int cpus;
+	char buf[128];
+	int error = 0;
+	int fd = -1;
+
+	tmp_cpus = READ_ONCE(cpus);
+	if (tmp_cpus > 0)
+		return tmp_cpus;
+
+	fd = open(fcpu, O_RDONLY);
+	if (fd < 0) {
+		error = errno;
+		pr_warn("Failed to open file %s: %s\n", fcpu, strerror(error));
+		return -error;
+	}
+	len = read(fd, buf, sizeof(buf));
+	/* capture errno now: close() below may overwrite read()'s error */
+	error = errno;
+	close(fd);
+	if (len <= 0) {
+		/* an empty file is reported as -EINVAL */
+		error = len ? error : EINVAL;
+		pr_warn("Failed to read # of possible cpus from %s: %s\n",
+			fcpu, strerror(error));
+		return -error;
+	}
+	/* a completely filled buffer leaves no room for the terminator */
+	if (len == sizeof(buf)) {
+		pr_warn("File %s size overflow\n", fcpu);
+		return -EOVERFLOW;
+	}
+	buf[len] = '\0';
+
+	/* il marks the start of the current item, ir scans for its end */
+	for (ir = 0, tmp_cpus = 0; ir <= len; ir++) {
+		/* Each sub string separated by ',' has format \d+-\d+ or \d+ */
+		if (buf[ir] == ',' || buf[ir] == '\0') {
+			buf[ir] = '\0';
+			n = sscanf(&buf[il], "%u-%u", &start, &end);
+			if (n <= 0) {
+				pr_warn("Failed to get # CPUs from %s\n",
+					&buf[il]);
+				return -EINVAL;
+			} else if (n == 1) {
+				/* a single CPU number is a range of one */
+				end = start;
+			}
+			tmp_cpus += end - start + 1;
+			il = ir + 1;
+		}
+	}
+	if (tmp_cpus <= 0) {
+		pr_warn("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu);
+		return -EINVAL;
+	}
+
+	WRITE_ONCE(cpus, tmp_cpus);
+	return tmp_cpus;
+}
diff --git a/src/contrib/libbpf/bpf/libbpf.h b/src/contrib/libbpf/bpf/libbpf.h
new file mode 100644
index 0000000..0dbf4bf
--- /dev/null
+++ b/src/contrib/libbpf/bpf/libbpf.h
@@ -0,0 +1,637 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Common eBPF ELF object loading operations.
+ *
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ */
+#ifndef __LIBBPF_LIBBPF_H
+#define __LIBBPF_LIBBPF_H
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <sys/types.h> // for size_t
+#include <linux/bpf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef LIBBPF_API
+#define LIBBPF_API __attribute__((visibility("default")))
+#endif
+
+enum libbpf_errno {
+ __LIBBPF_ERRNO__START = 4000,
+
+ /* Something wrong in libelf */
+ LIBBPF_ERRNO__LIBELF = __LIBBPF_ERRNO__START,
+ LIBBPF_ERRNO__FORMAT, /* BPF object format invalid */
+ LIBBPF_ERRNO__KVERSION, /* Incorrect or no 'version' section */
+ LIBBPF_ERRNO__ENDIAN, /* Endian mismatch */
+ LIBBPF_ERRNO__INTERNAL, /* Internal error in libbpf */
+ LIBBPF_ERRNO__RELOC, /* Relocation failed */
+ LIBBPF_ERRNO__LOAD, /* Load program failure for unknown reason */
+ LIBBPF_ERRNO__VERIFY, /* Kernel verifier blocks program loading */
+ LIBBPF_ERRNO__PROG2BIG, /* Program too big */
+ LIBBPF_ERRNO__KVER, /* Incorrect kernel version */
+ LIBBPF_ERRNO__PROGTYPE, /* Kernel doesn't support this program type */
+ LIBBPF_ERRNO__WRNGPID, /* Wrong pid in netlink message */
+ LIBBPF_ERRNO__INVSEQ, /* Invalid netlink sequence */
+ LIBBPF_ERRNO__NLPARSE, /* netlink parsing error */
+ __LIBBPF_ERRNO__END,
+};
+
+LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size);
+
+enum libbpf_print_level {
+ LIBBPF_WARN,
+ LIBBPF_INFO,
+ LIBBPF_DEBUG,
+};
+
+typedef int (*libbpf_print_fn_t)(enum libbpf_print_level level,
+ const char *, va_list ap);
+
+LIBBPF_API libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn);
+
+/* Hide internal to user */
+struct bpf_object;
+
+struct bpf_object_open_attr {
+ const char *file;
+ enum bpf_prog_type prog_type;
+};
+
+/* Helper macro to declare and initialize libbpf options struct
+ *
+ * This dance with uninitialized declaration, followed by memset to zero,
+ * followed by assignment using compound literal syntax is done to preserve
+ * ability to use a nice struct field initialization syntax and **hopefully**
+ * have all the padding bytes initialized to zero. It's not guaranteed though,
+ * when copying literal, that compiler won't copy garbage in literal's padding
+ * bytes, but that's the best way I've found and it seems to work in practice.
+ *
+ * Macro declares opts struct of given type and name, zero-initializes it
+ * (including any extra padding) with memset() and then assigns initial
+ * values provided by users in struct initializer-syntax as varargs.
+ */
+#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \
+ struct TYPE NAME = ({ \
+ memset(&NAME, 0, sizeof(struct TYPE)); \
+ (struct TYPE) { \
+ .sz = sizeof(struct TYPE), \
+ __VA_ARGS__ \
+ }; \
+ })
+
+/* Options accepted by bpf_object__open_file() and bpf_object__open_mem(). */
+struct bpf_object_open_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ /* object name override, if provided:
+ * - for object open from file, this will override setting object
+ * name from file path's base name;
+ * - for object open from memory buffer, this will specify an object
+ * name and will override default "<addr>-<buf-size>" name;
+ */
+ const char *object_name;
+ /* parse map definitions non-strictly, allowing extra attributes/data */
+ bool relaxed_maps;
+ /* process CO-RE relocations non-strictly, allowing them to fail */
+ bool relaxed_core_relocs;
+ /* maps that set the 'pinning' attribute in their definition will have
+ * their pin_path attribute set to a file in this directory, and be
+ * auto-pinned to that path on load; defaults to "/sys/fs/bpf".
+ */
+ const char *pin_root_path;
+ /* NOTE(review): undocumented here; presumably the FD of an already
+ * loaded program that programs of this object attach to -- confirm.
+ */
+ __u32 attach_prog_fd;
+};
+/* Names the last field of the struct; OPTS_VALID() in libbpf_internal.h
+ * pastes this name to compute the struct size known to the library.
+ */
+#define bpf_object_open_opts__last_field attach_prog_fd
+
+LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
+LIBBPF_API struct bpf_object *
+bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts);
+LIBBPF_API struct bpf_object *
+bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
+ struct bpf_object_open_opts *opts);
+
+/* deprecated bpf_object__open variants */
+LIBBPF_API struct bpf_object *
+bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
+ const char *name);
+LIBBPF_API struct bpf_object *
+bpf_object__open_xattr(struct bpf_object_open_attr *attr);
+
+int bpf_object__section_size(const struct bpf_object *obj, const char *name,
+ __u32 *size);
+int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
+ __u32 *off);
+
+enum libbpf_pin_type {
+ LIBBPF_PIN_NONE,
+ /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
+ LIBBPF_PIN_BY_NAME,
+};
+
+/* pin_maps and unpin_maps can both be called with a NULL path, in which case
+ * they will use the pin_path attribute of each map (and ignore all maps that
+ * don't have a pin_path set).
+ */
+LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path);
+LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj,
+ const char *path);
+LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj,
+ const char *path);
+LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj,
+ const char *path);
+LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path);
+LIBBPF_API void bpf_object__close(struct bpf_object *object);
+
+struct bpf_object_load_attr {
+ struct bpf_object *obj;
+ int log_level;
+ const char *target_btf_path;
+};
+
+/* Load/unload object into/from kernel */
+LIBBPF_API int bpf_object__load(struct bpf_object *obj);
+LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr);
+LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
+LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj);
+LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj);
+
+struct btf;
+LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj);
+LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj);
+
+LIBBPF_API struct bpf_program *
+bpf_object__find_program_by_title(const struct bpf_object *obj,
+ const char *title);
+
+LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev);
+/* Iterate over the objects returned by bpf_object__next(), starting from
+ * bpf_object__next(NULL). The successor of pos is fetched into tmp before
+ * the loop body runs, so advancing does not need pos afterwards --
+ * presumably so that the body may close pos (confirm against callers).
+ */
+#define bpf_object__for_each_safe(pos, tmp) \
+ for ((pos) = bpf_object__next(NULL), \
+ (tmp) = bpf_object__next(pos); \
+ (pos) != NULL; \
+ (pos) = (tmp), (tmp) = bpf_object__next(tmp))
+
+typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *);
+LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv,
+ bpf_object_clear_priv_t clear_priv);
+LIBBPF_API void *bpf_object__priv(const struct bpf_object *prog);
+
+LIBBPF_API int
+libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
+ enum bpf_attach_type *expected_attach_type);
+LIBBPF_API int libbpf_attach_type_by_name(const char *name,
+ enum bpf_attach_type *attach_type);
+LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name,
+ enum bpf_attach_type attach_type);
+
+/* Accessors of bpf_program */
+struct bpf_program;
+LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog,
+ const struct bpf_object *obj);
+
+#define bpf_object__for_each_program(pos, obj) \
+ for ((pos) = bpf_program__next(NULL, (obj)); \
+ (pos) != NULL; \
+ (pos) = bpf_program__next((pos), (obj)))
+
+LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog,
+ const struct bpf_object *obj);
+
+typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *);
+
+LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv,
+ bpf_program_clear_priv_t clear_priv);
+
+LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog);
+LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
+ __u32 ifindex);
+
+LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog,
+ bool needs_copy);
+
+/* returns program size in bytes */
+LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog);
+
+LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license,
+ __u32 kern_version);
+LIBBPF_API int bpf_program__fd(const struct bpf_program *prog);
+LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog,
+ const char *path,
+ int instance);
+LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog,
+ const char *path,
+ int instance);
+LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path);
+LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path);
+LIBBPF_API void bpf_program__unload(struct bpf_program *prog);
+
+struct bpf_link;
+
+LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_perf_event(struct bpf_program *prog, int pfd);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe,
+ const char *func_name);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe,
+ pid_t pid, const char *binary_path,
+ size_t func_offset);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_tracepoint(struct bpf_program *prog,
+ const char *tp_category,
+ const char *tp_name);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
+ const char *tp_name);
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_trace(struct bpf_program *prog);
+struct bpf_insn;
+
+/*
+ * Libbpf allows callers to adjust BPF programs before being loaded
+ * into kernel. One program in an object file can be transformed into
+ * multiple variants to be attached to different hooks.
+ *
+ * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd
+ * form an API for this purpose.
+ *
+ * - bpf_program_prep_t:
+ * Defines a 'preprocessor', which is a caller defined function
+ * passed to libbpf through bpf_program__set_prep(), and will be
+ * called before program is loaded. The processor should adjust
+ * the program one time for each instance according to the instance id
+ * passed to it.
+ *
+ * - bpf_program__set_prep:
+ * Attaches a preprocessor to a BPF program. The number of instances
+ * that should be created is also passed through this function.
+ *
+ * - bpf_program__nth_fd:
+ * After the program is loaded, get resulting FD of a given instance
+ * of the BPF program.
+ *
+ * If bpf_program__set_prep() is not used, the program would be loaded
+ * without adjustment during bpf_object__load(). The program has only
+ * one instance. In this case bpf_program__fd(prog) is equal to
+ * bpf_program__nth_fd(prog, 0).
+ */
+
+struct bpf_prog_prep_result {
+ /*
+ * If not NULL, load new instruction array.
+ * If set to NULL, don't load this instance.
+ */
+ struct bpf_insn *new_insn_ptr;
+ int new_insn_cnt;
+
+ /* If not NULL, result FD is written to it. */
+ int *pfd;
+};
+
+/*
+ * Parameters of bpf_program_prep_t:
+ * - prog: The bpf_program being loaded.
+ * - n: Index of instance being generated.
+ * - insns: BPF instructions array.
+ * - insns_cnt:Number of instructions in insns.
+ * - res: Output parameter, result of transformation.
+ *
+ * Return value:
+ * - Zero: pre-processing success.
+ * - Non-zero: pre-processing error, stop loading.
+ */
+typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
+ struct bpf_insn *insns, int insns_cnt,
+ struct bpf_prog_prep_result *res);
+
+LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
+ bpf_program_prep_t prep);
+
+LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n);
+
+/*
+ * Adjust type of BPF program. Default is kprobe.
+ */
+LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog);
+
+LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog);
+LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
+ enum bpf_prog_type type);
+
+LIBBPF_API enum bpf_attach_type
+bpf_program__get_expected_attach_type(struct bpf_program *prog);
+LIBBPF_API void
+bpf_program__set_expected_attach_type(struct bpf_program *prog,
+ enum bpf_attach_type type);
+
+LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);
+
+/*
+ * No need for __attribute__((packed)), all members of 'bpf_map_def'
+ * are all aligned. In addition, using __attribute__((packed))
+ * would trigger a -Wpacked warning message, and lead to an error
+ * if -Werror is set.
+ */
+struct bpf_map_def {
+ unsigned int type;
+ unsigned int key_size;
+ unsigned int value_size;
+ unsigned int max_entries;
+ unsigned int map_flags;
+};
+
+/*
+ * The 'struct bpf_map' in include/linux/bpf.h is internal to the kernel,
+ * so no need to worry about a name clash.
+ */
+struct bpf_map;
+LIBBPF_API struct bpf_map *
+bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name);
+
+LIBBPF_API int
+bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name);
+
+/*
+ * Get bpf_map through the offset of corresponding struct bpf_map_def
+ * in the BPF object file.
+ */
+LIBBPF_API struct bpf_map *
+bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
+
+LIBBPF_API struct bpf_map *
+bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj);
+#define bpf_object__for_each_map(pos, obj) \
+ for ((pos) = bpf_map__next(NULL, (obj)); \
+ (pos) != NULL; \
+ (pos) = bpf_map__next((pos), (obj)))
+#define bpf_map__for_each bpf_object__for_each_map
+
+LIBBPF_API struct bpf_map *
+bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj);
+
+LIBBPF_API int bpf_map__fd(const struct bpf_map *map);
+LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
+LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);
+LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map);
+LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map);
+
+typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
+LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
+ bpf_map_clear_priv_t clear_priv);
+LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
+LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
+LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
+LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
+LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
+LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
+LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
+LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
+LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);
+LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
+LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
+
+LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd);
+
+LIBBPF_API long libbpf_get_error(const void *ptr);
+
+struct bpf_prog_load_attr {
+ const char *file;
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
+ int ifindex;
+ int log_level;
+ int prog_flags;
+};
+
+LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
+ struct bpf_object **pobj, int *prog_fd);
+LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type,
+ struct bpf_object **pobj, int *prog_fd);
+
+struct xdp_link_info {
+ __u32 prog_id;
+ __u32 drv_prog_id;
+ __u32 hw_prog_id;
+ __u32 skb_prog_id;
+ __u8 attach_mode;
+};
+
+LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
+LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
+ size_t info_size, __u32 flags);
+
+struct perf_buffer;
+
+typedef void (*perf_buffer_sample_fn)(void *ctx, int cpu,
+ void *data, __u32 size);
+typedef void (*perf_buffer_lost_fn)(void *ctx, int cpu, __u64 cnt);
+
+/* common use perf buffer options */
+struct perf_buffer_opts {
+ /* if specified, sample_cb is called for each sample */
+ perf_buffer_sample_fn sample_cb;
+ /* if specified, lost_cb is called for each batch of lost samples */
+ perf_buffer_lost_fn lost_cb;
+ /* ctx is provided to sample_cb and lost_cb */
+ void *ctx;
+};
+
+LIBBPF_API struct perf_buffer *
+perf_buffer__new(int map_fd, size_t page_cnt,
+ const struct perf_buffer_opts *opts);
+
+enum bpf_perf_event_ret {
+ LIBBPF_PERF_EVENT_DONE = 0,
+ LIBBPF_PERF_EVENT_ERROR = -1,
+ LIBBPF_PERF_EVENT_CONT = -2,
+};
+
+struct perf_event_header;
+
+typedef enum bpf_perf_event_ret
+(*perf_buffer_event_fn)(void *ctx, int cpu, struct perf_event_header *event);
+
+/* raw perf buffer options, giving most power and control */
+struct perf_buffer_raw_opts {
+ /* perf event attrs passed directly into perf_event_open() */
+ struct perf_event_attr *attr;
+ /* raw event callback */
+ perf_buffer_event_fn event_cb;
+ /* ctx is provided to event_cb */
+ void *ctx;
+ /* if cpu_cnt == 0, open all on all possible CPUs (up to the number of
+ * max_entries of given PERF_EVENT_ARRAY map)
+ */
+ int cpu_cnt;
+ /* if cpu_cnt > 0, cpus is an array of CPUs to open ring buffers on */
+ int *cpus;
+ /* if cpu_cnt > 0, map_keys specify map keys to set per-CPU FDs for */
+ int *map_keys;
+};
+
+LIBBPF_API struct perf_buffer *
+perf_buffer__new_raw(int map_fd, size_t page_cnt,
+ const struct perf_buffer_raw_opts *opts);
+
+LIBBPF_API void perf_buffer__free(struct perf_buffer *pb);
+LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms);
+
+typedef enum bpf_perf_event_ret
+ (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
+ void *private_data);
+LIBBPF_API enum bpf_perf_event_ret
+bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
+ void **copy_mem, size_t *copy_size,
+ bpf_perf_event_print_t fn, void *private_data);
+
+struct nlattr;
+typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
+int libbpf_netlink_open(unsigned int *nl_pid);
+int libbpf_nl_get_link(int sock, unsigned int nl_pid,
+ libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie);
+int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex,
+ libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie);
+int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
+ libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie);
+int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
+ libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie);
+
+struct bpf_prog_linfo;
+struct bpf_prog_info;
+
+LIBBPF_API void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo);
+LIBBPF_API struct bpf_prog_linfo *
+bpf_prog_linfo__new(const struct bpf_prog_info *info);
+LIBBPF_API const struct bpf_line_info *
+bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo,
+ __u64 addr, __u32 func_idx, __u32 nr_skip);
+LIBBPF_API const struct bpf_line_info *
+bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo,
+ __u32 insn_off, __u32 nr_skip);
+
+/*
+ * Probe for supported system features
+ *
+ * Note that running many of these probes in a short amount of time can cause
+ * the kernel to reach the maximal size of lockable memory allowed for the
+ * user, causing subsequent probes to fail. In this case, the caller may want
+ * to adjust that limit with setrlimit().
+ */
+LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type,
+ __u32 ifindex);
+LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex);
+LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id,
+ enum bpf_prog_type prog_type, __u32 ifindex);
+
+/*
+ * Get bpf_prog_info in continuous memory
+ *
+ * struct bpf_prog_info has multiple arrays. The user has the option to choose
+ * which arrays to fetch from the kernel. The following APIs provide a uniform
+ * way to fetch this data. All arrays in bpf_prog_info are stored in a single
+ * contiguous memory region. This makes it easy to store the info in a
+ * file.
+ *
+ * Before writing bpf_prog_info_linear to files, it is necessary to
+ * translate pointers in bpf_prog_info to offsets. Helper functions
+ * bpf_program__bpil_addr_to_offs() and bpf_program__bpil_offs_to_addr()
+ * are introduced to switch between pointers and offsets.
+ *
+ * Examples:
+ * # To fetch map_ids and prog_tags:
+ * __u64 arrays = (1UL << BPF_PROG_INFO_MAP_IDS) |
+ * (1UL << BPF_PROG_INFO_PROG_TAGS);
+ * struct bpf_prog_info_linear *info_linear =
+ * bpf_program__get_prog_info_linear(fd, arrays);
+ *
+ * # To save data in file
+ * bpf_program__bpil_addr_to_offs(info_linear);
+ * write(f, info_linear, sizeof(*info_linear) + info_linear->data_len);
+ *
+ * # To read data from file
+ * read(f, info_linear, <proper_size>);
+ * bpf_program__bpil_offs_to_addr(info_linear);
+ */
+enum bpf_prog_info_array {
+ BPF_PROG_INFO_FIRST_ARRAY = 0,
+ BPF_PROG_INFO_JITED_INSNS = 0,
+ BPF_PROG_INFO_XLATED_INSNS,
+ BPF_PROG_INFO_MAP_IDS,
+ BPF_PROG_INFO_JITED_KSYMS,
+ BPF_PROG_INFO_JITED_FUNC_LENS,
+ BPF_PROG_INFO_FUNC_INFO,
+ BPF_PROG_INFO_LINE_INFO,
+ BPF_PROG_INFO_JITED_LINE_INFO,
+ BPF_PROG_INFO_PROG_TAGS,
+ BPF_PROG_INFO_LAST_ARRAY,
+};
+
+struct bpf_prog_info_linear {
+ /* size of struct bpf_prog_info, when the tool is compiled */
+ __u32 info_len;
+ /* total bytes allocated for data, round up to 8 bytes */
+ __u32 data_len;
+ /* which arrays are included in data */
+ __u64 arrays;
+ struct bpf_prog_info info;
+ __u8 data[];
+};
+
+LIBBPF_API struct bpf_prog_info_linear *
+bpf_program__get_prog_info_linear(int fd, __u64 arrays);
+
+LIBBPF_API void
+bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear);
+
+LIBBPF_API void
+bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear);
+
+/*
+ * A helper function to get the number of possible CPUs before looking up
+ * per-CPU maps. Negative errno is returned on failure.
+ *
+ * Example usage:
+ *
+ * int ncpus = libbpf_num_possible_cpus();
+ * if (ncpus < 0) {
+ * // error handling
+ * }
+ * long values[ncpus];
+ * bpf_map_lookup_elem(per_cpu_map_fd, key, values);
+ *
+ */
+LIBBPF_API int libbpf_num_possible_cpus(void);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __LIBBPF_LIBBPF_H */
diff --git a/src/contrib/libbpf/bpf/libbpf_errno.c b/src/contrib/libbpf/bpf/libbpf_errno.c
new file mode 100644
index 0000000..4343e40
--- /dev/null
+++ b/src/contrib/libbpf/bpf/libbpf_errno.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ * Copyright (C) 2017 Nicira, Inc.
+ */
+
+#undef _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+
+#include "libbpf.h"
+
+#define ERRNO_OFFSET(e) ((e) - __LIBBPF_ERRNO__START)
+#define ERRCODE_OFFSET(c) ERRNO_OFFSET(LIBBPF_ERRNO__##c)
+#define NR_ERRNO (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START)
+
+/*
+ * Messages for the libbpf-specific error codes, indexed by the code's offset
+ * from __LIBBPF_ERRNO__START. Every enumerator between __LIBBPF_ERRNO__START
+ * and __LIBBPF_ERRNO__END needs an entry here: a missing one leaves a NULL
+ * slot that libbpf_strerror() would hand to "%s".
+ */
+static const char *libbpf_strerror_table[NR_ERRNO] = {
+	[ERRCODE_OFFSET(LIBELF)]	= "Something wrong in libelf",
+	[ERRCODE_OFFSET(FORMAT)]	= "BPF object format invalid",
+	[ERRCODE_OFFSET(KVERSION)]	= "'version' section incorrect or lost",
+	[ERRCODE_OFFSET(ENDIAN)]	= "Endian mismatch",
+	[ERRCODE_OFFSET(INTERNAL)]	= "Internal error in libbpf",
+	[ERRCODE_OFFSET(RELOC)]		= "Relocation failed",
+	[ERRCODE_OFFSET(LOAD)]		= "Load program failure for unknown reason",
+	[ERRCODE_OFFSET(VERIFY)]	= "Kernel verifier blocks program loading",
+	[ERRCODE_OFFSET(PROG2BIG)]	= "Program too big",
+	[ERRCODE_OFFSET(KVER)]		= "Incorrect kernel version",
+	[ERRCODE_OFFSET(PROGTYPE)]	= "Kernel doesn't support this program type",
+	[ERRCODE_OFFSET(WRNGPID)]	= "Wrong pid in netlink message",
+	[ERRCODE_OFFSET(INVSEQ)]	= "Invalid netlink sequence",
+	[ERRCODE_OFFSET(NLPARSE)]	= "Incorrect netlink message parsing",
+};
+
+/*
+ * Write a description of error code err into buf (at most size bytes,
+ * always NUL-terminated).
+ *
+ * err may be given as a positive or a negative value. Codes below
+ * __LIBBPF_ERRNO__START are passed to strerror_r() and its return value is
+ * returned; libbpf-specific codes are looked up in libbpf_strerror_table.
+ *
+ * Returns 0 for a known libbpf code, the strerror_r() result for system
+ * codes, and -1 if buf is NULL, size is 0, or the code is unknown.
+ */
+int libbpf_strerror(int err, char *buf, size_t size)
+{
+	unsigned int code;
+
+	if (!buf || !size)
+		return -1;
+
+	/* Take the magnitude in unsigned arithmetic: -err overflows for
+	 * INT_MIN.
+	 */
+	code = err < 0 ? 0U - (unsigned int)err : (unsigned int)err;
+
+	if (code < (unsigned int)__LIBBPF_ERRNO__START) {
+		int ret;
+
+		ret = strerror_r((int)code, buf, size);
+		buf[size - 1] = '\0';
+		return ret;
+	}
+
+	if (code < (unsigned int)__LIBBPF_ERRNO__END) {
+		const char *msg;
+
+		msg = libbpf_strerror_table[ERRNO_OFFSET(code)];
+		/* A code without a table entry must not reach "%s" as NULL;
+		 * fall through and report it as unknown instead.
+		 */
+		if (msg) {
+			snprintf(buf, size, "%s", msg);
+			buf[size - 1] = '\0';
+			return 0;
+		}
+	}
+
+	snprintf(buf, size, "Unknown libbpf error %u", code);
+	buf[size - 1] = '\0';
+	return -1;
+}
diff --git a/src/contrib/libbpf/bpf/libbpf_internal.h b/src/contrib/libbpf/bpf/libbpf_internal.h
new file mode 100644
index 0000000..97ac17a
--- /dev/null
+++ b/src/contrib/libbpf/bpf/libbpf_internal.h
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Internal libbpf helpers.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+
+#ifndef __LIBBPF_LIBBPF_INTERNAL_H
+#define __LIBBPF_LIBBPF_INTERNAL_H
+
+#include "libbpf.h"
+
+/* Helpers for spelling out raw BTF data as comma-separated integer lists
+ * (presumably inside __u32 array initializers -- confirm against users).
+ */
+/* Pack a type's info word: kind_flag in bit 31, kind from bit 24 up, and
+ * vlen masked with BTF_MAX_VLEN in the low bits.
+ */
+#define BTF_INFO_ENC(kind, kind_flag, vlen) \
+ ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
+/* Expand to the three words name, info, size_or_type. */
+#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
+/* Pack an integer description: encoding from bit 24, bits_offset from
+ * bit 16, nr_bits in the low bits.
+ */
+#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \
+ ((encoding) << 24 | (bits_offset) << 16 | (nr_bits))
+/* A BTF_KIND_INT type entry followed by its BTF_INT_ENC() word. */
+#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \
+ BTF_INT_ENC(encoding, bits_offset, bits)
+/* The remaining helpers only parenthesize and list their arguments. */
+#define BTF_MEMBER_ENC(name, type, bits_offset) (name), (type), (bits_offset)
+#define BTF_PARAM_ENC(name, type) (name), (type)
+#define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size)
+
+/* Fallback definitions, used only when the including code has not already
+ * defined these names. min() and max() expand each argument twice, so do
+ * not pass expressions with side effects.
+ */
+#ifndef min
+# define min(x, y) ((x) < (y) ? (x) : (y))
+#endif
+#ifndef max
+# define max(x, y) ((x) < (y) ? (y) : (x))
+#endif
+/* Offset of the first byte past FIELD within TYPE. */
+#ifndef offsetofend
+# define offsetofend(TYPE, FIELD) \
+ (offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD))
+#endif
+
+/* Symbol versioning is different between static and shared library.
+ * Properly versioned symbols are needed for shared library, but
+ * only the symbol of the new version is needed for static library.
+ */
+#ifdef SHARED
+# define COMPAT_VERSION(internal_name, api_name, version) \
+ asm(".symver " #internal_name "," #api_name "@" #version);
+# define DEFAULT_VERSION(internal_name, api_name, version) \
+ asm(".symver " #internal_name "," #api_name "@@" #version);
+#else
+# define COMPAT_VERSION(internal_name, api_name, version)
+# define DEFAULT_VERSION(internal_name, api_name, version) \
+ extern typeof(internal_name) api_name \
+ __attribute__((alias(#internal_name)));
+#endif
+
+extern void libbpf_print(enum libbpf_print_level level,
+ const char *format, ...)
+ __attribute__((format(printf, 2, 3)));
+
+/* Forward a printf-style message to libbpf_print() at the given level,
+ * prepending "libbpf: " to the format. fmt must be a string literal because
+ * the prefix is attached by literal concatenation.
+ */
+#define __pr(level, fmt, ...) \
+do { \
+ libbpf_print(level, "libbpf: " fmt, ##__VA_ARGS__); \
+} while (0)
+
+/* Per-level shorthands for __pr(). */
+#define pr_warn(fmt, ...) __pr(LIBBPF_WARN, fmt, ##__VA_ARGS__)
+#define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__)
+#define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__)
+
+/*
+ * Check a caller-supplied options struct against the layout known to this
+ * library.
+ *
+ * - opts:      the caller's struct, viewed as raw bytes;
+ * - opts_sz:   number of leading bytes this library understands;
+ * - user_sz:   size the caller recorded in the struct's sz field;
+ * - type_name: struct name, used in warnings only.
+ *
+ * Returns false if user_sz cannot even hold the size_t sz field, or if the
+ * caller's struct is larger than opts_sz and any of the extra bytes is
+ * non-zero (i.e. an option this library would silently ignore is set).
+ * Returns true otherwise.
+ */
+static inline bool libbpf_validate_opts(const char *opts,
+					size_t opts_sz, size_t user_sz,
+					const char *type_name)
+{
+	size_t i;
+
+	if (user_sz < sizeof(size_t)) {
+		pr_warn("%s size (%zu) is too small\n", type_name, user_sz);
+		return false;
+	}
+
+	/* The loop is empty when user_sz <= opts_sz. */
+	for (i = opts_sz; i < user_sz; i++) {
+		if (opts[i]) {
+			pr_warn("%s has non-zero extra bytes\n", type_name);
+			return false;
+		}
+	}
+	return true;
+}
+
+/* True if opts is NULL, or if it passes libbpf_validate_opts() for
+ * 'struct type'; the known size runs up to the end of the field named by
+ * type##__last_field.
+ */
+#define OPTS_VALID(opts, type)						      \
+	(!(opts) || libbpf_validate_opts((const char *)(opts),		      \
+					 offsetofend(struct type,	      \
+						     type##__last_field),    \
+					 (opts)->sz, #type))
+/* True if opts is non-NULL and its recorded size covers 'field'. */
+#define OPTS_HAS(opts, field) \
+	((opts) && (opts)->sz >= offsetofend(typeof(*(opts)), field))
+/* Value of opts->field when OPTS_HAS() holds, fallback_value otherwise. */
+#define OPTS_GET(opts, field, fallback_value) \
+	(OPTS_HAS(opts, field) ? (opts)->field : (fallback_value))
+
+int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
+ const char *str_sec, size_t str_len);
+
+struct btf_ext_info {
+ /*
+ * info points to the individual info section (e.g. func_info and
+ * line_info) from the .BTF.ext. It does not include the __u32 rec_size.
+ */
+ void *info;
+ __u32 rec_size;
+ __u32 len;
+};
+
+/* Walk the variable-length sections packed in seg->info. Each step skips
+ * one struct btf_ext_info_sec header plus sec->num_info records of
+ * seg->rec_size bytes; the walk ends at seg->info + seg->len. Uses
+ * arithmetic on void *, a GNU C extension.
+ */
+#define for_each_btf_ext_sec(seg, sec) \
+ for (sec = (seg)->info; \
+ (void *)sec < (seg)->info + (seg)->len; \
+ sec = (void *)sec + sizeof(struct btf_ext_info_sec) + \
+ (seg)->rec_size * sec->num_info)
+
+/* Walk the sec->num_info records stored in sec->data, advancing rec by
+ * seg->rec_size bytes per step; i counts the records visited so far.
+ */
+#define for_each_btf_ext_rec(seg, sec, i, rec) \
+ for (i = 0, rec = (void *)&(sec)->data; \
+ i < (sec)->num_info; \
+ i++, rec = (void *)rec + (seg)->rec_size)
+
+struct btf_ext {
+ union {
+ struct btf_ext_header *hdr;
+ void *data;
+ };
+ struct btf_ext_info func_info;
+ struct btf_ext_info line_info;
+ struct btf_ext_info field_reloc_info;
+ __u32 data_size;
+};
+
+struct btf_ext_info_sec {
+ __u32 sec_name_off;
+ __u32 num_info;
+ /* Followed by num_info * record_size number of bytes */
+ __u8 data[0];
+};
+
+/* The minimum bpf_func_info checked by the loader */
+struct bpf_func_info_min {
+ __u32 insn_off;
+ __u32 type_id;
+};
+
+/* The minimum bpf_line_info checked by the loader */
+struct bpf_line_info_min {
+ __u32 insn_off;
+ __u32 file_name_off;
+ __u32 line_off;
+ __u32 line_col;
+};
+
+/* bpf_field_info_kind encodes which aspect of captured field has to be
+ * adjusted by relocations. Currently supported values are:
+ * - BPF_FIELD_BYTE_OFFSET: field offset (in bytes);
+ * - BPF_FIELD_EXISTS: field existence (1, if field exists; 0, otherwise);
+ */
+enum bpf_field_info_kind {
+ BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */
+ BPF_FIELD_BYTE_SIZE = 1,
+ BPF_FIELD_EXISTS = 2, /* field existence in target kernel */
+ BPF_FIELD_SIGNED = 3,
+ BPF_FIELD_LSHIFT_U64 = 4,
+ BPF_FIELD_RSHIFT_U64 = 5,
+};
+
+/* The minimum bpf_field_reloc checked by the loader
+ *
+ * Field relocation captures the following data:
+ * - insn_off - instruction offset (in bytes) within a BPF program that needs
+ * its insn->imm field to be relocated with actual field info;
+ * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
+ * field;
+ * - access_str_off - offset into corresponding .BTF string section. String
+ * itself encodes an accessed field using a sequence of field and array
+ * indices, separated by colon (:). It's conceptually very close to LLVM's
+ * getelementptr ([0]) instruction's arguments for identifying offset to
+ * a field.
+ *
+ * Example to provide a better feel.
+ *
+ * struct sample {
+ * int a;
+ * struct {
+ * int b[10];
+ * };
+ * };
+ *
+ * struct sample *s = ...;
+ * int x = &s->a; // encoded as "0:0" (a is field #0)
+ * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1,
+ * // b is field #0 inside anon struct, accessing elem #5)
+ * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
+ *
+ * type_id for all relocs in this example will capture BTF type id of
+ * `struct sample`.
+ *
+ * Such relocation is emitted when using __builtin_preserve_access_index()
+ * Clang built-in, passing expression that captures field address, e.g.:
+ *
+ * bpf_probe_read(&dst, sizeof(dst),
+ * __builtin_preserve_access_index(&src->a.b.c));
+ *
+ * In this case Clang will emit field relocation recording necessary data to
+ * be able to find offset of embedded `a.b.c` field within `src` struct.
+ *
+ * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
+ */
+struct bpf_field_reloc {
+ __u32 insn_off;
+ __u32 type_id;
+ __u32 access_str_off;
+ enum bpf_field_info_kind kind;
+};
+
+#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/src/contrib/libbpf/bpf/libbpf_probes.c b/src/contrib/libbpf/bpf/libbpf_probes.c
new file mode 100644
index 0000000..a9eb8b3
--- /dev/null
+++ b/src/contrib/libbpf/bpf/libbpf_probes.c
@@ -0,0 +1,323 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2019 Netronome Systems, Inc. */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <net/if.h>
+#include <sys/utsname.h>
+
+#include <linux/btf.h>
+#include <linux/filter.h>
+#include <linux/kernel.h>
+
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+
+/* Tell whether @pattern occurs anywhere in the NUL-terminated @buffer. */
+static bool grep(const char *buffer, const char *pattern)
+{
+	const char *hit = strstr(buffer, pattern);
+
+	return hit != NULL;
+}
+
+/*
+ * Look up the vendor ID of the device behind @ifindex by reading
+ * /sys/class/net/<ifname>/device/vendor. Returns the parsed number, or -1 if
+ * the interface name, the sysfs file or its contents cannot be obtained.
+ */
+static int get_vendor_id(int ifindex)
+{
+	char ifname[IF_NAMESIZE], path[64], buf[8];
+	ssize_t nread;
+	int sysfs_fd;
+
+	if (if_indextoname(ifindex, ifname) == NULL)
+		return -1;
+
+	snprintf(path, sizeof(path), "/sys/class/net/%s/device/vendor", ifname);
+
+	sysfs_fd = open(path, O_RDONLY);
+	if (sysfs_fd < 0)
+		return -1;
+
+	nread = read(sysfs_fd, buf, sizeof(buf));
+	close(sysfs_fd);
+	/* Reject read errors and contents that leave no room for the NUL. */
+	if (nread < 0 || nread >= (ssize_t)sizeof(buf))
+		return -1;
+	buf[nread] = '\0';
+
+	return strtol(buf, NULL, 0);
+}
+
+/*
+ * Encode the running kernel's "major.minor.patch" release as
+ * (major << 16) + (minor << 8) + patch.
+ *
+ * Returns 0 on failure, so that the caller attempts to probe with an empty
+ * kversion.
+ */
+static int get_kernel_version(void)
+{
+	int major, minor, patch;
+	struct utsname uts;
+	int nfields;
+
+	if (uname(&uts) != 0)
+		return 0;
+
+	nfields = sscanf(uts.release, "%d.%d.%d", &major, &minor, &patch);
+	if (nfields != 3)
+		return 0;
+
+	return (major << 16) + (minor << 8) + patch;
+}
+
+/*
+ * Try to load @insns as a program of type @prog_type, optionally bound to the
+ * device @ifindex. A successfully created program fd is closed right away;
+ * nothing is returned, the load log is written to @buf (@buf_len bytes).
+ */
+static void
+probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
+	   size_t insns_cnt, char *buf, size_t buf_len, __u32 ifindex)
+{
+	struct bpf_load_program_attr xattr = {};
+	int prog_fd;
+
+	xattr.prog_type = prog_type;
+	xattr.insns = insns;
+	xattr.insns_cnt = insns_cnt;
+	xattr.license = "GPL";
+	xattr.prog_ifindex = ifindex;
+
+	/* Only these two program types get an extra load attribute. */
+	if (prog_type == BPF_PROG_TYPE_CGROUP_SOCK_ADDR)
+		xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
+	else if (prog_type == BPF_PROG_TYPE_KPROBE)
+		xattr.kern_version = get_kernel_version();
+
+	prog_fd = bpf_load_program_xattr(&xattr, buf, buf_len);
+	if (prog_fd >= 0)
+		close(prog_fd);
+}
+
+/*
+ * Probe whether programs of type @prog_type can be loaded, optionally for the
+ * device @ifindex. A two-instruction "set r0, exit" program is loaded; the
+ * type counts as supported unless errno ends up as EINVAL or EOPNOTSUPP.
+ */
+bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex)
+{
+	bool tc_offload = ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS;
+	struct bpf_insn insns[2] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN()
+	};
+
+	/* nfp returns -EINVAL on exit(0) with TC offload */
+	if (tc_offload)
+		insns[0].imm = 2;
+
+	errno = 0;
+	probe_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex);
+
+	if (errno == EINVAL || errno == EOPNOTSUPP)
+		return false;
+
+	return true;
+}
+
+/*
+ * Assemble a raw BTF blob (header, then @raw_types, then @str_sec) and hand it
+ * to bpf_load_btf().
+ *
+ * Returns whatever bpf_load_btf() returns, or -ENOMEM if the temporary blob
+ * cannot be allocated.
+ */
+int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
+			 const char *str_sec, size_t str_len)
+{
+	struct btf_header hdr = {
+		.magic = BTF_MAGIC,
+		.version = BTF_VERSION,
+		.hdr_len = sizeof(struct btf_header),
+		.type_len = types_len,
+		.str_off = types_len,
+		.str_len = str_len,
+	};
+	int total_len = hdr.hdr_len + hdr.type_len + hdr.str_len;
+	__u8 *blob, *cursor;
+	int fd;
+
+	blob = malloc(total_len);
+	if (!blob)
+		return -ENOMEM;
+
+	/* Lay out header, type section and string section back to back. */
+	cursor = blob;
+	memcpy(cursor, &hdr, sizeof(hdr));
+	cursor += hdr.hdr_len;
+	memcpy(cursor, raw_types, hdr.type_len);
+	cursor += hdr.type_len;
+	memcpy(cursor, str_sec, hdr.str_len);
+
+	fd = bpf_load_btf(blob, total_len, NULL, 0, false);
+	free(blob);
+
+	return fd;
+}
+
+static int load_sk_storage_btf(void) /* loads the hand-encoded BTF below through libbpf__load_raw_btf() and returns its result; used by the BPF_MAP_TYPE_SK_STORAGE probe */
+{
+ const char strs[] = "\0bpf_spin_lock\0val\0cnt\0l"; /* name offsets: 1 "bpf_spin_lock", 15 "val", 19 "cnt", 23 "l" */
+ /* struct bpf_spin_lock {
+ * int val;
+ * };
+ * struct val {
+ * int cnt;
+ * struct bpf_spin_lock l;
+ * };
+ */
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* struct bpf_spin_lock */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4),
+ BTF_MEMBER_ENC(15, 1, 0), /* int val; */
+ /* struct val */ /* [3] */
+ BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+ BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */
+ BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */
+ };
+
+ return libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs)); /* sizeof(strs) includes the trailing NUL of the literal */
+}
+
+/*
+ * Probe whether maps of type @map_type can be created, optionally on the
+ * device @ifindex, by creating (and immediately closing) a throw-away map
+ * whose parameters are adjusted per map type.
+ *
+ * Returns true if the probe map could be created.
+ */
+bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
+{
+	struct bpf_create_map_attr attr = {};
+	int fd, fd_inner, btf_fd = -1;
+
+	if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+	    map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+		/* TODO: probe for device, once libbpf has a function to create
+		 * map-in-map for offload
+		 */
+		if (ifindex)
+			return false;
+
+		fd_inner = bpf_create_map(BPF_MAP_TYPE_HASH,
+					  sizeof(__u32), sizeof(__u32), 1, 0);
+		if (fd_inner < 0)
+			return false;
+
+		fd = bpf_create_map_in_map(map_type, NULL, sizeof(__u32),
+					   fd_inner, 1, 0);
+		close(fd_inner);
+		if (fd < 0)
+			return false;
+
+		close(fd);
+		return true;
+	}
+
+	/* Note: No other restriction on map type probes for offload */
+	attr.map_type = map_type;
+	attr.key_size = sizeof(__u32);
+	attr.value_size = sizeof(__u32);
+	attr.max_entries = 1;
+	attr.map_flags = 0;
+	attr.map_ifindex = ifindex;
+
+	/* Map types not listed here are probed with the defaults above. */
+	switch (map_type) {
+	case BPF_MAP_TYPE_STACK_TRACE:
+		attr.value_size = sizeof(__u64);
+		break;
+	case BPF_MAP_TYPE_LPM_TRIE:
+		attr.key_size = sizeof(__u64);
+		attr.value_size = sizeof(__u64);
+		attr.map_flags = BPF_F_NO_PREALLOC;
+		break;
+	case BPF_MAP_TYPE_CGROUP_STORAGE:
+	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
+		attr.key_size = sizeof(struct bpf_cgroup_storage_key);
+		attr.value_size = sizeof(__u64);
+		attr.max_entries = 0;
+		break;
+	case BPF_MAP_TYPE_QUEUE:
+	case BPF_MAP_TYPE_STACK:
+		attr.key_size = 0;
+		break;
+	case BPF_MAP_TYPE_SK_STORAGE:
+		attr.value_size = 8;
+		attr.max_entries = 0;
+		attr.map_flags = BPF_F_NO_PREALLOC;
+		btf_fd = load_sk_storage_btf();
+		if (btf_fd < 0)
+			return false;
+		/* type IDs refer to the BTF built by load_sk_storage_btf() */
+		attr.btf_fd = btf_fd;
+		attr.btf_key_type_id = 1;
+		attr.btf_value_type_id = 3;
+		break;
+	default:
+		break;
+	}
+
+	fd = bpf_create_map_xattr(&attr);
+	if (fd >= 0)
+		close(fd);
+	if (btf_fd >= 0)
+		close(btf_fd);
+
+	return fd >= 0;
+}
+
+/*
+ * Probe whether helper @id can be called from programs of type @prog_type,
+ * optionally on the device @ifindex. A program that only calls the helper is
+ * loaded and the load log is searched for the messages that flag the helper
+ * as unavailable.
+ */
+bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type,
+		      __u32 ifindex)
+{
+	struct bpf_insn insns[2] = {
+		BPF_EMIT_CALL(id),
+		BPF_EXIT_INSN()
+	};
+	char log[4096] = {};
+	bool supported;
+
+	probe_load(prog_type, insns, ARRAY_SIZE(insns), log, sizeof(log),
+		   ifindex);
+	supported = !(grep(log, "invalid func ") || grep(log, "unknown func "));
+
+	/* Netronome specific */
+	if (ifindex && get_vendor_id(ifindex) == 0x19ee)
+		supported = supported &&
+			    !(grep(log, "not supported by FW") ||
+			      grep(log, "unsupported function id"));
+
+	return supported;
+}
diff --git a/src/contrib/libbpf/bpf/libbpf_util.h b/src/contrib/libbpf/bpf/libbpf_util.h
new file mode 100644
index 0000000..59c779c
--- /dev/null
+++ b/src/contrib/libbpf/bpf/libbpf_util.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2019 Facebook */
+
+#ifndef __LIBBPF_LIBBPF_UTIL_H
+#define __LIBBPF_LIBBPF_UTIL_H
+
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Use these barrier functions instead of smp_[rw]mb() when they are
+ * used in a libbpf header file. That way they can be built into the
+ * application that uses libbpf.
+ */
+#if defined(__i386__) || defined(__x86_64__)
+/* Empty asm with a "memory" clobber. Spelled __asm__/__volatile__ so that the
+ * header also builds in applications compiled in strict ISO C mode, where the
+ * plain `asm` keyword is not available.
+ */
+# define libbpf_smp_rmb() __asm__ __volatile__("" : : : "memory")
+# define libbpf_smp_wmb() __asm__ __volatile__("" : : : "memory")
+/* Locked no-op add just below the stack pointer. %rsp only exists on x86-64,
+ * so 32-bit x86 has to address the stack through %esp.
+ */
+# if defined(__x86_64__)
+#  define libbpf_smp_mb() \
+	__asm__ __volatile__("lock; addl $0,-4(%%rsp)" : : : "memory", "cc")
+# else
+#  define libbpf_smp_mb() \
+	__asm__ __volatile__("lock; addl $0,-4(%%esp)" : : : "memory", "cc")
+# endif
+/* Hinders stores to be observed before older loads. */
+# define libbpf_smp_rwmb() __asm__ __volatile__("" : : : "memory")
+#elif defined(__aarch64__)
+# define libbpf_smp_rmb() __asm__ __volatile__("dmb ishld" : : : "memory")
+# define libbpf_smp_wmb() __asm__ __volatile__("dmb ishst" : : : "memory")
+# define libbpf_smp_mb() __asm__ __volatile__("dmb ish" : : : "memory")
+# define libbpf_smp_rwmb() libbpf_smp_mb()
+#elif defined(__arm__)
+/* These are only valid for armv7 and above */
+# define libbpf_smp_rmb() __asm__ __volatile__("dmb ish" : : : "memory")
+# define libbpf_smp_wmb() __asm__ __volatile__("dmb ishst" : : : "memory")
+# define libbpf_smp_mb() __asm__ __volatile__("dmb ish" : : : "memory")
+# define libbpf_smp_rwmb() libbpf_smp_mb()
+#else
+/* Architecture missing native barrier functions. */
+# define libbpf_smp_rmb() __sync_synchronize()
+# define libbpf_smp_wmb() __sync_synchronize()
+# define libbpf_smp_mb() __sync_synchronize()
+# define libbpf_smp_rwmb() __sync_synchronize()
+#endif
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
diff --git a/src/contrib/libbpf/bpf/netlink.c b/src/contrib/libbpf/bpf/netlink.c
new file mode 100644
index 0000000..5065c1a
--- /dev/null
+++ b/src/contrib/libbpf/bpf/netlink.c
@@ -0,0 +1,451 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2018 Facebook */
+
+#include <stdlib.h>
+#include <memory.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <linux/rtnetlink.h>
+#include <sys/socket.h>
+#include <errno.h>
+#include <time.h>
+
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+#include "nlattr.h"
+
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
+
+typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t,
+ void *cookie); /* per-message handler: bpf_netlink_recv() calls it with each received message, the user callback and the user cookie */
+
+struct xdp_id_md { /* cookie passed to get_xdp_info() while dumping links */
+ int ifindex; /* when non-zero, get_xdp_info() ignores messages for any other interface index */
+ __u32 flags; /* XDP mode flags as stored by bpf_get_link_xdp_info() */
+ struct xdp_link_info info; /* filled by get_xdp_info() from the IFLA_XDP_* attributes */
+};
+
+/*
+ * Open and bind a NETLINK_ROUTE socket, asking for extended ACK reporting.
+ *
+ * On success the socket fd is returned and the netlink port id the socket was
+ * bound to is stored in @nl_pid. On failure a negative error code is returned
+ * and the socket is closed.
+ */
+int libbpf_netlink_open(__u32 *nl_pid)
+{
+	struct sockaddr_nl sa;
+	socklen_t addrlen = sizeof(sa);
+	int one = 1;
+	int sock, err;
+
+	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (sock < 0)
+		return -errno;
+
+	/* Extended ACKs are optional: carry on without them. */
+	if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK,
+		       &one, sizeof(one)) < 0) {
+		pr_warn("Netlink error reporting not supported\n");
+	}
+
+	memset(&sa, 0, sizeof(sa));
+	sa.nl_family = AF_NETLINK;
+
+	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+		err = -errno;
+		close(sock);
+		return err;
+	}
+
+	/* Read back the address to learn the port id picked at bind time. */
+	if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
+		err = -errno;
+		close(sock);
+		return err;
+	}
+
+	if (addrlen != sizeof(sa)) {
+		close(sock);
+		return -LIBBPF_ERRNO__INTERNAL;
+	}
+
+	*nl_pid = sa.nl_pid;
+	return sock;
+}
+
+static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq, /* receive replies on @sock, check them against @nl_pid/@seq and pass data messages to @_fn; returns 0, a negative error, or the first non-zero value returned by @_fn */
+ __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
+ void *cookie)
+{
+ bool multipart = true; /* true so the loop runs at least once; re-armed only by NLM_F_MULTI messages */
+ struct nlmsgerr *err;
+ struct nlmsghdr *nh;
+ char buf[4096];
+ int len, ret;
+
+ while (multipart) {
+ multipart = false;
+ len = recv(sock, buf, sizeof(buf), 0);
+ if (len < 0) {
+ ret = -errno;
+ goto done;
+ }
+
+ if (len == 0) /* nothing received: leave the loop and return 0 */
+ break;
+
+ for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+ nh = NLMSG_NEXT(nh, len)) {
+ if (nh->nlmsg_pid != nl_pid) { /* message is not addressed to our port id */
+ ret = -LIBBPF_ERRNO__WRNGPID;
+ goto done;
+ }
+ if (nh->nlmsg_seq != seq) { /* message does not belong to our request */
+ ret = -LIBBPF_ERRNO__INVSEQ;
+ goto done;
+ }
+ if (nh->nlmsg_flags & NLM_F_MULTI)
+ multipart = true; /* more parts may follow: recv() again after this buffer */
+ switch (nh->nlmsg_type) {
+ case NLMSG_ERROR:
+ err = (struct nlmsgerr *)NLMSG_DATA(nh);
+ if (!err->error) /* error == 0: nothing to report, go on with the next message */
+ continue;
+ ret = err->error;
+ libbpf_nla_dump_errormsg(nh);
+ goto done;
+ case NLMSG_DONE:
+ return 0;
+ default:
+ break;
+ }
+ if (_fn) {
+ ret = _fn(nh, fn, cookie);
+ if (ret) /* a non-zero handler result stops the whole receive */
+ return ret;
+ }
+ }
+ }
+ ret = 0;
+done:
+ return ret;
+}
+
+int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags) /* send an RTM_SETLINK request for @ifindex carrying IFLA_XDP_FD = @fd (plus IFLA_XDP_FLAGS = @flags when non-zero) and wait for the ACK; returns 0 or a negative error */
+{
+ int sock, seq = 0, ret;
+ struct nlattr *nla, *nla_xdp;
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg ifinfo;
+ char attrbuf[64]; /* room for the nested IFLA_XDP attribute built below */
+ } req;
+ __u32 nl_pid;
+
+ sock = libbpf_netlink_open(&nl_pid);
+ if (sock < 0)
+ return sock;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_type = RTM_SETLINK;
+ req.nh.nlmsg_pid = 0;
+ req.nh.nlmsg_seq = ++seq;
+ req.ifinfo.ifi_family = AF_UNSPEC;
+ req.ifinfo.ifi_index = ifindex;
+
+ /* start the nested IFLA_XDP attribute right after the ifinfomsg */
+ nla = (struct nlattr *)(((char *)&req)
+ + NLMSG_ALIGN(req.nh.nlmsg_len));
+ nla->nla_type = NLA_F_NESTED | IFLA_XDP;
+ nla->nla_len = NLA_HDRLEN; /* header only for now; grows as nested attributes are appended */
+
+ /* add XDP fd */
+ nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+ nla_xdp->nla_type = IFLA_XDP_FD;
+ nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
+ memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
+ nla->nla_len += nla_xdp->nla_len;
+
+ /* if user passed in any flags, add those too */
+ if (flags) {
+ nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+ nla_xdp->nla_type = IFLA_XDP_FLAGS;
+ nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
+ memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
+ nla->nla_len += nla_xdp->nla_len;
+ }
+
+ req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); /* account for the complete nested attribute in the message length */
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+ ret = bpf_netlink_recv(sock, nl_pid, seq, NULL, NULL, NULL); /* no handlers: only the ACK/error outcome matters */
+
+cleanup:
+ close(sock);
+ return ret;
+}
+
+/*
+ * Index the attributes that follow the ifinfomsg header of @nlh and pass the
+ * header plus the attribute table to @dump_link_nlmsg.
+ *
+ * Returns the callback's result, or -LIBBPF_ERRNO__NLPARSE if the attributes
+ * cannot be parsed.
+ */
+static int __dump_link_nlmsg(struct nlmsghdr *nlh,
+			     libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
+{
+	struct ifinfomsg *ifi = NLMSG_DATA(nlh);
+	struct nlattr *tb[IFLA_MAX + 1];
+	struct nlattr *first_attr;
+	int attrs_len;
+
+	first_attr = (struct nlattr *)((char *)ifi + NLMSG_ALIGN(sizeof(*ifi)));
+	attrs_len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
+	if (libbpf_nla_parse(tb, IFLA_MAX, first_attr, attrs_len, NULL))
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	return dump_link_nlmsg(cookie, ifi, tb);
+}
+
+/*
+ * Link dump callback: copy the XDP attach mode and program ids found in the
+ * IFLA_XDP attribute of @msg into the struct xdp_id_md passed as @cookie.
+ *
+ * Returns 0, or the error from parsing the nested IFLA_XDP attribute.
+ */
+static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
+{
+	struct nlattr *xdp_tb[IFLA_XDP_MAX + 1];
+	struct xdp_id_md *md = cookie;
+	struct ifinfomsg *ifi = msg;
+	struct xdp_link_info *info = &md->info;
+	int err;
+
+	/* Not the interface asked for, or no XDP attribute on it. */
+	if ((md->ifindex && md->ifindex != ifi->ifi_index) || !tb[IFLA_XDP])
+		return 0;
+
+	err = libbpf_nla_parse_nested(xdp_tb, IFLA_XDP_MAX, tb[IFLA_XDP], NULL);
+	if (err)
+		return err;
+
+	if (!xdp_tb[IFLA_XDP_ATTACHED])
+		return 0;
+
+	info->attach_mode = libbpf_nla_getattr_u8(xdp_tb[IFLA_XDP_ATTACHED]);
+	if (info->attach_mode == XDP_ATTACHED_NONE)
+		return 0;
+
+	/* Each id is optional: keep the previous value when it is absent. */
+	if (xdp_tb[IFLA_XDP_PROG_ID])
+		info->prog_id =
+			libbpf_nla_getattr_u32(xdp_tb[IFLA_XDP_PROG_ID]);
+
+	if (xdp_tb[IFLA_XDP_SKB_PROG_ID])
+		info->skb_prog_id =
+			libbpf_nla_getattr_u32(xdp_tb[IFLA_XDP_SKB_PROG_ID]);
+
+	if (xdp_tb[IFLA_XDP_DRV_PROG_ID])
+		info->drv_prog_id =
+			libbpf_nla_getattr_u32(xdp_tb[IFLA_XDP_DRV_PROG_ID]);
+
+	if (xdp_tb[IFLA_XDP_HW_PROG_ID])
+		info->hw_prog_id =
+			libbpf_nla_getattr_u32(xdp_tb[IFLA_XDP_HW_PROG_ID]);
+
+	return 0;
+}
+
+/*
+ * Query the XDP state of interface @ifindex and copy it into @info.
+ *
+ * At most @info_size bytes are written: as much of the collected
+ * struct xdp_link_info as fits, with any remainder of @info zeroed.
+ * @flags may select at most one of the SKB/DRV/HW modes.
+ *
+ * Returns 0 on success, -EINVAL for bad @flags or a zero @info_size, or a
+ * negative error from the netlink exchange.
+ */
+int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
+			  size_t info_size, __u32 flags)
+{
+	struct xdp_id_md xdp_id = {};
+	__u32 mode_bits, nl_pid;
+	size_t copy_sz;
+	int sock, ret;
+
+	if ((flags & ~XDP_FLAGS_MASK) || info_size == 0)
+		return -EINVAL;
+
+	/* Check whether the single {HW,DRV,SKB} mode is set */
+	mode_bits = flags &
+		    (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE);
+	if (mode_bits & (mode_bits - 1))
+		return -EINVAL;
+
+	sock = libbpf_netlink_open(&nl_pid);
+	if (sock < 0)
+		return sock;
+
+	xdp_id.ifindex = ifindex;
+	xdp_id.flags = mode_bits;
+
+	ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_info, &xdp_id);
+	if (ret == 0) {
+		copy_sz = info_size < sizeof(xdp_id.info) ?
+			  info_size : sizeof(xdp_id.info);
+
+		memcpy(info, &xdp_id.info, copy_sz);
+		memset((char *)info + copy_sz, 0, info_size - copy_sz);
+	}
+
+	close(sock);
+	return ret;
+}
+
+/*
+ * Pick the program id to report from @info: the generic prog_id unless the
+ * attach mode is XDP_ATTACHED_MULTI, in which case the id matching the mode
+ * requested in @flags is used (0 if no mode flag is set).
+ */
+static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags)
+{
+	if (info->attach_mode == XDP_ATTACHED_MULTI) {
+		if (flags & XDP_FLAGS_DRV_MODE)
+			return info->drv_prog_id;
+		if (flags & XDP_FLAGS_HW_MODE)
+			return info->hw_prog_id;
+		if (flags & XDP_FLAGS_SKB_MODE)
+			return info->skb_prog_id;
+
+		return 0;
+	}
+
+	return info->prog_id;
+}
+
+/*
+ * Store in @prog_id the id of the XDP program on @ifindex for the mode
+ * selected by @flags. @prog_id is left untouched on failure.
+ *
+ * Returns 0 on success or the error from bpf_get_link_xdp_info().
+ */
+int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
+{
+	struct xdp_link_info info;
+	int err;
+
+	err = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags);
+	if (err)
+		return err;
+
+	*prog_id = get_xdp_id(&info, flags);
+	return 0;
+}
+
+/*
+ * Send an RTM_GETLINK dump request on @sock and run @dump_link_nlmsg (through
+ * __dump_link_nlmsg()) on the replies. The current time is used as sequence
+ * number.
+ *
+ * Returns -errno if the request cannot be sent, otherwise the result of
+ * bpf_netlink_recv().
+ */
+int libbpf_nl_get_link(int sock, unsigned int nl_pid,
+		       libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
+{
+	int seq = time(NULL);
+	struct {
+		struct nlmsghdr nlh;
+		struct ifinfomsg ifm;
+	} req = {
+		.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+		.nlh.nlmsg_type = RTM_GETLINK,
+		.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+		.nlh.nlmsg_seq = seq,
+		.ifm.ifi_family = AF_PACKET,
+	};
+	ssize_t sent;
+
+	sent = send(sock, &req, req.nlh.nlmsg_len, 0);
+	if (sent < 0)
+		return -errno;
+
+	return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg,
+				dump_link_nlmsg, cookie);
+}
+
+/*
+ * Index the attributes that follow the tcmsg header of @nlh and pass the
+ * header plus the attribute table to @dump_class_nlmsg.
+ *
+ * Returns the callback's result, or -LIBBPF_ERRNO__NLPARSE if the attributes
+ * cannot be parsed.
+ */
+static int __dump_class_nlmsg(struct nlmsghdr *nlh,
+			      libbpf_dump_nlmsg_t dump_class_nlmsg,
+			      void *cookie)
+{
+	struct tcmsg *tcm = NLMSG_DATA(nlh);
+	struct nlattr *tb[TCA_MAX + 1];
+	struct nlattr *first_attr;
+	int attrs_len;
+
+	first_attr = (struct nlattr *)((char *)tcm + NLMSG_ALIGN(sizeof(*tcm)));
+	attrs_len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*tcm));
+	if (libbpf_nla_parse(tb, TCA_MAX, first_attr, attrs_len, NULL))
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	return dump_class_nlmsg(cookie, tcm, tb);
+}
+
+/*
+ * Send an RTM_GETTCLASS dump request for @ifindex on @sock and run
+ * @dump_class_nlmsg (through __dump_class_nlmsg()) on the replies. The current
+ * time is used as sequence number.
+ *
+ * Returns -errno if the request cannot be sent, otherwise the result of
+ * bpf_netlink_recv().
+ */
+int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex,
+			libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie)
+{
+	int seq = time(NULL);
+	struct {
+		struct nlmsghdr nlh;
+		struct tcmsg t;
+	} req = {
+		.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+		.nlh.nlmsg_type = RTM_GETTCLASS,
+		.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+		.nlh.nlmsg_seq = seq,
+		.t.tcm_family = AF_UNSPEC,
+		.t.tcm_ifindex = ifindex,
+	};
+	ssize_t sent;
+
+	sent = send(sock, &req, req.nlh.nlmsg_len, 0);
+	if (sent < 0)
+		return -errno;
+
+	return bpf_netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg,
+				dump_class_nlmsg, cookie);
+}
+
+/*
+ * Index the attributes that follow the tcmsg header of @nlh and pass the
+ * header plus the attribute table to @dump_qdisc_nlmsg.
+ *
+ * Returns the callback's result, or -LIBBPF_ERRNO__NLPARSE if the attributes
+ * cannot be parsed.
+ */
+static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh,
+			      libbpf_dump_nlmsg_t dump_qdisc_nlmsg,
+			      void *cookie)
+{
+	struct tcmsg *tcm = NLMSG_DATA(nlh);
+	struct nlattr *tb[TCA_MAX + 1];
+	struct nlattr *first_attr;
+	int attrs_len;
+
+	first_attr = (struct nlattr *)((char *)tcm + NLMSG_ALIGN(sizeof(*tcm)));
+	attrs_len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*tcm));
+	if (libbpf_nla_parse(tb, TCA_MAX, first_attr, attrs_len, NULL))
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	return dump_qdisc_nlmsg(cookie, tcm, tb);
+}
+
+/*
+ * Send an RTM_GETQDISC dump request for @ifindex on @sock and run
+ * @dump_qdisc_nlmsg (through __dump_qdisc_nlmsg()) on the replies. The current
+ * time is used as sequence number.
+ *
+ * Returns -errno if the request cannot be sent, otherwise the result of
+ * bpf_netlink_recv().
+ */
+int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
+			libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie)
+{
+	int seq = time(NULL);
+	struct {
+		struct nlmsghdr nlh;
+		struct tcmsg t;
+	} req = {
+		.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+		.nlh.nlmsg_type = RTM_GETQDISC,
+		.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+		.nlh.nlmsg_seq = seq,
+		.t.tcm_family = AF_UNSPEC,
+		.t.tcm_ifindex = ifindex,
+	};
+	ssize_t sent;
+
+	sent = send(sock, &req, req.nlh.nlmsg_len, 0);
+	if (sent < 0)
+		return -errno;
+
+	return bpf_netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg,
+				dump_qdisc_nlmsg, cookie);
+}
+
+/*
+ * Index the attributes that follow the tcmsg header of @nlh and pass the
+ * header plus the attribute table to @dump_filter_nlmsg.
+ *
+ * Returns the callback's result, or -LIBBPF_ERRNO__NLPARSE if the attributes
+ * cannot be parsed.
+ */
+static int __dump_filter_nlmsg(struct nlmsghdr *nlh,
+			       libbpf_dump_nlmsg_t dump_filter_nlmsg,
+			       void *cookie)
+{
+	struct tcmsg *tcm = NLMSG_DATA(nlh);
+	struct nlattr *tb[TCA_MAX + 1];
+	struct nlattr *first_attr;
+	int attrs_len;
+
+	first_attr = (struct nlattr *)((char *)tcm + NLMSG_ALIGN(sizeof(*tcm)));
+	attrs_len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*tcm));
+	if (libbpf_nla_parse(tb, TCA_MAX, first_attr, attrs_len, NULL))
+		return -LIBBPF_ERRNO__NLPARSE;
+
+	return dump_filter_nlmsg(cookie, tcm, tb);
+}
+
+/*
+ * Send an RTM_GETTFILTER dump request for @ifindex (with @handle as
+ * tcm_parent) on @sock and run @dump_filter_nlmsg (through
+ * __dump_filter_nlmsg()) on the replies. The current time is used as sequence
+ * number.
+ *
+ * Returns -errno if the request cannot be sent, otherwise the result of
+ * bpf_netlink_recv().
+ */
+int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
+			 libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie)
+{
+	int seq = time(NULL);
+	struct {
+		struct nlmsghdr nlh;
+		struct tcmsg t;
+	} req = {
+		.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+		.nlh.nlmsg_type = RTM_GETTFILTER,
+		.nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+		.nlh.nlmsg_seq = seq,
+		.t.tcm_family = AF_UNSPEC,
+		.t.tcm_ifindex = ifindex,
+		.t.tcm_parent = handle,
+	};
+	ssize_t sent;
+
+	sent = send(sock, &req, req.nlh.nlmsg_len, 0);
+	if (sent < 0)
+		return -errno;
+
+	return bpf_netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg,
+				dump_filter_nlmsg, cookie);
+}
diff --git a/src/contrib/libbpf/bpf/nlattr.c b/src/contrib/libbpf/bpf/nlattr.c
new file mode 100644
index 0000000..8db44bb
--- /dev/null
+++ b/src/contrib/libbpf/bpf/nlattr.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * NETLINK Netlink attributes
+ *
+ * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <errno.h>
+#include "nlattr.h"
+#include "libbpf_internal.h"
+#include <linux/rtnetlink.h>
+#include <string.h>
+#include <stdio.h>
+
+/*
+ * Default minimal payload length per policy type, used by validate_nla() when
+ * a policy entry does not set its own minlen. Types without an entry default
+ * to 0. The table is only ever read, hence const.
+ */
+static const uint16_t nla_attr_minlen[LIBBPF_NLA_TYPE_MAX+1] = {
+	[LIBBPF_NLA_U8] = sizeof(uint8_t),
+	[LIBBPF_NLA_U16] = sizeof(uint16_t),
+	[LIBBPF_NLA_U32] = sizeof(uint32_t),
+	[LIBBPF_NLA_U64] = sizeof(uint64_t),
+	[LIBBPF_NLA_STRING] = 1,
+	[LIBBPF_NLA_FLAG] = 0,
+};
+
+/*
+ * Step to the attribute following @nla and charge the aligned size of @nla
+ * to *@remaining.
+ */
+static struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
+{
+	int step = NLA_ALIGN(nla->nla_len);
+	const char *next = (const char *)nla + step;
+
+	*remaining -= step;
+
+	return (struct nlattr *)next;
+}
+
+/*
+ * Check that a complete attribute starts at @nla, given that @remaining bytes
+ * of the attribute stream are left.
+ *
+ * Returns non-zero if the attribute header fits and the length it announces
+ * is sane and within @remaining, 0 otherwise.
+ */
+static int nla_ok(const struct nlattr *nla, int remaining)
+{
+	/*
+	 * Compare as int: nla_next() subtracts the *aligned* attribute length
+	 * and can therefore leave @remaining negative. Compared against the
+	 * unsigned sizeof(), a negative value would pass this first test and
+	 * nla->nla_len would be read past the end of the stream.
+	 */
+	return remaining >= (int)sizeof(*nla) &&
+	       nla->nla_len >= sizeof(*nla) &&
+	       nla->nla_len <= remaining;
+}
+
+/* Attribute type of @nla with the bits outside NLA_TYPE_MASK stripped. */
+static int nla_type(const struct nlattr *nla)
+{
+	int type = nla->nla_type & NLA_TYPE_MASK;
+
+	return type;
+}
+
+/*
+ * Validate the payload of @nla against the @policy entry for its type.
+ *
+ * Attributes whose type is outside 0..@maxtype, or whose policy type is
+ * unknown, are accepted as-is. Otherwise the payload length must lie within
+ * the policy's bounds and string attributes must be NUL terminated.
+ *
+ * Returns 0 if the attribute is acceptable, -1 otherwise.
+ */
+static int validate_nla(struct nlattr *nla, int maxtype,
+			struct libbpf_nla_policy *policy)
+{
+	int type = nla_type(nla);
+	int payload_len = libbpf_nla_len(nla);
+	struct libbpf_nla_policy *pt;
+	unsigned int minlen;
+
+	if (type < 0 || type > maxtype)
+		return 0;
+
+	pt = &policy[type];
+	if (pt->type > LIBBPF_NLA_TYPE_MAX)
+		return 0;
+
+	/* An explicit minlen wins over the per-type default. */
+	if (pt->minlen)
+		minlen = pt->minlen;
+	else if (pt->type != LIBBPF_NLA_UNSPEC)
+		minlen = nla_attr_minlen[pt->type];
+	else
+		minlen = 0;
+
+	if (payload_len < minlen)
+		return -1;
+
+	if (pt->maxlen && payload_len > pt->maxlen)
+		return -1;
+
+	if (pt->type == LIBBPF_NLA_STRING) {
+		const char *str = libbpf_nla_data(nla);
+
+		if (str[payload_len - 1] != '\0')
+			return -1;
+	}
+
+	return 0;
+}
+
+/* Length of the message @nlh without its netlink header. */
+static inline int nlmsg_len(const struct nlmsghdr *nlh)
+{
+	int payload_len = nlh->nlmsg_len - NLMSG_HDRLEN;
+
+	return payload_len;
+}
+
+/**
+ * Create attribute index based on a stream of attributes.
+ * @arg tb Index array to be filled (maxtype+1 elements).
+ * @arg maxtype Maximum attribute type expected and accepted.
+ * @arg head Head of attribute stream.
+ * @arg len Length of attribute stream.
+ * @arg policy Attribute validation policy.
+ *
+ * Iterates over the stream of attributes and stores a pointer to each
+ * attribute in the index array using the attribute type as index to
+ * the array. Attribute with a type greater than the maximum type
+ * specified will be silently ignored in order to maintain backwards
+ * compatibility. If \a policy is not NULL, the attribute will be
+ * validated using the specified policy.
+ *
+ * @see nla_validate
+ * @return 0 on success or a negative error code.
+ */
+int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head,
+		     int len, struct libbpf_nla_policy *policy)
+{
+	struct nlattr *attr;
+	int remaining;
+
+	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
+
+	libbpf_nla_for_each_attr(attr, head, len, remaining) {
+		int type = nla_type(attr);
+
+		/* unknown (newer) attribute types are skipped silently */
+		if (type > maxtype)
+			continue;
+
+		if (policy) {
+			int err = validate_nla(attr, maxtype, policy);
+
+			if (err < 0)
+				return err;
+		}
+
+		/* the last occurrence of a type wins */
+		if (tb[type])
+			pr_warn("Attribute of type %#x found multiple times in message, "
+				"previous attribute is being ignored.\n", type);
+
+		tb[type] = attr;
+	}
+
+	return 0;
+}
+
+/**
+ * Create attribute index based on nested attribute
+ * @arg tb Index array to be filled (maxtype+1 elements).
+ * @arg maxtype Maximum attribute type expected and accepted.
+ * @arg nla Nested Attribute.
+ * @arg policy Attribute validation policy.
+ *
+ * Feeds the stream of attributes nested into the specified attribute
+ * to libbpf_nla_parse().
+ *
+ * @see libbpf_nla_parse
+ * @return 0 on success or a negative error code.
+ */
+int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype,
+			    struct nlattr *nla,
+			    struct libbpf_nla_policy *policy)
+{
+	/* the payload of @nla is itself a stream of attributes */
+	struct nlattr *nested_head = libbpf_nla_data(nla);
+	int nested_len = libbpf_nla_len(nla);
+
+	return libbpf_nla_parse(tb, maxtype, nested_head, nested_len, policy);
+}
+
+/*
+ * Dump the netlink extended ACK error message carried by @nlh, if any.
+ *
+ * @nlh is expected to be an NLMSG_ERROR message. When it carries extended ACK
+ * TLVs (NLM_F_ACK_TLVS) they are parsed, and the NLMSGERR_ATTR_MSG string, if
+ * present, is logged with pr_warn().
+ *
+ * Always returns 0.
+ */
+int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh)
+{
+	struct libbpf_nla_policy extack_policy[NLMSGERR_ATTR_MAX + 1] = {
+		[NLMSGERR_ATTR_MSG] = { .type = LIBBPF_NLA_STRING },
+		[NLMSGERR_ATTR_OFFS] = { .type = LIBBPF_NLA_U32 },
+	};
+	struct nlattr *tb[NLMSGERR_ATTR_MAX + 1], *attr;
+	struct nlmsgerr *err;
+	int hlen, alen;
+
+	/* no TLVs, nothing to do here */
+	if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS))
+		return 0;
+
+	err = (struct nlmsgerr *)NLMSG_DATA(nlh);
+	hlen = sizeof(*err);
+
+	/* if NLM_F_CAPPED is set then the inner err msg was capped */
+	if (!(nlh->nlmsg_flags & NLM_F_CAPPED))
+		hlen += nlmsg_len(&err->msg);
+
+	attr = (struct nlattr *)((char *)err + hlen);
+	/*
+	 * Bytes left between the first TLV and the end of the message. @err
+	 * itself starts after the outer netlink header, so measuring from
+	 * @attr (rather than nlmsg_len - hlen) keeps the walk inside the
+	 * message.
+	 */
+	alen = (char *)nlh + nlh->nlmsg_len - (char *)attr;
+
+	if (alen < 0 ||
+	    libbpf_nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen,
+			     extack_policy) != 0) {
+		pr_warn("Failed to parse extended error attributes\n");
+		return 0;
+	}
+
+	/* only log when there is a message: NULL must not be passed to %s */
+	if (tb[NLMSGERR_ATTR_MSG])
+		pr_warn("Kernel error message: %s\n",
+			(char *)libbpf_nla_data(tb[NLMSGERR_ATTR_MSG]));
+
+	return 0;
+}
diff --git a/src/contrib/libbpf/bpf/nlattr.h b/src/contrib/libbpf/bpf/nlattr.h
new file mode 100644
index 0000000..6cc3ac9
--- /dev/null
+++ b/src/contrib/libbpf/bpf/nlattr.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * NETLINK Netlink attributes
+ *
+ * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch>
+ */
+
+#ifndef __LIBBPF_NLATTR_H
+#define __LIBBPF_NLATTR_H
+
+#include <stdint.h>
+#include <linux/netlink.h>
+/* avoid multiple definition of netlink features */
+#define __LINUX_NETLINK_H
+
+/**
+ * Standard attribute types to specify validation policy
+ */
+enum {
+ LIBBPF_NLA_UNSPEC, /**< Unspecified type, binary data chunk */
+ LIBBPF_NLA_U8, /**< 8 bit integer */
+ LIBBPF_NLA_U16, /**< 16 bit integer */
+ LIBBPF_NLA_U32, /**< 32 bit integer */
+ LIBBPF_NLA_U64, /**< 64 bit integer */
+ LIBBPF_NLA_STRING, /**< NUL terminated character string */
+ LIBBPF_NLA_FLAG, /**< Flag */
+ LIBBPF_NLA_MSECS, /**< Micro seconds (64bit) -- NOTE(review): "msecs" usually abbreviates milliseconds; confirm the unit */
+ LIBBPF_NLA_NESTED, /**< Nested attributes */
+ __LIBBPF_NLA_TYPE_MAX, /**< Number of types; LIBBPF_NLA_TYPE_MAX is defined as this value minus 1 */
+};
+
+#define LIBBPF_NLA_TYPE_MAX (__LIBBPF_NLA_TYPE_MAX - 1)
+
+/**
+ * @ingroup attr
+ * Attribute validation policy.
+ *
+ * See section @core_doc{core_attr_parse,Attribute Parsing} for more details.
+ */
+struct libbpf_nla_policy {
+ /** Type of attribute or LIBBPF_NLA_UNSPEC */
+ uint16_t type;
+
+ /** Minimal length of payload required; 0 makes validate_nla() use the default for the type */
+ uint16_t minlen;
+
+ /** Maximal length of payload allowed; 0 means validate_nla() applies no upper bound */
+ uint16_t maxlen;
+};
+
+/**
+ * @ingroup attr
+ * Iterate over a stream of attributes
+ * @arg pos loop counter, set to current attribute
+ * @arg head head of attribute stream
+ * @arg len length of attribute stream
+ * @arg rem initialized to len, holds bytes currently remaining in stream
+ */
+#define libbpf_nla_for_each_attr(pos, head, len, rem) \
+	for ((pos) = (head), (rem) = (len); \
+	     nla_ok((pos), (rem)); \
+	     (pos) = nla_next((pos), &(rem)))
+
+/**
+ * libbpf_nla_data - head of payload
+ * @nla: netlink attribute
+ */
+static inline void *libbpf_nla_data(const struct nlattr *nla)
+{
+	/* the payload starts right after the attribute header */
+	char *payload = (char *)nla + NLA_HDRLEN;
+
+	return payload;
+}
+
+/* Read the first payload byte of @nla as a uint8_t. */
+static inline uint8_t libbpf_nla_getattr_u8(const struct nlattr *nla)
+{
+	const uint8_t *value = (uint8_t *)libbpf_nla_data(nla);
+
+	return *value;
+}
+
+/* Read the first four payload bytes of @nla as a uint32_t. */
+static inline uint32_t libbpf_nla_getattr_u32(const struct nlattr *nla)
+{
+	const uint32_t *value = (uint32_t *)libbpf_nla_data(nla);
+
+	return *value;
+}
+
+/* Return the payload of @nla viewed as a character string. */
+static inline const char *libbpf_nla_getattr_str(const struct nlattr *nla)
+{
+	const char *str = (const char *)libbpf_nla_data(nla);
+
+	return str;
+}
+
+/**
+ * libbpf_nla_len - length of payload
+ * @nla: netlink attribute
+ */
+static inline int libbpf_nla_len(const struct nlattr *nla)
+{
+	/* nla_len covers header and payload; strip the header */
+	int payload_len = nla->nla_len - NLA_HDRLEN;
+
+	return payload_len;
+}
+
+int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head,
+ int len, struct libbpf_nla_policy *policy);
+int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype,
+ struct nlattr *nla,
+ struct libbpf_nla_policy *policy);
+
+int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh);
+
+#endif /* __LIBBPF_NLATTR_H */
diff --git a/src/contrib/libbpf/bpf/str_error.c b/src/contrib/libbpf/bpf/str_error.c
new file mode 100644
index 0000000..b8064ee
--- /dev/null
+++ b/src/contrib/libbpf/bpf/str_error.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+#undef _GNU_SOURCE
+#include <string.h>
+#include <stdio.h>
+#include "str_error.h"
+
+/*
+ * Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl
+ * libc, while checking strerror_r() return to avoid having to check this in
+ * all places calling it.
+ */
+char *libbpf_strerror_r(int err, char *dst, int len)
+{
+	int errnum = err < 0 ? -err : err;
+	int ret;
+
+	/* non-zero means strerror_r() failed: describe that failure instead */
+	ret = strerror_r(errnum, dst, len);
+	if (ret != 0)
+		snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret);
+
+	return dst;
+}
diff --git a/src/contrib/libbpf/bpf/str_error.h b/src/contrib/libbpf/bpf/str_error.h
new file mode 100644
index 0000000..a139334
--- /dev/null
+++ b/src/contrib/libbpf/bpf/str_error.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __LIBBPF_STR_ERROR_H
+#define __LIBBPF_STR_ERROR_H
+
+char *libbpf_strerror_r(int err, char *dst, int len);
+#endif /* __LIBBPF_STR_ERROR_H */
diff --git a/src/contrib/libbpf/bpf/xsk.c b/src/contrib/libbpf/bpf/xsk.c
new file mode 100644
index 0000000..8e0ffa8
--- /dev/null
+++ b/src/contrib/libbpf/bpf/xsk.c
@@ -0,0 +1,797 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * AF_XDP user-space access library.
+ *
+ * Copyright(c) 2018 - 2019 Intel Corporation.
+ *
+ * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <asm/barrier.h>
+#include <linux/compiler.h>
+#include <linux/ethtool.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_xdp.h>
+#include <linux/sockios.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+#include "xsk.h"
+
+#ifndef SOL_XDP
+ #define SOL_XDP 283
+#endif
+
+#ifndef AF_XDP
+ #define AF_XDP 44
+#endif
+
+#ifndef PF_XDP
+ #define PF_XDP AF_XDP
+#endif
+
+/* Library-side state of one UMEM created by xsk_umem__create(). */
+struct xsk_umem {
+ struct xsk_ring_prod *fill; /* caller-supplied fill ring, initialised at create time */
+ struct xsk_ring_cons *comp; /* caller-supplied completion ring, initialised at create time */
+ char *umem_area; /* start of the caller-supplied buffer area */
+ struct xsk_umem_config config; /* effective config (defaults applied for a NULL user config) */
+ int fd; /* AF_XDP socket the UMEM is registered on */
+ int refcount; /* incremented by xsk_socket__create(), decremented by xsk_socket__delete() */
+};
+
+/* Library-side state of one AF_XDP socket created by xsk_socket__create(). */
+struct xsk_socket {
+ struct xsk_ring_cons *rx; /* caller-supplied RX ring, NULL when none was requested */
+ struct xsk_ring_prod *tx; /* caller-supplied TX ring, NULL when none was requested */
+ __u64 outstanding_tx; /* set to 0 at creation */
+ struct xsk_umem *umem; /* UMEM this socket uses */
+ struct xsk_socket_config config; /* effective config (defaults applied for a NULL user config) */
+ int fd; /* socket fd; the first socket on a umem reuses umem->fd */
+ int ifindex; /* result of if_nametoindex(ifname) */
+ int prog_fd; /* XDP program fd, or -1 when program setup was inhibited */
+ int xsks_map_fd; /* fd of the map named "xsks_map" */
+ __u32 queue_id; /* queue id the socket is bound to; also the key used in xsks_map */
+ char ifname[IFNAMSIZ]; /* NUL-terminated copy of the interface name */
+};
+
+/*
+ * NOTE(review): this struct is not referenced by any function in this file;
+ * it looks like a leftover from an earlier netlink-based implementation.
+ * Confirm before relying on it.
+ */
+struct xsk_nl_info {
+ bool xdp_prog_attached;
+ int ifindex;
+ int fd;
+};
+
+/*
+ * Up until and including Linux 5.3
+ *
+ * Per-ring offsets as returned by the XDP_MMAP_OFFSETS getsockopt on those
+ * kernels: the same as the current layout, but without a flags field.
+ */
+struct xdp_ring_offset_v1 {
+ __u64 producer;
+ __u64 consumer;
+ __u64 desc;
+};
+
+/*
+ * Up until and including Linux 5.3
+ *
+ * Complete XDP_MMAP_OFFSETS reply in the old layout. Its size is what
+ * xsk_get_mmap_offsets() uses to recognise a reply from such a kernel.
+ */
+struct xdp_mmap_offsets_v1 {
+ struct xdp_ring_offset_v1 rx;
+ struct xdp_ring_offset_v1 tx;
+ struct xdp_ring_offset_v1 fr;
+ struct xdp_ring_offset_v1 cr;
+};
+
+/* Return the file descriptor of @umem, or -EINVAL when @umem is NULL. */
+int xsk_umem__fd(const struct xsk_umem *umem)
+{
+	if (!umem)
+		return -EINVAL;
+
+	return umem->fd;
+}
+
+/* Return the file descriptor of @xsk, or -EINVAL when @xsk is NULL. */
+int xsk_socket__fd(const struct xsk_socket *xsk)
+{
+	if (!xsk)
+		return -EINVAL;
+
+	return xsk->fd;
+}
+
+/* Tell whether @buffer starts on a multiple of getpagesize(). */
+static bool xsk_page_aligned(void *buffer)
+{
+	unsigned long page_mask = getpagesize() - 1;
+
+	return ((unsigned long)buffer & page_mask) == 0;
+}
+
+/*
+ * Fill @cfg from @usr_cfg. When the caller supplied no configuration, the
+ * XSK_* library defaults are used instead.
+ */
+static void xsk_set_umem_config(struct xsk_umem_config *cfg,
+				const struct xsk_umem_config *usr_cfg)
+{
+	if (usr_cfg) {
+		cfg->fill_size = usr_cfg->fill_size;
+		cfg->comp_size = usr_cfg->comp_size;
+		cfg->frame_size = usr_cfg->frame_size;
+		cfg->frame_headroom = usr_cfg->frame_headroom;
+		cfg->flags = usr_cfg->flags;
+		return;
+	}
+
+	/* No user configuration: fall back to the defaults. */
+	cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+	cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+	cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+	cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
+	cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
+}
+
+/*
+ * Fill @cfg from @usr_cfg, or with defaults when @usr_cfg is NULL.
+ *
+ * Returns 0 on success, or -EINVAL when @usr_cfg sets a libbpf_flags bit
+ * other than XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD; @cfg is untouched then.
+ */
+static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
+				     const struct xsk_socket_config *usr_cfg)
+{
+	if (usr_cfg &&
+	    (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD))
+		return -EINVAL;
+
+	cfg->rx_size = usr_cfg ? usr_cfg->rx_size :
+				 XSK_RING_CONS__DEFAULT_NUM_DESCS;
+	cfg->tx_size = usr_cfg ? usr_cfg->tx_size :
+				 XSK_RING_PROD__DEFAULT_NUM_DESCS;
+	cfg->libbpf_flags = usr_cfg ? usr_cfg->libbpf_flags : 0;
+	cfg->xdp_flags = usr_cfg ? usr_cfg->xdp_flags : 0;
+	cfg->bind_flags = usr_cfg ? usr_cfg->bind_flags : 0;
+
+	return 0;
+}
+
+/*
+ * Convert the offsets of one ring from the <= 5.3 layout to the current one.
+ * The old layout has no flags offset, so it is placed sizeof(__u32) bytes
+ * after the consumer offset.
+ */
+static void xsk_ring_offset_from_v1(struct xdp_ring_offset *dst,
+				    const struct xdp_ring_offset_v1 *src)
+{
+	dst->producer = src->producer;
+	dst->consumer = src->consumer;
+	dst->desc = src->desc;
+	dst->flags = src->consumer + sizeof(__u32);
+}
+
+/*
+ * getsockopt on a kernel <= 5.3 has no flags fields. @off holds such a reply
+ * on entry; rewrite it in place into the >= 5.4 format.
+ */
+static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off)
+{
+	struct xdp_mmap_offsets_v1 old;
+
+	/* Snapshot the old-format data first: the fields below overlap it. */
+	memcpy(&old, off, sizeof(old));
+
+	xsk_ring_offset_from_v1(&off->rx, &old.rx);
+	xsk_ring_offset_from_v1(&off->tx, &old.tx);
+	xsk_ring_offset_from_v1(&off->fr, &old.fr);
+	xsk_ring_offset_from_v1(&off->cr, &old.cr);
+}
+
+/*
+ * Query the ring mmap offsets of AF_XDP socket @fd into @off.
+ *
+ * A reply whose size matches struct xdp_mmap_offsets_v1 (kernels up to and
+ * including 5.3) is converted in place to the current layout.
+ *
+ * Returns 0 on success or a negative errno value. errno is valid on every
+ * failure path, because callers in this file report the failure as -errno.
+ */
+static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
+{
+	socklen_t optlen = sizeof(*off);
+
+	if (getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen))
+		return -errno;
+
+	if (optlen == sizeof(*off))
+		return 0;
+
+	if (optlen == sizeof(struct xdp_mmap_offsets_v1)) {
+		xsk_mmap_offsets_v1(off);
+		return 0;
+	}
+
+	/* getsockopt() succeeded, so errno is stale here. Set it explicitly
+	 * so that a caller computing -errno does not end up with 0 or an
+	 * unrelated error code.
+	 */
+	errno = EINVAL;
+	return -EINVAL;
+}
+
+/*
+ * Create a UMEM: open an AF_XDP socket, register @umem_area on it, create
+ * the fill and completion rings and map them into @fill and @comp.
+ *
+ * @umem_ptr:   receives the new umem handle on success
+ * @umem_area:  caller-owned buffer memory
+ * @size:       length of @umem_area in bytes
+ * @fill:       caller-owned fill ring descriptor, initialised here
+ * @comp:       caller-owned completion ring descriptor, initialised here
+ * @usr_config: configuration, or NULL for the defaults
+ *
+ * Returns 0 on success, -EFAULT for a NULL mandatory argument, -EINVAL,
+ * -ENOMEM, or the negative errno of the failing system call. On failure the
+ * socket is closed and a fill-ring mapping that was already made is undone.
+ */
+int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
+			    __u64 size, struct xsk_ring_prod *fill,
+			    struct xsk_ring_cons *comp,
+			    const struct xsk_umem_config *usr_config)
+{
+	struct xdp_mmap_offsets off;
+	struct xdp_umem_reg mr;
+	struct xsk_umem *umem;
+	void *fill_map, *comp_map;
+	size_t fill_map_len;
+	int err;
+
+	if (!umem_area || !umem_ptr || !fill || !comp)
+		return -EFAULT;
+	/* NOTE(review): this only rejects an area that is both zero-sized and
+	 * unaligned; the condition is kept exactly as it was.
+	 */
+	if (!size && !xsk_page_aligned(umem_area))
+		return -EINVAL;
+
+	umem = calloc(1, sizeof(*umem));
+	if (!umem)
+		return -ENOMEM;
+
+	umem->fd = socket(AF_XDP, SOCK_RAW, 0);
+	if (umem->fd < 0) {
+		err = -errno;
+		goto out_umem_alloc;
+	}
+
+	umem->umem_area = umem_area;
+	xsk_set_umem_config(&umem->config, usr_config);
+
+	memset(&mr, 0, sizeof(mr));
+	mr.addr = (uintptr_t)umem_area;
+	mr.len = size;
+	mr.chunk_size = umem->config.frame_size;
+	mr.headroom = umem->config.frame_headroom;
+	mr.flags = umem->config.flags;
+
+	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
+	if (err) {
+		err = -errno;
+		goto out_socket;
+	}
+	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_FILL_RING,
+			 &umem->config.fill_size,
+			 sizeof(umem->config.fill_size));
+	if (err) {
+		err = -errno;
+		goto out_socket;
+	}
+	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
+			 &umem->config.comp_size,
+			 sizeof(umem->config.comp_size));
+	if (err) {
+		err = -errno;
+		goto out_socket;
+	}
+
+	err = xsk_get_mmap_offsets(umem->fd, &off);
+	if (err) {
+		/* -1 is a raw getsockopt() failure with the cause in errno;
+		 * any other value is already a negative errno code.
+		 */
+		if (err == -1)
+			err = -errno;
+		goto out_socket;
+	}
+
+	fill_map_len = off.fr.desc + umem->config.fill_size * sizeof(__u64);
+	fill_map = mmap(NULL, fill_map_len,
+			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+			umem->fd, XDP_UMEM_PGOFF_FILL_RING);
+	if (fill_map == MAP_FAILED) {
+		err = -errno;
+		goto out_socket;
+	}
+
+	umem->fill = fill;
+	fill->mask = umem->config.fill_size - 1;
+	fill->size = umem->config.fill_size;
+	fill->producer = fill_map + off.fr.producer;
+	fill->consumer = fill_map + off.fr.consumer;
+	fill->flags = fill_map + off.fr.flags;
+	fill->ring = fill_map + off.fr.desc;
+	/* cached_cons is kept one ring size ahead of the real consumer. */
+	fill->cached_cons = umem->config.fill_size;
+
+	comp_map = mmap(NULL,
+			off.cr.desc + umem->config.comp_size * sizeof(__u64),
+			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+			umem->fd, XDP_UMEM_PGOFF_COMPLETION_RING);
+	if (comp_map == MAP_FAILED) {
+		err = -errno;
+		goto out_mmap;
+	}
+
+	umem->comp = comp;
+	comp->mask = umem->config.comp_size - 1;
+	comp->size = umem->config.comp_size;
+	comp->producer = comp_map + off.cr.producer;
+	comp->consumer = comp_map + off.cr.consumer;
+	comp->flags = comp_map + off.cr.flags;
+	comp->ring = comp_map + off.cr.desc;
+
+	*umem_ptr = umem;
+	return 0;
+
+out_mmap:
+	/* Unmap the fill ring. Keeping the two mappings in separate
+	 * variables ensures MAP_FAILED from the second mmap() is never
+	 * passed to munmap() here.
+	 */
+	munmap(fill_map, fill_map_len);
+out_socket:
+	close(umem->fd);
+out_umem_alloc:
+	free(umem);
+	return err;
+}
+
+/*
+ * Layout of the umem configuration as consumed by xsk_umem__create_v0_0_2():
+ * the leading fields of struct xsk_umem_config, without the flags member.
+ */
+struct xsk_umem_config_v1 {
+ __u32 fill_size;
+ __u32 comp_size;
+ __u32 frame_size;
+ __u32 frame_headroom;
+};
+
+/*
+ * Compatibility entry point for callers built against the configuration
+ * layout without a flags field (struct xsk_umem_config_v1).
+ *
+ * Only the v1-sized prefix of @usr_config is read; flags is forced to 0. A
+ * NULL @usr_config selects the default configuration, as it does for
+ * xsk_umem__create_v0_0_4(), whose return value is passed through.
+ */
+int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area,
+			    __u64 size, struct xsk_ring_prod *fill,
+			    struct xsk_ring_cons *comp,
+			    const struct xsk_umem_config *usr_config)
+{
+	struct xsk_umem_config config;
+
+	/* Nothing to convert for NULL; copying from it would crash. */
+	if (!usr_config)
+		return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size,
+					       fill, comp, NULL);
+
+	memcpy(&config, usr_config, sizeof(struct xsk_umem_config_v1));
+	config.flags = 0;
+
+	return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp,
+				       &config);
+}
+COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2)
+DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4)
+
+/*
+ * Load the built-in XDP program that redirects packets through xsks_map and
+ * attach it to xsk->ifindex with xsk->config.xdp_flags.
+ *
+ * xsk->xsks_map_fd is embedded in the program, so it must be set before this
+ * is called. On success the program fd is stored in xsk->prog_fd and 0 is
+ * returned. Otherwise the error from bpf_load_program() or
+ * bpf_set_link_xdp_fd() is returned, and the program fd is closed in the
+ * latter case.
+ */
+static int xsk_load_xdp_prog(struct xsk_socket *xsk)
+{
+ static const int log_buf_size = 16 * 1024;
+ char log_buf[log_buf_size];
+ int err, prog_fd;
+
+ /* This is the C-program:
+ * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
+ * {
+ * int ret, index = ctx->rx_queue_index;
+ *
+ * // A set entry here means that the corresponding queue_id
+ * // has an active AF_XDP socket bound to it.
+ * ret = bpf_redirect_map(&xsks_map, index, XDP_PASS);
+ * if (ret > 0)
+ * return ret;
+ *
+ * // Fallback for pre-5.3 kernels, not supporting default
+ * // action in the flags parameter.
+ * if (bpf_map_lookup_elem(&xsks_map, &index))
+ * return bpf_redirect_map(&xsks_map, index, 0);
+ * return XDP_PASS;
+ * }
+ */
+ struct bpf_insn prog[] = {
+ /* r2 = *(u32 *)(r1 + 16) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16),
+ /* *(u32 *)(r10 - 4) = r2 */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4),
+ /* r1 = xskmap[] */
+ BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
+ /* r3 = XDP_PASS */
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ /* call bpf_redirect_map */
+ BPF_EMIT_CALL(BPF_FUNC_redirect_map),
+ /* if w0 s> 0 goto pc+13 (signed compare, matches "ret > 0" above) */
+ BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13),
+ /* r2 = r10 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ /* r2 += -4 */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ /* r1 = xskmap[] */
+ BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
+ /* call bpf_map_lookup_elem */
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ /* r1 = r0 */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ /* r0 = XDP_PASS */
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ /* if r1 == 0 goto pc+5 */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
+ /* r2 = *(u32 *)(r10 - 4) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
+ /* r1 = xskmap[] */
+ BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
+ /* r3 = 0 */
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ /* call bpf_redirect_map */
+ BPF_EMIT_CALL(BPF_FUNC_redirect_map),
+ /* The jumps are to this instruction */
+ BPF_EXIT_INSN(),
+ };
+ /* Each BPF_LD_MAP_FD expands to two struct bpf_insn entries, which the
+ * jump offsets above account for.
+ */
+ size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+
+ prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt,
+ "LGPL-2.1 or BSD-2-Clause", 0, log_buf,
+ log_buf_size);
+ if (prog_fd < 0) {
+ pr_warn("BPF log buffer:\n%s", log_buf);
+ return prog_fd;
+ }
+
+ err = bpf_set_link_xdp_fd(xsk->ifindex, prog_fd, xsk->config.xdp_flags);
+ if (err) {
+ close(prog_fd);
+ return err;
+ }
+
+ xsk->prog_fd = prog_fd;
+ return 0;
+}
+
+/*
+ * Ask the driver of xsk->ifname (ETHTOOL_GCHANNELS) how many queues it can
+ * have.
+ *
+ * Returns the largest of max_rx, max_tx and max_combined, 1 when the device
+ * does not support the query (EOPNOTSUPP), or a negative errno on failure.
+ */
+static int xsk_get_max_queues(struct xsk_socket *xsk)
+{
+	struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
+	struct ifreq ifr = {};
+	int sock, queues;
+
+	sock = socket(AF_INET, SOCK_DGRAM, 0);
+	if (sock < 0)
+		return -errno;
+
+	ifr.ifr_data = (void *)&channels;
+	memcpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1);
+	ifr.ifr_name[IFNAMSIZ - 1] = '\0';
+
+	if (!ioctl(sock, SIOCETHTOOL, &ifr)) {
+		/* Take the max of rx, tx, combined. Drivers return
+		 * the number of channels in different ways.
+		 */
+		queues = max(channels.max_rx, channels.max_tx);
+		queues = max(queues, (int)channels.max_combined);
+	} else if (errno == EOPNOTSUPP) {
+		/* If the device says it has no channels, then all traffic
+		 * is sent to a single stream, so max queues = 1.
+		 */
+		queues = 1;
+	} else {
+		queues = -errno;
+	}
+
+	close(sock);
+	return queues;
+}
+
+/*
+ * Create the BPF_MAP_TYPE_XSKMAP named "xsks_map", sized by the queue count
+ * reported by xsk_get_max_queues(), and store its fd in xsk->xsks_map_fd.
+ *
+ * Returns 0 on success or the negative value returned by the failing step.
+ */
+static int xsk_create_bpf_maps(struct xsk_socket *xsk)
+{
+	int max_queues = xsk_get_max_queues(xsk);
+	int map_fd;
+
+	if (max_queues < 0)
+		return max_queues;
+
+	map_fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",
+				     sizeof(int), sizeof(int), max_queues, 0);
+	if (map_fd < 0)
+		return map_fd;
+
+	xsk->xsks_map_fd = map_fd;
+	return 0;
+}
+
+/*
+ * Remove this socket's queue_id entry from xsks_map and close the map fd.
+ * The result of the delete is ignored.
+ */
+static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
+{
+ bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
+ close(xsk->xsks_map_fd);
+}
+
+/*
+ * Find the map named "xsks_map" among the maps used by the program behind
+ * xsk->prog_fd and store an fd for it in xsk->xsks_map_fd.
+ *
+ * Returns 0 on success, -ENOENT when the program has no such map (the fd is
+ * then -1), -ENOMEM on allocation failure, or the error returned by
+ * bpf_obj_get_info_by_fd() for the program.
+ */
+static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
+{
+	__u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
+	__u32 map_len = sizeof(struct bpf_map_info);
+	struct bpf_prog_info prog_info = {};
+	struct bpf_map_info map_info;
+	int fd, err;
+
+	/* First query only learns how many maps the program uses. */
+	err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
+	if (err)
+		return err;
+
+	num_maps = prog_info.nr_map_ids;
+	if (!num_maps) {
+		/* calloc(0, ...) may return NULL, which would be reported as
+		 * -ENOMEM. A program without maps simply has no xsks_map.
+		 */
+		xsk->xsks_map_fd = -1;
+		return -ENOENT;
+	}
+
+	map_ids = calloc(num_maps, sizeof(*map_ids));
+	if (!map_ids)
+		return -ENOMEM;
+
+	/* Second query fills map_ids[] with the ids of those maps. */
+	memset(&prog_info, 0, prog_len);
+	prog_info.nr_map_ids = num_maps;
+	prog_info.map_ids = (__u64)(unsigned long)map_ids;
+
+	err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
+	if (err)
+		goto out_map_ids;
+
+	xsk->xsks_map_fd = -1;
+
+	for (i = 0; i < prog_info.nr_map_ids; i++) {
+		fd = bpf_map_get_fd_by_id(map_ids[i]);
+		if (fd < 0)
+			continue;
+
+		err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
+		if (err) {
+			close(fd);
+			continue;
+		}
+
+		if (!strcmp(map_info.name, "xsks_map")) {
+			/* Stop at the first match. Continuing would overwrite
+			 * and leak this fd if another map has the same name.
+			 */
+			xsk->xsks_map_fd = fd;
+			break;
+		}
+
+		close(fd);
+	}
+
+	err = 0;
+	if (xsk->xsks_map_fd == -1)
+		err = -ENOENT;
+
+out_map_ids:
+	free(map_ids);
+	return err;
+}
+
+/*
+ * Insert this socket into xsks_map: key is xsk->queue_id, value is xsk->fd.
+ * Returns the result of bpf_map_update_elem().
+ */
+static int xsk_set_bpf_maps(struct xsk_socket *xsk)
+{
+ return bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id,
+ &xsk->fd, 0);
+}
+
+/*
+ * Make sure an XDP program with an xsks_map is attached to xsk->ifindex, and
+ * register this socket in the map when it has an RX ring.
+ *
+ * If no program is attached yet, the map and the built-in program are
+ * created; otherwise the attached program is opened and its xsks_map looked
+ * up. Returns 0 on success or a negative error code.
+ */
+static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
+{
+	__u32 prog_id = 0;
+	int err;
+
+	err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id,
+				  xsk->config.xdp_flags);
+	if (err)
+		return err;
+
+	if (prog_id) {
+		/* Reuse the program that is already attached. */
+		xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
+		if (xsk->prog_fd < 0)
+			return -errno;
+
+		err = xsk_lookup_bpf_maps(xsk);
+		if (err) {
+			close(xsk->prog_fd);
+			return err;
+		}
+	} else {
+		/* Nothing attached: create the map, then load our program. */
+		err = xsk_create_bpf_maps(xsk);
+		if (err)
+			return err;
+
+		err = xsk_load_xdp_prog(xsk);
+		if (err) {
+			xsk_delete_bpf_maps(xsk);
+			return err;
+		}
+	}
+
+	/* Only sockets with an RX ring are entered into the map. */
+	if (!xsk->rx)
+		return 0;
+
+	err = xsk_set_bpf_maps(xsk);
+	if (!err)
+		return 0;
+
+	xsk_delete_bpf_maps(xsk);
+	close(xsk->prog_fd);
+	return err;
+}
+
+/*
+ * Create an AF_XDP socket on queue @queue_id of interface @ifname, backed by
+ * @umem, and map its RX and/or TX ring into @rx / @tx.
+ *
+ * @xsk_ptr:    receives the new socket handle on success
+ * @ifname:     interface name
+ * @queue_id:   queue to bind to
+ * @umem:       UMEM to use; the first socket reuses umem->fd, later ones get
+ *              their own fd and bind with XDP_SHARED_UMEM
+ * @rx, @tx:    caller-owned ring descriptors; at least one must be non-NULL
+ * @usr_config: configuration, or NULL for the defaults
+ *
+ * Unless XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD is set, an XDP program is also
+ * set up through xsk_setup_xdp_prog().
+ *
+ * Returns 0 on success, -EFAULT for a NULL mandatory argument, -EBUSY when a
+ * shared umem is requested without inhibiting the program load, -EINVAL,
+ * -ENOMEM, or the negative errno of the failing call. On failure the umem
+ * reference count is left as it was on entry.
+ */
+int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
+		       __u32 queue_id, struct xsk_umem *umem,
+		       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
+		       const struct xsk_socket_config *usr_config)
+{
+	void *rx_map = NULL, *tx_map = NULL;
+	struct sockaddr_xdp sxdp = {};
+	struct xdp_mmap_offsets off;
+	struct xsk_socket *xsk;
+	int err;
+
+	if (!umem || !xsk_ptr || !ifname || !(rx || tx))
+		return -EFAULT;
+
+	xsk = calloc(1, sizeof(*xsk));
+	if (!xsk)
+		return -ENOMEM;
+
+	err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
+	if (err)
+		goto out_xsk_alloc;
+
+	if (umem->refcount &&
+	    !(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
+		pr_warn("Error: shared umems not supported by libbpf supplied XDP program.\n");
+		err = -EBUSY;
+		goto out_xsk_alloc;
+	}
+
+	if (umem->refcount++ > 0) {
+		xsk->fd = socket(AF_XDP, SOCK_RAW, 0);
+		if (xsk->fd < 0) {
+			err = -errno;
+			/* Drop the reference taken above. There is no fd to
+			 * close, so out_socket is not the right target.
+			 */
+			umem->refcount--;
+			goto out_xsk_alloc;
+		}
+	} else {
+		xsk->fd = umem->fd;
+	}
+
+	xsk->outstanding_tx = 0;
+	xsk->queue_id = queue_id;
+	xsk->umem = umem;
+	xsk->ifindex = if_nametoindex(ifname);
+	if (!xsk->ifindex) {
+		err = -errno;
+		goto out_socket;
+	}
+	/* strncpy() stops at the terminator of @ifname, so a short name is
+	 * never read past its end. The explicit NUL covers truncation.
+	 */
+	strncpy(xsk->ifname, ifname, IFNAMSIZ - 1);
+	xsk->ifname[IFNAMSIZ - 1] = '\0';
+
+	if (rx) {
+		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
+				 &xsk->config.rx_size,
+				 sizeof(xsk->config.rx_size));
+		if (err) {
+			err = -errno;
+			goto out_socket;
+		}
+	}
+	if (tx) {
+		err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
+				 &xsk->config.tx_size,
+				 sizeof(xsk->config.tx_size));
+		if (err) {
+			err = -errno;
+			goto out_socket;
+		}
+	}
+
+	err = xsk_get_mmap_offsets(xsk->fd, &off);
+	if (err) {
+		/* -1 is a raw getsockopt() failure with the cause in errno;
+		 * any other value is already a negative errno code.
+		 */
+		if (err == -1)
+			err = -errno;
+		goto out_socket;
+	}
+
+	if (rx) {
+		rx_map = mmap(NULL, off.rx.desc +
+			      xsk->config.rx_size * sizeof(struct xdp_desc),
+			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+			      xsk->fd, XDP_PGOFF_RX_RING);
+		if (rx_map == MAP_FAILED) {
+			err = -errno;
+			goto out_socket;
+		}
+
+		rx->mask = xsk->config.rx_size - 1;
+		rx->size = xsk->config.rx_size;
+		rx->producer = rx_map + off.rx.producer;
+		rx->consumer = rx_map + off.rx.consumer;
+		rx->flags = rx_map + off.rx.flags;
+		rx->ring = rx_map + off.rx.desc;
+	}
+	xsk->rx = rx;
+
+	if (tx) {
+		tx_map = mmap(NULL, off.tx.desc +
+			      xsk->config.tx_size * sizeof(struct xdp_desc),
+			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+			      xsk->fd, XDP_PGOFF_TX_RING);
+		if (tx_map == MAP_FAILED) {
+			err = -errno;
+			goto out_mmap_rx;
+		}
+
+		tx->mask = xsk->config.tx_size - 1;
+		tx->size = xsk->config.tx_size;
+		tx->producer = tx_map + off.tx.producer;
+		tx->consumer = tx_map + off.tx.consumer;
+		tx->flags = tx_map + off.tx.flags;
+		tx->ring = tx_map + off.tx.desc;
+		/* cached_cons is kept one ring size ahead of the consumer. */
+		tx->cached_cons = xsk->config.tx_size;
+	}
+	xsk->tx = tx;
+
+	sxdp.sxdp_family = PF_XDP;
+	sxdp.sxdp_ifindex = xsk->ifindex;
+	sxdp.sxdp_queue_id = xsk->queue_id;
+	if (umem->refcount > 1) {
+		sxdp.sxdp_flags = XDP_SHARED_UMEM;
+		sxdp.sxdp_shared_umem_fd = umem->fd;
+	} else {
+		sxdp.sxdp_flags = xsk->config.bind_flags;
+	}
+
+	err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
+	if (err) {
+		err = -errno;
+		goto out_mmap_tx;
+	}
+
+	/* -1 tells xsk_socket__delete() that no program was set up. */
+	xsk->prog_fd = -1;
+
+	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
+		err = xsk_setup_xdp_prog(xsk);
+		if (err)
+			goto out_mmap_tx;
+	}
+
+	*xsk_ptr = xsk;
+	return 0;
+
+out_mmap_tx:
+	if (tx)
+		munmap(tx_map, off.tx.desc +
+		       xsk->config.tx_size * sizeof(struct xdp_desc));
+out_mmap_rx:
+	if (rx)
+		munmap(rx_map, off.rx.desc +
+		       xsk->config.rx_size * sizeof(struct xdp_desc));
+out_socket:
+	/* The first socket borrows umem->fd; only close an fd of our own. */
+	if (--umem->refcount)
+		close(xsk->fd);
+out_xsk_alloc:
+	free(xsk);
+	return err;
+}
+
+/*
+ * Unmap the fill and completion rings, close the umem socket and free @umem.
+ *
+ * Returns 0 on success (also for a NULL @umem), or -EBUSY while sockets
+ * still reference the umem. The rings are only unmapped when the mmap
+ * offsets can be queried.
+ */
+int xsk_umem__delete(struct xsk_umem *umem)
+{
+	struct xdp_mmap_offsets off;
+
+	if (!umem)
+		return 0;
+	if (umem->refcount)
+		return -EBUSY;
+
+	if (!xsk_get_mmap_offsets(umem->fd, &off)) {
+		size_t fill_len = off.fr.desc +
+				  umem->config.fill_size * sizeof(__u64);
+		size_t comp_len = off.cr.desc +
+				  umem->config.comp_size * sizeof(__u64);
+
+		/* ring points off.*.desc bytes into its mapping. */
+		munmap(umem->fill->ring - off.fr.desc, fill_len);
+		munmap(umem->comp->ring - off.cr.desc, comp_len);
+	}
+
+	close(umem->fd);
+	free(umem);
+
+	return 0;
+}
+
+/*
+ * Tear down @xsk: drop its xsks_map entry and program fd (when a program was
+ * set up), unmap its rings, release the umem reference and free the handle.
+ * A NULL @xsk is ignored.
+ */
+void xsk_socket__delete(struct xsk_socket *xsk)
+{
+	struct xdp_mmap_offsets off;
+
+	if (!xsk)
+		return;
+
+	if (xsk->prog_fd != -1) {
+		xsk_delete_bpf_maps(xsk);
+		close(xsk->prog_fd);
+	}
+
+	if (!xsk_get_mmap_offsets(xsk->fd, &off)) {
+		const size_t desc_sz = sizeof(struct xdp_desc);
+
+		/* ring points off.*.desc bytes into its mapping. */
+		if (xsk->rx)
+			munmap(xsk->rx->ring - off.rx.desc,
+			       off.rx.desc + xsk->config.rx_size * desc_sz);
+		if (xsk->tx)
+			munmap(xsk->tx->ring - off.tx.desc,
+			       off.tx.desc + xsk->config.tx_size * desc_sz);
+	}
+
+	xsk->umem->refcount--;
+
+	/* Do not close an fd that also has an associated umem connected
+	 * to it.
+	 */
+	if (xsk->fd != xsk->umem->fd)
+		close(xsk->fd);
+
+	free(xsk);
+}
diff --git a/src/contrib/libbpf/bpf/xsk.h b/src/contrib/libbpf/bpf/xsk.h
new file mode 100644
index 0000000..584f682
--- /dev/null
+++ b/src/contrib/libbpf/bpf/xsk.h
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * AF_XDP user-space access library.
+ *
+ * Copyright(c) 2018 - 2019 Intel Corporation.
+ *
+ * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#ifndef __LIBBPF_XSK_H
+#define __LIBBPF_XSK_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include <linux/if_xdp.h>
+
+#include "libbpf.h"
+#include "libbpf_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Do not access these members directly. Use the functions below.
+ *
+ * cached_prod / cached_cons: local copies of the ring indices used by the
+ * inline helpers below.
+ * mask / size: used to wrap an index into the ring; the create functions
+ * set mask to size - 1.
+ * producer / consumer / flags: pointers to the shared index and flag words.
+ * ring: start of the descriptor array.
+ */
+#define DEFINE_XSK_RING(name) \
+struct name { \
+ __u32 cached_prod; \
+ __u32 cached_cons; \
+ __u32 mask; \
+ __u32 size; \
+ __u32 *producer; \
+ __u32 *consumer; \
+ void *ring; \
+ __u32 *flags; \
+}
+
+DEFINE_XSK_RING(xsk_ring_prod);
+DEFINE_XSK_RING(xsk_ring_cons);
+
+/* For a detailed explanation on the memory barriers associated with the
+ * ring, please take a look at net/xdp/xsk_queue.h.
+ */
+
+struct xsk_umem;
+struct xsk_socket;
+
+/* Return the fill-ring slot for @idx; @idx is wrapped with the ring mask. */
+static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill,
+					      __u32 idx)
+{
+	return (__u64 *)fill->ring + (idx & fill->mask);
+}
+
+/* Return the completion-ring slot for @idx; @idx is wrapped with the mask. */
+static inline const __u64 *
+xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx)
+{
+	return (const __u64 *)comp->ring + (idx & comp->mask);
+}
+
+/* Return the TX descriptor for @idx; @idx is wrapped with the ring mask. */
+static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx,
+						      __u32 idx)
+{
+	return (struct xdp_desc *)tx->ring + (idx & tx->mask);
+}
+
+/* Return the RX descriptor for @idx; @idx is wrapped with the ring mask. */
+static inline const struct xdp_desc *
+xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx)
+{
+	return (const struct xdp_desc *)rx->ring + (idx & rx->mask);
+}
+
+/* Return non-zero when XDP_RING_NEED_WAKEUP is set in the ring's flags word. */
+static inline int xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r)
+{
+ return *r->flags & XDP_RING_NEED_WAKEUP;
+}
+
+/*
+ * Return the number of free entries in producer ring @r. The shared consumer
+ * index is only re-read when the cached values show fewer than @nb free
+ * entries.
+ */
+static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
+{
+ __u32 free_entries = r->cached_cons - r->cached_prod;
+
+ if (free_entries >= nb)
+ return free_entries;
+
+ /* Refresh the local tail pointer.
+ * cached_cons is r->size bigger than the real consumer pointer so
+ * that this addition can be avoided in the more frequently
+ * executed code that computes free_entries in the beginning of
+ * this function. Without this optimization it would have been
+ * free_entries = r->cached_prod - r->cached_cons + r->size.
+ */
+ r->cached_cons = *r->consumer + r->size;
+
+ return r->cached_cons - r->cached_prod;
+}
+
+/*
+ * Return how many entries can be consumed from @r, capped at @nb. The shared
+ * producer index is only re-read when the cached values show an empty ring.
+ */
+static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
+{
+	__u32 avail = r->cached_prod - r->cached_cons;
+
+	if (!avail) {
+		r->cached_prod = *r->producer;
+		avail = r->cached_prod - r->cached_cons;
+	}
+
+	if (avail > nb)
+		avail = nb;
+
+	return avail;
+}
+
+/*
+ * Reserve @nb consecutive slots in producer ring @prod.
+ *
+ * All or nothing: returns @nb and stores the index of the first reserved
+ * slot in *@idx, or returns 0 and leaves *@idx untouched when fewer than @nb
+ * slots are free. Only the local cached_prod is advanced.
+ *
+ * NOTE(review): @nb is a size_t but is passed to the __u32 parameter of
+ * xsk_prod_nb_free(); values above UINT32_MAX would be truncated there.
+ */
+static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod,
+ size_t nb, __u32 *idx)
+{
+ if (xsk_prod_nb_free(prod, nb) < nb)
+ return 0;
+
+ *idx = prod->cached_prod;
+ prod->cached_prod += nb;
+
+ return nb;
+}
+
+/* Publish @nb filled entries by advancing the shared producer index. */
+static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb)
+{
+ /* Make sure everything has been written to the ring before indicating
+ * this to the kernel by writing the producer pointer.
+ */
+ libbpf_smp_wmb();
+
+ *prod->producer += nb;
+}
+
+/*
+ * Claim up to @nb entries from consumer ring @cons.
+ *
+ * Returns the number of entries claimed (possibly fewer than @nb). When it
+ * is non-zero, *@idx holds the index of the first claimed entry and the
+ * local cached_cons has been advanced; *@idx is untouched otherwise.
+ */
+static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons,
+ size_t nb, __u32 *idx)
+{
+ size_t entries = xsk_cons_nb_avail(cons, nb);
+
+ if (entries > 0) {
+ /* Make sure we do not speculatively read the data before
+ * we have received the packet buffers from the ring.
+ */
+ libbpf_smp_rmb();
+
+ *idx = cons->cached_cons;
+ cons->cached_cons += entries;
+ }
+
+ return entries;
+}
+
+/* Hand @nb consumed entries back by advancing the shared consumer index. */
+static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb)
+{
+ /* Make sure data has been read before indicating we are done
+ * with the entries by updating the consumer pointer.
+ */
+ libbpf_smp_rwmb();
+
+ *cons->consumer += nb;
+}
+
+/* Return a pointer to byte offset @addr inside @umem_area. */
+static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
+{
+	char *base = (char *)umem_area;
+
+	return base + addr;
+}
+
+/* Return the bits of @addr selected by XSK_UNALIGNED_BUF_ADDR_MASK. */
+static inline __u64 xsk_umem__extract_addr(__u64 addr)
+{
+ return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
+}
+
+/* Return the bits of @addr above XSK_UNALIGNED_BUF_OFFSET_SHIFT. */
+static inline __u64 xsk_umem__extract_offset(__u64 addr)
+{
+ return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+}
+
+/*
+ * Return the sum of the address part and the offset part of @addr, as split
+ * by xsk_umem__extract_addr() and xsk_umem__extract_offset().
+ */
+static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr)
+{
+ return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr);
+}
+
+LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem);
+LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk);
+
+#define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048
+#define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048
+#define XSK_UMEM__DEFAULT_FRAME_SHIFT 12 /* 4096 bytes */
+#define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
+#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
+#define XSK_UMEM__DEFAULT_FLAGS 0
+
+/* UMEM configuration; see the XSK_*__DEFAULT_* values above for the defaults. */
+struct xsk_umem_config {
+ __u32 fill_size; /* number of fill ring entries */
+ __u32 comp_size; /* number of completion ring entries */
+ __u32 frame_size; /* passed to the kernel as the UMEM chunk size */
+ __u32 frame_headroom; /* passed to the kernel as the UMEM headroom */
+ __u32 flags; /* passed to the kernel as the UMEM registration flags */
+};
+
+/* Flags for the libbpf_flags field. */
+#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)
+
+/* Socket configuration passed to xsk_socket__create(). */
+struct xsk_socket_config {
+ __u32 rx_size; /* number of RX ring entries */
+ __u32 tx_size; /* number of TX ring entries */
+ __u32 libbpf_flags; /* XSK_LIBBPF_FLAGS__* bits */
+ __u32 xdp_flags; /* flags used when querying/attaching the XDP program */
+ __u16 bind_flags; /* sxdp_flags for bind() when the umem is not shared */
+};
+
+/* Set config to NULL to get the default configuration. */
+LIBBPF_API int xsk_umem__create(struct xsk_umem **umem,
+ void *umem_area, __u64 size,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_umem_config *config);
+LIBBPF_API int xsk_umem__create_v0_0_2(struct xsk_umem **umem,
+ void *umem_area, __u64 size,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_umem_config *config);
+LIBBPF_API int xsk_umem__create_v0_0_4(struct xsk_umem **umem,
+ void *umem_area, __u64 size,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_umem_config *config);
+LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk,
+ const char *ifname, __u32 queue_id,
+ struct xsk_umem *umem,
+ struct xsk_ring_cons *rx,
+ struct xsk_ring_prod *tx,
+ const struct xsk_socket_config *config);
+
+/* Returns 0 for success and -EBUSY if the umem is still in use. */
+LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem);
+LIBBPF_API void xsk_socket__delete(struct xsk_socket *xsk);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __LIBBPF_XSK_H */
diff --git a/src/contrib/libbpf/include/asm/barrier.h b/src/contrib/libbpf/include/asm/barrier.h
new file mode 100644
index 0000000..1fc6aee
--- /dev/null
+++ b/src/contrib/libbpf/include/asm/barrier.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __ASM_BARRIER_H
+#define __ASM_BARRIER_H
+
+#include <linux/compiler.h>
+
+#endif
diff --git a/src/contrib/libbpf/include/linux/compiler.h b/src/contrib/libbpf/include/linux/compiler.h
new file mode 100644
index 0000000..26336dc
--- /dev/null
+++ b/src/contrib/libbpf/include/linux/compiler.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __LINUX_COMPILER_H
+#define __LINUX_COMPILER_H
+
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+
+#define READ_ONCE(x) (*(volatile typeof(x) *)&x)
+#define WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v)
+
+#define barrier() asm volatile("" ::: "memory")
+
+#if defined(__x86_64__)
+
+# define smp_rmb() barrier()
+# define smp_wmb() barrier()
+# define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
+
+# define smp_store_release(p, v) \
+do { \
+ barrier(); \
+ WRITE_ONCE(*p, v); \
+} while (0)
+
+# define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p = READ_ONCE(*p); \
+ barrier(); \
+ ___p; \
+})
+
+#elif defined(__aarch64__)
+
+# define smp_rmb() asm volatile("dmb ishld" ::: "memory")
+# define smp_wmb() asm volatile("dmb ishst" ::: "memory")
+# define smp_mb() asm volatile("dmb ish" ::: "memory")
+
+#endif
+
+#ifndef smp_mb
+# define smp_mb() __sync_synchronize()
+#endif
+
+#ifndef smp_rmb
+# define smp_rmb() smp_mb()
+#endif
+
+#ifndef smp_wmb
+# define smp_wmb() smp_mb()
+#endif
+
+#ifndef smp_store_release
+# define smp_store_release(p, v) \
+do { \
+ smp_mb(); \
+ WRITE_ONCE(*p, v); \
+} while (0)
+#endif
+
+#ifndef smp_load_acquire
+# define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p = READ_ONCE(*p); \
+ smp_mb(); \
+ ___p; \
+})
+#endif
+
+#endif /* __LINUX_COMPILER_H */
diff --git a/src/contrib/libbpf/include/linux/err.h b/src/contrib/libbpf/include/linux/err.h
new file mode 100644
index 0000000..1b1dafb
--- /dev/null
+++ b/src/contrib/libbpf/include/linux/err.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __LINUX_ERR_H
+#define __LINUX_ERR_H
+
+#include <linux/types.h>
+#include <asm/errno.h>
+
+#define MAX_ERRNO 4095
+
+#define IS_ERR_VALUE(x) ((x) >= (unsigned long)-MAX_ERRNO)
+
+/* Encode the error value @error_ as a pointer (plain cast). */
+static inline void * ERR_PTR(long error_)
+{
+ return (void *) error_;
+}
+
+/* Return the value of @ptr as a long (plain cast); the inverse of ERR_PTR(). */
+static inline long PTR_ERR(const void *ptr)
+{
+ return (long) ptr;
+}
+
+/*
+ * Tell whether @ptr encodes an error, i.e. its value lies in the top
+ * MAX_ERRNO values of the unsigned long range.
+ */
+static inline bool IS_ERR(const void *ptr)
+{
+ return IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static inline bool IS_ERR_OR_NULL(const void *ptr)
+{
+ return (!ptr) || IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static inline long PTR_ERR_OR_ZERO(const void *ptr)
+{
+ return IS_ERR(ptr) ? PTR_ERR(ptr) : 0;
+}
+
+#endif
diff --git a/src/contrib/libbpf/include/linux/filter.h b/src/contrib/libbpf/include/linux/filter.h
new file mode 100644
index 0000000..b0700e2
--- /dev/null
+++ b/src/contrib/libbpf/include/linux/filter.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __LINUX_FILTER_H
+#define __LINUX_FILTER_H
+
+#include <linux/bpf.h>
+
+#define BPF_ALU64_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_MOV64_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_EXIT_INSN() \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_EXIT, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_EMIT_CALL(FUNC) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_CALL, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = ((FUNC) - BPF_FUNC_unspec) })
+
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+#define BPF_MOV64_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_MOV32_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_LD_IMM64_RAW_FULL(DST, SRC, OFF1, OFF2, IMM1, IMM2) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_DW | BPF_IMM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF1, \
+ .imm = IMM1 }), \
+ ((struct bpf_insn) { \
+ .code = 0, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = OFF2, \
+ .imm = IMM2 })
+
+#define BPF_LD_MAP_FD(DST, MAP_FD) \
+ BPF_LD_IMM64_RAW_FULL(DST, BPF_PSEUDO_MAP_FD, 0, 0, \
+ MAP_FD, 0)
+
+#define BPF_LD_MAP_VALUE(DST, MAP_FD, VALUE_OFF) \
+ BPF_LD_IMM64_RAW_FULL(DST, BPF_PSEUDO_MAP_VALUE, 0, 0, \
+ MAP_FD, VALUE_OFF)
+
+#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+#endif
diff --git a/src/contrib/libbpf/include/linux/kernel.h b/src/contrib/libbpf/include/linux/kernel.h
new file mode 100644
index 0000000..a4a7a9d
--- /dev/null
+++ b/src/contrib/libbpf/include/linux/kernel.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __LINUX_KERNEL_H
+#define __LINUX_KERNEL_H
+
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+#ifndef container_of
+#define container_of(ptr, type, member) ({ \
+ const typeof(((type *)0)->member) * __mptr = (ptr); \
+ (type *)((char *)__mptr - offsetof(type, member)); })
+#endif
+
+#ifndef max
+#define max(x, y) ({ \
+ typeof(x) _max1 = (x); \
+ typeof(y) _max2 = (y); \
+ (void) (&_max1 == &_max2); \
+ _max1 > _max2 ? _max1 : _max2; })
+#endif
+
+#ifndef min
+#define min(x, y) ({ \
+ typeof(x) _min1 = (x); \
+ typeof(y) _min2 = (y); \
+ (void) (&_min1 == &_min2); \
+ _min1 < _min2 ? _min1 : _min2; })
+#endif
+
+#ifndef roundup
+#define roundup(x, y) ( \
+{ \
+ const typeof(y) __y = y; \
+ (((x) + (__y - 1)) / __y) * __y; \
+} \
+)
+#endif
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+
+#endif
diff --git a/src/contrib/libbpf/include/linux/list.h b/src/contrib/libbpf/include/linux/list.h
new file mode 100644
index 0000000..e3814f7
--- /dev/null
+++ b/src/contrib/libbpf/include/linux/list.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __LINUX_LIST_H
+#define __LINUX_LIST_H
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+#define LIST_HEAD(name) \
+ struct list_head name = LIST_HEAD_INIT(name)
+
+#define POISON_POINTER_DELTA 0
+#define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA)
+#define LIST_POISON2 ((void *) 0x200 + POISON_POINTER_DELTA)
+
+
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+ list->next = list;
+ list->prev = list;
+}
+
+static inline void __list_add(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head, head->next);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+ next->prev = prev;
+ prev->next = next;
+}
+
+static inline void __list_del_entry(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty() on entry does not return true after this, the entry is
+ * in an undefined state (next/prev hold LIST_POISON1/LIST_POISON2).
+ */
+static inline void list_del(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ entry->next = LIST_POISON1;
+ entry->prev = LIST_POISON2;
+}
+
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+#define list_first_entry(ptr, type, member) \
+ list_entry((ptr)->next, type, member)
+#define list_next_entry(pos, member) \
+ list_entry((pos)->member.next, typeof(*(pos)), member)
+
+#endif
diff --git a/src/contrib/libbpf/include/linux/overflow.h b/src/contrib/libbpf/include/linux/overflow.h
new file mode 100644
index 0000000..53d7580
--- /dev/null
+++ b/src/contrib/libbpf/include/linux/overflow.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __LINUX_OVERFLOW_H
+#define __LINUX_OVERFLOW_H
+
+#define is_signed_type(type) (((type)(-1)) < (type)1)
+#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type)))
+#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
+#define type_min(T) ((T)((T)-type_max(T)-(T)1))
+
+#ifndef unlikely
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
+
+#ifdef __GNUC__
+#define GCC_VERSION (__GNUC__ * 10000 \
+ + __GNUC_MINOR__ * 100 \
+ + __GNUC_PATCHLEVEL__)
+#if GCC_VERSION >= 50100
+#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
+#endif
+#endif
+
+#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
+
+#define check_mul_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ __builtin_mul_overflow(__a, __b, __d); \
+})
+
+#else
+
+/*
+ * If one of a or b is a compile-time constant, this avoids a division.
+ */
+#define __unsigned_mul_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = __a * __b; \
+ __builtin_constant_p(__b) ? \
+ __b > 0 && __a > type_max(typeof(__a)) / __b : \
+ __a > 0 && __b > type_max(typeof(__b)) / __a; \
+})
+
+/*
+ * Signed multiplication is rather hard. gcc always follows C99, so
+ * division is truncated towards 0. This means that we can write the
+ * overflow check like this:
+ *
+ * (a > 0 && (b > MAX/a || b < MIN/a)) ||
+ * (a < -1 && (b > MIN/a || b < MAX/a)) ||
+ * (a == -1 && b == MIN)
+ *
+ * The redundant casts of -1 are to silence an annoying -Wtype-limits
+ * (included in -Wextra) warning: When the type is u8 or u16, the
+ * __b_c_e in check_mul_overflow obviously selects
+ * __unsigned_mul_overflow, but unfortunately gcc still parses this
+ * code and warns about the limited range of __b.
+ */
+
+#define __signed_mul_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ typeof(a) __tmax = type_max(typeof(a)); \
+ typeof(a) __tmin = type_min(typeof(a)); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = (__u64)__a * (__u64)__b; \
+ (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \
+ (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \
+ (__b == (typeof(__b))-1 && __a == __tmin); \
+})
+
+#define check_mul_overflow(a, b, d) \
+ __builtin_choose_expr(is_signed_type(typeof(a)), \
+ __signed_mul_overflow(a, b, d), \
+ __unsigned_mul_overflow(a, b, d))
+
+
+#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */
+
+#endif
diff --git a/src/contrib/libbpf/include/linux/ring_buffer.h b/src/contrib/libbpf/include/linux/ring_buffer.h
new file mode 100644
index 0000000..fc4677b
--- /dev/null
+++ b/src/contrib/libbpf/include/linux/ring_buffer.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef _TOOLS_LINUX_RING_BUFFER_H_
+#define _TOOLS_LINUX_RING_BUFFER_H_
+
+#include <linux/compiler.h>
+
+static inline __u64 ring_buffer_read_head(struct perf_event_mmap_page *base)
+{
+ return smp_load_acquire(&base->data_head);
+}
+
+static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base,
+ __u64 tail)
+{
+ smp_store_release(&base->data_tail, tail);
+}
+
+#endif /* _TOOLS_LINUX_RING_BUFFER_H_ */
diff --git a/src/contrib/libbpf/include/linux/types.h b/src/contrib/libbpf/include/linux/types.h
new file mode 100644
index 0000000..bae1ed8
--- /dev/null
+++ b/src/contrib/libbpf/include/linux/types.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __LINUX_TYPES_H
+#define __LINUX_TYPES_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <asm/types.h>
+#include <asm/posix_types.h>
+
+#define __bitwise__
+#define __bitwise __bitwise__
+
+typedef __u16 __bitwise __le16;
+typedef __u16 __bitwise __be16;
+typedef __u32 __bitwise __le32;
+typedef __u32 __bitwise __be32;
+typedef __u64 __bitwise __le64;
+typedef __u64 __bitwise __be64;
+
+#ifndef __aligned_u64
+# define __aligned_u64 __u64 __attribute__((aligned(8)))
+#endif
+
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+#endif
diff --git a/src/contrib/libbpf/include/uapi/linux/bpf.h b/src/contrib/libbpf/include/uapi/linux/bpf.h
new file mode 100644
index 0000000..dbbcf0b
--- /dev/null
+++ b/src/contrib/libbpf/include/uapi/linux/bpf.h
@@ -0,0 +1,3692 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_H__
+#define _UAPI__LINUX_BPF_H__
+
+#include <linux/types.h>
+#include <linux/bpf_common.h>
+
+/* Extended instruction set based on top of classic BPF */
+
+/* instruction classes */
+#define BPF_JMP32 0x06 /* jmp mode in word width */
+#define BPF_ALU64 0x07 /* alu mode in double word width */
+
+/* ld/ldx fields */
+#define BPF_DW 0x18 /* double word (64-bit) */
+#define BPF_XADD 0xc0 /* exclusive add */
+
+/* alu/jmp fields */
+#define BPF_MOV 0xb0 /* mov reg to reg */
+#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */
+
+/* change endianness of a register */
+#define BPF_END 0xd0 /* flags for endianness conversion: */
+#define BPF_TO_LE 0x00 /* convert to little-endian */
+#define BPF_TO_BE 0x08 /* convert to big-endian */
+#define BPF_FROM_LE BPF_TO_LE
+#define BPF_FROM_BE BPF_TO_BE
+
+/* jmp encodings */
+#define BPF_JNE 0x50 /* jump != */
+#define BPF_JLT 0xa0 /* LT is unsigned, '<' */
+#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */
+#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */
+#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
+#define BPF_JSLT 0xc0 /* SLT is signed, '<' */
+#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */
+#define BPF_CALL 0x80 /* function call */
+#define BPF_EXIT 0x90 /* function return */
+
+/* Register numbers */
+enum {
+ BPF_REG_0 = 0,
+ BPF_REG_1,
+ BPF_REG_2,
+ BPF_REG_3,
+ BPF_REG_4,
+ BPF_REG_5,
+ BPF_REG_6,
+ BPF_REG_7,
+ BPF_REG_8,
+ BPF_REG_9,
+ BPF_REG_10,
+ __MAX_BPF_REG,
+};
+
+/* BPF has 10 general purpose 64-bit registers and stack frame. */
+#define MAX_BPF_REG __MAX_BPF_REG
+
+struct bpf_insn {
+ __u8 code; /* opcode */
+ __u8 dst_reg:4; /* dest register */
+ __u8 src_reg:4; /* source register */
+ __s16 off; /* signed offset */
+ __s32 imm; /* signed immediate constant */
+};
+
+/* Key of a BPF_MAP_TYPE_LPM_TRIE entry */
+struct bpf_lpm_trie_key {
+ __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
+ __u8 data[0]; /* Arbitrary size */
+};
+
+struct bpf_cgroup_storage_key {
+ __u64 cgroup_inode_id; /* cgroup inode id */
+ __u32 attach_type; /* program attach type */
+};
+
+/* BPF syscall commands, see bpf(2) man-page for details. */
+enum bpf_cmd {
+ BPF_MAP_CREATE,
+ BPF_MAP_LOOKUP_ELEM,
+ BPF_MAP_UPDATE_ELEM,
+ BPF_MAP_DELETE_ELEM,
+ BPF_MAP_GET_NEXT_KEY,
+ BPF_PROG_LOAD,
+ BPF_OBJ_PIN,
+ BPF_OBJ_GET,
+ BPF_PROG_ATTACH,
+ BPF_PROG_DETACH,
+ BPF_PROG_TEST_RUN,
+ BPF_PROG_GET_NEXT_ID,
+ BPF_MAP_GET_NEXT_ID,
+ BPF_PROG_GET_FD_BY_ID,
+ BPF_MAP_GET_FD_BY_ID,
+ BPF_OBJ_GET_INFO_BY_FD,
+ BPF_PROG_QUERY,
+ BPF_RAW_TRACEPOINT_OPEN,
+ BPF_BTF_LOAD,
+ BPF_BTF_GET_FD_BY_ID,
+ BPF_TASK_FD_QUERY,
+ BPF_MAP_LOOKUP_AND_DELETE_ELEM,
+ BPF_MAP_FREEZE,
+ BPF_BTF_GET_NEXT_ID,
+};
+
+enum bpf_map_type {
+ BPF_MAP_TYPE_UNSPEC,
+ BPF_MAP_TYPE_HASH,
+ BPF_MAP_TYPE_ARRAY,
+ BPF_MAP_TYPE_PROG_ARRAY,
+ BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ BPF_MAP_TYPE_PERCPU_HASH,
+ BPF_MAP_TYPE_PERCPU_ARRAY,
+ BPF_MAP_TYPE_STACK_TRACE,
+ BPF_MAP_TYPE_CGROUP_ARRAY,
+ BPF_MAP_TYPE_LRU_HASH,
+ BPF_MAP_TYPE_LRU_PERCPU_HASH,
+ BPF_MAP_TYPE_LPM_TRIE,
+ BPF_MAP_TYPE_ARRAY_OF_MAPS,
+ BPF_MAP_TYPE_HASH_OF_MAPS,
+ BPF_MAP_TYPE_DEVMAP,
+ BPF_MAP_TYPE_SOCKMAP,
+ BPF_MAP_TYPE_CPUMAP,
+ BPF_MAP_TYPE_XSKMAP,
+ BPF_MAP_TYPE_SOCKHASH,
+ BPF_MAP_TYPE_CGROUP_STORAGE,
+ BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+ BPF_MAP_TYPE_QUEUE,
+ BPF_MAP_TYPE_STACK,
+ BPF_MAP_TYPE_SK_STORAGE,
+ BPF_MAP_TYPE_DEVMAP_HASH,
+};
+
+/* Note that tracing related programs such as
+ * BPF_PROG_TYPE_{KPROBE,TRACEPOINT,PERF_EVENT,RAW_TRACEPOINT}
+ * are not subject to a stable API since kernel internal data
+ * structures can change from release to release and may
+ * therefore break existing tracing BPF programs. Tracing BPF
+ * programs correspond to /a/ specific kernel which is to be
+ * analyzed, and not /a/ specific kernel /and/ all future ones.
+ */
+enum bpf_prog_type {
+ BPF_PROG_TYPE_UNSPEC,
+ BPF_PROG_TYPE_SOCKET_FILTER,
+ BPF_PROG_TYPE_KPROBE,
+ BPF_PROG_TYPE_SCHED_CLS,
+ BPF_PROG_TYPE_SCHED_ACT,
+ BPF_PROG_TYPE_TRACEPOINT,
+ BPF_PROG_TYPE_XDP,
+ BPF_PROG_TYPE_PERF_EVENT,
+ BPF_PROG_TYPE_CGROUP_SKB,
+ BPF_PROG_TYPE_CGROUP_SOCK,
+ BPF_PROG_TYPE_LWT_IN,
+ BPF_PROG_TYPE_LWT_OUT,
+ BPF_PROG_TYPE_LWT_XMIT,
+ BPF_PROG_TYPE_SOCK_OPS,
+ BPF_PROG_TYPE_SK_SKB,
+ BPF_PROG_TYPE_CGROUP_DEVICE,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_RAW_TRACEPOINT,
+ BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_PROG_TYPE_LWT_SEG6LOCAL,
+ BPF_PROG_TYPE_LIRC_MODE2,
+ BPF_PROG_TYPE_SK_REUSEPORT,
+ BPF_PROG_TYPE_FLOW_DISSECTOR,
+ BPF_PROG_TYPE_CGROUP_SYSCTL,
+ BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
+ BPF_PROG_TYPE_CGROUP_SOCKOPT,
+ BPF_PROG_TYPE_TRACING,
+};
+
+enum bpf_attach_type {
+ BPF_CGROUP_INET_INGRESS,
+ BPF_CGROUP_INET_EGRESS,
+ BPF_CGROUP_INET_SOCK_CREATE,
+ BPF_CGROUP_SOCK_OPS,
+ BPF_SK_SKB_STREAM_PARSER,
+ BPF_SK_SKB_STREAM_VERDICT,
+ BPF_CGROUP_DEVICE,
+ BPF_SK_MSG_VERDICT,
+ BPF_CGROUP_INET4_BIND,
+ BPF_CGROUP_INET6_BIND,
+ BPF_CGROUP_INET4_CONNECT,
+ BPF_CGROUP_INET6_CONNECT,
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_LIRC_MODE2,
+ BPF_FLOW_DISSECTOR,
+ BPF_CGROUP_SYSCTL,
+ BPF_CGROUP_UDP4_RECVMSG,
+ BPF_CGROUP_UDP6_RECVMSG,
+ BPF_CGROUP_GETSOCKOPT,
+ BPF_CGROUP_SETSOCKOPT,
+ BPF_TRACE_RAW_TP,
+ BPF_TRACE_FENTRY,
+ BPF_TRACE_FEXIT,
+ __MAX_BPF_ATTACH_TYPE
+};
+
+#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+
+/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
+ *
+ * NONE(default): No further bpf programs allowed in the subtree.
+ *
+ * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program,
+ * the program in this cgroup yields to sub-cgroup program.
+ *
+ * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program,
+ * that cgroup program gets run in addition to the program in this cgroup.
+ *
+ * Only one program is allowed to be attached to a cgroup with
+ * NONE or BPF_F_ALLOW_OVERRIDE flag.
+ * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will
+ * release old program and attach the new one. Attach flags has to match.
+ *
+ * Multiple programs are allowed to be attached to a cgroup with
+ * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order
+ * (those that were attached first, run first)
+ * The programs of sub-cgroup are executed first, then programs of
+ * this cgroup and then programs of parent cgroup.
+ * When children program makes decision (like picking TCP CA or sock bind)
+ * parent program has a chance to override it.
+ *
+ * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
+ * A cgroup with NONE doesn't allow any programs in sub-cgroups.
+ * Ex1:
+ * cgrp1 (MULTI progs A, B) ->
+ * cgrp2 (OVERRIDE prog C) ->
+ * cgrp3 (MULTI prog D) ->
+ * cgrp4 (OVERRIDE prog E) ->
+ * cgrp5 (NONE prog F)
+ * the event in cgrp5 triggers execution of F,D,A,B in that order.
+ * if prog F is detached, the execution is E,D,A,B
+ * if prog F and D are detached, the execution is E,A,B
+ * if prog F, E and D are detached, the execution is C,A,B
+ *
+ * All eligible programs are executed regardless of return code from
+ * earlier programs.
+ */
+#define BPF_F_ALLOW_OVERRIDE (1U << 0)
+#define BPF_F_ALLOW_MULTI (1U << 1)
+
+/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
+ * verifier will perform strict alignment checking as if the kernel
+ * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set,
+ * and NET_IP_ALIGN defined to 2.
+ */
+#define BPF_F_STRICT_ALIGNMENT (1U << 0)
+
+/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the
+ * verifier will allow any alignment whatsoever. On platforms
+ * with strict alignment requirements for loads and stores (such
+ * as sparc and mips) the verifier validates that all loads and
+ * stores provably follow this requirement. This flag turns that
+ * checking and enforcement off.
+ *
+ * It is mostly used for testing when we want to validate the
+ * context and memory access aspects of the verifier, but because
+ * of an unaligned access the alignment check would trigger before
+ * the one we are interested in.
+ */
+#define BPF_F_ANY_ALIGNMENT (1U << 1)
+
+/* BPF_F_TEST_RND_HI32 is used in BPF_PROG_LOAD command for testing purpose.
+ * Verifier does sub-register def/use analysis and identifies instructions whose
+ * def only matters for low 32-bit, high 32-bit is never referenced later
+ * through implicit zero extension. Therefore verifier notifies JIT back-ends
+ * that it is safe to ignore clearing high 32-bit for these instructions. This
+ * saves some back-ends a lot of code-gen. However such optimization is not
+ * necessary on some arches, for example x86_64, arm64 etc, whose JIT back-ends
+ * hence hasn't used verifier's analysis result. But, we really want to have a
+ * way to be able to verify the correctness of the described optimization on
+ * x86_64 on which testsuites are frequently exercised.
+ *
+ * So, this flag is introduced. Once it is set, verifier will randomize high
+ * 32-bit for those instructions who has been identified as safe to ignore them.
+ * Then, if verifier is not doing correct analysis, such randomization will
+ * regress tests to expose bugs.
+ */
+#define BPF_F_TEST_RND_HI32 (1U << 2)
+
+/* The verifier internal test flag. Behavior is undefined */
+#define BPF_F_TEST_STATE_FREQ (1U << 3)
+
+/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
+ * two extensions:
+ *
+ * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE
+ * insn[0].imm: map fd map fd
+ * insn[1].imm: 0 offset into value
+ * insn[0].off: 0 0
+ * insn[1].off: 0 0
+ * ldimm64 rewrite: address of map address of map[0]+offset
+ * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE
+ */
+#define BPF_PSEUDO_MAP_FD 1
+#define BPF_PSEUDO_MAP_VALUE 2
+
+/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
+ * offset to another bpf function
+ */
+#define BPF_PSEUDO_CALL 1
+
+/* flags for BPF_MAP_UPDATE_ELEM command */
+#define BPF_ANY 0 /* create new element or update existing */
+#define BPF_NOEXIST 1 /* create new element if it didn't exist */
+#define BPF_EXIST 2 /* update existing element */
+#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */
+
+/* flags for BPF_MAP_CREATE command */
+#define BPF_F_NO_PREALLOC (1U << 0)
+/* Instead of having one common LRU list in the
+ * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
+ * which can scale and perform better.
+ * Note, the LRU nodes (including free nodes) cannot be moved
+ * across different LRU lists.
+ */
+#define BPF_F_NO_COMMON_LRU (1U << 1)
+/* Specify numa node during map creation */
+#define BPF_F_NUMA_NODE (1U << 2)
+
+#define BPF_OBJ_NAME_LEN 16U
+
+/* Flags for accessing BPF object from syscall side. */
+#define BPF_F_RDONLY (1U << 3)
+#define BPF_F_WRONLY (1U << 4)
+
+/* Flag for stack_map, store build_id+offset instead of pointer */
+#define BPF_F_STACK_BUILD_ID (1U << 5)
+
+/* Zero-initialize hash function seed. This should only be used for testing. */
+#define BPF_F_ZERO_SEED (1U << 6)
+
+/* Flags for accessing BPF object from program side. */
+#define BPF_F_RDONLY_PROG (1U << 7)
+#define BPF_F_WRONLY_PROG (1U << 8)
+
+/* Clone map from listener for newly accepted socket */
+#define BPF_F_CLONE (1U << 9)
+
+/* Enable memory-mapping BPF map */
+#define BPF_F_MMAPABLE (1U << 10)
+
+/* flags for BPF_PROG_QUERY */
+#define BPF_F_QUERY_EFFECTIVE (1U << 0)
+
+enum bpf_stack_build_id_status {
+ /* user space need an empty entry to identify end of a trace */
+ BPF_STACK_BUILD_ID_EMPTY = 0,
+ /* with valid build_id and offset */
+ BPF_STACK_BUILD_ID_VALID = 1,
+ /* couldn't get build_id, fallback to ip */
+ BPF_STACK_BUILD_ID_IP = 2,
+};
+
+#define BPF_BUILD_ID_SIZE 20
+struct bpf_stack_build_id {
+ __s32 status;
+ unsigned char build_id[BPF_BUILD_ID_SIZE];
+ union {
+ __u64 offset;
+ __u64 ip;
+ };
+};
+
+union bpf_attr {
+ struct { /* anonymous struct used by BPF_MAP_CREATE command */
+ __u32 map_type; /* one of enum bpf_map_type */
+ __u32 key_size; /* size of key in bytes */
+ __u32 value_size; /* size of value in bytes */
+ __u32 max_entries; /* max number of entries in a map */
+ __u32 map_flags; /* BPF_MAP_CREATE related
+ * flags defined above.
+ */
+ __u32 inner_map_fd; /* fd pointing to the inner map */
+ __u32 numa_node; /* numa node (effective only if
+ * BPF_F_NUMA_NODE is set).
+ */
+ char map_name[BPF_OBJ_NAME_LEN];
+ __u32 map_ifindex; /* ifindex of netdev to create on */
+ __u32 btf_fd; /* fd pointing to a BTF type data */
+ __u32 btf_key_type_id; /* BTF type_id of the key */
+ __u32 btf_value_type_id; /* BTF type_id of the value */
+ };
+
+ struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+ __u32 map_fd;
+ __aligned_u64 key;
+ union {
+ __aligned_u64 value;
+ __aligned_u64 next_key;
+ };
+ __u64 flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_LOAD command */
+ __u32 prog_type; /* one of enum bpf_prog_type */
+ __u32 insn_cnt;
+ __aligned_u64 insns;
+ __aligned_u64 license;
+ __u32 log_level; /* verbosity level of verifier */
+ __u32 log_size; /* size of user buffer */
+ __aligned_u64 log_buf; /* user supplied buffer */
+ __u32 kern_version; /* not used */
+ __u32 prog_flags;
+ char prog_name[BPF_OBJ_NAME_LEN];
+ __u32 prog_ifindex; /* ifindex of netdev to prep for */
+ /* For some prog types expected attach type must be known at
+ * load time to verify attach type specific parts of prog
+ * (context accesses, allowed helpers, etc).
+ */
+ __u32 expected_attach_type;
+ __u32 prog_btf_fd; /* fd pointing to BTF type data */
+ __u32 func_info_rec_size; /* userspace bpf_func_info size */
+ __aligned_u64 func_info; /* func info */
+ __u32 func_info_cnt; /* number of bpf_func_info records */
+ __u32 line_info_rec_size; /* userspace bpf_line_info size */
+ __aligned_u64 line_info; /* line info */
+ __u32 line_info_cnt; /* number of bpf_line_info records */
+ __u32 attach_btf_id; /* in-kernel BTF type id to attach to */
+ __u32 attach_prog_fd; /* 0 to attach to vmlinux */
+ };
+
+ struct { /* anonymous struct used by BPF_OBJ_* commands */
+ __aligned_u64 pathname;
+ __u32 bpf_fd;
+ __u32 file_flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
+ __u32 target_fd; /* container object to attach to */
+ __u32 attach_bpf_fd; /* eBPF program to attach */
+ __u32 attach_type;
+ __u32 attach_flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
+ __u32 prog_fd;
+ __u32 retval;
+ __u32 data_size_in; /* input: len of data_in */
+ __u32 data_size_out; /* input/output: len of data_out
+ * returns ENOSPC if data_out
+ * is too small.
+ */
+ __aligned_u64 data_in;
+ __aligned_u64 data_out;
+ __u32 repeat;
+ __u32 duration;
+ __u32 ctx_size_in; /* input: len of ctx_in */
+ __u32 ctx_size_out; /* input/output: len of ctx_out
+ * returns ENOSPC if ctx_out
+ * is too small.
+ */
+ __aligned_u64 ctx_in;
+ __aligned_u64 ctx_out;
+ } test;
+
+ struct { /* anonymous struct used by BPF_*_GET_*_ID */
+ union {
+ __u32 start_id;
+ __u32 prog_id;
+ __u32 map_id;
+ __u32 btf_id;
+ };
+ __u32 next_id;
+ __u32 open_flags;
+ };
+
+ struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */
+ __u32 bpf_fd;
+ __u32 info_len;
+ __aligned_u64 info;
+ } info;
+
+ struct { /* anonymous struct used by BPF_PROG_QUERY command */
+ __u32 target_fd; /* container object to query */
+ __u32 attach_type;
+ __u32 query_flags;
+ __u32 attach_flags;
+ __aligned_u64 prog_ids;
+ __u32 prog_cnt;
+ } query;
+
+ struct {
+ __u64 name;
+ __u32 prog_fd;
+ } raw_tracepoint;
+
+ struct { /* anonymous struct for BPF_BTF_LOAD */
+ __aligned_u64 btf;
+ __aligned_u64 btf_log_buf;
+ __u32 btf_size;
+ __u32 btf_log_size;
+ __u32 btf_log_level;
+ };
+
+ struct {
+ __u32 pid; /* input: pid */
+ __u32 fd; /* input: fd */
+ __u32 flags; /* input: flags */
+ __u32 buf_len; /* input/output: buf len */
+ __aligned_u64 buf; /* input/output:
+ * tp_name for tracepoint
+ * symbol for kprobe
+ * filename for uprobe
+ */
+ __u32 prog_id; /* output: prog_id */
+ __u32 fd_type; /* output: BPF_FD_TYPE_* */
+ __u64 probe_offset; /* output: probe_offset */
+ __u64 probe_addr; /* output: probe_addr */
+ } task_fd_query;
+} __attribute__((aligned(8)));
+
+/* The description below is an attempt at providing documentation to eBPF
+ * developers about the multiple available eBPF helper functions. It can be
+ * parsed and used to produce a manual page. The workflow is the following,
+ * and requires the rst2man utility:
+ *
+ * $ ./scripts/bpf_helpers_doc.py \
+ * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
+ * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
+ * $ man /tmp/bpf-helpers.7
+ *
+ * Note that in order to produce this external documentation, some RST
+ * formatting is used in the descriptions to get "bold" and "italics" in
+ * manual pages. Also note that the few trailing white spaces are
+ * intentional, removing them would break paragraphs for rst2man.
+ *
+ * Start of BPF helper function descriptions:
+ *
+ * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+ * Description
+ * Perform a lookup in *map* for an entry associated to *key*.
+ * Return
+ * Map value associated to *key*, or **NULL** if no entry was
+ * found.
+ *
+ * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+ * Description
+ * Add or update the value of the entry associated to *key* in
+ * *map* with *value*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * Flag value **BPF_NOEXIST** cannot be used for maps of types
+ * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all
+ * elements always exist), the helper would return an error.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_delete_elem(struct bpf_map *map, const void *key)
+ * Description
+ * Delete entry with *key* from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
+ * Description
+ * For tracing programs, safely attempt to read *size* bytes from
+ * kernel space address *unsafe_ptr* and store the data in *dst*.
+ *
+ * Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
+ * instead.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_ktime_get_ns(void)
+ * Description
+ * Return the time elapsed since system boot, in nanoseconds.
+ * Return
+ * Current *ktime*.
+ *
+ * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
+ * Description
+ * This helper is a "printk()-like" facility for debugging. It
+ * prints a message defined by format *fmt* (of size *fmt_size*)
+ * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
+ * available. It can take up to three additional **u64**
+ * arguments (as with all eBPF helpers, the total number of arguments is
+ * limited to five).
+ *
+ * Each time the helper is called, it appends a line to the trace.
+ * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is
+ * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this.
+ * The format of the trace is customizable, and the exact output
+ * one will get depends on the options set in
+ * *\/sys/kernel/debug/tracing/trace_options* (see also the
+ * *README* file under the same directory). However, it usually
+ * defaults to something like:
+ *
+ * ::
+ *
+ * telnet-470 [001] .N.. 419421.045894: 0x00000001: <formatted msg>
+ *
+ * In the above:
+ *
+ * * ``telnet`` is the name of the current task.
+ * * ``470`` is the PID of the current task.
+ * * ``001`` is the CPU number on which the task is
+ * running.
+ * * In ``.N..``, each character refers to a set of
+ * options (whether irqs are enabled, scheduling
+ * options, whether hard/softirqs are running, level of
+ * preempt_disabled respectively). **N** means that
+ * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED**
+ * are set.
+ * * ``419421.045894`` is a timestamp.
+ * * ``0x00000001`` is a fake value used by BPF for the
+ * instruction pointer register.
+ * * ``<formatted msg>`` is the message formatted with
+ * *fmt*.
+ *
+ * The conversion specifiers supported by *fmt* are similar, but
+ * more limited than for printk(). They are **%d**, **%i**,
+ * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
+ * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
+ * of field, padding with zeroes, etc.) is available, and the
+ * helper will return **-EINVAL** (but print nothing) if it
+ * encounters an unknown specifier.
+ *
+ * Also, note that **bpf_trace_printk**\ () is slow, and should
+ * only be used for debugging purposes. For this reason, a notice
+ * block (spanning several lines) is printed to kernel logs and
+ * states that the helper should not be used "for production use"
+ * the first time this helper is used (or more precisely, when
+ * **trace_printk**\ () buffers are allocated). For passing values
+ * to user space, perf events should be preferred.
+ * Return
+ * The number of bytes written to the buffer, or a negative error
+ * in case of failure.
+ *
+ * u32 bpf_get_prandom_u32(void)
+ * Description
+ * Get a pseudo-random number.
+ *
+ * From a security point of view, this helper uses its own
+ * pseudo-random internal state, and cannot be used to infer the
+ * seed of other random functions in the kernel. However, it is
+ * essential to note that the generator used by the helper is not
+ * cryptographically secure.
+ * Return
+ * A random 32-bit unsigned value.
+ *
+ * u32 bpf_get_smp_processor_id(void)
+ * Description
+ * Get the SMP (symmetric multiprocessing) processor id. Note that
+ * all programs run with preemption disabled, which means that the
+ * SMP processor id is stable during all the execution of the
+ * program.
+ * Return
+ * The SMP id of the processor running the program.
+ *
+ * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
+ * Description
+ * Store *len* bytes from address *from* into the packet
+ * associated to *skb*, at *offset*. *flags* are a combination of
+ * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
+ * checksum for the packet after storing the bytes) and
+ * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
+ * **->swhash** and *skb*\ **->l4hash** to 0).
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
+ * Description
+ * Recompute the layer 3 (e.g. IP) checksum for the packet
+ * associated to *skb*. Computation is incremental, so the helper
+ * must know the former value of the header field that was
+ * modified (*from*), the new value of this field (*to*), and the
+ * number of bytes (2 or 4) for this field, stored in *size*.
+ * Alternatively, it is possible to store the difference between
+ * the previous and the new values of the header field in *to*, by
+ * setting *from* and *size* to 0. For both methods, *offset*
+ * indicates the location of the IP checksum within the packet.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+ * flexibility and can handle sizes larger than 2 or 4 for the
+ * checksum to update.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
+ * Description
+ * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
+ * packet associated to *skb*. Computation is incremental, so the
+ * helper must know the former value of the header field that was
+ * modified (*from*), the new value of this field (*to*), and the
+ * number of bytes (2 or 4) for this field, stored on the lowest
+ * four bits of *flags*. Alternatively, it is possible to store
+ * the difference between the previous and the new values of the
+ * header field in *to*, by setting *from* and the four lowest
+ * bits of *flags* to 0. For both methods, *offset* indicates the
+ * location of the layer 4 checksum within the packet. In addition to
+ * the size of the field, *flags* can be added (bitwise OR) actual
+ * flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left
+ * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
+ * for updates resulting in a null checksum the value is set to
+ * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
+ * the checksum is to be computed against a pseudo-header.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+ * flexibility and can handle sizes larger than 2 or 4 for the
+ * checksum to update.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
+ * Description
+ * This special helper is used to trigger a "tail call", or in
+ * other words, to jump into another eBPF program. The same stack
+ * frame is used (but values on stack and in registers for the
+ * caller are not accessible to the callee). This mechanism allows
+ * for program chaining, either for raising the maximum number of
+ * available eBPF instructions, or to execute given programs in
+ * conditional blocks. For security reasons, there is an upper
+ * limit to the number of successive tail calls that can be
+ * performed.
+ *
+ * Upon call of this helper, the program attempts to jump into a
+ * program referenced at index *index* in *prog_array_map*, a
+ * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes
+ * *ctx*, a pointer to the context.
+ *
+ * If the call succeeds, the kernel immediately runs the first
+ * instruction of the new program. This is not a function call,
+ * and it never returns to the previous program. If the call
+ * fails, then the helper has no effect, and the caller continues
+ * to run its subsequent instructions. A call can fail if the
+ * destination program for the jump does not exist (i.e. *index*
+ * is superior to the number of entries in *prog_array_map*), or
+ * if the maximum number of tail calls has been reached for this
+ * chain of programs. This limit is defined in the kernel by the
+ * macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
+ * which is currently set to 32.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
+ * Description
+ * Clone and redirect the packet associated to *skb* to another
+ * net device of index *ifindex*. Both ingress and egress
+ * interfaces can be used for redirection. The **BPF_F_INGRESS**
+ * value in *flags* is used to make the distinction (ingress path
+ * is selected if the flag is present, egress path otherwise).
+ * This is the only flag supported for now.
+ *
+ * In comparison with **bpf_redirect**\ () helper,
+ * **bpf_clone_redirect**\ () has the associated cost of
+ * duplicating the packet buffer, but this can be executed out of
+ * the eBPF program. Conversely, **bpf_redirect**\ () is more
+ * efficient, but it is handled through an action code where the
+ * redirection happens only after the eBPF program has returned.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_get_current_pid_tgid(void)
+ * Return
+ * A 64-bit integer containing the current tgid and pid, and
+ * created as such:
+ * *current_task*\ **->tgid << 32 \|**
+ * *current_task*\ **->pid**.
+ *
+ * u64 bpf_get_current_uid_gid(void)
+ * Return
+ * A 64-bit integer containing the current GID and UID, and
+ * created as such: *current_gid* **<< 32 \|** *current_uid*.
+ *
+ * int bpf_get_current_comm(void *buf, u32 size_of_buf)
+ * Description
+ * Copy the **comm** attribute of the current task into *buf* of
+ * *size_of_buf*. The **comm** attribute contains the name of
+ * the executable (excluding the path) for the current task. The
+ * *size_of_buf* must be strictly positive. On success, the
+ * helper makes sure that the *buf* is NUL-terminated. On failure,
+ * it is filled with zeroes.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u32 bpf_get_cgroup_classid(struct sk_buff *skb)
+ * Description
+ * Retrieve the classid for the current task, i.e. for the net_cls
+ * cgroup to which *skb* belongs.
+ *
+ * This helper can be used on TC egress path, but not on ingress.
+ *
+ * The net_cls cgroup provides an interface to tag network packets
+ * based on a user-provided identifier for all traffic coming from
+ * the tasks belonging to the related cgroup. See also the related
+ * kernel documentation, available from the Linux sources in file
+ * *Documentation/admin-guide/cgroup-v1/net_cls.rst*.
+ *
+ * The Linux kernel has two versions for cgroups: there are
+ * cgroups v1 and cgroups v2. Both are available to users, who can
+ * use a mixture of them, but note that the net_cls cgroup is for
+ * cgroup v1 only. This makes it incompatible with BPF programs
+ * run on cgroups, which is a cgroup-v2-only feature (a socket can
+ * only hold data for one version of cgroups at a time).
+ *
+ * This helper is only available if the kernel was compiled with
+ * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
+ * "**y**" or to "**m**".
+ * Return
+ * The classid, or 0 for the default unconfigured classid.
+ *
+ * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
+ * Description
+ * Push a *vlan_tci* (VLAN tag control information) of protocol
+ * *vlan_proto* to the packet associated to *skb*, then update
+ * the checksum. Note that if *vlan_proto* is different from
+ * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
+ * be **ETH_P_8021Q**.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_vlan_pop(struct sk_buff *skb)
+ * Description
+ * Pop a VLAN header from the packet associated to *skb*.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * Description
+ * Get tunnel metadata. This helper takes a pointer *key* to an
+ * empty **struct bpf_tunnel_key** of **size**, that will be
+ * filled with tunnel metadata for the packet associated to *skb*.
+ * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which
+ * indicates that the tunnel is based on IPv6 protocol instead of
+ * IPv4.
+ *
+ * The **struct bpf_tunnel_key** is an object that generalizes the
+ * principal parameters used by various tunneling protocols into a
+ * single struct. This way, it can be used to easily make a
+ * decision based on the contents of the encapsulation header,
+ * "summarized" in this struct. In particular, it holds the IP
+ * address of the remote end (IPv4 or IPv6, depending on the case)
+ * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also,
+ * this struct exposes the *key*\ **->tunnel_id**, which is
+ * generally mapped to a VNI (Virtual Network Identifier), making
+ * it programmable together with the **bpf_skb_set_tunnel_key**\
+ * () helper.
+ *
+ * Let's imagine that the following code is part of a program
+ * attached to the TC ingress interface, on one end of a GRE
+ * tunnel, and is supposed to filter out all messages coming from
+ * remote ends with IPv4 address other than 10.0.0.1:
+ *
+ * ::
+ *
+ * int ret;
+ * struct bpf_tunnel_key key = {};
+ *
+ * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ * if (ret < 0)
+ * return TC_ACT_SHOT; // drop packet
+ *
+ * if (key.remote_ipv4 != 0x0a000001)
+ * return TC_ACT_SHOT; // drop packet
+ *
+ * return TC_ACT_OK; // accept packet
+ *
+ * This interface can also be used with all encapsulation devices
+ * that can operate in "collect metadata" mode: instead of having
+ * one network device per specific configuration, the "collect
+ * metadata" mode only requires a single device where the
+ * configuration can be extracted from this helper.
+ *
+ * This can be used together with various tunnels such as VXLan,
+ * Geneve, GRE or IP in IP (IPIP).
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * Description
+ * Populate tunnel metadata for packet associated to *skb*. The
+ * tunnel metadata is set to the contents of *key*, of *size*. The
+ * *flags* can be set to a combination of the following values:
+ *
+ * **BPF_F_TUNINFO_IPV6**
+ * Indicate that the tunnel is based on IPv6 protocol
+ * instead of IPv4.
+ * **BPF_F_ZERO_CSUM_TX**
+ * For IPv4 packets, add a flag to tunnel metadata
+ * indicating that checksum computation should be skipped
+ * and checksum set to zeroes.
+ * **BPF_F_DONT_FRAGMENT**
+ * Add a flag to tunnel metadata indicating that the
+ * packet should not be fragmented.
+ * **BPF_F_SEQ_NUMBER**
+ * Add a flag to tunnel metadata indicating that a
+ * sequence number should be added to tunnel header before
+ * sending the packet. This flag was added for GRE
+ * encapsulation, but might be used with other protocols
+ * as well in the future.
+ *
+ * Here is a typical usage on the transmit path:
+ *
+ * ::
+ *
+ * struct bpf_tunnel_key key;
+ * populate key ...
+ * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
+ *
+ * See also the description of the **bpf_skb_get_tunnel_key**\ ()
+ * helper for additional information.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags)
+ * Description
+ * Read the value of a perf event counter. This helper relies on a
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
+ * the perf event counter is selected when *map* is updated with
+ * perf event file descriptors. The *map* is an array whose size
+ * is the number of available CPUs, and each cell contains a value
+ * relative to one CPU. The value to retrieve is indicated by
+ * *flags*, that contains the index of the CPU to look up, masked
+ * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * **BPF_F_CURRENT_CPU** to indicate that the value for the
+ * current CPU should be retrieved.
+ *
+ * Note that before Linux 4.13, only hardware perf events can be
+ * retrieved.
+ *
+ * Also, be aware that the newer helper
+ * **bpf_perf_event_read_value**\ () is recommended over
+ * **bpf_perf_event_read**\ () in general. The latter has some ABI
+ * quirks where error and counter value are used as a return code
+ * (which is wrong to do since ranges may overlap). This issue is
+ * fixed with **bpf_perf_event_read_value**\ (), which at the same
+ * time provides more features over the **bpf_perf_event_read**\
+ * () interface. Please refer to the description of
+ * **bpf_perf_event_read_value**\ () for details.
+ * Return
+ * The value of the perf event counter read from the map, or a
+ * negative error code in case of failure.
+ *
+ * int bpf_redirect(u32 ifindex, u64 flags)
+ * Description
+ * Redirect the packet to another net device of index *ifindex*.
+ * This helper is somewhat similar to **bpf_clone_redirect**\
+ * (), except that the packet is not cloned, which provides
+ * increased performance.
+ *
+ * Except for XDP, both ingress and egress interfaces can be used
+ * for redirection. The **BPF_F_INGRESS** value in *flags* is used
+ * to make the distinction (ingress path is selected if the flag
+ * is present, egress path otherwise). Currently, XDP only
+ * supports redirection to the egress interface, and accepts no
+ * flag at all.
+ *
+ * The same effect can be attained with the more generic
+ * **bpf_redirect_map**\ (), which requires specific maps to be
+ * used but offers better performance.
+ * Return
+ * For XDP, the helper returns **XDP_REDIRECT** on success or
+ * **XDP_ABORTED** on error. For other program types, the values
+ * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on
+ * error.
+ *
+ * u32 bpf_get_route_realm(struct sk_buff *skb)
+ * Description
+ * Retrieve the realm of the route, that is to say the
+ * **tclassid** field of the destination for the *skb*. The
+ * identifier retrieved is a user-provided tag, similar to the
+ * one used with the net_cls cgroup (see description for
+ * **bpf_get_cgroup_classid**\ () helper), but here this tag is
+ * held by a route (a destination entry), not by a task.
+ *
+ * Retrieving this identifier works with the clsact TC egress hook
+ * (see also **tc-bpf(8)**), or alternatively on conventional
+ * classful egress qdiscs, but not on TC ingress path. In case of
+ * clsact TC egress hook, this has the advantage that, internally,
+ * the destination entry has not been dropped yet in the transmit
+ * path. Therefore, the destination entry does not need to be
+ * artificially held via **netif_keep_dst**\ () for a classful
+ * qdisc until the *skb* is freed.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_IP_ROUTE_CLASSID** configuration option.
+ * Return
+ * The realm of the route for the packet associated to *skb*, or 0
+ * if none was found.
+ *
+ * int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * Description
+ * Write raw *data* blob into a special BPF perf event held by
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * event must have the following attributes: **PERF_SAMPLE_RAW**
+ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * The *flags* are used to indicate the index in *map* for which
+ * the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * to indicate that the index of the current CPU core should be
+ * used.
+ *
+ * The value to write, of *size*, is passed through eBPF stack and
+ * pointed by *data*.
+ *
+ * The context of the program *ctx* also needs to be passed to the
+ * helper.
+ *
+ * On user space, a program willing to read the values needs to
+ * call **perf_event_open**\ () on the perf event (either for
+ * one or for all CPUs) and to store the file descriptor into the
+ * *map*. This must be done before the eBPF program can send data
+ * into it. An example is available in file
+ * *samples/bpf/trace_output_user.c* in the Linux kernel source
+ * tree (the eBPF program counterpart is in
+ * *samples/bpf/trace_output_kern.c*).
+ *
+ * **bpf_perf_event_output**\ () achieves better performance
+ * than **bpf_trace_printk**\ () for sharing data with user
+ * space, and is much better suited for streaming data from eBPF
+ * programs.
+ *
+ * Note that this helper is not restricted to tracing use cases
+ * and can be used with programs attached to TC or XDP as well,
+ * where it allows for passing data to user space listeners. Data
+ * can be:
+ *
+ * * Only custom structs,
+ * * Only the packet payload, or
+ * * A combination of both.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len)
+ * Description
+ * This helper was provided as an easy way to load data from a
+ * packet. It can be used to load *len* bytes from *offset* from
+ * the packet associated to *skb*, into the buffer pointed by
+ * *to*.
+ *
+ * Since Linux 4.7, usage of this helper has mostly been replaced
+ * by "direct packet access", enabling packet data to be
+ * manipulated with *skb*\ **->data** and *skb*\ **->data_end**
+ * pointing respectively to the first byte of packet data and to
+ * the byte after the last byte of packet data. However, it
+ * remains useful if one wishes to read large quantities of data
+ * at once from a packet into the eBPF stack.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags)
+ * Description
+ * Walk a user or a kernel stack and return its id. To achieve
+ * this, the helper needs *ctx*, which is a pointer to the context
+ * on which the tracing program is executed, and a pointer to a
+ * *map* of type **BPF_MAP_TYPE_STACK_TRACE**.
+ *
+ * The last argument, *flags*, holds the number of stack frames to
+ * skip (from 0 to 255), masked with
+ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * a combination of the following flags:
+ *
+ * **BPF_F_USER_STACK**
+ * Collect a user space stack instead of a kernel stack.
+ * **BPF_F_FAST_STACK_CMP**
+ * Compare stacks by hash only.
+ * **BPF_F_REUSE_STACKID**
+ * If two different stacks hash into the same *stackid*,
+ * discard the old one.
+ *
+ * The stack id retrieved is a 32 bit long integer handle which
+ * can be further combined with other data (including other stack
+ * ids) and used as a key into maps. This can be useful for
+ * generating a variety of graphs (such as flame graphs or off-cpu
+ * graphs).
+ *
+ * For walking a stack, this helper is an improvement over
+ * **bpf_probe_read**\ (), which can be used with unrolled loops
+ * but is not efficient and consumes a lot of eBPF instructions.
+ * Instead, **bpf_get_stackid**\ () can collect up to
+ * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that
+ * this limit can be controlled with the **sysctl** program, and
+ * that it should be manually increased in order to profile long
+ * user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * ::
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ * Return
+ * The positive or null stack id on success, or a negative error
+ * in case of failure.
+ *
+ * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed)
+ * Description
+ * Compute a checksum difference, from the raw buffer pointed by
+ * *from*, of length *from_size* (that must be a multiple of 4),
+ * towards the raw buffer pointed by *to*, of size *to_size*
+ * (same remark). An optional *seed* can be added to the value
+ * (this can be cascaded, the seed may come from a previous call
+ * to the helper).
+ *
+ * This is flexible enough to be used in several ways:
+ *
+ * * With *from_size* == 0, *to_size* > 0 and *seed* set to
+ * checksum, it can be used when pushing new data.
+ * * With *from_size* > 0, *to_size* == 0 and *seed* set to
+ * checksum, it can be used when removing data from a packet.
+ * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it
+ * can be used to compute a diff. Note that *from_size* and
+ * *to_size* do not need to be equal.
+ *
+ * This helper can be used in combination with
+ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to
+ * which one can feed in the difference computed with
+ * **bpf_csum_diff**\ ().
+ * Return
+ * The checksum result, or a negative error code in case of
+ * failure.
+ *
+ * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
+ * Description
+ * Retrieve tunnel options metadata for the packet associated to
+ * *skb*, and store the raw tunnel option data to the buffer *opt*
+ * of *size*.
+ *
+ * This helper can be used with encapsulation devices that can
+ * operate in "collect metadata" mode (please refer to the related
+ * note in the description of **bpf_skb_get_tunnel_key**\ () for
+ * more details). A particular example where this can be used is
+ * in combination with the Geneve encapsulation protocol, where it
+ * allows for pushing (with **bpf_skb_set_tunnel_opt**\ () helper)
+ * and retrieving arbitrary TLVs (Type-Length-Value headers) from
+ * the eBPF program. This allows for full customization of these
+ * headers.
+ * Return
+ * The size of the option data retrieved.
+ *
+ * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
+ * Description
+ * Set tunnel options metadata for the packet associated to *skb*
+ * to the option data contained in the raw buffer *opt* of *size*.
+ *
+ * See also the description of the **bpf_skb_get_tunnel_opt**\ ()
+ * helper for additional information.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
+ * Description
+ * Change the protocol of the *skb* to *proto*. Currently
+ * supported are transition from IPv4 to IPv6, and from IPv6 to
+ * IPv4. The helper takes care of the groundwork for the
+ * transition, including resizing the socket buffer. The eBPF
+ * program is expected to fill the new headers, if any, via
+ * **bpf_skb_store_bytes**\ () and to recompute the checksums with
+ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\
+ * (). The main case for this helper is to perform NAT64
+ * operations out of an eBPF program.
+ *
+ * Internally, the GSO type is marked as dodgy so that headers are
+ * checked and segments are recalculated by the GSO/GRO engine.
+ * The size for GSO target is adapted as well.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
+ * Description
+ * Change the packet type for the packet associated to *skb*. This
+ * comes down to setting *skb*\ **->pkt_type** to *type*, except
+ * the eBPF program does not have a write access to *skb*\
+ * **->pkt_type** beside this helper. Using a helper here allows
+ * for graceful handling of errors.
+ *
+ * The major use case is to change incoming *skb*\ s to
+ * **PACKET_HOST** in a programmatic way instead of having to
+ * recirculate via **bpf_redirect**\ (..., **BPF_F_INGRESS**), for
+ * example.
+ *
+ * Note that *type* only allows certain values. At this time, they
+ * are:
+ *
+ * **PACKET_HOST**
+ * Packet is for us.
+ * **PACKET_BROADCAST**
+ * Send packet to all.
+ * **PACKET_MULTICAST**
+ * Send packet to group.
+ * **PACKET_OTHERHOST**
+ * Send packet to someone else.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
+ * Description
+ * Check whether *skb* is a descendant of the cgroup2 held by
+ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * Return
+ * The return value depends on the result of the test, and can be:
+ *
+ * * 0, if the *skb* failed the cgroup2 descendant test.
+ * * 1, if the *skb* succeeded the cgroup2 descendant test.
+ * * A negative error code, if an error occurred.
+ *
+ * u32 bpf_get_hash_recalc(struct sk_buff *skb)
+ * Description
+ * Retrieve the hash of the packet, *skb*\ **->hash**. If it is
+ * not set, in particular if the hash was cleared due to mangling,
+ * recompute this hash. Later accesses to the hash can be done
+ * directly with *skb*\ **->hash**.
+ *
+ * Calling **bpf_set_hash_invalid**\ (), changing a packet
+ * prototype with **bpf_skb_change_proto**\ (), or calling
+ * **bpf_skb_store_bytes**\ () with the
+ * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear
+ * the hash and to trigger a new computation for the next call to
+ * **bpf_get_hash_recalc**\ ().
+ * Return
+ * The 32-bit hash.
+ *
+ * u64 bpf_get_current_task(void)
+ * Return
+ * A pointer to the current task struct.
+ *
+ * int bpf_probe_write_user(void *dst, const void *src, u32 len)
+ * Description
+ * Attempt in a safe way to write *len* bytes from the buffer
+ * *src* to *dst* in memory. It only works for threads that are in
+ * user context, and *dst* must be a valid user space address.
+ *
+ * This helper should not be used to implement any kind of
+ * security mechanism because of TOC-TOU attacks, but rather to
+ * debug, divert, and manipulate execution of semi-cooperative
+ * processes.
+ *
+ * Keep in mind that this feature is meant for experiments, and it
+ * has a risk of crashing the system and running programs.
+ * Therefore, when an eBPF program using this helper is attached,
+ * a warning including PID and process name is printed to kernel
+ * logs.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
+ * Description
+ * Check whether the probe is being run in the context of a given
+ * subset of the cgroup2 hierarchy. The cgroup2 to test is held by
+ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * Return
+ * The return value depends on the result of the test, and can be:
+ *
+ * * 0, if the current task does not belong to the cgroup2.
+ * * 1, if the current task belongs to the cgroup2.
+ * * A negative error code, if an error occurred.
+ *
+ * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
+ * Description
+ * Resize (trim or grow) the packet associated to *skb* to the
+ * new *len*. The *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * The basic idea is that the helper performs the needed work to
+ * change the size of the packet, then the eBPF program rewrites
+ * the rest via helpers like **bpf_skb_store_bytes**\ (),
+ * **bpf_l3_csum_replace**\ (), **bpf_l4_csum_replace**\ ()
+ * and others. This helper is a slow path utility intended for
+ * replies with control messages. And because it is targeted for
+ * slow path, the helper itself can afford to be slow: it
+ * implicitly linearizes, unclones and drops offloads from the
+ * *skb*.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
+ * Description
+ * Pull in non-linear data in case the *skb* is non-linear and not
+ * all of *len* are part of the linear section. Make *len* bytes
+ * from *skb* readable and writable. If a zero value is passed for
+ * *len*, then the whole length of the *skb* is pulled.
+ *
+ * This helper is only needed for reading and writing with direct
+ * packet access.
+ *
+ * For direct packet access, testing that offsets to access
+ * are within packet boundaries (test on *skb*\ **->data_end**) is
+ * susceptible to fail if offsets are invalid, or if the requested
+ * data is in non-linear parts of the *skb*. On failure the
+ * program can just bail out, or in the case of a non-linear
+ * buffer, use a helper to make the data available. The
+ * **bpf_skb_load_bytes**\ () helper is a first solution to access
+ * the data. Another one consists in using **bpf_skb_pull_data**\ ()
+ * to pull in the non-linear parts once, then retesting and
+ * finally accessing the data.
+ *
+ * At the same time, this also makes sure the *skb* is uncloned,
+ * which is a necessary condition for direct write. As this needs
+ * to be an invariant for the write part only, the verifier
+ * detects writes and adds a prologue that is calling
+ * **bpf_skb_pull_data**\ () to effectively unclone the *skb* from
+ * the very beginning in case it is indeed cloned.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum)
+ * Description
+ * Add the checksum *csum* into *skb*\ **->csum** in case the
+ * driver has supplied a checksum for the entire packet into that
+ * field. Return an error otherwise. This helper is intended to be
+ * used in combination with **bpf_csum_diff**\ (), in particular
+ * when the checksum needs to be updated after data has been
+ * written into the packet through direct packet access.
+ * Return
+ * The checksum on success, or a negative error code in case of
+ * failure.
+ *
+ * void bpf_set_hash_invalid(struct sk_buff *skb)
+ * Description
+ * Invalidate the current *skb*\ **->hash**. It can be used after
+ * mangling on headers through direct packet access, in order to
+ * indicate that the hash is outdated and to trigger a
+ * recalculation the next time the kernel tries to access this
+ * hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *
+ * int bpf_get_numa_node_id(void)
+ * Description
+ * Return the id of the current NUMA node. The primary use case
+ * for this helper is the selection of sockets for the local NUMA
+ * node, when the program is attached to sockets using the
+ * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**),
+ * but the helper is also available to other eBPF program types,
+ * similarly to **bpf_get_smp_processor_id**\ ().
+ * Return
+ * The id of current NUMA node.
+ *
+ * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
+ * Description
+ * Grows headroom of packet associated to *skb* and adjusts the
+ * offset of the MAC header accordingly, adding *len* bytes of
+ * space. It automatically extends and reallocates memory as
+ * required.
+ *
+ * This helper can be used on a layer 3 *skb* to push a MAC header
+ * for redirection into a layer 2 device.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
+ * it is possible to use a negative value for *delta*. This helper
+ * can be used to prepare the packet for pushing or popping
+ * headers.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
+ * Description
+ * Copy a NUL terminated string from an unsafe kernel address
+ * *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
+ * more details.
+ *
+ * Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
+ * instead.
+ * Return
+ * On success, the strictly positive length of the string,
+ * including the trailing NUL character. On error, a negative
+ * value.
+ *
+ * u64 bpf_get_socket_cookie(struct sk_buff *skb)
+ * Description
+ * If the **struct sk_buff** pointed by *skb* has a known socket,
+ * retrieve the cookie (generated by the kernel) of this socket.
+ * If no cookie has been set yet, generate a new cookie. Once
+ * generated, the socket cookie remains stable for the life of the
+ * socket. This helper can be useful for monitoring per socket
+ * networking traffic statistics as it provides a global socket
+ * identifier that can be assumed unique.
+ * Return
+ * An 8-byte long non-decreasing number on success, or 0 if the
+ * socket field is missing inside *skb*.
+ *
+ * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
+ * Description
+ * Equivalent to bpf_get_socket_cookie() helper that accepts
+ * *skb*, but gets socket from **struct bpf_sock_addr** context.
+ * Return
+ * An 8-byte long non-decreasing number.
+ *
+ * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
+ * Description
+ * Equivalent to bpf_get_socket_cookie() helper that accepts
+ * *skb*, but gets socket from **struct bpf_sock_ops** context.
+ * Return
+ * An 8-byte long non-decreasing number.
+ *
+ * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ * Return
+ * The owner UID of the socket associated to *skb*. If the socket
+ * is **NULL**, or if it is not a full socket (i.e. if it is a
+ * time-wait or a request socket instead), **overflowuid** value
+ * is returned (note that **overflowuid** might also be the actual
+ * UID value for the socket).
+ *
+ * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
+ * Description
+ * Set the full hash for *skb* (set the field *skb*\ **->hash**)
+ * to value *hash*.
+ * Return
+ * 0
+ *
+ * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
+ * Description
+ * Emulate a call to **setsockopt()** on the socket associated to
+ * *bpf_socket*, which must be a full socket. The *level* at
+ * which the option resides and the name *optname* of the option
+ * must be specified, see **setsockopt(2)** for more information.
+ * The option value of length *optlen* is pointed by *optval*.
+ *
+ * This helper actually implements a subset of **setsockopt()**.
+ * It supports the following *level*\ s:
+ *
+ * * **SOL_SOCKET**, which supports the following *optname*\ s:
+ * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
+ * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ * * **IPPROTO_TCP**, which supports the following *optname*\ s:
+ * **TCP_CONGESTION**, **TCP_BPF_IW**,
+ * **TCP_BPF_SNDCWND_CLAMP**.
+ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags)
+ * Description
+ * Grow or shrink the room for data in the packet associated to
+ * *skb* by *len_diff*, and according to the selected *mode*.
+ *
+ * There are two supported modes at this time:
+ *
+ * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
+ * (room space is added or removed below the layer 2 header).
+ *
+ * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
+ * (room space is added or removed below the layer 3 header).
+ *
+ * The following flags are supported at this time:
+ *
+ * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size.
+ * Adjusting mss in this way is not allowed for datagrams.
+ *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**,
+ * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**:
+ * Any new space is reserved to hold a tunnel header.
+ * Configure skb offsets and other fields accordingly.
+ *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**,
+ * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**:
+ * Use with ENCAP_L3 flags to further specify the tunnel type.
+ *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2**\ (*len*):
+ * Use with ENCAP_L3/L4 flags to further specify the tunnel
+ * type; *len* is the length of the inner MAC header.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * Redirect the packet to the endpoint referenced by *map* at
+ * index *key*. Depending on its type, this *map* can contain
+ * references to net devices (for forwarding packets through other
+ * ports), or to CPUs (for redirecting XDP frames to another CPU;
+ * but this is only implemented for native XDP (with driver
+ * support) as of this writing).
+ *
+ * The lower two bits of *flags* are used as the return code if
+ * the map lookup fails. This is so that the return value can be
+ * one of the XDP program return codes up to XDP_TX, as chosen by
+ * the caller. Any higher bits in the *flags* argument must be
+ * unset.
+ *
+ * When used to redirect packets to net devices, this helper
+ * provides a high performance increase over **bpf_redirect**\ ().
+ * This is due to various implementation details of the underlying
+ * mechanisms, one of which is the fact that **bpf_redirect_map**\
+ * () tries to send packet as a "bulk" to the device.
+ * Return
+ * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ *
+ * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * Redirect the packet to the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * Add an entry to, or update a *map* referencing sockets. The
+ * *skops* is used as a new value for the entry associated to
+ * *key*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * If the *map* has eBPF programs (parser and verdict), those will
+ * be inherited by the socket being added. If the socket is
+ * already attached to eBPF programs, this results in an error.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust the address pointed by *xdp_md*\ **->data_meta** by
+ * *delta* (which can be positive or negative). Note that this
+ * operation modifies the address stored in *xdp_md*\ **->data**,
+ * so the latter must be loaded only after the helper has been
+ * called.
+ *
+ * The use of *xdp_md*\ **->data_meta** is optional and programs
+ * are not required to use it. The rationale is that when the
+ * packet is processed with XDP (e.g. as DoS filter), it is
+ * possible to push further meta data along with it before passing
+ * to the stack, and to give the guarantee that an ingress eBPF
+ * program attached as a TC classifier on the same device can pick
+ * this up for further post-processing. Since TC works with socket
+ * buffers, it remains possible to set from XDP the **mark** or
+ * **priority** pointers, or other pointers for the socket buffer.
+ * Having this scratch space generic and programmable allows for
+ * more flexibility as the user is free to store whatever meta
+ * data they need.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
+ * Description
+ * Read the value of a perf event counter, and store it into *buf*
+ * of size *buf_size*. This helper relies on a *map* of type
+ * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event
+ * counter is selected when *map* is updated with perf event file
+ * descriptors. The *map* is an array whose size is the number of
+ * available CPUs, and each cell contains a value relative to one
+ * CPU. The value to retrieve is indicated by *flags*, that
+ * contains the index of the CPU to look up, masked with
+ * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * **BPF_F_CURRENT_CPU** to indicate that the value for the
+ * current CPU should be retrieved.
+ *
+ * This helper behaves in a way close to
+ * **bpf_perf_event_read**\ () helper, save that instead of
+ * just returning the value observed, it fills the *buf*
+ * structure. This allows for additional data to be retrieved: in
+ * particular, the enabled and running times (in *buf*\
+ * **->enabled** and *buf*\ **->running**, respectively) are
+ * copied. In general, **bpf_perf_event_read_value**\ () is
+ * recommended over **bpf_perf_event_read**\ (), which has some
+ * ABI issues and provides fewer functionalities.
+ *
+ * These values are interesting, because hardware PMU (Performance
+ * Monitoring Unit) counters are limited resources. When there are
+ * more PMU based perf events opened than available counters,
+ * kernel will multiplex these events so each event gets certain
+ * percentage (but not all) of the PMU time. In case that
+ * multiplexing happens, the number of samples or counter value
+ * will not reflect the case compared to when no multiplexing
+ * occurs. This makes comparison between different runs difficult.
+ * Typically, the counter value should be normalized before
+ * comparing to other experiments. The usual normalization is done
+ * as follows.
+ *
+ * ::
+ *
+ * normalized_counter = counter * t_enabled / t_running
+ *
+ * Where t_enabled is the time enabled for event and t_running is
+ * the time running for event since last normalization. The
+ * enabled and running times are accumulated since the perf event
+ * open. To achieve scaling factor between two invocations of an
+ * eBPF program, users can use CPU id as the key (which is
+ * typical for perf array usage model) to remember the previous
+ * value and do the calculation inside the eBPF program.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ * Description
+ * For an eBPF program attached to a perf event, retrieve the
+ * value of the event counter associated to *ctx* and store it in
+ * the structure pointed by *buf* and of size *buf_size*. Enabled
+ * and running times are also stored in the structure (see
+ * description of helper **bpf_perf_event_read_value**\ () for
+ * more details).
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
+ * Description
+ * Emulate a call to **getsockopt()** on the socket associated to
+ * *bpf_socket*, which must be a full socket. The *level* at
+ * which the option resides and the name *optname* of the option
+ * must be specified, see **getsockopt(2)** for more information.
+ * The retrieved value is stored in the structure pointed by
+ * *optval* and of length *optlen*.
+ *
+ * This helper actually implements a subset of **getsockopt()**.
+ * It supports the following *level*\ s:
+ *
+ * * **IPPROTO_TCP**, which supports *optname*
+ * **TCP_CONGESTION**.
+ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_override_return(struct pt_regs *regs, u64 rc)
+ * Description
+ * Used for error injection, this helper uses kprobes to override
+ * the return value of the probed function, and to set it to *rc*.
+ * The first argument is the context *regs* on which the kprobe
+ * works.
+ *
+ * This helper works by setting the PC (program counter)
+ * to an override function which is run in place of the original
+ * probed function. This means the probed function is not run at
+ * all. The replacement function just returns with the required
+ * value.
+ *
+ * This helper has security implications, and thus is subject to
+ * restrictions. It is only available if the kernel was compiled
+ * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
+ * option, and in this case it only works on functions tagged with
+ * **ALLOW_ERROR_INJECTION** in the kernel code.
+ *
+ * Also, the helper is only available for the architectures having
+ * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing,
+ * x86 architecture is the only one to support this feature.
+ * Return
+ * 0
+ *
+ * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
+ * Description
+ * Attempt to set the value of the **bpf_sock_ops_cb_flags** field
+ * for the full TCP socket associated to *bpf_sock* to
+ * *argval*.
+ *
+ * The primary use of this field is to determine if there should
+ * be calls to eBPF programs of type
+ * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP
+ * code. A program of the same type can change its value, per
+ * connection and as necessary, when the connection is
+ * established. This field is directly accessible for reading, but
+ * this helper must be used for updates in order to return an
+ * error if an eBPF program tries to set a callback that is not
+ * supported in the current kernel.
+ *
+ * *argval* is a flag array which can combine these flags:
+ *
+ * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
+ * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
+ * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT)
+ *
+ * Therefore, this function can be used to clear a callback flag by
+ * setting the appropriate bit to zero. e.g. to disable the RTO
+ * callback:
+ *
+ * **bpf_sock_ops_cb_flags_set(bpf_sock,**
+ * **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)**
+ *
+ * Here are some examples of where one could call such eBPF
+ * program:
+ *
+ * * When RTO fires.
+ * * When a packet is retransmitted.
+ * * When the connection terminates.
+ * * When a packet is sent.
+ * * When a packet is received.
+ * Return
+ * Code **-EINVAL** if the socket is not a full TCP socket;
+ * otherwise, a positive number containing the bits that could not
+ * be set is returned (which comes down to 0 if all bits were set
+ * as required).
+ *
+ * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * This helper is used in programs implementing policies at the
+ * socket level. If the message *msg* is allowed to pass (i.e. if
+ * the verdict eBPF program returns **SK_PASS**), redirect it to
+ * the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * Description
+ * For socket policies, apply the verdict of the eBPF program to
+ * the next *bytes* (number of bytes) of message *msg*.
+ *
+ * For example, this helper can be used in the following cases:
+ *
+ * * A single **sendmsg**\ () or **sendfile**\ () system call
+ * contains multiple logical messages that the eBPF program is
+ * supposed to read and for which it should apply a verdict.
+ * * An eBPF program only cares to read the first *bytes* of a
+ * *msg*. If the message has a large payload, then setting up
+ * and calling the eBPF program repeatedly for all bytes, even
+ * though the verdict is already known, would create unnecessary
+ * overhead.
+ *
+ * When called from within an eBPF program, the helper sets a
+ * counter internal to the BPF infrastructure, that is used to
+ * apply the last verdict to the next *bytes*. If *bytes* is
+ * smaller than the current data being processed from a
+ * **sendmsg**\ () or **sendfile**\ () system call, the first
+ * *bytes* will be sent and the eBPF program will be re-run with
+ * the pointer for start of data pointing to byte number *bytes*
+ * **+ 1**. If *bytes* is larger than the current data being
+ * processed, then the eBPF verdict will be applied to multiple
+ * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are
+ * consumed.
+ *
+ * Note that if a socket closes with the internal counter holding
+ * a non-zero value, this is not a problem because data is not
+ * being buffered for *bytes* and is sent as it is received.
+ * Return
+ * 0
+ *
+ * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * Description
+ * For socket policies, prevent the execution of the verdict eBPF
+ * program for message *msg* until *bytes* (byte number) have been
+ * accumulated.
+ *
+ * This can be used when one needs a specific number of bytes
+ * before a verdict can be assigned, even if the data spans
+ * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme
+ * case would be a user calling **sendmsg**\ () repeatedly with
+ * 1-byte long message segments. Obviously, this is bad for
+ * performance, but it is still valid. If the eBPF program needs
+ * *bytes* bytes to validate a header, this helper can be used to
+ * prevent the eBPF program from being called again until *bytes* have
+ * been accumulated.
+ * Return
+ * 0
+ *
+ * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
+ * Description
+ * For socket policies, pull in non-linear data from user space
+ * for *msg* and set pointers *msg*\ **->data** and *msg*\
+ * **->data_end** to *start* and *end* bytes offsets into *msg*,
+ * respectively.
+ *
+ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ * *msg* it can only parse data that the (**data**, **data_end**)
+ * pointers have already consumed. For **sendmsg**\ () hooks this
+ * is likely the first scatterlist element. But for calls relying
+ * on the **sendpage** handler (e.g. **sendfile**\ ()) this will
+ * be the range (**0**, **0**) because the data is shared with
+ * user space and by default the objective is to avoid allowing
+ * user space to modify data while (or after) eBPF verdict is
+ * being decided. This helper can be used to pull in data and to
+ * set the start and end pointer to given values. Data will be
+ * copied if necessary (i.e. if data was not linear and if start
+ * and end pointers do not point to the same chunk).
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
+ * Description
+ * Bind the socket associated to *ctx* to the address pointed by
+ * *addr*, of length *addr_len*. This allows for making outgoing
+ * connection from the desired IP address, which can be useful for
+ * example when all processes inside a cgroup should use one
+ * single IP address on a host that has multiple IP addresses configured.
+ *
+ * This helper works for IPv4 and IPv6, TCP and UDP sockets. The
+ * domain (*addr*\ **->sa_family**) must be **AF_INET** (or
+ * **AF_INET6**). Looking for a free port to bind to can be
+ * expensive, therefore binding to port is not permitted by the
+ * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
+ * must be set to zero.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
+ * only possible to shrink the packet as of this writing,
+ * therefore *delta* must be a negative integer.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
+ * Description
+ * Retrieve the XFRM state (IP transform framework, see also
+ * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
+ *
+ * The retrieved value is stored in the **struct bpf_xfrm_state**
+ * pointed by *xfrm_state* and of length *size*.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_XFRM** configuration option.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags)
+ * Description
+ * Return a user or a kernel stack in bpf program provided buffer.
+ * To achieve this, the helper needs *ctx*, which is a pointer
+ * to the context on which the tracing program is executed.
+ * To store the stacktrace, the bpf program provides *buf* with
+ * a nonnegative *size*.
+ *
+ * The last argument, *flags*, holds the number of stack frames to
+ * skip (from 0 to 255), masked with
+ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * the following flags:
+ *
+ * **BPF_F_USER_STACK**
+ * Collect a user space stack instead of a kernel stack.
+ * **BPF_F_USER_BUILD_ID**
+ * Collect buildid+offset instead of ips for user stack,
+ * only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ * **bpf_get_stack**\ () can collect up to
+ * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ * to a sufficiently large buffer size. Note that
+ * this limit can be controlled with the **sysctl** program, and
+ * that it should be manually increased in order to profile long
+ * user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * ::
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ * Return
+ * A non-negative value equal to or less than *size* on success,
+ * or a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * Description
+ * This helper is similar to **bpf_skb_load_bytes**\ () in that
+ * it provides an easy way to load *len* bytes from *offset*
+ * from the packet associated to *skb*, into the buffer pointed
+ * by *to*. The difference to **bpf_skb_load_bytes**\ () is that
+ * a fifth argument *start_header* exists in order to select a
+ * base offset to start from. *start_header* can be one of:
+ *
+ * **BPF_HDR_START_MAC**
+ * Base offset to load data from is *skb*'s mac header.
+ * **BPF_HDR_START_NET**
+ * Base offset to load data from is *skb*'s network header.
+ *
+ * In general, "direct packet access" is the preferred method to
+ * access packet data, however, this helper is in particular useful
+ * in socket filters where *skb*\ **->data** does not always point
+ * to the start of the mac header and where "direct packet access"
+ * is not available.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
+ * Description
+ * Do FIB lookup in kernel tables using parameters in *params*.
+ * If lookup is successful and result shows packet is to be
+ * forwarded, the neighbor tables are searched for the nexthop.
+ * If successful (i.e., FIB lookup shows forwarding and nexthop
+ * is resolved), the nexthop address is returned in ipv4_dst
+ * or ipv6_dst based on family, smac is set to mac address of
+ * egress device, dmac is set to nexthop mac address, rt_metric
+ * is set to metric from route (IPv4/IPv6 only), and ifindex
+ * is set to the device index of the nexthop from the FIB lookup.
+ *
+ * *plen* argument is the size of the passed in struct.
+ * *flags* argument can be a combination of one or more of the
+ * following values:
+ *
+ * **BPF_FIB_LOOKUP_DIRECT**
+ * Do a direct table lookup vs full lookup using FIB
+ * rules.
+ * **BPF_FIB_LOOKUP_OUTPUT**
+ * Perform lookup from an egress perspective (default is
+ * ingress).
+ *
+ * *ctx* is either **struct xdp_md** for XDP programs or
+ * **struct sk_buff** for tc cls_act programs.
+ * Return
+ * * < 0 if any input argument is invalid
+ * * 0 on success (packet is forwarded, nexthop neighbor exists)
+ * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
+ * packet is not forwarded or needs assist from full stack
+ *
+ * int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * Add an entry to, or update a sockhash *map* referencing sockets.
+ * The *skops* is used as a new value for the entry associated to
+ * *key*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * If the *map* has eBPF programs (parser and verdict), those will
+ * be inherited by the socket being added. If the socket is
+ * already attached to eBPF programs, this results in an error.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * This helper is used in programs implementing policies at the
+ * socket level. If the message *msg* is allowed to pass (i.e. if
+ * the verdict eBPF program returns **SK_PASS**), redirect it to
+ * the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * This helper is used in programs implementing policies at the
+ * skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
+ * if the verdict eBPF program returns **SK_PASS**), redirect it
+ * to the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
+ * Description
+ * Encapsulate the packet associated to *skb* within a Layer 3
+ * protocol header. This header is provided in the buffer at
+ * address *hdr*, with *len* its size in bytes. *type* indicates
+ * the protocol of the header and can be one of:
+ *
+ * **BPF_LWT_ENCAP_SEG6**
+ * IPv6 encapsulation with Segment Routing Header
+ * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
+ * the IPv6 header is computed by the kernel.
+ * **BPF_LWT_ENCAP_SEG6_INLINE**
+ * Only works if *skb* contains an IPv6 packet. Insert a
+ * Segment Routing Header (**struct ipv6_sr_hdr**) inside
+ * the IPv6 header.
+ * **BPF_LWT_ENCAP_IP**
+ * IP encapsulation (GRE/GUE/IPIP/etc). The outer header
+ * must be IPv4 or IPv6, followed by zero or more
+ * additional headers, up to **LWT_BPF_MAX_HEADROOM**
+ * total bytes in all prepended headers. Please note that
+ * if **skb_is_gso**\ (*skb*) is true, no more than two
+ * headers can be prepended, and the inner header, if
+ * present, should be either GRE or UDP/GUE.
+ *
+ * **BPF_LWT_ENCAP_SEG6**\ \* types can be called by BPF programs
+ * of type **BPF_PROG_TYPE_LWT_IN**; **BPF_LWT_ENCAP_IP** type can
+ * be called by bpf programs of types **BPF_PROG_TYPE_LWT_IN** and
+ * **BPF_PROG_TYPE_LWT_XMIT**.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
+ * Description
+ * Store *len* bytes from address *from* into the packet
+ * associated to *skb*, at *offset*. Only the flags, tag and TLVs
+ * inside the outermost IPv6 Segment Routing Header can be
+ * modified through this helper.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
+ * Description
+ * Adjust the size allocated to TLVs in the outermost IPv6
+ * Segment Routing Header contained in the packet associated to
+ * *skb*, at position *offset* by *delta* bytes. Only offsets
+ * after the segments are accepted. *delta* can be either
+ * positive (growing) or negative (shrinking).
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
+ * Description
+ * Apply an IPv6 Segment Routing action of type *action* to the
+ * packet associated to *skb*. Each action takes a parameter
+ * contained at address *param*, and of length *param_len* bytes.
+ * *action* can be one of:
+ *
+ * **SEG6_LOCAL_ACTION_END_X**
+ * End.X action: Endpoint with Layer-3 cross-connect.
+ * Type of *param*: **struct in6_addr**.
+ * **SEG6_LOCAL_ACTION_END_T**
+ * End.T action: Endpoint with specific IPv6 table lookup.
+ * Type of *param*: **int**.
+ * **SEG6_LOCAL_ACTION_END_B6**
+ * End.B6 action: Endpoint bound to an SRv6 policy.
+ * Type of *param*: **struct ipv6_sr_hdr**.
+ * **SEG6_LOCAL_ACTION_END_B6_ENCAP**
+ * End.B6.Encap action: Endpoint bound to an SRv6
+ * encapsulation policy.
+ * Type of *param*: **struct ipv6_sr_hdr**.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_rc_repeat(void *ctx)
+ * Description
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded repeat key message. This delays
+ * the generation of a key up event for previously generated
+ * key down event.
+ *
+ * Some IR protocols like NEC have a special IR message for
+ * repeating last button, for when a button is held down.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * This helper is only available if the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ * Return
+ * 0
+ *
+ * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
+ * Description
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded key press with *scancode*,
+ * *toggle* value in the given *protocol*. The scancode will be
+ * translated to a keycode using the rc keymap, and reported as
+ * an input key down event. After a period a key up event is
+ * generated. This period can be extended by calling either
+ * **bpf_rc_keydown**\ () again with the same values, or calling
+ * **bpf_rc_repeat**\ ().
+ *
+ * Some protocols include a toggle bit, in case the button was
+ * released and pressed again between consecutive scancodes.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * The *protocol* is the decoded protocol number (see
+ * **enum rc_proto** for some predefined values).
+ *
+ * This helper is only available if the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ * Return
+ * 0
+ *
+ * u64 bpf_skb_cgroup_id(struct sk_buff *skb)
+ * Description
+ * Return the cgroup v2 id of the socket associated with the *skb*.
+ * This is roughly similar to the **bpf_get_cgroup_classid**\ ()
+ * helper for cgroup v1 by providing a tag resp. identifier that
+ * can be matched on or used for map lookups e.g. to implement
+ * policy. The cgroup v2 id of a given path in the hierarchy is
+ * exposed in user space through the f_handle API in order to get
+ * to the same 64-bit id.
+ *
+ * This helper can be used on TC egress path, but not on ingress,
+ * and is available only if the kernel was compiled with the
+ * **CONFIG_SOCK_CGROUP_DATA** configuration option.
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_get_current_cgroup_id(void)
+ * Return
+ * A 64-bit integer containing the current cgroup id based
+ * on the cgroup within which the current task is running.
+ *
+ * void *bpf_get_local_storage(void *map, u64 flags)
+ * Description
+ * Get the pointer to the local storage area.
+ * The type and the size of the local storage is defined
+ * by the *map* argument.
+ * The *flags* meaning is specific for each map type,
+ * and has to be 0 for cgroup local storage.
+ *
+ * Depending on the BPF program type, a local storage area
+ * can be shared between multiple instances of the BPF program,
+ * running simultaneously.
+ *
+ * The user is responsible for synchronizing access to the shared
+ * data, for example by using the **BPF_STX_XADD** instruction to
+ * alter it.
+ * Return
+ * A pointer to the local storage area.
+ *
+ * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * Select a **SO_REUSEPORT** socket from a
+ * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
+ * It checks that the selected socket matches the incoming
+ * request in the socket buffer.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
+ * Description
+ * Return id of cgroup v2 that is ancestor of cgroup associated
+ * with the *skb* at the *ancestor_level*. The root cgroup is at
+ * *ancestor_level* zero and each step down the hierarchy
+ * increments the level. If *ancestor_level* == level of cgroup
+ * associated with *skb*, then return value will be same as that
+ * of **bpf_skb_cgroup_id**\ ().
+ *
+ * The helper is useful to implement policies based on cgroups
+ * that are upper in hierarchy than immediate cgroup associated
+ * with *skb*.
+ *
+ * The format of returned id and helper limitations are same as in
+ * **bpf_skb_cgroup_id**\ ().
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ * Description
+ * Look for TCP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ * The *ctx* should point to the context of the program, such as
+ * the skb or socket (depending on the hook in use). This is used
+ * to determine the base network namespace for the lookup.
+ *
+ * *tuple_size* must be one of:
+ *
+ * **sizeof**\ (*tuple*\ **->ipv4**)
+ * Look for an IPv4 socket.
+ * **sizeof**\ (*tuple*\ **->ipv6**)
+ * Look for an IPv6 socket.
+ *
+ * If the *netns* is a negative signed 32-bit integer, then the
+ * socket lookup table in the netns associated with the *ctx* will
+ * be used. For the TC hooks, this is the netns of the device
+ * in the skb. For socket hooks, this is the netns of the socket.
+ * If *netns* is any other signed 32-bit value greater than or
+ * equal to zero then it specifies the ID of the netns relative to
+ * the netns associated with the *ctx*. *netns* values beyond the
+ * range of 32-bit integers are reserved for future use.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ * Return
+ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock**
+ * result is from *reuse*\ **->socks**\ [] using the hash of the
+ * tuple.
+ *
+ * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ * Description
+ * Look for UDP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ * The *ctx* should point to the context of the program, such as
+ * the skb or socket (depending on the hook in use). This is used
+ * to determine the base network namespace for the lookup.
+ *
+ * *tuple_size* must be one of:
+ *
+ * **sizeof**\ (*tuple*\ **->ipv4**)
+ * Look for an IPv4 socket.
+ * **sizeof**\ (*tuple*\ **->ipv6**)
+ * Look for an IPv6 socket.
+ *
+ * If the *netns* is a negative signed 32-bit integer, then the
+ * socket lookup table in the netns associated with the *ctx* will
+ * be used. For the TC hooks, this is the netns of the device
+ * in the skb. For socket hooks, this is the netns of the socket.
+ * If *netns* is any other signed 32-bit value greater than or
+ * equal to zero then it specifies the ID of the netns relative to
+ * the netns associated with the *ctx*. *netns* values beyond the
+ * range of 32-bit integers are reserved for future use.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ * Return
+ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock**
+ * result is from *reuse*\ **->socks**\ [] using the hash of the
+ * tuple.
+ *
+ * int bpf_sk_release(struct bpf_sock *sock)
+ * Description
+ * Release the reference held by *sock*. *sock* must be a
+ * non-**NULL** pointer that was returned from
+ * **bpf_sk_lookup_xxx**\ ().
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is
+ * removed to make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * Description
+ * Get an element from *map* without removing it.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
+ * Description
+ * For socket policies, insert *len* bytes into *msg* at offset
+ * *start*.
+ *
+ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ * *msg* it may want to insert metadata or options into the *msg*.
+ * This can later be read and used by any of the lower layer BPF
+ * hooks.
+ *
+ * This helper may fail under memory pressure (if an allocation
+ * fails); in that case the BPF program gets an appropriate
+ * error and needs to handle it.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
+ * Description
+ * Will remove *len* bytes from a *msg* starting at byte *start*.
+ * This may result in **ENOMEM** errors under certain situations if
+ * an allocation and copy are required due to a full ring buffer.
+ * However, the helper will try to avoid doing the allocation
+ * if possible. Other errors can occur if input parameters are
+ * invalid either due to *start* byte not being valid part of *msg*
+ * payload and/or *pop* value being too large.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y)
+ * Description
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded pointer movement.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * This helper is only available if the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ * Return
+ * 0
+ *
+ * int bpf_spin_lock(struct bpf_spin_lock *lock)
+ * Description
+ * Acquire a spinlock represented by the pointer *lock*, which is
+ * stored as part of a value of a map. Taking the lock allows to
+ * safely update the rest of the fields in that value. The
+ * spinlock can (and must) later be released with a call to
+ * **bpf_spin_unlock**\ (\ *lock*\ ).
+ *
+ * Spinlocks in BPF programs come with a number of restrictions
+ * and constraints:
+ *
+ * * **bpf_spin_lock** objects are only allowed inside maps of
+ * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this
+ * list could be extended in the future).
+ * * BTF description of the map is mandatory.
+ * * The BPF program can take ONE lock at a time, since taking two
+ * or more could cause deadlocks.
+ * * Only one **struct bpf_spin_lock** is allowed per map element.
+ * * When the lock is taken, calls (either BPF to BPF or helpers)
+ * are not allowed.
+ * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not
+ * allowed inside a spinlock-ed region.
+ * * The BPF program MUST call **bpf_spin_unlock**\ () to release
+ * the lock, on all execution paths, before it returns.
+ * * The BPF program can access **struct bpf_spin_lock** only via
+ * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ ()
+ * helpers. Loading or storing data into the **struct
+ * bpf_spin_lock** *lock*\ **;** field of a map is not allowed.
+ * * To use the **bpf_spin_lock**\ () helper, the BTF description
+ * of the map value must be a struct and have **struct
+ * bpf_spin_lock** *anyname*\ **;** field at the top level.
+ * Nested lock inside another struct is not allowed.
+ * * The **struct bpf_spin_lock** *lock* field in a map value must
+ * be aligned on a multiple of 4 bytes in that value.
+ * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy
+ * the **bpf_spin_lock** field to user space.
+ * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from
+ * a BPF program, do not update the **bpf_spin_lock** field.
+ * * **bpf_spin_lock** cannot be on the stack or inside a
+ * networking packet (it can only be inside of a map value).
+ * * **bpf_spin_lock** is available to root only.
+ * * Tracing programs and socket filter programs cannot use
+ * **bpf_spin_lock**\ () due to insufficient preemption checks
+ * (but this may change in the future).
+ * * **bpf_spin_lock** is not allowed in inner maps of map-in-map.
+ * Return
+ * 0
+ *
+ * int bpf_spin_unlock(struct bpf_spin_lock *lock)
+ * Description
+ * Release the *lock* previously locked by a call to
+ * **bpf_spin_lock**\ (\ *lock*\ ).
+ * Return
+ * 0
+ *
+ * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
+ * Description
+ * This helper gets a **struct bpf_sock** pointer such
+ * that all the fields in this **bpf_sock** can be accessed.
+ * Return
+ * A **struct bpf_sock** pointer on success, or **NULL** in
+ * case of failure.
+ *
+ * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
+ * Description
+ * This helper gets a **struct bpf_tcp_sock** pointer from a
+ * **struct bpf_sock** pointer.
+ * Return
+ * A **struct bpf_tcp_sock** pointer on success, or **NULL** in
+ * case of failure.
+ *
+ * int bpf_skb_ecn_set_ce(struct sk_buff *skb)
+ * Description
+ * Set ECN (Explicit Congestion Notification) field of IP header
+ * to **CE** (Congestion Encountered) if current value is **ECT**
+ * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6
+ * and IPv4.
+ * Return
+ * 1 if the **CE** flag is set (either by the current helper call
+ * or because it was already present), 0 if it is not set.
+ *
+ * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk)
+ * Description
+ * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state.
+ * **bpf_sk_release**\ () is unnecessary and not allowed.
+ * Return
+ * A **struct bpf_sock** pointer on success, or **NULL** in
+ * case of failure.
+ *
+ * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ * Description
+ * Look for TCP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ * This function is identical to **bpf_sk_lookup_tcp**\ (), except
+ * that it also returns timewait or request sockets. Use
+ * **bpf_sk_fullsock**\ () or **bpf_tcp_sock**\ () to access the
+ * full structure.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ * Return
+ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock**
+ * result is from *reuse*\ **->socks**\ [] using the hash of the
+ * tuple.
+ *
+ * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * Description
+ * Check whether *iph* and *th* contain a valid SYN cookie ACK for
+ * the listening socket in *sk*.
+ *
+ * *iph* points to the start of the IPv4 or IPv6 header, while
+ * *iph_len* contains **sizeof**\ (**struct iphdr**) or
+ * **sizeof**\ (**struct ipv6hdr**).
+ *
+ * *th* points to the start of the TCP header, while *th_len*
+ * contains **sizeof**\ (**struct tcphdr**).
+ *
+ * Return
+ * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
+ * error otherwise.
+ *
+ * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags)
+ * Description
+ * Get name of sysctl in /proc/sys/ and copy it into the
+ * program-provided buffer *buf* of size *buf_len*.
+ *
+ * The buffer is always NUL terminated, unless it's zero-sized.
+ *
+ * If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is
+ * copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name
+ * only (e.g. "tcp_mem").
+ * Return
+ * Number of characters copied (not including the trailing NUL).
+ *
+ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ * truncated name in this case).
+ *
+ * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ * Description
+ * Get current value of sysctl as it is presented in /proc/sys
+ * (incl. newline, etc), and copy it as a string into the
+ * program-provided buffer *buf* of size *buf_len*.
+ *
+ * The whole value is copied, no matter what file position user
+ * space issued e.g. sys_read at.
+ *
+ * The buffer is always NUL terminated, unless it's zero-sized.
+ * Return
+ * Number of characters copied (not including the trailing NUL).
+ *
+ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ * truncated value in this case).
+ *
+ * **-EINVAL** if current value was unavailable, e.g. because
+ * sysctl is uninitialized and read returns -EIO for it.
+ *
+ * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ * Description
+ * Get new value being written by user space to sysctl (before
+ * the actual write happens) and copy it as a string into the
+ * program-provided buffer *buf* of size *buf_len*.
+ *
+ * User space may write new value at file position > 0.
+ *
+ * The buffer is always NUL terminated, unless it's zero-sized.
+ * Return
+ * Number of characters copied (not including the trailing NUL).
+ *
+ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ * truncated value in this case).
+ *
+ * **-EINVAL** if sysctl is being read.
+ *
+ * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len)
+ * Description
+ * Override new value being written by user space to sysctl with
+ * value provided by program in buffer *buf* of size *buf_len*.
+ *
+ * *buf* should contain a string in same form as provided by user
+ * space on sysctl write.
+ *
+ * User space may write new value at file position > 0. To override
+ * the whole sysctl value file position should be set to zero.
+ * Return
+ * 0 on success.
+ *
+ * **-E2BIG** if the *buf_len* is too big.
+ *
+ * **-EINVAL** if sysctl is being read.
+ *
+ * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res)
+ * Description
+ * Convert the initial part of the string from buffer *buf* of
+ * size *buf_len* to a long integer according to the given base
+ * and save the result in *res*.
+ *
+ * The string may begin with an arbitrary amount of white space
+ * (as determined by **isspace**\ (3)) followed by a single
+ * optional '**-**' sign.
+ *
+ * Five least significant bits of *flags* encode base, other bits
+ * are currently unused.
+ *
+ * Base must be either 8, 10, 16 or 0 to detect it automatically
+ * similar to user space **strtol**\ (3).
+ * Return
+ * Number of characters consumed on success. Must be positive but
+ * no more than *buf_len*.
+ *
+ * **-EINVAL** if no valid digits were found or unsupported base
+ * was provided.
+ *
+ * **-ERANGE** if resulting value was out of range.
+ *
+ * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res)
+ * Description
+ * Convert the initial part of the string from buffer *buf* of
+ * size *buf_len* to an unsigned long integer according to the
+ * given base and save the result in *res*.
+ *
+ * The string may begin with an arbitrary amount of white space
+ * (as determined by **isspace**\ (3)).
+ *
+ * Five least significant bits of *flags* encode base, other bits
+ * are currently unused.
+ *
+ * Base must be either 8, 10, 16 or 0 to detect it automatically
+ * similar to user space **strtoul**\ (3).
+ * Return
+ * Number of characters consumed on success. Must be positive but
+ * no more than *buf_len*.
+ *
+ * **-EINVAL** if no valid digits were found or unsupported base
+ * was provided.
+ *
+ * **-ERANGE** if resulting value was out of range.
+ *
+ * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags)
+ * Description
+ * Get a bpf-local-storage from a *sk*.
+ *
+ * Logically, it could be thought of as getting the value from
+ * a *map* with *sk* as the **key**. From this
+ * perspective, the usage is not much different from
+ * **bpf_map_lookup_elem**\ (*map*, **&**\ *sk*) except this
+ * helper enforces the key must be a full socket and the map must
+ * be a **BPF_MAP_TYPE_SK_STORAGE** also.
+ *
+ * Underneath, the value is stored locally at *sk* instead of
+ * the *map*. The *map* is used as the bpf-local-storage
+ * "type". The bpf-local-storage "type" (i.e. the *map*) is
+ * searched against all bpf-local-storages residing at *sk*.
+ *
+ * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be
+ * used such that a new bpf-local-storage will be
+ * created if one does not exist. *value* can be used
+ * together with **BPF_SK_STORAGE_GET_F_CREATE** to specify
+ * the initial value of a bpf-local-storage. If *value* is
+ * **NULL**, the new bpf-local-storage will be zero initialized.
+ * Return
+ * A bpf-local-storage pointer is returned on success.
+ *
+ * **NULL** if not found or there was an error in adding
+ * a new bpf-local-storage.
+ *
+ * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk)
+ * Description
+ * Delete a bpf-local-storage from a *sk*.
+ * Return
+ * 0 on success.
+ *
+ * **-ENOENT** if the bpf-local-storage cannot be found.
+ *
+ * int bpf_send_signal(u32 sig)
+ * Description
+ * Send signal *sig* to the current task.
+ * Return
+ * 0 on success or successfully queued.
+ *
+ * **-EBUSY** if work queue under nmi is full.
+ *
+ * **-EINVAL** if *sig* is invalid.
+ *
+ * **-EPERM** if no permission to send the *sig*.
+ *
+ * **-EAGAIN** if bpf program can try again.
+ *
+ * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * Description
+ * Try to issue a SYN cookie for the packet with corresponding
+ * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
+ *
+ * *iph* points to the start of the IPv4 or IPv6 header, while
+ * *iph_len* contains **sizeof**\ (**struct iphdr**) or
+ * **sizeof**\ (**struct ipv6hdr**).
+ *
+ * *th* points to the start of the TCP header, while *th_len*
+ * contains the length of the TCP header.
+ *
+ * Return
+ * On success, the lower 32 bits hold the generated SYN cookie,
+ * followed by 16 bits which hold the MSS value for that cookie,
+ * and the top 16 bits are unused.
+ *
+ * On failure, the returned value is one of the following:
+ *
+ * **-EINVAL** SYN cookie cannot be issued due to error
+ *
+ * **-ENOENT** SYN cookie should not be issued (no SYN flood)
+ *
+ * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies
+ *
+ * **-EPROTONOSUPPORT** IP packet version is not 4 or 6
+ *
+ * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * Description
+ * Write raw *data* blob into a special BPF perf event held by
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * event must have the following attributes: **PERF_SAMPLE_RAW**
+ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * The *flags* are used to indicate the index in *map* for which
+ * the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * to indicate that the index of the current CPU core should be
+ * used.
+ *
+ * The value to write, of *size*, is passed through eBPF stack and
+ * pointed by *data*.
+ *
+ * *ctx* is a pointer to in-kernel struct sk_buff.
+ *
+ * This helper is similar to **bpf_perf_event_output**\ () but
+ * restricted to raw_tracepoint bpf programs.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
+ * Description
+ * Safely attempt to read *size* bytes from user space address
+ * *unsafe_ptr* and store the data in *dst*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
+ * Description
+ * Safely attempt to read *size* bytes from kernel space address
+ * *unsafe_ptr* and store the data in *dst*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
+ * Description
+ * Copy a NUL terminated string from an unsafe user address
+ * *unsafe_ptr* to *dst*. The *size* should include the
+ * terminating NUL byte. In case the string length is smaller than
+ * *size*, the target is not padded with further NUL bytes. If the
+ * string length is larger than *size*, just *size*-1 bytes are
+ * copied and the last byte is set to NUL.
+ *
+ * On success, the length of the copied string is returned. This
+ * makes this helper useful in tracing programs for reading
+ * strings, and more importantly to get its length at runtime. See
+ * the following snippet:
+ *
+ * ::
+ *
+ * SEC("kprobe/sys_open")
+ * void bpf_sys_open(struct pt_regs *ctx)
+ * {
+ * char buf[PATHLEN]; // PATHLEN is defined to 256
+ * int res = bpf_probe_read_user_str(buf, sizeof(buf),
+ * ctx->di);
+ *
+ * // Consume buf, for example push it to
+ * // userspace via bpf_perf_event_output(); we
+ * // can use res (the string length) as event
+ * // size, after checking its boundaries.
+ * }
+ *
+ * In comparison, using **bpf_probe_read_user**\ () helper here
+ * instead to read the string would require to estimate the length
+ * at compile time, and would often result in copying more memory
+ * than necessary.
+ *
+ * Another useful use case is when parsing individual process
+ * arguments or individual environment variables navigating
+ * *current*\ **->mm->arg_start** and *current*\
+ * **->mm->env_start**: using this helper and the return value,
+ * one can quickly iterate at the right offset of the memory area.
+ * Return
+ * On success, the strictly positive length of the string,
+ * including the trailing NUL character. On error, a negative
+ * value.
+ *
+ * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
+ * Description
+ * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
+ * to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply.
+ * Return
+ * On success, the strictly positive length of the string, including
+ * the trailing NUL character. On error, a negative value.
+ */
+#define __BPF_FUNC_MAPPER(FN) /* applies FN to every helper name below; list order determines each helper's value in enum bpf_func_id */ \
+ FN(unspec), /* first entry, so BPF_FUNC_unspec is 0 */ \
+ FN(map_lookup_elem), \
+ FN(map_update_elem), \
+ FN(map_delete_elem), \
+ FN(probe_read), \
+ FN(ktime_get_ns), \
+ FN(trace_printk), \
+ FN(get_prandom_u32), \
+ FN(get_smp_processor_id), \
+ FN(skb_store_bytes), \
+ FN(l3_csum_replace), \
+ FN(l4_csum_replace), \
+ FN(tail_call), \
+ FN(clone_redirect), \
+ FN(get_current_pid_tgid), \
+ FN(get_current_uid_gid), \
+ FN(get_current_comm), \
+ FN(get_cgroup_classid), \
+ FN(skb_vlan_push), \
+ FN(skb_vlan_pop), \
+ FN(skb_get_tunnel_key), \
+ FN(skb_set_tunnel_key), \
+ FN(perf_event_read), \
+ FN(redirect), \
+ FN(get_route_realm), \
+ FN(perf_event_output), \
+ FN(skb_load_bytes), \
+ FN(get_stackid), \
+ FN(csum_diff), \
+ FN(skb_get_tunnel_opt), \
+ FN(skb_set_tunnel_opt), \
+ FN(skb_change_proto), \
+ FN(skb_change_type), \
+ FN(skb_under_cgroup), \
+ FN(get_hash_recalc), \
+ FN(get_current_task), \
+ FN(probe_write_user), \
+ FN(current_task_under_cgroup), \
+ FN(skb_change_tail), \
+ FN(skb_pull_data), \
+ FN(csum_update), \
+ FN(set_hash_invalid), \
+ FN(get_numa_node_id), \
+ FN(skb_change_head), \
+ FN(xdp_adjust_head), \
+ FN(probe_read_str), \
+ FN(get_socket_cookie), \
+ FN(get_socket_uid), \
+ FN(set_hash), \
+ FN(setsockopt), \
+ FN(skb_adjust_room), \
+ FN(redirect_map), \
+ FN(sk_redirect_map), \
+ FN(sock_map_update), \
+ FN(xdp_adjust_meta), \
+ FN(perf_event_read_value), \
+ FN(perf_prog_read_value), \
+ FN(getsockopt), \
+ FN(override_return), \
+ FN(sock_ops_cb_flags_set), \
+ FN(msg_redirect_map), \
+ FN(msg_apply_bytes), \
+ FN(msg_cork_bytes), \
+ FN(msg_pull_data), \
+ FN(bind), \
+ FN(xdp_adjust_tail), \
+ FN(skb_get_xfrm_state), \
+ FN(get_stack), \
+ FN(skb_load_bytes_relative), \
+ FN(fib_lookup), \
+ FN(sock_hash_update), \
+ FN(msg_redirect_hash), \
+ FN(sk_redirect_hash), \
+ FN(lwt_push_encap), \
+ FN(lwt_seg6_store_bytes), \
+ FN(lwt_seg6_adjust_srh), \
+ FN(lwt_seg6_action), \
+ FN(rc_repeat), \
+ FN(rc_keydown), \
+ FN(skb_cgroup_id), \
+ FN(get_current_cgroup_id), \
+ FN(get_local_storage), \
+ FN(sk_select_reuseport), \
+ FN(skb_ancestor_cgroup_id), \
+ FN(sk_lookup_tcp), \
+ FN(sk_lookup_udp), \
+ FN(sk_release), \
+ FN(map_push_elem), \
+ FN(map_pop_elem), \
+ FN(map_peek_elem), \
+ FN(msg_push_data), \
+ FN(msg_pop_data), \
+ FN(rc_pointer_rel), \
+ FN(spin_lock), \
+ FN(spin_unlock), \
+ FN(sk_fullsock), \
+ FN(tcp_sock), \
+ FN(skb_ecn_set_ce), \
+ FN(get_listener_sock), \
+ FN(skc_lookup_tcp), \
+ FN(tcp_check_syncookie), \
+ FN(sysctl_get_name), \
+ FN(sysctl_get_current_value), \
+ FN(sysctl_get_new_value), \
+ FN(sysctl_set_new_value), \
+ FN(strtol), \
+ FN(strtoul), \
+ FN(sk_storage_get), \
+ FN(sk_storage_delete), \
+ FN(send_signal), \
+ FN(tcp_gen_syncookie), \
+ FN(skb_output), \
+ FN(probe_read_user), \
+ FN(probe_read_kernel), \
+ FN(probe_read_user_str), \
+ FN(probe_read_kernel_str),
+
+/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+ * function eBPF program intends to call
+ *
+ * __BPF_ENUM_FN(x) pastes each name listed in __BPF_FUNC_MAPPER onto the
+ * BPF_FUNC_ prefix, so the enumerators are BPF_FUNC_unspec,
+ * BPF_FUNC_map_lookup_elem, ... in list order, with implicit consecutive
+ * values starting at 0. __BPF_FUNC_MAX_ID follows the last helper and is
+ * therefore one greater than the highest helper ID. __BPF_ENUM_FN is only
+ * needed to build the enum and is undefined again right after it.
+ */
+#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x
+enum bpf_func_id {
+ __BPF_FUNC_MAPPER(__BPF_ENUM_FN)
+ __BPF_FUNC_MAX_ID,
+};
+#undef __BPF_ENUM_FN
+
+/* All flags used by eBPF helper functions, placed here.
+ * The masks are unsigned 64-bit (ULL) constants. Bit positions are reused
+ * between helpers (e.g. bit 0 is BPF_F_RECOMPUTE_CSUM, BPF_F_INGRESS and
+ * BPF_F_TUNINFO_IPV6), so a flag is only meaningful for the helper(s) named
+ * in the comment above it.
+ */
+
+/* BPF_FUNC_skb_store_bytes flags. */
+#define BPF_F_RECOMPUTE_CSUM (1ULL << 0)
+#define BPF_F_INVALIDATE_HASH (1ULL << 1)
+
+/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
+ * First 4 bits are for passing the header field size.
+ */
+#define BPF_F_HDR_FIELD_MASK 0xfULL
+
+/* BPF_FUNC_l4_csum_replace flags (bits 4-6, directly above the 4-bit
+ * BPF_F_HDR_FIELD_MASK field).
+ */
+#define BPF_F_PSEUDO_HDR (1ULL << 4)
+#define BPF_F_MARK_MANGLED_0 (1ULL << 5)
+#define BPF_F_MARK_ENFORCE (1ULL << 6)
+
+/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
+#define BPF_F_INGRESS (1ULL << 0)
+
+/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
+#define BPF_F_TUNINFO_IPV6 (1ULL << 0)
+
+/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack.
+ * BPF_F_SKIP_FIELD_MASK covers the low 8 bits; the single-bit flags
+ * start at bit 8.
+ */
+#define BPF_F_SKIP_FIELD_MASK 0xffULL
+#define BPF_F_USER_STACK (1ULL << 8)
+/* flags used by BPF_FUNC_get_stackid only. */
+#define BPF_F_FAST_STACK_CMP (1ULL << 9)
+#define BPF_F_REUSE_STACKID (1ULL << 10)
+/* flags used by BPF_FUNC_get_stack only. */
+#define BPF_F_USER_BUILD_ID (1ULL << 11)
+
+/* BPF_FUNC_skb_set_tunnel_key flags (bits 1-3; bit 0 is
+ * BPF_F_TUNINFO_IPV6, defined above).
+ */
+#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
+#define BPF_F_DONT_FRAGMENT (1ULL << 2)
+#define BPF_F_SEQ_NUMBER (1ULL << 3)
+
+/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
+ * BPF_FUNC_perf_event_read_value flags.
+ * BPF_F_INDEX_MASK selects the low 32 bits of *flags*, which carry the
+ * index into the map. BPF_F_CURRENT_CPU is the all-ones value of that
+ * field (identical to the mask itself) and asks for the index of the
+ * current CPU core instead of an explicit one.
+ */
+#define BPF_F_INDEX_MASK 0xffffffffULL
+#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context.
+ * 20-bit field occupying bits 32-51 of *flags*.
+ */
+#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
+
+/* Current network namespace.
+ * -1 is a negative signed 32-bit value, which bpf_sk_lookup_tcp() and
+ * bpf_sk_lookup_udp() document as selecting the netns associated with
+ * *ctx* when passed as their *netns* argument.
+ */
+#define BPF_F_CURRENT_NETNS (-1L)
+
+/* BPF_FUNC_skb_adjust_room flags. */
+#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0)
+
+#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff
+#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56
+
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1)
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2)
+#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3)
+#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4)
+#define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)(len) & \
+ BPF_ADJ_ROOM_ENCAP_L2_MASK) \
+ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT) /* (len) parenthesized: whole argument is cast and masked */
+
+/* BPF_FUNC_sysctl_get_name flags. */
+#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0)
+
+/* BPF_FUNC_sk_storage_get flags */
+#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0)
+
+/* Mode for BPF_FUNC_skb_adjust_room helper. */
+enum bpf_adj_room_mode {
+ BPF_ADJ_ROOM_NET,
+ BPF_ADJ_ROOM_MAC,
+};
+
+/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
+enum bpf_hdr_start_off {
+ BPF_HDR_START_MAC,
+ BPF_HDR_START_NET,
+};
+
+/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
+enum bpf_lwt_encap_mode {
+ BPF_LWT_ENCAP_SEG6,
+ BPF_LWT_ENCAP_SEG6_INLINE,
+ BPF_LWT_ENCAP_IP,
+};
+
+#define __bpf_md_ptr(type, name) \
+union { \
+ type name; \
+ __u64 :64; \
+} __attribute__((aligned(8)))
+
+/* user accessible mirror of in-kernel sk_buff.
+ * new fields can only be added to the end of this structure
+ */
+struct __sk_buff {
+ __u32 len;
+ __u32 pkt_type;
+ __u32 mark;
+ __u32 queue_mapping;
+ __u32 protocol;
+ __u32 vlan_present;
+ __u32 vlan_tci;
+ __u32 vlan_proto;
+ __u32 priority;
+ __u32 ingress_ifindex;
+ __u32 ifindex;
+ __u32 tc_index;
+ __u32 cb[5];
+ __u32 hash;
+ __u32 tc_classid;
+ __u32 data;
+ __u32 data_end;
+ __u32 napi_id;
+
+ /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
+ /* ... here. */
+
+ __u32 data_meta;
+ __bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
+ __u64 tstamp;
+ __u32 wire_len;
+ __u32 gso_segs;
+ __bpf_md_ptr(struct bpf_sock *, sk);
+};
+
+struct bpf_tunnel_key {
+ __u32 tunnel_id;
+ union {
+ __u32 remote_ipv4;
+ __u32 remote_ipv6[4];
+ };
+ __u8 tunnel_tos;
+ __u8 tunnel_ttl;
+ __u16 tunnel_ext; /* Padding, future use. */
+ __u32 tunnel_label;
+};
+
+/* user accessible mirror of in-kernel xfrm_state.
+ * new fields can only be added to the end of this structure
+ */
+struct bpf_xfrm_state {
+ __u32 reqid;
+ __u32 spi; /* Stored in network byte order */
+ __u16 family;
+ __u16 ext; /* Padding, future use. */
+ union {
+ __u32 remote_ipv4; /* Stored in network byte order */
+ __u32 remote_ipv6[4]; /* Stored in network byte order */
+ };
+};
+
+/* Generic BPF return codes which all BPF program types may support.
+ * The values are binary compatible with their TC_ACT_* counter-part to
+ * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
+ * programs.
+ *
+ * XDP is handled separately, see XDP_*.
+ */
+enum bpf_ret_code {
+ BPF_OK = 0,
+ /* 1 reserved */
+ BPF_DROP = 2,
+ /* 3-6 reserved */
+ BPF_REDIRECT = 7,
+ /* >127 are reserved for prog type specific return codes.
+ *
+ * BPF_LWT_REROUTE: used by BPF_PROG_TYPE_LWT_IN and
+ * BPF_PROG_TYPE_LWT_XMIT to indicate that skb had been
+ * changed and should be routed based on its new L3 header.
+ * (This is an L3 redirect, as opposed to L2 redirect
+ * represented by BPF_REDIRECT above).
+ */
+ BPF_LWT_REROUTE = 128,
+};
+
+struct bpf_sock {
+ __u32 bound_dev_if;
+ __u32 family;
+ __u32 type;
+ __u32 protocol;
+ __u32 mark;
+ __u32 priority;
+ /* IP address also allows 1 and 2 bytes access */
+ __u32 src_ip4;
+ __u32 src_ip6[4];
+ __u32 src_port; /* host byte order */
+ __u32 dst_port; /* network byte order */
+ __u32 dst_ip4;
+ __u32 dst_ip6[4];
+ __u32 state;
+};
+
+struct bpf_tcp_sock {
+ __u32 snd_cwnd; /* Sending congestion window */
+ __u32 srtt_us; /* smoothed round trip time << 3 in usecs */
+ __u32 rtt_min;
+ __u32 snd_ssthresh; /* Slow start size threshold */
+ __u32 rcv_nxt; /* What we want to receive next */
+ __u32 snd_nxt; /* Next sequence we send */
+ __u32 snd_una; /* First byte we want an ack for */
+ __u32 mss_cache; /* Cached effective mss, not including SACKS */
+ __u32 ecn_flags; /* ECN status bits. */
+ __u32 rate_delivered; /* saved rate sample: packets delivered */
+ __u32 rate_interval_us; /* saved rate sample: time elapsed */
+ __u32 packets_out; /* Packets which are "in flight" */
+ __u32 retrans_out; /* Retransmitted packets out */
+ __u32 total_retrans; /* Total retransmits for entire connection */
+ __u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn
+ * total number of segments in.
+ */
+ __u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn
+ * total number of data segments in.
+ */
+ __u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut
+ * The total number of segments sent.
+ */
+ __u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut
+ * total number of data segments sent.
+ */
+ __u32 lost_out; /* Lost packets */
+ __u32 sacked_out; /* SACK'd packets */
+ __u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived
+ * sum(delta(rcv_nxt)), or how many bytes
+ * were received.
+ */
+ __u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked
+ * sum(delta(snd_una)), or how many bytes
+ * were acked.
+ */
+ __u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups
+ * total number of DSACK blocks received
+ */
+ __u32 delivered; /* Total data packets delivered incl. rexmits */
+ __u32 delivered_ce; /* Like the above but only ECE marked packets */
+ __u32 icsk_retransmits; /* Number of unrecovered [RTO] timeouts */
+};
+
+struct bpf_sock_tuple {
+ union {
+ struct {
+ __be32 saddr;
+ __be32 daddr;
+ __be16 sport;
+ __be16 dport;
+ } ipv4;
+ struct {
+ __be32 saddr[4];
+ __be32 daddr[4];
+ __be16 sport;
+ __be16 dport;
+ } ipv6;
+ };
+};
+
+struct bpf_xdp_sock {
+ __u32 queue_id;
+};
+
+#define XDP_PACKET_HEADROOM 256
+
+/* User return codes for XDP prog type.
+ * A valid XDP program must return one of these defined values. All other
+ * return codes are reserved for future use. Unknown return codes will
+ * result in packet drops and a warning via bpf_warn_invalid_xdp_action().
+ */
+enum xdp_action {
+ XDP_ABORTED = 0,
+ XDP_DROP,
+ XDP_PASS,
+ XDP_TX,
+ XDP_REDIRECT,
+};
+
+/* user accessible metadata for XDP packet hook
+ * new fields must be added to the end of this structure
+ */
+struct xdp_md {
+ __u32 data;
+ __u32 data_end;
+ __u32 data_meta;
+ /* Below access go through struct xdp_rxq_info */
+ __u32 ingress_ifindex; /* rxq->dev->ifindex */
+ __u32 rx_queue_index; /* rxq->queue_index */
+};
+
+enum sk_action {
+ SK_DROP = 0,
+ SK_PASS,
+};
+
+/* user accessible metadata for SK_MSG packet hook, new fields must
+ * be added to the end of this structure
+ */
+struct sk_msg_md {
+ __bpf_md_ptr(void *, data);
+ __bpf_md_ptr(void *, data_end);
+
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
+ __u32 size; /* Total size of sk_msg */
+};
+
+struct sk_reuseport_md {
+ /*
+ * Start of directly accessible data. It begins from
+ * the tcp/udp header.
+ */
+ __bpf_md_ptr(void *, data);
+ /* End of directly accessible data */
+ __bpf_md_ptr(void *, data_end);
+ /*
+ * Total length of packet (starting from the tcp/udp header).
+ * Note that the directly accessible bytes (data_end - data)
+ * could be less than this "len". Those bytes could be
+ * indirectly read by a helper "bpf_skb_load_bytes()".
+ */
+ __u32 len;
+ /*
+ * Eth protocol in the mac header (network byte order). e.g.
+ * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD)
+ */
+ __u32 eth_protocol;
+ __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
+ __u32 bind_inany; /* Is sock bound to an INANY address? */
+ __u32 hash; /* A hash of the packet 4 tuples */
+};
+
+#define BPF_TAG_SIZE 8
+
+struct bpf_prog_info {
+ __u32 type;
+ __u32 id;
+ __u8 tag[BPF_TAG_SIZE];
+ __u32 jited_prog_len;
+ __u32 xlated_prog_len;
+ __aligned_u64 jited_prog_insns;
+ __aligned_u64 xlated_prog_insns;
+ __u64 load_time; /* ns since boottime */
+ __u32 created_by_uid;
+ __u32 nr_map_ids;
+ __aligned_u64 map_ids;
+ char name[BPF_OBJ_NAME_LEN];
+ __u32 ifindex;
+ __u32 gpl_compatible:1;
+ __u32 :31; /* alignment pad */
+ __u64 netns_dev;
+ __u64 netns_ino;
+ __u32 nr_jited_ksyms;
+ __u32 nr_jited_func_lens;
+ __aligned_u64 jited_ksyms;
+ __aligned_u64 jited_func_lens;
+ __u32 btf_id;
+ __u32 func_info_rec_size;
+ __aligned_u64 func_info;
+ __u32 nr_func_info;
+ __u32 nr_line_info;
+ __aligned_u64 line_info;
+ __aligned_u64 jited_line_info;
+ __u32 nr_jited_line_info;
+ __u32 line_info_rec_size;
+ __u32 jited_line_info_rec_size;
+ __u32 nr_prog_tags;
+ __aligned_u64 prog_tags;
+ __u64 run_time_ns;
+ __u64 run_cnt;
+} __attribute__((aligned(8)));
+
+struct bpf_map_info {
+ __u32 type;
+ __u32 id;
+ __u32 key_size;
+ __u32 value_size;
+ __u32 max_entries;
+ __u32 map_flags;
+ char name[BPF_OBJ_NAME_LEN];
+ __u32 ifindex;
+ __u32 :32;
+ __u64 netns_dev;
+ __u64 netns_ino;
+ __u32 btf_id;
+ __u32 btf_key_type_id;
+ __u32 btf_value_type_id;
+} __attribute__((aligned(8)));
+
+struct bpf_btf_info {
+ __aligned_u64 btf;
+ __u32 btf_size;
+ __u32 id;
+} __attribute__((aligned(8)));
+
+/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
+ * by user and intended to be used by socket (e.g. to bind to, depends on
+ * attach type).
+ */
+struct bpf_sock_addr {
+ __u32 user_family; /* Allows 4-byte read, but no write. */
+ __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write.
+ * Stored in network byte order.
+ */
+ __u32 user_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write.
+ * Stored in network byte order.
+ */
+ __u32 user_port; /* Allows 4-byte read and write.
+ * Stored in network byte order
+ */
+ __u32 family; /* Allows 4-byte read, but no write */
+ __u32 type; /* Allows 4-byte read, but no write */
+ __u32 protocol; /* Allows 4-byte read, but no write */
+ __u32 msg_src_ip4; /* Allows 1,2,4-byte read and 4-byte write.
+ * Stored in network byte order.
+ */
+ __u32 msg_src_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write.
+ * Stored in network byte order.
+ */
+ __bpf_md_ptr(struct bpf_sock *, sk);
+};
+
+/* User bpf_sock_ops struct to access socket values and specify request ops
+ * and their replies.
+ * Some of these fields are in network (bigendian) byte order and may need
+ * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h).
+ * New fields can only be added at the end of this structure
+ */
+struct bpf_sock_ops {
+ __u32 op;
+ union {
+ __u32 args[4]; /* Optionally passed to bpf program */
+ __u32 reply; /* Returned by bpf program */
+ __u32 replylong[4]; /* Optionally returned by bpf prog */
+ };
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
+ __u32 is_fullsock; /* Some TCP fields are only valid if
+ * there is a full socket. If not, the
+ * fields read as zero.
+ */
+ __u32 snd_cwnd;
+ __u32 srtt_us; /* Averaged RTT << 3 in usecs */
+ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
+ __u32 state;
+ __u32 rtt_min;
+ __u32 snd_ssthresh;
+ __u32 rcv_nxt;
+ __u32 snd_nxt;
+ __u32 snd_una;
+ __u32 mss_cache;
+ __u32 ecn_flags;
+ __u32 rate_delivered;
+ __u32 rate_interval_us;
+ __u32 packets_out;
+ __u32 retrans_out;
+ __u32 total_retrans;
+ __u32 segs_in;
+ __u32 data_segs_in;
+ __u32 segs_out;
+ __u32 data_segs_out;
+ __u32 lost_out;
+ __u32 sacked_out;
+ __u32 sk_txhash;
+ __u64 bytes_received;
+ __u64 bytes_acked;
+ __bpf_md_ptr(struct bpf_sock *, sk);
+};
+
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
+#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
+#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
+#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently
+ * supported cb flags
+ */
+
+/* List of known BPF sock_ops operators.
+ * New entries can only be added at the end
+ */
+enum {
+ BPF_SOCK_OPS_VOID,
+ BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or
+ * -1 if default value should be used
+ */
+ BPF_SOCK_OPS_RWND_INIT, /* Should return initial advertized
+ * window (in packets) or -1 if default
+ * value should be used
+ */
+ BPF_SOCK_OPS_TCP_CONNECT_CB, /* Calls BPF program right before an
+ * active connection is initialized
+ */
+ BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, /* Calls BPF program when an
+ * active connection is
+ * established
+ */
+ BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, /* Calls BPF program when a
+ * passive connection is
+ * established
+ */
+ BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control
+ * needs ECN
+ */
+ BPF_SOCK_OPS_BASE_RTT, /* Get base RTT. The correct value is
+ * based on the path and may be
+ * dependent on the congestion control
+ * algorithm. In general it indicates
+ * a congestion threshold. RTTs above
+ * this indicate congestion
+ */
+ BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered.
+ * Arg1: value of icsk_retransmits
+ * Arg2: value of icsk_rto
+ * Arg3: whether RTO has expired
+ */
+ BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted.
+ * Arg1: sequence number of 1st byte
+ * Arg2: # segments
+ * Arg3: return value of
+ * tcp_transmit_skb (0 => success)
+ */
+ BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state.
+ * Arg1: old_state
+ * Arg2: new_state
+ */
+ BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after
+ * socket transition to LISTEN state.
+ */
+ BPF_SOCK_OPS_RTT_CB, /* Called on every RTT.
+ */
+};
+
+/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
+ * changes between the TCP and BPF versions. Ideally this should never happen.
+ * If it does, we need to add code to convert them before calling
+ * the BPF sock_ops function.
+ */
+enum {
+ BPF_TCP_ESTABLISHED = 1,
+ BPF_TCP_SYN_SENT,
+ BPF_TCP_SYN_RECV,
+ BPF_TCP_FIN_WAIT1,
+ BPF_TCP_FIN_WAIT2,
+ BPF_TCP_TIME_WAIT,
+ BPF_TCP_CLOSE,
+ BPF_TCP_CLOSE_WAIT,
+ BPF_TCP_LAST_ACK,
+ BPF_TCP_LISTEN,
+ BPF_TCP_CLOSING, /* Now a valid state */
+ BPF_TCP_NEW_SYN_RECV,
+
+ BPF_TCP_MAX_STATES /* Leave at the end! */
+};
+
+#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
+#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */
+
+struct bpf_perf_event_value {
+ __u64 counter;
+ __u64 enabled;
+ __u64 running;
+};
+
+#define BPF_DEVCG_ACC_MKNOD (1ULL << 0)
+#define BPF_DEVCG_ACC_READ (1ULL << 1)
+#define BPF_DEVCG_ACC_WRITE (1ULL << 2)
+
+#define BPF_DEVCG_DEV_BLOCK (1ULL << 0)
+#define BPF_DEVCG_DEV_CHAR (1ULL << 1)
+
+struct bpf_cgroup_dev_ctx {
+ /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
+ __u32 access_type;
+ __u32 major;
+ __u32 minor;
+};
+
+struct bpf_raw_tracepoint_args {
+ __u64 args[0];
+};
+
+/* DIRECT: Skip the FIB rules and go to FIB table associated with device
+ * OUTPUT: Do lookup from egress perspective; default is ingress
+ */
+#define BPF_FIB_LOOKUP_DIRECT (1U << 0)
+#define BPF_FIB_LOOKUP_OUTPUT (1U << 1)
+
+enum {
+ BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */
+ BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */
+ BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */
+ BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */
+ BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */
+ BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */
+ BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
+ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
+ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
+};
+
+struct bpf_fib_lookup {
+ /* input: network family for lookup (AF_INET, AF_INET6)
+ * output: network family of egress nexthop
+ */
+ __u8 family;
+
+ /* set if lookup is to consider L4 data - e.g., FIB rules */
+ __u8 l4_protocol;
+ __be16 sport;
+ __be16 dport;
+
+ /* total length of packet from network header - used for MTU check */
+ __u16 tot_len;
+
+ /* input: L3 device index for lookup
+ * output: device index from FIB lookup
+ */
+ __u32 ifindex;
+
+ union {
+ /* inputs to lookup */
+ __u8 tos; /* AF_INET */
+ __be32 flowinfo; /* AF_INET6, flow_label + priority */
+
+ /* output: metric of fib result (IPv4/IPv6 only) */
+ __u32 rt_metric;
+ };
+
+ union {
+ __be32 ipv4_src;
+ __u32 ipv6_src[4]; /* in6_addr; network order */
+ };
+
+ /* input to bpf_fib_lookup, ipv{4,6}_dst is destination address in
+ * network header. output: bpf_fib_lookup sets to gateway address
+ * if FIB lookup returns gateway route
+ */
+ union {
+ __be32 ipv4_dst;
+ __u32 ipv6_dst[4]; /* in6_addr; network order */
+ };
+
+ /* output */
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ __u8 smac[6]; /* ETH_ALEN */
+ __u8 dmac[6]; /* ETH_ALEN */
+};
+
+enum bpf_task_fd_type {
+ BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
+ BPF_FD_TYPE_TRACEPOINT, /* tp name */
+ BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */
+ BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */
+ BPF_FD_TYPE_UPROBE, /* filename + offset */
+ BPF_FD_TYPE_URETPROBE, /* filename + offset */
+};
+
+#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0)
+#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1)
+#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2)
+
+struct bpf_flow_keys {
+ __u16 nhoff;
+ __u16 thoff;
+ __u16 addr_proto; /* ETH_P_* of valid addrs */
+ __u8 is_frag;
+ __u8 is_first_frag;
+ __u8 is_encap;
+ __u8 ip_proto;
+ __be16 n_proto;
+ __be16 sport;
+ __be16 dport;
+ union {
+ struct {
+ __be32 ipv4_src;
+ __be32 ipv4_dst;
+ };
+ struct {
+ __u32 ipv6_src[4]; /* in6_addr; network order */
+ __u32 ipv6_dst[4]; /* in6_addr; network order */
+ };
+ };
+ __u32 flags;
+ __be32 flow_label;
+};
+
+struct bpf_func_info {
+ __u32 insn_off;
+ __u32 type_id;
+};
+
+#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10)
+#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff)
+
+struct bpf_line_info {
+ __u32 insn_off;
+ __u32 file_name_off;
+ __u32 line_off;
+ __u32 line_col;
+};
+
+struct bpf_spin_lock {
+ __u32 val;
+};
+
+struct bpf_sysctl {
+ __u32 write; /* Sysctl is being read (= 0) or written (= 1).
+ * Allows 1,2,4-byte read, but no write.
+ */
+ __u32 file_pos; /* Sysctl file position to read from, write to.
+ * Allows 1,2,4-byte read and 4-byte write.
+ */
+};
+
+struct bpf_sockopt {
+ __bpf_md_ptr(struct bpf_sock *, sk);
+ __bpf_md_ptr(void *, optval);
+ __bpf_md_ptr(void *, optval_end);
+
+ __s32 level;
+ __s32 optname;
+ __s32 optlen;
+ __s32 retval;
+};
+
+#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/src/contrib/libbpf/include/uapi/linux/bpf_common.h b/src/contrib/libbpf/include/uapi/linux/bpf_common.h
new file mode 100644
index 0000000..ee97668
--- /dev/null
+++ b/src/contrib/libbpf/include/uapi/linux/bpf_common.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_BPF_COMMON_H__
+#define _UAPI__LINUX_BPF_COMMON_H__
+
+/* Instruction classes */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define BPF_LD 0x00
+#define BPF_LDX 0x01
+#define BPF_ST 0x02
+#define BPF_STX 0x03
+#define BPF_ALU 0x04
+#define BPF_JMP 0x05
+#define BPF_RET 0x06
+#define BPF_MISC 0x07
+
+/* ld/ldx fields */
+#define BPF_SIZE(code) ((code) & 0x18)
+#define BPF_W 0x00 /* 32-bit */
+#define BPF_H 0x08 /* 16-bit */
+#define BPF_B 0x10 /* 8-bit */
+/* eBPF BPF_DW 0x18 64-bit */
+#define BPF_MODE(code) ((code) & 0xe0)
+#define BPF_IMM 0x00
+#define BPF_ABS 0x20
+#define BPF_IND 0x40
+#define BPF_MEM 0x60
+#define BPF_LEN 0x80
+#define BPF_MSH 0xa0
+
+/* alu/jmp fields */
+#define BPF_OP(code) ((code) & 0xf0)
+#define BPF_ADD 0x00
+#define BPF_SUB 0x10
+#define BPF_MUL 0x20
+#define BPF_DIV 0x30
+#define BPF_OR 0x40
+#define BPF_AND 0x50
+#define BPF_LSH 0x60
+#define BPF_RSH 0x70
+#define BPF_NEG 0x80
+#define BPF_MOD 0x90
+#define BPF_XOR 0xa0
+
+#define BPF_JA 0x00
+#define BPF_JEQ 0x10
+#define BPF_JGT 0x20
+#define BPF_JGE 0x30
+#define BPF_JSET 0x40
+#define BPF_SRC(code) ((code) & 0x08)
+#define BPF_K 0x00
+#define BPF_X 0x08
+
+#ifndef BPF_MAXINSNS
+#define BPF_MAXINSNS 4096
+#endif
+
+#endif /* _UAPI__LINUX_BPF_COMMON_H__ */
diff --git a/src/contrib/libbpf/include/uapi/linux/btf.h b/src/contrib/libbpf/include/uapi/linux/btf.h
new file mode 100644
index 0000000..63ae4a3
--- /dev/null
+++ b/src/contrib/libbpf/include/uapi/linux/btf.h
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2018 Facebook */
+#ifndef _UAPI__LINUX_BTF_H__
+#define _UAPI__LINUX_BTF_H__
+
+#include <linux/types.h>
+
+#define BTF_MAGIC 0xeB9F
+#define BTF_VERSION 1
+
+struct btf_header {
+ __u16 magic;
+ __u8 version;
+ __u8 flags;
+ __u32 hdr_len;
+
+ /* All offsets are in bytes relative to the end of this header */
+ __u32 type_off; /* offset of type section */
+ __u32 type_len; /* length of type section */
+ __u32 str_off; /* offset of string section */
+ __u32 str_len; /* length of string section */
+};
+
+/* Max # of type identifier */
+#define BTF_MAX_TYPE 0x0000ffff
+/* Max offset into the string section */
+#define BTF_MAX_NAME_OFFSET 0x0000ffff
+/* Max # of struct/union/enum members or func args */
+#define BTF_MAX_VLEN 0xffff
+
+struct btf_type {
+ __u32 name_off;
+ /* "info" bits arrangement
+ * bits 0-15: vlen (e.g. # of struct's members)
+ * bits 16-23: unused
+ * bits 24-27: kind (e.g. int, ptr, array...etc)
+ * bits 28-30: unused
+ * bit 31: kind_flag, currently used by
+ * struct, union and fwd
+ */
+ __u32 info;
+ /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC.
+ * "size" tells the size of the type it is describing.
+ *
+ * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
+ * FUNC, FUNC_PROTO and VAR.
+ * "type" is a type_id referring to another type.
+ */
+ union {
+ __u32 size;
+ __u32 type;
+ };
+};
+
+#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
+#define BTF_INFO_VLEN(info) ((info) & 0xffff)
+#define BTF_INFO_KFLAG(info) ((info) >> 31)
+
+#define BTF_KIND_UNKN 0 /* Unknown */
+#define BTF_KIND_INT 1 /* Integer */
+#define BTF_KIND_PTR 2 /* Pointer */
+#define BTF_KIND_ARRAY 3 /* Array */
+#define BTF_KIND_STRUCT 4 /* Struct */
+#define BTF_KIND_UNION 5 /* Union */
+#define BTF_KIND_ENUM 6 /* Enumeration */
+#define BTF_KIND_FWD 7 /* Forward */
+#define BTF_KIND_TYPEDEF 8 /* Typedef */
+#define BTF_KIND_VOLATILE 9 /* Volatile */
+#define BTF_KIND_CONST 10 /* Const */
+#define BTF_KIND_RESTRICT 11 /* Restrict */
+#define BTF_KIND_FUNC 12 /* Function */
+#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
+#define BTF_KIND_VAR 14 /* Variable */
+#define BTF_KIND_DATASEC 15 /* Section */
+#define BTF_KIND_MAX BTF_KIND_DATASEC
+#define NR_BTF_KINDS (BTF_KIND_MAX + 1)
+
+/* For some specific BTF_KIND, "struct btf_type" is immediately
+ * followed by extra data.
+ */
+
+/* BTF_KIND_INT is followed by a u32 and the following
+ * is the 32 bits arrangement:
+ */
+#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24)
+#define BTF_INT_OFFSET(VAL) (((VAL) & 0x00ff0000) >> 16)
+#define BTF_INT_BITS(VAL) ((VAL) & 0x000000ff)
+
+/* Attributes stored in the BTF_INT_ENCODING */
+#define BTF_INT_SIGNED (1 << 0)
+#define BTF_INT_CHAR (1 << 1)
+#define BTF_INT_BOOL (1 << 2)
+
+/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
+ * The exact number of btf_enum is stored in the vlen (of the
+ * info in "struct btf_type").
+ */
+struct btf_enum {
+ __u32 name_off;
+ __s32 val;
+};
+
+/* BTF_KIND_ARRAY is followed by one "struct btf_array" */
+struct btf_array {
+ __u32 type;
+ __u32 index_type;
+ __u32 nelems;
+};
+
+/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed
+ * by multiple "struct btf_member". The exact number
+ * of btf_member is stored in the vlen (of the info in
+ * "struct btf_type").
+ */
+struct btf_member {
+ __u32 name_off;
+ __u32 type;
+ /* If the type info kind_flag is set, the btf_member offset
+ * contains both member bitfield size and bit offset. The
+ * bitfield size is set for bitfield members. If the type
+ * info kind_flag is not set, the offset contains only bit
+ * offset.
+ */
+ __u32 offset;
+};
+
+/* If the struct/union type info kind_flag is set, the
+ * following two macros are used to access bitfield_size
+ * and bit_offset from btf_member.offset.
+ */
+#define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24)
+#define BTF_MEMBER_BIT_OFFSET(val) ((val) & 0xffffff)
+
+/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param".
+ * The exact number of btf_param is stored in the vlen (of the
+ * info in "struct btf_type").
+ */
+struct btf_param {
+ __u32 name_off;
+ __u32 type;
+};
+
+enum {
+ BTF_VAR_STATIC = 0,
+ BTF_VAR_GLOBAL_ALLOCATED,
+};
+
+/* BTF_KIND_VAR is followed by a single "struct btf_var" to describe
+ * additional information related to the variable such as its linkage.
+ */
+struct btf_var {
+ __u32 linkage;
+};
+
+/* BTF_KIND_DATASEC is followed by multiple "struct btf_var_secinfo"
+ * to describe all BTF_KIND_VAR types it contains along with its
+ * in-section offset as well as size.
+ */
+struct btf_var_secinfo {
+ __u32 type;
+ __u32 offset;
+ __u32 size;
+};
+
+#endif /* _UAPI__LINUX_BTF_H__ */
diff --git a/src/contrib/libbpf/include/uapi/linux/if_link.h b/src/contrib/libbpf/include/uapi/linux/if_link.h
new file mode 100644
index 0000000..8aec876
--- /dev/null
+++ b/src/contrib/libbpf/include/uapi/linux/if_link.h
@@ -0,0 +1,1033 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_IF_LINK_H
+#define _UAPI_LINUX_IF_LINK_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+/* This struct should be in sync with struct rtnl_link_stats64 */
+struct rtnl_link_stats {
+ __u32 rx_packets; /* total packets received */
+ __u32 tx_packets; /* total packets transmitted */
+ __u32 rx_bytes; /* total bytes received */
+ __u32 tx_bytes; /* total bytes transmitted */
+ __u32 rx_errors; /* bad packets received */
+ __u32 tx_errors; /* packet transmit problems */
+ __u32 rx_dropped; /* no space in linux buffers */
+ __u32 tx_dropped; /* no space available in linux */
+ __u32 multicast; /* multicast packets received */
+ __u32 collisions;
+
+ /* detailed rx_errors: */
+ __u32 rx_length_errors;
+ __u32 rx_over_errors; /* receiver ring buff overflow */
+ __u32 rx_crc_errors; /* recved pkt with crc error */
+ __u32 rx_frame_errors; /* recv'd frame alignment error */
+ __u32 rx_fifo_errors; /* recv'r fifo overrun */
+ __u32 rx_missed_errors; /* receiver missed packet */
+
+ /* detailed tx_errors */
+ __u32 tx_aborted_errors;
+ __u32 tx_carrier_errors;
+ __u32 tx_fifo_errors;
+ __u32 tx_heartbeat_errors;
+ __u32 tx_window_errors;
+
+ /* for cslip etc */
+ __u32 rx_compressed;
+ __u32 tx_compressed;
+
+ __u32 rx_nohandler; /* dropped, no handler found */
+};
+
+/* The main device statistics structure */
+struct rtnl_link_stats64 {
+ __u64 rx_packets; /* total packets received */
+ __u64 tx_packets; /* total packets transmitted */
+ __u64 rx_bytes; /* total bytes received */
+ __u64 tx_bytes; /* total bytes transmitted */
+ __u64 rx_errors; /* bad packets received */
+ __u64 tx_errors; /* packet transmit problems */
+ __u64 rx_dropped; /* no space in linux buffers */
+ __u64 tx_dropped; /* no space available in linux */
+ __u64 multicast; /* multicast packets received */
+ __u64 collisions;
+
+ /* detailed rx_errors: */
+ __u64 rx_length_errors;
+ __u64 rx_over_errors; /* receiver ring buff overflow */
+ __u64 rx_crc_errors; /* recved pkt with crc error */
+ __u64 rx_frame_errors; /* recv'd frame alignment error */
+ __u64 rx_fifo_errors; /* recv'r fifo overrun */
+ __u64 rx_missed_errors; /* receiver missed packet */
+
+ /* detailed tx_errors */
+ __u64 tx_aborted_errors;
+ __u64 tx_carrier_errors;
+ __u64 tx_fifo_errors;
+ __u64 tx_heartbeat_errors;
+ __u64 tx_window_errors;
+
+ /* for cslip etc */
+ __u64 rx_compressed;
+ __u64 tx_compressed;
+
+ __u64 rx_nohandler; /* dropped, no handler found */
+};
+
+/* The struct should be in sync with struct ifmap */
+struct rtnl_link_ifmap {
+ __u64 mem_start;
+ __u64 mem_end;
+ __u64 base_addr;
+ __u16 irq;
+ __u8 dma;
+ __u8 port;
+};
+
+/*
+ * IFLA_AF_SPEC
+ * Contains nested attributes for address family specific attributes.
+ * Each address family may create an attribute with the address family
+ * number as type and create its own attribute structure in it.
+ *
+ * Example:
+ * [IFLA_AF_SPEC] = {
+ * [AF_INET] = {
+ * [IFLA_INET_CONF] = ...,
+ * },
+ * [AF_INET6] = {
+ * [IFLA_INET6_FLAGS] = ...,
+ * [IFLA_INET6_CONF] = ...,
+ * }
+ * }
+ */
+
+enum {
+ IFLA_UNSPEC,
+ IFLA_ADDRESS,
+ IFLA_BROADCAST,
+ IFLA_IFNAME,
+ IFLA_MTU,
+ IFLA_LINK,
+ IFLA_QDISC,
+ IFLA_STATS,
+ IFLA_COST,
+#define IFLA_COST IFLA_COST
+ IFLA_PRIORITY,
+#define IFLA_PRIORITY IFLA_PRIORITY
+ IFLA_MASTER,
+#define IFLA_MASTER IFLA_MASTER
+ IFLA_WIRELESS, /* Wireless Extension event - see wireless.h */
+#define IFLA_WIRELESS IFLA_WIRELESS
+ IFLA_PROTINFO, /* Protocol specific information for a link */
+#define IFLA_PROTINFO IFLA_PROTINFO
+ IFLA_TXQLEN,
+#define IFLA_TXQLEN IFLA_TXQLEN
+ IFLA_MAP,
+#define IFLA_MAP IFLA_MAP
+ IFLA_WEIGHT,
+#define IFLA_WEIGHT IFLA_WEIGHT
+ IFLA_OPERSTATE,
+ IFLA_LINKMODE,
+ IFLA_LINKINFO,
+#define IFLA_LINKINFO IFLA_LINKINFO
+ IFLA_NET_NS_PID,
+ IFLA_IFALIAS,
+ IFLA_NUM_VF, /* Number of VFs if device is SR-IOV PF */
+ IFLA_VFINFO_LIST,
+ IFLA_STATS64,
+ IFLA_VF_PORTS,
+ IFLA_PORT_SELF,
+ IFLA_AF_SPEC,
+ IFLA_GROUP, /* Group the device belongs to */
+ IFLA_NET_NS_FD,
+ IFLA_EXT_MASK, /* Extended info mask, VFs, etc */
+ IFLA_PROMISCUITY, /* Promiscuity count: > 0 means acts PROMISC */
+#define IFLA_PROMISCUITY IFLA_PROMISCUITY
+ IFLA_NUM_TX_QUEUES,
+ IFLA_NUM_RX_QUEUES,
+ IFLA_CARRIER,
+ IFLA_PHYS_PORT_ID,
+ IFLA_CARRIER_CHANGES,
+ IFLA_PHYS_SWITCH_ID,
+ IFLA_LINK_NETNSID,
+ IFLA_PHYS_PORT_NAME,
+ IFLA_PROTO_DOWN,
+ IFLA_GSO_MAX_SEGS,
+ IFLA_GSO_MAX_SIZE,
+ IFLA_PAD,
+ IFLA_XDP,
+ IFLA_EVENT,
+ IFLA_NEW_NETNSID,
+ IFLA_IF_NETNSID,
+ IFLA_TARGET_NETNSID = IFLA_IF_NETNSID, /* new alias */
+ IFLA_CARRIER_UP_COUNT,
+ IFLA_CARRIER_DOWN_COUNT,
+ IFLA_NEW_IFINDEX,
+ IFLA_MIN_MTU,
+ IFLA_MAX_MTU,
+ IFLA_PROP_LIST,
+ IFLA_ALT_IFNAME, /* Alternative ifname */
+ __IFLA_MAX
+};
+
+
+#define IFLA_MAX (__IFLA_MAX - 1)
+
+/* backwards compatibility for userspace */
+#ifndef __KERNEL__
+#define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg))))
+#define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg))
+#endif
+
+enum {
+ IFLA_INET_UNSPEC,
+ IFLA_INET_CONF,
+ __IFLA_INET_MAX,
+};
+
+#define IFLA_INET_MAX (__IFLA_INET_MAX - 1)
+
+/* ifi_flags.
+
+ IFF_* flags.
+
+ The only change is:
+ IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are
+ no longer changeable by user. They describe link media
+ characteristics and are set by the device driver.
+
+ Comments:
+ - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid
+ - If none of these three flags is set,
+ the interface is NBMA.
+
+ - IFF_MULTICAST does not mean anything special:
+ multicasts can be used on all not-NBMA links.
+ IFF_MULTICAST means that this media uses special encapsulation
+ for multicast frames. Apparently, all IFF_POINTOPOINT and
+ IFF_BROADCAST devices are able to use multicasts too.
+ */
+
+/* IFLA_LINK.
+ For usual devices it is equal to ifi_index.
+ If it is a "virtual interface" (e.g. a tunnel), ifi_link
+ can point to the real physical interface (e.g. for bandwidth calculations),
+ or may be 0, which means that the real media is unknown (usual
+ for IPIP tunnels, when the route to the endpoint is allowed to change)
+ */
+
+/* Subtype attributes for IFLA_PROTINFO */
+enum {
+ IFLA_INET6_UNSPEC,
+ IFLA_INET6_FLAGS, /* link flags */
+ IFLA_INET6_CONF, /* sysctl parameters */
+ IFLA_INET6_STATS, /* statistics */
+ IFLA_INET6_MCAST, /* MC things. What of them? */
+ IFLA_INET6_CACHEINFO, /* time values and max reasm size */
+ IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */
+ IFLA_INET6_TOKEN, /* device token */
+ IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */
+ __IFLA_INET6_MAX
+};
+
+#define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1)
+
+enum in6_addr_gen_mode {
+ IN6_ADDR_GEN_MODE_EUI64,
+ IN6_ADDR_GEN_MODE_NONE,
+ IN6_ADDR_GEN_MODE_STABLE_PRIVACY,
+ IN6_ADDR_GEN_MODE_RANDOM,
+};
+
+/* Bridge section */
+
+enum {
+ IFLA_BR_UNSPEC,
+ IFLA_BR_FORWARD_DELAY,
+ IFLA_BR_HELLO_TIME,
+ IFLA_BR_MAX_AGE,
+ IFLA_BR_AGEING_TIME,
+ IFLA_BR_STP_STATE,
+ IFLA_BR_PRIORITY,
+ IFLA_BR_VLAN_FILTERING,
+ IFLA_BR_VLAN_PROTOCOL,
+ IFLA_BR_GROUP_FWD_MASK,
+ IFLA_BR_ROOT_ID,
+ IFLA_BR_BRIDGE_ID,
+ IFLA_BR_ROOT_PORT,
+ IFLA_BR_ROOT_PATH_COST,
+ IFLA_BR_TOPOLOGY_CHANGE,
+ IFLA_BR_TOPOLOGY_CHANGE_DETECTED,
+ IFLA_BR_HELLO_TIMER,
+ IFLA_BR_TCN_TIMER,
+ IFLA_BR_TOPOLOGY_CHANGE_TIMER,
+ IFLA_BR_GC_TIMER,
+ IFLA_BR_GROUP_ADDR,
+ IFLA_BR_FDB_FLUSH,
+ IFLA_BR_MCAST_ROUTER,
+ IFLA_BR_MCAST_SNOOPING,
+ IFLA_BR_MCAST_QUERY_USE_IFADDR,
+ IFLA_BR_MCAST_QUERIER,
+ IFLA_BR_MCAST_HASH_ELASTICITY,
+ IFLA_BR_MCAST_HASH_MAX,
+ IFLA_BR_MCAST_LAST_MEMBER_CNT,
+ IFLA_BR_MCAST_STARTUP_QUERY_CNT,
+ IFLA_BR_MCAST_LAST_MEMBER_INTVL,
+ IFLA_BR_MCAST_MEMBERSHIP_INTVL,
+ IFLA_BR_MCAST_QUERIER_INTVL,
+ IFLA_BR_MCAST_QUERY_INTVL,
+ IFLA_BR_MCAST_QUERY_RESPONSE_INTVL,
+ IFLA_BR_MCAST_STARTUP_QUERY_INTVL,
+ IFLA_BR_NF_CALL_IPTABLES,
+ IFLA_BR_NF_CALL_IP6TABLES,
+ IFLA_BR_NF_CALL_ARPTABLES,
+ IFLA_BR_VLAN_DEFAULT_PVID,
+ IFLA_BR_PAD,
+ IFLA_BR_VLAN_STATS_ENABLED,
+ IFLA_BR_MCAST_STATS_ENABLED,
+ IFLA_BR_MCAST_IGMP_VERSION,
+ IFLA_BR_MCAST_MLD_VERSION,
+ IFLA_BR_VLAN_STATS_PER_PORT,
+ IFLA_BR_MULTI_BOOLOPT,
+ __IFLA_BR_MAX,
+};
+
+#define IFLA_BR_MAX (__IFLA_BR_MAX - 1)
+
+struct ifla_bridge_id {
+ __u8 prio[2];
+ __u8 addr[6]; /* ETH_ALEN */
+};
+
+enum {
+ BRIDGE_MODE_UNSPEC,
+ BRIDGE_MODE_HAIRPIN,
+};
+
+enum {
+ IFLA_BRPORT_UNSPEC,
+ IFLA_BRPORT_STATE, /* Spanning tree state */
+ IFLA_BRPORT_PRIORITY, /* " priority */
+ IFLA_BRPORT_COST, /* " cost */
+ IFLA_BRPORT_MODE, /* mode (hairpin) */
+ IFLA_BRPORT_GUARD, /* bpdu guard */
+ IFLA_BRPORT_PROTECT, /* root port protection */
+ IFLA_BRPORT_FAST_LEAVE, /* multicast fast leave */
+ IFLA_BRPORT_LEARNING, /* mac learning */
+ IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */
+ IFLA_BRPORT_PROXYARP, /* proxy ARP */
+ IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */
+ IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */
+ IFLA_BRPORT_ROOT_ID, /* designated root */
+ IFLA_BRPORT_BRIDGE_ID, /* designated bridge */
+ IFLA_BRPORT_DESIGNATED_PORT,
+ IFLA_BRPORT_DESIGNATED_COST,
+ IFLA_BRPORT_ID,
+ IFLA_BRPORT_NO,
+ IFLA_BRPORT_TOPOLOGY_CHANGE_ACK,
+ IFLA_BRPORT_CONFIG_PENDING,
+ IFLA_BRPORT_MESSAGE_AGE_TIMER,
+ IFLA_BRPORT_FORWARD_DELAY_TIMER,
+ IFLA_BRPORT_HOLD_TIMER,
+ IFLA_BRPORT_FLUSH,
+ IFLA_BRPORT_MULTICAST_ROUTER,
+ IFLA_BRPORT_PAD,
+ IFLA_BRPORT_MCAST_FLOOD,
+ IFLA_BRPORT_MCAST_TO_UCAST,
+ IFLA_BRPORT_VLAN_TUNNEL,
+ IFLA_BRPORT_BCAST_FLOOD,
+ IFLA_BRPORT_GROUP_FWD_MASK,
+ IFLA_BRPORT_NEIGH_SUPPRESS,
+ IFLA_BRPORT_ISOLATED,
+ IFLA_BRPORT_BACKUP_PORT,
+ __IFLA_BRPORT_MAX
+};
+#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
+
+struct ifla_cacheinfo {
+ __u32 max_reasm_len;
+ __u32 tstamp; /* ipv6InterfaceTable updated timestamp */
+ __u32 reachable_time;
+ __u32 retrans_time;
+};
+
+enum {
+ IFLA_INFO_UNSPEC,
+ IFLA_INFO_KIND,
+ IFLA_INFO_DATA,
+ IFLA_INFO_XSTATS,
+ IFLA_INFO_SLAVE_KIND,
+ IFLA_INFO_SLAVE_DATA,
+ __IFLA_INFO_MAX,
+};
+
+#define IFLA_INFO_MAX (__IFLA_INFO_MAX - 1)
+
+/* VLAN section */
+
+enum {
+ IFLA_VLAN_UNSPEC,
+ IFLA_VLAN_ID,
+ IFLA_VLAN_FLAGS,
+ IFLA_VLAN_EGRESS_QOS,
+ IFLA_VLAN_INGRESS_QOS,
+ IFLA_VLAN_PROTOCOL,
+ __IFLA_VLAN_MAX,
+};
+
+#define IFLA_VLAN_MAX (__IFLA_VLAN_MAX - 1)
+
+struct ifla_vlan_flags {
+ __u32 flags;
+ __u32 mask;
+};
+
+enum {
+ IFLA_VLAN_QOS_UNSPEC,
+ IFLA_VLAN_QOS_MAPPING,
+ __IFLA_VLAN_QOS_MAX
+};
+
+#define IFLA_VLAN_QOS_MAX (__IFLA_VLAN_QOS_MAX - 1)
+
+struct ifla_vlan_qos_mapping {
+ __u32 from;
+ __u32 to;
+};
+
+/* MACVLAN section */
+enum {
+ IFLA_MACVLAN_UNSPEC,
+ IFLA_MACVLAN_MODE,
+ IFLA_MACVLAN_FLAGS,
+ IFLA_MACVLAN_MACADDR_MODE,
+ IFLA_MACVLAN_MACADDR,
+ IFLA_MACVLAN_MACADDR_DATA,
+ IFLA_MACVLAN_MACADDR_COUNT,
+ __IFLA_MACVLAN_MAX,
+};
+
+#define IFLA_MACVLAN_MAX (__IFLA_MACVLAN_MAX - 1)
+
+enum macvlan_mode {
+ MACVLAN_MODE_PRIVATE = 1, /* don't talk to other macvlans */
+ MACVLAN_MODE_VEPA = 2, /* talk to other ports through ext bridge */
+ MACVLAN_MODE_BRIDGE = 4, /* talk to bridge ports directly */
+ MACVLAN_MODE_PASSTHRU = 8,/* take over the underlying device */
+ MACVLAN_MODE_SOURCE = 16,/* use source MAC address list to assign */
+};
+
+enum macvlan_macaddr_mode {
+ MACVLAN_MACADDR_ADD,
+ MACVLAN_MACADDR_DEL,
+ MACVLAN_MACADDR_FLUSH,
+ MACVLAN_MACADDR_SET,
+};
+
+#define MACVLAN_FLAG_NOPROMISC 1
+
+/* VRF section */
+enum {
+ IFLA_VRF_UNSPEC,
+ IFLA_VRF_TABLE,
+ __IFLA_VRF_MAX
+};
+
+#define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1)
+
+enum {
+ IFLA_VRF_PORT_UNSPEC,
+ IFLA_VRF_PORT_TABLE,
+ __IFLA_VRF_PORT_MAX
+};
+
+#define IFLA_VRF_PORT_MAX (__IFLA_VRF_PORT_MAX - 1)
+
+/* MACSEC section */
+enum {
+ IFLA_MACSEC_UNSPEC,
+ IFLA_MACSEC_SCI,
+ IFLA_MACSEC_PORT,
+ IFLA_MACSEC_ICV_LEN,
+ IFLA_MACSEC_CIPHER_SUITE,
+ IFLA_MACSEC_WINDOW,
+ IFLA_MACSEC_ENCODING_SA,
+ IFLA_MACSEC_ENCRYPT,
+ IFLA_MACSEC_PROTECT,
+ IFLA_MACSEC_INC_SCI,
+ IFLA_MACSEC_ES,
+ IFLA_MACSEC_SCB,
+ IFLA_MACSEC_REPLAY_PROTECT,
+ IFLA_MACSEC_VALIDATION,
+ IFLA_MACSEC_PAD,
+ __IFLA_MACSEC_MAX,
+};
+
+#define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1)
+
+/* XFRM section */
+enum {
+ IFLA_XFRM_UNSPEC,
+ IFLA_XFRM_LINK,
+ IFLA_XFRM_IF_ID,
+ __IFLA_XFRM_MAX
+};
+
+#define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1)
+
+enum macsec_validation_type {
+ MACSEC_VALIDATE_DISABLED = 0,
+ MACSEC_VALIDATE_CHECK = 1,
+ MACSEC_VALIDATE_STRICT = 2,
+ __MACSEC_VALIDATE_END,
+ MACSEC_VALIDATE_MAX = __MACSEC_VALIDATE_END - 1,
+};
+
+/* IPVLAN section */
+enum {
+ IFLA_IPVLAN_UNSPEC,
+ IFLA_IPVLAN_MODE,
+ IFLA_IPVLAN_FLAGS,
+ __IFLA_IPVLAN_MAX
+};
+
+#define IFLA_IPVLAN_MAX (__IFLA_IPVLAN_MAX - 1)
+
+enum ipvlan_mode {
+ IPVLAN_MODE_L2 = 0,
+ IPVLAN_MODE_L3,
+ IPVLAN_MODE_L3S,
+ IPVLAN_MODE_MAX
+};
+
+#define IPVLAN_F_PRIVATE 0x01
+#define IPVLAN_F_VEPA 0x02
+
+/* VXLAN section */
+enum {
+ IFLA_VXLAN_UNSPEC,
+ IFLA_VXLAN_ID,
+ IFLA_VXLAN_GROUP, /* group or remote address */
+ IFLA_VXLAN_LINK,
+ IFLA_VXLAN_LOCAL,
+ IFLA_VXLAN_TTL,
+ IFLA_VXLAN_TOS,
+ IFLA_VXLAN_LEARNING,
+ IFLA_VXLAN_AGEING,
+ IFLA_VXLAN_LIMIT,
+ IFLA_VXLAN_PORT_RANGE, /* source port */
+ IFLA_VXLAN_PROXY,
+ IFLA_VXLAN_RSC,
+ IFLA_VXLAN_L2MISS,
+ IFLA_VXLAN_L3MISS,
+ IFLA_VXLAN_PORT, /* destination port */
+ IFLA_VXLAN_GROUP6,
+ IFLA_VXLAN_LOCAL6,
+ IFLA_VXLAN_UDP_CSUM,
+ IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
+ IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
+ IFLA_VXLAN_REMCSUM_TX,
+ IFLA_VXLAN_REMCSUM_RX,
+ IFLA_VXLAN_GBP,
+ IFLA_VXLAN_REMCSUM_NOPARTIAL,
+ IFLA_VXLAN_COLLECT_METADATA,
+ IFLA_VXLAN_LABEL,
+ IFLA_VXLAN_GPE,
+ IFLA_VXLAN_TTL_INHERIT,
+ IFLA_VXLAN_DF,
+ __IFLA_VXLAN_MAX
+};
+#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
+
+struct ifla_vxlan_port_range {
+ __be16 low;
+ __be16 high;
+};
+
+enum ifla_vxlan_df {
+ VXLAN_DF_UNSET = 0,
+ VXLAN_DF_SET,
+ VXLAN_DF_INHERIT,
+ __VXLAN_DF_END,
+ VXLAN_DF_MAX = __VXLAN_DF_END - 1,
+};
+
+/* GENEVE section */
+enum {
+ IFLA_GENEVE_UNSPEC,
+ IFLA_GENEVE_ID,
+ IFLA_GENEVE_REMOTE,
+ IFLA_GENEVE_TTL,
+ IFLA_GENEVE_TOS,
+ IFLA_GENEVE_PORT, /* destination port */
+ IFLA_GENEVE_COLLECT_METADATA,
+ IFLA_GENEVE_REMOTE6,
+ IFLA_GENEVE_UDP_CSUM,
+ IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
+ IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
+ IFLA_GENEVE_LABEL,
+ IFLA_GENEVE_TTL_INHERIT,
+ IFLA_GENEVE_DF,
+ __IFLA_GENEVE_MAX
+};
+#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1)
+
+enum ifla_geneve_df {
+ GENEVE_DF_UNSET = 0,
+ GENEVE_DF_SET,
+ GENEVE_DF_INHERIT,
+ __GENEVE_DF_END,
+ GENEVE_DF_MAX = __GENEVE_DF_END - 1,
+};
+
+/* PPP section */
+enum {
+ IFLA_PPP_UNSPEC,
+ IFLA_PPP_DEV_FD,
+ __IFLA_PPP_MAX
+};
+#define IFLA_PPP_MAX (__IFLA_PPP_MAX - 1)
+
+/* GTP section */
+
+enum ifla_gtp_role {
+ GTP_ROLE_GGSN = 0,
+ GTP_ROLE_SGSN,
+};
+
+enum {
+ IFLA_GTP_UNSPEC,
+ IFLA_GTP_FD0,
+ IFLA_GTP_FD1,
+ IFLA_GTP_PDP_HASHSIZE,
+ IFLA_GTP_ROLE,
+ __IFLA_GTP_MAX,
+};
+#define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
+
+/* Bonding section */
+
+enum {
+ IFLA_BOND_UNSPEC,
+ IFLA_BOND_MODE,
+ IFLA_BOND_ACTIVE_SLAVE,
+ IFLA_BOND_MIIMON,
+ IFLA_BOND_UPDELAY,
+ IFLA_BOND_DOWNDELAY,
+ IFLA_BOND_USE_CARRIER,
+ IFLA_BOND_ARP_INTERVAL,
+ IFLA_BOND_ARP_IP_TARGET,
+ IFLA_BOND_ARP_VALIDATE,
+ IFLA_BOND_ARP_ALL_TARGETS,
+ IFLA_BOND_PRIMARY,
+ IFLA_BOND_PRIMARY_RESELECT,
+ IFLA_BOND_FAIL_OVER_MAC,
+ IFLA_BOND_XMIT_HASH_POLICY,
+ IFLA_BOND_RESEND_IGMP,
+ IFLA_BOND_NUM_PEER_NOTIF,
+ IFLA_BOND_ALL_SLAVES_ACTIVE,
+ IFLA_BOND_MIN_LINKS,
+ IFLA_BOND_LP_INTERVAL,
+ IFLA_BOND_PACKETS_PER_SLAVE,
+ IFLA_BOND_AD_LACP_RATE,
+ IFLA_BOND_AD_SELECT,
+ IFLA_BOND_AD_INFO,
+ IFLA_BOND_AD_ACTOR_SYS_PRIO,
+ IFLA_BOND_AD_USER_PORT_KEY,
+ IFLA_BOND_AD_ACTOR_SYSTEM,
+ IFLA_BOND_TLB_DYNAMIC_LB,
+ IFLA_BOND_PEER_NOTIF_DELAY,
+ __IFLA_BOND_MAX,
+};
+
+#define IFLA_BOND_MAX (__IFLA_BOND_MAX - 1)
+
+enum {
+ IFLA_BOND_AD_INFO_UNSPEC,
+ IFLA_BOND_AD_INFO_AGGREGATOR,
+ IFLA_BOND_AD_INFO_NUM_PORTS,
+ IFLA_BOND_AD_INFO_ACTOR_KEY,
+ IFLA_BOND_AD_INFO_PARTNER_KEY,
+ IFLA_BOND_AD_INFO_PARTNER_MAC,
+ __IFLA_BOND_AD_INFO_MAX,
+};
+
+#define IFLA_BOND_AD_INFO_MAX (__IFLA_BOND_AD_INFO_MAX - 1)
+
+enum {
+ IFLA_BOND_SLAVE_UNSPEC,
+ IFLA_BOND_SLAVE_STATE,
+ IFLA_BOND_SLAVE_MII_STATUS,
+ IFLA_BOND_SLAVE_LINK_FAILURE_COUNT,
+ IFLA_BOND_SLAVE_PERM_HWADDR,
+ IFLA_BOND_SLAVE_QUEUE_ID,
+ IFLA_BOND_SLAVE_AD_AGGREGATOR_ID,
+ IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE,
+ IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE,
+ __IFLA_BOND_SLAVE_MAX,
+};
+
+#define IFLA_BOND_SLAVE_MAX (__IFLA_BOND_SLAVE_MAX - 1)
+
+/* SR-IOV virtual function management section */
+
+enum {
+ IFLA_VF_INFO_UNSPEC,
+ IFLA_VF_INFO,
+ __IFLA_VF_INFO_MAX,
+};
+
+#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1)
+
+enum {
+ IFLA_VF_UNSPEC,
+ IFLA_VF_MAC, /* Hardware queue specific attributes */
+ IFLA_VF_VLAN, /* VLAN ID and QoS */
+ IFLA_VF_TX_RATE, /* Max TX Bandwidth Allocation */
+ IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */
+ IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */
+ IFLA_VF_RATE, /* Min and Max TX Bandwidth Allocation */
+ IFLA_VF_RSS_QUERY_EN, /* RSS Redirection Table and Hash Key query
+ * on/off switch
+ */
+ IFLA_VF_STATS, /* network device statistics */
+ IFLA_VF_TRUST, /* Trust VF */
+ IFLA_VF_IB_NODE_GUID, /* VF Infiniband node GUID */
+ IFLA_VF_IB_PORT_GUID, /* VF Infiniband port GUID */
+ IFLA_VF_VLAN_LIST, /* nested list of vlans, option for QinQ */
+ IFLA_VF_BROADCAST, /* VF broadcast */
+ __IFLA_VF_MAX,
+};
+
+#define IFLA_VF_MAX (__IFLA_VF_MAX - 1)
+
+struct ifla_vf_mac {
+ __u32 vf;
+ __u8 mac[32]; /* MAX_ADDR_LEN */
+};
+
+struct ifla_vf_broadcast {
+ __u8 broadcast[32];
+};
+
+struct ifla_vf_vlan {
+ __u32 vf;
+ __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
+ __u32 qos;
+};
+
+enum {
+ IFLA_VF_VLAN_INFO_UNSPEC,
+ IFLA_VF_VLAN_INFO, /* VLAN ID, QoS and VLAN protocol */
+ __IFLA_VF_VLAN_INFO_MAX,
+};
+
+#define IFLA_VF_VLAN_INFO_MAX (__IFLA_VF_VLAN_INFO_MAX - 1)
+#define MAX_VLAN_LIST_LEN 1
+
+struct ifla_vf_vlan_info {
+ __u32 vf;
+ __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
+ __u32 qos;
+ __be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */
+};
+
+struct ifla_vf_tx_rate {
+ __u32 vf;
+ __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */
+};
+
+struct ifla_vf_rate {
+ __u32 vf;
+ __u32 min_tx_rate; /* Min Bandwidth in Mbps */
+ __u32 max_tx_rate; /* Max Bandwidth in Mbps */
+};
+
+struct ifla_vf_spoofchk {
+ __u32 vf;
+ __u32 setting;
+};
+
+struct ifla_vf_guid {
+ __u32 vf;
+ __u64 guid;
+};
+
+enum {
+ IFLA_VF_LINK_STATE_AUTO, /* link state of the uplink */
+ IFLA_VF_LINK_STATE_ENABLE, /* link always up */
+ IFLA_VF_LINK_STATE_DISABLE, /* link always down */
+ __IFLA_VF_LINK_STATE_MAX,
+};
+
+struct ifla_vf_link_state {
+ __u32 vf;
+ __u32 link_state;
+};
+
+struct ifla_vf_rss_query_en {
+ __u32 vf;
+ __u32 setting;
+};
+
+enum {
+ IFLA_VF_STATS_RX_PACKETS,
+ IFLA_VF_STATS_TX_PACKETS,
+ IFLA_VF_STATS_RX_BYTES,
+ IFLA_VF_STATS_TX_BYTES,
+ IFLA_VF_STATS_BROADCAST,
+ IFLA_VF_STATS_MULTICAST,
+ IFLA_VF_STATS_PAD,
+ IFLA_VF_STATS_RX_DROPPED,
+ IFLA_VF_STATS_TX_DROPPED,
+ __IFLA_VF_STATS_MAX,
+};
+
+#define IFLA_VF_STATS_MAX (__IFLA_VF_STATS_MAX - 1)
+
+struct ifla_vf_trust {
+ __u32 vf;
+ __u32 setting;
+};
+
+/* VF ports management section
+ *
+ * Nested layout of set/get msg is:
+ *
+ * [IFLA_NUM_VF]
+ * [IFLA_VF_PORTS]
+ * [IFLA_VF_PORT]
+ * [IFLA_PORT_*], ...
+ * [IFLA_VF_PORT]
+ * [IFLA_PORT_*], ...
+ * ...
+ * [IFLA_PORT_SELF]
+ * [IFLA_PORT_*], ...
+ */
+
+enum {
+ IFLA_VF_PORT_UNSPEC,
+ IFLA_VF_PORT, /* nest */
+ __IFLA_VF_PORT_MAX,
+};
+
+#define IFLA_VF_PORT_MAX (__IFLA_VF_PORT_MAX - 1)
+
+enum {
+ IFLA_PORT_UNSPEC,
+ IFLA_PORT_VF, /* __u32 */
+ IFLA_PORT_PROFILE, /* string */
+ IFLA_PORT_VSI_TYPE, /* 802.1Qbg (pre-)standard VDP */
+ IFLA_PORT_INSTANCE_UUID, /* binary UUID */
+ IFLA_PORT_HOST_UUID, /* binary UUID */
+ IFLA_PORT_REQUEST, /* __u8 */
+ IFLA_PORT_RESPONSE, /* __u16, output only */
+ __IFLA_PORT_MAX,
+};
+
+#define IFLA_PORT_MAX (__IFLA_PORT_MAX - 1)
+
+#define PORT_PROFILE_MAX 40
+#define PORT_UUID_MAX 16
+#define PORT_SELF_VF -1
+
+enum {
+ PORT_REQUEST_PREASSOCIATE = 0,
+ PORT_REQUEST_PREASSOCIATE_RR,
+ PORT_REQUEST_ASSOCIATE,
+ PORT_REQUEST_DISASSOCIATE,
+};
+
+enum {
+ PORT_VDP_RESPONSE_SUCCESS = 0,
+ PORT_VDP_RESPONSE_INVALID_FORMAT,
+ PORT_VDP_RESPONSE_INSUFFICIENT_RESOURCES,
+ PORT_VDP_RESPONSE_UNUSED_VTID,
+ PORT_VDP_RESPONSE_VTID_VIOLATION,
+ PORT_VDP_RESPONSE_VTID_VERSION_VIOALTION, /* sic: "VIOALTION" is the declared spelling; NOTE(review): check users before renaming */
+ PORT_VDP_RESPONSE_OUT_OF_SYNC,
+ /* 0x08-0xFF reserved for future VDP use */
+ PORT_PROFILE_RESPONSE_SUCCESS = 0x100,
+ PORT_PROFILE_RESPONSE_INPROGRESS,
+ PORT_PROFILE_RESPONSE_INVALID,
+ PORT_PROFILE_RESPONSE_BADSTATE,
+ PORT_PROFILE_RESPONSE_INSUFFICIENT_RESOURCES,
+ PORT_PROFILE_RESPONSE_ERROR,
+};
+
+struct ifla_port_vsi {
+ __u8 vsi_mgr_id;
+ __u8 vsi_type_id[3];
+ __u8 vsi_type_version;
+ __u8 pad[3];
+};
+
+
+/* IPoIB section */
+
+enum {
+ IFLA_IPOIB_UNSPEC,
+ IFLA_IPOIB_PKEY,
+ IFLA_IPOIB_MODE,
+ IFLA_IPOIB_UMCAST,
+ __IFLA_IPOIB_MAX
+};
+
+enum {
+ IPOIB_MODE_DATAGRAM = 0, /* using unreliable datagram QPs */
+ IPOIB_MODE_CONNECTED = 1, /* using connected QPs */
+};
+
+#define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1)
+
+
+/* HSR section */
+
+enum {
+ IFLA_HSR_UNSPEC,
+ IFLA_HSR_SLAVE1,
+ IFLA_HSR_SLAVE2,
+ IFLA_HSR_MULTICAST_SPEC, /* Last byte of supervision addr */
+ IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */
+ IFLA_HSR_SEQ_NR,
+ IFLA_HSR_VERSION, /* HSR version */
+ __IFLA_HSR_MAX,
+};
+
+#define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1)
+
+/* STATS section */
+
+struct if_stats_msg {
+ __u8 family;
+ __u8 pad1;
+ __u16 pad2;
+ __u32 ifindex;
+ __u32 filter_mask;
+};
+
+/* A stats attribute can be netdev specific or a global stat.
+ * For netdev stats, let's use the prefix IFLA_STATS_LINK_*
+ */
+enum {
+ IFLA_STATS_UNSPEC, /* also used as 64bit pad attribute */
+ IFLA_STATS_LINK_64,
+ IFLA_STATS_LINK_XSTATS,
+ IFLA_STATS_LINK_XSTATS_SLAVE,
+ IFLA_STATS_LINK_OFFLOAD_XSTATS,
+ IFLA_STATS_AF_SPEC,
+ __IFLA_STATS_MAX,
+};
+
+#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1)
+
+#define IFLA_STATS_FILTER_BIT(ATTR) (1 << ((ATTR) - 1)) /* mask with bit (ATTR - 1) set; ATTR must be >= 1 */
+
+/* These are embedded into IFLA_STATS_LINK_XSTATS:
+ * [IFLA_STATS_LINK_XSTATS]
+ * -> [LINK_XSTATS_TYPE_xxx]
+ * -> [rtnl link type specific attributes]
+ */
+enum {
+ LINK_XSTATS_TYPE_UNSPEC,
+ LINK_XSTATS_TYPE_BRIDGE,
+ LINK_XSTATS_TYPE_BOND,
+ __LINK_XSTATS_TYPE_MAX
+};
+#define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1)
+
+/* These are stats embedded into IFLA_STATS_LINK_OFFLOAD_XSTATS */
+enum {
+ IFLA_OFFLOAD_XSTATS_UNSPEC,
+ IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */
+ __IFLA_OFFLOAD_XSTATS_MAX
+};
+#define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1)
+
+/* XDP section */
+
+#define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0)
+#define XDP_FLAGS_SKB_MODE (1U << 1)
+#define XDP_FLAGS_DRV_MODE (1U << 2)
+#define XDP_FLAGS_HW_MODE (1U << 3)
+#define XDP_FLAGS_MODES (XDP_FLAGS_SKB_MODE | \
+ XDP_FLAGS_DRV_MODE | \
+ XDP_FLAGS_HW_MODE)
+#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \
+ XDP_FLAGS_MODES)
+
+/* These are stored into IFLA_XDP_ATTACHED on dump. */
+enum {
+ XDP_ATTACHED_NONE = 0,
+ XDP_ATTACHED_DRV,
+ XDP_ATTACHED_SKB,
+ XDP_ATTACHED_HW,
+ XDP_ATTACHED_MULTI,
+};
+
+enum {
+ IFLA_XDP_UNSPEC,
+ IFLA_XDP_FD,
+ IFLA_XDP_ATTACHED,
+ IFLA_XDP_FLAGS,
+ IFLA_XDP_PROG_ID,
+ IFLA_XDP_DRV_PROG_ID,
+ IFLA_XDP_SKB_PROG_ID,
+ IFLA_XDP_HW_PROG_ID,
+ __IFLA_XDP_MAX,
+};
+
+#define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1)
+
+enum {
+ IFLA_EVENT_NONE,
+ IFLA_EVENT_REBOOT, /* internal reset / reboot */
+ IFLA_EVENT_FEATURES, /* change in offload features */
+ IFLA_EVENT_BONDING_FAILOVER, /* change in active slave */
+ IFLA_EVENT_NOTIFY_PEERS, /* re-sent grat. arp/ndisc */
+ IFLA_EVENT_IGMP_RESEND, /* re-sent IGMP JOIN */
+ IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */
+};
+
+/* tun section */
+
+enum {
+ IFLA_TUN_UNSPEC,
+ IFLA_TUN_OWNER,
+ IFLA_TUN_GROUP,
+ IFLA_TUN_TYPE,
+ IFLA_TUN_PI,
+ IFLA_TUN_VNET_HDR,
+ IFLA_TUN_PERSIST,
+ IFLA_TUN_MULTI_QUEUE,
+ IFLA_TUN_NUM_QUEUES,
+ IFLA_TUN_NUM_DISABLED_QUEUES,
+ __IFLA_TUN_MAX,
+};
+
+#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1)
+
+/* rmnet section */
+
+#define RMNET_FLAGS_INGRESS_DEAGGREGATION (1U << 0)
+#define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1)
+#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2)
+#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3)
+
+enum {
+ IFLA_RMNET_UNSPEC,
+ IFLA_RMNET_MUX_ID,
+ IFLA_RMNET_FLAGS,
+ __IFLA_RMNET_MAX,
+};
+
+#define IFLA_RMNET_MAX (__IFLA_RMNET_MAX - 1)
+
+struct ifla_rmnet_flags {
+ __u32 flags;
+ __u32 mask;
+};
+
+#endif /* _UAPI_LINUX_IF_LINK_H */
diff --git a/src/contrib/libbpf/include/uapi/linux/if_xdp.h b/src/contrib/libbpf/include/uapi/linux/if_xdp.h
new file mode 100644
index 0000000..be328c5
--- /dev/null
+++ b/src/contrib/libbpf/include/uapi/linux/if_xdp.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * if_xdp: XDP socket user-space interface
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * Author(s): Björn Töpel <bjorn.topel@intel.com>
+ * Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#ifndef _LINUX_IF_XDP_H
+#define _LINUX_IF_XDP_H
+
+#include <linux/types.h>
+
+/* Options for the sxdp_flags field */
+#define XDP_SHARED_UMEM (1 << 0)
+#define XDP_COPY (1 << 1) /* Force copy-mode */
+#define XDP_ZEROCOPY (1 << 2) /* Force zero-copy mode */
+/* If this option is set, the driver might go to sleep and in that case
+ * the XDP_RING_NEED_WAKEUP flag in the fill and/or Tx rings will be
+ * set. If it is set, the application needs to explicitly wake up the
+ * driver with a poll() (Rx and Tx) or sendto() (Tx only). If you are
+ * running the driver and the application on the same core, you should
+ * use this option so that the kernel will yield to the user space
+ * application.
+ */
+#define XDP_USE_NEED_WAKEUP (1 << 3)
+
+/* Flags for xsk_umem_config flags */
+#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0)
+
+struct sockaddr_xdp {
+ __u16 sxdp_family;
+ __u16 sxdp_flags;
+ __u32 sxdp_ifindex;
+ __u32 sxdp_queue_id;
+ __u32 sxdp_shared_umem_fd;
+};
+
+/* XDP_RING flags */
+#define XDP_RING_NEED_WAKEUP (1 << 0)
+
+struct xdp_ring_offset {
+ __u64 producer;
+ __u64 consumer;
+ __u64 desc;
+ __u64 flags;
+};
+
+struct xdp_mmap_offsets {
+ struct xdp_ring_offset rx;
+ struct xdp_ring_offset tx;
+ struct xdp_ring_offset fr; /* Fill */
+ struct xdp_ring_offset cr; /* Completion */
+};
+
+/* XDP socket options */
+#define XDP_MMAP_OFFSETS 1
+#define XDP_RX_RING 2
+#define XDP_TX_RING 3
+#define XDP_UMEM_REG 4
+#define XDP_UMEM_FILL_RING 5
+#define XDP_UMEM_COMPLETION_RING 6
+#define XDP_STATISTICS 7
+#define XDP_OPTIONS 8
+
+struct xdp_umem_reg {
+ __u64 addr; /* Start of packet data area */
+ __u64 len; /* Length of packet data area */
+ __u32 chunk_size;
+ __u32 headroom;
+ __u32 flags;
+};
+
+struct xdp_statistics {
+ __u64 rx_dropped; /* Dropped for reasons other than invalid desc */
+ __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
+ __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
+};
+
+struct xdp_options {
+ __u32 flags;
+};
+
+/* Flags for the flags field of struct xdp_options */
+#define XDP_OPTIONS_ZEROCOPY (1 << 0)
+
+/* Pgoff for mmaping the rings */
+#define XDP_PGOFF_RX_RING 0
+#define XDP_PGOFF_TX_RING 0x80000000
+#define XDP_UMEM_PGOFF_FILL_RING 0x100000000ULL
+#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000ULL
+
+/* Masks for unaligned chunks mode */
+#define XSK_UNALIGNED_BUF_OFFSET_SHIFT 48
+#define XSK_UNALIGNED_BUF_ADDR_MASK \
+ ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)
+
+/* Rx/Tx descriptor */
+struct xdp_desc {
+ __u64 addr;
+ __u32 len;
+ __u32 options;
+};
+
+/* UMEM descriptor is __u64 */
+
+#endif /* _LINUX_IF_XDP_H */
diff --git a/src/contrib/libbpf/include/uapi/linux/netlink.h b/src/contrib/libbpf/include/uapi/linux/netlink.h
new file mode 100644
index 0000000..0a4d733
--- /dev/null
+++ b/src/contrib/libbpf/include/uapi/linux/netlink.h
@@ -0,0 +1,252 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_NETLINK_H
+#define _UAPI__LINUX_NETLINK_H
+
+#include <linux/kernel.h>
+#include <linux/socket.h> /* for __kernel_sa_family_t */
+#include <linux/types.h>
+
+#define NETLINK_ROUTE 0 /* Routing/device hook */
+#define NETLINK_UNUSED 1 /* Unused number */
+#define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */
+#define NETLINK_FIREWALL 3 /* Unused number, formerly ip_queue */
+#define NETLINK_SOCK_DIAG 4 /* socket monitoring */
+#define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */
+#define NETLINK_XFRM 6 /* ipsec */
+#define NETLINK_SELINUX 7 /* SELinux event notifications */
+#define NETLINK_ISCSI 8 /* Open-iSCSI */
+#define NETLINK_AUDIT 9 /* auditing */
+#define NETLINK_FIB_LOOKUP 10
+#define NETLINK_CONNECTOR 11
+#define NETLINK_NETFILTER 12 /* netfilter subsystem */
+#define NETLINK_IP6_FW 13
+#define NETLINK_DNRTMSG 14 /* DECnet routing messages */
+#define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */
+#define NETLINK_GENERIC 16
+/* leave room for NETLINK_DM (DM Events): value 17 is intentionally not defined here */
+#define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */
+#define NETLINK_ECRYPTFS 19
+#define NETLINK_RDMA 20
+#define NETLINK_CRYPTO 21 /* Crypto layer */
+#define NETLINK_SMC 22 /* SMC monitoring */
+
+#define NETLINK_INET_DIAG NETLINK_SOCK_DIAG /* alias: expands to the same value (4) */
+
+#define MAX_LINKS 32 /* NOTE(review): presumably the upper bound on protocol numbers above -- confirm */
+
+struct sockaddr_nl { /* socket address for the AF_NETLINK family */
+ __kernel_sa_family_t nl_family; /* AF_NETLINK */
+ unsigned short nl_pad; /* zero */
+ __u32 nl_pid; /* port ID */
+ __u32 nl_groups; /* multicast groups mask */
+};
+
+struct nlmsghdr { /* message header; NLMSG_HDRLEN is its NLMSG_ALIGN()ed size */
+ __u32 nlmsg_len; /* Length of message including header */
+ __u16 nlmsg_type; /* Message content */
+ __u16 nlmsg_flags; /* Additional flags */
+ __u32 nlmsg_seq; /* Sequence number */
+ __u32 nlmsg_pid; /* Sending process port ID */
+};
+
+/* Flags values (the modifier groups below reuse bits 0x100-0x800, so a bit's meaning depends on the kind of request) */
+
+#define NLM_F_REQUEST 0x01 /* It is a request message. */
+#define NLM_F_MULTI 0x02 /* Multipart message, terminated by NLMSG_DONE */
+#define NLM_F_ACK 0x04 /* Reply with ack, with zero or error code */
+#define NLM_F_ECHO 0x08 /* Echo this request */
+#define NLM_F_DUMP_INTR 0x10 /* Dump was inconsistent due to sequence change */
+#define NLM_F_DUMP_FILTERED 0x20 /* Dump was filtered as requested */
+
+/* Modifiers to GET request */
+#define NLM_F_ROOT 0x100 /* specify tree root */
+#define NLM_F_MATCH 0x200 /* return all matching */
+#define NLM_F_ATOMIC 0x400 /* atomic GET */
+#define NLM_F_DUMP (NLM_F_ROOT|NLM_F_MATCH) /* == 0x300 */
+
+/* Modifiers to NEW request */
+#define NLM_F_REPLACE 0x100 /* Override existing */
+#define NLM_F_EXCL 0x200 /* Do not touch, if it exists */
+#define NLM_F_CREATE 0x400 /* Create, if it does not exist */
+#define NLM_F_APPEND 0x800 /* Add to end of list */
+
+/* Modifiers to DELETE request */
+#define NLM_F_NONREC 0x100 /* Do not delete recursively */
+
+/* Flags for ACK message */
+#define NLM_F_CAPPED 0x100 /* request was capped */
+#define NLM_F_ACK_TLVS 0x200 /* extended ACK TLVs were included */
+
+/*
+ 4.4BSD ADD NLM_F_CREATE|NLM_F_EXCL
+ 4.4BSD CHANGE NLM_F_REPLACE
+
+ True CHANGE NLM_F_CREATE|NLM_F_REPLACE
+ Append NLM_F_CREATE
+ Check NLM_F_EXCL
+ */
+
+#define NLMSG_ALIGNTO 4U /* alignment unit; unsigned, so NLMSG_ALIGN() arithmetic is unsigned */
+#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) ) /* round len up to a multiple of NLMSG_ALIGNTO */
+#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr))) /* aligned header size, as int */
+#define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN) /* header plus len payload bytes, not padded */
+#define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len)) /* NLMSG_LENGTH(len) rounded up to the alignment */
+#define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0))) /* payload start; NOTE(review): nlh is not parenthesized, so pass a plain pointer expression */
+#define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \
+ (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len))) /* yields the next header; modifies len and evaluates nlh twice */
+#define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \
+ (nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \
+ (nlh)->nlmsg_len <= (len)) /* true when len can hold a header and nlmsg_len lies in [header size, len] */
+#define NLMSG_PAYLOAD(nlh,len) ((nlh)->nlmsg_len - NLMSG_SPACE((len))) /* bytes left after a len-byte leading payload; no underflow check */
+
+#define NLMSG_NOOP 0x1 /* Nothing. */
+#define NLMSG_ERROR 0x2 /* Error; NOTE(review): presumably carries struct nlmsgerr -- confirm */
+#define NLMSG_DONE 0x3 /* End of a dump */
+#define NLMSG_OVERRUN 0x4 /* Data lost */
+
+#define NLMSG_MIN_TYPE 0x10 /* < 0x10: reserved control messages (the four types above fall in this range) */
+
+struct nlmsgerr {
+ int error; /* 0 when the ACK indicates success (see note below) */
+ struct nlmsghdr msg; /* NOTE(review): presumably the header of the message being answered -- confirm */
+ /*
+ * followed by the message contents unless NETLINK_CAP_ACK was set
+ * or the ACK indicates success (error == 0)
+ * message length is aligned with NLMSG_ALIGN()
+ */
+ /*
+ * followed by TLVs defined in enum nlmsgerr_attrs
+ * if NETLINK_EXT_ACK was set
+ */
+};
+
+/**
+ * enum nlmsgerr_attrs - nlmsgerr attributes
+ * @NLMSGERR_ATTR_UNUSED: unused
+ * @NLMSGERR_ATTR_MSG: error message string (string)
+ * @NLMSGERR_ATTR_OFFS: offset of the invalid attribute in the original
+ * message, counting from the beginning of the header (u32)
+ * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to
+ * be used - in the success case - to identify a created
+ * object or operation or similar (binary)
+ * @__NLMSGERR_ATTR_MAX: number of attributes (one past the highest)
+ * @NLMSGERR_ATTR_MAX: highest attribute number
+ */
+enum nlmsgerr_attrs {
+ NLMSGERR_ATTR_UNUSED, /* = 0 */
+ NLMSGERR_ATTR_MSG, /* = 1 */
+ NLMSGERR_ATTR_OFFS, /* = 2 */
+ NLMSGERR_ATTR_COOKIE, /* = 3 */
+
+ __NLMSGERR_ATTR_MAX, /* = 4; keep last among the real attributes */
+ NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1 /* = 3, same value as NLMSGERR_ATTR_COOKIE */
+};
+
+#define NETLINK_ADD_MEMBERSHIP 1 /* NOTE(review): values 1-12 are presumably netlink socket option numbers -- confirm */
+#define NETLINK_DROP_MEMBERSHIP 2
+#define NETLINK_PKTINFO 3 /* NOTE(review): presumably paired with struct nl_pktinfo -- confirm */
+#define NETLINK_BROADCAST_ERROR 4
+#define NETLINK_NO_ENOBUFS 5
+#ifndef __KERNEL__ /* values 6 and 7 are only named when __KERNEL__ is not defined */
+#define NETLINK_RX_RING 6
+#define NETLINK_TX_RING 7
+#endif /* !__KERNEL__ */
+#define NETLINK_LISTEN_ALL_NSID 8
+#define NETLINK_LIST_MEMBERSHIPS 9
+#define NETLINK_CAP_ACK 10 /* referenced by the struct nlmsgerr layout notes */
+#define NETLINK_EXT_ACK 11 /* referenced by the struct nlmsgerr layout notes */
+#define NETLINK_GET_STRICT_CHK 12
+
+struct nl_pktinfo { /* NOTE(review): presumably the data associated with NETLINK_PKTINFO -- confirm */
+ __u32 group; /* NOTE(review): presumably a multicast group identifier -- confirm */
+};
+
+struct nl_mmap_req { /* NOTE(review): presumably the argument for NETLINK_RX_RING / NETLINK_TX_RING -- confirm */
+ unsigned int nm_block_size; /* NOTE(review): presumably size of one block -- confirm units */
+ unsigned int nm_block_nr; /* NOTE(review): presumably number of blocks -- confirm */
+ unsigned int nm_frame_size; /* NOTE(review): presumably size of one frame -- confirm units */
+ unsigned int nm_frame_nr; /* NOTE(review): presumably number of frames -- confirm */
+};
+
+struct nl_mmap_hdr { /* NL_MMAP_HDRLEN is the aligned size of this struct */
+ unsigned int nm_status; /* NOTE(review): presumably one of enum nl_mmap_status -- confirm */
+ unsigned int nm_len; /* NOTE(review): presumably the length of the data that follows -- confirm */
+ __u32 nm_group;
+ /* credentials */
+ __u32 nm_pid;
+ __u32 nm_uid;
+ __u32 nm_gid;
+};
+
+#ifndef __KERNEL__ /* everything down to the matching #endif is hidden when __KERNEL__ is defined */
+enum nl_mmap_status {
+ NL_MMAP_STATUS_UNUSED, /* = 0 */
+ NL_MMAP_STATUS_RESERVED, /* = 1 */
+ NL_MMAP_STATUS_VALID, /* = 2 */
+ NL_MMAP_STATUS_COPY, /* = 3 */
+ NL_MMAP_STATUS_SKIP, /* = 4 */
+};
+
+#define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO /* same alignment unit as netlink messages (4U) */
+#define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT) /* __ALIGN_KERNEL is defined outside this header */
+#define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr)) /* aligned size of struct nl_mmap_hdr */
+#endif /* !__KERNEL__ */
+
+#define NET_MAJOR 36 /* Major 36 is reserved for networking */
+
+enum { /* anonymous enum with two state constants */
+ NETLINK_UNCONNECTED = 0,
+ NETLINK_CONNECTED, /* = 1 */
+};
+
+/*
+ * <------- NLA_HDRLEN ------> <-- NLA_ALIGN(payload)-->
+ * +---------------------+- - -+- - - - - - - - - -+- - -+
+ * | Header | Pad | Payload | Pad |
+ * | (struct nlattr) | ing | | ing |
+ * +---------------------+- - -+- - - - - - - - - -+- - -+
+ * <-------------- nlattr->nla_len -------------->
+ */
+
+struct nlattr {
+ __u16 nla_len; /* per the diagram: header, header padding and payload, but not the trailing payload padding */
+ __u16 nla_type; /* attribute type plus the N/O flag bits; see NLA_TYPE_MASK */
+};
+
+/*
+ * nla_type (16 bits)
+ * +---+---+-------------------------------+
+ * | N | O | Attribute Type |
+ * +---+---+-------------------------------+
+ * N := Carries nested attributes
+ * O := Payload stored in network byte order
+ *
+ * Note: The N and O flags are mutually exclusive.
+ */
+#define NLA_F_NESTED (1 << 15) /* the N bit */
+#define NLA_F_NET_BYTEORDER (1 << 14) /* the O bit */
+#define NLA_TYPE_MASK ~(NLA_F_NESTED | NLA_F_NET_BYTEORDER) /* clears bits 15 and 14; NOTE(review): expansion has no outer parentheses */
+
+#define NLA_ALIGNTO 4 /* alignment unit; a signed int constant, unlike NLMSG_ALIGNTO (4U) */
+#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1)) /* round len up to a multiple of NLA_ALIGNTO */
+#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr))) /* aligned size of struct nlattr, as int */
+
+/* Generic 32 bitflags attribute content sent to the kernel.
+ *
+ * The value is a bitmap that defines the values being set.
+ * The selector is a bitmask that defines which bits of value are valid.
+ *
+ * Examples (bits are counted from 1 here, i.e. mask 0x1 is "bit 1"):
+ * value = 0x0, and selector = 0x1
+ * implies we are selecting bit 1 and we want to set its value to 0.
+ *
+ * value = 0x2, and selector = 0x2
+ * implies we are selecting bit 2 and we want to set its value to 1.
+ *
+ */
+struct nla_bitfield32 {
+ __u32 value; /* bit values to apply */
+ __u32 selector; /* mask of the bits in value that are selected */
+};
+
+#endif /* _UAPI__LINUX_NETLINK_H */