summaryrefslogtreecommitdiffstats
path: root/lib/libxdp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/libxdp')
-rw-r--r--lib/libxdp/.gitignore5
-rw-r--r--lib/libxdp/Makefile167
-rw-r--r--lib/libxdp/README.org437
-rw-r--r--lib/libxdp/bpf_instr.h126
-rw-r--r--lib/libxdp/compat.h13
-rw-r--r--lib/libxdp/libxdp.3503
-rw-r--r--lib/libxdp/libxdp.c3408
-rw-r--r--lib/libxdp/libxdp.map78
-rw-r--r--lib/libxdp/libxdp.mk3
-rw-r--r--lib/libxdp/libxdp.pc.template12
-rw-r--r--lib/libxdp/libxdp_internal.h146
-rw-r--r--lib/libxdp/protocol.org473
-rw-r--r--lib/libxdp/tests/.gitignore4
-rw-r--r--lib/libxdp/tests/Makefile80
-rw-r--r--lib/libxdp/tests/check_kern_compat.c10
-rw-r--r--lib/libxdp/tests/test-libxdp.sh99
-rw-r--r--lib/libxdp/tests/test_dispatcher_versions.c300
-rwxr-xr-xlib/libxdp/tests/test_runner.sh118
-rw-r--r--lib/libxdp/tests/test_utils.h49
-rw-r--r--lib/libxdp/tests/test_xdp_frags.c339
-rw-r--r--lib/libxdp/tests/test_xsk_refcnt.c304
-rw-r--r--lib/libxdp/tests/xdp_dispatcher_v1.c43
-rw-r--r--lib/libxdp/tests/xdp_dispatcher_v1.h16
-rw-r--r--lib/libxdp/tests/xdp_pass.c11
-rw-r--r--lib/libxdp/xdp-dispatcher.c.in82
-rw-r--r--lib/libxdp/xsk.c1299
-rw-r--r--lib/libxdp/xsk_def_xdp_prog.c44
-rw-r--r--lib/libxdp/xsk_def_xdp_prog.h9
-rw-r--r--lib/libxdp/xsk_def_xdp_prog_5.3.c49
29 files changed, 8227 insertions, 0 deletions
diff --git a/lib/libxdp/.gitignore b/lib/libxdp/.gitignore
new file mode 100644
index 0000000..c5a9951
--- /dev/null
+++ b/lib/libxdp/.gitignore
@@ -0,0 +1,5 @@
+*.so.*
+*.a
+*.pc
+sharedobjs/
+staticobjs/
diff --git a/lib/libxdp/Makefile b/lib/libxdp/Makefile
new file mode 100644
index 0000000..431932a
--- /dev/null
+++ b/lib/libxdp/Makefile
@@ -0,0 +1,167 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+
+# Relative path from lib/libxdp to the parent lib/ directory.
+LIB_DIR = ..
+
+# NOTE(review): libxdp.mk presumably provides LIBXDP_VERSION and
+# LIBXDP_MAJOR_VERSION, and defines.mk the shared build variables used below
+# (Q, QUIET_*, HEADER_DIR, LIBMK, CLANG, LLC, ...) -- confirm in those files.
+include libxdp.mk
+include $(LIB_DIR)/defines.mk
+
+# Output locations. OBJDIR may be overridden by the caller; shared and static
+# objects are compiled into separate subdirectories of it.
+OBJDIR ?= .
+SHARED_OBJDIR := $(OBJDIR)/sharedobjs
+STATIC_OBJDIR := $(OBJDIR)/staticobjs
+# Host-side library objects (built once per flavour) and BPF-side objects.
+OBJS := libxdp.o xsk.o
+XDP_OBJS := xdp-dispatcher.o xsk_def_xdp_prog.o xsk_def_xdp_prog_5.3.o
+# Each BPF object also gets a "<name>.embed.o" wrapper that is added to the
+# static archive only (see STATIC_OBJS).
+EMBEDDED_XDP_OBJS := $(addsuffix .embed.o,$(basename $(XDP_OBJS)))
+SHARED_OBJS := $(addprefix $(SHARED_OBJDIR)/,$(OBJS))
+STATIC_OBJS := $(addprefix $(STATIC_OBJDIR)/,$(OBJS)) $(EMBEDDED_XDP_OBJS)
+STATIC_LIBS := $(OBJDIR)/libxdp.a
+# MAN_OBJ is the intermediate export (libxdp.man) that MAN_PAGE is derived from.
+MAN_PAGE := libxdp.3
+MAN_OBJ := ${MAN_PAGE:.3=.man}
+MAN_FILES := $(MAN_PAGE)
+TEST_DIR := tests
+
+SHARED_CFLAGS += -fPIC -DSHARED
+# Header sets used as prerequisites: LIB_HEADERS for the host objects (and for
+# installation), BPF_HEADERS for the BPF objects.
+LIB_HEADERS := $(wildcard $(HEADER_DIR)/xdp/*.h)
+BPF_HEADERS := $(wildcard $(HEADER_DIR)/bpf/*.h) $(wildcard $(HEADER_DIR)/xdp/*.h)
+# Extra prerequisites of every host object: any change here recompiles them all.
+EXTRA_LIB_DEPS := $(OBJECT_LIBBPF) $(LIBMK) $(LIB_OBJS) $(LIB_HEADERS) compat.h libxdp_internal.h xsk_def_xdp_prog.h bpf_instr.h
+PC_FILE := $(OBJDIR)/libxdp.pc
+# C sources generated from a matching .c.in template.
+TEMPLATED_SOURCES := xdp-dispatcher.c
+
+CFLAGS += -I$(HEADER_DIR)
+BPF_CFLAGS += -I$(HEADER_DIR)
+
+
+# Shared-library outputs, the linker version script and the ABI check are only
+# configured when BUILD_STATIC_ONLY is unset; otherwise SHARED_LIBS and
+# CHECK_RULES stay empty and "all"/"check" skip them.
+ifndef BUILD_STATIC_ONLY
+SHARED_LIBS := $(OBJDIR)/libxdp.so \
+ $(OBJDIR)/libxdp.so.$(LIBXDP_MAJOR_VERSION) \
+ $(OBJDIR)/libxdp.so.$(LIBXDP_VERSION)
+VERSION_SCRIPT := libxdp.map
+CHECK_RULES := check_abi
+endif
+
+# Build the static library, the shared libraries (if enabled), the BPF objects
+# and the pkg-config file, then run the checks and (re)generate the man page.
+# Declared phony so a stray file named "all" can never mask the target.
+.PHONY: all
+all: $(STATIC_LIBS) $(SHARED_LIBS) $(XDP_OBJS) $(PC_FILE) check man
+
+# Remove everything this Makefile generates. This includes the intermediate
+# LLVM IR (.ll) files that the $(XDP_OBJS) rule writes next to each BPF object.
+# The object directories are removed only if they end up empty (rmdir), and
+# failure to do so is deliberately ignored.
+.PHONY: clean
+clean:
+	$(Q)rm -f $(STATIC_LIBS) $(STATIC_OBJS) $(SHARED_LIBS) $(SHARED_OBJS) $(XDP_OBJS) $(XDP_OBJS:.o=.ll) $(PC_FILE) $(MAN_OBJ) $(TEMPLATED_SOURCES)
+	$(Q)for d in $(SHARED_OBJDIR) $(STATIC_OBJDIR); do \
+		[ -d "$$d" ] && rmdir "$$d"; done || true
+	$(Q)$(MAKE) -C $(TEST_DIR) clean
+
+# Install headers, pkg-config file, libraries, BPF objects and (when present)
+# the man page below $(DESTDIR). Declared phony so a file or directory named
+# "install" cannot make the target appear up to date.
+# cp -fpR is used for the libraries so that the libxdp.so symlink chain is
+# copied as symlinks rather than as three copies of the library.
+.PHONY: install
+install: all
+	$(Q)install -d -m 0755 $(DESTDIR)$(HDRDIR)
+	$(Q)install -d -m 0755 $(DESTDIR)$(LIBDIR)
+	$(Q)install -d -m 0755 $(DESTDIR)$(LIBDIR)/pkgconfig
+	$(Q)install -d -m 0755 $(DESTDIR)$(BPF_OBJECT_DIR)
+	$(Q)install -m 0644 $(LIB_HEADERS) $(DESTDIR)$(HDRDIR)/
+	$(Q)install -m 0644 $(PC_FILE) $(DESTDIR)$(LIBDIR)/pkgconfig/
+	$(Q)cp -fpR $(SHARED_LIBS) $(STATIC_LIBS) $(DESTDIR)$(LIBDIR)
+	$(Q)install -m 0755 $(XDP_OBJS) $(DESTDIR)$(BPF_OBJECT_DIR)
+	$(if $(MAN_FILES),$(Q)install -m 0755 -d $(DESTDIR)$(MANDIR)/man3)
+	$(if $(MAN_FILES),$(Q)install -m 0644 $(MAN_FILES) $(DESTDIR)$(MANDIR)/man3)
+
+
+# Static archive: the host objects plus the embedded BPF objects.
+$(OBJDIR)/libxdp.a: $(STATIC_OBJS)
+	$(QUIET_LINK)$(AR) rcs $@ $^
+
+# Symlink chain libxdp.so -> libxdp.so.MAJOR -> libxdp.so.VERSION. Each link
+# stores only the file name of its single prerequisite, so the chain stays
+# relative to the directory it lives in.
+$(OBJDIR)/libxdp.so: $(OBJDIR)/libxdp.so.$(LIBXDP_MAJOR_VERSION)
+	$(Q)ln -sf $(notdir $<) $@
+
+$(OBJDIR)/libxdp.so.$(LIBXDP_MAJOR_VERSION): $(OBJDIR)/libxdp.so.$(LIBXDP_VERSION)
+	$(Q)ln -sf $(notdir $<) $@
+
+# Link the real shared object. The linker version script is listed as a
+# prerequisite so that editing it relinks the library; it is filtered out of
+# the link inputs because it is passed separately via --version-script.
+$(OBJDIR)/libxdp.so.$(LIBXDP_VERSION): $(SHARED_OBJS) $(VERSION_SCRIPT)
+	$(QUIET_LINK)$(CC) -shared -Wl,-soname,libxdp.so.$(LIBXDP_MAJOR_VERSION) \
+		-Wl,--version-script=$(VERSION_SCRIPT) \
+		$(filter %.o,$^) $(LDFLAGS) $(LDLIBS) -o $@
+
+# Generate the pkg-config file by substituting @PREFIX@, @LIBDIR@ and
+# @VERSION@ in the template. The template is a prerequisite so that editing it
+# regenerates the file. $(LIBMK) is listed as it is for the other generated
+# files in this Makefile.
+# NOTE(review): this assumes LIBMK covers the makefiles that set PREFIX,
+# LIBDIR and TOOLS_VERSION -- confirm in defines.mk.
+$(OBJDIR)/libxdp.pc: libxdp.pc.template $(LIBMK)
+	$(Q)sed -e "s|@PREFIX@|$(PREFIX)|" \
+		-e "s|@LIBDIR@|$(LIBDIR)|" \
+		-e "s|@VERSION@|$(TOOLS_VERSION)|" \
+		< $< > $@
+
+# Create the per-flavour object directories on demand. They are used only as
+# order-only prerequisites below, so their timestamps never force a rebuild.
+$(STATIC_OBJDIR) $(SHARED_OBJDIR):
+	$(Q)mkdir -p $@
+
+# Host objects for the static archive, compiled with LIBXDP_STATIC defined.
+$(STATIC_OBJDIR)/%.o: %.c $(EXTRA_LIB_DEPS) | $(STATIC_OBJDIR)
+	$(QUIET_CC)$(CC) $(CFLAGS) $(CPPFLAGS) -D LIBXDP_STATIC=1 -Wall -I../../headers -c $< -o $@
+
+# Host objects for the shared library, compiled with $(SHARED_CFLAGS).
+$(SHARED_OBJDIR)/%.o: %.c $(EXTRA_LIB_DEPS) | $(SHARED_OBJDIR)
+	$(QUIET_CC)$(CC) $(CFLAGS) $(CPPFLAGS) $(SHARED_CFLAGS) -Wall -I../../headers -c $< -o $@
+
+# Objects whose global symbols are expected to be exported with a version.
+XDP_IN_SHARED := $(SHARED_OBJDIR)/libxdp.o $(SHARED_OBJDIR)/xsk.o
+
+# Both counters are recursive ("=") on purpose: the $(shell ...) calls must run
+# when check_abi's recipe is expanded (after the library has been built), not
+# while the Makefile is being parsed.
+# GLOBAL_SYM_COUNT: number of distinct defined GLOBAL/DEFAULT symbols in the
+# shared objects, after stripping "@..." and "_vN_N_N..." suffixes.
+GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(XDP_IN_SHARED) | \
+ cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \
+ sed 's/\[.*\]//' | \
+ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
+ sort -u | wc -l)
+# VERSIONED_SYM_COUNT: number of distinct dynamic symbols in libxdp.so that
+# carry an "@LIBXDP_" version tag.
+VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OBJDIR)/libxdp.so | \
+ grep -Eo '[^ ]+@LIBXDP_' | cut -d@ -f1 | sort -u | wc -l)
+
+# Run whichever consistency checks apply to this build; CHECK_RULES is only
+# set when the shared library is built.
+.PHONY: check check_abi
+check: $(CHECK_RULES)
+
+# Fail if the number of global symbols defined by the shared objects differs
+# from the number of versioned symbols exported by libxdp.so, i.e. a symbol
+# was added without being listed in $(VERSION_SCRIPT). On mismatch, both
+# symbol lists are dumped and diffed to show which symbols are affected.
+# All paths are rooted at $(OBJDIR), where libxdp.so is actually built, so
+# the diagnostic path also works when OBJDIR is not the current directory.
+check_abi: $(OBJDIR)/libxdp.so
+	@if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then \
+		echo "Warning: Num of global symbols in $(XDP_IN_SHARED)" \
+		     "($(GLOBAL_SYM_COUNT)) does NOT match with num of" \
+		     "versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \
+		     "Please make sure all symbols are" \
+		     "versioned in $(VERSION_SCRIPT)." >&2; \
+		readelf -s --wide $(XDP_IN_SHARED) | \
+		    cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \
+		    sed 's/\[.*\]//' | \
+		    awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \
+		    sort -u > $(OBJDIR)/libxdp_global_syms.tmp; \
+		readelf --dyn-syms --wide $(OBJDIR)/libxdp.so | \
+		    grep -Eo '[^ ]+@LIBXDP_' | cut -d@ -f1 | \
+		    sort -u > $(OBJDIR)/libxdp_versioned_syms.tmp; \
+		diff -u $(OBJDIR)/libxdp_global_syms.tmp \
+		     $(OBJDIR)/libxdp_versioned_syms.tmp; \
+		rm $(OBJDIR)/libxdp_global_syms.tmp \
+		   $(OBJDIR)/libxdp_versioned_syms.tmp; \
+		exit 1; \
+	fi
+
+
+# Generate each templated C source from its .c.in template by running it
+# through m4 with $(DEFINES). If m4 fails, the partial output is removed and
+# m4's exit status is propagated.
+$(TEMPLATED_SOURCES): %.c: %.c.in Makefile
+ $(QUIET_M4)$(M4) $(DEFINES) $< > $@ || ( ret=$$?; rm -f $@; exit $$ret )
+
+# Wrap each compiled BPF object as raw binary data inside a relocatable object
+# (ld -r with binary input format), then rename the resulting .data section to
+# a read-only .rodata section.
+$(EMBEDDED_XDP_OBJS): %.embed.o: %.o
+ $(QUIET_GEN)$(LD) -r -b binary -o $@ -z noexecstack --format=binary $<
+ $(Q)$(OBJCOPY) --rename-section .data=.rodata,alloc,load,readonly,data,contents $@
+
+# Build each BPF object in two steps: clang emits LLVM IR for the BPF target
+# into <name>.ll, then llc turns that IR into the final object file.
+# Note that the .ll file is a by-product of this rule and not a target itself.
+# Warnings are errors here (-Werror), apart from the three -Wno-* exceptions.
+$(XDP_OBJS): %.o: %.c $(BPF_HEADERS) $(LIBMK)
+ $(QUIET_CLANG)$(CLANG) -S \
+ -target $(BPF_TARGET) \
+ -D __BPF_TRACING__ \
+ $(BPF_CFLAGS) \
+ -Wall \
+ -Wno-unused-value \
+ -Wno-pointer-sign \
+ -Wno-compare-distinct-pointer-types \
+ -Werror \
+ -O2 -emit-llvm -c -g -o ${@:.o=.ll} $<
+ $(QUIET_LLC)$(LLC) -march=$(BPF_TARGET) -filetype=obj -o $@ ${@:.o=.ll}
+
+# "man" is a phony entry point. When EMACS is empty it does nothing; otherwise
+# it regenerates the man page from README.org.
+.PHONY: man
+ifeq ($(EMACS),)
+man: ;
+else
+man: $(MAN_PAGE)
+# Export README.org through the org-mode man exporter (ox-man), then copy
+# README.org's timestamp onto the exported file.
+$(MAN_OBJ): README.org $(LIBMK)
+ $(Q)$(EMACS) -Q --batch --find-file $< --eval "(progn (require 'ox-man)(org-man-export-to-man))"
+ $(Q)touch -r $< $@
+
+# Post-process the export into the final man page:
+#  - DATE on line 1 becomes the committer date of the last git commit touching
+#    README.org, or today's date if that cannot be obtained;
+#  - VERSION on line 1 becomes v$(TOOLS_VERSION);
+#  - a '.SH "<name> - <description>"' heading within the first five lines is
+#    rewritten into separate NAME and SYNOPSIS sections.
+$(MAN_PAGE): $(MAN_OBJ) $(LIBMK)
+ $(QUIET_GEN)MODDATE=$$(git log -1 --pretty="format:%cI" README.org 2>/dev/null); \
+ [ "$$?" -eq "0" ] && DATE=$$(date '+%B %_d, %Y' -d "$$MODDATE") || DATE=$$(date '+%B %_d, %Y'); \
+ sed -e "1 s/DATE/$$DATE/" -e "1 s/VERSION/v$(TOOLS_VERSION)/" -e '1,5 s/^.SH "\([^"]\+\) - \([^"]\+\)"/.SH "NAME"\n\1 \\- \2\n.SH "SYNOPSIS"/' $< > $@
+
+endif
+
+# Build everything, then run the "run" target of the Makefile in $(TEST_DIR).
+.PHONY: test
+test: all
+ $(Q)$(MAKE) -C $(TEST_DIR) run
diff --git a/lib/libxdp/README.org b/lib/libxdp/README.org
new file mode 100644
index 0000000..9ca7f2e
--- /dev/null
+++ b/lib/libxdp/README.org
@@ -0,0 +1,437 @@
+#+EXPORT_FILE_NAME: libxdp
+#+TITLE: libxdp
+#+OPTIONS: ^:nil
+#+MAN_CLASS_OPTIONS: :section-id "3\" \"DATE\" \"VERSION\" \"libxdp - library for loading XDP programs"
+# This file serves both as a README on github, and as the source for the man
+# page; the latter through the org-mode man page export support.
+# .
+# To export the man page, simply use the org-mode exporter; (require 'ox-man) if
+# it's not available. There's also a Makefile rule to export it.
+
+* libxdp - library for attaching XDP programs and using AF_XDP sockets
+
+This directory contains the files for the =libxdp= library for
+attaching XDP programs to network interfaces and using AF_XDP
+sockets. The library is fairly lightweight and relies on =libbpf= to
+do the heavy lifting for processing eBPF object files etc.
+
+=Libxdp= provides two primary features on top of =libbpf=. The first is
+the ability to load multiple XDP programs in sequence on a single
+network device (which is not natively supported by the kernel). This
+support relies on the =freplace= functionality in the kernel, which
+makes it possible to attach an eBPF program as a replacement for a
+global function in another (already loaded) eBPF program. The second
+main feature is helper functions for configuring AF_XDP sockets as
+well as reading and writing packets from these sockets.
+
+Some of the functionality provided by libxdp depends on particular kernel
+features; see the "Kernel and BPF program feature compatibility" section below
+for details.
+
+** Using libxdp from an application
+
+Basic usage of libxdp from an application is quite straightforward. The
+following example loads, then unloads, an XDP program from the 'lo' interface:
+
+#+begin_src C
+#define IFINDEX 1
+
+struct xdp_program *prog;
+int err;
+
+prog = xdp_program__open_file("my-program.o", "section_name", NULL);
+err = xdp_program__attach(prog, IFINDEX, XDP_MODE_NATIVE, 0);
+
+if (!err)
+ xdp_program__detach(prog, IFINDEX, XDP_MODE_NATIVE, 0);
+
+xdp_program__close(prog);
+#+end_src
+
+The =xdp_program= structure is an opaque structure that represents a single XDP
+program. =libxdp= contains functions to create such a struct either from a BPF
+object file on disk, from a =libbpf= BPF object, or from an identifier of a
+program that is already loaded into the kernel:
+
+#+begin_src C
+struct xdp_program *xdp_program__from_bpf_obj(struct bpf_object *obj,
+ const char *section_name);
+struct xdp_program *xdp_program__find_file(const char *filename,
+ const char *section_name,
+ struct bpf_object_open_opts *opts);
+struct xdp_program *xdp_program__open_file(const char *filename,
+ const char *section_name,
+ struct bpf_object_open_opts *opts);
+struct xdp_program *xdp_program__from_fd(int fd);
+struct xdp_program *xdp_program__from_id(__u32 prog_id);
+struct xdp_program *xdp_program__from_pin(const char *pin_path);
+#+end_src
+
+The functions that open a BPF object or file need the function name of the XDP
+program as well as the file name or object, since an ELF file can contain
+multiple XDP programs. The =xdp_program__find_file()= function takes a filename
+without a path, and will look for the object in =LIBXDP_OBJECT_PATH= which
+defaults to =/usr/lib/bpf= (or =/usr/lib64/bpf= on systems using a split library
+path). This is convenient for applications shipping pre-compiled eBPF object
+files.
+
+The =xdp_program__attach()= function will attach the program to an interface,
+building a dispatcher program to execute it. Multiple programs can be attached
+at once with =xdp_program__attach_multi()=; they will be sorted in order of
+their run priority, and execution from one program to the next will proceed
+based on the chain call actions defined for each program (see the *Program
+metadata* section below). Because the loading process involves modifying the
+attach type of the program, the attach functions only work with =struct
+xdp_program= objects that have not yet been loaded into the kernel.
+
+When using the attach functions to attach to an interface that already has an
+XDP program loaded, libxdp will attempt to add the program to the list of loaded
+programs. However, this may fail, either due to missing kernel support, or
+because the already-attached program was not loaded using a dispatcher
+compatible with libxdp. If the kernel support for incremental attach (merged in
+kernel 5.10) is missing, the only way to actually run multiple programs on a
+single interface is to attach them all at the same time with
+=xdp_program__attach_multi()=. If the existing program is not an XDP dispatcher,
+that program will have to be detached from the interface before libxdp can
+attach a new one. This can be done by calling =xdp_program__detach()= with a
+reference to the loaded program; but note that this will of course break any
+application relying on that other XDP program to be present.
+
+* Program metadata
+
+To support multiple XDP programs on the same interface, libxdp uses two pieces
+of metadata for each XDP program: Run priority and chain call actions.
+
+*** Run priority
+This is the priority of the program and is a simple integer used
+to sort programs when loading multiple programs onto the same interface.
+Programs that wish to run early (such as a packet filter) should set low values
+for this, while programs that want to run later (such as a packet forwarder or
+counter) should set higher values. Note that later programs are only run if the
+previous programs end with a return code that is part of its chain call actions
+(see below). If not specified, the default priority value is 50.
+
+*** Chain call actions
+These are the program return codes that the program indicates for packets that
+should continue processing. If the program returns one of these actions, later
+programs in the call chain will be run, whereas if it returns any other action,
+processing will be interrupted, and the XDP dispatcher will return the verdict
+immediately. If not set, this defaults to just XDP_PASS, which is likely the
+value most programs should use.
+
+*** Specifying metadata
+The metadata outlined above is specified as BTF information embedded in the ELF
+file containing the XDP program. The =xdp_helpers.h= file shipped with libxdp
+contains helper macros to include this information, which can be used as
+follows:
+
+#+begin_src C
+#include <bpf/bpf_helpers.h>
+#include <xdp/xdp_helpers.h>
+
+struct {
+ __uint(priority, 10);
+ __uint(XDP_PASS, 1);
+ __uint(XDP_DROP, 1);
+} XDP_RUN_CONFIG(my_xdp_func);
+#+end_src
+
+This example specifies that the XDP program in =my_xdp_func= should have
+priority 10 and that its chain call actions are =XDP_PASS= and =XDP_DROP=.
+In a source file with multiple XDP programs in the same file, a definition like
+the above can be included for each program (main XDP function). Any program that
+does not specify any config information will use the default values outlined
+above.
+
+*** Inspecting and modifying metadata
+
+=libxdp= exposes the following functions that an application can use to inspect
+and modify the metadata on an XDP program. Modification is only possible before
+a program is attached on an interface. These functions won't modify the BTF
+information itself, but the new values will be stored as part of the program
+attachment.
+
+#+begin_src C
+unsigned int xdp_program__run_prio(const struct xdp_program *xdp_prog);
+int xdp_program__set_run_prio(struct xdp_program *xdp_prog,
+ unsigned int run_prio);
+bool xdp_program__chain_call_enabled(const struct xdp_program *xdp_prog,
+ enum xdp_action action);
+int xdp_program__set_chain_call_enabled(struct xdp_program *prog,
+ unsigned int action,
+ bool enabled);
+int xdp_program__print_chain_call_actions(const struct xdp_program *prog,
+ char *buf,
+ size_t buf_len);
+#+end_src
+
+* The dispatcher program
+To support multiple non-offloaded programs on the same network interface,
+=libxdp= uses a *dispatcher program* which is a small wrapper program that will
+call each component program in turn, inspect the return code, and then chain call
+to the next program based on the chain call actions of the previous program (see
+the *Program metadata* section above).
+
+While applications using =libxdp= do not need to know the details of the
+dispatcher program to just load an XDP program onto an interface, =libxdp= does
+expose the dispatcher and its attached component programs, which can be used to
+list the programs currently attached to an interface.
+
+The structure used for this is =struct xdp_multiprog=, which can only be
+constructed from the programs loaded on an interface based on ifindex. The API
+for getting a multiprog reference and iterating through the attached programs
+looks like this:
+
+#+begin_src C
+struct xdp_multiprog *xdp_multiprog__get_from_ifindex(int ifindex);
+struct xdp_program *xdp_multiprog__next_prog(const struct xdp_program *prog,
+ const struct xdp_multiprog *mp);
+void xdp_multiprog__close(struct xdp_multiprog *mp);
+int xdp_multiprog__detach(struct xdp_multiprog *mp, int ifindex);
+enum xdp_attach_mode xdp_multiprog__attach_mode(const struct xdp_multiprog *mp);
+struct xdp_program *xdp_multiprog__main_prog(const struct xdp_multiprog *mp);
+struct xdp_program *xdp_multiprog__hw_prog(const struct xdp_multiprog *mp);
+bool xdp_multiprog__is_legacy(const struct xdp_multiprog *mp);
+#+end_src
+
+If a non-offloaded program is attached to the interface which =libxdp= doesn't
+recognise as a dispatcher program, an =xdp_multiprog= structure will still be
+returned, and =xdp_multiprog__is_legacy()= will return true for that program
+(note that this also holds true if only an offloaded program is loaded). A
+reference to that (regular) XDP program can be obtained by
+=xdp_multiprog__main_prog()=. If the program attached to the interface *is* a
+dispatcher program, =xdp_multiprog__main_prog()= will return a reference to the
+dispatcher program itself, which is mainly useful for obtaining other data about
+that program (such as the program ID). A reference to an offloaded program can
+be acquired using =xdp_multiprog__hw_prog()=. Function
+=xdp_multiprog__attach_mode()= returns the attach mode of the non-offloaded
+program; whether an offloaded program is attached should be checked through
+=xdp_multiprog__hw_prog()=.
+
+** Pinning in bpffs
+The kernel will automatically detach component programs from the dispatcher once
+the last reference to them disappears. To prevent this from happening, =libxdp=
+will pin the component program references in =bpffs= before attaching the
+dispatcher to the network interface. The pathnames generated for pinning are as
+follows:
+
+- /sys/fs/bpf/xdp/dispatch-IFINDEX-DID - dispatcher program for IFINDEX with BPF program ID DID
+- /sys/fs/bpf/xdp/dispatch-IFINDEX-DID/prog0-prog - component program 0, program reference
+- /sys/fs/bpf/xdp/dispatch-IFINDEX-DID/prog0-link - component program 0, bpf_link reference
+- /sys/fs/bpf/xdp/dispatch-IFINDEX-DID/prog1-prog - component program 1, program reference
+- /sys/fs/bpf/xdp/dispatch-IFINDEX-DID/prog1-link - component program 1, bpf_link reference
+- etc, up to ten component programs
+
+If set, the =LIBXDP_BPFFS= environment variable will override the location of
+=bpffs=, but the =xdp= subdirectory is always used. If no =bpffs= is mounted,
+libxdp will consult the environment variable =LIBXDP_BPFFS_AUTOMOUNT=. If this
+is set to =1=, libxdp will attempt to automount a bpffs. If not, libxdp will
+fall back to loading a single program without a dispatcher, as if the kernel did
+not support the features needed for multiprog attachment.
+
+* Using AF_XDP sockets
+
+Libxdp implements helper functions for configuring AF_XDP sockets as
+well as reading and writing packets from these sockets. AF_XDP sockets
+can be used to redirect packets to user-space at high rates from an
+XDP program. Note that this functionality used to reside in libbpf,
+but has now been moved over to libxdp as it is a better fit for this
+library. As of the 1.0 release of libbpf, the AF_XDP socket support
+will be removed and all future development will be performed
+in libxdp instead.
+
+For an overview of AF_XDP sockets, please refer to this Linux Plumbers
+paper
+(http://vger.kernel.org/lpc_net2018_talks/lpc18_pres_af_xdp_perf-v3.pdf)
+and the documentation in the Linux kernel
+(Documentation/networking/af_xdp.rst or
+https://www.kernel.org/doc/html/latest/networking/af_xdp.html).
+
+For an example on how to use the interface, take a look at the AF_XDP-example
+and AF_XDP-forwarding programs in the bpf-examples repository:
+https://github.com/xdp-project/bpf-examples.
+
+** Control path
+
+Libxdp provides helper functions for creating and destroying umems and
+sockets as shown below. The first thing that a user generally wants to
+do is to create a umem area. This is the area that will contain all
+packets received and the ones that are going to be sent. After that,
+AF_XDP sockets can be created tied to this umem. These can either be
+sockets that have exclusive ownership of that umem through
+xsk_socket__create() or shared with other sockets using
+xsk_socket__create_shared(). There is one option called
+XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD that can be set in the
+libxdp_flags field (also called libbpf_flags for compatibility
+reasons). This will make libxdp not load any XDP program or set any
+BPF maps, which is a must if users want to add their own XDP program.
+
+#+begin_src C
+int xsk_umem__create(struct xsk_umem **umem,
+ void *umem_area, __u64 size,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_umem_config *config);
+int xsk_socket__create(struct xsk_socket **xsk,
+ const char *ifname, __u32 queue_id,
+ struct xsk_umem *umem,
+ struct xsk_ring_cons *rx,
+ struct xsk_ring_prod *tx,
+ const struct xsk_socket_config *config);
+int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
+ const char *ifname,
+ __u32 queue_id, struct xsk_umem *umem,
+ struct xsk_ring_cons *rx,
+ struct xsk_ring_prod *tx,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_socket_config *config);
+int xsk_umem__delete(struct xsk_umem *umem);
+void xsk_socket__delete(struct xsk_socket *xsk);
+#+end_src
+
+There are also two helper functions to get the file descriptor of a
+umem or a socket. These are needed when using standard Linux syscalls
+such as poll(), recvmsg(), sendto(), etc.
+
+#+begin_src C
+int xsk_umem__fd(const struct xsk_umem *umem);
+int xsk_socket__fd(const struct xsk_socket *xsk);
+#+end_src
+
+The control path also provides two APIs for setting up AF_XDP sockets when the
+process that is going to use the AF_XDP socket is non-privileged. These two
+functions perform the operations that require privileges and can be executed
+from some form of control process that has the necessary privileges. The
+xsk_socket__create executed on the non-privileged process will then skip these
+two steps. For an example on how to use these, please take a look at the
+AF_XDP-example program in the bpf-examples repository:
+https://github.com/xdp-project/bpf-examples/tree/master/AF_XDP-example.
+
+#+begin_src C
+int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd);
+int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd);
+#+end_src
+
+** Data path
+
+For performance reasons, all the data path functions are static inline
+functions found in the xsk.h header file so they can be optimized into
+the target application binary for best possible performance. There are
+four FIFO rings of two main types: producer rings (fill and Tx) and
+consumer rings (Rx and completion). The producer rings use
+xsk_ring_prod functions and consumer rings use xsk_ring_cons
+functions. For producer rings, you start with =reserving= one or more
+slots in a producer ring and then when they have been filled out, you
+=submit= them so that the kernel will act on them. For a consumer
+ring, you =peek= if there are any new packets in the ring and if so
+you can read them from the ring. Once you are done reading them, you
+=release= them back to the kernel so it can use them for new
+packets. There is also a =cancel= operation for consumer rings if the
+application does not want to consume all packets received with the
+peek operation.
+
+#+begin_src C
+__u32 xsk_ring_prod__reserve(struct xsk_ring_prod *prod, __u32 nb, __u32 *idx);
+void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb);
+__u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx);
+void xsk_ring_cons__cancel(struct xsk_ring_cons *cons, __u32 nb);
+void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb);
+#+end_src
+
+The functions below are used for reading and writing the descriptors
+of the rings. xsk_ring_prod__fill_addr() and xsk_ring_prod__tx_desc()
+*write* entries in the fill and Tx rings respectively, while
+xsk_ring_cons__comp_addr() and xsk_ring_cons__rx_desc() *read* entries from
+the completion and Rx rings respectively. The =idx= is the parameter
+returned in the xsk_ring_prod__reserve or xsk_ring_cons__peek
+calls. To advance to the next entry, simply do =idx++=.
+
+#+begin_src C
+__u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill, __u32 idx);
+struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx, __u32 idx);
+const __u64 *xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx);
+const struct xdp_desc *xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx);
+#+end_src
+
+The xsk_umem functions are used to get a pointer to the packet data
+itself, always located inside the umem. In the default aligned mode,
+you can get the addr variable straight from the Rx descriptor. But in
+unaligned mode, you need to use the three last functions below as the
+offset used is carried in the upper 16 bits of the addr. Therefore,
+you cannot use the addr straight from the descriptor in the unaligned
+case.
+
+#+begin_src C
+void *xsk_umem__get_data(void *umem_area, __u64 addr);
+__u64 xsk_umem__extract_addr(__u64 addr);
+__u64 xsk_umem__extract_offset(__u64 addr);
+__u64 xsk_umem__add_offset_to_addr(__u64 addr);
+#+end_src
+
+There is one more function in the data path and that checks if the
+need_wakeup flag is set. Use of this flag is highly encouraged and
+should be enabled by setting =XDP_USE_NEED_WAKEUP= bit in the
+=xdp_bind_flags= field that is provided to the
+xsk_socket__create[_shared]() calls. If this function returns true,
+then you need to call =recvmsg()=, =sendto()=, or =poll()= depending on the
+situation. =recvmsg()= if you are *receiving*, or =sendto()= if you are
+*sending*. =poll()= can be used for both cases and provides the ability to
+sleep too, as with any other socket. But note that poll is a slower
+operation than the other two.
+
+#+begin_src C
+int xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r);
+#+end_src
+
+For an example on how to use all these APIs, take a look at the AF_XDP-example
+and AF_XDP-forwarding programs in the bpf-examples repository:
+https://github.com/xdp-project/bpf-examples.
+
+* Kernel and BPF program feature compatibility
+
+The features exposed by libxdp rely on certain kernel versions and BPF
+features to work. To get the full benefit of all features, libxdp needs to be
+used with kernel 5.10 or newer, unless the commits mentioned below have been
+backported. However, libxdp will probe the kernel and transparently fall back to
+legacy loading procedures, so it is possible to use the library with older
+versions, although some features will be unavailable, as detailed below.
+
+The ability to attach multiple BPF programs to a single interface relies on the
+kernel "BPF program extension" feature which was introduced by commit
+be8704ff07d2 ("bpf: Introduce dynamic program extensions") in the upstream
+kernel and first appeared in kernel release 5.6. To *incrementally* attach
+multiple programs, a further refinement added by commit 4a1e7c0c63e0 ("bpf:
+Support attaching freplace programs to multiple attach points") is needed; this
+first appeared in the upstream kernel version 5.10. The functionality relies on
+the "BPF trampolines" feature which is unfortunately only available on the
+x86_64 architecture. In other words, kernels before 5.6 can only attach a single
+XDP program to each interface, kernels 5.6+ can attach multiple programs if they
+are all attached at the same time, and kernels 5.10+ have full support for XDP
+multiprog on x86_64. On other architectures, only a single program can be
+attached to each interface.
+
+To load AF_XDP programs, kernel support for AF_XDP sockets needs to be included
+and enabled in the kernel build. In addition, when using AF_XDP sockets, an XDP
+program is also loaded on the interface. The XDP program used for this by libxdp
+requires the ability to do map lookups into XSK maps, which was introduced with
+commit fada7fdc83c0 ("bpf: Allow bpf_map_lookup_elem() on an xskmap") in kernel
+5.3. This means that the minimum required kernel version for using AF_XDP is
+kernel 5.3; however, for the AF_XDP XDP program to co-exist with other programs,
+the same constraints for multiprog apply as outlined above.
+
+Note that some Linux distributions backport features to earlier kernel versions,
+especially in enterprise kernels; for instance, Red Hat Enterprise Linux kernels
+include everything needed for libxdp to function since RHEL 8.5.
+
+Finally, XDP programs loaded using the multiprog facility must include type
+information (using the BPF Type Format, BTF). To get this, compile the programs
+with a recent version of Clang/LLVM (version 10+), and enable debug information
+when compiling (using the =-g= option).
+
+* BUGS
+Please report any bugs on Github: https://github.com/xdp-project/xdp-tools/issues
+
+* AUTHORS
+libxdp and this man page were written by Toke
+Høiland-Jørgensen. AF_XDP support and documentation were contributed by
+Magnus Karlsson.
diff --git a/lib/libxdp/bpf_instr.h b/lib/libxdp/bpf_instr.h
new file mode 100644
index 0000000..ff1a396
--- /dev/null
+++ b/lib/libxdp/bpf_instr.h
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __BPF_INSTR_H
+#define __BPF_INSTR_H
+
+#include <linux/bpf.h>
+
+/* Build a struct bpf_insn compound literal with every field (opcode, */
+/* destination/source register, offset, immediate) supplied by the caller. */
+#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = CODE, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* BPF_ALU64-class instruction applying operation OP to register DST with the */
+/* immediate IMM (BPF_K operand form); src_reg and off are zero. */
+#define BPF_ALU64_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* BPF_ALU64 | BPF_MOV instruction with immediate operand: DST receives IMM. */
+#define BPF_MOV64_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* BPF_JMP | BPF_EXIT instruction; all operand fields are zero. */
+#define BPF_EXIT_INSN() \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_EXIT, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = 0 })
+
+/* BPF_JMP | BPF_CALL instruction; imm holds FUNC expressed relative to */
+/* BPF_FUNC_unspec. */
+#define BPF_EMIT_CALL(FUNC) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_CALL, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = ((FUNC) - BPF_FUNC_unspec) })
+
+/* BPF_LDX | BPF_MEM instruction of width SIZE using registers DST and SRC */
+/* and offset OFF; imm is zero. */
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* BPF_STX | BPF_MEM instruction of width SIZE using registers DST and SRC */
+/* and offset OFF; imm is zero. */
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* BPF_ST | BPF_MEM instruction of width SIZE using register DST, offset OFF */
+/* and the immediate IMM; src_reg is zero. */
+#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* BPF_ALU64 | BPF_MOV instruction with register operand (BPF_X form): */
+/* DST and SRC registers; off and imm are zero. */
+#define BPF_MOV64_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* BPF_ALU | BPF_MOV instruction with immediate operand: same shape as */
+/* BPF_MOV64_IMM but in the BPF_ALU instruction class instead of BPF_ALU64. */
+#define BPF_MOV32_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* BPF_LD | BPF_DW | BPF_IMM load. Note that this expands to TWO struct */
+/* bpf_insn literals separated by a comma, so it fills two consecutive slots */
+/* of an instruction array initializer: the first carries DST, SRC, OFF1 and */
+/* IMM1, the second has a zero opcode and carries OFF2 and IMM2. */
+#define BPF_LD_IMM64_RAW_FULL(DST, SRC, OFF1, OFF2, IMM1, IMM2) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_DW | BPF_IMM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF1, \
+ .imm = IMM1 }), \
+ ((struct bpf_insn) { \
+ .code = 0, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = OFF2, \
+ .imm = IMM2 })
+
+/* Two-slot BPF_LD_IMM64_RAW_FULL load with src_reg set to BPF_PSEUDO_MAP_FD */
+/* and MAP_FD as the first immediate; the second immediate is zero. */
+#define BPF_LD_MAP_FD(DST, MAP_FD) \
+ BPF_LD_IMM64_RAW_FULL(DST, BPF_PSEUDO_MAP_FD, 0, 0, \
+ MAP_FD, 0)
+
+/* Two-slot BPF_LD_IMM64_RAW_FULL load with src_reg set to */
+/* BPF_PSEUDO_MAP_VALUE, MAP_FD as the first immediate and VALUE_OFF as the */
+/* second. */
+#define BPF_LD_MAP_VALUE(DST, MAP_FD, VALUE_OFF) \
+ BPF_LD_IMM64_RAW_FULL(DST, BPF_PSEUDO_MAP_VALUE, 0, 0, \
+ MAP_FD, VALUE_OFF)
+
+/* BPF_JMP-class instruction with condition OP on register DST and the */
+/* immediate IMM (BPF_K form); OFF goes into the off field. */
+#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Same shape as BPF_JMP_IMM but in the BPF_JMP32 instruction class. */
+#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+#endif
diff --git a/lib/libxdp/compat.h b/lib/libxdp/compat.h
new file mode 100644
index 0000000..6e9bc34
--- /dev/null
+++ b/lib/libxdp/compat.h
@@ -0,0 +1,13 @@
+#ifndef __COMPAT_H
+#define __COMPAT_H
+
+#ifndef HAVE_SECURE_GETENV
+#include <stdlib.h>
+#include <sys/auxv.h>
+/* Fallback for C libraries that do not provide secure_getenv().
+ *
+ * Modelled on the musl implementation
+ * (source: https://www.openwall.com/lists/musl/2019/05/28/3), but that code
+ * reads the libc-internal 'libc.secure' flag, which is not visible to
+ * applications. The same information is exported to user space through the
+ * AT_SECURE auxiliary vector entry, so query that instead.
+ *
+ * Returns NULL when the process runs in "secure execution" mode (e.g. a
+ * set-user-ID binary), otherwise the result of getenv(name).
+ */
+static inline char *secure_getenv(const char *name)
+{
+	return getauxval(AT_SECURE) ? NULL : getenv(name);
+}
+#endif
+
+#endif
diff --git a/lib/libxdp/libxdp.3 b/lib/libxdp/libxdp.3
new file mode 100644
index 0000000..800d021
--- /dev/null
+++ b/lib/libxdp/libxdp.3
@@ -0,0 +1,503 @@
+.TH "libxdp" "3" "November 17, 2022" "v1.3.1" "libxdp - library for loading XDP programs"
+
+.SH "NAME"
+libxdp \- library for attaching XDP programs and using AF_XDP sockets
+.SH "SYNOPSIS"
+.PP
+This directory contains the files for the \fIlibxdp\fP library for
+attaching XDP programs to network interfaces and using AF_XDP
+sockets. The library is fairly lightweight and relies on \fIlibbpf\fP to
+do the heavy lifting for processing eBPF object files etc.
+
+.PP
+\fILibxdp\fP provides two primary features on top of \fIlibbpf\fP. The first is
+the ability to load multiple XDP programs in sequence on a single
+network device (which is not natively supported by the kernel). This
+support relies on the \fIfreplace\fP functionality in the kernel, which
+makes it possible to attach an eBPF program as a replacement for a
+global function in another (already loaded) eBPF program. The second
+main feature is helper functions for configuring AF_XDP sockets as
+well as reading and writing packets from these sockets.
+
+.PP
+Some of the functionality provided by libxdp depends on particular kernel
+features; see the "Kernel feature compatibility" section below for details.
+
+.SS "Using libxdp from an application"
+.PP
+Basic usage of libxdp from an application is quite straightforward. The
+following example loads, then unloads, an XDP program from the 'lo' interface:
+
+.RS
+.nf
+\fC#define IFINDEX 1
+
+struct xdp_program *prog;
+int err;
+
+prog = xdp_program__open_file("my-program.o", "section_name", NULL);
+err = xdp_program__attach(prog, IFINDEX, XDP_MODE_NATIVE, 0);
+
+if (!err)
+ xdp_program__detach(prog, IFINDEX, XDP_MODE_NATIVE, 0);
+
+xdp_program__close(prog);
+\fP
+.fi
+.RE
+
+.PP
+The \fIxdp_program\fP structure is an opaque structure that represents a single XDP
+program. \fIlibxdp\fP contains functions to create such a struct either from a BPF
+object file on disk, from a \fIlibbpf\fP BPF object, or from an identifier of a
+program that is already loaded into the kernel:
+
+.RS
+.nf
+\fCstruct xdp_program *xdp_program__from_bpf_obj(struct bpf_object *obj,
+ const char *section_name);
+struct xdp_program *xdp_program__find_file(const char *filename,
+ const char *section_name,
+ struct bpf_object_open_opts *opts);
+struct xdp_program *xdp_program__open_file(const char *filename,
+ const char *section_name,
+ struct bpf_object_open_opts *opts);
+struct xdp_program *xdp_program__from_fd(int fd);
+struct xdp_program *xdp_program__from_id(__u32 prog_id);
+struct xdp_program *xdp_program__from_pin(const char *pin_path);
+\fP
+.fi
+.RE
+
+.PP
+The functions that open a BPF object or file need the function name of the XDP
+program as well as the file name or object, since an ELF file can contain
+multiple XDP programs. The \fIxdp_program__find_file()\fP function takes a filename
+without a path, and will look for the object in \fILIBXDP_OBJECT_PATH\fP which
+defaults to \fI/usr/lib/bpf\fP (or \fI/usr/lib64/bpf\fP on systems using a split library
+path). This is convenient for applications shipping pre-compiled eBPF object
+files.
+
+.PP
+The \fIxdp_program__attach()\fP function will attach the program to an interface,
+building a dispatcher program to execute it. Multiple programs can be attached
+at once with \fIxdp_program__attach_multi()\fP; they will be sorted in order of
+their run priority, and execution from one program to the next will proceed
+based on the chain call actions defined for each program (see the \fBProgram
+metadata\fP section below). Because the loading process involves modifying the
+attach type of the program, the attach functions only work with \fIstruct
+xdp_program\fP objects that have not yet been loaded into the kernel.
+
+.PP
+When using the attach functions to attach to an interface that already has an
+XDP program loaded, libxdp will attempt to add the program to the list of loaded
+programs. However, this may fail, either due to missing kernel support, or
+because the already-attached program was not loaded using a dispatcher
+compatible with libxdp. If the kernel support for incremental attach (merged in
+kernel 5.10) is missing, the only way to actually run multiple programs on a
+single interface is to attach them all at the same time with
+\fIxdp_program__attach_multi()\fP. If the existing program is not an XDP dispatcher,
+that program will have to be detached from the interface before libxdp can
+attach a new one. This can be done by calling \fIxdp_program__detach()\fP with a
+reference to the loaded program; but note that this will of course break any
+application relying on that other XDP program to be present.
+
+.SH "Program metadata"
+.PP
+To support multiple XDP programs on the same interface, libxdp uses two pieces
+of metadata for each XDP program: Run priority and chain call actions.
+
+.SS "Run priority"
+.PP
+This is the priority of the program and is a simple integer used
+to sort programs when loading multiple programs onto the same interface.
+Programs that wish to run early (such as a packet filter) should set low values
+for this, while programs that want to run later (such as a packet forwarder or
+counter) should set higher values. Note that later programs are only run if the
+previous programs end with a return code that is part of its chain call actions
+(see below). If not specified, the default priority value is 50.
+
+.SS "Chain call actions"
+.PP
+These are the program return codes that the program indicates for packets that
+should continue processing. If the program returns one of these actions, later
+programs in the call chain will be run, whereas if it returns any other action,
+processing will be interrupted, and the XDP dispatcher will return the verdict
+immediately. If not set, this defaults to just XDP_PASS, which is likely the
+value most programs should use.
+
+.SS "Specifying metadata"
+.PP
+The metadata outlined above is specified as BTF information embedded in the ELF
+file containing the XDP program. The \fIxdp_helpers.h\fP file shipped with libxdp
+contains helper macros to include this information, which can be used as
+follows:
+
+.RS
+.nf
+\fC#include <bpf/bpf_helpers.h>
+#include <xdp/xdp_helpers.h>
+
+struct {
+ __uint(priority, 10);
+ __uint(XDP_PASS, 1);
+ __uint(XDP_DROP, 1);
+} XDP_RUN_CONFIG(my_xdp_func);
+\fP
+.fi
+.RE
+
+.PP
+This example specifies that the XDP program in \fImy_xdp_func\fP should have
+priority 10 and that its chain call actions are \fIXDP_PASS\fP and \fIXDP_DROP\fP.
+In a source file with multiple XDP programs in the same file, a definition like
+the above can be included for each program (main XDP function). Any program that
+does not specify any config information will use the default values outlined
+above.
+
+.SS "Inspecting and modifying metadata"
+.PP
+\fIlibxdp\fP exposes the following functions that an application can use to inspect
+and modify the metadata on an XDP program. Modification is only possible before
+a program is attached on an interface. These functions won't modify the BTF
+information itself, but the new values will be stored as part of the program
+attachment.
+
+.RS
+.nf
+\fCunsigned int xdp_program__run_prio(const struct xdp_program *xdp_prog);
+int xdp_program__set_run_prio(struct xdp_program *xdp_prog,
+ unsigned int run_prio);
+bool xdp_program__chain_call_enabled(const struct xdp_program *xdp_prog,
+ enum xdp_action action);
+int xdp_program__set_chain_call_enabled(struct xdp_program *prog,
+ unsigned int action,
+ bool enabled);
+int xdp_program__print_chain_call_actions(const struct xdp_program *prog,
+ char *buf,
+ size_t buf_len);
+\fP
+.fi
+.RE
+
+.SH "The dispatcher program"
+.PP
+To support multiple non-offloaded programs on the same network interface,
+\fIlibxdp\fP uses a \fBdispatcher program\fP which is a small wrapper program that will
+call each component program in turn, inspect the return code, and then chain call
+to the next program based on the chain call actions of the previous program (see
+the \fBProgram metadata\fP section above).
+
+.PP
+While applications using \fIlibxdp\fP do not need to know the details of the
+dispatcher program to just load an XDP program onto an interface, \fIlibxdp\fP does
+expose the dispatcher and its attached component programs, which can be used to
+list the programs currently attached to an interface.
+
+.PP
+The structure used for this is \fIstruct xdp_multiprog\fP, which can only be
+constructed from the programs loaded on an interface based on ifindex. The API
+for getting a multiprog reference and iterating through the attached programs
+looks like this:
+
+.RS
+.nf
+\fCstruct xdp_multiprog *xdp_multiprog__get_from_ifindex(int ifindex);
+struct xdp_program *xdp_multiprog__next_prog(const struct xdp_program *prog,
+ const struct xdp_multiprog *mp);
+void xdp_multiprog__close(struct xdp_multiprog *mp);
+int xdp_multiprog__detach(struct xdp_multiprog *mp, int ifindex);
+enum xdp_attach_mode xdp_multiprog__attach_mode(const struct xdp_multiprog *mp);
+struct xdp_program *xdp_multiprog__main_prog(const struct xdp_multiprog *mp);
+struct xdp_program *xdp_multiprog__hw_prog(const struct xdp_multiprog *mp);
+bool xdp_multiprog__is_legacy(const struct xdp_multiprog *mp);
+\fP
+.fi
+.RE
+
+.PP
+If a non-offloaded program is attached to the interface which \fIlibxdp\fP doesn't
+recognise as a dispatcher program, an \fIxdp_multiprog\fP structure will still be
+returned, and \fIxdp_multiprog__is_legacy()\fP will return true for that program
+(note that this also holds true if only an offloaded program is loaded). A
+reference to that (regular) XDP program can be obtained by
+\fIxdp_multiprog__main_prog()\fP. If the program attached to the interface \fBis\fP a
+dispatcher program, \fIxdp_multiprog__main_prog()\fP will return a reference to the
+dispatcher program itself, which is mainly useful for obtaining other data about
+that program (such as the program ID). A reference to an offloaded program can
+be acquired using \fIxdp_multiprog__hw_prog()\fP. Function
+\fIxdp_multiprog__attach_mode()\fP returns the attach mode of the non-offloaded
+program; whether an offloaded program is attached should be checked through
+\fIxdp_multiprog__hw_prog()\fP.
+
+.SS "Pinning in bpffs"
+.PP
+The kernel will automatically detach component programs from the dispatcher once
+the last reference to them disappears. To prevent this from happening, \fIlibxdp\fP
+will pin the component program references in \fIbpffs\fP before attaching the
+dispatcher to the network interface. The pathnames generated for pinning are as
+follows:
+
+.IP \(em 4
+/sys/fs/bpf/xdp/dispatch-IFINDEX-DID - dispatcher program for IFINDEX with BPF program ID DID
+.IP \(em 4
+/sys/fs/bpf/xdp/dispatch-IFINDEX-DID/prog0-prog - component program 0, program reference
+.IP \(em 4
+/sys/fs/bpf/xdp/dispatch-IFINDEX-DID/prog0-link - component program 0, bpf_link reference
+.IP \(em 4
+/sys/fs/bpf/xdp/dispatch-IFINDEX-DID/prog1-prog - component program 1, program reference
+.IP \(em 4
+/sys/fs/bpf/xdp/dispatch-IFINDEX-DID/prog1-link - component program 1, bpf_link reference
+.IP \(em 4
+etc, up to ten component programs
+
+.PP
+If set, the \fILIBXDP_BPFFS\fP environment variable will override the location of
+\fIbpffs\fP, but the \fIxdp\fP subdirectory is always used. If no \fIbpffs\fP is mounted,
+libxdp will consult the environment variable \fILIBXDP_BPFFS_AUTOMOUNT\fP. If this
+is set to \fI1\fP, libxdp will attempt to automount a bpffs. If not, libxdp will
+fall back to loading a single program without a dispatcher, as if the kernel did
+not support the features needed for multiprog attachment.
+
+.SH "Using AF_XDP sockets"
+.PP
+Libxdp implements helper functions for configuring AF_XDP sockets as
+well as reading and writing packets from these sockets. AF_XDP sockets
+can be used to redirect packets to user-space at high rates from an
+XDP program. Note that this functionality used to reside in libbpf,
+but has now been moved over to libxdp as it is a better fit for this
+library. As of the 1.0 release of libbpf, the AF_XDP socket support
+will be removed and all future development will be performed
+in libxdp instead.
+
+.PP
+For an overview of AF_XDP sockets, please refer to this Linux Plumbers
+paper
+(\fIhttp://vger.kernel.org/lpc_net2018_talks/lpc18_pres_af_xdp_perf-v3.pdf\fP)
+and the documentation in the Linux kernel
+(Documentation/networking/af_xdp.rst or
+\fIhttps://www.kernel.org/doc/html/latest/networking/af_xdp.html\fP).
+
+.PP
+For an example on how to use the interface, take a look at the AF_XDP-example
+and AF_XDP-forwarding programs in the bpf-examples repository:
+\fIhttps://github.com/xdp-project/bpf-examples\fP.
+
+.SS "Control path"
+.PP
+Libxdp provides helper functions for creating and destroying umems and
+sockets as shown below. The first thing that a user generally wants to
+do is to create a umem area. This is the area that will contain all
+packets received and the ones that are going to be sent. After that,
+AF_XDP sockets can be created tied to this umem. These can either be
+sockets that have exclusive ownership of that umem through
+xsk_socket__create() or shared with other sockets using
+xsk_socket__create_shared. There is one option called
+XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD that can be set in the
+libxdp_flags field (also called libbpf_flags for compatibility
+reasons). This will make libxdp not load any XDP program or set any
+BPF maps, which is a must if users want to add their own XDP program.
+
+.RS
+.nf
+\fCint xsk_umem__create(struct xsk_umem **umem,
+ void *umem_area, __u64 size,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_umem_config *config);
+int xsk_socket__create(struct xsk_socket **xsk,
+ const char *ifname, __u32 queue_id,
+ struct xsk_umem *umem,
+ struct xsk_ring_cons *rx,
+ struct xsk_ring_prod *tx,
+ const struct xsk_socket_config *config);
+int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
+ const char *ifname,
+ __u32 queue_id, struct xsk_umem *umem,
+ struct xsk_ring_cons *rx,
+ struct xsk_ring_prod *tx,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_socket_config *config);
+int xsk_umem__delete(struct xsk_umem *umem);
+void xsk_socket__delete(struct xsk_socket *xsk);
+\fP
+.fi
+.RE
+
+.PP
+There are also two helper functions to get the file descriptor of a
+umem or a socket. These are needed when using standard Linux syscalls
+such as poll(), recvmsg(), sendto(), etc.
+
+.RS
+.nf
+\fCint xsk_umem__fd(const struct xsk_umem *umem);
+int xsk_socket__fd(const struct xsk_socket *xsk);
+\fP
+.fi
+.RE
+
+.PP
+The control path also provides two APIs for setting up AF_XDP sockets when the
+process that is going to use the AF_XDP socket is non-privileged. These two
+functions perform the operations that require privileges and can be executed
+from some form of control process that has the necessary privileges. The
+xsk_socket__create executed on the non-privileged process will then skip these
+two steps. For an example on how to use these, please take a look at the
+AF_XDP-example program in the bpf-examples repository:
+\fIhttps://github.com/xdp-project/bpf-examples/tree/master/AF_XDP-example\fP.
+
+.RS
+.nf
+\fCint xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd);
+int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd);
+\fP
+.fi
+.RE
+
+.SS "Data path"
+.PP
+For performance reasons, all the data path functions are static inline
+functions found in the xsk.h header file so they can be optimized into
+the target application binary for best possible performance. There are
+four FIFO rings of two main types: producer rings (fill and Tx) and
+consumer rings (Rx and completion). The producer rings use
+xsk_ring_prod functions and consumer rings use xsk_ring_cons
+functions. For producer rings, you start with \fIreserving\fP one or more
+slots in a producer ring and then when they have been filled out, you
+\fIsubmit\fP them so that the kernel will act on them. For a consumer
+ring, you \fIpeek\fP if there are any new packets in the ring and if so
+you can read them from the ring. Once you are done reading them, you
+\fIrelease\fP them back to the kernel so it can use them for new
+packets. There is also a \fIcancel\fP operation for consumer rings if the
+application does not want to consume all packets received with the
+peek operation.
+
+.RS
+.nf
+\fC__u32 xsk_ring_prod__reserve(struct xsk_ring_prod *prod, __u32 nb, __u32 *idx);
+void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb);
+__u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx);
+void xsk_ring_cons__cancel(struct xsk_ring_cons *cons, __u32 nb);
+void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb);
+\fP
+.fi
+.RE
+
+.PP
+The functions below are used for reading and writing the descriptors
+of the rings. xsk_ring_prod__fill_addr() and xsk_ring_prod__tx_desc()
+\fBwrite\fP entries in the fill and Tx rings respectively, while
+xsk_ring_cons__comp_addr() and xsk_ring_cons__rx_desc() \fBread\fP entries from
+the completion and Rx rings respectively. The \fIidx\fP is the parameter
+returned in the xsk_ring_prod__reserve or xsk_ring_cons__peek
+calls. To advance to the next entry, simply do \fIidx++\fP.
+
+.RS
+.nf
+\fC__u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill, __u32 idx);
+struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx, __u32 idx);
+const __u64 *xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx);
+const struct xdp_desc *xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx);
+\fP
+.fi
+.RE
+
+.PP
+The xsk_umem functions are used to get a pointer to the packet data
+itself, always located inside the umem. In the default aligned mode,
+you can get the addr variable straight from the Rx descriptor. But in
+unaligned mode, you need to use the three last functions below as the
+offset used is carried in the upper 16 bits of the addr. Therefore,
+you cannot use the addr straight from the descriptor in the unaligned
+case.
+
+.RS
+.nf
+\fCvoid *xsk_umem__get_data(void *umem_area, __u64 addr);
+__u64 xsk_umem__extract_addr(__u64 addr);
+__u64 xsk_umem__extract_offset(__u64 addr);
+__u64 xsk_umem__add_offset_to_addr(__u64 addr);
+\fP
+.fi
+.RE
+
+.PP
+There is one more function in the data path and that checks if the
+need_wakeup flag is set. Use of this flag is highly encouraged and
+should be enabled by setting \fIXDP_USE_NEED_WAKEUP\fP bit in the
+\fIxdp_bind_flags\fP field that is provided to the
+xsk_socket__create[_shared]() calls. If this function returns true,
+then you need to call \fIrecvmsg()\fP, \fIsendto()\fP, or \fIpoll()\fP depending on the
+situation. \fIrecvmsg()\fP if you are \fBreceiving\fP, or \fIsendto()\fP if you are
+\fBsending\fP. \fIpoll()\fP can be used for both cases and provides the ability to
+sleep too, as with any other socket. But note that poll is a slower
+operation than the other two.
+
+.RS
+.nf
+\fCint xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r);
+\fP
+.fi
+.RE
+
+.PP
+For an example on how to use all these APIs, take a look at the AF_XDP-example
+and AF_XDP-forwarding programs in the bpf-examples repository:
+\fIhttps://github.com/xdp-project/bpf-examples\fP.
+
+.SH "Kernel and BPF program feature compatibility"
+.PP
+The features exposed by libxdp rely on certain kernel versions and BPF
+features to work. To get the full benefit of all features, libxdp needs to be
+used with kernel 5.10 or newer, unless the commits mentioned below have been
+backported. However, libxdp will probe the kernel and transparently fall back to
+legacy loading procedures, so it is possible to use the library with older
+versions, although some features will be unavailable, as detailed below.
+
+.PP
+The ability to attach multiple BPF programs to a single interface relies on the
+kernel "BPF program extension" feature which was introduced by commit
+be8704ff07d2 ("bpf: Introduce dynamic program extensions") in the upstream
+kernel and first appeared in kernel release 5.6. To \fBincrementally\fP attach
+multiple programs, a further refinement added by commit 4a1e7c0c63e0 ("bpf:
+Support attaching freplace programs to multiple attach points") is needed; this
+first appeared in the upstream kernel version 5.10. The functionality relies on
+the "BPF trampolines" feature which is unfortunately only available on the
+x86_64 architecture. In other words, kernels before 5.6 can only attach a single
+XDP program to each interface, kernels 5.6+ can attach multiple programs if they
+are all attached at the same time, and kernels 5.10+ have full support for XDP
+multiprog on x86_64. On other architectures, only a single program can be
+attached to each interface.
+
+.PP
+To load AF_XDP programs, kernel support for AF_XDP sockets needs to be included
+and enabled in the kernel build. In addition, when using AF_XDP sockets, an XDP
+program is also loaded on the interface. The XDP program used for this by libxdp
+requires the ability to do map lookups into XSK maps, which was introduced with
+commit fada7fdc83c0 ("bpf: Allow bpf_map_lookup_elem() on an xskmap") in kernel
+5.3. This means that the minimum required kernel version for using AF_XDP is
+kernel 5.3; however, for the AF_XDP XDP program to co-exist with other programs,
+the same constraints for multiprog apply as outlined above.
+
+.PP
+Note that some Linux distributions backport features to earlier kernel versions,
+especially in enterprise kernels; for instance, Red Hat Enterprise Linux kernels
+include everything needed for libxdp to function since RHEL 8.5.
+
+.PP
+Finally, XDP programs loaded using the multiprog facility must include type
+information (using the BPF Type Format, BTF). To get this, compile the programs
+with a recent version of Clang/LLVM (version 10+), and enable debug information
+when compiling (using the \fI\-g\fP option).
+
+.SH "BUGS"
+.PP
+Please report any bugs on Github: \fIhttps://github.com/xdp-project/xdp-tools/issues\fP
+
+.SH "AUTHORS"
+.PP
+libxdp and this man page were written by Toke
+Høiland-Jørgensen. AF_XDP support and documentation was contributed by
+Magnus Karlsson.
diff --git a/lib/libxdp/libxdp.c b/lib/libxdp/libxdp.c
new file mode 100644
index 0000000..9689457
--- /dev/null
+++ b/lib/libxdp/libxdp.c
@@ -0,0 +1,3408 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * XDP management utility functions
+ *
+ * Copyright (C) 2020 Toke Høiland-Jørgensen <toke@redhat.com>
+ */
+
+#include <linux/bpf.h>
+#define _GNU_SOURCE
+
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/file.h>
+#include <sys/vfs.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <dirent.h>
+
+#include <linux/err.h> /* ERR_PTR */
+#include <linux/if_link.h>
+#include <linux/magic.h>
+
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <xdp/libxdp.h>
+#include <xdp/prog_dispatcher.h>
+
+#include "compat.h"
+#include "libxdp_internal.h"
+
+/* ELF section name used for per-program run configuration.
+ * NOTE(review): presumably the section the XDP_RUN_CONFIG() helper macro emits
+ * its metadata into -- confirm against xdp_helpers.h.
+ */
+#define XDP_RUN_CONFIG_SEC ".xdp_run_config"
+/* Name of an environment variable.
+ * NOTE(review): judging by the name, setting it makes libxdp skip the
+ * dispatcher -- confirm against its users later in this file.
+ */
+#define XDP_SKIP_ENVVAR "LIBXDP_SKIP_DISPATCHER"
+
+/* When cloning BPF fds, we want to make sure they don't end up as any of the
+ * standard stdin, stderr, stdout descriptors: fd 0 can confuse the kernel, and
+ * there are orchestration systems that will force-close the others if they
+ * don't point to the "right" things. So just to be safe, use 3 as the minimum
+ * fd number.
+ */
+#define MIN_FD 3
+
+/* Max number of times we retry attachment */
+#define MAX_RETRY 10
+
+/* Interface index 1; the name suggests the loopback device ("lo") */
+#define IFINDEX_LO 1
+
+/* Explanatory text about missing kernel support for the multiprog dispatcher.
+ * NOTE(review): presumably appended to warnings when dispatcher loading
+ * fails -- confirm at the call sites.
+ */
+static const char *dispatcher_feature_err =
+ "This means that the kernel does not support the features needed\n"
+ "by the multiprog dispatcher, either because it is too old entirely,\n"
+ "or because it is not yet supported on the current architecture.\n";
+
+/* State kept for a single XDP program. The type is opaque to library users,
+ * who only handle pointers to it.
+ */
+struct xdp_program {
+ /* one of bpf_prog or prog_fd should be set */
+ struct bpf_program *bpf_prog;
+ struct bpf_object *bpf_obj;
+ struct btf *btf;
+ enum bpf_prog_type prog_type;
+ int prog_fd;
+ int link_fd;
+ char *prog_name;
+ char *attach_name;
+ __u8 prog_tag[BPF_TAG_SIZE];
+ __u32 prog_id;
+ __u64 load_time;
+ /* NOTE(review): presumably true when bpf_obj is owned by the caller
+ * rather than by libxdp -- confirm where it is set
+ */
+ bool from_external_obj;
+ bool is_frags;
+ unsigned int run_prio;
+ unsigned int chain_call_actions; /* bitmap */
+
+ /* for building list of attached programs to multiprog */
+ struct xdp_program *next;
+};
+
+/* State for the set of XDP programs on one interface (ifindex): the main
+ * program (a dispatcher, or a legacy non-dispatcher program), the linked list
+ * of component programs, and an optional hardware-offloaded program.
+ */
+struct xdp_multiprog {
+ struct xdp_dispatcher_config config;
+ struct xdp_program *main_prog; /* dispatcher or legacy prog pointer */
+ struct xdp_program *first_prog; /* uses xdp_program->next to build a list */
+ struct xdp_program *hw_prog;
+ __u32 version;
+ size_t num_links;
+ bool is_loaded;
+ bool is_legacy;
+ bool kernel_frags_support;
+ bool checked_compat;
+ enum xdp_attach_mode attach_mode;
+ int ifindex;
+};
+
+/* Version number and configuration layout of the version 1 dispatcher.
+ * NOTE(review): presumably kept so that dispatchers loaded by older library
+ * versions can still be parsed -- confirm against the code that reads it.
+ */
+#define XDP_DISPATCHER_VERSION_V1 1
+struct xdp_dispatcher_config_v1 {
+ __u8 num_progs_enabled; /* Number of active program slots */
+ __u32 chain_call_actions[MAX_DISPATCHER_ACTIONS];
+ __u32 run_prios[MAX_DISPATCHER_ACTIONS];
+};
+
+/* Printable names of the XDP return codes, indexed by the action value */
+static const char *xdp_action_names[] = {
+ [XDP_ABORTED] = "XDP_ABORTED",
+ [XDP_DROP] = "XDP_DROP",
+ [XDP_PASS] = "XDP_PASS",
+ [XDP_TX] = "XDP_TX",
+ [XDP_REDIRECT] = "XDP_REDIRECT",
+};
+
+/* Forward declaration: used by xdp_program__find_embedded() below, before the
+ * definition appears in this translation unit.
+ */
+static struct xdp_program *xdp_program__create_from_obj(struct bpf_object *obj,
+ const char *section_name,
+ const char *prog_name,
+ bool external);
+
+#ifdef LIBXDP_STATIC
+/* Describes one BPF object file embedded in the library binary: the file name
+ * it is looked up by and the [data_start, data_end) byte range of its contents.
+ */
+struct xdp_embedded_obj {
+ const char *filename;
+ const void *data_start;
+ const void *data_end;
+};
+
+/* Start/end marker symbols of the embedded object files.
+ * NOTE(review): the _binary_*_start/_end naming matches what the linker or
+ * objcopy generates for binary input files -- confirm against the build rules.
+ */
+extern const char _binary_xdp_dispatcher_o_start;
+extern const char _binary_xdp_dispatcher_o_end;
+extern const char _binary_xsk_def_xdp_prog_o_start;
+extern const char _binary_xsk_def_xdp_prog_o_end;
+extern const char _binary_xsk_def_xdp_prog_5_3_o_start;
+extern const char _binary_xsk_def_xdp_prog_5_3_o_end;
+
+/* Table of embedded objects; terminated by an all-zero entry (NULL filename) */
+static struct xdp_embedded_obj embedded_objs[] = {
+ {"xdp-dispatcher.o", &_binary_xdp_dispatcher_o_start, &_binary_xdp_dispatcher_o_end},
+ {"xsk_def_xdp_prog.o", &_binary_xsk_def_xdp_prog_o_start, &_binary_xsk_def_xdp_prog_o_end},
+ {"xsk_def_xdp_prog_5.3.o", &_binary_xsk_def_xdp_prog_5_3_o_start, &_binary_xsk_def_xdp_prog_5_3_o_end},
+ {},
+};
+/* Open the embedded object file called 'filename' and create an xdp_program
+ * from it.
+ *
+ * Returns NULL when no embedded object has that name, an ERR_PTR() when
+ * libbpf fails to open the in-memory object, and otherwise whatever
+ * xdp_program__create_from_obj() returns. If the caller passes 'opts' without
+ * an object_name, the name is filled in with 'filename' so that the object is
+ * named as if it had been opened from the filesystem.
+ */
+static struct xdp_program *xdp_program__find_embedded(const char *filename,
+						      const char *section_name,
+						      const char *prog_name,
+						      struct bpf_object_open_opts *opts)
+{
+	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, default_opts,
+			    .object_name = filename,
+	);
+	struct xdp_embedded_obj *match = NULL, *cur;
+	struct bpf_object *bpf_obj;
+	size_t obj_len;
+	int ret;
+
+	/* the table ends with an entry whose filename is NULL */
+	for (cur = embedded_objs; cur->filename; cur++) {
+		if (!strcmp(filename, cur->filename)) {
+			match = cur;
+			break;
+		}
+	}
+	if (!match)
+		return NULL;
+
+	obj_len = match->data_end - match->data_start;
+
+	if (opts) {
+		if (!opts->object_name)
+			opts->object_name = filename;
+	} else {
+		opts = &default_opts;
+	}
+
+	pr_debug("Loading XDP program '%s' from embedded object file\n", filename);
+
+	bpf_obj = bpf_object__open_mem(match->data_start, obj_len, opts);
+	ret = libbpf_get_error(bpf_obj);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return xdp_program__create_from_obj(bpf_obj, section_name, prog_name, false);
+}
+#else
+/* Variant used when LIBXDP_STATIC is not defined: nothing is embedded, so
+ * every lookup reports "not found" by returning NULL.
+ */
+static inline struct xdp_program *
+xdp_program__find_embedded(__unused const char *filename,
+			   __unused const char *section_name,
+			   __unused const char *prog_name,
+			   __unused struct bpf_object_open_opts *opts)
+{
+	return NULL;
+}
+#endif
+
+/* Default print callback: drops LIBXDP_DEBUG messages (returning 0) and
+ * writes everything else to stderr, returning vfprintf()'s result.
+ */
+static int __base_pr(enum libxdp_print_level level, const char *format,
+		     va_list args)
+{
+	return level == LIBXDP_DEBUG ? 0 : vfprintf(stderr, format, args);
+}
+
+/* Currently installed print callback; starts out as __base_pr and is replaced
+ * through libxdp_set_print(). May be NULL, in which case nothing is printed.
+ */
+static libxdp_print_fn_t __libxdp_pr = __base_pr;
+
+/* Install 'fn' as the library's print callback and hand back the callback
+ * that was installed before.
+ */
+libxdp_print_fn_t libxdp_set_print(libxdp_print_fn_t fn)
+{
+	libxdp_print_fn_t previous;
+
+	previous = __libxdp_pr;
+	__libxdp_pr = fn;
+
+	return previous;
+}
+
+/* printf-style entry point for library messages: forwards 'level', the format
+ * string and the variadic arguments to the installed print callback. Does
+ * nothing when no callback is installed.
+ */
+__printf(2, 3) void libxdp_print(enum libxdp_print_level level, const char *format, ...)
+{
+	va_list ap;
+
+	if (__libxdp_pr) {
+		va_start(ap, format);
+		__libxdp_pr(level, format, ap);
+		va_end(ap);
+	}
+}
+
+/* Tri-state compatibility flag, initially COMPAT_UNKNOWN.
+ * NOTE(review): presumably caches the result of probing the kernel for
+ * dispatcher support -- confirm where it is updated.
+ */
+static enum {
+ COMPAT_UNKNOWN,
+ COMPAT_SUPPORTED,
+ COMPAT_UNSUPPORTED
+} kernel_compat = COMPAT_UNKNOWN;
+
+/* Forward declarations of static multiprog helpers used before their
+ * definitions in this translation unit.
+ */
+static int xdp_multiprog__attach(struct xdp_multiprog *old_mp,
+ struct xdp_multiprog *mp,
+ enum xdp_attach_mode mode);
+static struct xdp_multiprog *xdp_multiprog__generate(struct xdp_program **progs,
+ size_t num_progs,
+ int ifindex,
+ struct xdp_multiprog *old_mp,
+ bool remove_progs);
+static int xdp_multiprog__pin(struct xdp_multiprog *mp);
+static int xdp_multiprog__unpin(struct xdp_multiprog *mp);
+
+
+/* Translate a pointer returned by a libxdp API into an error code.
+ *
+ * A valid pointer yields 0. An ERR_PTR()-encoded pointer stores its error in
+ * errno and yields the negative error. A NULL pointer yields -errno as it is
+ * at the time of the call.
+ *
+ * Old APIs always set errno to 0 before returning NULL, which keeps them
+ * compatible with the old libxdp_get_error that called the older version of
+ * libbpf_get_error (which did PTR_ERR_OR_ZERO); newer libbpf versions
+ * unconditionally return -errno on seeing NULL, as the libbpf practice changed
+ * to returning NULL or errors.
+ *
+ * The new APIs (like xdp_program__create), which indicate errors using NULL,
+ * set errno when returning NULL.
+ */
+long libxdp_get_error(const void *ptr)
+{
+	if (IS_ERR(ptr))
+		errno = -PTR_ERR(ptr);
+	else if (ptr)
+		return 0;
+
+	return -errno;
+}
+
+/* Write a description of error code 'err' into 'buf' (at most 'size' bytes)
+ * using libbpf_strerror(), and pass its return value through libxdp_err().
+ */
+int libxdp_strerror(int err, char *buf, size_t size)
+{
+	int ret = libbpf_strerror(err, buf, size);
+
+	return libxdp_err(ret);
+}
+
+/* Convenience wrapper around libxdp_strerror() that always returns 'dst', so
+ * it can be used directly as a printf argument. If the lookup itself fails,
+ * 'dst' receives a message describing that failure instead.
+ */
+static char *libxdp_strerror_r(int err, char *dst, size_t size)
+{
+	int rc;
+
+	rc = libxdp_strerror(err, dst, size);
+	if (rc != 0)
+		snprintf(dst, size, "ERROR: strerror_r(%d)=%d", err, rc);
+
+	return dst;
+}
+
+#ifndef HAVE_LIBBPF_BTF__LOAD_FROM_KERNEL_BY_ID
+/* Compatibility shim for libbpf versions without
+ * btf__load_from_kernel_by_id(): implemented on top of btf__get_from_id(),
+ * with any failure reported as NULL.
+ */
+static struct btf *btf__load_from_kernel_by_id(__u32 id)
+{
+	struct btf *kern_btf;
+
+	if (btf__get_from_id(id, &kern_btf) != 0)
+		return NULL;
+
+	return kern_btf;
+}
+#endif
+
+#ifndef HAVE_LIBBPF_BTF__TYPE_CNT
+/* Compatibility shim for libbpf versions without btf__type_cnt(). The older
+ * btf__get_nr_types() did not count the 'void' type, hence the extra one.
+ */
+static __u32 btf__type_cnt(const struct btf *btf)
+{
+	__u32 nr_types = btf__get_nr_types(btf);
+
+	return nr_types + 1;
+}
+#endif
+
+#ifndef HAVE_LIBBPF_BPF_OBJECT__NEXT_MAP
+/* Compatibility shim: maps the newer (obj, map) argument order onto the older
+ * bpf_map__next(map, obj) iterator.
+ */
+static struct bpf_map *
+bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *map)
+{
+	return bpf_map__next(map, obj);
+}
+#endif
+
+#ifndef HAVE_LIBBPF_BPF_OBJECT__NEXT_PROGRAM
+/* Compatibility shim: maps the newer (obj, prog) argument order onto the older
+ * bpf_program__next(prog, obj) iterator.
+ */
+static struct bpf_program *
+bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prog)
+{
+	return bpf_program__next(prog, obj);
+}
+#endif
+
+#ifndef HAVE_LIBBPF_BPF_PROGRAM__INSN_CNT
+#define BPF_INSN_SZ (sizeof(struct bpf_insn))
+/* Compatibility shim: derive the instruction count from the program size
+ * reported by bpf_program__size(), divided by the size of one instruction.
+ */
+static size_t bpf_program__insn_cnt(const struct bpf_program *prog)
+{
+	return bpf_program__size(prog) / BPF_INSN_SZ;
+}
+#endif
+
+/* Return the first program in 'obj' whose ELF section name equals
+ * 'section_name', or NULL if there is none.
+ *
+ * libbpf has deprecated its own lookup by section name, but libxdp exposes an
+ * API that takes section names, so the lookup is reimplemented here to keep
+ * compatibility.
+ */
+static struct bpf_program *
+bpf_program_by_section_name(const struct bpf_object *obj,
+			    const char *section_name)
+{
+	struct bpf_program *prog;
+
+	bpf_object__for_each_program(prog, obj) {
+		const char *sec = bpf_program__section_name(prog);
+
+		if (!sec)
+			continue;
+		if (strcmp(sec, section_name) == 0)
+			return prog;
+	}
+
+	return NULL;
+}
+
+/* Check whether 'mnt' is located on a BPF filesystem: true only if statfs()
+ * succeeds and reports the BPF_FS_MAGIC filesystem type.
+ */
+static bool bpf_is_valid_mntpt(const char *mnt)
+{
+	struct statfs fs_info;
+
+	return statfs(mnt, &fs_info) >= 0 &&
+	       (unsigned long)fs_info.f_type == BPF_FS_MAGIC;
+}
+
+/* Mount a new bpffs instance on 'target'.
+ *
+ * The target is first marked as a private mount (recursively). If that fails
+ * with EINVAL, the target is bind-mounted onto itself and the make-private
+ * step is retried exactly once. Finally a "bpf" filesystem is mounted on the
+ * target with mode 0700.
+ *
+ * Returns 0 on success, or a negative errno value (after logging a warning)
+ * if any of the mount() calls fails.
+ */
+static int bpf_mnt_fs(const char *target)
+{
+ bool bind_done = false;
+ int err;
+
+retry:
+ err = mount("", target, "none", MS_PRIVATE | MS_REC, NULL);
+ if (err) {
+ /* only EINVAL is retried, and only before the bind mount was done */
+ if (errno != EINVAL || bind_done) {
+ err = -errno;
+ pr_warn("mount --make-private %s failed: %s\n",
+ target, strerror(-err));
+ return err;
+ }
+
+ /* bind-mount the target onto itself, then retry make-private */
+ err = mount(target, target, "none", MS_BIND, NULL);
+ if (err) {
+ err = -errno;
+ pr_warn("mount --bind %s %s failed: %s\n",
+ target, target, strerror(-err));
+ return err;
+ }
+
+ bind_done = true;
+ goto retry;
+ }
+
+ err = mount("bpf", target, "bpf", 0, "mode=0700");
+ if (err) {
+ err = -errno;
+ pr_warn("mount -t bpf bpf %s failed: %s\n",
+ target, strerror(-err));
+ return err;
+ }
+
+ return 0;
+}
+
+/* Check whether mntpt holds a bpffs, optionally mounting one when it does
+ * not. On success the path is copied into mnt (at most len - 1 bytes, always
+ * NUL-terminated) and mnt is returned; otherwise NULL is returned.
+ */
+static const char *bpf_find_mntpt_single(char *mnt, int len, const char *mntpt, bool mount)
+{
+	bool have_bpffs = bpf_is_valid_mntpt(mntpt);
+
+	if (!have_bpffs && !mount)
+		return NULL;
+
+	if (!have_bpffs) {
+		pr_debug("No bpffs found at %s, mounting a new one\n",
+			 mntpt);
+
+		if (bpf_mnt_fs(mntpt))
+			return NULL;
+	}
+
+	strncpy(mnt, mntpt, len - 1);
+	mnt[len - 1] = '\0';
+	return mnt;
+}
+
+/* Locate the bpffs mount point, caching a successful result.
+ *
+ * The directory comes from the XDP_BPFFS_ENVVAR environment variable, with
+ * BPF_DIR_MNT as the fallback. A bpffs is only mounted there when
+ * XDP_BPFFS_MOUNT_ENVVAR is set to exactly "1". Failures are not cached, so
+ * a later call tries again.
+ */
+static const char *find_bpffs()
+{
+	static bool bpf_mnt_cached = false;
+	static char bpf_wrk_dir[PATH_MAX];
+	static const char *mnt = NULL;
+	const char *env_dir, *env_mount, *target;
+	bool do_mount;
+
+	if (bpf_mnt_cached)
+		return mnt;
+
+	env_dir = secure_getenv(XDP_BPFFS_ENVVAR);
+	env_mount = secure_getenv(XDP_BPFFS_MOUNT_ENVVAR);
+
+	do_mount = env_mount && !strcmp(env_mount, "1");
+	target = env_dir ? env_dir : BPF_DIR_MNT;
+
+	mnt = bpf_find_mntpt_single(bpf_wrk_dir, sizeof(bpf_wrk_dir),
+				    target, do_mount);
+	if (mnt)
+		bpf_mnt_cached = 1;
+	else
+		pr_warn("No bpffs found at %s\n", target);
+
+	return mnt;
+}
+
+/* Format "<parent>/xdp" into dir and create that directory with mode
+ * S_IRWXU. An already existing directory counts as success. Returns 0 or a
+ * negative error code.
+ */
+static int mk_state_subdir(char *dir, size_t dir_sz, const char *parent)
+{
+	int rc = try_snprintf(dir, dir_sz, "%s/xdp", parent);
+
+	if (rc)
+		return rc;
+
+	if (mkdir(dir, S_IRWXU) == 0 || errno == EEXIST)
+		return 0;
+
+	return -errno;
+}
+
+/* Return the libxdp state directory inside the bpffs, creating it if needed.
+ * A successful result is cached. On failure an ERR_PTR() is returned:
+ * -ENOENT when no bpffs is found, otherwise the mk_state_subdir() error.
+ */
+static const char *get_bpffs_dir(void)
+{
+	static char bpffs_dir[PATH_MAX];
+	static const char *dir = NULL;
+	const char *bpffs_root;
+	int rc;
+
+	if (dir)
+		return dir;
+
+	bpffs_root = find_bpffs();
+	if (!bpffs_root)
+		return ERR_PTR(-ENOENT);
+
+	rc = mk_state_subdir(bpffs_dir, sizeof(bpffs_dir), bpffs_root);
+	if (rc)
+		return ERR_PTR(rc);
+
+	dir = bpffs_dir;
+	return dir;
+}
+
+/* Return the directory used for the libxdp lock.
+ *
+ * The bpffs state directory is preferred; if it is unavailable, a state
+ * directory below RUNDIR is created instead. Only a successful result is
+ * cached: the lookup result is kept in a local until it is known to be
+ * valid, so a failed call never leaves an error pointer in the static cache
+ * and a later call gets a fresh attempt.
+ *
+ * Returns the directory path, or an ERR_PTR() on failure.
+ */
+static const char *get_lock_dir(void)
+{
+	static const char *dir = NULL;
+	static char rundir[PATH_MAX];
+	const char *bpffs_dir;
+	int err;
+
+	if (dir)
+		return dir;
+
+	bpffs_dir = get_bpffs_dir();
+	if (!IS_ERR(bpffs_dir)) {
+		dir = bpffs_dir;
+		return dir;
+	}
+
+	err = mk_state_subdir(rundir, sizeof(rundir), RUNDIR);
+	if (err)
+		return ERR_PTR(err);
+
+	dir = rundir;
+	return dir;
+}
+
+/* Take the libxdp lock: open the lock directory and place an exclusive
+ * flock() on it.
+ *
+ * Returns the locked file descriptor (to be handed to xdp_lock_release()),
+ * or a negative error code.
+ */
+int xdp_lock_acquire(void)
+{
+	const char *lock_dir = get_lock_dir();
+	int fd, rc;
+
+	if (IS_ERR(lock_dir))
+		return PTR_ERR(lock_dir);
+
+	fd = open(lock_dir, O_DIRECTORY);
+	if (fd < 0) {
+		rc = -errno;
+		pr_warn("Couldn't open lock directory at %s: %s\n",
+			lock_dir, strerror(-rc));
+		return rc;
+	}
+
+	if (flock(fd, LOCK_EX)) {
+		rc = -errno;
+		pr_warn("Couldn't flock fd %d: %s\n", fd, strerror(-rc));
+		close(fd);
+		return rc;
+	}
+
+	pr_debug("Acquired lock from %s with fd %d\n", lock_dir, fd);
+	return fd;
+}
+
+/* Drop the lock held on lock_fd and close the descriptor.
+ *
+ * The descriptor is closed even when unlocking fails. Returns 0 on success,
+ * or the negative errno from flock().
+ */
+int xdp_lock_release(int lock_fd)
+{
+	int rc = 0;
+
+	if (flock(lock_fd, LOCK_UN)) {
+		rc = -errno;
+		pr_warn("Couldn't unlock fd %d: %s\n", lock_fd, strerror(-rc));
+	} else {
+		pr_debug("Released lock fd %d\n", lock_fd);
+	}
+
+	close(lock_fd);
+	return rc;
+}
+
+/* Thin wrapper around whichever libbpf XDP attach entry point is available
+ * at build time. With the legacy API the opts struct is only passed when
+ * old_fd is non-zero.
+ */
+static int do_xdp_attach(int ifindex, int prog_fd, int old_fd, __u32 xdp_flags)
+{
+#ifdef HAVE_LIBBPF_BPF_XDP_ATTACH
+	LIBBPF_OPTS(bpf_xdp_attach_opts, attach_opts,
+		    .old_prog_fd = old_fd);
+
+	return bpf_xdp_attach(ifindex, prog_fd, xdp_flags, &attach_opts);
+#else
+	DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, link_opts, .old_fd = old_fd);
+	const struct bpf_xdp_set_link_opts *opts_arg = NULL;
+
+	if (old_fd)
+		opts_arg = &link_opts;
+
+	return bpf_set_link_xdp_fd_opts(ifindex, prog_fd, xdp_flags, opts_arg);
+#endif
+}
+
+/* Attach prog_fd to ifindex in the requested mode, expecting old_fd to be
+ * the program currently attached.
+ *
+ * An old_fd of -1 means "only attach if nothing is attached yet". If the
+ * attach fails with -EINVAL while an old_fd is being passed, it is retried
+ * once without the old_fd.
+ *
+ * Returns the do_xdp_attach() result, except that -EEXIST with an old_fd
+ * still set is turned into -EAGAIN.
+ */
+int xdp_attach_fd(int prog_fd, int old_fd, int ifindex,
+		  enum xdp_attach_mode mode)
+{
+	int err = 0, xdp_flags = 0;
+
+	pr_debug("Replacing XDP fd %d with %d on ifindex %d\n",
+		 old_fd, prog_fd, ifindex);
+
+	if (old_fd == -1) {
+		xdp_flags |= XDP_FLAGS_UPDATE_IF_NOEXIST;
+		old_fd = 0;
+	}
+
+	if (mode == XDP_MODE_SKB)
+		xdp_flags |= XDP_FLAGS_SKB_MODE;
+	else if (mode == XDP_MODE_NATIVE)
+		xdp_flags |= XDP_FLAGS_DRV_MODE;
+	else if (mode == XDP_MODE_HW)
+		xdp_flags |= XDP_FLAGS_HW_MODE;
+
+	for (;;) {
+		err = do_xdp_attach(ifindex, prog_fd, old_fd, xdp_flags);
+		if (err != -EINVAL || !old_fd)
+			break;
+
+		pr_debug("Got 'invalid argument', trying again without old_fd\n");
+		old_fd = 0;
+	}
+
+	if (err >= 0)
+		return err;
+
+	pr_info("Error attaching XDP program to ifindex %d: %s\n",
+		ifindex, strerror(-err));
+
+	/* We raced with another attach/detach, have to retry */
+	if (err == -EEXIST && old_fd)
+		return -EAGAIN;
+
+	if (err == -EBUSY || err == -EEXIST)
+		pr_info("XDP already loaded on device\n");
+	else if (err == -EOPNOTSUPP)
+		pr_info("XDP mode not supported; try using SKB mode\n");
+
+	return err;
+}
+
+/* Return the BTF object associated with xdp_prog.
+ *
+ * A NULL or error-pointer argument is rejected via libxdp_err_ptr(0, true)
+ * instead of being dereferenced, matching the other xdp_program accessors.
+ */
+const struct btf *xdp_program__btf(struct xdp_program *xdp_prog)
+{
+	if (IS_ERR_OR_NULL(xdp_prog))
+		return libxdp_err_ptr(0, true);
+
+	return xdp_prog->btf;
+}
+
+/* Report in which mode, if any, xdp_prog is attached to ifindex.
+ *
+ * The program is matched by ID against the HW program, then against the
+ * main program of a legacy multiprog, and finally against each component
+ * program of a dispatcher.
+ *
+ * Returns XDP_MODE_HW for a hardware match, the multiprog's attach mode for
+ * any other match, and XDP_MODE_UNSPEC when the program has no ID, nothing
+ * is attached, or no match is found.
+ */
+enum xdp_attach_mode
+xdp_program__is_attached(const struct xdp_program *xdp_prog, int ifindex)
+{
+	enum xdp_attach_mode ret = XDP_MODE_UNSPEC;
+	struct xdp_program *prog = NULL;
+	struct xdp_multiprog *mp;
+	uint32_t wanted_id;
+
+	/* xdp_program__id() yields 0 for NULL and error pointers alike */
+	wanted_id = xdp_program__id(xdp_prog);
+	if (!wanted_id)
+		return ret;
+
+	mp = xdp_multiprog__get_from_ifindex(ifindex);
+	if (IS_ERR_OR_NULL(mp))
+		return ret;
+
+	prog = xdp_multiprog__hw_prog(mp);
+	if (xdp_program__id(prog) == wanted_id) {
+		ret = XDP_MODE_HW;
+		goto out;
+	}
+
+	if (xdp_multiprog__is_legacy(mp)) {
+		prog = xdp_multiprog__main_prog(mp);
+		if (xdp_program__id(prog) == wanted_id)
+			ret = xdp_multiprog__attach_mode(mp);
+		goto out;
+	}
+
+	/* Start the walk from the beginning: prog may still hold the HW
+	 * program from the check above, which must not be used as the
+	 * iteration cursor.
+	 */
+	prog = NULL;
+	while ((prog = xdp_multiprog__next_prog(prog, mp))) {
+		if (xdp_program__id(prog) == wanted_id) {
+			ret = xdp_multiprog__attach_mode(mp);
+			break;
+		}
+	}
+
+out:
+	xdp_multiprog__close(mp);
+	return ret;
+}
+
+/* Set or clear the chain-call bit for action on a not-yet-loaded program.
+ *
+ * Fails with -EINVAL (through libxdp_err()) for an invalid program, for a
+ * program that already has an fd, or for an action at or above
+ * XDP_DISPATCHER_RETVAL.
+ */
+int xdp_program__set_chain_call_enabled(struct xdp_program *prog,
+					unsigned int action, bool enabled)
+{
+	unsigned int action_bit;
+
+	if (IS_ERR_OR_NULL(prog) || prog->prog_fd >= 0 || action >= XDP_DISPATCHER_RETVAL)
+		return libxdp_err(-EINVAL);
+
+	action_bit = 1U << action;
+	if (enabled)
+		prog->chain_call_actions |= action_bit;
+	else
+		prog->chain_call_actions &= ~action_bit;
+
+	return 0;
+}
+
+/* Return true if the chain-call bit for action is set on prog; false for an
+ * invalid program or an action at or above XDP_DISPATCHER_RETVAL.
+ */
+bool xdp_program__chain_call_enabled(const struct xdp_program *prog,
+				     enum xdp_action action)
+{
+	if (IS_ERR_OR_NULL(prog) || action >= XDP_DISPATCHER_RETVAL)
+		return false;
+
+	return (prog->chain_call_actions & (1U << action)) != 0;
+}
+
+/* Return the run priority of prog, or XDP_DEFAULT_RUN_PRIO if prog is NULL
+ * or an error pointer.
+ */
+unsigned int xdp_program__run_prio(const struct xdp_program *prog)
+{
+	if (!IS_ERR_OR_NULL(prog))
+		return prog->run_prio;
+
+	return XDP_DEFAULT_RUN_PRIO;
+}
+
+/* Set the run priority of a program that has no fd yet. Returns 0, or
+ * -EINVAL (through libxdp_err()) for an invalid program or one with an fd.
+ */
+int xdp_program__set_run_prio(struct xdp_program *prog, unsigned int run_prio)
+{
+	bool usable = !IS_ERR_OR_NULL(prog) && prog->prog_fd < 0;
+
+	if (!usable)
+		return libxdp_err(-EINVAL);
+
+	prog->run_prio = run_prio;
+	return 0;
+}
+
+/* Tell whether prog is marked as supporting XDP frags. Returns false for a
+ * NULL or error-pointer argument.
+ */
+bool xdp_program__xdp_frags_support(const struct xdp_program *prog)
+{
+	if (IS_ERR_OR_NULL(prog))
+		return false;
+
+	/* Before the program is loaded, the answer comes straight from
+	 * bpf_program__flags(), so that changes made to those flags are
+	 * honoured on the libxdp side. Loaded programs carry their own state
+	 * variable instead, filled in either from the program flags in
+	 * xdp_program__load() or from the dispatcher state variables when the
+	 * object is instantiated from the kernel.
+	 */
+	if (prog->bpf_prog && prog->prog_fd < 0)
+		return (bpf_program__flags(prog->bpf_prog) & BPF_F_XDP_HAS_FRAGS) != 0;
+
+	return prog->is_frags;
+}
+
+/* Set or clear BPF_F_XDP_HAS_FRAGS on a program that has a bpf_program but
+ * no fd yet, and mirror the value in prog->is_frags on success.
+ *
+ * Returns 0 on success, -EINVAL (through libxdp_err()) for an unusable
+ * program, or the error from bpf_program__set_flags().
+ */
+int xdp_program__set_xdp_frags_support(struct xdp_program *prog, bool frags)
+{
+	__u32 new_flags;
+	int rc;
+
+	if (IS_ERR_OR_NULL(prog) || !prog->bpf_prog || prog->prog_fd >= 0)
+		return libxdp_err(-EINVAL);
+
+	new_flags = bpf_program__flags(prog->bpf_prog);
+	new_flags = frags ? (new_flags | BPF_F_XDP_HAS_FRAGS)
+			  : (new_flags & ~BPF_F_XDP_HAS_FRAGS);
+
+	rc = bpf_program__set_flags(prog->bpf_prog, new_flags);
+	if (rc)
+		return rc;
+
+	prog->is_frags = frags;
+	return 0;
+}
+
+/* Return the program name, or libxdp_err_ptr(0, true) for an invalid prog. */
+const char *xdp_program__name(const struct xdp_program *prog)
+{
+	if (!IS_ERR_OR_NULL(prog))
+		return prog->prog_name;
+
+	return libxdp_err_ptr(0, true);
+}
+
+/* Return the bpf_object behind prog, or libxdp_err_ptr(0, true) for an
+ * invalid prog.
+ */
+struct bpf_object *xdp_program__bpf_obj(struct xdp_program *prog)
+{
+	if (!IS_ERR_OR_NULL(prog))
+		return prog->bpf_obj;
+
+	return libxdp_err_ptr(0, true);
+}
+
+/* Return the program tag buffer, or libxdp_err_ptr(0, true) for an invalid
+ * prog.
+ */
+const unsigned char *xdp_program__tag(const struct xdp_program *prog)
+{
+	if (!IS_ERR_OR_NULL(prog))
+		return prog->prog_tag;
+
+	return libxdp_err_ptr(0, true);
+}
+
+uint32_t xdp_program__id(const struct xdp_program *prog)
+{
+ if (IS_ERR_OR_NULL(prog))
+ return 0;
+
+ return prog->prog_id;
+}
+
+int xdp_program__fd(const struct xdp_program *prog)
+{
+ if (IS_ERR_OR_NULL(prog))
+ return errno = ENOENT, -1;
+
+ return prog->prog_fd;
+}
+
+/* Write the comma-separated names of all chain-call actions enabled on prog
+ * into buf.
+ *
+ * @prog:    program to inspect
+ * @buf:     destination buffer
+ * @buf_len: size of buf in bytes, must be non-zero
+ *
+ * buf always holds a NUL-terminated string once the arguments have been
+ * validated, including the case where no action is enabled (empty string).
+ *
+ * Returns 0 on success, -EINVAL for invalid arguments and -ENOSPC when the
+ * names do not fit (both through libxdp_err()); on -ENOSPC the buffer holds
+ * the part that did fit.
+ */
+int xdp_program__print_chain_call_actions(const struct xdp_program *prog,
+					  char *buf, size_t buf_len)
+{
+	bool first = true;
+	char *pos = buf;
+	int i, len = 0;
+
+	if (IS_ERR_OR_NULL(prog) || !buf || !buf_len)
+		return libxdp_err(-EINVAL);
+
+	/* hand back a valid (empty) string even if no action is enabled */
+	*pos = '\0';
+
+	for (i = 0; i <= XDP_REDIRECT; i++) {
+		if (!xdp_program__chain_call_enabled(prog, i))
+			continue;
+
+		if (!first) {
+			/* Need room for the ',' and for a terminating NUL
+			 * after it; otherwise the error path below would
+			 * write one byte past the end of the buffer.
+			 */
+			if (buf_len < 2)
+				goto err_len;
+			*pos++ = ',';
+			buf_len--;
+		}
+		first = false;
+
+		len = snprintf(pos, buf_len, "%s", xdp_action_names[i]);
+		if (len < 0 || (size_t)len >= buf_len)
+			goto err_len;
+		pos += len;
+		buf_len -= len;
+	}
+	return 0;
+err_len:
+	/* pos is always inside the buffer here */
+	*pos = '\0';
+	return libxdp_err(-ENOSPC);
+}
+
+/* Follow the type chain starting at id until a type is reached that is
+ * neither a modifier nor a typedef. That type is returned and, when res_id
+ * is non-NULL, its type ID is stored in *res_id.
+ */
+static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf,
+						      __u32 id, __u32 *res_id)
+{
+	const struct btf_type *cur = btf__type_by_id(btf, id);
+	__u32 cur_id = id;
+
+	while (btf_is_mod(cur) || btf_is_typedef(cur)) {
+		cur_id = cur->type;
+		cur = btf__type_by_id(btf, cur_id);
+	}
+
+	if (res_id)
+		*res_id = cur_id;
+
+	return cur;
+}
+
+/* Read an integer encoded as "pointer to array of N elements", the same
+ * encoding the run config example uses with __uint().
+ *
+ * t must be a PTR type whose target is an ARRAY; the array's element count
+ * is stored in *res. Returns false, after logging a warning, if the type
+ * does not have that shape.
+ */
+static bool get_field_int(const struct btf *btf,
+			  const char *t_name,
+			  const struct btf_type *t,
+			  __u32 *res)
+{
+	const struct btf_type *target;
+
+	if (!btf_is_ptr(t)) {
+		pr_warn("attr '%s': expected PTR, got %u.\n",
+			t_name, btf_kind(t));
+		return false;
+	}
+
+	target = btf__type_by_id(btf, t->type);
+	if (!target) {
+		pr_warn("attr '%s': type [%u] not found.\n",
+			t_name, t->type);
+		return false;
+	}
+
+	if (!btf_is_array(target)) {
+		pr_warn("attr '%s': expected ARRAY, got %u.\n",
+			t_name, btf_kind(target));
+		return false;
+	}
+
+	*res = btf_array(target)->nelems;
+	return true;
+}
+
+/* Look up act_name in xdp_action_names. On a match the array index is
+ * stored in *act and true is returned; otherwise false.
+ */
+static bool get_xdp_action(const char *act_name, unsigned int *act)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(xdp_action_names); idx++) {
+		if (strcmp(act_name, xdp_action_names[idx]) != 0)
+			continue;
+
+		*act = idx;
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Look up the BTF FUNC entry for func_name, where func_name may be only a
+ * truncated prefix of the real function name.
+ * An exact name match wins immediately; otherwise a prefix match is accepted
+ * only if it is the single one. Returns NULL when nothing matches, when the
+ * prefix is ambiguous, or when btf is NULL.
+ */
+static const struct btf_type *btf_get_function(const struct btf *btf,
+						const char *func_name)
+{
+	const struct btf_type *candidate = NULL;
+	size_t prefix_len, nr_matches = 0;
+	int nr_types, id;
+
+	if (!btf) {
+		pr_debug("No BTF found for program\n");
+		return NULL;
+	}
+
+	prefix_len = strlen(func_name);
+	nr_types = btf__type_cnt(btf);
+
+	for (id = 1; id < nr_types; id++) {
+		const struct btf_type *t = btf__type_by_id(btf, id);
+		const char *name;
+
+		if (!btf_is_func(t))
+			continue;
+
+		name = btf__name_by_offset(btf, t->name_off);
+		if (strncmp(name, func_name, prefix_len) != 0)
+			continue;
+
+		pr_debug("Found func %s matching %s\n",
+			 name, func_name);
+
+		/* exact match */
+		if (strlen(name) == prefix_len)
+			return t;
+
+		/* prefix only, may not be unique */
+		nr_matches++;
+		candidate = t;
+	}
+
+	/* unique match */
+	if (nr_matches == 1)
+		return candidate;
+
+	pr_debug("Function '%s' not found or ambiguous (%zu matches).\n",
+		 func_name, nr_matches);
+	return NULL;
+}
+
+/* Return the BTF DATASEC type named sec_name, or NULL if btf is NULL or no
+ * such section exists.
+ */
+static const struct btf_type *btf_get_datasec(const struct btf *btf,
+					       const char *sec_name)
+{
+	int nr_types, id;
+
+	if (!btf) {
+		pr_debug("No BTF found for program\n");
+		return NULL;
+	}
+
+	nr_types = btf__type_cnt(btf);
+	for (id = 1; id < nr_types; id++) {
+		const struct btf_type *t = btf__type_by_id(btf, id);
+
+		if (btf_is_datasec(t) &&
+		    !strcmp(btf__name_by_offset(btf, t->name_off), sec_name))
+			return t;
+	}
+
+	pr_debug("DATASEC '%s' not found.\n", sec_name);
+	return NULL;
+}
+
+/* Find the variable var_name in DATASEC sec and return its underlying type
+ * (modifiers and typedefs stripped), which must be of BTF kind 'kind'.
+ *
+ * Returns ERR_PTR(-ENOENT) if no variable has that name, ERR_PTR(-EINVAL)
+ * if the entry is not a VAR or its type has the wrong kind, and
+ * ERR_PTR(-EOPNOTSUPP) if the variable linkage is neither global-allocated
+ * nor static.
+ */
+static const struct btf_type *btf_get_section_var(const struct btf *btf,
+						   const struct btf_type *sec,
+						   const char *var_name,
+						   __u16 kind)
+{
+	const struct btf_var_secinfo *info = btf_var_secinfos(sec);
+	int nr_vars = btf_vlen(sec);
+	int idx;
+
+	for (idx = 0; idx < nr_vars; idx++, info++) {
+		const struct btf_type *var = btf__type_by_id(btf, info->type);
+		const char *name = btf__name_by_offset(btf, var->name_off);
+		const struct btf_var *extra;
+		const struct btf_type *def;
+
+		if (strcmp(name, var_name) != 0)
+			continue;
+
+		if (!btf_is_var(var)) {
+			pr_warn("struct '%s': unexpected var kind %u.\n",
+				name, btf_kind(var));
+			return ERR_PTR(-EINVAL);
+		}
+
+		extra = btf_var(var);
+		if (extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
+		    extra->linkage != BTF_VAR_STATIC) {
+			pr_warn("struct '%s': unsupported var linkage %u.\n",
+				name, extra->linkage);
+			return ERR_PTR(-EOPNOTSUPP);
+		}
+
+		def = skip_mods_and_typedefs(btf, var->type, NULL);
+		if (btf_kind(def) != kind) {
+			pr_warn("var '%s': unexpected def kind %u.\n",
+				name, btf_kind(def));
+			return ERR_PTR(-EINVAL);
+		}
+		return def;
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+/**
+ * Parse the run configuration that accompanies an XDP program.
+ *
+ * The configuration is expressed in BTF, much like BTF-defined maps are. A
+ * definition looks like this:
+ *
+ * struct {
+ *	__uint(priority, 10);
+ *	__uint(XDP_PASS, 1);
+ * } XDP_RUN_CONFIG(FUNCNAME);
+ *
+ * The priority is a plain integer used to order programs when they are
+ * attached to an interface (cmp_xdp_programs() has the complete sort order).
+ * Besides the priority, an integer may be given for each XDP action; a
+ * non-zero value means that execution moves on to the next loaded program
+ * when the current one returns that action. In the example above, XDP_PASS
+ * lets execution continue, while every other return value makes the
+ * dispatcher exit with that return code.
+ *
+ * As the configuration is part of the object file's BTF, it survives being
+ * loaded into the kernel and can be read back from already-loaded programs.
+ *
+ * If btf is NULL, the BTF stored in xdp_prog is used. Returns 0 on success,
+ * -ENOENT if the config section or the struct for this program is missing,
+ * or another negative error code for malformed configuration.
+ */
+static int xdp_program__parse_btf(struct xdp_program *xdp_prog,
+				  const struct btf *btf)
+{
+	const struct btf_type *cfg, *cfg_sec;
+	const struct btf_member *member;
+	char struct_name[100];
+	int err, idx, nr_members;
+
+	if (!btf)
+		btf = xdp_program__btf(xdp_prog);
+
+	/* A name as long as the kernel's object name limit may have been
+	 * truncated; if so, try to recover the full name by looking for a
+	 * matching function in the BTF data.
+	 */
+	if (strlen(xdp_prog->prog_name) >= BPF_OBJ_NAME_LEN - 1) {
+		const struct btf_type *func;
+
+		func = btf_get_function(btf, xdp_prog->prog_name);
+		if (func) {
+			char *full_name;
+
+			full_name = strdup(btf__name_by_offset(btf, func->name_off));
+			if (!full_name)
+				return -ENOMEM;
+			free(xdp_prog->prog_name);
+			xdp_prog->prog_name = full_name;
+		}
+	}
+
+	err = try_snprintf(struct_name, sizeof(struct_name), "_%s",
+			   xdp_program__name(xdp_prog));
+	if (err)
+		return err;
+
+	cfg_sec = btf_get_datasec(btf, XDP_RUN_CONFIG_SEC);
+	if (!cfg_sec)
+		return -ENOENT;
+
+	cfg = btf_get_section_var(btf, cfg_sec, struct_name, BTF_KIND_STRUCT);
+	if (IS_ERR(cfg)) {
+		pr_debug("Couldn't find run order struct %s\n", struct_name);
+		return PTR_ERR(cfg);
+	}
+
+	nr_members = btf_vlen(cfg);
+	member = btf_members(cfg);
+	for (idx = 0; idx < nr_members; idx++, member++) {
+		const char *mname = btf__name_by_offset(btf, member->name_off);
+		const struct btf_type *m_t;
+		unsigned int val, act;
+
+		if (!mname) {
+			pr_warn("struct '%s': invalid field #%d.\n", struct_name, idx);
+			return -EINVAL;
+		}
+		m_t = skip_mods_and_typedefs(btf, member->type, NULL);
+
+		if (!strcmp(mname, "priority")) {
+			if (!get_field_int(btf, mname, m_t, &xdp_prog->run_prio))
+				return -EINVAL;
+			continue;
+		}
+
+		if (!get_xdp_action(mname, &act)) {
+			pr_warn("Invalid mname: %s\n", mname);
+			return -ENOTSUP;
+		}
+
+		if (!get_field_int(btf, mname, m_t, &val))
+			return -EINVAL;
+		xdp_program__set_chain_call_enabled(xdp_prog, act, val);
+	}
+
+	return 0;
+}
+
+/* Allocate a zeroed xdp_program with both fds set to -1 and the default run
+ * priority and chain call actions. Returns ERR_PTR(-ENOMEM) if the
+ * allocation fails.
+ */
+static struct xdp_program *xdp_program__new(void)
+{
+	struct xdp_program *new_prog = calloc(1, sizeof(*new_prog));
+
+	if (!new_prog)
+		return ERR_PTR(-ENOMEM);
+
+	new_prog->prog_fd = -1;
+	new_prog->link_fd = -1;
+	new_prog->run_prio = XDP_DEFAULT_RUN_PRIO;
+	new_prog->chain_call_actions = XDP_DEFAULT_CHAIN_CALL_ACTIONS;
+
+	return new_prog;
+}
+
+/* Release an xdp_program: close its link and program fds, free its name
+ * strings and, unless the bpf_object came from outside libxdp, close the
+ * bpf_object or, when there is none, free the BTF object. NULL is a no-op.
+ */
+void xdp_program__close(struct xdp_program *xdp_prog)
+{
+	bool owns_obj;
+
+	if (!xdp_prog)
+		return;
+
+	if (xdp_prog->link_fd >= 0)
+		close(xdp_prog->link_fd);
+	if (xdp_prog->prog_fd >= 0)
+		close(xdp_prog->prog_fd);
+
+	free(xdp_prog->prog_name);
+	free(xdp_prog->attach_name);
+
+	owns_obj = !xdp_prog->from_external_obj;
+	if (owns_obj && xdp_prog->bpf_obj)
+		bpf_object__close(xdp_prog->bpf_obj);
+	else if (owns_obj && xdp_prog->btf)
+		btf__free(xdp_prog->btf);
+
+	free(xdp_prog);
+}
+
+/* Build an xdp_program for one program inside obj.
+ *
+ * The program is picked by section_name or by prog_name (passing both is
+ * rejected); with neither, the first program in the object is used.
+ * 'external' records whether obj belongs to the caller.
+ *
+ * Returns the new object or an ERR_PTR(), never NULL. The object fields are
+ * only filled in once BTF parsing has succeeded, so the error path never
+ * closes obj.
+ */
+static struct xdp_program *xdp_program__create_from_obj(struct bpf_object *obj,
+							 const char *section_name,
+							 const char *prog_name,
+							 bool external)
+{
+	struct bpf_program *found;
+	struct xdp_program *new_prog;
+	int rc;
+
+	if (!obj || (section_name && prog_name))
+		return ERR_PTR(-EINVAL);
+
+	if (section_name)
+		found = bpf_program_by_section_name(obj, section_name);
+	else if (prog_name)
+		found = bpf_object__find_program_by_name(obj, prog_name);
+	else
+		found = bpf_object__next_program(obj, NULL);
+
+	if (!found) {
+		pr_warn("Couldn't find xdp program in bpf object%s%s\n",
+			section_name ? " section " : "", section_name ?: "");
+		return ERR_PTR(-ENOENT);
+	}
+
+	new_prog = xdp_program__new();
+	if (IS_ERR(new_prog))
+		return new_prog;
+
+	new_prog->prog_name = strdup(bpf_program__name(found));
+	if (!new_prog->prog_name) {
+		xdp_program__close(new_prog);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* a missing run config (-ENOENT) is not an error */
+	rc = xdp_program__parse_btf(new_prog, bpf_object__btf(obj));
+	if (rc && rc != -ENOENT) {
+		xdp_program__close(new_prog);
+		return ERR_PTR(rc);
+	}
+
+	new_prog->bpf_prog = found;
+	new_prog->bpf_obj = obj;
+	new_prog->btf = bpf_object__btf(obj);
+	new_prog->from_external_obj = external;
+
+	return new_prog;
+}
+
+/* Public constructor: wrap the program found in section_name of a
+ * caller-owned bpf_object. Errors are reported through libxdp_err_ptr().
+ */
+struct xdp_program *xdp_program__from_bpf_obj(struct bpf_object *obj,
+					       const char *section_name)
+{
+	struct xdp_program *new_prog;
+
+	new_prog = xdp_program__create_from_obj(obj, section_name, NULL, true);
+	/* xdp_program__create_from_obj does not return NULL */
+	if (IS_ERR(new_prog))
+		return libxdp_err_ptr(PTR_ERR(new_prog), false);
+
+	return new_prog;
+}
+
+/* Open a BPF object file with libbpf. Returns the object, or an ERR_PTR()
+ * carrying libbpf's error code; -ENOENT additionally logs a debug hint.
+ */
+static struct bpf_object *open_bpf_obj(const char *filename,
+				       struct bpf_object_open_opts *opts)
+{
+	struct bpf_object *bpf_obj = bpf_object__open_file(filename, opts);
+	int rc = libbpf_get_error(bpf_obj);
+
+	if (!rc)
+		return bpf_obj;
+
+	if (rc == -ENOENT)
+		pr_debug(
+			"Couldn't load the eBPF program (libbpf said 'no such file').\n"
+			"Maybe the program was compiled with a too old "
+			"version of LLVM (need v9.0+)?\n");
+
+	return ERR_PTR(rc);
+}
+
+/* Open filename as a BPF object and create an xdp_program from it, selected
+ * by section_name or prog_name. The opened object is closed again if the
+ * program cannot be created. Returns the program or an ERR_PTR().
+ */
+static struct xdp_program *__xdp_program__open_file(const char *filename,
+						     const char *section_name,
+						     const char *prog_name,
+						     struct bpf_object_open_opts *opts)
+{
+	struct xdp_program *new_prog;
+	struct bpf_object *bpf_obj;
+
+	if (!filename)
+		return ERR_PTR(-EINVAL);
+
+	bpf_obj = open_bpf_obj(filename, opts);
+	if (IS_ERR(bpf_obj))
+		return ERR_PTR(PTR_ERR(bpf_obj));
+
+	new_prog = xdp_program__create_from_obj(bpf_obj, section_name,
+						prog_name, false);
+	if (IS_ERR(new_prog))
+		bpf_object__close(bpf_obj);
+
+	return new_prog;
+}
+
+/* Public constructor: open filename and wrap the program found in
+ * section_name. Errors are reported through libxdp_err_ptr().
+ */
+struct xdp_program *xdp_program__open_file(const char *filename,
+					    const char *section_name,
+					    struct bpf_object_open_opts *opts)
+{
+	struct xdp_program *new_prog;
+
+	new_prog = __xdp_program__open_file(filename, section_name, NULL, opts);
+	/* __xdp_program__open_file does not return NULL */
+	if (IS_ERR(new_prog))
+		return libxdp_err_ptr(PTR_ERR(new_prog), false);
+
+	return new_prog;
+}
+
+/* Format "<path>/<progname>" into buf and report whether stat() succeeds on
+ * the result. Returns false if the path does not fit into buf.
+ */
+static bool try_bpf_file(char *buf, size_t buf_size, char *path,
+			 const char *progname)
+{
+	struct stat st = {};
+
+	if (try_snprintf(buf, buf_size, "%s/%s", path, progname))
+		return false;
+
+	pr_debug("Looking for '%s'\n", buf);
+	return stat(buf, &st) == 0;
+}
+
+/* Locate the BPF object file progname and store its path in buf.
+ *
+ * If the XDP_OBJECT_ENVVAR environment variable is set, only that directory
+ * is tried; otherwise the built-in search paths are tried in order. Returns
+ * 0 when the file is found, -ENOENT (after a warning) otherwise.
+ */
+static int find_bpf_file(char *buf, size_t buf_size, const char *progname)
+{
+	static char *bpf_obj_paths[] = {
+#ifdef DEBUG
+		".",
+#endif
+		BPF_OBJECT_PATH,
+		NULL
+	};
+	char *env_path = secure_getenv(XDP_OBJECT_ENVVAR);
+	char **candidate;
+
+	if (env_path) {
+		if (try_bpf_file(buf, buf_size, env_path, progname))
+			return 0;
+	} else {
+		for (candidate = bpf_obj_paths; *candidate; candidate++) {
+			if (try_bpf_file(buf, buf_size, *candidate, progname))
+				return 0;
+		}
+	}
+
+	pr_warn("Couldn't find a BPF file with name %s\n", progname);
+	return -ENOENT;
+}
+
+/* Find and open the XDP program 'filename'.
+ *
+ * Whatever non-NULL value xdp_program__find_embedded() returns is handed
+ * back as is. Otherwise the file is looked up via find_bpf_file() and
+ * opened.
+ */
+static struct xdp_program *__xdp_program__find_file(const char *filename,
+						     const char *section_name,
+						     const char *prog_name,
+						     struct bpf_object_open_opts *opts)
+{
+	struct xdp_program *embedded;
+	char path[PATH_MAX];
+	const char *what;
+	int rc;
+
+	embedded = xdp_program__find_embedded(filename, section_name, prog_name, opts);
+	if (embedded)
+		return embedded;
+
+	rc = find_bpf_file(path, sizeof(path), filename);
+	if (rc)
+		return ERR_PTR(rc);
+
+	what = section_name ? section_name
+			    : (prog_name ? prog_name : "(unknown)");
+	pr_debug("Loading XDP program from '%s' section '%s'\n", path, what);
+
+	return __xdp_program__open_file(path, section_name, prog_name, opts);
+}
+
+/* Public constructor: locate filename and wrap the program found in
+ * section_name. Errors are reported through libxdp_err_ptr().
+ */
+struct xdp_program *xdp_program__find_file(const char *filename,
+					    const char *section_name,
+					    struct bpf_object_open_opts *opts)
+{
+	struct xdp_program *new_prog;
+
+	new_prog = __xdp_program__find_file(filename, section_name, NULL, opts);
+	/* __xdp_program__find_file does not return NULL */
+	if (IS_ERR(new_prog))
+		return libxdp_err_ptr(PTR_ERR(new_prog), false);
+
+	return new_prog;
+}
+
+/* Populate xdp_prog from the kernel program referred to by fd.
+ *
+ * fd is duplicated (close-on-exec, at or above MIN_FD) and the duplicate is
+ * stored in xdp_prog; the caller keeps ownership of fd itself. Tag, load
+ * time, ID and type are copied from the kernel's program info. The name and
+ * the BTF object are only filled in when xdp_prog does not already have
+ * them.
+ *
+ * Returns 0 on success or a negative error code; on failure the duplicated
+ * fd is closed and prog_fd is left untouched.
+ */
+static int xdp_program__fill_from_fd(struct xdp_program *xdp_prog, int fd)
+{
+	struct bpf_prog_info info = {};
+	__u32 len = sizeof(info);
+	struct btf *btf = NULL;
+	int err = 0, prog_fd;
+
+	if (!xdp_prog)
+		return -EINVAL;
+
+	/* Duplicate the descriptor, as we take ownership of the fd below */
+	prog_fd = fcntl(fd, F_DUPFD_CLOEXEC, MIN_FD);
+	if (prog_fd < 0) {
+		err = -errno;
+		pr_debug("Error on fcntl: %s", strerror(-err));
+		return err;
+	}
+
+	err = bpf_obj_get_info_by_fd(prog_fd, &info, &len);
+	if (err) {
+		err = -errno;
+		pr_warn("couldn't get program info: %s", strerror(-err));
+		goto err;
+	}
+
+	if (!xdp_prog->prog_name) {
+		xdp_prog->prog_name = strdup(info.name);
+		if (!xdp_prog->prog_name) {
+			err = -ENOMEM;
+			pr_warn("failed to strdup program title");
+			goto err;
+		}
+	}
+
+	if (info.btf_id && !xdp_prog->btf) {
+		btf = btf__load_from_kernel_by_id(info.btf_id);
+		if (!btf) {
+			/* err is still 0 here; without setting it the caller
+			 * would see success while the fd has been closed.
+			 */
+			err = errno > 0 ? -errno : -ENOENT;
+			pr_warn("Couldn't get BTF for ID %u\n", info.btf_id);
+			goto err;
+		}
+		xdp_prog->btf = btf;
+	}
+
+	pr_debug("Duplicated fd %d to %d for prog %s\n", fd, prog_fd, xdp_prog->prog_name);
+	memcpy(xdp_prog->prog_tag, info.tag, BPF_TAG_SIZE);
+	xdp_prog->load_time = info.load_time;
+	xdp_prog->prog_fd = prog_fd;
+	xdp_prog->prog_id = info.id;
+	xdp_prog->prog_type = info.type;
+
+	return 0;
+err:
+	close(prog_fd);
+	btf__free(btf);
+	return err;
+}
+
+/* Public constructor: build an xdp_program from a program fd.
+ *
+ * The fd is duplicated by xdp_program__fill_from_fd(), so the caller keeps
+ * ownership of the fd it passed in. A missing run config (-ENOENT from the
+ * BTF parsing step) is not treated as an error. Failures are reported
+ * through libxdp_err_ptr().
+ */
+struct xdp_program *xdp_program__from_fd(int fd)
+{
+	struct xdp_program *new_prog = NULL;
+	int rc;
+
+	new_prog = xdp_program__new();
+	if (IS_ERR(new_prog))
+		return libxdp_err_ptr(PTR_ERR(new_prog), false);
+
+	rc = xdp_program__fill_from_fd(new_prog, fd);
+	if (!rc) {
+		rc = xdp_program__parse_btf(new_prog, NULL);
+		if (!rc || rc == -ENOENT)
+			return new_prog;
+	}
+
+	xdp_program__close(new_prog);
+	return libxdp_err_ptr(rc, false);
+}
+
+/* Public constructor: build an xdp_program from a kernel program ID.
+ *
+ * The fd obtained for the ID is temporary: xdp_program__from_fd() works on
+ * its own duplicate, so the fd is always closed here, on success as well as
+ * on failure, to avoid leaking a descriptor per call. errno as set by
+ * xdp_program__from_fd() is preserved across the close().
+ */
+struct xdp_program *xdp_program__from_id(__u32 id)
+{
+	struct xdp_program *prog;
+	int fd, err;
+
+	fd = bpf_prog_get_fd_by_id(id);
+	if (fd < 0) {
+		err = -errno;
+		pr_warn("couldn't get program fd: %s", strerror(-err));
+		return libxdp_err_ptr(err, false);
+	}
+
+	prog = xdp_program__from_fd(fd);
+
+	err = errno;
+	close(fd);
+	errno = err;
+
+	return prog;
+}
+
+/* Public constructor: build an xdp_program from a pinned program.
+ *
+ * The fd obtained from the pin path is temporary: xdp_program__from_fd()
+ * works on its own duplicate, so the fd is always closed here, on success
+ * as well as on failure, to avoid leaking a descriptor per call. errno as
+ * set by xdp_program__from_fd() is preserved across the close().
+ */
+struct xdp_program *xdp_program__from_pin(const char *pin_path)
+{
+	struct xdp_program *prog;
+	int fd, err;
+
+	fd = bpf_obj_get(pin_path);
+	if (fd < 0) {
+		err = -errno;
+		pr_warn("couldn't get program fd from %s: %s",
+			pin_path, strerror(-err));
+		return libxdp_err_ptr(err, false);
+	}
+
+	prog = xdp_program__from_fd(fd);
+
+	err = errno;
+	close(fd);
+	errno = err;
+
+	return prog;
+}
+
+/* Generic constructor driven by an options struct.
+ *
+ * Exactly one program source must be given: obj, find_filename,
+ * open_filename, pin_path, id or fd (for id and fd, 0 means "not set").
+ * prog_name may only accompany the first three, and opts (the bpf object
+ * open options) only the two filename sources. Any other combination fails
+ * with -EINVAL. Errors are reported through libxdp_err_ptr().
+ */
+struct xdp_program *xdp_program__create(struct xdp_program_opts *opts)
+{
+	const char *pin_path, *prog_name, *find_filename, *open_filename;
+	struct bpf_object_open_opts *obj_opts;
+	struct xdp_program *new_prog;
+	struct bpf_object *obj;
+	__u32 id;
+	int fd;
+
+	if (!opts || !OPTS_VALID(opts, xdp_program_opts))
+		goto invalid;
+
+	obj = OPTS_GET(opts, obj, NULL);
+	obj_opts = OPTS_GET(opts, opts, NULL);
+	prog_name = OPTS_GET(opts, prog_name, NULL);
+	find_filename = OPTS_GET(opts, find_filename, NULL);
+	open_filename = OPTS_GET(opts, open_filename, NULL);
+	pin_path = OPTS_GET(opts, pin_path, NULL);
+	id = OPTS_GET(opts, id, 0);
+	fd = OPTS_GET(opts, fd, 0);
+
+	/* Each branch only has to reject the sources tested further down the
+	 * chain; the ones tested before it are known to be unset.
+	 */
+	if (obj) { /* prog_name is optional */
+		if (obj_opts || find_filename || open_filename || pin_path || id || fd)
+			goto invalid;
+		new_prog = xdp_program__create_from_obj(obj, NULL, prog_name, true);
+	} else if (find_filename) { /* prog_name, obj_opts is optional */
+		if (open_filename || pin_path || id || fd)
+			goto invalid;
+		new_prog = __xdp_program__find_file(find_filename, NULL, prog_name, obj_opts);
+	} else if (open_filename) { /* prog_name, obj_opts is optional */
+		if (pin_path || id || fd)
+			goto invalid;
+		new_prog = __xdp_program__open_file(open_filename, NULL, prog_name, obj_opts);
+	} else if (pin_path) {
+		if (obj_opts || prog_name || id || fd)
+			goto invalid;
+		new_prog = xdp_program__from_pin(pin_path);
+	} else if (id) {
+		if (obj_opts || prog_name || fd)
+			goto invalid;
+		new_prog = xdp_program__from_id(id);
+	} else if (fd) {
+		if (obj_opts || prog_name)
+			goto invalid;
+		new_prog = xdp_program__from_fd(fd);
+	} else {
+		goto invalid;
+	}
+
+	if (!IS_ERR(new_prog))
+		return new_prog;
+
+	return libxdp_err_ptr(PTR_ERR(new_prog), true);
+invalid:
+	return libxdp_err_ptr(-EINVAL, true);
+}
+
+/* Comparison callback taking pointers to xdp_program pointers (qsort-style
+ * signature).
+ *
+ * Programs are ordered by run priority, then by name. The remaining checks
+ * only serve to keep the order stable when both are equal: loaded before
+ * unloaded, instruction count, tag and finally load time.
+ */
+static int cmp_xdp_programs(const void *_a, const void *_b)
+{
+	const struct xdp_program *lhs = *(struct xdp_program * const *)_a;
+	const struct xdp_program *rhs = *(struct xdp_program * const *)_b;
+	bool lhs_loaded, rhs_loaded;
+	int order;
+
+	if (lhs->run_prio != rhs->run_prio)
+		return lhs->run_prio < rhs->run_prio ? -1 : 1;
+
+	order = strcmp(lhs->prog_name, rhs->prog_name);
+	if (order)
+		return order;
+
+	/* Priority and name should settle most comparisons; what follows is
+	 * a best effort at keeping the order stable when they do not.
+	 */
+
+	/* loaded before non-loaded */
+	lhs_loaded = lhs->prog_fd >= 0;
+	rhs_loaded = rhs->prog_fd >= 0;
+	if (lhs_loaded != rhs_loaded)
+		return lhs_loaded ? -1 : 1;
+
+	/* both have a bpf_program - compare by instruction count */
+	if (lhs->bpf_prog && rhs->bpf_prog) {
+		size_t lhs_cnt = bpf_program__insn_cnt(lhs->bpf_prog);
+		size_t rhs_cnt = bpf_program__insn_cnt(rhs->bpf_prog);
+
+		if (lhs_cnt != rhs_cnt)
+			return lhs_cnt < rhs_cnt ? -1 : 1;
+	}
+
+	order = memcmp(lhs->prog_tag, rhs->prog_tag, BPF_TAG_SIZE);
+	if (order)
+		return order;
+
+	/* at this point we are really grasping for straws */
+	if (lhs->load_time != rhs->load_time)
+		return lhs->load_time < rhs->load_time ? -1 : 1;
+
+	return 0;
+}
+
+/* Pin a loaded program at pin_path using libbpf's bpf_program__pin().
+ *
+ * Pinning goes through the libbpf program object, so a program that has an
+ * fd but no bpf_program (e.g. one created from an fd, ID or pin path) is
+ * rejected with -EINVAL rather than handing a NULL pointer to libbpf.
+ *
+ * Returns 0 on success or a negative error code through libxdp_err().
+ */
+int xdp_program__pin(struct xdp_program *prog, const char *pin_path)
+{
+	if (IS_ERR_OR_NULL(prog) || prog->prog_fd < 0 || !prog->bpf_prog)
+		return libxdp_err(-EINVAL);
+
+	return libxdp_err(bpf_program__pin(prog->bpf_prog, pin_path));
+}
+
+/* Load the bpf_object behind prog (unless it is loaded already) and fill in
+ * the kernel-side fields of prog from the resulting program fd.
+ *
+ * Returns 0 on success, -EINVAL for an invalid program or one without a
+ * bpf object/program, -EEXIST if prog already has an fd, or the error from
+ * bpf_object__load() / xdp_program__fill_from_fd().
+ */
+static int xdp_program__load(struct xdp_program *prog)
+{
+	struct bpf_program *bpf_prog;
+	bool autoload, already_loaded;
+	int err;
+
+	if (IS_ERR_OR_NULL(prog))
+		return -EINVAL;
+
+	if (prog->prog_fd >= 0)
+		return -EEXIST;
+
+	if (!prog->bpf_obj || !prog->bpf_prog)
+		return -EINVAL;
+
+	bpf_prog = prog->bpf_prog;
+
+	/* libbpf has no API for asking whether an object is loaded, but
+	 * bpf_program__set_autoload() fails once it is, so re-setting the
+	 * current autoload value doubles as a probe. This matters for objects
+	 * holding several programs: xdp_program references created before the
+	 * object was loaded would otherwise get out of sync.
+	 */
+	autoload = bpf_program__autoload(bpf_prog);
+	already_loaded = bpf_program__set_autoload(bpf_prog, autoload) != 0;
+
+	if (!already_loaded) {
+		/* We got an explicit load request, make sure we actually load */
+		if (!autoload)
+			bpf_program__set_autoload(bpf_prog, true);
+
+		/* Sync is_frags into our own state variable (it may have been
+		 * changed on the bpf_prog since creation), and drop the flag
+		 * when loading an EXT program: in that case the dispatcher
+		 * carries the flag instead.
+		 */
+		prog->is_frags = xdp_program__xdp_frags_support(prog);
+
+		if (bpf_program__type(bpf_prog) == BPF_PROG_TYPE_EXT)
+			bpf_program__set_flags(bpf_prog,
+					       bpf_program__flags(bpf_prog) & ~BPF_F_XDP_HAS_FRAGS);
+
+		err = bpf_object__load(prog->bpf_obj);
+		if (err)
+			return err;
+
+		pr_debug("Loaded XDP program %s, got fd %d\n",
+			 xdp_program__name(prog), bpf_program__fd(bpf_prog));
+	} else {
+		pr_debug("XDP program %s is already loaded with fd %d\n",
+			 xdp_program__name(prog), bpf_program__fd(bpf_prog));
+
+		prog->is_frags = !!(bpf_program__flags(bpf_prog) & BPF_F_XDP_HAS_FRAGS);
+	}
+
+	/* xdp_program__fill_from_fd() clones the fd and takes ownership of the clone */
+	return xdp_program__fill_from_fd(prog, bpf_program__fd(bpf_prog));
+}
+
+/* Create an independent xdp_program object referring to the same program.
+ *
+ * @prog:  program to clone; needs either an fd or a bpf_object
+ * @flags: must be 0
+ *
+ * Errors are reported through libxdp_err_ptr() on every path, including the
+ * unloaded one, which relies on an internal helper that returns a raw
+ * ERR_PTR().
+ */
+struct xdp_program *xdp_program__clone(struct xdp_program *prog, unsigned int flags)
+{
+	struct xdp_program *new_prog;
+
+	if (IS_ERR_OR_NULL(prog) || flags || (prog->prog_fd < 0 && !prog->bpf_obj))
+		return libxdp_err_ptr(-EINVAL, false);
+
+	if (prog->prog_fd >= 0)
+		/* Clone a loaded program struct by creating a new object from the
+		   program fd; xdp_program__fill_from_fd() already duplicates the fd
+		   before filling in the object, so this creates a completely
+		   independent xdp_program object.
+		 */
+		return xdp_program__from_fd(prog->prog_fd);
+
+	new_prog = xdp_program__create_from_obj(prog->bpf_obj, NULL,
+						prog->prog_name, true);
+	/* xdp_program__create_from_obj does not return NULL */
+	if (IS_ERR(new_prog))
+		return libxdp_err_ptr(PTR_ERR(new_prog), false);
+
+	return new_prog;
+}
+
+
+/* Attach prog directly to ifindex, without a dispatcher. A program without
+ * an fd is first given the XDP program type and loaded. The attach only
+ * succeeds if no program is attached yet (old fd of -1).
+ */
+static int xdp_program__attach_single(struct xdp_program *prog, int ifindex,
+				      enum xdp_attach_mode mode)
+{
+	if (prog->prog_fd < 0) {
+		int rc;
+
+		bpf_program__set_type(prog->bpf_prog, BPF_PROG_TYPE_XDP);
+		rc = xdp_program__load(prog);
+		if (rc)
+			return rc;
+
+		/* loading must have produced an fd */
+		if (prog->prog_fd < 0)
+			return -EINVAL;
+	}
+
+	return xdp_attach_fd(xdp_program__fd(prog), -1, ifindex, mode);
+}
+
+
+/* Return the fd of mp's main program; -EINVAL for an invalid mp, -ENOENT
+ * if there is no main program.
+ */
+static int xdp_multiprog__main_fd(struct xdp_multiprog *mp)
+{
+	if (IS_ERR_OR_NULL(mp))
+		return -EINVAL;
+
+	return mp->main_prog ? mp->main_prog->prog_fd : -ENOENT;
+}
+
+/* Return the ID of mp's main program, or 0 if mp is invalid or has none. */
+static __u32 xdp_multiprog__main_id(struct xdp_multiprog *mp)
+{
+	if (!IS_ERR_OR_NULL(mp) && mp->main_prog)
+		return mp->main_prog->prog_id;
+
+	return 0;
+}
+
+/* Return the fd of mp's HW program; -EINVAL for an invalid mp, -ENOENT if
+ * there is no HW program.
+ */
+static int xdp_multiprog__hw_fd(struct xdp_multiprog *mp)
+{
+	if (IS_ERR_OR_NULL(mp))
+		return -EINVAL;
+
+	return mp->hw_prog ? mp->hw_prog->prog_fd : -ENOENT;
+}
+
+/* Return the ID of mp's HW program, or 0 if mp is invalid or has none. */
+static __u32 xdp_multiprog__hw_id(struct xdp_multiprog *mp)
+{
+	if (!IS_ERR_OR_NULL(mp) && mp->hw_prog)
+		return mp->hw_prog->prog_id;
+
+	return 0;
+}
+
+/* Attach prog to ifindex in HW mode. Before attaching, ifindex is set on
+ * the program and on every map of its bpf_object.
+ */
+static int xdp_program__attach_hw(struct xdp_program *prog, int ifindex)
+{
+	struct bpf_map *cur_map;
+
+	bpf_program__set_ifindex(prog->bpf_prog, ifindex);
+
+	bpf_object__for_each_map (cur_map, prog->bpf_obj)
+		bpf_map__set_ifindex(cur_map, ifindex);
+
+	return xdp_program__attach_single(prog, ifindex, XDP_MODE_HW);
+}
+
+/*
+ * Detach the HW-mode program recorded in @old_mp from its interface.
+ *
+ * Returns 0 on success, -EINVAL if @old_mp is NULL or has no usable HW
+ * program fd, or the negative error from xdp_attach_fd().
+ */
+static int xdp_multiprog__detach_hw(struct xdp_multiprog *old_mp)
+{
+	int ifindex, hw_fd, ret;
+
+	if (!old_mp)
+		return -EINVAL;
+
+	ifindex = old_mp->ifindex;
+
+	hw_fd = xdp_multiprog__hw_fd(old_mp);
+	if (hw_fd < 0)
+		return -EINVAL;
+
+	/* Passing -1 as the new fd and hw_fd as the old one removes it. */
+	ret = xdp_attach_fd(-1, hw_fd, ifindex, XDP_MODE_HW);
+	if (ret < 0)
+		return ret;
+
+	pr_debug("Detached hw program on ifindex '%d'\n", ifindex);
+	return 0;
+}
+
+/*
+ * xdp_program__attach_multi - attach one or more XDP programs to an interface
+ *
+ * @progs:     array of programs to attach
+ * @num_progs: number of entries in @progs; must be non-zero
+ * @ifindex:   interface to attach to
+ * @mode:      attach mode; XDP_MODE_HW bypasses the dispatcher, accepts
+ *             exactly one program and refuses to replace an existing HW
+ *             program
+ * @flags:     reserved, must be 0
+ *
+ * Generates a new dispatcher holding @progs in addition to the programs of
+ * the dispatcher currently on @ifindex (if any), pins it and attaches it in
+ * place of the old one. A single program is attached directly, without a
+ * dispatcher, when the XDP_SKIP_ENVVAR environment variable is "1" or when
+ * dispatcher generation fails with -EOPNOTSUPP.
+ *
+ * If the attach fails with -EAGAIN the whole sequence is retried with
+ * exponential backoff; once MAX_RETRY is exceeded -EBUSY is returned.
+ *
+ * Returns 0 on success or a negative errno passed through libxdp_err().
+ */
+int xdp_program__attach_multi(struct xdp_program **progs, size_t num_progs,
+			      int ifindex, enum xdp_attach_mode mode,
+			      unsigned int flags)
+{
+	struct xdp_multiprog *old_mp = NULL, *mp = NULL;
+	int err = 0, retry_counter = 0;
+
+	if (!progs || !num_progs || flags)
+		return libxdp_err(-EINVAL);
+
+retry:
+	old_mp = xdp_multiprog__get_from_ifindex(ifindex);
+	if (IS_ERR_OR_NULL(old_mp))
+		old_mp = NULL;
+
+	if (mode == XDP_MODE_HW) {
+		bool old_hw_prog = xdp_multiprog__hw_prog(old_mp) != NULL;
+
+		xdp_multiprog__close(old_mp);
+
+		if (old_hw_prog) {
+			pr_warn("XDP program already loaded in HW mode on ifindex %d; "
+				"replacing HW mode programs not supported\n", ifindex);
+			return libxdp_err(-EEXIST);
+		}
+
+		if (num_progs > 1)
+			return libxdp_err(-EINVAL);
+
+		return libxdp_err(xdp_program__attach_hw(progs[0], ifindex));
+	}
+
+	if (num_progs == 1) {
+		char *envval;
+
+		envval = secure_getenv(XDP_SKIP_ENVVAR);
+		if (envval && envval[0] == '1' && envval[1] == '\0') {
+			pr_debug("Skipping dispatcher due to environment setting\n");
+			err = xdp_program__attach_single(progs[0], ifindex, mode);
+			/* leave through 'out' so old_mp is released */
+			goto out;
+		}
+	}
+
+	mp = xdp_multiprog__generate(progs, num_progs, ifindex, old_mp, false);
+	if (IS_ERR(mp)) {
+		err = PTR_ERR(mp);
+		mp = NULL;
+		if (err == -EOPNOTSUPP) {
+			if (num_progs == 1) {
+				pr_info("Falling back to loading single prog "
+					"without dispatcher\n");
+				err = xdp_program__attach_single(progs[0], ifindex, mode);
+				/* leave through 'out' so old_mp is released */
+				goto out;
+			} else {
+				pr_warn("Can't fall back to legacy load with %zu "
+					"programs\n%s\n", num_progs, dispatcher_feature_err);
+			}
+		}
+		goto out;
+	}
+
+	err = xdp_multiprog__pin(mp);
+	if (err) {
+		pr_warn("Failed to pin program: %s\n", strerror(-err));
+		goto out_close;
+	}
+
+	err = xdp_multiprog__attach(old_mp, mp, mode);
+	if (err) {
+		pr_debug("Failed to attach dispatcher on ifindex %d: %s\n",
+			 ifindex, strerror(-err));
+		xdp_multiprog__unpin(mp);
+
+		if (err == -EAGAIN) {
+			if (++retry_counter > MAX_RETRY) {
+				pr_warn("Retried more than %d times, giving up\n",
+					retry_counter);
+				err = -EBUSY;
+				goto out_close;
+			}
+
+			pr_debug("Existing dispatcher replaced while building replacement, retrying.\n");
+			xdp_multiprog__close(old_mp);
+			xdp_multiprog__close(mp);
+			usleep(1 << retry_counter); /* exponential backoff */
+			goto retry;
+		}
+		goto out_close;
+	}
+
+	if (old_mp) {
+		/* The new dispatcher is live; failing to clean up the pins of
+		 * the old one is only worth a warning.
+		 */
+		err = xdp_multiprog__unpin(old_mp);
+		if (err) {
+			pr_warn("Failed to unpin old dispatcher: %s\n",
+				strerror(-err));
+			err = 0;
+		}
+	}
+
+out_close:
+	xdp_multiprog__close(mp);
+out:
+	if (old_mp)
+		xdp_multiprog__close(old_mp);
+	return libxdp_err(err);
+}
+
+/*
+ * Attach a single program to @ifindex; this is xdp_program__attach_multi()
+ * called with a one-element array. Returns -EINVAL (via libxdp_err()) when
+ * @prog is NULL or an error pointer.
+ */
+int xdp_program__attach(struct xdp_program *prog, int ifindex,
+			enum xdp_attach_mode mode,
+			unsigned int flags)
+{
+	int ret;
+
+	if (IS_ERR_OR_NULL(prog))
+		return libxdp_err(-EINVAL);
+
+	ret = xdp_program__attach_multi(&prog, 1, ifindex, mode, flags);
+	return libxdp_err(ret);
+}
+
+/*
+ * xdp_program__detach_multi - detach one or more programs from an interface
+ *
+ * @progs:     array of programs to detach; each must be loaded (non-zero id)
+ * @num_progs: number of entries in @progs; must be non-zero
+ * @ifindex:   interface to detach from
+ * @mode:      attach mode; XDP_MODE_UNSPEC matches whatever mode is in use
+ * @flags:     reserved, must be 0
+ *
+ * In HW mode, or when the attached program is not a dispatcher (legacy),
+ * exactly one program may be given and it must be the one attached. With a
+ * dispatcher, detaching all of its programs removes the dispatcher itself;
+ * detaching a subset generates a new dispatcher without them and swaps it in.
+ *
+ * An -EAGAIN result causes the whole operation to be retried with
+ * exponential backoff; once MAX_RETRY is exceeded -EBUSY is returned.
+ *
+ * Returns 0 on success or a negative errno passed through libxdp_err().
+ */
+int xdp_program__detach_multi(struct xdp_program **progs, size_t num_progs,
+			      int ifindex, enum xdp_attach_mode mode,
+			      unsigned int flags)
+{
+	struct xdp_multiprog *new_mp, *mp;
+	int err = 0, retry_counter = 0;
+	size_t i;
+
+	if (flags || !num_progs || !progs)
+		return libxdp_err(-EINVAL);
+
+retry:
+	new_mp = NULL;
+	mp = xdp_multiprog__get_from_ifindex(ifindex);
+	if (IS_ERR_OR_NULL(mp)) {
+		pr_warn("No XDP dispatcher found on ifindex %d\n", ifindex);
+		return libxdp_err(-ENOENT);
+	}
+
+	if (mode == XDP_MODE_HW || xdp_multiprog__is_legacy(mp)) {
+		__u32 id = (mode == XDP_MODE_HW) ?
+			xdp_multiprog__hw_id(mp) :
+			xdp_multiprog__main_id(mp);
+
+		if (num_progs > 1) {
+			pr_warn("Can only detach one program in legacy or HW mode\n");
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (!xdp_program__id(progs[0])) {
+			pr_warn("Program 0 not loaded\n");
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (id != xdp_program__id(progs[0])) {
+			pr_warn("Asked to unload prog %u but %u is loaded\n",
+				xdp_program__id(progs[0]), id);
+			err = -ENOENT;
+			goto out;
+		}
+	}
+
+	if (mode == XDP_MODE_HW) {
+		err = xdp_multiprog__detach_hw(mp);
+		goto out;
+	}
+
+	if (mode != XDP_MODE_UNSPEC && mp->attach_mode != mode) {
+		pr_warn("XDP dispatcher attached in mode %d, requested %d\n",
+			mp->attach_mode, mode);
+		err = -ENOENT;
+		goto out;
+	}
+
+	if (xdp_multiprog__is_legacy(mp)) {
+		err = xdp_multiprog__attach(mp, NULL, mode);
+		goto out;
+	}
+
+	/* first pass - check that every program is loaded and is part of
+	 * the dispatcher currently attached
+	 */
+	for (i = 0; i < num_progs; i++) {
+		struct xdp_program *p = NULL;
+		bool found = false;
+
+		/* reject NULL/ERR entries instead of dereferencing them */
+		if (IS_ERR_OR_NULL(progs[i]) || !progs[i]->prog_id) {
+			pr_warn("Program %zu not loaded\n", i);
+			err = -EINVAL;
+			goto out;
+		}
+
+		while ((p = xdp_multiprog__next_prog(p, mp))) {
+			if (progs[i]->prog_id == p->prog_id) {
+				found = true;
+				break;
+			}
+		}
+
+		if (!found) {
+			pr_warn("Couldn't find program with id %u on ifindex %d\n",
+				progs[i]->prog_id, ifindex);
+			err = -ENOENT;
+			goto out;
+		}
+	}
+
+	if (num_progs == mp->num_links) {
+		/* Everything goes: detach the dispatcher and drop its pins. */
+		err = xdp_multiprog__attach(mp, NULL, mp->attach_mode);
+		if (err)
+			goto out;
+
+		err = xdp_multiprog__unpin(mp);
+		if (err)
+			goto out;
+	} else {
+		/* Partial detach: build a dispatcher without @progs. */
+		new_mp = xdp_multiprog__generate(progs, num_progs, ifindex, mp, true);
+		if (IS_ERR(new_mp)) {
+			err = PTR_ERR(new_mp);
+			if (err == -EOPNOTSUPP) {
+				pr_warn("Asked to detach %zu progs, but %zu loaded on ifindex %d, "
+					"and partial detach is not supported by the kernel.\n",
+					num_progs, mp->num_links, ifindex);
+			}
+			goto out;
+		}
+		err = xdp_multiprog__pin(new_mp);
+		if (err) {
+			pr_warn("Failed to pin program: %s\n", strerror(-err));
+			goto out;
+		}
+
+		err = xdp_multiprog__attach(mp, new_mp, mode);
+		if (err) {
+			pr_debug("Failed to attach dispatcher on ifindex %d: %s\n",
+				 ifindex, strerror(-err));
+			xdp_multiprog__unpin(new_mp);
+			goto out;
+		}
+
+		/* The replacement is live; a failed cleanup of the old pins
+		 * is only worth a warning.
+		 */
+		err = xdp_multiprog__unpin(mp);
+		if (err) {
+			pr_warn("Failed to unpin old dispatcher: %s\n",
+				strerror(-err));
+			err = 0;
+		}
+	}
+
+out:
+	/* xdp_multiprog__close() ignores NULL and error pointers */
+	xdp_multiprog__close(mp);
+	xdp_multiprog__close(new_mp);
+	if (err == -EAGAIN) {
+		if (++retry_counter > MAX_RETRY) {
+			pr_warn("Retried more than %d times, giving up\n",
+				retry_counter);
+			return libxdp_err(-EBUSY);
+		}
+
+		pr_debug("Existing dispatcher replaced while building replacement, retrying.\n");
+		usleep(1 << retry_counter); /* exponential backoff */
+		goto retry;
+	}
+	return libxdp_err(err);
+}
+
+/*
+ * Detach a single program from @ifindex; this is xdp_program__detach_multi()
+ * called with a one-element array.
+ *
+ * Returns -EINVAL when @prog is NULL or an error pointer. The error goes
+ * through libxdp_err(), as in xdp_program__attach(), so every failure path
+ * of this function reports errors the same way.
+ */
+int xdp_program__detach(struct xdp_program *prog, int ifindex,
+			enum xdp_attach_mode mode,
+			unsigned int flags)
+{
+	if (IS_ERR_OR_NULL(prog))
+		return libxdp_err(-EINVAL);
+
+	return libxdp_err(xdp_program__detach_multi(&prog, 1, ifindex, mode, flags));
+}
+
+/*
+ * xdp_program__test_run - run a program through bpf_prog_test_run_opts()
+ *
+ * @prog:  program to run; it is loaded first if it has no fd yet
+ * @opts:  options handed unchanged to bpf_prog_test_run_opts()
+ * @flags: reserved, must be 0
+ *
+ * A BPF_PROG_TYPE_XDP program is run directly. A BPF_PROG_TYPE_EXT program
+ * is wrapped in a temporary single-program dispatcher and the dispatcher is
+ * run instead. Any other program type yields -ENOEXEC.
+ *
+ * Returns 0 on success or a negative errno passed through libxdp_err().
+ */
+int xdp_program__test_run(struct xdp_program *prog, struct bpf_test_run_opts *opts, unsigned int flags)
+{
+	struct xdp_multiprog *mp = NULL;
+	int err, prog_fd;
+
+	if (IS_ERR_OR_NULL(prog) || flags)
+		return libxdp_err(-EINVAL);
+
+	if (prog->prog_fd < 0) {
+		err = xdp_program__load(prog);
+		if (err)
+			return libxdp_err(err);
+	}
+
+	switch (prog->prog_type) {
+	case BPF_PROG_TYPE_EXT:
+		mp = xdp_multiprog__generate(&prog, 1, 0, NULL, false);
+		if (IS_ERR(mp)) {
+			err = PTR_ERR(mp);
+			if (err == -EOPNOTSUPP)
+				pr_warn("Program was already attached to a dispatcher, "
+					"and kernel doesn't support multiple attachments\n");
+			return libxdp_err(err);
+		}
+
+		prog_fd = xdp_multiprog__main_fd(mp);
+		break;
+	case BPF_PROG_TYPE_XDP:
+		prog_fd = prog->prog_fd;
+		break;
+	default:
+		pr_warn("Can't test_run non-XDP programs\n");
+		return libxdp_err(-ENOEXEC);
+	}
+
+	err = bpf_prog_test_run_opts(prog_fd, opts) ? -errno : 0;
+
+	/* drop the temporary dispatcher, if one was created */
+	if (mp)
+		xdp_multiprog__close(mp);
+
+	return libxdp_err(err);
+}
+
+/*
+ * Probe whether the kernel accepts XDP programs flagged BPF_F_XDP_HAS_FRAGS
+ * by loading the "xdp_pass" program from xdp-dispatcher.o with that flag set.
+ * The outcome is recorded in mp->kernel_frags_support (set to true only when
+ * the load succeeds).
+ *
+ * Returns 0 whether or not frags are supported; a negative error is returned
+ * only when the probe program cannot be opened.
+ */
+static int xdp_multiprog__check_kernel_frags_support(struct xdp_multiprog *mp)
+{
+	struct xdp_program *probe;
+	int ret;
+
+	pr_debug("Checking for kernel frags support\n");
+	probe = __xdp_program__find_file("xdp-dispatcher.o", NULL, "xdp_pass", NULL);
+	if (IS_ERR(probe)) {
+		ret = PTR_ERR(probe);
+		pr_warn("Couldn't open BPF file xdp-dispatcher.o\n");
+		return ret;
+	}
+
+	bpf_program__set_flags(probe->bpf_prog, BPF_F_XDP_HAS_FRAGS);
+	if (xdp_program__load(probe)) {
+		pr_debug("Kernel DOES NOT support XDP programs with frags\n");
+	} else {
+		pr_debug("Kernel supports XDP programs with frags\n");
+		mp->kernel_frags_support = true;
+	}
+
+	xdp_program__close(probe);
+	return 0;
+}
+
+/*
+ * Release a multiprog: close its main program, every component program in
+ * the first_prog list and its HW program, then free the structure itself.
+ * NULL and error pointers are ignored.
+ */
+void xdp_multiprog__close(struct xdp_multiprog *mp)
+{
+	struct xdp_program *cur, *following;
+
+	if (IS_ERR_OR_NULL(mp))
+		return;
+
+	xdp_program__close(mp->main_prog);
+
+	cur = mp->first_prog;
+	while (cur) {
+		/* fetch the successor before cur is closed */
+		following = cur->next;
+		xdp_program__close(cur);
+		cur = following;
+	}
+
+	xdp_program__close(mp->hw_prog);
+
+	free(mp);
+}
+
+/*
+ * Allocate a zero-initialised multiprog for @ifindex with its version set to
+ * XDP_DISPATCHER_VERSION. Returns ERR_PTR(-ENOMEM) on allocation failure.
+ */
+static struct xdp_multiprog *xdp_multiprog__new(int ifindex)
+{
+	struct xdp_multiprog *fresh = calloc(1, sizeof(*fresh));
+
+	if (!fresh)
+		return ERR_PTR(-ENOMEM);
+
+	fresh->ifindex = ifindex;
+	fresh->version = XDP_DISPATCHER_VERSION;
+
+	return fresh;
+}
+
+/*
+ * Load the dispatcher program of @mp into the kernel.
+ *
+ * Returns 0 on success, -EINVAL if @mp is invalid, has no main program, is
+ * already loaded or is a legacy multiprog, and -EOPNOTSUPP if loading the
+ * dispatcher fails for any reason.
+ */
+static int xdp_multiprog__load(struct xdp_multiprog *mp)
+{
+	char errbuf[100];
+	int err;
+
+	if (IS_ERR_OR_NULL(mp))
+		return -EINVAL;
+	if (!mp->main_prog || mp->is_loaded || xdp_multiprog__is_legacy(mp))
+		return -EINVAL;
+
+	pr_debug("Loading multiprog dispatcher for %d programs %s frags support\n",
+		 mp->config.num_progs_enabled,
+		 mp->config.is_xdp_frags ? "with" : "without");
+
+	if (mp->config.is_xdp_frags)
+		xdp_program__set_xdp_frags_support(mp->main_prog, true);
+
+	err = xdp_program__load(mp->main_prog);
+	if (err) {
+		pr_info("Failed to load dispatcher: %s\n",
+			libxdp_strerror_r(err, errbuf, sizeof(errbuf)));
+		/* every load failure is reported to the caller as "unsupported" */
+		return -EOPNOTSUPP;
+	}
+
+	mp->is_loaded = true;
+	return 0;
+}
+
+/*
+ * Read the integer stored in the variable @name of the XDP_METADATA_SECTION
+ * data section of @btf into @version.
+ *
+ * Returns 0 on success, -ENOENT if the section or the integer field is
+ * missing, or the error from the variable lookup; errors go through
+ * libxdp_err().
+ */
+int check_xdp_prog_version(const struct btf *btf, const char *name, __u32 *version)
+{
+	const struct btf_type *datasec, *var_def;
+
+	datasec = btf_get_datasec(btf, XDP_METADATA_SECTION);
+	if (!datasec)
+		return libxdp_err(-ENOENT);
+
+	var_def = btf_get_section_var(btf, datasec, name, BTF_KIND_PTR);
+	if (IS_ERR(var_def))
+		return libxdp_err(PTR_ERR(var_def));
+
+	if (get_field_int(btf, name, var_def, version))
+		return 0;
+
+	return libxdp_err(-ENOENT);
+}
+
+/*
+ * check_dispatcher_version - verify that a loaded program is a dispatcher of
+ * a supported version and read its configuration
+ *
+ * @mp:        multiprog that receives the configuration and version
+ * @prog_name: kernel-reported program name; anything other than
+ *             "xdp_dispatcher" is rejected with -ENOENT
+ * @btf:       program BTF, used to find the version when the config map does
+ *             not start with XDP_DISPATCHER_MAGIC
+ * @nr_maps:   number of maps used by the program; must be 1
+ * @map_id:    id of that map (the dispatcher config)
+ *
+ * Returns 0 on success; -ENOENT if the program does not look like a
+ * dispatcher; -EINVAL if the map layout is not the expected one; -EOPNOTSUPP
+ * for an unknown version; or another negative errno from the map operations.
+ */
+static int check_dispatcher_version(struct xdp_multiprog *mp,
+				    const char *prog_name, const struct btf *btf,
+				    __u32 nr_maps, __u32 map_id)
+{
+	__u32 version = 0, map_key = 0, info_len = sizeof(struct bpf_map_info);
+	const char *name = "dispatcher_version";
+	struct bpf_map_info map_info = {};
+	int err, map_fd, i;
+	__u8 *buf = NULL;
+
+	if (prog_name && strcmp(prog_name, "xdp_dispatcher")) {
+		pr_debug("XDP program with name '%s' is not a dispatcher\n", prog_name);
+		return -ENOENT;
+	}
+
+	if (nr_maps != 1) {
+		pr_warn("Expected a single map for dispatcher, found %u\n", nr_maps);
+		return -ENOENT;
+	}
+
+	map_fd = bpf_map_get_fd_by_id(map_id);
+	if (map_fd < 0) {
+		err = -errno;
+		pr_warn("Could not get config map fd for id %u: %s\n", map_id, strerror(-err));
+		return err;
+	}
+
+	err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
+	if (err) {
+		err = -errno;
+		pr_warn("Couldn't get map info: %s\n", strerror(-err));
+		goto out;
+	}
+
+	/* value_size >= 2 guarantees the magic and version bytes read below */
+	if (map_info.key_size != sizeof(map_key) ||
+	    map_info.value_size < 2 ||
+	    map_info.max_entries != 1 ||
+	    !(map_info.map_flags & BPF_F_RDONLY_PROG)) {
+		pr_warn("Map flags or key/value size mismatch\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	buf = malloc(map_info.value_size);
+	if (!buf) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = bpf_map_lookup_elem(map_fd, &map_key, buf);
+	if (err) {
+		err = -errno;
+		pr_warn("Could not lookup map value: %s\n", strerror(-err));
+		goto out;
+	}
+
+	if (buf[0] == XDP_DISPATCHER_MAGIC) {
+		version = buf[1];
+	} else {
+		/* no magic byte in the config: take the version from BTF */
+		err = check_xdp_prog_version(btf, name, &version);
+		if (err)
+			goto out;
+	}
+
+	switch (version) {
+	case XDP_DISPATCHER_VERSION_V1:
+	{
+		struct xdp_dispatcher_config_v1 *config = (void *)buf;
+
+		/* buf holds only value_size bytes; make sure a whole v1
+		 * config fits before reading its fields.
+		 */
+		if (map_info.value_size < sizeof(*config)) {
+			pr_warn("Dispatcher v1 config map size %u smaller than expected %zu\n",
+				map_info.value_size, sizeof(*config));
+			err = -EINVAL;
+			goto out;
+		}
+
+		for (i = 0; i < MAX_DISPATCHER_ACTIONS; i++) {
+			mp->config.chain_call_actions[i] = config->chain_call_actions[i];
+			mp->config.run_prios[i] = config->run_prios[i];
+		}
+		mp->config.num_progs_enabled = config->num_progs_enabled;
+		break;
+	}
+	case XDP_DISPATCHER_VERSION:
+		if (map_info.value_size != sizeof(mp->config)) {
+			pr_warn("Dispatcher version matches, but map size %u != expected %zu\n",
+				map_info.value_size, sizeof(mp->config));
+			err = -EINVAL;
+			goto out;
+		}
+		memcpy(&mp->config, buf, sizeof(mp->config));
+		break;
+
+	default:
+		pr_warn("XDP dispatcher version %u higher than supported %u\n",
+			version, XDP_DISPATCHER_VERSION);
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+	pr_debug("Verified XDP dispatcher version %d <= %d\n",
+		 version, XDP_DISPATCHER_VERSION);
+
+	mp->version = version;
+
+out:
+	close(map_fd);
+	free(buf);
+	return err;
+}
+
+/*
+ * xdp_multiprog__link_pinned_progs - populate the component program list of
+ * a dispatcher from its pin directory
+ *
+ * For each of the mp->config.num_progs_enabled entries, the program pinned
+ * at "<bpffs>/dispatch-<ifindex>-<dispatcher id>/prog<N>-prog" is opened,
+ * given the attach name "prog<N>" and the chain-call actions, run priority
+ * and frags flag recorded in mp->config, and appended to mp->first_prog.
+ * The work is done while holding the lock from xdp_lock_acquire().
+ *
+ * Returns 0 on success or a negative errno. On failure no component
+ * programs are left in @mp (first_prog is NULL and num_links is 0).
+ */
+static int xdp_multiprog__link_pinned_progs(struct xdp_multiprog *mp)
+{
+	char buf[PATH_MAX], pin_path[PATH_MAX];
+	struct xdp_program *prog, *p = NULL;
+	const char *bpffs_dir;
+	int err, lock_fd, i;
+	struct stat sb = {};
+
+	if (IS_ERR_OR_NULL(mp) || mp->first_prog)
+		return -EINVAL;
+
+	/* num_progs_enabled comes from a kernel map; it indexes the
+	 * per-program config arrays below, so it has to stay within them.
+	 */
+	if (mp->config.num_progs_enabled > MAX_DISPATCHER_ACTIONS)
+		return -EINVAL;
+
+	bpffs_dir = get_bpffs_dir();
+	if (IS_ERR(bpffs_dir))
+		return PTR_ERR(bpffs_dir);
+
+	err = try_snprintf(pin_path, sizeof(pin_path), "%s/dispatch-%d-%d",
+			   bpffs_dir, mp->ifindex, mp->main_prog->prog_id);
+	if (err)
+		return err;
+
+	lock_fd = xdp_lock_acquire();
+	if (lock_fd < 0)
+		return lock_fd;
+
+	pr_debug("Reading multiprog component programs from pinned directory\n");
+	err = stat(pin_path, &sb);
+	if (err) {
+		err = -errno;
+		pr_debug("Couldn't stat pin_path '%s': %s\n",
+			 pin_path, strerror(-err));
+		goto out;
+	}
+
+	for (i = 0; i < mp->config.num_progs_enabled; i++) {
+
+		err = try_snprintf(buf, sizeof(buf), "%s/prog%d-prog",
+				   pin_path, i);
+		if (err)
+			goto err;
+
+		prog = xdp_program__from_pin(buf);
+		if (IS_ERR(prog)) {
+			err = PTR_ERR(prog);
+			goto err;
+		}
+
+		/* From here until prog is appended to the list it is owned
+		 * only by this loop iteration, so failures must release it
+		 * explicitly (err_prog).
+		 */
+		err = try_snprintf(buf, sizeof(buf), "prog%d", i);
+		if (err)
+			goto err_prog;
+		prog->attach_name = strdup(buf);
+		if (!prog->attach_name) {
+			err = -ENOMEM;
+			goto err_prog;
+		}
+
+		prog->chain_call_actions = (mp->config.chain_call_actions[i] &
+					    ~(1U << XDP_DISPATCHER_RETVAL));
+		prog->run_prio = mp->config.run_prios[i];
+		prog->is_frags = !!(mp->config.program_flags[i] & BPF_F_XDP_HAS_FRAGS);
+
+		if (!p) {
+			mp->first_prog = prog;
+			p = mp->first_prog;
+		} else {
+			p->next = prog;
+			p = prog;
+		}
+		mp->num_links++;
+	}
+
+out:
+	xdp_lock_release(lock_fd);
+	return err;
+err_prog:
+	xdp_program__close(prog);
+err:
+	/* drop everything linked so far and forget about it */
+	prog = mp->first_prog;
+	while (prog) {
+		p = prog->next;
+		xdp_program__close(prog);
+		prog = p;
+	}
+	mp->first_prog = NULL;
+	mp->num_links = 0;
+	goto out;
+}
+
+/*
+ * xdp_multiprog__fill_from_fd - fill in a multiprog from loaded program fds
+ *
+ * @mp:      multiprog to fill
+ * @prog_fd: fd of the program attached in SKB/native mode; values <= 0 mean
+ *           "none"
+ * @hw_fd:   fd of the program attached in HW mode; values <= 0 mean "none"
+ *
+ * The program behind @prog_fd is checked for being a dispatcher. If it has
+ * no BTF, is not a dispatcher, or its pinned component programs cannot be
+ * read, @mp is marked as legacy instead of failing.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int xdp_multiprog__fill_from_fd(struct xdp_multiprog *mp,
+				       int prog_fd, int hw_fd)
+{
+	struct bpf_prog_info info = {};
+	__u32 info_len, map_id = 0;
+	struct xdp_program *prog;
+	struct btf *btf = NULL;
+	int err = 0;
+
+	if (IS_ERR_OR_NULL(mp))
+		return -EINVAL;
+
+	if (prog_fd > 0) {
+		/* ask for at most one map id; a dispatcher has exactly one */
+		info.nr_map_ids = 1;
+		info.map_ids = (uintptr_t)&map_id;
+		info_len = sizeof(info);
+		err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+		if (err) {
+			pr_warn("couldn't get program info for fd: %d\n", prog_fd);
+			return -EINVAL;
+		}
+
+		if (!info.btf_id) {
+			pr_debug("No BTF for prog ID %u\n", info.id);
+			mp->is_legacy = true;
+			goto legacy;
+		}
+
+		btf = btf__load_from_kernel_by_id(info.btf_id);
+		if (!btf) {
+			/* Report the failure instead of returning success
+			 * with an unfilled multiprog; read errno before
+			 * logging can overwrite it.
+			 */
+			err = errno ? -errno : -EINVAL;
+			pr_warn("Couldn't get BTF for ID %u\n", info.btf_id);
+			goto out;
+		}
+
+		err = check_dispatcher_version(mp, info.name, btf,
+					       info.nr_map_ids, map_id);
+		if (err) {
+			if (err != -ENOENT) {
+				pr_warn("Dispatcher version check failed for ID %d\n",
+					info.id);
+				goto out;
+			} else {
+				/* no dispatcher, mark as legacy prog */
+				mp->is_legacy = true;
+				err = 0;
+				goto legacy;
+			}
+		}
+
+legacy:
+		prog = xdp_program__from_fd(prog_fd);
+		if (IS_ERR(prog)) {
+			err = PTR_ERR(prog);
+			goto out;
+		}
+
+		mp->main_prog = prog;
+
+		if (!xdp_multiprog__is_legacy(mp)) {
+			err = xdp_multiprog__link_pinned_progs(mp);
+			if (err) {
+				/* without its component programs the
+				 * dispatcher is handled as a legacy program
+				 */
+				pr_warn("Unable to read pinned progs: %s\n", strerror(-err));
+				mp->is_legacy = true;
+				err = 0;
+			}
+		}
+
+		pr_debug("Found %s with id %d and %zu component progs\n",
+			 xdp_multiprog__is_legacy(mp) ? "legacy program" : "multiprog",
+			 mp->main_prog->prog_id, mp->num_links);
+	}
+
+	if (hw_fd > 0) {
+		prog = xdp_program__from_fd(hw_fd);
+		if (IS_ERR(prog)) {
+			err = PTR_ERR(prog);
+			goto out;
+		}
+
+		if (mp->first_prog == NULL)
+			mp->is_legacy = true;
+
+		mp->hw_prog = prog;
+
+		pr_debug("Found hw program with id %d\n", mp->hw_prog->prog_id);
+	}
+
+	mp->is_loaded = true;
+
+out:
+	btf__free(btf);
+	return err;
+}
+
+/*
+ * Allocate a multiprog for @ifindex and fill it in from the program fds @fd
+ * and @hw_fd. Returns the new multiprog, or an ERR_PTR() on failure.
+ */
+static struct xdp_multiprog *xdp_multiprog__from_fd(int fd, int hw_fd,
+						    int ifindex)
+{
+	struct xdp_multiprog *mp;
+	int err;
+
+	mp = xdp_multiprog__new(ifindex);
+	if (IS_ERR(mp))
+		return mp;
+
+	err = xdp_multiprog__fill_from_fd(mp, fd, hw_fd);
+	if (!err)
+		return mp;
+
+	xdp_multiprog__close(mp);
+	return ERR_PTR(err);
+}
+
+
+/*
+ * xdp_multiprog__from_id - build a multiprog from kernel program ids
+ *
+ * @id:      id of the program attached in SKB/native mode, or 0 for none
+ * @hw_id:   id of the program attached in HW mode, or 0 for none
+ * @ifindex: interface the programs are attached to
+ *
+ * Returns the new multiprog, or an ERR_PTR() carrying a negative errno.
+ *
+ * The fds opened here are only needed while the multiprog is constructed, so
+ * they are closed on success as well as on failure. NOTE(review): this
+ * relies on xdp_program__from_fd() working on a duplicate of the fd it is
+ * given, as the comments in xdp_program__clone() state — confirm before
+ * merging.
+ */
+static struct xdp_multiprog *xdp_multiprog__from_id(__u32 id, __u32 hw_id,
+						    int ifindex)
+{
+	struct xdp_multiprog *mp;
+	int hw_fd = 0;
+	int fd = 0;
+	int err = 0;
+
+	if (id) {
+		fd = bpf_prog_get_fd_by_id(id);
+		if (fd < 0) {
+			err = -errno;
+			pr_warn("couldn't get program fd: %s", strerror(-err));
+			goto out;
+		}
+	}
+
+	if (hw_id) {
+		hw_fd = bpf_prog_get_fd_by_id(hw_id);
+		if (hw_fd < 0) {
+			err = -errno;
+			pr_warn("couldn't get program fd: %s", strerror(-err));
+			goto out;
+		}
+	}
+
+	mp = xdp_multiprog__from_fd(fd, hw_fd, ifindex);
+	if (IS_ERR(mp))
+		err = PTR_ERR(mp);
+
+out:
+	/* 0 means "never opened" here, so only positive fds are closed */
+	if (fd > 0)
+		close(fd);
+	if (hw_fd > 0)
+		close(hw_fd);
+
+	return err ? ERR_PTR(err) : mp;
+}
+
+/*
+ * xdp_get_ifindex_prog_id - query which XDP programs are attached to an
+ * interface
+ *
+ * @ifindex:    interface to query
+ * @prog_id:    receives the id of the SKB- or native-mode program
+ * @hw_prog_id: receives the id of the HW-mode program; may be NULL
+ * @mode:       receives the attach mode of @prog_id; may be NULL
+ *
+ * Only the outputs relevant to the reported attach state are written; the
+ * others keep whatever value the caller put there.
+ *
+ * Returns 0 on success or the error from the libbpf query.
+ */
+static int xdp_get_ifindex_prog_id(int ifindex, __u32 *prog_id,
+				   __u32 *hw_prog_id, enum xdp_attach_mode *mode)
+{
+	__u32 unused_id, drv_id, hw_id, skb_id;
+	enum xdp_attach_mode unused_mode;
+	__u8 attached;
+	int err;
+
+	/* let the optional outputs point at scratch storage */
+	if (!hw_prog_id)
+		hw_prog_id = &unused_id;
+	if (!mode)
+		mode = &unused_mode;
+
+#ifdef HAVE_LIBBPF_BPF_XDP_ATTACH
+	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+	err = bpf_xdp_query(ifindex, 0, &opts);
+	if (err)
+		return err;
+
+	drv_id = opts.drv_prog_id;
+	skb_id = opts.skb_prog_id;
+	hw_id = opts.hw_prog_id;
+	attached = opts.attach_mode;
+#else
+	struct xdp_link_info xinfo = {};
+	err = bpf_get_link_xdp_info(ifindex, &xinfo, sizeof(xinfo), 0);
+	if (err)
+		return err;
+
+	drv_id = xinfo.drv_prog_id;
+	skb_id = xinfo.skb_prog_id;
+	hw_id = xinfo.hw_prog_id;
+	attached = xinfo.attach_mode;
+#endif
+
+	switch (attached) {
+	case XDP_ATTACHED_DRV:
+		*prog_id = drv_id;
+		*mode = XDP_MODE_NATIVE;
+		break;
+	case XDP_ATTACHED_SKB:
+		*prog_id = skb_id;
+		*mode = XDP_MODE_SKB;
+		break;
+	case XDP_ATTACHED_HW:
+		*hw_prog_id = hw_id;
+		*mode = XDP_MODE_UNSPEC;
+		break;
+	case XDP_ATTACHED_MULTI:
+		/* several modes at once: prefer the native program */
+		if (drv_id) {
+			*prog_id = drv_id;
+			*mode = XDP_MODE_NATIVE;
+		} else if (skb_id) {
+			*prog_id = skb_id;
+			*mode = XDP_MODE_SKB;
+		}
+		*hw_prog_id = hw_id;
+		break;
+	case XDP_ATTACHED_NONE:
+	default:
+		*mode = XDP_MODE_UNSPEC;
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * xdp_multiprog__get_from_ifindex - build a multiprog describing the XDP
+ * programs currently attached to @ifindex
+ *
+ * Returns the multiprog on success. On failure the result of
+ * libxdp_err_ptr() is returned for the error: the query error, -ENOENT when
+ * nothing is attached, or -EBUSY when the attached program kept disappearing
+ * (-ENOENT) for more than MAX_RETRY attempts.
+ */
+struct xdp_multiprog *xdp_multiprog__get_from_ifindex(int ifindex)
+{
+	enum xdp_attach_mode mode;
+	int err, retry_counter = 0;
+	struct xdp_multiprog *mp;
+	__u32 hw_prog_id;
+	__u32 prog_id;
+
+retry:
+	/* xdp_get_ifindex_prog_id() only writes the outputs relevant to the
+	 * attach state it finds, so reset them on every attempt; otherwise a
+	 * retry could act on ids left over from the previous query.
+	 */
+	mode = XDP_MODE_UNSPEC;
+	hw_prog_id = 0;
+	prog_id = 0;
+
+	err = xdp_get_ifindex_prog_id(ifindex, &prog_id, &hw_prog_id, &mode);
+	if (err)
+		return libxdp_err_ptr(err, false);
+
+	if (!prog_id && !hw_prog_id)
+		return libxdp_err_ptr(-ENOENT, false);
+
+	mp = xdp_multiprog__from_id(prog_id, hw_prog_id, ifindex);
+	if (!IS_ERR_OR_NULL(mp))
+		mp->attach_mode = mode;
+	else if (IS_ERR(mp)) {
+		err = PTR_ERR(mp);
+		if (err == -ENOENT) {
+			if (++retry_counter > MAX_RETRY) {
+				pr_warn("Retried more than %d times, giving up\n",
+					retry_counter);
+				err = -EBUSY;
+			} else {
+				pr_debug("Dispatcher disappeared before we could load it, retrying.\n");
+				usleep(1 << retry_counter); /* exponential backoff */
+				goto retry;
+			}
+		}
+
+		mp = libxdp_err_ptr(err, false);
+	} else
+		mp = libxdp_err_ptr(0, true);
+	return mp;
+}
+
+/*
+ * libxdp_check_kern_compat - check whether the kernel supports what the
+ * dispatcher needs
+ *
+ * The check loads "xdp_pass" from xdp-dispatcher.o as a target, loads a
+ * second copy as a BPF_PROG_TYPE_EXT program attached to it, and pins and
+ * then unlinks the resulting link in bpffs. The outcome is stored in
+ * kernel_compat, so the probe is skipped once a result is known.
+ *
+ * Returns 0 if the dispatcher can be used and -EOPNOTSUPP if not. If
+ * xdp-dispatcher.o cannot be opened, the error from opening it is returned
+ * and no result is cached.
+ */
+int libxdp_check_kern_compat(void)
+{
+	struct xdp_program *tgt_prog = NULL, *test_prog = NULL;
+	const char *bpffs_dir;
+	char buf[PATH_MAX];
+	int lock_fd;
+	int err = 0;
+
+	bpffs_dir = get_bpffs_dir();
+	if (IS_ERR(bpffs_dir)) {
+		pr_warn("Can't use dispatcher without a working bpffs\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (kernel_compat > COMPAT_UNKNOWN)
+		goto skip;
+
+	pr_debug("Checking dispatcher compatibility\n");
+
+	tgt_prog = __xdp_program__find_file("xdp-dispatcher.o", NULL, "xdp_pass", NULL);
+	if (IS_ERR(tgt_prog)) {
+		err = PTR_ERR(tgt_prog);
+		pr_warn("Couldn't open BPF file xdp-dispatcher.o\n");
+		return err;
+	}
+
+	test_prog = __xdp_program__find_file("xdp-dispatcher.o", NULL, "xdp_pass", NULL);
+	if (IS_ERR(test_prog)) {
+		err = PTR_ERR(test_prog);
+		pr_warn("Couldn't open BPF file xdp-dispatcher.o\n");
+		/* tgt_prog was opened successfully above; don't leak it */
+		xdp_program__close(tgt_prog);
+		return err;
+	}
+
+	err = xdp_program__load(tgt_prog);
+	if (err) {
+		pr_debug("Couldn't load XDP program: %s\n", strerror(-err));
+		goto out;
+	}
+
+	err = bpf_program__set_attach_target(test_prog->bpf_prog,
+					     tgt_prog->prog_fd,
+					     "xdp_pass");
+	if (err) {
+		pr_debug("Failed to set attach target: %s\n", strerror(-err));
+		goto out;
+	}
+
+	bpf_program__set_type(test_prog->bpf_prog, BPF_PROG_TYPE_EXT);
+	bpf_program__set_expected_attach_type(test_prog->bpf_prog, 0);
+	err = xdp_program__load(test_prog);
+	if (err) {
+		char errbuf[100] = {};
+
+		libxdp_strerror(err, errbuf, sizeof(errbuf));
+		pr_debug("Failed to load program %s: %s\n",
+			 xdp_program__name(test_prog), errbuf);
+		goto out;
+	}
+
+	test_prog->link_fd = bpf_raw_tracepoint_open(NULL, test_prog->prog_fd);
+	if (test_prog->link_fd < 0) {
+		err = -errno;
+		pr_debug("Failed to attach test program to dispatcher: %s\n",
+			 strerror(-err));
+		goto out;
+	}
+
+	err = try_snprintf(buf, sizeof(buf), "%s/prog-test-link-%i-%i",
+			   bpffs_dir, IFINDEX_LO, test_prog->prog_id);
+	if (err)
+		goto out;
+
+	lock_fd = xdp_lock_acquire();
+	if (lock_fd < 0) {
+		err = lock_fd;
+		goto out;
+	}
+
+	/* pin the link and remove the pin again right away */
+	err = bpf_obj_pin(test_prog->link_fd, buf);
+	if (err) {
+		err = -errno;
+		pr_warn("Couldn't pin link FD at %s: %s\n", buf, strerror(-err));
+		goto out_locked;
+	}
+	err = unlink(buf);
+	if (err) {
+		err = -errno;
+		pr_warn("Couldn't unlink file %s: %s\n", buf, strerror(-err));
+		goto out_locked;
+	}
+
+	kernel_compat = COMPAT_SUPPORTED;
+out_locked:
+	xdp_lock_release(lock_fd);
+out:
+	xdp_program__close(test_prog);
+	xdp_program__close(tgt_prog);
+	if (err) {
+		pr_info("Compatibility check for dispatcher program failed: %s\n",
+			strerror(-err));
+		kernel_compat = COMPAT_UNSUPPORTED;
+	}
+skip:
+	return kernel_compat == COMPAT_SUPPORTED ? 0 : -EOPNOTSUPP;
+}
+
+/*
+ * Look up the BTF id of the function @name in the BTF of the loaded program
+ * behind @attach_prog_fd.
+ *
+ * Returns whatever btf__find_by_name_kind() returns for BTF_KIND_FUNC,
+ * -errno if the program info cannot be queried, or -EINVAL if the program
+ * has no BTF or its BTF cannot be loaded.
+ */
+static int find_prog_btf_id(const char *name, __u32 attach_prog_fd)
+{
+	struct bpf_prog_info prog_info = {};
+	__u32 len = sizeof(prog_info);
+	struct btf *prog_btf;
+	int ret;
+
+	if (bpf_obj_get_info_by_fd(attach_prog_fd, &prog_info, &len)) {
+		ret = -errno;
+		pr_warn("failed get_prog_info for FD %d\n", attach_prog_fd);
+		return ret;
+	}
+
+	if (!prog_info.btf_id) {
+		pr_warn("The target program doesn't have BTF\n");
+		return -EINVAL;
+	}
+
+	prog_btf = btf__load_from_kernel_by_id(prog_info.btf_id);
+	if (!prog_btf) {
+		pr_warn("Failed to get BTF of the program\n");
+		return -EINVAL;
+	}
+
+	ret = btf__find_by_name_kind(prog_btf, name, BTF_KIND_FUNC);
+	btf__free(prog_btf);
+	if (ret <= 0)
+		pr_warn("%s is not found in prog's BTF\n", name);
+
+	return ret;
+}
+
+/*
+ * xdp_multiprog__link_prog - attach a program as the next component program
+ * of a loaded dispatcher
+ *
+ * @mp:   loaded multiprog that still has a free slot
+ *        (num_links < config.num_progs_enabled)
+ * @prog: program to attach; loaded here as BPF_PROG_TYPE_EXT if it has no fd
+ *        yet
+ *
+ * A clone of @prog is linked to the dispatcher function for slot num_links
+ * and appended to mp->first_prog; @prog itself stays with the caller.
+ *
+ * Returns 0 on success, -EINVAL for unusable arguments, -EOPNOTSUPP when the
+ * kernel or the program lacks something required, or another negative errno.
+ */
+static int xdp_multiprog__link_prog(struct xdp_multiprog *mp,
+ struct xdp_program *prog)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+ struct xdp_program *new_prog, *p;
+ bool was_loaded = false;
+ char buf[PATH_MAX];
+ int err, lfd = -1;
+ char *attach_func;
+ __s32 btf_id;
+
+ if (IS_ERR_OR_NULL(mp) || IS_ERR_OR_NULL(prog) || !mp->is_loaded ||
+ mp->num_links >= mp->config.num_progs_enabled)
+ return -EINVAL;
+
+ err = libxdp_check_kern_compat();
+ if (err)
+ return err;
+
+ if (!prog->btf) {
+ pr_warn("Program %s has no BTF information, so we can't load it as multiprog\n",
+ xdp_program__name(prog));
+ return -EOPNOTSUPP;
+ }
+
+ pr_debug("Linking prog %s as multiprog entry %zu\n",
+ xdp_program__name(prog), mp->num_links);
+
+ /* buf holds the slot name "prog<N>"; it is stored as attach_name below */
+ err = try_snprintf(buf, sizeof(buf), "prog%zu", mp->num_links);
+ if (err)
+ goto err;
+
+
+ /* With a single enabled program the attach target is the
+ * "xdp_dispatcher" function itself rather than the "prog<N>" function.
+ */
+ if (mp->config.num_progs_enabled == 1)
+ attach_func = "xdp_dispatcher";
+ else
+ attach_func = buf;
+
+ btf_id = find_prog_btf_id(attach_func, mp->main_prog->prog_fd);
+ if (btf_id <= 0) {
+ err = btf_id;
+ pr_debug("Couldn't find BTF ID for %s: %d\n", attach_func, err);
+ goto err;
+ }
+
+ /* Program not loaded yet: load it as an EXT program targeting
+ * attach_func in the dispatcher.
+ */
+ if (prog->prog_fd < 0) {
+ err = bpf_program__set_attach_target(prog->bpf_prog,
+ mp->main_prog->prog_fd,
+ attach_func);
+ if (err) {
+ pr_debug("Failed to set attach target: %s\n", strerror(-err));
+ goto err;
+ }
+
+ bpf_program__set_type(prog->bpf_prog, BPF_PROG_TYPE_EXT);
+ bpf_program__set_expected_attach_type(prog->bpf_prog, 0);
+ err = xdp_program__load(prog);
+ if (err) {
+ if (err == -E2BIG) {
+ pr_debug("Got 'argument list too long' error while "
+ "loading component program.\n");
+ err = -EOPNOTSUPP;
+ } else {
+ /* NOTE: this buf shadows the outer buf for the error text only */
+ char buf[100] = {};
+ libxdp_strerror(err, buf, sizeof(buf));
+ pr_debug("Failed to load program %s: %s\n",
+ xdp_program__name(prog), buf);
+ }
+ goto err;
+ }
+
+ /* remembered so the EINVAL handling below can tell a fresh load from a re-attach */
+ was_loaded = true;
+ }
+
+ /* clone the xdp_program ref so we can keep it */
+ new_prog = xdp_program__clone(prog, 0);
+ if (IS_ERR(new_prog)) {
+ err = PTR_ERR(new_prog);
+ pr_warn("Failed to clone xdp_program: %s\n", strerror(-err));
+ goto err;
+ }
+
+ opts.target_btf_id = btf_id;
+
+ /* The attach will disappear once this fd is closed */
+ lfd = bpf_link_create(new_prog->prog_fd, mp->main_prog->prog_fd, 0, &opts);
+ if (lfd < 0) {
+ err = -errno;
+ if (err == -EINVAL) {
+ if (!was_loaded) {
+ pr_debug("Kernel doesn't support re-attaching "
+ "freplace programs.\n");
+ err = -EOPNOTSUPP;
+ } else {
+ pr_debug("Got EINVAL, retrying "
+ "raw_tracepoint_open() without target\n");
+ /* we just loaded the program, so should be able
+ * to attach the old way */
+ lfd = bpf_raw_tracepoint_open(NULL, new_prog->prog_fd);
+ if (lfd < 0)
+ err = -errno;
+ else
+ goto attach_ok;
+ }
+ }
+ /* err may have been rewritten by the EINVAL handling above */
+ if (err == -EPERM) {
+ pr_debug("Got 'permission denied' error while "
+ "attaching program to dispatcher.\n%s\n",
+ dispatcher_feature_err);
+ err = -EOPNOTSUPP;
+ } else {
+ pr_warn("Failed to attach program %s to dispatcher: %s\n",
+ xdp_program__name(new_prog), strerror(-err));
+ }
+ goto err_free;
+ }
+
+attach_ok:
+ new_prog->attach_name = strdup(buf);
+ if (!new_prog->attach_name) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ pr_debug(
+ "Attached prog '%s' with priority %d in dispatcher entry '%s' with fd %d\n",
+ xdp_program__name(new_prog), xdp_program__run_prio(new_prog),
+ new_prog->attach_name, lfd);
+ new_prog->link_fd = lfd;
+
+ /* append the clone to the end of the component program list */
+ if (!mp->first_prog) {
+ mp->first_prog = new_prog;
+ } else {
+ p = mp->first_prog;
+ while (p->next)
+ p = p->next;
+ p->next = new_prog;
+ }
+
+ mp->num_links++;
+ return 0;
+
+err_free:
+ /* the link fd has not been handed to new_prog yet, so close it here */
+ if (lfd >= 0)
+ close(lfd);
+ xdp_program__close(new_prog);
+err:
+ return err;
+}
+
+/*
+ * xdp_multiprog__generate - generate a new multiprog dispatcher
+ *
+ * This generates a new multiprog dispatcher for the programs in progs. If
+ * old_mp is set, the progs will either be added to or removed from the existing
+ * set of programs in the dispatcher represented by old_mp, depending on the
+ * value of remove_progs. If old_mp is not set, a new dispatcher will be created
+ * just holding the programs in progs. In both cases, the full set of programs
+ * will be sorted according to their run order (see cmp_xdp_programs).
+ *
+ * When called with remove_progs set, the caller is responsible for checking
+ * that all the programs in progs are actually present in old_mp.
+ *
+ * Returns the new, loaded multiprog with all programs linked, or an ERR_PTR():
+ * -EINVAL for bad arguments, -E2BIG if the resulting number of programs
+ * exceeds MAX_DISPATCHER_ACTIONS, -EBUSY if old_mp is a legacy program or an
+ * older dispatcher version, or an error from loading/linking.
+ */
+static struct xdp_multiprog *xdp_multiprog__generate(struct xdp_program **progs,
+ size_t num_progs,
+ int ifindex,
+ struct xdp_multiprog *old_mp,
+ bool remove_progs)
+{
+ size_t num_new_progs = old_mp ? old_mp->num_links : 0;
+ struct xdp_program **new_progs = NULL;
+ struct xdp_program *dispatcher;
+ struct xdp_multiprog *mp;
+ struct bpf_map *map;
+ size_t i;
+ int err;
+
+ if (!progs || !num_progs || (!old_mp && remove_progs))
+ return ERR_PTR(-EINVAL);
+
+ /* num_new_progs is unsigned: removing more programs than old_mp holds
+ * wraps around and is rejected by the size check below.
+ */
+ num_new_progs += remove_progs ? -num_progs : num_progs;
+
+ if (num_new_progs > MAX_DISPATCHER_ACTIONS)
+ return ERR_PTR(-E2BIG);
+
+ pr_debug("Generating multi-prog dispatcher for %zu programs\n",
+ num_new_progs);
+
+ mp = xdp_multiprog__new(ifindex);
+ if (IS_ERR(mp))
+ return mp;
+
+ err = xdp_multiprog__check_kernel_frags_support(mp);
+ if (err)
+ goto err;
+
+ if (old_mp) {
+ struct xdp_program *prog;
+ size_t j;
+
+ if (xdp_multiprog__is_legacy(old_mp)) {
+ pr_warn("Existing program is not using a dispatcher, can't replace; unload first\n");
+ err = -EBUSY;
+ goto err;
+ }
+
+ if (old_mp->version < mp->version) {
+ pr_warn("Existing dispatcher version %u is older than our version %u. "
+ "Refusing transparent upgrade, unload first\n",
+ old_mp->version, mp->version);
+ err = -EBUSY;
+ goto err;
+ }
+
+ /* new_progs only borrows the program pointers; the array itself is
+ * freed before returning, the programs are not.
+ */
+ new_progs = calloc(num_new_progs, sizeof(*new_progs));
+ if (!new_progs) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ for (i = 0, prog = old_mp->first_prog; prog; prog = prog->next) {
+ if (remove_progs) {
+ /* remove_progs means progs is an array of
+ * programs we should remove from old_mp instead
+ * of adding them.
+ */
+ bool found = false;
+
+ for (j = 0; j < num_progs; j++)
+ if (progs[j]->prog_id == prog->prog_id)
+ found = true;
+ if (found)
+ continue;
+
+ /* Sanity check: caller should ensure all
+ * programs to remove actually exist; check here
+ * anyway to ensure we don't overrun the array
+ * if this is not done correctly.
+ */
+ if (i >= num_new_progs) {
+ pr_warn("Not all programs to remove were found\n");
+ err = -EINVAL;
+ goto err;
+ }
+ }
+ new_progs[i++] = prog;
+ }
+ /* when adding, the new programs go after the existing ones */
+ if (!remove_progs)
+ for (j = 0; i < num_new_progs; i++, j++)
+ new_progs[i] = progs[j];
+
+ } else {
+ /* no existing dispatcher: use (and sort) the caller's array directly */
+ new_progs = progs;
+ }
+
+ if (num_new_progs > 1)
+ qsort(new_progs, num_new_progs, sizeof(*new_progs), cmp_xdp_programs);
+
+ dispatcher = __xdp_program__find_file("xdp-dispatcher.o",
+ NULL, "xdp_dispatcher", NULL);
+ if (IS_ERR(dispatcher)) {
+ err = PTR_ERR(dispatcher);
+ pr_warn("Couldn't open BPF file 'xdp-dispatcher.o'\n");
+ goto err;
+ }
+
+ /* from here on the dispatcher is released by xdp_multiprog__close(mp) */
+ mp->main_prog = dispatcher;
+
+ /* the first map of the object is taken to be its rodata (config) map */
+ map = bpf_object__next_map(mp->main_prog->bpf_obj, NULL);
+ if (!map) {
+ pr_warn("Couldn't find rodata map in object file 'xdp-dispatcher.o'\n");
+ err = -ENOENT;
+ goto err;
+ }
+
+ /* Fill in the config that is written into the dispatcher's rodata.
+ * Frags support starts out as what the kernel supports and is cleared
+ * as soon as one program does not support frags.
+ */
+ mp->config.magic = XDP_DISPATCHER_MAGIC;
+ mp->config.dispatcher_version = mp->version;
+ mp->config.num_progs_enabled = num_new_progs;
+ mp->config.is_xdp_frags = mp->kernel_frags_support;
+ for (i = 0; i < num_new_progs; i++) {
+ mp->config.chain_call_actions[i] =
+ (new_progs[i]->chain_call_actions |
+ (1U << XDP_DISPATCHER_RETVAL));
+ mp->config.run_prios[i] = new_progs[i]->run_prio;
+
+ if (xdp_program__xdp_frags_support(new_progs[i]))
+ mp->config.program_flags[i] = BPF_F_XDP_HAS_FRAGS;
+ else
+ mp->config.is_xdp_frags = false;
+ }
+
+ if (mp->kernel_frags_support) {
+ if (!mp->config.is_xdp_frags)
+ pr_debug("At least one attached program doesn't "
+ "support frags, disabling it for the "
+ "dispatcher\n");
+ else
+ pr_debug("All attached programs support frags, "
+ "enabling it for the dispatcher\n");
+ }
+
+ err = bpf_map__set_initial_value(map, &mp->config, sizeof(mp->config));
+ if (err) {
+ pr_warn("Failed to set rodata for object file 'xdp-dispatcher.o'\n");
+ goto err;
+ }
+
+ err = xdp_multiprog__load(mp);
+ if (err)
+ goto err;
+
+ /* link the programs into the dispatcher slots in sorted order */
+ for (i = 0; i < num_new_progs; i++) {
+ err = xdp_multiprog__link_prog(mp, new_progs[i]);
+ if (err)
+ goto err;
+ }
+
+ /* new_progs was only allocated here when old_mp was given */
+ if (old_mp)
+ free(new_progs);
+
+ return mp;
+
+err:
+ if (old_mp)
+ free(new_progs);
+ xdp_multiprog__close(mp);
+ return ERR_PTR(err);
+}
+
+/*
+ * Pin the bpf_link and the program of every component program of @mp in bpffs.
+ *
+ * All pins are created inside the directory
+ * "<bpffs dir>/dispatch-<ifindex>-<dispatcher prog id>", as
+ * "<attach_name>-link" and "<attach_name>-prog". The whole operation runs
+ * while holding the lock returned by xdp_lock_acquire().
+ *
+ * If any step fails after the directory exists, every pin that may have been
+ * created is unlinked again and the directory is removed (best effort).
+ *
+ * Returns 0 on success, or a negative errno-style value on failure. Legacy
+ * (non-dispatcher) multiprogs and NULL/error pointers are rejected with
+ * -EINVAL.
+ */
+static int xdp_multiprog__pin(struct xdp_multiprog *mp)
+{
+	char pin_path[PATH_MAX], buf[PATH_MAX];
+	struct xdp_program *prog;
+	const char *bpffs_dir;
+	int err = 0, lock_fd;
+
+	if (IS_ERR_OR_NULL(mp) || xdp_multiprog__is_legacy(mp))
+		return -EINVAL;
+
+	bpffs_dir = get_bpffs_dir();
+	if (IS_ERR(bpffs_dir))
+		return PTR_ERR(bpffs_dir);
+
+	err = try_snprintf(pin_path, sizeof(pin_path), "%s/dispatch-%d-%d",
+			   bpffs_dir, mp->ifindex, mp->main_prog->prog_id);
+	if (err)
+		return err;
+
+	lock_fd = xdp_lock_acquire();
+	if (lock_fd < 0)
+		return lock_fd;
+
+	pr_debug("Pinning multiprog fd %d beneath %s\n",
+		 mp->main_prog->prog_fd, pin_path);
+
+	/* An already existing directory is fine. Do not store mkdir()'s raw
+	 * return value in err: on EEXIST it would stay at -1 and be returned
+	 * as a bogus error code if there is nothing to pin below.
+	 */
+	if (mkdir(pin_path, S_IRWXU) && errno != EEXIST) {
+		err = -errno;
+		goto out;
+	}
+
+	for (prog = mp->first_prog; prog; prog = prog->next) {
+		if (prog->link_fd < 0) {
+			err = -EINVAL;
+			pr_warn("Prog %s not linked\n", prog->prog_name);
+			goto err_unpin;
+		}
+
+		err = try_snprintf(buf, sizeof(buf), "%s/%s-link",
+				   pin_path, prog->attach_name);
+		if (err)
+			goto err_unpin;
+
+		err = bpf_obj_pin(prog->link_fd, buf);
+		if (err) {
+			err = -errno;
+			pr_warn("Couldn't pin link FD at %s: %s\n", buf, strerror(-err));
+			goto err_unpin;
+		}
+		pr_debug("Pinned link for prog %s at %s\n", prog->prog_name, buf);
+
+		err = try_snprintf(buf, sizeof(buf), "%s/%s-prog",
+				   pin_path, prog->attach_name);
+		if (err)
+			goto err_unpin;
+
+		err = bpf_obj_pin(prog->prog_fd, buf);
+		if (err) {
+			err = -errno;
+			pr_warn("Couldn't pin prog FD at %s: %s\n", buf, strerror(-err));
+			goto err_unpin;
+		}
+
+		pr_debug("Pinned prog %s at %s\n", prog->prog_name, buf);
+	}
+out:
+	xdp_lock_release(lock_fd);
+	return err;
+
+err_unpin:
+	/* Best-effort rollback: results of unlink()/rmdir() are deliberately
+	 * ignored so that the original error in err is what gets returned.
+	 */
+	for (prog = mp->first_prog; prog; prog = prog->next) {
+		if (!try_snprintf(buf, sizeof(buf), "%s/%s-link",
+				  pin_path, prog->attach_name))
+			unlink(buf);
+		if (!try_snprintf(buf, sizeof(buf), "%s/%s-prog",
+				  pin_path, prog->attach_name))
+			unlink(buf);
+	}
+	rmdir(pin_path);
+	goto out;
+}
+
+/*
+ * Remove the bpffs pins of every component program of @mp.
+ *
+ * For each program the "<attach_name>-link" and "<attach_name>-prog" files
+ * inside "<bpffs dir>/dispatch-<ifindex>-<dispatcher prog id>" are unlinked,
+ * and finally the directory itself is removed. The whole operation runs while
+ * holding the lock returned by xdp_lock_acquire().
+ *
+ * Processing stops at the first failure; pins removed up to that point are not
+ * restored.
+ *
+ * Returns 0 on success, or a negative errno-style value on failure. Legacy
+ * (non-dispatcher) multiprogs and NULL/error pointers are rejected with
+ * -EINVAL.
+ */
+static int xdp_multiprog__unpin(struct xdp_multiprog *mp)
+{
+	char pin_path[PATH_MAX], buf[PATH_MAX];
+	struct xdp_program *prog;
+	const char *bpffs_dir;
+	int err = 0, lock_fd;
+
+	if (IS_ERR_OR_NULL(mp) || xdp_multiprog__is_legacy(mp))
+		return -EINVAL;
+
+	bpffs_dir = get_bpffs_dir();
+	if (IS_ERR(bpffs_dir))
+		return PTR_ERR(bpffs_dir);
+
+	err = try_snprintf(pin_path, sizeof(pin_path), "%s/dispatch-%d-%d",
+			   bpffs_dir, mp->ifindex, mp->main_prog->prog_id);
+	if (err)
+		return err;
+
+	lock_fd = xdp_lock_acquire();
+	if (lock_fd < 0)
+		return lock_fd;
+
+	pr_debug("Unpinning multiprog fd %d beneath %s\n",
+		 mp->main_prog->prog_fd, pin_path);
+
+	for (prog = mp->first_prog; prog; prog = prog->next) {
+		err = try_snprintf(buf, sizeof(buf), "%s/%s-link",
+				   pin_path, prog->attach_name);
+		if (err)
+			goto out;
+
+		err = unlink(buf);
+		if (err) {
+			err = -errno;
+			pr_warn("Couldn't unlink file %s: %s\n",
+				buf, strerror(-err));
+			goto out;
+		}
+		pr_debug("Unpinned link for prog %s from %s\n",
+			 prog->prog_name, buf);
+
+		err = try_snprintf(buf, sizeof(buf), "%s/%s-prog",
+				   pin_path, prog->attach_name);
+		if (err)
+			goto out;
+
+		err = unlink(buf);
+		if (err) {
+			err = -errno;
+			pr_warn("Couldn't unlink file %s: %s\n",
+				buf, strerror(-err));
+			goto out;
+		}
+
+		pr_debug("Unpinned prog %s from %s\n", prog->prog_name, buf);
+	}
+
+	/* Only claim the directory is gone when rmdir() actually succeeded;
+	 * otherwise report why it is still there.
+	 */
+	if (rmdir(pin_path)) {
+		err = -errno;
+		pr_warn("Failed to remove pin directory %s: %s\n", pin_path,
+			strerror(-err));
+		goto out;
+	}
+	pr_debug("Removed pin directory %s\n", pin_path);
+out:
+	xdp_lock_release(lock_fd);
+	return err;
+}
+
+/*
+ * Swap the program attached to an interface: @old_mp (may be NULL) is what is
+ * expected to be attached now, @mp (may be NULL) is what should be attached
+ * afterwards. The main program fds of both are handed to xdp_attach_fd(), with
+ * -1 standing in for a missing multiprog.
+ *
+ * Fails with -EINVAL when neither multiprog is usable, when @mode is
+ * XDP_MODE_HW, when a main fd cannot be obtained, or when the two multiprogs
+ * refer to different ifindexes. Otherwise returns 0, or the negative value
+ * returned by xdp_attach_fd().
+ */
+static int xdp_multiprog__attach(struct xdp_multiprog *old_mp,
+				 struct xdp_multiprog *mp,
+				 enum xdp_attach_mode mode)
+{
+	int ifindex = -1, new_fd = -1, prev_fd = -1;
+	const char *mode_str;
+	int ret;
+
+	if (!old_mp && IS_ERR_OR_NULL(mp))
+		return -EINVAL;
+
+	if (mode == XDP_MODE_HW)
+		return -EINVAL;
+
+	if (mp) {
+		new_fd = xdp_multiprog__main_fd(mp);
+		if (new_fd < 0)
+			return -EINVAL;
+		ifindex = mp->ifindex;
+	}
+
+	if (old_mp) {
+		prev_fd = xdp_multiprog__main_fd(old_mp);
+		if (prev_fd < 0)
+			return -EINVAL;
+		/* both multiprogs must belong to the same interface */
+		if (ifindex > -1 && ifindex != old_mp->ifindex)
+			return -EINVAL;
+		ifindex = old_mp->ifindex;
+	}
+
+	ret = xdp_attach_fd(new_fd, prev_fd, ifindex, mode);
+	if (ret < 0)
+		return ret;
+
+	mode_str = mode == XDP_MODE_SKB ? " in skb mode" : "";
+	if (mp)
+		pr_debug("Loaded %zu programs on ifindex %d%s\n",
+			 mp->num_links, ifindex, mode_str);
+	else
+		pr_debug("Detached %s on ifindex %d%s\n",
+			 xdp_multiprog__is_legacy(old_mp) ? "program" : "multiprog",
+			 ifindex, mode_str);
+
+	return 0;
+}
+
+/*
+ * Detach everything described by @mp: first the hardware program (if any),
+ * then the main program, and for non-legacy multiprogs the bpffs pins are
+ * removed afterwards. The first failing step ends the operation.
+ *
+ * Returns 0 on success or a negative errno-style value; the result is passed
+ * through libxdp_err(), which also stores it in errno on failure. A NULL/error
+ * pointer or a multiprog that is not loaded yields -EINVAL.
+ */
+int xdp_multiprog__detach(struct xdp_multiprog *mp)
+{
+	int ret = 0;
+
+	if (IS_ERR_OR_NULL(mp) || !mp->is_loaded)
+		return libxdp_err(-EINVAL);
+
+	if (mp->hw_prog) {
+		ret = xdp_multiprog__detach_hw(mp);
+		if (ret)
+			return libxdp_err(ret);
+	}
+
+	/* nothing attached besides (possibly) the hardware program */
+	if (!mp->main_prog)
+		return libxdp_err(ret);
+
+	ret = xdp_multiprog__attach(mp, NULL, mp->attach_mode);
+	if (ret)
+		return libxdp_err(ret);
+
+	/* only dispatcher-based multiprogs have pins to clean up */
+	if (!xdp_multiprog__is_legacy(mp))
+		ret = xdp_multiprog__unpin(mp);
+
+	return libxdp_err(ret);
+}
+
+/*
+ * Iterate over the component programs of @mp: pass NULL as @prog to get the
+ * first program, or a previously returned program to get its successor.
+ * Returns NULL (with errno set to 0) for NULL/error pointers and for legacy
+ * multiprogs.
+ */
+struct xdp_program *xdp_multiprog__next_prog(const struct xdp_program *prog,
+					     const struct xdp_multiprog *mp)
+{
+	if (IS_ERR_OR_NULL(mp) || xdp_multiprog__is_legacy(mp))
+		return libxdp_err_ptr(0, true);
+
+	return prog ? prog->next : mp->first_prog;
+}
+
+/*
+ * Return the hw_prog member of @mp, or NULL (with errno set to 0) when @mp is
+ * NULL or an error pointer.
+ */
+struct xdp_program *xdp_multiprog__hw_prog(const struct xdp_multiprog *mp)
+{
+	if (!IS_ERR_OR_NULL(mp))
+		return mp->hw_prog;
+
+	return libxdp_err_ptr(0, true);
+}
+
+/*
+ * Return the attach mode recorded in @mp, or XDP_MODE_UNSPEC when @mp is NULL
+ * or an error pointer.
+ */
+enum xdp_attach_mode xdp_multiprog__attach_mode(const struct xdp_multiprog *mp)
+{
+	return IS_ERR_OR_NULL(mp) ? XDP_MODE_UNSPEC : mp->attach_mode;
+}
+
+/*
+ * Return the main_prog member of @mp, or NULL (with errno set to 0) when @mp
+ * is NULL or an error pointer.
+ */
+struct xdp_program *xdp_multiprog__main_prog(const struct xdp_multiprog *mp)
+{
+	if (!IS_ERR_OR_NULL(mp))
+		return mp->main_prog;
+
+	return libxdp_err_ptr(0, true);
+}
+
+/*
+ * Report the is_legacy flag of @mp. A NULL or error pointer is reported as
+ * "not legacy" (false).
+ */
+bool xdp_multiprog__is_legacy(const struct xdp_multiprog *mp)
+{
+	return !IS_ERR_OR_NULL(mp) && mp->is_legacy;
+}
+
+/*
+ * Return the num_links counter of @mp. For a NULL or error pointer, -EINVAL is
+ * returned and errno is set accordingly.
+ */
+int xdp_multiprog__program_count(const struct xdp_multiprog *mp)
+{
+	if (!IS_ERR_OR_NULL(mp))
+		return mp->num_links;
+
+	return libxdp_err(-EINVAL);
+}
+
+/*
+ * Report whether the dispatcher described by @mp has XDP frags support
+ * enabled, i.e. the is_xdp_frags field of its configuration.
+ *
+ * Returns false for legacy multiprogs and for NULL/error pointers. The
+ * explicit pointer check is required: xdp_multiprog__is_legacy() itself
+ * returns false for an invalid pointer, so relying on it alone would go on to
+ * dereference that pointer.
+ */
+bool xdp_multiprog__xdp_frags_support(const struct xdp_multiprog *mp)
+{
+	if (IS_ERR_OR_NULL(mp) || xdp_multiprog__is_legacy(mp))
+		return false;
+
+	return mp->config.is_xdp_frags;
+}
+
+/*
+ * Delete the pin directory "<bpffs dir>/<subdir>": every entry that is not a
+ * directory is unlinked, then the directory itself is removed.
+ *
+ * Stops at the first failure. Returns 0 on success or a negative errno-style
+ * value.
+ */
+static int remove_pin_dir(const char *subdir)
+{
+	char prog_path[PATH_MAX], pin_path[PATH_MAX];
+	struct dirent *entry;
+	const char *bpffs;
+	DIR *dirp;
+	int ret;
+
+	bpffs = get_bpffs_dir();
+	if (IS_ERR(bpffs))
+		return PTR_ERR(bpffs);
+
+	ret = try_snprintf(pin_path, sizeof(pin_path), "%s/%s", bpffs, subdir);
+	if (ret)
+		return ret;
+
+	dirp = opendir(pin_path);
+	if (!dirp) {
+		ret = -errno;
+		pr_warn("Failed to open pin directory: %s\n", strerror(-ret));
+		return ret;
+	}
+
+	while ((entry = readdir(dirp))) {
+		/* directory entries (this covers "." and "..") are left alone */
+		if (entry->d_type == DT_DIR)
+			continue;
+
+		ret = try_snprintf(prog_path, sizeof(prog_path), "%s/%s",
+				   pin_path, entry->d_name);
+		if (ret)
+			goto out_close;
+
+		if (unlink(prog_path)) {
+			ret = -errno;
+			pr_warn("Couldn't unlink file %s/%s: %s\n", subdir,
+				entry->d_name, strerror(-ret));
+			goto out_close;
+		}
+	}
+
+	/* ret is 0 here: every path that set it non-zero jumped to out_close */
+	if (rmdir(pin_path)) {
+		ret = -errno;
+		pr_warn("Failed to remove pin directory %s: %s\n", pin_path,
+			strerror(-ret));
+	}
+out_close:
+	closedir(dirp);
+	return ret;
+}
+
+/*
+ * Remove stale dispatcher pin directories from bpffs.
+ *
+ * Every "dispatch-<ifindex>-<prog id>" directory in the bpffs dir is checked
+ * against the program ID currently reported for that ifindex; when they do not
+ * match (or nothing is reported), the directory and the pins inside it are
+ * removed. If @ifindex is non-zero, only directories for that ifindex are
+ * considered. The scan runs while holding the lock from xdp_lock_acquire() and
+ * stops at the first directory that cannot be removed.
+ *
+ * Returns 0 on success or a negative errno-style value; the result is passed
+ * through libxdp_err(), which also stores it in errno on failure.
+ */
+int libxdp_clean_references(int ifindex)
+{
+	int err = 0, lock_fd, path_ifindex;
+	__u32 dir_prog_id, prog_id;
+	DIR *d;
+
+	const char *dir = get_bpffs_dir();
+	if (IS_ERR(dir))
+		return libxdp_err(PTR_ERR(dir));
+
+	lock_fd = xdp_lock_acquire();
+	if (lock_fd < 0)
+		return libxdp_err(lock_fd);
+
+	d = opendir(dir);
+	if (!d) {
+		err = -errno;
+		pr_debug("Failed to open bpffs directory: %s\n",
+			 strerror(-err));
+		goto out;
+	}
+
+	for (struct dirent *dent = readdir(d); dent; dent = readdir(d)) {
+		if (dent->d_type != DT_DIR)
+			continue;
+
+		if (sscanf(dent->d_name, "dispatch-%d-%"PRIu32"",
+			   &path_ifindex, &dir_prog_id) != 2)
+			continue;
+
+		/* If ifindex is set, skip this dir if it doesn't match */
+		if (ifindex && path_ifindex != ifindex)
+			continue;
+
+		/* The lookup's return value is not checked, so start from 0
+		 * on every iteration: should the lookup fail without writing
+		 * prog_id, the ID found for a previously scanned directory
+		 * must not be compared against this one.
+		 */
+		prog_id = 0;
+		xdp_get_ifindex_prog_id(path_ifindex, &prog_id, NULL, NULL);
+		if (!prog_id || prog_id != dir_prog_id) {
+			pr_info("Prog id %"PRIu32" no longer attached on ifindex %d, removing pin directory %s\n",
+				dir_prog_id, path_ifindex, dent->d_name);
+			err = remove_pin_dir(dent->d_name);
+			if (err)
+				break;
+		}
+	}
+	closedir(d);
+out:
+	xdp_lock_release(lock_fd);
+	return libxdp_err(err);
+}
diff --git a/lib/libxdp/libxdp.map b/lib/libxdp/libxdp.map
new file mode 100644
index 0000000..9242794
--- /dev/null
+++ b/lib/libxdp/libxdp.map
@@ -0,0 +1,78 @@
+LIBXDP_1.0.0 {
+ global:
+ libxdp_get_error;
+ libxdp_set_print;
+ libxdp_strerror;
+ xdp_multiprog__attach_mode;
+ xdp_multiprog__close;
+ xdp_multiprog__detach;
+ xdp_multiprog__dispatcher;
+ xdp_multiprog__get_from_ifindex;
+ xdp_multiprog__is_legacy;
+ xdp_multiprog__next_prog;
+ xdp_multiprog__main_prog;
+ xdp_multiprog__hw_prog;
+ xdp_program__attach;
+ xdp_program__attach_multi;
+ xdp_program__bpf_obj;
+ xdp_program__btf;
+ xdp_program__chain_call_enabled;
+ xdp_program__close;
+ xdp_program__detach;
+ xdp_program__detach_multi;
+ xdp_program__find_file;
+ xdp_program__from_bpf_obj;
+ xdp_program__from_fd;
+ xdp_program__from_id;
+ xdp_program__from_pin;
+ xdp_program__fd;
+ xdp_program__id;
+ xdp_program__is_attached;
+ xdp_program__name;
+ xdp_program__open_file;
+ xdp_program__pin;
+ xdp_program__print_chain_call_actions;
+ xdp_program__run_prio;
+ xdp_program__set_chain_call_enabled;
+ xdp_program__set_run_prio;
+ xdp_program__tag;
+};
+
+LIBXDP_1.2.0 {
+ libxdp_clean_references;
+ xdp_multiprog__program_count;
+ xsk_setup_xdp_prog;
+ xsk_socket__create;
+ xsk_socket__create_shared;
+ xsk_socket__delete;
+ xsk_socket__fd;
+ xsk_socket__update_xskmap;
+ xsk_umem__create;
+ xsk_umem__delete;
+ xsk_umem__fd;
+ xsk_cons_nb_avail;
+ xsk_prod_nb_free;
+ xsk_ring_cons__cancel;
+ xsk_ring_cons__comp_addr;
+ xsk_ring_cons__peek;
+ xsk_ring_cons__release;
+ xsk_ring_cons__rx_desc;
+ xsk_ring_prod__fill_addr;
+ xsk_ring_prod__needs_wakeup;
+ xsk_ring_prod__reserve;
+ xsk_ring_prod__submit;
+ xsk_ring_prod__tx_desc;
+ xsk_umem__add_offset_to_addr;
+ xsk_umem__extract_addr;
+ xsk_umem__extract_offset;
+ xsk_umem__get_data;
+} LIBXDP_1.0.0;
+
+LIBXDP_1.3.0 {
+ xdp_multiprog__xdp_frags_support;
+ xdp_program__clone;
+ xdp_program__create;
+ xdp_program__set_xdp_frags_support;
+ xdp_program__test_run;
+ xdp_program__xdp_frags_support;
+} LIBXDP_1.2.0;
diff --git a/lib/libxdp/libxdp.mk b/lib/libxdp/libxdp.mk
new file mode 100644
index 0000000..18b60e5
--- /dev/null
+++ b/lib/libxdp/libxdp.mk
@@ -0,0 +1,3 @@
+# Library version: the number of the last "LIBXDP_x.y.z {" version node found
+# in libxdp.map, so adding a new node there bumps the version automatically.
+LIBXDP_VERSION := $(shell sed -ne "/LIBXDP_[0-9\.]\+ {/ {s/LIBXDP_\([0-9\.]\+\) {/\1/;p}" $(LIB_DIR)/libxdp/libxdp.map | tail -n 1)
+# Major version: everything before the first dot of LIBXDP_VERSION. Computed
+# with make built-ins so no extra shell is forked at parse time.
+LIBXDP_MAJOR_VERSION := $(firstword $(subst ., ,$(LIBXDP_VERSION)))
+
diff --git a/lib/libxdp/libxdp.pc.template b/lib/libxdp/libxdp.pc.template
new file mode 100644
index 0000000..30b10d4
--- /dev/null
+++ b/lib/libxdp/libxdp.pc.template
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+prefix=@PREFIX@
+libdir=@LIBDIR@
+includedir=${prefix}/include
+
+Name: libxdp
+Description: XDP library
+Version: @VERSION@
+Libs: -L${libdir} -lxdp
+Requires.private: libbpf
+Cflags: -I${includedir}
diff --git a/lib/libxdp/libxdp_internal.h b/lib/libxdp/libxdp_internal.h
new file mode 100644
index 0000000..605735c
--- /dev/null
+++ b/lib/libxdp/libxdp_internal.h
@@ -0,0 +1,146 @@
+#ifndef __LIBXDP_LIBXDP_INTERNAL_H
+#define __LIBXDP_LIBXDP_INTERNAL_H
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <linux/err.h>
+#include <xdp/libxdp.h>
+
+#define LIBXDP_HIDE_SYMBOL __attribute__((visibility("hidden")))
+#define __unused __attribute__((unused))
+
+#define __printf(a, b) __attribute__((format(printf, a, b)))
+
+/*
+ * snprintf() wrapper that turns truncation into an error.
+ *
+ * Formats into @buf (capacity @buf_len) and returns 0 when the whole result,
+ * including the terminating NUL, fit. Returns -EINVAL if vsnprintf() reports
+ * an error and -ENAMETOOLONG if the output had to be truncated.
+ */
+static inline int try_snprintf(char *buf, size_t buf_len, const char *format, ...)
+{
+	va_list ap;
+	int written;
+
+	va_start(ap, format);
+	written = vsnprintf(buf, buf_len, format, ap);
+	va_end(ap);
+
+	if (written < 0)
+		return -EINVAL;
+
+	/* vsnprintf() returns the length it needed, excluding the NUL */
+	return (size_t)written < buf_len ? 0 : -ENAMETOOLONG;
+}
+
+LIBXDP_HIDE_SYMBOL __printf(2, 3) void libxdp_print(enum libxdp_print_level level,
+ const char *format, ...);
+#define __pr(level, fmt, ...) \
+ do { \
+ libxdp_print(level, "libxdp: " fmt, ##__VA_ARGS__); \
+ } while (0)
+
+#define pr_warn(fmt, ...) __pr(LIBXDP_WARN, fmt, ##__VA_ARGS__)
+#define pr_info(fmt, ...) __pr(LIBXDP_INFO, fmt, ##__VA_ARGS__)
+#define pr_debug(fmt, ...) __pr(LIBXDP_DEBUG, fmt, ##__VA_ARGS__)
+
+LIBXDP_HIDE_SYMBOL int check_xdp_prog_version(const struct btf *btf, const char *name,
+ __u32 *version);
+
+LIBXDP_HIDE_SYMBOL int libxdp_check_kern_compat(void);
+
+/* Smaller/larger of two values. Every use of an argument is parenthesised so
+ * that expressions with low-precedence operators can be passed safely. Note
+ * that each argument may still be evaluated twice.
+ */
+#define min(x, y) ((x) < (y) ? (x) : (y))
+#define max(x, y) ((x) > (y) ? (x) : (y))
+
+#ifndef offsetof
+#define offsetof(type, member) ((size_t) & ((type *)0)->member)
+#endif
+
+#ifndef offsetofend
+#define offsetofend(TYPE, FIELD) (offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD))
+#endif
+
+#ifndef container_of
+#define container_of(ptr, type, member) \
+ ({ \
+ const typeof(((type *)0)->member) *__mptr = (ptr); \
+ (type *)((char *)__mptr - offsetof(type, member)); \
+ })
+#endif
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+
+/* OPTS macros, from libbpf_internal.h */
+
+/*
+ * Check whether the bytes of @obj in the half-open offset range
+ * [@off_start, @off_end) are all zero. An empty range counts as zeroed.
+ */
+static inline bool libxdp_is_mem_zeroed(const char *obj,
+					 size_t off_start, size_t off_end)
+{
+	const char *cur = obj + off_start;
+	const char *end = obj + off_end;
+
+	while (cur < end)
+		if (*cur++)
+			return false;
+
+	return true;
+}
+
+/*
+ * Validate a caller-supplied options struct of @user_sz bytes against the
+ * @opts_sz bytes this library knows about: the struct must be at least
+ * sizeof(size_t) bytes, and any bytes between @opts_sz and @user_sz must be
+ * zero. A warning naming @type_name is printed for each rejected case.
+ */
+static inline bool libxdp_validate_opts(const char *opts,
+					 size_t opts_sz, size_t user_sz,
+					 const char *type_name)
+{
+	if (user_sz < sizeof(size_t)) {
+		pr_warn("%s size (%zu) is too small\n", type_name, user_sz);
+		return false;
+	}
+
+	if (libxdp_is_mem_zeroed(opts, opts_sz, user_sz))
+		return true;
+
+	pr_warn("%s has non-zero extra bytes\n", type_name);
+	return false;
+}
+
+/*
+ * Accessors for size-versioned options structs, i.e. structs carrying their
+ * caller-side size in a 'sz' member. A NULL @opts is accepted everywhere.
+ *
+ * OPTS_VALID:  struct is large enough and has no non-zero bytes past
+ *              <type>__last_field
+ * OPTS_HAS:    the caller's struct is large enough to contain @field
+ * OPTS_GET:    value of @field, or @fallback_value if the field is absent
+ * OPTS_SET:    assign @field if it is present, otherwise do nothing
+ * OPTS_ZEROED: all bytes after @last_nonzero_field are zero
+ *
+ * Macro arguments are parenthesised at every use so arbitrary expressions can
+ * be passed.
+ */
+#define OPTS_VALID(opts, type) \
+	(!(opts) || libxdp_validate_opts((const char *)(opts), \
+					 offsetofend(struct type, \
+						     type##__last_field), \
+					 (opts)->sz, #type))
+#define OPTS_HAS(opts, field) \
+	((opts) && (opts)->sz >= offsetofend(typeof(*(opts)), field))
+#define OPTS_GET(opts, field, fallback_value) \
+	(OPTS_HAS(opts, field) ? (opts)->field : (fallback_value))
+#define OPTS_SET(opts, field, value) \
+	do { \
+		if (OPTS_HAS(opts, field)) \
+			(opts)->field = (value); \
+	} while (0)
+
+#define OPTS_ZEROED(opts, last_nonzero_field) \
+	(!(opts) || libxdp_is_mem_zeroed((const void *)(opts), \
+					 offsetofend(typeof(*(opts)), \
+						     last_nonzero_field), \
+					 (opts)->sz))
+
+/*
+ * Error helper for int-returning APIs: a negative @ret is mirrored into errno
+ * (as a positive value); anything else leaves errno untouched. @ret itself is
+ * always returned unchanged.
+ */
+static inline int libxdp_err(int ret)
+{
+	if (ret >= 0)
+		return ret;
+
+	errno = -ret;
+	return ret;
+}
+
+/*
+ * Error helper for pointer-returning APIs. errno is always set to -@err, so
+ * passing 0 clears it. The return value is NULL when @ret_null is set;
+ * otherwise @err is encoded into the pointer with ERR_PTR() (the legacy
+ * convention).
+ */
+static inline void *libxdp_err_ptr(int err, bool ret_null)
+{
+	errno = -err;
+
+	return ret_null ? NULL : ERR_PTR(err);
+}
+
+LIBXDP_HIDE_SYMBOL int xdp_lock_acquire(void);
+LIBXDP_HIDE_SYMBOL int xdp_lock_release(int lock_fd);
+LIBXDP_HIDE_SYMBOL int xdp_attach_fd(int prog_fd, int old_fd, int ifindex,
+ enum xdp_attach_mode mode);
+
+#endif /* __LIBXDP_LIBXDP_INTERNAL_H */
diff --git a/lib/libxdp/protocol.org b/lib/libxdp/protocol.org
new file mode 100644
index 0000000..2adaf6a
--- /dev/null
+++ b/lib/libxdp/protocol.org
@@ -0,0 +1,473 @@
+#+OPTIONS: ^:nil
+
+* Protocol for atomic loading of multi-prog dispatchers
+
+With the support for the =freplace= program type, it is possible to load
+multiple XDP programs on a single interface by building a /dispatcher/ program
+which will run on the interface, and which will call the component XDP programs
+as functions using the =freplace= type.
+
+For this to work in an interoperable way, applications need to agree on how to
+attach their XDP programs using this mechanism. This document outlines the
+protocol implemented by =libxdp=, serving as both documentation and a blueprint
+for anyone else who wants to implement the same protocol and interoperate.
+
+** Generating a dispatcher
+The dispatcher is simply an XDP program that will call each of a number of stub
+functions in turn, and depending on their return code either continue on to the
+next function or return immediately. These stub functions are then replaced at
+load time with the user XDP programs, using the =freplace= functionality.
+
+*** Dispatcher format
+The dispatcher XDP program contains the main function containing the dispatcher
+logic, 10 stub functions that can be replaced by component BPF programs, and a
+configuration structure that is used by the dispatcher logic.
+
+In =libxdp=, this dispatcher is generated by [[https://github.com/xdp-project/xdp-tools/blob/master/lib/libxdp/xdp-dispatcher.c.in][an M4 macro file]] which expands to
+the following:
+
+#+begin_src C
+#define XDP_METADATA_SECTION "xdp_metadata"
+#define XDP_DISPATCHER_VERSION 2
+#define XDP_DISPATCHER_MAGIC 236
+#define XDP_DISPATCHER_RETVAL 31
+#define MAX_DISPATCHER_ACTIONS 10
+
+struct xdp_dispatcher_config {
+ __u8 magic; /* Set to XDP_DISPATCHER_MAGIC */
+ __u8 dispatcher_version; /* Set to XDP_DISPATCHER_VERSION */
+ __u8 num_progs_enabled; /* Number of active program slots */
+ __u8 is_xdp_frags; /* Whether this dispatcher is loaded with XDP frags support */
+ __u32 chain_call_actions[MAX_DISPATCHER_ACTIONS];
+ __u32 run_prios[MAX_DISPATCHER_ACTIONS];
+ __u32 program_flags[MAX_DISPATCHER_ACTIONS];
+};
+
+/* While 'const volatile' sounds a little like an oxymoron, there's reason
+ * behind the madness:
+ *
+ * - const places the data in rodata, where libbpf will mark it as read-only and
+ * frozen on program load, letting the kernel do dead code elimination based
+ * on the values.
+ *
+ * - volatile prevents the compiler from optimising away the checks based on the
+ * compile-time value of the variables, which is important since we will be
+ * changing the values before loading the program into the kernel.
+ */
+static volatile const struct xdp_dispatcher_config conf = {};
+
+/* The volatile return value prevents the compiler from assuming it knows the
+ * return value and optimising based on that.
+ */
+__attribute__ ((noinline))
+int prog0(struct xdp_md *ctx) {
+ volatile int ret = XDP_DISPATCHER_RETVAL;
+
+ if (!ctx)
+ return XDP_ABORTED;
+ return ret;
+}
+/* the above is repeated as prog1...prog9 */
+
+SEC("xdp")
+int xdp_dispatcher(struct xdp_md *ctx)
+{
+ __u8 num_progs_enabled = conf.num_progs_enabled;
+ int ret;
+
+ if (num_progs_enabled < 1)
+ goto out;
+ ret = prog0(ctx);
+ if (!((1U << ret) & conf.chain_call_actions[0]))
+ return ret;
+
+ /* the above is repeated for prog1...prog9 */
+
+out:
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
+__uint(dispatcher_version, XDP_DISPATCHER_VERSION) SEC(XDP_METADATA_SECTION);
+#+end_src
+
+The dispatcher program is pre-compiled and distributed with =libxdp=. Because
+the configuration struct is marked as =const= in the source file, it will be put
+into the =rodata=, which libbpf will turn into a read-only (frozen) map on load.
+This allows the kernel verifier to perform dead code elimination based on the
+values in the map. This is also the reason for the =num_progs_enabled= member of
+the config struct: together with the checks in the main dispatcher function the
+verifier will effectively remove all the stub function calls not being used,
+without having to rely on dynamic compilation.
+
+When generating a dispatcher, this BPF object file is opened and the
+configuration struct is populated before the object is loaded. As a forward
+compatibility measure, =libxdp= will also check for the presence of the
+=dispatcher_version= field in the =xdp_metadata= section (encoded like the
+program metadata described in "Processing program metadata" below), and if it
+doesn't match the expected version (currently version 2), will abort any action.
+
+
+*** Populating the dispatcher configuration map
+On loading, the dispatcher configuration map is populated as follows:
+
+- The =magic= field is set to the =XDP_DISPATCHER_MAGIC= value (236). This field
+ is here to make it possible to check if a program is a dispatcher without
+ looking at the program BTF in the future.
+
+- The =dispatcher_version= field is set to the current dispatcher version (2).
+ This is redundant with the BTF-encoded version in the metadata field, but must
+ be checked so that the BTF metadata version can be removed in the future. See
+ the section on old dispatcher versions below.
+
+- The =num_progs_enabled= member is simply set to the number of active programs
+ that will be attached to this dispatcher.
+
+- The =is_xdp_frags= field is set to 1 if the dispatcher is loaded with XDP frags
+ support (see section below), or 0 otherwise.
+
+The three other fields contain per-component program metadata, which is read from
+the component programs as explained in the "Processing program metadata" section
+below.
+
+- The =chain_call_actions= array is populated with a bitmap signifying which XDP
+ actions (return codes) of each component program should be interpreted as a
+ signal to continue execution of the next XDP program. For instance, a packet
+ filtering program might designate that an =XDP_PASS= action should make
+ execution continue, while other return codes should immediately end the call
+ chain and return. The special =XDP_DISPATCHER_RETVAL= (which is set to 31
+ corresponding to the topmost bit in the bitmap) is always included in each
+ program's =chain_call_actions=; this value is returned by the stub functions,
+ which ensures that should a component program become detached, processing
+ will always continue past the stub function.
+
+- The =run_prios= array contains the effective run priority of each component
+ program when it was installed. This is also read as program metadata, but
+ because it can be overridden at load time, the effective value is stored in
+ the configuration array so it can be carried forward when the dispatcher is
+ replaced. Component programs are expected to be sorted in order of their run
+ priority (as explained below in "Loading and attaching component programs").
+
+- The =program_flags= is used to store the flags that an XDP program was loaded
+ with. This is populated with the value of the =BPF_F_XDP_HAS_FRAGS= flag if
+ the component program in this slot had that flag set (see the section on XDP
+ frags support below), and is 0 otherwise.
+
+**** Processing program metadata
+As explained above, each component program must specify one or more chain call
+actions and a run priority on attach. When loading a user program, =libxdp= will
+attempt to read this metadata from the object file as explained in the
+following; if no values are found in the object file, a default run priority of
+50 will be applied, and =XDP_PASS= will be the only chain call action.
+
+The metadata is read from the object file by looking for BTF-encoded metadata in
+the =.xdp_run_config= object section, encoded similar to the BTF-defined maps
+used by libbpf (in the =.maps= section). Here, =libxdp= will look for a struct
+definition with the XDP program function name prefixed by an underscore (e.g.,
+if the main XDP function is called =xdp_main=, libxdp will look for a struct
+definition called =_xdp_main=). In this struct, a member =priority= encodes the
+run priority, and each XDP action can be set as a chain call action by setting a
+struct member with the action name.
+
+The =xdp_helpers.h= header file included with XDP exposes helper macros that can
+be used with the existing helpers in =bpf_helpers.h= (from libbpf), so a full
+run configuration metadata section can be defined as follows:
+
+#+begin_src C
+#include <bpf/bpf_helpers.h>
+#include <xdp/xdp_helpers.h>
+
+struct {
+ __uint(priority, 10);
+ __uint(XDP_PASS, 1);
+ __uint(XDP_DROP, 1);
+} XDP_RUN_CONFIG(my_xdp_func);
+#+end_src
+
+This example sets priority 10 with chain call actions =XDP_PASS= and =XDP_DROP=
+for the XDP program starting at =my_xdp_func()=.
+
+This turns into the following BTF information (as shown by =bpftool btf dump=):
+
+#+begin_src
+[12] STRUCT '(anon)' size=24 vlen=3
+ 'priority' type_id=13 bits_offset=0
+ 'XDP_PASS' type_id=15 bits_offset=64
+ 'XDP_DROP' type_id=15 bits_offset=128
+[13] PTR '(anon)' type_id=14
+[14] ARRAY '(anon)' type_id=6 index_type_id=10 nr_elems=10
+[15] PTR '(anon)' type_id=16
+[16] ARRAY '(anon)' type_id=6 index_type_id=10 nr_elems=1
+[17] VAR '_my_xdp_func' type_id=12, linkage=global-alloc
+[18] DATASEC '.xdp_run_config' size=0 vlen=1
+ type_id=17 offset=0 size=24
+#+end_src
+
+The parser will look for the =.xdp_run_config= DATASEC, then follow the types
+recursively, extracting the field values from the =nr_elems= in the anonymous
+arrays in type IDs 14 and 16.
+
+While =libxdp= will automatically load any metadata specified as above in the
+program BTF, the application using =libxdp= can override these values at
+runtime. These overridden values will be the ones used when determining program
+order, and will be preserved in the dispatcher configuration map for subsequent
+operations.
+
+*** Old versions of the XDP dispatcher
+This document currently describes version 2 of the dispatcher and protocol. This
+differs from version 1 in the following respects:
+
+- The dispatcher configuration map has gained the =magic= and
+ =dispatcher_version= fields for identifying the dispatcher and its version.
+
+- The protocol now supports propagating the value of the =BPF_F_XDP_HAS_FRAGS=
+ field for supporting XDP frags programs for higher MTU. The dispatcher
+ configuration map has gained the =is_xdp_frags= and =program_flags= fields for
+ use with this feature. The protocol for propagating the frags field is
+ described below, and an implementation of this protocol that recognises
+ version 2 of the dispatcher MUST implement this protocol.
+
+Older versions of libxdp will check the dispatcher version field of any
+dispatcher loaded in the kernel, and refuse to operate on a dispatcher with a
+higher version than the library version implements. This means that if a newer
+dispatcher is loaded, old versions of the library will be locked out of
+modifying that dispatcher. This is by design: old library versions don't
+recognise the semantics of new features added in subsequent versions, and so
+would introduce bugs if they attempted to operate on newer versions.
+
+Newer versions of libxdp will, however, recognise older dispatcher versions. If
+a newer version of libxdp loads a new program and finds an old dispatcher
+version already loaded on an interface, it will display the programs attached to
+it, but will refuse to replace it with a newer version so as not to lock out the
+program that loaded the program(s) already attached. Manually unloading the
+loaded programs will be required to load a new dispatcher version on the
+interface.
+
+*** Loading and attaching component programs
+When loading one or more XDP programs onto an interface (assuming no existing
+program is found on the interface; for adding programs, see below), =libxdp=
+first prepares a dispatcher program with the right number of slots, by
+populating the configuration struct as described above. Then, this dispatcher
+program is loaded into the kernel, with the =BPF_F_XDP_HAS_FRAGS= flag set if
+all component programs have that flag set (see the section on supporting XDP
+frags below).
+
+Having loaded the dispatcher program, =libxdp= then loads each of the component
+programs. To do this, first the list of component programs is sorted by their
+run priority, forming the final run sequence. Should several programs have the
+same run priority, ties are broken in the following arbitrary, but
+deterministic, order (see =cmp_xdp_programs()= [[https://github.com/xdp-project/xdp-tools/blob/master/lib/libxdp/libxdp.c][in libxdp.c]]):
+
+- By XDP function name (=bpf_program__name()= from libbpf)
+- By sorting already-loaded programs before not-yet-loaded ones
+- For programs that are not yet loaded, by program size
+- For loaded programs, by BPF tag value (using =memcmp()=)
+- By load time
+
+Before loading, each component program type is reset to =BPF_PROG_TYPE_EXT= with
+an expected attach type of 0, and the =BPF_F_XDP_HAS_FRAGS= is unset (see the
+section on supporting frags below). Then, the attachment target is set to the
+dispatcher file descriptor and the BTF ID of the stub function to replace (i.e.,
+the first component program has =prog0()= as its target, and so on). Then the
+program is loaded, at which point the kernel will verify the component program's
+compatibility with the attach point.
+
+Having loaded the component program, it is attached to the dispatcher by way of
+=bpf_link_create()=, specifying the same target file descriptor and BTF ID used
+when loading the program. This will return a link fd, which will be pinned to
+prevent the attachment from unravelling when the fd is closed (see "Locking and
+pinning" below).
+
+*** Locking and pinning
+To prevent the kernel from detaching any =freplace= program when its last file
+descriptor is closed, the programs must be pinned in =bpffs=. This is done in
+the =xdp= subdirectory of =bpffs=, which by default means =/sys/fs/bpf/xdp=. If
+the =LIBXDP_BPFFS= environment variable is set, this will override the location
+of the top-level =bpffs=, and the =xdp= subdirectory will be created beneath
+this path.
+
+The pathnames generated for pinning are the following:
+
+- /sys/fs/bpf/xdp/dispatch-IFINDEX-DID - dispatcher program for IFINDEX with BPF program ID DID
+- /sys/fs/bpf/xdp/dispatch-IFINDEX-DID/prog0-prog - component program 0, program reference
+- /sys/fs/bpf/xdp/dispatch-IFINDEX-DID/prog0-link - component program 0, bpf_link reference
+- /sys/fs/bpf/xdp/dispatch-IFINDEX-DID/prog1-prog - component program 1, program reference
+- /sys/fs/bpf/xdp/dispatch-IFINDEX-DID/prog1-link - component program 1, bpf_link reference
+- etc, up to ten component programs
+
+This means that several pin operations have to be performed for each dispatcher
+program. Semantically, these form a single atomic operation, so to make sure every consumer of
+the hierarchy of pinned files gets a consistent view, locking is needed. This is
+implemented by opening the parent directory =/sys/fs/bpf/xdp= with the
+=O_DIRECTORY= flag, and obtaining a lock on the resulting file descriptor using
+=flock(lock_fd, LOCK_EX)=.
+
+When creating a new dispatcher program, it will first be fully populated, with
+all component programs attached. Then, the programs will be linked in =bpffs= as
+specified above, and once this succeeds, the program will be attached to the
+interface. If attaching the program fails, the programs will be unpinned again,
+and the error returned to the caller. This order ensures atomic attachment to
+the interface, without any risk that component programs will be automatically
+detached due to a badly timed application crash.
+
+When loading the initial dispatcher program, the =XDP_FLAGS_UPDATE_IF_NOEXIST=
+flag is set to prevent accidentally overriding any concurrent modifications. If
+this fails, the whole operation starts over, turning the load into a
+modification as described below.
+
+*** Supporting XDP programs with frags support (BPF_F_XDP_HAS_FRAGS flag)
+Linux kernel 5.18 added support for a new API that allows XDP programs to access
+packet data that spans more than a single page, allowing XDP programs to be
+loaded on interfaces with bigger MTUs. Such packets will not have all their
+packet data accessible by the traditional "direct packet access"; instead, only
+the first fragment will be available this way, and the rest of the packet data
+has to be accessed via the new =bpf_xdp_load_bytes()= helper.
+
+Existing XDP programs are written with the assumption that they can see the
+whole packet data using direct packet access, which means they can subtly
+malfunction if some of the packet data is suddenly invisible (for instance,
+counting packet lengths is no longer accurate). Whether a given XDP program
+supports the frags API or not is a semantic issue, and it's not possible for the
+kernel to auto-detect this. For this reason, programs have to opt in to XDP
+frags support at load time, by setting the =BPF_F_XDP_HAS_FRAGS= flag as they
+are loaded into the kernel. Programs that are not loaded with this flag will be
+rejected from attaching to network devices that use packet fragments (i.e., those
+with a large MTU).
+
+This has implications for the XDP dispatcher, as its purpose is for multiple
+programs to be loaded at the same time. Since the =BPF_F_XDP_HAS_FRAGS= flag cannot
+be set for individual component programs, it has to be set for the dispatcher as
+a whole. However, as described above, programs can subtly malfunction if they
+are exposed to packets with fragments without being ready to do so. This means
+that it's only safe to set the =BPF_F_XDP_HAS_FRAGS= on the dispatcher itself if
+*all* component programs have the flag set.
+
+To properly propagate the flags even when adding new programs to an existing
+dispatcher, the dispatcher itself needs to keep track of which of its component
+programs had the =BPF_F_XDP_HAS_FRAGS= flag set when they were added. The
+dispatcher configuration map uses the =program_flags= array for this: for each
+component program, this field is set to the value of the =BPF_F_XDP_HAS_FRAGS=
+flag if that component program has the flag set, and to 0 otherwise. An
+additional field, =is_xdp_frags=, is set if the dispatcher itself is loaded with
+the frags flag set (which may not be the case if the kernel doesn't support the
+flag).
+
+When generating a dispatcher for a set of programs, libxdp simply tracks if all
+component programs support the =BPF_F_XDP_HAS_FRAGS=, and if they do, the
+dispatcher is loaded with this flag set. If any program attached to the
+dispatcher does not support the flag, the dispatcher is loaded without this flag
+set (and the =is_xdp_frags= field in the dispatcher configuration is set
+accordingly). If libxdp determines that the running kernel does not support the
+=BPF_F_XDP_HAS_FRAGS=, the dispatcher is loaded without the flag regardless of
+the value of the component programs.
+
+When adding a program to an existing dispatcher, this may result in a
+"downgrade", i.e., loading a new dispatcher without the frags flag to replace an
+existing dispatcher that does have the flag set. This will result in the
+replacement dispatcher being rejected by the kernel at attach time, but only if
+the interface being attached to actually requires the frags flag (i.e., if it
+has a large MTU). If the attachment is rejected, the old dispatcher will stay in
+place, leading to no loss of functionality.
+
+** Adding or removing programs from an existing dispatcher
+The sections above explain how to generate a dispatcher and attach it to an
+interface, assuming no existing program is attached. When one or more programs
+are already attached, a couple of extra steps are required to ensure that the
+switch is made atomically.
+
+Briefly, changing the programs attached to an interface entails the following
+steps:
+
+- Reading the existing dispatcher program and obtaining references to the
+ component programs.
+
+- Generating a new dispatcher containing the new set of programs (adding or
+ removing the programs needed).
+
+- Atomically swapping out the XDP program attachment on the interface so the new
+ dispatcher takes over from the old one.
+
+- Unpinning and dismantling the old dispatcher.
+
+These operations are each described in turn in the following sections.
+
+*** Reading list of existing programs from the kernel
+The first step is to obtain the ID of the currently loaded XDP program using
+=bpf_get_link_xdp_info()=. A file descriptor to the dispatcher is obtained using
+=bpf_prog_get_fd_by_id()=, and the BTF information attached to the program is
+obtained from the kernel. This is checked for the presence of the dispatcher
+version field (as explained above), and the operation is aborted if this is not
+present, or doesn't match what the library expects.
+
+Having thus established that the program loaded on the interface is indeed a
+compatible dispatcher, the map ID of the map containing the configuration struct
+is obtained from the kernel, and the configuration data is loaded from the map
+(after checking that the map value size matches the expected configuration
+struct).
+
+Then, the file lock on the directory in =bpffs= is obtained as explained in
+the "Locking and pinning" section above, and, while holding this lock, file
+descriptors to each of the component programs and =bpf_link= objects are
+obtained. The end result is a reference to the full dispatcher structure (and
+its component programs), corresponding to that generated on load. When
+populating the component program structure in memory, the chain call actions and
+run priority from the dispatcher configuration map are used instead of parsing
+the BTF metadata of each program: This ensures that any modified values
+specified at load time will be retained instead of being reverted to the
+values compiled into the BTF metadata. Similarly, the =program_flags= array of
+the in-kernel dispatcher is used to determine which of the existing component
+programs support the =BPF_F_XDP_HAS_FRAGS= flag (see the section on frags
+support above).
+
+*** Generating a new dispatcher
+Having obtained a reference to the existing dispatcher, =libxdp= takes that and
+the list of programs to add to or remove from the interface, and simply
+generates a new dispatcher with the new set of programs. When adding programs,
+the whole list of programs is sorted according to their run priorities (as
+explained above), resulting in new programs being inserted in the right place in
+the existing sequence according to their priority.
+
+Generating this secondary dispatcher relies on the support for multiple
+attachments for =freplace= programs, which was added in kernel 5.10. This allows
+the =bpf_link_create()= operation to specify an attachment target in the new
+dispatcher. In other words, the component programs will briefly be attached to
+both the old and new dispatcher, but only one of those will be attached to the
+interface.
+
+After completion of the new dispatcher, its component programs are pinned in
+=bpffs= as described above.
+
+*** Atomic replace and retry
+At this point, =libxdp= has references to both the old dispatcher, already
+attached to the interface, and the new one with the modified set of component
+programs. The new dispatcher is then atomically swapped out with the old one,
+using the =XDP_FLAGS_REPLACE= flag to the netlink operation (and the
+accompanying =IFLA_XDP_EXPECTED_FD= attribute).
+
+Once the atomic replace operation succeeds, the old dispatcher is unpinned from
+=bpffs= and the in-memory references to both the old and new dispatchers are
+released (since the new dispatcher was already pinned, preventing it from being
+detached from the interface).
+
+Should this atomic replace instead *fail* because the program attached to the
+interface changed while the new dispatcher was being built, the whole operation
+is simply started over from the beginning. That is, the new dispatcher is
+unpinned from =bpffs=, and the in-memory references to both dispatchers are
+released (but no unpinning of the old dispatcher is performed!). Then, the
+program ID attached to the interface is again read from the kernel, and the
+operation proceeds from "Reading list of existing programs from the kernel".
+
+
+** Compatibility with older kernels
+The full functionality described above can only be attained with kernels version
+5.10 or newer, because this is the version that introduced support for
+re-attaching an freplace program in a secondary attachment point. However, the
+freplace functionality itself was introduced in kernel 5.7, so for kernel
+versions 5.7 to 5.9, multiple programs can be attached as long as they are all
+attached to the dispatcher immediately as they are loaded. This is achieved by
+using =bpf_raw_tracepoint_open()= in place of =bpf_link_create()= when attaching
+the component programs to the dispatcher. The =bpf_raw_tracepoint_open()=
+function doesn't take an attach target as a parameter; instead, it simply
+attaches the freplace program to the target that was specified at load time
+(which is why it only works when all component programs are loaded together with
+the dispatcher).
diff --git a/lib/libxdp/tests/.gitignore b/lib/libxdp/tests/.gitignore
new file mode 100644
index 0000000..cc3a114
--- /dev/null
+++ b/lib/libxdp/tests/.gitignore
@@ -0,0 +1,4 @@
+test_xsk_refcnt
+check_kern_compat
+test_xdp_frags
+test_dispatcher_versions
diff --git a/lib/libxdp/tests/Makefile b/lib/libxdp/tests/Makefile
new file mode 100644
index 0000000..3c22901
--- /dev/null
+++ b/lib/libxdp/tests/Makefile
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+
+# Userspace test binaries and BPF object files built by this Makefile
+USER_TARGETS := test_xsk_refcnt check_kern_compat test_xdp_frags test_dispatcher_versions
+BPF_TARGETS := xdp_dispatcher_v1 xdp_pass
+USER_LIBS := -lpthread
+
+# Extra prerequisites: EXTRA_DEPS is used by both the BPF object rule and the
+# test binary rule below, EXTRA_USER_DEPS only by the test binary rule
+EXTRA_DEPS += xdp_dispatcher_v1.h
+EXTRA_USER_DEPS += test_utils.h
+
+# Test definition file and the runner script invoked by the 'run' target
+TEST_FILE := ./test-libxdp.sh
+TEST_RUNNER := ./test_runner.sh
+
+# Source and object file names derived from the target lists above
+USER_C := ${USER_TARGETS:=.c}
+USER_OBJ := ${USER_C:.c=.o}
+BPF_OBJS := $(BPF_TARGETS:=.o)
+
+LIB_DIR := ../..
+LDLIBS += $(USER_LIBS)
+
+# NOTE(review): LIBXDP_DIR, LIBBPF_DIR, HEADER_DIR, LIBXDP_VERSION etc. are used
+# below but not set in this file; presumably they come from defines.mk
+include $(LIB_DIR)/defines.mk
+
+# Link the tests against the in-tree libxdp: the shared library when
+# DYNAMIC_LIBXDP is 1, the static archive otherwise. OBJECT_LIBXDP is the file
+# the test binaries depend on.
+LDFLAGS+=-L$(LIBXDP_DIR)
+ifeq ($(DYNAMIC_LIBXDP),1)
+ LDLIBS:=-lxdp $(LDLIBS)
+ OBJECT_LIBXDP:=$(LIBXDP_DIR)/libxdp.so.$(LIBXDP_VERSION)
+else
+ LDLIBS:=-l:libxdp.a $(LDLIBS)
+ OBJECT_LIBXDP:=$(LIBXDP_DIR)/libxdp.a
+endif
+
+# Detect submodule libbpf source file changes
+ifeq ($(SYSTEM_LIBBPF),n)
+ LIBBPF_SOURCES := $(wildcard $(LIBBPF_DIR)/src/*.[ch])
+endif
+
+# libxdp sources (C, headers and .in templates) that trigger a library rebuild
+LIBXDP_SOURCES := $(wildcard $(LIBXDP_DIR)/*.[ch] $(LIBXDP_DIR)/*.in)
+
+CFLAGS += -I$(HEADER_DIR)
+
+# Headers every BPF object depends on
+BPF_HEADERS := $(wildcard $(HEADER_DIR)/bpf/*.h) $(wildcard $(HEADER_DIR)/xdp/*.h)
+
+all: $(USER_TARGETS) $(BPF_OBJS)
+
+# 'all', 'clean' and 'run' are commands, not files; declare them phony so a
+# stray file with one of these names cannot make them look up to date.
+.PHONY: all clean run
+# Remove everything this Makefile builds, including the BPF objects and the
+# intermediate LLVM IR (.ll) files written by the BPF compile rule.
+clean::
+	$(Q)rm -f $(USER_TARGETS) $(USER_OBJ) $(BPF_OBJS) $(BPF_OBJS:.o=.ll)
+
+# Rebuild libbpf through the parent lib Makefile when its sources change
+# (LIBBPF_SOURCES is only populated when SYSTEM_LIBBPF is 'n')
+$(OBJECT_LIBBPF): $(LIBBPF_SOURCES)
+ $(Q)$(MAKE) -C $(LIB_DIR) libbpf
+
+# Rebuild libxdp when any of its sources change
+$(OBJECT_LIBXDP): $(LIBXDP_SOURCES)
+ $(Q)$(MAKE) -C $(LIBXDP_DIR)
+
+# Create expansions for dependencies
+LIB_H := ${LIB_OBJS:.o=.h}
+
+# Detect if any of common obj changed and create dependency on .h-files
+$(LIB_OBJS): %.o: %.c %.h $(LIB_H)
+ $(Q)$(MAKE) -C $(dir $@) $(notdir $@)
+
+# Build each test binary directly from its .c file, linking in LIB_OBJS and
+# the libraries in LDLIBS. The libraries and extra headers are listed as
+# prerequisites so a change in any of them triggers a relink.
+ALL_EXEC_TARGETS=$(USER_TARGETS)
+$(ALL_EXEC_TARGETS): %: %.c $(OBJECT_LIBBPF) $(OBJECT_LIBXDP) $(LIBMK) $(LIB_OBJS) $(EXTRA_DEPS) $(EXTRA_USER_DEPS)
+ $(QUIET_CC)$(CC) -Wall $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) -o $@ $(LIB_OBJS) \
+ $< $(LDLIBS)
+
+# Compile each BPF program in two steps: clang emits LLVM IR into a .ll file
+# next to the target, then llc turns that into the BPF object file. Warnings
+# are treated as errors (-Werror).
+$(BPF_OBJS): %.o: %.c $(BPF_HEADERS) $(LIBMK) $(EXTRA_DEPS)
+ $(QUIET_CLANG)$(CLANG) -S \
+ -target $(BPF_TARGET) \
+ -D __BPF_TRACING__ \
+ $(BPF_CFLAGS) \
+ -Wall \
+ -Wno-unused-value \
+ -Wno-pointer-sign \
+ -Wno-compare-distinct-pointer-types \
+ -Werror \
+ -O2 -emit-llvm -c -g -o ${@:.o=.ll} $<
+ $(QUIET_LLC)$(LLC) -march=$(BPF_TARGET) -filetype=obj -o $@ ${@:.o=.ll}
+
+# Build everything, then run the test definitions through the runner script.
+# Compiler settings are handed over via the environment; RUN_TESTS is passed
+# as extra arguments to the runner after the test definition file.
+run: all
+ $(Q)env CC="$(CC)" CFLAGS="$(CFLAGS) $(LDFLAGS)" CPPFLAGS="$(CPPFLAGS)" LDLIBS="$(LDLIBS)" V=$(V) $(TEST_RUNNER) $(TEST_FILE) $(RUN_TESTS)
diff --git a/lib/libxdp/tests/check_kern_compat.c b/lib/libxdp/tests/check_kern_compat.c
new file mode 100644
index 0000000..8fb8991
--- /dev/null
+++ b/lib/libxdp/tests/check_kern_compat.c
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "test_utils.h"
+#include "../libxdp_internal.h"
+
+/* Silence libbpf logging and exit with the return value of
+ * libxdp_check_kern_compat(). test_runner.sh runs this binary and skips the
+ * calling test when the exit status is non-zero.
+ */
+int main(__unused int argc, __unused char** argv)
+{
+ silence_libbpf_logging();
+ return libxdp_check_kern_compat();
+}
diff --git a/lib/libxdp/tests/test-libxdp.sh b/lib/libxdp/tests/test-libxdp.sh
new file mode 100644
index 0000000..90fc44c
--- /dev/null
+++ b/lib/libxdp/tests/test-libxdp.sh
@@ -0,0 +1,99 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+
+ALL_TESTS="test_link_so test_link_a test_old_dispatcher test_xdp_frags test_xsk_prog_refcnt_bpffs test_xsk_prog_refcnt_legacy"
+
+TESTS_DIR=$(dirname "${BASH_SOURCE[0]}")
+
+test_link_so()
+{
+ TMPDIR=$(mktemp --tmpdir -d libxdp-test.XXXXXX)
+ cat >$TMPDIR/libxdptest.c <<EOF
+#include <xdp/libxdp.h>
+int main(int argc, char **argv) {
+ (void) argc; (void) argv;
+ (void) xdp_program__open_file("filename", "section_name", NULL);
+ return 0;
+}
+EOF
+ $CC -o $TMPDIR/libxdptest $TMPDIR/libxdptest.c $CFLAGS $CPPFLAGS -lxdp $LDLIBS 2>&1
+ retval=$?
+ rm -rf "$TMPDIR"
+ return $retval
+}
+
+test_link_a()
+{
+ TMPDIR=$(mktemp --tmpdir -d libxdp-test.XXXXXX)
+ cat >$TMPDIR/libxdptest.c <<EOF
+#include <xdp/libxdp.h>
+int main(int argc, char **argv) {
+ (void) argc; (void) argv;
+ (void) xdp_program__open_file("filename", "section_name", NULL);
+ return 0;
+}
+EOF
+ $CC -o $TMPDIR/libxdptest $TMPDIR/libxdptest.c $CFLAGS $CPPFLAGS -l:libxdp.a $LDLIBS 2>&1
+ retval=$?
+ rm -rf "$TMPDIR"
+ return $retval
+}
+
+# Create a veth pair with NUM_QUEUES_REQUIRED RX queues on xsk_veth0, run the
+# test_xsk_refcnt binary against it, then delete the pair again. check_run
+# exits on failure, in which case the interface is left for cleanup_tests.
+test_refcnt_once()
+{
+ # We need multiple queues for this test
+ NUM_QUEUES_REQUIRED=3
+ ip link add xsk_veth0 numrxqueues $NUM_QUEUES_REQUIRED type veth peer name xsk_veth1
+ check_run $TESTS_DIR/test_xsk_refcnt xsk_veth0 2>&1
+ ip link delete xsk_veth0
+}
+
+# Make sure a bpffs instance is mounted on /sys/fs/bpf, mounting one if needed.
+# Succeeds only if the mount table lists /sys/fs/bpf afterwards.
+check_mount_bpffs()
+{
+    if ! mount | grep -q /sys/fs/bpf; then
+        if ! mount -t bpf bpf /sys/fs/bpf/; then
+            echo "Unable to mount /sys/fs/bpf"
+        fi
+    fi
+    mount | grep -q /sys/fs/bpf
+}
+
+# Make sure nothing is mounted on /sys/fs/bpf, unmounting it if needed.
+# Succeeds only if the mount table no longer lists /sys/fs/bpf afterwards.
+check_unmount_bpffs()
+{
+    # Only report an unmount failure when there was something to unmount; the
+    # previous 'A && B || C' chain also printed the message when bpffs was not
+    # mounted in the first place.
+    if mount | grep -q /sys/fs/bpf; then
+        umount /sys/fs/bpf/ || echo "Unable to unmount /sys/fs/bpf"
+    fi
+    ! mount | grep -q /sys/fs/bpf
+}
+
+# Run the XSK refcount test with bpffs mounted on /sys/fs/bpf
+test_xsk_prog_refcnt_bpffs()
+{
+ check_mount_bpffs && test_refcnt_once "$@"
+}
+
+# Run the XSK refcount test with no bpffs mounted on /sys/fs/bpf
+test_xsk_prog_refcnt_legacy()
+{
+ check_unmount_bpffs && test_refcnt_once "$@"
+}
+
+# Run the test_xdp_frags binary against two veth pairs: one with MTU 5000 on
+# both ends and one with the default MTU. Skipped when check_kern_compat
+# fails; requires bpffs to be mountable.
+test_xdp_frags()
+{
+ skip_if_missing_libxdp_compat
+
+ check_mount_bpffs || return 1
+ ip link add xdp_veth_big0 mtu 5000 type veth peer name xdp_veth_big1 mtu 5000
+ ip link add xdp_veth_small0 type veth peer name xdp_veth_small1
+ check_run $TESTS_DIR/test_xdp_frags xdp_veth_big0 xdp_veth_small0 2>&1
+ ip link delete xdp_veth_big0
+ ip link delete xdp_veth_small0
+}
+
+# Run the test_dispatcher_versions binary against a fresh veth pair
+# (xdp_veth0/xdp_veth1). Skipped when check_kern_compat fails; requires bpffs
+# to be mountable.
+test_old_dispatcher()
+{
+ skip_if_missing_libxdp_compat
+
+ check_mount_bpffs || return 1
+ ip link add xdp_veth0 type veth peer name xdp_veth1
+ check_run $TESTS_DIR/test_dispatcher_versions xdp_veth0
+ ip link delete xdp_veth0
+}
+
+# Best-effort removal of every interface the tests in this file create. Errors
+# are silenced because an interface is normally already gone when its test
+# ran to completion.
+cleanup_tests()
+{
+    ip link del dev xdp_veth_big0 >/dev/null 2>&1
+    ip link del dev xdp_veth_small0 >/dev/null 2>&1
+    ip link del dev xsk_veth0 >/dev/null 2>&1
+    # Created by test_old_dispatcher; left behind when check_run exits early
+    ip link del dev xdp_veth0 >/dev/null 2>&1
+}
diff --git a/lib/libxdp/tests/test_dispatcher_versions.c b/lib/libxdp/tests/test_dispatcher_versions.c
new file mode 100644
index 0000000..14a8ba8
--- /dev/null
+++ b/lib/libxdp/tests/test_dispatcher_versions.c
@@ -0,0 +1,300 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <linux/err.h>
+#include <net/if.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "test_utils.h"
+#include "../libxdp_internal.h"
+#include "xdp_dispatcher_v1.h"
+
+#include <xdp/libxdp.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+#define BPFFS_DIR "/sys/fs/bpf/xdp"
+
+#define PROG_RUN_PRIO 42
+#define PROG_CHAIN_CALL_ACTIONS (1 << XDP_DROP)
+
+/* Look up the kernel-assigned ID of the BPF program behind prog_fd.
+ * Returns the program ID, or -errno if the info query fails.
+ */
+int get_prog_id(int prog_fd)
+{
+	struct bpf_prog_info prog_info = {};
+	__u32 info_len = sizeof(prog_info);
+
+	if (bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len))
+		return -errno;
+
+	return prog_info.id;
+}
+
+/* Build and attach a version-1 dispatcher by hand instead of through libxdp's
+ * own loader: load xdp_dispatcher_v1.o, attach the xdp_pass program from
+ * xdp_pass.o to its "prog0" function as a BPF_PROG_TYPE_EXT program, pin the
+ * link and the program under BPFFS_DIR/dispatch-IFINDEX-ID, and finally attach
+ * the dispatcher to ifindex.
+ *
+ * Returns 0 on success and a non-zero error value on failure (-ENOENT when
+ * ifindex is 0). Both BPF objects and the link fd are closed on every path
+ * past the first open; pins created here are removed again if a later step
+ * fails.
+ */
+int load_dispatcher_v1(int ifindex)
+{
+ struct xdp_dispatcher_config_v1 dispatcher_config = {};
+ struct bpf_object *obj_dispatcher, *obj_prog = NULL;
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+ struct bpf_program *dispatcher_prog, *xdp_prog;
+ int ret, btf_id, lfd = -1, dispatcher_id;
+ char pin_path[PATH_MAX], buf[PATH_MAX];
+ const char *attach_func = "prog0";
+ struct bpf_map *map;
+
+ if (!ifindex)
+ return -ENOENT;
+
+ obj_dispatcher = bpf_object__open("xdp_dispatcher_v1.o");
+ if (!obj_dispatcher)
+ return -errno;
+
+ /* BTF ID of the dispatcher function the component program replaces */
+ btf_id = btf__find_by_name_kind(bpf_object__btf(obj_dispatcher),
+ attach_func, BTF_KIND_FUNC);
+ if (btf_id <= 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+ opts.target_btf_id = btf_id;
+
+ /* The first map of the object is taken to be the configuration map */
+ map = bpf_object__next_map(obj_dispatcher, NULL);
+ if (!map) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ dispatcher_prog = bpf_object__find_program_by_name(obj_dispatcher,
+ "xdp_dispatcher");
+ if (!dispatcher_prog) {
+ ret = -errno;
+ goto out;
+ }
+
+ /* One enabled component program with the priority and chain call
+ * actions the caller later expects to read back
+ */
+ dispatcher_config.num_progs_enabled = 1;
+ dispatcher_config.chain_call_actions[0] = PROG_CHAIN_CALL_ACTIONS;
+ dispatcher_config.run_prios[0] = PROG_RUN_PRIO;
+
+ ret = bpf_map__set_initial_value(map, &dispatcher_config,
+ sizeof(dispatcher_config));
+ if (ret)
+ goto out;
+
+
+ ret = bpf_object__load(obj_dispatcher);
+ if (ret)
+ goto out;
+
+ dispatcher_id = get_prog_id(bpf_program__fd(dispatcher_prog));
+ if (dispatcher_id < 0) {
+ ret = dispatcher_id;
+ goto out;
+ }
+
+ obj_prog = bpf_object__open("xdp_pass.o");
+ if (!obj_prog) {
+ ret = -errno;
+ goto out;
+ }
+
+ xdp_prog = bpf_object__find_program_by_name(obj_prog, "xdp_pass");
+ if (!xdp_prog) {
+ ret = -errno;
+ goto out;
+ }
+
+ /* Turn xdp_pass into an extension program targeting the dispatcher's
+ * attach_func before loading it
+ */
+ ret = bpf_program__set_attach_target(xdp_prog,
+ bpf_program__fd(dispatcher_prog),
+ attach_func);
+ if (ret)
+ goto out;
+
+ bpf_program__set_type(xdp_prog, BPF_PROG_TYPE_EXT);
+ bpf_program__set_expected_attach_type(xdp_prog, 0);
+
+ ret = bpf_object__load(obj_prog);
+ if (ret)
+ goto out;
+
+ lfd = bpf_link_create(bpf_program__fd(xdp_prog),
+ bpf_program__fd(dispatcher_prog), 0, &opts);
+ if (lfd < 0) {
+ ret = -errno;
+ goto out;
+ }
+
+ ret = try_snprintf(pin_path, sizeof(pin_path), "%s/dispatch-%d-%d",
+ BPFFS_DIR, ifindex, dispatcher_id);
+ if (ret)
+ goto out;
+
+ /* The parent directory may already exist; the per-dispatcher one must not */
+ ret = mkdir(BPFFS_DIR, S_IRWXU);
+ if (ret && errno != EEXIST) {
+ ret = -errno;
+ printf("mkdir err (%s): %s\n", BPFFS_DIR, strerror(-ret));
+ goto out;
+ }
+
+ ret = mkdir(pin_path, S_IRWXU);
+ if (ret) {
+ ret = -errno;
+ printf("mkdir err (%s): %s\n", pin_path, strerror(-ret));
+ goto out;
+ }
+
+ /* From here on failures go through err_unpin to undo the pinning */
+ ret = try_snprintf(buf, sizeof(buf), "%s/prog0-link", pin_path);
+ if (ret)
+ goto err_unpin;
+
+ ret = bpf_obj_pin(lfd, buf);
+ if (ret)
+ goto err_unpin;
+
+ ret = try_snprintf(buf, sizeof(buf), "%s/prog0-prog", pin_path);
+ if (ret)
+ goto err_unpin;
+
+ ret = bpf_obj_pin(bpf_program__fd(xdp_prog), buf);
+ if (ret)
+ goto err_unpin;
+
+ ret = xdp_attach_fd(bpf_program__fd(dispatcher_prog), -1, ifindex,
+ XDP_MODE_NATIVE);
+ if (ret)
+ goto err_unpin;
+
+/* Common exit for success and failure: release local references */
+out:
+ if (lfd >= 0)
+ close(lfd);
+ bpf_object__close(obj_dispatcher);
+ bpf_object__close(obj_prog);
+ return ret;
+
+/* Remove whatever was pinned and the pin directory, then take the common exit */
+err_unpin:
+ if (!try_snprintf(buf, sizeof(buf), "%s/prog0-link", pin_path))
+ unlink(buf);
+ if (!try_snprintf(buf, sizeof(buf), "%s/prog0-prog", pin_path))
+ unlink(buf);
+ rmdir(pin_path);
+ goto out;
+}
+
+/* Verify that libxdp can read back a hand-built version-1 dispatcher.
+ *
+ * Loads the v1 dispatcher on ifindex, fetches it through
+ * xdp_multiprog__get_from_ifindex() and checks that it is reported as a
+ * non-legacy multiprog holding exactly one program named "xdp_pass" with the
+ * run priority and chain call actions that were written into the dispatcher
+ * config. Finally checks that attaching another program to the interface is
+ * refused.
+ *
+ * Returns 0 if every check passes, a non-zero error value otherwise. The
+ * multiprog is detached and closed before returning.
+ */
+int check_old_dispatcher(int ifindex)
+{
+	struct xdp_multiprog *mp = NULL;
+	struct xdp_program *xdp_prog;
+	char buf[100];
+	int ret;
+
+	ret = load_dispatcher_v1(ifindex);
+	if (ret)
+		goto out;
+
+	mp = xdp_multiprog__get_from_ifindex(ifindex);
+	ret = libxdp_get_error(mp);
+	if (ret) {
+		/* NOTE(review): mp may be an error-encoded pointer rather than
+		 * NULL here; clear it so the cleanup below never operates on it.
+		 */
+		mp = NULL;
+		goto out;
+	}
+
+	if (xdp_multiprog__is_legacy(mp)) {
+		printf("Got unexpected legacy multiprog\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (xdp_multiprog__program_count(mp) != 1) {
+		printf("Expected 1 attached program, got %d\n",
+		       xdp_multiprog__program_count(mp));
+		ret = -EINVAL;
+		goto out;
+	}
+
+	xdp_prog = xdp_multiprog__next_prog(NULL, mp);
+	if (!xdp_prog) {
+		ret = -errno;
+		goto out;
+	}
+
+	if (strcmp(xdp_program__name(xdp_prog), "xdp_pass")) {
+		printf("Expected xdp_pass program, got %s\n",
+		       xdp_program__name(xdp_prog));
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (xdp_program__run_prio(xdp_prog) != PROG_RUN_PRIO) {
+		printf("Expected run prio %d got %d\n", PROG_RUN_PRIO,
+		       xdp_program__run_prio(xdp_prog));
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = xdp_program__print_chain_call_actions(xdp_prog, buf, sizeof(buf));
+	if (ret)
+		goto out;
+
+	/* PROG_CHAIN_CALL_ACTIONS is (1 << XDP_DROP), so XDP_DROP is the only
+	 * action expected; the message now names the value actually compared.
+	 */
+	if (strcmp(buf, "XDP_DROP")) {
+		printf("Expected actions XDP_DROP, got %s\n", buf);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Attaching a further program on top of the old dispatcher must fail */
+	xdp_prog = xdp_program__open_file("xdp_pass.o", "xdp", NULL);
+	ret = libxdp_get_error(xdp_prog);
+	if (ret)
+		goto out;
+
+	ret = xdp_program__attach(xdp_prog, ifindex, XDP_MODE_NATIVE, 0);
+	xdp_program__close(xdp_prog);
+	if (!ret) {
+		printf("Shouldn't have been able to attach a new program to ifindex!\n");
+		ret = -EINVAL;
+		goto out;
+	}
+	ret = 0;
+
+out:
+	if (mp)
+		xdp_multiprog__detach(mp);
+	xdp_multiprog__close(mp);
+	return ret;
+}
+
+/* Print the expected command line to stderr and exit with EXIT_FAILURE */
+static void usage(char *progname)
+{
+ fprintf(stderr, "Usage: %s <ifname>\n", progname);
+ exit(EXIT_FAILURE);
+}
+
+/* Entry point: takes the interface name as the only argument and returns the
+ * result of check_old_dispatcher() on it. libbpf logging is always silenced;
+ * libxdp logging is verbose only when VERBOSE_TESTS starts with '1'.
+ */
+int main(int argc, char **argv)
+{
+	const char *verbose = secure_getenv("VERBOSE_TESTS");
+
+	silence_libbpf_logging();
+	if (!verbose || verbose[0] != '1')
+		silence_libxdp_logging();
+	else
+		verbose_libxdp_logging();
+
+	if (argc != 2)
+		usage(argv[0]);
+
+	return check_old_dispatcher(if_nametoindex(argv[1]));
+}
diff --git a/lib/libxdp/tests/test_runner.sh b/lib/libxdp/tests/test_runner.sh
new file mode 100755
index 0000000..eb043a1
--- /dev/null
+++ b/lib/libxdp/tests/test_runner.sh
@@ -0,0 +1,118 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Script to setup and manage tests for xdp-tools.
+# Based on the test-env script from xdp-tutorial.
+#
+# Author: Toke Høiland-Jørgensen (toke@redhat.com)
+# Date: 26 May 2020
+# Copyright (c) 2020 Red Hat
+
+set -o errexit
+set -o nounset
+umask 077
+
+TEST_PROG_DIR="${TEST_PROG_DIR:-$(dirname "${BASH_SOURCE[0]}")}"
+ALL_TESTS=""
+VERBOSE_TESTS=${V:-0}
+
+export VERBOSE_TESTS
+
+# Odd return value for skipping, as only 0-255 is valid.
+SKIPPED_TEST=249
+
+skip_if_missing_libxdp_compat()
+{
+ if ! $TEST_PROG_DIR/check_kern_compat; then
+ exit "$SKIPPED_TEST"
+ fi
+}
+
+# Succeed if $1 names a shell function.
+is_func()
+{
+    # 'type -t' prints the fixed word "function", whereas the plain 'type'
+    # message that was grepped before is translated in non-English locales.
+    [ "$(type -t "$1" 2>/dev/null)" = "function" ]
+}
+
+# Run the given command (echoing it first when VERBOSE_TESTS is 1) and exit
+# the current shell with the command's status if it is non-zero. exec_test
+# runs tests inside a command substitution, so there the exit ends only the
+# test being run.
+check_run()
+{
+ local ret
+
+ [ "$VERBOSE_TESTS" -eq "1" ] && echo "$@"
+ "$@"
+ ret=$?
+ if [ "$ret" -ne "0" ]; then
+ exit $ret
+ fi
+}
+
+# Run the single test function named $1 and print its result.
+# Prints INVALID (and returns 1) if $1 is not a function; otherwise runs it in
+# a command substitution with stdout and stderr captured, and prints PASS,
+# SKIPPED (status SKIPPED_TEST, treated as success) or FAIL. The captured
+# output is shown, tab-indented, on failure or when VERBOSE_TESTS is 1.
+# Returns 0 for pass/skip and the test's status otherwise.
+exec_test()
+{
+ local testn="$1"
+ local output
+ local ret
+
+ printf " %-30s" "[$testn]"
+ if ! is_func "$testn"; then
+ echo "INVALID"
+ return 1
+ fi
+
+ output=$($testn 2>&1)
+ ret=$?
+ if [ "$ret" -eq "0" ]; then
+ echo "PASS"
+ elif [ "$ret" -eq "$SKIPPED_TEST" ]; then
+ echo "SKIPPED"
+ ret=0
+ else
+ echo "FAIL"
+ fi
+ if [ "$ret" -ne "0" ] || [ "$VERBOSE_TESTS" -eq "1" ]; then
+ echo "$output" | sed 's/^/\t/'
+ fi
+ return $ret
+}
+
+# Run the tests named in the arguments, or every test in ALL_TESTS when none
+# are given. After each test, cleanup_tests is called if the test definition
+# file provides it (its failure is ignored). Returns 1 if any test failed,
+# 0 otherwise.
+run_tests()
+{
+ local TESTS="$*"
+ local ret=0
+ [ -z "$TESTS" ] && TESTS="$ALL_TESTS"
+
+ echo " Running tests from $TEST_DEFINITIONS"
+
+ for testn in $TESTS; do
+ exec_test $testn || ret=1
+ if is_func cleanup_tests; then
+ cleanup_tests || true
+ fi
+ done
+
+ return $ret
+}
+
+# Print the command-line synopsis to stderr and exit with status 1
+usage()
+{
+ echo "Usage: $0 <test_definition_file> [test names]" >&2
+ exit 1
+}
+
+if [ "$EUID" -ne "0" ]; then
+ if command -v sudo >/dev/null 2>&1; then
+ exec sudo env CC="$CC" CFLAGS="$CFLAGS" CPPFLAGS="$CPPFLAGS" LDLIBS="$LDLIBS" V=${VERBOSE_TESTS} "$0" "$@"
+ else
+ die "Tests must be run as root"
+ fi
+else
+ if [ "${DID_UNSHARE:-0}" -ne "1" ]; then
+ echo "Executing tests in separate net- and mount namespaces" >&2
+ exec env DID_UNSHARE=1 unshare -n -m "$0" "$@"
+ fi
+fi
+
+TEST_DEFINITIONS="${1:-}"
+[ -f "$TEST_DEFINITIONS" ] || usage
+source "$TEST_DEFINITIONS"
+
+shift
+run_tests "$@"
diff --git a/lib/libxdp/tests/test_utils.h b/lib/libxdp/tests/test_utils.h
new file mode 100644
index 0000000..1642c12
--- /dev/null
+++ b/lib/libxdp/tests/test_utils.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __TEST_UTILS_H
+#define __TEST_UTILS_H
+
+#include <bpf/libbpf.h>
+#include <xdp/libxdp.h>
+
+#define __unused __attribute__((unused))
+
+/* libbpf print callback that discards every message */
+static int libbpf_silent_func(__unused enum libbpf_print_level level,
+ __unused const char *format,
+ __unused va_list args)
+{
+ return 0;
+}
+
+/* Install the discarding callback as libbpf's print function */
+static inline void silence_libbpf_logging(void)
+{
+ libbpf_set_print(libbpf_silent_func);
+}
+
+/* libxdp print callback that discards every message */
+static int libxdp_silent_func(__unused enum libxdp_print_level level,
+ __unused const char *format,
+ __unused va_list args)
+{
+ return 0;
+}
+
+/* libxdp print callback that writes every message to stderr, prefixed with a
+ * single space, regardless of its level. Always returns 0.
+ *
+ * Only 'level' is marked __unused: 'format' and 'args' are used below, so
+ * tagging them as unused was misleading.
+ */
+static int libxdp_verbose_func(__unused enum libxdp_print_level level,
+			       const char *format,
+			       va_list args)
+{
+	fprintf(stderr, " ");
+	vfprintf(stderr, format, args);
+	return 0;
+}
+
+/* Install the discarding callback as libxdp's print function */
+static inline void silence_libxdp_logging(void)
+{
+ libxdp_set_print(libxdp_silent_func);
+}
+
+/* Install the stderr-printing callback as libxdp's print function */
+static inline void verbose_libxdp_logging(void)
+{
+ libxdp_set_print(libxdp_verbose_func);
+}
+
+#endif
diff --git a/lib/libxdp/tests/test_xdp_frags.c b/lib/libxdp/tests/test_xdp_frags.c
new file mode 100644
index 0000000..d70e802
--- /dev/null
+++ b/lib/libxdp/tests/test_xdp_frags.c
@@ -0,0 +1,339 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <linux/err.h>
+#include <net/if.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <unistd.h>
+
+#include "test_utils.h"
+
+#include <xdp/libxdp.h>
+#include <bpf/libbpf.h>
+
+# define ARRAY_SIZE(_x) (sizeof(_x) / sizeof((_x)[0]))
+
+static bool kern_compat;
+
+
+/* Create an xdp_program for the "xdp_pass" program, asking libxdp to find it
+ * in xdp-dispatcher.o. Returns whatever xdp_program__create() returns.
+ */
+static struct xdp_program *load_prog(void)
+{
+ DECLARE_LIBXDP_OPTS(xdp_program_opts, opts,
+ .prog_name = "xdp_pass",
+ .find_filename = "xdp-dispatcher.o",
+ );
+ return xdp_program__create(&opts);
+}
+
+/* Check the multiprog attached to ifindex: it must not be a legacy program,
+ * must hold exactly 'count' programs, and its frags support must equal
+ * 'frags' (forced to false when the kernel lacks frags support).
+ *
+ * Returns 0 if all checks pass, the libxdp error if the multiprog cannot be
+ * retrieved, or -EINVAL on a mismatch. A diagnostic is printed to stderr for
+ * each failure.
+ */
+static int check_attached_progs(int ifindex, int count, bool frags)
+{
+ struct xdp_multiprog *mp;
+ int ret;
+
+ /* If the kernel does not support frags, we always expect
+ * frags support to be disabled on a returned dispatcher
+ */
+ if (!kern_compat)
+ frags = false;
+
+ mp = xdp_multiprog__get_from_ifindex(ifindex);
+ ret = libxdp_get_error(mp);
+ if (ret) {
+ fprintf(stderr, "Couldn't get multiprog on ifindex %d: %s\n",
+ ifindex, strerror(-ret));
+ return ret;
+ }
+
+ /* Assume failure; only reset to 0 once every check has passed */
+ ret = -EINVAL;
+
+ if (xdp_multiprog__is_legacy(mp)) {
+ fprintf(stderr, "Found legacy prog on ifindex %d\n", ifindex);
+ goto out;
+ }
+
+ if (xdp_multiprog__program_count(mp) != count) {
+ fprintf(stderr, "Expected %d programs loaded on ifindex %d, found %d\n",
+ count, ifindex, xdp_multiprog__program_count(mp));
+ goto out;
+ }
+
+ if (xdp_multiprog__xdp_frags_support(mp) != frags) {
+ fprintf(stderr,
+ "Multiprog on ifindex %d %s frags, expected %s\n",
+ ifindex,
+ xdp_multiprog__xdp_frags_support(mp) ?
+ "supports" :
+ "does not support",
+ frags ? "support" : "no support");
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ xdp_multiprog__close(mp);
+ return ret;
+}
+
+/* Print "<func>:\tPASSED" (ret == 0) or "<func>:\tFAILED" to stderr, flushing
+ * stderr beforehand and stdout afterwards.
+ */
+static void print_test_result(const char *func, int ret)
+{
+ fflush(stderr);
+ fprintf(stderr, "%s:\t%s\n", func, ret ? "FAILED" : "PASSED");
+ fflush(stdout);
+}
+
+/* Create the test program, set its frags support to 'frags' and attach it to
+ * ifindex in native mode. The program handle is stored in *prog even when a
+ * later step fails, so the caller is responsible for closing it.
+ *
+ * Returns 0 on success, -errno if the program could not be created, or the
+ * error from setting frags support or attaching.
+ */
+static int load_attach_prog(struct xdp_program **prog, int ifindex, bool frags)
+{
+ int ret;
+
+ *prog = load_prog();
+ if (!*prog) {
+ ret = -errno;
+ fprintf(stderr, "Couldn't load program: %s\n", strerror(-ret));
+ return ret;
+ }
+
+ ret = xdp_program__set_xdp_frags_support(*prog, frags);
+ if (ret)
+ return ret;
+
+ return xdp_program__attach(*prog, ifindex, XDP_MODE_NATIVE, 0);
+}
+
+/* Try to load and attach one program with the given frags setting and compare
+ * the outcome with should_succeed.
+ *
+ * Returns -EINVAL if attaching succeeded/failed contrary to expectation. When
+ * a successful attach was expected, returns the result of verifying that
+ * exactly one program with matching frags support is attached; when a failure
+ * was expected (and happened), returns 0. The program is detached (if it was
+ * attached) and closed before returning.
+ */
+static int _check_load(int ifindex, bool frags, bool should_succeed)
+{
+ struct xdp_program *prog = NULL;
+ bool attached;
+ int ret;
+
+ ret = load_attach_prog(&prog, ifindex, frags);
+ attached = !ret;
+
+ if (attached != should_succeed) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (should_succeed)
+ ret = check_attached_progs(ifindex, 1, frags);
+ else
+ ret = 0;
+
+out:
+ if (attached)
+ xdp_program__detach(prog, ifindex, XDP_MODE_NATIVE, 0);
+ xdp_program__close(prog);
+ return ret;
+}
+
+/* A frags-enabled program must load on the small-MTU interface and, when
+ * ifindex_bigmtu is non-zero, on the big-MTU interface as well.
+ * Prints the test result and returns 0 on success, non-zero on failure.
+ */
+static int check_load_frags(int ifindex_bigmtu, int ifindex_smallmtu)
+{
+	int ret = _check_load(ifindex_smallmtu, true, true);
+
+	/* Keep the big-MTU result: it used to be discarded, so a failure on
+	 * the big-MTU interface was still reported as PASSED.
+	 */
+	if (!ret && ifindex_bigmtu)
+		ret = _check_load(ifindex_bigmtu, true, true);
+	print_test_result(__func__, ret);
+	return ret;
+}
+
+/* A program without frags support is expected to attach to ifindex.
+ * Prints the test result and returns the outcome of the check.
+ */
+static int check_load_nofrags_success(int ifindex)
+{
+	const int result = _check_load(ifindex, false, true);
+
+	print_test_result(__func__, result);
+	return result;
+}
+
+/* A program without frags support is expected to be refused on ifindex.
+ * Prints the test result and returns the outcome of the check.
+ */
+static int check_load_nofrags_fail(int ifindex)
+{
+	const int result = _check_load(ifindex, false, false);
+
+	print_test_result(__func__, result);
+	return result;
+}
+/* Attach two frags-enabled programs to ifindex and check that the resulting
+ * dispatcher holds two programs and supports frags. Programs are detached in
+ * reverse order of attachment and closed before the result is printed.
+ * Returns 0 on success, non-zero on failure.
+ */
+static int check_load_frags_multi(int ifindex)
+{
+ struct xdp_program *prog1 = NULL, *prog2 = NULL;
+ int ret;
+
+ ret = load_attach_prog(&prog1, ifindex, true);
+ if (ret)
+ goto out;
+
+ ret = load_attach_prog(&prog2, ifindex, true);
+ if (ret)
+ goto out_prog1;
+
+ ret = check_attached_progs(ifindex, 2, true);
+
+ xdp_program__detach(prog2, ifindex, XDP_MODE_NATIVE, 0);
+out_prog1:
+ xdp_program__detach(prog1, ifindex, XDP_MODE_NATIVE, 0);
+out:
+ xdp_program__close(prog2);
+ xdp_program__close(prog1);
+ print_test_result(__func__, ret);
+ return ret;
+}
+
+/* On a small-MTU interface, mixing a frags and a non-frags program must work:
+ * the dispatcher supports frags with only the frags program attached, drops
+ * frags support once the non-frags program is added, and regains it after
+ * that program is detached again.
+ * Prints the test result and returns 0 on success, non-zero on failure.
+ */
+static int check_load_mix_small(int ifindex)
+{
+	struct xdp_program *prog1 = NULL, *prog2 = NULL;
+	int ret;
+
+	ret = load_attach_prog(&prog1, ifindex, true);
+	if (ret)
+		goto out;
+
+	/* First program attached, dispatcher supports frags */
+	ret = check_attached_progs(ifindex, 1, true);
+	if (ret)
+		/* prog1 is attached at this point, so it must be detached on
+		 * the way out rather than only closed
+		 */
+		goto out_prog1;
+
+	ret = load_attach_prog(&prog2, ifindex, false);
+	if (ret)
+		goto out_prog1;
+
+	/* Mixed program attachment, dispatcher should not support frags */
+	ret = check_attached_progs(ifindex, 2, false);
+
+	/* Always detach prog2; '|| ret' keeps an earlier failure visible */
+	ret = xdp_program__detach(prog2, ifindex, XDP_MODE_NATIVE, 0) || ret;
+	if (ret)
+		goto out_prog1;
+
+	/* Second program removed, back to frags-only */
+	ret = check_attached_progs(ifindex, 1, true) || ret;
+
+out_prog1:
+	xdp_program__detach(prog1, ifindex, XDP_MODE_NATIVE, 0);
+
+out:
+	xdp_program__close(prog2);
+	xdp_program__close(prog1);
+	print_test_result(__func__, ret);
+	return ret;
+}
+
+/* On a big-MTU interface, adding a non-frags program next to a frags program
+ * must be refused, leaving the single frags-enabled program in place.
+ * Prints the test result and returns 0 on success, non-zero on failure.
+ */
+static int check_load_mix_big(int ifindex)
+{
+	struct xdp_program *prog1 = NULL, *prog2 = NULL;
+	int ret;
+
+	ret = load_attach_prog(&prog1, ifindex, true);
+	if (ret)
+		goto out;
+
+	/* First program attached, dispatcher supports frags */
+	ret = check_attached_progs(ifindex, 1, true);
+	if (ret)
+		/* prog1 is attached at this point, so it must be detached on
+		 * the way out rather than only closed
+		 */
+		goto out_prog1;
+
+	/* Second non-frags program should fail on big-MTU device */
+	ret = load_attach_prog(&prog2, ifindex, false);
+	if (!ret) {
+		xdp_program__detach(prog2, ifindex, XDP_MODE_NATIVE, 0);
+		ret = -EINVAL;
+		goto out_prog1;
+	}
+
+	/* Still only a single program loaded, with frags support */
+	ret = check_attached_progs(ifindex, 1, true);
+
+out_prog1:
+	xdp_program__detach(prog1, ifindex, XDP_MODE_NATIVE, 0);
+
+out:
+	xdp_program__close(prog2);
+	xdp_program__close(prog1);
+	print_test_result(__func__, ret);
+	return ret;
+}
+
+
+/* Probe whether the running kernel accepts XDP programs with the
+ * BPF_F_XDP_HAS_FRAGS flag by setting the flag on the test program and trying
+ * to load its BPF object. Prints the verdict to stdout.
+ *
+ * Returns true if the load succeeds; false if it fails or if the test program
+ * or its BPF object/program cannot be obtained.
+ */
+static bool check_frags_compat(void)
+{
+ struct xdp_program *test_prog;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ bool ret = false;
+ int err;
+
+ test_prog = load_prog();
+ if (!test_prog)
+ return false;
+
+ obj = xdp_program__bpf_obj(test_prog);
+ if (!obj)
+ goto out;
+
+ prog = bpf_object__find_program_by_name(obj, "xdp_pass");
+ if (!prog)
+ goto out;
+
+ bpf_program__set_flags(prog, BPF_F_XDP_HAS_FRAGS);
+ err = bpf_object__load(obj);
+ if (!err) {
+ printf("Kernel supports XDP programs with frags\n");
+ ret = true;
+ } else {
+ printf("Kernel DOES NOT support XDP programs with frags\n");
+ }
+ fflush(stdout);
+
+out:
+ xdp_program__close(test_prog);
+ return ret;
+}
+
+/* Print the expected command line to stderr and exit with EXIT_FAILURE */
+static void usage(char *progname)
+{
+ fprintf(stderr, "Usage: %s <ifname_bigmtu> <ifname_smallmtu>\n", progname);
+ exit(EXIT_FAILURE);
+}
+
+/* Entry point: takes a big-MTU and a small-MTU interface name, probes kernel
+ * frags support, raises RLIMIT_MEMLOCK and runs the frags test cases. The
+ * big-MTU cases are only run when the kernel supports frags. Returns 0 if all
+ * executed cases passed, non-zero otherwise.
+ */
+int main(int argc, char **argv)
+{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ int ifindex_bigmtu, ifindex_smallmtu, ret;
+ char *envval;
+
+ envval = secure_getenv("VERBOSE_TESTS");
+
+ /* libbpf is always silenced; libxdp is verbose only on VERBOSE_TESTS=1 */
+ silence_libbpf_logging();
+ if (envval && envval[0] == '1')
+ verbose_libxdp_logging();
+ else
+ silence_libxdp_logging();
+
+ /* NOTE(review): the probe runs before the argument check and before
+ * RLIMIT_MEMLOCK is raised below
+ */
+ kern_compat = check_frags_compat();
+
+ if (argc != 3)
+ usage(argv[0]);
+
+ ifindex_bigmtu = if_nametoindex(argv[1]);
+ ifindex_smallmtu = if_nametoindex(argv[2]);
+ if (!ifindex_bigmtu || !ifindex_smallmtu) {
+ fprintf(stderr, "Interface '%s' or '%s' not found.\n", argv[1], argv[2]);
+ usage(argv[0]);
+ }
+
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n",
+ strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+
+ /* Every case runs even if an earlier one failed; '|| ret' accumulates
+ * the failures into a 0/1 result
+ */
+ ret = check_load_frags(kern_compat ? ifindex_bigmtu : 0, ifindex_smallmtu);
+ ret = check_load_nofrags_success(ifindex_smallmtu) || ret;
+ if (kern_compat) {
+ ret = check_load_nofrags_fail(ifindex_bigmtu) || ret;
+ ret = check_load_frags_multi(ifindex_bigmtu) || ret;
+ ret = check_load_mix_big(ifindex_bigmtu) || ret;
+ }
+ ret = check_load_mix_small(ifindex_smallmtu) || ret;
+
+ return ret;
+}
diff --git a/lib/libxdp/tests/test_xsk_refcnt.c b/lib/libxdp/tests/test_xsk_refcnt.c
new file mode 100644
index 0000000..bdd22da
--- /dev/null
+++ b/lib/libxdp/tests/test_xsk_refcnt.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+#include <errno.h>
+#include <linux/err.h>
+#include <net/if.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <unistd.h>
+
+#include "test_utils.h"
+
+#include <xdp/libxdp.h>
+#include <xdp/xsk.h>
+
+typedef __u64 u64;
+typedef __u32 u32;
+typedef __u16 u16;
+typedef __u8 u8;
+
+#define MAX_EVENTS 10
+#define MAX_NUM_QUEUES 4
+#define TEST_NAME_LENGTH 128
+
+/* Per-socket umem bookkeeping used by this test. */
+struct xsk_umem_info {
+ struct xsk_ring_prod fq; /* passed to xsk_umem__create() as the fill ring */
+ struct xsk_ring_cons cq; /* passed to xsk_umem__create() as the completion ring */
+ struct xsk_umem *umem; /* handle filled in by xsk_umem__create() */
+ void *buffer; /* memory area registered as the umem; freed in delete_socket() */
+};
+
+/* One AF_XDP socket created by the test, together with its umem. */
+struct xsk_socket_info {
+ struct xsk_ring_cons rx; /* RX ring handed to xsk_socket__create() */
+ struct xsk_umem_info *umem; /* umem backing this socket */
+ struct xsk_socket *xsk; /* handle filled in by xsk_socket__create() */
+};
+
+/* An event holds socket operations that are run concurrently (one thread per
+ * operation) and can therefore, in theory, race with each other.
+ */
+struct xsk_test_event {
+ u32 num_create; /* number of valid entries in create_qids */
+ u32 num_delete; /* number of valid entries in delete_qids */
+ u32 create_qids[MAX_NUM_QUEUES]; /* QIDs for sockets being created in this event */
+ u32 delete_qids[MAX_NUM_QUEUES]; /* QIDs for sockets being deleted in this event */
+};
+
+/* A named test case: a sequence of events that are run one after another. */
+struct xsk_test {
+ char name[TEST_NAME_LENGTH]; /* printed together with PASSED/FAILED */
+ u32 num_events; /* number of valid entries in events */
+ struct xsk_test_event events[MAX_EVENTS];
+};
+
+/* Tests that use fewer queues must come first, so that as many tests as
+ * possible can run on VMs with a small number of CPUs. main() stops at the
+ * first failing test.
+ */
+static struct xsk_test all_tests[] = {
+ { "Single socket created and deleted",
+ .num_events = 2,
+ .events = {{ .num_create = 1, .create_qids = {0} },
+ { .num_delete = 1, .delete_qids = {0} }
+ }},
+ { "2 sockets, created and deleted sequentially",
+ .num_events = 4,
+ .events = {{ .num_create = 1, .create_qids = {0} },
+ { .num_create = 1, .create_qids = {1} },
+ { .num_delete = 1, .delete_qids = {0} },
+ { .num_delete = 1, .delete_qids = {1} }
+ }},
+ { "2 sockets, created sequentially and deleted asynchronously",
+ .num_events = 3,
+ .events = {{ .num_create = 1, .create_qids = {0} },
+ { .num_create = 1, .create_qids = {1} },
+ { .num_delete = 2, .delete_qids = {0, 1} }
+ }},
+ { "2 sockets, asynchronously delete and create",
+ .num_events = 3,
+ .events = {{ .num_create = 1, .create_qids = {0} },
+ { .num_create = 1, .create_qids = {1},
+ .num_delete = 1, .delete_qids = {0} },
+ { .num_delete = 1, .delete_qids = {1} }
+ }},
+ { "3 sockets, created and deleted sequentially",
+ .num_events = 6,
+ .events = {{ .num_create = 1, .create_qids = {0} },
+ { .num_create = 1, .create_qids = {1} },
+ { .num_create = 1, .create_qids = {2} },
+ { .num_delete = 1, .delete_qids = {1} },
+ { .num_delete = 1, .delete_qids = {2} },
+ { .num_delete = 1, .delete_qids = {0} }
+ }},
+};
+
+/* Number of elements in a true array (not valid for pointers) */
+# define ARRAY_SIZE(_x) (sizeof(_x) / sizeof((_x)[0]))
+
+/* Interface name taken from the command line by read_args() */
+static const char *opt_if;
+static const u8 num_tests = ARRAY_SIZE(all_tests);
+
+/* Sockets created so far, indexed by queue id */
+static struct xsk_socket_info *xsks[MAX_NUM_QUEUES];
+
+/* Each umem buffer allocated by create_socket() is NUM_FRAMES * FRAME_SIZE bytes */
+#define FRAME_SIZE 64
+#define NUM_FRAMES (XSK_RING_CONS__DEFAULT_NUM_DESCS * 2)
+
+/* Allocate the test's umem bookkeeping struct and register @buffer (@size
+ * bytes) as a umem with the default configuration.
+ *
+ * Never returns NULL: the process exits with EXIT_FAILURE if the allocation
+ * fails, or with the error code from xsk_umem__create() if registration
+ * fails.
+ */
+static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
+{
+	struct xsk_umem_info *info = calloc(1, sizeof(*info));
+	int err;
+
+	if (info == NULL)
+		exit(EXIT_FAILURE);
+
+	err = xsk_umem__create(&info->umem, buffer, size,
+			       &info->fq, &info->cq, NULL);
+	if (err != 0)
+		exit(err);
+
+	info->buffer = buffer;
+
+	return info;
+}
+
+/* Allocate a socket bookkeeping struct and try to create an RX-only AF_XDP
+ * socket on queue @qid of the interface named by opt_if, backed by @umem.
+ *
+ * Exits with EXIT_FAILURE if the allocation fails. Always returns the
+ * allocated struct.
+ *
+ * NOTE(review): the return value of xsk_socket__create() is ignored, so the
+ * returned struct may carry a NULL ->xsk when creation failed. Confirm
+ * whether this is intentional best-effort behaviour for the test.
+ */
+static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem,
+ unsigned int qid)
+{
+ struct xsk_socket_config cfg = {};
+ struct xsk_socket_info *xsk;
+ struct xsk_ring_cons *rxr;
+
+ xsk = calloc(1, sizeof(*xsk));
+ if (!xsk)
+ exit(EXIT_FAILURE);
+
+ xsk->umem = umem;
+ /* Only rx_size is set; every other config field stays zero */
+ cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+
+ rxr = &xsk->rx;
+ /* No TX ring is requested (NULL) */
+ xsk_socket__create(&xsk->xsk, opt_if, qid, umem->umem,
+ rxr, NULL, &cfg);
+
+ return xsk;
+}
+
+/* Thread body: allocate a page-aligned umem buffer, register it and create an
+ * AF_XDP socket on the queue id that @args points to (a u32). The result is
+ * stored in xsks[qid].
+ *
+ * Always returns NULL; exits the process if the buffer cannot be allocated.
+ */
+static void *create_socket(void *args)
+{
+	struct xsk_umem_info *umem;
+	u32 qid = *(u32 *)args;
+	void *buffs;
+	int err;
+
+	/* posix_memalign() reports failure through its return value and does
+	 * not set errno, so the message must be built from that value.
+	 */
+	err = posix_memalign(&buffs,
+			     getpagesize(), /* PAGE_SIZE aligned */
+			     NUM_FRAMES * FRAME_SIZE);
+	if (err) {
+		fprintf(stderr, "ERROR: Can't allocate buffer memory \"%s\"\n",
+			strerror(err));
+		exit(EXIT_FAILURE);
+	}
+
+	umem = xsk_configure_umem(buffs, NUM_FRAMES * FRAME_SIZE);
+	xsks[qid] = xsk_configure_socket(umem, qid);
+
+	return NULL;
+}
+
+/* Thread body: tear down the socket stored in xsks[qid], where the queue id
+ * is the u32 that @args points to.
+ *
+ * The socket is deleted first, then the umem, and only then is the backing
+ * buffer released, so the memory is never freed while it is still registered.
+ * Both bookkeeping structs are freed and the slot is cleared so no stale
+ * pointer is left behind. Always returns NULL.
+ */
+static void *delete_socket(void *args)
+{
+	u32 qid = *(u32 *)args;
+	struct xsk_socket_info *xsk = xsks[qid];
+	struct xsk_umem_info *umem = xsk->umem;
+
+	xsk_socket__delete(xsk->xsk);
+	/* The result is deliberately ignored, as in the rest of this test */
+	(void)xsk_umem__delete(umem->umem);
+
+	free(umem->buffer);
+	free(umem);
+	free(xsk);
+	xsks[qid] = NULL;
+
+	return NULL;
+}
+
+/* Report whether the default XSK program ("xsk_def_prog") is the program
+ * found on the interface named by opt_if.
+ *
+ * For a legacy (non-dispatcher) attachment the main program is inspected,
+ * otherwise the first program of the multiprog. Returns false if nothing is
+ * attached or the program cannot be obtained.
+ */
+static bool xsk_prog_attached(void)
+{
+	char expected[] = "xsk_def_prog";
+	struct xdp_multiprog *multiprog;
+	struct xdp_program *candidate;
+	bool match = false;
+
+	multiprog = xdp_multiprog__get_from_ifindex(if_nametoindex(opt_if));
+	if (IS_ERR_OR_NULL(multiprog))
+		return false;
+
+	if (xdp_multiprog__is_legacy(multiprog))
+		candidate = xdp_multiprog__main_prog(multiprog);
+	else
+		candidate = xdp_multiprog__next_prog(NULL, multiprog);
+
+	/* sizeof() includes the terminating NUL, so this is an exact match */
+	if (!IS_ERR_OR_NULL(candidate))
+		match = strncmp(expected, xdp_program__name(candidate),
+				sizeof(expected)) == 0;
+
+	xdp_multiprog__close(multiprog);
+	return match;
+}
+
+/* Update the expected number of live sockets in *@refcnt with the creations
+ * and deletions listed in @event.
+ */
+static void update_reference_refcnt(struct xsk_test_event *event, int *refcnt)
+{
+ *refcnt += event->num_create;
+ *refcnt -= event->num_delete;
+}
+
+/* Run all operations of @event concurrently (one thread per socket creation
+ * or deletion), wait for them, and then verify that the XSK program is
+ * attached exactly when the expected socket count in *@refcnt is positive.
+ *
+ * *@refcnt is updated with the event's creations and deletions before the
+ * threads are started. Returns true if the attachment state matches, false
+ * otherwise. Exits the process if a thread cannot be created.
+ */
+static bool check_run_event(struct xsk_test_event *event, int *refcnt)
+{
+	/* An event may hold up to MAX_NUM_QUEUES creations AND up to
+	 * MAX_NUM_QUEUES deletions, so room is needed for both sets.
+	 */
+	pthread_t threads[2 * MAX_NUM_QUEUES];
+	bool prog_attached, prog_needed;
+	u8 thread_num = 0, i;
+	int ret;
+
+	update_reference_refcnt(event, refcnt);
+
+	for (i = 0; i < event->num_create; i++) {
+		ret = pthread_create(&threads[thread_num++], NULL,
+				     &create_socket, &event->create_qids[i]);
+		if (ret)
+			exit(ret);
+	}
+
+	for (i = 0; i < event->num_delete; i++) {
+		ret = pthread_create(&threads[thread_num++], NULL,
+				     &delete_socket, &event->delete_qids[i]);
+		if (ret)
+			exit(ret);
+	}
+
+	for (i = 0; i < thread_num; i++)
+		pthread_join(threads[i], NULL);
+
+	prog_attached = xsk_prog_attached();
+	prog_needed = *refcnt > 0;
+
+	if (prog_needed != prog_attached) {
+		printf("Program is referenced by %d sockets, but is %s attached\n",
+		       *refcnt, prog_attached ? "still" : "not");
+		return false;
+	}
+
+	return true;
+}
+
+/* Run the events of @test in order, stopping at the first one that fails.
+ *
+ * Prints the index of the failing event (if any) and a final
+ * "<name>: PASSED|FAILED" line. After a successful run it sleeps for one
+ * second so that consecutive tests do not interfere with each other.
+ * Returns true when every event passed.
+ */
+static bool check_run_test(struct xsk_test *test)
+{
+	int expected_sockets = 0;
+	bool passed = true;
+	u8 ev;
+
+	for (ev = 0; ev < test->num_events; ev++) {
+		if (check_run_event(&test->events[ev], &expected_sockets))
+			continue;
+
+		printf("Event %u failed\n", ev);
+		passed = false;
+		break;
+	}
+
+	/* Do not let tests interfere with each other */
+	if (passed)
+		sleep(1);
+
+	printf("%s: %s\n", test->name, passed ? "PASSED" : "FAILED");
+	return passed;
+}
+
+/* Parse the command line: exactly one argument, the interface name, which is
+ * stored in opt_if. Returns 0 on success and -1 on a wrong argument count.
+ */
+static int read_args(int argc, char **argv)
+{
+	if (argc == 2) {
+		opt_if = argv[1];
+		return 0;
+	}
+
+	return -1;
+}
+
+/* Entry point of the XSK program refcount test.
+ *
+ * Takes the interface name as its only argument, raises RLIMIT_MEMLOCK to
+ * unlimited and runs every entry of all_tests in order. Returns 0 when all
+ * tests pass and -1 on a wrong argument count; exits with EXIT_FAILURE on
+ * the first failing test or if the rlimit cannot be raised.
+ */
+int main(int argc, char **argv)
+{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+ u8 i = 0;
+
+ if (read_args(argc, argv))
+ return -1;
+
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n",
+ strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+
+ silence_libbpf_logging();
+
+ for (i = 0; i < num_tests; i++) {
+ if (!check_run_test(&all_tests[i]))
+ exit(EXIT_FAILURE);
+ }
+
+ return 0;
+}
diff --git a/lib/libxdp/tests/xdp_dispatcher_v1.c b/lib/libxdp/tests/xdp_dispatcher_v1.c
new file mode 100644
index 0000000..00bb426
--- /dev/null
+++ b/lib/libxdp/tests/xdp_dispatcher_v1.c
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#include "xdp_dispatcher_v1.h"
+
+#define XDP_METADATA_SECTION "xdp_metadata"
+#define XDP_DISPATCHER_VERSION_V1 1
+#define XDP_DISPATCHER_RETVAL 31
+
+
+/* Dispatcher configuration read by xdp_dispatcher() below. It is
+ * zero-initialised here.
+ * NOTE(review): presumably the real values are written into the object before
+ * it is loaded, as the non-test dispatcher does; confirm against the test.
+ */
+static volatile const struct xdp_dispatcher_config_v1 conf = {};
+
+/* Stub for dispatcher slot 0. Returns XDP_ABORTED for a NULL context and
+ * XDP_DISPATCHER_RETVAL otherwise. The return value is read through a
+ * volatile local and the function is marked noinline.
+ */
+__attribute__ ((noinline))
+int prog0(struct xdp_md *ctx) {
+ volatile int ret = XDP_DISPATCHER_RETVAL;
+
+ if (!ctx)
+ return XDP_ABORTED;
+ return ret;
+}
+/* NOTE(review): this noinline attribute is separated from prog0() above and
+ * therefore applies to xdp_dispatcher() below; it looks like a leftover.
+ */
+__attribute__ ((noinline))
+
+/* Version 1 dispatcher with a single slot. If at least one program is
+ * enabled it calls prog0() and returns its result unless the bit for that
+ * result is set in conf.chain_call_actions[0]. In every other case it
+ * returns XDP_PASS.
+ */
+SEC("xdp")
+int xdp_dispatcher(struct xdp_md *ctx)
+{
+ __u8 num_progs_enabled = conf.num_progs_enabled;
+ int ret;
+
+ if (num_progs_enabled < 1)
+ goto out;
+ ret = prog0(ctx);
+ /* chain_call_actions[0] is a bitmask indexed by return code */
+ if (!((1U << ret) & conf.chain_call_actions[0]))
+ return ret;
+
+out:
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
+__uint(dispatcher_version, XDP_DISPATCHER_VERSION_V1) SEC(XDP_METADATA_SECTION);
diff --git a/lib/libxdp/tests/xdp_dispatcher_v1.h b/lib/libxdp/tests/xdp_dispatcher_v1.h
new file mode 100644
index 0000000..55dac37
--- /dev/null
+++ b/lib/libxdp/tests/xdp_dispatcher_v1.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __XDP_DISPATCHER_V1_H
+#define __XDP_DISPATCHER_V1_H
+
+#ifndef MAX_DISPATCHER_ACTIONS
+#define MAX_DISPATCHER_ACTIONS 10
+#endif
+
+/* Configuration layout of the version 1 test dispatcher. */
+struct xdp_dispatcher_config_v1 {
+ __u8 num_progs_enabled; /* number of dispatcher slots in use */
+ /* per-slot bitmask; bit N set means return code N continues the chain */
+ __u32 chain_call_actions[MAX_DISPATCHER_ACTIONS];
+ /* NOTE(review): presumably per-slot run priorities; not read by the
+ * v1 test dispatcher itself - confirm against the users of this struct
+ */
+ __u32 run_prios[MAX_DISPATCHER_ACTIONS];
+};
+
+#endif
diff --git a/lib/libxdp/tests/xdp_pass.c b/lib/libxdp/tests/xdp_pass.c
new file mode 100644
index 0000000..6b61a00
--- /dev/null
+++ b/lib/libxdp/tests/xdp_pass.c
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* Minimal XDP program used by the tests: unconditionally returns XDP_PASS. */
+SEC("xdp")
+int xdp_pass(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/lib/libxdp/xdp-dispatcher.c.in b/lib/libxdp/xdp-dispatcher.c.in
new file mode 100644
index 0000000..6214d78
--- /dev/null
+++ b/lib/libxdp/xdp-dispatcher.c.in
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+divert(-1)
+#forloop definition taken from example in the M4 manual
+define(`forloop', `pushdef(`$1', `$2')_forloop($@)popdef(`$1')')
+define(`_forloop',`$4`'ifelse($1, decr(`$3'), `', `define(`$1', incr($1))$0($@)')')
+define(`NUM_PROGS',ifdef(`MAX_DISPATCHER_ACTIONS', MAX_DISPATCHER_ACTIONS, `10'))
+divert(0)dnl
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#include <xdp/prog_dispatcher.h>
+
+/* While 'const volatile' sounds a little like an oxymoron, there's reason
+ * behind the madness:
+ *
+ * - const places the data in rodata, where libbpf will mark it as read-only and
+ * frozen on program load, letting the kernel do dead code elimination based
+ * on the values.
+ *
+ * - volatile prevents the compiler from optimising away the checks based on the
+ * compile-time value of the variables, which is important since we will be
+ * changing the values before loading the program into the kernel.
+ */
+static volatile const struct xdp_dispatcher_config conf = {};
+
+/* The volatile return value prevents the compiler from assuming it knows the
+ * return value and optimising based on that.
+ */
+forloop(`i', `0', NUM_PROGS,
+`__attribute__ ((noinline))
+int format(`prog%d', i)(struct xdp_md *ctx) {
+ volatile int ret = XDP_DISPATCHER_RETVAL;
+
+ if (!ctx)
+ return XDP_ABORTED;
+ return ret;
+}
+')
+
+/* Extra stub with the same body as the per-slot stubs above. It returns
+ * XDP_ABORTED for a NULL context and XDP_DISPATCHER_RETVAL otherwise.
+ * The dispatcher references it so that it stays in the object.
+ */
+__attribute__ ((noinline))
+int compat_test(struct xdp_md *ctx) {
+ volatile int ret = XDP_DISPATCHER_RETVAL;
+
+ if (!ctx)
+ return XDP_ABORTED;
+ return ret;
+}
+
+
+/* Dispatcher entry point. For each slot, in order, it stops with XDP_PASS
+ * once the slot number reaches the enabled count. Otherwise it calls the stub
+ * for that slot and returns the stub result unless the bit for that result
+ * is set in the matching chain_call_actions entry. The result of the final
+ * compat_test() call is not used; the function then returns XDP_PASS.
+ */
+SEC("xdp")
+int xdp_dispatcher(struct xdp_md *ctx)
+{
+ __u8 num_progs_enabled = conf.num_progs_enabled;
+ int ret;
+forloop(`i', `0', NUM_PROGS,
+`
+ if (num_progs_enabled < incr(i))
+ goto out;
+ ret = format(`prog%d', i)(ctx);
+ if (!((1U << ret) & conf.chain_call_actions[i]))
+ return ret;
+')
+ /* keep a reference to the compat_test() function so we can use it
+ * as an freplace target in xdp_multiprog__check_compat() in libxdp
+ */
+ if (num_progs_enabled < incr(NUM_PROGS))
+ goto out;
+ ret = compat_test(ctx);
+out:
+ return XDP_PASS;
+}
+
+/* Trivial XDP program that unconditionally returns XDP_PASS. */
+SEC("xdp")
+int xdp_pass(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
+__uint(dispatcher_version, XDP_DISPATCHER_VERSION) SEC(XDP_METADATA_SECTION);
diff --git a/lib/libxdp/xsk.c b/lib/libxdp/xsk.c
new file mode 100644
index 0000000..c6c201b
--- /dev/null
+++ b/lib/libxdp/xsk.c
@@ -0,0 +1,1299 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * AF_XDP user-space access library.
+ *
+ * Copyright(c) 2018 - 2021 Intel Corporation.
+ *
+ * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <dirent.h>
+#include <linux/err.h>
+#include <linux/ethtool.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_link.h>
+#include <linux/if_packet.h>
+#include <linux/if_xdp.h>
+#include <linux/list.h>
+#include <linux/sockios.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <xdp/xsk.h>
+
+#include "libxdp_internal.h"
+#include "xsk_def_xdp_prog.h"
+#include "bpf_instr.h"
+
+/* Fallback definitions for build environments whose headers do not provide
+ * these constants.
+ */
+#ifndef SOL_XDP
+ #define SOL_XDP 283
+#endif
+
+#ifndef AF_XDP
+ #define AF_XDP 44
+#endif
+
+#ifndef PF_XDP
+ #define PF_XDP AF_XDP
+#endif
+
+#ifndef SO_NETNS_COOKIE
+ #define SO_NETNS_COOKIE 71
+#endif
+
+/* NOTE(review): presumably the netns cookie value of the initial network
+ * namespace; confirm against the users of this macro.
+ */
+#define INIT_NS 1
+
+/* A registered umem: the user memory area plus the AF_XDP socket fd it was
+ * registered on.
+ */
+struct xsk_umem {
+ /* fill/completion rings supplied by the caller of xsk_umem__create() */
+ struct xsk_ring_prod *fill_save;
+ struct xsk_ring_cons *comp_save;
+ char *umem_area; /* start of the registered memory area */
+ struct xsk_umem_config config; /* effective configuration (defaults applied) */
+ int fd; /* AF_XDP socket the umem was registered on */
+ int refcount;
+ struct list_head ctx_list; /* initialised empty in xsk_umem__create() */
+ bool rx_ring_setup_done;
+ bool tx_ring_setup_done;
+};
+
+/* Per-socket context tying a umem to an interface and queue, together with
+ * the XDP program state used by that socket.
+ */
+struct xsk_ctx {
+ struct xsk_ring_prod *fill;
+ struct xsk_ring_cons *comp;
+ struct xsk_umem *umem;
+ __u32 queue_id; /* used as the key when updating the xsks map */
+ int refcount;
+ int ifindex;
+ __u64 netns_cookie;
+ int xsks_map_fd; /* fd of the "xsks_map" map of the XDP program */
+ struct list_head list;
+ struct xdp_program *xdp_prog; /* XSK XDP program found on, or attached to, the interface */
+ int refcnt_map_fd; /* fd of the program refcount map, or a negative errno */
+ char ifname[IFNAMSIZ]; /* NUL-terminated interface name */
+};
+
+/* An AF_XDP socket handle. */
+struct xsk_socket {
+ struct xsk_ring_cons *rx; /* RX ring; the xsks map is only updated when this is set */
+ struct xsk_ring_prod *tx;
+ struct xsk_ctx *ctx;
+ struct xsk_socket_config config; /* effective configuration (defaults applied) */
+ int fd; /* socket fd, returned by xsk_socket__fd() */
+};
+
+/* NOTE(review): no user of this struct is visible in this part of the file;
+ * the field names suggest netlink query state - confirm before relying on it.
+ */
+struct xsk_nl_info {
+ int ifindex;
+ int fd;
+ bool xdp_prog_attached;
+};
+
+/* Ring offset layout used up until and including Linux 5.3. It has no flags
+ * field.
+ */
+struct xdp_ring_offset_v1 {
+ __u64 producer;
+ __u64 consumer;
+ __u64 desc;
+};
+
+/* XDP_MMAP_OFFSETS result layout used up until and including Linux 5.3:
+ * one xdp_ring_offset_v1 per ring.
+ */
+struct xdp_mmap_offsets_v1 {
+ struct xdp_ring_offset_v1 rx;
+ struct xdp_ring_offset_v1 tx;
+ struct xdp_ring_offset_v1 fr; /* fill ring */
+ struct xdp_ring_offset_v1 cr; /* completion ring */
+};
+
+/* Export all inline helpers as symbols for use by language bindings. */
+extern inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill,
+ __u32 idx);
+extern inline const __u64 *
+xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx);
+extern inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx,
+ __u32 idx);
+extern inline const struct xdp_desc *
+xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx);
+extern inline int xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r);
+extern inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb);
+extern inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb);
+extern inline __u32 xsk_ring_prod__reserve(struct xsk_ring_prod *prod, __u32 nb,
+ __u32 *idx);
+extern inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb);
+extern inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb,
+ __u32 *idx);
+extern inline void xsk_ring_cons__cancel(struct xsk_ring_cons *cons, __u32 nb);
+extern inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb);
+extern inline void *xsk_umem__get_data(void *umem_area, __u64 addr);
+extern inline __u64 xsk_umem__extract_addr(__u64 addr);
+extern inline __u64 xsk_umem__extract_offset(__u64 addr);
+extern inline __u64 xsk_umem__add_offset_to_addr(__u64 addr);
+
+/* Return the file descriptor of @umem, or -EINVAL if @umem is NULL. */
+int xsk_umem__fd(const struct xsk_umem *umem)
+{
+	if (!umem)
+		return -EINVAL;
+
+	return umem->fd;
+}
+
+/* Return the file descriptor of @xsk, or -EINVAL if @xsk is NULL. */
+int xsk_socket__fd(const struct xsk_socket *xsk)
+{
+	if (!xsk)
+		return -EINVAL;
+
+	return xsk->fd;
+}
+
+/* Return true if @buffer starts on a page boundary. */
+static bool xsk_page_aligned(void *buffer)
+{
+	unsigned long page_size = (unsigned long)getpagesize();
+	unsigned long start = (unsigned long)buffer;
+
+	/* page sizes are powers of two, so the remainder equals the low bits */
+	return (start % page_size) == 0;
+}
+
+/* Fill @cfg from @usr_cfg, or with the library defaults when @usr_cfg is
+ * NULL.
+ */
+static void xsk_set_umem_config(struct xsk_umem_config *cfg,
+				const struct xsk_umem_config *usr_cfg)
+{
+	static const struct xsk_umem_config defaults = {
+		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
+		.frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
+		.flags = XSK_UMEM__DEFAULT_FLAGS,
+	};
+	const struct xsk_umem_config *src = usr_cfg ? usr_cfg : &defaults;
+
+	cfg->fill_size = src->fill_size;
+	cfg->comp_size = src->comp_size;
+	cfg->frame_size = src->frame_size;
+	cfg->frame_headroom = src->frame_headroom;
+	cfg->flags = src->flags;
+}
+
+/* Fill @cfg from @usr_cfg, or with the library defaults when @usr_cfg is
+ * NULL.
+ *
+ * Returns 0 on success, or -EINVAL (leaving @cfg untouched) if
+ * @usr_cfg->libbpf_flags contains any bit other than
+ * XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD.
+ */
+static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
+				     const struct xsk_socket_config *usr_cfg)
+{
+	/* flags that are not listed default to zero */
+	static const struct xsk_socket_config defaults = {
+		.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+	};
+	const struct xsk_socket_config *src = usr_cfg ? usr_cfg : &defaults;
+
+	if (src->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)
+		return -EINVAL;
+
+	cfg->rx_size = src->rx_size;
+	cfg->tx_size = src->tx_size;
+	cfg->libbpf_flags = src->libbpf_flags;
+	cfg->xdp_flags = src->xdp_flags;
+	cfg->bind_flags = src->bind_flags;
+
+	return 0;
+}
+
+/* Convert, in place, an XDP_MMAP_OFFSETS result that the kernel wrote in the
+ * old (<= 5.3) layout at the start of @off into the current layout.
+ */
+static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off)
+{
+ struct xdp_mmap_offsets_v1 off_v1;
+
+ /* getsockopt on a kernel <= 5.3 has no flags fields.
+ * Copy over the offsets to the correct places in the >=5.4 format
+ * and put the flags where they would have been on that kernel.
+ */
+ /* Take a copy first: the fields written below overlap the old data */
+ memcpy(&off_v1, off, sizeof(off_v1));
+
+ off->rx.producer = off_v1.rx.producer;
+ off->rx.consumer = off_v1.rx.consumer;
+ off->rx.desc = off_v1.rx.desc;
+ off->rx.flags = off_v1.rx.consumer + sizeof(__u32);
+
+ off->tx.producer = off_v1.tx.producer;
+ off->tx.consumer = off_v1.tx.consumer;
+ off->tx.desc = off_v1.tx.desc;
+ off->tx.flags = off_v1.tx.consumer + sizeof(__u32);
+
+ off->fr.producer = off_v1.fr.producer;
+ off->fr.consumer = off_v1.fr.consumer;
+ off->fr.desc = off_v1.fr.desc;
+ off->fr.flags = off_v1.fr.consumer + sizeof(__u32);
+
+ off->cr.producer = off_v1.cr.producer;
+ off->cr.consumer = off_v1.cr.consumer;
+ off->cr.desc = off_v1.cr.desc;
+ off->cr.flags = off_v1.cr.consumer + sizeof(__u32);
+}
+
+/* Query the ring mmap offsets of AF_XDP socket @fd into @off, accepting both
+ * the current layout and the one used by kernels up to 5.3.
+ *
+ * Returns 0 on success, the getsockopt() return value if the call fails, or
+ * -EINVAL if the kernel returned a layout of unexpected size.
+ */
+static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
+{
+	socklen_t len = sizeof(*off);
+	int rc;
+
+	rc = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &len);
+	if (rc != 0)
+		return rc;
+
+	if (len == sizeof(*off)) {
+		/* current layout, nothing to convert */
+		return 0;
+	} else if (len == sizeof(struct xdp_mmap_offsets_v1)) {
+		/* old layout without flags fields */
+		xsk_mmap_offsets_v1(off);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/* Create the fill and completion rings of @umem on socket @fd and map them
+ * into @fill and @comp.
+ *
+ * Ring sizes are taken from umem->config. Returns 0 on success or a negative
+ * errno code on failure. On failure no mapping made by this function is left
+ * behind.
+ */
+static int xsk_create_umem_rings(struct xsk_umem *umem, int fd,
+				 struct xsk_ring_prod *fill,
+				 struct xsk_ring_cons *comp)
+{
+	struct xdp_mmap_offsets off;
+	void *fill_map, *comp_map;
+	size_t fill_len;
+	int err;
+
+	err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING,
+			 &umem->config.fill_size,
+			 sizeof(umem->config.fill_size));
+	if (err)
+		return -errno;
+
+	err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
+			 &umem->config.comp_size,
+			 sizeof(umem->config.comp_size));
+	if (err)
+		return -errno;
+
+	err = xsk_get_mmap_offsets(fd, &off);
+	if (err) {
+		/* -1 means getsockopt() failed and errno is valid; any other
+		 * value is already a negative errno code (errno may be stale
+		 * or zero in that case).
+		 */
+		return err == -1 ? -errno : err;
+	}
+
+	fill_len = off.fr.desc + umem->config.fill_size * sizeof(__u64);
+	fill_map = mmap(NULL, fill_len,
+			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+			XDP_UMEM_PGOFF_FILL_RING);
+	if (fill_map == MAP_FAILED)
+		return -errno;
+
+	fill->mask = umem->config.fill_size - 1;
+	fill->size = umem->config.fill_size;
+	fill->producer = fill_map + off.fr.producer;
+	fill->consumer = fill_map + off.fr.consumer;
+	fill->flags = fill_map + off.fr.flags;
+	fill->ring = fill_map + off.fr.desc;
+	fill->cached_cons = umem->config.fill_size;
+
+	comp_map = mmap(NULL,
+			off.cr.desc + umem->config.comp_size * sizeof(__u64),
+			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+			XDP_UMEM_PGOFF_COMPLETION_RING);
+	if (comp_map == MAP_FAILED) {
+		err = -errno;
+		goto out_mmap;
+	}
+
+	comp->mask = umem->config.comp_size - 1;
+	comp->size = umem->config.comp_size;
+	comp->producer = comp_map + off.cr.producer;
+	comp->consumer = comp_map + off.cr.consumer;
+	comp->flags = comp_map + off.cr.flags;
+	comp->ring = comp_map + off.cr.desc;
+
+	return 0;
+
+out_mmap:
+	/* Undo the fill ring mapping; the completion ring was never mapped */
+	munmap(fill_map, fill_len);
+	return err;
+}
+
+/* Register @umem_area (@size bytes) as a umem and set up its fill and
+ * completion rings.
+ *
+ * @umem_ptr:   receives the new umem handle on success
+ * @umem_area:  start of the memory area; must be page aligned
+ * @size:       length of the area in bytes; must be non-zero
+ * @fill:       ring struct to initialise as the fill ring
+ * @comp:       ring struct to initialise as the completion ring
+ * @usr_config: optional configuration; NULL selects the defaults
+ *
+ * Returns 0 on success. Returns -EFAULT if a required pointer is NULL,
+ * -EINVAL if the area is empty or misaligned, -ENOMEM on allocation failure,
+ * or the negative errno of the failing socket operation.
+ */
+int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area,
+		     __u64 size, struct xsk_ring_prod *fill,
+		     struct xsk_ring_cons *comp,
+		     const struct xsk_umem_config *usr_config)
+{
+	struct xdp_umem_reg mr;
+	struct xsk_umem *umem;
+	int err;
+
+	if (!umem_area || !umem_ptr || !fill || !comp)
+		return -EFAULT;
+	/* Either condition alone makes the area unusable, so reject both */
+	if (!size || !xsk_page_aligned(umem_area))
+		return -EINVAL;
+
+	umem = calloc(1, sizeof(*umem));
+	if (!umem)
+		return -ENOMEM;
+
+	umem->fd = socket(AF_XDP, SOCK_RAW, 0);
+	if (umem->fd < 0) {
+		err = -errno;
+		goto out_umem_alloc;
+	}
+
+	umem->umem_area = umem_area;
+	INIT_LIST_HEAD(&umem->ctx_list);
+	xsk_set_umem_config(&umem->config, usr_config);
+
+	memset(&mr, 0, sizeof(mr));
+	mr.addr = (uintptr_t)umem_area;
+	mr.len = size;
+	mr.chunk_size = umem->config.frame_size;
+	mr.headroom = umem->config.frame_headroom;
+	mr.flags = umem->config.flags;
+
+	err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
+	if (err) {
+		err = -errno;
+		goto out_socket;
+	}
+
+	err = xsk_create_umem_rings(umem, umem->fd, fill, comp);
+	if (err)
+		goto out_socket;
+
+	/* Remember the caller's rings for later use with this umem */
+	umem->fill_save = fill;
+	umem->comp_save = comp;
+	*umem_ptr = umem;
+	return 0;
+
+out_socket:
+	close(umem->fd);
+out_umem_alloc:
+	free(umem);
+	return err;
+}
+
+/* Allocate a fresh context for @xsk and record @ifindex and the matching
+ * interface name in it.
+ *
+ * Returns 0 on success, the negative errno from if_indextoname() if the
+ * index cannot be resolved, or -ENOMEM on allocation failure. @xsk is left
+ * untouched on failure.
+ */
+static int xsk_init_xsk_struct(struct xsk_socket *xsk, int ifindex)
+{
+	char ifname[IFNAMSIZ];
+	struct xsk_ctx *ctx;
+
+	/* Resolve the name before allocating, so that errno is read right
+	 * after the failing call and nothing needs to be freed.
+	 */
+	if (!if_indextoname(ifindex, &ifname[0]))
+		return -errno;
+
+	ctx = calloc(1, sizeof(*ctx));
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->ifindex = ifindex;
+	memcpy(ctx->ifname, ifname, IFNAMSIZ - 1);
+	ctx->ifname[IFNAMSIZ - 1] = 0;
+
+	xsk->ctx = ctx;
+
+	return 0;
+}
+
+/* Translate XDP_FLAGS_* bits into a libxdp attach mode.
+ *
+ * SKB mode takes precedence over driver mode, which takes precedence over
+ * HW mode; with none of them set the result is XDP_MODE_NATIVE. Bits outside
+ * XDP_FLAGS_MASK only trigger a warning.
+ */
+static enum xdp_attach_mode xsk_convert_xdp_flags(__u32 xdp_flags)
+{
+	enum xdp_attach_mode mode = XDP_MODE_NATIVE;
+
+	if (xdp_flags & ~XDP_FLAGS_MASK)
+		pr_warn("XDP flag: 0x%x contains flags not supported by libxdp.\n", xdp_flags);
+
+	if (xdp_flags & XDP_FLAGS_SKB_MODE)
+		mode = XDP_MODE_SKB;
+	else if (xdp_flags & XDP_FLAGS_DRV_MODE)
+		mode = XDP_MODE_NATIVE;
+	else if (xdp_flags & XDP_FLAGS_HW_MODE)
+		mode = XDP_MODE_HW;
+
+	return mode;
+}
+
+#define MAX_DEV_QUEUE_PATH_LEN 64
+
+/* Count the RX and TX queues of @ifname by listing
+ * /sys/class/net/<ifname>/queues/: every entry whose name starts with "rx"
+ * increments *@rx, every entry starting with "tx" increments *@tx.
+ * Both counters are left at 0 if the path cannot be built or opened.
+ */
+static void xsk_get_queues_from_sysfs(const char* ifname, __u32 *rx, __u32 *tx) {
+ char buf[MAX_DEV_QUEUE_PATH_LEN];
+ struct dirent *entry;
+ DIR *dir;
+ int err;
+
+ *rx = *tx = 0;
+
+ err = try_snprintf(buf, MAX_DEV_QUEUE_PATH_LEN,
+ "/sys/class/net/%s/queues/", ifname);
+ if (err)
+ return;
+
+ dir = opendir(buf);
+ if(dir == NULL)
+ return;
+
+ while((entry = readdir(dir))) {
+ if (0 == strncmp(entry->d_name, "rx", 2))
+ ++*rx;
+
+ if (0 == strncmp(entry->d_name, "tx", 2))
+ ++*tx;
+ }
+
+ closedir(dir);
+}
+
+/* Determine how many queues @ifname can have.
+ *
+ * Uses the ETHTOOL_GCHANNELS ioctl; if the device does not support it, falls
+ * back to counting queue entries in sysfs. The result is never smaller than
+ * 1, because it is used as the size of the socket map, which cannot be
+ * empty.
+ *
+ * Returns the queue count (>= 1) on success or a negative errno code.
+ */
+static int xsk_get_max_queues(char *ifname)
+{
+	struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
+	struct ifreq ifr = {};
+	int fd, err, ret;
+
+	fd = socket(AF_LOCAL, SOCK_DGRAM, 0);
+	if (fd < 0)
+		return -errno;
+
+	ifr.ifr_data = (void *)&channels;
+	memcpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
+	ifr.ifr_name[IFNAMSIZ - 1] = '\0';
+	err = ioctl(fd, SIOCETHTOOL, &ifr);
+	if (err && errno != EOPNOTSUPP) {
+		ret = -errno;
+		goto out;
+	}
+
+	if (err) {
+		/* If the device says it has no channels,
+		 * try to get rx tx from sysfs, otherwise all traffic
+		 * is sent to a single stream, so max queues = 1.
+		 */
+		__u32 rx, tx;
+		xsk_get_queues_from_sysfs(ifr.ifr_name, &rx, &tx);
+		ret = max(max(rx, tx), 1);
+	} else {
+		/* Take the max of rx, tx, combined. Drivers return
+		 * the number of channels in different ways.
+		 */
+		ret = max(channels.max_rx, channels.max_tx);
+		ret = max(ret, (int)channels.max_combined);
+		/* A driver may report zero for all of them; treat that as a
+		 * single queue, like the sysfs fallback does.
+		 */
+		ret = max(ret, 1);
+	}
+
+out:
+	close(fd);
+	return ret;
+}
+
+/* Resize the "xsks_map" map of @xdp_prog to the maximum number of queues of
+ * @ifname.
+ *
+ * Returns 0 on success, -ENOENT if the object has no such map, or the error
+ * from the queue lookup or from libbpf.
+ */
+static int xsk_size_map(struct xdp_program *xdp_prog, char *ifname)
+{
+	struct bpf_object *obj = xdp_program__bpf_obj(xdp_prog);
+	struct bpf_map *xsks_map;
+	int nr_queues;
+
+	nr_queues = xsk_get_max_queues(ifname);
+	if (nr_queues < 0)
+		return nr_queues;
+
+	xsks_map = bpf_object__find_map_by_name(obj, "xsks_map");
+	if (xsks_map == NULL)
+		return -ENOENT;
+
+	return bpf_map__set_max_entries(xsks_map, nr_queues);
+}
+
+/* Remove the entry for @queue_id from the map behind @xsks_map_fd and close
+ * the fd. The result of the delete is not checked.
+ */
+static void xsk_delete_map_entry(int xsks_map_fd, __u32 queue_id)
+{
+ bpf_map_delete_elem(xsks_map_fd, &queue_id);
+ close(xsks_map_fd);
+}
+
+/* Find the first map used by the program behind @prog_fd for which
+ * @map_info_filter returns true.
+ *
+ * Returns a new fd for that map (owned by the caller), -ENOENT if no map
+ * matches, -ENOMEM on allocation failure, or the error from the program info
+ * query. Maps that cannot be opened or queried are skipped.
+ */
+static int xsk_lookup_map_by_filter(int prog_fd,
+ bool (*map_info_filter)(struct bpf_map_info *map_info))
+{
+ __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
+ __u32 map_len = sizeof(struct bpf_map_info);
+ struct bpf_prog_info prog_info = {};
+ int fd, err, xsks_map_fd = -ENOENT;
+ struct bpf_map_info map_info;
+
+ /* First query: only learn how many map ids the program has */
+ err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_len);
+ if (err)
+ return err;
+
+ num_maps = prog_info.nr_map_ids;
+
+ /* NOTE(review): with zero maps this relies on calloc(0, ...) returning
+ * a non-NULL pointer; otherwise -ENOMEM is returned instead of -ENOENT.
+ */
+ map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids));
+ if (!map_ids)
+ return -ENOMEM;
+
+ /* Second query: fetch the ids themselves into map_ids */
+ memset(&prog_info, 0, prog_len);
+ prog_info.nr_map_ids = num_maps;
+ prog_info.map_ids = (__u64)(unsigned long)map_ids;
+
+ err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_len);
+ if (err) {
+ free(map_ids);
+ return err;
+ }
+
+ for (i = 0; i < prog_info.nr_map_ids; i++) {
+ fd = bpf_map_get_fd_by_id(map_ids[i]);
+ if (fd < 0)
+ continue;
+
+ memset(&map_info, 0, map_len);
+ err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
+ if (err) {
+ close(fd);
+ continue;
+ }
+
+ if (map_info_filter(&map_info)) {
+ /* Keep this fd open: it is the return value */
+ xsks_map_fd = fd;
+ break;
+ }
+
+ close(fd);
+ }
+
+ free(map_ids);
+ return xsks_map_fd;
+}
+
+/* Filter: true for a map named "xsks_map" whose key and value are both
+ * 4 bytes wide.
+ */
+static bool xsk_map_is_socket_map(struct bpf_map_info *map_info)
+{
+	if (map_info->key_size != 4 || map_info->value_size != 4)
+		return false;
+
+	return strncmp(map_info->name, "xsks_map",
+		       sizeof(map_info->name)) == 0;
+}
+
+/* Filter: true for the .data map of the default XSK program, provided its
+ * value is large enough to hold an int.
+ */
+static bool xsk_map_is_refcnt_map(struct bpf_map_info *map_info)
+{
+ /* In order to avoid confusing users with multiple identically named
+ * maps, libbpf names non-custom internal maps (.data, .bss, etc.)
+ * in an unexpected way, namely the first 8 characters of a bpf object
+ * name + a suffix signifying the internal map type,
+ * ex. "xsk_def_" + ".data".
+ */
+ return !strncmp(map_info->name, "xsk_def_.data",
+ sizeof(map_info->name)) &&
+ map_info->value_size >= sizeof(int);
+}
+
+/* Return an fd for the socket map ("xsks_map") of the program behind
+ * @prog_fd, or a negative errno code (-ENOENT if there is none).
+ */
+static int xsk_lookup_bpf_map(int prog_fd)
+{
+ return xsk_lookup_map_by_filter(prog_fd, &xsk_map_is_socket_map);
+}
+
+/* Return an fd for the refcount map of the program behind @prog_fd, or a
+ * negative errno code.
+ *
+ * A missing map (-ENOENT) is reported with a warning whose wording depends
+ * on whether the program was loaded from @xdp_filename or was found already
+ * attached (@xdp_filename == NULL). Any other error is only logged at debug
+ * level.
+ */
+static int xsk_lookup_refcnt_map(int prog_fd, const char *xdp_filename)
+{
+	int refcnt_fd = xsk_lookup_map_by_filter(prog_fd, &xsk_map_is_refcnt_map);
+
+	if (refcnt_fd >= 0)
+		return refcnt_fd;
+
+	if (refcnt_fd != -ENOENT) {
+		pr_debug("Error getting refcount map: %s\n", strerror(-refcnt_fd));
+		return refcnt_fd;
+	}
+
+	if (!xdp_filename)
+		pr_warn("Another XSK socket was created by a version of libxdp that doesn't support program refcnt, so automatic program removal on unload is disabled.\n");
+	else
+		pr_warn("Refcount was not found in %s or kernel does not support required features, so automatic program removal on unload is disabled\n",
+			xdp_filename);
+
+	return refcnt_fd;
+}
+
+#ifdef HAVE_LIBBPF_BPF_MAP_CREATE
+/* bpf_map_create() and the new bpf_prog_load() were added at the same time -
+ * however there's a naming conflict with another bpf_prog_load() function in
+ * older versions of libbpf; to avoid hitting that we create our own wrapper
+ * function for this one even with new libbpf versions.
+ */
+/* Load @insns (@insns_cnt instructions) as a GPL-licensed XDP program named
+ * "testprog". Returns the value of bpf_prog_load().
+ */
+static int xsk_check_create_prog(struct bpf_insn *insns, size_t insns_cnt)
+{
+ return bpf_prog_load(BPF_PROG_TYPE_XDP, "testprog",
+ "GPL", insns, insns_cnt, NULL);
+}
+#else
+/* Replacement for bpf_map_create() when libbpf does not provide it, built on
+ * bpf_create_map_xattr(). @map_name and @opts are ignored.
+ */
+static int bpf_map_create(enum bpf_map_type map_type,
+ __unused const char *map_name,
+ __u32 key_size,
+ __u32 value_size,
+ __u32 max_entries,
+ __unused void *opts)
+{
+ struct bpf_create_map_attr map_attr;
+
+ memset(&map_attr, 0, sizeof(map_attr));
+ map_attr.map_type = map_type;
+ map_attr.key_size = key_size;
+ map_attr.value_size = value_size;
+ map_attr.max_entries = max_entries;
+
+ return bpf_create_map_xattr(&map_attr);
+}
+
+/* Load @insns (@insns_cnt instructions) as a GPL-licensed XDP program using
+ * bpf_load_program_xattr(), without a log buffer. Returns that function's
+ * result.
+ */
+static int xsk_check_create_prog(struct bpf_insn *insns, size_t insns_cnt)
+{
+ struct bpf_load_program_attr prog_attr;
+
+ memset(&prog_attr, 0, sizeof(prog_attr));
+ prog_attr.prog_type = BPF_PROG_TYPE_XDP;
+ prog_attr.insns = insns;
+ prog_attr.insns_cnt = insns_cnt;
+ prog_attr.license = "GPL";
+
+ return bpf_load_program_xattr(&prog_attr, NULL, 0);
+}
+#endif
+
+/* Probe the running kernel: build a one-entry XSKMAP and a tiny XDP program
+ * that calls bpf_redirect_map(map, 0, XDP_PASS), test-run it on one byte of
+ * input, and report whether the program returned XDP_PASS.
+ *
+ * Returns false if the map or program cannot be created, if the test run
+ * fails, or if the return value differs. The caller uses the result to pick
+ * between the default and the fallback (5.3) XSK program.
+ * NOTE(review): presumably XDP_PASS comes back because the map slot is empty
+ * and the kernel honours the default action in the flags - confirm.
+ */
+static bool xsk_check_redirect_flags(void)
+{
+ char data_in = 0, data_out;
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &data_in,
+ .data_out = &data_out,
+ .data_size_in = 1);
+ struct bpf_insn insns[] = {
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, XDP_PASS),
+ BPF_EMIT_CALL(BPF_FUNC_redirect_map),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd, map_fd, ret;
+ bool detected = false;
+
+ map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xskmap",
+ sizeof(int), sizeof(int), 1, NULL);
+ if (map_fd < 0)
+ return detected;
+
+ /* Patch the real map fd into the map-load instruction */
+ insns[0].imm = map_fd;
+
+ prog_fd = xsk_check_create_prog(insns, ARRAY_SIZE(insns));
+ if (prog_fd < 0) {
+ close(map_fd);
+ return detected;
+ }
+
+ ret = bpf_prog_test_run_opts(prog_fd, &opts);
+ if (!ret && opts.retval == XDP_PASS)
+ detected = true;
+ close(prog_fd);
+ close(map_fd);
+ return detected;
+}
+
+/* Look for an already attached default XSK program ("xsk_def_prog") on
+ * @ifindex.
+ *
+ * Returns a clone of the program (the multiprog it was found in is closed
+ * before returning), NULL if nothing suitable is attached, or an ERR_PTR:
+ * the error from the version check, or -EOPNOTSUPP if the program's version
+ * is newer than XSK_PROG_VERSION.
+ */
+static struct xdp_program *xsk_lookup_program(int ifindex)
+{
+ const char *version_name = "xsk_prog_version";
+ const char *prog_name = "xsk_def_prog";
+ struct xdp_multiprog *multi_prog;
+ struct xdp_program *prog = NULL;
+ __u32 version;
+ int err;
+
+ multi_prog = xdp_multiprog__get_from_ifindex(ifindex);
+ if (IS_ERR(multi_prog))
+ return NULL;
+
+ /* Legacy attachment: only the main program can be the XSK program */
+ if (xdp_multiprog__is_legacy(multi_prog)) {
+ prog = xdp_multiprog__main_prog(multi_prog);
+ prog = strcmp(xdp_program__name(prog), prog_name) ? NULL : prog;
+ goto check;
+ }
+
+ /* Otherwise search all programs of the multiprog; prog ends up NULL
+ * when none has the expected name
+ */
+ while ((prog = xdp_multiprog__next_prog(prog, multi_prog)))
+ if (!strcmp(xdp_program__name(prog), prog_name))
+ break;
+
+check:
+ if (!prog)
+ goto out;
+
+ err = check_xdp_prog_version(xdp_program__btf(prog), version_name, &version);
+ if (err) {
+ prog = ERR_PTR(err);
+ goto out;
+ }
+ if (version > XSK_PROG_VERSION) {
+ pr_warn("XSK default program version %d higher than supported %d\n", version,
+ XSK_PROG_VERSION);
+ prog = ERR_PTR(-EOPNOTSUPP);
+ }
+
+out:
+ /* Clone before closing the multiprog that prog was taken from */
+ if (!IS_ERR_OR_NULL(prog))
+ prog = xdp_program__clone(prog, 0);
+
+ xdp_multiprog__close(multi_prog);
+ return prog;
+}
+
+/* Add @delta to the program refcount stored in the map behind
+ * @refcnt_map_fd, under the lock taken with xdp_lock_acquire().
+ *
+ * A refcount that is already 0 is left unchanged. Returns the resulting
+ * refcount (0 if it was already 0), or a negative error from the map
+ * operations, the allocation (-ENOMEM) or the lock.
+ */
+static int xsk_update_prog_refcnt(int refcnt_map_fd, int delta)
+{
+ struct bpf_map_info map_info = {};
+ __u32 info_len = sizeof(map_info);
+ int *value_data = NULL;
+ int lock_fd, ret;
+ __u32 key = 0;
+
+ ret = bpf_obj_get_info_by_fd(refcnt_map_fd, &map_info, &info_len);
+ if (ret)
+ return ret;
+
+ /* The buffer must hold the map's whole value, not just the int that
+ * is read and written below
+ */
+ value_data = calloc(1, map_info.value_size);
+ if (!value_data)
+ return -ENOMEM;
+
+ lock_fd = xdp_lock_acquire();
+ if (lock_fd < 0) {
+ ret = lock_fd;
+ goto out;
+ }
+
+ /* Note, if other global variables are added before the refcnt,
+ * this changes map's value type, not number of elements,
+ * so additional offset must be applied to value_data,
+ * when reading refcount, but map key always stays zero
+ */
+ ret = bpf_map_lookup_elem(refcnt_map_fd, &key, value_data);
+ if (ret)
+ goto unlock;
+
+ /* If refcount is 0, program is awaiting detach and can't be used */
+ if (*value_data) {
+ *value_data += delta;
+ ret = bpf_map_update_elem(refcnt_map_fd, &key, value_data, 0);
+ if (ret)
+ goto unlock;
+ }
+
+ ret = *value_data;
+unlock:
+ xdp_lock_release(lock_fd);
+out:
+ free(value_data);
+ return ret;
+}
+
+/* Increment the program refcount by one. Returns the new count, 0 if the
+ * count was already 0 (left unchanged), or a negative error.
+ */
+static int xsk_incr_prog_refcnt(int refcnt_map_fd)
+{
+ return xsk_update_prog_refcnt(refcnt_map_fd, 1);
+}
+
+/* Decrement the program refcount by one. Returns the new count, 0 if the
+ * count was already 0 (left unchanged), or a negative error.
+ */
+static int xsk_decr_prog_refcnt(int refcnt_map_fd)
+{
+ return xsk_update_prog_refcnt(refcnt_map_fd, -1);
+}
+
+/* Make sure an XSK-capable XDP program is attached to ctx->ifindex and, if the
+ * socket has an RX ring, insert the socket into the program's xsks map.
+ *
+ * If xsk_lookup_program() finds a program, it is reused and a reference is
+ * taken through its refcount map (when it has one). Otherwise, or if the
+ * found program is already being detached, one of the default programs is
+ * loaded and attached.
+ *
+ * @xsk: socket whose context is being set up
+ * @xsks_map_fd: if non-NULL, receives the fd of the xsks map on success
+ *
+ * Returns 0 on success or a negative error code. On failure the context is
+ * left with xdp_prog == NULL and refcnt_map_fd == -ENOENT (except when the
+ * initial lookup fails, where refcnt_map_fd is not touched), and every
+ * attach or reference taken by this call has been undone.
+ */
+static int __xsk_setup_xdp_prog(struct xsk_socket *xsk, int *xsks_map_fd)
+{
+	const char *fallback_prog = "xsk_def_xdp_prog_5.3.o";
+	const char *default_prog = "xsk_def_xdp_prog.o";
+	struct xsk_ctx *ctx = xsk->ctx;
+	const char *file_name = NULL;
+	bool refcnt_taken = false;
+	bool attached = false;
+	int err;
+
+	ctx->xdp_prog = xsk_lookup_program(ctx->ifindex);
+	if (IS_ERR(ctx->xdp_prog)) {
+		/* Never leave an error pointer in the context: teardown code
+		 * only checks xdp_prog for being non-NULL.
+		 */
+		err = PTR_ERR(ctx->xdp_prog);
+		ctx->xdp_prog = NULL;
+		return err;
+	}
+
+	ctx->refcnt_map_fd = -ENOENT;
+
+	if (ctx->xdp_prog) {
+		int refcnt;
+
+		ctx->refcnt_map_fd = xsk_lookup_refcnt_map(xdp_program__fd(ctx->xdp_prog), NULL);
+		/* Program without a refcount map: use it as is */
+		if (ctx->refcnt_map_fd == -ENOENT)
+			goto map_lookup;
+
+		if (ctx->refcnt_map_fd < 0) {
+			err = ctx->refcnt_map_fd;
+			goto err_prog_load;
+		}
+
+		refcnt = xsk_incr_prog_refcnt(ctx->refcnt_map_fd);
+		if (refcnt < 0) {
+			err = refcnt;
+			pr_debug("Error occurred when incrementing xsk XDP prog refcount: %s\n",
+				 strerror(-err));
+			goto err_prog_load;
+		}
+
+		if (!refcnt) {
+			pr_warn("Current program is being detached, falling back on creating a new program\n");
+			close(ctx->refcnt_map_fd);
+			ctx->refcnt_map_fd = -ENOENT;
+			xdp_program__close(ctx->xdp_prog);
+			ctx->xdp_prog = NULL;
+		} else {
+			/* Remember the reference so error paths can drop it */
+			refcnt_taken = true;
+		}
+	}
+
+	if (!ctx->xdp_prog) {
+		file_name = xsk_check_redirect_flags() ? default_prog : fallback_prog;
+		ctx->xdp_prog = xdp_program__find_file(file_name, NULL, NULL);
+		if (IS_ERR(ctx->xdp_prog)) {
+			err = PTR_ERR(ctx->xdp_prog);
+			ctx->xdp_prog = NULL;
+			return err;
+		}
+
+		err = xsk_size_map(ctx->xdp_prog, ctx->ifname);
+		if (err)
+			goto err_prog_load;
+
+		err = xdp_program__attach(ctx->xdp_prog, ctx->ifindex,
+					  xsk_convert_xdp_flags(xsk->config.xdp_flags), 0);
+		if (err)
+			goto err_prog_load;
+
+		attached = true;
+	}
+
+	if (ctx->refcnt_map_fd < 0) {
+		ctx->refcnt_map_fd = xsk_lookup_refcnt_map(xdp_program__fd(ctx->xdp_prog),
+							   file_name);
+		if (ctx->refcnt_map_fd < 0 && ctx->refcnt_map_fd != -ENOENT) {
+			err = ctx->refcnt_map_fd;
+			/* Only reached after a fresh attach, so go through
+			 * err_lookup to detach the program again.
+			 */
+			goto err_lookup;
+		}
+	}
+map_lookup:
+	ctx->xsks_map_fd = xsk_lookup_bpf_map(xdp_program__fd(ctx->xdp_prog));
+	if (ctx->xsks_map_fd < 0) {
+		err = ctx->xsks_map_fd;
+		goto err_lookup;
+	}
+
+	if (xsk->rx) {
+		err = bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id, &xsk->fd, 0);
+		if (err)
+			goto err_lookup;
+	}
+	if (xsks_map_fd)
+		*xsks_map_fd = ctx->xsks_map_fd;
+
+	return 0;
+
+err_lookup:
+	if (attached)
+		xdp_program__detach(ctx->xdp_prog, ctx->ifindex,
+				    xsk_convert_xdp_flags(xsk->config.xdp_flags), 0);
+err_prog_load:
+	/* Give back the reference taken on a pre-existing program. If that
+	 * turns out to be the last one, detach the program, as is done when a
+	 * socket is deleted.
+	 */
+	if (refcnt_taken && !xsk_decr_prog_refcnt(ctx->refcnt_map_fd))
+		xdp_program__detach(ctx->xdp_prog, ctx->ifindex,
+				    xsk_convert_xdp_flags(xsk->config.xdp_flags), 0);
+	if (ctx->refcnt_map_fd >= 0)
+		close(ctx->refcnt_map_fd);
+	ctx->refcnt_map_fd = -ENOENT;
+	xdp_program__close(ctx->xdp_prog);
+	ctx->xdp_prog = NULL;
+	return err;
+}
+
+/* Look for a context of @umem that matches the given netns cookie, interface
+ * index and queue id. On a match the context's refcount is incremented and
+ * the context is returned; otherwise NULL is returned.
+ */
+static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, __u64 netns_cookie, int ifindex, __u32 queue_id)
+{
+	struct xsk_ctx *cur;
+
+	if (list_empty(&umem->ctx_list))
+		return NULL;
+
+	list_for_each_entry(cur, &umem->ctx_list, list) {
+		if (cur->netns_cookie != netns_cookie)
+			continue;
+		if (cur->ifindex != ifindex)
+			continue;
+		if (cur->queue_id != queue_id)
+			continue;
+
+		cur->refcount++;
+		return cur;
+	}
+
+	return NULL;
+}
+
+/* Drop one reference on @ctx. When the last reference goes away the context
+ * is unlinked from its list and freed; if @unmap is set and the mmap offsets
+ * can be queried, the fill and completion ring mappings are removed first.
+ */
+static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap)
+{
+	struct xsk_umem *umem = ctx->umem;
+	struct xdp_mmap_offsets off;
+
+	ctx->refcount--;
+	if (ctx->refcount)
+		return;
+
+	/* The offsets are only needed (and only queried) when unmapping */
+	if (unmap && !xsk_get_mmap_offsets(umem->fd, &off)) {
+		munmap(ctx->fill->ring - off.fr.desc,
+		       off.fr.desc + umem->config.fill_size * sizeof(__u64));
+		munmap(ctx->comp->ring - off.cr.desc,
+		       off.cr.desc + umem->config.comp_size * sizeof(__u64));
+	}
+
+	list_del(&ctx->list);
+	free(ctx);
+}
+
+/* Allocate a new context for (@netns_cookie, @ifindex, @queue_id) on @umem and
+ * link it into the umem's context list with a refcount of one.
+ *
+ * If the umem has no saved fill ring, the fill/completion rings are created
+ * on @xsk's fd; otherwise, when the caller passed different ring structs
+ * than the saved ones, the saved rings are copied into @fill and @comp.
+ *
+ * Returns the new context, or NULL if allocation or ring creation fails.
+ */
+static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
+				      struct xsk_umem *umem, __u64 netns_cookie, int ifindex,
+				      const char *ifname, __u32 queue_id,
+				      struct xsk_ring_prod *fill,
+				      struct xsk_ring_cons *comp)
+{
+	struct xsk_ctx *ctx;
+	int err;
+
+	ctx = calloc(1, sizeof(*ctx));
+	if (!ctx)
+		return NULL;
+
+	if (!umem->fill_save) {
+		err = xsk_create_umem_rings(umem, xsk->fd, fill, comp);
+		if (err) {
+			free(ctx);
+			return NULL;
+		}
+	} else if (umem->fill_save != fill || umem->comp_save != comp) {
+		/* Copy over rings to new structs. */
+		memcpy(fill, umem->fill_save, sizeof(*fill));
+		memcpy(comp, umem->comp_save, sizeof(*comp));
+	}
+
+	ctx->netns_cookie = netns_cookie;
+	ctx->ifindex = ifindex;
+	ctx->refcount = 1;
+	ctx->umem = umem;
+	ctx->queue_id = queue_id;
+	/* Stop at the terminating NUL of @ifname: a fixed-size memcpy() would
+	 * read past the end of interface names shorter than IFNAMSIZ - 1.
+	 */
+	strncpy(ctx->ifname, ifname, IFNAMSIZ - 1);
+	ctx->ifname[IFNAMSIZ - 1] = '\0';
+
+	ctx->fill = fill;
+	ctx->comp = comp;
+	list_add(&ctx->list, &umem->ctx_list);
+	return ctx;
+}
+
+/* Free @xsk together with the context it points to. */
+static void xsk_destroy_xsk_struct(struct xsk_socket *xsk)
+{
+	struct xsk_ctx *ctx = xsk->ctx;
+
+	free(ctx);
+	free(xsk);
+}
+
+/* Record @fd as the socket's xsks map and store the socket's fd in it under
+ * the context's queue id. Returns the result of bpf_map_update_elem().
+ */
+int xsk_socket__update_xskmap(struct xsk_socket *xsk, int fd)
+{
+	struct xsk_ctx *sock_ctx = xsk->ctx;
+
+	sock_ctx->xsks_map_fd = fd;
+
+	return bpf_map_update_elem(fd, &sock_ctx->queue_id, &xsk->fd, 0);
+}
+
+/* Set up the XSK XDP program on @ifindex without creating a real socket.
+ *
+ * A temporary socket struct is initialised with xsk_init_xsk_struct(), handed
+ * to __xsk_setup_xdp_prog() and destroyed again afterwards.
+ *
+ * @xsks_map_fd: passed through to __xsk_setup_xdp_prog()
+ *
+ * Returns -ENOMEM if the temporary struct can't be allocated, -EINVAL if its
+ * initialisation fails, otherwise the result of __xsk_setup_xdp_prog().
+ */
+int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd)
+{
+	struct xsk_socket *tmp_xsk;
+	int ret;
+
+	tmp_xsk = calloc(1, sizeof(*tmp_xsk));
+	if (!tmp_xsk)
+		return -ENOMEM;
+
+	if (xsk_init_xsk_struct(tmp_xsk, ifindex)) {
+		free(tmp_xsk);
+		return -EINVAL;
+	}
+
+	ret = __xsk_setup_xdp_prog(tmp_xsk, xsks_map_fd);
+	xsk_destroy_xsk_struct(tmp_xsk);
+
+	return ret;
+}
+
+/* Create an AF_XDP socket on queue @queue_id of interface @ifname using @umem,
+ * which may already be in use by other sockets.
+ *
+ * The first socket created on a umem reuses the umem's fd; every further
+ * socket gets its own fd and is bound with XDP_SHARED_UMEM. @rx and/or @tx
+ * are set up and mapped as requested (at least one of them must be given).
+ * @fill and @comp are required when no context exists yet for this
+ * netns/interface/queue combination. Unless the config sets
+ * XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD, the XDP program is set up as well.
+ *
+ * Returns 0 and stores the socket in *@xsk_ptr on success, or a negative
+ * error code, in which case everything done by this call has been rolled
+ * back, including the reference taken on @umem.
+ */
+int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
+			      const char *ifname,
+			      __u32 queue_id, struct xsk_umem *umem,
+			      struct xsk_ring_cons *rx,
+			      struct xsk_ring_prod *tx,
+			      struct xsk_ring_prod *fill,
+			      struct xsk_ring_cons *comp,
+			      const struct xsk_socket_config *usr_config)
+{
+	bool rx_setup_done = false, tx_setup_done = false;
+	void *rx_map = NULL, *tx_map = NULL;
+	struct sockaddr_xdp sxdp = {};
+	struct xdp_mmap_offsets off;
+	struct xsk_socket *xsk;
+	struct xsk_ctx *ctx;
+	int err, ifindex;
+	__u64 netns_cookie;
+	socklen_t optlen;
+	bool unmap;
+
+	if (!umem || !xsk_ptr || !(rx || tx))
+		return -EFAULT;
+
+	xsk = calloc(1, sizeof(*xsk));
+	if (!xsk)
+		return -ENOMEM;
+
+	err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
+	if (err)
+		goto out_xsk_alloc;
+
+	ifindex = if_nametoindex(ifname);
+	if (!ifindex) {
+		err = -errno;
+		goto out_xsk_alloc;
+	}
+
+	if (umem->refcount++ > 0) {
+		xsk->fd = socket(AF_XDP, SOCK_RAW, 0);
+		if (xsk->fd < 0) {
+			err = -errno;
+			/* out_xsk_alloc does not touch the umem refcount, so
+			 * undo the increment here; otherwise the umem could
+			 * never be deleted again.
+			 */
+			umem->refcount--;
+			goto out_xsk_alloc;
+		}
+	} else {
+		xsk->fd = umem->fd;
+		rx_setup_done = umem->rx_ring_setup_done;
+		tx_setup_done = umem->tx_ring_setup_done;
+	}
+
+	optlen = sizeof(netns_cookie);
+	err = getsockopt(xsk->fd, SOL_SOCKET, SO_NETNS_COOKIE, &netns_cookie, &optlen);
+	if (err) {
+		if (errno != ENOPROTOOPT) {
+			err = -errno;
+			goto out_socket;
+		}
+		/* Socket option not available: fall back to INIT_NS */
+		netns_cookie = INIT_NS;
+	}
+
+	ctx = xsk_get_ctx(umem, netns_cookie, ifindex, queue_id);
+	if (!ctx) {
+		if (!fill || !comp) {
+			err = -EFAULT;
+			goto out_socket;
+		}
+
+		ctx = xsk_create_ctx(xsk, umem, netns_cookie, ifindex, ifname, queue_id,
+				     fill, comp);
+		if (!ctx) {
+			err = -ENOMEM;
+			goto out_socket;
+		}
+	}
+	xsk->ctx = ctx;
+
+	if (rx && !rx_setup_done) {
+		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
+				 &xsk->config.rx_size,
+				 sizeof(xsk->config.rx_size));
+		if (err) {
+			err = -errno;
+			goto out_put_ctx;
+		}
+		if (xsk->fd == umem->fd)
+			umem->rx_ring_setup_done = true;
+
+	}
+	if (tx && !tx_setup_done) {
+		err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
+				 &xsk->config.tx_size,
+				 sizeof(xsk->config.tx_size));
+		if (err) {
+			err = -errno;
+			goto out_put_ctx;
+		}
+		if (xsk->fd == umem->fd)
+			umem->tx_ring_setup_done = true;
+	}
+
+	err = xsk_get_mmap_offsets(xsk->fd, &off);
+	if (err) {
+		err = -errno;
+		goto out_put_ctx;
+	}
+
+	if (rx) {
+		rx_map = mmap(NULL, off.rx.desc +
+			      xsk->config.rx_size * sizeof(struct xdp_desc),
+			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+			      xsk->fd, XDP_PGOFF_RX_RING);
+		if (rx_map == MAP_FAILED) {
+			err = -errno;
+			goto out_put_ctx;
+		}
+
+		rx->mask = xsk->config.rx_size - 1;
+		rx->size = xsk->config.rx_size;
+		rx->producer = rx_map + off.rx.producer;
+		rx->consumer = rx_map + off.rx.consumer;
+		rx->flags = rx_map + off.rx.flags;
+		rx->ring = rx_map + off.rx.desc;
+		rx->cached_prod = *rx->producer;
+		rx->cached_cons = *rx->consumer;
+	}
+	xsk->rx = rx;
+
+	if (tx) {
+		tx_map = mmap(NULL, off.tx.desc +
+			      xsk->config.tx_size * sizeof(struct xdp_desc),
+			      PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+			      xsk->fd, XDP_PGOFF_TX_RING);
+		if (tx_map == MAP_FAILED) {
+			err = -errno;
+			goto out_mmap_rx;
+		}
+
+		tx->mask = xsk->config.tx_size - 1;
+		tx->size = xsk->config.tx_size;
+		tx->producer = tx_map + off.tx.producer;
+		tx->consumer = tx_map + off.tx.consumer;
+		tx->flags = tx_map + off.tx.flags;
+		tx->ring = tx_map + off.tx.desc;
+		tx->cached_prod = *tx->producer;
+		/* cached_cons is r->size bigger than the real consumer pointer
+		 * See xsk_prod_nb_free
+		 */
+		tx->cached_cons = *tx->consumer + xsk->config.tx_size;
+	}
+	xsk->tx = tx;
+
+	sxdp.sxdp_family = PF_XDP;
+	sxdp.sxdp_ifindex = ctx->ifindex;
+	sxdp.sxdp_queue_id = ctx->queue_id;
+	if (umem->refcount > 1) {
+		sxdp.sxdp_flags |= XDP_SHARED_UMEM;
+		sxdp.sxdp_shared_umem_fd = umem->fd;
+	} else {
+		sxdp.sxdp_flags = xsk->config.bind_flags;
+	}
+
+	err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
+	if (err) {
+		err = -errno;
+		goto out_mmap_tx;
+	}
+
+	if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
+		err = __xsk_setup_xdp_prog(xsk, NULL);
+		if (err)
+			goto out_mmap_tx;
+	}
+
+	*xsk_ptr = xsk;
+	umem->fill_save = NULL;
+	umem->comp_save = NULL;
+	return 0;
+
+out_mmap_tx:
+	if (tx)
+		munmap(tx_map, off.tx.desc +
+		       xsk->config.tx_size * sizeof(struct xdp_desc));
+out_mmap_rx:
+	if (rx)
+		munmap(rx_map, off.rx.desc +
+		       xsk->config.rx_size * sizeof(struct xdp_desc));
+out_put_ctx:
+	/* Rings that are still saved in the umem must stay mapped */
+	unmap = umem->fill_save != fill;
+	xsk_put_ctx(ctx, unmap);
+out_socket:
+	/* Only close fds created here, never the umem's own fd */
+	if (--umem->refcount)
+		close(xsk->fd);
+out_xsk_alloc:
+	free(xsk);
+	return err;
+}
+
+/* Convenience wrapper around xsk_socket__create_shared() that passes the fill
+ * and completion rings saved in @umem. Returns -EFAULT if @umem is NULL,
+ * otherwise whatever xsk_socket__create_shared() returns.
+ */
+int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
+		       __u32 queue_id, struct xsk_umem *umem,
+		       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
+		       const struct xsk_socket_config *usr_config)
+{
+	struct xsk_ring_prod *saved_fill;
+	struct xsk_ring_cons *saved_comp;
+
+	if (!umem)
+		return -EFAULT;
+
+	saved_fill = umem->fill_save;
+	saved_comp = umem->comp_save;
+
+	return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem, rx, tx,
+					 saved_fill, saved_comp, usr_config);
+}
+
+/* Destroy @umem: unmap the saved fill/completion rings if they are still
+ * recorded in the umem and the mmap offsets can be queried, close the umem's
+ * fd and free the struct.
+ *
+ * Returns 0 on success (a NULL @umem is accepted) or -EBUSY while the umem's
+ * refcount is non-zero.
+ */
+int xsk_umem__delete(struct xsk_umem *umem)
+{
+	struct xdp_mmap_offsets off;
+	bool have_offsets;
+
+	if (!umem)
+		return 0;
+
+	if (umem->refcount)
+		return -EBUSY;
+
+	have_offsets = !xsk_get_mmap_offsets(umem->fd, &off);
+	if (have_offsets && umem->fill_save && umem->comp_save) {
+		size_t fill_len = off.fr.desc + umem->config.fill_size * sizeof(__u64);
+		size_t comp_len = off.cr.desc + umem->config.comp_size * sizeof(__u64);
+
+		munmap(umem->fill_save->ring - off.fr.desc, fill_len);
+		munmap(umem->comp_save->ring - off.cr.desc, comp_len);
+	}
+
+	close(umem->fd);
+	free(umem);
+
+	return 0;
+}
+
+/* Give up this socket's use of the XDP program held in its context.
+ *
+ * When the context has a refcount map, the count is decremented and the
+ * program is detached from the interface once it reaches zero. The program
+ * handle is closed in every case.
+ */
+static void xsk_release_xdp_prog(struct xsk_socket *xsk)
+{
+	struct xsk_ctx *ctx = xsk->ctx;
+
+	if (ctx->refcnt_map_fd >= 0) {
+		int remaining = xsk_decr_prog_refcnt(ctx->refcnt_map_fd);
+
+		if (remaining < 0)
+			pr_warn("Error occurred when decrementing xsk XDP prog refcount: %s, please detach program yourself\n",
+				strerror(-remaining));
+		else if (!remaining)
+			xdp_program__detach(ctx->xdp_prog, ctx->ifindex,
+					    xsk_convert_xdp_flags(xsk->config.xdp_flags), 0);
+	}
+
+	xdp_program__close(ctx->xdp_prog);
+}
+
+/* Tear down @xsk (NULL is accepted): remove it from the xsks map and release
+ * the XDP program if one is set in the context, unmap the RX/TX rings, drop
+ * the context and umem references, close the socket fd unless it is the
+ * umem's own fd, and free the socket struct.
+ */
+void xsk_socket__delete(struct xsk_socket *xsk)
+{
+	const size_t desc_sz = sizeof(struct xdp_desc);
+	struct xdp_mmap_offsets off;
+	struct xsk_umem *umem;
+	struct xsk_ctx *ctx;
+
+	if (!xsk)
+		return;
+
+	ctx = xsk->ctx;
+	umem = ctx->umem;
+
+	if (ctx->xdp_prog) {
+		xsk_delete_map_entry(ctx->xsks_map_fd, ctx->queue_id);
+		xsk_release_xdp_prog(xsk);
+	}
+
+	/* Without the offsets the ring base addresses can't be computed */
+	if (!xsk_get_mmap_offsets(xsk->fd, &off)) {
+		if (xsk->rx)
+			munmap(xsk->rx->ring - off.rx.desc,
+			       off.rx.desc + xsk->config.rx_size * desc_sz);
+		if (xsk->tx)
+			munmap(xsk->tx->ring - off.tx.desc,
+			       off.tx.desc + xsk->config.tx_size * desc_sz);
+	}
+
+	xsk_put_ctx(ctx, true);
+	umem->refcount--;
+
+	/* Do not close an fd that also has an associated umem connected
+	 * to it.
+	 */
+	if (xsk->fd != umem->fd)
+		close(xsk->fd);
+
+	free(xsk);
+}
diff --git a/lib/libxdp/xsk_def_xdp_prog.c b/lib/libxdp/xsk_def_xdp_prog.c
new file mode 100644
index 0000000..801ad12
--- /dev/null
+++ b/lib/libxdp/xsk_def_xdp_prog.c
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <xdp/xdp_helpers.h>
+
+#include "xsk_def_xdp_prog.h"
+
+#define DEFAULT_QUEUE_IDS 64
+
+/* XSKMAP that xsk_def_prog() redirects into, indexed by RX queue index.
+ * It is declared with DEFAULT_QUEUE_IDS entries and int-sized keys and values.
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_XSKMAP);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __uint(max_entries, DEFAULT_QUEUE_IDS);
+} xsks_map SEC(".maps");
+
+/* Run configuration attached to xsk_def_prog via XDP_RUN_CONFIG().
+ * NOTE(review): presumably read by the libxdp dispatcher as "run priority 20,
+ * continue to the next program on XDP_PASS" - confirm against the libxdp
+ * documentation.
+ */
+struct {
+ __uint(priority, 20);
+ __uint(XDP_PASS, 1);
+} XDP_RUN_CONFIG(xsk_def_prog);
+
+/* Program refcount, in order to work properly,
+ * must be declared before any other global variables
+ * and initialized with '1'.
+ *
+ * The program itself only tests it for zero; nothing in this file modifies it.
+ */
+volatile int refcnt = 1;
+
+/* This is the program for post 5.3 kernels.
+ *
+ * Returns XDP_PASS when the refcount is zero, otherwise the result of
+ * redirecting the packet into xsks_map at the packet's RX queue index.
+ */
+SEC("xdp")
+int xsk_def_prog(struct xdp_md *ctx)
+{
+ /* Make sure refcount is referenced by the program */
+ if (!refcnt)
+ return XDP_PASS;
+
+ /* A set entry here means that the corresponding queue_id
+ * has an active AF_XDP socket bound to it.
+ *
+ * XDP_PASS is handed to bpf_redirect_map() as its flags argument; per
+ * the BPF helper documentation the low bits of flags are the action
+ * returned when the map lookup fails.
+ */
+ return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS);
+}
+
+/* License string placed in the "license" section */
+char _license[] SEC("license") = "GPL";
+/* Records XSK_PROG_VERSION in the XDP_METADATA_SECTION section */
+__uint(xsk_prog_version, XSK_PROG_VERSION) SEC(XDP_METADATA_SECTION);
diff --git a/lib/libxdp/xsk_def_xdp_prog.h b/lib/libxdp/xsk_def_xdp_prog.h
new file mode 100644
index 0000000..b51883d
--- /dev/null
+++ b/lib/libxdp/xsk_def_xdp_prog.h
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+
+#ifndef __LIBXDP_XSK_DEF_XDP_PROG_H
+#define __LIBXDP_XSK_DEF_XDP_PROG_H
+
+/* Name of the ELF section that holds program metadata */
+#define XDP_METADATA_SECTION "xdp_metadata"
+/* Version number of the default XSK XDP programs */
+#define XSK_PROG_VERSION 1
+
+#endif /* __LIBXDP_XSK_DEF_XDP_PROG_H */
diff --git a/lib/libxdp/xsk_def_xdp_prog_5.3.c b/lib/libxdp/xsk_def_xdp_prog_5.3.c
new file mode 100644
index 0000000..7973477
--- /dev/null
+++ b/lib/libxdp/xsk_def_xdp_prog_5.3.c
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <xdp/xdp_helpers.h>
+
+#include "xsk_def_xdp_prog.h"
+
+#define DEFAULT_QUEUE_IDS 64
+
+/* XSKMAP that xsk_def_prog() looks up and redirects into, indexed by RX queue
+ * index. It is declared with DEFAULT_QUEUE_IDS entries and int-sized keys and
+ * values.
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_XSKMAP);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __uint(max_entries, DEFAULT_QUEUE_IDS);
+} xsks_map SEC(".maps");
+
+/* Run configuration attached to xsk_def_prog via XDP_RUN_CONFIG().
+ * NOTE(review): presumably read by the libxdp dispatcher as "run priority 20,
+ * continue to the next program on XDP_PASS" - confirm against the libxdp
+ * documentation.
+ */
+struct {
+ __uint(priority, 20);
+ __uint(XDP_PASS, 1);
+} XDP_RUN_CONFIG(xsk_def_prog);
+
+/* Program refcount, in order to work properly,
+ * must be declared before any other global variables
+ * and initialized with '1'.
+ *
+ * The program itself only tests it for zero; nothing in this file modifies it.
+ */
+volatile int refcnt = 1;
+
+/* This is the program for 5.3 kernels and older.
+ *
+ * Returns XDP_PASS when the refcount is zero or when xsks_map has no entry
+ * for the packet's RX queue index; otherwise returns the result of
+ * redirecting the packet into xsks_map at that index.
+ */
+SEC("xdp")
+int xsk_def_prog(struct xdp_md *ctx)
+{
+ int index = ctx->rx_queue_index;
+
+ /* Make sure refcount is referenced by the program */
+ if (!refcnt)
+ return XDP_PASS;
+
+ /* A set entry here means that the corresponding queue_id
+ * has an active AF_XDP socket bound to it.
+ *
+ * The entry is looked up explicitly, and the redirect is only issued
+ * (with flags 0) when one exists.
+ */
+ if (bpf_map_lookup_elem(&xsks_map, &index))
+ return bpf_redirect_map(&xsks_map, index, 0);
+ return XDP_PASS;
+
+}
+
+/* License string placed in the "license" section */
+char _license[] SEC("license") = "GPL";
+/* Records XSK_PROG_VERSION in the XDP_METADATA_SECTION section */
+__uint(xsk_prog_version, XSK_PROG_VERSION) SEC(XDP_METADATA_SECTION);