diff options
Diffstat (limited to 'src/libknot/xdp')
-rw-r--r-- | src/libknot/xdp/Makefile.am | 15 | ||||
-rw-r--r-- | src/libknot/xdp/Makefile.in | 533 | ||||
-rw-r--r-- | src/libknot/xdp/bpf-consts.h | 23 | ||||
-rw-r--r-- | src/libknot/xdp/bpf-kernel-obj.c | 144 | ||||
-rw-r--r-- | src/libknot/xdp/bpf-kernel-obj.h | 2 | ||||
-rw-r--r-- | src/libknot/xdp/bpf-kernel.c | 159 | ||||
-rw-r--r-- | src/libknot/xdp/bpf-user.c | 287 | ||||
-rw-r--r-- | src/libknot/xdp/bpf-user.h | 112 | ||||
-rw-r--r-- | src/libknot/xdp/eth.c | 129 | ||||
-rw-r--r-- | src/libknot/xdp/eth.h | 57 | ||||
-rw-r--r-- | src/libknot/xdp/xdp.c | 717 | ||||
-rw-r--r-- | src/libknot/xdp/xdp.h | 167 |
12 files changed, 2345 insertions, 0 deletions
diff --git a/src/libknot/xdp/Makefile.am b/src/libknot/xdp/Makefile.am new file mode 100644 index 0000000..e4d309c --- /dev/null +++ b/src/libknot/xdp/Makefile.am @@ -0,0 +1,15 @@ +# Useful commands: +# make filter +# ip link show $eth +# sudo ip link set dev $eth xdp off +# sudo ip link set dev $eth xdp obj ./bpf-kernel.o + +EXTRA_DIST = bpf-kernel-obj.c bpf-kernel.c + +.PHONY: filter + +filter: + rm -f bpf-kernel.ll bpf-kernel.o bpf-kernel-obj.c + clang -S -target bpf -Wall -O2 -emit-llvm -c -DNDEBUG -o bpf-kernel.ll -I/usr/include/x86_64-linux-gnu -include ../../config.h bpf-kernel.c + llc -march=bpf -filetype=obj -o bpf-kernel.o bpf-kernel.ll + xxd -i bpf-kernel.o > bpf-kernel-obj.c diff --git a/src/libknot/xdp/Makefile.in b/src/libknot/xdp/Makefile.in new file mode 100644 index 0000000..5fcf7a3 --- /dev/null +++ b/src/libknot/xdp/Makefile.in @@ -0,0 +1,533 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# Useful commands: +# make filter +# ip link show $eth +# sudo ip link set dev $eth xdp off +# sudo ip link set dev $eth xdp obj ./bpf-kernel.o +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = src/libknot/xdp +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/ax_cc_clang.m4 \ + $(top_srcdir)/m4/ax_check_compile_flag.m4 \ + $(top_srcdir)/m4/ax_check_link_flag.m4 \ + $(top_srcdir)/m4/ax_compare_version.m4 \ + $(top_srcdir)/m4/code-coverage.m4 \ + $(top_srcdir)/m4/knot-lib-version.m4 \ + $(top_srcdir)/m4/knot-module.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/sanitizer.m4 $(top_srcdir)/m4/visibility.m4 \ + $(top_srcdir)/m4/knot-version.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__DIST_COMMON = $(srcdir)/Makefile.in +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CC_CLANG_VERSION = @CC_CLANG_VERSION@ +CFLAGS = @CFLAGS@ +CFLAG_VISIBILITY = @CFLAG_VISIBILITY@ +CODE_COVERAGE_ENABLED = @CODE_COVERAGE_ENABLED@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DNSTAP_CFLAGS = @DNSTAP_CFLAGS@ +DNSTAP_LIBS = @DNSTAP_LIBS@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GENHTML = @GENHTML@ +GREP = @GREP@ +HAVE_VISIBILITY = @HAVE_VISIBILITY@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +KNOT_VERSION_MAJOR = @KNOT_VERSION_MAJOR@ +KNOT_VERSION_MINOR = @KNOT_VERSION_MINOR@ +KNOT_VERSION_PATCH = @KNOT_VERSION_PATCH@ +LCOV = @LCOV@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LDFLAG_EXCLUDE_LIBS = @LDFLAG_EXCLUDE_LIBS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PDFLATEX = @PDFLATEX@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PROTOC_C = @PROTOC_C@ +RANLIB = @RANLIB@ +RELEASE_DATE = @RELEASE_DATE@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SPHINXBUILD = @SPHINXBUILD@ +STRIP = @STRIP@ +VERSION = @VERSION@ +XDP_VISIBLE_HEADERS = @XDP_VISIBLE_HEADERS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +cap_ng_CFLAGS = @cap_ng_CFLAGS@ +cap_ng_LIBS = @cap_ng_LIBS@ +conf_mapsize = @conf_mapsize@ +config_dir = @config_dir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +dlopen_LIBS = @dlopen_LIBS@ +docdir = @docdir@ +dvidir = @dvidir@ +embedded_libbpf_CFLAGS = @embedded_libbpf_CFLAGS@ +embedded_libbpf_LIBS = @embedded_libbpf_LIBS@ +exec_prefix = @exec_prefix@ +fuzzer_CFLAGS = @fuzzer_CFLAGS@ +fuzzer_LDFLAGS = @fuzzer_LDFLAGS@ +gnutls_CFLAGS = @gnutls_CFLAGS@ +gnutls_LIBS = @gnutls_LIBS@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libbpf_CFLAGS = @libbpf_CFLAGS@ +libbpf_LIBS = @libbpf_LIBS@ +libdir = @libdir@ +libdnssec_SONAME = @libdnssec_SONAME@ +libdnssec_SOVERSION = @libdnssec_SOVERSION@ +libdnssec_VERSION_INFO = @libdnssec_VERSION_INFO@ +libedit_CFLAGS = @libedit_CFLAGS@ +libedit_LIBS = @libedit_LIBS@ +libelf_CFLAGS = @libelf_CFLAGS@ +libelf_LIBS = @libelf_LIBS@ +libexecdir = @libexecdir@ +libfstrm_CFLAGS = @libfstrm_CFLAGS@ +libfstrm_LIBS = @libfstrm_LIBS@ +libidn2_CFLAGS = @libidn2_CFLAGS@ +libidn2_LIBS = @libidn2_LIBS@ +libidn_CFLAGS = @libidn_CFLAGS@ +libidn_LIBS = @libidn_LIBS@ +libknot_SONAME = @libknot_SONAME@ +libknot_SOVERSION = @libknot_SOVERSION@ +libknot_VERSION_INFO = @libknot_VERSION_INFO@ +libmaxminddb_CFLAGS = @libmaxminddb_CFLAGS@ +libmaxminddb_LIBS = @libmaxminddb_LIBS@ +libnghttp2_CFLAGS = @libnghttp2_CFLAGS@ +libnghttp2_LIBS = @libnghttp2_LIBS@ +libprotobuf_c_CFLAGS = @libprotobuf_c_CFLAGS@ +libprotobuf_c_LIBS = @libprotobuf_c_LIBS@ +liburcu_CFLAGS = @liburcu_CFLAGS@ +liburcu_LIBS = @liburcu_LIBS@ +liburcu_PKGCONFIG = @liburcu_PKGCONFIG@ +libzscanner_SONAME = @libzscanner_SONAME@ +libzscanner_SOVERSION = @libzscanner_SOVERSION@ +libzscanner_VERSION_INFO = @libzscanner_VERSION_INFO@ +lmdb_CFLAGS = @lmdb_CFLAGS@ +lmdb_LIBS = @lmdb_LIBS@ +localedir = @localedir@ +localstatedir = @localstatedir@ +malloc_LIBS = @malloc_LIBS@ +mandir = @mandir@ +math_LIBS = @math_LIBS@ +mkdir_p = @mkdir_p@ +module_dir = @module_dir@ +module_instdir = @module_instdir@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +pkgconfigdir = @pkgconfigdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +pthread_LIBS = @pthread_LIBS@ +run_dir = @run_dir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +storage_dir = @storage_dir@ +sysconfdir = @sysconfdir@ +systemd_CFLAGS = @systemd_CFLAGS@ +systemd_LIBS = @systemd_LIBS@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +EXTRA_DIST = bpf-kernel-obj.c bpf-kernel.c +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign src/libknot/xdp/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign src/libknot/xdp/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + cscopelist-am ctags-am distclean distclean-generic \ + distclean-libtool distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + + +.PHONY: filter + +filter: + rm -f bpf-kernel.ll bpf-kernel.o bpf-kernel-obj.c + clang -S -target bpf -Wall -O2 -emit-llvm -c -DNDEBUG -o bpf-kernel.ll -I/usr/include/x86_64-linux-gnu -include ../../config.h bpf-kernel.c + llc -march=bpf -filetype=obj -o bpf-kernel.o bpf-kernel.ll + xxd -i bpf-kernel.o > bpf-kernel-obj.c + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/libknot/xdp/bpf-consts.h b/src/libknot/xdp/bpf-consts.h new file mode 100644 index 0000000..d40b1fd --- /dev/null +++ b/src/libknot/xdp/bpf-consts.h @@ -0,0 +1,23 @@ +/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +enum { + KNOT_XDP_LISTEN_PORT_MASK = 0xFFFF0000, /*!< Listen port option mask. */ + KNOT_XDP_LISTEN_PORT_ALL = 1 << 16, /*!< Listen on all ports. */ + KNOT_XDP_LISTEN_PORT_DROP = 1 << 17, /*!< Drop all incoming messages. */ +}; diff --git a/src/libknot/xdp/bpf-kernel-obj.c b/src/libknot/xdp/bpf-kernel-obj.c new file mode 100644 index 0000000..27f82f9 --- /dev/null +++ b/src/libknot/xdp/bpf-kernel-obj.c @@ -0,0 +1,144 @@ +unsigned char bpf_kernel_o[] = { + 0x7f, 0x45, 0x4c, 0x46, 0x02, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0xf7, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xd8, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, + 0x07, 0x00, 0x01, 0x00, 0xb7, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x61, 0x12, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x61, 0x13, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xbf, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x07, 0x06, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x2d, 0x26, 0x53, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x71, 0x35, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x71, 0x34, 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x04, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x4f, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xb7, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x04, 0x14, 0x00, + 0x86, 0xdd, 0x00, 0x00, 0x55, 0x04, 0x4c, 0x00, 0x08, 0x00, 0x00, 0x00, + 0xbf, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x04, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0xb7, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x2d, 0x24, 0x48, 0x00, 0x00, 0x00, 0x00, 0x00, 0x71, 0x64, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xbf, 0x45, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x57, 0x05, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0xb7, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x55, 0x05, 0x43, 0x00, 0x40, 0x00, 0x00, 0x00, + 0xb7, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x69, 0x35, 0x14, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x05, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x15, 0x05, 0x01, 0x00, 0x40, 0x00, 0x00, 0x00, 0xb7, 0x07, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x67, 0x04, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x57, 0x04, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x0f, 0x46, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x71, 0x35, 0x17, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x05, 0x00, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0xbf, 0x34, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x07, 0x04, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, + 0xb7, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2d, 0x24, 0x35, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x71, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x57, 0x05, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0xb7, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x55, 0x05, 0x31, 0x00, 0x60, 0x00, 0x00, 0x00, + 0xb7, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x71, 0x35, 0x14, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xbf, 0x46, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x55, 0x05, 0x06, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x07, 0x03, 0x00, 0x00, + 0x3e, 0x00, 0x00, 0x00, 0xb7, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x2d, 0x23, 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb7, 0x07, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x71, 0x45, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xbf, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb7, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x55, 0x05, 0x25, 0x00, 0x11, 0x00, 0x00, 0x00, + 0xbf, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x03, 0x00, 0x00, + 0x08, 0x00, 0x00, 0x00, 0xb7, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x2d, 0x23, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x62, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x69, 0x63, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xdc, 0x03, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xb7, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x6d, 0x23, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x61, 0x11, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x63, 0x1a, 0xfc, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xbf, 0xa2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x07, 0x02, 0x00, 0x00, 0xfc, 0xff, 0xff, 0xff, 0x18, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x85, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0xbf, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xb7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x15, 0x01, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb7, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x61, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x18, 0x03, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xbf, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x5f, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x15, 0x02, 0x04, 0x00, + 0x00, 0x00, 0x01, 0x00, 0x15, 0x02, 0x0a, 0x00, 0x00, 0x00, 0x02, 0x00, + 0xb7, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x69, 0x62, 0x02, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x5d, 0x21, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xb7, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x55, 0x07, 0x05, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x61, 0xa2, 0xfc, 0xff, 0x00, 0x00, 0x00, 0x00, + 0x18, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xb7, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x85, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x95, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, + 0x04, 0x00, 0xf1, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x77, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x03, 0x00, 0x90, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x87, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x03, 0x00, 0x90, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x03, 0x00, 0xc8, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x03, 0x00, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x69, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, + 0x12, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xd0, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x02, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0xa8, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x74, 0x65, 0x78, 0x74, 0x00, 0x6d, + 0x61, 0x70, 0x73, 0x00, 0x2e, 0x72, 0x65, 0x6c, 0x78, 0x64, 0x70, 0x5f, + 0x72, 0x65, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x5f, 0x75, 0x64, 0x70, + 0x00, 0x78, 0x73, 0x6b, 0x73, 0x5f, 0x6d, 0x61, 0x70, 0x00, 0x71, 0x69, + 0x64, 0x63, 0x6f, 0x6e, 0x66, 0x5f, 0x6d, 0x61, 0x70, 0x00, 0x78, 0x64, + 0x70, 0x5f, 0x72, 0x65, 0x64, 0x69, 0x72, 0x65, 0x63, 0x74, 0x5f, 0x75, + 0x64, 0x70, 0x5f, 0x66, 0x75, 0x6e, 0x63, 0x00, 0x62, 0x70, 0x66, 0x2d, + 0x6b, 0x65, 0x72, 0x6e, 0x65, 0x6c, 0x2e, 0x63, 0x00, 0x2e, 0x73, 0x74, + 0x72, 0x74, 0x61, 0x62, 0x00, 0x2e, 0x73, 0x79, 0x6d, 0x74, 0x61, 0x62, + 0x00, 0x4c, 0x42, 0x42, 0x30, 0x5f, 0x38, 0x00, 0x4c, 0x42, 0x42, 0x30, + 0x5f, 0x37, 0x00, 0x4c, 0x42, 0x42, 0x30, 0x5f, 0x31, 0x33, 0x00, 0x4c, + 0x42, 0x42, 0x30, 0x5f, 0x32, 0x32, 0x00, 0x4c, 0x42, 0x42, 0x30, 0x5f, + 0x32, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x04, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x8f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xd0, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0x04, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x10, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x61, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x38, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; +unsigned int bpf_kernel_o_len = 1688; diff --git a/src/libknot/xdp/bpf-kernel-obj.h b/src/libknot/xdp/bpf-kernel-obj.h new file mode 100644 index 0000000..7c3d759 --- /dev/null +++ b/src/libknot/xdp/bpf-kernel-obj.h @@ -0,0 +1,2 @@ +extern unsigned char bpf_kernel_o[]; +extern unsigned int bpf_kernel_o_len; diff --git a/src/libknot/xdp/bpf-kernel.c b/src/libknot/xdp/bpf-kernel.c new file mode 100644 index 0000000..a19a609 --- /dev/null +++ b/src/libknot/xdp/bpf-kernel.c @@ -0,0 +1,159 @@ +/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/udp.h> + +#include "bpf-consts.h" +#include "../../contrib/libbpf/include/uapi/linux/bpf.h" +#include "../../contrib/libbpf/bpf/bpf_endian.h" +#include "../../contrib/libbpf/bpf/bpf_helpers.h" + +/* Don't fragment flag. */ +#define IP_DF 0x4000 + +/* Assume netdev has no more than 128 queues. */ +#define QUEUE_MAX 128 + +/* A set entry here means that the corresponding queue_id + * has an active AF_XDP socket bound to it. */ +struct bpf_map_def SEC("maps") qidconf_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = QUEUE_MAX, +}; +struct bpf_map_def SEC("maps") xsks_map = { + .type = BPF_MAP_TYPE_XSKMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = QUEUE_MAX, +}; + +struct ipv6_frag_hdr { + unsigned char nexthdr; + unsigned char whatever[7]; +} __attribute__((packed)); + +SEC("xdp_redirect_udp") +int xdp_redirect_udp_func(struct xdp_md *ctx) +{ + const void *data = (void *)(long)ctx->data; + const void *data_end = (void *)(long)ctx->data_end; + + const struct ethhdr *eth = data; + const struct iphdr *ip4; + const struct ipv6hdr *ip6; + const struct udphdr *udp; + + __u8 ip_proto; + __u8 fragmented = 0; + + /* Parse Ethernet header. */ + if ((void *)eth + sizeof(*eth) > data_end) { + return XDP_DROP; + } + data += sizeof(*eth); + + /* Parse IPv4 or IPv6 header. */ + switch (eth->h_proto) { + case __constant_htons(ETH_P_IP): + ip4 = data; + if ((void *)ip4 + sizeof(*ip4) > data_end) { + return XDP_DROP; + } + if (ip4->version != 4) { + return XDP_DROP; + } + if (ip4->frag_off != 0 && + ip4->frag_off != __constant_htons(IP_DF)) { + fragmented = 1; + } + ip_proto = ip4->protocol; + udp = data + ip4->ihl * 4; + break; + case __constant_htons(ETH_P_IPV6): + ip6 = data; + if ((void *)ip6 + sizeof(*ip6) > data_end) { + return XDP_DROP; + } + if (ip6->version != 6) { + return XDP_DROP; + } + ip_proto = ip6->nexthdr; + data += sizeof(*ip6); + if (ip_proto == IPPROTO_FRAGMENT) { + fragmented = 1; + const struct ipv6_frag_hdr *frag = data; + if ((void *)frag + sizeof(*frag) > data_end) { + return XDP_DROP; + } + ip_proto = frag->nexthdr; + data += sizeof(*frag); + } + udp = data; + break; + default: + /* Also applies to VLAN. */ + return XDP_PASS; + } + + /* Treat UDP only. */ + if (ip_proto != IPPROTO_UDP) { + return XDP_PASS; + } + + /* Parse UDP header. */ + if ((void *)udp + sizeof(*udp) > data_end) { + return XDP_DROP; + } + + /* Check the UDP length. */ + if (data_end - (void *)udp < __bpf_ntohs(udp->len)) { + return XDP_DROP; + } + + /* Get the queue options. */ + int index = ctx->rx_queue_index; + int *qidconf = bpf_map_lookup_elem(&qidconf_map, &index); + if (!qidconf) { + return XDP_ABORTED; + } + + /* Treat specified destination ports only. */ + __u32 port_info = *qidconf; + switch (port_info & KNOT_XDP_LISTEN_PORT_MASK) { + case KNOT_XDP_LISTEN_PORT_DROP: + return XDP_DROP; + case KNOT_XDP_LISTEN_PORT_ALL: + break; + default: + if (udp->dest != port_info) { + return XDP_PASS; + } + } + + /* Drop fragmented UDP datagrams. */ + if (fragmented) { + return XDP_DROP; + } + + /* Forward the packet to user space. */ + return bpf_redirect_map(&xsks_map, index, 0); +} diff --git a/src/libknot/xdp/bpf-user.c b/src/libknot/xdp/bpf-user.c new file mode 100644 index 0000000..2201a5c --- /dev/null +++ b/src/libknot/xdp/bpf-user.c @@ -0,0 +1,287 @@ +/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <bpf/bpf.h> +#include <linux/if_link.h> +#include <net/if.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "libknot/endian.h" +#include "libknot/error.h" +#include "libknot/xdp/bpf-kernel-obj.h" +#include "libknot/xdp/bpf-user.h" +#include "libknot/xdp/eth.h" +#include "contrib/openbsd/strlcpy.h" + +#define NO_BPF_MAPS 2 + +static inline bool IS_ERR_OR_NULL(const void *ptr) +{ + return (ptr == NULL) || (unsigned long)ptr >= (unsigned long)-4095; +} + +static int prog_load(struct bpf_object **pobj, int *prog_fd) +{ + struct bpf_program *prog, *first_prog = NULL; + struct bpf_object *obj; + + obj = bpf_object__open_mem(bpf_kernel_o, bpf_kernel_o_len, NULL); + if (IS_ERR_OR_NULL(obj)) { + return KNOT_ENOENT; + } + + bpf_object__for_each_program(prog, obj) { + bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); + if (first_prog == NULL) { + first_prog = prog; + } + } + + if (first_prog == NULL) { + bpf_object__close(obj); + return KNOT_ENOENT; + } + + int ret = bpf_object__load(obj); + if (ret != 0) { + bpf_object__close(obj); + return KNOT_EINVAL; + } + + *pobj = obj; + *prog_fd = bpf_program__fd(first_prog); + + return KNOT_EOK; +} + +static int ensure_prog(struct kxsk_iface *iface, bool overwrite) +{ + if (bpf_kernel_o_len < 2) { + return KNOT_ENOTSUP; + } + + /* Use libbpf for extracting BPF byte-code from BPF-ELF object, and + * loading this into the kernel via bpf-syscall. */ + int prog_fd; + int ret = prog_load(&iface->prog_obj, &prog_fd); + if (ret != KNOT_EOK) { + return KNOT_EPROGRAM; + } + + ret = bpf_set_link_xdp_fd(iface->if_index, prog_fd, + overwrite ? 0 : XDP_FLAGS_UPDATE_IF_NOEXIST); + if (ret != 0) { + close(prog_fd); + } + if (ret == -EBUSY && !overwrite) { // We try accepting the present program. + uint32_t prog_id = 0; + ret = bpf_get_link_xdp_id(iface->if_index, &prog_id, 0); + if (ret == 0 && prog_id != 0) { + ret = prog_fd = bpf_prog_get_fd_by_id(prog_id); + } + } + if (ret < 0) { + return KNOT_EFD; + } else { + return prog_fd; + } +} + +/*! + * /brief Get FDs for the two maps and assign them into xsk_info-> fields. + * + * Inspired by xsk_lookup_bpf_maps() from libbpf before qidconf_map elimination. + */ +static int get_bpf_maps(int prog_fd, struct kxsk_iface *iface) +{ + uint32_t *map_ids = calloc(NO_BPF_MAPS, sizeof(*map_ids)); + if (map_ids == NULL) { + return KNOT_ENOMEM; + } + + struct bpf_prog_info prog_info = { + .nr_map_ids = NO_BPF_MAPS, + .map_ids = (__u64)(unsigned long)map_ids, + }; + + uint32_t prog_len = sizeof(struct bpf_prog_info); + int ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_len); + if (ret != 0) { + free(map_ids); + return ret; + } + + for (int i = 0; i < NO_BPF_MAPS; ++i) { + int fd = bpf_map_get_fd_by_id(map_ids[i]); + if (fd < 0) { + continue; + } + + struct bpf_map_info map_info; + uint32_t map_len = sizeof(struct bpf_map_info); + ret = bpf_obj_get_info_by_fd(fd, &map_info, &map_len); + if (ret != 0) { + close(fd); + continue; + } + + if (strcmp(map_info.name, "qidconf_map") == 0) { + iface->qidconf_map_fd = fd; + continue; + } + + if (strcmp(map_info.name, "xsks_map") == 0) { + iface->xsks_map_fd = fd; + continue; + } + + close(fd); + } + + if (iface->qidconf_map_fd < 0 || iface->xsks_map_fd < 0) { + close(iface->qidconf_map_fd); + close(iface->xsks_map_fd); + iface->qidconf_map_fd = iface->xsks_map_fd = -1; + free(map_ids); + return KNOT_ENOENT; + } + + free(map_ids); + return KNOT_EOK; +} + +static void unget_bpf_maps(struct kxsk_iface *iface) +{ + close(iface->qidconf_map_fd); + close(iface->xsks_map_fd); + iface->qidconf_map_fd = iface->xsks_map_fd = -1; +} + +int kxsk_socket_start(const struct kxsk_iface *iface, uint32_t listen_port, + struct xsk_socket *xsk) +{ + if (iface == NULL || xsk == NULL) { + return KNOT_EINVAL; + } + + int fd = xsk_socket__fd(xsk); + int ret = bpf_map_update_elem(iface->xsks_map_fd, &iface->if_queue, &fd, 0); + if (ret != 0) { + return ret; + } + + int qid = (listen_port & KNOT_XDP_LISTEN_PORT_MASK) | htobe16(listen_port & 0xFFFF); + ret = bpf_map_update_elem(iface->qidconf_map_fd, &iface->if_queue, &qid, 0); + if (ret != 0) { + bpf_map_delete_elem(iface->xsks_map_fd, &iface->if_queue); + } + return ret; +} + +void kxsk_socket_stop(const struct kxsk_iface *iface) +{ + if (iface == NULL) { + return; + } + + int qid = 0; + (void)bpf_map_update_elem(iface->qidconf_map_fd, &iface->if_queue, &qid, 0); + bpf_map_delete_elem(iface->xsks_map_fd, &iface->if_queue); +} + +int kxsk_iface_new(const char *if_name, int if_queue, knot_xdp_load_bpf_t load_bpf, + struct kxsk_iface **out_iface) +{ + if (if_name == NULL || out_iface == NULL) { + return KNOT_EINVAL; + } + + struct kxsk_iface *iface = calloc(1, sizeof(*iface) + IFNAMSIZ); + if (iface == NULL) { + return KNOT_ENOMEM; + } + iface->if_name = (char *)(iface + 1); + strlcpy((char *)iface->if_name, if_name, IFNAMSIZ); + iface->if_index = if_nametoindex(if_name); + if (iface->if_index == 0) { + free(iface); + return KNOT_EINVAL; + } + iface->if_queue = if_queue; + iface->qidconf_map_fd = iface->xsks_map_fd = -1; + + int ret; + switch (load_bpf) { + case KNOT_XDP_LOAD_BPF_NEVER: + (void)0; + uint32_t prog_id = 0; + ret = bpf_get_link_xdp_id(iface->if_index, &prog_id, 0); + if (ret == 0) { + if (prog_id == 0) { + ret = KNOT_EPROGRAM; + } else { + ret = bpf_prog_get_fd_by_id(prog_id); + } + } + break; + case KNOT_XDP_LOAD_BPF_ALWAYS_UNLOAD: + (void)bpf_set_link_xdp_fd(iface->if_index, -1, 0); + sleep(1); + // FALLTHROUGH + case KNOT_XDP_LOAD_BPF_ALWAYS: + ret = ensure_prog(iface, true); + break; + case KNOT_XDP_LOAD_BPF_MAYBE: + ret = ensure_prog(iface, false); + break; + default: + return KNOT_EINVAL; + } + + if (ret >= 0) { + ret = get_bpf_maps(ret, iface); + } + if (ret < 0) { + free(iface); + return ret; + } + + knot_xdp_mode_t mode = knot_eth_xdp_mode(iface->if_index); + if (mode == KNOT_XDP_MODE_NONE) { + free(iface); + return KNOT_ENOTSUP; + } + + *out_iface = iface; + return KNOT_EOK; +} + +void kxsk_iface_free(struct kxsk_iface *iface) +{ + if (iface == NULL) { + return; + } + + unget_bpf_maps(iface); + + if (iface->prog_obj != NULL) { + bpf_object__close(iface->prog_obj); + } + + free(iface); +} diff --git a/src/libknot/xdp/bpf-user.h b/src/libknot/xdp/bpf-user.h new file mode 100644 index 0000000..ce30582 --- /dev/null +++ b/src/libknot/xdp/bpf-user.h @@ -0,0 +1,112 @@ +/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <bpf/xsk.h> + +#include "libknot/xdp/xdp.h" + +struct kxsk_iface { + /*! Interface name. */ + const char *if_name; + /*! Interface name index (derived from ifname). */ + int if_index; + /*! Network card queue id. */ + int if_queue; + + /*! Configuration BPF map file descriptor. */ + int qidconf_map_fd; + /*! XSK BPF map file descriptor. */ + int xsks_map_fd; + + /*! BPF program object. */ + struct bpf_object *prog_obj; +}; + +struct kxsk_umem { + /*! Fill queue: passing memory frames to kernel - ready to receive. */ + struct xsk_ring_prod fq; + /*! Completion queue: passing memory frames from kernel - after send finishes. */ + struct xsk_ring_cons cq; + /*! Handle internal to libbpf. */ + struct xsk_umem *umem; + + /*! The memory frames. */ + struct umem_frame *frames; + /*! The number of free frames (for TX). */ + uint32_t tx_free_count; + /*! Stack of indices of the free frames (for TX). */ + uint16_t tx_free_indices[]; +}; + +struct knot_xdp_socket { + /*! Receive queue: passing arrived packets from kernel. */ + struct xsk_ring_cons rx; + /*! Transmit queue: passing packets to kernel for sending. */ + struct xsk_ring_prod tx; + /*! Information about memory frames for all the passed packets. */ + struct kxsk_umem *umem; + /*! Handle internal to libbpf. */ + struct xsk_socket *xsk; + + /*! Interface context. */ + const struct kxsk_iface *iface; + + /*! The kernel has to be woken up by a syscall indication. */ + bool kernel_needs_wakeup; +}; + +/*! + * \brief Set up BPF program and map for one XDP socket. + * + * \param if_name Name of the net iface (e.g. eth0). + * \param if_queue Network card queue id. + * \param load_bpf Insert BPF program into packet processing. + * \param out_iface Output: created interface context. + * + * \return KNOT_E* or -errno + */ +int kxsk_iface_new(const char *if_name, int if_queue, knot_xdp_load_bpf_t load_bpf, + struct kxsk_iface **out_iface); + +/*! + * \brief Unload BPF maps for a socket. + * + * \note This keeps the loaded BPF program. We don't care. + * + * \param iface Interface context to be freed. + */ +void kxsk_iface_free(struct kxsk_iface *iface); + +/*! + * \brief Activate this AF_XDP socket through the BPF maps. + * + * \param iface Interface context. + * \param listen_port Port to listen on, or KNOT_XDP_LISTEN_PORT_* flag. + * \param xsk Socket ctx. + * + * \return KNOT_E* or -errno + */ +int kxsk_socket_start(const struct kxsk_iface *iface, uint32_t listen_port, + struct xsk_socket *xsk); + +/*! + * \brief Deactivate this AF_XDP socket through the BPF maps. + * + * \param iface Interface context. + */ +void kxsk_socket_stop(const struct kxsk_iface *iface); diff --git a/src/libknot/xdp/eth.c b/src/libknot/xdp/eth.c new file mode 100644 index 0000000..b3614b6 --- /dev/null +++ b/src/libknot/xdp/eth.c @@ -0,0 +1,129 @@ +/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <bpf/libbpf.h> +#include <errno.h> +#include <ifaddrs.h> +#include <linux/ethtool.h> +#include <linux/if.h> +#include <linux/if_link.h> +#include <linux/sockios.h> +#include <sys/ioctl.h> +#include <unistd.h> + +#include "contrib/openbsd/strlcpy.h" +#include "contrib/sockaddr.h" +#include "libknot/attribute.h" +#include "libknot/errcode.h" +#include "libknot/xdp/eth.h" + +_public_ +int knot_eth_queues(const char *devname) +{ + if (devname == NULL) { + return KNOT_EINVAL; + } + + int fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) { + return knot_map_errno(); + } + + struct ethtool_channels ch = { + .cmd = ETHTOOL_GCHANNELS + }; + struct ifreq ifr = { + .ifr_data = &ch + }; + strlcpy(ifr.ifr_name, devname, IFNAMSIZ); + + int ret = ioctl(fd, SIOCETHTOOL, &ifr); + if (ret != 0) { + if (errno == EOPNOTSUPP) { + ret = 1; + } else { + ret = knot_map_errno(); + } + } else { + if (ch.combined_count == 0) { + ret = 1; + } else { + ret = ch.combined_count; + } + } + + close(fd); + return ret; +} + +_public_ +int knot_eth_name_from_addr(const struct sockaddr_storage *addr, char *out, + size_t out_len) +{ + if (addr == NULL || out == NULL) { + return KNOT_EINVAL; + } + + struct ifaddrs *ifaces = NULL; + if (getifaddrs(&ifaces) != 0) { + return -errno; + } + + size_t matches = 0; + char *match_name = NULL; + + for (struct ifaddrs *ifa = ifaces; ifa != NULL; ifa = ifa->ifa_next) { + const struct sockaddr_storage *ifss = (struct sockaddr_storage *)ifa->ifa_addr; + if (ifss == NULL) { // Observed on interfaces without any address. + continue; + } + + if ((ifss->ss_family == addr->ss_family && sockaddr_is_any(addr)) || + sockaddr_cmp(ifss, addr, true) == 0) { + matches++; + match_name = ifa->ifa_name; + } + } + + if (matches == 1) { + size_t len = strlcpy(out, match_name, out_len); + freeifaddrs(ifaces); + return (len >= out_len) ? KNOT_ESPACE : KNOT_EOK; + } + + freeifaddrs(ifaces); + return matches == 0 ? KNOT_EADDRNOTAVAIL : KNOT_ELIMIT; +} + +_public_ +knot_xdp_mode_t knot_eth_xdp_mode(int if_index) +{ + struct xdp_link_info info; + int ret = bpf_get_link_xdp_info(if_index, &info, sizeof(info), 0); + if (ret != 0) { + return KNOT_XDP_MODE_NONE; + } + + switch (info.attach_mode) { + case XDP_ATTACHED_DRV: + case XDP_ATTACHED_HW: + return KNOT_XDP_MODE_FULL; + case XDP_ATTACHED_SKB: + return KNOT_XDP_MODE_EMUL; + default: + return KNOT_XDP_MODE_NONE; + } +} diff --git a/src/libknot/xdp/eth.h b/src/libknot/xdp/eth.h new file mode 100644 index 0000000..d743741 --- /dev/null +++ b/src/libknot/xdp/eth.h @@ -0,0 +1,57 @@ +/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <stddef.h> + +/*! + * \brief Get number of combined queues of a network interface. + * + * \param devname Name of the ethdev (e.g. eth1). + * + * \retval < 0 KNOT_E* if error. + * \retval 1 Default no of queues if the dev does not support. + * \return > 0 Number of queues. + */ +int knot_eth_queues(const char *devname); + +/*! + * \brief Get the corresponding network interface name for the address. + * + * \param addr Address of the inteface. + * \param out Output buffer for the interface name. + * \param out_len Size of the output buffer. + * + * \return KNOT_E* + */ +int knot_eth_name_from_addr(const struct sockaddr_storage *addr, char *out, + size_t out_len); + +typedef enum { + KNOT_XDP_MODE_NONE, /*!< XDP not available, BPF not loaded, or error. */ + KNOT_XDP_MODE_FULL, /*!< Full XDP support in driver or HW. */ + KNOT_XDP_MODE_EMUL, /*!< Emulated XDP support. */ +} knot_xdp_mode_t; + +/*! + * \brief Return the current XDP mode of a network inteface. + * + * \param if_index Index of the interface, output from if_nametoindex(). + * + * \return Current XDP mode. + */ +knot_xdp_mode_t knot_eth_xdp_mode(int if_index); diff --git a/src/libknot/xdp/xdp.c b/src/libknot/xdp/xdp.c new file mode 100644 index 0000000..29dfe61 --- /dev/null +++ b/src/libknot/xdp/xdp.c @@ -0,0 +1,717 @@ +/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <errno.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/udp.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "libknot/attribute.h" +#include "libknot/endian.h" +#include "libknot/errcode.h" +#include "libknot/xdp/bpf-user.h" +#include "libknot/xdp/xdp.h" +#include "contrib/macros.h" + +/* Don't fragment flag. */ +#define IP_DF 0x4000 + +#define FRAME_SIZE 2048 +#define UMEM_FRAME_COUNT_RX 4096 +#define UMEM_FRAME_COUNT_TX UMEM_FRAME_COUNT_RX // No reason to differ so far. +#define UMEM_RING_LEN_RX (UMEM_FRAME_COUNT_RX * 2) +#define UMEM_RING_LEN_TX (UMEM_FRAME_COUNT_TX * 2) +#define UMEM_FRAME_COUNT (UMEM_FRAME_COUNT_RX + UMEM_FRAME_COUNT_TX) + +/* With recent compilers we statically check #defines for settings that + * get refused by AF_XDP drivers (in current versions, at least). */ +#if (__STDC_VERSION__ >= 201112L) +#define IS_POWER_OF_2(n) (((n) & (n - 1)) == 0) +_Static_assert((FRAME_SIZE == 4096 || FRAME_SIZE == 2048) + && IS_POWER_OF_2(UMEM_FRAME_COUNT) + /* The following two inequalities aren't required by drivers, but they allow + * our implementation assume that the rings can never get filled. */ + && IS_POWER_OF_2(UMEM_RING_LEN_RX) && UMEM_RING_LEN_RX > UMEM_FRAME_COUNT_RX + && IS_POWER_OF_2(UMEM_RING_LEN_TX) && UMEM_RING_LEN_TX > UMEM_FRAME_COUNT_TX + && UMEM_FRAME_COUNT_TX <= (1 << 16) /* see tx_free_indices */ + , "Incorrect #define combination for AF_XDP."); +#endif + +/*! \brief The memory layout of IPv4 umem frame. */ +struct udpv4 { + union { + uint8_t bytes[1]; + struct { + struct ethhdr eth; // No VLAN support; CRC at the "end" of .data! + struct iphdr ipv4; + struct udphdr udp; + uint8_t data[]; + } __attribute__((packed)); + }; +}; + +/*! \brief The memory layout of IPv6 umem frame. */ +struct udpv6 { + union { + uint8_t bytes[1]; + struct { + struct ethhdr eth; // No VLAN support; CRC at the "end" of .data! + struct ipv6hdr ipv6; + struct udphdr udp; + uint8_t data[]; + } __attribute__((packed)); + }; +}; + +/*! \brief The memory layout of each umem frame. */ +struct umem_frame { + union { + uint8_t bytes[FRAME_SIZE]; + union { + struct udpv4 udpv4; + struct udpv6 udpv6; + }; + }; +}; + +_public_ +const size_t KNOT_XDP_PAYLOAD_OFFSET4 = offsetof(struct udpv4, data) + offsetof(struct umem_frame, udpv4); +_public_ +const size_t KNOT_XDP_PAYLOAD_OFFSET6 = offsetof(struct udpv6, data) + offsetof(struct umem_frame, udpv6); + +static int configure_xsk_umem(struct kxsk_umem **out_umem) +{ + /* Allocate memory and call driver to create the UMEM. */ + struct kxsk_umem *umem = calloc(1, + offsetof(struct kxsk_umem, tx_free_indices) + + sizeof(umem->tx_free_indices[0]) * UMEM_FRAME_COUNT_TX); + if (umem == NULL) { + return KNOT_ENOMEM; + } + + int ret = posix_memalign((void **)&umem->frames, getpagesize(), + FRAME_SIZE * UMEM_FRAME_COUNT); + if (ret != 0) { + free(umem); + return KNOT_ENOMEM; + } + + const struct xsk_umem_config config = { + .fill_size = UMEM_RING_LEN_RX, + .comp_size = UMEM_RING_LEN_TX, + .frame_size = FRAME_SIZE, + .frame_headroom = 0, + }; + + ret = xsk_umem__create(&umem->umem, umem->frames, FRAME_SIZE * UMEM_FRAME_COUNT, + &umem->fq, &umem->cq, &config); + if (ret != KNOT_EOK) { + free(umem->frames); + free(umem); + return ret; + } + *out_umem = umem; + + /* Designate the starting chunk of buffers for TX, and put them onto the stack. */ + umem->tx_free_count = UMEM_FRAME_COUNT_TX; + for (uint32_t i = 0; i < UMEM_FRAME_COUNT_TX; ++i) { + umem->tx_free_indices[i] = i; + } + + /* Designate the rest of buffers for RX, and pass them to the driver. */ + uint32_t idx = 0; + ret = xsk_ring_prod__reserve(&umem->fq, UMEM_FRAME_COUNT_RX, &idx); + if (ret != UMEM_FRAME_COUNT - UMEM_FRAME_COUNT_TX) { + assert(0); + return KNOT_ERROR; + } + assert(idx == 0); + for (uint32_t i = UMEM_FRAME_COUNT_TX; i < UMEM_FRAME_COUNT; ++i) { + *xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * FRAME_SIZE; + } + xsk_ring_prod__submit(&umem->fq, UMEM_FRAME_COUNT_RX); + + return KNOT_EOK; +} + +static void deconfigure_xsk_umem(struct kxsk_umem *umem) +{ + (void)xsk_umem__delete(umem->umem); + free(umem->frames); + free(umem); +} + +static int configure_xsk_socket(struct kxsk_umem *umem, + const struct kxsk_iface *iface, + knot_xdp_socket_t **out_sock) +{ + knot_xdp_socket_t *xsk_info = calloc(1, sizeof(*xsk_info)); + if (xsk_info == NULL) { + return KNOT_ENOMEM; + } + xsk_info->iface = iface; + xsk_info->umem = umem; + + const struct xsk_socket_config sock_conf = { + .tx_size = UMEM_RING_LEN_TX, + .rx_size = UMEM_RING_LEN_RX, + .libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD, + }; + + int ret = xsk_socket__create(&xsk_info->xsk, iface->if_name, + iface->if_queue, umem->umem, + &xsk_info->rx, &xsk_info->tx, &sock_conf); + if (ret != 0) { + free(xsk_info); + return ret; + } + + *out_sock = xsk_info; + return KNOT_EOK; +} + +_public_ +int knot_xdp_init(knot_xdp_socket_t **socket, const char *if_name, int if_queue, + uint32_t listen_port, knot_xdp_load_bpf_t load_bpf) +{ + if (socket == NULL || if_name == NULL) { + return KNOT_EINVAL; + } + + struct kxsk_iface *iface; + int ret = kxsk_iface_new(if_name, if_queue, load_bpf, &iface); + if (ret != KNOT_EOK) { + return ret; + } + + /* Initialize shared packet_buffer for umem usage. */ + struct kxsk_umem *umem = NULL; + ret = configure_xsk_umem(&umem); + if (ret != KNOT_EOK) { + kxsk_iface_free(iface); + return ret; + } + + ret = configure_xsk_socket(umem, iface, socket); + if (ret != KNOT_EOK) { + deconfigure_xsk_umem(umem); + kxsk_iface_free(iface); + return ret; + } + + ret = kxsk_socket_start(iface, listen_port, (*socket)->xsk); + if (ret != KNOT_EOK) { + xsk_socket__delete((*socket)->xsk); + deconfigure_xsk_umem(umem); + kxsk_iface_free(iface); + free(*socket); + *socket = NULL; + return ret; + } + + return ret; +} + +_public_ +void knot_xdp_deinit(knot_xdp_socket_t *socket) +{ + if (socket == NULL) { + return; + } + + kxsk_socket_stop(socket->iface); + xsk_socket__delete(socket->xsk); + deconfigure_xsk_umem(socket->umem); + + kxsk_iface_free((struct kxsk_iface *)/*const-cast*/socket->iface); + free(socket); +} + +_public_ +int knot_xdp_socket_fd(knot_xdp_socket_t *socket) +{ + if (socket == NULL) { + return 0; + } + + return xsk_socket__fd(socket->xsk); +} + +static void tx_free_relative(struct kxsk_umem *umem, uint64_t addr_relative) +{ + /* The address may not point to *start* of buffer, but `/` solves that. */ + uint64_t index = addr_relative / FRAME_SIZE; + assert(index < UMEM_FRAME_COUNT); + umem->tx_free_indices[umem->tx_free_count++] = index; +} + +_public_ +void knot_xdp_send_prepare(knot_xdp_socket_t *socket) +{ + if (socket == NULL) { + return; + } + + struct kxsk_umem *const umem = socket->umem; + struct xsk_ring_cons *const cq = &umem->cq; + + uint32_t idx = 0; + const uint32_t completed = xsk_ring_cons__peek(cq, UINT32_MAX, &idx); + if (completed == 0) { + return; + } + assert(umem->tx_free_count + completed <= UMEM_FRAME_COUNT_TX); + + for (uint32_t i = 0; i < completed; ++i) { + uint64_t addr_relative = *xsk_ring_cons__comp_addr(cq, idx++); + tx_free_relative(umem, addr_relative); + } + + xsk_ring_cons__release(cq, completed); +} + +static struct umem_frame *alloc_tx_frame(struct kxsk_umem *umem) +{ + if (unlikely(umem->tx_free_count == 0)) { + return NULL; + } + + uint32_t index = umem->tx_free_indices[--umem->tx_free_count]; + return umem->frames + index; +} + +_public_ +int knot_xdp_send_alloc(knot_xdp_socket_t *socket, bool ipv6, knot_xdp_msg_t *out, + const knot_xdp_msg_t *in_reply_to) +{ + if (socket == NULL || out == NULL) { + return KNOT_EINVAL; + } + + size_t ofs = ipv6 ? KNOT_XDP_PAYLOAD_OFFSET6 : KNOT_XDP_PAYLOAD_OFFSET4; + + struct umem_frame *uframe = alloc_tx_frame(socket->umem); + if (uframe == NULL) { + return KNOT_ENOMEM; + } + + memset(out, 0, sizeof(*out)); + + out->payload.iov_base = ipv6 ? uframe->udpv6.data : uframe->udpv4.data; + out->payload.iov_len = MIN(UINT16_MAX, FRAME_SIZE - ofs); + + const struct ethhdr *eth = (struct ethhdr *)uframe; + out->eth_from = (void *)ð->h_source; + out->eth_to = (void *)ð->h_dest; + + if (in_reply_to != NULL) { + memcpy(out->eth_from, in_reply_to->eth_to, ETH_ALEN); + memcpy(out->eth_to, in_reply_to->eth_from, ETH_ALEN); + + memcpy(&out->ip_from, &in_reply_to->ip_to, sizeof(out->ip_from)); + memcpy(&out->ip_to, &in_reply_to->ip_from, sizeof(out->ip_to)); + } + + return KNOT_EOK; +} + +static uint16_t from32to16(uint32_t sum) +{ + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return sum; +} + +static uint16_t ipv4_checksum(const uint8_t *ipv4_hdr) +{ + const uint16_t *h = (const uint16_t *)ipv4_hdr; + uint32_t sum32 = 0; + for (int i = 0; i < 10; ++i) { + if (i != 5) { + sum32 += h[i]; + } + } + return ~from32to16(sum32); +} + +/* Checksum endianness implementation notes for ipv4_checksum() and udp_checksum_step(). + * + * The basis for checksum is addition on big-endian 16-bit words, with bit 16 carrying + * over to bit 0. That can be viewed as first byte carrying to the second and the + * second one carrying back to the first one, i.e. a symmetrical situation. + * Therefore the result is the same even when arithmetics is done on litte-endian (!) + */ + +static void udp_checksum_step(size_t *result, const void *_data, size_t _data_len) +{ + assert(!(_data_len & 1)); + const uint16_t *data = _data; + size_t len = _data_len / 2; + while (len-- > 0) { + *result += *data++; + } +} + +static void udp_checksum_finish(size_t *result) +{ + while (*result > 0xffff) { + *result = (*result & 0xffff) + (*result >> 16); + } + if (*result != 0xffff) { + *result = ~*result; + } +} + +static uint8_t *msg_uframe_ptr(knot_xdp_socket_t *socket, const knot_xdp_msg_t *msg, + /* Next parameters are just for debugging. */ + bool ipv6) +{ + uint8_t *uNULL = NULL; + uint8_t *uframe_p = uNULL + ((msg->payload.iov_base - NULL) & ~(FRAME_SIZE - 1)); + +#ifndef NDEBUG + intptr_t pd = (uint8_t *)msg->payload.iov_base - uframe_p + - (ipv6 ? KNOT_XDP_PAYLOAD_OFFSET6 : KNOT_XDP_PAYLOAD_OFFSET4); + /* This assertion might fire in some OK cases. For example, the second branch + * had to be added for cases with "emulated" AF_XDP support. */ + assert(pd == XDP_PACKET_HEADROOM || pd == 0); + + const uint8_t *umem_mem_start = socket->umem->frames->bytes; + const uint8_t *umem_mem_end = umem_mem_start + FRAME_SIZE * UMEM_FRAME_COUNT; + assert(umem_mem_start <= uframe_p && uframe_p < umem_mem_end); +#endif + return uframe_p; +} + +static void xsk_sendmsg_ipv4(knot_xdp_socket_t *socket, const knot_xdp_msg_t *msg, + uint32_t index) +{ + uint8_t *uframe_p = msg_uframe_ptr(socket, msg, false); + struct umem_frame *uframe = (struct umem_frame *)uframe_p; + struct udpv4 *h = &uframe->udpv4; + + const struct sockaddr_in *src_v4 = (const struct sockaddr_in *)&msg->ip_from; + const struct sockaddr_in *dst_v4 = (const struct sockaddr_in *)&msg->ip_to; + const uint16_t udp_len = sizeof(h->udp) + msg->payload.iov_len; + + h->eth.h_proto = __constant_htons(ETH_P_IP); + + h->ipv4.version = IPVERSION; + h->ipv4.ihl = 5; + h->ipv4.tos = 0; + h->ipv4.tot_len = htobe16(5 * 4 + udp_len); + h->ipv4.id = 0; + h->ipv4.frag_off = 0; + h->ipv4.ttl = IPDEFTTL; + h->ipv4.protocol = IPPROTO_UDP; + memcpy(&h->ipv4.saddr, &src_v4->sin_addr, sizeof(src_v4->sin_addr)); + memcpy(&h->ipv4.daddr, &dst_v4->sin_addr, sizeof(dst_v4->sin_addr)); + h->ipv4.check = ipv4_checksum(h->bytes + sizeof(struct ethhdr)); + + h->udp.len = htobe16(udp_len); + h->udp.source = src_v4->sin_port; + h->udp.dest = dst_v4->sin_port; + h->udp.check = 0; // Optional for IPv4 - not computed. + + *xsk_ring_prod__tx_desc(&socket->tx, index) = (struct xdp_desc){ + .addr = h->bytes - socket->umem->frames->bytes, + .len = KNOT_XDP_PAYLOAD_OFFSET4 + msg->payload.iov_len + }; +} + +static void xsk_sendmsg_ipv6(knot_xdp_socket_t *socket, const knot_xdp_msg_t *msg, + uint32_t index) +{ + uint8_t *uframe_p = msg_uframe_ptr(socket, msg, true); + struct umem_frame *uframe = (struct umem_frame *)uframe_p; + struct udpv6 *h = &uframe->udpv6; + + const struct sockaddr_in6 *src_v6 = (const struct sockaddr_in6 *)&msg->ip_from; + const struct sockaddr_in6 *dst_v6 = (const struct sockaddr_in6 *)&msg->ip_to; + const uint16_t udp_len = sizeof(h->udp) + msg->payload.iov_len; + + h->eth.h_proto = __constant_htons(ETH_P_IPV6); + + h->ipv6.version = 6; + h->ipv6.priority = 0; + memset(h->ipv6.flow_lbl, 0, sizeof(h->ipv6.flow_lbl)); + h->ipv6.payload_len = htobe16(udp_len); + h->ipv6.nexthdr = IPPROTO_UDP; + h->ipv6.hop_limit = IPDEFTTL; + memcpy(&h->ipv6.saddr, &src_v6->sin6_addr, sizeof(src_v6->sin6_addr)); + memcpy(&h->ipv6.daddr, &dst_v6->sin6_addr, sizeof(dst_v6->sin6_addr)); + + h->udp.len = htobe16(udp_len); + h->udp.source = src_v6->sin6_port; + h->udp.dest = dst_v6->sin6_port; + h->udp.check = 0; // Mandatory for IPv6 - computed afterwards. + + size_t chk = 0; + udp_checksum_step(&chk, &h->ipv6.saddr, sizeof(h->ipv6.saddr)); + udp_checksum_step(&chk, &h->ipv6.daddr, sizeof(h->ipv6.daddr)); + udp_checksum_step(&chk, &h->udp.len, sizeof(h->udp.len)); + __be16 version = htobe16(h->ipv6.nexthdr); + udp_checksum_step(&chk, &version, sizeof(version)); + udp_checksum_step(&chk, &h->udp, sizeof(h->udp)); + size_t padded_len = msg->payload.iov_len; + if (padded_len & 1) { + ((uint8_t *)msg->payload.iov_base)[padded_len++] = 0; + } + udp_checksum_step(&chk, msg->payload.iov_base, padded_len); + udp_checksum_finish(&chk); + h->udp.check = chk; + + *xsk_ring_prod__tx_desc(&socket->tx, index) = (struct xdp_desc){ + .addr = h->bytes - socket->umem->frames->bytes, + .len = KNOT_XDP_PAYLOAD_OFFSET6 + msg->payload.iov_len + }; +} + +_public_ +int knot_xdp_send(knot_xdp_socket_t *socket, const knot_xdp_msg_t msgs[], + uint32_t count, uint32_t *sent) +{ + if (socket == NULL || msgs == NULL || sent == NULL) { + return KNOT_EINVAL; + } + + /* Now we want to do something close to + * xsk_ring_prod__reserve(&socket->tx, count, *idx) + * but we don't know in advance if we utilize *whole* `count`, + * and the API doesn't allow "cancelling reservations". + * Therefore we handle `socket->tx.cached_prod` by hand; + * that's simplified by the fact that there is always free space. + */ + assert(UMEM_RING_LEN_TX > UMEM_FRAME_COUNT_TX); + uint32_t idx = socket->tx.cached_prod; + + for (uint32_t i = 0; i < count; ++i) { + const knot_xdp_msg_t *msg = &msgs[i]; + + if (msg->payload.iov_len && msg->ip_from.sin6_family == AF_INET) { + xsk_sendmsg_ipv4(socket, msg, idx++); + } else if (msg->payload.iov_len && msg->ip_from.sin6_family == AF_INET6) { + xsk_sendmsg_ipv6(socket, msg, idx++); + } else { + /* Some problem; we just ignore this message. */ + uint64_t addr_relative = (uint8_t *)msg->payload.iov_base + - socket->umem->frames->bytes; + tx_free_relative(socket->umem, addr_relative); + } + } + + *sent = idx - socket->tx.cached_prod; + assert(*sent <= count); + socket->tx.cached_prod = idx; + xsk_ring_prod__submit(&socket->tx, *sent); + socket->kernel_needs_wakeup = true; + + return KNOT_EOK; +} + +_public_ +int knot_xdp_send_finish(knot_xdp_socket_t *socket) +{ + if (socket == NULL) { + return KNOT_EINVAL; + } + + /* Trigger sending queued packets. */ + if (!socket->kernel_needs_wakeup) { + return KNOT_EOK; + } + + int ret = sendto(xsk_socket__fd(socket->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + const bool is_ok = (ret >= 0); + // List of "safe" errors taken from + // https://github.com/torvalds/linux/blame/master/samples/bpf/xdpsock_user.c + const bool is_again = !is_ok && (errno == ENOBUFS || errno == EAGAIN + || errno == EBUSY || errno == ENETDOWN); + // Some of the !is_ok cases are a little unclear - what to do about the syscall, + // including how caller of _sendmsg_finish() should react. + if (is_ok || !is_again) { + socket->kernel_needs_wakeup = false; + } + if (is_again) { + return KNOT_EAGAIN; + } else if (is_ok) { + return KNOT_EOK; + } else { + return -errno; + } + /* This syscall might be avoided with a newer kernel feature (>= 5.4): + https://www.kernel.org/doc/html/latest/networking/af_xdp.html#xdp-use-need-wakeup-bind-flag + Unfortunately it's not easy to continue supporting older kernels + when using this feature on newer ones. + */ +} + +static void rx_desc(knot_xdp_socket_t *socket, const struct xdp_desc *desc, + knot_xdp_msg_t *msg) +{ + uint8_t *uframe_p = socket->umem->frames->bytes + desc->addr; + const struct ethhdr *eth = (struct ethhdr *)uframe_p; + const struct iphdr *ip4 = NULL; + const struct ipv6hdr *ip6 = NULL; + const struct udphdr *udp = NULL; + + switch (eth->h_proto) { + case __constant_htons(ETH_P_IP): + ip4 = (struct iphdr *)(uframe_p + sizeof(struct ethhdr)); + // Next conditions are ensured by the BPF filter. + assert(ip4->version == 4); + assert(ip4->frag_off == 0 || + ip4->frag_off == __constant_htons(IP_DF)); + assert(ip4->protocol == IPPROTO_UDP); + // IPv4 header checksum is not verified! + udp = (struct udphdr *)(uframe_p + sizeof(struct ethhdr) + + ip4->ihl * 4); + break; + case __constant_htons(ETH_P_IPV6): + ip6 = (struct ipv6hdr *)(uframe_p + sizeof(struct ethhdr)); + // Next conditions are ensured by the BPF filter. + assert(ip6->version == 6); + assert(ip6->nexthdr == IPPROTO_UDP); + udp = (struct udphdr *)(uframe_p + sizeof(struct ethhdr) + + sizeof(struct ipv6hdr)); + break; + default: + assert(0); + msg->payload.iov_len = 0; + return; + } + // UDP checksum is not verified! + + assert(eth && (!!ip4 != !!ip6) && udp); + + // Process the packet; ownership is passed on, beware of holding frames. + + msg->payload.iov_base = (uint8_t *)udp + sizeof(struct udphdr); + msg->payload.iov_len = be16toh(udp->len) - sizeof(struct udphdr); + + msg->eth_from = (void *)ð->h_source; + msg->eth_to = (void *)ð->h_dest; + + if (ip4 != NULL) { + struct sockaddr_in *src_v4 = (struct sockaddr_in *)&msg->ip_from; + struct sockaddr_in *dst_v4 = (struct sockaddr_in *)&msg->ip_to; + memcpy(&src_v4->sin_addr, &ip4->saddr, sizeof(src_v4->sin_addr)); + memcpy(&dst_v4->sin_addr, &ip4->daddr, sizeof(dst_v4->sin_addr)); + src_v4->sin_port = udp->source; + dst_v4->sin_port = udp->dest; + src_v4->sin_family = AF_INET; + dst_v4->sin_family = AF_INET; + } else { + assert(ip6); + struct sockaddr_in6 *src_v6 = (struct sockaddr_in6 *)&msg->ip_from; + struct sockaddr_in6 *dst_v6 = (struct sockaddr_in6 *)&msg->ip_to; + memcpy(&src_v6->sin6_addr, &ip6->saddr, sizeof(src_v6->sin6_addr)); + memcpy(&dst_v6->sin6_addr, &ip6->daddr, sizeof(dst_v6->sin6_addr)); + src_v6->sin6_port = udp->source; + dst_v6->sin6_port = udp->dest; + src_v6->sin6_family = AF_INET6; + dst_v6->sin6_family = AF_INET6; + // Flow label is ignored. + } +} + +_public_ +int knot_xdp_recv(knot_xdp_socket_t *socket, knot_xdp_msg_t msgs[], + uint32_t max_count, uint32_t *count) +{ + if (socket == NULL || msgs == NULL || count == NULL) { + return KNOT_EINVAL; + } + + uint32_t idx = 0; + const uint32_t available = xsk_ring_cons__peek(&socket->rx, max_count, &idx); + if (available == 0) { + *count = 0; + return KNOT_EOK; + } + assert(available <= max_count); + + for (uint32_t i = 0; i < available; ++i) { + rx_desc(socket, xsk_ring_cons__rx_desc(&socket->rx, idx++), &msgs[i]); + } + + xsk_ring_cons__release(&socket->rx, available); + *count = available; + + return KNOT_EOK; +} + +_public_ +void knot_xdp_recv_finish(knot_xdp_socket_t *socket, const knot_xdp_msg_t msgs[], + uint32_t count) +{ + if (socket == NULL || msgs == NULL) { + return; + } + + struct kxsk_umem *const umem = socket->umem; + struct xsk_ring_prod *const fq = &umem->fq; + + uint32_t idx = 0; + const uint32_t reserved = xsk_ring_prod__reserve(fq, count, &idx); + assert(reserved == count); + + for (uint32_t i = 0; i < reserved; ++i) { + uint8_t *uframe_p = msg_uframe_ptr(socket, &msgs[i], + msgs[i].ip_from.sin6_family == AF_INET6); + uint64_t offset = uframe_p - umem->frames->bytes; + *xsk_ring_prod__fill_addr(fq, idx++) = offset; + } + + xsk_ring_prod__submit(fq, reserved); +} + +_public_ +void knot_xdp_info(const knot_xdp_socket_t *socket, FILE *file) +{ + if (socket == NULL || file == NULL) { + return; + } + + // The number of busy frames + #define RING_BUSY(ring) \ + ((*(ring)->producer - *(ring)->consumer) & (ring)->mask) + + #define RING_PRINFO(name, ring) \ + fprintf(file, "Ring %s: size %4d, busy %4d (prod %4d, cons %4d)\n", \ + name, (unsigned)(ring)->size, \ + (unsigned)RING_BUSY((ring)), \ + (unsigned)*(ring)->producer, (unsigned)*(ring)->consumer) + + const int rx_busyf = RING_BUSY(&socket->umem->fq) + RING_BUSY(&socket->rx); + fprintf(file, "\nLOST RX frames: %4d", (int)(UMEM_FRAME_COUNT_RX - rx_busyf)); + + const int tx_busyf = RING_BUSY(&socket->umem->cq) + RING_BUSY(&socket->tx); + const int tx_freef = socket->umem->tx_free_count; + fprintf(file, "\nLOST TX frames: %4d\n", (int)(UMEM_FRAME_COUNT_TX - tx_busyf - tx_freef)); + + RING_PRINFO("FQ", &socket->umem->fq); + RING_PRINFO("RX", &socket->rx); + RING_PRINFO("TX", &socket->tx); + RING_PRINFO("CQ", &socket->umem->cq); + fprintf(file, "TX free frames: %4d\n", tx_freef); +} diff --git a/src/libknot/xdp/xdp.h b/src/libknot/xdp/xdp.h new file mode 100644 index 0000000..74a845a --- /dev/null +++ b/src/libknot/xdp/xdp.h @@ -0,0 +1,167 @@ +/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <netinet/in.h> + +#include "libknot/xdp/bpf-consts.h" + +#ifdef ENABLE_XDP +#define KNOT_XDP_AVAILABLE 1 +#endif + +/*! \brief A packet with src & dst MAC & IP addrs + UDP payload. */ +typedef struct knot_xdp_msg knot_xdp_msg_t; +struct knot_xdp_msg { + struct sockaddr_in6 ip_from; + struct sockaddr_in6 ip_to; + uint8_t *eth_from; + uint8_t *eth_to; + struct iovec payload; +}; + +/*! + * \brief Styles of loading BPF program. + * + * \note In *all* the cases loading can only succeed if at the end + * a compatible BPF program is loaded on the interface. + */ +typedef enum { + KNOT_XDP_LOAD_BPF_NEVER, /*!< Do not load; error out if not loaded already. */ + KNOT_XDP_LOAD_BPF_ALWAYS, /*!< Always load a program (overwrite it). */ + KNOT_XDP_LOAD_BPF_ALWAYS_UNLOAD, /*!< KNOT_XDP_LOAD_BPF_ALWAYS + unload previous. */ + KNOT_XDP_LOAD_BPF_MAYBE, /*!< Try with present program or load if none. */ + /* Implementation caveat: when re-using program in _MAYBE case, we get a message: + * libbpf: Kernel error message: XDP program already attached */ +} knot_xdp_load_bpf_t; + +/*! \brief Context structure for one XDP socket. */ +typedef struct knot_xdp_socket knot_xdp_socket_t; + +/*! \brief Offset of DNS payload inside ethernet frame (IPv4 and v6 variants). */ +extern const size_t KNOT_XDP_PAYLOAD_OFFSET4; +extern const size_t KNOT_XDP_PAYLOAD_OFFSET6; + +/*! + * \brief Initialize XDP socket. + * + * \param socket Socket ctx. + * \param if_name Name of the net iface (e.g. eth0). + * \param if_queue Network card queue to be used (normally 1 socket per each queue). + * \param listen_port Port to listen on, or KNOT_XDP_LISTEN_PORT_* flag. + * \param load_bpf Insert BPF program into packet processing. + * + * \return KNOT_E* or -errno + */ +int knot_xdp_init(knot_xdp_socket_t **socket, const char *if_name, int if_queue, + uint32_t listen_port, knot_xdp_load_bpf_t load_bpf); + +/*! + * \brief De-init XDP socket. + * + * \param socket XDP socket. + */ +void knot_xdp_deinit(knot_xdp_socket_t *socket); + +/*! + * \brief Return a file descriptor to be polled on for incomming packets. + * + * \param socket XDP socket. + * + * \return KNOT_E* + */ +int knot_xdp_socket_fd(knot_xdp_socket_t *socket); + +/*! + * \brief Collect completed TX buffers, so they can be used by knot_xdp_send_alloc(). + * + * \param socket XDP socket. + */ +void knot_xdp_send_prepare(knot_xdp_socket_t *socket); + +/*! + * \brief Allocate one buffer for an outgoing packet. + * + * \param socket XDP socket. + * \param ipv6 The packet will use IPv6 (IPv4 otherwise). + * \param out Out: the allocated packet buffer. + * \param in_reply_to Optional: fill in addresses from this query. + * + * \return KNOT_E* + */ +int knot_xdp_send_alloc(knot_xdp_socket_t *socket, bool ipv6, knot_xdp_msg_t *out, + const knot_xdp_msg_t *in_reply_to); + +/*! + * \brief Send multiple packets thru XDP. + * + * \note The packets all must have been allocated by knot_xdp_send_alloc()! + * \note Do not free the packets payloads afterwards. + * \note Packets with zero length will be skipped. + * + * \param socket XDP socket. + * \param msgs Packets to be sent. + * \param count Number of packets. + * \param sent Out: number of packet successfully sent. + * + * \return KNOT_E* + */ +int knot_xdp_send(knot_xdp_socket_t *socket, const knot_xdp_msg_t msgs[], + uint32_t count, uint32_t *sent); + +/*! + * \brief Syscall to kernel to wake up the network card driver after knot_xdp_send(). + * + * \param socket XDP socket. + * + * \return KNOT_E* or -errno + */ +int knot_xdp_send_finish(knot_xdp_socket_t *socket); + +/*! + * \brief Receive multiple packets thru XDP. + * + * \param socket XDP socket. + * \param msgs Out: buffers to be filled in with incomming packets. + * \param max_count Limit for number of packets received at once. + * \param count Out: real number of received packets. + * + * \return KNOT_E* + */ +int knot_xdp_recv(knot_xdp_socket_t *socket, knot_xdp_msg_t msgs[], + uint32_t max_count, uint32_t *count); + +/*! + * \brief Free buffers with received packets. + * + * \param socket XDP socket. + * \param msgs Buffers with received packets. + * \param count Number of received packets to free. + */ +void knot_xdp_recv_finish(knot_xdp_socket_t *socket, const knot_xdp_msg_t msgs[], + uint32_t count); + +/*! + * \brief Print some info about the XDP socket. + * + * \param socket XDP socket. + * \param file Output file. + */ +void knot_xdp_info(const knot_xdp_socket_t *socket, FILE *file); |