summaryrefslogtreecommitdiffstats
path: root/xdp-bench
diff options
context:
space:
mode:
Diffstat (limited to 'xdp-bench')
-rw-r--r--xdp-bench/.gitignore1
-rw-r--r--xdp-bench/Makefile21
-rw-r--r--xdp-bench/README.org570
-rw-r--r--xdp-bench/hash_func01.h55
-rw-r--r--xdp-bench/tests/test-xdp-bench.sh126
-rw-r--r--xdp-bench/xdp-bench.8696
-rw-r--r--xdp-bench/xdp-bench.c275
-rw-r--r--xdp-bench/xdp-bench.h100
-rw-r--r--xdp-bench/xdp_basic.bpf.c68
-rw-r--r--xdp-bench/xdp_basic.c142
-rw-r--r--xdp-bench/xdp_redirect_basic.bpf.c44
-rw-r--r--xdp-bench/xdp_redirect_basic.c140
-rw-r--r--xdp-bench/xdp_redirect_cpumap.bpf.c539
-rw-r--r--xdp-bench/xdp_redirect_cpumap.c354
-rw-r--r--xdp-bench/xdp_redirect_devmap.bpf.c88
-rw-r--r--xdp-bench/xdp_redirect_devmap.c207
-rw-r--r--xdp-bench/xdp_redirect_devmap_multi.bpf.c77
-rw-r--r--xdp-bench/xdp_redirect_devmap_multi.c230
18 files changed, 3733 insertions, 0 deletions
diff --git a/xdp-bench/.gitignore b/xdp-bench/.gitignore
new file mode 100644
index 0000000..7c3bfd3
--- /dev/null
+++ b/xdp-bench/.gitignore
@@ -0,0 +1 @@
+xdp-bench
diff --git a/xdp-bench/Makefile b/xdp-bench/Makefile
new file mode 100644
index 0000000..5936277
--- /dev/null
+++ b/xdp-bench/Makefile
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+
+XDP_TARGETS := xdp_redirect_basic.bpf xdp_redirect_cpumap.bpf xdp_redirect_devmap.bpf \
+ xdp_redirect_devmap_multi.bpf xdp_basic.bpf
+BPF_SKEL_TARGETS := $(XDP_TARGETS)
+
+# Don't install skeleton object files
+XDP_OBJ_INSTALL :=
+
+TOOL_NAME := xdp-bench
+MAN_PAGE := xdp-bench.8
+TEST_FILE := tests/test-xdp-bench.sh
+USER_TARGETS := xdp-bench
+USER_EXTRA_C := xdp_redirect_basic.c xdp_redirect_cpumap.c xdp_redirect_devmap.c \
+ xdp_redirect_devmap_multi.c xdp_basic.c
+EXTRA_USER_DEPS := xdp-bench.h
+
+LIB_DIR = ../lib
+
+include $(LIB_DIR)/common.mk
+
diff --git a/xdp-bench/README.org b/xdp-bench/README.org
new file mode 100644
index 0000000..068013c
--- /dev/null
+++ b/xdp-bench/README.org
@@ -0,0 +1,570 @@
+#+EXPORT_FILE_NAME: xdp-bench
+#+TITLE: xdp-bench
+#+MAN_CLASS_OPTIONS: :section-id "8\" \"DATE\" \"VERSION\" \"A simple XDP benchmarking tool"
+# This file serves both as a README on github, and as the source for the man
+# page; the latter through the org-mode man page export support.
+# .
+# To export the man page, simply use the org-mode exporter; (require 'ox-man) if
+# it's not available. There's also a Makefile rule to export it.
+
+* XDP-bench - a simple XDP benchmarking tool
+
+XDP-bench is a benchmarking utility for exercising the different operation modes
+of XDP. It is intended to be a simple program demonstrating the various
+operating modes; these include dropping packets, hairpin forwarding (using the
+=XDP_TX= return code), and redirection using the various in-kernel packet
+redirection facilities.
+
+The drop and TX modes support various options to control whether packet data is
+touched (read or written) before being dropped or transmitted. The redirection
+modes support using the simple ifindex-based =bpf_redirect= helper, the
+=bpf_redirect_map= helper using a cpumap as its target, =bpf_redirect_map= using
+a devmap as its target, and the devmap's broadcast mode which allows redirecting
+to multiple devices.
+
+There is more information on the meaning of the output in both default (terse)
+and extended output mode, in the *Output Format Description* section below.
+
+** Running xdp-bench
+The syntax for running xdp-bench is:
+
+#+begin_src sh
+Usage: xdp-bench COMMAND [options]
+
+COMMAND can be one of:
+ drop - Drop all packets on an interface
+ pass - Pass all packets to the network stack
+ tx - Transmit packets back out on an interface (hairpin forwarding)
+ redirect - XDP redirect using the bpf_redirect() helper
+ redirect-cpu - XDP CPU redirect using BPF_MAP_TYPE_CPUMAP
+ redirect-map - XDP redirect using BPF_MAP_TYPE_DEVMAP
+ redirect-multi - XDP multi-redirect using BPF_MAP_TYPE_DEVMAP and the BPF_F_BROADCAST flag
+#+end_src
+
+Each command and its options are explained below. Alternatively, use
+=xdp-bench COMMAND --help= to see the options for each command.
+
+* The DROP command
+In this mode, =xdp-bench= installs an XDP program on an interface that simply
+drops all packets. There are options to control what to do with the packet
+before dropping it (touch the packet data or not), as well as which statistics
+to gather. This is a basic benchmark for the baseline (best-case) performance of
+XDP on an interface.
+
+The syntax for the =drop= command is:
+
+=xdp-bench drop [options] <ifname>=
+
+Where =<ifname>= is the name of the interface the XDP program should be
+installed on.
+
+The supported options are:
+
+** -p, --packet-operation <ACTION>
+Specify which operation should be taken on the packet before dropping it. The
+following actions are available:
+
+#+begin_src sh
+ no-touch - Drop the packet without touching the packet data
+ read-data - Read a field in the packet header before dropping
+ swap-macs - Swap the source and destination MAC addresses before dropping
+#+end_src
+
+Whether to touch the packet before dropping it can have a significant
+performance impact as this requires bringing packet data into the CPU cache (and
+flushing it back out if writing).
+
+The default for this option is =no-touch=.
+
+** -r, --rxq-stats
+If set, the XDP program will also gather statistics on which receive queue index
+each packet was received on. This is displayed in the extended output mode along
+with per-CPU data (which, depending on the hardware configuration may or may not
+be equivalent).
+
+** -i, --interval <SECONDS>
+Set the polling interval for collecting all statistics and displaying them to
+the output. The unit of interval is in seconds.
+
+** -e, --extended
+Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in
+"terse" mode. The output mode can be switched by hitting C-\ while the program
+is running. See also the *Output Format Description* section below.
+
+** -m, --mode
+Selects the XDP program mode (native or skb). Note that native XDP mode is the
+default, and loading the redirect program in skb manner is neither performant,
+nor recommended. However, this option is useful if the interface driver lacks
+native XDP support, or when simply testing the tool.
+
+** -v, --verbose
+Enable verbose logging. Supply twice to enable verbose logging from the
+underlying =libxdp= and =libbpf= libraries.
+
+** --version
+Show the application version and exit.
+
+** -h, --help
+Display a summary of the available options
+
+* The PASS command
+In this mode, =xdp-bench= installs an XDP program on an interface that passes
+all packets to the network stack after processing them (returning =XDP_PASS=).
+There are options to control what to do with the packet before passing it
+(touch the packet data or not), as well as which statistics to gather. This is a
+basic benchmark for the overhead of installing an XDP program on an interface
+while still running the regular network stack.
+
+The syntax for the =pass= command is:
+
+=xdp-bench pass [options] <ifname>=
+
+Where =<ifname>= is the name of the interface the XDP program should be
+installed on.
+
+The supported options are:
+
+** -p, --packet-operation <ACTION>
+Specify which operation should be taken on the packet before passing it. The
+following actions are available:
+
+#+begin_src sh
+ no-touch - Pass the packet without touching the packet data
+ read-data - Read a field in the packet header before passing
+ swap-macs - Swap the source and destination MAC addresses before passing
+#+end_src
+
+The default for this option is =no-touch=.
+
+** -r, --rxq-stats
+If set, the XDP program will also gather statistics on which receive queue index
+each packet was received on. This is displayed in the extended output mode along
+with per-CPU data (which, depending on the hardware configuration may or may not
+be equivalent).
+
+** -i, --interval <SECONDS>
+Set the polling interval for collecting all statistics and displaying them to
+the output. The unit of interval is in seconds.
+
+** -e, --extended
+Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in
+"terse" mode. The output mode can be switched by hitting C-\ while the program
+is running. See also the *Output Format Description* section below.
+
+** -m, --mode
+Selects the XDP program mode (native or skb). Note that native XDP mode is the
+default, and loading the redirect program in skb manner is neither performant,
+nor recommended. However, this option is useful if the interface driver lacks
+native XDP support, or when simply testing the tool.
+
+** -v, --verbose
+Enable verbose logging. Supply twice to enable verbose logging from the
+underlying =libxdp= and =libbpf= libraries.
+
+** --version
+Show the application version and exit.
+
+** -h, --help
+Display a summary of the available options
+
+* The TX command
+In this mode, =xdp-bench= installs an XDP program on an interface that performs
+so-called "hairpin forwarding", which means each packet is transmitted back out
+the same interface (using the =XDP_TX= return code). There are options to
+control what to do with the packet before transmitting it (touch the packet data
+or not), as well as which statistics to gather.
+
+The syntax for the =tx= command is:
+
+=xdp-bench tx [options] <ifname>=
+
+Where =<ifname>= is the name of the interface the XDP program should be
+installed on.
+
+The supported options are:
+
+** -p, --packet-operation <ACTION>
+Specify which operation should be taken on the packet before transmitting it. The
+following actions are available:
+
+#+begin_src sh
+ no-touch - Transmit the packet without touching the packet data
+ read-data - Read a field in the packet header before transmitting
+ swap-macs - Swap the source and destination MAC addresses before transmitting
+#+end_src
+
+To allow the packet to be successfully transmitted back to the sender, the MAC
+addresses have to be swapped, so that the source MAC matches the network device.
+However, there is a performance overhead in doing swapping, so this option
+allows this function to be turned off.
+
+The default for this option is =swap-macs=.
+
+** -r, --rxq-stats
+If set, the XDP program will also gather statistics on which receive queue index
+each packet was received on. This is displayed in the extended output mode along
+with per-CPU data (which, depending on the hardware configuration may or may not
+be equivalent).
+
+** -i, --interval <SECONDS>
+Set the polling interval for collecting all statistics and displaying them to
+the output. The unit of interval is in seconds.
+
+** -e, --extended
+Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in
+"terse" mode. The output mode can be switched by hitting C-\ while the program
+is running. See also the *Output Format Description* section below.
+
+** -m, --mode
+Selects the XDP program mode (native or skb). Note that native XDP mode is the
+default, and loading the redirect program in skb manner is neither performant,
+nor recommended. However, this option is useful if the interface driver lacks
+native XDP support, or when simply testing the tool.
+
+** -v, --verbose
+Enable verbose logging. Supply twice to enable verbose logging from the
+underlying =libxdp= and =libbpf= libraries.
+
+** --version
+Show the application version and exit.
+
+** -h, --help
+Display a summary of the available options
+
+* The REDIRECT command
+In this mode, =xdp-bench= sets up packet redirection between the two
+interfaces supplied on the command line using the =bpf_redirect= BPF helper
+triggered on packet reception on the ingress interface.
+
+The syntax for the =redirect= command is:
+
+=xdp-bench redirect [options] <ifname_in> <ifname_out>=
+
+Where =<ifname_in>= is the name of the input interface from where packets will
+be redirected to the output interface =<ifname_out>=.
+
+The supported options are:
+
+** -i, --interval <SECONDS>
+Set the polling interval for collecting all statistics and displaying them to
+the output. The unit of interval is in seconds.
+
+** -s, --stats
+Enable statistics for successful redirection. This option comes with a per
+packet tracing overhead, for recording all successful redirections.
+
+** -e, --extended
+Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in
+"terse" mode. The output mode can be switched by hitting C-\ while the program
+is running. See also the *Output Format Description* section below.
+
+** -m, --mode
+Selects the XDP program mode (native or skb). Note that native XDP mode is the
+default, and loading the redirect program in skb manner is neither performant,
+nor recommended. However, this option is useful if the interface driver lacks
+native XDP support, or when simply testing the tool.
+
+** -v, --verbose
+Enable verbose logging. Supply twice to enable verbose logging from the
+underlying =libxdp= and =libbpf= libraries.
+
+** --version
+Show the application version and exit.
+
+** -h, --help
+Display a summary of the available options
+
+* The REDIRECT-CPU command
+In this mode, =xdp-bench= sets up packet redirection using the
+=bpf_redirect_map= BPF helper triggered on packet reception on the ingress
+interface, using a cpumap as its target. Hence, this tool can be used to
+redirect packets on an interface from one CPU to another. In addition to this,
+the tool then supports redirecting the packet to another output device when it
+is processed on the target CPU.
+
+The syntax for the =redirect-cpu= command is:
+
+=xdp-bench redirect-cpu [options] <ifname> -c 0 ... -c N=
+
+Where =<ifname>= is the name of the input interface from where packets will be
+redirected to the target CPU list specified using =-c=.
+
+The supported options are:
+
+** -c, --cpu <CPU>
+Specify a possible target CPU index. This option must be passed at least once,
+and can be passed multiple times to specify a list of CPUs. Which CPU is chosen
+for a given packet depends on the value of the =--program-mode= option,
+described below.
+
+** -p, --program-mode <MODE>
+Specify a program that embeds a predefined policy deciding how packets are
+redirected to different CPUs. The following options are available:
+
+#+begin_src sh
+ no-touch - Redirect without touching packet data
+ touch - Read packet data before redirecting
+ round-robin - Cycle between target CPUs in a round-robin fashion (for each packet)
+ l4-proto - Choose the target CPU based on the layer-4 protocol of packet
+ l4-filter - Like l4-proto, but drop UDP packets with destination port 9 (used by pktgen)
+ l4-hash - Use source and destination IP hashing to pick target CPU
+#+end_src
+
+The =no-touch= and =touch= modes always redirect packets to the same CPU (the
+first value supplied to =--cpu=). The =round-robin= and =l4-hash= modes
+distribute packets between all the CPUs supplied as =--cpu= arguments, while
+=l4-proto= and =l4-filter= send TCP and unrecognised packets to CPU index 0, UDP
+packets to CPU index 1 and ICMP packets to CPU index 2 (where the index refers
+to the order the actual CPUs are given on the command line).
+
+The default for this option is =l4-hash=.
+
+** -r, --remote-action <ACTION>
+If this option is set, a separate program is installed into the cpumap, which
+will be invoked on the remote CPU after the packet is processed there. The
+action can be either =drop= or =pass= which will drop the packet or pass it to
+the regular networking stack, respectively. Or it can be =redirect=, which will
+cause the packet to be redirected to another interface and transmitted out that
+interface on the remote CPU. If this option is set to =redirect= the target
+device must be specified using =--redirect-device=.
+
+The default for this option is =disabled=.
+
+** -D, --redirect-device <IFNAME>
+Specify the device to redirect the packet to when it is received on the target CPU.
+Note that this option can only be specified with =--remote-action redirect=.
+
+** -q, --qsize <PACKETS>
+Set the queue size for the per-CPU cpumap ring buffer used for redirecting
+packets from multiple CPUs to one CPU. The default value is 2048 packets.
+
+** -x, --stress-mode
+Stress the cpumap implementation by deallocating and reallocating the cpumap
+ring buffer on each polling interval.
+
+** -i, --interval <SECONDS>
+Set the polling interval for collecting all statistics and displaying them to
+the output. The unit of interval is in seconds.
+
+** -s, --stats
+Enable statistics for successful redirection. This option comes with a per
+packet tracing overhead, for recording all successful redirections.
+
+** -e, --extended
+Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in
+"terse" mode. The output mode can be switched by hitting C-\ while the program
+is running. See also the *Output Format Description* section below.
+
+** -m, --mode
+Selects the XDP program mode (native or skb). Note that native XDP mode is the
+default, and loading the redirect program in skb manner is neither performant,
+nor recommended. However, this option is useful if the interface driver lacks
+native XDP support, or when simply testing the tool.
+
+** -v, --verbose
+Enable verbose logging. Supply twice to enable verbose logging from the
+underlying =libxdp= and =libbpf= libraries.
+
+** --version
+Show the application version and exit.
+
+** -h, --help
+Display a summary of the available options
+
+* The REDIRECT-MAP command
+In this mode, =xdp-bench= sets up packet redirection between two interfaces
+supplied on the command line using the =bpf_redirect_map()= BPF helper triggered
+on packet reception on the ingress interface, using a devmap as its target.
+
+The syntax for the =redirect-map= command is:
+
+=xdp-bench redirect-map [options] <ifname_in> <ifname_out>=
+
+Where =<ifname_in>= is the name of the input interface from where packets will
+be redirected to the output interface =<ifname_out>=.
+
+The supported options are:
+
+** -X, --load-egress
+Load a program in the devmap entry used for redirection, so that it is invoked
+after the packet is redirected to the target device, before it is transmitted
+out of the output interface. The remote program will update the packet data so
+its source MAC address matches the one of the destination interface.
+
+** -i, --interval <SECONDS>
+Set the polling interval for collecting all statistics and displaying them to
+the output. The unit of interval is in seconds.
+
+** -s, --stats
+Enable statistics for successful redirection. This option comes with a per
+packet tracing overhead, for recording all successful redirections.
+
+** -e, --extended
+Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in
+"terse" mode. The output mode can be switched by hitting C-\ while the program
+is running. See also the *Output Format Description* section below.
+
+** -m, --mode
+Selects the XDP program mode (native or skb). Note that native XDP mode is the
+default, and loading the redirect program in skb manner is neither performant,
+nor recommended. However, this option is useful if the interface driver lacks
+native XDP support, or when simply testing the tool.
+
+** -v, --verbose
+Enable verbose logging. Supply twice to enable verbose logging from the
+underlying =libxdp= and =libbpf= libraries.
+
+** --version
+Show the application version and exit.
+
+** -h, --help
+Display a summary of the available options
+
+* The REDIRECT-MULTI command
+In this mode, =xdp-bench= sets up one-to-many packet redirection between
+interfaces supplied on the command line, using the =bpf_redirect_map= BPF helper
+triggered on packet reception on the ingress interface, using a devmap as its
+target. The packet is broadcast to all output interfaces specified on the
+command line, using devmap's packet broadcast feature.
+
+The syntax for the =redirect-multi= command is:
+
+=xdp-bench redirect-multi [options] <ifname_in> <ifname_out1> ... <ifname_outN>=
+
+Where =<ifname_in>= is the name of the input interface from where packets will
+be redirected to one or many output interface(s).
+
+The supported options are:
+
+** -X, --load-egress
+Load a program in the devmap entry used for redirection, so that it is invoked
+after the packet is redirected to the target device, before it is transmitted
+out of the output interface. The remote program will update the packet data so
+its source MAC address matches the one of the destination interface.
+
+** -i, --interval <SECONDS>
+Set the polling interval for collecting all statistics and displaying them to
+the output. The unit of interval is in seconds.
+
+** -s, --stats
+Enable statistics for successful redirection. This option comes with a per
+packet tracing overhead, for recording all successful redirections.
+
+** -e, --extended
+Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in
+"terse" mode. The output mode can be switched by hitting C-\ while the program
+is running. See also the *Output Format Description* section below.
+
+** -m, --mode
+Selects the XDP program mode (native or skb). Note that native XDP mode is the
+default, and loading the redirect program in skb manner is neither performant,
+nor recommended. However, this option is useful if the interface driver lacks
+native XDP support, or when simply testing the tool.
+
+** -v, --verbose
+Enable verbose logging. Supply twice to enable verbose logging from the
+underlying =libxdp= and =libbpf= libraries.
+
+** --version
+Show the application version and exit.
+
+** -h, --help
+Display a summary of the available options
+
+
+* Output Format Description
+
+By default, redirect success statistics are disabled; use =--stats= to enable
+them. The terse output mode is the default; extended output mode can be
+activated using the =--extended= command line option.
+
+SIGQUIT (Ctrl + \\) can be used to switch the mode dynamically at runtime.
+
+Terse mode displays at most the following fields:
+#+begin_src sh
+ rx/s Number of packets received per second
+ redir/s Number of packets successfully redirected per second
+ err,drop/s Aggregated count of errors per second (including dropped packets when not using the drop command)
+ xmit/s Number of packets transmitted on the output device per second
+#+end_src
+
+Extended output mode displays at most the following fields:
+#+begin_src sh
+ FIELD DESCRIPTION
+ receive Displays the number of packets received and errors encountered
+
+ Whenever an error or packet drop occurs, details of per CPU error
+ and drop statistics will be expanded inline in terse mode.
+ pkt/s - Packets received per second
+ drop/s - Packets dropped per second
+ error/s - Errors encountered per second
+ redirect - Displays the number of packets successfully redirected
+ Errors encountered are expanded under redirect_err field
+ Note that passing -s to enable it has a per packet overhead
+ redir/s - Packets redirected successfully per second
+
+
+ redirect_err Displays the number of packets that failed redirection
+
+ The errno is expanded under this field with per CPU count
+ The recognized errors are:
+ EINVAL: Invalid redirection
+ ENETDOWN: Device being redirected to is down
+ EMSGSIZE: Packet length too large for device
+ EOPNOTSUPP: Operation not supported
+ ENOSPC: No space in ptr_ring of cpumap kthread
+
+ error/s - Packets that failed redirection per second
+
+
+ enqueue to cpu N Displays the number of packets enqueued to bulk queue of CPU N
+ Expands to cpu:FROM->N to display enqueue stats for each CPU enqueuing to CPU N
+ Received packets can be associated with the CPU that the redirect program is
+ enqueuing packets to.
+ pkt/s - Packets enqueued per second from other CPU to CPU N
+ drop/s - Packets dropped when trying to enqueue to CPU N
+ bulk-avg - Average number of packets processed for each event
+
+
+ kthread Displays the number of packets processed in CPUMAP kthread for each CPU
+ Packets consumed from ptr_ring in kthread, and its xdp_stats (after calling
+ CPUMAP bpf prog) are expanded below this. xdp_stats are expanded as a total and
+ then per-CPU to associate it to each CPU's pinned CPUMAP kthread.
+ pkt/s - Packets consumed per second from ptr_ring
+ drop/s - Packets dropped per second in kthread
+ sched - Number of times kthread called schedule()
+
+ xdp_stats (also expands to per-CPU counts)
+ pass/s - XDP_PASS count for CPUMAP program execution
+ drop/s - XDP_DROP count for CPUMAP program execution
+ redir/s - XDP_REDIRECT count for CPUMAP program execution
+
+
+ xdp_exception Displays xdp_exception tracepoint events
+
+ This can occur due to internal driver errors, unrecognized
+ XDP actions and due to explicit user trigger by use of XDP_ABORTED
+ Each action is expanded below this field with its count
+ hit/s - Number of times the tracepoint was hit per second
+
+
+ devmap_xmit Displays devmap_xmit tracepoint events
+
+ This tracepoint is invoked for successful transmissions on output
+ device but these statistics are not available for generic XDP mode,
+ hence they will be omitted from the output when using SKB mode
+ xmit/s - Number of packets that were transmitted per second
+ drop/s - Number of packets that failed transmissions per second
+ drv_err/s - Number of internal driver errors per second
+ bulk-avg - Average number of packets processed for each event
+#+end_src
+
+* BUGS
+
+Please report any bugs on Github: https://github.com/xdp-project/xdp-tools/issues
+
+* AUTHOR
+
+Earlier xdp-redirect tools were written by Jesper Dangaard Brouer and John
+Fastabend. They were then rewritten to support more features by Kumar Kartikeya
+Dwivedi, who also ported them to xdp-tools together with Toke Høiland-Jørgensen.
+This man page was written by Kumar Kartikeya Dwivedi and Toke Høiland-Jørgensen.
diff --git a/xdp-bench/hash_func01.h b/xdp-bench/hash_func01.h
new file mode 100644
index 0000000..ac96bc3
--- /dev/null
+++ b/xdp-bench/hash_func01.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: LGPL-2.1
+ *
+ * Based on Paul Hsieh's (LGPL 2.1) hash function
+ * From: http://www.azillionmonkeys.com/qed/hash.html
+ */
+
+#define get16bits(d) (*((const __u16 *) (d)))
+/*
+ * SuperFastHash() - hash a byte buffer into a 32-bit value.
+ *
+ * @data:    bytes to hash
+ * @len:     number of bytes at @data
+ * @initval: seed the running hash starts from
+ *
+ * Returns the 32-bit hash. Returns 0 (ignoring @initval) when @len is not
+ * positive or @data is NULL.
+ *
+ * Input is consumed via get16bits(), i.e. as 16-bit loads through a pointer
+ * cast, so the result depends on the byte order of the machine running it.
+ */
+static __always_inline
+__u32 SuperFastHash(const char *data, int len, __u32 initval) {
+ __u32 hash = initval;
+ __u32 tmp;
+ int rem;
+
+ if (len <= 0 || data == NULL) return 0;
+ /* Split len into a count of whole 4-byte chunks (len) and the 0-3
+ * leftover bytes (rem).
+ */
+ rem = len & 3;
+ len >>= 2;
+
+ /* Main loop: mix in 4 input bytes (two 16-bit words) per iteration.
+ * The pragma below asks clang to fully unroll this loop.
+ * NOTE(review): presumably required so that no loop remains in the
+ * compiled BPF program, which would need len to be a compile-time
+ * constant at the call site - confirm.
+ */
+#pragma clang loop unroll(full)
+ for (;len > 0; len--) {
+ hash += get16bits (data);
+ tmp = (get16bits (data+2) << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ data += 2*sizeof (__u16);
+ hash += hash >> 11;
+ }
+
+ /* Handle end cases: fold in the 1-3 bytes left after the last full
+ * 4-byte chunk. rem == 0 matches no case, so nothing more is mixed in.
+ * NOTE(review): the odd byte is read as signed char; in case 3 a
+ * negative value is left-shifted as an int, which ISO C leaves
+ * undefined - confirm the targeted compilers give the intended result.
+ */
+ switch (rem) {
+ case 3: hash += get16bits (data);
+ hash ^= hash << 16;
+ hash ^= ((signed char)data[sizeof (__u16)]) << 18;
+ hash += hash >> 11;
+ break;
+ case 2: hash += get16bits (data);
+ hash ^= hash << 11;
+ hash += hash >> 17;
+ break;
+ case 1: hash += (signed char)*data;
+ hash ^= hash << 10;
+ hash += hash >> 1;
+ }
+
+ /* Force "avalanching" of final 127 bits */
+ hash ^= hash << 3;
+ hash += hash >> 5;
+ hash ^= hash << 4;
+ hash += hash >> 17;
+ hash ^= hash << 25;
+ hash += hash >> 6;
+
+ return hash;
+}
diff --git a/xdp-bench/tests/test-xdp-bench.sh b/xdp-bench/tests/test-xdp-bench.sh
new file mode 100644
index 0000000..7dfc357
--- /dev/null
+++ b/xdp-bench/tests/test-xdp-bench.sh
@@ -0,0 +1,126 @@
+XDP_LOADER=${XDP_LOADER:-./xdp-loader}
+XDP_BENCH=${XDP_BENCH:-./xdp-bench}
+ALL_TESTS="test_drop test_pass test_tx test_rxq_stats test_redirect test_redirect_cpu test_redirect_map test_redirect_map_egress test_redirect_multi test_redirect_multi_egress"
+
+# Run "xdp-bench <action>" against $NS once per option variation: default
+# options, the read-data and swap-macs packet operations (-p), skb mode (-m)
+# and extended output (-e).
+#   $1 - the xdp-bench command to exercise (callers pass drop, pass and tx)
+# NOTE(review): XDP_SAMPLE_IMMEDIATE_EXIT presumably makes xdp-bench exit right
+# after setup instead of polling; check_run and $NS are provided by the test
+# harness - confirm there.
+test_basic()
+{
+ action=$1
+
+ export XDP_SAMPLE_IMMEDIATE_EXIT=1
+ check_run $XDP_BENCH $action $NS -vv
+ check_run $XDP_BENCH $action $NS -p read-data -vv
+ check_run $XDP_BENCH $action $NS -p swap-macs -vv
+ check_run $XDP_BENCH $action $NS -m skb -vv
+ check_run $XDP_BENCH $action $NS -e -vv
+}
+
+# Run the shared test_basic checks for the "drop" command.
+test_drop()
+{
+ test_basic drop
+}
+# Run the shared test_basic checks for the "pass" command.
+test_pass()
+{
+ test_basic pass
+}
+# Run the shared test_basic checks for the "tx" command.
+test_tx()
+{
+ test_basic tx
+}
+
+# Check that the "drop" command runs with receive-queue statistics enabled (-r).
+# skip_if_missing_veth_rxq is a harness helper; presumably it skips this test
+# when the veth driver lacks the needed RXQ support - confirm.
+test_rxq_stats()
+{
+ skip_if_missing_veth_rxq
+
+ export XDP_SAMPLE_IMMEDIATE_EXIT=1
+ check_run $XDP_BENCH drop $NS -r -vv
+}
+
+# Exercise the "redirect" command from btest0 to btest1 on a freshly created
+# veth pair: default options, redirect statistics (-s), skb mode (-m) and
+# extended output (-e). The pair is removed again by deleting btest0.
+test_redirect()
+{
+ export XDP_SAMPLE_IMMEDIATE_EXIT=1
+ check_run ip link add dev btest0 type veth peer name btest1
+ check_run $XDP_BENCH redirect btest0 btest1 -vv
+ check_run $XDP_BENCH redirect btest0 btest1 -s -vv
+ check_run $XDP_BENCH redirect btest0 btest1 -m skb -vv
+ check_run $XDP_BENCH redirect btest0 btest1 -e -vv
+ ip link del dev btest0
+}
+
+# Exercise the "redirect-cpu" command on btest0 with CPU 0 as the only target:
+# first default options, skb mode and every program mode (-p), then each
+# remote action (-r), with "redirect" sending packets on to btest1 (-D).
+# NOTE(review): skip_if_missing_cpumap_attach and is_progmap_supported are
+# harness helpers; the export presumably makes libxdp load the program without
+# its dispatcher when programs in maps are unsupported with it - confirm.
+test_redirect_cpu()
+{
+ skip_if_missing_cpumap_attach
+
+ export XDP_SAMPLE_IMMEDIATE_EXIT=1
+ check_run ip link add dev btest0 type veth peer name btest1
+ check_run $XDP_BENCH redirect-cpu btest0 -c 0 -vv
+ check_run $XDP_BENCH redirect-cpu btest0 -c 0 -m skb -vv
+ check_run $XDP_BENCH redirect-cpu btest0 -c 0 -p touch -vv
+ check_run $XDP_BENCH redirect-cpu btest0 -c 0 -p round-robin -vv
+ check_run $XDP_BENCH redirect-cpu btest0 -c 0 -p l4-proto -vv
+ check_run $XDP_BENCH redirect-cpu btest0 -c 0 -p l4-filter -vv
+ check_run $XDP_BENCH redirect-cpu btest0 -c 0 -p l4-hash -vv
+
+ is_progmap_supported || export LIBXDP_SKIP_DISPATCHER=1
+ check_run $XDP_BENCH redirect-cpu btest0 -c 0 -r drop -vv
+ check_run $XDP_BENCH redirect-cpu btest0 -c 0 -r pass -vv
+ check_run $XDP_BENCH redirect-cpu btest0 -c 0 -r redirect -D btest1 -vv
+ ip link del dev btest0
+}
+
+# Exercise the "redirect-map" command from btest0 to btest1 on a freshly
+# created veth pair: default options, redirect statistics (-s), skb mode (-m)
+# and extended output (-e). The pair is removed again by deleting btest0.
+test_redirect_map()
+{
+ export XDP_SAMPLE_IMMEDIATE_EXIT=1
+ check_run ip link add dev btest0 type veth peer name btest1
+ check_run $XDP_BENCH redirect-map btest0 btest1 -vv
+ check_run $XDP_BENCH redirect-map btest0 btest1 -s -vv
+ check_run $XDP_BENCH redirect-map btest0 btest1 -m skb -vv
+ check_run $XDP_BENCH redirect-map btest0 btest1 -e -vv
+ ip link del dev btest0
+}
+
+# Exercise "redirect-map" with an egress program loaded into the devmap entry
+# (-X) on a freshly created btest0/btest1 veth pair.
+# NOTE(review): skip_if_missing_cpumap_attach and is_progmap_supported are
+# harness helpers; LIBXDP_SKIP_DISPATCHER is set when the latter fails,
+# presumably to load the program without libxdp's dispatcher - confirm.
+test_redirect_map_egress()
+{
+ skip_if_missing_cpumap_attach
+
+ export XDP_SAMPLE_IMMEDIATE_EXIT=1
+ check_run ip link add dev btest0 type veth peer name btest1
+ is_progmap_supported || export LIBXDP_SKIP_DISPATCHER=1
+ check_run $XDP_BENCH redirect-map btest0 btest1 -X -vv
+ ip link del dev btest0
+}
+
+# Exercise the "redirect-multi" command with btest0 as input and btest1,
+# btest2 and btest3 as outputs, using two freshly created veth pairs: default
+# options, redirect statistics (-s), skb mode (-m) and extended output (-e).
+# Both pairs are removed again by deleting btest0 and btest2.
+test_redirect_multi()
+{
+ export XDP_SAMPLE_IMMEDIATE_EXIT=1
+ check_run ip link add dev btest0 type veth peer name btest1
+ check_run ip link add dev btest2 type veth peer name btest3
+ check_run $XDP_BENCH redirect-multi btest0 btest1 btest2 btest3 -vv
+ check_run $XDP_BENCH redirect-multi btest0 btest1 btest2 btest3 -s -vv
+ check_run $XDP_BENCH redirect-multi btest0 btest1 btest2 btest3 -m skb -vv
+ check_run $XDP_BENCH redirect-multi btest0 btest1 btest2 btest3 -e -vv
+ ip link del dev btest0
+ ip link del dev btest2
+}
+
+# Exercise "redirect-multi" with an egress program loaded into the devmap
+# entries (-X), with btest0 as input and btest1, btest2 and btest3 as outputs
+# on two freshly created veth pairs.
+# NOTE(review): skip_if_missing_cpumap_attach and is_progmap_supported are
+# harness helpers; LIBXDP_SKIP_DISPATCHER is set when the latter fails,
+# presumably to load the program without libxdp's dispatcher - confirm.
+test_redirect_multi_egress()
+{
+ skip_if_missing_cpumap_attach
+
+ export XDP_SAMPLE_IMMEDIATE_EXIT=1
+ is_progmap_supported || export LIBXDP_SKIP_DISPATCHER=1
+ check_run ip link add dev btest0 type veth peer name btest1
+ check_run ip link add dev btest2 type veth peer name btest3
+
+ check_run $XDP_BENCH redirect-multi btest0 btest1 btest2 btest3 -X -vv
+
+ ip link del dev btest0
+ ip link del dev btest2
+}
+
+# Teardown after the tests: delete the btest0 and btest2 veth devices, unload
+# all XDP programs from $NS and run xdp-loader's "clean" command. All output
+# (stdout and stderr) is discarded, presumably because any of these may
+# already be gone when cleanup runs.
+cleanup_tests()
+{
+ ip link del dev btest0 >/dev/null 2>&1
+ ip link del dev btest2 >/dev/null 2>&1
+ $XDP_LOADER unload $NS --all >/dev/null 2>&1
+ $XDP_LOADER clean >/dev/null 2>&1
+}
diff --git a/xdp-bench/xdp-bench.8 b/xdp-bench/xdp-bench.8
new file mode 100644
index 0000000..c89dd1b
--- /dev/null
+++ b/xdp-bench/xdp-bench.8
@@ -0,0 +1,696 @@
+.TH "xdp-bench" "8" "FEBRUARY 4, 2023" "V1.3.1" "A simple XDP benchmarking tool"
+
+.SH "NAME"
+XDP-bench \- a simple XDP benchmarking tool
+.SH "SYNOPSIS"
+.PP
+XDP-bench is a benchmarking utility for exercising the different operation modes
+of XDP. It is intended to be a simple program demonstrating the various
+operating modes; these include dropping packets, hairpin forwarding (using the
+\fIXDP_TX\fP return code), and redirection using the various in-kernel packet
+redirection facilities.
+
+.PP
+The drop and TX modes support various options to control whether packet data is
+touched (read or written) before being dropped or transmitted. The redirection
+modes support using the simple ifindex-based \fIbpf_redirect\fP helper, the
+\fIbpf_redirect_map\fP helper using a cpumap as its target, \fIbpf_redirect_map\fP using
+a devmap as its target, and the devmap's broadcast mode which allows redirecting
+to multiple devices.
+
+.PP
+There is more information on the meaning of the output in both default (terse)
+and extended output mode, in the \fBOutput Format Description\fP section below.
+
+.SS "Running xdp-bench"
+.PP
+The syntax for running xdp-bench is:
+
+.RS
+.nf
+\fCUsage: xdp-bench COMMAND [options]
+
+COMMAND can be one of:
+ drop - Drop all packets on an interface
+ pass - Pass all packets to the network stack
+ tx - Transmit packets back out on an interface (hairpin forwarding)
+ redirect - XDP redirect using the bpf_redirect() helper
+ redirect-cpu - XDP CPU redirect using BPF_MAP_TYPE_CPUMAP
+ redirect-map - XDP redirect using BPF_MAP_TYPE_DEVMAP
+ redirect-multi - XDP multi-redirect using BPF_MAP_TYPE_DEVMAP and the BPF_F_BROADCAST flag
+\fP
+.fi
+.RE
+
+.PP
+Each command and its options are explained below. You can also use \fIxdp\-bench COMMAND
+\-\-help\fP to see the options for each command.
+
+.SH "The DROP command"
+.PP
+In this mode, \fIxdp\-bench\fP installs an XDP program on an interface that simply
+drops all packets. There are options to control what to do with the packet
+before dropping it (touch the packet data or not), as well as which statistics
+to gather. This is a basic benchmark for the baseline (best-case) performance of
+XDP on an interface.
+
+.PP
+The syntax for the \fIdrop\fP command is:
+
+.PP
+\fIxdp\-bench drop [options] <ifname>\fP
+
+.PP
+Where \fI<ifname>\fP is the name of the interface the XDP program should be
+installed on.
+
+.PP
+The supported options are:
+
+.SS "-p, --packet-operation <ACTION>"
+.PP
+Specify which operation should be taken on the packet before dropping it. The
+following actions are available:
+
+.RS
+.nf
+\fCno-touch - Drop the packet without touching the packet data
+touch - Read a field in the packet header before dropping
+swap-macs - Swap the source and destination MAC addresses before dropping
+\fP
+.fi
+.RE
+
+.PP
+Whether to touch the packet before dropping it can have a significant
+performance impact as this requires bringing packet data into the CPU cache (and
+flushing it back out if writing).
+
+.PP
+The default for this option is \fIno\-touch\fP.
+
+.SS "-r, --rxq-stats"
+.PP
+If set, the XDP program will also gather statistics on which receive queue index
+each packet was received on. This is displayed in the extended output mode along
+with per-CPU data (which, depending on the hardware configuration may or may not
+be equivalent).
+
+.SS "-i, --interval <SECONDS>"
+.PP
+Set the polling interval for collecting all statistics and displaying them to
+the output. The unit of interval is in seconds.
+
+.SS "-e, --extended"
+.PP
+Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in
+"terse" mode. The output mode can be switched by hitting C-\e while the program
+is running. See also the \fBOutput Format Description\fP section below.
+
+.SS "-m, --mode"
+.PP
+Selects the XDP program mode (native or skb). Note that native XDP mode is the
+default, and loading the redirect program in skb manner is neither performant,
+nor recommended. However, this option is useful if the interface driver lacks
+native XDP support, or when simply testing the tool.
+
+.SS "-v, --verbose"
+.PP
+Enable verbose logging. Supply twice to enable verbose logging from the
+underlying \fIlibxdp\fP and \fIlibbpf\fP libraries.
+
+.SS "--version"
+.PP
+Show the application version and exit.
+
+.SS "-h, --help"
+.PP
+Display a summary of the available options
+
+.SH "The PASS command"
+.PP
+In this mode, \fIxdp\-bench\fP installs an XDP program on an interface that passes
+all packets to the network stack after processing them (returning \fIXDP_PASS\fP).
+There are options to control what to do with the packet before passing it
+(touch the packet data or not), as well as which statistics to gather. This is a
+basic benchmark for the overhead of installing an XDP program on an interface
+while still running the regular network stack.
+
+.PP
+The syntax for the \fIpass\fP command is:
+
+.PP
+\fIxdp\-bench pass [options] <ifname>\fP
+
+.PP
+Where \fI<ifname>\fP is the name of the interface the XDP program should be
+installed on.
+
+.PP
+The supported options are:
+
+.SS "-p, --packet-operation <ACTION>"
+.PP
+Specify which operation should be taken on the packet before passing it. The
+following actions are available:
+
+.RS
+.nf
+\fCno-touch - Pass the packet without touching the packet data
+touch - Read a field in the packet header before passing
+swap-macs - Swap the source and destination MAC addresses before passing
+\fP
+.fi
+.RE
+
+.PP
+The default for this option is \fIno\-touch\fP.
+
+.SS "-r, --rxq-stats"
+.PP
+If set, the XDP program will also gather statistics on which receive queue index
+each packet was received on. This is displayed in the extended output mode along
+with per-CPU data (which, depending on the hardware configuration may or may not
+be equivalent).
+
+.SS "-i, --interval <SECONDS>"
+.PP
+Set the polling interval for collecting all statistics and displaying them to
+the output. The unit of interval is in seconds.
+
+.SS "-e, --extended"
+.PP
+Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in
+"terse" mode. The output mode can be switched by hitting C-\e while the program
+is running. See also the \fBOutput Format Description\fP section below.
+
+.SS "-m, --mode"
+.PP
+Selects the XDP program mode (native or skb). Note that native XDP mode is the
+default, and loading the redirect program in skb manner is neither performant,
+nor recommended. However, this option is useful if the interface driver lacks
+native XDP support, or when simply testing the tool.
+
+.SS "-v, --verbose"
+.PP
+Enable verbose logging. Supply twice to enable verbose logging from the
+underlying \fIlibxdp\fP and \fIlibbpf\fP libraries.
+
+.SS "--version"
+.PP
+Show the application version and exit.
+
+.SS "-h, --help"
+.PP
+Display a summary of the available options
+
+.SH "The TX command"
+.PP
+In this mode, \fIxdp\-bench\fP installs an XDP program on an interface that performs
+so-called "hairpin forwarding", which means each packet is transmitted back out
+the same interface (using the \fIXDP_TX\fP return code). There are options to
+control what to do with the packet before transmitting it (touch the packet data
+or not), as well as which statistics to gather.
+
+.PP
+The syntax for the \fItx\fP command is:
+
+.PP
+\fIxdp\-bench tx [options] <ifname>\fP
+
+.PP
+Where \fI<ifname>\fP is the name of the interface the XDP program should be
+installed on.
+
+.PP
+The supported options are:
+
+.SS "-p, --packet-operation <ACTION>"
+.PP
+Specify which operation should be taken on the packet before transmitting it. The
+following actions are available:
+
+.RS
+.nf
+\fCno-touch - Transmit the packet without touching the packet data
+touch - Read a field in the packet header before transmitting
+swap-macs - Swap the source and destination MAC addresses before transmitting
+\fP
+.fi
+.RE
+
+.PP
+To allow the packet to be successfully transmitted back to the sender, the MAC
+addresses have to be swapped, so that the source MAC matches the network device.
+However, there is a performance overhead in doing swapping, so this option
+allows this function to be turned off.
+
+.PP
+The default for this option is \fIswap\-macs\fP.
+
+.SS "-r, --rxq-stats"
+.PP
+If set, the XDP program will also gather statistics on which receive queue index
+each packet was received on. This is displayed in the extended output mode along
+with per-CPU data (which, depending on the hardware configuration may or may not
+be equivalent).
+
+.SS "-i, --interval <SECONDS>"
+.PP
+Set the polling interval for collecting all statistics and displaying them to
+the output. The unit of interval is in seconds.
+
+.SS "-e, --extended"
+.PP
+Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in
+"terse" mode. The output mode can be switched by hitting C-\e while the program
+is running. See also the \fBOutput Format Description\fP section below.
+
+.SS "-m, --mode"
+.PP
+Selects the XDP program mode (native or skb). Note that native XDP mode is the
+default, and loading the redirect program in skb manner is neither performant,
+nor recommended. However, this option is useful if the interface driver lacks
+native XDP support, or when simply testing the tool.
+
+.SS "-v, --verbose"
+.PP
+Enable verbose logging. Supply twice to enable verbose logging from the
+underlying \fIlibxdp\fP and \fIlibbpf\fP libraries.
+
+.SS "--version"
+.PP
+Show the application version and exit.
+
+.SS "-h, --help"
+.PP
+Display a summary of the available options
+
+.SH "The REDIRECT command"
+.PP
+In this mode, \fIxdp\-bench\fP sets up packet redirection between the two
+interfaces supplied on the command line using the \fIbpf_redirect\fP BPF helper
+triggered on packet reception on the ingress interface.
+
+.PP
+The syntax for the \fIredirect\fP command is:
+
+.PP
+\fIxdp\-bench redirect [options] <ifname_in> <ifname_out>\fP
+
+.PP
+Where \fI<ifname_in>\fP is the name of the input interface from where packets will
+be redirected to the output interface \fI<ifname_out>\fP.
+
+.PP
+The supported options are:
+
+.SS "-i, --interval <SECONDS>"
+.PP
+Set the polling interval for collecting all statistics and displaying them to
+the output. The unit of interval is in seconds.
+
+.SS "-s, --stats"
+.PP
+Enable statistics for successful redirection. This option comes with a per
+packet tracing overhead, for recording all successful redirections.
+
+.SS "-e, --extended"
+.PP
+Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in
+"terse" mode. The output mode can be switched by hitting C-\e while the program
+is running. See also the \fBOutput Format Description\fP section below.
+
+.SS "-m, --mode"
+.PP
+Selects the XDP program mode (native or skb). Note that native XDP mode is the
+default, and loading the redirect program in skb manner is neither performant,
+nor recommended. However, this option is useful if the interface driver lacks
+native XDP support, or when simply testing the tool.
+
+.SS "-v, --verbose"
+.PP
+Enable verbose logging. Supply twice to enable verbose logging from the
+underlying \fIlibxdp\fP and \fIlibbpf\fP libraries.
+
+.SS "--version"
+.PP
+Show the application version and exit.
+
+.SS "-h, --help"
+.PP
+Display a summary of the available options
+
+.SH "The REDIRECT-CPU command"
+.PP
+In this mode, \fIxdp\-bench\fP sets up packet redirection using the
+\fIbpf_redirect_map\fP BPF helper triggered on packet reception on the ingress
+interface, using a cpumap as its target. Hence, this tool can be used to
+redirect packets on an interface from one CPU to another. In addition to this,
+the tool then supports redirecting the packet to another output device when it
+is processed on the target CPU.
+
+.PP
+The syntax for the \fIredirect\-cpu\fP command is:
+
+.PP
+\fIxdp\-bench redirect\-cpu [options] <ifname> \-c 0 ... \-c N\fP
+
+.PP
+Where \fI<ifname>\fP is the name of the input interface from where packets will be
+redirected to the target CPU list specified using \fI\-c\fP.
+
+.PP
+The supported options are:
+
+.SS "-c, --cpu <CPU>"
+.PP
+Specify a possible target CPU index. This option must be passed at least once,
+and can be passed multiple times to specify a list of CPUs. Which CPU is chosen
+for a given packet depends on the value of the \fI\-\-program\-mode\fP option,
+described below.
+
+.SS "-p, --program-mode <MODE>"
+.PP
+Specify a program that embeds a predefined policy deciding how packets are
+redirected to different CPUs. The following options are available:
+
+.RS
+.nf
+\fCno-touch - Redirect without touching packet data
+touch - Read packet data before redirecting
+round-robin - Cycle between target CPUs in a round-robin fashion (for each packet)
+l4-proto - Choose the target CPU based on the layer-4 protocol of packet
+l4-filter - Like l4-proto, but drop UDP packets with destination port 9 (used by pktgen)
+l4-hash - Use source and destination IP hashing to pick target CPU
+\fP
+.fi
+.RE
+
+.PP
+The \fIno\-touch\fP and \fItouch\fP modes always redirect packets to the same CPU (the
+first value supplied to \fI\-\-cpu\fP). The \fIround\-robin\fP and \fIl4\-hash\fP modes
+distribute packets between all the CPUs supplied as \fI\-\-cpu\fP arguments, while
+\fIl4\-proto\fP and \fIl4\-filter\fP send TCP and unrecognised packets to CPU index 0, UDP
+packets to CPU index 1 and ICMP packets to CPU index 2 (where the index refers
+to the order the actual CPUs are given on the command line).
+
+.PP
+The default for this option is \fIl4\-hash\fP.
+
+.SS "-r, --remote-action <ACTION>"
+.PP
+If this option is set, a separate program is installed into the cpumap, which
+will be invoked on the remote CPU after the packet is processed there. The
+action can be either \fIdrop\fP or \fIpass\fP which will drop the packet or pass it to
+the regular networking stack, respectively. Or it can be \fIredirect\fP, which will
+cause the packet to be redirected to another interface and transmitted out that
+interface on the remote CPU. If this option is set to \fIredirect\fP the target
+device must be specified using \fI\-\-redirect\-device\fP.
+
+.PP
+The default for this option is \fIdisabled\fP.
+
+.SS "-D, --redirect-device <IFNAME>"
+.PP
+Specify the device to redirect the packet to when it is received on the target CPU.
+Note that this option can only be specified with \fI\-\-remote\-action redirect\fP.
+
+.SS "-q, --qsize <PACKETS>"
+.PP
+Set the queue size for the per-CPU cpumap ring buffer used for redirecting
+packets from multiple CPUs to one CPU. The default value is 2048 packets.
+
+.SS "-x, --stress-mode"
+.PP
+Stress the cpumap implementation by deallocating and reallocating the cpumap
+ring buffer on each polling interval.
+
+.SS "-i, --interval <SECONDS>"
+.PP
+Set the polling interval for collecting all statistics and displaying them to
+the output. The unit of interval is in seconds.
+
+.SS "-s, --stats"
+.PP
+Enable statistics for successful redirection. This option comes with a per
+packet tracing overhead, for recording all successful redirections.
+
+.SS "-e, --extended"
+.PP
+Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in
+"terse" mode. The output mode can be switched by hitting C-\e while the program
+is running. See also the \fBOutput Format Description\fP section below.
+
+.SS "-m, --mode"
+.PP
+Selects the XDP program mode (native or skb). Note that native XDP mode is the
+default, and loading the redirect program in skb manner is neither performant,
+nor recommended. However, this option is useful if the interface driver lacks
+native XDP support, or when simply testing the tool.
+
+.SS "-v, --verbose"
+.PP
+Enable verbose logging. Supply twice to enable verbose logging from the
+underlying \fIlibxdp\fP and \fIlibbpf\fP libraries.
+
+.SS "--version"
+.PP
+Show the application version and exit.
+
+.SS "-h, --help"
+.PP
+Display a summary of the available options
+
+.SH "The REDIRECT-MAP command"
+.PP
+In this mode, \fIxdp\-bench\fP sets up packet redirection between two interfaces
+supplied on the command line using the \fIbpf_redirect_map()\fP BPF helper triggered
+on packet reception on the ingress interface, using a devmap as its target.
+
+.PP
+The syntax for the \fIredirect\-map\fP command is:
+
+.PP
+\fIxdp\-bench redirect\-map [options] <ifname_in> <ifname_out>\fP
+
+.PP
+Where \fI<ifname_in>\fP is the name of the input interface from where packets will
+be redirected to the output interface \fI<ifname_out>\fP.
+
+.PP
+The supported options are:
+
+.SS "-X, --load-egress"
+.PP
+Load a program in the devmap entry used for redirection, so that it is invoked
+after the packet is redirected to the target device, before it is transmitted
+out of the output interface. The remote program will update the packet data so
+its source MAC address matches the one of the destination interface.
+
+.SS "-i, --interval <SECONDS>"
+.PP
+Set the polling interval for collecting all statistics and displaying them to
+the output. The unit of interval is in seconds.
+
+.SS "-s, --stats"
+.PP
+Enable statistics for successful redirection. This option comes with a per
+packet tracing overhead, for recording all successful redirections.
+
+.SS "-e, --extended"
+.PP
+Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in
+"terse" mode. The output mode can be switched by hitting C-\e while the program
+is running. See also the \fBOutput Format Description\fP section below.
+
+.SS "-m, --mode"
+.PP
+Selects the XDP program mode (native or skb). Note that native XDP mode is the
+default, and loading the redirect program in skb manner is neither performant,
+nor recommended. However, this option is useful if the interface driver lacks
+native XDP support, or when simply testing the tool.
+
+.SS "-v, --verbose"
+.PP
+Enable verbose logging. Supply twice to enable verbose logging from the
+underlying \fIlibxdp\fP and \fIlibbpf\fP libraries.
+
+.SS "--version"
+.PP
+Show the application version and exit.
+
+.SS "-h, --help"
+.PP
+Display a summary of the available options
+
+.SH "The REDIRECT-MULTI command"
+.PP
+In this mode, \fIxdp\-bench\fP sets up one-to-many packet redirection between
+interfaces supplied on the command line, using the \fIbpf_redirect_map\fP BPF helper
+triggered on packet reception on the ingress interface, using a devmap as its
+target. The packet is broadcast to all output interfaces specified on the
+command line, using devmap's packet broadcast feature.
+
+.PP
+The syntax for the \fIredirect\-multi\fP command is:
+
+.PP
+\fIxdp\-bench redirect\-multi [options] <ifname_in> <ifname_out1> ... <ifname_outN>\fP
+
+.PP
+Where \fI<ifname_in>\fP is the name of the input interface from where packets will
+be redirected to one or many output interface(s).
+
+.PP
+The supported options are:
+
+.SS "-X, --load-egress"
+.PP
+Load a program in the devmap entry used for redirection, so that it is invoked
+after the packet is redirected to the target device, before it is transmitted
+out of the output interface. The remote program will update the packet data so
+its source MAC address matches the one of the destination interface.
+
+.SS "-i, --interval <SECONDS>"
+.PP
+Set the polling interval for collecting all statistics and displaying them to
+the output. The unit of interval is in seconds.
+
+.SS "-s, --stats"
+.PP
+Enable statistics for successful redirection. This option comes with a per
+packet tracing overhead, for recording all successful redirections.
+
+.SS "-e, --extended"
+.PP
+Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in
+"terse" mode. The output mode can be switched by hitting C-\e while the program
+is running. See also the \fBOutput Format Description\fP section below.
+
+.SS "-m, --mode"
+.PP
+Selects the XDP program mode (native or skb). Note that native XDP mode is the
+default, and loading the redirect program in skb manner is neither performant,
+nor recommended. However, this option is useful if the interface driver lacks
+native XDP support, or when simply testing the tool.
+
+.SS "-v, --verbose"
+.PP
+Enable verbose logging. Supply twice to enable verbose logging from the
+underlying \fIlibxdp\fP and \fIlibbpf\fP libraries.
+
+.SS "--version"
+.PP
+Show the application version and exit.
+
+.SS "-h, --help"
+.PP
+Display a summary of the available options
+
+
+.SH "Output Format Description"
+.PP
+By default, redirect success statistics are disabled; use \fI\-\-stats\fP to enable them.
+The terse output mode is the default; extended output mode can be activated using
+the \fI\-\-extended\fP command line option.
+
+.PP
+SIGQUIT (Ctrl + \\) can be used to switch the mode dynamically at runtime.
+
+.PP
+Terse mode displays at most the following fields:
+.RS
+.nf
+\fCrx/s Number of packets received per second
+redir/s Number of packets successfully redirected per second
+err,drop/s Aggregated count of errors per second (including dropped packets when not using the drop command)
+xmit/s Number of packets transmitted on the output device per second
+\fP
+.fi
+.RE
+
+.PP
+Extended output mode displays at most the following fields:
+.RS
+.nf
+\fCFIELD DESCRIPTION
+receive Displays the number of packets received and errors encountered
+
+ Whenever an error or packet drop occurs, details of per CPU error
+ and drop statistics will be expanded inline in terse mode.
+ pkt/s - Packets received per second
+ drop/s - Packets dropped per second
+ error/s - Errors encountered per second
+ redirect - Displays the number of packets successfully redirected
+ Errors encountered are expanded under redirect_err field
+ Note that passing -s to enable it has a per packet overhead
+ redir/s - Packets redirected successfully per second
+
+
+redirect_err Displays the number of packets that failed redirection
+
+ The errno is expanded under this field with per CPU count
+ The recognized errors are:
+ EINVAL: Invalid redirection
+ ENETDOWN: Device being redirected to is down
+ EMSGSIZE: Packet length too large for device
+ EOPNOTSUPP: Operation not supported
+ ENOSPC: No space in ptr_ring of cpumap kthread
+
+ error/s - Packets that failed redirection per second
+
+
+enqueue to cpu N Displays the number of packets enqueued to bulk queue of CPU N
+ Expands to cpu:FROM->N to display enqueue stats for each CPU enqueuing to CPU N
+ Received packets can be associated with the CPU redirect program is enqueuing
+ packets to.
+ pkt/s - Packets enqueued per second from other CPU to CPU N
+ drop/s - Packets dropped when trying to enqueue to CPU N
+ bulk-avg - Average number of packets processed for each event
+
+
+kthread Displays the number of packets processed in CPUMAP kthread for each CPU
+ Packets consumed from ptr_ring in kthread, and its xdp_stats (after calling
+ CPUMAP bpf prog) are expanded below this. xdp_stats are expanded as a total and
+ then per-CPU to associate it to each CPU's pinned CPUMAP kthread.
+ pkt/s - Packets consumed per second from ptr_ring
+ drop/s - Packets dropped per second in kthread
+ sched - Number of times kthread called schedule()
+
+ xdp_stats (also expands to per-CPU counts)
+ pass/s - XDP_PASS count for CPUMAP program execution
+ drop/s - XDP_DROP count for CPUMAP program execution
+ redir/s - XDP_REDIRECT count for CPUMAP program execution
+
+
+xdp_exception Displays xdp_exception tracepoint events
+
+ This can occur due to internal driver errors, unrecognized
+ XDP actions and due to explicit user trigger by use of XDP_ABORTED
+ Each action is expanded below this field with its count
+ hit/s - Number of times the tracepoint was hit per second
+
+
+devmap_xmit Displays devmap_xmit tracepoint events
+
+ This tracepoint is invoked for successful transmissions on output
+ device but these statistics are not available for generic XDP mode,
+ hence they will be omitted from the output when using SKB mode
+ xmit/s - Number of packets that were transmitted per second
+ drop/s - Number of packets that failed transmissions per second
+ drv_err/s - Number of internal driver errors per second
+ bulk-avg - Average number of packets processed for each event
+\fP
+.fi
+.RE
+
+.SH "BUGS"
+.PP
+Please report any bugs on Github: \fIhttps://github.com/xdp-project/xdp-tools/issues\fP
+
+.SH "AUTHOR"
+.PP
+Earlier xdp-redirect tools were written by Jesper Dangaard Brouer and John
+Fastabend. They were then rewritten to support more features by Kumar Kartikeya
+Dwivedi, who also ported them to xdp-tools together with Toke Høiland-Jørgensen.
+This man page was written by Kumar Kartikeya Dwivedi and Toke Høiland-Jørgensen.
diff --git a/xdp-bench/xdp-bench.c b/xdp-bench/xdp-bench.c
new file mode 100644
index 0000000..4a09514
--- /dev/null
+++ b/xdp-bench/xdp-bench.c
@@ -0,0 +1,275 @@
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+#include "xdp-bench.h"
+#include "params.h"
+
+#define PROG_NAME "xdp-bench"
+
+int do_help(__unused const void *cfg, __unused const char *pin_root_path)
+{
+ static const char usage[] = /* top-level usage text; both arguments are ignored */
+  "Usage: xdp-bench COMMAND [options]\n\n"
+  "COMMAND can be one of:\n"
+  " drop - Drop all packets on an interface\n"
+  " pass - Pass all packets to the network stack\n"
+  " tx - Transmit packets back out on an interface (hairpin forwarding)\n"
+  " redirect - XDP redirect using the bpf_redirect() helper\n"
+  " redirect-cpu - XDP CPU redirect using BPF_MAP_TYPE_CPUMAP\n"
+  " redirect-map - XDP redirect using BPF_MAP_TYPE_DEVMAP\n"
+  " redirect-multi - XDP multi-redirect using BPF_MAP_TYPE_DEVMAP and the BPF_F_BROADCAST flag\n"
+  " help - show this help message\n"
+  "\n"
+  "Use 'xdp-bench COMMAND --help' to see options for each command\n";
+ fputs(usage, stderr); /* help goes to stderr, not stdout */
+ return -1; /* always reports failure to the caller */
+}
+
+
+struct enum_val xdp_modes[] = { /* Names accepted by the -m option; used as .typearg in the option tables below. */
+ {"native", XDP_MODE_NATIVE},
+ {"skb", XDP_MODE_SKB},
+ {NULL, 0} /* NULL-named last entry, presumably the end-of-table marker */
+};
+
+struct enum_val basic_program_modes[] = { /* Names accepted by -p in basic_options (drop, pass and tx commands). */
+ {"no-touch", BASIC_NO_TOUCH},
+ {"read-data", BASIC_READ_DATA}, /* NOTE(review): xdp-bench.8 documents this value as "touch" - confirm which name is intended */
+ {"swap-macs", BASIC_SWAP_MACS},
+ {NULL, 0} /* NULL-named last entry, presumably the end-of-table marker */
+};
+
+struct enum_val cpumap_remote_actions[] = { /* Names accepted by --remote-action of the redirect-cpu command. */
+ {"disabled", ACTION_DISABLED},
+ {"drop", ACTION_DROP},
+ {"pass", ACTION_PASS},
+ {"redirect", ACTION_REDIRECT},
+ {NULL, 0} /* NULL-named last entry, presumably the end-of-table marker */
+};
+
+struct enum_val cpumap_program_modes[] = { /* Names accepted by --program-mode of the redirect-cpu command. */
+ {"no-touch", CPUMAP_NO_TOUCH},
+ {"touch", CPUMAP_TOUCH_DATA},
+ {"round-robin", CPUMAP_CPU_ROUND_ROBIN},
+ {"l4-proto", CPUMAP_CPU_L4_PROTO},
+ {"l4-filter", CPUMAP_CPU_L4_PROTO_FILTER},
+ {"l4-hash", CPUMAP_CPU_L4_HASH},
+ {NULL, 0} /* NULL-named last entry, presumably the end-of-table marker */
+};
+
+
+struct prog_option basic_options[] = { /* Option table shared by the drop, pass and tx entries of cmds[]; targets struct basic_opts. */
+ DEFINE_OPTION("program-mode", OPT_ENUM, struct basic_opts, program_mode, /* NOTE(review): xdp-bench.8 calls this --packet-operation */
+ .short_opt = 'p',
+ .metavar = "<mode>",
+ .typearg = basic_program_modes,
+ .help = "Action to take before dropping packet."), /* NOTE(review): text says "dropping" but the table also serves pass and tx */
+ DEFINE_OPTION("rxq-stats", OPT_BOOL, struct basic_opts, rxq_stats,
+ .short_opt = 'r',
+ .help = "Collect per-RXQ drop statistics"),
+ DEFINE_OPTION("interval", OPT_U32, struct basic_opts, interval,
+ .short_opt = 'i',
+ .metavar = "<seconds>",
+ .help = "Polling interval (default 2)"),
+ DEFINE_OPTION("extended", OPT_BOOL, struct basic_opts, extended,
+ .short_opt = 'e',
+ .help = "Start running in extended output mode (C^\\ to toggle)"),
+ DEFINE_OPTION("xdp-mode", OPT_ENUM, struct basic_opts, mode, /* NOTE(review): long name is "xdp-mode" here but "mode" in the redirect/devmap tables and in xdp-bench.8 */
+ .short_opt = 'm',
+ .typearg = xdp_modes,
+ .metavar = "<mode>",
+ .help = "Load XDP program in <mode>; default native"),
+ DEFINE_OPTION("dev", OPT_IFNAME, struct basic_opts, iface_in, /* the only positional argument, and it is required */
+ .positional = true,
+ .metavar = "<ifname>",
+ .required = true,
+ .help = "Load on device <ifname>"),
+ END_OPTIONS
+};
+
+struct prog_option redirect_basic_options[] = { /* Option table targeting struct redirect_opts (the "redirect" command). */
+ DEFINE_OPTION("interval", OPT_U32, struct redirect_opts, interval,
+ .short_opt = 'i',
+ .metavar = "<seconds>",
+ .help = "Polling interval (default 2)"),
+ DEFINE_OPTION("stats", OPT_BOOL, struct redirect_opts, stats,
+ .short_opt = 's',
+ .help = "Enable statistics for transmitted packets (not just errors)"),
+ DEFINE_OPTION("extended", OPT_BOOL, struct redirect_opts, extended,
+ .short_opt = 'e',
+ .help = "Start running in extended output mode (C^\\ to toggle)"),
+ DEFINE_OPTION("mode", OPT_ENUM, struct redirect_opts, mode,
+ .short_opt = 'm',
+ .typearg = xdp_modes,
+ .metavar = "<mode>",
+ .help = "Load XDP program in <mode>; default native"),
+ DEFINE_OPTION("dev_in", OPT_IFNAME, struct redirect_opts, iface_in, /* first required positional: ingress interface */
+ .positional = true,
+ .metavar = "<ifname_in>",
+ .required = true,
+ .help = "Redirect from device <ifname>"),
+ DEFINE_OPTION("dev_out", OPT_IFNAME, struct redirect_opts, iface_out, /* second required positional: egress interface */
+ .positional = true,
+ .metavar = "<ifname_out>",
+ .required = true,
+ .help = "Redirect to device <ifname>"),
+ END_OPTIONS
+};
+
+struct prog_option redirect_cpumap_options[] = { /* Option table for the redirect-cpu command; every entry targets struct cpumap_opts. */
+ DEFINE_OPTION("cpu", OPT_U32_MULTI, struct cpumap_opts, cpus, /* required; may be repeated to build a CPU list */
+ .short_opt = 'c',
+ .metavar = "<cpu>",
+ .required = true,
+ .help = "Insert CPU <cpu> into CPUMAP (can be specified multiple times)"),
+ DEFINE_OPTION("dev", OPT_IFNAME, struct cpumap_opts, iface_in, /* the only positional argument */
+ .positional = true,
+ .metavar = "<ifname>",
+ .required = true,
+ .help = "Run on <ifname>"),
+ DEFINE_OPTION("program-mode", OPT_ENUM, struct cpumap_opts, program_mode,
+ .short_opt = 'p',
+ .metavar = "<mode>",
+ .typearg = cpumap_program_modes,
+ .help = "Redirect to CPUs using <mode>. Default l4-hash."),
+ DEFINE_OPTION("remote-action", OPT_ENUM, struct cpumap_opts, remote_action,
+ .short_opt = 'r',
+ .metavar = "<action>",
+ .typearg = cpumap_remote_actions,
+ .help = "Perform <action> on the remote CPU. Default disabled."),
+ DEFINE_OPTION("redirect-device", OPT_IFNAME, struct cpumap_opts, redir_iface, /* short option is -D; -r is taken by remote-action */
+ .short_opt = 'D',
+ .metavar = "<ifname>",
+ .help = "Redirect packets to <ifname> on remote CPU (when --remote-action is 'redirect')"),
+ DEFINE_OPTION("qsize", OPT_U32, struct cpumap_opts, qsize,
+ .short_opt = 'q',
+ .metavar = "<packets>",
+ .help = "CPUMAP queue size (default 2048)"),
+ DEFINE_OPTION("stress-mode", OPT_BOOL, struct cpumap_opts, stress_mode,
+ .short_opt = 'x',
+ .help = "Stress the kernel CPUMAP setup and teardown code while running"),
+ DEFINE_OPTION("interval", OPT_U32, struct cpumap_opts, interval,
+ .short_opt = 'i',
+ .metavar = "<seconds>",
+ .help = "Polling interval (default 2)"),
+ DEFINE_OPTION("stats", OPT_BOOL, struct cpumap_opts, stats,
+ .short_opt = 's',
+ .help = "Enable statistics for transmitted packets (not just errors)"),
+ DEFINE_OPTION("extended", OPT_BOOL, struct cpumap_opts, extended, /* must name cpumap_opts: basic_opts has `extended` first, cpumap_opts has `stats` first */
+ .short_opt = 'e',
+ .help = "Start running in extended output mode (C^\\ to toggle)"),
+ DEFINE_OPTION("xdp-mode", OPT_ENUM, struct cpumap_opts, mode, /* NOTE(review): long name is "xdp-mode" here but "mode" in the redirect/devmap tables */
+ .short_opt = 'm',
+ .typearg = xdp_modes,
+ .metavar = "<mode>",
+ .help = "Load XDP program in <mode>; default native"),
+ END_OPTIONS
+};
+
+struct prog_option redirect_devmap_options[] = { /* Option table for the redirect-map command; targets struct devmap_opts. */
+ DEFINE_OPTION("load-egress", OPT_BOOL, struct devmap_opts, load_egress,
+ .short_opt = 'X',
+ .help = "Load an egress program into the devmap"),
+ DEFINE_OPTION("interval", OPT_U32, struct devmap_opts, interval,
+ .short_opt = 'i',
+ .metavar = "<seconds>",
+ .help = "Polling interval (default 2)"),
+ DEFINE_OPTION("stats", OPT_BOOL, struct devmap_opts, stats,
+ .short_opt = 's',
+ .help = "Enable statistics for transmitted packets (not just errors)"),
+ DEFINE_OPTION("extended", OPT_BOOL, struct devmap_opts, extended,
+ .short_opt = 'e',
+ .help = "Start running in extended output mode (C^\\ to toggle)"),
+ DEFINE_OPTION("mode", OPT_ENUM, struct devmap_opts, mode,
+ .short_opt = 'm',
+ .typearg = xdp_modes,
+ .metavar = "<mode>",
+ .help = "Load XDP program in <mode>; default native"),
+ DEFINE_OPTION("dev_in", OPT_IFNAME, struct devmap_opts, iface_in, /* first required positional: ingress interface */
+ .positional = true,
+ .metavar = "<ifname_in>",
+ .required = true,
+ .help = "Redirect from device <ifname>"),
+ DEFINE_OPTION("dev_out", OPT_IFNAME, struct devmap_opts, iface_out, /* second required positional: egress interface */
+ .positional = true,
+ .metavar = "<ifname_out>",
+ .required = true,
+ .help = "Redirect to device <ifname>"),
+ END_OPTIONS
+};
+
+struct prog_option redirect_devmap_multi_options[] = { /* Option table for the redirect-multi command; targets struct devmap_multi_opts. */
+ DEFINE_OPTION("load-egress", OPT_BOOL, struct devmap_multi_opts, load_egress,
+ .short_opt = 'X',
+ .help = "Load an egress program into the devmap"),
+ DEFINE_OPTION("interval", OPT_U32, struct devmap_multi_opts, interval,
+ .short_opt = 'i',
+ .metavar = "<seconds>",
+ .help = "Polling interval (default 2)"),
+ DEFINE_OPTION("stats", OPT_BOOL, struct devmap_multi_opts, stats,
+ .short_opt = 's',
+ .help = "Enable statistics for transmitted packets (not just errors)"),
+ DEFINE_OPTION("extended", OPT_BOOL, struct devmap_multi_opts, extended,
+ .short_opt = 'e',
+ .help = "Start running in extended output mode (C^\\ to toggle)"),
+ DEFINE_OPTION("mode", OPT_ENUM, struct devmap_multi_opts, mode,
+ .short_opt = 'm',
+ .typearg = xdp_modes,
+ .metavar = "<mode>",
+ .help = "Load XDP program in <mode>; default native"),
+ DEFINE_OPTION("devs", OPT_IFNAME_MULTI, struct devmap_multi_opts, ifaces, /* one positional list holding input and output interfaces */
+ .positional = true,
+ .metavar = "<ifname...>",
+ .min_num = 2, /* at least two names, bounded above by MAX_IFACE_NUM */
+ .max_num = MAX_IFACE_NUM,
+ .required = true,
+ .help = "Redirect from and to devices <ifname...>"),
+ END_OPTIONS
+};
+
+static const struct prog_command cmds[] = { /* Subcommand table handed to dispatch_commands() by main(). */
+ { .name = "drop", /* drop, pass and tx share basic_options and differ in handler and defaults */
+ .func = do_drop,
+ .options = basic_options,
+ .default_cfg = &defaults_drop,
+ .doc = "Drop all packets on an interface" },
+ { .name = "pass",
+ .func = do_pass,
+ .options = basic_options,
+ .default_cfg = &defaults_pass,
+ .doc = "Pass all packets to the network stack" },
+ { .name = "tx",
+ .func = do_tx,
+ .options = basic_options,
+ .default_cfg = &defaults_tx,
+ .doc = "Transmit packets back out an interface (hairpin forwarding)" },
+ DEFINE_COMMAND_NAME("redirect", redirect_basic, /* presumably expands to do_redirect_basic / redirect_basic_options - confirm against the macro */
+ "XDP redirect using the bpf_redirect() helper"),
+ DEFINE_COMMAND_NAME("redirect-cpu", redirect_cpumap,
+ "XDP CPU redirect using BPF_MAP_TYPE_CPUMAP"),
+ DEFINE_COMMAND_NAME("redirect-map", redirect_devmap,
+ "XDP redirect using BPF_MAP_TYPE_DEVMAP"),
+ DEFINE_COMMAND_NAME(
+ "redirect-multi", redirect_devmap_multi,
+ "XDP multi-redirect using BPF_MAP_TYPE_DEVMAP and the BPF_F_BROADCAST flag"),
+ { .name = "help", .func = do_help, .no_cfg = true }, /* help has no option table or config */
+ END_COMMANDS
+};
+
+union all_opts { /* Used only for sizeof() in main(): the config size passed to dispatch_commands(). */
+ struct basic_opts basic;
+ struct cpumap_opts cpumap;
+ struct devmap_opts devmap; /* NOTE(review): struct redirect_opts is not a member; its fields are a subset of devmap_opts, so the size still covers it */
+ struct devmap_multi_opts devmap_multi;
+};
+
+int main(int argc, char **argv)
+{
+ if (argc <= 1) /* no subcommand given: print usage and return its -1 */
+  return do_help(NULL, NULL);
+
+ return dispatch_commands(argv[1], argc - 1, argv + 1, cmds, /* argv[1] is the command name */
+     sizeof(union all_opts), PROG_NAME, false);
+}
diff --git a/xdp-bench/xdp-bench.h b/xdp-bench/xdp-bench.h
new file mode 100644
index 0000000..b163119
--- /dev/null
+++ b/xdp-bench/xdp-bench.h
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef XDP_REDIRECT_H
+#define XDP_REDIRECT_H
+
+#include <xdp/libxdp.h>
+#include "params.h"
+#include "util.h"
+
+/* Upper bound (.max_num) on the interfaces accepted by the "devs" option */
+#define MAX_IFACE_NUM 32
+
+/* Sub-command entry points. Each receives the parsed option struct of its
+ * command as an opaque pointer and returns an EXIT_* style status code.
+ * pin_root_path is marked __unused by the implementations shown in this
+ * patch.
+ */
+int do_drop(const void *cfg, const char *pin_root_path);
+int do_pass(const void *cfg, const char *pin_root_path);
+int do_tx(const void *cfg, const char *pin_root_path);
+int do_redirect_basic(const void *cfg, const char *pin_root_path);
+int do_redirect_cpumap(const void *cfg, const char *pin_root_path);
+int do_redirect_devmap(const void *cfg, const char *pin_root_path);
+int do_redirect_devmap_multi(const void *cfg, const char *pin_root_path);
+
+/* How much of the packet the basic program touches. The order matters:
+ * do_basic() enables read_data for any mode >= BASIC_READ_DATA and swap_macs
+ * for any mode >= BASIC_SWAP_MACS, so each level includes the previous one.
+ */
+enum basic_program_mode {
+ BASIC_NO_TOUCH,
+ BASIC_READ_DATA,
+ BASIC_SWAP_MACS,
+};
+
+/* Options for the drop/pass/tx commands, consumed by do_basic() */
+struct basic_opts {
+ /* call sample_switch_mode() at startup */
+ bool extended;
+ /* enable the per-RX-queue counters (SAMPLE_RXQ_STATS) */
+ bool rxq_stats;
+ /* passed to sample_run() */
+ __u32 interval;
+ /* attach mode passed to xdp_program__attach() */
+ enum xdp_attach_mode mode;
+ enum basic_program_mode program_mode;
+ /* interface the XDP program is attached to */
+ struct iface iface_in;
+};
+
+/* Options for the "redirect" command, consumed by do_redirect_basic() */
+struct redirect_opts {
+ /* add SAMPLE_REDIRECT_CNT to the statistics mask */
+ bool stats;
+ /* call sample_switch_mode() at startup */
+ bool extended;
+ /* passed to sample_run() */
+ __u32 interval;
+ /* attach mode used for both the redirect and the dummy program */
+ enum xdp_attach_mode mode;
+ /* interface the redirect program is attached to */
+ struct iface iface_in;
+ /* redirect target; a dummy xdp_pass program is attached here */
+ struct iface iface_out;
+};
+
+/* Options for do_redirect_devmap(), whose implementation is not part of this
+ * chunk. NOTE(review): the fields presumably mirror struct redirect_opts
+ * plus load_egress - confirm against xdp_redirect_devmap.c.
+ */
+struct devmap_opts {
+ bool stats;
+ bool extended;
+ bool load_egress;
+ __u32 interval;
+ enum xdp_attach_mode mode;
+ struct iface iface_in;
+ struct iface iface_out;
+};
+
+/* Options for do_redirect_devmap_multi(), whose implementation is not part
+ * of this chunk.
+ */
+struct devmap_multi_opts {
+ bool stats;
+ bool extended;
+ bool load_egress;
+ __u32 interval;
+ enum xdp_attach_mode mode;
+ /* filled by the positional "devs" option (OPT_IFNAME_MULTI, 2 to
+  * MAX_IFACE_NUM entries)
+  */
+ struct iface *ifaces;
+};
+
+/* Which program set_cpumap_prog() selects to run on the remote CPU:
+ * none (returns 0), cpumap_drop, cpumap_pass or cpumap_redirect.
+ */
+enum cpumap_remote_action {
+ ACTION_DISABLED,
+ ACTION_DROP,
+ ACTION_PASS,
+ ACTION_REDIRECT,
+};
+
+/* Selects the XDP program used for CPU redirection. The value is used as an
+ * index into cpumap_prog_names[] in xdp_redirect_cpumap.c, so the two must be
+ * kept in the same order.
+ */
+enum cpumap_program_mode {
+ CPUMAP_NO_TOUCH,
+ CPUMAP_TOUCH_DATA,
+ CPUMAP_CPU_ROUND_ROBIN,
+ CPUMAP_CPU_L4_PROTO,
+ CPUMAP_CPU_L4_PROTO_FILTER,
+ CPUMAP_CPU_L4_HASH,
+};
+
+/* Options for the "redirect-cpu" command, consumed by do_redirect_cpumap() */
+struct cpumap_opts {
+ /* add SAMPLE_REDIRECT_MAP_CNT to the statistics mask */
+ bool stats;
+ /* call sample_switch_mode() at startup */
+ bool extended;
+ /* NOTE(review): presumably enables stress_cpumap() - use not shown here */
+ bool stress_mode;
+ __u32 interval;
+ /* cpumap queue size; defaults to 2048 */
+ __u32 qsize;
+ struct u32_multi cpus;
+ enum xdp_attach_mode mode;
+ enum cpumap_remote_action remote_action;
+ /* index into cpumap_prog_names[] */
+ enum cpumap_program_mode program_mode;
+ struct iface iface_in;
+ /* when set, SAMPLE_DEVMAP_XMIT_CNT_MULTI is added to the mask */
+ struct iface redir_iface;
+};
+
+/* Default option values referenced by the command table in xdp-bench.c;
+ * each is defined in the .c file implementing the corresponding command.
+ */
+extern const struct basic_opts defaults_drop;
+extern const struct basic_opts defaults_pass;
+extern const struct basic_opts defaults_tx;
+extern const struct redirect_opts defaults_redirect_basic;
+extern const struct cpumap_opts defaults_redirect_cpumap;
+extern const struct devmap_opts defaults_redirect_devmap;
+extern const struct devmap_multi_opts defaults_redirect_devmap_multi;
+
+#endif
diff --git a/xdp-bench/xdp_basic.bpf.c b/xdp-bench/xdp_basic.bpf.c
new file mode 100644
index 0000000..a803a4b
--- /dev/null
+++ b/xdp-bench/xdp_basic.bpf.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <bpf/vmlinux.h>
+#include <xdp/xdp_sample_shared.h>
+#include <xdp/xdp_sample.bpf.h>
+#include <xdp/xdp_sample_common.bpf.h>
+#include <xdp/parsing_helpers.h>
+
+/* Configuration written by user space through skel->rodata in do_basic()
+ * before the program is created and attached.
+ */
+const volatile bool read_data = 0;
+const volatile bool swap_macs = 0;
+const volatile bool rxq_stats = 0;
+const volatile enum xdp_action action = XDP_DROP;
+
+/* Basic benchmark program: count the packet in the rx_cnt record indexed by
+ * the current CPU id (and, if rxq_stats is set, in the rxq_cnt record of the
+ * RX queue), optionally read and rewrite the Ethernet header, then return
+ * the configured action.
+ */
+SEC("xdp")
+int xdp_basic_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ __u32 key = bpf_get_smp_processor_id();
+ struct datarec *rec, *rxq_rec;
+ struct ethhdr *eth = data;
+ __u64 nh_off;
+
+ /* Frames shorter than an Ethernet header are dropped uncounted */
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
+
+ /* rxq_rec is only assigned here and only used below under rxq_stats */
+ if (rxq_stats) {
+ key = ctx->rx_queue_index;
+ rxq_rec = bpf_map_lookup_elem(&rxq_cnt, &key);
+ if (!rxq_rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rxq_rec->processed);
+ }
+
+ if (read_data) {
+ /* Touch packet data by reading the EtherType */
+ if (bpf_ntohs(eth->h_proto) < ETH_P_802_3_MIN)
+ return XDP_ABORTED;
+
+ /* swap_macs only takes effect together with read_data */
+ if (swap_macs)
+ swap_src_dst_mac(data);
+ }
+
+ /* Only XDP_DROP is accounted in the 'dropped' counters */
+ if (action == XDP_DROP) {
+ NO_TEAR_INC(rec->dropped);
+ if (rxq_stats)
+ NO_TEAR_INC(rxq_rec->dropped);
+ }
+
+ return action;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/xdp-bench/xdp_basic.c b/xdp-bench/xdp_basic.c
new file mode 100644
index 0000000..f748c2b
--- /dev/null
+++ b/xdp-bench/xdp_basic.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <assert.h>
+#include <getopt.h>
+#include <libgen.h>
+#include <net/if.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/libbpf.h>
+#include <sys/resource.h>
+#include <linux/if_link.h>
+#include <xdp/libxdp.h>
+
+#include "logging.h"
+
+#include "xdp-bench.h"
+#include "xdp_sample.h"
+#include "xdp_basic.skel.h"
+
+/* Statistics passed to sample_init(); do_basic() ORs in SAMPLE_DROP_OK and
+ * SAMPLE_RXQ_STATS depending on the action and options.
+ */
+static int mask = SAMPLE_RX_CNT | SAMPLE_EXCEPTION_CNT;
+
+DEFINE_SAMPLE_INIT(xdp_basic);
+
+/* Default options for drop/pass/tx: native mode with interval 2. Only "tx"
+ * defaults to BASIC_SWAP_MACS; the others leave program_mode at its zero
+ * value (BASIC_NO_TOUCH).
+ */
+const struct basic_opts defaults_drop = { .mode = XDP_MODE_NATIVE,
+ .interval = 2 };
+const struct basic_opts defaults_pass = { .mode = XDP_MODE_NATIVE,
+ .interval = 2 };
+const struct basic_opts defaults_tx = { .mode = XDP_MODE_NATIVE,
+ .interval = 2,
+ .program_mode = BASIC_SWAP_MACS };
+
+/* Open the xdp_basic skeleton, configure it for @action, attach it to
+ * opt->iface_in and run the statistics loop until sample_run() returns.
+ *
+ * Returns EXIT_OK on success or one of the EXIT_FAIL* codes on error; every
+ * failure path maps to an EXIT_* code so no negative errno leaks out as the
+ * process exit status.
+ */
+static int do_basic(const struct basic_opts *opt, enum xdp_action action)
+{
+	DECLARE_LIBBPF_OPTS(xdp_program_opts, opts);
+	struct xdp_program *xdp_prog = NULL;
+	int ret = EXIT_FAIL_OPTION;
+	struct xdp_basic *skel;
+	const char *action_str;
+
+	if (opt->extended)
+		sample_switch_mode();
+
+	skel = xdp_basic__open();
+	if (!skel) {
+		pr_warn("Failed to xdp_basic__open: %s\n", strerror(errno));
+		ret = EXIT_FAIL_BPF;
+		goto end;
+	}
+
+	ret = sample_init_pre_load(skel, opt->iface_in.ifname);
+	if (ret < 0) {
+		pr_warn("Failed to sample_init_pre_load: %s\n", strerror(-ret));
+		ret = EXIT_FAIL_BPF;
+		goto end_destroy;
+	}
+
+	/* Program configuration must be written to rodata before creation */
+	skel->rodata->action = action;
+	if (action == XDP_DROP)
+		mask |= SAMPLE_DROP_OK;
+
+	/* Program modes are cumulative: SWAP_MACS implies READ_DATA */
+	if (opt->program_mode >= BASIC_READ_DATA)
+		skel->rodata->read_data = true;
+	if (opt->program_mode >= BASIC_SWAP_MACS)
+		skel->rodata->swap_macs = true;
+	if (opt->rxq_stats) {
+		skel->rodata->rxq_stats = true;
+		mask |= SAMPLE_RXQ_STATS;
+	}
+
+	opts.obj = skel->obj;
+	opts.prog_name = bpf_program__name(skel->progs.xdp_basic_prog);
+	xdp_prog = xdp_program__create(&opts);
+	if (!xdp_prog) {
+		pr_warn("Couldn't open XDP program: %s\n", strerror(errno));
+		ret = EXIT_FAIL_BPF;
+		goto end_destroy;
+	}
+
+	ret = xdp_program__attach(xdp_prog, opt->iface_in.ifindex, opt->mode, 0);
+	if (ret < 0) {
+		pr_warn("Failed to attach XDP program: %s\n", strerror(-ret));
+		ret = EXIT_FAIL_BPF;
+		goto end_destroy;
+	}
+
+	ret = sample_init(skel, mask, 0, 0);
+	if (ret < 0) {
+		pr_warn("Failed to initialize sample: %s\n", strerror(-ret));
+		ret = EXIT_FAIL;
+		goto end_detach;
+	}
+
+	/* Describe the action actually performed (drop, pass or tx) */
+	switch (action) {
+	case XDP_DROP:
+		action_str = "Dropping";
+		break;
+	case XDP_PASS:
+		action_str = "Passing";
+		break;
+	default:
+		action_str = "Hairpinning (XDP_TX)";
+		break;
+	}
+
+	pr_info("%s packets on %s (ifindex %d; driver %s)\n",
+		action_str, opt->iface_in.ifname, opt->iface_in.ifindex,
+		get_driver_name(opt->iface_in.ifindex));
+
+	ret = sample_run(opt->interval, NULL, NULL);
+	if (ret < 0) {
+		pr_warn("Failed during sample run: %s\n", strerror(-ret));
+		ret = EXIT_FAIL;
+		goto end_detach;
+	}
+	ret = EXIT_OK;
+end_detach:
+	xdp_program__detach(xdp_prog, opt->iface_in.ifindex, opt->mode, 0);
+end_destroy:
+	xdp_basic__destroy(skel);
+end:
+	sample_teardown();
+	return ret;
+}
+
+/* "drop" command: run the basic program with the XDP_DROP action */
+int do_drop(const void *cfg, __unused const char *pin_root_path)
+{
+	return do_basic((const struct basic_opts *)cfg, XDP_DROP);
+}
+
+/* "pass" command: run the basic program with the XDP_PASS action */
+int do_pass(const void *cfg, __unused const char *pin_root_path)
+{
+	return do_basic((const struct basic_opts *)cfg, XDP_PASS);
+}
+
+/* "tx" command: run the basic program with the XDP_TX action */
+int do_tx(const void *cfg, __unused const char *pin_root_path)
+{
+	return do_basic((const struct basic_opts *)cfg, XDP_TX);
+}
diff --git a/xdp-bench/xdp_redirect_basic.bpf.c b/xdp-bench/xdp_redirect_basic.bpf.c
new file mode 100644
index 0000000..ca2af1f
--- /dev/null
+++ b/xdp-bench/xdp_redirect_basic.bpf.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <bpf/vmlinux.h>
+#include <xdp/xdp_sample_shared.h>
+#include <xdp/xdp_sample.bpf.h>
+#include <xdp/xdp_sample_common.bpf.h>
+#include <linux/if_ether.h>
+
+/* Redirect target; written through skel->rodata by do_redirect_basic() */
+const volatile int ifindex_out;
+
+/* Count the packet in the rx_cnt record indexed by the current CPU id, swap
+ * the source and destination MAC addresses and redirect the frame to
+ * ifindex_out with bpf_redirect().
+ */
+SEC("xdp")
+int xdp_redirect_basic_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ __u32 key = bpf_get_smp_processor_id();
+ struct ethhdr *eth = data;
+ struct datarec *rec;
+ __u64 nh_off;
+
+ /* Frames shorter than an Ethernet header are dropped uncounted */
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
+
+ swap_src_dst_mac(data);
+ return bpf_redirect(ifindex_out, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/xdp-bench/xdp_redirect_basic.c b/xdp-bench/xdp_redirect_basic.c
new file mode 100644
index 0000000..7d011df
--- /dev/null
+++ b/xdp-bench/xdp_redirect_basic.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com>
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <assert.h>
+#include <getopt.h>
+#include <libgen.h>
+#include <net/if.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/libbpf.h>
+#include <sys/resource.h>
+#include <linux/if_link.h>
+#include <xdp/libxdp.h>
+
+#include "logging.h"
+
+#include "xdp-bench.h"
+#include "xdp_sample.h"
+#include "xdp_redirect_basic.skel.h"
+
+/* Statistics passed to sample_init(); do_redirect_basic() clears the
+ * devmap_xmit bits in SKB mode and adds SAMPLE_REDIRECT_CNT for --stats.
+ */
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_CNT |
+ SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI;
+
+DEFINE_SAMPLE_INIT(xdp_redirect_basic);
+
+/* Default options for the "redirect" command: native mode, interval 2 */
+const struct redirect_opts defaults_redirect_basic = { .mode = XDP_MODE_NATIVE,
+ .interval = 2 };
+
+/* "redirect" command: attach xdp_redirect_basic_prog to opt->iface_in so it
+ * redirects every frame to opt->iface_out via bpf_redirect(), attach a dummy
+ * xdp_pass program to the output interface and run the statistics loop.
+ *
+ * Returns EXIT_OK on success or one of the EXIT_FAIL* codes on error; every
+ * failure path maps to an EXIT_* code so no negative errno leaks out as the
+ * process exit status.
+ */
+int do_redirect_basic(const void *cfg, __unused const char *pin_root_path)
+{
+	const struct redirect_opts *opt = cfg;
+
+	struct xdp_program *xdp_prog = NULL, *dummy_prog = NULL;
+	DECLARE_LIBBPF_OPTS(xdp_program_opts, opts);
+	struct xdp_redirect_basic *skel;
+	char str[2 * IF_NAMESIZE + 1];
+	bool dummy_attached = false;
+	int ret = EXIT_FAIL_OPTION;
+
+	if (opt->extended)
+		sample_switch_mode();
+
+	if (opt->mode == XDP_MODE_SKB)
+		/* devmap_xmit tracepoint not available */
+		mask &= ~(SAMPLE_DEVMAP_XMIT_CNT |
+			  SAMPLE_DEVMAP_XMIT_CNT_MULTI);
+
+	if (opt->stats)
+		mask |= SAMPLE_REDIRECT_CNT;
+
+	skel = xdp_redirect_basic__open();
+	if (!skel) {
+		pr_warn("Failed to xdp_redirect_basic__open: %s\n", strerror(errno));
+		ret = EXIT_FAIL_BPF;
+		goto end;
+	}
+
+	ret = sample_init_pre_load(skel, opt->iface_in.ifname);
+	if (ret < 0) {
+		pr_warn("Failed to sample_init_pre_load: %s\n", strerror(-ret));
+		ret = EXIT_FAIL_BPF;
+		goto end_destroy;
+	}
+
+	skel->rodata->from_match[0] = opt->iface_in.ifindex;
+	skel->rodata->to_match[0] = opt->iface_out.ifindex;
+	skel->rodata->ifindex_out = opt->iface_out.ifindex;
+
+	opts.obj = skel->obj;
+	opts.prog_name = bpf_program__name(skel->progs.xdp_redirect_basic_prog);
+	xdp_prog = xdp_program__create(&opts);
+	if (!xdp_prog) {
+		pr_warn("Couldn't open XDP program: %s\n", strerror(errno));
+		ret = EXIT_FAIL_BPF;
+		goto end_destroy;
+	}
+
+	ret = xdp_program__attach(xdp_prog, opt->iface_in.ifindex, opt->mode, 0);
+	if (ret < 0) {
+		pr_warn("Failed to attach XDP program: %s\n", strerror(-ret));
+		ret = EXIT_FAIL_BPF;
+		goto end_destroy;
+	}
+
+	ret = sample_init(skel, mask, opt->iface_in.ifindex, opt->iface_out.ifindex);
+	if (ret < 0) {
+		pr_warn("Failed to initialize sample: %s\n", strerror(-ret));
+		ret = EXIT_FAIL;
+		goto end_detach;
+	}
+
+	/* NOTE(review): the dummy xdp_pass program on the output interface is
+	 * presumably needed so that device can transmit redirected frames -
+	 * confirm against the driver requirements.
+	 */
+	opts.obj = NULL;
+	opts.prog_name = "xdp_pass";
+	opts.find_filename = "xdp-dispatcher.o";
+	dummy_prog = xdp_program__create(&opts);
+	if (!dummy_prog) {
+		pr_warn("Failed to load dummy program: %s\n", strerror(errno));
+		ret = EXIT_FAIL_BPF;
+		goto end_detach;
+	}
+
+	ret = xdp_program__attach(dummy_prog, opt->iface_out.ifindex, opt->mode, 0);
+	if (ret < 0) {
+		pr_warn("Failed to attach dummy program: %s\n", strerror(-ret));
+		ret = EXIT_FAIL_BPF;
+		goto end_detach;
+	}
+	dummy_attached = true;
+
+	/* Copy the first driver name: get_driver_name() is called again below
+	 * within the same pr_info() argument list.
+	 */
+	safe_strncpy(str, get_driver_name(opt->iface_in.ifindex), sizeof(str));
+	pr_info("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n",
+		opt->iface_in.ifname, opt->iface_in.ifindex, str,
+		opt->iface_out.ifname, opt->iface_out.ifindex, get_driver_name(opt->iface_out.ifindex));
+
+	ret = sample_run(opt->interval, NULL, NULL);
+	if (ret < 0) {
+		pr_warn("Failed during sample run: %s\n", strerror(-ret));
+		ret = EXIT_FAIL;
+		goto end_detach;
+	}
+	ret = EXIT_OK;
+end_detach:
+	/* Only detach the dummy program if its attach actually succeeded */
+	if (dummy_attached)
+		xdp_program__detach(dummy_prog, opt->iface_out.ifindex, opt->mode, 0);
+	xdp_program__detach(xdp_prog, opt->iface_in.ifindex, opt->mode, 0);
+end_destroy:
+	xdp_redirect_basic__destroy(skel);
+end:
+	sample_teardown();
+	return ret;
+}
diff --git a/xdp-bench/xdp_redirect_cpumap.bpf.c b/xdp-bench/xdp_redirect_cpumap.bpf.c
new file mode 100644
index 0000000..bf6acda
--- /dev/null
+++ b/xdp-bench/xdp_redirect_cpumap.bpf.c
@@ -0,0 +1,539 @@
+/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
+ *
+ * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
+ */
+#include <bpf/vmlinux.h>
+#include <xdp/xdp_sample_shared.h>
+#include <xdp/xdp_sample.bpf.h>
+#include <xdp/xdp_sample_common.bpf.h>
+#include <xdp/parsing_helpers.h>
+#include "hash_func01.h"
+
+/* Special map type that can XDP_REDIRECT frames to another CPU.
+ * No max_entries is given here; do_redirect_cpumap() sets it to the number
+ * of possible CPUs with bpf_map__set_max_entries().
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_CPUMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_cpumap_val));
+} cpu_map SEC(".maps");
+
+/* Set of maps controlling available CPU, and for iterating through
+ * selectable redirect CPUs.
+ *
+ * cpus_available maps a selection index to a CPU id; its max_entries is set
+ * from user space in do_redirect_cpumap().
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+} cpus_available SEC(".maps");
+
+/* Single-entry map holding the number of configured CPUs; read as cpu_max by
+ * cpumap_round_robin() and cpumap_l4_hash().
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1);
+} cpus_count SEC(".maps");
+
+/* Per-CPU cursor into cpus_available used by cpumap_round_robin() */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1);
+} cpus_iterator SEC(".maps");
+
+/* Single-entry devmap used by cpumap_redirect(); slot 0 is filled by
+ * set_cpumap_prog() in user space with the redirect interface and the
+ * redirect_egress_prog fd.
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 1);
+} tx_port SEC(".maps");
+
+/* MAC address of the redirect interface; filled from user space via
+ * get_mac_addr() and copied into h_source by redirect_egress_prog().
+ */
+char tx_mac_addr[ETH_ALEN];
+
+/* Helper parse functions */
+
+/* Parse the Ethernet header and up to two VLAN tags.
+ *
+ * On success stores the inner EtherType (host byte order) in *eth_proto and
+ * the offset of the L3 header relative to @eth in *l3_offset, and returns
+ * true. Returns false if a header does not fit before @data_end or if the
+ * outer EtherType is below ETH_P_802_3_MIN.
+ */
+static __always_inline
+bool parse_eth(struct ethhdr *eth, void *data_end,
+ __u16 *eth_proto, __u64 *l3_offset)
+{
+ __u16 eth_type;
+ __u64 offset;
+
+ offset = sizeof(*eth);
+ if ((void *)eth + offset > data_end)
+ return false;
+
+ /* eth_type is kept in network byte order until the final conversion */
+ eth_type = eth->h_proto;
+
+ /* Skip non 802.3 Ethertypes */
+ if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0))
+ return false;
+
+ /* Handle VLAN tagged packet */
+ if (eth_type == bpf_htons(ETH_P_8021Q) ||
+ eth_type == bpf_htons(ETH_P_8021AD)) {
+ struct vlan_hdr *vlan_hdr;
+
+ vlan_hdr = (void *)eth + offset;
+ offset += sizeof(*vlan_hdr);
+ if ((void *)eth + offset > data_end)
+ return false;
+ eth_type = vlan_hdr->h_vlan_encapsulated_proto;
+ }
+ /* Handle double VLAN tagged packet */
+ if (eth_type == bpf_htons(ETH_P_8021Q) ||
+ eth_type == bpf_htons(ETH_P_8021AD)) {
+ struct vlan_hdr *vlan_hdr;
+
+ vlan_hdr = (void *)eth + offset;
+ offset += sizeof(*vlan_hdr);
+ if ((void *)eth + offset > data_end)
+ return false;
+ eth_type = vlan_hdr->h_vlan_encapsulated_proto;
+ }
+
+ *eth_proto = bpf_ntohs(eth_type);
+ *l3_offset = offset;
+ return true;
+}
+
+/* Return the UDP destination port (host byte order) of the IPv4 packet whose
+ * IP header starts @nh_off bytes into the frame, or 0 if the headers are
+ * truncated or the protocol is not UDP.
+ *
+ * NOTE(review): the UDP header is taken to start at iph + 1, i.e. after a
+ * fixed sizeof(struct iphdr); iph->ihl is not consulted, so IPv4 options
+ * are not accounted for.
+ */
+static __always_inline
+__u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, __u64 nh_off)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct iphdr *iph = data + nh_off;
+ struct udphdr *udph;
+
+ if (iph + 1 > data_end)
+ return 0;
+ if (!(iph->protocol == IPPROTO_UDP))
+ return 0;
+
+ udph = (void *)(iph + 1);
+ if (udph + 1 > data_end)
+ return 0;
+
+ return bpf_ntohs(udph->dest);
+}
+
+/* Return the protocol field of the IPv4 header at @nh_off, or 0 if the
+ * header does not fit in the packet.
+ */
+static __always_inline
+int get_proto_ipv4(struct xdp_md *ctx, __u64 nh_off)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct iphdr *iph = data + nh_off;
+
+ if (iph + 1 > data_end)
+ return 0;
+ return iph->protocol;
+}
+
+/* Return the nexthdr field of the IPv6 header at @nh_off, or 0 if the header
+ * does not fit in the packet. Extension headers are not walked.
+ */
+static __always_inline
+int get_proto_ipv6(struct xdp_md *ctx, __u64 nh_off)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ipv6hdr *ip6h = data + nh_off;
+
+ if (ip6h + 1 > data_end)
+ return 0;
+ return ip6h->nexthdr;
+}
+
+/* Redirect every packet to the CPU stored in slot 0 of cpus_available,
+ * without reading any packet data.
+ */
+SEC("xdp")
+int cpumap_no_touch(struct xdp_md *ctx)
+{
+ __u32 key = bpf_get_smp_processor_id();
+ struct datarec *rec;
+ __u32 *cpu_selected;
+ __u32 cpu_dest = 0;
+ __u32 key0 = 0;
+
+ /* Only use first entry in cpus_available */
+ cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
+ if (!cpu_selected)
+ return XDP_ABORTED;
+ cpu_dest = *cpu_selected;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
+
+ /* Out-of-range CPU id: count it as an issue instead of redirecting */
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
+ return XDP_ABORTED;
+ }
+ return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+/* Like cpumap_no_touch(), but reads the EtherType first and drops frames
+ * whose EtherType is below ETH_P_802_3_MIN.
+ */
+SEC("xdp")
+int cpumap_touch_data(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ __u32 key = bpf_get_smp_processor_id();
+ struct ethhdr *eth = data;
+ struct datarec *rec;
+ __u32 *cpu_selected;
+ __u32 cpu_dest = 0;
+ __u32 key0 = 0;
+ __u16 eth_type;
+
+ /* Only use first entry in cpus_available */
+ cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
+ if (!cpu_selected)
+ return XDP_ABORTED;
+ cpu_dest = *cpu_selected;
+
+ /* Validate packet length is minimum Eth header size */
+ if (eth + 1 > data_end)
+ return XDP_ABORTED;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
+
+ /* Read packet data, and use it (drop non 802.3 Ethertypes) */
+ eth_type = eth->h_proto;
+ if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) {
+ NO_TEAR_INC(rec->dropped);
+ return XDP_DROP;
+ }
+
+ /* Out-of-range CPU id: count it as an issue instead of redirecting */
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
+ return XDP_ABORTED;
+ }
+ return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+/* Spread packets over the configured CPUs in round-robin order. Each
+ * receiving CPU keeps its own cursor in the per-CPU cpus_iterator map.
+ */
+SEC("xdp")
+int cpumap_round_robin(struct xdp_md *ctx)
+{
+ __u32 key = bpf_get_smp_processor_id();
+ struct datarec *rec;
+ __u32 cpu_dest = 0;
+ __u32 key0 = 0;
+
+ __u32 *cpu_selected;
+ __u32 *cpu_iterator;
+ __u32 *cpu_max;
+ __u32 cpu_idx;
+
+ cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
+ if (!cpu_max)
+ return XDP_ABORTED;
+
+ cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
+ if (!cpu_iterator)
+ return XDP_ABORTED;
+ /* Use the current cursor for this packet, then advance it */
+ cpu_idx = *cpu_iterator;
+
+ /* Wrap around once the cursor reaches the configured CPU count */
+ *cpu_iterator += 1;
+ if (*cpu_iterator == *cpu_max)
+ *cpu_iterator = 0;
+
+ cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+ if (!cpu_selected)
+ return XDP_ABORTED;
+ cpu_dest = *cpu_selected;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
+
+ /* Out-of-range CPU id: count it as an issue instead of redirecting */
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
+ return XDP_ABORTED;
+ }
+ return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+/* Pick the destination CPU from the L4 protocol: index 2 for ICMP/ICMPv6,
+ * index 1 for UDP, index 0 for TCP and everything else. The index is then
+ * translated to a CPU id through cpus_available.
+ *
+ * NOTE(review): ip_proto is initialised to IPPROTO_UDP and is only
+ * overwritten for IPv4/IPv6 frames, so for ARP and other EtherTypes the
+ * cpu_idx = 0 set in the first switch is overridden with 1 by the UDP case
+ * of the second switch - confirm whether this is intended.
+ */
+SEC("xdp")
+int cpumap_l4_proto(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ __u32 key = bpf_get_smp_processor_id();
+ struct ethhdr *eth = data;
+ __u8 ip_proto = IPPROTO_UDP;
+ struct datarec *rec;
+ __u16 eth_proto = 0;
+ __u64 l3_offset = 0;
+ __u32 cpu_dest = 0;
+ __u32 *cpu_lookup;
+ __u32 cpu_idx = 0;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
+
+ if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
+ return XDP_PASS; /* Just skip */
+
+ /* Extract L4 protocol */
+ switch (eth_proto) {
+ case ETH_P_IP:
+ ip_proto = get_proto_ipv4(ctx, l3_offset);
+ break;
+ case ETH_P_IPV6:
+ ip_proto = get_proto_ipv6(ctx, l3_offset);
+ break;
+ case ETH_P_ARP:
+ cpu_idx = 0; /* ARP packet handled on separate CPU */
+ break;
+ default:
+ cpu_idx = 0;
+ }
+
+ /* Choose CPU based on L4 protocol */
+ switch (ip_proto) {
+ case IPPROTO_ICMP:
+ case IPPROTO_ICMPV6:
+ cpu_idx = 2;
+ break;
+ case IPPROTO_TCP:
+ cpu_idx = 0;
+ break;
+ case IPPROTO_UDP:
+ cpu_idx = 1;
+ break;
+ default:
+ cpu_idx = 0;
+ }
+
+ cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+ if (!cpu_lookup)
+ return XDP_ABORTED;
+ cpu_dest = *cpu_lookup;
+
+ /* Out-of-range CPU id: count it as an issue instead of redirecting */
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
+ return XDP_ABORTED;
+ }
+ return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+/* Same CPU selection as cpumap_l4_proto(), plus a filter that drops UDP
+ * packets with destination port 9.
+ *
+ * NOTE(review): ip_proto defaults to IPPROTO_UDP for non-IP frames, and
+ * get_dest_port_ipv4_udp() is called in the UDP case regardless of
+ * eth_proto, so IPv6 and non-IP frames are also run through the IPv4 parser
+ * - confirm whether this is intended.
+ */
+SEC("xdp")
+int cpumap_l4_filter(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ __u32 key = bpf_get_smp_processor_id();
+ struct ethhdr *eth = data;
+ __u8 ip_proto = IPPROTO_UDP;
+ struct datarec *rec;
+ __u16 eth_proto = 0;
+ __u64 l3_offset = 0;
+ __u32 cpu_dest = 0;
+ __u32 *cpu_lookup;
+ __u32 cpu_idx = 0;
+ __u16 dest_port;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
+
+ if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
+ return XDP_PASS; /* Just skip */
+
+ /* Extract L4 protocol */
+ switch (eth_proto) {
+ case ETH_P_IP:
+ ip_proto = get_proto_ipv4(ctx, l3_offset);
+ break;
+ case ETH_P_IPV6:
+ ip_proto = get_proto_ipv6(ctx, l3_offset);
+ break;
+ case ETH_P_ARP:
+ cpu_idx = 0; /* ARP packet handled on separate CPU */
+ break;
+ default:
+ cpu_idx = 0;
+ }
+
+ /* Choose CPU based on L4 protocol */
+ switch (ip_proto) {
+ case IPPROTO_ICMP:
+ case IPPROTO_ICMPV6:
+ cpu_idx = 2;
+ break;
+ case IPPROTO_TCP:
+ cpu_idx = 0;
+ break;
+ case IPPROTO_UDP:
+ cpu_idx = 1;
+ /* DDoS filter UDP port 9 (pktgen) */
+ dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
+ if (dest_port == 9) {
+ NO_TEAR_INC(rec->dropped);
+ return XDP_DROP;
+ }
+ break;
+ default:
+ cpu_idx = 0;
+ }
+
+ cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+ if (!cpu_lookup)
+ return XDP_ABORTED;
+ cpu_dest = *cpu_lookup;
+
+ /* Out-of-range CPU id: count it as an issue instead of redirecting */
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
+ return XDP_ABORTED;
+ }
+ return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+/* Hashing initval */
+#define INITVAL 15485863
+
+/* Hash the IPv4 address pair of the header at @nh_off. The two addresses are
+ * added before hashing, so swapping source and destination gives the same
+ * result; the protocol number is mixed into the hash seed. Returns 0 if the
+ * header does not fit in the packet.
+ */
+static __always_inline
+__u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, __u64 nh_off)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct iphdr *iph = data + nh_off;
+ __u32 cpu_hash;
+
+ if (iph + 1 > data_end)
+ return 0;
+
+ cpu_hash = iph->saddr + iph->daddr;
+ cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);
+
+ return cpu_hash;
+}
+
+/* IPv6 counterpart of get_ipv4_hash_ip_pair(): sums all four 32-bit words of
+ * both addresses (so the result is symmetric in source/destination) and
+ * hashes the sum with nexthdr mixed into the seed. Returns 0 if the header
+ * does not fit in the packet.
+ */
+static __always_inline
+__u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, __u64 nh_off)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ipv6hdr *ip6h = data + nh_off;
+ __u32 cpu_hash;
+
+ if (ip6h + 1 > data_end)
+ return 0;
+
+ cpu_hash = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0];
+ cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1];
+ cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2];
+ cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3];
+ cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);
+
+ return cpu_hash;
+}
+
+/* Load-Balance traffic based on hashing IP-addrs + L4-proto. The
+ * hashing scheme is symmetric, meaning swapping IP src/dest still hit
+ * same CPU.
+ *
+ * Non-IP frames get hash 0 and therefore always select index 0.
+ */
+SEC("xdp")
+int cpumap_l4_hash(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ __u32 key = bpf_get_smp_processor_id();
+ struct ethhdr *eth = data;
+ struct datarec *rec;
+ __u16 eth_proto = 0;
+ __u64 l3_offset = 0;
+ __u32 cpu_dest = 0;
+ __u32 cpu_idx = 0;
+ __u32 *cpu_lookup;
+ __u32 key0 = 0;
+ __u32 *cpu_max;
+ __u32 cpu_hash;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
+
+ cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
+ if (!cpu_max)
+ return XDP_ABORTED;
+
+ if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
+ return XDP_PASS; /* Just skip */
+
+ /* Hash for IPv4 and IPv6 */
+ switch (eth_proto) {
+ case ETH_P_IP:
+ cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
+ break;
+ case ETH_P_IPV6:
+ cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
+ break;
+ case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
+ default:
+ cpu_hash = 0;
+ }
+
+ /* Choose CPU based on hash.
+  * NOTE(review): *cpu_max is not checked for zero before the modulo.
+  */
+ cpu_idx = cpu_hash % *cpu_max;
+
+ cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+ if (!cpu_lookup)
+ return XDP_ABORTED;
+ cpu_dest = *cpu_lookup;
+
+ /* Out-of-range CPU id: count it as an issue instead of redirecting */
+ if (cpu_dest >= nr_cpus) {
+ NO_TEAR_INC(rec->issue);
+ return XDP_ABORTED;
+ }
+ return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+/* cpumap program selected for ACTION_REDIRECT: swap the MAC addresses and
+ * redirect the frame through slot 0 of the tx_port devmap. Frames shorter
+ * than an Ethernet header are dropped.
+ */
+SEC("xdp/cpumap")
+int cpumap_redirect(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ __u64 nh_off;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ swap_src_dst_mac(data);
+ return bpf_redirect_map(&tx_port, 0, 0);
+}
+
+/* cpumap program selected for ACTION_PASS: pass every frame */
+SEC("xdp/cpumap")
+int cpumap_pass(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+/* cpumap program selected for ACTION_DROP: drop every frame */
+SEC("xdp/cpumap")
+int cpumap_drop(struct xdp_md *ctx)
+{
+ return XDP_DROP;
+}
+
+/* devmap program installed in the tx_port entry by set_cpumap_prog():
+ * overwrite the source MAC of the outgoing frame with tx_mac_addr. Frames
+ * shorter than an Ethernet header are dropped.
+ */
+SEC("xdp/devmap")
+int redirect_egress_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ __u64 nh_off;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN);
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/xdp-bench/xdp_redirect_cpumap.c b/xdp-bench/xdp_redirect_cpumap.c
new file mode 100644
index 0000000..07eee87
--- /dev/null
+++ b/xdp-bench/xdp_redirect_cpumap.c
@@ -0,0 +1,354 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
+ */
+
+#include <time.h>
+#include <errno.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <locale.h>
+#include <net/if.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+#include <stdbool.h>
+#include <arpa/inet.h>
+#include <bpf/libbpf.h>
+#include <sys/sysinfo.h>
+#include <linux/limits.h>
+#include <sys/resource.h>
+#include <linux/if_link.h>
+#include <xdp/libxdp.h>
+
+#include "logging.h"
+
+#include "xdp-bench.h"
+#include "xdp_sample.h"
+#include "xdp_redirect_cpumap.skel.h"
+
+/* File descriptors of the BPF maps updated by create_cpu_entry() and
+ * mark_cpus_unavailable(): the cpumap itself, the available-CPU list and
+ * the CPU count. They are assigned outside the code shown in this chunk.
+ */
+static int map_fd;
+static int avail_fd;
+static int count_fd;
+
+/* Base statistics mask; do_redirect_cpumap() adds SAMPLE_REDIRECT_MAP_CNT
+ * for --stats and SAMPLE_DEVMAP_XMIT_CNT_MULTI when a redirect interface is
+ * given.
+ */
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
+ SAMPLE_CPUMAP_ENQUEUE_CNT | SAMPLE_CPUMAP_KTHREAD_CNT |
+ SAMPLE_EXCEPTION_CNT;
+
+/* Default options for "redirect-cpu": native mode, interval 2, queue size
+ * 2048 and the L4 hash program.
+ */
+const struct cpumap_opts defaults_redirect_cpumap = {
+ .mode = XDP_MODE_NATIVE,
+ .interval = 2,
+ .qsize = 2048,
+ .program_mode = CPUMAP_CPU_L4_HASH,
+};
+
+/* BPF program names indexed by enum cpumap_program_mode; the order must
+ * match the enum declaration in xdp-bench.h.
+ */
+static const char *cpumap_prog_names[] = {
+ "cpumap_no_touch",
+ "cpumap_touch_data",
+ "cpumap_round_robin",
+ "cpumap_l4_proto",
+ "cpumap_l4_filter",
+ "cpumap_l4_hash",
+};
+
+DEFINE_SAMPLE_INIT(xdp_redirect_cpumap);
+
+/* Install @value for @cpu in the cpumap and publish the CPU at position
+ * @avail_idx of the available-CPU list.
+ *
+ * @new: true when adding a CPU, in which case the stored CPU count is
+ *       incremented; false when replacing the settings of an existing one.
+ *
+ * Returns 0 on success or the negative return value of the failing map
+ * operation.
+ */
+static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
+			    __u32 avail_idx, bool new)
+{
+	__u32 curr_cpus_count = 0;
+	__u32 key = 0;
+	int ret;
+
+	/* Add a CPU entry to cpumap, as this allocate a cpu entry in
+	 * the kernel for the cpu.
+	 */
+	ret = bpf_map_update_elem(map_fd, &cpu, value, 0);
+	if (ret < 0) {
+		pr_warn("Create CPU entry failed: %s\n", strerror(errno));
+		return ret;
+	}
+
+	/* Inform bpf_prog's that a new CPU is available to select
+	 * from via some control maps.
+	 */
+	ret = bpf_map_update_elem(avail_fd, &avail_idx, &cpu, 0);
+	if (ret < 0) {
+		pr_warn("Add to avail CPUs failed: %s\n", strerror(errno));
+		return ret;
+	}
+
+	/* The current count is always read so it can be reported below */
+	ret = bpf_map_lookup_elem(count_fd, &key, &curr_cpus_count);
+	if (ret < 0) {
+		pr_warn("Failed reading curr cpus_count: %s\n",
+			strerror(errno));
+		return ret;
+	}
+
+	/* When not replacing/updating existing entry, bump the count */
+	if (new) {
+		curr_cpus_count++;
+		ret = bpf_map_update_elem(count_fd, &key,
+					  &curr_cpus_count, 0);
+		if (ret < 0) {
+			pr_warn("Failed write curr cpus_count: %s\n",
+				strerror(errno));
+			return ret;
+		}
+	}
+
+	/* qsize is a __u32, so print it with %u */
+	pr_debug("%s CPU: %u as idx: %u qsize: %u cpumap_prog_fd: %d (cpus_count: %u)\n",
+		 new ? "Add new" : "Replace", cpu, avail_idx,
+		 value->qsize, value->bpf_prog.fd, curr_cpus_count);
+
+	return 0;
+}
+
+/* CPUs are zero-indexed. Thus, add a special sentinel default value
+ * in map cpus_available to mark CPU index'es not configured.
+ *
+ * The sentinel is the number of possible CPUs, which is one past the highest
+ * valid CPU id. Returns 0 on success or a negative value if the CPU count
+ * cannot be determined or a map update fails.
+ */
+static int mark_cpus_unavailable(void)
+{
+	int ret, n_cpus = libbpf_num_possible_cpus();
+	__u32 invalid_cpu, idx;
+
+	/* A negative count is an error code, not a usable sentinel */
+	if (n_cpus < 0) {
+		pr_warn("Failed to get number of possible CPUs: %s\n",
+			strerror(-n_cpus));
+		return n_cpus;
+	}
+	invalid_cpu = n_cpus;
+
+	for (idx = 0; idx < invalid_cpu; idx++) {
+		ret = bpf_map_update_elem(avail_fd, &idx,
+					  &invalid_cpu, 0);
+		if (ret < 0) {
+			pr_warn("Failed marking CPU unavailable: %s\n",
+				strerror(errno));
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/* Stress cpumap management code by concurrently changing underlying cpumap.
+ *
+ * @ctx points to the struct bpf_cpumap_val to reuse. Each qsize change makes
+ * the kernel free and allocate a new bpf_cpu_map_entry, with an
+ * associated/complicated tear-down procedure. Errors from
+ * create_cpu_entry() are ignored.
+ */
+static void stress_cpumap(void *ctx)
+{
+	static const __u32 qsizes[] = { 1024, 8, 16000 };
+	struct bpf_cpumap_val *val = ctx;
+	size_t n;
+
+	for (n = 0; n < sizeof(qsizes) / sizeof(qsizes[0]); n++) {
+		val->qsize = qsizes[n];
+		create_cpu_entry(1, val, 0, false);
+	}
+}
+
+/* Select the program that runs on the remote CPU for @action.
+ *
+ * Returns the fd of the chosen cpumap program, 0 for ACTION_DISABLED, or a
+ * negative errno value. For ACTION_REDIRECT it also records the MAC address
+ * of @redir_iface in the BPF object and fills slot 0 of the tx_port devmap
+ * with the interface and the egress program.
+ */
+static int set_cpumap_prog(struct xdp_redirect_cpumap *skel,
+			   enum cpumap_remote_action action,
+			   const struct iface *redir_iface)
+{
+	struct bpf_devmap_val devmap_val = {};
+	__u32 slot = 0;
+
+	if (action == ACTION_DISABLED)
+		return 0;
+	if (action == ACTION_DROP)
+		return bpf_program__fd(skel->progs.cpumap_drop);
+	if (action == ACTION_PASS)
+		return bpf_program__fd(skel->progs.cpumap_pass);
+	if (action != ACTION_REDIRECT)
+		return -EINVAL;
+
+	/* Everything below only applies to ACTION_REDIRECT */
+	if (!redir_iface->ifindex) {
+		pr_warn("Must specify redirect device when using --remote-action 'redirect'\n");
+		return -EINVAL;
+	}
+
+	if (get_mac_addr(redir_iface->ifindex, skel->bss->tx_mac_addr) < 0) {
+		pr_warn("Couldn't get MAC address for interface %s\n", redir_iface->ifname);
+		return -EINVAL;
+	}
+
+	devmap_val.ifindex = redir_iface->ifindex;
+	devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.redirect_egress_prog);
+
+	if (bpf_map_update_elem(bpf_map__fd(skel->maps.tx_port), &slot,
+				&devmap_val, 0) < 0)
+		return -errno;
+
+	return bpf_program__fd(skel->progs.cpumap_redirect);
+}
+
+/* Entry point for the cpumap redirect benchmark.
+ *
+ * @cfg points to a struct cpumap_opts. The function opens the skeleton,
+ * selects the XDP program named by opt->program_mode, sizes the CPU maps to
+ * the number of possible CPUs, attaches the program to opt->iface_in,
+ * creates one cpumap entry per CPU in opt->cpus and then runs the sample
+ * loop until it returns.
+ *
+ * Returns EXIT_OK on success. Most failures return one of the EXIT_FAIL*
+ * codes; a few paths (program create/attach, map info lookup, marking CPUs
+ * unavailable) propagate the negative value of the failing call instead.
+ */
+int do_redirect_cpumap(const void *cfg, __unused const char *pin_root_path)
+{
+	const struct cpumap_opts *opt = cfg;
+
+	DECLARE_LIBBPF_OPTS(xdp_program_opts, opts);
+	struct xdp_program *xdp_prog = NULL;
+	struct xdp_redirect_cpumap *skel;
+	struct bpf_program *prog = NULL;
+	struct bpf_map_info info = {};
+	struct bpf_cpumap_val value = {};
+	__u32 infosz = sizeof(info);
+	int ret = EXIT_FAIL_OPTION;
+	int n_cpus, fd;
+	size_t i;
+
+	if (opt->extended)
+		sample_switch_mode();
+
+	if (opt->stats)
+		mask |= SAMPLE_REDIRECT_MAP_CNT;
+
+	if (opt->redir_iface.ifindex)
+		mask |= SAMPLE_DEVMAP_XMIT_CNT_MULTI;
+
+	/* n_cpus is used as max_entries for both CPU maps below, so a
+	 * negative error return must not be passed through.
+	 */
+	n_cpus = libbpf_num_possible_cpus();
+	if (n_cpus < 0) {
+		pr_warn("Failed to get number of possible CPUs: %s\n",
+			strerror(-n_cpus));
+		ret = EXIT_FAIL;
+		goto end;
+	}
+
+	/* Notice: Choosing the queue size is very important when CPU is
+	 * configured with power-saving states.
+	 *
+	 * If the deepest state takes 133 usec to wake up from (133/10^6) and
+	 * the link speed is 10Gbit/s ((10*10^9/8) in bytes/sec), the number
+	 * of bytes that can arrive within 133 usec at this speed is
+	 * (10*10^9/8)*(133/10^6) = 166250 bytes. With MTU size packets this
+	 * is 110 packets, and with minimum Ethernet (MAC-preamble + intergap)
+	 * 84 bytes it is 1979 packets.
+	 *
+	 * Setting default cpumap queue to 2048 as worst-case (small packet)
+	 * should be +64 packet due kthread wakeup call (due to xdp_do_flush)
+	 * worst-case is 2043 packets.
+	 *
+	 * Sysadm can configure the system to avoid deep-sleep via:
+	 *   tuned-adm profile network-latency
+	 */
+
+	skel = xdp_redirect_cpumap__open();
+	if (!skel) {
+		pr_warn("Failed to xdp_redirect_cpumap__open: %s\n",
+			strerror(errno));
+		ret = EXIT_FAIL_BPF;
+		goto end;
+	}
+
+	/* Make sure we only load the one XDP program we are interested in */
+	while ((prog = bpf_object__next_program(skel->obj, prog)) != NULL)
+		if (bpf_program__type(prog) == BPF_PROG_TYPE_XDP &&
+		    bpf_program__expected_attach_type(prog) == BPF_XDP)
+			bpf_program__set_autoload(prog, false);
+
+	prog = bpf_object__find_program_by_name(skel->obj,
+						cpumap_prog_names[opt->program_mode]);
+	if (!prog) {
+		pr_warn("Failed to find program '%s'\n",
+			cpumap_prog_names[opt->program_mode]);
+		goto end_destroy;
+	}
+
+	ret = sample_init_pre_load(skel, opt->iface_in.ifname);
+	if (ret < 0) {
+		pr_warn("Failed to sample_init_pre_load: %s\n", strerror(-ret));
+		ret = EXIT_FAIL_BPF;
+		goto end_destroy;
+	}
+
+	if (bpf_map__set_max_entries(skel->maps.cpu_map, n_cpus) < 0) {
+		pr_warn("Failed to set max entries for cpu_map map: %s\n",
+			strerror(errno));
+		ret = EXIT_FAIL_BPF;
+		goto end_destroy;
+	}
+
+	if (bpf_map__set_max_entries(skel->maps.cpus_available, n_cpus) < 0) {
+		pr_warn("Failed to set max entries for cpus_available map: %s\n",
+			strerror(errno));
+		ret = EXIT_FAIL_BPF;
+		goto end_destroy;
+	}
+
+	ret = EXIT_FAIL_OPTION;
+
+	skel->rodata->from_match[0] = opt->iface_in.ifindex;
+	if (opt->redir_iface.ifindex)
+		skel->rodata->to_match[0] = opt->redir_iface.ifindex;
+
+	opts.obj = skel->obj;
+	opts.prog_name = bpf_program__name(prog);
+	xdp_prog = xdp_program__create(&opts);
+	if (!xdp_prog) {
+		ret = -errno;
+		pr_warn("Couldn't open XDP program: %s\n",
+			strerror(-ret));
+		goto end_destroy;
+	}
+
+	ret = xdp_program__attach(xdp_prog, opt->iface_in.ifindex, opt->mode, 0);
+	if (ret < 0) {
+		pr_warn("Failed to attach XDP program: %s\n",
+			strerror(-ret));
+		goto end_destroy;
+	}
+
+	ret = bpf_obj_get_info_by_fd(bpf_map__fd(skel->maps.cpu_map), &info, &infosz);
+	if (ret < 0) {
+		pr_warn("Failed bpf_obj_get_info_by_fd for cpumap: %s\n",
+			strerror(errno));
+		goto end_detach;
+	}
+
+	skel->bss->cpumap_map_id = info.id;
+
+	/* File-scope fds used by the helpers that manage cpumap entries. */
+	map_fd = bpf_map__fd(skel->maps.cpu_map);
+	avail_fd = bpf_map__fd(skel->maps.cpus_available);
+	count_fd = bpf_map__fd(skel->maps.cpus_count);
+
+	ret = mark_cpus_unavailable();
+	if (ret < 0) {
+		pr_warn("Unable to mark CPUs as unavailable\n");
+		goto end_detach;
+	}
+
+	ret = sample_init(skel, mask, opt->iface_in.ifindex, 0);
+	if (ret < 0) {
+		pr_warn("Failed to initialize sample: %s\n", strerror(-ret));
+		ret = EXIT_FAIL;
+		goto end_detach;
+	}
+
+	fd = set_cpumap_prog(skel, opt->remote_action, &opt->redir_iface);
+	if (fd < 0) {
+		ret = EXIT_FAIL_BPF;
+		goto end_detach;
+	}
+	value.qsize = opt->qsize;
+	value.bpf_prog.fd = fd;
+
+	for (i = 0; i < opt->cpus.num_vals; i++) {
+		if (create_cpu_entry(opt->cpus.vals[i], &value, i, true) < 0) {
+			pr_warn("Cannot proceed, exiting\n");
+			ret = EXIT_FAIL;
+			goto end_detach;
+		}
+	}
+
+	ret = sample_run(opt->interval, opt->stress_mode ? stress_cpumap : NULL, &value);
+	if (ret < 0) {
+		pr_warn("Failed during sample run: %s\n", strerror(-ret));
+		ret = EXIT_FAIL;
+		goto end_detach;
+	}
+	ret = EXIT_OK;
+end_detach:
+	xdp_program__detach(xdp_prog, opt->iface_in.ifindex, opt->mode, 0);
+end_destroy:
+	xdp_program__close(xdp_prog);
+	xdp_redirect_cpumap__destroy(skel);
+end:
+	sample_teardown();
+	return ret;
+}
diff --git a/xdp-bench/xdp_redirect_devmap.bpf.c b/xdp-bench/xdp_redirect_devmap.bpf.c
new file mode 100644
index 0000000..0212e82
--- /dev/null
+++ b/xdp-bench/xdp_redirect_devmap.bpf.c
@@ -0,0 +1,88 @@
+/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <bpf/vmlinux.h>
+#include <xdp/xdp_sample_shared.h>
+#include <xdp/xdp_sample.bpf.h>
+#include <xdp/xdp_sample_common.bpf.h>
+#include <xdp/parsing_helpers.h>
+
+/* The 2nd xdp prog on egress does not support skb mode, so we define two
+ * maps, tx_port_general and tx_port_native.
+ *
+ * Both are single-entry devmaps keyed by int; they differ only in value
+ * size. tx_port_general stores an int-sized value, tx_port_native stores a
+ * full struct bpf_devmap_val.
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __uint(max_entries, 1);
+} tx_port_general SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 1);
+} tx_port_native SEC(".maps");
+
+/* store egress interface mac address; copied into the Ethernet source
+ * field by xdp_redirect_devmap_egress()
+ */
+const volatile char tx_mac_addr[ETH_ALEN];
+
+/* Shared body of the two ingress programs: count the packet, rewrite its
+ * MAC addresses and redirect it through slot 0 of @redirect_map.
+ *
+ * Returns XDP_DROP when the frame is shorter than an Ethernet header,
+ * XDP_PASS when no rx_cnt record exists for the current CPU, and otherwise
+ * whatever bpf_redirect_map() returns.
+ */
+static __always_inline int xdp_redirect_devmap(struct xdp_md *ctx, void *redirect_map)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ /* rx_cnt is looked up with the current CPU id as key */
+ __u32 key = bpf_get_smp_processor_id();
+ struct ethhdr *eth = data;
+ struct datarec *rec;
+ __u64 nh_off;
+
+ /* Bounds check: the packet must hold a full Ethernet header */
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
+ /* presumably swaps source and destination MAC in place; helper is defined outside this file */
+ swap_src_dst_mac(data);
+ return bpf_redirect_map(redirect_map, 0, 0);
+}
+
+/* Ingress program that redirects through tx_port_general (int-sized devmap values). */
+SEC("xdp")
+int redir_devmap_general(struct xdp_md *ctx)
+{
+ return xdp_redirect_devmap(ctx, &tx_port_general);
+}
+
+/* Ingress program that redirects through tx_port_native (struct bpf_devmap_val values). */
+SEC("xdp")
+int redir_devmap_native(struct xdp_md *ctx)
+{
+ return xdp_redirect_devmap(ctx, &tx_port_native);
+}
+
+/* Devmap program: overwrite the Ethernet source address of the packet with
+ * tx_mac_addr.
+ *
+ * Returns XDP_DROP for frames shorter than an Ethernet header, XDP_PASS
+ * otherwise.
+ */
+SEC("xdp/devmap")
+int xdp_redirect_devmap_egress(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ __u64 nh_off;
+
+ /* Bounds check before touching the header */
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ /* The cast drops the volatile qualifier of tx_mac_addr for the builtin copy */
+ __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN);
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/xdp-bench/xdp_redirect_devmap.c b/xdp-bench/xdp_redirect_devmap.c
new file mode 100644
index 0000000..eca35f5
--- /dev/null
+++ b/xdp-bench/xdp_redirect_devmap.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
+ */
+#include <errno.h>
+#include <stdio.h>
+#include <assert.h>
+#include <getopt.h>
+#include <libgen.h>
+#include <net/if.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/libbpf.h>
+#include <xdp/libxdp.h>
+#include <linux/if_link.h>
+
+#include "logging.h"
+
+#include "xdp-bench.h"
+#include "xdp_sample.h"
+#include "xdp_redirect_devmap.skel.h"
+
+/* Sample flags passed to sample_init(); do_redirect_devmap() adjusts them
+ * according to the selected mode and options before use.
+ */
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
+ SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI;
+
+DEFINE_SAMPLE_INIT(xdp_redirect_devmap);
+
+/* Default option values: native XDP mode, interval of 2 */
+const struct devmap_opts defaults_redirect_devmap = { .mode = XDP_MODE_NATIVE,
+ .interval = 2 };
+
+/* Entry point for the devmap redirect benchmark.
+ *
+ * @cfg points to a struct devmap_opts. The function attaches the
+ * "redir_devmap_native" program to opt->iface_in, attaches a dummy
+ * "xdp_pass" program to opt->iface_out, points slot 0 of the devmap at
+ * opt->iface_out and then runs the sample loop.
+ *
+ * If attaching fails and no egress program was requested, the whole setup
+ * is retried once with "redir_devmap_general" and the int-sized devmap.
+ *
+ * Returns EXIT_OK on success. Most failures return one of the EXIT_FAIL*
+ * codes; a failing xdp_program__create() for the main program returns
+ * -errno instead.
+ */
+int do_redirect_devmap(const void *cfg, __unused const char *pin_root_path)
+{
+	const struct devmap_opts *opt = cfg;
+
+	struct xdp_program *xdp_prog = NULL, *dummy_prog = NULL;
+	const char *prog_name = "redir_devmap_native";
+	DECLARE_LIBBPF_OPTS(xdp_program_opts, opts);
+	struct bpf_devmap_val devmap_val = {};
+	struct bpf_map *tx_port_map = NULL;
+	struct xdp_redirect_devmap *skel;
+	struct bpf_program *prog = NULL;
+	char str[2 * IF_NAMESIZE + 1];
+	int ret = EXIT_FAIL_OPTION;
+	bool tried = false;
+	int key = 0;
+
+	if (opt->extended)
+		sample_switch_mode();
+
+	if (opt->mode == XDP_MODE_SKB)
+		/* devmap_xmit tracepoint not available */
+		mask &= ~(SAMPLE_DEVMAP_XMIT_CNT |
+			  SAMPLE_DEVMAP_XMIT_CNT_MULTI);
+
+	if (opt->stats)
+		mask |= SAMPLE_REDIRECT_CNT;
+
+restart:
+	skel = xdp_redirect_devmap__open();
+	if (!skel) {
+		pr_warn("Failed to xdp_redirect_devmap__open: %s\n",
+			strerror(errno));
+		ret = EXIT_FAIL_BPF;
+		goto end;
+	}
+
+	/* Make sure we only load the one XDP program we are interested in */
+	while ((prog = bpf_object__next_program(skel->obj, prog)) != NULL)
+		if (bpf_program__type(prog) == BPF_PROG_TYPE_XDP &&
+		    bpf_program__expected_attach_type(prog) == BPF_XDP)
+			bpf_program__set_autoload(prog, false);
+
+	if (tried) {
+		/* Fallback pass: int-sized devmap, no egress program */
+		tx_port_map = skel->maps.tx_port_general;
+		bpf_program__set_autoload(skel->progs.xdp_redirect_devmap_egress, false);
+#ifdef HAVE_LIBBPF_BPF_MAP__SET_AUTOCREATE
+		bpf_map__set_autocreate(skel->maps.tx_port_native, false);
+#else
+		pr_warn("Libbpf is missing bpf_map__set_autocreate(), fallback won't work\n");
+		ret = EXIT_FAIL_BPF;
+		goto end_destroy;
+#endif
+	} else {
+#ifdef HAVE_LIBBPF_BPF_MAP__SET_AUTOCREATE
+		bpf_map__set_autocreate(skel->maps.tx_port_general, false);
+#endif
+		tx_port_map = skel->maps.tx_port_native;
+	}
+
+	ret = sample_init_pre_load(skel, opt->iface_in.ifname);
+	if (ret < 0) {
+		pr_warn("Failed to sample_init_pre_load: %s\n", strerror(-ret));
+		ret = EXIT_FAIL_BPF;
+		goto end_destroy;
+	}
+
+	/* Load 2nd xdp prog on egress. */
+	if (opt->load_egress) {
+		ret = get_mac_addr(opt->iface_out.ifindex, skel->rodata->tx_mac_addr);
+		if (ret < 0) {
+			pr_warn("Failed to get interface %s mac address: %s\n",
+				opt->iface_out.ifname, strerror(-ret));
+			ret = EXIT_FAIL;
+			goto end_destroy;
+		}
+	}
+
+	skel->rodata->from_match[0] = opt->iface_in.ifindex;
+	skel->rodata->to_match[0] = opt->iface_out.ifindex;
+
+	opts.obj = skel->obj;
+	opts.prog_name = prog_name;
+	xdp_prog = xdp_program__create(&opts);
+	if (!xdp_prog) {
+		ret = -errno;
+		pr_warn("Couldn't open XDP program: %s\n",
+			strerror(-ret));
+		goto end_destroy;
+	}
+
+	ret = xdp_program__attach(xdp_prog, opt->iface_in.ifindex, opt->mode, 0);
+	if (ret < 0) {
+		/* First try with struct bpf_devmap_val as value for generic
+		 * mode, then fallback to sizeof(int) for older kernels.
+		 */
+		if (!opt->load_egress && !tried) {
+			pr_warn("Attempting fallback to int-sized devmap\n");
+			prog_name = "redir_devmap_general";
+			tried = true;
+
+			xdp_program__close(xdp_prog);
+			xdp_redirect_devmap__destroy(skel);
+			sample_teardown();
+			xdp_prog = NULL;
+			goto restart;
+		}
+		pr_warn("Failed to attach XDP program: %s\n",
+			strerror(-ret));
+		ret = EXIT_FAIL_XDP;
+		goto end_destroy;
+	}
+
+	ret = sample_init(skel, mask, opt->iface_in.ifindex, opt->iface_out.ifindex);
+	if (ret < 0) {
+		pr_warn("Failed to initialize sample: %s\n", strerror(-ret));
+		ret = EXIT_FAIL;
+		goto end_detach;
+	}
+
+	/* Attach a pass-through program to the output interface as well */
+	opts.obj = NULL;
+	opts.prog_name = "xdp_pass";
+	opts.find_filename = "xdp-dispatcher.o";
+	dummy_prog = xdp_program__create(&opts);
+	if (!dummy_prog) {
+		pr_warn("Failed to load dummy program: %s\n", strerror(errno));
+		ret = EXIT_FAIL_BPF;
+		goto end_detach;
+	}
+
+	ret = xdp_program__attach(dummy_prog, opt->iface_out.ifindex, opt->mode, 0);
+	if (ret < 0) {
+		pr_warn("Failed to attach dummy program: %s\n", strerror(-ret));
+		ret = EXIT_FAIL_BPF;
+		goto end_detach;
+	}
+
+	devmap_val.ifindex = opt->iface_out.ifindex;
+	if (opt->load_egress)
+		devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_devmap_egress);
+	ret = bpf_map_update_elem(bpf_map__fd(tx_port_map), &key, &devmap_val, 0);
+	if (ret < 0) {
+		pr_warn("Failed to update devmap value: %s\n",
+			strerror(errno));
+		ret = EXIT_FAIL_BPF;
+		goto end_detach;
+	}
+
+	/* The input driver name is copied into str before get_driver_name()
+	 * is called a second time for the output interface.
+	 */
+	safe_strncpy(str, get_driver_name(opt->iface_in.ifindex), sizeof(str));
+	pr_info("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n",
+		opt->iface_in.ifname, opt->iface_in.ifindex, str,
+		opt->iface_out.ifname, opt->iface_out.ifindex, get_driver_name(opt->iface_out.ifindex));
+
+	ret = sample_run(opt->interval, NULL, NULL);
+	if (ret < 0) {
+		pr_warn("Failed during sample run: %s\n", strerror(-ret));
+		ret = EXIT_FAIL;
+		/* Both programs are attached at this point, so they must be
+		 * detached on the way out.
+		 */
+		goto end_detach;
+	}
+	ret = EXIT_OK;
+end_detach:
+	if (dummy_prog)
+		xdp_program__detach(dummy_prog, opt->iface_out.ifindex, opt->mode, 0);
+	xdp_program__detach(xdp_prog, opt->iface_in.ifindex, opt->mode, 0);
+end_destroy:
+	xdp_program__close(xdp_prog);
+	xdp_program__close(dummy_prog);
+	xdp_redirect_devmap__destroy(skel);
+end:
+	sample_teardown();
+	return ret;
+}
diff --git a/xdp-bench/xdp_redirect_devmap_multi.bpf.c b/xdp-bench/xdp_redirect_devmap_multi.bpf.c
new file mode 100644
index 0000000..3e69783
--- /dev/null
+++ b/xdp-bench/xdp_redirect_devmap_multi.bpf.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/vmlinux.h>
+#include <xdp/xdp_sample_shared.h>
+#include <xdp/xdp_sample.bpf.h>
+#include <xdp/xdp_sample_common.bpf.h>
+#include <xdp/parsing_helpers.h>
+
+/* Two hash devmaps with up to 32 entries, keyed by int. They differ only in
+ * value size: forward_map_general stores an int-sized value,
+ * forward_map_native stores a full struct bpf_devmap_val.
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __uint(max_entries, 32);
+} forward_map_general SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 32);
+} forward_map_native SEC(".maps");
+
+/* map to store egress interfaces mac addresses; xdp_devmap_prog() looks it
+ * up by egress ifindex. The value is 8 bytes wide (__be64), of which the
+ * first ETH_ALEN bytes are copied into the packet.
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __be64);
+ __uint(max_entries, 32);
+} mac_map SEC(".maps");
+
+/* Shared body of the two ingress programs: count the packet and redirect it
+ * through @forward_map with the broadcast and exclude-ingress flags set.
+ *
+ * Returns XDP_PASS when no rx_cnt record exists for the current CPU,
+ * otherwise whatever bpf_redirect_map() returns.
+ */
+static int xdp_redirect_devmap_multi(struct xdp_md *ctx, void *forward_map)
+{
+ /* rx_cnt is looked up with the current CPU id as key */
+ __u32 key = bpf_get_smp_processor_id();
+ struct datarec *rec;
+
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_PASS;
+ NO_TEAR_INC(rec->processed);
+
+ return bpf_redirect_map(forward_map, 0,
+ BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+}
+
+/* Ingress program that forwards through forward_map_general (int-sized values). */
+SEC("xdp")
+int redir_multi_general(struct xdp_md *ctx)
+{
+ return xdp_redirect_devmap_multi(ctx, &forward_map_general);
+}
+
+/* Ingress program that forwards through forward_map_native (struct bpf_devmap_val values). */
+SEC("xdp")
+int redir_multi_native(struct xdp_md *ctx)
+{
+ return xdp_redirect_devmap_multi(ctx, &forward_map_native);
+}
+
+/* Devmap program: if mac_map holds an entry for the egress ifindex, copy its
+ * first ETH_ALEN bytes into the Ethernet source address of the packet.
+ *
+ * Returns XDP_DROP for frames shorter than an Ethernet header, XDP_PASS
+ * otherwise (also when no mac_map entry is found).
+ */
+SEC("xdp/devmap")
+int xdp_devmap_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ __u32 key = ctx->egress_ifindex;
+ struct ethhdr *eth = data;
+ __be64 *mac;
+ __u64 nh_off;
+
+ /* Bounds check before touching the header */
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ mac = bpf_map_lookup_elem(&mac_map, &key);
+ if (mac)
+ __builtin_memcpy(eth->h_source, mac, ETH_ALEN);
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/xdp-bench/xdp_redirect_devmap_multi.c b/xdp-bench/xdp_redirect_devmap_multi.c
new file mode 100644
index 0000000..f3e9b3c
--- /dev/null
+++ b/xdp-bench/xdp_redirect_devmap_multi.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdio.h>
+#include <assert.h>
+#include <getopt.h>
+#include <libgen.h>
+#include <net/if.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+#include <linux/bpf.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <bpf/libbpf.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/resource.h>
+#include <linux/if_link.h>
+#include <linux/if_ether.h>
+#include <xdp/libxdp.h>
+
+#include "logging.h"
+
+#include "xdp_sample.h"
+#include "xdp-bench.h"
+#include "xdp_redirect_devmap_multi.skel.h"
+
+/* Interface indexes read by update_mac_map(), which walks the array from
+ * index 0 until it meets the first non-positive entry.
+ */
+static int ifaces[MAX_IFACE_NUM] = {};
+
+/* Sample flags passed to sample_init(); do_redirect_devmap_multi() adjusts
+ * them according to the selected mode and options before use.
+ */
+static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
+ SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT |
+ SAMPLE_DEVMAP_XMIT_CNT_MULTI | SAMPLE_SKIP_HEADING;
+
+DEFINE_SAMPLE_INIT(xdp_redirect_devmap_multi);
+
+/* Store the MAC address of every interface listed in ifaces[] into @map,
+ * keyed by ifindex.
+ *
+ * ifaces[] is walked until the first non-positive entry or until
+ * MAX_IFACE_NUM entries have been visited, whichever comes first.
+ *
+ * Returns 0 on success, or the negative value reported by the helper that
+ * failed.
+ */
+static int update_mac_map(struct bpf_map *map)
+{
+	/* mac_map values are 8 bytes wide (__be64 on the BPF side). Pass a
+	 * zeroed buffer of that size so the map update never reads past the
+	 * end of a 6-byte array; the bytes after the MAC stay zero.
+	 */
+	unsigned char mac_addr[sizeof(__u64)] = {};
+	int mac_map_fd = bpf_map__fd(map);
+	unsigned int ifindex;
+	int i, ret;
+
+	for (i = 0; i < MAX_IFACE_NUM && ifaces[i] > 0; i++) {
+		ifindex = ifaces[i];
+
+		ret = get_mac_addr(ifindex, mac_addr);
+		if (ret < 0) {
+			pr_warn("get interface %d mac failed\n",
+				ifindex);
+			return ret;
+		}
+
+		ret = bpf_map_update_elem(mac_map_fd, &ifindex, mac_addr, 0);
+		if (ret < 0) {
+			pr_warn("Failed to update mac address for ifindex %d\n",
+				ifindex);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/* Default option values: native XDP mode, interval of 2 */
+const struct devmap_multi_opts defaults_redirect_devmap_multi = { .mode = XDP_MODE_NATIVE,
+								  .interval = 2 };
+
+
+/* Entry point for the multi-interface devmap redirect benchmark.
+ *
+ * @cfg points to a struct devmap_multi_opts. The function attaches the
+ * "redir_multi_native" program to every interface in opt->ifaces, adds each
+ * interface to the forward map and, when opt->load_egress is set, fills
+ * mac_map with the interfaces' MAC addresses before running the sample
+ * loop.
+ *
+ * If attaching to the first interface fails and no egress program was
+ * requested, the whole setup is retried once with "redir_multi_general"
+ * and the int-sized devmap.
+ *
+ * Returns EXIT_OK on success. Most failures return one of the EXIT_FAIL*
+ * codes; program create/attach failures return the negative value of the
+ * failing call instead.
+ */
+int do_redirect_devmap_multi(const void *cfg, __unused const char *pin_root_path)
+{
+	const struct devmap_multi_opts *opt = cfg;
+
+	const char *prog_name = "redir_multi_native";
+	DECLARE_LIBBPF_OPTS(xdp_program_opts, opts);
+	struct xdp_redirect_devmap_multi *skel;
+	struct bpf_devmap_val devmap_val = {};
+	struct xdp_program *xdp_prog = NULL;
+	struct bpf_map *forward_map = NULL;
+	bool first = true, tried = false;
+	struct bpf_program *prog = NULL;
+	int ret = EXIT_FAIL_OPTION;
+	struct iface *iface;
+	int i;
+
+	if (opt->extended)
+		sample_switch_mode();
+
+	if (opt->mode == XDP_MODE_SKB)
+		/* devmap_xmit tracepoint not available */
+		mask &= ~(SAMPLE_DEVMAP_XMIT_CNT |
+			  SAMPLE_DEVMAP_XMIT_CNT_MULTI);
+
+	if (opt->stats)
+		mask |= SAMPLE_REDIRECT_CNT;
+
+restart:
+	skel = xdp_redirect_devmap_multi__open();
+	if (!skel) {
+		pr_warn("Failed to xdp_redirect_devmap_multi__open: %s\n",
+			strerror(errno));
+		ret = EXIT_FAIL_BPF;
+		goto end;
+	}
+
+	/* Make sure we only load the one XDP program we are interested in */
+	while ((prog = bpf_object__next_program(skel->obj, prog)) != NULL)
+		if (bpf_program__type(prog) == BPF_PROG_TYPE_XDP &&
+		    bpf_program__expected_attach_type(prog) == BPF_XDP)
+			bpf_program__set_autoload(prog, false);
+
+	if (tried) {
+		/* Fallback pass: int-sized devmap, no egress program */
+		forward_map = skel->maps.forward_map_general;
+		bpf_program__set_autoload(skel->progs.xdp_devmap_prog, false);
+#ifdef HAVE_LIBBPF_BPF_MAP__SET_AUTOCREATE
+		bpf_map__set_autocreate(skel->maps.forward_map_native, false);
+#else
+		pr_warn("Libbpf is missing bpf_map__set_autocreate(), fallback won't work\n");
+		ret = EXIT_FAIL_BPF;
+		goto end_destroy;
+#endif
+	} else {
+#ifdef HAVE_LIBBPF_BPF_MAP__SET_AUTOCREATE
+		bpf_map__set_autocreate(skel->maps.forward_map_general, false);
+#endif
+		forward_map = skel->maps.forward_map_native;
+	}
+
+	ret = sample_init_pre_load(skel, NULL);
+	if (ret < 0) {
+		pr_warn("Failed to sample_init_pre_load: %s\n", strerror(-ret));
+		ret = EXIT_FAIL_BPF;
+		goto end_destroy;
+	}
+
+	ret = EXIT_FAIL_OPTION;
+	/* opt parsing enforces num <= MAX_IFACES_NUM */
+	for (i = 0, iface = opt->ifaces; iface; i++, iface = iface->next) {
+		skel->rodata->from_match[i] = iface->ifindex;
+		skel->rodata->to_match[i] = iface->ifindex;
+		/* Record the ifindex for update_mac_map(); without this the
+		 * ifaces[] array stays all-zero and mac_map is never filled.
+		 */
+		if (i < MAX_IFACE_NUM)
+			ifaces[i] = iface->ifindex;
+	}
+	/* Terminate the list when there is room for a terminator */
+	if (i < MAX_IFACE_NUM)
+		ifaces[i] = 0;
+
+	opts.obj = skel->obj;
+	opts.prog_name = prog_name;
+	xdp_prog = xdp_program__create(&opts);
+	if (!xdp_prog) {
+		ret = -errno;
+		pr_warn("Couldn't open XDP program: %s\n",
+			strerror(-ret));
+		goto end_destroy;
+	}
+
+	for (iface = opt->ifaces; iface; iface = iface->next) {
+		pr_debug("Loading program on interface %s\n", iface->ifname);
+
+		ret = xdp_program__attach(xdp_prog, iface->ifindex, opt->mode, 0);
+		if (ret) {
+			if (first) {
+				/* Nothing is attached yet, so either retry
+				 * with the fallback program or bail out
+				 * without detaching.
+				 */
+				if (!opt->load_egress && !tried) {
+					pr_warn("Attempting fallback to int-sized devmap\n");
+					prog_name = "redir_multi_general";
+					tried = true;
+
+					xdp_program__close(xdp_prog);
+					xdp_redirect_devmap_multi__destroy(skel);
+					sample_teardown();
+					xdp_prog = NULL;
+					goto restart;
+				}
+				pr_warn("Failed to attach XDP program to iface %s: %s\n",
+					iface->ifname, strerror(-ret));
+				goto end_destroy;
+			}
+			pr_warn("Failed to attach XDP program to iface %s: %s\n",
+				iface->ifname, strerror(-ret));
+			goto end_detach;
+		}
+
+		/* Add all the interfaces to forward group and attach
+		 * egress devmap program if exist
+		 */
+		devmap_val.ifindex = iface->ifindex;
+		if (opt->load_egress)
+			devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_devmap_prog);
+		ret = bpf_map_update_elem(bpf_map__fd(forward_map), &iface->ifindex, &devmap_val, 0);
+		if (ret < 0) {
+			pr_warn("Failed to update devmap value: %s\n",
+				strerror(errno));
+			ret = EXIT_FAIL_BPF;
+			goto end_detach;
+		}
+
+		first = false;
+	}
+
+	if (opt->load_egress) {
+		/* Update mac_map with all egress interfaces' mac addr */
+		if (update_mac_map(skel->maps.mac_map) < 0) {
+			pr_warn("Updating mac address failed\n");
+			ret = EXIT_FAIL;
+			goto end_detach;
+		}
+	}
+
+	ret = sample_init(skel, mask, 0, 0);
+	if (ret < 0) {
+		pr_warn("Failed to initialize sample: %s\n", strerror(-ret));
+		ret = EXIT_FAIL;
+		goto end_detach;
+	}
+
+	ret = sample_run(opt->interval, NULL, NULL);
+	if (ret < 0) {
+		pr_warn("Failed during sample run: %s\n", strerror(-ret));
+		ret = EXIT_FAIL;
+		goto end_detach;
+	}
+	ret = EXIT_OK;
+end_detach:
+	for (iface = opt->ifaces; iface; iface = iface->next)
+		xdp_program__detach(xdp_prog, iface->ifindex, opt->mode, 0);
+end_destroy:
+	xdp_program__close(xdp_prog);
+	xdp_redirect_devmap_multi__destroy(skel);
+end:
+	sample_teardown();
+	return ret;
+}