diff options
Diffstat (limited to 'xdp-bench')
-rw-r--r-- | xdp-bench/.gitignore | 1 | ||||
-rw-r--r-- | xdp-bench/Makefile | 21 | ||||
-rw-r--r-- | xdp-bench/README.org | 570 | ||||
-rw-r--r-- | xdp-bench/hash_func01.h | 55 | ||||
-rw-r--r-- | xdp-bench/tests/test-xdp-bench.sh | 126 | ||||
-rw-r--r-- | xdp-bench/xdp-bench.8 | 696 | ||||
-rw-r--r-- | xdp-bench/xdp-bench.c | 275 | ||||
-rw-r--r-- | xdp-bench/xdp-bench.h | 100 | ||||
-rw-r--r-- | xdp-bench/xdp_basic.bpf.c | 68 | ||||
-rw-r--r-- | xdp-bench/xdp_basic.c | 142 | ||||
-rw-r--r-- | xdp-bench/xdp_redirect_basic.bpf.c | 44 | ||||
-rw-r--r-- | xdp-bench/xdp_redirect_basic.c | 140 | ||||
-rw-r--r-- | xdp-bench/xdp_redirect_cpumap.bpf.c | 539 | ||||
-rw-r--r-- | xdp-bench/xdp_redirect_cpumap.c | 354 | ||||
-rw-r--r-- | xdp-bench/xdp_redirect_devmap.bpf.c | 88 | ||||
-rw-r--r-- | xdp-bench/xdp_redirect_devmap.c | 207 | ||||
-rw-r--r-- | xdp-bench/xdp_redirect_devmap_multi.bpf.c | 77 | ||||
-rw-r--r-- | xdp-bench/xdp_redirect_devmap_multi.c | 230 |
18 files changed, 3733 insertions, 0 deletions
diff --git a/xdp-bench/.gitignore b/xdp-bench/.gitignore new file mode 100644 index 0000000..7c3bfd3 --- /dev/null +++ b/xdp-bench/.gitignore @@ -0,0 +1 @@ +xdp-bench diff --git a/xdp-bench/Makefile b/xdp-bench/Makefile new file mode 100644 index 0000000..5936277 --- /dev/null +++ b/xdp-bench/Makefile @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) + +XDP_TARGETS := xdp_redirect_basic.bpf xdp_redirect_cpumap.bpf xdp_redirect_devmap.bpf \ + xdp_redirect_devmap_multi.bpf xdp_basic.bpf +BPF_SKEL_TARGETS := $(XDP_TARGETS) + +# Don't install skeleton object files +XDP_OBJ_INSTALL := + +TOOL_NAME := xdp-bench +MAN_PAGE := xdp-bench.8 +TEST_FILE := tests/test-xdp-bench.sh +USER_TARGETS := xdp-bench +USER_EXTRA_C := xdp_redirect_basic.c xdp_redirect_cpumap.c xdp_redirect_devmap.c \ + xdp_redirect_devmap_multi.c xdp_basic.c +EXTRA_USER_DEPS := xdp-bench.h + +LIB_DIR = ../lib + +include $(LIB_DIR)/common.mk + diff --git a/xdp-bench/README.org b/xdp-bench/README.org new file mode 100644 index 0000000..068013c --- /dev/null +++ b/xdp-bench/README.org @@ -0,0 +1,570 @@ +#+EXPORT_FILE_NAME: xdp-bench +#+TITLE: xdp-bench +#+MAN_CLASS_OPTIONS: :section-id "8\" \"DATE\" \"VERSION\" \"A simple XDP benchmarking tool" +# This file serves both as a README on github, and as the source for the man +# page; the latter through the org-mode man page export support. +# . +# To export the man page, simply use the org-mode exporter; (require 'ox-man) if +# it's not available. There's also a Makefile rule to export it. + +* XDP-bench - a simple XDP benchmarking tool + +XDP-bench is a benchmarking utility for exercising the different operation modes +of XDP. It is intended to be a simple program demonstrating the various +operating modes; these include dropping packets, hairpin forwarding (using the +=XDP_TX= return code), and redirection using the various in-kernel packet +redirection facilities. 
+ +The drop and TX modes support various options to control whether packet data is +touched (read or written) before being dropped or transmitted. The redirection +modes support using the simple ifindex-based =bpf_redirect= helper, the +=bpf_redirect_map= helper using a cpumap as its target, =bpf_redirect_map= using +a devmap as its target, and the devmap's broadcast mode which allows redirecting +to multiple devices. + +There is more information on the meaning of the output in both default (terse) +and extended output mode, in the *Output Format Description* section below. + +** Running xdp-bench +The syntax for running xdp-bench is: + +#+begin_src sh +Usage: xdp-bench COMMAND [options] + +COMMAND can be one of: + drop - Drop all packets on an interface + tx - Transmit packets back out on an interface (hairpin forwarding) + redirect - XDP redirect using the bpf_redirect() helper + redirect-cpu - XDP CPU redirect using BPF_MAP_TYPE_CPUMAP + redirect-map - XDP redirect using BPF_MAP_TYPE_DEVMAP + redirect-multi - XDP multi-redirect using BPF_MAP_TYPE_DEVMAP and the BPF_F_BROADCAST flag +#+end_src + +Each command, and its options are explained below. Or use =xdp-bench COMMAND +--help= to see the options for each command. + +* The DROP command +In this mode, =xdp-bench= installs an XDP program on an interface that simply +drops all packets. There are options to control what to do with the packet +before dropping it (touch the packet data or not), as well as which statistics +to gather. This is a basic benchmark for the baseline (best-case) performance of +XDP on an interface. + +The syntax for the =drop= command is: + +=xdp-bench drop [options] <ifname>= + +Where =<ifname>= is the name of the interface the XDP program should be +installed on. + +The supported options are: + +** -p, --packet-operation <ACTION> +Specify which operation should be taken on the packet before dropping it. 
The +following actions are available: + +#+begin_src sh + no-touch - Drop the packet without touching the packet data + touch - Read a field in the packet header before dropping + swap-macs - Swap the source and destination MAC addresses before dropping +#+end_src + +Whether to touch the packet before dropping it can have a significant +performance impact as this requires bringing packet data into the CPU cache (and +flushing it back out if writing). + +The default for this option is =no-touch=. + +** -r, --rxq-stats +If set, the XDP program will also gather statistics on which receive queue index +each packet was received on. This is displayed in the extended output mode along +with per-CPU data (which, depending on the hardware configuration may or may not +be equivalent). + +** -i, --interval <SECONDS> +Set the polling interval for collecting all statistics and displaying them to +the output. The unit of interval is in seconds. + +** -e, --extended +Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in +"terse" mode. The output mode can be switched by hitting C-\ while the program +is running. See also the *Output Format Description* section below. + +** -m, --mode +Selects the XDP program mode (native or skb). Note that native XDP mode is the +default, and loading the redirect program in skb manner is neither performant, +nor recommended. However, this option is useful if the interface driver lacks +native XDP support, or when simply testing the tool. + +** -v, --verbose +Enable verbose logging. Supply twice to enable verbose logging from the +underlying =libxdp= and =libbpf= libraries. + +** --version +Show the application version and exit. + +** -h, --help +Display a summary of the available options + +* The PASS command +In this mode, =xdp-bench= installs an XDP program on an interface that passes +all packets to the network stack after processing them (returning =XDP_PASS=). 
+There are options to control what to do with the packet before passing it +(touch the packet data or not), as well as which statistics to gather. This is a +basic benchmark for the overhead of installing an XDP program on an interface +while still running the regular network stack. + +The syntax for the =pass= command is: + +=xdp-bench pass [options] <ifname>= + +Where =<ifname>= is the name of the interface the XDP program should be +installed on. + +The supported options are: + +** -p, --packet-operation <ACTION> +Specify which operation should be taken on the packet before passing it. The +following actions are available: + +#+begin_src sh + no-touch - Pass the packet without touching the packet data + touch - Read a field in the packet header before passing + swap-macs - Swap the source and destination MAC addresses before passing +#+end_src + +The default for this option is =no-touch=. + +** -r, --rxq-stats +If set, the XDP program will also gather statistics on which receive queue index +each packet was received on. This is displayed in the extended output mode along +with per-CPU data (which, depending on the hardware configuration may or may not +be equivalent). + +** -i, --interval <SECONDS> +Set the polling interval for collecting all statistics and displaying them to +the output. The unit of interval is in seconds. + +** -e, --extended +Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in +"terse" mode. The output mode can be switched by hitting C-\ while the program +is running. See also the *Output Format Description* section below. + +** -m, --mode +Selects the XDP program mode (native or skb). Note that native XDP mode is the +default, and loading the redirect program in skb manner is neither performant, +nor recommended. However, this option is useful if the interface driver lacks +native XDP support, or when simply testing the tool. + +** -v, --verbose +Enable verbose logging. 
Supply twice to enable verbose logging from the +underlying =libxdp= and =libbpf= libraries. + +** --version +Show the application version and exit. + +** -h, --help +Display a summary of the available options + +* The TX command +In this mode, =xdp-bench= installs an XDP program on an interface that performs +so-called "hairpin forwarding", which means each packet is transmitted back out +the same interface (using the =XDP_TX= return code). There are options to +control what to do with the packet before transmitting it (touch the packet data +or not), as well as which statistics to gather. + +The syntax for the =tx= command is: + +=xdp-bench tx [options] <ifname>= + +Where =<ifname>= is the name of the interface the XDP program should be +installed on. + +The supported options are: + +** -p, --packet-operation <ACTION> +Specify which operation should be taken on the packet before transmitting it. The +following actions are available: + +#+begin_src sh + no-touch - Transmit the packet without touching the packet data + touch - Read a field in the packet header before transmitting + swap-macs - Swap the source and destination MAC addresses before transmitting +#+end_src + +To allow the packet to be successfully transmitted back to the sender, the MAC +addresses have to be swapped, so that the source MAC matches the network device. +However, there is a performance overhead in doing swapping, so this option +allows this function to be turned off. + +The default for this option is =swap-macs=. + +** -r, --rxq-stats +If set, the XDP program will also gather statistics on which receive queue index +each packet was received on. This is displayed in the extended output mode along +with per-CPU data (which, depending on the hardware configuration may or may not +be equivalent). + +** -i, --interval <SECONDS> +Set the polling interval for collecting all statistics and displaying them to +the output. The unit of interval is in seconds. 
+ +** -e, --extended +Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in +"terse" mode. The output mode can be switched by hitting C-\ while the program +is running. See also the *Output Format Description* section below. + +** -m, --mode +Selects the XDP program mode (native or skb). Note that native XDP mode is the +default, and loading the redirect program in skb manner is neither performant, +nor recommended. However, this option is useful if the interface driver lacks +native XDP support, or when simply testing the tool. + +** -v, --verbose +Enable verbose logging. Supply twice to enable verbose logging from the +underlying =libxdp= and =libbpf= libraries. + +** --version +Show the application version and exit. + +** -h, --help +Display a summary of the available options + +* The REDIRECT command +In this mode, =xdp-bench= sets up packet redirection between the two +interfaces supplied on the command line using the =bpf_redirect= BPF helper +triggered on packet reception on the ingress interface. + +The syntax for the =redirect= command is: + +=xdp-bench redirect [options] <ifname_in> <ifname_out>= + +Where =<ifname_in>= is the name of the input interface from where packets will +be redirected to the output interface =<ifname_out>=. + +The supported options are: + +** -i, --interval <SECONDS> +Set the polling interval for collecting all statistics and displaying them to +the output. The unit of interval is in seconds. + +** -s, --stats +Enable statistics for successful redirection. This option comes with a per +packet tracing overhead, for recording all successful redirections. + +** -e, --extended +Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in +"terse" mode. The output mode can be switched by hitting C-\ while the program +is running. See also the *Output Format Description* section below. + +** -m, --mode +Selects the XDP program mode (native or skb). 
Note that native XDP mode is the +default, and loading the redirect program in skb manner is neither performant, +nor recommended. However, this option is useful if the interface driver lacks +native XDP support, or when simply testing the tool. + +** -v, --verbose +Enable verbose logging. Supply twice to enable verbose logging from the +underlying =libxdp= and =libbpf= libraries. + +** --version +Show the application version and exit. + +** -h, --help +Display a summary of the available options + +* The REDIRECT-CPU command +In this mode, =xdp-bench= sets up packet redirection using the +=bpf_redirect_map= BPF helper triggered on packet reception on the ingress +interface, using a cpumap as its target. Hence, this tool can be used to +redirect packets on an interface from one CPU to another. In addition to this, +the tool then supports redirecting the packet to another output device when it +is processed on the target CPU. + +The syntax for the =redirect-cpu= command is: + +=xdp-bench redirect-cpu [options] <ifname> -c 0 ... -c N= + +Where =<ifname>= is the name of the input interface from where packets will be +redirected to the target CPU list specified using =-c=. + +The supported options are: + +** -c, --cpu <CPU> +Specify a possible target CPU index. This option must be passed at least once, +and can be passed multiple times to specify a list of CPUs. Which CPU is chosen +for a given packet depends on the value of the =--program-mode= option, +described below. + +** -p, --program-mode <MODE> +Specify a program that embeds a predefined policy deciding how packets are +redirected to different CPUs. 
The following options are available: + +#+begin_src sh + no-touch - Redirect without touching packet data + touch - Read packet data before redirecting + round-robin - Cycle between target CPUs in a round-robin fashion (for each packet) + l4-proto - Choose the target CPU based on the layer-4 protocol of packet + l4-filter - Like l4-proto, but drop UDP packets with destination port 9 (used by pktgen) + l4-hash - Use source and destination IP hashing to pick target CPU +#+end_src + +The =no-touch= and =touch= modes always redirect packets to the same CPU (the +first value supplied to =--cpu=). The =round-robin= and =l4-hash= modes +distribute packets between all the CPUs supplied as =--cpu= arguments, while +=l4-proto= and =l4-filter= send TCP and unrecognised packets to CPU index 0, UDP +packets to CPU index 1 and ICMP packets to CPU index 2 (where the index refers +to the order the actual CPUs are given on the command line). + +The default for this option is =l4-hash=. + +** -r, --remote-action <ACTION> +If this option is set, a separate program is installed into the cpumap, which +will be invoked on the remote CPU after the packet is processed there. The +action can be either =drop= or =pass= which will drop the packet or pass it to +the regular networking stack, respectively. Or it can be =redirect=, which will +cause the packet to be redirected to another interface and transmitted out that +interface on the remote CPU. If this option is set to =redirect= the target +device must be specified using =--redirect-device=. + +The default for this option is =disabled=. + +** -D, --redirect-device <IFNAME> +Specify the device to redirect the packet to when it is received on the target CPU. +Note that this option can only be specified with =--remote-action redirect=. + +** -q, --qsize <PACKETS> +Set the queue size for the per-CPU cpumap ring buffer used for redirecting +packets from multiple CPUs to one CPU. The default value is 2048 packets. 
+ +** -x, --stress-mode +Stress the cpumap implementation by deallocating and reallocating the cpumap +ring buffer on each polling interval. + +** -i, --interval <SECONDS> +Set the polling interval for collecting all statistics and displaying them to +the output. The unit of interval is in seconds. + +** -s, --stats +Enable statistics for successful redirection. This option comes with a per +packet tracing overhead, for recording all successful redirections. + +** -e, --extended +Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in +"terse" mode. The output mode can be switched by hitting C-\ while the program +is running. See also the *Output Format Description* section below. + +** -m, --mode +Selects the XDP program mode (native or skb). Note that native XDP mode is the +default, and loading the redirect program in skb manner is neither performant, +nor recommended. However, this option is useful if the interface driver lacks +native XDP support, or when simply testing the tool. + +** -v, --verbose +Enable verbose logging. Supply twice to enable verbose logging from the +underlying =libxdp= and =libbpf= libraries. + +** --version +Show the application version and exit. + +** -h, --help +Display a summary of the available options + +* The REDIRECT-MAP command +In this mode, =xdp-bench= sets up packet redirection between two interfaces +supplied on the command line using the =bpf_redirect_map()= BPF helper triggered +on packet reception on the ingress interface, using a devmap as its target. + +The syntax for the =redirect-map= command is: + +=xdp-bench redirect-map [options] <ifname_in> <ifname_out>= + +Where =<ifname_in>= is the name of the input interface from where packets will +be redirected to the output interface =<ifname_out>=. 
+ +The supported options are: + +** -X, --load-egress +Load a program in the devmap entry used for redirection, so that it is invoked +after the packet is redirected to the target device, before it is transmitted +out of the output interface. The remote program will update the packet data so +its source MAC address matches the one of the destination interface. + +** -i, --interval <SECONDS> +Set the polling interval for collecting all statistics and displaying them to +the output. The unit of interval is in seconds. + +** -s, --stats +Enable statistics for successful redirection. This option comes with a per +packet tracing overhead, for recording all successful redirections. + +** -e, --extended +Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in +"terse" mode. The output mode can be switched by hitting C-\ while the program +is running. See also the *Output Format Description* section below. + +** -m, --mode +Selects the XDP program mode (native or skb). Note that native XDP mode is the +default, and loading the redirect program in skb manner is neither performant, +nor recommended. However, this option is useful if the interface driver lacks +native XDP support, or when simply testing the tool. + +** -v, --verbose +Enable verbose logging. Supply twice to enable verbose logging from the +underlying =libxdp= and =libbpf= libraries. + +** --version +Show the application version and exit. + +** -h, --help +Display a summary of the available options + +* The REDIRECT-MULTI command +In this mode, =xdp-bench= sets up one-to-many packet redirection between +interfaces supplied on the command line, using the =bpf_redirect_map= BPF helper +triggered on packet reception on the ingress interface, using a devmap as its +target. The packet is broadcast to all output interfaces specified on the +command line, using devmap's packet broadcast feature. 
+ +The syntax for the =redirect-multi= command is: + +=xdp-bench redirect-multi [options] <ifname_in> <ifname_out1> ... <ifname_outN>= + +Where =<ifname_in>= is the name of the input interface from where packets will +be redirected to one or many output interface(s). + +The supported options are: + +** -X, --load-egress +Load a program in the devmap entry used for redirection, so that it is invoked +after the packet is redirected to the target device, before it is transmitted +out of the output interface. The remote program will update the packet data so +its source MAC address matches the one of the destination interface. + +** -i, --interval <SECONDS> +Set the polling interval for collecting all statistics and displaying them to +the output. The unit of interval is in seconds. + +** -s, --stats +Enable statistics for successful redirection. This option comes with a per +packet tracing overhead, for recording all successful redirections. + +** -e, --extended +Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in +"terse" mode. The output mode can be switched by hitting C-\ while the program +is running. See also the *Output Format Description* section below. + +** -m, --mode +Selects the XDP program mode (native or skb). Note that native XDP mode is the +default, and loading the redirect program in skb manner is neither performant, +nor recommended. However, this option is useful if the interface driver lacks +native XDP support, or when simply testing the tool. + +** -v, --verbose +Enable verbose logging. Supply twice to enable verbose logging from the +underlying =libxdp= and =libbpf= libraries. + +** --version +Show the application version and exit. + +** -h, --help +Display a summary of the available options + + +* Output Format Description + +By default, redirect success statistics are disabled, use =--stats= to enable. +The terse output mode is default, extended output mode can be activated using +the =--extended= command line option. 
+ +SIGQUIT (Ctrl + \\) can be used to switch the mode dynamically at runtime. + +Terse mode displays at most the following fields: +#+begin_src sh + rx/s Number of packets received per second + redir/s Number of packets successfully redirected per second + err,drop/s Aggregated count of errors per second (including dropped packets when not using the drop command) + xmit/s Number of packets transmitted on the output device per second +#+end_src + +Extended output mode displays at most the following fields: +#+begin_src sh + FIELD DESCRIPTION + receive Displays the number of packets received and errors encountered + + Whenever an error or packet drop occurs, details of per CPU error + and drop statistics will be expanded inline in terse mode. + pkt/s - Packets received per second + drop/s - Packets dropped per second + error/s - Errors encountered per second + redirect - Displays the number of packets successfully redirected + Errors encountered are expanded under redirect_err field + Note that passing -s to enable it has a per packet overhead + redir/s - Packets redirected successfully per second + + + redirect_err Displays the number of packets that failed redirection + + The errno is expanded under this field with per CPU count + The recognized errors are: + EINVAL: Invalid redirection + ENETDOWN: Device being redirected to is down + EMSGSIZE: Packet length too large for device + EOPNOTSUPP: Operation not supported + ENOSPC: No space in ptr_ring of cpumap kthread + + error/s - Packets that failed redirection per second + + + enqueue to cpu N Displays the number of packets enqueued to bulk queue of CPU N + Expands to cpu:FROM->N to display enqueue stats for each CPU enqueuing to CPU N + Received packets can be associated with the CPU redirect program is enqueuing + packets to. 
+ pkt/s - Packets enqueued per second from other CPU to CPU N + drop/s - Packets dropped when trying to enqueue to CPU N + bulk-avg - Average number of packets processed for each event + + + kthread Displays the number of packets processed in CPUMAP kthread for each CPU + Packets consumed from ptr_ring in kthread, and its xdp_stats (after calling + CPUMAP bpf prog) are expanded below this. xdp_stats are expanded as a total and + then per-CPU to associate it to each CPU's pinned CPUMAP kthread. + pkt/s - Packets consumed per second from ptr_ring + drop/s - Packets dropped per second in kthread + sched - Number of times kthread called schedule() + + xdp_stats (also expands to per-CPU counts) + pass/s - XDP_PASS count for CPUMAP program execution + drop/s - XDP_DROP count for CPUMAP program execution + redir/s - XDP_REDIRECT count for CPUMAP program execution + + + xdp_exception Displays xdp_exception tracepoint events + + This can occur due to internal driver errors, unrecognized + XDP actions and due to explicit user trigger by use of XDP_ABORTED + Each action is expanded below this field with its count + hit/s - Number of times the tracepoint was hit per second + + + devmap_xmit Displays devmap_xmit tracepoint events + + This tracepoint is invoked for successful transmissions on output + device but these statistics are not available for generic XDP mode, + hence they will be omitted from the output when using SKB mode + xmit/s - Number of packets that were transmitted per second + drop/s - Number of packets that failed transmissions per second + drv_err/s - Number of internal driver errors per second + bulk-avg - Average number of packets processed for each event +#+end_src + +* BUGS + +Please report any bugs on Github: https://github.com/xdp-project/xdp-tools/issues + +* AUTHOR + +Earlier xdp-redirect tools were written by Jesper Dangaard Brouer and John +Fastabend. 
They were then rewritten to support more features by Kumar Kartikeya +Dwivedi, who also ported them to xdp-tools together with Toke Høiland-Jørgensen. +This man page was written by Kumar Kartikeya Dwivedi and Toke Høiland-Jørgensen. diff --git a/xdp-bench/hash_func01.h b/xdp-bench/hash_func01.h new file mode 100644 index 0000000..ac96bc3 --- /dev/null +++ b/xdp-bench/hash_func01.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: LGPL-2.1 + * + * Based on Paul Hsieh's (LGPL 2.1) hash function + * From: http://www.azillionmonkeys.com/qed/hash.html + */ + +#define get16bits(d) (*((const __u16 *) (d))) + +static __always_inline +__u32 SuperFastHash(const char *data, int len, __u32 initval) { + __u32 hash = initval; + __u32 tmp; + int rem; + + if (len <= 0 || data == NULL) return 0; + + rem = len & 3; + len >>= 2; + + /* Main loop: consume 4 bytes (two 16-bit words) per iteration */ +#pragma clang loop unroll(full) + for (;len > 0; len--) { + hash += get16bits (data); + tmp = (get16bits (data+2) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + data += 2*sizeof (__u16); + hash += hash >> 11; + } + + /* Handle the 1-3 trailing bytes (len & 3) left after the main loop */ + switch (rem) { + case 3: hash += get16bits (data); + hash ^= hash << 16; + hash ^= ((signed char)data[sizeof (__u16)]) << 18; + hash += hash >> 11; + break; + case 2: hash += get16bits (data); + hash ^= hash << 11; + hash += hash >> 17; + break; + case 1: hash += (signed char)*data; + hash ^= hash << 10; + hash += hash >> 1; + } + + /* Force "avalanching" of final 127 bits */ + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + hash ^= hash << 25; + hash += hash >> 6; + + return hash; +} diff --git a/xdp-bench/tests/test-xdp-bench.sh b/xdp-bench/tests/test-xdp-bench.sh new file mode 100644 index 0000000..7dfc357 --- /dev/null +++ b/xdp-bench/tests/test-xdp-bench.sh @@ -0,0 +1,126 @@ +XDP_LOADER=${XDP_LOADER:-./xdp-loader} +XDP_BENCH=${XDP_BENCH:-./xdp-bench} +ALL_TESTS="test_drop test_pass test_tx test_rxq_stats test_redirect test_redirect_cpu test_redirect_map 
test_redirect_map_egress test_redirect_multi test_redirect_multi_egress" + +test_basic() +{ + action=$1 + + export XDP_SAMPLE_IMMEDIATE_EXIT=1 + check_run $XDP_BENCH $action $NS -vv + check_run $XDP_BENCH $action $NS -p read-data -vv + check_run $XDP_BENCH $action $NS -p swap-macs -vv + check_run $XDP_BENCH $action $NS -m skb -vv + check_run $XDP_BENCH $action $NS -e -vv +} + +test_drop() +{ + test_basic drop +} +test_pass() +{ + test_basic pass +} +test_tx() +{ + test_basic tx +} + +test_rxq_stats() +{ + skip_if_missing_veth_rxq + + export XDP_SAMPLE_IMMEDIATE_EXIT=1 + check_run $XDP_BENCH drop $NS -r -vv +} + +test_redirect() +{ + export XDP_SAMPLE_IMMEDIATE_EXIT=1 + check_run ip link add dev btest0 type veth peer name btest1 + check_run $XDP_BENCH redirect btest0 btest1 -vv + check_run $XDP_BENCH redirect btest0 btest1 -s -vv + check_run $XDP_BENCH redirect btest0 btest1 -m skb -vv + check_run $XDP_BENCH redirect btest0 btest1 -e -vv + ip link del dev btest0 +} + +test_redirect_cpu() +{ + skip_if_missing_cpumap_attach + + export XDP_SAMPLE_IMMEDIATE_EXIT=1 + check_run ip link add dev btest0 type veth peer name btest1 + check_run $XDP_BENCH redirect-cpu btest0 -c 0 -vv + check_run $XDP_BENCH redirect-cpu btest0 -c 0 -m skb -vv + check_run $XDP_BENCH redirect-cpu btest0 -c 0 -p touch -vv + check_run $XDP_BENCH redirect-cpu btest0 -c 0 -p round-robin -vv + check_run $XDP_BENCH redirect-cpu btest0 -c 0 -p l4-proto -vv + check_run $XDP_BENCH redirect-cpu btest0 -c 0 -p l4-filter -vv + check_run $XDP_BENCH redirect-cpu btest0 -c 0 -p l4-hash -vv + + is_progmap_supported || export LIBXDP_SKIP_DISPATCHER=1 + check_run $XDP_BENCH redirect-cpu btest0 -c 0 -r drop -vv + check_run $XDP_BENCH redirect-cpu btest0 -c 0 -r pass -vv + check_run $XDP_BENCH redirect-cpu btest0 -c 0 -r redirect -D btest1 -vv + ip link del dev btest0 +} + +test_redirect_map() +{ + export XDP_SAMPLE_IMMEDIATE_EXIT=1 + check_run ip link add dev btest0 type veth peer name btest1 + check_run $XDP_BENCH 
redirect-map btest0 btest1 -vv + check_run $XDP_BENCH redirect-map btest0 btest1 -s -vv + check_run $XDP_BENCH redirect-map btest0 btest1 -m skb -vv + check_run $XDP_BENCH redirect-map btest0 btest1 -e -vv + ip link del dev btest0 +} + +test_redirect_map_egress() +{ + skip_if_missing_cpumap_attach + + export XDP_SAMPLE_IMMEDIATE_EXIT=1 + check_run ip link add dev btest0 type veth peer name btest1 + is_progmap_supported || export LIBXDP_SKIP_DISPATCHER=1 + check_run $XDP_BENCH redirect-map btest0 btest1 -X -vv + ip link del dev btest0 +} + +test_redirect_multi() +{ + export XDP_SAMPLE_IMMEDIATE_EXIT=1 + check_run ip link add dev btest0 type veth peer name btest1 + check_run ip link add dev btest2 type veth peer name btest3 + check_run $XDP_BENCH redirect-multi btest0 btest1 btest2 btest3 -vv + check_run $XDP_BENCH redirect-multi btest0 btest1 btest2 btest3 -s -vv + check_run $XDP_BENCH redirect-multi btest0 btest1 btest2 btest3 -m skb -vv + check_run $XDP_BENCH redirect-multi btest0 btest1 btest2 btest3 -e -vv + ip link del dev btest0 + ip link del dev btest2 +} + +test_redirect_multi_egress() +{ + skip_if_missing_cpumap_attach + + export XDP_SAMPLE_IMMEDIATE_EXIT=1 + is_progmap_supported || export LIBXDP_SKIP_DISPATCHER=1 + check_run ip link add dev btest0 type veth peer name btest1 + check_run ip link add dev btest2 type veth peer name btest3 + + check_run $XDP_BENCH redirect-multi btest0 btest1 btest2 btest3 -X -vv + + ip link del dev btest0 + ip link del dev btest2 +} + +cleanup_tests() +{ + ip link del dev btest0 >/dev/null 2>&1 + ip link del dev btest2 >/dev/null 2>&1 + $XDP_LOADER unload $NS --all >/dev/null 2>&1 + $XDP_LOADER clean >/dev/null 2>&1 +} diff --git a/xdp-bench/xdp-bench.8 b/xdp-bench/xdp-bench.8 new file mode 100644 index 0000000..c89dd1b --- /dev/null +++ b/xdp-bench/xdp-bench.8 @@ -0,0 +1,696 @@ +.TH "xdp-bench" "8" "FEBRUARY 4, 2023" "V1.3.1" "A simple XDP benchmarking tool" + +.SH "NAME" +XDP-bench \- a simple XDP benchmarking tool +.SH 
"SYNOPSIS" +.PP +XDP-bench is a benchmarking utility for exercising the different operation modes +of XDP. It is intended to be a simple program demonstrating the various +operating modes; these include dropping packets, hairpin forwarding (using the +\fIXDP_TX\fP return code), and redirection using the various in-kernel packet +redirection facilities. + +.PP +The drop and TX modes support various options to control whether packet data is +touched (read or written) before being dropped or transmitted. The redirection +modes support using the simple ifindex-based \fIbpf_redirect\fP helper, the +\fIbpf_redirect_map\fP helper using a cpumap as its target, \fIbpf_redirect_map\fP using +a devmap as its target, and the devmap's broadcast mode which allows redirecting +to multiple devices. + +.PP +There is more information on the meaning of the output in both default (terse) +and extended output mode, in the \fBOutput Format Description\fP section below. + +.SS "Running xdp-bench" +.PP +The syntax for running xdp-bench is: + +.RS +.nf +\fCUsage: xdp-bench COMMAND [options] + +COMMAND can be one of: + drop - Drop all packets on an interface + tx - Transmit packets back out on an interface (hairpin forwarding) + redirect - XDP redirect using the bpf_redirect() helper + redirect-cpu - XDP CPU redirect using BPF_MAP_TYPE_CPUMAP + redirect-map - XDP redirect using BPF_MAP_TYPE_DEVMAP + redirect-multi - XDP multi-redirect using BPF_MAP_TYPE_DEVMAP and the BPF_F_BROADCAST flag +\fP +.fi +.RE + +.PP +Each command, and its options are explained below. Or use \fIxdp\-bench COMMAND +\-\-help\fP to see the options for each command. + +.SH "The DROP command" +.PP +In this mode, \fIxdp\-bench\fP installs an XDP program on an interface that simply +drops all packets. There are options to control what to do with the packet +before dropping it (touch the packet data or not), as well as which statistics +to gather. 
This is a basic benchmark for the baseline (best-case) performance of +XDP on an interface. + +.PP +The syntax for the \fIdrop\fP command is: + +.PP +\fIxdp\-bench drop [options] <ifname>\fP + +.PP +Where \fI<ifname>\fP is the name of the interface the XDP program should be +installed on. + +.PP +The supported options are: + +.SS "-p, --packet-operation <ACTION>" +.PP +Specify which operation should be taken on the packet before dropping it. The +following actions are available: + +.RS +.nf +\fCno-touch - Drop the packet without touching the packet data +touch - Read a field in the packet header before dropping +swap-macs - Swap the source and destination MAC addresses before dropping +\fP +.fi +.RE + +.PP +Whether to touch the packet before dropping it can have a significant +performance impact as this requires bringing packet data into the CPU cache (and +flushing it back out if writing). + +.PP +The default for this option is \fIno\-touch\fP. + +.SS "-r, --rxq-stats" +.PP +If set, the XDP program will also gather statistics on which receive queue index +each packet was received on. This is displayed in the extended output mode along +with per-CPU data (which, depending on the hardware configuration may or may not +be equivalent). + +.SS "-i, --interval <SECONDS>" +.PP +Set the polling interval for collecting all statistics and displaying them to +the output. The unit of interval is in seconds. + +.SS "-e, --extended" +.PP +Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in +"terse" mode. The output mode can be switched by hitting C-\e while the program +is running. See also the \fBOutput Format Description\fP section below. + +.SS "-m, --mode" +.PP +Selects the XDP program mode (native or skb). Note that native XDP mode is the +default, and loading the redirect program in skb manner is neither performant, +nor recommended. 
However, this option is useful if the interface driver lacks +native XDP support, or when simply testing the tool. + +.SS "-v, --verbose" +.PP +Enable verbose logging. Supply twice to enable verbose logging from the +underlying \fIlibxdp\fP and \fIlibbpf\fP libraries. + +.SS "--version" +.PP +Show the application version and exit. + +.SS "-h, --help" +.PP +Display a summary of the available options + +.SH "The PASS command" +.PP +In this mode, \fIxdp\-bench\fP installs an XDP program on an interface that passes +all packets to the network stack after processing them (returning \fIXDP_PASS\fP). +There are options to control what to do with the packet before passing it +(touch the packet data or not), as well as which statistics to gather. This is a +basic benchmark for the overhead of installing an XDP program on an interface +while still running the regular network stack. + +.PP +The syntax for the \fIpass\fP command is: + +.PP +\fIxdp\-bench pass [options] <ifname>\fP + +.PP +Where \fI<ifname>\fP is the name of the interface the XDP program should be +installed on. + +.PP +The supported options are: + +.SS "-p, --packet-operation <ACTION>" +.PP +Specify which operation should be taken on the packet before passing it. The +following actions are available: + +.RS +.nf +\fCno-touch - Pass the packet without touching the packet data +touch - Read a field in the packet header before passing +swap-macs - Swap the source and destination MAC addresses before passing +\fP +.fi +.RE + +.PP +The default for this option is \fIno\-touch\fP. + +.SS "-r, --rxq-stats" +.PP +If set, the XDP program will also gather statistics on which receive queue index +each packet was received on. This is displayed in the extended output mode along +with per-CPU data (which, depending on the hardware configuration may or may not +be equivalent). + +.SS "-i, --interval <SECONDS>" +.PP +Set the polling interval for collecting all statistics and displaying them to +the output. 
The unit of interval is in seconds. + +.SS "-e, --extended" +.PP +Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in +"terse" mode. The output mode can be switched by hitting C-\e while the program +is running. See also the \fBOutput Format Description\fP section below. + +.SS "-m, --mode" +.PP +Selects the XDP program mode (native or skb). Note that native XDP mode is the +default, and loading the redirect program in skb manner is neither performant, +nor recommended. However, this option is useful if the interface driver lacks +native XDP support, or when simply testing the tool. + +.SS "-v, --verbose" +.PP +Enable verbose logging. Supply twice to enable verbose logging from the +underlying \fIlibxdp\fP and \fIlibbpf\fP libraries. + +.SS "--version" +.PP +Show the application version and exit. + +.SS "-h, --help" +.PP +Display a summary of the available options + +.SH "The TX command" +.PP +In this mode, \fIxdp\-bench\fP installs an XDP program on an interface that performs +so-called "hairpin forwarding", which means each packet is transmitted back out +the same interface (using the \fIXDP_TX\fP return code). There are options to +control what to do with the packet before transmitting it (touch the packet data +or not), as well as which statistics to gather. + +.PP +The syntax for the \fItx\fP command is: + +.PP +\fIxdp\-bench tx [options] <ifname>\fP + +.PP +Where \fI<ifname>\fP is the name of the interface the XDP program should be +installed on. + +.PP +The supported options are: + +.SS "-p, --packet-operation <ACTION>" +.PP +Specify which operation should be taken on the packet before transmitting it. 
The +following actions are available: + +.RS +.nf +\fCno-touch - Transmit the packet without touching the packet data +touch - Read a field in the packet header before transmitting +swap-macs - Swap the source and destination MAC addresses before transmitting +\fP +.fi +.RE + +.PP +To allow the packet to be successfully transmitted back to the sender, the MAC +addresses have to be swapped, so that the source MAC matches the network device. +However, there is a performance overhead in doing swapping, so this option +allows this function to be turned off. + +.PP +The default for this option is \fIswap\-macs\fP. + +.SS "-r, --rxq-stats" +.PP +If set, the XDP program will also gather statistics on which receive queue index +each packet was received on. This is displayed in the extended output mode along +with per-CPU data (which, depending on the hardware configuration may or may not +be equivalent). + +.SS "-i, --interval <SECONDS>" +.PP +Set the polling interval for collecting all statistics and displaying them to +the output. The unit of interval is in seconds. + +.SS "-e, --extended" +.PP +Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in +"terse" mode. The output mode can be switched by hitting C-\e while the program +is running. See also the \fBOutput Format Description\fP section below. + +.SS "-m, --mode" +.PP +Selects the XDP program mode (native or skb). Note that native XDP mode is the +default, and loading the redirect program in skb manner is neither performant, +nor recommended. However, this option is useful if the interface driver lacks +native XDP support, or when simply testing the tool. + +.SS "-v, --verbose" +.PP +Enable verbose logging. Supply twice to enable verbose logging from the +underlying \fIlibxdp\fP and \fIlibbpf\fP libraries. + +.SS "--version" +.PP +Show the application version and exit. 
+ +.SS "-h, --help" +.PP +Display a summary of the available options + +.SH "The REDIRECT command" +.PP +In this mode, \fIxdp\-bench\fP sets up packet redirection between the two +interfaces supplied on the command line using the \fIbpf_redirect\fP BPF helper +triggered on packet reception on the ingress interface. + +.PP +The syntax for the \fIredirect\fP command is: + +.PP +\fIxdp\-bench redirect [options] <ifname_in> <ifname_out>\fP + +.PP +Where \fI<ifname_in>\fP is the name of the input interface from where packets will +be redirected to the output interface \fI<ifname_out>\fP. + +.PP +The supported options are: + +.SS "-i, --interval <SECONDS>" +.PP +Set the polling interval for collecting all statistics and displaying them to +the output. The unit of interval is in seconds. + +.SS "-s, --stats" +.PP +Enable statistics for successful redirection. This option comes with a per +packet tracing overhead, for recording all successful redirections. + +.SS "-e, --extended" +.PP +Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in +"terse" mode. The output mode can be switched by hitting C-\e while the program +is running. See also the \fBOutput Format Description\fP section below. + +.SS "-m, --mode" +.PP +Selects the XDP program mode (native or skb). Note that native XDP mode is the +default, and loading the redirect program in skb manner is neither performant, +nor recommended. However, this option is useful if the interface driver lacks +native XDP support, or when simply testing the tool. + +.SS "-v, --verbose" +.PP +Enable verbose logging. Supply twice to enable verbose logging from the +underlying \fIlibxdp\fP and \fIlibbpf\fP libraries. + +.SS "--version" +.PP +Show the application version and exit. 
+ +.SS "-h, --help" +.PP +Display a summary of the available options + +.SH "The REDIRECT-CPU command" +.PP +In this mode, \fIxdp\-bench\fP sets up packet redirection using the +\fIbpf_redirect_map\fP BPF helper triggered on packet reception on the ingress +interface, using a cpumap as its target. Hence, this tool can be used to +redirect packets on an interface from one CPU to another. In addition to this, +the tool then supports redirecting the packet to another output device when it +is processed on the target CPU. + +.PP +The syntax for the \fIredirect\-cpu\fP command is: + +.PP +\fIxdp\-bench redirect\-cpu [options] <ifname> \-c 0 ... \-c N\fP + +.PP +Where \fI<ifname>\fP is the name of the input interface from where packets will be +redirected to the target CPU list specified using \fI\-c\fP. + +.PP +The supported options are: + +.SS "-c, --cpu <CPU>" +.PP +Specify a possible target CPU index. This option must be passed at least once, +and can be passed multiple times to specify a list of CPUs. Which CPU is chosen +for a given packet depends on the value of the \fI\-\-program\-mode\fP option, +described below. + +.SS "-p, --program-mode <MODE>" +.PP +Specify a program that embeds a predefined policy deciding how packets are +redirected to different CPUs. The following options are available: + +.RS +.nf +\fCno-touch - Redirect without touching packet data +touch - Read packet data before redirecting +round-robin - Cycle between target CPUs in a round-robin fashion (for each packet) +l4-proto - Choose the target CPU based on the layer-4 protocol of packet +l4-filter - Like l4-proto, but drop UDP packets with destination port 9 (used by pktgen) +l4-hash - Use source and destination IP hashing to pick target CPU +\fP +.fi +.RE + +.PP +The \fIno\-touch\fP and \fItouch\fP modes always redirect packets to the same CPU (the +first value supplied to \fI\-\-cpu\fP). 
The \fIround\-robin\fP and \fIl4\-hash\fP modes +distribute packets between all the CPUs supplied as \fI\-\-cpu\fP arguments, while +\fIl4\-proto\fP and \fIl4\-filter\fP send TCP and unrecognised packets to CPU index 0, UDP +packets to CPU index 1 and ICMP packets to CPU index 2 (where the index refers +to the order the actual CPUs are given on the command line). + +.PP +The default for this option is \fIl4\-hash\fP. + +.SS "-r, --remote-action <ACTION>" +.PP +If this option is set, a separate program is installed into the cpumap, which +will be invoked on the remote CPU after the packet is processed there. The +action can be either \fIdrop\fP or \fIpass\fP which will drop the packet or pass it to +the regular networking stack, respectively. Or it can be \fIredirect\fP, which will +cause the packet to be redirected to another interface and transmitted out that +interface on the remote CPU. If this option is set to \fIredirect\fP the target +device must be specified using \fI\-\-redirect\-device\fP. + +.PP +The default for this option is \fIdisabled\fP. + +.SS "-D, --redirect-device <IFNAME>" +.PP +Specify the device to redirect the packet to when it is received on the target CPU. +Note that this option can only be specified with \fI\-\-remote\-action redirect\fP. + +.SS "-q, --qsize <PACKETS>" +.PP +Set the queue size for the per-CPU cpumap ring buffer used for redirecting +packets from multiple CPUs to one CPU. The default value is 2048 packets. + +.SS "-x, --stress-mode" +.PP +Stress the cpumap implementation by deallocating and reallocating the cpumap +ring buffer on each polling interval. + +.SS "-i, --interval <SECONDS>" +.PP +Set the polling interval for collecting all statistics and displaying them to +the output. The unit of interval is in seconds. + +.SS "-s, --stats" +.PP +Enable statistics for successful redirection. This option comes with a per +packet tracing overhead, for recording all successful redirections. 
+ +.SS "-e, --extended" +.PP +Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in +"terse" mode. The output mode can be switched by hitting C-\e while the program +is running. See also the \fBOutput Format Description\fP section below. + +.SS "-m, --mode" +.PP +Selects the XDP program mode (native or skb). Note that native XDP mode is the +default, and loading the redirect program in skb manner is neither performant, +nor recommended. However, this option is useful if the interface driver lacks +native XDP support, or when simply testing the tool. + +.SS "-v, --verbose" +.PP +Enable verbose logging. Supply twice to enable verbose logging from the +underlying \fIlibxdp\fP and \fIlibbpf\fP libraries. + +.SS "--version" +.PP +Show the application version and exit. + +.SS "-h, --help" +.PP +Display a summary of the available options + +.SH "The REDIRECT-MAP command" +.PP +In this mode, \fIxdp\-bench\fP sets up packet redirection between two interfaces +supplied on the command line using the \fIbpf_redirect_map()\fP BPF helper triggered +on packet reception on the ingress interface, using a devmap as its target. + +.PP +The syntax for the \fIredirect\-map\fP command is: + +.PP +\fIxdp\-bench redirect\-map [options] <ifname_in> <ifname_out>\fP + +.PP +Where \fI<ifname_in>\fP is the name of the input interface from where packets will +be redirected to the output interface \fI<ifname_out>\fP. + +.PP +The supported options are: + +.SS "-X, --load-egress" +.PP +Load a program in the devmap entry used for redirection, so that it is invoked +after the packet is redirected to the target device, before it is transmitted +out of the output interface. The remote program will update the packet data so +its source MAC address matches the one of the destination interface. + +.SS "-i, --interval <SECONDS>" +.PP +Set the polling interval for collecting all statistics and displaying them to +the output. The unit of interval is in seconds. 
+ +.SS "-s, --stats" +.PP +Enable statistics for successful redirection. This option comes with a per +packet tracing overhead, for recording all successful redirections. + +.SS "-e, --extended" +.PP +Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in +"terse" mode. The output mode can be switched by hitting C-\e while the program +is running. See also the \fBOutput Format Description\fP section below. + +.SS "-m, --mode" +.PP +Selects the XDP program mode (native or skb). Note that native XDP mode is the +default, and loading the redirect program in skb manner is neither performant, +nor recommended. However, this option is useful if the interface driver lacks +native XDP support, or when simply testing the tool. + +.SS "-v, --verbose" +.PP +Enable verbose logging. Supply twice to enable verbose logging from the +underlying \fIlibxdp\fP and \fIlibbpf\fP libraries. + +.SS "--version" +.PP +Show the application version and exit. + +.SS "-h, --help" +.PP +Display a summary of the available options + +.SH "The REDIRECT-MULTI command" +.PP +In this mode, \fIxdp\-bench\fP sets up one-to-many packet redirection between +interfaces supplied on the command line, using the \fIbpf_redirect_map\fP BPF helper +triggered on packet reception on the ingress interface, using a devmap as its +target. The packet is broadcast to all output interfaces specified on the +command line, using devmap's packet broadcast feature. + +.PP +The syntax for the \fIredirect\-multi\fP command is: + +.PP +\fIxdp\-bench redirect\-multi [options] <ifname_in> <ifname_out1> ... <ifname_outN>\fP + +.PP +Where \fI<ifname_in>\fP is the name of the input interface from where packets will +be redirected to one or many output interface(s). 
+ +.PP +The supported options are: + +.SS "-X, --load-egress" +.PP +Load a program in the devmap entry used for redirection, so that it is invoked +after the packet is redirected to the target device, before it is transmitted +out of the output interface. The remote program will update the packet data so +its source MAC address matches the one of the destination interface. + +.SS "-i, --interval <SECONDS>" +.PP +Set the polling interval for collecting all statistics and displaying them to +the output. The unit of interval is in seconds. + +.SS "-s, --stats" +.PP +Enable statistics for successful redirection. This option comes with a per +packet tracing overhead, for recording all successful redirections. + +.SS "-e, --extended" +.PP +Start xdp-bench in "extended" output mode. If not set, xdp-bench will start in +"terse" mode. The output mode can be switched by hitting C-\e while the program +is running. See also the \fBOutput Format Description\fP section below. + +.SS "-m, --mode" +.PP +Selects the XDP program mode (native or skb). Note that native XDP mode is the +default, and loading the redirect program in skb manner is neither performant, +nor recommended. However, this option is useful if the interface driver lacks +native XDP support, or when simply testing the tool. + +.SS "-v, --verbose" +.PP +Enable verbose logging. Supply twice to enable verbose logging from the +underlying \fIlibxdp\fP and \fIlibbpf\fP libraries. + +.SS "--version" +.PP +Show the application version and exit. + +.SS "-h, --help" +.PP +Display a summary of the available options + + +.SH "Output Format Description" +.PP +By default, redirect success statistics are disabled, use \fI\-\-stats\fP to enable. +The terse output mode is default, extended output mode can be activated using +the \fI\-\-extended\fP command line option. + +.PP +SIGQUIT (Ctrl + \\) can be used to switch the mode dynamically at runtime. 
+ +.PP +Terse mode displays at most the following fields: +.RS +.nf +\fCrx/s Number of packets received per second +redir/s Number of packets successfully redirected per second +err,drop/s Aggregated count of errors per second (including dropped packets when not using the drop command) +xmit/s Number of packets transmitted on the output device per second +\fP +.fi +.RE + +.PP +Extended output mode displays at most the following fields: +.RS +.nf +\fCFIELD DESCRIPTION +receive Displays the number of packets received and errors encountered + + Whenever an error or packet drop occurs, details of per CPU error + and drop statistics will be expanded inline in terse mode. + pkt/s - Packets received per second + drop/s - Packets dropped per second + error/s - Errors encountered per second + redirect - Displays the number of packets successfully redirected + Errors encountered are expanded under redirect_err field + Note that passing -s to enable it has a per packet overhead + redir/s - Packets redirected successfully per second + + +redirect_err Displays the number of packets that failed redirection + + The errno is expanded under this field with per CPU count + The recognized errors are: + EINVAL: Invalid redirection + ENETDOWN: Device being redirected to is down + EMSGSIZE: Packet length too large for device + EOPNOTSUPP: Operation not supported + ENOSPC: No space in ptr_ring of cpumap kthread + + error/s - Packets that failed redirection per second + + +enqueue to cpu N Displays the number of packets enqueued to bulk queue of CPU N + Expands to cpu:FROM->N to display enqueue stats for each CPU enqueuing to CPU N + Received packets can be associated with the CPU redirect program is enqueuing + packets to. 
+ pkt/s - Packets enqueued per second from other CPU to CPU N + drop/s - Packets dropped when trying to enqueue to CPU N + bulk-avg - Average number of packets processed for each event + + +kthread Displays the number of packets processed in CPUMAP kthread for each CPU + Packets consumed from ptr_ring in kthread, and its xdp_stats (after calling + CPUMAP bpf prog) are expanded below this. xdp_stats are expanded as a total and + then per-CPU to associate it to each CPU's pinned CPUMAP kthread. + pkt/s - Packets consumed per second from ptr_ring + drop/s - Packets dropped per second in kthread + sched - Number of times kthread called schedule() + + xdp_stats (also expands to per-CPU counts) + pass/s - XDP_PASS count for CPUMAP program execution + drop/s - XDP_DROP count for CPUMAP program execution + redir/s - XDP_REDIRECT count for CPUMAP program execution + + +xdp_exception Displays xdp_exception tracepoint events + + This can occur due to internal driver errors, unrecognized + XDP actions and due to explicit user trigger by use of XDP_ABORTED + Each action is expanded below this field with its count + hit/s - Number of times the tracepoint was hit per second + + +devmap_xmit Displays devmap_xmit tracepoint events + + This tracepoint is invoked for successful transmissions on output + device but these statistics are not available for generic XDP mode, + hence they will be omitted from the output when using SKB mode + xmit/s - Number of packets that were transmitted per second + drop/s - Number of packets that failed transmissions per second + drv_err/s - Number of internal driver errors per second + bulk-avg - Average number of packets processed for each event +\fP +.fi +.RE + +.SH "BUGS" +.PP +Please report any bugs on Github: \fIhttps://github.com/xdp-project/xdp-tools/issues\fP + +.SH "AUTHOR" +.PP +Earlier xdp-redirect tools were written by Jesper Dangaard Brouer and John +Fastabend. 
They were then rewritten to support more features by Kumar Kartikeya +Dwivedi, who also ported them to xdp-tools together with Toke Høiland-Jørgensen. +This man page was written by Kumar Kartikeya Dwivedi and Toke Høiland-Jørgensen. diff --git a/xdp-bench/xdp-bench.c b/xdp-bench/xdp-bench.c new file mode 100644 index 0000000..4a09514 --- /dev/null +++ b/xdp-bench/xdp-bench.c @@ -0,0 +1,275 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <errno.h> +#include <string.h> + +#include "xdp-bench.h" +#include "params.h" + +#define PROG_NAME "xdp-bench" + +int do_help(__unused const void *cfg, __unused const char *pin_root_path) +{ + fprintf(stderr, + "Usage: xdp-bench COMMAND [options]\n" + "\n" + "COMMAND can be one of:\n" + " drop - Drop all packets on an interface\n" + " pass - Pass all packets to the network stack\n" + " tx - Transmit packets back out on an interface (hairpin forwarding)\n" + " redirect - XDP redirect using the bpf_redirect() helper\n" + " redirect-cpu - XDP CPU redirect using BPF_MAP_TYPE_CPUMAP\n" + " redirect-map - XDP redirect using BPF_MAP_TYPE_DEVMAP\n" + " redirect-multi - XDP multi-redirect using BPF_MAP_TYPE_DEVMAP and the BPF_F_BROADCAST flag\n" + " help - show this help message\n" + "\n" + "Use 'xdp-bench COMMAND --help' to see options for each command\n"); + return -1; +} + + +struct enum_val xdp_modes[] = { + {"native", XDP_MODE_NATIVE}, + {"skb", XDP_MODE_SKB}, + {NULL, 0} +}; + +struct enum_val basic_program_modes[] = { + {"no-touch", BASIC_NO_TOUCH}, + {"read-data", BASIC_READ_DATA}, + {"swap-macs", BASIC_SWAP_MACS}, + {NULL, 0} +}; + +struct enum_val cpumap_remote_actions[] = { + {"disabled", ACTION_DISABLED}, + {"drop", ACTION_DROP}, + {"pass", ACTION_PASS}, + {"redirect", ACTION_REDIRECT}, + {NULL, 0} +}; + +struct enum_val cpumap_program_modes[] = { + {"no-touch", CPUMAP_NO_TOUCH}, + {"touch", CPUMAP_TOUCH_DATA}, + {"round-robin", CPUMAP_CPU_ROUND_ROBIN}, + {"l4-proto", CPUMAP_CPU_L4_PROTO}, + {"l4-filter", 
CPUMAP_CPU_L4_PROTO_FILTER}, + {"l4-hash", CPUMAP_CPU_L4_HASH}, + {NULL, 0} +}; + + +struct prog_option basic_options[] = { + DEFINE_OPTION("program-mode", OPT_ENUM, struct basic_opts, program_mode, + .short_opt = 'p', + .metavar = "<mode>", + .typearg = basic_program_modes, + .help = "Action to take before dropping packet."), + DEFINE_OPTION("rxq-stats", OPT_BOOL, struct basic_opts, rxq_stats, + .short_opt = 'r', + .help = "Collect per-RXQ drop statistics"), + DEFINE_OPTION("interval", OPT_U32, struct basic_opts, interval, + .short_opt = 'i', + .metavar = "<seconds>", + .help = "Polling interval (default 2)"), + DEFINE_OPTION("extended", OPT_BOOL, struct basic_opts, extended, + .short_opt = 'e', + .help = "Start running in extended output mode (C^\\ to toggle)"), + DEFINE_OPTION("xdp-mode", OPT_ENUM, struct basic_opts, mode, + .short_opt = 'm', + .typearg = xdp_modes, + .metavar = "<mode>", + .help = "Load XDP program in <mode>; default native"), + DEFINE_OPTION("dev", OPT_IFNAME, struct basic_opts, iface_in, + .positional = true, + .metavar = "<ifname>", + .required = true, + .help = "Load on device <ifname>"), + END_OPTIONS +}; + +struct prog_option redirect_basic_options[] = { + DEFINE_OPTION("interval", OPT_U32, struct redirect_opts, interval, + .short_opt = 'i', + .metavar = "<seconds>", + .help = "Polling interval (default 2)"), + DEFINE_OPTION("stats", OPT_BOOL, struct redirect_opts, stats, + .short_opt = 's', + .help = "Enable statistics for transmitted packets (not just errors)"), + DEFINE_OPTION("extended", OPT_BOOL, struct redirect_opts, extended, + .short_opt = 'e', + .help = "Start running in extended output mode (C^\\ to toggle)"), + DEFINE_OPTION("mode", OPT_ENUM, struct redirect_opts, mode, + .short_opt = 'm', + .typearg = xdp_modes, + .metavar = "<mode>", + .help = "Load XDP program in <mode>; default native"), + DEFINE_OPTION("dev_in", OPT_IFNAME, struct redirect_opts, iface_in, + .positional = true, + .metavar = "<ifname_in>", + .required = 
true, + .help = "Redirect from device <ifname>"), + DEFINE_OPTION("dev_out", OPT_IFNAME, struct redirect_opts, iface_out, + .positional = true, + .metavar = "<ifname_out>", + .required = true, + .help = "Redirect to device <ifname>"), + END_OPTIONS +}; + +struct prog_option redirect_cpumap_options[] = { + DEFINE_OPTION("cpu", OPT_U32_MULTI, struct cpumap_opts, cpus, + .short_opt = 'c', + .metavar = "<cpu>", + .required = true, + .help = "Insert CPU <cpu> into CPUMAP (can be specified multiple times)"), + DEFINE_OPTION("dev", OPT_IFNAME, struct cpumap_opts, iface_in, + .positional = true, + .metavar = "<ifname>", + .required = true, + .help = "Run on <ifname>"), + DEFINE_OPTION("program-mode", OPT_ENUM, struct cpumap_opts, program_mode, + .short_opt = 'p', + .metavar = "<mode>", + .typearg = cpumap_program_modes, + .help = "Redirect to CPUs using <mode>. Default l4-hash."), + DEFINE_OPTION("remote-action", OPT_ENUM, struct cpumap_opts, remote_action, + .short_opt = 'r', + .metavar = "<action>", + .typearg = cpumap_remote_actions, + .help = "Perform <action> on the remote CPU. 
Default disabled."), + DEFINE_OPTION("redirect-device", OPT_IFNAME, struct cpumap_opts, redir_iface, + .short_opt = 'D', + .metavar = "<ifname>", + .help = "Redirect packets to <ifname> on remote CPU (when --remote-action is 'redirect')"), + DEFINE_OPTION("qsize", OPT_U32, struct cpumap_opts, qsize, + .short_opt = 'q', + .metavar = "<packets>", + .help = "CPUMAP queue size (default 2048)"), + DEFINE_OPTION("stress-mode", OPT_BOOL, struct cpumap_opts, stress_mode, + .short_opt = 'x', + .help = "Stress the kernel CPUMAP setup and teardown code while running"), + DEFINE_OPTION("interval", OPT_U32, struct cpumap_opts, interval, + .short_opt = 'i', + .metavar = "<seconds>", + .help = "Polling interval (default 2)"), + DEFINE_OPTION("stats", OPT_BOOL, struct cpumap_opts, stats, + .short_opt = 's', + .help = "Enable statistics for transmitted packets (not just errors)"), + DEFINE_OPTION("extended", OPT_BOOL, struct basic_opts, extended, + .short_opt = 'e', + .help = "Start running in extended output mode (C^\\ to toggle)"), + DEFINE_OPTION("xdp-mode", OPT_ENUM, struct cpumap_opts, mode, + .short_opt = 'm', + .typearg = xdp_modes, + .metavar = "<mode>", + .help = "Load XDP program in <mode>; default native"), + END_OPTIONS +}; + +struct prog_option redirect_devmap_options[] = { + DEFINE_OPTION("load-egress", OPT_BOOL, struct devmap_opts, load_egress, + .short_opt = 'X', + .help = "Load an egress program into the devmap"), + DEFINE_OPTION("interval", OPT_U32, struct devmap_opts, interval, + .short_opt = 'i', + .metavar = "<seconds>", + .help = "Polling interval (default 2)"), + DEFINE_OPTION("stats", OPT_BOOL, struct devmap_opts, stats, + .short_opt = 's', + .help = "Enable statistics for transmitted packets (not just errors)"), + DEFINE_OPTION("extended", OPT_BOOL, struct devmap_opts, extended, + .short_opt = 'e', + .help = "Start running in extended output mode (C^\\ to toggle)"), + DEFINE_OPTION("mode", OPT_ENUM, struct devmap_opts, mode, + .short_opt = 'm', + .typearg = 
xdp_modes, + .metavar = "<mode>", + .help = "Load XDP program in <mode>; default native"), + DEFINE_OPTION("dev_in", OPT_IFNAME, struct devmap_opts, iface_in, + .positional = true, + .metavar = "<ifname_in>", + .required = true, + .help = "Redirect from device <ifname>"), + DEFINE_OPTION("dev_out", OPT_IFNAME, struct devmap_opts, iface_out, + .positional = true, + .metavar = "<ifname_out>", + .required = true, + .help = "Redirect to device <ifname>"), + END_OPTIONS +}; + +struct prog_option redirect_devmap_multi_options[] = { + DEFINE_OPTION("load-egress", OPT_BOOL, struct devmap_multi_opts, load_egress, + .short_opt = 'X', + .help = "Load an egress program into the devmap"), + DEFINE_OPTION("interval", OPT_U32, struct devmap_multi_opts, interval, + .short_opt = 'i', + .metavar = "<seconds>", + .help = "Polling interval (default 2)"), + DEFINE_OPTION("stats", OPT_BOOL, struct devmap_multi_opts, stats, + .short_opt = 's', + .help = "Enable statistics for transmitted packets (not just errors)"), + DEFINE_OPTION("extended", OPT_BOOL, struct devmap_multi_opts, extended, + .short_opt = 'e', + .help = "Start running in extended output mode (C^\\ to toggle)"), + DEFINE_OPTION("mode", OPT_ENUM, struct devmap_multi_opts, mode, + .short_opt = 'm', + .typearg = xdp_modes, + .metavar = "<mode>", + .help = "Load XDP program in <mode>; default native"), + DEFINE_OPTION("devs", OPT_IFNAME_MULTI, struct devmap_multi_opts, ifaces, + .positional = true, + .metavar = "<ifname...>", + .min_num = 2, + .max_num = MAX_IFACE_NUM, + .required = true, + .help = "Redirect from and to devices <ifname...>"), + END_OPTIONS +}; + +static const struct prog_command cmds[] = { + { .name = "drop", + .func = do_drop, + .options = basic_options, + .default_cfg = &defaults_drop, + .doc = "Drop all packets on an interface" }, + { .name = "pass", + .func = do_pass, + .options = basic_options, + .default_cfg = &defaults_pass, + .doc = "Pass all packets to the network stack" }, + { .name = "tx", + .func = 
do_tx, + .options = basic_options, + .default_cfg = &defaults_tx, + .doc = "Transmit packets back out an interface (hairpin forwarding)" }, + DEFINE_COMMAND_NAME("redirect", redirect_basic, + "XDP redirect using the bpf_redirect() helper"), + DEFINE_COMMAND_NAME("redirect-cpu", redirect_cpumap, + "XDP CPU redirect using BPF_MAP_TYPE_CPUMAP"), + DEFINE_COMMAND_NAME("redirect-map", redirect_devmap, + "XDP redirect using BPF_MAP_TYPE_DEVMAP"), + DEFINE_COMMAND_NAME( + "redirect-multi", redirect_devmap_multi, + "XDP multi-redirect using BPF_MAP_TYPE_DEVMAP and the BPF_F_BROADCAST flag"), + { .name = "help", .func = do_help, .no_cfg = true }, + END_COMMANDS +}; + +union all_opts { + struct basic_opts basic; + struct cpumap_opts cpumap; + struct devmap_opts devmap; + struct devmap_multi_opts devmap_multi; +}; + +int main(int argc, char **argv) +{ + if (argc > 1) + return dispatch_commands(argv[1], argc - 1, argv + 1, cmds, + sizeof(union all_opts), PROG_NAME, false); + + return do_help(NULL, NULL); +} diff --git a/xdp-bench/xdp-bench.h b/xdp-bench/xdp-bench.h new file mode 100644 index 0000000..b163119 --- /dev/null +++ b/xdp-bench/xdp-bench.h @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0-only +#ifndef XDP_REDIRECT_H +#define XDP_REDIRECT_H + +#include <xdp/libxdp.h> +#include "params.h" +#include "util.h" + +#define MAX_IFACE_NUM 32 + +int do_drop(const void *cfg, const char *pin_root_path); +int do_pass(const void *cfg, const char *pin_root_path); +int do_tx(const void *cfg, const char *pin_root_path); +int do_redirect_basic(const void *cfg, const char *pin_root_path); +int do_redirect_cpumap(const void *cfg, const char *pin_root_path); +int do_redirect_devmap(const void *cfg, const char *pin_root_path); +int do_redirect_devmap_multi(const void *cfg, const char *pin_root_path); + +enum basic_program_mode { + BASIC_NO_TOUCH, + BASIC_READ_DATA, + BASIC_SWAP_MACS, +}; + +struct basic_opts { + bool extended; + bool rxq_stats; + __u32 interval; + enum 
/*
 * Selects which XDP program the redirect-cpu command attaches.
 *
 * The numeric value is used directly as an index into cpumap_prog_names[]
 * in xdp_redirect_cpumap.c, so the order of the enumerators must stay in
 * sync with the order of that table.
 */
enum cpumap_program_mode {
	CPUMAP_NO_TOUCH,		/* cpumap_no_touch */
	CPUMAP_TOUCH_DATA,		/* cpumap_touch_data */
	CPUMAP_CPU_ROUND_ROBIN,		/* cpumap_round_robin */
	CPUMAP_CPU_L4_PROTO,		/* cpumap_l4_proto */
	CPUMAP_CPU_L4_PROTO_FILTER,	/* cpumap_l4_filter */
	CPUMAP_CPU_L4_HASH,		/* cpumap_l4_hash */
};
published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <bpf/vmlinux.h> +#include <xdp/xdp_sample_shared.h> +#include <xdp/xdp_sample.bpf.h> +#include <xdp/xdp_sample_common.bpf.h> +#include <xdp/parsing_helpers.h> + +const volatile bool read_data = 0; +const volatile bool swap_macs = 0; +const volatile bool rxq_stats = 0; +const volatile enum xdp_action action = XDP_DROP; + +SEC("xdp") +int xdp_basic_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + __u32 key = bpf_get_smp_processor_id(); + struct datarec *rec, *rxq_rec; + struct ethhdr *eth = data; + __u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return XDP_DROP; + + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_PASS; + NO_TEAR_INC(rec->processed); + + if (rxq_stats) { + key = ctx->rx_queue_index; + rxq_rec = bpf_map_lookup_elem(&rxq_cnt, &key); + if (!rxq_rec) + return XDP_PASS; + NO_TEAR_INC(rxq_rec->processed); + } + + if (read_data) { + if (bpf_ntohs(eth->h_proto) < ETH_P_802_3_MIN) + return XDP_ABORTED; + + if (swap_macs) + swap_src_dst_mac(data); + } + + if (action == XDP_DROP) { + NO_TEAR_INC(rec->dropped); + if (rxq_stats) + NO_TEAR_INC(rxq_rec->dropped); + } + + return action; +} + +char _license[] SEC("license") = "GPL"; diff --git a/xdp-bench/xdp_basic.c b/xdp-bench/xdp_basic.c new file mode 100644 index 0000000..f748c2b --- /dev/null +++ b/xdp-bench/xdp_basic.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com> + */ +#include <errno.h> +#include <stdio.h> +#include <assert.h> +#include <getopt.h> +#include <libgen.h> +#include <net/if.h> +#include <signal.h> 
+#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <bpf/bpf.h> +#include <stdbool.h> +#include <linux/bpf.h> +#include <bpf/libbpf.h> +#include <sys/resource.h> +#include <linux/if_link.h> +#include <xdp/libxdp.h> + +#include "logging.h" + +#include "xdp-bench.h" +#include "xdp_sample.h" +#include "xdp_basic.skel.h" + +static int mask = SAMPLE_RX_CNT | SAMPLE_EXCEPTION_CNT; + +DEFINE_SAMPLE_INIT(xdp_basic); + +const struct basic_opts defaults_drop = { .mode = XDP_MODE_NATIVE, + .interval = 2 }; +const struct basic_opts defaults_pass = { .mode = XDP_MODE_NATIVE, + .interval = 2 }; +const struct basic_opts defaults_tx = { .mode = XDP_MODE_NATIVE, + .interval = 2, + .program_mode = BASIC_SWAP_MACS }; + +static int do_basic(const struct basic_opts *opt, enum xdp_action action) +{ + DECLARE_LIBBPF_OPTS(xdp_program_opts, opts); + struct xdp_program *xdp_prog = NULL; + int ret = EXIT_FAIL_OPTION; + struct xdp_basic *skel; + + if (opt->extended) + sample_switch_mode(); + + skel = xdp_basic__open(); + if (!skel) { + pr_warn("Failed to xdp_basic__open: %s\n", strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end; + } + + ret = sample_init_pre_load(skel, opt->iface_in.ifname); + if (ret < 0) { + pr_warn("Failed to sample_init_pre_load: %s\n", strerror(-ret)); + ret = EXIT_FAIL_BPF; + goto end_destroy; + } + + skel->rodata->action = action; + if (action == XDP_DROP) + mask |= SAMPLE_DROP_OK; + + if (opt->program_mode >= BASIC_READ_DATA) + skel->rodata->read_data = true; + if (opt->program_mode >= BASIC_SWAP_MACS) + skel->rodata->swap_macs = true; + if (opt->rxq_stats) { + skel->rodata->rxq_stats = true; + mask |= SAMPLE_RXQ_STATS; + } + + opts.obj = skel->obj; + opts.prog_name = bpf_program__name(skel->progs.xdp_basic_prog); + xdp_prog = xdp_program__create(&opts); + if (!xdp_prog) { + ret = -errno; + pr_warn("Couldn't open XDP program: %s\n", + strerror(-ret)); + goto end_destroy; + } + + ret = xdp_program__attach(xdp_prog, opt->iface_in.ifindex, 
opt->mode, 0); + if (ret < 0) { + pr_warn("Failed to attach XDP program: %s\n", strerror(-ret)); + ret = EXIT_FAIL_BPF; + goto end_destroy; + } + + ret = sample_init(skel, mask, 0, 0); + if (ret < 0) { + pr_warn("Failed to initialize sample: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_detach; + } + + ret = EXIT_FAIL; + + pr_info("%s packets on %s (ifindex %d; driver %s)\n", + action == XDP_DROP ? "Dropping" : "Hairpinning (XDP_TX)", + opt->iface_in.ifname, opt->iface_in.ifindex, get_driver_name(opt->iface_in.ifindex)); + + ret = sample_run(opt->interval, NULL, NULL); + if (ret < 0) { + pr_warn("Failed during sample run: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_detach; + } + ret = EXIT_OK; +end_detach: + xdp_program__detach(xdp_prog, opt->iface_in.ifindex, opt->mode, 0); +end_destroy: + xdp_basic__destroy(skel); +end: + sample_teardown(); + return ret; +} + +int do_drop(const void *cfg, __unused const char *pin_root_path) +{ + const struct basic_opts *opt = cfg; + + return do_basic(opt, XDP_DROP); +} + +int do_pass(const void *cfg, __unused const char *pin_root_path) +{ + const struct basic_opts *opt = cfg; + + return do_basic(opt, XDP_PASS); +} + +int do_tx(const void *cfg, __unused const char *pin_root_path) +{ + const struct basic_opts *opt = cfg; + + return do_basic(opt, XDP_TX); +} diff --git a/xdp-bench/xdp_redirect_basic.bpf.c b/xdp-bench/xdp_redirect_basic.bpf.c new file mode 100644 index 0000000..ca2af1f --- /dev/null +++ b/xdp-bench/xdp_redirect_basic.bpf.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <bpf/vmlinux.h> +#include <xdp/xdp_sample_shared.h> +#include <xdp/xdp_sample.bpf.h> +#include <xdp/xdp_sample_common.bpf.h> +#include <linux/if_ether.h> + +const volatile int ifindex_out; + +SEC("xdp") +int xdp_redirect_basic_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + __u32 key = bpf_get_smp_processor_id(); + struct ethhdr *eth = data; + struct datarec *rec; + __u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return XDP_DROP; + + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_PASS; + NO_TEAR_INC(rec->processed); + + swap_src_dst_mac(data); + return bpf_redirect(ifindex_out, 0); +} + +char _license[] SEC("license") = "GPL"; diff --git a/xdp-bench/xdp_redirect_basic.c b/xdp-bench/xdp_redirect_basic.c new file mode 100644 index 0000000..7d011df --- /dev/null +++ b/xdp-bench/xdp_redirect_basic.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com> + */ +#include <errno.h> +#include <stdio.h> +#include <assert.h> +#include <getopt.h> +#include <libgen.h> +#include <net/if.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <bpf/bpf.h> +#include <stdbool.h> +#include <linux/bpf.h> +#include <bpf/libbpf.h> +#include <sys/resource.h> +#include <linux/if_link.h> +#include <xdp/libxdp.h> + +#include "logging.h" + +#include "xdp-bench.h" +#include "xdp_sample.h" +#include "xdp_redirect_basic.skel.h" + +static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_CNT | + SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI; + +DEFINE_SAMPLE_INIT(xdp_redirect_basic); + +const struct 
redirect_opts defaults_redirect_basic = { .mode = XDP_MODE_NATIVE, + .interval = 2 }; + +int do_redirect_basic(const void *cfg, __unused const char *pin_root_path) +{ + const struct redirect_opts *opt = cfg; + + struct xdp_program *xdp_prog = NULL, *dummy_prog = NULL; + DECLARE_LIBBPF_OPTS(xdp_program_opts, opts); + struct xdp_redirect_basic *skel; + char str[2 * IF_NAMESIZE + 1]; + int ret = EXIT_FAIL_OPTION; + + if (opt->extended) + sample_switch_mode(); + + if (opt->mode == XDP_MODE_SKB) + /* devmap_xmit tracepoint not available */ + mask &= ~(SAMPLE_DEVMAP_XMIT_CNT | + SAMPLE_DEVMAP_XMIT_CNT_MULTI); + + if (opt->stats) + mask |= SAMPLE_REDIRECT_CNT; + + + skel = xdp_redirect_basic__open(); + if (!skel) { + pr_warn("Failed to xdp_redirect_basic__open: %s\n", strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end; + } + + ret = sample_init_pre_load(skel, opt->iface_in.ifname); + if (ret < 0) { + pr_warn("Failed to sample_init_pre_load: %s\n", strerror(-ret)); + ret = EXIT_FAIL_BPF; + goto end_destroy; + } + + skel->rodata->from_match[0] = opt->iface_in.ifindex; + skel->rodata->to_match[0] = opt->iface_out.ifindex; + skel->rodata->ifindex_out = opt->iface_out.ifindex; + + opts.obj = skel->obj; + opts.prog_name = bpf_program__name(skel->progs.xdp_redirect_basic_prog); + xdp_prog = xdp_program__create(&opts); + if (!xdp_prog) { + ret = -errno; + pr_warn("Couldn't open XDP program: %s\n", + strerror(-ret)); + goto end_destroy; + } + + ret = xdp_program__attach(xdp_prog, opt->iface_in.ifindex, opt->mode, 0); + if (ret < 0) { + pr_warn("Failed to attach XDP program: %s\n", strerror(-ret)); + ret = EXIT_FAIL_BPF; + goto end_destroy; + } + + ret = sample_init(skel, mask, opt->iface_in.ifindex, opt->iface_out.ifindex); + if (ret < 0) { + pr_warn("Failed to initialize sample: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_detach; + } + + opts.obj = NULL; + opts.prog_name = "xdp_pass"; + opts.find_filename = "xdp-dispatcher.o"; + dummy_prog = xdp_program__create(&opts); 
+ if (!dummy_prog) { + pr_warn("Failed to load dummy program: %s\n", strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end_detach; + } + + ret = xdp_program__attach(dummy_prog, opt->iface_out.ifindex, opt->mode, 0); + if (ret < 0) { + pr_warn("Failed to attach dummy program: %s\n", strerror(-ret)); + ret = EXIT_FAIL_BPF; + goto end_detach; + } + + ret = EXIT_FAIL; + + safe_strncpy(str, get_driver_name(opt->iface_in.ifindex), sizeof(str)); + pr_info("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n", + opt->iface_in.ifname, opt->iface_in.ifindex, str, + opt->iface_out.ifname, opt->iface_out.ifindex, get_driver_name(opt->iface_out.ifindex)); + + ret = sample_run(opt->interval, NULL, NULL); + if (ret < 0) { + pr_warn("Failed during sample run: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_detach; + } + ret = EXIT_OK; +end_detach: + if (dummy_prog) + xdp_program__detach(dummy_prog, opt->iface_out.ifindex, opt->mode, 0); + xdp_program__detach(xdp_prog, opt->iface_in.ifindex, opt->mode, 0); +end_destroy: + xdp_redirect_basic__destroy(skel); +end: + sample_teardown(); + return ret; +} diff --git a/xdp-bench/xdp_redirect_cpumap.bpf.c b/xdp-bench/xdp_redirect_cpumap.bpf.c new file mode 100644 index 0000000..bf6acda --- /dev/null +++ b/xdp-bench/xdp_redirect_cpumap.bpf.c @@ -0,0 +1,539 @@ +/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP) + * + * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. + */ +#include <bpf/vmlinux.h> +#include <xdp/xdp_sample_shared.h> +#include <xdp/xdp_sample.bpf.h> +#include <xdp/xdp_sample_common.bpf.h> +#include <xdp/parsing_helpers.h> +#include "hash_func01.h" + +/* Special map type that can XDP_REDIRECT frames to another CPU */ +struct { + __uint(type, BPF_MAP_TYPE_CPUMAP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bpf_cpumap_val)); +} cpu_map SEC(".maps"); + +/* Set of maps controlling available CPU, and for iterating through + * selectable redirect CPUs. 
+ */ +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u32); +} cpus_available SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 1); +} cpus_count SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 1); +} cpus_iterator SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(struct bpf_devmap_val)); + __uint(max_entries, 1); +} tx_port SEC(".maps"); + +char tx_mac_addr[ETH_ALEN]; + +/* Helper parse functions */ + +static __always_inline +bool parse_eth(struct ethhdr *eth, void *data_end, + __u16 *eth_proto, __u64 *l3_offset) +{ + __u16 eth_type; + __u64 offset; + + offset = sizeof(*eth); + if ((void *)eth + offset > data_end) + return false; + + eth_type = eth->h_proto; + + /* Skip non 802.3 Ethertypes */ + if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0)) + return false; + + /* Handle VLAN tagged packet */ + if (eth_type == bpf_htons(ETH_P_8021Q) || + eth_type == bpf_htons(ETH_P_8021AD)) { + struct vlan_hdr *vlan_hdr; + + vlan_hdr = (void *)eth + offset; + offset += sizeof(*vlan_hdr); + if ((void *)eth + offset > data_end) + return false; + eth_type = vlan_hdr->h_vlan_encapsulated_proto; + } + /* Handle double VLAN tagged packet */ + if (eth_type == bpf_htons(ETH_P_8021Q) || + eth_type == bpf_htons(ETH_P_8021AD)) { + struct vlan_hdr *vlan_hdr; + + vlan_hdr = (void *)eth + offset; + offset += sizeof(*vlan_hdr); + if ((void *)eth + offset > data_end) + return false; + eth_type = vlan_hdr->h_vlan_encapsulated_proto; + } + + *eth_proto = bpf_ntohs(eth_type); + *l3_offset = offset; + return true; +} + +static __always_inline +__u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, __u64 nh_off) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct iphdr 
*iph = data + nh_off; + struct udphdr *udph; + + if (iph + 1 > data_end) + return 0; + if (!(iph->protocol == IPPROTO_UDP)) + return 0; + + udph = (void *)(iph + 1); + if (udph + 1 > data_end) + return 0; + + return bpf_ntohs(udph->dest); +} + +static __always_inline +int get_proto_ipv4(struct xdp_md *ctx, __u64 nh_off) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct iphdr *iph = data + nh_off; + + if (iph + 1 > data_end) + return 0; + return iph->protocol; +} + +static __always_inline +int get_proto_ipv6(struct xdp_md *ctx, __u64 nh_off) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ipv6hdr *ip6h = data + nh_off; + + if (ip6h + 1 > data_end) + return 0; + return ip6h->nexthdr; +} + +SEC("xdp") +int cpumap_no_touch(struct xdp_md *ctx) +{ + __u32 key = bpf_get_smp_processor_id(); + struct datarec *rec; + __u32 *cpu_selected; + __u32 cpu_dest = 0; + __u32 key0 = 0; + + /* Only use first entry in cpus_available */ + cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0); + if (!cpu_selected) + return XDP_ABORTED; + cpu_dest = *cpu_selected; + + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_PASS; + NO_TEAR_INC(rec->processed); + + if (cpu_dest >= nr_cpus) { + NO_TEAR_INC(rec->issue); + return XDP_ABORTED; + } + return bpf_redirect_map(&cpu_map, cpu_dest, 0); +} + +SEC("xdp") +int cpumap_touch_data(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + __u32 key = bpf_get_smp_processor_id(); + struct ethhdr *eth = data; + struct datarec *rec; + __u32 *cpu_selected; + __u32 cpu_dest = 0; + __u32 key0 = 0; + __u16 eth_type; + + /* Only use first entry in cpus_available */ + cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0); + if (!cpu_selected) + return XDP_ABORTED; + cpu_dest = *cpu_selected; + + /* Validate packet length is minimum Eth header size */ + if (eth + 1 > data_end) 
+ return XDP_ABORTED; + + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_PASS; + NO_TEAR_INC(rec->processed); + + /* Read packet data, and use it (drop non 802.3 Ethertypes) */ + eth_type = eth->h_proto; + if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) { + NO_TEAR_INC(rec->dropped); + return XDP_DROP; + } + + if (cpu_dest >= nr_cpus) { + NO_TEAR_INC(rec->issue); + return XDP_ABORTED; + } + return bpf_redirect_map(&cpu_map, cpu_dest, 0); +} + +SEC("xdp") +int cpumap_round_robin(struct xdp_md *ctx) +{ + __u32 key = bpf_get_smp_processor_id(); + struct datarec *rec; + __u32 cpu_dest = 0; + __u32 key0 = 0; + + __u32 *cpu_selected; + __u32 *cpu_iterator; + __u32 *cpu_max; + __u32 cpu_idx; + + cpu_max = bpf_map_lookup_elem(&cpus_count, &key0); + if (!cpu_max) + return XDP_ABORTED; + + cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0); + if (!cpu_iterator) + return XDP_ABORTED; + cpu_idx = *cpu_iterator; + + *cpu_iterator += 1; + if (*cpu_iterator == *cpu_max) + *cpu_iterator = 0; + + cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx); + if (!cpu_selected) + return XDP_ABORTED; + cpu_dest = *cpu_selected; + + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_PASS; + NO_TEAR_INC(rec->processed); + + if (cpu_dest >= nr_cpus) { + NO_TEAR_INC(rec->issue); + return XDP_ABORTED; + } + return bpf_redirect_map(&cpu_map, cpu_dest, 0); +} + +SEC("xdp") +int cpumap_l4_proto(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + __u32 key = bpf_get_smp_processor_id(); + struct ethhdr *eth = data; + __u8 ip_proto = IPPROTO_UDP; + struct datarec *rec; + __u16 eth_proto = 0; + __u64 l3_offset = 0; + __u32 cpu_dest = 0; + __u32 *cpu_lookup; + __u32 cpu_idx = 0; + + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_PASS; + NO_TEAR_INC(rec->processed); + + if (!(parse_eth(eth, data_end, ð_proto, &l3_offset))) + return XDP_PASS; /* Just skip */ + + /* Extract L4 
protocol */ + switch (eth_proto) { + case ETH_P_IP: + ip_proto = get_proto_ipv4(ctx, l3_offset); + break; + case ETH_P_IPV6: + ip_proto = get_proto_ipv6(ctx, l3_offset); + break; + case ETH_P_ARP: + cpu_idx = 0; /* ARP packet handled on separate CPU */ + break; + default: + cpu_idx = 0; + } + + /* Choose CPU based on L4 protocol */ + switch (ip_proto) { + case IPPROTO_ICMP: + case IPPROTO_ICMPV6: + cpu_idx = 2; + break; + case IPPROTO_TCP: + cpu_idx = 0; + break; + case IPPROTO_UDP: + cpu_idx = 1; + break; + default: + cpu_idx = 0; + } + + cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx); + if (!cpu_lookup) + return XDP_ABORTED; + cpu_dest = *cpu_lookup; + + if (cpu_dest >= nr_cpus) { + NO_TEAR_INC(rec->issue); + return XDP_ABORTED; + } + return bpf_redirect_map(&cpu_map, cpu_dest, 0); +} + +SEC("xdp") +int cpumap_l4_filter(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + __u32 key = bpf_get_smp_processor_id(); + struct ethhdr *eth = data; + __u8 ip_proto = IPPROTO_UDP; + struct datarec *rec; + __u16 eth_proto = 0; + __u64 l3_offset = 0; + __u32 cpu_dest = 0; + __u32 *cpu_lookup; + __u32 cpu_idx = 0; + __u16 dest_port; + + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_PASS; + NO_TEAR_INC(rec->processed); + + if (!(parse_eth(eth, data_end, ð_proto, &l3_offset))) + return XDP_PASS; /* Just skip */ + + /* Extract L4 protocol */ + switch (eth_proto) { + case ETH_P_IP: + ip_proto = get_proto_ipv4(ctx, l3_offset); + break; + case ETH_P_IPV6: + ip_proto = get_proto_ipv6(ctx, l3_offset); + break; + case ETH_P_ARP: + cpu_idx = 0; /* ARP packet handled on separate CPU */ + break; + default: + cpu_idx = 0; + } + + /* Choose CPU based on L4 protocol */ + switch (ip_proto) { + case IPPROTO_ICMP: + case IPPROTO_ICMPV6: + cpu_idx = 2; + break; + case IPPROTO_TCP: + cpu_idx = 0; + break; + case IPPROTO_UDP: + cpu_idx = 1; + /* DDoS filter UDP port 9 (pktgen) */ + dest_port = 
get_dest_port_ipv4_udp(ctx, l3_offset); + if (dest_port == 9) { + NO_TEAR_INC(rec->dropped); + return XDP_DROP; + } + break; + default: + cpu_idx = 0; + } + + cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx); + if (!cpu_lookup) + return XDP_ABORTED; + cpu_dest = *cpu_lookup; + + if (cpu_dest >= nr_cpus) { + NO_TEAR_INC(rec->issue); + return XDP_ABORTED; + } + return bpf_redirect_map(&cpu_map, cpu_dest, 0); +} + +/* Hashing initval */ +#define INITVAL 15485863 + +static __always_inline +__u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, __u64 nh_off) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct iphdr *iph = data + nh_off; + __u32 cpu_hash; + + if (iph + 1 > data_end) + return 0; + + cpu_hash = iph->saddr + iph->daddr; + cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol); + + return cpu_hash; +} + +static __always_inline +__u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, __u64 nh_off) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ipv6hdr *ip6h = data + nh_off; + __u32 cpu_hash; + + if (ip6h + 1 > data_end) + return 0; + + cpu_hash = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0]; + cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1]; + cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2]; + cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3]; + cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr); + + return cpu_hash; +} + +/* Load-Balance traffic based on hashing IP-addrs + L4-proto. The + * hashing scheme is symmetric, meaning swapping IP src/dest still hit + * same CPU. 
+ */ +SEC("xdp") +int cpumap_l4_hash(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + __u32 key = bpf_get_smp_processor_id(); + struct ethhdr *eth = data; + struct datarec *rec; + __u16 eth_proto = 0; + __u64 l3_offset = 0; + __u32 cpu_dest = 0; + __u32 cpu_idx = 0; + __u32 *cpu_lookup; + __u32 key0 = 0; + __u32 *cpu_max; + __u32 cpu_hash; + + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_PASS; + NO_TEAR_INC(rec->processed); + + cpu_max = bpf_map_lookup_elem(&cpus_count, &key0); + if (!cpu_max) + return XDP_ABORTED; + + if (!(parse_eth(eth, data_end, ð_proto, &l3_offset))) + return XDP_PASS; /* Just skip */ + + /* Hash for IPv4 and IPv6 */ + switch (eth_proto) { + case ETH_P_IP: + cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset); + break; + case ETH_P_IPV6: + cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset); + break; + case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */ + default: + cpu_hash = 0; + } + + /* Choose CPU based on hash */ + cpu_idx = cpu_hash % *cpu_max; + + cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx); + if (!cpu_lookup) + return XDP_ABORTED; + cpu_dest = *cpu_lookup; + + if (cpu_dest >= nr_cpus) { + NO_TEAR_INC(rec->issue); + return XDP_ABORTED; + } + return bpf_redirect_map(&cpu_map, cpu_dest, 0); +} + +SEC("xdp/cpumap") +int cpumap_redirect(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + __u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return XDP_DROP; + + swap_src_dst_mac(data); + return bpf_redirect_map(&tx_port, 0, 0); +} + +SEC("xdp/cpumap") +int cpumap_pass(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +SEC("xdp/cpumap") +int cpumap_drop(struct xdp_md *ctx) +{ + return XDP_DROP; +} + +SEC("xdp/devmap") +int redirect_egress_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void 
/*
 * Names of the selectable XDP programs in the cpumap skeleton. The table is
 * indexed with opt->program_mode (an enum cpumap_program_mode value) when
 * looking up the program to attach, so the entries must stay in the same
 * order as the enumerators.
 */
static const char *cpumap_prog_names[] = {
	"cpumap_no_touch",
	"cpumap_touch_data",
	"cpumap_round_robin",
	"cpumap_l4_proto",
	"cpumap_l4_filter",
	"cpumap_l4_hash",
};
+ */ + ret = bpf_map_update_elem(map_fd, &cpu, value, 0); + if (ret < 0) { + pr_warn("Create CPU entry failed: %s\n", strerror(errno)); + return ret; + } + + /* Inform bpf_prog's that a new CPU is available to select + * from via some control maps. + */ + ret = bpf_map_update_elem(avail_fd, &avail_idx, &cpu, 0); + if (ret < 0) { + pr_warn("Add to avail CPUs failed: %s\n", strerror(errno)); + return ret; + } + + /* When not replacing/updating existing entry, bump the count */ + ret = bpf_map_lookup_elem(count_fd, &key, &curr_cpus_count); + if (ret < 0) { + pr_warn("Failed reading curr cpus_count: %s\n", + strerror(errno)); + return ret; + } + if (new) { + curr_cpus_count++; + ret = bpf_map_update_elem(count_fd, &key, + &curr_cpus_count, 0); + if (ret < 0) { + pr_warn("Failed write curr cpus_count: %s\n", + strerror(errno)); + return ret; + } + } + + pr_debug("%s CPU: %u as idx: %u qsize: %d cpumap_prog_fd: %d (cpus_count: %u)\n", + new ? "Add new" : "Replace", cpu, avail_idx, + value->qsize, value->bpf_prog.fd, curr_cpus_count); + + return 0; +} + +/* CPUs are zero-indexed. Thus, add a special sentinel default value + * in map cpus_available to mark CPU index'es not configured + */ +static int mark_cpus_unavailable(void) +{ + int ret, i, n_cpus = libbpf_num_possible_cpus(); + __u32 invalid_cpu = n_cpus; + + for (i = 0; i < n_cpus; i++) { + ret = bpf_map_update_elem(avail_fd, &i, + &invalid_cpu, 0); + if (ret < 0) { + pr_warn("Failed marking CPU unavailable: %s\n", + strerror(errno)); + return ret; + } + } + + return 0; +} + +/* Stress cpumap management code by concurrently changing underlying cpumap */ +static void stress_cpumap(void *ctx) +{ + struct bpf_cpumap_val *value = ctx; + + /* Changing qsize will cause kernel to free and alloc a new + * bpf_cpu_map_entry, with an associated/complicated tear-down + * procedure. 
+ */ + value->qsize = 1024; + create_cpu_entry(1, value, 0, false); + value->qsize = 8; + create_cpu_entry(1, value, 0, false); + value->qsize = 16000; + create_cpu_entry(1, value, 0, false); +} + +static int set_cpumap_prog(struct xdp_redirect_cpumap *skel, + enum cpumap_remote_action action, + const struct iface *redir_iface) +{ + struct bpf_devmap_val val = {}; + __u32 key = 0; + int err; + + switch (action) { + case ACTION_DISABLED: + return 0; + case ACTION_DROP: + return bpf_program__fd(skel->progs.cpumap_drop); + case ACTION_PASS: + return bpf_program__fd(skel->progs.cpumap_pass); + case ACTION_REDIRECT: + break; + default: + return -EINVAL; + } + + if (!redir_iface->ifindex) { + pr_warn("Must specify redirect device when using --remote-action 'redirect'\n"); + return -EINVAL; + } + + if (get_mac_addr(redir_iface->ifindex, skel->bss->tx_mac_addr) < 0) { + pr_warn("Couldn't get MAC address for interface %s\n", redir_iface->ifname); + return -EINVAL; + } + + val.ifindex = redir_iface->ifindex; + val.bpf_prog.fd = bpf_program__fd(skel->progs.redirect_egress_prog); + + err = bpf_map_update_elem(bpf_map__fd(skel->maps.tx_port), &key, &val, 0); + if (err < 0) + return -errno; + + return bpf_program__fd(skel->progs.cpumap_redirect); +} + +int do_redirect_cpumap(const void *cfg, __unused const char *pin_root_path) +{ + const struct cpumap_opts *opt = cfg; + + DECLARE_LIBBPF_OPTS(xdp_program_opts, opts); + struct xdp_program *xdp_prog = NULL; + struct xdp_redirect_cpumap *skel; + struct bpf_program *prog = NULL; + struct bpf_map_info info = {}; + struct bpf_cpumap_val value; + __u32 infosz = sizeof(info); + int ret = EXIT_FAIL_OPTION; + int n_cpus, fd; + size_t i; + + if (opt->extended) + sample_switch_mode(); + + if (opt->stats) + mask |= SAMPLE_REDIRECT_MAP_CNT; + + if (opt->redir_iface.ifindex) + mask |= SAMPLE_DEVMAP_XMIT_CNT_MULTI; + + + n_cpus = libbpf_num_possible_cpus(); + + /* Notice: Choosing the queue size is very important when CPU is + * configured with 
power-saving states. + * + * If deepest state take 133 usec to wakeup from (133/10^6). When link + * speed is 10Gbit/s ((10*10^9/8) in bytes/sec). How many bytes can + * arrive with in 133 usec at this speed: (10*10^9/8)*(133/10^6) = + * 166250 bytes. With MTU size packets this is 110 packets, and with + * minimum Ethernet (MAC-preamble + intergap) 84 bytes is 1979 packets. + * + * Setting default cpumap queue to 2048 as worst-case (small packet) + * should be +64 packet due kthread wakeup call (due to xdp_do_flush) + * worst-case is 2043 packets. + * + * Sysadm can configured system to avoid deep-sleep via: + * tuned-adm profile network-latency + */ + + + skel = xdp_redirect_cpumap__open(); + if (!skel) { + pr_warn("Failed to xdp_redirect_cpumap__open: %s\n", + strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end; + } + + /* Make sure we only load the one XDP program we are interested in */ + while ((prog = bpf_object__next_program(skel->obj, prog)) != NULL) + if (bpf_program__type(prog) == BPF_PROG_TYPE_XDP && + bpf_program__expected_attach_type(prog) == BPF_XDP) + bpf_program__set_autoload(prog, false); + + prog = bpf_object__find_program_by_name(skel->obj, + cpumap_prog_names[opt->program_mode]); + if (!prog) { + pr_warn("Failed to find program '%s'\n", + cpumap_prog_names[opt->program_mode]); + goto end_destroy; + } + + ret = sample_init_pre_load(skel, opt->iface_in.ifname); + if (ret < 0) { + pr_warn("Failed to sample_init_pre_load: %s\n", strerror(-ret)); + ret = EXIT_FAIL_BPF; + goto end_destroy; + } + + if (bpf_map__set_max_entries(skel->maps.cpu_map, n_cpus) < 0) { + pr_warn("Failed to set max entries for cpu_map map: %s", + strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end_destroy; + } + + if (bpf_map__set_max_entries(skel->maps.cpus_available, n_cpus) < 0) { + pr_warn("Failed to set max entries for cpus_available map: %s", + strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end_destroy; + } + + ret = EXIT_FAIL_OPTION; + + skel->rodata->from_match[0] = 
opt->iface_in.ifindex; + if (opt->redir_iface.ifindex) + skel->rodata->to_match[0] = opt->redir_iface.ifindex; + + opts.obj = skel->obj; + opts.prog_name = bpf_program__name(prog); + xdp_prog = xdp_program__create(&opts); + if (!xdp_prog) { + ret = -errno; + pr_warn("Couldn't open XDP program: %s\n", + strerror(-ret)); + goto end_destroy; + } + + ret = xdp_program__attach(xdp_prog, opt->iface_in.ifindex, opt->mode, 0); + if (ret < 0) { + pr_warn("Failed to attach XDP program: %s\n", + strerror(-ret)); + goto end_destroy; + } + + ret = bpf_obj_get_info_by_fd(bpf_map__fd(skel->maps.cpu_map), &info, &infosz); + if (ret < 0) { + pr_warn("Failed bpf_obj_get_info_by_fd for cpumap: %s\n", + strerror(errno)); + goto end_detach; + } + + skel->bss->cpumap_map_id = info.id; + + map_fd = bpf_map__fd(skel->maps.cpu_map); + avail_fd = bpf_map__fd(skel->maps.cpus_available); + count_fd = bpf_map__fd(skel->maps.cpus_count); + + ret = mark_cpus_unavailable(); + if (ret < 0) { + pr_warn("Unable to mark CPUs as unavailable\n"); + goto end_detach; + } + + ret = sample_init(skel, mask, opt->iface_in.ifindex, 0); + if (ret < 0) { + pr_warn("Failed to initialize sample: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_detach; + } + + fd = set_cpumap_prog(skel, opt->remote_action, &opt->redir_iface); + if (fd < 0) { + ret = EXIT_FAIL_BPF; + goto end_detach; + } + value.qsize = opt->qsize; + value.bpf_prog.fd = fd; + + for (i = 0; i < opt->cpus.num_vals; i++) { + if (create_cpu_entry(opt->cpus.vals[i], &value, i, true) < 0) { + pr_warn("Cannot proceed, exiting\n"); + ret = EXIT_FAIL; + goto end_detach; + } + } + + ret = sample_run(opt->interval, opt->stress_mode ? 
stress_cpumap : NULL, &value); + if (ret < 0) { + pr_warn("Failed during sample run: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_detach; + } + ret = EXIT_OK; +end_detach: + xdp_program__detach(xdp_prog, opt->iface_in.ifindex, opt->mode, 0); +end_destroy: + xdp_program__close(xdp_prog); + xdp_redirect_cpumap__destroy(skel); +end: + sample_teardown(); + return ret; +} diff --git a/xdp-bench/xdp_redirect_devmap.bpf.c b/xdp-bench/xdp_redirect_devmap.bpf.c new file mode 100644 index 0000000..0212e82 --- /dev/null +++ b/xdp-bench/xdp_redirect_devmap.bpf.c @@ -0,0 +1,88 @@ +/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <bpf/vmlinux.h> +#include <xdp/xdp_sample_shared.h> +#include <xdp/xdp_sample.bpf.h> +#include <xdp/xdp_sample_common.bpf.h> +#include <xdp/parsing_helpers.h> + +/* The 2nd xdp prog on egress does not support skb mode, so we define two + * maps, tx_port_general and tx_port_native. 
+ */ +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __uint(max_entries, 1); +} tx_port_general SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(struct bpf_devmap_val)); + __uint(max_entries, 1); +} tx_port_native SEC(".maps"); + +/* store egress interface mac address */ +const volatile char tx_mac_addr[ETH_ALEN]; + +static __always_inline int xdp_redirect_devmap(struct xdp_md *ctx, void *redirect_map) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + __u32 key = bpf_get_smp_processor_id(); + struct ethhdr *eth = data; + struct datarec *rec; + __u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return XDP_DROP; + + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_PASS; + NO_TEAR_INC(rec->processed); + swap_src_dst_mac(data); + return bpf_redirect_map(redirect_map, 0, 0); +} + +SEC("xdp") +int redir_devmap_general(struct xdp_md *ctx) +{ + return xdp_redirect_devmap(ctx, &tx_port_general); +} + +SEC("xdp") +int redir_devmap_native(struct xdp_md *ctx) +{ + return xdp_redirect_devmap(ctx, &tx_port_native); +} + +SEC("xdp/devmap") +int xdp_redirect_devmap_egress(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + __u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return XDP_DROP; + + __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN); + + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/xdp-bench/xdp_redirect_devmap.c b/xdp-bench/xdp_redirect_devmap.c new file mode 100644 index 0000000..eca35f5 --- /dev/null +++ b/xdp-bench/xdp_redirect_devmap.c @@ -0,0 +1,207 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2017 Covalent IO, Inc. 
http://covalent.io + */ +#include <errno.h> +#include <stdio.h> +#include <assert.h> +#include <getopt.h> +#include <libgen.h> +#include <net/if.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <bpf/bpf.h> +#include <stdbool.h> +#include <linux/bpf.h> +#include <bpf/libbpf.h> +#include <xdp/libxdp.h> +#include <linux/if_link.h> + +#include "logging.h" + +#include "xdp-bench.h" +#include "xdp_sample.h" +#include "xdp_redirect_devmap.skel.h" + +static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT | + SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI; + +DEFINE_SAMPLE_INIT(xdp_redirect_devmap); + +const struct devmap_opts defaults_redirect_devmap = { .mode = XDP_MODE_NATIVE, + .interval = 2 }; + +int do_redirect_devmap(const void *cfg, __unused const char *pin_root_path) +{ + const struct devmap_opts *opt = cfg; + + struct xdp_program *xdp_prog = NULL, *dummy_prog = NULL; + const char *prog_name = "redir_devmap_native"; + DECLARE_LIBBPF_OPTS(xdp_program_opts, opts); + struct bpf_devmap_val devmap_val = {}; + struct bpf_map *tx_port_map = NULL; + struct xdp_redirect_devmap *skel; + struct bpf_program *prog = NULL; + char str[2 * IF_NAMESIZE + 1]; + int ret = EXIT_FAIL_OPTION; + bool tried = false; + int key = 0; + + if (opt->extended) + sample_switch_mode(); + + if (opt->mode == XDP_MODE_SKB) + /* devmap_xmit tracepoint not available */ + mask &= ~(SAMPLE_DEVMAP_XMIT_CNT | + SAMPLE_DEVMAP_XMIT_CNT_MULTI); + + if (opt->stats) + mask |= SAMPLE_REDIRECT_CNT; + +restart: + skel = xdp_redirect_devmap__open(); + if (!skel) { + pr_warn("Failed to xdp_redirect_devmap__open: %s\n", + strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end; + } + + /* Make sure we only load the one XDP program we are interested in */ + while ((prog = bpf_object__next_program(skel->obj, prog)) != NULL) + if (bpf_program__type(prog) == BPF_PROG_TYPE_XDP && + bpf_program__expected_attach_type(prog) == BPF_XDP) + bpf_program__set_autoload(prog, 
false); + + if (tried) { + tx_port_map = skel->maps.tx_port_general; + bpf_program__set_autoload(skel->progs.xdp_redirect_devmap_egress, false); +#ifdef HAVE_LIBBPF_BPF_MAP__SET_AUTOCREATE + bpf_map__set_autocreate(skel->maps.tx_port_native, false); +#else + pr_warn("Libbpf is missing bpf_map__set_autocreate(), fallback won't work\n"); + ret = EXIT_FAIL_BPF; + goto end_destroy; +#endif + } else { +#ifdef HAVE_LIBBPF_BPF_MAP__SET_AUTOCREATE + bpf_map__set_autocreate(skel->maps.tx_port_general, false); +#endif + tx_port_map = skel->maps.tx_port_native; + } + + ret = sample_init_pre_load(skel, opt->iface_in.ifname); + if (ret < 0) { + pr_warn("Failed to sample_init_pre_load: %s\n", strerror(-ret)); + ret = EXIT_FAIL_BPF; + goto end_destroy; + } + + /* Load 2nd xdp prog on egress. */ + if (opt->load_egress) { + ret = get_mac_addr(opt->iface_out.ifindex, skel->rodata->tx_mac_addr); + if (ret < 0) { + pr_warn("Failed to get interface %s mac address: %s\n", + opt->iface_out.ifname, strerror(-ret)); + ret = EXIT_FAIL; + goto end_destroy; + } + } + + skel->rodata->from_match[0] = opt->iface_in.ifindex; + skel->rodata->to_match[0] = opt->iface_out.ifindex; + + opts.obj = skel->obj; + opts.prog_name = prog_name; + xdp_prog = xdp_program__create(&opts); + if (!xdp_prog) { + ret = -errno; + pr_warn("Couldn't open XDP program: %s\n", + strerror(-ret)); + goto end_destroy; + } + + ret = xdp_program__attach(xdp_prog, opt->iface_in.ifindex, opt->mode, 0); + if (ret < 0) { + /* First try with struct bpf_devmap_val as value for generic + * mode, then fallback to sizeof(int) for older kernels. 
+ */ + if (!opt->load_egress && !tried) { + pr_warn("Attempting fallback to int-sized devmap\n"); + prog_name = "redir_devmap_general"; + tried = true; + + xdp_program__close(xdp_prog); + xdp_redirect_devmap__destroy(skel); + sample_teardown(); + xdp_prog = NULL; + goto restart; + } + pr_warn("Failed to attach XDP program: %s\n", + strerror(-ret)); + ret = EXIT_FAIL_XDP; + goto end_destroy; + } + + ret = sample_init(skel, mask, opt->iface_in.ifindex, opt->iface_out.ifindex); + if (ret < 0) { + pr_warn("Failed to initialize sample: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_detach; + } + + opts.obj = NULL; + opts.prog_name = "xdp_pass"; + opts.find_filename = "xdp-dispatcher.o"; + dummy_prog = xdp_program__create(&opts); + if (!dummy_prog) { + pr_warn("Failed to load dummy program: %s\n", strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end_detach; + } + + ret = xdp_program__attach(dummy_prog, opt->iface_out.ifindex, opt->mode, 0); + if (ret < 0) { + pr_warn("Failed to attach dummy program: %s\n", strerror(-ret)); + ret = EXIT_FAIL_BPF; + goto end_detach; + } + + devmap_val.ifindex = opt->iface_out.ifindex; + if (opt->load_egress) + devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_devmap_egress); + ret = bpf_map_update_elem(bpf_map__fd(tx_port_map), &key, &devmap_val, 0); + if (ret < 0) { + pr_warn("Failed to update devmap value: %s\n", + strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end_detach; + } + + ret = EXIT_FAIL; + + safe_strncpy(str, get_driver_name(opt->iface_in.ifindex), sizeof(str)); + pr_info("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n", + opt->iface_in.ifname, opt->iface_in.ifindex, str, + opt->iface_out.ifname, opt->iface_out.ifindex, get_driver_name(opt->iface_out.ifindex)); + + ret = sample_run(opt->interval, NULL, NULL); + if (ret < 0) { + pr_warn("Failed during sample run: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_destroy; + } + ret = EXIT_OK; +end_detach: + if (dummy_prog) + 
xdp_program__detach(dummy_prog, opt->iface_out.ifindex, opt->mode, 0); + xdp_program__detach(xdp_prog, opt->iface_in.ifindex, opt->mode, 0); +end_destroy: + xdp_program__close(xdp_prog); + xdp_program__close(dummy_prog); + xdp_redirect_devmap__destroy(skel); +end: + sample_teardown(); + return ret; +} diff --git a/xdp-bench/xdp_redirect_devmap_multi.bpf.c b/xdp-bench/xdp_redirect_devmap_multi.bpf.c new file mode 100644 index 0000000..3e69783 --- /dev/null +++ b/xdp-bench/xdp_redirect_devmap_multi.bpf.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <bpf/vmlinux.h> +#include <xdp/xdp_sample_shared.h> +#include <xdp/xdp_sample.bpf.h> +#include <xdp/xdp_sample_common.bpf.h> +#include <xdp/parsing_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP_HASH); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __uint(max_entries, 32); +} forward_map_general SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP_HASH); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(struct bpf_devmap_val)); + __uint(max_entries, 32); +} forward_map_native SEC(".maps"); + +/* map to store egress interfaces mac addresses */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u32); + __type(value, __be64); + __uint(max_entries, 32); +} mac_map SEC(".maps"); + +static int xdp_redirect_devmap_multi(struct xdp_md *ctx, void *forward_map) +{ + __u32 key = bpf_get_smp_processor_id(); + struct datarec *rec; + + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_PASS; + NO_TEAR_INC(rec->processed); + + return bpf_redirect_map(forward_map, 0, + BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); +} + +SEC("xdp") +int redir_multi_general(struct xdp_md *ctx) +{ + return xdp_redirect_devmap_multi(ctx, &forward_map_general); +} + +SEC("xdp") +int redir_multi_native(struct xdp_md *ctx) +{ + return xdp_redirect_devmap_multi(ctx, &forward_map_native); +} + +SEC("xdp/devmap") +int xdp_devmap_prog(struct xdp_md *ctx) +{ + void *data_end 
= (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + __u32 key = ctx->egress_ifindex; + struct ethhdr *eth = data; + __be64 *mac; + __u64 nh_off; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return XDP_DROP; + + mac = bpf_map_lookup_elem(&mac_map, &key); + if (mac) + __builtin_memcpy(eth->h_source, mac, ETH_ALEN); + + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/xdp-bench/xdp_redirect_devmap_multi.c b/xdp-bench/xdp_redirect_devmap_multi.c new file mode 100644 index 0000000..f3e9b3c --- /dev/null +++ b/xdp-bench/xdp_redirect_devmap_multi.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <errno.h> +#include <stdio.h> +#include <assert.h> +#include <getopt.h> +#include <libgen.h> +#include <net/if.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <bpf/bpf.h> +#include <linux/bpf.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <bpf/libbpf.h> +#include <netinet/in.h> +#include <sys/socket.h> +#include <sys/resource.h> +#include <linux/if_link.h> +#include <linux/if_ether.h> +#include <xdp/libxdp.h> + +#include "logging.h" + +#include "xdp_sample.h" +#include "xdp-bench.h" +#include "xdp_redirect_devmap_multi.skel.h" + +static int ifaces[MAX_IFACE_NUM] = {}; + +static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT | + SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT | + SAMPLE_DEVMAP_XMIT_CNT_MULTI | SAMPLE_SKIP_HEADING; + +DEFINE_SAMPLE_INIT(xdp_redirect_devmap_multi); + +static int update_mac_map(struct bpf_map *map) +{ + int mac_map_fd = bpf_map__fd(map); + unsigned char mac_addr[6]; + unsigned int ifindex; + int i, ret = -1; + + for (i = 0; ifaces[i] > 0; i++) { + ifindex = ifaces[i]; + + ret = get_mac_addr(ifindex, mac_addr); + if (ret < 0) { + pr_warn("get interface %d mac failed\n", + ifindex); + return ret; + } + + ret = bpf_map_update_elem(mac_map_fd, &ifindex, mac_addr, 0); + if (ret < 0) { + pr_warn("Failed to 
update mac address for ifindex %d\n", + ifindex); + return ret; + } + } + + return 0; +} + +const struct devmap_multi_opts defaults_redirect_devmap_multi = { .mode = XDP_MODE_NATIVE, + .interval = 2 }; + + +int do_redirect_devmap_multi(const void *cfg, __unused const char *pin_root_path) +{ + const struct devmap_multi_opts *opt = cfg; + + const char *prog_name = "redir_multi_native"; + DECLARE_LIBBPF_OPTS(xdp_program_opts, opts); + struct xdp_redirect_devmap_multi *skel; + struct bpf_devmap_val devmap_val = {}; + struct xdp_program *xdp_prog = NULL; + struct bpf_map *forward_map = NULL; + bool first = true, tried = false; + struct bpf_program *prog = NULL; + int ret = EXIT_FAIL_OPTION; + struct iface *iface; + int i; + + if (opt->extended) + sample_switch_mode(); + + if (opt->mode == XDP_MODE_SKB) + /* devmap_xmit tracepoint not available */ + mask &= ~(SAMPLE_DEVMAP_XMIT_CNT | + SAMPLE_DEVMAP_XMIT_CNT_MULTI); + + if (opt->stats) + mask |= SAMPLE_REDIRECT_CNT; + +restart: + skel = xdp_redirect_devmap_multi__open(); + if (!skel) { + pr_warn("Failed to xdp_redirect_devmap_multi__open: %s\n", + strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end; + } + + /* Make sure we only load the one XDP program we are interested in */ + while ((prog = bpf_object__next_program(skel->obj, prog)) != NULL) + if (bpf_program__type(prog) == BPF_PROG_TYPE_XDP && + bpf_program__expected_attach_type(prog) == BPF_XDP) + bpf_program__set_autoload(prog, false); + + if (tried) { + forward_map = skel->maps.forward_map_general; + bpf_program__set_autoload(skel->progs.xdp_devmap_prog, false); +#ifdef HAVE_LIBBPF_BPF_MAP__SET_AUTOCREATE + bpf_map__set_autocreate(skel->maps.forward_map_native, false); +#else + pr_warn("Libbpf is missing bpf_map__set_autocreate(), fallback won't work\n"); + ret = EXIT_FAIL_BPF; + goto end_destroy; +#endif + } else { +#ifdef HAVE_LIBBPF_BPF_MAP__SET_AUTOCREATE + bpf_map__set_autocreate(skel->maps.forward_map_general, false); +#endif + forward_map = 
skel->maps.forward_map_native; + } + + ret = sample_init_pre_load(skel, NULL); + if (ret < 0) { + pr_warn("Failed to sample_init_pre_load: %s\n", strerror(-ret)); + ret = EXIT_FAIL_BPF; + goto end_destroy; + } + + ret = EXIT_FAIL_OPTION; + /* opt parsing enforces num <= MAX_IFACES_NUM */ + for (i = 0, iface = opt->ifaces; iface; i++, iface = iface->next) { + skel->rodata->from_match[i] = iface->ifindex; + skel->rodata->to_match[i] = iface->ifindex; + } + + + opts.obj = skel->obj; + opts.prog_name = prog_name; + xdp_prog = xdp_program__create(&opts); + if (!xdp_prog) { + ret = -errno; + pr_warn("Couldn't open XDP program: %s\n", + strerror(-ret)); + goto end_destroy; + } + + for (iface = opt->ifaces; iface; iface = iface->next) { + pr_debug("Loading program on interface %s\n", iface->ifname); + + ret = xdp_program__attach(xdp_prog, iface->ifindex, opt->mode, 0); + if (ret) { + if (first) { + if (!opt->load_egress && !tried) { + pr_warn("Attempting fallback to int-sized devmap\n"); + prog_name = "redir_multi_general"; + tried = true; + + xdp_program__close(xdp_prog); + xdp_redirect_devmap_multi__destroy(skel); + sample_teardown(); + xdp_prog = NULL; + goto restart; + } + pr_warn("Failed to attach XDP program to iface %s: %s\n", + iface->ifname, strerror(-ret)); + goto end_destroy; + } + pr_warn("Failed to attach XDP program to iface %s: %s\n", + iface->ifname, strerror(-ret)); + goto end_detach; + } + + /* Add all the interfaces to forward group and attach + * egress devmap program if exist + */ + devmap_val.ifindex = iface->ifindex; + if (opt->load_egress) + devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_devmap_prog); + ret = bpf_map_update_elem(bpf_map__fd(forward_map), &iface->ifindex, &devmap_val, 0); + if (ret < 0) { + pr_warn("Failed to update devmap value: %s\n", + strerror(errno)); + ret = EXIT_FAIL_BPF; + goto end_detach; + } + + first = false; + } + + if (opt->load_egress) { + /* Update mac_map with all egress interfaces' mac addr */ + if 
(update_mac_map(skel->maps.mac_map) < 0) { + pr_warn("Updating mac address failed\n"); + ret = EXIT_FAIL; + goto end_detach; + } + } + + ret = sample_init(skel, mask, 0, 0); + if (ret < 0) { + pr_warn("Failed to initialize sample: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_detach; + } + + ret = sample_run(opt->interval, NULL, NULL); + if (ret < 0) { + pr_warn("Failed during sample run: %s\n", strerror(-ret)); + ret = EXIT_FAIL; + goto end_detach; + } + ret = EXIT_OK; +end_detach: + for (iface = opt->ifaces; iface; iface = iface->next) + xdp_program__detach(xdp_prog, iface->ifindex, opt->mode, 0); +end_destroy: + xdp_program__close(xdp_prog); + xdp_redirect_devmap_multi__destroy(skel); +end: + sample_teardown(); + return ret; +} |