diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 07:10:00 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 07:10:00 +0000 |
commit | 4ba2b326284765e942044db13a7f0dae702bec93 (patch) | |
tree | cbdfaec33eed4f3a970c54cd10e8ddfe3003b3b1 /xdp-trafficgen | |
parent | Initial commit. (diff) | |
download | xdp-tools-4ba2b326284765e942044db13a7f0dae702bec93.tar.xz xdp-tools-4ba2b326284765e942044db13a7f0dae702bec93.zip |
Adding upstream version 1.3.1.upstream/1.3.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | xdp-trafficgen/.gitignore | 1 | ||||
-rw-r--r-- | xdp-trafficgen/Makefile | 19 | ||||
-rw-r--r-- | xdp-trafficgen/README.org | 157 | ||||
-rw-r--r-- | xdp-trafficgen/tests/test-xdp-trafficgen.sh | 42 | ||||
-rw-r--r-- | xdp-trafficgen/xdp-trafficgen.8 | 188 | ||||
-rw-r--r-- | xdp-trafficgen/xdp-trafficgen.c | 941 | ||||
-rw-r--r-- | xdp-trafficgen/xdp-trafficgen.h | 51 | ||||
-rw-r--r-- | xdp-trafficgen/xdp_trafficgen.bpf.c | 348 |
8 files changed, 1747 insertions, 0 deletions
diff --git a/xdp-trafficgen/.gitignore b/xdp-trafficgen/.gitignore new file mode 100644 index 0000000..65f81c2 --- /dev/null +++ b/xdp-trafficgen/.gitignore @@ -0,0 +1 @@ +xdp-trafficgen diff --git a/xdp-trafficgen/Makefile b/xdp-trafficgen/Makefile new file mode 100644 index 0000000..4a6892e --- /dev/null +++ b/xdp-trafficgen/Makefile @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) + +XDP_TARGETS := xdp_trafficgen.bpf +BPF_SKEL_TARGETS := $(XDP_TARGETS) + +# Don't install skeleton object files +XDP_OBJ_INSTALL := + +TOOL_NAME := xdp-trafficgen +USER_TARGETS := xdp-trafficgen +MAN_PAGE := xdp-trafficgen.8 +EXTRA_DEPS := xdp-trafficgen.h +USER_LIBS = -lpthread +TEST_FILE := tests/test-xdp-trafficgen.sh + +LIB_DIR = ../lib + +include $(LIB_DIR)/common.mk + diff --git a/xdp-trafficgen/README.org b/xdp-trafficgen/README.org new file mode 100644 index 0000000..6d7c20f --- /dev/null +++ b/xdp-trafficgen/README.org @@ -0,0 +1,157 @@ +#+EXPORT_FILE_NAME: xdp-trafficgen +#+TITLE: xdp-trafficgen +#+OPTIONS: ^:nil +#+MAN_CLASS_OPTIONS: :section-id "8\" \"DATE\" \"VERSION\" \"An XDP-based traffic generator" +# This file serves both as a README on github, and as the source for the man +# page; the latter through the org-mode man page export support. +# . +# To export the man page, simply use the org-mode exporter; (require 'ox-man) if +# it's not available. There's also a Makefile rule to export it. + +* XDP-trafficgen - an XDP-based traffic generator + +XDP-trafficgen is a packet generator utilising the XDP kernel subsystem to +generate packets transmit them through a network interface. Packets are +dynamically generated and transmitted in the kernel, allowing for high +performance (millions of packets per second per core). + +XDP-trafficgen supports generating UDP traffic with fixed or dynamic destination +ports, and also has basic support for generating dummy TCP traffic on a single +flow. + +** Running xdp-traffigen +The syntax for running xdp-bench is: + +#+begin_src sh +Usage: xdp-trafficgen COMMAND [options] + +COMMAND can be one of: + udp - run in UDP mode + tcp - run in TCP mode +#+end_src + +Each command, and its options are explained below. Or use =xdp-trafficgen COMMAND +--help= to see the options for each command. + +* The UDP command +The UDP command generates UDP traffic to a given destination IP and either a +fixed destination port, or a range of port numbers. Only IPv6 traffic is +supported, and the generated packets will have their IP hop limit set to 1, so +they can't be routed. + +The syntax for the =udp= command is: + +=xdp-trafficgen udp [options] <ifname>= + +Where =<ifname>= is the name of the destination interface that packets will be +transmitted on. Note that the network driver of this network interface must +support being the target of XDP redirects (it must implement the =ndo_xdp_xmit= +driver operation). + +The supported options are: + +** -m, --dst-mac <mac addr> +Set the destination MAC address of generated packets. The default is to generate +packets with an all-zero destination MAC. + +** -M, --src-mac <mac addr> +Set the source MAC address of the generated packets. The default is to use the +MAC address of the interface packets are transmitted on. + +** -a, --dst-addr <addr> +Destination IP address of generated packets. The default is the link-local +=fe80::2= address. + +** -A, --src-addr <addr> +Source IP address of generated packets. The default is the link-local =fe80::1= +address. + +** -p, --dst-port <port> +Destination UDP port of generated packets, or the first port in the range if +running with =--dyn-ports= set. Defaults to 1. + +** -P, --src-port <port> +Source UDP port of generated packets. Defaults to 1. + +** -d, --dyn-ports <num ports> +Enable dynamic port mode where the destination port is varied over a range of +=<num ports>= starting from the =--dst-port=. + +** -n, --num-packets <port> +Number of packets to send before exiting. If not supplied, =xdp-trafficgen= will +keep sending packets until interrupted. + +** -t, --threads <threads> +Number of simultaneous threads to transmit from. Each thread will be pinned to a +separate CPU core if possible. Defaults to 1. + +** -I, --interval <s> +Output transmission statistics with this interval (in seconds). + +** -v, --verbose +Enable verbose logging (-vv: more verbose). + +** --version +Display version information and exit. + +** -h, --help +Display a summary of the available options + + +* The TCP command +The TCP command generates dummy TCP traffic in a single TCP flow. This relies on +first installing an ingress XDP program on the interface used to transmit on. +Then, a regular TCP socket connection is established from userspace, and once +the handshake is completed, the XDP program will take over and start generating +traffic on that flow tuple. The ingress XDP program will intercept ACK packets +from the receiver, and keep track of the receive window. + +The traffic generator has no congestion control, and only very basic retransmit +tracking: in essence, any duplicate ACKs from the receiver will cause the sender +to reset its send sequence number to the last ACKed value and restart from +there. The same thing happens if no progress on the window is made within two +seconds. This means that the traffic generator can generate a large amount of +dummy traffic, but if there's packet loss a lot of this can be retransmissions. + +The syntax for the =tcp= command is: + +=xdp-trafficgen tcp [options] -i <ifname> <hostname>= + +Where =<ifname>= is the name of the destination interface that packets will be +transmitted on and =<hostname>= is the peer hostname or IP address to connect to +(only IPv6 is supported). Note that the network driver of this network interface +must support being the target of XDP redirects (it must implement the +=ndo_xdp_xmit= driver operation). + +The supported options are: + +** -p, --dst-port <port> +Connect to destination <port>. Default 10000. + +** -m, --mode <mode> +Load ingress XDP program in <mode>; default native (valid values: native,skb,hw) + +** -n, --num-packets <port> +Number of packets to send before exiting. If not supplied, =xdp-trafficgen= will +keep sending packets until interrupted. + +** -I, --interval <s> +Output transmission statistics with this interval (in seconds). + +** -v, --verbose +Enable verbose logging (-vv: more verbose). + +** --version +Display version information and exit. + +** -h, --help +Display a summary of the available options + + +* BUGS + +Please report any bugs on Github: https://github.com/xdp-project/xdp-tools/issues + +* AUTHOR + +xdp-trafficgen and this man page were written by Toke Høiland-Jørgensen. diff --git a/xdp-trafficgen/tests/test-xdp-trafficgen.sh b/xdp-trafficgen/tests/test-xdp-trafficgen.sh new file mode 100644 index 0000000..a42032d --- /dev/null +++ b/xdp-trafficgen/tests/test-xdp-trafficgen.sh @@ -0,0 +1,42 @@ +XDP_LOADER=${XDP_LOADER:-./xdp-loader} +XDP_TRAFFICGEN=${XDP_TRAFFICGEN:-./xdp-trafficgen} +ALL_TESTS="test_udp test_tcp" + +PIDS="" + +skip_if_missing_kernel_support() +{ + $XDP_TRAFFICGEN probe + ret=$? + if [ "$ret" -eq "161" ]; then + exit $SKIPPED_TEST + elif [ "$ret" -ne "0" ]; then + exit 1 + fi +} + +test_udp() +{ + skip_if_missing_kernel_support + export XDP_SAMPLE_IMMEDIATE_EXIT=1 + + check_run $XDP_TRAFFICGEN udp $NS -n 1 +} + +test_tcp() +{ + skip_if_missing_kernel_support + export XDP_SAMPLE_IMMEDIATE_EXIT=1 + + PID=$(start_background_ns_devnull "nc -6 -l 10000") + $XDP_TRAFFICGEN tcp -i $NS $INSIDE_IP6 -n 1 + res=$? + stop_background $PID + return $res +} + +cleanup_tests() +{ + $XDP_LOADER unload $NS --all >/dev/null 2>&1 + $XDP_LOADER clean >/dev/null 2>&1 +} diff --git a/xdp-trafficgen/xdp-trafficgen.8 b/xdp-trafficgen/xdp-trafficgen.8 new file mode 100644 index 0000000..3bef2b6 --- /dev/null +++ b/xdp-trafficgen/xdp-trafficgen.8 @@ -0,0 +1,188 @@ +.TH "xdp-trafficgen" "8" "FEBRUARY 4, 2023" "V1.3.1" "An XDP-based traffic generator" + +.SH "NAME" +XDP-trafficgen \- an XDP-based traffic generator +.SH "SYNOPSIS" +.PP +XDP-trafficgen is a packet generator utilising the XDP kernel subsystem to +generate packets transmit them through a network interface. Packets are +dynamically generated and transmitted in the kernel, allowing for high +performance (millions of packets per second per core). + +.PP +XDP-trafficgen supports generating UDP traffic with fixed or dynamic destination +ports, and also has basic support for generating dummy TCP traffic on a single +flow. + +.SS "Running xdp-traffigen" +.PP +The syntax for running xdp-bench is: + +.RS +.nf +\fCUsage: xdp-trafficgen COMMAND [options] + +COMMAND can be one of: + udp - run in UDP mode + tcp - run in TCP mode +\fP +.fi +.RE + +.PP +Each command, and its options are explained below. Or use \fIxdp\-trafficgen COMMAND +\-\-help\fP to see the options for each command. + +.SH "The UDP command" +.PP +The UDP command generates UDP traffic to a given destination IP and either a +fixed destination port, or a range of port numbers. Only IPv6 traffic is +supported, and the generated packets will have their IP hop limit set to 1, so +they can't be routed. + +.PP +The syntax for the \fIudp\fP command is: + +.PP +\fIxdp\-trafficgen udp [options] <ifname>\fP + +.PP +Where \fI<ifname>\fP is the name of the destination interface that packets will be +transmitted on. Note that the network driver of this network interface must +support being the target of XDP redirects (it must implement the \fIndo_xdp_xmit\fP +driver operation). + +.PP +The supported options are: + +.SS "-m, --dst-mac <mac addr>" +.PP +Set the destination MAC address of generated packets. The default is to generate +packets with an all-zero destination MAC. + +.SS "-M, --src-mac <mac addr>" +.PP +Set the source MAC address of the generated packets. The default is to use the +MAC address of the interface packets are transmitted on. + +.SS "-a, --dst-addr <addr>" +.PP +Destination IP address of generated packets. The default is the link-local +\fIfe80::2\fP address. + +.SS "-A, --src-addr <addr>" +.PP +Source IP address of generated packets. The default is the link-local \fIfe80::1\fP +address. + +.SS "-p, --dst-port <port>" +.PP +Destination UDP port of generated packets, or the first port in the range if +running with \fI\-\-dyn\-ports\fP set. Defaults to 1. + +.SS "-P, --src-port <port>" +.PP +Source UDP port of generated packets. Defaults to 1. + +.SS "-d, --dyn-ports <num ports>" +.PP +Enable dynamic port mode where the destination port is varied over a range of +\fI<num ports>\fP starting from the \fI\-\-dst\-port\fP. + +.SS "-n, --num-packets <port>" +.PP +Number of packets to send before exiting. If not supplied, \fIxdp\-trafficgen\fP will +keep sending packets until interrupted. + +.SS "-t, --threads <threads>" +.PP +Number of simultaneous threads to transmit from. Each thread will be pinned to a +separate CPU core if possible. Defaults to 1. + +.SS "-I, --interval <s>" +.PP +Output transmission statistics with this interval (in seconds). + +.SS "-v, --verbose" +.PP +Enable verbose logging (-vv: more verbose). + +.SS "--version" +.PP +Display version information and exit. + +.SS "-h, --help" +.PP +Display a summary of the available options + + +.SH "The TCP command" +.PP +The TCP command generates dummy TCP traffic in a single TCP flow. This relies on +first installing an ingress XDP program on the interface used to transmit on. +Then, a regular TCP socket connection is established from userspace, and once +the handshake is completed, the XDP program will take over and start generating +traffic on that flow tuple. The ingress XDP program will intercept ACK packets +from the receiver, and keep track of the receive window. + +.PP +The traffic generator has no congestion control, and only very basic retransmit +tracking: in essence, any duplicate ACKs from the receiver will cause the sender +to reset its send sequence number to the last ACKed value and restart from +there. The same thing happens if no progress on the window is made within two +seconds. This means that the traffic generator can generate a large amount of +dummy traffic, but if there's packet loss a lot of this can be retransmissions. + +.PP +The syntax for the \fItcp\fP command is: + +.PP +\fIxdp\-trafficgen tcp [options] \-i <ifname> <hostname>\fP + +.PP +Where \fI<ifname>\fP is the name of the destination interface that packets will be +transmitted on and \fI<hostname>\fP is the peer hostname or IP address to connect to +(only IPv6 is supported). Note that the network driver of this network interface +must support being the target of XDP redirects (it must implement the +\fIndo_xdp_xmit\fP driver operation). + +.PP +The supported options are: + +.SS "-p, --dst-port <port>" +.PP +Connect to destination <port>. Default 10000. + +.SS "-m, --mode <mode>" +.PP +Load ingress XDP program in <mode>; default native (valid values: native,skb,hw) + +.SS "-n, --num-packets <port>" +.PP +Number of packets to send before exiting. If not supplied, \fIxdp\-trafficgen\fP will +keep sending packets until interrupted. + +.SS "-I, --interval <s>" +.PP +Output transmission statistics with this interval (in seconds). + +.SS "-v, --verbose" +.PP +Enable verbose logging (-vv: more verbose). + +.SS "--version" +.PP +Display version information and exit. + +.SS "-h, --help" +.PP +Display a summary of the available options + + +.SH "BUGS" +.PP +Please report any bugs on Github: \fIhttps://github.com/xdp-project/xdp-tools/issues\fP + +.SH "AUTHOR" +.PP +xdp-trafficgen and this man page were written by Toke Høiland-Jørgensen. diff --git a/xdp-trafficgen/xdp-trafficgen.c b/xdp-trafficgen/xdp-trafficgen.c new file mode 100644 index 0000000..293e416 --- /dev/null +++ b/xdp-trafficgen/xdp-trafficgen.c @@ -0,0 +1,941 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <errno.h> +#include <stdbool.h> +#include <unistd.h> +#include <fcntl.h> +#include <pthread.h> +#include <sched.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netdb.h> + +#include <bpf/bpf.h> +#include <bpf/bpf_endian.h> +#include <bpf/libbpf.h> +#include <xdp/libxdp.h> +#include <xdp/xdp_stats_kern_user.h> +#include <linux/bpf.h> +#include <linux/err.h> +#include <linux/if_link.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/ipv6.h> +#include <linux/in6.h> +#include <linux/udp.h> +#include <linux/tcp.h> + +#include "params.h" +#include "logging.h" +#include "util.h" +#include "xdp_sample.h" +#include "xdp-trafficgen.h" + +#include "xdp_trafficgen.skel.h" + +#define PROG_NAME "xdp-trafficgen" + +#ifndef BPF_F_TEST_XDP_LIVE_FRAMES +#define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1) +#endif + +#define IFINDEX_LO 1 + +static int mask = SAMPLE_DEVMAP_XMIT_CNT_MULTI | SAMPLE_DROP_OK; + +DEFINE_SAMPLE_INIT(xdp_trafficgen); + +static bool status_exited = false; +static bool runners_exited = false; + +struct udp_packet { + struct ethhdr eth; + struct ipv6hdr iph; + struct udphdr udp; + __u8 payload[64 - sizeof(struct udphdr) + - sizeof(struct ethhdr) - sizeof(struct ipv6hdr)]; +} __attribute__((__packed__)); + +static struct udp_packet pkt_udp = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), + .iph.version = 6, + .iph.nexthdr = IPPROTO_UDP, + .iph.payload_len = bpf_htons(sizeof(struct udp_packet) + - offsetof(struct udp_packet, udp)), + .iph.hop_limit = 1, + .iph.saddr.s6_addr16 = {bpf_htons(0xfe80), 0, 0, 0, 0, 0, 0, bpf_htons(1)}, + .iph.daddr.s6_addr16 = {bpf_htons(0xfe80), 0, 0, 0, 0, 0, 0, bpf_htons(2)}, + .udp.source = bpf_htons(1), + .udp.dest = bpf_htons(1), + .udp.len = bpf_htons(sizeof(struct udp_packet) + - offsetof(struct udp_packet, udp)), +}; + +struct thread_config { + void *pkt; + size_t pkt_size; + __u32 cpu_core_id; + __u32 num_pkts; + __u32 batch_size; + struct xdp_program *prog; +}; + +static int run_prog(const struct thread_config *cfg, bool *status_var) +{ +#ifdef HAVE_LIBBPF_BPF_PROG_TEST_RUN_OPTS + struct xdp_md ctx_in = { + .data_end = cfg->pkt_size, + }; + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = cfg->pkt, + .data_size_in = cfg->pkt_size, + .ctx_in = &ctx_in, + .ctx_size_in = sizeof(ctx_in), + .repeat = cfg->num_pkts ?: 1 << 20, + .flags = BPF_F_TEST_XDP_LIVE_FRAMES, + .batch_size = cfg->batch_size, + ); + __u64 iterations = 0; + cpu_set_t cpu_cores; + int err; + + CPU_ZERO(&cpu_cores); + CPU_SET(cfg->cpu_core_id, &cpu_cores); + pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpu_cores); + do { + err = xdp_program__test_run(cfg->prog, &opts, 0); + if (err) + return -errno; + iterations += opts.repeat; + } while (!*status_var && (!cfg->num_pkts || cfg->num_pkts > iterations)); + + return 0; +#else + __unused const void *c = cfg, *s = status_var; + return -EOPNOTSUPP; +#endif +} + +static void *run_traffic(void *arg) +{ + const struct thread_config *cfg = arg; + int err; + + err = run_prog(cfg, &status_exited); + if (err) + pr_warn("Couldn't run trafficgen program: %s\n", strerror(-err)); + + runners_exited = true; + return NULL; +} + +static int probe_kernel_support(void) +{ + DECLARE_LIBXDP_OPTS(xdp_program_opts, opts); + struct xdp_trafficgen *skel; + struct xdp_program *prog; + int data = 0, err; + bool status = 0; + + skel = xdp_trafficgen__open(); + if (!skel) { + err = -errno; + pr_warn("Couldn't open XDP program: %s\n", strerror(-err)); + return err; + } + + err = sample_init_pre_load(skel, "lo"); + if (err < 0) { + pr_warn("Failed to sample_init_pre_load: %s\n", strerror(-err)); + goto out; + } + + opts.obj = skel->obj; + opts.prog_name = "xdp_drop"; + + prog = xdp_program__create(&opts); + if (!prog) { + err = -errno; + pr_warn("Couldn't load XDP program: %s\n", strerror(-err)); + goto out; + } + + const struct thread_config cfg = { + .pkt = &data, + .pkt_size = sizeof(data), + .num_pkts = 1, + .batch_size = 1, + .prog = prog + }; + err = run_prog(&cfg, &status); + if (err == -EOPNOTSUPP) { + pr_warn("BPF_PROG_RUN with batch size support is missing from libbpf.\n"); + } else if (err == -EINVAL) { + err = -EOPNOTSUPP; + pr_warn("Kernel doesn't support live packet mode for XDP BPF_PROG_RUN.\n"); + } else if (err) { + pr_warn("Error probing kernel support: %s\n", strerror(-err)); + } + + xdp_program__close(prog); +out: + xdp_trafficgen__destroy(skel); + return err; +} + +static int create_runners(pthread_t **runner_threads, struct thread_config **thread_configs, int num_threads, + struct thread_config *tcfg, struct xdp_program *prog) +{ + struct thread_config *t; + pthread_t *threads; + int i, err; + + threads = calloc(sizeof(pthread_t), num_threads); + if (!threads) { + pr_warn("Couldn't allocate memory\n"); + return -ENOMEM; + } + + t = calloc(sizeof(struct thread_config), num_threads); + if (!t) { + pr_warn("Couldn't allocate memory\n"); + free(threads); + return -ENOMEM; + } + + for (i = 0; i < num_threads; i++) { + memcpy(&t[i], tcfg, sizeof(*tcfg)); + tcfg->cpu_core_id++; + + t[i].prog = xdp_program__clone(prog, 0); + err = libxdp_get_error(t[i].prog); + if (err) { + pr_warn("Failed to clone xdp_program: %s\n", strerror(-err)); + t[i].prog = NULL; + goto err; + } + + err = pthread_create(&threads[i], NULL, run_traffic, &t[i]); + if (err < 0) { + pr_warn("Failed to create traffic thread: %s\n", strerror(-err)); + goto err; + } + } + + *runner_threads = threads; + *thread_configs = t; + + return 0; + +err: + for (i = 0; i < num_threads; i++) { + pthread_cancel(threads[i]); + xdp_program__close(t[i].prog); + } + free(t); + free(threads); + + return err; +} + + +static __be16 calc_udp_cksum(const struct udp_packet *pkt) +{ + __u32 chksum = pkt->iph.nexthdr + bpf_ntohs(pkt->iph.payload_len); + int i; + + for (i = 0; i < 8; i++) { + chksum += bpf_ntohs(pkt->iph.saddr.s6_addr16[i]); + chksum += bpf_ntohs(pkt->iph.daddr.s6_addr16[i]); + } + chksum += bpf_ntohs(pkt->udp.source); + chksum += bpf_ntohs(pkt->udp.dest); + chksum += bpf_ntohs(pkt->udp.len); + + while (chksum >> 16) + chksum = (chksum & 0xFFFF) + (chksum >> 16); + return bpf_htons(~chksum); +} + +static const struct udpopt { + __u32 num_pkts; + struct iface iface; + struct mac_addr dst_mac; + struct mac_addr src_mac; + struct ip_addr dst_ip; + struct ip_addr src_ip; + __u16 dst_port; + __u16 src_port; + __u16 dyn_ports; + __u16 threads; + __u16 interval; +} defaults_udp = { + .interval = 1, + .threads = 1, +}; + +static int prepare_udp_pkt(const struct udpopt *cfg) +{ + struct mac_addr src_mac = cfg->src_mac; + int err; + + if (macaddr_is_null(&src_mac)) { + err = get_mac_addr(cfg->iface.ifindex, &src_mac); + if (err) + return err; + } + memcpy(pkt_udp.eth.h_source, &src_mac, sizeof(src_mac)); + if (!macaddr_is_null(&cfg->dst_mac)) + memcpy(pkt_udp.eth.h_dest, &cfg->dst_mac, sizeof(cfg->dst_mac)); + + if (!ipaddr_is_null(&cfg->src_ip)) { + if (cfg->src_ip.af != AF_INET6) { + pr_warn("Only IPv6 is supported\n"); + return 1; + } + pkt_udp.iph.saddr = cfg->src_ip.addr.addr6; + } + + if (!ipaddr_is_null(&cfg->dst_ip)) { + if (cfg->dst_ip.af != AF_INET6) { + pr_warn("Only IPv6 is supported\n"); + return 1; + } + pkt_udp.iph.daddr = cfg->dst_ip.addr.addr6; + } + + if (cfg->src_port) + pkt_udp.udp.source = bpf_htons(cfg->src_port); + if (cfg->dst_port) + pkt_udp.udp.dest = bpf_htons(cfg->dst_port); + pkt_udp.udp.check = calc_udp_cksum(&pkt_udp); + return 0; +} + +static struct prog_option udp_options[] = { + DEFINE_OPTION("dst-mac", OPT_MACADDR, struct udpopt, dst_mac, + .short_opt = 'm', + .metavar = "<mac addr>", + .help = "Destination MAC address of generated packets"), + DEFINE_OPTION("src-mac", OPT_MACADDR, struct udpopt, src_mac, + .short_opt = 'M', + .metavar = "<mac addr>", + .help = "Source MAC address of generated packets"), + DEFINE_OPTION("dst-addr", OPT_IPADDR, struct udpopt, dst_ip, + .short_opt = 'a', + .metavar = "<addr>", + .help = "Destination IP address of generated packets"), + DEFINE_OPTION("src-addr", OPT_IPADDR, struct udpopt, src_ip, + .short_opt = 'A', + .metavar = "<addr>", + .help = "Source IP address of generated packets"), + DEFINE_OPTION("dst-port", OPT_U16, struct udpopt, dst_port, + .short_opt = 'p', + .metavar = "<port>", + .help = "Destination port of generated packets"), + DEFINE_OPTION("src-port", OPT_U16, struct udpopt, src_port, + .short_opt = 'P', + .metavar = "<port>", + .help = "Source port of generated packets"), + DEFINE_OPTION("dyn-ports", OPT_U16, struct udpopt, dyn_ports, + .short_opt = 'd', + .metavar = "<num ports>", + .help = "Dynamically vary destination port over a range of <num ports>"), + DEFINE_OPTION("num-packets", OPT_U32, struct udpopt, num_pkts, + .short_opt = 'n', + .metavar = "<port>", + .help = "Number of packets to send"), + DEFINE_OPTION("threads", OPT_U16, struct udpopt, threads, + .short_opt = 't', + .metavar = "<threads>", + .help = "Number of simultaneous threads to transmit from"), + DEFINE_OPTION("interval", OPT_U16, struct udpopt, interval, + .short_opt = 'I', + .metavar = "<s>", + .help = "Output statistics with this interval"), + DEFINE_OPTION("interface", OPT_IFNAME, struct udpopt, iface, + .positional = true, + .metavar = "<ifname>", + .required = true, + .help = "Load on device <ifname>"), + END_OPTIONS +}; + +int do_udp(const void *opt, __unused const char *pin_root_path) +{ + const struct udpopt *cfg = opt; + + DECLARE_LIBXDP_OPTS(xdp_program_opts, opts); + struct thread_config *t = NULL, tcfg = { + .pkt = &pkt_udp, + .pkt_size = sizeof(pkt_udp), + .num_pkts = cfg->num_pkts, + }; + struct trafficgen_state bpf_state = {}; + struct xdp_trafficgen *skel = NULL; + pthread_t *runner_threads = NULL; + struct xdp_program *prog = NULL; + int err = 0, i; + char buf[100]; + __u32 key = 0; + + err = probe_kernel_support(); + if (err) + return err; + + err = prepare_udp_pkt(cfg); + if (err) + goto out; + + skel = xdp_trafficgen__open(); + if (!skel) { + err = -errno; + pr_warn("Couldn't open XDP program: %s\n", strerror(-err)); + goto out; + } + + err = sample_init_pre_load(skel, cfg->iface.ifname); + if (err < 0) { + pr_warn("Failed to sample_init_pre_load: %s\n", strerror(-err)); + goto out; + } + + skel->rodata->config.port_start = cfg->dst_port; + skel->rodata->config.port_range = cfg->dyn_ports; + skel->rodata->config.ifindex_out = cfg->iface.ifindex; + bpf_state.next_port = cfg->dst_port; + + if (cfg->dyn_ports) + opts.prog_name = "xdp_redirect_update_port"; + else + opts.prog_name = "xdp_redirect_notouch"; + opts.obj = skel->obj; + + prog = xdp_program__create(&opts); + if (!prog) { + err = -errno; + libxdp_strerror(err, buf, sizeof(buf)); + pr_warn("Couldn't open BPF file: %s\n", buf); + goto out; + } + + err = xdp_trafficgen__load(skel); + if (err) + goto out; + + err = bpf_map_update_elem(bpf_map__fd(skel->maps.state_map), + &key, &bpf_state, BPF_EXIST); + if (err) { + err = -errno; + pr_warn("Couldn't set initial state map value: %s\n", strerror(-err)); + goto out; + } + + err = sample_init(skel, mask, IFINDEX_LO, cfg->iface.ifindex); + if (err < 0) { + pr_warn("Failed to initialize sample: %s\n", strerror(-err)); + goto out; + } + + err = create_runners(&runner_threads, &t, cfg->threads, &tcfg, prog); + if (err) + goto out; + + pr_info("Transmitting on %s (ifindex %d)\n", + cfg->iface.ifname, cfg->iface.ifindex); + + err = sample_run(cfg->interval, NULL, NULL); + status_exited = true; + + for (i = 0; i < cfg->threads; i++) { + pthread_join(runner_threads[i], NULL); + xdp_program__close(t[i].prog); + } + +out: + xdp_program__close(prog); + xdp_trafficgen__destroy(skel); + free(runner_threads); + free(t); + return err; +} + +struct tcp_packet { + struct ethhdr eth; + struct ipv6hdr iph; + struct tcphdr tcp; + __u8 payload[1500 - sizeof(struct tcphdr) + - sizeof(struct ethhdr) - sizeof(struct ipv6hdr)]; +} __attribute__((__packed__)); + +static __unused struct tcp_packet pkt_tcp = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), + .iph.version = 6, + .iph.nexthdr = IPPROTO_TCP, + .iph.payload_len = bpf_htons(sizeof(struct tcp_packet) + - offsetof(struct tcp_packet, tcp)), + .iph.hop_limit = 64, + .iph.saddr.s6_addr16 = {bpf_htons(0xfe80), 0, 0, 0, 0, 0, 0, bpf_htons(1)}, + .iph.daddr.s6_addr16 = {bpf_htons(0xfe80), 0, 0, 0, 0, 0, 0, bpf_htons(2)}, + .tcp.source = bpf_htons(1), + .tcp.dest = bpf_htons(1), + .tcp.window = bpf_htons(0x100), + .tcp.doff = 5, + .tcp.ack = 1, +}; + +static void hexdump_data(void *data, int size) +{ + unsigned char *ptr = data; + int i; + for (i = 0; i < size; i++) { + if (i % 16 == 0) + pr_debug("\n%06X: ", i); + else if (i % 2 == 0) + pr_debug(" "); + pr_debug("%02X", *ptr++); + } + pr_debug("\n"); +} + +static __be16 calc_tcp_cksum(const struct tcp_packet *pkt) +{ + __u32 chksum = bpf_htons(pkt->iph.nexthdr) + pkt->iph.payload_len; + int payload_len = sizeof(pkt->payload); + struct tcphdr tcph_ = pkt->tcp; + __u16 *ptr = (void *)&tcph_; + int i; + + tcph_.check = 0; + + for (i = 0; i < 8; i++) { + chksum += pkt->iph.saddr.s6_addr16[i]; + chksum += pkt->iph.daddr.s6_addr16[i]; + } + for (i = 0; i < 10; i++) + chksum += *(ptr++); + + ptr = (void *)&pkt->payload; + for (i = 0; i < payload_len / 2; i++) + chksum += *(ptr++); + + if (payload_len % 2) + chksum += (*((__u8 *)ptr)) << 8; + + while (chksum >> 16) + chksum = (chksum & 0xFFFF) + (chksum >> 16); + + return ~chksum; +} + +static void prepare_tcp_pkt(const struct tcp_flowkey *fkey, + const struct tcp_flowstate *fstate) +{ + memcpy(pkt_tcp.eth.h_source, fstate->src_mac, ETH_ALEN); + memcpy(pkt_tcp.eth.h_dest, fstate->dst_mac, ETH_ALEN); + + pkt_tcp.iph.saddr = fkey->src_ip; + pkt_tcp.iph.daddr = fkey->dst_ip; + pkt_tcp.tcp.source = fkey->src_port; + pkt_tcp.tcp.dest = fkey->dst_port; + pkt_tcp.tcp.seq = bpf_htonl(fstate->seq); + pkt_tcp.tcp.ack_seq = bpf_htonl(fstate->rcv_seq); + + pkt_tcp.tcp.check = calc_tcp_cksum(&pkt_tcp); + pr_debug("TCP packet:\n"); + hexdump_data(&pkt_tcp, sizeof(pkt_tcp)); +} + +struct enum_val xdp_modes[] = { + {"native", XDP_MODE_NATIVE}, + {"skb", XDP_MODE_SKB}, + {"hw", XDP_MODE_HW}, + {NULL, 0} +}; + +static const struct tcpopt { + __u32 num_pkts; + struct iface iface; + char *dst_addr; + __u16 dst_port; + __u16 interval; + __u16 timeout; + enum xdp_attach_mode mode; +} defaults_tcp = { + .interval = 1, + .dst_port = 10000, + .timeout = 2, + .mode = XDP_MODE_NATIVE, +}; + +static struct prog_option tcp_options[] = { + DEFINE_OPTION("dst-port", OPT_U16, struct tcpopt, dst_port, + .short_opt = 'p', + .metavar = "<port>", + .help = "Connect to destination <port>. Default 10000"), + DEFINE_OPTION("num-packets", OPT_U32, struct tcpopt, num_pkts, + .short_opt = 'n', + .metavar = "<port>", + .help = "Number of packets to send"), + DEFINE_OPTION("interval", OPT_U16, struct tcpopt, interval, + .short_opt = 'I', + .metavar = "<s>", + .help = "Output statistics with this interval"), + DEFINE_OPTION("timeout", OPT_U16, struct tcpopt, timeout, + .short_opt = 't', + .metavar = "<s>", + .help = "TCP connect timeout (default 2 seconds)."), + DEFINE_OPTION("interface", OPT_IFNAME, struct tcpopt, iface, + .metavar = "<ifname>", + .required = true, + .short_opt = 'i', + .help = "Connect through device <ifname>"), + DEFINE_OPTION("mode", OPT_ENUM, struct tcpopt, mode, + .short_opt = 'm', + .typearg = xdp_modes, + .metavar = "<mode>", + .help = "Load ingress XDP program in <mode>; default native"), + DEFINE_OPTION("dst-addr", OPT_STRING, struct tcpopt, dst_addr, + .positional = true, + .required = true, + .metavar = "<hostname>", + .help = "Destination host of generated stream"), + END_OPTIONS +}; + +int do_tcp(const void *opt, __unused const char *pin_root_path) +{ + const struct tcpopt *cfg = opt; + + struct addrinfo *ai = NULL, hints = { + .ai_family = AF_INET6, + .ai_socktype = SOCK_STREAM, + .ai_protocol = IPPROTO_TCP, + }; + struct ip_addr local_addr = { .af = AF_INET6 }, remote_addr = { .af = AF_INET6 }; + struct bpf_map *state_map = NULL, *fstate_map; + DECLARE_LIBXDP_OPTS(xdp_program_opts, opts, + .prog_name = "xdp_handle_tcp_recv"); + struct xdp_program *ifindex_prog = NULL, *test_prog = NULL; + struct sockaddr_in6 local_saddr = {}, *addr6; + struct thread_config *t = NULL, tcfg = { + .pkt = &pkt_tcp, + .pkt_size = sizeof(pkt_tcp), + .num_pkts = cfg->num_pkts, + }; + struct trafficgen_state bpf_state = {}; + struct xdp_trafficgen *skel = NULL; + char buf_local[50], buf_remote[50]; + pthread_t *runner_threads = NULL; + socklen_t sockaddr_sz, tcpi_sz; + __u16 local_port, remote_port; + int sock = -1, err = -EINVAL; + struct tcp_flowstate fstate; + struct timeval timeout = { + .tv_sec = cfg->timeout, + }; + struct tcp_info tcpi = {}; + bool attached = false; + __u16 num_threads = 1; + __u32 key = 0; + char port[6]; + int i, sopt; + + err = probe_kernel_support(); + if (err) + return err; + + skel = xdp_trafficgen__open(); + if (!skel) { + err = -errno; + pr_warn("Couldn't open XDP program: %s\n", strerror(-err)); + goto out; + } + + err = sample_init_pre_load(skel, cfg->iface.ifname); + if (err < 0) { + pr_warn("Failed to sample_init_pre_load: %s\n", strerror(-err)); + goto out; + } + + opts.obj = skel->obj; + skel->rodata->config.ifindex_out = cfg->iface.ifindex; + + snprintf(port, sizeof(port), "%d", cfg->dst_port); + + err = getaddrinfo(cfg->dst_addr, port, &hints, &ai); + if (err) { + pr_warn("Couldn't resolve hostname: %s\n", gai_strerror(err)); + goto out; + } + + addr6 = (struct sockaddr_in6* )ai->ai_addr; + remote_addr.addr.addr6 = addr6->sin6_addr; + remote_port = bpf_ntohs(addr6->sin6_port); + + bpf_state.flow_key.dst_port = addr6->sin6_port; + bpf_state.flow_key.dst_ip = addr6->sin6_addr; + + print_addr(buf_remote, sizeof(buf_remote), &remote_addr); + + ifindex_prog = xdp_program__create(&opts); + if (!ifindex_prog) { + err = -errno; + pr_warn("Couldn't open XDP program: %s\n", strerror(-err)); + goto out; + } + + opts.prog_name = "xdp_redirect_send_tcp"; + test_prog = xdp_program__create(&opts); + if (!test_prog) { + err = -errno; + pr_warn("Couldn't find test program: %s\n", strerror(-err)); + goto out; + } + + state_map = skel->maps.state_map; + fstate_map = skel->maps.flow_state_map; + + if (!fstate_map) { + pr_warn("Couldn't find BPF maps\n"); + goto out; + } + + err = xdp_program__attach(ifindex_prog, cfg->iface.ifindex, cfg->mode, 0); + if (err) { + err = -errno; + pr_warn("Couldn't attach XDP program to iface '%s': %s\n", + cfg->iface.ifname, strerror(-err)); + goto out; + } + attached = true; + + err = bpf_map_update_elem(bpf_map__fd(state_map), + &key, &bpf_state, BPF_EXIST); + + if (err) { + err = -errno; + pr_warn("Couldn't set initial state map value: %s\n", strerror(-err)); + goto out; + } + + err = sample_init(skel, mask, IFINDEX_LO, cfg->iface.ifindex); + if (err < 0) { + pr_warn("Failed to initialize sample: %s\n", strerror(-err)); + goto out; + } + + sock = socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP); + if (sock < 0) { + err = -errno; + pr_warn("Couldn't open TCP socket: %s\n", strerror(-err)); + goto out; + } + + err = setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX, + &cfg->iface.ifindex, sizeof(cfg->iface.ifindex)); + if (err) { + err = -errno; + pr_warn("Couldn't bind to device '%s': %s\n", cfg->iface.ifname, strerror(-err)); + goto out; + } + + sopt = fcntl(sock, F_GETFL, NULL); + if (sopt < 0) { + err = -errno; + pr_warn("Couldn't get socket opts: %s\n", strerror(-err)); + goto out; + } + + err = fcntl(sock, F_SETFL, sopt | O_NONBLOCK); + if (err) { + err = -errno; + pr_warn("Couldn't set socket non-blocking: %s\n", strerror(-err)); + goto out; + } + + err = connect(sock, ai->ai_addr, ai->ai_addrlen); + if (err && errno == EINPROGRESS) { + fd_set wait; + + FD_ZERO(&wait); + FD_SET(sock, &wait); + + err = select(sock + 1, NULL, &wait, NULL, &timeout); + if (!err) { + err = -1; + errno = ETIMEDOUT; + } else if (err > 0) { + err = 0; + } + } + if (err) { + err = -errno; + pr_warn("Couldn't connect to destination: %s\n", strerror(-err)); + goto out; + } + + err = fcntl(sock, F_SETFL, sopt); + if (err) { + err = -errno; + pr_warn("Couldn't reset socket opts: %s\n", strerror(-err)); + goto out; + } + + sockaddr_sz = sizeof(local_saddr); + err = getsockname(sock, &local_saddr, &sockaddr_sz); + if (err) { + err = -errno; + pr_warn("Couldn't get local address: %s\n", strerror(-err)); + goto out; + } + + local_addr.addr.addr6 = local_saddr.sin6_addr; + local_port = bpf_htons(local_saddr.sin6_port); + print_addr(buf_local, sizeof(buf_local), &local_addr); + + printf("Connected to %s port %d from %s port %d\n", + buf_remote, remote_port, buf_local, local_port); + + bpf_state.flow_key.src_port = local_saddr.sin6_port; + bpf_state.flow_key.src_ip = local_saddr.sin6_addr; + + tcpi_sz = sizeof(tcpi); + err = getsockopt(sock, IPPROTO_TCP, TCP_INFO, &tcpi, &tcpi_sz); + if (err) { + err = -errno; + pr_warn("Couldn't get TCP_INFO for socket: %s\n", strerror(-err)); + goto out; + } + + err = bpf_map_lookup_elem(bpf_map__fd(fstate_map), + &bpf_state.flow_key, &fstate); + if (err) { + err = -errno; + pr_warn("Couldn't find flow state in map: %s\n", strerror(-err)); + goto out; + } + + if (tcpi.tcpi_snd_wnd != fstate.window) { + pr_warn("TCP_INFO and packet data disagree on window (%u != %u)\n", + tcpi.tcpi_snd_wnd, fstate.window); + } + + fstate.wscale = tcpi.tcpi_rcv_wscale; + fstate.flow_state = FLOW_STATE_RUNNING; + err = bpf_map_update_elem(bpf_map__fd(fstate_map), + &bpf_state.flow_key, &fstate, BPF_EXIST); + if (err) { + err = -errno; + pr_warn("Couldn't update flow state map: %s\n", strerror(-err)); + goto out; + } + + err = bpf_map_update_elem(bpf_map__fd(state_map), + &key, &bpf_state, BPF_EXIST); + if (err) { + err = -errno; + pr_warn("Couldn't update program state map: %s\n", strerror(-err)); + goto out; + } + + prepare_tcp_pkt(&bpf_state.flow_key, &fstate); + + err = create_runners(&runner_threads, &t, num_threads, &tcfg, test_prog); + if (err) + goto out; + + err = sample_run(cfg->interval, NULL, NULL); + status_exited = true; + for (i = 0; i < num_threads; i++) { + pthread_join(runner_threads[i], NULL); + xdp_program__close(t[i].prog); + } + + /* send 3 RSTs with 200ms interval to kill the other side of the connection */ + for (i = 0; i < 3; i++) { + usleep(200000); + + pkt_tcp.tcp.rst = 1; + pkt_tcp.iph.payload_len = bpf_htons(sizeof(struct tcphdr)); + pkt_tcp.tcp.check = calc_tcp_cksum(&pkt_tcp); + tcfg.cpu_core_id = 0; + tcfg.num_pkts = 1; + tcfg.pkt_size = offsetof(struct tcp_packet, payload); + tcfg.prog = test_prog; + run_traffic(&tcfg); + } + +out: + if (ai) + freeaddrinfo(ai); + if (sock >= 0) + close(sock); + if (attached) + xdp_program__detach(ifindex_prog, cfg->iface.ifindex, cfg->mode, 0); + + xdp_program__close(ifindex_prog); + xdp_program__close(test_prog); + + xdp_trafficgen__destroy(skel); + + free(runner_threads); + free(t); + return err; +} + +static const struct probeopt { +} defaults_probe = {}; + +static struct prog_option probe_options[] = {}; + +int do_probe(__unused const void *cfg, __unused const char *pin_root_path) +{ + int err = probe_kernel_support(); + + if (!err) + pr_info("Kernel supports live packet mode for XDP BPF_PROG_RUN.\n"); + return err; +} + +int do_help(__unused const void *cfg, __unused const char *pin_root_path) +{ + fprintf(stderr, + "Usage: xdp-trafficgen COMMAND [options]\n" + "\n" + "COMMAND can be one of:\n" + " udp - run in UDP mode\n" + " tcp - run in TCP mode\n" + " help - show this help message\n" + "\n" + "Use 'xdp-trafficgen COMMAND --help' to see options for each command\n"); + return -1; +} + +static const struct prog_command cmds[] = { + DEFINE_COMMAND(udp, "Run in UDP mode"), + DEFINE_COMMAND(tcp, "Run in TCP mode"), + DEFINE_COMMAND(probe, "Probe kernel support"), + { .name = "help", .func = do_help, .no_cfg = true }, + END_COMMANDS +}; + +union all_opts { + struct udpopt udp; +}; + +int main(int argc, char **argv) +{ + if (argc > 1) + return dispatch_commands(argv[1], argc - 1, argv + 1, cmds, + sizeof(union all_opts), PROG_NAME, false); + + return do_help(NULL, NULL); +} diff --git a/xdp-trafficgen/xdp-trafficgen.h b/xdp-trafficgen/xdp-trafficgen.h new file mode 100644 index 0000000..f1e841b --- /dev/null +++ b/xdp-trafficgen/xdp-trafficgen.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef XDP_TRAFFICGEN_H +#define XDP_TRAFFICGEN_H + +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/ipv6.h> + +struct tcp_flowkey { + struct in6_addr src_ip; + struct in6_addr dst_ip; + __u16 dst_port; + __u16 src_port; +}; + +#define FLOW_STATE_NEW 1 +#define FLOW_STATE_RUNNING 2 +#define FLOW_STATE_DONE 3 + +struct tcp_flowstate { + struct bpf_spin_lock lock; + __u8 dst_mac[ETH_ALEN]; + __u8 src_mac[ETH_ALEN]; + __u64 last_progress; + __u64 retransmits; + __u32 flow_state; + __u32 seq; /* our last sent seqno */ + __u32 ack_seq; /* last seqno that got acked */ + __u32 rcv_seq; /* receiver's seqno (our ACK seq) */ + __u32 dupack; + __u32 last_print; + __u32 highest_seq; + __u16 window; + __u8 wscale; +}; + +struct trafficgen_config { + int ifindex_out; + __u16 port_start; + __u16 port_range; +}; + +struct trafficgen_state { + struct tcp_flowkey flow_key; + __u16 next_port; +}; + + + +#endif diff --git a/xdp-trafficgen/xdp_trafficgen.bpf.c b/xdp-trafficgen/xdp_trafficgen.bpf.c new file mode 100644 index 0000000..720ecc8 --- /dev/null +++ b/xdp-trafficgen/xdp_trafficgen.bpf.c @@ -0,0 +1,348 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define XDP_STATS_MAP_PINNING LIBBPF_PIN_NONE + +#include "xdp-trafficgen.h" +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/ipv6.h> +#include <linux/udp.h> +#include <linux/tcp.h> +#include <linux/if_ether.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include <xdp/xdp_sample_shared.h> +#include <xdp/xdp_sample.bpf.h> +#include <xdp/xdp_sample_common.bpf.h> +#include <xdp/parsing_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct trafficgen_state); +} state_map SEC(".maps"); + + +const volatile struct trafficgen_config config; + +static void update_checksum(__u16 *sum, __u32 diff) +{ + /* We use the RFC 1071 method for incremental checksum updates + * because that can be used directly with the 32-bit sequence + * number difference (relying on folding for large differences) + */ + __u32 cksum = diff + (__u16)~bpf_ntohs(*sum); + + while (cksum > 0xffff) + cksum = (cksum & 0xffff) + (cksum >> 16); + *sum = bpf_htons(~cksum); +} + +static __u16 csum_fold_helper(__u32 csum) { + csum = (csum & 0xffff) + (csum >> 16); + return ~((csum & 0xffff) + (csum >> 16)); +} + +SEC("xdp") +int xdp_redirect_notouch(struct xdp_md *ctx) +{ + __u32 key = bpf_get_smp_processor_id();; + struct datarec *rec; + + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + return XDP_ABORTED; + + NO_TEAR_INC(rec->xdp_redirect); + + return bpf_redirect(config.ifindex_out, 0); +} + +SEC("xdp") +int xdp_redirect_update_port(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct trafficgen_state *state; + __u16 cur_port, port_diff; + int action = XDP_ABORTED; + struct datarec *rec; + struct udphdr *hdr; + __u32 key = 0; + + hdr = data + (sizeof(struct ethhdr) + sizeof(struct ipv6hdr)); + if (hdr + 1 > data_end) + goto out; + + state = bpf_map_lookup_elem(&state_map, &key); + if (!state) + goto out; + + key = bpf_get_smp_processor_id(); + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + goto out; + + cur_port = bpf_ntohs(hdr->dest); + port_diff = state->next_port - cur_port; + if (port_diff) { + update_checksum(&hdr->check, port_diff); + hdr->dest = bpf_htons(state->next_port); + } + if (state->next_port++ >= config.port_start + config.port_range - 1) + state->next_port = config.port_start; + + action = bpf_redirect(config.ifindex_out, 0); + NO_TEAR_INC(rec->processed); +out: + return action; +} + +SEC("xdp") +int xdp_drop(struct xdp_md *ctx) +{ + return XDP_DROP; +} + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, struct tcp_flowkey); + __type(value, struct tcp_flowstate); +} flow_state_map SEC(".maps"); + +static int cmp_ipaddr(struct in6_addr *a_, struct in6_addr *b_) +{ + __u8 *a = (void *)a_, *b = (void *)b_; + int i; + + for (i = 0; i < sizeof(struct in6_addr); i++) { + if (*a > *b) + return -1; + if (*a < *b) + return 1; + a++; + b++; + } + return 0; +} + +static inline __u8 before(__u32 seq1, __u32 seq2) +{ + return (__s32)(seq1 - seq2) < 0; +} + +/* Fixed 2 second timeout */ +#define TCP_RTO 2000000000UL + +SEC("xdp") +int xdp_handle_tcp_recv(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + struct tcp_flowstate *fstate, new_fstate = {}; + void *data = (void *)(long)ctx->data; + struct hdr_cursor nh = { .pos = data }; + struct trafficgen_state *state; + struct tcp_flowkey key = {}; + int eth_type, ip_type, err; + struct ipv6hdr *ipv6hdr; + struct tcphdr *tcphdr; + int action = XDP_PASS; + struct ethhdr *eth; + __u8 new_match; + __u32 ack_seq; + int i; + + eth_type = parse_ethhdr(&nh, data_end, ð); + if (eth_type != bpf_htons(ETH_P_IPV6)) + goto out; + + ip_type = parse_ip6hdr(&nh, data_end, &ipv6hdr); + if (ip_type != IPPROTO_TCP) + goto out; + + if (parse_tcphdr(&nh, data_end, &tcphdr) < 0) + goto out; + + state = bpf_map_lookup_elem(&state_map, &key); + if (!state) + goto out; + + /* swap dst and src for received packet */ + key.dst_ip = ipv6hdr->saddr; + key.dst_port = tcphdr->source; + + new_match = !cmp_ipaddr(&key.dst_ip, &state->flow_key.dst_ip) && key.dst_port == state->flow_key.dst_port; + + key.src_ip = ipv6hdr->daddr; + key.src_port = tcphdr->dest; + + fstate = bpf_map_lookup_elem(&flow_state_map, &key); + if (!fstate) { + if (!new_match) + goto out; + + new_fstate.flow_state = FLOW_STATE_NEW; + new_fstate.seq = bpf_ntohl(tcphdr->ack_seq); + for (i = 0; i < ETH_ALEN; i++) { + new_fstate.dst_mac[i] = eth->h_source[i]; + new_fstate.src_mac[i] = eth->h_dest[i]; + } + + err = bpf_map_update_elem(&flow_state_map, &key, &new_fstate, BPF_NOEXIST); + if (err) + goto out; + + fstate = bpf_map_lookup_elem(&flow_state_map, &key); + if (!fstate) + goto out; + } + + ack_seq = bpf_ntohl(tcphdr->ack_seq); +#ifdef DEBUG + bpf_printk("Got state seq %u ack_seq %u new %u seq %u new %u window %u\n", + fstate->seq, + fstate->ack_seq, ack_seq, + fstate->rcv_seq, bpf_ntohl(tcphdr->seq), bpf_htons(tcphdr->window)); +#endif + + bpf_spin_lock(&fstate->lock); + + if (fstate->ack_seq == ack_seq) + fstate->dupack++; + + fstate->window = bpf_ntohs(tcphdr->window); + fstate->ack_seq = ack_seq; + fstate->rcv_seq = bpf_ntohl(tcphdr->seq); + if (tcphdr->syn) + fstate->rcv_seq++; + + if (tcphdr->fin || tcphdr->rst) + fstate->flow_state = FLOW_STATE_DONE; + + /* If we've taken over the flow management, (after the handshake), drop + * the packet + */ + if (fstate->flow_state >= FLOW_STATE_RUNNING) + action = XDP_DROP; + bpf_spin_unlock(&fstate->lock); +out: + return action; +} + +SEC("xdp") +int xdp_redirect_send_tcp(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + __u32 new_seq, ack_seq, window; + struct trafficgen_state *state; + struct tcp_flowstate *fstate; + int action = XDP_ABORTED; + struct ipv6hdr *ipv6hdr; + struct tcphdr *tcphdr; + struct datarec *rec; + __u8 resend = 0; +#ifdef DEBUG + __u8 print = 0; +#endif + __u16 pkt_len; + __u32 key = 0; + __u64 now; + + ipv6hdr = data + sizeof(struct ethhdr); + tcphdr = data + (sizeof(struct ethhdr) + sizeof(struct ipv6hdr)); + if (tcphdr + 1 > data_end || ipv6hdr + 1 > data_end) + goto ret; + + pkt_len = bpf_ntohs(ipv6hdr->payload_len) - sizeof(*tcphdr); + + state = bpf_map_lookup_elem(&state_map, &key); + if (!state) + goto ret; + + key = bpf_get_smp_processor_id(); + rec = bpf_map_lookup_elem(&rx_cnt, &key); + if (!rec) + goto ret; + + fstate = bpf_map_lookup_elem(&flow_state_map, (const void *)&state->flow_key); + if (!fstate) + goto out; + + now = bpf_ktime_get_coarse_ns(); + + bpf_spin_lock(&fstate->lock); + + if (fstate->flow_state != FLOW_STATE_RUNNING) { + action = XDP_DROP; + bpf_spin_unlock(&fstate->lock); + goto out; + } + + /* reset sequence on packet loss */ + if (fstate->dupack || (fstate->last_progress && + now - fstate->last_progress > TCP_RTO)) { + fstate->seq = fstate->ack_seq; + fstate->dupack = 0; + } + new_seq = fstate->seq; + ack_seq = fstate->ack_seq; + window = fstate->window << fstate->wscale; +#ifdef DEBUG + if (fstate->last_print != fstate->seq) { + fstate->last_print = fstate->seq; + print = 1; + } +#endif + + if (!before(new_seq + pkt_len, ack_seq + window)) { + /* We caught up to the end up the RWIN, spin until ACKs come + * back opening up the window + */ + action = XDP_DROP; + bpf_spin_unlock(&fstate->lock); +#ifdef DEBUG + if (print) + bpf_printk("Dropping because %u isn't before %u (ack_seq %u wnd %u)", + new_seq + pkt_len, ack_seq + window, ack_seq, window); +#endif + goto out; + } + + if (!before(new_seq, fstate->highest_seq)) { + fstate->highest_seq = new_seq; + } else { + resend = 1; + fstate->retransmits++; + } + fstate->seq = new_seq + pkt_len; + fstate->last_progress = now; + bpf_spin_unlock(&fstate->lock); + + new_seq = bpf_htonl(new_seq); + if (new_seq != tcphdr->seq) { + __u32 csum; + csum = bpf_csum_diff(&tcphdr->seq, sizeof(__u32), + &new_seq, sizeof(new_seq), ~tcphdr->check); + + tcphdr->seq = new_seq; + tcphdr->check = csum_fold_helper(csum); + } + + action = bpf_redirect(config.ifindex_out, 0); +out: + /* record retransmissions as XDP_TX return codes until we get better stats */ + if (resend) + NO_TEAR_INC(rec->issue); + + if (action == XDP_REDIRECT) + NO_TEAR_INC(rec->xdp_redirect); + else + NO_TEAR_INC(rec->dropped); +ret: + return action; +} |