summaryrefslogtreecommitdiffstats
path: root/tools/testing/selftests/net
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-08-07 13:17:46 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-08-07 13:17:46 +0000
commit7f3a4257159dea8e7ef66d1a539dc6df708b8ed3 (patch)
treebcc69b5f4609f348fac49e2f59e210b29eaea783 /tools/testing/selftests/net
parentAdding upstream version 6.9.12. (diff)
downloadlinux-7f3a4257159dea8e7ef66d1a539dc6df708b8ed3.tar.xz
linux-7f3a4257159dea8e7ef66d1a539dc6df708b8ed3.zip
Adding upstream version 6.10.3.upstream/6.10.3
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tools/testing/selftests/net')
-rw-r--r--tools/testing/selftests/net/.gitignore4
-rw-r--r--tools/testing/selftests/net/Makefile60
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile2
-rw-r--r--tools/testing/selftests/net/af_unix/config3
-rw-r--r--tools/testing/selftests/net/af_unix/msg_oob.c734
-rw-r--r--tools/testing/selftests/net/af_unix/scm_rights.c307
-rw-r--r--tools/testing/selftests/net/af_unix/test_unix_oob.c436
-rw-r--r--tools/testing/selftests/net/bpf.mk53
-rwxr-xr-xtools/testing/selftests/net/bpf_offload.py1341
-rw-r--r--tools/testing/selftests/net/cmsg_sender.c32
-rwxr-xr-xtools/testing/selftests/net/cmsg_time.sh7
-rw-r--r--tools/testing/selftests/net/config2
-rwxr-xr-xtools/testing/selftests/net/devlink_port_split.py309
-rw-r--r--tools/testing/selftests/net/epoll_busy_poll.c320
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh46
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh24
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile9
-rw-r--r--tools/testing/selftests/net/forwarding/README33
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh18
-rw-r--r--tools/testing/selftests/net/forwarding/devlink_lib.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool.sh301
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool_extended_state.sh117
-rw-r--r--tools/testing/selftests/net/forwarding/ethtool_lib.sh120
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool_mm.sh340
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool_rmon.sh143
-rw-r--r--tools/testing/selftests/net/forwarding/forwarding.config.sample53
-rwxr-xr-xtools/testing/selftests/net/forwarding/hw_stats_l3.sh340
-rwxr-xr-xtools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh111
-rw-r--r--tools/testing/selftests/net/forwarding/ipip_lib.sh1
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh253
-rwxr-xr-xtools/testing/selftests/net/forwarding/lib_sh_test.sh208
-rwxr-xr-xtools/testing/selftests/net/forwarding/local_termination.sh30
-rwxr-xr-xtools/testing/selftests/net/forwarding/loopback.sh102
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh.sh35
-rw-r--r--tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh12
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh_res.sh35
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_nh.sh14
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_tests.sh19
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_red.sh10
-rw-r--r--tools/testing/selftests/net/forwarding/sch_tbf_core.sh2
-rw-r--r--tools/testing/selftests/net/forwarding/tc_common.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_tunnel_key.sh2
-rw-r--r--tools/testing/selftests/net/gro.c138
-rw-r--r--tools/testing/selftests/net/hsr/Makefile3
-rw-r--r--tools/testing/selftests/net/hsr/config1
-rw-r--r--tools/testing/selftests/net/hsr/hsr_common.sh84
-rwxr-xr-xtools/testing/selftests/net/hsr/hsr_ping.sh106
-rwxr-xr-xtools/testing/selftests/net/hsr/hsr_redbox.sh121
-rw-r--r--tools/testing/selftests/net/lib.sh9
-rw-r--r--tools/testing/selftests/net/lib/.gitignore2
-rw-r--r--tools/testing/selftests/net/lib/Makefile15
-rw-r--r--tools/testing/selftests/net/lib/csum.c (renamed from tools/testing/selftests/net/csum.c)18
-rw-r--r--tools/testing/selftests/net/lib/py/__init__.py8
-rw-r--r--tools/testing/selftests/net/lib/py/consts.py9
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py159
-rw-r--r--tools/testing/selftests/net/lib/py/netns.py31
-rw-r--r--tools/testing/selftests/net/lib/py/nsim.py134
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py102
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py49
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh53
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh2
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh149
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh165
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh34
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh295
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh16
-rw-r--r--tools/testing/selftests/net/nat6to4.bpf.c (renamed from tools/testing/selftests/net/nat6to4.c)0
-rw-r--r--tools/testing/selftests/net/netfilter/.gitignore6
-rw-r--r--tools/testing/selftests/net/netfilter/Makefile52
-rw-r--r--tools/testing/selftests/net/netfilter/audit_logread.c165
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter.sh171
-rwxr-xr-xtools/testing/selftests/net/netfilter/bridge_brouter.sh122
-rw-r--r--tools/testing/selftests/net/netfilter/config89
-rw-r--r--tools/testing/selftests/net/netfilter/connect_close.c136
-rw-r--r--tools/testing/selftests/net/netfilter/conntrack_dump_flush.c469
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_icmp_related.sh278
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh191
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh87
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh164
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_vrf.sh220
-rwxr-xr-xtools/testing/selftests/net/netfilter/ipvs.sh211
-rw-r--r--tools/testing/selftests/net/netfilter/lib.sh10
-rwxr-xr-xtools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh71
-rwxr-xr-xtools/testing/selftests/net/netfilter/nf_nat_edemux.sh97
-rw-r--r--tools/testing/selftests/net/netfilter/nf_queue.c395
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_audit.sh268
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh1622
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range_perf.sh9
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_conntrack_helper.sh171
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_fib.sh234
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_flowtable.sh671
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_meta.sh142
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat.sh1156
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat_zones.sh267
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh417
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_synproxy.sh96
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_zones_many.sh164
-rwxr-xr-xtools/testing/selftests/net/netfilter/packetdrill/common.sh33
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt118
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt62
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt59
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt44
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt51
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt34
-rwxr-xr-xtools/testing/selftests/net/netfilter/rpath.sh175
-rw-r--r--tools/testing/selftests/net/netfilter/sctp_collision.c99
-rw-r--r--tools/testing/selftests/net/netfilter/settings1
-rwxr-xr-xtools/testing/selftests/net/netfilter/xt_string.sh133
-rwxr-xr-xtools/testing/selftests/net/nl_netdev.py98
-rw-r--r--tools/testing/selftests/net/openvswitch/ovs-dpctl.py16
-rw-r--r--tools/testing/selftests/net/sample_map_ret0.bpf.c34
-rw-r--r--tools/testing/selftests/net/sample_ret0.bpf.c10
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh335
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh340
-rwxr-xr-xtools/testing/selftests/net/udpgro.sh2
-rwxr-xr-xtools/testing/selftests/net/udpgro_bench.sh2
-rwxr-xr-xtools/testing/selftests/net/udpgro_frglist.sh8
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh2
-rwxr-xr-xtools/testing/selftests/net/veth.sh2
-rw-r--r--tools/testing/selftests/net/xdp_dummy.bpf.c (renamed from tools/testing/selftests/net/xdp_dummy.c)0
-rwxr-xr-xtools/testing/selftests/net/xfrm_policy.sh4
121 files changed, 14593 insertions, 2915 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 2f9d378ede..666ab7d939 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -2,9 +2,9 @@
bind_bhash
bind_timewait
bind_wildcard
-csum
cmsg_sender
diag_uid
+epoll_busy_poll
fin_ack_lat
gro
hwtstamp_config
@@ -31,6 +31,7 @@ reuseport_dualstack
rxtimestamp
sctp_hello
scm_pidfd
+scm_rights
sk_bind_sendto_listen
sk_connect_zero_addr
socket
@@ -42,7 +43,6 @@ tap
tcp_fastopen_backup_key
tcp_inq
tcp_mmap
-test_unix_oob
timestamping
tls
toeplitz
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 7b6918d5f4..d9393569d0 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -20,7 +20,6 @@ TEST_PROGS += reuseaddr_ports_exhausted.sh
TEST_PROGS += txtimestamp.sh
TEST_PROGS += vrf-xfrm-tests.sh
TEST_PROGS += rxtimestamp.sh
-TEST_PROGS += devlink_port_split.py
TEST_PROGS += drop_monitor_tests.sh
TEST_PROGS += vrf_route_leaking.sh
TEST_PROGS += bareudp.sh
@@ -35,6 +34,7 @@ TEST_PROGS += gre_gso.sh
TEST_PROGS += cmsg_so_mark.sh
TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh
TEST_PROGS += netns-name.sh
+TEST_PROGS += nl_netdev.py
TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
@@ -43,6 +43,8 @@ TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh
TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh
TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh
TEST_PROGS += srv6_end_flavors_test.sh
+TEST_PROGS += srv6_end_dx4_netfilter_test.sh
+TEST_PROGS += srv6_end_dx6_netfilter_test.sh
TEST_PROGS += vrf_strict_mode_test.sh
TEST_PROGS += arp_ndisc_evict_nocarrier.sh
TEST_PROGS += ndisc_unsolicited_na_test.sh
@@ -67,7 +69,7 @@ TEST_GEN_FILES += ipsec
TEST_GEN_FILES += ioam6_parser
TEST_GEN_FILES += gro
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap
+TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap epoll_busy_poll
TEST_GEN_FILES += toeplitz
TEST_GEN_FILES += cmsg_sender
TEST_GEN_FILES += stress_reuseport_listen
@@ -81,9 +83,6 @@ TEST_PROGS += test_ingress_egress_chaining.sh
TEST_GEN_PROGS += so_incoming_cpu
TEST_PROGS += sctp_vrf.sh
TEST_GEN_FILES += sctp_hello
-TEST_GEN_FILES += csum
-TEST_GEN_FILES += nat6to4.o
-TEST_GEN_FILES += xdp_dummy.o
TEST_GEN_FILES += ip_local_port_range
TEST_GEN_FILES += bind_wildcard
TEST_PROGS += test_vxlan_mdb.sh
@@ -93,63 +92,22 @@ TEST_PROGS += test_bridge_backup_port.sh
TEST_PROGS += fdb_flush.sh
TEST_PROGS += fq_band_pktlimit.sh
TEST_PROGS += vlan_hw_filter.sh
+TEST_PROGS += bpf_offload.py
TEST_FILES := settings
TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh
+TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
+
TEST_INCLUDES := forwarding/lib.sh
include ../lib.mk
+$(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap
$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto
$(OUTPUT)/tcp_inq: LDLIBS += -lpthread
$(OUTPUT)/bind_bhash: LDLIBS += -lpthread
$(OUTPUT)/io_uring_zerocopy_tx: CFLAGS += -I../../../include/
-# Rules to generate bpf objs
-CLANG ?= clang
-SCRATCH_DIR := $(OUTPUT)/tools
-BUILD_DIR := $(SCRATCH_DIR)/build
-BPFDIR := $(abspath ../../../lib/bpf)
-APIDIR := $(abspath ../../../include/uapi)
-
-CCINCLUDE += -I../bpf
-CCINCLUDE += -I../../../../usr/include/
-CCINCLUDE += -I$(SCRATCH_DIR)/include
-
-BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
-
-MAKE_DIRS := $(BUILD_DIR)/libbpf
-$(MAKE_DIRS):
- mkdir -p $@
-
-# Get Clang's default includes on this system, as opposed to those seen by
-# '--target=bpf'. This fixes "missing" files on some architectures/distros,
-# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
-#
-# Use '-idirafter': Don't interfere with include mechanics except where the
-# build would have failed anyways.
-define get_sys_includes
-$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
- | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
-$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
-endef
-
-ifneq ($(CROSS_COMPILE),)
-CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
-endif
-
-CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
-
-$(OUTPUT)/nat6to4.o $(OUTPUT)/xdp_dummy.o: $(OUTPUT)/%.o : %.c $(BPFOBJ) | $(MAKE_DIRS)
- $(CLANG) -O2 --target=bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@
-
-$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
- $(APIDIR)/linux/bpf.h \
- | $(BUILD_DIR)/libbpf
- $(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
- EXTRA_CFLAGS='-g -O0' \
- DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
-
-EXTRA_CLEAN := $(SCRATCH_DIR)
+include bpf.mk
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 221c387a7d..5058447954 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,4 +1,4 @@
CFLAGS += $(KHDR_INCLUDES)
-TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd
+TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect
include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/config b/tools/testing/selftests/net/af_unix/config
new file mode 100644
index 0000000000..3736856776
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/config
@@ -0,0 +1,3 @@
+CONFIG_UNIX=y
+CONFIG_AF_UNIX_OOB=y
+CONFIG_UNIX_DIAG=m
diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c
new file mode 100644
index 0000000000..16d0c172ea
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/msg_oob.c
@@ -0,0 +1,734 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <netinet/in.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/signalfd.h>
+#include <sys/socket.h>
+
+#include "../../kselftest_harness.h"
+
+#define BUF_SZ 32
+
+FIXTURE(msg_oob)
+{
+ int fd[4]; /* 0: AF_UNIX sender
+ * 1: AF_UNIX receiver
+ * 2: TCP sender
+ * 3: TCP receiver
+ */
+ int signal_fd;
+ int epoll_fd[2]; /* 0: AF_UNIX receiver
+ * 1: TCP receiver
+ */
+ bool tcp_compliant;
+};
+
+FIXTURE_VARIANT(msg_oob)
+{
+ bool peek;
+};
+
+FIXTURE_VARIANT_ADD(msg_oob, no_peek)
+{
+ .peek = false,
+};
+
+FIXTURE_VARIANT_ADD(msg_oob, peek)
+{
+ .peek = true
+};
+
+static void create_unix_socketpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ int ret;
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, self->fd);
+ ASSERT_EQ(ret, 0);
+}
+
+static void create_tcp_socketpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ struct sockaddr_in addr;
+ socklen_t addrlen;
+ int listen_fd;
+ int ret;
+
+ listen_fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GE(listen_fd, 0);
+
+ ret = listen(listen_fd, -1);
+ ASSERT_EQ(ret, 0);
+
+ addrlen = sizeof(addr);
+ ret = getsockname(listen_fd, (struct sockaddr *)&addr, &addrlen);
+ ASSERT_EQ(ret, 0);
+
+ self->fd[2] = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GE(self->fd[2], 0);
+
+ ret = connect(self->fd[2], (struct sockaddr *)&addr, addrlen);
+ ASSERT_EQ(ret, 0);
+
+ self->fd[3] = accept(listen_fd, (struct sockaddr *)&addr, &addrlen);
+ ASSERT_GE(self->fd[3], 0);
+
+ ret = fcntl(self->fd[3], F_SETFL, O_NONBLOCK);
+ ASSERT_EQ(ret, 0);
+}
+
+static void setup_sigurg(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ struct signalfd_siginfo siginfo;
+ int pid = getpid();
+ sigset_t mask;
+ int i, ret;
+
+ for (i = 0; i < 2; i++) {
+ ret = ioctl(self->fd[i * 2 + 1], FIOSETOWN, &pid);
+ ASSERT_EQ(ret, 0);
+ }
+
+ ret = sigemptyset(&mask);
+ ASSERT_EQ(ret, 0);
+
+ ret = sigaddset(&mask, SIGURG);
+ ASSERT_EQ(ret, 0);
+
+ ret = sigprocmask(SIG_BLOCK, &mask, NULL);
+ ASSERT_EQ(ret, 0);
+
+ self->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK);
+ ASSERT_GE(self->signal_fd, 0);
+
+ ret = read(self->signal_fd, &siginfo, sizeof(siginfo));
+ ASSERT_EQ(ret, -1);
+}
+
+static void setup_epollpri(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ struct epoll_event event = {
+ .events = EPOLLPRI,
+ };
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ int ret;
+
+ self->epoll_fd[i] = epoll_create1(0);
+ ASSERT_GE(self->epoll_fd[i], 0);
+
+ ret = epoll_ctl(self->epoll_fd[i], EPOLL_CTL_ADD, self->fd[i * 2 + 1], &event);
+ ASSERT_EQ(ret, 0);
+ }
+}
+
+static void close_sockets(FIXTURE_DATA(msg_oob) *self)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ close(self->fd[i]);
+}
+
+FIXTURE_SETUP(msg_oob)
+{
+ create_unix_socketpair(_metadata, self);
+ create_tcp_socketpair(_metadata, self);
+
+ setup_sigurg(_metadata, self);
+ setup_epollpri(_metadata, self);
+
+ self->tcp_compliant = true;
+}
+
+FIXTURE_TEARDOWN(msg_oob)
+{
+ close_sockets(self);
+}
+
+static void __epollpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ bool oob_remaining)
+{
+ struct epoll_event event[2] = {};
+ int i, ret[2];
+
+ for (i = 0; i < 2; i++)
+ ret[i] = epoll_wait(self->epoll_fd[i], &event[i], 1, 0);
+
+ ASSERT_EQ(ret[0], oob_remaining);
+
+ if (self->tcp_compliant)
+ ASSERT_EQ(ret[0], ret[1]);
+
+ if (oob_remaining) {
+ ASSERT_EQ(event[0].events, EPOLLPRI);
+
+ if (self->tcp_compliant)
+ ASSERT_EQ(event[0].events, event[1].events);
+ }
+}
+
+static void __sendpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ const void *buf, size_t len, int flags)
+{
+ int i, ret[2];
+
+ for (i = 0; i < 2; i++) {
+ struct signalfd_siginfo siginfo = {};
+ int bytes;
+
+ ret[i] = send(self->fd[i * 2], buf, len, flags);
+
+ bytes = read(self->signal_fd, &siginfo, sizeof(siginfo));
+
+ if (flags & MSG_OOB) {
+ ASSERT_EQ(bytes, sizeof(siginfo));
+ ASSERT_EQ(siginfo.ssi_signo, SIGURG);
+
+ bytes = read(self->signal_fd, &siginfo, sizeof(siginfo));
+ }
+
+ ASSERT_EQ(bytes, -1);
+ }
+
+ ASSERT_EQ(ret[0], len);
+ ASSERT_EQ(ret[0], ret[1]);
+}
+
+static void __recvpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ const void *expected_buf, int expected_len,
+ int buf_len, int flags)
+{
+ int i, ret[2], recv_errno[2], expected_errno = 0;
+ char recv_buf[2][BUF_SZ] = {};
+ bool printed = false;
+
+ ASSERT_GE(BUF_SZ, buf_len);
+
+ errno = 0;
+
+ for (i = 0; i < 2; i++) {
+ ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags);
+ recv_errno[i] = errno;
+ }
+
+ if (expected_len < 0) {
+ expected_errno = -expected_len;
+ expected_len = -1;
+ }
+
+ if (ret[0] != expected_len || recv_errno[0] != expected_errno) {
+ TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]);
+ TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf);
+
+ ASSERT_EQ(ret[0], expected_len);
+ ASSERT_EQ(recv_errno[0], expected_errno);
+ }
+
+ if (ret[0] != ret[1] || recv_errno[0] != recv_errno[1]) {
+ TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]);
+ TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]);
+
+ printed = true;
+
+ if (self->tcp_compliant) {
+ ASSERT_EQ(ret[0], ret[1]);
+ ASSERT_EQ(recv_errno[0], recv_errno[1]);
+ }
+ }
+
+ if (expected_len >= 0) {
+ int cmp;
+
+ cmp = strncmp(expected_buf, recv_buf[0], expected_len);
+ if (cmp) {
+ TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]);
+ TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf);
+
+ ASSERT_EQ(cmp, 0);
+ }
+
+ cmp = strncmp(recv_buf[0], recv_buf[1], expected_len);
+ if (cmp) {
+ if (!printed) {
+ TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]);
+ TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]);
+ }
+
+ if (self->tcp_compliant)
+ ASSERT_EQ(cmp, 0);
+ }
+ }
+}
+
+static void __setinlinepair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ int i, oob_inline = 1;
+
+ for (i = 0; i < 2; i++) {
+ int ret;
+
+ ret = setsockopt(self->fd[i * 2 + 1], SOL_SOCKET, SO_OOBINLINE,
+ &oob_inline, sizeof(oob_inline));
+ ASSERT_EQ(ret, 0);
+ }
+}
+
+static void __siocatmarkpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ bool oob_head)
+{
+ int answ[2] = {};
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ int ret;
+
+ ret = ioctl(self->fd[i * 2 + 1], SIOCATMARK, &answ[i]);
+ ASSERT_EQ(ret, 0);
+ }
+
+ ASSERT_EQ(answ[0], oob_head);
+
+ if (self->tcp_compliant)
+ ASSERT_EQ(answ[0], answ[1]);
+}
+
+#define sendpair(buf, len, flags) \
+ __sendpair(_metadata, self, buf, len, flags)
+
+#define recvpair(expected_buf, expected_len, buf_len, flags) \
+ do { \
+ if (variant->peek) \
+ __recvpair(_metadata, self, \
+ expected_buf, expected_len, \
+ buf_len, (flags) | MSG_PEEK); \
+ __recvpair(_metadata, self, \
+ expected_buf, expected_len, buf_len, flags); \
+ } while (0)
+
+#define epollpair(oob_remaining) \
+ __epollpair(_metadata, self, oob_remaining)
+
+#define siocatmarkpair(oob_head) \
+ __siocatmarkpair(_metadata, self, oob_head)
+
+#define setinlinepair() \
+ __setinlinepair(_metadata, self)
+
+#define tcp_incompliant \
+ for (self->tcp_compliant = false; \
+ self->tcp_compliant == false; \
+ self->tcp_compliant = true)
+
+TEST_F(msg_oob, non_oob)
+{
+ sendpair("x", 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, oob)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("x", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+}
+
+TEST_F(msg_oob, oob_drop)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("", -EAGAIN, 1, 0); /* Drop OOB. */
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, oob_ahead)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 4, 0);
+ epollpair(false);
+ siocatmarkpair(true);
+}
+
+TEST_F(msg_oob, oob_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 5, 0); /* Break at OOB even with enough buffer. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ recvpair("", -EAGAIN, 1, 0);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, oob_ahead_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("world", 5, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 9, 0); /* Break at OOB even after it's recv()ed. */
+ epollpair(false);
+ siocatmarkpair(true);
+
+ recvpair("world", 5, 5, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, oob_break_drop)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("world", 5, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 10, 0); /* Break at OOB even with enough buffer. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("world", 5, 10, 0); /* Drop OOB and recv() the next skb. */
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, ex_oob_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("wor", 3, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("ld", 2, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("r", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ recvpair("ld", 2, 2, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, ex_oob_drop)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */
+ epollpair(true);
+
+ tcp_incompliant {
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0); /* TCP drops "y" by passing through it. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, MSG_OOB); /* TCP returns -EINVAL. */
+ epollpair(false);
+ siocatmarkpair(true);
+ }
+}
+
+TEST_F(msg_oob, ex_oob_drop_2)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */
+ epollpair(true);
+
+ tcp_incompliant {
+ siocatmarkpair(false);
+ }
+
+ recvpair("y", 1, 1, MSG_OOB);
+ epollpair(false);
+
+ tcp_incompliant {
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0); /* TCP returns -EAGAIN. */
+ epollpair(false);
+ siocatmarkpair(true);
+ }
+}
+
+TEST_F(msg_oob, ex_oob_ahead_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("wor", 3, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("r", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ sendpair("ld", 2, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ tcp_incompliant {
+ recvpair("hellowol", 8, 10, 0); /* TCP recv()s "helloworl", why "r" ?? */
+ }
+
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("d", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+}
+
+TEST_F(msg_oob, ex_oob_siocatmark)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ sendpair("world", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_oob)
+{
+ setinlinepair();
+
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("x", 1, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_oob_break)
+{
+ setinlinepair();
+
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 5, 0); /* Break at OOB but not at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("o", 1, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_oob_ahead_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("world", 5, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ setinlinepair();
+
+ recvpair("hell", 4, 9, 0); /* Break at OOB even with enough buffer. */
+ epollpair(false);
+ siocatmarkpair(true);
+
+ tcp_incompliant {
+ recvpair("world", 5, 6, 0); /* TCP recv()s "oworld", ... "o" ??? */
+ }
+
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_ex_oob_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("wor", 3, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("ld", 2, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ setinlinepair();
+
+ recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("rld", 3, 3, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_ex_oob_no_drop)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ setinlinepair();
+
+ sendpair("y", 1, MSG_OOB); /* TCP does NOT drops "x" at this moment. */
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_ex_oob_drop)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */
+ epollpair(true);
+
+ setinlinepair();
+
+ tcp_incompliant {
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0); /* TCP recv()s "y". */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, 0); /* TCP returns -EAGAIN. */
+ epollpair(false);
+ siocatmarkpair(false);
+ }
+}
+
+TEST_F(msg_oob, inline_ex_oob_siocatmark)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ setinlinepair();
+
+ sendpair("world", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(false);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c
new file mode 100644
index 0000000000..d663362565
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/scm_rights.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "../../kselftest_harness.h"
+
+FIXTURE(scm_rights)
+{
+ int fd[32];
+};
+
+FIXTURE_VARIANT(scm_rights)
+{
+ char name[32];
+ int type;
+ int flags;
+ bool test_listener;
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, dgram)
+{
+ .name = "UNIX ",
+ .type = SOCK_DGRAM,
+ .flags = 0,
+ .test_listener = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = 0,
+ .test_listener = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_oob)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = MSG_OOB,
+ .test_listener = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = 0,
+ .test_listener = true,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = MSG_OOB,
+ .test_listener = true,
+};
+
+static int count_sockets(struct __test_metadata *_metadata,
+ const FIXTURE_VARIANT(scm_rights) *variant)
+{
+ int sockets = -1, len, ret;
+ char *line = NULL;
+ size_t unused;
+ FILE *f;
+
+ f = fopen("/proc/net/protocols", "r");
+ ASSERT_NE(NULL, f);
+
+ len = strlen(variant->name);
+
+ while (getline(&line, &unused, f) != -1) {
+ int unused2;
+
+ if (strncmp(line, variant->name, len))
+ continue;
+
+ ret = sscanf(line + len, "%d %d", &unused2, &sockets);
+ ASSERT_EQ(2, ret);
+
+ break;
+ }
+
+ free(line);
+
+ ret = fclose(f);
+ ASSERT_EQ(0, ret);
+
+ return sockets;
+}
+
+FIXTURE_SETUP(scm_rights)
+{
+ int ret;
+
+ ret = unshare(CLONE_NEWNET);
+ ASSERT_EQ(0, ret);
+
+ ret = count_sockets(_metadata, variant);
+ ASSERT_EQ(0, ret);
+}
+
+FIXTURE_TEARDOWN(scm_rights)
+{
+ int ret;
+
+ sleep(1);
+
+ ret = count_sockets(_metadata, variant);
+ ASSERT_EQ(0, ret);
+}
+
+static void create_listeners(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_rights) *self,
+ int n)
+{
+ struct sockaddr_un addr = {
+ .sun_family = AF_UNIX,
+ };
+ socklen_t addrlen;
+ int i, ret;
+
+ for (i = 0; i < n * 2; i += 2) {
+ self->fd[i] = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, self->fd[i]);
+
+ addrlen = sizeof(addr.sun_family);
+ ret = bind(self->fd[i], (struct sockaddr *)&addr, addrlen);
+ ASSERT_EQ(0, ret);
+
+ ret = listen(self->fd[i], -1);
+ ASSERT_EQ(0, ret);
+
+ addrlen = sizeof(addr);
+ ret = getsockname(self->fd[i], (struct sockaddr *)&addr, &addrlen);
+ ASSERT_EQ(0, ret);
+
+ self->fd[i + 1] = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, self->fd[i + 1]);
+
+ ret = connect(self->fd[i + 1], (struct sockaddr *)&addr, addrlen);
+ ASSERT_EQ(0, ret);
+ }
+}
+
+static void create_socketpairs(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_rights) *self,
+ const FIXTURE_VARIANT(scm_rights) *variant,
+ int n)
+{
+ int i, ret;
+
+ ASSERT_GE(sizeof(self->fd) / sizeof(int), n);
+
+ for (i = 0; i < n * 2; i += 2) {
+ ret = socketpair(AF_UNIX, variant->type, 0, self->fd + i);
+ ASSERT_EQ(0, ret);
+ }
+}
+
+static void __create_sockets(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_rights) *self,
+ const FIXTURE_VARIANT(scm_rights) *variant,
+ int n)
+{
+ ASSERT_LE(n * 2, sizeof(self->fd) / sizeof(self->fd[0]));
+
+ if (variant->test_listener)
+ create_listeners(_metadata, self, n);
+ else
+ create_socketpairs(_metadata, self, variant, n);
+}
+
+static void __close_sockets(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_rights) *self,
+ int n)
+{
+ int i, ret;
+
+ ASSERT_GE(sizeof(self->fd) / sizeof(int), n);
+
+ for (i = 0; i < n * 2; i++) {
+ ret = close(self->fd[i]);
+ ASSERT_EQ(0, ret);
+ }
+}
+
+void __send_fd(struct __test_metadata *_metadata,
+ const FIXTURE_DATA(scm_rights) *self,
+ const FIXTURE_VARIANT(scm_rights) *variant,
+ int inflight, int receiver)
+{
+#define MSG "x"
+#define MSGLEN 1
+ struct {
+ struct cmsghdr cmsghdr;
+ int fd[2];
+ } cmsg = {
+ .cmsghdr = {
+ .cmsg_len = CMSG_LEN(sizeof(cmsg.fd)),
+ .cmsg_level = SOL_SOCKET,
+ .cmsg_type = SCM_RIGHTS,
+ },
+ .fd = {
+ self->fd[inflight * 2],
+ self->fd[inflight * 2],
+ },
+ };
+ struct iovec iov = {
+ .iov_base = MSG,
+ .iov_len = MSGLEN,
+ };
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &cmsg,
+ .msg_controllen = CMSG_SPACE(sizeof(cmsg.fd)),
+ };
+ int ret;
+
+ ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags);
+ ASSERT_EQ(MSGLEN, ret);
+}
+
+#define create_sockets(n) \
+ __create_sockets(_metadata, self, variant, n)
+#define close_sockets(n) \
+ __close_sockets(_metadata, self, n)
+#define send_fd(inflight, receiver) \
+ __send_fd(_metadata, self, variant, inflight, receiver)
+
+TEST_F(scm_rights, self_ref)
+{
+ create_sockets(2);
+
+ send_fd(0, 0);
+
+ send_fd(1, 1);
+
+ close_sockets(2);
+}
+
+TEST_F(scm_rights, triangle)
+{
+ create_sockets(6);
+
+ send_fd(0, 1);
+ send_fd(1, 2);
+ send_fd(2, 0);
+
+ send_fd(3, 4);
+ send_fd(4, 5);
+ send_fd(5, 3);
+
+ close_sockets(6);
+}
+
+TEST_F(scm_rights, cross_edge)
+{
+ create_sockets(8);
+
+ send_fd(0, 1);
+ send_fd(1, 2);
+ send_fd(2, 0);
+ send_fd(1, 3);
+ send_fd(3, 2);
+
+ send_fd(4, 5);
+ send_fd(5, 6);
+ send_fd(6, 4);
+ send_fd(5, 7);
+ send_fd(7, 6);
+
+ close_sockets(8);
+}
+
+TEST_F(scm_rights, backtrack_from_scc)
+{
+ create_sockets(10);
+
+ send_fd(0, 1);
+ send_fd(0, 4);
+ send_fd(1, 2);
+ send_fd(2, 3);
+ send_fd(3, 1);
+
+ send_fd(5, 6);
+ send_fd(5, 9);
+ send_fd(6, 7);
+ send_fd(7, 8);
+ send_fd(8, 6);
+
+ close_sockets(10);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
deleted file mode 100644
index a7c51889ac..0000000000
--- a/tools/testing/selftests/net/af_unix/test_unix_oob.c
+++ /dev/null
@@ -1,436 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/socket.h>
-#include <arpa/inet.h>
-#include <unistd.h>
-#include <string.h>
-#include <fcntl.h>
-#include <sys/ioctl.h>
-#include <errno.h>
-#include <netinet/tcp.h>
-#include <sys/un.h>
-#include <sys/signal.h>
-#include <sys/poll.h>
-
-static int pipefd[2];
-static int signal_recvd;
-static pid_t producer_id;
-static char sock_name[32];
-
-static void sig_hand(int sn, siginfo_t *si, void *p)
-{
- signal_recvd = sn;
-}
-
-static int set_sig_handler(int signal)
-{
- struct sigaction sa;
-
- sa.sa_sigaction = sig_hand;
- sigemptyset(&sa.sa_mask);
- sa.sa_flags = SA_SIGINFO | SA_RESTART;
-
- return sigaction(signal, &sa, NULL);
-}
-
-static void set_filemode(int fd, int set)
-{
- int flags = fcntl(fd, F_GETFL, 0);
-
- if (set)
- flags &= ~O_NONBLOCK;
- else
- flags |= O_NONBLOCK;
- fcntl(fd, F_SETFL, flags);
-}
-
-static void signal_producer(int fd)
-{
- char cmd;
-
- cmd = 'S';
- write(fd, &cmd, sizeof(cmd));
-}
-
-static void wait_for_signal(int fd)
-{
- char buf[5];
-
- read(fd, buf, 5);
-}
-
-static void die(int status)
-{
- fflush(NULL);
- unlink(sock_name);
- kill(producer_id, SIGTERM);
- exit(status);
-}
-
-int is_sioctatmark(int fd)
-{
- int ans = -1;
-
- if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) {
-#ifdef DEBUG
- perror("SIOCATMARK Failed");
-#endif
- }
- return ans;
-}
-
-void read_oob(int fd, char *c)
-{
-
- *c = ' ';
- if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) {
-#ifdef DEBUG
- perror("Reading MSG_OOB Failed");
-#endif
- }
-}
-
-int read_data(int pfd, char *buf, int size)
-{
- int len = 0;
-
- memset(buf, size, '0');
- len = read(pfd, buf, size);
-#ifdef DEBUG
- if (len < 0)
- perror("read failed");
-#endif
- return len;
-}
-
-static void wait_for_data(int pfd, int event)
-{
- struct pollfd pfds[1];
-
- pfds[0].fd = pfd;
- pfds[0].events = event;
- poll(pfds, 1, -1);
-}
-
-void producer(struct sockaddr_un *consumer_addr)
-{
- int cfd;
- char buf[64];
- int i;
-
- memset(buf, 'x', sizeof(buf));
- cfd = socket(AF_UNIX, SOCK_STREAM, 0);
-
- wait_for_signal(pipefd[0]);
- if (connect(cfd, (struct sockaddr *)consumer_addr,
- sizeof(*consumer_addr)) != 0) {
- perror("Connect failed");
- kill(0, SIGTERM);
- exit(1);
- }
-
- for (i = 0; i < 2; i++) {
- /* Test 1: Test for SIGURG and OOB */
- wait_for_signal(pipefd[0]);
- memset(buf, 'x', sizeof(buf));
- buf[63] = '@';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- wait_for_signal(pipefd[0]);
-
- /* Test 2: Test for OOB being overwitten */
- memset(buf, 'x', sizeof(buf));
- buf[63] = '%';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- memset(buf, 'x', sizeof(buf));
- buf[63] = '#';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- wait_for_signal(pipefd[0]);
-
- /* Test 3: Test for SIOCATMARK */
- memset(buf, 'x', sizeof(buf));
- buf[63] = '@';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- memset(buf, 'x', sizeof(buf));
- buf[63] = '%';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- memset(buf, 'x', sizeof(buf));
- send(cfd, buf, sizeof(buf), 0);
-
- wait_for_signal(pipefd[0]);
-
- /* Test 4: Test for 1byte OOB msg */
- memset(buf, 'x', sizeof(buf));
- buf[0] = '@';
- send(cfd, buf, 1, MSG_OOB);
- }
-}
-
-int
-main(int argc, char **argv)
-{
- int lfd, pfd;
- struct sockaddr_un consumer_addr, paddr;
- socklen_t len = sizeof(consumer_addr);
- char buf[1024];
- int on = 0;
- char oob;
- int atmark;
-
- lfd = socket(AF_UNIX, SOCK_STREAM, 0);
- memset(&consumer_addr, 0, sizeof(consumer_addr));
- consumer_addr.sun_family = AF_UNIX;
- sprintf(sock_name, "unix_oob_%d", getpid());
- unlink(sock_name);
- strcpy(consumer_addr.sun_path, sock_name);
-
- if ((bind(lfd, (struct sockaddr *)&consumer_addr,
- sizeof(consumer_addr))) != 0) {
- perror("socket bind failed");
- exit(1);
- }
-
- pipe(pipefd);
-
- listen(lfd, 1);
-
- producer_id = fork();
- if (producer_id == 0) {
- producer(&consumer_addr);
- exit(0);
- }
-
- set_sig_handler(SIGURG);
- signal_producer(pipefd[1]);
-
- pfd = accept(lfd, (struct sockaddr *) &paddr, &len);
- fcntl(pfd, F_SETOWN, getpid());
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 1:
- * veriyf that SIGURG is
- * delivered, 63 bytes are
- * read, oob is '@', and POLLPRI works.
- */
- wait_for_data(pfd, POLLPRI);
- read_oob(pfd, &oob);
- len = read_data(pfd, buf, 1024);
- if (!signal_recvd || len != 63 || oob != '@') {
- fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 2:
- * Verify that the first OOB is over written by
- * the 2nd one and the first OOB is returned as
- * part of the read, and sigurg is received.
- */
- wait_for_data(pfd, POLLIN | POLLPRI);
- len = 0;
- while (len < 70)
- len = recv(pfd, buf, 1024, MSG_PEEK);
- len = read_data(pfd, buf, 1024);
- read_oob(pfd, &oob);
- if (!signal_recvd || len != 127 || oob != '#') {
- fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 3:
- * verify that 2nd oob over writes
- * the first one and read breaks at
- * oob boundary returning 127 bytes
- * and sigurg is received and atmark
- * is set.
- * oob is '%' and second read returns
- * 64 bytes.
- */
- len = 0;
- wait_for_data(pfd, POLLIN | POLLPRI);
- while (len < 150)
- len = recv(pfd, buf, 1024, MSG_PEEK);
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- read_oob(pfd, &oob);
-
- if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
- fprintf(stderr,
- "Test 3 failed, sigurg %d len %d OOB %c atmark %d\n",
- signal_recvd, len, oob, atmark);
- die(1);
- }
-
- signal_recvd = 0;
-
- len = read_data(pfd, buf, 1024);
- if (len != 64) {
- fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 4:
- * verify that a single byte
- * oob message is delivered.
- * set non blocking mode and
- * check proper error is
- * returned and sigurg is
- * received and correct
- * oob is read.
- */
-
- set_filemode(pfd, 0);
-
- wait_for_data(pfd, POLLIN | POLLPRI);
- len = read_data(pfd, buf, 1024);
- if ((len == -1) && (errno == 11))
- len = 0;
-
- read_oob(pfd, &oob);
-
- if (!signal_recvd || len != 0 || oob != '@') {
- fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- set_filemode(pfd, 1);
-
- /* Inline Testing */
-
- on = 1;
- if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) {
- perror("SO_OOBINLINE");
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 1 -- Inline:
- * Check that SIGURG is
- * delivered and 63 bytes are
- * read and oob is '@'
- */
-
- wait_for_data(pfd, POLLIN | POLLPRI);
- len = read_data(pfd, buf, 1024);
-
- if (!signal_recvd || len != 63) {
- fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n",
- signal_recvd, len);
- die(1);
- }
-
- len = read_data(pfd, buf, 1024);
-
- if (len != 1) {
- fprintf(stderr,
- "Test 1.1 Inline failed, sigurg %d len %d oob %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 2 -- Inline:
- * Verify that the first OOB is over written by
- * the 2nd one and read breaks correctly on
- * 2nd OOB boundary with the first OOB returned as
- * part of the read, and sigurg is delivered and
- * siocatmark returns true.
- * next read returns one byte, the oob byte
- * and siocatmark returns false.
- */
- len = 0;
- wait_for_data(pfd, POLLIN | POLLPRI);
- while (len < 70)
- len = recv(pfd, buf, 1024, MSG_PEEK);
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- if (len != 127 || atmark != 1 || !signal_recvd) {
- fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n",
- len, atmark);
- die(1);
- }
-
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- if (len != 1 || buf[0] != '#' || atmark == 1) {
- fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n",
- len, buf[0], atmark);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 3 -- Inline:
- * verify that 2nd oob over writes
- * the first one and read breaks at
- * oob boundary returning 127 bytes
- * and sigurg is received and siocatmark
- * is true after the read.
- * subsequent read returns 65 bytes
- * because of oob which should be '%'.
- */
- len = 0;
- wait_for_data(pfd, POLLIN | POLLPRI);
- while (len < 126)
- len = recv(pfd, buf, 1024, MSG_PEEK);
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- if (!signal_recvd || len != 127 || !atmark) {
- fprintf(stderr,
- "Test 3 Inline failed, sigurg %d len %d data %c\n",
- signal_recvd, len, buf[0]);
- die(1);
- }
-
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- if (len != 65 || buf[0] != '%' || atmark != 0) {
- fprintf(stderr,
- "Test 3.1 Inline failed, len %d oob %c atmark %d\n",
- len, buf[0], atmark);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 4 -- Inline:
- * verify that a single
- * byte oob message is delivered
- * and read returns one byte, the oob
- * byte and sigurg is received
- */
- wait_for_data(pfd, POLLIN | POLLPRI);
- len = read_data(pfd, buf, 1024);
- if (!signal_recvd || len != 1 || buf[0] != '@') {
- fprintf(stderr,
- "Test 4 Inline failed, signal %d len %d data %c\n",
- signal_recvd, len, buf[0]);
- die(1);
- }
- die(0);
-}
diff --git a/tools/testing/selftests/net/bpf.mk b/tools/testing/selftests/net/bpf.mk
new file mode 100644
index 0000000000..a4f6755dd8
--- /dev/null
+++ b/tools/testing/selftests/net/bpf.mk
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0
+# Rules to generate bpf objs
+CLANG ?= clang
+SCRATCH_DIR := $(OUTPUT)/tools
+BUILD_DIR := $(SCRATCH_DIR)/build
+BPFDIR := $(top_srcdir)/tools/lib/bpf
+APIDIR := $(top_srcdir)/tools/include/uapi
+
+CCINCLUDE += -I$(selfdir)/bpf
+CCINCLUDE += -I$(top_srcdir)/usr/include/
+CCINCLUDE += -I$(SCRATCH_DIR)/include
+
+BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
+
+MAKE_DIRS := $(BUILD_DIR)/libbpf
+$(MAKE_DIRS):
+ $(call msg,MKDIR,,$@)
+ $(Q)mkdir -p $@
+
+# Get Clang's default includes on this system, as opposed to those seen by
+# '--target=bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+define get_sys_includes
+$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
+ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
+$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
+endef
+
+ifneq ($(CROSS_COMPILE),)
+CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif
+
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
+
+BPF_PROG_OBJS := $(patsubst %.c,$(OUTPUT)/%.o,$(wildcard *.bpf.c))
+
+$(BPF_PROG_OBJS): $(OUTPUT)/%.o : %.c $(BPFOBJ) | $(MAKE_DIRS)
+ $(call msg,BPF_PROG,,$@)
+ $(Q)$(CLANG) -O2 -g --target=bpf $(CCINCLUDE) $(CLANG_SYS_INCLUDES) \
+ -c $< -o $@
+
+$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
+ $(APIDIR)/linux/bpf.h \
+ | $(BUILD_DIR)/libbpf
+ $(call msg,MAKE,,$@)
+ $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
+ EXTRA_CFLAGS='-g -O0' \
+ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
+
+EXTRA_CLEAN += $(SCRATCH_DIR)
diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py
new file mode 100755
index 0000000000..3efe44f6e9
--- /dev/null
+++ b/tools/testing/selftests/net/bpf_offload.py
@@ -0,0 +1,1341 @@
+#!/usr/bin/env python3
+
+# Copyright (C) 2017 Netronome Systems, Inc.
+# Copyright (c) 2019 Mellanox Technologies. All rights reserved
+#
+# This software is licensed under the GNU General License Version 2,
+# June 1991 as shown in the file COPYING in the top-level directory of this
+# source tree.
+#
+# THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
+# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
+# OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
+# THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+from datetime import datetime
+import argparse
+import errno
+import json
+import os
+import pprint
+import random
+import re
+import stat
+import string
+import struct
+import subprocess
+import time
+import traceback
+
+from lib.py import NetdevSim, NetdevSimDev
+
+
+logfile = None
+log_level = 1
+skip_extack = False
+bpf_test_dir = os.path.dirname(os.path.realpath(__file__))
+pp = pprint.PrettyPrinter()
+devs = [] # devices we created for clean up
+files = [] # files to be removed
+netns = [] # net namespaces to be removed
+
+def log_get_sec(level=0):
+ return "*" * (log_level + level)
+
+def log_level_inc(add=1):
+ global log_level
+ log_level += add
+
+def log_level_dec(sub=1):
+ global log_level
+ log_level -= sub
+
+def log_level_set(level):
+ global log_level
+ log_level = level
+
+def log(header, data, level=None):
+ """
+ Output to an optional log.
+ """
+ if logfile is None:
+ return
+ if level is not None:
+ log_level_set(level)
+
+ if not isinstance(data, str):
+ data = pp.pformat(data)
+
+ if len(header):
+ logfile.write("\n" + log_get_sec() + " ")
+ logfile.write(header)
+ if len(header) and len(data.strip()):
+ logfile.write("\n")
+ logfile.write(data)
+
+def skip(cond, msg):
+ if not cond:
+ return
+ print("SKIP: " + msg)
+ log("SKIP: " + msg, "", level=1)
+ os.sys.exit(0)
+
+def fail(cond, msg):
+ if not cond:
+ return
+ print("FAIL: " + msg)
+ tb = "".join(traceback.extract_stack().format())
+ print(tb)
+ log("FAIL: " + msg, tb, level=1)
+ os.sys.exit(1)
+
+def start_test(msg):
+ log(msg, "", level=1)
+ log_level_inc()
+ print(msg)
+
+def cmd(cmd, shell=True, include_stderr=False, background=False, fail=True):
+ """
+ Run a command in subprocess and return tuple of (retval, stdout);
+ optionally return stderr as well as third value.
+ """
+ proc = subprocess.Popen(cmd, shell=shell, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ if background:
+ msg = "%s START: %s" % (log_get_sec(1),
+ datetime.now().strftime("%H:%M:%S.%f"))
+ log("BKG " + proc.args, msg)
+ return proc
+
+ return cmd_result(proc, include_stderr=include_stderr, fail=fail)
+
+def cmd_result(proc, include_stderr=False, fail=False):
+ stdout, stderr = proc.communicate()
+ stdout = stdout.decode("utf-8")
+ stderr = stderr.decode("utf-8")
+ proc.stdout.close()
+ proc.stderr.close()
+
+ stderr = "\n" + stderr
+ if stderr[-1] == "\n":
+ stderr = stderr[:-1]
+
+ sec = log_get_sec(1)
+ log("CMD " + proc.args,
+ "RETCODE: %d\n%s STDOUT:\n%s%s STDERR:%s\n%s END: %s" %
+ (proc.returncode, sec, stdout, sec, stderr,
+ sec, datetime.now().strftime("%H:%M:%S.%f")))
+
+ if proc.returncode != 0 and fail:
+ if len(stderr) > 0 and stderr[-1] == "\n":
+ stderr = stderr[:-1]
+ raise Exception("Command failed: %s\n%s" % (proc.args, stderr))
+
+ if include_stderr:
+ return proc.returncode, stdout, stderr
+ else:
+ return proc.returncode, stdout
+
+def rm(f):
+ cmd("rm -f %s" % (f))
+ if f in files:
+ files.remove(f)
+
+def tool(name, args, flags, JSON=True, ns="", fail=True, include_stderr=False):
+ params = ""
+ if JSON:
+ params += "%s " % (flags["json"])
+
+ if ns:
+ ns = "ip netns exec %s " % (ns)
+ elif ns is None:
+ ns = ""
+
+ if include_stderr:
+ ret, stdout, stderr = cmd(ns + name + " " + params + args,
+ fail=fail, include_stderr=True)
+ else:
+ ret, stdout = cmd(ns + name + " " + params + args,
+ fail=fail, include_stderr=False)
+
+ if JSON and len(stdout.strip()) != 0:
+ out = json.loads(stdout)
+ else:
+ out = stdout
+
+ if include_stderr:
+ return ret, out, stderr
+ else:
+ return ret, out
+
+def bpftool(args, JSON=True, ns="", fail=True, include_stderr=False):
+ return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns,
+ fail=fail, include_stderr=include_stderr)
+
+def bpftool_prog_list(expected=None, ns="", exclude_orphaned=True):
+ _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True)
+ # Remove the base progs
+ for p in base_progs:
+ if p in progs:
+ progs.remove(p)
+ if exclude_orphaned:
+ progs = [ p for p in progs if not p['orphaned'] ]
+ if expected is not None:
+ if len(progs) != expected:
+ fail(True, "%d BPF programs loaded, expected %d" %
+ (len(progs), expected))
+ return progs
+
+def bpftool_map_list(expected=None, ns=""):
+ _, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
+ # Remove the base maps
+ maps = [m for m in maps if m not in base_maps and m.get('name') and m.get('name') not in base_map_names]
+ if expected is not None:
+ if len(maps) != expected:
+ fail(True, "%d BPF maps loaded, expected %d" %
+ (len(maps), expected))
+ return maps
+
+def bpftool_prog_list_wait(expected=0, n_retry=20):
+ for i in range(n_retry):
+ nprogs = len(bpftool_prog_list())
+ if nprogs == expected:
+ return
+ time.sleep(0.05)
+ raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs))
+
+def bpftool_map_list_wait(expected=0, n_retry=20, ns=""):
+ for i in range(n_retry):
+ maps = bpftool_map_list(ns=ns)
+ if len(maps) == expected:
+ return maps
+ time.sleep(0.05)
+ raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps))
+
+def bpftool_prog_load(sample, file_name, maps=[], prog_type="xdp", dev=None,
+ fail=True, include_stderr=False):
+ args = "prog load %s %s" % (os.path.join(bpf_test_dir, sample), file_name)
+ if prog_type is not None:
+ args += " type " + prog_type
+ if dev is not None:
+ args += " dev " + dev
+ if len(maps):
+ args += " map " + " map ".join(maps)
+
+ res = bpftool(args, fail=fail, include_stderr=include_stderr)
+ if res[0] == 0:
+ files.append(file_name)
+ return res
+
+def ip(args, force=False, JSON=True, ns="", fail=True, include_stderr=False):
+ if force:
+ args = "-force " + args
+ return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns,
+ fail=fail, include_stderr=include_stderr)
+
+def tc(args, JSON=True, ns="", fail=True, include_stderr=False):
+ return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns,
+ fail=fail, include_stderr=include_stderr)
+
+def ethtool(dev, opt, args, fail=True):
+ return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail)
+
+def bpf_obj(name, sec="xdp", path=bpf_test_dir,):
+ return "obj %s sec %s" % (os.path.join(path, name), sec)
+
+def bpf_pinned(name):
+ return "pinned %s" % (name)
+
+def bpf_bytecode(bytecode):
+ return "bytecode \"%s\"" % (bytecode)
+
+def mknetns(n_retry=10):
+ for i in range(n_retry):
+ name = ''.join([random.choice(string.ascii_letters) for i in range(8)])
+ ret, _ = ip("netns add %s" % (name), fail=False)
+ if ret == 0:
+ netns.append(name)
+ return name
+ return None
+
+def int2str(fmt, val):
+ ret = []
+ for b in struct.pack(fmt, val):
+ ret.append(int(b))
+ return " ".join(map(lambda x: str(x), ret))
+
+def str2int(strtab):
+ inttab = []
+ for i in strtab:
+ inttab.append(int(i, 16))
+ ba = bytearray(inttab)
+ if len(strtab) == 4:
+ fmt = "I"
+ elif len(strtab) == 8:
+ fmt = "Q"
+ else:
+ raise Exception("String array of len %d can't be unpacked to an int" %
+ (len(strtab)))
+ return struct.unpack(fmt, ba)[0]
+
+class DebugfsDir:
+ """
+ Class for accessing DebugFS directories as a dictionary.
+ """
+
+ def __init__(self, path):
+ self.path = path
+ self._dict = self._debugfs_dir_read(path)
+
+ def __len__(self):
+ return len(self._dict.keys())
+
+ def __getitem__(self, key):
+ if type(key) is int:
+ key = list(self._dict.keys())[key]
+ return self._dict[key]
+
+ def __setitem__(self, key, value):
+ log("DebugFS set %s = %s" % (key, value), "")
+ log_level_inc()
+
+ cmd("echo '%s' > %s/%s" % (value, self.path, key))
+ log_level_dec()
+
+ _, out = cmd('cat %s/%s' % (self.path, key))
+ self._dict[key] = out.strip()
+
+ def _debugfs_dir_read(self, path):
+ dfs = {}
+
+ log("DebugFS state for %s" % (path), "")
+ log_level_inc(add=2)
+
+ _, out = cmd('ls ' + path)
+ for f in out.split():
+ if f == "ports":
+ continue
+
+ p = os.path.join(path, f)
+ if not os.stat(p).st_mode & stat.S_IRUSR:
+ continue
+
+ if os.path.isfile(p):
+ # We need to init trap_flow_action_cookie before read it
+ if f == "trap_flow_action_cookie":
+ cmd('echo deadbeef > %s/%s' % (path, f))
+ _, out = cmd('cat %s/%s' % (path, f))
+ dfs[f] = out.strip()
+ elif os.path.isdir(p):
+ dfs[f] = DebugfsDir(p)
+ else:
+ raise Exception("%s is neither file nor directory" % (p))
+
+ log_level_dec()
+ log("DebugFS state", dfs)
+ log_level_dec()
+
+ return dfs
+
+class BpfNetdevSimDev(NetdevSimDev):
+ """
+ Class for netdevsim bus device and its attributes.
+ """
+ def __init__(self, port_count=1, ns=None):
+ super().__init__(port_count, ns=ns)
+ devs.append(self)
+
+ def _make_port(self, port_index, ifname):
+ return BpfNetdevSim(self, port_index, ifname, self.ns)
+
+ def dfs_num_bound_progs(self):
+ path = os.path.join(self.dfs_dir, "bpf_bound_progs")
+ _, progs = cmd('ls %s' % (path))
+ return len(progs.split())
+
+ def dfs_get_bound_progs(self, expected):
+ progs = DebugfsDir(os.path.join(self.dfs_dir, "bpf_bound_progs"))
+ if expected is not None:
+ if len(progs) != expected:
+ fail(True, "%d BPF programs bound, expected %d" %
+ (len(progs), expected))
+ return progs
+
+ def remove(self):
+ super().remove()
+ devs.remove(self)
+
+
+class BpfNetdevSim(NetdevSim):
+ """
+ Class for netdevsim netdevice and its attributes.
+ """
+
+ def __init__(self, nsimdev, port_index, ifname, ns=None):
+ super().__init__(nsimdev, port_index, ifname, ns=ns)
+
+ self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index)
+ self.dfs_refresh()
+
+ def __getitem__(self, key):
+ return self.dev[key]
+
+ def remove(self):
+ self.nsimdev.remove_nsim(self)
+
+ def dfs_refresh(self):
+ self.dfs = DebugfsDir(self.dfs_dir)
+ return self.dfs
+
+ def dfs_read(self, f):
+ path = os.path.join(self.dfs_dir, f)
+ _, data = cmd('cat %s' % (path))
+ return data.strip()
+
+ def wait_for_flush(self, bound=0, total=0, n_retry=20):
+ for i in range(n_retry):
+ nbound = self.nsimdev.dfs_num_bound_progs()
+ nprogs = len(bpftool_prog_list())
+ if nbound == bound and nprogs == total:
+ return
+ time.sleep(0.05)
+ raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs))
+
+ def set_ns(self, ns):
+ name = ns if ns else "1"
+ ip("link set dev %s netns %s" % (self.dev["ifname"], name), ns=self.ns)
+ self.ns = ns
+
+ def set_mtu(self, mtu, fail=True):
+ return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu),
+ fail=fail)
+
+ def set_xdp(self, bpf, mode, force=False, JSON=True, verbose=False,
+ fail=True, include_stderr=False):
+ if verbose:
+ bpf += " verbose"
+ return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf),
+ force=force, JSON=JSON,
+ fail=fail, include_stderr=include_stderr)
+
+ def unset_xdp(self, mode, force=False, JSON=True,
+ fail=True, include_stderr=False):
+ return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode),
+ force=force, JSON=JSON,
+ fail=fail, include_stderr=include_stderr)
+
+ def ip_link_show(self, xdp):
+ _, link = ip("link show dev %s" % (self['ifname']))
+ if len(link) > 1:
+ raise Exception("Multiple objects on ip link show")
+ if len(link) < 1:
+ return {}
+ fail(xdp != "xdp" in link,
+ "XDP program not reporting in iplink (reported %s, expected %s)" %
+ ("xdp" in link, xdp))
+ return link[0]
+
+ def tc_add_ingress(self):
+ tc("qdisc add dev %s ingress" % (self['ifname']))
+
+ def tc_del_ingress(self):
+ tc("qdisc del dev %s ingress" % (self['ifname']))
+
+ def tc_flush_filters(self, bound=0, total=0):
+ self.tc_del_ingress()
+ self.tc_add_ingress()
+ self.wait_for_flush(bound=bound, total=total)
+
+ def tc_show_ingress(self, expected=None):
+ # No JSON support, oh well...
+ flags = ["skip_sw", "skip_hw", "in_hw"]
+ named = ["protocol", "pref", "chain", "handle", "id", "tag"]
+
+ args = "-s filter show dev %s ingress" % (self['ifname'])
+ _, out = tc(args, JSON=False)
+
+ filters = []
+ lines = out.split('\n')
+ for line in lines:
+ words = line.split()
+ if "handle" not in words:
+ continue
+ fltr = {}
+ for flag in flags:
+ fltr[flag] = flag in words
+ for name in named:
+ try:
+ idx = words.index(name)
+ fltr[name] = words[idx + 1]
+ except ValueError:
+ pass
+ filters.append(fltr)
+
+ if expected is not None:
+ fail(len(filters) != expected,
+ "%d ingress filters loaded, expected %d" %
+ (len(filters), expected))
+ return filters
+
+ def cls_filter_op(self, op, qdisc="ingress", prio=None, handle=None,
+ chain=None, cls="", params="",
+ fail=True, include_stderr=False):
+ spec = ""
+ if prio is not None:
+ spec += " prio %d" % (prio)
+ if handle:
+ spec += " handle %s" % (handle)
+ if chain is not None:
+ spec += " chain %d" % (chain)
+
+ return tc("filter {op} dev {dev} {qdisc} {spec} {cls} {params}"\
+ .format(op=op, dev=self['ifname'], qdisc=qdisc, spec=spec,
+ cls=cls, params=params),
+ fail=fail, include_stderr=include_stderr)
+
+ def cls_bpf_add_filter(self, bpf, op="add", prio=None, handle=None,
+ chain=None, da=False, verbose=False,
+ skip_sw=False, skip_hw=False,
+ fail=True, include_stderr=False):
+ cls = "bpf " + bpf
+
+ params = ""
+ if da:
+ params += " da"
+ if verbose:
+ params += " verbose"
+ if skip_sw:
+ params += " skip_sw"
+ if skip_hw:
+ params += " skip_hw"
+
+ return self.cls_filter_op(op=op, prio=prio, handle=handle, cls=cls,
+ chain=chain, params=params,
+ fail=fail, include_stderr=include_stderr)
+
+ def set_ethtool_tc_offloads(self, enable, fail=True):
+ args = "hw-tc-offload %s" % ("on" if enable else "off")
+ return ethtool(self, "-K", args, fail=fail)
+
+################################################################################
+def clean_up():
+ global files, netns, devs
+
+ for dev in devs:
+ dev.remove()
+ for f in files:
+ cmd("rm -f %s" % (f))
+ for ns in netns:
+ cmd("ip netns delete %s" % (ns))
+ files = []
+ netns = []
+
+def pin_prog(file_name, idx=0):
+ progs = bpftool_prog_list(expected=(idx + 1))
+ prog = progs[idx]
+ bpftool("prog pin id %d %s" % (prog["id"], file_name))
+ files.append(file_name)
+
+ return file_name, bpf_pinned(file_name)
+
+def pin_map(file_name, idx=0, expected=1):
+ maps = bpftool_map_list_wait(expected=expected)
+ m = maps[idx]
+ bpftool("map pin id %d %s" % (m["id"], file_name))
+ files.append(file_name)
+
+ return file_name, bpf_pinned(file_name)
+
+def check_dev_info_removed(prog_file=None, map_file=None):
+ bpftool_prog_list(expected=0)
+ bpftool_prog_list(expected=1, exclude_orphaned=False)
+ ret, err = bpftool("prog show pin %s" % (prog_file), fail=False)
+ fail(ret != 0, "failed to show prog with removed device")
+
+ bpftool_map_list_wait(expected=0)
+ ret, err = bpftool("map show pin %s" % (map_file), fail=False)
+ fail(ret == 0, "Showing map with removed device did not fail")
+ fail(err["error"].find("No such device") == -1,
+ "Showing map with removed device expected ENODEV, error is %s" %
+ (err["error"]))
+
+def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False):
+ progs = bpftool_prog_list(expected=1, ns=ns)
+ prog = progs[0]
+
+ fail("dev" not in prog.keys(), "Device parameters not reported")
+ dev = prog["dev"]
+ fail("ifindex" not in dev.keys(), "Device parameters not reported")
+ fail("ns_dev" not in dev.keys(), "Device parameters not reported")
+ fail("ns_inode" not in dev.keys(), "Device parameters not reported")
+
+ if not other_ns:
+ fail("ifname" not in dev.keys(), "Ifname not reported")
+ fail(dev["ifname"] != sim["ifname"],
+ "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"]))
+ else:
+ fail("ifname" in dev.keys(), "Ifname is reported for other ns")
+
+ maps = bpftool_map_list_wait(expected=2, ns=ns)
+ for m in maps:
+ fail("dev" not in m.keys(), "Device parameters not reported")
+ fail(dev != m["dev"], "Map's device different than program's")
+
+def check_extack(output, reference, args):
+ if skip_extack:
+ return
+ lines = output.split("\n")
+ comp = len(lines) >= 2 and lines[1] == 'Error: ' + reference
+ fail(not comp, "Missing or incorrect netlink extack message")
+
+def check_extack_nsim(output, reference, args):
+ check_extack(output, "netdevsim: " + reference, args)
+
+def check_no_extack(res, needle):
+ fail((res[1] + res[2]).count(needle) or (res[1] + res[2]).count("Warning:"),
+ "Found '%s' in command output, leaky extack?" % (needle))
+
+def check_verifier_log(output, reference):
+ lines = output.split("\n")
+ for l in reversed(lines):
+ if l == reference:
+ return
+ fail(True, "Missing or incorrect message from netdevsim in verifier log")
+
+def check_multi_basic(two_xdps):
+ fail(two_xdps["mode"] != 4, "Bad mode reported with multiple programs")
+ fail("prog" in two_xdps, "Base program reported in multi program mode")
+ fail(len(two_xdps["attached"]) != 2,
+ "Wrong attached program count with two programs")
+ fail(two_xdps["attached"][0]["prog"]["id"] ==
+ two_xdps["attached"][1]["prog"]["id"],
+ "Offloaded and other programs have the same id")
+
+def test_spurios_extack(sim, obj, skip_hw, needle):
+ res = sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=skip_hw,
+ include_stderr=True)
+ check_no_extack(res, needle)
+ res = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1,
+ skip_hw=skip_hw, include_stderr=True)
+ check_no_extack(res, needle)
+ res = sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf",
+ include_stderr=True)
+ check_no_extack(res, needle)
+
+def test_multi_prog(simdev, sim, obj, modename, modeid):
+ start_test("Test multi-attachment XDP - %s + offload..." %
+ (modename or "default", ))
+ sim.set_xdp(obj, "offload")
+ xdp = sim.ip_link_show(xdp=True)["xdp"]
+ offloaded = sim.dfs_read("bpf_offloaded_id")
+ fail("prog" not in xdp, "Base program not reported in single program mode")
+ fail(len(xdp["attached"]) != 1,
+ "Wrong attached program count with one program")
+
+ sim.set_xdp(obj, modename)
+ two_xdps = sim.ip_link_show(xdp=True)["xdp"]
+
+ fail(xdp["attached"][0] not in two_xdps["attached"],
+ "Offload program not reported after other activated")
+ check_multi_basic(two_xdps)
+
+ offloaded2 = sim.dfs_read("bpf_offloaded_id")
+ fail(offloaded != offloaded2,
+ "Offload ID changed after loading other program")
+
+ start_test("Test multi-attachment XDP - replace...")
+ ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True)
+ fail(ret == 0, "Replaced one of programs without -force")
+ check_extack(err, "XDP program already attached.", args)
+
+ start_test("Test multi-attachment XDP - remove without mode...")
+ ret, _, err = sim.unset_xdp("", force=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Removed program without a mode flag")
+ check_extack(err, "More than one program loaded, unset mode is ambiguous.", args)
+
+ sim.unset_xdp("offload")
+ xdp = sim.ip_link_show(xdp=True)["xdp"]
+ offloaded = sim.dfs_read("bpf_offloaded_id")
+
+ fail(xdp["mode"] != modeid, "Bad mode reported after multiple programs")
+ fail("prog" not in xdp,
+ "Base program not reported after multi program mode")
+ fail(xdp["attached"][0] not in two_xdps["attached"],
+ "Offload program not reported after other activated")
+ fail(len(xdp["attached"]) != 1,
+ "Wrong attached program count with remaining programs")
+ fail(offloaded != "0", "Offload ID reported with only other program left")
+
+ start_test("Test multi-attachment XDP - reattach...")
+ sim.set_xdp(obj, "offload")
+ two_xdps = sim.ip_link_show(xdp=True)["xdp"]
+
+ fail(xdp["attached"][0] not in two_xdps["attached"],
+ "Other program not reported after offload activated")
+ check_multi_basic(two_xdps)
+
+ start_test("Test multi-attachment XDP - device remove...")
+ simdev.remove()
+
+ simdev = BpfNetdevSimDev()
+ sim, = simdev.nsims
+ sim.set_ethtool_tc_offloads(True)
+ return [simdev, sim]
+
+# Parse command line
+parser = argparse.ArgumentParser()
+parser.add_argument("--log", help="output verbose log to given file")
+args = parser.parse_args()
+if args.log:
+ logfile = open(args.log, 'w+')
+ logfile.write("# -*-Org-*-")
+
+log("Prepare...", "", level=1)
+log_level_inc()
+
+# Check permissions
+skip(os.getuid() != 0, "test must be run as root")
+
+# Check tools
+ret, progs = bpftool("prog", fail=False)
+skip(ret != 0, "bpftool not installed")
+base_progs = progs
+_, base_maps = bpftool("map")
+base_map_names = [
+ 'pid_iter.rodata', # created on each bpftool invocation
+ 'libbpf_det_bind', # created on each bpftool invocation
+]
+
+# Check netdevsim
+if not os.path.isdir("/sys/bus/netdevsim/"):
+ ret, out = cmd("modprobe netdevsim", fail=False)
+ skip(ret != 0, "netdevsim module could not be loaded")
+
+# Check debugfs
+_, out = cmd("mount")
+if out.find("/sys/kernel/debug type debugfs") == -1:
+ cmd("mount -t debugfs none /sys/kernel/debug")
+
+# Check samples are compiled
+samples = ["sample_ret0.bpf.o", "sample_map_ret0.bpf.o"]
+for s in samples:
+ ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False)
+ skip(ret != 0, "sample %s/%s not found, please compile it" %
+ (bpf_test_dir, s))
+
+# Check if iproute2 is built with libmnl (needed by extack support)
+_, _, err = cmd("tc qdisc delete dev lo handle 0",
+ fail=False, include_stderr=True)
+if err.find("Error: Failed to find qdisc with specified handle.") == -1:
+ print("Warning: no extack message in iproute2 output, libmnl missing?")
+ log("Warning: no extack message in iproute2 output, libmnl missing?", "")
+ skip_extack = True
+
+# Check if net namespaces seem to work
+ns = mknetns()
+skip(ns is None, "Could not create a net namespace")
+cmd("ip netns delete %s" % (ns))
+netns = []
+
+try:
+ obj = bpf_obj("sample_ret0.bpf.o")
+ bytecode = bpf_bytecode("1,6 0 0 4294967295,")
+
+ start_test("Test destruction of generic XDP...")
+ simdev = BpfNetdevSimDev()
+ sim, = simdev.nsims
+ sim.set_xdp(obj, "generic")
+ simdev.remove()
+ bpftool_prog_list_wait(expected=0)
+
+ simdev = BpfNetdevSimDev()
+ sim, = simdev.nsims
+ sim.tc_add_ingress()
+
+ start_test("Test TC non-offloaded...")
+ ret, _ = sim.cls_bpf_add_filter(obj, skip_hw=True, fail=False)
+ fail(ret != 0, "Software TC filter did not load")
+
+ start_test("Test TC non-offloaded isn't getting bound...")
+ ret, _ = sim.cls_bpf_add_filter(obj, fail=False)
+ fail(ret != 0, "Software TC filter did not load")
+ simdev.dfs_get_bound_progs(expected=0)
+
+ sim.tc_flush_filters()
+
+ start_test("Test TC offloads are off by default...")
+ ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "TC filter loaded without enabling TC offloads")
+ check_extack(err, "TC offload is disabled on net device.", args)
+ sim.wait_for_flush()
+
+ sim.set_ethtool_tc_offloads(True)
+ sim.dfs["bpf_tc_non_bound_accept"] = "Y"
+
+ start_test("Test TC offload by default...")
+ ret, _ = sim.cls_bpf_add_filter(obj, fail=False)
+ fail(ret != 0, "Software TC filter did not load")
+ simdev.dfs_get_bound_progs(expected=0)
+ ingress = sim.tc_show_ingress(expected=1)
+ fltr = ingress[0]
+ fail(not fltr["in_hw"], "Filter not offloaded by default")
+
+ sim.tc_flush_filters()
+
+ start_test("Test TC cBPF bytcode tries offload by default...")
+ ret, _ = sim.cls_bpf_add_filter(bytecode, fail=False)
+ fail(ret != 0, "Software TC filter did not load")
+ simdev.dfs_get_bound_progs(expected=0)
+ ingress = sim.tc_show_ingress(expected=1)
+ fltr = ingress[0]
+ fail(not fltr["in_hw"], "Bytecode not offloaded by default")
+
+ sim.tc_flush_filters()
+ sim.dfs["bpf_tc_non_bound_accept"] = "N"
+
+ start_test("Test TC cBPF unbound bytecode doesn't offload...")
+ ret, _, err = sim.cls_bpf_add_filter(bytecode, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "TC bytecode loaded for offload")
+ check_extack_nsim(err, "netdevsim configured to reject unbound programs.",
+ args)
+ sim.wait_for_flush()
+
+ start_test("Test non-0 chain offload...")
+ ret, _, err = sim.cls_bpf_add_filter(obj, chain=1, prio=1, handle=1,
+ skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Offloaded a filter to chain other than 0")
+ check_extack(err, "Driver supports only offload of chain 0.", args)
+ sim.tc_flush_filters()
+
+ start_test("Test TC replace...")
+ sim.cls_bpf_add_filter(obj, prio=1, handle=1)
+ sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1)
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_sw=True)
+ sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_sw=True)
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=True)
+ sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_hw=True)
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ start_test("Test TC replace bad flags...")
+ for i in range(3):
+ for j in range(3):
+ ret, _ = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1,
+ skip_sw=(j == 1), skip_hw=(j == 2),
+ fail=False)
+ fail(bool(ret) != bool(j),
+ "Software TC incorrect load in replace test, iteration %d" %
+ (j))
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ start_test("Test spurious extack from the driver...")
+ test_spurios_extack(sim, obj, False, "netdevsim")
+ test_spurios_extack(sim, obj, True, "netdevsim")
+
+ sim.set_ethtool_tc_offloads(False)
+
+ test_spurios_extack(sim, obj, False, "TC offload is disabled")
+ test_spurios_extack(sim, obj, True, "TC offload is disabled")
+
+ sim.set_ethtool_tc_offloads(True)
+
+ sim.tc_flush_filters()
+
+ start_test("Test TC offloads failure...")
+ sim.dfs["dev/bpf_bind_verifier_accept"] = 0
+ ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "TC filter did not reject with TC offloads enabled")
+ check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
+ sim.dfs["dev/bpf_bind_verifier_accept"] = 1
+
+ start_test("Test TC offloads work...")
+ ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret != 0, "TC filter did not load with TC offloads enabled")
+
+ start_test("Test TC offload basics...")
+ dfs = simdev.dfs_get_bound_progs(expected=1)
+ progs = bpftool_prog_list(expected=1)
+ ingress = sim.tc_show_ingress(expected=1)
+
+ dprog = dfs[0]
+ prog = progs[0]
+ fltr = ingress[0]
+ fail(fltr["skip_hw"], "TC does reports 'skip_hw' on offloaded filter")
+ fail(not fltr["in_hw"], "TC does not report 'in_hw' for offloaded filter")
+ fail(not fltr["skip_sw"], "TC does not report 'skip_sw' back")
+
+ start_test("Test TC offload is device-bound...")
+ fail(str(prog["id"]) != fltr["id"], "Program IDs don't match")
+ fail(prog["tag"] != fltr["tag"], "Program tags don't match")
+ fail(fltr["id"] != dprog["id"], "Program IDs don't match")
+ fail(dprog["state"] != "xlated", "Offloaded program state not translated")
+ fail(dprog["loaded"] != "Y", "Offloaded program is not loaded")
+
+ start_test("Test disabling TC offloads is rejected while filters installed...")
+ ret, _ = sim.set_ethtool_tc_offloads(False, fail=False)
+ fail(ret == 0, "Driver should refuse to disable TC offloads with filters installed...")
+ sim.set_ethtool_tc_offloads(True)
+
+ start_test("Test qdisc removal frees things...")
+ sim.tc_flush_filters()
+ sim.tc_show_ingress(expected=0)
+
+ start_test("Test disabling TC offloads is OK without filters...")
+ ret, _ = sim.set_ethtool_tc_offloads(False, fail=False)
+ fail(ret != 0,
+ "Driver refused to disable TC offloads without filters installed...")
+
+ sim.set_ethtool_tc_offloads(True)
+
+ start_test("Test destroying device gets rid of TC filters...")
+ sim.cls_bpf_add_filter(obj, skip_sw=True)
+ simdev.remove()
+ bpftool_prog_list_wait(expected=0)
+
+ simdev = BpfNetdevSimDev()
+ sim, = simdev.nsims
+ sim.set_ethtool_tc_offloads(True)
+
+ start_test("Test destroying device gets rid of XDP...")
+ sim.set_xdp(obj, "offload")
+ simdev.remove()
+ bpftool_prog_list_wait(expected=0)
+
+ simdev = BpfNetdevSimDev()
+ sim, = simdev.nsims
+ sim.set_ethtool_tc_offloads(True)
+
+ start_test("Test XDP prog reporting...")
+ sim.set_xdp(obj, "drv")
+ ipl = sim.ip_link_show(xdp=True)
+ progs = bpftool_prog_list(expected=1)
+ fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"],
+ "Loaded program has wrong ID")
+
+ start_test("Test XDP prog replace without force...")
+ ret, _ = sim.set_xdp(obj, "drv", fail=False)
+ fail(ret == 0, "Replaced XDP program without -force")
+ sim.wait_for_flush(total=1)
+
+ start_test("Test XDP prog replace with force...")
+ ret, _ = sim.set_xdp(obj, "drv", force=True, fail=False)
+ fail(ret != 0, "Could not replace XDP program with -force")
+ bpftool_prog_list_wait(expected=1)
+ ipl = sim.ip_link_show(xdp=True)
+ progs = bpftool_prog_list(expected=1)
+ fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"],
+ "Loaded program has wrong ID")
+ fail("dev" in progs[0].keys(),
+ "Device parameters reported for non-offloaded program")
+
+ start_test("Test XDP prog replace with bad flags...")
+ ret, _, err = sim.set_xdp(obj, "generic", force=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Replaced XDP program with a program in different mode")
+ check_extack(err,
+ "Native and generic XDP can't be active at the same time.",
+ args)
+
+ start_test("Test MTU restrictions...")
+ ret, _ = sim.set_mtu(9000, fail=False)
+ fail(ret == 0,
+ "Driver should refuse to increase MTU to 9000 with XDP loaded...")
+ sim.unset_xdp("drv")
+ bpftool_prog_list_wait(expected=0)
+ sim.set_mtu(9000)
+ ret, _, err = sim.set_xdp(obj, "drv", fail=False, include_stderr=True)
+ fail(ret == 0, "Driver should refuse to load program with MTU of 9000...")
+ check_extack_nsim(err, "MTU too large w/ XDP enabled.", args)
+ sim.set_mtu(1500)
+
+ sim.wait_for_flush()
+ start_test("Test non-offload XDP attaching to HW...")
+ bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/nooffload")
+ nooffload = bpf_pinned("/sys/fs/bpf/nooffload")
+ ret, _, err = sim.set_xdp(nooffload, "offload",
+ fail=False, include_stderr=True)
+ fail(ret == 0, "attached non-offloaded XDP program to HW")
+ check_extack_nsim(err, "xdpoffload of non-bound program.", args)
+ rm("/sys/fs/bpf/nooffload")
+
+ start_test("Test offload XDP attaching to drv...")
+ bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/offload",
+ dev=sim['ifname'])
+ offload = bpf_pinned("/sys/fs/bpf/offload")
+ ret, _, err = sim.set_xdp(offload, "drv", fail=False, include_stderr=True)
+ fail(ret == 0, "attached offloaded XDP program to drv")
+ check_extack(err, "Using offloaded program without HW_MODE flag is not supported.", args)
+ rm("/sys/fs/bpf/offload")
+ sim.wait_for_flush()
+
+ start_test("Test XDP load failure...")
+ sim.dfs["dev/bpf_bind_verifier_accept"] = 0
+ ret, _, err = bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/offload",
+ dev=sim['ifname'], fail=False, include_stderr=True)
+ fail(ret == 0, "verifier should fail on load")
+ check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
+ sim.dfs["dev/bpf_bind_verifier_accept"] = 1
+ sim.wait_for_flush()
+
+ start_test("Test XDP offload...")
+ _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True)
+ ipl = sim.ip_link_show(xdp=True)
+ link_xdp = ipl["xdp"]["prog"]
+ progs = bpftool_prog_list(expected=1)
+ prog = progs[0]
+ fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID")
+
+ start_test("Test XDP offload is device bound...")
+ dfs = simdev.dfs_get_bound_progs(expected=1)
+ dprog = dfs[0]
+
+ fail(prog["id"] != link_xdp["id"], "Program IDs don't match")
+ fail(prog["tag"] != link_xdp["tag"], "Program tags don't match")
+ fail(str(link_xdp["id"]) != dprog["id"], "Program IDs don't match")
+ fail(dprog["state"] != "xlated", "Offloaded program state not translated")
+ fail(dprog["loaded"] != "Y", "Offloaded program is not loaded")
+
+ start_test("Test removing XDP program many times...")
+ sim.unset_xdp("offload")
+ sim.unset_xdp("offload")
+ sim.unset_xdp("drv")
+ sim.unset_xdp("drv")
+ sim.unset_xdp("")
+ sim.unset_xdp("")
+ bpftool_prog_list_wait(expected=0)
+
+ start_test("Test attempt to use a program for a wrong device...")
+ simdev2 = BpfNetdevSimDev()
+ sim2, = simdev2.nsims
+ sim2.set_xdp(obj, "offload")
+ pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
+
+ ret, _, err = sim.set_xdp(pinned, "offload",
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Pinned program loaded for a different device accepted")
+ check_extack(err, "Program bound to different device.", args)
+ simdev2.remove()
+ ret, _, err = sim.set_xdp(pinned, "offload",
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Pinned program loaded for a removed device accepted")
+ check_extack(err, "Program bound to different device.", args)
+ rm(pin_file)
+ bpftool_prog_list_wait(expected=0)
+
+ simdev, sim = test_multi_prog(simdev, sim, obj, "", 1)
+ simdev, sim = test_multi_prog(simdev, sim, obj, "drv", 1)
+ simdev, sim = test_multi_prog(simdev, sim, obj, "generic", 2)
+
+ start_test("Test mixing of TC and XDP...")
+ sim.tc_add_ingress()
+ sim.set_xdp(obj, "offload")
+ ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Loading TC when XDP active should fail")
+ check_extack_nsim(err, "driver and netdev offload states mismatch.", args)
+ sim.unset_xdp("offload")
+ sim.wait_for_flush()
+
+ sim.cls_bpf_add_filter(obj, skip_sw=True)
+ ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True)
+ fail(ret == 0, "Loading XDP when TC active should fail")
+ check_extack_nsim(err, "TC program is already loaded.", args)
+
+ start_test("Test binding TC from pinned...")
+ pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
+ sim.tc_flush_filters(bound=1, total=1)
+ sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True)
+ sim.tc_flush_filters(bound=1, total=1)
+
+ start_test("Test binding XDP from pinned...")
+ sim.set_xdp(obj, "offload")
+ pin_file, pinned = pin_prog("/sys/fs/bpf/tmp2", idx=1)
+
+ sim.set_xdp(pinned, "offload", force=True)
+ sim.unset_xdp("offload")
+ sim.set_xdp(pinned, "offload", force=True)
+ sim.unset_xdp("offload")
+
+ start_test("Test offload of wrong type fails...")
+ ret, _ = sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True, fail=False)
+ fail(ret == 0, "Managed to attach XDP program to TC")
+
+ start_test("Test asking for TC offload of two filters...")
+ sim.cls_bpf_add_filter(obj, da=True, skip_sw=True)
+ ret, _, err = sim.cls_bpf_add_filter(obj, da=True, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Managed to offload two TC filters at the same time")
+ check_extack_nsim(err, "driver and netdev offload states mismatch.", args)
+
+ sim.tc_flush_filters(bound=2, total=2)
+
+ start_test("Test if netdev removal waits for translation...")
+ delay_msec = 500
+ sim.dfs["dev/bpf_bind_verifier_delay"] = delay_msec
+ start = time.time()
+ cmd_line = "tc filter add dev %s ingress bpf %s da skip_sw" % \
+ (sim['ifname'], obj)
+ tc_proc = cmd(cmd_line, background=True, fail=False)
+ # Wait for the verifier to start
+ while simdev.dfs_num_bound_progs() <= 2:
+ pass
+ simdev.remove()
+ end = time.time()
+ ret, _ = cmd_result(tc_proc, fail=False)
+ time_diff = end - start
+ log("Time", "start:\t%s\nend:\t%s\ndiff:\t%s" % (start, end, time_diff))
+
+ fail(ret == 0, "Managed to load TC filter on a unregistering device")
+ delay_sec = delay_msec * 0.001
+ fail(time_diff < delay_sec, "Removal process took %s, expected %s" %
+ (time_diff, delay_sec))
+
+ # Remove all pinned files and reinstantiate the netdev
+ clean_up()
+ bpftool_prog_list_wait(expected=0)
+
+ simdev = BpfNetdevSimDev()
+ sim, = simdev.nsims
+ map_obj = bpf_obj("sample_map_ret0.bpf.o")
+ start_test("Test loading program with maps...")
+ sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
+
+ start_test("Test bpftool bound info reporting (own ns)...")
+ check_dev_info(False, "")
+
+ start_test("Test bpftool bound info reporting (other ns)...")
+ ns = mknetns()
+ sim.set_ns(ns)
+ check_dev_info(True, "")
+
+ start_test("Test bpftool bound info reporting (remote ns)...")
+ check_dev_info(False, ns)
+
+ start_test("Test bpftool bound info reporting (back to own ns)...")
+ sim.set_ns("")
+ check_dev_info(False, "")
+
+ prog_file, _ = pin_prog("/sys/fs/bpf/tmp_prog")
+ map_file, _ = pin_map("/sys/fs/bpf/tmp_map", idx=1, expected=2)
+ simdev.remove()
+
+ start_test("Test bpftool bound info reporting (removed dev)...")
+ check_dev_info_removed(prog_file=prog_file, map_file=map_file)
+
+ # Remove all pinned files and reinstantiate the netdev
+ clean_up()
+ bpftool_prog_list_wait(expected=0)
+
+ simdev = BpfNetdevSimDev()
+ sim, = simdev.nsims
+
+ start_test("Test map update (no flags)...")
+ sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
+ maps = bpftool_map_list_wait(expected=2)
+ array = maps[0] if maps[0]["type"] == "array" else maps[1]
+ htab = maps[0] if maps[0]["type"] == "hash" else maps[1]
+ for m in maps:
+ for i in range(2):
+ bpftool("map update id %d key %s value %s" %
+ (m["id"], int2str("I", i), int2str("Q", i * 3)))
+
+ for m in maps:
+ ret, _ = bpftool("map update id %d key %s value %s" %
+ (m["id"], int2str("I", 3), int2str("Q", 3 * 3)),
+ fail=False)
+ fail(ret == 0, "added too many entries")
+
+ start_test("Test map update (exists)...")
+ for m in maps:
+ for i in range(2):
+ bpftool("map update id %d key %s value %s exist" %
+ (m["id"], int2str("I", i), int2str("Q", i * 3)))
+
+ for m in maps:
+ ret, err = bpftool("map update id %d key %s value %s exist" %
+ (m["id"], int2str("I", 3), int2str("Q", 3 * 3)),
+ fail=False)
+ fail(ret == 0, "updated non-existing key")
+ fail(err["error"].find("No such file or directory") == -1,
+ "expected ENOENT, error is '%s'" % (err["error"]))
+
+ start_test("Test map update (noexist)...")
+ for m in maps:
+ for i in range(2):
+ ret, err = bpftool("map update id %d key %s value %s noexist" %
+ (m["id"], int2str("I", i), int2str("Q", i * 3)),
+ fail=False)
+ fail(ret == 0, "updated existing key")
+ fail(err["error"].find("File exists") == -1,
+ "expected EEXIST, error is '%s'" % (err["error"]))
+
+ start_test("Test map dump...")
+ for m in maps:
+ _, entries = bpftool("map dump id %d" % (m["id"]))
+ for i in range(2):
+ key = str2int(entries[i]["key"])
+ fail(key != i, "expected key %d, got %d" % (key, i))
+ val = str2int(entries[i]["value"])
+ fail(val != i * 3, "expected value %d, got %d" % (val, i * 3))
+
+ start_test("Test map getnext...")
+ for m in maps:
+ _, entry = bpftool("map getnext id %d" % (m["id"]))
+ key = str2int(entry["next_key"])
+ fail(key != 0, "next key %d, expected %d" % (key, 0))
+ _, entry = bpftool("map getnext id %d key %s" %
+ (m["id"], int2str("I", 0)))
+ key = str2int(entry["next_key"])
+ fail(key != 1, "next key %d, expected %d" % (key, 1))
+ ret, err = bpftool("map getnext id %d key %s" %
+ (m["id"], int2str("I", 1)), fail=False)
+ fail(ret == 0, "got next key past the end of map")
+ fail(err["error"].find("No such file or directory") == -1,
+ "expected ENOENT, error is '%s'" % (err["error"]))
+
+ start_test("Test map delete (htab)...")
+ for i in range(2):
+ bpftool("map delete id %d key %s" % (htab["id"], int2str("I", i)))
+
+ start_test("Test map delete (array)...")
+ for i in range(2):
+ ret, err = bpftool("map delete id %d key %s" %
+ (htab["id"], int2str("I", i)), fail=False)
+ fail(ret == 0, "removed entry from an array")
+ fail(err["error"].find("No such file or directory") == -1,
+ "expected ENOENT, error is '%s'" % (err["error"]))
+
+ start_test("Test map remove...")
+ sim.unset_xdp("offload")
+ bpftool_map_list_wait(expected=0)
+ simdev.remove()
+
+ simdev = BpfNetdevSimDev()
+ sim, = simdev.nsims
+ sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
+ simdev.remove()
+ bpftool_map_list_wait(expected=0)
+
+ start_test("Test map creation fail path...")
+ simdev = BpfNetdevSimDev()
+ sim, = simdev.nsims
+ sim.dfs["bpf_map_accept"] = "N"
+ ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False)
+ fail(ret == 0,
+ "netdevsim didn't refuse to create a map with offload disabled")
+
+ simdev.remove()
+
+ start_test("Test multi-dev ASIC program reuse...")
+ simdevA = BpfNetdevSimDev()
+ simA, = simdevA.nsims
+ simdevB = BpfNetdevSimDev(3)
+ simB1, simB2, simB3 = simdevB.nsims
+ sims = (simA, simB1, simB2, simB3)
+ simB = (simB1, simB2, simB3)
+
+ bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimA",
+ dev=simA['ifname'])
+ progA = bpf_pinned("/sys/fs/bpf/nsimA")
+ bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimB",
+ dev=simB1['ifname'])
+ progB = bpf_pinned("/sys/fs/bpf/nsimB")
+
+ simA.set_xdp(progA, "offload", JSON=False)
+ for d in simdevB.nsims:
+ d.set_xdp(progB, "offload", JSON=False)
+
+ start_test("Test multi-dev ASIC cross-dev replace...")
+ ret, _ = simA.set_xdp(progB, "offload", force=True, JSON=False, fail=False)
+ fail(ret == 0, "cross-ASIC program allowed")
+ for d in simdevB.nsims:
+ ret, _ = d.set_xdp(progA, "offload", force=True, JSON=False, fail=False)
+ fail(ret == 0, "cross-ASIC program allowed")
+
+ start_test("Test multi-dev ASIC cross-dev install...")
+ for d in sims:
+ d.unset_xdp("offload")
+
+ ret, _, err = simA.set_xdp(progB, "offload", force=True, JSON=False,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "cross-ASIC program allowed")
+ check_extack(err, "Program bound to different device.", args)
+ for d in simdevB.nsims:
+ ret, _, err = d.set_xdp(progA, "offload", force=True, JSON=False,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "cross-ASIC program allowed")
+ check_extack(err, "Program bound to different device.", args)
+
+ start_test("Test multi-dev ASIC cross-dev map reuse...")
+
+ mapA = bpftool("prog show %s" % (progA))[1]["map_ids"][0]
+ mapB = bpftool("prog show %s" % (progB))[1]["map_ids"][0]
+
+ ret, _ = bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimB_",
+ dev=simB3['ifname'],
+ maps=["idx 0 id %d" % (mapB)],
+ fail=False)
+ fail(ret != 0, "couldn't reuse a map on the same ASIC")
+ rm("/sys/fs/bpf/nsimB_")
+
+ ret, _, err = bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimA_",
+ dev=simA['ifname'],
+ maps=["idx 0 id %d" % (mapB)],
+ fail=False, include_stderr=True)
+ fail(ret == 0, "could reuse a map on a different ASIC")
+ fail(err.count("offload device mismatch between prog and map") == 0,
+ "error message missing for cross-ASIC map")
+
+ ret, _, err = bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimB_",
+ dev=simB1['ifname'],
+ maps=["idx 0 id %d" % (mapA)],
+ fail=False, include_stderr=True)
+ fail(ret == 0, "could reuse a map on a different ASIC")
+ fail(err.count("offload device mismatch between prog and map") == 0,
+ "error message missing for cross-ASIC map")
+
+ start_test("Test multi-dev ASIC cross-dev destruction...")
+ bpftool_prog_list_wait(expected=2)
+
+ simdevA.remove()
+ bpftool_prog_list_wait(expected=1)
+
+ ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
+ fail(ifnameB != simB1['ifname'], "program not bound to original device")
+ simB1.remove()
+ bpftool_prog_list_wait(expected=1)
+
+ start_test("Test multi-dev ASIC cross-dev destruction - move...")
+ ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
+ fail(ifnameB not in (simB2['ifname'], simB3['ifname']),
+ "program not bound to remaining devices")
+
+ simB2.remove()
+ ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
+ fail(ifnameB != simB3['ifname'], "program not bound to remaining device")
+
+ simB3.remove()
+ simdevB.remove()
+ bpftool_prog_list_wait(expected=0)
+
+ start_test("Test multi-dev ASIC cross-dev destruction - orphaned...")
+ ret, out = bpftool("prog show %s" % (progB), fail=False)
+ fail(ret != 0, "couldn't get information about orphaned program")
+
+ print("%s: OK" % (os.path.basename(__file__)))
+
+finally:
+ log("Clean up...", "", level=1)
+ log_level_inc()
+ clean_up()
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index 161db24e3c..876c2db02a 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -260,15 +260,8 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
SOL_IPV6, IPV6_HOPLIMIT, &opt.v6.hlimit);
if (opt.txtime.ena) {
- struct sock_txtime so_txtime = {
- .clockid = CLOCK_MONOTONIC,
- };
__u64 txtime;
- if (setsockopt(fd, SOL_SOCKET, SO_TXTIME,
- &so_txtime, sizeof(so_txtime)))
- error(ERN_SOCKOPT, errno, "setsockopt TXTIME");
-
txtime = time_start_mono.tv_sec * (1000ULL * 1000 * 1000) +
time_start_mono.tv_nsec +
opt.txtime.delay * 1000;
@@ -284,13 +277,6 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
memcpy(CMSG_DATA(cmsg), &txtime, sizeof(txtime));
}
if (opt.ts.ena) {
- __u32 val = SOF_TIMESTAMPING_SOFTWARE |
- SOF_TIMESTAMPING_OPT_TSONLY;
-
- if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
- &val, sizeof(val)))
- error(ERN_SOCKOPT, errno, "setsockopt TIMESTAMPING");
-
cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
cmsg_len += CMSG_SPACE(sizeof(__u32));
if (cbuf_sz < cmsg_len)
@@ -426,6 +412,24 @@ static void ca_set_sockopts(int fd)
setsockopt(fd, SOL_SOCKET, SO_PRIORITY,
&opt.sockopt.priority, sizeof(opt.sockopt.priority)))
error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY");
+
+ if (opt.txtime.ena) {
+ struct sock_txtime so_txtime = {
+ .clockid = CLOCK_MONOTONIC,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_TXTIME,
+ &so_txtime, sizeof(so_txtime)))
+ error(ERN_SOCKOPT, errno, "setsockopt TXTIME");
+ }
+ if (opt.ts.ena) {
+ __u32 val = SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_TSONLY;
+
+ if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
+ &val, sizeof(val)))
+ error(ERN_SOCKOPT, errno, "setsockopt TIMESTAMPING");
+ }
}
int main(int argc, char *argv[])
diff --git a/tools/testing/selftests/net/cmsg_time.sh b/tools/testing/selftests/net/cmsg_time.sh
index af85267ad1..1d7e756644 100755
--- a/tools/testing/selftests/net/cmsg_time.sh
+++ b/tools/testing/selftests/net/cmsg_time.sh
@@ -66,10 +66,13 @@ for i in "-4 $TGT4" "-6 $TGT6"; do
awk '/SND/ { if ($3 > 1000) print "OK"; }')
check_result $? "$ts" "OK" "$prot - TXTIME abs"
- ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 |
+ [ "$KSFT_MACHINE_SLOW" = yes ] && delay=8000 || delay=1000
+
+ ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d $delay |
awk '/SND/ {snd=$3}
/SCHED/ {sch=$3}
- END { if (snd - sch > 500) print "OK"; }')
+ END { if (snd - sch > '$((delay/2))') print "OK";
+ else print snd, "-", sch, "<", '$((delay/2))'; }')
check_result $? "$ts" "OK" "$prot - TXTIME rel"
done
done
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 04de7a6ba6..d4891f7a2b 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -101,3 +101,5 @@ CONFIG_NETFILTER_XT_MATCH_POLICY=m
CONFIG_CRYPTO_ARIA=y
CONFIG_XFRM_INTERFACE=m
CONFIG_XFRM_USER=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py
deleted file mode 100755
index 2d84c7a0be..0000000000
--- a/tools/testing/selftests/net/devlink_port_split.py
+++ /dev/null
@@ -1,309 +0,0 @@
-#!/usr/bin/env python3
-# SPDX-License-Identifier: GPL-2.0
-
-from subprocess import PIPE, Popen
-import json
-import time
-import argparse
-import collections
-import sys
-
-#
-# Test port split configuration using devlink-port lanes attribute.
-# The test is skipped in case the attribute is not available.
-#
-# First, check that all the ports with 1 lane fail to split.
-# Second, check that all the ports with more than 1 lane can be split
-# to all valid configurations (e.g., split to 2, split to 4 etc.)
-#
-
-
-# Kselftest framework requirement - SKIP code is 4
-KSFT_SKIP=4
-Port = collections.namedtuple('Port', 'bus_info name')
-
-
-def run_command(cmd, should_fail=False):
- """
- Run a command in subprocess.
- Return: Tuple of (stdout, stderr).
- """
-
- p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
- stdout, stderr = p.communicate()
- stdout, stderr = stdout.decode(), stderr.decode()
-
- if stderr != "" and not should_fail:
- print("Error sending command: %s" % cmd)
- print(stdout)
- print(stderr)
- return stdout, stderr
-
-
-class devlink_ports(object):
- """
- Class that holds information on the devlink ports, required to the tests;
- if_names: A list of interfaces in the devlink ports.
- """
-
- def get_if_names(dev):
- """
- Get a list of physical devlink ports.
- Return: Array of tuples (bus_info/port, if_name).
- """
-
- arr = []
-
- cmd = "devlink -j port show"
- stdout, stderr = run_command(cmd)
- assert stderr == ""
- ports = json.loads(stdout)['port']
-
- validate_devlink_output(ports, 'flavour')
-
- for port in ports:
- if dev in port:
- if ports[port]['flavour'] == 'physical':
- arr.append(Port(bus_info=port, name=ports[port]['netdev']))
-
- return arr
-
- def __init__(self, dev):
- self.if_names = devlink_ports.get_if_names(dev)
-
-
-def get_max_lanes(port):
- """
- Get the $port's maximum number of lanes.
- Return: number of lanes, e.g. 1, 2, 4 and 8.
- """
-
- cmd = "devlink -j port show %s" % port
- stdout, stderr = run_command(cmd)
- assert stderr == ""
- values = list(json.loads(stdout)['port'].values())[0]
-
- if 'lanes' in values:
- lanes = values['lanes']
- else:
- lanes = 0
- return lanes
-
-
-def get_split_ability(port):
- """
- Get the $port split ability.
- Return: split ability, true or false.
- """
-
- cmd = "devlink -j port show %s" % port.name
- stdout, stderr = run_command(cmd)
- assert stderr == ""
- values = list(json.loads(stdout)['port'].values())[0]
-
- return values['splittable']
-
-
-def split(k, port, should_fail=False):
- """
- Split $port into $k ports.
- If should_fail == True, the split should fail. Otherwise, should pass.
- Return: Array of sub ports after splitting.
- If the $port wasn't split, the array will be empty.
- """
-
- cmd = "devlink port split %s count %s" % (port.bus_info, k)
- stdout, stderr = run_command(cmd, should_fail=should_fail)
-
- if should_fail:
- if not test(stderr != "", "%s is unsplittable" % port.name):
- print("split an unsplittable port %s" % port.name)
- return create_split_group(port, k)
- else:
- if stderr == "":
- return create_split_group(port, k)
- print("didn't split a splittable port %s" % port.name)
-
- return []
-
-
-def unsplit(port):
- """
- Unsplit $port.
- """
-
- cmd = "devlink port unsplit %s" % port
- stdout, stderr = run_command(cmd)
- test(stderr == "", "Unsplit port %s" % port)
-
-
-def exists(port, dev):
- """
- Check if $port exists in the devlink ports.
- Return: True is so, False otherwise.
- """
-
- return any(dev_port.name == port
- for dev_port in devlink_ports.get_if_names(dev))
-
-
-def exists_and_lanes(ports, lanes, dev):
- """
- Check if every port in the list $ports exists in the devlink ports and has
- $lanes number of lanes after splitting.
- Return: True if both are True, False otherwise.
- """
-
- for port in ports:
- max_lanes = get_max_lanes(port)
- if not exists(port, dev):
- print("port %s doesn't exist in devlink ports" % port)
- return False
- if max_lanes != lanes:
- print("port %s has %d lanes, but %s were expected"
- % (port, lanes, max_lanes))
- return False
- return True
-
-
-def test(cond, msg):
- """
- Check $cond and print a message accordingly.
- Return: True is pass, False otherwise.
- """
-
- if cond:
- print("TEST: %-60s [ OK ]" % msg)
- else:
- print("TEST: %-60s [FAIL]" % msg)
-
- return cond
-
-
-def create_split_group(port, k):
- """
- Create the split group for $port.
- Return: Array with $k elements, which are the split port group.
- """
-
- return list(port.name + "s" + str(i) for i in range(k))
-
-
-def split_unsplittable_port(port, k):
- """
- Test that splitting of unsplittable port fails.
- """
-
- # split to max
- new_split_group = split(k, port, should_fail=True)
-
- if new_split_group != []:
- unsplit(port.bus_info)
-
-
-def split_splittable_port(port, k, lanes, dev):
- """
- Test that splitting of splittable port passes correctly.
- """
-
- new_split_group = split(k, port)
-
- # Once the split command ends, it takes some time to the sub ifaces'
- # to get their names. Use udevadm to continue only when all current udev
- # events are handled.
- cmd = "udevadm settle"
- stdout, stderr = run_command(cmd)
- assert stderr == ""
-
- if new_split_group != []:
- test(exists_and_lanes(new_split_group, lanes/k, dev),
- "split port %s into %s" % (port.name, k))
-
- unsplit(port.bus_info)
-
-
-def validate_devlink_output(devlink_data, target_property=None):
- """
- Determine if test should be skipped by checking:
- 1. devlink_data contains values
- 2. The target_property exist in devlink_data
- """
- skip_reason = None
- if any(devlink_data.values()):
- if target_property:
- skip_reason = "{} not found in devlink output, test skipped".format(target_property)
- for key in devlink_data:
- if target_property in devlink_data[key]:
- skip_reason = None
- else:
- skip_reason = 'devlink output is empty, test skipped'
-
- if skip_reason:
- print(skip_reason)
- sys.exit(KSFT_SKIP)
-
-
-def make_parser():
- parser = argparse.ArgumentParser(description='A test for port splitting.')
- parser.add_argument('--dev',
- help='The devlink handle of the device under test. ' +
- 'The default is the first registered devlink ' +
- 'handle.')
-
- return parser
-
-
-def main(cmdline=None):
- parser = make_parser()
- args = parser.parse_args(cmdline)
-
- dev = args.dev
- if not dev:
- cmd = "devlink -j dev show"
- stdout, stderr = run_command(cmd)
- assert stderr == ""
-
- validate_devlink_output(json.loads(stdout))
- devs = json.loads(stdout)['dev']
- dev = list(devs.keys())[0]
-
- cmd = "devlink dev show %s" % dev
- stdout, stderr = run_command(cmd)
- if stderr != "":
- print("devlink device %s can not be found" % dev)
- sys.exit(1)
-
- ports = devlink_ports(dev)
-
- found_max_lanes = False
- for port in ports.if_names:
- max_lanes = get_max_lanes(port.name)
-
- # If max lanes is 0, do not test port splitting at all
- if max_lanes == 0:
- continue
-
- # If 1 lane, shouldn't be able to split
- elif max_lanes == 1:
- test(not get_split_ability(port),
- "%s should not be able to split" % port.name)
- split_unsplittable_port(port, max_lanes)
-
- # Else, splitting should pass and all the split ports should exist.
- else:
- lane = max_lanes
- test(get_split_ability(port),
- "%s should be able to split" % port.name)
- while lane > 1:
- split_splittable_port(port, lane, max_lanes, dev)
-
- lane //= 2
- found_max_lanes = True
-
- if not found_max_lanes:
- print(f"Test not started, no port of device {dev} reports max_lanes")
- sys.exit(KSFT_SKIP)
-
-
-if __name__ == "__main__":
- main()
diff --git a/tools/testing/selftests/net/epoll_busy_poll.c b/tools/testing/selftests/net/epoll_busy_poll.c
new file mode 100644
index 0000000000..16e457c2f8
--- /dev/null
+++ b/tools/testing/selftests/net/epoll_busy_poll.c
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* Basic per-epoll context busy poll test.
+ *
+ * Only tests the ioctls, but should be expanded to test two connected hosts in
+ * the future
+ */
+
+#define _GNU_SOURCE
+
+#include <error.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/capability.h>
+
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include "../kselftest_harness.h"
+
+/* if the headers haven't been updated, we need to define some things */
+#if !defined(EPOLL_IOC_TYPE)
+struct epoll_params {
+ uint32_t busy_poll_usecs;
+ uint16_t busy_poll_budget;
+ uint8_t prefer_busy_poll;
+
+ /* pad the struct to a multiple of 64bits */
+ uint8_t __pad;
+};
+
+#define EPOLL_IOC_TYPE 0x8A
+#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params)
+#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
+#endif
+
+FIXTURE(invalid_fd)
+{
+ int invalid_fd;
+ struct epoll_params params;
+};
+
+FIXTURE_SETUP(invalid_fd)
+{
+ int ret;
+
+ ret = socket(AF_UNIX, SOCK_DGRAM, 0);
+ EXPECT_NE(-1, ret)
+ TH_LOG("error creating unix socket");
+
+ self->invalid_fd = ret;
+}
+
+FIXTURE_TEARDOWN(invalid_fd)
+{
+ int ret;
+
+ ret = close(self->invalid_fd);
+ EXPECT_EQ(0, ret);
+}
+
+TEST_F(invalid_fd, test_invalid_fd)
+{
+ int ret;
+
+ ret = ioctl(self->invalid_fd, EPIOCGPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCGPARAMS on invalid epoll FD should error");
+
+ EXPECT_EQ(ENOTTY, errno)
+ TH_LOG("EPIOCGPARAMS on invalid epoll FD should set errno to ENOTTY");
+
+ memset(&self->params, 0, sizeof(struct epoll_params));
+
+ ret = ioctl(self->invalid_fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS on invalid epoll FD should error");
+
+ EXPECT_EQ(ENOTTY, errno)
+ TH_LOG("EPIOCSPARAMS on invalid epoll FD should set errno to ENOTTY");
+}
+
+FIXTURE(epoll_busy_poll)
+{
+ int fd;
+ struct epoll_params params;
+ struct epoll_params *invalid_params;
+ cap_t caps;
+};
+
+FIXTURE_SETUP(epoll_busy_poll)
+{
+ int ret;
+
+ ret = epoll_create1(0);
+ EXPECT_NE(-1, ret)
+ TH_LOG("epoll_create1 failed?");
+
+ self->fd = ret;
+
+ self->caps = cap_get_proc();
+ EXPECT_NE(NULL, self->caps);
+}
+
+FIXTURE_TEARDOWN(epoll_busy_poll)
+{
+ int ret;
+
+ ret = close(self->fd);
+ EXPECT_EQ(0, ret);
+
+ ret = cap_free(self->caps);
+ EXPECT_NE(-1, ret)
+ TH_LOG("unable to free capabilities");
+}
+
+TEST_F(epoll_busy_poll, test_get_params)
+{
+ /* begin by getting the epoll params from the kernel
+ *
+ * the default should be default and all fields should be zero'd by the
+ * kernel, so set params fields to garbage to test this.
+ */
+ int ret = 0;
+
+ self->params.busy_poll_usecs = 0xff;
+ self->params.busy_poll_budget = 0xff;
+ self->params.prefer_busy_poll = 1;
+ self->params.__pad = 0xf;
+
+ ret = ioctl(self->fd, EPIOCGPARAMS, &self->params);
+ EXPECT_EQ(0, ret)
+ TH_LOG("ioctl EPIOCGPARAMS should succeed");
+
+ EXPECT_EQ(0, self->params.busy_poll_usecs)
+ TH_LOG("EPIOCGPARAMS busy_poll_usecs should have been 0");
+
+ EXPECT_EQ(0, self->params.busy_poll_budget)
+ TH_LOG("EPIOCGPARAMS busy_poll_budget should have been 0");
+
+ EXPECT_EQ(0, self->params.prefer_busy_poll)
+ TH_LOG("EPIOCGPARAMS prefer_busy_poll should have been 0");
+
+ EXPECT_EQ(0, self->params.__pad)
+ TH_LOG("EPIOCGPARAMS __pad should have been 0");
+
+ self->invalid_params = (struct epoll_params *)0xdeadbeef;
+ ret = ioctl(self->fd, EPIOCGPARAMS, self->invalid_params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCGPARAMS should error with invalid params");
+
+ EXPECT_EQ(EFAULT, errno)
+ TH_LOG("EPIOCGPARAMS with invalid params should set errno to EFAULT");
+}
+
+TEST_F(epoll_busy_poll, test_set_invalid)
+{
+ int ret;
+
+ memset(&self->params, 0, sizeof(struct epoll_params));
+
+ self->params.__pad = 1;
+
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS non-zero __pad should error");
+
+ EXPECT_EQ(EINVAL, errno)
+ TH_LOG("EPIOCSPARAMS non-zero __pad errno should be EINVAL");
+
+ self->params.__pad = 0;
+ self->params.busy_poll_usecs = (uint32_t)INT_MAX + 1;
+
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS should error busy_poll_usecs > S32_MAX");
+
+ EXPECT_EQ(EINVAL, errno)
+ TH_LOG("EPIOCSPARAMS busy_poll_usecs > S32_MAX errno should be EINVAL");
+
+ self->params.__pad = 0;
+ self->params.busy_poll_usecs = 32;
+ self->params.prefer_busy_poll = 2;
+
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS should error prefer_busy_poll > 1");
+
+ EXPECT_EQ(EINVAL, errno)
+ TH_LOG("EPIOCSPARAMS prefer_busy_poll > 1 errno should be EINVAL");
+
+ self->params.__pad = 0;
+ self->params.busy_poll_usecs = 32;
+ self->params.prefer_busy_poll = 1;
+
+ /* set budget well above kernel's NAPI_POLL_WEIGHT of 64 */
+ self->params.busy_poll_budget = UINT16_MAX;
+
+ /* test harness should run with CAP_NET_ADMIN, but let's make sure */
+ cap_flag_value_t tmp;
+
+ ret = cap_get_flag(self->caps, CAP_NET_ADMIN, CAP_EFFECTIVE, &tmp);
+ EXPECT_EQ(0, ret)
+ TH_LOG("unable to get CAP_NET_ADMIN cap flag");
+
+ EXPECT_EQ(CAP_SET, tmp)
+ TH_LOG("expecting CAP_NET_ADMIN to be set for the test harness");
+
+ /* at this point we know CAP_NET_ADMIN is available, so setting the
+ * params with a busy_poll_budget > NAPI_POLL_WEIGHT should succeed
+ */
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(0, ret)
+ TH_LOG("EPIOCSPARAMS should allow busy_poll_budget > NAPI_POLL_WEIGHT");
+
+ /* remove CAP_NET_ADMIN from our effective set */
+ cap_value_t net_admin[] = { CAP_NET_ADMIN };
+
+ ret = cap_set_flag(self->caps, CAP_EFFECTIVE, 1, net_admin, CAP_CLEAR);
+ EXPECT_EQ(0, ret)
+ TH_LOG("couldn't clear CAP_NET_ADMIN");
+
+ ret = cap_set_proc(self->caps);
+ EXPECT_EQ(0, ret)
+ TH_LOG("cap_set_proc should drop CAP_NET_ADMIN");
+
+ /* this is now expected to fail */
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS should error busy_poll_budget > NAPI_POLL_WEIGHT");
+
+ EXPECT_EQ(EPERM, errno)
+ TH_LOG("EPIOCSPARAMS errno should be EPERM busy_poll_budget > NAPI_POLL_WEIGHT");
+
+ /* restore CAP_NET_ADMIN to our effective set */
+ ret = cap_set_flag(self->caps, CAP_EFFECTIVE, 1, net_admin, CAP_SET);
+ EXPECT_EQ(0, ret)
+ TH_LOG("couldn't restore CAP_NET_ADMIN");
+
+ ret = cap_set_proc(self->caps);
+ EXPECT_EQ(0, ret)
+ TH_LOG("cap_set_proc should set CAP_NET_ADMIN");
+
+ self->invalid_params = (struct epoll_params *)0xdeadbeef;
+ ret = ioctl(self->fd, EPIOCSPARAMS, self->invalid_params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS should error when epoll_params is invalid");
+
+ EXPECT_EQ(EFAULT, errno)
+ TH_LOG("EPIOCSPARAMS should set errno to EFAULT when epoll_params is invalid");
+}
+
+TEST_F(epoll_busy_poll, test_set_and_get_valid)
+{
+ int ret;
+
+ memset(&self->params, 0, sizeof(struct epoll_params));
+
+ self->params.busy_poll_usecs = 25;
+ self->params.busy_poll_budget = 16;
+ self->params.prefer_busy_poll = 1;
+
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(0, ret)
+ TH_LOG("EPIOCSPARAMS with valid params should not error");
+
+ /* check that the kernel returns the same values back */
+
+ memset(&self->params, 0, sizeof(struct epoll_params));
+
+ ret = ioctl(self->fd, EPIOCGPARAMS, &self->params);
+
+ EXPECT_EQ(0, ret)
+ TH_LOG("EPIOCGPARAMS should not error");
+
+ EXPECT_EQ(25, self->params.busy_poll_usecs)
+ TH_LOG("params.busy_poll_usecs incorrect");
+
+ EXPECT_EQ(16, self->params.busy_poll_budget)
+ TH_LOG("params.busy_poll_budget incorrect");
+
+ EXPECT_EQ(1, self->params.prefer_busy_poll)
+ TH_LOG("params.prefer_busy_poll incorrect");
+
+ EXPECT_EQ(0, self->params.__pad)
+ TH_LOG("params.__pad was not 0");
+}
+
+TEST_F(epoll_busy_poll, test_invalid_ioctl)
+{
+ int invalid_ioctl = EPIOCGPARAMS + 10;
+ int ret;
+
+ ret = ioctl(self->fd, invalid_ioctl, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("invalid ioctl should return error");
+
+ EXPECT_EQ(EINVAL, errno)
+ TH_LOG("invalid ioctl should set errno to EINVAL");
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 51157a5559..7c01f58a20 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -9,6 +9,7 @@ PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
RTABLE=100
RTABLE_PEER=101
+RTABLE_VRF=102
GW_IP4=192.51.100.2
SRC_IP=192.51.100.3
GW_IP6=2001:db8:1::2
@@ -17,7 +18,14 @@ SRC_IP6=2001:db8:1::3
DEV_ADDR=192.51.100.1
DEV_ADDR6=2001:db8:1::1
DEV=dummy0
-TESTS="fib_rule6 fib_rule4 fib_rule6_connect fib_rule4_connect"
+TESTS="
+ fib_rule6
+ fib_rule4
+ fib_rule6_connect
+ fib_rule4_connect
+ fib_rule6_vrf
+ fib_rule4_vrf
+"
SELFTEST_PATH=""
@@ -27,13 +35,18 @@ log_test()
local expected=$2
local msg="$3"
+ $IP rule show | grep -q l3mdev
+ if [ $? -eq 0 ]; then
+ msg="$msg (VRF)"
+ fi
+
if [ ${rc} -eq ${expected} ]; then
nsuccess=$((nsuccess+1))
- printf "\n TEST: %-50s [ OK ]\n" "${msg}"
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
else
ret=1
nfail=$((nfail+1))
- printf "\n TEST: %-50s [FAIL]\n" "${msg}"
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
echo
echo "hit enter to continue, 'q' to quit"
@@ -130,6 +143,17 @@ cleanup_peer()
ip netns del $peerns
}
+setup_vrf()
+{
+ $IP link add name vrf0 up type vrf table $RTABLE_VRF
+ $IP link set dev $DEV master vrf0
+}
+
+cleanup_vrf()
+{
+ $IP link del dev vrf0
+}
+
fib_check_iproute_support()
{
ip rule help 2>&1 | grep -q $1
@@ -248,6 +272,13 @@ fib_rule6_test()
fi
}
+fib_rule6_vrf_test()
+{
+ setup_vrf
+ fib_rule6_test
+ cleanup_vrf
+}
+
# Verify that the IPV6_TCLASS option of UDPv6 and TCPv6 sockets is properly
# taken into account when connecting the socket and when sending packets.
fib_rule6_connect_test()
@@ -385,6 +416,13 @@ fib_rule4_test()
fi
}
+fib_rule4_vrf_test()
+{
+ setup_vrf
+ fib_rule4_test
+ cleanup_vrf
+}
+
# Verify that the IP_TOS option of UDPv4 and TCPv4 sockets is properly taken
# into account when connecting the socket and when sending packets.
fib_rule4_connect_test()
@@ -467,6 +505,8 @@ do
fib_rule4_test|fib_rule4) fib_rule4_test;;
fib_rule6_connect_test|fib_rule6_connect) fib_rule6_connect_test;;
fib_rule4_connect_test|fib_rule4_connect) fib_rule4_connect_test;;
+ fib_rule6_vrf_test|fib_rule6_vrf) fib_rule6_vrf_test;;
+ fib_rule4_vrf_test|fib_rule4_vrf) fib_rule4_vrf_test;;
help) echo "Test names: $TESTS"; exit 0;;
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 73895711cd..5f3c28fc86 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -1737,53 +1737,53 @@ ipv4_rt_dsfield()
# DSCP 0x10 should match the specific route, no matter the ECN bits
$IP route get fibmatch 172.16.102.1 dsfield 0x10 | \
- grep -q "via 172.16.103.2"
+ grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2"
log_test $? 0 "IPv4 route with DSCP and ECN:Not-ECT"
$IP route get fibmatch 172.16.102.1 dsfield 0x11 | \
- grep -q "via 172.16.103.2"
+ grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2"
log_test $? 0 "IPv4 route with DSCP and ECN:ECT(1)"
$IP route get fibmatch 172.16.102.1 dsfield 0x12 | \
- grep -q "via 172.16.103.2"
+ grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2"
log_test $? 0 "IPv4 route with DSCP and ECN:ECT(0)"
$IP route get fibmatch 172.16.102.1 dsfield 0x13 | \
- grep -q "via 172.16.103.2"
+ grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2"
log_test $? 0 "IPv4 route with DSCP and ECN:CE"
# Unknown DSCP should match the generic route, no matter the ECN bits
$IP route get fibmatch 172.16.102.1 dsfield 0x14 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with unknown DSCP and ECN:Not-ECT"
$IP route get fibmatch 172.16.102.1 dsfield 0x15 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(1)"
$IP route get fibmatch 172.16.102.1 dsfield 0x16 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(0)"
$IP route get fibmatch 172.16.102.1 dsfield 0x17 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with unknown DSCP and ECN:CE"
# Null DSCP should match the generic route, no matter the ECN bits
$IP route get fibmatch 172.16.102.1 dsfield 0x00 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with no DSCP and ECN:Not-ECT"
$IP route get fibmatch 172.16.102.1 dsfield 0x01 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(1)"
$IP route get fibmatch 172.16.102.1 dsfield 0x02 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(0)"
$IP route get fibmatch 172.16.102.1 dsfield 0x03 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with no DSCP and ECN:CE"
}
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 535865b3d1..fa7b59ff40 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -15,18 +15,12 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
bridge_vlan_unaware.sh \
custom_multipath_hash.sh \
dual_vxlan_bridge.sh \
- ethtool_extended_state.sh \
- ethtool_mm.sh \
- ethtool_rmon.sh \
- ethtool.sh \
gre_custom_multipath_hash.sh \
gre_inner_v4_multipath.sh \
gre_inner_v6_multipath.sh \
gre_multipath_nh_res.sh \
gre_multipath_nh.sh \
gre_multipath.sh \
- hw_stats_l3.sh \
- hw_stats_l3_gre.sh \
ip6_forward_instats_vrf.sh \
ip6gre_custom_multipath_hash.sh \
ip6gre_flat_key.sh \
@@ -43,8 +37,8 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
ipip_hier_gre_key.sh \
ipip_hier_gre_keys.sh \
ipip_hier_gre.sh \
+ lib_sh_test.sh \
local_termination.sh \
- loopback.sh \
mirror_gre_bound.sh \
mirror_gre_bridge_1d.sh \
mirror_gre_bridge_1d_vlan.sh \
@@ -113,7 +107,6 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
vxlan_symmetric.sh
TEST_FILES := devlink_lib.sh \
- ethtool_lib.sh \
fib_offload_lib.sh \
forwarding.config.sample \
ip6gre_lib.sh \
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
index b8a2af8fcf..7fdb6a9ca5 100644
--- a/tools/testing/selftests/net/forwarding/README
+++ b/tools/testing/selftests/net/forwarding/README
@@ -56,3 +56,36 @@ o Checks shall be added to lib.sh for any external dependencies.
o Code shall be checked using ShellCheck [1] prior to submission.
1. https://www.shellcheck.net/
+
+Customization
+=============
+
+The forwarding selftests framework uses a number of variables that
+influence its behavior and tools it invokes, and how it invokes them, in
+various ways. A number of these variables can be overridden. The way these
+overridable variables are specified is typically one of the following two
+syntaxes:
+
+ : "${VARIABLE:=default_value}"
+ VARIABLE=${VARIABLE:=default_value}
+
+Any of these variables can be overridden. Notably net/forwarding/lib.sh and
+net/lib.sh contain a number of overridable variables.
+
+One way of overriding these variables is through the environment:
+
+ PAUSE_ON_FAIL=yes ./some_test.sh
+
+The variable NETIFS is special. Since it is an array variable, there is no
+way to pass it through the environment. Its value can instead be given as
+consecutive arguments to the selftest:
+
+ ./some_test.sh swp{1..8}
+
+A way to customize variables in a persistent fashion is to create a file
+named forwarding.config in this directory. lib.sh sources the file if
+present, so it can contain any shell code. Typically it will contain
+assignments of variables whose value should be overridden.
+
+forwarding.config.sample is available in the directory as an example of
+how forwarding.config might look.
diff --git a/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh
index 0760a34b71..a21b7085da 100755
--- a/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh
@@ -178,6 +178,22 @@ fdb_del()
check_err $? "Failed to remove a FDB entry of type ${type}"
}
+check_fdb_n_learned_support()
+{
+ if ! ip link help bridge 2>&1 | grep -q "fdb_max_learned"; then
+ echo "SKIP: iproute2 too old, missing bridge max learned support"
+ exit $ksft_skip
+ fi
+
+ ip link add dev br0 type bridge
+ local learned=$(fdb_get_n_learned)
+ ip link del dev br0
+ if [ "$learned" == "null" ]; then
+ echo "SKIP: kernel too old; bridge fdb_n_learned feature not supported."
+ exit $ksft_skip
+ fi
+}
+
check_accounting_one_type()
{
local type=$1 is_counted=$2 overrides_learned=$3
@@ -274,6 +290,8 @@ check_limit()
done
}
+check_fdb_n_learned_support
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index f1de525cfa..62a05bca1e 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -122,6 +122,8 @@ devlink_reload()
still_pending=$(devlink resource show "$DEVLINK_DEV" | \
grep -c "size_new")
check_err $still_pending "Failed reload - There are still unset sizes"
+
+ udevadm settle
}
declare -A DEVLINK_ORIG
diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/net/forwarding/ethtool.sh
deleted file mode 100755
index aa2eafb7b2..0000000000
--- a/tools/testing/selftests/net/forwarding/ethtool.sh
+++ /dev/null
@@ -1,301 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-ALL_TESTS="
- same_speeds_autoneg_off
- different_speeds_autoneg_off
- combination_of_neg_on_and_off
- advertise_subset_of_speeds
- check_highest_speed_is_chosen
- different_speeds_autoneg_on
-"
-NUM_NETIFS=2
-source lib.sh
-source ethtool_lib.sh
-
-h1_create()
-{
- simple_if_init $h1 192.0.2.1/24
-}
-
-h1_destroy()
-{
- simple_if_fini $h1 192.0.2.1/24
-}
-
-h2_create()
-{
- simple_if_init $h2 192.0.2.2/24
-}
-
-h2_destroy()
-{
- simple_if_fini $h2 192.0.2.2/24
-}
-
-setup_prepare()
-{
- h1=${NETIFS[p1]}
- h2=${NETIFS[p2]}
-
- h1_create
- h2_create
-}
-
-cleanup()
-{
- pre_cleanup
-
- h2_destroy
- h1_destroy
-}
-
-same_speeds_autoneg_off()
-{
- # Check that when each of the reported speeds is forced, the links come
- # up and are operational.
- local -a speeds_arr=($(common_speeds_get $h1 $h2 0 0))
-
- for speed in "${speeds_arr[@]}"; do
- RET=0
- ethtool_set $h1 speed $speed autoneg off
- ethtool_set $h2 speed $speed autoneg off
-
- setup_wait_dev_with_timeout $h1
- setup_wait_dev_with_timeout $h2
- ping_do $h1 192.0.2.2
- check_err $? "speed $speed autoneg off"
- log_test "force of same speed autoneg off"
- log_info "speed = $speed"
- done
-
- ethtool -s $h2 autoneg on
- ethtool -s $h1 autoneg on
-}
-
-different_speeds_autoneg_off()
-{
- # Test that when we force different speeds, links are not up and ping
- # fails.
- RET=0
-
- local -a speeds_arr=($(different_speeds_get $h1 $h2 0 0))
- local speed1=${speeds_arr[0]}
- local speed2=${speeds_arr[1]}
-
- ethtool_set $h1 speed $speed1 autoneg off
- ethtool_set $h2 speed $speed2 autoneg off
-
- setup_wait_dev_with_timeout $h1
- setup_wait_dev_with_timeout $h2
- ping_do $h1 192.0.2.2
- check_fail $? "ping with different speeds"
-
- log_test "force of different speeds autoneg off"
-
- ethtool -s $h2 autoneg on
- ethtool -s $h1 autoneg on
-}
-
-combination_of_neg_on_and_off()
-{
- # Test that when one device is forced to a speed supported by both
- # endpoints and the other device is configured to autoneg on, the links
- # are up and ping passes.
- local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1))
-
- for speed in "${speeds_arr[@]}"; do
- RET=0
- ethtool_set $h1 speed $speed autoneg off
-
- setup_wait_dev_with_timeout $h1
- setup_wait_dev_with_timeout $h2
- ping_do $h1 192.0.2.2
- check_err $? "h1-speed=$speed autoneg off, h2 autoneg on"
- log_test "one side with autoneg off and another with autoneg on"
- log_info "force speed = $speed"
- done
-
- ethtool -s $h1 autoneg on
-}
-
-hex_speed_value_get()
-{
- local speed=$1; shift
-
- local shift_size=${speed_values[$speed]}
- speed=$((0x1 << $"shift_size"))
- printf "%#x" "$speed"
-}
-
-subset_of_common_speeds_get()
-{
- local dev1=$1; shift
- local dev2=$1; shift
- local adver=$1; shift
-
- local -a speeds_arr=($(common_speeds_get $dev1 $dev2 0 $adver))
- local speed_to_advertise=0
- local speed_to_remove=${speeds_arr[0]}
- speed_to_remove+='base'
-
- local -a speeds_mode_arr=($(common_speeds_get $dev1 $dev2 1 $adver))
-
- for speed in ${speeds_mode_arr[@]}; do
- if [[ $speed != $speed_to_remove* ]]; then
- speed=$(hex_speed_value_get $speed)
- speed_to_advertise=$(($speed_to_advertise | \
- $speed))
- fi
-
- done
-
- # Convert to hex.
- printf "%#x" "$speed_to_advertise"
-}
-
-speed_to_advertise_get()
-{
- # The function returns the hex number that is composed by OR-ing all
- # the modes corresponding to the provided speed.
- local speed_without_mode=$1; shift
- local supported_speeds=("$@"); shift
- local speed_to_advertise=0
-
- speed_without_mode+='base'
-
- for speed in ${supported_speeds[@]}; do
- if [[ $speed == $speed_without_mode* ]]; then
- speed=$(hex_speed_value_get $speed)
- speed_to_advertise=$(($speed_to_advertise | \
- $speed))
- fi
-
- done
-
- # Convert to hex.
- printf "%#x" "$speed_to_advertise"
-}
-
-advertise_subset_of_speeds()
-{
- # Test that when one device advertises a subset of speeds and another
- # advertises a specific speed (but all modes of this speed), the links
- # are up and ping passes.
- RET=0
-
- local speed_1_to_advertise=$(subset_of_common_speeds_get $h1 $h2 1)
- ethtool_set $h1 advertise $speed_1_to_advertise
-
- if [ $RET != 0 ]; then
- log_test "advertise subset of speeds"
- return
- fi
-
- local -a speeds_arr_without_mode=($(common_speeds_get $h1 $h2 0 1))
- # Check only speeds that h1 advertised. Remove the first speed.
- unset speeds_arr_without_mode[0]
- local -a speeds_arr_with_mode=($(common_speeds_get $h1 $h2 1 1))
-
- for speed_value in ${speeds_arr_without_mode[@]}; do
- RET=0
- local speed_2_to_advertise=$(speed_to_advertise_get $speed_value \
- "${speeds_arr_with_mode[@]}")
- ethtool_set $h2 advertise $speed_2_to_advertise
-
- setup_wait_dev_with_timeout $h1
- setup_wait_dev_with_timeout $h2
- ping_do $h1 192.0.2.2
- check_err $? "h1=$speed_1_to_advertise, h2=$speed_2_to_advertise ($speed_value)"
-
- log_test "advertise subset of speeds"
- log_info "h1=$speed_1_to_advertise, h2=$speed_2_to_advertise"
- done
-
- ethtool -s $h2 autoneg on
- ethtool -s $h1 autoneg on
-}
-
-check_highest_speed_is_chosen()
-{
- # Test that when one device advertises a subset of speeds, the other
- # chooses the highest speed. This test checks configuration without
- # traffic.
- RET=0
-
- local max_speed
- local chosen_speed
- local speed_to_advertise=$(subset_of_common_speeds_get $h1 $h2 1)
-
- ethtool_set $h1 advertise $speed_to_advertise
-
- if [ $RET != 0 ]; then
- log_test "check highest speed"
- return
- fi
-
- local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1))
-
- max_speed=${speeds_arr[0]}
- for current in ${speeds_arr[@]}; do
- if [[ $current -gt $max_speed ]]; then
- max_speed=$current
- fi
- done
-
- setup_wait_dev_with_timeout $h1
- setup_wait_dev_with_timeout $h2
- chosen_speed=$(ethtool $h1 | grep 'Speed:')
- chosen_speed=${chosen_speed%"Mb/s"*}
- chosen_speed=${chosen_speed#*"Speed: "}
- ((chosen_speed == max_speed))
- check_err $? "h1 advertise $speed_to_advertise, h2 sync to speed $chosen_speed"
-
- log_test "check highest speed"
-
- ethtool -s $h2 autoneg on
- ethtool -s $h1 autoneg on
-}
-
-different_speeds_autoneg_on()
-{
- # Test that when we configure links to advertise different speeds,
- # links are not up and ping fails.
- RET=0
-
- local -a speeds=($(different_speeds_get $h1 $h2 1 1))
- local speed1=${speeds[0]}
- local speed2=${speeds[1]}
-
- speed1=$(hex_speed_value_get $speed1)
- speed2=$(hex_speed_value_get $speed2)
-
- ethtool_set $h1 advertise $speed1
- ethtool_set $h2 advertise $speed2
-
- if (($RET)); then
- setup_wait_dev_with_timeout $h1
- setup_wait_dev_with_timeout $h2
- ping_do $h1 192.0.2.2
- check_fail $? "ping with different speeds autoneg on"
- fi
-
- log_test "advertise different speeds autoneg on"
-
- ethtool -s $h2 autoneg on
- ethtool -s $h1 autoneg on
-}
-
-skip_on_veth
-
-trap cleanup EXIT
-
-setup_prepare
-setup_wait
-
-declare -gA speed_values
-eval "speed_values=($(speeds_arr_get))"
-
-tests_run
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
deleted file mode 100755
index 17f89c3b7c..0000000000
--- a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
+++ /dev/null
@@ -1,117 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-ALL_TESTS="
- autoneg
- autoneg_force_mode
- no_cable
-"
-
-NUM_NETIFS=2
-source lib.sh
-source ethtool_lib.sh
-
-TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms
-
-setup_prepare()
-{
- swp1=${NETIFS[p1]}
- swp2=${NETIFS[p2]}
- swp3=$NETIF_NO_CABLE
-}
-
-ethtool_ext_state()
-{
- local dev=$1; shift
- local expected_ext_state=$1; shift
- local expected_ext_substate=${1:-""}; shift
-
- local ext_state=$(ethtool $dev | grep "Link detected" \
- | cut -d "(" -f2 | cut -d ")" -f1)
- local ext_substate=$(echo $ext_state | cut -sd "," -f2 \
- | sed -e 's/^[[:space:]]*//')
- ext_state=$(echo $ext_state | cut -d "," -f1)
-
- if [[ $ext_state != $expected_ext_state ]]; then
- echo "Expected \"$expected_ext_state\", got \"$ext_state\""
- return 1
- fi
- if [[ $ext_substate != $expected_ext_substate ]]; then
- echo "Expected \"$expected_ext_substate\", got \"$ext_substate\""
- return 1
- fi
-}
-
-autoneg()
-{
- local msg
-
- RET=0
-
- ip link set dev $swp1 up
-
- msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \
- "Autoneg" "No partner detected")
- check_err $? "$msg"
-
- log_test "Autoneg, No partner detected"
-
- ip link set dev $swp1 down
-}
-
-autoneg_force_mode()
-{
- local msg
-
- RET=0
-
- ip link set dev $swp1 up
- ip link set dev $swp2 up
-
- local -a speeds_arr=($(different_speeds_get $swp1 $swp2 0 0))
- local speed1=${speeds_arr[0]}
- local speed2=${speeds_arr[1]}
-
- ethtool_set $swp1 speed $speed1 autoneg off
- ethtool_set $swp2 speed $speed2 autoneg off
-
- msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \
- "Autoneg" "No partner detected during force mode")
- check_err $? "$msg"
-
- msg=$(busywait $TIMEOUT ethtool_ext_state $swp2 \
- "Autoneg" "No partner detected during force mode")
- check_err $? "$msg"
-
- log_test "Autoneg, No partner detected during force mode"
-
- ethtool -s $swp2 autoneg on
- ethtool -s $swp1 autoneg on
-
- ip link set dev $swp2 down
- ip link set dev $swp1 down
-}
-
-no_cable()
-{
- local msg
-
- RET=0
-
- ip link set dev $swp3 up
-
- msg=$(busywait $TIMEOUT ethtool_ext_state $swp3 "No cable")
- check_err $? "$msg"
-
- log_test "No cable"
-
- ip link set dev $swp3 down
-}
-
-skip_on_veth
-
-setup_prepare
-
-tests_run
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/net/forwarding/ethtool_lib.sh
deleted file mode 100644
index b9bfb45085..0000000000
--- a/tools/testing/selftests/net/forwarding/ethtool_lib.sh
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-speeds_arr_get()
-{
- cmd='/ETHTOOL_LINK_MODE_[^[:space:]]*_BIT[[:space:]]+=[[:space:]]+/ \
- {sub(/,$/, "") \
- sub(/ETHTOOL_LINK_MODE_/,"") \
- sub(/_BIT/,"") \
- sub(/_Full/,"/Full") \
- sub(/_Half/,"/Half");\
- print "["$1"]="$3}'
-
- awk "${cmd}" /usr/include/linux/ethtool.h
-}
-
-ethtool_set()
-{
- local cmd="$@"
- local out=$(ethtool -s $cmd 2>&1 | wc -l)
-
- check_err $out "error in configuration. $cmd"
-}
-
-dev_linkmodes_params_get()
-{
- local dev=$1; shift
- local adver=$1; shift
- local -a linkmodes_params
- local param_count
- local arr
-
- if (($adver)); then
- mode="Advertised link modes"
- else
- mode="Supported link modes"
- fi
-
- local -a dev_linkmodes=($(dev_speeds_get $dev 1 $adver))
- for ((i=0; i<${#dev_linkmodes[@]}; i++)); do
- linkmodes_params[$i]=$(echo -e "${dev_linkmodes[$i]}" | \
- # Replaces all non numbers with spaces
- sed -e 's/[^0-9]/ /g' | \
- # Squeeze spaces in sequence to 1 space
- tr -s ' ')
- # Count how many numbers were found in the linkmode
- param_count=$(echo "${linkmodes_params[$i]}" | wc -w)
- if [[ $param_count -eq 1 ]]; then
- linkmodes_params[$i]="${linkmodes_params[$i]} 1"
- elif [[ $param_count -ge 3 ]]; then
- arr=(${linkmodes_params[$i]})
- # Take only first two params
- linkmodes_params[$i]=$(echo "${arr[@]:0:2}")
- fi
- done
- echo ${linkmodes_params[@]}
-}
-
-dev_speeds_get()
-{
- local dev=$1; shift
- local with_mode=$1; shift
- local adver=$1; shift
- local speeds_str
-
- if (($adver)); then
- mode="Advertised link modes"
- else
- mode="Supported link modes"
- fi
-
- speeds_str=$(ethtool "$dev" | \
- # Snip everything before the link modes section.
- sed -n '/'"$mode"':/,$p' | \
- # Quit processing the rest at the start of the next section.
- # When checking, skip the header of this section (hence the 2,).
- sed -n '2,${/^[\t][^ \t]/q};p' | \
- # Drop the section header of the current section.
- cut -d':' -f2)
-
- local -a speeds_arr=($speeds_str)
- if [[ $with_mode -eq 0 ]]; then
- for ((i=0; i<${#speeds_arr[@]}; i++)); do
- speeds_arr[$i]=${speeds_arr[$i]%base*}
- done
- fi
- echo ${speeds_arr[@]}
-}
-
-common_speeds_get()
-{
- dev1=$1; shift
- dev2=$1; shift
- with_mode=$1; shift
- adver=$1; shift
-
- local -a dev1_speeds=($(dev_speeds_get $dev1 $with_mode $adver))
- local -a dev2_speeds=($(dev_speeds_get $dev2 $with_mode $adver))
-
- comm -12 \
- <(printf '%s\n' "${dev1_speeds[@]}" | sort -u) \
- <(printf '%s\n' "${dev2_speeds[@]}" | sort -u)
-}
-
-different_speeds_get()
-{
- local dev1=$1; shift
- local dev2=$1; shift
- local with_mode=$1; shift
- local adver=$1; shift
-
- local -a speeds_arr
-
- speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver))
- if [[ ${#speeds_arr[@]} < 2 ]]; then
- check_err 1 "cannot check different speeds. There are not enough speeds"
- fi
-
- echo ${speeds_arr[0]} ${speeds_arr[1]}
-}
diff --git a/tools/testing/selftests/net/forwarding/ethtool_mm.sh b/tools/testing/selftests/net/forwarding/ethtool_mm.sh
deleted file mode 100755
index 50d5bfb17e..0000000000
--- a/tools/testing/selftests/net/forwarding/ethtool_mm.sh
+++ /dev/null
@@ -1,340 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-ALL_TESTS="
- manual_with_verification_h1_to_h2
- manual_with_verification_h2_to_h1
- manual_without_verification_h1_to_h2
- manual_without_verification_h2_to_h1
- manual_failed_verification_h1_to_h2
- manual_failed_verification_h2_to_h1
- lldp
-"
-
-NUM_NETIFS=2
-REQUIRE_MZ=no
-PREEMPTIBLE_PRIO=0
-source lib.sh
-
-traffic_test()
-{
- local if=$1; shift
- local src=$1; shift
- local num_pkts=10000
- local before=
- local after=
- local delta=
-
- if [ ${has_pmac_stats[$if]} = false ]; then
- src="aggregate"
- fi
-
- before=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src)
-
- $MZ $if -q -c $num_pkts -p 64 -b bcast -t ip -R $PREEMPTIBLE_PRIO
-
- after=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src)
-
- delta=$((after - before))
-
- # Allow an extra 1% tolerance for random packets sent by the stack
- [ $delta -ge $num_pkts ] && [ $delta -le $((num_pkts + 100)) ]
-}
-
-manual_with_verification()
-{
- local tx=$1; shift
- local rx=$1; shift
-
- RET=0
-
- # It isn't completely clear from IEEE 802.3-2018 Figure 99-5: Transmit
- # Processing state diagram whether the "send_r" variable (send response
- # to verification frame) should be taken into consideration while the
- # MAC Merge TX direction is disabled. That being said, at least the
- # NXP ENETC does not, and requires tx-enabled on in order to respond to
- # the link partner's verification frames.
- ethtool --set-mm $rx tx-enabled on
- ethtool --set-mm $tx verify-enabled on tx-enabled on
-
- # Wait for verification to finish
- sleep 1
-
- ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
- grep -q 'SUCCEEDED'
- check_err "$?" "Verification did not succeed"
-
- ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
- check_err "$?" "pMAC TX is not active"
-
- traffic_test $tx "pmac"
- check_err "$?" "Traffic did not get sent through $tx's pMAC"
-
- ethtool --set-mm $tx verify-enabled off tx-enabled off
- ethtool --set-mm $rx tx-enabled off
-
- log_test "Manual configuration with verification: $tx to $rx"
-}
-
-manual_with_verification_h1_to_h2()
-{
- manual_with_verification $h1 $h2
-}
-
-manual_with_verification_h2_to_h1()
-{
- manual_with_verification $h2 $h1
-}
-
-manual_without_verification()
-{
- local tx=$1; shift
- local rx=$1; shift
-
- RET=0
-
- ethtool --set-mm $tx verify-enabled off tx-enabled on
-
- ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
- grep -q 'DISABLED'
- check_err "$?" "Verification is not disabled"
-
- ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
- check_err "$?" "pMAC TX is not active"
-
- traffic_test $tx "pmac"
- check_err "$?" "Traffic did not get sent through $tx's pMAC"
-
- ethtool --set-mm $tx verify-enabled off tx-enabled off
-
- log_test "Manual configuration without verification: $tx to $rx"
-}
-
-manual_without_verification_h1_to_h2()
-{
- manual_without_verification $h1 $h2
-}
-
-manual_without_verification_h2_to_h1()
-{
- manual_without_verification $h2 $h1
-}
-
-manual_failed_verification()
-{
- local tx=$1; shift
- local rx=$1; shift
-
- RET=0
-
- ethtool --set-mm $rx pmac-enabled off
- ethtool --set-mm $tx verify-enabled on tx-enabled on
-
- # Wait for verification to time out
- sleep 1
-
- ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
- grep -q 'SUCCEEDED'
- check_fail "$?" "Verification succeeded when it shouldn't have"
-
- ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
- check_fail "$?" "pMAC TX is active when it shouldn't have"
-
- traffic_test $tx "emac"
- check_err "$?" "Traffic did not get sent through $tx's eMAC"
-
- ethtool --set-mm $tx verify-enabled off tx-enabled off
- ethtool --set-mm $rx pmac-enabled on
-
- log_test "Manual configuration with failed verification: $tx to $rx"
-}
-
-manual_failed_verification_h1_to_h2()
-{
- manual_failed_verification $h1 $h2
-}
-
-manual_failed_verification_h2_to_h1()
-{
- manual_failed_verification $h2 $h1
-}
-
-smallest_supported_add_frag_size()
-{
- local iface=$1
- local rx_min_frag_size=
-
- rx_min_frag_size=$(ethtool --json --show-mm $iface | \
- jq '.[]."rx-min-frag-size"')
-
- if [ $rx_min_frag_size -le 60 ]; then
- echo 0
- elif [ $rx_min_frag_size -le 124 ]; then
- echo 1
- elif [ $rx_min_frag_size -le 188 ]; then
- echo 2
- elif [ $rx_min_frag_size -le 252 ]; then
- echo 3
- else
- echo "$iface: RX min frag size $rx_min_frag_size cannot be advertised over LLDP"
- exit 1
- fi
-}
-
-expected_add_frag_size()
-{
- local iface=$1
- local requested=$2
- local min=$(smallest_supported_add_frag_size $iface)
-
- [ $requested -le $min ] && echo $min || echo $requested
-}
-
-lldp_change_add_frag_size()
-{
- local add_frag_size=$1
- local pattern=
-
- lldptool -T -i $h1 -V addEthCaps addFragSize=$add_frag_size >/dev/null
- # Wait for TLVs to be received
- sleep 2
- pattern=$(printf "Additional fragment size: %d" \
- $(expected_add_frag_size $h1 $add_frag_size))
- lldptool -i $h2 -t -n -V addEthCaps | grep -q "$pattern"
-}
-
-lldp()
-{
- RET=0
-
- systemctl start lldpad
-
- # Configure the interfaces to receive and transmit LLDPDUs
- lldptool -L -i $h1 adminStatus=rxtx >/dev/null
- lldptool -L -i $h2 adminStatus=rxtx >/dev/null
-
- # Enable the transmission of Additional Ethernet Capabilities TLV
- lldptool -T -i $h1 -V addEthCaps enableTx=yes >/dev/null
- lldptool -T -i $h2 -V addEthCaps enableTx=yes >/dev/null
-
- # Wait for TLVs to be received
- sleep 2
-
- lldptool -i $h1 -t -n -V addEthCaps | \
- grep -q "Preemption capability active"
- check_err "$?" "$h1 pMAC TX is not active"
-
- lldptool -i $h2 -t -n -V addEthCaps | \
- grep -q "Preemption capability active"
- check_err "$?" "$h2 pMAC TX is not active"
-
- lldp_change_add_frag_size 3
- check_err "$?" "addFragSize 3"
-
- lldp_change_add_frag_size 2
- check_err "$?" "addFragSize 2"
-
- lldp_change_add_frag_size 1
- check_err "$?" "addFragSize 1"
-
- lldp_change_add_frag_size 0
- check_err "$?" "addFragSize 0"
-
- traffic_test $h1 "pmac"
- check_err "$?" "Traffic did not get sent through $h1's pMAC"
-
- traffic_test $h2 "pmac"
- check_err "$?" "Traffic did not get sent through $h2's pMAC"
-
- systemctl stop lldpad
-
- log_test "LLDP"
-}
-
-h1_create()
-{
- ip link set dev $h1 up
-
- tc qdisc add dev $h1 root mqprio num_tc 4 map 0 1 2 3 \
- queues 1@0 1@1 1@2 1@3 \
- fp P E E E \
- hw 1
-
- ethtool --set-mm $h1 pmac-enabled on tx-enabled off verify-enabled off
-}
-
-h2_create()
-{
- ip link set dev $h2 up
-
- ethtool --set-mm $h2 pmac-enabled on tx-enabled off verify-enabled off
-
- tc qdisc add dev $h2 root mqprio num_tc 4 map 0 1 2 3 \
- queues 1@0 1@1 1@2 1@3 \
- fp P E E E \
- hw 1
-}
-
-h1_destroy()
-{
- ethtool --set-mm $h1 pmac-enabled off tx-enabled off verify-enabled off
-
- tc qdisc del dev $h1 root
-
- ip link set dev $h1 down
-}
-
-h2_destroy()
-{
- tc qdisc del dev $h2 root
-
- ethtool --set-mm $h2 pmac-enabled off tx-enabled off verify-enabled off
-
- ip link set dev $h2 down
-}
-
-setup_prepare()
-{
- h1=${NETIFS[p1]}
- h2=${NETIFS[p2]}
-
- h1_create
- h2_create
-}
-
-cleanup()
-{
- pre_cleanup
-
- h2_destroy
- h1_destroy
-}
-
-check_ethtool_mm_support
-check_tc_fp_support
-require_command lldptool
-bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually"
-
-for netif in ${NETIFS[@]}; do
- ethtool --show-mm $netif 2>&1 &> /dev/null
- if [[ $? -ne 0 ]]; then
- echo "SKIP: $netif does not support MAC Merge"
- exit $ksft_skip
- fi
-
- if check_ethtool_pmac_std_stats_support $netif eth-mac; then
- has_pmac_stats[$netif]=true
- else
- has_pmac_stats[$netif]=false
- echo "$netif does not report pMAC statistics, falling back to aggregate"
- fi
-done
-
-trap cleanup EXIT
-
-setup_prepare
-setup_wait
-
-tests_run
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ethtool_rmon.sh b/tools/testing/selftests/net/forwarding/ethtool_rmon.sh
deleted file mode 100755
index e78776db85..0000000000
--- a/tools/testing/selftests/net/forwarding/ethtool_rmon.sh
+++ /dev/null
@@ -1,143 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-ALL_TESTS="
- rmon_rx_histogram
- rmon_tx_histogram
-"
-
-NUM_NETIFS=2
-source lib.sh
-
-ETH_FCS_LEN=4
-ETH_HLEN=$((6+6+2))
-
-declare -A netif_mtu
-
-ensure_mtu()
-{
- local iface=$1; shift
- local len=$1; shift
- local current=$(ip -j link show dev $iface | jq -r '.[0].mtu')
- local required=$((len - ETH_HLEN - ETH_FCS_LEN))
-
- if [ $current -lt $required ]; then
- ip link set dev $iface mtu $required || return 1
- fi
-}
-
-bucket_test()
-{
- local iface=$1; shift
- local neigh=$1; shift
- local set=$1; shift
- local bucket=$1; shift
- local len=$1; shift
- local num_rx=10000
- local num_tx=20000
- local expected=
- local before=
- local after=
- local delta=
-
- # Mausezahn does not include FCS bytes in its length - but the
- # histogram counters do
- len=$((len - ETH_FCS_LEN))
-
- before=$(ethtool --json -S $iface --groups rmon | \
- jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val")
-
- # Send 10k one way and 20k in the other, to detect counters
- # mapped to the wrong direction
- $MZ $neigh -q -c $num_rx -p $len -a own -b bcast -d 10us
- $MZ $iface -q -c $num_tx -p $len -a own -b bcast -d 10us
-
- after=$(ethtool --json -S $iface --groups rmon | \
- jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val")
-
- delta=$((after - before))
-
- expected=$([ $set = rx ] && echo $num_rx || echo $num_tx)
-
- # Allow some extra tolerance for other packets sent by the stack
- [ $delta -ge $expected ] && [ $delta -le $((expected + 100)) ]
-}
-
-rmon_histogram()
-{
- local iface=$1; shift
- local neigh=$1; shift
- local set=$1; shift
- local nbuckets=0
- local step=
-
- RET=0
-
- while read -r -a bucket; do
- step="$set-pkts${bucket[0]}to${bucket[1]} on $iface"
-
- for if in $iface $neigh; do
- if ! ensure_mtu $if ${bucket[0]}; then
- log_test_xfail "$if does not support the required MTU for $step"
- return
- fi
- done
-
- if ! bucket_test $iface $neigh $set $nbuckets ${bucket[0]}; then
- check_err 1 "$step failed"
- return 1
- fi
- log_test "$step"
- nbuckets=$((nbuckets + 1))
- done < <(ethtool --json -S $iface --groups rmon | \
- jq -r ".[0].rmon[\"${set}-pktsNtoM\"][]|[.low, .high]|@tsv" 2>/dev/null)
-
- if [ $nbuckets -eq 0 ]; then
- log_test_xfail "$iface does not support $set histogram counters"
- return
- fi
-}
-
-rmon_rx_histogram()
-{
- rmon_histogram $h1 $h2 rx
- rmon_histogram $h2 $h1 rx
-}
-
-rmon_tx_histogram()
-{
- rmon_histogram $h1 $h2 tx
- rmon_histogram $h2 $h1 tx
-}
-
-setup_prepare()
-{
- h1=${NETIFS[p1]}
- h2=${NETIFS[p2]}
-
- for iface in $h1 $h2; do
- netif_mtu[$iface]=$(ip -j link show dev $iface | jq -r '.[0].mtu')
- ip link set dev $iface up
- done
-}
-
-cleanup()
-{
- pre_cleanup
-
- for iface in $h2 $h1; do
- ip link set dev $iface \
- mtu ${netif_mtu[$iface]} \
- down
- done
-}
-
-check_ethtool_counter_group_support
-trap cleanup EXIT
-
-setup_prepare
-setup_wait
-
-tests_run
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
index 1fc4f0242f..f1ca95e79a 100644
--- a/tools/testing/selftests/net/forwarding/forwarding.config.sample
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -3,51 +3,28 @@
##############################################################################
# Topology description. p1 looped back to p2, p3 to p4 and so on.
-declare -A NETIFS
-NETIFS[p1]=veth0
-NETIFS[p2]=veth1
-NETIFS[p3]=veth2
-NETIFS[p4]=veth3
-NETIFS[p5]=veth4
-NETIFS[p6]=veth5
-NETIFS[p7]=veth6
-NETIFS[p8]=veth7
-NETIFS[p9]=veth8
-NETIFS[p10]=veth9
+NETIFS=(
+ [p1]=veth0
+ [p2]=veth1
+ [p3]=veth2
+ [p4]=veth3
+ [p5]=veth4
+ [p6]=veth5
+ [p7]=veth6
+ [p8]=veth7
+ [p9]=veth8
+ [p10]=veth9
+)
# Port that does not have a cable connected.
NETIF_NO_CABLE=eth8
##############################################################################
-# Defines
+# In addition to the topology-related variables, it is also possible to override
+# in this file other variables that net/lib.sh, net/forwarding/lib.sh or other
+# libraries or selftests use. E.g.:
-# IPv4 ping utility name
-PING=ping
-# IPv6 ping utility name. Some distributions use 'ping' for IPv6.
PING6=ping6
-# Packet generator. Some distributions use 'mz'.
MZ=mausezahn
-# mausezahn delay between transmissions in microseconds.
-MZ_DELAY=0
-# Time to wait after interfaces participating in the test are all UP
WAIT_TIME=5
-# Whether to pause on failure or not.
-PAUSE_ON_FAIL=no
-# Whether to pause on cleanup or not.
-PAUSE_ON_CLEANUP=no
-# Type of network interface to create
-NETIF_TYPE=veth
-# Whether to create virtual interfaces (veth) or not
-NETIF_CREATE=yes
-# Timeout (in seconds) before ping exits regardless of how many packets have
-# been sent or received
-PING_TIMEOUT=5
-# Minimum ageing_time (in centiseconds) supported by hardware
-LOW_AGEING_TIME=1000
-# Flag for tc match, supposed to be skip_sw/skip_hw which means do not process
-# filter by software/hardware
-TC_FLAG=skip_hw
-# IPv6 traceroute utility name.
-TROUTE6=traceroute6
-
diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
deleted file mode 100755
index 48584a5138..0000000000
--- a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
+++ /dev/null
@@ -1,340 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# +--------------------+ +----------------------+
-# | H1 | | H2 |
-# | | | |
-# | $h1.200 + | | + $h2.200 |
-# | 192.0.2.1/28 | | | | 192.0.2.18/28 |
-# | 2001:db8:1::1/64 | | | | 2001:db8:2::1/64 |
-# | | | | | |
-# | $h1 + | | + $h2 |
-# | | | | | |
-# +------------------|-+ +-|--------------------+
-# | |
-# +------------------|-------------------------|--------------------+
-# | SW | | |
-# | | | |
-# | $rp1 + + $rp2 |
-# | | | |
-# | $rp1.200 + + $rp2.200 |
-# | 192.0.2.2/28 192.0.2.17/28 |
-# | 2001:db8:1::2/64 2001:db8:2::2/64 |
-# | |
-# +-----------------------------------------------------------------+
-
-ALL_TESTS="
- ping_ipv4
- ping_ipv6
- test_stats_rx_ipv4
- test_stats_tx_ipv4
- test_stats_rx_ipv6
- test_stats_tx_ipv6
- respin_enablement
- test_stats_rx_ipv4
- test_stats_tx_ipv4
- test_stats_rx_ipv6
- test_stats_tx_ipv6
- reapply_config
- ping_ipv4
- ping_ipv6
- test_stats_rx_ipv4
- test_stats_tx_ipv4
- test_stats_rx_ipv6
- test_stats_tx_ipv6
- test_stats_report_rx
- test_stats_report_tx
- test_destroy_enabled
- test_double_enable
-"
-NUM_NETIFS=4
-source lib.sh
-
-h1_create()
-{
- simple_if_init $h1
- vlan_create $h1 200 v$h1 192.0.2.1/28 2001:db8:1::1/64
- ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
- ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
-}
-
-h1_destroy()
-{
- ip -6 route del 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
- ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
- vlan_destroy $h1 200
- simple_if_fini $h1
-}
-
-h2_create()
-{
- simple_if_init $h2
- vlan_create $h2 200 v$h2 192.0.2.18/28 2001:db8:2::1/64
- ip route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17
- ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2
-}
-
-h2_destroy()
-{
- ip -6 route del 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2
- ip route del 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17
- vlan_destroy $h2 200
- simple_if_fini $h2
-}
-
-router_rp1_200_create()
-{
- ip link add name $rp1.200 link $rp1 type vlan id 200
- ip link set dev $rp1.200 addrgenmode eui64
- ip link set dev $rp1.200 up
- ip address add dev $rp1.200 192.0.2.2/28
- ip address add dev $rp1.200 2001:db8:1::2/64
- ip stats set dev $rp1.200 l3_stats on
-}
-
-router_rp1_200_destroy()
-{
- ip stats set dev $rp1.200 l3_stats off
- ip address del dev $rp1.200 2001:db8:1::2/64
- ip address del dev $rp1.200 192.0.2.2/28
- ip link del dev $rp1.200
-}
-
-router_create()
-{
- ip link set dev $rp1 up
- router_rp1_200_create
-
- ip link set dev $rp2 up
- vlan_create $rp2 200 "" 192.0.2.17/28 2001:db8:2::2/64
-}
-
-router_destroy()
-{
- vlan_destroy $rp2 200
- ip link set dev $rp2 down
-
- router_rp1_200_destroy
- ip link set dev $rp1 down
-}
-
-setup_prepare()
-{
- h1=${NETIFS[p1]}
- rp1=${NETIFS[p2]}
-
- rp2=${NETIFS[p3]}
- h2=${NETIFS[p4]}
-
- rp1mac=$(mac_get $rp1)
- rp2mac=$(mac_get $rp2)
-
- vrf_prepare
-
- h1_create
- h2_create
-
- router_create
-
- forwarding_enable
-}
-
-cleanup()
-{
- pre_cleanup
-
- forwarding_restore
-
- router_destroy
-
- h2_destroy
- h1_destroy
-
- vrf_cleanup
-}
-
-ping_ipv4()
-{
- ping_test $h1.200 192.0.2.18 " IPv4"
-}
-
-ping_ipv6()
-{
- ping_test $h1.200 2001:db8:2::1 " IPv6"
-}
-
-send_packets_rx_ipv4()
-{
- # Send 21 packets instead of 20, because the first one might trap and go
- # through the SW datapath, which might not bump the HW counter.
- $MZ $h1.200 -c 21 -d 20msec -p 100 \
- -a own -b $rp1mac -A 192.0.2.1 -B 192.0.2.18 \
- -q -t udp sp=54321,dp=12345
-}
-
-send_packets_rx_ipv6()
-{
- $MZ $h1.200 -6 -c 21 -d 20msec -p 100 \
- -a own -b $rp1mac -A 2001:db8:1::1 -B 2001:db8:2::1 \
- -q -t udp sp=54321,dp=12345
-}
-
-send_packets_tx_ipv4()
-{
- $MZ $h2.200 -c 21 -d 20msec -p 100 \
- -a own -b $rp2mac -A 192.0.2.18 -B 192.0.2.1 \
- -q -t udp sp=54321,dp=12345
-}
-
-send_packets_tx_ipv6()
-{
- $MZ $h2.200 -6 -c 21 -d 20msec -p 100 \
- -a own -b $rp2mac -A 2001:db8:2::1 -B 2001:db8:1::1 \
- -q -t udp sp=54321,dp=12345
-}
-
-___test_stats()
-{
- local dir=$1; shift
- local prot=$1; shift
-
- local a
- local b
-
- a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets)
- send_packets_${dir}_${prot}
- "$@"
- b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
- hw_stats_get l3_stats $rp1.200 ${dir} packets)
- check_err $? "Traffic not reflected in the counter: $a -> $b"
-}
-
-__test_stats()
-{
- local dir=$1; shift
- local prot=$1; shift
-
- RET=0
- ___test_stats "$dir" "$prot"
- log_test "Test $dir packets: $prot"
-}
-
-test_stats_rx_ipv4()
-{
- __test_stats rx ipv4
-}
-
-test_stats_tx_ipv4()
-{
- __test_stats tx ipv4
-}
-
-test_stats_rx_ipv6()
-{
- __test_stats rx ipv6
-}
-
-test_stats_tx_ipv6()
-{
- __test_stats tx ipv6
-}
-
-# Make sure everything works well even after stats have been disabled and
-# reenabled on the same device without touching the L3 configuration.
-respin_enablement()
-{
- log_info "Turning stats off and on again"
- ip stats set dev $rp1.200 l3_stats off
- ip stats set dev $rp1.200 l3_stats on
-}
-
-# For the initial run, l3_stats is enabled on a completely set up netdevice. Now
-# do it the other way around: enabling the L3 stats on an L2 netdevice, and only
-# then apply the L3 configuration.
-reapply_config()
-{
- log_info "Reapplying configuration"
-
- router_rp1_200_destroy
-
- ip link add name $rp1.200 link $rp1 type vlan id 200
- ip link set dev $rp1.200 addrgenmode none
- ip stats set dev $rp1.200 l3_stats on
- ip link set dev $rp1.200 addrgenmode eui64
- ip link set dev $rp1.200 up
- ip address add dev $rp1.200 192.0.2.2/28
- ip address add dev $rp1.200 2001:db8:1::2/64
-}
-
-__test_stats_report()
-{
- local dir=$1; shift
- local prot=$1; shift
-
- local a
- local b
-
- RET=0
-
- a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets)
- send_packets_${dir}_${prot}
- ip address flush dev $rp1.200
- b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
- hw_stats_get l3_stats $rp1.200 ${dir} packets)
- check_err $? "Traffic not reflected in the counter: $a -> $b"
- log_test "Test ${dir} packets: stats pushed on loss of L3"
-
- ip stats set dev $rp1.200 l3_stats off
- ip link del dev $rp1.200
- router_rp1_200_create
-}
-
-test_stats_report_rx()
-{
- __test_stats_report rx ipv4
-}
-
-test_stats_report_tx()
-{
- __test_stats_report tx ipv4
-}
-
-test_destroy_enabled()
-{
- RET=0
-
- ip link del dev $rp1.200
- router_rp1_200_create
-
- log_test "Destroy l3_stats-enabled netdev"
-}
-
-test_double_enable()
-{
- RET=0
- ___test_stats rx ipv4 \
- ip stats set dev $rp1.200 l3_stats on
- log_test "Test stat retention across a spurious enablement"
-}
-
-trap cleanup EXIT
-
-setup_prepare
-setup_wait
-
-used=$(ip -j stats show dev $rp1.200 group offload subgroup hw_stats_info |
- jq '.[].info.l3_stats.used')
-kind=$(ip -j -d link show dev $rp1 |
- jq -r '.[].linkinfo.info_kind')
-if [[ $used != true ]]; then
- if [[ $kind == veth ]]; then
- log_test_skip "l3_stats not offloaded on veth interface"
- EXIT_STATUS=$ksft_skip
- else
- RET=1 log_test "l3_stats not offloaded"
- fi
-else
- tests_run
-fi
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
deleted file mode 100755
index 7594bbb490..0000000000
--- a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
+++ /dev/null
@@ -1,111 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# Test L3 stats on IP-in-IP GRE tunnel without key.
-
-# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more
-# details.
-
-ALL_TESTS="
- ping_ipv4
- test_stats_rx
- test_stats_tx
-"
-NUM_NETIFS=6
-source lib.sh
-source ipip_lib.sh
-
-setup_prepare()
-{
- h1=${NETIFS[p1]}
- ol1=${NETIFS[p2]}
-
- ul1=${NETIFS[p3]}
- ul2=${NETIFS[p4]}
-
- ol2=${NETIFS[p5]}
- h2=${NETIFS[p6]}
-
- ol1mac=$(mac_get $ol1)
-
- forwarding_enable
- vrf_prepare
- h1_create
- h2_create
- sw1_flat_create gre $ol1 $ul1
- sw2_flat_create gre $ol2 $ul2
- ip stats set dev g1a l3_stats on
- ip stats set dev g2a l3_stats on
-}
-
-cleanup()
-{
- pre_cleanup
-
- ip stats set dev g1a l3_stats off
- ip stats set dev g2a l3_stats off
-
- sw2_flat_destroy $ol2 $ul2
- sw1_flat_destroy $ol1 $ul1
- h2_destroy
- h1_destroy
-
- vrf_cleanup
- forwarding_restore
-}
-
-ping_ipv4()
-{
- RET=0
-
- ping_test $h1 192.0.2.18 " gre flat"
-}
-
-send_packets_ipv4()
-{
- # Send 21 packets instead of 20, because the first one might trap and go
- # through the SW datapath, which might not bump the HW counter.
- $MZ $h1 -c 21 -d 20msec -p 100 \
- -a own -b $ol1mac -A 192.0.2.1 -B 192.0.2.18 \
- -q -t udp sp=54321,dp=12345
-}
-
-test_stats()
-{
- local dev=$1; shift
- local dir=$1; shift
-
- local a
- local b
-
- RET=0
-
- a=$(hw_stats_get l3_stats $dev $dir packets)
- send_packets_ipv4
- b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
- hw_stats_get l3_stats $dev $dir packets)
- check_err $? "Traffic not reflected in the counter: $a -> $b"
-
- log_test "Test $dir packets: $prot"
-}
-
-test_stats_tx()
-{
- test_stats g1a tx
-}
-
-test_stats_rx()
-{
- test_stats g2a rx
-}
-
-skip_on_veth
-
-trap cleanup EXIT
-
-setup_prepare
-setup_wait
-
-tests_run
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ipip_lib.sh b/tools/testing/selftests/net/forwarding/ipip_lib.sh
index 30f36a57ba..01e62c4ac9 100644
--- a/tools/testing/selftests/net/forwarding/ipip_lib.sh
+++ b/tools/testing/selftests/net/forwarding/ipip_lib.sh
@@ -141,7 +141,6 @@
# | $h2 + |
# | 192.0.2.18/28 |
# +---------------------------+
-source lib.sh
h1_create()
{
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index e78f11140e..eabbdf00d8 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -2,33 +2,124 @@
# SPDX-License-Identifier: GPL-2.0
##############################################################################
+# Topology description. p1 looped back to p2, p3 to p4 and so on.
+
+declare -A NETIFS=(
+ [p1]=veth0
+ [p2]=veth1
+ [p3]=veth2
+ [p4]=veth3
+ [p5]=veth4
+ [p6]=veth5
+ [p7]=veth6
+ [p8]=veth7
+ [p9]=veth8
+ [p10]=veth9
+)
+
+# Port that does not have a cable connected.
+: "${NETIF_NO_CABLE:=eth8}"
+
+##############################################################################
# Defines
-# Can be overridden by the configuration file.
-PING=${PING:=ping}
-PING6=${PING6:=ping6}
-MZ=${MZ:=mausezahn}
-MZ_DELAY=${MZ_DELAY:=0}
-ARPING=${ARPING:=arping}
-TEAMD=${TEAMD:=teamd}
-WAIT_TIME=${WAIT_TIME:=5}
-PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
-PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
-NETIF_TYPE=${NETIF_TYPE:=veth}
-NETIF_CREATE=${NETIF_CREATE:=yes}
-MCD=${MCD:=smcrouted}
-MC_CLI=${MC_CLI:=smcroutectl}
-PING_COUNT=${PING_COUNT:=10}
-PING_TIMEOUT=${PING_TIMEOUT:=5}
-WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
-INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600}
-LOW_AGEING_TIME=${LOW_AGEING_TIME:=1000}
-REQUIRE_JQ=${REQUIRE_JQ:=yes}
-REQUIRE_MZ=${REQUIRE_MZ:=yes}
-REQUIRE_MTOOLS=${REQUIRE_MTOOLS:=no}
-STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no}
-TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=}
-TROUTE6=${TROUTE6:=traceroute6}
+# Networking utilities.
+: "${PING:=ping}"
+: "${PING6:=ping6}" # Some distros just use ping.
+: "${ARPING:=arping}"
+: "${TROUTE6:=traceroute6}"
+
+# Packet generator.
+: "${MZ:=mausezahn}" # Some distributions use 'mz'.
+: "${MZ_DELAY:=0}"
+
+# Host configuration tools.
+: "${TEAMD:=teamd}"
+: "${MCD:=smcrouted}"
+: "${MC_CLI:=smcroutectl}"
+
+# Constants for netdevice bring-up:
+# Default time in seconds to wait for an interface to come up before giving up
+# and bailing out. Used during initial setup.
+: "${INTERFACE_TIMEOUT:=600}"
+# Like INTERFACE_TIMEOUT, but default for ad-hoc waiting in testing scripts.
+: "${WAIT_TIMEOUT:=20}"
+# Time to wait after interfaces participating in the test are all UP.
+: "${WAIT_TIME:=5}"
+
+# Whether to pause on, respectively, after a failure and before cleanup.
+: "${PAUSE_ON_FAIL:=no}"
+: "${PAUSE_ON_CLEANUP:=no}"
+
+# Whether to create virtual interfaces, and what netdevice type they should be.
+: "${NETIF_CREATE:=yes}"
+: "${NETIF_TYPE:=veth}"
+
+# Constants for ping tests:
+# How many packets should be sent.
+: "${PING_COUNT:=10}"
+# Timeout (in seconds) before ping exits regardless of how many packets have
+# been sent or received
+: "${PING_TIMEOUT:=5}"
+
+# Minimum ageing_time (in centiseconds) supported by hardware
+: "${LOW_AGEING_TIME:=1000}"
+
+# Whether to check for availability of certain tools.
+: "${REQUIRE_JQ:=yes}"
+: "${REQUIRE_MZ:=yes}"
+: "${REQUIRE_MTOOLS:=no}"
+
+# Whether to override MAC addresses on interfaces participating in the test.
+: "${STABLE_MAC_ADDRS:=no}"
+
+# Flags for tcpdump
+: "${TCPDUMP_EXTRA_FLAGS:=}"
+
+# Flags for TC filters.
+: "${TC_FLAG:=skip_hw}"
+
+# Whether the machine is "slow" -- i.e. might be incapable of running tests
+# involving heavy traffic. This might be the case on a debug kernel, a VM, or
+# e.g. a low-power board.
+: "${KSFT_MACHINE_SLOW:=no}"
+
+##############################################################################
+# Find netifs by test-specified driver name
+
+driver_name_get()
+{
+ local dev=$1; shift
+ local driver_path="/sys/class/net/$dev/device/driver"
+
+ if [[ -L $driver_path ]]; then
+ basename `realpath $driver_path`
+ fi
+}
+
+netif_find_driver()
+{
+ local ifnames=`ip -j link show | jq -r ".[].ifname"`
+ local count=0
+
+ for ifname in $ifnames
+ do
+ local driver_name=`driver_name_get $ifname`
+ if [[ ! -z $driver_name && $driver_name == $NETIF_FIND_DRIVER ]]; then
+ count=$((count + 1))
+ NETIFS[p$count]="$ifname"
+ fi
+ done
+}
+
+# Whether to find netdevice according to the driver speficied by the importer
+: "${NETIF_FIND_DRIVER:=}"
+
+if [[ $NETIF_FIND_DRIVER ]]; then
+ unset NETIFS
+ declare -A NETIFS
+ netif_find_driver
+fi
net_forwarding_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
@@ -179,22 +270,23 @@ check_port_mab_support()
fi
}
-skip_on_veth()
+if [[ "$(id -u)" -ne 0 ]]; then
+ echo "SKIP: need root privileges"
+ exit $ksft_skip
+fi
+
+check_driver()
{
- local kind=$(ip -j -d link show dev ${NETIFS[p1]} |
- jq -r '.[].linkinfo.info_kind')
+ local dev=$1; shift
+ local expected=$1; shift
+ local driver_name=`driver_name_get $dev`
- if [[ $kind == veth ]]; then
- echo "SKIP: Test cannot be run with veth pairs"
+ if [[ $driver_name != $expected ]]; then
+ echo "SKIP: expected driver $expected for $dev, got $driver_name instead"
exit $ksft_skip
fi
}
-if [[ "$(id -u)" -ne 0 ]]; then
- echo "SKIP: need root privileges"
- exit $ksft_skip
-fi
-
if [[ "$CHECK_TC" = "yes" ]]; then
check_tc_version
fi
@@ -209,6 +301,21 @@ require_command()
fi
}
+# IPv6 support was added in v3.0
+check_mtools_version()
+{
+ local version="$(msend -v)"
+ local major
+
+ version=${version##msend version }
+ major=$(echo $version | cut -d. -f1)
+
+ if [ $major -lt 3 ]; then
+ echo "SKIP: expected mtools version 3.0, got $version"
+ exit $ksft_skip
+ fi
+}
+
if [[ "$REQUIRE_JQ" = "yes" ]]; then
require_command jq
fi
@@ -216,15 +323,10 @@ if [[ "$REQUIRE_MZ" = "yes" ]]; then
require_command $MZ
fi
if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then
- # https://github.com/vladimiroltean/mtools/
- # patched for IPv6 support
+ # https://github.com/troglobit/mtools
require_command msend
require_command mreceive
-fi
-
-if [[ ! -v NUM_NETIFS ]]; then
- echo "SKIP: importer does not define \"NUM_NETIFS\""
- exit $ksft_skip
+ check_mtools_version
fi
##############################################################################
@@ -245,6 +347,23 @@ done
##############################################################################
# Network interfaces configuration
+if [[ ! -v NUM_NETIFS ]]; then
+ echo "SKIP: importer does not define \"NUM_NETIFS\""
+ exit $ksft_skip
+fi
+
+if (( NUM_NETIFS > ${#NETIFS[@]} )); then
+ echo "SKIP: Importer requires $NUM_NETIFS NETIFS, but only ${#NETIFS[@]} are defined (${NETIFS[@]})"
+ exit $ksft_skip
+fi
+
+for i in $(seq ${#NETIFS[@]}); do
+ if [[ ! ${NETIFS[p$i]} ]]; then
+ echo "SKIP: NETIFS[p$i] not given"
+ exit $ksft_skip
+ fi
+done
+
create_netif_veth()
{
local i
@@ -343,13 +462,20 @@ ret_set_ksft_status()
fi
}
+# Whether FAILs should be interpreted as XFAILs. Internal.
+FAIL_TO_XFAIL=
+
check_err()
{
local err=$1
local msg=$2
if ((err)); then
- ret_set_ksft_status $ksft_fail "$msg"
+ if [[ $FAIL_TO_XFAIL = yes ]]; then
+ ret_set_ksft_status $ksft_xfail "$msg"
+ else
+ ret_set_ksft_status $ksft_fail "$msg"
+ fi
fi
}
@@ -374,6 +500,29 @@ check_err_fail()
fi
}
+xfail_on_slow()
+{
+ if [[ $KSFT_MACHINE_SLOW = yes ]]; then
+ FAIL_TO_XFAIL=yes "$@"
+ else
+ "$@"
+ fi
+}
+
+xfail_on_veth()
+{
+ local dev=$1; shift
+ local kind
+
+ kind=$(ip -j -d link show dev $dev |
+ jq -r '.[].linkinfo.info_kind')
+ if [[ $kind = veth ]]; then
+ FAIL_TO_XFAIL=yes "$@"
+ else
+ "$@"
+ fi
+}
+
log_test_result()
{
local test_name=$1; shift
@@ -569,6 +718,19 @@ setup_wait()
sleep $WAIT_TIME
}
+wait_for_dev()
+{
+ local dev=$1; shift
+ local timeout=${1:-$WAIT_TIMEOUT}; shift
+
+ slowwait $timeout ip link show dev $dev &> /dev/null
+ if (( $? )); then
+ check_err 1
+ log_test wait_for_dev "Interface $dev did not appear."
+ exit $EXIT_STATUS
+ fi
+}
+
cmd_jq()
{
local cmd=$1
@@ -1995,6 +2157,8 @@ bail_on_lldpad()
{
local reason1="$1"; shift
local reason2="$1"; shift
+ local caller=${FUNCNAME[1]}
+ local src=${BASH_SOURCE[1]}
if systemctl is-active --quiet lldpad; then
@@ -2015,7 +2179,8 @@ bail_on_lldpad()
an environment variable ALLOW_LLDPAD to a
non-empty string.
EOF
- exit 1
+ log_test_skip $src:$caller
+ exit $EXIT_STATUS
else
return
fi
diff --git a/tools/testing/selftests/net/forwarding/lib_sh_test.sh b/tools/testing/selftests/net/forwarding/lib_sh_test.sh
new file mode 100755
index 0000000000..ff2accccaf
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/lib_sh_test.sh
@@ -0,0 +1,208 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This tests the operation of lib.sh itself.
+
+ALL_TESTS="
+ test_ret
+ test_exit_status
+"
+NUM_NETIFS=0
+source lib.sh
+
+# Simulated checks.
+
+do_test()
+{
+ local msg=$1; shift
+
+ "$@"
+ check_err $? "$msg"
+}
+
+tpass()
+{
+ do_test "tpass" true
+}
+
+tfail()
+{
+ do_test "tfail" false
+}
+
+txfail()
+{
+ FAIL_TO_XFAIL=yes do_test "txfail" false
+}
+
+# Simulated tests.
+
+pass()
+{
+ RET=0
+ do_test "true" true
+ log_test "true"
+}
+
+fail()
+{
+ RET=0
+ do_test "false" false
+ log_test "false"
+}
+
+xfail()
+{
+ RET=0
+ FAIL_TO_XFAIL=yes do_test "xfalse" false
+ log_test "xfalse"
+}
+
+skip()
+{
+ RET=0
+ log_test_skip "skip"
+}
+
+slow_xfail()
+{
+ RET=0
+ xfail_on_slow do_test "slow_false" false
+ log_test "slow_false"
+}
+
+# lib.sh tests.
+
+ret_tests_run()
+{
+ local t
+
+ RET=0
+ retmsg=
+ for t in "$@"; do
+ $t
+ done
+ echo "$retmsg"
+ return $RET
+}
+
+ret_subtest()
+{
+ local expect_ret=$1; shift
+ local expect_retmsg=$1; shift
+ local -a tests=( "$@" )
+
+ local status_names=(pass fail xfail xpass skip)
+ local ret
+ local out
+
+ RET=0
+
+ # Run this in a subshell, so that our environment is intact.
+ out=$(ret_tests_run "${tests[@]}")
+ ret=$?
+
+ (( ret == expect_ret ))
+ check_err $? "RET=$ret expected $expect_ret"
+
+ [[ $out == $expect_retmsg ]]
+ check_err $? "retmsg=$out expected $expect_retmsg"
+
+ log_test "RET $(echo ${tests[@]}) -> ${status_names[$ret]}"
+}
+
+test_ret()
+{
+ ret_subtest $ksft_pass ""
+
+ ret_subtest $ksft_pass "" tpass
+ ret_subtest $ksft_fail "tfail" tfail
+ ret_subtest $ksft_xfail "txfail" txfail
+
+ ret_subtest $ksft_pass "" tpass tpass
+ ret_subtest $ksft_fail "tfail" tpass tfail
+ ret_subtest $ksft_xfail "txfail" tpass txfail
+
+ ret_subtest $ksft_fail "tfail" tfail tpass
+ ret_subtest $ksft_xfail "txfail" txfail tpass
+
+ ret_subtest $ksft_fail "tfail" tfail tfail
+ ret_subtest $ksft_fail "tfail" tfail txfail
+
+ ret_subtest $ksft_fail "tfail" txfail tfail
+
+ ret_subtest $ksft_xfail "txfail" txfail txfail
+}
+
+exit_status_tests_run()
+{
+ EXIT_STATUS=0
+ tests_run > /dev/null
+ return $EXIT_STATUS
+}
+
+exit_status_subtest()
+{
+ local expect_exit_status=$1; shift
+ local tests=$1; shift
+ local what=$1; shift
+
+ local status_names=(pass fail xfail xpass skip)
+ local exit_status
+ local out
+
+ RET=0
+
+ # Run this in a subshell, so that our environment is intact.
+ out=$(TESTS="$tests" exit_status_tests_run)
+ exit_status=$?
+
+ (( exit_status == expect_exit_status ))
+ check_err $? "EXIT_STATUS=$exit_status, expected $expect_exit_status"
+
+ log_test "EXIT_STATUS $tests$what -> ${status_names[$exit_status]}"
+}
+
+test_exit_status()
+{
+ exit_status_subtest $ksft_pass ":"
+
+ exit_status_subtest $ksft_pass "pass"
+ exit_status_subtest $ksft_fail "fail"
+ exit_status_subtest $ksft_pass "xfail"
+ exit_status_subtest $ksft_skip "skip"
+
+ exit_status_subtest $ksft_pass "pass pass"
+ exit_status_subtest $ksft_fail "pass fail"
+ exit_status_subtest $ksft_pass "pass xfail"
+ exit_status_subtest $ksft_skip "pass skip"
+
+ exit_status_subtest $ksft_fail "fail pass"
+ exit_status_subtest $ksft_pass "xfail pass"
+ exit_status_subtest $ksft_skip "skip pass"
+
+ exit_status_subtest $ksft_fail "fail fail"
+ exit_status_subtest $ksft_fail "fail xfail"
+ exit_status_subtest $ksft_fail "fail skip"
+
+ exit_status_subtest $ksft_fail "xfail fail"
+ exit_status_subtest $ksft_fail "skip fail"
+
+ exit_status_subtest $ksft_pass "xfail xfail"
+ exit_status_subtest $ksft_skip "xfail skip"
+ exit_status_subtest $ksft_skip "skip xfail"
+
+ exit_status_subtest $ksft_skip "skip skip"
+
+ KSFT_MACHINE_SLOW=yes \
+ exit_status_subtest $ksft_pass "slow_xfail" ": slow"
+
+ KSFT_MACHINE_SLOW=no \
+ exit_status_subtest $ksft_fail "slow_xfail" ": fast"
+}
+
+trap pre_cleanup EXIT
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh
index c5b0cbc85b..4b364cdf3e 100755
--- a/tools/testing/selftests/net/forwarding/local_termination.sh
+++ b/tools/testing/selftests/net/forwarding/local_termination.sh
@@ -155,25 +155,30 @@ run_test()
"$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \
true
- check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \
- "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \
- false
+ xfail_on_veth $h1 \
+ check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \
+ "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \
+ false
check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \
"$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \
true
- check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, allmulti" \
- "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \
- false
+ xfail_on_veth $h1 \
+ check_rcv $rcv_if_name \
+ "Unicast IPv4 to unknown MAC address, allmulti" \
+ "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \
+ false
check_rcv $rcv_if_name "Multicast IPv4 to joined group" \
"$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \
true
- check_rcv $rcv_if_name "Multicast IPv4 to unknown group" \
- "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \
- false
+ xfail_on_veth $h1 \
+ check_rcv $rcv_if_name \
+ "Multicast IPv4 to unknown group" \
+ "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \
+ false
check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \
"$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \
@@ -187,9 +192,10 @@ run_test()
"$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \
true
- check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \
- "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \
- false
+ xfail_on_veth $h1 \
+ check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \
+ "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \
+ false
check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \
"$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \
diff --git a/tools/testing/selftests/net/forwarding/loopback.sh b/tools/testing/selftests/net/forwarding/loopback.sh
deleted file mode 100755
index 8f4057310b..0000000000
--- a/tools/testing/selftests/net/forwarding/loopback.sh
+++ /dev/null
@@ -1,102 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-ALL_TESTS="loopback_test"
-NUM_NETIFS=2
-source tc_common.sh
-source lib.sh
-
-h1_create()
-{
- simple_if_init $h1 192.0.2.1/24
- tc qdisc add dev $h1 clsact
-}
-
-h1_destroy()
-{
- tc qdisc del dev $h1 clsact
- simple_if_fini $h1 192.0.2.1/24
-}
-
-h2_create()
-{
- simple_if_init $h2
-}
-
-h2_destroy()
-{
- simple_if_fini $h2
-}
-
-loopback_test()
-{
- RET=0
-
- tc filter add dev $h1 ingress protocol arp pref 1 handle 101 flower \
- skip_hw arp_op reply arp_tip 192.0.2.1 action drop
-
- $MZ $h1 -c 1 -t arp -q
-
- tc_check_packets "dev $h1 ingress" 101 1
- check_fail $? "Matched on a filter without loopback setup"
-
- ethtool -K $h1 loopback on
- check_err $? "Failed to enable loopback"
-
- setup_wait_dev $h1
-
- $MZ $h1 -c 1 -t arp -q
-
- tc_check_packets "dev $h1 ingress" 101 1
- check_err $? "Did not match on filter with loopback"
-
- ethtool -K $h1 loopback off
- check_err $? "Failed to disable loopback"
-
- $MZ $h1 -c 1 -t arp -q
-
- tc_check_packets "dev $h1 ingress" 101 2
- check_fail $? "Matched on a filter after loopback was removed"
-
- tc filter del dev $h1 ingress protocol arp pref 1 handle 101 flower
-
- log_test "loopback"
-}
-
-setup_prepare()
-{
- h1=${NETIFS[p1]}
- h2=${NETIFS[p2]}
-
- vrf_prepare
-
- h1_create
- h2_create
-
- if ethtool -k $h1 | grep loopback | grep -q fixed; then
- log_test "SKIP: dev $h1 does not support loopback feature"
- exit $ksft_skip
- fi
-}
-
-cleanup()
-{
- pre_cleanup
-
- h2_destroy
- h1_destroy
-
- vrf_cleanup
-}
-
-trap cleanup EXIT
-
-setup_prepare
-setup_wait
-
-tests_run
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index 3f0f5dc955..2ba44247c6 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -1,6 +1,41 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# +-------------------------+
+# | H1 |
+# | $h1 + |
+# | 192.0.2.2/24 | |
+# | 2001:db8:1::2/64 | |
+# +-------------------|-----+
+# |
+# +-------------------|----------------------+
+# | | R1 |
+# | $rp11 + |
+# | 192.0.2.1/24 |
+# | 2001:db8:1::1/64 |
+# | |
+# | + $rp12 + $rp13 |
+# | | 169.254.2.12/24 | 169.254.3.13/24 |
+# | | fe80:2::12/64 | fe80:3::13/64 |
+# +--|--------------------|------------------+
+# | |
+# +--|--------------------|------------------+
+# | + $rp22 + $rp23 |
+# | 169.254.2.22/24 169.254.3.23/24 |
+# | fe80:2::22/64 fe80:3::23/64 |
+# | |
+# | $rp21 + |
+# | 198.51.100.1/24 | |
+# | 2001:db8:2::1/64 | R2 |
+# +-------------------|----------------------+
+# |
+# +-------------------|-----+
+# | | |
+# | $h2 + |
+# | 198.51.100.2/24 |
+# | 2001:db8:2::2/64 H2 |
+# +-------------------------+
+
ALL_TESTS="
ping_ipv4
ping_ipv6
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
index b2d2c6cecc..2903294d8b 100644
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
@@ -56,21 +56,12 @@ nh_stats_test_dispatch_swhw()
local group_id=$1; shift
local mz="$@"
- local used
-
nh_stats_do_test "$what" "$nh1_id" "$nh2_id" "$group_id" \
nh_stats_get "${mz[@]}"
- used=$(ip -s -j -d nexthop show id $group_id |
- jq '.[].hw_stats.used')
- kind=$(ip -j -d link show dev $rp11 |
- jq -r '.[].linkinfo.info_kind')
- if [[ $used == true ]]; then
+ xfail_on_veth $rp11 \
nh_stats_do_test "HW $what" "$nh1_id" "$nh2_id" "$group_id" \
nh_stats_get_hw "${mz[@]}"
- elif [[ $kind == veth ]]; then
- log_test_xfail "HW stats not offloaded on veth topology"
- fi
}
nh_stats_test_dispatch()
@@ -83,7 +74,6 @@ nh_stats_test_dispatch()
local mz="$@"
local enabled
- local kind
if ! ip nexthop help 2>&1 | grep -q hw_stats; then
log_test_skip "NH stats test: ip doesn't support HW stats"
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
index 4b483d24ad..cd9e346436 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -1,6 +1,41 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# +-------------------------+
+# | H1 |
+# | $h1 + |
+# | 192.0.2.2/24 | |
+# | 2001:db8:1::2/64 | |
+# +-------------------|-----+
+# |
+# +-------------------|----------------------+
+# | | R1 |
+# | $rp11 + |
+# | 192.0.2.1/24 |
+# | 2001:db8:1::1/64 |
+# | |
+# | + $rp12 + $rp13 |
+# | | 169.254.2.12/24 | 169.254.3.13/24 |
+# | | fe80:2::12/64 | fe80:3::13/64 |
+# +--|--------------------|------------------+
+# | |
+# +--|--------------------|------------------+
+# | + $rp22 + $rp23 |
+# | 169.254.2.22/24 169.254.3.23/24 |
+# | fe80:2::22/64 fe80:3::23/64 |
+# | |
+# | $rp21 + |
+# | 198.51.100.1/24 | |
+# | 2001:db8:2::1/64 | R2 |
+# +-------------------|----------------------+
+# |
+# +-------------------|-----+
+# | | |
+# | $h2 + |
+# | 198.51.100.2/24 |
+# | 2001:db8:2::2/64 H2 |
+# +-------------------------+
+
ALL_TESTS="
ping_ipv4
ping_ipv6
diff --git a/tools/testing/selftests/net/forwarding/router_nh.sh b/tools/testing/selftests/net/forwarding/router_nh.sh
index f3a53738bd..92904b01ea 100755
--- a/tools/testing/selftests/net/forwarding/router_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_nh.sh
@@ -1,6 +1,20 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# +-------------------------+ +-------------------------+
+# | H1 | | H2 |
+# | $h1 + | | $h2 + |
+# | 192.0.2.2/24 | | | 198.51.100.2/24 | |
+# | 2001:db8:1::2/64 | | | 2001:db8:2::2/64 | |
+# +-------------------|-----+ +-------------------|-----+
+# | |
+# +-------------------|----------------------------|-----+
+# | R1 | | |
+# | $rp1 + $rp2 + |
+# | 192.0.2.1/24 198.51.100.1/24 |
+# | 2001:db8:1::1/64 2001:db8:2::1/64 |
+# +------------------------------------------------------+
+
ALL_TESTS="
ping_ipv4
ping_ipv6
diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
index cdf689e994..f9d26a7911 100644
--- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
@@ -199,25 +199,28 @@ ets_set_dwrr_two_bands()
ets_test_strict()
{
ets_set_strict
- ets_dwrr_test_01
- ets_dwrr_test_12
+ xfail_on_slow ets_dwrr_test_01
+ xfail_on_slow ets_dwrr_test_12
}
ets_test_mixed()
{
ets_set_mixed
- ets_dwrr_test_01
- ets_dwrr_test_12
+ xfail_on_slow ets_dwrr_test_01
+ xfail_on_slow ets_dwrr_test_12
}
ets_test_dwrr()
{
ets_set_dwrr_uniform
- ets_dwrr_test_012
+ xfail_on_slow ets_dwrr_test_012
+
ets_set_dwrr_varying
- ets_dwrr_test_012
+ xfail_on_slow ets_dwrr_test_012
+
ets_change_quantum
- ets_dwrr_test_012
+ xfail_on_slow ets_dwrr_test_012
+
ets_set_dwrr_two_bands
- ets_dwrr_test_01
+ xfail_on_slow ets_dwrr_test_01
}
diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh
index 81f31179ac..17f2864456 100755
--- a/tools/testing/selftests/net/forwarding/sch_red.sh
+++ b/tools/testing/selftests/net/forwarding/sch_red.sh
@@ -451,35 +451,35 @@ uninstall_qdisc()
ecn_test()
{
install_qdisc ecn
- do_ecn_test $BACKLOG
+ xfail_on_slow do_ecn_test $BACKLOG
uninstall_qdisc
}
ecn_nodrop_test()
{
install_qdisc ecn nodrop
- do_ecn_nodrop_test $BACKLOG
+ xfail_on_slow do_ecn_nodrop_test $BACKLOG
uninstall_qdisc
}
red_test()
{
install_qdisc
- do_red_test $BACKLOG
+ xfail_on_slow do_red_test $BACKLOG
uninstall_qdisc
}
red_qevent_test()
{
install_qdisc qevent early_drop block 10
- do_red_qevent_test $BACKLOG
+ xfail_on_slow do_red_qevent_test $BACKLOG
uninstall_qdisc
}
ecn_qevent_test()
{
install_qdisc ecn qevent mark block 10
- do_ecn_qevent_test $BACKLOG
+ xfail_on_slow do_ecn_qevent_test $BACKLOG
uninstall_qdisc
}
diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
index d1f26cb7cd..9cd884d4a5 100644
--- a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
+++ b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
@@ -227,7 +227,7 @@ do_tbf_test()
local nr=$(rate $t2 $t3 10)
local nr_pct=$((100 * (nr - er) / er))
((-5 <= nr_pct && nr_pct <= 5))
- check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-5%."
+ xfail_on_slow check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-5%."
log_test "TC $((vlan - 10)): TBF rate ${mbit}Mbit"
}
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
index bce8bb8d2b..2e3326edfa 100644
--- a/tools/testing/selftests/net/forwarding/tc_common.sh
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -4,7 +4,7 @@
CHECK_TC="yes"
# Can be overridden by the configuration file. See lib.sh
-TC_HIT_TIMEOUT=${TC_HIT_TIMEOUT:=1000} # ms
+: "${TC_HIT_TIMEOUT:=1000}" # ms
tc_check_packets()
{
diff --git a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
index 5a5dd90348..79775b10b9 100755
--- a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
+++ b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
@@ -1,7 +1,5 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
ALL_TESTS="tunnel_key_nofrag_test"
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
index 6038b96ece..b2184847e3 100644
--- a/tools/testing/selftests/net/gro.c
+++ b/tools/testing/selftests/net/gro.c
@@ -93,6 +93,7 @@ static bool tx_socket = true;
static int tcp_offset = -1;
static int total_hdr_len = -1;
static int ethhdr_proto = -1;
+static const int num_flush_id_cases = 6;
static void vlog(const char *fmt, ...)
{
@@ -620,6 +621,113 @@ static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type, char *ext
iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE);
}
+static void fix_ip4_checksum(struct iphdr *iph)
+{
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+}
+
+static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
+{
+ static char buf1[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char buf2[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char buf3[MAX_HDR_LEN + PAYLOAD_LEN];
+ bool send_three = false;
+ struct iphdr *iph1;
+ struct iphdr *iph2;
+ struct iphdr *iph3;
+
+ iph1 = (struct iphdr *)(buf1 + ETH_HLEN);
+ iph2 = (struct iphdr *)(buf2 + ETH_HLEN);
+ iph3 = (struct iphdr *)(buf3 + ETH_HLEN);
+
+ create_packet(buf1, 0, 0, PAYLOAD_LEN, 0);
+ create_packet(buf2, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ create_packet(buf3, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+
+ switch (tcase) {
+ case 0: /* DF=1, Incrementing - should coalesce */
+ iph1->frag_off |= htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off |= htons(IP_DF);
+ iph2->id = htons(9);
+ break;
+
+ case 1: /* DF=1, Fixed - should coalesce */
+ iph1->frag_off |= htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off |= htons(IP_DF);
+ iph2->id = htons(8);
+ break;
+
+ case 2: /* DF=0, Incrementing - should coalesce */
+ iph1->frag_off &= ~htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off &= ~htons(IP_DF);
+ iph2->id = htons(9);
+ break;
+
+ case 3: /* DF=0, Fixed - should not coalesce */
+ iph1->frag_off &= ~htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off &= ~htons(IP_DF);
+ iph2->id = htons(8);
+ break;
+
+ case 4: /* DF=1, two packets incrementing, and one fixed - should
+ * coalesce only the first two packets
+ */
+ iph1->frag_off |= htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off |= htons(IP_DF);
+ iph2->id = htons(9);
+
+ iph3->frag_off |= htons(IP_DF);
+ iph3->id = htons(9);
+ send_three = true;
+ break;
+
+ case 5: /* DF=1, two packets fixed, and one incrementing - should
+ * coalesce only the first two packets
+ */
+ iph1->frag_off |= htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off |= htons(IP_DF);
+ iph2->id = htons(8);
+
+ iph3->frag_off |= htons(IP_DF);
+ iph3->id = htons(9);
+ send_three = true;
+ break;
+ }
+
+ fix_ip4_checksum(iph1);
+ fix_ip4_checksum(iph2);
+ write_packet(fd, buf1, total_hdr_len + PAYLOAD_LEN, daddr);
+ write_packet(fd, buf2, total_hdr_len + PAYLOAD_LEN, daddr);
+
+ if (send_three) {
+ fix_ip4_checksum(iph3);
+ write_packet(fd, buf3, total_hdr_len + PAYLOAD_LEN, daddr);
+ }
+}
+
+static void test_flush_id(int fd, struct sockaddr_ll *daddr, char *fin_pkt)
+{
+ for (int i = 0; i < num_flush_id_cases; i++) {
+ sleep(1);
+ send_flush_id_case(fd, daddr, i);
+ sleep(1);
+ write_packet(fd, fin_pkt, total_hdr_len, daddr);
+ }
+}
+
static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2)
{
static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
@@ -938,6 +1046,8 @@ static void gro_sender(void)
send_fragment4(txfd, &daddr);
sleep(1);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ test_flush_id(txfd, &daddr, fin_pkt);
} else if (proto == PF_INET6) {
sleep(1);
send_fragment6(txfd, &daddr);
@@ -1064,6 +1174,34 @@ static void gro_receiver(void)
printf("fragmented ip4 doesn't coalesce: ");
check_recv_pkts(rxfd, correct_payload, 2);
+
+ /* is_atomic checks */
+ printf("DF=1, Incrementing - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("DF=1, Fixed - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("DF=0, Incrementing - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("DF=0, Fixed - should not coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
} else if (proto == PF_INET6) {
/* GRO doesn't check for ipv6 hop limit when flushing.
* Hence no corresponding test to the ipv4 case.
diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile
index 92c1d9d080..884cd2cc06 100644
--- a/tools/testing/selftests/net/hsr/Makefile
+++ b/tools/testing/selftests/net/hsr/Makefile
@@ -2,6 +2,7 @@
top_srcdir = ../../../../..
-TEST_PROGS := hsr_ping.sh
+TEST_PROGS := hsr_ping.sh hsr_redbox.sh
+TEST_FILES += hsr_common.sh
include ../../lib.mk
diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config
index 22061204fb..241542441c 100644
--- a/tools/testing/selftests/net/hsr/config
+++ b/tools/testing/selftests/net/hsr/config
@@ -2,3 +2,4 @@ CONFIG_IPV6=y
CONFIG_NET_SCH_NETEM=m
CONFIG_HSR=y
CONFIG_VETH=y
+CONFIG_BRIDGE=y
diff --git a/tools/testing/selftests/net/hsr/hsr_common.sh b/tools/testing/selftests/net/hsr/hsr_common.sh
new file mode 100644
index 0000000000..8e97b1f2e7
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/hsr_common.sh
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: GPL-2.0
+# Common code for HSR testing scripts
+
+source ../lib.sh
+ret=0
+ksft_skip=4
+
+# $1: IP address
+is_v6()
+{
+ [ -z "${1##*:*}" ]
+}
+
+do_ping()
+{
+ local netns="$1"
+ local connect_addr="$2"
+ local ping_args="-q -c 2"
+
+ if is_v6 "${connect_addr}"; then
+ $ipv6 || return 0
+ ping_args="${ping_args} -6"
+ fi
+
+ ip netns exec ${netns} ping ${ping_args} $connect_addr >/dev/null
+ if [ $? -ne 0 ] ; then
+ echo "$netns -> $connect_addr connectivity [ FAIL ]" 1>&2
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
+do_ping_long()
+{
+ local netns="$1"
+ local connect_addr="$2"
+ local ping_args="-q -c 10"
+
+ if is_v6 "${connect_addr}"; then
+ $ipv6 || return 0
+ ping_args="${ping_args} -6"
+ fi
+
+ OUT="$(LANG=C ip netns exec ${netns} ping ${ping_args} $connect_addr | grep received)"
+ if [ $? -ne 0 ] ; then
+ echo "$netns -> $connect_addr ping [ FAIL ]" 1>&2
+ ret=1
+ return 1
+ fi
+
+ VAL="$(echo $OUT | cut -d' ' -f1-8)"
+ SED_VAL="$(echo ${VAL} | sed -r -e 's/([0-9]{2}).*([0-9]{2}).*[[:space:]]([0-9]+%).*/\1 transmitted \2 received \3 loss/')"
+ if [ "${SED_VAL}" != "10 transmitted 10 received 0% loss" ]
+ then
+ echo "$netns -> $connect_addr ping TEST [ FAIL ]"
+ echo "Expect to send and receive 10 packets and no duplicates."
+ echo "Full message: ${OUT}."
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
+stop_if_error()
+{
+ local msg="$1"
+
+ if [ ${ret} -ne 0 ]; then
+ echo "FAIL: ${msg}" 1>&2
+ exit ${ret}
+ fi
+}
+
+check_prerequisites()
+{
+ ip -Version > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+ fi
+}
diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh
index 1c6457e546..3684b813b0 100755
--- a/tools/testing/selftests/net/hsr/hsr_ping.sh
+++ b/tools/testing/selftests/net/hsr/hsr_ping.sh
@@ -1,10 +1,10 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ret=0
-ksft_skip=4
ipv6=true
+source ./hsr_common.sh
+
optstring="h4"
usage() {
echo "Usage: $0 [OPTION]"
@@ -27,88 +27,6 @@ while getopts "$optstring" option;do
esac
done
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
-ns2="ns2-$rndh"
-ns3="ns3-$rndh"
-
-cleanup()
-{
- local netns
- for netns in "$ns1" "$ns2" "$ns3" ;do
- ip netns del $netns
- done
-}
-
-# $1: IP address
-is_v6()
-{
- [ -z "${1##*:*}" ]
-}
-
-do_ping()
-{
- local netns="$1"
- local connect_addr="$2"
- local ping_args="-q -c 2"
-
- if is_v6 "${connect_addr}"; then
- $ipv6 || return 0
- ping_args="${ping_args} -6"
- fi
-
- ip netns exec ${netns} ping ${ping_args} $connect_addr >/dev/null
- if [ $? -ne 0 ] ; then
- echo "$netns -> $connect_addr connectivity [ FAIL ]" 1>&2
- ret=1
- return 1
- fi
-
- return 0
-}
-
-do_ping_long()
-{
- local netns="$1"
- local connect_addr="$2"
- local ping_args="-q -c 10"
-
- if is_v6 "${connect_addr}"; then
- $ipv6 || return 0
- ping_args="${ping_args} -6"
- fi
-
- OUT="$(LANG=C ip netns exec ${netns} ping ${ping_args} $connect_addr | grep received)"
- if [ $? -ne 0 ] ; then
- echo "$netns -> $connect_addr ping [ FAIL ]" 1>&2
- ret=1
- return 1
- fi
-
- VAL="$(echo $OUT | cut -d' ' -f1-8)"
- if [ "$VAL" != "10 packets transmitted, 10 received, 0% packet loss," ]
- then
- echo "$netns -> $connect_addr ping TEST [ FAIL ]"
- echo "Expect to send and receive 10 packets and no duplicates."
- echo "Full message: ${OUT}."
- ret=1
- return 1
- fi
-
- return 0
-}
-
-stop_if_error()
-{
- local msg="$1"
-
- if [ ${ret} -ne 0 ]; then
- echo "FAIL: ${msg}" 1>&2
- exit ${ret}
- fi
-}
-
do_complete_ping_test()
{
echo "INFO: Initial validation ping."
@@ -248,27 +166,15 @@ setup_hsr_interfaces()
ip -net "$ns3" link set hsr3 up
}
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
+check_prerequisites
+setup_ns ns1 ns2 ns3
-trap cleanup EXIT
-
-for i in "$ns1" "$ns2" "$ns3" ;do
- ip netns add $i || exit $ksft_skip
- ip -net $i link set lo up
-done
+trap cleanup_all_ns EXIT
setup_hsr_interfaces 0
do_complete_ping_test
-cleanup
-for i in "$ns1" "$ns2" "$ns3" ;do
- ip netns add $i || exit $ksft_skip
- ip -net $i link set lo up
-done
+setup_ns ns1 ns2 ns3
setup_hsr_interfaces 1
do_complete_ping_test
diff --git a/tools/testing/selftests/net/hsr/hsr_redbox.sh b/tools/testing/selftests/net/hsr/hsr_redbox.sh
new file mode 100755
index 0000000000..1f36785347
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/hsr_redbox.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ipv6=false
+
+source ./hsr_common.sh
+
+do_complete_ping_test()
+{
+ echo "INFO: Initial validation ping (HSR-SAN/RedBox)."
+ # Each node has to be able to reach each one.
+ do_ping "${ns1}" 100.64.0.2
+ do_ping "${ns2}" 100.64.0.1
+ # Ping between SANs (test bridge)
+ do_ping "${ns4}" 100.64.0.51
+ do_ping "${ns5}" 100.64.0.41
+ # Ping from SANs to hsr1 (via hsr2) (and opposite)
+ do_ping "${ns3}" 100.64.0.1
+ do_ping "${ns1}" 100.64.0.3
+ do_ping "${ns1}" 100.64.0.41
+ do_ping "${ns4}" 100.64.0.1
+ do_ping "${ns1}" 100.64.0.51
+ do_ping "${ns5}" 100.64.0.1
+ stop_if_error "Initial validation failed."
+
+ # Wait for MGNT HSR frames being received and nodes being
+ # merged.
+ sleep 5
+
+ echo "INFO: Longer ping test (HSR-SAN/RedBox)."
+ # Ping from SAN to hsr1 (via hsr2)
+ do_ping_long "${ns3}" 100.64.0.1
+ # Ping from hsr1 (via hsr2) to SANs (and opposite)
+ do_ping_long "${ns1}" 100.64.0.3
+ do_ping_long "${ns1}" 100.64.0.41
+ do_ping_long "${ns4}" 100.64.0.1
+ do_ping_long "${ns1}" 100.64.0.51
+ do_ping_long "${ns5}" 100.64.0.1
+ stop_if_error "Longer ping test failed."
+
+ echo "INFO: All good."
+}
+
+setup_hsr_interfaces()
+{
+ local HSRv="$1"
+
+ echo "INFO: preparing interfaces for HSRv${HSRv} (HSR-SAN/RedBox)."
+#
+# IPv4 addresses (100.64.X.Y/24), and [X.Y] is presented on below diagram:
+#
+#
+# |NS1 | |NS4 |
+# | [0.1] | | |
+# | /-- hsr1 --\ | | [0.41] |
+# | ns1eth1 ns1eth2 | | ns4eth1 (SAN) |
+# |------------------------| |-------------------|
+# | | |
+# | | |
+# | | |
+# |------------------------| |-------------------------------|
+# | ns2eth1 ns2eth2 | | ns3eth2 |
+# | \-- hsr2 --/ | | / |
+# | [0.2] \ | | / | |------------|
+# | ns2eth3 |---| ns3eth1 -- ns3br1 -- ns3eth3--|--| ns5eth1 |
+# | (interlink)| | [0.3] [0.11] | | [0.51] |
+# |NS2 (RedBOX) | |NS3 (BR) | | NS5 (SAN) |
+#
+#
+ # Check if iproute2 supports adding interlink port to hsrX device
+ ip link help hsr | grep -q INTERLINK
+ [ $? -ne 0 ] && { echo "iproute2: HSR interlink interface not supported!"; exit 0; }
+
+ # Create interfaces for name spaces
+ ip link add ns1eth1 netns "${ns1}" type veth peer name ns2eth1 netns "${ns2}"
+ ip link add ns1eth2 netns "${ns1}" type veth peer name ns2eth2 netns "${ns2}"
+ ip link add ns2eth3 netns "${ns2}" type veth peer name ns3eth1 netns "${ns3}"
+ ip link add ns3eth2 netns "${ns3}" type veth peer name ns4eth1 netns "${ns4}"
+ ip link add ns3eth3 netns "${ns3}" type veth peer name ns5eth1 netns "${ns5}"
+
+ sleep 1
+
+ ip -n "${ns1}" link set ns1eth1 up
+ ip -n "${ns1}" link set ns1eth2 up
+
+ ip -n "${ns2}" link set ns2eth1 up
+ ip -n "${ns2}" link set ns2eth2 up
+ ip -n "${ns2}" link set ns2eth3 up
+
+ ip -n "${ns3}" link add name ns3br1 type bridge
+ ip -n "${ns3}" link set ns3br1 up
+ ip -n "${ns3}" link set ns3eth1 master ns3br1 up
+ ip -n "${ns3}" link set ns3eth2 master ns3br1 up
+ ip -n "${ns3}" link set ns3eth3 master ns3br1 up
+
+ ip -n "${ns4}" link set ns4eth1 up
+ ip -n "${ns5}" link set ns5eth1 up
+
+ ip -net "${ns1}" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version ${HSRv} proto 0
+ ip -net "${ns2}" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 interlink ns2eth3 supervision 45 version ${HSRv} proto 0
+
+ ip -n "${ns1}" addr add 100.64.0.1/24 dev hsr1
+ ip -n "${ns2}" addr add 100.64.0.2/24 dev hsr2
+ ip -n "${ns3}" addr add 100.64.0.11/24 dev ns3br1
+ ip -n "${ns3}" addr add 100.64.0.3/24 dev ns3eth1
+ ip -n "${ns4}" addr add 100.64.0.41/24 dev ns4eth1
+ ip -n "${ns5}" addr add 100.64.0.51/24 dev ns5eth1
+
+ ip -n "${ns1}" link set hsr1 up
+ ip -n "${ns2}" link set hsr2 up
+}
+
+check_prerequisites
+setup_ns ns1 ns2 ns3 ns4 ns5
+
+trap cleanup_all_ns EXIT
+
+setup_hsr_interfaces 1
+do_complete_ping_test
+
+exit $ret
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 16372ca167..9155c914c0 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -4,11 +4,16 @@
##############################################################################
# Defines
-WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
+: "${WAIT_TIMEOUT:=20}"
+
BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms
-# Kselftest framework requirement - SKIP code is 4.
+# Kselftest framework constants.
+ksft_pass=0
+ksft_fail=1
+ksft_xfail=2
ksft_skip=4
+
# namespace list created by setup_ns
NS_LIST=()
diff --git a/tools/testing/selftests/net/lib/.gitignore b/tools/testing/selftests/net/lib/.gitignore
new file mode 100644
index 0000000000..1ebc6187f4
--- /dev/null
+++ b/tools/testing/selftests/net/lib/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+csum
diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile
new file mode 100644
index 0000000000..82c3264b11
--- /dev/null
+++ b/tools/testing/selftests/net/lib/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
+CFLAGS += -I../../../../../usr/include/ $(KHDR_INCLUDES)
+# Additional include paths needed by kselftest.h
+CFLAGS += -I../../
+
+TEST_FILES := ../../../../../Documentation/netlink/specs
+TEST_FILES += ../../../../net/ynl
+
+TEST_GEN_FILES += csum
+
+TEST_INCLUDES := $(wildcard py/*.py)
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/csum.c b/tools/testing/selftests/net/lib/csum.c
index 90eb06fefa..b9f3fc3c34 100644
--- a/tools/testing/selftests/net/csum.c
+++ b/tools/testing/selftests/net/lib/csum.c
@@ -682,7 +682,7 @@ static int recv_verify_packet_ipv6(void *nh, int len)
}
/* return whether auxdata includes TP_STATUS_CSUM_VALID */
-static bool recv_verify_packet_csum(struct msghdr *msg)
+static uint32_t recv_get_packet_csum_status(struct msghdr *msg)
{
struct tpacket_auxdata *aux = NULL;
struct cmsghdr *cm;
@@ -706,7 +706,7 @@ static bool recv_verify_packet_csum(struct msghdr *msg)
if (!aux)
error(1, 0, "cmsg: no auxdata");
- return aux->tp_status & TP_STATUS_CSUM_VALID;
+ return aux->tp_status;
}
static int recv_packet(int fd)
@@ -716,6 +716,7 @@ static int recv_packet(int fd)
char ctrl[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
struct pkt *buf = (void *)_buf;
struct msghdr msg = {0};
+ uint32_t tp_status;
struct iovec iov;
int len, ret;
@@ -737,6 +738,17 @@ static int recv_packet(int fd)
if (len == -1)
error(1, errno, "recv p");
+ tp_status = recv_get_packet_csum_status(&msg);
+
+ /* GRO might coalesce randomized packets. Such GSO packets are
+ * then reinitialized for csum offload (CHECKSUM_PARTIAL), with
+ * a pseudo csum. Do not try to validate these checksums.
+ */
+ if (tp_status & TP_STATUS_CSUMNOTREADY) {
+ fprintf(stderr, "cmsg: GSO packet has partial csum: skip\n");
+ continue;
+ }
+
if (cfg_family == PF_INET6)
ret = recv_verify_packet_ipv6(buf, len);
else
@@ -753,7 +765,7 @@ static int recv_packet(int fd)
* Do not fail if kernel does not validate a good csum:
* Absence of validation does not imply invalid.
*/
- if (recv_verify_packet_csum(&msg) && cfg_bad_csum) {
+ if (tp_status & TP_STATUS_CSUM_VALID && cfg_bad_csum) {
fprintf(stderr, "cmsg: expected bad csum, pf_packet returns valid\n");
bad_validations++;
}
diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py
new file mode 100644
index 0000000000..b6d498d125
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/__init__.py
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from .consts import KSRC
+from .ksft import *
+from .netns import NetNS
+from .nsim import *
+from .utils import *
+from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily
diff --git a/tools/testing/selftests/net/lib/py/consts.py b/tools/testing/selftests/net/lib/py/consts.py
new file mode 100644
index 0000000000..f518ce79d8
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/consts.py
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import sys
+from pathlib import Path
+
+KSFT_DIR = (Path(__file__).parent / "../../..").resolve()
+KSRC = (Path(__file__).parent / "../../../../../..").resolve()
+
+KSFT_MAIN_NAME = Path(sys.argv[0]).with_suffix("").name
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
new file mode 100644
index 0000000000..4769b4eb1e
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -0,0 +1,159 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import builtins
+import inspect
+import sys
+import time
+import traceback
+from .consts import KSFT_MAIN_NAME
+
+KSFT_RESULT = None
+KSFT_RESULT_ALL = True
+
+
+class KsftFailEx(Exception):
+ pass
+
+
+class KsftSkipEx(Exception):
+ pass
+
+
+class KsftXfailEx(Exception):
+ pass
+
+
+def ksft_pr(*objs, **kwargs):
+ print("#", *objs, **kwargs)
+
+
+def _fail(*args):
+ global KSFT_RESULT
+ KSFT_RESULT = False
+
+ frame = inspect.stack()[2]
+ ksft_pr("At " + frame.filename + " line " + str(frame.lineno) + ":")
+ ksft_pr(*args)
+
+
+def ksft_eq(a, b, comment=""):
+ global KSFT_RESULT
+ if a != b:
+ _fail("Check failed", a, "!=", b, comment)
+
+
+def ksft_true(a, comment=""):
+ if not a:
+ _fail("Check failed", a, "does not eval to True", comment)
+
+
+def ksft_in(a, b, comment=""):
+ if a not in b:
+ _fail("Check failed", a, "not in", b, comment)
+
+
+def ksft_ge(a, b, comment=""):
+ if a < b:
+ _fail("Check failed", a, "<", b, comment)
+
+
+class ksft_raises:
+ def __init__(self, expected_type):
+ self.exception = None
+ self.expected_type = expected_type
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ if exc_type is None:
+ _fail(f"Expected exception {str(self.expected_type.__name__)}, none raised")
+ elif self.expected_type != exc_type:
+ _fail(f"Expected exception {str(self.expected_type.__name__)}, raised {str(exc_type.__name__)}")
+ self.exception = exc_val
+ # Suppress the exception if its the expected one
+ return self.expected_type == exc_type
+
+
+def ksft_busy_wait(cond, sleep=0.005, deadline=1, comment=""):
+ end = time.monotonic() + deadline
+ while True:
+ if cond():
+ return
+ if time.monotonic() > end:
+ _fail("Waiting for condition timed out", comment)
+ return
+ time.sleep(sleep)
+
+
+def ktap_result(ok, cnt=1, case="", comment=""):
+ global KSFT_RESULT_ALL
+ KSFT_RESULT_ALL = KSFT_RESULT_ALL and ok
+
+ res = ""
+ if not ok:
+ res += "not "
+ res += "ok "
+ res += str(cnt) + " "
+ res += KSFT_MAIN_NAME
+ if case:
+ res += "." + str(case.__name__)
+ if comment:
+ res += " # " + comment
+ print(res)
+
+
+def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
+ cases = cases or []
+
+ if globs and case_pfx:
+ for key, value in globs.items():
+ if not callable(value):
+ continue
+ for prefix in case_pfx:
+ if key.startswith(prefix):
+ cases.append(value)
+ break
+
+ totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
+
+ print("KTAP version 1")
+ print("1.." + str(len(cases)))
+
+ global KSFT_RESULT
+ cnt = 0
+ for case in cases:
+ KSFT_RESULT = True
+ cnt += 1
+ try:
+ case(*args)
+ except KsftSkipEx as e:
+ ktap_result(True, cnt, case, comment="SKIP " + str(e))
+ totals['skip'] += 1
+ continue
+ except KsftXfailEx as e:
+ ktap_result(True, cnt, case, comment="XFAIL " + str(e))
+ totals['xfail'] += 1
+ continue
+ except Exception as e:
+ tb = traceback.format_exc()
+ for line in tb.strip().split('\n'):
+ ksft_pr("Exception|", line)
+ ktap_result(False, cnt, case)
+ totals['fail'] += 1
+ continue
+
+ ktap_result(KSFT_RESULT, cnt, case)
+ if KSFT_RESULT:
+ totals['pass'] += 1
+ else:
+ totals['fail'] += 1
+
+ print(
+ f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0"
+ )
+
+
+def ksft_exit():
+ global KSFT_RESULT_ALL
+ sys.exit(0 if KSFT_RESULT_ALL else 1)
diff --git a/tools/testing/selftests/net/lib/py/netns.py b/tools/testing/selftests/net/lib/py/netns.py
new file mode 100644
index 0000000000..ecff85f907
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/netns.py
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from .utils import ip
+import random
+import string
+
+
+class NetNS:
+ def __init__(self, name=None):
+ if name:
+ self.name = name
+ else:
+ self.name = ''.join(random.choice(string.ascii_lowercase) for _ in range(8))
+ ip('netns add ' + self.name)
+
+ def __del__(self):
+ if self.name:
+ ip('netns del ' + self.name)
+ self.name = None
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, ex_type, ex_value, ex_tb):
+ self.__del__()
+
+ def __str__(self):
+ return self.name
+
+ def __repr__(self):
+ return f"NetNS({self.name})"
diff --git a/tools/testing/selftests/net/lib/py/nsim.py b/tools/testing/selftests/net/lib/py/nsim.py
new file mode 100644
index 0000000000..f571a8b313
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/nsim.py
@@ -0,0 +1,134 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import json
+import os
+import random
+import re
+import time
+from .utils import cmd, ip
+
+
+class NetdevSim:
+ """
+ Class for netdevsim netdevice and its attributes.
+ """
+
+ def __init__(self, nsimdev, port_index, ifname, ns=None):
+ # In case udev renamed the netdev to according to new schema,
+ # check if the name matches the port_index.
+ nsimnamere = re.compile(r"eni\d+np(\d+)")
+ match = nsimnamere.match(ifname)
+ if match and int(match.groups()[0]) != port_index + 1:
+ raise Exception("netdevice name mismatches the expected one")
+
+ self.ifname = ifname
+ self.nsimdev = nsimdev
+ self.port_index = port_index
+ self.ns = ns
+ self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index)
+ ret = ip("-j link show dev %s" % ifname, ns=ns)
+ self.dev = json.loads(ret.stdout)[0]
+ self.ifindex = self.dev["ifindex"]
+
+ def dfs_write(self, path, val):
+ self.nsimdev.dfs_write(f'ports/{self.port_index}/' + path, val)
+
+
+class NetdevSimDev:
+ """
+ Class for netdevsim bus device and its attributes.
+ """
+ @staticmethod
+ def ctrl_write(path, val):
+ fullpath = os.path.join("/sys/bus/netdevsim/", path)
+ with open(fullpath, "w") as f:
+ f.write(val)
+
+ def dfs_write(self, path, val):
+ fullpath = os.path.join(f"/sys/kernel/debug/netdevsim/netdevsim{self.addr}/", path)
+ with open(fullpath, "w") as f:
+ f.write(val)
+
+ def __init__(self, port_count=1, queue_count=1, ns=None):
+ # nsim will spawn in init_net, we'll set to actual ns once we switch it there
+ self.ns = None
+
+ if not os.path.exists("/sys/bus/netdevsim"):
+ cmd("modprobe netdevsim")
+
+ addr = random.randrange(1 << 15)
+ while True:
+ try:
+ self.ctrl_write("new_device", "%u %u %u" % (addr, port_count, queue_count))
+ except OSError as e:
+ if e.errno == errno.ENOSPC:
+ addr = random.randrange(1 << 15)
+ continue
+ raise e
+ break
+ self.addr = addr
+
+ # As probe of netdevsim device might happen from a workqueue,
+ # so wait here until all netdevs appear.
+ self.wait_for_netdevs(port_count)
+
+ if ns:
+ cmd(f"devlink dev reload netdevsim/netdevsim{addr} netns {ns.name}")
+ self.ns = ns
+
+ cmd("udevadm settle", ns=self.ns)
+ ifnames = self.get_ifnames()
+
+ self.dfs_dir = "/sys/kernel/debug/netdevsim/netdevsim%u/" % addr
+
+ self.nsims = []
+ for port_index in range(port_count):
+ self.nsims.append(self._make_port(port_index, ifnames[port_index]))
+
+ self.removed = False
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, ex_type, ex_value, ex_tb):
+ """
+ __exit__ gets called at the end of a "with" block.
+ """
+ self.remove()
+
+ def _make_port(self, port_index, ifname):
+ return NetdevSim(self, port_index, ifname, self.ns)
+
+ def get_ifnames(self):
+ ifnames = []
+ listdir = cmd(f"ls /sys/bus/netdevsim/devices/netdevsim{self.addr}/net/",
+ ns=self.ns).stdout.split()
+ for ifname in listdir:
+ ifnames.append(ifname)
+ ifnames.sort()
+ return ifnames
+
+ def wait_for_netdevs(self, port_count):
+ timeout = 5
+ timeout_start = time.time()
+
+ while True:
+ try:
+ ifnames = self.get_ifnames()
+ except FileNotFoundError as e:
+ ifnames = []
+ if len(ifnames) == port_count:
+ break
+ if time.time() < timeout_start + timeout:
+ continue
+ raise Exception("netdevices did not appear within timeout")
+
+ def remove(self):
+ if not self.removed:
+ self.ctrl_write("del_device", "%u" % (self.addr, ))
+ self.removed = True
+
+ def remove_nsim(self, nsim):
+ self.nsims.remove(nsim)
+ self.ctrl_write("devices/netdevsim%u/del_port" % (self.addr, ),
+ "%u" % (nsim.port_index, ))
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
new file mode 100644
index 0000000000..0540ea2492
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import json as _json
+import random
+import re
+import subprocess
+import time
+
+
+class cmd:
+ def __init__(self, comm, shell=True, fail=True, ns=None, background=False, host=None, timeout=5):
+ if ns:
+ comm = f'ip netns exec {ns} ' + comm
+
+ self.stdout = None
+ self.stderr = None
+ self.ret = None
+
+ self.comm = comm
+ if host:
+ self.proc = host.cmd(comm)
+ else:
+ self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ if not background:
+ self.process(terminate=False, fail=fail, timeout=timeout)
+
+ def process(self, terminate=True, fail=None, timeout=5):
+ if fail is None:
+ fail = not terminate
+
+ if terminate:
+ self.proc.terminate()
+ stdout, stderr = self.proc.communicate(timeout)
+ self.stdout = stdout.decode("utf-8")
+ self.stderr = stderr.decode("utf-8")
+ self.proc.stdout.close()
+ self.proc.stderr.close()
+ self.ret = self.proc.returncode
+
+ if self.proc.returncode != 0 and fail:
+ if len(stderr) > 0 and stderr[-1] == "\n":
+ stderr = stderr[:-1]
+ raise Exception("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" %
+ (self.proc.args, stdout, stderr))
+
+
+class bkg(cmd):
+ def __init__(self, comm, shell=True, fail=None, ns=None, host=None,
+ exit_wait=False):
+ super().__init__(comm, background=True,
+ shell=shell, fail=fail, ns=ns, host=host)
+ self.terminate = not exit_wait
+ self.check_fail = fail
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, ex_type, ex_value, ex_tb):
+ return self.process(terminate=self.terminate, fail=self.check_fail)
+
+
+def tool(name, args, json=None, ns=None, host=None):
+ cmd_str = name + ' '
+ if json:
+ cmd_str += '--json '
+ cmd_str += args
+ cmd_obj = cmd(cmd_str, ns=ns, host=host)
+ if json:
+ return _json.loads(cmd_obj.stdout)
+ return cmd_obj
+
+
+def ip(args, json=None, ns=None, host=None):
+ if ns:
+ args = f'-netns {ns} ' + args
+ return tool('ip', args, json=json, host=host)
+
+
+def rand_port():
+ """
+ Get unprivileged port, for now just random, one day we may decide to check if used.
+ """
+ return random.randint(10000, 65535)
+
+
+def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadline=5):
+ end = time.monotonic() + deadline
+
+ pattern = f":{port:04X} .* "
+ if proto == "tcp": # for tcp protocol additionally check the socket state
+ pattern += "0A"
+ pattern = re.compile(pattern)
+
+ while True:
+ data = cmd(f'cat /proc/net/{proto}*', ns=ns, host=host, shell=True).stdout
+ for row in data.split("\n"):
+ if pattern.search(row):
+ return
+ if time.monotonic() > end:
+ raise Exception("Waiting for port listen timed out")
+ time.sleep(sleep)
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
new file mode 100644
index 0000000000..1ace58370c
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import sys
+from pathlib import Path
+from .consts import KSRC, KSFT_DIR
+from .ksft import ksft_pr, ktap_result
+
+# Resolve paths
+try:
+ if (KSFT_DIR / "kselftest-list.txt").exists():
+ # Running in "installed" selftests
+ tools_full_path = KSFT_DIR
+ SPEC_PATH = KSFT_DIR / "net/lib/specs"
+
+ sys.path.append(tools_full_path.as_posix())
+ from net.lib.ynl.lib import YnlFamily, NlError
+ else:
+ # Running in tree
+ tools_full_path = KSRC / "tools"
+ SPEC_PATH = KSRC / "Documentation/netlink/specs"
+
+ sys.path.append(tools_full_path.as_posix())
+ from net.ynl.lib import YnlFamily, NlError
+except ModuleNotFoundError as e:
+ ksft_pr("Failed importing `ynl` library from kernel sources")
+ ksft_pr(str(e))
+ ktap_result(True, comment="SKIP")
+ sys.exit(4)
+
+#
+# Wrapper classes, loading the right specs
+# Set schema='' to avoid jsonschema validation, it's slow
+#
+class EthtoolFamily(YnlFamily):
+ def __init__(self):
+ super().__init__((SPEC_PATH / Path('ethtool.yaml')).as_posix(),
+ schema='')
+
+
+class RtnlFamily(YnlFamily):
+ def __init__(self):
+ super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(),
+ schema='')
+
+
+class NetdevFamily(YnlFamily):
+ def __init__(self):
+ super().__init__((SPEC_PATH / Path('netdev.yaml')).as_posix(),
+ schema='')
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index bc97ab33a0..776d43a692 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -200,6 +200,58 @@ chk_msk_cestab()
"${expected}" "${msg}" ""
}
+msk_info_get_value()
+{
+ local port="${1}"
+ local info="${2}"
+
+ ss -N "${ns}" -inHM dport "${port}" | \
+ mptcp_lib_get_info_value "${info}" "${info}"
+}
+
+chk_msk_info()
+{
+ local port="${1}"
+ local info="${2}"
+ local cnt="${3}"
+ local msg="....chk ${info}"
+ local delta_ms=250 # half what we waited before, just to be sure
+ local now
+
+ now=$(msk_info_get_value "${port}" "${info}")
+
+ mptcp_lib_print_title "${msg}"
+ if { [ -z "${cnt}" ] || [ -z "${now}" ]; } &&
+ ! mptcp_lib_expect_all_features; then
+ mptcp_lib_pr_skip "Feature probably not supported"
+ mptcp_lib_result_skip "${msg}"
+ elif [ "$((cnt + delta_ms))" -lt "${now}" ]; then
+ mptcp_lib_pr_ok
+ mptcp_lib_result_pass "${msg}"
+ else
+ mptcp_lib_pr_fail "value of ${info} changed by $((now - cnt))ms," \
+ "expected at least ${delta_ms}ms"
+ mptcp_lib_result_fail "${msg}"
+ ret=${KSFT_FAIL}
+ fi
+}
+
+chk_last_time_info()
+{
+ local port="${1}"
+ local data_sent data_recv ack_recv
+
+ data_sent=$(msk_info_get_value "${port}" "last_data_sent")
+ data_recv=$(msk_info_get_value "${port}" "last_data_recv")
+ ack_recv=$(msk_info_get_value "${port}" "last_ack_recv")
+
+ sleep 0.5 # wait to check after if the timestamps difference
+
+ chk_msk_info "${port}" "last_data_sent" "${data_sent}"
+ chk_msk_info "${port}" "last_data_recv" "${data_recv}"
+ chk_msk_info "${port}" "last_ack_recv" "${ack_recv}"
+}
+
wait_connected()
{
local listener_ns="${1}"
@@ -233,6 +285,7 @@ echo "b" | \
127.0.0.1 >/dev/null &
wait_connected $ns 10000
chk_msk_nr 2 "after MPC handshake "
+chk_last_time_info 10000
chk_msk_remote_key_nr 2 "....chk remote_key"
chk_msk_fallback_nr 0 "....chk no fallback"
chk_msk_inuse 2
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 4131f3263a..b77fb7065b 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -147,7 +147,7 @@ cleanup()
mptcp_lib_check_mptcp
mptcp_lib_check_kallsyms
-mptcp_lib_check_tools ip
+mptcp_lib_check_tools ip tc
sin=$(mktemp)
sout=$(mktemp)
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 2290125f6d..108aeeb84e 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -31,7 +31,6 @@ timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
capture=false
checksum=false
-ip_mptcp=0
check_invert=0
validate_checksum=false
init=0
@@ -142,7 +141,7 @@ init() {
mptcp_lib_check_mptcp
mptcp_lib_check_kallsyms
- mptcp_lib_check_tools ip ss "${iptables}" "${ip6tables}"
+ mptcp_lib_check_tools ip tc ss "${iptables}" "${ip6tables}"
sin=$(mktemp)
sout=$(mktemp)
@@ -610,173 +609,65 @@ kill_events_pids()
pm_nl_set_limits()
{
- local ns=$1
- local addrs=$2
- local subflows=$3
-
- if [ $ip_mptcp -eq 1 ]; then
- ip -n $ns mptcp limits set add_addr_accepted $addrs subflows $subflows
- else
- ip netns exec $ns ./pm_nl_ctl limits $addrs $subflows
- fi
+ mptcp_lib_pm_nl_set_limits "${@}"
}
pm_nl_add_endpoint()
{
- local ns=$1
- local addr=$2
- local flags _flags
- local port _port
- local dev _dev
- local id _id
- local nr=2
-
- local p
- for p in "${@}"
- do
- if [ $p = "flags" ]; then
- eval _flags=\$"$nr"
- [ -n "$_flags" ]; flags="flags $_flags"
- fi
- if [ $p = "dev" ]; then
- eval _dev=\$"$nr"
- [ -n "$_dev" ]; dev="dev $_dev"
- fi
- if [ $p = "id" ]; then
- eval _id=\$"$nr"
- [ -n "$_id" ]; id="id $_id"
- fi
- if [ $p = "port" ]; then
- eval _port=\$"$nr"
- [ -n "$_port" ]; port="port $_port"
- fi
-
- nr=$((nr + 1))
- done
-
- if [ $ip_mptcp -eq 1 ]; then
- ip -n $ns mptcp endpoint add $addr ${_flags//","/" "} $dev $id $port
- else
- ip netns exec $ns ./pm_nl_ctl add $addr $flags $dev $id $port
- fi
+ mptcp_lib_pm_nl_add_endpoint "${@}"
}
pm_nl_del_endpoint()
{
- local ns=$1
- local id=$2
- local addr=$3
-
- if [ $ip_mptcp -eq 1 ]; then
- [ $id -ne 0 ] && addr=''
- ip -n $ns mptcp endpoint delete id $id $addr
- else
- ip netns exec $ns ./pm_nl_ctl del $id $addr
- fi
+ mptcp_lib_pm_nl_del_endpoint "${@}"
}
pm_nl_flush_endpoint()
{
- local ns=$1
-
- if [ $ip_mptcp -eq 1 ]; then
- ip -n $ns mptcp endpoint flush
- else
- ip netns exec $ns ./pm_nl_ctl flush
- fi
+ mptcp_lib_pm_nl_flush_endpoint "${@}"
}
pm_nl_show_endpoints()
{
- local ns=$1
-
- if [ $ip_mptcp -eq 1 ]; then
- ip -n $ns mptcp endpoint show
- else
- ip netns exec $ns ./pm_nl_ctl dump
- fi
+ mptcp_lib_pm_nl_show_endpoints "${@}"
}
pm_nl_change_endpoint()
{
- local ns=$1
- local id=$2
- local flags=$3
-
- if [ $ip_mptcp -eq 1 ]; then
- ip -n $ns mptcp endpoint change id $id ${flags//","/" "}
- else
- ip netns exec $ns ./pm_nl_ctl set id $id flags $flags
- fi
+ mptcp_lib_pm_nl_change_endpoint "${@}"
}
pm_nl_check_endpoint()
{
- local line expected_line
local msg="$1"
local ns=$2
local addr=$3
- local _flags=""
- local flags
- local _port
- local port
- local dev
- local _id
- local id
+ local flags dev id port
print_check "${msg}"
shift 3
while [ -n "$1" ]; do
- if [ $1 = "flags" ]; then
- _flags=$2
- [ -n "$_flags" ]; flags="flags $_flags"
- shift
- elif [ $1 = "dev" ]; then
- [ -n "$2" ]; dev="dev $2"
+ case "${1}" in
+ "flags" | "dev" | "id" | "port")
+ eval "${1}"="${2}"
shift
- elif [ $1 = "id" ]; then
- _id=$2
- [ -n "$_id" ]; id="id $_id"
- shift
- elif [ $1 = "port" ]; then
- _port=$2
- [ -n "$_port" ]; port=" port $_port"
- shift
- fi
+ ;;
+ *)
+ ;;
+ esac
shift
done
- if [ -z "$id" ]; then
+ if [ -z "${id}" ]; then
test_fail "bad test - missing endpoint id"
return
fi
- if [ $ip_mptcp -eq 1 ]; then
- # get line and trim trailing whitespace
- line=$(ip -n $ns mptcp endpoint show $id)
- line="${line% }"
- # the dump order is: address id flags port dev
- [ -n "$addr" ] && expected_line="$addr"
- expected_line+=" $id"
- [ -n "$_flags" ] && expected_line+=" ${_flags//","/" "}"
- [ -n "$dev" ] && expected_line+=" $dev"
- [ -n "$port" ] && expected_line+=" $port"
- else
- line=$(ip netns exec $ns ./pm_nl_ctl get $_id)
- # the dump order is: id flags dev address port
- expected_line="$id"
- [ -n "$flags" ] && expected_line+=" $flags"
- [ -n "$dev" ] && expected_line+=" $dev"
- [ -n "$addr" ] && expected_line+=" $addr"
- [ -n "$_port" ] && expected_line+=" $_port"
- fi
- if [ "$line" = "$expected_line" ]; then
- print_ok
- else
- fail_test "expected '$expected_line' found '$line'"
- fi
+ check_output "mptcp_lib_pm_nl_get_endpoint ${ns} ${id}" \
+ "$(mptcp_lib_pm_nl_format_endpoints \
+ "${id},${addr},${flags//","/" "},${dev},${port}")"
}
pm_nl_set_endpoint()
@@ -3711,7 +3602,7 @@ while getopts "${all_tests_args}cCih" opt; do
checksum=true
;;
i)
- ip_mptcp=1
+ mptcp_lib_set_ip_mptcp
;;
h)
usage
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index d529b4b37a..6ffa9b7a32 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -21,8 +21,10 @@ declare -rx MPTCP_LIB_AF_INET6=10
MPTCP_LIB_SUBTESTS=()
MPTCP_LIB_SUBTESTS_DUPLICATED=0
+MPTCP_LIB_SUBTEST_FLAKY=0
MPTCP_LIB_TEST_COUNTER=0
MPTCP_LIB_TEST_FORMAT="%02u %-50s"
+MPTCP_LIB_IP_MPTCP=0
# only if supported (or forced) and not disabled, see no-color.org
if { [ -t 1 ] || [ "${SELFTESTS_MPTCP_LIB_COLOR_FORCE:-}" = "1" ]; } &&
@@ -40,6 +42,16 @@ else
readonly MPTCP_LIB_COLOR_RESET=
fi
+# SELFTESTS_MPTCP_LIB_OVERRIDE_FLAKY env var can be set not to ignore errors
+# from subtests marked as flaky
+mptcp_lib_override_flaky() {
+ [ "${SELFTESTS_MPTCP_LIB_OVERRIDE_FLAKY:-}" = 1 ]
+}
+
+mptcp_lib_subtest_is_flaky() {
+ [ "${MPTCP_LIB_SUBTEST_FLAKY}" = 1 ] && ! mptcp_lib_override_flaky
+}
+
# $1: color, $2: text
mptcp_lib_print_color() {
echo -e "${MPTCP_LIB_START_PRINT:-}${*}${MPTCP_LIB_COLOR_RESET}"
@@ -71,7 +83,16 @@ mptcp_lib_pr_skip() {
}
mptcp_lib_pr_fail() {
- mptcp_lib_print_err "[FAIL]${1:+ ${*}}"
+ local title cmt
+
+ if mptcp_lib_subtest_is_flaky; then
+ title="IGNO"
+ cmt=" (flaky)"
+ else
+ title="FAIL"
+ fi
+
+ mptcp_lib_print_err "[${title}]${cmt}${1:+ ${*}}"
}
mptcp_lib_pr_info() {
@@ -207,7 +228,13 @@ mptcp_lib_result_pass() {
# $1: test name
mptcp_lib_result_fail() {
- __mptcp_lib_result_add "not ok" "${1}"
+ if mptcp_lib_subtest_is_flaky; then
+ # It might sound better to use 'not ok # TODO' or 'ok # SKIP',
+ # but some CIs don't understand 'TODO' and treat SKIP as errors.
+ __mptcp_lib_result_add "ok" "${1} # IGNORE Flaky"
+ else
+ __mptcp_lib_result_add "not ok" "${1}"
+ fi
}
# $1: test name
@@ -384,6 +411,12 @@ mptcp_lib_check_tools() {
exit ${KSFT_SKIP}
fi
;;
+ "tc")
+ if ! tc -help &> /dev/null; then
+ mptcp_lib_pr_skip "Could not run test without tc tool"
+ exit ${KSFT_SKIP}
+ fi
+ ;;
"ss")
if ! ss -h | grep -q MPTCP; then
mptcp_lib_pr_skip "ss tool does not support MPTCP"
@@ -505,3 +538,131 @@ mptcp_lib_verify_listener_events() {
mptcp_lib_check_expected "type" "family" "saddr" "sport" || rc="${?}"
return "${rc}"
}
+
+mptcp_lib_set_ip_mptcp() {
+ MPTCP_LIB_IP_MPTCP=1
+}
+
+mptcp_lib_is_ip_mptcp() {
+ [ "${MPTCP_LIB_IP_MPTCP}" = "1" ]
+}
+
+# format: <id>,<ip>,<flags>,<dev>
+mptcp_lib_pm_nl_format_endpoints() {
+ local entry id ip flags dev port
+
+ for entry in "${@}"; do
+ IFS=, read -r id ip flags dev port <<< "${entry}"
+ if mptcp_lib_is_ip_mptcp; then
+ echo -n "${ip}"
+ [ -n "${port}" ] && echo -n " port ${port}"
+ echo -n " id ${id}"
+ [ -n "${flags}" ] && echo -n " ${flags}"
+ [ -n "${dev}" ] && echo -n " dev ${dev}"
+ echo " " # always a space at the end
+ else
+ echo -n "id ${id}"
+ echo -n " flags ${flags//" "/","}"
+ [ -n "${dev}" ] && echo -n " dev ${dev}"
+ echo -n " ${ip}"
+ [ -n "${port}" ] && echo -n " ${port}"
+ echo ""
+ fi
+ done
+}
+
+mptcp_lib_pm_nl_get_endpoint() {
+ local ns=${1}
+ local id=${2}
+
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns}" mptcp endpoint show id "${id}"
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl get "${id}"
+ fi
+}
+
+mptcp_lib_pm_nl_set_limits() {
+ local ns=${1}
+ local addrs=${2}
+ local subflows=${3}
+
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns}" mptcp limits set add_addr_accepted "${addrs}" subflows "${subflows}"
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl limits "${addrs}" "${subflows}"
+ fi
+}
+
+mptcp_lib_pm_nl_add_endpoint() {
+ local ns=${1}
+ local addr=${2}
+ local flags dev id port
+ local nr=2
+
+ local p
+ for p in "${@}"; do
+ case "${p}" in
+ "flags" | "dev" | "id" | "port")
+ eval "${p}"=\$"${nr}"
+ ;;
+ esac
+
+ nr=$((nr + 1))
+ done
+
+ if mptcp_lib_is_ip_mptcp; then
+ # shellcheck disable=SC2086 # blanks in flags, no double quote
+ ip -n "${ns}" mptcp endpoint add "${addr}" ${flags//","/" "} \
+ ${dev:+dev "${dev}"} ${id:+id "${id}"} ${port:+port "${port}"}
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl add "${addr}" ${flags:+flags "${flags}"} \
+ ${dev:+dev "${dev}"} ${id:+id "${id}"} ${port:+port "${port}"}
+ fi
+}
+
+mptcp_lib_pm_nl_del_endpoint() {
+ local ns=${1}
+ local id=${2}
+ local addr=${3}
+
+ if mptcp_lib_is_ip_mptcp; then
+ [ "${id}" -ne 0 ] && addr=''
+ ip -n "${ns}" mptcp endpoint delete id "${id}" ${addr:+"${addr}"}
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl del "${id}" "${addr}"
+ fi
+}
+
+mptcp_lib_pm_nl_flush_endpoint() {
+ local ns=${1}
+
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns}" mptcp endpoint flush
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl flush
+ fi
+}
+
+mptcp_lib_pm_nl_show_endpoints() {
+ local ns=${1}
+
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns}" mptcp endpoint show
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl dump
+ fi
+}
+
+mptcp_lib_pm_nl_change_endpoint() {
+ local ns=${1}
+ local id=${2}
+ local flags=${3}
+
+ if mptcp_lib_is_ip_mptcp; then
+ # shellcheck disable=SC2086 # blanks in flags, no double quote
+ ip -n "${ns}" mptcp endpoint change id "${id}" ${flags//","/" "}
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl set id "${id}" flags "${flags}"
+ fi
+}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index e2d70c1878..68899a303a 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -22,6 +22,28 @@ ns1=""
ns2=""
ns_sbox=""
+usage() {
+ echo "Usage: $0 [ -i ] [ -h ]"
+ echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'"
+ echo -e "\t-h: help"
+}
+
+while getopts "hi" option;do
+ case "$option" in
+ "h")
+ usage "$0"
+ exit ${KSFT_PASS}
+ ;;
+ "i")
+ mptcp_lib_set_ip_mptcp
+ ;;
+ "?")
+ usage "$0"
+ exit ${KSFT_FAIL}
+ ;;
+ esac
+done
+
add_mark_rules()
{
local ns=$1
@@ -58,15 +80,15 @@ init()
# let $ns2 reach any $ns1 address from any interface
ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.$i.1 flags signal
- ip netns exec $ns1 ./pm_nl_ctl add dead:beef:$i::1 flags signal
+ mptcp_lib_pm_nl_add_endpoint "${ns1}" "10.0.${i}.1" flags signal
+ mptcp_lib_pm_nl_add_endpoint "${ns1}" "dead:beef:${i}::1" flags signal
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.$i.2 flags signal
- ip netns exec $ns2 ./pm_nl_ctl add dead:beef:$i::2 flags signal
+ mptcp_lib_pm_nl_add_endpoint "${ns2}" "10.0.${i}.2" flags signal
+ mptcp_lib_pm_nl_add_endpoint "${ns2}" "dead:beef:${i}::2" flags signal
done
- ip netns exec $ns1 ./pm_nl_ctl limits 8 8
- ip netns exec $ns2 ./pm_nl_ctl limits 8 8
+ mptcp_lib_pm_nl_set_limits "${ns1}" 8 8
+ mptcp_lib_pm_nl_set_limits "${ns2}" 8 8
add_mark_rules $ns1 1
add_mark_rules $ns2 2
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index 6ab8c5d363..2757378b1b 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -1,28 +1,28 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# Double quotes to prevent globbing and word splitting is recommended in new
-# code but we accept it, especially because there were too many before having
-# address all other issues detected by shellcheck.
-#shellcheck disable=SC2086
-
. "$(dirname "${0}")/mptcp_lib.sh"
ret=0
usage() {
- echo "Usage: $0 [ -h ]"
+ echo "Usage: $0 [ -i ] [ -h ]"
+ echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'"
+ echo -e "\t-h: help"
}
-optstring=h
+optstring=hi
while getopts "$optstring" option;do
case "$option" in
"h")
- usage $0
+ usage "$0"
exit ${KSFT_PASS}
;;
+ "i")
+ mptcp_lib_set_ip_mptcp
+ ;;
"?")
- usage $0
+ usage "$0"
exit ${KSFT_FAIL}
;;
esac
@@ -35,7 +35,7 @@ err=$(mktemp)
#shellcheck disable=SC2317
cleanup()
{
- rm -f $err
+ rm -f "${err}"
mptcp_lib_ns_exit "${ns1}"
}
@@ -46,6 +46,76 @@ trap cleanup EXIT
mptcp_lib_ns_init ns1
+format_limits() {
+ local accept="${1}"
+ local subflows="${2}"
+
+ if mptcp_lib_is_ip_mptcp; then
+ # with a space at the end
+ printf "add_addr_accepted %d subflows %d \n" "${accept}" "${subflows}"
+ else
+ printf "accept %d\nsubflows %d\n" "${accept}" "${subflows}"
+ fi
+}
+
+get_limits() {
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns1}" mptcp limits
+ else
+ ip netns exec "${ns1}" ./pm_nl_ctl limits
+ fi
+}
+
+format_endpoints() {
+ mptcp_lib_pm_nl_format_endpoints "${@}"
+}
+
+get_endpoint() {
+ # shellcheck disable=SC2317 # invoked indirectly
+ mptcp_lib_pm_nl_get_endpoint "${ns1}" "${@}"
+}
+
+change_address() {
+ local addr=${1}
+ local flags=${2}
+
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns1}" mptcp endpoint change "${addr}" "${flags}"
+ else
+ ip netns exec "${ns1}" ./pm_nl_ctl set "${addr}" flags "${flags}"
+ fi
+}
+
+set_limits()
+{
+ mptcp_lib_pm_nl_set_limits "${ns1}" "${@}"
+}
+
+add_endpoint()
+{
+ mptcp_lib_pm_nl_add_endpoint "${ns1}" "${@}"
+}
+
+del_endpoint()
+{
+ mptcp_lib_pm_nl_del_endpoint "${ns1}" "${@}"
+}
+
+flush_endpoint()
+{
+ mptcp_lib_pm_nl_flush_endpoint "${ns1}"
+}
+
+show_endpoints()
+{
+ mptcp_lib_pm_nl_show_endpoints "${ns1}"
+}
+
+change_endpoint()
+{
+ mptcp_lib_pm_nl_change_endpoint "${ns1}" "${@}"
+}
+
check()
{
local cmd="$1"
@@ -67,125 +137,126 @@ check()
fi
}
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list"
+check "show_endpoints" "" "defaults addr list"
-default_limits="$(ip netns exec $ns1 ./pm_nl_ctl limits)"
+default_limits="$(get_limits)"
if mptcp_lib_expect_all_features; then
- check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
-subflows 2" "defaults limits"
+ check "get_limits" "$(format_limits 0 2)" "defaults limits"
fi
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 flags signal,backup
-check "ip netns exec $ns1 ./pm_nl_ctl get 1" "id 1 flags 10.0.1.1" "simple add/get addr"
+add_endpoint 10.0.1.1
+add_endpoint 10.0.1.2 flags subflow dev lo
+add_endpoint 10.0.1.3 flags signal,backup
+check "get_endpoint 1" "$(format_endpoints "1,10.0.1.1")" "simple add/get addr"
-check "ip netns exec $ns1 ./pm_nl_ctl dump" \
-"id 1 flags 10.0.1.1
-id 2 flags subflow dev lo 10.0.1.2
-id 3 flags signal,backup 10.0.1.3" "dump addrs"
+check "show_endpoints" \
+ "$(format_endpoints "1,10.0.1.1" \
+ "2,10.0.1.2,subflow,lo" \
+ "3,10.0.1.3,signal backup")" "dump addrs"
-ip netns exec $ns1 ./pm_nl_ctl del 2
-check "ip netns exec $ns1 ./pm_nl_ctl get 2" "" "simple del addr"
-check "ip netns exec $ns1 ./pm_nl_ctl dump" \
-"id 1 flags 10.0.1.1
-id 3 flags signal,backup 10.0.1.3" "dump addrs after del"
+del_endpoint 2
+check "get_endpoint 2" "" "simple del addr"
+check "show_endpoints" \
+ "$(format_endpoints "1,10.0.1.1" \
+ "3,10.0.1.3,signal backup")" "dump addrs after del"
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 2>/dev/null
-check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr"
+add_endpoint 10.0.1.3 2>/dev/null
+check "get_endpoint 4" "" "duplicate addr"
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 flags signal
-check "ip netns exec $ns1 ./pm_nl_ctl get 4" "id 4 flags signal 10.0.1.4" "id addr increment"
+add_endpoint 10.0.1.4 flags signal
+check "get_endpoint 4" "$(format_endpoints "4,10.0.1.4,signal")" "id addr increment"
for i in $(seq 5 9); do
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.$i flags signal >/dev/null 2>&1
+ add_endpoint "10.0.1.${i}" flags signal >/dev/null 2>&1
done
-check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit"
-check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit"
+check "get_endpoint 9" "$(format_endpoints "9,10.0.1.9,signal")" "hard addr limit"
+check "get_endpoint 10" "" "above hard addr limit"
-ip netns exec $ns1 ./pm_nl_ctl del 9
+del_endpoint 9
for i in $(seq 10 255); do
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $i
- ip netns exec $ns1 ./pm_nl_ctl del $i
+ add_endpoint 10.0.0.9 id "${i}"
+ del_endpoint "${i}"
done
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1
-id 3 flags signal,backup 10.0.1.3
-id 4 flags signal 10.0.1.4
-id 5 flags signal 10.0.1.5
-id 6 flags signal 10.0.1.6
-id 7 flags signal 10.0.1.7
-id 8 flags signal 10.0.1.8" "id limit"
-
-ip netns exec $ns1 ./pm_nl_ctl flush
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs"
-
-ip netns exec $ns1 ./pm_nl_ctl limits 9 1 2>/dev/null
-check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "rcv addrs above hard limit"
-
-ip netns exec $ns1 ./pm_nl_ctl limits 1 9 2>/dev/null
-check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "subflows above hard limit"
-
-ip netns exec $ns1 ./pm_nl_ctl limits 8 8
-check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8
-subflows 8" "set limits"
-
-ip netns exec $ns1 ./pm_nl_ctl flush
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 id 100
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.5 id 254
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.6
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.7
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.8
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1
-id 2 flags 10.0.1.2
-id 3 flags 10.0.1.7
-id 4 flags 10.0.1.8
-id 100 flags 10.0.1.3
-id 101 flags 10.0.1.4
-id 254 flags 10.0.1.5
-id 255 flags 10.0.1.6" "set ids"
-
-ip netns exec $ns1 ./pm_nl_ctl flush
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.1
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.2 id 254
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.3
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.4
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.5 id 253
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.6
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.7
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.8
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.0.1
-id 2 flags 10.0.0.4
-id 3 flags 10.0.0.6
-id 4 flags 10.0.0.7
-id 5 flags 10.0.0.8
-id 253 flags 10.0.0.5
-id 254 flags 10.0.0.2
-id 255 flags 10.0.0.3" "wrap-around ids"
-
-ip netns exec $ns1 ./pm_nl_ctl flush
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags subflow
-ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags backup
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
-subflow,backup 10.0.1.1" "set flags (backup)"
-ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags nobackup
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
-subflow 10.0.1.1" " (nobackup)"
+check "show_endpoints" \
+ "$(format_endpoints "1,10.0.1.1" \
+ "3,10.0.1.3,signal backup" \
+ "4,10.0.1.4,signal" \
+ "5,10.0.1.5,signal" \
+ "6,10.0.1.6,signal" \
+ "7,10.0.1.7,signal" \
+ "8,10.0.1.8,signal")" "id limit"
+
+flush_endpoint
+check "show_endpoints" "" "flush addrs"
+
+set_limits 9 1 2>/dev/null
+check "get_limits" "${default_limits}" "rcv addrs above hard limit"
+
+set_limits 1 9 2>/dev/null
+check "get_limits" "${default_limits}" "subflows above hard limit"
+
+set_limits 8 8
+check "get_limits" "$(format_limits 8 8)" "set limits"
+
+flush_endpoint
+add_endpoint 10.0.1.1
+add_endpoint 10.0.1.2
+add_endpoint 10.0.1.3 id 100
+add_endpoint 10.0.1.4
+add_endpoint 10.0.1.5 id 254
+add_endpoint 10.0.1.6
+add_endpoint 10.0.1.7
+add_endpoint 10.0.1.8
+check "show_endpoints" \
+ "$(format_endpoints "1,10.0.1.1" \
+ "2,10.0.1.2" \
+ "3,10.0.1.7" \
+ "4,10.0.1.8" \
+ "100,10.0.1.3" \
+ "101,10.0.1.4" \
+ "254,10.0.1.5" \
+ "255,10.0.1.6")" "set ids"
+
+flush_endpoint
+add_endpoint 10.0.0.1
+add_endpoint 10.0.0.2 id 254
+add_endpoint 10.0.0.3
+add_endpoint 10.0.0.4
+add_endpoint 10.0.0.5 id 253
+add_endpoint 10.0.0.6
+add_endpoint 10.0.0.7
+add_endpoint 10.0.0.8
+check "show_endpoints" \
+ "$(format_endpoints "1,10.0.0.1" \
+ "2,10.0.0.4" \
+ "3,10.0.0.6" \
+ "4,10.0.0.7" \
+ "5,10.0.0.8" \
+ "253,10.0.0.5" \
+ "254,10.0.0.2" \
+ "255,10.0.0.3")" "wrap-around ids"
+
+flush_endpoint
+add_endpoint 10.0.1.1 flags subflow
+change_address 10.0.1.1 backup
+check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow backup")" \
+ "set flags (backup)"
+change_address 10.0.1.1 nobackup
+check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow")" \
+ " (nobackup)"
# fullmesh support has been added later
-ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh 2>/dev/null
-if ip netns exec $ns1 ./pm_nl_ctl dump | grep -q "fullmesh" ||
+change_endpoint 1 fullmesh 2>/dev/null
+if show_endpoints | grep -q "fullmesh" ||
mptcp_lib_expect_all_features; then
- check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
-subflow,fullmesh 10.0.1.1" " (fullmesh)"
- ip netns exec $ns1 ./pm_nl_ctl set id 1 flags nofullmesh
- check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
-subflow 10.0.1.1" " (nofullmesh)"
- ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh
- check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
-subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)"
+ check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow fullmesh")" \
+ " (fullmesh)"
+ change_endpoint 1 nofullmesh
+ check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow")" \
+ " (nofullmesh)"
+ change_endpoint 1 backup,fullmesh
+ check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow backup fullmesh")" \
+ " (backup,fullmesh)"
else
for st in fullmesh nofullmesh backup,fullmesh; do
st=" (${st})"
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 7322e1e4e5..f74e1c3c12 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -27,10 +27,11 @@ capout=""
size=0
usage() {
- echo "Usage: $0 [ -b ] [ -c ] [ -d ]"
+ echo "Usage: $0 [ -b ] [ -c ] [ -d ] [ -i]"
echo -e "\t-b: bail out after first error, otherwise runs al testcases"
echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
echo -e "\t-d: debug this script"
+ echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'"
}
# This function is used in the cleanup trap
@@ -45,7 +46,7 @@ cleanup()
}
mptcp_lib_check_mptcp
-mptcp_lib_check_tools ip
+mptcp_lib_check_tools ip tc
# "$ns1" ns2 ns3
# ns1eth1 ns2eth1 ns2eth3 ns3eth1
@@ -85,8 +86,8 @@ setup()
ip -net "$ns1" route add default via 10.0.2.2 metric 101
ip -net "$ns1" route add default via dead:beef:2::2 metric 101
- ip netns exec "$ns1" ./pm_nl_ctl limits 1 1
- ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow
+ mptcp_lib_pm_nl_set_limits "${ns1}" 1 1
+ mptcp_lib_pm_nl_add_endpoint "${ns1}" 10.0.2.1 dev ns1eth2 flags subflow
ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
@@ -108,7 +109,7 @@ setup()
ip -net "$ns3" route add default via 10.0.3.2
ip -net "$ns3" route add default via dead:beef:3::2
- ip netns exec "$ns3" ./pm_nl_ctl limits 1 1
+ mptcp_lib_pm_nl_set_limits "${ns3}" 1 1
# debug build can slow down measurably the test program
# we use quite tight time limit on the run-time, to ensure
@@ -259,7 +260,7 @@ run_test()
fi
}
-while getopts "bcdh" option;do
+while getopts "bcdhi" option;do
case "$option" in
"h")
usage $0
@@ -274,6 +275,9 @@ while getopts "bcdh" option;do
"d")
set -x
;;
+ "i")
+ mptcp_lib_set_ip_mptcp
+ ;;
"?")
usage $0
exit ${KSFT_FAIL}
diff --git a/tools/testing/selftests/net/nat6to4.c b/tools/testing/selftests/net/nat6to4.bpf.c
index ac54c36b25..ac54c36b25 100644
--- a/tools/testing/selftests/net/nat6to4.c
+++ b/tools/testing/selftests/net/nat6to4.bpf.c
diff --git a/tools/testing/selftests/net/netfilter/.gitignore b/tools/testing/selftests/net/netfilter/.gitignore
new file mode 100644
index 0000000000..0a64d6d0e2
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/.gitignore
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+audit_logread
+connect_close
+conntrack_dump_flush
+sctp_collision
+nf_queue
diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile
new file mode 100644
index 0000000000..47945b2b3f
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/Makefile
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+
+top_srcdir = ../../../../..
+
+HOSTPKG_CONFIG := pkg-config
+MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
+MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
+
+TEST_PROGS := br_netfilter.sh bridge_brouter.sh
+TEST_PROGS += conntrack_icmp_related.sh
+TEST_PROGS += conntrack_ipip_mtu.sh
+TEST_PROGS += conntrack_tcp_unreplied.sh
+TEST_PROGS += conntrack_sctp_collision.sh
+TEST_PROGS += conntrack_vrf.sh
+TEST_PROGS += ipvs.sh
+TEST_PROGS += nf_conntrack_packetdrill.sh
+TEST_PROGS += nf_nat_edemux.sh
+TEST_PROGS += nft_audit.sh
+TEST_PROGS += nft_concat_range.sh
+TEST_PROGS += nft_conntrack_helper.sh
+TEST_PROGS += nft_fib.sh
+TEST_PROGS += nft_flowtable.sh
+TEST_PROGS += nft_meta.sh
+TEST_PROGS += nft_nat.sh
+TEST_PROGS += nft_nat_zones.sh
+TEST_PROGS += nft_queue.sh
+TEST_PROGS += nft_synproxy.sh
+TEST_PROGS += nft_zones_many.sh
+TEST_PROGS += rpath.sh
+TEST_PROGS += xt_string.sh
+
+TEST_PROGS_EXTENDED = nft_concat_range_perf.sh
+
+TEST_GEN_PROGS = conntrack_dump_flush
+
+TEST_GEN_FILES = audit_logread
+TEST_GEN_FILES += connect_close nf_queue
+TEST_GEN_FILES += sctp_collision
+
+include ../../lib.mk
+
+$(OUTPUT)/nf_queue: CFLAGS += $(MNL_CFLAGS)
+$(OUTPUT)/nf_queue: LDLIBS += $(MNL_LDLIBS)
+
+$(OUTPUT)/conntrack_dump_flush: CFLAGS += $(MNL_CFLAGS)
+$(OUTPUT)/conntrack_dump_flush: LDLIBS += $(MNL_LDLIBS)
+
+TEST_FILES := lib.sh
+TEST_FILES += packetdrill
+
+TEST_INCLUDES := \
+ ../lib.sh
diff --git a/tools/testing/selftests/net/netfilter/audit_logread.c b/tools/testing/selftests/net/netfilter/audit_logread.c
new file mode 100644
index 0000000000..a0a880fc2d
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/audit_logread.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <linux/audit.h>
+#include <linux/netlink.h>
+
+static int fd;
+
+#define MAX_AUDIT_MESSAGE_LENGTH 8970
+struct audit_message {
+ struct nlmsghdr nlh;
+ union {
+ struct audit_status s;
+ char data[MAX_AUDIT_MESSAGE_LENGTH];
+ } u;
+};
+
+int audit_recv(int fd, struct audit_message *rep)
+{
+ struct sockaddr_nl addr;
+ socklen_t addrlen = sizeof(addr);
+ int ret;
+
+ do {
+ ret = recvfrom(fd, rep, sizeof(*rep), 0,
+ (struct sockaddr *)&addr, &addrlen);
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret < 0 ||
+ addrlen != sizeof(addr) ||
+ addr.nl_pid != 0 ||
+ rep->nlh.nlmsg_type == NLMSG_ERROR) /* short-cut for now */
+ return -1;
+
+ return ret;
+}
+
+int audit_send(int fd, uint16_t type, uint32_t key, uint32_t val)
+{
+ static int seq = 0;
+ struct audit_message msg = {
+ .nlh = {
+ .nlmsg_len = NLMSG_SPACE(sizeof(msg.u.s)),
+ .nlmsg_type = type,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+ .nlmsg_seq = ++seq,
+ },
+ .u.s = {
+ .mask = key,
+ .enabled = key == AUDIT_STATUS_ENABLED ? val : 0,
+ .pid = key == AUDIT_STATUS_PID ? val : 0,
+ }
+ };
+ struct sockaddr_nl addr = {
+ .nl_family = AF_NETLINK,
+ };
+ int ret;
+
+ do {
+ ret = sendto(fd, &msg, msg.nlh.nlmsg_len, 0,
+ (struct sockaddr *)&addr, sizeof(addr));
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret != (int)msg.nlh.nlmsg_len)
+ return -1;
+ return 0;
+}
+
+int audit_set(int fd, uint32_t key, uint32_t val)
+{
+ struct audit_message rep = { 0 };
+ int ret;
+
+ ret = audit_send(fd, AUDIT_SET, key, val);
+ if (ret)
+ return ret;
+
+ ret = audit_recv(fd, &rep);
+ if (ret < 0)
+ return ret;
+ return 0;
+}
+
+int readlog(int fd)
+{
+ struct audit_message rep = { 0 };
+ int ret = audit_recv(fd, &rep);
+ const char *sep = "";
+ char *k, *v;
+
+ if (ret < 0)
+ return ret;
+
+ if (rep.nlh.nlmsg_type != AUDIT_NETFILTER_CFG)
+ return 0;
+
+ /* skip the initial "audit(...): " part */
+ strtok(rep.u.data, " ");
+
+ while ((k = strtok(NULL, "="))) {
+ v = strtok(NULL, " ");
+
+ /* these vary and/or are uninteresting, ignore */
+ if (!strcmp(k, "pid") ||
+ !strcmp(k, "comm") ||
+ !strcmp(k, "subj"))
+ continue;
+
+ /* strip the varying sequence number */
+ if (!strcmp(k, "table"))
+ *strchrnul(v, ':') = '\0';
+
+ printf("%s%s=%s", sep, k, v);
+ sep = " ";
+ }
+ if (*sep) {
+ printf("\n");
+ fflush(stdout);
+ }
+ return 0;
+}
+
+void cleanup(int sig)
+{
+ audit_set(fd, AUDIT_STATUS_ENABLED, 0);
+ close(fd);
+ if (sig)
+ exit(0);
+}
+
+int main(int argc, char **argv)
+{
+ struct sigaction act = {
+ .sa_handler = cleanup,
+ };
+
+ fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_AUDIT);
+ if (fd < 0) {
+ perror("Can't open netlink socket");
+ return -1;
+ }
+
+ if (sigaction(SIGTERM, &act, NULL) < 0 ||
+ sigaction(SIGINT, &act, NULL) < 0) {
+ perror("Can't set signal handler");
+ close(fd);
+ return -1;
+ }
+
+ audit_set(fd, AUDIT_STATUS_ENABLED, 1);
+ audit_set(fd, AUDIT_STATUS_PID, getpid());
+
+ while (1)
+ readlog(fd);
+}
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh
new file mode 100755
index 0000000000..c28379a965
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh
@@ -0,0 +1,171 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test for legacy br_netfilter module combined with connection tracking,
+# a combination that doesn't really work.
+# Multicast/broadcast packets race for hash table insertion.
+
+# eth0 br0 eth0
+# setup is: ns1 <->,ns0 <-> ns3
+# ns2 <-' `'-> ns4
+
+source lib.sh
+
+checktool "nft --version" "run test without nft tool"
+
+cleanup() {
+ cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+setup_ns ns0 ns1 ns2 ns3 ns4
+
+ret=0
+
+do_ping()
+{
+ fromns="$1"
+ dstip="$2"
+
+ if ! ip netns exec "$fromns" ping -c 1 -q "$dstip" > /dev/null; then
+ echo "ERROR: ping from $fromns to $dstip"
+ ip netns exec "$ns0" nft list ruleset
+ ret=1
+ fi
+}
+
+bcast_ping()
+{
+ fromns="$1"
+ dstip="$2"
+
+ local packets=500
+
+ [ "$KSFT_MACHINE_SLOW" = yes ] && packets=100
+
+ for i in $(seq 1 $packets); do
+ if ! ip netns exec "$fromns" ping -q -f -b -c 1 -q "$dstip" > /dev/null 2>&1; then
+ echo "ERROR: ping -b from $fromns to $dstip"
+ ip netns exec "$ns0" nft list ruleset
+ ret=1
+ break
+ fi
+ done
+}
+
+ip netns exec "$ns0" sysctl -q net.ipv4.conf.all.rp_filter=0
+ip netns exec "$ns0" sysctl -q net.ipv4.conf.default.rp_filter=0
+
+if ! ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns1"; then
+ echo "SKIP: Can't create veth device"
+ exit $ksft_skip
+fi
+
+ip link add veth2 netns "$ns0" type veth peer name eth0 netns "$ns2"
+ip link add veth3 netns "$ns0" type veth peer name eth0 netns "$ns3"
+ip link add veth4 netns "$ns0" type veth peer name eth0 netns "$ns4"
+
+for i in $(seq 1 4); do
+ ip -net "$ns0" link set "veth$i" up
+done
+
+if ! ip -net "$ns0" link add br0 type bridge stp_state 0 forward_delay 0 nf_call_iptables 1 nf_call_ip6tables 1 nf_call_arptables 1; then
+ echo "SKIP: Can't create bridge br0"
+ exit $ksft_skip
+fi
+
+# make veth0,1,2 part of bridge.
+for i in $(seq 1 3); do
+ ip -net "$ns0" link set "veth$i" master br0
+done
+
+# add a macvlan on top of the bridge.
+MACVLAN_ADDR=ba:f3:13:37:42:23
+ip -net "$ns0" link add link br0 name macvlan0 type macvlan mode private
+ip -net "$ns0" link set macvlan0 address ${MACVLAN_ADDR}
+ip -net "$ns0" link set macvlan0 up
+ip -net "$ns0" addr add 10.23.0.1/24 dev macvlan0
+
+# add a macvlan on top of veth4.
+MACVLAN_ADDR=ba:f3:13:37:42:24
+ip -net "$ns0" link add link veth4 name macvlan4 type macvlan mode passthru
+ip -net "$ns0" link set macvlan4 address ${MACVLAN_ADDR}
+ip -net "$ns0" link set macvlan4 up
+
+# make the macvlan part of the bridge.
+# veth4 is not a bridge port, only the macvlan on top of it.
+ip -net "$ns0" link set macvlan4 master br0
+
+ip -net "$ns0" link set br0 up
+ip -net "$ns0" addr add 10.0.0.1/24 dev br0
+
+modprobe -q br_netfilter
+if ! ip netns exec "$ns0" sysctl -q net.bridge.bridge-nf-call-iptables=1; then
+ echo "SKIP: bridge netfilter not available"
+ ret=$ksft_skip
+fi
+
+# for testing, so namespaces will reply to ping -b probes.
+ip netns exec "$ns0" sysctl -q net.ipv4.icmp_echo_ignore_broadcasts=0
+
+# enable conntrack in ns0 and drop broadcast packets in forward to
+# avoid them from getting confirmed in the postrouting hook before
+# the cloned skb is passed up the stack.
+ip netns exec "$ns0" nft -f - <<EOF
+table ip filter {
+ chain input {
+ type filter hook input priority 1; policy accept
+ iifname br0 counter
+ ct state new accept
+ }
+}
+
+table bridge filter {
+ chain forward {
+ type filter hook forward priority 0; policy accept
+ meta pkttype broadcast ip protocol icmp counter drop
+ }
+}
+EOF
+if [ "$?" -ne 0 ];then
+ echo "SKIP: could not add nftables ruleset"
+ exit $ksft_skip
+fi
+
+# place 1, 2 & 3 in same subnet, connected via ns0:br0.
+# ns4 is placed in same subnet as well, but its not
+# part of the bridge: the corresponding veth4 is not
+# part of the bridge, only its macvlan interface.
+for i in $(seq 1 4); do
+ eval ip -net \$ns"$i" link set eth0 up
+done
+for i in $(seq 1 2); do
+ eval ip -net \$ns"$i" addr add "10.0.0.1$i/24" dev eth0
+done
+
+ip -net "$ns3" addr add 10.23.0.13/24 dev eth0
+ip -net "$ns4" addr add 10.23.0.14/24 dev eth0
+
+# test basic connectivity
+do_ping "$ns1" 10.0.0.12
+do_ping "$ns3" 10.23.0.1
+do_ping "$ns4" 10.23.0.1
+
+bcast_ping "$ns1" 10.0.0.255
+
+# This should deliver broadcast to macvlan0, which is on top of ns0:br0.
+bcast_ping "$ns3" 10.23.0.255
+
+# same, this time via veth4:macvlan4.
+bcast_ping "$ns4" 10.23.0.255
+
+read t < /proc/sys/kernel/tainted
+if [ "$t" -eq 0 ];then
+ echo PASS: kernel not tainted
+else
+ echo ERROR: kernel is tainted
+ ret=1
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/bridge_brouter.sh b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
new file mode 100755
index 0000000000..2549b65906
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+#
+# This test is for bridge 'brouting', i.e. make some packets being routed
+# rather than getting bridged even though they arrive on interface that is
+# part of a bridge.
+
+# eth0 br0 eth0
+# setup is: ns1 <-> nsbr <-> ns2
+
+source lib.sh
+
+if ! ebtables -V > /dev/null 2>&1;then
+ echo "SKIP: Could not run test without ebtables"
+ exit $ksft_skip
+fi
+
+cleanup() {
+ cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+setup_ns nsbr ns1 ns2
+
+ip netns exec "$nsbr" sysctl -q net.ipv4.conf.default.rp_filter=0
+ip netns exec "$nsbr" sysctl -q net.ipv4.conf.all.rp_filter=0
+if ! ip link add veth0 netns "$nsbr" type veth peer name eth0 netns "$ns1"; then
+ echo "SKIP: Can't create veth device"
+ exit $ksft_skip
+fi
+ip link add veth1 netns "$nsbr" type veth peer name eth0 netns "$ns2"
+
+if ! ip -net "$nsbr" link add br0 type bridge; then
+ echo "SKIP: Can't create bridge br0"
+ exit $ksft_skip
+fi
+
+ip -net "$nsbr" link set veth0 up
+ip -net "$nsbr" link set veth1 up
+
+ip -net "$nsbr" link set veth0 master br0
+ip -net "$nsbr" link set veth1 master br0
+ip -net "$nsbr" link set br0 up
+ip -net "$nsbr" addr add 10.0.0.1/24 dev br0
+
+# place both in same subnet, ${ns1} and ${ns2} connected via ${nsbr}:br0
+ip -net "$ns1" link set eth0 up
+ip -net "$ns2" link set eth0 up
+ip -net "$ns1" addr add 10.0.0.11/24 dev eth0
+ip -net "$ns2" addr add 10.0.0.12/24 dev eth0
+
+test_ebtables_broute()
+{
+ # redirect is needed so the dstmac is rewritten to the bridge itself,
+ # ip stack won't process OTHERHOST (foreign unicast mac) packets.
+ if ! ip netns exec "$nsbr" ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP; then
+ echo "SKIP: Could not add ebtables broute redirect rule"
+ return $ksft_skip
+ fi
+
+ ip netns exec "$nsbr" sysctl -q net.ipv4.conf.veth0.forwarding=0
+
+ # ping net${ns1}, expected to not work (ip forwarding is off)
+ if ip netns exec "$ns1" ping -q -c 1 10.0.0.12 -W 0.5 > /dev/null 2>&1; then
+ echo "ERROR: ping works, should have failed" 1>&2
+ return 1
+ fi
+
+ # enable forwarding on both interfaces.
+ # neither needs an ip address, but at least the bridge needs
+ # an ip address in same network segment as ${ns1} and ${ns2} (${nsbr}
+ # needs to be able to determine route for to-be-forwarded packet).
+ ip netns exec "$nsbr" sysctl -q net.ipv4.conf.veth0.forwarding=1
+ ip netns exec "$nsbr" sysctl -q net.ipv4.conf.veth1.forwarding=1
+
+ if ! ip netns exec "$ns1" ping -q -c 1 10.0.0.12 > /dev/null; then
+ echo "ERROR: ping did not work, but it should (broute+forward)" 1>&2
+ return 1
+ fi
+
+ echo "PASS: ${ns1}/${ns2} connectivity with active broute rule"
+ ip netns exec "$nsbr" ebtables -t broute -F
+
+ # ping net${ns1}, expected to work (frames are bridged)
+ if ! ip netns exec "$ns1" ping -q -c 1 10.0.0.12 > /dev/null; then
+ echo "ERROR: ping did not work, but it should (bridged)" 1>&2
+ return 1
+ fi
+
+ ip netns exec "$nsbr" ebtables -t filter -A FORWARD -p ipv4 --ip-protocol icmp -j DROP
+
+ # ping net${ns1}, expected to not work (DROP in bridge forward)
+ if ip netns exec "$ns1" ping -q -c 1 10.0.0.12 -W 0.5 > /dev/null 2>&1; then
+ echo "ERROR: ping works, should have failed (icmp forward drop)" 1>&2
+ return 1
+ fi
+
+ # re-activate brouter
+ ip netns exec "$nsbr" ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP
+
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.0.11 > /dev/null; then
+ echo "ERROR: ping did not work, but it should (broute+forward 2)" 1>&2
+ return 1
+ fi
+
+ echo "PASS: ${ns1}/${ns2} connectivity with active broute rule and bridge forward drop"
+ return 0
+}
+
+# test basic connectivity
+if ! ip netns exec "$ns1" ping -c 1 -q 10.0.0.12 > /dev/null; then
+ echo "ERROR: Could not reach ${ns2} from ${ns1}" 1>&2
+ exit 1
+fi
+
+if ! ip netns exec "$ns2" ping -c 1 -q 10.0.0.11 > /dev/null; then
+ echo "ERROR: Could not reach ${ns1} from ${ns2}" 1>&2
+ exit 1
+fi
+
+test_ebtables_broute
+exit $?
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
new file mode 100644
index 0000000000..63ef80ef47
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/config
@@ -0,0 +1,89 @@
+CONFIG_AUDIT=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_BRIDGE=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_IP=m
+CONFIG_BRIDGE_EBT_REDIRECT=m
+CONFIG_BRIDGE_EBT_T_FILTER=m
+CONFIG_BRIDGE_NETFILTER=m
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_CGROUP_BPF=y
+CONFIG_DUMMY=m
+CONFIG_INET_ESP=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP_SCTP=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_RR=m
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_MACVLAN=m
+CONFIG_NAMESPACES=y
+CONFIG_NET_CLS_U32=m
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_NS=y
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_IPIP=m
+CONFIG_NET_VRF=y
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_NETLINK=m
+CONFIG_NETFILTER_NETLINK_QUEUE=m
+CONFIG_NETFILTER_SYNPROXY=m
+CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XT_NAT=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_PROTO_SCTP=y
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_LOG_IPV4=m
+CONFIG_NF_LOG_IPV6=m
+CONFIG_NF_NAT=m
+CONFIG_NF_NAT_REDIRECT=y
+CONFIG_NF_NAT_MASQUERADE=y
+CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_IPV4=y
+CONFIG_NF_TABLES_IPV6=y
+CONFIG_NF_TABLES_NETDEV=y
+CONFIG_NF_FLOW_TABLE_INET=m
+CONFIG_NFT_BRIDGE_META=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_FIB=m
+CONFIG_NFT_FIB_INET=m
+CONFIG_NFT_FIB_IPV4=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NFT_FLOW_OFFLOAD=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_MASQ=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_NUMGEN=m
+CONFIG_NFT_QUEUE=m
+CONFIG_NFT_QUOTA=m
+CONFIG_NFT_REDIR=m
+CONFIG_NFT_SYNPROXY=m
+CONFIG_VETH=m
+CONFIG_VLAN_8021Q=m
+CONFIG_XFRM_USER=m
+CONFIG_XFRM_STATISTICS=y
+CONFIG_NET_PKTGEN=m
+CONFIG_TUN=m
diff --git a/tools/testing/selftests/net/netfilter/connect_close.c b/tools/testing/selftests/net/netfilter/connect_close.c
new file mode 100644
index 0000000000..1c3b0add54
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/connect_close.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+
+#include <arpa/inet.h>
+#include <sys/socket.h>
+
+#define PORT 12345
+#define RUNTIME 10
+
+static struct {
+ unsigned int timeout;
+ unsigned int port;
+} opts = {
+ .timeout = RUNTIME,
+ .port = PORT,
+};
+
+static void handler(int sig)
+{
+ _exit(sig == SIGALRM ? 0 : 1);
+}
+
+static void set_timeout(void)
+{
+ struct sigaction action = {
+ .sa_handler = handler,
+ };
+
+ sigaction(SIGALRM, &action, NULL);
+
+ alarm(opts.timeout);
+}
+
+static void do_connect(const struct sockaddr_in *dst)
+{
+ int s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+
+ if (s >= 0)
+ fcntl(s, F_SETFL, O_NONBLOCK);
+
+ connect(s, (struct sockaddr *)dst, sizeof(*dst));
+ close(s);
+}
+
+static void do_accept(const struct sockaddr_in *src)
+{
+ int c, one = 1, s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+
+ if (s < 0)
+ return;
+
+ setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
+ setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+
+ bind(s, (struct sockaddr *)src, sizeof(*src));
+
+ listen(s, 16);
+
+ c = accept(s, NULL, NULL);
+ if (c >= 0)
+ close(c);
+
+ close(s);
+}
+
+static int accept_loop(void)
+{
+ struct sockaddr_in src = {
+ .sin_family = AF_INET,
+ .sin_port = htons(opts.port),
+ };
+
+ inet_pton(AF_INET, "127.0.0.1", &src.sin_addr);
+
+ set_timeout();
+
+ for (;;)
+ do_accept(&src);
+
+ return 1;
+}
+
+static int connect_loop(void)
+{
+ struct sockaddr_in dst = {
+ .sin_family = AF_INET,
+ .sin_port = htons(opts.port),
+ };
+
+ inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);
+
+ set_timeout();
+
+ for (;;)
+ do_connect(&dst);
+
+ return 1;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "t:p:")) != -1) {
+ switch (c) {
+ case 't':
+ opts.timeout = atoi(optarg);
+ break;
+ case 'p':
+ opts.port = atoi(optarg);
+ break;
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ pid_t p;
+
+ parse_opts(argc, argv);
+
+ p = fork();
+ if (p < 0)
+ return 111;
+
+ if (p > 0)
+ return accept_loop();
+
+ return connect_loop();
+}
diff --git a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
new file mode 100644
index 0000000000..bd9317bf5a
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
@@ -0,0 +1,469 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <time.h>
+#include <libmnl/libmnl.h>
+#include <netinet/ip.h>
+
+#include <linux/netlink.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+#include <linux/netfilter/nf_conntrack_tcp.h>
+#include "../../kselftest_harness.h"
+
+#define TEST_ZONE_ID 123
+#define NF_CT_DEFAULT_ZONE_ID 0
+
+static int reply_counter;
+
+static int build_cta_tuple_v4(struct nlmsghdr *nlh, int type,
+ uint32_t src_ip, uint32_t dst_ip,
+ uint16_t src_port, uint16_t dst_port)
+{
+ struct nlattr *nest, *nest_ip, *nest_proto;
+
+ nest = mnl_attr_nest_start(nlh, type);
+ if (!nest)
+ return -1;
+
+ nest_ip = mnl_attr_nest_start(nlh, CTA_TUPLE_IP);
+ if (!nest_ip)
+ return -1;
+ mnl_attr_put_u32(nlh, CTA_IP_V4_SRC, src_ip);
+ mnl_attr_put_u32(nlh, CTA_IP_V4_DST, dst_ip);
+ mnl_attr_nest_end(nlh, nest_ip);
+
+ nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO);
+ if (!nest_proto)
+ return -1;
+ mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6);
+ mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port));
+ mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port));
+ mnl_attr_nest_end(nlh, nest_proto);
+
+ mnl_attr_nest_end(nlh, nest);
+}
+
+static int build_cta_tuple_v6(struct nlmsghdr *nlh, int type,
+ struct in6_addr src_ip, struct in6_addr dst_ip,
+ uint16_t src_port, uint16_t dst_port)
+{
+ struct nlattr *nest, *nest_ip, *nest_proto;
+
+ nest = mnl_attr_nest_start(nlh, type);
+ if (!nest)
+ return -1;
+
+ nest_ip = mnl_attr_nest_start(nlh, CTA_TUPLE_IP);
+ if (!nest_ip)
+ return -1;
+ mnl_attr_put(nlh, CTA_IP_V6_SRC, sizeof(struct in6_addr), &src_ip);
+ mnl_attr_put(nlh, CTA_IP_V6_DST, sizeof(struct in6_addr), &dst_ip);
+ mnl_attr_nest_end(nlh, nest_ip);
+
+ nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO);
+ if (!nest_proto)
+ return -1;
+ mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6);
+ mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port));
+ mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port));
+ mnl_attr_nest_end(nlh, nest_proto);
+
+ mnl_attr_nest_end(nlh, nest);
+}
+
+static int build_cta_proto(struct nlmsghdr *nlh)
+{
+ struct nlattr *nest, *nest_proto;
+
+ nest = mnl_attr_nest_start(nlh, CTA_PROTOINFO);
+ if (!nest)
+ return -1;
+
+ nest_proto = mnl_attr_nest_start(nlh, CTA_PROTOINFO_TCP);
+ if (!nest_proto)
+ return -1;
+ mnl_attr_put_u8(nlh, CTA_PROTOINFO_TCP_STATE, TCP_CONNTRACK_ESTABLISHED);
+ mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, 0x0a0a);
+ mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_REPLY, 0x0a0a);
+ mnl_attr_nest_end(nlh, nest_proto);
+
+ mnl_attr_nest_end(nlh, nest);
+}
+
+static int conntrack_data_insert(struct mnl_socket *sock, struct nlmsghdr *nlh,
+ uint16_t zone)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ struct nlmsghdr *rplnlh;
+ unsigned int portid;
+ int err, ret;
+
+ portid = mnl_socket_get_portid(sock);
+
+ ret = build_cta_proto(nlh);
+ if (ret < 0) {
+ perror("build_cta_proto");
+ return -1;
+ }
+ mnl_attr_put_u32(nlh, CTA_TIMEOUT, htonl(20000));
+ mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone));
+
+ if (mnl_socket_sendto(sock, nlh, nlh->nlmsg_len) < 0) {
+ perror("mnl_socket_sendto");
+ return -1;
+ }
+
+ ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+ if (ret < 0) {
+ perror("mnl_socket_recvfrom");
+ return ret;
+ }
+
+ ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL);
+ if (ret < 0) {
+ if (errno == EEXIST) {
+ /* The entries are probably still there from a previous
+ * run. So we are good
+ */
+ return 0;
+ }
+ perror("mnl_cb_run");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int conntrack_data_generate_v4(struct mnl_socket *sock, uint32_t src_ip,
+ uint32_t dst_ip, uint16_t zone)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfh;
+ int ret;
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
+ NLM_F_ACK | NLM_F_EXCL;
+ nlh->nlmsg_seq = time(NULL);
+
+ nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+ nfh->nfgen_family = AF_INET;
+ nfh->version = NFNETLINK_V0;
+ nfh->res_id = 0;
+
+ ret = build_cta_tuple_v4(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip, 12345, 443);
+ if (ret < 0) {
+ perror("build_cta_tuple_v4");
+ return ret;
+ }
+ ret = build_cta_tuple_v4(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip, 443, 12345);
+ if (ret < 0) {
+ perror("build_cta_tuple_v4");
+ return ret;
+ }
+ return conntrack_data_insert(sock, nlh, zone);
+}
+
+static int conntrack_data_generate_v6(struct mnl_socket *sock,
+ struct in6_addr src_ip,
+ struct in6_addr dst_ip,
+ uint16_t zone)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfh;
+ int ret;
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
+ NLM_F_ACK | NLM_F_EXCL;
+ nlh->nlmsg_seq = time(NULL);
+
+ nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+ nfh->nfgen_family = AF_INET6;
+ nfh->version = NFNETLINK_V0;
+ nfh->res_id = 0;
+
+ ret = build_cta_tuple_v6(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip,
+ 12345, 443);
+ if (ret < 0) {
+ perror("build_cta_tuple_v6");
+ return ret;
+ }
+ ret = build_cta_tuple_v6(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip,
+ 12345, 443);
+ if (ret < 0) {
+ perror("build_cta_tuple_v6");
+ return ret;
+ }
+ return conntrack_data_insert(sock, nlh, zone);
+}
+
+static int count_entries(const struct nlmsghdr *nlh, void *data)
+{
+ reply_counter++;
+}
+
+static int conntracK_count_zone(struct mnl_socket *sock, uint16_t zone)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ struct nlmsghdr *nlh, *rplnlh;
+ struct nfgenmsg *nfh;
+ struct nlattr *nest;
+ unsigned int portid;
+ int err, ret;
+
+ portid = mnl_socket_get_portid(sock);
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_GET;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+ nlh->nlmsg_seq = time(NULL);
+
+ nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+ nfh->nfgen_family = AF_UNSPEC;
+ nfh->version = NFNETLINK_V0;
+ nfh->res_id = 0;
+
+ mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone));
+
+ ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len);
+ if (ret < 0) {
+ perror("mnl_socket_sendto");
+ return ret;
+ }
+
+ reply_counter = 0;
+ ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+ while (ret > 0) {
+ ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid,
+ count_entries, NULL);
+ if (ret <= MNL_CB_STOP)
+ break;
+
+ ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+ }
+ if (ret < 0) {
+ perror("mnl_socket_recvfrom");
+ return ret;
+ }
+
+ return reply_counter;
+}
+
+static int conntrack_flush_zone(struct mnl_socket *sock, uint16_t zone)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ struct nlmsghdr *nlh, *rplnlh;
+ struct nfgenmsg *nfh;
+ struct nlattr *nest;
+ unsigned int portid;
+ int err, ret;
+
+ portid = mnl_socket_get_portid(sock);
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_DELETE;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ nlh->nlmsg_seq = time(NULL);
+
+ nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+ nfh->nfgen_family = AF_UNSPEC;
+ nfh->version = NFNETLINK_V0;
+ nfh->res_id = 0;
+
+ mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone));
+
+ ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len);
+ if (ret < 0) {
+ perror("mnl_socket_sendto");
+ return ret;
+ }
+
+ ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+ if (ret < 0) {
+ perror("mnl_socket_recvfrom");
+ return ret;
+ }
+
+ ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL);
+ if (ret < 0) {
+ perror("mnl_cb_run");
+ return ret;
+ }
+
+ return 0;
+}
+
+FIXTURE(conntrack_dump_flush)
+{
+ struct mnl_socket *sock;
+};
+
+FIXTURE_SETUP(conntrack_dump_flush)
+{
+ struct in6_addr src, dst;
+ int ret;
+
+ self->sock = mnl_socket_open(NETLINK_NETFILTER);
+ if (!self->sock) {
+ perror("mnl_socket_open");
+ SKIP(return, "cannot open netlink_netfilter socket");
+ }
+
+ ret = mnl_socket_bind(self->sock, 0, MNL_SOCKET_AUTOPID);
+ EXPECT_EQ(ret, 0);
+
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+ if (ret < 0 && errno == EPERM)
+ SKIP(return, "Needs to be run as root");
+ else if (ret < 0 && errno == EOPNOTSUPP)
+ SKIP(return, "Kernel does not seem to support conntrack zones");
+
+ ret = conntrack_data_generate_v4(self->sock, 0xf0f0f0f0, 0xf1f1f1f1,
+ TEST_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+ ret = conntrack_data_generate_v4(self->sock, 0xf2f2f2f2, 0xf3f3f3f3,
+ TEST_ZONE_ID + 1);
+ EXPECT_EQ(ret, 0);
+ ret = conntrack_data_generate_v4(self->sock, 0xf4f4f4f4, 0xf5f5f5f5,
+ TEST_ZONE_ID + 2);
+ EXPECT_EQ(ret, 0);
+ ret = conntrack_data_generate_v4(self->sock, 0xf6f6f6f6, 0xf7f7f7f7,
+ NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+
+ src = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x01000000
+ }
+ }};
+ dst = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x02000000
+ }
+ }};
+ ret = conntrack_data_generate_v6(self->sock, src, dst,
+ TEST_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+ src = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x03000000
+ }
+ }};
+ dst = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x04000000
+ }
+ }};
+ ret = conntrack_data_generate_v6(self->sock, src, dst,
+ TEST_ZONE_ID + 1);
+ EXPECT_EQ(ret, 0);
+ src = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x05000000
+ }
+ }};
+ dst = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x06000000
+ }
+ }};
+ ret = conntrack_data_generate_v6(self->sock, src, dst,
+ TEST_ZONE_ID + 2);
+ EXPECT_EQ(ret, 0);
+
+ src = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x07000000
+ }
+ }};
+ dst = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x08000000
+ }
+ }};
+ ret = conntrack_data_generate_v6(self->sock, src, dst,
+ NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+ EXPECT_GE(ret, 2);
+ if (ret > 2)
+ SKIP(return, "kernel does not support filtering by zone");
+}
+
+FIXTURE_TEARDOWN(conntrack_dump_flush)
+{
+}
+
+TEST_F(conntrack_dump_flush, test_dump_by_zone)
+{
+ int ret;
+
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+ EXPECT_EQ(ret, 2);
+}
+
+TEST_F(conntrack_dump_flush, test_flush_by_zone)
+{
+ int ret;
+
+ ret = conntrack_flush_zone(self->sock, TEST_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 2);
+}
+
+TEST_F(conntrack_dump_flush, test_flush_by_zone_default)
+{
+ int ret;
+
+ ret = conntrack_flush_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh
new file mode 100755
index 0000000000..c63d840ead
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh
@@ -0,0 +1,278 @@
+#!/bin/bash
+#
+# check that ICMP df-needed/pkttoobig icmp are set are set as related
+# state
+#
+# Setup is:
+#
+# nsclient1 -> nsrouter1 -> nsrouter2 -> nsclient2
+# MTU 1500, except for nsrouter2 <-> nsclient2 link (1280).
+# ping nsclient2 from nsclient1, checking that conntrack did set RELATED
+# 'fragmentation needed' icmp packet.
+#
+# In addition, nsrouter1 will perform IP masquerading, i.e. also
+# check the icmp errors are propagated to the correct host as per
+# nat of "established" icmp-echo "connection".
+
+source lib.sh
+
+if ! nft --version > /dev/null 2>&1;then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+cleanup() {
+ cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+setup_ns nsclient1 nsclient2 nsrouter1 nsrouter2
+
+ret=0
+
+add_addr()
+{
+ ns=$1
+ dev=$2
+ i=$3
+
+ ip -net "$ns" link set "$dev" up
+ ip -net "$ns" addr add "192.168.$i.2/24" dev "$dev"
+ ip -net "$ns" addr add "dead:$i::2/64" dev "$dev" nodad
+}
+
+check_counter()
+{
+ ns=$1
+ name=$2
+ expect=$3
+ local lret=0
+
+ if ! ip netns exec "$ns" nft list counter inet filter "$name" | grep -q "$expect"; then
+ echo "ERROR: counter $name in $ns has unexpected value (expected $expect)" 1>&2
+ ip netns exec "$ns" nft list counter inet filter "$name" 1>&2
+ lret=1
+ fi
+
+ return $lret
+}
+
+check_unknown()
+{
+ expect="packets 0 bytes 0"
+ for n in ${nsclient1} ${nsclient2} ${nsrouter1} ${nsrouter2}; do
+ if ! check_counter "$n" "unknown" "$expect"; then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+DEV=veth0
+ip link add "$DEV" netns "$nsclient1" type veth peer name eth1 netns "$nsrouter1"
+ip link add "$DEV" netns "$nsclient2" type veth peer name eth1 netns "$nsrouter2"
+ip link add "$DEV" netns "$nsrouter1" type veth peer name eth2 netns "$nsrouter2"
+
+add_addr "$nsclient1" $DEV 1
+add_addr "$nsclient2" $DEV 2
+
+ip -net "$nsrouter1" link set eth1 up
+ip -net "$nsrouter1" link set $DEV up
+
+ip -net "$nsrouter2" link set eth1 mtu 1280 up
+ip -net "$nsrouter2" link set eth2 up
+
+ip -net "$nsclient1" route add default via 192.168.1.1
+ip -net "$nsclient1" -6 route add default via dead:1::1
+
+ip -net "$nsclient2" route add default via 192.168.2.1
+ip -net "$nsclient2" route add default via dead:2::1
+ip -net "$nsclient2" link set veth0 mtu 1280
+
+ip -net "$nsrouter1" addr add 192.168.1.1/24 dev eth1
+ip -net "$nsrouter1" addr add 192.168.3.1/24 dev veth0
+ip -net "$nsrouter1" addr add dead:1::1/64 dev eth1 nodad
+ip -net "$nsrouter1" addr add dead:3::1/64 dev veth0 nodad
+ip -net "$nsrouter1" route add default via 192.168.3.10
+ip -net "$nsrouter1" -6 route add default via dead:3::10
+
+ip -net "$nsrouter2" addr add 192.168.2.1/24 dev eth1
+ip -net "$nsrouter2" addr add 192.168.3.10/24 dev eth2
+ip -net "$nsrouter2" addr add dead:2::1/64 dev eth1 nodad
+ip -net "$nsrouter2" addr add dead:3::10/64 dev eth2 nodad
+ip -net "$nsrouter2" route add default via 192.168.3.1
+ip -net "$nsrouter2" route add default via dead:3::1
+
+for i in 4 6; do
+ ip netns exec "$nsrouter1" sysctl -q net.ipv$i.conf.all.forwarding=1
+ ip netns exec "$nsrouter2" sysctl -q net.ipv$i.conf.all.forwarding=1
+done
+
+for netns in "$nsrouter1" "$nsrouter2"; do
+ip netns exec "$netns" nft -f - <<EOF
+table inet filter {
+ counter unknown { }
+ counter related { }
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ meta l4proto icmpv6 icmpv6 type "packet-too-big" ct state "related" counter name "related" accept
+ meta l4proto icmp icmp type "destination-unreachable" ct state "related" counter name "related" accept
+ meta l4proto { icmp, icmpv6 } ct state new,established accept
+ counter name "unknown" drop
+ }
+}
+EOF
+done
+
+ip netns exec "$nsclient1" nft -f - <<EOF
+table inet filter {
+ counter unknown { }
+ counter related { }
+ counter redir4 { }
+ counter redir6 { }
+ chain input {
+ type filter hook input priority 0; policy accept;
+
+ icmp type "redirect" ct state "related" counter name "redir4" accept
+ icmpv6 type "nd-redirect" ct state "related" counter name "redir6" accept
+
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+ meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept
+
+ counter name "unknown" drop
+ }
+}
+EOF
+
+ip netns exec "$nsclient2" nft -f - <<EOF
+table inet filter {
+ counter unknown { }
+ counter new { }
+ counter established { }
+
+ chain input {
+ type filter hook input priority 0; policy accept;
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+ meta l4proto { icmp, icmpv6 } ct state "new" counter name "new" accept
+ meta l4proto { icmp, icmpv6 } ct state "established" counter name "established" accept
+ counter name "unknown" drop
+ }
+ chain output {
+ type filter hook output priority 0; policy accept;
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+ meta l4proto { icmp, icmpv6 } ct state "new" counter name "new"
+ meta l4proto { icmp, icmpv6 } ct state "established" counter name "established"
+ counter name "unknown" drop
+ }
+}
+EOF
+
+# make sure NAT core rewrites adress of icmp error if nat is used according to
+# conntrack nat information (icmp error will be directed at nsrouter1 address,
+# but it needs to be routed to nsclient1 address).
+ip netns exec "$nsrouter1" nft -f - <<EOF
+table ip nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ip protocol icmp oifname "veth0" counter masquerade
+ }
+}
+table ip6 nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ip6 nexthdr icmpv6 oifname "veth0" counter masquerade
+ }
+}
+EOF
+
+if ! ip netns exec "$nsclient1" ping -c 1 -s 1000 -q -M "do" 192.168.2.2 >/dev/null; then
+ echo "ERROR: netns ip routing/connectivity broken" 1>&2
+ exit 1
+fi
+if ! ip netns exec "$nsclient1" ping -c 1 -s 1000 -q dead:2::2 >/dev/null; then
+ echo "ERROR: netns ipv6 routing/connectivity broken" 1>&2
+ exit 1
+fi
+
+if ! check_unknown; then
+ ret=1
+fi
+
+expect="packets 0 bytes 0"
+for netns in "$nsrouter1" "$nsrouter2" "$nsclient1";do
+ if ! check_counter "$netns" "related" "$expect"; then
+ ret=1
+ fi
+done
+
+expect="packets 2 bytes 2076"
+if ! check_counter "$nsclient2" "new" "$expect"; then
+ ret=1
+fi
+
+if ip netns exec "$nsclient1" ping -W 0.5 -q -c 1 -s 1300 -M "do" 192.168.2.2 > /dev/null; then
+ echo "ERROR: ping should have failed with PMTU too big error" 1>&2
+ ret=1
+fi
+
+# nsrouter2 should have generated the icmp error, so
+# related counter should be 0 (its in forward).
+expect="packets 0 bytes 0"
+if ! check_counter "$nsrouter2" "related" "$expect"; then
+ ret=1
+fi
+
+# but nsrouter1 should have seen it, same for nsclient1.
+expect="packets 1 bytes 576"
+for netns in ${nsrouter1} ${nsclient1};do
+ if ! check_counter "$netns" "related" "$expect"; then
+ ret=1
+ fi
+done
+
+if ip netns exec "${nsclient1}" ping6 -W 0.5 -c 1 -s 1300 dead:2::2 > /dev/null; then
+ echo "ERROR: ping6 should have failed with PMTU too big error" 1>&2
+ ret=1
+fi
+
+expect="packets 2 bytes 1856"
+for netns in "${nsrouter1}" "${nsclient1}";do
+ if ! check_counter "$netns" "related" "$expect"; then
+ ret=1
+ fi
+done
+
+if [ $ret -eq 0 ];then
+ echo "PASS: icmp mtu error had RELATED state"
+else
+ echo "ERROR: icmp error RELATED state test has failed"
+fi
+
+# add 'bad' route, expect icmp REDIRECT to be generated
+ip netns exec "${nsclient1}" ip route add 192.168.1.42 via 192.168.1.1
+ip netns exec "${nsclient1}" ip route add dead:1::42 via dead:1::1
+
+ip netns exec "$nsclient1" ping -W 1 -q -i 0.5 -c 2 192.168.1.42 > /dev/null
+
+expect="packets 1 bytes 112"
+if ! check_counter "$nsclient1" "redir4" "$expect"; then
+ ret=1
+fi
+
+ip netns exec "$nsclient1" ping -W 1 -c 1 dead:1::42 > /dev/null
+expect="packets 1 bytes 192"
+if ! check_counter "$nsclient1" "redir6" "$expect"; then
+ ret=1
+fi
+
+if [ $ret -eq 0 ];then
+ echo "PASS: icmp redirects had RELATED state"
+else
+ echo "ERROR: icmp redirect RELATED state test has failed"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh b/tools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh
new file mode 100755
index 0000000000..9832a5d019
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh
@@ -0,0 +1,191 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+# Conntrack needs to reassemble fragments in order to have complete
+# packets for rule matching. Reassembly can lead to packet loss.
+
+# Consider the following setup:
+# +--------+ +---------+ +--------+
+# |Router A|-------|Wanrouter|-------|Router B|
+# | |.IPIP..| |..IPIP.| |
+# +--------+ +---------+ +--------+
+# / mtu 1400 \
+# / \
+#+--------+ +--------+
+#|Client A| |Client B|
+#| | | |
+#+--------+ +--------+
+
+# Router A and Router B use IPIP tunnel interfaces to tunnel traffic
+# between Client A and Client B over WAN. Wanrouter has MTU 1400 set
+# on its interfaces.
+
+rx=$(mktemp)
+
+checktool "iptables --version" "run test without iptables"
+checktool "socat -h" "run test without socat"
+
+setup_ns r_a r_b r_w c_a c_b
+
+cleanup() {
+ cleanup_all_ns
+ rm -f "$rx"
+}
+
+trap cleanup EXIT
+
+listener_ready()
+{
+ ns="$1"
+ port="$2"
+ ss -N "$ns" -lnu -o "sport = :$port" | grep -q "$port"
+}
+
+test_path() {
+ msg="$1"
+
+ ip netns exec "$c_b" socat -t 3 - udp4-listen:5000,reuseaddr > "$rx" < /dev/null &
+
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$c_b" 5000
+
+ for i in 1 2 3; do
+ head -c1400 /dev/zero | tr "\000" "a" | \
+ ip netns exec "$c_a" socat -t 1 -u STDIN UDP:192.168.20.2:5000
+ done
+
+ wait
+
+ bytes=$(wc -c < "$rx")
+
+ if [ "$bytes" -eq 1400 ];then
+ echo "OK: PMTU $msg connection tracking"
+ else
+ echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400"
+ exit 1
+ fi
+}
+
+# Detailed setup for Router A
+# ---------------------------
+# Interfaces:
+# eth0: 10.2.2.1/24
+# eth1: 192.168.10.1/24
+# ipip0: No IP address, local 10.2.2.1 remote 10.4.4.1
+# Routes:
+# 192.168.20.0/24 dev ipip0 (192.168.20.0/24 is subnet of Client B)
+# 10.4.4.1 via 10.2.2.254 (Router B via Wanrouter)
+# No iptables rules at all.
+
+ip link add veth0 netns "$r_a" type veth peer name veth0 netns "$r_w"
+ip link add veth1 netns "$r_a" type veth peer name veth0 netns "$c_a"
+
+l_addr="10.2.2.1"
+r_addr="10.4.4.1"
+ip netns exec "$r_a" ip link add ipip0 type ipip local "$l_addr" remote "$r_addr" mode ipip || exit $ksft_skip
+
+for dev in lo veth0 veth1 ipip0; do
+ ip -net "$r_a" link set "$dev" up
+done
+
+ip -net "$r_a" addr add 10.2.2.1/24 dev veth0
+ip -net "$r_a" addr add 192.168.10.1/24 dev veth1
+
+ip -net "$r_a" route add 192.168.20.0/24 dev ipip0
+ip -net "$r_a" route add 10.4.4.0/24 via 10.2.2.254
+
+ip netns exec "$r_a" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Detailed setup for Router B
+# ---------------------------
+# Interfaces:
+# eth0: 10.4.4.1/24
+# eth1: 192.168.20.1/24
+# ipip0: No IP address, local 10.4.4.1 remote 10.2.2.1
+# Routes:
+# 192.168.10.0/24 dev ipip0 (192.168.10.0/24 is subnet of Client A)
+# 10.2.2.1 via 10.4.4.254 (Router A via Wanrouter)
+# No iptables rules at all.
+
+ip link add veth0 netns "$r_b" type veth peer name veth1 netns "$r_w"
+ip link add veth1 netns "$r_b" type veth peer name veth0 netns "$c_b"
+
+l_addr="10.4.4.1"
+r_addr="10.2.2.1"
+
+ip netns exec "$r_b" ip link add ipip0 type ipip local "${l_addr}" remote "${r_addr}" mode ipip || exit $ksft_skip
+
+for dev in veth0 veth1 ipip0; do
+ ip -net "$r_b" link set $dev up
+done
+
+ip -net "$r_b" addr add 10.4.4.1/24 dev veth0
+ip -net "$r_b" addr add 192.168.20.1/24 dev veth1
+
+ip -net "$r_b" route add 192.168.10.0/24 dev ipip0
+ip -net "$r_b" route add 10.2.2.0/24 via 10.4.4.254
+ip netns exec "$r_b" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Client A
+ip -net "$c_a" addr add 192.168.10.2/24 dev veth0
+ip -net "$c_a" link set dev veth0 up
+ip -net "$c_a" route add default via 192.168.10.1
+
+# Client A
+ip -net "$c_b" addr add 192.168.20.2/24 dev veth0
+ip -net "$c_b" link set dev veth0 up
+ip -net "$c_b" route add default via 192.168.20.1
+
+# Wan
+ip -net "$r_w" addr add 10.2.2.254/24 dev veth0
+ip -net "$r_w" addr add 10.4.4.254/24 dev veth1
+
+ip -net "$r_w" link set dev veth0 up mtu 1400
+ip -net "$r_w" link set dev veth1 up mtu 1400
+
+ip -net "$r_a" link set dev veth0 mtu 1400
+ip -net "$r_b" link set dev veth0 mtu 1400
+
+ip netns exec "$r_w" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Path MTU discovery
+# ------------------
+# Running tracepath from Client A to Client B shows PMTU discovery is working
+# as expected:
+#
+# clienta:~# tracepath 192.168.20.2
+# 1?: [LOCALHOST] pmtu 1500
+# 1: 192.168.10.1 0.867ms
+# 1: 192.168.10.1 0.302ms
+# 2: 192.168.10.1 0.312ms pmtu 1480
+# 2: no reply
+# 3: 192.168.10.1 0.510ms pmtu 1380
+# 3: 192.168.20.2 2.320ms reached
+# Resume: pmtu 1380 hops 3 back 3
+
+# ip netns exec ${c_a} traceroute --mtu 192.168.20.2
+
+# Router A has learned PMTU (1400) to Router B from Wanrouter.
+# Client A has learned PMTU (1400 - IPIP overhead = 1380) to Client B
+# from Router A.
+
+#Send large UDP packet
+#---------------------
+#Now we send a 1400 bytes UDP packet from Client A to Client B:
+
+# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | socat -u STDIN UDP:192.168.20.2:5000
+test_path "without"
+
+# The IPv4 stack on Client A already knows the PMTU to Client B, so the
+# UDP packet is sent as two fragments (1380 + 20). Router A forwards the
+# fragments between eth1 and ipip0. The fragments fit into the tunnel and
+# reach their destination.
+
+#When sending the large UDP packet again, Router A now reassembles the
+#fragments before routing the packet over ipip0. The resulting IPIP
+#packet is too big (1400) for the tunnel PMTU (1380) to Router B, it is
+#dropped on Router A before sending.
+
+ip netns exec "$r_a" iptables -A FORWARD -m conntrack --ctstate NEW
+test_path "with"
diff --git a/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh b/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh
new file mode 100755
index 0000000000..d860f7d974
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Testing For SCTP COLLISION SCENARIO as Below:
+#
+# 14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359]
+# 14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187]
+# 14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359]
+# 14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO]
+# 14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK]
+# 14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970]
+#
+# TOPO: SERVER_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) CLIENT_NS
+
+source lib.sh
+
+CLIENT_IP="198.51.200.1"
+CLIENT_PORT=1234
+
+SERVER_IP="198.51.100.1"
+SERVER_PORT=1234
+
+CLIENT_GW="198.51.200.2"
+SERVER_GW="198.51.100.2"
+
+# setup the topo
+setup() {
+ setup_ns CLIENT_NS SERVER_NS ROUTER_NS
+ ip -n "$SERVER_NS" link add link0 type veth peer name link1 netns "$ROUTER_NS"
+ ip -n "$CLIENT_NS" link add link3 type veth peer name link2 netns "$ROUTER_NS"
+
+ ip -n "$SERVER_NS" link set link0 up
+ ip -n "$SERVER_NS" addr add $SERVER_IP/24 dev link0
+ ip -n "$SERVER_NS" route add $CLIENT_IP dev link0 via $SERVER_GW
+
+ ip -n "$ROUTER_NS" link set link1 up
+ ip -n "$ROUTER_NS" link set link2 up
+ ip -n "$ROUTER_NS" addr add $SERVER_GW/24 dev link1
+ ip -n "$ROUTER_NS" addr add $CLIENT_GW/24 dev link2
+ ip net exec "$ROUTER_NS" sysctl -wq net.ipv4.ip_forward=1
+
+ ip -n "$CLIENT_NS" link set link3 up
+ ip -n "$CLIENT_NS" addr add $CLIENT_IP/24 dev link3
+ ip -n "$CLIENT_NS" route add $SERVER_IP dev link3 via $CLIENT_GW
+
+ # simulate the delay on OVS upcall by setting up a delay for INIT_ACK with
+ # tc on $SERVER_NS side
+ tc -n "$SERVER_NS" qdisc add dev link0 root handle 1: htb r2q 64
+ tc -n "$SERVER_NS" class add dev link0 parent 1: classid 1:1 htb rate 100mbit
+ tc -n "$SERVER_NS" filter add dev link0 parent 1: protocol ip u32 match ip protocol 132 \
+ 0xff match u8 2 0xff at 32 flowid 1:1
+ if ! tc -n "$SERVER_NS" qdisc add dev link0 parent 1:1 handle 10: netem delay 1200ms; then
+ echo "SKIP: Cannot add netem qdisc"
+ exit $ksft_skip
+ fi
+
+ # simulate the ctstate check on OVS nf_conntrack
+ ip net exec "$ROUTER_NS" iptables -A FORWARD -m state --state INVALID,UNTRACKED -j DROP
+ ip net exec "$ROUTER_NS" iptables -A INPUT -p sctp -j DROP
+
+ # use a smaller number for assoc's max_retrans to reproduce the issue
+ modprobe -q sctp
+ ip net exec "$CLIENT_NS" sysctl -wq net.sctp.association_max_retrans=3
+}
+
+cleanup() {
+ ip net exec "$CLIENT_NS" pkill sctp_collision >/dev/null 2>&1
+ ip net exec "$SERVER_NS" pkill sctp_collision >/dev/null 2>&1
+ cleanup_all_ns
+}
+
+do_test() {
+ ip net exec "$SERVER_NS" ./sctp_collision server \
+ $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT &
+ ip net exec "$CLIENT_NS" ./sctp_collision client \
+ $CLIENT_IP $CLIENT_PORT $SERVER_IP $SERVER_PORT
+}
+
+# NOTE: one way to work around the issue is set a smaller hb_interval
+# ip net exec $CLIENT_NS sysctl -wq net.sctp.hb_interval=3500
+
+# run the test case
+trap cleanup EXIT
+setup && \
+echo "Test for SCTP Collision in nf_conntrack:" && \
+do_test && echo "PASS!"
+exit $?
diff --git a/tools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh b/tools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh
new file mode 100755
index 0000000000..121ea93c01
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh
@@ -0,0 +1,164 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that UNREPLIED tcp conntrack will eventually timeout.
+#
+
+source lib.sh
+
+if ! nft --version > /dev/null 2>&1;then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+if ! conntrack --version > /dev/null 2>&1;then
+ echo "SKIP: Could not run test without conntrack tool"
+ exit $ksft_skip
+fi
+
+ret=0
+
+cleanup() {
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
+
+ cleanup_all_ns
+}
+
+ipv4() {
+ echo -n 192.168."$1".2
+}
+
+check_counter()
+{
+ ns=$1
+ name=$2
+ expect=$3
+ local lret=0
+
+ if ! ip netns exec "$ns2" nft list counter inet filter "$name" | grep -q "$expect"; then
+ echo "ERROR: counter $name in $ns2 has unexpected value (expected $expect)" 1>&2
+ ip netns exec "$ns2" nft list counter inet filter "$name" 1>&2
+ lret=1
+ fi
+
+ return $lret
+}
+
+trap cleanup EXIT
+
+# Create test namespaces
+setup_ns ns1 ns2
+
+# Connect the namespace to the host using a veth pair
+ip -net "$ns1" link add name veth1 type veth peer name veth2
+ip -net "$ns1" link set netns "$ns2" dev veth2
+
+ip -net "$ns1" link set up dev lo
+ip -net "$ns2" link set up dev lo
+ip -net "$ns1" link set up dev veth1
+ip -net "$ns2" link set up dev veth2
+
+ip -net "$ns2" addr add 10.11.11.2/24 dev veth2
+ip -net "$ns2" route add default via 10.11.11.1
+
+ip netns exec "$ns2" sysctl -q net.ipv4.conf.veth2.forwarding=1
+
+# add a rule inside NS so we enable conntrack
+ip netns exec "$ns1" nft -f - <<EOF
+table inet filter {
+ chain input {
+ type filter hook input priority 0; policy accept;
+ ct state established accept
+ }
+}
+EOF
+
+ip -net "$ns1" addr add 10.11.11.1/24 dev veth1
+ip -net "$ns1" route add 10.99.99.99 via 10.11.11.2
+
+# Check connectivity works
+ip netns exec "$ns1" ping -q -c 2 10.11.11.2 >/dev/null || exit 1
+
+ip netns exec "$ns2" socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT &
+
+ip netns exec "$ns2" nft -f - <<EOF
+table inet filter {
+ counter connreq { }
+ counter redir { }
+ chain input {
+ type filter hook input priority 0; policy accept;
+ ct state new tcp flags syn ip daddr 10.99.99.99 tcp dport 80 counter name "connreq" accept
+ ct state new ct status dnat tcp dport 8080 counter name "redir" accept
+ }
+}
+EOF
+if [ $? -ne 0 ]; then
+ echo "ERROR: Could not load nft rules"
+ exit 1
+fi
+
+ip netns exec "$ns2" sysctl -q net.netfilter.nf_conntrack_tcp_timeout_syn_sent=10
+
+echo "INFO: connect $ns1 -> $ns2 to the virtual ip"
+ip netns exec "$ns1" bash -c 'for i in $(seq 1 $BUSYWAIT_TIMEOUT) ; do
+ socat -u STDIN TCP:10.99.99.99:80 < /dev/null
+ sleep 0.1
+ done' &
+
+wait_for_attempt()
+{
+ count=$(ip netns exec "$ns2" conntrack -L -p tcp --dport 80 2>/dev/null | wc -l)
+ if [ "$count" -gt 0 ]; then
+ return 0
+ fi
+
+ return 1
+}
+
+# wait for conntrack to pick the new connection request up before loading
+# the nat redirect rule.
+if ! busywait "$BUSYWAIT_TIMEOUT" wait_for_attempt; then
+ echo "ERROR: $ns2 did not pick up tcp connection from peer"
+ exit 1
+fi
+
+ip netns exec "$ns2" nft -f - <<EOF
+table inet nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ ip daddr 10.99.99.99 tcp dport 80 redirect to :8080
+ }
+}
+EOF
+if [ $? -ne 0 ]; then
+ echo "ERROR: Could not load nat redirect"
+ exit 1
+fi
+
+wait_for_redirect()
+{
+ count=$(ip netns exec "$ns2" conntrack -L -p tcp --reply-port-src 8080 2>/dev/null | wc -l)
+ if [ "$count" -gt 0 ]; then
+ return 0
+ fi
+
+ return 1
+}
+echo "INFO: NAT redirect added in ns $ns2, waiting for $BUSYWAIT_TIMEOUT ms for nat to take effect"
+
+busywait "$BUSYWAIT_TIMEOUT" wait_for_redirect
+ret=$?
+
+expect="packets 1 bytes 60"
+if ! check_counter "$ns2" "redir" "$expect"; then
+ ret=1
+fi
+
+if [ $ret -eq 0 ];then
+ echo "PASS: redirection counter has expected values"
+else
+ echo "ERROR: no tcp connection was redirected"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
new file mode 100755
index 0000000000..073e8e62d3
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
@@ -0,0 +1,220 @@
+#!/bin/bash
+
+# This script demonstrates interaction of conntrack and vrf.
+# The vrf driver calls the netfilter hooks again, with oif/iif
+# pointing at the VRF device.
+#
+# For ingress, this means first iteration has iifname of lower/real
+# device. In this script, thats veth0.
+# Second iteration is iifname set to vrf device, tvrf in this script.
+#
+# For egress, this is reversed: first iteration has the vrf device,
+# second iteration is done with the lower/real/veth0 device.
+#
+# test_ct_zone_in demonstrates unexpected change of nftables
+# behavior # caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack
+# connection on VRF rcv"
+#
+# It was possible to assign conntrack zone to a packet (or mark it for
+# `notracking`) in the prerouting chain before conntrack, based on real iif.
+#
+# After the change, the zone assignment is lost and the zone is assigned based
+# on the VRF master interface (in case such a rule exists).
+# assignment is lost. Instead, assignment based on the `iif` matching
+# Thus it is impossible to distinguish packets based on the original
+# interface.
+#
+# test_masquerade_vrf and test_masquerade_veth0 demonstrate the problem
+# that was supposed to be fixed by the commit mentioned above to make sure
+# that any fix to test case 1 won't break masquerade again.
+
+source lib.sh
+
+IP0=172.30.30.1
+IP1=172.30.30.2
+PFXL=30
+ret=0
+
+cleanup()
+{
+ ip netns pids $ns0 | xargs kill 2>/dev/null
+ ip netns pids $ns1 | xargs kill 2>/dev/null
+
+ cleanup_all_ns
+}
+
+checktool "nft --version" "run test without nft"
+checktool "conntrack --version" "run test without conntrack"
+checktool "socat -h" "run test without socat"
+
+trap cleanup EXIT
+
+setup_ns ns0 ns1
+
+ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0
+ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
+ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
+
+if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then
+ echo "SKIP: Could not add veth device"
+ exit $ksft_skip
+fi
+
+if ! ip -net "$ns0" li add tvrf type vrf table 9876; then
+ echo "SKIP: Could not add vrf device"
+ exit $ksft_skip
+fi
+
+ip -net "$ns0" li set veth0 master tvrf
+ip -net "$ns0" li set tvrf up
+ip -net "$ns0" li set veth0 up
+ip -net "$ns1" li set veth0 up
+
+ip -net "$ns0" addr add $IP0/$PFXL dev veth0
+ip -net "$ns1" addr add $IP1/$PFXL dev veth0
+
+listener_ready()
+{
+ local ns="$1"
+
+ ss -N "$ns" -l -n -t -o "sport = :55555" | grep -q "55555"
+}
+
+ip netns exec "$ns1" socat -u -4 TCP-LISTEN:55555,reuseaddr,fork STDOUT > /dev/null &
+busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1"
+
+# test vrf ingress handling.
+# The incoming connection should be placed in conntrack zone 1,
+# as decided by the first iteration of the ruleset.
+test_ct_zone_in()
+{
+ip netns exec "$ns0" nft -f - <<EOF
+table testct {
+ chain rawpre {
+ type filter hook prerouting priority raw;
+
+ iif { veth0, tvrf } counter meta nftrace set 1
+ iif veth0 counter ct zone set 1 counter return
+ iif tvrf counter ct zone set 2 counter return
+ ip protocol icmp counter
+ notrack counter
+ }
+
+ chain rawout {
+ type filter hook output priority raw;
+
+ oif veth0 counter ct zone set 1 counter return
+ oif tvrf counter ct zone set 2 counter return
+ notrack counter
+ }
+}
+EOF
+ ip netns exec "$ns1" ping -W 1 -c 1 -I veth0 "$IP0" > /dev/null
+
+ # should be in zone 1, not zone 2
+ count=$(ip netns exec "$ns0" conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l)
+ if [ "$count" -eq 1 ]; then
+ echo "PASS: entry found in conntrack zone 1"
+ else
+ echo "FAIL: entry not found in conntrack zone 1"
+ count=$(ip netns exec "$ns0" conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l)
+ if [ "$count" -eq 1 ]; then
+ echo "FAIL: entry found in zone 2 instead"
+ else
+ echo "FAIL: entry not in zone 1 or 2, dumping table"
+ ip netns exec "$ns0" conntrack -L
+ ip netns exec "$ns0" nft list ruleset
+ fi
+ fi
+}
+
+# add masq rule that gets evaluated w. outif set to vrf device.
+# This tests the first iteration of the packet through conntrack,
+# oifname is the vrf device.
+test_masquerade_vrf()
+{
+ local qdisc=$1
+
+ if [ "$qdisc" != "default" ]; then
+ tc -net "$ns0" qdisc add dev tvrf root "$qdisc"
+ fi
+
+ ip netns exec "$ns0" conntrack -F 2>/dev/null
+
+ip netns exec "$ns0" nft -f - <<EOF
+flush ruleset
+table ip nat {
+ chain rawout {
+ type filter hook output priority raw;
+
+ oif tvrf ct state untracked counter
+ }
+ chain postrouting2 {
+ type filter hook postrouting priority mangle;
+
+ oif tvrf ct state untracked counter
+ }
+ chain postrouting {
+ type nat hook postrouting priority 0;
+ # NB: masquerade should always be combined with 'oif(name) bla',
+ # lack of this is intentional here, we want to exercise double-snat.
+ ip saddr 172.30.30.0/30 counter masquerade random
+ }
+}
+EOF
+ if ! ip netns exec "$ns0" ip vrf exec tvrf socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null;then
+ echo "FAIL: connect failure with masquerade + sport rewrite on vrf device"
+ ret=1
+ return
+ fi
+
+ # must also check that nat table was evaluated on second (lower device) iteration.
+ if ip netns exec "$ns0" nft list table ip nat |grep -q 'counter packets 1' &&
+ ip netns exec "$ns0" nft list table ip nat |grep -q 'untracked counter packets [1-9]'; then
+ echo "PASS: connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
+ else
+ echo "FAIL: vrf rules have unexpected counter value"
+ ret=1
+ fi
+
+ if [ "$qdisc" != "default" ]; then
+ tc -net "$ns0" qdisc del dev tvrf root
+ fi
+}
+
+# add masq rule that gets evaluated w. outif set to veth device.
+# This tests the 2nd iteration of the packet through conntrack,
+# oifname is the lower device (veth0 in this case).
+test_masquerade_veth()
+{
+ ip netns exec "$ns0" conntrack -F 2>/dev/null
+ip netns exec "$ns0" nft -f - <<EOF
+flush ruleset
+table ip nat {
+ chain postrouting {
+ type nat hook postrouting priority 0;
+ meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
+ }
+}
+EOF
+ if ! ip netns exec "$ns0" ip vrf exec tvrf socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null;then
+ echo "FAIL: connect failure with masquerade + sport rewrite on veth device"
+ ret=1
+ return
+ fi
+
+ # must also check that nat table was evaluated on second (lower device) iteration.
+ if ip netns exec "$ns0" nft list table ip nat |grep -q 'counter packets 1'; then
+ echo "PASS: connect with masquerade + sport rewrite on veth device"
+ else
+ echo "FAIL: vrf masq rule has unexpected counter value"
+ ret=1
+ fi
+}
+
+test_ct_zone_in
+test_masquerade_vrf "default"
+test_masquerade_vrf "pfifo"
+test_masquerade_veth
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh
new file mode 100755
index 0000000000..4ceee9fb39
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/ipvs.sh
@@ -0,0 +1,211 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# End-to-end ipvs test suite
+# Topology:
+#--------------------------------------------------------------+
+# | |
+# ns0 | ns1 |
+# ----------- | ----------- ----------- |
+# | veth01 | --------- | veth10 | | veth12 | |
+# ----------- peer ----------- ----------- |
+# | | | |
+# ----------- | | |
+# | br0 | |----------------- peer |--------------|
+# ----------- | | |
+# | | | |
+# ---------- peer ---------- ----------- |
+# | veth02 | --------- | veth20 | | veth21 | |
+# ---------- | ---------- ----------- |
+# | ns2 |
+# | |
+#--------------------------------------------------------------+
+#
+# We assume that all network driver are loaded
+#
+
+source lib.sh
+
+ret=0
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+readonly port=8080
+
+readonly vip_v4=207.175.44.110
+readonly cip_v4=10.0.0.2
+readonly gip_v4=10.0.0.1
+readonly dip_v4=172.16.0.1
+readonly rip_v4=172.16.0.2
+readonly sip_v4=10.0.0.3
+
+readonly infile="$(mktemp)"
+readonly outfile="$(mktemp)"
+readonly datalen=32
+
+sysipvsnet="/proc/sys/net/ipv4/vs/"
+if [ ! -d $sysipvsnet ]; then
+ if ! modprobe -q ip_vs; then
+ echo "skip: could not run test without ipvs module"
+ exit $ksft_skip
+ fi
+fi
+
+checktool "ipvsadm -v" "run test without ipvsadm"
+checktool "socat -h" "run test without socat"
+
+setup() {
+ setup_ns ns0 ns1 ns2
+
+ ip link add veth01 netns "${ns0}" type veth peer name veth10 netns "${ns1}"
+ ip link add veth02 netns "${ns0}" type veth peer name veth20 netns "${ns2}"
+ ip link add veth12 netns "${ns1}" type veth peer name veth21 netns "${ns2}"
+
+ ip netns exec "${ns0}" ip link set veth01 up
+ ip netns exec "${ns0}" ip link set veth02 up
+ ip netns exec "${ns0}" ip link add br0 type bridge
+ ip netns exec "${ns0}" ip link set veth01 master br0
+ ip netns exec "${ns0}" ip link set veth02 master br0
+ ip netns exec "${ns0}" ip link set br0 up
+ ip netns exec "${ns0}" ip addr add "${cip_v4}/24" dev br0
+
+ ip netns exec "${ns1}" ip link set veth10 up
+ ip netns exec "${ns1}" ip addr add "${gip_v4}/24" dev veth10
+ ip netns exec "${ns1}" ip link set veth12 up
+ ip netns exec "${ns1}" ip addr add "${dip_v4}/24" dev veth12
+
+ ip netns exec "${ns2}" ip link set veth21 up
+ ip netns exec "${ns2}" ip addr add "${rip_v4}/24" dev veth21
+ ip netns exec "${ns2}" ip link set veth20 up
+ ip netns exec "${ns2}" ip addr add "${sip_v4}/24" dev veth20
+
+ sleep 1
+
+ dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
+}
+
+cleanup() {
+ cleanup_all_ns
+
+ if [ -f "${outfile}" ]; then
+ rm "${outfile}"
+ fi
+ if [ -f "${infile}" ]; then
+ rm "${infile}"
+ fi
+}
+
+server_listen() {
+ ip netns exec "$ns2" socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT > "${outfile}" &
+ server_pid=$!
+ sleep 0.2
+}
+
+client_connect() {
+ ip netns exec "${ns0}" timeout 2 socat -u -4 STDIN TCP:"${vip_v4}":"${port}" < "${infile}"
+}
+
+verify_data() {
+ wait "${server_pid}"
+ cmp "$infile" "$outfile" 2>/dev/null
+}
+
+test_service() {
+ server_listen
+ client_connect
+ verify_data
+}
+
+
+test_dr() {
+ ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
+
+ ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec "${ns1}" ipvsadm -A -t "${vip_v4}:${port}" -s rr
+ ip netns exec "${ns1}" ipvsadm -a -t "${vip_v4}:${port}" -r "${rip_v4}:${port}"
+ ip netns exec "${ns1}" ip addr add "${vip_v4}/32" dev lo:1
+
+ # avoid incorrect arp response
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
+ # avoid reverse route lookup
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0
+ ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
+
+ test_service
+}
+
+test_nat() {
+ ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
+
+ ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec "${ns1}" ipvsadm -A -t "${vip_v4}:${port}" -s rr
+ ip netns exec "${ns1}" ipvsadm -a -m -t "${vip_v4}:${port}" -r "${rip_v4}:${port}"
+ ip netns exec "${ns1}" ip addr add "${vip_v4}/32" dev lo:1
+
+ ip netns exec "${ns2}" ip link del veth20
+ ip netns exec "${ns2}" ip route add default via "${dip_v4}" dev veth21
+
+ test_service
+}
+
+test_tun() {
+ ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
+
+ ip netns exec "${ns1}" modprobe -q ipip
+ ip netns exec "${ns1}" ip link set tunl0 up
+ ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=0
+ ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.all.send_redirects=0
+ ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.default.send_redirects=0
+ ip netns exec "${ns1}" ipvsadm -A -t "${vip_v4}:${port}" -s rr
+ ip netns exec "${ns1}" ipvsadm -a -i -t "${vip_v4}:${port}" -r ${rip_v4}:${port}
+ ip netns exec "${ns1}" ip addr add ${vip_v4}/32 dev lo:1
+
+ ip netns exec "${ns2}" modprobe -q ipip
+ ip netns exec "${ns2}" ip link set tunl0 up
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0
+ ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
+
+ test_service
+}
+
+run_tests() {
+ local errors=
+
+ echo "Testing DR mode..."
+ cleanup
+ setup
+ test_dr
+ errors=$(( $errors + $? ))
+
+ echo "Testing NAT mode..."
+ cleanup
+ setup
+ test_nat
+ errors=$(( $errors + $? ))
+
+ echo "Testing Tunnel mode..."
+ cleanup
+ setup
+ test_tun
+ errors=$(( $errors + $? ))
+
+ return $errors
+}
+
+trap cleanup EXIT
+
+run_tests
+
+if [ $? -ne 0 ]; then
+ echo -e "$(basename $0): ${RED}FAIL${NC}"
+ exit 1
+fi
+echo -e "$(basename $0): ${GREEN}PASS${NC}"
+exit 0
diff --git a/tools/testing/selftests/net/netfilter/lib.sh b/tools/testing/selftests/net/netfilter/lib.sh
new file mode 100644
index 0000000000..bedd35298e
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/lib.sh
@@ -0,0 +1,10 @@
+net_netfilter_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "$net_netfilter_dir/../lib.sh"
+
+checktool (){
+ if ! $1 > /dev/null 2>&1; then
+ echo "SKIP: Could not $2"
+ exit $ksft_skip
+ fi
+}
diff --git a/tools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh b/tools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh
new file mode 100755
index 0000000000..c6fdd2079f
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+checktool "conntrack --version" "run test without conntrack"
+checktool "iptables --version" "run test without iptables"
+checktool "ip6tables --version" "run test without ip6tables"
+
+modprobe -q tun
+modprobe -q nf_conntrack
+# echo 1 > /proc/sys/net/netfilter/nf_log_all_netns
+
+PDRILL_TIMEOUT=10
+
+files="
+conntrack_ack_loss_stall.pkt
+conntrack_inexact_rst.pkt
+conntrack_syn_challenge_ack.pkt
+conntrack_synack_old.pkt
+conntrack_synack_reuse.pkt
+conntrack_rst_invalid.pkt
+"
+
+if ! packetdrill --dry_run --verbose "packetdrill/conntrack_ack_loss_stall.pkt";then
+ echo "SKIP: packetdrill not installed"
+ exit ${ksft_skip}
+fi
+
+ret=0
+
+run_packetdrill()
+{
+ filename="$1"
+ ipver="$2"
+ local mtu=1500
+
+ export NFCT_IP_VERSION="$ipver"
+
+ if [ "$ipver" = "ipv4" ];then
+ export xtables="iptables"
+ elif [ "$ipver" = "ipv6" ];then
+ export xtables="ip6tables"
+ mtu=1520
+ fi
+
+ timeout "$PDRILL_TIMEOUT" unshare -n packetdrill --ip_version="$ipver" --mtu=$mtu \
+ --tolerance_usecs=1000000 --non_fatal packet "$filename"
+}
+
+run_one_test_file()
+{
+ filename="$1"
+
+ for v in ipv4 ipv6;do
+ printf "%-50s(%s)%-20s" "$filename" "$v" ""
+ if run_packetdrill packetdrill/"$f" "$v";then
+ echo OK
+ else
+ echo FAIL
+ ret=1
+ fi
+ done
+}
+
+echo "Replaying packetdrill test cases:"
+for f in $files;do
+ run_one_test_file packetdrill/"$f"
+done
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
new file mode 100755
index 0000000000..1014551dd7
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test NAT source port clash resolution
+#
+
+source lib.sh
+ret=0
+socatpid=0
+
+cleanup()
+{
+ [ "$socatpid" -gt 0 ] && kill "$socatpid"
+
+ cleanup_all_ns
+}
+
+checktool "socat -h" "run test without socat"
+checktool "iptables --version" "run test without iptables"
+
+trap cleanup EXIT
+
+setup_ns ns1 ns2
+
+# Connect the namespaces using a veth pair
+ip link add name veth2 type veth peer name veth1
+ip link set netns "$ns1" dev veth1
+ip link set netns "$ns2" dev veth2
+
+ip netns exec "$ns1" ip link set up dev lo
+ip netns exec "$ns1" ip link set up dev veth1
+ip netns exec "$ns1" ip addr add 192.168.1.1/24 dev veth1
+
+ip netns exec "$ns2" ip link set up dev lo
+ip netns exec "$ns2" ip link set up dev veth2
+ip netns exec "$ns2" ip addr add 192.168.1.2/24 dev veth2
+
+# Create a server in one namespace
+ip netns exec "$ns1" socat -u TCP-LISTEN:5201,fork OPEN:/dev/null,wronly=1 &
+socatpid=$!
+
+# Restrict source port to just one so we don't have to exhaust
+# all others.
+ip netns exec "$ns2" sysctl -q net.ipv4.ip_local_port_range="10000 10000"
+
+# add a virtual IP using DNAT
+ip netns exec "$ns2" iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201
+
+# ... and route it to the other namespace
+ip netns exec "$ns2" ip route add 10.96.0.1 via 192.168.1.1
+
+# add a persistent connection from the other namespace
+ip netns exec "$ns2" socat -t 10 - TCP:192.168.1.1:5201 > /dev/null &
+
+sleep 1
+
+# ip daddr:dport will be rewritten to 192.168.1.1 5201
+# NAT must reallocate source port 10000 because
+# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use
+echo test | ip netns exec "$ns2" socat -t 3 -u STDIN TCP:10.96.0.1:443,connect-timeout=3 >/dev/null
+ret=$?
+
+# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201).
+if [ $ret -eq 0 ]; then
+ echo "PASS: socat can connect via NAT'd address"
+else
+ echo "FAIL: socat cannot connect via NAT'd address"
+fi
+
+# check sport clashres.
+ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201
+ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201
+
+sleep 5 | ip netns exec "$ns2" socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
+
+# if connect succeeds, client closes instantly due to EOF on stdin.
+# if connect hangs, it will time out after 5s.
+echo | ip netns exec "$ns2" socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
+cpid2=$!
+
+time_then=$(date +%s)
+wait $cpid2
+rv=$?
+time_now=$(date +%s)
+
+# Check how much time has elapsed, expectation is for
+# 'cpid2' to connect and then exit (and no connect delay).
+delta=$((time_now - time_then))
+
+if [ $delta -lt 2 ] && [ $rv -eq 0 ]; then
+ echo "PASS: could connect to service via redirected ports"
+else
+ echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)"
+ ret=1
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nf_queue.c b/tools/testing/selftests/net/netfilter/nf_queue.c
new file mode 100644
index 0000000000..9e56b9d470
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nf_queue.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <time.h>
+#include <arpa/inet.h>
+
+#include <libmnl/libmnl.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_queue.h>
+
+struct options {
+ bool count_packets;
+ bool gso_enabled;
+ int verbose;
+ unsigned int queue_num;
+ unsigned int timeout;
+ uint32_t verdict;
+ uint32_t delay_ms;
+};
+
+static unsigned int queue_stats[5];
+static struct options opts;
+
+static void help(const char *p)
+{
+ printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p);
+}
+
+static int parse_attr_cb(const struct nlattr *attr, void *data)
+{
+ const struct nlattr **tb = data;
+ int type = mnl_attr_get_type(attr);
+
+ /* skip unsupported attribute in user-space */
+ if (mnl_attr_type_valid(attr, NFQA_MAX) < 0)
+ return MNL_CB_OK;
+
+ switch (type) {
+ case NFQA_MARK:
+ case NFQA_IFINDEX_INDEV:
+ case NFQA_IFINDEX_OUTDEV:
+ case NFQA_IFINDEX_PHYSINDEV:
+ case NFQA_IFINDEX_PHYSOUTDEV:
+ if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0) {
+ perror("mnl_attr_validate");
+ return MNL_CB_ERROR;
+ }
+ break;
+ case NFQA_TIMESTAMP:
+ if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC,
+ sizeof(struct nfqnl_msg_packet_timestamp)) < 0) {
+ perror("mnl_attr_validate2");
+ return MNL_CB_ERROR;
+ }
+ break;
+ case NFQA_HWADDR:
+ if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC,
+ sizeof(struct nfqnl_msg_packet_hw)) < 0) {
+ perror("mnl_attr_validate2");
+ return MNL_CB_ERROR;
+ }
+ break;
+ case NFQA_PAYLOAD:
+ break;
+ }
+ tb[type] = attr;
+ return MNL_CB_OK;
+}
+
+static int queue_cb(const struct nlmsghdr *nlh, void *data)
+{
+ struct nlattr *tb[NFQA_MAX+1] = { 0 };
+ struct nfqnl_msg_packet_hdr *ph = NULL;
+ uint32_t id = 0;
+
+ (void)data;
+
+ mnl_attr_parse(nlh, sizeof(struct nfgenmsg), parse_attr_cb, tb);
+ if (tb[NFQA_PACKET_HDR]) {
+ ph = mnl_attr_get_payload(tb[NFQA_PACKET_HDR]);
+ id = ntohl(ph->packet_id);
+
+ if (opts.verbose > 0)
+ printf("packet hook=%u, hwproto 0x%x",
+ ntohs(ph->hw_protocol), ph->hook);
+
+ if (ph->hook >= 5) {
+ fprintf(stderr, "Unknown hook %d\n", ph->hook);
+ return MNL_CB_ERROR;
+ }
+
+ if (opts.verbose > 0) {
+ uint32_t skbinfo = 0;
+
+ if (tb[NFQA_SKB_INFO])
+ skbinfo = ntohl(mnl_attr_get_u32(tb[NFQA_SKB_INFO]));
+ if (skbinfo & NFQA_SKB_CSUMNOTREADY)
+ printf(" csumnotready");
+ if (skbinfo & NFQA_SKB_GSO)
+ printf(" gso");
+ if (skbinfo & NFQA_SKB_CSUM_NOTVERIFIED)
+ printf(" csumnotverified");
+ puts("");
+ }
+
+ if (opts.count_packets)
+ queue_stats[ph->hook]++;
+ }
+
+ return MNL_CB_OK + id;
+}
+
+static struct nlmsghdr *
+nfq_build_cfg_request(char *buf, uint8_t command, int queue_num)
+{
+ struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
+ struct nfqnl_msg_config_cmd cmd = {
+ .command = command,
+ .pf = htons(AF_INET),
+ };
+ struct nfgenmsg *nfg;
+
+ nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG;
+ nlh->nlmsg_flags = NLM_F_REQUEST;
+
+ nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
+
+ nfg->nfgen_family = AF_UNSPEC;
+ nfg->version = NFNETLINK_V0;
+ nfg->res_id = htons(queue_num);
+
+ mnl_attr_put(nlh, NFQA_CFG_CMD, sizeof(cmd), &cmd);
+
+ return nlh;
+}
+
+static struct nlmsghdr *
+nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num)
+{
+ struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
+ struct nfqnl_msg_config_params params = {
+ .copy_range = htonl(range),
+ .copy_mode = mode,
+ };
+ struct nfgenmsg *nfg;
+
+ nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG;
+ nlh->nlmsg_flags = NLM_F_REQUEST;
+
+ nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
+ nfg->nfgen_family = AF_UNSPEC;
+ nfg->version = NFNETLINK_V0;
+ nfg->res_id = htons(queue_num);
+
+ mnl_attr_put(nlh, NFQA_CFG_PARAMS, sizeof(params), &params);
+
+ return nlh;
+}
+
+static struct nlmsghdr *
+nfq_build_verdict(char *buf, int id, int queue_num, uint32_t verd)
+{
+ struct nfqnl_msg_verdict_hdr vh = {
+ .verdict = htonl(verd),
+ .id = htonl(id),
+ };
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfg;
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_VERDICT;
+ nlh->nlmsg_flags = NLM_F_REQUEST;
+ nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
+ nfg->nfgen_family = AF_UNSPEC;
+ nfg->version = NFNETLINK_V0;
+ nfg->res_id = htons(queue_num);
+
+ mnl_attr_put(nlh, NFQA_VERDICT_HDR, sizeof(vh), &vh);
+
+ return nlh;
+}
+
+static void print_stats(void)
+{
+ unsigned int last, total;
+ int i;
+
+ total = 0;
+ last = queue_stats[0];
+
+ for (i = 0; i < 5; i++) {
+ printf("hook %d packets %08u\n", i, queue_stats[i]);
+ last = queue_stats[i];
+ total += last;
+ }
+
+ printf("%u packets total\n", total);
+}
+
+struct mnl_socket *open_queue(void)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ unsigned int queue_num;
+ struct mnl_socket *nl;
+ struct nlmsghdr *nlh;
+ struct timeval tv;
+ uint32_t flags;
+
+ nl = mnl_socket_open(NETLINK_NETFILTER);
+ if (nl == NULL) {
+ perror("mnl_socket_open");
+ exit(EXIT_FAILURE);
+ }
+
+ if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) {
+ perror("mnl_socket_bind");
+ exit(EXIT_FAILURE);
+ }
+
+ queue_num = opts.queue_num;
+ nlh = nfq_build_cfg_request(buf, NFQNL_CFG_CMD_BIND, queue_num);
+
+ if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+ perror("mnl_socket_sendto");
+ exit(EXIT_FAILURE);
+ }
+
+ nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num);
+
+ flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0;
+ flags |= NFQA_CFG_F_UID_GID;
+ mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags));
+ mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags));
+
+ if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+ perror("mnl_socket_sendto");
+ exit(EXIT_FAILURE);
+ }
+
+ memset(&tv, 0, sizeof(tv));
+ tv.tv_sec = opts.timeout;
+ if (opts.timeout && setsockopt(mnl_socket_get_fd(nl),
+ SOL_SOCKET, SO_RCVTIMEO,
+ &tv, sizeof(tv))) {
+ perror("setsockopt(SO_RCVTIMEO)");
+ exit(EXIT_FAILURE);
+ }
+
+ return nl;
+}
+
+static void sleep_ms(uint32_t delay)
+{
+ struct timespec ts = { .tv_sec = delay / 1000 };
+
+ delay %= 1000;
+
+ ts.tv_nsec = delay * 1000llu * 1000llu;
+
+ nanosleep(&ts, NULL);
+}
+
+static int mainloop(void)
+{
+ unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE;
+ struct mnl_socket *nl;
+ struct nlmsghdr *nlh;
+ unsigned int portid;
+ char *buf;
+ int ret;
+
+ buf = malloc(buflen);
+ if (!buf) {
+ perror("malloc");
+ exit(EXIT_FAILURE);
+ }
+
+ nl = open_queue();
+ portid = mnl_socket_get_portid(nl);
+
+ for (;;) {
+ uint32_t id;
+
+ ret = mnl_socket_recvfrom(nl, buf, buflen);
+ if (ret == -1) {
+ if (errno == ENOBUFS || errno == EINTR)
+ continue;
+
+ if (errno == EAGAIN) {
+ errno = 0;
+ ret = 0;
+ break;
+ }
+
+ perror("mnl_socket_recvfrom");
+ exit(EXIT_FAILURE);
+ }
+
+ ret = mnl_cb_run(buf, ret, 0, portid, queue_cb, NULL);
+ if (ret < 0) {
+ perror("mnl_cb_run");
+ exit(EXIT_FAILURE);
+ }
+
+ id = ret - MNL_CB_OK;
+ if (opts.delay_ms)
+ sleep_ms(opts.delay_ms);
+
+ nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict);
+ if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+ perror("mnl_socket_sendto");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ mnl_socket_close(nl);
+
+ return ret;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) {
+ switch (c) {
+ case 'c':
+ opts.count_packets = true;
+ break;
+ case 'h':
+ help(argv[0]);
+ exit(0);
+ break;
+ case 'q':
+ opts.queue_num = atoi(optarg);
+ if (opts.queue_num > 0xffff)
+ opts.queue_num = 0;
+ break;
+ case 'Q':
+ opts.verdict = atoi(optarg);
+ if (opts.verdict > 0xffff) {
+ fprintf(stderr, "Expected destination queue number\n");
+ exit(1);
+ }
+
+ opts.verdict <<= 16;
+ opts.verdict |= NF_QUEUE;
+ break;
+ case 'd':
+ opts.delay_ms = atoi(optarg);
+ if (opts.delay_ms == 0) {
+ fprintf(stderr, "Expected nonzero delay (in milliseconds)\n");
+ exit(1);
+ }
+ break;
+ case 't':
+ opts.timeout = atoi(optarg);
+ break;
+ case 'G':
+ opts.gso_enabled = false;
+ break;
+ case 'v':
+ opts.verbose++;
+ break;
+ }
+ }
+
+ if (opts.verdict != NF_ACCEPT && (opts.verdict >> 16 == opts.queue_num)) {
+ fprintf(stderr, "Cannot use same destination and source queue\n");
+ exit(1);
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ int ret;
+
+ opts.verdict = NF_ACCEPT;
+ opts.gso_enabled = true;
+
+ parse_opts(argc, argv);
+
+ ret = mainloop();
+ if (opts.count_packets)
+ print_stats();
+
+ return ret;
+}
diff --git a/tools/testing/selftests/net/netfilter/nft_audit.sh b/tools/testing/selftests/net/netfilter/nft_audit.sh
new file mode 100755
index 0000000000..902f8114bc
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_audit.sh
@@ -0,0 +1,268 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that audit logs generated for nft commands are as expected.
+
+SKIP_RC=4
+RC=0
+
+if [ -r /var/run/auditd.pid ];then
+ read pid < /var/run/auditd.pid
+ p=$(pgrep ^auditd$)
+
+ if [ "$pid" -eq "$p" ]; then
+ echo "SKIP: auditd is running"
+ exit $SKIP_RC
+ fi
+fi
+
+nft --version >/dev/null 2>&1 || {
+ echo "SKIP: missing nft tool"
+ exit $SKIP_RC
+}
+
+# nft must be recent enough to support "reset" keyword.
+nft --check -f /dev/stdin >/dev/null 2>&1 <<EOF
+add table t
+add chain t c
+reset rules t c
+EOF
+
+if [ "$?" -ne 0 ];then
+ echo -n "SKIP: nft reset feature test failed: "
+ nft --version
+ exit $SKIP_RC
+fi
+
+# Run everything in a separate network namespace
+[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
+
+# give other scripts a chance to finish - audit_logread sees all activity
+sleep 1
+
+logfile=$(mktemp)
+rulefile=$(mktemp)
+echo "logging into $logfile"
+./audit_logread >"$logfile" &
+logread_pid=$!
+trap 'kill $logread_pid; rm -f $logfile $rulefile' EXIT
+exec 3<"$logfile"
+
+do_test() { # (cmd, log)
+ echo -n "testing for cmd: $1 ... "
+ cat <&3 >/dev/null
+ $1 >/dev/null || exit 1
+ sleep 0.1
+ res=$(diff -a -u <(echo "$2") - <&3)
+ [ $? -eq 0 ] && { echo "OK"; return; }
+ echo "FAIL"
+ grep -v '^\(---\|+++\|@@\)' <<< "$res"
+ ((RC--))
+}
+
+nft flush ruleset
+
+# adding tables, chains and rules
+
+for table in t1 t2; do
+ do_test "nft add table $table" \
+ "table=$table family=2 entries=1 op=nft_register_table"
+
+ do_test "nft add chain $table c1" \
+ "table=$table family=2 entries=1 op=nft_register_chain"
+
+ do_test "nft add chain $table c2; add chain $table c3" \
+ "table=$table family=2 entries=2 op=nft_register_chain"
+
+ cmd="add rule $table c1 counter"
+
+ do_test "nft $cmd" \
+ "table=$table family=2 entries=1 op=nft_register_rule"
+
+ do_test "nft $cmd; $cmd" \
+ "table=$table family=2 entries=2 op=nft_register_rule"
+
+ cmd=""
+ sep=""
+ for chain in c2 c3; do
+ for i in {1..3}; do
+ cmd+="$sep add rule $table $chain counter"
+ sep=";"
+ done
+ done
+ do_test "nft $cmd" \
+ "table=$table family=2 entries=6 op=nft_register_rule"
+done
+
+for ((i = 0; i < 500; i++)); do
+ echo "add rule t2 c3 counter accept comment \"rule $i\""
+done > "$rulefile"
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=500 op=nft_register_rule'
+
+# adding sets and elements
+
+settype='type inet_service; counter'
+setelem='{ 22, 80, 443 }'
+setblock="{ $settype; elements = $setelem; }"
+do_test "nft add set t1 s $setblock" \
+"table=t1 family=2 entries=4 op=nft_register_set"
+
+do_test "nft add set t1 s2 $setblock; add set t1 s3 { $settype; }" \
+"table=t1 family=2 entries=5 op=nft_register_set"
+
+do_test "nft add element t1 s3 $setelem" \
+"table=t1 family=2 entries=3 op=nft_register_setelem"
+
+# adding counters
+
+do_test 'nft add counter t1 c1' \
+'table=t1 family=2 entries=1 op=nft_register_obj'
+
+do_test 'nft add counter t2 c1; add counter t2 c2' \
+'table=t2 family=2 entries=2 op=nft_register_obj'
+
+for ((i = 3; i <= 500; i++)); do
+ echo "add counter t2 c$i"
+done > "$rulefile"
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=498 op=nft_register_obj'
+
+# adding/updating quotas
+
+do_test 'nft add quota t1 q1 { 10 bytes }' \
+'table=t1 family=2 entries=1 op=nft_register_obj'
+
+do_test 'nft add quota t2 q1 { 10 bytes }; add quota t2 q2 { 10 bytes }' \
+'table=t2 family=2 entries=2 op=nft_register_obj'
+
+for ((i = 3; i <= 500; i++)); do
+ echo "add quota t2 q$i { 10 bytes }"
+done > "$rulefile"
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=498 op=nft_register_obj'
+
+# changing the quota value triggers obj update path
+do_test 'nft add quota t1 q1 { 20 bytes }' \
+'table=t1 family=2 entries=1 op=nft_register_obj'
+
+# resetting rules
+
+do_test 'nft reset rules t1 c2' \
+'table=t1 family=2 entries=3 op=nft_reset_rule'
+
+do_test 'nft reset rules table t1' \
+'table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule'
+
+do_test 'nft reset rules t2 c3' \
+'table=t2 family=2 entries=189 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=126 op=nft_reset_rule'
+
+do_test 'nft reset rules t2' \
+'table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=186 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=129 op=nft_reset_rule'
+
+do_test 'nft reset rules' \
+'table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=180 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=135 op=nft_reset_rule'
+
+# resetting sets and elements
+
+elem=(22 ",80" ",443")
+relem=""
+for i in {1..3}; do
+ relem+="${elem[((i - 1))]}"
+ do_test "nft reset element t1 s { $relem }" \
+ "table=t1 family=2 entries=$i op=nft_reset_setelem"
+done
+
+do_test 'nft reset set t1 s' \
+'table=t1 family=2 entries=3 op=nft_reset_setelem'
+
+# resetting counters
+
+do_test 'nft reset counter t1 c1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset counters t1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset counters t2' \
+'table=t2 family=2 entries=342 op=nft_reset_obj
+table=t2 family=2 entries=158 op=nft_reset_obj'
+
+do_test 'nft reset counters' \
+'table=t1 family=2 entries=1 op=nft_reset_obj
+table=t2 family=2 entries=341 op=nft_reset_obj
+table=t2 family=2 entries=159 op=nft_reset_obj'
+
+# resetting quotas
+
+do_test 'nft reset quota t1 q1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset quotas t1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset quotas t2' \
+'table=t2 family=2 entries=315 op=nft_reset_obj
+table=t2 family=2 entries=185 op=nft_reset_obj'
+
+do_test 'nft reset quotas' \
+'table=t1 family=2 entries=1 op=nft_reset_obj
+table=t2 family=2 entries=314 op=nft_reset_obj
+table=t2 family=2 entries=186 op=nft_reset_obj'
+
+# deleting rules
+
+readarray -t handles < <(nft -a list chain t1 c1 | \
+ sed -n 's/.*counter.* handle \(.*\)$/\1/p')
+
+do_test "nft delete rule t1 c1 handle ${handles[0]}" \
+'table=t1 family=2 entries=1 op=nft_unregister_rule'
+
+cmd='delete rule t1 c1 handle'
+do_test "nft $cmd ${handles[1]}; $cmd ${handles[2]}" \
+'table=t1 family=2 entries=2 op=nft_unregister_rule'
+
+do_test 'nft flush chain t1 c2' \
+'table=t1 family=2 entries=3 op=nft_unregister_rule'
+
+do_test 'nft flush table t2' \
+'table=t2 family=2 entries=509 op=nft_unregister_rule'
+
+# deleting chains
+
+do_test 'nft delete chain t2 c2' \
+'table=t2 family=2 entries=1 op=nft_unregister_chain'
+
+# deleting sets and elements
+
+do_test 'nft delete element t1 s { 22 }' \
+'table=t1 family=2 entries=1 op=nft_unregister_setelem'
+
+do_test 'nft delete element t1 s { 80, 443 }' \
+'table=t1 family=2 entries=2 op=nft_unregister_setelem'
+
+do_test 'nft flush set t1 s2' \
+'table=t1 family=2 entries=3 op=nft_unregister_setelem'
+
+do_test 'nft delete set t1 s2' \
+'table=t1 family=2 entries=1 op=nft_unregister_set'
+
+do_test 'nft delete set t1 s3' \
+'table=t1 family=2 entries=1 op=nft_unregister_set'
+
+exit $RC
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
new file mode 100755
index 0000000000..6d66240e14
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -0,0 +1,1622 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# nft_concat_range.sh - Tests for sets with concatenation of ranged fields
+#
+# Copyright (c) 2019 Red Hat GmbH
+#
+# Author: Stefano Brivio <sbrivio@redhat.com>
+#
+# shellcheck disable=SC2154,SC2034,SC2016,SC2030,SC2031,SC2317
+# ^ Configuration and templates sourced with eval, counters reused in subshells
+
+source lib.sh
+
+# Available test groups:
+# - reported_issues: check for issues that were reported in the past
+# - correctness: check that packets match given entries, and only those
+# - concurrency: attempt races between insertion, deletion and lookup
+# - timeout: check that packets match entries until they expire
+# - performance: estimate matching rate, compare with rbtree and hash baselines
+TESTS="reported_issues correctness concurrency timeout"
+[ -n "$NFT_CONCAT_RANGE_TESTS" ] && TESTS="${NFT_CONCAT_RANGE_TESTS}"
+
+# Set types, defined by TYPE_ variables below
+TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
+ net_port_net net_mac mac_net net_mac_icmp net6_mac_icmp
+ net6_port_net6_port net_port_mac_proto_net"
+
+# Reported bugs, also described by TYPE_ variables below
+BUGS="flush_remove_add reload"
+
+# List of possible paths to pktgen script from kernel tree for performance tests
+PKTGEN_SCRIPT_PATHS="
+ ../../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+ pktgen/pktgen_bench_xmit_mode_netif_receive.sh"
+
+# Definition of set types:
+# display display text for test report
+# type_spec nftables set type specifier
+# chain_spec nftables type specifier for rules mapping to set
+# dst call sequence of format_*() functions for destination fields
+# src call sequence of format_*() functions for source fields
+# start initial integer used to generate addresses and ports
+# count count of entries to generate and match
+# src_delta number summed to destination generator for source fields
+# tools list of tools for correctness and timeout tests, any can be used
+# proto L4 protocol of test packets
+#
+# race_repeat race attempts per thread, 0 disables concurrency test for type
+# flood_tools list of tools for concurrency tests, any can be used
+# flood_proto L4 protocol of test packets for concurrency tests
+# flood_spec nftables type specifier for concurrency tests
+#
+# perf_duration duration of single pktgen injection test
+# perf_spec nftables type specifier for performance tests
+# perf_dst format_*() functions for destination fields in performance test
+# perf_src format_*() functions for source fields in performance test
+# perf_entries number of set entries for performance test
+# perf_proto L3 protocol of test packets
+TYPE_net_port="
+display net,port
+type_spec ipv4_addr . inet_service
+chain_spec ip daddr . udp dport
+dst addr4 port
+src
+start 1
+count 5
+src_delta 2000
+tools sendip bash
+proto udp
+
+race_repeat 3
+flood_tools iperf3 iperf netperf
+flood_proto udp
+flood_spec ip daddr . udp dport
+
+perf_duration 5
+perf_spec ip daddr . udp dport
+perf_dst addr4 port
+perf_src
+perf_entries 1000
+perf_proto ipv4
+"
+
+TYPE_port_net="
+display port,net
+type_spec inet_service . ipv4_addr
+chain_spec udp dport . ip daddr
+dst port addr4
+src
+start 1
+count 5
+src_delta 2000
+tools sendip socat bash
+proto udp
+
+race_repeat 3
+flood_tools iperf3 iperf netperf
+flood_proto udp
+flood_spec udp dport . ip daddr
+
+perf_duration 5
+perf_spec udp dport . ip daddr
+perf_dst port addr4
+perf_src
+perf_entries 100
+perf_proto ipv4
+"
+
+TYPE_net6_port="
+display net6,port
+type_spec ipv6_addr . inet_service
+chain_spec ip6 daddr . udp dport
+dst addr6 port
+src
+start 10
+count 5
+src_delta 2000
+tools sendip socat bash
+proto udp6
+
+race_repeat 3
+flood_tools iperf3 iperf netperf
+flood_proto tcp6
+flood_spec ip6 daddr . udp dport
+
+perf_duration 5
+perf_spec ip6 daddr . udp dport
+perf_dst addr6 port
+perf_src
+perf_entries 1000
+perf_proto ipv6
+"
+
+TYPE_port_proto="
+display port,proto
+type_spec inet_service . inet_proto
+chain_spec udp dport . meta l4proto
+dst port proto
+src
+start 1
+count 5
+src_delta 2000
+tools sendip socat bash
+proto udp
+
+race_repeat 0
+
+perf_duration 5
+perf_spec udp dport . meta l4proto
+perf_dst port proto
+perf_src
+perf_entries 30000
+perf_proto ipv4
+"
+
+TYPE_net6_port_mac="
+display net6,port,mac
+type_spec ipv6_addr . inet_service . ether_addr
+chain_spec ip6 daddr . udp dport . ether saddr
+dst addr6 port
+src mac
+start 10
+count 5
+src_delta 2000
+tools sendip socat bash
+proto udp6
+
+race_repeat 0
+
+perf_duration 5
+perf_spec ip6 daddr . udp dport . ether daddr
+perf_dst addr6 port mac
+perf_src
+perf_entries 10
+perf_proto ipv6
+"
+
+TYPE_net6_port_mac_proto="
+display net6,port,mac,proto
+type_spec ipv6_addr . inet_service . ether_addr . inet_proto
+chain_spec ip6 daddr . udp dport . ether saddr . meta l4proto
+dst addr6 port
+src mac proto
+start 10
+count 5
+src_delta 2000
+tools sendip socat bash
+proto udp6
+
+race_repeat 0
+
+perf_duration 5
+perf_spec ip6 daddr . udp dport . ether daddr . meta l4proto
+perf_dst addr6 port mac proto
+perf_src
+perf_entries 1000
+perf_proto ipv6
+"
+
+TYPE_net_port_net="
+display net,port,net
+type_spec ipv4_addr . inet_service . ipv4_addr
+chain_spec ip daddr . udp dport . ip saddr
+dst addr4 port
+src addr4
+start 1
+count 5
+src_delta 2000
+tools sendip socat bash
+proto udp
+
+race_repeat 3
+flood_tools iperf3 iperf netperf
+flood_proto tcp
+flood_spec ip daddr . udp dport . ip saddr
+
+perf_duration 0
+"
+
+TYPE_net6_port_net6_port="
+display net6,port,net6,port
+type_spec ipv6_addr . inet_service . ipv6_addr . inet_service
+chain_spec ip6 daddr . udp dport . ip6 saddr . udp sport
+dst addr6 port
+src addr6 port
+start 10
+count 5
+src_delta 2000
+tools sendip socat
+proto udp6
+
+race_repeat 3
+flood_tools iperf3 iperf netperf
+flood_proto tcp6
+flood_spec ip6 daddr . tcp dport . ip6 saddr . tcp sport
+
+perf_duration 0
+"
+
+TYPE_net_port_mac_proto_net="
+display net,port,mac,proto,net
+type_spec ipv4_addr . inet_service . ether_addr . inet_proto . ipv4_addr
+chain_spec ip daddr . udp dport . ether saddr . meta l4proto . ip saddr
+dst addr4 port
+src mac proto addr4
+start 1
+count 5
+src_delta 2000
+tools sendip socat bash
+proto udp
+
+race_repeat 0
+
+perf_duration 0
+"
+
+TYPE_net_mac="
+display net,mac
+type_spec ipv4_addr . ether_addr
+chain_spec ip daddr . ether saddr
+dst addr4
+src mac
+start 1
+count 5
+src_delta 2000
+tools sendip socat bash
+proto udp
+
+race_repeat 0
+
+perf_duration 5
+perf_spec ip daddr . ether daddr
+perf_dst addr4 mac
+perf_src
+perf_entries 1000
+perf_proto ipv4
+"
+
+TYPE_mac_net="
+display mac,net
+type_spec ether_addr . ipv4_addr
+chain_spec ether saddr . ip saddr
+dst
+src mac addr4
+start 1
+count 5
+src_delta 2000
+tools sendip socat bash
+proto udp
+
+race_repeat 0
+
+perf_duration 0
+"
+
+TYPE_net_mac_icmp="
+display net,mac - ICMP
+type_spec ipv4_addr . ether_addr
+chain_spec ip daddr . ether saddr
+dst addr4
+src mac
+start 1
+count 5
+src_delta 2000
+tools ping
+proto icmp
+
+race_repeat 0
+
+perf_duration 0
+"
+
+TYPE_net6_mac_icmp="
+display net6,mac - ICMPv6
+type_spec ipv6_addr . ether_addr
+chain_spec ip6 daddr . ether saddr
+dst addr6
+src mac
+start 10
+count 50
+src_delta 2000
+tools ping
+proto icmp6
+
+race_repeat 0
+
+perf_duration 0
+"
+
+TYPE_net_port_proto_net="
+display net,port,proto,net
+type_spec ipv4_addr . inet_service . inet_proto . ipv4_addr
+chain_spec ip daddr . udp dport . meta l4proto . ip saddr
+dst addr4 port proto
+src addr4
+start 1
+count 5
+src_delta 2000
+tools sendip socat
+proto udp
+
+race_repeat 3
+flood_tools iperf3 iperf netperf
+flood_proto tcp
+flood_spec ip daddr . tcp dport . meta l4proto . ip saddr
+
+perf_duration 0
+"
+
+# Definition of tests for bugs reported in the past:
+# display display text for test report
+TYPE_flush_remove_add="
+display Add two elements, flush, re-add
+"
+
+TYPE_reload="
+display net,mac with reload
+type_spec ipv4_addr . ether_addr
+chain_spec ip daddr . ether saddr
+dst addr4
+src mac
+start 1
+count 1
+src_delta 2000
+tools sendip socat bash
+proto udp
+
+race_repeat 0
+
+perf_duration 0
+"
+
+# Set template for all tests, types and rules are filled in depending on test
+set_template='
+flush ruleset
+
+table inet filter {
+ counter test {
+ packets 0 bytes 0
+ }
+
+ set test {
+ type ${type_spec}
+ flags interval,timeout
+ }
+
+ chain input {
+ type filter hook prerouting priority 0; policy accept;
+ ${chain_spec} @test counter name \"test\"
+ }
+}
+
+table netdev perf {
+ counter test {
+ packets 0 bytes 0
+ }
+
+ counter match {
+ packets 0 bytes 0
+ }
+
+ set test {
+ type ${type_spec}
+ flags interval
+ }
+
+ set norange {
+ type ${type_spec}
+ }
+
+ set noconcat {
+ type ${type_spec%% *}
+ flags interval
+ }
+
+ chain test {
+ type filter hook ingress device veth_a priority 0;
+ }
+}
+'
+
+err_buf=
+info_buf=
+
+# Append string to error buffer
+err() {
+ err_buf="${err_buf}${1}
+"
+}
+
+# Append string to information buffer
+info() {
+ info_buf="${info_buf}${1}
+"
+}
+
+# Flush error buffer to stdout
+err_flush() {
+ printf "%s" "${err_buf}"
+ err_buf=
+}
+
+# Flush information buffer to stdout
+info_flush() {
+ printf "%s" "${info_buf}"
+ info_buf=
+}
+
+# Setup veth pair: this namespace receives traffic, B generates it
+setup_veth() {
+ ip netns add B
+ ip link add veth_a type veth peer name veth_b || return 1
+
+ ip link set veth_a up
+ ip link set veth_b netns B
+
+ ip -n B link set veth_b up
+
+ ip addr add dev veth_a 10.0.0.1
+ ip route add default dev veth_a
+
+ ip -6 addr add fe80::1/64 dev veth_a nodad
+ ip -6 addr add 2001:db8::1/64 dev veth_a nodad
+ ip -6 route add default dev veth_a
+
+ ip -n B route add default dev veth_b
+
+ ip -6 -n B addr add fe80::2/64 dev veth_b nodad
+ ip -6 -n B addr add 2001:db8::2/64 dev veth_b nodad
+ ip -6 -n B route add default dev veth_b
+
+ B() {
+ ip netns exec B "$@" >/dev/null 2>&1
+ }
+}
+
+# Fill in set template and initialise set
+setup_set() {
+ eval "echo \"${set_template}\"" | nft -f -
+}
+
+# Check that at least one of the needed tools is available
+check_tools() {
+ [ -z "${tools}" ] && return 0
+
+ __tools=
+ for tool in ${tools}; do
+ __tools="${__tools} ${tool}"
+
+ command -v "${tool}" >/dev/null && return 0
+ done
+ err "need one of:${__tools}, skipping" && return 1
+}
+
+# Set up function to send ICMP packets
+setup_send_icmp() {
+ send_icmp() {
+ B ping -c1 -W1 "${dst_addr4}" >/dev/null 2>&1
+ }
+}
+
+# Set up function to send ICMPv6 packets
+setup_send_icmp6() {
+ if command -v ping6 >/dev/null; then
+ send_icmp6() {
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+ B ping6 -q -c1 -W1 "${dst_addr6}"
+ }
+ else
+ send_icmp6() {
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+ B ping -q -6 -c1 -W1 "${dst_addr6}"
+ }
+ fi
+}
+
+# Set up function to send single UDP packets on IPv4
+setup_send_udp() {
+ if command -v sendip >/dev/null; then
+ send_udp() {
+ [ -n "${src_port}" ] && src_port="-us ${src_port}"
+ [ -n "${dst_port}" ] && dst_port="-ud ${dst_port}"
+ [ -n "${src_addr4}" ] && src_addr4="-is ${src_addr4}"
+
+ # shellcheck disable=SC2086 # sendip needs split options
+ B sendip -p ipv4 -p udp ${src_addr4} ${src_port} \
+ ${dst_port} "${dst_addr4}"
+
+ src_port=
+ dst_port=
+ src_addr4=
+ }
+ elif command -v socat -v >/dev/null; then
+ send_udp() {
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}" dev veth_b
+ __socatbind=",bind=${src_addr4}"
+ if [ -n "${src_port}" ];then
+ __socatbind="${__socatbind}:${src_port}"
+ fi
+ fi
+
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+ [ -z "${dst_port}" ] && dst_port=12345
+
+ echo "test4" | B socat -t 0.01 STDIN UDP4-DATAGRAM:"$dst_addr4":"$dst_port""${__socatbind}"
+
+ src_addr4=
+ src_port=
+ }
+ elif [ -z "$(bash -c 'type -p')" ]; then
+ send_udp() {
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}/16" dev veth_b
+ B ip route add default dev veth_b
+ fi
+
+ B bash -c "echo > /dev/udp/${dst_addr4}/${dst_port}"
+
+ if [ -n "${src_addr4}" ]; then
+ B ip addr del "${src_addr4}/16" dev veth_b
+ fi
+ src_addr4=
+ }
+ else
+ return 1
+ fi
+}
+
+# Set up function to send single UDP packets on IPv6
+setup_send_udp6() {
+ if command -v sendip >/dev/null; then
+ send_udp6() {
+ [ -n "${src_port}" ] && src_port="-us ${src_port}"
+ [ -n "${dst_port}" ] && dst_port="-ud ${dst_port}"
+ if [ -n "${src_addr6}" ]; then
+ src_addr6="-6s ${src_addr6}"
+ else
+ src_addr6="-6s 2001:db8::2"
+ fi
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ B sendip -p ipv6 -p udp ${src_addr6} ${src_port} \
+ ${dst_port} "${dst_addr6}"
+
+ src_port=
+ dst_port=
+ src_addr6=
+ }
+ elif command -v socat -v >/dev/null; then
+ send_udp6() {
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+
+ __socatbind6=
+
+ if [ -n "${src_addr6}" ]; then
+ B ip addr add "${src_addr6}" dev veth_b nodad
+
+ __socatbind6=",bind=[${src_addr6}]"
+
+ if [ -n "${src_port}" ] ;then
+ __socatbind6="${__socatbind6}:${src_port}"
+ fi
+ fi
+
+ echo "test6" | B socat -t 0.01 STDIN UDP6-DATAGRAM:["$dst_addr6"]:"$dst_port""${__socatbind6}"
+ }
+ elif [ -z "$(bash -c 'type -p')" ]; then
+ send_udp6() {
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+ B ip addr add "${src_addr6}" dev veth_b nodad
+ B bash -c "echo > /dev/udp/${dst_addr6}/${dst_port}"
+ ip -6 addr del "${dst_addr6}" dev veth_a 2>/dev/null
+ }
+ else
+ return 1
+ fi
+}
+
+listener_ready()
+{
+ port="$1"
+ ss -lnt -o "sport = :$port" | grep -q "$port"
+}
+
+# Set up function to send TCP traffic on IPv4
+setup_flood_tcp() {
+ if command -v iperf3 >/dev/null; then
+ flood_tcp() {
+ local n_port="${dst_port}"
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}/16" dev veth_b
+ src_addr4="-B ${src_addr4}"
+ else
+ B ip addr add dev veth_b 10.0.0.2
+ src_addr4="-B 10.0.0.2"
+ fi
+ if [ -n "${src_port}" ]; then
+ src_port="--cport ${src_port}"
+ fi
+ B ip route add default dev veth_b 2>/dev/null
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ iperf3 -s -DB "${dst_addr4}" ${dst_port} >/dev/null 2>&1
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
+
+ # shellcheck disable=SC2086 # this needs split options
+ B iperf3 -c "${dst_addr4}" ${dst_port} ${src_port} \
+ ${src_addr4} -l16 -t 1000
+
+ src_addr4=
+ src_port=
+ dst_port=
+ }
+ elif command -v iperf >/dev/null; then
+ flood_tcp() {
+ local n_port="${dst_port}"
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}/16" dev veth_b
+ src_addr4="-B ${src_addr4}"
+ else
+ B ip addr add dev veth_b 10.0.0.2 2>/dev/null
+ src_addr4="-B 10.0.0.2"
+ fi
+ if [ -n "${src_port}" ]; then
+ src_addr4="${src_addr4}:${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ iperf -s -DB "${dst_addr4}" ${dst_port} >/dev/null 2>&1
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
+
+ # shellcheck disable=SC2086 # this needs split options
+ B iperf -c "${dst_addr4}" ${dst_port} ${src_addr4} \
+ -l20 -t 1000
+
+ src_addr4=
+ src_port=
+ dst_port=
+ }
+ elif command -v netperf >/dev/null; then
+ flood_tcp() {
+ local n_port="${dst_port}"
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}/16" dev veth_b
+ else
+ B ip addr add dev veth_b 10.0.0.2
+ src_addr4="10.0.0.2"
+ fi
+ if [ -n "${src_port}" ]; then
+ dst_port="${dst_port},${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ netserver -4 ${dst_port} -L "${dst_addr4}" \
+ >/dev/null 2>&1
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "${n_port}"
+
+ # shellcheck disable=SC2086 # this needs split options
+ B netperf -4 -H "${dst_addr4}" ${dst_port} \
+ -L "${src_addr4}" -l 1000 -t TCP_STREAM
+
+ src_addr4=
+ src_port=
+ dst_port=
+ }
+ else
+ return 1
+ fi
+}
+
+# Set up function to send TCP traffic on IPv6
+setup_flood_tcp6() {
+ if command -v iperf3 >/dev/null; then
+ flood_tcp6() {
+ local n_port="${dst_port}"
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr6}" ]; then
+ B ip addr add "${src_addr6}" dev veth_b nodad
+ src_addr6="-B ${src_addr6}"
+ else
+ src_addr6="-B 2001:db8::2"
+ fi
+ if [ -n "${src_port}" ]; then
+ src_port="--cport ${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ iperf3 -s -DB "${dst_addr6}" ${dst_port} >/dev/null 2>&1
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "${n_port}"
+
+ # shellcheck disable=SC2086 # this needs split options
+ B iperf3 -c "${dst_addr6}" ${dst_port} \
+ ${src_port} ${src_addr6} -l16 -t 1000
+
+ src_addr6=
+ src_port=
+ dst_port=
+ }
+ elif command -v iperf >/dev/null; then
+ flood_tcp6() {
+ local n_port="${dst_port}"
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr6}" ]; then
+ B ip addr add "${src_addr6}" dev veth_b nodad
+ src_addr6="-B ${src_addr6}"
+ else
+ src_addr6="-B 2001:db8::2"
+ fi
+ if [ -n "${src_port}" ]; then
+ src_addr6="${src_addr6}:${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ iperf -s -VDB "${dst_addr6}" ${dst_port} >/dev/null 2>&1
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
+
+ # shellcheck disable=SC2086 # this needs split options
+ B iperf -c "${dst_addr6}" -V ${dst_port} \
+ ${src_addr6} -l1 -t 1000
+
+ src_addr6=
+ src_port=
+ dst_port=
+ }
+ elif command -v netperf >/dev/null; then
+ flood_tcp6() {
+ local n_port="${dst_port}"
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr6}" ]; then
+ B ip addr add "${src_addr6}" dev veth_b nodad
+ else
+ src_addr6="2001:db8::2"
+ fi
+ if [ -n "${src_port}" ]; then
+ dst_port="${dst_port},${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ netserver -6 ${dst_port} -L "${dst_addr6}" \
+ >/dev/null 2>&1
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
+
+ # shellcheck disable=SC2086 # this needs split options
+ B netperf -6 -H "${dst_addr6}" ${dst_port} \
+ -L "${src_addr6}" -l 1000 -t TCP_STREAM
+
+ src_addr6=
+ src_port=
+ dst_port=
+ }
+ else
+ return 1
+ fi
+}
+
+# Set up function to send UDP traffic on IPv4
+setup_flood_udp() {
+ if command -v iperf3 >/dev/null; then
+ flood_udp() {
+ local n_port="${dst_port}"
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}/16" dev veth_b
+ src_addr4="-B ${src_addr4}"
+ else
+ B ip addr add dev veth_b 10.0.0.2 2>/dev/null
+ src_addr4="-B 10.0.0.2"
+ fi
+ if [ -n "${src_port}" ]; then
+ src_port="--cport ${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ iperf3 -s -DB "${dst_addr4}" ${dst_port}
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
+
+ # shellcheck disable=SC2086 # this needs split options
+ B iperf3 -u -c "${dst_addr4}" -Z -b 100M -l16 -t1000 \
+ ${dst_port} ${src_port} ${src_addr4}
+
+ src_addr4=
+ src_port=
+ dst_port=
+ }
+ elif command -v iperf >/dev/null; then
+ flood_udp() {
+ local n_port="${dst_port}"
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}/16" dev veth_b
+ src_addr4="-B ${src_addr4}"
+ else
+ B ip addr add dev veth_b 10.0.0.2
+ src_addr4="-B 10.0.0.2"
+ fi
+ if [ -n "${src_port}" ]; then
+ src_addr4="${src_addr4}:${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ iperf -u -sDB "${dst_addr4}" ${dst_port} >/dev/null 2>&1
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
+
+ # shellcheck disable=SC2086 # this needs split options
+ B iperf -u -c "${dst_addr4}" -b 100M -l1 -t1000 \
+ ${dst_port} ${src_addr4}
+
+ src_addr4=
+ src_port=
+ dst_port=
+ }
+ elif command -v netperf >/dev/null; then
+ flood_udp() {
+ local n_port="${dst_port}"
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}/16" dev veth_b
+ else
+ B ip addr add dev veth_b 10.0.0.2
+ src_addr4="10.0.0.2"
+ fi
+ if [ -n "${src_port}" ]; then
+ dst_port="${dst_port},${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ netserver -4 ${dst_port} -L "${dst_addr4}" \
+ >/dev/null 2>&1
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
+
+ # shellcheck disable=SC2086 # this needs split options
+ B netperf -4 -H "${dst_addr4}" ${dst_port} \
+ -L "${src_addr4}" -l 1000 -t UDP_STREAM
+
+ src_addr4=
+ src_port=
+ dst_port=
+ }
+ else
+ return 1
+ fi
+}
+
+# Find pktgen script and set up function to start pktgen injection
+setup_perf() {
+ for pktgen_script_path in ${PKTGEN_SCRIPT_PATHS} __notfound; do
+ command -v "${pktgen_script_path}" >/dev/null && break
+ done
+ [ "${pktgen_script_path}" = "__notfound" ] && return 1
+
+ perf_ipv4() {
+ ${pktgen_script_path} -s80 \
+ -i veth_a -d "${dst_addr4}" -p "${dst_port}" \
+ -m "${dst_mac}" \
+ -t $(($(nproc) / 5 + 1)) -b10000 -n0 2>/dev/null &
+ perf_pid=$!
+ }
+ perf_ipv6() {
+ IP6=6 ${pktgen_script_path} -s100 \
+ -i veth_a -d "${dst_addr6}" -p "${dst_port}" \
+ -m "${dst_mac}" \
+ -t $(($(nproc) / 5 + 1)) -b10000 -n0 2>/dev/null &
+ perf_pid=$!
+ }
+}
+
+# Clean up before each test
+cleanup() {
+ nft reset counter inet filter test >/dev/null 2>&1
+ nft flush ruleset >/dev/null 2>&1
+ ip link del dummy0 2>/dev/null
+ ip route del default 2>/dev/null
+ ip -6 route del default 2>/dev/null
+ ip netns pids B 2>/dev/null | xargs kill 2>/dev/null
+ ip netns del B 2>/dev/null
+ ip link del veth_a 2>/dev/null
+ timeout=
+ killall iperf3 2>/dev/null
+ killall iperf 2>/dev/null
+ killall netperf 2>/dev/null
+ killall netserver 2>/dev/null
+}
+
+cleanup_exit() {
+ cleanup
+ rm -f "$tmp"
+}
+
+# Entry point for setup functions
+setup() {
+ if [ "$(id -u)" -ne 0 ]; then
+ echo " need to run as root"
+ exit ${ksft_skip}
+ fi
+
+ cleanup
+ check_tools || return 1
+ for arg do
+ if ! eval setup_"${arg}"; then
+ err " ${arg} not supported"
+ return 1
+ fi
+ done
+}
+
+# Format integer into IPv4 address, summing 10.0.0.5 (arbitrary) to it
+format_addr4() {
+ a=$((${1} + 16777216 * 10 + 5))
+ printf "%i.%i.%i.%i" \
+ "$((a / 16777216))" "$((a % 16777216 / 65536))" \
+ "$((a % 65536 / 256))" "$((a % 256))"
+}
+
+# Format integer into IPv6 address, summing 2001:db8:: to it
+format_addr6() {
+ printf "2001:db8::%04x:%04x" "$((${1} / 65536))" "$((${1} % 65536))"
+}
+
+# Format integer into EUI-48 address, summing 00:01:00:00:00:00 to it
+format_mac() {
+ printf "00:01:%02x:%02x:%02x:%02x" \
+ "$((${1} / 16777216))" "$((${1} % 16777216 / 65536))" \
+ "$((${1} % 65536 / 256))" "$((${1} % 256))"
+}
+
+# Format integer into port, avoid 0 port
+format_port() {
+ printf "%i" "$((${1} % 65534 + 1))"
+}
+
+# Drop suffixed '6' from L4 protocol, if any
+format_proto() {
+ printf "%s" "${proto}" | tr -d 6
+}
+
+# Format destination and source fields into nft concatenated type
+format() {
+ __start=
+ __end=
+ __expr="{ "
+
+ for f in ${dst}; do
+ [ "${__expr}" != "{ " ] && __expr="${__expr} . "
+
+ __start="$(eval format_"${f}" "${start}")"
+ __end="$(eval format_"${f}" "${end}")"
+
+ if [ "${f}" = "proto" ]; then
+ __expr="${__expr}${__start}"
+ else
+ __expr="${__expr}${__start}-${__end}"
+ fi
+ done
+ for f in ${src}; do
+ [ "${__expr}" != "{ " ] && __expr="${__expr} . "
+
+ __start="$(eval format_"${f}" "${srcstart}")"
+ __end="$(eval format_"${f}" "${srcend}")"
+
+ if [ "${f}" = "proto" ]; then
+ __expr="${__expr}${__start}"
+ else
+ __expr="${__expr}${__start}-${__end}"
+ fi
+ done
+
+ if [ -n "${timeout}" ]; then
+ echo "${__expr} timeout ${timeout}s }"
+ else
+ echo "${__expr} }"
+ fi
+}
+
+# Format destination and source fields into nft type, start element only
+format_norange() {
+ __expr="{ "
+
+ for f in ${dst}; do
+ [ "${__expr}" != "{ " ] && __expr="${__expr} . "
+
+ __expr="${__expr}$(eval format_"${f}" "${start}")"
+ done
+ for f in ${src}; do
+ __expr="${__expr} . $(eval format_"${f}" "${start}")"
+ done
+
+ echo "${__expr} }"
+}
+
+# Format first destination field into nft type
+format_noconcat() {
+ for f in ${dst}; do
+ __start="$(eval format_"${f}" "${start}")"
+ __end="$(eval format_"${f}" "${end}")"
+
+ if [ "${f}" = "proto" ]; then
+ echo "{ ${__start} }"
+ else
+ echo "{ ${__start}-${__end} }"
+ fi
+ return
+ done
+}
+
+# Add single entry to 'test' set in 'inet filter' table
+add() {
+ if ! nft add element inet filter test "${1}"; then
+ err "Failed to add ${1} given ruleset:"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+}
+
+# Format and output entries for sets in 'netdev perf' table
+add_perf() {
+ if [ "${1}" = "test" ]; then
+ echo "add element netdev perf test $(format)"
+ elif [ "${1}" = "norange" ]; then
+ echo "add element netdev perf norange $(format_norange)"
+ elif [ "${1}" = "noconcat" ]; then
+ echo "add element netdev perf noconcat $(format_noconcat)"
+ fi
+}
+
+# Add single entry to 'norange' set in 'netdev perf' table
+add_perf_norange() {
+ if ! nft add element netdev perf norange "${1}"; then
+ err "Failed to add ${1} given ruleset:"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+}
+
+# Add single entry to 'noconcat' set in 'netdev perf' table
+add_perf_noconcat() {
+ if ! nft add element netdev perf noconcat "${1}"; then
+ err "Failed to add ${1} given ruleset:"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+}
+
+# Delete single entry from set
+del() {
+ if ! nft delete element inet filter test "${1}"; then
+ err "Failed to delete ${1} given ruleset:"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+}
+
+# Return packet count from 'test' counter in 'inet filter' table
+count_packets() {
+ found=0
+ for token in $(nft list counter inet filter test); do
+ [ ${found} -eq 1 ] && echo "${token}" && return
+ [ "${token}" = "packets" ] && found=1
+ done
+}
+
+# Return packet count from 'test' counter in 'netdev perf' table
+count_perf_packets() {
+ found=0
+ for token in $(nft list counter netdev perf test); do
+ [ ${found} -eq 1 ] && echo "${token}" && return
+ [ "${token}" = "packets" ] && found=1
+ done
+}
+
+# Set MAC addresses, send traffic according to specifier
+flood() {
+ ip link set veth_a address "$(format_mac "${1}")"
+ ip -n B link set veth_b address "$(format_mac "${2}")"
+
+ for f in ${dst}; do
+ eval dst_"$f"=\$\(format_\$f "${1}"\)
+ done
+ for f in ${src}; do
+ eval src_"$f"=\$\(format_\$f "${2}"\)
+ done
+ eval flood_\$proto
+}
+
+# Set MAC addresses, start pktgen injection
+perf() {
+ dst_mac="$(format_mac "${1}")"
+ ip link set veth_a address "${dst_mac}"
+
+ for f in ${dst}; do
+ eval dst_"$f"=\$\(format_\$f "${1}"\)
+ done
+ for f in ${src}; do
+ eval src_"$f"=\$\(format_\$f "${2}"\)
+ done
+ eval perf_\$perf_proto
+}
+
+# Set MAC addresses, send single packet, check that it matches, reset counter
+send_match() {
+ ip link set veth_a address "$(format_mac "${1}")"
+ ip -n B link set veth_b address "$(format_mac "${2}")"
+
+ for f in ${dst}; do
+ eval dst_"$f"=\$\(format_\$f "${1}"\)
+ done
+ for f in ${src}; do
+ eval src_"$f"=\$\(format_\$f "${2}"\)
+ done
+ eval send_\$proto
+ if [ "$(count_packets)" != "1" ]; then
+ err "${proto} packet to:"
+ err " $(for f in ${dst}; do
+ eval format_\$f "${1}"; printf ' '; done)"
+ err "from:"
+ err " $(for f in ${src}; do
+ eval format_\$f "${2}"; printf ' '; done)"
+ err "should have matched ruleset:"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+ nft reset counter inet filter test >/dev/null
+}
+
+# Set MAC addresses, send single packet, check that it doesn't match
+send_nomatch() {
+ ip link set veth_a address "$(format_mac "${1}")"
+ ip -n B link set veth_b address "$(format_mac "${2}")"
+
+ for f in ${dst}; do
+ eval dst_"$f"=\$\(format_\$f "${1}"\)
+ done
+ for f in ${src}; do
+ eval src_"$f"=\$\(format_\$f "${2}"\)
+ done
+ eval send_\$proto
+ if [ "$(count_packets)" != "0" ]; then
+ err "${proto} packet to:"
+ err " $(for f in ${dst}; do
+ eval format_\$f "${1}"; printf ' '; done)"
+ err "from:"
+ err " $(for f in ${src}; do
+ eval format_\$f "${2}"; printf ' '; done)"
+ err "should not have matched ruleset:"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+}
+
+# Correctness test template:
+# - add ranged element, check that packets match it
+# - check that packets outside range don't match it
+# - remove some elements, check that packets don't match anymore
+test_correctness() {
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ range_size=1
+ for i in $(seq "${start}" $((start + count))); do
+ end=$((start + range_size))
+
+ # Avoid negative or zero-sized port ranges
+ if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+ start=${end}
+ end=$((end + 1))
+ fi
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" || return 1
+ for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
+ send_match "${j}" $((j + src_delta)) || return 1
+ done
+ send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1
+
+ # Delete elements now and then
+ if [ $((i % 3)) -eq 0 ]; then
+ del "$(format)" || return 1
+ for j in $(seq "$start" \
+ $((range_size / 2 + 1)) ${end}); do
+ send_nomatch "${j}" $((j + src_delta)) \
+ || return 1
+ done
+ fi
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+}
+
+# Concurrency test template:
+# - add all the elements
+# - start a thread for each physical thread that:
+# - adds all the elements
+# - flushes the set
+# - adds all the elements
+# - flushes the entire ruleset
+# - adds the set back
+# - adds all the elements
+# - delete all the elements
+test_concurrency() {
+ proto=${flood_proto}
+ tools=${flood_tools}
+ chain_spec=${flood_spec}
+ setup veth flood_"${proto}" set || return ${ksft_skip}
+
+ range_size=1
+ cstart=${start}
+ flood_pids=
+ for i in $(seq "$start" $((start + count))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" || return 1
+
+ flood "${i}" $((i + src_delta)) & flood_pids="${flood_pids} $!"
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ sleep $((RANDOM%10))
+
+ pids=
+ for c in $(seq 1 "$(nproc)"); do (
+ for r in $(seq 1 "${race_repeat}"); do
+ range_size=1
+
+ # $start needs to be local to this subshell
+ # shellcheck disable=SC2030
+ start=${cstart}
+ for i in $(seq "$start" $((start + count))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" 2>/dev/null
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ nft flush inet filter test 2>/dev/null
+
+ range_size=1
+ start=${cstart}
+ for i in $(seq "$start" $((start + count))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" 2>/dev/null
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ nft flush ruleset
+ setup set 2>/dev/null
+
+ range_size=1
+ start=${cstart}
+ for i in $(seq "$start" $((start + count))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" 2>/dev/null
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ range_size=1
+ start=${cstart}
+ for i in $(seq "$start" $((start + count))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ del "$(format)" 2>/dev/null
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+ done
+ ) & pids="${pids} $!"
+ done
+
+ # shellcheck disable=SC2046,SC2086 # word splitting wanted here
+ wait $(for pid in ${pids}; do echo ${pid}; done)
+ # shellcheck disable=SC2046,SC2086
+ kill $(for pid in ${flood_pids}; do echo ${pid}; done) 2>/dev/null
+ # shellcheck disable=SC2046,SC2086
+ wait $(for pid in ${flood_pids}; do echo ${pid}; done) 2>/dev/null
+
+ return 0
+}
+
+# Timeout test template:
+# - add all the elements with 3s timeout while checking that packets match
+# - wait 3s after the last insertion, check that packets don't match any entry
+test_timeout() {
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ timeout=3
+
+ [ "$KSFT_MACHINE_SLOW" = "yes" ] && timeout=8
+
+ range_size=1
+ for i in $(seq "$start" $((start + count))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" || return 1
+
+ for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
+ send_match "${j}" $((j + src_delta)) || return 1
+ done
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+ sleep $timeout
+ for i in $(seq "$start" $((start + count))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
+ send_nomatch "${j}" $((j + src_delta)) || return 1
+ done
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+}
+
+# Performance test template:
+# - add concatenated ranged entries
+# - add non-ranged concatenated entries (for hash set matching rate baseline)
+# - add ranged entries with first field only (for rbhash baseline)
+# - start pktgen injection directly on device rx path of this namespace
+# - measure drop only rate, hash and rbtree baselines, then matching rate
+test_performance() {
+ chain_spec=${perf_spec}
+ dst="${perf_dst}"
+ src="${perf_src}"
+ setup veth perf set || return ${ksft_skip}
+
+ first=${start}
+ range_size=1
+ for set in test norange noconcat; do
+ start=${first}
+ for i in $(seq "$start" $((start + perf_entries))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+ start=${end}
+ end=$((end + 1))
+ elif [ "$start" -eq "$end" ]; then
+ end=$((start + 1))
+ fi
+
+ add_perf ${set}
+
+ start=$((end + range_size))
+ done > "${tmp}"
+ nft -f "${tmp}"
+ done
+
+ perf $((end - 1)) "$srcstart"
+
+ sleep 2
+
+ nft add rule netdev perf test counter name \"test\" drop
+ nft reset counter netdev perf test >/dev/null 2>&1
+ sleep "${perf_duration}"
+ pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))"
+ info " baseline (drop from netdev hook): ${pps}pps"
+ handle="$(nft -a list chain netdev perf test | grep counter)"
+ handle="${handle##* }"
+ nft delete rule netdev perf test handle "${handle}"
+
+ nft add rule "netdev perf test ${chain_spec} @norange \
+ counter name \"test\" drop"
+ nft reset counter netdev perf test >/dev/null 2>&1
+ sleep "${perf_duration}"
+ pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))"
+ info " baseline hash (non-ranged entries): ${pps}pps"
+ handle="$(nft -a list chain netdev perf test | grep counter)"
+ handle="${handle##* }"
+ nft delete rule netdev perf test handle "${handle}"
+
+ nft add rule "netdev perf test ${chain_spec%%. *} @noconcat \
+ counter name \"test\" drop"
+ nft reset counter netdev perf test >/dev/null 2>&1
+ sleep "${perf_duration}"
+ pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))"
+ info " baseline rbtree (match on first field only): ${pps}pps"
+ handle="$(nft -a list chain netdev perf test | grep counter)"
+ handle="${handle##* }"
+ nft delete rule netdev perf test handle "${handle}"
+
+ nft add rule "netdev perf test ${chain_spec} @test \
+ counter name \"test\" drop"
+ nft reset counter netdev perf test >/dev/null 2>&1
+ sleep "${perf_duration}"
+ pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))"
+ p5="$(printf %5s "${perf_entries}")"
+ info " set with ${p5} full, ranged entries: ${pps}pps"
+ kill "${perf_pid}"
+}
+
+test_bug_flush_remove_add() {
+ rounds=100
+ [ "$KSFT_MACHINE_SLOW" = "yes" ] && rounds=10
+
+ set_cmd='{ set s { type ipv4_addr . inet_service; flags interval; }; }'
+ elem1='{ 10.0.0.1 . 22-25, 10.0.0.1 . 10-20 }'
+ elem2='{ 10.0.0.1 . 10-20, 10.0.0.1 . 22-25 }'
+ for i in $(seq 1 $rounds); do
+ nft add table t "$set_cmd" || return ${ksft_skip}
+ nft add element t s "$elem1" 2>/dev/null || return 1
+ nft flush set t s 2>/dev/null || return 1
+ nft add element t s "$elem2" 2>/dev/null || return 1
+ done
+ nft flush ruleset
+}
+
+# - add ranged element, check that packets match it
+# - reload the set, check packets still match
+test_bug_reload() {
+ setup veth send_"${proto}" set || return ${ksft_skip}
+ rstart=${start}
+
+ range_size=1
+ for i in $(seq "${start}" $((start + count))); do
+ end=$((start + range_size))
+
+ # Avoid negative or zero-sized port ranges
+ if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+ start=${end}
+ end=$((end + 1))
+ fi
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" || return 1
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ # check kernel does allocate pcpu sctrach map
+ # for reload with no elemet add/delete
+ ( echo flush set inet filter test ;
+ nft list set inet filter test ) | nft -f -
+
+ start=${rstart}
+ range_size=1
+
+ for i in $(seq "${start}" $((start + count))); do
+ end=$((start + range_size))
+
+ # Avoid negative or zero-sized port ranges
+ if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+ start=${end}
+ end=$((end + 1))
+ fi
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
+ send_match "${j}" $((j + src_delta)) || return 1
+ done
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ nft flush ruleset
+}
+
+test_reported_issues() {
+ eval test_bug_"${subtest}"
+}
+
+# Run everything in a separate network namespace
+[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
+tmp="$(mktemp)"
+trap cleanup_exit EXIT
+
+# Entry point for test runs
+passed=0
+for name in ${TESTS}; do
+ printf "TEST: %s\n" "$(echo "$name" | tr '_' ' ')"
+ if [ "${name}" = "reported_issues" ]; then
+ SUBTESTS="${BUGS}"
+ else
+ SUBTESTS="${TYPES}"
+ fi
+
+ for subtest in ${SUBTESTS}; do
+ eval desc=\$TYPE_"${subtest}"
+ IFS='
+'
+ for __line in ${desc}; do
+ # shellcheck disable=SC2086
+ eval ${__line%% *}=\"${__line##* }\";
+ done
+ IFS='
+'
+
+ if [ "${name}" = "concurrency" ] && \
+ [ "${race_repeat}" = "0" ]; then
+ continue
+ fi
+ if [ "${name}" = "performance" ] && \
+ [ "${perf_duration}" = "0" ]; then
+ continue
+ fi
+
+ [ "$KSFT_MACHINE_SLOW" = "yes" ] && count=1
+
+ printf " %-32s " "${display}"
+ tthen=$(date +%s)
+ eval test_"${name}"
+ ret=$?
+
+ tnow=$(date +%s)
+ printf "%5ds%-30s" $((tnow-tthen))
+
+ if [ $ret -eq 0 ]; then
+ printf "[ OK ]\n"
+ info_flush
+ passed=$((passed + 1))
+ elif [ $ret -eq 1 ]; then
+ printf "[FAIL]\n"
+ err_flush
+ exit 1
+ elif [ $ret -eq ${ksft_skip} ]; then
+ printf "[SKIP]\n"
+ err_flush
+ fi
+ done
+done
+
+[ ${passed} -eq 0 ] && exit ${ksft_skip} || exit 0
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh b/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh
new file mode 100755
index 0000000000..5d276995a5
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+
+source lib.sh
+
+[ "$KSFT_MACHINE_SLOW" = yes ] && exit ${ksft_skip}
+
+NFT_CONCAT_RANGE_TESTS="performance" exec ./nft_concat_range.sh
diff --git a/tools/testing/selftests/net/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/net/netfilter/nft_conntrack_helper.sh
new file mode 100755
index 0000000000..abcaa73371
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_conntrack_helper.sh
@@ -0,0 +1,171 @@
+#!/bin/bash
+#
+# This tests connection tracking helper assignment:
+# 1. can attach ftp helper to a connection from nft ruleset.
+# 2. auto-assign still works.
+#
+# Kselftest framework requirement - SKIP code is 4.
+
+source lib.sh
+
+ret=0
+
+testipv6=1
+
+checktool "socat -h" "run test without socat"
+checktool "conntrack --version" "run test without conntrack"
+checktool "nft --version" "run test without nft"
+
+cleanup()
+{
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+
+ ip netns del "$ns1"
+ ip netns del "$ns2"
+}
+
+trap cleanup EXIT
+
+setup_ns ns1 ns2
+
+if ! ip link add veth0 netns "$ns1" type veth peer name veth0 netns "$ns2" > /dev/null 2>&1;then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+
+ip -net "$ns1" link set veth0 up
+ip -net "$ns2" link set veth0 up
+
+ip -net "$ns1" addr add 10.0.1.1/24 dev veth0
+ip -net "$ns1" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$ns2" addr add 10.0.1.2/24 dev veth0
+ip -net "$ns2" addr add dead:1::2/64 dev veth0 nodad
+
+load_ruleset_family() {
+ local family=$1
+ local ns=$2
+
+ip netns exec "$ns" nft -f - <<EOF
+table $family raw {
+ ct helper ftp {
+ type "ftp" protocol tcp
+ }
+ chain pre {
+ type filter hook prerouting priority 0; policy accept;
+ tcp dport 2121 ct helper set "ftp"
+ }
+ chain output {
+ type filter hook output priority 0; policy accept;
+ tcp dport 2121 ct helper set "ftp"
+ }
+}
+EOF
+ return $?
+}
+
+check_for_helper()
+{
+ local netns=$1
+ local message=$2
+ local port=$3
+
+ if echo "$message" |grep -q 'ipv6';then
+ local family="ipv6"
+ else
+ local family="ipv4"
+ fi
+
+ if ! ip netns exec "$netns" conntrack -L -f $family -p tcp --dport "$port" 2> /dev/null |grep -q 'helper=ftp';then
+ if [ "$autoassign" -eq 0 ] ;then
+ echo "FAIL: ${netns} did not show attached helper $message" 1>&2
+ ret=1
+ else
+ echo "PASS: ${netns} did not show attached helper $message" 1>&2
+ fi
+ else
+ if [ "$autoassign" -eq 0 ] ;then
+ echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2
+ else
+ echo "FAIL: ${netns} connection on port $port has ftp helper attached" 1>&2
+ ret=1
+ fi
+ fi
+
+ return 0
+}
+
+listener_ready()
+{
+ ns="$1"
+ port="$2"
+ proto="$3"
+ ss -N "$ns" -lnt -o "sport = :$port" | grep -q "$port"
+}
+
+test_helper()
+{
+ local port=$1
+ local autoassign=$2
+
+ if [ "$autoassign" -eq 0 ] ;then
+ msg="set via ruleset"
+ else
+ msg="auto-assign"
+ fi
+
+ ip netns exec "$ns2" socat -t 3 -u -4 TCP-LISTEN:"$port",reuseaddr STDOUT > /dev/null &
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" "$port" "-4"
+
+ ip netns exec "$ns1" socat -u -4 STDIN TCP:10.0.1.2:"$port" < /dev/null > /dev/null
+
+ check_for_helper "$ns1" "ip $msg" "$port" "$autoassign"
+ check_for_helper "$ns2" "ip $msg" "$port" "$autoassign"
+
+ if [ $testipv6 -eq 0 ] ;then
+ return 0
+ fi
+
+ ip netns exec "$ns1" conntrack -F 2> /dev/null
+ ip netns exec "$ns2" conntrack -F 2> /dev/null
+
+ ip netns exec "$ns2" socat -t 3 -u -6 TCP-LISTEN:"$port",reuseaddr STDOUT > /dev/null &
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns2" "$port" "-6"
+
+ ip netns exec "$ns1" socat -t 3 -u -6 STDIN TCP:"[dead:1::2]":"$port" < /dev/null > /dev/null
+
+ check_for_helper "$ns1" "ipv6 $msg" "$port"
+ check_for_helper "$ns2" "ipv6 $msg" "$port"
+}
+
+if ! load_ruleset_family ip "$ns1"; then
+ echo "FAIL: ${ns1} cannot load ip ruleset" 1>&2
+ exit 1
+fi
+
+if ! load_ruleset_family ip6 "$ns1"; then
+ echo "SKIP: ${ns1} cannot load ip6 ruleset" 1>&2
+ testipv6=0
+fi
+
+if ! load_ruleset_family inet "${ns2}"; then
+ echo "SKIP: ${ns1} cannot load inet ruleset" 1>&2
+ if ! load_ruleset_family ip "${ns2}"; then
+ echo "FAIL: ${ns2} cannot load ip ruleset" 1>&2
+ exit 1
+ fi
+
+ if [ "$testipv6" -eq 1 ] ;then
+ if ! load_ruleset_family ip6 "$ns2"; then
+ echo "FAIL: ${ns2} cannot load ip6 ruleset" 1>&2
+ exit 1
+ fi
+ fi
+fi
+
+test_helper 2121 0
+ip netns exec "$ns1" sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
+ip netns exec "$ns2" sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
+test_helper 21 1
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_fib.sh b/tools/testing/selftests/net/netfilter/nft_fib.sh
new file mode 100755
index 0000000000..ce1451c275
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_fib.sh
@@ -0,0 +1,234 @@
+#!/bin/bash
+#
+# This tests the fib expression.
+#
+# Kselftest framework requirement - SKIP code is 4.
+
+source lib.sh
+
+ret=0
+
+timeout=4
+
+log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
+
+cleanup()
+{
+ cleanup_all_ns
+
+ [ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
+}
+
+checktool "nft --version" "run test without nft"
+
+setup_ns nsrouter ns1 ns2
+
+trap cleanup EXIT
+
+if dmesg | grep -q ' nft_rpfilter: ';then
+ dmesg -c | grep ' nft_rpfilter: '
+ echo "WARN: a previous test run has failed" 1>&2
+fi
+
+sysctl -q net.netfilter.nf_log_all_netns=1
+
+load_ruleset() {
+ local netns=$1
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority 0; policy accept;
+ fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
+ }
+}
+EOF
+}
+
+load_pbr_ruleset() {
+ local netns=$1
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+ chain forward {
+ type filter hook forward priority raw;
+ fib saddr . iif oif gt 0 accept
+ log drop
+ }
+}
+EOF
+}
+
+load_ruleset_count() {
+ local netns=$1
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority 0; policy accept;
+ ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop
+ ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop
+ }
+}
+EOF
+}
+
+check_drops() {
+ if dmesg | grep -q ' nft_rpfilter: ';then
+ dmesg | grep ' nft_rpfilter: '
+ echo "FAIL: rpfilter did drop packets"
+ return 1
+ fi
+
+ return 0
+}
+
+check_fib_counter() {
+ local want=$1
+ local ns=$2
+ local address=$3
+
+ if ! ip netns exec "$ns" nft list table inet filter | grep 'fib saddr . iif' | grep "$address" | grep -q "packets $want";then
+ echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2
+ ip netns exec "$ns" nft list table inet filter
+ return 1
+ fi
+
+ if [ "$want" -gt 0 ]; then
+ echo "PASS: fib expression did drop packets for $address"
+ fi
+
+ return 0
+}
+
+load_ruleset "$nsrouter"
+load_ruleset "$ns1"
+load_ruleset "$ns2"
+
+if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2"
+
+ip -net "$nsrouter" link set veth0 up
+ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$nsrouter" link set veth1 up
+ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1
+ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad
+
+ip -net "$ns1" link set eth0 up
+ip -net "$ns2" link set eth0 up
+
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" route add default via dead:1::1
+
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns2" route add default via dead:2::1
+
+test_ping() {
+ local daddr4=$1
+ local daddr6=$2
+
+ if ! ip netns exec "$ns1" ping -c 1 -q "$daddr4" > /dev/null; then
+ check_drops
+ echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2
+ return 1
+ fi
+
+ if ! ip netns exec "$ns1" ping -c 1 -q "$daddr6" > /dev/null; then
+ check_drops
+ echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2
+ return 1
+ fi
+
+ return 0
+}
+
+ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null
+
+test_ping 10.0.2.1 dead:2::1 || exit 1
+check_drops || exit 1
+
+test_ping 10.0.2.99 dead:2::99 || exit 1
+check_drops || exit 1
+
+echo "PASS: fib expression did not cause unwanted packet drops"
+
+ip netns exec "$nsrouter" nft flush table inet filter
+
+ip -net "$ns1" route del default
+ip -net "$ns1" -6 route del default
+
+ip -net "$ns1" addr del 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr del dead:1::99/64 dev eth0
+
+ip -net "$ns1" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns1" addr add dead:2::99/64 dev eth0 nodad
+
+ip -net "$ns1" route add default via 10.0.2.1
+ip -net "$ns1" -6 route add default via dead:2::1
+
+ip -net "$nsrouter" addr add dead:2::1/64 dev veth0 nodad
+
+# switch to ruleset that doesn't log, this time
+# its expected that this does drop the packets.
+load_ruleset_count "$nsrouter"
+
+# ns1 has a default route, but nsrouter does not.
+# must not check return value, ping to 1.1.1.1 will
+# fail.
+check_fib_counter 0 "$nsrouter" 1.1.1.1 || exit 1
+check_fib_counter 0 "$nsrouter" 1c3::c01d || exit 1
+
+ip netns exec "$ns1" ping -W 0.5 -c 1 -q 1.1.1.1 > /dev/null
+check_fib_counter 1 "$nsrouter" 1.1.1.1 || exit 1
+
+ip netns exec "$ns1" ping -W 0.5 -i 0.1 -c 3 -q 1c3::c01d > /dev/null
+check_fib_counter 3 "$nsrouter" 1c3::c01d || exit 1
+
+# delete all rules
+ip netns exec "$ns1" nft flush ruleset
+ip netns exec "$ns2" nft flush ruleset
+ip netns exec "$nsrouter" nft flush ruleset
+
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+
+ip -net "$ns1" addr del 10.0.2.99/24 dev eth0
+ip -net "$ns1" addr del dead:2::99/64 dev eth0
+
+ip -net "$nsrouter" addr del dead:2::1/64 dev veth0
+
+# ... pbr ruleset for the router, check iif+oif.
+if ! load_pbr_ruleset "$nsrouter";then
+ echo "SKIP: Could not load fib forward ruleset"
+ exit $ksft_skip
+fi
+
+ip -net "$nsrouter" rule add from all table 128
+ip -net "$nsrouter" rule add from all iif veth0 table 129
+ip -net "$nsrouter" route add table 128 to 10.0.1.0/24 dev veth0
+ip -net "$nsrouter" route add table 129 to 10.0.2.0/24 dev veth1
+
+# drop main ipv4 table
+ip -net "$nsrouter" -4 rule delete table main
+
+if ! test_ping 10.0.2.99 dead:2::99;then
+ ip -net "$nsrouter" nft list ruleset
+ echo "FAIL: fib mismatch in pbr setup"
+ exit 1
+fi
+
+echo "PASS: fib expression forward check with policy based routing"
+exit 0
diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
new file mode 100755
index 0000000000..b399555085
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
@@ -0,0 +1,671 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This tests basic flowtable functionality.
+# Creates following default topology:
+#
+# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
+# Router1 is the one doing flow offloading, Router2 has no special
+# purpose other than having a link that is smaller than either Originator
+# and responder, i.e. TCPMSS announced values are too large and will still
+# result in fragmentation and/or PMTU discovery.
+#
+# You can check with different Orgininator/Link/Responder MTU eg:
+# nft_flowtable.sh -o8000 -l1500 -r2000
+#
+
+source lib.sh
+
+ret=0
+SOCAT_TIMEOUT=60
+
+nsin=""
+ns1out=""
+ns2out=""
+
+log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
+
+checktool "nft --version" "run test without nft tool"
+checktool "socat -h" "run test without socat"
+
+setup_ns ns1 ns2 nsr1 nsr2
+
+cleanup() {
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
+
+ cleanup_all_ns
+
+ rm -f "$nsin" "$ns1out" "$ns2out"
+
+ [ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns"
+}
+
+trap cleanup EXIT
+
+sysctl -q net.netfilter.nf_log_all_netns=1
+
+ip link add veth0 netns "$nsr1" type veth peer name eth0 netns "$ns1"
+ip link add veth1 netns "$nsr1" type veth peer name veth0 netns "$nsr2"
+
+ip link add veth1 netns "$nsr2" type veth peer name eth0 netns "$ns2"
+
+for dev in veth0 veth1; do
+ ip -net "$nsr1" link set "$dev" up
+ ip -net "$nsr2" link set "$dev" up
+done
+
+ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$nsr2" addr add 10.0.2.1/24 dev veth1
+ip -net "$nsr2" addr add dead:2::1/64 dev veth1 nodad
+
+# set different MTUs so we need to push packets coming from ns1 (large MTU)
+# to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
+# or to do PTMU discovery (send ICMP error back to originator).
+# ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
+# is NOT the lowest link mtu.
+
+omtu=9000
+lmtu=1500
+rmtu=2000
+
+usage(){
+ echo "nft_flowtable.sh [OPTIONS]"
+ echo
+ echo "MTU options"
+ echo " -o originator"
+ echo " -l link"
+ echo " -r responder"
+ exit 1
+}
+
+while getopts "o:l:r:" o
+do
+ case $o in
+ o) omtu=$OPTARG;;
+ l) lmtu=$OPTARG;;
+ r) rmtu=$OPTARG;;
+ *) usage;;
+ esac
+done
+
+if ! ip -net "$nsr1" link set veth0 mtu "$omtu"; then
+ exit 1
+fi
+
+ip -net "$ns1" link set eth0 mtu "$omtu"
+
+if ! ip -net "$nsr2" link set veth1 mtu "$rmtu"; then
+ exit 1
+fi
+
+if ! ip -net "$nsr1" link set veth1 mtu "$lmtu"; then
+ exit 1
+fi
+
+if ! ip -net "$nsr2" link set veth0 mtu "$lmtu"; then
+ exit 1
+fi
+
+ip -net "$ns2" link set eth0 mtu "$rmtu"
+
+# transfer-net between nsr1 and nsr2.
+# these addresses are not used for connections.
+ip -net "$nsr1" addr add 192.168.10.1/24 dev veth1
+ip -net "$nsr1" addr add fee1:2::1/64 dev veth1 nodad
+
+ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0
+ip -net "$nsr2" addr add fee1:2::2/64 dev veth0 nodad
+
+for i in 0 1; do
+ ip netns exec "$nsr1" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
+ ip netns exec "$nsr2" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
+done
+
+for ns in "$ns1" "$ns2";do
+ ip -net "$ns" link set eth0 up
+
+ if ! ip netns exec "$ns" sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
+ echo "ERROR: Check Originator/Responder values (problem during address addition)"
+ exit 1
+ fi
+ # don't set ip DF bit for first two tests
+ ip netns exec "$ns" sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
+done
+
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via dead:1::1
+ip -net "$ns2" route add default via dead:2::1
+
+ip -net "$nsr1" route add default via 192.168.10.2
+ip -net "$nsr2" route add default via 192.168.10.1
+
+ip netns exec "$nsr1" nft -f - <<EOF
+table inet filter {
+ flowtable f1 {
+ hook ingress priority 0
+ devices = { veth0, veth1 }
+ }
+
+ counter routed_orig { }
+ counter routed_repl { }
+
+ chain forward {
+ type filter hook forward priority 0; policy drop;
+
+ # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
+ meta oif "veth1" tcp dport 12345 ct mark set 1 flow add @f1 counter name routed_orig accept
+
+ # count packets supposedly offloaded as per direction.
+ ct mark 1 counter name ct direction map { original : routed_orig, reply : routed_repl } accept
+
+ ct state established,related accept
+
+ meta nfproto ipv4 meta l4proto icmp accept
+ meta nfproto ipv6 meta l4proto icmpv6 accept
+ }
+}
+EOF
+
+if [ $? -ne 0 ]; then
+ echo "SKIP: Could not load nft ruleset"
+ exit $ksft_skip
+fi
+
+ip netns exec "$ns2" nft -f - <<EOF
+table inet filter {
+ counter ip4dscp0 { }
+ counter ip4dscp3 { }
+
+ chain input {
+ type filter hook input priority 0; policy accept;
+ meta l4proto tcp goto {
+ ip dscp cs3 counter name ip4dscp3 accept
+ ip dscp 0 counter name ip4dscp0 accept
+ }
+ }
+}
+EOF
+
+if [ $? -ne 0 ]; then
+ echo -n "SKIP: Could not load ruleset: "
+ nft --version
+ exit $ksft_skip
+fi
+
+# test basic connectivity
+if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
+ echo "ERROR: $ns1 cannot reach ns2" 1>&2
+ exit 1
+fi
+
+if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
+ echo "ERROR: $ns2 cannot reach $ns1" 1>&2
+ exit 1
+fi
+
+nsin=$(mktemp)
+ns1out=$(mktemp)
+ns2out=$(mktemp)
+
+make_file()
+{
+ name=$1
+
+ SIZE=$((RANDOM % (1024 * 128)))
+ SIZE=$((SIZE + (1024 * 8)))
+ TSIZE=$((SIZE * 1024))
+
+ dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
+
+ SIZE=$((RANDOM % 1024))
+ SIZE=$((SIZE + 128))
+ TSIZE=$((TSIZE + SIZE))
+ dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null
+}
+
+check_counters()
+{
+ local what=$1
+ local ok=1
+
+ local orig repl
+ orig=$(ip netns exec "$nsr1" nft reset counter inet filter routed_orig | grep packets)
+ repl=$(ip netns exec "$nsr1" nft reset counter inet filter routed_repl | grep packets)
+
+ local orig_cnt=${orig#*bytes}
+ local repl_cnt=${repl#*bytes}
+
+ local fs
+ fs=$(du -sb "$nsin")
+ local max_orig=${fs%%/*}
+ local max_repl=$((max_orig/4))
+
+ # flowtable fastpath should bypass normal routing one, i.e. the counters in forward hook
+ # should always be lower than the size of the transmitted file (max_orig).
+ if [ "$orig_cnt" -gt "$max_orig" ];then
+ echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig" 1>&2
+ ret=1
+ ok=0
+ fi
+
+ if [ "$repl_cnt" -gt $max_repl ];then
+ echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl" 1>&2
+ ret=1
+ ok=0
+ fi
+
+ if [ $ok -eq 1 ]; then
+ echo "PASS: $what"
+ fi
+}
+
+check_dscp()
+{
+ local what=$1
+ local ok=1
+
+ local counter
+ counter=$(ip netns exec "$ns2" nft reset counter inet filter ip4dscp3 | grep packets)
+
+ local pc4=${counter%*bytes*}
+ local pc4=${pc4#*packets}
+
+ counter=$(ip netns exec "$ns2" nft reset counter inet filter ip4dscp0 | grep packets)
+ local pc4z=${counter%*bytes*}
+ local pc4z=${pc4z#*packets}
+
+ case "$what" in
+ "dscp_none")
+ if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then
+ echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
+ ret=1
+ ok=0
+ fi
+ ;;
+ "dscp_fwd")
+ if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then
+ echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
+ ret=1
+ ok=0
+ fi
+ ;;
+ "dscp_ingress")
+ if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
+ echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+ ret=1
+ ok=0
+ fi
+ ;;
+ "dscp_egress")
+ if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
+ echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+ ret=1
+ ok=0
+ fi
+ ;;
+ *)
+ echo "FAIL: Unknown DSCP check" 1>&2
+ ret=1
+ ok=0
+ esac
+
+ if [ "$ok" -eq 1 ] ;then
+ echo "PASS: $what: dscp packet counters match"
+ fi
+}
+
+check_transfer()
+{
+ in=$1
+ out=$2
+ what=$3
+
+ if ! cmp "$in" "$out" > /dev/null 2>&1; then
+ echo "FAIL: file mismatch for $what" 1>&2
+ ls -l "$in"
+ ls -l "$out"
+ return 1
+ fi
+
+ return 0
+}
+
+listener_ready()
+{
+ ss -N "$nsb" -lnt -o "sport = :12345" | grep -q 12345
+}
+
+test_tcp_forwarding_ip()
+{
+ local nsa=$1
+ local nsb=$2
+ local dstip=$3
+ local dstport=$4
+ local lret=0
+
+ timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$nsin" > "$ns2out" &
+ lpid=$!
+
+ busywait 1000 listener_ready
+
+ timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$nsin" > "$ns1out"
+
+ wait $lpid
+
+ if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then
+ lret=1
+ ret=1
+ fi
+
+ if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then
+ lret=1
+ ret=1
+ fi
+
+ return $lret
+}
+
+test_tcp_forwarding()
+{
+ test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+
+ return $?
+}
+
+test_tcp_forwarding_set_dscp()
+{
+ check_dscp "dscp_none"
+
+ip netns exec "$nsr1" nft -f - <<EOF
+table netdev dscpmangle {
+ chain setdscp0 {
+ type filter hook ingress device "veth0" priority 0; policy accept
+ ip dscp set cs3
+ }
+}
+EOF
+if [ $? -eq 0 ]; then
+ test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ check_dscp "dscp_ingress"
+
+ ip netns exec "$nsr1" nft delete table netdev dscpmangle
+else
+ echo "SKIP: Could not load netdev:ingress for veth0"
+fi
+
+ip netns exec "$nsr1" nft -f - <<EOF
+table netdev dscpmangle {
+ chain setdscp0 {
+ type filter hook egress device "veth1" priority 0; policy accept
+ ip dscp set cs3
+ }
+}
+EOF
+if [ $? -eq 0 ]; then
+ test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ check_dscp "dscp_egress"
+
+ ip netns exec "$nsr1" nft flush table netdev dscpmangle
+else
+ echo "SKIP: Could not load netdev:egress for veth1"
+fi
+
+ # partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3
+ # counters should have seen packets (before and after ft offload kicks in).
+ ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3
+ test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ check_dscp "dscp_fwd"
+}
+
+test_tcp_forwarding_nat()
+{
+ local lret
+ local pmtu
+
+ test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ lret=$?
+
+ pmtu=$3
+ what=$4
+
+ if [ "$lret" -eq 0 ] ; then
+ if [ "$pmtu" -eq 1 ] ;then
+ check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what"
+ else
+ echo "PASS: flow offload for ns1/ns2 with masquerade $what"
+ fi
+
+ test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
+ lret=$?
+ if [ "$pmtu" -eq 1 ] ;then
+ check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what"
+ elif [ "$lret" -eq 0 ] ; then
+ echo "PASS: flow offload for ns1/ns2 with dnat $what"
+ fi
+ fi
+
+ return $lret
+}
+
+make_file "$nsin"
+
+# First test:
+# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
+# Due to MTU mismatch in both directions, all packets (except small packets like pure
+# acks) have to be handled by normal forwarding path. Therefore, packet counters
+# are not checked.
+if test_tcp_forwarding "$ns1" "$ns2"; then
+ echo "PASS: flow offloaded for ns1/ns2"
+else
+ echo "FAIL: flow offload for ns1/ns2:" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
+# delete default route, i.e. ns2 won't be able to reach ns1 and
+# will depend on ns1 being masqueraded in nsr1.
+# expect ns1 has nsr1 address.
+ip -net "$ns2" route del default via 10.0.2.1
+ip -net "$ns2" route del default via dead:2::1
+ip -net "$ns2" route add 192.168.10.1 via 10.0.2.1
+
+# Second test:
+# Same, but with NAT enabled. Same as in first test: we expect normal forward path
+# to handle most packets.
+ip netns exec "$nsr1" nft -f - <<EOF
+table ip nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
+ }
+
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oifname "veth1" counter masquerade
+ }
+}
+EOF
+
+if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then
+ echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2
+ exit 0
+fi
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 0 ""; then
+ echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
+# Third test:
+# Same as second test, but with PMTU discovery enabled. This
+# means that we expect the fastpath to handle packets as soon
+# as the endpoints adjust the packet size.
+ip netns exec "$ns1" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+
+# reset counters.
+# With pmtu in-place we'll also check that nft counters
+# are lower than file size and packets were forwarded via flowtable layer.
+# For earlier tests (large mtus), packets cannot be handled via flowtable
+# (except pure acks and other small packets).
+ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then
+ echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+fi
+
+# Another test:
+# Add bridge interface br0 to Router1, with NAT enabled.
+test_bridge() {
+if ! ip -net "$nsr1" link add name br0 type bridge 2>/dev/null;then
+ echo "SKIP: could not add bridge br0"
+ [ "$ret" -eq 0 ] && ret=$ksft_skip
+ return
+fi
+ip -net "$nsr1" addr flush dev veth0
+ip -net "$nsr1" link set up dev veth0
+ip -net "$nsr1" link set veth0 master br0
+ip -net "$nsr1" addr add 10.0.1.1/24 dev br0
+ip -net "$nsr1" addr add dead:1::1/64 dev br0 nodad
+ip -net "$nsr1" link set up dev br0
+
+ip netns exec "$nsr1" sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
+
+# br0 with NAT enabled.
+ip netns exec "$nsr1" nft -f - <<EOF
+flush table ip nat
+table ip nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
+ }
+
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oifname "veth1" counter masquerade
+ }
+}
+EOF
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "on bridge"; then
+ echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
+
+# Another test:
+# Add bridge interface br0 to Router1, with NAT and VLAN.
+ip -net "$nsr1" link set veth0 nomaster
+ip -net "$nsr1" link set down dev veth0
+ip -net "$nsr1" link add link veth0 name veth0.10 type vlan id 10
+ip -net "$nsr1" link set up dev veth0
+ip -net "$nsr1" link set up dev veth0.10
+ip -net "$nsr1" link set veth0.10 master br0
+
+ip -net "$ns1" addr flush dev eth0
+ip -net "$ns1" link add link eth0 name eth0.10 type vlan id 10
+ip -net "$ns1" link set eth0 up
+ip -net "$ns1" link set eth0.10 up
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0.10
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" addr add dead:1::99/64 dev eth0.10 nodad
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "bridge and VLAN"; then
+ echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
+# restore test topology (remove bridge and VLAN)
+ip -net "$nsr1" link set veth0 nomaster
+ip -net "$nsr1" link set veth0 down
+ip -net "$nsr1" link set veth0.10 down
+ip -net "$nsr1" link delete veth0.10 type vlan
+ip -net "$nsr1" link delete br0 type bridge
+ip -net "$ns1" addr flush dev eth0.10
+ip -net "$ns1" link set eth0.10 down
+ip -net "$ns1" link set eth0 down
+ip -net "$ns1" link delete eth0.10 type vlan
+
+# restore address in ns1 and nsr1
+ip -net "$ns1" link set eth0 up
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via dead:1::1
+ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad
+ip -net "$nsr1" link set up dev veth0
+}
+
+test_bridge
+
+KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1)
+KEY_AES="0x"$(ps -af | md5sum | cut -d " " -f 1)
+SPI1=$RANDOM
+SPI2=$RANDOM
+
+if [ $SPI1 -eq $SPI2 ]; then
+ SPI2=$((SPI2+1))
+fi
+
+do_esp() {
+ local ns=$1
+ local me=$2
+ local remote=$3
+ local lnet=$4
+ local rnet=$5
+ local spi_out=$6
+ local spi_in=$7
+
+ ip -net "$ns" xfrm state add src "$remote" dst "$me" proto esp spi "$spi_in" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$rnet" dst "$lnet"
+ ip -net "$ns" xfrm state add src "$me" dst "$remote" proto esp spi "$spi_out" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$lnet" dst "$rnet"
+
+ # to encrypt packets as they go out (includes forwarded packets that need encapsulation)
+ ip -net "$ns" xfrm policy add src "$lnet" dst "$rnet" dir out tmpl src "$me" dst "$remote" proto esp mode tunnel priority 1 action allow
+ # to fwd decrypted packets after esp processing:
+ ip -net "$ns" xfrm policy add src "$rnet" dst "$lnet" dir fwd tmpl src "$remote" dst "$me" proto esp mode tunnel priority 1 action allow
+}
+
+do_esp "$nsr1" 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 "$SPI1" "$SPI2"
+
+do_esp "$nsr2" 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 "$SPI2" "$SPI1"
+
+ip netns exec "$nsr1" nft delete table ip nat
+
+# restore default routes
+ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns2" route add default via dead:2::1
+
+if test_tcp_forwarding "$ns1" "$ns2"; then
+ check_counters "ipsec tunnel mode for ns1/ns2"
+else
+ echo "FAIL: ipsec tunnel mode for ns1/ns2"
+ ip netns exec "$nsr1" nft list ruleset 1>&2
+ ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
+fi
+
+if [ "$1" = "" ]; then
+ low=1280
+ mtu=$((65536 - low))
+ o=$(((RANDOM%mtu) + low))
+ l=$(((RANDOM%mtu) + low))
+ r=$(((RANDOM%mtu) + low))
+
+ echo "re-run with random mtus: -o $o -l $l -r $r"
+ $0 -o "$o" -l "$l" -r "$r"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_meta.sh b/tools/testing/selftests/net/netfilter/nft_meta.sh
new file mode 100755
index 0000000000..71505b6cb2
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_meta.sh
@@ -0,0 +1,142 @@
+#!/bin/bash
+
+# check iif/iifname/oifgroup/iiftype match.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+
+if ! nft --version > /dev/null 2>&1; then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+cleanup()
+{
+ ip netns del "$ns0"
+}
+
+ip netns add "$ns0"
+ip -net "$ns0" link set lo up
+ip -net "$ns0" addr add 127.0.0.1 dev lo
+
+trap cleanup EXIT
+
+currentyear=$(date +%Y)
+lastyear=$((currentyear-1))
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table inet filter {
+ counter iifcount {}
+ counter iifnamecount {}
+ counter iifgroupcount {}
+ counter iiftypecount {}
+ counter infproto4count {}
+ counter il4protocounter {}
+ counter imarkcounter {}
+ counter icpu0counter {}
+ counter ilastyearcounter {}
+ counter icurrentyearcounter {}
+
+ counter oifcount {}
+ counter oifnamecount {}
+ counter oifgroupcount {}
+ counter oiftypecount {}
+ counter onfproto4count {}
+ counter ol4protocounter {}
+ counter oskuidcounter {}
+ counter oskgidcounter {}
+ counter omarkcounter {}
+
+ chain input {
+ type filter hook input priority 0; policy accept;
+
+ meta iif lo counter name "iifcount"
+ meta iifname "lo" counter name "iifnamecount"
+ meta iifgroup "default" counter name "iifgroupcount"
+ meta iiftype "loopback" counter name "iiftypecount"
+ meta nfproto ipv4 counter name "infproto4count"
+ meta l4proto icmp counter name "il4protocounter"
+ meta mark 42 counter name "imarkcounter"
+ meta cpu 0 counter name "icpu0counter"
+ meta time "$lastyear-01-01" - "$lastyear-12-31" counter name ilastyearcounter
+ meta time "$currentyear-01-01" - "$currentyear-12-31" counter name icurrentyearcounter
+ }
+
+ chain output {
+ type filter hook output priority 0; policy accept;
+ meta oif lo counter name "oifcount" counter
+ meta oifname "lo" counter name "oifnamecount"
+ meta oifgroup "default" counter name "oifgroupcount"
+ meta oiftype "loopback" counter name "oiftypecount"
+ meta nfproto ipv4 counter name "onfproto4count"
+ meta l4proto icmp counter name "ol4protocounter"
+ meta skuid 0 counter name "oskuidcounter"
+ meta skgid 0 counter name "oskgidcounter"
+ meta mark 42 counter name "omarkcounter"
+ }
+}
+EOF
+
+if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add test ruleset"
+ exit $ksft_skip
+fi
+
+ret=0
+
+check_one_counter()
+{
+ local cname="$1"
+ local want="packets $2"
+ local verbose="$3"
+
+ if ! ip netns exec "$ns0" nft list counter inet filter "$cname" | grep -q "$want"; then
+ echo "FAIL: $cname, want \"$want\", got"
+ ret=1
+ ip netns exec "$ns0" nft list counter inet filter "$cname"
+ fi
+}
+
+check_lo_counters()
+{
+ local want="$1"
+ local verbose="$2"
+ local counter
+
+ for counter in iifcount iifnamecount iifgroupcount iiftypecount infproto4count \
+ oifcount oifnamecount oifgroupcount oiftypecount onfproto4count \
+ il4protocounter icurrentyearcounter ol4protocounter \
+ ; do
+ check_one_counter "$counter" "$want" "$verbose"
+ done
+}
+
+check_lo_counters "0" false
+ip netns exec "$ns0" ping -q -c 1 127.0.0.1 -m 42 > /dev/null
+
+check_lo_counters "2" true
+
+check_one_counter oskuidcounter "1" true
+check_one_counter oskgidcounter "1" true
+check_one_counter imarkcounter "1" true
+check_one_counter omarkcounter "1" true
+check_one_counter ilastyearcounter "0" true
+
+if [ $ret -eq 0 ];then
+ echo "OK: nftables meta iif/oif counters at expected values"
+else
+ exit $ret
+fi
+
+#First CPU execution and counter
+taskset -p 01 $$ > /dev/null
+ip netns exec "$ns0" nft reset counters > /dev/null
+ip netns exec "$ns0" ping -q -c 1 127.0.0.1 > /dev/null
+check_one_counter icpu0counter "2" true
+
+if [ $ret -eq 0 ];then
+ echo "OK: nftables meta cpu counter at expected values"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_nat.sh b/tools/testing/selftests/net/netfilter/nft_nat.sh
new file mode 100755
index 0000000000..9e39de2645
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_nat.sh
@@ -0,0 +1,1156 @@
+#!/bin/bash
+#
+# This test is for basic NAT functionality: snat, dnat, redirect, masquerade.
+#
+
+source lib.sh
+
+ret=0
+test_inet_nat=true
+
+checktool "nft --version" "run test without nft tool"
+checktool "socat -h" "run test without socat"
+
+cleanup()
+{
+ ip netns pids "$ns0" | xargs kill 2>/dev/null
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
+
+ rm -f "$INFILE" "$OUTFILE"
+
+ cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+INFILE=$(mktemp)
+OUTFILE=$(mktemp)
+
+setup_ns ns0 ns1 ns2
+
+if ! ip link add veth0 netns "$ns0" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1;then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns2"
+
+ip -net "$ns0" link set veth0 up
+ip -net "$ns0" addr add 10.0.1.1/24 dev veth0
+ip -net "$ns0" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$ns0" link set veth1 up
+ip -net "$ns0" addr add 10.0.2.1/24 dev veth1
+ip -net "$ns0" addr add dead:2::1/64 dev veth1 nodad
+
+do_config()
+{
+ ns="$1"
+ subnet="$2"
+
+ ip -net "$ns" link set eth0 up
+ ip -net "$ns" addr add "10.0.$subnet.99/24" dev eth0
+ ip -net "$ns" route add default via "10.0.$subnet.1"
+ ip -net "$ns" addr add "dead:$subnet::99/64" dev eth0 nodad
+ ip -net "$ns" route add default via "dead:$subnet::1"
+}
+
+do_config "$ns1" 1
+do_config "$ns2" 2
+
+bad_counter()
+{
+ local ns=$1
+ local counter=$2
+ local expect=$3
+ local tag=$4
+
+ echo "ERROR: $counter counter in $ns has unexpected value (expected $expect) at $tag" 1>&2
+ ip netns exec "$ns" nft list counter inet filter "$counter" 1>&2
+}
+
+check_counters()
+{
+ ns=$1
+ local lret=0
+
+ if ! ip netns exec "$ns" nft list counter inet filter ns0in | grep -q "packets 1 bytes 84";then
+ bad_counter "$ns" ns0in "packets 1 bytes 84" "check_counters 1"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns" nft list counter inet filter ns0out | grep -q "packets 1 bytes 84";then
+ bad_counter "$ns" ns0out "packets 1 bytes 84" "check_counters 2"
+ lret=1
+ fi
+
+ expect="packets 1 bytes 104"
+ if ! ip netns exec "$ns" nft list counter inet filter ns0in6 | grep -q "$expect";then
+ bad_counter "$ns" ns0in6 "$expect" "check_counters 3"
+ lret=1
+ fi
+ if ! ip netns exec "$ns" nft list counter inet filter ns0out6 | grep -q "$expect";then
+ bad_counter "$ns" ns0out6 "$expect" "check_counters 4"
+ lret=1
+ fi
+
+ return $lret
+}
+
+check_ns0_counters()
+{
+ local ns=$1
+ local lret=0
+
+ if ! ip netns exec "$ns0" nft list counter inet filter ns0in | grep -q "packets 0 bytes 0";then
+ bad_counter "$ns0" ns0in "packets 0 bytes 0" "check_ns0_counters 1"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns0" nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0";then
+ bad_counter "$ns0" ns0in6 "packets 0 bytes 0"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns0" nft list counter inet filter ns0out | grep -q "packets 0 bytes 0";then
+ bad_counter "$ns0" ns0out "packets 0 bytes 0" "check_ns0_counters 2"
+ lret=1
+ fi
+ if ! ip netns exec "$ns0" nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0";then
+ bad_counter "$ns0" ns0out6 "packets 0 bytes 0" "check_ns0_counters3 "
+ lret=1
+ fi
+
+ for dir in "in" "out" ; do
+ expect="packets 1 bytes 84"
+ if ! ip netns exec "$ns0" nft list counter inet filter "${ns}${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" "$ns${dir}" "$expect" "check_ns0_counters 4"
+ lret=1
+ fi
+
+ expect="packets 1 bytes 104"
+ if ! ip netns exec "$ns0" nft list counter inet filter "${ns}${dir}6" | grep -q "$expect";then
+ bad_counter "$ns0" "$ns${dir}6" "$expect" "check_ns0_counters 5"
+ lret=1
+ fi
+ done
+
+ return $lret
+}
+
+reset_counters()
+{
+ for i in "$ns0" "$ns1" "$ns2" ;do
+ ip netns exec "$i" nft reset counters inet > /dev/null
+ done
+}
+
+test_local_dnat6()
+{
+ local family=$1
+ local lret=0
+ local IPF=""
+
+ if [ "$family" = "inet" ];then
+ IPF="ip6"
+ fi
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+ chain output {
+ type nat hook output priority 0; policy accept;
+ ip6 daddr dead:1::99 dnat $IPF to dead:2::99
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add add $family dnat hook"
+ return $ksft_skip
+ fi
+
+ # ping netns1, expect rewrite to netns2
+ if ! ip netns exec "$ns0" ping -q -c 1 dead:1::99 > /dev/null;then
+ lret=1
+ echo "ERROR: ping6 failed"
+ return $lret
+ fi
+
+ expect="packets 0 bytes 0"
+ for dir in "in6" "out6" ; do
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" ns1$dir "$expect" "test_local_dnat6 1"
+ lret=1
+ fi
+ done
+
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat6 2"
+ lret=1
+ fi
+ done
+
+ # expect 0 count in ns1
+ expect="packets 0 bytes 0"
+ for dir in "in6" "out6" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" ns0$dir "$expect" "test_local_dnat6 3"
+ lret=1
+ fi
+ done
+
+ # expect 1 packet in ns2
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
+ bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat6 4"
+ lret=1
+ fi
+ done
+
+ test $lret -eq 0 && echo "PASS: ipv6 ping to $ns1 was $family NATted to $ns2"
+ ip netns exec "$ns0" nft flush chain ip6 nat output
+
+ return $lret
+}
+
+test_local_dnat()
+{
+ local family=$1
+ local lret=0
+ local IPF=""
+
+ if [ "$family" = "inet" ];then
+ IPF="ip"
+ fi
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF 2>/dev/null
+table $family nat {
+ chain output {
+ type nat hook output priority 0; policy accept;
+ ip daddr 10.0.1.99 dnat $IPF to 10.0.2.99
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ if [ "$family" = "inet" ];then
+ echo "SKIP: inet nat tests"
+ test_inet_nat=false
+ return $ksft_skip
+ fi
+ echo "SKIP: Could not add add $family dnat hook"
+ return $ksft_skip
+ fi
+
+ # ping netns1, expect rewrite to netns2
+ if ! ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null;then
+ lret=1
+ echo "ERROR: ping failed"
+ return $lret
+ fi
+
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" "ns1$dir" "$expect" "test_local_dnat 1"
+ lret=1
+ fi
+ done
+
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" "ns2$dir" "$expect" "test_local_dnat 2"
+ lret=1
+ fi
+ done
+
+ # expect 0 count in ns1
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect";then
+ bad_counter "$ns1" "ns0$dir" "$expect" "test_local_dnat 3"
+ lret=1
+ fi
+ done
+
+ # expect 1 packet in ns2
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect";then
+ bad_counter "$ns2" "ns0$dir" "$expect" "test_local_dnat 4"
+ lret=1
+ fi
+ done
+
+ test $lret -eq 0 && echo "PASS: ping to $ns1 was $family NATted to $ns2"
+
+ ip netns exec "$ns0" nft flush chain "$family" nat output
+
+ reset_counters
+ if ! ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null;then
+ lret=1
+ echo "ERROR: ping failed"
+ return $lret
+ fi
+
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" ns1$dir "$expect" "test_local_dnat 5"
+ lret=1
+ fi
+ done
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat 6"
+ lret=1
+ fi
+ done
+
+ # expect 1 count in ns1
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" ns0$dir "$expect" "test_local_dnat 7"
+ lret=1
+ fi
+ done
+
+ # expect 0 packet in ns2
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
+ bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat 8"
+ lret=1
+ fi
+ done
+
+ test $lret -eq 0 && echo "PASS: ping to $ns1 OK after $family nat output chain flush"
+
+ return $lret
+}
+
+listener_ready()
+{
+ local ns="$1"
+ local port="$2"
+ local proto="$3"
+ ss -N "$ns" -ln "$proto" -o "sport = :$port" | grep -q "$port"
+}
+
+test_local_dnat_portonly()
+{
+ local family=$1
+ local daddr=$2
+ local lret=0
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+ chain output {
+ type nat hook output priority 0; policy accept;
+ meta l4proto tcp dnat to :2000
+
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ if [ "$family" = "inet" ];then
+ echo "SKIP: inet port test"
+ test_inet_nat=false
+ return
+ fi
+ echo "SKIP: Could not add $family dnat hook"
+ return
+ fi
+
+ echo "SERVER-$family" | ip netns exec "$ns1" timeout 3 socat -u STDIN TCP-LISTEN:2000 &
+
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1" 2000 "-t"
+
+ result=$(ip netns exec "$ns0" timeout 1 socat -u TCP:"$daddr":2000 STDOUT)
+
+ if [ "$result" = "SERVER-inet" ];then
+ echo "PASS: inet port rewrite without l3 address"
+ else
+ echo "ERROR: inet port rewrite without l3 address, got $result"
+ ret=1
+ fi
+}
+
+test_masquerade6()
+{
+ local family=$1
+ local natflags=$2
+ local lret=0
+
+ ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+ if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2 via ipv6"
+ return 1
+ fi
+
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" "ns2$dir" "$expect" "test_masquerade6 1"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns2" "ns1$dir" "$expect" "test_masquerade6 2"
+ lret=1
+ fi
+ done
+
+ reset_counters
+
+# add masquerading rule
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oif veth0 masquerade $natflags
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add add $family masquerade hook"
+ return $ksft_skip
+ fi
+
+ if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2 with active $family masquerade $natflags"
+ lret=1
+ fi
+
+ # ns1 should have seen packets from ns0, due to masquerade
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" ns0$dir "$expect" "test_masquerade6 3"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns2" ns1$dir "$expect" "test_masquerade6 4"
+ lret=1
+ fi
+ done
+
+ # ns1 should not have seen packets from ns2, due to masquerade
+ expect="packets 0 bytes 0"
+ for dir in "in6" "out6" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" ns0$dir "$expect" "test_masquerade6 5"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" "ns1$dir" "$expect" "test_masquerade6 6"
+ lret=1
+ fi
+ done
+
+ if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2 with active ipv6 masquerade $natflags (attempt 2)"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns0" nft flush chain "$family" nat postrouting;then
+ echo "ERROR: Could not flush $family nat postrouting" 1>&2
+ lret=1
+ fi
+
+ test $lret -eq 0 && echo "PASS: $family IPv6 masquerade $natflags for $ns2"
+
+ return $lret
+}
+
+test_masquerade()
+{
+ local family=$1
+ local natflags=$2
+ local lret=0
+
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2 $natflags"
+ lret=1
+ fi
+
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" "ns2$dir" "$expect" "test_masquerade 1"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns2" "ns1$dir" "$expect" "test_masquerade 2"
+ lret=1
+ fi
+ done
+
+ reset_counters
+
+# add masquerading rule
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oif veth0 masquerade $natflags
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add add $family masquerade hook"
+ return $ksft_skip
+ fi
+
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2 with active $family masquerade $natflags"
+ lret=1
+ fi
+
+ # ns1 should have seen packets from ns0, due to masquerade
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" "ns0$dir" "$expect" "test_masquerade 3"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns2" "ns1$dir" "$expect" "test_masquerade 4"
+ lret=1
+ fi
+ done
+
+ # ns1 should not have seen packets from ns2, due to masquerade
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" "ns0$dir" "$expect" "test_masquerade 5"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" "ns1$dir" "$expect" "test_masquerade 6"
+ lret=1
+ fi
+ done
+
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2 with active ip masquerade $natflags (attempt 2)"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns0" nft flush chain "$family" nat postrouting; then
+ echo "ERROR: Could not flush $family nat postrouting" 1>&2
+ lret=1
+ fi
+
+ test $lret -eq 0 && echo "PASS: $family IP masquerade $natflags for $ns2"
+
+ return $lret
+}
+
+test_redirect6()
+{
+ local family=$1
+ local lret=0
+
+ ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+ if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
+ echo "ERROR: cannnot ping $ns1 from $ns2 via ipv6"
+ lret=1
+ fi
+
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" ns2$dir "$expect" "test_redirect6 1"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns2" ns1$dir "$expect" "test_redirect6 2"
+ lret=1
+ fi
+ done
+
+ reset_counters
+
+# add redirect rule
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ meta iif veth1 meta l4proto icmpv6 ip6 saddr dead:2::99 ip6 daddr dead:1::99 redirect
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add add $family redirect hook"
+ return $ksft_skip
+ fi
+
+ if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2 via ipv6 with active $family redirect"
+ lret=1
+ fi
+
+ # ns1 should have seen no packets from ns2, due to redirection
+ expect="packets 0 bytes 0"
+ for dir in "in6" "out6" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" ns0$dir "$expect" "test_redirect6 3"
+ lret=1
+ fi
+ done
+
+ # ns0 should have seen packets from ns2, due to masquerade
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" ns0$dir "$expect" "test_redirect6 4"
+ lret=1
+ fi
+ done
+
+ if ! ip netns exec "$ns0" nft delete table "$family" nat;then
+ echo "ERROR: Could not delete $family nat table" 1>&2
+ lret=1
+ fi
+
+ test $lret -eq 0 && echo "PASS: $family IPv6 redirection for $ns2"
+
+ return $lret
+}
+
+test_redirect()
+{
+ local family=$1
+ local lret=0
+
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2"
+ lret=1
+ fi
+
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" "$ns2$dir" "$expect" "test_redirect 1"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect";then
+ bad_counter "$ns2" ns1$dir "$expect" "test_redirect 2"
+ lret=1
+ fi
+ done
+
+ reset_counters
+
+# add redirect rule
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ meta iif veth1 ip protocol icmp ip saddr 10.0.2.99 ip daddr 10.0.1.99 redirect
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add add $family redirect hook"
+ return $ksft_skip
+ fi
+
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2 with active $family ip redirect"
+ lret=1
+ fi
+
+ # ns1 should have seen no packets from ns2, due to redirection
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" ns0$dir "$expect" "test_redirect 3"
+ lret=1
+ fi
+ done
+
+ # ns0 should have seen packets from ns2, due to masquerade
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" ns0$dir "$expect" "test_redirect 4"
+ lret=1
+ fi
+ done
+
+ if ! ip netns exec "$ns0" nft delete table "$family" nat;then
+ echo "ERROR: Could not delete $family nat table" 1>&2
+ lret=1
+ fi
+
+ test $lret -eq 0 && echo "PASS: $family IP redirection for $ns2"
+
+ return $lret
+}
+
+# test port shadowing.
+# create two listening services, one on router (ns0), one
+# on client (ns2), which is masqueraded from ns1 point of view.
+# ns2 sends udp packet coming from service port to ns1, on a highport.
+# Later, if n1 uses same highport to connect to ns0:service, packet
+# might be port-forwarded to ns2 instead.
+
+# second argument tells if we expect the 'fake-entry' to take effect
+# (CLIENT) or not (ROUTER).
+test_port_shadow()
+{
+ local test=$1
+ local expect=$2
+ local daddrc="10.0.1.99"
+ local daddrs="10.0.1.1"
+ local result=""
+ local logmsg=""
+
+ # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
+ echo "fake-entry" | ip netns exec "$ns2" timeout 1 socat -u STDIN UDP:"$daddrc":41404,sourceport=1405
+
+ echo ROUTER | ip netns exec "$ns0" timeout 3 socat -T 3 -u STDIN UDP4-LISTEN:1405 2>/dev/null &
+ local sc_r=$!
+ echo CLIENT | ip netns exec "$ns2" timeout 3 socat -T 3 -u STDIN UDP4-LISTEN:1405,reuseport 2>/dev/null &
+ local sc_c=$!
+
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns0" 1405 "-u"
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns2" 1405 "-u"
+
+ # ns1 tries to connect to ns0:1405. With default settings this should connect
+ # to client, it matches the conntrack entry created above.
+
+ result=$(echo "data" | ip netns exec "$ns1" timeout 1 socat - UDP:"$daddrs":1405,sourceport=41404)
+
+ if [ "$result" = "$expect" ] ;then
+ echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}"
+ else
+ echo "ERROR: portshadow test $test: got reply from \"$result\", not $expect as intended"
+ ret=1
+ fi
+
+ kill $sc_r $sc_c 2>/dev/null
+
+ # flush udp entries for next test round, if any
+ ip netns exec "$ns0" conntrack -F >/dev/null 2>&1
+}
+
+# This prevents port shadow of router service via packet filter,
+# packets claiming to originate from service port from internal
+# network are dropped.
+test_port_shadow_filter()
+{
+ local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family filter {
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ meta iif veth1 udp sport 1405 drop
+ }
+}
+EOF
+ test_port_shadow "port-filter" "ROUTER"
+
+ ip netns exec "$ns0" nft delete table "$family" filter
+}
+
+# This prevents port shadow of router service via notrack.
+test_port_shadow_notrack()
+{
+ local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family raw {
+ chain prerouting {
+ type filter hook prerouting priority -300; policy accept;
+ meta iif veth0 udp dport 1405 notrack
+ }
+ chain output {
+ type filter hook output priority -300; policy accept;
+ meta oif veth0 udp sport 1405 notrack
+ }
+}
+EOF
+ test_port_shadow "port-notrack" "ROUTER"
+
+ ip netns exec "$ns0" nft delete table "$family" raw
+}
+
+# This prevents port shadow of router service via sport remap.
+test_port_shadow_pat()
+{
+ local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family pat {
+ chain postrouting {
+ type nat hook postrouting priority -1; policy accept;
+ meta iif veth1 udp sport <= 1405 masquerade to : 1406-65535 random
+ }
+}
+EOF
+ test_port_shadow "pat" "ROUTER"
+
+ ip netns exec "$ns0" nft delete table "$family" pat
+}
+
+test_port_shadowing()
+{
+ local family="ip"
+
+ if ! conntrack -h >/dev/null 2>&1;then
+ echo "SKIP: Could not run nat port shadowing test without conntrack tool"
+ return
+ fi
+
+ if ! socat -h > /dev/null 2>&1;then
+ echo "SKIP: Could not run nat port shadowing test without socat tool"
+ return
+ fi
+
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+ ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oif veth0 masquerade
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add add $family masquerade hook"
+ return $ksft_skip
+ fi
+
+ # test default behaviour. Packet from ns1 to ns0 is redirected to ns2.
+ test_port_shadow "default" "CLIENT"
+
+ # test packet filter based mitigation: prevent forwarding of
+ # packets claiming to come from the service port.
+ test_port_shadow_filter "$family"
+
+ # test conntrack based mitigation: connections going or coming
+ # from router:service bypass connection tracking.
+ test_port_shadow_notrack "$family"
+
+ # test nat based mitigation: fowarded packets coming from service port
+ # are masqueraded with random highport.
+ test_port_shadow_pat "$family"
+
+ ip netns exec "$ns0" nft delete table $family nat
+}
+
+test_stateless_nat_ip()
+{
+ local lret=0
+
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2 before loading stateless rules"
+ return 1
+ fi
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table ip stateless {
+ map xlate_in {
+ typeof meta iifname . ip saddr . ip daddr : ip daddr
+ elements = {
+ "veth1" . 10.0.2.99 . 10.0.1.99 : 10.0.2.2,
+ }
+ }
+ map xlate_out {
+ typeof meta iifname . ip saddr . ip daddr : ip daddr
+ elements = {
+ "veth0" . 10.0.1.99 . 10.0.2.2 : 10.0.2.99
+ }
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority -400; policy accept;
+ ip saddr set meta iifname . ip saddr . ip daddr map @xlate_in
+ ip daddr set meta iifname . ip saddr . ip daddr map @xlate_out
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add ip statless rules"
+ return $ksft_skip
+ fi
+
+ reset_counters
+
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null; then
+ echo "ERROR: cannot ping $ns1 from $ns2 with stateless rules"
+ lret=1
+ fi
+
+ # ns1 should have seen packets from .2.2, due to stateless rewrite.
+ expect="packets 1 bytes 84"
+ if ! ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect";then
+ bad_counter "$ns1" ns0insl "$expect" "test_stateless 1"
+ lret=1
+ fi
+
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect";then
+ bad_counter "$ns2" ns1$dir "$expect" "test_stateless 2"
+ lret=1
+ fi
+ done
+
+ # ns1 should not have seen packets from ns2, due to masquerade
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect";then
+ bad_counter "$ns1" ns0$dir "$expect" "test_stateless 3"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect";then
+ bad_counter "$ns0" ns1$dir "$expect" "test_stateless 4"
+ lret=1
+ fi
+ done
+
+ reset_counters
+
+ if ! socat -h > /dev/null 2>&1;then
+ echo "SKIP: Could not run stateless nat frag test without socat tool"
+ if [ $lret -eq 0 ]; then
+ return $ksft_skip
+ fi
+
+ ip netns exec "$ns0" nft delete table ip stateless
+ return $lret
+ fi
+
+ dd if=/dev/urandom of="$INFILE" bs=4096 count=1 2>/dev/null
+
+ ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:"$OUTFILE" < /dev/null 2>/dev/null &
+
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1" 4233 "-u"
+
+ # re-do with large ping -> ip fragmentation
+ if ! ip netns exec "$ns2" timeout 3 socat -u STDIN UDP4-SENDTO:"10.0.1.99:4233" < "$INFILE" > /dev/null;then
+ echo "ERROR: failed to test udp $ns1 to $ns2 with stateless ip nat" 1>&2
+ lret=1
+ fi
+
+ wait
+
+ if ! cmp "$INFILE" "$OUTFILE";then
+ ls -l "$INFILE" "$OUTFILE"
+ echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2
+ lret=1
+ fi
+
+ :> "$OUTFILE"
+
+ # ns1 should have seen packets from 2.2, due to stateless rewrite.
+ expect="packets 3 bytes 4164"
+ if ! ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect";then
+ bad_counter "$ns1" ns0insl "$expect" "test_stateless 5"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns0" nft delete table ip stateless; then
+ echo "ERROR: Could not delete table ip stateless" 1>&2
+ lret=1
+ fi
+
+ test $lret -eq 0 && echo "PASS: IP statless for $ns2"
+
+ return $lret
+}
+
+# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
+for i in "$ns0" "$ns1" "$ns2" ;do
+ip netns exec "$i" nft -f /dev/stdin <<EOF
+table inet filter {
+ counter ns0in {}
+ counter ns1in {}
+ counter ns2in {}
+
+ counter ns0out {}
+ counter ns1out {}
+ counter ns2out {}
+
+ counter ns0in6 {}
+ counter ns1in6 {}
+ counter ns2in6 {}
+
+ counter ns0out6 {}
+ counter ns1out6 {}
+ counter ns2out6 {}
+
+ map nsincounter {
+ type ipv4_addr : counter
+ elements = { 10.0.1.1 : "ns0in",
+ 10.0.2.1 : "ns0in",
+ 10.0.1.99 : "ns1in",
+ 10.0.2.99 : "ns2in" }
+ }
+
+ map nsincounter6 {
+ type ipv6_addr : counter
+ elements = { dead:1::1 : "ns0in6",
+ dead:2::1 : "ns0in6",
+ dead:1::99 : "ns1in6",
+ dead:2::99 : "ns2in6" }
+ }
+
+ map nsoutcounter {
+ type ipv4_addr : counter
+ elements = { 10.0.1.1 : "ns0out",
+ 10.0.2.1 : "ns0out",
+ 10.0.1.99: "ns1out",
+ 10.0.2.99: "ns2out" }
+ }
+
+ map nsoutcounter6 {
+ type ipv6_addr : counter
+ elements = { dead:1::1 : "ns0out6",
+ dead:2::1 : "ns0out6",
+ dead:1::99 : "ns1out6",
+ dead:2::99 : "ns2out6" }
+ }
+
+ chain input {
+ type filter hook input priority 0; policy accept;
+ counter name ip saddr map @nsincounter
+ icmpv6 type { "echo-request", "echo-reply" } counter name ip6 saddr map @nsincounter6
+ }
+ chain output {
+ type filter hook output priority 0; policy accept;
+ counter name ip daddr map @nsoutcounter
+ icmpv6 type { "echo-request", "echo-reply" } counter name ip6 daddr map @nsoutcounter6
+ }
+}
+EOF
+done
+
+# special case for stateless nat check, counter needs to
+# be done before (input) ip defragmentation
+ip netns exec "$ns1" nft -f /dev/stdin <<EOF
+table inet filter {
+ counter ns0insl {}
+
+ chain pre {
+ type filter hook prerouting priority -400; policy accept;
+ ip saddr 10.0.2.2 counter name "ns0insl"
+ }
+}
+EOF
+
+ping_basic()
+{
+ i="$1"
+ if ! ip netns exec "$ns0" ping -c 1 -q 10.0."$i".99 > /dev/null;then
+ echo "ERROR: Could not reach other namespace(s)" 1>&2
+ ret=1
+ fi
+
+ if ! ip netns exec "$ns0" ping -c 1 -q dead:"$i"::99 > /dev/null;then
+ echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2
+ ret=1
+ fi
+}
+
+test_basic_conn()
+{
+ local nsexec
+ name="$1"
+
+ nsexec=$(eval echo \$"$1")
+
+ ping_basic 1
+ ping_basic 2
+
+ if ! check_counters "$nsexec";then
+ return 1
+ fi
+
+ if ! check_ns0_counters "$name";then
+ return 1
+ fi
+
+ reset_counters
+ return 0
+}
+
+if ! test_basic_conn "ns1" ; then
+ echo "ERROR: basic test for ns1 failed" 1>&2
+ exit 1
+fi
+if ! test_basic_conn "ns2"; then
+ echo "ERROR: basic test for ns1 failed" 1>&2
+fi
+
+if [ $ret -eq 0 ];then
+ echo "PASS: netns routing/connectivity: $ns0 can reach $ns1 and $ns2"
+fi
+
+reset_counters
+test_local_dnat ip
+test_local_dnat6 ip6
+
+reset_counters
+test_local_dnat_portonly inet 10.0.1.99
+
+reset_counters
+$test_inet_nat && test_local_dnat inet
+$test_inet_nat && test_local_dnat6 inet
+
+for flags in "" "fully-random"; do
+reset_counters
+test_masquerade ip $flags
+test_masquerade6 ip6 $flags
+reset_counters
+$test_inet_nat && test_masquerade inet $flags
+$test_inet_nat && test_masquerade6 inet $flags
+done
+
+reset_counters
+test_redirect ip
+test_redirect6 ip6
+reset_counters
+$test_inet_nat && test_redirect inet
+$test_inet_nat && test_redirect6 inet
+
+test_port_shadowing
+test_stateless_nat_ip
+
+if [ $ret -ne 0 ];then
+ echo -n "FAIL: "
+ nft --version
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
new file mode 100755
index 0000000000..3b81d88bdd
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
@@ -0,0 +1,267 @@
+#!/bin/bash
+#
+# Test connection tracking zone and NAT source port reallocation support.
+#
+
+source lib.sh
+
+# Don't increase too much, 2000 clients should work
+# just fine but script can then take several minutes with
+# KASAN/debug builds.
+maxclients=100
+
+have_socat=0
+ret=0
+
+[ "$KSFT_MACHINE_SLOW" = yes ] && maxclients=40
+# client1---.
+# veth1-.
+# |
+# NAT Gateway --veth0--> Server
+# | |
+# veth2-' |
+# client2---' |
+# .... |
+# clientX----vethX---'
+
+# All clients share identical IP address.
+# NAT Gateway uses policy routing and conntrack zones to isolate client
+# namespaces. Each client connects to Server, each with colliding tuples:
+# clientsaddr:10000 -> serveraddr:dport
+# NAT Gateway is supposed to do port reallocation for each of the
+# connections.
+
+v4gc1=$(sysctl -n net.ipv4.neigh.default.gc_thresh1 2>/dev/null)
+v4gc2=$(sysctl -n net.ipv4.neigh.default.gc_thresh2 2>/dev/null)
+v4gc3=$(sysctl -n net.ipv4.neigh.default.gc_thresh3 2>/dev/null)
+v6gc1=$(sysctl -n net.ipv6.neigh.default.gc_thresh1 2>/dev/null)
+v6gc2=$(sysctl -n net.ipv6.neigh.default.gc_thresh2 2>/dev/null)
+v6gc3=$(sysctl -n net.ipv6.neigh.default.gc_thresh3 2>/dev/null)
+
+cleanup()
+{
+ cleanup_all_ns
+
+ sysctl -q net.ipv4.neigh.default.gc_thresh1="$v4gc1" 2>/dev/null
+ sysctl -q net.ipv4.neigh.default.gc_thresh2="$v4gc2" 2>/dev/null
+ sysctl -q net.ipv4.neigh.default.gc_thresh3="$v4gc3" 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh1="$v6gc1" 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh2="$v6gc2" 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh3="$v6gc3" 2>/dev/null
+}
+
+checktool "nft --version" echo "run test without nft tool"
+checktool "conntrack -V" "run test without conntrack tool"
+
+if socat -h >/dev/null 2>&1; then
+ have_socat=1
+fi
+
+setup_ns gw srv
+
+trap cleanup EXIT
+
+ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv"
+ip -net "$gw" link set veth0 up
+ip -net "$srv" link set eth0 up
+
+sysctl -q net.ipv6.neigh.default.gc_thresh1=512 2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh3=4096 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh1=512 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh3=4096 2>/dev/null
+
+for i in $(seq 1 "$maxclients");do
+ setup_ns "cl$i"
+
+ cl=$(eval echo \$cl"$i")
+ if ! ip link add veth"$i" netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1;then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+ fi
+done
+
+for i in $(seq 1 "$maxclients");do
+ cl=$(eval echo \$cl"$i")
+ echo netns exec "$cl" ip link set eth0 up
+ echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
+ echo netns exec "$gw" ip link set "veth$i" up
+ echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".arp_ignore=2
+ echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".rp_filter=0
+
+ # clients have same IP addresses.
+ echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
+ echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0 nodad
+ echo netns exec "$cl" ip route add default via 10.1.0.2 dev eth0
+ echo netns exec "$cl" ip route add default via dead:1::2 dev eth0
+
+ # NB: same addresses on client-facing interfaces.
+ echo netns exec "$gw" ip addr add 10.1.0.2/24 dev "veth$i"
+ echo netns exec "$gw" ip addr add dead:1::2/64 dev "veth$i" nodad
+
+ # gw: policy routing
+ echo netns exec "$gw" ip route add 10.1.0.0/24 dev "veth$i" table $((1000+i))
+ echo netns exec "$gw" ip route add dead:1::0/64 dev "veth$i" table $((1000+i))
+ echo netns exec "$gw" ip route add 10.3.0.0/24 dev veth0 table $((1000+i))
+ echo netns exec "$gw" ip route add dead:3::0/64 dev veth0 table $((1000+i))
+ echo netns exec "$gw" ip rule add fwmark "$i" lookup $((1000+i))
+done | ip -batch /dev/stdin
+
+ip -net "$gw" addr add 10.3.0.1/24 dev veth0
+ip -net "$gw" addr add dead:3::1/64 dev veth0 nodad
+
+ip -net "$srv" addr add 10.3.0.99/24 dev eth0
+ip -net "$srv" addr add dead:3::99/64 dev eth0 nodad
+
+ip netns exec "$gw" nft -f /dev/stdin<<EOF
+table inet raw {
+ map iiftomark {
+ type ifname : mark
+ }
+
+ map iiftozone {
+ typeof iifname : ct zone
+ }
+
+ set inicmp {
+ flags dynamic
+ type ipv4_addr . ifname . ipv4_addr
+ }
+ set inflows {
+ flags dynamic
+ type ipv4_addr . inet_service . ifname . ipv4_addr . inet_service
+ }
+
+ set inflows6 {
+ flags dynamic
+ type ipv6_addr . inet_service . ifname . ipv6_addr . inet_service
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority -64000; policy accept;
+ ct original zone set meta iifname map @iiftozone
+ meta mark set meta iifname map @iiftomark
+
+ tcp flags & (syn|ack) == ack add @inflows { ip saddr . tcp sport . meta iifname . ip daddr . tcp dport counter }
+ add @inflows6 { ip6 saddr . tcp sport . meta iifname . ip6 daddr . tcp dport counter }
+ ip protocol icmp add @inicmp { ip saddr . meta iifname . ip daddr counter }
+ }
+
+ chain nat_postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ct mark set meta mark meta oifname veth0 masquerade
+ }
+
+ chain mangle_prerouting {
+ type filter hook prerouting priority -100; policy accept;
+ ct direction reply meta mark set ct mark
+ }
+}
+EOF
+if [ "$?" -ne 0 ];then
+ echo "SKIP: Could not add nftables rules"
+ exit $ksft_skip
+fi
+
+( echo add element inet raw iiftomark \{
+ for i in $(seq 1 $((maxclients-1))); do
+ echo \"veth"$i"\" : "$i",
+ done
+ echo \"veth"$maxclients"\" : "$maxclients" \}
+ echo add element inet raw iiftozone \{
+ for i in $(seq 1 $((maxclients-1))); do
+ echo \"veth"$i"\" : "$i",
+ done
+ echo \"veth$maxclients\" : $maxclients \}
+) | ip netns exec "$gw" nft -f /dev/stdin
+
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null
+
+# useful for debugging: allows to use 'ping' from clients to gateway.
+ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.fwmark_reflect=1 > /dev/null
+
+for i in $(seq 1 "$maxclients"); do
+ cl=$(eval echo \$cl"$i")
+ ip netns exec "$cl" ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 &
+done
+
+wait || ret=1
+
+[ "$ret" -ne 0 ] && "FAIL: Ping failure from $cl" 1>&2
+
+for i in $(seq 1 "$maxclients"); do
+ if ! ip netns exec "$gw" nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }"; then
+ ret=1
+ echo "FAIL: counter icmp mismatch for veth$i" 1>&2
+ ip netns exec "$gw" nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2
+ break
+ fi
+done
+
+if ! ip netns exec "$gw" nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * maxclients)) bytes $((252 * maxclients)) }"; then
+ ret=1
+ echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * maxclients)) bytes $((252 * maxclients)) }"
+ ip netns exec "$gw" nft get element inet raw inicmp "{ 10.3.99 . \"veth0\" . 10.3.0.1 }" 1>&2
+fi
+
+if [ $ret -eq 0 ]; then
+ echo "PASS: ping test from all $maxclients namespaces"
+fi
+
+if [ $have_socat -eq 0 ];then
+ echo "SKIP: socat not installed"
+ if [ $ret -ne 0 ];then
+ exit $ret
+ fi
+ exit $ksft_skip
+fi
+
+listener_ready()
+{
+ ss -N "$1" -lnt -o "sport = :5201" | grep -q 5201
+}
+
+ip netns exec "$srv" socat -u TCP-LISTEN:5201,fork STDOUT > /dev/null 2>/dev/null &
+socatpid=$!
+
+busywait 1000 listener_ready "$srv"
+
+for i in $(seq 1 "$maxclients"); do
+ if [ $ret -ne 0 ]; then
+ break
+ fi
+ cl=$(eval echo \$cl"$i")
+ if ! ip netns exec "$cl" socat -4 -u STDIN TCP:10.3.0.99:5201,sourceport=10000 < /dev/null > /dev/null; then
+ echo "FAIL: Failure to connect for $cl" 1>&2
+ ip netns exec "$gw" conntrack -S 1>&2
+ ret=1
+ fi
+done
+if [ $ret -eq 0 ];then
+ echo "PASS: socat connections for all $maxclients net namespaces"
+fi
+
+kill $socatpid
+wait
+
+for i in $(seq 1 "$maxclients"); do
+ if ! ip netns exec "$gw" nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null;then
+ ret=1
+ echo "FAIL: can't find expected tcp entry for veth$i" 1>&2
+ break
+ fi
+done
+if [ $ret -eq 0 ];then
+ echo "PASS: Found client connection for all $maxclients net namespaces"
+fi
+
+if ! ip netns exec "$gw" nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null;then
+ ret=1
+ echo "FAIL: cannot find return entry on veth0" 1>&2
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
new file mode 100755
index 0000000000..8538f08c64
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -0,0 +1,417 @@
+#!/bin/bash
+#
+# This tests nf_queue:
+# 1. can process packets from all hooks
+# 2. support running nfqueue from more than one base chain
+#
+# shellcheck disable=SC2162,SC2317
+
+source lib.sh
+ret=0
+timeout=2
+
+cleanup()
+{
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
+ ip netns pids "$nsrouter" | xargs kill 2>/dev/null
+
+ cleanup_all_ns
+
+ rm -f "$TMPINPUT"
+ rm -f "$TMPFILE0"
+ rm -f "$TMPFILE1"
+ rm -f "$TMPFILE2" "$TMPFILE3"
+}
+
+checktool "nft --version" "test without nft tool"
+
+trap cleanup EXIT
+
+setup_ns ns1 ns2 nsrouter
+
+TMPFILE0=$(mktemp)
+TMPFILE1=$(mktemp)
+TMPFILE2=$(mktemp)
+TMPFILE3=$(mktemp)
+
+TMPINPUT=$(mktemp)
+dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT"
+
+if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2"
+
+ip -net "$nsrouter" link set veth0 up
+ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$nsrouter" link set veth1 up
+ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1
+ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad
+
+ip -net "$ns1" link set eth0 up
+ip -net "$ns2" link set eth0 up
+
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" route add default via dead:1::1
+
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns2" route add default via dead:2::1
+
+load_ruleset() {
+ local name=$1
+ local prio=$2
+
+ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+table inet $name {
+ chain nfq {
+ ip protocol icmp queue bypass
+ icmpv6 type { "echo-request", "echo-reply" } queue num 1 bypass
+ }
+ chain pre {
+ type filter hook prerouting priority $prio; policy accept;
+ jump nfq
+ }
+ chain input {
+ type filter hook input priority $prio; policy accept;
+ jump nfq
+ }
+ chain forward {
+ type filter hook forward priority $prio; policy accept;
+ tcp dport 12345 queue num 2
+ jump nfq
+ }
+ chain output {
+ type filter hook output priority $prio; policy accept;
+ tcp dport 12345 queue num 3
+ tcp sport 23456 queue num 3
+ jump nfq
+ }
+ chain post {
+ type filter hook postrouting priority $prio; policy accept;
+ jump nfq
+ }
+}
+EOF
+}
+
+load_counter_ruleset() {
+ local prio=$1
+
+ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+table inet countrules {
+ chain pre {
+ type filter hook prerouting priority $prio; policy accept;
+ counter
+ }
+ chain input {
+ type filter hook input priority $prio; policy accept;
+ counter
+ }
+ chain forward {
+ type filter hook forward priority $prio; policy accept;
+ counter
+ }
+ chain output {
+ type filter hook output priority $prio; policy accept;
+ counter
+ }
+ chain post {
+ type filter hook postrouting priority $prio; policy accept;
+ counter
+ }
+}
+EOF
+}
+
+test_ping() {
+ if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
+ return 1
+ fi
+
+ if ! ip netns exec "$ns1" ping -c 1 -q dead:2::99 > /dev/null; then
+ return 2
+ fi
+
+ return 0
+}
+
+test_ping_router() {
+ if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.1 > /dev/null; then
+ return 3
+ fi
+
+ if ! ip netns exec "$ns1" ping -c 1 -q dead:2::1 > /dev/null; then
+ return 4
+ fi
+
+ return 0
+}
+
+test_queue_blackhole() {
+ local proto=$1
+
+ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+table $proto blackh {
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ queue num 600
+ }
+}
+EOF
+ if [ "$proto" = "ip" ] ;then
+ ip netns exec "$ns1" ping -W 2 -c 1 -q 10.0.2.99 > /dev/null
+ lret=$?
+ elif [ "$proto" = "ip6" ]; then
+ ip netns exec "$ns1" ping -W 2 -c 1 -q dead:2::99 > /dev/null
+ lret=$?
+ else
+ lret=111
+ fi
+
+ # queue without bypass keyword should drop traffic if no listener exists.
+ if [ "$lret" -eq 0 ];then
+ echo "FAIL: $proto expected failure, got $lret" 1>&2
+ exit 1
+ fi
+
+ if ! ip netns exec "$nsrouter" nft delete table "$proto" blackh; then
+ echo "FAIL: $proto: Could not delete blackh table"
+ exit 1
+ fi
+
+ echo "PASS: $proto: statement with no listener results in packet drop"
+}
+
+nf_queue_wait()
+{
+ local procfile="/proc/self/net/netfilter/nfnetlink_queue"
+ local netns id
+
+ netns="$1"
+ id="$2"
+
+ # if this file doesn't exist, nfnetlink_module isn't loaded.
+ # rather than loading it ourselves, wait for kernel module autoload
+ # completion, nfnetlink should do so automatically because nf_queue
+ # helper program, spawned in the background, asked for this functionality.
+ test -f "$procfile" &&
+ ip netns exec "$netns" cat "$procfile" | grep -q "^ *$id "
+}
+
+test_queue()
+{
+ local expected="$1"
+ local last=""
+
+ # spawn nf_queue listeners
+ ip netns exec "$nsrouter" ./nf_queue -c -q 0 -t $timeout > "$TMPFILE0" &
+ ip netns exec "$nsrouter" ./nf_queue -c -q 1 -t $timeout > "$TMPFILE1" &
+
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 0
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 1
+
+ if ! test_ping;then
+ echo "FAIL: netns routing/connectivity with active listener on queues 0 and 1: $ret" 1>&2
+ exit $ret
+ fi
+
+ if ! test_ping_router;then
+ echo "FAIL: netns router unreachable listener on queue 0 and 1: $ret" 1>&2
+ exit $ret
+ fi
+
+ wait
+ ret=$?
+
+ for file in $TMPFILE0 $TMPFILE1; do
+ last=$(tail -n1 "$file")
+ if [ x"$last" != x"$expected packets total" ]; then
+ echo "FAIL: Expected $expected packets total, but got $last" 1>&2
+ ip netns exec "$nsrouter" nft list ruleset
+ exit 1
+ fi
+ done
+
+ echo "PASS: Expected and received $last"
+}
+
+listener_ready()
+{
+ ss -N "$1" -lnt -o "sport = :12345" | grep -q 12345
+}
+
+test_tcp_forward()
+{
+ ip netns exec "$nsrouter" ./nf_queue -q 2 -t "$timeout" &
+ local nfqpid=$!
+
+ timeout 5 ip netns exec "$ns2" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
+ local rpid=$!
+
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2"
+
+ ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
+
+ wait "$rpid" && echo "PASS: tcp and nfqueue in forward chain"
+}
+
+test_tcp_localhost()
+{
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT"
+ timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
+ local rpid=$!
+
+ ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" &
+ local nfqpid=$!
+
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
+
+ ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null
+
+ wait "$rpid" && echo "PASS: tcp via loopback"
+ wait 2>/dev/null
+}
+
+test_tcp_localhost_connectclose()
+{
+ ip netns exec "$nsrouter" ./connect_close -p 23456 -t "$timeout" &
+ ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" &
+
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3
+
+ wait && echo "PASS: tcp via loopback with connect/close"
+ wait 2>/dev/null
+}
+
+test_tcp_localhost_requeue()
+{
+ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+ }
+ chain post {
+ type filter hook postrouting priority 0; policy accept;
+ tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+ }
+}
+EOF
+ timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
+ local rpid=$!
+
+ ip netns exec "$nsrouter" ./nf_queue -c -q 1 -t "$timeout" > "$TMPFILE2" &
+
+ # nfqueue 1 will be called via output hook. But this time,
+ # re-queue the packet to nfqueue program on queue 2.
+ ip netns exec "$nsrouter" ./nf_queue -G -d 150 -c -q 0 -Q 1 -t "$timeout" > "$TMPFILE3" &
+
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
+ ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" > /dev/null
+
+ wait
+
+ if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then
+ echo "FAIL: lost packets during requeue?!" 1>&2
+ return
+ fi
+
+ echo "PASS: tcp via loopback and re-queueing"
+}
+
+test_icmp_vrf() {
+ if ! ip -net "$ns1" link add tvrf type vrf table 9876;then
+ echo "SKIP: Could not add vrf device"
+ return
+ fi
+
+ ip -net "$ns1" li set eth0 master tvrf
+ ip -net "$ns1" li set tvrf up
+
+ ip -net "$ns1" route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876
+ip netns exec "$ns1" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ meta oifname "tvrf" icmp type echo-request counter queue num 1
+ meta oifname "eth0" icmp type echo-request counter queue num 1
+ }
+ chain post {
+ type filter hook postrouting priority 0; policy accept;
+ meta oifname "tvrf" icmp type echo-request counter queue num 1
+ meta oifname "eth0" icmp type echo-request counter queue num 1
+ }
+}
+EOF
+ ip netns exec "$ns1" ./nf_queue -q 1 -t "$timeout" &
+ local nfqpid=$!
+
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 1
+
+ ip netns exec "$ns1" ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null
+
+ for n in output post; do
+ for d in tvrf eth0; do
+ if ! ip netns exec "$ns1" nft list chain inet filter "$n" | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"; then
+ echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
+ ip netns exec "$ns1" nft list ruleset
+ ret=1
+ return
+ fi
+ done
+ done
+
+ wait "$nfqpid" && echo "PASS: icmp+nfqueue via vrf"
+ wait 2>/dev/null
+}
+
+ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+load_ruleset "filter" 0
+
+if test_ping; then
+ # queue bypass works (rules were skipped, no listener)
+ echo "PASS: ${ns1} can reach ${ns2}"
+else
+ echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2
+ exit $ret
+fi
+
+test_queue_blackhole ip
+test_queue_blackhole ip6
+
+# dummy ruleset to add base chains between the
+# queueing rules. We don't want the second reinject
+# to re-execute the old hooks.
+load_counter_ruleset 10
+
+# we are hooking all: prerouting/input/forward/output/postrouting.
+# we ping ${ns2} from ${ns1} via ${nsrouter} using ipv4 and ipv6, so:
+# 1x icmp prerouting,forward,postrouting -> 3 queue events (6 incl. reply).
+# 1x icmp prerouting,input,output postrouting -> 4 queue events incl. reply.
+# so we expect that userspace program receives 10 packets.
+test_queue 10
+
+# same. We queue to a second program as well.
+load_ruleset "filter2" 20
+test_queue 20
+
+test_tcp_forward
+test_tcp_localhost
+test_tcp_localhost_connectclose
+test_tcp_localhost_requeue
+test_icmp_vrf
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_synproxy.sh b/tools/testing/selftests/net/netfilter/nft_synproxy.sh
new file mode 100755
index 0000000000..293f667a6a
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_synproxy.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+ret=0
+
+checktool "nft --version" "run test without nft tool"
+checktool "iperf3 --version" "run test without iperf3"
+
+setup_ns nsr ns1 ns2
+
+modprobe -q nf_conntrack
+
+cleanup() {
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
+
+ cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+ip link add veth0 netns "$nsr" type veth peer name eth0 netns "$ns1"
+ip link add veth1 netns "$nsr" type veth peer name eth0 netns "$ns2"
+
+for dev in veth0 veth1; do
+ ip -net "$nsr" link set "$dev" up
+done
+
+ip -net "$nsr" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsr" addr add 10.0.2.1/24 dev veth1
+
+ip netns exec "$nsr" sysctl -q net.ipv4.conf.veth0.forwarding=1
+ip netns exec "$nsr" sysctl -q net.ipv4.conf.veth1.forwarding=1
+ip netns exec "$nsr" sysctl -q net.netfilter.nf_conntrack_tcp_loose=0
+
+for n in $ns1 $ns2; do
+ ip -net "$n" link set eth0 up
+done
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns2" route add default via 10.0.2.1
+
+# test basic connectivity
+if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
+ echo "ERROR: $ns1 cannot reach $ns2" 1>&2
+ exit 1
+fi
+
+if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
+ echo "ERROR: $ns2 cannot reach $ns1" 1>&2
+ exit 1
+fi
+
+ip netns exec "$ns2" iperf3 -s > /dev/null 2>&1 &
+# ip netns exec $nsr tcpdump -vvv -n -i veth1 tcp | head -n 10 &
+
+sleep 1
+
+ip netns exec "$nsr" nft -f - <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority -300; policy accept;
+ meta iif veth0 tcp flags syn counter notrack
+ }
+
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+
+ ct state new,established counter accept
+
+ meta iif veth0 meta l4proto tcp ct state untracked,invalid synproxy mss 1460 sack-perm timestamp
+
+ ct state invalid counter drop
+
+ # make ns2 unreachable w.o. tcp synproxy
+ tcp flags syn counter drop
+ }
+}
+EOF
+if [ $? -ne 0 ]; then
+ echo "SKIP: Cannot add nft synproxy"
+ exit $ksft_skip
+fi
+
+if ! ip netns exec "$ns1" timeout 5 iperf3 -c 10.0.2.99 -n $((1 * 1024 * 1024)) > /dev/null; then
+ echo "FAIL: iperf3 returned an error" 1>&2
+ ret=1
+ ip netns exec "$nsr" nft list ruleset
+else
+ echo "PASS: synproxy connection successful"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_zones_many.sh b/tools/testing/selftests/net/netfilter/nft_zones_many.sh
new file mode 100755
index 0000000000..7db9982ba5
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_zones_many.sh
@@ -0,0 +1,164 @@
+#!/bin/bash
+
+# Test insertion speed for packets with identical addresses/ports
+# that are all placed in distinct conntrack zones.
+
+source lib.sh
+
+zones=2000
+[ "$KSFT_MACHINE_SLOW" = yes ] && zones=500
+
+have_ct_tool=0
+ret=0
+
+cleanup()
+{
+ cleanup_all_ns
+}
+
+checktool "nft --version" "run test without nft tool"
+checktool "socat -V" "run test without socat tool"
+
+setup_ns ns1
+
+trap cleanup EXIT
+
+if conntrack -V > /dev/null 2>&1; then
+ have_ct_tool=1
+fi
+
+test_zones() {
+ local max_zones=$1
+
+ip netns exec "$ns1" nft -f /dev/stdin<<EOF
+flush ruleset
+table inet raw {
+ map rndzone {
+ typeof numgen inc mod $max_zones : ct zone
+ }
+
+ chain output {
+ type filter hook output priority -64000; policy accept;
+ udp dport 12345 ct zone set numgen inc mod 65536 map @rndzone
+ }
+}
+EOF
+if [ "$?" -ne 0 ];then
+ echo "SKIP: Cannot add nftables rules"
+ exit $ksft_skip
+fi
+
+ ip netns exec "$ns1" sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600
+
+ (
+ echo "add element inet raw rndzone {"
+ for i in $(seq 1 "$max_zones");do
+ echo -n "$i : $i"
+ if [ "$i" -lt "$max_zones" ]; then
+ echo ","
+ else
+ echo "}"
+ fi
+ done
+ ) | ip netns exec "$ns1" nft -f /dev/stdin
+
+ local i=0
+ local j=0
+ local outerstart
+ local stop
+ outerstart=$(date +%s%3N)
+ stop=$outerstart
+
+ while [ "$i" -lt "$max_zones" ]; do
+ local start
+ start=$(date +%s%3N)
+ i=$((i + 1000))
+ j=$((j + 1))
+ # nft rule in output places each packet in a different zone.
+ dd if=/dev/zero bs=8k count=1000 2>/dev/null | ip netns exec "$ns1" socat -u STDIN UDP:127.0.0.1:12345,sourceport=12345
+ if [ $? -ne 0 ] ;then
+ ret=1
+ break
+ fi
+
+ stop=$(date +%s%3N)
+ local duration=$((stop-start))
+ echo "PASS: added 1000 entries in $duration ms (now $i total, loop $j)"
+ done
+
+ if [ "$have_ct_tool" -eq 1 ]; then
+ local count duration
+ count=$(ip netns exec "$ns1" conntrack -C)
+ duration=$((stop-outerstart))
+
+ if [ "$count" -ge "$max_zones" ]; then
+ echo "PASS: inserted $count entries from packet path in $duration ms total"
+ else
+ ip netns exec "$ns1" conntrack -S 1>&2
+ echo "FAIL: inserted $count entries from packet path in $duration ms total, expected $max_zones entries"
+ ret=1
+ fi
+ fi
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: insert $max_zones entries from packet path" 1>&2
+ fi
+}
+
+test_conntrack_tool() {
+ local max_zones=$1
+
+ ip netns exec "$ns1" conntrack -F >/dev/null 2>/dev/null
+
+ local outerstart start stop i
+ outerstart=$(date +%s%3N)
+ start=$(date +%s%3N)
+ stop="$start"
+ i=0
+ while [ "$i" -lt "$max_zones" ]; do
+ i=$((i + 1))
+ ip netns exec "$ns1" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+ --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i >/dev/null 2>&1
+ if [ $? -ne 0 ];then
+ ip netns exec "$ns1" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+ --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i > /dev/null
+ echo "FAIL: conntrack -I returned an error"
+ ret=1
+ break
+ fi
+
+ if [ $((i%1000)) -eq 0 ];then
+ stop=$(date +%s%3N)
+
+ local duration=$((stop-start))
+ echo "PASS: added 1000 entries in $duration ms (now $i total)"
+ start=$stop
+ fi
+ done
+
+ local count
+ local duration
+ count=$(ip netns exec "$ns1" conntrack -C)
+ duration=$((stop-outerstart))
+
+ if [ "$count" -eq "$max_zones" ]; then
+ echo "PASS: inserted $count entries via ctnetlink in $duration ms"
+ else
+ ip netns exec "$ns1" conntrack -S 1>&2
+ echo "FAIL: inserted $count entries via ctnetlink in $duration ms, expected $max_zones entries ($duration ms)"
+ ret=1
+ fi
+}
+
+test_zones $zones
+
+if [ "$have_ct_tool" -eq 1 ];then
+ test_conntrack_tool $zones
+else
+ echo "SKIP: Could not run ctnetlink insertion test without conntrack tool"
+ if [ $ret -eq 0 ];then
+ exit $ksft_skip
+ fi
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/common.sh b/tools/testing/selftests/net/netfilter/packetdrill/common.sh
new file mode 100755
index 0000000000..ed36d53519
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/common.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# for debugging set net.netfilter.nf_log_all_netns=1 in init_net
+# or do not use net namespaces.
+modprobe -q nf_conntrack
+sysctl -q net.netfilter.nf_conntrack_log_invalid=6
+
+# Flush old cached data (fastopen cookies).
+ip tcp_metrics flush all > /dev/null 2>&1
+
+# TCP min, default, and max receive and send buffer sizes.
+sysctl -q net.ipv4.tcp_rmem="4096 540000 $((15*1024*1024))"
+sysctl -q net.ipv4.tcp_wmem="4096 $((256*1024)) 4194304"
+
+# TCP congestion control.
+sysctl -q net.ipv4.tcp_congestion_control=cubic
+
+# TCP slow start after idle.
+sysctl -q net.ipv4.tcp_slow_start_after_idle=0
+
+# TCP Explicit Congestion Notification (ECN)
+sysctl -q net.ipv4.tcp_ecn=0
+
+sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1
+
+# Override the default qdisc on the tun device.
+# Many tests fail with timing errors if the default
+# is FQ and that paces their flows.
+tc qdisc add dev tun0 root pfifo
+
+# Enable conntrack
+$xtables -A INPUT -m conntrack --ctstate NEW -p tcp --syn
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt
new file mode 100644
index 0000000000..d755bd64c5
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt
@@ -0,0 +1,118 @@
+// check that already-acked (retransmitted) packet is let through rather
+// than tagged as INVALID.
+
+`packetdrill/common.sh`
+
+// should set -P DROP but it disconnects VM w.o. extra netns
++0 `$xtables -A INPUT -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 10) = 0
+
++0 < S 0:0(0) win 32792 <mss 1000>
++0 > S. 0:0(0) ack 1 <mss 1460>
++.01 < . 1:1(0) ack 1 win 65535
++0 accept(3, ..., ...) = 4
+
++0.0001 < P. 1:1461(1460) ack 1 win 257
++.0 > . 1:1(0) ack 1461 win 65535
++0.0001 < P. 1461:2921(1460) ack 1 win 257
++.0 > . 1:1(0) ack 2921 win 65535
++0.0001 < P. 2921:4381(1460) ack 1 win 257
++.0 > . 1:1(0) ack 4381 win 65535
++0.0001 < P. 4381:5841(1460) ack 1 win 257
++.0 > . 1:1(0) ack 5841 win 65535
++0.0001 < P. 5841:7301(1460) ack 1 win 257
++.0 > . 1:1(0) ack 7301 win 65535
++0.0001 < P. 7301:8761(1460) ack 1 win 257
++.0 > . 1:1(0) ack 8761 win 65535
++0.0001 < P. 8761:10221(1460) ack 1 win 257
++.0 > . 1:1(0) ack 10221 win 65535
++0.0001 < P. 10221:11681(1460) ack 1 win 257
++.0 > . 1:1(0) ack 11681 win 65535
++0.0001 < P. 11681:13141(1460) ack 1 win 257
++.0 > . 1:1(0) ack 13141 win 65535
++0.0001 < P. 13141:14601(1460) ack 1 win 257
++.0 > . 1:1(0) ack 14601 win 65535
++0.0001 < P. 14601:16061(1460) ack 1 win 257
++.0 > . 1:1(0) ack 16061 win 65535
++0.0001 < P. 16061:17521(1460) ack 1 win 257
++.0 > . 1:1(0) ack 17521 win 65535
++0.0001 < P. 17521:18981(1460) ack 1 win 257
++.0 > . 1:1(0) ack 18981 win 65535
++0.0001 < P. 18981:20441(1460) ack 1 win 257
++.0 > . 1:1(0) ack 20441 win 65535
++0.0001 < P. 20441:21901(1460) ack 1 win 257
++.0 > . 1:1(0) ack 21901 win 65535
++0.0001 < P. 21901:23361(1460) ack 1 win 257
++.0 > . 1:1(0) ack 23361 win 65535
++0.0001 < P. 23361:24821(1460) ack 1 win 257
+0.055 > . 1:1(0) ack 24821 win 65535
++0.0001 < P. 24821:26281(1460) ack 1 win 257
++.0 > . 1:1(0) ack 26281 win 65535
++0.0001 < P. 26281:27741(1460) ack 1 win 257
++.0 > . 1:1(0) ack 27741 win 65535
++0.0001 < P. 27741:29201(1460) ack 1 win 257
++.0 > . 1:1(0) ack 29201 win 65535
++0.0001 < P. 29201:30661(1460) ack 1 win 257
++.0 > . 1:1(0) ack 30661 win 65535
++0.0001 < P. 30661:32121(1460) ack 1 win 257
++.0 > . 1:1(0) ack 32121 win 65535
++0.0001 < P. 32121:33581(1460) ack 1 win 257
++.0 > . 1:1(0) ack 33581 win 65535
++0.0001 < P. 33581:35041(1460) ack 1 win 257
++.0 > . 1:1(0) ack 35041 win 65535
++0.0001 < P. 35041:36501(1460) ack 1 win 257
++.0 > . 1:1(0) ack 36501 win 65535
++0.0001 < P. 36501:37961(1460) ack 1 win 257
++.0 > . 1:1(0) ack 37961 win 65535
++0.0001 < P. 37961:39421(1460) ack 1 win 257
++.0 > . 1:1(0) ack 39421 win 65535
++0.0001 < P. 39421:40881(1460) ack 1 win 257
++.0 > . 1:1(0) ack 40881 win 65535
++0.0001 < P. 40881:42341(1460) ack 1 win 257
++.0 > . 1:1(0) ack 42341 win 65535
++0.0001 < P. 42341:43801(1460) ack 1 win 257
++.0 > . 1:1(0) ack 43801 win 65535
++0.0001 < P. 43801:45261(1460) ack 1 win 257
++.0 > . 1:1(0) ack 45261 win 65535
++0.0001 < P. 45261:46721(1460) ack 1 win 257
++.0 > . 1:1(0) ack 46721 win 65535
++0.0001 < P. 46721:48181(1460) ack 1 win 257
++.0 > . 1:1(0) ack 48181 win 65535
++0.0001 < P. 48181:49641(1460) ack 1 win 257
++.0 > . 1:1(0) ack 49641 win 65535
++0.0001 < P. 49641:51101(1460) ack 1 win 257
++.0 > . 1:1(0) ack 51101 win 65535
++0.0001 < P. 51101:52561(1460) ack 1 win 257
++.0 > . 1:1(0) ack 52561 win 65535
++0.0001 < P. 52561:54021(1460) ack 1 win 257
++.0 > . 1:1(0) ack 54021 win 65535
++0.0001 < P. 54021:55481(1460) ack 1 win 257
++.0 > . 1:1(0) ack 55481 win 65535
++0.0001 < P. 55481:56941(1460) ack 1 win 257
++.0 > . 1:1(0) ack 56941 win 65535
++0.0001 < P. 56941:58401(1460) ack 1 win 257
++.0 > . 1:1(0) ack 58401 win 65535
++0.0001 < P. 58401:59861(1460) ack 1 win 257
++.0 > . 1:1(0) ack 59861 win 65535
++0.0001 < P. 59861:61321(1460) ack 1 win 257
++.0 > . 1:1(0) ack 61321 win 65535
++0.0001 < P. 61321:62781(1460) ack 1 win 257
++.0 > . 1:1(0) ack 62781 win 65535
++0.0001 < P. 62781:64241(1460) ack 1 win 257
++.0 > . 1:1(0) ack 64241 win 65535
++0.0001 < P. 64241:65701(1460) ack 1 win 257
++.0 > . 1:1(0) ack 65701 win 65535
++0.0001 < P. 65701:67161(1460) ack 1 win 257
++.0 > . 1:1(0) ack 67161 win 65535
+
+// nf_ct_proto_6: SEQ is under the lower bound (already ACKed data retransmitted) IN=tun0 OUT= MAC= SRC=192.0.2.1 DST=192.168.24.72 LEN=1500 TOS=0x00 PREC=0x00 TTL=255 ID=0 PROTO=TCP SPT=34375 DPT=8080 SEQ=1 ACK=4162510439 WINDOW=257 RES=0x00 ACK PSH URGP=0
++0.0001 < P. 1:1461(1460) ack 1 win 257
+
+// only sent if above packet isn't flagged as invalid
++.0 > . 1:1(0) ack 67161 win 65535
+
++0 `$xtables -D INPUT -m conntrack --ctstate INVALID -j DROP`
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt
new file mode 100644
index 0000000000..dccdd4c009
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt
@@ -0,0 +1,62 @@
+// check RST packet that doesn't exactly match expected next sequence
+// number still transitions conntrack state to CLOSE iff its already in
+// FIN/CLOSE_WAIT.
+
+`packetdrill/common.sh`
+
+// 5.771921 server_ip > client_ip TLSv1.2 337 [Packet size limited during capture]
+// 5.771994 server_ip > client_ip TLSv1.2 337 [Packet size limited during capture]
+// 5.772212 client_ip > server_ip TCP 66 45020 > 443 [ACK] Seq=1905874048 Ack=781810658 Win=36352 Len=0 TSval=3317842872 TSecr=675936334
+// 5.787924 server_ip > client_ip TLSv1.2 1300 [Packet size limited during capture]
+// 5.788126 server_ip > client_ip TLSv1.2 90 Application Data
+// 5.788207 server_ip > client_ip TCP 66 443 > 45020 [FIN, ACK] Seq=781811916 Ack=1905874048 Win=31104 Len=0 TSval=675936350 TSecr=3317842872
+// 5.788447 client_ip > server_ip TLSv1.2 90 Application Data
+// 5.788479 client_ip > server_ip TCP 66 45020 > 443 [RST, ACK] Seq=1905874072 Ack=781811917 Win=39040 Len=0 TSval=3317842889 TSecr=675936350
+// 5.788581 server_ip > client_ip TCP 54 8443 > 45020 [RST] Seq=781811892 Win=0 Len=0
+
++0 `iptables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `iptables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+
+0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
++0.1 < S. 1:1(0) ack 1 win 65535 <mss 1460>
+
++0 > . 1:1(0) ack 1 win 65535
++0 < . 1:1001(1000) ack 1 win 65535
++0 < . 1001:2001(1000) ack 1 win 65535
++0 < . 2001:3001(1000) ack 1 win 65535
+
++0 > . 1:1(0) ack 1001 win 65535
++0 > . 1:1(0) ack 2001 win 65535
++0 > . 1:1(0) ack 3001 win 65535
+
++0 write(3, ..., 1000) = 1000
+
++0.0 > P. 1:1001(1000) ack 3001 win 65535
+
++0.1 read(3, ..., 1000) = 1000
+
+// Conntrack should move to FIN_WAIT, then CLOSE_WAIT.
++0 < F. 3001:3001(0) ack 1001 win 65535
++0 > . 1001:1001(0) ack 3002 win 65535
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q CLOSE_WAIT`
+
++1 close(3) = 0
+// RST: unread data. FIN was seen, hence ack + 1
++0 > R. 1001:1001(0) ack 3002 win 65535
+// ... and then, CLOSE.
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q CLOSE\ `
+
+// Spurious RST from peer -- no sk state. Should NOT get
+// marked INVALID, because conntrack is already closing.
++0.1 < R 2001:2001(0) win 0
+
+// No packets should have been marked INVALID
++0 `iptables -v -S INPUT | grep INVALID | grep -q -- "-c 0 0"`
++0 `iptables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"`
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt
new file mode 100644
index 0000000000..686f18a3d9
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt
@@ -0,0 +1,59 @@
+// check that out of window resets are marked as INVALID and conntrack remains
+// in ESTABLISHED state.
+
+`packetdrill/common.sh`
+
++0 `$xtables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `$xtables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+
+0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
++0.1 < S. 1:1(0) ack 1 win 65535 <mss 1460>
+
++0 > . 1:1(0) ack 1 win 65535
++0 < . 1:1001(1000) ack 1 win 65535
++0 < . 1001:2001(1000) ack 1 win 65535
++0 < . 2001:3001(1000) ack 1 win 65535
+
++0 > . 1:1(0) ack 1001 win 65535
++0 > . 1:1(0) ack 2001 win 65535
++0 > . 1:1(0) ack 3001 win 65535
+
++0 write(3, ..., 1000) = 1000
+
+// out of window
++0.0 < R 0:0(0) win 0
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
+// out of window
++0.0 < R 1000000:1000000(0) win 0
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
+// in-window but not exact match
++0.0 < R 42:42(0) win 0
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
++0.0 > P. 1:1001(1000) ack 3001 win 65535
+
++0.1 read(3, ..., 1000) = 1000
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
++0 < . 3001:3001(0) ack 1001 win 65535
+
++0.0 < R. 3000:3000(0) ack 1001 win 0
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
+// exact next sequence
++0.0 < R. 3001:3001(0) ack 1001 win 0
+// Conntrack should move to CLOSE
+
+// Expect four invalid RSTs
++0 `$xtables -v -S INPUT | grep INVALID | grep -q -- "-c 4 "`
++0 `$xtables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"`
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q CLOSE\ `
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt
new file mode 100644
index 0000000000..3442cd29bc
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt
@@ -0,0 +1,44 @@
+// Check connection re-use, i.e. peer that receives the SYN answers with
+// a challenge-ACK.
+// Check that conntrack lets all packets pass, including the challenge ack,
+// and that a new connection is established.
+
+`packetdrill/common.sh`
+
+// S >
+// . < (challnge-ack)
+// R. >
+// S >
+// S. <
+// Expected outcome: established connection.
+
++0 `$xtables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `$xtables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
+// Challenge ACK, old incarnation.
+0.1 < . 145824453:145824453(0) ack 643160523 win 240 <mss 1460,nop,nop,TS val 1 ecr 1,nop,wscale 0>
+
++0.01 > R 643160523:643160523(0) win 0
+
++0.01 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT`
+
+// Must go through.
++0.01 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
+// correct synack
++0.1 < S. 0:0(0) ack 1 win 250 <mss 1460,nop,nop,TS val 1 ecr 1,nop,wscale 0>
+
+// 3whs completes.
++0.01 > . 1:1(0) ack 1 win 256 <nop,nop,TS val 1 ecr 1>
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep ESTABLISHED | grep -q ASSURED`
+
+// No packets should have been marked INVALID
++0 `$xtables -v -S INPUT | grep INVALID | grep -q -- "-c 0 0"`
++0 `$xtables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"`
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt
new file mode 100644
index 0000000000..3047160c4b
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt
@@ -0,0 +1,51 @@
+// Check conntrack copes with syn/ack reply for a previous, old incarnation.
+
+// tcpdump with buggy sequence
+// 10.176.25.8.829 > 10.192.171.30.2049: Flags [S], seq 2375731741, win 29200, options [mss 1460,sackOK,TS val 2083107423 ecr 0,nop,wscale 7], length 0
+// OLD synack, for old/previous S
+// 10.192.171.30.2049 > 10.176.25.8.829: Flags [S.], seq 145824453, ack 643160523, win 65535, options [mss 8952,nop,wscale 5,TS val 3215437785 ecr 2082921663,nop,nop], length 0
+// This reset never makes it to the endpoint, elided in the packetdrill script
+// 10.192.171.30.2049 > 10.176.25.8.829: Flags [R.], seq 1, ack 1, win 65535, options [mss 8952,nop,wscale 5,TS val 3215443451 ecr 2082921663,nop,nop], length 0
+// Syn retransmit, no change
+// 10.176.25.8.829 > 10.192.171.30.2049: Flags [S], seq 2375731741, win 29200, options [mss 1460,sackOK,TS val 2083115583 ecr 0,nop,wscale 7], length 0
+// CORRECT synack, should be accepted, but conntrack classified this as INVALID:
+// SEQ is over the upper bound (over the window of the receiver) IN=tun0 OUT= MAC= SRC=192.0.2.1 DST=192.168.37.78 LEN=40 TOS=0x00 PREC=0x00 TTL=255 ID=0 PROTO=TCP SPT=8080 DPT=34500 SEQ=162602411 ACK=2124350315 ..
+// 10.192.171.30.2049 > 10.176.25.8.829: Flags [S.], seq 162602410, ack 2375731742, win 65535, options [mss 8952,nop,wscale 5,TS val 3215445754 ecr 2083115583,nop,nop], length 0
+
+`packetdrill/common.sh`
+
++0 `$xtables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `$xtables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
+// bogus/outdated synack, invalid ack value
+0.1 < S. 145824453:145824453(0) ack 643160523 win 240 <mss 1440,nop,nop,TS val 1 ecr 1,nop,wscale 0>
+
+// syn retransmitted
+1.01 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1015 ecr 0,nop,wscale 8>
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT`
+
+// correct synack
++0 < S. 145758918:145758918(0) ack 1 win 250 <mss 1460,nop,nop,TS val 1 ecr 1,nop,wscale 0>
++0 write(3, ..., 1) = 1
+
+// with buggy conntrack above packet is dropped, so SYN rtx is seen:
+// script packet: 1.054007 . 1:1(0) ack 16777958 win 256 <nop,nop,TS val 1033 ecr 1>
+// actual packet: 3.010000 S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1015 ecr 0,nop,wscale 8>
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep ESTABLISHED | grep -q ASSURED`
+
++0 > P. 1:2(1) ack 4294901762 win 256 <nop,nop,TS val 1067 ecr 1>
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep ASSURED | grep -q ESTABLISHED`
+
+// No packets should have been marked INVALID in OUTPUT direction, 1 in INPUT
++0 `$xtables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"`
++0 `$xtables -v -S INPUT | grep INVALID | grep -q -- "-c 1 "`
+
++0 `$xtables -D INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `$xtables -D OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt
new file mode 100644
index 0000000000..842242f8cc
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt
@@ -0,0 +1,34 @@
+// Check reception of another SYN while we have an established conntrack state.
+// Challenge ACK is supposed to pass through, RST reply should clear conntrack
+// state and SYN retransmit should give us new 'SYN_RECV' connection state.
+
+`packetdrill/common.sh`
+
+// should show a match if bug is present:
++0 `iptables -A INPUT -m conntrack --ctstate INVALID -p tcp --tcp-flags SYN,ACK SYN,ACK`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 10) = 0
+
++0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7, TS val 1 ecr 0,nop,nop>
++0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,TS val 100 ecr 1,nop,wscale 8>
++.01 < . 1:1(0) ack 1 win 257 <TS val 1 ecr 100,nop,nop>
++0 accept(3, ..., ...) = 4
+
++0 < P. 1:101(100) ack 1 win 257 <TS val 2 ecr 100,nop,nop>
++.001 > . 1:1(0) ack 101 win 256 <nop,nop,TS val 110 ecr 2>
++0 read(4, ..., 101) = 100
+
+1.0 < S 2000:2000(0) win 32792 <mss 1000,nop,wscale 7, TS val 233 ecr 0,nop,nop>
+// Won't expect this: challenge ack.
+
++0 > . 1:1(0) ack 101 win 256 <nop,nop,TS val 112 ecr 2>
++0 < R. 101:101(0) ack 1 win 257
++0 close(4) = 0
+
+1.5 < S 2000:2000(0) win 32792 <mss 1000,nop,wscale 0, TS val 233 ecr 0,nop,nop>
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep -q SYN_RECV`
++0 `iptables -v -S INPUT | grep INVALID | grep -q -- "-c 0 0"`
diff --git a/tools/testing/selftests/net/netfilter/rpath.sh b/tools/testing/selftests/net/netfilter/rpath.sh
new file mode 100755
index 0000000000..4485fd7675
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/rpath.sh
@@ -0,0 +1,175 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# return code to signal skipped test
+ksft_skip=4
+
+# search for legacy iptables (it uses the xtables extensions
+if iptables-legacy --version >/dev/null 2>&1; then
+ iptables='iptables-legacy'
+elif iptables --version >/dev/null 2>&1; then
+ iptables='iptables'
+else
+ iptables=''
+fi
+
+if ip6tables-legacy --version >/dev/null 2>&1; then
+ ip6tables='ip6tables-legacy'
+elif ip6tables --version >/dev/null 2>&1; then
+ ip6tables='ip6tables'
+else
+ ip6tables=''
+fi
+
+if nft --version >/dev/null 2>&1; then
+ nft='nft'
+else
+ nft=''
+fi
+
+if [ -z "$iptables$ip6tables$nft" ]; then
+ echo "SKIP: Test needs iptables, ip6tables or nft"
+ exit $ksft_skip
+fi
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+trap "ip netns del $ns1; ip netns del $ns2" EXIT
+
+# create two netns, disable rp_filter in ns2 and
+# keep IPv6 address when moving into VRF
+ip netns add "$ns1"
+ip netns add "$ns2"
+ip netns exec "$ns2" sysctl -q net.ipv4.conf.all.rp_filter=0
+ip netns exec "$ns2" sysctl -q net.ipv4.conf.default.rp_filter=0
+ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.keep_addr_on_down=1
+
+# a standard connection between the netns, should not trigger rp filter
+ip -net "$ns1" link add v0 type veth peer name v0 netns "$ns2"
+ip -net "$ns1" link set v0 up; ip -net "$ns2" link set v0 up
+ip -net "$ns1" a a 192.168.23.2/24 dev v0
+ip -net "$ns2" a a 192.168.23.1/24 dev v0
+ip -net "$ns1" a a fec0:23::2/64 dev v0 nodad
+ip -net "$ns2" a a fec0:23::1/64 dev v0 nodad
+
+# rp filter testing: ns1 sends packets via v0 which ns2 would route back via d0
+ip -net "$ns2" link add d0 type dummy
+ip -net "$ns2" link set d0 up
+ip -net "$ns1" a a 192.168.42.2/24 dev v0
+ip -net "$ns2" a a 192.168.42.1/24 dev d0
+ip -net "$ns1" a a fec0:42::2/64 dev v0 nodad
+ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad
+
+# firewall matches to test
+[ -n "$iptables" ] && {
+ common='-t raw -A PREROUTING -s 192.168.0.0/16'
+ if ! ip netns exec "$ns2" "$iptables" $common -m rpfilter;then
+ echo "Cannot add rpfilter rule"
+ exit $ksft_skip
+ fi
+ ip netns exec "$ns2" "$iptables" $common -m rpfilter --invert
+}
+[ -n "$ip6tables" ] && {
+ common='-t raw -A PREROUTING -s fec0::/16'
+ if ! ip netns exec "$ns2" "$ip6tables" $common -m rpfilter;then
+ echo "Cannot add rpfilter rule"
+ exit $ksft_skip
+ fi
+ ip netns exec "$ns2" "$ip6tables" $common -m rpfilter --invert
+}
+[ -n "$nft" ] && ip netns exec "$ns2" $nft -f - <<EOF
+table inet t {
+ chain c {
+ type filter hook prerouting priority raw;
+ ip saddr 192.168.0.0/16 fib saddr . iif oif exists counter
+ ip6 saddr fec0::/16 fib saddr . iif oif exists counter
+ }
+}
+EOF
+
+die() {
+ echo "FAIL: $*"
+ #ip netns exec "$ns2" "$iptables" -t raw -vS
+ #ip netns exec "$ns2" "$ip6tables" -t raw -vS
+ #ip netns exec "$ns2" nft list ruleset
+ exit 1
+}
+
+# check rule counters, return true if rule did not match
+ipt_zero_rule() { # (command)
+ [ -n "$1" ] || return 0
+ ip netns exec "$ns2" "$1" -t raw -vS | grep -q -- "-m rpfilter -c 0 0"
+}
+ipt_zero_reverse_rule() { # (command)
+ [ -n "$1" ] || return 0
+ ip netns exec "$ns2" "$1" -t raw -vS | \
+ grep -q -- "-m rpfilter --invert -c 0 0"
+}
+nft_zero_rule() { # (family)
+ [ -n "$nft" ] || return 0
+ ip netns exec "$ns2" "$nft" list chain inet t c | \
+ grep -q "$1 saddr .* counter packets 0 bytes 0"
+}
+
+netns_ping() { # (netns, args...)
+ local netns="$1"
+ shift
+ ip netns exec "$netns" ping -q -c 1 -W 1 "$@" >/dev/null
+}
+
+clear_counters() {
+ [ -n "$iptables" ] && ip netns exec "$ns2" "$iptables" -t raw -Z
+ [ -n "$ip6tables" ] && ip netns exec "$ns2" "$ip6tables" -t raw -Z
+ if [ -n "$nft" ]; then
+ (
+ echo "delete table inet t";
+ ip netns exec "$ns2" $nft -s list table inet t;
+ ) | ip netns exec "$ns2" $nft -f -
+ fi
+}
+
+testrun() {
+ clear_counters
+
+ # test 1: martian traffic should fail rpfilter matches
+ netns_ping "$ns1" -I v0 192.168.42.1 && \
+ die "martian ping 192.168.42.1 succeeded"
+ netns_ping "$ns1" -I v0 fec0:42::1 && \
+ die "martian ping fec0:42::1 succeeded"
+
+ ipt_zero_rule "$iptables" || die "iptables matched martian"
+ ipt_zero_rule "$ip6tables" || die "ip6tables matched martian"
+ ipt_zero_reverse_rule "$iptables" && die "iptables not matched martian"
+ ipt_zero_reverse_rule "$ip6tables" && die "ip6tables not matched martian"
+ nft_zero_rule ip || die "nft IPv4 matched martian"
+ nft_zero_rule ip6 || die "nft IPv6 matched martian"
+
+ clear_counters
+
+ # test 2: rpfilter match should pass for regular traffic
+ netns_ping "$ns1" 192.168.23.1 || \
+ die "regular ping 192.168.23.1 failed"
+ netns_ping "$ns1" fec0:23::1 || \
+ die "regular ping fec0:23::1 failed"
+
+ ipt_zero_rule "$iptables" && die "iptables match not effective"
+ ipt_zero_rule "$ip6tables" && die "ip6tables match not effective"
+ ipt_zero_reverse_rule "$iptables" || die "iptables match over-effective"
+ ipt_zero_reverse_rule "$ip6tables" || die "ip6tables match over-effective"
+ nft_zero_rule ip && die "nft IPv4 match not effective"
+ nft_zero_rule ip6 && die "nft IPv6 match not effective"
+
+}
+
+testrun
+
+# repeat test with vrf device in $ns2
+ip -net "$ns2" link add vrf0 type vrf table 10
+ip -net "$ns2" link set vrf0 up
+ip -net "$ns2" link set v0 master vrf0
+
+testrun
+
+echo "PASS: netfilter reverse path match works as intended"
+exit 0
diff --git a/tools/testing/selftests/net/netfilter/sctp_collision.c b/tools/testing/selftests/net/netfilter/sctp_collision.c
new file mode 100644
index 0000000000..21bb1cfd8a
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/sctp_collision.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_in saddr = {}, daddr = {};
+ int sd, ret, len = sizeof(daddr);
+ struct timeval tv = {25, 0};
+ char buf[] = "hello";
+
+ if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) {
+ printf("%s <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>\n",
+ argv[0]);
+ return -1;
+ }
+
+ sd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
+ if (sd < 0) {
+ printf("Failed to create sd\n");
+ return -1;
+ }
+
+ saddr.sin_family = AF_INET;
+ saddr.sin_addr.s_addr = inet_addr(argv[2]);
+ saddr.sin_port = htons(atoi(argv[3]));
+
+ ret = bind(sd, (struct sockaddr *)&saddr, sizeof(saddr));
+ if (ret < 0) {
+ printf("Failed to bind to address\n");
+ goto out;
+ }
+
+ ret = listen(sd, 5);
+ if (ret < 0) {
+ printf("Failed to listen on port\n");
+ goto out;
+ }
+
+ daddr.sin_family = AF_INET;
+ daddr.sin_addr.s_addr = inet_addr(argv[4]);
+ daddr.sin_port = htons(atoi(argv[5]));
+
+ /* make test shorter than 25s */
+ ret = setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+ if (ret < 0) {
+ printf("Failed to setsockopt SO_RCVTIMEO\n");
+ goto out;
+ }
+
+ if (!strcmp(argv[1], "server")) {
+ sleep(1); /* wait a bit for client's INIT */
+ ret = connect(sd, (struct sockaddr *)&daddr, len);
+ if (ret < 0) {
+ printf("Failed to connect to peer\n");
+ goto out;
+ }
+ ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len);
+ if (ret < 0) {
+ printf("Failed to recv msg %d\n", ret);
+ goto out;
+ }
+ ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len);
+ if (ret < 0) {
+ printf("Failed to send msg %d\n", ret);
+ goto out;
+ }
+ printf("Server: sent! %d\n", ret);
+ }
+
+ if (!strcmp(argv[1], "client")) {
+ usleep(300000); /* wait a bit for server's listening */
+ ret = connect(sd, (struct sockaddr *)&daddr, len);
+ if (ret < 0) {
+ printf("Failed to connect to peer\n");
+ goto out;
+ }
+ sleep(1); /* wait a bit for server's delayed INIT_ACK to reproduce the issue */
+ ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len);
+ if (ret < 0) {
+ printf("Failed to send msg %d\n", ret);
+ goto out;
+ }
+ ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len);
+ if (ret < 0) {
+ printf("Failed to recv msg %d\n", ret);
+ goto out;
+ }
+ printf("Client: rcvd! %d\n", ret);
+ }
+ ret = 0;
+out:
+ close(sd);
+ return ret;
+}
diff --git a/tools/testing/selftests/net/netfilter/settings b/tools/testing/selftests/net/netfilter/settings
new file mode 100644
index 0000000000..abc5648b59
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/settings
@@ -0,0 +1 @@
+timeout=1800
diff --git a/tools/testing/selftests/net/netfilter/xt_string.sh b/tools/testing/selftests/net/netfilter/xt_string.sh
new file mode 100755
index 0000000000..8d401c69e3
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/xt_string.sh
@@ -0,0 +1,133 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# return code to signal skipped test
+ksft_skip=4
+rc=0
+
+source lib.sh
+
+checktool "socat -h" "run test without socat"
+checktool "iptables --version" "test needs iptables"
+
+infile=$(mktemp)
+
+cleanup()
+{
+ ip netns del "$netns"
+ rm -f "$infile"
+}
+
+trap cleanup EXIT
+
+setup_ns netns
+
+ip -net "$netns" link add d0 type dummy
+ip -net "$netns" link set d0 up
+ip -net "$netns" addr add 10.1.2.1/24 dev d0
+
+pattern="foo bar baz"
+patlen=11
+hdrlen=$((20 + 8)) # IPv4 + UDP
+
+#ip netns exec "$netns" tcpdump -npXi d0 &
+#tcpdump_pid=$!
+#trap 'kill $tcpdump_pid; ip netns del $netns' EXIT
+
+add_rule() { # (alg, from, to)
+ ip netns exec "$netns" \
+ iptables -A OUTPUT -o d0 -m string \
+ --string "$pattern" --algo "$1" --from "$2" --to "$3"
+}
+showrules() { # ()
+ ip netns exec "$netns" iptables -v -S OUTPUT | grep '^-A'
+}
+zerorules() {
+ ip netns exec "$netns" iptables -Z OUTPUT
+}
+countrule() { # (pattern)
+ showrules | grep -c -- "$*"
+}
+send() { # (offset)
+ ( for ((i = 0; i < $1 - hdrlen; i++)); do
+ echo -n " "
+ done
+ echo -n "$pattern"
+ ) > "$infile"
+
+ ip netns exec "$netns" socat -t 1 -u STDIN UDP-SENDTO:10.1.2.2:27374 < "$infile"
+}
+
+add_rule bm 1000 1500
+add_rule bm 1400 1600
+add_rule kmp 1000 1500
+add_rule kmp 1400 1600
+
+zerorules
+send 0
+send $((1000 - patlen))
+if [ "$(countrule -c 0 0)" -ne 4 ]; then
+ echo "FAIL: rules match data before --from"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send 1000
+send $((1400 - patlen))
+if [ "$(countrule -c 2)" -ne 2 ]; then
+ echo "FAIL: only two rules should match at low offset"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send $((1500 - patlen))
+if [ "$(countrule -c 1)" -ne 4 ]; then
+ echo "FAIL: all rules should match at end of packet"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send 1495
+if [ "$(countrule -c 1)" -ne 1 ]; then
+ echo "FAIL: only kmp with proper --to should match pattern spanning fragments"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send 1500
+if [ "$(countrule -c 1)" -ne 2 ]; then
+ echo "FAIL: two rules should match pattern at start of second fragment"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send $((1600 - patlen))
+if [ "$(countrule -c 1)" -ne 2 ]; then
+ echo "FAIL: two rules should match pattern at end of largest --to"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send $((1600 - patlen + 1))
+if [ "$(countrule -c 1)" -ne 0 ]; then
+ echo "FAIL: no rules should match pattern extending largest --to"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send 1600
+if [ "$(countrule -c 1)" -ne 0 ]; then
+ echo "FAIL: no rule should match pattern past largest --to"
+ showrules
+ ((rc--))
+fi
+
+[ $rc -eq 0 ] && echo "PASS: string match tests"
+exit $rc
diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py
new file mode 100755
index 0000000000..93d9d91452
--- /dev/null
+++ b/tools/testing/selftests/net/nl_netdev.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import time
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import ksft_eq, ksft_ge, ksft_busy_wait
+from lib.py import NetdevFamily, NetdevSimDev, ip
+
+
+def empty_check(nf) -> None:
+ devs = nf.dev_get({}, dump=True)
+ ksft_ge(len(devs), 1)
+
+
+def lo_check(nf) -> None:
+ lo_info = nf.dev_get({"ifindex": 1})
+ ksft_eq(len(lo_info['xdp-features']), 0)
+ ksft_eq(len(lo_info['xdp-rx-metadata-features']), 0)
+
+
+def page_pool_check(nf) -> None:
+ with NetdevSimDev() as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ def up():
+ ip(f"link set dev {nsim.ifname} up")
+
+ def down():
+ ip(f"link set dev {nsim.ifname} down")
+
+ def get_pp():
+ pp_list = nf.page_pool_get({}, dump=True)
+ return [pp for pp in pp_list if pp.get("ifindex") == nsim.ifindex]
+
+ # No page pools when down
+ down()
+ ksft_eq(len(get_pp()), 0)
+
+ # Up, empty page pool appears
+ up()
+ pp_list = get_pp()
+ ksft_ge(len(pp_list), 0)
+ refs = sum([pp["inflight"] for pp in pp_list])
+ ksft_eq(refs, 0)
+
+ # Down, it disappears, again
+ down()
+ pp_list = get_pp()
+ ksft_eq(len(pp_list), 0)
+
+ # Up, allocate a page
+ up()
+ nsim.dfs_write("pp_hold", "y")
+ pp_list = nf.page_pool_get({}, dump=True)
+ refs = sum([pp["inflight"] for pp in pp_list if pp.get("ifindex") == nsim.ifindex])
+ ksft_ge(refs, 1)
+
+ # Now let's leak a page
+ down()
+ pp_list = get_pp()
+ ksft_eq(len(pp_list), 1)
+ refs = sum([pp["inflight"] for pp in pp_list])
+ ksft_eq(refs, 1)
+ attached = [pp for pp in pp_list if "detach-time" not in pp]
+ ksft_eq(len(attached), 0)
+
+ # New pp can get created, and we'll have two
+ up()
+ pp_list = get_pp()
+ attached = [pp for pp in pp_list if "detach-time" not in pp]
+ detached = [pp for pp in pp_list if "detach-time" in pp]
+ ksft_eq(len(attached), 1)
+ ksft_eq(len(detached), 1)
+
+ # Free the old page and the old pp is gone
+ nsim.dfs_write("pp_hold", "n")
+ # Freeing check is once a second so we may need to retry
+ ksft_busy_wait(lambda: len(get_pp()) == 1, deadline=2)
+
+ # And down...
+ down()
+ ksft_eq(len(get_pp()), 0)
+
+ # Last, leave the page hanging for destroy, nothing to check
+ # we're trying to exercise the orphaning path in the kernel
+ up()
+ nsim.dfs_write("pp_hold", "y")
+
+
+def main() -> None:
+ nf = NetdevFamily()
+ ksft_run([empty_check, lo_check, page_pool_check],
+ args=(nf, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 8b12071876..9f8dec2f65 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -489,7 +489,7 @@ class ovsactions(nla):
actstr, reason = parse_extract_field(
actstr,
"drop(",
- "([0-9]+)",
+ r"([0-9]+)",
lambda x: int(x, 0),
False,
None,
@@ -502,9 +502,9 @@ class ovsactions(nla):
actstr = actstr[len("drop"): ]
return (totallen - len(actstr))
- elif parse_starts_block(actstr, "^(\d+)", False, True):
+ elif parse_starts_block(actstr, r"^(\d+)", False, True):
actstr, output = parse_extract_field(
- actstr, None, "(\d+)", lambda x: int(x), False, "0"
+ actstr, None, r"(\d+)", lambda x: int(x), False, "0"
)
self["attrs"].append(["OVS_ACTION_ATTR_OUTPUT", output])
parsed = True
@@ -512,7 +512,7 @@ class ovsactions(nla):
actstr, recircid = parse_extract_field(
actstr,
"recirc(",
- "([0-9a-fA-Fx]+)",
+ r"([0-9a-fA-Fx]+)",
lambda x: int(x, 0),
False,
0,
@@ -588,17 +588,17 @@ class ovsactions(nla):
actstr = actstr[3:]
actstr, ip_block_min = parse_extract_field(
- actstr, "=", "([0-9a-fA-F\.]+)", str, False
+ actstr, "=", r"([0-9a-fA-F\.]+)", str, False
)
actstr, ip_block_max = parse_extract_field(
- actstr, "-", "([0-9a-fA-F\.]+)", str, False
+ actstr, "-", r"([0-9a-fA-F\.]+)", str, False
)
actstr, proto_min = parse_extract_field(
- actstr, ":", "(\d+)", int, False
+ actstr, ":", r"(\d+)", int, False
)
actstr, proto_max = parse_extract_field(
- actstr, "-", "(\d+)", int, False
+ actstr, "-", r"(\d+)", int, False
)
if t is not None:
diff --git a/tools/testing/selftests/net/sample_map_ret0.bpf.c b/tools/testing/selftests/net/sample_map_ret0.bpf.c
new file mode 100644
index 0000000000..43ca925949
--- /dev/null
+++ b/tools/testing/selftests/net/sample_map_ret0.bpf.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, long);
+ __uint(max_entries, 2);
+} htab SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, long);
+ __uint(max_entries, 2);
+} array SEC(".maps");
+
+/* Sample program which should always load for testing control paths. */
+SEC("xdp") int func()
+{
+ __u64 key64 = 0;
+ __u32 key = 0;
+ long *value;
+
+ value = bpf_map_lookup_elem(&htab, &key);
+ if (!value)
+ return 1;
+ value = bpf_map_lookup_elem(&array, &key64);
+ if (!value)
+ return 1;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/sample_ret0.bpf.c b/tools/testing/selftests/net/sample_ret0.bpf.c
new file mode 100644
index 0000000000..1df5ca98bb
--- /dev/null
+++ b/tools/testing/selftests/net/sample_ret0.bpf.c
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+
+#define SEC(name) __attribute__((section(name), used))
+
+/* Sample program which should always load for testing control paths. */
+SEC("xdp")
+int func()
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh b/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh
new file mode 100755
index 0000000000..e23210aa54
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh
@@ -0,0 +1,335 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Jianguo Wu <wujianguo@chinatelecom.cn>
+#
+# Mostly copied from tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh.
+#
+# This script is designed for testing the support of netfilter hooks for
+# SRv6 End.DX4 behavior.
+#
+# Hereafter a network diagram is shown, where one tenants (named 100) offer
+# IPv4 L3 VPN services allowing hosts to communicate with each other across
+# an IPv6 network.
+#
+# Routers rt-1 and rt-2 implement IPv4 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DX4 behavior.
+#
+# To explain how an IPv4 L3 VPN based on SRv6 works, let us briefly consider an
+# example where, within the same domain of tenant 100, the host hs-1 pings
+# the host hs-2.
+#
+# First of all, L2 reachability of the host hs-2 is taken into account by
+# the router rt-1 which acts as an arp proxy.
+#
+# When the host hs-1 sends an IPv4 packet destined to hs-2, the router rt-1
+# receives the packet on the internal veth-t100 interface, rt-1 contains the
+# SRv6 Encap route for encapsulating the IPv4 packet in a IPv6 plus the Segment
+# Routing Header (SRH) packet. This packet is sent through the (IPv6) core
+# network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DX4 behavior removes the outer IPv6+SRH headers and
+# routs the packet to the specified nexthop. Afterwards, the packet is sent to
+# the host hs-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the role of rt-1
+# and rt-2 are swapped.
+#
+# And when net.netfilter.nf_hooks_lwtunnel is set to 1 in rt-1 or rt-2, and a
+# rpfilter iptables rule is added, SRv6 packets will go through netfilter PREROUTING
+# hooks.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | hs-1 netns | | hs-2 netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +-----------------------------------+ +-----------------------------------+
+# | . | | . |
+# | +---------------+ | | +---------------- |
+# | | veth-t100 | | | | veth-t100 | |
+# | | 10.0.0.11/24 | +----------+ | | +----------+ | 10.0.0.22/24 | |
+# | +-------+-------+ | route | | | | route | +-------+-------- |
+# | | table | | | | table | |
+# | +----------+ | | +----------+ |
+# | +--------------+ | | +--------------+ |
+# | | veth0 | | | | veth0 | |
+# | | 2001:11::1/64 |.|...|.| 2001:11::2/64 | |
+# | +--------------+ | | +--------------+ |
+# | | | |
+# | rt-1 netns | | rt-2 netns |
+# | | | |
+# +-----------------------------------+ +-----------------------------------+
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table
+# +----------------------------------------------------------------+
+# |SID |Action |
+# +----------------------------------------------------------------+
+# |fc00:21:100::6004|apply SRv6 End.DX4 nh4 10.0.0.1 dev veth-t100 |
+# +----------------------------------------------------------------+
+#
+# rt-1: route table
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t100 |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table
+# +---------------------------------------------------------------+
+# |SID |Action |
+# +---------------------------------------------------------------+
+# |fc00:12:100::6004|apply SRv6 End.DX4 nh4 10.0.0.2 dev veth-t100|
+# +---------------------------------------------------------------+
+#
+# rt-2: route table
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t100 |
+# +---------------------------------------------------+
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+readonly IPv6_RT_NETWORK=2001:11
+readonly IPv4_HS_NETWORK=10.0.0
+readonly SID_LOCATOR=fc00
+
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+cleanup()
+{
+ ip link del veth-rt-1 2>/dev/null || true
+ ip link del veth-rt-2 2>/dev/null || true
+
+ # destroy routers rt-* and hosts hs-*
+ for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
+ ip netns del ${ns} || true
+ done
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns add ${nsname}
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip link set veth-rt-${rt} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${rt} name veth0
+
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad
+ ip -netns ${nsname} link set veth0 up
+ ip -netns ${nsname} link set lo up
+
+ ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_rt_netfilter()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns exec ${nsname} sysctl -wq net.netfilter.nf_hooks_lwtunnel=1
+ ip netns exec ${nsname} iptables -t raw -A PREROUTING -m rpfilter --invert -j DROP
+}
+
+setup_hs()
+{
+ local hs=$1
+ local rt=$2
+ local tid=$3
+ local hsname=hs-${hs}
+ local rtname=rt-${rt}
+ local rtveth=veth-t${tid}
+
+ # set the networking for the host
+ ip netns add ${hsname}
+
+ ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+ ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+ ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0
+ ip -netns ${hsname} link set veth0 up
+ ip -netns ${hsname} link set lo up
+
+ ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.${rt}${hs}/24 dev ${rtveth}
+ ip -netns ${rtname} link set ${rtveth} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1
+}
+
+setup_vpn_config()
+{
+ local hssrc=$1
+ local rtsrc=$2
+ local hsdst=$3
+ local rtdst=$4
+ local tid=$5
+
+ local hssrc_name=hs-t${tid}-${hssrc}
+ local hsdst_name=hs-t${tid}-${hsdst}
+ local rtsrc_name=rt-${rtsrc}
+ local rtdst_name=rt-${rtdst}
+ local vpn_sid=${SID_LOCATOR}:${hssrc}${hsdst}:${tid}::6004
+
+ # set the encap route for encapsulating packets which arrive from the
+ # host hssrc and destined to the access router rtsrc.
+ ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 \
+ via 2001:11::${rtdst} dev veth0
+
+ # set the decap route for decapsulating packets which arrive from
+ # the rtdst router and destined to the hsdst host.
+ ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 \
+ encap seg6local action End.DX4 nh4 ${IPv4_HS_NETWORK}.${hsdst} dev veth-t${tid}
+}
+
+setup()
+{
+ ip link add veth-rt-1 type veth peer name veth-rt-2
+ # setup the networking for router rt-1 and router rt-2
+ setup_rt_networking 1
+ setup_rt_networking 2
+
+ # setup two hosts for the tenant 100.
+ # - host hs-1 is directly connected to the router rt-1;
+ # - host hs-2 is directly connected to the router rt-2.
+ setup_hs 1 1 100
+ setup_hs 2 2 100
+
+ # setup the IPv4 L3 VPN which connects the host hs-1 and host hs-2.
+ setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant
+ setup_vpn_config 2 2 1 1 100
+}
+
+check_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ ip netns exec hs-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ check_hs_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "Hosts connectivity: hs-${hssrc} -> hs-${hsdst} (tenant ${tid})"
+}
+
+host_tests()
+{
+ log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+}
+
+router_netfilter_tests()
+{
+ log_section "SRv6 VPN connectivity test with netfilter enabled in routers"
+ setup_rt_netfilter 1
+ setup_rt_netfilter 2
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+cleanup &>/dev/null
+
+setup
+
+host_tests
+router_netfilter_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
diff --git a/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh b/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh
new file mode 100755
index 0000000000..9e69a2ed5b
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh
@@ -0,0 +1,340 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Jianguo Wu <wujianguo@chinatelecom.cn>
+#
+# Mostly copied from tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh.
+#
+# This script is designed for testing the support of netfilter hooks for
+# SRv6 End.DX4 behavior.
+#
+# Hereafter a network diagram is shown, where one tenants (named 100) offer
+# IPv6 L3 VPN services allowing hosts to communicate with each other across
+# an IPv6 network.
+#
+# Routers rt-1 and rt-2 implement IPv6 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DX4 behavior.
+#
+# To explain how an IPv6 L3 VPN based on SRv6 works, let us briefly consider an
+# example where, within the same domain of tenant 100, the host hs-1 pings
+# the host hs-2.
+#
+# First of all, L2 reachability of the host hs-2 is taken into account by
+# the router rt-1 which acts as an arp proxy.
+#
+# When the host hs-1 sends an IPv6 packet destined to hs-2, the router rt-1
+# receives the packet on the internal veth-t100 interface, rt-1 contains the
+# SRv6 Encap route for encapsulating the IPv6 packet in a IPv6 plus the Segment
+# Routing Header (SRH) packet. This packet is sent through the (IPv6) core
+# network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DX4 behavior removes the outer IPv6+SRH headers and
+# routs the packet to the specified nexthop. Afterwards, the packet is sent to
+# the host hs-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the role of rt-1
+# and rt-2 are swapped.
+#
+# And when net.netfilter.nf_hooks_lwtunnel is set to 1 in rt-1 or rt-2, and a
+# rpfilter iptables rule is added, SRv6 packets will go through netfilter PREROUTING
+# hooks.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | hs-1 netns | | hs-2 netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | cafe::1/64 | | | | cafe::2/64 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +-----------------------------------+ +-----------------------------------+
+# | . | | . |
+# | +---------------+ | | +---------------- |
+# | | veth-t100 | | | | veth-t100 | |
+# | | cafe::11/64 | +----------+ | | +----------+ | cafe::22/64 | |
+# | +-------+-------+ | route | | | | route | +-------+-------- |
+# | | table | | | | table | |
+# | +----------+ | | +----------+ |
+# | +--------------+ | | +--------------+ |
+# | | veth0 | | | | veth0 | |
+# | | 2001:11::1/64 |.|...|.| 2001:11::2/64 | |
+# | +--------------+ | | +--------------+ |
+# | | | |
+# | rt-1 netns | | rt-2 netns |
+# | | | |
+# +-----------------------------------+ +-----------------------------------+
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table
+# +----------------------------------------------------------------+
+# |SID |Action |
+# +----------------------------------------------------------------+
+# |fc00:21:100::6004|apply SRv6 End.DX6 nh6 cafe::1 dev veth-t100 |
+# +----------------------------------------------------------------+
+#
+# rt-1: route table
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::2 |apply seg6 encap segs fc00:12:100::6004|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth_t100 |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table
+# +---------------------------------------------------------------+
+# |SID |Action |
+# +---------------------------------------------------------------+
+# |fc00:12:100::6004|apply SRv6 End.DX6 nh6 cafe::2 dev veth-t100 |
+# +---------------------------------------------------------------+
+#
+# rt-2: route table
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::1 |apply seg6 encap segs fc00:21:100::6004|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth_t100 |
+# +---------------------------------------------------+
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+readonly IPv6_RT_NETWORK=2001:11
+readonly IPv6_HS_NETWORK=cafe
+readonly SID_LOCATOR=fc00
+
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+cleanup()
+{
+ ip link del veth-rt-1 2>/dev/null || true
+ ip link del veth-rt-2 2>/dev/null || true
+
+ # destroy routers rt-* and hosts hs-*
+ for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
+ ip netns del ${ns} || true
+ done
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns add ${nsname}
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip link set veth-rt-${rt} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${rt} name veth0
+
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad
+ ip -netns ${nsname} link set veth0 up
+ ip -netns ${nsname} link set lo up
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_rt_netfilter()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns exec ${nsname} sysctl -wq net.netfilter.nf_hooks_lwtunnel=1
+ ip netns exec ${nsname} ip6tables -t raw -A PREROUTING -m rpfilter --invert -j DROP
+}
+
+setup_hs()
+{
+ local hs=$1
+ local rt=$2
+ local tid=$3
+ local hsname=hs-${hs}
+ local rtname=rt-${rt}
+ local rtveth=veth-t${tid}
+
+ # set the networking for the host
+ ip netns add ${hsname}
+
+ ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+ ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+ ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad
+ ip -netns ${hsname} link set veth0 up
+ ip -netns ${hsname} link set lo up
+
+ ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::${rt}${hs}/64 dev ${rtveth}
+ ip -netns ${rtname} link set ${rtveth} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1
+}
+
+setup_vpn_config()
+{
+ local hssrc=$1
+ local rtsrc=$2
+ local hsdst=$3
+ local rtdst=$4
+ local tid=$5
+
+ local hssrc_name=hs-t${tid}-${hssrc}
+ local hsdst_name=hs-t${tid}-${hsdst}
+ local rtsrc_name=rt-${rtsrc}
+ local rtdst_name=rt-${rtdst}
+ local rtveth=veth-t${tid}
+ local vpn_sid=${SID_LOCATOR}:${hssrc}${hsdst}:${tid}::6004
+
+ ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth}
+
+ # set the encap route for encapsulating packets which arrive from the
+ # host hssrc and destined to the access router rtsrc.
+ ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 \
+ via 2001:11::${rtdst} dev veth0
+
+ # set the decap route for decapsulating packets which arrive from
+ # the rtdst router and destined to the hsdst host.
+ ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 \
+ encap seg6local action End.DX6 nh6 ${IPv6_HS_NETWORK}::${hsdst} dev veth-t${tid}
+}
+
+setup()
+{
+ ip link add veth-rt-1 type veth peer name veth-rt-2
+ # setup the networking for router rt-1 and router rt-2
+ setup_rt_networking 1
+ setup_rt_networking 2
+
+ # setup two hosts for the tenant 100.
+ # - host hs-1 is directly connected to the router rt-1;
+ # - host hs-2 is directly connected to the router rt-2.
+ setup_hs 1 1 100
+ setup_hs 2 2 100
+
+ # setup the IPv4 L3 VPN which connects the host hs-1 and host hs-2.
+ setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant
+ setup_vpn_config 2 2 1 1 100
+}
+
+check_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ ip netns exec hs-${hssrc} ping -6 -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ check_hs_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "Hosts connectivity: hs-${hssrc} -> hs-${hsdst} (tenant ${tid})"
+}
+
+host_tests()
+{
+ log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+}
+
+router_netfilter_tests()
+{
+ log_section "SRv6 VPN connectivity test with netfilter enabled in routers"
+ setup_rt_netfilter 1
+ setup_rt_netfilter 2
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+cleanup &>/dev/null
+
+setup
+
+host_tests
+router_netfilter_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index 8802604148..11a1ebda56 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -7,7 +7,7 @@ source net_helper.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="xdp_dummy.o"
+BPF_FILE="xdp_dummy.bpf.o"
# set global exit status, but never reset nonzero one.
check_err()
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
index 7080eae531..c51ea90a13 100755
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -7,7 +7,7 @@ source net_helper.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="xdp_dummy.o"
+BPF_FILE="xdp_dummy.bpf.o"
cleanup() {
local -r jobs="$(jobs -p)"
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
index e1ff645bd3..17404f49cd 100755
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -7,7 +7,7 @@ source net_helper.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="xdp_dummy.o"
+BPF_FILE="xdp_dummy.bpf.o"
cleanup() {
local -r jobs="$(jobs -p)"
@@ -42,8 +42,8 @@ run_one() {
ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp
tc -n "${PEER_NS}" qdisc add dev veth1 clsact
- tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file nat6to4.o section schedcls/ingress6/nat_6 direct-action
- tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file nat6to4.o section schedcls/egress4/snat4 direct-action
+ tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file nat6to4.bpf.o section schedcls/ingress6/nat_6 direct-action
+ tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file nat6to4.bpf.o section schedcls/egress4/snat4 direct-action
echo ${rx_args}
ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
@@ -89,7 +89,7 @@ if [ ! -f ${BPF_FILE} ]; then
exit -1
fi
-if [ ! -f nat6to4.o ]; then
+if [ ! -f nat6to4.bpf.o ]; then
echo "Missing nat6to4 helper. Run 'make' first"
exit -1
fi
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index 83ed987cff..550d8eb3e2 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -3,7 +3,7 @@
source net_helper.sh
-BPF_FILE="xdp_dummy.o"
+BPF_FILE="xdp_dummy.bpf.o"
readonly BASE="ns-$(mktemp -u XXXXXX)"
readonly SRC=2
readonly DST=1
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
index 3a394b43e2..4f1edbafb9 100755
--- a/tools/testing/selftests/net/veth.sh
+++ b/tools/testing/selftests/net/veth.sh
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
-BPF_FILE="xdp_dummy.o"
+BPF_FILE="xdp_dummy.bpf.o"
readonly STATS="$(mktemp -p /tmp ns-XXXXXX)"
readonly BASE=`basename $STATS`
readonly SRC=2
diff --git a/tools/testing/selftests/net/xdp_dummy.c b/tools/testing/selftests/net/xdp_dummy.bpf.c
index d988b2e0ce..d988b2e0ce 100644
--- a/tools/testing/selftests/net/xdp_dummy.c
+++ b/tools/testing/selftests/net/xdp_dummy.bpf.c
diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh
index 4577895306..3eeeeffb40 100755
--- a/tools/testing/selftests/net/xfrm_policy.sh
+++ b/tools/testing/selftests/net/xfrm_policy.sh
@@ -293,7 +293,7 @@ check_random_order()
local ns=$1
local log=$2
- for i in $(seq 100); do
+ for i in $(seq 50); do
ip -net $ns xfrm policy flush
for j in $(seq 0 16 255 | sort -R); do
ip -net $ns xfrm policy add dst $j.0.0.0/24 dir out priority 10 action allow
@@ -306,7 +306,7 @@ check_random_order()
done
done
- for i in $(seq 100); do
+ for i in $(seq 50); do
ip -net $ns xfrm policy flush
for j in $(seq 0 16 255 | sort -R); do
local addr=$(printf "e000:0000:%02x00::/56" $j)