41 files changed, 8960 insertions, 0 deletions
diff --git a/tools/testing/selftests/net/netfilter/.gitignore b/tools/testing/selftests/net/netfilter/.gitignore
new file mode 100644
index 0000000000..0a64d6d0e2
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/.gitignore
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+audit_logread
+connect_close
+conntrack_dump_flush
+sctp_collision
+nf_queue
diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile
new file mode 100644
index 0000000000..47945b2b3f
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/Makefile
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+
+top_srcdir = ../../../../..
+
+HOSTPKG_CONFIG := pkg-config
+MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
+MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
+
+TEST_PROGS := br_netfilter.sh bridge_brouter.sh
+TEST_PROGS += conntrack_icmp_related.sh
+TEST_PROGS += conntrack_ipip_mtu.sh
+TEST_PROGS += conntrack_tcp_unreplied.sh
+TEST_PROGS += conntrack_sctp_collision.sh
+TEST_PROGS += conntrack_vrf.sh
+TEST_PROGS += ipvs.sh
+TEST_PROGS += nf_conntrack_packetdrill.sh
+TEST_PROGS += nf_nat_edemux.sh
+TEST_PROGS += nft_audit.sh
+TEST_PROGS += nft_concat_range.sh
+TEST_PROGS += nft_conntrack_helper.sh
+TEST_PROGS += nft_fib.sh
+TEST_PROGS += nft_flowtable.sh
+TEST_PROGS += nft_meta.sh
+TEST_PROGS += nft_nat.sh
+TEST_PROGS += nft_nat_zones.sh
+TEST_PROGS += nft_queue.sh
+TEST_PROGS += nft_synproxy.sh
+TEST_PROGS += nft_zones_many.sh
+TEST_PROGS += rpath.sh
+TEST_PROGS += xt_string.sh
+
+TEST_PROGS_EXTENDED = nft_concat_range_perf.sh
+
+TEST_GEN_PROGS = conntrack_dump_flush
+
+TEST_GEN_FILES = audit_logread
+TEST_GEN_FILES += connect_close nf_queue
+TEST_GEN_FILES += sctp_collision
+
+include ../../lib.mk
+
+$(OUTPUT)/nf_queue: CFLAGS += $(MNL_CFLAGS)
+$(OUTPUT)/nf_queue: LDLIBS += $(MNL_LDLIBS)
+
+$(OUTPUT)/conntrack_dump_flush: CFLAGS += $(MNL_CFLAGS)
+$(OUTPUT)/conntrack_dump_flush: LDLIBS += $(MNL_LDLIBS)
+
+TEST_FILES := lib.sh
+TEST_FILES += packetdrill
+
+TEST_INCLUDES := \
+	../lib.sh
diff --git a/tools/testing/selftests/net/netfilter/audit_logread.c b/tools/testing/selftests/net/netfilter/audit_logread.c
new file mode 100644
index 0000000000..a0a880fc2d
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/audit_logread.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <linux/audit.h>
+#include <linux/netlink.h>
+
+static int fd;
+
+#define MAX_AUDIT_MESSAGE_LENGTH	8970
+struct audit_message {
+	struct nlmsghdr nlh;
+	union {
+		struct audit_status s;
+		char data[MAX_AUDIT_MESSAGE_LENGTH];
+	} u;
+};
+
+int audit_recv(int fd, struct audit_message *rep)
+{
+	struct sockaddr_nl addr;
+	socklen_t addrlen = sizeof(addr);
+	int ret;
+
+	do {
+		ret = recvfrom(fd, rep, sizeof(*rep), 0,
+			       (struct sockaddr *)&addr, &addrlen);
+	} while (ret < 0 && errno == EINTR);
+
+	if (ret < 0 ||
+	    addrlen != sizeof(addr) ||
+	    addr.nl_pid != 0 ||
+	    rep->nlh.nlmsg_type == NLMSG_ERROR) /* short-cut for now */
+		return -1;
+
+	return ret;
+}
+
+int audit_send(int fd, uint16_t type, uint32_t key, uint32_t val)
+{
+	static int seq = 0;
+	struct audit_message msg = {
+		.nlh = {
+			.nlmsg_len   = NLMSG_SPACE(sizeof(msg.u.s)),
+			.nlmsg_type  = type,
+			.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+			.nlmsg_seq   = ++seq,
+		},
+		.u.s = {
+			.mask    = key,
+			.enabled = key == AUDIT_STATUS_ENABLED ? val : 0,
+			.pid     = key == AUDIT_STATUS_PID ? val : 0,
+		}
+	};
+	struct sockaddr_nl addr = {
+		.nl_family = AF_NETLINK,
+	};
+	int ret;
+
+	do {
+		ret = sendto(fd, &msg, msg.nlh.nlmsg_len, 0,
+			     (struct sockaddr *)&addr, sizeof(addr));
+	} while (ret < 0 && errno == EINTR);
+
+	if (ret != (int)msg.nlh.nlmsg_len)
+		return -1;
+	return 0;
+}
+
+int audit_set(int fd, uint32_t key, uint32_t val)
+{
+	struct audit_message rep = { 0 };
+	int ret;
+
+	ret = audit_send(fd, AUDIT_SET, key, val);
+	if (ret)
+		return ret;
+
+	ret = audit_recv(fd, &rep);
+	if (ret < 0)
+		return ret;
+	return 0;
+}
+
+int readlog(int fd)
+{
+	struct audit_message rep = { 0 };
+	int ret = audit_recv(fd, &rep);
+	const char *sep = "";
+	char *k, *v;
+
+	if (ret < 0)
+		return ret;
+
+	if (rep.nlh.nlmsg_type != AUDIT_NETFILTER_CFG)
+		return 0;
+
+	/* skip the initial "audit(...): " part */
+	strtok(rep.u.data, " ");
+
+	while ((k = strtok(NULL, "="))) {
+		v = strtok(NULL, " ");
+
+		/* these vary and/or are uninteresting, ignore */
+		if (!strcmp(k, "pid") ||
+		    !strcmp(k, "comm") ||
+		    !strcmp(k, "subj"))
+			continue;
+
+		/* strip the varying sequence number */
+		if (!strcmp(k, "table"))
+			*strchrnul(v, ':') = '\0';
+
+		printf("%s%s=%s", sep, k, v);
+		sep = " ";
+	}
+	if (*sep) {
+		printf("\n");
+		fflush(stdout);
+	}
+	return 0;
+}
+
+void cleanup(int sig)
+{
+	audit_set(fd, AUDIT_STATUS_ENABLED, 0);
+	close(fd);
+	if (sig)
+		exit(0);
+}
+
+int main(int argc, char **argv)
+{
+	struct sigaction act = {
+		.sa_handler = cleanup,
+	};
+
+	fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_AUDIT);
+	if (fd < 0) {
+		perror("Can't open netlink socket");
+		return -1;
+	}
+
+	if (sigaction(SIGTERM, &act, NULL) < 0 ||
+	    sigaction(SIGINT, &act, NULL) < 0) {
+		perror("Can't set signal handler");
+		close(fd);
+		return -1;
+	}
+
+	audit_set(fd, AUDIT_STATUS_ENABLED, 1);
+	audit_set(fd, AUDIT_STATUS_PID, getpid());
+
+	while (1)
+		readlog(fd);
+}
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh
new file mode 100755
index 0000000000..c28379a965
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh
@@ -0,0 +1,171 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test for legacy br_netfilter module combined with connection tracking,
+# a combination that doesn't really work.
+# Multicast/broadcast packets race for hash table insertion.
+
+#           eth0    br0     eth0
+# setup is: ns1 <->,ns0 <-> ns3
+#           ns2 <-'    `'-> ns4
+
+source lib.sh
+
+checktool "nft --version" "run test without nft tool"
+
+cleanup() {
+	cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+setup_ns ns0 ns1 ns2 ns3 ns4
+
+ret=0
+
+do_ping()
+{
+	fromns="$1"
+	dstip="$2"
+
+	if ! ip netns exec "$fromns" ping -c 1 -q "$dstip" > /dev/null; then
+		echo "ERROR: ping from $fromns to $dstip"
+		ip netns exec "$ns0" nft list ruleset
+		ret=1
+	fi
+}
+
+bcast_ping()
+{
+	fromns="$1"
+	dstip="$2"
+
+	local packets=500
+
+	[ "$KSFT_MACHINE_SLOW" = yes ] && packets=100
+
+	for i in $(seq 1 $packets); do
+		if ! ip netns exec "$fromns" ping -q -f -b -c 1 -q "$dstip" > /dev/null 2>&1; then
+			echo "ERROR: ping -b from $fromns to $dstip"
+			ip netns exec "$ns0" nft list ruleset
+			ret=1
+			break
+		fi
+	done
+}
+
+ip netns exec "$ns0" sysctl -q net.ipv4.conf.all.rp_filter=0
+ip netns exec "$ns0" sysctl -q net.ipv4.conf.default.rp_filter=0
+
+if ! ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns1"; then
+	echo "SKIP: Can't create veth device"
+	exit $ksft_skip
+fi
+
+ip link add veth2 netns "$ns0" type veth peer name eth0 netns "$ns2"
+ip link add veth3 netns "$ns0" type veth peer name eth0 netns "$ns3"
+ip link add veth4 netns "$ns0" type veth peer name eth0 netns "$ns4"
+
+for i in $(seq 1 4); do
+  ip -net "$ns0" link set "veth$i" up
+done
+
+if ! ip -net "$ns0" link add br0 type bridge stp_state 0 forward_delay 0 nf_call_iptables 1 nf_call_ip6tables 1 nf_call_arptables 1; then
+	echo "SKIP: Can't create bridge br0"
+	exit $ksft_skip
+fi
+
+# make veth0,1,2 part of bridge.
+for i in $(seq 1 3); do
+  ip -net "$ns0" link set "veth$i" master br0
+done
+
+# add a macvlan on top of the bridge.
+MACVLAN_ADDR=ba:f3:13:37:42:23
+ip -net "$ns0" link add link br0 name macvlan0 type macvlan mode private
+ip -net "$ns0" link set macvlan0 address ${MACVLAN_ADDR}
+ip -net "$ns0" link set macvlan0 up
+ip -net "$ns0" addr add 10.23.0.1/24 dev macvlan0
+
+# add a macvlan on top of veth4.
+MACVLAN_ADDR=ba:f3:13:37:42:24
+ip -net "$ns0" link add link veth4 name macvlan4 type macvlan mode passthru
+ip -net "$ns0" link set macvlan4 address ${MACVLAN_ADDR}
+ip -net "$ns0" link set macvlan4 up
+
+# make the macvlan part of the bridge.
+# veth4 is not a bridge port, only the macvlan on top of it.
+ip -net "$ns0" link set macvlan4 master br0
+
+ip -net "$ns0" link set br0 up
+ip -net "$ns0" addr add 10.0.0.1/24 dev br0
+
+modprobe -q br_netfilter
+if ! ip netns exec "$ns0" sysctl -q net.bridge.bridge-nf-call-iptables=1; then
+	echo "SKIP: bridge netfilter not available"
+	ret=$ksft_skip
+fi
+
+# for testing, so namespaces will reply to ping -b probes.
+ip netns exec "$ns0" sysctl -q net.ipv4.icmp_echo_ignore_broadcasts=0
+
+# enable conntrack in ns0 and drop broadcast packets in forward to
+# avoid them from getting confirmed in the postrouting hook before
+# the cloned skb is passed up the stack.
+ip netns exec "$ns0" nft -f - <<EOF
+table ip filter {
+	chain input {
+		type filter hook input priority 1; policy accept
+		iifname br0 counter
+		ct state new accept
+	}
+}
+
+table bridge filter {
+	chain forward {
+		type filter hook forward priority 0; policy accept
+		meta pkttype broadcast ip protocol icmp counter drop
+	}
+}
+EOF
+if [ "$?" -ne 0 ];then
+	echo "SKIP: could not add nftables ruleset"
+	exit $ksft_skip
+fi
+
+# place 1, 2 & 3 in same subnet, connected via ns0:br0.
+# ns4 is placed in same subnet as well, but its not
+# part of the bridge: the corresponding veth4 is not
+# part of the bridge, only its macvlan interface.
+for i in $(seq 1 4); do
+  eval ip -net \$ns"$i" link set eth0 up
+done
+for i in $(seq 1 2); do
+  eval ip -net \$ns"$i" addr add "10.0.0.1$i/24" dev eth0
+done
+
+ip -net "$ns3" addr add 10.23.0.13/24 dev eth0
+ip -net "$ns4" addr add 10.23.0.14/24 dev eth0
+
+# test basic connectivity
+do_ping "$ns1" 10.0.0.12
+do_ping "$ns3" 10.23.0.1
+do_ping "$ns4" 10.23.0.1
+
+bcast_ping "$ns1" 10.0.0.255
+
+# This should deliver broadcast to macvlan0, which is on top of ns0:br0.
+bcast_ping "$ns3" 10.23.0.255
+
+# same, this time via veth4:macvlan4.
+bcast_ping "$ns4" 10.23.0.255
+
+read t < /proc/sys/kernel/tainted
+if [ "$t" -eq 0 ];then
+	echo PASS: kernel not tainted
+else
+	echo ERROR: kernel is tainted
+	ret=1
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/bridge_brouter.sh b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
new file mode 100755
index 0000000000..2549b65906
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+#
+# This test is for bridge 'brouting', i.e. make some packets being routed
+# rather than getting bridged even though they arrive on interface that is
+# part of a bridge.
+
+#           eth0    br0     eth0
+# setup is: ns1 <-> nsbr <-> ns2
+
+source lib.sh
+
+if ! ebtables -V > /dev/null 2>&1;then
+	echo "SKIP: Could not run test without ebtables"
+	exit $ksft_skip
+fi
+
+cleanup() {
+	cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+setup_ns nsbr ns1 ns2
+
+ip netns exec "$nsbr" sysctl -q net.ipv4.conf.default.rp_filter=0
+ip netns exec "$nsbr" sysctl -q net.ipv4.conf.all.rp_filter=0
+if ! ip link add veth0 netns "$nsbr" type veth peer name eth0 netns "$ns1"; then
+	echo "SKIP: Can't create veth device"
+	exit $ksft_skip
+fi
+ip link add veth1 netns "$nsbr" type veth peer name eth0 netns "$ns2"
+
+if ! ip -net "$nsbr" link add br0 type bridge; then
+	echo "SKIP: Can't create bridge br0"
+	exit $ksft_skip
+fi
+
+ip -net "$nsbr" link set veth0 up
+ip -net "$nsbr" link set veth1 up
+
+ip -net "$nsbr" link set veth0 master br0
+ip -net "$nsbr" link set veth1 master br0
+ip -net "$nsbr" link set br0 up
+ip -net "$nsbr" addr add 10.0.0.1/24 dev br0
+
+# place both in same subnet, ${ns1} and ${ns2} connected via ${nsbr}:br0
+ip -net "$ns1" link set eth0 up
+ip -net "$ns2" link set eth0 up
+ip -net "$ns1" addr add 10.0.0.11/24 dev eth0
+ip -net "$ns2" addr add 10.0.0.12/24 dev eth0
+
+test_ebtables_broute()
+{
+	# redirect is needed so the dstmac is rewritten to the bridge itself,
+	# ip stack won't process OTHERHOST (foreign unicast mac) packets.
+	if ! ip netns exec "$nsbr" ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP; then
+		echo "SKIP: Could not add ebtables broute redirect rule"
+		return $ksft_skip
+	fi
+
+	ip netns exec "$nsbr" sysctl -q net.ipv4.conf.veth0.forwarding=0
+
+	# ping net${ns1}, expected to not work (ip forwarding is off)
+	if ip netns exec "$ns1" ping -q -c 1 10.0.0.12 -W 0.5 > /dev/null 2>&1; then
+		echo "ERROR: ping works, should have failed" 1>&2
+		return 1
+	fi
+
+	# enable forwarding on both interfaces.
+	# neither needs an ip address, but at least the bridge needs
+	# an ip address in same network segment as ${ns1} and ${ns2} (${nsbr}
+	# needs to be able to determine route for to-be-forwarded packet).
+	ip netns exec "$nsbr" sysctl -q net.ipv4.conf.veth0.forwarding=1
+	ip netns exec "$nsbr" sysctl -q net.ipv4.conf.veth1.forwarding=1
+
+	if ! ip netns exec "$ns1" ping -q -c 1 10.0.0.12 > /dev/null; then
+		echo "ERROR: ping did not work, but it should (broute+forward)" 1>&2
+		return 1
+	fi
+
+	echo "PASS: ${ns1}/${ns2} connectivity with active broute rule"
+	ip netns exec "$nsbr" ebtables -t broute -F
+
+	# ping net${ns1}, expected to work (frames are bridged)
+	if ! ip netns exec "$ns1" ping -q -c 1 10.0.0.12 > /dev/null; then
+		echo "ERROR: ping did not work, but it should (bridged)" 1>&2
+		return 1
+	fi
+
+	ip netns exec "$nsbr" ebtables -t filter -A FORWARD -p ipv4 --ip-protocol icmp -j DROP
+
+	# ping net${ns1}, expected to not work (DROP in bridge forward)
+	if ip netns exec "$ns1" ping -q -c 1 10.0.0.12 -W 0.5 > /dev/null 2>&1; then
+		echo "ERROR: ping works, should have failed (icmp forward drop)" 1>&2
+		return 1
+	fi
+
+	# re-activate brouter
+	ip netns exec "$nsbr" ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP
+
+	if ! ip netns exec "$ns2" ping -q -c 1 10.0.0.11 > /dev/null; then
+		echo "ERROR: ping did not work, but it should (broute+forward 2)" 1>&2
+		return 1
+	fi
+
+	echo "PASS: ${ns1}/${ns2} connectivity with active broute rule and bridge forward drop"
+	return 0
+}
+
+# test basic connectivity
+if ! ip netns exec "$ns1" ping -c 1 -q 10.0.0.12 > /dev/null; then
+    echo "ERROR: Could not reach ${ns2} from ${ns1}" 1>&2
+    exit 1
+fi
+
+if ! ip netns exec "$ns2" ping -c 1 -q 10.0.0.11 > /dev/null; then
+    echo "ERROR: Could not reach ${ns1} from ${ns2}" 1>&2
+    exit 1
+fi
+
+test_ebtables_broute
+exit $?
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
new file mode 100644
index 0000000000..63ef80ef47
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/config
@@ -0,0 +1,89 @@
+CONFIG_AUDIT=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_BRIDGE=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_IP=m
+CONFIG_BRIDGE_EBT_REDIRECT=m
+CONFIG_BRIDGE_EBT_T_FILTER=m
+CONFIG_BRIDGE_NETFILTER=m
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_CGROUP_BPF=y
+CONFIG_DUMMY=m
+CONFIG_INET_ESP=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP_SCTP=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_RR=m
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_MACVLAN=m
+CONFIG_NAMESPACES=y
+CONFIG_NET_CLS_U32=m
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_NS=y
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_IPIP=m
+CONFIG_NET_VRF=y
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_NETLINK=m
+CONFIG_NETFILTER_NETLINK_QUEUE=m
+CONFIG_NETFILTER_SYNPROXY=m
+CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XT_NAT=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_PROTO_SCTP=y
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_LOG_IPV4=m
+CONFIG_NF_LOG_IPV6=m
+CONFIG_NF_NAT=m
+CONFIG_NF_NAT_REDIRECT=y
+CONFIG_NF_NAT_MASQUERADE=y
+CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_IPV4=y
+CONFIG_NF_TABLES_IPV6=y
+CONFIG_NF_TABLES_NETDEV=y
+CONFIG_NF_FLOW_TABLE_INET=m
+CONFIG_NFT_BRIDGE_META=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_FIB=m
+CONFIG_NFT_FIB_INET=m
+CONFIG_NFT_FIB_IPV4=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NFT_FLOW_OFFLOAD=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_MASQ=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_NUMGEN=m
+CONFIG_NFT_QUEUE=m
+CONFIG_NFT_QUOTA=m
+CONFIG_NFT_REDIR=m
+CONFIG_NFT_SYNPROXY=m
+CONFIG_VETH=m
+CONFIG_VLAN_8021Q=m
+CONFIG_XFRM_USER=m
+CONFIG_XFRM_STATISTICS=y
+CONFIG_NET_PKTGEN=m
+CONFIG_TUN=m
diff --git a/tools/testing/selftests/net/netfilter/connect_close.c b/tools/testing/selftests/net/netfilter/connect_close.c
new file mode 100644
index 0000000000..1c3b0add54
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/connect_close.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+
+#include <arpa/inet.h>
+#include <sys/socket.h>
+
+#define PORT 12345
+#define RUNTIME 10
+
+static struct {
+	unsigned int timeout;
+	unsigned int port;
+} opts = {
+	.timeout = RUNTIME,
+	.port = PORT,
+};
+
+static void handler(int sig)
+{
+	_exit(sig == SIGALRM ? 0 : 1);
+}
+
+static void set_timeout(void)
+{
+	struct sigaction action = {
+		.sa_handler = handler,
+	};
+
+	sigaction(SIGALRM, &action, NULL);
+
+	alarm(opts.timeout);
+}
+
+static void do_connect(const struct sockaddr_in *dst)
+{
+	int s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+
+	if (s >= 0)
+		fcntl(s, F_SETFL, O_NONBLOCK);
+
+	connect(s, (struct sockaddr *)dst, sizeof(*dst));
+	close(s);
+}
+
+static void do_accept(const struct sockaddr_in *src)
+{
+	int c, one = 1, s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+
+	if (s < 0)
+		return;
+
+	setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
+	setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+
+	bind(s, (struct sockaddr *)src, sizeof(*src));
+
+	listen(s, 16);
+
+	c = accept(s, NULL, NULL);
+	if (c >= 0)
+		close(c);
+
+	close(s);
+}
+
+static int accept_loop(void)
+{
+	struct sockaddr_in src = {
+		.sin_family = AF_INET,
+		.sin_port = htons(opts.port),
+	};
+
+	inet_pton(AF_INET, "127.0.0.1", &src.sin_addr);
+
+	set_timeout();
+
+	for (;;)
+		do_accept(&src);
+
+	return 1;
+}
+
+static int connect_loop(void)
+{
+	struct sockaddr_in dst = {
+		.sin_family = AF_INET,
+		.sin_port = htons(opts.port),
+	};
+
+	inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);
+
+	set_timeout();
+
+	for (;;)
+		do_connect(&dst);
+
+	return 1;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "t:p:")) != -1) {
+		switch (c) {
+		case 't':
+			opts.timeout = atoi(optarg);
+			break;
+		case 'p':
+			opts.port = atoi(optarg);
+			break;
+		}
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	pid_t p;
+
+	parse_opts(argc, argv);
+
+	p = fork();
+	if (p < 0)
+		return 111;
+
+	if (p > 0)
+		return accept_loop();
+
+	return connect_loop();
+}
diff --git a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
new file mode 100644
index 0000000000..bd9317bf5a
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
@@ -0,0 +1,469 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <time.h>
+#include <libmnl/libmnl.h>
+#include <netinet/ip.h>
+
+#include <linux/netlink.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+#include <linux/netfilter/nf_conntrack_tcp.h>
+#include "../../kselftest_harness.h"
+
+#define TEST_ZONE_ID 123
+#define NF_CT_DEFAULT_ZONE_ID 0
+
+static int reply_counter;
+
+static int build_cta_tuple_v4(struct nlmsghdr *nlh, int type,
+			      uint32_t src_ip, uint32_t dst_ip,
+			      uint16_t src_port, uint16_t dst_port)
+{
+	struct nlattr *nest, *nest_ip, *nest_proto;
+
+	nest = mnl_attr_nest_start(nlh, type);
+	if (!nest)
+		return -1;
+
+	nest_ip = mnl_attr_nest_start(nlh, CTA_TUPLE_IP);
+	if (!nest_ip)
+		return -1;
+	mnl_attr_put_u32(nlh, CTA_IP_V4_SRC, src_ip);
+	mnl_attr_put_u32(nlh, CTA_IP_V4_DST, dst_ip);
+	mnl_attr_nest_end(nlh, nest_ip);
+
+	nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO);
+	if (!nest_proto)
+		return -1;
+	mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6);
+	mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port));
+	mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port));
+	mnl_attr_nest_end(nlh, nest_proto);
+
+	mnl_attr_nest_end(nlh, nest);
+}
+
+static int build_cta_tuple_v6(struct nlmsghdr *nlh, int type,
+			      struct in6_addr src_ip, struct in6_addr dst_ip,
+			      uint16_t src_port, uint16_t dst_port)
+{
+	struct nlattr *nest, *nest_ip, *nest_proto;
+
+	nest = mnl_attr_nest_start(nlh, type);
+	if (!nest)
+		return -1;
+
+	nest_ip = mnl_attr_nest_start(nlh, CTA_TUPLE_IP);
+	if (!nest_ip)
+		return -1;
+	mnl_attr_put(nlh, CTA_IP_V6_SRC, sizeof(struct in6_addr), &src_ip);
+	mnl_attr_put(nlh, CTA_IP_V6_DST, sizeof(struct in6_addr), &dst_ip);
+	mnl_attr_nest_end(nlh, nest_ip);
+
+	nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO);
+	if (!nest_proto)
+		return -1;
+	mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6);
+	mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port));
+	mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port));
+	mnl_attr_nest_end(nlh, nest_proto);
+
+	mnl_attr_nest_end(nlh, nest);
+}
+
+static int build_cta_proto(struct nlmsghdr *nlh)
+{
+	struct nlattr *nest, *nest_proto;
+
+	nest = mnl_attr_nest_start(nlh, CTA_PROTOINFO);
+	if (!nest)
+		return -1;
+
+	nest_proto = mnl_attr_nest_start(nlh, CTA_PROTOINFO_TCP);
+	if (!nest_proto)
+		return -1;
+	mnl_attr_put_u8(nlh, CTA_PROTOINFO_TCP_STATE, TCP_CONNTRACK_ESTABLISHED);
+	mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, 0x0a0a);
+	mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_REPLY, 0x0a0a);
+	mnl_attr_nest_end(nlh, nest_proto);
+
+	mnl_attr_nest_end(nlh, nest);
+}
+
+static int conntrack_data_insert(struct mnl_socket *sock, struct nlmsghdr *nlh,
+				 uint16_t zone)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlmsghdr *rplnlh;
+	unsigned int portid;
+	int err, ret;
+
+	portid = mnl_socket_get_portid(sock);
+
+	ret = build_cta_proto(nlh);
+	if (ret < 0) {
+		perror("build_cta_proto");
+		return -1;
+	}
+	mnl_attr_put_u32(nlh, CTA_TIMEOUT, htonl(20000));
+	mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone));
+
+	if (mnl_socket_sendto(sock, nlh, nlh->nlmsg_len) < 0) {
+		perror("mnl_socket_sendto");
+		return -1;
+	}
+
+	ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+	if (ret < 0) {
+		perror("mnl_socket_recvfrom");
+		return ret;
+	}
+
+	ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL);
+	if (ret < 0) {
+		if (errno == EEXIST) {
+			/* The entries are probably still there from a previous
+			 * run. So we are good
+			 */
+			return 0;
+		}
+		perror("mnl_cb_run");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int conntrack_data_generate_v4(struct mnl_socket *sock, uint32_t src_ip,
+				      uint32_t dst_ip, uint16_t zone)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfh;
+	int ret;
+
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
+			   NLM_F_ACK | NLM_F_EXCL;
+	nlh->nlmsg_seq = time(NULL);
+
+	nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+	nfh->nfgen_family = AF_INET;
+	nfh->version = NFNETLINK_V0;
+	nfh->res_id = 0;
+
+	ret = build_cta_tuple_v4(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip, 12345, 443);
+	if (ret < 0) {
+		perror("build_cta_tuple_v4");
+		return ret;
+	}
+	ret = build_cta_tuple_v4(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip, 443, 12345);
+	if (ret < 0) {
+		perror("build_cta_tuple_v4");
+		return ret;
+	}
+	return conntrack_data_insert(sock, nlh, zone);
+}
+
+static int conntrack_data_generate_v6(struct mnl_socket *sock,
+				      struct in6_addr src_ip,
+				      struct in6_addr dst_ip,
+				      uint16_t zone)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfh;
+	int ret;
+
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
+			   NLM_F_ACK | NLM_F_EXCL;
+	nlh->nlmsg_seq = time(NULL);
+
+	nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+	nfh->nfgen_family = AF_INET6;
+	nfh->version = NFNETLINK_V0;
+	nfh->res_id = 0;
+
+	ret = build_cta_tuple_v6(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip,
+				 12345, 443);
+	if (ret < 0) {
+		perror("build_cta_tuple_v6");
+		return ret;
+	}
+	ret = build_cta_tuple_v6(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip,
+				 12345, 443);
+	if (ret < 0) {
+		perror("build_cta_tuple_v6");
+		return ret;
+	}
+	return conntrack_data_insert(sock, nlh, zone);
+}
+
+static int count_entries(const struct nlmsghdr *nlh, void *data)
+{
+	reply_counter++;
+}
+
+static int conntracK_count_zone(struct mnl_socket *sock, uint16_t zone)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlmsghdr *nlh, *rplnlh;
+	struct nfgenmsg *nfh;
+	struct nlattr *nest;
+	unsigned int portid;
+	int err, ret;
+
+	portid = mnl_socket_get_portid(sock);
+
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type	= (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_GET;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+	nlh->nlmsg_seq = time(NULL);
+
+	nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+	nfh->nfgen_family = AF_UNSPEC;
+	nfh->version = NFNETLINK_V0;
+	nfh->res_id = 0;
+
+	mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone));
+
+	ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len);
+	if (ret < 0) {
+		perror("mnl_socket_sendto");
+		return ret;
+	}
+
+	reply_counter = 0;
+	ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+	while (ret > 0) {
+		ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid,
+				 count_entries, NULL);
+		if (ret <= MNL_CB_STOP)
+			break;
+
+		ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+	}
+	if (ret < 0) {
+		perror("mnl_socket_recvfrom");
+		return ret;
+	}
+
+	return reply_counter;
+}
+
+static int conntrack_flush_zone(struct mnl_socket *sock, uint16_t zone)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlmsghdr *nlh, *rplnlh;
+	struct nfgenmsg *nfh;
+	struct nlattr *nest;
+	unsigned int portid;
+	int err, ret;
+
+	portid = mnl_socket_get_portid(sock);
+
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type	= (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_DELETE;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+	nlh->nlmsg_seq = time(NULL);
+
+	nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+	nfh->nfgen_family = AF_UNSPEC;
+	nfh->version = NFNETLINK_V0;
+	nfh->res_id = 0;
+
+	mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone));
+
+	ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len);
+	if (ret < 0) {
+		perror("mnl_socket_sendto");
+		return ret;
+	}
+
+	ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+	if (ret < 0) {
+		perror("mnl_socket_recvfrom");
+		return ret;
+	}
+
+	ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL);
+	if (ret < 0) {
+		perror("mnl_cb_run");
+		return ret;
+	}
+
+	return 0;
+}
+
+FIXTURE(conntrack_dump_flush)
+{
+	struct mnl_socket *sock;
+};
+
+FIXTURE_SETUP(conntrack_dump_flush)
+{
+	struct in6_addr src, dst;
+	int ret;
+
+	self->sock = mnl_socket_open(NETLINK_NETFILTER);
+	if (!self->sock) {
+		perror("mnl_socket_open");
+		SKIP(return, "cannot open netlink_netfilter socket");
+	}
+
+	ret = mnl_socket_bind(self->sock, 0, MNL_SOCKET_AUTOPID);
+	EXPECT_EQ(ret, 0);
+
+	ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+	if (ret < 0 && errno == EPERM)
+		SKIP(return, "Needs to be run as root");
+	else if (ret < 0 && errno == EOPNOTSUPP)
+		SKIP(return, "Kernel does not seem to support conntrack zones");
+
+	ret = conntrack_data_generate_v4(self->sock, 0xf0f0f0f0, 0xf1f1f1f1,
+					 TEST_ZONE_ID);
+	EXPECT_EQ(ret, 0);
+	ret = conntrack_data_generate_v4(self->sock, 0xf2f2f2f2, 0xf3f3f3f3,
+					 TEST_ZONE_ID + 1);
+	EXPECT_EQ(ret, 0);
+	ret = conntrack_data_generate_v4(self->sock, 0xf4f4f4f4, 0xf5f5f5f5,
+					 TEST_ZONE_ID + 2);
+	EXPECT_EQ(ret, 0);
+	ret = conntrack_data_generate_v4(self->sock, 0xf6f6f6f6, 0xf7f7f7f7,
+					 NF_CT_DEFAULT_ZONE_ID);
+	EXPECT_EQ(ret, 0);
+
+	src = (struct in6_addr) {{
+		.__u6_addr32 = {
+			0xb80d0120,
+			0x00000000,
+			0x00000000,
+			0x01000000
+		}
+	}};
+	dst = (struct in6_addr) {{
+		.__u6_addr32 = {
+			0xb80d0120,
+			0x00000000,
+			0x00000000,
+			0x02000000
+		}
+	}};
+	ret = conntrack_data_generate_v6(self->sock, src, dst,
+					 TEST_ZONE_ID);
+	EXPECT_EQ(ret, 0);
+	src = (struct in6_addr) {{
+		.__u6_addr32 = {
+			0xb80d0120,
+			0x00000000,
+			0x00000000,
+			0x03000000
+		}
+	}};
+	dst = (struct in6_addr) {{
+		.__u6_addr32 = {
+			0xb80d0120,
+			0x00000000,
+			0x00000000,
+			0x04000000
+		}
+	}};
+	ret = conntrack_data_generate_v6(self->sock, src, dst,
+					 TEST_ZONE_ID + 1);
+	EXPECT_EQ(ret, 0);
+	src = (struct in6_addr) {{
+		.__u6_addr32 = {
+			0xb80d0120,
+			0x00000000,
+			0x00000000,
+			0x05000000
+		}
+	}};
+	dst = (struct in6_addr) {{
+		.__u6_addr32 = {
+			0xb80d0120,
+			0x00000000,
+			0x00000000,
+			0x06000000
+		}
+	}};
+	ret = conntrack_data_generate_v6(self->sock, src, dst,
+					 TEST_ZONE_ID + 2);
+	EXPECT_EQ(ret, 0);
+
+	src = (struct in6_addr) {{
+		.__u6_addr32 = {
+			0xb80d0120,
+			0x00000000,
+			0x00000000,
+			0x07000000
+		}
+	}};
+	dst = (struct in6_addr) {{
+		.__u6_addr32 = {
+			0xb80d0120,
+			0x00000000,
+			0x00000000,
+			0x08000000
+		}
+	}};
+	ret = conntrack_data_generate_v6(self->sock, src, dst,
+					 NF_CT_DEFAULT_ZONE_ID);
+	EXPECT_EQ(ret, 0);
+
+	ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+	EXPECT_GE(ret, 2);
+	if (ret > 2)
+		SKIP(return, "kernel does not support filtering by zone");
+}
+
+FIXTURE_TEARDOWN(conntrack_dump_flush)
+{
+}
+
+TEST_F(conntrack_dump_flush, test_dump_by_zone)
+{
+	int ret;
+
+	ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+	EXPECT_EQ(ret, 2);
+}
+
+TEST_F(conntrack_dump_flush, test_flush_by_zone)
+{
+	int ret;
+
+	ret = conntrack_flush_zone(self->sock, TEST_ZONE_ID);
+	EXPECT_EQ(ret, 0);
+	ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+	EXPECT_EQ(ret, 0);
+	ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1);
+	EXPECT_EQ(ret, 2);
+	ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2);
+	EXPECT_EQ(ret, 2);
+	ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+	EXPECT_EQ(ret, 2);
+}
+
+TEST_F(conntrack_dump_flush, test_flush_by_zone_default)
+{
+	int ret;
+
+	ret = conntrack_flush_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+	EXPECT_EQ(ret, 0);
+	ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+	EXPECT_EQ(ret, 2);
+	ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1);
+	EXPECT_EQ(ret, 2);
+	ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2);
+	EXPECT_EQ(ret, 2);
+	ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+	EXPECT_EQ(ret, 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh
new file mode 100755
index 0000000000..c63d840ead
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh
@@ -0,0 +1,278 @@
+#!/bin/bash
+#
+# check that ICMP df-needed/pkttoobig icmp are set are set as related
+# state
+#
+# Setup is:
+#
+# nsclient1 -> nsrouter1 -> nsrouter2 -> nsclient2
+# MTU 1500, except for nsrouter2 <-> nsclient2 link (1280).
+# ping nsclient2 from nsclient1, checking that conntrack did set RELATED
+# 'fragmentation needed' icmp packet.
+#
+# In addition, nsrouter1 will perform IP masquerading, i.e. also
+# check the icmp errors are propagated to the correct host as per
+# nat of "established" icmp-echo "connection".
+
+source lib.sh
+
+if ! nft --version > /dev/null 2>&1;then
+	echo "SKIP: Could not run test without nft tool"
+	exit $ksft_skip
+fi
+
+cleanup() {
+	cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+setup_ns nsclient1 nsclient2 nsrouter1 nsrouter2
+
+ret=0
+
+add_addr()
+{
+	ns=$1
+	dev=$2
+	i=$3
+
+	ip -net "$ns" link set "$dev" up
+	ip -net "$ns" addr add "192.168.$i.2/24" dev "$dev"
+	ip -net "$ns" addr add "dead:$i::2/64" dev "$dev" nodad
+}
+
+check_counter()
+{
+	ns=$1
+	name=$2
+	expect=$3
+	local lret=0
+
+	if ! ip netns exec "$ns" nft list counter inet filter "$name" | grep -q "$expect"; then
+		echo "ERROR: counter $name in $ns has unexpected value (expected $expect)" 1>&2
+		ip netns exec "$ns" nft list counter inet filter "$name" 1>&2
+		lret=1
+	fi
+
+	return $lret
+}
+
+check_unknown()
+{
+	expect="packets 0 bytes 0"
+	for n in ${nsclient1} ${nsclient2} ${nsrouter1} ${nsrouter2}; do
+		if ! check_counter "$n" "unknown" "$expect"; then
+			return 1
+		fi
+	done
+
+	return 0
+}
+
+DEV=veth0
+ip link add "$DEV" netns "$nsclient1" type veth peer name eth1 netns "$nsrouter1"
+ip link add "$DEV" netns "$nsclient2" type veth peer name eth1 netns "$nsrouter2"
+ip link add "$DEV" netns "$nsrouter1" type veth peer name eth2 netns "$nsrouter2"
+
+add_addr "$nsclient1" $DEV 1
+add_addr "$nsclient2" $DEV 2
+
+ip -net "$nsrouter1" link set eth1 up
+ip -net "$nsrouter1" link set $DEV up
+
+ip -net "$nsrouter2" link set eth1 mtu 1280 up
+ip -net "$nsrouter2" link set eth2 up
+
+ip -net "$nsclient1" route add default via 192.168.1.1
+ip -net "$nsclient1" -6 route add default via dead:1::1
+
+ip -net "$nsclient2" route add default via 192.168.2.1
+ip -net "$nsclient2" route add default via dead:2::1
+ip -net "$nsclient2" link set veth0 mtu 1280
+
+ip -net "$nsrouter1" addr add 192.168.1.1/24 dev eth1
+ip -net "$nsrouter1" addr add 192.168.3.1/24 dev veth0
+ip -net "$nsrouter1" addr add dead:1::1/64 dev eth1 nodad
+ip -net "$nsrouter1" addr add dead:3::1/64 dev veth0 nodad
+ip -net "$nsrouter1" route add default via 192.168.3.10
+ip -net "$nsrouter1" -6 route add default via dead:3::10
+
+ip -net "$nsrouter2" addr add 192.168.2.1/24 dev eth1
+ip -net "$nsrouter2" addr add 192.168.3.10/24 dev eth2
+ip -net "$nsrouter2" addr add dead:2::1/64  dev eth1 nodad
+ip -net "$nsrouter2" addr add dead:3::10/64 dev eth2 nodad
+ip -net "$nsrouter2" route add default via 192.168.3.1
+ip -net "$nsrouter2" route add default via dead:3::1
+
+for i in 4 6; do
+	ip netns exec "$nsrouter1" sysctl -q net.ipv$i.conf.all.forwarding=1
+	ip netns exec "$nsrouter2" sysctl -q net.ipv$i.conf.all.forwarding=1
+done
+
+for netns in "$nsrouter1" "$nsrouter2"; do
+ip netns exec "$netns" nft -f - <<EOF
+table inet filter {
+	counter unknown { }
+	counter related { }
+	chain forward {
+		type filter hook forward priority 0; policy accept;
+		meta l4proto icmpv6 icmpv6 type "packet-too-big" ct state "related" counter name "related" accept
+		meta l4proto icmp icmp type "destination-unreachable" ct state "related" counter name "related" accept
+		meta l4proto { icmp, icmpv6 } ct state new,established accept
+		counter name "unknown" drop
+	}
+}
+EOF
+done
+
+ip netns exec "$nsclient1" nft -f - <<EOF
+table inet filter {
+	counter unknown { }
+	counter related { }
+	counter redir4 { }
+	counter redir6 { }
+	chain input {
+		type filter hook input priority 0; policy accept;
+
+		icmp type "redirect" ct state "related" counter name "redir4" accept
+		icmpv6 type "nd-redirect" ct state "related" counter name "redir6" accept
+
+		meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+		meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept
+
+		counter name "unknown" drop
+	}
+}
+EOF
+
+ip netns exec "$nsclient2" nft -f - <<EOF
+table inet filter {
+	counter unknown { }
+	counter new { }
+	counter established { }
+
+	chain input {
+		type filter hook input priority 0; policy accept;
+		meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+		meta l4proto { icmp, icmpv6 } ct state "new" counter name "new" accept
+		meta l4proto { icmp, icmpv6 } ct state "established" counter name "established" accept
+		counter name "unknown" drop
+	}
+	chain output {
+		type filter hook output priority 0; policy accept;
+		meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+		meta l4proto { icmp, icmpv6 } ct state "new" counter name "new"
+		meta l4proto { icmp, icmpv6 } ct state "established" counter name "established"
+		counter name "unknown" drop
+	}
+}
+EOF
+
+# make sure NAT core rewrites adress of icmp error if nat is used according to
+# conntrack nat information (icmp error will be directed at nsrouter1 address,
+# but it needs to be routed to nsclient1 address).
+ip netns exec "$nsrouter1" nft -f - <<EOF
+table ip nat {
+	chain postrouting {
+		type nat hook postrouting priority 0; policy accept;
+		ip protocol icmp oifname "veth0" counter masquerade
+	}
+}
+table ip6 nat {
+	chain postrouting {
+		type nat hook postrouting priority 0; policy accept;
+		ip6 nexthdr icmpv6 oifname "veth0" counter masquerade
+	}
+}
+EOF
+
+if ! ip netns exec "$nsclient1" ping -c 1 -s 1000 -q -M "do" 192.168.2.2 >/dev/null; then
+	echo "ERROR: netns ip routing/connectivity broken" 1>&2
+	exit 1
+fi
+if ! ip netns exec "$nsclient1" ping -c 1 -s 1000 -q dead:2::2 >/dev/null; then
+	echo "ERROR: netns ipv6 routing/connectivity broken" 1>&2
+	exit 1
+fi
+
+if ! check_unknown; then
+	ret=1
+fi
+
+expect="packets 0 bytes 0"
+for netns in "$nsrouter1" "$nsrouter2" "$nsclient1";do
+	if ! check_counter "$netns" "related" "$expect"; then
+		ret=1
+	fi
+done
+
+expect="packets 2 bytes 2076"
+if ! check_counter "$nsclient2" "new" "$expect"; then
+	ret=1
+fi
+
+if ip netns exec "$nsclient1" ping -W 0.5 -q -c 1 -s 1300 -M "do" 192.168.2.2 > /dev/null; then
+	echo "ERROR: ping should have failed with PMTU too big error" 1>&2
+	ret=1
+fi
+
+# nsrouter2 should have generated the icmp error, so
+# related counter should be 0 (its in forward).
+expect="packets 0 bytes 0"
+if ! check_counter "$nsrouter2" "related" "$expect"; then
+	ret=1
+fi
+
+# but nsrouter1 should have seen it, same for nsclient1.
+expect="packets 1 bytes 576"
+for netns in ${nsrouter1} ${nsclient1};do
+	if ! check_counter "$netns" "related" "$expect"; then
+		ret=1
+	fi
+done
+
+if ip netns exec "${nsclient1}" ping6 -W 0.5 -c 1 -s 1300 dead:2::2 > /dev/null; then
+	echo "ERROR: ping6 should have failed with PMTU too big error" 1>&2
+	ret=1
+fi
+
+expect="packets 2 bytes 1856"
+for netns in "${nsrouter1}" "${nsclient1}";do
+	if ! check_counter "$netns" "related" "$expect"; then
+		ret=1
+	fi
+done
+
+if [ $ret -eq 0 ];then
+	echo "PASS: icmp mtu error had RELATED state"
+else
+	echo "ERROR: icmp error RELATED state test has failed"
+fi
+
+# add 'bad' route,  expect icmp REDIRECT to be generated
+ip netns exec "${nsclient1}" ip route add 192.168.1.42 via 192.168.1.1
+ip netns exec "${nsclient1}" ip route add dead:1::42 via dead:1::1
+
+ip netns exec "$nsclient1" ping -W 1 -q -i 0.5 -c 2 192.168.1.42 > /dev/null
+
+expect="packets 1 bytes 112"
+if ! check_counter "$nsclient1" "redir4" "$expect"; then
+	ret=1
+fi
+
+ip netns exec "$nsclient1" ping -W 1 -c 1 dead:1::42 > /dev/null
+expect="packets 1 bytes 192"
+if ! check_counter "$nsclient1" "redir6" "$expect"; then
+	ret=1
+fi
+
+if [ $ret -eq 0 ];then
+	echo "PASS: icmp redirects had RELATED state"
+else
+	echo "ERROR: icmp redirect RELATED state test has failed"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh b/tools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh
new file mode 100755
index 0000000000..9832a5d019
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh
@@ -0,0 +1,191 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+# Conntrack needs to reassemble fragments in order to have complete
+# packets for rule matching.  Reassembly can lead to packet loss.
+
+# Consider the following setup:
+#            +--------+       +---------+       +--------+
+#            |Router A|-------|Wanrouter|-------|Router B|
+#            |        |.IPIP..|         |..IPIP.|        |
+#            +--------+       +---------+       +--------+
+#           /                  mtu 1400                   \
+#          /                                               \
+#+--------+                                                 +--------+
+#|Client A|                                                 |Client B|
+#|        |                                                 |        |
+#+--------+                                                 +--------+
+
+# Router A and Router B use IPIP tunnel interfaces to tunnel traffic
+# between Client A and Client B over WAN. Wanrouter has MTU 1400 set
+# on its interfaces.
+
+rx=$(mktemp)
+
+checktool "iptables --version" "run test without iptables"
+checktool "socat -h" "run test without socat"
+
+setup_ns r_a r_b r_w c_a c_b
+
+cleanup() {
+	cleanup_all_ns
+	rm -f "$rx"
+}
+
+trap cleanup EXIT
+
+listener_ready()
+{
+	ns="$1"
+	port="$2"
+	ss -N "$ns" -lnu -o "sport = :$port" | grep -q "$port"
+}
+
+test_path() {
+	msg="$1"
+
+	ip netns exec "$c_b" socat -t 3 - udp4-listen:5000,reuseaddr > "$rx" < /dev/null &
+
+	busywait $BUSYWAIT_TIMEOUT listener_ready "$c_b" 5000
+
+	for i in 1 2 3; do
+		head -c1400 /dev/zero | tr "\000" "a" | \
+			ip netns exec "$c_a" socat -t 1 -u STDIN UDP:192.168.20.2:5000
+	done
+
+	wait
+
+	bytes=$(wc -c < "$rx")
+
+	if [ "$bytes" -eq 1400 ];then
+		echo "OK: PMTU $msg connection tracking"
+	else
+		echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400"
+		exit 1
+	fi
+}
+
+# Detailed setup for Router A
+# ---------------------------
+# Interfaces:
+# eth0: 10.2.2.1/24
+# eth1: 192.168.10.1/24
+# ipip0: No IP address, local 10.2.2.1 remote 10.4.4.1
+# Routes:
+# 192.168.20.0/24 dev ipip0    (192.168.20.0/24 is subnet of Client B)
+# 10.4.4.1 via 10.2.2.254      (Router B via Wanrouter)
+# No iptables rules at all.
+
+ip link add veth0 netns "$r_a" type veth peer name veth0 netns "$r_w"
+ip link add veth1 netns "$r_a" type veth peer name veth0 netns "$c_a"
+
+l_addr="10.2.2.1"
+r_addr="10.4.4.1"
+ip netns exec "$r_a" ip link add ipip0 type ipip local "$l_addr" remote "$r_addr" mode ipip || exit $ksft_skip
+
+for dev in lo veth0 veth1 ipip0; do
+    ip -net "$r_a" link set "$dev" up
+done
+
+ip -net "$r_a" addr add 10.2.2.1/24 dev veth0
+ip -net "$r_a" addr add 192.168.10.1/24 dev veth1
+
+ip -net "$r_a" route add 192.168.20.0/24 dev ipip0
+ip -net "$r_a" route add 10.4.4.0/24 via 10.2.2.254
+
+ip netns exec "$r_a" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Detailed setup for Router B
+# ---------------------------
+# Interfaces:
+# eth0: 10.4.4.1/24
+# eth1: 192.168.20.1/24
+# ipip0: No IP address, local 10.4.4.1 remote 10.2.2.1
+# Routes:
+# 192.168.10.0/24 dev ipip0    (192.168.10.0/24 is subnet of Client A)
+# 10.2.2.1 via 10.4.4.254      (Router A via Wanrouter)
+# No iptables rules at all.
+
+ip link add veth0 netns "$r_b" type veth peer name veth1 netns "$r_w"
+ip link add veth1 netns "$r_b" type veth peer name veth0 netns "$c_b"
+
+l_addr="10.4.4.1"
+r_addr="10.2.2.1"
+
+ip netns exec "$r_b" ip link add ipip0 type ipip local "${l_addr}" remote "${r_addr}" mode ipip || exit $ksft_skip
+
+for dev in veth0 veth1 ipip0; do
+	ip -net "$r_b" link set $dev up
+done
+
+ip -net "$r_b" addr add 10.4.4.1/24 dev veth0
+ip -net "$r_b" addr add 192.168.20.1/24 dev veth1
+
+ip -net "$r_b" route add 192.168.10.0/24 dev ipip0
+ip -net "$r_b" route add 10.2.2.0/24 via 10.4.4.254
+ip netns exec "$r_b" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Client A
+ip -net "$c_a" addr add 192.168.10.2/24 dev veth0
+ip -net "$c_a" link set dev veth0 up
+ip -net "$c_a" route add default via 192.168.10.1
+
+# Client A
+ip -net "$c_b" addr add 192.168.20.2/24 dev veth0
+ip -net "$c_b" link set dev veth0 up
+ip -net "$c_b" route add default via 192.168.20.1
+
+# Wan
+ip -net "$r_w" addr add 10.2.2.254/24 dev veth0
+ip -net "$r_w" addr add 10.4.4.254/24 dev veth1
+
+ip -net "$r_w" link set dev veth0 up mtu 1400
+ip -net "$r_w" link set dev veth1 up mtu 1400
+
+ip -net "$r_a" link set dev veth0 mtu 1400
+ip -net "$r_b" link set dev veth0 mtu 1400
+
+ip netns exec "$r_w" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Path MTU discovery
+# ------------------
+# Running tracepath from Client A to Client B shows PMTU discovery is working
+# as expected:
+#
+# clienta:~# tracepath 192.168.20.2
+# 1?: [LOCALHOST]                      pmtu 1500
+# 1:  192.168.10.1                                          0.867ms
+# 1:  192.168.10.1                                          0.302ms
+# 2:  192.168.10.1                                          0.312ms pmtu 1480
+# 2:  no reply
+# 3:  192.168.10.1                                          0.510ms pmtu 1380
+# 3:  192.168.20.2                                          2.320ms reached
+# Resume: pmtu 1380 hops 3 back 3
+
+# ip netns exec ${c_a} traceroute --mtu 192.168.20.2
+
+# Router A has learned PMTU (1400) to Router B from Wanrouter.
+# Client A has learned PMTU (1400 - IPIP overhead = 1380) to Client B
+# from Router A.
+
+#Send large UDP packet
+#---------------------
+#Now we send a 1400 bytes UDP packet from Client A to Client B:
+
+# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | socat -u STDIN UDP:192.168.20.2:5000
+test_path "without"
+
+# The IPv4 stack on Client A already knows the PMTU to Client B, so the
+# UDP packet is sent as two fragments (1380 + 20). Router A forwards the
+# fragments between eth1 and ipip0. The fragments fit into the tunnel and
+# reach their destination.
+
+#When sending the large UDP packet again, Router A now reassembles the
+#fragments before routing the packet over ipip0. The resulting IPIP
+#packet is too big (1400) for the tunnel PMTU (1380) to Router B, it is
+#dropped on Router A before sending.
+
+ip netns exec "$r_a" iptables -A FORWARD -m conntrack --ctstate NEW
+test_path "with"
diff --git a/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh b/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh
new file mode 100755
index 0000000000..d860f7d974
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Testing For SCTP COLLISION SCENARIO as Below:
+#
+#   14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359]
+#   14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187]
+#   14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359]
+#   14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO]
+#   14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK]
+#   14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970]
+#
+# TOPO: SERVER_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) CLIENT_NS
+
+source lib.sh
+
+CLIENT_IP="198.51.200.1"
+CLIENT_PORT=1234
+
+SERVER_IP="198.51.100.1"
+SERVER_PORT=1234
+
+CLIENT_GW="198.51.200.2"
+SERVER_GW="198.51.100.2"
+
+# setup the topo
+setup() {
+	setup_ns CLIENT_NS SERVER_NS ROUTER_NS
+	ip -n "$SERVER_NS" link add link0 type veth peer name link1 netns "$ROUTER_NS"
+	ip -n "$CLIENT_NS" link add link3 type veth peer name link2 netns "$ROUTER_NS"
+
+	ip -n "$SERVER_NS" link set link0 up
+	ip -n "$SERVER_NS" addr add $SERVER_IP/24 dev link0
+	ip -n "$SERVER_NS" route add $CLIENT_IP dev link0 via $SERVER_GW
+
+	ip -n "$ROUTER_NS" link set link1 up
+	ip -n "$ROUTER_NS" link set link2 up
+	ip -n "$ROUTER_NS" addr add $SERVER_GW/24 dev link1
+	ip -n "$ROUTER_NS" addr add $CLIENT_GW/24 dev link2
+	ip net exec "$ROUTER_NS" sysctl -wq net.ipv4.ip_forward=1
+
+	ip -n "$CLIENT_NS" link set link3 up
+	ip -n "$CLIENT_NS" addr add $CLIENT_IP/24 dev link3
+	ip -n "$CLIENT_NS" route add $SERVER_IP dev link3 via $CLIENT_GW
+
+	# simulate the delay on OVS upcall by setting up a delay for INIT_ACK with
+	# tc on $SERVER_NS side
+	tc -n "$SERVER_NS" qdisc add dev link0 root handle 1: htb r2q 64
+	tc -n "$SERVER_NS" class add dev link0 parent 1: classid 1:1 htb rate 100mbit
+	tc -n "$SERVER_NS" filter add dev link0 parent 1: protocol ip u32 match ip protocol 132 \
+		0xff match u8 2 0xff at 32 flowid 1:1
+	if ! tc -n "$SERVER_NS" qdisc add dev link0 parent 1:1 handle 10: netem delay 1200ms; then
+		echo "SKIP: Cannot add netem qdisc"
+		exit $ksft_skip
+	fi
+
+	# simulate the ctstate check on OVS nf_conntrack
+	ip net exec "$ROUTER_NS" iptables -A FORWARD -m state --state INVALID,UNTRACKED -j DROP
+	ip net exec "$ROUTER_NS" iptables -A INPUT -p sctp -j DROP
+
+	# use a smaller number for assoc's max_retrans to reproduce the issue
+	modprobe -q sctp
+	ip net exec "$CLIENT_NS" sysctl -wq net.sctp.association_max_retrans=3
+}
+
+cleanup() {
+	ip net exec "$CLIENT_NS" pkill sctp_collision >/dev/null 2>&1
+	ip net exec "$SERVER_NS" pkill sctp_collision >/dev/null 2>&1
+	cleanup_all_ns
+}
+
+do_test() {
+	ip net exec "$SERVER_NS" ./sctp_collision server \
+		$SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT &
+	ip net exec "$CLIENT_NS" ./sctp_collision client \
+		$CLIENT_IP $CLIENT_PORT $SERVER_IP $SERVER_PORT
+}
+
+# NOTE: one way to work around the issue is set a smaller hb_interval
+# ip net exec $CLIENT_NS sysctl -wq net.sctp.hb_interval=3500
+
+# run the test case
+trap cleanup EXIT
+setup && \
+echo "Test for SCTP Collision in nf_conntrack:" && \
+do_test && echo "PASS!"
+exit $?
diff --git a/tools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh b/tools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh
new file mode 100755
index 0000000000..121ea93c01
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh
@@ -0,0 +1,164 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that UNREPLIED tcp conntrack will eventually timeout.
+#
+
+source lib.sh
+
+if ! nft --version > /dev/null 2>&1;then
+	echo "SKIP: Could not run test without nft tool"
+	exit $ksft_skip
+fi
+
+if ! conntrack --version > /dev/null 2>&1;then
+	echo "SKIP: Could not run test without conntrack tool"
+	exit $ksft_skip
+fi
+
+ret=0
+
+cleanup() {
+	ip netns pids "$ns1" | xargs kill 2>/dev/null
+	ip netns pids "$ns2" | xargs kill 2>/dev/null
+
+	cleanup_all_ns
+}
+
+ipv4() {
+    echo -n 192.168."$1".2
+}
+
+check_counter()
+{
+	ns=$1
+	name=$2
+	expect=$3
+	local lret=0
+
+	if ! ip netns exec "$ns2" nft list counter inet filter "$name" | grep -q "$expect"; then
+		echo "ERROR: counter $name in $ns2 has unexpected value (expected $expect)" 1>&2
+		ip netns exec "$ns2" nft list counter inet filter "$name" 1>&2
+		lret=1
+	fi
+
+	return $lret
+}
+
+trap cleanup EXIT
+
+# Create test namespaces
+setup_ns ns1 ns2
+
+# Connect the namespace to the host using a veth pair
+ip -net "$ns1" link add name veth1 type veth peer name veth2
+ip -net "$ns1" link set netns "$ns2" dev veth2
+
+ip -net "$ns1" link set up dev lo
+ip -net "$ns2" link set up dev lo
+ip -net "$ns1" link set up dev veth1
+ip -net "$ns2" link set up dev veth2
+
+ip -net "$ns2" addr add 10.11.11.2/24 dev veth2
+ip -net "$ns2" route add default via 10.11.11.1
+
+ip netns exec "$ns2" sysctl -q net.ipv4.conf.veth2.forwarding=1
+
+# add a rule inside NS so we enable conntrack
+ip netns exec "$ns1" nft -f - <<EOF
+table inet filter {
+	chain input {
+		type filter hook input priority 0; policy accept;
+		ct state established accept
+	}
+}
+EOF
+
+ip -net "$ns1" addr add 10.11.11.1/24 dev veth1
+ip -net "$ns1" route add 10.99.99.99 via 10.11.11.2
+
+# Check connectivity works
+ip netns exec "$ns1" ping -q -c 2 10.11.11.2 >/dev/null || exit 1
+
+ip netns exec "$ns2" socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT &
+
+ip netns exec "$ns2" nft -f - <<EOF
+table inet filter {
+	counter connreq { }
+	counter redir { }
+	chain input {
+		type filter hook input priority 0; policy accept;
+		ct state new tcp flags syn ip daddr 10.99.99.99 tcp dport 80 counter name "connreq" accept
+		ct state new ct status dnat tcp dport 8080 counter name "redir" accept
+	}
+}
+EOF
+if [ $? -ne 0 ]; then
+	echo "ERROR: Could not load nft rules"
+	exit 1
+fi
+
+ip netns exec "$ns2" sysctl -q net.netfilter.nf_conntrack_tcp_timeout_syn_sent=10
+
+echo "INFO: connect $ns1 -> $ns2 to the virtual ip"
+ip netns exec "$ns1" bash -c 'for i in $(seq 1 $BUSYWAIT_TIMEOUT) ; do
+	socat -u STDIN TCP:10.99.99.99:80 < /dev/null
+	sleep 0.1
+	done' &
+
+wait_for_attempt()
+{
+	count=$(ip netns exec "$ns2" conntrack -L -p tcp --dport 80 2>/dev/null | wc -l)
+	if [ "$count" -gt 0 ]; then
+		return 0
+	fi
+
+	return 1
+}
+
+# wait for conntrack to pick the new connection request up before loading
+# the nat redirect rule.
+if ! busywait "$BUSYWAIT_TIMEOUT" wait_for_attempt; then
+	echo "ERROR: $ns2 did not pick up tcp connection from peer"
+	exit 1
+fi
+
+ip netns exec "$ns2" nft -f - <<EOF
+table inet nat {
+	chain prerouting {
+		type nat hook prerouting priority 0; policy accept;
+		ip daddr 10.99.99.99 tcp dport 80 redirect to :8080
+	}
+}
+EOF
+if [ $? -ne 0 ]; then
+	echo "ERROR: Could not load nat redirect"
+	exit 1
+fi
+
+wait_for_redirect()
+{
+	count=$(ip netns exec "$ns2" conntrack -L -p tcp --reply-port-src 8080 2>/dev/null | wc -l)
+	if [ "$count" -gt 0 ]; then
+		return 0
+	fi
+
+	return 1
+}
+echo "INFO: NAT redirect added in ns $ns2, waiting for $BUSYWAIT_TIMEOUT ms for nat to take effect"
+
+busywait "$BUSYWAIT_TIMEOUT" wait_for_redirect
+ret=$?
+
+expect="packets 1 bytes 60"
+if ! check_counter "$ns2" "redir" "$expect"; then
+	ret=1
+fi
+
+if [ $ret -eq 0 ];then
+	echo "PASS: redirection counter has expected values"
+else
+	echo "ERROR: no tcp connection was redirected"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
new file mode 100755
index 0000000000..073e8e62d3
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
@@ -0,0 +1,220 @@
+#!/bin/bash
+
+# This script demonstrates interaction of conntrack and vrf.
+# The vrf driver calls the netfilter hooks again, with oif/iif
+# pointing at the VRF device.
+#
+# For ingress, this means first iteration has iifname of lower/real
+# device.  In this script, thats veth0.
+# Second iteration is iifname set to vrf device, tvrf in this script.
+#
+# For egress, this is reversed: first iteration has the vrf device,
+# second iteration is done with the lower/real/veth0 device.
+#
+# test_ct_zone_in demonstrates unexpected change of nftables
+# behavior # caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack
+# connection on VRF rcv"
+#
+# It was possible to assign conntrack zone to a packet (or mark it for
+# `notracking`) in the prerouting chain before conntrack, based on real iif.
+#
+# After the change, the zone assignment is lost and the zone is assigned based
+# on the VRF master interface (in case such a rule exists).
+# assignment is lost. Instead, assignment based on the `iif` matching
+# Thus it is impossible to distinguish packets based on the original
+# interface.
+#
+# test_masquerade_vrf and test_masquerade_veth0 demonstrate the problem
+# that was supposed to be fixed by the commit mentioned above to make sure
+# that any fix to test case 1 won't break masquerade again.
+
+source lib.sh
+
+IP0=172.30.30.1
+IP1=172.30.30.2
+PFXL=30
+ret=0
+
+cleanup()
+{
+	ip netns pids $ns0 | xargs kill 2>/dev/null
+	ip netns pids $ns1 | xargs kill 2>/dev/null
+
+	cleanup_all_ns
+}
+
+checktool "nft --version" "run test without nft"
+checktool "conntrack --version" "run test without conntrack"
+checktool "socat -h" "run test without socat"
+
+trap cleanup EXIT
+
+setup_ns ns0 ns1
+
+ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0
+ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
+ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
+
+if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then
+	echo "SKIP: Could not add veth device"
+	exit $ksft_skip
+fi
+
+if ! ip -net "$ns0" li add tvrf type vrf table 9876; then
+	echo "SKIP: Could not add vrf device"
+	exit $ksft_skip
+fi
+
+ip -net "$ns0" li set veth0 master tvrf
+ip -net "$ns0" li set tvrf up
+ip -net "$ns0" li set veth0 up
+ip -net "$ns1" li set veth0 up
+
+ip -net "$ns0" addr add $IP0/$PFXL dev veth0
+ip -net "$ns1" addr add $IP1/$PFXL dev veth0
+
+listener_ready()
+{
+        local ns="$1"
+
+        ss -N "$ns" -l -n -t -o "sport = :55555" | grep -q "55555"
+}
+
+ip netns exec "$ns1" socat -u -4 TCP-LISTEN:55555,reuseaddr,fork STDOUT > /dev/null &
+busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1"
+
+# test vrf ingress handling.
+# The incoming connection should be placed in conntrack zone 1,
+# as decided by the first iteration of the ruleset.
+test_ct_zone_in()
+{
+ip netns exec "$ns0" nft -f - <<EOF
+table testct {
+	chain rawpre {
+		type filter hook prerouting priority raw;
+
+		iif { veth0, tvrf } counter meta nftrace set 1
+		iif veth0 counter ct zone set 1 counter return
+		iif tvrf counter ct zone set 2 counter return
+		ip protocol icmp counter
+		notrack counter
+	}
+
+	chain rawout {
+		type filter hook output priority raw;
+
+		oif veth0 counter ct zone set 1 counter return
+		oif tvrf counter ct zone set 2 counter return
+		notrack counter
+	}
+}
+EOF
+	ip netns exec "$ns1" ping -W 1 -c 1 -I veth0 "$IP0" > /dev/null
+
+	# should be in zone 1, not zone 2
+	count=$(ip netns exec "$ns0" conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l)
+	if [ "$count" -eq 1 ]; then
+		echo "PASS: entry found in conntrack zone 1"
+	else
+		echo "FAIL: entry not found in conntrack zone 1"
+		count=$(ip netns exec "$ns0" conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l)
+		if [ "$count" -eq 1 ]; then
+			echo "FAIL: entry found in zone 2 instead"
+		else
+			echo "FAIL: entry not in zone 1 or 2, dumping table"
+			ip netns exec "$ns0" conntrack -L
+			ip netns exec "$ns0" nft list ruleset
+		fi
+	fi
+}
+
+# add masq rule that gets evaluated w. outif set to vrf device.
+# This tests the first iteration of the packet through conntrack,
+# oifname is the vrf device.
+test_masquerade_vrf()
+{
+	local qdisc=$1
+
+	if [ "$qdisc" != "default" ]; then
+		tc -net "$ns0" qdisc add dev tvrf root "$qdisc"
+	fi
+
+	ip netns exec "$ns0" conntrack -F 2>/dev/null
+
+ip netns exec "$ns0" nft -f - <<EOF
+flush ruleset
+table ip nat {
+	chain rawout {
+		type filter hook output priority raw;
+
+		oif tvrf ct state untracked counter
+	}
+	chain postrouting2 {
+		type filter hook postrouting priority mangle;
+
+		oif tvrf ct state untracked counter
+	}
+	chain postrouting {
+		type nat hook postrouting priority 0;
+		# NB: masquerade should always be combined with 'oif(name) bla',
+		# lack of this is intentional here, we want to exercise double-snat.
+		ip saddr 172.30.30.0/30 counter masquerade random
+	}
+}
+EOF
+	if ! ip netns exec "$ns0" ip vrf exec tvrf socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null;then
+		echo "FAIL: connect failure with masquerade + sport rewrite on vrf device"
+		ret=1
+		return
+	fi
+
+	# must also check that nat table was evaluated on second (lower device) iteration.
+	if ip netns exec "$ns0" nft list table ip nat |grep -q 'counter packets 1' &&
+	   ip netns exec "$ns0" nft list table ip nat |grep -q 'untracked counter packets [1-9]'; then
+		echo "PASS: connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
+	else
+		echo "FAIL: vrf rules have unexpected counter value"
+		ret=1
+	fi
+
+	if [ "$qdisc" != "default" ]; then
+		tc -net "$ns0" qdisc del dev tvrf root
+	fi
+}
+
+# add masq rule that gets evaluated w. outif set to veth device.
+# This tests the 2nd iteration of the packet through conntrack,
+# oifname is the lower device (veth0 in this case).
+test_masquerade_veth()
+{
+	ip netns exec "$ns0" conntrack -F 2>/dev/null
+ip netns exec "$ns0" nft -f - <<EOF
+flush ruleset
+table ip nat {
+	chain postrouting {
+		type nat hook postrouting priority 0;
+		meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
+	}
+}
+EOF
+	if ! ip netns exec "$ns0" ip vrf exec tvrf socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null;then
+		echo "FAIL: connect failure with masquerade + sport rewrite on veth device"
+		ret=1
+		return
+	fi
+
+	# must also check that nat table was evaluated on second (lower device) iteration.
+	if ip netns exec "$ns0" nft list table ip nat |grep -q 'counter packets 1'; then
+		echo "PASS: connect with masquerade + sport rewrite on veth device"
+	else
+		echo "FAIL: vrf masq rule has unexpected counter value"
+		ret=1
+	fi
+}
+
+test_ct_zone_in
+test_masquerade_vrf "default"
+test_masquerade_vrf "pfifo"
+test_masquerade_veth
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh
new file mode 100755
index 0000000000..4ceee9fb39
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/ipvs.sh
@@ -0,0 +1,211 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# End-to-end ipvs test suite
+# Topology:
+#--------------------------------------------------------------+
+#                      |                                       |
+#         ns0          |         ns1                           |
+#      -----------     |     -----------    -----------        |
+#      | veth01  | --------- | veth10  |    | veth12  |        |
+#      -----------    peer   -----------    -----------        |
+#           |          |                        |              |
+#      -----------     |                        |              |
+#      |  br0    |     |-----------------  peer |--------------|
+#      -----------     |                        |              |
+#           |          |                        |              |
+#      ----------     peer   ----------      -----------       |
+#      |  veth02 | --------- |  veth20 |     | veth21  |       |
+#      ----------      |     ----------      -----------       |
+#                      |         ns2                           |
+#                      |                                       |
+#--------------------------------------------------------------+
+#
+# We assume that all network driver are loaded
+#
+
+source lib.sh
+
+ret=0
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+readonly port=8080
+
+readonly vip_v4=207.175.44.110
+readonly cip_v4=10.0.0.2
+readonly gip_v4=10.0.0.1
+readonly dip_v4=172.16.0.1
+readonly rip_v4=172.16.0.2
+readonly sip_v4=10.0.0.3
+
+readonly infile="$(mktemp)"
+readonly outfile="$(mktemp)"
+readonly datalen=32
+
+sysipvsnet="/proc/sys/net/ipv4/vs/"
+if [ ! -d $sysipvsnet ]; then
+	if ! modprobe -q ip_vs; then
+		echo "skip: could not run test without ipvs module"
+		exit $ksft_skip
+	fi
+fi
+
+checktool "ipvsadm -v" "run test without ipvsadm"
+checktool "socat -h" "run test without socat"
+
+setup() {
+	setup_ns ns0 ns1 ns2
+
+	ip link add veth01 netns "${ns0}" type veth peer name veth10 netns "${ns1}"
+	ip link add veth02 netns "${ns0}" type veth peer name veth20 netns "${ns2}"
+	ip link add veth12 netns "${ns1}" type veth peer name veth21 netns "${ns2}"
+
+	ip netns exec "${ns0}" ip link set veth01 up
+	ip netns exec "${ns0}" ip link set veth02 up
+	ip netns exec "${ns0}" ip link add br0 type bridge
+	ip netns exec "${ns0}" ip link set veth01 master br0
+	ip netns exec "${ns0}" ip link set veth02 master br0
+	ip netns exec "${ns0}" ip link set br0 up
+	ip netns exec "${ns0}" ip addr add "${cip_v4}/24" dev br0
+
+	ip netns exec "${ns1}" ip link set veth10 up
+	ip netns exec "${ns1}" ip addr add "${gip_v4}/24" dev veth10
+	ip netns exec "${ns1}" ip link set veth12 up
+	ip netns exec "${ns1}" ip addr add "${dip_v4}/24" dev veth12
+
+	ip netns exec "${ns2}" ip link set veth21 up
+	ip netns exec "${ns2}" ip addr add "${rip_v4}/24" dev veth21
+	ip netns exec "${ns2}" ip link set veth20 up
+	ip netns exec "${ns2}" ip addr add "${sip_v4}/24" dev veth20
+
+	sleep 1
+
+	dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
+}
+
+cleanup() {
+	cleanup_all_ns
+
+	if [ -f "${outfile}" ]; then
+		rm "${outfile}"
+	fi
+	if [ -f "${infile}" ]; then
+		rm "${infile}"
+	fi
+}
+
+server_listen() {
+	ip netns exec "$ns2" socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT > "${outfile}" &
+	server_pid=$!
+	sleep 0.2
+}
+
+client_connect() {
+	ip netns exec "${ns0}" timeout 2 socat -u -4 STDIN TCP:"${vip_v4}":"${port}" < "${infile}"
+}
+
+verify_data() {
+	wait "${server_pid}"
+	cmp "$infile" "$outfile" 2>/dev/null
+}
+
+test_service() {
+	server_listen
+	client_connect
+	verify_data
+}
+
+
+test_dr() {
+	ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
+
+	ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=1
+	ip netns exec "${ns1}" ipvsadm -A -t "${vip_v4}:${port}" -s rr
+	ip netns exec "${ns1}" ipvsadm -a -t "${vip_v4}:${port}" -r "${rip_v4}:${port}"
+	ip netns exec "${ns1}" ip addr add "${vip_v4}/32" dev lo:1
+
+	# avoid incorrect arp response
+	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
+	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
+	# avoid reverse route lookup
+	ip netns exec "${ns2}" sysctl -qw  net.ipv4.conf.all.rp_filter=0
+	ip netns exec "${ns2}" sysctl -qw  net.ipv4.conf.veth21.rp_filter=0
+	ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
+
+	test_service
+}
+
+test_nat() {
+	ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
+
+	ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=1
+	ip netns exec "${ns1}" ipvsadm -A -t "${vip_v4}:${port}" -s rr
+	ip netns exec "${ns1}" ipvsadm -a -m -t "${vip_v4}:${port}" -r "${rip_v4}:${port}"
+	ip netns exec "${ns1}" ip addr add "${vip_v4}/32" dev lo:1
+
+	ip netns exec "${ns2}" ip link del veth20
+	ip netns exec "${ns2}" ip route add default via "${dip_v4}" dev veth21
+
+	test_service
+}
+
+test_tun() {
+	ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
+
+	ip netns exec "${ns1}" modprobe -q ipip
+	ip netns exec "${ns1}" ip link set tunl0 up
+	ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=0
+	ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.all.send_redirects=0
+	ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.default.send_redirects=0
+	ip netns exec "${ns1}" ipvsadm -A -t "${vip_v4}:${port}" -s rr
+	ip netns exec "${ns1}" ipvsadm -a -i -t "${vip_v4}:${port}" -r ${rip_v4}:${port}
+	ip netns exec "${ns1}" ip addr add ${vip_v4}/32 dev lo:1
+
+	ip netns exec "${ns2}" modprobe -q ipip
+	ip netns exec "${ns2}" ip link set tunl0 up
+	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
+	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
+	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
+	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
+	ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0
+	ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
+
+	test_service
+}
+
+run_tests() {
+	local errors=
+
+	echo "Testing DR mode..."
+	cleanup
+	setup
+	test_dr
+	errors=$(( $errors + $? ))
+
+	echo "Testing NAT mode..."
+	cleanup
+	setup
+	test_nat
+	errors=$(( $errors + $? ))
+
+	echo "Testing Tunnel mode..."
+	cleanup
+	setup
+	test_tun
+	errors=$(( $errors + $? ))
+
+	return $errors
+}
+
+trap cleanup EXIT
+
+run_tests
+
+if [ $? -ne 0 ]; then
+	echo -e "$(basename $0): ${RED}FAIL${NC}"
+	exit 1
+fi
+echo -e "$(basename $0): ${GREEN}PASS${NC}"
+exit 0
diff --git a/tools/testing/selftests/net/netfilter/lib.sh b/tools/testing/selftests/net/netfilter/lib.sh
new file mode 100644
index 0000000000..bedd35298e
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/lib.sh
@@ -0,0 +1,10 @@
+net_netfilter_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "$net_netfilter_dir/../lib.sh"
+
+checktool (){
+	if ! $1 > /dev/null 2>&1; then
+		echo "SKIP: Could not $2"
+		exit $ksft_skip
+	fi
+}
diff --git a/tools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh b/tools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh
new file mode 100755
index 0000000000..c6fdd2079f
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+checktool "conntrack --version" "run test without conntrack"
+checktool "iptables --version" "run test without iptables"
+checktool "ip6tables --version" "run test without ip6tables"
+
+modprobe -q tun
+modprobe -q nf_conntrack
+# echo 1 > /proc/sys/net/netfilter/nf_log_all_netns
+
+PDRILL_TIMEOUT=10
+
+files="
+conntrack_ack_loss_stall.pkt
+conntrack_inexact_rst.pkt
+conntrack_syn_challenge_ack.pkt
+conntrack_synack_old.pkt
+conntrack_synack_reuse.pkt
+conntrack_rst_invalid.pkt
+"
+
+if ! packetdrill --dry_run --verbose "packetdrill/conntrack_ack_loss_stall.pkt";then
+	echo "SKIP: packetdrill not installed"
+	exit ${ksft_skip}
+fi
+
+ret=0
+
+run_packetdrill()
+{
+	filename="$1"
+	ipver="$2"
+	local mtu=1500
+
+	export NFCT_IP_VERSION="$ipver"
+
+	if [ "$ipver" = "ipv4" ];then
+		export xtables="iptables"
+	elif [ "$ipver" = "ipv6" ];then
+		export xtables="ip6tables"
+		mtu=1520
+	fi
+
+	timeout "$PDRILL_TIMEOUT" unshare -n packetdrill --ip_version="$ipver" --mtu=$mtu \
+		--tolerance_usecs=1000000 --non_fatal packet "$filename"
+}
+
+run_one_test_file()
+{
+	filename="$1"
+
+	for v in ipv4 ipv6;do
+		printf "%-50s(%s)%-20s" "$filename" "$v" ""
+		if run_packetdrill packetdrill/"$f" "$v";then
+			echo OK
+		else
+			echo FAIL
+			ret=1
+		fi
+	done
+}
+
+echo "Replaying packetdrill test cases:"
+for f in $files;do
+	run_one_test_file packetdrill/"$f"
+done
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
new file mode 100755
index 0000000000..1014551dd7
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test NAT source port clash resolution
+#
+
+source lib.sh
+ret=0
+socatpid=0
+
+cleanup()
+{
+	[ "$socatpid" -gt 0 ] && kill "$socatpid"
+
+	cleanup_all_ns
+}
+
+checktool "socat -h" "run test without socat"
+checktool "iptables --version" "run test without iptables"
+
+trap cleanup EXIT
+
+setup_ns ns1 ns2
+
+# Connect the namespaces using a veth pair
+ip link add name veth2 type veth peer name veth1
+ip link set netns "$ns1" dev veth1
+ip link set netns "$ns2" dev veth2
+
+ip netns exec "$ns1" ip link set up dev lo
+ip netns exec "$ns1" ip link set up dev veth1
+ip netns exec "$ns1" ip addr add 192.168.1.1/24 dev veth1
+
+ip netns exec "$ns2" ip link set up dev lo
+ip netns exec "$ns2" ip link set up dev veth2
+ip netns exec "$ns2" ip addr add 192.168.1.2/24 dev veth2
+
+# Create a server in one namespace
+ip netns exec "$ns1" socat -u TCP-LISTEN:5201,fork OPEN:/dev/null,wronly=1 &
+socatpid=$!
+
+# Restrict source port to just one so we don't have to exhaust
+# all others.
+ip netns exec "$ns2" sysctl -q net.ipv4.ip_local_port_range="10000 10000"
+
+# add a virtual IP using DNAT
+ip netns exec "$ns2" iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201
+
+# ... and route it to the other namespace
+ip netns exec "$ns2" ip route add 10.96.0.1 via 192.168.1.1
+
+# add a persistent connection from the other namespace
+ip netns exec "$ns2" socat -t 10 - TCP:192.168.1.1:5201 > /dev/null &
+
+sleep 1
+
+# ip daddr:dport will be rewritten to 192.168.1.1 5201
+# NAT must reallocate source port 10000 because
+# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use
+echo test | ip netns exec "$ns2" socat -t 3 -u STDIN TCP:10.96.0.1:443,connect-timeout=3 >/dev/null
+ret=$?
+
+# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201).
+if [ $ret -eq 0 ]; then
+	echo "PASS: socat can connect via NAT'd address"
+else
+	echo "FAIL: socat cannot connect via NAT'd address"
+fi
+
+# check sport clashres.
+ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201
+ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201
+
+sleep 5 | ip netns exec "$ns2" socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
+
+# if connect succeeds, client closes instantly due to EOF on stdin.
+# if connect hangs, it will time out after 5s.
+echo | ip netns exec "$ns2" socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
+cpid2=$!
+
+time_then=$(date +%s)
+wait $cpid2
+rv=$?
+time_now=$(date +%s)
+
+# Check how much time has elapsed, expectation is for
+# 'cpid2' to connect and then exit (and no connect delay).
+delta=$((time_now - time_then))
+
+if [ $delta -lt 2 ] && [ $rv -eq 0 ]; then
+	echo "PASS: could connect to service via redirected ports"
+else
+	echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)"
+	ret=1
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nf_queue.c b/tools/testing/selftests/net/netfilter/nf_queue.c
new file mode 100644
index 0000000000..9e56b9d470
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nf_queue.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <time.h>
+#include <arpa/inet.h>
+
+#include <libmnl/libmnl.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_queue.h>
+
+struct options {
+	bool count_packets;
+	bool gso_enabled;
+	int verbose;
+	unsigned int queue_num;
+	unsigned int timeout;
+	uint32_t verdict;
+	uint32_t delay_ms;
+};
+
+static unsigned int queue_stats[5];
+static struct options opts;
+
+static void help(const char *p)
+{
+	printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p);
+}
+
+static int parse_attr_cb(const struct nlattr *attr, void *data)
+{
+	const struct nlattr **tb = data;
+	int type = mnl_attr_get_type(attr);
+
+	/* skip unsupported attribute in user-space */
+	if (mnl_attr_type_valid(attr, NFQA_MAX) < 0)
+		return MNL_CB_OK;
+
+	switch (type) {
+	case NFQA_MARK:
+	case NFQA_IFINDEX_INDEV:
+	case NFQA_IFINDEX_OUTDEV:
+	case NFQA_IFINDEX_PHYSINDEV:
+	case NFQA_IFINDEX_PHYSOUTDEV:
+		if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0) {
+			perror("mnl_attr_validate");
+			return MNL_CB_ERROR;
+		}
+		break;
+	case NFQA_TIMESTAMP:
+		if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC,
+		    sizeof(struct nfqnl_msg_packet_timestamp)) < 0) {
+			perror("mnl_attr_validate2");
+			return MNL_CB_ERROR;
+		}
+		break;
+	case NFQA_HWADDR:
+		if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC,
+		    sizeof(struct nfqnl_msg_packet_hw)) < 0) {
+			perror("mnl_attr_validate2");
+			return MNL_CB_ERROR;
+		}
+		break;
+	case NFQA_PAYLOAD:
+		break;
+	}
+	tb[type] = attr;
+	return MNL_CB_OK;
+}
+
+static int queue_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct nlattr *tb[NFQA_MAX+1] = { 0 };
+	struct nfqnl_msg_packet_hdr *ph = NULL;
+	uint32_t id = 0;
+
+	(void)data;
+
+	mnl_attr_parse(nlh, sizeof(struct nfgenmsg), parse_attr_cb, tb);
+	if (tb[NFQA_PACKET_HDR]) {
+		ph = mnl_attr_get_payload(tb[NFQA_PACKET_HDR]);
+		id = ntohl(ph->packet_id);
+
+		if (opts.verbose > 0)
+			printf("packet hook=%u, hwproto 0x%x",
+				ntohs(ph->hw_protocol), ph->hook);
+
+		if (ph->hook >= 5) {
+			fprintf(stderr, "Unknown hook %d\n", ph->hook);
+			return MNL_CB_ERROR;
+		}
+
+		if (opts.verbose > 0) {
+			uint32_t skbinfo = 0;
+
+			if (tb[NFQA_SKB_INFO])
+				skbinfo = ntohl(mnl_attr_get_u32(tb[NFQA_SKB_INFO]));
+			if (skbinfo & NFQA_SKB_CSUMNOTREADY)
+				printf(" csumnotready");
+			if (skbinfo & NFQA_SKB_GSO)
+				printf(" gso");
+			if (skbinfo & NFQA_SKB_CSUM_NOTVERIFIED)
+				printf(" csumnotverified");
+			puts("");
+		}
+
+		if (opts.count_packets)
+			queue_stats[ph->hook]++;
+	}
+
+	return MNL_CB_OK + id;
+}
+
+static struct nlmsghdr *
+nfq_build_cfg_request(char *buf, uint8_t command, int queue_num)
+{
+	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
+	struct nfqnl_msg_config_cmd cmd = {
+		.command = command,
+		.pf = htons(AF_INET),
+	};
+	struct nfgenmsg *nfg;
+
+	nlh->nlmsg_type	= (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG;
+	nlh->nlmsg_flags = NLM_F_REQUEST;
+
+	nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
+
+	nfg->nfgen_family = AF_UNSPEC;
+	nfg->version = NFNETLINK_V0;
+	nfg->res_id = htons(queue_num);
+
+	mnl_attr_put(nlh, NFQA_CFG_CMD, sizeof(cmd), &cmd);
+
+	return nlh;
+}
+
+static struct nlmsghdr *
+nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num)
+{
+	struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
+	struct nfqnl_msg_config_params params = {
+		.copy_range = htonl(range),
+		.copy_mode = mode,
+	};
+	struct nfgenmsg *nfg;
+
+	nlh->nlmsg_type	= (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG;
+	nlh->nlmsg_flags = NLM_F_REQUEST;
+
+	nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
+	nfg->nfgen_family = AF_UNSPEC;
+	nfg->version = NFNETLINK_V0;
+	nfg->res_id = htons(queue_num);
+
+	mnl_attr_put(nlh, NFQA_CFG_PARAMS, sizeof(params), &params);
+
+	return nlh;
+}
+
+static struct nlmsghdr *
+nfq_build_verdict(char *buf, int id, int queue_num, uint32_t verd)
+{
+	struct nfqnl_msg_verdict_hdr vh = {
+		.verdict = htonl(verd),
+		.id = htonl(id),
+	};
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfg;
+
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_VERDICT;
+	nlh->nlmsg_flags = NLM_F_REQUEST;
+	nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
+	nfg->nfgen_family = AF_UNSPEC;
+	nfg->version = NFNETLINK_V0;
+	nfg->res_id = htons(queue_num);
+
+	mnl_attr_put(nlh, NFQA_VERDICT_HDR, sizeof(vh), &vh);
+
+	return nlh;
+}
+
+static void print_stats(void)
+{
+	unsigned int last, total;
+	int i;
+
+	total = 0;
+	last = queue_stats[0];
+
+	for (i = 0; i < 5; i++) {
+		printf("hook %d packets %08u\n", i, queue_stats[i]);
+		last = queue_stats[i];
+		total += last;
+	}
+
+	printf("%u packets total\n", total);
+}
+
+struct mnl_socket *open_queue(void)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	unsigned int queue_num;
+	struct mnl_socket *nl;
+	struct nlmsghdr *nlh;
+	struct timeval tv;
+	uint32_t flags;
+
+	nl = mnl_socket_open(NETLINK_NETFILTER);
+	if (nl == NULL) {
+		perror("mnl_socket_open");
+		exit(EXIT_FAILURE);
+	}
+
+	if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) {
+		perror("mnl_socket_bind");
+		exit(EXIT_FAILURE);
+	}
+
+	queue_num = opts.queue_num;
+	nlh = nfq_build_cfg_request(buf, NFQNL_CFG_CMD_BIND, queue_num);
+
+	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+		perror("mnl_socket_sendto");
+		exit(EXIT_FAILURE);
+	}
+
+	nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num);
+
+	flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0;
+	flags |= NFQA_CFG_F_UID_GID;
+	mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags));
+	mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags));
+
+	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+		perror("mnl_socket_sendto");
+		exit(EXIT_FAILURE);
+	}
+
+	memset(&tv, 0, sizeof(tv));
+	tv.tv_sec = opts.timeout;
+	if (opts.timeout && setsockopt(mnl_socket_get_fd(nl),
+				       SOL_SOCKET, SO_RCVTIMEO,
+				       &tv, sizeof(tv))) {
+		perror("setsockopt(SO_RCVTIMEO)");
+		exit(EXIT_FAILURE);
+	}
+
+	return nl;
+}
+
+static void sleep_ms(uint32_t delay)
+{
+	struct timespec ts = { .tv_sec = delay / 1000 };
+
+	delay %= 1000;
+
+	ts.tv_nsec = delay * 1000llu * 1000llu;
+
+	nanosleep(&ts, NULL);
+}
+
+static int mainloop(void)
+{
+	unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE;
+	struct mnl_socket *nl;
+	struct nlmsghdr *nlh;
+	unsigned int portid;
+	char *buf;
+	int ret;
+
+	buf = malloc(buflen);
+	if (!buf) {
+		perror("malloc");
+		exit(EXIT_FAILURE);
+	}
+
+	nl = open_queue();
+	portid = mnl_socket_get_portid(nl);
+
+	for (;;) {
+		uint32_t id;
+
+		ret = mnl_socket_recvfrom(nl, buf, buflen);
+		if (ret == -1) {
+			if (errno == ENOBUFS || errno == EINTR)
+				continue;
+
+			if (errno == EAGAIN) {
+				errno = 0;
+				ret = 0;
+				break;
+			}
+
+			perror("mnl_socket_recvfrom");
+			exit(EXIT_FAILURE);
+		}
+
+		ret = mnl_cb_run(buf, ret, 0, portid, queue_cb, NULL);
+		if (ret < 0) {
+			perror("mnl_cb_run");
+			exit(EXIT_FAILURE);
+		}
+
+		id = ret - MNL_CB_OK;
+		if (opts.delay_ms)
+			sleep_ms(opts.delay_ms);
+
+		nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict);
+		if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+			perror("mnl_socket_sendto");
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	mnl_socket_close(nl);
+
+	return ret;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) {
+		switch (c) {
+		case 'c':
+			opts.count_packets = true;
+			break;
+		case 'h':
+			help(argv[0]);
+			exit(0);
+			break;
+		case 'q':
+			opts.queue_num = atoi(optarg);
+			if (opts.queue_num > 0xffff)
+				opts.queue_num = 0;
+			break;
+		case 'Q':
+			opts.verdict = atoi(optarg);
+			if (opts.verdict > 0xffff) {
+				fprintf(stderr, "Expected destination queue number\n");
+				exit(1);
+			}
+
+			opts.verdict <<= 16;
+			opts.verdict |= NF_QUEUE;
+			break;
+		case 'd':
+			opts.delay_ms = atoi(optarg);
+			if (opts.delay_ms == 0) {
+				fprintf(stderr, "Expected nonzero delay (in milliseconds)\n");
+				exit(1);
+			}
+			break;
+		case 't':
+			opts.timeout = atoi(optarg);
+			break;
+		case 'G':
+			opts.gso_enabled = false;
+			break;
+		case 'v':
+			opts.verbose++;
+			break;
+		}
+	}
+
+	if (opts.verdict != NF_ACCEPT && (opts.verdict >> 16 == opts.queue_num)) {
+		fprintf(stderr, "Cannot use same destination and source queue\n");
+		exit(1);
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	opts.verdict = NF_ACCEPT;
+	opts.gso_enabled = true;
+
+	parse_opts(argc, argv);
+
+	ret = mainloop();
+	if (opts.count_packets)
+		print_stats();
+
+	return ret;
+}
diff --git a/tools/testing/selftests/net/netfilter/nft_audit.sh b/tools/testing/selftests/net/netfilter/nft_audit.sh
new file mode 100755
index 0000000000..902f8114bc
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_audit.sh
@@ -0,0 +1,268 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that audit logs generated for nft commands are as expected.
+
+SKIP_RC=4
+RC=0
+
+if [ -r /var/run/auditd.pid ];then
+	read pid < /var/run/auditd.pid
+	p=$(pgrep ^auditd$)
+
+	if [ "$pid" -eq "$p" ]; then
+		echo "SKIP: auditd is running"
+		exit $SKIP_RC
+	fi
+fi
+
+nft --version >/dev/null 2>&1 || {
+	echo "SKIP: missing nft tool"
+	exit $SKIP_RC
+}
+
+# nft must be recent enough to support "reset" keyword.
+nft --check -f /dev/stdin >/dev/null 2>&1 <<EOF
+add table t
+add chain t c
+reset rules t c
+EOF
+
+if [ "$?" -ne 0 ];then
+	echo -n "SKIP: nft reset feature test failed: "
+	nft --version
+	exit $SKIP_RC
+fi
+
+# Run everything in a separate network namespace
+[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
+
+# give other scripts a chance to finish - audit_logread sees all activity
+sleep 1
+
+logfile=$(mktemp)
+rulefile=$(mktemp)
+echo "logging into $logfile"
+./audit_logread >"$logfile" &
+logread_pid=$!
+trap 'kill $logread_pid; rm -f $logfile $rulefile' EXIT
+exec 3<"$logfile"
+
+do_test() { # (cmd, log)
+	echo -n "testing for cmd: $1 ... "
+	cat <&3 >/dev/null
+	$1 >/dev/null || exit 1
+	sleep 0.1
+	res=$(diff -a -u <(echo "$2") - <&3)
+	[ $? -eq 0 ] && { echo "OK"; return; }
+	echo "FAIL"
+	grep -v '^\(---\|+++\|@@\)' <<< "$res"
+	((RC--))
+}
+
+nft flush ruleset
+
+# adding tables, chains and rules
+
+for table in t1 t2; do
+	do_test "nft add table $table" \
+	"table=$table family=2 entries=1 op=nft_register_table"
+
+	do_test "nft add chain $table c1" \
+	"table=$table family=2 entries=1 op=nft_register_chain"
+
+	do_test "nft add chain $table c2; add chain $table c3" \
+	"table=$table family=2 entries=2 op=nft_register_chain"
+
+	cmd="add rule $table c1 counter"
+
+	do_test "nft $cmd" \
+	"table=$table family=2 entries=1 op=nft_register_rule"
+
+	do_test "nft $cmd; $cmd" \
+	"table=$table family=2 entries=2 op=nft_register_rule"
+
+	cmd=""
+	sep=""
+	for chain in c2 c3; do
+		for i in {1..3}; do
+			cmd+="$sep add rule $table $chain counter"
+			sep=";"
+		done
+	done
+	do_test "nft $cmd" \
+	"table=$table family=2 entries=6 op=nft_register_rule"
+done
+
+for ((i = 0; i < 500; i++)); do
+	echo "add rule t2 c3 counter accept comment \"rule $i\""
+done > "$rulefile"
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=500 op=nft_register_rule'
+
+# adding sets and elements
+
+settype='type inet_service; counter'
+setelem='{ 22, 80, 443 }'
+setblock="{ $settype; elements = $setelem; }"
+do_test "nft add set t1 s $setblock" \
+"table=t1 family=2 entries=4 op=nft_register_set"
+
+do_test "nft add set t1 s2 $setblock; add set t1 s3 { $settype; }" \
+"table=t1 family=2 entries=5 op=nft_register_set"
+
+do_test "nft add element t1 s3 $setelem" \
+"table=t1 family=2 entries=3 op=nft_register_setelem"
+
+# adding counters
+
+do_test 'nft add counter t1 c1' \
+'table=t1 family=2 entries=1 op=nft_register_obj'
+
+do_test 'nft add counter t2 c1; add counter t2 c2' \
+'table=t2 family=2 entries=2 op=nft_register_obj'
+
+for ((i = 3; i <= 500; i++)); do
+	echo "add counter t2 c$i"
+done > "$rulefile"
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=498 op=nft_register_obj'
+
+# adding/updating quotas
+
+do_test 'nft add quota t1 q1 { 10 bytes }' \
+'table=t1 family=2 entries=1 op=nft_register_obj'
+
+do_test 'nft add quota t2 q1 { 10 bytes }; add quota t2 q2 { 10 bytes }' \
+'table=t2 family=2 entries=2 op=nft_register_obj'
+
+for ((i = 3; i <= 500; i++)); do
+	echo "add quota t2 q$i { 10 bytes }"
+done > "$rulefile"
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=498 op=nft_register_obj'
+
+# changing the quota value triggers obj update path
+do_test 'nft add quota t1 q1 { 20 bytes }' \
+'table=t1 family=2 entries=1 op=nft_register_obj'
+
+# resetting rules
+
+do_test 'nft reset rules t1 c2' \
+'table=t1 family=2 entries=3 op=nft_reset_rule'
+
+do_test 'nft reset rules table t1' \
+'table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule'
+
+do_test 'nft reset rules t2 c3' \
+'table=t2 family=2 entries=189 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=126 op=nft_reset_rule'
+
+do_test 'nft reset rules t2' \
+'table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=186 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=129 op=nft_reset_rule'
+
+do_test 'nft reset rules' \
+'table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=180 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=135 op=nft_reset_rule'
+
+# resetting sets and elements
+
+elem=(22 ",80" ",443")
+relem=""
+for i in {1..3}; do
+	relem+="${elem[((i - 1))]}"
+	do_test "nft reset element t1 s { $relem }" \
+	"table=t1 family=2 entries=$i op=nft_reset_setelem"
+done
+
+do_test 'nft reset set t1 s' \
+'table=t1 family=2 entries=3 op=nft_reset_setelem'
+
+# resetting counters
+
+do_test 'nft reset counter t1 c1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset counters t1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset counters t2' \
+'table=t2 family=2 entries=342 op=nft_reset_obj
+table=t2 family=2 entries=158 op=nft_reset_obj'
+
+do_test 'nft reset counters' \
+'table=t1 family=2 entries=1 op=nft_reset_obj
+table=t2 family=2 entries=341 op=nft_reset_obj
+table=t2 family=2 entries=159 op=nft_reset_obj'
+
+# resetting quotas
+
+do_test 'nft reset quota t1 q1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset quotas t1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset quotas t2' \
+'table=t2 family=2 entries=315 op=nft_reset_obj
+table=t2 family=2 entries=185 op=nft_reset_obj'
+
+do_test 'nft reset quotas' \
+'table=t1 family=2 entries=1 op=nft_reset_obj
+table=t2 family=2 entries=314 op=nft_reset_obj
+table=t2 family=2 entries=186 op=nft_reset_obj'
+
+# deleting rules
+
+readarray -t handles < <(nft -a list chain t1 c1 | \
+			 sed -n 's/.*counter.* handle \(.*\)$/\1/p')
+
+do_test "nft delete rule t1 c1 handle ${handles[0]}" \
+'table=t1 family=2 entries=1 op=nft_unregister_rule'
+
+cmd='delete rule t1 c1 handle'
+do_test "nft $cmd ${handles[1]}; $cmd ${handles[2]}" \
+'table=t1 family=2 entries=2 op=nft_unregister_rule'
+
+do_test 'nft flush chain t1 c2' \
+'table=t1 family=2 entries=3 op=nft_unregister_rule'
+
+do_test 'nft flush table t2' \
+'table=t2 family=2 entries=509 op=nft_unregister_rule'
+
+# deleting chains
+
+do_test 'nft delete chain t2 c2' \
+'table=t2 family=2 entries=1 op=nft_unregister_chain'
+
+# deleting sets and elements
+
+do_test 'nft delete element t1 s { 22 }' \
+'table=t1 family=2 entries=1 op=nft_unregister_setelem'
+
+do_test 'nft delete element t1 s { 80, 443 }' \
+'table=t1 family=2 entries=2 op=nft_unregister_setelem'
+
+do_test 'nft flush set t1 s2' \
+'table=t1 family=2 entries=3 op=nft_unregister_setelem'
+
+do_test 'nft delete set t1 s2' \
+'table=t1 family=2 entries=1 op=nft_unregister_set'
+
+do_test 'nft delete set t1 s3' \
+'table=t1 family=2 entries=1 op=nft_unregister_set'
+
+exit $RC
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
new file mode 100755
index 0000000000..6d66240e14
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -0,0 +1,1622 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# nft_concat_range.sh - Tests for sets with concatenation of ranged fields
+#
+# Copyright (c) 2019 Red Hat GmbH
+#
+# Author: Stefano Brivio <sbrivio@redhat.com>
+#
+# shellcheck disable=SC2154,SC2034,SC2016,SC2030,SC2031,SC2317
+# ^ Configuration and templates sourced with eval, counters reused in subshells
+
+source lib.sh
+
+# Available test groups:
+# - reported_issues: check for issues that were reported in the past
+# - correctness: check that packets match given entries, and only those
+# - concurrency: attempt races between insertion, deletion and lookup
+# - timeout: check that packets match entries until they expire
+# - performance: estimate matching rate, compare with rbtree and hash baselines
+TESTS="reported_issues correctness concurrency timeout"
+[ -n "$NFT_CONCAT_RANGE_TESTS" ] && TESTS="${NFT_CONCAT_RANGE_TESTS}"
+
+# Set types, defined by TYPE_ variables below
+TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
+       net_port_net net_mac mac_net net_mac_icmp net6_mac_icmp
+       net6_port_net6_port net_port_mac_proto_net"
+
+# Reported bugs, also described by TYPE_ variables below
+BUGS="flush_remove_add reload"
+
+# List of possible paths to pktgen script from kernel tree for performance tests
+PKTGEN_SCRIPT_PATHS="
+	../../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+	pktgen/pktgen_bench_xmit_mode_netif_receive.sh"
+
+# Definition of set types:
+# display	display text for test report
+# type_spec	nftables set type specifier
+# chain_spec	nftables type specifier for rules mapping to set
+# dst		call sequence of format_*() functions for destination fields
+# src		call sequence of format_*() functions for source fields
+# start		initial integer used to generate addresses and ports
+# count		count of entries to generate and match
+# src_delta	number summed to destination generator for source fields
+# tools		list of tools for correctness and timeout tests, any can be used
+# proto		L4 protocol of test packets
+#
+# race_repeat	race attempts per thread, 0 disables concurrency test for type
+# flood_tools	list of tools for concurrency tests, any can be used
+# flood_proto	L4 protocol of test packets for concurrency tests
+# flood_spec	nftables type specifier for concurrency tests
+#
+# perf_duration	duration of single pktgen injection test
+# perf_spec	nftables type specifier for performance tests
+# perf_dst	format_*() functions for destination fields in performance test
+# perf_src	format_*() functions for source fields in performance test
+# perf_entries	number of set entries for performance test
+# perf_proto	L3 protocol of test packets
+TYPE_net_port="
+display		net,port
+type_spec	ipv4_addr . inet_service
+chain_spec	ip daddr . udp dport
+dst		addr4 port
+src		 
+start		1
+count		5
+src_delta	2000
+tools		sendip bash
+proto		udp
+
+race_repeat	3
+flood_tools	iperf3 iperf netperf
+flood_proto	udp
+flood_spec	ip daddr . udp dport
+
+perf_duration	5
+perf_spec	ip daddr . udp dport
+perf_dst	addr4 port
+perf_src	 
+perf_entries	1000
+perf_proto	ipv4
+"
+
+TYPE_port_net="
+display		port,net
+type_spec	inet_service . ipv4_addr
+chain_spec	udp dport . ip daddr
+dst		port addr4
+src		 
+start		1
+count		5
+src_delta	2000
+tools		sendip socat bash
+proto		udp
+
+race_repeat	3
+flood_tools	iperf3 iperf netperf
+flood_proto	udp
+flood_spec	udp dport . ip daddr
+
+perf_duration	5
+perf_spec	udp dport . ip daddr
+perf_dst	port addr4
+perf_src	 
+perf_entries	100
+perf_proto	ipv4
+"
+
+TYPE_net6_port="
+display		net6,port
+type_spec	ipv6_addr . inet_service
+chain_spec	ip6 daddr . udp dport
+dst		addr6 port
+src		 
+start		10
+count		5
+src_delta	2000
+tools		sendip socat bash
+proto		udp6
+
+race_repeat	3
+flood_tools	iperf3 iperf netperf
+flood_proto	tcp6
+flood_spec	ip6 daddr . udp dport
+
+perf_duration	5
+perf_spec	ip6 daddr . udp dport
+perf_dst	addr6 port
+perf_src	 
+perf_entries	1000
+perf_proto	ipv6
+"
+
+TYPE_port_proto="
+display		port,proto
+type_spec	inet_service . inet_proto
+chain_spec	udp dport . meta l4proto
+dst		port proto
+src		 
+start		1
+count		5
+src_delta	2000
+tools		sendip socat bash
+proto		udp
+
+race_repeat	0
+
+perf_duration	5
+perf_spec	udp dport . meta l4proto
+perf_dst	port proto
+perf_src	 
+perf_entries	30000
+perf_proto	ipv4
+"
+
+TYPE_net6_port_mac="
+display		net6,port,mac
+type_spec	ipv6_addr . inet_service . ether_addr
+chain_spec	ip6 daddr . udp dport . ether saddr
+dst		addr6 port
+src		mac
+start		10
+count		5
+src_delta	2000
+tools		sendip socat bash
+proto		udp6
+
+race_repeat	0
+
+perf_duration	5
+perf_spec	ip6 daddr . udp dport . ether daddr
+perf_dst	addr6 port mac
+perf_src	 
+perf_entries	10
+perf_proto	ipv6
+"
+
+TYPE_net6_port_mac_proto="
+display		net6,port,mac,proto
+type_spec	ipv6_addr . inet_service . ether_addr . inet_proto
+chain_spec	ip6 daddr . udp dport . ether saddr . meta l4proto
+dst		addr6 port
+src		mac proto
+start		10
+count		5
+src_delta	2000
+tools		sendip socat bash
+proto		udp6
+
+race_repeat	0
+
+perf_duration	5
+perf_spec	ip6 daddr . udp dport . ether daddr . meta l4proto
+perf_dst	addr6 port mac proto
+perf_src	 
+perf_entries	1000
+perf_proto	ipv6
+"
+
+TYPE_net_port_net="
+display		net,port,net
+type_spec	ipv4_addr . inet_service . ipv4_addr
+chain_spec	ip daddr . udp dport . ip saddr
+dst		addr4 port
+src		addr4
+start		1
+count		5
+src_delta	2000
+tools		sendip socat bash
+proto		udp
+
+race_repeat	3
+flood_tools	iperf3 iperf netperf
+flood_proto	tcp
+flood_spec	ip daddr . udp dport . ip saddr
+
+perf_duration	0
+"
+
+TYPE_net6_port_net6_port="
+display		net6,port,net6,port
+type_spec	ipv6_addr . inet_service . ipv6_addr . inet_service
+chain_spec	ip6 daddr . udp dport . ip6 saddr . udp sport
+dst		addr6 port
+src		addr6 port
+start		10
+count		5
+src_delta	2000
+tools		sendip socat
+proto		udp6
+
+race_repeat	3
+flood_tools	iperf3 iperf netperf
+flood_proto	tcp6
+flood_spec	ip6 daddr . tcp dport . ip6 saddr . tcp sport
+
+perf_duration	0
+"
+
+TYPE_net_port_mac_proto_net="
+display		net,port,mac,proto,net
+type_spec	ipv4_addr . inet_service . ether_addr . inet_proto . ipv4_addr
+chain_spec	ip daddr . udp dport . ether saddr . meta l4proto . ip saddr
+dst		addr4 port
+src		mac proto addr4
+start		1
+count		5
+src_delta	2000
+tools		sendip socat bash
+proto		udp
+
+race_repeat	0
+
+perf_duration	0
+"
+
+TYPE_net_mac="
+display		net,mac
+type_spec	ipv4_addr . ether_addr
+chain_spec	ip daddr . ether saddr
+dst		addr4
+src		mac
+start		1
+count		5
+src_delta	2000
+tools		sendip socat bash
+proto		udp
+
+race_repeat	0
+
+perf_duration	5
+perf_spec	ip daddr . ether daddr
+perf_dst	addr4 mac
+perf_src	 
+perf_entries	1000
+perf_proto	ipv4
+"
+
+TYPE_mac_net="
+display		mac,net
+type_spec	ether_addr . ipv4_addr
+chain_spec	ether saddr . ip saddr
+dst		 
+src		mac addr4
+start		1
+count		5
+src_delta	2000
+tools		sendip socat bash
+proto		udp
+
+race_repeat	0
+
+perf_duration	0
+"
+
+TYPE_net_mac_icmp="
+display		net,mac - ICMP
+type_spec	ipv4_addr . ether_addr
+chain_spec	ip daddr . ether saddr
+dst		addr4
+src		mac
+start		1
+count		5
+src_delta	2000
+tools		ping
+proto		icmp
+
+race_repeat	0
+
+perf_duration	0
+"
+
+TYPE_net6_mac_icmp="
+display		net6,mac - ICMPv6
+type_spec	ipv6_addr . ether_addr
+chain_spec	ip6 daddr . ether saddr
+dst		addr6
+src		mac
+start		10
+count		50
+src_delta	2000
+tools		ping
+proto		icmp6
+
+race_repeat	0
+
+perf_duration	0
+"
+
+TYPE_net_port_proto_net="
+display		net,port,proto,net
+type_spec	ipv4_addr . inet_service . inet_proto . ipv4_addr
+chain_spec	ip daddr . udp dport . meta l4proto . ip saddr
+dst		addr4 port proto
+src		addr4
+start		1
+count		5
+src_delta	2000
+tools		sendip socat
+proto		udp
+
+race_repeat	3
+flood_tools	iperf3 iperf netperf
+flood_proto	tcp
+flood_spec	ip daddr . tcp dport . meta l4proto . ip saddr
+
+perf_duration	0
+"
+
+# Definition of tests for bugs reported in the past:
+# display	display text for test report
+TYPE_flush_remove_add="
+display		Add two elements, flush, re-add
+"
+
+TYPE_reload="
+display		net,mac with reload
+type_spec	ipv4_addr . ether_addr
+chain_spec	ip daddr . ether saddr
+dst		addr4
+src		mac
+start		1
+count		1
+src_delta	2000
+tools		sendip socat bash
+proto		udp
+
+race_repeat	0
+
+perf_duration	0
+"
+
+# Set template for all tests, types and rules are filled in depending on test
+set_template='
+flush ruleset
+
+table inet filter {
+	counter test {
+		packets 0 bytes 0
+	}
+
+	set test {
+		type ${type_spec}
+		flags interval,timeout
+	}
+
+	chain input {
+		type filter hook prerouting priority 0; policy accept;
+		${chain_spec} @test counter name \"test\"
+	}
+}
+
+table netdev perf {
+	counter test {
+		packets 0 bytes 0
+	}
+
+	counter match {
+		packets 0 bytes 0
+	}
+
+	set test {
+		type ${type_spec}
+		flags interval
+	}
+
+	set norange {
+		type ${type_spec}
+	}
+
+	set noconcat {
+		type ${type_spec%% *}
+		flags interval
+	}
+
+	chain test {
+		type filter hook ingress device veth_a priority 0;
+	}
+}
+'
+
+err_buf=
+info_buf=
+
+# Append string to error buffer
+err() {
+	err_buf="${err_buf}${1}
+"
+}
+
+# Append string to information buffer
+info() {
+	info_buf="${info_buf}${1}
+"
+}
+
+# Flush error buffer to stdout
+err_flush() {
+	printf "%s" "${err_buf}"
+	err_buf=
+}
+
+# Flush information buffer to stdout
+info_flush() {
+	printf "%s" "${info_buf}"
+	info_buf=
+}
+
+# Setup veth pair: this namespace receives traffic, B generates it
+setup_veth() {
+	ip netns add B
+	ip link add veth_a type veth peer name veth_b || return 1
+
+	ip link set veth_a up
+	ip link set veth_b netns B
+
+	ip -n B link set veth_b up
+
+	ip addr add dev veth_a 10.0.0.1
+	ip route add default dev veth_a
+
+	ip -6 addr add fe80::1/64 dev veth_a nodad
+	ip -6 addr add 2001:db8::1/64 dev veth_a nodad
+	ip -6 route add default dev veth_a
+
+	ip -n B route add default dev veth_b
+
+	ip -6 -n B addr add fe80::2/64 dev veth_b nodad
+	ip -6 -n B addr add 2001:db8::2/64 dev veth_b nodad
+	ip -6 -n B route add default dev veth_b
+
+	B() {
+		ip netns exec B "$@" >/dev/null 2>&1
+	}
+}
+
+# Fill in set template and initialise set
+setup_set() {
+	eval "echo \"${set_template}\"" | nft -f -
+}
+
+# Check that at least one of the needed tools is available
+check_tools() {
+	[ -z "${tools}" ] && return 0
+
+	__tools=
+	for tool in ${tools}; do
+		__tools="${__tools} ${tool}"
+
+		command -v "${tool}" >/dev/null && return 0
+	done
+	err "need one of:${__tools}, skipping" && return 1
+}
+
+# Set up function to send ICMP packets
+setup_send_icmp() {
+	send_icmp() {
+		B ping -c1 -W1 "${dst_addr4}" >/dev/null 2>&1
+	}
+}
+
+# Set up function to send ICMPv6 packets
+setup_send_icmp6() {
+	if command -v ping6 >/dev/null; then
+		send_icmp6() {
+			ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+				2>/dev/null
+			B ping6 -q -c1 -W1 "${dst_addr6}"
+		}
+	else
+		send_icmp6() {
+			ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+				2>/dev/null
+			B ping -q -6 -c1 -W1 "${dst_addr6}"
+		}
+	fi
+}
+
+# Set up function to send single UDP packets on IPv4
+setup_send_udp() {
+	if command -v sendip >/dev/null; then
+		send_udp() {
+			[ -n "${src_port}" ] && src_port="-us ${src_port}"
+			[ -n "${dst_port}" ] && dst_port="-ud ${dst_port}"
+			[ -n "${src_addr4}" ] && src_addr4="-is ${src_addr4}"
+
+			# shellcheck disable=SC2086 # sendip needs split options
+			B sendip -p ipv4 -p udp ${src_addr4} ${src_port} \
+						${dst_port} "${dst_addr4}"
+
+			src_port=
+			dst_port=
+			src_addr4=
+		}
+	elif command -v socat -v >/dev/null; then
+		send_udp() {
+			if [ -n "${src_addr4}" ]; then
+				B ip addr add "${src_addr4}" dev veth_b
+				__socatbind=",bind=${src_addr4}"
+				if [ -n "${src_port}" ];then
+					__socatbind="${__socatbind}:${src_port}"
+				fi
+			fi
+
+			ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+			[ -z "${dst_port}" ] && dst_port=12345
+
+			echo "test4" | B socat -t 0.01 STDIN UDP4-DATAGRAM:"$dst_addr4":"$dst_port""${__socatbind}"
+
+			src_addr4=
+			src_port=
+		}
+	elif [ -z "$(bash -c 'type -p')" ]; then
+		send_udp() {
+			ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+			if [ -n "${src_addr4}" ]; then
+				B ip addr add "${src_addr4}/16" dev veth_b
+				B ip route add default dev veth_b
+			fi
+
+			B bash -c "echo > /dev/udp/${dst_addr4}/${dst_port}"
+
+			if [ -n "${src_addr4}" ]; then
+				B ip addr del "${src_addr4}/16" dev veth_b
+			fi
+			src_addr4=
+		}
+	else
+		return 1
+	fi
+}
+
+# Set up function to send single UDP packets on IPv6
+setup_send_udp6() {
+	if command -v sendip >/dev/null; then
+		send_udp6() {
+			[ -n "${src_port}" ] && src_port="-us ${src_port}"
+			[ -n "${dst_port}" ] && dst_port="-ud ${dst_port}"
+			if [ -n "${src_addr6}" ]; then
+				src_addr6="-6s ${src_addr6}"
+			else
+				src_addr6="-6s 2001:db8::2"
+			fi
+			ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+				2>/dev/null
+
+			# shellcheck disable=SC2086 # this needs split options
+			B sendip -p ipv6 -p udp ${src_addr6} ${src_port} \
+						${dst_port} "${dst_addr6}"
+
+			src_port=
+			dst_port=
+			src_addr6=
+		}
+	elif command -v socat -v >/dev/null; then
+		send_udp6() {
+			ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+				2>/dev/null
+
+			__socatbind6=
+
+			if [ -n "${src_addr6}" ]; then
+				B ip addr add "${src_addr6}" dev veth_b nodad
+
+				__socatbind6=",bind=[${src_addr6}]"
+
+				if [ -n "${src_port}" ] ;then
+					__socatbind6="${__socatbind6}:${src_port}"
+				fi
+			fi
+
+			echo "test6" | B socat -t 0.01 STDIN UDP6-DATAGRAM:["$dst_addr6"]:"$dst_port""${__socatbind6}"
+		}
+	elif [ -z "$(bash -c 'type -p')" ]; then
+		send_udp6() {
+			ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+				2>/dev/null
+			B ip addr add "${src_addr6}" dev veth_b nodad
+			B bash -c "echo > /dev/udp/${dst_addr6}/${dst_port}"
+			ip -6 addr del "${dst_addr6}" dev veth_a 2>/dev/null
+		}
+	else
+		return 1
+	fi
+}
+
+listener_ready()
+{
+	port="$1"
+	ss -lnt -o "sport = :$port" | grep -q "$port"
+}
+
+# Set up function to send TCP traffic on IPv4
+setup_flood_tcp() {
+	if command -v iperf3 >/dev/null; then
+		flood_tcp() {
+			local n_port="${dst_port}"
+			[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+			if [ -n "${src_addr4}" ]; then
+				B ip addr add "${src_addr4}/16" dev veth_b
+				src_addr4="-B ${src_addr4}"
+			else
+				B ip addr add dev veth_b 10.0.0.2
+				src_addr4="-B 10.0.0.2"
+			fi
+			if [ -n "${src_port}" ]; then
+				src_port="--cport ${src_port}"
+			fi
+			B ip route add default dev veth_b 2>/dev/null
+			ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+			# shellcheck disable=SC2086 # this needs split options
+			iperf3 -s -DB "${dst_addr4}" ${dst_port} >/dev/null 2>&1
+			busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
+
+			# shellcheck disable=SC2086 # this needs split options
+			B iperf3 -c "${dst_addr4}" ${dst_port} ${src_port} \
+				${src_addr4} -l16 -t 1000
+
+			src_addr4=
+			src_port=
+			dst_port=
+		}
+	elif command -v iperf >/dev/null; then
+		flood_tcp() {
+			local n_port="${dst_port}"
+			[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+			if [ -n "${src_addr4}" ]; then
+				B ip addr add "${src_addr4}/16" dev veth_b
+				src_addr4="-B ${src_addr4}"
+			else
+				B ip addr add dev veth_b 10.0.0.2 2>/dev/null
+				src_addr4="-B 10.0.0.2"
+			fi
+			if [ -n "${src_port}" ]; then
+				src_addr4="${src_addr4}:${src_port}"
+			fi
+			B ip route add default dev veth_b
+			ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+			# shellcheck disable=SC2086 # this needs split options
+			iperf -s -DB "${dst_addr4}" ${dst_port} >/dev/null 2>&1
+			busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
+
+			# shellcheck disable=SC2086 # this needs split options
+			B iperf -c "${dst_addr4}" ${dst_port} ${src_addr4} \
+				-l20 -t 1000
+
+			src_addr4=
+			src_port=
+			dst_port=
+		}
+	elif command -v netperf >/dev/null; then
+		flood_tcp() {
+			local n_port="${dst_port}"
+			[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+			if [ -n "${src_addr4}" ]; then
+				B ip addr add "${src_addr4}/16" dev veth_b
+			else
+				B ip addr add dev veth_b 10.0.0.2
+				src_addr4="10.0.0.2"
+			fi
+			if [ -n "${src_port}" ]; then
+				dst_port="${dst_port},${src_port}"
+			fi
+			B ip route add default dev veth_b
+			ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+			# shellcheck disable=SC2086 # this needs split options
+			netserver -4 ${dst_port} -L "${dst_addr4}" \
+				>/dev/null 2>&1
+			busywait "$BUSYWAIT_TIMEOUT" listener_ready "${n_port}"
+
+			# shellcheck disable=SC2086 # this needs split options
+			B netperf -4 -H "${dst_addr4}" ${dst_port} \
+				-L "${src_addr4}" -l 1000 -t TCP_STREAM
+
+			src_addr4=
+			src_port=
+			dst_port=
+		}
+	else
+		return 1
+	fi
+}
+
+# Set up function to send TCP traffic on IPv6
+setup_flood_tcp6() {
+	if command -v iperf3 >/dev/null; then
+		flood_tcp6() {
+			local n_port="${dst_port}"
+			[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+			if [ -n "${src_addr6}" ]; then
+				B ip addr add "${src_addr6}" dev veth_b nodad
+				src_addr6="-B ${src_addr6}"
+			else
+				src_addr6="-B 2001:db8::2"
+			fi
+			if [ -n "${src_port}" ]; then
+				src_port="--cport ${src_port}"
+			fi
+			B ip route add default dev veth_b
+			ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+				2>/dev/null
+
+			# shellcheck disable=SC2086 # this needs split options
+			iperf3 -s -DB "${dst_addr6}" ${dst_port} >/dev/null 2>&1
+			busywait "$BUSYWAIT_TIMEOUT" listener_ready "${n_port}"
+
+			# shellcheck disable=SC2086 # this needs split options
+			B iperf3 -c "${dst_addr6}" ${dst_port} \
+				${src_port} ${src_addr6} -l16 -t 1000
+
+			src_addr6=
+			src_port=
+			dst_port=
+		}
+	elif command -v iperf >/dev/null; then
+		flood_tcp6() {
+			local n_port="${dst_port}"
+			[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+			if [ -n "${src_addr6}" ]; then
+				B ip addr add "${src_addr6}" dev veth_b nodad
+				src_addr6="-B ${src_addr6}"
+			else
+				src_addr6="-B 2001:db8::2"
+			fi
+			if [ -n "${src_port}" ]; then
+				src_addr6="${src_addr6}:${src_port}"
+			fi
+			B ip route add default dev veth_b
+			ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+				2>/dev/null
+
+			# shellcheck disable=SC2086 # this needs split options
+			iperf -s -VDB "${dst_addr6}" ${dst_port} >/dev/null 2>&1
+			busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
+
+			# shellcheck disable=SC2086 # this needs split options
+			B iperf -c "${dst_addr6}" -V ${dst_port} \
+				${src_addr6} -l1 -t 1000
+
+			src_addr6=
+			src_port=
+			dst_port=
+		}
+	elif command -v netperf >/dev/null; then
+		flood_tcp6() {
+			local n_port="${dst_port}"
+			[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+			if [ -n "${src_addr6}" ]; then
+				B ip addr add "${src_addr6}" dev veth_b nodad
+			else
+				src_addr6="2001:db8::2"
+			fi
+			if [ -n "${src_port}" ]; then
+				dst_port="${dst_port},${src_port}"
+			fi
+			B ip route add default dev veth_b
+			ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+				2>/dev/null
+
+			# shellcheck disable=SC2086 # this needs split options
+			netserver -6 ${dst_port} -L "${dst_addr6}" \
+				>/dev/null 2>&1
+			busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
+
+			# shellcheck disable=SC2086 # this needs split options
+			B netperf -6 -H "${dst_addr6}" ${dst_port} \
+				-L "${src_addr6}" -l 1000 -t TCP_STREAM
+
+			src_addr6=
+			src_port=
+			dst_port=
+		}
+	else
+		return 1
+	fi
+}
+
+# Set up function to send UDP traffic on IPv4
+setup_flood_udp() {
+	if command -v iperf3 >/dev/null; then
+		flood_udp() {
+			local n_port="${dst_port}"
+			[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+			if [ -n "${src_addr4}" ]; then
+				B ip addr add "${src_addr4}/16" dev veth_b
+				src_addr4="-B ${src_addr4}"
+			else
+				B ip addr add dev veth_b 10.0.0.2 2>/dev/null
+				src_addr4="-B 10.0.0.2"
+			fi
+			if [ -n "${src_port}" ]; then
+				src_port="--cport ${src_port}"
+			fi
+			B ip route add default dev veth_b
+			ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+			# shellcheck disable=SC2086 # this needs split options
+			iperf3 -s -DB "${dst_addr4}" ${dst_port}
+			busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
+
+			# shellcheck disable=SC2086 # this needs split options
+			B iperf3 -u -c "${dst_addr4}" -Z -b 100M -l16 -t1000 \
+				${dst_port} ${src_port} ${src_addr4}
+
+			src_addr4=
+			src_port=
+			dst_port=
+		}
+	elif command -v iperf >/dev/null; then
+		flood_udp() {
+			local n_port="${dst_port}"
+			[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+			if [ -n "${src_addr4}" ]; then
+				B ip addr add "${src_addr4}/16" dev veth_b
+				src_addr4="-B ${src_addr4}"
+			else
+				B ip addr add dev veth_b 10.0.0.2
+				src_addr4="-B 10.0.0.2"
+			fi
+			if [ -n "${src_port}" ]; then
+				src_addr4="${src_addr4}:${src_port}"
+			fi
+			B ip route add default dev veth_b
+			ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+			# shellcheck disable=SC2086 # this needs split options
+			iperf -u -sDB "${dst_addr4}" ${dst_port} >/dev/null 2>&1
+			busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
+
+			# shellcheck disable=SC2086 # this needs split options
+			B iperf -u -c "${dst_addr4}" -b 100M -l1 -t1000 \
+				${dst_port} ${src_addr4}
+
+			src_addr4=
+			src_port=
+			dst_port=
+		}
+	elif command -v netperf >/dev/null; then
+		flood_udp() {
+			local n_port="${dst_port}"
+			[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+			if [ -n "${src_addr4}" ]; then
+				B ip addr add "${src_addr4}/16" dev veth_b
+			else
+				B ip addr add dev veth_b 10.0.0.2
+				src_addr4="10.0.0.2"
+			fi
+			if [ -n "${src_port}" ]; then
+				dst_port="${dst_port},${src_port}"
+			fi
+			B ip route add default dev veth_b
+			ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+			# shellcheck disable=SC2086 # this needs split options
+			netserver -4 ${dst_port} -L "${dst_addr4}" \
+				>/dev/null 2>&1
+			busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
+
+			# shellcheck disable=SC2086 # this needs split options
+			B netperf -4 -H "${dst_addr4}" ${dst_port} \
+				-L "${src_addr4}" -l 1000 -t UDP_STREAM
+
+			src_addr4=
+			src_port=
+			dst_port=
+		}
+	else
+		return 1
+	fi
+}
+
+# Find pktgen script and set up function to start pktgen injection
+setup_perf() {
+	for pktgen_script_path in ${PKTGEN_SCRIPT_PATHS} __notfound; do
+		command -v "${pktgen_script_path}" >/dev/null && break
+	done
+	[ "${pktgen_script_path}" = "__notfound" ] && return 1
+
+	perf_ipv4() {
+		${pktgen_script_path} -s80 \
+			-i veth_a -d "${dst_addr4}" -p "${dst_port}" \
+			-m "${dst_mac}" \
+			-t $(($(nproc) / 5 + 1)) -b10000 -n0 2>/dev/null &
+		perf_pid=$!
+	}
+	perf_ipv6() {
+		IP6=6 ${pktgen_script_path} -s100 \
+			-i veth_a -d "${dst_addr6}" -p "${dst_port}" \
+			-m "${dst_mac}" \
+			-t $(($(nproc) / 5 + 1)) -b10000 -n0 2>/dev/null &
+		perf_pid=$!
+	}
+}
+
+# Clean up before each test
+cleanup() {
+	nft reset counter inet filter test	>/dev/null 2>&1
+	nft flush ruleset			>/dev/null 2>&1
+	ip link del dummy0			2>/dev/null
+	ip route del default			2>/dev/null
+	ip -6 route del default			2>/dev/null
+	ip netns pids B				2>/dev/null | xargs kill 2>/dev/null
+	ip netns del B				2>/dev/null
+	ip link del veth_a			2>/dev/null
+	timeout=
+	killall iperf3				2>/dev/null
+	killall iperf				2>/dev/null
+	killall netperf				2>/dev/null
+	killall netserver			2>/dev/null
+}
+
+cleanup_exit() {
+	cleanup
+	rm -f "$tmp"
+}
+
+# Entry point for setup functions
+setup() {
+	if [ "$(id -u)" -ne 0 ]; then
+		echo "  need to run as root"
+		exit ${ksft_skip}
+	fi
+
+	cleanup
+	check_tools || return 1
+	for arg do
+		if ! eval setup_"${arg}"; then
+			err "  ${arg} not supported"
+			return 1
+		fi
+	done
+}
+
+# Format integer into IPv4 address, summing 10.0.0.5 (arbitrary) to it
+format_addr4() {
+	a=$((${1} + 16777216 * 10 + 5))
+	printf "%i.%i.%i.%i"						\
+	       "$((a / 16777216))" "$((a % 16777216 / 65536))"	\
+	       "$((a % 65536 / 256))" "$((a % 256))"
+}
+
+# Format integer into IPv6 address, summing 2001:db8:: to it
+format_addr6() {
+	printf "2001:db8::%04x:%04x" "$((${1} / 65536))" "$((${1} % 65536))"
+}
+
+# Format integer into EUI-48 address, summing 00:01:00:00:00:00 to it
+format_mac() {
+	printf "00:01:%02x:%02x:%02x:%02x" \
+	       "$((${1} / 16777216))" "$((${1} % 16777216 / 65536))"	\
+	       "$((${1} % 65536 / 256))" "$((${1} % 256))"
+}
+
+# Format integer into port, avoid 0 port
+format_port() {
+	printf "%i" "$((${1} % 65534 + 1))"
+}
+
+# Drop suffixed '6' from L4 protocol, if any
+format_proto() {
+	printf "%s" "${proto}" | tr -d 6
+}
+
+# Format destination and source fields into nft concatenated type
+format() {
+	__start=
+	__end=
+	__expr="{ "
+
+	for f in ${dst}; do
+		[ "${__expr}" != "{ " ] && __expr="${__expr} . "
+
+		__start="$(eval format_"${f}" "${start}")"
+		__end="$(eval format_"${f}" "${end}")"
+
+		if [ "${f}" = "proto" ]; then
+			__expr="${__expr}${__start}"
+		else
+			__expr="${__expr}${__start}-${__end}"
+		fi
+	done
+	for f in ${src}; do
+		[ "${__expr}" != "{ " ] && __expr="${__expr} . "
+
+		__start="$(eval format_"${f}" "${srcstart}")"
+		__end="$(eval format_"${f}" "${srcend}")"
+
+		if [ "${f}" = "proto" ]; then
+			__expr="${__expr}${__start}"
+		else
+			__expr="${__expr}${__start}-${__end}"
+		fi
+	done
+
+	if [ -n "${timeout}" ]; then
+		echo "${__expr} timeout ${timeout}s }"
+	else
+		echo "${__expr} }"
+	fi
+}
+
+# Format destination and source fields into nft type, start element only
+format_norange() {
+	__expr="{ "
+
+	for f in ${dst}; do
+		[ "${__expr}" != "{ " ] && __expr="${__expr} . "
+
+		__expr="${__expr}$(eval format_"${f}" "${start}")"
+	done
+	for f in ${src}; do
+		__expr="${__expr} . $(eval format_"${f}" "${start}")"
+	done
+
+	echo "${__expr} }"
+}
+
+# Format first destination field into nft type
+format_noconcat() {
+	for f in ${dst}; do
+		__start="$(eval format_"${f}" "${start}")"
+		__end="$(eval format_"${f}" "${end}")"
+
+		if [ "${f}" = "proto" ]; then
+			echo "{ ${__start} }"
+		else
+			echo "{ ${__start}-${__end} }"
+		fi
+		return
+	done
+}
+
+# Add single entry to 'test' set in 'inet filter' table
+add() {
+	if ! nft add element inet filter test "${1}"; then
+		err "Failed to add ${1} given ruleset:"
+		err "$(nft -a list ruleset)"
+		return 1
+	fi
+}
+
+# Format and output entries for sets in 'netdev perf' table
+add_perf() {
+	if [ "${1}" = "test" ]; then
+		echo "add element netdev perf test $(format)"
+	elif [ "${1}" = "norange" ]; then
+		echo "add element netdev perf norange $(format_norange)"
+	elif [ "${1}" = "noconcat" ]; then
+		echo "add element netdev perf noconcat $(format_noconcat)"
+	fi
+}
+
+# Add single entry to 'norange' set in 'netdev perf' table
+add_perf_norange() {
+	if ! nft add element netdev perf norange "${1}"; then
+		err "Failed to add ${1} given ruleset:"
+		err "$(nft -a list ruleset)"
+		return 1
+	fi
+}
+
+# Add single entry to 'noconcat' set in 'netdev perf' table
+add_perf_noconcat() {
+	if ! nft add element netdev perf noconcat "${1}"; then
+		err "Failed to add ${1} given ruleset:"
+		err "$(nft -a list ruleset)"
+		return 1
+	fi
+}
+
+# Delete single entry from set
+del() {
+	if ! nft delete element inet filter test "${1}"; then
+		err "Failed to delete ${1} given ruleset:"
+		err "$(nft -a list ruleset)"
+		return 1
+	fi
+}
+
+# Return packet count from 'test' counter in 'inet filter' table
+count_packets() {
+	found=0
+	for token in $(nft list counter inet filter test); do
+		[ ${found} -eq 1 ] && echo "${token}" && return
+		[ "${token}" = "packets" ] && found=1
+	done
+}
+
+# Return packet count from 'test' counter in 'netdev perf' table
+count_perf_packets() {
+	found=0
+	for token in $(nft list counter netdev perf test); do
+		[ ${found} -eq 1 ] && echo "${token}" && return
+		[ "${token}" = "packets" ] && found=1
+	done
+}
+
+# Set MAC addresses, send traffic according to specifier
+flood() {
+	ip link set veth_a address "$(format_mac "${1}")"
+	ip -n B link set veth_b address "$(format_mac "${2}")"
+
+	for f in ${dst}; do
+		eval dst_"$f"=\$\(format_\$f "${1}"\)
+	done
+	for f in ${src}; do
+		eval src_"$f"=\$\(format_\$f "${2}"\)
+	done
+	eval flood_\$proto
+}
+
+# Set MAC addresses, start pktgen injection
+perf() {
+	dst_mac="$(format_mac "${1}")"
+	ip link set veth_a address "${dst_mac}"
+
+	for f in ${dst}; do
+		eval dst_"$f"=\$\(format_\$f "${1}"\)
+	done
+	for f in ${src}; do
+		eval src_"$f"=\$\(format_\$f "${2}"\)
+	done
+	eval perf_\$perf_proto
+}
+
+# Set MAC addresses, send single packet, check that it matches, reset counter
+send_match() {
+	ip link set veth_a address "$(format_mac "${1}")"
+	ip -n B link set veth_b address "$(format_mac "${2}")"
+
+	for f in ${dst}; do
+		eval dst_"$f"=\$\(format_\$f "${1}"\)
+	done
+	for f in ${src}; do
+		eval src_"$f"=\$\(format_\$f "${2}"\)
+	done
+	eval send_\$proto
+	if [ "$(count_packets)" != "1" ]; then
+		err "${proto} packet to:"
+		err "  $(for f in ${dst}; do
+			 eval format_\$f "${1}"; printf ' '; done)"
+		err "from:"
+		err "  $(for f in ${src}; do
+			 eval format_\$f "${2}"; printf ' '; done)"
+		err "should have matched ruleset:"
+		err "$(nft -a list ruleset)"
+		return 1
+	fi
+	nft reset counter inet filter test >/dev/null
+}
+
+# Set MAC addresses, send single packet, check that it doesn't match
+send_nomatch() {
+	ip link set veth_a address "$(format_mac "${1}")"
+	ip -n B link set veth_b address "$(format_mac "${2}")"
+
+	for f in ${dst}; do
+		eval dst_"$f"=\$\(format_\$f "${1}"\)
+	done
+	for f in ${src}; do
+		eval src_"$f"=\$\(format_\$f "${2}"\)
+	done
+	eval send_\$proto
+	if [ "$(count_packets)" != "0" ]; then
+		err "${proto} packet to:"
+		err "  $(for f in ${dst}; do
+			 eval format_\$f "${1}"; printf ' '; done)"
+		err "from:"
+		err "  $(for f in ${src}; do
+			 eval format_\$f "${2}"; printf ' '; done)"
+		err "should not have matched ruleset:"
+		err "$(nft -a list ruleset)"
+		return 1
+	fi
+}
+
+# Correctness test template:
+# - add ranged element, check that packets match it
+# - check that packets outside range don't match it
+# - remove some elements, check that packets don't match anymore
+test_correctness() {
+	setup veth send_"${proto}" set || return ${ksft_skip}
+
+	range_size=1
+	for i in $(seq "${start}" $((start + count))); do
+		end=$((start + range_size))
+
+		# Avoid negative or zero-sized port ranges
+		if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+			start=${end}
+			end=$((end + 1))
+		fi
+		srcstart=$((start + src_delta))
+		srcend=$((end + src_delta))
+
+		add "$(format)" || return 1
+		for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
+			send_match "${j}" $((j + src_delta)) || return 1
+		done
+		send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1
+
+		# Delete elements now and then
+		if [ $((i % 3)) -eq 0 ]; then
+			del "$(format)" || return 1
+			for j in $(seq "$start" \
+				   $((range_size / 2 + 1)) ${end}); do
+				send_nomatch "${j}" $((j + src_delta)) \
+					|| return 1
+			done
+		fi
+
+		range_size=$((range_size + 1))
+		start=$((end + range_size))
+	done
+}
+
+# Concurrency test template:
+# - add all the elements
+# - start a thread for each physical thread that:
+#   - adds all the elements
+#   - flushes the set
+#   - adds all the elements
+#   - flushes the entire ruleset
+#   - adds the set back
+#   - adds all the elements
+#   - delete all the elements
+test_concurrency() {
+	proto=${flood_proto}
+	tools=${flood_tools}
+	chain_spec=${flood_spec}
+	setup veth flood_"${proto}" set || return ${ksft_skip}
+
+	range_size=1
+	cstart=${start}
+	flood_pids=
+	for i in $(seq "$start" $((start + count))); do
+		end=$((start + range_size))
+		srcstart=$((start + src_delta))
+		srcend=$((end + src_delta))
+
+		add "$(format)" || return 1
+
+		flood "${i}" $((i + src_delta)) & flood_pids="${flood_pids} $!"
+
+		range_size=$((range_size + 1))
+		start=$((end + range_size))
+	done
+
+	sleep $((RANDOM%10))
+
+	pids=
+	for c in $(seq 1 "$(nproc)"); do (
+		for r in $(seq 1 "${race_repeat}"); do
+			range_size=1
+
+			# $start needs to be local to this subshell
+			# shellcheck disable=SC2030
+			start=${cstart}
+			for i in $(seq "$start" $((start + count))); do
+				end=$((start + range_size))
+				srcstart=$((start + src_delta))
+				srcend=$((end + src_delta))
+
+				add "$(format)" 2>/dev/null
+
+				range_size=$((range_size + 1))
+				start=$((end + range_size))
+			done
+
+			nft flush inet filter test 2>/dev/null
+
+			range_size=1
+			start=${cstart}
+			for i in $(seq "$start" $((start + count))); do
+				end=$((start + range_size))
+				srcstart=$((start + src_delta))
+				srcend=$((end + src_delta))
+
+				add "$(format)" 2>/dev/null
+
+				range_size=$((range_size + 1))
+				start=$((end + range_size))
+			done
+
+			nft flush ruleset
+			setup set 2>/dev/null
+
+			range_size=1
+			start=${cstart}
+			for i in $(seq "$start" $((start + count))); do
+				end=$((start + range_size))
+				srcstart=$((start + src_delta))
+				srcend=$((end + src_delta))
+
+				add "$(format)" 2>/dev/null
+
+				range_size=$((range_size + 1))
+				start=$((end + range_size))
+			done
+
+			range_size=1
+			start=${cstart}
+			for i in $(seq "$start" $((start + count))); do
+				end=$((start + range_size))
+				srcstart=$((start + src_delta))
+				srcend=$((end + src_delta))
+
+				del "$(format)" 2>/dev/null
+
+				range_size=$((range_size + 1))
+				start=$((end + range_size))
+			done
+		done
+	) & pids="${pids} $!"
+	done
+
+	# shellcheck disable=SC2046,SC2086 # word splitting wanted here
+	wait $(for pid in ${pids}; do echo ${pid}; done)
+	# shellcheck disable=SC2046,SC2086
+	kill $(for pid in ${flood_pids}; do echo ${pid}; done) 2>/dev/null
+	# shellcheck disable=SC2046,SC2086
+	wait $(for pid in ${flood_pids}; do echo ${pid}; done) 2>/dev/null
+
+	return 0
+}
+
+# Timeout test template:
+# - add all the elements with 3s timeout while checking that packets match
+# - wait 3s after the last insertion, check that packets don't match any entry
+test_timeout() {
+	setup veth send_"${proto}" set || return ${ksft_skip}
+
+	timeout=3
+
+	[ "$KSFT_MACHINE_SLOW" = "yes" ] && timeout=8
+
+	range_size=1
+	for i in $(seq "$start" $((start + count))); do
+		end=$((start + range_size))
+		srcstart=$((start + src_delta))
+		srcend=$((end + src_delta))
+
+		add "$(format)" || return 1
+
+		for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
+			send_match "${j}" $((j + src_delta)) || return 1
+		done
+
+		range_size=$((range_size + 1))
+		start=$((end + range_size))
+	done
+	sleep $timeout
+	for i in $(seq "$start" $((start + count))); do
+		end=$((start + range_size))
+		srcstart=$((start + src_delta))
+		srcend=$((end + src_delta))
+
+		for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
+			send_nomatch "${j}" $((j + src_delta)) || return 1
+		done
+
+		range_size=$((range_size + 1))
+		start=$((end + range_size))
+	done
+}
+
+# Performance test template:
+# - add concatenated ranged entries
+# - add non-ranged concatenated entries (for hash set matching rate baseline)
+# - add ranged entries with first field only (for rbhash baseline)
+# - start pktgen injection directly on device rx path of this namespace
+# - measure drop only rate, hash and rbtree baselines, then matching rate
+test_performance() {
+	chain_spec=${perf_spec}
+	dst="${perf_dst}"
+	src="${perf_src}"
+	setup veth perf set || return ${ksft_skip}
+
+	first=${start}
+	range_size=1
+	for set in test norange noconcat; do
+		start=${first}
+		for i in $(seq "$start" $((start + perf_entries))); do
+			end=$((start + range_size))
+			srcstart=$((start + src_delta))
+			srcend=$((end + src_delta))
+
+			if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+				start=${end}
+				end=$((end + 1))
+			elif [ "$start" -eq "$end" ]; then
+				end=$((start + 1))
+			fi
+
+			add_perf ${set}
+
+			start=$((end + range_size))
+		done > "${tmp}"
+		nft -f "${tmp}"
+	done
+
+	perf $((end - 1)) "$srcstart"
+
+	sleep 2
+
+	nft add rule netdev perf test counter name \"test\" drop
+	nft reset counter netdev perf test >/dev/null 2>&1
+	sleep "${perf_duration}"
+	pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))"
+	info "    baseline (drop from netdev hook):            ${pps}pps"
+	handle="$(nft -a list chain netdev perf test | grep counter)"
+	handle="${handle##* }"
+	nft delete rule netdev perf test handle "${handle}"
+
+	nft add rule "netdev perf test ${chain_spec} @norange \
+		counter name \"test\" drop"
+	nft reset counter netdev perf test >/dev/null 2>&1
+	sleep "${perf_duration}"
+	pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))"
+	info "    baseline hash (non-ranged entries):          ${pps}pps"
+	handle="$(nft -a list chain netdev perf test | grep counter)"
+	handle="${handle##* }"
+	nft delete rule netdev perf test handle "${handle}"
+
+	nft add rule "netdev perf test ${chain_spec%%. *} @noconcat \
+		counter name \"test\" drop"
+	nft reset counter netdev perf test >/dev/null 2>&1
+	sleep "${perf_duration}"
+	pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))"
+	info "    baseline rbtree (match on first field only): ${pps}pps"
+	handle="$(nft -a list chain netdev perf test | grep counter)"
+	handle="${handle##* }"
+	nft delete rule netdev perf test handle "${handle}"
+
+	nft add rule "netdev perf test ${chain_spec} @test \
+		counter name \"test\" drop"
+	nft reset counter netdev perf test >/dev/null 2>&1
+	sleep "${perf_duration}"
+	pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))"
+	p5="$(printf %5s "${perf_entries}")"
+	info "    set with ${p5} full, ranged entries:         ${pps}pps"
+	kill "${perf_pid}"
+}
+
+test_bug_flush_remove_add() {
+	rounds=100
+	[ "$KSFT_MACHINE_SLOW" = "yes" ] && rounds=10
+
+	set_cmd='{ set s { type ipv4_addr . inet_service; flags interval; }; }'
+	elem1='{ 10.0.0.1 . 22-25, 10.0.0.1 . 10-20 }'
+	elem2='{ 10.0.0.1 . 10-20, 10.0.0.1 . 22-25 }'
+	for i in $(seq 1 $rounds); do
+		nft add table t "$set_cmd"	|| return ${ksft_skip}
+		nft add element t s "$elem1"	2>/dev/null || return 1
+		nft flush set t s		2>/dev/null || return 1
+		nft add element t s "$elem2"	2>/dev/null || return 1
+	done
+	nft flush ruleset
+}
+
+# - add ranged element, check that packets match it
+# - reload the set, check packets still match
+test_bug_reload() {
+	setup veth send_"${proto}" set || return ${ksft_skip}
+	rstart=${start}
+
+	range_size=1
+	for i in $(seq "${start}" $((start + count))); do
+		end=$((start + range_size))
+
+		# Avoid negative or zero-sized port ranges
+		if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+			start=${end}
+			end=$((end + 1))
+		fi
+		srcstart=$((start + src_delta))
+		srcend=$((end + src_delta))
+
+		add "$(format)" || return 1
+		range_size=$((range_size + 1))
+		start=$((end + range_size))
+	done
+
+	# check kernel does allocate pcpu sctrach map
+	# for reload with no elemet add/delete
+	( echo flush set inet filter test ;
+	  nft list set inet filter test ) | nft -f -
+
+	start=${rstart}
+	range_size=1
+
+	for i in $(seq "${start}" $((start + count))); do
+		end=$((start + range_size))
+
+		# Avoid negative or zero-sized port ranges
+		if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+			start=${end}
+			end=$((end + 1))
+		fi
+		srcstart=$((start + src_delta))
+		srcend=$((end + src_delta))
+
+		for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
+			send_match "${j}" $((j + src_delta)) || return 1
+		done
+
+		range_size=$((range_size + 1))
+		start=$((end + range_size))
+	done
+
+	nft flush ruleset
+}
+
+test_reported_issues() {
+	eval test_bug_"${subtest}"
+}
+
+# Run everything in a separate network namespace
+[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
+tmp="$(mktemp)"
+trap cleanup_exit EXIT
+
+# Entry point for test runs
+passed=0
+for name in ${TESTS}; do
+	printf "TEST: %s\n" "$(echo "$name" | tr '_' ' ')"
+	if [ "${name}" = "reported_issues" ]; then
+		SUBTESTS="${BUGS}"
+	else
+		SUBTESTS="${TYPES}"
+	fi
+
+	for subtest in ${SUBTESTS}; do
+		eval desc=\$TYPE_"${subtest}"
+		IFS='
+'
+		for __line in ${desc}; do
+			# shellcheck disable=SC2086
+			eval ${__line%%	*}=\"${__line##*	}\";
+		done
+		IFS=' 	
+'
+
+		if [ "${name}" = "concurrency" ] && \
+		   [ "${race_repeat}" = "0" ]; then
+			continue
+		fi
+		if [ "${name}" = "performance" ] && \
+		   [ "${perf_duration}" = "0" ]; then
+			continue
+		fi
+
+		[ "$KSFT_MACHINE_SLOW" = "yes" ] && count=1
+
+		printf "  %-32s  " "${display}"
+		tthen=$(date +%s)
+		eval test_"${name}"
+		ret=$?
+
+		tnow=$(date +%s)
+		printf "%5ds%-30s" $((tnow-tthen))
+
+		if [ $ret -eq 0 ]; then
+			printf "[ OK ]\n"
+			info_flush
+			passed=$((passed + 1))
+		elif [ $ret -eq 1 ]; then
+			printf "[FAIL]\n"
+			err_flush
+			exit 1
+		elif [ $ret -eq ${ksft_skip} ]; then
+			printf "[SKIP]\n"
+			err_flush
+		fi
+	done
+done
+
+[ ${passed} -eq 0 ] && exit ${ksft_skip} || exit 0
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh b/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh
new file mode 100755
index 0000000000..5d276995a5
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+
+source lib.sh
+
+[ "$KSFT_MACHINE_SLOW" = yes ] && exit ${ksft_skip}
+
+NFT_CONCAT_RANGE_TESTS="performance" exec ./nft_concat_range.sh
diff --git a/tools/testing/selftests/net/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/net/netfilter/nft_conntrack_helper.sh
new file mode 100755
index 0000000000..abcaa73371
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_conntrack_helper.sh
@@ -0,0 +1,171 @@
+#!/bin/bash
+#
+# This tests connection tracking helper assignment:
+# 1. can attach ftp helper to a connection from nft ruleset.
+# 2. auto-assign still works.
+#
+# Kselftest framework requirement - SKIP code is 4.
+
+source lib.sh
+
+ret=0
+
+testipv6=1
+
+checktool "socat -h" "run test without socat"
+checktool "conntrack --version" "run test without conntrack"
+checktool "nft --version" "run test without nft"
+
+cleanup()
+{
+	ip netns pids "$ns1" | xargs kill 2>/dev/null
+
+	ip netns del "$ns1"
+	ip netns del "$ns2"
+}
+
+trap cleanup EXIT
+
+setup_ns ns1 ns2
+
+if ! ip link add veth0 netns "$ns1" type veth peer name veth0 netns "$ns2" > /dev/null 2>&1;then
+    echo "SKIP: No virtual ethernet pair device support in kernel"
+    exit $ksft_skip
+fi
+
+ip -net "$ns1" link set veth0 up
+ip -net "$ns2" link set veth0 up
+
+ip -net "$ns1" addr add 10.0.1.1/24 dev veth0
+ip -net "$ns1" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$ns2" addr add 10.0.1.2/24 dev veth0
+ip -net "$ns2" addr add dead:1::2/64 dev veth0 nodad
+
+load_ruleset_family() {
+	local family=$1
+	local ns=$2
+
+ip netns exec "$ns" nft -f - <<EOF
+table $family raw {
+	ct helper ftp {
+             type "ftp" protocol tcp
+        }
+	chain pre {
+		type filter hook prerouting priority 0; policy accept;
+		tcp dport 2121 ct helper set "ftp"
+	}
+	chain output {
+		type filter hook output priority 0; policy accept;
+		tcp dport 2121 ct helper set "ftp"
+	}
+}
+EOF
+	return $?
+}
+
+check_for_helper()
+{
+	local netns=$1
+	local message=$2
+	local port=$3
+
+	if echo "$message" |grep -q 'ipv6';then
+		local family="ipv6"
+	else
+		local family="ipv4"
+	fi
+
+	if ! ip netns exec "$netns" conntrack -L -f $family -p tcp --dport "$port" 2> /dev/null |grep -q 'helper=ftp';then
+		if [ "$autoassign" -eq 0 ] ;then
+			echo "FAIL: ${netns} did not show attached helper $message" 1>&2
+			ret=1
+		else
+			echo "PASS: ${netns} did not show attached helper $message" 1>&2
+		fi
+	else
+		if [ "$autoassign" -eq 0 ] ;then
+			echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2
+		else
+			echo "FAIL: ${netns} connection on port $port has ftp helper attached" 1>&2
+			ret=1
+		fi
+	fi
+
+	return 0
+}
+
+listener_ready()
+{
+	ns="$1"
+	port="$2"
+	proto="$3"
+	ss -N "$ns" -lnt -o "sport = :$port" | grep -q "$port"
+}
+
+test_helper()
+{
+	local port=$1
+	local autoassign=$2
+
+	if [ "$autoassign" -eq 0 ] ;then
+		msg="set via ruleset"
+	else
+		msg="auto-assign"
+	fi
+
+	ip netns exec "$ns2" socat -t 3 -u -4 TCP-LISTEN:"$port",reuseaddr STDOUT > /dev/null &
+	busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" "$port" "-4"
+
+	ip netns exec "$ns1" socat -u -4 STDIN TCP:10.0.1.2:"$port" < /dev/null > /dev/null
+
+	check_for_helper "$ns1" "ip $msg" "$port" "$autoassign"
+	check_for_helper "$ns2" "ip $msg" "$port" "$autoassign"
+
+	if [ $testipv6 -eq 0 ] ;then
+		return 0
+	fi
+
+	ip netns exec "$ns1" conntrack -F 2> /dev/null
+	ip netns exec "$ns2" conntrack -F 2> /dev/null
+
+	ip netns exec "$ns2" socat -t 3 -u -6 TCP-LISTEN:"$port",reuseaddr STDOUT > /dev/null &
+	busywait $BUSYWAIT_TIMEOUT listener_ready "$ns2" "$port" "-6"
+
+	ip netns exec "$ns1" socat -t 3 -u -6 STDIN TCP:"[dead:1::2]":"$port" < /dev/null > /dev/null
+
+	check_for_helper "$ns1" "ipv6 $msg" "$port"
+	check_for_helper "$ns2" "ipv6 $msg" "$port"
+}
+
+if ! load_ruleset_family ip "$ns1"; then
+	echo "FAIL: ${ns1} cannot load ip ruleset" 1>&2
+	exit 1
+fi
+
+if ! load_ruleset_family ip6 "$ns1"; then
+	echo "SKIP: ${ns1} cannot load ip6 ruleset" 1>&2
+	testipv6=0
+fi
+
+if ! load_ruleset_family inet "${ns2}"; then
+	echo "SKIP: ${ns1} cannot load inet ruleset" 1>&2
+	if ! load_ruleset_family ip "${ns2}"; then
+		echo "FAIL: ${ns2} cannot load ip ruleset" 1>&2
+		exit 1
+	fi
+
+	if [ "$testipv6" -eq 1 ] ;then
+		if ! load_ruleset_family ip6 "$ns2"; then
+			echo "FAIL: ${ns2} cannot load ip6 ruleset" 1>&2
+			exit 1
+		fi
+	fi
+fi
+
+test_helper 2121 0
+ip netns exec "$ns1" sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
+ip netns exec "$ns2" sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
+test_helper 21 1
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_fib.sh b/tools/testing/selftests/net/netfilter/nft_fib.sh
new file mode 100755
index 0000000000..ce1451c275
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_fib.sh
@@ -0,0 +1,234 @@
+#!/bin/bash
+#
+# This tests the fib expression.
+#
+# Kselftest framework requirement - SKIP code is 4.
+
+source lib.sh
+
+ret=0
+
+timeout=4
+
+log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
+
+cleanup()
+{
+	cleanup_all_ns
+
+	[ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
+}
+
+checktool "nft --version" "run test without nft"
+
+setup_ns nsrouter ns1 ns2
+
+trap cleanup EXIT
+
+if dmesg | grep -q ' nft_rpfilter: ';then
+	dmesg -c | grep ' nft_rpfilter: '
+	echo "WARN: a previous test run has failed" 1>&2
+fi
+
+sysctl -q net.netfilter.nf_log_all_netns=1
+
+load_ruleset() {
+	local netns=$1
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+	chain prerouting {
+		type filter hook prerouting priority 0; policy accept;
+	        fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
+	}
+}
+EOF
+}
+
+load_pbr_ruleset() {
+	local netns=$1
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+	chain forward {
+		type filter hook forward priority raw;
+		fib saddr . iif oif gt 0 accept
+		log drop
+	}
+}
+EOF
+}
+
+load_ruleset_count() {
+	local netns=$1
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+	chain prerouting {
+		type filter hook prerouting priority 0; policy accept;
+		ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop
+		ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop
+	}
+}
+EOF
+}
+
+check_drops() {
+	if dmesg | grep -q ' nft_rpfilter: ';then
+		dmesg | grep ' nft_rpfilter: '
+		echo "FAIL: rpfilter did drop packets"
+		return 1
+	fi
+
+	return 0
+}
+
+check_fib_counter() {
+	local want=$1
+	local ns=$2
+	local address=$3
+
+	if ! ip netns exec "$ns" nft list table inet filter | grep 'fib saddr . iif' | grep "$address" | grep -q "packets $want";then
+		echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2
+		ip netns exec "$ns" nft list table inet filter
+		return 1
+	fi
+
+	if [ "$want" -gt 0 ]; then
+		echo "PASS: fib expression did drop packets for $address"
+	fi
+
+	return 0
+}
+
+load_ruleset "$nsrouter"
+load_ruleset "$ns1"
+load_ruleset "$ns2"
+
+if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
+    echo "SKIP: No virtual ethernet pair device support in kernel"
+    exit $ksft_skip
+fi
+ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2"
+
+ip -net "$nsrouter" link set veth0 up
+ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$nsrouter" link set veth1 up
+ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1
+ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad
+
+ip -net "$ns1" link set eth0 up
+ip -net "$ns2" link set eth0 up
+
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" route add default via dead:1::1
+
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns2" route add default via dead:2::1
+
+test_ping() {
+  local daddr4=$1
+  local daddr6=$2
+
+  if ! ip netns exec "$ns1" ping -c 1 -q "$daddr4" > /dev/null; then
+	check_drops
+	echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2
+	return 1
+  fi
+
+  if ! ip netns exec "$ns1" ping -c 1 -q "$daddr6" > /dev/null; then
+	check_drops
+	echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2
+	return 1
+  fi
+
+  return 0
+}
+
+ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null
+
+test_ping 10.0.2.1 dead:2::1 || exit 1
+check_drops || exit 1
+
+test_ping 10.0.2.99 dead:2::99 || exit 1
+check_drops || exit 1
+
+echo "PASS: fib expression did not cause unwanted packet drops"
+
+ip netns exec "$nsrouter" nft flush table inet filter
+
+ip -net "$ns1" route del default
+ip -net "$ns1" -6 route del default
+
+ip -net "$ns1" addr del 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr del dead:1::99/64 dev eth0
+
+ip -net "$ns1" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns1" addr add dead:2::99/64 dev eth0 nodad
+
+ip -net "$ns1" route add default via 10.0.2.1
+ip -net "$ns1" -6 route add default via dead:2::1
+
+ip -net "$nsrouter" addr add dead:2::1/64 dev veth0 nodad
+
+# switch to ruleset that doesn't log, this time
+# its expected that this does drop the packets.
+load_ruleset_count "$nsrouter"
+
+# ns1 has a default route, but nsrouter does not.
+# must not check return value, ping to 1.1.1.1 will
+# fail.
+check_fib_counter 0 "$nsrouter" 1.1.1.1 || exit 1
+check_fib_counter 0 "$nsrouter" 1c3::c01d || exit 1
+
+ip netns exec "$ns1" ping -W 0.5 -c 1 -q 1.1.1.1 > /dev/null
+check_fib_counter 1 "$nsrouter" 1.1.1.1 || exit 1
+
+ip netns exec "$ns1" ping -W 0.5 -i 0.1 -c 3 -q 1c3::c01d > /dev/null
+check_fib_counter 3 "$nsrouter" 1c3::c01d || exit 1
+
+# delete all rules
+ip netns exec "$ns1" nft flush ruleset
+ip netns exec "$ns2" nft flush ruleset
+ip netns exec "$nsrouter" nft flush ruleset
+
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+
+ip -net "$ns1" addr del 10.0.2.99/24 dev eth0
+ip -net "$ns1" addr del dead:2::99/64 dev eth0
+
+ip -net "$nsrouter" addr del dead:2::1/64 dev veth0
+
+# ... pbr ruleset for the router, check iif+oif.
+if ! load_pbr_ruleset "$nsrouter";then
+	echo "SKIP: Could not load fib forward ruleset"
+	exit $ksft_skip
+fi
+
+ip -net "$nsrouter" rule add from all table 128
+ip -net "$nsrouter" rule add from all iif veth0 table 129
+ip -net "$nsrouter" route add table 128 to 10.0.1.0/24 dev veth0
+ip -net "$nsrouter" route add table 129 to 10.0.2.0/24 dev veth1
+
+# drop main ipv4 table
+ip -net "$nsrouter" -4 rule delete table main
+
+if ! test_ping 10.0.2.99 dead:2::99;then
+	ip -net "$nsrouter" nft list ruleset
+	echo "FAIL: fib mismatch in pbr setup"
+	exit 1
+fi
+
+echo "PASS: fib expression forward check with policy based routing"
+exit 0
diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
new file mode 100755
index 0000000000..b399555085
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
@@ -0,0 +1,671 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This tests basic flowtable functionality.
+# Creates following default topology:
+#
+# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
+# Router1 is the one doing flow offloading, Router2 has no special
+# purpose other than having a link that is smaller than either Originator
+# and responder, i.e. TCPMSS announced values are too large and will still
+# result in fragmentation and/or PMTU discovery.
+#
+# You can check with different Orgininator/Link/Responder MTU eg:
+# nft_flowtable.sh -o8000 -l1500 -r2000
+#
+
+source lib.sh
+
+ret=0
+SOCAT_TIMEOUT=60
+
+nsin=""
+ns1out=""
+ns2out=""
+
+log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
+
+checktool "nft --version" "run test without nft tool"
+checktool "socat -h" "run test without socat"
+
+setup_ns ns1 ns2 nsr1 nsr2
+
+cleanup() {
+	ip netns pids "$ns1" | xargs kill 2>/dev/null
+	ip netns pids "$ns2" | xargs kill 2>/dev/null
+
+	cleanup_all_ns
+
+	rm -f "$nsin" "$ns1out" "$ns2out"
+
+	[ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns"
+}
+
+trap cleanup EXIT
+
+sysctl -q net.netfilter.nf_log_all_netns=1
+
+ip link add veth0 netns "$nsr1" type veth peer name eth0 netns "$ns1"
+ip link add veth1 netns "$nsr1" type veth peer name veth0 netns "$nsr2"
+
+ip link add veth1 netns "$nsr2" type veth peer name eth0 netns "$ns2"
+
+for dev in veth0 veth1; do
+    ip -net "$nsr1" link set "$dev" up
+    ip -net "$nsr2" link set "$dev" up
+done
+
+ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$nsr2" addr add 10.0.2.1/24 dev veth1
+ip -net "$nsr2" addr add dead:2::1/64 dev veth1 nodad
+
+# set different MTUs so we need to push packets coming from ns1 (large MTU)
+# to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
+# or to do PTMU discovery (send ICMP error back to originator).
+# ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
+# is NOT the lowest link mtu.
+
+omtu=9000
+lmtu=1500
+rmtu=2000
+
+usage(){
+	echo "nft_flowtable.sh [OPTIONS]"
+	echo
+	echo "MTU options"
+	echo "   -o originator"
+	echo "   -l link"
+	echo "   -r responder"
+	exit 1
+}
+
+while getopts "o:l:r:" o
+do
+	case $o in
+		o) omtu=$OPTARG;;
+		l) lmtu=$OPTARG;;
+		r) rmtu=$OPTARG;;
+		*) usage;;
+	esac
+done
+
+if ! ip -net "$nsr1" link set veth0 mtu "$omtu"; then
+	exit 1
+fi
+
+ip -net "$ns1" link set eth0 mtu "$omtu"
+
+if ! ip -net "$nsr2" link set veth1 mtu "$rmtu"; then
+	exit 1
+fi
+
+if ! ip -net "$nsr1" link set veth1 mtu "$lmtu"; then
+	exit 1
+fi
+
+if ! ip -net "$nsr2" link set veth0 mtu "$lmtu"; then
+	exit 1
+fi
+
+ip -net "$ns2" link set eth0 mtu "$rmtu"
+
+# transfer-net between nsr1 and nsr2.
+# these addresses are not used for connections.
+ip -net "$nsr1" addr add 192.168.10.1/24 dev veth1
+ip -net "$nsr1" addr add fee1:2::1/64 dev veth1 nodad
+
+ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0
+ip -net "$nsr2" addr add fee1:2::2/64 dev veth0 nodad
+
+for i in 0 1; do
+  ip netns exec "$nsr1" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
+  ip netns exec "$nsr2" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
+done
+
+for ns in "$ns1" "$ns2";do
+  ip -net "$ns" link set eth0 up
+
+  if ! ip netns exec "$ns" sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
+	echo "ERROR: Check Originator/Responder values (problem during address addition)"
+	exit 1
+  fi
+  # don't set ip DF bit for first two tests
+  ip netns exec "$ns" sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
+done
+
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via dead:1::1
+ip -net "$ns2" route add default via dead:2::1
+
+ip -net "$nsr1" route add default via 192.168.10.2
+ip -net "$nsr2" route add default via 192.168.10.1
+
+ip netns exec "$nsr1" nft -f - <<EOF
+table inet filter {
+  flowtable f1 {
+     hook ingress priority 0
+     devices = { veth0, veth1 }
+   }
+
+   counter routed_orig { }
+   counter routed_repl { }
+
+   chain forward {
+      type filter hook forward priority 0; policy drop;
+
+      # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
+      meta oif "veth1" tcp dport 12345 ct mark set 1 flow add @f1 counter name routed_orig accept
+
+      # count packets supposedly offloaded as per direction.
+      ct mark 1 counter name ct direction map { original : routed_orig, reply : routed_repl } accept
+
+      ct state established,related accept
+
+      meta nfproto ipv4 meta l4proto icmp accept
+      meta nfproto ipv6 meta l4proto icmpv6 accept
+   }
+}
+EOF
+
+if [ $? -ne 0 ]; then
+	echo "SKIP: Could not load nft ruleset"
+	exit $ksft_skip
+fi
+
+ip netns exec "$ns2" nft -f - <<EOF
+table inet filter {
+   counter ip4dscp0 { }
+   counter ip4dscp3 { }
+
+   chain input {
+      type filter hook input priority 0; policy accept;
+      meta l4proto tcp goto {
+	      ip dscp cs3 counter name ip4dscp3 accept
+	      ip dscp 0 counter name ip4dscp0 accept
+      }
+   }
+}
+EOF
+
+if [ $? -ne 0 ]; then
+	echo -n "SKIP: Could not load ruleset: "
+	nft --version
+	exit $ksft_skip
+fi
+
+# test basic connectivity
+if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
+  echo "ERROR: $ns1 cannot reach ns2" 1>&2
+  exit 1
+fi
+
+if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
+  echo "ERROR: $ns2 cannot reach $ns1" 1>&2
+  exit 1
+fi
+
+nsin=$(mktemp)
+ns1out=$(mktemp)
+ns2out=$(mktemp)
+
+make_file()
+{
+	name=$1
+
+	SIZE=$((RANDOM % (1024 * 128)))
+	SIZE=$((SIZE + (1024 * 8)))
+	TSIZE=$((SIZE * 1024))
+
+	dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
+
+	SIZE=$((RANDOM % 1024))
+	SIZE=$((SIZE + 128))
+	TSIZE=$((TSIZE + SIZE))
+	dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null
+}
+
+check_counters()
+{
+	local what=$1
+	local ok=1
+
+	local orig repl
+	orig=$(ip netns exec "$nsr1" nft reset counter inet filter routed_orig | grep packets)
+	repl=$(ip netns exec "$nsr1" nft reset counter inet filter routed_repl | grep packets)
+
+	local orig_cnt=${orig#*bytes}
+	local repl_cnt=${repl#*bytes}
+
+	local fs
+	fs=$(du -sb "$nsin")
+	local max_orig=${fs%%/*}
+	local max_repl=$((max_orig/4))
+
+	# flowtable fastpath should bypass normal routing one, i.e. the counters in forward hook
+	# should always be lower than the size of the transmitted file (max_orig).
+	if [ "$orig_cnt" -gt "$max_orig" ];then
+		echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig" 1>&2
+		ret=1
+		ok=0
+	fi
+
+	if [ "$repl_cnt" -gt $max_repl ];then
+		echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl" 1>&2
+		ret=1
+		ok=0
+	fi
+
+	if [ $ok -eq 1 ]; then
+		echo "PASS: $what"
+	fi
+}
+
+check_dscp()
+{
+	local what=$1
+	local ok=1
+
+	local counter
+	counter=$(ip netns exec "$ns2" nft reset counter inet filter ip4dscp3 | grep packets)
+
+	local pc4=${counter%*bytes*}
+	local pc4=${pc4#*packets}
+
+	counter=$(ip netns exec "$ns2" nft reset counter inet filter ip4dscp0 | grep packets)
+	local pc4z=${counter%*bytes*}
+	local pc4z=${pc4z#*packets}
+
+	case "$what" in
+	"dscp_none")
+		if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then
+			echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
+			ret=1
+			ok=0
+		fi
+		;;
+	"dscp_fwd")
+		if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then
+			echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
+			ret=1
+			ok=0
+		fi
+		;;
+	"dscp_ingress")
+		if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
+			echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+			ret=1
+			ok=0
+		fi
+		;;
+	"dscp_egress")
+		if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
+			echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+			ret=1
+			ok=0
+		fi
+		;;
+	*)
+		echo "FAIL: Unknown DSCP check" 1>&2
+		ret=1
+		ok=0
+	esac
+
+	if [ "$ok" -eq 1 ] ;then
+		echo "PASS: $what: dscp packet counters match"
+	fi
+}
+
+check_transfer()
+{
+	in=$1
+	out=$2
+	what=$3
+
+	if ! cmp "$in" "$out" > /dev/null 2>&1; then
+		echo "FAIL: file mismatch for $what" 1>&2
+		ls -l "$in"
+		ls -l "$out"
+		return 1
+	fi
+
+	return 0
+}
+
+listener_ready()
+{
+	ss -N "$nsb" -lnt -o "sport = :12345" | grep -q 12345
+}
+
+test_tcp_forwarding_ip()
+{
+	local nsa=$1
+	local nsb=$2
+	local dstip=$3
+	local dstport=$4
+	local lret=0
+
+	timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$nsin" > "$ns2out" &
+	lpid=$!
+
+	busywait 1000 listener_ready
+
+	timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$nsin" > "$ns1out"
+
+	wait $lpid
+
+	if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then
+		lret=1
+		ret=1
+	fi
+
+	if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then
+		lret=1
+		ret=1
+	fi
+
+	return $lret
+}
+
+test_tcp_forwarding()
+{
+	test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+
+	return $?
+}
+
+test_tcp_forwarding_set_dscp()
+{
+	check_dscp "dscp_none"
+
+ip netns exec "$nsr1" nft -f - <<EOF
+table netdev dscpmangle {
+   chain setdscp0 {
+      type filter hook ingress device "veth0" priority 0; policy accept
+	ip dscp set cs3
+  }
+}
+EOF
+if [ $? -eq 0 ]; then
+	test_tcp_forwarding_ip "$1" "$2"  10.0.2.99 12345
+	check_dscp "dscp_ingress"
+
+	ip netns exec "$nsr1" nft delete table netdev dscpmangle
+else
+	echo "SKIP: Could not load netdev:ingress for veth0"
+fi
+
+ip netns exec "$nsr1" nft -f - <<EOF
+table netdev dscpmangle {
+   chain setdscp0 {
+      type filter hook egress device "veth1" priority 0; policy accept
+      ip dscp set cs3
+  }
+}
+EOF
+if [ $? -eq 0 ]; then
+	test_tcp_forwarding_ip "$1" "$2"  10.0.2.99 12345
+	check_dscp "dscp_egress"
+
+	ip netns exec "$nsr1" nft flush table netdev dscpmangle
+else
+	echo "SKIP: Could not load netdev:egress for veth1"
+fi
+
+	# partial.  If flowtable really works, then both dscp-is-0 and dscp-is-cs3
+	# counters should have seen packets (before and after ft offload kicks in).
+	ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3
+	test_tcp_forwarding_ip "$1" "$2"  10.0.2.99 12345
+	check_dscp "dscp_fwd"
+}
+
+test_tcp_forwarding_nat()
+{
+	local lret
+	local pmtu
+
+	test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+	lret=$?
+
+	pmtu=$3
+	what=$4
+
+	if [ "$lret" -eq 0 ] ; then
+		if [ "$pmtu" -eq 1 ] ;then
+			check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what"
+		else
+			echo "PASS: flow offload for ns1/ns2 with masquerade $what"
+		fi
+
+		test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
+		lret=$?
+		if [ "$pmtu" -eq 1 ] ;then
+			check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what"
+		elif [ "$lret" -eq 0 ] ; then
+			echo "PASS: flow offload for ns1/ns2 with dnat $what"
+		fi
+	fi
+
+	return $lret
+}
+
+make_file "$nsin"
+
+# First test:
+# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
+# Due to MTU mismatch in both directions, all packets (except small packets like pure
+# acks) have to be handled by normal forwarding path.  Therefore, packet counters
+# are not checked.
+if test_tcp_forwarding "$ns1" "$ns2"; then
+	echo "PASS: flow offloaded for ns1/ns2"
+else
+	echo "FAIL: flow offload for ns1/ns2:" 1>&2
+	ip netns exec "$nsr1" nft list ruleset
+	ret=1
+fi
+
+# delete default route, i.e. ns2 won't be able to reach ns1 and
+# will depend on ns1 being masqueraded in nsr1.
+# expect ns1 has nsr1 address.
+ip -net "$ns2" route del default via 10.0.2.1
+ip -net "$ns2" route del default via dead:2::1
+ip -net "$ns2" route add 192.168.10.1 via 10.0.2.1
+
+# Second test:
+# Same, but with NAT enabled.  Same as in first test: we expect normal forward path
+# to handle most packets.
+ip netns exec "$nsr1" nft -f - <<EOF
+table ip nat {
+   chain prerouting {
+      type nat hook prerouting priority 0; policy accept;
+      meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
+   }
+
+   chain postrouting {
+      type nat hook postrouting priority 0; policy accept;
+      meta oifname "veth1" counter masquerade
+   }
+}
+EOF
+
+if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then
+	echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2
+	exit 0
+fi
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 0 ""; then
+	echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
+	ip netns exec "$nsr1" nft list ruleset
+	ret=1
+fi
+
+# Third test:
+# Same as second test, but with PMTU discovery enabled. This
+# means that we expect the fastpath to handle packets as soon
+# as the endpoints adjust the packet size.
+ip netns exec "$ns1" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+
+# reset counters.
+# With pmtu in-place we'll also check that nft counters
+# are lower than file size and packets were forwarded via flowtable layer.
+# For earlier tests (large mtus), packets cannot be handled via flowtable
+# (except pure acks and other small packets).
+ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then
+	echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
+	ip netns exec "$nsr1" nft list ruleset
+fi
+
+# Another test:
+# Add bridge interface br0 to Router1, with NAT enabled.
+test_bridge() {
+if ! ip -net "$nsr1" link add name br0 type bridge 2>/dev/null;then
+	echo "SKIP: could not add bridge br0"
+	[ "$ret" -eq 0 ] && ret=$ksft_skip
+	return
+fi
+ip -net "$nsr1" addr flush dev veth0
+ip -net "$nsr1" link set up dev veth0
+ip -net "$nsr1" link set veth0 master br0
+ip -net "$nsr1" addr add 10.0.1.1/24 dev br0
+ip -net "$nsr1" addr add dead:1::1/64 dev br0 nodad
+ip -net "$nsr1" link set up dev br0
+
+ip netns exec "$nsr1" sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
+
+# br0 with NAT enabled.
+ip netns exec "$nsr1" nft -f - <<EOF
+flush table ip nat
+table ip nat {
+   chain prerouting {
+      type nat hook prerouting priority 0; policy accept;
+      meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
+   }
+
+   chain postrouting {
+      type nat hook postrouting priority 0; policy accept;
+      meta oifname "veth1" counter masquerade
+   }
+}
+EOF
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "on bridge"; then
+	echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
+	ip netns exec "$nsr1" nft list ruleset
+	ret=1
+fi
+
+
+# Another test:
+# Add bridge interface br0 to Router1, with NAT and VLAN.
+ip -net "$nsr1" link set veth0 nomaster
+ip -net "$nsr1" link set down dev veth0
+ip -net "$nsr1" link add link veth0 name veth0.10 type vlan id 10
+ip -net "$nsr1" link set up dev veth0
+ip -net "$nsr1" link set up dev veth0.10
+ip -net "$nsr1" link set veth0.10 master br0
+
+ip -net "$ns1" addr flush dev eth0
+ip -net "$ns1" link add link eth0 name eth0.10 type vlan id 10
+ip -net "$ns1" link set eth0 up
+ip -net "$ns1" link set eth0.10 up
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0.10
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" addr add dead:1::99/64 dev eth0.10 nodad
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "bridge and VLAN"; then
+	echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
+	ip netns exec "$nsr1" nft list ruleset
+	ret=1
+fi
+
+# restore test topology (remove bridge and VLAN)
+ip -net "$nsr1" link set veth0 nomaster
+ip -net "$nsr1" link set veth0 down
+ip -net "$nsr1" link set veth0.10 down
+ip -net "$nsr1" link delete veth0.10 type vlan
+ip -net "$nsr1" link delete br0 type bridge
+ip -net "$ns1" addr flush dev eth0.10
+ip -net "$ns1" link set eth0.10 down
+ip -net "$ns1" link set eth0 down
+ip -net "$ns1" link delete eth0.10 type vlan
+
+# restore address in ns1 and nsr1
+ip -net "$ns1" link set eth0 up
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via dead:1::1
+ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad
+ip -net "$nsr1" link set up dev veth0
+}
+
+test_bridge
+
+KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1)
+KEY_AES="0x"$(ps -af | md5sum | cut -d " " -f 1)
+SPI1=$RANDOM
+SPI2=$RANDOM
+
+if [ $SPI1 -eq $SPI2 ]; then
+	SPI2=$((SPI2+1))
+fi
+
+do_esp() {
+    local ns=$1
+    local me=$2
+    local remote=$3
+    local lnet=$4
+    local rnet=$5
+    local spi_out=$6
+    local spi_in=$7
+
+    ip -net "$ns" xfrm state add src "$remote" dst "$me" proto esp spi "$spi_in"  enc aes "$KEY_AES"  auth sha1 "$KEY_SHA" mode tunnel sel src "$rnet" dst "$lnet"
+    ip -net "$ns" xfrm state add src "$me"  dst "$remote" proto esp spi "$spi_out" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$lnet" dst "$rnet"
+
+    # to encrypt packets as they go out (includes forwarded packets that need encapsulation)
+    ip -net "$ns" xfrm policy add src "$lnet" dst "$rnet" dir out tmpl src "$me" dst "$remote" proto esp mode tunnel priority 1 action allow
+    # to fwd decrypted packets after esp processing:
+    ip -net "$ns" xfrm policy add src "$rnet" dst "$lnet" dir fwd tmpl src "$remote" dst "$me" proto esp mode tunnel priority 1 action allow
+}
+
+do_esp "$nsr1" 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 "$SPI1" "$SPI2"
+
+do_esp "$nsr2" 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 "$SPI2" "$SPI1"
+
+ip netns exec "$nsr1" nft delete table ip nat
+
+# restore default routes
+ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns2" route add default via dead:2::1
+
+if test_tcp_forwarding "$ns1" "$ns2"; then
+	check_counters "ipsec tunnel mode for ns1/ns2"
+else
+	echo "FAIL: ipsec tunnel mode for ns1/ns2"
+	ip netns exec "$nsr1" nft list ruleset 1>&2
+	ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
+fi
+
+if [ "$1" = "" ]; then
+	low=1280
+	mtu=$((65536 - low))
+	o=$(((RANDOM%mtu) + low))
+	l=$(((RANDOM%mtu) + low))
+	r=$(((RANDOM%mtu) + low))
+
+	echo "re-run with random mtus: -o $o -l $l -r $r"
+	$0 -o "$o" -l "$l" -r "$r"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_meta.sh b/tools/testing/selftests/net/netfilter/nft_meta.sh
new file mode 100755
index 0000000000..71505b6cb2
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_meta.sh
@@ -0,0 +1,142 @@
+#!/bin/bash
+
+# check iif/iifname/oifgroup/iiftype match.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+
+if ! nft --version > /dev/null 2>&1; then
+	echo "SKIP: Could not run test without nft tool"
+	exit $ksft_skip
+fi
+
+cleanup()
+{
+	ip netns del "$ns0"
+}
+
+ip netns add "$ns0"
+ip -net "$ns0" link set lo up
+ip -net "$ns0" addr add 127.0.0.1 dev lo
+
+trap cleanup EXIT
+
+currentyear=$(date +%Y)
+lastyear=$((currentyear-1))
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table inet filter {
+	counter iifcount {}
+	counter iifnamecount {}
+	counter iifgroupcount {}
+	counter iiftypecount {}
+	counter infproto4count {}
+	counter il4protocounter {}
+	counter imarkcounter {}
+	counter icpu0counter {}
+	counter ilastyearcounter {}
+	counter icurrentyearcounter {}
+
+	counter oifcount {}
+	counter oifnamecount {}
+	counter oifgroupcount {}
+	counter oiftypecount {}
+	counter onfproto4count {}
+	counter ol4protocounter {}
+	counter oskuidcounter {}
+	counter oskgidcounter {}
+	counter omarkcounter {}
+
+	chain input {
+		type filter hook input priority 0; policy accept;
+
+		meta iif lo counter name "iifcount"
+		meta iifname "lo" counter name "iifnamecount"
+		meta iifgroup "default" counter name "iifgroupcount"
+		meta iiftype "loopback" counter name "iiftypecount"
+		meta nfproto ipv4 counter name "infproto4count"
+		meta l4proto icmp counter name "il4protocounter"
+		meta mark 42 counter name "imarkcounter"
+		meta cpu 0 counter name "icpu0counter"
+		meta time "$lastyear-01-01" - "$lastyear-12-31" counter name ilastyearcounter
+		meta time "$currentyear-01-01" - "$currentyear-12-31" counter name icurrentyearcounter
+	}
+
+	chain output {
+		type filter hook output priority 0; policy accept;
+		meta oif lo counter name "oifcount" counter
+		meta oifname "lo" counter name "oifnamecount"
+		meta oifgroup "default" counter name "oifgroupcount"
+		meta oiftype "loopback" counter name "oiftypecount"
+		meta nfproto ipv4 counter name "onfproto4count"
+		meta l4proto icmp counter name "ol4protocounter"
+		meta skuid 0 counter name "oskuidcounter"
+		meta skgid 0 counter name "oskgidcounter"
+		meta mark 42 counter name "omarkcounter"
+	}
+}
+EOF
+
+if [ $? -ne 0 ]; then
+	echo "SKIP: Could not add test ruleset"
+	exit $ksft_skip
+fi
+
+ret=0
+
+check_one_counter()
+{
+	local cname="$1"
+	local want="packets $2"
+	local verbose="$3"
+
+	if ! ip netns exec "$ns0" nft list counter inet filter "$cname" | grep -q "$want"; then
+		echo "FAIL: $cname, want \"$want\", got"
+		ret=1
+		ip netns exec "$ns0" nft list counter inet filter "$cname"
+	fi
+}
+
+check_lo_counters()
+{
+	local want="$1"
+	local verbose="$2"
+	local counter
+
+	for counter in iifcount iifnamecount iifgroupcount iiftypecount infproto4count \
+		       oifcount oifnamecount oifgroupcount oiftypecount onfproto4count \
+		       il4protocounter icurrentyearcounter ol4protocounter \
+	     ; do
+		check_one_counter "$counter" "$want" "$verbose"
+	done
+}
+
+check_lo_counters "0" false
+ip netns exec "$ns0" ping -q -c 1 127.0.0.1 -m 42 > /dev/null
+
+check_lo_counters "2" true
+
+check_one_counter oskuidcounter "1" true
+check_one_counter oskgidcounter "1" true
+check_one_counter imarkcounter "1" true
+check_one_counter omarkcounter "1" true
+check_one_counter ilastyearcounter "0" true
+
+if [ $ret -eq 0 ];then
+	echo "OK: nftables meta iif/oif counters at expected values"
+else
+	exit $ret
+fi
+
+#First CPU execution and counter
+taskset -p 01 $$ > /dev/null
+ip netns exec "$ns0" nft reset counters > /dev/null
+ip netns exec "$ns0" ping -q -c 1 127.0.0.1 > /dev/null
+check_one_counter icpu0counter "2" true
+
+if [ $ret -eq 0 ];then
+	echo "OK: nftables meta cpu counter at expected values"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_nat.sh b/tools/testing/selftests/net/netfilter/nft_nat.sh
new file mode 100755
index 0000000000..9e39de2645
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_nat.sh
@@ -0,0 +1,1156 @@
+#!/bin/bash
+#
+# This test is for basic NAT functionality: snat, dnat, redirect, masquerade.
+#
+
+source lib.sh
+
+ret=0
+test_inet_nat=true
+
+checktool "nft --version" "run test without nft tool"
+checktool "socat -h" "run test without socat"
+
+cleanup()
+{
+	ip netns pids "$ns0" | xargs kill 2>/dev/null
+	ip netns pids "$ns1" | xargs kill 2>/dev/null
+	ip netns pids "$ns2" | xargs kill 2>/dev/null
+
+	rm -f "$INFILE" "$OUTFILE"
+
+	cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+INFILE=$(mktemp)
+OUTFILE=$(mktemp)
+
+setup_ns ns0 ns1 ns2
+
+if ! ip link add veth0 netns "$ns0" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1;then
+    echo "SKIP: No virtual ethernet pair device support in kernel"
+    exit $ksft_skip
+fi
+ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns2"
+
+ip -net "$ns0" link set veth0 up
+ip -net "$ns0" addr add 10.0.1.1/24 dev veth0
+ip -net "$ns0" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$ns0" link set veth1 up
+ip -net "$ns0" addr add 10.0.2.1/24 dev veth1
+ip -net "$ns0" addr add dead:2::1/64 dev veth1 nodad
+
+do_config()
+{
+	ns="$1"
+	subnet="$2"
+
+	ip -net "$ns" link set eth0 up
+	ip -net "$ns" addr add "10.0.$subnet.99/24" dev eth0
+	ip -net "$ns" route add default via "10.0.$subnet.1"
+	ip -net "$ns" addr add "dead:$subnet::99/64" dev eth0 nodad
+	ip -net "$ns" route add default via "dead:$subnet::1"
+}
+
+do_config "$ns1" 1
+do_config "$ns2" 2
+
+bad_counter()
+{
+	local ns=$1
+	local counter=$2
+	local expect=$3
+	local tag=$4
+
+	echo "ERROR: $counter counter in $ns has unexpected value (expected $expect) at $tag" 1>&2
+	ip netns exec "$ns" nft list counter inet filter "$counter" 1>&2
+}
+
+check_counters()
+{
+	ns=$1
+	local lret=0
+
+	if ! ip netns exec "$ns" nft list counter inet filter ns0in | grep -q "packets 1 bytes 84";then
+		bad_counter "$ns" ns0in "packets 1 bytes 84" "check_counters 1"
+		lret=1
+	fi
+
+	if ! ip netns exec "$ns" nft list counter inet filter ns0out | grep -q "packets 1 bytes 84";then
+		bad_counter "$ns" ns0out "packets 1 bytes 84" "check_counters 2"
+		lret=1
+	fi
+
+	expect="packets 1 bytes 104"
+	if ! ip netns exec "$ns" nft list counter inet filter ns0in6 | grep -q "$expect";then
+		bad_counter "$ns" ns0in6 "$expect" "check_counters 3"
+		lret=1
+	fi
+	if ! ip netns exec "$ns" nft list counter inet filter ns0out6 | grep -q "$expect";then
+		bad_counter "$ns" ns0out6 "$expect" "check_counters 4"
+		lret=1
+	fi
+
+	return $lret
+}
+
+check_ns0_counters()
+{
+	local ns=$1
+	local lret=0
+
+	if ! ip netns exec "$ns0" nft list counter inet filter ns0in | grep -q "packets 0 bytes 0";then
+		bad_counter "$ns0" ns0in "packets 0 bytes 0" "check_ns0_counters 1"
+		lret=1
+	fi
+
+	if ! ip netns exec "$ns0" nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0";then
+		bad_counter "$ns0" ns0in6 "packets 0 bytes 0"
+		lret=1
+	fi
+
+	if ! ip netns exec "$ns0" nft list counter inet filter ns0out | grep -q "packets 0 bytes 0";then
+		bad_counter "$ns0" ns0out "packets 0 bytes 0" "check_ns0_counters 2"
+		lret=1
+	fi
+	if ! ip netns exec "$ns0" nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0";then
+		bad_counter "$ns0" ns0out6 "packets 0 bytes 0" "check_ns0_counters3 "
+		lret=1
+	fi
+
+	for dir in "in" "out" ; do
+		expect="packets 1 bytes 84"
+		if ! ip netns exec "$ns0" nft list counter inet filter "${ns}${dir}" | grep -q "$expect";then
+			bad_counter "$ns0" "$ns${dir}" "$expect" "check_ns0_counters 4"
+			lret=1
+		fi
+
+		expect="packets 1 bytes 104"
+		if ! ip netns exec "$ns0" nft list counter inet filter "${ns}${dir}6" | grep -q "$expect";then
+			bad_counter "$ns0" "$ns${dir}6" "$expect" "check_ns0_counters 5"
+			lret=1
+		fi
+	done
+
+	return $lret
+}
+
+reset_counters()
+{
+	for i in "$ns0" "$ns1" "$ns2" ;do
+		ip netns exec "$i" nft reset counters inet > /dev/null
+	done
+}
+
+test_local_dnat6()
+{
+	local family=$1
+	local lret=0
+	local IPF=""
+
+	if [ "$family" = "inet" ];then
+		IPF="ip6"
+	fi
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+	chain output {
+		type nat hook output priority 0; policy accept;
+		ip6 daddr dead:1::99 dnat $IPF to dead:2::99
+	}
+}
+EOF
+	if [ $? -ne 0 ]; then
+		echo "SKIP: Could not add add $family dnat hook"
+		return $ksft_skip
+	fi
+
+	# ping netns1, expect rewrite to netns2
+	if ! ip netns exec "$ns0" ping -q -c 1 dead:1::99 > /dev/null;then
+		lret=1
+		echo "ERROR: ping6 failed"
+		return $lret
+	fi
+
+	expect="packets 0 bytes 0"
+	for dir in "in6" "out6" ; do
+		if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+			bad_counter "$ns0" ns1$dir "$expect" "test_local_dnat6 1"
+			lret=1
+		fi
+	done
+
+	expect="packets 1 bytes 104"
+	for dir in "in6" "out6" ; do
+		if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+			bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat6 2"
+			lret=1
+		fi
+	done
+
+	# expect 0 count in ns1
+	expect="packets 0 bytes 0"
+	for dir in "in6" "out6" ; do
+		if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
+			bad_counter "$ns1" ns0$dir "$expect" "test_local_dnat6 3"
+			lret=1
+		fi
+	done
+
+	# expect 1 packet in ns2
+	expect="packets 1 bytes 104"
+	for dir in "in6" "out6" ; do
+		if ! ip netns exec "$ns2" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
+			bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat6 4"
+			lret=1
+		fi
+	done
+
+	test $lret -eq 0 && echo "PASS: ipv6 ping to $ns1 was $family NATted to $ns2"
+	ip netns exec "$ns0" nft flush chain ip6 nat output
+
+	return $lret
+}
+
+test_local_dnat()
+{
+	local family=$1
+	local lret=0
+	local IPF=""
+
+	if [ "$family" = "inet" ];then
+		IPF="ip"
+	fi
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF 2>/dev/null
+table $family nat {
+	chain output {
+		type nat hook output priority 0; policy accept;
+		ip daddr 10.0.1.99 dnat $IPF to 10.0.2.99
+	}
+}
+EOF
+	if [ $? -ne 0 ]; then
+		if [ "$family" = "inet" ];then
+			echo "SKIP: inet nat tests"
+			test_inet_nat=false
+			return $ksft_skip
+		fi
+		echo "SKIP: Could not add add $family dnat hook"
+		return $ksft_skip
+	fi
+
+	# ping netns1, expect rewrite to netns2
+	if ! ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null;then
+		lret=1
+		echo "ERROR: ping failed"
+		return $lret
+	fi
+
+	expect="packets 0 bytes 0"
+	for dir in "in" "out" ; do
+		if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+			bad_counter "$ns0" "ns1$dir" "$expect" "test_local_dnat 1"
+			lret=1
+		fi
+	done
+
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+			bad_counter "$ns0" "ns2$dir" "$expect" "test_local_dnat 2"
+			lret=1
+		fi
+	done
+
+	# expect 0 count in ns1
+	expect="packets 0 bytes 0"
+	for dir in "in" "out" ; do
+		if ! ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect";then
+			bad_counter "$ns1" "ns0$dir" "$expect" "test_local_dnat 3"
+			lret=1
+		fi
+	done
+
+	# expect 1 packet in ns2
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		if ! ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect";then
+			bad_counter "$ns2" "ns0$dir" "$expect" "test_local_dnat 4"
+			lret=1
+		fi
+	done
+
+	test $lret -eq 0 && echo "PASS: ping to $ns1 was $family NATted to $ns2"
+
+	ip netns exec "$ns0" nft flush chain "$family" nat output
+
+	reset_counters
+	if ! ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null;then
+		lret=1
+		echo "ERROR: ping failed"
+		return $lret
+	fi
+
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+			bad_counter "$ns1" ns1$dir "$expect" "test_local_dnat 5"
+			lret=1
+		fi
+	done
+	expect="packets 0 bytes 0"
+	for dir in "in" "out" ; do
+		if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+			bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat 6"
+			lret=1
+		fi
+	done
+
+	# expect 1 count in ns1
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
+			bad_counter "$ns0" ns0$dir "$expect" "test_local_dnat 7"
+			lret=1
+		fi
+	done
+
+	# expect 0 packet in ns2
+	expect="packets 0 bytes 0"
+	for dir in "in" "out" ; do
+		if ! ip netns exec "$ns2" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
+			bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat 8"
+			lret=1
+		fi
+	done
+
+	test $lret -eq 0 && echo "PASS: ping to $ns1 OK after $family nat output chain flush"
+
+	return $lret
+}
+
+listener_ready()
+{
+	local ns="$1"
+	local port="$2"
+	local proto="$3"
+	ss -N "$ns" -ln "$proto" -o "sport = :$port" | grep -q "$port"
+}
+
+test_local_dnat_portonly()
+{
+	local family=$1
+	local daddr=$2
+	local lret=0
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+	chain output {
+		type nat hook output priority 0; policy accept;
+		meta l4proto tcp dnat to :2000
+
+	}
+}
+EOF
+	if [ $? -ne 0 ]; then
+		if [ "$family" = "inet" ];then
+			echo "SKIP: inet port test"
+			test_inet_nat=false
+			return
+		fi
+		echo "SKIP: Could not add $family dnat hook"
+		return
+	fi
+
+	echo "SERVER-$family" | ip netns exec "$ns1" timeout 3 socat -u STDIN TCP-LISTEN:2000 &
+
+	busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1" 2000 "-t"
+
+	result=$(ip netns exec "$ns0" timeout 1 socat -u TCP:"$daddr":2000 STDOUT)
+
+	if [ "$result" = "SERVER-inet" ];then
+		echo "PASS: inet port rewrite without l3 address"
+	else
+		echo "ERROR: inet port rewrite without l3 address, got $result"
+		ret=1
+	fi
+}
+
+test_masquerade6()
+{
+	local family=$1
+	local natflags=$2
+	local lret=0
+
+	ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+	if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
+		echo "ERROR: cannot ping $ns1 from $ns2 via ipv6"
+		return 1
+	fi
+
+	expect="packets 1 bytes 104"
+	for dir in "in6" "out6" ; do
+		if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+			bad_counter "$ns1" "ns2$dir" "$expect" "test_masquerade6 1"
+			lret=1
+		fi
+
+		if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+			bad_counter "$ns2" "ns1$dir" "$expect" "test_masquerade6 2"
+			lret=1
+		fi
+	done
+
+	reset_counters
+
+# add masquerading rule
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+	chain postrouting {
+		type nat hook postrouting priority 0; policy accept;
+		meta oif veth0 masquerade $natflags
+	}
+}
+EOF
+	if [ $? -ne 0 ]; then
+		echo "SKIP: Could not add add $family masquerade hook"
+		return $ksft_skip
+	fi
+
+	if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
+		echo "ERROR: cannot ping $ns1 from $ns2 with active $family masquerade $natflags"
+		lret=1
+	fi
+
+	# ns1 should have seen packets from ns0, due to masquerade
+	expect="packets 1 bytes 104"
+	for dir in "in6" "out6" ; do
+		if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
+			bad_counter "$ns1" ns0$dir "$expect" "test_masquerade6 3"
+			lret=1
+		fi
+
+		if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+			bad_counter "$ns2" ns1$dir "$expect" "test_masquerade6 4"
+			lret=1
+		fi
+	done
+
+	# ns1 should not have seen packets from ns2, due to masquerade
+	expect="packets 0 bytes 0"
+	for dir in "in6" "out6" ; do
+		if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+			bad_counter "$ns1" ns0$dir "$expect" "test_masquerade6 5"
+			lret=1
+		fi
+
+		if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+			bad_counter "$ns0" "ns1$dir" "$expect" "test_masquerade6 6"
+			lret=1
+		fi
+	done
+
+	if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
+		echo "ERROR: cannot ping $ns1 from $ns2 with active ipv6 masquerade $natflags (attempt 2)"
+		lret=1
+	fi
+
+	if ! ip netns exec "$ns0" nft flush chain "$family" nat postrouting;then
+		echo "ERROR: Could not flush $family nat postrouting" 1>&2
+		lret=1
+	fi
+
+	test $lret -eq 0 && echo "PASS: $family IPv6 masquerade $natflags for $ns2"
+
+	return $lret
+}
+
+test_masquerade()
+{
+	local family=$1
+	local natflags=$2
+	local lret=0
+
+	ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+	ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+	if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
+		echo "ERROR: cannot ping $ns1 from $ns2 $natflags"
+		lret=1
+	fi
+
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+			bad_counter "$ns1" "ns2$dir" "$expect" "test_masquerade 1"
+			lret=1
+		fi
+
+		if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+			bad_counter "$ns2" "ns1$dir" "$expect" "test_masquerade 2"
+			lret=1
+		fi
+	done
+
+	reset_counters
+
+# add masquerading rule
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+	chain postrouting {
+		type nat hook postrouting priority 0; policy accept;
+		meta oif veth0 masquerade $natflags
+	}
+}
+EOF
+	if [ $? -ne 0 ]; then
+		echo "SKIP: Could not add add $family masquerade hook"
+		return $ksft_skip
+	fi
+
+	if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
+		echo "ERROR: cannot ping $ns1 from $ns2 with active $family masquerade $natflags"
+		lret=1
+	fi
+
+	# ns1 should have seen packets from ns0, due to masquerade
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
+			bad_counter "$ns1" "ns0$dir" "$expect" "test_masquerade 3"
+			lret=1
+		fi
+
+		if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+			bad_counter "$ns2" "ns1$dir" "$expect" "test_masquerade 4"
+			lret=1
+		fi
+	done
+
+	# ns1 should not have seen packets from ns2, due to masquerade
+	expect="packets 0 bytes 0"
+	for dir in "in" "out" ; do
+		if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+			bad_counter "$ns1" "ns0$dir" "$expect" "test_masquerade 5"
+			lret=1
+		fi
+
+		if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+			bad_counter "$ns0" "ns1$dir" "$expect" "test_masquerade 6"
+			lret=1
+		fi
+	done
+
+	if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
+		echo "ERROR: cannot ping $ns1 from $ns2 with active ip masquerade $natflags (attempt 2)"
+		lret=1
+	fi
+
+	if ! ip netns exec "$ns0" nft flush chain "$family" nat postrouting; then
+		echo "ERROR: Could not flush $family nat postrouting" 1>&2
+		lret=1
+	fi
+
+	test $lret -eq 0 && echo "PASS: $family IP masquerade $natflags for $ns2"
+
+	return $lret
+}
+
+test_redirect6()
+{
+	local family=$1
+	local lret=0
+
+	ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+	if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
+		echo "ERROR: cannnot ping $ns1 from $ns2 via ipv6"
+		lret=1
+	fi
+
+	expect="packets 1 bytes 104"
+	for dir in "in6" "out6" ; do
+		if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+			bad_counter "$ns1" ns2$dir "$expect" "test_redirect6 1"
+			lret=1
+		fi
+
+		if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+			bad_counter "$ns2" ns1$dir "$expect" "test_redirect6 2"
+			lret=1
+		fi
+	done
+
+	reset_counters
+
+# add redirect rule
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+	chain prerouting {
+		type nat hook prerouting priority 0; policy accept;
+		meta iif veth1 meta l4proto icmpv6 ip6 saddr dead:2::99 ip6 daddr dead:1::99 redirect
+	}
+}
+EOF
+	if [ $? -ne 0 ]; then
+		echo "SKIP: Could not add add $family redirect hook"
+		return $ksft_skip
+	fi
+
+	if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
+		echo "ERROR: cannot ping $ns1 from $ns2 via ipv6 with active $family redirect"
+		lret=1
+	fi
+
+	# ns1 should have seen no packets from ns2, due to redirection
+	expect="packets 0 bytes 0"
+	for dir in "in6" "out6" ; do
+		if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+			bad_counter "$ns1" ns0$dir "$expect" "test_redirect6 3"
+			lret=1
+		fi
+	done
+
+	# ns0 should have seen packets from ns2, due to masquerade
+	expect="packets 1 bytes 104"
+	for dir in "in6" "out6" ; do
+		if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+			bad_counter "$ns1" ns0$dir "$expect" "test_redirect6 4"
+			lret=1
+		fi
+	done
+
+	if ! ip netns exec "$ns0" nft delete table "$family" nat;then
+		echo "ERROR: Could not delete $family nat table" 1>&2
+		lret=1
+	fi
+
+	test $lret -eq 0 && echo "PASS: $family IPv6 redirection for $ns2"
+
+	return $lret
+}
+
+test_redirect()
+{
+	local family=$1
+	local lret=0
+
+	ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+	ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+	if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
+		echo "ERROR: cannot ping $ns1 from $ns2"
+		lret=1
+	fi
+
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+			bad_counter "$ns1" "$ns2$dir" "$expect" "test_redirect 1"
+			lret=1
+		fi
+
+		if ! ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect";then
+			bad_counter "$ns2" ns1$dir "$expect" "test_redirect 2"
+			lret=1
+		fi
+	done
+
+	reset_counters
+
+# add redirect rule
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+	chain prerouting {
+		type nat hook prerouting priority 0; policy accept;
+		meta iif veth1 ip protocol icmp ip saddr 10.0.2.99 ip daddr 10.0.1.99 redirect
+	}
+}
+EOF
+	if [ $? -ne 0 ]; then
+		echo "SKIP: Could not add add $family redirect hook"
+		return $ksft_skip
+	fi
+
+	if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
+		echo "ERROR: cannot ping $ns1 from $ns2 with active $family ip redirect"
+		lret=1
+	fi
+
+	# ns1 should have seen no packets from ns2, due to redirection
+	expect="packets 0 bytes 0"
+	for dir in "in" "out" ; do
+
+		if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+			bad_counter "$ns1" ns0$dir "$expect" "test_redirect 3"
+			lret=1
+		fi
+	done
+
+	# ns0 should have seen packets from ns2, due to masquerade
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+			bad_counter "$ns0" ns0$dir "$expect" "test_redirect 4"
+			lret=1
+		fi
+	done
+
+	if ! ip netns exec "$ns0" nft delete table "$family" nat;then
+		echo "ERROR: Could not delete $family nat table" 1>&2
+		lret=1
+	fi
+
+	test $lret -eq 0 && echo "PASS: $family IP redirection for $ns2"
+
+	return $lret
+}
+
+# test port shadowing.
+# create two listening services, one on router (ns0), one
+# on client (ns2), which is masqueraded from ns1 point of view.
+# ns2 sends udp packet coming from service port to ns1, on a highport.
+# Later, if n1 uses same highport to connect to ns0:service, packet
+# might be port-forwarded to ns2 instead.
+
+# second argument tells if we expect the 'fake-entry' to take effect
+# (CLIENT) or not (ROUTER).
+test_port_shadow()
+{
+	local test=$1
+	local expect=$2
+	local daddrc="10.0.1.99"
+	local daddrs="10.0.1.1"
+	local result=""
+	local logmsg=""
+
+	# make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
+	echo "fake-entry" | ip netns exec "$ns2" timeout 1 socat -u STDIN UDP:"$daddrc":41404,sourceport=1405
+
+	echo ROUTER | ip netns exec "$ns0" timeout 3 socat -T 3 -u STDIN UDP4-LISTEN:1405 2>/dev/null &
+	local sc_r=$!
+	echo CLIENT | ip netns exec "$ns2" timeout 3 socat -T 3 -u STDIN UDP4-LISTEN:1405,reuseport 2>/dev/null &
+	local sc_c=$!
+
+	busywait $BUSYWAIT_TIMEOUT listener_ready "$ns0" 1405 "-u"
+	busywait $BUSYWAIT_TIMEOUT listener_ready "$ns2" 1405 "-u"
+
+	# ns1 tries to connect to ns0:1405.  With default settings this should connect
+	# to client, it matches the conntrack entry created above.
+
+	result=$(echo "data" | ip netns exec "$ns1" timeout 1 socat - UDP:"$daddrs":1405,sourceport=41404)
+
+	if [ "$result" = "$expect" ] ;then
+		echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}"
+	else
+		echo "ERROR: portshadow test $test: got reply from \"$result\", not $expect as intended"
+		ret=1
+	fi
+
+	kill $sc_r $sc_c 2>/dev/null
+
+	# flush udp entries for next test round, if any
+	ip netns exec "$ns0" conntrack -F >/dev/null 2>&1
+}
+
+# This prevents port shadow of router service via packet filter,
+# packets claiming to originate from service port from internal
+# network are dropped.
+test_port_shadow_filter()
+{
+	local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family filter {
+	chain forward {
+		type filter hook forward priority 0; policy accept;
+		meta iif veth1 udp sport 1405 drop
+	}
+}
+EOF
+	test_port_shadow "port-filter" "ROUTER"
+
+	ip netns exec "$ns0" nft delete table "$family" filter
+}
+
+# This prevents port shadow of router service via notrack.
+test_port_shadow_notrack()
+{
+	local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family raw {
+	chain prerouting {
+		type filter hook prerouting priority -300; policy accept;
+		meta iif veth0 udp dport 1405 notrack
+	}
+	chain output {
+		type filter hook output priority -300; policy accept;
+		meta oif veth0 udp sport 1405 notrack
+	}
+}
+EOF
+	test_port_shadow "port-notrack" "ROUTER"
+
+	ip netns exec "$ns0" nft delete table "$family" raw
+}
+
+# This prevents port shadow of router service via sport remap.
+test_port_shadow_pat()
+{
+	local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family pat {
+	chain postrouting {
+		type nat hook postrouting priority -1; policy accept;
+		meta iif veth1 udp sport <= 1405 masquerade to : 1406-65535 random
+	}
+}
+EOF
+	test_port_shadow "pat" "ROUTER"
+
+	ip netns exec "$ns0" nft delete table "$family" pat
+}
+
+test_port_shadowing()
+{
+	local family="ip"
+
+	if ! conntrack -h >/dev/null 2>&1;then
+		echo "SKIP: Could not run nat port shadowing test without conntrack tool"
+		return
+	fi
+
+	if ! socat -h > /dev/null 2>&1;then
+		echo "SKIP: Could not run nat port shadowing test without socat tool"
+		return
+	fi
+
+	ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+	ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+	ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+	chain postrouting {
+		type nat hook postrouting priority 0; policy accept;
+		meta oif veth0 masquerade
+	}
+}
+EOF
+	if [ $? -ne 0 ]; then
+		echo "SKIP: Could not add add $family masquerade hook"
+		return $ksft_skip
+	fi
+
+	# test default behaviour. Packet from ns1 to ns0 is redirected to ns2.
+	test_port_shadow "default" "CLIENT"
+
+	# test packet filter based mitigation: prevent forwarding of
+	# packets claiming to come from the service port.
+	test_port_shadow_filter "$family"
+
+	# test conntrack based mitigation: connections going or coming
+	# from router:service bypass connection tracking.
+	test_port_shadow_notrack "$family"
+
+	# test nat based mitigation: fowarded packets coming from service port
+	# are masqueraded with random highport.
+	test_port_shadow_pat "$family"
+
+	ip netns exec "$ns0" nft delete table $family nat
+}
+
+test_stateless_nat_ip()
+{
+	local lret=0
+
+	ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+	ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+	if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
+		echo "ERROR: cannot ping $ns1 from $ns2 before loading stateless rules"
+		return 1
+	fi
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table ip stateless {
+	map xlate_in {
+		typeof meta iifname . ip saddr . ip daddr : ip daddr
+		elements = {
+			"veth1" . 10.0.2.99 . 10.0.1.99 : 10.0.2.2,
+		}
+	}
+	map xlate_out {
+		typeof meta iifname . ip saddr . ip daddr : ip daddr
+		elements = {
+			"veth0" . 10.0.1.99 . 10.0.2.2 : 10.0.2.99
+		}
+	}
+
+	chain prerouting {
+		type filter hook prerouting priority -400; policy accept;
+		ip saddr set meta iifname . ip saddr . ip daddr map @xlate_in
+		ip daddr set meta iifname . ip saddr . ip daddr map @xlate_out
+	}
+}
+EOF
+	if [ $? -ne 0 ]; then
+		echo "SKIP: Could not add ip statless rules"
+		return $ksft_skip
+	fi
+
+	reset_counters
+
+	if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null; then
+		echo "ERROR: cannot ping $ns1 from $ns2 with stateless rules"
+		lret=1
+	fi
+
+	# ns1 should have seen packets from .2.2, due to stateless rewrite.
+	expect="packets 1 bytes 84"
+	if ! ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect";then
+		bad_counter "$ns1" ns0insl "$expect" "test_stateless 1"
+		lret=1
+	fi
+
+	for dir in "in" "out" ; do
+		if ! ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect";then
+			bad_counter "$ns2" ns1$dir "$expect" "test_stateless 2"
+			lret=1
+		fi
+	done
+
+	# ns1 should not have seen packets from ns2, due to masquerade
+	expect="packets 0 bytes 0"
+	for dir in "in" "out" ; do
+		if ! ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect";then
+			bad_counter "$ns1" ns0$dir "$expect" "test_stateless 3"
+			lret=1
+		fi
+
+		if ! ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect";then
+			bad_counter "$ns0" ns1$dir "$expect" "test_stateless 4"
+			lret=1
+		fi
+	done
+
+	reset_counters
+
+	if ! socat -h > /dev/null 2>&1;then
+		echo "SKIP: Could not run stateless nat frag test without socat tool"
+		if [ $lret -eq 0 ]; then
+			return $ksft_skip
+		fi
+
+		ip netns exec "$ns0" nft delete table ip stateless
+		return $lret
+	fi
+
+	dd if=/dev/urandom of="$INFILE" bs=4096 count=1 2>/dev/null
+
+	ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:"$OUTFILE" < /dev/null 2>/dev/null &
+
+	busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1" 4233 "-u"
+
+	# re-do with large ping -> ip fragmentation
+	if ! ip netns exec "$ns2" timeout 3 socat -u STDIN UDP4-SENDTO:"10.0.1.99:4233" < "$INFILE" > /dev/null;then
+		echo "ERROR: failed to test udp $ns1 to $ns2 with stateless ip nat" 1>&2
+		lret=1
+	fi
+
+	wait
+
+	if ! cmp "$INFILE" "$OUTFILE";then
+		ls -l "$INFILE" "$OUTFILE"
+		echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2
+		lret=1
+	fi
+
+	:> "$OUTFILE"
+
+	# ns1 should have seen packets from 2.2, due to stateless rewrite.
+	expect="packets 3 bytes 4164"
+	if ! ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect";then
+		bad_counter "$ns1" ns0insl "$expect" "test_stateless 5"
+		lret=1
+	fi
+
+	if ! ip netns exec "$ns0" nft delete table ip stateless; then
+		echo "ERROR: Could not delete table ip stateless" 1>&2
+		lret=1
+	fi
+
+	test $lret -eq 0 && echo "PASS: IP statless for $ns2"
+
+	return $lret
+}
+
+# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
+for i in "$ns0" "$ns1" "$ns2" ;do
+ip netns exec "$i" nft -f /dev/stdin <<EOF
+table inet filter {
+	counter ns0in {}
+	counter ns1in {}
+	counter ns2in {}
+
+	counter ns0out {}
+	counter ns1out {}
+	counter ns2out {}
+
+	counter ns0in6 {}
+	counter ns1in6 {}
+	counter ns2in6 {}
+
+	counter ns0out6 {}
+	counter ns1out6 {}
+	counter ns2out6 {}
+
+	map nsincounter {
+		type ipv4_addr : counter
+		elements = { 10.0.1.1 : "ns0in",
+			     10.0.2.1 : "ns0in",
+			     10.0.1.99 : "ns1in",
+			     10.0.2.99 : "ns2in" }
+	}
+
+	map nsincounter6 {
+		type ipv6_addr : counter
+		elements = { dead:1::1 : "ns0in6",
+			     dead:2::1 : "ns0in6",
+			     dead:1::99 : "ns1in6",
+			     dead:2::99 : "ns2in6" }
+	}
+
+	map nsoutcounter {
+		type ipv4_addr : counter
+		elements = { 10.0.1.1 : "ns0out",
+			     10.0.2.1 : "ns0out",
+			     10.0.1.99: "ns1out",
+			     10.0.2.99: "ns2out" }
+	}
+
+	map nsoutcounter6 {
+		type ipv6_addr : counter
+		elements = { dead:1::1 : "ns0out6",
+			     dead:2::1 : "ns0out6",
+			     dead:1::99 : "ns1out6",
+			     dead:2::99 : "ns2out6" }
+	}
+
+	chain input {
+		type filter hook input priority 0; policy accept;
+		counter name ip saddr map @nsincounter
+		icmpv6 type { "echo-request", "echo-reply" } counter name ip6 saddr map @nsincounter6
+	}
+	chain output {
+		type filter hook output priority 0; policy accept;
+		counter name ip daddr map @nsoutcounter
+		icmpv6 type { "echo-request", "echo-reply" } counter name ip6 daddr map @nsoutcounter6
+	}
+}
+EOF
+done
+
+# special case for stateless nat check, counter needs to
+# be done before (input) ip defragmentation
+ip netns exec "$ns1" nft -f /dev/stdin <<EOF
+table inet filter {
+	counter ns0insl {}
+
+	chain pre {
+		type filter hook prerouting priority -400; policy accept;
+		ip saddr 10.0.2.2 counter name "ns0insl"
+	}
+}
+EOF
+
+ping_basic()
+{
+	i="$1"
+	if ! ip netns exec "$ns0" ping -c 1 -q 10.0."$i".99 > /dev/null;then
+		echo "ERROR: Could not reach other namespace(s)" 1>&2
+		ret=1
+	fi
+
+	if ! ip netns exec "$ns0" ping -c 1 -q dead:"$i"::99 > /dev/null;then
+		echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2
+		ret=1
+	fi
+}
+
+test_basic_conn()
+{
+	local nsexec
+	name="$1"
+
+	nsexec=$(eval echo \$"$1")
+
+	ping_basic 1
+	ping_basic 2
+
+	if ! check_counters "$nsexec";then
+		return 1
+	fi
+
+	if ! check_ns0_counters "$name";then
+		return 1
+	fi
+
+	reset_counters
+	return 0
+}
+
+if ! test_basic_conn "ns1" ; then
+	echo "ERROR: basic test for ns1 failed" 1>&2
+	exit 1
+fi
+if ! test_basic_conn "ns2"; then
+	echo "ERROR: basic test for ns1 failed" 1>&2
+fi
+
+if [ $ret -eq 0 ];then
+	echo "PASS: netns routing/connectivity: $ns0 can reach $ns1 and $ns2"
+fi
+
+reset_counters
+test_local_dnat ip
+test_local_dnat6 ip6
+
+reset_counters
+test_local_dnat_portonly inet 10.0.1.99
+
+reset_counters
+$test_inet_nat && test_local_dnat inet
+$test_inet_nat && test_local_dnat6 inet
+
+for flags in "" "fully-random"; do
+reset_counters
+test_masquerade ip $flags
+test_masquerade6 ip6 $flags
+reset_counters
+$test_inet_nat && test_masquerade inet $flags
+$test_inet_nat && test_masquerade6 inet $flags
+done
+
+reset_counters
+test_redirect ip
+test_redirect6 ip6
+reset_counters
+$test_inet_nat && test_redirect inet
+$test_inet_nat && test_redirect6 inet
+
+test_port_shadowing
+test_stateless_nat_ip
+
+if [ $ret -ne 0 ];then
+	echo -n "FAIL: "
+	nft --version
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
new file mode 100755
index 0000000000..3b81d88bdd
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
@@ -0,0 +1,267 @@
+#!/bin/bash
+#
+# Test connection tracking zone and NAT source port reallocation support.
+#
+
+source lib.sh
+
+# Don't increase too much, 2000 clients should work
+# just fine but script can then take several minutes with
+# KASAN/debug builds.
+maxclients=100
+
+have_socat=0
+ret=0
+
+[ "$KSFT_MACHINE_SLOW" = yes ] && maxclients=40
+# client1---.
+#            veth1-.
+#                  |
+#               NAT Gateway --veth0--> Server
+#                  | |
+#            veth2-' |
+# client2---'        |
+#  ....              |
+# clientX----vethX---'
+
+# All clients share identical IP address.
+# NAT Gateway uses policy routing and conntrack zones to isolate client
+# namespaces.  Each client connects to Server, each with colliding tuples:
+#   clientsaddr:10000 -> serveraddr:dport
+#   NAT Gateway is supposed to do port reallocation for each of the
+#   connections.
+
+v4gc1=$(sysctl -n net.ipv4.neigh.default.gc_thresh1 2>/dev/null)
+v4gc2=$(sysctl -n net.ipv4.neigh.default.gc_thresh2 2>/dev/null)
+v4gc3=$(sysctl -n net.ipv4.neigh.default.gc_thresh3 2>/dev/null)
+v6gc1=$(sysctl -n net.ipv6.neigh.default.gc_thresh1 2>/dev/null)
+v6gc2=$(sysctl -n net.ipv6.neigh.default.gc_thresh2 2>/dev/null)
+v6gc3=$(sysctl -n net.ipv6.neigh.default.gc_thresh3 2>/dev/null)
+
+cleanup()
+{
+	cleanup_all_ns
+
+	sysctl -q net.ipv4.neigh.default.gc_thresh1="$v4gc1" 2>/dev/null
+	sysctl -q net.ipv4.neigh.default.gc_thresh2="$v4gc2" 2>/dev/null
+	sysctl -q net.ipv4.neigh.default.gc_thresh3="$v4gc3" 2>/dev/null
+	sysctl -q net.ipv6.neigh.default.gc_thresh1="$v6gc1" 2>/dev/null
+	sysctl -q net.ipv6.neigh.default.gc_thresh2="$v6gc2" 2>/dev/null
+	sysctl -q net.ipv6.neigh.default.gc_thresh3="$v6gc3" 2>/dev/null
+}
+
+checktool "nft --version" echo "run test without nft tool"
+checktool "conntrack -V" "run test without conntrack tool"
+
+if socat -h >/dev/null 2>&1; then
+	have_socat=1
+fi
+
+setup_ns gw srv
+
+trap cleanup EXIT
+
+ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv"
+ip -net "$gw" link set veth0 up
+ip -net "$srv" link set eth0 up
+
+sysctl -q net.ipv6.neigh.default.gc_thresh1=512  2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh3=4096 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh1=512  2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh3=4096 2>/dev/null
+
+for i in $(seq 1 "$maxclients");do
+  setup_ns "cl$i"
+
+  cl=$(eval echo \$cl"$i")
+  if ! ip link add veth"$i" netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1;then
+    echo "SKIP: No virtual ethernet pair device support in kernel"
+    exit $ksft_skip
+  fi
+done
+
+for i in $(seq 1 "$maxclients");do
+  cl=$(eval echo \$cl"$i")
+  echo netns exec "$cl" ip link set eth0 up
+  echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
+  echo netns exec "$gw" ip link set "veth$i" up
+  echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".arp_ignore=2
+  echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".rp_filter=0
+
+  # clients have same IP addresses.
+  echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
+  echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0 nodad
+  echo netns exec "$cl" ip route add default via 10.1.0.2 dev eth0
+  echo netns exec "$cl" ip route add default via dead:1::2 dev eth0
+
+  # NB: same addresses on client-facing interfaces.
+  echo netns exec "$gw" ip addr add 10.1.0.2/24 dev "veth$i"
+  echo netns exec "$gw" ip addr add dead:1::2/64 dev "veth$i" nodad
+
+  # gw: policy routing
+  echo netns exec "$gw" ip route add 10.1.0.0/24 dev "veth$i" table $((1000+i))
+  echo netns exec "$gw" ip route add dead:1::0/64 dev "veth$i" table $((1000+i))
+  echo netns exec "$gw" ip route add 10.3.0.0/24 dev veth0 table $((1000+i))
+  echo netns exec "$gw" ip route add dead:3::0/64 dev veth0 table $((1000+i))
+  echo netns exec "$gw" ip rule add fwmark "$i" lookup $((1000+i))
+done | ip -batch /dev/stdin
+
+ip -net "$gw" addr add 10.3.0.1/24 dev veth0
+ip -net "$gw" addr add dead:3::1/64 dev veth0 nodad
+
+ip -net "$srv" addr add 10.3.0.99/24 dev eth0
+ip -net "$srv" addr add dead:3::99/64 dev eth0 nodad
+
+ip netns exec "$gw" nft -f /dev/stdin<<EOF
+table inet raw {
+	map iiftomark {
+		type ifname : mark
+	}
+
+	map iiftozone {
+		typeof iifname : ct zone
+	}
+
+	set inicmp {
+		flags dynamic
+		type ipv4_addr . ifname . ipv4_addr
+	}
+	set inflows {
+		flags dynamic
+		type ipv4_addr . inet_service . ifname . ipv4_addr . inet_service
+	}
+
+	set inflows6 {
+		flags dynamic
+		type ipv6_addr . inet_service . ifname . ipv6_addr . inet_service
+	}
+
+	chain prerouting {
+		type filter hook prerouting priority -64000; policy accept;
+		ct original zone set meta iifname map @iiftozone
+		meta mark set meta iifname map @iiftomark
+
+		tcp flags & (syn|ack) == ack add @inflows { ip saddr . tcp sport . meta iifname . ip daddr . tcp dport counter }
+		add @inflows6 { ip6 saddr . tcp sport . meta iifname . ip6 daddr . tcp dport counter }
+		ip protocol icmp add @inicmp { ip saddr . meta iifname . ip daddr counter }
+	}
+
+	chain nat_postrouting {
+		type nat hook postrouting priority 0; policy accept;
+                ct mark set meta mark meta oifname veth0 masquerade
+	}
+
+	chain mangle_prerouting {
+		type filter hook prerouting priority -100; policy accept;
+		ct direction reply meta mark set ct mark
+	}
+}
+EOF
+if [ "$?" -ne 0 ];then
+	echo "SKIP: Could not add nftables rules"
+	exit $ksft_skip
+fi
+
+( echo add element inet raw iiftomark \{
+	for i in $(seq 1 $((maxclients-1))); do
+		echo \"veth"$i"\" : "$i",
+	done
+	echo \"veth"$maxclients"\" : "$maxclients" \}
+	echo add element inet raw iiftozone \{
+	for i in $(seq 1 $((maxclients-1))); do
+		echo \"veth"$i"\" : "$i",
+	done
+	echo \"veth$maxclients\" : $maxclients \}
+) | ip netns exec "$gw" nft -f /dev/stdin
+
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null
+
+# useful for debugging: allows to use 'ping' from clients to gateway.
+ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.fwmark_reflect=1 > /dev/null
+
+for i in $(seq 1 "$maxclients"); do
+  cl=$(eval echo \$cl"$i")
+  ip netns exec "$cl" ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 &
+done
+
+wait || ret=1
+
+[ "$ret" -ne 0 ] && "FAIL: Ping failure from $cl" 1>&2
+
+for i in $(seq 1 "$maxclients"); do
+   if ! ip netns exec "$gw" nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }"; then
+      ret=1
+      echo "FAIL: counter icmp mismatch for veth$i" 1>&2
+      ip netns exec "$gw" nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2
+      break
+   fi
+done
+
+if ! ip netns exec "$gw" nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * maxclients)) bytes $((252 * maxclients)) }"; then
+    ret=1
+    echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * maxclients)) bytes $((252 * maxclients)) }"
+    ip netns exec "$gw" nft get element inet raw inicmp "{ 10.3.99 . \"veth0\" . 10.3.0.1 }" 1>&2
+fi
+
+if [ $ret -eq 0 ]; then
+	echo "PASS: ping test from all $maxclients namespaces"
+fi
+
+if [ $have_socat -eq 0 ];then
+	echo "SKIP: socat not installed"
+	if [ $ret -ne 0 ];then
+	    exit $ret
+	fi
+	exit $ksft_skip
+fi
+
+listener_ready()
+{
+	ss -N "$1" -lnt -o "sport = :5201" | grep -q 5201
+}
+
+ip netns exec "$srv" socat -u TCP-LISTEN:5201,fork STDOUT > /dev/null 2>/dev/null &
+socatpid=$!
+
+busywait 1000 listener_ready "$srv"
+
+for i in $(seq 1 "$maxclients"); do
+  if [ $ret -ne 0 ]; then
+     break
+  fi
+  cl=$(eval echo \$cl"$i")
+  if ! ip netns exec "$cl" socat -4 -u STDIN TCP:10.3.0.99:5201,sourceport=10000 < /dev/null > /dev/null; then
+     echo "FAIL: Failure to connect for $cl" 1>&2
+     ip netns exec "$gw" conntrack -S 1>&2
+     ret=1
+  fi
+done
+if [ $ret -eq 0 ];then
+	echo "PASS: socat connections for all $maxclients net namespaces"
+fi
+
+kill $socatpid
+wait
+
+for i in $(seq 1 "$maxclients"); do
+   if ! ip netns exec "$gw" nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null;then
+      ret=1
+      echo "FAIL: can't find expected tcp entry for veth$i" 1>&2
+      break
+   fi
+done
+if [ $ret -eq 0 ];then
+	echo "PASS: Found client connection for all $maxclients net namespaces"
+fi
+
+if ! ip netns exec "$gw" nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null;then
+    ret=1
+    echo "FAIL: cannot find return entry on veth0" 1>&2
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
new file mode 100755
index 0000000000..8538f08c64
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -0,0 +1,417 @@
+#!/bin/bash
+#
+# This tests nf_queue:
+# 1. can process packets from all hooks
+# 2. support running nfqueue from more than one base chain
+#
+# shellcheck disable=SC2162,SC2317
+
+source lib.sh
+ret=0
+timeout=2
+
+cleanup()
+{
+	ip netns pids "$ns1" | xargs kill 2>/dev/null
+	ip netns pids "$ns2" | xargs kill 2>/dev/null
+	ip netns pids "$nsrouter" | xargs kill 2>/dev/null
+
+	cleanup_all_ns
+
+	rm -f "$TMPINPUT"
+	rm -f "$TMPFILE0"
+	rm -f "$TMPFILE1"
+	rm -f "$TMPFILE2" "$TMPFILE3"
+}
+
+checktool "nft --version" "test without nft tool"
+
+trap cleanup EXIT
+
+setup_ns ns1 ns2 nsrouter
+
+TMPFILE0=$(mktemp)
+TMPFILE1=$(mktemp)
+TMPFILE2=$(mktemp)
+TMPFILE3=$(mktemp)
+
+TMPINPUT=$(mktemp)
+dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT"
+
+if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
+    echo "SKIP: No virtual ethernet pair device support in kernel"
+    exit $ksft_skip
+fi
+ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2"
+
+ip -net "$nsrouter" link set veth0 up
+ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$nsrouter" link set veth1 up
+ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1
+ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad
+
+ip -net "$ns1" link set eth0 up
+ip -net "$ns2" link set eth0 up
+
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" route add default via dead:1::1
+
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns2" route add default via dead:2::1
+
+load_ruleset() {
+	local name=$1
+	local prio=$2
+
+ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+table inet $name {
+	chain nfq {
+		ip protocol icmp queue bypass
+		icmpv6 type { "echo-request", "echo-reply" } queue num 1 bypass
+	}
+	chain pre {
+		type filter hook prerouting priority $prio; policy accept;
+		jump nfq
+	}
+	chain input {
+		type filter hook input priority $prio; policy accept;
+		jump nfq
+	}
+	chain forward {
+		type filter hook forward priority $prio; policy accept;
+		tcp dport 12345 queue num 2
+		jump nfq
+	}
+	chain output {
+		type filter hook output priority $prio; policy accept;
+		tcp dport 12345 queue num 3
+		tcp sport 23456 queue num 3
+		jump nfq
+	}
+	chain post {
+		type filter hook postrouting priority $prio; policy accept;
+		jump nfq
+	}
+}
+EOF
+}
+
+load_counter_ruleset() {
+	local prio=$1
+
+ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+table inet countrules {
+	chain pre {
+		type filter hook prerouting priority $prio; policy accept;
+		counter
+	}
+	chain input {
+		type filter hook input priority $prio; policy accept;
+		counter
+	}
+	chain forward {
+		type filter hook forward priority $prio; policy accept;
+		counter
+	}
+	chain output {
+		type filter hook output priority $prio; policy accept;
+		counter
+	}
+	chain post {
+		type filter hook postrouting priority $prio; policy accept;
+		counter
+	}
+}
+EOF
+}
+
+test_ping() {
+  if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
+	return 1
+  fi
+
+  if ! ip netns exec "$ns1" ping -c 1 -q dead:2::99 > /dev/null; then
+	return 2
+  fi
+
+  return 0
+}
+
+test_ping_router() {
+  if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.1 > /dev/null; then
+	return 3
+  fi
+
+  if ! ip netns exec "$ns1" ping -c 1 -q dead:2::1 > /dev/null; then
+	return 4
+  fi
+
+  return 0
+}
+
+test_queue_blackhole() {
+	local proto=$1
+
+ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+table $proto blackh {
+	chain forward {
+	type filter hook forward priority 0; policy accept;
+		queue num 600
+	}
+}
+EOF
+	if [ "$proto" = "ip" ] ;then
+		ip netns exec "$ns1" ping -W 2 -c 1 -q 10.0.2.99 > /dev/null
+		lret=$?
+	elif [ "$proto" = "ip6" ]; then
+		ip netns exec "$ns1" ping -W 2 -c 1 -q dead:2::99 > /dev/null
+		lret=$?
+	else
+		lret=111
+	fi
+
+	# queue without bypass keyword should drop traffic if no listener exists.
+	if [ "$lret" -eq 0 ];then
+		echo "FAIL: $proto expected failure, got $lret" 1>&2
+		exit 1
+	fi
+
+	if ! ip netns exec "$nsrouter" nft delete table "$proto" blackh; then
+	        echo "FAIL: $proto: Could not delete blackh table"
+	        exit 1
+	fi
+
+        echo "PASS: $proto: statement with no listener results in packet drop"
+}
+
+nf_queue_wait()
+{
+	local procfile="/proc/self/net/netfilter/nfnetlink_queue"
+	local netns id
+
+	netns="$1"
+	id="$2"
+
+	# if this file doesn't exist, nfnetlink_module isn't loaded.
+	# rather than loading it ourselves, wait for kernel module autoload
+	# completion, nfnetlink should do so automatically because nf_queue
+	# helper program, spawned in the background, asked for this functionality.
+	test -f "$procfile" &&
+		ip netns exec "$netns" cat "$procfile" | grep -q "^ *$id "
+}
+
+test_queue()
+{
+	local expected="$1"
+	local last=""
+
+	# spawn nf_queue listeners
+	ip netns exec "$nsrouter" ./nf_queue -c -q 0 -t $timeout > "$TMPFILE0" &
+	ip netns exec "$nsrouter" ./nf_queue -c -q 1 -t $timeout > "$TMPFILE1" &
+
+	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 0
+	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 1
+
+	if ! test_ping;then
+		echo "FAIL: netns routing/connectivity with active listener on queues 0 and 1: $ret" 1>&2
+		exit $ret
+	fi
+
+	if ! test_ping_router;then
+		echo "FAIL: netns router unreachable listener on queue 0 and 1: $ret" 1>&2
+		exit $ret
+	fi
+
+	wait
+	ret=$?
+
+	for file in $TMPFILE0 $TMPFILE1; do
+		last=$(tail -n1 "$file")
+		if [ x"$last" != x"$expected packets total" ]; then
+			echo "FAIL: Expected $expected packets total, but got $last" 1>&2
+			ip netns exec "$nsrouter" nft list ruleset
+			exit 1
+		fi
+	done
+
+	echo "PASS: Expected and received $last"
+}
+
+listener_ready()
+{
+	ss -N "$1" -lnt -o "sport = :12345" | grep -q 12345
+}
+
+test_tcp_forward()
+{
+	ip netns exec "$nsrouter" ./nf_queue -q 2 -t "$timeout" &
+	local nfqpid=$!
+
+	timeout 5 ip netns exec "$ns2" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
+	local rpid=$!
+
+	busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2"
+
+	ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
+
+	wait "$rpid" && echo "PASS: tcp and nfqueue in forward chain"
+}
+
+test_tcp_localhost()
+{
+	dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT"
+	timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
+	local rpid=$!
+
+	ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" &
+	local nfqpid=$!
+
+	busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
+
+	ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null
+
+	wait "$rpid" && echo "PASS: tcp via loopback"
+	wait 2>/dev/null
+}
+
+test_tcp_localhost_connectclose()
+{
+	ip netns exec "$nsrouter" ./connect_close -p 23456 -t "$timeout" &
+	ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" &
+
+	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3
+
+	wait && echo "PASS: tcp via loopback with connect/close"
+	wait 2>/dev/null
+}
+
+test_tcp_localhost_requeue()
+{
+ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+	chain output {
+		type filter hook output priority 0; policy accept;
+		tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+	}
+	chain post {
+		type filter hook postrouting priority 0; policy accept;
+		tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+	}
+}
+EOF
+	timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
+	local rpid=$!
+
+	ip netns exec "$nsrouter" ./nf_queue -c -q 1 -t "$timeout" > "$TMPFILE2" &
+
+	# nfqueue 1 will be called via output hook.  But this time,
+        # re-queue the packet to nfqueue program on queue 2.
+	ip netns exec "$nsrouter" ./nf_queue -G -d 150 -c -q 0 -Q 1 -t "$timeout" > "$TMPFILE3" &
+
+	busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
+	ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" > /dev/null
+
+	wait
+
+	if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then
+		echo "FAIL: lost packets during requeue?!" 1>&2
+		return
+	fi
+
+	echo "PASS: tcp via loopback and re-queueing"
+}
+
+test_icmp_vrf() {
+	if ! ip -net "$ns1" link add tvrf type vrf table 9876;then
+		echo "SKIP: Could not add vrf device"
+		return
+	fi
+
+	ip -net "$ns1" li set eth0 master tvrf
+	ip -net "$ns1" li set tvrf up
+
+	ip -net "$ns1" route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876
+ip netns exec "$ns1" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+	chain output {
+		type filter hook output priority 0; policy accept;
+		meta oifname "tvrf" icmp type echo-request counter queue num 1
+		meta oifname "eth0" icmp type echo-request counter queue num 1
+	}
+	chain post {
+		type filter hook postrouting priority 0; policy accept;
+		meta oifname "tvrf" icmp type echo-request counter queue num 1
+		meta oifname "eth0" icmp type echo-request counter queue num 1
+	}
+}
+EOF
+	ip netns exec "$ns1" ./nf_queue -q 1 -t "$timeout" &
+	local nfqpid=$!
+
+	busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 1
+
+	ip netns exec "$ns1" ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null
+
+	for n in output post; do
+		for d in tvrf eth0; do
+			if ! ip netns exec "$ns1" nft list chain inet filter "$n" | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"; then
+				echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
+				ip netns exec "$ns1" nft list ruleset
+				ret=1
+				return
+			fi
+		done
+	done
+
+	wait "$nfqpid" && echo "PASS: icmp+nfqueue via vrf"
+	wait 2>/dev/null
+}
+
+ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+load_ruleset "filter" 0
+
+if test_ping; then
+	# queue bypass works (rules were skipped, no listener)
+	echo "PASS: ${ns1} can reach ${ns2}"
+else
+	echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2
+	exit $ret
+fi
+
+test_queue_blackhole ip
+test_queue_blackhole ip6
+
+# dummy ruleset to add base chains between the
+# queueing rules.  We don't want the second reinject
+# to re-execute the old hooks.
+load_counter_ruleset 10
+
+# we are hooking all: prerouting/input/forward/output/postrouting.
+# we ping ${ns2} from ${ns1} via ${nsrouter} using ipv4 and ipv6, so:
+# 1x icmp prerouting,forward,postrouting -> 3 queue events (6 incl. reply).
+# 1x icmp prerouting,input,output postrouting -> 4 queue events incl. reply.
+# so we expect that userspace program receives 10 packets.
+test_queue 10
+
+# same.  We queue to a second program as well.
+load_ruleset "filter2" 20
+test_queue 20
+
+test_tcp_forward
+test_tcp_localhost
+test_tcp_localhost_connectclose
+test_tcp_localhost_requeue
+test_icmp_vrf
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_synproxy.sh b/tools/testing/selftests/net/netfilter/nft_synproxy.sh
new file mode 100755
index 0000000000..293f667a6a
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_synproxy.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+ret=0
+
+checktool "nft --version" "run test without nft tool"
+checktool "iperf3 --version" "run test without iperf3"
+
+setup_ns nsr ns1 ns2
+
+modprobe -q nf_conntrack
+
+cleanup() {
+	ip netns pids "$ns1" | xargs kill 2>/dev/null
+	ip netns pids "$ns2" | xargs kill 2>/dev/null
+
+	cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+ip link add veth0 netns "$nsr" type veth peer name eth0 netns "$ns1"
+ip link add veth1 netns "$nsr" type veth peer name eth0 netns "$ns2"
+
+for dev in veth0 veth1; do
+	ip -net "$nsr" link set "$dev" up
+done
+
+ip -net "$nsr" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsr" addr add 10.0.2.1/24 dev veth1
+
+ip netns exec "$nsr" sysctl -q net.ipv4.conf.veth0.forwarding=1
+ip netns exec "$nsr" sysctl -q net.ipv4.conf.veth1.forwarding=1
+ip netns exec "$nsr" sysctl -q net.netfilter.nf_conntrack_tcp_loose=0
+
+for n in $ns1 $ns2; do
+  ip -net "$n" link set eth0 up
+done
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns2" route add default via 10.0.2.1
+
+# test basic connectivity
+if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
+  echo "ERROR: $ns1 cannot reach $ns2" 1>&2
+  exit 1
+fi
+
+if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
+  echo "ERROR: $ns2 cannot reach $ns1" 1>&2
+  exit 1
+fi
+
+ip netns exec "$ns2" iperf3 -s > /dev/null 2>&1 &
+# ip netns exec $nsr tcpdump -vvv -n -i veth1 tcp | head -n 10 &
+
+sleep 1
+
+ip netns exec "$nsr" nft -f - <<EOF
+table inet filter {
+   chain prerouting {
+      type filter hook prerouting priority -300; policy accept;
+      meta iif veth0 tcp flags syn counter notrack
+   }
+
+  chain forward {
+      type filter hook forward priority 0; policy accept;
+
+      ct state new,established counter accept
+
+      meta iif veth0 meta l4proto tcp ct state untracked,invalid synproxy mss 1460 sack-perm timestamp
+
+      ct state invalid counter drop
+
+      # make ns2 unreachable w.o. tcp synproxy
+      tcp flags syn counter drop
+   }
+}
+EOF
+if [ $? -ne 0 ]; then
+	echo "SKIP: Cannot add nft synproxy"
+	exit $ksft_skip
+fi
+
+if ! ip netns exec "$ns1" timeout 5 iperf3 -c 10.0.2.99 -n $((1 * 1024 * 1024)) > /dev/null; then
+	echo "FAIL: iperf3 returned an error" 1>&2
+	ret=1
+	ip netns exec "$nsr" nft list ruleset
+else
+	echo "PASS: synproxy connection successful"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_zones_many.sh b/tools/testing/selftests/net/netfilter/nft_zones_many.sh
new file mode 100755
index 0000000000..7db9982ba5
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_zones_many.sh
@@ -0,0 +1,164 @@
+#!/bin/bash
+
+# Test insertion speed for packets with identical addresses/ports
+# that are all placed in distinct conntrack zones.
+
+source lib.sh
+
+zones=2000
+[ "$KSFT_MACHINE_SLOW" = yes ] && zones=500
+
+have_ct_tool=0
+ret=0
+
+cleanup()
+{
+	cleanup_all_ns
+}
+
+checktool "nft --version" "run test without nft tool"
+checktool "socat -V" "run test without socat tool"
+
+setup_ns ns1
+
+trap cleanup EXIT
+
+if conntrack -V > /dev/null 2>&1; then
+	have_ct_tool=1
+fi
+
+test_zones() {
+	local max_zones=$1
+
+ip netns exec "$ns1" nft -f /dev/stdin<<EOF
+flush ruleset
+table inet raw {
+	map rndzone {
+		typeof numgen inc mod $max_zones : ct zone
+	}
+
+	chain output {
+		type filter hook output priority -64000; policy accept;
+		udp dport 12345  ct zone set numgen inc mod 65536 map @rndzone
+	}
+}
+EOF
+if [ "$?" -ne 0 ];then
+	echo "SKIP: Cannot add nftables rules"
+	exit $ksft_skip
+fi
+
+	ip netns exec "$ns1" sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600
+
+	(
+		echo "add element inet raw rndzone {"
+	for i in $(seq 1 "$max_zones");do
+		echo -n "$i : $i"
+		if [ "$i" -lt "$max_zones" ]; then
+			echo ","
+		else
+			echo "}"
+		fi
+	done
+	) | ip netns exec "$ns1" nft -f /dev/stdin
+
+	local i=0
+	local j=0
+	local outerstart
+	local stop
+	outerstart=$(date +%s%3N)
+	stop=$outerstart
+
+	while [ "$i" -lt "$max_zones" ]; do
+		local start
+		start=$(date +%s%3N)
+		i=$((i + 1000))
+		j=$((j + 1))
+		# nft rule in output places each packet in a different zone.
+		dd if=/dev/zero bs=8k count=1000 2>/dev/null | ip netns exec "$ns1" socat -u STDIN UDP:127.0.0.1:12345,sourceport=12345
+		if [ $? -ne 0 ] ;then
+			ret=1
+			break
+		fi
+
+		stop=$(date +%s%3N)
+		local duration=$((stop-start))
+		echo "PASS: added 1000 entries in $duration ms (now $i total, loop $j)"
+	done
+
+	if [ "$have_ct_tool" -eq 1 ]; then
+		local count duration
+		count=$(ip netns exec "$ns1" conntrack -C)
+		duration=$((stop-outerstart))
+
+		if [ "$count" -ge "$max_zones" ]; then
+			echo "PASS: inserted $count entries from packet path in $duration ms total"
+		else
+			ip netns exec "$ns1" conntrack -S 1>&2
+			echo "FAIL: inserted $count entries from packet path in $duration ms total, expected $max_zones entries"
+			ret=1
+		fi
+	fi
+
+	if [ $ret -ne 0 ];then
+		echo "FAIL: insert $max_zones entries from packet path" 1>&2
+	fi
+}
+
+test_conntrack_tool() {
+	local max_zones=$1
+
+	ip netns exec "$ns1" conntrack -F >/dev/null 2>/dev/null
+
+	local outerstart start stop i
+	outerstart=$(date +%s%3N)
+	start=$(date +%s%3N)
+	stop="$start"
+	i=0
+	while [ "$i" -lt "$max_zones" ]; do
+		i=$((i + 1))
+		ip netns exec "$ns1" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+	                 --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i >/dev/null 2>&1
+		if [ $? -ne 0 ];then
+			ip netns exec "$ns1" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+	                 --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i > /dev/null
+			echo "FAIL: conntrack -I returned an error"
+			ret=1
+			break
+		fi
+
+		if [ $((i%1000)) -eq 0 ];then
+			stop=$(date +%s%3N)
+
+			local duration=$((stop-start))
+			echo "PASS: added 1000 entries in $duration ms (now $i total)"
+			start=$stop
+		fi
+	done
+
+	local count
+	local duration
+	count=$(ip netns exec "$ns1" conntrack -C)
+	duration=$((stop-outerstart))
+
+	if [ "$count" -eq "$max_zones" ]; then
+		echo "PASS: inserted $count entries via ctnetlink in $duration ms"
+	else
+		ip netns exec "$ns1" conntrack -S 1>&2
+		echo "FAIL: inserted $count entries via ctnetlink in $duration ms, expected $max_zones entries ($duration ms)"
+		ret=1
+	fi
+}
+
+test_zones $zones
+
+if [ "$have_ct_tool" -eq 1 ];then
+	test_conntrack_tool $zones
+else
+	echo "SKIP: Could not run ctnetlink insertion test without conntrack tool"
+	if [ $ret -eq 0 ];then
+		exit $ksft_skip
+	fi
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/common.sh b/tools/testing/selftests/net/netfilter/packetdrill/common.sh
new file mode 100755
index 0000000000..ed36d53519
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/common.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# for debugging set net.netfilter.nf_log_all_netns=1 in init_net
+# or do not use net namespaces.
+modprobe -q nf_conntrack
+sysctl -q net.netfilter.nf_conntrack_log_invalid=6
+
+# Flush old cached data (fastopen cookies).
+ip tcp_metrics flush all > /dev/null 2>&1
+
+# TCP min, default, and max receive and send buffer sizes.
+sysctl -q net.ipv4.tcp_rmem="4096 540000 $((15*1024*1024))"
+sysctl -q net.ipv4.tcp_wmem="4096 $((256*1024)) 4194304"
+
+# TCP congestion control.
+sysctl -q net.ipv4.tcp_congestion_control=cubic
+
+# TCP slow start after idle.
+sysctl -q net.ipv4.tcp_slow_start_after_idle=0
+
+# TCP Explicit Congestion Notification (ECN)
+sysctl -q net.ipv4.tcp_ecn=0
+
+sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1
+
+# Override the default qdisc on the tun device.
+# Many tests fail with timing errors if the default
+# is FQ and that paces their flows.
+tc qdisc add dev tun0 root pfifo
+
+# Enable conntrack
+$xtables -A INPUT -m conntrack --ctstate NEW -p tcp --syn
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt
new file mode 100644
index 0000000000..d755bd64c5
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt
@@ -0,0 +1,118 @@
+// check that already-acked (retransmitted) packet is let through rather
+// than tagged as INVALID.
+
+`packetdrill/common.sh`
+
+// should set -P DROP but it disconnects VM w.o. extra netns
++0 `$xtables -A INPUT -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 10) = 0
+
++0 < S 0:0(0) win 32792 <mss 1000>
++0 > S. 0:0(0) ack 1 <mss 1460>
++.01 < . 1:1(0) ack 1 win 65535
++0 accept(3, ..., ...) = 4
+
++0.0001 < P. 1:1461(1460) ack 1 win 257
++.0 > . 1:1(0) ack 1461 win 65535
++0.0001 < P. 1461:2921(1460) ack 1 win 257
++.0 > . 1:1(0) ack 2921 win 65535
++0.0001 < P. 2921:4381(1460) ack 1 win 257
++.0 > . 1:1(0) ack 4381 win 65535
++0.0001 < P. 4381:5841(1460) ack 1 win 257
++.0 > . 1:1(0) ack 5841 win 65535
++0.0001 < P. 5841:7301(1460) ack 1 win 257
++.0 > . 1:1(0) ack 7301 win 65535
++0.0001 < P. 7301:8761(1460) ack 1 win 257
++.0 > . 1:1(0) ack 8761 win 65535
++0.0001 < P. 8761:10221(1460) ack 1 win 257
++.0 > . 1:1(0) ack 10221 win 65535
++0.0001 < P. 10221:11681(1460) ack 1 win 257
++.0 > . 1:1(0) ack 11681 win 65535
++0.0001 < P. 11681:13141(1460) ack 1 win 257
++.0 > . 1:1(0) ack 13141 win 65535
++0.0001 < P. 13141:14601(1460) ack 1 win 257
++.0 > . 1:1(0) ack 14601 win 65535
++0.0001 < P. 14601:16061(1460) ack 1 win 257
++.0 > . 1:1(0) ack 16061 win 65535
++0.0001 < P. 16061:17521(1460) ack 1 win 257
++.0 > . 1:1(0) ack 17521 win 65535
++0.0001 < P. 17521:18981(1460) ack 1 win 257
++.0 > . 1:1(0) ack 18981 win 65535
++0.0001 < P. 18981:20441(1460) ack 1 win 257
++.0 > . 1:1(0) ack 20441 win 65535
++0.0001 < P. 20441:21901(1460) ack 1 win 257
++.0 > . 1:1(0) ack 21901 win 65535
++0.0001 < P. 21901:23361(1460) ack 1 win 257
++.0 > . 1:1(0) ack 23361 win 65535
++0.0001 < P. 23361:24821(1460) ack 1 win 257
+0.055 > . 1:1(0) ack 24821 win 65535
++0.0001 < P. 24821:26281(1460) ack 1 win 257
++.0 > . 1:1(0) ack 26281 win 65535
++0.0001 < P. 26281:27741(1460) ack 1 win 257
++.0 > . 1:1(0) ack 27741 win 65535
++0.0001 < P. 27741:29201(1460) ack 1 win 257
++.0 > . 1:1(0) ack 29201 win 65535
++0.0001 < P. 29201:30661(1460) ack 1 win 257
++.0 > . 1:1(0) ack 30661 win 65535
++0.0001 < P. 30661:32121(1460) ack 1 win 257
++.0 > . 1:1(0) ack 32121 win 65535
++0.0001 < P. 32121:33581(1460) ack 1 win 257
++.0 > . 1:1(0) ack 33581 win 65535
++0.0001 < P. 33581:35041(1460) ack 1 win 257
++.0 > . 1:1(0) ack 35041 win 65535
++0.0001 < P. 35041:36501(1460) ack 1 win 257
++.0 > . 1:1(0) ack 36501 win 65535
++0.0001 < P. 36501:37961(1460) ack 1 win 257
++.0 > . 1:1(0) ack 37961 win 65535
++0.0001 < P. 37961:39421(1460) ack 1 win 257
++.0 > . 1:1(0) ack 39421 win 65535
++0.0001 < P. 39421:40881(1460) ack 1 win 257
++.0 > . 1:1(0) ack 40881 win 65535
++0.0001 < P. 40881:42341(1460) ack 1 win 257
++.0 > . 1:1(0) ack 42341 win 65535
++0.0001 < P. 42341:43801(1460) ack 1 win 257
++.0 > . 1:1(0) ack 43801 win 65535
++0.0001 < P. 43801:45261(1460) ack 1 win 257
++.0 > . 1:1(0) ack 45261 win 65535
++0.0001 < P. 45261:46721(1460) ack 1 win 257
++.0 > . 1:1(0) ack 46721 win 65535
++0.0001 < P. 46721:48181(1460) ack 1 win 257
++.0 > . 1:1(0) ack 48181 win 65535
++0.0001 < P. 48181:49641(1460) ack 1 win 257
++.0 > . 1:1(0) ack 49641 win 65535
++0.0001 < P. 49641:51101(1460) ack 1 win 257
++.0 > . 1:1(0) ack 51101 win 65535
++0.0001 < P. 51101:52561(1460) ack 1 win 257
++.0 > . 1:1(0) ack 52561 win 65535
++0.0001 < P. 52561:54021(1460) ack 1 win 257
++.0 > . 1:1(0) ack 54021 win 65535
++0.0001 < P. 54021:55481(1460) ack 1 win 257
++.0 > . 1:1(0) ack 55481 win 65535
++0.0001 < P. 55481:56941(1460) ack 1 win 257
++.0 > . 1:1(0) ack 56941 win 65535
++0.0001 < P. 56941:58401(1460) ack 1 win 257
++.0 > . 1:1(0) ack 58401 win 65535
++0.0001 < P. 58401:59861(1460) ack 1 win 257
++.0 > . 1:1(0) ack 59861 win 65535
++0.0001 < P. 59861:61321(1460) ack 1 win 257
++.0 > . 1:1(0) ack 61321 win 65535
++0.0001 < P. 61321:62781(1460) ack 1 win 257
++.0 > . 1:1(0) ack 62781 win 65535
++0.0001 < P. 62781:64241(1460) ack 1 win 257
++.0 > . 1:1(0) ack 64241 win 65535
++0.0001 < P. 64241:65701(1460) ack 1 win 257
++.0 > . 1:1(0) ack 65701 win 65535
++0.0001 < P. 65701:67161(1460) ack 1 win 257
++.0 > . 1:1(0) ack 67161 win 65535
+
+// nf_ct_proto_6: SEQ is under the lower bound (already ACKed data retransmitted) IN=tun0 OUT= MAC= SRC=192.0.2.1 DST=192.168.24.72 LEN=1500 TOS=0x00 PREC=0x00 TTL=255 ID=0 PROTO=TCP SPT=34375 DPT=8080 SEQ=1 ACK=4162510439 WINDOW=257 RES=0x00 ACK PSH URGP=0
++0.0001 < P. 1:1461(1460) ack 1 win 257
+
+// only sent if above packet isn't flagged as invalid
++.0 > . 1:1(0) ack 67161 win 65535
+
++0 `$xtables -D INPUT -m conntrack --ctstate INVALID -j DROP`
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt
new file mode 100644
index 0000000000..dccdd4c009
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt
@@ -0,0 +1,62 @@
+// check RST packet that doesn't exactly match expected next sequence
+// number still transitions conntrack state to CLOSE iff its already in
+// FIN/CLOSE_WAIT.
+
+`packetdrill/common.sh`
+
+//  5.771921 server_ip > client_ip TLSv1.2 337 [Packet size limited during capture]
+//  5.771994 server_ip > client_ip TLSv1.2 337 [Packet size limited during capture]
+//  5.772212 client_ip > server_ip TCP 66 45020 > 443 [ACK] Seq=1905874048 Ack=781810658 Win=36352 Len=0 TSval=3317842872 TSecr=675936334
+//  5.787924 server_ip > client_ip TLSv1.2 1300 [Packet size limited during capture]
+//  5.788126 server_ip > client_ip TLSv1.2 90 Application Data
+//  5.788207 server_ip > client_ip TCP 66 443 > 45020 [FIN, ACK] Seq=781811916 Ack=1905874048 Win=31104 Len=0 TSval=675936350 TSecr=3317842872
+//  5.788447 client_ip > server_ip TLSv1.2 90 Application Data
+//  5.788479 client_ip > server_ip TCP 66 45020 > 443 [RST, ACK] Seq=1905874072 Ack=781811917 Win=39040 Len=0 TSval=3317842889 TSecr=675936350
+//  5.788581 server_ip > client_ip TCP 54 8443 > 45020 [RST] Seq=781811892 Win=0 Len=0
+
++0 `iptables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `iptables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+
+0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
++0.1 < S. 1:1(0) ack 1 win 65535 <mss 1460>
+
++0 > . 1:1(0) ack 1 win 65535
++0 < . 1:1001(1000) ack 1 win 65535
++0 < . 1001:2001(1000) ack 1 win 65535
++0 < . 2001:3001(1000) ack 1 win 65535
+
++0 > . 1:1(0) ack 1001 win 65535
++0 > . 1:1(0) ack 2001 win 65535
++0 > . 1:1(0) ack 3001 win 65535
+
++0 write(3, ..., 1000) = 1000
+
++0.0 > P. 1:1001(1000) ack 3001 win 65535
+
++0.1 read(3, ..., 1000) = 1000
+
+// Conntrack should move to FIN_WAIT, then CLOSE_WAIT.
++0 < F. 3001:3001(0) ack 1001 win 65535
++0 >  . 1001:1001(0) ack 3002 win 65535
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q CLOSE_WAIT`
+
++1 close(3) = 0
+// RST: unread data. FIN was seen, hence ack + 1
++0 > R. 1001:1001(0) ack 3002 win 65535
+// ... and then, CLOSE.
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q CLOSE\ `
+
+// Spurious RST from peer -- no sk state.  Should NOT get
+// marked INVALID, because conntrack is already closing.
++0.1 < R 2001:2001(0) win 0
+
+// No packets should have been marked INVALID
++0 `iptables -v -S INPUT  | grep INVALID | grep -q -- "-c 0 0"`
++0 `iptables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"`
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt
new file mode 100644
index 0000000000..686f18a3d9
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt
@@ -0,0 +1,59 @@
+// check that out of window resets are marked as INVALID and conntrack remains
+// in ESTABLISHED state.
+
+`packetdrill/common.sh`
+
++0 `$xtables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `$xtables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+
+0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
++0.1 < S. 1:1(0) ack 1 win 65535 <mss 1460>
+
++0 > . 1:1(0) ack 1 win 65535
++0 < . 1:1001(1000) ack 1 win 65535
++0 < . 1001:2001(1000) ack 1 win 65535
++0 < . 2001:3001(1000) ack 1 win 65535
+
++0 > . 1:1(0) ack 1001 win 65535
++0 > . 1:1(0) ack 2001 win 65535
++0 > . 1:1(0) ack 3001 win 65535
+
++0 write(3, ..., 1000) = 1000
+
+// out of window
++0.0 < R	0:0(0)	win 0
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
+// out of window
++0.0 < R	1000000:1000000(0)	win 0
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
+// in-window but not exact match
++0.0 < R	42:42(0)	win 0
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
++0.0 > P. 1:1001(1000) ack 3001 win 65535
+
++0.1 read(3, ..., 1000) = 1000
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
++0 < . 3001:3001(0) ack 1001 win 65535
+
++0.0 < R. 3000:3000(0) ack 1001 win 0
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
+// exact next sequence
++0.0 < R. 3001:3001(0) ack 1001 win 0
+// Conntrack should move to CLOSE
+
+// Expect four invalid RSTs
++0 `$xtables -v -S INPUT  | grep INVALID | grep -q -- "-c 4 "`
++0 `$xtables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"`
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q CLOSE\ `
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt
new file mode 100644
index 0000000000..3442cd29bc
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt
@@ -0,0 +1,44 @@
+// Check connection re-use, i.e. peer that receives the SYN answers with
+// a challenge-ACK.
+// Check that conntrack lets all packets pass, including the challenge ack,
+// and that a new connection is established.
+
+`packetdrill/common.sh`
+
+// S  >
+//  . < (challnge-ack)
+// R. >
+// S  >
+// S. <
+// Expected outcome: established connection.
+
++0 `$xtables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `$xtables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
+// Challenge ACK, old incarnation.
+0.1 < . 145824453:145824453(0) ack 643160523 win 240 <mss 1460,nop,nop,TS val 1 ecr 1,nop,wscale 0>
+
++0.01 > R 643160523:643160523(0) win 0
+
++0.01 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT`
+
+// Must go through.
++0.01 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
+// correct synack
++0.1 < S. 0:0(0) ack 1 win 250 <mss 1460,nop,nop,TS val 1 ecr 1,nop,wscale 0>
+
+// 3whs completes.
++0.01 > . 1:1(0) ack 1 win 256 <nop,nop,TS val 1 ecr 1>
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep ESTABLISHED | grep -q ASSURED`
+
+// No packets should have been marked INVALID
++0 `$xtables -v -S INPUT  | grep INVALID | grep -q -- "-c 0 0"`
++0 `$xtables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"`
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt
new file mode 100644
index 0000000000..3047160c4b
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt
@@ -0,0 +1,51 @@
+// Check conntrack copes with syn/ack reply for a previous, old incarnation.
+
+// tcpdump with buggy sequence
+// 10.176.25.8.829 > 10.192.171.30.2049: Flags [S], seq 2375731741, win 29200, options [mss 1460,sackOK,TS val 2083107423 ecr 0,nop,wscale 7], length 0
+// OLD synack, for old/previous S
+// 10.192.171.30.2049 > 10.176.25.8.829: Flags [S.], seq 145824453, ack 643160523, win 65535, options [mss 8952,nop,wscale 5,TS val 3215437785 ecr 2082921663,nop,nop], length 0
+// This reset never makes it to the endpoint, elided in the packetdrill script
+// 10.192.171.30.2049 > 10.176.25.8.829: Flags [R.], seq 1, ack 1, win 65535, options [mss 8952,nop,wscale 5,TS val 3215443451 ecr 2082921663,nop,nop], length 0
+// Syn retransmit, no change
+// 10.176.25.8.829 > 10.192.171.30.2049: Flags [S], seq 2375731741, win 29200, options [mss 1460,sackOK,TS val 2083115583 ecr 0,nop,wscale 7], length 0
+// CORRECT synack, should be accepted, but conntrack classified this as INVALID:
+// SEQ is over the upper bound (over the window of the receiver) IN=tun0 OUT= MAC= SRC=192.0.2.1 DST=192.168.37.78 LEN=40 TOS=0x00 PREC=0x00 TTL=255 ID=0 PROTO=TCP SPT=8080 DPT=34500 SEQ=162602411 ACK=2124350315 ..
+// 10.192.171.30.2049 > 10.176.25.8.829: Flags [S.], seq 162602410, ack 2375731742, win 65535, options [mss 8952,nop,wscale 5,TS val 3215445754 ecr 2083115583,nop,nop], length 0
+
+`packetdrill/common.sh`
+
++0 `$xtables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `$xtables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
+// bogus/outdated synack, invalid ack value
+0.1 < S. 145824453:145824453(0) ack 643160523 win 240 <mss 1440,nop,nop,TS val 1 ecr 1,nop,wscale 0>
+
+// syn retransmitted
+1.01 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1015 ecr 0,nop,wscale 8>
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT`
+
+// correct synack
++0 < S. 145758918:145758918(0) ack 1 win 250 <mss 1460,nop,nop,TS val 1 ecr 1,nop,wscale 0>
++0 write(3, ..., 1) = 1
+
+// with buggy conntrack above packet is dropped, so SYN rtx is seen:
+// script packet:  1.054007 . 1:1(0) ack 16777958 win 256 <nop,nop,TS val 1033 ecr 1>
+// actual packet:  3.010000 S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1015 ecr 0,nop,wscale 8>
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep ESTABLISHED | grep -q ASSURED`
+
++0 > P. 1:2(1) ack 4294901762 win 256 <nop,nop,TS val 1067 ecr 1>
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep ASSURED | grep -q ESTABLISHED`
+
+// No packets should have been marked INVALID in OUTPUT direction, 1 in INPUT
++0 `$xtables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"`
++0 `$xtables -v -S INPUT  | grep INVALID | grep -q -- "-c 1 "`
+
++0 `$xtables -D INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `$xtables -D OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt
new file mode 100644
index 0000000000..842242f8cc
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt
@@ -0,0 +1,34 @@
+// Check reception of another SYN while we have an established conntrack state.
+// Challenge ACK is supposed to pass through, RST reply should clear conntrack
+// state and SYN retransmit should give us new 'SYN_RECV' connection state.
+
+`packetdrill/common.sh`
+
+// should show a match if bug is present:
++0 `iptables -A INPUT -m conntrack --ctstate INVALID -p tcp --tcp-flags SYN,ACK SYN,ACK`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 10) = 0
+
++0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7, TS val 1 ecr 0,nop,nop>
++0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,TS val 100 ecr 1,nop,wscale 8>
++.01 < . 1:1(0) ack 1 win 257 <TS val 1 ecr 100,nop,nop>
++0 accept(3, ..., ...) = 4
+
++0 < P. 1:101(100) ack 1 win 257 <TS val 2 ecr 100,nop,nop>
++.001 > . 1:1(0) ack 101 win 256 <nop,nop,TS val 110 ecr 2>
++0 read(4, ..., 101) = 100
+
+1.0 < S 2000:2000(0) win 32792 <mss 1000,nop,wscale 7, TS val 233 ecr 0,nop,nop>
+// Won't expect this: challenge ack.
+
++0 > . 1:1(0) ack 101 win 256 <nop,nop,TS val 112 ecr 2>
++0 < R. 101:101(0) ack 1 win 257
++0 close(4) = 0
+
+1.5 < S 2000:2000(0) win 32792 <mss 1000,nop,wscale 0, TS val 233 ecr 0,nop,nop>
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep -q SYN_RECV`
++0 `iptables -v -S INPUT | grep INVALID | grep -q -- "-c 0 0"`
diff --git a/tools/testing/selftests/net/netfilter/rpath.sh b/tools/testing/selftests/net/netfilter/rpath.sh
new file mode 100755
index 0000000000..4485fd7675
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/rpath.sh
@@ -0,0 +1,175 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# return code to signal skipped test
+ksft_skip=4
+
+# search for legacy iptables (it uses the xtables extensions
+if iptables-legacy --version >/dev/null 2>&1; then
+	iptables='iptables-legacy'
+elif iptables --version >/dev/null 2>&1; then
+	iptables='iptables'
+else
+	iptables=''
+fi
+
+if ip6tables-legacy --version >/dev/null 2>&1; then
+	ip6tables='ip6tables-legacy'
+elif ip6tables --version >/dev/null 2>&1; then
+	ip6tables='ip6tables'
+else
+	ip6tables=''
+fi
+
+if nft --version >/dev/null 2>&1; then
+	nft='nft'
+else
+	nft=''
+fi
+
+if [ -z "$iptables$ip6tables$nft" ]; then
+	echo "SKIP: Test needs iptables, ip6tables or nft"
+	exit $ksft_skip
+fi
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+trap "ip netns del $ns1; ip netns del $ns2" EXIT
+
+# create two netns, disable rp_filter in ns2 and
+# keep IPv6 address when moving into VRF
+ip netns add "$ns1"
+ip netns add "$ns2"
+ip netns exec "$ns2" sysctl -q net.ipv4.conf.all.rp_filter=0
+ip netns exec "$ns2" sysctl -q net.ipv4.conf.default.rp_filter=0
+ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.keep_addr_on_down=1
+
+# a standard connection between the netns, should not trigger rp filter
+ip -net "$ns1" link add v0 type veth peer name v0 netns "$ns2"
+ip -net "$ns1" link set v0 up; ip -net "$ns2" link set v0 up
+ip -net "$ns1" a a 192.168.23.2/24 dev v0
+ip -net "$ns2" a a 192.168.23.1/24 dev v0
+ip -net "$ns1" a a fec0:23::2/64 dev v0 nodad
+ip -net "$ns2" a a fec0:23::1/64 dev v0 nodad
+
+# rp filter testing: ns1 sends packets via v0 which ns2 would route back via d0
+ip -net "$ns2" link add d0 type dummy
+ip -net "$ns2" link set d0 up
+ip -net "$ns1" a a 192.168.42.2/24 dev v0
+ip -net "$ns2" a a 192.168.42.1/24 dev d0
+ip -net "$ns1" a a fec0:42::2/64 dev v0 nodad
+ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad
+
+# firewall matches to test
+[ -n "$iptables" ] && {
+	common='-t raw -A PREROUTING -s 192.168.0.0/16'
+	if ! ip netns exec "$ns2" "$iptables" $common -m rpfilter;then
+		echo "Cannot add rpfilter rule"
+		exit $ksft_skip
+	fi
+	ip netns exec "$ns2" "$iptables" $common -m rpfilter --invert
+}
+[ -n "$ip6tables" ] && {
+	common='-t raw -A PREROUTING -s fec0::/16'
+	if ! ip netns exec "$ns2" "$ip6tables" $common -m rpfilter;then
+		echo "Cannot add rpfilter rule"
+		exit $ksft_skip
+	fi
+	ip netns exec "$ns2" "$ip6tables" $common -m rpfilter --invert
+}
+[ -n "$nft" ] && ip netns exec "$ns2" $nft -f - <<EOF
+table inet t {
+	chain c {
+		type filter hook prerouting priority raw;
+		ip saddr 192.168.0.0/16 fib saddr . iif oif exists counter
+		ip6 saddr fec0::/16 fib saddr . iif oif exists counter
+	}
+}
+EOF
+
+die() {
+	echo "FAIL: $*"
+	#ip netns exec "$ns2" "$iptables" -t raw -vS
+	#ip netns exec "$ns2" "$ip6tables" -t raw -vS
+	#ip netns exec "$ns2" nft list ruleset
+	exit 1
+}
+
+# check rule counters, return true if rule did not match
+ipt_zero_rule() { # (command)
+	[ -n "$1" ] || return 0
+	ip netns exec "$ns2" "$1" -t raw -vS | grep -q -- "-m rpfilter -c 0 0"
+}
+ipt_zero_reverse_rule() { # (command)
+	[ -n "$1" ] || return 0
+	ip netns exec "$ns2" "$1" -t raw -vS | \
+		grep -q -- "-m rpfilter --invert -c 0 0"
+}
+nft_zero_rule() { # (family)
+	[ -n "$nft" ] || return 0
+	ip netns exec "$ns2" "$nft" list chain inet t c | \
+		grep -q "$1 saddr .* counter packets 0 bytes 0"
+}
+
+netns_ping() { # (netns, args...)
+	local netns="$1"
+	shift
+	ip netns exec "$netns" ping -q -c 1 -W 1 "$@" >/dev/null
+}
+
+clear_counters() {
+	[ -n "$iptables" ] && ip netns exec "$ns2" "$iptables" -t raw -Z
+	[ -n "$ip6tables" ] && ip netns exec "$ns2" "$ip6tables" -t raw -Z
+	if [ -n "$nft" ]; then
+		(
+			echo "delete table inet t";
+			ip netns exec "$ns2" $nft -s list table inet t;
+		) | ip netns exec "$ns2" $nft -f -
+	fi
+}
+
+testrun() {
+	clear_counters
+
+	# test 1: martian traffic should fail rpfilter matches
+	netns_ping "$ns1" -I v0 192.168.42.1 && \
+		die "martian ping 192.168.42.1 succeeded"
+	netns_ping "$ns1" -I v0 fec0:42::1 && \
+		die "martian ping fec0:42::1 succeeded"
+
+	ipt_zero_rule "$iptables" || die "iptables matched martian"
+	ipt_zero_rule "$ip6tables" || die "ip6tables matched martian"
+	ipt_zero_reverse_rule "$iptables" && die "iptables not matched martian"
+	ipt_zero_reverse_rule "$ip6tables" && die "ip6tables not matched martian"
+	nft_zero_rule ip || die "nft IPv4 matched martian"
+	nft_zero_rule ip6 || die "nft IPv6 matched martian"
+
+	clear_counters
+
+	# test 2: rpfilter match should pass for regular traffic
+	netns_ping "$ns1" 192.168.23.1 || \
+		die "regular ping 192.168.23.1 failed"
+	netns_ping "$ns1" fec0:23::1 || \
+		die "regular ping fec0:23::1 failed"
+
+	ipt_zero_rule "$iptables" && die "iptables match not effective"
+	ipt_zero_rule "$ip6tables" && die "ip6tables match not effective"
+	ipt_zero_reverse_rule "$iptables" || die "iptables match over-effective"
+	ipt_zero_reverse_rule "$ip6tables" || die "ip6tables match over-effective"
+	nft_zero_rule ip && die "nft IPv4 match not effective"
+	nft_zero_rule ip6 && die "nft IPv6 match not effective"
+
+}
+
+testrun
+
+# repeat test with vrf device in $ns2
+ip -net "$ns2" link add vrf0 type vrf table 10
+ip -net "$ns2" link set vrf0 up
+ip -net "$ns2" link set v0 master vrf0
+
+testrun
+
+echo "PASS: netfilter reverse path match works as intended"
+exit 0
diff --git a/tools/testing/selftests/net/netfilter/sctp_collision.c b/tools/testing/selftests/net/netfilter/sctp_collision.c
new file mode 100644
index 0000000000..21bb1cfd8a
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/sctp_collision.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+
+int main(int argc, char *argv[])
+{
+	struct sockaddr_in saddr = {}, daddr = {};
+	int sd, ret, len = sizeof(daddr);
+	struct timeval tv = {25, 0};
+	char buf[] = "hello";
+
+	if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) {
+		printf("%s <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>\n",
+		       argv[0]);
+		return -1;
+	}
+
+	sd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
+	if (sd < 0) {
+		printf("Failed to create sd\n");
+		return -1;
+	}
+
+	saddr.sin_family = AF_INET;
+	saddr.sin_addr.s_addr = inet_addr(argv[2]);
+	saddr.sin_port = htons(atoi(argv[3]));
+
+	ret = bind(sd, (struct sockaddr *)&saddr, sizeof(saddr));
+	if (ret < 0) {
+		printf("Failed to bind to address\n");
+		goto out;
+	}
+
+	ret = listen(sd, 5);
+	if (ret < 0) {
+		printf("Failed to listen on port\n");
+		goto out;
+	}
+
+	daddr.sin_family = AF_INET;
+	daddr.sin_addr.s_addr = inet_addr(argv[4]);
+	daddr.sin_port = htons(atoi(argv[5]));
+
+	/* make test shorter than 25s */
+	ret = setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+	if (ret < 0) {
+		printf("Failed to setsockopt SO_RCVTIMEO\n");
+		goto out;
+	}
+
+	if (!strcmp(argv[1], "server")) {
+		sleep(1); /* wait a bit for client's INIT */
+		ret = connect(sd, (struct sockaddr *)&daddr, len);
+		if (ret < 0) {
+			printf("Failed to connect to peer\n");
+			goto out;
+		}
+		ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len);
+		if (ret < 0) {
+			printf("Failed to recv msg %d\n", ret);
+			goto out;
+		}
+		ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len);
+		if (ret < 0) {
+			printf("Failed to send msg %d\n", ret);
+			goto out;
+		}
+		printf("Server: sent! %d\n", ret);
+	}
+
+	if (!strcmp(argv[1], "client")) {
+		usleep(300000); /* wait a bit for server's listening */
+		ret = connect(sd, (struct sockaddr *)&daddr, len);
+		if (ret < 0) {
+			printf("Failed to connect to peer\n");
+			goto out;
+		}
+		sleep(1); /* wait a bit for server's delayed INIT_ACK to reproduce the issue */
+		ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len);
+		if (ret < 0) {
+			printf("Failed to send msg %d\n", ret);
+			goto out;
+		}
+		ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len);
+		if (ret < 0) {
+			printf("Failed to recv msg %d\n", ret);
+			goto out;
+		}
+		printf("Client: rcvd! %d\n", ret);
+	}
+	ret = 0;
+out:
+	close(sd);
+	return ret;
+}
diff --git a/tools/testing/selftests/net/netfilter/settings b/tools/testing/selftests/net/netfilter/settings
new file mode 100644
index 0000000000..abc5648b59
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/settings
@@ -0,0 +1 @@
+timeout=1800
diff --git a/tools/testing/selftests/net/netfilter/xt_string.sh b/tools/testing/selftests/net/netfilter/xt_string.sh
new file mode 100755
index 0000000000..8d401c69e3
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/xt_string.sh
@@ -0,0 +1,133 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# return code to signal skipped test
+ksft_skip=4
+rc=0
+
+source lib.sh
+
+checktool "socat -h" "run test without socat"
+checktool "iptables --version" "test needs iptables"
+
+infile=$(mktemp)
+
+cleanup()
+{
+	ip netns del "$netns"
+	rm -f "$infile"
+}
+
+trap cleanup EXIT
+
+setup_ns netns
+
+ip -net "$netns" link add d0 type dummy
+ip -net "$netns" link set d0 up
+ip -net "$netns" addr add 10.1.2.1/24 dev d0
+
+pattern="foo bar baz"
+patlen=11
+hdrlen=$((20 + 8)) # IPv4 + UDP
+
+#ip netns exec "$netns" tcpdump -npXi d0 &
+#tcpdump_pid=$!
+#trap 'kill $tcpdump_pid; ip netns del $netns' EXIT
+
+add_rule() { # (alg, from, to)
+	ip netns exec "$netns" \
+		iptables -A OUTPUT -o d0 -m string \
+			--string "$pattern" --algo "$1" --from "$2" --to "$3"
+}
+showrules() { # ()
+	ip netns exec "$netns" iptables -v -S OUTPUT | grep '^-A'
+}
+zerorules() {
+	ip netns exec "$netns" iptables -Z OUTPUT
+}
+countrule() { # (pattern)
+	showrules | grep -c -- "$*"
+}
+send() { # (offset)
+	( for ((i = 0; i < $1 - hdrlen; i++)); do
+		echo -n " "
+	  done
+	  echo -n "$pattern"
+	) > "$infile"
+
+	ip netns exec "$netns" socat -t 1 -u STDIN UDP-SENDTO:10.1.2.2:27374 < "$infile"
+}
+
+add_rule bm 1000 1500
+add_rule bm 1400 1600
+add_rule kmp 1000 1500
+add_rule kmp 1400 1600
+
+zerorules
+send 0
+send $((1000 - patlen))
+if [ "$(countrule -c 0 0)" -ne 4 ]; then
+	echo "FAIL: rules match data before --from"
+	showrules
+	((rc--))
+fi
+
+zerorules
+send 1000
+send $((1400 - patlen))
+if [ "$(countrule -c 2)" -ne 2 ]; then
+	echo "FAIL: only two rules should match at low offset"
+	showrules
+	((rc--))
+fi
+
+zerorules
+send $((1500 - patlen))
+if [ "$(countrule -c 1)" -ne 4 ]; then
+	echo "FAIL: all rules should match at end of packet"
+	showrules
+	((rc--))
+fi
+
+zerorules
+send 1495
+if [ "$(countrule -c 1)" -ne 1 ]; then
+	echo "FAIL: only kmp with proper --to should match pattern spanning fragments"
+	showrules
+	((rc--))
+fi
+
+zerorules
+send 1500
+if [ "$(countrule -c 1)" -ne 2 ]; then
+	echo "FAIL: two rules should match pattern at start of second fragment"
+	showrules
+	((rc--))
+fi
+
+zerorules
+send $((1600 - patlen))
+if [ "$(countrule -c 1)" -ne 2 ]; then
+	echo "FAIL: two rules should match pattern at end of largest --to"
+	showrules
+	((rc--))
+fi
+
+zerorules
+send $((1600 - patlen + 1))
+if [ "$(countrule -c 1)" -ne 0 ]; then
+	echo "FAIL: no rules should match pattern extending largest --to"
+	showrules
+	((rc--))
+fi
+
+zerorules
+send 1600
+if [ "$(countrule -c 1)" -ne 0 ]; then
+	echo "FAIL: no rule should match pattern past largest --to"
+	showrules
+	((rc--))
+fi
+
+[ $rc -eq 0 ] && echo "PASS: string match tests"
+exit $rc