Adding upstream version 5.10.209.upstream/5.10.209

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-27 10:05:51 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-27 10:05:51 +0000
commit: 5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 (patch)
tree: a94efe259b9009378be6d90eb30d2b019d95c194 /tools/testing/selftests/drivers/net/mlxsw
parent: Initial commit. (diff)
download: linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.tar.xz
linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.zip
58 files changed, 13948 insertions, 0 deletions
diff --git a/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh b/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh
new file mode 100755
index 000000000..bdffe698e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh
@@ -0,0 +1,201 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that blackhole routes are marked as offloaded and that packets hitting
+# them are dropped by the ASIC and not by the kernel.
+#
+# +---------------------------------+
+# | H1 (vrf)                        |
+# |    + $h1                        |
+# |    | 192.0.2.1/24               |
+# |    | 2001:db8:1::1/64           |
+# |    |                            |
+# |    |  default via 192.0.2.2     |
+# |    |  default via 2001:db8:1::2 |
+# +----|----------------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# |    + $rp1                                                                 |
+# |        192.0.2.2/24                                                       |
+# |        2001:db8:1::2/64                                                   |
+# |                                                                           |
+# |        2001:db8:2::2/64                                                   |
+# |        198.51.100.2/24                                                    |
+# |    + $rp2                                                                 |
+# |    |                                                                      |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|----------------------------+
+# |    |  default via 198.51.100.2  |
+# |    |  default via 2001:db8:2::2 |
+# |    |                            |
+# |    | 2001:db8:2::1/64           |
+# |    | 198.51.100.1/24            |
+# |    + $h2                        |
+# | H2 (vrf)                        |
+# +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	blackhole_ipv4
+	blackhole_ipv6
+"
+NUM_NETIFS=4
+: ${TIMEOUT:=20000} # ms
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+
+	ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+	ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2
+	ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64
+
+	ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+	ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+	ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2
+	ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+	simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+
+	tc qdisc add dev $rp1 clsact
+
+	__addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
+	__addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64
+}
+
+router_destroy()
+{
+	__addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64
+	__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
+
+	tc qdisc del dev $rp1 clsact
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+ping_ipv4()
+{
+	ping_test $h1 198.51.100.1 ": h1->h2"
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::1 ": h1->h2"
+}
+
+blackhole_ipv4()
+{
+	# Transmit packets from H1 to H2 and make sure they are dropped by the
+	# ASIC and not by the kernel
+	RET=0
+
+	ip -4 route add blackhole 198.51.100.0/30
+	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_hw dst_ip 198.51.100.1 src_ip 192.0.2.1 ip_proto icmp \
+		action pass
+
+	busywait "$TIMEOUT" wait_for_offload ip -4 route show 198.51.100.0/30
+	check_err $? "route not marked as offloaded when should"
+
+	ping_do $h1 198.51.100.1
+	check_fail $? "ping passed when should not"
+
+	tc_check_packets "dev $rp1 ingress" 101 0
+	check_err $? "packets trapped and not dropped by ASIC"
+
+	log_test "IPv4 blackhole route"
+
+	tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+	ip -4 route del blackhole 198.51.100.0/30
+}
+
+blackhole_ipv6()
+{
+	RET=0
+
+	ip -6 route add blackhole 2001:db8:2::/120
+	tc filter add dev $rp1 ingress protocol ipv6 pref 1 handle 101 flower \
+		skip_hw dst_ip 2001:db8:2::1 src_ip 2001:db8:1::1 \
+		ip_proto icmpv6 action pass
+
+	busywait "$TIMEOUT" wait_for_offload ip -6 route show 2001:db8:2::/120
+	check_err $? "route not marked as offloaded when should"
+
+	ping6_do $h1 2001:db8:2::1
+	check_fail $? "ping passed when should not"
+
+	tc_check_packets "dev $rp1 ingress" 101 0
+	check_err $? "packets trapped and not dropped by ASIC"
+
+	log_test "IPv6 blackhole route"
+
+	tc filter del dev $rp1 ingress protocol ipv6 pref 1 handle 101 flower
+	ip -6 route del blackhole 2001:db8:2::/120
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	router_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	router_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh
new file mode 100755
index 000000000..89b55e946
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test generic devlink-trap functionality over mlxsw. These tests are not
+# specific to a single trap, but do not check the devlink-trap common
+# infrastructure either.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	dev_del_test
+"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2
+}
+
+switch_create()
+{
+	ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+dev_del_test()
+{
+	local trap_name="source_mac_is_multicast"
+	local smac=01:02:03:04:05:06	# multicast source MAC (group bit set)
+	local num_iter=5
+	local mz_pid
+	local i
+
+	$MZ $h1 -c 0 -p 100 -a $smac -b bcast -t ip -q &
+	mz_pid=$!
+
+	# The purpose of this test is to make sure we correctly dismantle a
+	# port while packets are trapped from it. This is done by reloading
+	# the driver while the 'source_mac_is_multicast' trap is triggered.
+	RET=0
+
+	for i in $(seq 1 $num_iter); do
+		log_info "Iteration $i / $num_iter"
+
+		devlink_trap_action_set $trap_name "trap"
+		sleep 1
+
+		devlink_reload
+		# Allow netdevices to be re-created following the reload
+		sleep 20
+
+		cleanup
+		setup_prepare
+		setup_wait
+	done
+
+	log_test "Device delete"
+
+	kill $mz_pid && wait $mz_pid &> /dev/null	# stop the background sender
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh
new file mode 100755
index 000000000..b32ba5fec
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap ACL drops functionality over mlxsw.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	ingress_flow_action_drop_test
+	egress_flow_action_drop_test
+"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2
+}
+
+switch_create()
+{
+	ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp1 clsact
+	tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp2 clsact
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	h1mac=$(mac_get $h1)
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ingress_flow_action_drop_test()
+{
+	local mz_pid
+
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+		flower src_mac $h1mac action pass
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \
+		flower dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 0 -p 100 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -d 1msec -q &
+	mz_pid=$!
+
+	RET=0
+
+	devlink_trap_drop_test ingress_flow_action_drop $swp2 101
+
+	log_test "ingress_flow_action_drop"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
+}
+
+egress_flow_action_drop_test()
+{
+	local mz_pid
+
+	tc filter add dev $swp2 egress protocol ip pref 2 handle 102 \
+		flower src_mac $h1mac action pass
+
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+		flower dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 0 -p 100 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -d 1msec -q &
+	mz_pid=$!
+
+	RET=0
+
+	devlink_trap_drop_test egress_flow_action_drop $swp2 102
+
+	log_test "egress_flow_action_drop"
+
+	tc filter del dev $swp2 egress protocol ip pref 1 handle 101 flower
+
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 2 102
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh
new file mode 100755
index 000000000..a37273473
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh
@@ -0,0 +1,688 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap control trap functionality over mlxsw. Each registered
+# control packet trap is tested to make sure it is triggered under the right
+# conditions.
+#
+# +---------------------------------+
+# | H1 (vrf)                        |
+# |    + $h1                        |
+# |    | 192.0.2.1/24               |
+# |    | 2001:db8:1::1/64           |
+# |    |                            |
+# |    |  default via 192.0.2.2     |
+# |    |  default via 2001:db8:1::2 |
+# +----|----------------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# |    + $rp1                                                                 |
+# |        192.0.2.2/24                                                       |
+# |        2001:db8:1::2/64                                                   |
+# |                                                                           |
+# |        2001:db8:2::2/64                                                   |
+# |        198.51.100.2/24                                                    |
+# |    + $rp2                                                                 |
+# |    |                                                                      |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|----------------------------+
+# |    |  default via 198.51.100.2  |
+# |    |  default via 2001:db8:2::2 |
+# |    |                            |
+# |    | 2001:db8:2::1/64           |
+# |    | 198.51.100.1/24            |
+# |    + $h2                        |
+# | H2 (vrf)                        |
+# +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	stp_test
+	lacp_test
+	lldp_test
+	igmp_query_test
+	igmp_v1_report_test
+	igmp_v2_report_test
+	igmp_v3_report_test
+	igmp_v2_leave_test
+	mld_query_test
+	mld_v1_report_test
+	mld_v2_report_test
+	mld_v1_done_test
+	ipv4_dhcp_test
+	ipv6_dhcp_test
+	arp_request_test
+	arp_response_test
+	ipv6_neigh_solicit_test
+	ipv6_neigh_advert_test
+	ipv4_bfd_test
+	ipv6_bfd_test
+	ipv4_ospf_test
+	ipv6_ospf_test
+	ipv4_bgp_test
+	ipv6_bgp_test
+	ipv4_vrrp_test
+	ipv6_vrrp_test
+	ipv4_pim_test
+	ipv6_pim_test
+	uc_loopback_test
+	local_route_test
+	external_route_test
+	ipv6_uc_dip_link_local_scope_test
+	ipv4_router_alert_test
+	ipv6_router_alert_test
+	ipv6_dip_all_nodes_test
+	ipv6_dip_all_routers_test
+	ipv6_router_solicit_test
+	ipv6_router_advert_test
+	ipv6_redirect_test
+	ptp_event_test
+	ptp_general_test
+	flow_action_sample_test
+	flow_action_trap_test
+"
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+
+	ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+	ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2
+	ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64
+
+	ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+	ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+	ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2
+	ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+	simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+
+	__addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
+	__addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64
+}
+
+router_destroy()
+{
+	__addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64
+	__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	router_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	router_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+stp_test()
+{
+	devlink_trap_stats_test "STP" "stp" $MZ $h1 -c 1 -t bpdu -q
+}
+
+lacp_payload_get()
+{
+	local source_mac=$1; shift
+	local p
+
+	p=$(:
+		)"01:80:C2:00:00:02:"$(       : ETH daddr
+		)"$source_mac:"$(             : ETH saddr
+		)"88:09:"$(                   : ETH type
+		)
+	echo $p
+}
+
+lacp_test()
+{
+	local h1mac=$(mac_get $h1)
+
+	devlink_trap_stats_test "LACP" "lacp" $MZ $h1 -c 1 \
+		$(lacp_payload_get $h1mac) -p 100 -q
+}
+
+lldp_payload_get()
+{
+	local source_mac=$1; shift
+	local p
+
+	p=$(:
+		)"01:80:C2:00:00:0E:"$(       : ETH daddr
+		)"$source_mac:"$(             : ETH saddr
+		)"88:CC:"$(                   : ETH type
+		)
+	echo $p
+}
+
+lldp_test()
+{
+	local h1mac=$(mac_get $h1)
+
+	devlink_trap_stats_test "LLDP" "lldp" $MZ $h1 -c 1 \
+		$(lldp_payload_get $h1mac) -p 100 -q
+}
+
+igmp_query_test()
+{
+	# IGMP (IP Protocol 2) Membership Query (Type 0x11)
+	devlink_trap_stats_test "IGMP Membership Query" "igmp_query" \
+		$MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+		-A 192.0.2.1 -B 224.0.0.1 -t ip proto=2,p=11 -p 100 -q
+}
+
+igmp_v1_report_test()
+{
+	# IGMP (IP Protocol 2) Version 1 Membership Report (Type 0x12), DIP 224.0.0.1
+	devlink_trap_stats_test "IGMP Version 1 Membership Report" \
+		"igmp_v1_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+		-A 192.0.2.1 -B 224.0.0.1 -t ip proto=2,p=12 -p 100 -q
+}
+
+igmp_v2_report_test()
+{
+	# IGMP (IP Protocol 2) Version 2 Membership Report (Type 0x16), DIP 224.0.0.1
+	devlink_trap_stats_test "IGMP Version 2 Membership Report" \
+		"igmp_v2_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+		-A 192.0.2.1 -B 224.0.0.1 -t ip proto=2,p=16 -p 100 -q
+}
+
+igmp_v3_report_test()
+{
+	# IGMP (IP Protocol 2) Version 3 Membership Report (Type 0x22), DIP 224.0.0.1
+	devlink_trap_stats_test "IGMP Version 3 Membership Report" \
+		"igmp_v3_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+		-A 192.0.2.1 -B 224.0.0.1 -t ip proto=2,p=22 -p 100 -q
+}
+
+igmp_v2_leave_test()
+{
+	# IGMP (IP Protocol 2) Version 2 Leave Group (Type 0x17)
+	devlink_trap_stats_test "IGMP Version 2 Leave Group" \
+		"igmp_v2_leave" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:02 \
+		-A 192.0.2.1 -B 224.0.0.2 -t ip proto=2,p=17 -p 100 -q
+}
+
+mld_payload_get()
+{
+	local type=$1; shift
+	local p
+
+	type=$(printf "%x" $type)
+	p=$(:
+		)"3A:"$(			: Next Header - ICMPv6
+		)"00:"$(			: Hdr Ext Len
+		)"00:00:00:00:00:00:"$(		: Options and Padding
+		)"$type:"$(			: ICMPv6.type
+		)"00:"$(			: ICMPv6.code
+		)"00:"$(			: ICMPv6.checksum
+		)
+	echo $p
+}
+
+mld_query_test()
+{
+	# MLD Multicast Listener Query (Type 130)
+	devlink_trap_stats_test "MLD Multicast Listener Query" "mld_query" \
+		$MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1 \
+		-t ip hop=1,next=0,payload=$(mld_payload_get 130) -p 100 -q
+}
+
+mld_v1_report_test()
+{
+	# MLD Version 1 Multicast Listener Report (Type 131)
+	devlink_trap_stats_test "MLD Version 1 Multicast Listener Report" \
+		"mld_v1_report" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \
+		-t ip hop=1,next=0,payload=$(mld_payload_get 131) -p 100 -q
+}
+
+mld_v2_report_test()
+{
+	# MLD Version 2 Multicast Listener Report (Type 143)
+	devlink_trap_stats_test "MLD Version 2 Multicast Listener Report" \
+		"mld_v2_report" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \
+		-t ip hop=1,next=0,payload=$(mld_payload_get 143) -p 100 -q
+}
+
+mld_v1_done_test()
+{
+	# MLD Version 1 Multicast Listener Done (Type 132)
+	devlink_trap_stats_test "MLD Version 1 Multicast Listener Done" \
+		"mld_v1_done" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \
+		-t ip hop=1,next=0,payload=$(mld_payload_get 132) -p 100 -q
+}
+
+ipv4_dhcp_test()
+{
+	devlink_trap_stats_test "IPv4 DHCP Port 67" "ipv4_dhcp" \
+		$MZ $h1 -c 1 -a own -b bcast -A 0.0.0.0 -B 255.255.255.255 \
+		-t udp sp=68,dp=67 -p 100 -q
+
+	devlink_trap_stats_test "IPv4 DHCP Port 68" "ipv4_dhcp" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) -A 192.0.2.1 \
+		-B 255.255.255.255 -t udp sp=67,dp=68 -p 100 -q
+}
+
+ipv6_dhcp_test()
+{
+	devlink_trap_stats_test "IPv6 DHCP Port 547" "ipv6_dhcp" \
+		$MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1:2 -t udp sp=546,dp=547 \
+		-p 100 -q
+
+	devlink_trap_stats_test "IPv6 DHCP Port 546" "ipv6_dhcp" \
+		$MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1:2 -t udp sp=547,dp=546 \
+		-p 100 -q
+}
+
+arp_request_test()
+{
+	devlink_trap_stats_test "ARP Request" "arp_request" \
+		$MZ $h1 -c 1 -a own -b bcast -t arp request -p 100 -q
+}
+
+arp_response_test()
+{
+	devlink_trap_stats_test "ARP Response" "arp_response" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) -t arp reply -p 100 -q
+}
+
+icmpv6_header_get()
+{
+	local type=$1; shift
+	local p
+
+	type=$(printf "%x" $type)
+	p=$(:
+		)"$type:"$(			: ICMPv6.type
+		)"00:"$(			: ICMPv6.code
+		)"00:"$(			: ICMPv6.checksum
+		)
+	echo $p
+}
+
+ipv6_neigh_solicit_test()
+{
+	devlink_trap_stats_test "IPv6 Neighbour Solicitation" \
+		"ipv6_neigh_solicit" $MZ $h1 -6 -c 1 \
+		-A fe80::1 -B ff02::1:ff00:02 \
+		-t ip hop=1,next=58,payload=$(icmpv6_header_get 135) -p 100 -q
+}
+
+ipv6_neigh_advert_test()
+{
+	devlink_trap_stats_test "IPv6 Neighbour Advertisement" \
+		"ipv6_neigh_advert" $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A fe80::1 -B 2001:db8:1::2 \
+		-t ip hop=1,next=58,payload=$(icmpv6_header_get 136) -p 100 -q
+}
+
+ipv4_bfd_test()
+{
+	devlink_trap_stats_test "IPv4 BFD Control - Port 3784" "ipv4_bfd" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.2 -t udp sp=49153,dp=3784 -p 100 -q
+
+	devlink_trap_stats_test "IPv4 BFD Echo - Port 3785" "ipv4_bfd" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.2 -t udp sp=49153,dp=3785 -p 100 -q
+}
+
+ipv6_bfd_test()
+{
+	devlink_trap_stats_test "IPv6 BFD Control - Port 3784" "ipv6_bfd" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::2 \
+		-t udp sp=49153,dp=3784 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 BFD Echo - Port 3785" "ipv6_bfd" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::2 \
+		-t udp sp=49153,dp=3785 -p 100 -q
+}
+
+ipv4_ospf_test()
+{
+	devlink_trap_stats_test "IPv4 OSPF - Multicast" "ipv4_ospf" \
+		$MZ $h1 -c 1 -a own -b 01:00:5e:00:00:05 \
+		-A 192.0.2.1 -B 224.0.0.5 -t ip proto=89 -p 100 -q
+
+	devlink_trap_stats_test "IPv4 OSPF - Unicast" "ipv4_ospf" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.2 -t ip proto=89 -p 100 -q
+}
+
+ipv6_ospf_test()
+{
+	devlink_trap_stats_test "IPv6 OSPF - Multicast" "ipv6_ospf" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:05 \
+		-A fe80::1 -B ff02::5 -t ip next=89 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 OSPF - Unicast" "ipv6_ospf" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::2 -t ip next=89 -p 100 -q
+}
+
+ipv4_bgp_test()
+{
+	devlink_trap_stats_test "IPv4 BGP" "ipv4_bgp" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.2 -t tcp sp=54321,dp=179,flags=rst \
+		-p 100 -q
+}
+
+ipv6_bgp_test()
+{
+	devlink_trap_stats_test "IPv6 BGP" "ipv6_bgp" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::2 \
+		-t tcp sp=54321,dp=179,flags=rst -p 100 -q
+}
+
+ipv4_vrrp_test()
+{
+	devlink_trap_stats_test "IPv4 VRRP" "ipv4_vrrp" \
+		$MZ $h1 -c 1 -a own -b 01:00:5e:00:00:12 \
+		-A 192.0.2.1 -B 224.0.0.18 -t ip proto=112 -p 100 -q
+}
+
+ipv6_vrrp_test()
+{
+	devlink_trap_stats_test "IPv6 VRRP" "ipv6_vrrp" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:12 \
+		-A fe80::1 -B ff02::12 -t ip next=112 -p 100 -q
+}
+
+ipv4_pim_test()
+{
+	devlink_trap_stats_test "IPv4 PIM - Multicast" "ipv4_pim" \
+		$MZ $h1 -c 1 -a own -b 01:00:5e:00:00:0d \
+		-A 192.0.2.1 -B 224.0.0.13 -t ip proto=103 -p 100 -q
+
+	devlink_trap_stats_test "IPv4 PIM - Unicast" "ipv4_pim" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.2 -t ip proto=103 -p 100 -q
+}
+
+ipv6_pim_test()
+{
+	devlink_trap_stats_test "IPv6 PIM - Multicast" "ipv6_pim" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:0d \
+		-A fe80::1 -B ff02::d -t ip next=103 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 PIM - Unicast" "ipv6_pim" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A fe80::1 -B 2001:db8:1::2 -t ip next=103 -p 100 -q
+}
+
+uc_loopback_test()
+{
+	# Add neighbours to the fake destination IPs, so that the packets are
+	# routed in the device and not trapped due to an unresolved neighbour
+	# exception.
+	ip -4 neigh add 192.0.2.3 lladdr 00:11:22:33:44:55 nud permanent \
+		dev $rp1
+	ip -6 neigh add 2001:db8:1::3 lladdr 00:11:22:33:44:55 nud permanent \
+		dev $rp1
+
+	devlink_trap_stats_test "IPv4 Unicast Loopback" "uc_loopback" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.3 -t udp sp=54321,dp=12345 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 Unicast Loopback" "uc_loopback" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::3 -t udp sp=54321,dp=12345 \
+		-p 100 -q
+
+	ip -6 neigh del 2001:db8:1::3 dev $rp1
+	ip -4 neigh del 192.0.2.3 dev $rp1
+}
+
+local_route_test()
+{
+	# Use a fake source IP to prevent the trap from being triggered twice
+	# when the router sends back a port unreachable message.
+	devlink_trap_stats_test "IPv4 Local Route" "local_route" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.3 -B 192.0.2.2 -t udp sp=54321,dp=12345 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 Local Route" "local_route" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::3 -B 2001:db8:1::2 -t udp sp=54321,dp=12345 \
+		-p 100 -q
+}
+
+external_route_test()
+{
+	# Add a dummy device through which the incoming packets should be
+	# routed.
+	ip link add name dummy10 up type dummy
+	ip address add 203.0.113.1/24 dev dummy10
+	ip -6 address add 2001:db8:10::1/64 dev dummy10
+
+	devlink_trap_stats_test "IPv4 External Route" "external_route" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 203.0.113.2 -t udp sp=54321,dp=12345 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 External Route" "external_route" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:10::2 -t udp sp=54321,dp=12345 \
+		-p 100 -q
+
+	ip -6 address del 2001:db8:10::1/64 dev dummy10
+	ip address del 203.0.113.1/24 dev dummy10
+	ip link del dev dummy10
+}
+
+ipv6_uc_dip_link_local_scope_test()
+{
+	# Add a dummy link-local prefix route to allow the packet to be routed.
+	ip -6 route add fe80:1::/64 dev $rp2
+
+	devlink_trap_stats_test \
+		"IPv6 Unicast Destination IP With Link-Local Scope" \
+		"ipv6_uc_dip_link_local_scope" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A fe80::1 -B fe80:1::2 -t udp sp=54321,dp=12345 \
+		-p 100 -q
+
+	ip -6 route del fe80:1::/64 dev $rp2
+}
+
+ipv4_router_alert_get()
+{
+	local p
+
+	# https://en.wikipedia.org/wiki/IPv4#Options
+	p=$(:
+		)"94:"$(			: Option Number
+		)"04:"$(			: Option Length
+		)"00:00:"$(			: Option Data
+		)
+	echo $p
+}
+
+ipv4_router_alert_test()
+{
+	devlink_trap_stats_test "IPv4 Router Alert" "ipv4_router_alert" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 198.51.100.3 \
+		-t ip option=$(ipv4_router_alert_get) -p 100 -q
+}
+
+ipv6_router_alert_get()
+{
+	local p
+
+	# https://en.wikipedia.org/wiki/IPv6_packet#Hop-by-hop_options_and_destination_options
+	# https://tools.ietf.org/html/rfc2711#section-2.1
+	p=$(:
+		)"11:"$(			: Next Header - UDP
+		)"00:"$(			: Hdr Ext Len
+		)"05:02:00:00:00:00:"$(		: Option Data
+		)
+	echo $p
+}
+
+ipv6_router_alert_test()
+{
+	devlink_trap_stats_test "IPv6 Router Alert" "ipv6_router_alert" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::3 \
+		-t ip next=0,payload=$(ipv6_router_alert_get) -p 100 -q
+}
+
+ipv6_dip_all_nodes_test()
+{
+	devlink_trap_stats_test "IPv6 Destination IP \"All Nodes Address\"" \
+		"ipv6_dip_all_nodes" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:01 \
+		-A 2001:db8:1::1 -B ff02::1 -t udp sp=12345,dp=54321 -p 100 -q
+}
+
+ipv6_dip_all_routers_test()
+{
+	devlink_trap_stats_test "IPv6 Destination IP \"All Routers Address\"" \
+		"ipv6_dip_all_routers" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:02 \
+		-A 2001:db8:1::1 -B ff02::2 -t udp sp=12345,dp=54321 -p 100 -q
+}
+
+ipv6_router_solicit_test()
+{
+	devlink_trap_stats_test "IPv6 Router Solicitation" \
+		"ipv6_router_solicit" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:02 \
+		-A fe80::1 -B ff02::2 \
+		-t ip hop=1,next=58,payload=$(icmpv6_header_get 133) -p 100 -q
+}
+
+ipv6_router_advert_test()
+{
+	devlink_trap_stats_test "IPv6 Router Advertisement" \
+		"ipv6_router_advert" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:01 \
+		-A fe80::1 -B ff02::1 \
+		-t ip hop=1,next=58,payload=$(icmpv6_header_get 134) -p 100 -q
+}
+
+ipv6_redirect_test()
+{
+	devlink_trap_stats_test "IPv6 Redirect Message" \
+		"ipv6_redirect" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A fe80::1 -B 2001:db8:1::2 \
+		-t ip hop=1,next=58,payload=$(icmpv6_header_get 137) -p 100 -q
+}
+
+ptp_event_test()
+{
+	# PTP is only supported on Spectrum-1, for now.
+	[[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return
+
+	# PTP Sync (0)
+	devlink_trap_stats_test "PTP Time-Critical Event Message" "ptp_event" \
+		$MZ $h1 -c 1 -a own -b 01:00:5e:00:01:81 \
+		-A 192.0.2.1 -B 224.0.1.129 \
+		-t udp sp=12345,dp=319,payload=10 -p 100 -q
+}
+
+ptp_general_test()
+{
+	# PTP is only supported on Spectrum-1, for now.
+	[[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return
+
+	# PTP Announce (b)
+	devlink_trap_stats_test "PTP General Message" "ptp_general" \
+		$MZ $h1 -c 1 -a own -b 01:00:5e:00:01:81 \
+		-A 192.0.2.1 -B 224.0.1.129 \
+		-t udp sp=12345,dp=320,payload=1b -p 100 -q
+}
+
+flow_action_sample_test()
+{
+	# Install a filter that samples every incoming packet.
+	tc qdisc add dev $rp1 clsact
+	tc filter add dev $rp1 ingress proto all pref 1 handle 101 matchall \
+		skip_sw action sample rate 1 group 1
+
+	devlink_trap_stats_test "Flow Sampling" "flow_action_sample" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 198.51.100.1 -t udp sp=12345,dp=54321 -p 100 -q
+
+	tc filter del dev $rp1 ingress proto all pref 1 handle 101 matchall
+	tc qdisc del dev $rp1 clsact
+}
+
+flow_action_trap_test()
+{
+	# Install a filter that traps a specific flow.
+	tc qdisc add dev $rp1 clsact
+	tc filter add dev $rp1 ingress proto ip pref 1 handle 101 flower \
+		skip_sw ip_proto udp src_port 12345 dst_port 54321 action trap
+
+	devlink_trap_stats_test "Flow Trapping (Logging)" "flow_action_trap" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 198.51.100.1 -t udp sp=12345,dp=54321 -p 100 -q
+
+	tc filter del dev $rp1 ingress proto ip pref 1 handle 101 flower
+	tc qdisc del dev $rp1 clsact
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
new file mode 100755
index 000000000..a4c2812e9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
@@ -0,0 +1,430 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap L2 drops functionality over mlxsw. Each registered L2 drop
+# packet trap is tested to make sure it is triggered under the right
+# conditions.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+# Each ALL_TESTS entry names a test function defined further down.
+ALL_TESTS="
+	source_mac_is_multicast_test
+	vlan_tag_mismatch_test
+	ingress_vlan_filter_test
+	ingress_stp_filter_test
+	port_list_is_empty_test
+	port_loopback_filter_test
+"
+NUM_NETIFS=4	# h1, swp1, swp2 and h2, see setup_prepare
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1	# no IP addresses are passed for $h1 here
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1	# counterpart of the call in h1_create
+}
+
+h2_create()
+{
+	simple_if_init $h2	# no IP addresses are passed for $h2 here
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2	# counterpart of the call in h2_create
+}
+
+switch_create()
+{
+	# VLAN-aware bridge, created with multicast snooping switched off.
+	ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+	# The tests attach their egress flower filters to $swp2.
+	tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp2 clsact
+	# Undo switch_create: the ports are only set down, br0 is deleted.
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	# p1/p4 are used as hosts, p2/p3 become the two bridge ports.
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+	# From here on: setup_prepare in reverse order.
+	switch_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+source_mac_is_multicast_test()
+{
+	local trap_name="source_mac_is_multicast" smac=01:02:03:04:05:06 mz_pid
+
+	RET=0
+
+	# Egress filter 101 on $swp2 matches the test stream by source MAC.
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+		flower src_mac $smac action drop
+
+	# Endless broadcast stream whose source MAC has the group bit set.
+	$MZ $h1 -c 0 -p 100 -a $smac -b bcast -t ip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $swp2 101
+
+	log_test "Source MAC is multicast"
+
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
+}
+
+__vlan_tag_mismatch_test()
+{
+	# $1: optional mausezahn argument selecting the frame format. The
+	# callers pass nothing (untagged) or "-Q 0" (priority-tagged).
+	local trap_name="vlan_tag_mismatch" dmac=de:ad:be:ef:13:37 mz_pid
+	local opt=$1
+
+	# Re-add VLAN 1 without the "pvid" keyword to clear the PVID flag, so
+	# that untagged and prio-tagged packets cannot enter the bridge.
+	bridge vlan add vid 1 dev $swp1 untagged master
+
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+		flower dst_mac $dmac action drop
+
+	$MZ $h1 "$opt" -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $swp2 101
+
+	# With the PVID restored the stream has to be forwarded again.
+	bridge vlan add vid 1 dev $swp1 pvid untagged master
+	devlink_trap_action_set $trap_name "trap"
+
+	devlink_trap_stats_idle_test $trap_name
+	check_err $? "Trap stats not idle when packets should not be dropped"
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
+	check_err $? "Trap group stats not idle with when packets should not be dropped"
+
+	tc_check_packets "dev $swp2 egress" 101 0
+	check_fail $? "Packets not forwarded when should"
+
+	devlink_trap_action_set $trap_name "drop"
+
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
+}
+
+vlan_tag_mismatch_untagged_test()
+{
+	RET=0
+	# No extra mausezahn option is passed: the stream is sent untagged.
+	__vlan_tag_mismatch_test
+
+	log_test "VLAN tag mismatch - untagged packets"
+}
+
+vlan_tag_mismatch_vid_0_test()
+{
+	RET=0
+	# "-Q 0" is handed to mausezahn as is: frames tagged with VLAN ID 0.
+	__vlan_tag_mismatch_test "-Q 0"
+
+	log_test "VLAN tag mismatch - prio-tagged packets"
+}
+
+vlan_tag_mismatch_test()
+{
+	vlan_tag_mismatch_untagged_test	# untagged frames
+	vlan_tag_mismatch_vid_0_test	# frames tagged with VLAN ID 0
+}
+
+ingress_vlan_filter_test()
+{
+	local trap_name="ingress_vlan_filter" dmac=de:ad:be:ef:13:37
+	local vid=10 mz_pid
+
+	RET=0
+
+	# Only $swp2 joins VLAN $vid for now, so frames tagged with it are
+	# expected to fail the ingress VLAN filter of $swp1.
+	bridge vlan add vid $vid dev $swp2 master
+
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+		flower dst_mac $dmac action drop
+
+	$MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $swp2 101
+
+	# Once $swp1 is a member of the VLAN as well, the stream has to be
+	# forwarded instead of dropped.
+	bridge vlan add vid $vid dev $swp1 master
+	devlink_trap_action_set $trap_name "trap"
+
+	devlink_trap_stats_idle_test $trap_name
+	check_err $? "Trap stats not idle when packets should not be dropped"
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
+	check_err $? "Trap group stats not idle with when packets should not be dropped"
+
+	tc_check_packets "dev $swp2 egress" 101 0
+	check_fail $? "Packets not forwarded when should"
+
+	devlink_trap_action_set $trap_name "drop"
+
+	log_test "Ingress VLAN filter"
+
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
+
+	bridge vlan del vid $vid dev $swp1 master
+	bridge vlan del vid $vid dev $swp2 master
+}
+
+__ingress_stp_filter_test()
+{
+	# $1: bridge-port STP state to put $swp1 in while the stream is
+	# expected to be dropped.
+	local trap_name="ingress_spanning_tree_filter" dmac=de:ad:be:ef:13:37
+	local state=$1 vid=20 mz_pid
+
+	# Both ports are members of VLAN $vid; only the STP state differs.
+	bridge vlan add vid $vid dev $swp2 master
+	bridge vlan add vid $vid dev $swp1 master
+	ip link set dev $swp1 type bridge_slave state $state
+
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+		flower dst_mac $dmac action drop
+
+	$MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $swp2 101
+
+	# Move $swp1 to the forwarding state (3); from now on the stream has
+	# to be forwarded instead of dropped.
+	ip link set dev $swp1 type bridge_slave state 3
+	devlink_trap_action_set $trap_name "trap"
+
+	devlink_trap_stats_idle_test $trap_name
+	check_err $? "Trap stats not idle when packets should not be dropped"
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
+	check_err $? "Trap group stats not idle with when packets should not be dropped"
+
+	tc_check_packets "dev $swp2 egress" 101 0
+	check_fail $? "Packets not forwarded when should"
+
+	devlink_trap_action_set $trap_name "drop"
+
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
+
+	bridge vlan del vid $vid dev $swp1 master
+	bridge vlan del vid $vid dev $swp2 master
+}
+
+ingress_stp_filter_listening_test()
+{
+	# $1: numeric STP state for $swp1; the caller passes 1 for listening.
+	local state=$1
+	RET=0
+
+	__ingress_stp_filter_test $state
+
+	log_test "Ingress STP filter - listening state"
+}
+
+ingress_stp_filter_learning_test()
+{
+	# $1: numeric STP state for $swp1; the caller passes 2 for learning.
+	local state=$1
+	RET=0
+
+	__ingress_stp_filter_test $state
+
+	log_test "Ingress STP filter - learning state"
+}
+
+ingress_stp_filter_test()
+{
+	ingress_stp_filter_listening_test 1	# state 1 is logged as listening
+	ingress_stp_filter_learning_test 2	# state 2 is logged as learning
+}
+
+port_list_is_empty_uc_test()
+{
+	local trap_name="port_list_is_empty" dmac=de:ad:be:ef:13:37 mz_pid
+
+	RET=0
+
+	# With unicast flooding off on both ports there is no port left that
+	# the packets could egress.
+	ip link set dev $swp1 type bridge_slave flood off
+	ip link set dev $swp2 type bridge_slave flood off
+
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+		flower dst_mac $dmac action drop
+
+	# Endless unicast stream towards $dmac.
+	$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $swp2 101
+
+	# Re-enable flooding on one port; the stream must now be forwarded.
+	ip link set dev $swp2 type bridge_slave flood on
+	devlink_trap_action_set $trap_name "trap"
+
+	devlink_trap_stats_idle_test $trap_name
+	check_err $? "Trap stats not idle when packets should not be dropped"
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
+	check_err $? "Trap group stats not idle with when packets should not be dropped"
+
+	tc_check_packets "dev $swp2 egress" 101 0
+	check_fail $? "Packets not forwarded when should"
+
+	devlink_trap_action_set $trap_name "drop"
+
+	log_test "Port list is empty - unicast"
+
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
+
+	# $swp2 was already switched back above; restore $swp1 as well.
+	ip link set dev $swp1 type bridge_slave flood on
+}
+
+port_list_is_empty_mc_test()
+{
+	local trap_name="port_list_is_empty" mz_pid
+	local dmac=01:00:5e:00:00:01 dip=239.0.0.1
+
+	RET=0
+
+	# With multicast flooding off on both ports there is no port left
+	# that the packets could egress. The bridge addresses are flushed too,
+	# to prevent packets from being flooded to the router port.
+	ip link set dev $swp1 type bridge_slave mcast_flood off
+	ip link set dev $swp2 type bridge_slave mcast_flood off
+	ip address flush dev br0
+
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+		flower dst_mac $dmac action drop
+
+	# Endless multicast stream: group MAC $dmac, destination IP $dip.
+	$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -B $dip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $swp2 101
+
+	# Re-enable flooding on one port; the stream must now be forwarded.
+	ip link set dev $swp2 type bridge_slave mcast_flood on
+	devlink_trap_action_set $trap_name "trap"
+
+	devlink_trap_stats_idle_test $trap_name
+	check_err $? "Trap stats not idle when packets should not be dropped"
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
+	check_err $? "Trap group stats not idle with when packets should not be dropped"
+
+	tc_check_packets "dev $swp2 egress" 101 0
+	check_fail $? "Packets not forwarded when should"
+
+	devlink_trap_action_set $trap_name "drop"
+
+	log_test "Port list is empty - multicast"
+
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
+
+	# Restore $swp1; addresses flushed from br0 above are not re-added.
+	ip link set dev $swp1 type bridge_slave mcast_flood on
+}
+
+port_list_is_empty_test()
+{
+	port_list_is_empty_uc_test	# unicast flooding disabled
+	port_list_is_empty_mc_test	# multicast flooding disabled
+}
+
+port_loopback_filter_uc_test()
+{
+	local trap_name="port_loopback_filter" dmac=de:ad:be:ef:13:37 mz_pid
+
+	RET=0
+
+	# With flooding off on $swp2, the only port the packets could still
+	# egress is the one they came in on.
+	ip link set dev $swp2 type bridge_slave flood off
+
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+		flower dst_mac $dmac action drop
+
+	# Endless unicast stream towards $dmac, entering through $h1.
+	$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $swp2 101
+
+	# Flooding towards $swp2 is back; the stream must now be forwarded.
+	ip link set dev $swp2 type bridge_slave flood on
+	devlink_trap_action_set $trap_name "trap"
+
+	devlink_trap_stats_idle_test $trap_name
+	check_err $? "Trap stats not idle when packets should not be dropped"
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
+	check_err $? "Trap group stats not idle with when packets should not be dropped"
+
+	tc_check_packets "dev $swp2 egress" 101 0
+	check_fail $? "Packets not forwarded when should"
+
+	devlink_trap_action_set $trap_name "drop"
+
+	log_test "Port loopback filter - unicast"
+
+	devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
+}
+
+port_loopback_filter_test()
+{
+	port_loopback_filter_uc_test	# only a unicast variant exists
+}
+
+trap cleanup EXIT	# run cleanup whenever the script exits
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
new file mode 100755
index 000000000..269b26806
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
@@ -0,0 +1,660 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap L3 drops functionality over mlxsw. Each registered L3 drop
+# packet trap is tested to make sure it is triggered under the right
+# conditions.
+
+# +---------------------------------+
+# | H1 (vrf)                        |
+# |    + $h1                        |
+# |    | 192.0.2.1/24               |
+# |    | 2001:db8:1::1/64           |
+# |    |                            |
+# |    |  default via 192.0.2.2     |
+# |    |  default via 2001:db8:1::2 |
+# +----|----------------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# |    + $rp1                                                                 |
+# |        192.0.2.2/24                                                       |
+# |        2001:db8:1::2/64                                                   |
+# |                                                                           |
+# |        2001:db8:2::2/64                                                   |
+# |        198.51.100.2/24                                                    |
+# |    + $rp2                                                                 |
+# |    |                                                                      |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|----------------------------+
+# |    |  default via 198.51.100.2  |
+# |    |  default via 2001:db8:2::2 |
+# |    |                            |
+# |    | 2001:db8:2::1/64           |
+# |    | 198.51.100.1/24            |
+# |    + $h2                        |
+# | H2 (vrf)                        |
+# +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+# Each ALL_TESTS entry names a test function defined further down.
+ALL_TESTS="
+	non_ip_test
+	uc_dip_over_mc_dmac_test
+	dip_is_loopback_test
+	sip_is_mc_test
+	sip_is_loopback_test
+	ip_header_corrupted_test
+	ipv4_sip_is_limited_bc_test
+	ipv6_mc_dip_reserved_scope_test
+	ipv6_mc_dip_interface_local_scope_test
+	blackhole_route_test
+	irif_disabled_test
+	erif_disabled_test
+"
+
+NUM_NETIFS=4	# h1, rp1, rp2 and h2, see setup_prepare
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+	# Default routes point at the addresses router_create gives $rp1.
+	ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+	ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2
+	ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+	# Same addresses as passed to simple_if_init in h1_create.
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 $h2_ipv4/24 $h2_ipv6/64
+	# Default routes point at the addresses router_create gives $rp2.
+	ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+	ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+	ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2
+	ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+	# Same addresses as passed to simple_if_init in h2_create.
+	simple_if_fini $h2 $h2_ipv4/24 $h2_ipv6/64
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+	# The tests attach their egress flower filters to $rp2.
+	tc qdisc add dev $rp2 clsact
+
+	__addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
+	__addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64
+}
+
+router_destroy()
+{
+	__addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64
+	__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
+	# The clsact qdisc was added by router_create.
+	tc qdisc del dev $rp2 clsact
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+	# MAC addresses the tests use when crafting frames from $h1 to $rp1.
+	h1mac=$(mac_get $h1)
+	rp1mac=$(mac_get $rp1)
+	# Host addresses, as in the topology diagram at the top of the file.
+	h1_ipv4=192.0.2.1
+	h2_ipv4=198.51.100.1
+	h1_ipv6=2001:db8:1::1
+	h2_ipv6=2001:db8:2::1
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+
+	router_create
+}
+
+cleanup()
+{
+	pre_cleanup
+	# From here on: setup_prepare in reverse order.
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+ping_check()
+{
+	# Ping H2 with trap $1 set to "trap": normal traffic must not hit it.
+	local trap_name=$1	# keep the caller's $trap_name untouched
+	devlink_trap_action_set $trap_name "trap"
+	ping_do $h1 $h2_ipv4
+	check_err $? "Packets that should not be trapped were trapped"
+	devlink_trap_action_set $trap_name "drop"
+}
+
+non_ip_test()
+{
+	local trap_name="non_ip" mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	# Egress filter 101 on $rp2, matching IPv4 packets destined to H2.
+	tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \
+		flower dst_ip $h2_ipv4 action drop
+
+	# Raw frames to the router: dst $rp1mac, src $h1mac, EtherType 00:00.
+	$MZ $h1 -c 0 -p 100 -d 1msec -B $h2_ipv4 -q "$rp1mac $h1mac \
+		00:00 de:ad:be:ef" &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+
+	log_test "Non IP"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101
+}
+
+__uc_dip_over_mc_dmac_test()
+{
+	# $1: label appended to the log line, $2: tc protocol ("ip"/"ipv6"),
+	# $3: unicast destination IP, $4: optional extra mausezahn flags.
+	local desc=$1 proto=$2 dip=$3 flags=${4:-""}
+	local trap_name="uc_dip_over_mc_dmac" dmac=01:02:03:04:05:06 mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	# Egress filter 101 on $rp2, matching the UDP ports used by the
+	# stream generated below.
+	tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+		flower ip_proto udp src_port 54321 dst_port 12345 action drop
+
+	# Endless UDP stream to a unicast IP, sent to a destination MAC that
+	# has the group bit set.
+	$MZ $h1 $flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $dmac \
+		-B $dip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+
+	log_test "Unicast destination IP over multicast destination MAC: $desc"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
+}
+
+uc_dip_over_mc_dmac_test()
+{
+	__uc_dip_over_mc_dmac_test "IPv4" "ip" $h2_ipv4
+	__uc_dip_over_mc_dmac_test "IPv6" "ipv6" $h2_ipv6 "-6"	# -6 goes to $MZ
+}
+
+__sip_is_loopback_test()
+{
+	# $1: label appended to the log line, $2: tc protocol, $3: loopback
+	# source IP, $4: destination IP, $5: optional extra mausezahn flags.
+	local desc=$1 proto=$2
+	local sip=$3 dip=$4 flags=${5:-""}
+	local trap_name="sip_is_loopback_address" mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	# Egress filter 101 on $rp2, matching the stream by its source IP.
+	tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+		flower src_ip $sip action drop
+
+	# Endless UDP stream addressed to the router's MAC, with a loopback
+	# address as source IP.
+	$MZ $h1 $flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -A $sip \
+		-b $rp1mac -B $dip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+
+	log_test "Source IP is loopback address: $desc"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
+}
+
+sip_is_loopback_test()
+{
+	__sip_is_loopback_test "IPv4" "ip" "127.0.0.0/8" $h2_ipv4
+	__sip_is_loopback_test "IPv6" "ipv6" "::1" $h2_ipv6 "-6"	# -6 goes to $MZ
+}
+
+__dip_is_loopback_test()
+{
+	# $1: label appended to the log line, $2: tc protocol, $3: loopback
+	# destination IP, $4: optional extra mausezahn flags.
+	local desc=$1 proto=$2 dip=$3 flags=${4:-""}
+	local trap_name="dip_is_loopback_address" mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	# Egress filter 101 on $rp2, matching the stream by destination IP.
+	tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+		flower dst_ip $dip action drop
+
+	# Endless UDP stream addressed to the router's MAC, with a loopback
+	# address as destination IP.
+	$MZ $h1 $flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $rp1mac \
+		-B $dip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+
+	log_test "Destination IP is loopback address: $desc"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
+}
+
+dip_is_loopback_test()
+{
+	__dip_is_loopback_test "IPv4" "ip" "127.0.0.0/8"
+	__dip_is_loopback_test "IPv6" "ipv6" "::1" "-6"	# -6 goes to $MZ
+}
+
+__sip_is_mc_test()
+{
+	# $1: label appended to the log line, $2: tc protocol, $3: multicast
+	# source IP, $4: destination IP, $5: optional extra mausezahn flags.
+	local desc=$1 proto=$2
+	local sip=$3 dip=$4 flags=${5:-""}
+	local trap_name="sip_is_mc" mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	# Egress filter 101 on $rp2, matching the stream by its source IP.
+	tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+		flower src_ip $sip action drop
+
+	# Endless UDP stream addressed to the router's MAC, with a multicast
+	# address as source IP.
+	$MZ $h1 $flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -A $sip \
+		-b $rp1mac -B $dip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+
+	log_test "Source IP is multicast: $desc"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
+}
+
+sip_is_mc_test()
+{
+	__sip_is_mc_test "IPv4" "ip" "239.1.1.1" $h2_ipv4
+	__sip_is_mc_test "IPv6" "ipv6" "FF02::2" $h2_ipv6 "-6"	# -6 goes to $MZ
+}
+
+ipv4_sip_is_limited_bc_test()
+{
+	local trap_name="ipv4_sip_is_limited_bc" sip=255.255.255.255 mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	# Egress filter 101 on $rp2, matching the stream by its source IP.
+	tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \
+		flower src_ip $sip action drop
+
+	# Endless UDP stream towards H2 through the router, sourced from the
+	# limited broadcast address.
+	$MZ $h1 -t udp "sp=54321,dp=12345" -c 0 -p 100 -A $sip -b $rp1mac \
+		-B $h2_ipv4 -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+
+	log_test "IPv4 source IP is limited broadcast"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101
+}
+
+ipv4_payload_get()
+{
+	# Print EtherType + IPv4 header as a colon-separated string for $MZ.
+	# $1: IP version digit, $2: IHL digit, $3: header checksum ("xx:yy").
+	local ipver=$1 ihl=$2 checksum=$3
+	local p		# scratch variable; the empty $( : ... ) are comments
+	p=$(:
+		)"08:00:"$(                   : ETH type
+		)"$ipver"$(                   : IP version
+		)"$ihl:"$(                    : IHL
+		)"00:"$(                      : IP TOS
+		)"00:F4:"$(                   : IP total length
+		)"00:00:"$(                   : IP identification
+		)"20:00:"$(                   : IP flags + frag off
+		)"30:"$(                      : IP TTL
+		)"01:"$(                      : IP proto
+		)"$checksum:"$(               : IP header csum
+		)"$h1_ipv4:"$(                : IP saddr
+		)"$h2_ipv4:"$(                : IP daddr
+		)
+	echo "$p"
+}
+
+__ipv4_header_corrupted_test()
+{
+	# $1: label describing the corrupted field (used in the log line),
+	# $2: IP version digit, $3: IHL digit, $4: header checksum. All but
+	# $1 are handed to ipv4_payload_get unchanged.
+	local desc=$1 ipver=$2 ihl=$3 checksum=$4
+	local trap_name="ip_header_corrupted" payload mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	# Egress filter 101 on $rp2, matching IPv4 packets destined to H2.
+	tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \
+		flower dst_ip $h2_ipv4 action drop
+
+	# EtherType and IPv4 header carrying the requested field values.
+	payload=$(ipv4_payload_get $ipver $ihl $checksum)
+
+	# Endless stream of frames with that header, sent to the router.
+	$MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+
+	log_test "IP header corrupted: $desc: IPv4"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101
+}
+
+ipv6_payload_get()
+{
+	# Print EtherType + IPv6 header for $MZ; $1: IP version digit.
+	local ipver=$1 p	# p: scratch; the empty $( : ... ) are comments
+	p=$(:
+		)"86:DD:"$(                  : ETH type
+		)"$ipver"$(                  : IP version
+		)"0:0:"$(                    : Traffic class
+		)"0:00:00:"$(                : Flow label
+		)"00:00:"$(                  : Payload length
+		)"01:"$(                     : Next header
+		)"04:"$(                     : Hop limit
+		)"$h1_ipv6:"$(               : IP saddr
+		)"$h2_ipv6:"$(               : IP daddr
+		)
+	echo "$p"
+}
+
+__ipv6_header_corrupted_test()
+{
+	# $1: label for the log line, $2: IP version digit for the header.
+	local desc=$1 ipver=$2
+	local trap_name="ip_header_corrupted" payload mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	# NOTE(review): this filter is IPv4-only although the frames below
+	# carry EtherType 86:DD, so it may never match them - confirm intent.
+	tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \
+		flower dst_ip $h2_ipv4 action drop
+
+	payload=$(ipv6_payload_get $ipver)
+
+	# Endless stream of frames with that header, sent to the router.
+	$MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+
+	log_test "IP header corrupted: $desc: IPv6"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101
+}
+
+ip_header_corrupted_test()
+{
+	# Known-good header values. Every IPv4 run below replaces exactly one
+	# of them with a wrong value and keeps the other two.
+	local ipv="4" ihl="5" checksum="00:F4"
+
+	__ipv4_header_corrupted_test "wrong IP version" 5 $ihl $checksum
+	__ipv4_header_corrupted_test "wrong IHL" $ipv 4 $checksum
+	__ipv4_header_corrupted_test "wrong checksum" $ipv $ihl "00:00"
+	# For IPv6 only the version field is corrupted.
+	__ipv6_header_corrupted_test "wrong IP version" 5
+}
+
+ipv6_mc_dip_reserved_scope_test()
+{
+	local trap_name="ipv6_mc_dip_reserved_scope" dip=FF00:: mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	# Egress filter 101 on $rp2, matching the stream by destination IP.
+	tc filter add dev $rp2 egress protocol ipv6 pref 1 handle 101 \
+		flower dst_ip $dip action drop
+
+	# Endless IPv6 UDP stream to $dip, a multicast address whose scope
+	# field is 0 (reserved), sent to an IPv6 multicast MAC.
+	$MZ $h1 -6 -t udp "sp=54321,dp=12345" -c 0 -p 100 -b \
+		"33:33:00:00:00:00" -B $dip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+
+	log_test "IPv6 multicast destination IP reserved scope"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" 1 101
+}
+
+ipv6_mc_dip_interface_local_scope_test()
+{
+	local trap_name="ipv6_mc_dip_interface_local_scope" dip=FF01:: mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	# Egress filter 101 on $rp2, matching the stream by destination IP.
+	tc filter add dev $rp2 egress protocol ipv6 pref 1 handle 101 \
+		flower dst_ip $dip action drop
+
+	# Endless IPv6 UDP stream to $dip, a multicast address whose scope
+	# field is 1 (interface-local), sent to an IPv6 multicast MAC.
+	$MZ $h1 -6 -t udp "sp=54321,dp=12345" -c 0 -p 100 -b \
+		"33:33:00:00:00:00" -B $dip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+
+	log_test "IPv6 multicast destination IP interface-local scope"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" 1 101
+}
+
+__blackhole_route_test()
+{
+	# $1: IP version digit, used as "-$1" for both ip and mausezahn,
+	# $2: prefix of the blackhole route, $3: tc protocol, $4: destination
+	# IP of the stream, $5: ip_proto for the tc filter (default "icmp").
+	local flags=$1 subnet=$2 proto=$3 dip=$4
+	local ip_proto=${5:-"icmp"}
+	local trap_name="blackhole_route" mz_pid
+
+	RET=0
+
+	ping_check $trap_name
+
+	# The blackhole route exists only for the duration of this test.
+	ip -$flags route add blackhole $subnet
+	tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+		flower skip_hw dst_ip $dip ip_proto $ip_proto action drop
+
+	# Endless UDP stream through the router towards the blackhole route.
+	$MZ $h1 -$flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $rp1mac \
+		-B $dip -d 1msec -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $rp2 101
+	log_test "Blackhole route: IPv$flags"
+
+	devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
+	ip -$flags route del blackhole $subnet
+}
+
+blackhole_route_test()
+{
+	__blackhole_route_test "4" "198.51.100.0/30" "ip" $h2_ipv4	# icmp default
+	__blackhole_route_test "6" "2001:db8:2::/120" "ipv6" $h2_ipv6 "icmpv6"
+}
+
+irif_disabled_test()
+{
+	local trap_name="irif_disabled" mz_pid
+	local t0_packets t0_bytes t1_packets t1_bytes
+
+	RET=0
+
+	ping_check $trap_name
+
+	devlink_trap_action_set $trap_name "trap"
+
+	# When the RIF of a physical port ("Sub-port RIF") is destroyed, the
+	# STP state of the {Port, VLAN} is blocked first, so no packet can
+	# get into the RIF. A bridge makes this trap visible: while the
+	# bridge RIF is being destroyed there is a small time window in which
+	# packets still go into the RIF although it is already disabled.
+	ip link add dev br0 type bridge
+	ip link set dev $rp1 master br0
+	ip address flush dev $rp1
+	__addr_add_del br0 add 192.0.2.2/24
+	ip link set dev br0 up
+
+	t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+	t0_bytes=$(devlink_trap_rx_bytes_get $trap_name)
+
+	# Endless stream to h2 via the br0 RIF, which is removed further down.
+	$MZ $h1 -t udp "sp=54321,dp=12345" -c 0 -p 100 -a own -b $rp1mac \
+		-B $h2_ipv4 -q &
+	mz_pid=$!
+
+	# Give the packets time to reach the bridge before its RIF goes away.
+	sleep 1
+
+	# Removing the addresses dismantles the RIF.
+	ip address flush dev br0
+
+	t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+	t1_bytes=$(devlink_trap_rx_bytes_get $trap_name)
+
+	if [[ $t0_packets -eq $t1_packets && $t0_bytes -eq $t1_bytes ]]; then
+		check_err 1 "Trap stats idle when packets should be trapped"
+	fi
+
+	log_test "Ingress RIF disabled"
+
+	# Stop the stream, release $rp1 from br0 and give it back the two
+	# addresses that were flushed when it was enslaved.
+	kill $mz_pid && wait $mz_pid &> /dev/null
+	ip link set dev $rp1 nomaster
+	__addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
+	ip link del dev br0 type bridge
+	devlink_trap_action_set $trap_name "drop"
+}
+
+erif_disabled_test()
+{
+	# Check the erif_disabled trap counters while the br0 RIF is being
+	# disabled. rp2mac is local so that it does not leak globally.
+	local trap_name="erif_disabled" rp2mac mz_pid
+	local t0_packets t0_bytes t1_packets t1_bytes
+
+	RET=0
+
+	ping_check $trap_name
+
+	devlink_trap_action_set $trap_name "trap"
+	ip link add dev br0 type bridge
+	ip add flush dev $rp1
+	ip link set dev $rp1 master br0
+	__addr_add_del br0 add 192.0.2.2/24
+	ip link set dev br0 up
+
+	t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+	t0_bytes=$(devlink_trap_rx_bytes_get $trap_name)
+
+	rp2mac=$(mac_get $rp2)
+
+	# Generate packets that should go out through br0 RIF that will be
+	# removed later
+	$MZ $h2 -t udp "sp=54321,dp=12345" -c 0 -p 100 -a own -b $rp2mac \
+		-B 192.0.2.1 -q &
+	mz_pid=$!
+
+	sleep 5
+	# Unlinking the port from the bridge will disable the RIF associated
+	# with br0 as it is no longer an upper of any mlxsw port.
+	ip link set dev $rp1 nomaster
+
+	t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+	t1_bytes=$(devlink_trap_rx_bytes_get $trap_name)
+
+	if [[ $t0_packets -eq $t1_packets && $t0_bytes -eq $t1_bytes ]]; then
+		check_err 1 "Trap stats idle when packets should be trapped"
+	fi
+
+	log_test "Egress RIF disabled"
+
+	kill $mz_pid && wait $mz_pid &> /dev/null
+	__addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
+	ip link del dev br0 type bridge
+	devlink_trap_action_set $trap_name "drop"
+}
+
+trap cleanup EXIT	# run cleanup whenever the script exits
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
new file mode 100755
index 000000000..1d157b1bd
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
@@ -0,0 +1,552 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap L3 exceptions functionality over mlxsw.
+# Check all exception traps to make sure they are triggered under the right
+# conditions.
+
+# +---------------------------------+
+# | H1 (vrf)                        |
+# |    + $h1                        |
+# |    | 192.0.2.1/24               |
+# |    | 2001:db8:1::1/64           |
+# |    |                            |
+# |    |  default via 192.0.2.2     |
+# |    |  default via 2001:db8:1::2 |
+# +----|----------------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# |    + $rp1                                                                 |
+# |        192.0.2.2/24                                                       |
+# |        2001:db8:1::2/64                                                   |
+# |                                                                           |
+# |        2001:db8:2::2/64                                                   |
+# |        198.51.100.2/24                                                    |
+# |    + $rp2                                                                 |
+# |    |                                                                      |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|----------------------------+
+# |    |  default via 198.51.100.2  |
+# |    |  default via 2001:db8:2::2 |
+# |    |                            |
+# |    | 2001:db8:2::1/64           |
+# |    | 198.51.100.1/24            |
+# |    + $h2                        |
+# | H2 (vrf)                        |
+# +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	mtu_value_is_too_small_test
+	ttl_value_is_too_small_test
+	mc_reverse_path_forwarding_test
+	reject_route_test
+	unresolved_neigh_test
+	ipv4_lpm_miss_test
+	ipv6_lpm_miss_test
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+require_command $MCD
+require_command $MC_CLI
+table_name=selftests
+
+h1_create()
+{
+	# H1: addresses, default routes via the router in VRF v$h1, clsact.
+	local h1_vrf="v$h1"
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+	ip -4 route add default vrf $h1_vrf nexthop via 192.0.2.2
+	ip -6 route add default vrf $h1_vrf nexthop via 2001:db8:1::2
+	tc qdisc add dev $h1 clsact
+}
+
+h1_destroy()
+{
+	# Undo h1_create in reverse order: clsact, routes, addresses.
+	local h1_vrf="v$h1"
+	tc qdisc del dev $h1 clsact
+	ip -6 route del default vrf $h1_vrf nexthop via 2001:db8:1::2
+	ip -4 route del default vrf $h1_vrf nexthop via 192.0.2.2
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	# H2: addresses plus default routes via the router in VRF v$h2.
+	simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64
+	ip -4 route add default vrf "v$h2" nexthop via 198.51.100.2
+	ip -6 route add default vrf "v$h2" nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+	# Reverse of h2_create: default routes first, then the addresses.
+	ip -6 route del default vrf "v$h2" nexthop via 2001:db8:2::2
+	ip -4 route del default vrf "v$h2" nexthop via 198.51.100.2
+	simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64
+}
+
+router_create()
+{
+	local rport
+	# Ports up first, then clsact on $rp2 and the router addresses.
+	for rport in $rp1 $rp2; do ip link set dev $rport up; done
+	tc qdisc add dev $rp2 clsact
+
+	__addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
+	__addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64
+}
+
+router_destroy()
+{
+	local rport
+	# Mirror image of router_create: addresses, clsact, then links down.
+	__addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64
+	__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
+
+	tc qdisc del dev $rp2 clsact
+	for rport in $rp2 $rp1; do ip link set dev $rport down; done
+}
+
+setup_prepare()
+{
+	# Map the four test ports onto the roles from the diagram above, then
+	# build the topology: hosts first, router last.
+	h1=${NETIFS[p1]}; rp1=${NETIFS[p2]}
+	rp2=${NETIFS[p3]}; h2=${NETIFS[p4]}
+
+	# Destination MAC for the traffic the tests inject from $h1.
+	rp1mac=$(mac_get $rp1)
+
+	start_mcd
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+
+	router_create
+}
+
+cleanup()
+{
+	pre_cleanup
+	# Installed as the EXIT handler; undoes setup_prepare in reverse order.
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+	# start_mcd ran first during setup, so $MCD is stopped last.
+	kill_mcd
+}
+
+ping_check()
+{	# callers pass the trap name, but no argument is read here
+	ping_do $h1 198.51.100.1
+	check_err $? "Packets that should not be trapped were trapped"
+}
+
+trap_action_check()
+{
+	# $1: trap name, $2: expected action; a mismatch is reported via check_err.
+	local trap_name=$1 expected_action=$2 action
+
+	# Quote both operands: an empty value must compare, not break "[".
+	action=$(devlink_trap_action_get "$trap_name")
+	[ "$action" = "$expected_action" ] ||
+		check_err 1 "Trap $trap_name has wrong action: $action"
+}
+
+mtu_value_is_too_small_test()
+{
+	# Oversized DF packets must hit the mtu_value_is_too_small trap, and
+	# the matching ICMP error must arrive at $h1.
+	local trap_name="mtu_value_is_too_small" expected_action="trap" mz_pid
+	local icmp_match="dev $h1 ingress protocol ip pref 1 handle 101"
+	RET=0
+
+	ping_check "$trap_name"
+	trap_action_check "$trap_name" "$expected_action"
+
+	# Count ICMP type 3 (Destination Unreachable), code 4 (Fragmentation
+	# Needed and Don't Fragment was Set) coming back to $h1.
+	tc filter add $icmp_match \
+		flower skip_hw ip_proto icmp type 3 code 4 action pass
+
+	mtu_set $rp2 1300
+
+	# UDP packets padded to 1400 with the don't fragment flag set; they
+	# are bigger than the MTU of 1300 configured on $rp2 above.
+	$MZ $h1 -t udp "sp=54321,dp=12345,df" -p 1400 -c 0 -d 1msec -b $rp1mac \
+		-B 198.51.100.1 -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test "$trap_name"
+
+	tc_check_packets_hitting "dev $h1 ingress" 101
+	check_err $? "Packets were not received to h1"
+
+	log_test "MTU value is too small"
+
+	mtu_restore $rp2
+
+	kill $mz_pid && wait $mz_pid &> /dev/null
+	tc filter del $icmp_match flower
+}
+
+__ttl_value_is_too_small_test()
+{
+	# $1: TTL to put into the generated packets.
+	local ttl_val=$1
+	local trap_name="ttl_value_is_too_small" expected_action="trap" mz_pid
+	local icmp_match="dev $h1 ingress protocol ip pref 1 handle 101"
+
+	RET=0
+
+	ping_check "$trap_name"
+	trap_action_check "$trap_name" "$expected_action"
+
+	# Count ICMP type 11 (Time Exceeded), code 0 (Time to Live exceeded
+	# in Transit) arriving at $h1.
+	tc filter add $icmp_match \
+		 flower skip_hw ip_proto icmp type 11 code 0 action pass
+
+	# Keep sending UDP packets that carry the requested TTL.
+	$MZ $h1 -t udp "ttl=$ttl_val,sp=54321,dp=12345" -c 0 -d 1msec \
+		-b $rp1mac -B 198.51.100.1 -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test "$trap_name"
+
+	tc_check_packets_hitting "dev $h1 ingress" 101
+	check_err $? "Packets were not received to h1"
+
+	log_test "TTL value is too small: TTL=$ttl_val"
+
+	kill $mz_pid && wait $mz_pid &> /dev/null
+	tc filter del $icmp_match flower
+}
+
+ttl_value_is_too_small_test()
+{
+	local ttl	# TTL values exercised, in this order
+	for ttl in 0 1; do __ttl_value_is_too_small_test $ttl; done
+}
+
+start_mcd()
+{
+	# Write a config with one "phyint <port> enable" line per test port
+	# into a fresh temporary directory and start $MCD on it.
+	local i conf
+	SMCROUTEDIR="$(mktemp -d)" || return 1	# never fall back to "/"
+	conf=$SMCROUTEDIR/$table_name.conf
+	for ((i = 1; i <= NUM_NETIFS; ++i)); do
+		echo "phyint ${NETIFS[p$i]} enable" >> "$conf"
+	done
+	$MCD -N -I $table_name -f "$conf" -P "$SMCROUTEDIR/$table_name.pid"
+}
+
+kill_mcd()
+{
+	pkill $MCD	# pattern match: signals every process matching $MCD
+	rm -rf -- "$SMCROUTEDIR"	# quoted: survives a temp path with spaces
+}
+
+__mc_reverse_path_forwarding_test()
+{
+	# $1 description, $2 source IP, $3 multicast group, $4 destination MAC,
+	# $5 tc protocol ("ip"/"ipv6"), $6 optional extra flags for $MZ.
+	local desc=$1 src_ip=$2 dst_ip=$3 dst_mac=$4 proto=$5
+	local flags=${6:-""}
+	local trap_name="mc_reverse_path_forwarding"
+	local expected_action="trap"
+	local mz_pid
+	local drop_match="dev $rp2 egress protocol $proto pref 1 handle 101"
+
+	RET=0
+
+	ping_check "$trap_name"
+	trap_action_check "$trap_name" "$expected_action"
+
+	tc filter add $drop_match \
+		flower dst_ip $dst_ip ip_proto udp action drop
+
+	# NOTE(review): presumably this installs a multicast route with $rp1
+	# as input and $rp2 as output; it is not removed again in this function.
+	$MC_CLI -I $table_name add $rp1 $src_ip $dst_ip $rp2
+
+	# The multicast stream is injected from $h2.
+	$MZ $h2 $flags -t udp "sp=54321,dp=12345" -c 0 -p 128 \
+		-a 00:11:22:33:44:55 -b $dst_mac \
+		-A $src_ip -B $dst_ip -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test "$trap_name"
+
+	tc_check_packets "dev $rp2 egress" 101 0
+	check_err $? "Packets were not dropped"
+
+	log_test "Multicast reverse path forwarding: $desc"
+
+	kill $mz_pid && wait $mz_pid &> /dev/null
+	tc filter del $drop_match flower
+}
+
+mc_reverse_path_forwarding_test()
+{	# args: desc, src IP, group IP, group MAC, tc proto, [mz flags]
+	__mc_reverse_path_forwarding_test "IPv4" "192.0.2.1" "225.1.2.3" \
+		"01:00:5e:01:02:03" "ip"
+	__mc_reverse_path_forwarding_test "IPv6" "2001:db8:1::1" "ff0e::3" \
+		"33:33:00:00:00:03" "ipv6" "-6"
+}
+
+__reject_route_test()
+{
+	# $1 description, $2 destination IP, $3 tc protocol, $4 tc ip_proto,
+	# $5/$6 expected ICMP type/code, $7 unreachable prefix, $8 optional
+	# extra flags for $MZ.
+	local desc=$1 dst_ip=$2 proto=$3 ip_proto=$4 type=$5 code=$6
+	local unreachable=$7 flags=${8:-""}
+	local trap_name="reject_route"
+	local expected_action="trap"
+	local mz_pid
+	local icmp_match="dev $h1 ingress protocol $proto pref 1 handle 101"
+
+	RET=0
+
+	ping_check "$trap_name"
+	trap_action_check "$trap_name" "$expected_action"
+
+	# Count the ICMP errors of the given type and code that arrive at
+	# $h1 while the unreachable route is installed.
+	tc filter add $icmp_match flower \
+		skip_hw ip_proto $ip_proto type $type code $code action pass
+
+	ip route add unreachable $unreachable
+
+	# Generate packets to h2. The destination IP is unreachable.
+	$MZ $flags $h1 -t udp "sp=54321,dp=12345" -c 0 -d 1msec -b $rp1mac \
+		-B $dst_ip -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test "$trap_name"
+
+	tc_check_packets_hitting "dev $h1 ingress" 101
+	check_err $? "ICMP packet was not received to h1"
+
+	log_test "Reject route: $desc"
+
+	kill $mz_pid && wait $mz_pid &> /dev/null
+	ip route del unreachable $unreachable
+	tc filter del $icmp_match flower
+}
+
+reject_route_test()
+{
+	# IPv4: expect ICMP type 3 (Destination Unreachable), code 1 (Host
+	# Unreachable); the /26 is more specific than $rp2's connected /24.
+	__reject_route_test "IPv4" 198.51.100.1 "ip" "icmp" 3 1 \
+		"198.51.100.0/26"
+	# IPv6: expect ICMPv6 type 1 (Destination Unreachable), code 0 (No
+	# Route); the /66 is more specific than $rp2's connected /64.
+	__reject_route_test "IPv6" 2001:db8:2::1 "ipv6" "icmpv6" 1 0 \
+		"2001:db8:2::0/66" "-6"
+}
+
+__host_miss_test()
+{
+	# $1: description for the log, $2: destination IP reached via $rp2.
+	local desc=$1 dip=$2
+	local trap_name="unresolved_neigh"
+	local expected_action="trap"
+	local t0_packets t1_packets	# used to leak into the global scope
+
+	RET=0
+
+	ping_check $trap_name
+	trap_action_check $trap_name $expected_action
+
+	ip neigh flush dev $rp2
+
+	t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+	# Generate packets to h2 (will incur an unresolved neighbor).
+	# The ping should pass and devlink counters should be increased.
+	ping_do $h1 $dip
+	check_err $? "ping failed: $desc"
+
+	t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+	if [[ $t0_packets -eq $t1_packets ]]; then
+		check_err 1 "Trap counter did not increase"
+	fi
+
+	log_test "Unresolved neigh: host miss: $desc"
+}
+
+__invalid_nexthop_test()
+{
+	# $1 description, $2 destination IP, $3 extra address added to $h2,
+	# $4 its prefix length, $5 nexthop address that does not exist.
+	local desc=$1 dip=$2 extra_add=$3 subnet=$4 via_add=$5
+	# Was expanded without ever being set; now an optional 6th argument.
+	local flags=${6:-""}
+	local trap_name="unresolved_neigh"
+	local expected_action="trap"
+	local t0_packets t1_packets
+
+	RET=0
+
+	ping_check $trap_name
+	trap_action_check $trap_name $expected_action
+
+	ip address add $extra_add/$subnet dev $h2
+
+	# Check that correct route does not trigger unresolved_neigh
+	ip $flags route add $dip via $extra_add dev $rp2
+
+	# Generate packets in order to discover all neighbours.
+	# Without it, counters of unresolved_neigh will be increased
+	# during neighbours discovery and the check below will fail
+	# for a wrong reason
+	ping_do $h1 $dip
+
+	t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+	ping_do $h1 $dip
+	t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+	if [[ $t0_packets -ne $t1_packets ]]; then
+		check_err 1 "Trap counter increased when it should not"
+	fi
+
+	ip $flags route del $dip via $extra_add dev $rp2
+
+	# Check that a route to a nexthop that does not exist triggers
+	# unresolved_neigh
+	ip $flags route add $dip via $via_add dev $h2
+
+	t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+	ping_do $h1 $dip
+	t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+	if [[ $t0_packets -eq $t1_packets ]]; then
+		check_err 1 "Trap counter did not increase"
+	fi
+
+	ip $flags route del $dip via $via_add dev $h2
+	ip address del $extra_add/$subnet dev $h2
+	log_test "Unresolved neigh: nexthop does not exist: $desc"
+}
+
+unresolved_neigh_test()
+{	# args: description, destination IP[, extra IP, prefix len, nexthop]
+	__host_miss_test "IPv4" 198.51.100.1
+	__host_miss_test "IPv6" 2001:db8:2::1
+	__invalid_nexthop_test "IPv4" 198.51.100.1 198.51.100.3 24 198.51.100.4
+	__invalid_nexthop_test "IPv6" 2001:db8:2::1 2001:db8:2::3 64 \
+		2001:db8:2::4
+}
+
+vrf_without_routes_create()
+{
+	local rport
+
+	# Creating the VRF takes the links down and up again, and by default
+	# IPv6 addresses are lost on link down. keep_addr_on_down keeps them.
+	for rport in $rp1 $rp2; do
+		sysctl_set net.ipv6.conf.$rport.keep_addr_on_down 1
+	done
+
+	ip link add dev vrf1 type vrf table 101
+	for rport in $rp1 $rp2; do ip link set dev $rport master vrf1; done
+	ip link set dev vrf1 up
+
+	setup_wait	# wait for rp1 and rp2 to be up
+}
+
+vrf_without_routes_destroy()
+{
+	local rport
+	# Release the ports, delete vrf1, then restore the saved sysctls.
+	for rport in $rp1 $rp2; do ip link set dev $rport nomaster; done
+	ip link del dev vrf1
+
+	for rport in $rp2 $rp1; do
+		sysctl_restore net.ipv6.conf.$rport.keep_addr_on_down
+	done
+	setup_wait	# wait for interfaces to be up
+}
+
+ipv4_lpm_miss_test()
+{
+	# IPv4 packets routed in a VRF that has no matching route are expected
+	# to be reported by the ipv4_lpm_miss trap.
+	local trap_name="ipv4_lpm_miss" expected_action="trap" mz_pid
+
+	RET=0
+
+	ping_check "$trap_name"
+	trap_action_check "$trap_name" "$expected_action"
+
+	# Move the router ports into a VRF that has no default route.
+	vrf_without_routes_create
+
+	# Send to 203.0.113.1, for which that VRF has no matching route.
+	$MZ $h1 -t udp "sp=54321,dp=12345" -c 0 -d 1msec -b $rp1mac \
+		-B 203.0.113.1 -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test "$trap_name"
+
+	log_test "LPM miss: IPv4"
+
+	kill $mz_pid && wait $mz_pid &> /dev/null
+	vrf_without_routes_destroy
+}
+
+ipv6_lpm_miss_test()
+{
+	# IPv6 packets routed in a VRF that has no matching route are expected
+	# to be reported by the ipv6_lpm_miss trap.
+	local trap_name="ipv6_lpm_miss" expected_action="trap" mz_pid
+
+	RET=0
+
+	ping_check "$trap_name"
+	trap_action_check "$trap_name" "$expected_action"
+
+	# Move the router ports into a VRF that has no default route.
+	vrf_without_routes_create
+
+	# Send to 2001:db8::1, for which that VRF has no matching route.
+	$MZ -6 $h1 -t udp "sp=54321,dp=12345" -c 0 -d 1msec -b $rp1mac \
+		-B 2001:db8::1 -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test "$trap_name"
+
+	log_test "LPM miss: IPv6"
+
+	kill $mz_pid && wait $mz_pid &> /dev/null
+	vrf_without_routes_destroy
+}
+
+trap cleanup EXIT	# cleanup() runs on every exit path
+
+setup_prepare
+setup_wait
+
+tests_run	# not defined in this script; presumably from the sourced lib.sh
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
new file mode 100755
index 000000000..508a702f0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
@@ -0,0 +1,361 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap policer functionality over mlxsw.
+
+# +---------------------------------+
+# | H1 (vrf)                        |
+# |    + $h1                        |
+# |    | 192.0.2.1/24               |
+# |    |                            |
+# |    |  default via 192.0.2.2     |
+# +----|----------------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# |    + $rp1                                                                 |
+# |        192.0.2.2/24                                                       |
+# |                                                                           |
+# |        198.51.100.2/24                                                    |
+# |    + $rp2                                                                 |
+# |    |                                                                      |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|----------------------------+
+# |    |  default via 198.51.100.2  |
+# |    |                            |
+# |    | 198.51.100.1/24            |
+# |    + $h2                        |
+# | H2 (vrf)                        |
+# +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	rate_limits_test
+	burst_limits_test
+	rate_test
+	burst_test
+"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	# H1: address, MTU of 10000 and a default route via the router.
+	simple_if_init $h1 192.0.2.1/24
+	mtu_set $h1 10000
+	ip -4 route add default vrf "v$h1" nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+	# Reverse of h1_create: route, MTU, then the address.
+	ip -4 route del default vrf "v$h1" nexthop via 192.0.2.2
+	mtu_restore $h1
+	simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+	# H2: address, MTU of 10000 and a default route via the router.
+	simple_if_init $h2 198.51.100.1/24
+	mtu_set $h2 10000
+	ip -4 route add default vrf "v$h2" nexthop via 198.51.100.2
+}
+
+h2_destroy()
+{
+	# Reverse of h2_create: route, MTU, then the address.
+	ip -4 route del default vrf "v$h2" nexthop via 198.51.100.2
+	mtu_restore $h2
+	simple_if_fini $h2 198.51.100.1/24
+}
+
+router_create()
+{
+	local rport
+	for rport in $rp1 $rp2; do ip link set dev $rport up; done
+
+	__addr_add_del $rp1 add 192.0.2.2/24
+	__addr_add_del $rp2 add 198.51.100.2/24
+	for rport in $rp1 $rp2; do mtu_set $rport 10000; done
+
+	# The tests send to 198.51.100.100 and read the blackhole_route trap
+	# counters, so the route is blackholed and the trap action is "trap".
+	ip -4 route add blackhole 198.51.100.100
+	devlink trap set $DEVLINK_DEV trap blackhole_route action trap
+}
+
+router_destroy()
+{
+	# Reverse of router_create: trap action, blackhole route, MTUs,
+	# addresses, and finally the links.
+	local rport
+	devlink trap set $DEVLINK_DEV trap blackhole_route action drop
+	ip -4 route del blackhole 198.51.100.100
+
+	for rport in $rp2 $rp1; do mtu_restore $rport; done
+	__addr_add_del $rp2 del 198.51.100.2/24
+	__addr_add_del $rp1 del 192.0.2.2/24
+
+	for rport in $rp2 $rp1; do ip link set dev $rport down; done
+}
+
+setup_prepare()
+{
+	# Map the four test ports onto the roles from the diagram above, then
+	# build the topology: hosts first, router last.
+	h1=${NETIFS[p1]}; rp1=${NETIFS[p2]}
+	rp2=${NETIFS[p3]}; h2=${NETIFS[p4]}
+
+	# Passed to start_traffic by the rate and burst tests.
+	rp1_mac=$(mac_get $rp1)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+}
+
+cleanup()
+{
+	pre_cleanup
+	# Installed as the EXIT handler; undoes setup_prepare in reverse order.
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+
+	# Reload to ensure devlink-trap settings are back to default.
+	devlink_reload
+}
+
+rate_limits_test()
+{
+	# Rates 0 and 2000000001 must be rejected, 1 and 2000000000 accepted.
+	local policer_set="devlink trap policer set $DEVLINK_DEV policer 1"
+	RET=0
+
+	$policer_set rate 0 &> /dev/null
+	check_fail $? "Policer rate was changed to rate lower than limit"
+	$policer_set rate 2000000001 &> /dev/null
+	check_fail $? "Policer rate was changed to rate higher than limit"
+	$policer_set rate 1
+	check_err $? "Failed to set policer rate to minimum"
+	$policer_set rate 2000000000
+	check_err $? "Failed to set policer rate to maximum"
+
+	log_test "Trap policer rate limits"
+}
+
+burst_limits_test()
+{
+	# Accepted burst sizes are powers of 2 from 16 up to 2^24.
+	local policer_set="devlink trap policer set $DEVLINK_DEV policer 1"
+	RET=0
+
+	$policer_set burst 0 &> /dev/null
+	check_fail $? "Policer burst size was changed to 0"
+	$policer_set burst 17 &> /dev/null
+	check_fail $? "Policer burst size was changed to burst size that is not power of 2"
+	$policer_set burst 8 &> /dev/null
+	check_fail $? "Policer burst size was changed to burst size lower than limit"
+	$policer_set burst $((2**25)) &> /dev/null
+	check_fail $? "Policer burst size was changed to burst size higher than limit"
+
+	$policer_set burst 16
+	check_err $? "Failed to set policer burst size to minimum"
+	$policer_set burst $((2**24))
+	check_err $? "Failed to set policer burst size to maximum"
+	log_test "Trap policer burst size limits"
+}
+
+trap_rate_get()
+{
+	# Print the blackhole_route trap rate in packets per second, averaged
+	# over a 10 second window (integer division).
+	local before after
+	before=$(devlink_trap_rx_packets_get blackhole_route)
+	sleep 10
+	after=$(devlink_trap_rx_packets_get blackhole_route)
+	echo $(((after - before) / 10))
+}
+
+policer_drop_rate_get()
+{
+	# $1: policer ID. Print its drop rate in packets per second, averaged
+	# over a 10 second window (integer division).
+	local id=$1
+	local before after
+	before=$(devlink_trap_policer_rx_dropped_get $id)
+	sleep 10
+	after=$(devlink_trap_policer_rx_dropped_get $id)
+	echo $(((after - before) / 10))
+}
+
+__rate_test()
+{
+	# $1: ID of the policer that gets bound to the l3_drops trap group.
+	local id=$1 rate pct drop_rate
+	local traffic_args="$h1 192.0.2.1 198.51.100.100 $rp1_mac"
+	RET=0
+
+	devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512
+	devlink trap group set $DEVLINK_DEV group l3_drops policer $id
+
+	# Phase 1: send as fast as possible. The policer has to drop packets
+	# and the measured trap rate has to be about 1000 pps.
+	log_info "=== Tx rate: Highest, Policer rate: 1000 pps ==="
+
+	start_traffic $traffic_args
+
+	sleep 5 # Let the rate settle before measuring
+
+	rate=$(trap_rate_get)
+	pct=$((100 * (rate - 1000) / 1000))
+	((-10 <= pct && pct <= 10))
+	check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-10%"
+	log_info "Expected rate 1000 pps, measured rate $rate pps"
+
+	drop_rate=$(policer_drop_rate_get $id)
+	(( drop_rate > 0 ))
+	check_err $? "Expected non-zero policer drop rate, got 0"
+	log_info "Measured policer drop rate of $drop_rate pps"
+
+	stop_traffic
+
+	# Phase 2: send at 1000 pps, which the policer must let through
+	# without dropping anything.
+	log_info "=== Tx rate: 1000 pps, Policer rate: 1000 pps ==="
+
+	start_traffic $traffic_args -d 1msec
+
+	sleep 5 # Let the rate settle before measuring
+
+	drop_rate=$(policer_drop_rate_get $id)
+	(( drop_rate == 0 ))
+	check_err $? "Expected zero policer drop rate, got a drop rate of $drop_rate pps"
+	log_info "Measured policer drop rate of $drop_rate pps"
+
+	stop_traffic
+
+	# Phase 3: unbind the policer and send as fast as possible again.
+	# Nothing may be dropped by the policer and the measured trap rate
+	# has to exceed 1000 pps.
+	log_info "=== Tx rate: Highest, Policer rate: No policer ==="
+
+	devlink trap group set $DEVLINK_DEV group l3_drops nopolicer
+
+	start_traffic $traffic_args
+
+	rate=$(trap_rate_get)
+	(( rate > 1000 ))
+	check_err $? "Expected rate higher than 1000 pps, got $rate pps"
+	log_info "Measured rate $rate pps"
+
+	drop_rate=$(policer_drop_rate_get $id)
+	(( drop_rate == 0 ))
+	check_err $? "Expected zero policer drop rate, got a drop rate of $drop_rate pps"
+	log_info "Measured policer drop rate of $drop_rate pps"
+
+	stop_traffic
+
+	log_test "Trap policer rate"
+}
+
+rate_test()
+{
+	local id
+	# Run __rate_test once per ID printed by devlink_trap_policer_ids_get.
+	for id in $(devlink_trap_policer_ids_get); do
+		echo
+		log_info "Running rate test for policer $id"
+		__rate_test $id
+	done
+}
+
+__burst_test()
+{
+	# $1: ID of the policer that gets bound to the l3_drops trap group.
+	local id=$1 t0_rx t0_drop t1_rx t1_drop rx drop
+	local traffic_args="$h1 192.0.2.1 198.51.100.100 $rp1_mac"
+	RET=0
+
+	devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512
+	devlink trap group set $DEVLINK_DEV group l3_drops policer $id
+
+	# Phase 1: a burst of 16 packets with the policer bound. All 16 have
+	# to be received and the policer must not drop any of them.
+	log_info "=== Tx burst size: 16, Policer burst size: 512 ==="
+
+	t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
+	t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+	start_traffic $traffic_args -c 16
+
+	t1_rx=$(devlink_trap_rx_packets_get blackhole_route)
+	t1_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+	rx=$((t1_rx - t0_rx))
+	(( rx == 16 ))
+	check_err $? "Expected burst size of 16 packets, got $rx packets"
+	log_info "Expected burst size of 16 packets, measured burst size of $rx packets"
+
+	drop=$((t1_drop - t0_drop))
+	(( drop == 0 ))
+	check_err $? "Expected zero policer drops, got $drop"
+	log_info "Measured policer drops of $drop packets"
+
+	# Phase 2: unbind the policer and send a burst of 64 packets. All 64
+	# have to be received and the policer must not drop any of them.
+	log_info "=== Tx burst size: 64, Policer burst size: No policer ==="
+
+	devlink trap group set $DEVLINK_DEV group l3_drops nopolicer
+
+	t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
+	t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+	start_traffic $traffic_args -c 64
+
+	t1_rx=$(devlink_trap_rx_packets_get blackhole_route)
+	t1_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+	rx=$((t1_rx - t0_rx))
+	(( rx == 64 ))
+	check_err $? "Expected burst size of 64 packets, got $rx packets"
+	log_info "Expected burst size of 64 packets, measured burst size of $rx packets"
+
+	drop=$((t1_drop - t0_drop))
+	(( drop == 0 ))
+	check_err $? "Expected zero policer drops, got $drop"
+	log_info "Measured policer drops of $drop packets"
+
+	log_test "Trap policer burst size"
+}
+
+burst_test()
+{
+	local id
+	# Run __burst_test once per ID printed by devlink_trap_policer_ids_get.
+	for id in $(devlink_trap_policer_ids_get); do
+		echo
+		log_info "Running burst size test for policer $id"
+		__burst_test $id
+	done
+}
+
+trap cleanup EXIT	# cleanup() runs on every exit path, including the reload
+
+setup_prepare
+setup_wait
+
+tests_run	# not defined in this script; presumably from the sourced lib.sh
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
new file mode 100755
index 000000000..8817851da
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
@@ -0,0 +1,263 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap tunnel exceptions functionality over mlxsw.
+# Check all exception traps to make sure they are triggered under the right
+# conditions.
+
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# |      192.0.2.1/28 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|-----+
+# | SW1               |     |
+# |              $swp1 +    |
+# |      192.0.2.2/28       |
+# |                         |
+# |  + g1 (gre)             |
+# |    loc=192.0.2.65       |
+# |    rem=192.0.2.66       |
+# |    tos=inherit          |
+# |                         |
+# |  + $rp1                 |
+# |  |  198.51.100.1/28     |
+# +--|----------------------+
+#    |
+# +--|----------------------+
+# |  |                 VRF2 |
+# | + $rp2                  |
+# |   198.51.100.2/28       |
+# +-------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	decap_error_test
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{	# H1 in the diagram above
+	simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{	# counterpart of h1_create, same arguments
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+vrf2_create()
+{	# VRF2 in the diagram above: $rp2, the peer of $rp1
+	simple_if_init $rp2 198.51.100.2/28
+}
+
+vrf2_destroy()
+{	# counterpart of vrf2_create, same arguments
+	simple_if_fini $rp2 198.51.100.2/28
+}
+
+switch_create()
+{	# SW1 in the diagram above
+	__addr_add_del $swp1 add 192.0.2.2/28
+	tc qdisc add dev $swp1 clsact
+	ip link set dev $swp1 up
+	# GRE tunnel g1: local 192.0.2.65, remote 192.0.2.66, no key.
+	tunnel_create g1 gre 192.0.2.65 192.0.2.66 tos inherit
+	__addr_add_del g1 add 192.0.2.65/32
+	ip link set dev g1 up
+	# Port facing $rp2 (VRF2 in the diagram).
+	__addr_add_del $rp1 add 198.51.100.1/28
+	ip link set dev $rp1 up
+}
+
+switch_destroy()
+{	# undoes switch_create in reverse order
+	ip link set dev $rp1 down
+	__addr_add_del $rp1 del 198.51.100.1/28
+	# g1 may have been re-created with a key by decap_error_test.
+	ip link set dev g1 down
+	__addr_add_del g1 del 192.0.2.65/32
+	tunnel_destroy g1
+	# Finally the port facing H1.
+	ip link set dev $swp1 down
+	tc qdisc del dev $swp1 clsact
+	__addr_add_del $swp1 del 192.0.2.2/28
+}
+
+setup_prepare()
+{
+	# Map the four test ports onto the roles from the diagram above, then
+	# build the topology.
+	h1=${NETIFS[p1]}; swp1=${NETIFS[p2]}
+	rp1=${NETIFS[p3]}; rp2=${NETIFS[p4]}
+
+	forwarding_enable
+	vrf_prepare
+
+	h1_create
+	switch_create
+	vrf2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+	# Installed as the EXIT handler; undoes setup_prepare in reverse order.
+	vrf2_destroy
+	switch_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+ecn_payload_get()	# print a keyless GRE header + inner IPv4 header as hex bytes
+{
+	p=$(:
+		)"0"$(		              : GRE flags
+	        )"0:00:"$(                    : Reserved + version
+		)"08:00:"$(		      : ETH protocol type
+		)"4"$(	                      : IP version
+		)"5:"$(                       : IHL
+		)"00:"$(                      : IP TOS
+		)"00:14:"$(                   : IP total length
+		)"00:00:"$(                   : IP identification
+		)"20:00:"$(                   : IP flags + frag off
+		)"30:"$(                      : IP TTL
+		)"01:"$(                      : IP proto
+		)"E7:E6:"$(    	              : IP header csum
+		)"C0:00:01:01:"$(             : IP saddr : 192.0.1.1
+		)"C0:00:02:01:"$(             : IP daddr : 192.0.2.1
+		)
+	echo $p	# each "$(: label)" above expands to nothing; p is not local
+}
+
+ecn_decap_test()
+{
+	# $1 log prefix, $2 outer ECN description, $3 outer "tos=" value.
+	local desc=$1 ecn_desc=$2 outer_tos=$3
+	local trap_name="decap_error"
+	# Scratch values; they used to leak into the global scope.
+	local rp1_mac rp2_mac payload mz_pid
+
+	RET=0
+
+	tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+		flower src_ip 192.0.1.1 dst_ip 192.0.2.1 action pass
+
+	rp1_mac=$(mac_get $rp1)
+	rp2_mac=$(mac_get $rp2)
+	payload=$(ecn_payload_get)
+
+	ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \
+		-A 192.0.2.66 -B 192.0.2.65 -t ip \
+			len=48,tos=$outer_tos,proto=47,p=$payload -q &
+
+	mz_pid=$!
+
+	devlink_trap_exception_test $trap_name
+
+	tc_check_packets "dev $swp1 egress" 101 0
+	check_err $? "Packets were not dropped"
+
+	log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc"
+
+	kill $mz_pid && wait $mz_pid &> /dev/null
+	tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower
+}
+
+ipip_payload_get()	# print a GRE header + inner IPv4 header as hex bytes
+{
+	local flags=$1; shift	# first hex digit of the GRE header
+	local key=$1; shift	# bytes inserted after the protocol type; may be empty
+
+	p=$(:
+		)"$flags"$(		      : GRE flags
+	        )"0:00:"$(                    : Reserved + version
+		)"08:00:"$(		      : ETH protocol type
+		)"$key"$( 		      : Key
+		)"4"$(	                      : IP version
+		)"5:"$(                       : IHL
+		)"00:"$(                      : IP TOS
+		)"00:14:"$(                   : IP total length
+		)"00:00:"$(                   : IP identification
+		)"20:00:"$(                   : IP flags + frag off
+		)"30:"$(                      : IP TTL
+		)"01:"$(                      : IP proto
+		)"E7:E6:"$(    	              : IP header csum
+		)"C0:00:01:01:"$(             : IP saddr : 192.0.1.1
+		)"C0:00:02:01:"$(             : IP daddr : 192.0.2.1
+		)
+	echo $p	# each "$(: label)" above expands to nothing; p is not local
+}
+
+no_matching_tunnel_test()
+{
+	# $1 description, $2 outer source IP; the remaining arguments (GRE
+	# flags, optional key bytes) are handed to ipip_payload_get.
+	local desc=$1 sip=$2 trap_name="decap_error"
+	local rp1_mac rp2_mac payload mz_pid	# used to leak as globals
+	shift; shift
+	RET=0
+
+	tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+		flower src_ip 192.0.1.1 dst_ip 192.0.2.1 action pass
+
+	rp1_mac=$(mac_get $rp1)
+	rp2_mac=$(mac_get $rp2)
+	payload=$(ipip_payload_get "$@")
+
+	ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \
+		-A $sip -B 192.0.2.65 -t ip len=48,proto=47,p=$payload -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test $trap_name
+
+	tc_check_packets "dev $swp1 egress" 101 0
+	check_err $? "Packets were not dropped"
+
+	log_test "$desc"
+
+	kill $mz_pid && wait $mz_pid &> /dev/null
+	tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower
+}
+
+decap_error_test()
+{
+	local sip=192.0.2.66	# correct source IP - the remote address
+	# The GRE key field is 32 bits wide: 233 is 0xE9, 232 a wrong key.
+	local key_e9="00:00:00:E9:" key_e8="00:00:00:E8:"
+
+	ecn_decap_test "Decap error" "ECT(1)" 01
+	ecn_decap_test "Decap error" "ECT(0)" 02
+	ecn_decap_test "Decap error" "CE" 03
+
+	no_matching_tunnel_test "Decap error: Source IP check failed" \
+		192.0.2.68 "0"
+	no_matching_tunnel_test \
+		"Decap error: Key exists but was not expected" $sip "2" $key_e9
+
+	# Destroy the tunnel and create new one with key
+	__addr_add_del g1 del 192.0.2.65/32
+	tunnel_destroy g1
+	tunnel_create g1 gre 192.0.2.65 192.0.2.66 tos inherit key 233
+	__addr_add_del g1 add 192.0.2.65/32
+
+	no_matching_tunnel_test \
+		"Decap error: Key does not exist but was expected" $sip "0"
+	no_matching_tunnel_test \
+		"Decap error: Packet has a wrong key field" $sip "2" $key_e8
+}
+
+trap cleanup EXIT	# cleanup() runs on every exit path
+
+setup_prepare
+setup_wait
+tests_run	# not defined in this script; presumably from the sourced lib.sh
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
new file mode 100755
index 000000000..10e0f3dbc
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
@@ -0,0 +1,327 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap tunnel drops and exceptions functionality over mlxsw.
+# Check all traps to make sure they are triggered under the right
+# conditions.
+
+# +--------------------+
+# | H1 (vrf)           |
+# |    + $h1           |
+# |    | 192.0.2.1/28  |
+# +----|---------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# | +--|--------------------------------------------------------------------+ |
+# | |  + $swp1                   BR1 (802.1d)                               | |
+# | |                                                                       | |
+# | |  + vx1 (vxlan)                                                        | |
+# | |    local 192.0.2.17                                                   | |
+# | |    id 1000 dstport $VXPORT                                            | |
+# | +-----------------------------------------------------------------------+ |
+# |                                                                           |
+# |    + $rp1                                                                 |
+# |    | 192.0.2.17/28                                                        |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|--------------------------------------------------------+
+# |    |                                             VRF2       |
+# |    + $rp2                                                   |
+# |      192.0.2.18/28                                          |
+# |                                                             |
+# +-------------------------------------------------------------+
+
+# The shared forwarding selftest libraries are located relative to this script.
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	decap_error_test
+	overlay_smac_is_mc_test
+"
+
+# setup_prepare reads four entries (p1..p4) from NETIFS.
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+# Default the VXLAN UDP destination port to 4789 unless VXPORT is already set.
+: ${VXPORT:=4789}
+export VXPORT
+
+# Set up host interface $h1 with address 192.0.2.1/28 via simple_if_init.
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+}
+
+# Undo h1_create: tear down $h1 and its 192.0.2.1/28 address.
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+# Build the switch side of the topology: VLAN-unaware bridge br1 enslaving
+# $swp1 and VXLAN device vx1 (VNI 1000, local 192.0.2.17), and the VXLAN
+# local address 192.0.2.17/28 on $rp1.
+switch_create()
+{
+	ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip link set dev br1 address $(mac_get $swp1)
+	ip link set dev br1 up
+
+	# The clsact qdisc is needed for the egress tc filters the tests
+	# install on $swp1.
+	tc qdisc add dev $swp1 clsact
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+
+	ip link add name vx1 type vxlan id 1000 local 192.0.2.17 \
+		dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx1 master br1
+	ip link set dev vx1 up
+
+	ip address add dev $rp1 192.0.2.17/28
+	ip link set dev $rp1 up
+}
+
+# Undo switch_create in reverse order: $rp1 address, vx1, $swp1 and its
+# clsact qdisc, and finally the bridge br1.
+switch_destroy()
+{
+	ip link set dev $rp1 down
+	ip address del dev $rp1 192.0.2.17/28
+
+	ip link set dev vx1 down
+	ip link set dev vx1 nomaster
+	ip link del dev vx1
+
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev br1 down
+	ip link del dev br1
+}
+
+# Set up $rp2 with address 192.0.2.18/28 via simple_if_init; traffic is later
+# injected from this interface.
+vrf2_create()
+{
+	simple_if_init $rp2 192.0.2.18/28
+}
+
+# Undo vrf2_create: tear down $rp2 and its 192.0.2.18/28 address.
+vrf2_destroy()
+{
+	simple_if_fini $rp2 192.0.2.18/28
+}
+
+# Map the four NETIFS entries to their roles and build the test topology.
+setup_prepare()
+{
+	# p1 <-> p2: host H1 and the bridged switch port.
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	# p3 <-> p4: the switch port holding the VXLAN local address and the
+	# port from which test traffic is sent.
+	rp1=${NETIFS[p3]}
+	rp2=${NETIFS[p4]}
+
+	vrf_prepare
+	forwarding_enable
+	h1_create
+	switch_create
+	vrf2_create
+}
+
+# Tear down everything setup_prepare created, in reverse order.
+cleanup()
+{
+	pre_cleanup
+
+	vrf2_destroy
+	switch_destroy
+	h1_destroy
+	forwarding_restore
+	vrf_cleanup
+}
+
+# Print the colon-separated hex payload used by ecn_decap_test: a VXLAN
+# header (VNI 1000) followed by an Ethernet frame addressed to $h1's MAC that
+# carries an IPv4 header from 192.0.2.3 to 192.0.2.1 with a TOS byte of 0.
+# The output keeps the trailing colon.
+ecn_payload_get()
+{
+	dest_mac=$(mac_get $h1)
+
+	# VXLAN header
+	p="08:"                        # VXLAN flags
+	p+="00:00:00:"                 # VXLAN reserved
+	p+="00:03:e8:"                 # VXLAN VNI : 1000
+	p+="00:"                       # VXLAN reserved
+
+	# Inner Ethernet header
+	p+="$dest_mac:"                # ETH daddr
+	p+="00:00:00:00:00:00:"        # ETH saddr
+	p+="08:00:"                    # ETH type
+
+	# Inner IPv4 header
+	p+="45:"                       # IP version + IHL
+	p+="00:"                       # IP TOS
+	p+="00:14:"                    # IP total length
+	p+="00:00:"                    # IP identification
+	p+="20:00:"                    # IP flags + frag off
+	p+="40:"                       # IP TTL
+	p+="00:"                       # IP proto
+	p+="D6:E5:"                    # IP header csum
+	p+="c0:00:02:03:"              # IP saddr: 192.0.2.3
+	p+="c0:00:02:01:"              # IP daddr: 192.0.2.1
+
+	echo $p
+}
+
+# Send VXLAN packets whose outer TOS carries the given ECN bits while the
+# inner IP header has TOS 0, and check that they trigger the "decap_error"
+# trap exception and do not leave through $swp1.
+#
+# $1 - description prefix for log_test
+# $2 - human-readable name of the outer ECN value, used in the log message
+# $3 - outer TOS value passed to mausezahn
+ecn_decap_test()
+{
+	local trap_name="decap_error"
+	local desc=$1; shift
+	local ecn_desc=$1; shift
+	local outer_tos=$1; shift
+	local mz_pid
+
+	RET=0
+
+	# Counts decapsulated packets leaving $swp1; must stay at zero.
+	tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+		flower src_ip 192.0.2.3 dst_ip 192.0.2.1 action pass
+
+	rp1_mac=$(mac_get $rp1)
+	payload=$(ecn_payload_get)
+
+	# Background traffic towards the VXLAN local address and port.
+	ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -b $rp1_mac -B 192.0.2.17 \
+		-t udp sp=12345,dp=$VXPORT,tos=$outer_tos,p=$payload -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test $trap_name
+
+	tc_check_packets "dev $swp1 egress" 101 0
+	check_err $? "Packets were not dropped"
+
+	log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc"
+
+	# Stop the traffic generator and remove the counting filter.
+	kill $mz_pid && wait $mz_pid &> /dev/null
+	tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower
+}
+
+# Print the colon-separated hex payload of a VXLAN packet (VNI 1000) whose
+# first reserved field is non-zero (01:00:00), followed by an Ethernet frame
+# to $h1's MAC and an IPv4 header from 192.0.2.3 to 192.0.2.1. The output
+# keeps the trailing colon.
+reserved_bits_payload_get()
+{
+	dest_mac=$(mac_get $h1)
+
+	# VXLAN header
+	p="08:"                        # VXLAN flags
+	p+="01:00:00:"                 # VXLAN reserved
+	p+="00:03:e8:"                 # VXLAN VNI : 1000
+	p+="00:"                       # VXLAN reserved
+
+	# Inner Ethernet header
+	p+="$dest_mac:"                # ETH daddr
+	p+="00:00:00:00:00:00:"        # ETH saddr
+	p+="08:00:"                    # ETH type
+
+	# Inner IPv4 header
+	p+="45:"                       # IP version + IHL
+	p+="00:"                       # IP TOS
+	p+="00:14:"                    # IP total length
+	p+="00:00:"                    # IP identification
+	p+="20:00:"                    # IP flags + frag off
+	p+="40:"                       # IP TTL
+	p+="00:"                       # IP proto
+	p+="00:00:"                    # IP header csum
+	p+="c0:00:02:03:"              # IP saddr: 192.0.2.3
+	p+="c0:00:02:01:"              # IP daddr: 192.0.2.1
+
+	echo $p
+}
+
+# Print the colon-separated hex payload of a packet that contains only a
+# VXLAN header (VNI 1000) and no inner frame at all. The output keeps the
+# trailing colon. No MAC address is needed because there is no inner
+# Ethernet header.
+short_payload_get()
+{
+	p=$(:
+		)"08:"$(                      : VXLAN flags
+		)"01:00:00:"$(                : VXLAN reserved
+		)"00:03:e8:"$(                : VXLAN VNI : 1000
+		)"00:"$(                      : VXLAN reserved
+		)
+	echo $p
+}
+
+# Send malformed VXLAN packets and check that they trigger the "decap_error"
+# trap exception and do not leave through $swp1.
+#
+# $1 - description passed to log_test
+# $2 - name of the function that prints the payload to send
+corrupted_packet_test()
+{
+	local trap_name="decap_error"
+	local desc=$1; shift
+	local payload_get=$1; shift
+	local mz_pid
+
+	RET=0
+
+	# When the packet is too short there is no inner packet at all,
+	# so the matching will always succeed
+	tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+		flower skip_hw src_ip 192.0.2.3 dst_ip 192.0.2.1 action pass
+
+	rp1_mac=$(mac_get $rp1)
+	# Call the payload builder whose name was passed in.
+	payload=$($payload_get)
+	ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -b $rp1_mac \
+		-B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test $trap_name
+
+	tc_check_packets "dev $swp1 egress" 101 0
+	check_err $? "Packets were not dropped"
+
+	log_test "$desc"
+
+	# Stop the traffic generator and remove the counting filter.
+	kill $mz_pid && wait $mz_pid &> /dev/null
+	tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower
+}
+
+# Exercise the "decap_error" trap with ECN mismatches (outer TOS 01, 02, 03
+# against an inner TOS of 0) and with two malformed VXLAN payloads.
+decap_error_test()
+{
+	ecn_decap_test "Decap error" "ECT(1)" 01
+	ecn_decap_test "Decap error" "ECT(0)" 02
+	ecn_decap_test "Decap error" "CE" 03
+
+	corrupted_packet_test "Decap error: Reserved bits in use" \
+		"reserved_bits_payload_get"
+	corrupted_packet_test "Decap error: No L2 header" "short_payload_get"
+}
+
+# Print the colon-separated hex payload of a VXLAN packet (VNI 1000) whose
+# inner Ethernet source MAC is 01:02:03:04:05:06, addressed to $h1's MAC and
+# carrying an IPv4 header from 192.0.2.3 to 192.0.2.1. The output keeps the
+# trailing colon.
+mc_smac_payload_get()
+{
+	dest_mac=$(mac_get $h1)
+	source_mac=01:02:03:04:05:06
+
+	# VXLAN header
+	p="08:"                        # VXLAN flags
+	p+="00:00:00:"                 # VXLAN reserved
+	p+="00:03:e8:"                 # VXLAN VNI : 1000
+	p+="00:"                       # VXLAN reserved
+
+	# Inner Ethernet header
+	p+="$dest_mac:"                # ETH daddr
+	p+="$source_mac:"              # ETH saddr
+	p+="08:00:"                    # ETH type
+
+	# Inner IPv4 header
+	p+="45:"                       # IP version + IHL
+	p+="00:"                       # IP TOS
+	p+="00:14:"                    # IP total length
+	p+="00:00:"                    # IP identification
+	p+="20:00:"                    # IP flags + frag off
+	p+="40:"                       # IP TTL
+	p+="00:"                       # IP proto
+	p+="00:00:"                    # IP header csum
+	p+="c0:00:02:03:"              # IP saddr: 192.0.2.3
+	p+="c0:00:02:01:"              # IP daddr: 192.0.2.1
+
+	echo $p
+}
+
+# Send VXLAN packets whose inner source MAC is 01:02:03:04:05:06 and check
+# the "overlay_smac_is_mc" drop trap through devlink_trap_drop_test.
+overlay_smac_is_mc_test()
+{
+	local trap_name="overlay_smac_is_mc"
+	local mz_pid
+
+	RET=0
+
+	# The matching will be checked on devlink_trap_drop_test()
+	# and the filter will be removed on devlink_trap_drop_cleanup()
+	tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+		flower src_mac 01:02:03:04:05:06 action pass
+
+	rp1_mac=$(mac_get $rp1)
+	payload=$(mc_smac_payload_get)
+
+	# Background traffic towards the VXLAN local address and port.
+	ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -b $rp1_mac \
+		-B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
+	mz_pid=$!
+
+	devlink_trap_drop_test $trap_name $swp1 101
+
+	log_test "Overlay source MAC is multicast"
+
+	devlink_trap_drop_cleanup $mz_pid $swp1 "ip" 1 101
+}
+
+# Run cleanup whenever the script exits, including on early failure.
+trap cleanup EXIT
+
+# Build the topology, run the tests (setup_wait and tests_run are not defined
+# in this script; presumably provided by the sourced libraries) and report
+# the overall status.
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/extack.sh b/tools/testing/selftests/drivers/net/mlxsw/extack.sh
new file mode 100755
index 000000000..7a0a99c1d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/extack.sh
@@ -0,0 +1,170 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test operations that we expect to report extended ack.
+
+# The shared forwarding selftest library is located relative to this script.
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	netdev_pre_up_test
+	vxlan_vlan_add_test
+	vxlan_bridge_create_test
+	bridge_create_test
+"
+# setup_prepare reads two entries (p1, p2) from NETIFS.
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+
+# Pick the two switch ports from NETIFS and bring them up.
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+}
+
+# Undo setup_prepare: bring both switch ports down again.
+cleanup()
+{
+	pre_cleanup
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+}
+
+# Check that bringing up a VXLAN device with an unsupported TTL is rejected
+# and that the error text mentions mlxsw_spectrum.
+netdev_pre_up_test()
+{
+	RET=0
+
+	# First bridge/VXLAN pair, with TTL 100; this one is expected to work.
+	ip link add name br1 up type bridge vlan_filtering 0 mcast_snooping 0
+	ip link add name vx1 up type vxlan id 1000 \
+		local 192.0.2.17 remote 192.0.2.18 \
+		dstport 4789 nolearning noudpcsum tos inherit ttl 100
+
+	ip link set dev vx1 master br1
+	check_err $?
+
+	ip link set dev $swp1 master br1
+	check_err $?
+
+	# Second pair, initially configured with the same TTL.
+	ip link add name br2 up type bridge vlan_filtering 0 mcast_snooping 0
+	ip link add name vx2 up type vxlan id 2000 \
+		local 192.0.2.17 remote 192.0.2.18 \
+		dstport 4789 nolearning noudpcsum tos inherit ttl 100
+
+	ip link set dev vx2 master br2
+	check_err $?
+
+	ip link set dev $swp2 master br2
+	check_err $?
+
+	# Unsupported configuration: mlxsw demands that all offloaded VXLAN
+	# devices have the same TTL.
+	ip link set dev vx2 down
+	ip link set dev vx2 type vxlan ttl 200
+
+	# The command itself must fail ...
+	ip link set dev vx2 up &>/dev/null
+	check_fail $?
+
+	# ... and its stderr (stdout is discarded) must mention the driver.
+	ip link set dev vx2 up 2>&1 >/dev/null | grep -q mlxsw_spectrum
+	check_err $?
+
+	log_test "extack - NETDEV_PRE_UP"
+
+	ip link del dev vx2
+	ip link del dev br2
+
+	ip link del dev vx1
+	ip link del dev br1
+}
+
+# Check that mapping a VLAN as pvid/untagged on an unsupported VXLAN device
+# in a VLAN-aware bridge produces an error that mentions mlxsw_spectrum.
+vxlan_vlan_add_test()
+{
+	RET=0
+
+	ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0
+
+	# Unsupported configuration: mlxsw demands VXLAN with "noudpcsum".
+	ip link add name vx1 up type vxlan id 1000 \
+		local 192.0.2.17 remote 192.0.2.18 \
+		dstport 4789 tos inherit ttl 100
+
+	ip link set dev vx1 master br1
+	check_err $?
+
+	# Plain VLAN membership (not pvid/untagged) is expected to succeed.
+	bridge vlan add dev vx1 vid 1
+	check_err $?
+
+	ip link set dev $swp1 master br1
+	check_err $?
+
+	# Only stderr reaches grep; stdout is discarded.
+	bridge vlan add dev vx1 vid 1 pvid untagged 2>&1 >/dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $?
+
+	log_test "extack - map VLAN at VXLAN device"
+
+	ip link del dev vx1
+	ip link del dev br1
+}
+
+# Check that enslaving $swp1 to a bridge that already contains an unsupported
+# VXLAN device produces an error mentioning mlxsw_spectrum, for both a
+# VLAN-aware and a VLAN-unaware bridge.
+vxlan_bridge_create_test()
+{
+	RET=0
+
+	# Unsupported configuration: mlxsw demands VXLAN with "noudpcsum".
+	ip link add name vx1 up type vxlan id 1000 \
+		local 192.0.2.17 remote 192.0.2.18 \
+		dstport 4789 tos inherit ttl 100
+
+	# Test with VLAN-aware bridge.
+	ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0
+
+	ip link set dev vx1 master br1
+
+	# Only stderr reaches grep; stdout is discarded.
+	ip link set dev $swp1 master br1 2>&1 > /dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $?
+
+	# Test with VLAN-unaware bridge.
+	ip link set dev br1 type bridge vlan_filtering 0
+
+	ip link set dev $swp1 master br1 2>&1 > /dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $?
+
+	log_test "extack - bridge creation with VXLAN"
+
+	ip link del dev br1
+	ip link del dev vx1
+}
+
+# Check that enslaving a port to a second VLAN-aware bridge produces an error
+# that mentions mlxsw_spectrum.
+bridge_create_test()
+{
+	RET=0
+
+	ip link add name br1 up type bridge vlan_filtering 1
+	ip link add name br2 up type bridge vlan_filtering 1
+
+	ip link set dev $swp1 master br1
+	check_err $?
+
+	# Only one VLAN-aware bridge is supported, so this should fail with
+	# an extack.
+	ip link set dev $swp2 master br2 2>&1 > /dev/null \
+		| grep -q mlxsw_spectrum
+	check_err $?
+
+	log_test "extack - multiple VLAN-aware bridges creation"
+
+	ip link del dev br2
+	ip link del dev br1
+}
+
+# Run cleanup whenever the script exits, including on early failure.
+trap cleanup EXIT
+
+# Bring the ports up, then run the tests (setup_wait and tests_run are not
+# defined in this script; presumably provided by lib.sh).
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/fib.sh b/tools/testing/selftests/drivers/net/mlxsw/fib.sh
new file mode 100755
index 000000000..eab79b9e5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/fib.sh
@@ -0,0 +1,256 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking the FIB offload API on top of mlxsw.
+
+# The shared forwarding selftest libraries are located relative to this script.
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	ipv4_identical_routes
+	ipv4_tos
+	ipv4_metric
+	ipv4_replace
+	ipv4_delete
+	ipv4_plen
+	ipv4_replay
+	ipv4_flush
+	ipv4_local_replace
+	ipv6_add
+	ipv6_metric
+	ipv6_append_single
+	ipv6_replace_single
+	ipv6_metric_multipath
+	ipv6_append_multipath
+	ipv6_replace_multipath
+	ipv6_append_multipath_to_single
+	ipv6_delete_single
+	ipv6_delete_multipath
+	ipv6_replay_single
+	ipv6_replay_multipath
+	ipv6_local_replace
+"
+# No NETIFS entries are read by this script; the tests run against the
+# "testns1" network namespace created in setup_prepare.
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source $lib_dir/fib_offload_lib.sh
+
+# Run fib_ipv4_identical_routes_test against the testns1 namespace.
+ipv4_identical_routes()
+{
+	fib_ipv4_identical_routes_test "testns1"
+}
+
+# Run fib_ipv4_tos_test against the testns1 namespace.
+ipv4_tos()
+{
+	fib_ipv4_tos_test "testns1"
+}
+
+# Run fib_ipv4_metric_test against the testns1 namespace.
+ipv4_metric()
+{
+	fib_ipv4_metric_test "testns1"
+}
+
+# Run fib_ipv4_replace_test against the testns1 namespace.
+ipv4_replace()
+{
+	fib_ipv4_replace_test "testns1"
+}
+
+# Run fib_ipv4_delete_test against the testns1 namespace.
+ipv4_delete()
+{
+	fib_ipv4_delete_test "testns1"
+}
+
+# Run fib_ipv4_plen_test against the testns1 namespace.
+ipv4_plen()
+{
+	fib_ipv4_plen_test "testns1"
+}
+
+# Run fib_ipv4_replay_metric_test against the testns1 namespace and the
+# devlink device $DEVLINK_DEV.
+ipv4_replay_metric()
+{
+	fib_ipv4_replay_metric_test "testns1" "$DEVLINK_DEV"
+}
+
+# Run fib_ipv4_replay_tos_test against the testns1 namespace and the devlink
+# device $DEVLINK_DEV.
+ipv4_replay_tos()
+{
+	fib_ipv4_replay_tos_test "testns1" "$DEVLINK_DEV"
+}
+
+# Run fib_ipv4_replay_plen_test against the testns1 namespace and the devlink
+# device $DEVLINK_DEV.
+ipv4_replay_plen()
+{
+	fib_ipv4_replay_plen_test "testns1" "$DEVLINK_DEV"
+}
+
+# Run the three IPv4 replay tests (metric, TOS, prefix length) in sequence.
+ipv4_replay()
+{
+	ipv4_replay_metric
+	ipv4_replay_tos
+	ipv4_replay_plen
+}
+
+# Run fib_ipv4_flush_test against the testns1 namespace.
+ipv4_flush()
+{
+	fib_ipv4_flush_test "testns1"
+}
+
+# Check how identical IPv4 /32 routes in the local and main tables interact
+# in testns1: the local-table route is expected to win in both insertion
+# orders.
+# NOTE(review): the last argument of fib4_trap_check (true/false) is defined
+# elsewhere; judging by the error messages, "false" expects the route to be
+# in hardware and "true" expects it not to be - confirm against its definition.
+ipv4_local_replace()
+{
+	local ns="testns1"
+
+	RET=0
+
+	ip -n $ns link add name dummy1 type dummy
+	ip -n $ns link set dev dummy1 up
+
+	# Local route first, then an identical route in the main table.
+	ip -n $ns route add table local 192.0.2.1/32 dev dummy1
+	fib4_trap_check $ns "table local 192.0.2.1/32 dev dummy1" false
+	check_err $? "Local table route not in hardware when should"
+
+	ip -n $ns route add table main 192.0.2.1/32 dev dummy1
+	fib4_trap_check $ns "table main 192.0.2.1/32 dev dummy1" true
+	check_err $? "Main table route in hardware when should not"
+
+	fib4_trap_check $ns "table local 192.0.2.1/32 dev dummy1" false
+	check_err $? "Local table route was replaced when should not"
+
+	# Test that local routes can replace routes in main table.
+	ip -n $ns route add table main 192.0.2.2/32 dev dummy1
+	fib4_trap_check $ns "table main 192.0.2.2/32 dev dummy1" false
+	check_err $? "Main table route not in hardware when should"
+
+	ip -n $ns route add table local 192.0.2.2/32 dev dummy1
+	fib4_trap_check $ns "table local 192.0.2.2/32 dev dummy1" false
+	check_err $? "Local table route did not replace route in main table when should"
+
+	fib4_trap_check $ns "table main 192.0.2.2/32 dev dummy1" true
+	check_err $? "Main table route was not replaced when should"
+
+	log_test "IPv4 local table route replacement"
+
+	ip -n $ns link del dev dummy1
+}
+
+# Run fib_ipv6_add_test against the testns1 namespace.
+ipv6_add()
+{
+	fib_ipv6_add_test "testns1"
+}
+
+# Run fib_ipv6_metric_test against the testns1 namespace.
+ipv6_metric()
+{
+	fib_ipv6_metric_test "testns1"
+}
+
+# Run fib_ipv6_append_single_test against the testns1 namespace.
+ipv6_append_single()
+{
+	fib_ipv6_append_single_test "testns1"
+}
+
+# Run fib_ipv6_replace_single_test against the testns1 namespace.
+ipv6_replace_single()
+{
+	fib_ipv6_replace_single_test "testns1"
+}
+
+# Run fib_ipv6_metric_multipath_test against the testns1 namespace.
+ipv6_metric_multipath()
+{
+	fib_ipv6_metric_multipath_test "testns1"
+}
+
+# Run fib_ipv6_append_multipath_test against the testns1 namespace.
+ipv6_append_multipath()
+{
+	fib_ipv6_append_multipath_test "testns1"
+}
+
+# Run fib_ipv6_replace_multipath_test against the testns1 namespace.
+ipv6_replace_multipath()
+{
+	fib_ipv6_replace_multipath_test "testns1"
+}
+
+# Run fib_ipv6_append_multipath_to_single_test against the testns1 namespace.
+ipv6_append_multipath_to_single()
+{
+	fib_ipv6_append_multipath_to_single_test "testns1"
+}
+
+# Run fib_ipv6_delete_single_test against the testns1 namespace.
+ipv6_delete_single()
+{
+	fib_ipv6_delete_single_test "testns1"
+}
+
+# Run fib_ipv6_delete_multipath_test against the testns1 namespace.
+ipv6_delete_multipath()
+{
+	fib_ipv6_delete_multipath_test "testns1"
+}
+
+# Run fib_ipv6_replay_single_test against the testns1 namespace and the
+# devlink device $DEVLINK_DEV.
+ipv6_replay_single()
+{
+	fib_ipv6_replay_single_test "testns1" "$DEVLINK_DEV"
+}
+
+# Run fib_ipv6_replay_multipath_test against the testns1 namespace and the
+# devlink device $DEVLINK_DEV.
+ipv6_replay_multipath()
+{
+	fib_ipv6_replay_multipath_test "testns1" "$DEVLINK_DEV"
+}
+
+# Check how identical IPv6 /128 routes in the local and main tables interact
+# in testns1: the local-table route is expected to win in both insertion
+# orders.
+# NOTE(review): the last argument of fib6_trap_check (true/false) is defined
+# elsewhere; judging by the error messages, "false" expects the route to be
+# in hardware and "true" expects it not to be - confirm against its definition.
+ipv6_local_replace()
+{
+	local ns="testns1"
+
+	RET=0
+
+	ip -n $ns link add name dummy1 type dummy
+	ip -n $ns link set dev dummy1 up
+
+	# Local route first, then an identical route in the main table.
+	ip -n $ns route add table local 2001:db8:1::1/128 dev dummy1
+	fib6_trap_check $ns "table local 2001:db8:1::1/128 dev dummy1" false
+	check_err $? "Local table route not in hardware when should"
+
+	ip -n $ns route add table main 2001:db8:1::1/128 dev dummy1
+	fib6_trap_check $ns "table main 2001:db8:1::1/128 dev dummy1" true
+	check_err $? "Main table route in hardware when should not"
+
+	fib6_trap_check $ns "table local 2001:db8:1::1/128 dev dummy1" false
+	check_err $? "Local table route was replaced when should not"
+
+	# Test that local routes can replace routes in main table.
+	ip -n $ns route add table main 2001:db8:1::2/128 dev dummy1
+	fib6_trap_check $ns "table main 2001:db8:1::2/128 dev dummy1" false
+	check_err $? "Main table route not in hardware when should"
+
+	ip -n $ns route add table local 2001:db8:1::2/128 dev dummy1
+	fib6_trap_check $ns "table local 2001:db8:1::2/128 dev dummy1" false
+	check_err $? "Local table route did not replace route in main table when should"
+
+	fib6_trap_check $ns "table main 2001:db8:1::2/128 dev dummy1" true
+	check_err $? "Main table route was not replaced when should"
+
+	log_test "IPv6 local table route replacement"
+
+	ip -n $ns link del dev dummy1
+}
+
+# Create the "testns1" network namespace and reload $DEVLINK_DEV into it.
+# Either step failing prints a message and exits the script with status 1.
+setup_prepare()
+{
+	if ! ip netns add testns1; then
+		echo "Failed to add netns \"testns1\""
+		exit 1
+	fi
+
+	if ! devlink dev reload $DEVLINK_DEV netns testns1; then
+		echo "Failed to reload into netns \"testns1\""
+		exit 1
+	fi
+}
+
+# Undo setup_prepare: reload the devlink device from testns1 back into the
+# namespace of this shell's PID ($$), then delete testns1.
+cleanup()
+{
+	pre_cleanup
+	devlink -N testns1 dev reload $DEVLINK_DEV netns $$
+	ip netns del testns1
+}
+
+# Run cleanup whenever the script exits, including on early failure.
+trap cleanup EXIT
+
+# Unlike the sibling scripts, setup_wait is not called here.
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/fib_offload.sh b/tools/testing/selftests/drivers/net/mlxsw/fib_offload.sh
new file mode 100755
index 000000000..e99ae500f
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/fib_offload.sh
@@ -0,0 +1,349 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test unicast FIB offload indication.
+
+# The shared forwarding selftest libraries are located relative to this script.
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	ipv6_route_add
+	ipv6_route_replace
+	ipv6_route_nexthop_group_share
+	ipv6_route_rate
+"
+# setup_prepare reads four entries (p1..p4) from NETIFS.
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+# Set up $tor1_p1 with the two nexthop addresses 2001:db8:1::2 and
+# 2001:db8:1::3 via simple_if_init.
+tor1_create()
+{
+	simple_if_init $tor1_p1 2001:db8:1::2/128 2001:db8:1::3/128
+}
+
+# Undo tor1_create.
+tor1_destroy()
+{
+	simple_if_fini $tor1_p1 2001:db8:1::2/128 2001:db8:1::3/128
+}
+
+# Set up $tor2_p1 with the two nexthop addresses 2001:db8:2::2 and
+# 2001:db8:2::3 via simple_if_init.
+tor2_create()
+{
+	simple_if_init $tor2_p1 2001:db8:2::2/128 2001:db8:2::3/128
+}
+
+# Undo tor2_create.
+tor2_destroy()
+{
+	simple_if_fini $tor2_p1 2001:db8:2::2/128 2001:db8:2::3/128
+}
+
+# Bring up both spine ports and give each one the ::1 address of its /64
+# (2001:db8:1::/64 on $spine_p1, 2001:db8:2::/64 on $spine_p2).
+spine_create()
+{
+	ip link set dev $spine_p1 up
+	ip link set dev $spine_p2 up
+
+	__addr_add_del $spine_p1 add 2001:db8:1::1/64
+	__addr_add_del $spine_p2 add 2001:db8:2::1/64
+}
+
+# Undo spine_create in reverse order: remove the addresses, then bring the
+# ports down.
+spine_destroy()
+{
+	__addr_add_del $spine_p2 del 2001:db8:2::1/64
+	__addr_add_del $spine_p1 del 2001:db8:1::1/64
+
+	ip link set dev $spine_p2 down
+	ip link set dev $spine_p1 down
+}
+
+# Compare the number of "offload" lines printed by "ip -6 route show match"
+# for a route specification with an expected count.
+#
+# $1 - route specification; deliberately expanded unquoted below so that it
+#      is split into separate arguments for ip
+# $2 - expected number of lines containing "offload"
+#
+# Returns 0 when the counts are equal and 1 otherwise.
+ipv6_offload_check()
+{
+	local pfx="$1"
+	local expected_num=$2
+	local num
+
+	# Give the route offload a moment to complete before looking.
+	sleep .1
+
+	num=$(ip -6 route show match ${pfx} | grep -c "offload")
+
+	if ! [ $num -eq $expected_num ]; then
+		return 1
+	fi
+
+	return 0
+}
+
+# Check the offload indication of IPv6 prefix routes to 2001:db8:3::/64 as
+# routes with different metrics and a different nexthop device are added.
+# Only the route with the lowest metric is expected to be marked as
+# offloaded.
+ipv6_route_add_prefix()
+{
+	RET=0
+
+	# Add a prefix route and check that it is offloaded.
+	ip -6 route add 2001:db8:3::/64 dev $spine_p1 metric 100
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 1
+	check_err $? "prefix route not offloaded"
+
+	# Append an identical prefix route with a higher metric and check that
+	# offload indication did not change.
+	ip -6 route append 2001:db8:3::/64 dev $spine_p1 metric 200
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 1
+	check_err $? "lowest metric not offloaded after append"
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 200" 0
+	check_err $? "highest metric offloaded when should not"
+
+	# Prepend an identical prefix route with lower metric and check that
+	# it is offloaded and the others are not.
+	ip -6 route append 2001:db8:3::/64 dev $spine_p1 metric 10
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 10" 1
+	check_err $? "lowest metric not offloaded after prepend"
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 100" 0
+	check_err $? "mid metric offloaded when should not"
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p1 metric 200" 0
+	check_err $? "highest metric offloaded when should not"
+
+	# Delete the routes and add the same route with a different nexthop
+	# device. Check that it is offloaded.
+	ip -6 route flush 2001:db8:3::/64 dev $spine_p1
+	ip -6 route add 2001:db8:3::/64 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 dev $spine_p2" 1
+	# Record the result; without this the check above could never fail
+	# the test.
+	check_err $? "prefix route not offloaded after changing nexthop device"
+
+	log_test "IPv6 prefix route add"
+
+	ip -6 route flush 2001:db8:3::/64
+}
+
+# Check the offload indication of IPv6 multipath routes to 2001:db8:3::/64
+# as nexthops and routes with other metrics are added. The expected count
+# passed to ipv6_offload_check is one per offloaded nexthop.
+ipv6_route_add_mpath()
+{
+	RET=0
+
+	# Add a multipath route and check that it is offloaded.
+	ip -6 route add 2001:db8:3::/64 metric 100 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded when should"
+
+	# Append another nexthop and check that it is offloaded as well.
+	ip -6 route append 2001:db8:3::/64 metric 100 \
+		nexthop via 2001:db8:1::3 dev $spine_p1
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 3
+	check_err $? "appended nexthop not offloaded when should"
+
+	# Mimic route replace by removing the route and adding it back with
+	# only two nexthops.
+	ip -6 route del 2001:db8:3::/64
+	ip -6 route add 2001:db8:3::/64 metric 100 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded after delete & add"
+
+	# Append a nexthop with a higher metric and check that the offload
+	# indication did not change.
+	ip -6 route append 2001:db8:3::/64 metric 200 \
+		nexthop via 2001:db8:1::3 dev $spine_p1
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "lowest metric not offloaded after append"
+	ipv6_offload_check "2001:db8:3::/64 metric 200" 0
+	check_err $? "highest metric offloaded when should not"
+
+	# Prepend a nexthop with a lower metric and check that it is offloaded
+	# and the others are not.
+	ip -6 route append 2001:db8:3::/64 metric 10 \
+		nexthop via 2001:db8:1::3 dev $spine_p1
+	ipv6_offload_check "2001:db8:3::/64 metric 10" 1
+	check_err $? "lowest metric not offloaded after prepend"
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 0
+	check_err $? "mid metric offloaded when should not"
+	ipv6_offload_check "2001:db8:3::/64 metric 200" 0
+	check_err $? "highest metric offloaded when should not"
+
+	log_test "IPv6 multipath route add"
+
+	ip -6 route flush 2001:db8:3::/64
+}
+
+# Run the prefix-route and multipath-route add tests in sequence.
+ipv6_route_add()
+{
+	ipv6_route_add_prefix
+	ipv6_route_add_mpath
+}
+
+# Check the offload indication of 2001:db8:3::/64 across "ip -6 route
+# replace" operations between prefix and multipath routes. The expected
+# count passed to ipv6_offload_check is one per offloaded nexthop.
+ipv6_route_replace()
+{
+	RET=0
+
+	# Replace prefix route with prefix route.
+	ip -6 route add 2001:db8:3::/64 metric 100 dev $spine_p1
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 1
+	check_err $? "prefix route not offloaded when should"
+	ip -6 route replace 2001:db8:3::/64 metric 100 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 1
+	check_err $? "prefix route not offloaded after replace"
+
+	# Replace prefix route with multipath route.
+	ip -6 route replace 2001:db8:3::/64 metric 100 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded after replace"
+
+	# Replace multipath route with prefix route. A prefix route cannot
+	# replace a multipath route, so it is appended.
+	ip -6 route replace 2001:db8:3::/64 metric 100 dev $spine_p1
+	ipv6_offload_check "2001:db8:3::/64 metric 100 dev $spine_p1" 0
+	check_err $? "prefix route offloaded after 'replacing' multipath route"
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded after being 'replaced' by prefix route"
+
+	# Replace multipath route with multipath route.
+	ip -6 route replace 2001:db8:3::/64 metric 100 \
+		nexthop via 2001:db8:1::3 dev $spine_p1 \
+		nexthop via 2001:db8:2::3 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded after replacing multipath route"
+
+	# Replace a non-existing multipath route with a multipath route and
+	# check that it is appended and not offloaded.
+	ip -6 route replace 2001:db8:3::/64 metric 200 \
+		nexthop via 2001:db8:1::3 dev $spine_p1 \
+		nexthop via 2001:db8:2::3 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64 metric 100" 2
+	check_err $? "multipath route not offloaded after non-existing route was 'replaced'"
+	ipv6_offload_check "2001:db8:3::/64 metric 200" 0
+	check_err $? "multipath route offloaded after 'replacing' non-existing route"
+
+	log_test "IPv6 route replace"
+
+	ip -6 route flush 2001:db8:3::/64
+}
+
+# Check that two multipath routes using the same set of nexthops keep their
+# offload indication when one of them is deleted or loses a nexthop.
+ipv6_route_nexthop_group_share()
+{
+	RET=0
+
+	# The driver consolidates identical nexthop groups in order to reduce
+	# the resource usage in its adjacency table. Check that the deletion
+	# of one multipath route using the group does not affect the other.
+	ip -6 route add 2001:db8:3::/64 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ip -6 route add 2001:db8:4::/64 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	ipv6_offload_check "2001:db8:3::/64" 2
+	check_err $? "multipath route not offloaded when should"
+	ipv6_offload_check "2001:db8:4::/64" 2
+	check_err $? "multipath route not offloaded when should"
+	ip -6 route del 2001:db8:3::/64
+	ipv6_offload_check "2001:db8:4::/64" 2
+	check_err $? "multipath route not offloaded after deletion of route sharing the nexthop group"
+
+	# Check that after unsharing a nexthop group the routes are still
+	# marked as offloaded.
+	ip -6 route add 2001:db8:3::/64 \
+		nexthop via 2001:db8:1::2 dev $spine_p1 \
+		nexthop via 2001:db8:2::2 dev $spine_p2
+	# Removing one nexthop leaves 2001:db8:4::/64 with a single path.
+	ip -6 route del 2001:db8:4::/64 \
+		nexthop via 2001:db8:1::2 dev $spine_p1
+	ipv6_offload_check "2001:db8:4::/64" 1
+	check_err $? "singlepath route not offloaded after unsharing the nexthop group"
+	ipv6_offload_check "2001:db8:3::/64" 2
+	check_err $? "multipath route not offloaded after unsharing the nexthop group"
+
+	log_test "IPv6 nexthop group sharing"
+
+	ip -6 route flush 2001:db8:3::/64
+	ip -6 route flush 2001:db8:4::/64
+}
+
+# Measure how long it takes to insert 40K IPv6 multipath routes with 16
+# nexthops each and have them all marked as offloaded. The test fails when
+# this takes more than 60 seconds.
+ipv6_route_rate()
+{
+	local batch_dir=$(mktemp -d)
+	local num_rts=$((40 * 1024))
+	local num_nhs=16
+	local nexthops=""
+	local deadline
+	local total
+	local count
+	local start
+	local diff
+	local end
+	local i
+
+	RET=0
+
+	# Prepare 40K /64 multipath routes with 16 nexthops each and check how
+	# long it takes to add them. A limit of 60 seconds is set. It is much
+	# higher than insertion should take and meant to flag a serious
+	# regression.
+	# The wait below expects one "offload" line per nexthop of every
+	# route.
+	total=$((num_nhs * num_rts))
+
+	for i in $(seq 1 $num_nhs); do
+		ip -6 address add 2001:db8:1::10:$i/128 dev $tor1_p1
+		nexthops+=" nexthop via 2001:db8:1::10:$i dev $spine_p1"
+	done
+
+	for i in $(seq 1 $num_rts); do
+		echo "route add 2001:db8:8:$(printf "%x" $i)::/64$nexthops" \
+			>> $batch_dir/add.batch
+		echo "route del 2001:db8:8:$(printf "%x" $i)::/64$nexthops" \
+			>> $batch_dir/del.batch
+	done
+
+	start=$(date +%s.%N)
+	# Stop polling once the 60 second limit has certainly been exceeded,
+	# so that routes which never get offloaded fail the test instead of
+	# hanging it. SECONDS has whole-second resolution, hence 61.
+	deadline=$((SECONDS + 61))
+
+	ip -batch $batch_dir/add.batch
+	count=$(ip -6 route show | grep offload | wc -l)
+	while [ $count -lt $total ] && [ $SECONDS -lt $deadline ]; do
+		sleep .01
+		count=$(ip -6 route show | grep offload | wc -l)
+	done
+
+	end=$(date +%s.%N)
+
+	diff=$(echo "$end - $start" | bc -l)
+	test "$(echo "$diff > 60" | bc -l)" -eq 0
+	check_err $? "route insertion took too long"
+	log_info "inserted $num_rts routes in $diff seconds"
+
+	log_test "IPv6 routes insertion rate"
+
+	ip -batch $batch_dir/del.batch
+	for i in $(seq 1 $num_nhs); do
+		ip -6 address del 2001:db8:1::10:$i/128 dev $tor1_p1
+	done
+	rm -rf $batch_dir
+}
+
+# Map the four NETIFS entries to their roles and build the test topology.
+setup_prepare()
+{
+	# p1 <-> p2: first spine port and the first ToR port.
+	spine_p1=${NETIFS[p1]}
+	tor1_p1=${NETIFS[p2]}
+
+	# p3 <-> p4: second spine port and the second ToR port.
+	spine_p2=${NETIFS[p3]}
+	tor2_p1=${NETIFS[p4]}
+
+	vrf_prepare
+	forwarding_enable
+
+	tor1_create
+	tor2_create
+	spine_create
+}
+
+# Tear down everything setup_prepare created, in reverse order.
+cleanup()
+{
+	pre_cleanup
+
+	spine_destroy
+	tor2_destroy
+	tor1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+# Run cleanup whenever the script exits, including on early failure.
+trap cleanup EXIT
+
+# Build the topology, then run the tests (setup_wait and tests_run are not
+# defined in this script; presumably provided by lib.sh).
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
new file mode 100755
index 000000000..76f1ab489
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
@@ -0,0 +1,217 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# ../../../net/forwarding/mirror_gre_topo_lib.sh for more details.
+#
+# Test various aspects of offloading gretap mirrors that are specific to
+# mlxsw.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/mirror_lib.sh
+source $lib_dir/mirror_gre_lib.sh
+source $lib_dir/mirror_gre_topo_lib.sh
+
+setup_keyful()	# Create the keyed ip6gretap pair gt6-key / h3-gt6-key.
+{
+	tunnel_create gt6-key ip6gretap 2001:db8:3::1 2001:db8:3::2 \
+		      ttl 100 tos inherit allow-localremote \
+		      key 1234
+
+	tunnel_create h3-gt6-key ip6gretap 2001:db8:3::2 2001:db8:3::1 \
+		      key 1234
+	ip link set h3-gt6-key vrf v$h3	# peer tunnel goes into VRF v$h3
+	matchall_sink_create h3-gt6-key
+
+	ip address add dev $swp3 2001:db8:3::1/64	# tunnel endpoint addresses
+	ip address add dev $h3 2001:db8:3::2/64
+}
+
+cleanup_keyful()	# Undo setup_keyful: addresses first, then both tunnels.
+{
+	ip address del dev $h3 2001:db8:3::2/64
+	ip address del dev $swp3 2001:db8:3::1/64
+
+	tunnel_destroy h3-gt6-key
+	tunnel_destroy gt6-key
+}
+
+setup_soft()	# Create gt6-soft / h3-gt6-soft with a veth pair as the underlay.
+{
+	# Set up a topology for testing underlay routes that point at an
+	# unsupported soft device.
+
+	tunnel_create gt6-soft ip6gretap 2001:db8:4::1 2001:db8:4::2 \
+		      ttl 100 tos inherit allow-localremote
+
+	tunnel_create h3-gt6-soft ip6gretap 2001:db8:4::2 2001:db8:4::1
+	ip link set h3-gt6-soft vrf v$h3
+	matchall_sink_create h3-gt6-soft
+
+	ip link add name v1 type veth peer name v2	# the "soft device"
+	ip link set dev v1 up
+	ip address add dev v1 2001:db8:4::1/64	# local tunnel endpoint
+
+	ip link set dev v2 vrf v$h3
+	ip link set dev v2 up
+	ip address add dev v2 2001:db8:4::2/64	# remote tunnel endpoint
+}
+
+cleanup_soft()	# Undo setup_soft.
+{
+	ip link del dev v1	# v2 is v1's veth peer and is never deleted separately
+
+	tunnel_destroy h3-gt6-soft
+	tunnel_destroy gt6-soft
+}
+
+setup_prepare()	# Map NETIFS p1..p6, build the gretap topology, add extras.
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create	# from the sourced mirror_gre_topo_lib.sh
+
+	ip address add dev $swp3 2001:db8:2::1/64
+	ip address add dev $h3 2001:db8:2::2/64
+
+	ip address add dev $swp3 192.0.2.129/28
+	ip address add dev $h3 192.0.2.130/28
+
+	setup_keyful	# mlxsw-specific additions defined in this file
+	setup_soft
+}
+
+cleanup()	# EXIT-trap handler: tear down what setup_prepare created.
+{
+	pre_cleanup
+
+	cleanup_soft	# reverse of the setup_keyful, setup_soft order
+	cleanup_keyful
+
+	ip address del dev $h3 2001:db8:2::2/64
+	ip address del dev $swp3 2001:db8:2::1/64
+
+	ip address del dev $h3 192.0.2.130/28
+	ip address del dev $swp3 192.0.2.129/28
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_span_gre_ttl_inherit()	# Args: tunnel device, link type, test label.
+{
+	local tundev=$1; shift
+	local type=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ip link set dev $tundev type $type ttl inherit
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	fail_test_span_gre_dir $tundev ingress	# inherit TTL: expect no mirroring
+
+	ip link set dev $tundev type $type ttl 100
+	# Same mirror rule, TTL back at 100: mirroring is now expected to work.
+	quick_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: no offload on TTL of inherit ($tcflags)"
+}
+
+test_span_gre_tos_fixed()	# Args: tunnel device, link type, test label.
+{
+	local tundev=$1; shift
+	local type=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ip link set dev $tundev type $type tos 0x10
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	fail_test_span_gre_dir $tundev ingress	# fixed TOS: expect no mirroring
+
+	ip link set dev $tundev type $type tos inherit	# back to inherit
+	quick_test_span_gre_dir $tundev ingress	# now expected to mirror
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: no offload on a fixed TOS ($tcflags)"
+}
+
+test_span_failable()	# Args: should_fail flag, tunnel device, test label.
+{
+	local should_fail=$1; shift
+	local tundev=$1; shift
+	local what=$1; shift
+	local checker=quick_test_span_gre_dir
+
+	RET=0
+
+	# A non-zero flag swaps in the check that expects mirroring to fail.
+	((should_fail)) && checker=fail_test_span_gre_dir
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	$checker $tundev ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: should_fail=$should_fail ($tcflags)"
+}
+
+test_failable()	# Run test_span_failable on both mlxsw-specific tunnels.
+{
+	local should_fail=$1; shift
+
+	test_span_failable $should_fail gt6-key "mirror to keyful gretap"
+	test_span_failable $should_fail gt6-soft "mirror to gretap w/ soft underlay"
+}
+
+test_sw()	# Called with tcflags=skip_hw: mirroring is expected to work.
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	test_failable 0	# should_fail=0
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+test_hw()	# Called with tcflags=skip_sw: checks what must not be offloaded.
+{
+	test_failable 1	# should_fail=1 for the keyful and soft-underlay tunnels
+
+	test_span_gre_tos_fixed gt4 gretap "mirror to gretap"
+	test_span_gre_tos_fixed gt6 ip6gretap "mirror to ip6gretap"
+
+	test_span_gre_ttl_inherit gt4 gretap "mirror to gretap"
+	test_span_gre_ttl_inherit gt6 ip6gretap "mirror to ip6gretap"
+}
+
+trap cleanup EXIT	# run cleanup whenever the script exits
+
+setup_prepare
+setup_wait
+
+if ! tc_offload_check; then
+    check_err 1 "Could not test offloaded functionality"
+    log_test "mlxsw-specific tests for mirror to gretap"
+    exit	# no explicit code: exits with log_test's status
+fi
+
+tcflags="skip_hw"	# read by the test_* functions when installing mirrors
+test_sw
+
+tcflags="skip_sw"
+test_hw
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
new file mode 100644
index 000000000..e00435753
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
@@ -0,0 +1,198 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# Test offloading a number of mirrors-to-gretap. The test creates a number of
+# tunnels. Then it adds one flower mirror for each of the tunnels, matching a
+# given host IP. Then it generates traffic at each of the host IPs and checks
+# that the traffic has been mirrored at the appropriate tunnel.
+#
+#   +--------------------------+                   +--------------------------+
+#   | H1                       |                   |                       H2 |
+#   |     + $h1                |                   |                $h2 +     |
+#   |     | 2001:db8:1:X::1/64 |                   | 2001:db8:1:X::2/64 |     |
+#   +-----|--------------------+                   +--------------------|-----+
+#         |                                                             |
+#   +-----|-------------------------------------------------------------|-----+
+#   | SW  o--> mirrors                                                  |     |
+#   | +---|-------------------------------------------------------------|---+ |
+#   | |   + $swp1                    BR                           $swp2 +   | |
+#   | +---------------------------------------------------------------------+ |
+#   |                                                                         |
+#   |     + $swp3                          + gt6-<X> (ip6gretap)              |
+#   |     | 2001:db8:2:X::1/64             : loc=2001:db8:2:X::1              |
+#   |     |                                : rem=2001:db8:2:X::2              |
+#   |     |                                : ttl=100                          |
+#   |     |                                : tos=inherit                      |
+#   |     |                                :                                  |
+#   +-----|--------------------------------:----------------------------------+
+#         |                                :
+#   +-----|--------------------------------:----------------------------------+
+#   | H3  + $h3                            + h3-gt6-<X> (ip6gretap)           |
+#   |       2001:db8:2:X::2/64               loc=2001:db8:2:X::2              |
+#   |                                        rem=2001:db8:2:X::1              |
+#   |                                        ttl=100                          |
+#   |                                        tos=inherit                      |
+#   |                                                                         |
+#   +-------------------------------------------------------------------------+
+
+source ../../../../net/forwarding/mirror_lib.sh
+
+MIRROR_NUM_NETIFS=6
+
+mirror_gre_ipv6_addr()	# Print "2001:db8:<net>:<num>" with both fields in hex.
+{
+	local subnet=$1
+	local index=$2
+
+	printf "2001:db8:%x:%x" $subnet $index
+}
+
+mirror_gre_tunnels_create()	# Args: tunnel count, should_fail flag.
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+	local i		# keep the loop counter out of the callers' scope
+
+	MIRROR_GRE_BATCH_FILE="$(mktemp)"
+	for ((i=0; i < count; ++i)); do
+		local match_dip=$(mirror_gre_ipv6_addr 1 $i)::2
+		local htun=h3-gt6-$i
+		local tun=gt6-$i
+
+		((mirror_gre_tunnels++))	# count is read by mirror_gre_cleanup
+		ip address add dev $h1 $(mirror_gre_ipv6_addr 1 $i)::1/64
+		ip address add dev $h2 $(mirror_gre_ipv6_addr 1 $i)::2/64
+		ip address add dev $swp3 $(mirror_gre_ipv6_addr 2 $i)::1/64
+		ip address add dev $h3 $(mirror_gre_ipv6_addr 2 $i)::2/64
+
+		tunnel_create $tun ip6gretap \
+			      $(mirror_gre_ipv6_addr 2 $i)::1 \
+			      $(mirror_gre_ipv6_addr 2 $i)::2 \
+			      ttl 100 tos inherit allow-localremote
+
+		tunnel_create $htun ip6gretap \
+			      $(mirror_gre_ipv6_addr 2 $i)::2 \
+			      $(mirror_gre_ipv6_addr 2 $i)::1
+		ip link set $htun vrf v$h3
+		matchall_sink_create $htun
+
+		cat >> $MIRROR_GRE_BATCH_FILE <<-EOF
+			filter add dev $swp1 ingress pref 1000 \
+				protocol ipv6 \
+				flower $tcflags dst_ip $match_dip \
+				action mirred egress mirror dev $tun
+		EOF
+	done
+
+	tc -b $MIRROR_GRE_BATCH_FILE	# insert all mirror rules in one batch
+	check_err_fail $should_fail $? "Mirror rule insertion"
+	rm -f $MIRROR_GRE_BATCH_FILE	# the mktemp file is not needed any more
+}
+
+mirror_gre_tunnels_destroy()	# Undo mirror_gre_tunnels_create for $1 tunnels.
+{
+	local count=$1; shift
+	local i		# keep the loop counter out of the callers' scope
+
+	for ((i=0; i < count; ++i)); do
+		local htun=h3-gt6-$i
+		local tun=gt6-$i
+
+		ip address del dev $h3 $(mirror_gre_ipv6_addr 2 $i)::2/64
+		ip address del dev $swp3 $(mirror_gre_ipv6_addr 2 $i)::1/64
+		ip address del dev $h2 $(mirror_gre_ipv6_addr 1 $i)::2/64
+		ip address del dev $h1 $(mirror_gre_ipv6_addr 1 $i)::1/64
+
+		tunnel_destroy $htun
+		tunnel_destroy $tun
+	done
+}
+
+__mirror_gre_test()	# Args: tunnel count, should_fail flag.
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+	local i		# keep the loop counter out of the callers' scope
+
+	mirror_gre_tunnels_create $count $should_fail
+	if ((should_fail)); then
+	    return	# rule insertion was the whole test in this case
+	fi
+
+	sleep 5	# NOTE(review): presumably settle time before traffic - confirm
+
+	for ((i = 0; i < count; ++i)); do
+		local sip=$(mirror_gre_ipv6_addr 1 $i)::1
+		local dip=$(mirror_gre_ipv6_addr 1 $i)::2
+		local htun=h3-gt6-$i
+
+		icmp6_capture_install $htun	# capture on tunnel $i's far end
+		mirror_test v$h1 $sip $dip $htun 100 10
+		icmp6_capture_uninstall $htun
+	done
+}
+
+mirror_gre_test()	# Args: tunnel count, should_fail flag. Uses skip_sw rules.
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+	# Pass this file's own netif count, not the tc_flower test's variable.
+	if ! tc_offload_check $MIRROR_NUM_NETIFS; then
+		check_err 1 "Could not test offloaded functionality"
+		return
+	fi
+
+	tcflags="skip_sw"	# expanded into the batched flower rules
+	__mirror_gre_test $count $should_fail
+}
+
+mirror_gre_setup_prepare()	# Map NETIFS p1..p6 and build the bridged topology.
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	mirror_gre_tunnels=0	# incremented by mirror_gre_tunnels_create
+
+	vrf_prepare
+
+	simple_if_init $h1
+	simple_if_init $h2
+	simple_if_init $h3
+
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	tc qdisc add dev $swp1 clsact	# the mirror rules attach to $swp1 ingress
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	ip link set dev $swp3 up	# $swp3 is not enslaved to br1
+}
+
+mirror_gre_cleanup()	# Undo mirror_gre_setup_prepare and any created tunnels.
+{
+	mirror_gre_tunnels_destroy $mirror_gre_tunnels	# as many as were counted
+
+	ip link set dev $swp3 down
+
+	ip link set dev $swp2 down
+
+	tc qdisc del dev $swp1 clsact
+	ip link set dev $swp1 down
+
+	ip link del dev br1
+
+	simple_if_fini $h3
+	simple_if_fini $h2
+	simple_if_fini $h1
+
+	vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
new file mode 100644
index 000000000..cbe50f260
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Defines
+
+if [ -z "${MLXSW_CHIP+set}" ]; then	# probe only when the caller left it unset
+	MLXSW_CHIP=$(devlink -j dev info $DEVLINK_DEV | jq -r '.[][]["driver"]')
+	[ -n "$MLXSW_CHIP" ] || {
+		echo "SKIP: Device $DEVLINK_DEV doesn't support devlink info command"
+		exit 1
+	}
+fi
diff --git a/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh
new file mode 100755
index 000000000..f02d83e94
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh
@@ -0,0 +1,259 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test a "one-armed router" [1] scenario. Packets forwarded between H1 and H2
+# should be forwarded by the ASIC, but also trapped so that ICMP redirect
+# packets could be potentially generated.
+#
+# 1. https://en.wikipedia.org/wiki/One-armed_router
+#
+# +---------------------------------+
+# | H1 (vrf)                        |
+# |    + $h1                        |
+# |    | 192.0.2.1/24               |
+# |    | 2001:db8:1::1/64           |
+# |    |                            |
+# |    |  default via 192.0.2.2     |
+# |    |  default via 2001:db8:1::2 |
+# +----|----------------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# | +--|--------------------------------------------------------------------+ |
+# | |  + $swp1                   BR0 (802.1d)                               | |
+# | |                                                                       | |
+# | |                            192.0.2.2/24                               | |
+# | |                          2001:db8:1::2/64                             | |
+# | |                           198.51.100.2/24                             | |
+# | |                          2001:db8:2::2/64                             | |
+# | |                                                                       | |
+# | |  + $swp2                                                              | |
+# | +--|--------------------------------------------------------------------+ |
+# |    |                                                                      |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|----------------------------+
+# |    |  default via 198.51.100.2  |
+# |    |  default via 2001:db8:2::2 |
+# |    |                            |
+# |    | 2001:db8:2::1/64           |
+# |    | 198.51.100.1/24            |
+# |    + $h2                        |
+# | H2 (vrf)                        |
+# +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="ping_ipv4 ping_ipv6 fwd_mark_ipv4 fwd_mark_ipv6"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+h1_create()	# Configure $h1 and default-route it through br0's addresses.
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+
+	ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+	ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()	# Undo h1_create: routes first, then the interface setup.
+{
+	ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2
+	ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()	# Configure $h2 and default-route it through br0's addresses.
+{
+	simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64
+
+	ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+	ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()	# Undo h2_create: routes first, then the interface setup.
+{
+	ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2
+	ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+	simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64
+}
+
+switch_create()	# Bridge $swp1 and $swp2 under br0, which holds both gateways.
+{
+	ip link add name br0 type bridge mcast_snooping 0
+	ip link set dev br0 up
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp1 up
+	ip link set dev $swp2 master br0
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp1 clsact	# needed by the fwd_mark_* filters
+	tc qdisc add dev $swp2 clsact
+
+	__addr_add_del br0 add 192.0.2.2/24 2001:db8:1::2/64	# H1's gateway
+	__addr_add_del br0 add 198.51.100.2/24 2001:db8:2::2/64	# H2's gateway
+}
+
+switch_destroy()	# Undo switch_create in reverse order.
+{
+	__addr_add_del br0 del 198.51.100.2/24 2001:db8:2::2/64
+	__addr_add_del br0 del 192.0.2.2/24 2001:db8:1::2/64
+
+	tc qdisc del dev $swp2 clsact
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	ip link set dev br0 down
+	ip link del dev br0
+}
+
+ping_ipv4()	# Ping H2's IPv4 address from $h1.
+{
+	ping_test $h1 198.51.100.1 ": h1->h2"
+}
+
+ping_ipv6()	# Ping H2's IPv6 address from $h1.
+{
+	ping6_test $h1 2001:db8:2::1 ": h1->h2"
+}
+
+fwd_mark_ipv4()	# Three sub-tests, each reported by its own log_test call.
+{
+	# Transmit packets from H1 to H2 and make sure they are trapped at
+	# swp1 due to loopback error, but only forwarded by the ASIC through
+	# swp2
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_hw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \
+		action pass
+
+	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 flower \
+		skip_hw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \
+		action pass
+
+	tc filter add dev $swp2 egress protocol ip pref 2 handle 102 flower \
+		skip_sw dst_ip 198.51.100.1 ip_proto udp dst_port 52768 \
+		action pass
+
+	ip vrf exec v$h1 $MZ $h1 -c 10 -d 100msec -p 64 -A 192.0.2.1 \
+		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+	RET=0	# sub-test 1: skip_hw filter on $swp1 ingress, expect 10
+
+	tc_check_packets "dev $swp1 ingress" 101 10
+	check_err $?
+
+	log_test "fwd mark: trapping IPv4 packets due to LBERROR"
+
+	RET=0	# sub-test 2: skip_hw filter on $swp2 egress, expect 0
+
+	tc_check_packets "dev $swp2 egress" 101 0
+	check_err $?
+
+	log_test "fwd mark: forwarding IPv4 packets in software"
+
+	RET=0	# sub-test 3: skip_sw filter on $swp2 egress, expect 10
+
+	tc_check_packets "dev $swp2 egress" 102 10
+	check_err $?
+
+	log_test "fwd mark: forwarding IPv4 packets in hardware"
+
+	tc filter del dev $swp2 egress protocol ip pref 2 handle 102 flower
+	tc filter del dev $swp2 egress protocol ip pref 1 handle 101 flower
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+}
+
+fwd_mark_ipv6()	# IPv6 counterpart of fwd_mark_ipv4; same three sub-tests.
+{
+	tc filter add dev $swp1 ingress protocol ipv6 pref 1 handle 101 flower \
+		skip_hw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \
+		action pass
+
+	tc filter add dev $swp2 egress protocol ipv6 pref 1 handle 101 flower \
+		skip_hw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \
+		action pass
+
+	tc filter add dev $swp2 egress protocol ipv6 pref 2 handle 102 flower \
+		skip_sw dst_ip 2001:db8:2::1 ip_proto udp dst_port 52768 \
+		action pass
+
+	ip vrf exec v$h1 $MZ $h1 -6 -c 10 -d 100msec -p 64 -A 2001:db8:1::1 \
+		-B 2001:db8:2::1 -t udp dp=52768,sp=42768 -q
+
+	RET=0	# sub-test 1: skip_hw filter on $swp1 ingress, expect 10
+
+	tc_check_packets "dev $swp1 ingress" 101 10
+	check_err $?
+
+	log_test "fwd mark: trapping IPv6 packets due to LBERROR"
+
+	RET=0	# sub-test 2: skip_hw filter on $swp2 egress, expect 0
+
+	tc_check_packets "dev $swp2 egress" 101 0
+	check_err $?
+
+	log_test "fwd mark: forwarding IPv6 packets in software"
+
+	RET=0	# sub-test 3: skip_sw filter on $swp2 egress, expect 10
+
+	tc_check_packets "dev $swp2 egress" 102 10
+	check_err $?
+
+	log_test "fwd mark: forwarding IPv6 packets in hardware"
+
+	tc filter del dev $swp2 egress protocol ipv6 pref 2 handle 102 flower
+	tc filter del dev $swp2 egress protocol ipv6 pref 1 handle 101 flower
+	tc filter del dev $swp1 ingress protocol ipv6 pref 1 handle 101 flower
+}
+
+setup_prepare()	# Map NETIFS p1..p4, set sysctls, create hosts and switch.
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+	forwarding_enable
+
+	sysctl_set net.ipv4.conf.all.accept_redirects 0	# restored in cleanup
+	sysctl_set net.ipv6.conf.all.accept_redirects 0
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()	# EXIT-trap handler: undo setup_prepare in reverse order.
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	sysctl_restore net.ipv6.conf.all.accept_redirects
+	sysctl_restore net.ipv4.conf.all.accept_redirects
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+trap cleanup EXIT	# run cleanup whenever the script exits
+
+setup_prepare
+setup_wait
+
+tests_run	# presumably runs the tests listed in ALL_TESTS - defined in lib.sh
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
new file mode 100755
index 000000000..71066bc4b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
@@ -0,0 +1,166 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for port-default priority. Non-IP packets ingress $swp1 and are
+# prioritized according to the default priority specified at the port.
+# rx_frames_prio_* counters are used to verify the prioritization.
+#
+# +-----------------------+
+# | H1                    |
+# |    + $h1              |
+# |    | 192.0.2.1/28     |
+# +----|------------------+
+#      |
+# +----|------------------+
+# | SW |                  |
+# |    + $swp1            |
+# |      192.0.2.2/28     |
+# |      APP=<prio>,1,0   |
+# +-----------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_defprio
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=2
+: ${HIT_TIMEOUT:=1000} # ms
+source $lib_dir/lib.sh
+
+declare -a APP
+
+defprio_install()	# Add APP entry "<prio>,1,0" on $dev and record it in APP[].
+{
+	local dev=$1; shift
+	local prio=$1; shift
+	local app="app=$prio,1,0"
+
+	lldptool -T -i $dev -V APP $app >/dev/null
+	lldpad_app_wait_set $dev
+	APP[$prio]=$app	# remembered so uninstall/flush can delete the same entry
+}
+
+defprio_uninstall()	# Delete the APP entry recorded for $prio on $dev.
+{
+	local dev=$1; shift
+	local prio=$1; shift
+	local app=${APP[$prio]}	# entry stored by defprio_install
+
+	lldptool -T -i $dev -V APP -d $app >/dev/null
+	lldpad_app_wait_del
+	unset APP[$prio]
+}
+
+defprio_flush()	# Delete every APP entry still recorded in APP[] from $dev.
+{
+	local dev=$1; shift
+
+	# lldptool is only invoked when at least one entry is still recorded.
+	if ((${#APP[@]})); then
+		lldptool -T -i $dev -V APP -d ${APP[@]} >/dev/null
+	fi
+	lldpad_app_wait_del
+	APP=()
+}
+
+h1_create()	# Initialise $h1 with 192.0.2.1/28.
+{
+	simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()	# Undo h1_create.
+{
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+switch_create()	# Bring $swp1 up and give it 192.0.2.2/28.
+{
+	ip link set dev $swp1 up
+	ip addr add dev $swp1 192.0.2.2/28
+}
+
+switch_destroy()	# Drop leftover APP entries, then undo switch_create.
+{
+	defprio_flush $swp1	# done before the port is taken down
+	ip addr del dev $swp1 192.0.2.2/28
+	ip link set dev $swp1 down
+}
+
+setup_prepare()	# Map NETIFS p1/p2 to $h1/$swp1 and configure both.
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	vrf_prepare
+
+	h1_create
+	switch_create
+}
+
+cleanup()	# EXIT-trap handler: undo setup_prepare in reverse order.
+{
+	pre_cleanup
+
+	switch_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()	# Ping $swp1's address from $h1.
+{
+	ping_test $h1 192.0.2.2
+}
+
+__test_defprio()	# Args: priority to install, priority whose counter to watch.
+{
+	local prio_install=$1; shift
+	local prio_observe=$1; shift
+	local t1	# declared apart from its assignment so $? is busywait's
+
+	RET=0
+
+	defprio_install $swp1 $prio_install
+
+	# Send 10 ARP replies from $h1, then busywait until the observed RX
+	# frame counter is >= t0 + 10. Honor $MZ when it is set.
+	local t0=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe)
+	${MZ:-mausezahn} -q $h1 -d 100m -c 10 -t arp reply
+	t1=$(busywait "$HIT_TIMEOUT" until_counter_is ">= $((t0 + 10))" \
+		ethtool_stats_get $swp1 rx_frames_prio_$prio_observe)
+
+	check_err $? "Default priority $prio_install/$prio_observe: Expected to capture 10 packets, got $((t1 - t0))."
+	log_test "Default priority $prio_install/$prio_observe"
+
+	defprio_uninstall $swp1 $prio_install
+}
+
+test_defprio()
+{
+	local prio
+	local pair
+
+	# One rule at a time: observe the same priority that was installed.
+	for ((prio = 0; prio <= 7; prio++)); do
+		__test_defprio $prio $prio
+	done
+
+	# Keep priority 3 installed next to the priority under test. Each
+	# pair below is <installed>:<observed>.
+	defprio_install $swp1 3
+	for pair in 0:3 1:3 2:3 4:4 5:5 6:6 7:7; do
+		__test_defprio ${pair%:*} ${pair#*:}
+	done
+	defprio_uninstall $swp1 3
+}
+
+trap cleanup EXIT	# run cleanup whenever the script exits
+
+setup_prepare
+setup_wait
+
+tests_run	# presumably runs the tests listed in ALL_TESTS - defined in lib.sh
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
new file mode 100755
index 000000000..28a570006
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
@@ -0,0 +1,194 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for DSCP prioritization and rewrite. Packets ingress $swp1 with a DSCP
+# tag and are prioritized according to the map at $swp1. They egress $swp2 and
+# the DSCP value is updated to match the map at that interface. The updated DSCP
+# tag is verified at $h2.
+#
+# ICMP responses are produced with the same DSCP tag that arrived at $h2. They
+# go through prioritization at $swp2 and DSCP retagging at $swp1. The tag is
+# verified at $h1--it should match the original tag.
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |    + $h1             |                             |            $h2 +     |
+# |    | 192.0.2.1/28    |                             |   192.0.2.2/28 |     |
+# +----|-----------------+                             +----------------|-----+
+#      |                                                                |
+# +----|----------------------------------------------------------------|-----+
+# | SW |                                                                |     |
+# |  +-|----------------------------------------------------------------|-+   |
+# |  | + $swp1                       BR                           $swp2 + |   |
+# |  |   APP=0,5,10 .. 7,5,17                      APP=0,5,20 .. 7,5,27   |   |
+# |  +--------------------------------------------------------------------+   |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_dscp
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+
+h1_create()	# Configure $h1 and install DSCP capture with base 10.
+{
+	simple_if_init $h1 192.0.2.1/28
+	tc qdisc add dev $h1 clsact
+	dscp_capture_install $h1 10	# 10 is the base of $swp1's APP map
+}
+
+h1_destroy()	# Undo h1_create in reverse order.
+{
+	dscp_capture_uninstall $h1 10
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()	# Configure $h2 and install DSCP capture with base 20.
+{
+	simple_if_init $h2 192.0.2.2/28
+	tc qdisc add dev $h2 clsact
+	dscp_capture_install $h2 20	# 20 is the base of $swp2's APP map
+}
+
+h2_destroy()	# Undo h2_create in reverse order.
+{
+	dscp_capture_uninstall $h2 20
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.2/28
+}
+
+dscp_map()	# Print eight "app=<prio>,5,<base+prio>" entries, one per line.
+{
+	local base=$1
+	local p
+
+	for ((p = 0; p <= 7; p++)); do
+		echo app=$p,5,$((base + p))
+	done
+}
+
+switch_create()	# Bridge $swp1/$swp2 under br1 and install both APP maps.
+{
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	lldptool -T -i $swp1 -V APP $(dscp_map 10) >/dev/null	# DSCP 10..17
+	lldptool -T -i $swp2 -V APP $(dscp_map 20) >/dev/null	# DSCP 20..27
+	lldpad_app_wait_set $swp1
+	lldpad_app_wait_set $swp2
+}
+
+switch_destroy()	# Undo switch_create: APP maps first, then the bridge.
+{
+	lldptool -T -i $swp2 -V APP -d $(dscp_map 20) >/dev/null
+	lldptool -T -i $swp1 -V APP -d $(dscp_map 10) >/dev/null
+	lldpad_app_wait_del
+
+	ip link set dev $swp2 down
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+	ip link del dev br1
+}
+
+setup_prepare()	# Map NETIFS p1..p4, then create both hosts and the switch.
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()	# EXIT-trap handler: undo setup_prepare in reverse order.
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()	# Ping $h2's address from $h1.
+{
+	ping_test $h1 192.0.2.2
+}
+
+dscp_ping_test()	# Args: vrf, source IP, dest IP, prio, base-10 dev, base-20 dev.
+{
+	local vrf_name=$1; shift
+	local sip=$1; shift
+	local dip=$1; shift
+	local prio=$1; shift
+	local dev_10=$1; shift
+	local dev_20=$1; shift
+	local key
+
+	local dscp_10=$(((prio + 10) << 2))	# value handed to ping -Q
+	local dscp_20=$(((prio + 20) << 2))	# only used in the log message
+
+	RET=0
+	# Snapshot the per-DSCP counters of both devices before the ping.
+	local -A t0s
+	eval "t0s=($(dscp_fetch_stats $dev_10 10)
+		   $(dscp_fetch_stats $dev_20 20))"
+
+	local ping_timeout=$((PING_TIMEOUT * 5))
+	ip vrf exec $vrf_name \
+	   ${PING} -Q $dscp_10 ${sip:+-I $sip} $dip \
+		   -c 10 -i 0.5 -w $ping_timeout &> /dev/null
+
+	local -A t1s
+	eval "t1s=($(dscp_fetch_stats $dev_10 10)
+		   $(dscp_fetch_stats $dev_20 20))"
+
+	for key in ${!t0s[@]}; do
+		local expect
+		if ((key == prio+10 || key == prio+20)); then
+			expect=10	# one hit per ping sent with -c 10
+		else
+			expect=0	# every other DSCP must stay untouched
+		fi
+
+		local delta=$((t1s[$key] - t0s[$key]))
+		((expect == delta))
+		check_err $? "DSCP $key: Expected to capture $expect packets, got $delta."
+	done
+
+	log_test "DSCP rewrite: $dscp_10-(prio $prio)-$dscp_20"
+}
+
+test_dscp()	# Run dscp_ping_test once for each priority 0..7.
+{
+	local prio
+
+	for ((prio = 0; prio <= 7; ++prio)); do
+		dscp_ping_test v$h1 192.0.2.1 192.0.2.2 $prio $h1 $h2
+	done
+}
+
+trap cleanup EXIT	# run cleanup whenever the script exits
+
+setup_prepare
+setup_wait
+
+tests_run	# presumably runs the tests listed in ALL_TESTS - defined in lib.sh
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
new file mode 100755
index 000000000..4cb2aa652
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
@@ -0,0 +1,284 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for DSCP prioritization in the router.
+#
+# With ip_forward_update_priority disabled, the packets are expected to keep
+# their DSCP (which in this test uses only values 0..7) intact as they are
+# forwarded by the switch. That is verified at $h2. ICMP responses are formed
+# with the same DSCP as the requests, and likewise pass through the switch
+# intact, which is verified at $h1.
+#
+# With ip_forward_update_priority enabled, router reprioritizes the packets
+# according to the table in reprioritize(). Thus, say, DSCP 7 maps to priority
+# 4, which on egress maps back to DSCP 4. The response packet then gets
+# reprioritized to 6, getting DSCP 6 on egress.
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |    + $h1             |                             |            $h2 +     |
+# |    | 192.0.2.1/28    |                             |  192.0.2.18/28 |     |
+# +----|-----------------+                             +----------------|-----+
+#      |                                                                |
+# +----|----------------------------------------------------------------|-----+
+# | SW |                                                                |     |
+# |    + $swp1                                                    $swp2 +     |
+# |      192.0.2.2/28                                     192.0.2.17/28       |
+# |      APP=0,5,0 .. 7,5,7                          APP=0,5,0 .. 7,5,7       |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_update
+	test_no_update
+	test_pedit_norewrite
+	test_dscp_leftover
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+
+reprioritize()	# Print the priority that ingress priority $1 is remapped to.
+{
+	# Lookup table modelled on rt_tos2priority in include/net/route.h,
+	# assuming a 1:1 mapping between priorities and TOS. The array index
+	# is the ingress priority, the element is the new priority.
+	local -a prio_after=(0 0 2 2 6 6 4 4)
+	local ingress=$1
+
+	echo ${prio_after[$ingress]}
+
+}
+
+zero()	# Remapping stub for __test_update: always prints 0.
+{
+    printf '%s\n' 0
+}
+
+three()	# Remapping stub for __test_update: always prints 3.
+{
+    printf '%s\n' 3
+}
+
+h1_create()	# Configure $h1, DSCP capture (base 0) and the route to H2's net.
+{
+	simple_if_init $h1 192.0.2.1/28
+	tc qdisc add dev $h1 clsact
+	dscp_capture_install $h1 0
+	ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2	# via $swp1's address
+}
+
+h1_destroy()	# Undo h1_create in reverse order.
+{
+	ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+	dscp_capture_uninstall $h1 0
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()	# Configure $h2, DSCP capture (base 0) and the route to H1's net.
+{
+	simple_if_init $h2 192.0.2.18/28
+	tc qdisc add dev $h2 clsact
+	dscp_capture_install $h2 0
+	ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17	# via $swp2's address
+}
+
+h2_destroy()	# Undo h2_create in reverse order.
+{
+	ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+	dscp_capture_uninstall $h2 0
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.18/28
+}
+
+dscp_map()	# Print "app=<prio>,5,<base+prio>" for each priority 0..7.
+{
+	local base=$1
+	local p=0
+
+	for ((; p < 8; ++p)); do
+		echo app=$p,5,$((base + p))
+	done
+}
+
+switch_create()	# Address both router ports and install identity APP maps.
+{
+	simple_if_init $swp1 192.0.2.2/28
+	__simple_if_init $swp2 v$swp1 192.0.2.17/28	# $swp2 joins VRF v$swp1
+
+	tc qdisc add dev $swp1 clsact	# used by test_pedit_norewrite's filter
+	tc qdisc add dev $swp2 clsact
+
+	lldptool -T -i $swp1 -V APP $(dscp_map 0) >/dev/null
+	lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null
+	lldpad_app_wait_set $swp1
+	lldpad_app_wait_set $swp2
+}
+
+switch_destroy()	# Undo switch_create in reverse order.
+{
+	lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null
+	lldptool -T -i $swp1 -V APP -d $(dscp_map 0) >/dev/null
+	lldpad_app_wait_del
+
+	tc qdisc del dev $swp2 clsact
+	tc qdisc del dev $swp1 clsact
+
+	__simple_if_fini $swp2 192.0.2.17/28
+	simple_if_fini $swp1 192.0.2.2/28
+}
+
+setup_prepare()	# Map NETIFS p1..p4, set the sysctl, create hosts and switch.
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	sysctl_set net.ipv4.ip_forward_update_priority 1	# restored in cleanup
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()	# EXIT-trap handler: undo setup_prepare in reverse order.
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+	sysctl_restore net.ipv4.ip_forward_update_priority
+
+	vrf_cleanup
+}
+
+ping_ipv4()	# Ping $h2's address from $h1, across the router.
+{
+	ping_test $h1 192.0.2.18
+}
+
+dscp_ping_test()	# Args: vrf, src IP, dst IP, prio, remap function, dev1, dev2.
+{
+	local vrf_name=$1; shift
+	local sip=$1; shift
+	local dip=$1; shift
+	local prio=$1; shift
+	local reprio=$1; shift	# name of a command that prints the new priority
+	local dev1=$1; shift
+	local dev2=$1; shift
+	local i
+
+	local prio2=$($reprio $prio)   # ICMP Request egress prio
+	local prio3=$($reprio $prio2)  # ICMP Response egress prio
+
+	local dscp=$((prio << 2))     # ICMP Request ingress DSCP
+	local dscp2=$((prio2 << 2))   # ICMP Request egress DSCP
+	local dscp3=$((prio3 << 2))   # ICMP Response egress DSCP
+
+	RET=0
+	# Snapshot the per-DSCP counters of both devices before the ping.
+	eval "local -A dev1_t0s=($(dscp_fetch_stats $dev1 0))"
+	eval "local -A dev2_t0s=($(dscp_fetch_stats $dev2 0))"
+
+	local ping_timeout=$((PING_TIMEOUT * 5))
+	ip vrf exec $vrf_name \
+	   ${PING} -Q $dscp ${sip:+-I $sip} $dip \
+		   -c 10 -i 0.5 -w $ping_timeout &> /dev/null
+
+	eval "local -A dev1_t1s=($(dscp_fetch_stats $dev1 0))"
+	eval "local -A dev2_t1s=($(dscp_fetch_stats $dev2 0))"
+
+	for i in {0..7}; do
+		local dscpi=$((i << 2))
+		local expect2=0	# expected counter delta at $dev2
+		local expect3=0	# expected counter delta at $dev1
+
+		if ((i == prio2)); then
+			expect2=10
+		fi
+		if ((i == prio3)); then
+			expect3=10
+		fi
+
+		local delta=$((dev2_t1s[$i] - dev2_t0s[$i]))
+		((expect2 == delta))
+		check_err $? "DSCP $dscpi@$dev2: Expected to capture $expect2 packets, got $delta."
+
+		delta=$((dev1_t1s[$i] - dev1_t0s[$i]))
+		((expect3 == delta))
+		check_err $? "DSCP $dscpi@$dev1: Expected to capture $expect3 packets, got $delta."
+	done
+
+	log_test "DSCP rewrite: $dscp-(prio $prio2)-$dscp2-(prio $prio3)-$dscp3"
+}
+
+__test_update()	# Args: sysctl value, name of the remap function to expect.
+{
+	local update=$1
+	local reprio=$2
+	local knob=net.ipv4.ip_forward_update_priority
+	local prio
+
+	sysctl_restore $knob
+	sysctl_set $knob $update
+	for ((prio = 0; prio < 8; prio++)); do
+		dscp_ping_test v$h1 192.0.2.1 192.0.2.18 $prio $reprio $h1 $h2
+	done
+}
+
+test_update()	# Sysctl on: expect priorities remapped per reprioritize().
+{
+	echo "Test net.ipv4.ip_forward_update_priority=1"
+	__test_update 1 reprioritize
+}
+
+test_no_update()	# Sysctl off: "echo" as the remap keeps every priority as is.
+{
+	echo "Test net.ipv4.ip_forward_update_priority=0"
+	__test_update 0 echo
+}
+
+# Test that when DSCP is updated in pedit, the DSCP rewrite is turned off.
+test_pedit_norewrite()	# Expect DSCP 3 everywhere once pedit forces it.
+{
+	echo "Test no DSCP rewrite after DSCP is updated by pedit"
+
+	tc filter add dev $swp1 ingress handle 101 pref 1 prot ip flower \
+	    action pedit ex munge ip dsfield set $((3 << 2)) retain 0xfc \
+	    action skbedit priority 3
+
+	__test_update 0 three	# "three" models the fixed priority set above
+
+	tc filter del dev $swp1 ingress pref 1
+}
+
+# Test that when the last APP rule is removed, the prio->DSCP map is properly
+# set to zeroes, and that the last APP rule does not stay active in the ASIC.
+test_dscp_leftover()	# Expect DSCP 0 everywhere once $swp2 has no APP rules.
+{
+	echo "Test that last removed DSCP rule is deconfigured correctly"
+
+	lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null
+	lldpad_app_wait_del
+
+	__test_update 0 zero
+
+	lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null	# put the map back
+	lldpad_app_wait_set $swp2
+}
+
+trap cleanup EXIT	# run cleanup whenever the script exits
+
+setup_prepare
+setup_wait
+
+tests_run	# presumably runs the tests listed in ALL_TESTS - defined in lib.sh
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
new file mode 100755
index 000000000..e9f8718af
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
@@ -0,0 +1,320 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# A test for strict prioritization of traffic in the switch. Run two streams of
+# traffic, each through a different ingress port, one tagged with PCP of 1, the
+# other with PCP of 2. Both streams converge at one egress port, where they are
+# assigned TC of, respectively, 1 and 2, with strict priority configured between
+# them. In H3, we expect to see (almost) exclusively the high-priority traffic.
+#
+# Please see qos_mc_aware.sh for an explanation of why we use mausezahn and
+# counters instead of just running iperf3.
+#
+# +---------------------------+                 +-----------------------------+
+# | H1                        |                 |                          H2 |
+# |         $h1.111 +         |                 |         + $h2.222           |
+# |   192.0.2.33/28 |         |                 |         | 192.0.2.65/28     |
+# |   e-qos-map 0:1 |         |                 |         | e-qos-map 0:2     |
+# |                 |         |                 |         |                   |
+# |             $h1 +         |                 |         + $h2               |
+# +-----------------|---------+                 +---------|-------------------+
+#                   |                                     |
+# +-----------------|-------------------------------------|-------------------+
+# |           $swp1 +                                     + $swp2             |
+# |          >1Gbps |                                     | >1Gbps            |
+# | +---------------|-----------+              +----------|----------------+  |
+# | |     $swp1.111 +           |              |          + $swp2.222      |  |
+# | |                     BR111 |       SW     | BR222                     |  |
+# | |     $swp3.111 +           |              |          + $swp3.222      |  |
+# | +---------------|-----------+              +----------|----------------+  |
+# |                 \_____________________________________/                   |
+# |                                    |                                      |
+# |                                    + $swp3                                |
+# |                                    | 1Gbps bottleneck                     |
+# |                                    | ETS: (up n->tc n for n in 0..7)      |
+# |                                    |      strict priority                 |
+# +------------------------------------|--------------------------------------+
+#                                      |
+#                 +--------------------|--------------------+
+#                 |                    + $h3             H3 |
+#                 |                   / \                   |
+#                 |                  /   \                  |
+#                 |         $h3.111 +     + $h3.222         |
+#                 |  192.0.2.34/28          192.0.2.66/28   |
+#                 +-----------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_ets_strict
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding  # shared library, relative to this script
+
+NUM_NETIFS=6  # p1..p6: h1/swp1, swp2/h2, swp3/h3 (see setup_prepare)
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh  # NOTE(review): no directory given, unlike the two lines above
+
+h1_create()
+{
+	simple_if_init $h1
+	mtu_set $h1 10000
+	# VLAN 111 upper; skb priority 0 egresses with PCP 1 (the low-prio stream).
+	vlan_create $h1 111 v$h1 192.0.2.33/28
+	ip link set dev $h1.111 type vlan egress-qos-map 0:1
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 111
+	# Counterpart of h1_create, in reverse order.
+	mtu_restore $h1
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	mtu_set $h2 10000
+	# VLAN 222 upper; skb priority 0 egresses with PCP 2 (the high-prio stream).
+	vlan_create $h2 222 v$h2 192.0.2.65/28
+	ip link set dev $h2.222 type vlan egress-qos-map 0:2
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 222
+	# Counterpart of h2_create, in reverse order.
+	mtu_restore $h2
+	simple_if_fini $h2
+}
+
+h3_create()
+{
+	simple_if_init $h3
+	mtu_set $h3 10000
+	# One VLAN upper per stream: 111 faces H1's subnet, 222 faces H2's.
+	vlan_create $h3 111 v$h3 192.0.2.34/28
+	vlan_create $h3 222 v$h3 192.0.2.66/28
+}
+
+h3_destroy()  # counterpart of h3_create, in reverse order
+{
+	vlan_destroy $h3 222
+	vlan_destroy $h3 111
+
+	mtu_restore $h3
+	simple_if_fini $h3
+}
+
+switch_create()
+{
+	ip link set dev $swp1 up
+	mtu_set $swp1 10000
+
+	ip link set dev $swp2 up
+	mtu_set $swp2 10000
+
+	# prio n -> TC n, strict scheduling
+	lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:1,2:2,3:3,4:4,5:5,6:6,7:7
+	lldptool -T -i $swp3 -V ETS-CFG tsa=$(
+			)"0:strict,"$(
+			)"1:strict,"$(
+			)"2:strict,"$(
+			)"3:strict,"$(
+			)"4:strict,"$(
+			)"5:strict,"$(
+			)"6:strict,"$(
+			)"7:strict"
+	sleep 1
+	# $swp3 is forced to 1Gbps: it is the bottleneck where both streams meet.
+	ip link set dev $swp3 up
+	mtu_set $swp3 10000
+	ethtool -s $swp3 speed 1000 autoneg off
+	# VLAN uppers, bridged pairwise below: 111 for H1's stream, 222 for H2's.
+	vlan_create $swp1 111
+	vlan_create $swp2 222
+	vlan_create $swp3 111
+	vlan_create $swp3 222
+
+	ip link add name br111 up type bridge vlan_filtering 0
+	ip link set dev $swp1.111 master br111
+	ip link set dev $swp3.111 master br111
+
+	ip link add name br222 up type bridge vlan_filtering 0
+	ip link set dev $swp2.222 master br222
+	ip link set dev $swp3.222 master br222
+
+	# Make sure that ingress quotas are smaller than egress so that there is
+	# room for both streams of traffic to be admitted to shared buffer.
+	devlink_pool_size_thtype_save 0
+	devlink_pool_size_thtype_set 0 dynamic 10000000
+	devlink_pool_size_thtype_save 4
+	devlink_pool_size_thtype_set 4 dynamic 10000000
+	# Ingress side: $swp1 receives the prio-1 stream.
+	devlink_port_pool_th_save $swp1 0
+	devlink_port_pool_th_set $swp1 0 6
+	devlink_tc_bind_pool_th_save $swp1 1 ingress
+	devlink_tc_bind_pool_th_set $swp1 1 ingress 0 6
+	# Ingress side: $swp2 receives the prio-2 stream.
+	devlink_port_pool_th_save $swp2 0
+	devlink_port_pool_th_set $swp2 0 6
+	devlink_tc_bind_pool_th_save $swp2 2 ingress
+	devlink_tc_bind_pool_th_set $swp2 2 ingress 0 6
+	# Egress side: TCs 1 and 2 of $swp3. Every value is saved before it is set.
+	devlink_tc_bind_pool_th_save $swp3 1 egress
+	devlink_tc_bind_pool_th_set $swp3 1 egress 4 7
+	devlink_tc_bind_pool_th_save $swp3 2 egress
+	devlink_tc_bind_pool_th_set $swp3 2 egress 4 7
+	devlink_port_pool_th_save $swp3 4
+	devlink_port_pool_th_set $swp3 4 7
+}
+
+switch_destroy()  # undoes switch_create, roughly in reverse order
+{
+	devlink_port_pool_th_restore $swp3 4
+	devlink_tc_bind_pool_th_restore $swp3 2 egress
+	devlink_tc_bind_pool_th_restore $swp3 1 egress
+
+	devlink_tc_bind_pool_th_restore $swp2 2 ingress
+	devlink_port_pool_th_restore $swp2 0
+
+	devlink_tc_bind_pool_th_restore $swp1 1 ingress
+	devlink_port_pool_th_restore $swp1 0
+
+	devlink_pool_size_thtype_restore 4
+	devlink_pool_size_thtype_restore 0
+
+	ip link del dev br222
+	ip link del dev br111
+
+	vlan_destroy $swp3 222
+	vlan_destroy $swp3 111
+	vlan_destroy $swp2 222
+	vlan_destroy $swp1 111
+
+	ethtool -s $swp3 autoneg on
+	mtu_restore $swp3
+	ip link set dev $swp3 down
+	lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:0,2:0,3:0,4:0,5:0,6:0,7:0
+	# NOTE(review): only up2tc is reset above; tsa is left as switch_create set it.
+	mtu_restore $swp2
+	ip link set dev $swp2 down
+
+	mtu_restore $swp1
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+	# MAC of $h3, later handed to start_traffic by the test.
+	h3mac=$(mac_get $h3)
+
+	vrf_prepare
+	# Hosts first, switch last; cleanup() tears down in the opposite order.
+	h1_create
+	h2_create
+	h3_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+	# Tear down in the reverse order of setup_prepare.
+	switch_destroy
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.34 " from H1"  # address of $h3.111
+	ping_test $h2 192.0.2.66 " from H2"  # address of $h3.222
+}
+
+rel()
+{
+	local old=$1
+	local new=$2
+	# Print $new as a percentage of $old (two decimals), or 0 if not positive.
+	echo "
+	    scale=2
+	    ret = 100 * $new / $old
+	    if (ret > 0) { ret } else { 0 }
+	" | bc
+}
+
+test_ets_strict()
+{
+	RET=0
+
+	# Run high-prio traffic on its own.
+	start_traffic $h2.222 192.0.2.65 192.0.2.66 $h3mac
+	local -a rate_1 rate_2 rate_3
+	rate_2=($(measure_rate $swp2 $h3 rx_octets_prio_2 "prio 2"))
+	check_err $? "Could not get high enough prio-2 ingress rate"
+	local rate_2_in=${rate_2[0]}
+	local rate_2_eg=${rate_2[1]}
+	stop_traffic # $h2.222
+
+	# Start low-prio stream.
+	start_traffic $h1.111 192.0.2.33 192.0.2.34 $h3mac
+
+	# Each rate_N array holds the ingress rate at [0] and the egress rate at [1].
+	rate_1=($(measure_rate $swp1 $h3 rx_octets_prio_1 "prio 1"))
+	check_err $? "Could not get high enough prio-1 ingress rate"
+	local rate_1_in=${rate_1[0]}
+	local rate_1_eg=${rate_1[1]}
+
+	# High-prio and low-prio on their own should have about the same
+	# throughput.
+	local rel21=$(rel $rate_1_eg $rate_2_eg)
+	check_err "$(bc <<< "$rel21 < 95")" "Lone prio-2 egress rate is below 95% of lone prio-1"
+	check_err "$(bc <<< "$rel21 > 105")" "Lone prio-2 egress rate is above 105% of lone prio-1"
+
+	# Start the high-prio stream--now both streams run.
+	start_traffic $h2.222 192.0.2.65 192.0.2.66 $h3mac
+	rate_3=($(measure_rate $swp2 $h3 rx_octets_prio_2 "prio 2 w/ 1"))
+	check_err $? "Could not get high enough prio-2 ingress rate with prio-1"
+	local rate_3_in=${rate_3[0]}
+	local rate_3_eg=${rate_3[1]}
+	stop_traffic # $h2.222
+
+	stop_traffic # $h1.111
+
+	# High-prio should have about the same throughput whether or not
+	# low-prio is in the system.
+	local rel32=$(rel $rate_2_eg $rate_3_eg)
+	check_err "$(bc <<< "$rel32 < 95")" "Prio-2 egress rate fell below 95% once prio-1 was added"
+
+	log_test "strict priority"
+	echo "Ingress to switch:"
+	echo "  p1 in rate            $(humanize $rate_1_in)"
+	echo "  p2 in rate            $(humanize $rate_2_in)"
+	echo "  p2 in rate w/ p1      $(humanize $rate_3_in)"
+	echo "Egress from switch:"
+	echo "  p1 eg rate            $(humanize $rate_1_eg)"
+	echo "  p2 eg rate            $(humanize $rate_2_eg) ($rel21% of p1)"
+	echo "  p2 eg rate w/ p1      $(humanize $rate_3_eg) ($rel32% of p2)"
+}
+
+trap cleanup EXIT  # run cleanup whenever the script exits
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS  # not assigned in this script; presumably maintained by lib.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
new file mode 100755
index 000000000..27de3d9ed
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
@@ -0,0 +1,379 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	test_defaults
+	test_dcb_ets
+	test_mtu
+	test_pfc
+	test_int_buf
+	test_tc_priomap
+	test_tc_mtu
+	test_tc_sizes
+	test_tc_int_buf
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding  # shared library, relative to this script
+
+NUM_NETIFS=0  # no p1..pN ports are used; the tests operate on $swp below
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh  # NOTE(review): no directory given, unlike the two lines above
+
+swp=$NETIF_NO_CABLE  # the single port under test; set outside this script
+
+cleanup()
+{
+	pre_cleanup  # only the common hook; this script defines no setup_prepare
+}
+
+get_prio_pg()
+{
+	__mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' | grep buffer |
+		tr -s ' ' | cut -d' ' -f 2-  # squeeze spaces, drop the first field
+}
+
+get_prio_pfc()
+{
+	__mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' | grep enabled |
+		tr -s ' ' | cut -d' ' -f 2-  # squeeze spaces, drop the first field
+}
+
+get_prio_tc()  # print the TC of every priority, in ascending priority order
+{
+	__mlnx_qos -i $swp | sed -n '/^tc/,$p' |
+		awk '/^tc/ { TC = $2 }
+		     /priority:/ { PRIO[$2] = TC; if ($2 + 0 > MAX) MAX = $2 + 0 }
+		     END {
+			for (i = 0; i <= MAX; i++)  # "for (i in PRIO)" has no defined order
+			    if (i in PRIO) printf("%d ", PRIO[i])
+		     }'
+}
+
+get_buf_size()
+{
+	local idx=$1
+	local field=$((idx + 1))  # cut counts fields from 1
+	__mlnx_qos -i $swp | grep Receive | sed 's/.*: //' | cut -d, -f $field
+}
+
+get_tot_size()
+{
+	__mlnx_qos -i $swp | sed -n '/Receive/{s/.*total_size=//;p;}'  # text after "total_size="
+}
+
+check_prio_pg()
+{
+	local expect=$1
+	local current=$(get_prio_pg)
+	# Exact string comparison; callers pass the map with a trailing space.
+	[[ "$current" == "$expect" ]]
+	check_err $? "prio2buffer is '$current', expected '$expect'"
+}
+
+check_prio_pfc()
+{
+	local expect=$1
+	local current=$(get_prio_pfc)
+	# Exact string comparison; callers pass the map with a trailing space.
+	[[ "$current" == "$expect" ]]
+	check_err $? "prio PFC is '$current', expected '$expect'"
+}
+
+check_prio_tc()
+{
+	local expect=$1
+	local current=$(get_prio_tc)
+	# Exact string comparison; callers pass the map with a trailing space.
+	[[ "$current" == "$expect" ]]
+	check_err $? "prio_tc is '$current', expected '$expect'"
+}
+
+__check_buf_size()
+{
+	local idx=$1
+	local expr=$2
+	local what=$3
+	# $expr is an operator plus operand (e.g. "> 0") applied to the size.
+	local current=$(get_buf_size $idx)
+	((current $expr))
+	check_err $? "${what}buffer $idx size is '$current', expected '$expr'"
+	echo $current
+}
+
+check_buf_size()  # same check as __check_buf_size, with the printed size discarded
+{
+	__check_buf_size "$@" > /dev/null
+}
+
+test_defaults()
+{
+	RET=0
+	# Expect every priority on buffer 0 and TC 0, with PFC reported as 0.
+	check_prio_pg "0 0 0 0 0 0 0 0 "
+	check_prio_tc "0 0 0 0 0 0 0 0 "
+	check_prio_pfc "0 0 0 0 0 0 0 0 "
+
+	log_test "Default headroom configuration"
+}
+
+test_dcb_ets()
+{
+	RET=0
+	# After prio_tc is set, prio->buffer is expected to read the same as prio->TC.
+	__mlnx_qos -i $swp --prio_tc=0,2,4,6,1,3,5,7 > /dev/null
+
+	check_prio_pg "0 2 4 6 1 3 5 7 "
+	check_prio_tc "0 2 4 6 1 3 5 7 "
+	check_prio_pfc "0 0 0 0 0 0 0 0 "
+
+	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+
+	check_prio_pg "0 0 0 0 0 0 0 0 "
+	check_prio_tc "0 0 0 0 0 0 0 0 "
+	# Setting prio2buffer directly must be refused at this point.
+	__mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 &> /dev/null
+	check_fail $? "prio2buffer accepted in DCB mode"
+
+	log_test "Configuring headroom through ETS"
+}
+
+test_mtu()
+{
+	local what=$1; shift
+	local buf0size buf0size_2
+	RET=0  # checks below run in this shell; check_err inside $(...) cannot update RET
+	buf0size=$(get_buf_size 0)
+	check_buf_size 0 "> 0"
+
+	mtu_set $swp 3000
+	buf0size_2=$(get_buf_size 0)
+	check_buf_size 0 "> $buf0size" "MTU 3000: "
+	mtu_restore $swp
+
+	mtu_set $swp 6000
+	check_buf_size 0 "> $buf0size_2" "MTU 6000: "
+	mtu_restore $swp
+
+	check_buf_size 0 "== $buf0size"
+
+	log_test "${what}MTU impacts buffer size"
+}
+
+test_tc_mtu()
+{
+	# In TC mode, MTU still impacts the threshold below which a buffer is
+	# not permitted to go.
+	# Run the common MTU test while a root qdisc is installed on $swp.
+	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+	test_mtu "TC: "
+	tc qdisc delete dev $swp root
+}
+
+test_pfc()
+{
+	RET=0
+	# Priorities 5-7 go to TCs 1-3; without PFC only buffers 0-3 may be non-zero.
+	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,1,2,3 > /dev/null
+
+	local buf0size=$(get_buf_size 0)
+	local buf1size=$(get_buf_size 1)
+	local buf2size=$(get_buf_size 2)
+	local buf3size=$(get_buf_size 3)
+	check_buf_size 0 "> 0"
+	check_buf_size 1 "> 0"
+	check_buf_size 2 "> 0"
+	check_buf_size 3 "> 0"
+	check_buf_size 4 "== 0"
+	check_buf_size 5 "== 0"
+	check_buf_size 6 "== 0"
+	check_buf_size 7 "== 0"
+
+	log_test "Buffer size sans PFC"
+
+	RET=0
+	# PFC on priorities 5-7 must grow buffers 1-3 and leave buffer 0 as it was.
+	__mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=0 > /dev/null
+
+	check_prio_pg "0 0 0 0 0 1 2 3 "
+	check_prio_pfc "0 0 0 0 0 1 1 1 "
+	check_buf_size 0 "== $buf0size"
+	check_buf_size 1 "> $buf1size"
+	check_buf_size 2 "> $buf2size"
+	check_buf_size 3 "> $buf3size"
+	# Re-sample buffer 1; buffers 2 and 3 are expected to match it exactly.
+	local buf1size=$(get_buf_size 1)
+	check_buf_size 2 "== $buf1size"
+	check_buf_size 3 "== $buf1size"
+
+	log_test "PFC: Cable length 0"
+
+	RET=0
+	# With cable_len=1000, buffers 1-3 must exceed their cable_len=0 size.
+	__mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=1000 > /dev/null
+
+	check_buf_size 0 "== $buf0size"
+	check_buf_size 1 "> $buf1size"
+	check_buf_size 2 "> $buf1size"
+	check_buf_size 3 "> $buf1size"
+
+	log_test "PFC: Cable length 1000"
+
+	RET=0
+	# Turn PFC off and put all priorities back on TC 0.
+	__mlnx_qos -i $swp --pfc=0,0,0,0,0,0,0,0 --cable_len=0 > /dev/null
+	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+
+	check_prio_pg "0 0 0 0 0 0 0 0 "
+	check_prio_tc "0 0 0 0 0 0 0 0 "
+	check_buf_size 0 "> 0"
+	check_buf_size 1 "== 0"
+	check_buf_size 2 "== 0"
+	check_buf_size 3 "== 0"
+	check_buf_size 4 "== 0"
+	check_buf_size 5 "== 0"
+	check_buf_size 6 "== 0"
+	check_buf_size 7 "== 0"
+
+	log_test "PFC: Restore defaults"
+}
+
+test_tc_priomap()
+{
+	RET=0
+	# Start from an identity prio->buffer map obtained through prio_tc.
+	__mlnx_qos -i $swp --prio_tc=0,1,2,3,4,5,6,7 > /dev/null
+	check_prio_pg "0 1 2 3 4 5 6 7 "
+	# With a root qdisc installed, the map is expected to read as all zeroes...
+	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+	check_prio_pg "0 0 0 0 0 0 0 0 "
+	# ...and can then be set directly through prio2buffer.
+	__mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 > /dev/null
+	check_prio_pg "1 3 5 7 0 2 4 6 "
+	# Removing the qdisc must bring the prio_tc-derived map back.
+	tc qdisc delete dev $swp root
+	check_prio_pg "0 1 2 3 4 5 6 7 "
+
+	# Clean up.
+	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+	__mlnx_qos -i $swp --prio2buffer=0,0,0,0,0,0,0,0 > /dev/null
+	tc qdisc delete dev $swp root
+	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+
+	log_test "TC: priomap"
+}
+
+test_tc_sizes()
+{
+	local cell_size=$(devlink_cell_size_get)
+	local size=$((cell_size * 1000))
+
+	RET=0
+	# buffer_size must be rejected without a root qdisc and accepted with one.
+	__mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null
+	check_fail $? "buffer_size should fail before qdisc is added"
+
+	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+
+	__mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+	check_err $? "buffer_size should pass after qdisc is added"
+	check_buf_size 0 "== $size" "set size: "
+	# A size set this way must not change when the MTU does.
+	mtu_set $swp 6000
+	check_buf_size 0 "== $size" "set MTU: "
+	mtu_restore $swp
+
+	__mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+
+	# After replacing the qdisc for the same kind, buffer_size still has to
+	# work.
+	tc qdisc replace dev $swp root handle 1: bfifo limit 1M
+
+	__mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+	check_buf_size 0 "== $size" "post replace, set size: "
+
+	__mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+
+	# Likewise after replacing for a different kind.
+	tc qdisc replace dev $swp root handle 2: prio bands 8
+
+	__mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+	check_buf_size 0 "== $size" "post replace different kind, set size: "
+	# Once the qdisc is gone, buffer_size must be rejected again.
+	tc qdisc delete dev $swp root
+
+	__mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null
+	check_fail $? "buffer_size should fail after qdisc is deleted"
+
+	log_test "TC: buffer size"
+}
+
+test_int_buf()
+{
+	local what=$1; shift
+
+	RET=0
+	# Baseline: reported total size minus buffer 0, before any mirroring.
+	local buf0size=$(get_buf_size 0)
+	local tot_size=$(get_tot_size)
+
+	# Size of internal buffer and buffer 9.
+	local dsize=$((tot_size - buf0size))
+	# Mirror $swp egress traffic back to $swp; skip_sw keeps the rule out of software.
+	tc qdisc add dev $swp clsact
+	tc filter add dev $swp egress matchall skip_sw action mirred egress mirror dev $swp
+
+	local buf0size_2=$(get_buf_size 0)
+	local tot_size_2=$(get_tot_size)
+	local dsize_2=$((tot_size_2 - buf0size_2))
+
+	# Egress SPAN should have added to the "invisible" buffer configuration.
+	((dsize_2 > dsize))
+	check_err $? "Invisible buffers account for '$dsize_2', expected '> $dsize'"
+
+	mtu_set $swp 3000
+
+	local buf0size_3=$(get_buf_size 0)
+	local tot_size_3=$(get_tot_size)
+	local dsize_3=$((tot_size_3 - buf0size_3))
+
+	# MTU change might change buffer 0, which will show at total, but the
+	# hidden buffers should stay the same size.
+	((dsize_3 == dsize_2))
+	check_err $? "MTU change: Invisible buffers account for '$dsize_3', expected '== $dsize_2'"
+
+	mtu_restore $swp
+	tc qdisc del dev $swp clsact
+
+	# After SPAN removal, hidden buffers should be back to the original sizes.
+	local buf0size_4=$(get_buf_size 0)
+	local tot_size_4=$(get_tot_size)
+	local dsize_4=$((tot_size_4 - buf0size_4))
+	((dsize_4 == dsize))
+	check_err $? "SPAN removed: Invisible buffers account for '$dsize_4', expected '== $dsize'"
+
+	log_test "${what}internal buffer size"
+}
+
+test_tc_int_buf()
+{
+	local cell_size=$(devlink_cell_size_get)
+	local size=$((cell_size * 1000))
+	# Run test_int_buf with a root qdisc installed...
+	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+	test_int_buf "TC: "
+	# ...and once more with buffer 0 set to an explicit size.
+	__mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+	test_int_buf "TC+buffsize: "
+
+	__mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+	tc qdisc delete dev $swp root
+}
+
+trap cleanup EXIT  # run cleanup whenever the script exits
+
+bail_on_lldpad  # exits with status 1 if lldpad is active and ALLOW_LLDPAD is empty
+setup_wait
+tests_run
+
+exit $EXIT_STATUS  # not assigned in this script; presumably maintained by lib.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
new file mode 100644
index 000000000..0bf76f13c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
@@ -0,0 +1,98 @@
+# SPDX-License-Identifier: GPL-2.0
+
+check_rate()
+{
+	local rate=$1; shift
+	local min=$1; shift
+	local what=$1; shift
+	# Succeed quietly when $rate is strictly above $min.
+	if ((rate > min)); then
+		return 0
+	fi
+	# Report the rate that was passed in, not a variable of the caller.
+	echo "$what $(humanize $rate) < $(humanize $min)" > /dev/stderr
+	return 1
+}
+
+measure_rate()  # prints "<ingress rate> <egress rate>"; returns 1 if ingress stayed too low
+{
+	local sw_in=$1; shift   # Where the traffic ingresses the switch
+	local host_in=$1; shift # Where it ingresses another host
+	local counter=$1; shift # Counter to use for measurement
+	local what=$1; shift
+
+	local interval=10
+	local i
+	local ret=0
+
+	# Dips in performance might cause momentary ingress rate to drop below
+	# 1Gbps. That wouldn't saturate egress and MC would thus get through,
+	# seemingly winning bandwidth on account of UC. Demand at least 2Gbps
+	# average ingress rate to somewhat mitigate this.
+	local min_ingress=2147483648
+	# Up to six attempts of $interval seconds each; i counts from 5 down to 0.
+	for i in {5..0}; do
+		local t0=$(ethtool_stats_get $host_in $counter)
+		local u0=$(ethtool_stats_get $sw_in $counter)
+		sleep $interval
+		local t1=$(ethtool_stats_get $host_in $counter)
+		local u1=$(ethtool_stats_get $sw_in $counter)
+		# ir is taken at the switch ingress port, er at the receiving host.
+		local ir=$(rate $u0 $u1 $interval)
+		local er=$(rate $t0 $t1 $interval)
+
+		if check_rate $ir $min_ingress "$what ingress rate"; then
+			break
+		fi
+
+		# Fail the test if we can't get the throughput.
+		if ((i == 0)); then
+			ret=1
+		fi
+	done
+	# Values of the last attempt are printed whether or not it was good enough.
+	echo $ir $er
+	return $ret
+}
+
+bail_on_lldpad()
+{
+	if systemctl is-active --quiet lldpad; then
+		# Always warn; exit unless ALLOW_LLDPAD is set to a non-empty string.
+		cat >/dev/stderr <<-EOF
+		WARNING: lldpad is running
+
+			lldpad will likely configure DCB, and this test will
+			configure Qdiscs. mlxsw does not support both at the
+			same time, one of them is arbitrarily going to overwrite
+			the other. That will cause spurious failures (or,
+			unlikely, passes) of this test.
+		EOF
+
+		if [[ -z $ALLOW_LLDPAD ]]; then
+			cat >/dev/stderr <<-EOF
+
+				If you want to run the test anyway, please set
+				an environment variable ALLOW_LLDPAD to a
+				non-empty string.
+			EOF
+			exit 1
+		else
+			return
+		fi
+	fi
+}
+
+__mlnx_qos()
+{
+	local rc=0
+
+	# Discard mlnx_qos's own stderr; failures are reported in one line below.
+	mlnx_qos "$@" 2>/dev/null || rc=$?
+
+	if [[ $rc -ne 0 ]]; then
+		echo "Error ($rc) in mlnx_qos $@" >/dev/stderr
+	fi
+
+	return $rc
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
new file mode 100755
index 000000000..8f164c80e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -0,0 +1,341 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# A test for switch behavior under MC overload. An issue in Spectrum chips
+# causes throughput of UC traffic to drop severely when a switch is under heavy
+# MC load. This issue can be overcome by putting the switch to MC-aware mode.
+# This test verifies that UC performance stays intact even as the switch is
+# under MC flood, and therefore that the MC-aware mode is enabled and correctly
+# configured.
+#
+# Because mlxsw throttles CPU port, the traffic can't actually reach userspace
+# at full speed. That makes it impossible to use iperf3 to simply measure the
+# throughput, because many packets (that reach $h3) don't get to the kernel at
+# all even in UDP mode (the situation is even worse in TCP mode, where one can't
+# hope to see more than a couple Mbps).
+#
+# So instead we send traffic with mausezahn and use RX ethtool counters at $h3.
+# Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore
+# each gets a different priority and we can use per-prio ethtool counters to
+# measure the throughput. In order to avoid prioritizing unicast traffic, prio
+# qdisc is installed on $swp3 and maps all priorities to the same band #7 (and
+# thus TC 0).
+#
+# Mausezahn can't actually saturate the links unless it's using large frames.
+# Thus we set MTU to 10K on all involved interfaces. Then both unicast and
+# multicast traffic uses 8K frames.
+#
+# +---------------------------+            +----------------------------------+
+# | H1                        |            |                               H2 |
+# |                           |            |  unicast --> + $h2.111           |
+# |                 multicast |            |  traffic     | 192.0.2.129/28    |
+# |                 traffic   |            |              | e-qos-map 0:1     |
+# |           $h1 + <-----    |            |              |                   |
+# | 192.0.2.65/28 |           |            |              + $h2               |
+# +---------------|-----------+            +--------------|-------------------+
+#                 |                                       |
+# +---------------|---------------------------------------|-------------------+
+# |         $swp1 +                                       + $swp2             |
+# |        >1Gbps |                                       | >1Gbps            |
+# | +-------------|------+                     +----------|----------------+  |
+# | |     $swp1.1 +      |                     |          + $swp2.111      |  |
+# | |                BR1 |             SW      | BR111                     |  |
+# | |     $swp3.1 +      |                     |          + $swp3.111      |  |
+# | +-------------|------+                     +----------|----------------+  |
+# |               \_______________________________________/                   |
+# |                                    |                                      |
+# |                                    + $swp3                                |
+# |                                    | 1Gbps bottleneck                     |
+# |                                    | prio qdisc: {0..7} -> 7              |
+# +------------------------------------|--------------------------------------+
+#                                      |
+#                                   +--|-----------------+
+#                                   |  + $h3          H3 |
+#                                   |  | 192.0.2.66/28   |
+#                                   |  |                 |
+#                                   |  + $h3.111         |
+#                                   |    192.0.2.130/28  |
+#                                   +--------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_mc_aware
+	test_uc_aware
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding  # shared library, relative to this script
+
+NUM_NETIFS=6  # p1..p6: h1/swp1, swp2/h2, swp3/h3 (see setup_prepare)
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh  # NOTE(review): no directory given, unlike the two lines above
+
+h1_create()  # H1 is untagged, as the file header says of the multicast traffic
+{
+	simple_if_init $h1 192.0.2.65/28
+	mtu_set $h1 10000  # 10K MTU, see the mausezahn note in the file header
+}
+
+h1_destroy()  # counterpart of h1_create, in reverse order
+{
+	mtu_restore $h1
+	simple_if_fini $h1 192.0.2.65/28
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	mtu_set $h2 10000
+	# VLAN 111 upper; skb priority 0 egresses with PCP 1 (the unicast stream).
+	vlan_create $h2 111 v$h2 192.0.2.129/28
+	ip link set dev $h2.111 type vlan egress-qos-map 0:1
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 111
+	# Counterpart of h2_create, in reverse order.
+	mtu_restore $h2
+	simple_if_fini $h2
+}
+
+h3_create()
+{
+	simple_if_init $h3 192.0.2.66/28
+	mtu_set $h3 10000
+	# Tagged upper in the same subnet as $h2.111.
+	vlan_create $h3 111 v$h3 192.0.2.130/28
+}
+
+h3_destroy()
+{
+	vlan_destroy $h3 111
+	# Counterpart of h3_create, in reverse order.
+	mtu_restore $h3
+	simple_if_fini $h3 192.0.2.66/28
+}
+
+switch_create()
+{
+	ip link set dev $swp1 up
+	mtu_set $swp1 10000
+
+	ip link set dev $swp2 up
+	mtu_set $swp2 10000
+
+	ip link set dev $swp3 up
+	mtu_set $swp3 10000
+
+	vlan_create $swp2 111
+	vlan_create $swp3 111
+	# $swp3 is forced to 1Gbps; the prio qdisc maps every priority to band 7.
+	ethtool -s $swp3 speed 1000 autoneg off
+	tc qdisc replace dev $swp3 root handle 3: \
+	   prio bands 8 priomap 7 7 7 7 7 7 7 7
+	# br1 bridges the untagged ports, br111 the VLAN 111 uppers.
+	ip link add name br1 type bridge vlan_filtering 0
+	ip link set dev br1 up
+	ip link set dev $swp1 master br1
+	ip link set dev $swp3 master br1
+
+	ip link add name br111 type bridge vlan_filtering 0
+	ip link set dev br111 up
+	ip link set dev $swp2.111 master br111
+	ip link set dev $swp3.111 master br111
+
+	# Make sure that ingress quotas are smaller than egress so that there is
+	# room for both streams of traffic to be admitted to shared buffer.
+	devlink_port_pool_th_save $swp1 0
+	devlink_port_pool_th_set $swp1 0 5
+	devlink_tc_bind_pool_th_save $swp1 0 ingress
+	devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5
+
+	devlink_port_pool_th_save $swp2 0
+	devlink_port_pool_th_set $swp2 0 5
+	devlink_tc_bind_pool_th_save $swp2 1 ingress
+	devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5
+	# Egress side of the bottleneck port. Every value is saved before it is set.
+	devlink_port_pool_th_save $swp3 4
+	devlink_port_pool_th_set $swp3 4 12
+}
+
+switch_destroy()  # undoes switch_create, roughly in reverse order
+{
+	devlink_port_pool_th_restore $swp3 4
+
+	devlink_tc_bind_pool_th_restore $swp2 1 ingress
+	devlink_port_pool_th_restore $swp2 0
+
+	devlink_tc_bind_pool_th_restore $swp1 0 ingress
+	devlink_port_pool_th_restore $swp1 0
+
+	ip link del dev br111
+	ip link del dev br1
+	# Remove the prio qdisc and hand link speed selection back to autoneg.
+	tc qdisc del dev $swp3 root handle 3:
+	ethtool -s $swp3 autoneg on
+
+	vlan_destroy $swp3 111
+	vlan_destroy $swp2 111
+
+	mtu_restore $swp3
+	ip link set dev $swp3 down
+
+	mtu_restore $swp2
+	ip link set dev $swp2 down
+
+	mtu_restore $swp1
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+	# MAC of $h3, later handed to start_traffic by the tests.
+	h3mac=$(mac_get $h3)
+
+	vrf_prepare
+	# Hosts first, switch last; cleanup() tears down in the opposite order.
+	h1_create
+	h2_create
+	h3_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+	# Tear down in the reverse order of setup_prepare.
+	switch_destroy
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h2 192.0.2.130  # address of $h3.111
+}
+
+test_mc_aware()
+{
+	RET=0
+	# Baseline: unicast stream alone; uc_rate holds (ingress rate, egress rate).
+	local -a uc_rate
+	start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac
+	uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC-only"))
+	check_err $? "Could not get high enough UC-only ingress rate"
+	stop_traffic
+	local ucth1=${uc_rate[1]}
+	# Second stream from $h1; "bc bc" presumably selects broadcast addresses.
+	start_traffic $h1 192.0.2.65 bc bc
+
+	local d0=$(date +%s)
+	local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
+	local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0)
+	# Measure the unicast stream again while the $h1 stream keeps running.
+	local -a uc_rate_2
+	start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac
+	uc_rate_2=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC+MC"))
+	check_err $? "Could not get high enough UC+MC ingress rate"
+	stop_traffic
+	local ucth2=${uc_rate_2[1]}
+
+	local d1=$(date +%s)
+	local t1=$(ethtool_stats_get $h3 rx_octets_prio_0)
+	local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0)
+	# Loss of unicast egress throughput in percent of the baseline, at least 0.
+	local deg=$(bc <<< "
+			scale=2
+			ret = 100 * ($ucth1 - $ucth2) / $ucth1
+			if (ret > 0) { ret } else { 0 }
+		    ")
+
+	# Minimum shaper of 200Mbps on MC TCs should cause about 20% of
+	# degradation on 1Gbps link.
+	check_err $(bc <<< "$deg < 15") "Minimum shaper not in effect"
+	check_err $(bc <<< "$deg > 25") "MC traffic degrades UC performance too much"
+	# prio-0 rates over the whole d0..d1 window, timed in whole seconds.
+	local interval=$((d1 - d0))
+	local mc_ir=$(rate $u0 $u1 $interval)
+	local mc_er=$(rate $t0 $t1 $interval)
+
+	stop_traffic
+
+	log_test "UC performance under MC overload"
+
+	echo "UC-only throughput  $(humanize $ucth1)"
+	echo "UC+MC throughput    $(humanize $ucth2)"
+	echo "Degradation         $deg %"
+	echo
+	echo "Full report:"
+	echo "  UC only:"
+	echo "    ingress UC throughput $(humanize ${uc_rate[0]})"
+	echo "    egress UC throughput  $(humanize ${uc_rate[1]})"
+	echo "  UC+MC:"
+	echo "    ingress UC throughput $(humanize ${uc_rate_2[0]})"
+	echo "    egress UC throughput  $(humanize ${uc_rate_2[1]})"
+	echo "    ingress MC throughput $(humanize $mc_ir)"
+	echo "    egress MC throughput  $(humanize $mc_er)"
+	echo
+}
+
+test_uc_aware()
+{
+	RET=0
+	# Keep the unicast stream running for the whole duration of the test.
+	start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac
+
+	local d0=$(date +%s)
+	local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
+	local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+	sleep 1
+
+	local attempts=50
+	local passes=0
+	local i
+	# Send one broadcast ARP for 192.0.2.66 from $h1 per round; count successes.
+	for ((i = 0; i < attempts; ++i)); do
+		if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 1; then
+			((passes++))
+		fi
+
+		sleep 0.1
+	done
+
+	local d1=$(date +%s)
+	local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
+	local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)
+
+	local interval=$((d1 - d0))
+	local uc_ir=$(rate $u0 $u1 $interval)
+	local uc_er=$(rate $t0 $t1 $interval)
+	# Every single ARP has to succeed; say how many did when the test fails.
+	((attempts == passes))
+	check_err $? "Only $passes of $attempts broadcast ARPs were answered"
+
+	stop_traffic
+
+	log_test "MC performance under UC overload"
+	echo "    ingress UC throughput $(humanize ${uc_ir})"
+	echo "    egress UC throughput  $(humanize ${uc_er})"
+	echo "    sent $attempts BC ARPs, got $passes responses"
+}
+
+trap cleanup EXIT  # run cleanup whenever the script exits
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS  # not assigned in this script; presumably maintained by lib.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
new file mode 100755
index 000000000..56761de1c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
@@ -0,0 +1,419 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test injects a 10-MB burst of traffic with VLAN tag and 802.1p priority
+# of 1. This stream is consistently prioritized as priority 1, is put to PG
+# buffer 1, and scheduled at TC 1.
+#
+# - the stream first ingresses through $swp1, where it is forwarded to $swp3
+#
+# - then it ingresses through $swp4. Here it is put to a lossless buffer and put
+#   to a small pool ("PFC pool"). The traffic is forwarded to $swp2, which is
+#   shaped, and thus the PFC pool eventually fills, therefore the headroom
+#   fills, and $swp3 is paused.
+#
+# - since $swp3 now can't send traffic, the traffic ingressing $swp1 is kept at
+#   a pool ("overflow pool"). The overflow pool needs to be large enough to
+#   contain the whole burst.
+#
+# - eventually the PFC pool gets some traffic out, headroom therefore gets some
+#   traffic to the pool, and $swp3 is unpaused again. This way the traffic is
+#   gradually forwarded from the overflow pool, through the PFC pool, out of
+#   $swp2, and eventually to $h2.
+#
+# - if PFC works, all lossless flow packets that ingress through $swp1 should
+#   also be seen ingressing $h2. If it doesn't, there will be drops due to
+#   discrepancy between the speeds of $swp1 and $h2.
+#
+# - it should all play out relatively quickly, so that SLL and HLL will not
+#   cause drops.
+#
+# +-----------------------+
+# | H1                    |
+# |   + $h1.111           |
+# |   | 192.0.2.33/28     |
+# |   |                   |
+# |   + $h1               |
+# +---|-------------------+  +--------------------+
+#     |                      |                    |
+# +---|----------------------|--------------------|---------------------------+
+# |   + $swp1          $swp3 +                    + $swp4                     |
+# |   | iPOOL1        iPOOL0 |                    | iPOOL2                    |
+# |   | ePOOL4        ePOOL5 |                    | ePOOL4                    |
+# |   |                1Gbps |                    | 1Gbps                     |
+# |   |        PFC:enabled=1 |                    | PFC:enabled=1             |
+# | +-|----------------------|-+                +-|------------------------+  |
+# | | + $swp1.111  $swp3.111 + |                | + $swp4.111              |  |
+# | |                          |                |                          |  |
+# | | BR1                      |                | BR2                      |  |
+# | |                          |                |                          |  |
+# | |                          |                |         + $swp2.111      |  |
+# | +--------------------------+                +---------|----------------+  |
+# |                                                       |                   |
+# | iPOOL0: 500KB dynamic                                 |                   |
+# | iPOOL1: 10MB static                                   |                   |
+# | iPOOL2: 1MB static                                    + $swp2             |
+# | ePOOL4: 500KB dynamic                                 | iPOOL0            |
+# | ePOOL5: 10MB static                                   | ePOOL6            |
+# | ePOOL6: "infinite" static                             | 200Mbps shaper    |
+# +-------------------------------------------------------|-------------------+
+#                                                         |
+#                                                     +---|-------------------+
+#                                                     |   + $h2            H2 |
+#                                                     |   |                   |
+#                                                     |   + $h2.111           |
+#                                                     |     192.0.2.34/28     |
+#                                                     +-----------------------+
+#
+# iPOOL0+ePOOL4 are helper pools for control traffic etc.
+# iPOOL1+ePOOL5 are overflow pools.
+# iPOOL2+ePOOL6 are PFC pools.
+
+ALL_TESTS="
+	ping_ipv4
+	test_qos_pfc
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6 # p1..p6, mapped to $h1, $swp1..$swp4 and $h2 in setup_prepare
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh
+
+_1KB=1000 # sizes below are decimal: 1 KB is 1000 bytes
+_100KB=$((100 * _1KB))
+_500KB=$((500 * _1KB))
+_1MB=$((1000 * _1KB))
+_10MB=$((10 * _1MB))
+
+h1_create() # Set up $h1: MTU 10000 and VLAN 111 with 192.0.2.33/28.
+{
+	simple_if_init $h1
+	mtu_set $h1 10000
+
+	vlan_create $h1 111 v$h1 192.0.2.33/28
+}
+
+h1_destroy() # Undo h1_create, in reverse order.
+{
+	vlan_destroy $h1 111
+
+	mtu_restore $h1
+	simple_if_fini $h1
+}
+
+h2_create() # Set up $h2: MTU 10000 and VLAN 111 with 192.0.2.34/28.
+{
+	simple_if_init $h2
+	mtu_set $h2 10000
+
+	vlan_create $h2 111 v$h2 192.0.2.34/28
+}
+
+h2_destroy() # Undo h2_create, in reverse order.
+{
+	vlan_destroy $h2 111
+
+	mtu_restore $h2
+	simple_if_fini $h2
+}
+
+switch_create() # Configure shared-buffer pools, the four switch ports and both bridges.
+{
+	local lanes_swp4
+	local pg1_size
+
+	# pools
+	# -----
+
+	devlink_pool_size_thtype_save 0
+	devlink_pool_size_thtype_save 4
+	devlink_pool_size_thtype_save 1
+	devlink_pool_size_thtype_save 5
+	devlink_pool_size_thtype_save 2
+	devlink_pool_size_thtype_save 6
+
+	devlink_port_pool_th_save $swp1 1
+	devlink_port_pool_th_save $swp2 6
+	devlink_port_pool_th_save $swp3 5
+	devlink_port_pool_th_save $swp4 2
+
+	devlink_tc_bind_pool_th_save $swp1 1 ingress
+	devlink_tc_bind_pool_th_save $swp2 1 egress
+	devlink_tc_bind_pool_th_save $swp3 1 egress
+	devlink_tc_bind_pool_th_save $swp4 1 ingress
+
+	# Control traffic pools. Just reduce the size. Keep them dynamic so that
+	# we don't need to change all the uninteresting quotas.
+	devlink_pool_size_thtype_set 0 dynamic $_500KB
+	devlink_pool_size_thtype_set 4 dynamic $_500KB
+
+	# Overflow pools.
+	devlink_pool_size_thtype_set 1 static $_10MB
+	devlink_pool_size_thtype_set 5 static $_10MB
+
+	# PFC pools. As per the writ, the size of egress PFC pool should be
+	# infinite, but actually it just needs to be large enough to not matter
+	# in practice, so reuse the 10MB limit.
+	devlink_pool_size_thtype_set 2 static $_1MB
+	devlink_pool_size_thtype_set 6 static $_10MB
+
+	# $swp1
+	# -----
+
+	ip link set dev $swp1 up
+	mtu_set $swp1 10000
+	vlan_create $swp1 111
+	ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp1 1 $_10MB
+	devlink_tc_bind_pool_th_set $swp1 1 ingress 1 $_10MB
+
+	# Configure qdisc so that we can configure PG and therefore pool
+	# assignment.
+	tc qdisc replace dev $swp1 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+	dcb buffer set dev $swp1 prio-buffer all:0 1:1
+
+	# $swp2
+	# -----
+
+	ip link set dev $swp2 up
+	mtu_set $swp2 10000
+	vlan_create $swp2 111
+	ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp2 6 $_10MB
+	devlink_tc_bind_pool_th_set $swp2 1 egress 6 $_10MB
+
+	# prio 0->TC0 (band 7), 1->TC1 (band 6). TC1 is shaped.
+	tc qdisc replace dev $swp2 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+	tc qdisc replace dev $swp2 parent 1:7 handle 17: \
+	   tbf rate 200Mbit burst 131072 limit 1M
+
+	# $swp3
+	# -----
+
+	ip link set dev $swp3 up
+	mtu_set $swp3 10000
+	vlan_create $swp3 111
+	ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp3 5 $_10MB
+	devlink_tc_bind_pool_th_set $swp3 1 egress 5 $_10MB
+
+	# prio 0->TC0 (band 7), 1->TC1 (band 6)
+	tc qdisc replace dev $swp3 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+
+	# Need to enable PFC so that PAUSE takes effect. Therefore need to put
+	# the lossless prio into a buffer of its own. Don't bother with buffer
+	# sizes though, there is not going to be any pressure in the "backward"
+	# direction.
+	dcb buffer set dev $swp3 prio-buffer all:0 1:1
+	dcb pfc set dev $swp3 prio-pfc all:off 1:on
+
+	# $swp4
+	# -----
+
+	ip link set dev $swp4 up
+	mtu_set $swp4 10000
+	vlan_create $swp4 111
+	ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp4 2 $_1MB
+	devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $_1MB
+
+	# Configure qdisc so that we can hand-tune headroom.
+	tc qdisc replace dev $swp4 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+	dcb buffer set dev $swp4 prio-buffer all:0 1:1
+	dcb pfc set dev $swp4 prio-pfc all:off 1:on
+	# PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which
+	# is (-2*MTU) about 80K of delay provision.
+	pg1_size=$_100KB
+
+	setup_wait_dev_with_timeout $swp4
+
+	lanes_swp4=$(ethtool $swp4 | grep 'Lanes:')
+	lanes_swp4=${lanes_swp4#*"Lanes: "} # keep only what follows "Lanes: "
+
+	# 8-lane ports use two buffers among which the configured buffer
+	# is split, so double the size to get twice (20K + 80K).
+	if [[ $lanes_swp4 -eq 8 ]]; then
+		pg1_size=$((pg1_size * 2))
+	fi
+
+	dcb buffer set dev $swp4 buffer-size all:0 1:$pg1_size
+
+	# bridges
+	# -------
+
+	ip link add name br1 type bridge vlan_filtering 0
+	ip link set dev $swp1.111 master br1
+	ip link set dev $swp3.111 master br1
+	ip link set dev br1 up
+
+	ip link add name br2 type bridge vlan_filtering 0
+	ip link set dev $swp2.111 master br2
+	ip link set dev $swp4.111 master br2
+	ip link set dev br2 up
+}
+
+switch_destroy() # Undo switch_create: pools first, then bridges, then $swp4 down to $swp1.
+{
+	# Do this first so that we can reset the limits to values that are only
+	# valid for the original static / dynamic setting.
+	devlink_pool_size_thtype_restore 6
+	devlink_pool_size_thtype_restore 5
+	devlink_pool_size_thtype_restore 4
+	devlink_pool_size_thtype_restore 2
+	devlink_pool_size_thtype_restore 1
+	devlink_pool_size_thtype_restore 0
+
+	# bridges
+	# -------
+
+	ip link set dev br2 down
+	ip link set dev $swp4.111 nomaster
+	ip link set dev $swp2.111 nomaster
+	ip link del dev br2
+
+	ip link set dev br1 down
+	ip link set dev $swp3.111 nomaster
+	ip link set dev $swp1.111 nomaster
+	ip link del dev br1
+
+	# $swp4
+	# -----
+
+	dcb buffer set dev $swp4 buffer-size all:0
+	dcb pfc set dev $swp4 prio-pfc all:off
+	dcb buffer set dev $swp4 prio-buffer all:0
+	tc qdisc del dev $swp4 root
+
+	devlink_tc_bind_pool_th_restore $swp4 1 ingress
+	devlink_port_pool_th_restore $swp4 2
+
+	vlan_destroy $swp4 111
+	mtu_restore $swp4
+	ip link set dev $swp4 down
+
+	# $swp3
+	# -----
+
+	dcb pfc set dev $swp3 prio-pfc all:off
+	dcb buffer set dev $swp3 prio-buffer all:0
+	tc qdisc del dev $swp3 root
+
+	devlink_tc_bind_pool_th_restore $swp3 1 egress
+	devlink_port_pool_th_restore $swp3 5
+
+	vlan_destroy $swp3 111
+	mtu_restore $swp3
+	ip link set dev $swp3 down
+
+	# $swp2
+	# -----
+
+	tc qdisc del dev $swp2 parent 1:7 # the TBF shaper installed by switch_create
+	tc qdisc del dev $swp2 root
+
+	devlink_tc_bind_pool_th_restore $swp2 1 egress
+	devlink_port_pool_th_restore $swp2 6
+
+	vlan_destroy $swp2 111
+	mtu_restore $swp2
+	ip link set dev $swp2 down
+
+	# $swp1
+	# -----
+
+	dcb buffer set dev $swp1 prio-buffer all:0
+	tc qdisc del dev $swp1 root
+
+	devlink_tc_bind_pool_th_restore $swp1 1 ingress
+	devlink_port_pool_th_restore $swp1 1
+
+	vlan_destroy $swp1 111
+	mtu_restore $swp1
+	ip link set dev $swp1 down
+}
+
+setup_prepare() # Name the six test ports, then create both hosts and the switch.
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	swp4=${NETIFS[p6]}
+
+	h2mac=$(mac_get $h2) # destination MAC used by test_qos_pfc
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup() # Tear everything down in the reverse order of setup_prepare.
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4() # Connectivity check from $h1 to 192.0.2.34, the address set in h2_create.
+{
+	ping_test $h1 192.0.2.34
+}
+
+test_qos_pfc() # Send a roughly 10MB priority-1 burst and require that none of it is lost.
+{
+	RET=0
+
+	# 10M pool, each packet is 8K of payload + headers
+	local pkts=$((_10MB / 8050))
+	local size=$((pkts * 8050)) # expected burst size in bytes
+	local in0=$(ethtool_stats_get $swp1 rx_octets_prio_1)
+	local out0=$(ethtool_stats_get $swp2 tx_octets_prio_1)
+
+	$MZ $h1 -p 8000 -Q 1:111 -A 192.0.2.33 -B 192.0.2.34 \
+		-a own -b $h2mac -c $pkts -t udp -q
+	sleep 2 # presumably lets the shaped burst drain before counters are re-read
+
+	local in1=$(ethtool_stats_get $swp1 rx_octets_prio_1)
+	local out1=$(ethtool_stats_get $swp2 tx_octets_prio_1)
+
+	local din=$((in1 - in0))
+	local dout=$((out1 - out0))
+
+	local pct_in=$((din * 100 / size)) # ingressed bytes as a percentage of the burst
+
+	((pct_in > 95 && pct_in < 105))
+	check_err $? "Relative ingress out of expected bounds, $pct_in% should be 100%"
+
+	((dout == din)) # lossless: egress bytes must equal ingress bytes
+	check_err $? "$((din - dout)) bytes out of $din ingressed got lost"
+
+	log_test "PFC"
+}
+
+trap cleanup EXIT # run cleanup whenever the script exits
+
+bail_on_lldpad
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
new file mode 100644
index 000000000..e93878d42
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
@@ -0,0 +1,142 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ROUTER_NUM_NETIFS=4 # p1..p4, mapped to $h1, $rp1, $rp2 and $h2 in router_setup_prepare
+: ${TIMEOUT:=20000} # ms; only set when the caller has not provided a value
+
+router_h1_create() # Set up host interface $h1 with 192.0.1.1/24.
+{
+	simple_if_init $h1 192.0.1.1/24
+}
+
+router_h1_destroy() # Undo router_h1_create.
+{
+	simple_if_fini $h1 192.0.1.1/24
+}
+
+router_h2_create() # Set up host interface $h2 with 192.0.2.1/24 and an ingress qdisc.
+{
+	simple_if_init $h2 192.0.2.1/24
+	tc qdisc add dev $h2 handle ffff: ingress
+}
+
+router_h2_destroy() # Undo router_h2_create, in reverse order.
+{
+	tc qdisc del dev $h2 handle ffff: ingress
+	simple_if_fini $h2 192.0.2.1/24
+}
+
+router_create() # Bring up both router ports and give each an address.
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+
+	ip address add 192.0.1.2/24 dev $rp1 # same subnet as $h1
+	ip address add 192.0.2.2/24 dev $rp2 # same subnet as $h2
+}
+
+router_destroy() # Undo router_create, in reverse order.
+{
+	ip address del 192.0.2.2/24 dev $rp2
+	ip address del 192.0.1.2/24 dev $rp1
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+router_setup_prepare() # Name the four test ports, then create both hosts and the router.
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	h1mac=$(mac_get $h1) # NOTE(review): h1mac and rp1mac are not read in this file
+	rp1mac=$(mac_get $rp1)
+
+	vrf_prepare
+
+	router_h1_create
+	router_h2_create
+
+	router_create
+}
+
+wait_for_routes() # Print how many offloaded routes were added since $1; succeed once >= $2.
+{
+	local baseline=$1
+	local wanted=$2
+	local offloaded
+
+	offloaded=$(ip route | grep -o 'offload' | wc -l)
+	echo $((offloaded - baseline))
+	[[ $((offloaded - baseline)) -ge $wanted ]]
+}
+
+router_routes_create() # Batch-install $1 host routes 193.i.j.k/32 on $rp2.
+{
+	local route_count=$1
+	local count=0
+	local i j k	# keep the loop indices out of the sourcing script's scope
+
+	# Deliberately global: router_routes_destroy removes the file later.
+	ROUTE_FILE="$(mktemp)"
+
+	for i in {0..255}; do
+		for j in {0..255}; do
+			for k in {0..255}; do
+				# Leave all three loops once enough routes are queued.
+				if [[ $count -eq $route_count ]]; then
+					break 3
+				fi
+
+				echo route add 193.${i}.${j}.${k}/32 dev $rp2
+				((count++))
+			done
+		done
+	done > "$ROUTE_FILE"	# open the batch file once instead of per route
+
+	# Output is discarded; router_test counts offloaded routes instead.
+	ip -b "$ROUTE_FILE" &> /dev/null
+}
+
+router_routes_destroy() # Remove the batch file left by router_routes_create, if any.
+{
+	[[ -v ROUTE_FILE ]] || return 0
+
+	rm -f "$ROUTE_FILE"
+	unset ROUTE_FILE	# the later call from router_cleanup is then a no-op
+}
+
+router_test() # $1: number of routes to install; $2: flag handed to check_err_fail.
+{
+	local route_count=$1
+	local should_fail=$2
+	local delta # declared on its own so that $? below is busywait's status
+
+	RET=0
+
+	local t0=$(ip route | grep -o 'offload' | wc -l) # offloaded routes before the test
+	router_routes_create $route_count
+	delta=$(busywait "$TIMEOUT" wait_for_routes $t0 $route_count)
+
+	check_err_fail $should_fail $? "Offload routes: Expected $route_count, got $delta."
+	if [[ $RET -ne 0 ]] || [[ $should_fail -eq 1 ]]; then
+		return # the routes file is left for router_cleanup to remove
+	fi
+
+	router_routes_destroy
+}
+
+router_cleanup() # Tear everything down in the reverse order of router_setup_prepare.
+{
+	pre_cleanup
+
+	router_routes_destroy # covers the paths where router_test returned early
+	router_destroy
+
+	router_h2_destroy
+	router_h1_destroy
+
+	vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
new file mode 100755
index 000000000..f4031002d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -0,0 +1,698 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test various interface configuration scenarios. Observe that configurations
+# deemed valid by mlxsw succeed, invalid configurations fail and that no traces
+# are produced. To prevent the test from passing in case traces are produced,
+# the user can set the 'kernel.panic_on_warn' and 'kernel.panic_on_oops'
+# sysctls in its environment.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	rif_set_addr_test
+	rif_vrf_set_addr_test
+	rif_inherit_bridge_addr_test
+	rif_non_inherit_bridge_addr_test
+	vlan_interface_deletion_test
+	bridge_deletion_test
+	bridge_vlan_flags_test
+	vlan_1_test
+	lag_bridge_upper_test
+	duplicate_vlans_test
+	vlan_rif_refcount_test
+	subport_rif_refcount_test
+	vlan_dev_deletion_test
+	lag_unlink_slaves_test
+	lag_dev_deletion_test
+	vlan_interface_uppers_test
+	bridge_extern_learn_test
+	neigh_offload_test
+	nexthop_offload_test
+	devlink_reload_test
+"
+NUM_NETIFS=2 # p1 and p2, mapped to $swp1 and $swp2 in setup_prepare
+: ${TIMEOUT:=20000} # ms; only set when the caller has not provided a value
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+setup_prepare() # Name the two test ports and bring them up.
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+}
+
+cleanup() # Take both test ports down again.
+{
+	pre_cleanup
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+}
+
+rif_set_addr_test() # A RIF whose MAC prefix differs from existing RIFs must be rejected.
+{
+	local swp1_mac=$(mac_get $swp1) # original MACs, restored at the end
+	local swp2_mac=$(mac_get $swp2)
+
+	RET=0
+
+	# $swp1 and $swp2 likely got their IPv6 local addresses already, but
+	# here we need to test the transition to RIF.
+	ip addr flush dev $swp1
+	ip addr flush dev $swp2
+	sleep .1
+
+	ip addr add dev $swp1 192.0.2.1/28
+	check_err $?
+
+	ip link set dev $swp1 addr 00:11:22:33:44:55
+	check_err $?
+
+	# IP address enablement should be rejected if the MAC address prefix
+	# doesn't match other RIFs.
+	ip addr add dev $swp2 192.0.2.2/28 &>/dev/null
+	check_fail $? "IP address addition passed for a device with a wrong MAC"
+	ip addr add dev $swp2 192.0.2.2/28 2>&1 >/dev/null \
+	    | grep -q mlxsw_spectrum
+	check_err $? "no extack for IP address addition"
+
+	ip link set dev $swp2 addr 00:11:22:33:44:66 # differs from $swp1 in the last byte only
+	check_err $?
+	ip addr add dev $swp2 192.0.2.2/28 &>/dev/null
+	check_err $?
+
+	# Change of MAC address of a RIF should be forbidden if the new MAC
+	# doesn't share the prefix with other MAC addresses.
+	ip link set dev $swp2 addr 00:11:22:33:00:66 &>/dev/null
+	check_fail $? "change of MAC address passed for a wrong MAC"
+	ip link set dev $swp2 addr 00:11:22:33:00:66 2>&1 >/dev/null \
+	    | grep -q mlxsw_spectrum
+	check_err $? "no extack for MAC address change"
+
+	log_test "RIF - bad MAC change"
+
+	ip addr del dev $swp2 192.0.2.2/28
+	ip addr del dev $swp1 192.0.2.1/28
+
+	ip link set dev $swp2 addr $swp2_mac
+	ip link set dev $swp1 addr $swp1_mac
+}
+
+rif_vrf_set_addr_test()
+{
+	# Test that it is possible to set an IP address on a VRF upper despite
+	# its random MAC address.
+	RET=0
+
+	ip link add name vrf-test type vrf table 10
+	ip link set dev $swp1 master vrf-test # makes vrf-test an upper of an mlxsw port
+
+	ip -4 address add 192.0.2.1/24 dev vrf-test
+	check_err $? "failed to set IPv4 address on VRF"
+	ip -6 address add 2001:db8:1::1/64 dev vrf-test
+	check_err $? "failed to set IPv6 address on VRF"
+
+	log_test "RIF - setting IP address on VRF"
+
+	ip link del dev vrf-test
+}
+
+rif_inherit_bridge_addr_test() # A bridge with a RIF must veto changes to its inherited MAC.
+{
+	RET=0
+
+	# Create first RIF
+	ip addr add dev $swp1 192.0.2.1/28
+	check_err $?
+
+	# Create a FID RIF
+	ip link add name br1 up type bridge vlan_filtering 0
+	ip link set dev $swp2 master br1
+	ip addr add dev br1 192.0.2.17/28
+	check_err $?
+
+	# Prepare a device with a low MAC address
+	ip link add name d up type dummy
+	ip link set dev d addr 00:11:22:33:44:55
+
+	# Attach the device to br1. That prompts bridge address change, which
+	# should be vetoed, thus preventing the attachment.
+	ip link set dev d master br1 &>/dev/null
+	check_fail $? "Device with low MAC was permitted to attach a bridge with RIF"
+	ip link set dev d master br1 2>&1 >/dev/null \
+	    | grep -q mlxsw_spectrum
+	check_err $? "no extack for bridge attach rejection"
+
+	ip link set dev $swp2 addr 00:11:22:33:44:55 &>/dev/null
+	check_fail $? "Changing swp2's MAC address permitted"
+	ip link set dev $swp2 addr 00:11:22:33:44:55 2>&1 >/dev/null \
+	    | grep -q mlxsw_spectrum
+	check_err $? "no extack for bridge port MAC address change rejection"
+
+	log_test "RIF - attach port with bad MAC to bridge"
+
+	ip link del dev d
+	ip link del dev br1
+	ip addr del dev $swp1 192.0.2.1/28
+}
+
+rif_non_inherit_bridge_addr_test() # With the bridge MAC set explicitly, port MACs may differ.
+{
+	local swp2_mac=$(mac_get $swp2) # reused as the bridge MAC and restored at the end
+
+	RET=0
+
+	# Create first RIF
+	ip addr add dev $swp1 192.0.2.1/28
+	check_err $?
+
+	# Create a FID RIF
+	ip link add name br1 up type bridge vlan_filtering 0
+	ip link set dev br1 addr $swp2_mac
+	ip link set dev $swp2 master br1
+	ip addr add dev br1 192.0.2.17/28
+	check_err $?
+
+	# Prepare a device with a low MAC address
+	ip link add name d up type dummy
+	ip link set dev d addr 00:11:22:33:44:55
+
+	# Attach the device to br1. Since the bridge address was set, it should
+	# work.
+	ip link set dev d master br1 &>/dev/null
+	check_err $? "Could not attach a device with low MAC to a bridge with RIF"
+
+	# Port MAC address change should be allowed for a bridge with set MAC.
+	ip link set dev $swp2 addr 00:11:22:33:44:55
+	check_err $? "Changing swp2's MAC address not permitted"
+
+	log_test "RIF - attach port with bad MAC to bridge with set MAC"
+
+	ip link set dev $swp2 addr $swp2_mac
+	ip link del dev d
+	ip link del dev br1
+	ip addr del dev $swp1 192.0.2.1/28
+}
+
+vlan_interface_deletion_test()
+{
+	# Test that when a VLAN interface is deleted, its associated router
+	# interface (RIF) is correctly deleted and not leaked. See commit
+	# c360867ec46a ("mlxsw: spectrum: Delete RIF when VLAN device is
+	# removed") for more details
+	RET=0
+
+	ip link add name br0 type bridge vlan_filtering 1
+	ip link set dev $swp1 master br0
+
+	ip link add link br0 name br0.10 type vlan id 10
+	ip -6 address add 2001:db8:1::1/64 dev br0.10
+	ip link del dev br0.10 # the RIF of br0.10 is expected to go away here
+
+	# If we leaked the previous RIF, then this should produce a trace
+	ip link add link br0 name br0.20 type vlan id 20
+	ip -6 address add 2001:db8:1::1/64 dev br0.20 # same address as on br0.10 above
+	ip link del dev br0.20
+
+	log_test "vlan interface deletion"
+
+	ip link del dev br0
+}
+
+bridge_deletion_test()
+{
+	# Test that when a bridge with VLAN interfaces is deleted, we correctly
+	# delete the associated RIFs. See commit 602b74eda813 ("mlxsw:
+	# spectrum_switchdev: Do not leak RIFs when removing bridge") for more
+	# details
+	RET=0
+
+	ip link add name br0 type bridge vlan_filtering 1
+	ip link set dev $swp1 master br0
+	ip -6 address add 2001:db8::1/64 dev br0
+
+	ip link add link br0 name br0.10 type vlan id 10
+	ip -6 address add 2001:db8:1::1/64 dev br0.10
+
+	ip link add link br0 name br0.20 type vlan id 20
+	ip -6 address add 2001:db8:2::1/64 dev br0.20
+
+	ip link del dev br0 # br0.10 and br0.20 are deliberately not removed first
+
+	# If we leaked previous RIFs, then this should produce a trace
+	ip -6 address add 2001:db8:1::1/64 dev $swp1
+	ip -6 address del 2001:db8:1::1/64 dev $swp1
+
+	log_test "bridge deletion"
+}
+
+bridge_vlan_flags_test()
+{
+	# Test that when bridge VLAN flags are toggled, we do not take
+	# unnecessary references on related structs. See commit 9e25826ffc94
+	# ("mlxsw: spectrum_switchdev: Fix port_vlan refcounting") for more
+	# details
+	RET=0
+
+	ip link add name br0 type bridge vlan_filtering 1
+	ip link set dev $swp1 master br0
+
+	bridge vlan add vid 10 dev $swp1 pvid untagged # all four pvid/untagged combinations
+	bridge vlan add vid 10 dev $swp1 untagged
+	bridge vlan add vid 10 dev $swp1 pvid
+	bridge vlan add vid 10 dev $swp1
+	ip link del dev br0
+
+	# If we did not handle references correctly, then this should produce a
+	# trace
+	devlink dev reload "$DEVLINK_DEV" # exit status is not checked here
+
+	# Allow netdevices to be re-created following the reload
+	sleep 20
+
+	log_test "bridge vlan flags"
+}
+
+vlan_1_test()
+{
+	# Test that VLAN 1 can be configured over mlxsw ports. In the past it
+	# was used internally for untagged traffic. See commit 47bf9df2e820
+	# ("mlxsw: spectrum: Forbid creation of VLAN 1 over port/LAG") for more
+	# details
+	RET=0
+
+	ip link add link $swp1 name $swp1.1 type vlan id 1
+	check_err $? "did not manage to create vlan 1 when should"
+
+	log_test "vlan 1"
+
+	ip link del dev $swp1.1 # remove the VLAN device created above
+}
+
+lag_bridge_upper_test()
+{
+	# Test that ports cannot be enslaved to LAG devices that have uppers
+	# and that failure is handled gracefully. See commit b3529af6bb0d
+	# ("spectrum: Reference count VLAN entries") for more details
+	RET=0
+
+	ip link add name bond1 type bond mode 802.3ad
+
+	ip link add name br0 type bridge vlan_filtering 1
+	ip link set dev bond1 master br0 # bond1 now has an upper
+
+	ip link set dev $swp1 down # NOTE(review): $swp1 is not brought back up in this test
+	ip link set dev $swp1 master bond1 &> /dev/null
+	check_fail $? "managed to enslave port to lag when should not"
+
+	# This might generate a trace, if we did not handle the failure
+	# correctly
+	ip -6 address add 2001:db8:1::1/64 dev $swp1
+	ip -6 address del 2001:db8:1::1/64 dev $swp1
+
+	log_test "lag with bridge upper"
+
+	ip link del dev br0
+	ip link del dev bond1
+}
+
+duplicate_vlans_test()
+{
+	# Test that on a given port a VLAN is only used once. Either as VLAN
+	# in a VLAN-aware bridge or as a VLAN device
+	RET=0
+
+	ip link add name br0 type bridge vlan_filtering 1
+	ip link set dev $swp1 master br0
+	bridge vlan add vid 10 dev $swp1 # VID 10 is now used as a bridge VLAN
+
+	ip link add link $swp1 name $swp1.10 type vlan id 10 &> /dev/null
+	check_fail $? "managed to create vlan device when should not"
+
+	bridge vlan del vid 10 dev $swp1 # release VID 10 so a VLAN device can use it
+	ip link add link $swp1 name $swp1.10 type vlan id 10
+	check_err $? "did not manage to create vlan device when should"
+	bridge vlan add vid 10 dev $swp1 &> /dev/null
+	check_fail $? "managed to add bridge vlan when should not"
+
+	log_test "duplicate vlans"
+
+	ip link del dev $swp1.10
+	ip link del dev br0
+}
+
+vlan_rif_refcount_test()
+{
+	# Test that RIFs representing VLAN interfaces are not affected from
+	# ports member in the VLAN. We use the offload indication on routes
+	# configured on the RIF to understand if it was created / destroyed
+	RET=0
+
+	ip link add name br0 type bridge vlan_filtering 1
+	ip link set dev $swp1 master br0
+
+	ip link set dev $swp1 up
+	ip link set dev br0 up
+
+	ip link add link br0 name br0.10 up type vlan id 10
+	ip -6 address add 2001:db8:1::1/64 dev br0.10
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev br0.10
+	check_err $? "vlan rif was not created before adding port to vlan"
+
+	bridge vlan add vid 10 dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev br0.10
+	check_err $? "vlan rif was destroyed after adding port to vlan"
+
+	bridge vlan del vid 10 dev $swp1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev br0.10
+	check_err $? "vlan rif was destroyed after removing port from vlan"
+
+	ip link set dev $swp1 nomaster # $swp1 was the only port enslaved to br0
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev br0.10
+	check_err $? "vlan rif was not destroyed after unlinking port from bridge"
+
+	log_test "vlan rif refcount"
+
+	ip link del dev br0.10
+	ip link set dev $swp1 down
+	ip link del dev br0
+}
+
+subport_rif_refcount_test()
+{
+	# Test that RIFs representing upper devices of physical ports are
+	# reference counted correctly and destroyed when should. We use the
+	# offload indication on routes configured on the RIF to understand if
+	# it was created / destroyed
+	RET=0
+
+	ip link add name bond1 type bond mode 802.3ad
+	ip link set dev $swp1 down
+	ip link set dev $swp2 down
+	ip link set dev $swp1 master bond1
+	ip link set dev $swp2 master bond1
+
+	ip link set dev bond1 up
+	ip link add link bond1 name bond1.10 up type vlan id 10
+	ip -6 address add 2001:db8:1::1/64 dev bond1
+	ip -6 address add 2001:db8:2::1/64 dev bond1.10
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+	check_err $? "subport rif was not created on lag device"
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10
+	check_err $? "subport rif was not created on vlan device"
+
+	ip link set dev $swp1 nomaster # $swp2 is still enslaved to bond1
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+	check_err $? "subport rif of lag device was destroyed when should not"
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10
+	check_err $? "subport rif of vlan device was destroyed when should not"
+
+	ip link set dev $swp2 nomaster # bond1 has no ports left
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+	check_err $? "subport rif of lag device was not destroyed when should"
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10
+	check_err $? "subport rif of vlan device was not destroyed when should"
+
+	log_test "subport rif refcount"
+
+	ip link del dev bond1.10
+	ip link del dev bond1
+}
+
+vlan_dev_deletion_test()
+{
+	# Test that VLAN devices are correctly deleted / unlinked when enslaved
+	# to bridge
+	RET=0
+
+	ip link add name br10 type bridge
+	ip link add name br20 type bridge
+	ip link add name br30 type bridge
+	ip link add link $swp1 name $swp1.10 type vlan id 10
+	ip link add link $swp1 name $swp1.20 type vlan id 20
+	ip link add link $swp1 name $swp1.30 type vlan id 30
+	ip link set dev $swp1.10 master br10
+	ip link set dev $swp1.20 master br20
+	ip link set dev $swp1.30 master br30
+
+	# If we did not handle the situation correctly, then these operations
+	# might produce a trace
+	ip link set dev $swp1.30 nomaster
+	ip link del dev $swp1.20
+	# Deletion via ioctl uses different code paths from netlink
+	vconfig rem $swp1.10 &> /dev/null # NOTE(review): if this fails, $swp1.10 is never removed
+
+	log_test "vlan device deletion"
+
+	ip link del dev $swp1.30 # only unlinked above, so it still exists
+	ip link del dev br30
+	ip link del dev br20
+	ip link del dev br10
+}
+
+lag_create() # Build bond1 over both ports; enslave it and its VLAN uppers to bridges.
+{
+	ip link add name bond1 type bond mode 802.3ad
+	ip link set dev $swp1 down
+	ip link set dev $swp2 down
+	ip link set dev $swp1 master bond1
+	ip link set dev $swp2 master bond1
+
+	ip link add link bond1 name bond1.10 type vlan id 10
+	ip link add link bond1 name bond1.20 type vlan id 20
+
+	ip link add name br0 type bridge vlan_filtering 1
+	ip link set dev bond1 master br0
+
+	ip link add name br10 type bridge
+	ip link set dev bond1.10 master br10
+
+	ip link add name br20 type bridge
+	ip link set dev bond1.20 master br20
+}
+
+lag_unlink_slaves_test()
+{
+	# Test that ports are correctly unlinked from their LAG master, when
+	# the LAG and its VLAN uppers are enslaved to bridges
+	RET=0
+
+	lag_create
+
+	ip link set dev $swp1 nomaster
+	check_err $? "lag slave $swp1 was not unlinked from master"
+	ip link set dev $swp2 nomaster
+	check_err $? "lag slave $swp2 was not unlinked from master"
+
+	# Try to configure corresponding VLANs as router interfaces
+	ip -6 address add 2001:db8:1::1/64 dev $swp1
+	check_err $? "failed to configure ip address on $swp1"
+
+	ip link add link $swp1 name $swp1.10 type vlan id 10 # same VIDs as bond1.10 and bond1.20
+	ip -6 address add 2001:db8:10::1/64 dev $swp1.10
+	check_err $? "failed to configure ip address on $swp1.10"
+
+	ip link add link $swp1 name $swp1.20 type vlan id 20
+	ip -6 address add 2001:db8:20::1/64 dev $swp1.20
+	check_err $? "failed to configure ip address on $swp1.20"
+
+	log_test "lag slaves unlinking"
+
+	ip link del dev $swp1.20
+	ip link del dev $swp1.10
+	ip address flush dev $swp1
+
+	ip link del dev br20
+	ip link del dev br10
+	ip link del dev br0
+	ip link del dev bond1
+}
+
+lag_dev_deletion_test()
+{
+	# Test that LAG device is correctly deleted, when the LAG and its VLAN
+	# uppers are enslaved to bridges
+	RET=0
+
+	lag_create
+
+	ip link del dev bond1 # its ports and VLAN uppers are not detached first
+
+	log_test "lag device deletion"
+
+	ip link del dev br20
+	ip link del dev br10
+	ip link del dev br0
+}
+
+vlan_interface_uppers_test()
+{
+	# Test that uppers of a VLAN interface are correctly sanitized
+	RET=0
+
+	ip link add name br0 type bridge vlan_filtering 1
+	ip link set dev $swp1 master br0
+
+	ip link add link br0 name br0.10 type vlan id 10
+	ip link add link br0.10 name macvlan0 \
+		type macvlan mode private &> /dev/null
+	check_fail $? "managed to create a macvlan when should not"
+
+	ip -6 address add 2001:db8:1::1/64 dev br0.10 # with an address set, the macvlan must succeed
+	ip link add link br0.10 name macvlan0 type macvlan mode private
+	check_err $? "did not manage to create a macvlan when should"
+
+	ip link del dev macvlan0
+
+	ip link add name vrf-test type vrf table 10
+	ip link set dev br0.10 master vrf-test
+	check_err $? "did not manage to enslave vlan interface to vrf"
+	ip link del dev vrf-test
+
+	ip link add name br-test type bridge
+	ip link set dev br0.10 master br-test &> /dev/null
+	check_fail $? "managed to enslave vlan interface to bridge when should not"
+	ip link del dev br-test
+
+	log_test "vlan interface uppers"
+
+	ip link del dev br0 # br0.10 is not deleted separately
+}
+
+bridge_extern_learn_test()
+{
+	# Test that externally learned entries added from user space are
+	# marked as offloaded
+	RET=0
+
+	ip link add name br0 type bridge
+	ip link set dev $swp1 master br0
+
+	bridge fdb add de:ad:be:ef:13:37 dev $swp1 master extern_learn
+
+	busywait "$TIMEOUT" wait_for_offload \
+		bridge fdb show brport $swp1 de:ad:be:ef:13:37
+	check_err $? "fdb entry not marked as offloaded when should"
+
+	log_test "externally learned fdb entry"
+
+	ip link del dev br0 # the FDB entry is not deleted separately
+}
+
+neigh_offload_test()
+{
+	# Test that IPv4 and IPv6 neighbour entries are marked as offloaded
+	RET=0
+
+	ip -4 address add 192.0.2.1/24 dev $swp1
+	ip -6 address add 2001:db8:1::1/64 dev $swp1
+
+	ip -4 neigh add 192.0.2.2 lladdr de:ad:be:ef:13:37 nud perm dev $swp1 # permanent entry
+	ip -6 neigh add 2001:db8:1::2 lladdr de:ad:be:ef:13:37 nud perm \
+		dev $swp1
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -4 neigh show dev $swp1 192.0.2.2
+	check_err $? "ipv4 neigh entry not marked as offloaded when should"
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 neigh show dev $swp1 2001:db8:1::2
+	check_err $? "ipv6 neigh entry not marked as offloaded when should"
+
+	log_test "neighbour offload indication"
+
+	ip -6 neigh del 2001:db8:1::2 dev $swp1
+	ip -4 neigh del 192.0.2.2 dev $swp1
+	ip -6 address del 2001:db8:1::1/64 dev $swp1
+	ip -4 address del 192.0.2.1/24 dev $swp1
+}
+
+nexthop_offload_test()
+{
+	# Test that IPv4 and IPv6 nexthops are marked as offloaded
+	RET=0
+
+	sysctl_set net.ipv6.conf.$swp2.keep_addr_on_down 1
+	simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+	simple_if_init $swp2 192.0.2.2/24 2001:db8:1::2/64
+	setup_wait
+
+	ip -4 route add 198.51.100.0/24 vrf v$swp1 \
+		nexthop via 192.0.2.2 dev $swp1
+	ip -6 route add 2001:db8:2::/64 vrf v$swp1 \
+		nexthop via 2001:db8:1::2 dev $swp1
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -4 route show 198.51.100.0/24 vrf v$swp1
+	check_err $? "ipv4 nexthop not marked as offloaded when should"
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route show 2001:db8:2::/64 vrf v$swp1
+	check_err $? "ipv6 nexthop not marked as offloaded when should"
+
+	ip link set dev $swp2 down # $swp2 holds the gateway addresses used by both routes
+	sleep 1
+
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip -4 route show 198.51.100.0/24 vrf v$swp1
+	check_err $? "ipv4 nexthop marked as offloaded when should not"
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip -6 route show 2001:db8:2::/64 vrf v$swp1
+	check_err $? "ipv6 nexthop marked as offloaded when should not"
+
+	ip link set dev $swp2 up # offload indication should return once the link is back
+	setup_wait
+
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -4 route show 198.51.100.0/24 vrf v$swp1
+	check_err $? "ipv4 nexthop not marked as offloaded after neigh add"
+	busywait "$TIMEOUT" wait_for_offload \
+		ip -6 route show 2001:db8:2::/64 vrf v$swp1
+	check_err $? "ipv6 nexthop not marked as offloaded after neigh add"
+
+	log_test "nexthop offload indication"
+
+	ip -6 route del 2001:db8:2::/64 vrf v$swp1
+	ip -4 route del 198.51.100.0/24 vrf v$swp1
+
+	simple_if_fini $swp2 192.0.2.2/24 2001:db8:1::2/64
+	simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+	sysctl_restore net.ipv6.conf.$swp2.keep_addr_on_down
+}
+
+devlink_reload_test()
+{
+	# Test that after executing all the above configuration tests, a
+	# devlink reload can be performed without errors
+	RET=0
+
+	devlink dev reload "$DEVLINK_DEV"
+	check_err $? "devlink reload failed"
+
+	log_test "devlink reload - last test"
+
+	sleep 20 # presumably lets netdevices reappear, as in bridge_vlan_flags_test
+}
+
+trap cleanup EXIT # run cleanup whenever the script exits
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
new file mode 100755
index 000000000..af64bc9ea
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A driver for the ETS selftest that implements testing in offloaded datapath.
+# The generic forwarding library is located relative to this script's own
+# path, three directories up.
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/sch_ets_core.sh
+source $lib_dir/devlink_lib.sh
+# NOTE(review): sourced without a directory component, so the script is
+# presumably expected to be run from its own directory - confirm.
+source qos_lib.sh
+
+# Names of the tests this driver runs.
+ALL_TESTS="
+	ping_ipv4
+	priomap_mode
+	ets_test_strict
+	ets_test_mixed
+	ets_test_dwrr
+"
+
+# Configure the switch for the offloaded ETS test: create the generic ETS
+# topology, force a 1Gbps bottleneck and adjust shared buffer thresholds.
+# Every threshold is saved before it is changed so that switch_destroy can
+# restore it.
+switch_create()
+{
+	local egress_tc
+
+	ets_switch_create
+
+	# Create a bottleneck so that the DWRR process can kick in.
+	ethtool -s $h2 speed 1000 autoneg off
+	ethtool -s $swp2 speed 1000 autoneg off
+
+	# Set the ingress quota high and use the three egress TCs to limit the
+	# amount of traffic that is admitted to the shared buffers. This makes
+	# sure that there is always enough traffic of all types to select from
+	# for the DWRR process.
+	devlink_port_pool_th_save $swp1 0
+	devlink_port_pool_th_set $swp1 0 12
+	devlink_tc_bind_pool_th_save $swp1 0 ingress
+	devlink_tc_bind_pool_th_set $swp1 0 ingress 0 12
+	devlink_port_pool_th_save $swp2 4
+	devlink_port_pool_th_set $swp2 4 12
+	# Egress TCs 7, 6 and 5 of $swp2 are all bound to pool 4 with
+	# threshold 5.
+	for egress_tc in 7 6 5; do
+		devlink_tc_bind_pool_th_save $swp2 $egress_tc egress
+		devlink_tc_bind_pool_th_set $swp2 $egress_tc egress 4 5
+	done
+
+	# Note: sch_ets_core.sh uses VLAN ingress-qos-map to assign packet
+	# priorities at $swp1 based on their 802.1p headers. ingress-qos-map is
+	# not offloaded by mlxsw as of this writing, but the mapping used is
+	# 1:1, which is the mapping currently hard-coded by the driver.
+}
+
+# Undo switch_create: restore the saved shared buffer thresholds in the
+# reverse order of their configuration, re-enable autonegotiation and tear
+# down the generic ETS topology.
+switch_destroy()
+{
+	local egress_tc
+
+	for egress_tc in 5 6 7; do
+		devlink_tc_bind_pool_th_restore $swp2 $egress_tc egress
+	done
+	devlink_port_pool_th_restore $swp2 4
+	devlink_tc_bind_pool_th_restore $swp1 0 ingress
+	devlink_port_pool_th_restore $swp1 0
+
+	ethtool -s $swp2 autoneg on
+	ethtool -s $h2 autoneg on
+
+	ets_switch_destroy
+}
+
+# Callback from sch_ets_tests.sh
+#
+# Arguments are stream numbers. Stream N is read from qdisc class 10:(N+1) on
+# $swp2, and one .bytes counter is printed per stream, in argument order.
+collect_stats()
+{
+	local -a streams=("$@")
+	local first_class=10:$((${streams[0]} + 1))
+	local stream
+
+	# Wait for qdisc counter update so that we don't get it mid-way through.
+	# Only the first stream's counter is waited on; its value is discarded.
+	busywait_for_counter 1000 +1 \
+		qdisc_parent_stats_get $swp2 $first_class .bytes \
+		> /dev/null
+
+	for stream in ${streams[@]}; do
+		qdisc_parent_stats_get $swp2 10:$((stream + 1)) .bytes
+	done
+}
+
+# Both functions are defined in the sourced libraries, not in this file.
+# NOTE(review): ets_run presumably drives setup, the ALL_TESTS list and
+# cleanup - confirm against sch_ets_core.sh.
+bail_on_lldpad
+ets_run
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
new file mode 100644
index 000000000..33ddd0168
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
@@ -0,0 +1,657 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends a >1Gbps stream of traffic from H1, to the switch, which
+# forwards it to a 1Gbps port. This 1Gbps stream is then looped back to the
+# switch and forwarded to the port under test $swp3, which is also 1Gbps.
+#
+# This way, $swp3 should be 100% filled with traffic without any of it spilling
+# to the backlog. Any extra packets sent should almost 1:1 go to backlog. That
+# is what H2 is used for--it sends the extra traffic to create backlog.
+#
+# A RED Qdisc is installed on $swp3. The configuration is such that the minimum
+# and maximum size are 1 byte apart, so there is a very clear border under which
+# no marking or dropping takes place, and above which everything is marked or
+# dropped.
+#
+# The test uses the buffer build-up behavior to test the installed RED.
+#
+# In order to test WRED, $swp3 actually contains RED under PRIO, with two
+# different configurations. Traffic is prioritized using 802.1p and relies on
+# the implicit mlxsw configuration, where packet priority is taken 1:1 from the
+# 802.1p marking.
+#
+# +--------------------------+                     +--------------------------+
+# | H1                       |                     | H2                       |
+# |     + $h1.10             |                     |     + $h2.10             |
+# |     | 192.0.2.1/28       |                     |     | 192.0.2.2/28       |
+# |     |                    |                     |     |                    |
+# |     |         $h1.11 +   |                     |     |         $h2.11 +   |
+# |     |  192.0.2.17/28 |   |                     |     |  192.0.2.18/28 |   |
+# |     |                |   |                     |     |                |   |
+# |     \______    ______/   |                     |     \______    ______/   |
+# |            \ /           |                     |            \ /           |
+# |             + $h1        |                     |             + $h2        |
+# +-------------|------------+                     +-------------|------------+
+#               | >1Gbps                                         |
+# +-------------|------------------------------------------------|------------+
+# | SW          + $swp1                                          + $swp2      |
+# |     _______/ \___________                        ___________/ \_______    |
+# |    /                     \                      /                     \   |
+# |  +-|-----------------+   |                    +-|-----------------+   |   |
+# |  | + $swp1.10        |   |                    | + $swp2.10        |   |   |
+# |  |                   |   |        .-------------+ $swp5.10        |   |   |
+# |  |     BR1_10        |   |        |           |                   |   |   |
+# |  |                   |   |        |           |     BR2_10        |   |   |
+# |  | + $swp4.10        |   |        |           |                   |   |   |
+# |  +-|-----------------+   |        |           | + $swp3.10        |   |   |
+# |    |                     |        |           +-|-----------------+   |   |
+# |    |   +-----------------|-+      |             |   +-----------------|-+ |
+# |    |   |        $swp1.11 + |      |             |   |        $swp2.11 + | |
+# |    |   |                   |      | .-----------------+ $swp5.11        | |
+# |    |   |      BR1_11       |      | |           |   |                   | |
+# |    |   |                   |      | |           |   |      BR2_11       | |
+# |    |   |        $swp4.11 + |      | |           |   |                   | |
+# |    |   +-----------------|-+      | |           |   |        $swp3.11 + | |
+# |    |                     |        | |           |   +-----------------|-+ |
+# |    \_______   ___________/        | |           \___________   _______/   |
+# |            \ /                    \ /                       \ /           |
+# |             + $swp4                + $swp5                   + $swp3      |
+# +-------------|----------------------|-------------------------|------------+
+#               |                      |                         | 1Gbps
+#               \________1Gbps_________/                         |
+#                                   +----------------------------|------------+
+#                                   | H3                         + $h3        |
+#                                   |      _____________________/ \_______    |
+#                                   |     /                               \   |
+#                                   |     |                               |   |
+#                                   |     + $h3.10                 $h3.11 +   |
+#                                   |       192.0.2.3/28    192.0.2.19/28     |
+#                                   +-----------------------------------------+
+
+# Eight interfaces are used: setup_prepare reads NETIFS[p1] .. NETIFS[p8].
+NUM_NETIFS=8
+# NOTE(review): presumably asks lib.sh to verify tc availability - confirm.
+CHECK_TC="yes"
+# The generic forwarding library is located relative to this script's path.
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh
+
+# Print the IPv4 address of host number $1 on VLAN $2. Each VLAN gets its own
+# block of 16 addresses in 192.0.2.0: VLAN 10 starts at .0, VLAN 11 at .16.
+ipaddr()
+{
+	local host=$1
+	local vlan=$2
+	local block_base=$((16 * (vlan - 10)))
+
+	echo 192.0.2.$((block_base + host))
+}
+
+# Set up host interface $1 as host number $2: raise the MTU and create VLAN
+# uppers 10 and 11, each with a /28 address from ipaddr.
+host_create()
+{
+	local dev=$1; shift
+	local host=$1; shift
+	local vlan
+
+	simple_if_init $dev
+	mtu_set $dev 10000
+
+	# The egress map sends priority-0 traffic with 802.1p value 0 on
+	# VLAN 10 and with 802.1p value 1 on VLAN 11.
+	for vlan in 10 11; do
+		vlan_create $dev $vlan v$dev $(ipaddr $host $vlan)/28
+		ip link set dev $dev.$vlan type vlan egress 0:$((vlan - 10))
+	done
+}
+
+# Undo host_create for interface $1: remove the VLAN uppers (11 first, then
+# 10), restore the MTU and tear the interface down.
+host_destroy()
+{
+	local dev=$1; shift
+	local vlan
+
+	for vlan in 11 10; do
+		vlan_destroy $dev $vlan
+	done
+	mtu_restore $dev
+	simple_if_fini $dev
+}
+
+# Create H1 on $h1 as host number 1 (192.0.2.1 and 192.0.2.17 via ipaddr).
+h1_create()
+{
+	host_create $h1 1
+}
+
+# Tear down what h1_create set up on $h1.
+h1_destroy()
+{
+	host_destroy $h1
+}
+
+# Create H2 on $h2 as host number 2, attach a clsact qdisc to it (the drop
+# mirror test installs an ingress filter on $h2) and pin its speed to 1Gbps.
+h2_create()
+{
+	host_create $h2 2
+	tc qdisc add dev $h2 clsact
+
+	# Some of the tests in this suite use multicast traffic. As this traffic
+	# enters BR2_10 resp. BR2_11, it is flooded to all other ports. Thus
+	# e.g. traffic ingressing through $swp2 is flooded to $swp3 (the
+	# intended destination) and $swp5 (which is intended as ingress for
+	# another stream of traffic).
+	#
+	# This is generally not a problem, but if the $swp5 throughput is lower
+	# than $swp2 throughput, there will be a build-up at $swp5. That may
+	# cause packets to fail to queue up at $swp3 due to shared buffer
+	# quotas, and the test to spuriously fail.
+	#
+	# Prevent this by setting the speed of $h2 to 1Gbps.
+
+	ethtool -s $h2 speed 1000 autoneg off
+}
+
+# Undo h2_create in reverse order: re-enable autonegotiation, remove the
+# clsact qdisc and tear down the host interface.
+h2_destroy()
+{
+	ethtool -s $h2 autoneg on
+	tc qdisc del dev $h2 clsact
+	host_destroy $h2
+}
+
+# Create H3 on $h3 as host number 3 and pin its speed to 1Gbps.
+h3_create()
+{
+	host_create $h3 3
+	ethtool -s $h3 speed 1000 autoneg off
+}
+
+# Undo h3_create: re-enable autonegotiation, then tear down the interface.
+h3_destroy()
+{
+	ethtool -s $h3 autoneg on
+	host_destroy $h3
+}
+
+# Build the switch side of the topology: four per-VLAN bridges, VLAN uppers
+# on all five switch ports, priority maps, 1Gbps port speeds and a raised
+# pool 8 threshold on $swp3.
+switch_create()
+{
+	local intf
+	local vlan
+
+	# One bridge per (bridge group, VLAN) pair.
+	ip link add dev br1_10 type bridge
+	ip link add dev br1_11 type bridge
+
+	ip link add dev br2_10 type bridge
+	ip link add dev br2_11 type bridge
+
+	for intf in $swp1 $swp2 $swp3 $swp4 $swp5; do
+		ip link set dev $intf up
+		mtu_set $intf 10000
+	done
+
+	# $swp1 and $swp4 are the members of the br1_* bridges.
+	for intf in $swp1 $swp4; do
+		for vlan in 10 11; do
+			vlan_create $intf $vlan
+			ip link set dev $intf.$vlan master br1_$vlan
+			ip link set dev $intf.$vlan up
+		done
+	done
+
+	# $swp2, $swp3 and $swp5 are the members of the br2_* bridges.
+	for intf in $swp2 $swp3 $swp5; do
+		for vlan in 10 11; do
+			vlan_create $intf $vlan
+			ip link set dev $intf.$vlan master br2_$vlan
+			ip link set dev $intf.$vlan up
+		done
+	done
+
+	# Egress maps on $swp4 mirror the ones host_create sets on the hosts
+	# (0:0 on VLAN 10, 0:1 on VLAN 11); ingress maps on $swp1, $swp2 and
+	# $swp5 map 802.1p values 0 and 1 to priorities 0 and 1.
+	ip link set dev $swp4.10 type vlan egress 0:0
+	ip link set dev $swp4.11 type vlan egress 0:1
+	for intf in $swp1 $swp2 $swp5; do
+		for vlan in 10 11; do
+			ip link set dev $intf.$vlan type vlan ingress 0:0 1:1
+		done
+	done
+
+	# Every switch port except $swp1 is forced to 1Gbps.
+	for intf in $swp2 $swp3 $swp4 $swp5; do
+		ethtool -s $intf speed 1000 autoneg off
+	done
+
+	ip link set dev br1_10 up
+	ip link set dev br1_11 up
+	ip link set dev br2_10 up
+	ip link set dev br2_11 up
+
+	# Save $swp3's pool 8 threshold and replace it with the first field
+	# printed by devlink_pool_size_thtype for pool 0.
+	# NOTE(review): presumably that field is the size of pool 0 - confirm.
+	local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1)
+	devlink_port_pool_th_save $swp3 8
+	devlink_port_pool_th_set $swp3 8 $size
+}
+
+# Undo switch_create: restore the pool threshold, drop any root qdisc left on
+# $swp3, bring the bridges down, re-enable autonegotiation, remove the VLAN
+# uppers and finally delete the bridges.
+switch_destroy()
+{
+	local intf
+	local vlan
+
+	devlink_port_pool_th_restore $swp3 8
+
+	# A test may or may not have left a root qdisc behind; errors from the
+	# deletion are discarded.
+	tc qdisc del dev $swp3 root 2>/dev/null
+
+	ip link set dev br2_11 down
+	ip link set dev br2_10 down
+	ip link set dev br1_11 down
+	ip link set dev br1_10 down
+
+	for intf in $swp5 $swp4 $swp3 $swp2; do
+		ethtool -s $intf autoneg on
+	done
+
+	# For every port: detach and destroy both VLAN uppers, then restore
+	# the MTU and bring the port down.
+	for intf in $swp5 $swp3 $swp2 $swp4 $swp1; do
+		for vlan in 11 10; do
+			ip link set dev $intf.$vlan down
+			ip link set dev $intf.$vlan nomaster
+			vlan_destroy $intf $vlan
+		done
+
+		mtu_restore $intf
+		ip link set dev $intf down
+	done
+
+	ip link del dev br2_11
+	ip link del dev br2_10
+	ip link del dev br1_11
+	ip link del dev br1_10
+}
+
+# Map the eight NETIFS entries to the port variables used by the tests, record
+# the MAC address of $h3 and build the hosts and the switch.
+setup_prepare()
+{
+	# p1/p2, p3/p4 and p5/p6 are host/switch port pairs.
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	# p7 and p8 are both switch ports.
+	swp4=${NETIFS[p7]}
+	swp5=${NETIFS[p8]}
+
+	# Used as the destination MAC for traffic sent towards H3.
+	h3_mac=$(mac_get $h3)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+	switch_create
+}
+
+# Tear everything down in the reverse order of setup_prepare.
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+# Ping H3 from H1 and from H2, on VLAN 10 and then VLAN 11 for each host.
+ping_ipv4()
+{
+	local -a senders=("$h1" "$h2")
+	local idx
+	local vlan
+
+	for idx in 0 1; do
+		for vlan in 10 11; do
+			ping_test ${senders[idx]}.$vlan $(ipaddr 3 $vlan) \
+				  " from host $((idx + 1)), vlan $vlan"
+		done
+	done
+}
+
+# Print the traffic class that VLAN $1 maps to: 0 for VLAN 10, 1 for VLAN 11.
+get_tc()
+{
+	local vlan=$1
+	local tc=$((vlan - 10))
+
+	echo $tc
+}
+
+# Print the handle of the RED qdisc that serves VLAN $1.
+get_qdisc_handle()
+{
+	local vlan=$1
+	local tc
+
+	tc=$(get_tc $vlan)
+
+	# Handle is 107: for TC1, 108: for TC0.
+	echo "10$((8 - tc)):"
+}
+
+# Print the .backlog statistic of the $swp3 qdisc that serves VLAN $1.
+get_qdisc_backlog()
+{
+	local vlan=$1
+	local handle=$(get_qdisc_handle $vlan)
+
+	qdisc_stats_get $swp3 $handle .backlog
+}
+
+# Print the $swp3 ethtool counter tc_transmit_queue_tc_N, where N is the TC
+# of VLAN $1 plus 8.
+get_mc_transmit_queue()
+{
+	local vlan=$1
+	local mc_tc=$(get_tc $vlan)
+
+	mc_tc=$((mc_tc + 8))
+	ethtool_stats_get $swp3 tc_transmit_queue_tc_$mc_tc
+}
+
+# Print the ecn_marked ethtool counter of $swp3. The VLAN argument is
+# accepted but not used: the counter that is read is not per-VLAN.
+get_nmarked()
+{
+	local vlan=$1; shift
+
+	ethtool_stats_get $swp3 ecn_marked
+}
+
+# Print the .packets statistic of the $swp3 qdisc that serves VLAN $1, read
+# through busywait_for_counter with arguments 1100 and +1.
+get_qdisc_npackets()
+{
+	local vlan=$1
+	local handle=$(get_qdisc_handle $vlan)
+
+	busywait_for_counter 1100 +1 \
+		qdisc_stats_get $swp3 $handle .packets
+}
+
+# Send $3 packets of protocol $2 from H2 to H3 on VLAN $1, using $MZ with a
+# payload size of 8000. Any further arguments are passed through to $MZ.
+send_packets()
+{
+	local vlan=$1; shift
+	local proto=$1; shift
+	local pkts=$1; shift
+	local src_ip=$(ipaddr 2 $vlan)
+	local dst_ip=$(ipaddr 3 $vlan)
+
+	$MZ $h2.$vlan -p 8000 -a own -b $h3_mac \
+	    -A $src_ip -B $dst_ip \
+	    -t $proto -q -c $pkts "$@"
+}
+
+# This sends traffic in an attempt to build a backlog of $size. Returns 0 on
+# success. If the backlog is still below $size after the retry budget is used
+# up (the counter check below allows 11 sends), it bails out and returns 1. In
+# both cases it dumps the last observed backlog size to stdout.
+#
+# $1: VLAN whose qdisc backlog is built
+# $2: requested backlog, in the unit of the qdisc .backlog statistic
+# $3: protocol handed to send_packets
+# Any further arguments are forwarded to send_packets.
+build_backlog()
+{
+	local vlan=$1; shift
+	local size=$1; shift
+	local proto=$1; shift
+
+	local cur=-1
+	local i=0
+
+	while :; do
+		# Read the backlog once it has moved past the previously seen
+		# value. The exit status of busywait is not checked; only its
+		# output is used.
+		local cur=$(busywait 1100 until_counter_is "> $cur" \
+					    get_qdisc_backlog $vlan)
+		# Number of packets needed to cover the shortfall, rounded up.
+		# 8000 matches the payload size used by send_packets.
+		local diff=$((size - cur))
+		local pkts=$(((diff + 7999) / 8000))
+
+		if ((cur >= size)); then
+			echo $cur
+			return 0
+		elif ((i++ > 10)); then
+			echo $cur
+			return 1
+		fi
+
+		send_packets $vlan $proto $pkts "$@"
+	done
+}
+
+# Measure the percentage of ECN-marked packets over a 5 second window and
+# test it against a condition.
+#
+# $1: VLAN whose qdisc packet counter is sampled
+# $2: arithmetic condition applied to the percentage, e.g. "== 0" or ">= 95"
+#
+# Prints the percentage to stdout and returns 0 if the condition holds,
+# non-zero otherwise. If no packets were counted during the window, nothing
+# is printed to stdout, a diagnostic goes to stderr and 1 is returned.
+check_marking()
+{
+	local vlan=$1; shift
+	local cond=$1; shift
+
+	local npackets_0=$(get_qdisc_npackets $vlan)
+	local nmarked_0=$(get_nmarked $vlan)
+	sleep 5
+	local npackets_1=$(get_qdisc_npackets $vlan)
+	local nmarked_1=$(get_nmarked $vlan)
+
+	local nmarked_d=$((nmarked_1 - nmarked_0))
+	local npackets_d=$((npackets_1 - npackets_0))
+
+	# Without this guard a stalled stream would trigger a division by
+	# zero in the percentage computation below.
+	if ((npackets_d == 0)); then
+		echo "check_marking: no packets counted for VLAN $vlan" >&2
+		return 1
+	fi
+
+	local pct=$((100 * nmarked_d / npackets_d))
+
+	echo $pct
+	((pct $cond))
+}
+
+# Checks shared by the ECN and ECN-nodrop tests: no marking while the backlog
+# is below the limit, and at least 95% marking once it is above.
+#
+# $1: test name used in the log lines
+# $2: VLAN under test
+# $3: RED limit configured for that VLAN's qdisc
+ecn_test_common()
+{
+	local name=$1; shift
+	local vlan=$1; shift
+	local limit=$1; shift
+	local backlog
+	local pct
+
+	# Build the below-the-limit backlog using UDP. We could use TCP just
+	# fine, but this way we get a proof that UDP is accepted when queue
+	# length is below the limit. The main stream is using TCP, and if the
+	# limit is misconfigured, we would see this traffic being ECN marked.
+	RET=0
+	backlog=$(build_backlog $vlan $((2 * limit / 3)) udp)
+	check_err $? "Could not build the requested backlog"
+	pct=$(check_marking $vlan "== 0")
+	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+	log_test "TC $((vlan - 10)): $name backlog < limit"
+
+	# Now push TCP, because non-TCP traffic would be early-dropped after the
+	# backlog crosses the limit, and we want to make sure that the backlog
+	# is above the limit.
+	RET=0
+	backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
+	check_err $? "Could not build the requested backlog"
+	pct=$(check_marking $vlan ">= 95")
+	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95."
+	log_test "TC $((vlan - 10)): $name backlog > limit"
+}
+
+# ECN test for VLAN $1 with RED limit $2: run the common marking checks, then
+# verify that UDP can no longer build backlog above the limit.
+do_ecn_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local name=ECN
+
+	# Background TCP stream from H1 to H3, sent with tos=0x01.
+	start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
+			  $h3_mac tos=0x01
+	sleep 1
+
+	ecn_test_common "$name" $vlan $limit
+
+	# Up there we saw that UDP gets accepted when backlog is below the
+	# limit. Now that it is above, it should all get dropped, and backlog
+	# building should fail.
+	RET=0
+	build_backlog $vlan $((2 * limit)) udp >/dev/null
+	check_fail $? "UDP traffic went into backlog instead of being early-dropped"
+	log_test "TC $((vlan - 10)): $name backlog > limit: UDP early-dropped"
+
+	stop_traffic
+	sleep 1
+}
+
+# ECN nodrop test for VLAN $1 with RED limit $2: run the common marking
+# checks, then verify that UDP still builds backlog above the limit.
+do_ecn_nodrop_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local name="ECN nodrop"
+
+	# Background TCP stream from H1 to H3, sent with tos=0x01.
+	start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
+			  $h3_mac tos=0x01
+	sleep 1
+
+	ecn_test_common "$name" $vlan $limit
+
+	# Up there we saw that UDP gets accepted when backlog is below the
+	# limit. Now that it is above, in nodrop mode, make sure it goes to
+	# backlog as well.
+	RET=0
+	build_backlog $vlan $((2 * limit)) udp >/dev/null
+	check_err $? "UDP traffic was early-dropped instead of getting into backlog"
+	log_test "TC $((vlan - 10)): $name backlog > limit: UDP not dropped"
+
+	stop_traffic
+	sleep 1
+}
+
+# Plain RED test for VLAN $1 with limit $2: backlog can be built below the
+# limit, cannot be pushed above it, and nothing is ECN-marked in either case.
+do_red_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local backlog
+	local pct
+
+	# Use ECN-capable TCP to verify there's no marking even though the queue
+	# is above limit.
+	start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
+			  $h3_mac tos=0x01
+
+	# Pushing below the queue limit should work.
+	RET=0
+	backlog=$(build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01)
+	check_err $? "Could not build the requested backlog"
+	pct=$(check_marking $vlan "== 0")
+	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+	log_test "TC $((vlan - 10)): RED backlog < limit"
+
+	# Pushing above should not.
+	RET=0
+	backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
+	check_fail $? "Traffic went into backlog instead of being early-dropped"
+	pct=$(check_marking $vlan "== 0")
+	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+	# The backlog that was reached must sit at or below the limit, and
+	# within 10% of it.
+	local diff=$((limit - backlog))
+	pct=$((100 * diff / limit))
+	((0 <= pct && pct <= 10))
+	check_err $? "backlog $backlog / $limit expected <= 10% distance"
+	log_test "TC $((vlan - 10)): RED backlog > limit"
+
+	stop_traffic
+	sleep 1
+}
+
+# Verify that backlog built by two concurrent streams is reported by the
+# qdisc and is also visible in the ethtool transmit queue counter read by
+# get_mc_transmit_queue.
+#
+# $1: VLAN under test
+# $2: accepted for symmetry with the other do_*_test helpers; not used
+do_mc_backlog_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local qbl
+	local ebl
+
+	RET=0
+
+	# NOTE(review): "bc" is passed where the other tests pass $h3_mac;
+	# presumably it selects a broadcast destination MAC - confirm.
+	start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) bc
+	start_tcp_traffic $h2.$vlan $(ipaddr 2 $vlan) $(ipaddr 3 $vlan) bc
+
+	qbl=$(busywait 5000 until_counter_is ">= 500000" \
+		       get_qdisc_backlog $vlan)
+	check_err $? "Could not build MC backlog"
+
+	# Verify that we actually see the backlog on BUM TC. Do a busywait as
+	# well, performance blips might cause false fail.
+	ebl=$(busywait 5000 until_counter_is ">= 500000" \
+		       get_mc_transmit_queue $vlan)
+	check_err $? "MC backlog reported by qdisc not visible in ethtool"
+
+	# One stop_traffic per start_tcp_traffic above.
+	stop_traffic
+	stop_traffic
+
+	log_test "TC $((vlan - 10)): Qdisc reports MC backlog"
+}
+
+# Common body of the qevent drop tests.
+#
+# $1: VLAN under test
+# $2: RED limit configured for that VLAN's qdisc
+# $3: trigger name, used only in the log line ("${trigger}ped packets")
+# $4: subtest name; selects qevent_rule_install_$4 / qevent_rule_uninstall_$4
+# $5: command that prints the counter of packets seen by the qevent action.
+#     It is expanded unquoted, so it may carry its own arguments.
+do_drop_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local trigger=$1; shift
+	local subtest=$1; shift
+	local fetch_counter=$1; shift
+	local backlog
+	local base
+	local now
+	local pct
+
+	RET=0
+
+	start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) $h3_mac
+
+	# Create a bit of a backlog and observe no mirroring due to drops.
+	qevent_rule_install_$subtest
+	base=$($fetch_counter)
+
+	build_backlog $vlan $((2 * limit / 3)) udp >/dev/null
+
+	busywait 1100 until_counter_is ">= $((base + 1))" $fetch_counter >/dev/null
+	check_fail $? "Spurious packets observed without buffer pressure"
+
+	# Push to the queue until it's at the limit. The configured limit is
+	# rounded by the qdisc and then by the driver, so this is the best we
+	# can do to get to the real limit of the system.
+	build_backlog $vlan $((3 * limit / 2)) udp >/dev/null
+
+	# 11 packets are sent, and at least 10 of them must show up in the
+	# counter.
+	base=$($fetch_counter)
+	send_packets $vlan udp 11
+
+	now=$(busywait 1100 until_counter_is ">= $((base + 10))" $fetch_counter)
+	check_err $? "Dropped packets not observed: 11 expected, $((now - base)) seen"
+
+	# When no extra traffic is injected, there should be no mirroring.
+	busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null
+	check_fail $? "Spurious packets observed"
+
+	# When the rule is uninstalled, there should be no mirroring.
+	qevent_rule_uninstall_$subtest
+	send_packets $vlan udp 11
+	busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null
+	check_fail $? "Spurious packets observed after uninstall"
+
+	log_test "TC $((vlan - 10)): ${trigger}ped packets $subtest'd"
+
+	stop_traffic
+	sleep 1
+}
+
+# Install a hardware-only (skip_sw) matchall rule on block 10 that mirrors
+# matched packets to $swp2. The install_qdisc callers bind the early_drop
+# qevent to block 10.
+qevent_rule_install_mirror()
+{
+	tc filter add block 10 pref 1234 handle 102 matchall skip_sw \
+	   action mirred egress mirror dev $swp2 hw_stats disabled
+}
+
+# Remove the rule added by qevent_rule_install_mirror.
+qevent_rule_uninstall_mirror()
+{
+	tc filter del block 10 pref 1234 handle 102 matchall
+}
+
+# Print the statistics of the $h2 ingress filter with handle 101, i.e. the
+# filter installed by do_drop_mirror_test.
+qevent_counter_fetch_mirror()
+{
+	tc_rule_handle_stats_get "dev $h2 ingress" 101
+}
+
+# Run do_drop_test with the mirror qevent rule.
+#
+# $1: VLAN under test
+# $2: RED limit
+# $3: qevent name, forwarded to do_drop_test as the trigger name
+do_drop_mirror_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local qevent_name=$1; shift
+
+	# Hardware-only filter on $h2 ingress that drops UDP packets. Its
+	# counter is what qevent_counter_fetch_mirror reads.
+	tc filter add dev $h2 ingress pref 1 handle 101 prot ip \
+	   flower skip_sw ip_proto udp \
+	   action drop
+
+	do_drop_test "$vlan" "$limit" "$qevent_name" mirror \
+		     qevent_counter_fetch_mirror
+
+	tc filter del dev $h2 ingress pref 1 handle 101 flower
+}
+
+# Install a hardware-only (skip_sw) matchall rule on block 10 with a trap
+# action.
+qevent_rule_install_trap()
+{
+	tc filter add block 10 pref 1234 handle 102 matchall skip_sw \
+	   action trap hw_stats disabled
+}
+
+# Remove the rule added by qevent_rule_install_trap.
+qevent_rule_uninstall_trap()
+{
+	tc filter del block 10 pref 1234 handle 102 matchall
+}
+
+# Print the RX packet counter of the devlink trap named $1.
+qevent_counter_fetch_trap()
+{
+	local trap_name=$1; shift
+
+	devlink_trap_rx_packets_get "$trap_name"
+}
+
+# Run do_drop_test with the trap qevent rule.
+#
+# $1: VLAN under test
+# $2: RED limit
+# $3: devlink trap name; also forwarded to do_drop_test as the trigger name
+do_drop_trap_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local trap_name=$1; shift
+
+	# The counter command is passed as one string; do_drop_test expands it
+	# unquoted, so the trap name becomes its argument.
+	do_drop_test "$vlan" "$limit" "$trap_name" trap \
+		     "qevent_counter_fetch_trap $trap_name"
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
new file mode 100755
index 000000000..3f007c5f8
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Names of the tests this script runs.
+ALL_TESTS="
+	ping_ipv4
+	ecn_test
+	ecn_nodrop_test
+	red_test
+	mc_backlog_test
+	red_mirror_test
+	red_trap_test
+"
+# The root qdisc kind defaults to ets, but a caller may preset QDISC before
+# sourcing this file (sch_red_prio.sh does so).
+: ${QDISC:=ets}
+source sch_red_core.sh
+
+# do_ecn_test first builds 2/3 of the requested backlog and expects no marking,
+# and then builds 3/2 of it and does expect marking. The values of $BACKLOG1 and
+# $BACKLOG2 are far enough apart not to overlap, so that we can assume that if
+# we do see (do not see) marking, it is actually due to the configuration of
+# that one TC, and not due to configuration of the other TC leaking over.
+BACKLOG1=200000
+BACKLOG2=500000
+
+# Install a $QDISC root qdisc with 8 bands on $swp3 and a RED child on bands 8
+# and 7. All arguments are appended to both RED qdiscs (e.g. "ecn", "nodrop",
+# "qevent early_drop block 10").
+install_qdisc()
+{
+	local -a args=("$@")
+
+	tc qdisc add dev $swp3 root handle 10: $QDISC \
+	   bands 8 priomap 7 6 5 4 3 2 1 0
+	# Handle 108: uses $BACKLOG1 as its threshold. min and max are one
+	# byte apart.
+	tc qdisc add dev $swp3 parent 10:8 handle 108: red \
+	   limit 1000000 min $BACKLOG1 max $((BACKLOG1 + 1)) \
+	   probability 1.0 avpkt 8000 burst 38 "${args[@]}"
+	# Handle 107: uses $BACKLOG2 as its threshold.
+	tc qdisc add dev $swp3 parent 10:7 handle 107: red \
+	   limit 1000000 min $BACKLOG2 max $((BACKLOG2 + 1)) \
+	   probability 1.0 avpkt 8000 burst 63 "${args[@]}"
+	sleep 1
+}
+
+# Remove the qdiscs added by install_qdisc: both children first, then the
+# root.
+uninstall_qdisc()
+{
+	tc qdisc del dev $swp3 parent 10:7
+	tc qdisc del dev $swp3 parent 10:8
+	tc qdisc del dev $swp3 root
+}
+
+# ECN test on both VLANs, each against its own RED threshold.
+ecn_test()
+{
+	install_qdisc ecn
+
+	do_ecn_test 10 $BACKLOG1
+	do_ecn_test 11 $BACKLOG2
+
+	uninstall_qdisc
+}
+
+# ECN test with the RED "nodrop" flag, on both VLANs.
+ecn_nodrop_test()
+{
+	install_qdisc ecn nodrop
+
+	do_ecn_nodrop_test 10 $BACKLOG1
+	do_ecn_nodrop_test 11 $BACKLOG2
+
+	uninstall_qdisc
+}
+
+# Plain RED test (no extra RED flags), on both VLANs.
+red_test()
+{
+	install_qdisc
+
+	do_red_test 10 $BACKLOG1
+	do_red_test 11 $BACKLOG2
+
+	uninstall_qdisc
+}
+
+# Check backlog reporting for the two-stream test, on both VLANs.
+mc_backlog_test()
+{
+	install_qdisc
+
+	# Note that the backlog numbers here do not correspond to RED
+	# configuration, but are arbitrary.
+	do_mc_backlog_test 10 $BACKLOG1
+	do_mc_backlog_test 11 $BACKLOG2
+
+	uninstall_qdisc
+}
+
+# Early-drop qevent bound to block 10, exercised with the mirror rule on both
+# VLANs.
+red_mirror_test()
+{
+	install_qdisc qevent early_drop block 10
+
+	do_drop_mirror_test 10 $BACKLOG1 early_drop
+	do_drop_mirror_test 11 $BACKLOG2 early_drop
+
+	uninstall_qdisc
+}
+
+# Early-drop qevent bound to block 10, exercised with the trap rule on both
+# VLANs. "early_drop" doubles as the devlink trap name that is read.
+red_trap_test()
+{
+	install_qdisc qevent early_drop block 10
+
+	do_drop_trap_test 10 $BACKLOG1 early_drop
+	do_drop_trap_test 11 $BACKLOG2 early_drop
+
+	uninstall_qdisc
+}
+
+# Run cleanup whenever the script exits.
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+# bail_on_lldpad and tests_run come from the sourced libraries.
+bail_on_lldpad
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh
new file mode 100755
index 000000000..76820a0e9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Reuse the whole ETS-based RED test with prio as the root qdisc:
+# sch_red_ets.sh only applies its "ets" default when QDISC is unset.
+QDISC=prio
+source sch_red_ets.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
new file mode 100755
index 000000000..ede9c38d3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Names of the tests this script runs.
+ALL_TESTS="
+	ping_ipv4
+	ecn_test
+	ecn_nodrop_test
+	red_test
+	mc_backlog_test
+	red_mirror_test
+"
+source sch_red_core.sh
+
+# RED threshold used by install_qdisc (min; max is one byte above).
+BACKLOG=300000
+
+# Install RED directly as the root qdisc of $swp3. All arguments are appended
+# to the RED configuration. Handle 108: is the one get_qdisc_handle returns
+# for VLAN 10, the only VLAN the tests in this script use.
+install_qdisc()
+{
+	local -a args=("$@")
+
+	tc qdisc add dev $swp3 root handle 108: red \
+	   limit 1000000 min $BACKLOG max $((BACKLOG + 1)) \
+	   probability 1.0 avpkt 8000 burst 38 "${args[@]}"
+	sleep 1
+}
+
+# Remove the root qdisc added by install_qdisc.
+uninstall_qdisc()
+{
+	tc qdisc del dev $swp3 root
+}
+
+# ECN test on VLAN 10 against the root RED qdisc.
+ecn_test()
+{
+	install_qdisc ecn
+	do_ecn_test 10 $BACKLOG
+	uninstall_qdisc
+}
+
+# ECN test with the RED "nodrop" flag on VLAN 10.
+ecn_nodrop_test()
+{
+	install_qdisc ecn nodrop
+	do_ecn_nodrop_test 10 $BACKLOG
+	uninstall_qdisc
+}
+
+# Plain RED test (no extra RED flags) on VLAN 10.
+red_test()
+{
+	install_qdisc
+	do_red_test 10 $BACKLOG
+	uninstall_qdisc
+}
+
+# Check backlog reporting for the two-stream test on VLAN 10.
+mc_backlog_test()
+{
+	install_qdisc
+	# Note that the backlog value here does not correspond to RED
+	# configuration, but is arbitrary.
+	do_mc_backlog_test 10 $BACKLOG
+	uninstall_qdisc
+}
+
+# Early-drop qevent bound to block 10, exercised with the mirror rule on
+# VLAN 10.
+red_mirror_test()
+{
+	install_qdisc qevent early_drop block 10
+	# The third argument is the qevent name. do_drop_mirror_test passes it
+	# on as the trigger name that appears in the test log line.
+	do_drop_mirror_test 10 $BACKLOG early_drop
+	uninstall_qdisc
+}
+
+# Run cleanup whenever the script exits.
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+# bail_on_lldpad and tests_run come from the sourced libraries.
+bail_on_lldpad
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh
new file mode 100755
index 000000000..c6ce0b448
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Check for lldpad before anything else is sourced or configured.
+source qos_lib.sh
+bail_on_lldpad
+
+# Run the generic TBF-under-ETS test from the forwarding library.
+# NOTE(review): TCFLAGS=skip_sw is presumably picked up by that test to
+# restrict its tc configuration to hardware - confirm.
+lib_dir=$(dirname $0)/../../../net/forwarding
+TCFLAGS=skip_sw
+source $lib_dir/sch_tbf_ets.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh
new file mode 100755
index 000000000..8d245f331
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Check for lldpad before anything else is sourced or configured.
+source qos_lib.sh
+bail_on_lldpad
+
+# Run the generic TBF-under-PRIO test from the forwarding library.
+# NOTE(review): TCFLAGS=skip_sw is presumably picked up by that test to
+# restrict its tc configuration to hardware - confirm.
+lib_dir=$(dirname $0)/../../../net/forwarding
+TCFLAGS=skip_sw
+source $lib_dir/sch_tbf_prio.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh
new file mode 100755
index 000000000..013886061
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Check for lldpad before anything else is sourced or configured.
+source qos_lib.sh
+bail_on_lldpad
+
+# Run the generic root-TBF test from the forwarding library.
+# NOTE(review): TCFLAGS=skip_sw is presumably picked up by that test to
+# restrict its tc configuration to hardware - confirm.
+lib_dir=$(dirname $0)/../../../net/forwarding
+TCFLAGS=skip_sw
+source $lib_dir/sch_tbf_root.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
new file mode 100755
index 000000000..7d9e73a43
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
@@ -0,0 +1,222 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Names of the tests this script runs.
+ALL_TESTS="
+	port_pool_test
+	port_tc_ip_test
+	port_tc_arp_test
+"
+
+# Two interfaces are used: setup_prepare reads NETIFS[p1] and NETIFS[p2].
+NUM_NETIFS=2
+source ../../../net/forwarding/lib.sh
+source ../../../net/forwarding/devlink_lib.sh
+source mlxsw_lib.sh
+
+# Pool numbers checked by port_pool_test: the ingress pool of the physical
+# ports and the egress pool of the CPU port.
+SB_POOL_ING=0
+SB_POOL_EGR_CPU=10
+
+# TC numbers checked by the port_tc_* tests. The two *_CPU_* values are used
+# as egress TCs of the CPU port (via sb_occ_etc_check); SB_ITC is used as the
+# ingress TC of the physical ports.
+SB_ITC_CPU_IP=2
+SB_ITC_CPU_ARP=2
+SB_ITC=0
+
+# Bring up $h1 with address 192.0.1.1/24.
+h1_create()
+{
+	simple_if_init $h1 192.0.1.1/24
+}
+
+# Undo h1_create.
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.1.1/24
+}
+
+# Bring up $h2 with address 192.0.1.2/24.
+h2_create()
+{
+	simple_if_init $h2 192.0.1.2/24
+}
+
+# Undo h2_create.
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.1.2/24
+}
+
+# Compare the maximum occupancy of a pool on a devlink port with an expected
+# value.
+#
+# $1: devlink port
+# $2: pool number
+# $3: expected maximum occupancy
+#
+# Prints the value that was read. Returns 1 if it differs from the expected
+# one, 0 otherwise.
+sb_occ_pool_check()
+{
+	local dl_port=$1; shift
+	local pool=$1; shift
+	local exp_max_occ=$1
+	local query=".[][][\"pool\"][\"$pool\"][\"max\"]"
+	local max_occ
+
+	max_occ=$(devlink sb -j occupancy show $dl_port \
+		  | jq -e "$query")
+
+	echo $max_occ
+	if [[ "$max_occ" -ne "$exp_max_occ" ]]; then
+		return 1
+	fi
+	return 0
+}
+
+# Compare the maximum occupancy of an ingress TC on a devlink port with an
+# expected value.
+#
+# $1: devlink port
+# $2: ingress TC number
+# $3: expected maximum occupancy
+#
+# Prints the value that was read. Returns 1 if it differs from the expected
+# one, 0 otherwise.
+sb_occ_itc_check()
+{
+	local dl_port=$1; shift
+	local itc=$1; shift
+	local exp_max_occ=$1
+	local query=".[][][\"itc\"][\"$itc\"][\"max\"]"
+	local max_occ
+
+	max_occ=$(devlink sb -j occupancy show $dl_port \
+		  | jq -e "$query")
+
+	echo $max_occ
+	if [[ "$max_occ" -ne "$exp_max_occ" ]]; then
+		return 1
+	fi
+	return 0
+}
+
+# Compare the maximum occupancy of an egress TC on a devlink port with an
+# expected value.
+#
+# $1: devlink port
+# $2: egress TC number
+# $3: expected maximum occupancy
+#
+# Prints the value that was read. Returns 1 if it differs from the expected
+# one, 0 otherwise.
+sb_occ_etc_check()
+{
+	local dl_port=$1; shift
+	local etc=$1; shift
+	local exp_max_occ=$1; shift
+	local query=".[][][\"etc\"][\"$etc\"][\"max\"]"
+	local max_occ
+
+	max_occ=$(devlink sb -j occupancy show $dl_port \
+		  | jq -e "$query")
+
+	echo $max_occ
+	if [[ "$max_occ" -ne "$exp_max_occ" ]]; then
+		return 1
+	fi
+	return 0
+}
+
+# Send a single IP packet from $h1 to $h2 and check the maximum pool
+# occupancy it leaves on both physical ports and on the CPU port.
+port_pool_test()
+{
+	local exp_max_occ=288
+	local max_occ
+
+	# Reset the recorded maxima so that only this packet is measured.
+	devlink sb occupancy clearmax $DEVLINK_DEV
+
+	$MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+		-t ip -q
+
+	devlink sb occupancy snapshot $DEVLINK_DEV
+
+	# RET is reset before each check so that every log_test line reports
+	# its own result.
+	RET=0
+	max_occ=$(sb_occ_pool_check $dl_port1 $SB_POOL_ING $exp_max_occ)
+	check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "physical port's($h1) ingress pool"
+
+	RET=0
+	max_occ=$(sb_occ_pool_check $dl_port2 $SB_POOL_ING $exp_max_occ)
+	check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "physical port's($h2) ingress pool"
+
+	RET=0
+	max_occ=$(sb_occ_pool_check $cpu_dl_port $SB_POOL_EGR_CPU $exp_max_occ)
+	check_err $? "Expected ePool($SB_POOL_EGR_CPU) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "CPU port's egress pool"
+}
+
+# Send a single IP packet from $h1 to $h2 and check the maximum TC occupancy
+# it leaves on the physical ports (ingress TC) and on the CPU port (egress
+# TC).
+port_tc_ip_test()
+{
+	local exp_max_occ=288
+	local max_occ
+
+	# Reset the recorded maxima so that only this packet is measured.
+	devlink sb occupancy clearmax $DEVLINK_DEV
+
+	$MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+		-t ip -q
+
+	devlink sb occupancy snapshot $DEVLINK_DEV
+
+	# NOTE(review): this sub-test is logged for $h1 but reads $dl_port2,
+	# exactly like the next one; port_pool_test uses $dl_port1 for its $h1
+	# check. Possibly a copy-paste slip - confirm the intended port.
+	RET=0
+	max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
+	check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "physical port's($h1) ingress TC - IP packet"
+
+	RET=0
+	max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
+	check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "physical port's($h2) ingress TC - IP packet"
+
+	RET=0
+	max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_IP $exp_max_occ)
+	check_err $? "Expected egress TC($SB_ITC_CPU_IP) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "CPU port's egress TC - IP packet"
+}
+
+# Send a single ARP packet from $h1 and check the maximum TC occupancy it
+# leaves on the physical ports (ingress TC) and on the CPU port (egress TC).
+port_tc_arp_test()
+{
+	local exp_max_occ=96
+	local max_occ
+
+	# The expected occupancy is 96 on mlxsw_spectrum and 144 on any other
+	# chip.
+	if [[ $MLXSW_CHIP != "mlxsw_spectrum" ]]; then
+		exp_max_occ=144
+	fi
+
+	# Reset the recorded maxima so that only this packet is measured.
+	devlink sb occupancy clearmax $DEVLINK_DEV
+
+	$MZ $h1 -c 1 -p 160 -a $h1mac -A 192.0.1.1 -t arp -q
+
+	devlink sb occupancy snapshot $DEVLINK_DEV
+
+	# NOTE(review): this sub-test is logged for $h1 but reads $dl_port2,
+	# exactly like the next one. Possibly a copy-paste slip - confirm the
+	# intended port.
+	RET=0
+	max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
+	check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "physical port's($h1) ingress TC - ARP packet"
+
+	RET=0
+	max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
+	check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "physical port's($h2) ingress TC - ARP packet"
+
+	# The message names the TC that is actually checked, SB_ITC_CPU_ARP.
+	RET=0
+	max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_ARP $exp_max_occ)
+	check_err $? "Expected egress TC($SB_ITC_CPU_ARP) max occupancy to be $exp_max_occ, but got $max_occ"
+	log_test "CPU port's egress TC - ARP packet"
+}
+
+# Resolve the two test interfaces, their MAC addresses and devlink ports, look
+# up the CPU devlink port, and configure both interfaces.
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+
+	h1mac=$(mac_get $h1)
+	h2mac=$(mac_get $h2)
+
+	# Devlink port handles used by the sb_occ_*_check helpers.
+	dl_port1=$(devlink_port_by_netdev $h1)
+	dl_port2=$(devlink_port_by_netdev $h2)
+
+	cpu_dl_port=$(devlink_cpu_port_get)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+}
+
+# Tear everything down in the reverse order of setup_prepare.
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+# Run cleanup whenever the script exits.
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+# tests_run comes from the sourced library.
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py
new file mode 100755
index 000000000..2223337ee
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py
@@ -0,0 +1,416 @@
+#!/usr/bin/env python
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import json as j
+import random
+
+
+class SkipTest(Exception):
+    """Raised by the value pickers for objects that must not be reconfigured."""
+    pass
+
+
+class RandomValuePicker:
+    """
+    Class for storing shared buffer configuration. Can handle 3 different
+    objects, pool, tcbind and portpool. Provide an interface to get random
+    values for a specific object type as the follow:
+      1. Pool:
+         - random size
+
+      2. TcBind:
+         - random pool number
+         - random threshold
+
+      3. PortPool:
+         - random threshold
+    """
+    def __init__(self, pools):
+        self._pools = []
+        for pool in pools:
+            self._pools.append(pool)
+
+    def _cell_size(self):
+        return self._pools[0]["cell_size"]
+
+    def _get_static_size(self, th):
+        # For threshold of 16, this works out to be about 12MB on Spectrum-1,
+        # and about 17MB on Spectrum-2.
+        return th * 8000 * self._cell_size()
+
+    def _get_size(self):
+        return self._get_static_size(16)
+
+    def _get_thtype(self):
+        return "static"
+
+    def _get_th(self, pool):
+        # Threshold value could be any integer between 3 to 16
+        th = random.randint(3, 16)
+        if pool["thtype"] == "dynamic":
+            return th
+        else:
+            return self._get_static_size(th)
+
+    def _get_pool(self, direction):
+        ing_pools = []
+        egr_pools = []
+        for pool in self._pools:
+            if pool["type"] == "ingress":
+                ing_pools.append(pool)
+            else:
+                egr_pools.append(pool)
+        if direction == "ingress":
+            arr = ing_pools
+        else:
+            arr = egr_pools
+        return arr[random.randint(0, len(arr) - 1)]
+
+    def get_value(self, objid):
+        if isinstance(objid, Pool):
+            if objid["pool"] in [4, 8, 9, 10]:
+                # The threshold type of pools 4, 8, 9 and 10 cannot be changed
+                raise SkipTest()
+            else:
+                return (self._get_size(), self._get_thtype())
+        if isinstance(objid, TcBind):
+            if objid["tc"] >= 8:
+                # Multicast TCs cannot be changed
+                raise SkipTest()
+            else:
+                pool = self._get_pool(objid["type"])
+                th = self._get_th(pool)
+                pool_n = pool["pool"]
+                return (pool_n, th)
+        if isinstance(objid, PortPool):
+            pool_n = objid["pool"]
+            pool = self._pools[pool_n]
+            assert pool["pool"] == pool_n
+            th = self._get_th(pool)
+            return (th,)
+
+
+class RecordValuePickerException(Exception):
+    """Raised by RecordValuePicker when no recorded object matches a request."""
+    pass
+
+
+class RecordValuePicker:
+    """
+    Class for storing shared buffer configuration. Can handle 2 different
+    objects, pool and tcbind. Provide an interface to get the stored values per
+    object type.
+    """
+    def __init__(self, objlist):
+        self._recs = []
+        for item in objlist:
+            self._recs.append({"objid": item, "value": item.var_tuple()})
+
+    def get_value(self, objid):
+        if isinstance(objid, Pool) and objid["pool"] in [4, 8, 9, 10]:
+            # The threshold type of pools 4, 8, 9 and 10 cannot be changed
+            raise SkipTest()
+        if isinstance(objid, TcBind) and objid["tc"] >= 8:
+            # Multicast TCs cannot be changed
+            raise SkipTest()
+        for rec in self._recs:
+            if rec["objid"].weak_eq(objid):
+                return rec["value"]
+        raise RecordValuePickerException()
+
+
+def run_cmd(cmd, json=False):
+    out = subprocess.check_output(cmd, shell=True)
+    if json:
+        return j.loads(out)
+    return out
+
+
+def run_json_cmd(cmd):
+    """Run cmd via run_cmd() and return its output decoded from JSON."""
+    return run_cmd(cmd, json=True)
+
+
+def log_test(test_name, err_msg=None):
+    if err_msg:
+        print("\t%s" % err_msg)
+        print("TEST: %-80s  [FAIL]" % test_name)
+    else:
+        print("TEST: %-80s  [ OK ]" % test_name)
+
+
+class CommonItem(dict):
+    varitems = []
+
+    def var_tuple(self):
+        ret = []
+        self.varitems.sort()
+        for key in self.varitems:
+            ret.append(self[key])
+        return tuple(ret)
+
+    def weak_eq(self, other):
+        for key in self:
+            if key in self.varitems:
+                continue
+            if self[key] != other[key]:
+                return False
+        return True
+
+
+class CommonList(list):
+    def get_by(self, by_obj):
+        for item in self:
+            if item.weak_eq(by_obj):
+                return item
+        return None
+
+    def del_by(self, by_obj):
+        for item in self:
+            if item.weak_eq(by_obj):
+                self.remove(item)
+
+
+class Pool(CommonItem):
+    varitems = ["size", "thtype"]
+
+    def dl_set(self, dlname, size, thtype):
+        run_cmd("devlink sb pool set {} sb {} pool {} size {} thtype {}".format(dlname, self["sb"],
+                                                                                self["pool"],
+                                                                                size, thtype))
+
+
+class PoolList(CommonList):
+    """List of Pool objects."""
+    pass
+
+
+def get_pools(dlname, direction=None):
+    d = run_json_cmd("devlink sb pool show -j")
+    pools = PoolList()
+    for pooldict in d["pool"][dlname]:
+        if not direction or direction == pooldict["type"]:
+            pools.append(Pool(pooldict))
+    return pools
+
+
+def do_check_pools(dlname, pools, vp):
+    for pool in pools:
+        pre_pools = get_pools(dlname)
+        try:
+            (size, thtype) = vp.get_value(pool)
+        except SkipTest:
+            continue
+        pool.dl_set(dlname, size, thtype)
+        post_pools = get_pools(dlname)
+        pool = post_pools.get_by(pool)
+
+        err_msg = None
+        if pool["size"] != size:
+            err_msg = "Incorrect pool size (got {}, expected {})".format(pool["size"], size)
+        if pool["thtype"] != thtype:
+            err_msg = "Incorrect pool threshold type (got {}, expected {})".format(pool["thtype"], thtype)
+
+        pre_pools.del_by(pool)
+        post_pools.del_by(pool)
+        if pre_pools != post_pools:
+            err_msg = "Other pool setup changed as well"
+        log_test("pool {} of sb {} set verification".format(pool["pool"],
+                                                            pool["sb"]), err_msg)
+
+
+def check_pools(dlname, pools):
+    # Save defaults
+    record_vp = RecordValuePicker(pools)
+
+    # For each pool, set random size and static threshold type
+    do_check_pools(dlname, pools, RandomValuePicker(pools))
+
+    # Restore defaults
+    do_check_pools(dlname, pools, record_vp)
+
+
+class TcBind(CommonItem):
+    varitems = ["pool", "threshold"]
+
+    def __init__(self, port, d):
+        super(TcBind, self).__init__(d)
+        self["dlportname"] = port.name
+
+    def dl_set(self, pool, th):
+        run_cmd("devlink sb tc bind set {} sb {} tc {} type {} pool {} th {}".format(self["dlportname"],
+                                                                                     self["sb"],
+                                                                                     self["tc"],
+                                                                                     self["type"],
+                                                                                     pool, th))
+
+
+class TcBindList(CommonList):
+    """List of TcBind objects."""
+    pass
+
+
+def get_tcbinds(ports, verify_existence=False):
+    d = run_json_cmd("devlink sb tc bind show -j -n")
+    tcbinds = TcBindList()
+    for port in ports:
+        err_msg = None
+        if port.name not in d["tc_bind"] or len(d["tc_bind"][port.name]) == 0:
+            err_msg = "No tc bind for port"
+        else:
+            for tcbinddict in d["tc_bind"][port.name]:
+                tcbinds.append(TcBind(port, tcbinddict))
+        if verify_existence:
+            log_test("tc bind existence for port {} verification".format(port.name), err_msg)
+    return tcbinds
+
+
+def do_check_tcbind(ports, tcbinds, vp):
+    for tcbind in tcbinds:
+        pre_tcbinds = get_tcbinds(ports)
+        try:
+            (pool, th) = vp.get_value(tcbind)
+        except SkipTest:
+            continue
+        tcbind.dl_set(pool, th)
+        post_tcbinds = get_tcbinds(ports)
+        tcbind = post_tcbinds.get_by(tcbind)
+
+        err_msg = None
+        if tcbind["pool"] != pool:
+            err_msg = "Incorrect pool (got {}, expected {})".format(tcbind["pool"], pool)
+        if tcbind["threshold"] != th:
+            err_msg = "Incorrect threshold (got {}, expected {})".format(tcbind["threshold"], th)
+
+        pre_tcbinds.del_by(tcbind)
+        post_tcbinds.del_by(tcbind)
+        if pre_tcbinds != post_tcbinds:
+            err_msg = "Other tc bind setup changed as well"
+        log_test("tc bind {}-{} of sb {} set verification".format(tcbind["dlportname"],
+                                                                  tcbind["tc"],
+                                                                  tcbind["sb"]), err_msg)
+
+
+def check_tcbind(dlname, ports, pools):
+    tcbinds = get_tcbinds(ports, verify_existence=True)
+
+    # Save defaults
+    record_vp = RecordValuePicker(tcbinds)
+
+    # Bind each port and unicast TC (TCs < 8) to a random pool and a random
+    # threshold
+    do_check_tcbind(ports, tcbinds, RandomValuePicker(pools))
+
+    # Restore defaults
+    do_check_tcbind(ports, tcbinds, record_vp)
+
+
+class PortPool(CommonItem):
+    varitems = ["threshold"]
+
+    def __init__(self, port, d):
+        super(PortPool, self).__init__(d)
+        self["dlportname"] = port.name
+
+    def dl_set(self, th):
+        run_cmd("devlink sb port pool set {} sb {} pool {} th {}".format(self["dlportname"],
+                                                                         self["sb"],
+                                                                         self["pool"], th))
+
+
+class PortPoolList(CommonList):
+    """List of PortPool objects."""
+    pass
+
+
+def get_portpools(ports, verify_existence=False):
+    d = run_json_cmd("devlink sb port pool -j -n")
+    portpools = PortPoolList()
+    for port in ports:
+        err_msg = None
+        if port.name not in d["port_pool"] or len(d["port_pool"][port.name]) == 0:
+            err_msg = "No port pool for port"
+        else:
+            for portpooldict in d["port_pool"][port.name]:
+                portpools.append(PortPool(port, portpooldict))
+        if verify_existence:
+            log_test("port pool existence for port {} verification".format(port.name), err_msg)
+    return portpools
+
+
+def do_check_portpool(ports, portpools, vp):
+    for portpool in portpools:
+        pre_portpools = get_portpools(ports)
+        (th,) = vp.get_value(portpool)
+        portpool.dl_set(th)
+        post_portpools = get_portpools(ports)
+        portpool = post_portpools.get_by(portpool)
+
+        err_msg = None
+        if portpool["threshold"] != th:
+            err_msg = "Incorrect threshold (got {}, expected {})".format(portpool["threshold"], th)
+
+        pre_portpools.del_by(portpool)
+        post_portpools.del_by(portpool)
+        if pre_portpools != post_portpools:
+            err_msg = "Other port pool setup changed as well"
+        log_test("port pool {}-{} of sb {} set verification".format(portpool["dlportname"],
+                                                                    portpool["pool"],
+                                                                    portpool["sb"]), err_msg)
+
+
+def check_portpool(dlname, ports, pools):
+    portpools = get_portpools(ports, verify_existence=True)
+
+    # Save defaults
+    record_vp = RecordValuePicker(portpools)
+
+    # For each port pool, set a random threshold
+    do_check_portpool(ports, portpools, RandomValuePicker(pools))
+
+    # Restore defaults
+    do_check_portpool(ports, portpools, record_vp)
+
+
+class Port:
+    """A devlink port, identified by the name devlink lists it under."""
+    def __init__(self, name):
+        self.name = name
+
+
+class PortList(list):
+    """List of Port objects."""
+    pass
+
+
+def get_ports(dlname):
+    d = run_json_cmd("devlink port show -j")
+    ports = PortList()
+    for name in d["port"]:
+        if name.find(dlname) == 0 and d["port"][name]["flavour"] == "physical":
+            ports.append(Port(name))
+    return ports
+
+
+def get_device():
+    devices_info = run_json_cmd("devlink -j dev info")["info"]
+    for d in devices_info:
+        if "mlxsw_spectrum" in devices_info[d]["driver"]:
+            return d
+    return None
+
+
+class UnavailableDevlinkNameException(Exception):
+    """Raised when get_device() finds no matching devlink device."""
+    pass
+
+
+def test_sb_configuration():
+    """
+    Entry point: locate the device, then run the pool, TC bind and port pool
+    checks against it in that order.
+
+    Raises UnavailableDevlinkNameException when no device is found.
+    """
+    # Use static seed
+    random.seed(0)
+
+    dlname = get_device()
+    if not dlname:
+        raise UnavailableDevlinkNameException()
+
+    ports = get_ports(dlname)
+    pools = get_pools(dlname)
+
+    check_pools(dlname, pools)
+    check_tcbind(dlname, ports, pools)
+    check_portpool(dlname, ports, pools)
+
+
+# Runs unconditionally when the file is executed or imported.
+test_sb_configuration()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh
new file mode 100644
index 000000000..f7c168dec
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../mirror_gre_scale.sh
+
+mirror_gre_get_target()
+{
+	local should_fail=$1; shift
+	local target
+
+	target=$(devlink_resource_size_get span_agents)
+
+	if ((! should_fail)); then
+		echo $target
+	else
+		echo $((target + 1))
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
new file mode 100755
index 000000000..d7cf33a3f
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Directory of the shared forwarding selftest libraries sourced below.
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+# Only run on the two PCI vendor:device IDs below, which the message names
+# as Spectrum-2 and Spectrum-3.
+if [[ "$DEVLINK_VIDDID" != "15b3:cf6c" && \
+	"$DEVLINK_VIDDID" != "15b3:cf70" ]]; then
+	echo "SKIP: test is tailored for Mellanox Spectrum-2 and Spectrum-3"
+	exit 1
+fi
+
+# Name of the test case currently running; cleanup() uses it to call the
+# matching <test>_cleanup function.
+current_test=""
+
+# EXIT trap handler: run the cleanup function of the test case that is still
+# in flight, if any, and reload the device.
+cleanup()
+{
+	pre_cleanup
+	# $current_test is non-empty only while the main loop is running a
+	# test case. Quote it so the check does not depend on how an empty
+	# expansion happens to be parsed.
+	if [[ -n "$current_test" ]]; then
+		${current_test}_cleanup
+	fi
+	# Need to reload in order to avoid router abort.
+	devlink_reload
+}
+
+trap cleanup EXIT
+
+# Overall result of the run. $RET is reset for every case, so it cannot be
+# used as the exit status: a failure followed by a passing case would be lost.
+RET_FIN=0
+
+ALL_TESTS="router tc_flower mirror_gre tc_police"
+for current_test in ${TESTS:-$ALL_TESTS}; do
+	source ${current_test}_scale.sh
+
+	# A test may override the number of interfaces to wait for through
+	# <TEST>_NUM_NETIFS; otherwise $NUM_NETIFS is used.
+	num_netifs_var=${current_test^^}_NUM_NETIFS
+	num_netifs=${!num_netifs_var:-$NUM_NETIFS}
+
+	# Run each test twice: once at the target and once past it.
+	for should_fail in 0 1; do
+		RET=0
+		target=$(${current_test}_get_target "$should_fail")
+		${current_test}_setup_prepare
+		setup_wait $num_netifs
+		${current_test}_test "$target" "$should_fail"
+		${current_test}_cleanup
+		devlink_reload
+		if [[ "$should_fail" -eq 0 ]]; then
+			log_test "'$current_test' $target"
+		else
+			log_test "'$current_test' overflow $target"
+		fi
+		# Remember any failure of this case.
+		RET_FIN=$(( RET_FIN || RET ))
+	done
+done
+current_test=""
+
+exit "$RET_FIN"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/router_scale.sh
new file mode 100644
index 000000000..1897e163e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/router_scale.sh
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../router_scale.sh
+
+router_get_target()
+{
+	local should_fail=$1
+	local target
+
+	target=$(devlink_resource_size_get kvd)
+
+	if [[ $should_fail -eq 0 ]]; then
+		target=$((target * 85 / 100))
+	else
+		target=$((target + 1))
+	fi
+
+	echo $target
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
new file mode 100755
index 000000000..616d35814
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
@@ -0,0 +1,1129 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking the A-TCAM and C-TCAM operation in Spectrum-2.
+# It tries to exercise as many code paths in the eRP state machine as
+# possible.
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+ALL_TESTS="single_mask_test identical_filters_test two_masks_test \
+	multiple_masks_test ctcam_edge_cases_test delta_simple_test \
+	delta_two_masks_one_key_test delta_simple_rehash_test \
+	bloom_simple_test bloom_complex_test bloom_delta_test \
+	max_erp_entries_test max_group_size_test"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+tcflags="skip_hw"
+
+# Initialize $h1 with one address in each of the two test subnets.
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+# Undo h1_create().
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+# Initialize $h2 with one address in each of the two test subnets and attach
+# the clsact qdisc that the tests add their ingress filters to.
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24 198.51.100.2/24
+	tc qdisc add dev $h2 clsact
+}
+
+# Undo h2_create() in reverse order: remove the qdisc, then the interface
+# configuration.
+h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24
+}
+
+tp_record()
+{
+	local tracepoint=$1
+	local cmd=$2
+
+	perf record -q -e $tracepoint $cmd
+	return $?
+}
+
+tp_record_all()
+{
+	local tracepoint=$1
+	local seconds=$2
+
+	perf record -a -q -e $tracepoint sleep $seconds
+	return $?
+}
+
+__tp_hit_count()
+{
+	local tracepoint=$1
+
+	local perf_output=`perf script -F trace:event,trace`
+	return `echo $perf_output | grep "$tracepoint:" | wc -l`
+}
+
+# Return 0 when __tp_hit_count for tracepoint $1 equals $2, 1 otherwise.
+tp_check_hits()
+{
+	local tracepoint=$1
+	local count=$2
+
+	__tp_hit_count $tracepoint
+	local hits=$?
+
+	[[ "$hits" -ne "$count" ]] && return 1
+	return 0
+}
+
+# Return 0 when tracepoint $1 was hit at least once in the last perf
+# recording, 1 when it was not hit at all.
+tp_check_hits_any()
+{
+	local tracepoint=$1
+
+	__tp_hit_count $tracepoint
+	local hits=$?
+
+	[[ "$hits" -eq "0" ]] && return 1
+	return 0
+}
+
+single_mask_test()
+{
+	# When only a single mask is required, the device uses the master
+	# mask and not the eRP table. Verify that under this mode the right
+	# filter is matched
+
+	RET=0
+
+	# One filter: a packet to 192.0.2.2 must be counted by handle 101.
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Single filter - did not match"
+
+	# Second filter with the same mask but a different destination.
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 198.51.100.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 2
+	check_err $? "Two filters - did not match highest priority"
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Two filters - did not match lowest priority"
+
+	# After removing handle 101, handle 102 must keep matching.
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 2
+	check_err $? "Single filter - did not match after delete"
+
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "single mask test ($tcflags)"
+}
+
+identical_filters_test()
+{
+	# When two filters that only differ in their priority are used,
+	# one needs to be inserted into the C-TCAM. This test verifies
+	# that filters are correctly spilled to C-TCAM and that the right
+	# filter is matched
+
+	RET=0
+
+	# Two filters with the same key, differing only in pref/handle.
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match A-TCAM filter"
+
+	# With handle 101 gone, handle 102 must take over.
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match C-TCAM filter after A-TCAM delete"
+
+	# A third identical filter with a higher pref number must not take
+	# packets away from handle 102.
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 2
+	check_err $? "Did not match C-TCAM filter after A-TCAM add"
+
+	# With handle 102 gone, handle 103 must match.
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Did not match A-TCAM filter after C-TCAM delete"
+
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+	log_test "identical filters test ($tcflags)"
+}
+
+two_masks_test()
+{
+	# When more than one mask is required, the eRP table is used. This
+	# test verifies that the eRP table is correctly allocated and used
+
+	RET=0
+
+	# Two filters with different prefix lengths (host match and /8).
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.0.0.0/8 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Two filters - did not match highest priority"
+
+	# Only the /8 filter is left after this delete.
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Single filter - did not match"
+
+	# Add a /24 filter whose pref number is lower than the /8 filter's.
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.0/24 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Two filters - did not match highest priority after add"
+
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "two masks test ($tcflags)"
+}
+
+multiple_masks_test()
+{
+	# The number of masks in a region is limited. Once the maximum
+	# number of masks has been reached filters that require new
+	# masks are spilled to the C-TCAM. This test verifies that
+	# spillage is performed correctly and that the right filter is
+	# matched
+
+	# Runs only when the filters are installed with skip_sw.
+	if [[ "$tcflags" != "skip_sw" ]]; then
+		return 0;
+	fi
+
+	local index
+
+	RET=0
+
+	# NUM_MASKS filters are added; the first NUM_ERPS of them are expected
+	# not to spill. Filter i gets pref/handle BASE_INDEX - i.
+	NUM_MASKS=32
+	NUM_ERPS=16
+	BASE_INDEX=100
+
+	# Add filters with growing prefix lengths /1../NUM_MASKS. Each new
+	# filter has a lower pref number than all earlier ones.
+	for i in $(eval echo {1..$NUM_MASKS}); do
+		index=$((BASE_INDEX - i))
+
+		if ((i > NUM_ERPS)); then
+			exp_hits=1
+			err_msg="$i filters - C-TCAM spill did not happen when it was expected"
+		else
+			exp_hits=0
+			err_msg="$i filters - C-TCAM spill happened when it should not"
+		fi
+
+		# Record the spill tracepoint while the filter is being added.
+		tp_record "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" \
+			"tc filter add dev $h2 ingress protocol ip pref $index \
+				handle $index \
+				flower $tcflags \
+				dst_ip 192.0.2.2/${i} src_ip 192.0.2.1/${i} \
+				action drop"
+		tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" \
+				$exp_hits
+		check_err $? "$err_msg"
+
+		$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 \
+			-B 192.0.2.2 -t ip -q
+
+		tc_check_packets "dev $h2 ingress" $index 1
+		check_err $? "$i filters - did not match highest priority (add)"
+	done
+
+	# Remove the filters again, most recently added first. Before each
+	# removal the filter must match a second packet.
+	for i in $(eval echo {$NUM_MASKS..1}); do
+		index=$((BASE_INDEX - i))
+
+		$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 \
+			-B 192.0.2.2 -t ip -q
+
+		tc_check_packets "dev $h2 ingress" $index 2
+		check_err $? "$i filters - did not match highest priority (del)"
+
+		tc filter del dev $h2 ingress protocol ip pref $index \
+			handle $index flower
+	done
+
+	log_test "multiple masks test ($tcflags)"
+}
+
+# First sub-test of ctcam_edge_cases_test().
+ctcam_two_atcam_masks_test()
+{
+	RET=0
+
+	# First case: C-TCAM is disabled when there are two A-TCAM masks.
+	# We push a filter into the C-TCAM by using two identical filters
+	# as in identical_filters_test()
+
+	# Filter goes into A-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	# Filter goes into C-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	# Filter goes into A-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.0.0.0/16 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match A-TCAM filter"
+
+	# Delete both A-TCAM and C-TCAM filters and make sure the remaining
+	# A-TCAM filter still works
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Did not match A-TCAM filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+	log_test "ctcam with two atcam masks test ($tcflags)"
+}
+
+# Second sub-test of ctcam_edge_cases_test().
+ctcam_one_atcam_mask_test()
+{
+	RET=0
+
+	# Second case: C-TCAM is disabled when there is one A-TCAM mask.
+	# The test is similar to identical_filters_test()
+
+	# Filter goes into A-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	# Filter goes into C-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	# Handle 101 was added last but has the lower pref number.
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match C-TCAM filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match A-TCAM filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "ctcam with one atcam mask test ($tcflags)"
+}
+
+# Third sub-test of ctcam_edge_cases_test(). It only adds and deletes
+# filters; no traffic is sent and no check_err is issued, so the logged
+# result reflects the RET=0 set at the top.
+ctcam_no_atcam_masks_test()
+{
+	RET=0
+
+	# Third case: C-TCAM is disabled when there are no A-TCAM masks
+	# This test exercises the code path that transitions the eRP table
+	# to its initial state after deleting the last C-TCAM mask
+
+	# Filter goes into A-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	# Filter goes into C-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	# The A-TCAM filter is deleted first, the C-TCAM filter last.
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "ctcam with no atcam masks test ($tcflags)"
+}
+
+ctcam_edge_cases_test()
+{
+	# When the C-TCAM is disabled after deleting the last C-TCAM
+	# mask, we want to make sure the eRP state machine is put in
+	# the correct state
+
+	# Each sub-test resets RET and logs its own result.
+	ctcam_two_atcam_masks_test
+	ctcam_one_atcam_mask_test
+	ctcam_no_atcam_masks_test
+}
+
+delta_simple_test()
+{
+	# The first filter will create eRP, the second filter will fit into
+	# the first eRP with delta. Remove the first rule then and check that
+	# the eRP stays (referenced by the second filter).
+
+	RET=0
+
+	# Runs only when the filters are installed with skip_sw.
+	if [[ "$tcflags" != "skip_sw" ]]; then
+		return 0;
+	fi
+
+	# Every filter add/del below is run under perf so the objagg
+	# tracepoints it triggers can be checked right afterwards.
+	tp_record "objagg:*" "tc filter add dev $h2 ingress protocol ip \
+		   pref 1 handle 101 flower $tcflags dst_ip 192.0.0.0/24 \
+		   action drop"
+	tp_check_hits "objagg:objagg_obj_root_create" 1
+	check_err $? "eRP was not created"
+
+	tp_record "objagg:*" "tc filter add dev $h2 ingress protocol ip \
+		   pref 2 handle 102 flower $tcflags dst_ip 192.0.2.2 \
+		   action drop"
+	tp_check_hits "objagg:objagg_obj_root_create" 0
+	check_err $? "eRP was incorrectly created"
+	tp_check_hits "objagg:objagg_obj_parent_assign" 1
+	check_err $? "delta was not created"
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	# 192.0.2.2 is outside 192.0.0.0/24, so only handle 102 may match.
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	# Deleting the first filter must leave both the eRP and the delta in
+	# place.
+	tp_record "objagg:*" "tc filter del dev $h2 ingress protocol ip \
+		   pref 1 handle 101 flower"
+	tp_check_hits "objagg:objagg_obj_root_destroy" 0
+	check_err $? "eRP was incorrectly destroyed"
+	tp_check_hits "objagg:objagg_obj_parent_unassign" 0
+	check_err $? "delta was incorrectly destroyed"
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 2
+	check_err $? "Did not match on correct filter after the first was removed"
+
+	# Deleting the last filter must release both the delta and the eRP.
+	tp_record "objagg:*" "tc filter del dev $h2 ingress protocol ip \
+		   pref 2 handle 102 flower"
+	tp_check_hits "objagg:objagg_obj_parent_unassign" 1
+	check_err $? "delta was not destroyed"
+	tp_check_hits "objagg:objagg_obj_root_destroy" 1
+	check_err $? "eRP was not destroyed"
+
+	log_test "delta simple test ($tcflags)"
+}
+
+delta_two_masks_one_key_test()
+{
+	# If 2 keys are the same and only differ in mask in a way that
+	# they belong under the same ERP (second is delta of the first),
+	# there should be no C-TCAM spill.
+
+	RET=0
+
+	# Runs only when the filters are installed with skip_sw.
+	if [[ "$tcflags" != "skip_sw" ]]; then
+		return 0;
+	fi
+
+	# Add a /24 filter and a host filter inside that /24; neither add may
+	# trigger the C-TCAM spill tracepoint.
+	tp_record "mlxsw:*" "tc filter add dev $h2 ingress protocol ip \
+		   pref 1 handle 101 flower $tcflags dst_ip 192.0.2.0/24 \
+		   action drop"
+	tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" 0
+	check_err $? "incorrect C-TCAM spill while inserting the first rule"
+
+	tp_record "mlxsw:*" "tc filter add dev $h2 ingress protocol ip \
+		   pref 2 handle 102 flower $tcflags dst_ip 192.0.2.2 \
+		   action drop"
+	tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" 0
+	check_err $? "incorrect C-TCAM spill while inserting the second rule"
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	# Both filters cover 192.0.2.2; handle 101 has the lower pref number.
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match on correct filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "delta two masks one key test ($tcflags)"
+}
+
+# Verify that the ACL region rehash can be disabled and re-enabled through
+# the acl_region_rehash_interval devlink parameter, that a rehash with
+# migration is traced once filters are installed, and that the filters keep
+# matching correctly afterwards.
+delta_simple_rehash_test()
+{
+	RET=0
+
+	# Runs only when the filters are installed with skip_sw.
+	if [[ "$tcflags" != "skip_sw" ]]; then
+		return 0;
+	fi
+
+	# With the interval set to 0, no rehash may be traced during a 7
+	# second recording.
+	devlink dev param set $DEVLINK_DEV \
+		name acl_region_rehash_interval cmode runtime value 0
+	check_err $? "Failed to set ACL region rehash interval"
+
+	tp_record_all mlxsw:mlxsw_sp_acl_tcam_vregion_rehash 7
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
+	check_fail $? "Rehash trace was hit even when rehash should be disabled"
+
+	# Re-enable the rehash with an interval value of 3000.
+	devlink dev param set $DEVLINK_DEV \
+		name acl_region_rehash_interval cmode runtime value 3000
+	check_err $? "Failed to set ACL region rehash interval"
+
+	sleep 1
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.1.0/25 action drop
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.0.3.0/24 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	# Only handle 102 covers 192.0.2.2.
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_fail $? "Matched a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	# First 3 second recording: a rehash including a migration is
+	# expected.
+	tp_record_all mlxsw:* 3
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
+	check_err $? "Rehash trace was not hit"
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate
+	check_err $? "Migrate trace was not hit"
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate_end
+	check_err $? "Migrate end trace was not hit"
+	# Second 3 second recording: a rehash is still expected, but no
+	# further migration.
+	tp_record_all mlxsw:* 3
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
+	check_err $? "Rehash trace was not hit"
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate
+	check_fail $? "Migrate trace was hit when no migration should happen"
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate_end
+	check_fail $? "Migrate end trace was hit when no migration should happen"
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	# The same filter must still be the only one matching after the
+	# rehash.
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched a wrong filter after rehash"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_fail $? "Matched a wrong filter after rehash"
+
+	tc_check_packets "dev $h2 ingress" 102 2
+	check_err $? "Did not match on correct filter after rehash"
+
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	log_test "delta simple rehash test ($tcflags)"
+}
+
+delta_simple_ipv6_rehash_test()
+{
+	RET=0
+
+	if [[ "$tcflags" != "skip_sw" ]]; then
+		return 0;
+	fi
+
+	devlink dev param set $DEVLINK_DEV \
+		name acl_region_rehash_interval cmode runtime value 0
+	check_err $? "Failed to set ACL region rehash interval"
+
+	tp_record_all mlxsw:mlxsw_sp_acl_tcam_vregion_rehash 7
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
+	check_fail $? "Rehash trace was hit even when rehash should be disabled"
+
+	devlink dev param set $DEVLINK_DEV \
+		name acl_region_rehash_interval cmode runtime value 3000
+	check_err $? "Failed to set ACL region rehash interval"
+
+	sleep 1
+
+	tc filter add dev $h2 ingress protocol ipv6 pref 1 handle 101 flower \
+		$tcflags dst_ip 2001:db8:1::0/121 action drop
+	tc filter add dev $h2 ingress protocol ipv6 pref 2 handle 102 flower \
+		$tcflags dst_ip 2001:db8:2::2 action drop
+	tc filter add dev $h2 ingress protocol ipv6 pref 3 handle 103 flower \
+		$tcflags dst_ip 2001:db8:3::0/120 action drop
+
+	$MZ $h1 -6 -c 1 -p 64 -a $h1mac -b $h2mac \
+		-A 2001:db8:2::1 -B 2001:db8:2::2 -t udp -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_fail $? "Matched a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	tp_record_all mlxsw:* 3
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
+	check_err $? "Rehash trace was not hit"
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate
+	check_err $? "Migrate trace was not hit"
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate_end
+	check_err $? "Migrate end trace was not hit"
+	tp_record_all mlxsw:* 3
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
+	check_err $? "Rehash trace was not hit"
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate
+	check_fail $? "Migrate trace was hit when no migration should happen"
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_migrate_end
+	check_fail $? "Migrate end trace was hit when no migration should happen"
+
+	$MZ $h1 -6 -c 1 -p 64 -a $h1mac -b $h2mac \
+		-A 2001:db8:2::1 -B 2001:db8:2::2 -t udp -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched a wrong filter after rehash"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_fail $? "Matched a wrong filter after rehash"
+
+	tc_check_packets "dev $h2 ingress" 102 2
+	check_err $? "Did not match on correct filter after rehash"
+
+	tc filter del dev $h2 ingress protocol ipv6 pref 3 handle 103 flower
+	tc filter del dev $h2 ingress protocol ipv6 pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol ipv6 pref 1 handle 101 flower
+
+	log_test "delta simple IPv6 rehash test ($tcflags)"
+}
+
+TEST_RULE_BASE=256
+declare -a test_rules_inserted
+
+test_rule_add()
+{
+	local iface=$1
+	local tcflags=$2
+	local index=$3
+
+	if ! [ ${test_rules_inserted[$index]} ] ; then
+		test_rules_inserted[$index]=false
+	fi
+	if ${test_rules_inserted[$index]} ; then
+		return
+	fi
+
+	local number=$(( $index + $TEST_RULE_BASE ))
+	printf -v hexnumber '%x' $number
+
+	batch="${batch}filter add dev $iface ingress protocol ipv6 pref 1 \
+		handle $number flower $tcflags \
+		src_ip 2001:db8:1::$hexnumber action drop\n"
+	test_rules_inserted[$index]=true
+}
+
+# Queue a "filter del" for test rule $2 on $1 in $batch, if it is inserted.
+test_rule_del()
+{
+	local iface=$1
+	local index=$2
+	local number
+
+	# A slot that was never set counts as "not inserted": nothing to delete.
+	if [[ "${test_rules_inserted[$index]:-false}" != true ]]; then
+		return
+	fi
+
+	# The handle alone identifies the rule; no hex address is needed here.
+	number=$(( $index + $TEST_RULE_BASE ))
+
+	batch="${batch}filter del dev $iface ingress protocol ipv6 pref 1 \
+		handle $number flower\n"
+	test_rules_inserted[$index]=false
+}
+
+# Flip rule $3 on $1: queue its removal if inserted, otherwise its addition.
+test_rule_add_or_remove()
+{
+	local iface=$1
+	local tcflags=$2
+	local index=$3
+
+	# A slot that was never touched counts as "not inserted".
+	case "${test_rules_inserted[$index]:-false}" in
+	true)
+		test_rule_del $iface $index ;;
+	*)
+		test_rule_add $iface $tcflags $index ;;
+	esac
+}
+
+# Walk rule indexes 1..$3 in random runs: skip 1-20, then toggle 1-20 rules.
+test_rule_add_or_remove_random_batch()
+{
+	local iface=$1
+	local tcflags=$2
+	local total_count=$3
+	local skip=0 count=0
+	local MAXSKIP=20 MAXCOUNT=20
+	local i		# keep the loop index out of the caller's scope
+
+	for ((i = 1; i <= total_count; i++)); do
+		if (( skip == 0 && count == 0 )); then
+			((skip=$RANDOM % $MAXSKIP + 1))
+			((count=$RANDOM % $MAXCOUNT + 1))
+		fi
+		if (( $skip != 0 )); then
+			((skip-=1))
+		else
+			((count-=1))
+			test_rule_add_or_remove $iface $tcflags $i
+		fi
+	done
+}
+
+# Rehash an IPv6 region while thousands of rules are being added and removed.
+delta_massive_ipv6_rehash_test()
+{
+	local i
+	RET=0
+
+	[[ "$tcflags" == "skip_sw" ]] || return 0
+
+	devlink dev param set $DEVLINK_DEV \
+		name acl_region_rehash_interval cmode runtime value 0
+	check_err $? "Failed to set ACL region rehash interval"
+
+	tp_record_all mlxsw:mlxsw_sp_acl_tcam_vregion_rehash 7
+	tp_check_hits_any mlxsw:mlxsw_sp_acl_tcam_vregion_rehash
+	check_fail $? "Rehash trace was hit even when rehash should be disabled"
+
+	RANDOM=4432897
+	declare batch=""
+	test_rule_add_or_remove_random_batch $h2 $tcflags 5000
+
+	echo -n -e $batch | tc -b -
+
+	declare batch=""
+	test_rule_add_or_remove_random_batch $h2 $tcflags 5000
+
+	devlink dev param set $DEVLINK_DEV \
+		name acl_region_rehash_interval cmode runtime value 3000
+	check_err $? "Failed to set ACL region rehash interval"
+
+	sleep 1
+
+	tc filter add dev $h2 ingress protocol ipv6 pref 1 handle 101 flower \
+		$tcflags dst_ip 2001:db8:1::0/121 action drop
+	tc filter add dev $h2 ingress protocol ipv6 pref 2 handle 102 flower \
+		$tcflags dst_ip 2001:db8:2::2 action drop
+	tc filter add dev $h2 ingress protocol ipv6 pref 3 handle 103 flower \
+		$tcflags dst_ip 2001:db8:3::0/120 action drop
+
+	$MZ $h1 -6 -c 1 -p 64 -a $h1mac -b $h2mac \
+		-A 2001:db8:2::1 -B 2001:db8:2::2 -t udp -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_fail $? "Matched a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	echo -n -e $batch | tc -b -
+
+	devlink dev param set $DEVLINK_DEV \
+		name acl_region_rehash_interval cmode runtime value 0
+	check_err $? "Failed to set ACL region rehash interval"
+
+	$MZ $h1 -6 -c 1 -p 64 -a $h1mac -b $h2mac \
+		-A 2001:db8:2::1 -B 2001:db8:2::2 -t udp -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched a wrong filter after rehash"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_fail $? "Matched a wrong filter after rehash"
+
+	tc_check_packets "dev $h2 ingress" 102 2
+	check_err $? "Did not match on correct filter after rehash"
+
+	tc filter del dev $h2 ingress protocol ipv6 pref 3 handle 103 flower
+	tc filter del dev $h2 ingress protocol ipv6 pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol ipv6 pref 1 handle 101 flower
+
+	declare batch=""
+	for i in {1..5000}; do
+		test_rule_del $h2 $i	# takes (iface, index), no tcflags
+	done
+	echo -e $batch | tc -b -
+
+	log_test "delta massive IPv6 rehash test ($tcflags)"
+}
+
+bloom_simple_test()
+{
+	# Bloom filter requires that the eRP table is used. This test
+	# verifies that Bloom filter is not harming correctness of ACLs.
+	# First, make sure that eRP table is used and then set rule patterns
+	# which are distant enough and will result skipping a lookup after
+	# consulting the Bloom filter. Although some eRP lookups are skipped,
+	# the correct filter should be hit.
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	tc filter add dev $h2 ingress protocol ip pref 5 handle 104 flower \
+		$tcflags dst_ip 198.51.100.2 action drop
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.0.0.0/8 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Two filters - did not match highest priority"
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Single filter - did not match"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Low prio filter - did not match"
+
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 198.0.0.0/8 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Two filters - did not match highest priority after add"
+
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol ip pref 5 handle 104 flower
+
+	log_test "bloom simple test ($tcflags)"
+}
+
+bloom_complex_test()
+{
+	# Bloom filter index computation is affected by the region ID, the eRP
+	# ID and the region key size. In order to exercise those parts
+	# of the Bloom filter code, use a series of regions, each with a
+	# different key size and send packet that should hit all of them.
+	local index i
+
+	RET=0
+	NUM_CHAINS=4
+	BASE_INDEX=100
+
+	# Create chain with up to 2 key blocks (ip_proto only)
+	tc chain add dev $h2 ingress chain 1 protocol ip flower \
+		ip_proto tcp &> /dev/null
+	# Create chain with 2-4 key blocks (ip_proto, src MAC)
+	tc chain add dev $h2 ingress chain 2 protocol ip flower \
+		ip_proto tcp \
+		src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null
+	# Create chain with 4-8 key blocks (ip_proto, src & dst MAC, IPv4 dest)
+	tc chain add dev $h2 ingress chain 3 protocol ip flower \
+		ip_proto tcp \
+		dst_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF \
+		src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF \
+		dst_ip 0.0.0.0/32 &> /dev/null
+	# Default chain contains all fields and therefore is 8-12 key blocks
+	tc chain add dev $h2 ingress chain 4
+
+	# We need at least 2 rules in every region to have eRP table active
+	# so create a dummy rule per chain using a different pattern
+	for ((i = 0; i <= NUM_CHAINS; i++)); do
+		index=$((BASE_INDEX - 1 - i))
+		tc filter add dev $h2 ingress chain $i protocol ip \
+			pref 2 handle $index flower \
+			$tcflags ip_proto tcp action drop
+	done
+
+	# Add rules to test Bloom filter, each in a different chain
+	index=$BASE_INDEX
+	tc filter add dev $h2 ingress protocol ip \
+		pref 1 handle $((++index)) flower \
+		$tcflags dst_ip 192.0.0.0/16 action goto chain 1
+	tc filter add dev $h2 ingress chain 1 protocol ip \
+		pref 1 handle $((++index)) flower \
+		$tcflags action goto chain 2
+	tc filter add dev $h2 ingress chain 2 protocol ip \
+		pref 1 handle $((++index)) flower \
+		$tcflags src_mac $h1mac action goto chain 3
+	tc filter add dev $h2 ingress chain 3 protocol ip \
+		pref 1 handle $((++index)) flower \
+		$tcflags dst_ip 192.0.0.0/8 action goto chain 4
+	tc filter add dev $h2 ingress chain 4 protocol ip \
+		pref 1 handle $((++index)) flower \
+		$tcflags src_ip 192.0.2.0/24 action drop
+
+	# Send a packet that is supposed to hit all chains
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	for ((i = 0; i <= NUM_CHAINS; i++)); do
+		index=$((BASE_INDEX + i + 1))
+		tc_check_packets "dev $h2 ingress" $index 1
+		check_err $? "Did not match chain $i"
+	done
+
+	# Rules cleanup
+	for ((i = NUM_CHAINS; i >= 0; i--)); do
+		index=$((BASE_INDEX - i - 1))
+		tc filter del dev $h2 ingress chain $i \
+			pref 2 handle $index flower
+		index=$((BASE_INDEX + i + 1))
+		tc filter del dev $h2 ingress chain $i \
+			pref 1 handle $index flower
+	done
+
+	# Chains cleanup
+	for ((i = NUM_CHAINS; i >= 1; i--)); do
+		tc chain del dev $h2 ingress chain $i
+	done
+
+	log_test "bloom complex test ($tcflags)"
+}
+
+
+bloom_delta_test()
+{
+	# When multiple masks are used, the eRP table is activated. When
+	# masks are close enough (delta) the masks reside on the same
+	# eRP table. This test verifies that the eRP table is correctly
+	# allocated and used in delta condition and that Bloom filter is
+	# still functional with delta.
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.1.0.0/16 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.1.2.1 -B 192.1.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Single filter - did not match"
+
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.2.1.0/24 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.2.1.1 -B 192.2.1.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Delta filters - did not match second filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "bloom delta test ($tcflags)"
+}
+
+max_erp_entries_test()
+{
+	# The number of eRP entries is limited. Once the maximum number of eRPs
+	# has been reached, filters cannot be added. This test verifies that
+	# when this limit is reached, insertion fails without crashing.
+
+	RET=0
+
+	local num_masks=32
+	local num_regions=15
+	local chain_failed mask_failed
+	local i j	# loop indexes, kept out of the global scope
+	local ret
+
+	if [[ "$tcflags" != "skip_sw" ]]; then
+		return 0;
+	fi
+
+	for ((i=1; i < $num_regions; i++)); do
+		for ((j=$num_masks; j >= 0; j--)); do
+			tc filter add dev $h2 ingress chain $i protocol ip \
+				pref $i handle $j flower $tcflags \
+				dst_ip 192.1.0.0/$j &> /dev/null
+			ret=$?
+
+			if [ $ret -ne 0 ]; then
+				chain_failed=$i
+				mask_failed=$j
+				break 2
+			fi
+		done
+	done
+
+	# We expect to exceed the maximum number of eRP entries, so that
+	# insertion eventually fails. Otherwise, the test should be adjusted to
+	# add more filters.
+	check_fail $ret "expected to exceed number of eRP entries"
+
+	for ((; i >= 1; i--)); do
+		for ((j=0; j <= $num_masks; j++)); do
+			tc filter del dev $h2 ingress chain $i protocol ip \
+				pref $i handle $j flower &> /dev/null
+		done
+	done
+
+	log_test "max eRP entries test ($tcflags). " \
+		"max chain $chain_failed, mask $mask_failed"
+}
+
+max_group_size_test()
+{
+	# The number of ACLs in an ACL group is limited. Once the maximum
+	# number of ACLs has been reached, filters cannot be added. This test
+	# verifies that when this limit is reached, insertion fails without
+	# crashing.
+
+	RET=0
+
+	local num_acls=32
+	local max_size ret
+	local i		# loop index, kept out of the global scope
+
+	if [[ "$tcflags" != "skip_sw" ]]; then
+		return 0;
+	fi
+
+	for ((i=1; i < $num_acls; i++)); do
+		if [[ $(( i % 2 )) == 1 ]]; then
+			tc filter add dev $h2 ingress pref $i proto ipv4 \
+				flower $tcflags dst_ip 198.51.100.1/32 \
+				ip_proto tcp tcp_flags 0x01/0x01 \
+				action drop &> /dev/null
+		else
+			tc filter add dev $h2 ingress pref $i proto ipv6 \
+				flower $tcflags dst_ip 2001:db8:1::1/128 \
+				action drop &> /dev/null
+		fi
+
+		ret=$?
+		[[ $ret -ne 0 ]] && max_size=$((i - 1)) && break
+	done
+
+	# We expect to exceed the maximum number of ACLs in a group, so that
+	# insertion eventually fails. Otherwise, the test should be adjusted to
+	# add more filters.
+	check_fail $ret "expected to exceed number of ACLs in a group"
+
+	for ((; i >= 1; i--)); do
+		if [[ $(( i % 2 )) == 1 ]]; then
+			tc filter del dev $h2 ingress pref $i proto ipv4 \
+				flower $tcflags dst_ip 198.51.100.1/32 \
+				ip_proto tcp tcp_flags 0x01/0x01 \
+				action drop &> /dev/null
+		else
+			tc filter del dev $h2 ingress pref $i proto ipv6 \
+				flower $tcflags dst_ip 2001:db8:1::1/128 \
+				action drop &> /dev/null
+		fi
+	done
+
+	log_test "max ACL group size test ($tcflags). max size $max_size"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+	h1mac=$(mac_get $h1)
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+if ! tc_offload_check; then
+	check_err 1 "Could not test offloaded functionality"
+	log_test "mlxsw-specific tests for tc flower"
+	exit
+else
+	tcflags="skip_sw"
+	tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
new file mode 100644
index 000000000..efd798a85
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_flower_scale.sh
+
+# Print how many flower rules to insert: the most that fit, or one more than
+# that when $1 is non-zero and the insertion is expected to fail.
+tc_flower_get_target()
+{
+	local should_fail=$1; shift
+
+	# Each tc filter gets a counter from the driver, so the counters bound
+	# the number of filters: 30K (30,720) flow counters exist and six of
+	# them are currently used for multicast routing.
+	local limit=30714
+
+	if ((should_fail)); then
+		limit=$((limit + 1))
+	fi
+
+	echo $limit
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh
new file mode 100644
index 000000000..e79ac0dad
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_police_scale.sh
+
+# Print the policer scale target: the single_rate_policers resource size, or
+# one more when $1 is non-zero and an overflow is expected.
+tc_police_get_target()
+{
+	local should_fail=$1; shift
+	local limit
+
+	limit=$(devlink_resource_size_get global_policers single_rate_policers)
+	if ((should_fail)); then
+		limit=$((limit + 1))
+	fi
+	echo $limit
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
new file mode 100644
index 000000000..73035e250
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source "../../../../net/forwarding/devlink_lib.sh"
+
+if [ "$DEVLINK_VIDDID" != "15b3:cb84" ]; then
+	echo "SKIP: test is tailored for Mellanox Spectrum"
+	exit 1
+fi
+
+# Needed for returning to default
+declare -A KVD_DEFAULTS
+
+KVD_CHILDREN="linear hash_single hash_double"
+KVDL_CHILDREN="singles chunks large_chunks"
+
+# Shrink every KVD partition, then every kvd/linear sub-resource, to the
+# "size_min" value reported by devlink_resource_get for it.
+devlink_sp_resource_minimize()
+{
+	local res min
+
+	for res in $KVD_CHILDREN; do
+		min=$(devlink_resource_get kvd "$res" | jq '.["size_min"]')
+		devlink_resource_size_set "$min" kvd "$res"
+	done
+	for res in $KVDL_CHILDREN; do
+		min=$(devlink_resource_get kvd linear "$res" |
+		      jq '.["size_min"]')
+		devlink_resource_size_set "$min" kvd linear "$res"
+	done
+}
+
+# Restore the KVD sizes saved in KVD_DEFAULTS; reload only if one changed.
+devlink_sp_size_kvd_to_default()
+{
+	local need_reload=0
+	local i size current_size	# current_size used to leak as a global
+
+	for i in $KVD_CHILDREN; do
+		size=$(jq '.["size"]' <<< "${KVD_DEFAULTS[kvd_$i]}")
+		current_size=$(devlink_resource_size_get kvd "$i")
+
+		if [ "$size" -ne "$current_size" ]; then
+			devlink_resource_size_set "$size" kvd "$i"
+			need_reload=1
+		fi
+	done
+
+	for i in $KVDL_CHILDREN; do
+		size=$(jq '.["size"]' <<< "${KVD_DEFAULTS[kvd_linear_$i]}")
+		current_size=$(devlink_resource_size_get kvd linear "$i")
+
+		if [ "$size" -ne "$current_size" ]; then
+			devlink_resource_size_set "$size" kvd linear "$i"
+			need_reload=1
+		fi
+	done
+
+	if [ "$need_reload" -ne "0" ]; then
+		devlink_reload
+	fi
+}
+
+# Snapshot the current kvd resource, its partitions and the kvd/linear
+# sub-resources into KVD_DEFAULTS, keyed kvd, kvd_<part>, kvd_linear_<sub>.
+devlink_sp_read_kvd_defaults()
+{
+	local res
+
+	KVD_DEFAULTS[kvd]=$(devlink_resource_get "kvd")
+	for res in $KVD_CHILDREN; do
+		KVD_DEFAULTS[kvd_$res]=$(devlink_resource_get kvd "$res")
+	done
+
+	for res in $KVDL_CHILDREN; do
+		KVD_DEFAULTS[kvd_linear_$res]=$(
+			devlink_resource_get kvd linear "$res")
+	done
+}
+
+KVD_PROFILES="default scale ipv4_max"
+
+devlink_sp_resource_kvd_profile_set()
+{
+	local profile=$1
+
+	case "$profile" in
+	scale)
+		devlink_resource_size_set 64000 kvd linear
+		devlink_resource_size_set 15616 kvd linear singles
+		devlink_resource_size_set 32000 kvd linear chunks
+		devlink_resource_size_set 16384 kvd linear large_chunks
+		devlink_resource_size_set 128000 kvd hash_single
+		devlink_resource_size_set 48000 kvd hash_double
+		devlink_reload
+		;;
+	ipv4_max)
+		devlink_resource_size_set 64000 kvd linear
+		devlink_resource_size_set 15616 kvd linear singles
+		devlink_resource_size_set 32000 kvd linear chunks
+		devlink_resource_size_set 16384 kvd linear large_chunks
+		devlink_resource_size_set 144000 kvd hash_single
+		devlink_resource_size_set 32768 kvd hash_double
+		devlink_reload
+		;;
+	default)
+		devlink_resource_size_set 98304 kvd linear
+		devlink_resource_size_set 16384 kvd linear singles
+		devlink_resource_size_set 49152 kvd linear chunks
+		devlink_resource_size_set 32768 kvd linear large_chunks
+		devlink_resource_size_set 87040 kvd hash_single
+		devlink_resource_size_set 60416 kvd hash_double
+		devlink_reload
+		;;
+	*)
+		check_err 1 "Unknown profile $profile"
+	esac
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh
new file mode 100755
index 000000000..6f2683cbc
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+NUM_NETIFS=1
+source $lib_dir/lib.sh
+source devlink_lib_spectrum.sh
+
+setup_prepare()
+{
+	devlink_sp_read_kvd_defaults
+}
+
+cleanup()
+{
+	pre_cleanup
+	devlink_sp_size_kvd_to_default
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+profiles_test()
+{
+	local i
+
+	log_info "Running profile tests"
+
+	for i in $KVD_PROFILES; do
+		RET=0
+		devlink_sp_resource_kvd_profile_set $i
+		log_test "'$i' profile"
+	done
+
+	# Default is explicitly tested at end to ensure it's actually applied
+	RET=0
+	devlink_sp_resource_kvd_profile_set "default"
+	log_test "'default' profile"
+}
+
+resources_min_test()
+{
+	local size
+	local i
+	local j
+
+	log_info "Running KVD-minimum tests"
+
+	for i in $KVD_CHILDREN; do
+		RET=0
+		size=$(devlink_resource_get kvd "$i" | jq '.["size_min"]')
+		devlink_resource_size_set "$size" kvd "$i"
+
+		# In case of linear, need to minimize sub-resources as well
+		if [[ "$i" == "linear" ]]; then
+			for j in $KVDL_CHILDREN; do
+				devlink_resource_size_set 0 kvd linear "$j"
+			done
+		fi
+
+		devlink_reload
+		devlink_sp_size_kvd_to_default
+		log_test "'$i' minimize [$size]"
+	done
+}
+
+resources_max_test()
+{
+	local min_size
+	local size
+	local i
+	local j
+
+	log_info "Running KVD-maximum tests"
+	for i in $KVD_CHILDREN; do
+		RET=0
+		devlink_sp_resource_minimize
+
+		# Calculate the maximum possible size for the given partition
+		size=$(devlink_resource_size_get kvd)
+		for j in $KVD_CHILDREN; do
+			if [ "$i" != "$j" ]; then
+				min_size=$(devlink_resource_get kvd "$j" | \
+					   jq '.["size_min"]')
+				size=$((size - min_size))
+			fi
+		done
+
+		# Test almost maximum size
+		devlink_resource_size_set "$((size - 128))" kvd "$i"
+		devlink_reload
+		log_test "'$i' almost maximize [$((size - 128))]"
+
+		# Test above maximum size
+		devlink resource set "$DEVLINK_DEV" \
+			path "kvd/$i" size $((size + 128)) &> /dev/null
+		check_fail $? "Set kvd/$i to size $((size + 128)) should fail"
+		log_test "'$i' Overflow rejection [$((size + 128))]"
+
+		# Test maximum size
+		if [ "$i" == "hash_single" ] || [ "$i" == "hash_double" ]; then
+			echo "SKIP: Observed problem with exact max $i"
+			continue
+		fi
+
+		devlink_resource_size_set "$size" kvd "$i"
+		devlink_reload
+		log_test "'$i' maximize [$size]"
+
+		devlink_sp_size_kvd_to_default
+	done
+}
+
+profiles_test
+resources_min_test
+resources_max_test
+
+exit "$EXIT_STATUS"	# RET is reset per test and only holds the last result
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh
new file mode 100644
index 000000000..f7c168dec
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../mirror_gre_scale.sh
+
+# Print the mirror_gre scale target: the span_agents resource size, or one
+# more when $1 is non-zero and an overflow is expected.
+mirror_gre_get_target()
+{
+	local should_fail=$1; shift
+	local limit
+
+	limit=$(devlink_resource_size_get span_agents)
+	if ((should_fail)); then
+		limit=$((limit + 1))
+	fi
+	echo $limit
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
new file mode 100755
index 000000000..43f662401
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source devlink_lib_spectrum.sh
+
+current_test=""
+
+cleanup()
+{
+	pre_cleanup
+	if [ ! -z $current_test ]; then
+		${current_test}_cleanup
+	fi
+	devlink_sp_size_kvd_to_default
+}
+
+devlink_sp_read_kvd_defaults
+trap cleanup EXIT
+
+ALL_TESTS="router tc_flower mirror_gre tc_police"
+for current_test in ${TESTS:-$ALL_TESTS}; do
+	source ${current_test}_scale.sh
+
+	num_netifs_var=${current_test^^}_NUM_NETIFS
+	num_netifs=${!num_netifs_var:-$NUM_NETIFS}
+
+	for profile in $KVD_PROFILES; do
+		RET=0
+		devlink_sp_resource_kvd_profile_set $profile
+		if [[ $RET -gt 0 ]]; then
+			log_test "'$current_test' [$profile] setting"
+			continue
+		fi
+
+		for should_fail in 0 1; do
+			RET=0
+			target=$(${current_test}_get_target "$should_fail")
+			${current_test}_setup_prepare
+			setup_wait $num_netifs
+			${current_test}_test "$target" "$should_fail"
+			${current_test}_cleanup
+			if [[ "$should_fail" -eq 0 ]]; then
+				log_test "'$current_test' [$profile] $target"
+			else
+				log_test "'$current_test' [$profile] overflow $target"
+			fi
+		done
+	done
+done
+current_test=""
+
+exit "$EXIT_STATUS"	# RET is reset per test and only holds the last result
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh
new file mode 100644
index 000000000..21c4697d5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../router_scale.sh
+
+router_get_target()
+{
+	local should_fail=$1
+	local target
+
+	target=$(devlink_resource_size_get kvd hash_single)
+
+	if [[ $should_fail -eq 0 ]]; then
+		target=$((target * 85 / 100))
+	else
+		target=$((target + 1))
+	fi
+
+	echo $target
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh
new file mode 100644
index 000000000..f9bfd8937
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_flower_scale.sh
+
+# Print how many flower rules to insert: the most that fit, or one more than
+# that when $1 is non-zero and the insertion is expected to fail.
+tc_flower_get_target()
+{
+	local should_fail=$1; shift
+	# Of the theoretical 6144 (6x1024) rules, a bank of 512 is taken by
+	# the 18-byte MC router rule and one rule by the ACL catch-all:
+	# 6144 - 512 - 1 = 5631.
+	local limit=5631
+
+	if ((should_fail)); then
+		limit=$((limit + 1))
+	fi
+
+	echo $limit
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh
new file mode 100644
index 000000000..e79ac0dad
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_police_scale.sh
+
+# Print the policer scale target: the single_rate_policers resource size, or
+# one more when $1 is non-zero and an overflow is expected.
+tc_police_get_target()
+{
+	local should_fail=$1; shift
+	local limit
+
+	limit=$(devlink_resource_size_get global_policers single_rate_policers)
+	if ((should_fail)); then
+		limit=$((limit + 1))
+	fi
+	echo $limit
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh
new file mode 100755
index 000000000..20ed98fe5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	default_hw_stats_test
+	immediate_hw_stats_test
+	delayed_hw_stats_test
+	disabled_hw_stats_test
+"
+NUM_NETIFS=2
+
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24
+}
+
+switch_create()
+{
+	simple_if_init $swp1 192.0.2.2/24
+	tc qdisc add dev $swp1 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp1 clsact
+	simple_if_fini $swp1 192.0.2.2/24
+}
+
+hw_stats_test()
+{
+	RET=0
+
+	local name=$1
+	local action_hw_stats=$2
+	local occ_delta=$3
+	local expected_packet_count=$4
+
+	local orig_occ=$(devlink_resource_get "counters" "flow" | jq '.["occ"]')
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop $action_hw_stats
+	check_err $? "Failed to add rule with $name hw_stats"
+
+	local new_occ=$(devlink_resource_get "counters" "flow" | jq '.["occ"]')
+	local expected_occ=$((orig_occ + occ_delta))
+	[ "$new_occ" == "$expected_occ" ]
+	check_err $? "Expected occupancy of $expected_occ, got $new_occ"
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $swp1mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $swp1 ingress" 101 $expected_packet_count
+	check_err $? "Did not match incoming packet"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	log_test "$name hw_stats"
+}
+
+default_hw_stats_test()
+{
+	hw_stats_test "default" "" 2 1
+}
+
+immediate_hw_stats_test()
+{
+	hw_stats_test "immediate" "hw_stats immediate" 2 1
+}
+
+delayed_hw_stats_test()
+{
+	RET=0
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop hw_stats delayed
+	check_fail $? "Unexpected success in adding rule with delayed hw_stats"
+
+	log_test "delayed hw_stats"
+}
+
+disabled_hw_stats_test()
+{
+	hw_stats_test "disabled" "hw_stats disabled" 0 0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	h1mac=$(mac_get $h1)
+	swp1mac=$(mac_get $swp1)
+
+	vrf_prepare
+
+	h1_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+check_tc_action_hw_stats_support
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
new file mode 100644
index 000000000..aa74be9f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
@@ -0,0 +1,123 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for resource limit of offloaded flower rules. The test adds a given
+# number of flower matches for different IPv6 addresses, then checks the offload
+# indication for all of the tc flower rules. This file contains functions to set
+# up a testing topology and run the test, and is meant to be sourced from a test
+# script that calls the testing routine with a given number of rules.
+
+TC_FLOWER_NUM_NETIFS=2
+
+tc_flower_h1_create()
+{
+	# Initialise $h1 through simple_if_init, then attach a clsact qdisc.
+	simple_if_init ${h1}
+	tc qdisc add dev ${h1} clsact
+}
+
+tc_flower_h1_destroy()
+{
+	# Mirror image of tc_flower_h1_create(): qdisc goes first, then the
+	# interface is finalised.
+	tc qdisc del dev ${h1} clsact
+	simple_if_fini ${h1}
+}
+
+tc_flower_h2_create()
+{
+	# Initialise $h2 through simple_if_init, then attach the clsact qdisc
+	# that the flower rules of this test are added to.
+	simple_if_init ${h2}
+	tc qdisc add dev ${h2} clsact
+}
+
+tc_flower_h2_destroy()
+{
+	# Mirror image of tc_flower_h2_create(): qdisc goes first, then the
+	# interface is finalised.
+	tc qdisc del dev ${h2} clsact
+	simple_if_fini ${h2}
+}
+
+tc_flower_setup_prepare()
+{
+	# Both test interfaces are taken from the NETIFS array.
+	h1="${NETIFS[p1]}"
+	h2="${NETIFS[p2]}"
+
+	vrf_prepare
+
+	# $h1 is set up before $h2; tc_flower_cleanup() undoes this in
+	# reverse.
+	tc_flower_h1_create
+	tc_flower_h2_create
+}
+
+tc_flower_cleanup()
+{
+	# Undo tc_flower_setup_prepare() in reverse order and remove the tc
+	# batch file created by tc_flower_rules_create(), if there is one.
+	pre_cleanup
+
+	tc_flower_h2_destroy
+	tc_flower_h1_destroy
+
+	vrf_cleanup
+
+	if [[ -v TC_FLOWER_BATCH_FILE ]]; then
+		# Quoted so that a temporary directory containing whitespace or
+		# glob characters cannot turn the path into several rm arguments.
+		rm -f "$TC_FLOWER_BATCH_FILE"
+	fi
+}
+
+tc_flower_addr()
+{
+	# Print, without a trailing newline, the address 2001:db8:1::<N> where
+	# <N> is the first argument rendered in hexadecimal.
+	local index=$1
+
+	printf "2001:db8:1::%x" ${index}
+}
+
+tc_flower_rules_create()
+{
+	# Write $count "filter add" commands (one flower drop rule per IPv6
+	# destination address, all on $h2 ingress at pref 1000) into a fresh
+	# temporary batch file and feed it to "tc -b".
+	#
+	# $1 - number of rules to generate
+	# $2 - should_fail flag, passed unchanged to check_err_fail together
+	#      with the exit status of tc
+	#
+	# Sets the global TC_FLOWER_BATCH_FILE, which tc_flower_cleanup()
+	# removes. Reads the globals $h2 and $tcflags.
+	local count=$1; shift
+	local should_fail=$1; shift
+	# Keep the loop index out of the caller's (global) namespace.
+	local i
+
+	TC_FLOWER_BATCH_FILE="$(mktemp)"
+
+	# The whole loop is redirected once instead of re-opening the batch
+	# file for every rule. The backslash-newline pairs inside the
+	# here-document are line continuations, so each rule ends up on a
+	# single line of the batch file.
+	for ((i = 0; i < count; ++i)); do
+		cat <<-EOF
+			filter add dev $h2 ingress \
+				prot ipv6 \
+				pref 1000 \
+				flower $tcflags dst_ip $(tc_flower_addr $i) \
+				action drop
+		EOF
+	done > "$TC_FLOWER_BATCH_FILE"
+
+	tc -b "$TC_FLOWER_BATCH_FILE"
+	check_err_fail $should_fail $? "Rule insertion"
+}
+
+__tc_flower_test()
+{
+	# Install $count flower rules on $h2 and compare the number of flower
+	# entries reported by tc against $count.
+	#
+	# $1 - number of rules to install
+	# $2 - should_fail flag, handed unchanged to tc_flower_rules_create
+	#      and check_err_fail
+	#
+	# Leaves the result of the count in the global offload_count.
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	tc_flower_rules_create $count $should_fail
+
+	# Produces one output line per "flower" entry of the JSON dump (the
+	# entry's .options.in_hw value); wc -l then counts those lines.
+	offload_count=$(tc -j -s filter show dev $h2 ingress    |
+			jq -r '[ .[] | select(.kind == "flower") |
+			.options | .in_hw ]' | jq .[] | wc -l)
+	# NOTE(review): the "- 1" presumably discounts one flower entry in the
+	# dump that is not a rule of its own; confirm against tc's JSON output.
+	[[ $((offload_count - 1)) -eq $count ]]
+	check_err_fail $should_fail $? "Attempt to offload $count rules (actual result $((offload_count - 1)))"
+}
+
+tc_flower_test()
+{
+	# Entry point of the scale test.
+	# $1 - number of rules, $2 - should_fail flag for the checks.
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	# Only the low 16 bits of the IPv6 address vary between rules, and the
+	# rule priority space is 16 bits wide as well, so larger counts cannot
+	# be expressed.
+	if ((count > 65536)); then
+		check_err 1 "Invalid count of $count. At most 65536 rules supported"
+		return
+	fi
+
+	tc_offload_check $TC_FLOWER_NUM_NETIFS || {
+		check_err 1 "Could not test offloaded functionality"
+		return
+	}
+
+	# Rules are installed with skip_sw; tc_flower_rules_create() reads
+	# this global.
+	tcflags="skip_sw"
+	__tc_flower_test $count $should_fail
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh
new file mode 100755
index 000000000..448b75c15
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that policers shared by different tc filters are correctly reference
+# counted by observing policers' occupancy via devlink-resource.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	tc_police_occ_test
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	# Host side needs nothing beyond the generic interface initialisation.
+	simple_if_init ${h1}
+}
+
+h1_destroy()
+{
+	# Counterpart of h1_create().
+	simple_if_fini ${h1}
+}
+
+switch_create()
+{
+	# Initialise the switch port and attach the clsact qdisc that the
+	# test's filters are added to.
+	simple_if_init ${swp1}
+	tc qdisc add dev ${swp1} clsact
+}
+
+switch_destroy()
+{
+	# Counterpart of switch_create(), in reverse order.
+	tc qdisc del dev ${swp1} clsact
+	simple_if_fini ${swp1}
+}
+
+setup_prepare()
+{
+	# p1 is used as the host port, p2 as the switch port.
+	h1="${NETIFS[p1]}"
+	swp1="${NETIFS[p2]}"
+
+	vrf_prepare
+
+	# Host side first, then the switch side; cleanup() reverses this.
+	h1_create
+	switch_create
+}
+
+cleanup()
+{
+	# Installed as the EXIT trap handler at the bottom of this script.
+	# Tears down in the reverse order of setup_prepare().
+	pre_cleanup
+
+	switch_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+tc_police_occ_get()
+{
+	# Output is whatever devlink_resource_occ_get prints for the
+	# global_policers / single_rate_policers resource; callers use it as
+	# an integer occupancy value.
+	devlink_resource_occ_get global_policers single_rate_policers
+}
+
+tc_police_occ_test()
+{
+	# Checks policer occupancy after each add/delete of filters that use
+	# a police action, first with a private policer and then with one
+	# policer (index 10) shared by two filters.
+	RET=0
+
+	# Baseline occupancy; all expectations below are relative to it.
+	local occ=$(tc_police_occ_get)
+
+	# One filter with its own police action: baseline + 1.
+	tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \
+		flower skip_sw \
+		action police rate 100mbit burst 100k conform-exceed drop/ok
+	(( occ + 1 == $(tc_police_occ_get) ))
+	check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))"
+
+	# Filter removed: back to the baseline.
+	tc filter del dev $swp1 ingress pref 1 handle 101 flower
+	(( occ == $(tc_police_occ_get) ))
+	check_err $? "Got occupancy $(tc_police_occ_get), expected $occ"
+
+	# Two filters referring to the same police action (index 10): still
+	# only baseline + 1.
+	tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \
+		flower skip_sw \
+		action police rate 100mbit burst 100k conform-exceed drop/ok \
+		index 10
+	tc filter add dev $swp1 ingress pref 2 handle 102 proto ip \
+		flower skip_sw action police index 10
+
+	(( occ + 1 == $(tc_police_occ_get) ))
+	check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))"
+
+	# Dropping one of the two users must not release the policer.
+	tc filter del dev $swp1 ingress pref 2 handle 102 flower
+	(( occ + 1 == $(tc_police_occ_get) ))
+	check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))"
+
+	# Dropping the last user: back to the baseline.
+	tc filter del dev $swp1 ingress pref 1 handle 101 flower
+	(( occ == $(tc_police_occ_get) ))
+	check_err $? "Got occupancy $(tc_police_occ_get), expected $occ"
+
+	log_test "tc police occupancy"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
new file mode 100644
index 000000000..86e787895
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TC_POLICE_NUM_NETIFS=2
+
+tc_police_h1_create()
+{
+	# Host side needs nothing beyond the generic interface initialisation.
+	simple_if_init ${h1}
+}
+
+tc_police_h1_destroy()
+{
+	# Counterpart of tc_police_h1_create().
+	simple_if_fini ${h1}
+}
+
+tc_police_switch_create()
+{
+	# Initialise the switch port and attach the clsact qdisc that the
+	# police rules are added to.
+	simple_if_init ${swp1}
+	tc qdisc add dev ${swp1} clsact
+}
+
+tc_police_switch_destroy()
+{
+	# Counterpart of tc_police_switch_create(), in reverse order.
+	tc qdisc del dev ${swp1} clsact
+	simple_if_fini ${swp1}
+}
+
+tc_police_addr()
+{
+	# Print, without a trailing newline, the address 2001:db8:1::<N> where
+	# <N> is the first argument rendered in hexadecimal.
+	local index=$1
+
+	printf "2001:db8:1::%x" ${index}
+}
+
+tc_police_rules_create()
+{
+	# Write $count "filter add" commands (one flower rule with a police
+	# action per IPv6 destination address, all on $swp1 ingress at pref
+	# 1000) into a fresh temporary batch file and feed it to "tc -b".
+	#
+	# $1 - number of rules to generate
+	# $2 - should_fail flag, passed unchanged to check_err_fail together
+	#      with the exit status of tc
+	#
+	# Sets the global TC_POLICE_BATCH_FILE. Reads the global $swp1.
+	local count=$1; shift
+	local should_fail=$1; shift
+	# Keep the loop index out of the caller's (global) namespace.
+	local i
+
+	TC_POLICE_BATCH_FILE="$(mktemp)"
+
+	# The whole loop is redirected once instead of re-opening the batch
+	# file for every rule. The backslash-newline pairs inside the
+	# here-document are line continuations, so each rule ends up on a
+	# single line of the batch file.
+	for ((i = 0; i < count; ++i)); do
+		cat <<-EOF
+			filter add dev $swp1 ingress \
+				prot ipv6 \
+				pref 1000 \
+				flower skip_sw dst_ip $(tc_police_addr $i) \
+				action police rate 10mbit burst 100k \
+				conform-exceed drop/ok
+		EOF
+	done > "$TC_POLICE_BATCH_FILE"
+
+	tc -b "$TC_POLICE_BATCH_FILE"
+	check_err_fail $should_fail $? "Rule insertion"
+}
+
+__tc_police_test()
+{
+	# Install $1 police rules, then compare the number of filters that tc
+	# reports with in_hw set against that count. $2 is the should_fail
+	# flag forwarded to the checks.
+	local count=$1; shift
+	local should_fail=$1; shift
+	local in_hw_filter='[.[] | select(.options.in_hw == true)] | length'
+
+	tc_police_rules_create $count $should_fail
+
+	offload_count=$(tc -j filter show dev ${swp1} ingress |
+			jq "$in_hw_filter")
+	(( offload_count == count ))
+	check_err_fail $should_fail $? "tc police offload count"
+}
+
+tc_police_test()
+{
+	# Entry point of the scale test.
+	# $1 - number of rules, $2 - should_fail flag for the checks.
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	# Nothing can be verified when tc_offload_check reports failure.
+	tc_offload_check $TC_POLICE_NUM_NETIFS || {
+		check_err 1 "Could not test offloaded functionality"
+		return
+	}
+
+	__tc_police_test $count $should_fail
+}
+
+tc_police_setup_prepare()
+{
+	# p1 is used as the host port, p2 as the switch port.
+	h1="${NETIFS[p1]}"
+	swp1="${NETIFS[p2]}"
+
+	vrf_prepare
+
+	# Host side first, then the switch side; tc_police_cleanup() reverses
+	# this.
+	tc_police_h1_create
+	tc_police_switch_create
+}
+
+tc_police_cleanup()
+{
+	# Undo tc_police_setup_prepare() in reverse order and remove the tc
+	# batch file created by tc_police_rules_create(), if there is one.
+	pre_cleanup
+
+	tc_police_switch_destroy
+	tc_police_h1_destroy
+
+	vrf_cleanup
+
+	# The batch file comes from mktemp and would otherwise be left behind
+	# after every run. The variable is only set once rules were created.
+	if [[ -v TC_POLICE_BATCH_FILE ]]; then
+		rm -f "$TC_POLICE_BATCH_FILE"
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
new file mode 100755
index 000000000..553cb9fad
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
@@ -0,0 +1,394 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	shared_block_drop_test
+	egress_redirect_test
+	multi_mirror_test
+	matchall_sample_egress_test
+	matchall_mirror_behind_flower_ingress_test
+	matchall_sample_behind_flower_ingress_test
+	matchall_mirror_behind_flower_egress_test
+	police_limits_test
+	multi_police_test
+"
+NUM_NETIFS=2
+
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+switch_create()
+{
+	# Both switch ports receive an address from 192.0.2.0/24.
+	simple_if_init ${swp1} 192.0.2.1/24
+	simple_if_init ${swp2} 192.0.2.2/24
+}
+
+switch_destroy()
+{
+	# Counterpart of switch_create(), ports handled in reverse order.
+	simple_if_fini ${swp2} 192.0.2.2/24
+	simple_if_fini ${swp1} 192.0.2.1/24
+}
+
+shared_block_drop_test()
+{
+	RET=0
+
+	# It is forbidden in mlxsw driver to have mixed-bound
+	# shared block with a drop rule.
+	# ("Mixed-bound" here: block 22 bound to ingress on one port and to
+	# egress on another.)
+
+	# Block 22 bound to $swp1 ingress only: drop rule must be accepted.
+	tc qdisc add dev $swp1 ingress_block 22 clsact
+	check_err $? "Failed to create clsact with ingress block"
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_err $? "Failed to add drop rule to ingress bound block"
+
+	# A second ingress binding of the same block is fine.
+	tc qdisc add dev $swp2 ingress_block 22 clsact
+	check_err $? "Failed to create another clsact with ingress shared block"
+
+	tc qdisc del dev $swp2 clsact
+
+	# With the drop rule present, an egress binding must be refused.
+	tc qdisc add dev $swp2 egress_block 22 clsact
+	check_fail $? "Incorrect success to create another clsact with egress shared block"
+
+	# Once the drop rule is gone, the egress binding is allowed.
+	tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+	tc qdisc add dev $swp2 egress_block 22 clsact
+	check_err $? "Failed to create another clsact with egress shared block after blocker drop rule removed"
+
+	# Block is now mixed-bound: adding the drop rule must be refused.
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_fail $? "Incorrect success to add drop rule to mixed bound block"
+
+	# Rebind $swp1 to egress so that the block is egress-only.
+	tc qdisc del dev $swp1 clsact
+
+	tc qdisc add dev $swp1 egress_block 22 clsact
+	check_err $? "Failed to create another clsact with egress shared block"
+
+	# Egress-only shared block: the drop rule is accepted again.
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_err $? "Failed to add drop rule to egress bound shared block"
+
+	tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+	tc qdisc del dev $swp2 clsact
+	tc qdisc del dev $swp1 clsact
+
+	log_test "shared block drop"
+}
+
+egress_redirect_test()
+{
+	RET=0
+
+	# It is forbidden in mlxsw driver to have mirred redirect on
+	# egress-bound block.
+
+	# Block 22 bound to $swp1 ingress only: redirect rule is accepted.
+	tc qdisc add dev $swp1 ingress_block 22 clsact
+	check_err $? "Failed to create clsact with ingress block"
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress redirect dev $swp2
+	check_err $? "Failed to add redirect rule to ingress bound block"
+
+	# A second ingress binding of the same block is fine.
+	tc qdisc add dev $swp2 ingress_block 22 clsact
+	check_err $? "Failed to create another clsact with ingress shared block"
+
+	tc qdisc del dev $swp2 clsact
+
+	# With the redirect rule present, an egress binding must be refused.
+	tc qdisc add dev $swp2 egress_block 22 clsact
+	check_fail $? "Incorrect success to create another clsact with egress shared block"
+
+	# Once the redirect rule is gone, the egress binding is allowed.
+	tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+	tc qdisc add dev $swp2 egress_block 22 clsact
+	check_err $? "Failed to create another clsact with egress shared block after blocker redirect rule removed"
+
+	# Block is now bound to ingress and egress: redirect must be refused.
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress redirect dev $swp2
+	check_fail $? "Incorrect success to add redirect rule to mixed bound block"
+
+	# Rebind $swp1 to egress so that the block is egress-only and shared.
+	tc qdisc del dev $swp1 clsact
+
+	tc qdisc add dev $swp1 egress_block 22 clsact
+	check_err $? "Failed to create another clsact with egress shared block"
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress redirect dev $swp2
+	check_fail $? "Incorrect success to add redirect rule to egress bound shared block"
+
+	# Same check with a single egress binding left ($swp1 only).
+	tc qdisc del dev $swp2 clsact
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress redirect dev $swp2
+	check_fail $? "Incorrect success to add redirect rule to egress bound block"
+
+	tc qdisc del dev $swp1 clsact
+
+	# Report under this test's own name; the label used to be a verbatim
+	# copy of the one in shared_block_drop_test(), which made the two
+	# results indistinguishable in the log.
+	log_test "egress redirect"
+}
+
+multi_mirror_test()
+{
+	RET=0
+
+	# It is forbidden in mlxsw driver to have multiple mirror
+	# actions in a single rule.
+
+	tc qdisc add dev $swp1 clsact
+
+	# Control case: a single mirror action is accepted.
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress mirror dev $swp2
+	check_err $? "Failed to add rule with single mirror action"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	# Same rule with a second mirror action must be refused.
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress mirror dev $swp2 \
+		action mirred egress mirror dev $swp1
+	check_fail $? "Incorrect success to add rule with two mirror actions"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "multi mirror"
+}
+
+matchall_sample_egress_test()
+{
+	RET=0
+
+	# It is forbidden in mlxsw driver to have matchall with sample action
+	# bound on egress
+
+	tc qdisc add dev $swp1 clsact
+
+	# Control case: the same rule on ingress is accepted.
+	tc filter add dev $swp1 ingress protocol all pref 1 handle 101 \
+		matchall skip_sw action sample rate 100 group 1
+	check_err $? "Failed to add rule with sample action on ingress"
+
+	tc filter del dev $swp1 ingress protocol all pref 1 handle 101 matchall
+
+	# On egress the rule must be refused.
+	tc filter add dev $swp1 egress protocol all pref 1 handle 101 \
+		matchall skip_sw action sample rate 100 group 1
+	check_fail $? "Incorrect success to add rule with sample action on egress"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "matchall sample egress"
+}
+
+matchall_behind_flower_ingress_test()
+{
+	# $1 - label of the action, used only in the log_test line
+	# $2 - tc action arguments placed after "action" in the matchall rule
+	local action=$1
+	local action_args=$2
+
+	RET=0
+
+	# On ingress, all matchall-mirror and matchall-sample
+	# rules have to be in front of the flower rules
+	# (a lower pref number means "in front" in the checks below).
+
+	tc qdisc add dev $swp1 clsact
+
+	# Flower rule at pref 10 is the reference point.
+	tc filter add dev $swp1 ingress protocol ip pref 10 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+
+	# matchall at pref 9 (in front of the flower rule): accepted.
+	tc filter add dev $swp1 ingress protocol all pref 9 handle 102 \
+		matchall skip_sw action $action_args
+	check_err $? "Failed to add matchall rule in front of a flower rule"
+
+	tc filter del dev $swp1 ingress protocol all pref 9 handle 102 matchall
+
+	# matchall at pref 11 (behind the flower rule): refused.
+	tc filter add dev $swp1 ingress protocol all pref 11 handle 102 \
+		matchall skip_sw action $action_args
+	check_fail $? "Incorrect success to add matchall rule behind a flower rule"
+
+	tc filter del dev $swp1 ingress protocol ip pref 10 handle 101 flower
+
+	# Now the other way round: matchall at pref 9 is the reference point.
+	tc filter add dev $swp1 ingress protocol all pref 9 handle 102 \
+		matchall skip_sw action $action_args
+
+	# Flower at pref 10 (behind the matchall rule): accepted.
+	tc filter add dev $swp1 ingress protocol ip pref 10 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_err $? "Failed to add flower rule behind a matchall rule"
+
+	tc filter del dev $swp1 ingress protocol ip pref 10 handle 101 flower
+
+	# Flower at pref 8 (in front of the matchall rule): refused.
+	tc filter add dev $swp1 ingress protocol ip pref 8 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_fail $? "Incorrect success to add flower rule in front of a matchall rule"
+
+	# Removing the qdisc also gets rid of the remaining filters.
+	tc qdisc del dev $swp1 clsact
+
+	log_test "matchall $action flower ingress"
+}
+
+matchall_mirror_behind_flower_ingress_test()
+{
+	# Ingress ordering check with a matchall rule that mirrors to $swp2.
+	local mirror_args="mirred egress mirror dev $swp2"
+
+	matchall_behind_flower_ingress_test "mirror" "$mirror_args"
+}
+
+matchall_sample_behind_flower_ingress_test()
+{
+	# Ingress ordering check with a matchall rule that samples packets.
+	local sample_args="sample rate 100 group 1"
+
+	matchall_behind_flower_ingress_test "sample" "$sample_args"
+}
+
+matchall_behind_flower_egress_test()
+{
+	# $1 - label of the action, used only in the log_test line
+	# $2 - tc action arguments placed after "action" in the matchall rule
+	local action=$1
+	local action_args=$2
+
+	RET=0
+
+	# On egress, all matchall-mirror rules have to be behind the flower rules
+	# (a higher pref number means "behind" in the checks below). The
+	# check messages are worded for this egress ordering; they are not
+	# interchangeable with the ones of the ingress variant.
+
+	tc qdisc add dev $swp1 clsact
+
+	# Flower rule at pref 10 is the reference point.
+	tc filter add dev $swp1 egress protocol ip pref 10 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+
+	# matchall at pref 11 (behind the flower rule): accepted.
+	tc filter add dev $swp1 egress protocol all pref 11 handle 102 \
+		matchall skip_sw action $action_args
+	check_err $? "Failed to add matchall rule behind a flower rule"
+
+	tc filter del dev $swp1 egress protocol all pref 11 handle 102 matchall
+
+	# matchall at pref 9 (in front of the flower rule): refused.
+	tc filter add dev $swp1 egress protocol all pref 9 handle 102 \
+		matchall skip_sw action $action_args
+	check_fail $? "Incorrect success to add matchall rule in front of a flower rule"
+
+	tc filter del dev $swp1 egress protocol ip pref 10 handle 101 flower
+
+	# Now the other way round: matchall at pref 11 is the reference point.
+	tc filter add dev $swp1 egress protocol all pref 11 handle 102 \
+		matchall skip_sw action $action_args
+
+	# Flower at pref 10 (in front of the matchall rule): accepted.
+	tc filter add dev $swp1 egress protocol ip pref 10 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_err $? "Failed to add flower rule in front of a matchall rule"
+
+	tc filter del dev $swp1 egress protocol ip pref 10 handle 101 flower
+
+	# Flower at pref 12 (behind the matchall rule): refused.
+	tc filter add dev $swp1 egress protocol ip pref 12 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_fail $? "Incorrect success to add flower rule behind a matchall rule"
+
+	# Removing the qdisc also gets rid of the remaining filters.
+	tc qdisc del dev $swp1 clsact
+
+	log_test "matchall $action flower egress"
+}
+
+matchall_mirror_behind_flower_egress_test()
+{
+	# Egress ordering check with a matchall rule that mirrors to $swp2.
+	local mirror_args="mirred egress mirror dev $swp2"
+
+	matchall_behind_flower_egress_test "mirror" "$mirror_args"
+}
+
+police_limits_test()
+{
+	# Probes which police rate and burst values are accepted on an
+	# offloaded (skip_sw) flower rule, using values on both sides of the
+	# expected limits.
+	RET=0
+
+	tc qdisc add dev $swp1 clsact
+
+	# Rate 0.5kbit: expected to be refused as too low.
+	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+		flower skip_sw \
+		action police rate 0.5kbit burst 1m conform-exceed drop/ok
+	check_fail $? "Incorrect success to add police action with too low rate"
+
+	# Rate 2.5tbit: expected to be refused as too high.
+	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+		flower skip_sw \
+		action police rate 2.5tbit burst 1g conform-exceed drop/ok
+	check_fail $? "Incorrect success to add police action with too high rate"
+
+	# Rate 1.5kbit: low but expected to be accepted.
+	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+		flower skip_sw \
+		action police rate 1.5kbit burst 1m conform-exceed drop/ok
+	check_err $? "Failed to add police action with low rate"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	# Rate 1.9tbit: high but expected to be accepted.
+	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+		flower skip_sw \
+		action police rate 1.9tbit burst 1g conform-exceed drop/ok
+	check_err $? "Failed to add police action with high rate"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	# Burst 512b: expected to be refused as too small.
+	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+		flower skip_sw \
+		action police rate 1.5kbit burst 512b conform-exceed drop/ok
+	check_fail $? "Incorrect success to add police action with too low burst size"
+
+	# Burst 2k: small but expected to be accepted.
+	tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+		flower skip_sw \
+		action police rate 1.5kbit burst 2k conform-exceed drop/ok
+	check_err $? "Failed to add police action with low burst size"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "police rate and burst limits"
+}
+
+multi_police_test()
+{
+	RET=0
+
+	# It is forbidden in mlxsw driver to have multiple police
+	# actions in a single rule.
+
+	tc qdisc add dev $swp1 clsact
+
+	# Control case: a single police action is accepted.
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \
+		flower skip_sw \
+		action police rate 100mbit burst 100k conform-exceed drop/ok
+	check_err $? "Failed to add rule with single police action"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	# Two chained police actions (the first uses drop/pipe so that the
+	# second one is reached) must be refused.
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \
+		flower skip_sw \
+		action police rate 100mbit burst 100k conform-exceed drop/pipe \
+		action police rate 200mbit burst 200k conform-exceed drop/ok
+	check_fail $? "Incorrect success to add rule with two police actions"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "multi police"
+}
+
+setup_prepare()
+{
+	# Both interfaces are used as switch ports in this script.
+	swp1="${NETIFS[p1]}"
+	swp2="${NETIFS[p2]}"
+
+	vrf_prepare
+
+	switch_create
+}
+
+cleanup()
+{
+	# Installed as the EXIT trap handler at the bottom of this script.
+	# Tears down in the reverse order of setup_prepare().
+	pre_cleanup
+
+	switch_destroy
+
+	vrf_cleanup
+}
+
+check_tc_shblock_support
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
new file mode 100755
index 000000000..729a86cc4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
@@ -0,0 +1,1156 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test various aspects of VxLAN offloading which are specific to mlxsw, such
+# as sanitization of invalid configurations and offload indication.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="sanitization_test offload_indication_test \
+	sanitization_vlan_aware_test offload_indication_vlan_aware_test"
+NUM_NETIFS=2
+: ${TIMEOUT:=20000} # ms
+source $lib_dir/lib.sh
+
+setup_prepare()
+{
+	# Both interfaces are used as switch ports in this script.
+	local port
+
+	swp1="${NETIFS[p1]}"
+	swp2="${NETIFS[p2]}"
+
+	# Bring the ports up, $swp1 first.
+	for port in $swp1 $swp2; do
+		ip link set dev $port up
+	done
+}
+
+cleanup()
+{
+	# Counterpart of setup_prepare(): ports go down in reverse order.
+	local port
+
+	pre_cleanup
+
+	for port in $swp2 $swp1; do
+		ip link set dev $port down
+	done
+}
+
+sanitization_single_dev_test_pass()
+{
+	# Shared body for configurations that are expected to be accepted.
+	# Callers create br0 and vxlan0 beforehand, reset RET and log the
+	# result afterwards.
+
+	# Port first, VxLAN device second: both enslavements must succeed.
+	ip link set dev $swp1 master br0
+	check_err $?
+	ip link set dev vxlan0 master br0
+	check_err $?
+
+	ip link set dev $swp1 nomaster
+
+	# Reverse order: vxlan0 is already in the bridge when the port joins.
+	ip link set dev $swp1 master br0
+	check_err $?
+}
+
+sanitization_single_dev_test_fail()
+{
+	# Shared body for configurations that are expected to be refused.
+	# Callers create br0 and vxlan0 beforehand, reset RET and log the
+	# result afterwards. Output of the commands that should fail is
+	# discarded.
+
+	# Port first: enslaving vxlan0 next is expected to fail.
+	ip link set dev $swp1 master br0
+	check_err $?
+	ip link set dev vxlan0 master br0 &> /dev/null
+	check_fail $?
+
+	ip link set dev $swp1 nomaster
+
+	# VxLAN device first: enslaving the port next is expected to fail.
+	ip link set dev vxlan0 master br0
+	check_err $?
+	ip link set dev $swp1 master br0 &> /dev/null
+	check_fail $?
+}
+
+sanitization_single_dev_valid_test()
+{
+	# Baseline configuration that the other single-device tests vary:
+	# bridge with multicast snooping off; VxLAN device with nolearning,
+	# noudpcsum, fixed TTL, inherited TOS and an IPv4 local address.
+	# Expected to be accepted.
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789
+
+	sanitization_single_dev_test_pass
+
+	ip link del dev vxlan0
+	ip link del dev br0
+
+	log_test "vxlan device - valid configuration"
+}
+
+sanitization_single_dev_vlan_aware_test()
+{
+	# Variation: the bridge is created with vlan_filtering 1. Expected to
+	# be accepted.
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789
+
+	sanitization_single_dev_test_pass
+
+	ip link del dev vxlan0
+	ip link del dev br0
+
+	log_test "vxlan device with a vlan-aware bridge"
+}
+
+sanitization_single_dev_mcast_enabled_test()
+{
+	# Variation: the bridge is created without "mcast_snooping 0", i.e.
+	# with the bridge's default multicast setting (the log line calls this
+	# "multicast enabled"). Expected to be refused.
+	RET=0
+
+	ip link add dev br0 type bridge
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789
+
+	sanitization_single_dev_test_fail
+
+	ip link del dev vxlan0
+	ip link del dev br0
+
+	log_test "vxlan device with a multicast enabled bridge"
+}
+
+sanitization_single_dev_mcast_group_test()
+{
+	# Variation: the VxLAN device is given a multicast group (239.0.0.1)
+	# on top of a dummy device. Expected to be refused.
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+	ip link add name dummy1 up type dummy
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789 \
+		dev dummy1 group 239.0.0.1
+
+	sanitization_single_dev_test_fail
+
+	ip link del dev vxlan0
+	ip link del dev dummy1
+	ip link del dev br0
+
+	log_test "vxlan device with a multicast group"
+}
+
+sanitization_single_dev_no_local_ip_test()
+{
+	# Variation: the VxLAN device has no "local" address. Expected to be
+	# refused.
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+		ttl 20 tos inherit dstport 4789
+
+	sanitization_single_dev_test_fail
+
+	ip link del dev vxlan0
+	ip link del dev br0
+
+	log_test "vxlan device with no local ip"
+}
+
+sanitization_single_dev_local_ipv6_test()
+{
+	# Variation: the local address is IPv6 (2001:db8::1). Expected to be
+	# refused.
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+		ttl 20 tos inherit local 2001:db8::1 dstport 4789
+
+	sanitization_single_dev_test_fail
+
+	ip link del dev vxlan0
+	ip link del dev br0
+
+	log_test "vxlan device with local ipv6 address"
+}
+
+sanitization_single_dev_learning_enabled_test()
+{
+	# Variation: "learning" instead of "nolearning". Unlike most other
+	# variations this one is expected to be accepted.
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 learning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789
+
+	sanitization_single_dev_test_pass
+
+	ip link del dev vxlan0
+	ip link del dev br0
+
+	log_test "vxlan device with learning enabled"
+}
+
+sanitization_single_dev_local_interface_test()
+{
+	# Variation: the VxLAN device is tied to a lower device ("dev
+	# dummy1"). Expected to be refused.
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+	ip link add name dummy1 up type dummy
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev dummy1
+
+	sanitization_single_dev_test_fail
+
+	ip link del dev vxlan0
+	ip link del dev dummy1
+	ip link del dev br0
+
+	log_test "vxlan device with local interface"
+}
+
+sanitization_single_dev_port_range_test()
+{
+	# Variation: an explicit UDP source port range (4000-5000). Expected
+	# to be refused.
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789 \
+		srcport 4000 5000
+
+	sanitization_single_dev_test_fail
+
+	ip link del dev vxlan0
+	ip link del dev br0
+
+	log_test "vxlan device with udp source port range"
+}
+
+sanitization_single_dev_tos_static_test()
+{
+	# Variation: a static TOS value (20) instead of "tos inherit".
+	# Expected to be refused.
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+		ttl 20 tos 20 local 198.51.100.1 dstport 4789
+
+	sanitization_single_dev_test_fail
+
+	ip link del dev vxlan0
+	ip link del dev br0
+
+	log_test "vxlan device with static tos"
+}
+
+sanitization_single_dev_ttl_inherit_test()
+{
+	# Variation: "ttl inherit" instead of a fixed TTL. Expected to be
+	# refused.
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+		ttl inherit tos inherit local 198.51.100.1 dstport 4789
+
+	sanitization_single_dev_test_fail
+
+	ip link del dev vxlan0
+	ip link del dev br0
+
+	log_test "vxlan device with inherit ttl"
+}
+
+sanitization_single_dev_udp_checksum_test()
+{
+	# Variation: "udpcsum" instead of "noudpcsum". Expected to be
+	# refused.
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning udpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789
+
+	sanitization_single_dev_test_fail
+
+	ip link del dev vxlan0
+	ip link del dev br0
+
+	log_test "vxlan device with udp checksum"
+}
+
+sanitization_single_dev_test()
+{
+	# These tests make sure that we correctly sanitize VxLAN device
+	# configurations we do not support
+	#
+	# Each sub-test creates and deletes its own br0/vxlan0 and logs its
+	# own result, so they are independent of each other.
+	sanitization_single_dev_valid_test
+	sanitization_single_dev_vlan_aware_test
+	sanitization_single_dev_mcast_enabled_test
+	sanitization_single_dev_mcast_group_test
+	sanitization_single_dev_no_local_ip_test
+	sanitization_single_dev_local_ipv6_test
+	sanitization_single_dev_learning_enabled_test
+	sanitization_single_dev_local_interface_test
+	sanitization_single_dev_port_range_test
+	sanitization_single_dev_tos_static_test
+	sanitization_single_dev_ttl_inherit_test
+	sanitization_single_dev_udp_checksum_test
+}
+
+sanitization_multi_devs_test_pass()
+{
+	# Shared body for two-device configurations that are expected to be
+	# accepted. Callers create br0/br1 and vxlan0/vxlan1 beforehand, reset
+	# RET and log the result afterwards.
+
+	# Ports and VxLAN devices join their bridges one after another; every
+	# step must succeed.
+	ip link set dev $swp1 master br0
+	check_err $?
+	ip link set dev vxlan0 master br0
+	check_err $?
+	ip link set dev $swp2 master br1
+	check_err $?
+	ip link set dev vxlan1 master br1
+	check_err $?
+
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 nomaster
+
+	# Reverse order: both VxLAN devices are already enslaved when the
+	# ports join.
+	ip link set dev $swp1 master br0
+	check_err $?
+	ip link set dev $swp2 master br1
+	check_err $?
+}
+
+sanitization_multi_devs_test_fail()
+{
+	# Shared body for two-device configurations where the second VxLAN
+	# device conflicts with the first. Callers create br0/br1 and
+	# vxlan0/vxlan1 beforehand, reset RET and log the result afterwards.
+
+	# br0 is fully set up first; with $swp2 already in br1, enslaving
+	# vxlan1 to br1 is expected to fail.
+	ip link set dev $swp1 master br0
+	check_err $?
+	ip link set dev vxlan0 master br0
+	check_err $?
+	ip link set dev $swp2 master br1
+	check_err $?
+	ip link set dev vxlan1 master br1 &> /dev/null
+	check_fail $?
+
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 nomaster
+
+	# Other order: vxlan1 joins br1 while no port is enslaved; after
+	# $swp1 rejoins br0, enslaving $swp2 to br1 is expected to fail.
+	ip link set dev vxlan1 master br1
+	check_err $?
+	ip link set dev $swp1 master br0
+	check_err $?
+	ip link set dev $swp2 master br1 &> /dev/null
+	check_fail $?
+}
+
+sanitization_multi_devs_valid_test()
+{
+	# Baseline for the two-device tests: vxlan0 and vxlan1 differ only in
+	# their VNI (10 vs 20). Expected to be accepted.
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+	ip link add dev br1 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789
+	ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789
+
+	sanitization_multi_devs_test_pass
+
+	ip link del dev vxlan1
+	ip link del dev vxlan0
+	ip link del dev br1
+	ip link del dev br0
+
+	log_test "multiple vxlan devices - valid configuration"
+}
+
+sanitization_multi_devs_ttl_test()
+{
+	# Variation: the two VxLAN devices use different TTLs (20 vs 40).
+	# Expected to be refused.
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+	ip link add dev br1 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789
+	ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \
+		ttl 40 tos inherit local 198.51.100.1 dstport 4789
+
+	sanitization_multi_devs_test_fail
+
+	ip link del dev vxlan1
+	ip link del dev vxlan0
+	ip link del dev br1
+	ip link del dev br0
+
+	log_test "multiple vxlan devices with different ttl"
+}
+
+sanitization_multi_devs_udp_dstport_test()
+{
+	# Variation: the two VxLAN devices use different UDP destination
+	# ports (4789 vs 5789). Expected to be refused.
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+	ip link add dev br1 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789
+	ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 5789
+
+	sanitization_multi_devs_test_fail
+
+	ip link del dev vxlan1
+	ip link del dev vxlan0
+	ip link del dev br1
+	ip link del dev br0
+
+	log_test "multiple vxlan devices with different udp destination port"
+}
+
+sanitization_multi_devs_local_ip_test()
+{
+	# Variation: the two VxLAN devices use different local addresses
+	# (198.51.100.1 vs 198.51.100.2). Expected to be refused.
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0
+	ip link add dev br1 type bridge mcast_snooping 0
+
+	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789
+	ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.2 dstport 4789
+
+	sanitization_multi_devs_test_fail
+
+	ip link del dev vxlan1
+	ip link del dev vxlan0
+	ip link del dev br1
+	ip link del dev br0
+
+	log_test "multiple vxlan devices with different local ip"
+}
+
+sanitization_multi_devs_test()
+{
+	# The device has a single VTEP, which means all the VxLAN devices
+	# we offload must share certain properties such as source IP and
+	# UDP destination port. These tests make sure that we forbid
+	# configurations that violate this limitation
+	#
+	# Each sub-test creates and deletes its own bridges and VxLAN devices
+	# and logs its own result.
+	sanitization_multi_devs_valid_test
+	sanitization_multi_devs_ttl_test
+	sanitization_multi_devs_udp_dstport_test
+	sanitization_multi_devs_local_ip_test
+}
+
+sanitization_test()
+{
+	# Listed in ALL_TESTS; runs the single-device checks followed by the
+	# multi-device checks.
+	sanitization_single_dev_test
+	sanitization_multi_devs_test
+}
+
+offload_indication_setup_create()
+{
+	# Topology: bridges br0 and br1 (multicast snooping off), each with
+	# one local port and one VxLAN device. Both VxLAN devices use
+	# 198.51.100.1, which is configured on lo, as their local address.
+	local br
+
+	for br in br0 br1; do
+		ip link add name $br up type bridge mcast_snooping 0
+	done
+
+	ip link set dev ${swp1} master br0
+	ip link set dev ${swp2} master br1
+
+	ip address add 198.51.100.1/32 dev lo
+
+	ip link add name vxlan0 up master br0 type vxlan id 10 \
+		nolearning noudpcsum ttl 20 tos inherit \
+		local 198.51.100.1 dstport 4789
+	ip link add name vxlan1 up master br1 type vxlan id 20 \
+		nolearning noudpcsum ttl 20 tos inherit \
+		local 198.51.100.1 dstport 4789
+}
+
+offload_indication_setup_destroy()
+{
+	# Tear down in the reverse order of offload_indication_setup_create():
+	# VxLAN devices, local address, port enslavement, bridges.
+	local dev
+
+	for dev in vxlan1 vxlan0; do
+		ip link del dev $dev
+	done
+
+	ip address del 198.51.100.1/32 dev lo
+
+	for dev in $swp2 $swp1; do
+		ip link set dev $dev nomaster
+	done
+
+	for dev in br1 br0; do
+		ip link del dev $dev
+	done
+}
+
+offload_indication_fdb_flood_test()
+{
+	RET=0
+
+	# All-zeros MAC entry on vxlan0 pointing at remote 198.51.100.2 (the
+	# log line calls this the flood entry).
+	bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2
+
+	# Poll for up to $TIMEOUT ms until the entry shows up as offloaded in
+	# the FDB dump of vxlan0 (busywait, wait_for_offload and
+	# grep_bridge_fdb come from lib.sh).
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb 00:00:00:00:00:00 \
+		bridge fdb show brport vxlan0
+	check_err $?
+
+	bridge fdb del 00:00:00:00:00:00 dev vxlan0 self
+
+	log_test "vxlan flood entry offload indication"
+}
+
+offload_indication_fdb_bridge_test()
+{
+	# Follows the offload indication of one MAC (de:ad:be:ef:13:37) that
+	# has an entry in both the VxLAN driver ("self") and the bridge driver
+	# ("master"), across removal and re-addition on either side. Logs five
+	# separate results, resetting RET before each stage.
+	#
+	# NOTE(review): judging by the stage comments below, the
+	# grep_bridge_fdb variant with "self" looks at the VxLAN driver's
+	# entry and the "self -v" variant at the bridge driver's entry —
+	# confirm against grep_bridge_fdb in lib.sh.
+	RET=0
+
+	bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self master static \
+		dst 198.51.100.2
+
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
+	check_err $?
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0
+	check_err $?
+
+	log_test "vxlan entry offload indication - initial state"
+
+	# Remove FDB entry from the bridge driver and check that corresponding
+	# entry in the VxLAN driver is not marked as offloaded
+	RET=0
+
+	bridge fdb del de:ad:be:ef:13:37 dev vxlan0 master
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
+	check_err $?
+
+	log_test "vxlan entry offload indication - after removal from bridge"
+
+	# Add the FDB entry back to the bridge driver and make sure it is
+	# marked as offloaded in both drivers
+	RET=0
+
+	bridge fdb add de:ad:be:ef:13:37 dev vxlan0 master static
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
+	check_err $?
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0
+	check_err $?
+
+	log_test "vxlan entry offload indication - after re-add to bridge"
+
+	# Remove FDB entry from the VxLAN driver and check that corresponding
+	# entry in the bridge driver is not marked as offloaded
+	RET=0
+
+	bridge fdb del de:ad:be:ef:13:37 dev vxlan0 self
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0
+	check_err $?
+
+	log_test "vxlan entry offload indication - after removal from vxlan"
+
+	# Add the FDB entry back to the VxLAN driver and make sure it is
+	# marked as offloaded in both drivers
+	RET=0
+
+	bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst 198.51.100.2
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
+	check_err $?
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+		de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0
+	check_err $?
+
+	log_test "vxlan entry offload indication - after re-add to vxlan"
+
+	# Final cleanup: drop the entry from both drivers at once.
+	bridge fdb del de:ad:be:ef:13:37 dev vxlan0 self master
+}
+
+offload_indication_fdb_test()
+{
+	# Run the FDB offload indication checks: flood entry first, then the
+	# unicast entry shared between the bridge and VxLAN drivers.
+	offload_indication_fdb_flood_test
+	offload_indication_fdb_bridge_test
+}
+
+offload_indication_decap_route_test()
+{
+	# Track the offload mark of the local route for the VTEP address while
+	# the topology is modified. In every stage the route is expected to stay
+	# offloaded after the first of the two tunnels/bridges is affected and
+	# to lose the mark only after the second one is affected as well.
+	local vtep=198.51.100.1
+
+	RET=0
+
+	busywait "$TIMEOUT" wait_for_offload ip route show table local $vtep
+	check_err $?
+
+	ip link set dev vxlan0 down
+	busywait "$TIMEOUT" wait_for_offload ip route show table local $vtep
+	check_err $?
+
+	ip link set dev vxlan1 down
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip route show table local $vtep
+	check_err $?
+
+	log_test "vxlan decap route - vxlan device down"
+
+	RET=0
+
+	ip link set dev vxlan1 up
+	busywait "$TIMEOUT" wait_for_offload ip route show table local $vtep
+	check_err $?
+
+	ip link set dev vxlan0 up
+	busywait "$TIMEOUT" wait_for_offload ip route show table local $vtep
+	check_err $?
+
+	log_test "vxlan decap route - vxlan device up"
+
+	# Remove and re-add the local address that backs the route.
+	RET=0
+
+	ip address delete $vtep/32 dev lo
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip route show table local $vtep
+	check_err $?
+
+	ip address add $vtep/32 dev lo
+	busywait "$TIMEOUT" wait_for_offload ip route show table local $vtep
+	check_err $?
+
+	log_test "vxlan decap route - add local route"
+
+	# Release the switch ports from their bridges one at a time, then
+	# enslave both again.
+	RET=0
+
+	ip link set dev $swp1 nomaster
+	busywait "$TIMEOUT" wait_for_offload ip route show table local $vtep
+	check_err $?
+
+	ip link set dev $swp2 nomaster
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip route show table local $vtep
+	check_err $?
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br1
+	busywait "$TIMEOUT" wait_for_offload ip route show table local $vtep
+	check_err $?
+
+	log_test "vxlan decap route - local ports enslavement"
+
+	RET=0
+
+	ip link del dev br0
+	busywait "$TIMEOUT" wait_for_offload ip route show table local $vtep
+	check_err $?
+
+	ip link del dev br1
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip route show table local $vtep
+	check_err $?
+
+	log_test "vxlan decap route - bridge device deletion"
+
+	# Rebuild both bridges around the still existing VxLAN devices, then
+	# delete the VxLAN devices one at a time.
+	RET=0
+
+	ip link add name br0 up type bridge mcast_snooping 0
+	ip link add name br1 up type bridge mcast_snooping 0
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br1
+	ip link set dev vxlan0 master br0
+	ip link set dev vxlan1 master br1
+	busywait "$TIMEOUT" wait_for_offload ip route show table local $vtep
+	check_err $?
+
+	ip link del dev vxlan0
+	busywait "$TIMEOUT" wait_for_offload ip route show table local $vtep
+	check_err $?
+
+	ip link del dev vxlan1
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip route show table local $vtep
+	check_err $?
+
+	log_test "vxlan decap route - vxlan device deletion"
+
+	# Re-create the VxLAN devices so that the caller's teardown still finds
+	# the topology it expects.
+	ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \
+		noudpcsum ttl 20 tos inherit local $vtep dstport 4789
+	ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \
+		noudpcsum ttl 20 tos inherit local $vtep dstport 4789
+}
+
+check_fdb_offloaded()
+{
+	# Record an error (via check_err) unless the unicast entry is offloaded
+	# in both the "self" and "master" FDB of vxlan0 and the all-zeros entry
+	# is offloaded in the "self" FDB.
+	local mac=00:11:22:33:44:55
+	local zmac=00:00:00:00:00:00
+	local fdb_kind
+
+	for fdb_kind in self master; do
+		busywait "$TIMEOUT" wait_for_offload \
+			grep_bridge_fdb $mac $fdb_kind bridge fdb show dev vxlan0
+		check_err $?
+	done
+
+	busywait "$TIMEOUT" wait_for_offload \
+		grep_bridge_fdb $zmac self bridge fdb show dev vxlan0
+	check_err $?
+}
+
+check_vxlan_fdb_not_offloaded()
+{
+	# For the unicast and the all-zeros entry in turn: the "self" entry must
+	# exist on vxlan0, and it must not be reported as offloaded.
+	local mac=00:11:22:33:44:55
+	local zmac=00:00:00:00:00:00
+	local entry
+
+	for entry in $mac $zmac; do
+		bridge fdb show dev vxlan0 | grep $entry | grep -q self
+		check_err $?
+		busywait "$TIMEOUT" not wait_for_offload \
+			grep_bridge_fdb $entry self bridge fdb show dev vxlan0
+		check_err $?
+	done
+}
+
+check_bridge_fdb_not_offloaded()
+{
+	# The "master" (bridge) entry for the unicast MAC must exist on vxlan0
+	# and must not be reported as offloaded. Failures are recorded through
+	# check_err. Only the unicast MAC is checked here, so no all-zeros MAC
+	# local is declared.
+	local mac=00:11:22:33:44:55
+
+	bridge fdb show dev vxlan0 | grep $mac | grep -q master
+	check_err $?
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $mac master \
+		bridge fdb show dev vxlan0
+	check_err $?
+}
+
+__offload_indication_join_vxlan_first()
+{
+	# Replay scenario: the VxLAN device joins br0 before the switch port.
+	# $1 (optional) is the VLAN ID used for the VLAN-aware variant; when it
+	# is empty the VLAN add/delete stages are skipped.
+	local vid=$1
+	shift
+
+	local ucast_mac=00:11:22:33:44:55
+	local flood_mac=00:00:00:00:00:00
+
+	bridge fdb append $flood_mac dev vxlan0 self dst 198.51.100.2
+
+	ip link set dev vxlan0 master br0
+	bridge fdb add dev vxlan0 $ucast_mac self master static dst 198.51.100.2
+
+	# Nothing is offloaded until the switch port joins the bridge.
+	RET=0
+	check_vxlan_fdb_not_offloaded
+	ip link set dev $swp1 master br0
+	sleep .1
+	check_fdb_offloaded
+	log_test "offload indication - attach vxlan first"
+
+	RET=0
+	ip link set dev vxlan0 down
+	check_vxlan_fdb_not_offloaded
+	check_bridge_fdb_not_offloaded
+	log_test "offload indication - set vxlan down"
+
+	RET=0
+	ip link set dev vxlan0 up
+	sleep .1
+	check_fdb_offloaded
+	log_test "offload indication - set vxlan up"
+
+	if [[ -n $vid ]]; then
+		RET=0
+		bridge vlan del dev vxlan0 vid $vid
+		check_vxlan_fdb_not_offloaded
+		check_bridge_fdb_not_offloaded
+		log_test "offload indication - delete VLAN"
+
+		# A VLAN that is neither PVID nor untagged must not bring the
+		# entries back to the offloaded state.
+		RET=0
+		bridge vlan add dev vxlan0 vid $vid
+		check_vxlan_fdb_not_offloaded
+		check_bridge_fdb_not_offloaded
+		log_test "offload indication - add tagged VLAN"
+
+		RET=0
+		bridge vlan add dev vxlan0 vid $vid pvid untagged
+		sleep .1
+		check_fdb_offloaded
+		log_test "offload indication - add pvid/untagged VLAN"
+	fi
+
+	RET=0
+	ip link set dev $swp1 nomaster
+	check_vxlan_fdb_not_offloaded
+	log_test "offload indication - detach port"
+}
+
+offload_indication_join_vxlan_first()
+{
+	# VLAN-unaware variant: create a plain bridge and a not yet enslaved
+	# VxLAN device, run the shared scenario without a VLAN ID, then remove
+	# both devices.
+	ip link add dev br0 up type bridge mcast_snooping 0
+	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789
+
+	__offload_indication_join_vxlan_first
+
+	ip link del dev vxlan0
+	ip link del dev br0
+}
+
+__offload_indication_join_vxlan_last()
+{
+	# Replay scenario: the switch port joins br0 before the VxLAN device.
+	# The flood entry must only become offloaded once vxlan0 is enslaved.
+	local flood_mac=00:00:00:00:00:00
+
+	RET=0
+
+	bridge fdb append $flood_mac dev vxlan0 self dst 198.51.100.2
+
+	ip link set dev $swp1 master br0
+
+	busywait "$TIMEOUT" not wait_for_offload \
+		grep_bridge_fdb $flood_mac self bridge fdb show dev vxlan0
+	check_err $?
+
+	ip link set dev vxlan0 master br0
+
+	busywait "$TIMEOUT" wait_for_offload \
+		grep_bridge_fdb $flood_mac self bridge fdb show dev vxlan0
+	check_err $?
+
+	log_test "offload indication - attach vxlan last"
+}
+
+offload_indication_join_vxlan_last()
+{
+	# VLAN-unaware variant: create a plain bridge and a not yet enslaved
+	# VxLAN device, run the shared "attach vxlan last" scenario, then remove
+	# both devices.
+	ip link add dev br0 up type bridge mcast_snooping 0
+	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789
+
+	__offload_indication_join_vxlan_last
+
+	ip link del dev vxlan0
+	ip link del dev br0
+}
+
+offload_indication_test()
+{
+	# Entry point for the VLAN-unaware offload indication tests. The FDB
+	# and decap route tests share one topology; the replay tests create and
+	# destroy their own devices.
+	offload_indication_setup_create
+	offload_indication_fdb_test
+	offload_indication_decap_route_test
+	offload_indication_setup_destroy
+
+	log_info "offload indication - replay & cleanup"
+	offload_indication_join_vxlan_first
+	offload_indication_join_vxlan_last
+}
+
+sanitization_vlan_aware_test()
+{
+	# Check which VNI-to-VLAN mappings and tunnel configurations allow a
+	# port to be enslaved to a VLAN-aware bridge with two VxLAN devices.
+	RET=0
+
+	ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1
+
+	ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \
+		noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+
+	ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \
+		noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+
+	# Test that when each VNI is mapped to a different VLAN we can enslave
+	# a port to the bridge
+	bridge vlan add vid 10 dev vxlan10 pvid untagged
+	bridge vlan add vid 20 dev vxlan20 pvid untagged
+
+	ip link set dev $swp1 master br0
+	check_err $?
+
+	log_test "vlan-aware - enslavement to vlan-aware bridge"
+
+	# Try to map both VNIs to the same VLAN and make sure configuration
+	# fails
+	RET=0
+
+	bridge vlan add vid 10 dev vxlan20 pvid untagged &> /dev/null
+	check_fail $?
+
+	log_test "vlan-aware - two vnis mapped to the same vlan"
+
+	# Test that enslavement of a port to a bridge fails when two VNIs
+	# are mapped to the same VLAN
+	RET=0
+
+	# With the port released, the conflicting mapping is configured before
+	# the port tries to join again.
+	ip link set dev $swp1 nomaster
+
+	bridge vlan del vid 20 dev vxlan20 pvid untagged
+	bridge vlan add vid 10 dev vxlan20 pvid untagged
+
+	ip link set dev $swp1 master br0 &> /dev/null
+	check_fail $?
+
+	log_test "vlan-aware - failed enslavement to vlan-aware bridge"
+
+	# Restore the one-VLAN-per-VNI mapping before the next stage.
+	bridge vlan del vid 10 dev vxlan20
+	bridge vlan add vid 20 dev vxlan20 pvid untagged
+
+	# Test that when two VXLAN tunnels with conflicting configurations
+	# (i.e., different TTL) are enslaved to the same VLAN-aware bridge,
+	# then the enslavement of a port to the bridge is denied.
+
+	# Use the offload indication of the local route to ensure the VXLAN
+	# configuration was correctly rolled back.
+	ip address add 198.51.100.1/32 dev lo
+
+	ip link set dev vxlan10 type vxlan ttl 10
+	ip link set dev $swp1 master br0 &> /dev/null
+	check_fail $?
+
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip route show table local 198.51.100.1
+	check_err $?
+
+	log_test "vlan-aware - failed enslavement to bridge due to conflict"
+
+	# Undo the TTL change and remove everything this test created.
+	ip link set dev vxlan10 type vxlan ttl 20
+	ip address del 198.51.100.1/32 dev lo
+
+	ip link del dev vxlan20
+	ip link del dev vxlan10
+	ip link del dev br0
+}
+
+offload_indication_vlan_aware_setup_create()
+{
+	# Build one VLAN-aware bridge (no default PVID) holding the switch port
+	# and two VxLAN devices. VNI N is mapped to VLAN N, for N in 10 and 20.
+	local id
+
+	ip link add name br0 up type bridge mcast_snooping 0 vlan_filtering 1 \
+		vlan_default_pvid 0
+
+	ip link set dev $swp1 master br0
+
+	for id in 10 20; do
+		bridge vlan add vid $id dev $swp1
+	done
+
+	ip address add 198.51.100.1/32 dev lo
+
+	for id in 10 20; do
+		ip link add name vxlan$id up master br0 type vxlan id $id \
+			nolearning noudpcsum ttl 20 tos inherit \
+			local 198.51.100.1 dstport 4789
+	done
+
+	for id in 10 20; do
+		bridge vlan add vid $id dev vxlan$id pvid untagged
+	done
+}
+
+offload_indication_vlan_aware_setup_destroy()
+{
+	# Undo offload_indication_vlan_aware_setup_create in reverse order.
+	local id
+
+	for id in 20 10; do
+		bridge vlan del vid $id dev vxlan$id
+	done
+
+	for id in 20 10; do
+		ip link del dev vxlan$id
+	done
+
+	ip address del 198.51.100.1/32 dev lo
+
+	for id in 20 10; do
+		bridge vlan del vid $id dev $swp1
+	done
+
+	ip link set dev $swp1 nomaster
+
+	ip link del dev br0
+}
+
+offload_indication_vlan_aware_fdb_test()
+{
+	# VLAN-aware counterpart of the bridge/VxLAN FDB test: one unicast MAC
+	# lives in the bridge FDB (VLAN 10) and in the FDB of vxlan10. Each
+	# stage removes or re-adds one entry and checks the other's offload mark.
+	# NOTE(review): "self -v" presumably makes grep_bridge_fdb select the
+	# non-"self" (bridge) entry - confirm against lib.sh.
+	local mac=de:ad:be:ef:13:37
+	local remote=198.51.100.2
+
+	RET=0
+
+	log_info "vxlan entry offload indication - vlan-aware"
+
+	bridge fdb add $mac dev vxlan10 self master static dst $remote vlan 10
+
+	busywait "$TIMEOUT" wait_for_offload \
+		grep_bridge_fdb $mac self bridge fdb show brport vxlan10
+	check_err $?
+	busywait "$TIMEOUT" wait_for_offload \
+		grep_bridge_fdb $mac self -v bridge fdb show brport vxlan10
+	check_err $?
+
+	log_test "vxlan entry offload indication - initial state"
+
+	# Drop the bridge-side entry; the VxLAN-side entry must lose its
+	# offload mark.
+	RET=0
+
+	bridge fdb del $mac dev vxlan10 master vlan 10
+	busywait "$TIMEOUT" not wait_for_offload \
+		grep_bridge_fdb $mac self bridge fdb show brport vxlan10
+	check_err $?
+
+	log_test "vxlan entry offload indication - after removal from bridge"
+
+	# Restore the bridge-side entry; both entries must be offloaded again.
+	RET=0
+
+	bridge fdb add $mac dev vxlan10 master static vlan 10
+	busywait "$TIMEOUT" wait_for_offload \
+		grep_bridge_fdb $mac self bridge fdb show brport vxlan10
+	check_err $?
+	busywait "$TIMEOUT" wait_for_offload \
+		grep_bridge_fdb $mac self -v bridge fdb show brport vxlan10
+	check_err $?
+
+	log_test "vxlan entry offload indication - after re-add to bridge"
+
+	# Drop the VxLAN-side entry; the bridge-side entry must lose its
+	# offload mark.
+	RET=0
+
+	bridge fdb del $mac dev vxlan10 self
+	busywait "$TIMEOUT" not wait_for_offload \
+		grep_bridge_fdb $mac self -v bridge fdb show brport vxlan10
+	check_err $?
+
+	log_test "vxlan entry offload indication - after removal from vxlan"
+
+	# Restore the VxLAN-side entry; both entries must be offloaded again.
+	RET=0
+
+	bridge fdb add $mac dev vxlan10 self dst $remote
+	busywait "$TIMEOUT" wait_for_offload \
+		grep_bridge_fdb $mac self bridge fdb show brport vxlan10
+	check_err $?
+	busywait "$TIMEOUT" wait_for_offload \
+		grep_bridge_fdb $mac self -v bridge fdb show brport vxlan10
+	check_err $?
+
+	log_test "vxlan entry offload indication - after re-add to vxlan"
+
+	bridge fdb del $mac dev vxlan10 self master vlan 10
+}
+
+offload_indication_vlan_aware_decap_route_test()
+{
+	# Re-adding a VLAN without the "pvid" keyword clears the PVID flag of a
+	# VxLAN device. The local route must stay offloaded while at least one
+	# device still has its PVID and lose the mark once neither has.
+	local vtep=198.51.100.1
+
+	RET=0
+
+	busywait "$TIMEOUT" wait_for_offload ip route show table local $vtep
+	check_err $?
+
+	# First device loses its PVID: route is still offloaded.
+	bridge vlan add vid 10 dev vxlan10 untagged
+
+	busywait "$TIMEOUT" wait_for_offload ip route show table local $vtep
+	check_err $?
+
+	# Second device loses its PVID: route is no longer offloaded.
+	bridge vlan add vid 20 dev vxlan20 untagged
+
+	busywait "$TIMEOUT" not wait_for_offload \
+		ip route show table local $vtep
+	check_err $?
+
+	# Both PVIDs restored: route is offloaded again.
+	bridge vlan add vid 10 dev vxlan10 pvid untagged
+	bridge vlan add vid 20 dev vxlan20 pvid untagged
+
+	busywait "$TIMEOUT" wait_for_offload ip route show table local $vtep
+	check_err $?
+
+	log_test "vxlan decap route - vni map/unmap"
+}
+
+offload_indication_vlan_aware_join_vxlan_first()
+{
+	# VLAN-aware variant: the bridge uses default PVID 1, and that same VLAN
+	# ID is passed to the shared scenario so its VLAN stages are exercised.
+	ip link add dev br0 up type bridge mcast_snooping 0 \
+		vlan_filtering 1 vlan_default_pvid 1
+	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789
+
+	__offload_indication_join_vxlan_first 1
+
+	ip link del dev vxlan0
+	ip link del dev br0
+}
+
+offload_indication_vlan_aware_join_vxlan_last()
+{
+	# VLAN-aware variant of the "attach vxlan last" scenario, using a bridge
+	# with default PVID 1.
+	ip link add dev br0 up type bridge mcast_snooping 0 \
+		vlan_filtering 1 vlan_default_pvid 1
+	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789
+
+	__offload_indication_join_vxlan_last
+
+	ip link del dev vxlan0
+	ip link del dev br0
+}
+
+offload_indication_vlan_aware_l3vni_test()
+{
+	# Check tunnel offload for a VNI whose VLAN carries a VLAN upper device
+	# (br10) on the bridge, and across unlinking/relinking the VxLAN device.
+	local zmac=00:00:00:00:00:00
+
+	RET=0
+
+	# IPv6 is disabled by default for new netdevs and later re-enabled on
+	# br10 only.
+	sysctl_set net.ipv6.conf.default.disable_ipv6 1
+	ip link add dev br0 up type bridge mcast_snooping 0 \
+		vlan_filtering 1 vlan_default_pvid 0
+	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789
+
+	ip link set dev $swp1 master br0
+
+	# The test will use the offload indication on the FDB entry to
+	# understand if the tunnel is offloaded or not
+	bridge fdb append $zmac dev vxlan0 self dst 192.0.2.1
+
+	ip link set dev vxlan0 master br0
+	bridge vlan add dev vxlan0 vid 10 pvid untagged
+
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show brport vxlan0
+	check_err $? "vxlan tunnel not offloaded when should"
+
+	# Configure a VLAN interface and make sure tunnel is offloaded
+	ip link add link br0 name br10 up type vlan id 10
+	sysctl_set net.ipv6.conf.br10.disable_ipv6 0
+	ip -6 address add 2001:db8:1::1/64 dev br10
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show brport vxlan0
+	check_err $? "vxlan tunnel not offloaded when should"
+
+	# Unlink the VXLAN device, make sure tunnel is no longer offloaded,
+	# then add it back to the bridge and make sure it is offloaded
+	ip link set dev vxlan0 nomaster
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show brport vxlan0
+	check_err $? "vxlan tunnel offloaded after unlinked from bridge"
+
+	# Re-enslaving alone is not enough: the VLAN mapping has to be added
+	# again before the tunnel is expected to be offloaded.
+	ip link set dev vxlan0 master br0
+	busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show brport vxlan0
+	check_err $? "vxlan tunnel offloaded despite no matching vid"
+
+	bridge vlan add dev vxlan0 vid 10 pvid untagged
+	busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+		bridge fdb show brport vxlan0
+	check_err $? "vxlan tunnel not offloaded after adding vid"
+
+	log_test "vxlan - l3 vni"
+
+	# NOTE(review): br10 is never deleted explicitly and its sysctl is not
+	# restored; presumably both go away together with br0 - confirm.
+	ip link del dev vxlan0
+	ip link del dev br0
+	sysctl_restore net.ipv6.conf.default.disable_ipv6
+}
+
+offload_indication_vlan_aware_test()
+{
+	# Entry point for the VLAN-aware offload indication tests. The FDB and
+	# decap route tests share one topology; the replay and L3 VNI tests
+	# create and destroy their own devices.
+	offload_indication_vlan_aware_setup_create
+	offload_indication_vlan_aware_fdb_test
+	offload_indication_vlan_aware_decap_route_test
+	offload_indication_vlan_aware_setup_destroy
+
+	log_info "offload indication - replay & cleanup - vlan aware"
+	offload_indication_vlan_aware_join_vxlan_first
+	offload_indication_vlan_aware_join_vxlan_last
+	offload_indication_vlan_aware_l3vni_test
+}
+
+# Run cleanup whenever the script exits, including after a failure.
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh
new file mode 100755
index 000000000..749ba3cfd
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh
@@ -0,0 +1,126 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test vetoing of FDB entries that mlxsw can not offload. This exercises several
+# different veto vectors to test various rollback scenarios in the vxlan driver.
+
+# $0 and $lib_dir are quoted so that the library is still found when the
+# script is run from a path that contains whitespace.
+lib_dir=$(dirname "$0")/../../../net/forwarding
+
+ALL_TESTS="
+	fdb_create_veto_test
+	fdb_replace_veto_test
+	fdb_append_veto_test
+	fdb_changelink_veto_test
+"
+NUM_NETIFS=2
+source "$lib_dir/lib.sh"
+
+setup_prepare()
+{
+	# Build the topology for the veto tests: a VLAN-unaware bridge holding
+	# $swp1 and vxlan0. $swp2 is only brought up.
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	ip link set dev $swp1 up
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 up
+
+	# The VxLAN device is created first and enslaved in a separate step.
+	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789
+	ip link set dev vxlan0 master br0
+}
+
+cleanup()
+{
+	# Undo setup_prepare in reverse order. Installed as the EXIT trap.
+	pre_cleanup
+
+	ip link set dev vxlan0 nomaster
+	ip link del dev vxlan0
+
+	ip link set dev $swp2 down
+	ip link set dev $swp1 nomaster
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+fdb_create_veto_test()
+{
+	# Creating an FDB entry with a multicast MAC must be rejected, and the
+	# error output must mention mlxsw_spectrum.
+	local mc_mac=01:02:03:04:05:06
+	local remote=198.51.100.2
+
+	RET=0
+
+	# First attempt: only the exit status is of interest.
+	bridge fdb add $mc_mac dev vxlan0 self static dst $remote 2>/dev/null
+	check_fail $? "multicast MAC not rejected"
+
+	# Second attempt: stdout is discarded and stderr is piped to grep.
+	bridge fdb add $mc_mac dev vxlan0 self static dst $remote \
+		2>&1 >/dev/null | grep -q mlxsw_spectrum
+	check_err $? "multicast MAC rejected without extack"
+
+	log_test "vxlan FDB veto - create"
+}
+
+fdb_replace_veto_test()
+{
+	# Replacing a valid FDB entry with one that carries an explicit UDP
+	# port must be rejected, and the error output must mention
+	# mlxsw_spectrum.
+	local mac=00:01:02:03:04:05
+	local remote=198.51.100.2
+
+	RET=0
+
+	bridge fdb add $mac dev vxlan0 self static dst $remote
+	check_err $? "valid FDB rejected"
+
+	# First attempt: only the exit status is of interest.
+	bridge fdb replace $mac dev vxlan0 self static dst $remote port 1234 \
+		2>/dev/null
+	check_fail $? "FDB with an explicit port not rejected"
+
+	# Second attempt: stdout is discarded and stderr is piped to grep.
+	bridge fdb replace $mac dev vxlan0 self static dst $remote port 1234 \
+		2>&1 >/dev/null | grep -q mlxsw_spectrum
+	check_err $? "FDB with an explicit port rejected without extack"
+
+	log_test "vxlan FDB veto - replace"
+}
+
+fdb_append_veto_test()
+{
+	# Appending a remote with an explicit UDP port to a valid all-zeros
+	# entry must be rejected, and the error output must mention
+	# mlxsw_spectrum.
+	local zmac=00:00:00:00:00:00
+	local bad_remote=198.51.100.3
+
+	RET=0
+
+	bridge fdb add $zmac dev vxlan0 self static dst 198.51.100.2
+	check_err $? "valid FDB rejected"
+
+	# First attempt: only the exit status is of interest.
+	bridge fdb append $zmac dev vxlan0 self static dst $bad_remote \
+		port 1234 2>/dev/null
+	check_fail $? "FDB with an explicit port not rejected"
+
+	# Second attempt: stdout is discarded and stderr is piped to grep.
+	bridge fdb append $zmac dev vxlan0 self static dst $bad_remote \
+		port 1234 2>&1 >/dev/null | grep -q mlxsw_spectrum
+	check_err $? "FDB with an explicit port rejected without extack"
+
+	log_test "vxlan FDB veto - append"
+}
+
+fdb_changelink_veto_test()
+{
+	# Changing vxlan0 to use a multicast group address must be rejected,
+	# and the error output must mention mlxsw_spectrum.
+	local mc_group=224.0.0.1
+
+	RET=0
+
+	# First attempt: only the exit status is of interest.
+	ip link set dev vxlan0 type vxlan group $mc_group dev lo 2>/dev/null
+	check_fail $? "FDB with a multicast IP not rejected"
+
+	# Second attempt: stdout is discarded and stderr is piped to grep.
+	ip link set dev vxlan0 type vxlan group $mc_group dev lo \
+		2>&1 >/dev/null | grep -q mlxsw_spectrum
+	check_err $? "FDB with a multicast IP rejected without extack"
+
+	log_test "vxlan FDB veto - changelink"
+}
+
+# Run cleanup whenever the script exits, including after a failure.
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
new file mode 100755
index 000000000..af5ea50ed
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
@@ -0,0 +1,326 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test VxLAN flooding. The device stores flood records in a singly linked list
+# where each record stores up to three IPv4 addresses of remote VTEPs. The test
+# verifies that packets are correctly flooded in various cases such as deletion
+# of a record in the middle of the list.
+#
+# +--------------------+
+# | H1 (vrf)           |
+# |    + $h1           |
+# |    | 203.0.113.1/24|
+# +----|---------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# | +--|--------------------------------------------------------------------+ |
+# | |  + $swp1                   BR0 (802.1d)                               | |
+# | |                                                                       | |
+# | |  + vxlan0 (vxlan)                                                     | |
+# | |    local 198.51.100.1                                                 | |
+# | |    remote 198.51.100.{2..13}                                          | |
+# | |    id 10 dstport 4789                                                 | |
+# | +-----------------------------------------------------------------------+ |
+# |                                                                           |
+# |  198.51.100.0/24 via 192.0.2.2                                            |
+# |                                                                           |
+# |    + $rp1                                                                 |
+# |    | 192.0.2.1/24                                                         |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|--------------------------------------------------------+
+# |    |                                               R2 (vrf) |
+# |    + $rp2                                                   |
+# |      192.0.2.2/24                                           |
+# |                                                             |
+# +-------------------------------------------------------------+
+
+# $0 and $lib_dir are quoted so that the libraries are still found when the
+# script is run from a path that contains whitespace.
+lib_dir=$(dirname "$0")/../../../net/forwarding
+
+ALL_TESTS="flooding_test"
+NUM_NETIFS=4
+source "$lib_dir/tc_common.sh"
+source "$lib_dir/lib.sh"
+
+h1_create()
+{
+	# Configure host interface $h1 with its IPv4 address.
+	simple_if_init $h1 203.0.113.1/24
+}
+
+h1_destroy()
+{
+	# Counterpart of h1_create.
+	simple_if_fini $h1 203.0.113.1/24
+}
+
+switch_create()
+{
+	local netdev
+
+	# Pin the bridge MAC to that of the local port; otherwise it could end
+	# up being the VxLAN device's address.
+	ip link add dev br0 type bridge mcast_snooping 0
+	ip link set dev br0 address $(mac_get $swp1)
+
+	ip link add name vxlan0 type vxlan id 10 nolearning noudpcsum \
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789
+
+	ip address add 198.51.100.1/32 dev lo
+
+	# Enslave both bridge ports, then bring everything up.
+	for netdev in "$swp1" vxlan0; do
+		ip link set dev $netdev master br0
+	done
+
+	for netdev in br0 "$swp1" vxlan0; do
+		ip link set dev $netdev up
+	done
+}
+
+switch_destroy()
+{
+	# Undo switch_create in reverse order.
+	local netdev
+
+	for netdev in vxlan0 "$swp1" br0; do
+		ip link set dev $netdev down
+	done
+
+	for netdev in vxlan0 "$swp1"; do
+		ip link set dev $netdev nomaster
+	done
+
+	ip address del 198.51.100.1/32 dev lo
+
+	for netdev in vxlan0 br0; do
+		ip link del dev $netdev
+	done
+}
+
+router1_create()
+{
+	# This router is in the default VRF, where the VxLAN device is
+	# performing the L3 lookup
+	ip link set dev $rp1 up
+	ip address add 192.0.2.1/24 dev $rp1
+	# Route the remote VTEP subnet via the second router (192.0.2.2).
+	ip route add 198.51.100.0/24 via 192.0.2.2
+}
+
+router1_destroy()
+{
+	# Undo router1_create in reverse order.
+	ip route del 198.51.100.0/24 via 192.0.2.2
+	ip address del 192.0.2.1/24 dev $rp1
+	ip link set dev $rp1 down
+}
+
+router2_create()
+{
+	# This router is not in the default VRF, so use simple_if_init()
+	# The address matches the nexthop configured in router1_create.
+	simple_if_init $rp2 192.0.2.2/24
+}
+
+router2_destroy()
+{
+	# Counterpart of router2_create.
+	simple_if_fini $rp2 192.0.2.2/24
+}
+
+setup_prepare()
+{
+	# Map the four test interfaces to their roles and build the topology:
+	# host, switch (bridge + VxLAN), and the two routers.
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	rp1=${NETIFS[p3]}
+	rp2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+
+	switch_create
+
+	router1_create
+	router2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	# Undo setup_prepare in reverse order. Installed as the EXIT trap.
+	pre_cleanup
+
+	forwarding_restore
+
+	router2_destroy
+	router1_destroy
+
+	switch_destroy
+
+	h1_destroy
+
+	vrf_cleanup
+}
+
+flooding_remotes_add()
+{
+	# Append $1 remote VTEPs to the all-zeros (flood) entry of vxlan0.
+	# Remote i gets the address 198.51.100.(i + 1), since .1 is the local
+	# VTEP address.
+	#
+	# An arithmetic loop is used instead of "eval echo {1..$n}": it does
+	# not evaluate the argument as shell code, and it runs zero times for a
+	# count of 0 instead of counting down over "1 0".
+	local num_remotes=$1
+	local lsb
+	local i
+
+	for ((i = 1; i <= num_remotes; i++)); do
+		lsb=$((i + 1))
+
+		bridge fdb append 00:00:00:00:00:00 dev vxlan0 self \
+			dst 198.51.100.$lsb
+	done
+}
+
+flooding_filters_add()
+{
+	# Install the tc filters used by the flooding test. $1 is the number of
+	# remote VTEPs; remote i is matched on $rp2 ingress by the filter with
+	# pref i / handle i.
+	#
+	# An arithmetic loop is used instead of "eval echo {1..$n}": it does
+	# not evaluate the argument as shell code, and it runs zero times for a
+	# count of 0 instead of counting down over "1 0".
+	local num_remotes=$1
+	local lsb
+	local i
+
+	# Prevent unwanted packets from entering the bridge and interfering
+	# with the test.
+	tc qdisc add dev br0 clsact
+	tc filter add dev br0 egress protocol all pref 1 handle 1 \
+		matchall skip_hw action drop
+	# On $h1 only packets addressed to the test MAC may leave; everything
+	# else is dropped.
+	tc qdisc add dev $h1 clsact
+	tc filter add dev $h1 egress protocol all pref 1 handle 1 \
+		flower skip_hw dst_mac de:ad:be:ef:13:37 action pass
+	tc filter add dev $h1 egress protocol all pref 2 handle 2 \
+		matchall skip_hw action drop
+
+	tc qdisc add dev $rp2 clsact
+
+	# One filter per remote VTEP, matching VxLAN packets (UDP port 4789)
+	# sent to that remote's address.
+	for ((i = 1; i <= num_remotes; i++)); do
+		lsb=$((i + 1))
+
+		tc filter add dev $rp2 ingress protocol ip pref $i handle $i \
+			flower ip_proto udp dst_ip 198.51.100.$lsb \
+			dst_port 4789 skip_sw action drop
+	done
+}
+
+flooding_filters_del()
+{
+	# Remove everything installed by flooding_filters_add. $1 is the number
+	# of remote VTEPs the per-remote filters were created for.
+	#
+	# An arithmetic loop is used instead of "eval echo {1..$n}": it does
+	# not evaluate the argument as shell code, and it runs zero times for a
+	# count of 0 instead of counting down over "1 0".
+	local num_remotes=$1
+	local i
+
+	for ((i = 1; i <= num_remotes; i++)); do
+		tc filter del dev $rp2 ingress protocol ip pref $i \
+			handle $i flower
+	done
+
+	tc qdisc del dev $rp2 clsact
+
+	tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall
+	tc filter del dev $h1 egress protocol all pref 1 handle 1 flower
+	tc qdisc del dev $h1 clsact
+	tc filter del dev br0 egress protocol all pref 1 handle 1 matchall
+	tc qdisc del dev br0 clsact
+}
+
+flooding_check_packets()
+{
+	# Arguments are the expected packet counts, one per remote VTEP, in
+	# remote order. The count for remote i is compared with the counter of
+	# the $rp2 ingress filter with handle i; mismatches are recorded through
+	# check_err.
+	#
+	# An arithmetic loop is used instead of "eval echo {1..$n}": with no
+	# arguments the old form iterated over "1 0" and checked non-existent
+	# remotes, while this loop runs zero times.
+	local packets=("$@")
+	local num_remotes=${#packets[@]}
+	local i
+
+	for ((i = 1; i <= num_remotes; i++)); do
+		tc_check_packets "dev $rp2 ingress" $i ${packets[i - 1]}
+		check_err $? "remote $i - did not get expected number of packets"
+	done
+}
+
+flooding_test()
+{
+	# 12 remote VTEPs are used, which the device keeps in 4 records of 3.
+	# 'packets' holds, per remote, the number of packets it is expected to
+	# have received so far; it is updated before each stage.
+	local -a packets=(1 1 1 1 1 1 1 1 1 1 1 1)
+	local num_remotes=12
+	local flood_mac=00:00:00:00:00:00
+	local pkt_dmac=de:ad:be:ef:13:37
+	local lsb
+
+	RET=0
+
+	# Install the remote VTEPs and, on the ingress of the nexthop router,
+	# one counting tc filter per remote.
+	flooding_remotes_add $num_remotes
+	flooding_filters_add $num_remotes
+
+	# A single packet must reach every remote VTEP.
+	$MZ $h1 -q -p 64 -b $pkt_dmac -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 1 packet"
+
+	# Remove the third record (VTEPs with LSB 8..10), i.e. a record from
+	# the middle of the list.
+	RET=0
+
+	packets=(2 2 2 2 2 2 1 1 1 2 2 2)
+	for lsb in 8 9 10; do
+		bridge fdb del $flood_mac dev vxlan0 self dst 198.51.100.$lsb
+	done
+
+	$MZ $h1 -q -p 64 -b $pkt_dmac -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 2 packets"
+
+	# Remove the first record.
+	RET=0
+
+	packets=(2 2 2 3 3 3 1 1 1 3 3 3)
+	for lsb in 2 3 4; do
+		bridge fdb del $flood_mac dev vxlan0 self dst 198.51.100.$lsb
+	done
+
+	$MZ $h1 -q -p 64 -b $pkt_dmac -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 3 packets"
+
+	# Remove the last record.
+	RET=0
+
+	packets=(2 2 2 4 4 4 1 1 1 3 3 3)
+	for lsb in 11 12 13; do
+		bridge fdb del $flood_mac dev vxlan0 self dst 198.51.100.$lsb
+	done
+
+	$MZ $h1 -q -p 64 -b $pkt_dmac -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 4 packets"
+
+	# Empty the one remaining record (LSB 5..7) a single entry at a time.
+	RET=0
+
+	packets=(2 2 2 4 5 5 1 1 1 3 3 3)
+	bridge fdb del $flood_mac dev vxlan0 self dst 198.51.100.5
+
+	$MZ $h1 -q -p 64 -b $pkt_dmac -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 5 packets"
+
+	RET=0
+
+	packets=(2 2 2 4 5 6 1 1 1 3 3 3)
+	bridge fdb del $flood_mac dev vxlan0 self dst 198.51.100.6
+
+	$MZ $h1 -q -p 64 -b $pkt_dmac -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 6 packets"
+
+	# With the final remote gone no counter may change any more.
+	RET=0
+
+	packets=(2 2 2 4 5 6 1 1 1 3 3 3)
+	bridge fdb del $flood_mac dev vxlan0 self dst 198.51.100.7
+
+	$MZ $h1 -q -p 64 -b $pkt_dmac -t ip -c 1
+	flooding_check_packets "${packets[@]}"
+	log_test "flood after 7 packets"
+
+	flooding_filters_del $num_remotes
+}
+
+# Run cleanup whenever the script exits, including after a failure.
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-27 10:05:51 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-27 10:05:51 +0000
commit	5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 (patch)
tree	a94efe259b9009378be6d90eb30d2b019d95c194 /tools/testing/selftests/drivers/net/mlxsw
parent	Initial commit. (diff)
download	linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.tar.xz linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.zip