summaryrefslogtreecommitdiffstats
path: root/tools/testing
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--tools/testing/cxl/test/mem.c19
-rw-r--r--tools/testing/kunit/qemu_configs/riscv.py2
-rw-r--r--tools/testing/nvdimm/test/ndtest.c7
-rw-r--r--tools/testing/nvdimm/test/ndtest.h31
-rw-r--r--tools/testing/selftests/Makefile13
-rw-r--r--tools/testing/selftests/alsa/conf.c2
-rw-r--r--tools/testing/selftests/arm64/abi/tpidr2.c2
-rw-r--r--tools/testing/selftests/bpf/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.aarch642
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x1
-rw-r--r--tools/testing/selftests/bpf/Makefile65
-rw-r--r--tools/testing/selftests/bpf/bench.c39
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_bpf_crypto.c185
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_local_storage_create.c2
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c433
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_trigger.sh22
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_uprobes.sh2
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_list.h4
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h71
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h3
-rw-r--r--tools/testing/selftests/bpf/bpf_tcp_helpers.h241
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c260
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h28
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c2
-rw-r--r--tools/testing/selftests/bpf/config7
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c241
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h17
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_atomics.c186
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_cookie.c114
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c149
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cls_redirect.c38
-rw-r--r--tools/testing/selftests/bpf/prog_tests/crypto_sanity.c197
-rw-r--r--tools/testing/selftests/bpf/prog_tests/empty_skb.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_sleep.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fib_lookup.c132
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/for_each.c62
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c322
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/module_attach.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mptcp.c18
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c214
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_skip.c137
-rw-r--r--tools/testing/selftests/bpf/prog_tests/preempt_lock.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c121
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_assign.c55
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_lookup.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_addr.c2353
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c171
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c38
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt.c65
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c64
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_netkit.c94
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_rtt.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c159
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tunnel.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_lockup.c91
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trace_printk.c36
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trace_vprintk.c36
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c132
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier_kfunc_prog_types.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/wq.c40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_metadata.c16
-rw-r--r--tools/testing/selftests/bpf/progs/arena_atomics.c178
-rw-r--r--tools/testing/selftests/bpf/progs/arena_list.c2
-rw-r--r--tools/testing/selftests/bpf/progs/bind4_prog.c24
-rw-r--r--tools/testing/selftests/bpf/progs/bind6_prog.c24
-rw-r--r--tools/testing/selftests/bpf/progs/bind_prog.h19
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cc_cubic.c189
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cubic.c74
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp.c62
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp_release.c10
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h52
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c4
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c4
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h2
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_prog.c12
-rw-r--r--tools/testing/selftests/bpf/progs/connect6_prog.c6
-rw-r--r--tools/testing/selftests/bpf/progs/connect_unix_prog.c6
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_common.h2
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_failure.c3
-rw-r--r--tools/testing/selftests/bpf/progs/crypto_basic.c68
-rw-r--r--tools/testing/selftests/bpf/progs/crypto_bench.c109
-rw-r--r--tools/testing/selftests/bpf/progs/crypto_common.h66
-rw-r--r--tools/testing/selftests/bpf/progs/crypto_sanity.c169
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c12
-rw-r--r--tools/testing/selftests/bpf/progs/fib_lookup.c2
-rw-r--r--tools/testing/selftests/bpf/progs/for_each_multi_maps.c49
-rw-r--r--tools/testing/selftests/bpf/progs/getpeername4_prog.c24
-rw-r--r--tools/testing/selftests/bpf/progs/getpeername6_prog.c31
-rw-r--r--tools/testing/selftests/bpf/progs/getsockname4_prog.c24
-rw-r--r--tools/testing/selftests/bpf/progs/getsockname6_prog.c31
-rw-r--r--tools/testing/selftests/bpf/progs/iters.c2
-rw-r--r--tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c4
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_multi_session.c79
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c58
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr.c10
-rw-r--r--tools/testing/selftests/bpf/progs/mptcp_sock.c4
-rw-r--r--tools/testing/selftests/bpf/progs/mptcpify.c4
-rw-r--r--tools/testing/selftests/bpf/progs/preempt_lock.c132
-rw-r--r--tools/testing/selftests/bpf/progs/sendmsg4_prog.c6
-rw-r--r--tools/testing/selftests/bpf/progs/sendmsg6_prog.c57
-rw-r--r--tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c6
-rw-r--r--tools/testing/selftests/bpf/progs/skb_pkt_end.c2
-rw-r--r--tools/testing/selftests/bpf/progs/sock_addr_kern.c65
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c16
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c19
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_module.c36
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c22
-rw-r--r--tools/testing/selftests/bpf/progs/task_kfunc_common.h2
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c12
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c121
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c2
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_update.c18
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c20
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_rtt.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_access_variable_array.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_cookie.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func10.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_lwt_redirect.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_module_attach.c23
-rw-r--r--tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c31
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_skip.c15
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_n.c47
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_write.c46
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c27
-rw-r--r--tools/testing/selftests/bpf/progs/test_sock_fields.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c17
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_link.c35
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c13
-rw-r--r--tools/testing/selftests/bpf/progs/test_tunnel_kern.c47
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_noinline.c27
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_vlan.c2
-rw-r--r--tools/testing/selftests/bpf/progs/timer.c3
-rw-r--r--tools/testing/selftests/bpf/progs/timer_failure.c2
-rw-r--r--tools/testing/selftests/bpf/progs/timer_lockup.c87
-rw-r--r--tools/testing/selftests/bpf/progs/timer_mim.c2
-rw-r--r--tools/testing/selftests/bpf/progs/timer_mim_reject.c2
-rw-r--r--tools/testing/selftests/bpf/progs/trigger_bench.c107
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_multi.c50
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds.c63
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_helper_restricted.c8
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c155
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c122
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_movsx.c63
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c41
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_sock_addr.c331
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c187
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_subprog_precision.c89
-rw-r--r--tools/testing/selftests/bpf/progs/wq.c180
-rw-r--r--tools/testing/selftests/bpf/progs/wq_failures.c144
-rw-r--r--tools/testing/selftests/bpf/test_cpp.cpp5
-rw-r--r--tools/testing/selftests/bpf/test_sock_addr.c1434
-rwxr-xr-xtools/testing/selftests/bpf/test_sock_addr.sh58
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c19
-rw-r--r--tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c117
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c16
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c109
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h9
-rw-r--r--tools/testing/selftests/bpf/uprobe_multi.c2
-rw-r--r--tools/testing/selftests/bpf/veristat.c5
-rw-r--r--tools/testing/selftests/bpf/xdp_hw_metadata.c16
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c123
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h12
-rw-r--r--tools/testing/selftests/capabilities/test_execve.c12
-rw-r--r--tools/testing/selftests/capabilities/validate_cap.c7
-rw-r--r--tools/testing/selftests/cgroup/Makefile2
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.h2
-rw-r--r--tools/testing/selftests/cgroup/test_cpu.c4
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_v1_hp.sh46
-rw-r--r--tools/testing/selftests/cgroup/test_kmem.c4
-rw-r--r--tools/testing/selftests/cgroup/test_memcontrol.c4
-rw-r--r--tools/testing/selftests/cgroup/test_zswap.c136
-rw-r--r--tools/testing/selftests/clone3/clone3.c7
-rw-r--r--tools/testing/selftests/clone3/clone3_clear_sighand.c2
-rw-r--r--tools/testing/selftests/clone3/clone3_set_tid.c121
-rw-r--r--tools/testing/selftests/core/close_range_test.c55
-rwxr-xr-xtools/testing/selftests/cpufreq/cpufreq.sh3
-rwxr-xr-xtools/testing/selftests/cpufreq/main.sh47
-rwxr-xr-xtools/testing/selftests/cpufreq/module.sh6
-rw-r--r--tools/testing/selftests/damon/Makefile13
-rw-r--r--tools/testing/selftests/damon/_damon_sysfs.py177
-rw-r--r--tools/testing/selftests/damon/access_memory.c2
-rw-r--r--tools/testing/selftests/damon/damos_quota_goal.py77
-rw-r--r--tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c247
-rw-r--r--tools/testing/selftests/drivers/net/Makefile11
-rw-r--r--tools/testing/selftests/drivers/net/README.rst136
-rw-r--r--tools/testing/selftests/drivers/net/config2
-rw-r--r--tools/testing/selftests/drivers/net/hw/Makefile28
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/csum.py122
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devlink_port_split.py (renamed from tools/testing/selftests/net/devlink_port_split.py)0
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/ethtool.sh (renamed from tools/testing/selftests/net/forwarding/ethtool.sh)20
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh (renamed from tools/testing/selftests/net/forwarding/ethtool_extended_state.sh)5
-rw-r--r--tools/testing/selftests/drivers/net/hw/ethtool_lib.sh (renamed from tools/testing/selftests/net/forwarding/ethtool_lib.sh)0
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/ethtool_mm.sh (renamed from tools/testing/selftests/net/forwarding/ethtool_mm.sh)3
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/ethtool_rmon.sh (renamed from tools/testing/selftests/net/forwarding/ethtool_rmon.sh)4
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/hw_stats_l3.sh (renamed from tools/testing/selftests/net/forwarding/hw_stats_l3.sh)20
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh (renamed from tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh)8
-rw-r--r--tools/testing/selftests/drivers/net/hw/lib/py/__init__.py16
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/loopback.sh (renamed from tools/testing/selftests/net/forwarding/loopback.sh)5
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/pp_alloc_fail.py129
-rw-r--r--tools/testing/selftests/drivers/net/hw/settings1
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/__init__.py19
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py224
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/load.py41
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/remote.py15
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/remote_netns.py21
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/remote_ssh.py39
-rwxr-xr-xtools/testing/selftests/drivers/net/microchip/ksz9477_qos.sh668
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh14
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh55
-rwxr-xr-xtools/testing/selftests/drivers/net/ping.py51
-rwxr-xr-xtools/testing/selftests/drivers/net/queues.py66
-rwxr-xr-xtools/testing/selftests/drivers/net/stats.py144
-rw-r--r--tools/testing/selftests/drivers/net/virtio_net/Makefile15
-rwxr-xr-xtools/testing/selftests/drivers/net/virtio_net/basic_features.sh131
-rw-r--r--tools/testing/selftests/drivers/net/virtio_net/config8
-rw-r--r--tools/testing/selftests/drivers/net/virtio_net/virtio_net_common.sh99
-rw-r--r--tools/testing/selftests/exec/recursion-depth.c10
-rw-r--r--tools/testing/selftests/fchmodat2/Makefile11
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test.c13
-rw-r--r--tools/testing/selftests/ftrace/config26
-rwxr-xr-xtools/testing/selftests/ftrace/ftracetest8
-rwxr-xr-xtools/testing/selftests/ftrace/ftracetest-ktap2
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/fprobe_args_vfs.tc41
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc40
-rw-r--r--tools/testing/selftests/futex/Makefile2
-rw-r--r--tools/testing/selftests/hid/config.common1
-rw-r--r--tools/testing/selftests/hid/hid_bpf.c112
-rw-r--r--tools/testing/selftests/hid/progs/hid.c46
-rw-r--r--tools/testing/selftests/hid/progs/hid_bpf_helpers.h6
-rw-r--r--tools/testing/selftests/hid/tests/base.py92
-rw-r--r--tools/testing/selftests/hid/tests/base_device.py421
-rw-r--r--tools/testing/selftests/hid/tests/base_gamepad.py238
-rw-r--r--tools/testing/selftests/hid/tests/test_gamepad.py457
-rw-r--r--tools/testing/selftests/hid/tests/test_tablet.py723
-rw-r--r--tools/testing/selftests/iommu/iommufd.c64
-rw-r--r--tools/testing/selftests/iommu/iommufd_utils.h6
-rw-r--r--tools/testing/selftests/ipc/msgque.c11
-rw-r--r--tools/testing/selftests/kselftest.h49
-rwxr-xr-xtools/testing/selftests/kselftest_deps.sh1
-rw-r--r--tools/testing/selftests/kvm/Makefile10
-rw-r--r--tools/testing/selftests/kvm/aarch64/arch_timer.c11
-rw-r--r--tools/testing/selftests/kvm/aarch64/page_fault_test.c5
-rw-r--r--tools/testing/selftests/kvm/aarch64/psci_test.c4
-rw-r--r--tools/testing/selftests/kvm/aarch64/set_id_regs.c123
-rw-r--r--tools/testing/selftests/kvm/aarch64/vgic_init.c1
-rw-r--r--tools/testing/selftests/kvm/aarch64/vgic_irq.c15
-rw-r--r--tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c410
-rw-r--r--tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c6
-rw-r--r--tools/testing/selftests/kvm/arch_timer.c4
-rw-r--r--tools/testing/selftests/kvm/demand_paging_test.c94
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c15
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c26
-rw-r--r--tools/testing/selftests/kvm/guest_memfd_test.c4
-rw-r--r--tools/testing/selftests/kvm/guest_print_test.c1
-rw-r--r--tools/testing/selftests/kvm/hardware_disable_test.c3
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/gic.h21
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/gic_v3.h586
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h19
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/processor.h21
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/ucall.h2
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/vgic.h5
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h1111
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util_base.h1135
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util_types.h20
-rw-r--r--tools/testing/selftests/kvm/include/memstress.h1
-rw-r--r--tools/testing/selftests/kvm/include/riscv/processor.h49
-rw-r--r--tools/testing/selftests/kvm/include/riscv/sbi.h141
-rw-r--r--tools/testing/selftests/kvm/include/riscv/ucall.h1
-rw-r--r--tools/testing/selftests/kvm/include/s390x/ucall.h2
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h19
-rw-r--r--tools/testing/selftests/kvm/include/userfaultfd_util.h19
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h28
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h12
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/sev.h19
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/ucall.h2
-rw-r--r--tools/testing/selftests/kvm/kvm_binary_stats_test.c2
-rw-r--r--tools/testing/selftests/kvm/kvm_create_max_vcpus.c2
-rw-r--r--tools/testing/selftests/kvm/kvm_page_table_test.c4
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic.c18
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic_private.h4
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic_v3.c99
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c248
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c2
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/vgic.c38
-rw-r--r--tools/testing/selftests/kvm/lib/assert.c3
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c21
-rw-r--r--tools/testing/selftests/kvm/lib/memstress.c13
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/processor.c13
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/ucall.c1
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c2
-rw-r--r--tools/testing/selftests/kvm/lib/ucall_common.c5
-rw-r--r--tools/testing/selftests/kvm/lib/userfaultfd_util.c156
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c331
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/sev.c45
-rw-r--r--tools/testing/selftests/kvm/max_guest_memory_test.c2
-rw-r--r--tools/testing/selftests/kvm/memslot_modification_stress_test.c3
-rw-r--r--tools/testing/selftests/kvm/riscv/arch_timer.c6
-rw-r--r--tools/testing/selftests/kvm/riscv/ebreak_test.c83
-rw-r--r--tools/testing/selftests/kvm/riscv/get-reg-list.c4
-rw-r--r--tools/testing/selftests/kvm/riscv/sbi_pmu_test.c682
-rw-r--r--tools/testing/selftests/kvm/rseq_test.c48
-rw-r--r--tools/testing/selftests/kvm/s390x/cmma_test.c3
-rw-r--r--tools/testing/selftests/kvm/s390x/memop.c1
-rw-r--r--tools/testing/selftests/kvm/s390x/shared_zeropage_test.c111
-rw-r--r--tools/testing/selftests/kvm/s390x/sync_regs_test.c2
-rw-r--r--tools/testing/selftests/kvm/s390x/tprot.c1
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c21
-rw-r--r--tools/testing/selftests/kvm/steal_time.c53
-rw-r--r--tools/testing/selftests/kvm/x86_64/amx_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c5
-rw-r--r--tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c3
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_features.c6
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_ipi.c5
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/kvm_pv_test.c3
-rw-r--r--tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c3
-rwxr-xr-xtools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh13
-rw-r--r--tools/testing/selftests/kvm/x86_64/platform_info_test.c61
-rw-r--r--tools/testing/selftests/kvm/x86_64/pmu_counters_test.c8
-rw-r--r--tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c9
-rw-r--r--tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/set_sregs_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/sev_init2_tests.c152
-rw-r--r--tools/testing/selftests/kvm/x86_64/sev_smoke_test.c96
-rw-r--r--tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c6
-rw-r--r--tools/testing/selftests/kvm/x86_64/smm_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/state_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c3
-rw-r--r--tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c5
-rw-r--r--tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c5
-rw-r--r--tools/testing/selftests/kvm/x86_64/sync_regs_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/ucna_injection_test.c7
-rw-r--r--tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c15
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c3
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c3
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/xapic_state_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c3
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c59
-rw-r--r--tools/testing/selftests/kvm/x86_64/xss_msr_test.c2
-rw-r--r--tools/testing/selftests/landlock/base_test.c76
-rw-r--r--tools/testing/selftests/landlock/config1
-rw-r--r--tools/testing/selftests/landlock/fs_test.c532
-rw-r--r--tools/testing/selftests/lib.mk26
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test_multi_thread.c2
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test_single_thread.c2
-rw-r--r--tools/testing/selftests/memfd/fuse_test.c2
-rw-r--r--tools/testing/selftests/memfd/memfd_test.c2
-rw-r--r--tools/testing/selftests/mm/.gitignore2
-rw-r--r--tools/testing/selftests/mm/Makefile4
-rw-r--r--tools/testing/selftests/mm/cow.c106
-rw-r--r--tools/testing/selftests/mm/gup_longterm.c16
-rw-r--r--tools/testing/selftests/mm/hugetlb_madv_vs_map.c16
-rw-r--r--tools/testing/selftests/mm/ksm_functional_tests.c173
-rw-r--r--tools/testing/selftests/mm/map_fixed_noreplace.c24
-rw-r--r--tools/testing/selftests/mm/memfd_secret.c51
-rw-r--r--tools/testing/selftests/mm/mlock2-tests.c15
-rw-r--r--tools/testing/selftests/mm/mremap_test.c204
-rw-r--r--tools/testing/selftests/mm/mseal_test.c1894
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh13
-rw-r--r--tools/testing/selftests/mm/seal_elf.c179
-rw-r--r--tools/testing/selftests/mm/soft-dirty.c2
-rw-r--r--tools/testing/selftests/mm/virtual_address_range.c78
-rw-r--r--tools/testing/selftests/net/.gitignore4
-rw-r--r--tools/testing/selftests/net/Makefile60
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile2
-rw-r--r--tools/testing/selftests/net/af_unix/config3
-rw-r--r--tools/testing/selftests/net/af_unix/msg_oob.c734
-rw-r--r--tools/testing/selftests/net/af_unix/scm_rights.c307
-rw-r--r--tools/testing/selftests/net/af_unix/test_unix_oob.c436
-rw-r--r--tools/testing/selftests/net/bpf.mk53
-rwxr-xr-xtools/testing/selftests/net/bpf_offload.py (renamed from tools/testing/selftests/bpf/test_offload.py)142
-rw-r--r--tools/testing/selftests/net/cmsg_sender.c32
-rwxr-xr-xtools/testing/selftests/net/cmsg_time.sh7
-rw-r--r--tools/testing/selftests/net/config2
-rw-r--r--tools/testing/selftests/net/epoll_busy_poll.c320
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh46
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh24
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile9
-rw-r--r--tools/testing/selftests/net/forwarding/README33
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh18
-rw-r--r--tools/testing/selftests/net/forwarding/devlink_lib.sh2
-rw-r--r--tools/testing/selftests/net/forwarding/forwarding.config.sample53
-rw-r--r--tools/testing/selftests/net/forwarding/ipip_lib.sh1
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh253
-rwxr-xr-xtools/testing/selftests/net/forwarding/lib_sh_test.sh208
-rwxr-xr-xtools/testing/selftests/net/forwarding/local_termination.sh30
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh.sh35
-rw-r--r--tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh12
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh_res.sh35
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_nh.sh14
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_tests.sh19
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_red.sh10
-rw-r--r--tools/testing/selftests/net/forwarding/sch_tbf_core.sh2
-rw-r--r--tools/testing/selftests/net/forwarding/tc_common.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_tunnel_key.sh2
-rw-r--r--tools/testing/selftests/net/gro.c138
-rw-r--r--tools/testing/selftests/net/hsr/Makefile3
-rw-r--r--tools/testing/selftests/net/hsr/config1
-rw-r--r--tools/testing/selftests/net/hsr/hsr_common.sh84
-rwxr-xr-xtools/testing/selftests/net/hsr/hsr_ping.sh106
-rwxr-xr-xtools/testing/selftests/net/hsr/hsr_redbox.sh121
-rw-r--r--tools/testing/selftests/net/lib.sh9
-rw-r--r--tools/testing/selftests/net/lib/.gitignore2
-rw-r--r--tools/testing/selftests/net/lib/Makefile15
-rw-r--r--tools/testing/selftests/net/lib/csum.c (renamed from tools/testing/selftests/net/csum.c)18
-rw-r--r--tools/testing/selftests/net/lib/py/__init__.py8
-rw-r--r--tools/testing/selftests/net/lib/py/consts.py9
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py159
-rw-r--r--tools/testing/selftests/net/lib/py/netns.py31
-rw-r--r--tools/testing/selftests/net/lib/py/nsim.py134
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py102
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py49
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh53
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh2
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh149
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh165
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh34
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh295
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh16
-rw-r--r--tools/testing/selftests/net/nat6to4.bpf.c (renamed from tools/testing/selftests/net/nat6to4.c)0
-rw-r--r--tools/testing/selftests/net/netfilter/.gitignore (renamed from tools/testing/selftests/netfilter/.gitignore)4
-rw-r--r--tools/testing/selftests/net/netfilter/Makefile52
-rw-r--r--tools/testing/selftests/net/netfilter/audit_logread.c (renamed from tools/testing/selftests/netfilter/audit_logread.c)0
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter.sh171
-rwxr-xr-xtools/testing/selftests/net/netfilter/bridge_brouter.sh122
-rw-r--r--tools/testing/selftests/net/netfilter/config89
-rw-r--r--tools/testing/selftests/net/netfilter/connect_close.c (renamed from tools/testing/selftests/netfilter/connect_close.c)0
-rw-r--r--tools/testing/selftests/net/netfilter/conntrack_dump_flush.c (renamed from tools/testing/selftests/netfilter/conntrack_dump_flush.c)10
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_icmp_related.sh (renamed from tools/testing/selftests/netfilter/conntrack_icmp_related.sh)179
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh (renamed from tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh)118
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh87
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh164
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_vrf.sh (renamed from tools/testing/selftests/netfilter/conntrack_vrf.sh)121
-rwxr-xr-xtools/testing/selftests/net/netfilter/ipvs.sh211
-rw-r--r--tools/testing/selftests/net/netfilter/lib.sh10
-rwxr-xr-xtools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh71
-rwxr-xr-xtools/testing/selftests/net/netfilter/nf_nat_edemux.sh97
-rw-r--r--tools/testing/selftests/net/netfilter/nf_queue.c (renamed from tools/testing/selftests/netfilter/nf-queue.c)0
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_audit.sh (renamed from tools/testing/selftests/netfilter/nft_audit.sh)31
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh (renamed from tools/testing/selftests/netfilter/nft_concat_range.sh)213
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range_perf.sh9
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_conntrack_helper.sh171
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_fib.sh234
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_flowtable.sh (renamed from tools/testing/selftests/netfilter/nft_flowtable.sh)371
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_meta.sh (renamed from tools/testing/selftests/netfilter/nft_meta.sh)4
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat.sh (renamed from tools/testing/selftests/netfilter/nft_nat.sh)480
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat_zones.sh (renamed from tools/testing/selftests/netfilter/nft_nat_zones.sh)194
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh417
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_synproxy.sh96
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_zones_many.sh (renamed from tools/testing/selftests/netfilter/nft_zones_many.sh)97
-rwxr-xr-xtools/testing/selftests/net/netfilter/packetdrill/common.sh33
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt118
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt62
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt59
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt44
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt51
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt34
-rwxr-xr-xtools/testing/selftests/net/netfilter/rpath.sh (renamed from tools/testing/selftests/netfilter/rpath.sh)10
-rw-r--r--tools/testing/selftests/net/netfilter/sctp_collision.c (renamed from tools/testing/selftests/netfilter/sctp_collision.c)0
-rw-r--r--tools/testing/selftests/net/netfilter/settings1
-rwxr-xr-xtools/testing/selftests/net/netfilter/xt_string.sh (renamed from tools/testing/selftests/netfilter/xt_string.sh)89
-rwxr-xr-xtools/testing/selftests/net/nl_netdev.py98
-rw-r--r--tools/testing/selftests/net/openvswitch/ovs-dpctl.py16
-rw-r--r--tools/testing/selftests/net/sample_map_ret0.bpf.c (renamed from tools/testing/selftests/bpf/progs/sample_map_ret0.c)2
-rw-r--r--tools/testing/selftests/net/sample_ret0.bpf.c (renamed from tools/testing/selftests/bpf/progs/sample_ret0.c)3
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh335
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh340
-rwxr-xr-xtools/testing/selftests/net/udpgro.sh2
-rwxr-xr-xtools/testing/selftests/net/udpgro_bench.sh2
-rwxr-xr-xtools/testing/selftests/net/udpgro_frglist.sh8
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh2
-rwxr-xr-xtools/testing/selftests/net/veth.sh2
-rw-r--r--tools/testing/selftests/net/xdp_dummy.bpf.c (renamed from tools/testing/selftests/net/xdp_dummy.c)0
-rwxr-xr-xtools/testing/selftests/net/xfrm_policy.sh4
-rw-r--r--tools/testing/selftests/netfilter/Makefile21
-rwxr-xr-xtools/testing/selftests/netfilter/bridge_brouter.sh146
-rw-r--r--tools/testing/selftests/netfilter/bridge_netfilter.sh188
-rw-r--r--tools/testing/selftests/netfilter/config9
-rwxr-xr-xtools/testing/selftests/netfilter/conntrack_sctp_collision.sh89
-rwxr-xr-xtools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh167
-rwxr-xr-xtools/testing/selftests/netfilter/ipvs.sh228
-rwxr-xr-xtools/testing/selftests/netfilter/nf_nat_edemux.sh127
-rwxr-xr-xtools/testing/selftests/netfilter/nft_conntrack_helper.sh197
-rwxr-xr-xtools/testing/selftests/netfilter/nft_fib.sh273
-rwxr-xr-xtools/testing/selftests/netfilter/nft_queue.sh449
-rwxr-xr-xtools/testing/selftests/netfilter/nft_synproxy.sh117
-rwxr-xr-xtools/testing/selftests/netfilter/nft_trans_stress.sh151
-rw-r--r--tools/testing/selftests/netfilter/settings1
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test.c82
-rw-r--r--tools/testing/selftests/openat2/Makefile14
-rw-r--r--tools/testing/selftests/perf_events/.gitignore1
-rw-r--r--tools/testing/selftests/perf_events/Makefile2
-rw-r--r--tools/testing/selftests/perf_events/watermark_signal.c146
-rw-r--r--tools/testing/selftests/pidfd/pidfd_fdinfo_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_open_test.c4
-rw-r--r--tools/testing/selftests/pidfd/pidfd_poll_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_test.c2
-rw-r--r--tools/testing/selftests/powerpc/Makefile11
-rw-r--r--tools/testing/selftests/powerpc/alignment/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/cache_shape/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/copyloops/Makefile21
-rw-r--r--tools/testing/selftests/powerpc/dexcr/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/dexcr/Makefile7
-rw-r--r--tools/testing/selftests/powerpc/dexcr/chdexcr.c112
-rw-r--r--tools/testing/selftests/powerpc/dexcr/dexcr.c40
-rw-r--r--tools/testing/selftests/powerpc/dexcr/dexcr.h57
-rw-r--r--tools/testing/selftests/powerpc/dexcr/dexcr_test.c215
-rw-r--r--tools/testing/selftests/powerpc/dexcr/hashchk_test.c8
-rw-r--r--tools/testing/selftests/powerpc/dexcr/lsdexcr.c103
-rw-r--r--tools/testing/selftests/powerpc/dscr/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/eeh/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/flags.mk9
-rw-r--r--tools/testing/selftests/powerpc/math/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/mce/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/mm/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/papr_attributes/Makefile3
-rw-r--r--tools/testing/selftests/powerpc/papr_sysparm/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/papr_vpd/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/pmu/Makefile44
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/Makefile21
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/primitives/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/ptrace/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/security/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/signal/Makefile4
-rw-r--r--tools/testing/selftests/powerpc/stringloops/Makefile11
-rw-r--r--tools/testing/selftests/powerpc/switch_endian/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/syscalls/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/tm/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/vphn/Makefile5
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh6
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE095
-rw-r--r--tools/testing/selftests/resctrl/cat_test.c8
-rw-r--r--tools/testing/selftests/resctrl/cmt_test.c8
-rw-r--r--tools/testing/selftests/resctrl/mba_test.c10
-rw-r--r--tools/testing/selftests/resctrl/mbm_test.c10
-rw-r--r--tools/testing/selftests/resctrl/resctrl.h9
-rw-r--r--tools/testing/selftests/resctrl/resctrl_tests.c26
-rw-r--r--tools/testing/selftests/resctrl/resctrl_val.c62
-rw-r--r--tools/testing/selftests/ring-buffer/.gitignore1
-rw-r--r--tools/testing/selftests/ring-buffer/Makefile8
-rw-r--r--tools/testing/selftests/ring-buffer/config2
-rw-r--r--tools/testing/selftests/ring-buffer/map_test.c294
-rw-r--r--tools/testing/selftests/riscv/Makefile2
-rw-r--r--tools/testing/selftests/riscv/sigreturn/.gitignore1
-rw-r--r--tools/testing/selftests/riscv/sigreturn/Makefile12
-rw-r--r--tools/testing/selftests/riscv/sigreturn/sigreturn.c82
-rw-r--r--tools/testing/selftests/seccomp/seccomp_benchmark.c6
-rw-r--r--tools/testing/selftests/sigaltstack/current_stack_pointer.h2
-rw-r--r--tools/testing/selftests/sync/sync_test.c3
-rw-r--r--tools/testing/selftests/timers/adjtick.c4
-rw-r--r--tools/testing/selftests/timers/alarmtimer-suspend.c4
-rw-r--r--tools/testing/selftests/timers/change_skew.c4
-rw-r--r--tools/testing/selftests/timers/freq-step.c4
-rw-r--r--tools/testing/selftests/timers/leap-a-day.c10
-rw-r--r--tools/testing/selftests/timers/leapcrash.c4
-rw-r--r--tools/testing/selftests/timers/mqueue-lat.c4
-rw-r--r--tools/testing/selftests/timers/posix_timers.c12
-rw-r--r--tools/testing/selftests/timers/raw_skew.c6
-rw-r--r--tools/testing/selftests/timers/set-2038.c4
-rw-r--r--tools/testing/selftests/timers/set-tai.c4
-rw-r--r--tools/testing/selftests/timers/set-timer-lat.c4
-rw-r--r--tools/testing/selftests/timers/set-tz.c4
-rw-r--r--tools/testing/selftests/timers/skew_consistency.c4
-rw-r--r--tools/testing/selftests/timers/threadtest.c2
-rw-r--r--tools/testing/selftests/timers/valid-adjtimex.c6
-rw-r--r--tools/testing/selftests/tty/tty_tstamp_update.c48
-rw-r--r--tools/testing/selftests/user_events/ftrace_test.c8
-rw-r--r--tools/testing/selftests/vDSO/Makefile29
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/riscv32.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/riscv64.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/kernel.config1
-rw-r--r--tools/testing/selftests/x86/amx.c27
-rw-r--r--tools/testing/selftests/x86/lam.c4
-rw-r--r--tools/testing/selftests/x86/test_mremap_vdso.c43
-rw-r--r--tools/testing/selftests/x86/test_shadow_stack.c67
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c506
605 files changed, 31647 insertions, 10756 deletions
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 0d0ca63de8..eaf091a3d3 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -128,7 +128,7 @@ static struct {
#define CXL_TEST_EVENT_CNT_MAX 15
/* Set a number of events to return at a time for simulation. */
-#define CXL_TEST_EVENT_CNT 3
+#define CXL_TEST_EVENT_RET_MAX 4
struct mock_event_log {
u16 clear_idx;
@@ -223,6 +223,12 @@ static void mes_add_event(struct mock_event_store *mes,
log->nr_events++;
}
+/*
+ * Vary the number of events returned to simulate events occuring while the
+ * logs are being read.
+ */
+static int ret_limit = 0;
+
static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
{
struct cxl_get_event_payload *pl;
@@ -234,14 +240,18 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
if (cmd->size_in != sizeof(log_type))
return -EINVAL;
- if (cmd->size_out < struct_size(pl, records, CXL_TEST_EVENT_CNT))
+ ret_limit = (ret_limit + 1) % CXL_TEST_EVENT_RET_MAX;
+ if (!ret_limit)
+ ret_limit = 1;
+
+ if (cmd->size_out < struct_size(pl, records, ret_limit))
return -EINVAL;
log_type = *((u8 *)cmd->payload_in);
if (log_type >= CXL_EVENT_TYPE_MAX)
return -EINVAL;
- memset(cmd->payload_out, 0, cmd->size_out);
+ memset(cmd->payload_out, 0, struct_size(pl, records, 0));
log = event_find_log(dev, log_type);
if (!log || event_log_empty(log))
@@ -249,7 +259,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
pl = cmd->payload_out;
- for (i = 0; i < CXL_TEST_EVENT_CNT && !event_log_empty(log); i++) {
+ for (i = 0; i < ret_limit && !event_log_empty(log); i++) {
memcpy(&pl->records[i], event_get_current(log),
sizeof(pl->records[i]));
pl->records[i].event.generic.hdr.handle =
@@ -257,6 +267,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
log->cur_idx++;
}
+ cmd->size_out = struct_size(pl, records, i);
pl->record_count = cpu_to_le16(i);
if (!event_log_empty(log))
pl->flags |= CXL_GET_EVENT_FLAG_MORE_RECORDS;
diff --git a/tools/testing/kunit/qemu_configs/riscv.py b/tools/testing/kunit/qemu_configs/riscv.py
index 12a1d52597..c87758030f 100644
--- a/tools/testing/kunit/qemu_configs/riscv.py
+++ b/tools/testing/kunit/qemu_configs/riscv.py
@@ -13,7 +13,7 @@ if not os.path.isfile(OPENSBI_PATH):
QEMU_ARCH = QemuArchParams(linux_arch='riscv',
kconfig='''
-CONFIG_SOC_VIRT=y
+CONFIG_ARCH_VIRT=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c
index b8419f4603..b438f3d053 100644
--- a/tools/testing/nvdimm/test/ndtest.c
+++ b/tools/testing/nvdimm/test/ndtest.c
@@ -13,6 +13,8 @@
#include <nd-core.h>
#include <linux/printk.h>
#include <linux/seq_buf.h>
+#include <linux/papr_scm.h>
+#include <uapi/linux/papr_pdsm.h>
#include "../watermark.h"
#include "nfit_test.h"
@@ -830,12 +832,11 @@ static int ndtest_bus_register(struct ndtest_priv *p)
return 0;
}
-static int ndtest_remove(struct platform_device *pdev)
+static void ndtest_remove(struct platform_device *pdev)
{
struct ndtest_priv *p = to_ndtest_priv(&pdev->dev);
nvdimm_bus_unregister(p->bus);
- return 0;
}
static int ndtest_probe(struct platform_device *pdev)
@@ -882,7 +883,7 @@ static const struct platform_device_id ndtest_id[] = {
static struct platform_driver ndtest_driver = {
.probe = ndtest_probe,
- .remove = ndtest_remove,
+ .remove_new = ndtest_remove,
.driver = {
.name = KBUILD_MODNAME,
},
diff --git a/tools/testing/nvdimm/test/ndtest.h b/tools/testing/nvdimm/test/ndtest.h
index 2c54c9cbb9..8f27ad6f73 100644
--- a/tools/testing/nvdimm/test/ndtest.h
+++ b/tools/testing/nvdimm/test/ndtest.h
@@ -5,37 +5,6 @@
#include <linux/platform_device.h>
#include <linux/libnvdimm.h>
-/* SCM device is unable to persist memory contents */
-#define PAPR_PMEM_UNARMED (1ULL << (63 - 0))
-/* SCM device failed to persist memory contents */
-#define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1))
-/* SCM device contents are not persisted from previous IPL */
-#define PAPR_PMEM_EMPTY (1ULL << (63 - 3))
-#define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4))
-/* SCM device will be garded off next IPL due to failure */
-#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5))
-/* SCM contents cannot persist due to current platform health status */
-#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6))
-
-/* Bits status indicators for health bitmap indicating unarmed dimm */
-#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \
- PAPR_PMEM_HEALTH_UNHEALTHY)
-
-#define PAPR_PMEM_SAVE_FAILED (1ULL << (63 - 10))
-
-/* Bits status indicators for health bitmap indicating unflushed dimm */
-#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY)
-
-/* Bits status indicators for health bitmap indicating unrestored dimm */
-#define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY)
-
-/* Bit status indicators for smart event notification */
-#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \
- PAPR_PMEM_HEALTH_FATAL | \
- PAPR_PMEM_HEALTH_UNHEALTHY)
-
-#define PAPR_PMEM_SAVE_MASK (PAPR_PMEM_SAVE_FAILED)
-
struct ndtest_config;
struct ndtest_priv {
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index e150483365..9039f3709a 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -17,8 +17,10 @@ TARGETS += devices
TARGETS += dmabuf-heaps
TARGETS += drivers/dma-buf
TARGETS += drivers/s390x/uvdevice
+TARGETS += drivers/net
TARGETS += drivers/net/bonding
TARGETS += drivers/net/team
+TARGETS += drivers/net/virtio_net
TARGETS += dt
TARGETS += efivarfs
TARGETS += exec
@@ -63,7 +65,7 @@ TARGETS += net/hsr
TARGETS += net/mptcp
TARGETS += net/openvswitch
TARGETS += net/tcp_ao
-TARGETS += netfilter
+TARGETS += net/netfilter
TARGETS += nsfs
TARGETS += perf_events
TARGETS += pidfd
@@ -116,6 +118,13 @@ TARGETS += zram
TARGETS_HOTPLUG = cpu-hotplug
TARGETS_HOTPLUG += memory-hotplug
+# Networking tests want the net/lib target, include it automatically
+ifneq ($(filter net drivers/net drivers/net/hw,$(TARGETS)),)
+ifeq ($(filter net/lib,$(TARGETS)),)
+ INSTALL_DEP_TARGETS := net/lib
+endif
+endif
+
# User can optionally provide a TARGETS skiplist. By default we skip
# BPF since it has cutting edge build time dependencies which require
# more effort to install.
@@ -245,7 +254,7 @@ ifdef INSTALL_PATH
install -m 744 run_kselftest.sh $(INSTALL_PATH)/
rm -f $(TEST_LIST)
@ret=1; \
- for TARGET in $(TARGETS); do \
+ for TARGET in $(TARGETS) $(INSTALL_DEP_TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install \
INSTALL_PATH=$(INSTALL_PATH)/$$TARGET \
diff --git a/tools/testing/selftests/alsa/conf.c b/tools/testing/selftests/alsa/conf.c
index 89e3656a04..e2b3a5810f 100644
--- a/tools/testing/selftests/alsa/conf.c
+++ b/tools/testing/selftests/alsa/conf.c
@@ -105,7 +105,7 @@ static struct card_cfg_data *conf_data_by_card(int card, bool msg)
return NULL;
}
-static int dump_config_tree(snd_config_t *top)
+static void dump_config_tree(snd_config_t *top)
{
snd_output_t *out;
int err;
diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c
index 02ee3a91b7..285c47dd42 100644
--- a/tools/testing/selftests/arm64/abi/tpidr2.c
+++ b/tools/testing/selftests/arm64/abi/tpidr2.c
@@ -262,7 +262,7 @@ static int write_clone_read(void)
int main(int argc, char **argv)
{
- int ret, i;
+ int ret;
putstr("TAP version 13\n");
putstr("1..");
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index f1aebabfb0..5025401323 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -17,7 +17,6 @@ test_dev_cgroup
test_verifier_log
feature
test_sock
-test_sock_addr
urandom_read
test_sockmap
test_lirc_mode2_user
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index d8ade15e27..0445ac38bc 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -10,5 +10,3 @@ fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_mu
fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95
fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95
missed/kprobe_recursion # missed_kprobe_recursion__attach unexpected error: -95 (errno 95)
-verifier_arena # JIT does not support arena
-arena_htab # JIT does not support arena
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index f4a2f66a68..c34adf39ee 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -6,3 +6,4 @@ stacktrace_build_id # compare_map_keys stackid_hmap vs. sta
verifier_iterating_callbacks
verifier_arena # JIT does not support arena
arena_htab # JIT does not support arena
+arena_atomics
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 3b9eb40d63..dd49c1d23a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -53,6 +53,7 @@ progs/syscall.c-CFLAGS := -fno-strict-aliasing
progs/test_pkt_md_access.c-CFLAGS := -fno-strict-aliasing
progs/test_sk_lookup.c-CFLAGS := -fno-strict-aliasing
progs/timer_crash.c-CFLAGS := -fno-strict-aliasing
+progs/test_global_func9.c-CFLAGS := -fno-strict-aliasing
ifneq ($(LLVM),)
# Silence some warnings when compiled with clang
@@ -81,11 +82,24 @@ TEST_INST_SUBDIRS += bpf_gcc
# The following tests contain C code that, although technically legal,
# triggers GCC warnings that cannot be disabled: declaration of
# anonymous struct types in function parameter lists.
-progs/btf_dump_test_case_bitfields.c-CFLAGS := -Wno-error
-progs/btf_dump_test_case_namespacing.c-CFLAGS := -Wno-error
-progs/btf_dump_test_case_packing.c-CFLAGS := -Wno-error
-progs/btf_dump_test_case_padding.c-CFLAGS := -Wno-error
-progs/btf_dump_test_case_syntax.c-CFLAGS := -Wno-error
+progs/btf_dump_test_case_bitfields.c-bpf_gcc-CFLAGS := -Wno-error
+progs/btf_dump_test_case_namespacing.c-bpf_gcc-CFLAGS := -Wno-error
+progs/btf_dump_test_case_packing.c-bpf_gcc-CFLAGS := -Wno-error
+progs/btf_dump_test_case_padding.c-bpf_gcc-CFLAGS := -Wno-error
+progs/btf_dump_test_case_syntax.c-bpf_gcc-CFLAGS := -Wno-error
+
+# The following tests do type-punning, via the __imm_insn macro, from
+# `struct bpf_insn' to long and then uses the value. This triggers an
+# "is used uninitialized" warning in GCC due to strict-aliasing
+# rules.
+progs/verifier_ref_tracking.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
+progs/verifier_unpriv.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
+progs/verifier_cgroup_storage.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
+progs/verifier_ld_ind.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
+progs/verifier_map_ret_val.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
+progs/verifier_spill_fill.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
+progs/verifier_subprog_precision.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
+progs/verifier_uninit.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
endif
ifneq ($(CLANG_CPUV4),)
@@ -102,8 +116,6 @@ TEST_PROGS := test_kmod.sh \
test_xdp_redirect_multi.sh \
test_xdp_meta.sh \
test_xdp_veth.sh \
- test_offload.py \
- test_sock_addr.sh \
test_tunnel.sh \
test_lwt_seg6local.sh \
test_lirc_mode2.sh \
@@ -128,7 +140,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
test_xdp_vlan.sh test_bpftool.py
# Compile but not part of 'make run_tests'
-TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
+TEST_GEN_PROGS_EXTENDED = test_skb_cgroup_id_user \
flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \
@@ -136,18 +148,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi
-# Emit succinct information message describing current building step
-# $1 - generic step name (e.g., CC, LINK, etc);
-# $2 - optional "flavor" specifier; if provided, will be emitted as [flavor];
-# $3 - target (assumed to be file); only file name will be emitted;
-# $4 - optional extra arg, emitted as-is, if provided.
-ifeq ($(V),1)
-Q =
-msg =
-else
-Q = @
-msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))";
-MAKEFLAGS += --no-print-directory
+ifneq ($(V),1)
submake_extras := feature_display=0
endif
@@ -274,7 +275,7 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT)
$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \
OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF) \
BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
- BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf \
+ BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf/ \
BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) \
EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS)' \
EXTRA_LDFLAGS='$(SAN_LDFLAGS)' && \
@@ -290,11 +291,11 @@ UNPRIV_HELPERS := $(OUTPUT)/unpriv_helpers.o
TRACE_HELPERS := $(OUTPUT)/trace_helpers.o
JSON_WRITER := $(OUTPUT)/json_writer.o
CAP_HELPERS := $(OUTPUT)/cap_helpers.o
+NETWORK_HELPERS := $(OUTPUT)/network_helpers.o
$(OUTPUT)/test_dev_cgroup: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_skb_cgroup_id_user: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_sock: $(CGROUP_HELPERS) $(TESTING_HELPERS)
-$(OUTPUT)/test_sock_addr: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_sockmap: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELPERS)
$(OUTPUT)/get_cgroup_id_user: $(CGROUP_HELPERS) $(TESTING_HELPERS)
@@ -308,6 +309,7 @@ $(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS)
$(OUTPUT)/test_maps: $(TESTING_HELPERS)
$(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS) $(UNPRIV_HELPERS)
$(OUTPUT)/xsk.o: $(BPFOBJ)
+$(OUTPUT)/test_tcp_check_syncookie_user: $(NETWORK_HELPERS)
BPFTOOL ?= $(DEFAULT_BPFTOOL)
$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
@@ -442,7 +444,7 @@ endef
# Build BPF object using GCC
define GCC_BPF_BUILD_RULE
$(call msg,GCC-BPF,$(TRUNNER_BINARY),$2)
- $(Q)$(BPF_GCC) $3 -O2 -c $1 -o $2
+ $(Q)$(BPF_GCC) $3 -DBPF_NO_PRESERVE_ACCESS_INDEX -Wno-attributes -O2 -c $1 -o $2
endef
SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
@@ -455,7 +457,7 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \
trace_printk.c trace_vprintk.c map_ptr_kern.c \
core_kern.c core_kern_overflow.c test_ringbuf.c \
- test_ringbuf_map_key.c
+ test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c
# Generate both light skeleton and libbpf skeleton for these
LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \
@@ -481,7 +483,7 @@ LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(ske
# $eval()) and pass control to DEFINE_TEST_RUNNER_RULES.
# Parameters:
# $1 - test runner base binary name (e.g., test_progs)
-# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, gcc-bpf, etc)
+# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, bpf_gcc, etc)
define DEFINE_TEST_RUNNER
TRUNNER_OUTPUT := $(OUTPUT)$(if $2,/)$2
@@ -509,7 +511,7 @@ endef
# Using TRUNNER_XXX variables, provided by callers of DEFINE_TEST_RUNNER and
# set up by DEFINE_TEST_RUNNER itself, create test runner build rules with:
# $1 - test runner base binary name (e.g., test_progs)
-# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, gcc-bpf, etc)
+# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, bpf_gcc, etc)
define DEFINE_TEST_RUNNER_RULES
ifeq ($($(TRUNNER_OUTPUT)-dir),)
@@ -532,7 +534,8 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.bpf.o: \
| $(TRUNNER_OUTPUT) $$(BPFOBJ)
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
$(TRUNNER_BPF_CFLAGS) \
- $$($$<-CFLAGS))
+ $$($$<-CFLAGS) \
+ $$($$<-$2-CFLAGS))
$(TRUNNER_BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
@@ -658,7 +661,7 @@ $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
# Define test_progs-cpuv4 test runner.
ifneq ($(CLANG_CPUV4),)
TRUNNER_BPF_BUILD_RULE := CLANG_CPUV4_BPF_BUILD_RULE
-TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) -DENABLE_ATOMICS_TESTS
$(eval $(call DEFINE_TEST_RUNNER,test_progs,cpuv4))
endif
@@ -695,7 +698,7 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
# Include find_bit.c to compile xskxceiver.
EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c
-$(OUTPUT)/xskxceiver: $(EXTRA_SRC) xskxceiver.c xskxceiver.h $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT)
+$(OUTPUT)/xskxceiver: $(EXTRA_SRC) xskxceiver.c xskxceiver.h $(OUTPUT)/network_helpers.o $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT)
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
@@ -729,6 +732,7 @@ $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tas
$(OUTPUT)/bench_local_storage_create.o: $(OUTPUT)/bench_local_storage_create.skel.h
$(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h
$(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h
+$(OUTPUT)/bench_bpf_crypto.o: $(OUTPUT)/crypto_bench.skel.h
$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
$(OUTPUT)/bench: LDLIBS += -lm
$(OUTPUT)/bench: $(OUTPUT)/bench.o \
@@ -748,6 +752,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
$(OUTPUT)/bench_bpf_hashmap_lookup.o \
$(OUTPUT)/bench_local_storage_create.o \
$(OUTPUT)/bench_htab_mem.o \
+ $(OUTPUT)/bench_bpf_crypto.o \
#
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
@@ -759,7 +764,7 @@ $(OUTPUT)/veristat: $(OUTPUT)/veristat.o
$(OUTPUT)/uprobe_multi: uprobe_multi.c
$(call msg,BINARY,,$@)
- $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@
+ $(Q)$(CC) $(CFLAGS) -O0 $(LDFLAGS) $^ $(LDLIBS) -o $@
EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index b2b4c391eb..627b74ae04 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -280,6 +280,8 @@ extern struct argp bench_strncmp_argp;
extern struct argp bench_hashmap_lookup_argp;
extern struct argp bench_local_storage_create_argp;
extern struct argp bench_htab_mem_argp;
+extern struct argp bench_trigger_batch_argp;
+extern struct argp bench_crypto_argp;
static const struct argp_child bench_parsers[] = {
{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
@@ -292,6 +294,8 @@ static const struct argp_child bench_parsers[] = {
{ &bench_hashmap_lookup_argp, 0, "Hashmap lookup benchmark", 0 },
{ &bench_local_storage_create_argp, 0, "local-storage-create benchmark", 0 },
{ &bench_htab_mem_argp, 0, "hash map memory benchmark", 0 },
+ { &bench_trigger_batch_argp, 0, "BPF triggering benchmark", 0 },
+ { &bench_crypto_argp, 0, "bpf crypto benchmark", 0 },
{},
};
@@ -491,24 +495,31 @@ extern const struct bench bench_rename_kretprobe;
extern const struct bench bench_rename_rawtp;
extern const struct bench bench_rename_fentry;
extern const struct bench bench_rename_fexit;
-extern const struct bench bench_trig_base;
-extern const struct bench bench_trig_tp;
-extern const struct bench bench_trig_rawtp;
+
+/* pure counting benchmarks to establish theoretical lmits */
+extern const struct bench bench_trig_usermode_count;
+extern const struct bench bench_trig_syscall_count;
+extern const struct bench bench_trig_kernel_count;
+
+/* batched, staying mostly in-kernel benchmarks */
extern const struct bench bench_trig_kprobe;
extern const struct bench bench_trig_kretprobe;
extern const struct bench bench_trig_kprobe_multi;
extern const struct bench bench_trig_kretprobe_multi;
extern const struct bench bench_trig_fentry;
extern const struct bench bench_trig_fexit;
-extern const struct bench bench_trig_fentry_sleep;
extern const struct bench bench_trig_fmodret;
-extern const struct bench bench_trig_uprobe_base;
+extern const struct bench bench_trig_tp;
+extern const struct bench bench_trig_rawtp;
+
+/* uprobe/uretprobe benchmarks */
extern const struct bench bench_trig_uprobe_nop;
extern const struct bench bench_trig_uretprobe_nop;
extern const struct bench bench_trig_uprobe_push;
extern const struct bench bench_trig_uretprobe_push;
extern const struct bench bench_trig_uprobe_ret;
extern const struct bench bench_trig_uretprobe_ret;
+
extern const struct bench bench_rb_libbpf;
extern const struct bench bench_rb_custom;
extern const struct bench bench_pb_libbpf;
@@ -529,6 +540,8 @@ extern const struct bench bench_local_storage_tasks_trace;
extern const struct bench bench_bpf_hashmap_lookup;
extern const struct bench bench_local_storage_create;
extern const struct bench bench_htab_mem;
+extern const struct bench bench_crypto_encrypt;
+extern const struct bench bench_crypto_decrypt;
static const struct bench *benchs[] = {
&bench_count_global,
@@ -539,24 +552,28 @@ static const struct bench *benchs[] = {
&bench_rename_rawtp,
&bench_rename_fentry,
&bench_rename_fexit,
- &bench_trig_base,
- &bench_trig_tp,
- &bench_trig_rawtp,
+ /* pure counting benchmarks for establishing theoretical limits */
+ &bench_trig_usermode_count,
+ &bench_trig_kernel_count,
+ &bench_trig_syscall_count,
+ /* batched, staying mostly in-kernel triggers */
&bench_trig_kprobe,
&bench_trig_kretprobe,
&bench_trig_kprobe_multi,
&bench_trig_kretprobe_multi,
&bench_trig_fentry,
&bench_trig_fexit,
- &bench_trig_fentry_sleep,
&bench_trig_fmodret,
- &bench_trig_uprobe_base,
+ &bench_trig_tp,
+ &bench_trig_rawtp,
+ /* uprobes */
&bench_trig_uprobe_nop,
&bench_trig_uretprobe_nop,
&bench_trig_uprobe_push,
&bench_trig_uretprobe_push,
&bench_trig_uprobe_ret,
&bench_trig_uretprobe_ret,
+ /* ringbuf/perfbuf benchmarks */
&bench_rb_libbpf,
&bench_rb_custom,
&bench_pb_libbpf,
@@ -577,6 +594,8 @@ static const struct bench *benchs[] = {
&bench_bpf_hashmap_lookup,
&bench_local_storage_create,
&bench_htab_mem,
+ &bench_crypto_encrypt,
+ &bench_crypto_decrypt,
};
static void find_benchmark(void)
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_crypto.c b/tools/testing/selftests/bpf/benchs/bench_bpf_crypto.c
new file mode 100644
index 0000000000..2845edaba8
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_crypto.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <argp.h>
+#include "bench.h"
+#include "crypto_bench.skel.h"
+
+#define MAX_CIPHER_LEN 32
+static char *input;
+static struct crypto_ctx {
+ struct crypto_bench *skel;
+ int pfd;
+} ctx;
+
+static struct crypto_args {
+ u32 crypto_len;
+ char *crypto_cipher;
+} args = {
+ .crypto_len = 16,
+ .crypto_cipher = "ecb(aes)",
+};
+
+enum {
+ ARG_CRYPTO_LEN = 5000,
+ ARG_CRYPTO_CIPHER = 5001,
+};
+
+static const struct argp_option opts[] = {
+ { "crypto-len", ARG_CRYPTO_LEN, "CRYPTO_LEN", 0,
+ "Set the length of crypto buffer" },
+ { "crypto-cipher", ARG_CRYPTO_CIPHER, "CRYPTO_CIPHER", 0,
+ "Set the cipher to use (default:ecb(aes))" },
+ {},
+};
+
+static error_t crypto_parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case ARG_CRYPTO_LEN:
+ args.crypto_len = strtoul(arg, NULL, 10);
+ if (!args.crypto_len ||
+ args.crypto_len > sizeof(ctx.skel->bss->dst)) {
+ fprintf(stderr, "Invalid crypto buffer len (limit %zu)\n",
+ sizeof(ctx.skel->bss->dst));
+ argp_usage(state);
+ }
+ break;
+ case ARG_CRYPTO_CIPHER:
+ args.crypto_cipher = strdup(arg);
+ if (!strlen(args.crypto_cipher) ||
+ strlen(args.crypto_cipher) > MAX_CIPHER_LEN) {
+ fprintf(stderr, "Invalid crypto cipher len (limit %d)\n",
+ MAX_CIPHER_LEN);
+ argp_usage(state);
+ }
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_crypto_argp = {
+ .options = opts,
+ .parser = crypto_parse_arg,
+};
+
+static void crypto_validate(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "bpf crypto benchmark doesn't support consumer!\n");
+ exit(1);
+ }
+}
+
+static void crypto_setup(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ int err, pfd;
+ size_t i, sz;
+
+ sz = args.crypto_len;
+ if (!sz || sz > sizeof(ctx.skel->bss->dst)) {
+ fprintf(stderr, "invalid encrypt buffer size (source %zu, target %zu)\n",
+ sz, sizeof(ctx.skel->bss->dst));
+ exit(1);
+ }
+
+ setup_libbpf();
+
+ ctx.skel = crypto_bench__open();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ snprintf(ctx.skel->bss->cipher, 128, "%s", args.crypto_cipher);
+ memcpy(ctx.skel->bss->key, "12345678testtest", 16);
+ ctx.skel->bss->key_len = 16;
+ ctx.skel->bss->authsize = 0;
+
+ srandom(time(NULL));
+ input = malloc(sz);
+ for (i = 0; i < sz - 1; i++)
+ input[i] = '1' + random() % 9;
+ input[sz - 1] = '\0';
+
+ ctx.skel->rodata->len = args.crypto_len;
+
+ err = crypto_bench__load(ctx.skel);
+ if (err) {
+ fprintf(stderr, "failed to load skeleton\n");
+ crypto_bench__destroy(ctx.skel);
+ exit(1);
+ }
+
+ pfd = bpf_program__fd(ctx.skel->progs.crypto_setup);
+ if (pfd < 0) {
+ fprintf(stderr, "failed to get fd for setup prog\n");
+ crypto_bench__destroy(ctx.skel);
+ exit(1);
+ }
+
+ err = bpf_prog_test_run_opts(pfd, &opts);
+ if (err || ctx.skel->bss->status) {
+ fprintf(stderr, "failed to run setup prog: err %d, status %d\n",
+ err, ctx.skel->bss->status);
+ crypto_bench__destroy(ctx.skel);
+ exit(1);
+ }
+}
+
+static void crypto_encrypt_setup(void)
+{
+ crypto_setup();
+ ctx.pfd = bpf_program__fd(ctx.skel->progs.crypto_encrypt);
+}
+
+static void crypto_decrypt_setup(void)
+{
+ crypto_setup();
+ ctx.pfd = bpf_program__fd(ctx.skel->progs.crypto_decrypt);
+}
+
+static void crypto_measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+}
+
+static void *crypto_producer(void *unused)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .repeat = 64,
+ .data_in = input,
+ .data_size_in = args.crypto_len,
+ );
+
+ while (true)
+ (void)bpf_prog_test_run_opts(ctx.pfd, &opts);
+ return NULL;
+}
+
+const struct bench bench_crypto_encrypt = {
+ .name = "crypto-encrypt",
+ .argp = &bench_crypto_argp,
+ .validate = crypto_validate,
+ .setup = crypto_encrypt_setup,
+ .producer_thread = crypto_producer,
+ .measure = crypto_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_crypto_decrypt = {
+ .name = "crypto-decrypt",
+ .argp = &bench_crypto_argp,
+ .validate = crypto_validate,
+ .setup = crypto_decrypt_setup,
+ .producer_thread = crypto_producer,
+ .measure = crypto_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
index b36de42ee4..e2ff8ea1cb 100644
--- a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
@@ -186,7 +186,7 @@ static void *task_producer(void *input)
for (i = 0; i < batch_sz; i++) {
if (!pthd_results[i])
- pthread_join(pthds[i], NULL);;
+ pthread_join(pthds[i], NULL);
}
}
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index ace0d1011a..4b05539f16 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -1,15 +1,95 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
+#define _GNU_SOURCE
+#include <argp.h>
+#include <unistd.h>
+#include <stdint.h>
#include "bench.h"
#include "trigger_bench.skel.h"
#include "trace_helpers.h"
+#define MAX_TRIG_BATCH_ITERS 1000
+
+static struct {
+ __u32 batch_iters;
+} args = {
+ .batch_iters = 100,
+};
+
+enum {
+ ARG_TRIG_BATCH_ITERS = 7000,
+};
+
+static const struct argp_option opts[] = {
+ { "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0,
+ "Number of in-kernel iterations per one driver test run"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ long ret;
+
+ switch (key) {
+ case ARG_TRIG_BATCH_ITERS:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > MAX_TRIG_BATCH_ITERS) {
+ fprintf(stderr, "invalid --trig-batch-iters value (should be between %d and %d)\n",
+ 1, MAX_TRIG_BATCH_ITERS);
+ argp_usage(state);
+ }
+ args.batch_iters = ret;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_trigger_batch_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+/* adjust slot shift in inc_hits() if changing */
+#define MAX_BUCKETS 256
+
+#pragma GCC diagnostic ignored "-Wattributes"
+
/* BPF triggering benchmarks */
static struct trigger_ctx {
struct trigger_bench *skel;
+ bool usermode_counters;
+ int driver_prog_fd;
} ctx;
-static struct counter base_hits;
+static struct counter base_hits[MAX_BUCKETS];
+
+static __always_inline void inc_counter(struct counter *counters)
+{
+ static __thread int tid = 0;
+ unsigned slot;
+
+ if (unlikely(tid == 0))
+ tid = syscall(SYS_gettid);
+
+ /* multiplicative hashing, it's fast */
+ slot = 2654435769U * tid;
+ slot >>= 24;
+
+ atomic_inc(&base_hits[slot].value); /* use highest byte as an index */
+}
+
+static long sum_and_reset_counters(struct counter *counters)
+{
+ int i;
+ long sum = 0;
+
+ for (i = 0; i < MAX_BUCKETS; i++)
+ sum += atomic_swap(&counters[i].value, 0);
+ return sum;
+}
static void trigger_validate(void)
{
@@ -19,41 +99,63 @@ static void trigger_validate(void)
}
}
-static void *trigger_base_producer(void *input)
+static void *trigger_producer(void *input)
{
- while (true) {
- (void)syscall(__NR_getpgid);
- atomic_inc(&base_hits.value);
+ if (ctx.usermode_counters) {
+ while (true) {
+ (void)syscall(__NR_getpgid);
+ inc_counter(base_hits);
+ }
+ } else {
+ while (true)
+ (void)syscall(__NR_getpgid);
}
return NULL;
}
-static void trigger_base_measure(struct bench_res *res)
+static void *trigger_producer_batch(void *input)
{
- res->hits = atomic_swap(&base_hits.value, 0);
-}
+ int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver);
-static void *trigger_producer(void *input)
-{
while (true)
- (void)syscall(__NR_getpgid);
+ bpf_prog_test_run_opts(fd, NULL);
+
return NULL;
}
static void trigger_measure(struct bench_res *res)
{
- res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+ if (ctx.usermode_counters)
+ res->hits = sum_and_reset_counters(base_hits);
+ else
+ res->hits = sum_and_reset_counters(ctx.skel->bss->hits);
}
static void setup_ctx(void)
{
setup_libbpf();
- ctx.skel = trigger_bench__open_and_load();
+ ctx.skel = trigger_bench__open();
if (!ctx.skel) {
fprintf(stderr, "failed to open skeleton\n");
exit(1);
}
+
+ /* default "driver" BPF program */
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true);
+
+ ctx.skel->rodata->batch_iters = args.batch_iters;
+}
+
+static void load_ctx(void)
+{
+ int err;
+
+ err = trigger_bench__load(ctx.skel);
+ if (err) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
}
static void attach_bpf(struct bpf_program *prog)
@@ -67,64 +169,104 @@ static void attach_bpf(struct bpf_program *prog)
}
}
-static void trigger_tp_setup(void)
+static void trigger_syscall_count_setup(void)
{
- setup_ctx();
- attach_bpf(ctx.skel->progs.bench_trigger_tp);
+ ctx.usermode_counters = true;
}
-static void trigger_rawtp_setup(void)
+/* Batched, staying mostly in-kernel triggering setups */
+static void trigger_kernel_count_setup(void)
{
setup_ctx();
- attach_bpf(ctx.skel->progs.bench_trigger_raw_tp);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_count, true);
+ load_ctx();
+ /* override driver program */
+ ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count);
}
static void trigger_kprobe_setup(void)
{
setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true);
+ load_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
}
static void trigger_kretprobe_setup(void)
{
setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe, true);
+ load_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_kretprobe);
}
static void trigger_kprobe_multi_setup(void)
{
setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe_multi, true);
+ load_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi);
}
static void trigger_kretprobe_multi_setup(void)
{
setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe_multi, true);
+ load_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi);
}
static void trigger_fentry_setup(void)
{
setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry, true);
+ load_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}
static void trigger_fexit_setup(void)
{
setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit, true);
+ load_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_fexit);
}
-static void trigger_fentry_sleep_setup(void)
+static void trigger_fmodret_setup(void)
{
setup_ctx();
- attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fmodret, true);
+ load_ctx();
+ /* override driver program */
+ ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
+ attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
}
-static void trigger_fmodret_setup(void)
+static void trigger_tp_setup(void)
{
setup_ctx();
- attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_tp, true);
+ load_ctx();
+ /* override driver program */
+ ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
+ attach_bpf(ctx.skel->progs.bench_trigger_tp);
+}
+
+static void trigger_rawtp_setup(void)
+{
+ setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_rawtp, true);
+ load_ctx();
+ /* override driver program */
+ ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
+ attach_bpf(ctx.skel->progs.bench_trigger_rawtp);
}
/* make sure call is not inlined and not avoided by compiler, so __weak and
@@ -137,7 +279,7 @@ static void trigger_fmodret_setup(void)
* GCC doesn't generate stack setup preample for these functions due to them
* having no input arguments and doing nothing in the body.
*/
-__weak void uprobe_target_nop(void)
+__nocf_check __weak void uprobe_target_nop(void)
{
asm volatile ("nop");
}
@@ -146,7 +288,7 @@ __weak void opaque_noop_func(void)
{
}
-__weak int uprobe_target_push(void)
+__nocf_check __weak int uprobe_target_push(void)
{
/* overhead of function call is negligible compared to uprobe
* triggering, so this shouldn't affect benchmark results much
@@ -155,16 +297,16 @@ __weak int uprobe_target_push(void)
return 1;
}
-__weak void uprobe_target_ret(void)
+__nocf_check __weak void uprobe_target_ret(void)
{
asm volatile ("");
}
-static void *uprobe_base_producer(void *input)
+static void *uprobe_producer_count(void *input)
{
while (true) {
uprobe_target_nop();
- atomic_inc(&base_hits.value);
+ inc_counter(base_hits);
}
return NULL;
}
@@ -194,15 +336,24 @@ static void usetup(bool use_retprobe, void *target_addr)
{
size_t uprobe_offset;
struct bpf_link *link;
+ int err;
setup_libbpf();
- ctx.skel = trigger_bench__open_and_load();
+ ctx.skel = trigger_bench__open();
if (!ctx.skel) {
fprintf(stderr, "failed to open skeleton\n");
exit(1);
}
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true);
+
+ err = trigger_bench__load(ctx.skel);
+ if (err) {
+ fprintf(stderr, "failed to load skeleton\n");
+ exit(1);
+ }
+
uprobe_offset = get_uprobe_offset(target_addr);
link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
use_retprobe,
@@ -216,204 +367,90 @@ static void usetup(bool use_retprobe, void *target_addr)
ctx.skel->links.bench_trigger_uprobe = link;
}
-static void uprobe_setup_nop(void)
+static void usermode_count_setup(void)
+{
+ ctx.usermode_counters = true;
+}
+
+static void uprobe_nop_setup(void)
{
usetup(false, &uprobe_target_nop);
}
-static void uretprobe_setup_nop(void)
+static void uretprobe_nop_setup(void)
{
usetup(true, &uprobe_target_nop);
}
-static void uprobe_setup_push(void)
+static void uprobe_push_setup(void)
{
usetup(false, &uprobe_target_push);
}
-static void uretprobe_setup_push(void)
+static void uretprobe_push_setup(void)
{
usetup(true, &uprobe_target_push);
}
-static void uprobe_setup_ret(void)
+static void uprobe_ret_setup(void)
{
usetup(false, &uprobe_target_ret);
}
-static void uretprobe_setup_ret(void)
+static void uretprobe_ret_setup(void)
{
usetup(true, &uprobe_target_ret);
}
-const struct bench bench_trig_base = {
- .name = "trig-base",
+const struct bench bench_trig_syscall_count = {
+ .name = "trig-syscall-count",
.validate = trigger_validate,
- .producer_thread = trigger_base_producer,
- .measure = trigger_base_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_tp = {
- .name = "trig-tp",
- .validate = trigger_validate,
- .setup = trigger_tp_setup,
- .producer_thread = trigger_producer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_rawtp = {
- .name = "trig-rawtp",
- .validate = trigger_validate,
- .setup = trigger_rawtp_setup,
- .producer_thread = trigger_producer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_kprobe = {
- .name = "trig-kprobe",
- .validate = trigger_validate,
- .setup = trigger_kprobe_setup,
+ .setup = trigger_syscall_count_setup,
.producer_thread = trigger_producer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};
-const struct bench bench_trig_kretprobe = {
- .name = "trig-kretprobe",
- .validate = trigger_validate,
- .setup = trigger_kretprobe_setup,
- .producer_thread = trigger_producer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_kprobe_multi = {
- .name = "trig-kprobe-multi",
- .validate = trigger_validate,
- .setup = trigger_kprobe_multi_setup,
- .producer_thread = trigger_producer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_kretprobe_multi = {
- .name = "trig-kretprobe-multi",
- .validate = trigger_validate,
- .setup = trigger_kretprobe_multi_setup,
- .producer_thread = trigger_producer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_fentry = {
- .name = "trig-fentry",
- .validate = trigger_validate,
- .setup = trigger_fentry_setup,
- .producer_thread = trigger_producer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_fexit = {
- .name = "trig-fexit",
- .validate = trigger_validate,
- .setup = trigger_fexit_setup,
- .producer_thread = trigger_producer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_fentry_sleep = {
- .name = "trig-fentry-sleep",
- .validate = trigger_validate,
- .setup = trigger_fentry_sleep_setup,
- .producer_thread = trigger_producer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_fmodret = {
- .name = "trig-fmodret",
- .validate = trigger_validate,
- .setup = trigger_fmodret_setup,
- .producer_thread = trigger_producer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_uprobe_base = {
- .name = "trig-uprobe-base",
- .setup = NULL, /* no uprobe/uretprobe is attached */
- .producer_thread = uprobe_base_producer,
- .measure = trigger_base_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_uprobe_nop = {
- .name = "trig-uprobe-nop",
- .setup = uprobe_setup_nop,
- .producer_thread = uprobe_producer_nop,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_uretprobe_nop = {
- .name = "trig-uretprobe-nop",
- .setup = uretprobe_setup_nop,
- .producer_thread = uprobe_producer_nop,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_uprobe_push = {
- .name = "trig-uprobe-push",
- .setup = uprobe_setup_push,
- .producer_thread = uprobe_producer_push,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_uretprobe_push = {
- .name = "trig-uretprobe-push",
- .setup = uretprobe_setup_push,
- .producer_thread = uprobe_producer_push,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_uprobe_ret = {
- .name = "trig-uprobe-ret",
- .setup = uprobe_setup_ret,
- .producer_thread = uprobe_producer_ret,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_uretprobe_ret = {
- .name = "trig-uretprobe-ret",
- .setup = uretprobe_setup_ret,
- .producer_thread = uprobe_producer_ret,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
+/* batched (staying mostly in kernel) kprobe/fentry benchmarks */
+#define BENCH_TRIG_KERNEL(KIND, NAME) \
+const struct bench bench_trig_##KIND = { \
+ .name = "trig-" NAME, \
+ .setup = trigger_##KIND##_setup, \
+ .producer_thread = trigger_producer_batch, \
+ .measure = trigger_measure, \
+ .report_progress = hits_drops_report_progress, \
+ .report_final = hits_drops_report_final, \
+ .argp = &bench_trigger_batch_argp, \
+}
+
+BENCH_TRIG_KERNEL(kernel_count, "kernel-count");
+BENCH_TRIG_KERNEL(kprobe, "kprobe");
+BENCH_TRIG_KERNEL(kretprobe, "kretprobe");
+BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi");
+BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi");
+BENCH_TRIG_KERNEL(fentry, "fentry");
+BENCH_TRIG_KERNEL(fexit, "fexit");
+BENCH_TRIG_KERNEL(fmodret, "fmodret");
+BENCH_TRIG_KERNEL(tp, "tp");
+BENCH_TRIG_KERNEL(rawtp, "rawtp");
+
+/* uprobe benchmarks */
+#define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME) \
+const struct bench bench_trig_##KIND = { \
+ .name = "trig-" NAME, \
+ .validate = trigger_validate, \
+ .setup = KIND##_setup, \
+ .producer_thread = uprobe_producer_##PRODUCER, \
+ .measure = trigger_measure, \
+ .report_progress = hits_drops_report_progress, \
+ .report_final = hits_drops_report_final, \
+}
+
+BENCH_TRIG_USERMODE(usermode_count, count, "usermode-count");
+BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop");
+BENCH_TRIG_USERMODE(uprobe_push, push, "uprobe-push");
+BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret");
+BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop");
+BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push");
+BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret");
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
index 78e83f2432..a690f5a68b 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
@@ -2,8 +2,22 @@
set -eufo pipefail
-for i in base tp rawtp kprobe fentry fmodret
-do
- summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
- printf "%-10s: %s\n" $i "$summary"
+def_tests=( \
+ usermode-count kernel-count syscall-count \
+ fentry fexit fmodret \
+ rawtp tp \
+ kprobe kprobe-multi \
+ kretprobe kretprobe-multi \
+)
+
+tests=("$@")
+if [ ${#tests[@]} -eq 0 ]; then
+ tests=("${def_tests[@]}")
+fi
+
+p=${PROD_CNT:-1}
+
+for t in "${tests[@]}"; do
+ summary=$(sudo ./bench -w2 -d5 -a -p$p trig-$t | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
+ printf "%-15s: %s\n" $t "$summary"
done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
index 9bdcc74e03..af169f831f 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
@@ -2,7 +2,7 @@
set -eufo pipefail
-for i in base {uprobe,uretprobe}-{nop,push,ret}
+for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret}
do
summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
printf "%-15s: %s\n" $i "$summary"
diff --git a/tools/testing/selftests/bpf/bpf_arena_list.h b/tools/testing/selftests/bpf/bpf_arena_list.h
index b99b9f408e..85dbc3ea4d 100644
--- a/tools/testing/selftests/bpf/bpf_arena_list.h
+++ b/tools/testing/selftests/bpf/bpf_arena_list.h
@@ -29,6 +29,7 @@ static inline void *bpf_iter_num_new(struct bpf_iter_num *it, int i, int j) { re
static inline void bpf_iter_num_destroy(struct bpf_iter_num *it) {}
static inline bool bpf_iter_num_next(struct bpf_iter_num *it) { return true; }
#define cond_break ({})
+#define can_loop true
#endif
/* Safely walk link list elements. Deletion of elements is allowed. */
@@ -36,8 +37,7 @@ static inline bool bpf_iter_num_next(struct bpf_iter_num *it) { return true; }
for (void * ___tmp = (pos = list_entry_safe((head)->first, \
typeof(*(pos)), member), \
(void *)0); \
- pos && ({ ___tmp = (void *)pos->member.next; 1; }); \
- cond_break, \
+ pos && ({ ___tmp = (void *)pos->member.next; 1; }) && can_loop; \
pos = list_entry_safe((void __arena *)___tmp, typeof(*(pos)), member))
static inline void list_add_head(arena_list_node_t *n, arena_list_head_t *h)
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index a5b9df38c1..3d9e4b8c6b 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -326,9 +326,48 @@ l_true: \
})
#endif
+/*
+ * Note that cond_break can only be portably used in the body of a breakable
+ * construct, whereas can_loop can be used anywhere.
+ */
+#ifdef __BPF_FEATURE_MAY_GOTO
+#define can_loop \
+ ({ __label__ l_break, l_continue; \
+ bool ret = true; \
+ asm volatile goto("may_goto %l[l_break]" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: ret = false; \
+ l_continue:; \
+ ret; \
+ })
+
+#define cond_break \
+ ({ __label__ l_break, l_continue; \
+ asm volatile goto("may_goto %l[l_break]" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: break; \
+ l_continue:; \
+ })
+#else
+#define can_loop \
+ ({ __label__ l_break, l_continue; \
+ bool ret = true; \
+ asm volatile goto("1:.byte 0xe5; \
+ .byte 0; \
+ .long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \
+ .short 0" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: ret = false; \
+ l_continue:; \
+ ret; \
+ })
+
#define cond_break \
({ __label__ l_break, l_continue; \
- asm volatile goto("1:.byte 0xe5; \
+ asm volatile goto("1:.byte 0xe5; \
.byte 0; \
.long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \
.short 0" \
@@ -337,6 +376,7 @@ l_true: \
l_break: break; \
l_continue:; \
})
+#endif
#ifndef bpf_nop_mov
#define bpf_nop_mov(var) \
@@ -386,6 +426,28 @@ l_true: \
, [as]"i"((dst_as << 16) | src_as));
#endif
+void bpf_preempt_disable(void) __weak __ksym;
+void bpf_preempt_enable(void) __weak __ksym;
+
+typedef struct {
+} __bpf_preempt_t;
+
+static inline __bpf_preempt_t __bpf_preempt_constructor(void)
+{
+ __bpf_preempt_t ret = {};
+
+ bpf_preempt_disable();
+ return ret;
+}
+static inline void __bpf_preempt_destructor(__bpf_preempt_t *t)
+{
+ bpf_preempt_enable();
+}
+#define bpf_guard_preempt() \
+ __bpf_preempt_t ___bpf_apply(preempt, __COUNTER__) \
+ __attribute__((__unused__, __cleanup__(__bpf_preempt_destructor))) = \
+ __bpf_preempt_constructor()
+
/* Description
* Assert that a conditional expression is true.
* Returns
@@ -459,4 +521,11 @@ extern int bpf_iter_css_new(struct bpf_iter_css *it,
extern struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *it) __weak __ksym;
extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym;
+extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym;
+extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym;
+extern int bpf_wq_set_callback_impl(struct bpf_wq *wq,
+ int (callback_fn)(void *map, int *key, struct bpf_wq *wq),
+ unsigned int flags__k, void *aux__ign) __ksym;
+#define bpf_wq_set_callback(timer, cb, flags) \
+ bpf_wq_set_callback_impl(timer, cb, flags, NULL)
#endif
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index 14ebe7d9e1..3b6675ab40 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -75,4 +75,7 @@ extern void bpf_key_put(struct bpf_key *key) __ksym;
extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr,
struct bpf_dynptr *sig_ptr,
struct bpf_key *trusted_keyring) __ksym;
+
+extern bool bpf_session_is_return(void) __ksym __weak;
+extern __u64 *bpf_session_cookie(void) __ksym __weak;
#endif
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
deleted file mode 100644
index 82a7c9de95..0000000000
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ /dev/null
@@ -1,241 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __BPF_TCP_HELPERS_H
-#define __BPF_TCP_HELPERS_H
-
-#include <stdbool.h>
-#include <linux/types.h>
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_core_read.h>
-#include <bpf/bpf_tracing.h>
-
-#define BPF_STRUCT_OPS(name, args...) \
-SEC("struct_ops/"#name) \
-BPF_PROG(name, args)
-
-#ifndef SOL_TCP
-#define SOL_TCP 6
-#endif
-
-#ifndef TCP_CA_NAME_MAX
-#define TCP_CA_NAME_MAX 16
-#endif
-
-#define tcp_jiffies32 ((__u32)bpf_jiffies64())
-
-struct sock_common {
- unsigned char skc_state;
- __u16 skc_num;
-} __attribute__((preserve_access_index));
-
-enum sk_pacing {
- SK_PACING_NONE = 0,
- SK_PACING_NEEDED = 1,
- SK_PACING_FQ = 2,
-};
-
-struct sock {
- struct sock_common __sk_common;
-#define sk_state __sk_common.skc_state
- unsigned long sk_pacing_rate;
- __u32 sk_pacing_status; /* see enum sk_pacing */
-} __attribute__((preserve_access_index));
-
-struct inet_sock {
- struct sock sk;
-} __attribute__((preserve_access_index));
-
-struct inet_connection_sock {
- struct inet_sock icsk_inet;
- __u8 icsk_ca_state:6,
- icsk_ca_setsockopt:1,
- icsk_ca_dst_locked:1;
- struct {
- __u8 pending;
- } icsk_ack;
- __u64 icsk_ca_priv[104 / sizeof(__u64)];
-} __attribute__((preserve_access_index));
-
-struct request_sock {
- struct sock_common __req_common;
-} __attribute__((preserve_access_index));
-
-struct tcp_sock {
- struct inet_connection_sock inet_conn;
-
- __u32 rcv_nxt;
- __u32 snd_nxt;
- __u32 snd_una;
- __u32 window_clamp;
- __u8 ecn_flags;
- __u32 delivered;
- __u32 delivered_ce;
- __u32 snd_cwnd;
- __u32 snd_cwnd_cnt;
- __u32 snd_cwnd_clamp;
- __u32 snd_ssthresh;
- __u8 syn_data:1, /* SYN includes data */
- syn_fastopen:1, /* SYN includes Fast Open option */
- syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
- syn_fastopen_ch:1, /* Active TFO re-enabling probe */
- syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
- save_syn:1, /* Save headers of SYN packet */
- is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
- syn_smc:1; /* SYN includes SMC */
- __u32 max_packets_out;
- __u32 lsndtime;
- __u32 prior_cwnd;
- __u64 tcp_mstamp; /* most recent packet received/sent */
- bool is_mptcp;
-} __attribute__((preserve_access_index));
-
-static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk)
-{
- return (struct inet_connection_sock *)sk;
-}
-
-static __always_inline void *inet_csk_ca(const struct sock *sk)
-{
- return (void *)inet_csk(sk)->icsk_ca_priv;
-}
-
-static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk)
-{
- return (struct tcp_sock *)sk;
-}
-
-static __always_inline bool before(__u32 seq1, __u32 seq2)
-{
- return (__s32)(seq1-seq2) < 0;
-}
-#define after(seq2, seq1) before(seq1, seq2)
-
-#define TCP_ECN_OK 1
-#define TCP_ECN_QUEUE_CWR 2
-#define TCP_ECN_DEMAND_CWR 4
-#define TCP_ECN_SEEN 8
-
-enum inet_csk_ack_state_t {
- ICSK_ACK_SCHED = 1,
- ICSK_ACK_TIMER = 2,
- ICSK_ACK_PUSHED = 4,
- ICSK_ACK_PUSHED2 = 8,
- ICSK_ACK_NOW = 16 /* Send the next ACK immediately (once) */
-};
-
-enum tcp_ca_event {
- CA_EVENT_TX_START = 0,
- CA_EVENT_CWND_RESTART = 1,
- CA_EVENT_COMPLETE_CWR = 2,
- CA_EVENT_LOSS = 3,
- CA_EVENT_ECN_NO_CE = 4,
- CA_EVENT_ECN_IS_CE = 5,
-};
-
-struct ack_sample {
- __u32 pkts_acked;
- __s32 rtt_us;
- __u32 in_flight;
-} __attribute__((preserve_access_index));
-
-struct rate_sample {
- __u64 prior_mstamp; /* starting timestamp for interval */
- __u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
- __s32 delivered; /* number of packets delivered over interval */
- long interval_us; /* time for tp->delivered to incr "delivered" */
- __u32 snd_interval_us; /* snd interval for delivered packets */
- __u32 rcv_interval_us; /* rcv interval for delivered packets */
- long rtt_us; /* RTT of last (S)ACKed packet (or -1) */
- int losses; /* number of packets marked lost upon ACK */
- __u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */
- __u32 prior_in_flight; /* in flight before this ACK */
- bool is_app_limited; /* is sample from packet with bubble in pipe? */
- bool is_retrans; /* is sample from retransmission? */
- bool is_ack_delayed; /* is this (likely) a delayed ACK? */
-} __attribute__((preserve_access_index));
-
-#define TCP_CA_NAME_MAX 16
-#define TCP_CONG_NEEDS_ECN 0x2
-
-struct tcp_congestion_ops {
- char name[TCP_CA_NAME_MAX];
- __u32 flags;
-
- /* initialize private data (optional) */
- void (*init)(struct sock *sk);
- /* cleanup private data (optional) */
- void (*release)(struct sock *sk);
-
- /* return slow start threshold (required) */
- __u32 (*ssthresh)(struct sock *sk);
- /* do new cwnd calculation (required) */
- void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked);
- /* call before changing ca_state (optional) */
- void (*set_state)(struct sock *sk, __u8 new_state);
- /* call when cwnd event occurs (optional) */
- void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
- /* call when ack arrives (optional) */
- void (*in_ack_event)(struct sock *sk, __u32 flags);
- /* new value of cwnd after loss (required) */
- __u32 (*undo_cwnd)(struct sock *sk);
- /* hook for packet ack accounting (optional) */
- void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
- /* override sysctl_tcp_min_tso_segs */
- __u32 (*min_tso_segs)(struct sock *sk);
- /* returns the multiplier used in tcp_sndbuf_expand (optional) */
- __u32 (*sndbuf_expand)(struct sock *sk);
- /* call when packets are delivered to update cwnd and pacing rate,
- * after all the ca_state processing. (optional)
- */
- void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
- void *owner;
-};
-
-#define min(a, b) ((a) < (b) ? (a) : (b))
-#define max(a, b) ((a) > (b) ? (a) : (b))
-#define min_not_zero(x, y) ({ \
- typeof(x) __x = (x); \
- typeof(y) __y = (y); \
- __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
-
-static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp)
-{
- return tp->snd_cwnd < tp->snd_ssthresh;
-}
-
-static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
-
- /* If in slow start, ensure cwnd grows to twice what was ACKed. */
- if (tcp_in_slow_start(tp))
- return tp->snd_cwnd < 2 * tp->max_packets_out;
-
- return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
-}
-
-static __always_inline bool tcp_cc_eq(const char *a, const char *b)
-{
- int i;
-
- for (i = 0; i < TCP_CA_NAME_MAX; i++) {
- if (a[i] != b[i])
- return false;
- if (!a[i])
- break;
- }
-
- return true;
-}
-
-extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
-extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
-
-struct mptcp_sock {
- struct inet_connection_sock sk;
-
- __u32 token;
- struct sock *first;
- char ca_name[TCP_CA_NAME_MAX];
-} __attribute__((preserve_access_index));
-
-#endif
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index edcd261065..2a18bd320e 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -10,18 +10,30 @@
#include <linux/percpu-defs.h>
#include <linux/sysfs.h>
#include <linux/tracepoint.h>
+#include <linux/net.h>
+#include <linux/socket.h>
+#include <linux/nsproxy.h>
+#include <linux/inet.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/un.h>
+#include <net/sock.h>
#include "bpf_testmod.h"
#include "bpf_testmod_kfunc.h"
#define CREATE_TRACE_POINTS
#include "bpf_testmod-events.h"
+#define CONNECT_TIMEOUT_SEC 1
+
typedef int (*func_proto_typedef)(long);
typedef int (*func_proto_typedef_nested1)(func_proto_typedef);
typedef int (*func_proto_typedef_nested2)(func_proto_typedef_nested1);
DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123;
long bpf_testmod_test_struct_arg_result;
+static DEFINE_MUTEX(sock_lock);
+static struct socket *sock;
struct bpf_testmod_struct_arg_1 {
int a;
@@ -497,6 +509,241 @@ __bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused
return arg;
}
+__bpf_kfunc void bpf_kfunc_call_test_sleepable(void)
+{
+}
+
+__bpf_kfunc int bpf_kfunc_init_sock(struct init_sock_args *args)
+{
+ int proto;
+ int err;
+
+ mutex_lock(&sock_lock);
+
+ if (sock) {
+ pr_err("%s called without releasing old sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ switch (args->af) {
+ case AF_INET:
+ case AF_INET6:
+ proto = args->type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP;
+ break;
+ case AF_UNIX:
+ proto = PF_UNIX;
+ break;
+ default:
+ pr_err("invalid address family %d\n", args->af);
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = sock_create_kern(current->nsproxy->net_ns, args->af, args->type,
+ proto, &sock);
+
+ if (!err)
+ /* Set timeout for call to kernel_connect() to prevent it from hanging,
+ * and consider the connection attempt failed if it returns
+ * -EINPROGRESS.
+ */
+ sock->sk->sk_sndtimeo = CONNECT_TIMEOUT_SEC * HZ;
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+__bpf_kfunc void bpf_kfunc_close_sock(void)
+{
+ mutex_lock(&sock_lock);
+
+ if (sock) {
+ sock_release(sock);
+ sock = NULL;
+ }
+
+ mutex_unlock(&sock_lock);
+}
+
+__bpf_kfunc int bpf_kfunc_call_kernel_connect(struct addr_args *args)
+{
+ int err;
+
+ if (args->addrlen > sizeof(args->addr))
+ return -EINVAL;
+
+ mutex_lock(&sock_lock);
+
+ if (!sock) {
+ pr_err("%s called without initializing sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = kernel_connect(sock, (struct sockaddr *)&args->addr,
+ args->addrlen, 0);
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+__bpf_kfunc int bpf_kfunc_call_kernel_bind(struct addr_args *args)
+{
+ int err;
+
+ if (args->addrlen > sizeof(args->addr))
+ return -EINVAL;
+
+ mutex_lock(&sock_lock);
+
+ if (!sock) {
+ pr_err("%s called without initializing sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = kernel_bind(sock, (struct sockaddr *)&args->addr, args->addrlen);
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+__bpf_kfunc int bpf_kfunc_call_kernel_listen(void)
+{
+ int err;
+
+ mutex_lock(&sock_lock);
+
+ if (!sock) {
+ pr_err("%s called without initializing sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = kernel_listen(sock, 128);
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+__bpf_kfunc int bpf_kfunc_call_kernel_sendmsg(struct sendmsg_args *args)
+{
+ struct msghdr msg = {
+ .msg_name = &args->addr.addr,
+ .msg_namelen = args->addr.addrlen,
+ };
+ struct kvec iov;
+ int err;
+
+ if (args->addr.addrlen > sizeof(args->addr.addr) ||
+ args->msglen > sizeof(args->msg))
+ return -EINVAL;
+
+ iov.iov_base = args->msg;
+ iov.iov_len = args->msglen;
+
+ mutex_lock(&sock_lock);
+
+ if (!sock) {
+ pr_err("%s called without initializing sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = kernel_sendmsg(sock, &msg, &iov, 1, args->msglen);
+ args->addr.addrlen = msg.msg_namelen;
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+__bpf_kfunc int bpf_kfunc_call_sock_sendmsg(struct sendmsg_args *args)
+{
+ struct msghdr msg = {
+ .msg_name = &args->addr.addr,
+ .msg_namelen = args->addr.addrlen,
+ };
+ struct kvec iov;
+ int err;
+
+ if (args->addr.addrlen > sizeof(args->addr.addr) ||
+ args->msglen > sizeof(args->msg))
+ return -EINVAL;
+
+ iov.iov_base = args->msg;
+ iov.iov_len = args->msglen;
+
+ iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &iov, 1, args->msglen);
+ mutex_lock(&sock_lock);
+
+ if (!sock) {
+ pr_err("%s called without initializing sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = sock_sendmsg(sock, &msg);
+ args->addr.addrlen = msg.msg_namelen;
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+__bpf_kfunc int bpf_kfunc_call_kernel_getsockname(struct addr_args *args)
+{
+ int err;
+
+ mutex_lock(&sock_lock);
+
+ if (!sock) {
+ pr_err("%s called without initializing sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = kernel_getsockname(sock, (struct sockaddr *)&args->addr);
+ if (err < 0)
+ goto out;
+
+ args->addrlen = err;
+ err = 0;
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+__bpf_kfunc int bpf_kfunc_call_kernel_getpeername(struct addr_args *args)
+{
+ int err;
+
+ mutex_lock(&sock_lock);
+
+ if (!sock) {
+ pr_err("%s called without initializing sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = kernel_getpeername(sock, (struct sockaddr *)&args->addr);
+ if (err < 0)
+ goto out;
+
+ args->addrlen = err;
+ err = 0;
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids)
BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc)
BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
@@ -523,6 +770,16 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_sleepable, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_init_sock, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_close_sock, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_connect, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_bind, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_listen, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_sendmsg, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_sock_sendmsg, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_getsockname, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_getpeername, KF_SLEEPABLE)
BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids)
static int bpf_testmod_ops_init(struct btf *btf)
@@ -653,6 +910,8 @@ static int bpf_testmod_init(void)
return ret;
if (bpf_fentry_test1(0) < 0)
return -EINVAL;
+ sock = NULL;
+ mutex_init(&sock_lock);
return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
}
@@ -666,6 +925,7 @@ static void bpf_testmod_exit(void)
while (refcount_read(&prog_test_struct.cnt) > 1)
msleep(20);
+ bpf_kfunc_close_sock();
sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
}
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
index 7c664dd610..b0d586a675 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
@@ -64,6 +64,22 @@ struct prog_test_fail3 {
char arr2[];
};
+struct init_sock_args {
+ int af;
+ int type;
+};
+
+struct addr_args {
+ char addr[sizeof(struct __kernel_sockaddr_storage)];
+ int addrlen;
+};
+
+struct sendmsg_args {
+ struct addr_args addr;
+ char msg[10];
+ int msglen;
+};
+
struct prog_test_ref_kfunc *
bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym;
void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
@@ -96,6 +112,7 @@ void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym;
void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym;
void bpf_kfunc_call_test_destructive(void) __ksym;
+void bpf_kfunc_call_test_sleepable(void) __ksym;
void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p);
struct prog_test_member *bpf_kfunc_call_memb_acquire(void);
@@ -106,4 +123,15 @@ void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p);
void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len);
void bpf_kfunc_common_test(void) __ksym;
+
+int bpf_kfunc_init_sock(struct init_sock_args *args) __ksym;
+void bpf_kfunc_close_sock(void) __ksym;
+int bpf_kfunc_call_kernel_connect(struct addr_args *args) __ksym;
+int bpf_kfunc_call_kernel_bind(struct addr_args *args) __ksym;
+int bpf_kfunc_call_kernel_listen(void) __ksym;
+int bpf_kfunc_call_kernel_sendmsg(struct sendmsg_args *args) __ksym;
+int bpf_kfunc_call_sock_sendmsg(struct sendmsg_args *args) __ksym;
+int bpf_kfunc_call_kernel_getsockname(struct addr_args *args) __ksym;
+int bpf_kfunc_call_kernel_getpeername(struct addr_args *args) __ksym;
+
#endif /* _BPF_TESTMOD_KFUNC_H */
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index e812876d79..23bb9a9e6a 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -429,7 +429,7 @@ int create_and_get_cgroup(const char *relative_path)
* which is an invalid cgroup id.
* If there is a failure, it prints the error to stderr.
*/
-unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir)
+static unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir)
{
int dirfd, err, flags, mount_id, fhsize;
union {
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index dec9fd7ebb..98b6b6a886 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -13,7 +13,12 @@ CONFIG_BPF_SYSCALL=y
CONFIG_CGROUP_BPF=y
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_USER_API=y
CONFIG_CRYPTO_USER_API_HASH=y
+CONFIG_CRYPTO_USER_API_SKCIPHER=y
+CONFIG_CRYPTO_SKCIPHER=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_AES=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_INFO_DWARF4=y
@@ -91,3 +96,5 @@ CONFIG_VSOCKETS=y
CONFIG_VXLAN=y
CONFIG_XDP_SOCKETS=y
CONFIG_XFRM_INTERFACE=y
+CONFIG_TCP_CONG_DCTCP=y
+CONFIG_TCP_CONG_BBR=y
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index be96bf0223..35250e6cde 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -52,6 +52,8 @@ struct ipv6_packet pkt_v6 = {
.tcp.doff = 5,
};
+static const struct network_helper_opts default_opts;
+
int settimeo(int fd, int timeout_ms)
{
struct timeval timeout = { .tv_sec = 3 };
@@ -78,24 +80,22 @@ int settimeo(int fd, int timeout_ms)
#define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
-static int __start_server(int type, int protocol, const struct sockaddr *addr,
- socklen_t addrlen, int timeout_ms, bool reuseport)
+static int __start_server(int type, const struct sockaddr *addr, socklen_t addrlen,
+ const struct network_helper_opts *opts)
{
- int on = 1;
int fd;
- fd = socket(addr->sa_family, type, protocol);
+ fd = socket(addr->sa_family, type, opts->proto);
if (fd < 0) {
log_err("Failed to create server socket");
return -1;
}
- if (settimeo(fd, timeout_ms))
+ if (settimeo(fd, opts->timeout_ms))
goto error_close;
- if (reuseport &&
- setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on))) {
- log_err("Failed to set SO_REUSEPORT");
+ if (opts->post_socket_cb && opts->post_socket_cb(fd, NULL)) {
+ log_err("Failed to call post_socket_cb");
goto error_close;
}
@@ -118,35 +118,35 @@ error_close:
return -1;
}
-static int start_server_proto(int family, int type, int protocol,
- const char *addr_str, __u16 port, int timeout_ms)
+int start_server(int family, int type, const char *addr_str, __u16 port,
+ int timeout_ms)
{
+ struct network_helper_opts opts = {
+ .timeout_ms = timeout_ms,
+ };
struct sockaddr_storage addr;
socklen_t addrlen;
if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
return -1;
- return __start_server(type, protocol, (struct sockaddr *)&addr,
- addrlen, timeout_ms, false);
+ return __start_server(type, (struct sockaddr *)&addr, addrlen, &opts);
}
-int start_server(int family, int type, const char *addr_str, __u16 port,
- int timeout_ms)
+static int reuseport_cb(int fd, const struct post_socket_opts *opts)
{
- return start_server_proto(family, type, 0, addr_str, port, timeout_ms);
-}
+ int on = 1;
-int start_mptcp_server(int family, const char *addr_str, __u16 port,
- int timeout_ms)
-{
- return start_server_proto(family, SOCK_STREAM, IPPROTO_MPTCP, addr_str,
- port, timeout_ms);
+ return setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on));
}
int *start_reuseport_server(int family, int type, const char *addr_str,
__u16 port, int timeout_ms, unsigned int nr_listens)
{
+ struct network_helper_opts opts = {
+ .timeout_ms = timeout_ms,
+ .post_socket_cb = reuseport_cb,
+ };
struct sockaddr_storage addr;
unsigned int nr_fds = 0;
socklen_t addrlen;
@@ -162,8 +162,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str,
if (!fds)
return NULL;
- fds[0] = __start_server(type, 0, (struct sockaddr *)&addr, addrlen,
- timeout_ms, true);
+ fds[0] = __start_server(type, (struct sockaddr *)&addr, addrlen, &opts);
if (fds[0] == -1)
goto close_fds;
nr_fds = 1;
@@ -172,8 +171,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str,
goto close_fds;
for (; nr_fds < nr_listens; nr_fds++) {
- fds[nr_fds] = __start_server(type, 0, (struct sockaddr *)&addr,
- addrlen, timeout_ms, true);
+ fds[nr_fds] = __start_server(type, (struct sockaddr *)&addr, addrlen, &opts);
if (fds[nr_fds] == -1)
goto close_fds;
}
@@ -185,6 +183,15 @@ close_fds:
return NULL;
}
+int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t len,
+ const struct network_helper_opts *opts)
+{
+ if (!opts)
+ opts = &default_opts;
+
+ return __start_server(type, (struct sockaddr *)addr, len, opts);
+}
+
void free_fds(int *fds, unsigned int nr_close_fds)
{
if (fds) {
@@ -258,17 +265,24 @@ static int connect_fd_to_addr(int fd,
return 0;
}
-int connect_to_addr(const struct sockaddr_storage *addr, socklen_t addrlen, int type)
+int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
+ const struct network_helper_opts *opts)
{
int fd;
- fd = socket(addr->ss_family, type, 0);
+ if (!opts)
+ opts = &default_opts;
+
+ fd = socket(addr->ss_family, type, opts->proto);
if (fd < 0) {
log_err("Failed to create client socket");
return -1;
}
- if (connect_fd_to_addr(fd, addr, addrlen, false))
+ if (settimeo(fd, opts->timeout_ms))
+ goto error_close;
+
+ if (connect_fd_to_addr(fd, addr, addrlen, opts->must_fail))
goto error_close;
return fd;
@@ -278,8 +292,6 @@ error_close:
return -1;
}
-static const struct network_helper_opts default_opts;
-
int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
{
struct sockaddr_storage addr;
@@ -442,22 +454,30 @@ struct nstoken *open_netns(const char *name)
struct nstoken *token;
token = calloc(1, sizeof(struct nstoken));
- if (!ASSERT_OK_PTR(token, "malloc token"))
+ if (!token) {
+ log_err("Failed to malloc token");
return NULL;
+ }
token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
- if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net"))
+ if (token->orig_netns_fd == -1) {
+ log_err("Failed to open(/proc/self/ns/net)");
goto fail;
+ }
snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
- if (!ASSERT_GE(nsfd, 0, "open netns fd"))
+ if (nsfd == -1) {
+ log_err("Failed to open(%s)", nspath);
goto fail;
+ }
err = setns(nsfd, CLONE_NEWNET);
close(nsfd);
- if (!ASSERT_OK(err, "setns"))
+ if (err) {
+ log_err("Failed to setns(nsfd)");
goto fail;
+ }
return token;
fail:
@@ -472,7 +492,8 @@ void close_netns(struct nstoken *token)
if (!token)
return;
- ASSERT_OK(setns(token->orig_netns_fd, CLONE_NEWNET), "setns");
+ if (setns(token->orig_netns_fd, CLONE_NEWNET))
+ log_err("Failed to setns(orig_netns_fd)");
close(token->orig_netns_fd);
free(token);
}
@@ -499,3 +520,153 @@ int get_socket_local_port(int sock_fd)
return -1;
}
+
+int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param)
+{
+ struct ifreq ifr = {0};
+ int sockfd, err;
+
+ sockfd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sockfd < 0)
+ return -errno;
+
+ memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+
+ ring_param->cmd = ETHTOOL_GRINGPARAM;
+ ifr.ifr_data = (char *)ring_param;
+
+ if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) {
+ err = errno;
+ close(sockfd);
+ return -err;
+ }
+
+ close(sockfd);
+ return 0;
+}
+
+int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param)
+{
+ struct ifreq ifr = {0};
+ int sockfd, err;
+
+ sockfd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sockfd < 0)
+ return -errno;
+
+ memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+
+ ring_param->cmd = ETHTOOL_SRINGPARAM;
+ ifr.ifr_data = (char *)ring_param;
+
+ if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) {
+ err = errno;
+ close(sockfd);
+ return -err;
+ }
+
+ close(sockfd);
+ return 0;
+}
+
+struct send_recv_arg {
+ int fd;
+ uint32_t bytes;
+ int stop;
+};
+
+static void *send_recv_server(void *arg)
+{
+ struct send_recv_arg *a = (struct send_recv_arg *)arg;
+ ssize_t nr_sent = 0, bytes = 0;
+ char batch[1500];
+ int err = 0, fd;
+
+ fd = accept(a->fd, NULL, NULL);
+ while (fd == -1) {
+ if (errno == EINTR)
+ continue;
+ err = -errno;
+ goto done;
+ }
+
+ if (settimeo(fd, 0)) {
+ err = -errno;
+ goto done;
+ }
+
+ while (bytes < a->bytes && !READ_ONCE(a->stop)) {
+ nr_sent = send(fd, &batch,
+ MIN(a->bytes - bytes, sizeof(batch)), 0);
+ if (nr_sent == -1 && errno == EINTR)
+ continue;
+ if (nr_sent == -1) {
+ err = -errno;
+ break;
+ }
+ bytes += nr_sent;
+ }
+
+ if (bytes != a->bytes) {
+ log_err("send %zd expected %u", bytes, a->bytes);
+ if (!err)
+ err = bytes > a->bytes ? -E2BIG : -EINTR;
+ }
+
+done:
+ if (fd >= 0)
+ close(fd);
+ if (err) {
+ WRITE_ONCE(a->stop, 1);
+ return ERR_PTR(err);
+ }
+ return NULL;
+}
+
+int send_recv_data(int lfd, int fd, uint32_t total_bytes)
+{
+ ssize_t nr_recv = 0, bytes = 0;
+ struct send_recv_arg arg = {
+ .fd = lfd,
+ .bytes = total_bytes,
+ .stop = 0,
+ };
+ pthread_t srv_thread;
+ void *thread_ret;
+ char batch[1500];
+ int err = 0;
+
+ err = pthread_create(&srv_thread, NULL, send_recv_server, (void *)&arg);
+ if (err) {
+ log_err("Failed to pthread_create");
+ return err;
+ }
+
+ /* recv total_bytes */
+ while (bytes < total_bytes && !READ_ONCE(arg.stop)) {
+ nr_recv = recv(fd, &batch,
+ MIN(total_bytes - bytes, sizeof(batch)), 0);
+ if (nr_recv == -1 && errno == EINTR)
+ continue;
+ if (nr_recv == -1) {
+ err = -errno;
+ break;
+ }
+ bytes += nr_recv;
+ }
+
+ if (bytes != total_bytes) {
+ log_err("recv %zd expected %u", bytes, total_bytes);
+ if (!err)
+ err = bytes > total_bytes ? -E2BIG : -EINTR;
+ }
+
+ WRITE_ONCE(arg.stop, 1);
+ pthread_join(srv_thread, &thread_ret);
+ if (IS_ERR(thread_ret)) {
+ log_err("Failed in thread_ret %ld", PTR_ERR(thread_ret));
+ err = err ? : PTR_ERR(thread_ret);
+ }
+
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 94b9be24e3..883c7ea9d8 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -9,14 +9,20 @@ typedef __u16 __sum16;
#include <linux/if_packet.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
+#include <linux/err.h>
#include <netinet/tcp.h>
#include <bpf/bpf_endian.h>
+#include <net/if.h>
#define MAGIC_VAL 0x1234
#define NUM_ITER 100000
#define VIP_NUM 5
#define MAGIC_BYTES 123
+struct post_socket_opts {};
+
struct network_helper_opts {
const char *cc;
int timeout_ms;
@@ -24,6 +30,7 @@ struct network_helper_opts {
bool noconnect;
int type;
int proto;
+ int (*post_socket_cb)(int fd, const struct post_socket_opts *opts);
};
/* ipv4 test vector */
@@ -45,13 +52,14 @@ extern struct ipv6_packet pkt_v6;
int settimeo(int fd, int timeout_ms);
int start_server(int family, int type, const char *addr, __u16 port,
int timeout_ms);
-int start_mptcp_server(int family, const char *addr, __u16 port,
- int timeout_ms);
int *start_reuseport_server(int family, int type, const char *addr_str,
__u16 port, int timeout_ms,
unsigned int nr_listens);
+int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t len,
+ const struct network_helper_opts *opts);
void free_fds(int *fds, unsigned int nr_close_fds);
-int connect_to_addr(const struct sockaddr_storage *addr, socklen_t len, int type);
+int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t len,
+ const struct network_helper_opts *opts);
int connect_to_fd(int server_fd, int timeout_ms);
int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts);
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
@@ -61,6 +69,8 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
struct sockaddr_storage *addr, socklen_t *len);
char *ping_command(int family);
int get_socket_local_port(int sock_fd);
+int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param);
+int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param);
struct nstoken;
/**
@@ -71,6 +81,7 @@ struct nstoken;
*/
struct nstoken *open_netns(const char *name);
void close_netns(struct nstoken *token);
+int send_recv_data(int lfd, int fd, uint32_t total_bytes);
static __u16 csum_fold(__u32 csum)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c
new file mode 100644
index 0000000000..0807a48a58
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "arena_atomics.skel.h"
+
+static void test_add(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.add);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->add64_value, 3, "add64_value");
+ ASSERT_EQ(skel->arena->add64_result, 1, "add64_result");
+
+ ASSERT_EQ(skel->arena->add32_value, 3, "add32_value");
+ ASSERT_EQ(skel->arena->add32_result, 1, "add32_result");
+
+ ASSERT_EQ(skel->arena->add_stack_value_copy, 3, "add_stack_value");
+ ASSERT_EQ(skel->arena->add_stack_result, 1, "add_stack_result");
+
+ ASSERT_EQ(skel->arena->add_noreturn_value, 3, "add_noreturn_value");
+}
+
+static void test_sub(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.sub);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->sub64_value, -1, "sub64_value");
+ ASSERT_EQ(skel->arena->sub64_result, 1, "sub64_result");
+
+ ASSERT_EQ(skel->arena->sub32_value, -1, "sub32_value");
+ ASSERT_EQ(skel->arena->sub32_result, 1, "sub32_result");
+
+ ASSERT_EQ(skel->arena->sub_stack_value_copy, -1, "sub_stack_value");
+ ASSERT_EQ(skel->arena->sub_stack_result, 1, "sub_stack_result");
+
+ ASSERT_EQ(skel->arena->sub_noreturn_value, -1, "sub_noreturn_value");
+}
+
+static void test_and(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.and);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->and64_value, 0x010ull << 32, "and64_value");
+ ASSERT_EQ(skel->arena->and32_value, 0x010, "and32_value");
+}
+
+static void test_or(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.or);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->or64_value, 0x111ull << 32, "or64_value");
+ ASSERT_EQ(skel->arena->or32_value, 0x111, "or32_value");
+}
+
+static void test_xor(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.xor);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->xor64_value, 0x101ull << 32, "xor64_value");
+ ASSERT_EQ(skel->arena->xor32_value, 0x101, "xor32_value");
+}
+
+static void test_cmpxchg(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.cmpxchg);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->cmpxchg64_value, 2, "cmpxchg64_value");
+ ASSERT_EQ(skel->arena->cmpxchg64_result_fail, 1, "cmpxchg_result_fail");
+ ASSERT_EQ(skel->arena->cmpxchg64_result_succeed, 1, "cmpxchg_result_succeed");
+
+ ASSERT_EQ(skel->arena->cmpxchg32_value, 2, "lcmpxchg32_value");
+ ASSERT_EQ(skel->arena->cmpxchg32_result_fail, 1, "cmpxchg_result_fail");
+ ASSERT_EQ(skel->arena->cmpxchg32_result_succeed, 1, "cmpxchg_result_succeed");
+}
+
+static void test_xchg(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.xchg);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->xchg64_value, 2, "xchg64_value");
+ ASSERT_EQ(skel->arena->xchg64_result, 1, "xchg64_result");
+
+ ASSERT_EQ(skel->arena->xchg32_value, 2, "xchg32_value");
+ ASSERT_EQ(skel->arena->xchg32_result, 1, "xchg32_result");
+}
+
+void test_arena_atomics(void)
+{
+ struct arena_atomics *skel;
+ int err;
+
+ skel = arena_atomics__open();
+ if (!ASSERT_OK_PTR(skel, "arena atomics skeleton open"))
+ return;
+
+ if (skel->data->skip_tests) {
+ printf("%s:SKIP:no ENABLE_ATOMICS_TESTS or no addr_space_cast support in clang",
+ __func__);
+ test__skip();
+ goto cleanup;
+ }
+ err = arena_atomics__load(skel);
+ if (!ASSERT_OK(err, "arena atomics skeleton load"))
+ return;
+ skel->bss->pid = getpid();
+
+ if (test__start_subtest("add"))
+ test_add(skel);
+ if (test__start_subtest("sub"))
+ test_sub(skel);
+ if (test__start_subtest("and"))
+ test_and(skel);
+ if (test__start_subtest("or"))
+ test_or(skel);
+ if (test__start_subtest("xor"))
+ test_xor(skel);
+ if (test__start_subtest("cmpxchg"))
+ test_cmpxchg(skel);
+ if (test__start_subtest("xchg"))
+ test_xchg(skel);
+
+cleanup:
+ arena_atomics__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 1454cebc26..4407ea428e 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -573,6 +573,115 @@ cleanup:
close(lsm_fd);
}
+static void tp_btf_subtest(struct test_bpf_cookie *skel)
+{
+ __u64 cookie;
+ int prog_fd, link_fd = -1;
+ struct bpf_link *link = NULL;
+ LIBBPF_OPTS(bpf_link_create_opts, link_opts);
+ LIBBPF_OPTS(bpf_raw_tp_opts, raw_tp_opts);
+ LIBBPF_OPTS(bpf_trace_opts, trace_opts);
+
+ /* There are three different ways to attach tp_btf (BTF-aware raw
+ * tracepoint) programs. Let's test all of them.
+ */
+ prog_fd = bpf_program__fd(skel->progs.handle_tp_btf);
+
+ /* low-level BPF_RAW_TRACEPOINT_OPEN command wrapper */
+ skel->bss->tp_btf_res = 0;
+
+ raw_tp_opts.cookie = cookie = 0x11000000000000L;
+ link_fd = bpf_raw_tracepoint_open_opts(prog_fd, &raw_tp_opts);
+ if (!ASSERT_GE(link_fd, 0, "bpf_raw_tracepoint_open_opts"))
+ goto cleanup;
+
+ usleep(1); /* trigger */
+ close(link_fd); /* detach */
+ link_fd = -1;
+
+ ASSERT_EQ(skel->bss->tp_btf_res, cookie, "raw_tp_open_res");
+
+ /* low-level generic bpf_link_create() API */
+ skel->bss->tp_btf_res = 0;
+
+ link_opts.tracing.cookie = cookie = 0x22000000000000L;
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_RAW_TP, &link_opts);
+ if (!ASSERT_GE(link_fd, 0, "bpf_link_create"))
+ goto cleanup;
+
+ usleep(1); /* trigger */
+ close(link_fd); /* detach */
+ link_fd = -1;
+
+ ASSERT_EQ(skel->bss->tp_btf_res, cookie, "link_create_res");
+
+ /* high-level bpf_link-based bpf_program__attach_trace_opts() API */
+ skel->bss->tp_btf_res = 0;
+
+ trace_opts.cookie = cookie = 0x33000000000000L;
+ link = bpf_program__attach_trace_opts(skel->progs.handle_tp_btf, &trace_opts);
+ if (!ASSERT_OK_PTR(link, "attach_trace_opts"))
+ goto cleanup;
+
+ usleep(1); /* trigger */
+ bpf_link__destroy(link); /* detach */
+ link = NULL;
+
+ ASSERT_EQ(skel->bss->tp_btf_res, cookie, "attach_trace_opts_res");
+
+cleanup:
+ if (link_fd >= 0)
+ close(link_fd);
+ bpf_link__destroy(link);
+}
+
+static void raw_tp_subtest(struct test_bpf_cookie *skel)
+{
+ __u64 cookie;
+ int prog_fd, link_fd = -1;
+ struct bpf_link *link = NULL;
+ LIBBPF_OPTS(bpf_raw_tp_opts, raw_tp_opts);
+ LIBBPF_OPTS(bpf_raw_tracepoint_opts, opts);
+
+ /* There are two different ways to attach raw_tp programs */
+ prog_fd = bpf_program__fd(skel->progs.handle_raw_tp);
+
+ /* low-level BPF_RAW_TRACEPOINT_OPEN command wrapper */
+ skel->bss->raw_tp_res = 0;
+
+ raw_tp_opts.tp_name = "sys_enter";
+ raw_tp_opts.cookie = cookie = 0x55000000000000L;
+ link_fd = bpf_raw_tracepoint_open_opts(prog_fd, &raw_tp_opts);
+ if (!ASSERT_GE(link_fd, 0, "bpf_raw_tracepoint_open_opts"))
+ goto cleanup;
+
+ usleep(1); /* trigger */
+ close(link_fd); /* detach */
+ link_fd = -1;
+
+ ASSERT_EQ(skel->bss->raw_tp_res, cookie, "raw_tp_open_res");
+
+ /* high-level bpf_link-based bpf_program__attach_raw_tracepoint_opts() API */
+ skel->bss->raw_tp_res = 0;
+
+ opts.cookie = cookie = 0x66000000000000L;
+ link = bpf_program__attach_raw_tracepoint_opts(skel->progs.handle_raw_tp,
+ "sys_enter", &opts);
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp_opts"))
+ goto cleanup;
+
+ usleep(1); /* trigger */
+ bpf_link__destroy(link); /* detach */
+ link = NULL;
+
+ ASSERT_EQ(skel->bss->raw_tp_res, cookie, "attach_raw_tp_opts_res");
+
+cleanup:
+ if (link_fd >= 0)
+ close(link_fd);
+ bpf_link__destroy(link);
+}
+
void test_bpf_cookie(void)
{
struct test_bpf_cookie *skel;
@@ -601,6 +710,9 @@ void test_bpf_cookie(void)
tracing_subtest(skel);
if (test__start_subtest("lsm"))
lsm_subtest(skel);
-
+ if (test__start_subtest("tp_btf"))
+ tp_btf_subtest(skel);
+ if (test__start_subtest("raw_tp"))
+ raw_tp_subtest(skel);
test_bpf_cookie__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index a88e6e07e4..3f0daf6607 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -13,6 +13,8 @@
#include "tcp_ca_write_sk_pacing.skel.h"
#include "tcp_ca_incompl_cong_ops.skel.h"
#include "tcp_ca_unsupp_cong_op.skel.h"
+#include "tcp_ca_kfunc.skel.h"
+#include "bpf_cc_cubic.skel.h"
#ifndef ENOTSUPP
#define ENOTSUPP 524
@@ -20,7 +22,6 @@
static const unsigned int total_bytes = 10 * 1024 * 1024;
static int expected_stg = 0xeB9F;
-static int stop;
static int settcpca(int fd, const char *tcp_ca)
{
@@ -33,63 +34,12 @@ static int settcpca(int fd, const char *tcp_ca)
return 0;
}
-static void *server(void *arg)
-{
- int lfd = (int)(long)arg, err = 0, fd;
- ssize_t nr_sent = 0, bytes = 0;
- char batch[1500];
-
- fd = accept(lfd, NULL, NULL);
- while (fd == -1) {
- if (errno == EINTR)
- continue;
- err = -errno;
- goto done;
- }
-
- if (settimeo(fd, 0)) {
- err = -errno;
- goto done;
- }
-
- while (bytes < total_bytes && !READ_ONCE(stop)) {
- nr_sent = send(fd, &batch,
- MIN(total_bytes - bytes, sizeof(batch)), 0);
- if (nr_sent == -1 && errno == EINTR)
- continue;
- if (nr_sent == -1) {
- err = -errno;
- break;
- }
- bytes += nr_sent;
- }
-
- ASSERT_EQ(bytes, total_bytes, "send");
-
-done:
- if (fd >= 0)
- close(fd);
- if (err) {
- WRITE_ONCE(stop, 1);
- return ERR_PTR(err);
- }
- return NULL;
-}
-
static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
{
- struct sockaddr_in6 sa6 = {};
- ssize_t nr_recv = 0, bytes = 0;
int lfd = -1, fd = -1;
- pthread_t srv_thread;
- socklen_t addrlen = sizeof(sa6);
- void *thread_ret;
- char batch[1500];
int err;
- WRITE_ONCE(stop, 0);
-
- lfd = socket(AF_INET6, SOCK_STREAM, 0);
+ lfd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
if (!ASSERT_NEQ(lfd, -1, "socket"))
return;
@@ -99,23 +49,7 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
return;
}
- if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca) ||
- settimeo(lfd, 0) || settimeo(fd, 0))
- goto done;
-
- /* bind, listen and start server thread to accept */
- sa6.sin6_family = AF_INET6;
- sa6.sin6_addr = in6addr_loopback;
- err = bind(lfd, (struct sockaddr *)&sa6, addrlen);
- if (!ASSERT_NEQ(err, -1, "bind"))
- goto done;
-
- err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen);
- if (!ASSERT_NEQ(err, -1, "getsockname"))
- goto done;
-
- err = listen(lfd, 1);
- if (!ASSERT_NEQ(err, -1, "listen"))
+ if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca))
goto done;
if (sk_stg_map) {
@@ -126,7 +60,7 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
}
/* connect to server */
- err = connect(fd, (struct sockaddr *)&sa6, addrlen);
+ err = connect_fd_to_fd(fd, lfd, 0);
if (!ASSERT_NEQ(err, -1, "connect"))
goto done;
@@ -140,26 +74,7 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
goto done;
}
- err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd);
- if (!ASSERT_OK(err, "pthread_create"))
- goto done;
-
- /* recv total_bytes */
- while (bytes < total_bytes && !READ_ONCE(stop)) {
- nr_recv = recv(fd, &batch,
- MIN(total_bytes - bytes, sizeof(batch)), 0);
- if (nr_recv == -1 && errno == EINTR)
- continue;
- if (nr_recv == -1)
- break;
- bytes += nr_recv;
- }
-
- ASSERT_EQ(bytes, total_bytes, "recv");
-
- WRITE_ONCE(stop, 1);
- pthread_join(srv_thread, &thread_ret);
- ASSERT_OK(IS_ERR(thread_ret), "thread_ret");
+ ASSERT_OK(send_recv_data(lfd, fd, total_bytes), "send_recv_data");
done:
close(lfd);
@@ -315,7 +230,7 @@ static void test_rel_setsockopt(void)
struct bpf_dctcp_release *rel_skel;
libbpf_print_fn_t old_print_fn;
- err_str = "unknown func bpf_setsockopt";
+ err_str = "program of this type cannot use helper bpf_setsockopt";
found = false;
old_print_fn = libbpf_set_print(libbpf_debug_print);
@@ -392,7 +307,8 @@ static void test_update_ca(void)
return;
link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
- ASSERT_OK_PTR(link, "attach_struct_ops");
+ if (!ASSERT_OK_PTR(link, "attach_struct_ops"))
+ goto out;
do_test("tcp_ca_update", NULL);
saved_ca1_cnt = skel->bss->ca1_cnt;
@@ -406,6 +322,7 @@ static void test_update_ca(void)
ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt");
bpf_link__destroy(link);
+out:
tcp_ca_update__destroy(skel);
}
@@ -421,7 +338,8 @@ static void test_update_wrong(void)
return;
link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
- ASSERT_OK_PTR(link, "attach_struct_ops");
+ if (!ASSERT_OK_PTR(link, "attach_struct_ops"))
+ goto out;
do_test("tcp_ca_update", NULL);
saved_ca1_cnt = skel->bss->ca1_cnt;
@@ -434,6 +352,7 @@ static void test_update_wrong(void)
ASSERT_GT(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt");
bpf_link__destroy(link);
+out:
tcp_ca_update__destroy(skel);
}
@@ -448,7 +367,8 @@ static void test_mixed_links(void)
return;
link_nl = bpf_map__attach_struct_ops(skel->maps.ca_no_link);
- ASSERT_OK_PTR(link_nl, "attach_struct_ops_nl");
+ if (!ASSERT_OK_PTR(link_nl, "attach_struct_ops_nl"))
+ goto out;
link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
ASSERT_OK_PTR(link, "attach_struct_ops");
@@ -461,6 +381,7 @@ static void test_mixed_links(void)
bpf_link__destroy(link);
bpf_link__destroy(link_nl);
+out:
tcp_ca_update__destroy(skel);
}
@@ -503,7 +424,8 @@ static void test_link_replace(void)
bpf_link__destroy(link);
link = bpf_map__attach_struct_ops(skel->maps.ca_update_2);
- ASSERT_OK_PTR(link, "attach_struct_ops_2nd");
+ if (!ASSERT_OK_PTR(link, "attach_struct_ops_2nd"))
+ goto out;
/* BPF_F_REPLACE with a wrong old map Fd. It should fail!
*
@@ -526,9 +448,40 @@ static void test_link_replace(void)
bpf_link__destroy(link);
+out:
tcp_ca_update__destroy(skel);
}
+static void test_tcp_ca_kfunc(void)
+{
+ struct tcp_ca_kfunc *skel;
+
+ skel = tcp_ca_kfunc__open_and_load();
+ ASSERT_OK_PTR(skel, "tcp_ca_kfunc__open_and_load");
+ tcp_ca_kfunc__destroy(skel);
+}
+
+static void test_cc_cubic(void)
+{
+ struct bpf_cc_cubic *cc_cubic_skel;
+ struct bpf_link *link;
+
+ cc_cubic_skel = bpf_cc_cubic__open_and_load();
+ if (!ASSERT_OK_PTR(cc_cubic_skel, "bpf_cc_cubic__open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(cc_cubic_skel->maps.cc_cubic);
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
+ bpf_cc_cubic__destroy(cc_cubic_skel);
+ return;
+ }
+
+ do_test("bpf_cc_cubic", NULL);
+
+ bpf_link__destroy(link);
+ bpf_cc_cubic__destroy(cc_cubic_skel);
+}
+
void test_bpf_tcp_ca(void)
{
if (test__start_subtest("dctcp"))
@@ -557,4 +510,8 @@ void test_bpf_tcp_ca(void)
test_multi_links();
if (test__start_subtest("link_replace"))
test_link_replace();
+ if (test__start_subtest("tcp_ca_kfunc"))
+ test_tcp_ca_kfunc();
+ if (test__start_subtest("cc_cubic"))
+ test_cc_cubic();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index e9ea38aa82..09a8e6f9b3 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -653,7 +653,7 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
cmpstr =
"(struct file_operations){\n"
" .owner = (struct module *)0xffffffffffffffff,\n"
-" .llseek = (loff_t (*)(struct file *, loff_t, int))0xffffffffffffffff,";
+" .fop_flags = (fop_flags_t)4294967295,";
ASSERT_STRNEQ(str, cmpstr, strlen(cmpstr), "file_operations");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
index 2a55f717fc..34b59f6bac 100644
--- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
@@ -10,6 +10,7 @@
#include <netinet/tcp.h>
#include <test_progs.h>
+#include "network_helpers.h"
#include "progs/test_cls_redirect.h"
#include "test_cls_redirect.skel.h"
@@ -35,39 +36,6 @@ struct tuple {
struct addr_port dst;
};
-static int start_server(const struct sockaddr *addr, socklen_t len, int type)
-{
- int fd = socket(addr->sa_family, type, 0);
- if (CHECK_FAIL(fd == -1))
- return -1;
- if (CHECK_FAIL(bind(fd, addr, len) == -1))
- goto err;
- if (type == SOCK_STREAM && CHECK_FAIL(listen(fd, 128) == -1))
- goto err;
-
- return fd;
-
-err:
- close(fd);
- return -1;
-}
-
-static int connect_to_server(const struct sockaddr *addr, socklen_t len,
- int type)
-{
- int fd = socket(addr->sa_family, type, 0);
- if (CHECK_FAIL(fd == -1))
- return -1;
- if (CHECK_FAIL(connect(fd, addr, len)))
- goto err;
-
- return fd;
-
-err:
- close(fd);
- return -1;
-}
-
static bool fill_addr_port(const struct sockaddr *sa, struct addr_port *ap)
{
const struct sockaddr_in6 *in6;
@@ -98,14 +66,14 @@ static bool set_up_conn(const struct sockaddr *addr, socklen_t len, int type,
socklen_t slen = sizeof(ss);
struct sockaddr *sa = (struct sockaddr *)&ss;
- *server = start_server(addr, len, type);
+ *server = start_server_addr(type, (struct sockaddr_storage *)addr, len, NULL);
if (*server < 0)
return false;
if (CHECK_FAIL(getsockname(*server, sa, &slen)))
goto close_server;
- *conn = connect_to_server(sa, slen, type);
+ *conn = connect_to_addr(type, (struct sockaddr_storage *)sa, slen, NULL);
if (*conn < 0)
goto close_server;
diff --git a/tools/testing/selftests/bpf/prog_tests/crypto_sanity.c b/tools/testing/selftests/bpf/prog_tests/crypto_sanity.c
new file mode 100644
index 0000000000..b1a3a49a82
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/crypto_sanity.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#include <linux/in6.h>
+#include <linux/if_alg.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "crypto_sanity.skel.h"
+#include "crypto_basic.skel.h"
+
+#define NS_TEST "crypto_sanity_ns"
+#define IPV6_IFACE_ADDR "face::1"
+static const unsigned char crypto_key[] = "testtest12345678";
+static const char plain_text[] = "stringtoencrypt0";
+static int opfd = -1, tfmfd = -1;
+static const char algo[] = "ecb(aes)";
+static int init_afalg(void)
+{
+ struct sockaddr_alg sa = {
+ .salg_family = AF_ALG,
+ .salg_type = "skcipher",
+ .salg_name = "ecb(aes)"
+ };
+
+ tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
+ if (tfmfd == -1)
+ return errno;
+ if (bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa)) == -1)
+ return errno;
+ if (setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, crypto_key, 16) == -1)
+ return errno;
+ opfd = accept(tfmfd, NULL, 0);
+ if (opfd == -1)
+ return errno;
+ return 0;
+}
+
+static void deinit_afalg(void)
+{
+ if (tfmfd != -1)
+ close(tfmfd);
+ if (opfd != -1)
+ close(opfd);
+}
+
+static void do_crypt_afalg(const void *src, void *dst, int size, bool encrypt)
+{
+ struct msghdr msg = {};
+ struct cmsghdr *cmsg;
+ char cbuf[CMSG_SPACE(4)] = {0};
+ struct iovec iov;
+
+ msg.msg_control = cbuf;
+ msg.msg_controllen = sizeof(cbuf);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_ALG;
+ cmsg->cmsg_type = ALG_SET_OP;
+ cmsg->cmsg_len = CMSG_LEN(4);
+ *(__u32 *)CMSG_DATA(cmsg) = encrypt ? ALG_OP_ENCRYPT : ALG_OP_DECRYPT;
+
+ iov.iov_base = (char *)src;
+ iov.iov_len = size;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ sendmsg(opfd, &msg, 0);
+ read(opfd, dst, size);
+}
+
+void test_crypto_basic(void)
+{
+ RUN_TESTS(crypto_basic);
+}
+
+void test_crypto_sanity(void)
+{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_hook, .attach_point = BPF_TC_EGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_attach_enc);
+ LIBBPF_OPTS(bpf_tc_opts, tc_attach_dec);
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct nstoken *nstoken = NULL;
+ struct crypto_sanity *skel;
+ char afalg_plain[16] = {0};
+ char afalg_dst[16] = {0};
+ struct sockaddr_in6 addr;
+ int sockfd, err, pfd;
+ socklen_t addrlen;
+ u16 udp_test_port;
+
+ skel = crypto_sanity__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open"))
+ return;
+
+ SYS(fail, "ip netns add %s", NS_TEST);
+ SYS(fail, "ip -net %s -6 addr add %s/128 dev lo nodad", NS_TEST, IPV6_IFACE_ADDR);
+ SYS(fail, "ip -net %s link set dev lo up", NS_TEST);
+
+ nstoken = open_netns(NS_TEST);
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto fail;
+
+ err = init_afalg();
+ if (!ASSERT_OK(err, "AF_ALG init fail"))
+ goto fail;
+
+ qdisc_hook.ifindex = if_nametoindex("lo");
+ if (!ASSERT_GT(qdisc_hook.ifindex, 0, "if_nametoindex lo"))
+ goto fail;
+
+ skel->bss->key_len = 16;
+ skel->bss->authsize = 0;
+ udp_test_port = skel->data->udp_test_port;
+ memcpy(skel->bss->key, crypto_key, sizeof(crypto_key));
+ snprintf(skel->bss->algo, 128, "%s", algo);
+ pfd = bpf_program__fd(skel->progs.skb_crypto_setup);
+ if (!ASSERT_GT(pfd, 0, "skb_crypto_setup fd"))
+ goto fail;
+
+ err = bpf_prog_test_run_opts(pfd, &opts);
+ if (!ASSERT_OK(err, "skb_crypto_setup") ||
+ !ASSERT_OK(opts.retval, "skb_crypto_setup retval"))
+ goto fail;
+
+ if (!ASSERT_OK(skel->bss->status, "skb_crypto_setup status"))
+ goto fail;
+
+ err = bpf_tc_hook_create(&qdisc_hook);
+ if (!ASSERT_OK(err, "create qdisc hook"))
+ goto fail;
+
+ addrlen = sizeof(addr);
+ err = make_sockaddr(AF_INET6, IPV6_IFACE_ADDR, udp_test_port,
+ (void *)&addr, &addrlen);
+ if (!ASSERT_OK(err, "make_sockaddr"))
+ goto fail;
+
+ tc_attach_enc.prog_fd = bpf_program__fd(skel->progs.encrypt_sanity);
+ err = bpf_tc_attach(&qdisc_hook, &tc_attach_enc);
+ if (!ASSERT_OK(err, "attach encrypt filter"))
+ goto fail;
+
+ sockfd = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (!ASSERT_NEQ(sockfd, -1, "encrypt socket"))
+ goto fail;
+ err = sendto(sockfd, plain_text, sizeof(plain_text), 0, (void *)&addr, addrlen);
+ close(sockfd);
+ if (!ASSERT_EQ(err, sizeof(plain_text), "encrypt send"))
+ goto fail;
+
+ do_crypt_afalg(plain_text, afalg_dst, sizeof(afalg_dst), true);
+
+ if (!ASSERT_OK(skel->bss->status, "encrypt status"))
+ goto fail;
+ if (!ASSERT_STRNEQ(skel->bss->dst, afalg_dst, sizeof(afalg_dst), "encrypt AF_ALG"))
+ goto fail;
+
+ tc_attach_enc.flags = tc_attach_enc.prog_fd = tc_attach_enc.prog_id = 0;
+ err = bpf_tc_detach(&qdisc_hook, &tc_attach_enc);
+ if (!ASSERT_OK(err, "bpf_tc_detach encrypt"))
+ goto fail;
+
+ tc_attach_dec.prog_fd = bpf_program__fd(skel->progs.decrypt_sanity);
+ err = bpf_tc_attach(&qdisc_hook, &tc_attach_dec);
+ if (!ASSERT_OK(err, "attach decrypt filter"))
+ goto fail;
+
+ sockfd = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (!ASSERT_NEQ(sockfd, -1, "decrypt socket"))
+ goto fail;
+ err = sendto(sockfd, afalg_dst, sizeof(afalg_dst), 0, (void *)&addr, addrlen);
+ close(sockfd);
+ if (!ASSERT_EQ(err, sizeof(afalg_dst), "decrypt send"))
+ goto fail;
+
+ do_crypt_afalg(afalg_dst, afalg_plain, sizeof(afalg_plain), false);
+
+ if (!ASSERT_OK(skel->bss->status, "decrypt status"))
+ goto fail;
+ if (!ASSERT_STRNEQ(skel->bss->dst, afalg_plain, sizeof(afalg_plain), "decrypt AF_ALG"))
+ goto fail;
+
+ tc_attach_dec.flags = tc_attach_dec.prog_fd = tc_attach_dec.prog_id = 0;
+ err = bpf_tc_detach(&qdisc_hook, &tc_attach_dec);
+ ASSERT_OK(err, "bpf_tc_detach decrypt");
+
+fail:
+ close_netns(nstoken);
+ deinit_afalg();
+ SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null");
+ crypto_sanity__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c
index 261228eb68..438583e1f2 100644
--- a/tools/testing/selftests/bpf/prog_tests/empty_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c
@@ -94,6 +94,8 @@ void test_empty_skb(void)
SYS(out, "ip netns add empty_skb");
tok = open_netns("empty_skb");
+ if (!ASSERT_OK_PTR(tok, "setns"))
+ goto out;
SYS(out, "ip link add veth0 type veth peer veth1");
SYS(out, "ip link set dev veth0 up");
SYS(out, "ip link set dev veth1 up");
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
index f949647dbb..552a0875ca 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
@@ -21,13 +21,13 @@ static int do_sleep(void *skel)
}
#define STACK_SIZE (1024 * 1024)
-static char child_stack[STACK_SIZE];
void test_fexit_sleep(void)
{
struct fexit_sleep_lskel *fexit_skel = NULL;
int wstatus, duration = 0;
pid_t cpid;
+ char *child_stack = NULL;
int err, fexit_cnt;
fexit_skel = fexit_sleep_lskel__open_and_load();
@@ -38,6 +38,11 @@ void test_fexit_sleep(void)
if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
goto cleanup;
+ child_stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE |
+ MAP_ANONYMOUS | MAP_STACK, -1, 0);
+ if (!ASSERT_NEQ(child_stack, MAP_FAILED, "mmap"))
+ goto cleanup;
+
cpid = clone(do_sleep, child_stack + STACK_SIZE, CLONE_FILES | SIGCHLD, fexit_skel);
if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno)))
goto cleanup;
@@ -78,5 +83,6 @@ void test_fexit_sleep(void)
goto cleanup;
cleanup:
+ munmap(child_stack, STACK_SIZE);
fexit_sleep_lskel__destroy(fexit_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
index 3379df2d4c..bd76589580 100644
--- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
@@ -26,6 +26,17 @@
#define IPV6_TBID_ADDR "fd00::FFFF"
#define IPV6_TBID_NET "fd00::"
#define IPV6_TBID_DST "fd00::2"
+#define MARK_NO_POLICY 33
+#define MARK 42
+#define MARK_TABLE "200"
+#define IPV4_REMOTE_DST "1.2.3.4"
+#define IPV4_LOCAL "10.4.0.3"
+#define IPV4_GW1 "10.4.0.1"
+#define IPV4_GW2 "10.4.0.2"
+#define IPV6_REMOTE_DST "be:ef::b0:10"
+#define IPV6_LOCAL "fd01::3"
+#define IPV6_GW1 "fd01::1"
+#define IPV6_GW2 "fd01::2"
#define DMAC "11:11:11:11:11:11"
#define DMAC_INIT { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, }
#define DMAC2 "01:01:01:01:01:01"
@@ -36,9 +47,11 @@ struct fib_lookup_test {
const char *daddr;
int expected_ret;
const char *expected_src;
+ const char *expected_dst;
int lookup_flags;
__u32 tbid;
__u8 dmac[6];
+ __u32 mark;
};
static const struct fib_lookup_test tests[] = {
@@ -90,10 +103,47 @@ static const struct fib_lookup_test tests[] = {
.daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
.expected_src = IPV6_IFACE_ADDR_SEC,
.lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ /* policy routing */
+ { .desc = "IPv4 policy routing, default",
+ .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV4_GW1,
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv4 policy routing, mark doesn't point to a policy",
+ .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV4_GW1,
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK_NO_POLICY, },
+ { .desc = "IPv4 policy routing, mark points to a policy",
+ .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV4_GW2,
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK, },
+ { .desc = "IPv4 policy routing, mark points to a policy, but no flag",
+ .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV4_GW1,
+ .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK, },
+ { .desc = "IPv6 policy routing, default",
+ .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV6_GW1,
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv6 policy routing, mark doesn't point to a policy",
+ .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV6_GW1,
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK_NO_POLICY, },
+ { .desc = "IPv6 policy routing, mark points to a policy",
+ .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV6_GW2,
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK, },
+ { .desc = "IPv6 policy routing, mark points to a policy, but no flag",
+ .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV6_GW1,
+ .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK, },
};
-static int ifindex;
-
static int setup_netns(void)
{
int err;
@@ -144,12 +194,24 @@ static int setup_netns(void)
if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth1.forwarding)"))
goto fail;
+ /* Setup for policy routing tests */
+ SYS(fail, "ip addr add %s/24 dev veth1", IPV4_LOCAL);
+ SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_LOCAL);
+ SYS(fail, "ip route add %s/32 via %s", IPV4_REMOTE_DST, IPV4_GW1);
+ SYS(fail, "ip route add %s/32 via %s table %s", IPV4_REMOTE_DST, IPV4_GW2, MARK_TABLE);
+ SYS(fail, "ip -6 route add %s/128 via %s", IPV6_REMOTE_DST, IPV6_GW1);
+ SYS(fail, "ip -6 route add %s/128 via %s table %s", IPV6_REMOTE_DST, IPV6_GW2, MARK_TABLE);
+ SYS(fail, "ip rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
+ SYS(fail, "ip -6 rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
+
return 0;
fail:
return -1;
}
-static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_lookup_test *test)
+static int set_lookup_params(struct bpf_fib_lookup *params,
+ const struct fib_lookup_test *test,
+ int ifindex)
{
int ret;
@@ -158,6 +220,7 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo
params->l4_protocol = IPPROTO_TCP;
params->ifindex = ifindex;
params->tbid = test->tbid;
+ params->mark = test->mark;
if (inet_pton(AF_INET6, test->daddr, params->ipv6_dst) == 1) {
params->family = AF_INET6;
@@ -190,40 +253,45 @@ static void mac_str(char *b, const __u8 *mac)
mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
}
-static void assert_src_ip(struct bpf_fib_lookup *fib_params, const char *expected_src)
+static void assert_ip_address(int family, void *addr, const char *expected_str)
{
+ char str[INET6_ADDRSTRLEN];
+ u8 expected_addr[16];
+ int addr_len = 0;
int ret;
- __u32 src6[4];
- __be32 src4;
- switch (fib_params->family) {
+ switch (family) {
case AF_INET6:
- ret = inet_pton(AF_INET6, expected_src, src6);
- ASSERT_EQ(ret, 1, "inet_pton(expected_src)");
-
- ret = memcmp(src6, fib_params->ipv6_src, sizeof(fib_params->ipv6_src));
- if (!ASSERT_EQ(ret, 0, "fib_lookup ipv6 src")) {
- char str_src6[64];
-
- inet_ntop(AF_INET6, fib_params->ipv6_src, str_src6,
- sizeof(str_src6));
- printf("ipv6 expected %s actual %s ", expected_src,
- str_src6);
- }
-
+ ret = inet_pton(AF_INET6, expected_str, expected_addr);
+ ASSERT_EQ(ret, 1, "inet_pton(AF_INET6, expected_str)");
+ addr_len = 16;
break;
case AF_INET:
- ret = inet_pton(AF_INET, expected_src, &src4);
- ASSERT_EQ(ret, 1, "inet_pton(expected_src)");
-
- ASSERT_EQ(fib_params->ipv4_src, src4, "fib_lookup ipv4 src");
-
+ ret = inet_pton(AF_INET, expected_str, expected_addr);
+ ASSERT_EQ(ret, 1, "inet_pton(AF_INET, expected_str)");
+ addr_len = 4;
break;
default:
- PRINT_FAIL("invalid addr family: %d", fib_params->family);
+ PRINT_FAIL("invalid address family: %d", family);
+ break;
+ }
+
+ if (memcmp(addr, expected_addr, addr_len)) {
+ inet_ntop(family, addr, str, sizeof(str));
+ PRINT_FAIL("expected %s actual %s ", expected_str, str);
}
}
+static void assert_src_ip(struct bpf_fib_lookup *params, const char *expected)
+{
+ assert_ip_address(params->family, params->ipv6_src, expected);
+}
+
+static void assert_dst_ip(struct bpf_fib_lookup *params, const char *expected)
+{
+ assert_ip_address(params->family, params->ipv6_dst, expected);
+}
+
void test_fib_lookup(void)
{
struct bpf_fib_lookup *fib_params;
@@ -256,15 +324,18 @@ void test_fib_lookup(void)
if (setup_netns())
goto fail;
- ifindex = if_nametoindex("veth1");
- skb.ifindex = ifindex;
+ skb.ifindex = if_nametoindex("veth1");
+ if (!ASSERT_NEQ(skb.ifindex, 0, "if_nametoindex(veth1)"))
+ goto fail;
+
fib_params = &skel->bss->fib_params;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
printf("Testing %s ", tests[i].desc);
- if (set_lookup_params(fib_params, &tests[i]))
+ if (set_lookup_params(fib_params, &tests[i], skb.ifindex))
continue;
+
skel->bss->fib_lookup_ret = -1;
skel->bss->lookup_flags = tests[i].lookup_flags;
@@ -278,6 +349,9 @@ void test_fib_lookup(void)
if (tests[i].expected_src)
assert_src_ip(fib_params, tests[i].expected_src);
+ if (tests[i].expected_dst)
+ assert_dst_ip(fib_params, tests[i].expected_dst);
+
ret = memcmp(tests[i].dmac, fib_params->dmac, sizeof(tests[i].dmac));
if (!ASSERT_EQ(ret, 0, "dmac not match")) {
char expected[18], actual[18];
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index c4773173a4..9e5f387391 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -2,7 +2,6 @@
#include <test_progs.h>
#include <network_helpers.h>
#include <error.h>
-#include <linux/if.h>
#include <linux/if_tun.h>
#include <sys/uio.h>
diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c
index 8963f8a549..09f6487f58 100644
--- a/tools/testing/selftests/bpf/prog_tests/for_each.c
+++ b/tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -5,6 +5,7 @@
#include "for_each_hash_map_elem.skel.h"
#include "for_each_array_map_elem.skel.h"
#include "for_each_map_elem_write_key.skel.h"
+#include "for_each_multi_maps.skel.h"
static unsigned int duration;
@@ -143,6 +144,65 @@ static void test_write_map_key(void)
for_each_map_elem_write_key__destroy(skel);
}
+static void test_multi_maps(void)
+{
+ struct for_each_multi_maps *skel;
+ __u64 val, array_total, hash_total;
+ __u32 key, max_entries;
+ int i, err;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ skel = for_each_multi_maps__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "for_each_multi_maps__open_and_load"))
+ return;
+
+ array_total = 0;
+ max_entries = bpf_map__max_entries(skel->maps.arraymap);
+ for (i = 0; i < max_entries; i++) {
+ key = i;
+ val = i + 1;
+ array_total += val;
+ err = bpf_map__update_elem(skel->maps.arraymap, &key, sizeof(key),
+ &val, sizeof(val), BPF_ANY);
+ if (!ASSERT_OK(err, "array_map_update"))
+ goto out;
+ }
+
+ hash_total = 0;
+ max_entries = bpf_map__max_entries(skel->maps.hashmap);
+ for (i = 0; i < max_entries; i++) {
+ key = i + 100;
+ val = i + 1;
+ hash_total += val;
+ err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key),
+ &val, sizeof(val), BPF_ANY);
+ if (!ASSERT_OK(err, "hash_map_update"))
+ goto out;
+ }
+
+ skel->bss->data_output = 0;
+ skel->bss->use_array = 1;
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_OK(topts.retval, "retval");
+ ASSERT_EQ(skel->bss->data_output, array_total, "array output");
+
+ skel->bss->data_output = 0;
+ skel->bss->use_array = 0;
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_OK(topts.retval, "retval");
+ ASSERT_EQ(skel->bss->data_output, hash_total, "hash output");
+
+out:
+ for_each_multi_maps__destroy(skel);
+}
+
void test_for_each(void)
{
if (test__start_subtest("hash_map"))
@@ -151,4 +211,6 @@ void test_for_each(void)
test_array_map();
if (test__start_subtest("write_map_key"))
test_write_map_key();
+ if (test__start_subtest("multi_maps"))
+ test_multi_maps();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c
index 8dd2af9081..284764e717 100644
--- a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c
+++ b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c
@@ -88,6 +88,8 @@ static int attach(struct ip_check_defrag *skel, bool ipv6)
int err = -1;
nstoken = open_netns(NS1);
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ goto out;
skel->links.defrag = bpf_program__attach_netfilter(skel->progs.defrag, &opts);
if (!ASSERT_OK_PTR(skel->links.defrag, "program attach"))
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index 05000810e2..960c9323d1 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -4,6 +4,8 @@
#include "trace_helpers.h"
#include "kprobe_multi_empty.skel.h"
#include "kprobe_multi_override.skel.h"
+#include "kprobe_multi_session.skel.h"
+#include "kprobe_multi_session_cookie.skel.h"
#include "bpf/libbpf_internal.h"
#include "bpf/hashmap.h"
@@ -326,6 +328,74 @@ cleanup:
kprobe_multi__destroy(skel);
}
+static void test_session_skel_api(void)
+{
+ struct kprobe_multi_session *skel = NULL;
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct bpf_link *link = NULL;
+ int i, err, prog_fd;
+
+ skel = kprobe_multi_session__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_multi_session__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ err = kprobe_multi_session__attach(skel);
+ if (!ASSERT_OK(err, " kprobe_multi_session__attach"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.trigger);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ /* bpf_fentry_test1-4 trigger return probe, result is 2 */
+ for (i = 0; i < 4; i++)
+ ASSERT_EQ(skel->bss->kprobe_session_result[i], 2, "kprobe_session_result");
+
+ /* bpf_fentry_test5-8 trigger only entry probe, result is 1 */
+ for (i = 4; i < 8; i++)
+ ASSERT_EQ(skel->bss->kprobe_session_result[i], 1, "kprobe_session_result");
+
+cleanup:
+ bpf_link__destroy(link);
+ kprobe_multi_session__destroy(skel);
+}
+
+static void test_session_cookie_skel_api(void)
+{
+ struct kprobe_multi_session_cookie *skel = NULL;
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct bpf_link *link = NULL;
+ int err, prog_fd;
+
+ skel = kprobe_multi_session_cookie__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ err = kprobe_multi_session_cookie__attach(skel);
+ if (!ASSERT_OK(err, " kprobe_multi_wrapper__attach"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.trigger);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ ASSERT_EQ(skel->bss->test_kprobe_1_result, 1, "test_kprobe_1_result");
+ ASSERT_EQ(skel->bss->test_kprobe_2_result, 2, "test_kprobe_2_result");
+ ASSERT_EQ(skel->bss->test_kprobe_3_result, 3, "test_kprobe_3_result");
+
+cleanup:
+ bpf_link__destroy(link);
+ kprobe_multi_session_cookie__destroy(skel);
+}
+
static size_t symbol_hash(long key, void *ctx __maybe_unused)
{
return str_hash((const char *) key);
@@ -336,15 +406,80 @@ static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused)
return strcmp((const char *) key1, (const char *) key2) == 0;
}
+static bool is_invalid_entry(char *buf, bool kernel)
+{
+ if (kernel && strchr(buf, '['))
+ return true;
+ if (!kernel && !strchr(buf, '['))
+ return true;
+ return false;
+}
+
+static bool skip_entry(char *name)
+{
+ /*
+ * We attach to almost all kernel functions and some of them
+ * will cause 'suspicious RCU usage' when fprobe is attached
+ * to them. Filter out the current culprits - arch_cpu_idle
+ * default_idle and rcu_* functions.
+ */
+ if (!strcmp(name, "arch_cpu_idle"))
+ return true;
+ if (!strcmp(name, "default_idle"))
+ return true;
+ if (!strncmp(name, "rcu_", 4))
+ return true;
+ if (!strcmp(name, "bpf_dispatcher_xdp_func"))
+ return true;
+ if (!strncmp(name, "__ftrace_invalid_address__",
+ sizeof("__ftrace_invalid_address__") - 1))
+ return true;
+ return false;
+}
+
+/* Do comparision by ignoring '.llvm.<hash>' suffixes. */
+static int compare_name(const char *name1, const char *name2)
+{
+ const char *res1, *res2;
+ int len1, len2;
+
+ res1 = strstr(name1, ".llvm.");
+ res2 = strstr(name2, ".llvm.");
+ len1 = res1 ? res1 - name1 : strlen(name1);
+ len2 = res2 ? res2 - name2 : strlen(name2);
+
+ if (len1 == len2)
+ return strncmp(name1, name2, len1);
+ if (len1 < len2)
+ return strncmp(name1, name2, len1) <= 0 ? -1 : 1;
+ return strncmp(name1, name2, len2) >= 0 ? 1 : -1;
+}
+
+static int load_kallsyms_compare(const void *p1, const void *p2)
+{
+ return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name);
+}
+
+static int search_kallsyms_compare(const void *p1, const struct ksym *p2)
+{
+ return compare_name(p1, p2->name);
+}
+
static int get_syms(char ***symsp, size_t *cntp, bool kernel)
{
- size_t cap = 0, cnt = 0, i;
- char *name = NULL, **syms = NULL;
+ size_t cap = 0, cnt = 0;
+ char *name = NULL, *ksym_name, **syms = NULL;
struct hashmap *map;
+ struct ksyms *ksyms;
+ struct ksym *ks;
char buf[256];
FILE *f;
int err = 0;
+ ksyms = load_kallsyms_custom_local(load_kallsyms_compare);
+ if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_custom_local"))
+ return -EINVAL;
+
/*
* The available_filter_functions contains many duplicates,
* but other than that all symbols are usable in kprobe multi
@@ -368,33 +503,23 @@ static int get_syms(char ***symsp, size_t *cntp, bool kernel)
}
while (fgets(buf, sizeof(buf), f)) {
- if (kernel && strchr(buf, '['))
- continue;
- if (!kernel && !strchr(buf, '['))
+ if (is_invalid_entry(buf, kernel))
continue;
free(name);
if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1)
continue;
- /*
- * We attach to almost all kernel functions and some of them
- * will cause 'suspicious RCU usage' when fprobe is attached
- * to them. Filter out the current culprits - arch_cpu_idle
- * default_idle and rcu_* functions.
- */
- if (!strcmp(name, "arch_cpu_idle"))
- continue;
- if (!strcmp(name, "default_idle"))
- continue;
- if (!strncmp(name, "rcu_", 4))
- continue;
- if (!strcmp(name, "bpf_dispatcher_xdp_func"))
- continue;
- if (!strncmp(name, "__ftrace_invalid_address__",
- sizeof("__ftrace_invalid_address__") - 1))
+ if (skip_entry(name))
continue;
- err = hashmap__add(map, name, 0);
+ ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare);
+ if (!ks) {
+ err = -EINVAL;
+ goto error;
+ }
+
+ ksym_name = ks->name;
+ err = hashmap__add(map, ksym_name, 0);
if (err == -EEXIST) {
err = 0;
continue;
@@ -407,8 +532,7 @@ static int get_syms(char ***symsp, size_t *cntp, bool kernel)
if (err)
goto error;
- syms[cnt++] = name;
- name = NULL;
+ syms[cnt++] = ksym_name;
}
*symsp = syms;
@@ -418,42 +542,88 @@ error:
free(name);
fclose(f);
hashmap__free(map);
- if (err) {
- for (i = 0; i < cnt; i++)
- free(syms[i]);
+ if (err)
free(syms);
+ return err;
+}
+
+static int get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel)
+{
+ unsigned long *addr, *addrs, *tmp_addrs;
+ int err = 0, max_cnt, inc_cnt;
+ char *name = NULL;
+ size_t cnt = 0;
+ char buf[256];
+ FILE *f;
+
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0)
+ f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r");
+ else
+ f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r");
+
+ if (!f)
+ return -ENOENT;
+
+ /* In my local setup, the number of entries is 50k+ so Let us initially
+ * allocate space to hold 64k entries. If 64k is not enough, incrementally
+ * increase 1k each time.
+ */
+ max_cnt = 65536;
+ inc_cnt = 1024;
+ addrs = malloc(max_cnt * sizeof(long));
+ if (addrs == NULL) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ if (is_invalid_entry(buf, kernel))
+ continue;
+
+ free(name);
+ if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2)
+ continue;
+ if (skip_entry(name))
+ continue;
+
+ if (cnt == max_cnt) {
+ max_cnt += inc_cnt;
+ tmp_addrs = realloc(addrs, max_cnt);
+ if (!tmp_addrs) {
+ err = -ENOMEM;
+ goto error;
+ }
+ addrs = tmp_addrs;
+ }
+
+ addrs[cnt++] = (unsigned long)addr;
}
+
+ *addrsp = addrs;
+ *cntp = cnt;
+
+error:
+ free(name);
+ fclose(f);
+ if (err)
+ free(addrs);
return err;
}
-static void test_kprobe_multi_bench_attach(bool kernel)
+static void do_bench_test(struct kprobe_multi_empty *skel, struct bpf_kprobe_multi_opts *opts)
{
- LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
- struct kprobe_multi_empty *skel = NULL;
long attach_start_ns, attach_end_ns;
long detach_start_ns, detach_end_ns;
double attach_delta, detach_delta;
struct bpf_link *link = NULL;
- char **syms = NULL;
- size_t cnt = 0, i;
-
- if (!ASSERT_OK(get_syms(&syms, &cnt, kernel), "get_syms"))
- return;
-
- skel = kprobe_multi_empty__open_and_load();
- if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load"))
- goto cleanup;
-
- opts.syms = (const char **) syms;
- opts.cnt = cnt;
attach_start_ns = get_time_ns();
link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_empty,
- NULL, &opts);
+ NULL, opts);
attach_end_ns = get_time_ns();
if (!ASSERT_OK_PTR(link, "bpf_program__attach_kprobe_multi_opts"))
- goto cleanup;
+ return;
detach_start_ns = get_time_ns();
bpf_link__destroy(link);
@@ -462,17 +632,65 @@ static void test_kprobe_multi_bench_attach(bool kernel)
attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0;
detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0;
- printf("%s: found %lu functions\n", __func__, cnt);
+ printf("%s: found %lu functions\n", __func__, opts->cnt);
printf("%s: attached in %7.3lfs\n", __func__, attach_delta);
printf("%s: detached in %7.3lfs\n", __func__, detach_delta);
+}
+
+static void test_kprobe_multi_bench_attach(bool kernel)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ struct kprobe_multi_empty *skel = NULL;
+ char **syms = NULL;
+ size_t cnt = 0;
+
+ if (!ASSERT_OK(get_syms(&syms, &cnt, kernel), "get_syms"))
+ return;
+
+ skel = kprobe_multi_empty__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load"))
+ goto cleanup;
+
+ opts.syms = (const char **) syms;
+ opts.cnt = cnt;
+
+ do_bench_test(skel, &opts);
cleanup:
kprobe_multi_empty__destroy(skel);
- if (syms) {
- for (i = 0; i < cnt; i++)
- free(syms[i]);
+ if (syms)
free(syms);
+}
+
+static void test_kprobe_multi_bench_attach_addr(bool kernel)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ struct kprobe_multi_empty *skel = NULL;
+ unsigned long *addrs = NULL;
+ size_t cnt = 0;
+ int err;
+
+ err = get_addrs(&addrs, &cnt, kernel);
+ if (err == -ENOENT) {
+ test__skip();
+ return;
}
+
+ if (!ASSERT_OK(err, "get_addrs"))
+ return;
+
+ skel = kprobe_multi_empty__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load"))
+ goto cleanup;
+
+ opts.addrs = addrs;
+ opts.cnt = cnt;
+
+ do_bench_test(skel, &opts);
+
+cleanup:
+ kprobe_multi_empty__destroy(skel);
+ free(addrs);
}
static void test_attach_override(void)
@@ -515,6 +733,10 @@ void serial_test_kprobe_multi_bench_attach(void)
test_kprobe_multi_bench_attach(true);
if (test__start_subtest("modules"))
test_kprobe_multi_bench_attach(false);
+ if (test__start_subtest("kernel"))
+ test_kprobe_multi_bench_attach_addr(true);
+ if (test__start_subtest("modules"))
+ test_kprobe_multi_bench_attach_addr(false);
}
void test_kprobe_multi_test(void)
@@ -538,4 +760,8 @@ void test_kprobe_multi_test(void)
test_attach_api_fails();
if (test__start_subtest("attach_override"))
test_attach_override();
+ if (test__start_subtest("session"))
+ test_session_skel_api();
+ if (test__start_subtest("session_cookie"))
+ test_session_cookie_skel_api();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c
index b295969b26..dc7aab532f 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c
@@ -5,8 +5,6 @@
#include "test_ksyms.skel.h"
#include <sys/stat.h>
-static int duration;
-
void test_ksyms(void)
{
const char *btf_path = "/sys/kernel/btf/vmlinux";
@@ -18,43 +16,37 @@ void test_ksyms(void)
int err;
err = kallsyms_find("bpf_link_fops", &link_fops_addr);
- if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+ if (!ASSERT_NEQ(err, -EINVAL, "bpf_link_fops: kallsyms_fopen"))
return;
- if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_link_fops' not found\n"))
+ if (!ASSERT_NEQ(err, -ENOENT, "bpf_link_fops: ksym_find"))
return;
err = kallsyms_find("__per_cpu_start", &per_cpu_start_addr);
- if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+ if (!ASSERT_NEQ(err, -EINVAL, "__per_cpu_start: kallsyms_fopen"))
return;
- if (CHECK(err == -ENOENT, "ksym_find", "symbol 'per_cpu_start' not found\n"))
+ if (!ASSERT_NEQ(err, -ENOENT, "__per_cpu_start: ksym_find"))
return;
- if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno))
+ if (!ASSERT_OK(stat(btf_path, &st), "stat_btf"))
return;
btf_size = st.st_size;
skel = test_ksyms__open_and_load();
- if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n"))
+ if (!ASSERT_OK_PTR(skel, "test_ksyms__open_and_load"))
return;
err = test_ksyms__attach(skel);
- if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "test_ksyms__attach"))
goto cleanup;
/* trigger tracepoint */
usleep(1);
data = skel->data;
- CHECK(data->out__bpf_link_fops != link_fops_addr, "bpf_link_fops",
- "got 0x%llx, exp 0x%llx\n",
- data->out__bpf_link_fops, link_fops_addr);
- CHECK(data->out__bpf_link_fops1 != 0, "bpf_link_fops1",
- "got %llu, exp %llu\n", data->out__bpf_link_fops1, (__u64)0);
- CHECK(data->out__btf_size != btf_size, "btf_size",
- "got %llu, exp %llu\n", data->out__btf_size, btf_size);
- CHECK(data->out__per_cpu_start != per_cpu_start_addr, "__per_cpu_start",
- "got %llu, exp %llu\n", data->out__per_cpu_start,
- per_cpu_start_addr);
+ ASSERT_EQ(data->out__bpf_link_fops, link_fops_addr, "bpf_link_fops");
+ ASSERT_EQ(data->out__bpf_link_fops1, 0, "bpf_link_fops1");
+ ASSERT_EQ(data->out__btf_size, btf_size, "btf_size");
+ ASSERT_EQ(data->out__per_cpu_start, per_cpu_start_addr, "__per_cpu_start");
cleanup:
test_ksyms__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c
index f53d658ed0..6d391d95f9 100644
--- a/tools/testing/selftests/bpf/prog_tests/module_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -51,6 +51,10 @@ void test_module_attach(void)
0, "bpf_testmod_test_read");
ASSERT_OK(err, "set_attach_target");
+ err = bpf_program__set_attach_target(skel->progs.handle_fentry_explicit_manual,
+ 0, "bpf_testmod:bpf_testmod_test_read");
+ ASSERT_OK(err, "set_attach_target_explicit");
+
err = test_module_attach__load(skel);
if (CHECK(err, "skel_load", "failed to load skeleton\n"))
return;
@@ -70,6 +74,8 @@ void test_module_attach(void)
ASSERT_EQ(bss->tp_btf_read_sz, READ_SZ, "tp_btf");
ASSERT_EQ(bss->fentry_read_sz, READ_SZ, "fentry");
ASSERT_EQ(bss->fentry_manual_read_sz, READ_SZ, "fentry_manual");
+ ASSERT_EQ(bss->fentry_explicit_read_sz, READ_SZ, "fentry_explicit");
+ ASSERT_EQ(bss->fentry_explicit_manual_read_sz, READ_SZ, "fentry_explicit_manual");
ASSERT_EQ(bss->fexit_read_sz, READ_SZ, "fexit");
ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet");
ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret");
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index 8f8d792307..274d2e033e 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -82,6 +82,22 @@ static void cleanup_netns(struct nstoken *nstoken)
SYS_NOFAIL("ip netns del %s", NS_TEST);
}
+static int start_mptcp_server(int family, const char *addr_str, __u16 port,
+ int timeout_ms)
+{
+ struct network_helper_opts opts = {
+ .timeout_ms = timeout_ms,
+ .proto = IPPROTO_MPTCP,
+ };
+ struct sockaddr_storage addr;
+ socklen_t addrlen;
+
+ if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+ return -1;
+
+ return start_server_addr(SOCK_STREAM, &addr, addrlen, &opts);
+}
+
static int verify_tsk(int map_fd, int client_fd)
{
int err, cfd = client_fd;
@@ -273,6 +289,8 @@ static int run_mptcpify(int cgroup_fd)
if (!ASSERT_OK_PTR(mptcpify_skel, "skel_open_load"))
return libbpf_get_error(mptcpify_skel);
+ mptcpify_skel->bss->pid = getpid();
+
err = mptcpify__attach(mptcpify_skel);
if (!ASSERT_OK(err, "skel_attach"))
goto out;
diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
index 24d493482f..e72d75d6ba 100644
--- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
+++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
@@ -12,77 +12,229 @@
#include <sys/wait.h>
#include <sys/mount.h>
#include <sys/fcntl.h>
+#include "network_helpers.h"
#define STACK_SIZE (1024 * 1024)
static char child_stack[STACK_SIZE];
-static int test_current_pid_tgid(void *args)
+static int get_pid_tgid(pid_t *pid, pid_t *tgid,
+ struct test_ns_current_pid_tgid__bss *bss)
{
- struct test_ns_current_pid_tgid__bss *bss;
- struct test_ns_current_pid_tgid *skel;
- int err = -1, duration = 0;
- pid_t tgid, pid;
struct stat st;
+ int err;
- skel = test_ns_current_pid_tgid__open_and_load();
- if (CHECK(!skel, "skel_open_load", "failed to load skeleton\n"))
- goto cleanup;
-
- pid = syscall(SYS_gettid);
- tgid = getpid();
+ *pid = syscall(SYS_gettid);
+ *tgid = getpid();
err = stat("/proc/self/ns/pid", &st);
- if (CHECK(err, "stat", "failed /proc/self/ns/pid: %d\n", err))
- goto cleanup;
+ if (!ASSERT_OK(err, "stat /proc/self/ns/pid"))
+ return err;
- bss = skel->bss;
bss->dev = st.st_dev;
bss->ino = st.st_ino;
bss->user_pid = 0;
bss->user_tgid = 0;
+ return 0;
+}
+
+static int test_current_pid_tgid_tp(void *args)
+{
+ struct test_ns_current_pid_tgid__bss *bss;
+ struct test_ns_current_pid_tgid *skel;
+ int ret = -1, err;
+ pid_t tgid, pid;
+
+ skel = test_ns_current_pid_tgid__open();
+ if (!ASSERT_OK_PTR(skel, "test_ns_current_pid_tgid__open"))
+ return ret;
+
+ bpf_program__set_autoload(skel->progs.tp_handler, true);
+
+ err = test_ns_current_pid_tgid__load(skel);
+ if (!ASSERT_OK(err, "test_ns_current_pid_tgid__load"))
+ goto cleanup;
+
+ bss = skel->bss;
+ if (get_pid_tgid(&pid, &tgid, bss))
+ goto cleanup;
err = test_ns_current_pid_tgid__attach(skel);
- if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "test_ns_current_pid_tgid__attach"))
goto cleanup;
/* trigger tracepoint */
usleep(1);
- ASSERT_EQ(bss->user_pid, pid, "pid");
- ASSERT_EQ(bss->user_tgid, tgid, "tgid");
- err = 0;
+ if (!ASSERT_EQ(bss->user_pid, pid, "pid"))
+ goto cleanup;
+ if (!ASSERT_EQ(bss->user_tgid, tgid, "tgid"))
+ goto cleanup;
+ ret = 0;
+
+cleanup:
+ test_ns_current_pid_tgid__destroy(skel);
+ return ret;
+}
+
+static int test_current_pid_tgid_cgrp(void *args)
+{
+ struct test_ns_current_pid_tgid__bss *bss;
+ struct test_ns_current_pid_tgid *skel;
+ int server_fd = -1, ret = -1, err;
+ int cgroup_fd = *(int *)args;
+ pid_t tgid, pid;
+
+ skel = test_ns_current_pid_tgid__open();
+ if (!ASSERT_OK_PTR(skel, "test_ns_current_pid_tgid__open"))
+ return ret;
+
+ bpf_program__set_autoload(skel->progs.cgroup_bind4, true);
+
+ err = test_ns_current_pid_tgid__load(skel);
+ if (!ASSERT_OK(err, "test_ns_current_pid_tgid__load"))
+ goto cleanup;
+
+ bss = skel->bss;
+ if (get_pid_tgid(&pid, &tgid, bss))
+ goto cleanup;
+
+ skel->links.cgroup_bind4 = bpf_program__attach_cgroup(
+ skel->progs.cgroup_bind4, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.cgroup_bind4, "bpf_program__attach_cgroup"))
+ goto cleanup;
+
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(server_fd, 0, "start_server"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(bss->user_pid, pid, "pid"))
+ goto cleanup;
+ if (!ASSERT_EQ(bss->user_tgid, tgid, "tgid"))
+ goto cleanup;
+ ret = 0;
cleanup:
- test_ns_current_pid_tgid__destroy(skel);
+ if (server_fd >= 0)
+ close(server_fd);
+ test_ns_current_pid_tgid__destroy(skel);
+ return ret;
+}
+
+static int test_current_pid_tgid_sk_msg(void *args)
+{
+ int verdict, map, server_fd = -1, client_fd = -1;
+ struct test_ns_current_pid_tgid__bss *bss;
+ static const char send_msg[] = "message";
+ struct test_ns_current_pid_tgid *skel;
+ int ret = -1, err, key = 0;
+ pid_t tgid, pid;
+
+ skel = test_ns_current_pid_tgid__open();
+ if (!ASSERT_OK_PTR(skel, "test_ns_current_pid_tgid__open"))
+ return ret;
+
+ bpf_program__set_autoload(skel->progs.sk_msg, true);
+
+ err = test_ns_current_pid_tgid__load(skel);
+ if (!ASSERT_OK(err, "test_ns_current_pid_tgid__load"))
+ goto cleanup;
+
+ bss = skel->bss;
+ if (get_pid_tgid(&pid, &tgid, skel->bss))
+ goto cleanup;
+
+ verdict = bpf_program__fd(skel->progs.sk_msg);
+ map = bpf_map__fd(skel->maps.sock_map);
+ err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "prog_attach"))
+ goto cleanup;
+
+ server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (!ASSERT_GE(server_fd, 0, "start_server"))
+ goto cleanup;
- return err;
+ client_fd = connect_to_fd(server_fd, 0);
+ if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+ goto cleanup;
+
+ err = bpf_map_update_elem(map, &key, &client_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto cleanup;
+
+ err = send(client_fd, send_msg, sizeof(send_msg), 0);
+ if (!ASSERT_EQ(err, sizeof(send_msg), "send(msg)"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(bss->user_pid, pid, "pid"))
+ goto cleanup;
+ if (!ASSERT_EQ(bss->user_tgid, tgid, "tgid"))
+ goto cleanup;
+ ret = 0;
+
+cleanup:
+ if (server_fd >= 0)
+ close(server_fd);
+ if (client_fd >= 0)
+ close(client_fd);
+ test_ns_current_pid_tgid__destroy(skel);
+ return ret;
}
-static void test_ns_current_pid_tgid_new_ns(void)
+static void test_ns_current_pid_tgid_new_ns(int (*fn)(void *), void *arg)
{
- int wstatus, duration = 0;
+ int wstatus;
pid_t cpid;
/* Create a process in a new namespace, this process
* will be the init process of this new namespace hence will be pid 1.
*/
- cpid = clone(test_current_pid_tgid, child_stack + STACK_SIZE,
- CLONE_NEWPID | SIGCHLD, NULL);
+ cpid = clone(fn, child_stack + STACK_SIZE,
+ CLONE_NEWPID | SIGCHLD, arg);
- if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno)))
+ if (!ASSERT_NEQ(cpid, -1, "clone"))
return;
- if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", "%s\n", strerror(errno)))
+ if (!ASSERT_NEQ(waitpid(cpid, &wstatus, 0), -1, "waitpid"))
return;
- if (CHECK(WEXITSTATUS(wstatus) != 0, "newns_pidtgid", "failed"))
+ if (!ASSERT_OK(WEXITSTATUS(wstatus), "newns_pidtgid"))
return;
}
+static void test_in_netns(int (*fn)(void *), void *arg)
+{
+ struct nstoken *nstoken = NULL;
+
+ SYS(cleanup, "ip netns add ns_current_pid_tgid");
+ SYS(cleanup, "ip -net ns_current_pid_tgid link set dev lo up");
+
+ nstoken = open_netns("ns_current_pid_tgid");
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto cleanup;
+
+ test_ns_current_pid_tgid_new_ns(fn, arg);
+
+cleanup:
+ if (nstoken)
+ close_netns(nstoken);
+ SYS_NOFAIL("ip netns del ns_current_pid_tgid");
+}
+
/* TODO: use a different tracepoint */
void serial_test_ns_current_pid_tgid(void)
{
- if (test__start_subtest("ns_current_pid_tgid_root_ns"))
- test_current_pid_tgid(NULL);
- if (test__start_subtest("ns_current_pid_tgid_new_ns"))
- test_ns_current_pid_tgid_new_ns();
+ if (test__start_subtest("root_ns_tp"))
+ test_current_pid_tgid_tp(NULL);
+ if (test__start_subtest("new_ns_tp"))
+ test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_tp, NULL);
+ if (test__start_subtest("new_ns_cgrp")) {
+ int cgroup_fd = -1;
+
+ cgroup_fd = test__join_cgroup("/sock_addr");
+ if (ASSERT_GE(cgroup_fd, 0, "join_cgroup")) {
+ test_in_netns(test_current_pid_tgid_cgrp, &cgroup_fd);
+ close(cgroup_fd);
+ }
+ }
+ if (test__start_subtest("new_ns_sk_msg"))
+ test_in_netns(test_current_pid_tgid_sk_msg, NULL);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_skip.c b/tools/testing/selftests/bpf/prog_tests/perf_skip.c
new file mode 100644
index 0000000000..37d8618800
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/perf_skip.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <test_progs.h>
+#include "test_perf_skip.skel.h"
+#include <linux/compiler.h>
+#include <linux/hw_breakpoint.h>
+#include <sys/mman.h>
+
+#ifndef TRAP_PERF
+#define TRAP_PERF 6
+#endif
+
+int sigio_count, sigtrap_count;
+
+static void handle_sigio(int sig __always_unused)
+{
+ ++sigio_count;
+}
+
+static void handle_sigtrap(int signum __always_unused,
+ siginfo_t *info,
+ void *ucontext __always_unused)
+{
+ ASSERT_EQ(info->si_code, TRAP_PERF, "si_code");
+ ++sigtrap_count;
+}
+
+static noinline int test_function(void)
+{
+ asm volatile ("");
+ return 0;
+}
+
+void serial_test_perf_skip(void)
+{
+ struct sigaction action = {};
+ struct sigaction previous_sigtrap;
+ sighandler_t previous_sigio = SIG_ERR;
+ struct test_perf_skip *skel = NULL;
+ struct perf_event_attr attr = {};
+ int perf_fd = -1;
+ int err;
+ struct f_owner_ex owner;
+ struct bpf_link *prog_link = NULL;
+
+ action.sa_flags = SA_SIGINFO | SA_NODEFER;
+ action.sa_sigaction = handle_sigtrap;
+ sigemptyset(&action.sa_mask);
+ if (!ASSERT_OK(sigaction(SIGTRAP, &action, &previous_sigtrap), "sigaction"))
+ return;
+
+ previous_sigio = signal(SIGIO, handle_sigio);
+ if (!ASSERT_NEQ(previous_sigio, SIG_ERR, "signal"))
+ goto cleanup;
+
+ skel = test_perf_skip__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ attr.type = PERF_TYPE_BREAKPOINT;
+ attr.size = sizeof(attr);
+ attr.bp_type = HW_BREAKPOINT_X;
+ attr.bp_addr = (uintptr_t)test_function;
+ attr.bp_len = sizeof(long);
+ attr.sample_period = 1;
+ attr.sample_type = PERF_SAMPLE_IP;
+ attr.pinned = 1;
+ attr.exclude_kernel = 1;
+ attr.exclude_hv = 1;
+ attr.precise_ip = 3;
+ attr.sigtrap = 1;
+ attr.remove_on_exec = 1;
+
+ perf_fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
+ if (perf_fd < 0 && (errno == ENOENT || errno == EOPNOTSUPP)) {
+ printf("SKIP:no PERF_TYPE_BREAKPOINT/HW_BREAKPOINT_X\n");
+ test__skip();
+ goto cleanup;
+ }
+ if (!ASSERT_OK(perf_fd < 0, "perf_event_open"))
+ goto cleanup;
+
+ /* Configure the perf event to signal on sample. */
+ err = fcntl(perf_fd, F_SETFL, O_ASYNC);
+ if (!ASSERT_OK(err, "fcntl(F_SETFL, O_ASYNC)"))
+ goto cleanup;
+
+ owner.type = F_OWNER_TID;
+ owner.pid = syscall(__NR_gettid);
+ err = fcntl(perf_fd, F_SETOWN_EX, &owner);
+ if (!ASSERT_OK(err, "fcntl(F_SETOWN_EX)"))
+ goto cleanup;
+
+ /* Allow at most one sample. A sample rejected by bpf should
+ * not count against this.
+ */
+ err = ioctl(perf_fd, PERF_EVENT_IOC_REFRESH, 1);
+ if (!ASSERT_OK(err, "ioctl(PERF_EVENT_IOC_REFRESH)"))
+ goto cleanup;
+
+ prog_link = bpf_program__attach_perf_event(skel->progs.handler, perf_fd);
+ if (!ASSERT_OK_PTR(prog_link, "bpf_program__attach_perf_event"))
+ goto cleanup;
+
+ /* Configure the bpf program to suppress the sample. */
+ skel->bss->ip = (uintptr_t)test_function;
+ test_function();
+
+ ASSERT_EQ(sigio_count, 0, "sigio_count");
+ ASSERT_EQ(sigtrap_count, 0, "sigtrap_count");
+
+ /* Configure the bpf program to allow the sample. */
+ skel->bss->ip = 0;
+ test_function();
+
+ ASSERT_EQ(sigio_count, 1, "sigio_count");
+ ASSERT_EQ(sigtrap_count, 1, "sigtrap_count");
+
+ /* Test that the sample above is the only one allowed (by perf, not
+ * by bpf)
+ */
+ test_function();
+
+ ASSERT_EQ(sigio_count, 1, "sigio_count");
+ ASSERT_EQ(sigtrap_count, 1, "sigtrap_count");
+
+cleanup:
+ bpf_link__destroy(prog_link);
+ if (perf_fd >= 0)
+ close(perf_fd);
+ test_perf_skip__destroy(skel);
+
+ if (previous_sigio != SIG_ERR)
+ signal(SIGIO, previous_sigio);
+ sigaction(SIGTRAP, &previous_sigtrap, NULL);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/preempt_lock.c b/tools/testing/selftests/bpf/prog_tests/preempt_lock.c
new file mode 100644
index 0000000000..02917c6724
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/preempt_lock.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <preempt_lock.skel.h>
+
+void test_preempt_lock(void)
+{
+ RUN_TESTS(preempt_lock);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index 48c5695b7a..da430df45a 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -12,8 +12,11 @@
#include <sys/sysinfo.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
+
#include "test_ringbuf.lskel.h"
+#include "test_ringbuf_n.lskel.h"
#include "test_ringbuf_map_key.lskel.h"
+#include "test_ringbuf_write.lskel.h"
#define EDONE 7777
@@ -83,6 +86,58 @@ static void *poll_thread(void *input)
return (void *)(long)ring_buffer__poll(ringbuf, timeout);
}
+static void ringbuf_write_subtest(void)
+{
+ struct test_ringbuf_write_lskel *skel;
+ int page_size = getpagesize();
+ size_t *mmap_ptr;
+ int err, rb_fd;
+
+ skel = test_ringbuf_write_lskel__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->maps.ringbuf.max_entries = 0x4000;
+
+ err = test_ringbuf_write_lskel__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ rb_fd = skel->maps.ringbuf.map_fd;
+
+ mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0);
+ if (!ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos"))
+ goto cleanup;
+ *mmap_ptr = 0x3000;
+ ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw");
+
+ skel->bss->pid = getpid();
+
+ ringbuf = ring_buffer__new(rb_fd, process_sample, NULL, NULL);
+ if (!ASSERT_OK_PTR(ringbuf, "ringbuf_new"))
+ goto cleanup;
+
+ err = test_ringbuf_write_lskel__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup_ringbuf;
+
+ skel->bss->discarded = 0;
+ skel->bss->passed = 0;
+
+ /* trigger exactly two samples */
+ syscall(__NR_getpgid);
+ syscall(__NR_getpgid);
+
+ ASSERT_EQ(skel->bss->discarded, 2, "discarded");
+ ASSERT_EQ(skel->bss->passed, 0, "passed");
+
+ test_ringbuf_write_lskel__detach(skel);
+cleanup_ringbuf:
+ ring_buffer__free(ringbuf);
+cleanup:
+ test_ringbuf_write_lskel__destroy(skel);
+}
+
static void ringbuf_subtest(void)
{
const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample);
@@ -326,6 +381,68 @@ cleanup:
test_ringbuf_lskel__destroy(skel);
}
+/*
+ * Test ring_buffer__consume_n() by producing N_TOT_SAMPLES samples in the ring
+ * buffer, via getpid(), and consuming them in chunks of N_SAMPLES.
+ */
+#define N_TOT_SAMPLES 32
+#define N_SAMPLES 4
+
+/* Sample value to verify the callback validity */
+#define SAMPLE_VALUE 42L
+
+static int process_n_sample(void *ctx, void *data, size_t len)
+{
+ struct sample *s = data;
+
+ ASSERT_EQ(s->value, SAMPLE_VALUE, "sample_value");
+
+ return 0;
+}
+
+static void ringbuf_n_subtest(void)
+{
+ struct test_ringbuf_n_lskel *skel_n;
+ int err, i;
+
+ skel_n = test_ringbuf_n_lskel__open();
+ if (!ASSERT_OK_PTR(skel_n, "test_ringbuf_n_lskel__open"))
+ return;
+
+ skel_n->maps.ringbuf.max_entries = getpagesize();
+ skel_n->bss->pid = getpid();
+
+ err = test_ringbuf_n_lskel__load(skel_n);
+ if (!ASSERT_OK(err, "test_ringbuf_n_lskel__load"))
+ goto cleanup;
+
+ ringbuf = ring_buffer__new(skel_n->maps.ringbuf.map_fd,
+ process_n_sample, NULL, NULL);
+ if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new"))
+ goto cleanup;
+
+ err = test_ringbuf_n_lskel__attach(skel_n);
+ if (!ASSERT_OK(err, "test_ringbuf_n_lskel__attach"))
+ goto cleanup_ringbuf;
+
+ /* Produce N_TOT_SAMPLES samples in the ring buffer by calling getpid() */
+ skel_n->bss->value = SAMPLE_VALUE;
+ for (i = 0; i < N_TOT_SAMPLES; i++)
+ syscall(__NR_getpgid);
+
+ /* Consume all samples from the ring buffer in batches of N_SAMPLES */
+ for (i = 0; i < N_TOT_SAMPLES; i += err) {
+ err = ring_buffer__consume_n(ringbuf, N_SAMPLES);
+ if (!ASSERT_EQ(err, N_SAMPLES, "rb_consume"))
+ goto cleanup_ringbuf;
+ }
+
+cleanup_ringbuf:
+ ring_buffer__free(ringbuf);
+cleanup:
+ test_ringbuf_n_lskel__destroy(skel_n);
+}
+
static int process_map_key_sample(void *ctx, void *data, size_t len)
{
struct sample *s;
@@ -384,6 +501,10 @@ void test_ringbuf(void)
{
if (test__start_subtest("ringbuf"))
ringbuf_subtest();
+ if (test__start_subtest("ringbuf_n"))
+ ringbuf_n_subtest();
if (test__start_subtest("ringbuf_map_key"))
ringbuf_map_key_subtest();
+ if (test__start_subtest("ringbuf_write"))
+ ringbuf_write_subtest();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index b15b343ebb..920aee41bd 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -179,7 +179,7 @@ static void test_send_signal_nmi(bool signal_thread)
pmu_fd = syscall(__NR_perf_event_open, &attr, 0 /* pid */,
-1 /* cpu */, -1 /* group_fd */, 0 /* flags */);
if (pmu_fd == -1) {
- if (errno == ENOENT) {
+ if (errno == ENOENT || errno == EOPNOTSUPP) {
printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n",
__func__);
test__skip();
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
index 1374b626a9..0b9bd1d6f7 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -15,6 +15,7 @@
#include <unistd.h>
#include "test_progs.h"
+#include "network_helpers.h"
#define BIND_PORT 1234
#define CONNECT_PORT 4321
@@ -22,8 +23,6 @@
#define NS_SELF "/proc/self/ns/net"
#define SERVER_MAP_PATH "/sys/fs/bpf/tc/globals/server_map"
-static const struct timeval timeo_sec = { .tv_sec = 3 };
-static const size_t timeo_optlen = sizeof(timeo_sec);
static int stop, duration;
static bool
@@ -73,52 +72,6 @@ configure_stack(void)
return true;
}
-static int
-start_server(const struct sockaddr *addr, socklen_t len, int type)
-{
- int fd;
-
- fd = socket(addr->sa_family, type, 0);
- if (CHECK_FAIL(fd == -1))
- goto out;
- if (CHECK_FAIL(setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
- timeo_optlen)))
- goto close_out;
- if (CHECK_FAIL(bind(fd, addr, len) == -1))
- goto close_out;
- if (type == SOCK_STREAM && CHECK_FAIL(listen(fd, 128) == -1))
- goto close_out;
-
- goto out;
-close_out:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
-
-static int
-connect_to_server(const struct sockaddr *addr, socklen_t len, int type)
-{
- int fd = -1;
-
- fd = socket(addr->sa_family, type, 0);
- if (CHECK_FAIL(fd == -1))
- goto out;
- if (CHECK_FAIL(setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo_sec,
- timeo_optlen)))
- goto close_out;
- if (CHECK_FAIL(connect(fd, addr, len)))
- goto close_out;
-
- goto out;
-close_out:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
-
static in_port_t
get_port(int fd)
{
@@ -161,7 +114,7 @@ run_test(int server_fd, const struct sockaddr *addr, socklen_t len, int type)
in_port_t port;
int ret = 1;
- client = connect_to_server(addr, len, type);
+ client = connect_to_addr(type, (struct sockaddr_storage *)addr, len, NULL);
if (client == -1) {
perror("Cannot connect to server");
goto out;
@@ -310,7 +263,9 @@ void test_sk_assign(void)
continue;
prepare_addr(test->addr, test->family, BIND_PORT, false);
addr = (const struct sockaddr *)test->addr;
- server = start_server(addr, test->len, test->type);
+ server = start_server_addr(test->type,
+ (const struct sockaddr_storage *)addr,
+ test->len, NULL);
if (server == -1)
goto close;
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
index 597d0467a9..de2466547e 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -994,7 +994,7 @@ static void drop_on_reuseport(const struct test *t)
err = update_lookup_map(t->sock_map, SERVER_A, server1);
if (err)
- goto detach;
+ goto close_srv1;
/* second server on destination address we should never reach */
server2 = make_server(t->sotype, t->connect_to.ip, t->connect_to.port,
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_addr.c b/tools/testing/selftests/bpf/prog_tests/sock_addr.c
index 5fd6177189..b880c564a2 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_addr.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_addr.c
@@ -3,16 +3,56 @@
#include "test_progs.h"
+#include "sock_addr_kern.skel.h"
+#include "bind4_prog.skel.h"
+#include "bind6_prog.skel.h"
#include "connect_unix_prog.skel.h"
+#include "connect4_prog.skel.h"
+#include "connect6_prog.skel.h"
+#include "sendmsg4_prog.skel.h"
+#include "sendmsg6_prog.skel.h"
+#include "recvmsg4_prog.skel.h"
+#include "recvmsg6_prog.skel.h"
#include "sendmsg_unix_prog.skel.h"
#include "recvmsg_unix_prog.skel.h"
+#include "getsockname4_prog.skel.h"
+#include "getsockname6_prog.skel.h"
#include "getsockname_unix_prog.skel.h"
+#include "getpeername4_prog.skel.h"
+#include "getpeername6_prog.skel.h"
#include "getpeername_unix_prog.skel.h"
#include "network_helpers.h"
+#ifndef ENOTSUPP
+# define ENOTSUPP 524
+#endif
+
+#define TEST_NS "sock_addr"
+#define TEST_IF_PREFIX "test_sock_addr"
+#define TEST_IPV4 "127.0.0.4"
+#define TEST_IPV6 "::6"
+
+#define SERV4_IP "192.168.1.254"
+#define SERV4_REWRITE_IP "127.0.0.1"
+#define SRC4_IP "172.16.0.1"
+#define SRC4_REWRITE_IP TEST_IPV4
+#define SERV4_PORT 4040
+#define SERV4_REWRITE_PORT 4444
+
+#define SERV6_IP "face:b00c:1234:5678::abcd"
+#define SERV6_REWRITE_IP "::1"
+#define SERV6_V4MAPPED_IP "::ffff:192.168.0.4"
+#define SRC6_IP "::1"
+#define SRC6_REWRITE_IP TEST_IPV6
+#define WILDCARD6_IP "::"
+#define SERV6_PORT 6060
+#define SERV6_REWRITE_PORT 6666
+
#define SERVUN_ADDRESS "bpf_cgroup_unix_test"
#define SERVUN_REWRITE_ADDRESS "bpf_cgroup_unix_test_rewrite"
-#define SRCUN_ADDRESS "bpf_cgroup_unix_test_src"
+#define SRCUN_ADDRESS "bpf_cgroup_unix_test_src"
+
+#define save_errno_do(op) ({ int __save = errno; op; errno = __save; })
enum sock_addr_test_type {
SOCK_ADDR_TEST_BIND,
@@ -23,152 +63,955 @@ enum sock_addr_test_type {
SOCK_ADDR_TEST_GETPEERNAME,
};
-typedef void *(*load_fn)(int cgroup_fd);
+typedef void *(*load_fn)(int cgroup_fd,
+ enum bpf_attach_type attach_type,
+ bool expect_reject);
typedef void (*destroy_fn)(void *skel);
-struct sock_addr_test {
- enum sock_addr_test_type type;
- const char *name;
- /* BPF prog properties */
- load_fn loadfn;
- destroy_fn destroyfn;
- /* Socket properties */
- int socket_family;
- int socket_type;
- /* IP:port pairs for BPF prog to override */
- const char *requested_addr;
- unsigned short requested_port;
- const char *expected_addr;
- unsigned short expected_port;
- const char *expected_src_addr;
+static int cmp_addr(const struct sockaddr_storage *addr1, socklen_t addr1_len,
+ const struct sockaddr_storage *addr2, socklen_t addr2_len,
+ bool cmp_port);
+
+struct init_sock_args {
+ int af;
+ int type;
};
-static void *connect_unix_prog_load(int cgroup_fd)
-{
- struct connect_unix_prog *skel;
+struct addr_args {
+ char addr[sizeof(struct sockaddr_storage)];
+ int addrlen;
+};
- skel = connect_unix_prog__open_and_load();
- if (!ASSERT_OK_PTR(skel, "skel_open"))
- goto cleanup;
+struct sendmsg_args {
+ struct addr_args addr;
+ char msg[10];
+ int msglen;
+};
- skel->links.connect_unix_prog = bpf_program__attach_cgroup(
- skel->progs.connect_unix_prog, cgroup_fd);
- if (!ASSERT_OK_PTR(skel->links.connect_unix_prog, "prog_attach"))
- goto cleanup;
+static struct sock_addr_kern *skel;
- return skel;
-cleanup:
- connect_unix_prog__destroy(skel);
- return NULL;
+static int run_bpf_prog(const char *prog_name, void *ctx, int ctx_size)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct bpf_program *prog;
+ int prog_fd, err;
+
+ topts.ctx_in = ctx;
+ topts.ctx_size_in = ctx_size;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto err;
+
+ prog_fd = bpf_program__fd(prog);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, prog_name))
+ goto err;
+
+ err = topts.retval;
+ errno = -topts.retval;
+ goto out;
+err:
+ err = -1;
+out:
+ return err;
}
-static void connect_unix_prog_destroy(void *skel)
+static int kernel_init_sock(int af, int type, int protocol)
{
- connect_unix_prog__destroy(skel);
+ struct init_sock_args args = {
+ .af = af,
+ .type = type,
+ };
+
+ return run_bpf_prog("init_sock", &args, sizeof(args));
}
-static void *sendmsg_unix_prog_load(int cgroup_fd)
+static int kernel_close_sock(int fd)
{
- struct sendmsg_unix_prog *skel;
+ return run_bpf_prog("close_sock", NULL, 0);
+}
- skel = sendmsg_unix_prog__open_and_load();
- if (!ASSERT_OK_PTR(skel, "skel_open"))
- goto cleanup;
+static int sock_addr_op(const char *name, struct sockaddr *addr,
+ socklen_t *addrlen, bool expect_change)
+{
+ struct addr_args args;
+ int err;
- skel->links.sendmsg_unix_prog = bpf_program__attach_cgroup(
- skel->progs.sendmsg_unix_prog, cgroup_fd);
- if (!ASSERT_OK_PTR(skel->links.sendmsg_unix_prog, "prog_attach"))
- goto cleanup;
+ if (addrlen)
+ args.addrlen = *addrlen;
- return skel;
-cleanup:
- sendmsg_unix_prog__destroy(skel);
- return NULL;
+ if (addr)
+ memcpy(&args.addr, addr, *addrlen);
+
+ err = run_bpf_prog(name, &args, sizeof(args));
+
+ if (!expect_change && addr)
+ if (!ASSERT_EQ(cmp_addr((struct sockaddr_storage *)addr,
+ *addrlen,
+ (struct sockaddr_storage *)&args.addr,
+ args.addrlen, 1),
+ 0, "address_param_modified"))
+ return -1;
+
+ if (addrlen)
+ *addrlen = args.addrlen;
+
+ if (addr)
+ memcpy(addr, &args.addr, *addrlen);
+
+ return err;
}
-static void sendmsg_unix_prog_destroy(void *skel)
+static int send_msg_op(const char *name, struct sockaddr *addr,
+ socklen_t addrlen, const char *msg, int msglen)
{
- sendmsg_unix_prog__destroy(skel);
+ struct sendmsg_args args;
+ int err;
+
+ memset(&args, 0, sizeof(args));
+ memcpy(&args.addr.addr, addr, addrlen);
+ args.addr.addrlen = addrlen;
+ memcpy(args.msg, msg, msglen);
+ args.msglen = msglen;
+
+ err = run_bpf_prog(name, &args, sizeof(args));
+
+ if (!ASSERT_EQ(cmp_addr((struct sockaddr_storage *)addr,
+ addrlen,
+ (struct sockaddr_storage *)&args.addr.addr,
+ args.addr.addrlen, 1),
+ 0, "address_param_modified"))
+ return -1;
+
+ return err;
}
-static void *recvmsg_unix_prog_load(int cgroup_fd)
+static int kernel_connect(struct sockaddr *addr, socklen_t addrlen)
{
- struct recvmsg_unix_prog *skel;
-
- skel = recvmsg_unix_prog__open_and_load();
- if (!ASSERT_OK_PTR(skel, "skel_open"))
- goto cleanup;
+ return sock_addr_op("kernel_connect", addr, &addrlen, false);
+}
- skel->links.recvmsg_unix_prog = bpf_program__attach_cgroup(
- skel->progs.recvmsg_unix_prog, cgroup_fd);
- if (!ASSERT_OK_PTR(skel->links.recvmsg_unix_prog, "prog_attach"))
- goto cleanup;
+static int kernel_bind(int fd, struct sockaddr *addr, socklen_t addrlen)
+{
+ return sock_addr_op("kernel_bind", addr, &addrlen, false);
+}
- return skel;
-cleanup:
- recvmsg_unix_prog__destroy(skel);
- return NULL;
+static int kernel_listen(void)
+{
+ return sock_addr_op("kernel_listen", NULL, NULL, false);
}
-static void recvmsg_unix_prog_destroy(void *skel)
+static int kernel_sendmsg(int fd, struct sockaddr *addr, socklen_t addrlen,
+ char *msg, int msglen)
{
- recvmsg_unix_prog__destroy(skel);
+ return send_msg_op("kernel_sendmsg", addr, addrlen, msg, msglen);
}
-static void *getsockname_unix_prog_load(int cgroup_fd)
+static int sock_sendmsg(int fd, struct sockaddr *addr, socklen_t addrlen,
+ char *msg, int msglen)
{
- struct getsockname_unix_prog *skel;
+ return send_msg_op("sock_sendmsg", addr, addrlen, msg, msglen);
+}
- skel = getsockname_unix_prog__open_and_load();
- if (!ASSERT_OK_PTR(skel, "skel_open"))
- goto cleanup;
+static int kernel_getsockname(int fd, struct sockaddr *addr, socklen_t *addrlen)
+{
+ return sock_addr_op("kernel_getsockname", addr, addrlen, true);
+}
- skel->links.getsockname_unix_prog = bpf_program__attach_cgroup(
- skel->progs.getsockname_unix_prog, cgroup_fd);
- if (!ASSERT_OK_PTR(skel->links.getsockname_unix_prog, "prog_attach"))
- goto cleanup;
+static int kernel_getpeername(int fd, struct sockaddr *addr, socklen_t *addrlen)
+{
+ return sock_addr_op("kernel_getpeername", addr, addrlen, true);
+}
- return skel;
-cleanup:
- getsockname_unix_prog__destroy(skel);
- return NULL;
+int kernel_connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
+ const struct network_helper_opts *opts)
+{
+ int err;
+
+ if (!ASSERT_OK(kernel_init_sock(addr->ss_family, type, 0),
+ "kernel_init_sock"))
+ goto err;
+
+ if (kernel_connect((struct sockaddr *)addr, addrlen) < 0)
+ goto err;
+
+ /* Test code expects a "file descriptor" on success. */
+ err = 1;
+ goto out;
+err:
+ err = -1;
+ save_errno_do(ASSERT_OK(kernel_close_sock(0), "kernel_close_sock"));
+out:
+ return err;
}
-static void getsockname_unix_prog_destroy(void *skel)
+int kernel_start_server(int family, int type, const char *addr_str, __u16 port,
+ int timeout_ms)
{
- getsockname_unix_prog__destroy(skel);
+ struct sockaddr_storage addr;
+ socklen_t addrlen;
+ int err;
+
+ if (!ASSERT_OK(kernel_init_sock(family, type, 0), "kernel_init_sock"))
+ goto err;
+
+ if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+ goto err;
+
+ if (kernel_bind(0, (struct sockaddr *)&addr, addrlen) < 0)
+ goto err;
+
+ if (type == SOCK_STREAM) {
+ if (!ASSERT_OK(kernel_listen(), "kernel_listen"))
+ goto err;
+ }
+
+ /* Test code expects a "file descriptor" on success. */
+ err = 1;
+ goto out;
+err:
+ err = -1;
+ save_errno_do(ASSERT_OK(kernel_close_sock(0), "kernel_close_sock"));
+out:
+ return err;
}
-static void *getpeername_unix_prog_load(int cgroup_fd)
+struct sock_ops {
+ int (*connect_to_addr)(int type, const struct sockaddr_storage *addr,
+ socklen_t addrlen,
+ const struct network_helper_opts *opts);
+ int (*start_server)(int family, int type, const char *addr_str,
+ __u16 port, int timeout_ms);
+ int (*socket)(int famil, int type, int protocol);
+ int (*bind)(int fd, struct sockaddr *addr, socklen_t addrlen);
+ int (*getsockname)(int fd, struct sockaddr *addr, socklen_t *addrlen);
+ int (*getpeername)(int fd, struct sockaddr *addr, socklen_t *addrlen);
+ int (*sendmsg)(int fd, struct sockaddr *addr, socklen_t addrlen,
+ char *msg, int msglen);
+ int (*close)(int fd);
+};
+
+static int user_sendmsg(int fd, struct sockaddr *addr, socklen_t addrlen,
+ char *msg, int msglen)
{
- struct getpeername_unix_prog *skel;
+ struct msghdr hdr;
+ struct iovec iov;
- skel = getpeername_unix_prog__open_and_load();
- if (!ASSERT_OK_PTR(skel, "skel_open"))
- goto cleanup;
+ memset(&iov, 0, sizeof(iov));
+ iov.iov_base = msg;
+ iov.iov_len = msglen;
- skel->links.getpeername_unix_prog = bpf_program__attach_cgroup(
- skel->progs.getpeername_unix_prog, cgroup_fd);
- if (!ASSERT_OK_PTR(skel->links.getpeername_unix_prog, "prog_attach"))
- goto cleanup;
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.msg_name = (void *)addr;
+ hdr.msg_namelen = addrlen;
+ hdr.msg_iov = &iov;
+ hdr.msg_iovlen = 1;
- return skel;
-cleanup:
- getpeername_unix_prog__destroy(skel);
- return NULL;
+ return sendmsg(fd, &hdr, 0);
}
-static void getpeername_unix_prog_destroy(void *skel)
+static int user_bind(int fd, struct sockaddr *addr, socklen_t addrlen)
{
- getpeername_unix_prog__destroy(skel);
+ return bind(fd, (const struct sockaddr *)addr, addrlen);
+}
+
+struct sock_ops user_ops = {
+ .connect_to_addr = connect_to_addr,
+ .start_server = start_server,
+ .socket = socket,
+ .bind = user_bind,
+ .getsockname = getsockname,
+ .getpeername = getpeername,
+ .sendmsg = user_sendmsg,
+ .close = close,
+};
+
+struct sock_ops kern_ops_sock_sendmsg = {
+ .connect_to_addr = kernel_connect_to_addr,
+ .start_server = kernel_start_server,
+ .socket = kernel_init_sock,
+ .bind = kernel_bind,
+ .getsockname = kernel_getsockname,
+ .getpeername = kernel_getpeername,
+ .sendmsg = sock_sendmsg,
+ .close = kernel_close_sock,
+};
+
+struct sock_ops kern_ops_kernel_sendmsg = {
+ .connect_to_addr = kernel_connect_to_addr,
+ .start_server = kernel_start_server,
+ .socket = kernel_init_sock,
+ .bind = kernel_bind,
+ .getsockname = kernel_getsockname,
+ .getpeername = kernel_getpeername,
+ .sendmsg = kernel_sendmsg,
+ .close = kernel_close_sock,
+};
+
+struct sock_addr_test {
+ enum sock_addr_test_type type;
+ const char *name;
+ /* BPF prog properties */
+ load_fn loadfn;
+ destroy_fn destroyfn;
+ enum bpf_attach_type attach_type;
+ /* Socket operations */
+ struct sock_ops *ops;
+ /* Socket properties */
+ int socket_family;
+ int socket_type;
+ /* IP:port pairs for BPF prog to override */
+ const char *requested_addr;
+ unsigned short requested_port;
+ const char *expected_addr;
+ unsigned short expected_port;
+ const char *expected_src_addr;
+ /* Expected test result */
+ enum {
+ LOAD_REJECT,
+ ATTACH_REJECT,
+ SYSCALL_EPERM,
+ SYSCALL_ENOTSUPP,
+ SUCCESS,
+ } expected_result;
+};
+
+#define BPF_SKEL_FUNCS_RAW(skel_name, prog_name) \
+static void *prog_name##_load_raw(int cgroup_fd, \
+ enum bpf_attach_type attach_type, \
+ bool expect_reject) \
+{ \
+ struct skel_name *skel = skel_name##__open(); \
+ int prog_fd = -1; \
+ if (!ASSERT_OK_PTR(skel, "skel_open")) \
+ goto cleanup; \
+ if (!ASSERT_OK(skel_name##__load(skel), "load")) \
+ goto cleanup; \
+ prog_fd = bpf_program__fd(skel->progs.prog_name); \
+ if (!ASSERT_GT(prog_fd, 0, "prog_fd")) \
+ goto cleanup; \
+ if (bpf_prog_attach(prog_fd, cgroup_fd, attach_type, \
+ BPF_F_ALLOW_OVERRIDE), "bpf_prog_attach") { \
+ ASSERT_TRUE(expect_reject, "unexpected rejection"); \
+ goto cleanup; \
+ } \
+ if (!ASSERT_FALSE(expect_reject, "expected rejection")) \
+ goto cleanup; \
+cleanup: \
+ if (prog_fd > 0) \
+ bpf_prog_detach(cgroup_fd, attach_type); \
+ skel_name##__destroy(skel); \
+ return NULL; \
+} \
+static void prog_name##_destroy_raw(void *progfd) \
+{ \
+ /* No-op. *_load_raw does all cleanup. */ \
+} \
+
+#define BPF_SKEL_FUNCS(skel_name, prog_name) \
+static void *prog_name##_load(int cgroup_fd, \
+ enum bpf_attach_type attach_type, \
+ bool expect_reject) \
+{ \
+ struct skel_name *skel = skel_name##__open(); \
+ if (!ASSERT_OK_PTR(skel, "skel_open")) \
+ goto cleanup; \
+ if (!ASSERT_OK(bpf_program__set_expected_attach_type(skel->progs.prog_name, \
+ attach_type), \
+ "set_expected_attach_type")) \
+ goto cleanup; \
+ if (skel_name##__load(skel)) { \
+ ASSERT_TRUE(expect_reject, "unexpected rejection"); \
+ goto cleanup; \
+ } \
+ if (!ASSERT_FALSE(expect_reject, "expected rejection")) \
+ goto cleanup; \
+ skel->links.prog_name = bpf_program__attach_cgroup( \
+ skel->progs.prog_name, cgroup_fd); \
+ if (!ASSERT_OK_PTR(skel->links.prog_name, "prog_attach")) \
+ goto cleanup; \
+ return skel; \
+cleanup: \
+ skel_name##__destroy(skel); \
+ return NULL; \
+} \
+static void prog_name##_destroy(void *skel) \
+{ \
+ skel_name##__destroy(skel); \
}
+BPF_SKEL_FUNCS(bind4_prog, bind_v4_prog);
+BPF_SKEL_FUNCS_RAW(bind4_prog, bind_v4_prog);
+BPF_SKEL_FUNCS(bind4_prog, bind_v4_deny_prog);
+BPF_SKEL_FUNCS(bind6_prog, bind_v6_prog);
+BPF_SKEL_FUNCS_RAW(bind6_prog, bind_v6_prog);
+BPF_SKEL_FUNCS(bind6_prog, bind_v6_deny_prog);
+BPF_SKEL_FUNCS(connect4_prog, connect_v4_prog);
+BPF_SKEL_FUNCS_RAW(connect4_prog, connect_v4_prog);
+BPF_SKEL_FUNCS(connect4_prog, connect_v4_deny_prog);
+BPF_SKEL_FUNCS(connect6_prog, connect_v6_prog);
+BPF_SKEL_FUNCS_RAW(connect6_prog, connect_v6_prog);
+BPF_SKEL_FUNCS(connect6_prog, connect_v6_deny_prog);
+BPF_SKEL_FUNCS(connect_unix_prog, connect_unix_prog);
+BPF_SKEL_FUNCS_RAW(connect_unix_prog, connect_unix_prog);
+BPF_SKEL_FUNCS(connect_unix_prog, connect_unix_deny_prog);
+BPF_SKEL_FUNCS(sendmsg4_prog, sendmsg_v4_prog);
+BPF_SKEL_FUNCS_RAW(sendmsg4_prog, sendmsg_v4_prog);
+BPF_SKEL_FUNCS(sendmsg4_prog, sendmsg_v4_deny_prog);
+BPF_SKEL_FUNCS(sendmsg6_prog, sendmsg_v6_prog);
+BPF_SKEL_FUNCS_RAW(sendmsg6_prog, sendmsg_v6_prog);
+BPF_SKEL_FUNCS(sendmsg6_prog, sendmsg_v6_deny_prog);
+BPF_SKEL_FUNCS(sendmsg6_prog, sendmsg_v6_preserve_dst_prog);
+BPF_SKEL_FUNCS(sendmsg6_prog, sendmsg_v6_v4mapped_prog);
+BPF_SKEL_FUNCS(sendmsg6_prog, sendmsg_v6_wildcard_prog);
+BPF_SKEL_FUNCS(sendmsg_unix_prog, sendmsg_unix_prog);
+BPF_SKEL_FUNCS_RAW(sendmsg_unix_prog, sendmsg_unix_prog);
+BPF_SKEL_FUNCS(sendmsg_unix_prog, sendmsg_unix_deny_prog);
+BPF_SKEL_FUNCS(recvmsg4_prog, recvmsg4_prog);
+BPF_SKEL_FUNCS_RAW(recvmsg4_prog, recvmsg4_prog);
+BPF_SKEL_FUNCS(recvmsg6_prog, recvmsg6_prog);
+BPF_SKEL_FUNCS_RAW(recvmsg6_prog, recvmsg6_prog);
+BPF_SKEL_FUNCS(recvmsg_unix_prog, recvmsg_unix_prog);
+BPF_SKEL_FUNCS_RAW(recvmsg_unix_prog, recvmsg_unix_prog);
+BPF_SKEL_FUNCS(getsockname_unix_prog, getsockname_unix_prog);
+BPF_SKEL_FUNCS_RAW(getsockname_unix_prog, getsockname_unix_prog);
+BPF_SKEL_FUNCS(getsockname4_prog, getsockname_v4_prog);
+BPF_SKEL_FUNCS_RAW(getsockname4_prog, getsockname_v4_prog);
+BPF_SKEL_FUNCS(getsockname6_prog, getsockname_v6_prog);
+BPF_SKEL_FUNCS_RAW(getsockname6_prog, getsockname_v6_prog);
+BPF_SKEL_FUNCS(getpeername_unix_prog, getpeername_unix_prog);
+BPF_SKEL_FUNCS_RAW(getpeername_unix_prog, getpeername_unix_prog);
+BPF_SKEL_FUNCS(getpeername4_prog, getpeername_v4_prog);
+BPF_SKEL_FUNCS_RAW(getpeername4_prog, getpeername_v4_prog);
+BPF_SKEL_FUNCS(getpeername6_prog, getpeername_v6_prog);
+BPF_SKEL_FUNCS_RAW(getpeername6_prog, getpeername_v6_prog);
+
static struct sock_addr_test tests[] = {
+ /* bind - system calls */
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: bind (stream)",
+ bind_v4_prog_load,
+ bind_v4_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: bind deny (stream)",
+ bind_v4_deny_prog_load,
+ bind_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: bind (dgram)",
+ bind_v4_prog_load,
+ bind_v4_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: bind deny (dgram)",
+ bind_v4_deny_prog_load,
+ bind_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: load prog with wrong expected attach type",
+ bind_v4_prog_load,
+ bind_v4_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: attach prog with wrong attach type",
+ bind_v4_prog_load_raw,
+ bind_v4_prog_destroy_raw,
+ BPF_CGROUP_INET6_BIND,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: bind (stream)",
+ bind_v6_prog_load,
+ bind_v6_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: bind deny (stream)",
+ bind_v6_deny_prog_load,
+ bind_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: bind (dgram)",
+ bind_v6_prog_load,
+ bind_v6_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: bind deny (dgram)",
+ bind_v6_deny_prog_load,
+ bind_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: load prog with wrong expected attach type",
+ bind_v6_prog_load,
+ bind_v6_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: attach prog with wrong attach type",
+ bind_v6_prog_load_raw,
+ bind_v6_prog_destroy_raw,
+ BPF_CGROUP_INET4_BIND,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+
+ /* bind - kernel calls */
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: kernel_bind (stream)",
+ bind_v4_prog_load,
+ bind_v4_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: kernel_bind deny (stream)",
+ bind_v4_deny_prog_load,
+ bind_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: kernel_bind (dgram)",
+ bind_v4_prog_load,
+ bind_v4_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: kernel_bind deny (dgram)",
+ bind_v4_deny_prog_load,
+ bind_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: kernel_bind (stream)",
+ bind_v6_prog_load,
+ bind_v6_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: kernel_bind deny (stream)",
+ bind_v6_deny_prog_load,
+ bind_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: kernel_bind (dgram)",
+ bind_v6_prog_load,
+ bind_v6_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: kernel_bind deny (dgram)",
+ bind_v6_deny_prog_load,
+ bind_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+
+ /* connect - system calls */
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: connect (stream)",
+ connect_v4_prog_load,
+ connect_v4_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: connect deny (stream)",
+ connect_v4_deny_prog_load,
+ connect_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: connect (dgram)",
+ connect_v4_prog_load,
+ connect_v4_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: connect deny (dgram)",
+ connect_v4_deny_prog_load,
+ connect_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: load prog with wrong expected attach type",
+ connect_v4_prog_load,
+ connect_v4_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: attach prog with wrong attach type",
+ connect_v4_prog_load_raw,
+ connect_v4_prog_destroy_raw,
+ BPF_CGROUP_INET6_CONNECT,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: connect (stream)",
+ connect_v6_prog_load,
+ connect_v6_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: connect deny (stream)",
+ connect_v6_deny_prog_load,
+ connect_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
{
SOCK_ADDR_TEST_CONNECT,
- "connect_unix",
+ "connect6: connect (dgram)",
+ connect_v6_prog_load,
+ connect_v6_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: connect deny (dgram)",
+ connect_v6_deny_prog_load,
+ connect_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: load prog with wrong expected attach type",
+ connect_v6_prog_load,
+ connect_v6_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: attach prog with wrong attach type",
+ connect_v6_prog_load_raw,
+ connect_v6_prog_destroy_raw,
+ BPF_CGROUP_INET4_CONNECT,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect_unix: connect (stream)",
connect_unix_prog_load,
connect_unix_prog_destroy,
+ BPF_CGROUP_UNIX_CONNECT,
+ &user_ops,
AF_UNIX,
SOCK_STREAM,
SERVUN_ADDRESS,
@@ -176,12 +1019,631 @@ static struct sock_addr_test tests[] = {
SERVUN_REWRITE_ADDRESS,
0,
NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect_unix: connect deny (stream)",
+ connect_unix_deny_prog_load,
+ connect_unix_deny_prog_destroy,
+ BPF_CGROUP_UNIX_CONNECT,
+ &user_ops,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect_unix: attach prog with wrong attach type",
+ connect_unix_prog_load_raw,
+ connect_unix_prog_destroy_raw,
+ BPF_CGROUP_INET4_CONNECT,
+ &user_ops,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+
+ /* connect - kernel calls */
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: kernel_connect (stream)",
+ connect_v4_prog_load,
+ connect_v4_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: kernel_connect deny (stream)",
+ connect_v4_deny_prog_load,
+ connect_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: kernel_connect (dgram)",
+ connect_v4_prog_load,
+ connect_v4_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: kernel_connect deny (dgram)",
+ connect_v4_deny_prog_load,
+ connect_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: kernel_connect (stream)",
+ connect_v6_prog_load,
+ connect_v6_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: kernel_connect deny (stream)",
+ connect_v6_deny_prog_load,
+ connect_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: kernel_connect (dgram)",
+ connect_v6_prog_load,
+ connect_v6_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: kernel_connect deny (dgram)",
+ connect_v6_deny_prog_load,
+ connect_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect_unix: kernel_connect (dgram)",
+ connect_unix_prog_load,
+ connect_unix_prog_destroy,
+ BPF_CGROUP_UNIX_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect_unix: kernel_connect deny (dgram)",
+ connect_unix_deny_prog_load,
+ connect_unix_deny_prog_destroy,
+ BPF_CGROUP_UNIX_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SYSCALL_EPERM,
+ },
+
+ /* sendmsg - system calls */
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: sendmsg (dgram)",
+ sendmsg_v4_prog_load,
+ sendmsg_v4_prog_destroy,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: sendmsg deny (dgram)",
+ sendmsg_v4_deny_prog_load,
+ sendmsg_v4_deny_prog_destroy,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: load prog with wrong expected attach type",
+ sendmsg_v4_prog_load,
+ sendmsg_v4_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: attach prog with wrong attach type",
+ sendmsg_v4_prog_load_raw,
+ sendmsg_v4_prog_destroy_raw,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sendmsg (dgram)",
+ sendmsg_v6_prog_load,
+ sendmsg_v6_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sendmsg [::] (BSD'ism) (dgram)",
+ sendmsg_v6_preserve_dst_prog_load,
+ sendmsg_v6_preserve_dst_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ WILDCARD6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_PORT,
+ SRC6_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sendmsg deny (dgram)",
+ sendmsg_v6_deny_prog_load,
+ sendmsg_v6_deny_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sendmsg IPv4-mapped IPv6 (dgram)",
+ sendmsg_v6_v4mapped_prog_load,
+ sendmsg_v6_v4mapped_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_ENOTSUPP,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sendmsg dst IP = [::] (BSD'ism) (dgram)",
+ sendmsg_v6_wildcard_prog_load,
+ sendmsg_v6_wildcard_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: load prog with wrong expected attach type",
+ sendmsg_v6_prog_load,
+ sendmsg_v6_prog_destroy,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: attach prog with wrong attach type",
+ sendmsg_v6_prog_load_raw,
+ sendmsg_v6_prog_destroy_raw,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix: sendmsg (dgram)",
+ sendmsg_unix_prog_load,
+ sendmsg_unix_prog_destroy,
+ BPF_CGROUP_UNIX_SENDMSG,
+ &user_ops,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix: sendmsg deny (dgram)",
+ sendmsg_unix_deny_prog_load,
+ sendmsg_unix_deny_prog_destroy,
+ BPF_CGROUP_UNIX_SENDMSG,
+ &user_ops,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix: attach prog with wrong attach type",
+ sendmsg_unix_prog_load_raw,
+ sendmsg_unix_prog_destroy_raw,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &user_ops,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+
+ /* sendmsg - kernel calls (sock_sendmsg) */
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: sock_sendmsg (dgram)",
+ sendmsg_v4_prog_load,
+ sendmsg_v4_prog_destroy,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: sock_sendmsg deny (dgram)",
+ sendmsg_v4_deny_prog_load,
+ sendmsg_v4_deny_prog_destroy,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sock_sendmsg (dgram)",
+ sendmsg_v6_prog_load,
+ sendmsg_v6_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sock_sendmsg [::] (BSD'ism) (dgram)",
+ sendmsg_v6_preserve_dst_prog_load,
+ sendmsg_v6_preserve_dst_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ WILDCARD6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_PORT,
+ SRC6_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sock_sendmsg deny (dgram)",
+ sendmsg_v6_deny_prog_load,
+ sendmsg_v6_deny_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix: sock_sendmsg (dgram)",
+ sendmsg_unix_prog_load,
+ sendmsg_unix_prog_destroy,
+ BPF_CGROUP_UNIX_SENDMSG,
+ &kern_ops_sock_sendmsg,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix: sock_sendmsg deny (dgram)",
+ sendmsg_unix_deny_prog_load,
+ sendmsg_unix_deny_prog_destroy,
+ BPF_CGROUP_UNIX_SENDMSG,
+ &kern_ops_sock_sendmsg,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SYSCALL_EPERM,
+ },
+
+ /* sendmsg - kernel calls (kernel_sendmsg) */
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: kernel_sendmsg (dgram)",
+ sendmsg_v4_prog_load,
+ sendmsg_v4_prog_destroy,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &kern_ops_kernel_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: kernel_sendmsg deny (dgram)",
+ sendmsg_v4_deny_prog_load,
+ sendmsg_v4_deny_prog_destroy,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &kern_ops_kernel_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: kernel_sendmsg (dgram)",
+ sendmsg_v6_prog_load,
+ sendmsg_v6_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &kern_ops_kernel_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: kernel_sendmsg [::] (BSD'ism) (dgram)",
+ sendmsg_v6_preserve_dst_prog_load,
+ sendmsg_v6_preserve_dst_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &kern_ops_kernel_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ WILDCARD6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_PORT,
+ SRC6_IP,
+ SUCCESS,
},
{
SOCK_ADDR_TEST_SENDMSG,
- "sendmsg_unix",
+ "sendmsg6: kernel_sendmsg deny (dgram)",
+ sendmsg_v6_deny_prog_load,
+ sendmsg_v6_deny_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &kern_ops_kernel_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix: sock_sendmsg (dgram)",
sendmsg_unix_prog_load,
sendmsg_unix_prog_destroy,
+ BPF_CGROUP_UNIX_SENDMSG,
+ &kern_ops_kernel_sendmsg,
AF_UNIX,
SOCK_DGRAM,
SERVUN_ADDRESS,
@@ -189,12 +1651,97 @@ static struct sock_addr_test tests[] = {
SERVUN_REWRITE_ADDRESS,
0,
NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix: kernel_sendmsg deny (dgram)",
+ sendmsg_unix_deny_prog_load,
+ sendmsg_unix_deny_prog_destroy,
+ BPF_CGROUP_UNIX_SENDMSG,
+ &kern_ops_kernel_sendmsg,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SYSCALL_EPERM,
+ },
+
+ /* recvmsg - system calls */
+ {
+ SOCK_ADDR_TEST_RECVMSG,
+ "recvmsg4: recvfrom (dgram)",
+ recvmsg4_prog_load,
+ recvmsg4_prog_destroy,
+ BPF_CGROUP_UDP4_RECVMSG,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_RECVMSG,
+ "recvmsg4: attach prog with wrong attach type",
+ recvmsg4_prog_load_raw,
+ recvmsg4_prog_destroy_raw,
+ BPF_CGROUP_UDP6_RECVMSG,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ ATTACH_REJECT,
},
{
SOCK_ADDR_TEST_RECVMSG,
- "recvmsg_unix-dgram",
+ "recvmsg6: recvfrom (dgram)",
+ recvmsg6_prog_load,
+ recvmsg6_prog_destroy,
+ BPF_CGROUP_UDP6_RECVMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_RECVMSG,
+ "recvmsg6: attach prog with wrong attach type",
+ recvmsg6_prog_load_raw,
+ recvmsg6_prog_destroy_raw,
+ BPF_CGROUP_UDP4_RECVMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_RECVMSG,
+ "recvmsg_unix: recvfrom (dgram)",
recvmsg_unix_prog_load,
recvmsg_unix_prog_destroy,
+ BPF_CGROUP_UNIX_RECVMSG,
+ &user_ops,
AF_UNIX,
SOCK_DGRAM,
SERVUN_REWRITE_ADDRESS,
@@ -202,12 +1749,15 @@ static struct sock_addr_test tests[] = {
SERVUN_REWRITE_ADDRESS,
0,
SERVUN_ADDRESS,
+ SUCCESS,
},
{
SOCK_ADDR_TEST_RECVMSG,
- "recvmsg_unix-stream",
+ "recvmsg_unix: recvfrom (stream)",
recvmsg_unix_prog_load,
recvmsg_unix_prog_destroy,
+ BPF_CGROUP_UNIX_RECVMSG,
+ &user_ops,
AF_UNIX,
SOCK_STREAM,
SERVUN_REWRITE_ADDRESS,
@@ -215,12 +1765,227 @@ static struct sock_addr_test tests[] = {
SERVUN_REWRITE_ADDRESS,
0,
SERVUN_ADDRESS,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_RECVMSG,
+ "recvmsg_unix: attach prog with wrong attach type",
+ recvmsg_unix_prog_load_raw,
+ recvmsg_unix_prog_destroy_raw,
+ BPF_CGROUP_UDP4_RECVMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ SERVUN_ADDRESS,
+ ATTACH_REJECT,
},
+
+ /* getsockname - system calls */
{
SOCK_ADDR_TEST_GETSOCKNAME,
- "getsockname_unix",
+ "getsockname4: getsockname (stream)",
+ getsockname_v4_prog_load,
+ getsockname_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname4: getsockname (dgram)",
+ getsockname_v4_prog_load,
+ getsockname_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname4: attach prog with wrong attach type",
+ getsockname_v4_prog_load_raw,
+ getsockname_v4_prog_destroy_raw,
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname6: getsockname (stream)",
+ getsockname_v6_prog_load,
+ getsockname_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname6: getsockname (dgram)",
+ getsockname_v6_prog_load,
+ getsockname_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname6: attach prog with wrong attach type",
+ getsockname_v6_prog_load_raw,
+ getsockname_v6_prog_destroy_raw,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname_unix: getsockname",
+ getsockname_unix_prog_load,
+ getsockname_unix_prog_destroy,
+ BPF_CGROUP_UNIX_GETSOCKNAME,
+ &user_ops,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname_unix: attach prog with wrong attach type",
+ getsockname_unix_prog_load_raw,
+ getsockname_unix_prog_destroy_raw,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &user_ops,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+
+ /* getsockname - kernel calls */
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname4: kernel_getsockname (stream)",
+ getsockname_v4_prog_load,
+ getsockname_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname4: kernel_getsockname (dgram)",
+ getsockname_v4_prog_load,
+ getsockname_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname6: kernel_getsockname (stream)",
+ getsockname_v6_prog_load,
+ getsockname_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname6: kernel_getsockname (dgram)",
+ getsockname_v6_prog_load,
+ getsockname_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname_unix: kernel_getsockname",
getsockname_unix_prog_load,
getsockname_unix_prog_destroy,
+ BPF_CGROUP_UNIX_GETSOCKNAME,
+ &kern_ops_kernel_sendmsg,
AF_UNIX,
SOCK_STREAM,
SERVUN_ADDRESS,
@@ -228,12 +1993,113 @@ static struct sock_addr_test tests[] = {
SERVUN_REWRITE_ADDRESS,
0,
NULL,
+ SUCCESS,
+ },
+
+ /* getpeername - system calls */
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername4: getpeername (stream)",
+ getpeername_v4_prog_load,
+ getpeername_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETPEERNAME,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername4: getpeername (dgram)",
+ getpeername_v4_prog_load,
+ getpeername_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETPEERNAME,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
},
{
SOCK_ADDR_TEST_GETPEERNAME,
- "getpeername_unix",
+ "getpeername4: attach prog with wrong attach type",
+ getpeername_v4_prog_load_raw,
+ getpeername_v4_prog_destroy_raw,
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ &user_ops,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername6: getpeername (stream)",
+ getpeername_v6_prog_load,
+ getpeername_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETPEERNAME,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername6: getpeername (dgram)",
+ getpeername_v6_prog_load,
+ getpeername_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETPEERNAME,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername6: attach prog with wrong attach type",
+ getpeername_v6_prog_load_raw,
+ getpeername_v6_prog_destroy_raw,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername_unix: getpeername",
getpeername_unix_prog_load,
getpeername_unix_prog_destroy,
+ BPF_CGROUP_UNIX_GETPEERNAME,
+ &user_ops,
AF_UNIX,
SOCK_STREAM,
SERVUN_ADDRESS,
@@ -241,6 +2107,105 @@ static struct sock_addr_test tests[] = {
SERVUN_REWRITE_ADDRESS,
0,
NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername_unix: attach prog with wrong attach type",
+ getpeername_unix_prog_load_raw,
+ getpeername_unix_prog_destroy_raw,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &user_ops,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+
+ /* getpeername - kernel calls */
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername4: kernel_getpeername (stream)",
+ getpeername_v4_prog_load,
+ getpeername_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETPEERNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername4: kernel_getpeername (dgram)",
+ getpeername_v4_prog_load,
+ getpeername_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETPEERNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername6: kernel_getpeername (stream)",
+ getpeername_v6_prog_load,
+ getpeername_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETPEERNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername6: kernel_getpeername (dgram)",
+ getpeername_v6_prog_load,
+ getpeername_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETPEERNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername_unix: kernel_getpeername",
+ getpeername_unix_prog_load,
+ getpeername_unix_prog_destroy,
+ BPF_CGROUP_UNIX_GETPEERNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SUCCESS,
},
};
@@ -294,28 +2259,40 @@ static int cmp_sock_addr(info_fn fn, int sock1,
return cmp_addr(&addr1, len1, addr2, addr2_len, cmp_port);
}
-static int cmp_local_addr(int sock1, const struct sockaddr_storage *addr2,
- socklen_t addr2_len, bool cmp_port)
+static int load_sock_addr_kern(void)
{
- return cmp_sock_addr(getsockname, sock1, addr2, addr2_len, cmp_port);
+ int err;
+
+ skel = sock_addr_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ goto err;
+
+ err = 0;
+ goto out;
+err:
+ err = -1;
+out:
+ return err;
}
-static int cmp_peer_addr(int sock1, const struct sockaddr_storage *addr2,
- socklen_t addr2_len, bool cmp_port)
+static void unload_sock_addr_kern(void)
{
- return cmp_sock_addr(getpeername, sock1, addr2, addr2_len, cmp_port);
+ sock_addr_kern__destroy(skel);
}
-static void test_bind(struct sock_addr_test *test)
+static int test_bind(struct sock_addr_test *test)
{
struct sockaddr_storage expected_addr;
socklen_t expected_addr_len = sizeof(struct sockaddr_storage);
int serv = -1, client = -1, err;
- serv = start_server(test->socket_family, test->socket_type,
- test->requested_addr, test->requested_port, 0);
- if (!ASSERT_GE(serv, 0, "start_server"))
- goto cleanup;
+ serv = test->ops->start_server(test->socket_family, test->socket_type,
+ test->requested_addr,
+ test->requested_port, 0);
+ if (serv < 0) {
+ err = errno;
+ goto err;
+ }
err = make_sockaddr(test->socket_family,
test->expected_addr, test->expected_port,
@@ -323,23 +2300,28 @@ static void test_bind(struct sock_addr_test *test)
if (!ASSERT_EQ(err, 0, "make_sockaddr"))
goto cleanup;
- err = cmp_local_addr(serv, &expected_addr, expected_addr_len, true);
+ err = cmp_sock_addr(test->ops->getsockname, serv, &expected_addr,
+ expected_addr_len, true);
if (!ASSERT_EQ(err, 0, "cmp_local_addr"))
goto cleanup;
/* Try to connect to server just in case */
- client = connect_to_addr(&expected_addr, expected_addr_len, test->socket_type);
+ client = connect_to_addr(test->socket_type, &expected_addr, expected_addr_len, NULL);
if (!ASSERT_GE(client, 0, "connect_to_addr"))
goto cleanup;
cleanup:
+ err = 0;
+err:
if (client != -1)
close(client);
if (serv != -1)
- close(serv);
+ test->ops->close(serv);
+
+ return err;
}
-static void test_connect(struct sock_addr_test *test)
+static int test_connect(struct sock_addr_test *test)
{
struct sockaddr_storage addr, expected_addr, expected_src_addr;
socklen_t addr_len = sizeof(struct sockaddr_storage),
@@ -357,9 +2339,12 @@ static void test_connect(struct sock_addr_test *test)
if (!ASSERT_EQ(err, 0, "make_sockaddr"))
goto cleanup;
- client = connect_to_addr(&addr, addr_len, test->socket_type);
- if (!ASSERT_GE(client, 0, "connect_to_addr"))
- goto cleanup;
+ client = test->ops->connect_to_addr(test->socket_type, &addr, addr_len,
+ NULL);
+ if (client < 0) {
+ err = errno;
+ goto err;
+ }
err = make_sockaddr(test->socket_family, test->expected_addr, test->expected_port,
&expected_addr, &expected_addr_len);
@@ -373,29 +2358,34 @@ static void test_connect(struct sock_addr_test *test)
goto cleanup;
}
- err = cmp_peer_addr(client, &expected_addr, expected_addr_len, true);
+ err = cmp_sock_addr(test->ops->getpeername, client, &expected_addr,
+ expected_addr_len, true);
if (!ASSERT_EQ(err, 0, "cmp_peer_addr"))
goto cleanup;
if (test->expected_src_addr) {
- err = cmp_local_addr(client, &expected_src_addr, expected_src_addr_len, false);
+ err = cmp_sock_addr(test->ops->getsockname, client,
+ &expected_src_addr, expected_src_addr_len,
+ false);
if (!ASSERT_EQ(err, 0, "cmp_local_addr"))
goto cleanup;
}
cleanup:
+ err = 0;
+err:
if (client != -1)
- close(client);
+ test->ops->close(client);
if (serv != -1)
close(serv);
+
+ return err;
}
-static void test_xmsg(struct sock_addr_test *test)
+static int test_xmsg(struct sock_addr_test *test)
{
struct sockaddr_storage addr, src_addr;
socklen_t addr_len = sizeof(struct sockaddr_storage),
src_addr_len = sizeof(struct sockaddr_storage);
- struct msghdr hdr;
- struct iovec iov;
char data = 'a';
int serv = -1, client = -1, err;
@@ -408,7 +2398,7 @@ static void test_xmsg(struct sock_addr_test *test)
if (!ASSERT_GE(serv, 0, "start_server"))
goto cleanup;
- client = socket(test->socket_family, test->socket_type, 0);
+ client = test->ops->socket(test->socket_family, test->socket_type, 0);
if (!ASSERT_GE(client, 0, "socket"))
goto cleanup;
@@ -418,7 +2408,8 @@ static void test_xmsg(struct sock_addr_test *test)
if (!ASSERT_EQ(err, 0, "make_sockaddr"))
goto cleanup;
- err = bind(client, (const struct sockaddr *) &src_addr, src_addr_len);
+ err = test->ops->bind(client, (struct sockaddr *)&src_addr,
+ src_addr_len);
if (!ASSERT_OK(err, "bind"))
goto cleanup;
}
@@ -429,17 +2420,13 @@ static void test_xmsg(struct sock_addr_test *test)
goto cleanup;
if (test->socket_type == SOCK_DGRAM) {
- memset(&iov, 0, sizeof(iov));
- iov.iov_base = &data;
- iov.iov_len = sizeof(data);
-
- memset(&hdr, 0, sizeof(hdr));
- hdr.msg_name = (void *)&addr;
- hdr.msg_namelen = addr_len;
- hdr.msg_iov = &iov;
- hdr.msg_iovlen = 1;
+ err = test->ops->sendmsg(client, (struct sockaddr *)&addr,
+ addr_len, &data, sizeof(data));
+ if (err < 0) {
+ err = errno;
+ goto err;
+ }
- err = sendmsg(client, &hdr, 0);
if (!ASSERT_EQ(err, sizeof(data), "sendmsg"))
goto cleanup;
} else {
@@ -489,19 +2476,23 @@ static void test_xmsg(struct sock_addr_test *test)
}
cleanup:
+ err = 0;
+err:
if (client != -1)
- close(client);
+ test->ops->close(client);
if (serv != -1)
close(serv);
+
+ return err;
}
-static void test_getsockname(struct sock_addr_test *test)
+static int test_getsockname(struct sock_addr_test *test)
{
struct sockaddr_storage expected_addr;
socklen_t expected_addr_len = sizeof(struct sockaddr_storage);
int serv = -1, err;
- serv = start_server(test->socket_family, test->socket_type,
+ serv = test->ops->start_server(test->socket_family, test->socket_type,
test->requested_addr, test->requested_port, 0);
if (!ASSERT_GE(serv, 0, "start_server"))
goto cleanup;
@@ -512,16 +2503,18 @@ static void test_getsockname(struct sock_addr_test *test)
if (!ASSERT_EQ(err, 0, "make_sockaddr"))
goto cleanup;
- err = cmp_local_addr(serv, &expected_addr, expected_addr_len, true);
+ err = cmp_sock_addr(test->ops->getsockname, serv, &expected_addr, expected_addr_len, true);
if (!ASSERT_EQ(err, 0, "cmp_local_addr"))
goto cleanup;
cleanup:
if (serv != -1)
- close(serv);
+ test->ops->close(serv);
+
+ return 0;
}
-static void test_getpeername(struct sock_addr_test *test)
+static int test_getpeername(struct sock_addr_test *test)
{
struct sockaddr_storage addr, expected_addr;
socklen_t addr_len = sizeof(struct sockaddr_storage),
@@ -538,7 +2531,8 @@ static void test_getpeername(struct sock_addr_test *test)
if (!ASSERT_EQ(err, 0, "make_sockaddr"))
goto cleanup;
- client = connect_to_addr(&addr, addr_len, test->socket_type);
+ client = test->ops->connect_to_addr(test->socket_type, &addr, addr_len,
+ NULL);
if (!ASSERT_GE(client, 0, "connect_to_addr"))
goto cleanup;
@@ -547,19 +2541,58 @@ static void test_getpeername(struct sock_addr_test *test)
if (!ASSERT_EQ(err, 0, "make_sockaddr"))
goto cleanup;
- err = cmp_peer_addr(client, &expected_addr, expected_addr_len, true);
+ err = cmp_sock_addr(test->ops->getpeername, client, &expected_addr,
+ expected_addr_len, true);
if (!ASSERT_EQ(err, 0, "cmp_peer_addr"))
goto cleanup;
cleanup:
if (client != -1)
- close(client);
+ test->ops->close(client);
if (serv != -1)
close(serv);
+
+ return 0;
+}
+
+static int setup_test_env(struct nstoken **tok)
+{
+ int err;
+
+ SYS_NOFAIL("ip netns delete %s", TEST_NS);
+ SYS(fail, "ip netns add %s", TEST_NS);
+ *tok = open_netns(TEST_NS);
+ if (!ASSERT_OK_PTR(*tok, "netns token"))
+ goto fail;
+
+ SYS(fail, "ip link add dev %s1 type veth peer name %s2", TEST_IF_PREFIX,
+ TEST_IF_PREFIX);
+ SYS(fail, "ip link set lo up");
+ SYS(fail, "ip link set %s1 up", TEST_IF_PREFIX);
+ SYS(fail, "ip link set %s2 up", TEST_IF_PREFIX);
+ SYS(fail, "ip -4 addr add %s/8 dev %s1", TEST_IPV4, TEST_IF_PREFIX);
+ SYS(fail, "ip -6 addr add %s/128 nodad dev %s1", TEST_IPV6, TEST_IF_PREFIX);
+
+ err = 0;
+ goto out;
+fail:
+ err = -1;
+ close_netns(*tok);
+ *tok = NULL;
+ SYS_NOFAIL("ip netns delete %s", TEST_NS);
+out:
+ return err;
+}
+
+static void cleanup_test_env(struct nstoken *tok)
+{
+ close_netns(tok);
+ SYS_NOFAIL("ip netns delete %s", TEST_NS);
}
void test_sock_addr(void)
{
+ struct nstoken *tok = NULL;
int cgroup_fd = -1;
void *skel;
@@ -567,13 +2600,22 @@ void test_sock_addr(void)
if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
goto cleanup;
+ if (!ASSERT_OK(setup_test_env(&tok), "setup_test_env"))
+ goto cleanup;
+
+ if (!ASSERT_OK(load_sock_addr_kern(), "load_sock_addr_kern"))
+ goto cleanup;
+
for (size_t i = 0; i < ARRAY_SIZE(tests); ++i) {
struct sock_addr_test *test = &tests[i];
+ int err;
if (!test__start_subtest(test->name))
continue;
- skel = test->loadfn(cgroup_fd);
+ skel = test->loadfn(cgroup_fd, test->attach_type,
+ test->expected_result == LOAD_REJECT ||
+ test->expected_result == ATTACH_REJECT);
if (!skel)
continue;
@@ -583,30 +2625,39 @@ void test_sock_addr(void)
* the future.
*/
case SOCK_ADDR_TEST_BIND:
- test_bind(test);
+ err = test_bind(test);
break;
case SOCK_ADDR_TEST_CONNECT:
- test_connect(test);
+ err = test_connect(test);
break;
case SOCK_ADDR_TEST_SENDMSG:
case SOCK_ADDR_TEST_RECVMSG:
- test_xmsg(test);
+ err = test_xmsg(test);
break;
case SOCK_ADDR_TEST_GETSOCKNAME:
- test_getsockname(test);
+ err = test_getsockname(test);
break;
case SOCK_ADDR_TEST_GETPEERNAME:
- test_getpeername(test);
+ err = test_getpeername(test);
break;
default:
ASSERT_TRUE(false, "Unknown sock addr test type");
break;
}
+ if (test->expected_result == SYSCALL_EPERM)
+ ASSERT_EQ(err, EPERM, "socket operation returns EPERM");
+ else if (test->expected_result == SYSCALL_ENOTSUPP)
+ ASSERT_EQ(err, ENOTSUPP, "socket operation returns ENOTSUPP");
+ else if (test->expected_result == SUCCESS)
+ ASSERT_OK(err, "socket operation succeeds");
+
test->destroyfn(skel);
}
cleanup:
+ unload_sock_addr_kern();
+ cleanup_test_env(tok);
if (cgroup_fd >= 0)
close(cgroup_fd);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 77e26ecffa..1337153eb0 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -131,6 +131,65 @@ out:
test_skmsg_load_helpers__destroy(skel);
}
+static void test_skmsg_helpers_with_link(enum bpf_map_type map_type)
+{
+ struct bpf_program *prog, *prog_clone, *prog_clone2;
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, opts);
+ struct test_skmsg_load_helpers *skel;
+ struct bpf_link *link, *link2;
+ int err, map;
+
+ skel = test_skmsg_load_helpers__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_skmsg_load_helpers__open_and_load"))
+ return;
+
+ prog = skel->progs.prog_msg_verdict;
+ prog_clone = skel->progs.prog_msg_verdict_clone;
+ prog_clone2 = skel->progs.prog_msg_verdict_clone2;
+ map = bpf_map__fd(skel->maps.sock_map);
+
+ link = bpf_program__attach_sockmap(prog, map);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
+ goto out;
+
+ /* Fail since bpf_link for the same prog has been created. */
+ err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_ERR(err, "bpf_prog_attach"))
+ goto out;
+
+ /* Fail since bpf_link for the same prog type has been created. */
+ link2 = bpf_program__attach_sockmap(prog_clone, map);
+ if (!ASSERT_ERR_PTR(link2, "bpf_program__attach_sockmap")) {
+ bpf_link__detach(link2);
+ goto out;
+ }
+
+ err = bpf_link__update_program(link, prog_clone);
+ if (!ASSERT_OK(err, "bpf_link__update_program"))
+ goto out;
+
+ /* Fail since a prog with different type attempts to do update. */
+ err = bpf_link__update_program(link, skel->progs.prog_skb_verdict);
+ if (!ASSERT_ERR(err, "bpf_link__update_program"))
+ goto out;
+
+ /* Fail since the old prog does not match the one in the kernel. */
+ opts.old_prog_fd = bpf_program__fd(prog_clone2);
+ opts.flags = BPF_F_REPLACE;
+ err = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), &opts);
+ if (!ASSERT_ERR(err, "bpf_link_update"))
+ goto out;
+
+ opts.old_prog_fd = bpf_program__fd(prog_clone);
+ opts.flags = BPF_F_REPLACE;
+ err = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), &opts);
+ if (!ASSERT_OK(err, "bpf_link_update"))
+ goto out;
+out:
+ bpf_link__detach(link);
+ test_skmsg_load_helpers__destroy(skel);
+}
+
static void test_sockmap_update(enum bpf_map_type map_type)
{
int err, prog, src;
@@ -298,6 +357,40 @@ out:
test_sockmap_skb_verdict_attach__destroy(skel);
}
+static void test_sockmap_skb_verdict_attach_with_link(void)
+{
+ struct test_sockmap_skb_verdict_attach *skel;
+ struct bpf_program *prog;
+ struct bpf_link *link;
+ int err, map;
+
+ skel = test_sockmap_skb_verdict_attach__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+ prog = skel->progs.prog_skb_verdict;
+ map = bpf_map__fd(skel->maps.sock_map);
+ link = bpf_program__attach_sockmap(prog, map);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
+ goto out;
+
+ bpf_link__detach(link);
+
+ err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+
+ /* Fail since attaching with the same prog/map has been done. */
+ link = bpf_program__attach_sockmap(prog, map);
+ if (!ASSERT_ERR_PTR(link, "bpf_program__attach_sockmap"))
+ bpf_link__detach(link);
+
+ err = bpf_prog_detach2(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT);
+ if (!ASSERT_OK(err, "bpf_prog_detach2"))
+ goto out;
+out:
+ test_sockmap_skb_verdict_attach__destroy(skel);
+}
+
static __u32 query_prog_id(int prog_fd)
{
struct bpf_prog_info info = {};
@@ -475,30 +568,19 @@ out:
test_sockmap_drop_prog__destroy(drop);
}
-static void test_sockmap_skb_verdict_peek(void)
+static void test_sockmap_skb_verdict_peek_helper(int map)
{
- int err, map, verdict, s, c1, p1, zero = 0, sent, recvd, avail;
- struct test_sockmap_pass_prog *pass;
+ int err, s, c1, p1, zero = 0, sent, recvd, avail;
char snd[256] = "0123456789";
char rcv[256] = "0";
- pass = test_sockmap_pass_prog__open_and_load();
- if (!ASSERT_OK_PTR(pass, "open_and_load"))
- return;
- verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
- map = bpf_map__fd(pass->maps.sock_map_rx);
-
- err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
- if (!ASSERT_OK(err, "bpf_prog_attach"))
- goto out;
-
s = socket_loopback(AF_INET, SOCK_STREAM);
if (!ASSERT_GT(s, -1, "socket_loopback(s)"))
- goto out;
+ return;
err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1);
if (!ASSERT_OK(err, "create_pairs(s)"))
- goto out;
+ return;
err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
@@ -520,7 +602,58 @@ static void test_sockmap_skb_verdict_peek(void)
out_close:
close(c1);
close(p1);
+}
+
+static void test_sockmap_skb_verdict_peek(void)
+{
+ struct test_sockmap_pass_prog *pass;
+ int err, map, verdict;
+
+ pass = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(pass, "open_and_load"))
+ return;
+ verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
+ map = bpf_map__fd(pass->maps.sock_map_rx);
+
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+
+ test_sockmap_skb_verdict_peek_helper(map);
+
+out:
+ test_sockmap_pass_prog__destroy(pass);
+}
+
+static void test_sockmap_skb_verdict_peek_with_link(void)
+{
+ struct test_sockmap_pass_prog *pass;
+ struct bpf_program *prog;
+ struct bpf_link *link;
+ int err, map;
+
+ pass = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(pass, "open_and_load"))
+ return;
+ prog = pass->progs.prog_skb_verdict;
+ map = bpf_map__fd(pass->maps.sock_map_rx);
+ link = bpf_program__attach_sockmap(prog, map);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
+ goto out;
+
+ err = bpf_link__update_program(link, pass->progs.prog_skb_verdict_clone);
+ if (!ASSERT_OK(err, "bpf_link__update_program"))
+ goto out;
+
+ /* Fail since a prog with different attach type attempts to do update. */
+ err = bpf_link__update_program(link, pass->progs.prog_skb_parser);
+ if (!ASSERT_ERR(err, "bpf_link__update_program"))
+ goto out;
+
+ test_sockmap_skb_verdict_peek_helper(map);
+ ASSERT_EQ(pass->bss->clone_called, 1, "clone_called");
out:
+ bpf_link__detach(link);
test_sockmap_pass_prog__destroy(pass);
}
@@ -788,6 +921,8 @@ void test_sockmap_basic(void)
test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT,
BPF_SK_SKB_VERDICT);
}
+ if (test__start_subtest("sockmap skb_verdict attach_with_link"))
+ test_sockmap_skb_verdict_attach_with_link();
if (test__start_subtest("sockmap msg_verdict progs query"))
test_sockmap_progs_query(BPF_SK_MSG_VERDICT);
if (test__start_subtest("sockmap stream_parser progs query"))
@@ -804,6 +939,8 @@ void test_sockmap_basic(void)
test_sockmap_skb_verdict_fionread(false);
if (test__start_subtest("sockmap skb_verdict msg_f_peek"))
test_sockmap_skb_verdict_peek();
+ if (test__start_subtest("sockmap skb_verdict msg_f_peek with link"))
+ test_sockmap_skb_verdict_peek_with_link();
if (test__start_subtest("sockmap unconnected af_unix"))
test_sockmap_unconnected_unix();
if (test__start_subtest("sockmap one socket to many map entries"))
@@ -812,4 +949,8 @@ void test_sockmap_basic(void)
test_sockmap_many_maps();
if (test__start_subtest("sockmap same socket replace"))
test_sockmap_same_sock();
+ if (test__start_subtest("sockmap sk_msg attach sockmap helpers with link"))
+ test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKMAP);
+ if (test__start_subtest("sockhash sk_msg attach sockhash helpers with link"))
+ test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKHASH);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index a92807bfcd..e91b593660 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -767,6 +767,24 @@ static void test_msg_redir_to_connected(struct test_sockmap_listen *skel,
xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
}
+static void test_msg_redir_to_connected_with_link(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family,
+ int sotype)
+{
+ int prog_msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int link_fd;
+
+ link_fd = bpf_link_create(prog_msg_verdict, sock_map, BPF_SK_MSG_VERDICT, NULL);
+ if (!ASSERT_GE(link_fd, 0, "bpf_link_create"))
+ return;
+
+ redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
+
+ close(link_fd);
+}
+
static void redir_to_listening(int family, int sotype, int sock_mapfd,
int verd_mapfd, enum redir_mode mode)
{
@@ -869,6 +887,24 @@ static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
}
+static void test_msg_redir_to_listening_with_link(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family,
+ int sotype)
+{
+ struct bpf_program *verdict = skel->progs.prog_msg_verdict;
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ struct bpf_link *link;
+
+ link = bpf_program__attach_sockmap(verdict, sock_map);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
+ return;
+
+ redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
+
+ bpf_link__detach(link);
+}
+
static void redir_partial(int family, int sotype, int sock_map, int parser_map)
{
int s, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
@@ -1316,7 +1352,9 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
TEST(test_skb_redir_to_listening),
TEST(test_skb_redir_partial),
TEST(test_msg_redir_to_connected),
+ TEST(test_msg_redir_to_connected_with_link),
TEST(test_msg_redir_to_listening),
+ TEST(test_msg_redir_to_listening_with_link),
};
const char *family_name, *map_name;
const struct redir_test *t;
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c
index 5a4491d4ed..eaac83a7f3 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c
@@ -24,6 +24,7 @@ enum sockopt_test_error {
static struct sockopt_test {
const char *descr;
const struct bpf_insn insns[64];
+ enum bpf_prog_type prog_type;
enum bpf_attach_type attach_type;
enum bpf_attach_type expected_attach_type;
@@ -928,9 +929,40 @@ static struct sockopt_test {
.error = EPERM_SETSOCKOPT,
},
+
+ /* ==================== prog_type ==================== */
+
+ {
+ .descr = "can attach only BPF_CGROUP_SETSOCKOP",
+ .insns = {
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = 0,
+ .error = DENY_ATTACH,
+ },
+
+ {
+ .descr = "can attach only BPF_CGROUP_GETSOCKOP",
+ .insns = {
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ .attach_type = BPF_CGROUP_GETSOCKOPT,
+ .expected_attach_type = 0,
+ .error = DENY_ATTACH,
+ },
};
static int load_prog(const struct bpf_insn *insns,
+ enum bpf_prog_type prog_type,
enum bpf_attach_type expected_attach_type)
{
LIBBPF_OPTS(bpf_prog_load_opts, opts,
@@ -947,7 +979,7 @@ static int load_prog(const struct bpf_insn *insns,
}
insns_cnt++;
- fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCKOPT, NULL, "GPL", insns, insns_cnt, &opts);
+ fd = bpf_prog_load(prog_type, NULL, "GPL", insns, insns_cnt, &opts);
if (verbose && fd < 0)
fprintf(stderr, "%s\n", bpf_log_buf);
@@ -1036,13 +1068,18 @@ static int call_getsockopt(bool use_io_uring, int fd, int level, int optname,
return getsockopt(fd, level, optname, optval, optlen);
}
-static int run_test(int cgroup_fd, struct sockopt_test *test, bool use_io_uring)
+static int run_test(int cgroup_fd, struct sockopt_test *test, bool use_io_uring,
+ bool use_link)
{
- int sock_fd, err, prog_fd;
+ int prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT;
+ int sock_fd, err, prog_fd, link_fd = -1;
void *optval = NULL;
int ret = 0;
- prog_fd = load_prog(test->insns, test->expected_attach_type);
+ if (test->prog_type)
+ prog_type = test->prog_type;
+
+ prog_fd = load_prog(test->insns, prog_type, test->expected_attach_type);
if (prog_fd < 0) {
if (test->error == DENY_LOAD)
return 0;
@@ -1051,7 +1088,12 @@ static int run_test(int cgroup_fd, struct sockopt_test *test, bool use_io_uring)
return -1;
}
- err = bpf_prog_attach(prog_fd, cgroup_fd, test->attach_type, 0);
+ if (use_link) {
+ err = bpf_link_create(prog_fd, cgroup_fd, test->attach_type, NULL);
+ link_fd = err;
+ } else {
+ err = bpf_prog_attach(prog_fd, cgroup_fd, test->attach_type, 0);
+ }
if (err < 0) {
if (test->error == DENY_ATTACH)
goto close_prog_fd;
@@ -1142,7 +1184,12 @@ free_optval:
close_sock_fd:
close(sock_fd);
detach_prog:
- bpf_prog_detach2(prog_fd, cgroup_fd, test->attach_type);
+ if (use_link) {
+ if (link_fd >= 0)
+ close(link_fd);
+ } else {
+ bpf_prog_detach2(prog_fd, cgroup_fd, test->attach_type);
+ }
close_prog_fd:
close(prog_fd);
return ret;
@@ -1160,10 +1207,12 @@ void test_sockopt(void)
if (!test__start_subtest(tests[i].descr))
continue;
- ASSERT_OK(run_test(cgroup_fd, &tests[i], false),
+ ASSERT_OK(run_test(cgroup_fd, &tests[i], false, false),
+ tests[i].descr);
+ ASSERT_OK(run_test(cgroup_fd, &tests[i], false, true),
tests[i].descr);
if (tests[i].io_uring_support)
- ASSERT_OK(run_test(cgroup_fd, &tests[i], true),
+ ASSERT_OK(run_test(cgroup_fd, &tests[i], true, false),
tests[i].descr);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
index 917f486db8..1d3a20f01b 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include "cgroup_helpers.h"
+#include "network_helpers.h"
#include "sockopt_inherit.skel.h"
@@ -9,35 +10,6 @@
#define CUSTOM_INHERIT2 1
#define CUSTOM_LISTENER 2
-static int connect_to_server(int server_fd)
-{
- struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
- int fd;
-
- fd = socket(AF_INET, SOCK_STREAM, 0);
- if (fd < 0) {
- log_err("Failed to create client socket");
- return -1;
- }
-
- if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
- log_err("Failed to get server addr");
- goto out;
- }
-
- if (connect(fd, (const struct sockaddr *)&addr, len) < 0) {
- log_err("Fail to connect to server");
- goto out;
- }
-
- return fd;
-
-out:
- close(fd);
- return -1;
-}
-
static int verify_sockopt(int fd, int optname, const char *msg, char expected)
{
socklen_t optlen = 1;
@@ -98,47 +70,36 @@ static void *server_thread(void *arg)
return (void *)(long)err;
}
-static int start_server(void)
+static int custom_cb(int fd, const struct post_socket_opts *opts)
{
- struct sockaddr_in addr = {
- .sin_family = AF_INET,
- .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
- };
char buf;
int err;
- int fd;
int i;
- fd = socket(AF_INET, SOCK_STREAM, 0);
- if (fd < 0) {
- log_err("Failed to create server socket");
- return -1;
- }
-
for (i = CUSTOM_INHERIT1; i <= CUSTOM_LISTENER; i++) {
buf = 0x01;
err = setsockopt(fd, SOL_CUSTOM, i, &buf, 1);
if (err) {
log_err("Failed to call setsockopt(%d)", i);
- close(fd);
return -1;
}
}
- if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) {
- log_err("Failed to bind socket");
- close(fd);
- return -1;
- }
-
- return fd;
+ return 0;
}
static void run_test(int cgroup_fd)
{
struct bpf_link *link_getsockopt = NULL;
struct bpf_link *link_setsockopt = NULL;
+ struct network_helper_opts opts = {
+ .post_socket_cb = custom_cb,
+ };
int server_fd = -1, client_fd;
+ struct sockaddr_in addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ };
struct sockopt_inherit *obj;
void *server_err;
pthread_t tid;
@@ -160,7 +121,8 @@ static void run_test(int cgroup_fd)
if (!ASSERT_OK_PTR(link_setsockopt, "cg-attach-setsockopt"))
goto close_bpf_object;
- server_fd = start_server();
+ server_fd = start_server_addr(SOCK_STREAM, (struct sockaddr_storage *)&addr,
+ sizeof(addr), &opts);
if (!ASSERT_GE(server_fd, 0, "start_server"))
goto close_bpf_object;
@@ -173,7 +135,7 @@ static void run_test(int cgroup_fd)
pthread_cond_wait(&server_started, &server_started_mtx);
pthread_mutex_unlock(&server_started_mtx);
- client_fd = connect_to_server(server_fd);
+ client_fd = connect_to_fd(server_fd, 0);
if (!ASSERT_GE(client_fd, 0, "connect_to_server"))
goto close_server_fd;
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index 5db9eec24b..0832fd7874 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -35,7 +35,7 @@ retry:
pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
0 /* cpu 0 */, -1 /* group id */,
0 /* flags */);
- if (pmu_fd < 0 && errno == ENOENT) {
+ if (pmu_fd < 0 && (errno == ENOENT || errno == EOPNOTSUPP)) {
printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__);
test__skip();
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
index 15ee7b2fc4..b913572002 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
@@ -73,6 +73,16 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
"up primary");
ASSERT_OK(system("ip addr add dev " netkit_name " 10.0.0.1/24"),
"addr primary");
+
+ if (mode == NETKIT_L3) {
+ ASSERT_EQ(system("ip link set dev " netkit_name
+ " addr ee:ff:bb:cc:aa:dd 2> /dev/null"), 512,
+ "set hwaddress");
+ } else {
+ ASSERT_OK(system("ip link set dev " netkit_name
+ " addr ee:ff:bb:cc:aa:dd"),
+ "set hwaddress");
+ }
if (same_netns) {
ASSERT_OK(system("ip link set dev " netkit_peer " up"),
"up peer");
@@ -89,6 +99,16 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
return err;
}
+static void move_netkit(void)
+{
+ ASSERT_OK(system("ip link set " netkit_peer " netns foo"),
+ "move peer");
+ ASSERT_OK(system("ip netns exec foo ip link set dev "
+ netkit_peer " up"), "up peer");
+ ASSERT_OK(system("ip netns exec foo ip addr add dev "
+ netkit_peer " 10.0.0.2/24"), "addr peer");
+}
+
static void destroy_netkit(void)
{
ASSERT_OK(system("ip link del dev " netkit_name), "del primary");
@@ -685,3 +705,77 @@ void serial_test_tc_netkit_neigh_links(void)
serial_test_tc_netkit_neigh_links_target(NETKIT_L2, BPF_NETKIT_PRIMARY);
serial_test_tc_netkit_neigh_links_target(NETKIT_L3, BPF_NETKIT_PRIMARY);
}
+
+static void serial_test_tc_netkit_pkt_type_mode(int mode)
+{
+ LIBBPF_OPTS(bpf_netkit_opts, optl_nk);
+ LIBBPF_OPTS(bpf_tcx_opts, optl_tcx);
+ int err, ifindex, ifindex2;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+
+ err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
+ &ifindex, true);
+ if (err)
+ return;
+
+ ifindex2 = if_nametoindex(netkit_peer);
+ ASSERT_NEQ(ifindex, ifindex2, "ifindex_1_2");
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1,
+ BPF_NETKIT_PRIMARY), 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc7,
+ BPF_TCX_INGRESS), 0, "tc7_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ assert_mprog_count_ifindex(ifindex2, BPF_TCX_INGRESS, 0);
+
+ link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl_nk);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+ assert_mprog_count_ifindex(ifindex2, BPF_TCX_INGRESS, 0);
+
+ link = bpf_program__attach_tcx(skel->progs.tc7, ifindex2, &optl_tcx);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc7 = link;
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+ assert_mprog_count_ifindex(ifindex2, BPF_TCX_INGRESS, 1);
+
+ move_netkit();
+
+ tc_skel_reset_all_seen(skel);
+ skel->bss->set_type = true;
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc7, true, "seen_tc7");
+
+ ASSERT_EQ(skel->bss->seen_host, true, "seen_host");
+ ASSERT_EQ(skel->bss->seen_mcast, true, "seen_mcast");
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ destroy_netkit();
+}
+
+void serial_test_tc_netkit_pkt_type(void)
+{
+ serial_test_tc_netkit_pkt_type_mode(NETKIT_L2);
+ serial_test_tc_netkit_pkt_type_mode(NETKIT_L3);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index dbe06aeaa2..b1073d36d7 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -530,7 +530,7 @@ static int wait_netstamp_needed_key(void)
__u64 tstamp = 0;
nstoken = open_netns(NS_DST);
- if (!nstoken)
+ if (!ASSERT_OK_PTR(nstoken, "setns dst"))
return -1;
srv_fd = start_server(AF_INET6, SOCK_DGRAM, "::1", 0, 0);
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index 8fe84da1b9..f2b99d95d9 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -10,6 +10,9 @@ struct tcp_rtt_storage {
__u32 delivered;
__u32 delivered_ce;
__u32 icsk_retransmits;
+
+ __u32 mrtt_us; /* args[0] */
+ __u32 srtt; /* args[1] */
};
static void send_byte(int fd)
@@ -83,6 +86,17 @@ static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked,
err++;
}
+ /* Precise values of mrtt and srtt are unavailable, just make sure they are nonzero */
+ if (val.mrtt_us == 0) {
+ log_err("%s: unexpected bpf_tcp_sock.args[0] (mrtt_us) %u == 0", msg, val.mrtt_us);
+ err++;
+ }
+
+ if (val.srtt == 0) {
+ log_err("%s: unexpected bpf_tcp_sock.args[1] (srtt) %u == 0", msg, val.srtt);
+ err++;
+ }
+
return err;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
index ee5372c7f2..29e183a80f 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
@@ -4,6 +4,8 @@
#include <time.h>
#include "struct_ops_module.skel.h"
+#include "struct_ops_nulled_out_cb.skel.h"
+#include "struct_ops_forgotten_cb.skel.h"
static void check_map_info(struct bpf_map_info *info)
{
@@ -66,6 +68,7 @@ static void test_struct_ops_load(void)
* auto-loading, or it will fail to load.
*/
bpf_program__set_autoload(skel->progs.test_2, false);
+ bpf_map__set_autocreate(skel->maps.testmod_zeroed, false);
err = struct_ops_module__load(skel);
if (!ASSERT_OK(err, "struct_ops_module_load"))
@@ -93,9 +96,163 @@ cleanup:
struct_ops_module__destroy(skel);
}
+static void test_struct_ops_not_zeroed(void)
+{
+ struct struct_ops_module *skel;
+ int err;
+
+ /* zeroed is 0, and zeroed_op is null */
+ skel = struct_ops_module__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_module_open"))
+ return;
+
+ skel->struct_ops.testmod_zeroed->zeroed = 0;
+ /* zeroed_op prog should be not loaded automatically now */
+ skel->struct_ops.testmod_zeroed->zeroed_op = NULL;
+
+ err = struct_ops_module__load(skel);
+ ASSERT_OK(err, "struct_ops_module_load");
+
+ struct_ops_module__destroy(skel);
+
+ /* zeroed is not 0 */
+ skel = struct_ops_module__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_module_open_not_zeroed"))
+ return;
+
+ /* libbpf should reject the testmod_zeroed since struct
+ * bpf_testmod_ops in the kernel has no "zeroed" field and the
+ * value of "zeroed" is non-zero.
+ */
+ skel->struct_ops.testmod_zeroed->zeroed = 0xdeadbeef;
+ skel->struct_ops.testmod_zeroed->zeroed_op = NULL;
+ err = struct_ops_module__load(skel);
+ ASSERT_ERR(err, "struct_ops_module_load_not_zeroed");
+
+ struct_ops_module__destroy(skel);
+
+ /* zeroed_op is not null */
+ skel = struct_ops_module__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_module_open_not_zeroed_op"))
+ return;
+
+ /* libbpf should reject the testmod_zeroed since the value of its
+ * "zeroed_op" is not null.
+ */
+ skel->struct_ops.testmod_zeroed->zeroed_op = skel->progs.test_3;
+ err = struct_ops_module__load(skel);
+ ASSERT_ERR(err, "struct_ops_module_load_not_zeroed_op");
+
+ struct_ops_module__destroy(skel);
+}
+
+/* The signature of an implementation might not match the signature of the
+ * function pointer prototype defined in the BPF program. This mismatch
+ * should be allowed as long as the behavior of the operator program
+ * adheres to the signature in the kernel. Libbpf should not enforce the
+ * signature; rather, let the kernel verifier handle the enforcement.
+ */
+static void test_struct_ops_incompatible(void)
+{
+ struct struct_ops_module *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = struct_ops_module__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_module_open"))
+ return;
+
+ bpf_map__set_autocreate(skel->maps.testmod_zeroed, false);
+
+ err = struct_ops_module__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ link = bpf_map__attach_struct_ops(skel->maps.testmod_incompatible);
+ if (ASSERT_OK_PTR(link, "attach_struct_ops"))
+ bpf_link__destroy(link);
+
+cleanup:
+ struct_ops_module__destroy(skel);
+}
+
+/* validate that it's ok to "turn off" callback that kernel supports */
+static void test_struct_ops_nulled_out_cb(void)
+{
+ struct struct_ops_nulled_out_cb *skel;
+ int err;
+
+ skel = struct_ops_nulled_out_cb__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ /* kernel knows about test_1, but we still null it out */
+ skel->struct_ops.ops->test_1 = NULL;
+
+ err = struct_ops_nulled_out_cb__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ ASSERT_FALSE(bpf_program__autoload(skel->progs.test_1_turn_off), "prog_autoload");
+ ASSERT_LT(bpf_program__fd(skel->progs.test_1_turn_off), 0, "prog_fd");
+
+cleanup:
+ struct_ops_nulled_out_cb__destroy(skel);
+}
+
+/* validate that libbpf generates reasonable error message if struct_ops is
+ * not referenced in any struct_ops map
+ */
+static void test_struct_ops_forgotten_cb(void)
+{
+ struct struct_ops_forgotten_cb *skel;
+ char *log;
+ int err;
+
+ skel = struct_ops_forgotten_cb__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ start_libbpf_log_capture();
+
+ err = struct_ops_forgotten_cb__load(skel);
+ if (!ASSERT_ERR(err, "skel_load"))
+ goto cleanup;
+
+ log = stop_libbpf_log_capture();
+ ASSERT_HAS_SUBSTR(log,
+ "prog 'test_1_forgotten': SEC(\"struct_ops\") program isn't referenced anywhere, did you forget to use it?",
+ "libbpf_log");
+ free(log);
+
+ struct_ops_forgotten_cb__destroy(skel);
+
+ /* now let's programmatically use it, we should be fine now */
+ skel = struct_ops_forgotten_cb__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->struct_ops.ops->test_1 = skel->progs.test_1_forgotten; /* not anymore */
+
+ err = struct_ops_forgotten_cb__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+cleanup:
+ struct_ops_forgotten_cb__destroy(skel);
+}
+
void serial_test_struct_ops_module(void)
{
- if (test__start_subtest("test_struct_ops_load"))
+ if (test__start_subtest("struct_ops_load"))
test_struct_ops_load();
+ if (test__start_subtest("struct_ops_not_zeroed"))
+ test_struct_ops_not_zeroed();
+ if (test__start_subtest("struct_ops_incompatible"))
+ test_struct_ops_incompatible();
+ if (test__start_subtest("struct_ops_null_out_cb"))
+ test_struct_ops_nulled_out_cb();
+ if (test__start_subtest("struct_ops_forgotten_cb"))
+ test_struct_ops_forgotten_cb();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
index 5f1fb0a2ea..cec746e77c 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
@@ -612,6 +612,8 @@ static void test_ipip_tunnel(enum ipip_encap encap)
/* ping from at_ns0 namespace test */
nstoken = open_netns("at_ns0");
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ goto done;
err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
if (!ASSERT_OK(err, "test_ping"))
goto done;
@@ -666,6 +668,8 @@ static void test_xfrm_tunnel(void)
/* ping from at_ns0 namespace test */
nstoken = open_netns("at_ns0");
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ goto done;
err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
close_netns(nstoken);
if (!ASSERT_OK(err, "test_ping"))
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
new file mode 100644
index 0000000000..871d16cb95
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+#include <pthread.h>
+#include <network_helpers.h>
+
+#include "timer_lockup.skel.h"
+
+static long cpu;
+static int *timer1_err;
+static int *timer2_err;
+static bool skip;
+
+volatile int k = 0;
+
+static void *timer_lockup_thread(void *arg)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1000,
+ );
+ int i, prog_fd = *(int *)arg;
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset);
+ ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset),
+ &cpuset),
+ "cpu affinity");
+
+ for (i = 0; !READ_ONCE(*timer1_err) && !READ_ONCE(*timer2_err); i++) {
+ bpf_prog_test_run_opts(prog_fd, &opts);
+ /* Skip the test if we can't reproduce the race in a reasonable
+ * amount of time.
+ */
+ if (i > 50) {
+ WRITE_ONCE(skip, true);
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+void test_timer_lockup(void)
+{
+ int timer1_prog, timer2_prog;
+ struct timer_lockup *skel;
+ pthread_t thrds[2];
+ void *ret;
+
+ skel = timer_lockup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load"))
+ return;
+
+ timer1_prog = bpf_program__fd(skel->progs.timer1_prog);
+ timer2_prog = bpf_program__fd(skel->progs.timer2_prog);
+
+ timer1_err = &skel->bss->timer1_err;
+ timer2_err = &skel->bss->timer2_err;
+
+ if (!ASSERT_OK(pthread_create(&thrds[0], NULL, timer_lockup_thread,
+ &timer1_prog),
+ "pthread_create thread1"))
+ goto out;
+ if (!ASSERT_OK(pthread_create(&thrds[1], NULL, timer_lockup_thread,
+ &timer2_prog),
+ "pthread_create thread2")) {
+ pthread_exit(&thrds[0]);
+ goto out;
+ }
+
+ pthread_join(thrds[1], &ret);
+ pthread_join(thrds[0], &ret);
+
+ if (skip) {
+ test__skip();
+ goto out;
+ }
+
+ if (*timer1_err != -EDEADLK && *timer1_err != 0)
+ ASSERT_FAIL("timer1_err bad value");
+ if (*timer2_err != -EDEADLK && *timer2_err != 0)
+ ASSERT_FAIL("timer2_err bad value");
+out:
+ timer_lockup__destroy(skel);
+ return;
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
index 7b9124d506..e56e88596d 100644
--- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c
+++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
@@ -5,18 +5,19 @@
#include "trace_printk.lskel.h"
-#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe"
-#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe"
#define SEARCHMSG "testing,testing"
+static void trace_pipe_cb(const char *str, void *data)
+{
+ if (strstr(str, SEARCHMSG) != NULL)
+ (*(int *)data)++;
+}
+
void serial_test_trace_printk(void)
{
struct trace_printk_lskel__bss *bss;
- int err = 0, iter = 0, found = 0;
struct trace_printk_lskel *skel;
- char *buf = NULL;
- FILE *fp = NULL;
- size_t buflen;
+ int err = 0, found = 0;
skel = trace_printk_lskel__open();
if (!ASSERT_OK_PTR(skel, "trace_printk__open"))
@@ -35,16 +36,6 @@ void serial_test_trace_printk(void)
if (!ASSERT_OK(err, "trace_printk__attach"))
goto cleanup;
- if (access(TRACEFS_PIPE, F_OK) == 0)
- fp = fopen(TRACEFS_PIPE, "r");
- else
- fp = fopen(DEBUGFS_PIPE, "r");
- if (!ASSERT_OK_PTR(fp, "fopen(TRACE_PIPE)"))
- goto cleanup;
-
- /* We do not want to wait forever if this test fails... */
- fcntl(fileno(fp), F_SETFL, O_NONBLOCK);
-
/* wait for tracepoint to trigger */
usleep(1);
trace_printk_lskel__detach(skel);
@@ -56,21 +47,12 @@ void serial_test_trace_printk(void)
goto cleanup;
/* verify our search string is in the trace buffer */
- while (getline(&buf, &buflen, fp) >= 0 || errno == EAGAIN) {
- if (strstr(buf, SEARCHMSG) != NULL)
- found++;
- if (found == bss->trace_printk_ran)
- break;
- if (++iter > 1000)
- break;
- }
+ ASSERT_OK(read_trace_pipe_iter(trace_pipe_cb, &found, 1000),
+ "read_trace_pipe_iter");
if (!ASSERT_EQ(found, bss->trace_printk_ran, "found"))
goto cleanup;
cleanup:
trace_printk_lskel__destroy(skel);
- free(buf);
- if (fp)
- fclose(fp);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c
index 44ea2fd88f..2af6a6f209 100644
--- a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c
+++ b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c
@@ -5,18 +5,19 @@
#include "trace_vprintk.lskel.h"
-#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe"
-#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe"
#define SEARCHMSG "1,2,3,4,5,6,7,8,9,10"
+static void trace_pipe_cb(const char *str, void *data)
+{
+ if (strstr(str, SEARCHMSG) != NULL)
+ (*(int *)data)++;
+}
+
void serial_test_trace_vprintk(void)
{
struct trace_vprintk_lskel__bss *bss;
- int err = 0, iter = 0, found = 0;
struct trace_vprintk_lskel *skel;
- char *buf = NULL;
- FILE *fp = NULL;
- size_t buflen;
+ int err = 0, found = 0;
skel = trace_vprintk_lskel__open_and_load();
if (!ASSERT_OK_PTR(skel, "trace_vprintk__open_and_load"))
@@ -28,16 +29,6 @@ void serial_test_trace_vprintk(void)
if (!ASSERT_OK(err, "trace_vprintk__attach"))
goto cleanup;
- if (access(TRACEFS_PIPE, F_OK) == 0)
- fp = fopen(TRACEFS_PIPE, "r");
- else
- fp = fopen(DEBUGFS_PIPE, "r");
- if (!ASSERT_OK_PTR(fp, "fopen(TRACE_PIPE)"))
- goto cleanup;
-
- /* We do not want to wait forever if this test fails... */
- fcntl(fileno(fp), F_SETFL, O_NONBLOCK);
-
/* wait for tracepoint to trigger */
usleep(1);
trace_vprintk_lskel__detach(skel);
@@ -49,14 +40,8 @@ void serial_test_trace_vprintk(void)
goto cleanup;
/* verify our search string is in the trace buffer */
- while (getline(&buf, &buflen, fp) >= 0 || errno == EAGAIN) {
- if (strstr(buf, SEARCHMSG) != NULL)
- found++;
- if (found == bss->trace_vprintk_ran)
- break;
- if (++iter > 1000)
- break;
- }
+ ASSERT_OK(read_trace_pipe_iter(trace_pipe_cb, &found, 1000),
+ "read_trace_pipe_iter");
if (!ASSERT_EQ(found, bss->trace_vprintk_ran, "found"))
goto cleanup;
@@ -66,7 +51,4 @@ void serial_test_trace_vprintk(void)
cleanup:
trace_vprintk_lskel__destroy(skel);
- free(buf);
- if (fp)
- fclose(fp);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
index 38fda42fd7..bf6ca8e3eb 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
@@ -1,12 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
#include <unistd.h>
+#include <pthread.h>
#include <test_progs.h>
#include "uprobe_multi.skel.h"
#include "uprobe_multi_bench.skel.h"
#include "uprobe_multi_usdt.skel.h"
#include "bpf/libbpf_internal.h"
#include "testing_helpers.h"
+#include "../sdt.h"
static char test_data[] = "test_data";
@@ -25,9 +27,17 @@ noinline void uprobe_multi_func_3(void)
asm volatile ("");
}
+noinline void usdt_trigger(void)
+{
+ STAP_PROBE(test, pid_filter_usdt);
+}
+
struct child {
int go[2];
+ int c2p[2]; /* child -> parent channel */
int pid;
+ int tid;
+ pthread_t thread;
};
static void release_child(struct child *child)
@@ -38,6 +48,10 @@ static void release_child(struct child *child)
return;
close(child->go[1]);
close(child->go[0]);
+ if (child->thread)
+ pthread_join(child->thread, NULL);
+ close(child->c2p[0]);
+ close(child->c2p[1]);
if (child->pid > 0)
waitpid(child->pid, &child_status, 0);
}
@@ -63,7 +77,7 @@ static struct child *spawn_child(void)
if (pipe(child.go))
return NULL;
- child.pid = fork();
+ child.pid = child.tid = fork();
if (child.pid < 0) {
release_child(&child);
errno = EINVAL;
@@ -82,6 +96,7 @@ static struct child *spawn_child(void)
uprobe_multi_func_1();
uprobe_multi_func_2();
uprobe_multi_func_3();
+ usdt_trigger();
exit(errno);
}
@@ -89,6 +104,67 @@ static struct child *spawn_child(void)
return &child;
}
+static void *child_thread(void *ctx)
+{
+ struct child *child = ctx;
+ int c = 0, err;
+
+ child->tid = syscall(SYS_gettid);
+
+ /* let parent know we are ready */
+ err = write(child->c2p[1], &c, 1);
+ if (err != 1)
+ pthread_exit(&err);
+
+ /* wait for parent's kick */
+ err = read(child->go[0], &c, 1);
+ if (err != 1)
+ pthread_exit(&err);
+
+ uprobe_multi_func_1();
+ uprobe_multi_func_2();
+ uprobe_multi_func_3();
+ usdt_trigger();
+
+ err = 0;
+ pthread_exit(&err);
+}
+
+static struct child *spawn_thread(void)
+{
+ static struct child child;
+ int c, err;
+
+ /* pipe to notify child to execute the trigger functions */
+ if (pipe(child.go))
+ return NULL;
+ /* pipe to notify parent that child thread is ready */
+ if (pipe(child.c2p)) {
+ close(child.go[0]);
+ close(child.go[1]);
+ return NULL;
+ }
+
+ child.pid = getpid();
+
+ err = pthread_create(&child.thread, NULL, child_thread, &child);
+ if (err) {
+ err = -errno;
+ close(child.go[0]);
+ close(child.go[1]);
+ close(child.c2p[0]);
+ close(child.c2p[1]);
+ errno = -err;
+ return NULL;
+ }
+
+ err = read(child.c2p[0], &c, 1);
+ if (!ASSERT_EQ(err, 1, "child_thread_ready"))
+ return NULL;
+
+ return &child;
+}
+
static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child)
{
skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1;
@@ -103,15 +179,23 @@ static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child
* passed at the probe attach.
*/
skel->bss->pid = child ? 0 : getpid();
+ skel->bss->expect_pid = child ? child->pid : 0;
+
+ /* trigger all probes, if we are testing child *process*, just to make
+ * sure that PID filtering doesn't let through activations from wrong
+ * PIDs; when we test child *thread*, we don't want to do this to
+ * avoid double counting number of triggering events
+ */
+ if (!child || !child->thread) {
+ uprobe_multi_func_1();
+ uprobe_multi_func_2();
+ uprobe_multi_func_3();
+ usdt_trigger();
+ }
if (child)
kick_child(child);
- /* trigger all probes */
- uprobe_multi_func_1();
- uprobe_multi_func_2();
- uprobe_multi_func_3();
-
/*
* There are 2 entry and 2 exit probe called for each uprobe_multi_func_[123]
* function and each slepable probe (6) increments uprobe_multi_sleep_result.
@@ -126,8 +210,12 @@ static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child
ASSERT_EQ(skel->bss->uprobe_multi_sleep_result, 6, "uprobe_multi_sleep_result");
- if (child)
+ ASSERT_FALSE(skel->bss->bad_pid_seen, "bad_pid_seen");
+
+ if (child) {
ASSERT_EQ(skel->bss->child_pid, child->pid, "uprobe_multi_child_pid");
+ ASSERT_EQ(skel->bss->child_tid, child->tid, "uprobe_multi_child_tid");
+ }
}
static void test_skel_api(void)
@@ -190,8 +278,24 @@ __test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_mul
if (!ASSERT_OK_PTR(skel->links.uprobe_extra, "bpf_program__attach_uprobe_multi"))
goto cleanup;
+ /* Attach (uprobe-backed) USDTs */
+ skel->links.usdt_pid = bpf_program__attach_usdt(skel->progs.usdt_pid, pid, binary,
+ "test", "pid_filter_usdt", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt_pid, "attach_usdt_pid"))
+ goto cleanup;
+
+ skel->links.usdt_extra = bpf_program__attach_usdt(skel->progs.usdt_extra, -1, binary,
+ "test", "pid_filter_usdt", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt_extra, "attach_usdt_extra"))
+ goto cleanup;
+
uprobe_multi_test_run(skel, child);
+ ASSERT_FALSE(skel->bss->bad_pid_seen_usdt, "bad_pid_seen_usdt");
+ if (child) {
+ ASSERT_EQ(skel->bss->child_pid_usdt, child->pid, "usdt_multi_child_pid");
+ ASSERT_EQ(skel->bss->child_tid_usdt, child->tid, "usdt_multi_child_tid");
+ }
cleanup:
uprobe_multi__destroy(skel);
}
@@ -210,6 +314,13 @@ test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi
return;
__test_attach_api(binary, pattern, opts, child);
+
+ /* pid filter (thread) */
+ child = spawn_thread();
+ if (!ASSERT_OK_PTR(child, "spawn_thread"))
+ return;
+
+ __test_attach_api(binary, pattern, opts, child);
}
static void test_attach_api_pattern(void)
@@ -495,6 +606,13 @@ static void test_link_api(void)
return;
__test_link_api(child);
+
+ /* pid filter (thread) */
+ child = spawn_thread();
+ if (!ASSERT_OK_PTR(child, "spawn_thread"))
+ return;
+
+ __test_link_api(child);
}
static void test_bench_attach_uprobe(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index c4f9f30664..98ef39efa7 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -53,6 +53,7 @@
#include "verifier_movsx.skel.h"
#include "verifier_netfilter_ctx.skel.h"
#include "verifier_netfilter_retcode.skel.h"
+#include "verifier_or_jmp32_k.skel.h"
#include "verifier_precision.skel.h"
#include "verifier_prevent_map_lookup.skel.h"
#include "verifier_raw_stack.skel.h"
@@ -66,6 +67,8 @@
#include "verifier_sdiv.skel.h"
#include "verifier_search_pruning.skel.h"
#include "verifier_sock.skel.h"
+#include "verifier_sock_addr.skel.h"
+#include "verifier_sockmap_mutate.skel.h"
#include "verifier_spill_fill.skel.h"
#include "verifier_spin_lock.skel.h"
#include "verifier_stack_ptr.skel.h"
@@ -168,6 +171,7 @@ void test_verifier_meta_access(void) { RUN(verifier_meta_access); }
void test_verifier_movsx(void) { RUN(verifier_movsx); }
void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); }
void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); }
+void test_verifier_or_jmp32_k(void) { RUN(verifier_or_jmp32_k); }
void test_verifier_precision(void) { RUN(verifier_precision); }
void test_verifier_prevent_map_lookup(void) { RUN(verifier_prevent_map_lookup); }
void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); }
@@ -181,6 +185,8 @@ void test_verifier_scalar_ids(void) { RUN(verifier_scalar_ids); }
void test_verifier_sdiv(void) { RUN(verifier_sdiv); }
void test_verifier_search_pruning(void) { RUN(verifier_search_pruning); }
void test_verifier_sock(void) { RUN(verifier_sock); }
+void test_verifier_sock_addr(void) { RUN(verifier_sock_addr); }
+void test_verifier_sockmap_mutate(void) { RUN(verifier_sockmap_mutate); }
void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); }
void test_verifier_spin_lock(void) { RUN(verifier_spin_lock); }
void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); }
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier_kfunc_prog_types.c b/tools/testing/selftests/bpf/prog_tests/verifier_kfunc_prog_types.c
new file mode 100644
index 0000000000..3918ecc2ee
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/verifier_kfunc_prog_types.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+
+#include "verifier_kfunc_prog_types.skel.h"
+
+void test_verifier_kfunc_prog_types(void)
+{
+ RUN_TESTS(verifier_kfunc_prog_types);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/wq.c b/tools/testing/selftests/bpf/prog_tests/wq.c
new file mode 100644
index 0000000000..99e438fe12
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/wq.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Benjamin Tissoires */
+#include <test_progs.h>
+#include "wq.skel.h"
+#include "wq_failures.skel.h"
+
+void serial_test_wq(void)
+{
+ struct wq *wq_skel = NULL;
+ int err, prog_fd;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ RUN_TESTS(wq);
+
+ /* re-run the success test to check if the timer was actually executed */
+
+ wq_skel = wq__open_and_load();
+ if (!ASSERT_OK_PTR(wq_skel, "wq_skel_load"))
+ return;
+
+ err = wq__attach(wq_skel);
+ if (!ASSERT_OK(err, "wq_attach"))
+ return;
+
+ prog_fd = bpf_program__fd(wq_skel->progs.test_syscall_array_sleepable);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ usleep(50); /* 10 usecs should be enough, but give it extra */
+
+ ASSERT_EQ(wq_skel->bss->ok_sleepable, (1 << 1), "ok_sleepable");
+ wq__destroy(wq_skel);
+}
+
+void serial_test_failures_wq(void)
+{
+ RUN_TESTS(wq_failures);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index f09505f8b0..53d6ad8c22 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -222,7 +222,7 @@ static void test_xdp_adjust_frags_tail_grow(void)
prog = bpf_object__next_program(obj, NULL);
if (bpf_object__load(obj))
- return;
+ goto out;
prog_fd = bpf_program__fd(prog);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
index 05edcf32f5..f76b5d67a3 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -384,6 +384,8 @@ void test_xdp_metadata(void)
SYS(out, "ip netns add " RX_NETNS_NAME);
tok = open_netns(TX_NETNS_NAME);
+ if (!ASSERT_OK_PTR(tok, "setns"))
+ goto out;
SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME
" type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1");
SYS(out, "ip link set " RX_NAME " netns " RX_NETNS_NAME);
@@ -400,6 +402,8 @@ void test_xdp_metadata(void)
SYS(out, "ip -4 neigh add " RX_ADDR " lladdr " RX_MAC " dev " TX_NAME_VLAN);
switch_ns_to_rx(&tok);
+ if (!ASSERT_OK_PTR(tok, "setns rx"))
+ goto out;
SYS(out, "ip link set dev " RX_NAME " address " RX_MAC);
SYS(out, "ip link set dev " RX_NAME " up");
@@ -449,6 +453,8 @@ void test_xdp_metadata(void)
goto out;
switch_ns_to_tx(&tok);
+ if (!ASSERT_OK_PTR(tok, "setns tx"))
+ goto out;
/* Setup separate AF_XDP for TX interface nad send packet to the RX socket. */
tx_ifindex = if_nametoindex(TX_NAME);
@@ -461,6 +467,8 @@ void test_xdp_metadata(void)
goto out;
switch_ns_to_rx(&tok);
+ if (!ASSERT_OK_PTR(tok, "setns rx"))
+ goto out;
/* Verify packet sent from AF_XDP has proper metadata. */
if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, true), 0,
@@ -468,6 +476,8 @@ void test_xdp_metadata(void)
goto out;
switch_ns_to_tx(&tok);
+ if (!ASSERT_OK_PTR(tok, "setns tx"))
+ goto out;
complete_tx(&tx_xsk);
/* Now check metadata of packet, generated with network stack */
@@ -475,6 +485,8 @@ void test_xdp_metadata(void)
goto out;
switch_ns_to_rx(&tok);
+ if (!ASSERT_OK_PTR(tok, "setns rx"))
+ goto out;
if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, false), 0,
"verify_xsk_metadata"))
@@ -498,6 +510,8 @@ void test_xdp_metadata(void)
goto out;
switch_ns_to_tx(&tok);
+ if (!ASSERT_OK_PTR(tok, "setns tx"))
+ goto out;
/* Send packet to trigger . */
if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
@@ -505,6 +519,8 @@ void test_xdp_metadata(void)
goto out;
switch_ns_to_rx(&tok);
+ if (!ASSERT_OK_PTR(tok, "setns rx"))
+ goto out;
while (!retries--) {
if (bpf_obj2->bss->called)
diff --git a/tools/testing/selftests/bpf/progs/arena_atomics.c b/tools/testing/selftests/bpf/progs/arena_atomics.c
new file mode 100644
index 0000000000..55f1056320
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_atomics.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+#include "bpf_arena_common.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 10); /* number of pages */
+#ifdef __TARGET_ARCH_arm64
+ __ulong(map_extra, 0x1ull << 32); /* start of mmap() region */
+#else
+ __ulong(map_extra, 0x1ull << 44); /* start of mmap() region */
+#endif
+} arena SEC(".maps");
+
+#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+bool skip_tests __attribute((__section__(".data"))) = false;
+#else
+bool skip_tests = true;
+#endif
+
+__u32 pid = 0;
+
+#undef __arena
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+#define __arena __attribute__((address_space(1)))
+#else
+#define __arena SEC(".addr_space.1")
+#endif
+
+__u64 __arena add64_value = 1;
+__u64 __arena add64_result = 0;
+__u32 __arena add32_value = 1;
+__u32 __arena add32_result = 0;
+__u64 __arena add_stack_value_copy = 0;
+__u64 __arena add_stack_result = 0;
+__u64 __arena add_noreturn_value = 1;
+
+SEC("raw_tp/sys_enter")
+int add(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ __u64 add_stack_value = 1;
+
+ add64_result = __sync_fetch_and_add(&add64_value, 2);
+ add32_result = __sync_fetch_and_add(&add32_value, 2);
+ add_stack_result = __sync_fetch_and_add(&add_stack_value, 2);
+ add_stack_value_copy = add_stack_value;
+ __sync_fetch_and_add(&add_noreturn_value, 2);
+#endif
+
+ return 0;
+}
+
+__s64 __arena sub64_value = 1;
+__s64 __arena sub64_result = 0;
+__s32 __arena sub32_value = 1;
+__s32 __arena sub32_result = 0;
+__s64 __arena sub_stack_value_copy = 0;
+__s64 __arena sub_stack_result = 0;
+__s64 __arena sub_noreturn_value = 1;
+
+SEC("raw_tp/sys_enter")
+int sub(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ __u64 sub_stack_value = 1;
+
+ sub64_result = __sync_fetch_and_sub(&sub64_value, 2);
+ sub32_result = __sync_fetch_and_sub(&sub32_value, 2);
+ sub_stack_result = __sync_fetch_and_sub(&sub_stack_value, 2);
+ sub_stack_value_copy = sub_stack_value;
+ __sync_fetch_and_sub(&sub_noreturn_value, 2);
+#endif
+
+ return 0;
+}
+
+__u64 __arena and64_value = (0x110ull << 32);
+__u32 __arena and32_value = 0x110;
+
+SEC("raw_tp/sys_enter")
+int and(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+
+ __sync_fetch_and_and(&and64_value, 0x011ull << 32);
+ __sync_fetch_and_and(&and32_value, 0x011);
+#endif
+
+ return 0;
+}
+
+__u32 __arena or32_value = 0x110;
+__u64 __arena or64_value = (0x110ull << 32);
+
+SEC("raw_tp/sys_enter")
+int or(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ __sync_fetch_and_or(&or64_value, 0x011ull << 32);
+ __sync_fetch_and_or(&or32_value, 0x011);
+#endif
+
+ return 0;
+}
+
+__u64 __arena xor64_value = (0x110ull << 32);
+__u32 __arena xor32_value = 0x110;
+
+SEC("raw_tp/sys_enter")
+int xor(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ __sync_fetch_and_xor(&xor64_value, 0x011ull << 32);
+ __sync_fetch_and_xor(&xor32_value, 0x011);
+#endif
+
+ return 0;
+}
+
+__u32 __arena cmpxchg32_value = 1;
+__u32 __arena cmpxchg32_result_fail = 0;
+__u32 __arena cmpxchg32_result_succeed = 0;
+__u64 __arena cmpxchg64_value = 1;
+__u64 __arena cmpxchg64_result_fail = 0;
+__u64 __arena cmpxchg64_result_succeed = 0;
+
+SEC("raw_tp/sys_enter")
+int cmpxchg(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ cmpxchg64_result_fail = __sync_val_compare_and_swap(&cmpxchg64_value, 0, 3);
+ cmpxchg64_result_succeed = __sync_val_compare_and_swap(&cmpxchg64_value, 1, 2);
+
+ cmpxchg32_result_fail = __sync_val_compare_and_swap(&cmpxchg32_value, 0, 3);
+ cmpxchg32_result_succeed = __sync_val_compare_and_swap(&cmpxchg32_value, 1, 2);
+#endif
+
+ return 0;
+}
+
+__u64 __arena xchg64_value = 1;
+__u64 __arena xchg64_result = 0;
+__u32 __arena xchg32_value = 1;
+__u32 __arena xchg32_result = 0;
+
+SEC("raw_tp/sys_enter")
+int xchg(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ __u64 val64 = 2;
+ __u32 val32 = 2;
+
+ xchg64_result = __sync_lock_test_and_set(&xchg64_value, val64);
+ xchg32_result = __sync_lock_test_and_set(&xchg32_value, val32);
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/arena_list.c b/tools/testing/selftests/bpf/progs/arena_list.c
index c0422c58ce..93bd0600eb 100644
--- a/tools/testing/selftests/bpf/progs/arena_list.c
+++ b/tools/testing/selftests/bpf/progs/arena_list.c
@@ -49,7 +49,7 @@ int arena_list_add(void *ctx)
list_head = &global_head;
- for (i = zero; i < cnt; cond_break, i++) {
+ for (i = zero; i < cnt && can_loop; i++) {
struct elem __arena *n = bpf_alloc(sizeof(*n));
test_val++;
diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c
index a487f60b73..b7ddf8ec4e 100644
--- a/tools/testing/selftests/bpf/progs/bind4_prog.c
+++ b/tools/testing/selftests/bpf/progs/bind4_prog.c
@@ -12,6 +12,8 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bind_prog.h"
+
#define SERV4_IP 0xc0a801feU /* 192.168.1.254 */
#define SERV4_PORT 4040
#define SERV4_REWRITE_IP 0x7f000001U /* 127.0.0.1 */
@@ -118,23 +120,23 @@ int bind_v4_prog(struct bpf_sock_addr *ctx)
// u8 narrow loads:
user_ip4 = 0;
- user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[0] << 0;
- user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[1] << 8;
- user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[2] << 16;
- user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[3] << 24;
+ user_ip4 |= load_byte(ctx->user_ip4, 0, sizeof(user_ip4));
+ user_ip4 |= load_byte(ctx->user_ip4, 1, sizeof(user_ip4));
+ user_ip4 |= load_byte(ctx->user_ip4, 2, sizeof(user_ip4));
+ user_ip4 |= load_byte(ctx->user_ip4, 3, sizeof(user_ip4));
if (ctx->user_ip4 != user_ip4)
return 0;
user_port = 0;
- user_port |= ((volatile __u8 *)&ctx->user_port)[0] << 0;
- user_port |= ((volatile __u8 *)&ctx->user_port)[1] << 8;
+ user_port |= load_byte(ctx->user_port, 0, sizeof(user_port));
+ user_port |= load_byte(ctx->user_port, 1, sizeof(user_port));
if (ctx->user_port != user_port)
return 0;
// u16 narrow loads:
user_ip4 = 0;
- user_ip4 |= ((volatile __u16 *)&ctx->user_ip4)[0] << 0;
- user_ip4 |= ((volatile __u16 *)&ctx->user_ip4)[1] << 16;
+ user_ip4 |= load_word(ctx->user_ip4, 0, sizeof(user_ip4));
+ user_ip4 |= load_word(ctx->user_ip4, 1, sizeof(user_ip4));
if (ctx->user_ip4 != user_ip4)
return 0;
@@ -156,4 +158,10 @@ int bind_v4_prog(struct bpf_sock_addr *ctx)
return 1;
}
+SEC("cgroup/bind4")
+int bind_v4_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c
index d62cd9e9cf..501c3fc11d 100644
--- a/tools/testing/selftests/bpf/progs/bind6_prog.c
+++ b/tools/testing/selftests/bpf/progs/bind6_prog.c
@@ -12,6 +12,8 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bind_prog.h"
+
#define SERV6_IP_0 0xfaceb00c /* face:b00c:1234:5678::abcd */
#define SERV6_IP_1 0x12345678
#define SERV6_IP_2 0x00000000
@@ -129,25 +131,25 @@ int bind_v6_prog(struct bpf_sock_addr *ctx)
// u8 narrow loads:
for (i = 0; i < 4; i++) {
user_ip6 = 0;
- user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[0] << 0;
- user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[1] << 8;
- user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[2] << 16;
- user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[3] << 24;
+ user_ip6 |= load_byte(ctx->user_ip6[i], 0, sizeof(user_ip6));
+ user_ip6 |= load_byte(ctx->user_ip6[i], 1, sizeof(user_ip6));
+ user_ip6 |= load_byte(ctx->user_ip6[i], 2, sizeof(user_ip6));
+ user_ip6 |= load_byte(ctx->user_ip6[i], 3, sizeof(user_ip6));
if (ctx->user_ip6[i] != user_ip6)
return 0;
}
user_port = 0;
- user_port |= ((volatile __u8 *)&ctx->user_port)[0] << 0;
- user_port |= ((volatile __u8 *)&ctx->user_port)[1] << 8;
+ user_port |= load_byte(ctx->user_port, 0, sizeof(user_port));
+ user_port |= load_byte(ctx->user_port, 1, sizeof(user_port));
if (ctx->user_port != user_port)
return 0;
// u16 narrow loads:
for (i = 0; i < 4; i++) {
user_ip6 = 0;
- user_ip6 |= ((volatile __u16 *)&ctx->user_ip6[i])[0] << 0;
- user_ip6 |= ((volatile __u16 *)&ctx->user_ip6[i])[1] << 16;
+ user_ip6 |= load_word(ctx->user_ip6[i], 0, sizeof(user_ip6));
+ user_ip6 |= load_word(ctx->user_ip6[i], 1, sizeof(user_ip6));
if (ctx->user_ip6[i] != user_ip6)
return 0;
}
@@ -173,4 +175,10 @@ int bind_v6_prog(struct bpf_sock_addr *ctx)
return 1;
}
+SEC("cgroup/bind6")
+int bind_v6_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bind_prog.h b/tools/testing/selftests/bpf/progs/bind_prog.h
new file mode 100644
index 0000000000..e830caa940
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bind_prog.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BIND_PROG_H__
+#define __BIND_PROG_H__
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define load_byte(src, b, s) \
+ (((volatile __u8 *)&(src))[b] << 8 * b)
+#define load_word(src, w, s) \
+ (((volatile __u16 *)&(src))[w] << 16 * w)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define load_byte(src, b, s) \
+ (((volatile __u8 *)&(src))[(b) + (sizeof(src) - (s))] << 8 * ((s) - (b) - 1))
+#define load_word(src, w, s) \
+ (((volatile __u16 *)&(src))[w] << 16 * (((s) / 2) - (w) - 1))
+#else
+# error "Fix your compiler's __BYTE_ORDER__?!"
+#endif
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
new file mode 100644
index 0000000000..1654a530aa
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Highlights:
+ * 1. The major difference between this bpf program and tcp_cubic.c
+ * is that this bpf program relies on `cong_control` rather than
+ * `cong_avoid` in the struct tcp_congestion_ops.
+ * 2. Logic such as tcp_cwnd_reduction, tcp_cong_avoid, and
+ * tcp_update_pacing_rate is bypassed when `cong_control` is
+ * defined, so moving these logic to `cong_control`.
+ * 3. WARNING: This bpf program is NOT the same as tcp_cubic.c.
+ * The main purpose is to show use cases of the arguments in
+ * `cong_control`. For simplicity's sake, it reuses tcp cubic's
+ * kernel functions.
+ */
+
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define USEC_PER_SEC 1000000UL
+#define TCP_PACING_SS_RATIO (200)
+#define TCP_PACING_CA_RATIO (120)
+#define TCP_REORDERING (12)
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#define after(seq2, seq1) before(seq1, seq2)
+
+extern void cubictcp_init(struct sock *sk) __ksym;
+extern void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym;
+extern __u32 cubictcp_recalc_ssthresh(struct sock *sk) __ksym;
+extern void cubictcp_state(struct sock *sk, __u8 new_state) __ksym;
+extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym;
+extern void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) __ksym;
+extern void cubictcp_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;
+
+static bool before(__u32 seq1, __u32 seq2)
+{
+ return (__s32)(seq1-seq2) < 0;
+}
+
+static __u64 div64_u64(__u64 dividend, __u64 divisor)
+{
+ return dividend / divisor;
+}
+
+static void tcp_update_pacing_rate(struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ __u64 rate;
+
+ /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
+ rate = (__u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3);
+
+ /* current rate is (cwnd * mss) / srtt
+ * In Slow Start [1], set sk_pacing_rate to 200 % the current rate.
+ * In Congestion Avoidance phase, set it to 120 % the current rate.
+ *
+ * [1] : Normal Slow Start condition is (tp->snd_cwnd < tp->snd_ssthresh)
+ * If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching
+ * end of slow start and should slow down.
+ */
+ if (tp->snd_cwnd < tp->snd_ssthresh / 2)
+ rate *= TCP_PACING_SS_RATIO;
+ else
+ rate *= TCP_PACING_CA_RATIO;
+
+ rate *= max(tp->snd_cwnd, tp->packets_out);
+
+ if (tp->srtt_us)
+ rate = div64_u64(rate, (__u64)tp->srtt_us);
+
+ sk->sk_pacing_rate = min(rate, sk->sk_max_pacing_rate);
+}
+
+static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
+ int newly_lost, int flag)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ int sndcnt = 0;
+ __u32 pkts_in_flight = tp->packets_out - (tp->sacked_out + tp->lost_out) + tp->retrans_out;
+ int delta = tp->snd_ssthresh - pkts_in_flight;
+
+ if (newly_acked_sacked <= 0 || !tp->prior_cwnd)
+ return;
+
+ __u32 prr_delivered = tp->prr_delivered + newly_acked_sacked;
+
+ if (delta < 0) {
+ __u64 dividend =
+ (__u64)tp->snd_ssthresh * prr_delivered + tp->prior_cwnd - 1;
+ sndcnt = (__u32)div64_u64(dividend, (__u64)tp->prior_cwnd) - tp->prr_out;
+ } else {
+ sndcnt = max(prr_delivered - tp->prr_out, newly_acked_sacked);
+ if (flag & FLAG_SND_UNA_ADVANCED && !newly_lost)
+ sndcnt++;
+ sndcnt = min(delta, sndcnt);
+ }
+ /* Force a fast retransmit upon entering fast recovery */
+ sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
+ tp->snd_cwnd = pkts_in_flight + sndcnt;
+}
+
+/* Decide wheather to run the increase function of congestion control. */
+static bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
+{
+ if (tcp_sk(sk)->reordering > TCP_REORDERING)
+ return flag & FLAG_FORWARD_PROGRESS;
+
+ return flag & FLAG_DATA_ACKED;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_init, struct sock *sk)
+{
+ cubictcp_init(sk);
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
+{
+ cubictcp_cwnd_event(sk, event);
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_cong_control, struct sock *sk, __u32 ack, int flag,
+ const struct rate_sample *rs)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (((1<<TCP_CA_CWR) | (1<<TCP_CA_Recovery)) &
+ (1 << inet_csk(sk)->icsk_ca_state)) {
+ /* Reduce cwnd if state mandates */
+ tcp_cwnd_reduction(sk, rs->acked_sacked, rs->losses, flag);
+
+ if (!before(tp->snd_una, tp->high_seq)) {
+ /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
+ if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
+ inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) {
+ tp->snd_cwnd = tp->snd_ssthresh;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
+ }
+ }
+ } else if (tcp_may_raise_cwnd(sk, flag)) {
+ /* Advance cwnd if state allows */
+ cubictcp_cong_avoid(sk, ack, rs->acked_sacked);
+ tp->snd_cwnd_stamp = tcp_jiffies32;
+ }
+
+ tcp_update_pacing_rate(sk);
+}
+
+SEC("struct_ops")
+__u32 BPF_PROG(bpf_cubic_recalc_ssthresh, struct sock *sk)
+{
+ return cubictcp_recalc_ssthresh(sk);
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_state, struct sock *sk, __u8 new_state)
+{
+ cubictcp_state(sk, new_state);
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_acked, struct sock *sk, const struct ack_sample *sample)
+{
+ cubictcp_acked(sk, sample);
+}
+
+SEC("struct_ops")
+__u32 BPF_PROG(bpf_cubic_undo_cwnd, struct sock *sk)
+{
+ return tcp_reno_undo_cwnd(sk);
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops cc_cubic = {
+ .init = (void *)bpf_cubic_init,
+ .ssthresh = (void *)bpf_cubic_recalc_ssthresh,
+ .cong_control = (void *)bpf_cubic_cong_control,
+ .set_state = (void *)bpf_cubic_state,
+ .undo_cwnd = (void *)bpf_cubic_undo_cwnd,
+ .cwnd_event = (void *)bpf_cubic_cwnd_event,
+ .pkts_acked = (void *)bpf_cubic_acked,
+ .name = "bpf_cc_cubic",
+};
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
index c997e3e3d3..d665b8a15c 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c
@@ -14,14 +14,22 @@
* "ca->ack_cnt / delta" operation.
*/
-#include <linux/bpf.h>
-#include <linux/stddef.h>
-#include <linux/tcp.h>
-#include "bpf_tcp_helpers.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define max(a, b) ((a) > (b) ? (a) : (b))
+static bool before(__u32 seq1, __u32 seq2)
+{
+ return (__s32)(seq1-seq2) < 0;
+}
+#define after(seq2, seq1) before(seq1, seq2)
+
+extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
+extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation
* max_cwnd = snd_cwnd * beta
@@ -70,7 +78,7 @@ static const __u64 cube_factor = (__u64)(1ull << (10+3*BICTCP_HZ))
/ (bic_scale * 10);
/* BIC TCP Parameters */
-struct bictcp {
+struct bpf_bictcp {
__u32 cnt; /* increase cwnd by 1 after ACKs */
__u32 last_max_cwnd; /* last maximum snd_cwnd */
__u32 last_cwnd; /* the last snd_cwnd */
@@ -91,7 +99,7 @@ struct bictcp {
__u32 curr_rtt; /* the minimum rtt of current round */
};
-static inline void bictcp_reset(struct bictcp *ca)
+static void bictcp_reset(struct bpf_bictcp *ca)
{
ca->cnt = 0;
ca->last_max_cwnd = 0;
@@ -112,7 +120,7 @@ extern unsigned long CONFIG_HZ __kconfig;
#define USEC_PER_SEC 1000000UL
#define USEC_PER_JIFFY (USEC_PER_SEC / HZ)
-static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor)
+static __u64 div64_u64(__u64 dividend, __u64 divisor)
{
return dividend / divisor;
}
@@ -120,7 +128,7 @@ static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor)
#define div64_ul div64_u64
#define BITS_PER_U64 (sizeof(__u64) * 8)
-static __always_inline int fls64(__u64 x)
+static int fls64(__u64 x)
{
int num = BITS_PER_U64 - 1;
@@ -153,15 +161,15 @@ static __always_inline int fls64(__u64 x)
return num + 1;
}
-static __always_inline __u32 bictcp_clock_us(const struct sock *sk)
+static __u32 bictcp_clock_us(const struct sock *sk)
{
return tcp_sk(sk)->tcp_mstamp;
}
-static __always_inline void bictcp_hystart_reset(struct sock *sk)
+static void bictcp_hystart_reset(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct bictcp *ca = inet_csk_ca(sk);
+ struct bpf_bictcp *ca = inet_csk_ca(sk);
ca->round_start = ca->last_ack = bictcp_clock_us(sk);
ca->end_seq = tp->snd_nxt;
@@ -169,11 +177,10 @@ static __always_inline void bictcp_hystart_reset(struct sock *sk)
ca->sample_cnt = 0;
}
-/* "struct_ops/" prefix is a requirement */
-SEC("struct_ops/bpf_cubic_init")
+SEC("struct_ops")
void BPF_PROG(bpf_cubic_init, struct sock *sk)
{
- struct bictcp *ca = inet_csk_ca(sk);
+ struct bpf_bictcp *ca = inet_csk_ca(sk);
bictcp_reset(ca);
@@ -184,12 +191,11 @@ void BPF_PROG(bpf_cubic_init, struct sock *sk)
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}
-/* "struct_ops" prefix is a requirement */
-SEC("struct_ops/bpf_cubic_cwnd_event")
+SEC("struct_ops")
void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
{
if (event == CA_EVENT_TX_START) {
- struct bictcp *ca = inet_csk_ca(sk);
+ struct bpf_bictcp *ca = inet_csk_ca(sk);
__u32 now = tcp_jiffies32;
__s32 delta;
@@ -230,7 +236,7 @@ static const __u8 v[] = {
* Newton-Raphson iteration.
* Avg err ~= 0.195%
*/
-static __always_inline __u32 cubic_root(__u64 a)
+static __u32 cubic_root(__u64 a)
{
__u32 x, b, shift;
@@ -263,8 +269,7 @@ static __always_inline __u32 cubic_root(__u64 a)
/*
* Compute congestion window to use.
*/
-static __always_inline void bictcp_update(struct bictcp *ca, __u32 cwnd,
- __u32 acked)
+static void bictcp_update(struct bpf_bictcp *ca, __u32 cwnd, __u32 acked)
{
__u32 delta, bic_target, max_cnt;
__u64 offs, t;
@@ -377,11 +382,11 @@ tcp_friendliness:
ca->cnt = max(ca->cnt, 2U);
}
-/* Or simply use the BPF_STRUCT_OPS to avoid the SEC boiler plate. */
-void BPF_STRUCT_OPS(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct bictcp *ca = inet_csk_ca(sk);
+ struct bpf_bictcp *ca = inet_csk_ca(sk);
if (!tcp_is_cwnd_limited(sk))
return;
@@ -397,10 +402,11 @@ void BPF_STRUCT_OPS(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acke
tcp_cong_avoid_ai(tp, ca->cnt, acked);
}
-__u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk)
+SEC("struct_ops")
+__u32 BPF_PROG(bpf_cubic_recalc_ssthresh, struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- struct bictcp *ca = inet_csk_ca(sk);
+ struct bpf_bictcp *ca = inet_csk_ca(sk);
ca->epoch_start = 0; /* end of epoch */
@@ -414,7 +420,8 @@ __u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk)
return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
}
-void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state)
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_state, struct sock *sk, __u8 new_state)
{
if (new_state == TCP_CA_Loss) {
bictcp_reset(inet_csk_ca(sk));
@@ -433,7 +440,7 @@ void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state)
* We apply another 100% factor because @rate is doubled at this point.
* We cap the cushion to 1ms.
*/
-static __always_inline __u32 hystart_ack_delay(struct sock *sk)
+static __u32 hystart_ack_delay(struct sock *sk)
{
unsigned long rate;
@@ -444,10 +451,10 @@ static __always_inline __u32 hystart_ack_delay(struct sock *sk)
div64_ul((__u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate));
}
-static __always_inline void hystart_update(struct sock *sk, __u32 delay)
+static void hystart_update(struct sock *sk, __u32 delay)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct bictcp *ca = inet_csk_ca(sk);
+ struct bpf_bictcp *ca = inet_csk_ca(sk);
__u32 threshold;
if (hystart_detect & HYSTART_ACK_TRAIN) {
@@ -492,11 +499,11 @@ static __always_inline void hystart_update(struct sock *sk, __u32 delay)
int bpf_cubic_acked_called = 0;
-void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk,
- const struct ack_sample *sample)
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_acked, struct sock *sk, const struct ack_sample *sample)
{
const struct tcp_sock *tp = tcp_sk(sk);
- struct bictcp *ca = inet_csk_ca(sk);
+ struct bpf_bictcp *ca = inet_csk_ca(sk);
__u32 delay;
bpf_cubic_acked_called = 1;
@@ -524,7 +531,8 @@ void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk,
extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym;
-__u32 BPF_STRUCT_OPS(bpf_cubic_undo_cwnd, struct sock *sk)
+SEC("struct_ops")
+__u32 BPF_PROG(bpf_cubic_undo_cwnd, struct sock *sk)
{
return tcp_reno_undo_cwnd(sk);
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index 460682759a..3c9ffe3403 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -6,15 +6,23 @@
* the kernel BPF logic.
*/
-#include <stddef.h>
-#include <linux/bpf.h>
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <linux/tcp.h>
-#include <errno.h>
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include "bpf_tcp_helpers.h"
+
+#ifndef EBUSY
+#define EBUSY 16
+#endif
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#define min_not_zero(x, y) ({ \
+ typeof(x) __x = (x); \
+ typeof(y) __y = (y); \
+ __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
+static bool before(__u32 seq1, __u32 seq2)
+{
+ return (__s32)(seq1-seq2) < 0;
+}
char _license[] SEC("license") = "GPL";
@@ -35,7 +43,7 @@ struct {
#define DCTCP_MAX_ALPHA 1024U
-struct dctcp {
+struct bpf_dctcp {
__u32 old_delivered;
__u32 old_delivered_ce;
__u32 prior_rcv_nxt;
@@ -48,8 +56,7 @@ struct dctcp {
static unsigned int dctcp_shift_g = 4; /* g = 1/2^4 */
static unsigned int dctcp_alpha_on_init = DCTCP_MAX_ALPHA;
-static __always_inline void dctcp_reset(const struct tcp_sock *tp,
- struct dctcp *ca)
+static void dctcp_reset(const struct tcp_sock *tp, struct bpf_dctcp *ca)
{
ca->next_seq = tp->snd_nxt;
@@ -57,11 +64,11 @@ static __always_inline void dctcp_reset(const struct tcp_sock *tp,
ca->old_delivered_ce = tp->delivered_ce;
}
-SEC("struct_ops/dctcp_init")
+SEC("struct_ops")
void BPF_PROG(dctcp_init, struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- struct dctcp *ca = inet_csk_ca(sk);
+ struct bpf_dctcp *ca = inet_csk_ca(sk);
int *stg;
if (!(tp->ecn_flags & TCP_ECN_OK) && fallback[0]) {
@@ -104,21 +111,21 @@ void BPF_PROG(dctcp_init, struct sock *sk)
dctcp_reset(tp, ca);
}
-SEC("struct_ops/dctcp_ssthresh")
+SEC("struct_ops")
__u32 BPF_PROG(dctcp_ssthresh, struct sock *sk)
{
- struct dctcp *ca = inet_csk_ca(sk);
+ struct bpf_dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
ca->loss_cwnd = tp->snd_cwnd;
return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
}
-SEC("struct_ops/dctcp_update_alpha")
+SEC("struct_ops")
void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags)
{
const struct tcp_sock *tp = tcp_sk(sk);
- struct dctcp *ca = inet_csk_ca(sk);
+ struct bpf_dctcp *ca = inet_csk_ca(sk);
/* Expired RTT */
if (!before(tp->snd_una, ca->next_seq)) {
@@ -144,16 +151,16 @@ void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags)
}
}
-static __always_inline void dctcp_react_to_loss(struct sock *sk)
+static void dctcp_react_to_loss(struct sock *sk)
{
- struct dctcp *ca = inet_csk_ca(sk);
+ struct bpf_dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
ca->loss_cwnd = tp->snd_cwnd;
tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U);
}
-SEC("struct_ops/dctcp_state")
+SEC("struct_ops")
void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state)
{
if (new_state == TCP_CA_Recovery &&
@@ -164,7 +171,7 @@ void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state)
*/
}
-static __always_inline void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state)
+static void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -179,9 +186,8 @@ static __always_inline void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state)
* S: 0 <- last pkt was non-CE
* 1 <- last pkt was CE
*/
-static __always_inline
-void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
- __u32 *prior_rcv_nxt, __u32 *ce_state)
+static void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
+ __u32 *prior_rcv_nxt, __u32 *ce_state)
{
__u32 new_ce_state = (evt == CA_EVENT_ECN_IS_CE) ? 1 : 0;
@@ -201,10 +207,10 @@ void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
dctcp_ece_ack_cwr(sk, new_ce_state);
}
-SEC("struct_ops/dctcp_cwnd_event")
+SEC("struct_ops")
void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev)
{
- struct dctcp *ca = inet_csk_ca(sk);
+ struct bpf_dctcp *ca = inet_csk_ca(sk);
switch (ev) {
case CA_EVENT_ECN_IS_CE:
@@ -220,17 +226,17 @@ void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev)
}
}
-SEC("struct_ops/dctcp_cwnd_undo")
+SEC("struct_ops")
__u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk)
{
- const struct dctcp *ca = inet_csk_ca(sk);
+ const struct bpf_dctcp *ca = inet_csk_ca(sk);
return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
}
extern void tcp_reno_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;
-SEC("struct_ops/dctcp_reno_cong_avoid")
+SEC("struct_ops")
void BPF_PROG(dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
tcp_reno_cong_avoid(sk, ack, acked);
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c b/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c
index d836f7c372..c91763f248 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c
@@ -1,19 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
-#include <stddef.h>
-#include <linux/bpf.h>
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <linux/tcp.h>
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include "bpf_tcp_helpers.h"
char _license[] SEC("license") = "GPL";
const char cubic[] = "cubic";
-void BPF_STRUCT_OPS(dctcp_nouse_release, struct sock *sk)
+SEC("struct_ops")
+void BPF_PROG(dctcp_nouse_release, struct sock *sk)
{
bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
(void *)cubic, sizeof(cubic));
diff --git a/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c b/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c
index 2ecd833dcd..8a7a4c1b54 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c
+++ b/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c
@@ -1,14 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
-#include <linux/types.h>
-#include <bpf/bpf_helpers.h>
+#include "bpf_tracing_net.h"
#include <bpf/bpf_tracing.h>
-#include "bpf_tcp_helpers.h"
char _license[] SEC("license") = "X";
-void BPF_STRUCT_OPS(nogpltcp_init, struct sock *sk)
+SEC("struct_ops")
+void BPF_PROG(nogpltcp_init, struct sock *sk)
{
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 7001965d1c..59843b430f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -2,6 +2,9 @@
#ifndef __BPF_TRACING_NET_H__
#define __BPF_TRACING_NET_H__
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+
#define AF_INET 2
#define AF_INET6 10
@@ -22,6 +25,7 @@
#define IP_TOS 1
+#define SOL_IPV6 41
#define IPV6_TCLASS 67
#define IPV6_AUTOFLOWLABEL 70
@@ -46,6 +50,13 @@
#define TCP_CA_NAME_MAX 16
#define TCP_NAGLE_OFF 1
+#define TCP_ECN_OK 1
+#define TCP_ECN_QUEUE_CWR 2
+#define TCP_ECN_DEMAND_CWR 4
+#define TCP_ECN_SEEN 8
+
+#define TCP_CONG_NEEDS_ECN 0x2
+
#define ICSK_TIME_RETRANS 1
#define ICSK_TIME_PROBE0 3
#define ICSK_TIME_LOSS_PROBE 5
@@ -80,6 +91,14 @@
#define TCP_INFINITE_SSTHRESH 0x7fffffff
#define TCP_PINGPONG_THRESH 3
+#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */
+#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */
+#define FLAG_DATA_SACKED 0x20 /* New SACK. */
+#define FLAG_SND_UNA_ADVANCED \
+ 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
+#define FLAG_ACKED (FLAG_DATA_ACKED | FLAG_SYN_ACKED)
+#define FLAG_FORWARD_PROGRESS (FLAG_ACKED | FLAG_DATA_SACKED)
+
#define fib_nh_dev nh_common.nhc_dev
#define fib_nh_gw_family nh_common.nhc_gw_family
#define fib_nh_gw6 nh_common.nhc_gw.ipv6
@@ -119,4 +138,37 @@
#define tw_v6_daddr __tw_common.skc_v6_daddr
#define tw_v6_rcv_saddr __tw_common.skc_v6_rcv_saddr
+#define tcp_jiffies32 ((__u32)bpf_jiffies64())
+
+static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
+{
+ return (struct inet_connection_sock *)sk;
+}
+
+static inline void *inet_csk_ca(const struct sock *sk)
+{
+ return (void *)inet_csk(sk)->icsk_ca_priv;
+}
+
+static inline struct tcp_sock *tcp_sk(const struct sock *sk)
+{
+ return (struct tcp_sock *)sk;
+}
+
+static inline bool tcp_in_slow_start(const struct tcp_sock *tp)
+{
+ return tp->snd_cwnd < tp->snd_ssthresh;
+}
+
+static inline bool tcp_is_cwnd_limited(const struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+ /* If in slow start, ensure cwnd grows to twice what was ACKed. */
+ if (tcp_in_slow_start(tp))
+ return tp->snd_cwnd < 2 * tp->max_packets_out;
+
+ return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
+}
+
#endif
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c
index ba97165bdb..a657651eba 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c
@@ -14,9 +14,9 @@ typedef int *ptr_arr_t[6];
typedef int *ptr_multiarr_t[7][8][9][10];
-typedef int * (*fn_ptr_arr_t[11])();
+typedef int * (*fn_ptr_arr_t[11])(void);
-typedef int * (*fn_ptr_multiarr_t[12][13])();
+typedef int * (*fn_ptr_multiarr_t[12][13])(void);
struct root_struct {
arr_t _1;
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
index ad21ee8c7e..29d01fff32 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
@@ -100,7 +100,7 @@ typedef void (*printf_fn_t)(const char *, ...);
* `int -> char *` function and returns pointer to a char. Equivalent:
* typedef char * (*fn_input_t)(int);
* typedef char * (*fn_output_outer_t)(fn_input_t);
- * typedef const fn_output_outer_t (* fn_output_inner_t)();
+ * typedef const fn_output_outer_t (* fn_output_inner_t)(void);
* typedef const fn_output_inner_t fn_ptr_arr2_t[5];
*/
/* ----- START-EXPECTED-OUTPUT ----- */
@@ -127,7 +127,7 @@ typedef void (* (*signal_t)(int, void (*)(int)))(int);
typedef char * (*fn_ptr_arr1_t[10])(int **);
-typedef char * (* (* const fn_ptr_arr2_t[5])())(char * (*)(int));
+typedef char * (* (* const fn_ptr_arr2_t[5])(void))(char * (*)(int));
struct struct_w_typedefs {
int_t a;
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
index 22914a70db..73ba32e9a6 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
@@ -13,7 +13,7 @@ struct __cgrps_kfunc_map_value {
struct cgroup __kptr * cgrp;
};
-struct hash_map {
+struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, int);
__type(value, struct __cgrps_kfunc_map_value);
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index 7ef49ec048..9e9ebf27b8 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -14,8 +14,6 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-#include "bpf_tcp_helpers.h"
-
#define SRC_REWRITE_IP4 0x7f000004U
#define DST_REWRITE_IP4 0x7f000001U
#define DST_REWRITE_PORT4 4444
@@ -32,6 +30,10 @@
#define IFNAMSIZ 16
#endif
+#ifndef SOL_TCP
+#define SOL_TCP 6
+#endif
+
__attribute__ ((noinline)) __weak
int do_bind(struct bpf_sock_addr *ctx)
{
@@ -197,4 +199,10 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
return do_bind(ctx) ? 1 : 0;
}
+SEC("cgroup/connect4")
+int connect_v4_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/connect6_prog.c b/tools/testing/selftests/bpf/progs/connect6_prog.c
index 40266d2c73..e98573b00d 100644
--- a/tools/testing/selftests/bpf/progs/connect6_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect6_prog.c
@@ -90,4 +90,10 @@ int connect_v6_prog(struct bpf_sock_addr *ctx)
return 1;
}
+SEC("cgroup/connect6")
+int connect_v6_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/connect_unix_prog.c b/tools/testing/selftests/bpf/progs/connect_unix_prog.c
index 2ef0e0c46d..ba60adadb3 100644
--- a/tools/testing/selftests/bpf/progs/connect_unix_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect_unix_prog.c
@@ -36,4 +36,10 @@ int connect_unix_prog(struct bpf_sock_addr *ctx)
return 1;
}
+SEC("cgroup/connect_unix")
+int connect_unix_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h
index c705d8112a..b979e91f55 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_common.h
+++ b/tools/testing/selftests/bpf/progs/cpumask_common.h
@@ -9,7 +9,7 @@
int err;
-#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+#define private(name) SEC(".bss." #name) __attribute__((aligned(8)))
private(MASK) static struct bpf_cpumask __kptr * global_mask;
struct __cpumask_map_value {
diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c
index a9bf6ea336..a988d2823b 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_failure.c
+++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c
@@ -61,11 +61,8 @@ SEC("tp_btf/task_newtask")
__failure __msg("bpf_cpumask_set_cpu args#1 expected pointer to STRUCT bpf_cpumask")
int BPF_PROG(test_mutate_cpumask, struct task_struct *task, u64 clone_flags)
{
- struct bpf_cpumask *cpumask;
-
/* Can't set the CPU of a non-struct bpf_cpumask. */
bpf_cpumask_set_cpu(0, (struct bpf_cpumask *)task->cpus_ptr);
- __sink(cpumask);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/crypto_basic.c b/tools/testing/selftests/bpf/progs/crypto_basic.c
new file mode 100644
index 0000000000..8cf7168b42
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/crypto_basic.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
+#include "crypto_common.h"
+
+int status;
+SEC("syscall")
+int crypto_release(void *ctx)
+{
+ struct bpf_crypto_params params = {
+ .type = "skcipher",
+ .algo = "ecb(aes)",
+ .key_len = 16,
+ };
+
+ struct bpf_crypto_ctx *cctx;
+ int err = 0;
+
+ status = 0;
+
+ cctx = bpf_crypto_ctx_create(&params, sizeof(params), &err);
+
+ if (!cctx) {
+ status = err;
+ return 0;
+ }
+
+ bpf_crypto_ctx_release(cctx);
+
+ return 0;
+}
+
+SEC("syscall")
+__failure __msg("Unreleased reference")
+int crypto_acquire(void *ctx)
+{
+ struct bpf_crypto_params params = {
+ .type = "skcipher",
+ .algo = "ecb(aes)",
+ .key_len = 16,
+ };
+ struct bpf_crypto_ctx *cctx;
+ int err = 0;
+
+ status = 0;
+
+ cctx = bpf_crypto_ctx_create(&params, sizeof(params), &err);
+
+ if (!cctx) {
+ status = err;
+ return 0;
+ }
+
+ cctx = bpf_crypto_ctx_acquire(cctx);
+ if (!cctx)
+ return -EINVAL;
+
+ bpf_crypto_ctx_release(cctx);
+
+ return 0;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/crypto_bench.c b/tools/testing/selftests/bpf/progs/crypto_bench.c
new file mode 100644
index 0000000000..e61fe08822
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/crypto_bench.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
+#include "crypto_common.h"
+
+const volatile unsigned int len = 16;
+char cipher[128] = {};
+u32 key_len, authsize;
+char dst[256] = {};
+u8 key[256] = {};
+long hits = 0;
+int status;
+
+SEC("syscall")
+int crypto_setup(void *args)
+{
+ struct bpf_crypto_ctx *cctx;
+ struct bpf_crypto_params params = {
+ .type = "skcipher",
+ .key_len = key_len,
+ .authsize = authsize,
+ };
+ int err = 0;
+
+ status = 0;
+
+ if (!cipher[0] || !key_len || key_len > 256) {
+ status = -EINVAL;
+ return 0;
+ }
+
+ __builtin_memcpy(&params.algo, cipher, sizeof(cipher));
+ __builtin_memcpy(&params.key, key, sizeof(key));
+ cctx = bpf_crypto_ctx_create(&params, sizeof(params), &err);
+
+ if (!cctx) {
+ status = err;
+ return 0;
+ }
+
+ err = crypto_ctx_insert(cctx);
+ if (err && err != -EEXIST)
+ status = err;
+
+ return 0;
+}
+
+SEC("tc")
+int crypto_encrypt(struct __sk_buff *skb)
+{
+ struct __crypto_ctx_value *v;
+ struct bpf_crypto_ctx *ctx;
+ struct bpf_dynptr psrc, pdst, iv;
+
+ v = crypto_ctx_value_lookup();
+ if (!v) {
+ status = -ENOENT;
+ return 0;
+ }
+
+ ctx = v->ctx;
+ if (!ctx) {
+ status = -ENOENT;
+ return 0;
+ }
+
+ bpf_dynptr_from_skb(skb, 0, &psrc);
+ bpf_dynptr_from_mem(dst, len, 0, &pdst);
+ bpf_dynptr_from_mem(dst, 0, 0, &iv);
+
+ status = bpf_crypto_encrypt(ctx, &psrc, &pdst, &iv);
+ __sync_add_and_fetch(&hits, 1);
+
+ return 0;
+}
+
+SEC("tc")
+int crypto_decrypt(struct __sk_buff *skb)
+{
+ struct bpf_dynptr psrc, pdst, iv;
+ struct __crypto_ctx_value *v;
+ struct bpf_crypto_ctx *ctx;
+
+ v = crypto_ctx_value_lookup();
+ if (!v)
+ return -ENOENT;
+
+ ctx = v->ctx;
+ if (!ctx)
+ return -ENOENT;
+
+ bpf_dynptr_from_skb(skb, 0, &psrc);
+ bpf_dynptr_from_mem(dst, len, 0, &pdst);
+ bpf_dynptr_from_mem(dst, 0, 0, &iv);
+
+ status = bpf_crypto_decrypt(ctx, &psrc, &pdst, &iv);
+ __sync_add_and_fetch(&hits, 1);
+
+ return 0;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/crypto_common.h b/tools/testing/selftests/bpf/progs/crypto_common.h
new file mode 100644
index 0000000000..57dd7a68a8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/crypto_common.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#ifndef _CRYPTO_COMMON_H
+#define _CRYPTO_COMMON_H
+
+#include "errno.h"
+#include <stdbool.h>
+
+struct bpf_crypto_ctx *bpf_crypto_ctx_create(const struct bpf_crypto_params *params,
+ u32 params__sz, int *err) __ksym;
+struct bpf_crypto_ctx *bpf_crypto_ctx_acquire(struct bpf_crypto_ctx *ctx) __ksym;
+void bpf_crypto_ctx_release(struct bpf_crypto_ctx *ctx) __ksym;
+int bpf_crypto_encrypt(struct bpf_crypto_ctx *ctx, const struct bpf_dynptr *src,
+ const struct bpf_dynptr *dst, const struct bpf_dynptr *iv) __ksym;
+int bpf_crypto_decrypt(struct bpf_crypto_ctx *ctx, const struct bpf_dynptr *src,
+ const struct bpf_dynptr *dst, const struct bpf_dynptr *iv) __ksym;
+
+struct __crypto_ctx_value {
+ struct bpf_crypto_ctx __kptr * ctx;
+};
+
+struct array_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct __crypto_ctx_value);
+ __uint(max_entries, 1);
+} __crypto_ctx_map SEC(".maps");
+
+static inline struct __crypto_ctx_value *crypto_ctx_value_lookup(void)
+{
+ u32 key = 0;
+
+ return bpf_map_lookup_elem(&__crypto_ctx_map, &key);
+}
+
+static inline int crypto_ctx_insert(struct bpf_crypto_ctx *ctx)
+{
+ struct __crypto_ctx_value local, *v;
+ struct bpf_crypto_ctx *old;
+ u32 key = 0;
+ int err;
+
+ local.ctx = NULL;
+ err = bpf_map_update_elem(&__crypto_ctx_map, &key, &local, 0);
+ if (err) {
+ bpf_crypto_ctx_release(ctx);
+ return err;
+ }
+
+ v = bpf_map_lookup_elem(&__crypto_ctx_map, &key);
+ if (!v) {
+ bpf_crypto_ctx_release(ctx);
+ return -ENOENT;
+ }
+
+ old = bpf_kptr_xchg(&v->ctx, ctx);
+ if (old) {
+ bpf_crypto_ctx_release(old);
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+#endif /* _CRYPTO_COMMON_H */
diff --git a/tools/testing/selftests/bpf/progs/crypto_sanity.c b/tools/testing/selftests/bpf/progs/crypto_sanity.c
new file mode 100644
index 0000000000..1be0a3fa5e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/crypto_sanity.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
+#include "crypto_common.h"
+
+unsigned char key[256] = {};
+u16 udp_test_port = 7777;
+u32 authsize, key_len;
+char algo[128] = {};
+char dst[16] = {};
+int status;
+
+static int skb_dynptr_validate(struct __sk_buff *skb, struct bpf_dynptr *psrc)
+{
+ struct ipv6hdr ip6h;
+ struct udphdr udph;
+ u32 offset;
+
+ if (skb->protocol != __bpf_constant_htons(ETH_P_IPV6))
+ return -1;
+
+ if (bpf_skb_load_bytes(skb, ETH_HLEN, &ip6h, sizeof(ip6h)))
+ return -1;
+
+ if (ip6h.nexthdr != IPPROTO_UDP)
+ return -1;
+
+ if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(ip6h), &udph, sizeof(udph)))
+ return -1;
+
+ if (udph.dest != __bpf_htons(udp_test_port))
+ return -1;
+
+ offset = ETH_HLEN + sizeof(ip6h) + sizeof(udph);
+ if (skb->len < offset + 16)
+ return -1;
+
+ /* let's make sure that 16 bytes of payload are in the linear part of skb */
+ bpf_skb_pull_data(skb, offset + 16);
+ bpf_dynptr_from_skb(skb, 0, psrc);
+ bpf_dynptr_adjust(psrc, offset, offset + 16);
+
+ return 0;
+}
+
+SEC("syscall")
+int skb_crypto_setup(void *ctx)
+{
+ struct bpf_crypto_params params = {
+ .type = "skcipher",
+ .key_len = key_len,
+ .authsize = authsize,
+ };
+ struct bpf_crypto_ctx *cctx;
+ int err = 0;
+
+ status = 0;
+
+ if (key_len > 256) {
+ status = -EINVAL;
+ return 0;
+ }
+
+ __builtin_memcpy(&params.algo, algo, sizeof(algo));
+ __builtin_memcpy(&params.key, key, sizeof(key));
+ cctx = bpf_crypto_ctx_create(&params, sizeof(params), &err);
+
+ if (!cctx) {
+ status = err;
+ return 0;
+ }
+
+ err = crypto_ctx_insert(cctx);
+ if (err && err != -EEXIST)
+ status = err;
+
+ return 0;
+}
+
+SEC("tc")
+int decrypt_sanity(struct __sk_buff *skb)
+{
+ struct __crypto_ctx_value *v;
+ struct bpf_crypto_ctx *ctx;
+ struct bpf_dynptr psrc, pdst, iv;
+ int err;
+
+ err = skb_dynptr_validate(skb, &psrc);
+ if (err < 0) {
+ status = err;
+ return TC_ACT_SHOT;
+ }
+
+ v = crypto_ctx_value_lookup();
+ if (!v) {
+ status = -ENOENT;
+ return TC_ACT_SHOT;
+ }
+
+ ctx = v->ctx;
+ if (!ctx) {
+ status = -ENOENT;
+ return TC_ACT_SHOT;
+ }
+
+ /* dst is a global variable to make testing part easier to check. In real
+ * production code, a percpu map should be used to store the result.
+ */
+ bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst);
+ /* iv dynptr has to be initialized with 0 size, but proper memory region
+ * has to be provided anyway
+ */
+ bpf_dynptr_from_mem(dst, 0, 0, &iv);
+
+ status = bpf_crypto_decrypt(ctx, &psrc, &pdst, &iv);
+
+ return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int encrypt_sanity(struct __sk_buff *skb)
+{
+ struct __crypto_ctx_value *v;
+ struct bpf_crypto_ctx *ctx;
+ struct bpf_dynptr psrc, pdst, iv;
+ int err;
+
+ status = 0;
+
+ err = skb_dynptr_validate(skb, &psrc);
+ if (err < 0) {
+ status = err;
+ return TC_ACT_SHOT;
+ }
+
+ v = crypto_ctx_value_lookup();
+ if (!v) {
+ status = -ENOENT;
+ return TC_ACT_SHOT;
+ }
+
+ ctx = v->ctx;
+ if (!ctx) {
+ status = -ENOENT;
+ return TC_ACT_SHOT;
+ }
+
+ /* dst is a global variable to make testing part easier to check. In real
+ * production code, a percpu map should be used to store the result.
+ */
+ bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst);
+ /* iv dynptr has to be initialized with 0 size, but proper memory region
+ * has to be provided anyway
+ */
+ bpf_dynptr_from_mem(dst, 0, 0, &iv);
+
+ status = bpf_crypto_encrypt(ctx, &psrc, &pdst, &iv);
+
+ return TC_ACT_SHOT;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index 7ce7e827d5..66a60bfb58 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -80,7 +80,7 @@ SEC("?raw_tp")
__failure __msg("Unreleased reference id=2")
int ringbuf_missing_release1(void *ctx)
{
- struct bpf_dynptr ptr;
+ struct bpf_dynptr ptr = {};
bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
@@ -1385,7 +1385,7 @@ SEC("?raw_tp")
__failure __msg("Expected an initialized dynptr as arg #1")
int dynptr_adjust_invalid(void *ctx)
{
- struct bpf_dynptr ptr;
+ struct bpf_dynptr ptr = {};
/* this should fail */
bpf_dynptr_adjust(&ptr, 1, 2);
@@ -1398,7 +1398,7 @@ SEC("?raw_tp")
__failure __msg("Expected an initialized dynptr as arg #1")
int dynptr_is_null_invalid(void *ctx)
{
- struct bpf_dynptr ptr;
+ struct bpf_dynptr ptr = {};
/* this should fail */
bpf_dynptr_is_null(&ptr);
@@ -1411,7 +1411,7 @@ SEC("?raw_tp")
__failure __msg("Expected an initialized dynptr as arg #1")
int dynptr_is_rdonly_invalid(void *ctx)
{
- struct bpf_dynptr ptr;
+ struct bpf_dynptr ptr = {};
/* this should fail */
bpf_dynptr_is_rdonly(&ptr);
@@ -1424,7 +1424,7 @@ SEC("?raw_tp")
__failure __msg("Expected an initialized dynptr as arg #1")
int dynptr_size_invalid(void *ctx)
{
- struct bpf_dynptr ptr;
+ struct bpf_dynptr ptr = {};
/* this should fail */
bpf_dynptr_size(&ptr);
@@ -1437,7 +1437,7 @@ SEC("?raw_tp")
__failure __msg("Expected an initialized dynptr as arg #1")
int clone_invalid1(void *ctx)
{
- struct bpf_dynptr ptr1;
+ struct bpf_dynptr ptr1 = {};
struct bpf_dynptr ptr2;
/* this should fail */
diff --git a/tools/testing/selftests/bpf/progs/fib_lookup.c b/tools/testing/selftests/bpf/progs/fib_lookup.c
index c4514dd58c..7b5dd2214f 100644
--- a/tools/testing/selftests/bpf/progs/fib_lookup.c
+++ b/tools/testing/selftests/bpf/progs/fib_lookup.c
@@ -3,8 +3,8 @@
#include <linux/types.h>
#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_tracing_net.h"
struct bpf_fib_lookup fib_params = {};
int fib_lookup_ret = 0;
diff --git a/tools/testing/selftests/bpf/progs/for_each_multi_maps.c b/tools/testing/selftests/bpf/progs/for_each_multi_maps.c
new file mode 100644
index 0000000000..ff0bed7d44
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_multi_maps.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u64);
+} arraymap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 5);
+ __type(key, __u32);
+ __type(value, __u64);
+} hashmap SEC(".maps");
+
+struct callback_ctx {
+ int output;
+};
+
+u32 data_output = 0;
+int use_array = 0;
+
+static __u64
+check_map_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ struct callback_ctx *data)
+{
+ data->output += *val;
+ return 0;
+}
+
+SEC("tc")
+int test_pkt_access(struct __sk_buff *skb)
+{
+ struct callback_ctx data;
+
+ data.output = 0;
+ if (use_array)
+ bpf_for_each_map_elem(&arraymap, check_map_elem, &data, 0);
+ else
+ bpf_for_each_map_elem(&hashmap, check_map_elem, &data, 0);
+ data_output = data.output;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/getpeername4_prog.c b/tools/testing/selftests/bpf/progs/getpeername4_prog.c
new file mode 100644
index 0000000000..4c97208cd2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/getpeername4_prog.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google LLC */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+#define REWRITE_ADDRESS_IP4 0xc0a801fe // 192.168.1.254
+#define REWRITE_ADDRESS_PORT4 4040
+
+SEC("cgroup/getpeername4")
+int getpeername_v4_prog(struct bpf_sock_addr *ctx)
+{
+ ctx->user_ip4 = bpf_htonl(REWRITE_ADDRESS_IP4);
+ ctx->user_port = bpf_htons(REWRITE_ADDRESS_PORT4);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/getpeername6_prog.c b/tools/testing/selftests/bpf/progs/getpeername6_prog.c
new file mode 100644
index 0000000000..070e4d7f63
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/getpeername6_prog.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google LLC */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+#define REWRITE_ADDRESS_IP6_0 0xfaceb00c
+#define REWRITE_ADDRESS_IP6_1 0x12345678
+#define REWRITE_ADDRESS_IP6_2 0x00000000
+#define REWRITE_ADDRESS_IP6_3 0x0000abcd
+
+#define REWRITE_ADDRESS_PORT6 6060
+
+SEC("cgroup/getpeername6")
+int getpeername_v6_prog(struct bpf_sock_addr *ctx)
+{
+ ctx->user_ip6[0] = bpf_htonl(REWRITE_ADDRESS_IP6_0);
+ ctx->user_ip6[1] = bpf_htonl(REWRITE_ADDRESS_IP6_1);
+ ctx->user_ip6[2] = bpf_htonl(REWRITE_ADDRESS_IP6_2);
+ ctx->user_ip6[3] = bpf_htonl(REWRITE_ADDRESS_IP6_3);
+ ctx->user_port = bpf_htons(REWRITE_ADDRESS_PORT6);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/getsockname4_prog.c b/tools/testing/selftests/bpf/progs/getsockname4_prog.c
new file mode 100644
index 0000000000..e298487c63
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/getsockname4_prog.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google LLC */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+#define REWRITE_ADDRESS_IP4 0xc0a801fe // 192.168.1.254
+#define REWRITE_ADDRESS_PORT4 4040
+
+SEC("cgroup/getsockname4")
+int getsockname_v4_prog(struct bpf_sock_addr *ctx)
+{
+ ctx->user_ip4 = bpf_htonl(REWRITE_ADDRESS_IP4);
+ ctx->user_port = bpf_htons(REWRITE_ADDRESS_PORT4);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/getsockname6_prog.c b/tools/testing/selftests/bpf/progs/getsockname6_prog.c
new file mode 100644
index 0000000000..811d10cd55
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/getsockname6_prog.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google LLC */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+#define REWRITE_ADDRESS_IP6_0 0xfaceb00c
+#define REWRITE_ADDRESS_IP6_1 0x12345678
+#define REWRITE_ADDRESS_IP6_2 0x00000000
+#define REWRITE_ADDRESS_IP6_3 0x0000abcd
+
+#define REWRITE_ADDRESS_PORT6 6060
+
+SEC("cgroup/getsockname6")
+int getsockname_v6_prog(struct bpf_sock_addr *ctx)
+{
+ ctx->user_ip6[0] = bpf_htonl(REWRITE_ADDRESS_IP6_0);
+ ctx->user_ip6[1] = bpf_htonl(REWRITE_ADDRESS_IP6_1);
+ ctx->user_ip6[2] = bpf_htonl(REWRITE_ADDRESS_IP6_2);
+ ctx->user_ip6[3] = bpf_htonl(REWRITE_ADDRESS_IP6_3);
+ ctx->user_port = bpf_htons(REWRITE_ADDRESS_PORT6);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index 3db416606f..fe65e0952a 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -673,7 +673,7 @@ static __noinline void fill(struct bpf_iter_num *it, int *arr, __u32 n, int mul)
static __noinline int sum(struct bpf_iter_num *it, int *arr, __u32 n)
{
- int *t, i, sum = 0;;
+ int *t, i, sum = 0;
while ((t = bpf_iter_num_next(it))) {
i = *t;
diff --git a/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c b/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c
index f46965053a..4d619bea9c 100644
--- a/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c
+++ b/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c
@@ -4,6 +4,10 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#ifndef __clang__
+#pragma GCC diagnostic ignored "-Warray-bounds"
+#endif
+
char _license[] SEC("license") = "GPL";
struct {
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_session.c b/tools/testing/selftests/bpf/progs/kprobe_multi_session.c
new file mode 100644
index 0000000000..bbba9eb465
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_session.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+#include "bpf_kfuncs.h"
+
+#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0]))
+
+char _license[] SEC("license") = "GPL";
+
+extern const void bpf_fentry_test1 __ksym;
+extern const void bpf_fentry_test2 __ksym;
+extern const void bpf_fentry_test3 __ksym;
+extern const void bpf_fentry_test4 __ksym;
+extern const void bpf_fentry_test5 __ksym;
+extern const void bpf_fentry_test6 __ksym;
+extern const void bpf_fentry_test7 __ksym;
+extern const void bpf_fentry_test8 __ksym;
+
+int pid = 0;
+
+__u64 kprobe_session_result[8];
+
+static int session_check(void *ctx)
+{
+ unsigned int i;
+ __u64 addr;
+ const void *kfuncs[] = {
+ &bpf_fentry_test1,
+ &bpf_fentry_test2,
+ &bpf_fentry_test3,
+ &bpf_fentry_test4,
+ &bpf_fentry_test5,
+ &bpf_fentry_test6,
+ &bpf_fentry_test7,
+ &bpf_fentry_test8,
+ };
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 1;
+
+ addr = bpf_get_func_ip(ctx);
+
+ for (i = 0; i < ARRAY_SIZE(kfuncs); i++) {
+ if (kfuncs[i] == (void *) addr) {
+ kprobe_session_result[i]++;
+ break;
+ }
+ }
+
+ /*
+ * Force probes for function bpf_fentry_test[5-8] not to
+ * install and execute the return probe
+ */
+ if (((const void *) addr == &bpf_fentry_test5) ||
+ ((const void *) addr == &bpf_fentry_test6) ||
+ ((const void *) addr == &bpf_fentry_test7) ||
+ ((const void *) addr == &bpf_fentry_test8))
+ return 1;
+
+ return 0;
+}
+
+/*
+ * No tests in here, just to trigger 'bpf_fentry_test*'
+ * through tracing test_run
+ */
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(trigger)
+{
+ return 0;
+}
+
+SEC("kprobe.session/bpf_fentry_test*")
+int test_kprobe(struct pt_regs *ctx)
+{
+ return session_check(ctx);
+}
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c b/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c
new file mode 100644
index 0000000000..0835b5edf6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+#include "bpf_kfuncs.h"
+
+char _license[] SEC("license") = "GPL";
+
+int pid = 0;
+
+__u64 test_kprobe_1_result = 0;
+__u64 test_kprobe_2_result = 0;
+__u64 test_kprobe_3_result = 0;
+
+/*
+ * No tests in here, just to trigger 'bpf_fentry_test*'
+ * through tracing test_run
+ */
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(trigger)
+{
+ return 0;
+}
+
+static int check_cookie(__u64 val, __u64 *result)
+{
+ __u64 *cookie;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 1;
+
+ cookie = bpf_session_cookie();
+
+ if (bpf_session_is_return())
+ *result = *cookie == val ? val : 0;
+ else
+ *cookie = val;
+ return 0;
+}
+
+SEC("kprobe.session/bpf_fentry_test1")
+int test_kprobe_1(struct pt_regs *ctx)
+{
+ return check_cookie(1, &test_kprobe_1_result);
+}
+
+SEC("kprobe.session/bpf_fentry_test1")
+int test_kprobe_2(struct pt_regs *ctx)
+{
+ return check_cookie(2, &test_kprobe_2_result);
+}
+
+SEC("kprobe.session/bpf_fentry_test1")
+int test_kprobe_3(struct pt_regs *ctx)
+{
+ return check_cookie(3, &test_kprobe_3_result);
+}
diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c
index da30f0d593..ab0ce1d01a 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr.c
@@ -110,10 +110,14 @@ DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_map, array_of_array_maps);
DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_map, array_of_hash_maps);
DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_malloc_map, array_of_hash_malloc_maps);
DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, lru_hash_map, array_of_lru_hash_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, pcpu_array_map, array_of_pcpu_array_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, pcpu_hash_map, array_of_pcpu_hash_maps);
DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, array_map, hash_of_array_maps);
DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_map, hash_of_hash_maps);
DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_malloc_map, hash_of_hash_malloc_maps);
DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, lru_hash_map, hash_of_lru_hash_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, pcpu_array_map, hash_of_pcpu_array_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, pcpu_hash_map, hash_of_pcpu_hash_maps);
#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))
@@ -204,6 +208,8 @@ int test_map_kptr(struct __sk_buff *ctx)
TEST(hash_map);
TEST(hash_malloc_map);
TEST(lru_hash_map);
+ TEST(pcpu_array_map);
+ TEST(pcpu_hash_map);
#undef TEST
return 0;
@@ -281,10 +287,14 @@ int test_map_in_map_kptr(struct __sk_buff *ctx)
TEST(array_of_hash_maps);
TEST(array_of_hash_malloc_maps);
TEST(array_of_lru_hash_maps);
+ TEST(array_of_pcpu_array_maps);
+ TEST(array_of_pcpu_hash_maps);
TEST(hash_of_array_maps);
TEST(hash_of_hash_maps);
TEST(hash_of_hash_malloc_maps);
TEST(hash_of_lru_hash_maps);
+ TEST(hash_of_pcpu_array_maps);
+ TEST(hash_of_pcpu_hash_maps);
#undef TEST
return 0;
diff --git a/tools/testing/selftests/bpf/progs/mptcp_sock.c b/tools/testing/selftests/bpf/progs/mptcp_sock.c
index 91a0d7eff2..f3acb90588 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_sock.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_sock.c
@@ -2,9 +2,9 @@
/* Copyright (c) 2020, Tessares SA. */
/* Copyright (c) 2022, SUSE. */
-#include <linux/bpf.h>
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include "bpf_tcp_helpers.h"
+#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
__u32 token = 0;
diff --git a/tools/testing/selftests/bpf/progs/mptcpify.c b/tools/testing/selftests/bpf/progs/mptcpify.c
index 53301ae8a8..cbdc730c3a 100644
--- a/tools/testing/selftests/bpf/progs/mptcpify.c
+++ b/tools/testing/selftests/bpf/progs/mptcpify.c
@@ -6,10 +6,14 @@
#include "bpf_tracing_net.h"
char _license[] SEC("license") = "GPL";
+int pid;
SEC("fmod_ret/update_socket_protocol")
int BPF_PROG(mptcpify, int family, int type, int protocol)
{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return protocol;
+
if ((family == AF_INET || family == AF_INET6) &&
type == SOCK_STREAM &&
(!protocol || protocol == IPPROTO_TCP)) {
diff --git a/tools/testing/selftests/bpf/progs/preempt_lock.c b/tools/testing/selftests/bpf/progs/preempt_lock.c
new file mode 100644
index 0000000000..672fc368d9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/preempt_lock.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+SEC("?tc")
+__failure __msg("1 bpf_preempt_enable is missing")
+int preempt_lock_missing_1(struct __sk_buff *ctx)
+{
+ bpf_preempt_disable();
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("2 bpf_preempt_enable(s) are missing")
+int preempt_lock_missing_2(struct __sk_buff *ctx)
+{
+ bpf_preempt_disable();
+ bpf_preempt_disable();
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("3 bpf_preempt_enable(s) are missing")
+int preempt_lock_missing_3(struct __sk_buff *ctx)
+{
+ bpf_preempt_disable();
+ bpf_preempt_disable();
+ bpf_preempt_disable();
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("1 bpf_preempt_enable is missing")
+int preempt_lock_missing_3_minus_2(struct __sk_buff *ctx)
+{
+ bpf_preempt_disable();
+ bpf_preempt_disable();
+ bpf_preempt_disable();
+ bpf_preempt_enable();
+ bpf_preempt_enable();
+ return 0;
+}
+
+static __noinline void preempt_disable(void)
+{
+ bpf_preempt_disable();
+}
+
+static __noinline void preempt_enable(void)
+{
+ bpf_preempt_enable();
+}
+
+SEC("?tc")
+__failure __msg("1 bpf_preempt_enable is missing")
+int preempt_lock_missing_1_subprog(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("2 bpf_preempt_enable(s) are missing")
+int preempt_lock_missing_2_subprog(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ preempt_disable();
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("1 bpf_preempt_enable is missing")
+int preempt_lock_missing_2_minus_1_subprog(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ preempt_disable();
+ preempt_enable();
+ return 0;
+}
+
+static __noinline void preempt_balance_subprog(void)
+{
+ preempt_disable();
+ preempt_enable();
+}
+
+SEC("?tc")
+__success int preempt_balance(struct __sk_buff *ctx)
+{
+ bpf_guard_preempt();
+ return 0;
+}
+
+SEC("?tc")
+__success int preempt_balance_subprog_test(struct __sk_buff *ctx)
+{
+ preempt_balance_subprog();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("sleepable helper bpf_copy_from_user#")
+int preempt_sleepable_helper(void *ctx)
+{
+ u32 data;
+
+ bpf_preempt_disable();
+ bpf_copy_from_user(&data, sizeof(data), NULL);
+ bpf_preempt_enable();
+ return 0;
+}
+
+int __noinline preempt_global_subprog(void)
+{
+ preempt_balance_subprog();
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("global function calls are not allowed with preemption disabled")
+int preempt_global_subprog_test(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ preempt_global_subprog();
+ preempt_enable();
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
index 351e79aef2..edc159598a 100644
--- a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
+++ b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
@@ -49,4 +49,10 @@ int sendmsg_v4_prog(struct bpf_sock_addr *ctx)
return 1;
}
+SEC("cgroup/sendmsg4")
+int sendmsg_v4_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c
index bf9b46b806..36a7f96079 100644
--- a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c
+++ b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c
@@ -20,6 +20,11 @@
#define DST_REWRITE_IP6_2 0
#define DST_REWRITE_IP6_3 1
+#define DST_REWRITE_IP6_V4_MAPPED_0 0
+#define DST_REWRITE_IP6_V4_MAPPED_1 0
+#define DST_REWRITE_IP6_V4_MAPPED_2 0x0000FFFF
+#define DST_REWRITE_IP6_V4_MAPPED_3 0xc0a80004 // 192.168.0.4
+
#define DST_REWRITE_PORT6 6666
SEC("cgroup/sendmsg6")
@@ -59,4 +64,56 @@ int sendmsg_v6_prog(struct bpf_sock_addr *ctx)
return 1;
}
+SEC("cgroup/sendmsg6")
+int sendmsg_v6_v4mapped_prog(struct bpf_sock_addr *ctx)
+{
+ /* Rewrite source. */
+ ctx->msg_src_ip6[0] = bpf_htonl(SRC_REWRITE_IP6_0);
+ ctx->msg_src_ip6[1] = bpf_htonl(SRC_REWRITE_IP6_1);
+ ctx->msg_src_ip6[2] = bpf_htonl(SRC_REWRITE_IP6_2);
+ ctx->msg_src_ip6[3] = bpf_htonl(SRC_REWRITE_IP6_3);
+
+ /* Rewrite destination. */
+ ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_V4_MAPPED_0);
+ ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_V4_MAPPED_1);
+ ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_V4_MAPPED_2);
+ ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_V4_MAPPED_3);
+
+ ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
+
+ return 1;
+}
+
+SEC("cgroup/sendmsg6")
+int sendmsg_v6_wildcard_prog(struct bpf_sock_addr *ctx)
+{
+ /* Rewrite source. */
+ ctx->msg_src_ip6[0] = bpf_htonl(SRC_REWRITE_IP6_0);
+ ctx->msg_src_ip6[1] = bpf_htonl(SRC_REWRITE_IP6_1);
+ ctx->msg_src_ip6[2] = bpf_htonl(SRC_REWRITE_IP6_2);
+ ctx->msg_src_ip6[3] = bpf_htonl(SRC_REWRITE_IP6_3);
+
+ /* Rewrite destination. */
+ ctx->user_ip6[0] = bpf_htonl(0);
+ ctx->user_ip6[1] = bpf_htonl(0);
+ ctx->user_ip6[2] = bpf_htonl(0);
+ ctx->user_ip6[3] = bpf_htonl(0);
+
+ ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
+
+ return 1;
+}
+
+SEC("cgroup/sendmsg6")
+int sendmsg_v6_preserve_dst_prog(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/sendmsg6")
+int sendmsg_v6_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c
index d8869b03dd..332d0eb111 100644
--- a/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c
+++ b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c
@@ -36,4 +36,10 @@ int sendmsg_unix_prog(struct bpf_sock_addr *ctx)
return 1;
}
+SEC("cgroup/sendmsg_unix")
+int sendmsg_unix_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/skb_pkt_end.c b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
index 992b786100..db4abd2682 100644
--- a/tools/testing/selftests/bpf/progs/skb_pkt_end.c
+++ b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
+#ifndef BPF_NO_PRESERVE_ACCESS_INDEX
#define BPF_NO_PRESERVE_ACCESS_INDEX
+#endif
#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
diff --git a/tools/testing/selftests/bpf/progs/sock_addr_kern.c b/tools/testing/selftests/bpf/progs/sock_addr_kern.c
new file mode 100644
index 0000000000..8386bb15cc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sock_addr_kern.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google LLC */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+SEC("syscall")
+int init_sock(struct init_sock_args *args)
+{
+ bpf_kfunc_init_sock(args);
+
+ return 0;
+}
+
+SEC("syscall")
+int close_sock(void *ctx)
+{
+ bpf_kfunc_close_sock();
+
+ return 0;
+}
+
+SEC("syscall")
+int kernel_connect(struct addr_args *args)
+{
+ return bpf_kfunc_call_kernel_connect(args);
+}
+
+SEC("syscall")
+int kernel_bind(struct addr_args *args)
+{
+ return bpf_kfunc_call_kernel_bind(args);
+}
+
+SEC("syscall")
+int kernel_listen(struct addr_args *args)
+{
+ return bpf_kfunc_call_kernel_listen();
+}
+
+SEC("syscall")
+int kernel_sendmsg(struct sendmsg_args *args)
+{
+ return bpf_kfunc_call_kernel_sendmsg(args);
+}
+
+SEC("syscall")
+int sock_sendmsg(struct sendmsg_args *args)
+{
+ return bpf_kfunc_call_sock_sendmsg(args);
+}
+
+SEC("syscall")
+int kernel_getsockname(struct addr_args *args)
+{
+ return bpf_kfunc_call_kernel_getsockname(args);
+}
+
+SEC("syscall")
+int kernel_getpeername(struct addr_args *args)
+{
+ return bpf_kfunc_call_kernel_getpeername(args);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
index 83753b00a5..5c3614333b 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
@@ -1,24 +1,20 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
-#include <string.h>
-#include <linux/tcp.h>
-#include <netinet/in.h>
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include "bpf_tcp_helpers.h"
+#include "bpf_tracing_net.h"
char _license[] SEC("license") = "GPL";
__s32 page_size = 0;
+const char cc_reno[TCP_CA_NAME_MAX] = "reno";
+const char cc_cubic[TCP_CA_NAME_MAX] = "cubic";
+
SEC("cgroup/setsockopt")
int sockopt_qos_to_cc(struct bpf_sockopt *ctx)
{
void *optval_end = ctx->optval_end;
int *optval = ctx->optval;
char buf[TCP_CA_NAME_MAX];
- char cc_reno[TCP_CA_NAME_MAX] = "reno";
- char cc_cubic[TCP_CA_NAME_MAX] = "cubic";
if (ctx->level != SOL_IPV6 || ctx->optname != IPV6_TCLASS)
goto out;
@@ -29,11 +25,11 @@ int sockopt_qos_to_cc(struct bpf_sockopt *ctx)
if (bpf_getsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
return 0;
- if (!tcp_cc_eq(buf, cc_cubic))
+ if (bpf_strncmp(buf, sizeof(buf), cc_cubic))
return 0;
if (*optval == 0x2d) {
- if (bpf_setsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &cc_reno,
+ if (bpf_setsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, (void *)&cc_reno,
sizeof(cc_reno)))
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c b/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c
new file mode 100644
index 0000000000..3c822103bd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1_forgotten)
+{
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops ops = {
+ /* we forgot to reference test_1_forgotten above, oops */
+};
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_module.c b/tools/testing/selftests/bpf/progs/struct_ops_module.c
index 026cabfa7f..4c56d4a9d9 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_module.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_module.c
@@ -23,7 +23,7 @@ void BPF_PROG(test_2, int a, int b)
test_2_result = a + b;
}
-SEC("struct_ops/test_3")
+SEC("?struct_ops/test_3")
int BPF_PROG(test_3, int a, int b)
{
test_2_result = a + b + 3;
@@ -54,3 +54,37 @@ struct bpf_testmod_ops___v2 testmod_2 = {
.test_1 = (void *)test_1,
.test_2 = (void *)test_2_v2,
};
+
+struct bpf_testmod_ops___zeroed {
+ int (*test_1)(void);
+ void (*test_2)(int a, int b);
+ int (*test_maybe_null)(int dummy, struct task_struct *task);
+ void (*zeroed_op)(int a, int b);
+ int zeroed;
+};
+
+SEC("struct_ops/test_3")
+int BPF_PROG(zeroed_op)
+{
+ return 1;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops___zeroed testmod_zeroed = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2_v2,
+ .zeroed_op = (void *)zeroed_op,
+};
+
+struct bpf_testmod_ops___incompatible {
+ int (*test_1)(void);
+ void (*test_2)(int *a);
+ int data;
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops___incompatible testmod_incompatible = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2,
+ .data = 3,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c b/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c
new file mode 100644
index 0000000000..fa20213884
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+int rand;
+int arr[1];
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1_turn_off)
+{
+ return arr[rand]; /* potentially way out of range access */
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops ops = {
+ .test_1 = (void *)test_1_turn_off,
+};
+
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
index 41f2d44f49..6720c4b5be 100644
--- a/tools/testing/selftests/bpf/progs/task_kfunc_common.h
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
@@ -13,7 +13,7 @@ struct __tasks_kfunc_map_value {
struct task_struct __kptr * task;
};
-struct hash_map {
+struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, int);
__type(value, struct __tasks_kfunc_map_value);
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c
index 7bb872fb22..0016c90e9c 100644
--- a/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c
@@ -1,24 +1,18 @@
// SPDX-License-Identifier: GPL-2.0
-#include "vmlinux.h"
-
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
-static inline struct tcp_sock *tcp_sk(const struct sock *sk)
-{
- return (struct tcp_sock *)sk;
-}
-
-SEC("struct_ops/incompl_cong_ops_ssthresh")
+SEC("struct_ops")
__u32 BPF_PROG(incompl_cong_ops_ssthresh, struct sock *sk)
{
return tcp_sk(sk)->snd_ssthresh;
}
-SEC("struct_ops/incompl_cong_ops_undo_cwnd")
+SEC("struct_ops")
__u32 BPF_PROG(incompl_cong_ops_undo_cwnd, struct sock *sk)
{
return tcp_sk(sk)->snd_cwnd;
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c b/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c
new file mode 100644
index 0000000000..f95862f570
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+
+extern void bbr_init(struct sock *sk) __ksym;
+extern void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs) __ksym;
+extern u32 bbr_sndbuf_expand(struct sock *sk) __ksym;
+extern u32 bbr_undo_cwnd(struct sock *sk) __ksym;
+extern void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym;
+extern u32 bbr_ssthresh(struct sock *sk) __ksym;
+extern u32 bbr_min_tso_segs(struct sock *sk) __ksym;
+extern void bbr_set_state(struct sock *sk, u8 new_state) __ksym;
+
+extern void dctcp_init(struct sock *sk) __ksym;
+extern void dctcp_update_alpha(struct sock *sk, u32 flags) __ksym;
+extern void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) __ksym;
+extern u32 dctcp_ssthresh(struct sock *sk) __ksym;
+extern u32 dctcp_cwnd_undo(struct sock *sk) __ksym;
+extern void dctcp_state(struct sock *sk, u8 new_state) __ksym;
+
+extern void cubictcp_init(struct sock *sk) __ksym;
+extern u32 cubictcp_recalc_ssthresh(struct sock *sk) __ksym;
+extern void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) __ksym;
+extern void cubictcp_state(struct sock *sk, u8 new_state) __ksym;
+extern void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym;
+extern void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) __ksym;
+
+SEC("struct_ops")
+void BPF_PROG(init, struct sock *sk)
+{
+ bbr_init(sk);
+ dctcp_init(sk);
+ cubictcp_init(sk);
+}
+
+SEC("struct_ops")
+void BPF_PROG(in_ack_event, struct sock *sk, u32 flags)
+{
+ dctcp_update_alpha(sk, flags);
+}
+
+SEC("struct_ops")
+void BPF_PROG(cong_control, struct sock *sk, u32 ack, int flag, const struct rate_sample *rs)
+{
+ bbr_main(sk, ack, flag, rs);
+}
+
+SEC("struct_ops")
+void BPF_PROG(cong_avoid, struct sock *sk, u32 ack, u32 acked)
+{
+ cubictcp_cong_avoid(sk, ack, acked);
+}
+
+SEC("struct_ops")
+u32 BPF_PROG(sndbuf_expand, struct sock *sk)
+{
+ return bbr_sndbuf_expand(sk);
+}
+
+SEC("struct_ops")
+u32 BPF_PROG(undo_cwnd, struct sock *sk)
+{
+ bbr_undo_cwnd(sk);
+ return dctcp_cwnd_undo(sk);
+}
+
+SEC("struct_ops")
+void BPF_PROG(cwnd_event, struct sock *sk, enum tcp_ca_event event)
+{
+ bbr_cwnd_event(sk, event);
+ dctcp_cwnd_event(sk, event);
+ cubictcp_cwnd_event(sk, event);
+}
+
+SEC("struct_ops")
+u32 BPF_PROG(ssthresh, struct sock *sk)
+{
+ bbr_ssthresh(sk);
+ dctcp_ssthresh(sk);
+ return cubictcp_recalc_ssthresh(sk);
+}
+
+SEC("struct_ops")
+u32 BPF_PROG(min_tso_segs, struct sock *sk)
+{
+ return bbr_min_tso_segs(sk);
+}
+
+SEC("struct_ops")
+void BPF_PROG(set_state, struct sock *sk, u8 new_state)
+{
+ bbr_set_state(sk, new_state);
+ dctcp_state(sk, new_state);
+ cubictcp_state(sk, new_state);
+}
+
+SEC("struct_ops")
+void BPF_PROG(pkts_acked, struct sock *sk, const struct ack_sample *sample)
+{
+ cubictcp_acked(sk, sample);
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops tcp_ca_kfunc = {
+ .init = (void *)init,
+ .in_ack_event = (void *)in_ack_event,
+ .cong_control = (void *)cong_control,
+ .cong_avoid = (void *)cong_avoid,
+ .sndbuf_expand = (void *)sndbuf_expand,
+ .undo_cwnd = (void *)undo_cwnd,
+ .cwnd_event = (void *)cwnd_event,
+ .ssthresh = (void *)ssthresh,
+ .min_tso_segs = (void *)min_tso_segs,
+ .set_state = (void *)set_state,
+ .pkts_acked = (void *)pkts_acked,
+ .name = "tcp_ca_kfunc",
+};
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c
index c06f4a41c2..54f916a931 100644
--- a/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c
@@ -7,7 +7,7 @@
char _license[] SEC("license") = "GPL";
-SEC("struct_ops/unsupp_cong_op_get_info")
+SEC("struct_ops")
size_t BPF_PROG(unsupp_cong_op_get_info, struct sock *sk, u32 ext, int *attr,
union tcp_cc_info *info)
{
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_update.c b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
index b93a0ed330..e4bd82bc0d 100644
--- a/tools/testing/selftests/bpf/progs/tcp_ca_update.c
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
-#include "vmlinux.h"
-
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
@@ -10,36 +9,31 @@ char _license[] SEC("license") = "GPL";
int ca1_cnt = 0;
int ca2_cnt = 0;
-static inline struct tcp_sock *tcp_sk(const struct sock *sk)
-{
- return (struct tcp_sock *)sk;
-}
-
-SEC("struct_ops/ca_update_1_init")
+SEC("struct_ops")
void BPF_PROG(ca_update_1_init, struct sock *sk)
{
ca1_cnt++;
}
-SEC("struct_ops/ca_update_2_init")
+SEC("struct_ops")
void BPF_PROG(ca_update_2_init, struct sock *sk)
{
ca2_cnt++;
}
-SEC("struct_ops/ca_update_cong_control")
+SEC("struct_ops")
void BPF_PROG(ca_update_cong_control, struct sock *sk,
const struct rate_sample *rs)
{
}
-SEC("struct_ops/ca_update_ssthresh")
+SEC("struct_ops")
__u32 BPF_PROG(ca_update_ssthresh, struct sock *sk)
{
return tcp_sk(sk)->snd_ssthresh;
}
-SEC("struct_ops/ca_update_undo_cwnd")
+SEC("struct_ops")
__u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk)
{
return tcp_sk(sk)->snd_cwnd;
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
index 0724a79cec..a58b5194fc 100644
--- a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
-#include "vmlinux.h"
-
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
@@ -11,22 +10,17 @@ char _license[] SEC("license") = "GPL";
#define min(a, b) ((a) < (b) ? (a) : (b))
-static inline struct tcp_sock *tcp_sk(const struct sock *sk)
-{
- return (struct tcp_sock *)sk;
-}
-
-static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
+static unsigned int tcp_left_out(const struct tcp_sock *tp)
{
return tp->sacked_out + tp->lost_out;
}
-static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
+static unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
{
return tp->packets_out - tcp_left_out(tp) + tp->retrans_out;
}
-SEC("struct_ops/write_sk_pacing_init")
+SEC("struct_ops")
void BPF_PROG(write_sk_pacing_init, struct sock *sk)
{
#ifdef ENABLE_ATOMICS_TESTS
@@ -37,7 +31,7 @@ void BPF_PROG(write_sk_pacing_init, struct sock *sk)
#endif
}
-SEC("struct_ops/write_sk_pacing_cong_control")
+SEC("struct_ops")
void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk,
const struct rate_sample *rs)
{
@@ -49,13 +43,13 @@ void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk,
tp->app_limited = (tp->delivered + tcp_packets_in_flight(tp)) ?: 1;
}
-SEC("struct_ops/write_sk_pacing_ssthresh")
+SEC("struct_ops")
__u32 BPF_PROG(write_sk_pacing_ssthresh, struct sock *sk)
{
return tcp_sk(sk)->snd_ssthresh;
}
-SEC("struct_ops/write_sk_pacing_undo_cwnd")
+SEC("struct_ops")
__u32 BPF_PROG(write_sk_pacing_undo_cwnd, struct sock *sk)
{
return tcp_sk(sk)->snd_cwnd;
diff --git a/tools/testing/selftests/bpf/progs/tcp_rtt.c b/tools/testing/selftests/bpf/progs/tcp_rtt.c
index 0988d79f15..42c729f855 100644
--- a/tools/testing/selftests/bpf/progs/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/progs/tcp_rtt.c
@@ -10,6 +10,9 @@ struct tcp_rtt_storage {
__u32 delivered;
__u32 delivered_ce;
__u32 icsk_retransmits;
+
+ __u32 mrtt_us; /* args[0] */
+ __u32 srtt; /* args[1] */
};
struct {
@@ -55,5 +58,8 @@ int _sockops(struct bpf_sock_ops *ctx)
storage->delivered_ce = tcp_sk->delivered_ce;
storage->icsk_retransmits = tcp_sk->icsk_retransmits;
+ storage->mrtt_us = ctx->args[0];
+ storage->srtt = ctx->args[1];
+
return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/test_access_variable_array.c b/tools/testing/selftests/bpf/progs/test_access_variable_array.c
index 808c49b798..326b7d1f49 100644
--- a/tools/testing/selftests/bpf/progs/test_access_variable_array.c
+++ b/tools/testing/selftests/bpf/progs/test_access_variable_array.c
@@ -7,7 +7,7 @@
unsigned long span = 0;
-SEC("fentry/load_balance")
+SEC("fentry/sched_balance_rq")
int BPF_PROG(fentry_fentry, int this_cpu, struct rq *this_rq,
struct sched_domain *sd)
{
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
index 5a3a80f751..c83142b55f 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
@@ -15,6 +15,8 @@ __u64 uprobe_res;
__u64 uretprobe_res;
__u64 tp_res;
__u64 pe_res;
+__u64 raw_tp_res;
+__u64 tp_btf_res;
__u64 fentry_res;
__u64 fexit_res;
__u64 fmod_ret_res;
@@ -87,6 +89,20 @@ int handle_pe(struct pt_regs *ctx)
return 0;
}
+SEC("raw_tp/sys_enter")
+int handle_raw_tp(void *ctx)
+{
+ update(ctx, &raw_tp_res);
+ return 0;
+}
+
+SEC("tp_btf/sys_enter")
+int handle_tp_btf(void *ctx)
+{
+ update(ctx, &tp_btf_res);
+ return 0;
+}
+
SEC("fentry/bpf_fentry_test1")
int BPF_PROG(fentry_test1, int a)
{
diff --git a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
index e2bea4da19..f0759efff6 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
@@ -1,19 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include <string.h>
-#include <errno.h>
-#include <netinet/in.h>
-#include <linux/stddef.h>
-#include <linux/bpf.h>
-#include <linux/ipv6.h>
-#include <linux/tcp.h>
-#include <linux/if_ether.h>
-#include <linux/pkt_cls.h>
-
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-#include "bpf_tcp_helpers.h"
+
+#ifndef ENOENT
+#define ENOENT 2
+#endif
struct sockaddr_in6 srv_sa6 = {};
__u16 listen_tp_sport = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c
index 8fba3f3649..5da001ca57 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func10.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func10.c
@@ -4,6 +4,10 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#if !defined(__clang__)
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
+
struct Small {
long x;
};
diff --git a/tools/testing/selftests/bpf/progs/test_lwt_redirect.c b/tools/testing/selftests/bpf/progs/test_lwt_redirect.c
index 8c895122f2..83439b87b7 100644
--- a/tools/testing/selftests/bpf/progs/test_lwt_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_lwt_redirect.c
@@ -3,7 +3,7 @@
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
#include <linux/ip.h>
-#include "bpf_tracing_net.h"
+#include <linux/if_ether.h>
/* We don't care about whether the packet can be received by network stack.
* Just care if the packet is sent to the correct device at correct direction
diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c
index 8a1b50f3a0..cc1a012d03 100644
--- a/tools/testing/selftests/bpf/progs/test_module_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_module_attach.c
@@ -73,6 +73,29 @@ int BPF_PROG(handle_fentry_manual,
return 0;
}
+__u32 fentry_explicit_read_sz = 0;
+
+SEC("fentry/bpf_testmod:bpf_testmod_test_read")
+int BPF_PROG(handle_fentry_explicit,
+ struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+ fentry_explicit_read_sz = len;
+ return 0;
+}
+
+
+__u32 fentry_explicit_manual_read_sz = 0;
+
+SEC("fentry")
+int BPF_PROG(handle_fentry_explicit_manual,
+ struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+ fentry_explicit_manual_read_sz = len;
+ return 0;
+}
+
__u32 fexit_read_sz = 0;
int fexit_ret = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c
index 0763d49f9c..386315afad 100644
--- a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c
+++ b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c
@@ -5,23 +5,48 @@
#include <stdint.h>
#include <bpf/bpf_helpers.h>
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u32);
+} sock_map SEC(".maps");
+
__u64 user_pid = 0;
__u64 user_tgid = 0;
__u64 dev = 0;
__u64 ino = 0;
-SEC("tracepoint/syscalls/sys_enter_nanosleep")
-int handler(const void *ctx)
+static void get_pid_tgid(void)
{
struct bpf_pidns_info nsdata;
if (bpf_get_ns_current_pid_tgid(dev, ino, &nsdata, sizeof(struct bpf_pidns_info)))
- return 0;
+ return;
user_pid = nsdata.pid;
user_tgid = nsdata.tgid;
+}
+SEC("?tracepoint/syscalls/sys_enter_nanosleep")
+int tp_handler(const void *ctx)
+{
+ get_pid_tgid();
return 0;
}
+SEC("?cgroup/bind4")
+int cgroup_bind4(struct bpf_sock_addr *ctx)
+{
+ get_pid_tgid();
+ return 1;
+}
+
+SEC("?sk_msg")
+int sk_msg(struct sk_msg_md *msg)
+{
+ get_pid_tgid();
+ return SK_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_perf_skip.c b/tools/testing/selftests/bpf/progs/test_perf_skip.c
new file mode 100644
index 0000000000..7eb8b6de7a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_perf_skip.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+uintptr_t ip;
+
+SEC("perf_event")
+int handler(struct bpf_perf_event_data *data)
+{
+ /* Skip events that have the correct ip. */
+ return ip != PT_REGS_IP(&data->regs);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_n.c b/tools/testing/selftests/bpf/progs/test_ringbuf_n.c
new file mode 100644
index 0000000000..8669eb42db
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_n.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2024 Andrea Righi <andrea.righi@canonical.com>
+
+#include <linux/bpf.h>
+#include <sched.h>
+#include <unistd.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define TASK_COMM_LEN 16
+
+struct sample {
+ int pid;
+ long value;
+ char comm[16];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+int pid = 0;
+long value = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int test_ringbuf_n(void *ctx)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+ struct sample *sample;
+
+ if (cur_pid != pid)
+ return 0;
+
+ sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0);
+ if (!sample)
+ return 0;
+
+ sample->pid = pid;
+ sample->value = value;
+ bpf_get_current_comm(sample->comm, sizeof(sample->comm));
+
+ bpf_ringbuf_submit(sample, 0);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
new file mode 100644
index 0000000000..350513c0e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+/* inputs */
+int pid = 0;
+
+/* outputs */
+long passed = 0;
+long discarded = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int test_ringbuf_write(void *ctx)
+{
+ int *foo, cur_pid = bpf_get_current_pid_tgid() >> 32;
+ void *sample1, *sample2;
+
+ if (cur_pid != pid)
+ return 0;
+
+ sample1 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0);
+ if (!sample1)
+ return 0;
+ /* first one can pass */
+ sample2 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0);
+ if (!sample2) {
+ bpf_ringbuf_discard(sample1, 0);
+ __sync_fetch_and_add(&discarded, 1);
+ return 0;
+ }
+ /* second one must not */
+ __sync_fetch_and_add(&passed, 1);
+ foo = sample2 + 4084;
+ *foo = 256;
+ bpf_ringbuf_discard(sample1, 0);
+ bpf_ringbuf_discard(sample2, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
index 02e718f06e..40531e5677 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
@@ -84,7 +84,7 @@ int BPF_PROG(trace_tcp_connect, struct sock *sk)
}
SEC("fexit/inet_csk_accept")
-int BPF_PROG(inet_csk_accept, struct sock *sk, int flags, int *err, bool kern,
+int BPF_PROG(inet_csk_accept, struct sock *sk, struct proto_accept_arg *arg,
struct sock *accepted_sk)
{
set_task_info(accepted_sk);
diff --git a/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c b/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c
index 45e8fc75a7..996b177324 100644
--- a/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c
@@ -24,8 +24,7 @@ struct {
__type(value, __u64);
} socket_storage SEC(".maps");
-SEC("sk_msg")
-int prog_msg_verdict(struct sk_msg_md *msg)
+static int prog_msg_verdict_common(struct sk_msg_md *msg)
{
struct task_struct *task = (struct task_struct *)bpf_get_current_task();
int verdict = SK_PASS;
@@ -44,4 +43,28 @@ int prog_msg_verdict(struct sk_msg_md *msg)
return verdict;
}
+SEC("sk_msg")
+int prog_msg_verdict(struct sk_msg_md *msg)
+{
+ return prog_msg_verdict_common(msg);
+}
+
+SEC("sk_msg")
+int prog_msg_verdict_clone(struct sk_msg_md *msg)
+{
+ return prog_msg_verdict_common(msg);
+}
+
+SEC("sk_msg")
+int prog_msg_verdict_clone2(struct sk_msg_md *msg)
+{
+ return prog_msg_verdict_common(msg);
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ return SK_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c
index f75e531bf3..196844be34 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c
@@ -7,7 +7,6 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-#include "bpf_tcp_helpers.h"
enum bpf_linum_array_idx {
EGRESS_LINUM_IDX,
@@ -42,6 +41,10 @@ struct {
__type(value, struct bpf_spinlock_cnt);
} sk_pkt_out_cnt10 SEC(".maps");
+struct tcp_sock {
+ __u32 lsndtime;
+} __attribute__((preserve_access_index));
+
struct bpf_tcp_sock listen_tp = {};
struct sockaddr_in6 srv_sa6 = {};
struct bpf_tcp_sock cli_tp = {};
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c b/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c
index 1d86a717a2..69aacc96db 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c
@@ -23,10 +23,25 @@ struct {
__type(value, int);
} sock_map_msg SEC(".maps");
-SEC("sk_skb")
+SEC("sk_skb/stream_verdict")
int prog_skb_verdict(struct __sk_buff *skb)
{
return SK_PASS;
}
+int clone_called;
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict_clone(struct __sk_buff *skb)
+{
+ clone_called = 1;
+ return SK_PASS;
+}
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser(struct __sk_buff *skb)
+{
+ return SK_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
index 3c69aa9717..d25b0bb30f 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
@@ -9,7 +9,7 @@ struct {
__type(value, __u64);
} sock_map SEC(".maps");
-SEC("sk_skb")
+SEC("sk_skb/verdict")
int prog_skb_verdict(struct __sk_buff *skb)
{
return SK_DROP;
diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c
index 992400acb9..ab3eae3d6a 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_link.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_link.c
@@ -4,7 +4,8 @@
#include <linux/bpf.h>
#include <linux/if_ether.h>
-
+#include <linux/stddef.h>
+#include <linux/if_packet.h>
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
@@ -16,7 +17,13 @@ bool seen_tc3;
bool seen_tc4;
bool seen_tc5;
bool seen_tc6;
+bool seen_tc7;
+
+bool set_type;
+
bool seen_eth;
+bool seen_host;
+bool seen_mcast;
SEC("tc/ingress")
int tc1(struct __sk_buff *skb)
@@ -28,8 +35,16 @@ int tc1(struct __sk_buff *skb)
if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)))
goto out;
seen_eth = eth.h_proto == bpf_htons(ETH_P_IP);
+ seen_host = skb->pkt_type == PACKET_HOST;
+ if (seen_host && set_type) {
+ eth.h_dest[0] = 4;
+ if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0))
+ goto fail;
+ bpf_skb_change_type(skb, PACKET_MULTICAST);
+ }
out:
seen_tc1 = true;
+fail:
return TCX_NEXT;
}
@@ -67,3 +82,21 @@ int tc6(struct __sk_buff *skb)
seen_tc6 = true;
return TCX_PASS;
}
+
+SEC("tc/ingress")
+int tc7(struct __sk_buff *skb)
+{
+ struct ethhdr eth = {};
+
+ if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
+ goto out;
+ if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)))
+ goto out;
+ if (eth.h_dest[0] == 4 && set_type) {
+ seen_mcast = skb->pkt_type == PACKET_MULTICAST;
+ bpf_skb_change_type(skb, PACKET_HOST);
+ }
+out:
+ seen_tc7 = true;
+ return TCX_PASS;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index a3f3f43fc1..6935f32eeb 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -1,18 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
-#include <stddef.h>
-#include <string.h>
-#include <netinet/in.h>
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/tcp.h>
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-#include "bpf_tcp_helpers.h"
#include "test_tcpbpf.h"
struct tcpbpf_globals global = {};
diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index 3e436e6f73..3f5abcf3ff 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -567,12 +567,18 @@ int ip6vxlan_get_tunnel_src(struct __sk_buff *skb)
return TC_ACT_OK;
}
+struct local_geneve_opt {
+ struct geneve_opt gopt;
+ int data;
+};
+
SEC("tc")
int geneve_set_tunnel(struct __sk_buff *skb)
{
int ret;
struct bpf_tunnel_key key;
- struct geneve_opt gopt;
+ struct local_geneve_opt local_gopt;
+ struct geneve_opt *gopt = (struct geneve_opt *) &local_gopt;
__builtin_memset(&key, 0x0, sizeof(key));
key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
@@ -580,14 +586,14 @@ int geneve_set_tunnel(struct __sk_buff *skb)
key.tunnel_tos = 0;
key.tunnel_ttl = 64;
- __builtin_memset(&gopt, 0x0, sizeof(gopt));
- gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
- gopt.type = 0x08;
- gopt.r1 = 0;
- gopt.r2 = 0;
- gopt.r3 = 0;
- gopt.length = 2; /* 4-byte multiple */
- *(int *) &gopt.opt_data = bpf_htonl(0xdeadbeef);
+ __builtin_memset(gopt, 0x0, sizeof(local_gopt));
+ gopt->opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+ gopt->type = 0x08;
+ gopt->r1 = 0;
+ gopt->r2 = 0;
+ gopt->r3 = 0;
+ gopt->length = 2; /* 4-byte multiple */
+ *(int *) &gopt->opt_data = bpf_htonl(0xdeadbeef);
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
BPF_F_ZERO_CSUM_TX);
@@ -596,7 +602,7 @@ int geneve_set_tunnel(struct __sk_buff *skb)
return TC_ACT_SHOT;
}
- ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+ ret = bpf_skb_set_tunnel_opt(skb, gopt, sizeof(local_gopt));
if (ret < 0) {
log_err(ret);
return TC_ACT_SHOT;
@@ -631,7 +637,8 @@ SEC("tc")
int ip6geneve_set_tunnel(struct __sk_buff *skb)
{
struct bpf_tunnel_key key;
- struct geneve_opt gopt;
+ struct local_geneve_opt local_gopt;
+ struct geneve_opt *gopt = (struct geneve_opt *) &local_gopt;
int ret;
__builtin_memset(&key, 0x0, sizeof(key));
@@ -647,16 +654,16 @@ int ip6geneve_set_tunnel(struct __sk_buff *skb)
return TC_ACT_SHOT;
}
- __builtin_memset(&gopt, 0x0, sizeof(gopt));
- gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
- gopt.type = 0x08;
- gopt.r1 = 0;
- gopt.r2 = 0;
- gopt.r3 = 0;
- gopt.length = 2; /* 4-byte multiple */
- *(int *) &gopt.opt_data = bpf_htonl(0xfeedbeef);
+ __builtin_memset(gopt, 0x0, sizeof(local_gopt));
+ gopt->opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+ gopt->type = 0x08;
+ gopt->r1 = 0;
+ gopt->r2 = 0;
+ gopt->r3 = 0;
+ gopt->length = 2; /* 4-byte multiple */
+ *(int *) &gopt->opt_data = bpf_htonl(0xfeedbeef);
- ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+ ret = bpf_skb_set_tunnel_opt(skb, gopt, sizeof(gopt));
if (ret < 0) {
log_err(ret);
return TC_ACT_SHOT;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 5c7e4758a0..fad94e41ce 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -318,6 +318,14 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
return true;
}
+#ifndef __clang__
+#pragma GCC push_options
+/* GCC optimization collapses functions and increases the number of arguments
+ * beyond the compatible amount supported by BPF.
+ */
+#pragma GCC optimize("-fno-ipa-sra")
+#endif
+
static __attribute__ ((noinline))
bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
struct packet_description *pckt,
@@ -372,6 +380,10 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
return true;
}
+#ifndef __clang__
+#pragma GCC pop_options
+#endif
+
static __attribute__ ((noinline))
int swap_mac_and_send(void *data, void *data_end)
{
@@ -588,12 +600,13 @@ static void connection_table_lookup(struct real_definition **real,
__attribute__ ((noinline))
static int process_l3_headers_v6(struct packet_description *pckt,
__u8 *protocol, __u64 off,
- __u16 *pkt_bytes, void *data,
- void *data_end)
+ __u16 *pkt_bytes, void *extra_args[2])
{
struct ipv6hdr *ip6h;
__u64 iph_len;
int action;
+ void *data = extra_args[0];
+ void *data_end = extra_args[1];
ip6h = data + off;
if (ip6h + 1 > data_end)
@@ -619,11 +632,12 @@ static int process_l3_headers_v6(struct packet_description *pckt,
__attribute__ ((noinline))
static int process_l3_headers_v4(struct packet_description *pckt,
__u8 *protocol, __u64 off,
- __u16 *pkt_bytes, void *data,
- void *data_end)
+ __u16 *pkt_bytes, void *extra_args[2])
{
struct iphdr *iph;
int action;
+ void *data = extra_args[0];
+ void *data_end = extra_args[1];
iph = data + off;
if (iph + 1 > data_end)
@@ -666,13 +680,14 @@ static int process_packet(void *data, __u64 off, void *data_end,
__u8 protocol;
__u32 vip_num;
int action;
+ void *extra_args[2] = { data, data_end };
if (is_ipv6)
action = process_l3_headers_v6(&pckt, &protocol, off,
- &pkt_bytes, data, data_end);
+ &pkt_bytes, extra_args);
else
action = process_l3_headers_v4(&pckt, &protocol, off,
- &pkt_bytes, data, data_end);
+ &pkt_bytes, extra_args);
if (action >= 0)
return action;
protocol = pckt.flow.proto;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
index f3ec808648..a758830226 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
@@ -160,7 +160,7 @@ int xdp_prognum1(struct xdp_md *ctx)
/* Modifying VLAN, preserve top 4 bits */
vlan_hdr->h_vlan_TCI =
- bpf_htons((bpf_ntohs(vlan_hdr->h_vlan_TCI) & 0xf000)
+ bpf_htons((bpf_ntohs(vlan_hdr->h_vlan_TCI) & 0xf000U)
| TO_VLAN);
}
diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
index f615da97df..4c677c0012 100644
--- a/tools/testing/selftests/bpf/progs/timer.c
+++ b/tools/testing/selftests/bpf/progs/timer.c
@@ -2,9 +2,10 @@
/* Copyright (c) 2021 Facebook */
#include <linux/bpf.h>
#include <time.h>
+#include <stdbool.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_tcp_helpers.h"
+#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
struct hmap_elem {
diff --git a/tools/testing/selftests/bpf/progs/timer_failure.c b/tools/testing/selftests/bpf/progs/timer_failure.c
index 0996c2486f..5a2e9dabf1 100644
--- a/tools/testing/selftests/bpf/progs/timer_failure.c
+++ b/tools/testing/selftests/bpf/progs/timer_failure.c
@@ -5,8 +5,8 @@
#include <time.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
-#include "bpf_tcp_helpers.h"
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/timer_lockup.c b/tools/testing/selftests/bpf/progs/timer_lockup.c
new file mode 100644
index 0000000000..3e52013328
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_lockup.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct elem {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} timer1_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} timer2_map SEC(".maps");
+
+int timer1_err;
+int timer2_err;
+
+static int timer_cb1(void *map, int *k, struct elem *v)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer2_map, &key);
+ if (timer)
+ timer2_err = bpf_timer_cancel(timer);
+
+ return 0;
+}
+
+static int timer_cb2(void *map, int *k, struct elem *v)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer1_map, &key);
+ if (timer)
+ timer1_err = bpf_timer_cancel(timer);
+
+ return 0;
+}
+
+SEC("tc")
+int timer1_prog(void *ctx)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer1_map, &key);
+ if (timer) {
+ bpf_timer_init(timer, &timer1_map, CLOCK_BOOTTIME);
+ bpf_timer_set_callback(timer, timer_cb1);
+ bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN);
+ }
+
+ return 0;
+}
+
+SEC("tc")
+int timer2_prog(void *ctx)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer2_map, &key);
+ if (timer) {
+ bpf_timer_init(timer, &timer2_map, CLOCK_BOOTTIME);
+ bpf_timer_set_callback(timer, timer_cb2);
+ bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_mim.c b/tools/testing/selftests/bpf/progs/timer_mim.c
index 2fee7ab105..50ebc3f685 100644
--- a/tools/testing/selftests/bpf/progs/timer_mim.c
+++ b/tools/testing/selftests/bpf/progs/timer_mim.c
@@ -4,7 +4,7 @@
#include <time.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_tcp_helpers.h"
+#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
struct hmap_elem {
diff --git a/tools/testing/selftests/bpf/progs/timer_mim_reject.c b/tools/testing/selftests/bpf/progs/timer_mim_reject.c
index 5d648e3d8a..dd3f1ed6d6 100644
--- a/tools/testing/selftests/bpf/progs/timer_mim_reject.c
+++ b/tools/testing/selftests/bpf/progs/timer_mim_reject.c
@@ -4,7 +4,7 @@
#include <time.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_tcp_helpers.h"
+#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
struct hmap_elem {
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 5fda439010..2619ed193c 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Facebook
-
#include <linux/bpf.h>
#include <asm/unistd.h>
#include <bpf/bpf_helpers.h>
@@ -9,82 +8,126 @@
char _license[] SEC("license") = "GPL";
-long hits = 0;
+#define CPU_MASK 255
+#define MAX_CPUS (CPU_MASK + 1) /* should match MAX_BUCKETS in benchs/bench_trigger.c */
-SEC("tp/syscalls/sys_enter_getpgid")
-int bench_trigger_tp(void *ctx)
+/* matches struct counter in bench.h */
+struct counter {
+ long value;
+} __attribute__((aligned(128)));
+
+struct counter hits[MAX_CPUS];
+
+static __always_inline void inc_counter(void)
+{
+ int cpu = bpf_get_smp_processor_id();
+
+ __sync_add_and_fetch(&hits[cpu & CPU_MASK].value, 1);
+}
+
+SEC("?uprobe")
+int bench_trigger_uprobe(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
return 0;
}
-SEC("raw_tp/sys_enter")
-int BPF_PROG(bench_trigger_raw_tp, struct pt_regs *regs, long id)
+const volatile int batch_iters = 0;
+
+SEC("?raw_tp")
+int trigger_count(void *ctx)
{
- if (id == __NR_getpgid)
- __sync_add_and_fetch(&hits, 1);
+ int i;
+
+ for (i = 0; i < batch_iters; i++)
+ inc_counter();
+
return 0;
}
-SEC("kprobe/" SYS_PREFIX "sys_getpgid")
+SEC("?raw_tp")
+int trigger_driver(void *ctx)
+{
+ int i;
+
+ for (i = 0; i < batch_iters; i++)
+ (void)bpf_get_numa_node_id(); /* attach point for benchmarking */
+
+ return 0;
+}
+
+extern int bpf_modify_return_test_tp(int nonce) __ksym __weak;
+
+SEC("?raw_tp")
+int trigger_driver_kfunc(void *ctx)
+{
+ int i;
+
+ for (i = 0; i < batch_iters; i++)
+ (void)bpf_modify_return_test_tp(0); /* attach point for benchmarking */
+
+ return 0;
+}
+
+SEC("?kprobe/bpf_get_numa_node_id")
int bench_trigger_kprobe(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
return 0;
}
-SEC("kretprobe/" SYS_PREFIX "sys_getpgid")
+SEC("?kretprobe/bpf_get_numa_node_id")
int bench_trigger_kretprobe(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
return 0;
}
-SEC("kprobe.multi/" SYS_PREFIX "sys_getpgid")
+SEC("?kprobe.multi/bpf_get_numa_node_id")
int bench_trigger_kprobe_multi(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
return 0;
}
-SEC("kretprobe.multi/" SYS_PREFIX "sys_getpgid")
+SEC("?kretprobe.multi/bpf_get_numa_node_id")
int bench_trigger_kretprobe_multi(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
return 0;
}
-SEC("fentry/" SYS_PREFIX "sys_getpgid")
+SEC("?fentry/bpf_get_numa_node_id")
int bench_trigger_fentry(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
return 0;
}
-SEC("fexit/" SYS_PREFIX "sys_getpgid")
+SEC("?fexit/bpf_get_numa_node_id")
int bench_trigger_fexit(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
return 0;
}
-SEC("fentry.s/" SYS_PREFIX "sys_getpgid")
-int bench_trigger_fentry_sleep(void *ctx)
+SEC("?fmod_ret/bpf_modify_return_test_tp")
+int bench_trigger_fmodret(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
- return 0;
+ inc_counter();
+ return -22;
}
-SEC("fmod_ret/" SYS_PREFIX "sys_getpgid")
-int bench_trigger_fmodret(void *ctx)
+SEC("?tp/bpf_test_run/bpf_trigger_tp")
+int bench_trigger_tp(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
- return -22;
+ inc_counter();
+ return 0;
}
-SEC("uprobe")
-int bench_trigger_uprobe(void *ctx)
+SEC("?raw_tp/bpf_trigger_tp")
+int bench_trigger_rawtp(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi.c b/tools/testing/selftests/bpf/progs/uprobe_multi.c
index 419d9aa28f..44190efcdb 100644
--- a/tools/testing/selftests/bpf/progs/uprobe_multi.c
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi.c
@@ -1,8 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include <stdbool.h>
+#include <bpf/usdt.bpf.h>
char _license[] SEC("license") = "GPL";
@@ -22,6 +22,13 @@ __u64 uprobe_multi_sleep_result = 0;
int pid = 0;
int child_pid = 0;
+int child_tid = 0;
+int child_pid_usdt = 0;
+int child_tid_usdt = 0;
+
+int expect_pid = 0;
+bool bad_pid_seen = false;
+bool bad_pid_seen_usdt = false;
bool test_cookie = false;
void *user_ptr = 0;
@@ -36,11 +43,19 @@ static __always_inline bool verify_sleepable_user_copy(void)
static void uprobe_multi_check(void *ctx, bool is_return, bool is_sleep)
{
- child_pid = bpf_get_current_pid_tgid() >> 32;
+ __u64 cur_pid_tgid = bpf_get_current_pid_tgid();
+ __u32 cur_pid;
- if (pid && child_pid != pid)
+ cur_pid = cur_pid_tgid >> 32;
+ if (pid && cur_pid != pid)
return;
+ if (expect_pid && cur_pid != expect_pid)
+ bad_pid_seen = true;
+
+ child_pid = cur_pid_tgid >> 32;
+ child_tid = (__u32)cur_pid_tgid;
+
__u64 cookie = test_cookie ? bpf_get_attach_cookie(ctx) : 0;
__u64 addr = bpf_get_func_ip(ctx);
@@ -97,5 +112,32 @@ int uretprobe_sleep(struct pt_regs *ctx)
SEC("uprobe.multi//proc/self/exe:uprobe_multi_func_*")
int uprobe_extra(struct pt_regs *ctx)
{
+ /* we need this one just to mix PID-filtered and global uprobes */
+ return 0;
+}
+
+SEC("usdt")
+int usdt_pid(struct pt_regs *ctx)
+{
+ __u64 cur_pid_tgid = bpf_get_current_pid_tgid();
+ __u32 cur_pid;
+
+ cur_pid = cur_pid_tgid >> 32;
+ if (pid && cur_pid != pid)
+ return 0;
+
+ if (expect_pid && cur_pid != expect_pid)
+ bad_pid_seen_usdt = true;
+
+ child_pid_usdt = cur_pid_tgid >> 32;
+ child_tid_usdt = (__u32)cur_pid_tgid;
+
+ return 0;
+}
+
+SEC("usdt")
+int usdt_extra(struct pt_regs *ctx)
+{
+ /* we need this one just to mix PID-filtered and global USDT probes */
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c
index 960998f163..a0bb7fb40e 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c
@@ -886,6 +886,69 @@ l1_%=: r0 = 0; \
}
SEC("socket")
+__description("bounds check for non const xor src dst")
+__success __log_level(2)
+__msg("5: (af) r0 ^= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
+__naked void non_const_xor_src_dst(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r6 &= 0xaf; \
+ r0 &= 0x1a0; \
+ r0 ^= r6; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check for non const or src dst")
+__success __log_level(2)
+__msg("5: (4f) r0 |= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
+__naked void non_const_or_src_dst(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r6 &= 0xaf; \
+ r0 &= 0x1a0; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check for non const mul regs")
+__success __log_level(2)
+__msg("5: (2f) r0 *= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=3825,var_off=(0x0; 0xfff))")
+__naked void non_const_mul_regs(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r6 &= 0xff; \
+ r0 &= 0x0f; \
+ r0 *= r6; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
__description("bounds checks after 32-bit truncation. test 1")
__success __failure_unpriv __msg_unpriv("R0 leaks addr")
__retval(0)
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c b/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c
index 0ede0ccd09..059aa716e3 100644
--- a/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c
@@ -30,7 +30,7 @@ struct {
SEC("kprobe")
__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_KPROBE")
-__failure __msg("unknown func bpf_ktime_get_coarse_ns")
+__failure __msg("program of this type cannot use helper bpf_ktime_get_coarse_ns")
__naked void in_bpf_prog_type_kprobe_1(void)
{
asm volatile (" \
@@ -44,7 +44,7 @@ __naked void in_bpf_prog_type_kprobe_1(void)
SEC("tracepoint")
__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_TRACEPOINT")
-__failure __msg("unknown func bpf_ktime_get_coarse_ns")
+__failure __msg("program of this type cannot use helper bpf_ktime_get_coarse_ns")
__naked void in_bpf_prog_type_tracepoint_1(void)
{
asm volatile (" \
@@ -58,7 +58,7 @@ __naked void in_bpf_prog_type_tracepoint_1(void)
SEC("perf_event")
__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_PERF_EVENT")
-__failure __msg("unknown func bpf_ktime_get_coarse_ns")
+__failure __msg("program of this type cannot use helper bpf_ktime_get_coarse_ns")
__naked void bpf_prog_type_perf_event_1(void)
{
asm volatile (" \
@@ -72,7 +72,7 @@ __naked void bpf_prog_type_perf_event_1(void)
SEC("raw_tracepoint")
__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT")
-__failure __msg("unknown func bpf_ktime_get_coarse_ns")
+__failure __msg("program of this type cannot use helper bpf_ktime_get_coarse_ns")
__naked void bpf_prog_type_raw_tracepoint_1(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
index 99e561f18f..80c737b6d3 100644
--- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
+++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
@@ -274,6 +274,58 @@ static __naked void iter_limit_bug_cb(void)
);
}
+int tmp_var;
+SEC("socket")
+__failure __msg("infinite loop detected at insn 2")
+__naked void jgt_imm64_and_may_goto(void)
+{
+ asm volatile (" \
+ r0 = %[tmp_var] ll; \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short -3; /* off -3 */ \
+ .long 0; /* imm */ \
+ if r0 > 10 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" :: __imm_addr(tmp_var)
+ : __clobber_all);
+}
+
+SEC("socket")
+__failure __msg("infinite loop detected at insn 1")
+__naked void may_goto_self(void)
+{
+ asm volatile (" \
+ r0 = *(u32 *)(r10 - 4); \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short -1; /* off -1 */ \
+ .long 0; /* imm */ \
+ if r0 > 10 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__success __retval(0)
+__naked void may_goto_neg_off(void)
+{
+ asm volatile (" \
+ r0 = *(u32 *)(r10 - 4); \
+ goto l0_%=; \
+ goto l1_%=; \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short -2; /* off -2 */ \
+ .long 0; /* imm */ \
+ if r0 > 10 goto l0_%=; \
+l1_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
SEC("tc")
__failure
__flag(BPF_F_TEST_STATE_FREQ)
@@ -307,6 +359,100 @@ int iter_limit_bug(struct __sk_buff *skb)
return 0;
}
+SEC("socket")
+__success __retval(0)
+__naked void ja_and_may_goto(void)
+{
+ asm volatile (" \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_common);
+}
+
+SEC("socket")
+__success __retval(0)
+__naked void ja_and_may_goto2(void)
+{
+ asm volatile (" \
+l0_%=: r0 = 0; \
+ .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_common);
+}
+
+SEC("socket")
+__success __retval(0)
+__naked void jlt_and_may_goto(void)
+{
+ asm volatile (" \
+l0_%=: call %[bpf_jiffies64]; \
+ .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ if r0 < 10 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" :: __imm(bpf_jiffies64)
+ : __clobber_all);
+}
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
+ __clang_major__ >= 18
+SEC("socket")
+__success __retval(0)
+__naked void gotol_and_may_goto(void)
+{
+ asm volatile (" \
+l0_%=: r0 = 0; \
+ .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ gotol l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_common);
+}
+#endif
+
+SEC("socket")
+__success __retval(0)
+__naked void ja_and_may_goto_subprog(void)
+{
+ asm volatile (" \
+ call subprog_with_may_goto; \
+ exit; \
+" ::: __clobber_all);
+}
+
+static __naked __noinline __used
+void subprog_with_may_goto(void)
+{
+ asm volatile (" \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
#define ARR_SZ 1000000
int zero;
char arr[ARR_SZ];
@@ -318,7 +464,7 @@ int cond_break1(const void *ctx)
unsigned long i;
unsigned int sum = 0;
- for (i = zero; i < ARR_SZ; cond_break, i++)
+ for (i = zero; i < ARR_SZ && can_loop; i++)
sum += i;
for (i = zero; i < ARR_SZ; i++) {
barrier_var(i);
@@ -336,12 +482,11 @@ int cond_break2(const void *ctx)
int i, j;
int sum = 0;
- for (i = zero; i < 1000; cond_break, i++)
+ for (i = zero; i < 1000 && can_loop; i++)
for (j = zero; j < 1000; j++) {
sum += i + j;
cond_break;
- }
-
+ }
return sum;
}
@@ -349,7 +494,7 @@ static __noinline int loop(void)
{
int i, sum = 0;
- for (i = zero; i <= 1000000; i++, cond_break)
+ for (i = zero; i <= 1000000 && can_loop; i++)
sum += i;
return sum;
diff --git a/tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c b/tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c
new file mode 100644
index 0000000000..cb32b0cfc8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+#include "cgrp_kfunc_common.h"
+#include "cpumask_common.h"
+#include "task_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+/***************
+ * Task kfuncs *
+ ***************/
+
+static void task_kfunc_load_test(void)
+{
+ struct task_struct *current, *ref_1, *ref_2;
+
+ current = bpf_get_current_task_btf();
+ ref_1 = bpf_task_from_pid(current->pid);
+ if (!ref_1)
+ return;
+
+ ref_2 = bpf_task_acquire(ref_1);
+ if (ref_2)
+ bpf_task_release(ref_2);
+ bpf_task_release(ref_1);
+}
+
+SEC("raw_tp")
+__failure __msg("calling kernel function")
+int BPF_PROG(task_kfunc_raw_tp)
+{
+ task_kfunc_load_test();
+ return 0;
+}
+
+SEC("syscall")
+__success
+int BPF_PROG(task_kfunc_syscall)
+{
+ task_kfunc_load_test();
+ return 0;
+}
+
+/*****************
+ * cgroup kfuncs *
+ *****************/
+
+static void cgrp_kfunc_load_test(void)
+{
+ struct cgroup *cgrp, *ref;
+
+ cgrp = bpf_cgroup_from_id(0);
+ if (!cgrp)
+ return;
+
+ ref = bpf_cgroup_acquire(cgrp);
+ if (!ref) {
+ bpf_cgroup_release(cgrp);
+ return;
+ }
+
+ bpf_cgroup_release(ref);
+ bpf_cgroup_release(cgrp);
+}
+
+SEC("raw_tp")
+__failure __msg("calling kernel function")
+int BPF_PROG(cgrp_kfunc_raw_tp)
+{
+ cgrp_kfunc_load_test();
+ return 0;
+}
+
+SEC("syscall")
+__success
+int BPF_PROG(cgrp_kfunc_syscall)
+{
+ cgrp_kfunc_load_test();
+ return 0;
+}
+
+/******************
+ * cpumask kfuncs *
+ ******************/
+
+static void cpumask_kfunc_load_test(void)
+{
+ struct bpf_cpumask *alloc, *ref;
+
+ alloc = bpf_cpumask_create();
+ if (!alloc)
+ return;
+
+ ref = bpf_cpumask_acquire(alloc);
+ bpf_cpumask_set_cpu(0, alloc);
+ bpf_cpumask_test_cpu(0, (const struct cpumask *)ref);
+
+ bpf_cpumask_release(ref);
+ bpf_cpumask_release(alloc);
+}
+
+SEC("raw_tp")
+__failure __msg("calling kernel function")
+int BPF_PROG(cpumask_kfunc_raw_tp)
+{
+ cpumask_kfunc_load_test();
+ return 0;
+}
+
+SEC("syscall")
+__success
+int BPF_PROG(cpumask_kfunc_syscall)
+{
+ cpumask_kfunc_load_test();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c
index cbb9d6714f..028ec85558 100644
--- a/tools/testing/selftests/bpf/progs/verifier_movsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c
@@ -224,6 +224,69 @@ l0_%=: \
: __clobber_all);
}
+SEC("socket")
+__description("MOV32SX, S8, var_off u32_max")
+__failure __msg("infinite loop detected")
+__failure_unpriv __msg_unpriv("back-edge from insn 2 to 0")
+__naked void mov64sx_s32_varoff_1(void)
+{
+ asm volatile (" \
+l0_%=: \
+ r3 = *(u8 *)(r10 -387); \
+ w7 = (s8)w3; \
+ if w7 >= 0x2533823b goto l0_%=; \
+ w0 = 0; \
+ exit; \
+" :
+ :
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV32SX, S8, var_off not u32_max, positive after s8 extension")
+__success __retval(0)
+__failure_unpriv __msg_unpriv("frame pointer is read only")
+__naked void mov64sx_s32_varoff_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r3 = r0; \
+ r3 &= 0xf; \
+ w7 = (s8)w3; \
+ if w7 s>= 16 goto l0_%=; \
+ w0 = 0; \
+ exit; \
+l0_%=: \
+ r10 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV32SX, S8, var_off not u32_max, negative after s8 extension")
+__success __retval(0)
+__failure_unpriv __msg_unpriv("frame pointer is read only")
+__naked void mov64sx_s32_varoff_3(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r3 = r0; \
+ r3 &= 0xf; \
+ r3 |= 0x80; \
+ w7 = (s8)w3; \
+ if w7 s>= -5 goto l0_%=; \
+ w0 = 0; \
+ exit; \
+l0_%=: \
+ r10 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
#else
SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c b/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c
new file mode 100644
index 0000000000..f37713a265
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("or_jmp32_k: bit ops + branch on unknown value")
+__failure
+__msg("R0 invalid mem access 'scalar'")
+__naked void or_jmp32_k(void)
+{
+ asm volatile (" \
+ r0 = 0xffffffff; \
+ r0 /= 1; \
+ r1 = 0; \
+ w1 = -1; \
+ w1 >>= 1; \
+ w0 &= w1; \
+ w0 |= 2; \
+ if w0 != 0x7ffffffd goto l1; \
+ r0 = 1; \
+ exit; \
+l3: \
+ r0 = 5; \
+ *(u64*)(r0 - 8) = r0; \
+ exit; \
+l2: \
+ w0 -= 0xe; \
+ if w0 == 1 goto l3; \
+ r0 = 4; \
+ exit; \
+l1: \
+ w0 -= 0x7ffffff0; \
+ if w0 s>= 0xe goto l2; \
+ r0 = 3; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_sock_addr.c b/tools/testing/selftests/bpf/progs/verifier_sock_addr.c
new file mode 100644
index 0000000000..9c31448a0f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_sock_addr.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google LLC */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf_sockopt_helpers.h>
+#include "bpf_misc.h"
+
+SEC("cgroup/recvmsg4")
+__success
+int recvmsg4_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/recvmsg4")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int recvmsg4_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/recvmsg6")
+__success
+int recvmsg6_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/recvmsg6")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int recvmsg6_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/recvmsg_unix")
+__success
+int recvmsg_unix_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/recvmsg_unix")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int recvmsg_unix_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/sendmsg4")
+__success
+int sendmsg4_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/sendmsg4")
+__success
+int sendmsg4_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/sendmsg4")
+__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]")
+int sendmsg4_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+SEC("cgroup/sendmsg6")
+__success
+int sendmsg6_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/sendmsg6")
+__success
+int sendmsg6_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/sendmsg6")
+__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]")
+int sendmsg6_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+SEC("cgroup/sendmsg_unix")
+__success
+int sendmsg_unix_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/sendmsg_unix")
+__success
+int sendmsg_unix_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/sendmsg_unix")
+__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]")
+int sendmsg_unix_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+SEC("cgroup/getpeername4")
+__success
+int getpeername4_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getpeername4")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int getpeername4_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/getpeername6")
+__success
+int getpeername6_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getpeername6")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int getpeername6_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/getpeername_unix")
+__success
+int getpeername_unix_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getpeername_unix")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int getpeername_unix_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/getsockname4")
+__success
+int getsockname4_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockname4")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int getsockname4_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/getsockname6")
+__success
+int getsockname6_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockname6")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int getsockname6_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/getsockname_unix")
+__success
+int getsockname_unix_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockname_unix")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int getsockname_unix_unix_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/bind4")
+__success
+int bind4_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/bind4")
+__success
+int bind4_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/bind4")
+__success
+int bind4_good_return_code_2(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+SEC("cgroup/bind4")
+__success
+int bind4_good_return_code_3(struct bpf_sock_addr *ctx)
+{
+ return 3;
+}
+
+SEC("cgroup/bind4")
+__failure __msg("At program exit the register R0 has smin=4 smax=4 should have been in [0, 3]")
+int bind4_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 4;
+}
+
+SEC("cgroup/bind6")
+__success
+int bind6_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/bind6")
+__success
+int bind6_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/bind6")
+__success
+int bind6_good_return_code_2(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+SEC("cgroup/bind6")
+__success
+int bind6_good_return_code_3(struct bpf_sock_addr *ctx)
+{
+ return 3;
+}
+
+SEC("cgroup/bind6")
+__failure __msg("At program exit the register R0 has smin=4 smax=4 should have been in [0, 3]")
+int bind6_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 4;
+}
+
+SEC("cgroup/connect4")
+__success
+int connect4_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/connect4")
+__success
+int connect4_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/connect4")
+__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]")
+int connect4_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+SEC("cgroup/connect6")
+__success
+int connect6_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/connect6")
+__success
+int connect6_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/connect6")
+__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]")
+int connect6_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+SEC("cgroup/connect_unix")
+__success
+int connect_unix_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/connect_unix")
+__success
+int connect_unix_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/connect_unix")
+__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]")
+int connect_unix_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c b/tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c
new file mode 100644
index 0000000000..fe4b123187
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include "bpf_misc.h"
+
+#define __always_unused __attribute__((unused))
+
+char _license[] SEC("license") = "GPL";
+
+struct sock {
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__sockmap {
+ union {
+ struct sock *sk;
+ };
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} sockhash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} sockmap SEC(".maps");
+
+enum { CG_OK = 1 };
+
+int zero = 0;
+
+static __always_inline void test_sockmap_delete(void)
+{
+ bpf_map_delete_elem(&sockmap, &zero);
+ bpf_map_delete_elem(&sockhash, &zero);
+}
+
+static __always_inline void test_sockmap_update(void *sk)
+{
+ if (sk) {
+ bpf_map_update_elem(&sockmap, &zero, sk, BPF_ANY);
+ bpf_map_update_elem(&sockhash, &zero, sk, BPF_ANY);
+ }
+}
+
+static __always_inline void test_sockmap_lookup_and_update(void)
+{
+ struct bpf_sock *sk = bpf_map_lookup_elem(&sockmap, &zero);
+
+ if (sk) {
+ test_sockmap_update(sk);
+ bpf_sk_release(sk);
+ }
+}
+
+static __always_inline void test_sockmap_mutate(void *sk)
+{
+ test_sockmap_delete();
+ test_sockmap_update(sk);
+}
+
+static __always_inline void test_sockmap_lookup_and_mutate(void)
+{
+ test_sockmap_delete();
+ test_sockmap_lookup_and_update();
+}
+
+SEC("action")
+__success
+int test_sched_act(struct __sk_buff *skb)
+{
+ test_sockmap_mutate(skb->sk);
+ return 0;
+}
+
+SEC("classifier")
+__success
+int test_sched_cls(struct __sk_buff *skb)
+{
+ test_sockmap_mutate(skb->sk);
+ return 0;
+}
+
+SEC("flow_dissector")
+__success
+int test_flow_dissector_delete(struct __sk_buff *skb __always_unused)
+{
+ test_sockmap_delete();
+ return 0;
+}
+
+SEC("flow_dissector")
+__failure __msg("program of this type cannot use helper bpf_sk_release")
+int test_flow_dissector_update(struct __sk_buff *skb __always_unused)
+{
+ test_sockmap_lookup_and_update(); /* no access to skb->sk */
+ return 0;
+}
+
+SEC("iter/sockmap")
+__success
+int test_trace_iter(struct bpf_iter__sockmap *ctx)
+{
+ test_sockmap_mutate(ctx->sk);
+ return 0;
+}
+
+SEC("raw_tp/kfree")
+__failure __msg("cannot update sockmap in this context")
+int test_raw_tp_delete(const void *ctx __always_unused)
+{
+ test_sockmap_delete();
+ return 0;
+}
+
+SEC("raw_tp/kfree")
+__failure __msg("cannot update sockmap in this context")
+int test_raw_tp_update(const void *ctx __always_unused)
+{
+ test_sockmap_lookup_and_update();
+ return 0;
+}
+
+SEC("sk_lookup")
+__success
+int test_sk_lookup(struct bpf_sk_lookup *ctx)
+{
+ test_sockmap_mutate(ctx->sk);
+ return 0;
+}
+
+SEC("sk_reuseport")
+__success
+int test_sk_reuseport(struct sk_reuseport_md *ctx)
+{
+ test_sockmap_mutate(ctx->sk);
+ return 0;
+}
+
+SEC("socket")
+__success
+int test_socket_filter(struct __sk_buff *skb)
+{
+ test_sockmap_mutate(skb->sk);
+ return 0;
+}
+
+SEC("sockops")
+__success
+int test_sockops_delete(struct bpf_sock_ops *ctx __always_unused)
+{
+ test_sockmap_delete();
+ return CG_OK;
+}
+
+SEC("sockops")
+__failure __msg("cannot update sockmap in this context")
+int test_sockops_update(struct bpf_sock_ops *ctx)
+{
+ test_sockmap_update(ctx->sk);
+ return CG_OK;
+}
+
+SEC("sockops")
+__success
+int test_sockops_update_dedicated(struct bpf_sock_ops *ctx)
+{
+ bpf_sock_map_update(ctx, &sockmap, &zero, BPF_ANY);
+ bpf_sock_hash_update(ctx, &sockhash, &zero, BPF_ANY);
+ return CG_OK;
+}
+
+SEC("xdp")
+__success
+int test_xdp(struct xdp_md *ctx __always_unused)
+{
+ test_sockmap_lookup_and_mutate();
+ return XDP_PASS;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
index 6f5d19665c..4a58e0398e 100644
--- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
@@ -6,6 +6,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include <../../../tools/include/linux/filter.h>
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
@@ -76,6 +77,94 @@ __naked int subprog_result_precise(void)
);
}
+__naked __noinline __used
+static unsigned long fp_leaking_subprog()
+{
+ asm volatile (
+ ".8byte %[r0_eq_r10_cast_s8];"
+ "exit;"
+ :: __imm_insn(r0_eq_r10_cast_s8, BPF_MOVSX64_REG(BPF_REG_0, BPF_REG_10, 8))
+ );
+}
+
+__naked __noinline __used
+static unsigned long sneaky_fp_leaking_subprog()
+{
+ asm volatile (
+ "r1 = r10;"
+ ".8byte %[r0_eq_r1_cast_s8];"
+ "exit;"
+ :: __imm_insn(r0_eq_r1_cast_s8, BPF_MOVSX64_REG(BPF_REG_0, BPF_REG_1, 8))
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("6: (0f) r1 += r0")
+__msg("mark_precise: frame0: last_idx 6 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (27) r0 *= 4")
+__msg("mark_precise: frame0: regs=r0 stack= before 3: (57) r0 &= 3")
+__msg("mark_precise: frame0: regs=r0 stack= before 10: (95) exit")
+__msg("mark_precise: frame1: regs=r0 stack= before 9: (bf) r0 = (s8)r10")
+__msg("7: R0_w=scalar")
+__naked int fp_precise_subprog_result(void)
+{
+ asm volatile (
+ "call fp_leaking_subprog;"
+ /* use subprog's returned value (which is derived from r10=fp
+ * register), as index into vals array, forcing all of that to
+ * be known precisely
+ */
+ "r0 &= 3;"
+ "r0 *= 4;"
+ "r1 = %[vals];"
+ /* force precision marking */
+ "r1 += r0;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("6: (0f) r1 += r0")
+__msg("mark_precise: frame0: last_idx 6 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (27) r0 *= 4")
+__msg("mark_precise: frame0: regs=r0 stack= before 3: (57) r0 &= 3")
+__msg("mark_precise: frame0: regs=r0 stack= before 11: (95) exit")
+__msg("mark_precise: frame1: regs=r0 stack= before 10: (bf) r0 = (s8)r1")
+/* here r1 is marked precise, even though it's fp register, but that's fine
+ * because by the time we get out of subprogram it has to be derived from r10
+ * anyways, at which point we'll break precision chain
+ */
+__msg("mark_precise: frame1: regs=r1 stack= before 9: (bf) r1 = r10")
+__msg("7: R0_w=scalar")
+__naked int sneaky_fp_precise_subprog_result(void)
+{
+ asm volatile (
+ "call sneaky_fp_leaking_subprog;"
+ /* use subprog's returned value (which is derived from r10=fp
+ * register), as index into vals array, forcing all of that to
+ * be known precisely
+ */
+ "r0 &= 3;"
+ "r0 *= 4;"
+ "r1 = %[vals];"
+ /* force precision marking */
+ "r1 += r0;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common
+ );
+}
+
SEC("?raw_tp")
__success __log_level(2)
__msg("9: (0f) r1 += r0")
diff --git a/tools/testing/selftests/bpf/progs/wq.c b/tools/testing/selftests/bpf/progs/wq.c
new file mode 100644
index 0000000000..49e712acbf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/wq.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Benjamin Tissoires
+ */
+
+#include "bpf_experimental.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct hmap_elem {
+ int counter;
+ struct bpf_timer timer; /* unused */
+ struct bpf_spin_lock lock; /* unused */
+ struct bpf_wq work;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap_malloc SEC(".maps");
+
+struct elem {
+ struct bpf_wq w;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 4);
+ __type(key, int);
+ __type(value, struct elem);
+} lru SEC(".maps");
+
+__u32 ok;
+__u32 ok_sleepable;
+
+static int test_elem_callback(void *map, int *key,
+ int (callback_fn)(void *map, int *key, struct bpf_wq *wq))
+{
+ struct elem init = {}, *val;
+ struct bpf_wq *wq;
+
+ if ((ok & (1 << *key) ||
+ (ok_sleepable & (1 << *key))))
+ return -22;
+
+ if (map == &lru &&
+ bpf_map_update_elem(map, key, &init, 0))
+ return -1;
+
+ val = bpf_map_lookup_elem(map, key);
+ if (!val)
+ return -2;
+
+ wq = &val->w;
+ if (bpf_wq_init(wq, map, 0) != 0)
+ return -3;
+
+ if (bpf_wq_set_callback(wq, callback_fn, 0))
+ return -4;
+
+ if (bpf_wq_start(wq, 0))
+ return -5;
+
+ return 0;
+}
+
+static int test_hmap_elem_callback(void *map, int *key,
+ int (callback_fn)(void *map, int *key, struct bpf_wq *wq))
+{
+ struct hmap_elem init = {}, *val;
+ struct bpf_wq *wq;
+
+ if ((ok & (1 << *key) ||
+ (ok_sleepable & (1 << *key))))
+ return -22;
+
+ if (bpf_map_update_elem(map, key, &init, 0))
+ return -1;
+
+ val = bpf_map_lookup_elem(map, key);
+ if (!val)
+ return -2;
+
+ wq = &val->work;
+ if (bpf_wq_init(wq, map, 0) != 0)
+ return -3;
+
+ if (bpf_wq_set_callback(wq, callback_fn, 0))
+ return -4;
+
+ if (bpf_wq_start(wq, 0))
+ return -5;
+
+ return 0;
+}
+
+/* callback for non sleepable workqueue */
+static int wq_callback(void *map, int *key, struct bpf_wq *work)
+{
+ bpf_kfunc_common_test();
+ ok |= (1 << *key);
+ return 0;
+}
+
+/* callback for sleepable workqueue */
+static int wq_cb_sleepable(void *map, int *key, struct bpf_wq *work)
+{
+ bpf_kfunc_call_test_sleepable();
+ ok_sleepable |= (1 << *key);
+ return 0;
+}
+
+SEC("tc")
+/* test that workqueues can be used from an array */
+__retval(0)
+long test_call_array_sleepable(void *ctx)
+{
+ int key = 0;
+
+ return test_elem_callback(&array, &key, wq_cb_sleepable);
+}
+
+SEC("syscall")
+/* Same test than above but from a sleepable context. */
+__retval(0)
+long test_syscall_array_sleepable(void *ctx)
+{
+ int key = 1;
+
+ return test_elem_callback(&array, &key, wq_cb_sleepable);
+}
+
+SEC("tc")
+/* test that workqueues can be used from a hashmap */
+__retval(0)
+long test_call_hash_sleepable(void *ctx)
+{
+ int key = 2;
+
+ return test_hmap_elem_callback(&hmap, &key, wq_callback);
+}
+
+SEC("tc")
+/* test that workqueues can be used from a hashmap with NO_PREALLOC. */
+__retval(0)
+long test_call_hash_malloc_sleepable(void *ctx)
+{
+ int key = 3;
+
+ return test_hmap_elem_callback(&hmap_malloc, &key, wq_callback);
+}
+
+SEC("tc")
+/* test that workqueues can be used from a LRU map */
+__retval(0)
+long test_call_lru_sleepable(void *ctx)
+{
+ int key = 4;
+
+ return test_elem_callback(&lru, &key, wq_callback);
+}
diff --git a/tools/testing/selftests/bpf/progs/wq_failures.c b/tools/testing/selftests/bpf/progs/wq_failures.c
new file mode 100644
index 0000000000..4cbdb425f2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/wq_failures.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Benjamin Tissoires
+ */
+
+#include "bpf_experimental.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct elem {
+ struct bpf_wq w;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 4);
+ __type(key, int);
+ __type(value, struct elem);
+} lru SEC(".maps");
+
+/* callback for non sleepable workqueue */
+static int wq_callback(void *map, int *key, struct bpf_wq *work)
+{
+ bpf_kfunc_common_test();
+ return 0;
+}
+
+/* callback for sleepable workqueue */
+static int wq_cb_sleepable(void *map, int *key, struct bpf_wq *work)
+{
+ bpf_kfunc_call_test_sleepable();
+ return 0;
+}
+
+SEC("tc")
+/* test that bpf_wq_init takes a map as a second argument
+ */
+__log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure
+__msg(": (85) call bpf_wq_init#") /* anchor message */
+__msg("pointer in R2 isn't map pointer")
+long test_wq_init_nomap(void *ctx)
+{
+ struct bpf_wq *wq;
+ struct elem *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&array, &key);
+ if (!val)
+ return -1;
+
+ wq = &val->w;
+ if (bpf_wq_init(wq, &key, 0) != 0)
+ return -3;
+
+ return 0;
+}
+
+SEC("tc")
+/* test that the workqueue is part of the map in bpf_wq_init
+ */
+__log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure
+__msg(": (85) call bpf_wq_init#") /* anchor message */
+__msg("workqueue pointer in R1 map_uid=0 doesn't match map pointer in R2 map_uid=0")
+long test_wq_init_wrong_map(void *ctx)
+{
+ struct bpf_wq *wq;
+ struct elem *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&array, &key);
+ if (!val)
+ return -1;
+
+ wq = &val->w;
+ if (bpf_wq_init(wq, &lru, 0) != 0)
+ return -3;
+
+ return 0;
+}
+
+SEC("?tc")
+__log_level(2)
+__failure
+/* check that the first argument of bpf_wq_set_callback()
+ * is a correct bpf_wq pointer.
+ */
+__msg(": (85) call bpf_wq_set_callback_impl#") /* anchor message */
+__msg("arg#0 doesn't point to a map value")
+long test_wrong_wq_pointer(void *ctx)
+{
+ int key = 0;
+ struct bpf_wq *wq;
+
+ wq = bpf_map_lookup_elem(&array, &key);
+ if (!wq)
+ return 1;
+
+ if (bpf_wq_init(wq, &array, 0))
+ return 2;
+
+ if (bpf_wq_set_callback((void *)&wq, wq_callback, 0))
+ return 3;
+
+ return -22;
+}
+
+SEC("?tc")
+__log_level(2)
+__failure
+/* check that the first argument of bpf_wq_set_callback()
+ * is a correct bpf_wq pointer.
+ */
+__msg(": (85) call bpf_wq_set_callback_impl#") /* anchor message */
+__msg("off 1 doesn't point to 'struct bpf_wq' that is at 0")
+long test_wrong_wq_pointer_offset(void *ctx)
+{
+ int key = 0;
+ struct bpf_wq *wq;
+
+ wq = bpf_map_lookup_elem(&array, &key);
+ if (!wq)
+ return 1;
+
+ if (bpf_wq_init(wq, &array, 0))
+ return 2;
+
+ if (bpf_wq_set_callback((void *)wq + 1, wq_cb_sleepable, 0))
+ return 3;
+
+ return -22;
+}
diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp
index f4936834f7..dde0bb16e7 100644
--- a/tools/testing/selftests/bpf/test_cpp.cpp
+++ b/tools/testing/selftests/bpf/test_cpp.cpp
@@ -7,6 +7,7 @@
#include <bpf/bpf.h>
#include <bpf/btf.h>
#include "test_core_extern.skel.h"
+#include "struct_ops_module.skel.h"
template <typename T>
class Skeleton {
@@ -98,6 +99,7 @@ int main(int argc, char *argv[])
{
struct btf_dump_opts opts = { };
struct test_core_extern *skel;
+ struct struct_ops_module *skel2;
struct btf *btf;
int fd;
@@ -118,6 +120,9 @@ int main(int argc, char *argv[])
skel = test_core_extern__open_and_load();
test_core_extern__destroy(skel);
+ skel2 = struct_ops_module__open_and_load();
+ struct_ops_module__destroy(skel2);
+
fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
if (fd < 0)
std::cout << "FAILED to enable stats: " << fd << std::endl;
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
deleted file mode 100644
index 80c42583f5..0000000000
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ /dev/null
@@ -1,1434 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Facebook
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include <arpa/inet.h>
-#include <netinet/in.h>
-#include <sys/types.h>
-#include <sys/select.h>
-#include <sys/socket.h>
-
-#include <linux/filter.h>
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "cgroup_helpers.h"
-#include "testing_helpers.h"
-#include "bpf_util.h"
-
-#ifndef ENOTSUPP
-# define ENOTSUPP 524
-#endif
-
-#define CG_PATH "/foo"
-#define CONNECT4_PROG_PATH "./connect4_prog.bpf.o"
-#define CONNECT6_PROG_PATH "./connect6_prog.bpf.o"
-#define SENDMSG4_PROG_PATH "./sendmsg4_prog.bpf.o"
-#define SENDMSG6_PROG_PATH "./sendmsg6_prog.bpf.o"
-#define RECVMSG4_PROG_PATH "./recvmsg4_prog.bpf.o"
-#define RECVMSG6_PROG_PATH "./recvmsg6_prog.bpf.o"
-#define BIND4_PROG_PATH "./bind4_prog.bpf.o"
-#define BIND6_PROG_PATH "./bind6_prog.bpf.o"
-
-#define SERV4_IP "192.168.1.254"
-#define SERV4_REWRITE_IP "127.0.0.1"
-#define SRC4_IP "172.16.0.1"
-#define SRC4_REWRITE_IP "127.0.0.4"
-#define SERV4_PORT 4040
-#define SERV4_REWRITE_PORT 4444
-
-#define SERV6_IP "face:b00c:1234:5678::abcd"
-#define SERV6_REWRITE_IP "::1"
-#define SERV6_V4MAPPED_IP "::ffff:192.168.0.4"
-#define SRC6_IP "::1"
-#define SRC6_REWRITE_IP "::6"
-#define WILDCARD6_IP "::"
-#define SERV6_PORT 6060
-#define SERV6_REWRITE_PORT 6666
-
-#define INET_NTOP_BUF 40
-
-struct sock_addr_test;
-
-typedef int (*load_fn)(const struct sock_addr_test *test);
-typedef int (*info_fn)(int, struct sockaddr *, socklen_t *);
-
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
-
-struct sock_addr_test {
- const char *descr;
- /* BPF prog properties */
- load_fn loadfn;
- enum bpf_attach_type expected_attach_type;
- enum bpf_attach_type attach_type;
- /* Socket properties */
- int domain;
- int type;
- /* IP:port pairs for BPF prog to override */
- const char *requested_ip;
- unsigned short requested_port;
- const char *expected_ip;
- unsigned short expected_port;
- const char *expected_src_ip;
- /* Expected test result */
- enum {
- LOAD_REJECT,
- ATTACH_REJECT,
- ATTACH_OKAY,
- SYSCALL_EPERM,
- SYSCALL_ENOTSUPP,
- SUCCESS,
- } expected_result;
-};
-
-static int bind4_prog_load(const struct sock_addr_test *test);
-static int bind6_prog_load(const struct sock_addr_test *test);
-static int connect4_prog_load(const struct sock_addr_test *test);
-static int connect6_prog_load(const struct sock_addr_test *test);
-static int sendmsg_allow_prog_load(const struct sock_addr_test *test);
-static int sendmsg_deny_prog_load(const struct sock_addr_test *test);
-static int recvmsg_allow_prog_load(const struct sock_addr_test *test);
-static int recvmsg_deny_prog_load(const struct sock_addr_test *test);
-static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test);
-static int recvmsg4_rw_c_prog_load(const struct sock_addr_test *test);
-static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test);
-static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test);
-static int recvmsg6_rw_c_prog_load(const struct sock_addr_test *test);
-static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test);
-static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test);
-static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test);
-
-static struct sock_addr_test tests[] = {
- /* bind */
- {
- "bind4: load prog with wrong expected attach type",
- bind4_prog_load,
- BPF_CGROUP_INET6_BIND,
- BPF_CGROUP_INET4_BIND,
- AF_INET,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "bind4: attach prog with wrong attach type",
- bind4_prog_load,
- BPF_CGROUP_INET4_BIND,
- BPF_CGROUP_INET6_BIND,
- AF_INET,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_REJECT,
- },
- {
- "bind4: rewrite IP & TCP port in",
- bind4_prog_load,
- BPF_CGROUP_INET4_BIND,
- BPF_CGROUP_INET4_BIND,
- AF_INET,
- SOCK_STREAM,
- SERV4_IP,
- SERV4_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- NULL,
- SUCCESS,
- },
- {
- "bind4: rewrite IP & UDP port in",
- bind4_prog_load,
- BPF_CGROUP_INET4_BIND,
- BPF_CGROUP_INET4_BIND,
- AF_INET,
- SOCK_DGRAM,
- SERV4_IP,
- SERV4_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- NULL,
- SUCCESS,
- },
- {
- "bind6: load prog with wrong expected attach type",
- bind6_prog_load,
- BPF_CGROUP_INET4_BIND,
- BPF_CGROUP_INET6_BIND,
- AF_INET6,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "bind6: attach prog with wrong attach type",
- bind6_prog_load,
- BPF_CGROUP_INET6_BIND,
- BPF_CGROUP_INET4_BIND,
- AF_INET,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_REJECT,
- },
- {
- "bind6: rewrite IP & TCP port in",
- bind6_prog_load,
- BPF_CGROUP_INET6_BIND,
- BPF_CGROUP_INET6_BIND,
- AF_INET6,
- SOCK_STREAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- NULL,
- SUCCESS,
- },
- {
- "bind6: rewrite IP & UDP port in",
- bind6_prog_load,
- BPF_CGROUP_INET6_BIND,
- BPF_CGROUP_INET6_BIND,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- NULL,
- SUCCESS,
- },
-
- /* connect */
- {
- "connect4: load prog with wrong expected attach type",
- connect4_prog_load,
- BPF_CGROUP_INET6_CONNECT,
- BPF_CGROUP_INET4_CONNECT,
- AF_INET,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "connect4: attach prog with wrong attach type",
- connect4_prog_load,
- BPF_CGROUP_INET4_CONNECT,
- BPF_CGROUP_INET6_CONNECT,
- AF_INET,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_REJECT,
- },
- {
- "connect4: rewrite IP & TCP port",
- connect4_prog_load,
- BPF_CGROUP_INET4_CONNECT,
- BPF_CGROUP_INET4_CONNECT,
- AF_INET,
- SOCK_STREAM,
- SERV4_IP,
- SERV4_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- SRC4_REWRITE_IP,
- SUCCESS,
- },
- {
- "connect4: rewrite IP & UDP port",
- connect4_prog_load,
- BPF_CGROUP_INET4_CONNECT,
- BPF_CGROUP_INET4_CONNECT,
- AF_INET,
- SOCK_DGRAM,
- SERV4_IP,
- SERV4_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- SRC4_REWRITE_IP,
- SUCCESS,
- },
- {
- "connect6: load prog with wrong expected attach type",
- connect6_prog_load,
- BPF_CGROUP_INET4_CONNECT,
- BPF_CGROUP_INET6_CONNECT,
- AF_INET6,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "connect6: attach prog with wrong attach type",
- connect6_prog_load,
- BPF_CGROUP_INET6_CONNECT,
- BPF_CGROUP_INET4_CONNECT,
- AF_INET,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_REJECT,
- },
- {
- "connect6: rewrite IP & TCP port",
- connect6_prog_load,
- BPF_CGROUP_INET6_CONNECT,
- BPF_CGROUP_INET6_CONNECT,
- AF_INET6,
- SOCK_STREAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SRC6_REWRITE_IP,
- SUCCESS,
- },
- {
- "connect6: rewrite IP & UDP port",
- connect6_prog_load,
- BPF_CGROUP_INET6_CONNECT,
- BPF_CGROUP_INET6_CONNECT,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SRC6_REWRITE_IP,
- SUCCESS,
- },
-
- /* sendmsg */
- {
- "sendmsg4: load prog with wrong expected attach type",
- sendmsg4_rw_asm_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP4_SENDMSG,
- AF_INET,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "sendmsg4: attach prog with wrong attach type",
- sendmsg4_rw_asm_prog_load,
- BPF_CGROUP_UDP4_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_REJECT,
- },
- {
- "sendmsg4: rewrite IP & port (asm)",
- sendmsg4_rw_asm_prog_load,
- BPF_CGROUP_UDP4_SENDMSG,
- BPF_CGROUP_UDP4_SENDMSG,
- AF_INET,
- SOCK_DGRAM,
- SERV4_IP,
- SERV4_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- SRC4_REWRITE_IP,
- SUCCESS,
- },
- {
- "sendmsg4: rewrite IP & port (C)",
- sendmsg4_rw_c_prog_load,
- BPF_CGROUP_UDP4_SENDMSG,
- BPF_CGROUP_UDP4_SENDMSG,
- AF_INET,
- SOCK_DGRAM,
- SERV4_IP,
- SERV4_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- SRC4_REWRITE_IP,
- SUCCESS,
- },
- {
- "sendmsg4: deny call",
- sendmsg_deny_prog_load,
- BPF_CGROUP_UDP4_SENDMSG,
- BPF_CGROUP_UDP4_SENDMSG,
- AF_INET,
- SOCK_DGRAM,
- SERV4_IP,
- SERV4_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- SRC4_REWRITE_IP,
- SYSCALL_EPERM,
- },
- {
- "sendmsg6: load prog with wrong expected attach type",
- sendmsg6_rw_asm_prog_load,
- BPF_CGROUP_UDP4_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "sendmsg6: attach prog with wrong attach type",
- sendmsg6_rw_asm_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP4_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_REJECT,
- },
- {
- "sendmsg6: rewrite IP & port (asm)",
- sendmsg6_rw_asm_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SRC6_REWRITE_IP,
- SUCCESS,
- },
- {
- "sendmsg6: rewrite IP & port (C)",
- sendmsg6_rw_c_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SRC6_REWRITE_IP,
- SUCCESS,
- },
- {
- "sendmsg6: IPv4-mapped IPv6",
- sendmsg6_rw_v4mapped_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SRC6_REWRITE_IP,
- SYSCALL_ENOTSUPP,
- },
- {
- "sendmsg6: set dst IP = [::] (BSD'ism)",
- sendmsg6_rw_wildcard_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SRC6_REWRITE_IP,
- SUCCESS,
- },
- {
- "sendmsg6: preserve dst IP = [::] (BSD'ism)",
- sendmsg_allow_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- WILDCARD6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_PORT,
- SRC6_IP,
- SUCCESS,
- },
- {
- "sendmsg6: deny call",
- sendmsg_deny_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SRC6_REWRITE_IP,
- SYSCALL_EPERM,
- },
-
- /* recvmsg */
- {
- "recvmsg4: return code ok",
- recvmsg_allow_prog_load,
- BPF_CGROUP_UDP4_RECVMSG,
- BPF_CGROUP_UDP4_RECVMSG,
- AF_INET,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_OKAY,
- },
- {
- "recvmsg4: return code !ok",
- recvmsg_deny_prog_load,
- BPF_CGROUP_UDP4_RECVMSG,
- BPF_CGROUP_UDP4_RECVMSG,
- AF_INET,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "recvmsg6: return code ok",
- recvmsg_allow_prog_load,
- BPF_CGROUP_UDP6_RECVMSG,
- BPF_CGROUP_UDP6_RECVMSG,
- AF_INET6,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_OKAY,
- },
- {
- "recvmsg6: return code !ok",
- recvmsg_deny_prog_load,
- BPF_CGROUP_UDP6_RECVMSG,
- BPF_CGROUP_UDP6_RECVMSG,
- AF_INET6,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "recvmsg4: rewrite IP & port (C)",
- recvmsg4_rw_c_prog_load,
- BPF_CGROUP_UDP4_RECVMSG,
- BPF_CGROUP_UDP4_RECVMSG,
- AF_INET,
- SOCK_DGRAM,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- SERV4_IP,
- SUCCESS,
- },
- {
- "recvmsg6: rewrite IP & port (C)",
- recvmsg6_rw_c_prog_load,
- BPF_CGROUP_UDP6_RECVMSG,
- BPF_CGROUP_UDP6_RECVMSG,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SERV6_IP,
- SUCCESS,
- },
-};
-
-static int mk_sockaddr(int domain, const char *ip, unsigned short port,
- struct sockaddr *addr, socklen_t addr_len)
-{
- struct sockaddr_in6 *addr6;
- struct sockaddr_in *addr4;
-
- if (domain != AF_INET && domain != AF_INET6) {
- log_err("Unsupported address family");
- return -1;
- }
-
- memset(addr, 0, addr_len);
-
- if (domain == AF_INET) {
- if (addr_len < sizeof(struct sockaddr_in))
- return -1;
- addr4 = (struct sockaddr_in *)addr;
- addr4->sin_family = domain;
- addr4->sin_port = htons(port);
- if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1) {
- log_err("Invalid IPv4: %s", ip);
- return -1;
- }
- } else if (domain == AF_INET6) {
- if (addr_len < sizeof(struct sockaddr_in6))
- return -1;
- addr6 = (struct sockaddr_in6 *)addr;
- addr6->sin6_family = domain;
- addr6->sin6_port = htons(port);
- if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1) {
- log_err("Invalid IPv6: %s", ip);
- return -1;
- }
- }
-
- return 0;
-}
-
-static int load_insns(const struct sock_addr_test *test,
- const struct bpf_insn *insns, size_t insns_cnt)
-{
- LIBBPF_OPTS(bpf_prog_load_opts, opts);
- int ret;
-
- opts.expected_attach_type = test->expected_attach_type;
- opts.log_buf = bpf_log_buf;
- opts.log_size = BPF_LOG_BUF_SIZE;
-
- ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, NULL, "GPL", insns, insns_cnt, &opts);
- if (ret < 0 && test->expected_result != LOAD_REJECT) {
- log_err(">>> Loading program error.\n"
- ">>> Verifier output:\n%s\n-------\n", bpf_log_buf);
- }
-
- return ret;
-}
-
-static int load_path(const struct sock_addr_test *test, const char *path)
-{
- struct bpf_object *obj;
- struct bpf_program *prog;
- int err;
-
- obj = bpf_object__open_file(path, NULL);
- err = libbpf_get_error(obj);
- if (err) {
- log_err(">>> Opening BPF object (%s) error.\n", path);
- return -1;
- }
-
- prog = bpf_object__next_program(obj, NULL);
- if (!prog)
- goto err_out;
-
- bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR);
- bpf_program__set_expected_attach_type(prog, test->expected_attach_type);
- bpf_program__set_flags(prog, testing_prog_flags());
-
- err = bpf_object__load(obj);
- if (err) {
- if (test->expected_result != LOAD_REJECT)
- log_err(">>> Loading program (%s) error.\n", path);
- goto err_out;
- }
-
- return bpf_program__fd(prog);
-err_out:
- bpf_object__close(obj);
- return -1;
-}
-
-static int bind4_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, BIND4_PROG_PATH);
-}
-
-static int bind6_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, BIND6_PROG_PATH);
-}
-
-static int connect4_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, CONNECT4_PROG_PATH);
-}
-
-static int connect6_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, CONNECT6_PROG_PATH);
-}
-
-static int xmsg_ret_only_prog_load(const struct sock_addr_test *test,
- int32_t rc)
-{
- struct bpf_insn insns[] = {
- /* return rc */
- BPF_MOV64_IMM(BPF_REG_0, rc),
- BPF_EXIT_INSN(),
- };
- return load_insns(test, insns, ARRAY_SIZE(insns));
-}
-
-static int sendmsg_allow_prog_load(const struct sock_addr_test *test)
-{
- return xmsg_ret_only_prog_load(test, /*rc*/ 1);
-}
-
-static int sendmsg_deny_prog_load(const struct sock_addr_test *test)
-{
- return xmsg_ret_only_prog_load(test, /*rc*/ 0);
-}
-
-static int recvmsg_allow_prog_load(const struct sock_addr_test *test)
-{
- return xmsg_ret_only_prog_load(test, /*rc*/ 1);
-}
-
-static int recvmsg_deny_prog_load(const struct sock_addr_test *test)
-{
- return xmsg_ret_only_prog_load(test, /*rc*/ 0);
-}
-
-static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test)
-{
- struct sockaddr_in dst4_rw_addr;
- struct in_addr src4_rw_ip;
-
- if (inet_pton(AF_INET, SRC4_REWRITE_IP, (void *)&src4_rw_ip) != 1) {
- log_err("Invalid IPv4: %s", SRC4_REWRITE_IP);
- return -1;
- }
-
- if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
- (struct sockaddr *)&dst4_rw_addr,
- sizeof(dst4_rw_addr)) == -1)
- return -1;
-
- struct bpf_insn insns[] = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-
- /* if (sk.family == AF_INET && */
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, family)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 8),
-
- /* sk.type == SOCK_DGRAM) { */
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, type)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 6),
-
- /* msg_src_ip4 = src4_rw_ip */
- BPF_MOV32_IMM(BPF_REG_7, src4_rw_ip.s_addr),
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
- offsetof(struct bpf_sock_addr, msg_src_ip4)),
-
- /* user_ip4 = dst4_rw_addr.sin_addr */
- BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_addr.s_addr),
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
- offsetof(struct bpf_sock_addr, user_ip4)),
-
- /* user_port = dst4_rw_addr.sin_port */
- BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_port),
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
- offsetof(struct bpf_sock_addr, user_port)),
- /* } */
-
- /* return 1 */
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- };
-
- return load_insns(test, insns, ARRAY_SIZE(insns));
-}
-
-static int recvmsg4_rw_c_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, RECVMSG4_PROG_PATH);
-}
-
-static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, SENDMSG4_PROG_PATH);
-}
-
-static int sendmsg6_rw_dst_asm_prog_load(const struct sock_addr_test *test,
- const char *rw_dst_ip)
-{
- struct sockaddr_in6 dst6_rw_addr;
- struct in6_addr src6_rw_ip;
-
- if (inet_pton(AF_INET6, SRC6_REWRITE_IP, (void *)&src6_rw_ip) != 1) {
- log_err("Invalid IPv6: %s", SRC6_REWRITE_IP);
- return -1;
- }
-
- if (mk_sockaddr(AF_INET6, rw_dst_ip, SERV6_REWRITE_PORT,
- (struct sockaddr *)&dst6_rw_addr,
- sizeof(dst6_rw_addr)) == -1)
- return -1;
-
- struct bpf_insn insns[] = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-
- /* if (sk.family == AF_INET6) { */
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, family)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18),
-
-#define STORE_IPV6_WORD_N(DST, SRC, N) \
- BPF_MOV32_IMM(BPF_REG_7, SRC[N]), \
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, \
- offsetof(struct bpf_sock_addr, DST[N]))
-
-#define STORE_IPV6(DST, SRC) \
- STORE_IPV6_WORD_N(DST, SRC, 0), \
- STORE_IPV6_WORD_N(DST, SRC, 1), \
- STORE_IPV6_WORD_N(DST, SRC, 2), \
- STORE_IPV6_WORD_N(DST, SRC, 3)
-
- STORE_IPV6(msg_src_ip6, src6_rw_ip.s6_addr32),
- STORE_IPV6(user_ip6, dst6_rw_addr.sin6_addr.s6_addr32),
-
- /* user_port = dst6_rw_addr.sin6_port */
- BPF_MOV32_IMM(BPF_REG_7, dst6_rw_addr.sin6_port),
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
- offsetof(struct bpf_sock_addr, user_port)),
-
- /* } */
-
- /* return 1 */
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- };
-
- return load_insns(test, insns, ARRAY_SIZE(insns));
-}
-
-static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test)
-{
- return sendmsg6_rw_dst_asm_prog_load(test, SERV6_REWRITE_IP);
-}
-
-static int recvmsg6_rw_c_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, RECVMSG6_PROG_PATH);
-}
-
-static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test)
-{
- return sendmsg6_rw_dst_asm_prog_load(test, SERV6_V4MAPPED_IP);
-}
-
-static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test)
-{
- return sendmsg6_rw_dst_asm_prog_load(test, WILDCARD6_IP);
-}
-
-static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, SENDMSG6_PROG_PATH);
-}
-
-static int cmp_addr(const struct sockaddr_storage *addr1,
- const struct sockaddr_storage *addr2, int cmp_port)
-{
- const struct sockaddr_in *four1, *four2;
- const struct sockaddr_in6 *six1, *six2;
-
- if (addr1->ss_family != addr2->ss_family)
- return -1;
-
- if (addr1->ss_family == AF_INET) {
- four1 = (const struct sockaddr_in *)addr1;
- four2 = (const struct sockaddr_in *)addr2;
- return !((four1->sin_port == four2->sin_port || !cmp_port) &&
- four1->sin_addr.s_addr == four2->sin_addr.s_addr);
- } else if (addr1->ss_family == AF_INET6) {
- six1 = (const struct sockaddr_in6 *)addr1;
- six2 = (const struct sockaddr_in6 *)addr2;
- return !((six1->sin6_port == six2->sin6_port || !cmp_port) &&
- !memcmp(&six1->sin6_addr, &six2->sin6_addr,
- sizeof(struct in6_addr)));
- }
-
- return -1;
-}
-
-static int cmp_sock_addr(info_fn fn, int sock1,
- const struct sockaddr_storage *addr2, int cmp_port)
-{
- struct sockaddr_storage addr1;
- socklen_t len1 = sizeof(addr1);
-
- memset(&addr1, 0, len1);
- if (fn(sock1, (struct sockaddr *)&addr1, (socklen_t *)&len1) != 0)
- return -1;
-
- return cmp_addr(&addr1, addr2, cmp_port);
-}
-
-static int cmp_local_ip(int sock1, const struct sockaddr_storage *addr2)
-{
- return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 0);
-}
-
-static int cmp_local_addr(int sock1, const struct sockaddr_storage *addr2)
-{
- return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 1);
-}
-
-static int cmp_peer_addr(int sock1, const struct sockaddr_storage *addr2)
-{
- return cmp_sock_addr(getpeername, sock1, addr2, /*cmp_port*/ 1);
-}
-
-static int start_server(int type, const struct sockaddr_storage *addr,
- socklen_t addr_len)
-{
- int fd;
-
- fd = socket(addr->ss_family, type, 0);
- if (fd == -1) {
- log_err("Failed to create server socket");
- goto out;
- }
-
- if (bind(fd, (const struct sockaddr *)addr, addr_len) == -1) {
- log_err("Failed to bind server socket");
- goto close_out;
- }
-
- if (type == SOCK_STREAM) {
- if (listen(fd, 128) == -1) {
- log_err("Failed to listen on server socket");
- goto close_out;
- }
- }
-
- goto out;
-close_out:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
-
-static int connect_to_server(int type, const struct sockaddr_storage *addr,
- socklen_t addr_len)
-{
- int domain;
- int fd = -1;
-
- domain = addr->ss_family;
-
- if (domain != AF_INET && domain != AF_INET6) {
- log_err("Unsupported address family");
- goto err;
- }
-
- fd = socket(domain, type, 0);
- if (fd == -1) {
- log_err("Failed to create client socket");
- goto err;
- }
-
- if (connect(fd, (const struct sockaddr *)addr, addr_len) == -1) {
- log_err("Fail to connect to server");
- goto err;
- }
-
- goto out;
-err:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
-
-int init_pktinfo(int domain, struct cmsghdr *cmsg)
-{
- struct in6_pktinfo *pktinfo6;
- struct in_pktinfo *pktinfo4;
-
- if (domain == AF_INET) {
- cmsg->cmsg_level = SOL_IP;
- cmsg->cmsg_type = IP_PKTINFO;
- cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
- pktinfo4 = (struct in_pktinfo *)CMSG_DATA(cmsg);
- memset(pktinfo4, 0, sizeof(struct in_pktinfo));
- if (inet_pton(domain, SRC4_IP,
- (void *)&pktinfo4->ipi_spec_dst) != 1)
- return -1;
- } else if (domain == AF_INET6) {
- cmsg->cmsg_level = SOL_IPV6;
- cmsg->cmsg_type = IPV6_PKTINFO;
- cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
- pktinfo6 = (struct in6_pktinfo *)CMSG_DATA(cmsg);
- memset(pktinfo6, 0, sizeof(struct in6_pktinfo));
- if (inet_pton(domain, SRC6_IP,
- (void *)&pktinfo6->ipi6_addr) != 1)
- return -1;
- } else {
- return -1;
- }
-
- return 0;
-}
-
-static int sendmsg_to_server(int type, const struct sockaddr_storage *addr,
- socklen_t addr_len, int set_cmsg, int flags,
- int *syscall_err)
-{
- union {
- char buf[CMSG_SPACE(sizeof(struct in6_pktinfo))];
- struct cmsghdr align;
- } control6;
- union {
- char buf[CMSG_SPACE(sizeof(struct in_pktinfo))];
- struct cmsghdr align;
- } control4;
- struct msghdr hdr;
- struct iovec iov;
- char data = 'a';
- int domain;
- int fd = -1;
-
- domain = addr->ss_family;
-
- if (domain != AF_INET && domain != AF_INET6) {
- log_err("Unsupported address family");
- goto err;
- }
-
- fd = socket(domain, type, 0);
- if (fd == -1) {
- log_err("Failed to create client socket");
- goto err;
- }
-
- memset(&iov, 0, sizeof(iov));
- iov.iov_base = &data;
- iov.iov_len = sizeof(data);
-
- memset(&hdr, 0, sizeof(hdr));
- hdr.msg_name = (void *)addr;
- hdr.msg_namelen = addr_len;
- hdr.msg_iov = &iov;
- hdr.msg_iovlen = 1;
-
- if (set_cmsg) {
- if (domain == AF_INET) {
- hdr.msg_control = &control4;
- hdr.msg_controllen = sizeof(control4.buf);
- } else if (domain == AF_INET6) {
- hdr.msg_control = &control6;
- hdr.msg_controllen = sizeof(control6.buf);
- }
- if (init_pktinfo(domain, CMSG_FIRSTHDR(&hdr))) {
- log_err("Fail to init pktinfo");
- goto err;
- }
- }
-
- if (sendmsg(fd, &hdr, flags) != sizeof(data)) {
- log_err("Fail to send message to server");
- *syscall_err = errno;
- goto err;
- }
-
- goto out;
-err:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
-
-static int fastconnect_to_server(const struct sockaddr_storage *addr,
- socklen_t addr_len)
-{
- int sendmsg_err;
-
- return sendmsg_to_server(SOCK_STREAM, addr, addr_len, /*set_cmsg*/0,
- MSG_FASTOPEN, &sendmsg_err);
-}
-
-static int recvmsg_from_client(int sockfd, struct sockaddr_storage *src_addr)
-{
- struct timeval tv;
- struct msghdr hdr;
- struct iovec iov;
- char data[64];
- fd_set rfds;
-
- FD_ZERO(&rfds);
- FD_SET(sockfd, &rfds);
-
- tv.tv_sec = 2;
- tv.tv_usec = 0;
-
- if (select(sockfd + 1, &rfds, NULL, NULL, &tv) <= 0 ||
- !FD_ISSET(sockfd, &rfds))
- return -1;
-
- memset(&iov, 0, sizeof(iov));
- iov.iov_base = data;
- iov.iov_len = sizeof(data);
-
- memset(&hdr, 0, sizeof(hdr));
- hdr.msg_name = src_addr;
- hdr.msg_namelen = sizeof(struct sockaddr_storage);
- hdr.msg_iov = &iov;
- hdr.msg_iovlen = 1;
-
- return recvmsg(sockfd, &hdr, 0);
-}
-
-static int init_addrs(const struct sock_addr_test *test,
- struct sockaddr_storage *requested_addr,
- struct sockaddr_storage *expected_addr,
- struct sockaddr_storage *expected_src_addr)
-{
- socklen_t addr_len = sizeof(struct sockaddr_storage);
-
- if (mk_sockaddr(test->domain, test->expected_ip, test->expected_port,
- (struct sockaddr *)expected_addr, addr_len) == -1)
- goto err;
-
- if (mk_sockaddr(test->domain, test->requested_ip, test->requested_port,
- (struct sockaddr *)requested_addr, addr_len) == -1)
- goto err;
-
- if (test->expected_src_ip &&
- mk_sockaddr(test->domain, test->expected_src_ip, 0,
- (struct sockaddr *)expected_src_addr, addr_len) == -1)
- goto err;
-
- return 0;
-err:
- return -1;
-}
-
-static int run_bind_test_case(const struct sock_addr_test *test)
-{
- socklen_t addr_len = sizeof(struct sockaddr_storage);
- struct sockaddr_storage requested_addr;
- struct sockaddr_storage expected_addr;
- int clientfd = -1;
- int servfd = -1;
- int err = 0;
-
- if (init_addrs(test, &requested_addr, &expected_addr, NULL))
- goto err;
-
- servfd = start_server(test->type, &requested_addr, addr_len);
- if (servfd == -1)
- goto err;
-
- if (cmp_local_addr(servfd, &expected_addr))
- goto err;
-
- /* Try to connect to server just in case */
- clientfd = connect_to_server(test->type, &expected_addr, addr_len);
- if (clientfd == -1)
- goto err;
-
- goto out;
-err:
- err = -1;
-out:
- close(clientfd);
- close(servfd);
- return err;
-}
-
-static int run_connect_test_case(const struct sock_addr_test *test)
-{
- socklen_t addr_len = sizeof(struct sockaddr_storage);
- struct sockaddr_storage expected_src_addr;
- struct sockaddr_storage requested_addr;
- struct sockaddr_storage expected_addr;
- int clientfd = -1;
- int servfd = -1;
- int err = 0;
-
- if (init_addrs(test, &requested_addr, &expected_addr,
- &expected_src_addr))
- goto err;
-
- /* Prepare server to connect to */
- servfd = start_server(test->type, &expected_addr, addr_len);
- if (servfd == -1)
- goto err;
-
- clientfd = connect_to_server(test->type, &requested_addr, addr_len);
- if (clientfd == -1)
- goto err;
-
- /* Make sure src and dst addrs were overridden properly */
- if (cmp_peer_addr(clientfd, &expected_addr))
- goto err;
-
- if (cmp_local_ip(clientfd, &expected_src_addr))
- goto err;
-
- if (test->type == SOCK_STREAM) {
- /* Test TCP Fast Open scenario */
- clientfd = fastconnect_to_server(&requested_addr, addr_len);
- if (clientfd == -1)
- goto err;
-
- /* Make sure src and dst addrs were overridden properly */
- if (cmp_peer_addr(clientfd, &expected_addr))
- goto err;
-
- if (cmp_local_ip(clientfd, &expected_src_addr))
- goto err;
- }
-
- goto out;
-err:
- err = -1;
-out:
- close(clientfd);
- close(servfd);
- return err;
-}
-
-static int run_xmsg_test_case(const struct sock_addr_test *test, int max_cmsg)
-{
- socklen_t addr_len = sizeof(struct sockaddr_storage);
- struct sockaddr_storage expected_addr;
- struct sockaddr_storage server_addr;
- struct sockaddr_storage sendmsg_addr;
- struct sockaddr_storage recvmsg_addr;
- int clientfd = -1;
- int servfd = -1;
- int set_cmsg;
- int err = 0;
-
- if (test->type != SOCK_DGRAM)
- goto err;
-
- if (init_addrs(test, &sendmsg_addr, &server_addr, &expected_addr))
- goto err;
-
- /* Prepare server to sendmsg to */
- servfd = start_server(test->type, &server_addr, addr_len);
- if (servfd == -1)
- goto err;
-
- for (set_cmsg = 0; set_cmsg <= max_cmsg; ++set_cmsg) {
- if (clientfd >= 0)
- close(clientfd);
-
- clientfd = sendmsg_to_server(test->type, &sendmsg_addr,
- addr_len, set_cmsg, /*flags*/0,
- &err);
- if (err)
- goto out;
- else if (clientfd == -1)
- goto err;
-
- /* Try to receive message on server instead of using
- * getpeername(2) on client socket, to check that client's
- * destination address was rewritten properly, since
- * getpeername(2) doesn't work with unconnected datagram
- * sockets.
- *
- * Get source address from recvmsg(2) as well to make sure
- * source was rewritten properly: getsockname(2) can't be used
- * since socket is unconnected and source defined for one
- * specific packet may differ from the one used by default and
- * returned by getsockname(2).
- */
- if (recvmsg_from_client(servfd, &recvmsg_addr) == -1)
- goto err;
-
- if (cmp_addr(&recvmsg_addr, &expected_addr, /*cmp_port*/0))
- goto err;
- }
-
- goto out;
-err:
- err = -1;
-out:
- close(clientfd);
- close(servfd);
- return err;
-}
-
-static int run_test_case(int cgfd, const struct sock_addr_test *test)
-{
- int progfd = -1;
- int err = 0;
-
- printf("Test case: %s .. ", test->descr);
-
- progfd = test->loadfn(test);
- if (test->expected_result == LOAD_REJECT && progfd < 0)
- goto out;
- else if (test->expected_result == LOAD_REJECT || progfd < 0)
- goto err;
-
- err = bpf_prog_attach(progfd, cgfd, test->attach_type,
- BPF_F_ALLOW_OVERRIDE);
- if (test->expected_result == ATTACH_REJECT && err) {
- err = 0; /* error was expected, reset it */
- goto out;
- } else if (test->expected_result == ATTACH_REJECT || err) {
- goto err;
- } else if (test->expected_result == ATTACH_OKAY) {
- err = 0;
- goto out;
- }
-
- switch (test->attach_type) {
- case BPF_CGROUP_INET4_BIND:
- case BPF_CGROUP_INET6_BIND:
- err = run_bind_test_case(test);
- break;
- case BPF_CGROUP_INET4_CONNECT:
- case BPF_CGROUP_INET6_CONNECT:
- err = run_connect_test_case(test);
- break;
- case BPF_CGROUP_UDP4_SENDMSG:
- case BPF_CGROUP_UDP6_SENDMSG:
- err = run_xmsg_test_case(test, 1);
- break;
- case BPF_CGROUP_UDP4_RECVMSG:
- case BPF_CGROUP_UDP6_RECVMSG:
- err = run_xmsg_test_case(test, 0);
- break;
- default:
- goto err;
- }
-
- if (test->expected_result == SYSCALL_EPERM && err == EPERM) {
- err = 0; /* error was expected, reset it */
- goto out;
- }
-
- if (test->expected_result == SYSCALL_ENOTSUPP && err == ENOTSUPP) {
- err = 0; /* error was expected, reset it */
- goto out;
- }
-
- if (err || test->expected_result != SUCCESS)
- goto err;
-
- goto out;
-err:
- err = -1;
-out:
- /* Detaching w/o checking return code: best effort attempt. */
- if (progfd != -1)
- bpf_prog_detach(cgfd, test->attach_type);
- close(progfd);
- printf("[%s]\n", err ? "FAIL" : "PASS");
- return err;
-}
-
-static int run_tests(int cgfd)
-{
- int passes = 0;
- int fails = 0;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(tests); ++i) {
- if (run_test_case(cgfd, &tests[i]))
- ++fails;
- else
- ++passes;
- }
- printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
- return fails ? -1 : 0;
-}
-
-int main(int argc, char **argv)
-{
- int cgfd = -1;
- int err = 0;
-
- if (argc < 2) {
- fprintf(stderr,
- "%s has to be run via %s.sh. Skip direct run.\n",
- argv[0], argv[0]);
- exit(err);
- }
-
- cgfd = cgroup_setup_and_join(CG_PATH);
- if (cgfd < 0)
- goto err;
-
- /* Use libbpf 1.0 API mode */
- libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
-
- if (run_tests(cgfd))
- goto err;
-
- goto out;
-err:
- err = -1;
-out:
- close(cgfd);
- cleanup_cgroup_environment();
- return err;
-}
diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh
deleted file mode 100755
index 3b9fdb8094..0000000000
--- a/tools/testing/selftests/bpf/test_sock_addr.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/sh
-
-set -eu
-
-ping_once()
-{
- type ping${1} >/dev/null 2>&1 && PING="ping${1}" || PING="ping -${1}"
- $PING -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1
-}
-
-wait_for_ip()
-{
- local _i
- echo -n "Wait for testing IPv4/IPv6 to become available "
- for _i in $(seq ${MAX_PING_TRIES}); do
- echo -n "."
- if ping_once 4 ${TEST_IPv4} && ping_once 6 ${TEST_IPv6}; then
- echo " OK"
- return
- fi
- done
- echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
- exit 1
-}
-
-setup()
-{
- # Create testing interfaces not to interfere with current environment.
- ip link add dev ${TEST_IF} type veth peer name ${TEST_IF_PEER}
- ip link set ${TEST_IF} up
- ip link set ${TEST_IF_PEER} up
-
- ip -4 addr add ${TEST_IPv4} dev ${TEST_IF}
- ip -6 addr add ${TEST_IPv6} dev ${TEST_IF}
- wait_for_ip
-}
-
-cleanup()
-{
- ip link del ${TEST_IF} 2>/dev/null || :
- ip link del ${TEST_IF_PEER} 2>/dev/null || :
-}
-
-main()
-{
- trap cleanup EXIT 2 3 6 15
- setup
- ./test_sock_addr setup_done
-}
-
-BASENAME=$(basename $0 .sh)
-TEST_IF="${BASENAME}1"
-TEST_IF_PEER="${BASENAME}2"
-TEST_IPv4="127.0.0.4/8"
-TEST_IPv6="::6/128"
-MAX_PING_TRIES=5
-
-main
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 43612de44f..a709911cdd 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -63,7 +63,7 @@ int passed;
int failed;
int map_fd[9];
struct bpf_map *maps[9];
-int prog_fd[11];
+int prog_fd[9];
int txmsg_pass;
int txmsg_redir;
@@ -680,7 +680,8 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
}
}
- s->bytes_recvd += recv;
+ if (recv > 0)
+ s->bytes_recvd += recv;
if (opt->check_recved_len && s->bytes_recvd > total_bytes) {
errno = EMSGSIZE;
@@ -1793,8 +1794,6 @@ int prog_attach_type[] = {
BPF_SK_MSG_VERDICT,
BPF_SK_MSG_VERDICT,
BPF_SK_MSG_VERDICT,
- BPF_SK_MSG_VERDICT,
- BPF_SK_MSG_VERDICT,
};
int prog_type[] = {
@@ -1807,8 +1806,6 @@ int prog_type[] = {
BPF_PROG_TYPE_SK_MSG,
BPF_PROG_TYPE_SK_MSG,
BPF_PROG_TYPE_SK_MSG,
- BPF_PROG_TYPE_SK_MSG,
- BPF_PROG_TYPE_SK_MSG,
};
static int populate_progs(char *bpf_file)
@@ -1887,10 +1884,13 @@ static int check_whitelist(struct _test *t, struct sockmap_options *opt)
while (entry) {
if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
strstr(opt->map, entry) != 0 ||
- strstr(t->title, entry) != 0)
+ strstr(t->title, entry) != 0) {
+ free(ptr);
return 0;
+ }
entry = strtok(NULL, ",");
}
+ free(ptr);
return -EINVAL;
}
@@ -1907,10 +1907,13 @@ static int check_blacklist(struct _test *t, struct sockmap_options *opt)
while (entry) {
if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
strstr(opt->map, entry) != 0 ||
- strstr(t->title, entry) != 0)
+ strstr(t->title, entry) != 0) {
+ free(ptr);
return 0;
+ }
entry = strtok(NULL, ",");
}
+ free(ptr);
return -EINVAL;
}
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
index 32df937470..7b5fc98838 100644
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
@@ -16,68 +16,7 @@
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
-
-static int start_server(const struct sockaddr *addr, socklen_t len, bool dual)
-{
- int mode = !dual;
- int fd;
-
- fd = socket(addr->sa_family, SOCK_STREAM, 0);
- if (fd == -1) {
- log_err("Failed to create server socket");
- goto out;
- }
-
- if (addr->sa_family == AF_INET6) {
- if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (char *)&mode,
- sizeof(mode)) == -1) {
- log_err("Failed to set the dual-stack mode");
- goto close_out;
- }
- }
-
- if (bind(fd, addr, len) == -1) {
- log_err("Failed to bind server socket");
- goto close_out;
- }
-
- if (listen(fd, 128) == -1) {
- log_err("Failed to listen on server socket");
- goto close_out;
- }
-
- goto out;
-
-close_out:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
-
-static int connect_to_server(const struct sockaddr *addr, socklen_t len)
-{
- int fd = -1;
-
- fd = socket(addr->sa_family, SOCK_STREAM, 0);
- if (fd == -1) {
- log_err("Failed to create client socket");
- goto out;
- }
-
- if (connect(fd, (const struct sockaddr *)addr, len) == -1) {
- log_err("Fail to connect to server");
- goto close_out;
- }
-
- goto out;
-
-close_out:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
+#include "network_helpers.h"
static int get_map_fd_by_prog_id(int prog_id, bool *xdp)
{
@@ -117,8 +56,7 @@ err:
return map_fd;
}
-static int run_test(int server_fd, int results_fd, bool xdp,
- const struct sockaddr *addr, socklen_t len)
+static int run_test(int server_fd, int results_fd, bool xdp)
{
int client = -1, srv_client = -1;
int ret = 0;
@@ -144,7 +82,7 @@ static int run_test(int server_fd, int results_fd, bool xdp,
goto err;
}
- client = connect_to_server(addr, len);
+ client = connect_to_fd(server_fd, 0);
if (client == -1)
goto err;
@@ -201,23 +139,23 @@ out:
return ret;
}
-static bool get_port(int server_fd, in_port_t *port)
+static int v6only_true(int fd, const struct post_socket_opts *opts)
{
- struct sockaddr_in addr;
- socklen_t len = sizeof(addr);
+ int mode = true;
- if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
- log_err("Failed to get server addr");
- return false;
- }
+ return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &mode, sizeof(mode));
+}
+
+static int v6only_false(int fd, const struct post_socket_opts *opts)
+{
+ int mode = false;
- /* sin_port and sin6_port are located at the same offset. */
- *port = addr.sin_port;
- return true;
+ return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &mode, sizeof(mode));
}
int main(int argc, char **argv)
{
+ struct network_helper_opts opts = { 0 };
struct sockaddr_in addr4;
struct sockaddr_in6 addr6;
struct sockaddr_in addr4dual;
@@ -259,31 +197,30 @@ int main(int argc, char **argv)
addr6dual.sin6_addr = in6addr_any;
addr6dual.sin6_port = 0;
- server = start_server((const struct sockaddr *)&addr4, sizeof(addr4),
- false);
- if (server == -1 || !get_port(server, &addr4.sin_port))
+ server = start_server_addr(SOCK_STREAM, (struct sockaddr_storage *)&addr4,
+ sizeof(addr4), NULL);
+ if (server == -1)
goto err;
- server_v6 = start_server((const struct sockaddr *)&addr6,
- sizeof(addr6), false);
- if (server_v6 == -1 || !get_port(server_v6, &addr6.sin6_port))
+ opts.post_socket_cb = v6only_true;
+ server_v6 = start_server_addr(SOCK_STREAM, (struct sockaddr_storage *)&addr6,
+ sizeof(addr6), &opts);
+ if (server_v6 == -1)
goto err;
- server_dual = start_server((const struct sockaddr *)&addr6dual,
- sizeof(addr6dual), true);
- if (server_dual == -1 || !get_port(server_dual, &addr4dual.sin_port))
+ opts.post_socket_cb = v6only_false;
+ server_dual = start_server_addr(SOCK_STREAM, (struct sockaddr_storage *)&addr6dual,
+ sizeof(addr6dual), &opts);
+ if (server_dual == -1)
goto err;
- if (run_test(server, results, xdp,
- (const struct sockaddr *)&addr4, sizeof(addr4)))
+ if (run_test(server, results, xdp))
goto err;
- if (run_test(server_v6, results, xdp,
- (const struct sockaddr *)&addr6, sizeof(addr6)))
+ if (run_test(server_v6, results, xdp))
goto err;
- if (run_test(server_dual, results, xdp,
- (const struct sockaddr *)&addr4dual, sizeof(addr4dual)))
+ if (run_test(server_dual, results, xdp))
goto err;
printf("ok\n");
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 28b6646662..d5379a0e6d 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -368,9 +368,23 @@ int delete_module(const char *name, int flags)
int unload_bpf_testmod(bool verbose)
{
+ int ret, cnt = 0;
+
if (kern_sync_rcu())
fprintf(stdout, "Failed to trigger kernel-side RCU sync!\n");
- if (delete_module("bpf_testmod", 0)) {
+
+ for (;;) {
+ ret = delete_module("bpf_testmod", 0);
+ if (!ret || errno != EAGAIN)
+ break;
+ if (++cnt > 10000) {
+ fprintf(stdout, "Unload of bpf_testmod timed out\n");
+ break;
+ }
+ usleep(100);
+ }
+
+ if (ret) {
if (errno == ENOENT) {
if (verbose)
fprintf(stdout, "bpf_testmod.ko is already unloaded.\n");
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 27fd7ed3e4..70e29f316f 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -61,12 +61,7 @@ void free_kallsyms_local(struct ksyms *ksyms)
free(ksyms);
}
-static int ksym_cmp(const void *p1, const void *p2)
-{
- return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
-}
-
-struct ksyms *load_kallsyms_local(void)
+static struct ksyms *load_kallsyms_local_common(ksym_cmp_t cmp_cb)
{
FILE *f;
char func[256], buf[256];
@@ -100,7 +95,7 @@ struct ksyms *load_kallsyms_local(void)
goto error;
}
fclose(f);
- qsort(ksyms->syms, ksyms->sym_cnt, sizeof(struct ksym), ksym_cmp);
+ qsort(ksyms->syms, ksyms->sym_cnt, sizeof(struct ksym), cmp_cb);
return ksyms;
error:
@@ -109,6 +104,21 @@ error:
return NULL;
}
+static int ksym_cmp(const void *p1, const void *p2)
+{
+ return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
+}
+
+struct ksyms *load_kallsyms_local(void)
+{
+ return load_kallsyms_local_common(ksym_cmp);
+}
+
+struct ksyms *load_kallsyms_custom_local(ksym_cmp_t cmp_cb)
+{
+ return load_kallsyms_local_common(cmp_cb);
+}
+
int load_kallsyms(void)
{
pthread_mutex_lock(&ksyms_mutex);
@@ -148,6 +158,28 @@ struct ksym *ksym_search_local(struct ksyms *ksyms, long key)
return &ksyms->syms[0];
}
+struct ksym *search_kallsyms_custom_local(struct ksyms *ksyms, const void *p,
+ ksym_search_cmp_t cmp_cb)
+{
+ int start = 0, mid, end = ksyms->sym_cnt;
+ struct ksym *ks;
+ int result;
+
+ while (start < end) {
+ mid = start + (end - start) / 2;
+ ks = &ksyms->syms[mid];
+ result = cmp_cb(p, ks);
+ if (result < 0)
+ end = mid;
+ else if (result > 0)
+ start = mid + 1;
+ else
+ return ks;
+ }
+
+ return NULL;
+}
+
struct ksym *ksym_search(long key)
{
if (!ksyms)
@@ -201,29 +233,6 @@ out:
return err;
}
-void read_trace_pipe(void)
-{
- int trace_fd;
-
- if (access(TRACEFS_PIPE, F_OK) == 0)
- trace_fd = open(TRACEFS_PIPE, O_RDONLY, 0);
- else
- trace_fd = open(DEBUGFS_PIPE, O_RDONLY, 0);
- if (trace_fd < 0)
- return;
-
- while (1) {
- static char buf[4096];
- ssize_t sz;
-
- sz = read(trace_fd, buf, sizeof(buf) - 1);
- if (sz > 0) {
- buf[sz] = 0;
- puts(buf);
- }
- }
-}
-
ssize_t get_uprobe_offset(const void *addr)
{
size_t start, end, base;
@@ -381,3 +390,43 @@ out:
close(fd);
return err;
}
+
+int read_trace_pipe_iter(void (*cb)(const char *str, void *data), void *data, int iter)
+{
+ size_t buflen, n;
+ char *buf = NULL;
+ FILE *fp = NULL;
+
+ if (access(TRACEFS_PIPE, F_OK) == 0)
+ fp = fopen(TRACEFS_PIPE, "r");
+ else
+ fp = fopen(DEBUGFS_PIPE, "r");
+ if (!fp)
+ return -1;
+
+ /* We do not want to wait forever when iter is specified. */
+ if (iter)
+ fcntl(fileno(fp), F_SETFL, O_NONBLOCK);
+
+ while ((n = getline(&buf, &buflen, fp) >= 0) || errno == EAGAIN) {
+ if (n > 0)
+ cb(buf, data);
+ if (iter && !(--iter))
+ break;
+ }
+
+ free(buf);
+ if (fp)
+ fclose(fp);
+ return 0;
+}
+
+static void trace_pipe_cb(const char *str, void *data)
+{
+ printf("%s", str);
+}
+
+void read_trace_pipe(void)
+{
+ read_trace_pipe_iter(trace_pipe_cb, NULL, 0);
+}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 04fd1da707..2ce873c9f9 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -13,6 +13,9 @@ struct ksym {
};
struct ksyms;
+typedef int (*ksym_cmp_t)(const void *p1, const void *p2);
+typedef int (*ksym_search_cmp_t)(const void *p1, const struct ksym *p2);
+
int load_kallsyms(void);
struct ksym *ksym_search(long key);
long ksym_get_addr(const char *name);
@@ -22,10 +25,16 @@ struct ksym *ksym_search_local(struct ksyms *ksyms, long key);
long ksym_get_addr_local(struct ksyms *ksyms, const char *name);
void free_kallsyms_local(struct ksyms *ksyms);
+struct ksyms *load_kallsyms_custom_local(ksym_cmp_t cmp_cb);
+struct ksym *search_kallsyms_custom_local(struct ksyms *ksyms, const void *p1,
+ ksym_search_cmp_t cmp_cb);
+
/* open kallsyms and find addresses on the fly, faster than load + search. */
int kallsyms_find(const char *sym, unsigned long long *addr);
void read_trace_pipe(void);
+int read_trace_pipe_iter(void (*cb)(const char *str, void *data),
+ void *data, int iter);
ssize_t get_uprobe_offset(const void *addr);
ssize_t get_rel_offset(uintptr_t addr);
diff --git a/tools/testing/selftests/bpf/uprobe_multi.c b/tools/testing/selftests/bpf/uprobe_multi.c
index a61ceab60b..7ffa563ffe 100644
--- a/tools/testing/selftests/bpf/uprobe_multi.c
+++ b/tools/testing/selftests/bpf/uprobe_multi.c
@@ -9,7 +9,7 @@
#define NAME(name, idx) PASTE(name, idx)
-#define DEF(name, idx) int NAME(name, idx)(void) { return 0; }
+#define DEF(name, idx) int __attribute__((weak)) NAME(name, idx)(void) { return 0; }
#define CALL(name, idx) NAME(name, idx)();
#define F(body, name, idx) body(name, idx)
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 244d4996e0..b2854238d4 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -792,10 +792,13 @@ static int parse_stats(const char *stats_str, struct stat_specs *specs)
while ((next = strtok_r(state ? NULL : input, ",", &state))) {
err = parse_stat(next, specs);
- if (err)
+ if (err) {
+ free(input);
return err;
+ }
}
+ free(input);
return 0;
}
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index bdf5d81800..6f9956eed7 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -495,20 +495,6 @@ peek:
return 0;
}
-struct ethtool_channels {
- __u32 cmd;
- __u32 max_rx;
- __u32 max_tx;
- __u32 max_other;
- __u32 max_combined;
- __u32 rx_count;
- __u32 tx_count;
- __u32 other_count;
- __u32 combined_count;
-};
-
-#define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */
-
static int rxq_num(const char *ifname)
{
struct ethtool_channels ch = {
@@ -595,6 +581,8 @@ static void cleanup(void)
if (bpf_obj)
xdp_hw_metadata__destroy(bpf_obj);
+
+ free((void *)saved_hwtstamp_ifname);
}
static void handle_signal(int sig)
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index b1102ee13f..2eac0895b0 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -81,6 +81,7 @@
#include <linux/mman.h>
#include <linux/netdev.h>
#include <linux/bitmap.h>
+#include <linux/ethtool.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <locale.h>
@@ -105,11 +106,15 @@
#include "../kselftest.h"
#include "xsk_xdp_common.h"
+#include <network_helpers.h>
+
static bool opt_verbose;
static bool opt_print_tests;
static enum test_mode opt_mode = TEST_MODE_ALL;
static u32 opt_run_test = RUN_ALL_TESTS;
+void test__fail(void) { /* for network_helpers.c */ }
+
static void __exit_with_error(int error, const char *file, const char *func, int line)
{
ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error,
@@ -239,7 +244,7 @@ static void enable_busy_poll(struct xsk_socket_info *xsk)
(void *)&sock_opt, sizeof(sock_opt)) < 0)
exit_with_error(errno);
- sock_opt = BATCH_SIZE;
+ sock_opt = xsk->batch_size;
if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET,
(void *)&sock_opt, sizeof(sock_opt)) < 0)
exit_with_error(errno);
@@ -409,6 +414,33 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj
}
}
+static int set_ring_size(struct ifobject *ifobj)
+{
+ int ret;
+ u32 ctr = 0;
+
+ while (ctr++ < SOCK_RECONF_CTR) {
+ ret = set_hw_ring_size(ifobj->ifname, &ifobj->ring);
+ if (!ret)
+ break;
+
+ /* Retry if it fails */
+ if (ctr >= SOCK_RECONF_CTR || errno != EBUSY)
+ return -errno;
+
+ usleep(USLEEP_MAX);
+ }
+
+ return ret;
+}
+
+static int hw_ring_size_reset(struct ifobject *ifobj)
+{
+ ifobj->ring.tx_pending = ifobj->set_ring.default_tx;
+ ifobj->ring.rx_pending = ifobj->set_ring.default_rx;
+ return set_ring_size(ifobj);
+}
+
static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
struct ifobject *ifobj_rx)
{
@@ -439,6 +471,7 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
for (j = 0; j < MAX_SOCKETS; j++) {
memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ ifobj->xsk_arr[j].batch_size = DEFAULT_BATCH_SIZE;
if (i == 0)
ifobj->xsk_arr[j].pkt_stream = test->tx_pkt_stream_default;
else
@@ -451,12 +484,16 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
}
}
+ if (ifobj_tx->hw_ring_size_supp)
+ hw_ring_size_reset(ifobj_tx);
+
test->ifobj_tx = ifobj_tx;
test->ifobj_rx = ifobj_rx;
test->current_step = 0;
test->total_steps = 1;
test->nb_sockets = 1;
test->fail = false;
+ test->set_ring = false;
test->mtu = MAX_ETH_PKT_SIZE;
test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog;
test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk;
@@ -1087,7 +1124,7 @@ static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk)
return TEST_CONTINUE;
}
- rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+ rcvd = xsk_ring_cons__peek(&xsk->rx, xsk->batch_size, &idx_rx);
if (!rcvd)
return TEST_CONTINUE;
@@ -1239,7 +1276,8 @@ static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, b
buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len);
/* pkts_in_flight might be negative if many invalid packets are sent */
- if (pkts_in_flight >= (int)((umem_size(umem) - BATCH_SIZE * buffer_len) / buffer_len)) {
+ if (pkts_in_flight >= (int)((umem_size(umem) - xsk->batch_size * buffer_len) /
+ buffer_len)) {
ret = kick_tx(xsk);
if (ret)
return TEST_FAILURE;
@@ -1249,7 +1287,7 @@ static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, b
fds.fd = xsk_socket__fd(xsk->xsk);
fds.events = POLLOUT;
- while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) {
+ while (xsk_ring_prod__reserve(&xsk->tx, xsk->batch_size, &idx) < xsk->batch_size) {
if (use_poll) {
ret = poll(&fds, 1, POLL_TMOUT);
if (timeout) {
@@ -1269,10 +1307,10 @@ static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, b
}
}
- complete_pkts(xsk, BATCH_SIZE);
+ complete_pkts(xsk, xsk->batch_size);
}
- for (i = 0; i < BATCH_SIZE; i++) {
+ for (i = 0; i < xsk->batch_size; i++) {
struct pkt *pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
u32 nb_frags_left, nb_frags, bytes_written = 0;
@@ -1280,9 +1318,9 @@ static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, b
break;
nb_frags = pkt_nb_frags(umem->frame_size, pkt_stream, pkt);
- if (nb_frags > BATCH_SIZE - i) {
+ if (nb_frags > xsk->batch_size - i) {
pkt_stream_cancel(pkt_stream);
- xsk_ring_prod__cancel(&xsk->tx, BATCH_SIZE - i);
+ xsk_ring_prod__cancel(&xsk->tx, xsk->batch_size - i);
break;
}
nb_frags_left = nb_frags;
@@ -1370,7 +1408,7 @@ static int wait_for_tx_completion(struct xsk_socket_info *xsk)
return TEST_FAILURE;
}
- complete_pkts(xsk, BATCH_SIZE);
+ complete_pkts(xsk, xsk->batch_size);
}
return TEST_PASS;
@@ -1860,6 +1898,14 @@ static int testapp_validate_traffic(struct test_spec *test)
return TEST_SKIP;
}
+ if (test->set_ring) {
+ if (ifobj_tx->hw_ring_size_supp)
+ return set_ring_size(ifobj_tx);
+
+ ksft_test_result_skip("Changing HW ring size not supported.\n");
+ return TEST_SKIP;
+ }
+
xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx);
return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx);
}
@@ -2373,6 +2419,50 @@ static int testapp_xdp_metadata_mb(struct test_spec *test)
return testapp_xdp_metadata_copy(test);
}
+static int testapp_hw_sw_min_ring_size(struct test_spec *test)
+{
+ int ret;
+
+ test->set_ring = true;
+ test->total_steps = 2;
+ test->ifobj_tx->ring.tx_pending = DEFAULT_BATCH_SIZE;
+ test->ifobj_tx->ring.rx_pending = DEFAULT_BATCH_SIZE * 2;
+ test->ifobj_tx->xsk->batch_size = 1;
+ test->ifobj_rx->xsk->batch_size = 1;
+ ret = testapp_validate_traffic(test);
+ if (ret)
+ return ret;
+
+ /* Set batch size to hw_ring_size - 1 */
+ test->ifobj_tx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
+ test->ifobj_rx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_hw_sw_max_ring_size(struct test_spec *test)
+{
+ u32 max_descs = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2;
+ int ret;
+
+ test->set_ring = true;
+ test->total_steps = 2;
+ test->ifobj_tx->ring.tx_pending = test->ifobj_tx->ring.tx_max_pending;
+ test->ifobj_tx->ring.rx_pending = test->ifobj_tx->ring.rx_max_pending;
+ test->ifobj_rx->umem->num_frames = max_descs;
+ test->ifobj_rx->xsk->rxqsize = max_descs;
+ test->ifobj_tx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+ test->ifobj_rx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+
+ ret = testapp_validate_traffic(test);
+ if (ret)
+ return ret;
+
+ /* Set batch_size to 4095 */
+ test->ifobj_tx->xsk->batch_size = max_descs - 1;
+ test->ifobj_rx->xsk->batch_size = max_descs - 1;
+ return testapp_validate_traffic(test);
+}
+
static void run_pkt_test(struct test_spec *test)
{
int ret;
@@ -2477,7 +2567,9 @@ static const struct test_spec tests[] = {
{.name = "ALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_aligned_inv_desc_mb},
{.name = "UNALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_unaligned_inv_desc_mb},
{.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags},
-};
+ {.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size},
+ {.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size},
+ };
static void print_tests(void)
{
@@ -2497,6 +2589,7 @@ int main(int argc, char **argv)
int modes = TEST_MODE_SKB + 1;
struct test_spec test;
bool shared_netdev;
+ int ret;
/* Use libbpf 1.0 API mode */
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
@@ -2534,6 +2627,13 @@ int main(int argc, char **argv)
modes++;
}
+ ret = get_hw_ring_size(ifobj_tx->ifname, &ifobj_tx->ring);
+ if (!ret) {
+ ifobj_tx->hw_ring_size_supp = true;
+ ifobj_tx->set_ring.default_tx = ifobj_tx->ring.tx_pending;
+ ifobj_tx->set_ring.default_rx = ifobj_tx->ring.rx_pending;
+ }
+
init_iface(ifobj_rx, worker_testapp_validate_rx);
init_iface(ifobj_tx, worker_testapp_validate_tx);
@@ -2581,6 +2681,9 @@ int main(int argc, char **argv)
}
}
+ if (ifobj_tx->hw_ring_size_supp)
+ hw_ring_size_reset(ifobj_tx);
+
pkt_stream_delete(tx_pkt_stream_default);
pkt_stream_delete(rx_pkt_stream_default);
xsk_unload_xdp_programs(ifobj_tx);
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index f174df2d69..906de5fab7 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -44,7 +44,7 @@
#define MAX_ETH_JUMBO_SIZE 9000
#define USLEEP_MAX 10000
#define SOCK_RECONF_CTR 10
-#define BATCH_SIZE 64
+#define DEFAULT_BATCH_SIZE 64
#define POLL_TMOUT 1000
#define THREAD_TMOUT 3
#define DEFAULT_PKT_CNT (4 * 1024)
@@ -91,6 +91,7 @@ struct xsk_socket_info {
struct pkt_stream *pkt_stream;
u32 outstanding_tx;
u32 rxqsize;
+ u32 batch_size;
u8 dst_mac[ETH_ALEN];
u8 src_mac[ETH_ALEN];
};
@@ -113,6 +114,11 @@ struct pkt_stream {
bool verbatim;
};
+struct set_hw_ring {
+ u32 default_tx;
+ u32 default_rx;
+};
+
struct ifobject;
struct test_spec;
typedef int (*validation_func_t)(struct ifobject *ifobj);
@@ -129,6 +135,8 @@ struct ifobject {
struct xsk_xdp_progs *xdp_progs;
struct bpf_map *xskmap;
struct bpf_program *xdp_prog;
+ struct ethtool_ringparam ring;
+ struct set_hw_ring set_ring;
enum test_mode mode;
int ifindex;
int mtu;
@@ -145,6 +153,7 @@ struct ifobject {
bool unaligned_supp;
bool multi_buff_supp;
bool multi_buff_zc_supp;
+ bool hw_ring_size_supp;
};
struct test_spec {
@@ -162,6 +171,7 @@ struct test_spec {
u16 current_step;
u16 nb_sockets;
bool fail;
+ bool set_ring;
enum test_mode mode;
char name[MAX_TEST_NAME_SIZE];
};
diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c
index 7cde07a5df..47bad7ddc5 100644
--- a/tools/testing/selftests/capabilities/test_execve.c
+++ b/tools/testing/selftests/capabilities/test_execve.c
@@ -82,7 +82,7 @@ static bool create_and_enter_ns(uid_t inner_uid)
{
uid_t outer_uid;
gid_t outer_gid;
- int i;
+ int i, ret;
bool have_outer_privilege;
outer_uid = getuid();
@@ -97,7 +97,10 @@ static bool create_and_enter_ns(uid_t inner_uid)
ksft_exit_fail_msg("setresuid - %s\n", strerror(errno));
// Re-enable effective caps
- capng_get_caps_process();
+ ret = capng_get_caps_process();
+ if (ret == -1)
+ ksft_exit_fail_msg("capng_get_caps_process failed\n");
+
for (i = 0; i < CAP_LAST_CAP; i++)
if (capng_have_capability(CAPNG_PERMITTED, i))
capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, i);
@@ -207,6 +210,7 @@ static void exec_validate_cap(bool eff, bool perm, bool inh, bool ambient)
static int do_tests(int uid, const char *our_path)
{
+ int ret;
bool have_outer_privilege = create_and_enter_ns(uid);
int ourpath_fd = open(our_path, O_RDONLY | O_DIRECTORY);
@@ -250,7 +254,9 @@ static int do_tests(int uid, const char *our_path)
ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
}
- capng_get_caps_process();
+ ret = capng_get_caps_process();
+ if (ret == -1)
+ ksft_exit_fail_msg("capng_get_caps_process failed\n");
/* Make sure that i starts out clear */
capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
diff --git a/tools/testing/selftests/capabilities/validate_cap.c b/tools/testing/selftests/capabilities/validate_cap.c
index 60b4e7b716..65f2a1c892 100644
--- a/tools/testing/selftests/capabilities/validate_cap.c
+++ b/tools/testing/selftests/capabilities/validate_cap.c
@@ -28,6 +28,7 @@ static bool bool_arg(char **argv, int i)
int main(int argc, char **argv)
{
const char *atsec = "";
+ int ret;
/*
* Be careful just in case a setgid or setcapped copy of this
@@ -44,7 +45,11 @@ int main(int argc, char **argv)
atsec = " (AT_SECURE is not set)";
#endif
- capng_get_caps_process();
+ ret = capng_get_caps_process();
+ if (ret == -1) {
+ ksft_print_msg("capng_get_caps_process failed\n");
+ return 1;
+ }
if (capng_have_capability(CAPNG_EFFECTIVE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 1)) {
ksft_print_msg("Wrong effective state%s\n", atsec);
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 00b4419289..16461dc0ff 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -4,7 +4,7 @@ CFLAGS += -Wall -pthread
all: ${HELPER_PROGS}
TEST_FILES := with_stress.sh
-TEST_PROGS := test_stress.sh test_cpuset_prs.sh
+TEST_PROGS := test_stress.sh test_cpuset_prs.sh test_cpuset_v1_hp.sh
TEST_GEN_FILES := wait_inotify
TEST_GEN_PROGS = test_memcontrol
TEST_GEN_PROGS += test_kmem
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
index 89e8519fb2..e8d04ac9e3 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -18,7 +18,7 @@
*/
static inline int values_close(long a, long b, int err)
{
- return abs(a - b) <= (a + b) / 100 * err;
+ return labs(a - b) <= (a + b) / 100 * err;
}
extern int cg_find_unified_root(char *root, size_t len, bool *nsdelegate);
diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c
index 186bf96f6a..dad2ed82f3 100644
--- a/tools/testing/selftests/cgroup/test_cpu.c
+++ b/tools/testing/selftests/cgroup/test_cpu.c
@@ -237,7 +237,7 @@ run_cpucg_weight_test(
{
int ret = KSFT_FAIL, i;
char *parent = NULL;
- struct cpu_hogger children[3] = {NULL};
+ struct cpu_hogger children[3] = {};
parent = cg_name(root, "cpucg_test_0");
if (!parent)
@@ -408,7 +408,7 @@ run_cpucg_nested_weight_test(const char *root, bool overprovisioned)
{
int ret = KSFT_FAIL, i;
char *parent = NULL, *child = NULL;
- struct cpu_hogger leaf[3] = {NULL};
+ struct cpu_hogger leaf[3] = {};
long nested_leaf_usage, child_usage;
int nprocs = get_nprocs();
diff --git a/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh
new file mode 100755
index 0000000000..3f45512fb5
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test the special cpuset v1 hotplug case where a cpuset become empty of
+# CPUs will force migration of tasks out to an ancestor.
+#
+
+skip_test() {
+ echo "$1"
+ echo "Test SKIPPED"
+ exit 4 # ksft_skip
+}
+
+[[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!"
+
+# Find cpuset v1 mount point
+CPUSET=$(mount -t cgroup | grep cpuset | head -1 | awk -e '{print $3}')
+[[ -n "$CPUSET" ]] || skip_test "cpuset v1 mount point not found!"
+
+#
+# Create a test cpuset, put a CPU and a task there and offline that CPU
+#
+TDIR=test$$
+[[ -d $CPUSET/$TDIR ]] || mkdir $CPUSET/$TDIR
+echo 1 > $CPUSET/$TDIR/cpuset.cpus
+echo 0 > $CPUSET/$TDIR/cpuset.mems
+sleep 10&
+TASK=$!
+echo $TASK > $CPUSET/$TDIR/tasks
+NEWCS=$(cat /proc/$TASK/cpuset)
+[[ $NEWCS != "/$TDIR" ]] && {
+ echo "Unexpected cpuset $NEWCS, test FAILED!"
+ exit 1
+}
+
+echo 0 > /sys/devices/system/cpu/cpu1/online
+sleep 0.5
+echo 1 > /sys/devices/system/cpu/cpu1/online
+NEWCS=$(cat /proc/$TASK/cpuset)
+rmdir $CPUSET/$TDIR
+[[ $NEWCS != "/" ]] && {
+ echo "cpuset $NEWCS, test FAILED!"
+ exit 1
+}
+echo "Test PASSED"
+exit 0
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
index 137506db03..96693d8772 100644
--- a/tools/testing/selftests/cgroup/test_kmem.c
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -192,7 +192,7 @@ static int test_kmem_memcg_deletion(const char *root)
goto cleanup;
sum = anon + file + kernel + sock;
- if (abs(sum - current) < MAX_VMSTAT_ERROR) {
+ if (labs(sum - current) < MAX_VMSTAT_ERROR) {
ret = KSFT_PASS;
} else {
printf("memory.current = %ld\n", current);
@@ -380,7 +380,7 @@ static int test_percpu_basic(const char *root)
current = cg_read_long(parent, "memory.current");
percpu = cg_read_key_long(parent, "memory.stat", "percpu ");
- if (current > 0 && percpu > 0 && abs(current - percpu) <
+ if (current > 0 && percpu > 0 && labs(current - percpu) <
MAX_VMSTAT_ERROR)
ret = KSFT_PASS;
else
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index b462416b38..41ae8047b8 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -716,7 +716,9 @@ static bool reclaim_until(const char *memcg, long goal)
*/
static int test_memcg_reclaim(const char *root)
{
- int ret = KSFT_FAIL, fd, retries;
+ int ret = KSFT_FAIL;
+ int fd = -1;
+ int retries;
char *memcg;
long current, expected_usage;
diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c
index ef7f395453..190096017f 100644
--- a/tools/testing/selftests/cgroup/test_zswap.c
+++ b/tools/testing/selftests/cgroup/test_zswap.c
@@ -50,7 +50,7 @@ static int get_zswap_stored_pages(size_t *value)
return read_int("/sys/kernel/debug/zswap/stored_pages", value);
}
-static int get_cg_wb_count(const char *cg)
+static long get_cg_wb_count(const char *cg)
{
return cg_read_key_long(cg, "memory.stat", "zswpwb");
}
@@ -249,6 +249,132 @@ out:
}
/*
+ * Attempt writeback with the following steps:
+ * 1. Allocate memory.
+ * 2. Reclaim memory equal to the amount that was allocated in step 1.
+ This will move it into zswap.
+ * 3. Save current zswap usage.
+ * 4. Move the memory allocated in step 1 back in from zswap.
+ * 5. Set zswap.max to half the amount that was recorded in step 3.
+ * 6. Attempt to reclaim memory equal to the amount that was allocated,
+ this will either trigger writeback if it's enabled, or reclamation
+ will fail if writeback is disabled as there isn't enough zswap space.
+ */
+static int attempt_writeback(const char *cgroup, void *arg)
+{
+ long pagesize = sysconf(_SC_PAGESIZE);
+ char *test_group = arg;
+ size_t memsize = MB(4);
+ char buf[pagesize];
+ long zswap_usage;
+ bool wb_enabled;
+ int ret = -1;
+ char *mem;
+
+ wb_enabled = cg_read_long(test_group, "memory.zswap.writeback");
+ mem = (char *)malloc(memsize);
+ if (!mem)
+ return ret;
+
+ /*
+ * Fill half of each page with increasing data, and keep other
+ * half empty, this will result in data that is still compressible
+ * and ends up in zswap, with material zswap usage.
+ */
+ for (int i = 0; i < pagesize; i++)
+ buf[i] = i < pagesize/2 ? (char) i : 0;
+
+ for (int i = 0; i < memsize; i += pagesize)
+ memcpy(&mem[i], buf, pagesize);
+
+ /* Try and reclaim allocated memory */
+ if (cg_write_numeric(test_group, "memory.reclaim", memsize)) {
+ ksft_print_msg("Failed to reclaim all of the requested memory\n");
+ goto out;
+ }
+
+ zswap_usage = cg_read_long(test_group, "memory.zswap.current");
+
+ /* zswpin */
+ for (int i = 0; i < memsize; i += pagesize) {
+ if (memcmp(&mem[i], buf, pagesize)) {
+ ksft_print_msg("invalid memory\n");
+ goto out;
+ }
+ }
+
+ if (cg_write_numeric(test_group, "memory.zswap.max", zswap_usage/2))
+ goto out;
+
+ /*
+ * If writeback is enabled, trying to reclaim memory now will trigger a
+ * writeback as zswap.max is half of what was needed when reclaim ran the first time.
+ * If writeback is disabled, memory reclaim will fail as zswap is limited and
+ * it can't writeback to swap.
+ */
+ ret = cg_write_numeric(test_group, "memory.reclaim", memsize);
+ if (!wb_enabled)
+ ret = (ret == -EAGAIN) ? 0 : -1;
+
+out:
+ free(mem);
+ return ret;
+}
+
+/* Test to verify the zswap writeback path */
+static int test_zswap_writeback(const char *root, bool wb)
+{
+ long zswpwb_before, zswpwb_after;
+ int ret = KSFT_FAIL;
+ char *test_group;
+
+ test_group = cg_name(root, "zswap_writeback_test");
+ if (!test_group)
+ goto out;
+ if (cg_create(test_group))
+ goto out;
+ if (cg_write(test_group, "memory.zswap.writeback", wb ? "1" : "0"))
+ goto out;
+
+ zswpwb_before = get_cg_wb_count(test_group);
+ if (zswpwb_before != 0) {
+ ksft_print_msg("zswpwb_before = %ld instead of 0\n", zswpwb_before);
+ goto out;
+ }
+
+ if (cg_run(test_group, attempt_writeback, (void *) test_group))
+ goto out;
+
+ /* Verify that zswap writeback occurred only if writeback was enabled */
+ zswpwb_after = get_cg_wb_count(test_group);
+ if (zswpwb_after < 0)
+ goto out;
+
+ if (wb != !!zswpwb_after) {
+ ksft_print_msg("zswpwb_after is %ld while wb is %s",
+ zswpwb_after, wb ? "enabled" : "disabled");
+ goto out;
+ }
+
+ ret = KSFT_PASS;
+
+out:
+ cg_destroy(test_group);
+ free(test_group);
+ return ret;
+}
+
+static int test_zswap_writeback_enabled(const char *root)
+{
+ return test_zswap_writeback(root, true);
+}
+
+static int test_zswap_writeback_disabled(const char *root)
+{
+ return test_zswap_writeback(root, false);
+}
+
+/*
* When trying to store a memcg page in zswap, if the memcg hits its memory
* limit in zswap, writeback should affect only the zswapped pages of that
* memcg.
@@ -257,7 +383,7 @@ static int test_no_invasive_cgroup_shrink(const char *root)
{
int ret = KSFT_FAIL;
size_t control_allocation_size = MB(10);
- char *control_allocation, *wb_group = NULL, *control_group = NULL;
+ char *control_allocation = NULL, *wb_group = NULL, *control_group = NULL;
wb_group = setup_test_group_1M(root, "per_memcg_wb_test1");
if (!wb_group)
@@ -342,7 +468,7 @@ static int test_no_kmem_bypass(const char *root)
struct sysinfo sys_info;
int ret = KSFT_FAIL;
int child_status;
- char *test_group;
+ char *test_group = NULL;
pid_t child_pid;
/* Read sys info and compute test values accordingly */
@@ -364,8 +490,6 @@ static int test_no_kmem_bypass(const char *root)
trigger_allocation_size = sys_info.totalram / 20;
/* Set up test memcg */
- if (cg_write(root, "cgroup.subtree_control", "+memory"))
- goto out;
test_group = cg_name(root, "kmem_bypass_test");
if (!test_group)
goto out;
@@ -425,6 +549,8 @@ struct zswap_test {
T(test_zswap_usage),
T(test_swapin_nozswap),
T(test_zswapin),
+ T(test_zswap_writeback_enabled),
+ T(test_zswap_writeback_disabled),
T(test_no_kmem_bypass),
T(test_no_invasive_cgroup_shrink),
};
diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
index 3c9bf0cd82..e61f07973c 100644
--- a/tools/testing/selftests/clone3/clone3.c
+++ b/tools/testing/selftests/clone3/clone3.c
@@ -95,9 +95,14 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
getpid(), pid);
if (waitpid(-1, &status, __WALL) < 0) {
- ksft_print_msg("Child returned %s\n", strerror(errno));
+ ksft_print_msg("waitpid() returned %s\n", strerror(errno));
return -errno;
}
+ if (!WIFEXITED(status)) {
+ ksft_print_msg("Child did not exit normally, status 0x%x\n",
+ status);
+ return EXIT_FAILURE;
+ }
if (WEXITSTATUS(status))
return WEXITSTATUS(status);
diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c
index 54a8b2445b..ce04267868 100644
--- a/tools/testing/selftests/clone3/clone3_clear_sighand.c
+++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c
@@ -120,5 +120,5 @@ int main(int argc, char **argv)
test_clone3_clear_sighand();
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c
index ed785afb60..bfb0da2b4f 100644
--- a/tools/testing/selftests/clone3/clone3_set_tid.c
+++ b/tools/testing/selftests/clone3/clone3_set_tid.c
@@ -114,7 +114,8 @@ static int call_clone3_set_tid(pid_t *set_tid,
return WEXITSTATUS(status);
}
-static void test_clone3_set_tid(pid_t *set_tid,
+static void test_clone3_set_tid(const char *desc,
+ pid_t *set_tid,
size_t set_tid_size,
int flags,
int expected,
@@ -129,17 +130,13 @@ static void test_clone3_set_tid(pid_t *set_tid,
ret = call_clone3_set_tid(set_tid, set_tid_size, flags, expected_pid,
wait_for_it);
ksft_print_msg(
- "[%d] clone3() with CLONE_SET_TID %d says :%d - expected %d\n",
+ "[%d] clone3() with CLONE_SET_TID %d says: %d - expected %d\n",
getpid(), set_tid[0], ret, expected);
- if (ret != expected)
- ksft_test_result_fail(
- "[%d] Result (%d) is different than expected (%d)\n",
- getpid(), ret, expected);
- else
- ksft_test_result_pass(
- "[%d] Result (%d) matches expectation (%d)\n",
- getpid(), ret, expected);
+
+ ksft_test_result(ret == expected, "%s with %zu TIDs and flags 0x%x\n",
+ desc, set_tid_size, flags);
}
+
int main(int argc, char *argv[])
{
FILE *f;
@@ -172,73 +169,91 @@ int main(int argc, char *argv[])
/* Try invalid settings */
memset(&set_tid, 0, sizeof(set_tid));
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, 0 TID",
+ set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, 0 TID",
+ set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
- -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, 0 TID",
+ set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
+ -EINVAL, 0, 0);
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, 0 TID",
+ set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
/*
* This can actually work if this test running in a MAX_PID_NS_LEVEL - 1
* nested PID namespace.
*/
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, 0 TID",
+ set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
memset(&set_tid, 0xff, sizeof(set_tid));
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, TID all 1s",
+ set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, TID all 1s",
+ set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
- -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, TID all 1s",
+ set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
+ -EINVAL, 0, 0);
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, TID all 1s",
+ set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
/*
* This can actually work if this test running in a MAX_PID_NS_LEVEL - 1
* nested PID namespace.
*/
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, TID all 1s",
+ set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
memset(&set_tid, 0, sizeof(set_tid));
/* Try with an invalid PID */
set_tid[0] = 0;
- test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("valid size, 0 TID",
+ set_tid, 1, 0, -EINVAL, 0, 0);
set_tid[0] = -1;
- test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("valid size, -1 TID",
+ set_tid, 1, 0, -EINVAL, 0, 0);
/* Claim that the set_tid array actually contains 2 elements. */
- test_clone3_set_tid(set_tid, 2, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("2 TIDs, -1 and 0",
+ set_tid, 2, 0, -EINVAL, 0, 0);
/* Try it in a new PID namespace */
if (uid == 0)
- test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
+ test_clone3_set_tid("valid size, -1 TID",
+ set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
else
ksft_test_result_skip("Clone3() with set_tid requires root\n");
/* Try with a valid PID (1) this should return -EEXIST. */
set_tid[0] = 1;
if (uid == 0)
- test_clone3_set_tid(set_tid, 1, 0, -EEXIST, 0, 0);
+ test_clone3_set_tid("duplicate PID 1",
+ set_tid, 1, 0, -EEXIST, 0, 0);
else
ksft_test_result_skip("Clone3() with set_tid requires root\n");
/* Try it in a new PID namespace */
if (uid == 0)
- test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, 0, 0, 0);
+ test_clone3_set_tid("duplicate PID 1",
+ set_tid, 1, CLONE_NEWPID, 0, 0, 0);
else
ksft_test_result_skip("Clone3() with set_tid requires root\n");
/* pid_max should fail everywhere */
set_tid[0] = pid_max;
- test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("set TID to maximum",
+ set_tid, 1, 0, -EINVAL, 0, 0);
if (uid == 0)
- test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
+ test_clone3_set_tid("set TID to maximum",
+ set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
else
ksft_test_result_skip("Clone3() with set_tid requires root\n");
@@ -262,10 +277,12 @@ int main(int argc, char *argv[])
/* After the child has finished, its PID should be free. */
set_tid[0] = pid;
- test_clone3_set_tid(set_tid, 1, 0, 0, 0, 0);
+ test_clone3_set_tid("reallocate child TID",
+ set_tid, 1, 0, 0, 0, 0);
/* This should fail as there is no PID 1 in that namespace */
- test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
+ test_clone3_set_tid("duplicate child TID",
+ set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
/*
* Creating a process with PID 1 in the newly created most nested
@@ -274,7 +291,8 @@ int main(int argc, char *argv[])
*/
set_tid[0] = 1;
set_tid[1] = pid;
- test_clone3_set_tid(set_tid, 2, CLONE_NEWPID, 0, pid, 0);
+ test_clone3_set_tid("create PID 1 in new NS",
+ set_tid, 2, CLONE_NEWPID, 0, pid, 0);
ksft_print_msg("unshare PID namespace\n");
if (unshare(CLONE_NEWPID) == -1)
@@ -284,7 +302,8 @@ int main(int argc, char *argv[])
set_tid[0] = pid;
/* This should fail as there is no PID 1 in that namespace */
- test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("duplicate PID 1",
+ set_tid, 1, 0, -EINVAL, 0, 0);
/* Let's create a PID 1 */
ns_pid = fork();
@@ -295,21 +314,25 @@ int main(int argc, char *argv[])
*/
set_tid[0] = 43;
set_tid[1] = -1;
- test_clone3_set_tid(set_tid, 2, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("check leak on invalid TID -1",
+ set_tid, 2, 0, -EINVAL, 0, 0);
set_tid[0] = 43;
set_tid[1] = pid;
- test_clone3_set_tid(set_tid, 2, 0, 0, 43, 0);
+ test_clone3_set_tid("check leak on invalid specific TID",
+ set_tid, 2, 0, 0, 43, 0);
ksft_print_msg("Child in PID namespace has PID %d\n", getpid());
set_tid[0] = 2;
- test_clone3_set_tid(set_tid, 1, 0, 0, 2, 0);
+ test_clone3_set_tid("create PID 2 in child NS",
+ set_tid, 1, 0, 0, 2, 0);
set_tid[0] = 1;
set_tid[1] = -1;
set_tid[2] = pid;
/* This should fail as there is invalid PID at level '1'. */
- test_clone3_set_tid(set_tid, 3, CLONE_NEWPID, -EINVAL, 0, 0);
+ test_clone3_set_tid("fail due to invalid TID at level 1",
+ set_tid, 3, CLONE_NEWPID, -EINVAL, 0, 0);
set_tid[0] = 1;
set_tid[1] = 42;
@@ -319,13 +342,15 @@ int main(int argc, char *argv[])
* namespaces. Again assuming this is running in the host's
* PID namespace. Not yet nested.
*/
- test_clone3_set_tid(set_tid, 4, CLONE_NEWPID, -EINVAL, 0, 0);
+ test_clone3_set_tid("fail due to too few active PID NSs",
+ set_tid, 4, CLONE_NEWPID, -EINVAL, 0, 0);
/*
* This should work and from the parent we should see
* something like 'NSpid: pid 42 1'.
*/
- test_clone3_set_tid(set_tid, 3, CLONE_NEWPID, 0, 42, true);
+ test_clone3_set_tid("verify that we have 3 PID NSs",
+ set_tid, 3, CLONE_NEWPID, 0, 42, true);
child_exit(ksft_cnt.ksft_fail);
}
@@ -380,16 +405,14 @@ int main(int argc, char *argv[])
ksft_cnt.ksft_pass += 6 - (ksft_cnt.ksft_fail - WEXITSTATUS(status));
ksft_cnt.ksft_fail = WEXITSTATUS(status);
- if (ns3 == pid && ns2 == 42 && ns1 == 1)
- ksft_test_result_pass(
- "PIDs in all namespaces as expected (%d,%d,%d)\n",
- ns3, ns2, ns1);
- else
- ksft_test_result_fail(
- "PIDs in all namespaces not as expected (%d,%d,%d)\n",
- ns3, ns2, ns1);
+ ksft_print_msg("Expecting PIDs %d, 42, 1\n", pid);
+ ksft_print_msg("Have PIDs in namespaces: %d, %d, %d\n", ns3, ns2, ns1);
+ ksft_test_result(ns3 == pid && ns2 == 42 && ns1 == 1,
+ "PIDs in all namespaces as expected\n");
out:
ret = 0;
- return !ret ? ksft_exit_pass() : ksft_exit_fail();
+ if (ret)
+ ksft_exit_fail();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index c59e4adb90..991c473e38 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -17,6 +17,15 @@
#include "../kselftest_harness.h"
#include "../clone3/clone3_selftests.h"
+
+#ifndef F_LINUX_SPECIFIC_BASE
+#define F_LINUX_SPECIFIC_BASE 1024
+#endif
+
+#ifndef F_DUPFD_QUERY
+#define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3)
+#endif
+
static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
unsigned int flags)
{
@@ -45,6 +54,15 @@ TEST(core_close_range)
SKIP(return, "close_range() syscall not supported");
}
+ for (i = 0; i < 100; i++) {
+ ret = fcntl(open_fds[i], F_DUPFD_QUERY, open_fds[i + 1]);
+ if (ret < 0) {
+ EXPECT_EQ(errno, EINVAL);
+ } else {
+ EXPECT_EQ(ret, 0);
+ }
+ }
+
EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0));
for (i = 0; i <= 50; i++)
@@ -358,7 +376,7 @@ TEST(close_range_cloexec_unshare)
*/
TEST(close_range_cloexec_syzbot)
{
- int fd1, fd2, fd3, flags, ret, status;
+ int fd1, fd2, fd3, fd4, flags, ret, status;
pid_t pid;
struct __clone_args args = {
.flags = CLONE_FILES,
@@ -372,6 +390,13 @@ TEST(close_range_cloexec_syzbot)
fd2 = dup2(fd1, 1000);
EXPECT_GT(fd2, 0);
+ flags = fcntl(fd1, F_DUPFD_QUERY, fd2);
+ if (flags < 0) {
+ EXPECT_EQ(errno, EINVAL);
+ } else {
+ EXPECT_EQ(flags, 1);
+ }
+
pid = sys_clone3(&args, sizeof(args));
ASSERT_GE(pid, 0);
@@ -396,6 +421,15 @@ TEST(close_range_cloexec_syzbot)
fd3 = dup2(fd1, 42);
EXPECT_GT(fd3, 0);
+ flags = fcntl(fd1, F_DUPFD_QUERY, fd3);
+ if (flags < 0) {
+ EXPECT_EQ(errno, EINVAL);
+ } else {
+ EXPECT_EQ(flags, 1);
+ }
+
+
+
/*
* Duplicating the file descriptor must remove the
* FD_CLOEXEC flag.
@@ -426,6 +460,24 @@ TEST(close_range_cloexec_syzbot)
fd3 = dup2(fd1, 42);
EXPECT_GT(fd3, 0);
+ flags = fcntl(fd1, F_DUPFD_QUERY, fd3);
+ if (flags < 0) {
+ EXPECT_EQ(errno, EINVAL);
+ } else {
+ EXPECT_EQ(flags, 1);
+ }
+
+ fd4 = open("/dev/null", O_RDWR);
+ EXPECT_GT(fd4, 0);
+
+ /* Same inode, different file pointers. */
+ flags = fcntl(fd1, F_DUPFD_QUERY, fd4);
+ if (flags < 0) {
+ EXPECT_EQ(errno, EINVAL);
+ } else {
+ EXPECT_EQ(flags, 0);
+ }
+
flags = fcntl(fd3, F_GETFD);
EXPECT_GT(flags, -1);
EXPECT_EQ(flags & FD_CLOEXEC, 0);
@@ -433,6 +485,7 @@ TEST(close_range_cloexec_syzbot)
EXPECT_EQ(close(fd1), 0);
EXPECT_EQ(close(fd2), 0);
EXPECT_EQ(close(fd3), 0);
+ EXPECT_EQ(close(fd4), 0);
}
/*
diff --git a/tools/testing/selftests/cpufreq/cpufreq.sh b/tools/testing/selftests/cpufreq/cpufreq.sh
index b583a2fb45..a8b1dbc0a3 100755
--- a/tools/testing/selftests/cpufreq/cpufreq.sh
+++ b/tools/testing/selftests/cpufreq/cpufreq.sh
@@ -178,8 +178,7 @@ cpufreq_basic_tests()
count=$(count_cpufreq_managed_cpus)
if [ $count = 0 ]; then
- printf "No cpu is managed by cpufreq core, exiting\n"
- exit;
+ ktap_exit_fail_msg "No cpu is managed by cpufreq core, exiting\n"
else
printf "CPUFreq manages: $count CPUs\n\n"
fi
diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh
index 60ce18ed06..a0eb84cf71 100755
--- a/tools/testing/selftests/cpufreq/main.sh
+++ b/tools/testing/selftests/cpufreq/main.sh
@@ -7,15 +7,15 @@ source governor.sh
source module.sh
source special-tests.sh
+DIR="$(dirname $(readlink -f "$0"))"
+source "${DIR}"/../kselftest/ktap_helpers.sh
+
FUNC=basic # do basic tests by default
OUTFILE=cpufreq_selftest
SYSFS=
CPUROOT=
CPUFREQROOT=
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
helpme()
{
printf "Usage: $0 [-h] [-todg args]
@@ -32,7 +32,7 @@ helpme()
[-d <driver's module name: only with \"-t modtest>\"]
[-g <governor's module name: only with \"-t modtest>\"]
\n"
- exit 2
+ exit "${KSFT_FAIL}"
}
prerequisite()
@@ -40,8 +40,8 @@ prerequisite()
msg="skip all tests:"
if [ $UID != 0 ]; then
- echo $msg must be run as root >&2
- exit $ksft_skip
+ ktap_skip_all "$msg must be run as root"
+ exit "${KSFT_SKIP}"
fi
taskset -p 01 $$
@@ -49,21 +49,21 @@ prerequisite()
SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
if [ ! -d "$SYSFS" ]; then
- echo $msg sysfs is not mounted >&2
- exit 2
+ ktap_skip_all "$msg sysfs is not mounted"
+ exit "${KSFT_SKIP}"
fi
CPUROOT=$SYSFS/devices/system/cpu
CPUFREQROOT="$CPUROOT/cpufreq"
if ! ls $CPUROOT/cpu* > /dev/null 2>&1; then
- echo $msg cpus not available in sysfs >&2
- exit 2
+ ktap_skip_all "$msg cpus not available in sysfs"
+ exit "${KSFT_SKIP}"
fi
if ! ls $CPUROOT/cpufreq > /dev/null 2>&1; then
- echo $msg cpufreq directory not available in sysfs >&2
- exit 2
+ ktap_skip_all "$msg cpufreq directory not available in sysfs"
+ exit "${KSFT_SKIP}"
fi
}
@@ -105,8 +105,7 @@ do_test()
count=$(count_cpufreq_managed_cpus)
if [ $count = 0 -a $FUNC != "modtest" ]; then
- echo "No cpu is managed by cpufreq core, exiting"
- exit 2;
+ ktap_exit_fail_msg "No cpu is managed by cpufreq core, exiting"
fi
case "$FUNC" in
@@ -125,8 +124,7 @@ do_test()
"modtest")
# Do we have modules in place?
if [ -z $DRIVER_MOD ] && [ -z $GOVERNOR_MOD ]; then
- echo "No driver or governor module passed with -d or -g"
- exit 2;
+ ktap_exit_fail_msg "No driver or governor module passed with -d or -g"
fi
if [ $DRIVER_MOD ]; then
@@ -137,8 +135,7 @@ do_test()
fi
else
if [ $count = 0 ]; then
- echo "No cpu is managed by cpufreq core, exiting"
- exit 2;
+ ktap_exit_fail_msg "No cpu is managed by cpufreq core, exiting"
fi
module_governor_test $GOVERNOR_MOD
@@ -162,7 +159,7 @@ do_test()
;;
*)
- echo "Invalid [-f] function type"
+ ktap_print_msg "Invalid [-f] function type"
helpme
;;
esac
@@ -186,13 +183,25 @@ dmesg_dumps()
dmesg >> $1.dmesg_full.txt
}
+ktap_print_header
+
# Parse arguments
parse_arguments $@
+ktap_set_plan 1
+
# Make sure all requirements are met
prerequisite
# Run requested functions
clear_dumps $OUTFILE
do_test | tee -a $OUTFILE.txt
+if [ "${PIPESTATUS[0]}" -ne 0 ]; then
+ exit ${PIPESTATUS[0]};
+fi
dmesg_dumps $OUTFILE
+
+ktap_test_pass "Completed successfully"
+
+ktap_print_totals
+exit "${KSFT_PASS}"
diff --git a/tools/testing/selftests/cpufreq/module.sh b/tools/testing/selftests/cpufreq/module.sh
index 22563cd122..7f2667e0ae 100755
--- a/tools/testing/selftests/cpufreq/module.sh
+++ b/tools/testing/selftests/cpufreq/module.sh
@@ -24,16 +24,14 @@ test_basic_insmod_rmmod()
# insert module
insmod $1
if [ $? != 0 ]; then
- printf "Insmod $1 failed\n"
- exit;
+ ktap_exit_fail_msg "Insmod $1 failed\n"
fi
printf "Removing $1 module\n"
# remove module
rmmod $1
if [ $? != 0 ]; then
- printf "rmmod $1 failed\n"
- exit;
+ ktap_exit_fail_msg "rmmod $1 failed\n"
fi
printf "\n"
diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
index 789d6949c2..29a22f50e7 100644
--- a/tools/testing/selftests/damon/Makefile
+++ b/tools/testing/selftests/damon/Makefile
@@ -7,16 +7,21 @@ TEST_GEN_FILES += debugfs_target_ids_pid_leak
TEST_GEN_FILES += access_memory
TEST_FILES = _chk_dependency.sh _debugfs_common.sh
+
+# functionality tests
TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh
+TEST_PROGS += sysfs.sh
+TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py
+TEST_PROGS += damos_quota.py damos_quota_goal.py damos_apply_interval.py
+TEST_PROGS += reclaim.sh lru_sort.sh
+
+# regression tests (reproducers of previously found bugs)
TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh
TEST_PROGS += debugfs_duplicate_context_creation.sh
TEST_PROGS += debugfs_rm_non_contexts.sh
TEST_PROGS += debugfs_target_ids_read_before_terminate_race.sh
TEST_PROGS += debugfs_target_ids_pid_leak.sh
-TEST_PROGS += sysfs.sh sysfs_update_removed_scheme_dir.sh
+TEST_PROGS += sysfs_update_removed_scheme_dir.sh
TEST_PROGS += sysfs_update_schemes_tried_regions_hang.py
-TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py
-TEST_PROGS += damos_quota.py damos_apply_interval.py
-TEST_PROGS += reclaim.sh lru_sort.sh
include ../lib.mk
diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py
index fe77d7e73a..2bd44c32be 100644
--- a/tools/testing/selftests/damon/_damon_sysfs.py
+++ b/tools/testing/selftests/damon/_damon_sysfs.py
@@ -2,7 +2,18 @@
import os
-sysfs_root = '/sys/kernel/mm/damon/admin'
+ksft_skip=4
+
+sysfs_root = None
+with open('/proc/mounts', 'r') as f:
+ for line in f:
+ dev_name, mount_point, dev_fs = line.split()[:3]
+ if dev_fs == 'sysfs':
+ sysfs_root = '%s/kernel/mm/damon/admin' % mount_point
+ break
+if sysfs_root is None:
+ print('Seems sysfs not mounted?')
+ exit(ksft_skip)
def write_file(path, string):
"Returns error string if failed, or None otherwise"
@@ -34,11 +45,11 @@ class DamosAccessPattern:
self.nr_accesses = nr_accesses
self.age = age
- if self.size == None:
+ if self.size is None:
self.size = [0, 2**64 - 1]
- if self.nr_accesses == None:
+ if self.nr_accesses is None:
self.nr_accesses = [0, 2**64 - 1]
- if self.age == None:
+ if self.age is None:
self.age = [0, 2**64 - 1]
def sysfs_dir(self):
@@ -47,55 +58,109 @@ class DamosAccessPattern:
def stage(self):
err = write_file(
os.path.join(self.sysfs_dir(), 'sz', 'min'), self.size[0])
- if err != None:
+ if err is not None:
return err
err = write_file(
os.path.join(self.sysfs_dir(), 'sz', 'max'), self.size[1])
- if err != None:
+ if err is not None:
return err
err = write_file(os.path.join(self.sysfs_dir(), 'nr_accesses', 'min'),
self.nr_accesses[0])
- if err != None:
+ if err is not None:
return err
err = write_file(os.path.join(self.sysfs_dir(), 'nr_accesses', 'max'),
self.nr_accesses[1])
- if err != None:
+ if err is not None:
return err
err = write_file(
os.path.join(self.sysfs_dir(), 'age', 'min'), self.age[0])
- if err != None:
+ if err is not None:
return err
err = write_file(
os.path.join(self.sysfs_dir(), 'age', 'max'), self.age[1])
- if err != None:
+ if err is not None:
return err
+qgoal_metric_user_input = 'user_input'
+qgoal_metric_some_mem_psi_us = 'some_mem_psi_us'
+qgoal_metrics = [qgoal_metric_user_input, qgoal_metric_some_mem_psi_us]
+
+class DamosQuotaGoal:
+ metric = None
+ target_value = None
+ current_value = None
+ effective_bytes = None
+ quota = None # owner quota
+ idx = None
+
+ def __init__(self, metric, target_value=10000, current_value=0):
+ self.metric = metric
+ self.target_value = target_value
+ self.current_value = current_value
+
+ def sysfs_dir(self):
+ return os.path.join(self.quota.sysfs_dir(), 'goals', '%d' % self.idx)
+
+ def stage(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'target_metric'),
+ self.metric)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'target_value'),
+ self.target_value)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'current_value'),
+ self.current_value)
+ if err is not None:
+ return err
+ return None
+
class DamosQuota:
sz = None # size quota, in bytes
ms = None # time quota
+ goals = None # quota goals
reset_interval_ms = None # quota reset interval
scheme = None # owner scheme
- def __init__(self, sz=0, ms=0, reset_interval_ms=0):
+ def __init__(self, sz=0, ms=0, goals=None, reset_interval_ms=0):
self.sz = sz
self.ms = ms
self.reset_interval_ms = reset_interval_ms
+ self.goals = goals if goals is not None else []
+ for idx, goal in enumerate(self.goals):
+ goal.idx = idx
+ goal.quota = self
def sysfs_dir(self):
return os.path.join(self.scheme.sysfs_dir(), 'quotas')
def stage(self):
err = write_file(os.path.join(self.sysfs_dir(), 'bytes'), self.sz)
- if err != None:
+ if err is not None:
return err
err = write_file(os.path.join(self.sysfs_dir(), 'ms'), self.ms)
- if err != None:
+ if err is not None:
return err
err = write_file(os.path.join(self.sysfs_dir(), 'reset_interval_ms'),
self.reset_interval_ms)
- if err != None:
+ if err is not None:
return err
+ nr_goals_file = os.path.join(self.sysfs_dir(), 'goals', 'nr_goals')
+ content, err = read_file(nr_goals_file)
+ if err is not None:
+ return err
+ if int(content) != len(self.goals):
+ err = write_file(nr_goals_file, len(self.goals))
+ if err is not None:
+ return err
+ for goal in self.goals:
+ err = goal.stage()
+ if err is not None:
+ return err
+ return None
+
class DamosStats:
nr_tried = None
sz_tried = None
@@ -136,30 +201,30 @@ class Damos:
def stage(self):
err = write_file(os.path.join(self.sysfs_dir(), 'action'), self.action)
- if err != None:
+ if err is not None:
return err
err = self.access_pattern.stage()
- if err != None:
+ if err is not None:
return err
err = write_file(os.path.join(self.sysfs_dir(), 'apply_interval_us'),
'%d' % self.apply_interval_us)
- if err != None:
+ if err is not None:
return err
err = self.quota.stage()
- if err != None:
+ if err is not None:
return err
# disable watermarks
err = write_file(
os.path.join(self.sysfs_dir(), 'watermarks', 'metric'), 'none')
- if err != None:
+ if err is not None:
return err
# disable filters
err = write_file(
os.path.join(self.sysfs_dir(), 'filters', 'nr_filters'), '0')
- if err != None:
+ if err is not None:
return err
class DamonTarget:
@@ -178,7 +243,7 @@ class DamonTarget:
def stage(self):
err = write_file(
os.path.join(self.sysfs_dir(), 'regions', 'nr_regions'), '0')
- if err != None:
+ if err is not None:
return err
return write_file(
os.path.join(self.sysfs_dir(), 'pid_target'), self.pid)
@@ -210,27 +275,27 @@ class DamonAttrs:
def stage(self):
err = write_file(os.path.join(self.interval_sysfs_dir(), 'sample_us'),
self.sample_us)
- if err != None:
+ if err is not None:
return err
err = write_file(os.path.join(self.interval_sysfs_dir(), 'aggr_us'),
self.aggr_us)
- if err != None:
+ if err is not None:
return err
err = write_file(os.path.join(self.interval_sysfs_dir(), 'update_us'),
self.update_us)
- if err != None:
+ if err is not None:
return err
err = write_file(
os.path.join(self.nr_regions_range_sysfs_dir(), 'min'),
self.min_nr_regions)
- if err != None:
+ if err is not None:
return err
err = write_file(
os.path.join(self.nr_regions_range_sysfs_dir(), 'max'),
self.max_nr_regions)
- if err != None:
+ if err is not None:
return err
class DamonCtx:
@@ -264,24 +329,24 @@ class DamonCtx:
def stage(self):
err = write_file(
os.path.join(self.sysfs_dir(), 'operations'), self.ops)
- if err != None:
+ if err is not None:
return err
err = self.monitoring_attrs.stage()
- if err != None:
+ if err is not None:
return err
nr_targets_file = os.path.join(
self.sysfs_dir(), 'targets', 'nr_targets')
content, err = read_file(nr_targets_file)
- if err != None:
+ if err is not None:
return err
if int(content) != len(self.targets):
err = write_file(nr_targets_file, '%d' % len(self.targets))
- if err != None:
+ if err is not None:
return err
for target in self.targets:
err = target.stage()
- if err != None:
+ if err is not None:
return err
nr_schemes_file = os.path.join(
@@ -291,11 +356,11 @@ class DamonCtx:
return err
if int(content) != len(self.schemes):
err = write_file(nr_schemes_file, '%d' % len(self.schemes))
- if err != None:
+ if err is not None:
return err
for scheme in self.schemes:
err = scheme.stage()
- if err != None:
+ if err is not None:
return err
return None
@@ -319,16 +384,16 @@ class Kdamond:
nr_contexts_file = os.path.join(self.sysfs_dir(),
'contexts', 'nr_contexts')
content, err = read_file(nr_contexts_file)
- if err != None:
+ if err is not None:
return err
if int(content) != len(self.contexts):
err = write_file(nr_contexts_file, '%d' % len(self.contexts))
- if err != None:
+ if err is not None:
return err
for context in self.contexts:
err = context.stage()
- if err != None:
+ if err is not None:
return err
err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'on')
return err
@@ -336,20 +401,20 @@ class Kdamond:
def update_schemes_tried_bytes(self):
err = write_file(os.path.join(self.sysfs_dir(), 'state'),
'update_schemes_tried_bytes')
- if err != None:
+ if err is not None:
return err
for context in self.contexts:
for scheme in context.schemes:
content, err = read_file(os.path.join(scheme.sysfs_dir(),
'tried_regions', 'total_bytes'))
- if err != None:
+ if err is not None:
return err
scheme.tried_bytes = int(content)
def update_schemes_stats(self):
err = write_file(os.path.join(self.sysfs_dir(), 'state'),
'update_schemes_stats')
- if err != None:
+ if err is not None:
return err
for context in self.contexts:
for scheme in context.schemes:
@@ -358,11 +423,39 @@ class Kdamond:
'sz_applied', 'qt_exceeds']:
content, err = read_file(
os.path.join(scheme.sysfs_dir(), 'stats', stat))
- if err != None:
+ if err is not None:
return err
stat_values.append(int(content))
scheme.stats = DamosStats(*stat_values)
+ def update_schemes_effective_quotas(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'state'),
+ 'update_schemes_effective_quotas')
+ if err is not None:
+ return err
+ for context in self.contexts:
+ for scheme in context.schemes:
+ for goal in scheme.quota.goals:
+ content, err = read_file(
+ os.path.join(scheme.quota.sysfs_dir(),
+ 'effective_bytes'))
+ if err is not None:
+ return err
+ goal.effective_bytes = int(content)
+ return None
+
+ def commit_schemes_quota_goals(self):
+ for context in self.contexts:
+ for scheme in context.schemes:
+ for goal in scheme.quota.goals:
+ err = goal.stage()
+ if err is not None:
+ print('commit_schemes_quota_goals failed stagign: %s'%
+ err)
+ exit(1)
+ return write_file(os.path.join(self.sysfs_dir(), 'state'),
+ 'commit_schemes_quota_goals')
+
class Kdamonds:
kdamonds = []
@@ -378,10 +471,10 @@ class Kdamonds:
def start(self):
err = write_file(os.path.join(self.sysfs_dir(), 'nr_kdamonds'),
'%s' % len(self.kdamonds))
- if err != None:
+ if err is not None:
return err
for kdamond in self.kdamonds:
err = kdamond.start()
- if err != None:
+ if err is not None:
return err
return None
diff --git a/tools/testing/selftests/damon/access_memory.c b/tools/testing/selftests/damon/access_memory.c
index 585a2fa543..56b17e8fe1 100644
--- a/tools/testing/selftests/damon/access_memory.c
+++ b/tools/testing/selftests/damon/access_memory.c
@@ -35,7 +35,7 @@ int main(int argc, char *argv[])
start_clock = clock();
while ((clock() - start_clock) * 1000 / CLOCKS_PER_SEC <
access_time_ms)
- memset(regions[i], i, 1024 * 1024 * 10);
+ memset(regions[i], i, sz_region);
}
return 0;
}
diff --git a/tools/testing/selftests/damon/damos_quota_goal.py b/tools/testing/selftests/damon/damos_quota_goal.py
new file mode 100644
index 0000000000..18246f3b62
--- /dev/null
+++ b/tools/testing/selftests/damon/damos_quota_goal.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import time
+
+import _damon_sysfs
+
+def main():
+ # access two 10 MiB memory regions, 2 second per each
+ sz_region = 10 * 1024 * 1024
+ proc = subprocess.Popen(['./access_memory', '2', '%d' % sz_region, '2000'])
+
+ goal = _damon_sysfs.DamosQuotaGoal(
+ metric=_damon_sysfs.qgoal_metric_user_input, target_value=10000)
+ kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ ops='vaddr',
+ targets=[_damon_sysfs.DamonTarget(pid=proc.pid)],
+ schemes=[_damon_sysfs.Damos(
+ action='stat',
+ quota=_damon_sysfs.DamosQuota(
+ goals=[goal], reset_interval_ms=100),
+ )] # schemes
+ )] # contexts
+ )]) # kdamonds
+
+ err = kdamonds.start()
+ if err != None:
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ score_values_to_test = [0, 15000, 5000, 18000]
+ while proc.poll() == None:
+ if len(score_values_to_test) == 0:
+ time.sleep(0.1)
+ continue
+
+ goal.current_value = score_values_to_test.pop(0)
+ expect_increase = goal.current_value < goal.target_value
+
+ err = kdamonds.kdamonds[0].commit_schemes_quota_goals()
+ if err is not None:
+ print('commit_schemes_quota_goals failed: %s' % err)
+ exit(1)
+
+ err = kdamonds.kdamonds[0].update_schemes_effective_quotas()
+ if err is not None:
+ print('before-update_schemes_effective_quotas failed: %s' % err)
+ exit(1)
+ last_effective_bytes = goal.effective_bytes
+
+ time.sleep(0.5)
+
+ err = kdamonds.kdamonds[0].update_schemes_effective_quotas()
+ if err is not None:
+ print('after-update_schemes_effective_quotas failed: %s' % err)
+ exit(1)
+
+ print('score: %s, effective quota: %d -> %d (%.3fx)' % (
+ goal.current_value, last_effective_bytes, goal.effective_bytes,
+ goal.effective_bytes / last_effective_bytes
+ if last_effective_bytes != 0 else -1.0))
+
+ if last_effective_bytes == goal.effective_bytes:
+ print('efective bytes not changed: %d' % goal.effective_bytes)
+ exit(1)
+
+ increased = last_effective_bytes < goal.effective_bytes
+ if expect_increase != increased:
+ print('expectation of increase (%s) != increased (%s)' %
+ (expect_increase, increased))
+ exit(1)
+ last_effective_bytes = goal.effective_bytes
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
index 890a8236a8..5f54152236 100644
--- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
+++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
@@ -15,6 +15,7 @@
#include <linux/dma-buf.h>
#include <linux/dma-heap.h>
#include <drm/drm.h>
+#include "../kselftest.h"
#define DEVPATH "/dev/dma_heap"
@@ -90,14 +91,13 @@ static int dmabuf_heap_open(char *name)
char buf[256];
ret = snprintf(buf, 256, "%s/%s", DEVPATH, name);
- if (ret < 0) {
- printf("snprintf failed!\n");
- return ret;
- }
+ if (ret < 0)
+ ksft_exit_fail_msg("snprintf failed! %d\n", ret);
fd = open(buf, O_RDWR);
if (fd < 0)
- printf("open %s failed!\n", buf);
+ ksft_exit_fail_msg("open %s failed: %s\n", buf, strerror(errno));
+
return fd;
}
@@ -140,7 +140,7 @@ static int dmabuf_sync(int fd, int start_stop)
#define ONE_MEG (1024 * 1024)
-static int test_alloc_and_import(char *heap_name)
+static void test_alloc_and_import(char *heap_name)
{
int heap_fd = -1, dmabuf_fd = -1, importer_fd = -1;
uint32_t handle = 0;
@@ -148,27 +148,19 @@ static int test_alloc_and_import(char *heap_name)
int ret;
heap_fd = dmabuf_heap_open(heap_name);
- if (heap_fd < 0)
- return -1;
- printf(" Testing allocation and importing: ");
+ ksft_print_msg("Testing allocation and importing:\n");
ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0, &dmabuf_fd);
if (ret) {
- printf("FAIL (Allocation Failed!)\n");
- ret = -1;
- goto out;
+ ksft_test_result_fail("FAIL (Allocation Failed!) %d\n", ret);
+ return;
}
+
/* mmap and write a simple pattern */
- p = mmap(NULL,
- ONE_MEG,
- PROT_READ | PROT_WRITE,
- MAP_SHARED,
- dmabuf_fd,
- 0);
+ p = mmap(NULL, ONE_MEG, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd, 0);
if (p == MAP_FAILED) {
- printf("FAIL (mmap() failed)\n");
- ret = -1;
- goto out;
+ ksft_test_result_fail("FAIL (mmap() failed): %s\n", strerror(errno));
+ goto close_and_return;
}
dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START);
@@ -178,71 +170,64 @@ static int test_alloc_and_import(char *heap_name)
importer_fd = open_vgem();
if (importer_fd < 0) {
- ret = importer_fd;
- printf("(Could not open vgem - skipping): ");
+ ksft_test_result_skip("Could not open vgem %d\n", importer_fd);
} else {
ret = import_vgem_fd(importer_fd, dmabuf_fd, &handle);
- if (ret < 0) {
- printf("FAIL (Failed to import buffer)\n");
- goto out;
- }
+ ksft_test_result(ret >= 0, "Import buffer %d\n", ret);
}
ret = dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START);
if (ret < 0) {
- printf("FAIL (DMA_BUF_SYNC_START failed!)\n");
+ ksft_print_msg("FAIL (DMA_BUF_SYNC_START failed!) %d\n", ret);
goto out;
}
memset(p, 0xff, ONE_MEG);
ret = dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_END);
if (ret < 0) {
- printf("FAIL (DMA_BUF_SYNC_END failed!)\n");
+ ksft_print_msg("FAIL (DMA_BUF_SYNC_END failed!) %d\n", ret);
goto out;
}
close_handle(importer_fd, handle);
- ret = 0;
- printf(" OK\n");
+ ksft_test_result_pass("%s dmabuf sync succeeded\n", __func__);
+ return;
+
out:
- if (p)
- munmap(p, ONE_MEG);
- if (importer_fd >= 0)
- close(importer_fd);
- if (dmabuf_fd >= 0)
- close(dmabuf_fd);
- if (heap_fd >= 0)
- close(heap_fd);
+ ksft_test_result_fail("%s dmabuf sync failed\n", __func__);
+ munmap(p, ONE_MEG);
+ close(importer_fd);
- return ret;
+close_and_return:
+ close(dmabuf_fd);
+ close(heap_fd);
}
-static int test_alloc_zeroed(char *heap_name, size_t size)
+static void test_alloc_zeroed(char *heap_name, size_t size)
{
int heap_fd = -1, dmabuf_fd[32];
- int i, j, ret;
+ int i, j, k, ret;
void *p = NULL;
char *c;
- printf(" Testing alloced %ldk buffers are zeroed: ", size / 1024);
+ ksft_print_msg("Testing alloced %ldk buffers are zeroed:\n", size / 1024);
heap_fd = dmabuf_heap_open(heap_name);
- if (heap_fd < 0)
- return -1;
/* Allocate and fill a bunch of buffers */
for (i = 0; i < 32; i++) {
ret = dmabuf_heap_alloc(heap_fd, size, 0, &dmabuf_fd[i]);
- if (ret < 0) {
- printf("FAIL (Allocation (%i) failed)\n", i);
- goto out;
+ if (ret) {
+ ksft_test_result_fail("FAIL (Allocation (%i) failed) %d\n", i, ret);
+ goto close_and_return;
}
+
/* mmap and fill with simple pattern */
p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd[i], 0);
if (p == MAP_FAILED) {
- printf("FAIL (mmap() failed!)\n");
- ret = -1;
- goto out;
+ ksft_test_result_fail("FAIL (mmap() failed!): %s\n", strerror(errno));
+ goto close_and_return;
}
+
dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_START);
memset(p, 0xff, size);
dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END);
@@ -251,48 +236,47 @@ static int test_alloc_zeroed(char *heap_name, size_t size)
/* close them all */
for (i = 0; i < 32; i++)
close(dmabuf_fd[i]);
+ ksft_test_result_pass("Allocate and fill a bunch of buffers\n");
/* Allocate and validate all buffers are zeroed */
for (i = 0; i < 32; i++) {
ret = dmabuf_heap_alloc(heap_fd, size, 0, &dmabuf_fd[i]);
if (ret < 0) {
- printf("FAIL (Allocation (%i) failed)\n", i);
- goto out;
+ ksft_test_result_fail("FAIL (Allocation (%i) failed) %d\n", i, ret);
+ goto close_and_return;
}
/* mmap and validate everything is zero */
p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd[i], 0);
if (p == MAP_FAILED) {
- printf("FAIL (mmap() failed!)\n");
- ret = -1;
- goto out;
+ ksft_test_result_fail("FAIL (mmap() failed!): %s\n", strerror(errno));
+ goto close_and_return;
}
+
dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_START);
c = (char *)p;
for (j = 0; j < size; j++) {
if (c[j] != 0) {
- printf("FAIL (Allocated buffer not zeroed @ %i)\n", j);
- break;
+ ksft_print_msg("FAIL (Allocated buffer not zeroed @ %i)\n", j);
+ dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END);
+ munmap(p, size);
+ goto out;
}
}
dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END);
munmap(p, size);
}
- /* close them all */
- for (i = 0; i < 32; i++)
- close(dmabuf_fd[i]);
-
- close(heap_fd);
- printf("OK\n");
- return 0;
out:
- while (i > 0) {
- close(dmabuf_fd[i]);
- i--;
- }
+ ksft_test_result(i == 32, "Allocate and validate all buffers are zeroed\n");
+
+close_and_return:
+ /* close them all */
+ for (k = 0; k < i; k++)
+ close(dmabuf_fd[k]);
+
close(heap_fd);
- return ret;
+ return;
}
/* Test the ioctl version compatibility w/ a smaller structure then expected */
@@ -360,126 +344,97 @@ static int dmabuf_heap_alloc_newer(int fd, size_t len, unsigned int flags,
return ret;
}
-static int test_alloc_compat(char *heap_name)
+static void test_alloc_compat(char *heap_name)
{
- int heap_fd = -1, dmabuf_fd = -1;
- int ret;
+ int ret, heap_fd = -1, dmabuf_fd = -1;
heap_fd = dmabuf_heap_open(heap_name);
- if (heap_fd < 0)
- return -1;
- printf(" Testing (theoretical)older alloc compat: ");
+ ksft_print_msg("Testing (theoretical) older alloc compat:\n");
ret = dmabuf_heap_alloc_older(heap_fd, ONE_MEG, 0, &dmabuf_fd);
- if (ret) {
- printf("FAIL (Older compat allocation failed!)\n");
- ret = -1;
- goto out;
- }
- close(dmabuf_fd);
- printf("OK\n");
+ if (dmabuf_fd >= 0)
+ close(dmabuf_fd);
+ ksft_test_result(!ret, "dmabuf_heap_alloc_older\n");
- printf(" Testing (theoretical)newer alloc compat: ");
+ ksft_print_msg("Testing (theoretical) newer alloc compat:\n");
ret = dmabuf_heap_alloc_newer(heap_fd, ONE_MEG, 0, &dmabuf_fd);
- if (ret) {
- printf("FAIL (Newer compat allocation failed!)\n");
- ret = -1;
- goto out;
- }
- printf("OK\n");
-out:
if (dmabuf_fd >= 0)
close(dmabuf_fd);
- if (heap_fd >= 0)
- close(heap_fd);
+ ksft_test_result(!ret, "dmabuf_heap_alloc_newer\n");
- return ret;
+ close(heap_fd);
}
-static int test_alloc_errors(char *heap_name)
+static void test_alloc_errors(char *heap_name)
{
int heap_fd = -1, dmabuf_fd = -1;
int ret;
heap_fd = dmabuf_heap_open(heap_name);
- if (heap_fd < 0)
- return -1;
- printf(" Testing expected error cases: ");
+ ksft_print_msg("Testing expected error cases:\n");
ret = dmabuf_heap_alloc(0, ONE_MEG, 0x111111, &dmabuf_fd);
- if (!ret) {
- printf("FAIL (Did not see expected error (invalid fd)!)\n");
- ret = -1;
- goto out;
- }
+ ksft_test_result(ret, "Error expected on invalid fd %d\n", ret);
ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0x111111, &dmabuf_fd);
- if (!ret) {
- printf("FAIL (Did not see expected error (invalid heap flags)!)\n");
- ret = -1;
- goto out;
- }
+ ksft_test_result(ret, "Error expected on invalid heap flags %d\n", ret);
ret = dmabuf_heap_alloc_fdflags(heap_fd, ONE_MEG,
~(O_RDWR | O_CLOEXEC), 0, &dmabuf_fd);
- if (!ret) {
- printf("FAIL (Did not see expected error (invalid fd flags)!)\n");
- ret = -1;
- goto out;
- }
+ ksft_test_result(ret, "Error expected on invalid heap flags %d\n", ret);
- printf("OK\n");
- ret = 0;
-out:
if (dmabuf_fd >= 0)
close(dmabuf_fd);
- if (heap_fd >= 0)
- close(heap_fd);
+ close(heap_fd);
+}
- return ret;
+static int numer_of_heaps(void)
+{
+ DIR *d = opendir(DEVPATH);
+ struct dirent *dir;
+ int heaps = 0;
+
+ while ((dir = readdir(d))) {
+ if (!strncmp(dir->d_name, ".", 2))
+ continue;
+ if (!strncmp(dir->d_name, "..", 3))
+ continue;
+ heaps++;
+ }
+
+ return heaps;
}
int main(void)
{
- DIR *d;
struct dirent *dir;
- int ret = -1;
+ DIR *d;
+
+ ksft_print_header();
d = opendir(DEVPATH);
if (!d) {
- printf("No %s directory?\n", DEVPATH);
- return -1;
+ ksft_print_msg("No %s directory?\n", DEVPATH);
+ return KSFT_SKIP;
}
- while ((dir = readdir(d)) != NULL) {
+ ksft_set_plan(11 * numer_of_heaps());
+
+ while ((dir = readdir(d))) {
if (!strncmp(dir->d_name, ".", 2))
continue;
if (!strncmp(dir->d_name, "..", 3))
continue;
- printf("Testing heap: %s\n", dir->d_name);
- printf("=======================================\n");
- ret = test_alloc_and_import(dir->d_name);
- if (ret)
- break;
-
- ret = test_alloc_zeroed(dir->d_name, 4 * 1024);
- if (ret)
- break;
-
- ret = test_alloc_zeroed(dir->d_name, ONE_MEG);
- if (ret)
- break;
-
- ret = test_alloc_compat(dir->d_name);
- if (ret)
- break;
-
- ret = test_alloc_errors(dir->d_name);
- if (ret)
- break;
+ ksft_print_msg("Testing heap: %s\n", dir->d_name);
+ ksft_print_msg("=======================================\n");
+ test_alloc_and_import(dir->d_name);
+ test_alloc_zeroed(dir->d_name, 4 * 1024);
+ test_alloc_zeroed(dir->d_name, ONE_MEG);
+ test_alloc_compat(dir->d_name);
+ test_alloc_errors(dir->d_name);
}
closedir(d);
- return ret;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
new file mode 100644
index 0000000000..e54f382bcb
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_INCLUDES := $(wildcard lib/py/*.py)
+
+TEST_PROGS := \
+ ping.py \
+ queues.py \
+ stats.py \
+# end of TEST_PROGS
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/README.rst b/tools/testing/selftests/drivers/net/README.rst
new file mode 100644
index 0000000000..3b6a29e656
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/README.rst
@@ -0,0 +1,136 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Running driver tests
+====================
+
+Networking driver tests are executed within kselftest framework like any
+other tests. They support testing both real device drivers and emulated /
+software drivers (latter mostly to test the core parts of the stack).
+
+SW mode
+~~~~~~~
+
+By default, when no extra parameters are set or exported, tests execute
+against software drivers such as netdevsim. No extra preparation is required
+the software devices are created and destroyed as part of the test.
+In this mode the tests are indistinguishable from other selftests and
+(for example) can be run under ``virtme-ng`` like the core networking selftests.
+
+HW mode
+~~~~~~~
+
+Executing tests against a real device requires external preparation.
+The netdevice against which tests will be run must exist, be running
+(in UP state) and be configured with an IP address.
+
+Refer to list of :ref:`Variables` later in this file to set up running
+the tests against a real device.
+
+Both modes required
+~~~~~~~~~~~~~~~~~~~
+
+All tests in drivers/net must support running both against a software device
+and a real device. SW-only tests should instead be placed in net/ or
+drivers/net/netdevsim, HW-only tests in drivers/net/hw.
+
+Variables
+=========
+
+The variables can be set in the environment or by creating a net.config
+file in the same directory as this README file. Example::
+
+ $ NETIF=eth0 ./some_test.sh
+
+or::
+
+ $ cat tools/testing/selftests/drivers/net/net.config
+ # Variable set in a file
+ NETIF=eth0
+
+Local test (which don't require endpoint for sending / receiving traffic)
+need only the ``NETIF`` variable. Remaining variables define the endpoint
+and communication method.
+
+NETIF
+~~~~~
+
+Name of the netdevice against which the test should be executed.
+When empty or not set software devices will be used.
+
+LOCAL_V4, LOCAL_V6, REMOTE_V4, REMOTE_V6
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Local and remote endpoint IP addresses.
+
+REMOTE_TYPE
+~~~~~~~~~~~
+
+Communication method used to run commands on the remote endpoint.
+Test framework has built-in support for ``netns`` and ``ssh`` channels.
+``netns`` assumes the "remote" interface is part of the same
+host, just moved to the specified netns.
+``ssh`` communicates with remote endpoint over ``ssh`` and ``scp``.
+Using persistent SSH connections is strongly encouraged to avoid
+the latency of SSH connection setup on every command.
+
+Communication methods are defined by classes in ``lib/py/remote_{name}.py``.
+It should be possible to add a new method without modifying any of
+the framework, by simply adding an appropriately named file to ``lib/py``.
+
+REMOTE_ARGS
+~~~~~~~~~~~
+
+Arguments used to construct the communication channel.
+Communication channel dependent::
+
+ for netns - name of the "remote" namespace
+ for ssh - name/address of the remote host
+
+Example
+=======
+
+Build the selftests::
+
+ # make -C tools/testing/selftests/ TARGETS="drivers/net drivers/net/hw"
+
+"Install" the tests and copy them over to the target machine::
+
+ # make -C tools/testing/selftests/ TARGETS="drivers/net drivers/net/hw" \
+ install INSTALL_PATH=/tmp/ksft-net-drv
+
+ # rsync -ra --delete /tmp/ksft-net-drv root@192.168.1.1:/root/
+
+On the target machine, running the tests will use netdevsim by default::
+
+ [/root] # ./ksft-net-drv/run_kselftest.sh -t drivers/net:ping.py
+ TAP version 13
+ 1..1
+ # timeout set to 45
+ # selftests: drivers/net: ping.py
+ # KTAP version 1
+ # 1..3
+ # ok 1 ping.test_v4
+ # ok 2 ping.test_v6
+ # ok 3 ping.test_tcp
+ # # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0
+ ok 1 selftests: drivers/net: ping.py
+
+Create a config with remote info::
+
+ [/root] # cat > ./ksft-net-drv/drivers/net/net.config <<EOF
+ NETIF=eth0
+ LOCAL_V4=192.168.1.1
+ REMOTE_V4=192.168.1.2
+ REMOTE_TYPE=ssh
+ REMOTE_ARGS=root@192.168.1.2
+ EOF
+
+Run the test::
+
+ [/root] # ./ksft-net-drv/drivers/net/ping.py
+ KTAP version 1
+ 1..3
+ ok 1 ping.test_v4
+ ok 2 ping.test_v6 # SKIP Test requires IPv6 connectivity
+ ok 3 ping.test_tcp
+ # Totals: pass:2 fail:0 xfail:0 xpass:0 skip:1 error:0
diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config
new file mode 100644
index 0000000000..f6a58ce8a2
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/config
@@ -0,0 +1,2 @@
+CONFIG_IPV6=y
+CONFIG_NETDEVSIM=m
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
new file mode 100644
index 0000000000..4933d045ab
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+TEST_PROGS = \
+ csum.py \
+ devlink_port_split.py \
+ ethtool.sh \
+ ethtool_extended_state.sh \
+ ethtool_mm.sh \
+ ethtool_rmon.sh \
+ hw_stats_l3.sh \
+ hw_stats_l3_gre.sh \
+ loopback.sh \
+ pp_alloc_fail.py \
+ #
+
+TEST_FILES := \
+ ethtool_lib.sh \
+ #
+
+TEST_INCLUDES := \
+ $(wildcard lib/py/*.py ../lib/py/*.py) \
+ ../../../net/lib.sh \
+ ../../../net/forwarding/lib.sh \
+ ../../../net/forwarding/ipip_lib.sh \
+ ../../../net/forwarding/tc_common.sh \
+ #
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py
new file mode 100755
index 0000000000..cb40497fae
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/csum.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""Run the tools/testing/selftests/net/csum testsuite."""
+
+from os import path
+
+from lib.py import ksft_run, ksft_exit, KsftSkipEx
+from lib.py import EthtoolFamily, NetDrvEpEnv
+from lib.py import bkg, cmd, wait_port_listen
+
+def test_receive(cfg, ipv4=False, extra_args=None):
+ """Test local nic checksum receive. Remote host sends crafted packets."""
+ if not cfg.have_rx_csum:
+ raise KsftSkipEx(f"Test requires rx checksum offload on {cfg.ifname}")
+
+ if ipv4:
+ ip_args = f"-4 -S {cfg.remote_v4} -D {cfg.v4}"
+ else:
+ ip_args = f"-6 -S {cfg.remote_v6} -D {cfg.v6}"
+
+ rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}"
+ tx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -n 100 {ip_args} -r 1 -T {extra_args}"
+
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(34000, proto="udp")
+ cmd(tx_cmd, host=cfg.remote)
+
+
+def test_transmit(cfg, ipv4=False, extra_args=None):
+ """Test local nic checksum transmit. Remote host verifies packets."""
+ if (not cfg.have_tx_csum_generic and
+ not (cfg.have_tx_csum_ipv4 and ipv4) and
+ not (cfg.have_tx_csum_ipv6 and not ipv4)):
+ raise KsftSkipEx(f"Test requires tx checksum offload on {cfg.ifname}")
+
+ if ipv4:
+ ip_args = f"-4 -S {cfg.v4} -D {cfg.remote_v4}"
+ else:
+ ip_args = f"-6 -S {cfg.v6} -D {cfg.remote_v6}"
+
+ # Cannot randomize input when calculating zero checksum
+ if extra_args != "-U -Z":
+ extra_args += " -r 1"
+
+ rx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -L 1 -n 100 {ip_args} -R {extra_args}"
+ tx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -L 1 -n 100 {ip_args} -T {extra_args}"
+
+ with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
+ wait_port_listen(34000, proto="udp", host=cfg.remote)
+ cmd(tx_cmd)
+
+
+def test_builder(name, cfg, ipv4=False, tx=False, extra_args=""):
+ """Construct specific tests from the common template.
+
+ Most tests follow the same basic pattern, differing only in
+ Direction of the test and optional flags passed to csum."""
+ def f(cfg):
+ if ipv4:
+ cfg.require_v4()
+ else:
+ cfg.require_v6()
+
+ if tx:
+ test_transmit(cfg, ipv4, extra_args)
+ else:
+ test_receive(cfg, ipv4, extra_args)
+
+ if ipv4:
+ f.__name__ = "ipv4_" + name
+ else:
+ f.__name__ = "ipv6_" + name
+ return f
+
+
+def check_nic_features(cfg) -> None:
+ """Test whether Tx and Rx checksum offload are enabled.
+
+ If the device under test has either off, then skip the relevant tests."""
+ cfg.have_tx_csum_generic = False
+ cfg.have_tx_csum_ipv4 = False
+ cfg.have_tx_csum_ipv6 = False
+ cfg.have_rx_csum = False
+
+ ethnl = EthtoolFamily()
+ features = ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
+ for f in features["active"]["bits"]["bit"]:
+ if f["name"] == "tx-checksum-ip-generic":
+ cfg.have_tx_csum_generic = True
+ elif f["name"] == "tx-checksum-ipv4":
+ cfg.have_tx_csum_ipv4 = True
+ elif f["name"] == "tx-checksum-ipv6":
+ cfg.have_tx_csum_ipv6 = True
+ elif f["name"] == "rx-checksum":
+ cfg.have_rx_csum = True
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ check_nic_features(cfg)
+
+ cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../net/lib/csum")
+ cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+ cases = []
+ for ipv4 in [True, False]:
+ cases.append(test_builder("rx_tcp", cfg, ipv4, False, "-t"))
+ cases.append(test_builder("rx_tcp_invalid", cfg, ipv4, False, "-t -E"))
+
+ cases.append(test_builder("rx_udp", cfg, ipv4, False, ""))
+ cases.append(test_builder("rx_udp_invalid", cfg, ipv4, False, "-E"))
+
+ cases.append(test_builder("tx_udp_csum_offload", cfg, ipv4, True, "-U"))
+ cases.append(test_builder("tx_udp_zero_checksum", cfg, ipv4, True, "-U -Z"))
+
+ ksft_run(cases=cases, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/drivers/net/hw/devlink_port_split.py
index 2d84c7a0be..2d84c7a0be 100755
--- a/tools/testing/selftests/net/devlink_port_split.py
+++ b/tools/testing/selftests/drivers/net/hw/devlink_port_split.py
diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/drivers/net/hw/ethtool.sh
index aa2eafb7b2..fa6953de6b 100755
--- a/tools/testing/selftests/net/forwarding/ethtool.sh
+++ b/tools/testing/selftests/drivers/net/hw/ethtool.sh
@@ -10,7 +10,8 @@ ALL_TESTS="
different_speeds_autoneg_on
"
NUM_NETIFS=2
-source lib.sh
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
source ethtool_lib.sh
h1_create()
@@ -64,9 +65,8 @@ same_speeds_autoneg_off()
setup_wait_dev_with_timeout $h1
setup_wait_dev_with_timeout $h2
ping_do $h1 192.0.2.2
- check_err $? "speed $speed autoneg off"
- log_test "force of same speed autoneg off"
- log_info "speed = $speed"
+ check_err $? "ping with speed $speed autoneg off"
+ log_test "force speed $speed on both ends"
done
ethtool -s $h2 autoneg on
@@ -111,9 +111,8 @@ combination_of_neg_on_and_off()
setup_wait_dev_with_timeout $h1
setup_wait_dev_with_timeout $h2
ping_do $h1 192.0.2.2
- check_err $? "h1-speed=$speed autoneg off, h2 autoneg on"
- log_test "one side with autoneg off and another with autoneg on"
- log_info "force speed = $speed"
+ check_err $? "ping with h1-speed=$speed autoneg off, h2 autoneg on"
+ log_test "force speed $speed vs. autoneg"
done
ethtool -s $h1 autoneg on
@@ -206,10 +205,9 @@ advertise_subset_of_speeds()
setup_wait_dev_with_timeout $h1
setup_wait_dev_with_timeout $h2
ping_do $h1 192.0.2.2
- check_err $? "h1=$speed_1_to_advertise, h2=$speed_2_to_advertise ($speed_value)"
+ check_err $? "ping with h1=$speed_1_to_advertise, h2=$speed_2_to_advertise ($speed_value)"
- log_test "advertise subset of speeds"
- log_info "h1=$speed_1_to_advertise, h2=$speed_2_to_advertise"
+ log_test "advertise $speed_1_to_advertise vs. $speed_2_to_advertise"
done
ethtool -s $h2 autoneg on
@@ -286,8 +284,6 @@ different_speeds_autoneg_on()
ethtool -s $h1 autoneg on
}
-skip_on_veth
-
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh
index 17f89c3b7c..a758444841 100755
--- a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
+++ b/tools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh
@@ -8,7 +8,8 @@ ALL_TESTS="
"
NUM_NETIFS=2
-source lib.sh
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
source ethtool_lib.sh
TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms
@@ -108,8 +109,6 @@ no_cable()
ip link set dev $swp3 down
}
-skip_on_veth
-
setup_prepare
tests_run
diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/drivers/net/hw/ethtool_lib.sh
index b9bfb45085..b9bfb45085 100644
--- a/tools/testing/selftests/net/forwarding/ethtool_lib.sh
+++ b/tools/testing/selftests/drivers/net/hw/ethtool_lib.sh
diff --git a/tools/testing/selftests/net/forwarding/ethtool_mm.sh b/tools/testing/selftests/drivers/net/hw/ethtool_mm.sh
index 50d5bfb17e..c301e735c8 100755
--- a/tools/testing/selftests/net/forwarding/ethtool_mm.sh
+++ b/tools/testing/selftests/drivers/net/hw/ethtool_mm.sh
@@ -14,7 +14,8 @@ ALL_TESTS="
NUM_NETIFS=2
REQUIRE_MZ=no
PREEMPTIBLE_PRIO=0
-source lib.sh
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
traffic_test()
{
diff --git a/tools/testing/selftests/net/forwarding/ethtool_rmon.sh b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh
index e78776db85..8f60c1685a 100755
--- a/tools/testing/selftests/net/forwarding/ethtool_rmon.sh
+++ b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh
@@ -7,7 +7,8 @@ ALL_TESTS="
"
NUM_NETIFS=2
-source lib.sh
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
ETH_FCS_LEN=4
ETH_HLEN=$((6+6+2))
@@ -43,6 +44,7 @@ bucket_test()
# Mausezahn does not include FCS bytes in its length - but the
# histogram counters do
len=$((len - ETH_FCS_LEN))
+ len=$((len > 0 ? len : 0))
before=$(ethtool --json -S $iface --groups rmon | \
jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val")
diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/hw/hw_stats_l3.sh
index 48584a5138..67fafefc80 100755
--- a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
+++ b/tools/testing/selftests/drivers/net/hw/hw_stats_l3.sh
@@ -48,7 +48,9 @@ ALL_TESTS="
test_double_enable
"
NUM_NETIFS=4
-source lib.sh
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+source "$lib_dir"/../../../net/forwarding/tc_common.sh
h1_create()
{
@@ -324,17 +326,9 @@ setup_wait
used=$(ip -j stats show dev $rp1.200 group offload subgroup hw_stats_info |
jq '.[].info.l3_stats.used')
-kind=$(ip -j -d link show dev $rp1 |
- jq -r '.[].linkinfo.info_kind')
-if [[ $used != true ]]; then
- if [[ $kind == veth ]]; then
- log_test_skip "l3_stats not offloaded on veth interface"
- EXIT_STATUS=$ksft_skip
- else
- RET=1 log_test "l3_stats not offloaded"
- fi
-else
- tests_run
-fi
+[[ $used = true ]]
+check_err $? "hw_stats_info.used=$used"
+log_test "l3_stats offloaded"
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh b/tools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh
index 7594bbb490..a94d92e1ab 100755
--- a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
+++ b/tools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh
@@ -12,8 +12,10 @@ ALL_TESTS="
test_stats_tx
"
NUM_NETIFS=6
-source lib.sh
-source ipip_lib.sh
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+source "$lib_dir"/../../../net/forwarding/ipip_lib.sh
+source "$lib_dir"/../../../net/forwarding/tc_common.sh
setup_prepare()
{
@@ -99,8 +101,6 @@ test_stats_rx()
test_stats g2a rx
}
-skip_on_veth
-
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
new file mode 100644
index 0000000000..b582885786
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import sys
+from pathlib import Path
+
+KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve()
+
+try:
+ sys.path.append(KSFT_DIR.as_posix())
+ from net.lib.py import *
+ from drivers.net.lib.py import *
+except ModuleNotFoundError as e:
+ ksft_pr("Failed importing `net` library from kernel sources")
+ ksft_pr(str(e))
+ ktap_result(True, comment="SKIP")
+ sys.exit(4)
diff --git a/tools/testing/selftests/net/forwarding/loopback.sh b/tools/testing/selftests/drivers/net/hw/loopback.sh
index 8f4057310b..5acc3ff820 100755
--- a/tools/testing/selftests/net/forwarding/loopback.sh
+++ b/tools/testing/selftests/drivers/net/hw/loopback.sh
@@ -6,8 +6,9 @@ ksft_skip=4
ALL_TESTS="loopback_test"
NUM_NETIFS=2
-source tc_common.sh
-source lib.sh
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/tc_common.sh
+source "$lib_dir"/../../../net/forwarding/lib.sh
h1_create()
{
diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
new file mode 100755
index 0000000000..026d98976c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
@@ -0,0 +1,129 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import time
+import os
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import NetdevFamily, NlError
+from lib.py import NetDrvEpEnv
+from lib.py import cmd, tool, GenerateTraffic
+
+
+def _write_fail_config(config):
+ for key, value in config.items():
+ with open("/sys/kernel/debug/fail_function/" + key, "w") as fp:
+ fp.write(str(value) + "\n")
+
+
+def _enable_pp_allocation_fail():
+ if not os.path.exists("/sys/kernel/debug/fail_function"):
+ raise KsftSkipEx("Kernel built without function error injection (or DebugFS)")
+
+ if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_pages"):
+ with open("/sys/kernel/debug/fail_function/inject", "w") as fp:
+ fp.write("page_pool_alloc_pages\n")
+
+ _write_fail_config({
+ "verbose": 0,
+ "interval": 511,
+ "probability": 100,
+ "times": -1,
+ })
+
+
+def _disable_pp_allocation_fail():
+ if not os.path.exists("/sys/kernel/debug/fail_function"):
+ return
+
+ if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_pages"):
+ with open("/sys/kernel/debug/fail_function/inject", "w") as fp:
+ fp.write("\n")
+
+ _write_fail_config({
+ "probability": 0,
+ "times": 0,
+ })
+
+
+def test_pp_alloc(cfg, netdevnl):
+ def get_stats():
+ return netdevnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ def check_traffic_flowing():
+ stat1 = get_stats()
+ time.sleep(1)
+ stat2 = get_stats()
+ if stat2['rx-packets'] - stat1['rx-packets'] < 15000:
+ raise KsftFailEx("Traffic seems low:", stat2['rx-packets'] - stat1['rx-packets'])
+
+
+ try:
+ stats = get_stats()
+ except NlError as e:
+ if e.nl_msg.error == -95:
+ stats = {}
+ else:
+ raise
+ if 'rx-alloc-fail' not in stats:
+ raise KsftSkipEx("Driver does not report 'rx-alloc-fail' via qstats")
+
+ set_g = False
+ traffic = None
+ try:
+ traffic = GenerateTraffic(cfg)
+
+ check_traffic_flowing()
+
+ _enable_pp_allocation_fail()
+
+ s1 = get_stats()
+ time.sleep(3)
+ s2 = get_stats()
+
+ if s2['rx-alloc-fail'] - s1['rx-alloc-fail'] < 1:
+ raise KsftSkipEx("Allocation failures not increasing")
+ if s2['rx-alloc-fail'] - s1['rx-alloc-fail'] < 100:
+ raise KsftSkipEx("Allocation increasing too slowly", s2['rx-alloc-fail'] - s1['rx-alloc-fail'],
+ "packets:", s2['rx-packets'] - s1['rx-packets'])
+
+ # Basic failures are fine, try to wobble some settings to catch extra failures
+ check_traffic_flowing()
+ g = tool("ethtool", "-g " + cfg.ifname, json=True)[0]
+ if 'rx' in g and g["rx"] * 2 <= g["rx-max"]:
+ new_g = g['rx'] * 2
+ elif 'rx' in g:
+ new_g = g['rx'] // 2
+ else:
+ new_g = None
+
+ if new_g:
+ set_g = cmd(f"ethtool -G {cfg.ifname} rx {new_g}", fail=False).ret == 0
+ if set_g:
+ ksft_pr("ethtool -G change retval: success")
+ else:
+ ksft_pr("ethtool -G change retval: did not succeed", new_g)
+ else:
+ ksft_pr("ethtool -G change retval: did not try")
+
+ time.sleep(0.1)
+ check_traffic_flowing()
+ finally:
+ _disable_pp_allocation_fail()
+ if traffic:
+ traffic.stop()
+ time.sleep(0.1)
+ if set_g:
+ cmd(f"ethtool -G {cfg.ifname} rx {g['rx']}")
+
+
+def main() -> None:
+ netdevnl = NetdevFamily()
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+
+ ksft_run([test_pp_alloc], args=(cfg, netdevnl, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/settings b/tools/testing/selftests/drivers/net/hw/settings
new file mode 100644
index 0000000000..e7b9417537
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py
new file mode 100644
index 0000000000..401e70f7f1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import sys
+from pathlib import Path
+
+KSFT_DIR = (Path(__file__).parent / "../../../..").resolve()
+
+try:
+ sys.path.append(KSFT_DIR.as_posix())
+ from net.lib.py import *
+except ModuleNotFoundError as e:
+ ksft_pr("Failed importing `net` library from kernel sources")
+ ksft_pr(str(e))
+ ktap_result(True, comment="SKIP")
+ sys.exit(4)
+
+from .env import *
+from .load import *
+from .remote import Remote
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
new file mode 100644
index 0000000000..edcedd7bff
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -0,0 +1,224 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+from pathlib import Path
+from lib.py import KsftSkipEx, KsftXfailEx
+from lib.py import cmd, ip
+from lib.py import NetNS, NetdevSimDev
+from .remote import Remote
+
+
+def _load_env_file(src_path):
+ env = os.environ.copy()
+
+ src_dir = Path(src_path).parent.resolve()
+ if not (src_dir / "net.config").exists():
+ return env
+
+ with open((src_dir / "net.config").as_posix(), 'r') as fp:
+ for line in fp.readlines():
+ full_file = line
+ # Strip comments
+ pos = line.find("#")
+ if pos >= 0:
+ line = line[:pos]
+ line = line.strip()
+ if not line:
+ continue
+ pair = line.split('=', maxsplit=1)
+ if len(pair) != 2:
+ raise Exception("Can't parse configuration line:", full_file)
+ env[pair[0]] = pair[1]
+ return env
+
+
+class NetDrvEnv:
+ """
+ Class for a single NIC / host env, with no remote end
+ """
+ def __init__(self, src_path, **kwargs):
+ self._ns = None
+
+ self.env = _load_env_file(src_path)
+
+ if 'NETIF' in self.env:
+ self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0]
+ else:
+ self._ns = NetdevSimDev(**kwargs)
+ self.dev = self._ns.nsims[0].dev
+ self.ifindex = self.dev['ifindex']
+
+ def __enter__(self):
+ ip(f"link set dev {self.dev['ifname']} up")
+
+ return self
+
+ def __exit__(self, ex_type, ex_value, ex_tb):
+ """
+ __exit__ gets called at the end of a "with" block.
+ """
+ self.__del__()
+
+ def __del__(self):
+ if self._ns:
+ self._ns.remove()
+ self._ns = None
+
+
+class NetDrvEpEnv:
+ """
+ Class for an environment with a local device and "remote endpoint"
+ which can be used to send traffic in.
+
+ For local testing it creates two network namespaces and a pair
+ of netdevsim devices.
+ """
+
+ # Network prefixes used for local tests
+ nsim_v4_pfx = "192.0.2."
+ nsim_v6_pfx = "2001:db8::"
+
+ def __init__(self, src_path, nsim_test=None):
+
+ self.env = _load_env_file(src_path)
+
+ # Things we try to destroy
+ self.remote = None
+ # These are for local testing state
+ self._netns = None
+ self._ns = None
+ self._ns_peer = None
+
+ if "NETIF" in self.env:
+ if nsim_test is True:
+ raise KsftXfailEx("Test only works on netdevsim")
+ self._check_env()
+
+ self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0]
+
+ self.v4 = self.env.get("LOCAL_V4")
+ self.v6 = self.env.get("LOCAL_V6")
+ self.remote_v4 = self.env.get("REMOTE_V4")
+ self.remote_v6 = self.env.get("REMOTE_V6")
+ kind = self.env["REMOTE_TYPE"]
+ args = self.env["REMOTE_ARGS"]
+ else:
+ if nsim_test is False:
+ raise KsftXfailEx("Test does not work on netdevsim")
+
+ self.create_local()
+
+ self.dev = self._ns.nsims[0].dev
+
+ self.v4 = self.nsim_v4_pfx + "1"
+ self.v6 = self.nsim_v6_pfx + "1"
+ self.remote_v4 = self.nsim_v4_pfx + "2"
+ self.remote_v6 = self.nsim_v6_pfx + "2"
+ kind = "netns"
+ args = self._netns.name
+
+ self.remote = Remote(kind, args, src_path)
+
+ self.addr = self.v6 if self.v6 else self.v4
+ self.remote_addr = self.remote_v6 if self.remote_v6 else self.remote_v4
+
+ self.addr_ipver = "6" if self.v6 else "4"
+ # Bracketed addresses, some commands need IPv6 to be inside []
+ self.baddr = f"[{self.v6}]" if self.v6 else self.v4
+ self.remote_baddr = f"[{self.remote_v6}]" if self.remote_v6 else self.remote_v4
+
+ self.ifname = self.dev['ifname']
+ self.ifindex = self.dev['ifindex']
+
+ self._required_cmd = {}
+
+ def create_local(self):
+ self._netns = NetNS()
+ self._ns = NetdevSimDev()
+ self._ns_peer = NetdevSimDev(ns=self._netns)
+
+ with open("/proc/self/ns/net") as nsfd0, \
+ open("/var/run/netns/" + self._netns.name) as nsfd1:
+ ifi0 = self._ns.nsims[0].ifindex
+ ifi1 = self._ns_peer.nsims[0].ifindex
+ NetdevSimDev.ctrl_write('link_device',
+ f'{nsfd0.fileno()}:{ifi0} {nsfd1.fileno()}:{ifi1}')
+
+ ip(f" addr add dev {self._ns.nsims[0].ifname} {self.nsim_v4_pfx}1/24")
+ ip(f"-6 addr add dev {self._ns.nsims[0].ifname} {self.nsim_v6_pfx}1/64 nodad")
+ ip(f" link set dev {self._ns.nsims[0].ifname} up")
+
+ ip(f" addr add dev {self._ns_peer.nsims[0].ifname} {self.nsim_v4_pfx}2/24", ns=self._netns)
+ ip(f"-6 addr add dev {self._ns_peer.nsims[0].ifname} {self.nsim_v6_pfx}2/64 nodad", ns=self._netns)
+ ip(f" link set dev {self._ns_peer.nsims[0].ifname} up", ns=self._netns)
+
+ def _check_env(self):
+ vars_needed = [
+ ["LOCAL_V4", "LOCAL_V6"],
+ ["REMOTE_V4", "REMOTE_V6"],
+ ["REMOTE_TYPE"],
+ ["REMOTE_ARGS"]
+ ]
+ missing = []
+
+ for choice in vars_needed:
+ for entry in choice:
+ if entry in self.env:
+ break
+ else:
+ missing.append(choice)
+ # Make sure v4 / v6 configs are symmetric
+ if ("LOCAL_V6" in self.env) != ("REMOTE_V6" in self.env):
+ missing.append(["LOCAL_V6", "REMOTE_V6"])
+ if ("LOCAL_V4" in self.env) != ("REMOTE_V4" in self.env):
+ missing.append(["LOCAL_V4", "REMOTE_V4"])
+ if missing:
+ raise Exception("Invalid environment, missing configuration:", missing,
+ "Please see tools/testing/selftests/drivers/net/README.rst")
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, ex_type, ex_value, ex_tb):
+ """
+ __exit__ gets called at the end of a "with" block.
+ """
+ self.__del__()
+
+ def __del__(self):
+ if self._ns:
+ self._ns.remove()
+ self._ns = None
+ if self._ns_peer:
+ self._ns_peer.remove()
+ self._ns_peer = None
+ if self._netns:
+ del self._netns
+ self._netns = None
+ if self.remote:
+ del self.remote
+ self.remote = None
+
+ def require_v4(self):
+ if not self.v4 or not self.remote_v4:
+ raise KsftSkipEx("Test requires IPv4 connectivity")
+
+ def require_v6(self):
+ if not self.v6 or not self.remote_v6:
+ raise KsftSkipEx("Test requires IPv6 connectivity")
+
+ def _require_cmd(self, comm, key, host=None):
+ cached = self._required_cmd.get(comm, {})
+ if cached.get(key) is None:
+ cached[key] = cmd("command -v -- " + comm, fail=False,
+ shell=True, host=host).ret == 0
+ self._required_cmd[comm] = cached
+ return cached[key]
+
+ def require_cmd(self, comm, local=True, remote=False):
+ if local:
+ if not self._require_cmd(comm, "local"):
+ raise KsftSkipEx("Test requires command: " + comm)
+ if remote:
+ if not self._require_cmd(comm, "remote"):
+ raise KsftSkipEx("Test requires (remote) command: " + comm)
diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py
new file mode 100644
index 0000000000..abdb677bdb
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/load.py
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import time
+
+from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen
+
+class GenerateTraffic:
+ def __init__(self, env):
+ env.require_cmd("iperf3", remote=True)
+
+ self.env = env
+
+ port = rand_port()
+ self._iperf_server = cmd(f"iperf3 -s -p {port}", background=True)
+ wait_port_listen(port)
+ time.sleep(0.1)
+ self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {port} -t 86400",
+ background=True, host=env.remote)
+
+ # Wait for traffic to ramp up
+ pkt = ip("-s link show dev " + env.ifname, json=True)[0]["stats64"]["rx"]["packets"]
+ for _ in range(50):
+ time.sleep(0.1)
+ now = ip("-s link show dev " + env.ifname, json=True)[0]["stats64"]["rx"]["packets"]
+ if now - pkt > 1000:
+ return
+ pkt = now
+ self.stop(verbose=True)
+ raise Exception("iperf3 traffic did not ramp up")
+
+ def stop(self, verbose=None):
+ self._iperf_client.process(terminate=True)
+ if verbose:
+ ksft_pr(">> Client:")
+ ksft_pr(self._iperf_client.stdout)
+ ksft_pr(self._iperf_client.stderr)
+ self._iperf_server.process(terminate=True)
+ if verbose:
+ ksft_pr(">> Server:")
+ ksft_pr(self._iperf_server.stdout)
+ ksft_pr(self._iperf_server.stderr)
diff --git a/tools/testing/selftests/drivers/net/lib/py/remote.py b/tools/testing/selftests/drivers/net/lib/py/remote.py
new file mode 100644
index 0000000000..b1780b9877
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/remote.py
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import importlib
+
+_modules = {}
+
+def Remote(kind, args, src_path):
+ global _modules
+
+ if kind not in _modules:
+ _modules[kind] = importlib.import_module("..remote_" + kind, __name__)
+
+ dir_path = os.path.abspath(src_path + "/../")
+ return getattr(_modules[kind], "Remote")(args, dir_path)
diff --git a/tools/testing/selftests/drivers/net/lib/py/remote_netns.py b/tools/testing/selftests/drivers/net/lib/py/remote_netns.py
new file mode 100644
index 0000000000..7d5eeb0271
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/remote_netns.py
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import subprocess
+
+from lib.py import cmd
+
+
+class Remote:
+ def __init__(self, name, dir_path):
+ self.name = name
+ self.dir_path = dir_path
+
+ def cmd(self, comm):
+ return subprocess.Popen(["ip", "netns", "exec", self.name, "bash", "-c", comm],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+ def deploy(self, what):
+ if os.path.isabs(what):
+ return what
+ return os.path.abspath(self.dir_path + "/" + what)
diff --git a/tools/testing/selftests/drivers/net/lib/py/remote_ssh.py b/tools/testing/selftests/drivers/net/lib/py/remote_ssh.py
new file mode 100644
index 0000000000..924addde19
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/remote_ssh.py
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import string
+import subprocess
+import random
+
+from lib.py import cmd
+
+
+class Remote:
+ def __init__(self, name, dir_path):
+ self.name = name
+ self.dir_path = dir_path
+ self._tmpdir = None
+
+ def __del__(self):
+ if self._tmpdir:
+ cmd("rm -rf " + self._tmpdir, host=self)
+ self._tmpdir = None
+
+ def cmd(self, comm):
+ return subprocess.Popen(["ssh", "-q", self.name, comm],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+ def _mktmp(self):
+ return ''.join(random.choice(string.ascii_lowercase) for _ in range(8))
+
+ def deploy(self, what):
+ if not self._tmpdir:
+ self._tmpdir = "/tmp/" + self._mktmp()
+ cmd("mkdir " + self._tmpdir, host=self)
+ file_name = self._tmpdir + "/" + self._mktmp() + os.path.basename(what)
+
+ if not os.path.isabs(what):
+ what = os.path.abspath(self.dir_path + "/" + what)
+
+ cmd(f"scp {what} {self.name}:{file_name}")
+ return file_name
diff --git a/tools/testing/selftests/drivers/net/microchip/ksz9477_qos.sh b/tools/testing/selftests/drivers/net/microchip/ksz9477_qos.sh
new file mode 100755
index 0000000000..82be5d0133
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/microchip/ksz9477_qos.sh
@@ -0,0 +1,668 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2024 Pengutronix, Oleksij Rempel <kernel@pengutronix.de>
+
+# The script is adopted to work with the Microchip KSZ switch driver.
+
+ETH_FCS_LEN=4
+
+WAIT_TIME=1
+NUM_NETIFS=4
+REQUIRE_JQ="yes"
+REQUIRE_MZ="yes"
+STABLE_MAC_ADDRS=yes
+NETIF_CREATE=no
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+require_command dcb
+
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+# On h1_ and h2_create do not set IP addresses to avoid interaction with the
+# system, to keep packet counters clean.
+h1_create()
+{
+ simple_if_init $h1
+ sysctl_set net.ipv6.conf.${h1}.disable_ipv6 1
+ # Get the MAC address of the interface to use it with mausezahn
+ h1_mac=$(ip -j link show dev ${h1} | jq -e '.[].address')
+}
+
+h1_destroy()
+{
+ sysctl_restore net.ipv6.conf.${h1}.disable_ipv6
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ sysctl_set net.ipv6.conf.${h2}.disable_ipv6 1
+ h2_mac=$(ip -j link show dev ${h2} | jq -e '.[].address')
+}
+
+h2_destroy()
+{
+ sysctl_restore net.ipv6.conf.${h2}.disable_ipv6
+ simple_if_fini $h2
+}
+
+switch_create()
+{
+ ip link set ${swp1} up
+ ip link set ${swp2} up
+ sysctl_set net.ipv6.conf.${swp1}.disable_ipv6 1
+ sysctl_set net.ipv6.conf.${swp2}.disable_ipv6 1
+
+ # Ports should trust VLAN PCP even with vlan_filtering=0
+ ip link add br0 type bridge
+ ip link set ${swp1} master br0
+ ip link set ${swp2} master br0
+ ip link set br0 up
+ sysctl_set net.ipv6.conf.br0.disable_ipv6 1
+}
+
+switch_destroy()
+{
+ sysctl_restore net.ipv6.conf.${swp2}.disable_ipv6
+ sysctl_restore net.ipv6.conf.${swp1}.disable_ipv6
+
+ ip link del br0
+}
+
+setup_prepare()
+{
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+ switch_destroy
+
+ vrf_cleanup
+}
+
+set_apptrust_order()
+{
+ local if_name=$1
+ local order=$2
+
+ dcb apptrust set dev ${if_name} order ${order}
+}
+
+# Function to extract a specified field from a given JSON stats string
+extract_network_stat() {
+ local stats_json=$1
+ local field_name=$2
+
+ echo $(echo "$stats_json" | jq -r "$field_name")
+}
+
+run_test()
+{
+ local test_name=$1;
+ local apptrust_order=$2;
+ local port_prio=$3;
+ local dscp_ipv=$4;
+ local dscp=$5;
+ local have_vlan=$6;
+ local pcp_ipv=$7;
+ local vlan_pcp=$8;
+ local ip_v6=$9
+
+ local rx_ipv
+ local tx_ipv
+
+ RET=0
+
+ # Send some packet to populate the switch MAC table
+ $MZ ${h2} -a ${h2_mac} -b ${h1_mac} -p 64 -t icmp echores -c 1
+
+ # Based on the apptrust order, set the expected Internal Priority values
+ # for the RX and TX paths.
+ if [ "${apptrust_order}" == "" ]; then
+ echo "Apptrust order not set."
+ rx_ipv=${port_prio}
+ tx_ipv=${port_prio}
+ elif [ "${apptrust_order}" == "dscp" ]; then
+ echo "Apptrust order is DSCP."
+ rx_ipv=${dscp_ipv}
+ tx_ipv=${dscp_ipv}
+ elif [ "${apptrust_order}" == "pcp" ]; then
+ echo "Apptrust order is PCP."
+ rx_ipv=${pcp_ipv}
+ tx_ipv=${pcp_ipv}
+ elif [ "${apptrust_order}" == "pcp dscp" ]; then
+ echo "Apptrust order is PCP DSCP."
+ if [ ${have_vlan} -eq 1 ]; then
+ rx_ipv=$((dscp_ipv > pcp_ipv ? dscp_ipv : pcp_ipv))
+ tx_ipv=${pcp_ipv}
+ else
+ rx_ipv=${dscp_ipv}
+ tx_ipv=${dscp_ipv}
+ fi
+ else
+ RET=1
+ echo "Error: Unknown apptrust order ${apptrust_order}"
+ log_test "${test_name}"
+ return
+ fi
+
+ # Most/all? of the KSZ switches do not provide per-TC counters. There
+ # are only tx_hi and rx_hi counters, which are used to count packets
+ # which are considered as high priority and most likely not assigned
+ # to the queue 0.
+ # On the ingress path, packets seem to get high priority status
+ # independently of the DSCP or PCP global mapping. On the egress path,
+ # the high priority status is assigned based on the DSCP or PCP global
+ # map configuration.
+ # The thresholds for the high priority status are not documented, but
+ # it seems that the switch considers packets as high priority on the
+ # ingress path if detected Internal Priority is greater than 0. On the
+ # egress path, the switch considers packets as high priority if
+ # detected Internal Priority is greater than 1.
+ if [ ${rx_ipv} -ge 1 ]; then
+ local expect_rx_high_prio=1
+ else
+ local expect_rx_high_prio=0
+ fi
+
+ if [ ${tx_ipv} -ge 2 ]; then
+ local expect_tx_high_prio=1
+ else
+ local expect_tx_high_prio=0
+ fi
+
+ # Use ip tool to get the current switch packet counters. ethool stats
+ # need to be recalculated to get the correct values.
+ local swp1_stats=$(ip -s -j link show dev ${swp1})
+ local swp2_stats=$(ip -s -j link show dev ${swp2})
+ local swp1_rx_packets_before=$(extract_network_stat "$swp1_stats" \
+ '.[0].stats64.rx.packets')
+ local swp1_rx_bytes_before=$(extract_network_stat "$swp1_stats" \
+ '.[0].stats64.rx.bytes')
+ local swp2_tx_packets_before=$(extract_network_stat "$swp2_stats" \
+ '.[0].stats64.tx.packets')
+ local swp2_tx_bytes_before=$(extract_network_stat "$swp2_stats" \
+ '.[0].stats64.tx.bytes')
+ local swp1_rx_hi_before=$(ethtool_stats_get ${swp1} "rx_hi")
+ local swp2_tx_hi_before=$(ethtool_stats_get ${swp2} "tx_hi")
+
+ # Assamble the mausezahn command based on the test parameters
+ # For the testis with ipv4 or ipv6, use icmp response packets,
+ # to avoid interaction with the system, to keep packet counters
+ # clean.
+ if [ ${ip_v6} -eq 0 ]; then
+ local ip="-a ${h1_mac} -b ${h2_mac} -A ${H1_IPV4} \
+ -B ${H2_IPV4} -t icmp unreach,code=1,dscp=${dscp}"
+ else
+ local ip="-6 -a ${h1_mac} -b ${h2_mac} -A ${H1_IPV6} \
+ -B ${H2_IPV6} -t icmp6 type=1,code=0,dscp=${dscp}"
+ fi
+
+ if [ ${have_vlan} -eq 1 ]; then
+ local vlan_pcp_opt="-Q ${vlan_pcp}:0"
+ else
+ local vlan_pcp_opt=""
+ fi
+ $MZ ${h1} ${ip} -c ${PING_COUNT} -d 10msec ${vlan_pcp_opt}
+
+ # Wait until the switch packet counters are updated
+ sleep 6
+
+ local swp1_stats=$(ip -s -j link show dev ${swp1})
+ local swp2_stats=$(ip -s -j link show dev ${swp2})
+
+ local swp1_rx_packets_after=$(extract_network_stat "$swp1_stats" \
+ '.[0].stats64.rx.packets')
+ local swp1_rx_bytes_after=$(extract_network_stat "$swp1_stats" \
+ '.[0].stats64.rx.bytes')
+ local swp2_tx_packets_after=$(extract_network_stat "$swp2_stats" \
+ '.[0].stats64.tx.packets')
+ local swp2_tx_bytes_after=$(extract_network_stat "$swp2_stats" \
+ '.[0].stats64.tx.bytes')
+
+ local swp1_rx_packets_diff=$((${swp1_rx_packets_after} - \
+ ${swp1_rx_packets_before}))
+ local swp2_tx_packets_diff=$((${swp2_tx_packets_after} - \
+ ${swp2_tx_packets_before}))
+
+ local swp1_rx_hi_after=$(ethtool_stats_get ${swp1} "rx_hi")
+ local swp2_tx_hi_after=$(ethtool_stats_get ${swp2} "tx_hi")
+
+ # Test if any packets were received on swp1, we will rx before and after
+ if [ ${swp1_rx_packets_diff} -lt ${PING_COUNT} ]; then
+ echo "Not expected amount of received packets on ${swp1}"
+ echo "before ${swp1_rx_packets_before} after ${swp1_rx_packets_after}"
+ RET=1
+ fi
+
+ # Test if any packets were transmitted on swp2, we will tx before and after
+ if [ ${swp2_tx_packets_diff} -lt ${PING_COUNT} ]; then
+ echo "Not expected amount of transmitted packets on ${swp2}"
+ echo "before ${swp2_tx_packets_before} after ${swp2_tx_packets_after}"
+ RET=1
+ fi
+
+ # tx/rx_hi counted in bytes. So, we need to compare the difference in bytes
+ local swp1_rx_bytes_diff=$(($swp1_rx_bytes_after - $swp1_rx_bytes_before))
+ local swp2_tx_bytes_diff=$(($swp2_tx_bytes_after - $swp2_tx_bytes_before))
+ local swp1_rx_hi_diff=$(($swp1_rx_hi_after - $swp1_rx_hi_before))
+ local swp2_tx_hi_diff=$(($swp2_tx_hi_after - $swp2_tx_hi_before))
+
+ if [ ${expect_rx_high_prio} -eq 1 ]; then
+ swp1_rx_hi_diff=$((${swp1_rx_hi_diff} - \
+ ${swp1_rx_packets_diff} * ${ETH_FCS_LEN}))
+ if [ ${swp1_rx_hi_diff} -ne ${swp1_rx_bytes_diff} ]; then
+ echo "Not expected amount of high priority packets received on ${swp1}"
+ echo "RX hi diff: ${swp1_rx_hi_diff}, expected RX bytes diff: ${swp1_rx_bytes_diff}"
+ RET=1
+ fi
+ else
+ if [ ${swp1_rx_hi_diff} -ne 0 ]; then
+ echo "Unexpected amount of high priority packets received on ${swp1}"
+ echo "RX hi diff: ${swp1_rx_hi_diff}, expected 0"
+ RET=1
+ fi
+ fi
+
+ if [ ${expect_tx_high_prio} -eq 1 ]; then
+ swp2_tx_hi_diff=$((${swp2_tx_hi_diff} - \
+ ${swp2_tx_packets_diff} * ${ETH_FCS_LEN}))
+ if [ ${swp2_tx_hi_diff} -ne ${swp2_tx_bytes_diff} ]; then
+ echo "Not expected amount of high priority packets transmitted on ${swp2}"
+ echo "TX hi diff: ${swp2_tx_hi_diff}, expected TX bytes diff: ${swp2_tx_bytes_diff}"
+ RET=1
+ fi
+ else
+ if [ ${swp2_tx_hi_diff} -ne 0 ]; then
+ echo "Unexpected amount of high priority packets transmitted on ${swp2}"
+ echo "TX hi diff: ${swp2_tx_hi_diff}, expected 0"
+ RET=1
+ fi
+ fi
+
+ log_test "${test_name}"
+}
+
+run_test_dscp()
+{
+ # IPv4 test
+ run_test "$1" "$2" "$3" "$4" "$5" 0 0 0 0
+ # IPv6 test
+ run_test "$1" "$2" "$3" "$4" "$5" 0 0 0 1
+}
+
+run_test_dscp_pcp()
+{
+ # IPv4 test
+ run_test "$1" "$2" "$3" "$4" "$5" 1 "$6" "$7" 0
+ # IPv6 test
+ run_test "$1" "$2" "$3" "$4" "$5" 1 "$6" "$7" 1
+}
+
+port_default_prio_get()
+{
+ local if_name=$1
+ local prio
+
+ prio="$(dcb -j app show dev ${if_name} default-prio | \
+ jq '.default_prio[]')"
+ if [ -z "${prio}" ]; then
+ prio=0
+ fi
+
+ echo ${prio}
+}
+
+test_port_default()
+{
+ local orig_apptrust=$(port_get_default_apptrust ${swp1})
+ local orig_prio=$(port_default_prio_get ${swp1})
+ local apptrust_order=""
+
+ RET=0
+
+ # Make sure no other priority sources will interfere with the test
+ set_apptrust_order ${swp1} "${apptrust_order}"
+
+ for val in $(seq 0 7); do
+ dcb app replace dev ${swp1} default-prio ${val}
+ if [ $val -ne $(port_default_prio_get ${swp1}) ]; then
+ RET=1
+ break
+ fi
+
+ run_test_dscp "Port-default QoS classification, prio: ${val}" \
+ "${apptrust_order}" ${val} 0 0
+ done
+
+ set_apptrust_order ${swp1} "${orig_apptrust}"
+ if [[ "$orig_apptrust" != "$(port_get_default_apptrust ${swp1})" ]]; then
+ RET=1
+ fi
+
+ dcb app replace dev ${swp1} default-prio ${orig_prio}
+ if [ $orig_prio -ne $(port_default_prio_get ${swp1}) ]; then
+ RET=1
+ fi
+
+ log_test "Port-default QoS classification"
+}
+
+port_get_default_apptrust()
+{
+ local if_name=$1
+
+ dcb -j apptrust show dev ${if_name} | jq -r '.order[]' | \
+ tr '\n' ' ' | xargs
+}
+
+test_port_apptrust()
+{
+ local original_dscp_prios_swp1=$(get_dscp_prios ${swp1})
+ local orig_apptrust=$(port_get_default_apptrust ${swp1})
+ local orig_port_prio=$(port_default_prio_get ${swp1})
+ local order_variants=("pcp dscp" "dscp" "pcp")
+ local apptrust_order
+ local port_prio
+ local dscp_prio
+ local pcp_prio
+ local dscp
+ local pcp
+
+ RET=0
+
+ # First, test if apptrust configuration as taken by the kernel
+ for order in "${order_variants[@]}"; do
+ set_apptrust_order ${swp1} "${order}"
+ if [[ "$order" != "$(port_get_default_apptrust ${swp1})" ]]; then
+ RET=1
+ break
+ fi
+ done
+
+ log_test "Apptrust, supported variants"
+
+ # To test if the apptrust configuration is working as expected, we need
+ # to set DSCP priorities for the switch port.
+ init_dscp_prios "${swp1}" "${original_dscp_prios_swp1}"
+
+ # Start with a simple test where all apptrust sources are disabled
+ # default port priority is 0, DSCP priority is mapped to 7.
+ # No high priority packets should be received or transmitted.
+ port_prio=0
+ dscp_prio=7
+ dscp=4
+
+ dcb app replace dev ${swp1} default-prio ${port_prio}
+ dcb app replace dev ${swp1} dscp-prio ${dscp}:${dscp_prio}
+
+ apptrust_order=""
+ set_apptrust_order ${swp1} "${apptrust_order}"
+ # Test with apptrust sources disabled, Packets should get port default
+ # priority which is 0
+ run_test_dscp "Apptrust, all disabled. DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp}
+
+ apptrust_order="pcp"
+ set_apptrust_order ${swp1} "${apptrust_order}"
+ # If PCP is enabled, packets should get PCP priority, which is not
+ # set in this test (no VLAN tags are present in the packet). No high
+ # priority packets should be received or transmitted.
+ run_test_dscp "Apptrust, PCP enabled. DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp}
+
+ apptrust_order="dscp"
+ set_apptrust_order ${swp1} "${apptrust_order}"
+ # If DSCP is enabled, packets should get DSCP priority which is set to 7
+ # in this test. High priority packets should be received and transmitted.
+ run_test_dscp "Apptrust, DSCP enabled. DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp}
+
+ apptrust_order="pcp dscp"
+ set_apptrust_order ${swp1} "${apptrust_order}"
+ # If PCP and DSCP are enabled, PCP would have higher apptrust priority
+ # so packets should get PCP priority. But in this test VLAN PCP is not
+ # set, so it should get DSCP priority which is set to 7. High priority
+ # packets should be received and transmitted.
+ run_test_dscp "Apptrust, PCP and DSCP are enabled. DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp}
+
+ # If VLAN PCP is set, it should have higher apptrust priority than DSCP
+ # so packets should get VLAN PCP priority. Send packets with VLAN PCP
+ # set to 0, DSCP set to 7. Packets should get VLAN PCP priority.
+ # No high priority packets should be transmitted. Due to nature of the
+ # switch, high priority packets will be received.
+ pcp_prio=0
+ pcp=0
+ run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp}
+
+ # If VLAN PCP is set to 7, it should have higher apptrust priority than
+ # DSCP so packets should get VLAN PCP priority. Send packets with VLAN
+ # PCP set to 7, DSCP set to 7. Packets should get VLAN PCP priority.
+ # High priority packets should be received and transmitted.
+ pcp_prio=7
+ pcp=7
+ run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp}
+ # Now make sure that the switch is able to handle the case where DSCP
+ # priority is set to 0 and PCP priority is set to 7. Packets should get
+ # PCP priority. High priority packets should be received and transmitted.
+ dscp_prio=0
+ dcb app replace dev ${swp1} dscp-prio ${dscp}:${dscp_prio}
+ run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp}
+ # If both VLAN PCP and DSCP are set to 0, packets should get 0 priority.
+ # No high priority packets should be received or transmitted.
+ pcp_prio=0
+ pcp=0
+ run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp}
+
+ # Restore original priorities
+ if ! restore_priorities "${swp1}" "${original_dscp_prios_swp1}"; then
+ RET=1
+ fi
+
+ set_apptrust_order ${swp1} "${orig_apptrust}"
+ if [ "$orig_apptrust" != "$(port_get_default_apptrust ${swp1})" ]; then
+ RET=1
+ fi
+
+ dcb app replace dev ${swp1} default-prio ${orig_port_prio}
+ if [ $orig_port_prio -ne $(port_default_prio_get ${swp1}) ]; then
+ RET=1
+ fi
+
+ log_test "Apptrust, restore original settings"
+}
+
+# Function to get current DSCP priorities
+get_dscp_prios() {
+ local if_name=$1
+ dcb -j app show dev ${if_name} | jq -c '.dscp_prio'
+}
+
+# Function to set a specific DSCP priority on a device
+replace_dscp_prio() {
+ local if_name=$1
+ local dscp=$2
+ local prio=$3
+ dcb app replace dev ${if_name} dscp-prio ${dscp}:${prio}
+}
+
+# Function to compare DSCP maps
+compare_dscp_maps() {
+ local old_json=$1
+ local new_json=$2
+ local dscp=$3
+ local prio=$4
+
+ # Create a modified old_json with the expected change for comparison
+ local modified_old_json=$(echo "$old_json" |
+ jq --argjson dscp $dscp --argjson prio $prio \
+ 'map(if .[0] == $dscp then [$dscp, $prio] else . end)' |
+ tr -d " \n")
+
+ # Compare new_json with the modified_old_json
+ if [[ "$modified_old_json" == "$new_json" ]]; then
+ return 0
+ else
+ return 1
+ fi
+}
+
+# Function to set DSCP priorities
+set_and_verify_dscp() {
+ local port=$1
+ local dscp=$2
+ local new_prio=$3
+
+ local old_prios=$(get_dscp_prios $port)
+
+ replace_dscp_prio "$port" $dscp $new_prio
+
+ # Fetch current settings and compare
+ local current_prios=$(get_dscp_prios $port)
+ if ! compare_dscp_maps "$old_prios" "$current_prios" $dscp $new_prio; then
+ echo "Error: Unintended changes detected in DSCP map for $port after setting DSCP $dscp to $new_prio."
+ return 1
+ fi
+ return 0
+}
+
+# Function to restore original priorities
+restore_priorities() {
+ local port=$1
+ local original_prios=$2
+
+ echo "Removing test artifacts for $port"
+ local current_prios=$(get_dscp_prios $port)
+ local prio_str=$(echo "$current_prios" |
+ jq -r 'map("\(.[0]):\(.[1])") | join(" ")')
+ dcb app del dev $port dscp-prio $prio_str
+
+ echo "Restoring original DSCP priorities for $port"
+ local restore_str=$(echo "$original_prios" |
+ jq -r 'map("\(.[0]):\(.[1])") | join(" ")')
+ dcb app add dev $port dscp-prio $restore_str
+
+ local current_prios=$(get_dscp_prios $port)
+ if [[ "$original_prios" != "$current_prios" ]]; then
+ echo "Error: Failed to restore original DSCP priorities for $port"
+ return 1
+ fi
+ return 0
+}
+
+# Initialize DSCP priorities. Set them to predictable values for testing.
+init_dscp_prios() {
+ local port=$1
+ local original_prios=$2
+
+ echo "Removing any existing DSCP priority mappins for $port"
+ local prio_str=$(echo "$original_prios" |
+ jq -r 'map("\(.[0]):\(.[1])") | join(" ")')
+ dcb app del dev $port dscp-prio $prio_str
+
+ # Initialize DSCP priorities list
+ local dscp_prios=""
+ for dscp in {0..63}; do
+ dscp_prios+=("$dscp:0")
+ done
+
+ echo "Setting initial DSCP priorities map to 0 for $port"
+ dcb app add dev $port dscp-prio ${dscp_prios[@]}
+}
+
+# Main function to test global DSCP map across specified ports
+test_global_dscp_map() {
+ local ports=("$swp1" "$swp2")
+ local original_dscp_prios_port0=$(get_dscp_prios ${ports[0]})
+ local orig_apptrust=$(port_get_default_apptrust ${swp1})
+ local orig_port_prio=$(port_default_prio_get ${swp1})
+ local apptrust_order="dscp"
+ local port_prio=0
+ local dscp_prio
+ local dscp
+
+ RET=0
+
+ set_apptrust_order ${swp1} "${apptrust_order}"
+ dcb app replace dev ${swp1} default-prio ${port_prio}
+
+ # Initialize DSCP priorities
+ init_dscp_prios "${ports[0]}" "$original_dscp_prios_port0"
+
+ # Loop over each DSCP index
+ for dscp in {0..63}; do
+ # and test each Internal Priority value
+ for dscp_prio in {0..7}; do
+ # do it for each port. This is to test if the global DSCP map
+ # is accessible from all ports.
+ for port in "${ports[@]}"; do
+ if ! set_and_verify_dscp "$port" $dscp $dscp_prio; then
+ RET=1
+ fi
+ done
+
+ # Test if the DSCP priority is correctly applied to the packets
+ run_test_dscp "DSCP (${dscp}) QoS classification, prio: ${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp}
+ if [ ${RET} -eq 1 ]; then
+ break
+ fi
+ done
+ done
+
+ # Restore original priorities
+ if ! restore_priorities "${ports[0]}" "${original_dscp_prios_port0}"; then
+ RET=1
+ fi
+
+ set_apptrust_order ${swp1} "${orig_apptrust}"
+ if [[ "$orig_apptrust" != "$(port_get_default_apptrust ${swp1})" ]]; then
+ RET=1
+ fi
+
+ dcb app replace dev ${swp1} default-prio ${orig_port_prio}
+ if [ $orig_port_prio -ne $(port_default_prio_get ${swp1}) ]; then
+ RET=1
+ fi
+
+ log_test "DSCP global map"
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+ test_port_default
+ test_port_apptrust
+ test_global_dscp_map
+"
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh
index 91891b9418..877cd6df94 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh
@@ -24,8 +24,8 @@ setup_prepare()
busywait "$TIMEOUT" wait_for_port_up ethtool $swp2
check_err $? "ports did not come up"
- local lanes_exist=$(ethtool $swp1 | grep 'Lanes:')
- if [[ -z $lanes_exist ]]; then
+ busywait $TIMEOUT sh -c "ethtool $swp1 | grep -q Lanes:"
+ if [[ $? -ne 0 ]]; then
log_test "SKIP: driver does not support lanes setting"
exit 1
fi
@@ -122,8 +122,9 @@ autoneg()
ethtool_set $swp1 speed $max_speed lanes $lanes
ip link set dev $swp1 up
ip link set dev $swp2 up
- busywait "$TIMEOUT" wait_for_port_up ethtool $swp2
- check_err $? "ports did not come up"
+
+ busywait $TIMEOUT sh -c "ethtool $swp1 | grep -q Lanes:"
+ check_err $? "Lanes parameter is not presented on time"
check_lanes $swp1 $lanes $max_speed
log_test "$lanes lanes is autonegotiated"
@@ -160,8 +161,9 @@ autoneg_force_mode()
ethtool_set $swp2 speed $max_speed lanes $lanes autoneg off
ip link set dev $swp1 up
ip link set dev $swp2 up
- busywait "$TIMEOUT" wait_for_port_up ethtool $swp2
- check_err $? "ports did not come up"
+
+ busywait $TIMEOUT sh -c "ethtool $swp1 | grep -q Lanes:"
+ check_err $? "Lanes parameter is not presented on time"
check_lanes $swp1 $lanes $max_speed
log_test "Autoneg off, $lanes lanes detected during force mode"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
index 31252bc877..4994bea5da 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
@@ -11,7 +11,7 @@ ALL_TESTS="single_mask_test identical_filters_test two_masks_test \
multiple_masks_test ctcam_edge_cases_test delta_simple_test \
delta_two_masks_one_key_test delta_simple_rehash_test \
bloom_simple_test bloom_complex_test bloom_delta_test \
- max_erp_entries_test max_group_size_test"
+ max_erp_entries_test max_group_size_test collision_test"
NUM_NETIFS=2
source $lib_dir/lib.sh
source $lib_dir/tc_common.sh
@@ -457,7 +457,7 @@ delta_two_masks_one_key_test()
{
# If 2 keys are the same and only differ in mask in a way that
# they belong under the same ERP (second is delta of the first),
- # there should be no C-TCAM spill.
+ # there should be C-TCAM spill.
RET=0
@@ -474,8 +474,8 @@ delta_two_masks_one_key_test()
tp_record "mlxsw:*" "tc filter add dev $h2 ingress protocol ip \
pref 2 handle 102 flower $tcflags dst_ip 192.0.2.2 \
action drop"
- tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" 0
- check_err $? "incorrect C-TCAM spill while inserting the second rule"
+ tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" 1
+ check_err $? "C-TCAM spill did not happen while inserting the second rule"
$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
-t ip -q
@@ -1087,6 +1087,53 @@ max_group_size_test()
log_test "max ACL group size test ($tcflags). max size $max_size"
}
+collision_test()
+{
+ # Filters cannot share an eRP if in the common unmasked part (i.e.,
+ # without the delta bits) they have the same values. If the driver does
+ # not prevent such configuration (by spilling into the C-TCAM), then
+ # multiple entries will be present in the device with the same key,
+ # leading to collisions and a reduced scale.
+ #
+ # Create such a scenario and make sure all the filters are successfully
+ # added.
+
+ RET=0
+
+ local ret
+
+ if [[ "$tcflags" != "skip_sw" ]]; then
+ return 0;
+ fi
+
+ # Add a single dst_ip/24 filter and multiple dst_ip/32 filters that all
+ # have the same values in the common unmasked part (dst_ip/24).
+
+ tc filter add dev $h2 ingress pref 1 proto ipv4 handle 101 \
+ flower $tcflags dst_ip 198.51.100.0/24 \
+ action drop
+
+ for i in {0..255}; do
+ tc filter add dev $h2 ingress pref 2 proto ipv4 \
+ handle $((102 + i)) \
+ flower $tcflags dst_ip 198.51.100.${i}/32 \
+ action drop
+ ret=$?
+ [[ $ret -ne 0 ]] && break
+ done
+
+ check_err $ret "failed to add all the filters"
+
+ for i in {255..0}; do
+ tc filter del dev $h2 ingress pref 2 proto ipv4 \
+ handle $((102 + i)) flower
+ done
+
+ tc filter del dev $h2 ingress pref 1 proto ipv4 handle 101 flower
+
+ log_test "collision test ($tcflags)"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py
new file mode 100755
index 0000000000..eb83e7b487
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ping.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_eq
+from lib.py import NetDrvEpEnv
+from lib.py import bkg, cmd, wait_port_listen, rand_port
+
+
+def test_v4(cfg) -> None:
+ cfg.require_v4()
+
+ cmd(f"ping -c 1 -W0.5 {cfg.remote_v4}")
+ cmd(f"ping -c 1 -W0.5 {cfg.v4}", host=cfg.remote)
+
+
+def test_v6(cfg) -> None:
+ cfg.require_v6()
+
+ cmd(f"ping -c 1 -W0.5 {cfg.remote_v6}")
+ cmd(f"ping -c 1 -W0.5 {cfg.v6}", host=cfg.remote)
+
+
+def test_tcp(cfg) -> None:
+ cfg.require_cmd("socat", remote=True)
+
+ port = rand_port()
+ listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT"
+
+ with bkg(listen_cmd, exit_wait=True) as nc:
+ wait_port_listen(port)
+
+ cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}",
+ shell=True, host=cfg.remote)
+ ksft_eq(nc.stdout.strip(), "ping")
+
+ with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc:
+ wait_port_listen(port, host=cfg.remote)
+
+ cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True)
+ ksft_eq(nc.stdout.strip(), "ping")
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__) as cfg:
+ ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py
new file mode 100755
index 0000000000..30f29096e2
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/queues.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_run, ksft_exit, ksft_eq, KsftSkipEx
+from lib.py import EthtoolFamily, NetdevFamily
+from lib.py import NetDrvEnv
+from lib.py import cmd
+import glob
+
+
+def sys_get_queues(ifname) -> int:
+ folders = glob.glob(f'/sys/class/net/{ifname}/queues/rx-*')
+ return len(folders)
+
+
+def nl_get_queues(cfg, nl):
+ queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True)
+ if queues:
+ return len([q for q in queues if q['type'] == 'rx'])
+ return None
+
+
+def get_queues(cfg, nl) -> None:
+ queues = nl_get_queues(cfg, nl)
+ if not queues:
+ raise KsftSkipEx('queue-get not supported by device')
+
+ expected = sys_get_queues(cfg.dev['ifname'])
+ ksft_eq(queues, expected)
+
+
+def addremove_queues(cfg, nl) -> None:
+ queues = nl_get_queues(cfg, nl)
+ if not queues:
+ raise KsftSkipEx('queue-get not supported by device')
+
+ curr_queues = sys_get_queues(cfg.dev['ifname'])
+ if curr_queues == 1:
+ raise KsftSkipEx('cannot decrement queue: already at 1')
+
+ netnl = EthtoolFamily()
+ channels = netnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ if channels['combined-count'] == 0:
+ rx_type = 'rx'
+ else:
+ rx_type = 'combined'
+
+ expected = curr_queues - 1
+ cmd(f"ethtool -L {cfg.dev['ifname']} {rx_type} {expected}", timeout=10)
+ queues = nl_get_queues(cfg, nl)
+ ksft_eq(queues, expected)
+
+ expected = curr_queues
+ cmd(f"ethtool -L {cfg.dev['ifname']} {rx_type} {expected}", timeout=10)
+ queues = nl_get_queues(cfg, nl)
+ ksft_eq(queues, expected)
+
+
+def main() -> None:
+ with NetDrvEnv(__file__, queue_count=3) as cfg:
+ ksft_run([get_queues, addremove_queues], args=(cfg, NetdevFamily()))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py
new file mode 100755
index 0000000000..820b8e0a22
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/stats.py
@@ -0,0 +1,144 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import ksft_ge, ksft_eq, ksft_in, ksft_true, ksft_raises, KsftSkipEx, KsftXfailEx
+from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
+from lib.py import NetDrvEnv
+
+ethnl = EthtoolFamily()
+netfam = NetdevFamily()
+rtnl = RtnlFamily()
+
+
+def check_pause(cfg) -> None:
+ global ethnl
+
+ try:
+ ethnl.pause_get({"header": {"dev-index": cfg.ifindex}})
+ except NlError as e:
+ if e.error == 95:
+ raise KsftXfailEx("pause not supported by the device")
+ raise
+
+ data = ethnl.pause_get({"header": {"dev-index": cfg.ifindex,
+ "flags": {'stats'}}})
+ ksft_true(data['stats'], "driver does not report stats")
+
+
+def check_fec(cfg) -> None:
+ global ethnl
+
+ try:
+ ethnl.fec_get({"header": {"dev-index": cfg.ifindex}})
+ except NlError as e:
+ if e.error == 95:
+ raise KsftXfailEx("FEC not supported by the device")
+ raise
+
+ data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex,
+ "flags": {'stats'}}})
+ ksft_true(data['stats'], "driver does not report stats")
+
+
+def pkt_byte_sum(cfg) -> None:
+ global netfam, rtnl
+
+ def get_qstat(test):
+ global netfam
+ stats = netfam.qstats_get({}, dump=True)
+ if stats:
+ for qs in stats:
+ if qs["ifindex"]== test.ifindex:
+ return qs
+
+ qstat = get_qstat(cfg)
+ if qstat is None:
+ raise KsftSkipEx("qstats not supported by the device")
+
+ for key in ['tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes']:
+ ksft_in(key, qstat, "Drivers should always report basic keys")
+
+ # Compare stats, rtnl stats and qstats must match,
+ # but the interface may be up, so do a series of dumps
+ # each time the more "recent" stats must be higher or same.
+ def stat_cmp(rstat, qstat):
+ for key in ['tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes']:
+ if rstat[key] != qstat[key]:
+ return rstat[key] - qstat[key]
+ return 0
+
+ for _ in range(10):
+ rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+ if stat_cmp(rtstat, qstat) < 0:
+ raise Exception("RTNL stats are lower, fetched later")
+ qstat = get_qstat(cfg)
+ if stat_cmp(rtstat, qstat) > 0:
+ raise Exception("Qstats are lower, fetched later")
+
+
+def qstat_by_ifindex(cfg) -> None:
+ global netfam
+ global rtnl
+
+ # Construct a map ifindex -> [dump, by-index, dump]
+ ifindexes = {}
+ stats = netfam.qstats_get({}, dump=True)
+ for entry in stats:
+ ifindexes[entry['ifindex']] = [entry, None, None]
+
+ for ifindex in ifindexes.keys():
+ entry = netfam.qstats_get({"ifindex": ifindex}, dump=True)
+ ksft_eq(len(entry), 1)
+ ifindexes[entry[0]['ifindex']][1] = entry[0]
+
+ stats = netfam.qstats_get({}, dump=True)
+ for entry in stats:
+ ifindexes[entry['ifindex']][2] = entry
+
+ if len(ifindexes) == 0:
+ raise KsftSkipEx("No ifindex supports qstats")
+
+ # Now make sure the stats match/make sense
+ for ifindex, triple in ifindexes.items():
+ all_keys = triple[0].keys() | triple[1].keys() | triple[2].keys()
+
+ for key in all_keys:
+ ksft_ge(triple[1][key], triple[0][key], comment="bad key: " + key)
+ ksft_ge(triple[2][key], triple[1][key], comment="bad key: " + key)
+
+ # Test invalid dumps
+ # 0 is invalid
+ with ksft_raises(NlError) as cm:
+ netfam.qstats_get({"ifindex": 0}, dump=True)
+ ksft_eq(cm.exception.nl_msg.error, -34)
+ ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
+
+ # loopback has no stats
+ with ksft_raises(NlError) as cm:
+ netfam.qstats_get({"ifindex": 1}, dump=True)
+ ksft_eq(cm.exception.nl_msg.error, -95)
+ ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
+
+ # Try to get stats for lowest unused ifindex but not 0
+ devs = rtnl.getlink({}, dump=True)
+ all_ifindexes = set([dev["ifi-index"] for dev in devs])
+ lowest = 2
+ while lowest in all_ifindexes:
+ lowest += 1
+
+ with ksft_raises(NlError) as cm:
+ netfam.qstats_get({"ifindex": lowest}, dump=True)
+ ksft_eq(cm.exception.nl_msg.error, -19)
+ ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
+
+
+def main() -> None:
+ with NetDrvEnv(__file__) as cfg:
+ ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/virtio_net/Makefile b/tools/testing/selftests/drivers/net/virtio_net/Makefile
new file mode 100644
index 0000000000..7ec7cd3ab2
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/virtio_net/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+TEST_PROGS = basic_features.sh \
+ #
+
+TEST_FILES = \
+ virtio_net_common.sh \
+ #
+
+TEST_INCLUDES = \
+ ../../../net/forwarding/lib.sh \
+ ../../../net/lib.sh \
+ #
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/virtio_net/basic_features.sh b/tools/testing/selftests/drivers/net/virtio_net/basic_features.sh
new file mode 100755
index 0000000000..cf8cf816ed
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/virtio_net/basic_features.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# See virtio_net_common.sh comments for more details about assumed setup
+
+ALL_TESTS="
+ initial_ping_test
+ f_mac_test
+"
+
+source virtio_net_common.sh
+
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+h1=${NETIFS[p1]}
+h2=${NETIFS[p2]}
+
+h1_create()
+{
+ simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+initial_ping_test()
+{
+ setup_cleanup
+ setup_prepare
+ ping_test $h1 $H2_IPV4 " simple"
+}
+
+f_mac_test()
+{
+ RET=0
+ local test_name="mac feature filtered"
+
+ virtio_feature_present $h1 $VIRTIO_NET_F_MAC
+ if [ $? -ne 0 ]; then
+ log_test_skip "$test_name" "Device $h1 is missing feature $VIRTIO_NET_F_MAC."
+ return 0
+ fi
+ virtio_feature_present $h1 $VIRTIO_NET_F_MAC
+ if [ $? -ne 0 ]; then
+ log_test_skip "$test_name" "Device $h2 is missing feature $VIRTIO_NET_F_MAC."
+ return 0
+ fi
+
+ setup_cleanup
+ setup_prepare
+
+ grep -q 0 /sys/class/net/$h1/addr_assign_type
+ check_err $? "Permanent address assign type for $h1 is not set"
+ grep -q 0 /sys/class/net/$h2/addr_assign_type
+ check_err $? "Permanent address assign type for $h2 is not set"
+
+ setup_cleanup
+ virtio_filter_feature_add $h1 $VIRTIO_NET_F_MAC
+ virtio_filter_feature_add $h2 $VIRTIO_NET_F_MAC
+ setup_prepare
+
+ grep -q 0 /sys/class/net/$h1/addr_assign_type
+ check_fail $? "Permanent address assign type for $h1 is set when F_MAC feature is filtered"
+ grep -q 0 /sys/class/net/$h2/addr_assign_type
+ check_fail $? "Permanent address assign type for $h2 is set when F_MAC feature is filtered"
+
+ ping_do $h1 $H2_IPV4
+ check_err $? "Ping failed"
+
+ log_test "$test_name"
+}
+
+setup_prepare()
+{
+ virtio_device_rebind $h1
+ virtio_device_rebind $h2
+ wait_for_dev $h1
+ wait_for_dev $h2
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+setup_cleanup()
+{
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ virtio_filter_features_clear $h1
+ virtio_filter_features_clear $h2
+ virtio_device_rebind $h1
+ virtio_device_rebind $h2
+ wait_for_dev $h1
+ wait_for_dev $h2
+}
+
+cleanup()
+{
+ pre_cleanup
+ setup_cleanup
+}
+
+check_driver $h1 "virtio_net"
+check_driver $h2 "virtio_net"
+check_virtio_debugfs $h1
+check_virtio_debugfs $h2
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/virtio_net/config b/tools/testing/selftests/drivers/net/virtio_net/config
new file mode 100644
index 0000000000..bcf7555eaf
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/virtio_net/config
@@ -0,0 +1,8 @@
+CONFIG_BPF_SYSCALL=y
+CONFIG_CGROUP_BPF=y
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_VRF=m
+CONFIG_VIRTIO_DEBUG=y
+CONFIG_VIRTIO_NET=y
diff --git a/tools/testing/selftests/drivers/net/virtio_net/virtio_net_common.sh b/tools/testing/selftests/drivers/net/virtio_net/virtio_net_common.sh
new file mode 100644
index 0000000000..57bd8055e2
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/virtio_net/virtio_net_common.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This assumes running on a host with two virtio interfaces connected
+# back to back. Example script to do such wire-up of tap devices would
+# look like this:
+#
+# =======================================================================================================
+# #!/bin/bash
+#
+# DEV1="$1"
+# DEV2="$2"
+#
+# sudo tc qdisc add dev $DEV1 clsact
+# sudo tc qdisc add dev $DEV2 clsact
+# sudo tc filter add dev $DEV1 ingress protocol all pref 1 matchall action mirred egress redirect dev $DEV2
+# sudo tc filter add dev $DEV2 ingress protocol all pref 1 matchall action mirred egress redirect dev $DEV1
+# sudo ip link set $DEV1 up
+# sudo ip link set $DEV2 up
+# =======================================================================================================
+
+REQUIRE_MZ="no"
+NETIF_CREATE="no"
+NETIF_FIND_DRIVER="virtio_net"
+NUM_NETIFS=2
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+VIRTIO_NET_F_MAC=5
+
+virtio_device_get()
+{
+ local dev=$1; shift
+ local device_path="/sys/class/net/$dev/device/"
+
+ basename `realpath $device_path`
+}
+
+virtio_device_rebind()
+{
+ local dev=$1; shift
+ local device=`virtio_device_get $dev`
+
+ echo "$device" > /sys/bus/virtio/drivers/virtio_net/unbind
+ echo "$device" > /sys/bus/virtio/drivers/virtio_net/bind
+}
+
+virtio_debugfs_get()
+{
+ local dev=$1; shift
+ local device=`virtio_device_get $dev`
+
+ echo /sys/kernel/debug/virtio/$device/
+}
+
+check_virtio_debugfs()
+{
+ local dev=$1; shift
+ local debugfs=`virtio_debugfs_get $dev`
+
+ if [ ! -f "$debugfs/device_features" ] ||
+ [ ! -f "$debugfs/filter_feature_add" ] ||
+ [ ! -f "$debugfs/filter_feature_del" ] ||
+ [ ! -f "$debugfs/filter_features" ] ||
+ [ ! -f "$debugfs/filter_features_clear" ]; then
+ echo "SKIP: not possible to access debugfs for $dev"
+ exit $ksft_skip
+ fi
+}
+
+virtio_feature_present()
+{
+ local dev=$1; shift
+ local feature=$1; shift
+ local debugfs=`virtio_debugfs_get $dev`
+
+ cat $debugfs/device_features |grep "^$feature$" &> /dev/null
+ return $?
+}
+
+virtio_filter_features_clear()
+{
+ local dev=$1; shift
+ local debugfs=`virtio_debugfs_get $dev`
+
+ echo "1" > $debugfs/filter_features_clear
+}
+
+virtio_filter_feature_add()
+{
+ local dev=$1; shift
+ local feature=$1; shift
+ local debugfs=`virtio_debugfs_get $dev`
+
+ echo "$feature" > $debugfs/filter_feature_add
+}
diff --git a/tools/testing/selftests/exec/recursion-depth.c b/tools/testing/selftests/exec/recursion-depth.c
index b2f37d86a5..438c8ff2fd 100644
--- a/tools/testing/selftests/exec/recursion-depth.c
+++ b/tools/testing/selftests/exec/recursion-depth.c
@@ -37,25 +37,25 @@ int main(void)
ksft_test_result_skip("error: unshare, errno %d\n", errno);
ksft_finished();
}
- ksft_exit_fail_msg("error: unshare, errno %d\n", errno);
+ ksft_exit_fail_perror("error: unshare");
}
if (mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL) == -1)
- ksft_exit_fail_msg("error: mount '/', errno %d\n", errno);
+ ksft_exit_fail_perror("error: mount '/'");
/* Require "exec" filesystem. */
if (mount(NULL, "/tmp", "ramfs", 0, NULL) == -1)
- ksft_exit_fail_msg("error: mount ramfs, errno %d\n", errno);
+ ksft_exit_fail_perror("error: mount ramfs");
#define FILENAME "/tmp/1"
fd = creat(FILENAME, 0700);
if (fd == -1)
- ksft_exit_fail_msg("error: creat, errno %d\n", errno);
+ ksft_exit_fail_perror("error: creat");
#define S "#!" FILENAME "\n"
if (write(fd, S, strlen(S)) != strlen(S))
- ksft_exit_fail_msg("error: write, errno %d\n", errno);
+ ksft_exit_fail_perror("error: write");
close(fd);
diff --git a/tools/testing/selftests/fchmodat2/Makefile b/tools/testing/selftests/fchmodat2/Makefile
index 71ec34bf15..4373cea79b 100644
--- a/tools/testing/selftests/fchmodat2/Makefile
+++ b/tools/testing/selftests/fchmodat2/Makefile
@@ -1,6 +1,15 @@
# SPDX-License-Identifier: GPL-2.0-or-later
-CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan $(KHDR_INCLUDES)
+CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined $(KHDR_INCLUDES)
+
+# gcc requires -static-libasan in order to ensure that Address Sanitizer's
+# library is the first one loaded. However, clang already statically links the
+# Address Sanitizer if -fsanitize is specified. Therefore, simply omit
+# -static-libasan for clang builds.
+ifeq ($(LLVM),)
+ CFLAGS += -static-libasan
+endif
+
TEST_GEN_PROGS := fchmodat2_test
include ../lib.mk
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c
index 3eafd7da58..e8c019d72c 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount_test.c
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c
@@ -3,6 +3,7 @@
#define _GNU_SOURCE
#include <assert.h>
+#include <stddef.h>
#include <stdint.h>
#include <sched.h>
#include <fcntl.h>
@@ -124,8 +125,16 @@ static uint32_t old_root_id, old_parent_id;
static void cleanup_namespace(void)
{
- fchdir(orig_root);
- chroot(".");
+ int ret;
+
+ ret = fchdir(orig_root);
+ if (ret == -1)
+ ksft_perror("fchdir to original root");
+
+ ret = chroot(".");
+ if (ret == -1)
+ ksft_perror("chroot to original root");
+
umount2(root_mntpoint, MNT_DETACH);
rmdir(root_mntpoint);
}
diff --git a/tools/testing/selftests/ftrace/config b/tools/testing/selftests/ftrace/config
index e59d985eef..048a312abf 100644
--- a/tools/testing/selftests/ftrace/config
+++ b/tools/testing/selftests/ftrace/config
@@ -1,16 +1,28 @@
-CONFIG_KPROBES=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_EPROBE_EVENTS=y
+CONFIG_FPROBE=y
+CONFIG_FPROBE_EVENTS=y
CONFIG_FTRACE=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_FUNCTION_GRAPH_RETVAL=y
CONFIG_FUNCTION_PROFILER=y
-CONFIG_TRACER_SNAPSHOT=y
-CONFIG_STACK_TRACER=y
CONFIG_HIST_TRIGGERS=y
-CONFIG_SCHED_TRACER=y
-CONFIG_PREEMPT_TRACER=y
CONFIG_IRQSOFF_TRACER=y
-CONFIG_PREEMPTIRQ_DELAY_TEST=m
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KPROBES=y
+CONFIG_KPROBE_EVENTS=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
+CONFIG_PREEMPTIRQ_DELAY_TEST=m
+CONFIG_PREEMPT_TRACER=y
+CONFIG_PROBE_EVENTS_BTF_ARGS=y
CONFIG_SAMPLES=y
CONFIG_SAMPLE_FTRACE_DIRECT=m
CONFIG_SAMPLE_TRACE_PRINTK=m
-CONFIG_KALLSYMS_ALL=y
+CONFIG_SCHED_TRACER=y
+CONFIG_STACK_TRACER=y
+CONFIG_TRACER_SNAPSHOT=y
+CONFIG_UPROBES=y
+CONFIG_UPROBE_EVENTS=y
diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
index 25d4e0fca3..cce72f8b03 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -255,7 +255,13 @@ prlog() { # messages
[ "$LOG_FILE" ] && printf "$*$newline" | strip_esc >> $LOG_FILE
}
catlog() { #file
- cat $1
+ if [ "${KTAP}" = "1" ]; then
+ cat $1 | while read line ; do
+ echo "# $line"
+ done
+ else
+ cat $1
+ fi
[ "$LOG_FILE" ] && cat $1 | strip_esc >> $LOG_FILE
}
prlog "=== Ftrace unit tests ==="
diff --git a/tools/testing/selftests/ftrace/ftracetest-ktap b/tools/testing/selftests/ftrace/ftracetest-ktap
index b3284679ef..14e62ef3f3 100755
--- a/tools/testing/selftests/ftrace/ftracetest-ktap
+++ b/tools/testing/selftests/ftrace/ftracetest-ktap
@@ -5,4 +5,4 @@
#
# Copyright (C) Arm Ltd., 2023
-./ftracetest -K
+./ftracetest -K -v
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_args_vfs.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_args_vfs.tc
new file mode 100644
index 0000000000..c6a9d2466a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_args_vfs.tc
@@ -0,0 +1,41 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Fprobe event VFS type argument
+# requires: dynamic_events "%pd/%pD":README "f[:[<group>/][<event>]] <func-name>[%return] [<args>]":README
+
+
+: "Test argument %pd with name for fprobe"
+echo 'f:testprobe dput name=$arg1:%pd' > dynamic_events
+echo 1 > events/fprobes/testprobe/enable
+grep -q "1" events/fprobes/testprobe/enable
+echo 0 > events/fprobes/testprobe/enable
+grep "dput" trace | grep -q "enable"
+echo "" > dynamic_events
+echo "" > trace
+
+: "Test argument %pd without name for fprobe"
+echo 'f:testprobe dput $arg1:%pd' > dynamic_events
+echo 1 > events/fprobes/testprobe/enable
+grep -q "1" events/fprobes/testprobe/enable
+echo 0 > events/fprobes/testprobe/enable
+grep "dput" trace | grep -q "enable"
+echo "" > dynamic_events
+echo "" > trace
+
+: "Test argument %pD with name for fprobe"
+echo 'f:testprobe vfs_read name=$arg1:%pD' > dynamic_events
+echo 1 > events/fprobes/testprobe/enable
+grep -q "1" events/fprobes/testprobe/enable
+echo 0 > events/fprobes/testprobe/enable
+grep "vfs_read" trace | grep -q "enable"
+echo "" > dynamic_events
+echo "" > trace
+
+: "Test argument %pD without name for fprobe"
+echo 'f:testprobe vfs_read $arg1:%pD' > dynamic_events
+echo 1 > events/fprobes/testprobe/enable
+grep -q "1" events/fprobes/testprobe/enable
+echo 0 > events/fprobes/testprobe/enable
+grep "vfs_read" trace | grep -q "enable"
+echo "" > dynamic_events
+echo "" > trace
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
index 25432b8cd5..073a748b93 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
@@ -19,7 +19,7 @@ fail() { # mesg
FILTER=set_ftrace_filter
FUNC1="schedule"
-FUNC2="scheduler_tick"
+FUNC2="sched_tick"
ALL_FUNCS="#### all functions enabled ####"
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc
new file mode 100644
index 0000000000..21a54be689
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc
@@ -0,0 +1,40 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event VFS type argument
+# requires: kprobe_events "%pd/%pD":README
+
+: "Test argument %pd with name"
+echo 'p:testprobe dput name=$arg1:%pd' > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1" events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "dput" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
+
+: "Test argument %pd without name"
+echo 'p:testprobe dput $arg1:%pd' > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1" events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "dput" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
+
+: "Test argument %pD with name"
+echo 'p:testprobe vfs_read name=$arg1:%pD' > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1" events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "vfs_read" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
+
+: "Test argument %pD without name"
+echo 'p:testprobe vfs_read $arg1:%pD' > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1" events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "vfs_read" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index 11e157d753..78ab2cd111 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -3,8 +3,6 @@ SUBDIRS := functional
TEST_PROGS := run.sh
-.PHONY: all clean
-
include ../lib.mk
all:
diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common
index 0f456dbab6..45b5570441 100644
--- a/tools/testing/selftests/hid/config.common
+++ b/tools/testing/selftests/hid/config.common
@@ -238,3 +238,4 @@ CONFIG_VLAN_8021Q=y
CONFIG_XFRM_SUB_POLICY=y
CONFIG_XFRM_USER=y
CONFIG_ZEROPLUS_FF=y
+CONFIG_KASAN=y
diff --git a/tools/testing/selftests/hid/hid_bpf.c b/tools/testing/selftests/hid/hid_bpf.c
index 2cf96f818f..f825623e3e 100644
--- a/tools/testing/selftests/hid/hid_bpf.c
+++ b/tools/testing/selftests/hid/hid_bpf.c
@@ -16,6 +16,11 @@
#define SHOW_UHID_DEBUG 0
+#define min(a, b) \
+ ({ __typeof__(a) _a = (a); \
+ __typeof__(b) _b = (b); \
+ _a < _b ? _a : _b; })
+
static unsigned char rdesc[] = {
0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */
0x09, 0x21, /* Usage (Vendor Usage 0x21) */
@@ -111,6 +116,10 @@ struct hid_hw_request_syscall_args {
static pthread_mutex_t uhid_started_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t uhid_started = PTHREAD_COND_INITIALIZER;
+static pthread_mutex_t uhid_output_mtx = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t uhid_output_cond = PTHREAD_COND_INITIALIZER;
+static unsigned char output_report[10];
+
/* no need to protect uhid_stopped, only one thread accesses it */
static bool uhid_stopped;
@@ -205,6 +214,13 @@ static int uhid_event(struct __test_metadata *_metadata, int fd)
break;
case UHID_OUTPUT:
UHID_LOG("UHID_OUTPUT from uhid-dev");
+
+ pthread_mutex_lock(&uhid_output_mtx);
+ memcpy(output_report,
+ ev.u.output.data,
+ min(ev.u.output.size, sizeof(output_report)));
+ pthread_cond_signal(&uhid_output_cond);
+ pthread_mutex_unlock(&uhid_output_mtx);
break;
case UHID_GET_REPORT:
UHID_LOG("UHID_GET_REPORT from uhid-dev");
@@ -734,8 +750,100 @@ TEST_F(hid_bpf, test_hid_change_report)
}
/*
- * Attach hid_user_raw_request to the given uhid device,
- * call the bpf program from userspace
+ * Call hid_bpf_input_report against the given uhid device,
+ * check that the program is called and does the expected.
+ */
+TEST_F(hid_bpf, test_hid_user_input_report_call)
+{
+ struct hid_hw_request_syscall_args args = {
+ .retval = -1,
+ .size = 10,
+ };
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, tattrs,
+ .ctx_in = &args,
+ .ctx_size_in = sizeof(args),
+ );
+ __u8 buf[10] = {0};
+ int err, prog_fd;
+
+ LOAD_BPF;
+
+ args.hid = self->hid_id;
+ args.data[0] = 1; /* report ID */
+ args.data[1] = 2; /* report ID */
+ args.data[2] = 42; /* report ID */
+
+ prog_fd = bpf_program__fd(self->skel->progs.hid_user_input_report);
+
+ /* check that there is no data to read from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, -1) TH_LOG("read_hidraw");
+
+ err = bpf_prog_test_run_opts(prog_fd, &tattrs);
+
+ ASSERT_OK(err) TH_LOG("error while calling bpf_prog_test_run_opts");
+
+ ASSERT_EQ(args.retval, 0);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[1], 2);
+ ASSERT_EQ(buf[2], 42);
+}
+
+/*
+ * Call hid_bpf_hw_output_report against the given uhid device,
+ * check that the program is called and does the expected.
+ */
+TEST_F(hid_bpf, test_hid_user_output_report_call)
+{
+ struct hid_hw_request_syscall_args args = {
+ .retval = -1,
+ .size = 10,
+ };
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, tattrs,
+ .ctx_in = &args,
+ .ctx_size_in = sizeof(args),
+ );
+ int err, cond_err, prog_fd;
+ struct timespec time_to_wait;
+
+ LOAD_BPF;
+
+ args.hid = self->hid_id;
+ args.data[0] = 1; /* report ID */
+ args.data[1] = 2; /* report ID */
+ args.data[2] = 42; /* report ID */
+
+ prog_fd = bpf_program__fd(self->skel->progs.hid_user_output_report);
+
+ pthread_mutex_lock(&uhid_output_mtx);
+
+ memset(output_report, 0, sizeof(output_report));
+ clock_gettime(CLOCK_REALTIME, &time_to_wait);
+ time_to_wait.tv_sec += 2;
+
+ err = bpf_prog_test_run_opts(prog_fd, &tattrs);
+ cond_err = pthread_cond_timedwait(&uhid_output_cond, &uhid_output_mtx, &time_to_wait);
+
+ ASSERT_OK(err) TH_LOG("error while calling bpf_prog_test_run_opts");
+ ASSERT_OK(cond_err) TH_LOG("error while calling waiting for the condition");
+
+ ASSERT_EQ(args.retval, 3);
+
+ ASSERT_EQ(output_report[0], 1);
+ ASSERT_EQ(output_report[1], 2);
+ ASSERT_EQ(output_report[2], 42);
+
+ pthread_mutex_unlock(&uhid_output_mtx);
+}
+
+/*
+ * Call hid_hw_raw_request against the given uhid device,
* check that the program is called and does the expected.
*/
TEST_F(hid_bpf, test_hid_user_raw_request_call)
diff --git a/tools/testing/selftests/hid/progs/hid.c b/tools/testing/selftests/hid/progs/hid.c
index 1e558826b8..f67d35def1 100644
--- a/tools/testing/selftests/hid/progs/hid.c
+++ b/tools/testing/selftests/hid/progs/hid.c
@@ -101,6 +101,52 @@ int hid_user_raw_request(struct hid_hw_request_syscall_args *args)
return 0;
}
+SEC("syscall")
+int hid_user_output_report(struct hid_hw_request_syscall_args *args)
+{
+ struct hid_bpf_ctx *ctx;
+ const size_t size = args->size;
+ int i, ret = 0;
+
+ if (size > sizeof(args->data))
+ return -7; /* -E2BIG */
+
+ ctx = hid_bpf_allocate_context(args->hid);
+ if (!ctx)
+ return -1; /* EPERM check */
+
+ ret = hid_bpf_hw_output_report(ctx,
+ args->data,
+ size);
+ args->retval = ret;
+
+ hid_bpf_release_context(ctx);
+
+ return 0;
+}
+
+SEC("syscall")
+int hid_user_input_report(struct hid_hw_request_syscall_args *args)
+{
+ struct hid_bpf_ctx *ctx;
+ const size_t size = args->size;
+ int i, ret = 0;
+
+ if (size > sizeof(args->data))
+ return -7; /* -E2BIG */
+
+ ctx = hid_bpf_allocate_context(args->hid);
+ if (!ctx)
+ return -1; /* EPERM check */
+
+ ret = hid_bpf_input_report(ctx, HID_INPUT_REPORT, args->data, size);
+ args->retval = ret;
+
+ hid_bpf_release_context(ctx);
+
+ return 0;
+}
+
static const __u8 rdesc[] = {
0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */
0x09, 0x32, /* USAGE (Z) */
diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
index 65e657ac11..9cd56821d0 100644
--- a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
+++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
@@ -94,5 +94,11 @@ extern int hid_bpf_hw_request(struct hid_bpf_ctx *ctx,
size_t buf__sz,
enum hid_report_type type,
enum hid_class_request reqtype) __ksym;
+extern int hid_bpf_hw_output_report(struct hid_bpf_ctx *ctx,
+ __u8 *buf, size_t buf__sz) __ksym;
+extern int hid_bpf_input_report(struct hid_bpf_ctx *ctx,
+ enum hid_report_type type,
+ __u8 *data,
+ size_t buf__sz) __ksym;
#endif /* __HID_BPF_HELPERS_H */
diff --git a/tools/testing/selftests/hid/tests/base.py b/tools/testing/selftests/hid/tests/base.py
index 51433063b2..3a465768e5 100644
--- a/tools/testing/selftests/hid/tests/base.py
+++ b/tools/testing/selftests/hid/tests/base.py
@@ -8,11 +8,13 @@
import libevdev
import os
import pytest
+import shutil
+import subprocess
import time
import logging
-from hidtools.device.base_device import BaseDevice, EvdevMatch, SysfsFile
+from .base_device import BaseDevice, EvdevMatch, SysfsFile
from pathlib import Path
from typing import Final, List, Tuple
@@ -157,6 +159,17 @@ class BaseTestCase:
# for example ("playstation", "hid-playstation")
kernel_modules: List[Tuple[str, str]] = []
+ # List of in kernel HID-BPF object files to load
+ # before starting the test
+ # Any existing pre-loaded HID-BPF module will be removed
+ # before the ones in this list will be manually loaded.
+ # Each Element is a tuple '(hid_bpf_object, rdesc_fixup_present)',
+ # for example '("xppen-ArtistPro16Gen2.bpf.o", True)'
+ # If 'rdesc_fixup_present' is True, the test needs to wait
+ # for one unbind and rebind before it can be sure the kernel is
+ # ready
+ hid_bpfs: List[Tuple[str, bool]] = []
+
def assertInputEventsIn(self, expected_events, effective_events):
effective_events = effective_events.copy()
for ev in expected_events:
@@ -211,8 +224,6 @@ class BaseTestCase:
# we don't know beforehand the name of the module from modinfo
sysfs_path = Path("/sys/module") / kernel_module.replace("-", "_")
if not sysfs_path.exists():
- import subprocess
-
ret = subprocess.run(["/usr/sbin/modprobe", kernel_module])
if ret.returncode != 0:
pytest.skip(
@@ -225,6 +236,64 @@ class BaseTestCase:
self._load_kernel_module(kernel_driver, kernel_module)
yield
+ def load_hid_bpfs(self):
+ script_dir = Path(os.path.dirname(os.path.realpath(__file__)))
+ root_dir = (script_dir / "../../../../..").resolve()
+ bpf_dir = root_dir / "drivers/hid/bpf/progs"
+
+ udev_hid_bpf = shutil.which("udev-hid-bpf")
+ if not udev_hid_bpf:
+ pytest.skip("udev-hid-bpf not found in $PATH, skipping")
+
+ wait = False
+ for _, rdesc_fixup in self.hid_bpfs:
+ if rdesc_fixup:
+ wait = True
+
+ for hid_bpf, _ in self.hid_bpfs:
+ # We need to start `udev-hid-bpf` in the background
+ # and dispatch uhid events in case the kernel needs
+ # to fetch features on the device
+ process = subprocess.Popen(
+ [
+ "udev-hid-bpf",
+ "--verbose",
+ "add",
+ str(self.uhdev.sys_path),
+ str(bpf_dir / hid_bpf),
+ ],
+ )
+ while process.poll() is None:
+ self.uhdev.dispatch(1)
+
+ if process.poll() != 0:
+ pytest.fail(
+ f"Couldn't insert hid-bpf program '{hid_bpf}', marking the test as failed"
+ )
+
+ if wait:
+ # the HID-BPF program exports a rdesc fixup, so it needs to be
+ # unbound by the kernel and then rebound.
+ # Ensure we get the bound event exactly 2 times (one for the normal
+ # uhid loading, and then the reload from HID-BPF)
+ now = time.time()
+ while self.uhdev.kernel_ready_count < 2 and time.time() - now < 2:
+ self.uhdev.dispatch(1)
+
+ if self.uhdev.kernel_ready_count < 2:
+ pytest.fail(
+ f"Couldn't insert hid-bpf programs, marking the test as failed"
+ )
+
+ def unload_hid_bpfs(self):
+ ret = subprocess.run(
+ ["udev-hid-bpf", "--verbose", "remove", str(self.uhdev.sys_path)],
+ )
+ if ret.returncode != 0:
+ pytest.fail(
+ f"Couldn't unload hid-bpf programs, marking the test as failed"
+ )
+
@pytest.fixture()
def new_uhdev(self, load_kernel_module):
return self.create_device()
@@ -248,12 +317,18 @@ class BaseTestCase:
now = time.time()
while not self.uhdev.is_ready() and time.time() - now < 5:
self.uhdev.dispatch(1)
+
+ if self.hid_bpfs:
+ self.load_hid_bpfs()
+
if self.uhdev.get_evdev() is None:
logger.warning(
f"available list of input nodes: (default application is '{self.uhdev.application}')"
)
logger.warning(self.uhdev.input_nodes)
yield
+ if self.hid_bpfs:
+ self.unload_hid_bpfs()
self.uhdev = None
except PermissionError:
pytest.skip("Insufficient permissions, run me as root")
@@ -313,8 +388,6 @@ class HIDTestUdevRule(object):
self.reload_udev_rules()
def reload_udev_rules(self):
- import subprocess
-
subprocess.run("udevadm control --reload-rules".split())
subprocess.run("systemd-hwdb update".split())
@@ -330,10 +403,11 @@ class HIDTestUdevRule(object):
delete=False,
) as f:
f.write(
- 'KERNELS=="*input*", ATTRS{name}=="*uhid test *", ENV{LIBINPUT_IGNORE_DEVICE}="1"\n'
- )
- f.write(
- 'KERNELS=="*input*", ATTRS{name}=="*uhid test * System Multi Axis", ENV{ID_INPUT_TOUCHSCREEN}="", ENV{ID_INPUT_SYSTEM_MULTIAXIS}="1"\n'
+ """
+KERNELS=="*input*", ATTRS{name}=="*uhid test *", ENV{LIBINPUT_IGNORE_DEVICE}="1"
+KERNELS=="*hid*", ENV{HID_NAME}=="*uhid test *", ENV{HID_BPF_IGNORE_DEVICE}="1"
+KERNELS=="*input*", ATTRS{name}=="*uhid test * System Multi Axis", ENV{ID_INPUT_TOUCHSCREEN}="", ENV{ID_INPUT_SYSTEM_MULTIAXIS}="1"
+"""
)
self.rulesfile = f
diff --git a/tools/testing/selftests/hid/tests/base_device.py b/tools/testing/selftests/hid/tests/base_device.py
new file mode 100644
index 0000000000..e0515be97f
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/base_device.py
@@ -0,0 +1,421 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2017 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2017 Red Hat, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import fcntl
+import functools
+import libevdev
+import os
+
+try:
+ import pyudev
+except ImportError:
+ raise ImportError("UHID is not supported due to missing pyudev dependency")
+
+import logging
+
+import hidtools.hid as hid
+from hidtools.uhid import UHIDDevice
+from hidtools.util import BusType
+
+from pathlib import Path
+from typing import Any, ClassVar, Dict, List, Optional, Tuple, Type, Union
+
+logger = logging.getLogger("hidtools.device.base_device")
+
+
+class SysfsFile(object):
+ def __init__(self, path):
+ self.path = path
+
+ def __set_value(self, value):
+ with open(self.path, "w") as f:
+ return f.write(f"{value}\n")
+
+ def __get_value(self):
+ with open(self.path) as f:
+ return f.read().strip()
+
+ @property
+ def int_value(self) -> int:
+ return int(self.__get_value())
+
+ @int_value.setter
+ def int_value(self, v: int) -> None:
+ self.__set_value(v)
+
+ @property
+ def str_value(self) -> str:
+ return self.__get_value()
+
+ @str_value.setter
+ def str_value(self, v: str) -> None:
+ self.__set_value(v)
+
+
+class LED(object):
+ def __init__(self, sys_path):
+ self.max_brightness = SysfsFile(sys_path / "max_brightness").int_value
+ self.__brightness = SysfsFile(sys_path / "brightness")
+
+ @property
+ def brightness(self) -> int:
+ return self.__brightness.int_value
+
+ @brightness.setter
+ def brightness(self, value: int) -> None:
+ self.__brightness.int_value = value
+
+
+class PowerSupply(object):
+ """Represents Linux power_supply_class sysfs nodes."""
+
+ def __init__(self, sys_path):
+ self._capacity = SysfsFile(sys_path / "capacity")
+ self._status = SysfsFile(sys_path / "status")
+ self._type = SysfsFile(sys_path / "type")
+
+ @property
+ def capacity(self) -> int:
+ return self._capacity.int_value
+
+ @property
+ def status(self) -> str:
+ return self._status.str_value
+
+ @property
+ def type(self) -> str:
+ return self._type.str_value
+
+
+class HIDIsReady(object):
+ """
+ Companion class that binds to a kernel mechanism
+ and that allows to know when a uhid device is ready or not.
+
+ See :meth:`is_ready` for details.
+ """
+
+ def __init__(self: "HIDIsReady", uhid: UHIDDevice) -> None:
+ self.uhid = uhid
+
+ def is_ready(self: "HIDIsReady") -> bool:
+ """
+ Overwrite in subclasses: should return True or False whether
+ the attached uhid device is ready or not.
+ """
+ return False
+
+
+class UdevHIDIsReady(HIDIsReady):
+ _pyudev_context: ClassVar[Optional[pyudev.Context]] = None
+ _pyudev_monitor: ClassVar[Optional[pyudev.Monitor]] = None
+ _uhid_devices: ClassVar[Dict[int, Tuple[bool, int]]] = {}
+
+ def __init__(self: "UdevHIDIsReady", uhid: UHIDDevice) -> None:
+ super().__init__(uhid)
+ self._init_pyudev()
+
+ @classmethod
+ def _init_pyudev(cls: Type["UdevHIDIsReady"]) -> None:
+ if cls._pyudev_context is None:
+ cls._pyudev_context = pyudev.Context()
+ cls._pyudev_monitor = pyudev.Monitor.from_netlink(cls._pyudev_context)
+ cls._pyudev_monitor.filter_by("hid")
+ cls._pyudev_monitor.start()
+
+ UHIDDevice._append_fd_to_poll(
+ cls._pyudev_monitor.fileno(), cls._cls_udev_event_callback
+ )
+
+ @classmethod
+ def _cls_udev_event_callback(cls: Type["UdevHIDIsReady"]) -> None:
+ if cls._pyudev_monitor is None:
+ return
+ event: pyudev.Device
+ for event in iter(functools.partial(cls._pyudev_monitor.poll, 0.02), None):
+ if event.action not in ["bind", "remove", "unbind"]:
+ return
+
+ logger.debug(f"udev event: {event.action} -> {event}")
+
+ id = int(event.sys_path.strip().split(".")[-1], 16)
+
+ device_ready, count = cls._uhid_devices.get(id, (False, 0))
+
+ ready = event.action == "bind"
+ if not device_ready and ready:
+ count += 1
+ cls._uhid_devices[id] = (ready, count)
+
+ def is_ready(self: "UdevHIDIsReady") -> Tuple[bool, int]:
+ try:
+ return self._uhid_devices[self.uhid.hid_id]
+ except KeyError:
+ return (False, 0)
+
+
+class EvdevMatch(object):
+ def __init__(
+ self: "EvdevMatch",
+ *,
+ requires: List[Any] = [],
+ excludes: List[Any] = [],
+ req_properties: List[Any] = [],
+ excl_properties: List[Any] = [],
+ ) -> None:
+ self.requires = requires
+ self.excludes = excludes
+ self.req_properties = req_properties
+ self.excl_properties = excl_properties
+
+ def is_a_match(self: "EvdevMatch", evdev: libevdev.Device) -> bool:
+ for m in self.requires:
+ if not evdev.has(m):
+ return False
+ for m in self.excludes:
+ if evdev.has(m):
+ return False
+ for p in self.req_properties:
+ if not evdev.has_property(p):
+ return False
+ for p in self.excl_properties:
+ if evdev.has_property(p):
+ return False
+ return True
+
+
+class EvdevDevice(object):
+ """
+ Represents an Evdev node and its properties.
+ This is a stub for the libevdev devices, as they are relying on
+ uevent to get the data, saving us some ioctls to fetch the names
+ and properties.
+ """
+
+ def __init__(self: "EvdevDevice", sysfs: Path) -> None:
+ self.sysfs = sysfs
+ self.event_node: Any = None
+ self.libevdev: Optional[libevdev.Device] = None
+
+ self.uevents = {}
+ # all of the interesting properties are stored in the input uevent, so in the parent
+ # so convert the uevent file of the parent input node into a dict
+ with open(sysfs.parent / "uevent") as f:
+ for line in f.readlines():
+ key, value = line.strip().split("=")
+ self.uevents[key] = value.strip('"')
+
+ # we open all evdev nodes in order to not miss any event
+ self.open()
+
+ @property
+ def name(self: "EvdevDevice") -> str:
+ assert "NAME" in self.uevents
+
+ return self.uevents["NAME"]
+
+ @property
+ def evdev(self: "EvdevDevice") -> Path:
+ return Path("/dev/input") / self.sysfs.name
+
+ def matches_application(
+ self: "EvdevDevice", application: str, matches: Dict[str, EvdevMatch]
+ ) -> bool:
+ if self.libevdev is None:
+ return False
+
+ if application in matches:
+ return matches[application].is_a_match(self.libevdev)
+
+ logger.error(
+ f"application '{application}' is unknown, please update/fix hid-tools"
+ )
+ assert False # hid-tools likely needs an update
+
+ def open(self: "EvdevDevice") -> libevdev.Device:
+ self.event_node = open(self.evdev, "rb")
+ self.libevdev = libevdev.Device(self.event_node)
+
+ assert self.libevdev.fd is not None
+
+ fd = self.libevdev.fd.fileno()
+ flag = fcntl.fcntl(fd, fcntl.F_GETFD)
+ fcntl.fcntl(fd, fcntl.F_SETFL, flag | os.O_NONBLOCK)
+
+ return self.libevdev
+
+ def close(self: "EvdevDevice") -> None:
+ if self.libevdev is not None and self.libevdev.fd is not None:
+ self.libevdev.fd.close()
+ self.libevdev = None
+ if self.event_node is not None:
+ self.event_node.close()
+ self.event_node = None
+
+
+class BaseDevice(UHIDDevice):
+ # default _application_matches that matches nothing. This needs
+ # to be set in the subclasses to have get_evdev() working
+ _application_matches: Dict[str, EvdevMatch] = {}
+
+ def __init__(
+ self,
+ name,
+ application,
+ rdesc_str: Optional[str] = None,
+ rdesc: Optional[Union[hid.ReportDescriptor, str, bytes]] = None,
+ input_info=None,
+ ) -> None:
+ self._kernel_is_ready: HIDIsReady = UdevHIDIsReady(self)
+ if rdesc_str is None and rdesc is None:
+ raise Exception("Please provide at least a rdesc or rdesc_str")
+ super().__init__()
+ if name is None:
+ name = f"uhid gamepad test {self.__class__.__name__}"
+ if input_info is None:
+ input_info = (BusType.USB, 1, 2)
+ self.name = name
+ self.info = input_info
+ self.default_reportID = None
+ self.opened = False
+ self.started = False
+ self.application = application
+ self._input_nodes: Optional[list[EvdevDevice]] = None
+ if rdesc is None:
+ assert rdesc_str is not None
+ self.rdesc = hid.ReportDescriptor.from_human_descr(rdesc_str) # type: ignore
+ else:
+ self.rdesc = rdesc # type: ignore
+
+ @property
+ def power_supply_class(self: "BaseDevice") -> Optional[PowerSupply]:
+ ps = self.walk_sysfs("power_supply", "power_supply/*")
+ if ps is None or len(ps) < 1:
+ return None
+
+ return PowerSupply(ps[0])
+
+ @property
+ def led_classes(self: "BaseDevice") -> List[LED]:
+ leds = self.walk_sysfs("led", "**/max_brightness")
+ if leds is None:
+ return []
+
+ return [LED(led.parent) for led in leds]
+
+ @property
+ def kernel_is_ready(self: "BaseDevice") -> bool:
+ return self._kernel_is_ready.is_ready()[0] and self.started
+
+ @property
+ def kernel_ready_count(self: "BaseDevice") -> int:
+ return self._kernel_is_ready.is_ready()[1]
+
+ @property
+ def input_nodes(self: "BaseDevice") -> List[EvdevDevice]:
+ if self._input_nodes is not None:
+ return self._input_nodes
+
+ if not self.kernel_is_ready or not self.started:
+ return []
+
+ self._input_nodes = [
+ EvdevDevice(path)
+ for path in self.walk_sysfs("input", "input/input*/event*")
+ ]
+ return self._input_nodes
+
+ def match_evdev_rule(self, application, evdev):
+ """Replace this in subclasses if the device has multiple reports
+ of the same type and we need to filter based on the actual evdev
+ node.
+
+ returning True will append the corresponding report to
+ `self.input_nodes[type]`
+ returning False will ignore this report / type combination
+ for the device.
+ """
+ return True
+
+ def open(self):
+ self.opened = True
+
+ def _close_all_opened_evdev(self):
+ if self._input_nodes is not None:
+ for e in self._input_nodes:
+ e.close()
+
+ def __del__(self):
+ self._close_all_opened_evdev()
+
+ def close(self):
+ self.opened = False
+
+ def start(self, flags):
+ self.started = True
+
+ def stop(self):
+ self.started = False
+ self._close_all_opened_evdev()
+
+ def next_sync_events(self, application=None):
+ evdev = self.get_evdev(application)
+ if evdev is not None:
+ return list(evdev.events())
+ return []
+
+ @property
+ def application_matches(self: "BaseDevice") -> Dict[str, EvdevMatch]:
+ return self._application_matches
+
+ @application_matches.setter
+ def application_matches(self: "BaseDevice", data: Dict[str, EvdevMatch]) -> None:
+ self._application_matches = data
+
+ def get_evdev(self, application=None):
+ if application is None:
+ application = self.application
+
+ if len(self.input_nodes) == 0:
+ return None
+
+ assert self._input_nodes is not None
+
+ if len(self._input_nodes) == 1:
+ evdev = self._input_nodes[0]
+ if self.match_evdev_rule(application, evdev.libevdev):
+ return evdev.libevdev
+ else:
+ for _evdev in self._input_nodes:
+ if _evdev.matches_application(application, self.application_matches):
+ if self.match_evdev_rule(application, _evdev.libevdev):
+ return _evdev.libevdev
+
+ def is_ready(self):
+ """Returns whether a UHID device is ready. Can be overwritten in
+ subclasses to add extra conditions on when to consider a UHID
+ device ready. This can be:
+
+ - we need to wait on different types of input devices to be ready
+ (Touch Screen and Pen for example)
+ - we need to have at least 4 LEDs present
+ (len(self.uhdev.leds_classes) == 4)
+ - or any other combinations"""
+ return self.kernel_is_ready
diff --git a/tools/testing/selftests/hid/tests/base_gamepad.py b/tools/testing/selftests/hid/tests/base_gamepad.py
new file mode 100644
index 0000000000..ec74d75767
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/base_gamepad.py
@@ -0,0 +1,238 @@
+# SPDX-License-Identifier: GPL-2.0
+import libevdev
+
+from .base_device import BaseDevice
+from hidtools.util import BusType
+
+
+class InvalidHIDCommunication(Exception):
+ pass
+
+
+class GamepadData(object):
+ pass
+
+
+class AxisMapping(object):
+ """Represents a mapping between a HID type
+ and an evdev event"""
+
+ def __init__(self, hid, evdev=None):
+ self.hid = hid.lower()
+
+ if evdev is None:
+ evdev = f"ABS_{hid.upper()}"
+
+ self.evdev = libevdev.evbit("EV_ABS", evdev)
+
+
+class BaseGamepad(BaseDevice):
+ buttons_map = {
+ 1: "BTN_SOUTH",
+ 2: "BTN_EAST",
+ 3: "BTN_C",
+ 4: "BTN_NORTH",
+ 5: "BTN_WEST",
+ 6: "BTN_Z",
+ 7: "BTN_TL",
+ 8: "BTN_TR",
+ 9: "BTN_TL2",
+ 10: "BTN_TR2",
+ 11: "BTN_SELECT",
+ 12: "BTN_START",
+ 13: "BTN_MODE",
+ 14: "BTN_THUMBL",
+ 15: "BTN_THUMBR",
+ }
+
+ axes_map = {
+ "left_stick": {
+ "x": AxisMapping("x"),
+ "y": AxisMapping("y"),
+ },
+ "right_stick": {
+ "x": AxisMapping("z"),
+ "y": AxisMapping("Rz"),
+ },
+ }
+
+ def __init__(self, rdesc, application="Game Pad", name=None, input_info=None):
+ assert rdesc is not None
+ super().__init__(name, application, input_info=input_info, rdesc=rdesc)
+ self.buttons = (1, 2, 3)
+ self._buttons = {}
+ self.left = (127, 127)
+ self.right = (127, 127)
+ self.hat_switch = 15
+ assert self.parsed_rdesc is not None
+
+ self.fields = []
+ for r in self.parsed_rdesc.input_reports.values():
+ if r.application_name == self.application:
+ self.fields.extend([f.usage_name for f in r])
+
+ def store_axes(self, which, gamepad, data):
+ amap = self.axes_map[which]
+ x, y = data
+ setattr(gamepad, amap["x"].hid, x)
+ setattr(gamepad, amap["y"].hid, y)
+
+ def create_report(
+ self,
+ *,
+ left=(None, None),
+ right=(None, None),
+ hat_switch=None,
+ buttons=None,
+ reportID=None,
+ application="Game Pad",
+ ):
+ """
+ Return an input report for this device.
+
+ :param left: a tuple of absolute (x, y) value of the left joypad
+ where ``None`` is "leave unchanged"
+ :param right: a tuple of absolute (x, y) value of the right joypad
+ where ``None`` is "leave unchanged"
+ :param hat_switch: an absolute angular value of the hat switch
+ (expressed in 1/8 of circle, 0 being North, 2 East)
+ where ``None`` is "leave unchanged"
+ :param buttons: a dict of index/bool for the button states,
+ where ``None`` is "leave unchanged"
+ :param reportID: the numeric report ID for this report, if needed
+ :param application: the application used to report the values
+ """
+ if buttons is not None:
+ for i, b in buttons.items():
+ if i not in self.buttons:
+ raise InvalidHIDCommunication(
+ f"button {i} is not part of this {self.application}"
+ )
+ if b is not None:
+ self._buttons[i] = b
+
+ def replace_none_in_tuple(item, default):
+ if item is None:
+ item = (None, None)
+
+ if None in item:
+ if item[0] is None:
+ item = (default[0], item[1])
+ if item[1] is None:
+ item = (item[0], default[1])
+
+ return item
+
+ right = replace_none_in_tuple(right, self.right)
+ self.right = right
+ left = replace_none_in_tuple(left, self.left)
+ self.left = left
+
+ if hat_switch is None:
+ hat_switch = self.hat_switch
+ else:
+ self.hat_switch = hat_switch
+
+ reportID = reportID or self.default_reportID
+
+ gamepad = GamepadData()
+ for i, b in self._buttons.items():
+ gamepad.__setattr__(f"b{i}", int(b) if b is not None else 0)
+
+ self.store_axes("left_stick", gamepad, left)
+ self.store_axes("right_stick", gamepad, right)
+ gamepad.hatswitch = hat_switch # type: ignore ### gamepad is by default empty
+ return super().create_report(
+ gamepad, reportID=reportID, application=application
+ )
+
+ def event(
+ self, *, left=(None, None), right=(None, None), hat_switch=None, buttons=None
+ ):
+ """
+ Send an input event on the default report ID.
+
+ :param left: a tuple of absolute (x, y) value of the left joypad
+ where ``None`` is "leave unchanged"
+ :param right: a tuple of absolute (x, y) value of the right joypad
+ where ``None`` is "leave unchanged"
+ :param hat_switch: an absolute angular value of the hat switch
+ where ``None`` is "leave unchanged"
+ :param buttons: a dict of index/bool for the button states,
+ where ``None`` is "leave unchanged"
+ """
+ r = self.create_report(
+ left=left, right=right, hat_switch=hat_switch, buttons=buttons
+ )
+ self.call_input_event(r)
+ return [r]
+
+
+class JoystickGamepad(BaseGamepad):
+ buttons_map = {
+ 1: "BTN_TRIGGER",
+ 2: "BTN_THUMB",
+ 3: "BTN_THUMB2",
+ 4: "BTN_TOP",
+ 5: "BTN_TOP2",
+ 6: "BTN_PINKIE",
+ 7: "BTN_BASE",
+ 8: "BTN_BASE2",
+ 9: "BTN_BASE3",
+ 10: "BTN_BASE4",
+ 11: "BTN_BASE5",
+ 12: "BTN_BASE6",
+ 13: "BTN_DEAD",
+ }
+
+ axes_map = {
+ "left_stick": {
+ "x": AxisMapping("x"),
+ "y": AxisMapping("y"),
+ },
+ "right_stick": {
+ "x": AxisMapping("rudder"),
+ "y": AxisMapping("throttle"),
+ },
+ }
+
+ def __init__(self, rdesc, application="Joystick", name=None, input_info=None):
+ super().__init__(rdesc, application, name, input_info)
+
+ def create_report(
+ self,
+ *,
+ left=(None, None),
+ right=(None, None),
+ hat_switch=None,
+ buttons=None,
+ reportID=None,
+ application=None,
+ ):
+ """
+ Return an input report for this device.
+
+ :param left: a tuple of absolute (x, y) value of the left joypad
+ where ``None`` is "leave unchanged"
+ :param right: a tuple of absolute (x, y) value of the right joypad
+ where ``None`` is "leave unchanged"
+ :param hat_switch: an absolute angular value of the hat switch
+ where ``None`` is "leave unchanged"
+ :param buttons: a dict of index/bool for the button states,
+ where ``None`` is "leave unchanged"
+ :param reportID: the numeric report ID for this report, if needed
+ :param application: the application for this report, if needed
+ """
+ if application is None:
+ application = "Joystick"
+ return super().create_report(
+ left=left,
+ right=right,
+ hat_switch=hat_switch,
+ buttons=buttons,
+ reportID=reportID,
+ application=application,
+ )
+
+ def store_right_joystick(self, gamepad, data):
+ gamepad.rudder, gamepad.throttle = data
diff --git a/tools/testing/selftests/hid/tests/test_gamepad.py b/tools/testing/selftests/hid/tests/test_gamepad.py
index 26c74040b7..8d5b5ffdae 100644
--- a/tools/testing/selftests/hid/tests/test_gamepad.py
+++ b/tools/testing/selftests/hid/tests/test_gamepad.py
@@ -10,7 +10,8 @@ from . import base
import libevdev
import pytest
-from hidtools.device.base_gamepad import AsusGamepad, SaitekGamepad
+from .base_gamepad import BaseGamepad, JoystickGamepad, AxisMapping
+from hidtools.util import BusType
import logging
@@ -199,6 +200,449 @@ class BaseTest:
)
+class SaitekGamepad(JoystickGamepad):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop) 0
+ 0x09, 0x04, # Usage (Joystick) 2
+ 0xa1, 0x01, # Collection (Application) 4
+ 0x09, 0x01, # .Usage (Pointer) 6
+ 0xa1, 0x00, # .Collection (Physical) 8
+ 0x85, 0x01, # ..Report ID (1) 10
+ 0x09, 0x30, # ..Usage (X) 12
+ 0x15, 0x00, # ..Logical Minimum (0) 14
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 16
+ 0x35, 0x00, # ..Physical Minimum (0) 19
+ 0x46, 0xff, 0x00, # ..Physical Maximum (255) 21
+ 0x75, 0x08, # ..Report Size (8) 24
+ 0x95, 0x01, # ..Report Count (1) 26
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 28
+ 0x09, 0x31, # ..Usage (Y) 30
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 32
+ 0x05, 0x02, # ..Usage Page (Simulation Controls) 34
+ 0x09, 0xba, # ..Usage (Rudder) 36
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 38
+ 0x09, 0xbb, # ..Usage (Throttle) 40
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 42
+ 0x05, 0x09, # ..Usage Page (Button) 44
+ 0x19, 0x01, # ..Usage Minimum (1) 46
+ 0x29, 0x0c, # ..Usage Maximum (12) 48
+ 0x25, 0x01, # ..Logical Maximum (1) 50
+ 0x45, 0x01, # ..Physical Maximum (1) 52
+ 0x75, 0x01, # ..Report Size (1) 54
+ 0x95, 0x0c, # ..Report Count (12) 56
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 58
+ 0x95, 0x01, # ..Report Count (1) 60
+ 0x75, 0x00, # ..Report Size (0) 62
+ 0x81, 0x03, # ..Input (Cnst,Var,Abs) 64
+ 0x05, 0x01, # ..Usage Page (Generic Desktop) 66
+ 0x09, 0x39, # ..Usage (Hat switch) 68
+ 0x25, 0x07, # ..Logical Maximum (7) 70
+ 0x46, 0x3b, 0x01, # ..Physical Maximum (315) 72
+ 0x55, 0x00, # ..Unit Exponent (0) 75
+ 0x65, 0x44, # ..Unit (Degrees^4,EngRotation) 77
+ 0x75, 0x04, # ..Report Size (4) 79
+ 0x81, 0x42, # ..Input (Data,Var,Abs,Null) 81
+ 0x65, 0x00, # ..Unit (None) 83
+ 0xc0, # .End Collection 85
+ 0x05, 0x0f, # .Usage Page (Vendor Usage Page 0x0f) 86
+ 0x09, 0x92, # .Usage (Vendor Usage 0x92) 88
+ 0xa1, 0x02, # .Collection (Logical) 90
+ 0x85, 0x02, # ..Report ID (2) 92
+ 0x09, 0xa0, # ..Usage (Vendor Usage 0xa0) 94
+ 0x09, 0x9f, # ..Usage (Vendor Usage 0x9f) 96
+ 0x25, 0x01, # ..Logical Maximum (1) 98
+ 0x45, 0x00, # ..Physical Maximum (0) 100
+ 0x75, 0x01, # ..Report Size (1) 102
+ 0x95, 0x02, # ..Report Count (2) 104
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 106
+ 0x75, 0x06, # ..Report Size (6) 108
+ 0x95, 0x01, # ..Report Count (1) 110
+ 0x81, 0x03, # ..Input (Cnst,Var,Abs) 112
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 114
+ 0x75, 0x07, # ..Report Size (7) 116
+ 0x25, 0x7f, # ..Logical Maximum (127) 118
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 120
+ 0x09, 0x94, # ..Usage (Vendor Usage 0x94) 122
+ 0x75, 0x01, # ..Report Size (1) 124
+ 0x25, 0x01, # ..Logical Maximum (1) 126
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 128
+ 0xc0, # .End Collection 130
+ 0x09, 0x21, # .Usage (Vendor Usage 0x21) 131
+ 0xa1, 0x02, # .Collection (Logical) 133
+ 0x85, 0x0b, # ..Report ID (11) 135
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 137
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 139
+ 0x75, 0x08, # ..Report Size (8) 142
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 144
+ 0x09, 0x53, # ..Usage (Vendor Usage 0x53) 146
+ 0x25, 0x0a, # ..Logical Maximum (10) 148
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 150
+ 0x09, 0x50, # ..Usage (Vendor Usage 0x50) 152
+ 0x27, 0xfe, 0xff, 0x00, 0x00, # ..Logical Maximum (65534) 154
+ 0x47, 0xfe, 0xff, 0x00, 0x00, # ..Physical Maximum (65534) 159
+ 0x75, 0x10, # ..Report Size (16) 164
+ 0x55, 0xfd, # ..Unit Exponent (237) 166
+ 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 168
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 171
+ 0x55, 0x00, # ..Unit Exponent (0) 173
+ 0x65, 0x00, # ..Unit (None) 175
+ 0x09, 0x54, # ..Usage (Vendor Usage 0x54) 177
+ 0x55, 0xfd, # ..Unit Exponent (237) 179
+ 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 181
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 184
+ 0x55, 0x00, # ..Unit Exponent (0) 186
+ 0x65, 0x00, # ..Unit (None) 188
+ 0x09, 0xa7, # ..Usage (Vendor Usage 0xa7) 190
+ 0x55, 0xfd, # ..Unit Exponent (237) 192
+ 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 194
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 197
+ 0x55, 0x00, # ..Unit Exponent (0) 199
+ 0x65, 0x00, # ..Unit (None) 201
+ 0xc0, # .End Collection 203
+ 0x09, 0x5a, # .Usage (Vendor Usage 0x5a) 204
+ 0xa1, 0x02, # .Collection (Logical) 206
+ 0x85, 0x0c, # ..Report ID (12) 208
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 210
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 212
+ 0x45, 0x00, # ..Physical Maximum (0) 215
+ 0x75, 0x08, # ..Report Size (8) 217
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 219
+ 0x09, 0x5c, # ..Usage (Vendor Usage 0x5c) 221
+ 0x26, 0x10, 0x27, # ..Logical Maximum (10000) 223
+ 0x46, 0x10, 0x27, # ..Physical Maximum (10000) 226
+ 0x75, 0x10, # ..Report Size (16) 229
+ 0x55, 0xfd, # ..Unit Exponent (237) 231
+ 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 233
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 236
+ 0x55, 0x00, # ..Unit Exponent (0) 238
+ 0x65, 0x00, # ..Unit (None) 240
+ 0x09, 0x5b, # ..Usage (Vendor Usage 0x5b) 242
+ 0x25, 0x7f, # ..Logical Maximum (127) 244
+ 0x75, 0x08, # ..Report Size (8) 246
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 248
+ 0x09, 0x5e, # ..Usage (Vendor Usage 0x5e) 250
+ 0x26, 0x10, 0x27, # ..Logical Maximum (10000) 252
+ 0x75, 0x10, # ..Report Size (16) 255
+ 0x55, 0xfd, # ..Unit Exponent (237) 257
+ 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 259
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 262
+ 0x55, 0x00, # ..Unit Exponent (0) 264
+ 0x65, 0x00, # ..Unit (None) 266
+ 0x09, 0x5d, # ..Usage (Vendor Usage 0x5d) 268
+ 0x25, 0x7f, # ..Logical Maximum (127) 270
+ 0x75, 0x08, # ..Report Size (8) 272
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 274
+ 0xc0, # .End Collection 276
+ 0x09, 0x73, # .Usage (Vendor Usage 0x73) 277
+ 0xa1, 0x02, # .Collection (Logical) 279
+ 0x85, 0x0d, # ..Report ID (13) 281
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 283
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 285
+ 0x45, 0x00, # ..Physical Maximum (0) 288
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 290
+ 0x09, 0x70, # ..Usage (Vendor Usage 0x70) 292
+ 0x15, 0x81, # ..Logical Minimum (-127) 294
+ 0x25, 0x7f, # ..Logical Maximum (127) 296
+ 0x36, 0xf0, 0xd8, # ..Physical Minimum (-10000) 298
+ 0x46, 0x10, 0x27, # ..Physical Maximum (10000) 301
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 304
+ 0xc0, # .End Collection 306
+ 0x09, 0x6e, # .Usage (Vendor Usage 0x6e) 307
+ 0xa1, 0x02, # .Collection (Logical) 309
+ 0x85, 0x0e, # ..Report ID (14) 311
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 313
+ 0x15, 0x00, # ..Logical Minimum (0) 315
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 317
+ 0x35, 0x00, # ..Physical Minimum (0) 320
+ 0x45, 0x00, # ..Physical Maximum (0) 322
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 324
+ 0x09, 0x70, # ..Usage (Vendor Usage 0x70) 326
+ 0x25, 0x7f, # ..Logical Maximum (127) 328
+ 0x46, 0x10, 0x27, # ..Physical Maximum (10000) 330
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 333
+ 0x09, 0x6f, # ..Usage (Vendor Usage 0x6f) 335
+ 0x15, 0x81, # ..Logical Minimum (-127) 337
+ 0x36, 0xf0, 0xd8, # ..Physical Minimum (-10000) 339
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 342
+ 0x09, 0x71, # ..Usage (Vendor Usage 0x71) 344
+ 0x15, 0x00, # ..Logical Minimum (0) 346
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 348
+ 0x35, 0x00, # ..Physical Minimum (0) 351
+ 0x46, 0x68, 0x01, # ..Physical Maximum (360) 353
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 356
+ 0x09, 0x72, # ..Usage (Vendor Usage 0x72) 358
+ 0x75, 0x10, # ..Report Size (16) 360
+ 0x26, 0x10, 0x27, # ..Logical Maximum (10000) 362
+ 0x46, 0x10, 0x27, # ..Physical Maximum (10000) 365
+ 0x55, 0xfd, # ..Unit Exponent (237) 368
+ 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 370
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 373
+ 0x55, 0x00, # ..Unit Exponent (0) 375
+ 0x65, 0x00, # ..Unit (None) 377
+ 0xc0, # .End Collection 379
+ 0x09, 0x77, # .Usage (Vendor Usage 0x77) 380
+ 0xa1, 0x02, # .Collection (Logical) 382
+ 0x85, 0x51, # ..Report ID (81) 384
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 386
+ 0x25, 0x7f, # ..Logical Maximum (127) 388
+ 0x45, 0x00, # ..Physical Maximum (0) 390
+ 0x75, 0x08, # ..Report Size (8) 392
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 394
+ 0x09, 0x78, # ..Usage (Vendor Usage 0x78) 396
+ 0xa1, 0x02, # ..Collection (Logical) 398
+ 0x09, 0x7b, # ...Usage (Vendor Usage 0x7b) 400
+ 0x09, 0x79, # ...Usage (Vendor Usage 0x79) 402
+ 0x09, 0x7a, # ...Usage (Vendor Usage 0x7a) 404
+ 0x15, 0x01, # ...Logical Minimum (1) 406
+ 0x25, 0x03, # ...Logical Maximum (3) 408
+ 0x91, 0x00, # ...Output (Data,Arr,Abs) 410
+ 0xc0, # ..End Collection 412
+ 0x09, 0x7c, # ..Usage (Vendor Usage 0x7c) 413
+ 0x15, 0x00, # ..Logical Minimum (0) 415
+ 0x26, 0xfe, 0x00, # ..Logical Maximum (254) 417
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 420
+ 0xc0, # .End Collection 422
+ 0x09, 0x92, # .Usage (Vendor Usage 0x92) 423
+ 0xa1, 0x02, # .Collection (Logical) 425
+ 0x85, 0x52, # ..Report ID (82) 427
+ 0x09, 0x96, # ..Usage (Vendor Usage 0x96) 429
+ 0xa1, 0x02, # ..Collection (Logical) 431
+ 0x09, 0x9a, # ...Usage (Vendor Usage 0x9a) 433
+ 0x09, 0x99, # ...Usage (Vendor Usage 0x99) 435
+ 0x09, 0x97, # ...Usage (Vendor Usage 0x97) 437
+ 0x09, 0x98, # ...Usage (Vendor Usage 0x98) 439
+ 0x09, 0x9b, # ...Usage (Vendor Usage 0x9b) 441
+ 0x09, 0x9c, # ...Usage (Vendor Usage 0x9c) 443
+ 0x15, 0x01, # ...Logical Minimum (1) 445
+ 0x25, 0x06, # ...Logical Maximum (6) 447
+ 0x91, 0x00, # ...Output (Data,Arr,Abs) 449
+ 0xc0, # ..End Collection 451
+ 0xc0, # .End Collection 452
+ 0x05, 0xff, # .Usage Page (Vendor Usage Page 0xff) 453
+ 0x0a, 0x01, 0x03, # .Usage (Vendor Usage 0x301) 455
+ 0xa1, 0x02, # .Collection (Logical) 458
+ 0x85, 0x40, # ..Report ID (64) 460
+ 0x0a, 0x02, 0x03, # ..Usage (Vendor Usage 0x302) 462
+ 0xa1, 0x02, # ..Collection (Logical) 465
+ 0x1a, 0x11, 0x03, # ...Usage Minimum (785) 467
+ 0x2a, 0x20, 0x03, # ...Usage Maximum (800) 470
+ 0x25, 0x10, # ...Logical Maximum (16) 473
+ 0x91, 0x00, # ...Output (Data,Arr,Abs) 475
+ 0xc0, # ..End Collection 477
+ 0x0a, 0x03, 0x03, # ..Usage (Vendor Usage 0x303) 478
+ 0x15, 0x00, # ..Logical Minimum (0) 481
+ 0x27, 0xff, 0xff, 0x00, 0x00, # ..Logical Maximum (65535) 483
+ 0x75, 0x10, # ..Report Size (16) 488
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 490
+ 0xc0, # .End Collection 492
+ 0x05, 0x0f, # .Usage Page (Vendor Usage Page 0x0f) 493
+ 0x09, 0x7d, # .Usage (Vendor Usage 0x7d) 495
+ 0xa1, 0x02, # .Collection (Logical) 497
+ 0x85, 0x43, # ..Report ID (67) 499
+ 0x09, 0x7e, # ..Usage (Vendor Usage 0x7e) 501
+ 0x26, 0x80, 0x00, # ..Logical Maximum (128) 503
+ 0x46, 0x10, 0x27, # ..Physical Maximum (10000) 506
+ 0x75, 0x08, # ..Report Size (8) 509
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 511
+ 0xc0, # .End Collection 513
+ 0x09, 0x7f, # .Usage (Vendor Usage 0x7f) 514
+ 0xa1, 0x02, # .Collection (Logical) 516
+ 0x85, 0x0b, # ..Report ID (11) 518
+ 0x09, 0x80, # ..Usage (Vendor Usage 0x80) 520
+ 0x26, 0xff, 0x7f, # ..Logical Maximum (32767) 522
+ 0x45, 0x00, # ..Physical Maximum (0) 525
+ 0x75, 0x0f, # ..Report Size (15) 527
+ 0xb1, 0x03, # ..Feature (Cnst,Var,Abs) 529
+ 0x09, 0xa9, # ..Usage (Vendor Usage 0xa9) 531
+ 0x25, 0x01, # ..Logical Maximum (1) 533
+ 0x75, 0x01, # ..Report Size (1) 535
+ 0xb1, 0x03, # ..Feature (Cnst,Var,Abs) 537
+ 0x09, 0x83, # ..Usage (Vendor Usage 0x83) 539
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 541
+ 0x75, 0x08, # ..Report Size (8) 544
+ 0xb1, 0x03, # ..Feature (Cnst,Var,Abs) 546
+ 0xc0, # .End Collection 548
+ 0x09, 0xab, # .Usage (Vendor Usage 0xab) 549
+ 0xa1, 0x03, # .Collection (Report) 551
+ 0x85, 0x15, # ..Report ID (21) 553
+ 0x09, 0x25, # ..Usage (Vendor Usage 0x25) 555
+ 0xa1, 0x02, # ..Collection (Logical) 557
+ 0x09, 0x26, # ...Usage (Vendor Usage 0x26) 559
+ 0x09, 0x30, # ...Usage (Vendor Usage 0x30) 561
+ 0x09, 0x32, # ...Usage (Vendor Usage 0x32) 563
+ 0x09, 0x31, # ...Usage (Vendor Usage 0x31) 565
+ 0x09, 0x33, # ...Usage (Vendor Usage 0x33) 567
+ 0x09, 0x34, # ...Usage (Vendor Usage 0x34) 569
+ 0x15, 0x01, # ...Logical Minimum (1) 571
+ 0x25, 0x06, # ...Logical Maximum (6) 573
+ 0xb1, 0x00, # ...Feature (Data,Arr,Abs) 575
+ 0xc0, # ..End Collection 577
+ 0xc0, # .End Collection 578
+ 0x09, 0x89, # .Usage (Vendor Usage 0x89) 579
+ 0xa1, 0x03, # .Collection (Report) 581
+ 0x85, 0x16, # ..Report ID (22) 583
+ 0x09, 0x8b, # ..Usage (Vendor Usage 0x8b) 585
+ 0xa1, 0x02, # ..Collection (Logical) 587
+ 0x09, 0x8c, # ...Usage (Vendor Usage 0x8c) 589
+ 0x09, 0x8d, # ...Usage (Vendor Usage 0x8d) 591
+ 0x09, 0x8e, # ...Usage (Vendor Usage 0x8e) 593
+ 0x25, 0x03, # ...Logical Maximum (3) 595
+ 0xb1, 0x00, # ...Feature (Data,Arr,Abs) 597
+ 0xc0, # ..End Collection 599
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 600
+ 0x15, 0x00, # ..Logical Minimum (0) 602
+ 0x26, 0xfe, 0x00, # ..Logical Maximum (254) 604
+ 0xb1, 0x02, # ..Feature (Data,Var,Abs) 607
+ 0xc0, # .End Collection 609
+ 0x09, 0x90, # .Usage (Vendor Usage 0x90) 610
+ 0xa1, 0x03, # .Collection (Report) 612
+ 0x85, 0x50, # ..Report ID (80) 614
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 616
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 618
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 621
+ 0xc0, # .End Collection 623
+ 0xc0, # End Collection 624
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None):
+ super().__init__(rdesc, name=name, input_info=(BusType.USB, 0x06A3, 0xFF0D))
+ self.buttons = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
+
+
+class AsusGamepad(BaseGamepad):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop) 0
+ 0x09, 0x05, # Usage (Game Pad) 2
+ 0xa1, 0x01, # Collection (Application) 4
+ 0x85, 0x01, # .Report ID (1) 6
+ 0x05, 0x09, # .Usage Page (Button) 8
+ 0x0a, 0x01, 0x00, # .Usage (Vendor Usage 0x01) 10
+ 0x0a, 0x02, 0x00, # .Usage (Vendor Usage 0x02) 13
+ 0x0a, 0x04, 0x00, # .Usage (Vendor Usage 0x04) 16
+ 0x0a, 0x05, 0x00, # .Usage (Vendor Usage 0x05) 19
+ 0x0a, 0x07, 0x00, # .Usage (Vendor Usage 0x07) 22
+ 0x0a, 0x08, 0x00, # .Usage (Vendor Usage 0x08) 25
+ 0x0a, 0x0e, 0x00, # .Usage (Vendor Usage 0x0e) 28
+ 0x0a, 0x0f, 0x00, # .Usage (Vendor Usage 0x0f) 31
+ 0x0a, 0x0d, 0x00, # .Usage (Vendor Usage 0x0d) 34
+ 0x05, 0x0c, # .Usage Page (Consumer Devices) 37
+ 0x0a, 0x24, 0x02, # .Usage (AC Back) 39
+ 0x0a, 0x23, 0x02, # .Usage (AC Home) 42
+ 0x15, 0x00, # .Logical Minimum (0) 45
+ 0x25, 0x01, # .Logical Maximum (1) 47
+ 0x75, 0x01, # .Report Size (1) 49
+ 0x95, 0x0b, # .Report Count (11) 51
+ 0x81, 0x02, # .Input (Data,Var,Abs) 53
+ 0x75, 0x01, # .Report Size (1) 55
+ 0x95, 0x01, # .Report Count (1) 57
+ 0x81, 0x03, # .Input (Cnst,Var,Abs) 59
+ 0x05, 0x01, # .Usage Page (Generic Desktop) 61
+ 0x75, 0x04, # .Report Size (4) 63
+ 0x95, 0x01, # .Report Count (1) 65
+ 0x25, 0x07, # .Logical Maximum (7) 67
+ 0x46, 0x3b, 0x01, # .Physical Maximum (315) 69
+ 0x66, 0x14, 0x00, # .Unit (Degrees,EngRotation) 72
+ 0x09, 0x39, # .Usage (Hat switch) 75
+ 0x81, 0x42, # .Input (Data,Var,Abs,Null) 77
+ 0x66, 0x00, 0x00, # .Unit (None) 79
+ 0x09, 0x01, # .Usage (Pointer) 82
+ 0xa1, 0x00, # .Collection (Physical) 84
+ 0x09, 0x30, # ..Usage (X) 86
+ 0x09, 0x31, # ..Usage (Y) 88
+ 0x09, 0x32, # ..Usage (Z) 90
+ 0x09, 0x35, # ..Usage (Rz) 92
+ 0x05, 0x02, # ..Usage Page (Simulation Controls) 94
+ 0x09, 0xc5, # ..Usage (Brake) 96
+ 0x09, 0xc4, # ..Usage (Accelerator) 98
+ 0x15, 0x00, # ..Logical Minimum (0) 100
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 102
+ 0x35, 0x00, # ..Physical Minimum (0) 105
+ 0x46, 0xff, 0x00, # ..Physical Maximum (255) 107
+ 0x75, 0x08, # ..Report Size (8) 110
+ 0x95, 0x06, # ..Report Count (6) 112
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 114
+ 0xc0, # .End Collection 116
+ 0x85, 0x02, # .Report ID (2) 117
+ 0x05, 0x08, # .Usage Page (LEDs) 119
+ 0x0a, 0x01, 0x00, # .Usage (Num Lock) 121
+ 0x0a, 0x02, 0x00, # .Usage (Caps Lock) 124
+ 0x0a, 0x03, 0x00, # .Usage (Scroll Lock) 127
+ 0x0a, 0x04, 0x00, # .Usage (Compose) 130
+ 0x15, 0x00, # .Logical Minimum (0) 133
+ 0x25, 0x01, # .Logical Maximum (1) 135
+ 0x75, 0x01, # .Report Size (1) 137
+ 0x95, 0x04, # .Report Count (4) 139
+ 0x91, 0x02, # .Output (Data,Var,Abs) 141
+ 0x75, 0x04, # .Report Size (4) 143
+ 0x95, 0x01, # .Report Count (1) 145
+ 0x91, 0x03, # .Output (Cnst,Var,Abs) 147
+ 0xc0, # End Collection 149
+ 0x05, 0x0c, # Usage Page (Consumer Devices) 150
+ 0x09, 0x01, # Usage (Consumer Control) 152
+ 0xa1, 0x01, # Collection (Application) 154
+ 0x85, 0x03, # .Report ID (3) 156
+ 0x05, 0x01, # .Usage Page (Generic Desktop) 158
+ 0x09, 0x06, # .Usage (Keyboard) 160
+ 0xa1, 0x02, # .Collection (Logical) 162
+ 0x05, 0x06, # ..Usage Page (Generic Device Controls) 164
+ 0x09, 0x20, # ..Usage (Battery Strength) 166
+ 0x15, 0x00, # ..Logical Minimum (0) 168
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 170
+ 0x75, 0x08, # ..Report Size (8) 173
+ 0x95, 0x01, # ..Report Count (1) 175
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 177
+ 0x06, 0xbc, 0xff, # ..Usage Page (Vendor Usage Page 0xffbc) 179
+ 0x0a, 0xad, 0xbd, # ..Usage (Vendor Usage 0xbdad) 182
+ 0x75, 0x08, # ..Report Size (8) 185
+ 0x95, 0x06, # ..Report Count (6) 187
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 189
+ 0xc0, # .End Collection 191
+ 0xc0, # End Collection 192
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None):
+ super().__init__(rdesc, name=name, input_info=(BusType.USB, 0x18D1, 0x2C40))
+ self.buttons = (1, 2, 4, 5, 7, 8, 14, 15, 13)
+
+
+class RaptorMach2Joystick(JoystickGamepad):
+ axes_map = {
+ "left_stick": {
+ "x": AxisMapping("x"),
+ "y": AxisMapping("y"),
+ },
+ "right_stick": {
+ "x": AxisMapping("z"),
+ "y": AxisMapping("Rz"),
+ },
+ }
+
+ def __init__(
+ self,
+ name,
+ rdesc=None,
+ application="Joystick",
+ input_info=(BusType.USB, 0x11C0, 0x5606),
+ ):
+ super().__init__(rdesc, application, name, input_info)
+ self.buttons = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
+ self.hat_switch = 240 # null value is 240 as max is 239
+
+ def event(
+ self, *, left=(None, None), right=(None, None), hat_switch=None, buttons=None
+ ):
+ if hat_switch is not None:
+ hat_switch *= 30
+
+ return super().event(
+ left=left, right=right, hat_switch=hat_switch, buttons=buttons
+ )
+
+
class TestSaitekGamepad(BaseTest.TestGamepad):
def create_device(self):
return SaitekGamepad()
@@ -207,3 +651,14 @@ class TestSaitekGamepad(BaseTest.TestGamepad):
class TestAsusGamepad(BaseTest.TestGamepad):
def create_device(self):
return AsusGamepad()
+
+
+class TestRaptorMach2Joystick(BaseTest.TestGamepad):
+ hid_bpfs = [("FR-TEC__Raptor-Mach-2.bpf.o", True)]
+
+ def create_device(self):
+ return RaptorMach2Joystick(
+ "uhid test Sanmos Group FR-TEC Raptor MACH 2",
+ rdesc="05 01 09 04 a1 01 05 01 85 01 05 01 09 30 75 10 95 01 15 00 26 ff 07 46 ff 07 81 02 05 01 09 31 75 10 95 01 15 00 26 ff 07 46 ff 07 81 02 05 01 09 33 75 10 95 01 15 00 26 ff 03 46 ff 03 81 02 05 00 09 00 75 10 95 01 15 00 26 ff 03 46 ff 03 81 02 05 01 09 32 75 10 95 01 15 00 26 ff 03 46 ff 03 81 02 05 01 09 35 75 10 95 01 15 00 26 ff 03 46 ff 03 81 02 05 01 09 34 75 10 95 01 15 00 26 ff 07 46 ff 07 81 02 05 01 09 36 75 10 95 01 15 00 26 ff 03 46 ff 03 81 02 05 09 19 01 2a 1d 00 15 00 25 01 75 01 96 80 00 81 02 05 01 09 39 26 ef 00 46 68 01 65 14 75 10 95 01 81 42 05 01 09 00 75 08 95 1d 81 01 15 00 26 ef 00 85 58 26 ff 00 46 ff 00 75 08 95 3f 09 00 91 02 85 59 75 08 95 80 09 00 b1 02 c0",
+ input_info=(BusType.USB, 0x11C0, 0x5606),
+ )
diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py
index 903f19f7cb..a9e2de1e88 100644
--- a/tools/testing/selftests/hid/tests/test_tablet.py
+++ b/tools/testing/selftests/hid/tests/test_tablet.py
@@ -35,6 +35,7 @@ class BtnPressed(Enum):
PRIMARY_PRESSED = libevdev.EV_KEY.BTN_STYLUS
SECONDARY_PRESSED = libevdev.EV_KEY.BTN_STYLUS2
+ THIRD_PRESSED = libevdev.EV_KEY.BTN_STYLUS3
class PenState(Enum):
@@ -44,58 +45,28 @@ class PenState(Enum):
We extend it with the various buttons when we need to check them.
"""
- PEN_IS_OUT_OF_RANGE = BtnTouch.UP, None, None
- PEN_IS_IN_RANGE = BtnTouch.UP, ToolType.PEN, None
- PEN_IS_IN_RANGE_WITH_BUTTON = BtnTouch.UP, ToolType.PEN, BtnPressed.PRIMARY_PRESSED
- PEN_IS_IN_RANGE_WITH_SECOND_BUTTON = (
- BtnTouch.UP,
- ToolType.PEN,
- BtnPressed.SECONDARY_PRESSED,
- )
- PEN_IS_IN_CONTACT = BtnTouch.DOWN, ToolType.PEN, None
- PEN_IS_IN_CONTACT_WITH_BUTTON = (
- BtnTouch.DOWN,
- ToolType.PEN,
- BtnPressed.PRIMARY_PRESSED,
- )
- PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON = (
- BtnTouch.DOWN,
- ToolType.PEN,
- BtnPressed.SECONDARY_PRESSED,
- )
- PEN_IS_IN_RANGE_WITH_ERASING_INTENT = BtnTouch.UP, ToolType.RUBBER, None
- PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_BUTTON = (
- BtnTouch.UP,
- ToolType.RUBBER,
- BtnPressed.PRIMARY_PRESSED,
- )
- PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_SECOND_BUTTON = (
- BtnTouch.UP,
- ToolType.RUBBER,
- BtnPressed.SECONDARY_PRESSED,
- )
- PEN_IS_ERASING = BtnTouch.DOWN, ToolType.RUBBER, None
- PEN_IS_ERASING_WITH_BUTTON = (
- BtnTouch.DOWN,
- ToolType.RUBBER,
- BtnPressed.PRIMARY_PRESSED,
- )
- PEN_IS_ERASING_WITH_SECOND_BUTTON = (
- BtnTouch.DOWN,
- ToolType.RUBBER,
- BtnPressed.SECONDARY_PRESSED,
- )
-
- def __init__(self, touch: BtnTouch, tool: Optional[ToolType], button: Optional[BtnPressed]):
+ PEN_IS_OUT_OF_RANGE = BtnTouch.UP, None, False
+ PEN_IS_IN_RANGE = BtnTouch.UP, ToolType.PEN, False
+ PEN_IS_IN_RANGE_WITH_BUTTON = BtnTouch.UP, ToolType.PEN, True
+ PEN_IS_IN_CONTACT = BtnTouch.DOWN, ToolType.PEN, False
+ PEN_IS_IN_CONTACT_WITH_BUTTON = BtnTouch.DOWN, ToolType.PEN, True
+ PEN_IS_IN_RANGE_WITH_ERASING_INTENT = BtnTouch.UP, ToolType.RUBBER, False
+ PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_BUTTON = BtnTouch.UP, ToolType.RUBBER, True
+ PEN_IS_ERASING = BtnTouch.DOWN, ToolType.RUBBER, False
+ PEN_IS_ERASING_WITH_BUTTON = BtnTouch.DOWN, ToolType.RUBBER, True
+
+ def __init__(
+ self, touch: BtnTouch, tool: Optional[ToolType], button: Optional[bool]
+ ):
self.touch = touch # type: ignore
self.tool = tool # type: ignore
self.button = button # type: ignore
@classmethod
- def from_evdev(cls, evdev) -> "PenState":
+ def from_evdev(cls, evdev, test_button) -> "PenState":
touch = BtnTouch(evdev.value[libevdev.EV_KEY.BTN_TOUCH])
tool = None
- button = None
+ button = False
if (
evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER]
and not evdev.value[libevdev.EV_KEY.BTN_TOOL_PEN]
@@ -112,19 +83,20 @@ class PenState(Enum):
):
raise ValueError("2 tools are not allowed")
- # we take only the highest button in account
- for b in [libevdev.EV_KEY.BTN_STYLUS, libevdev.EV_KEY.BTN_STYLUS2]:
- if bool(evdev.value[b]):
- button = BtnPressed(b)
+ # we take only the provided button into account
+ if test_button is not None:
+ button = bool(evdev.value[test_button.value])
# the kernel tends to insert an EV_SYN once removing the tool, so
# the button will be released after
if tool is None:
- button = None
+ button = False
return cls((touch, tool, button)) # type: ignore
- def apply(self, events: List[libevdev.InputEvent], strict: bool) -> "PenState":
+ def apply(
+ self, events: List[libevdev.InputEvent], strict: bool, test_button: BtnPressed
+ ) -> "PenState":
if libevdev.EV_SYN.SYN_REPORT in events:
raise ValueError("EV_SYN is in the event sequence")
touch = self.touch
@@ -148,19 +120,16 @@ class PenState(Enum):
raise ValueError(f"duplicated BTN_TOOL_* in {events}")
tool_found = True
tool = ToolType(ev.code) if ev.value else None
- elif ev in (
- libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS),
- libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS2),
- ):
+ elif test_button is not None and ev in (test_button.value,):
if button_found:
raise ValueError(f"duplicated BTN_STYLUS* in {events}")
button_found = True
- button = BtnPressed(ev.code) if ev.value else None
+ button = bool(ev.value)
# the kernel tends to insert an EV_SYN once removing the tool, so
# the button will be released after
if tool is None:
- button = None
+ button = False
new_state = PenState((touch, tool, button)) # type: ignore
if strict:
@@ -183,11 +152,9 @@ class PenState(Enum):
PenState.PEN_IS_OUT_OF_RANGE,
PenState.PEN_IS_IN_RANGE,
PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
PenState.PEN_IS_IN_CONTACT,
PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
PenState.PEN_IS_ERASING,
)
@@ -195,7 +162,6 @@ class PenState(Enum):
return (
PenState.PEN_IS_IN_RANGE,
PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
PenState.PEN_IS_OUT_OF_RANGE,
PenState.PEN_IS_IN_CONTACT,
)
@@ -204,7 +170,6 @@ class PenState(Enum):
return (
PenState.PEN_IS_IN_CONTACT,
PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
PenState.PEN_IS_IN_RANGE,
)
@@ -236,21 +201,6 @@ class PenState(Enum):
PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
)
- if self == PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON:
- return (
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_RANGE,
- PenState.PEN_IS_OUT_OF_RANGE,
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
- )
-
- if self == PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON:
- return (
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_CONTACT,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- )
-
return tuple()
def historically_tolerated_transitions(self) -> Tuple["PenState", ...]:
@@ -263,11 +213,9 @@ class PenState(Enum):
PenState.PEN_IS_OUT_OF_RANGE,
PenState.PEN_IS_IN_RANGE,
PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
PenState.PEN_IS_IN_CONTACT,
PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
PenState.PEN_IS_ERASING,
)
@@ -275,7 +223,6 @@ class PenState(Enum):
return (
PenState.PEN_IS_IN_RANGE,
PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
PenState.PEN_IS_OUT_OF_RANGE,
PenState.PEN_IS_IN_CONTACT,
)
@@ -284,7 +231,6 @@ class PenState(Enum):
return (
PenState.PEN_IS_IN_CONTACT,
PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
PenState.PEN_IS_IN_RANGE,
PenState.PEN_IS_OUT_OF_RANGE,
)
@@ -319,22 +265,6 @@ class PenState(Enum):
PenState.PEN_IS_OUT_OF_RANGE,
)
- if self == PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON:
- return (
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_RANGE,
- PenState.PEN_IS_OUT_OF_RANGE,
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
- )
-
- if self == PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON:
- return (
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_CONTACT,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- PenState.PEN_IS_OUT_OF_RANGE,
- )
-
return tuple()
@staticmethod
@@ -402,9 +332,9 @@ class PenState(Enum):
}
@staticmethod
- def legal_transitions_with_primary_button() -> Dict[str, Tuple["PenState", ...]]:
+ def legal_transitions_with_button() -> Dict[str, Tuple["PenState", ...]]:
"""We revisit the Windows Pen Implementation state machine:
- we now have a primary button.
+ we now have a button.
"""
return {
"hover-button": (PenState.PEN_IS_IN_RANGE_WITH_BUTTON,),
@@ -451,56 +381,6 @@ class PenState(Enum):
}
@staticmethod
- def legal_transitions_with_secondary_button() -> Dict[str, Tuple["PenState", ...]]:
- """We revisit the Windows Pen Implementation state machine:
- we now have a secondary button.
- Note: we don't looks for 2 buttons interactions.
- """
- return {
- "hover-button": (PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,),
- "hover-button -> out-of-range": (
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- PenState.PEN_IS_OUT_OF_RANGE,
- ),
- "in-range -> button-press": (
- PenState.PEN_IS_IN_RANGE,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- ),
- "in-range -> button-press -> button-release": (
- PenState.PEN_IS_IN_RANGE,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_RANGE,
- ),
- "in-range -> touch -> button-press -> button-release": (
- PenState.PEN_IS_IN_RANGE,
- PenState.PEN_IS_IN_CONTACT,
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_CONTACT,
- ),
- "in-range -> touch -> button-press -> release -> button-release": (
- PenState.PEN_IS_IN_RANGE,
- PenState.PEN_IS_IN_CONTACT,
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_RANGE,
- ),
- "in-range -> button-press -> touch -> release -> button-release": (
- PenState.PEN_IS_IN_RANGE,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_RANGE,
- ),
- "in-range -> button-press -> touch -> button-release -> release": (
- PenState.PEN_IS_IN_RANGE,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_CONTACT,
- PenState.PEN_IS_IN_RANGE,
- ),
- }
-
- @staticmethod
def tolerated_transitions() -> Dict[str, Tuple["PenState", ...]]:
"""This is not adhering to the Windows Pen Implementation state machine
but we should expect the kernel to behave properly, mostly for historical
@@ -616,10 +496,22 @@ class Pen(object):
evdev.value[axis] == value
), f"assert evdev.value[{axis}] ({evdev.value[axis]}) != {value}"
- def assert_expected_input_events(self, evdev):
+ def assert_expected_input_events(self, evdev, button):
assert evdev.value[libevdev.EV_ABS.ABS_X] == self.x
assert evdev.value[libevdev.EV_ABS.ABS_Y] == self.y
- assert self.current_state == PenState.from_evdev(evdev)
+
+ # assert no other buttons than the tested ones are set
+ buttons = [
+ BtnPressed.PRIMARY_PRESSED,
+ BtnPressed.SECONDARY_PRESSED,
+ BtnPressed.THIRD_PRESSED,
+ ]
+ if button is not None:
+ buttons.remove(button)
+ for b in buttons:
+ assert evdev.value[b.value] is None or evdev.value[b.value] == False
+
+ assert self.current_state == PenState.from_evdev(evdev, button)
class PenDigitizer(base.UHIDTestDevice):
@@ -647,7 +539,7 @@ class PenDigitizer(base.UHIDTestDevice):
continue
self.fields = [f.usage_name for f in r]
- def move_to(self, pen, state):
+ def move_to(self, pen, state, button):
# fill in the previous values
if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE:
pen.restore()
@@ -690,29 +582,17 @@ class PenDigitizer(base.UHIDTestDevice):
pen.inrange = True
pen.invert = False
pen.eraser = False
- pen.barrelswitch = True
- pen.secondarybarrelswitch = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.secondarybarrelswitch = button == BtnPressed.SECONDARY_PRESSED
elif state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
pen.tipswitch = True
pen.inrange = True
pen.invert = False
pen.eraser = False
- pen.barrelswitch = True
- pen.secondarybarrelswitch = False
- elif state == PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON:
- pen.tipswitch = False
- pen.inrange = True
- pen.invert = False
- pen.eraser = False
- pen.barrelswitch = False
- pen.secondarybarrelswitch = True
- elif state == PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON:
- pen.tipswitch = True
- pen.inrange = True
- pen.invert = False
- pen.eraser = False
- pen.barrelswitch = False
- pen.secondarybarrelswitch = True
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.secondarybarrelswitch = button == BtnPressed.SECONDARY_PRESSED
elif state == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT:
pen.tipswitch = False
pen.inrange = True
@@ -730,7 +610,7 @@ class PenDigitizer(base.UHIDTestDevice):
pen.current_state = state
- def event(self, pen):
+ def event(self, pen, button):
rs = []
r = self.create_report(application=self.cur_application, data=pen)
self.call_input_event(r)
@@ -771,17 +651,17 @@ class BaseTest:
def create_device(self):
raise Exception("please reimplement me in subclasses")
- def post(self, uhdev, pen):
- r = uhdev.event(pen)
+ def post(self, uhdev, pen, test_button):
+ r = uhdev.event(pen, test_button)
events = uhdev.next_sync_events()
self.debug_reports(r, uhdev, events)
return events
def validate_transitions(
- self, from_state, pen, evdev, events, allow_intermediate_states
+ self, from_state, pen, evdev, events, allow_intermediate_states, button
):
# check that the final state is correct
- pen.assert_expected_input_events(evdev)
+ pen.assert_expected_input_events(evdev, button)
state = from_state
@@ -794,12 +674,14 @@ class BaseTest:
events = events[idx + 1 :]
# now check for a valid transition
- state = state.apply(sync_events, not allow_intermediate_states)
+ state = state.apply(sync_events, not allow_intermediate_states, button)
if events:
- state = state.apply(sync_events, not allow_intermediate_states)
+ state = state.apply(sync_events, not allow_intermediate_states, button)
- def _test_states(self, state_list, scribble, allow_intermediate_states):
+ def _test_states(
+ self, state_list, scribble, allow_intermediate_states, button=None
+ ):
"""Internal method to test against a list of
transition between states.
state_list is a list of PenState objects
@@ -812,10 +694,10 @@ class BaseTest:
cur_state = PenState.PEN_IS_OUT_OF_RANGE
p = Pen(50, 60)
- uhdev.move_to(p, PenState.PEN_IS_OUT_OF_RANGE)
- events = self.post(uhdev, p)
+ uhdev.move_to(p, PenState.PEN_IS_OUT_OF_RANGE, button)
+ events = self.post(uhdev, p, button)
self.validate_transitions(
- cur_state, p, evdev, events, allow_intermediate_states
+ cur_state, p, evdev, events, allow_intermediate_states, button
)
cur_state = p.current_state
@@ -824,18 +706,18 @@ class BaseTest:
if scribble and cur_state != PenState.PEN_IS_OUT_OF_RANGE:
p.x += 1
p.y -= 1
- events = self.post(uhdev, p)
+ events = self.post(uhdev, p, button)
self.validate_transitions(
- cur_state, p, evdev, events, allow_intermediate_states
+ cur_state, p, evdev, events, allow_intermediate_states, button
)
assert len(events) >= 3 # X, Y, SYN
- uhdev.move_to(p, state)
+ uhdev.move_to(p, state, button)
if scribble and state != PenState.PEN_IS_OUT_OF_RANGE:
p.x += 1
p.y -= 1
- events = self.post(uhdev, p)
+ events = self.post(uhdev, p, button)
self.validate_transitions(
- cur_state, p, evdev, events, allow_intermediate_states
+ cur_state, p, evdev, events, allow_intermediate_states, button
)
cur_state = p.current_state
@@ -874,12 +756,17 @@ class BaseTest:
"state_list",
[
pytest.param(v, id=k)
- for k, v in PenState.legal_transitions_with_primary_button().items()
+ for k, v in PenState.legal_transitions_with_button().items()
],
)
def test_valid_primary_button_pen_states(self, state_list, scribble):
"""Rework the transition state machine by adding the primary button."""
- self._test_states(state_list, scribble, allow_intermediate_states=False)
+ self._test_states(
+ state_list,
+ scribble,
+ allow_intermediate_states=False,
+ button=BtnPressed.PRIMARY_PRESSED,
+ )
@pytest.mark.skip_if_uhdev(
lambda uhdev: "Secondary Barrel Switch" not in uhdev.fields,
@@ -890,12 +777,38 @@ class BaseTest:
"state_list",
[
pytest.param(v, id=k)
- for k, v in PenState.legal_transitions_with_secondary_button().items()
+ for k, v in PenState.legal_transitions_with_button().items()
],
)
def test_valid_secondary_button_pen_states(self, state_list, scribble):
"""Rework the transition state machine by adding the secondary button."""
- self._test_states(state_list, scribble, allow_intermediate_states=False)
+ self._test_states(
+ state_list,
+ scribble,
+ allow_intermediate_states=False,
+ button=BtnPressed.SECONDARY_PRESSED,
+ )
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Third Barrel Switch" not in uhdev.fields,
+ "Device not compatible, missing Third Barrel Switch usage",
+ )
+ @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"])
+ @pytest.mark.parametrize(
+ "state_list",
+ [
+ pytest.param(v, id=k)
+ for k, v in PenState.legal_transitions_with_button().items()
+ ],
+ )
+ def test_valid_third_button_pen_states(self, state_list, scribble):
+ """Rework the transition state machine by adding the secondary button."""
+ self._test_states(
+ state_list,
+ scribble,
+ allow_intermediate_states=False,
+ button=BtnPressed.THIRD_PRESSED,
+ )
@pytest.mark.skip_if_uhdev(
lambda uhdev: "Invert" not in uhdev.fields,
@@ -956,7 +869,7 @@ class BaseTest:
class GXTP_pen(PenDigitizer):
- def event(self, pen):
+ def event(self, pen, test_button):
if not hasattr(self, "prev_tip_state"):
self.prev_tip_state = False
@@ -977,13 +890,407 @@ class GXTP_pen(PenDigitizer):
if pen.eraser:
internal_pen.invert = False
- return super().event(internal_pen)
+ return super().event(internal_pen, test_button)
class USIPen(PenDigitizer):
pass
+class XPPen_ArtistPro16Gen2_28bd_095b(PenDigitizer):
+ """
+ Pen with two buttons and a rubber end, but which reports
+ the second button as an eraser
+ """
+
+ def __init__(
+ self,
+ name,
+ rdesc_str=None,
+ rdesc=None,
+ application="Pen",
+ physical="Stylus",
+ input_info=(BusType.USB, 0x28BD, 0x095B),
+ evdev_name_suffix=None,
+ ):
+ super().__init__(
+ name, rdesc_str, rdesc, application, physical, input_info, evdev_name_suffix
+ )
+ self.fields.append("Secondary Barrel Switch")
+
+ def move_to(self, pen, state, button):
+ # fill in the previous values
+ if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.restore()
+
+ print(f"\n *** pen is moving to {state} ***")
+
+ if state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.backup()
+ pen.x = 0
+ pen.y = 0
+ pen.tipswitch = False
+ pen.tippressure = 0
+ pen.azimuth = 0
+ pen.inrange = False
+ pen.width = 0
+ pen.height = 0
+ pen.invert = False
+ pen.eraser = False
+ pen.xtilt = 0
+ pen.ytilt = 0
+ pen.twist = 0
+ pen.barrelswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ elif state == PenState.PEN_IS_IN_CONTACT:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.eraser = button == BtnPressed.SECONDARY_PRESSED
+ elif state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.eraser = button == BtnPressed.SECONDARY_PRESSED
+ elif state == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = True
+ pen.eraser = False
+ pen.barrelswitch = False
+ elif state == PenState.PEN_IS_ERASING:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = True
+ pen.eraser = False
+ pen.barrelswitch = False
+
+ pen.current_state = state
+
+ def event(self, pen, test_button):
+ import math
+
+ pen_copy = copy.copy(pen)
+ width = 13.567
+ height = 8.480
+ tip_height = 0.055677699
+ hx = tip_height * (32767 / width)
+ hy = tip_height * (32767 / height)
+ if pen_copy.xtilt != 0:
+ pen_copy.x += round(hx * math.sin(math.radians(pen_copy.xtilt)))
+ if pen_copy.ytilt != 0:
+ pen_copy.y += round(hy * math.sin(math.radians(pen_copy.ytilt)))
+
+ return super().event(pen_copy, test_button)
+
+
+class XPPen_Artist24_28bd_093a(PenDigitizer):
+ """
+ Pen that reports secondary barrel switch through eraser
+ """
+
+ def __init__(
+ self,
+ name,
+ rdesc_str=None,
+ rdesc=None,
+ application="Pen",
+ physical="Stylus",
+ input_info=(BusType.USB, 0x28BD, 0x093A),
+ evdev_name_suffix=None,
+ ):
+ super().__init__(
+ name, rdesc_str, rdesc, application, physical, input_info, evdev_name_suffix
+ )
+ self.fields.append("Secondary Barrel Switch")
+ self.previous_state = PenState.PEN_IS_OUT_OF_RANGE
+
+ def move_to(self, pen, state, button, debug=True):
+ # fill in the previous values
+ if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.restore()
+
+ if debug:
+ print(f"\n *** pen is moving to {state} ***")
+
+ if state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.backup()
+ pen.tipswitch = False
+ pen.tippressure = 0
+ pen.azimuth = 0
+ pen.inrange = False
+ pen.width = 0
+ pen.height = 0
+ pen.invert = False
+ pen.eraser = False
+ pen.xtilt = 0
+ pen.ytilt = 0
+ pen.twist = 0
+ pen.barrelswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ elif state == PenState.PEN_IS_IN_CONTACT:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.eraser = button == BtnPressed.SECONDARY_PRESSED
+ elif state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.eraser = button == BtnPressed.SECONDARY_PRESSED
+
+ pen.current_state = state
+
+ def send_intermediate_state(self, pen, state, button):
+ intermediate_pen = copy.copy(pen)
+ self.move_to(intermediate_pen, state, button, debug=False)
+ return super().event(intermediate_pen, button)
+
+ def event(self, pen, button):
+ rs = []
+
+ # the pen reliably sends in-range events in a normal case (non emulation of eraser mode)
+ if self.previous_state == PenState.PEN_IS_IN_CONTACT:
+ if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE:
+ rs.extend(
+ self.send_intermediate_state(pen, PenState.PEN_IS_IN_RANGE, button)
+ )
+
+ if button == BtnPressed.SECONDARY_PRESSED:
+ if self.previous_state == PenState.PEN_IS_IN_RANGE:
+ if pen.current_state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ rs.extend(
+ self.send_intermediate_state(
+ pen, PenState.PEN_IS_OUT_OF_RANGE, button
+ )
+ )
+
+ if self.previous_state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ if pen.current_state == PenState.PEN_IS_IN_RANGE:
+ rs.extend(
+ self.send_intermediate_state(
+ pen, PenState.PEN_IS_OUT_OF_RANGE, button
+ )
+ )
+
+ if self.previous_state == PenState.PEN_IS_IN_CONTACT:
+ if pen.current_state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
+ rs.extend(
+ self.send_intermediate_state(
+ pen, PenState.PEN_IS_OUT_OF_RANGE, button
+ )
+ )
+ rs.extend(
+ self.send_intermediate_state(
+ pen, PenState.PEN_IS_IN_RANGE_WITH_BUTTON, button
+ )
+ )
+
+ if self.previous_state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
+ if pen.current_state == PenState.PEN_IS_IN_CONTACT:
+ rs.extend(
+ self.send_intermediate_state(
+ pen, PenState.PEN_IS_OUT_OF_RANGE, button
+ )
+ )
+ rs.extend(
+ self.send_intermediate_state(
+ pen, PenState.PEN_IS_IN_RANGE, button
+ )
+ )
+
+ rs.extend(super().event(pen, button))
+ self.previous_state = pen.current_state
+ return rs
+
+
+class Huion_Kamvas_Pro_19_256c_006b(PenDigitizer):
+ """
+ Pen that reports secondary barrel switch through secondary TipSwtich
+ and 3rd button through Invert
+ """
+
+ def __init__(
+ self,
+ name,
+ rdesc_str=None,
+ rdesc=None,
+ application="Stylus",
+ physical=None,
+ input_info=(BusType.USB, 0x256C, 0x006B),
+ evdev_name_suffix=None,
+ ):
+ super().__init__(
+ name, rdesc_str, rdesc, application, physical, input_info, evdev_name_suffix
+ )
+ self.fields.append("Secondary Barrel Switch")
+ self.fields.append("Third Barrel Switch")
+ self.previous_state = PenState.PEN_IS_OUT_OF_RANGE
+
+ def move_to(self, pen, state, button, debug=True):
+ # fill in the previous values
+ if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.restore()
+
+ if debug:
+ print(f"\n *** pen is moving to {state} ***")
+
+ if state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.backup()
+ pen.tipswitch = False
+ pen.tippressure = 0
+ pen.azimuth = 0
+ pen.inrange = False
+ pen.width = 0
+ pen.height = 0
+ pen.invert = False
+ pen.eraser = False
+ pen.xtilt = 0
+ pen.ytilt = 0
+ pen.twist = 0
+ pen.barrelswitch = False
+ pen.secondarytipswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ pen.secondarytipswitch = False
+ elif state == PenState.PEN_IS_IN_CONTACT:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ pen.secondarytipswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.eraser = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.secondarytipswitch = button == BtnPressed.SECONDARY_PRESSED
+ pen.invert = button == BtnPressed.THIRD_PRESSED
+ elif state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.eraser = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.secondarytipswitch = button == BtnPressed.SECONDARY_PRESSED
+ pen.invert = button == BtnPressed.THIRD_PRESSED
+ elif state == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = True
+ pen.eraser = False
+ pen.barrelswitch = False
+ pen.secondarytipswitch = False
+ elif state == PenState.PEN_IS_ERASING:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = True
+ pen.barrelswitch = False
+ pen.secondarytipswitch = False
+
+ pen.current_state = state
+
+ def call_input_event(self, report):
+ if report[0] == 0x0a:
+ # ensures the original second Eraser usage is null
+ report[1] &= 0xdf
+
+ # ensures the original last bit is equal to bit 6 (In Range)
+ if report[1] & 0x40:
+ report[1] |= 0x80
+
+ super().call_input_event(report)
+
+ def send_intermediate_state(self, pen, state, test_button):
+ intermediate_pen = copy.copy(pen)
+ self.move_to(intermediate_pen, state, test_button, debug=False)
+ return super().event(intermediate_pen, test_button)
+
+ def event(self, pen, button):
+ rs = []
+
+ # it's not possible to go between eraser mode or not without
+ # going out-of-prox: the eraser mode is activated by presenting
+ # the tail of the pen
+ if self.previous_state in (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ ) and pen.current_state in (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_BUTTON,
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_ERASING_WITH_BUTTON,
+ ):
+ rs.extend(
+ self.send_intermediate_state(pen, PenState.PEN_IS_OUT_OF_RANGE, button)
+ )
+
+ # same than above except from eraser to normal
+ if self.previous_state in (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_BUTTON,
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_ERASING_WITH_BUTTON,
+ ) and pen.current_state in (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ ):
+ rs.extend(
+ self.send_intermediate_state(pen, PenState.PEN_IS_OUT_OF_RANGE, button)
+ )
+
+ if self.previous_state == PenState.PEN_IS_OUT_OF_RANGE:
+ if pen.current_state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ rs.extend(
+ self.send_intermediate_state(pen, PenState.PEN_IS_IN_RANGE, button)
+ )
+
+ rs.extend(super().event(pen, button))
+ self.previous_state = pen.current_state
+ return rs
+
+
################################################################################
#
# Windows 7 compatible devices
@@ -1162,3 +1469,37 @@ class TestGoodix_27c6_0e00(BaseTest.TestTablet):
rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 55 0e 65 11 35 00 15 00 09 42 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 95 01 75 08 09 51 81 02 75 10 05 01 26 04 20 46 e6 09 09 30 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 22 a1 02 09 42 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 95 01 75 08 09 51 81 02 75 10 05 01 26 04 20 46 e6 09 09 30 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 22 a1 02 09 42 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 95 01 75 08 09 51 81 02 75 10 05 01 26 04 20 46 e6 09 09 30 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 75 08 09 51 95 01 81 02 05 01 26 04 20 75 10 55 0e 65 11 09 30 35 00 46 e6 09 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 75 08 09 51 95 01 81 02 05 01 26 04 20 75 10 55 0e 65 11 09 30 35 00 46 e6 09 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 54 15 00 25 7f 75 08 95 01 81 02 85 02 09 55 95 01 25 0a b1 02 85 03 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 02 a1 01 09 20 a1 00 85 08 05 01 a4 09 30 35 00 46 e6 09 15 00 26 04 20 55 0d 65 13 75 10 95 01 81 02 09 31 46 9a 06 26 60 15 81 02 b4 05 0d 09 38 95 01 75 08 15 00 25 01 81 02 09 30 75 10 26 ff 0f 81 02 09 31 81 02 09 42 09 44 09 5a 09 3c 09 45 09 32 75 01 95 06 25 01 81 02 95 02 81 03 09 3d 55 0e 65 14 36 d8 dc 46 28 23 16 d8 dc 26 28 23 95 01 75 10 81 02 09 3e 81 02 09 41 15 00 27 a0 8c 00 00 35 00 47 a0 8c 00 00 81 02 05 20 0a 53 04 65 00 16 01 f8 26 ff 07 75 10 95 01 81 02 0a 54 04 81 02 0a 55 04 81 02 0a 57 04 81 02 0a 58 04 81 02 0a 59 04 81 02 0a 72 04 81 02 0a 73 04 81 02 0a 74 04 81 02 05 0d 09 3b 15 00 25 64 75 08 81 02 09 5b 25 ff 75 40 81 02 06 00 ff 09 5b 75 20 81 02 05 0d 09 5c 26 ff 00 75 08 81 02 09 5e 81 02 09 70 a1 02 15 01 25 06 09 72 09 73 09 74 09 75 09 76 09 77 81 20 c0 06 00 ff 09 01 15 00 27 ff ff 00 00 75 10 95 01 81 02 85 09 09 81 a1 02 09 81 15 01 25 04 09 82 09 83 09 84 09 85 81 20 c0 85 10 09 5c a1 02 15 00 25 01 75 08 95 01 09 38 b1 02 09 5c 26 ff 00 b1 02 09 5d 75 01 95 01 25 01 b1 02 95 07 b1 03 c0 85 11 09 5e a1 02 09 38 15 00 25 01 75 08 95 01 b1 02 09 5e 26 ff 00 b1 02 09 5f 75 01 25 01 b1 02 75 07 b1 03 c0 85 12 09 70 a1 02 75 08 95 01 15 00 25 01 09 38 b1 02 09 70 a1 02 25 06 09 72 09 73 09 74 09 75 09 76 09 77 b1 20 c0 09 71 75 01 25 01 b1 02 75 07 b1 03 c0 85 13 09 80 15 00 25 ff 75 40 95 01 b1 02 85 14 09 44 a1 02 09 38 75 08 95 01 25 01 b1 02 15 01 25 03 09 44 a1 02 09 a4 09 44 09 5a 09 45 09 a3 b1 20 c0 09 5a a1 02 09 a4 09 44 09 5a 09 45 09 a3 b1 20 c0 09 45 a1 02 09 a4 09 44 09 5a 09 45 09 a3 b1 20 c0 c0 85 15 75 08 95 01 05 0d 09 90 a1 02 09 38 25 01 b1 02 09 91 75 10 26 ff 0f b1 02 09 92 75 40 25 ff b1 02 05 06 09 2a 75 08 26 ff 00 a1 02 09 2d b1 02 09 2e b1 02 c0 c0 85 16 05 06 09 2b a1 02 05 0d 25 01 09 38 b1 02 05 06 09 2b a1 02 09 2d 26 ff 00 b1 02 09 2e b1 02 c0 c0 85 17 06 00 ff 09 01 a1 02 05 0d 09 38 75 08 95 01 25 01 b1 02 06 00 ff 09 01 75 10 27 ff ff 00 00 b1 02 c0 85 18 05 0d 09 38 75 08 95 01 15 00 25 01 b1 02 c0 c0 06 f0 ff 09 01 a1 01 85 0e 09 01 15 00 25 ff 75 08 95 40 91 02 09 01 15 00 25 ff 75 08 95 40 81 02 c0",
input_info=(BusType.I2C, 0x27C6, 0x0E00),
)
+
+
+class TestXPPen_ArtistPro16Gen2_28bd_095b(BaseTest.TestTablet):
+ hid_bpfs = [("XPPen__ArtistPro16Gen2.bpf.o", True)]
+
+ def create_device(self):
+ dev = XPPen_ArtistPro16Gen2_28bd_095b(
+ "uhid test XPPen Artist Pro 16 Gen2 28bd 095b",
+ rdesc="05 0d 09 02 a1 01 85 07 09 20 a1 00 09 42 09 44 09 45 09 3c 15 00 25 01 75 01 95 04 81 02 95 01 81 03 09 32 15 00 25 01 95 01 81 02 95 02 81 03 75 10 95 01 35 00 a4 05 01 09 30 65 13 55 0d 46 ff 34 26 ff 7f 81 02 09 31 46 20 21 26 ff 7f 81 02 b4 09 30 45 00 26 ff 3f 81 42 09 3d 15 81 25 7f 75 08 95 01 81 02 09 3e 15 81 25 7f 81 02 c0 c0",
+ input_info=(BusType.USB, 0x28BD, 0x095B),
+ )
+ return dev
+
+
+class TestXPPen_Artist24_28bd_093a(BaseTest.TestTablet):
+ hid_bpfs = [("XPPen__Artist24.bpf.o", True)]
+
+ def create_device(self):
+ return XPPen_Artist24_28bd_093a(
+ "uhid test XPPen Artist 24 28bd 093a",
+ rdesc="05 0d 09 02 a1 01 85 07 09 20 a1 00 09 42 09 44 09 45 15 00 25 01 75 01 95 03 81 02 95 02 81 03 09 32 95 01 81 02 95 02 81 03 75 10 95 01 35 00 a4 05 01 09 30 65 13 55 0d 46 f0 50 26 ff 7f 81 02 09 31 46 91 2d 26 ff 7f 81 02 b4 09 30 45 00 26 ff 1f 81 42 09 3d 15 81 25 7f 75 08 95 01 81 02 09 3e 15 81 25 7f 81 02 c0 c0",
+ input_info=(BusType.USB, 0x28BD, 0x093A),
+ )
+
+
+class TestHuion_Kamvas_Pro_19_256c_006b(BaseTest.TestTablet):
+ hid_bpfs = [("Huion__Kamvas-Pro-19.bpf.o", True)]
+
+ def create_device(self):
+ return Huion_Kamvas_Pro_19_256c_006b(
+ "uhid test HUION Huion Tablet_GT1902",
+ rdesc="05 0d 09 02 a1 01 85 0a 09 20 a1 01 09 42 09 44 09 43 09 3c 09 45 15 00 25 01 75 01 95 06 81 02 09 32 75 01 95 01 81 02 81 03 05 01 09 30 09 31 55 0d 65 33 26 ff 7f 35 00 46 00 08 75 10 95 02 81 02 05 0d 09 30 26 ff 3f 75 10 95 01 81 02 09 3d 09 3e 15 a6 25 5a 75 08 95 02 81 02 c0 c0 05 0d 09 04 a1 01 85 04 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 ff 7f 35 00 46 15 0c 81 42 09 31 26 ff 7f 46 cb 06 81 42 05 0d 09 30 26 ff 1f 75 10 95 01 81 02 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 ff 7f 35 00 46 15 0c 81 42 09 31 26 ff 7f 46 cb 06 81 42 05 0d 09 30 26 ff 1f 75 10 95 01 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 75 08 95 08 81 03 85 05 09 55 25 0a 75 08 95 01 b1 02 06 00 ff 09 c5 85 06 15 00 26 ff 00 75 08 96 00 01 b1 02 c0",
+ input_info=(BusType.USB, 0x256C, 0x006B),
+ )
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index edf1c99c99..5f7d5a5ba8 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -1722,10 +1722,17 @@ FIXTURE_VARIANT(iommufd_dirty_tracking)
FIXTURE_SETUP(iommufd_dirty_tracking)
{
+ unsigned long size;
int mmap_flags;
void *vrc;
int rc;
+ if (variant->buffer_size < MOCK_PAGE_SIZE) {
+ SKIP(return,
+ "Skipping buffer_size=%lu, less than MOCK_PAGE_SIZE=%lu",
+ variant->buffer_size, MOCK_PAGE_SIZE);
+ }
+
self->fd = open("/dev/iommu", O_RDWR);
ASSERT_NE(-1, self->fd);
@@ -1749,12 +1756,11 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
assert(vrc == self->buffer);
self->page_size = MOCK_PAGE_SIZE;
- self->bitmap_size =
- variant->buffer_size / self->page_size / BITS_PER_BYTE;
+ self->bitmap_size = variant->buffer_size / self->page_size;
/* Provision with an extra (PAGE_SIZE) for the unaligned case */
- rc = posix_memalign(&self->bitmap, PAGE_SIZE,
- self->bitmap_size + PAGE_SIZE);
+ size = DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE);
+ rc = posix_memalign(&self->bitmap, PAGE_SIZE, size + PAGE_SIZE);
assert(!rc);
assert(self->bitmap);
assert((uintptr_t)self->bitmap % PAGE_SIZE == 0);
@@ -1775,51 +1781,63 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
FIXTURE_TEARDOWN(iommufd_dirty_tracking)
{
munmap(self->buffer, variant->buffer_size);
- munmap(self->bitmap, self->bitmap_size);
+ munmap(self->bitmap, DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE));
teardown_iommufd(self->fd, _metadata);
}
-FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128k)
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty8k)
+{
+ /* half of an u8 index bitmap */
+ .buffer_size = 8UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty16k)
+{
+ /* one u8 index bitmap */
+ .buffer_size = 16UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64k)
{
/* one u32 index bitmap */
- .buffer_size = 128UL * 1024UL,
+ .buffer_size = 64UL * 1024UL,
};
-FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256k)
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128k)
{
/* one u64 index bitmap */
- .buffer_size = 256UL * 1024UL,
+ .buffer_size = 128UL * 1024UL,
};
-FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty640k)
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty320k)
{
/* two u64 index and trailing end bitmap */
- .buffer_size = 640UL * 1024UL,
+ .buffer_size = 320UL * 1024UL,
};
-FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M)
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64M)
{
- /* 4K bitmap (128M IOVA range) */
- .buffer_size = 128UL * 1024UL * 1024UL,
+ /* 4K bitmap (64M IOVA range) */
+ .buffer_size = 64UL * 1024UL * 1024UL,
};
-FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M_huge)
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64M_huge)
{
- /* 4K bitmap (128M IOVA range) */
- .buffer_size = 128UL * 1024UL * 1024UL,
+ /* 4K bitmap (64M IOVA range) */
+ .buffer_size = 64UL * 1024UL * 1024UL,
.hugepages = true,
};
-FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M)
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M)
{
- /* 8K bitmap (256M IOVA range) */
- .buffer_size = 256UL * 1024UL * 1024UL,
+ /* 8K bitmap (128M IOVA range) */
+ .buffer_size = 128UL * 1024UL * 1024UL,
};
-FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M_huge)
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M_huge)
{
- /* 8K bitmap (256M IOVA range) */
- .buffer_size = 256UL * 1024UL * 1024UL,
+ /* 8K bitmap (128M IOVA range) */
+ .buffer_size = 128UL * 1024UL * 1024UL,
.hugepages = true,
};
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h
index 8d2b46b211..c612fbf019 100644
--- a/tools/testing/selftests/iommu/iommufd_utils.h
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -22,6 +22,8 @@
#define BIT_MASK(nr) (1UL << ((nr) % __BITS_PER_LONG))
#define BIT_WORD(nr) ((nr) / __BITS_PER_LONG)
+#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+
static inline void set_bit(unsigned int nr, unsigned long *addr)
{
unsigned long mask = BIT_MASK(nr);
@@ -346,12 +348,12 @@ static int _test_cmd_mock_domain_set_dirty(int fd, __u32 hwpt_id, size_t length,
static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length,
__u64 iova, size_t page_size,
size_t pte_page_size, __u64 *bitmap,
- __u64 bitmap_size, __u32 flags,
+ __u64 nbits, __u32 flags,
struct __test_metadata *_metadata)
{
unsigned long npte = pte_page_size / page_size, pteset = 2 * npte;
- unsigned long nbits = bitmap_size * BITS_PER_BYTE;
unsigned long j, i, nr = nbits / pteset ?: 1;
+ unsigned long bitmap_size = DIV_ROUND_UP(nbits, BITS_PER_BYTE);
__u64 out_dirty = 0;
/* Mark all even bits as dirty in the mock domain */
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
index 656c43c240..c75ea40948 100644
--- a/tools/testing/selftests/ipc/msgque.c
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -198,13 +198,12 @@ int main(int argc, char **argv)
struct msgque_data msgque;
if (getuid() != 0)
- return ksft_exit_skip(
- "Please run the test as root - Exiting.\n");
+ ksft_exit_skip("Please run the test as root - Exiting.\n");
msgque.key = ftok(argv[0], 822155650);
if (msgque.key == -1) {
printf("Can't make key: %d\n", -errno);
- return ksft_exit_fail();
+ ksft_exit_fail();
}
msgque.msq_id = msgget(msgque.key, IPC_CREAT | IPC_EXCL | 0666);
@@ -243,13 +242,13 @@ int main(int argc, char **argv)
printf("Failed to test queue: %d\n", err);
goto err_out;
}
- return ksft_exit_pass();
+ ksft_exit_pass();
err_destroy:
if (msgctl(msgque.msq_id, IPC_RMID, NULL)) {
printf("Failed to destroy queue: %d\n", -errno);
- return ksft_exit_fail();
+ ksft_exit_fail();
}
err_out:
- return ksft_exit_fail();
+ ksft_exit_fail();
}
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 14bbab0cce..76c2a6945d 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -16,10 +16,12 @@
* For each test, report any progress, debugging, etc with:
*
* ksft_print_msg(fmt, ...);
+ * ksft_perror(msg);
*
* and finally report the pass/fail/skip/xfail state of the test with one of:
*
* ksft_test_result(condition, fmt, ...);
+ * ksft_test_result_report(result, fmt, ...);
* ksft_test_result_pass(fmt, ...);
* ksft_test_result_fail(fmt, ...);
* ksft_test_result_skip(fmt, ...);
@@ -39,6 +41,7 @@
* the program is aborting before finishing all tests):
*
* ksft_exit_fail_msg(fmt, ...);
+ * ksft_exit_fail_perror(msg);
*
*/
#ifndef __KSELFTEST_H
@@ -305,13 +308,34 @@ void ksft_test_result_code(int exit_code, const char *test_name,
printf("\n");
}
-static inline __noreturn int ksft_exit_pass(void)
+/**
+ * ksft_test_result() - Report test success based on truth of condition
+ *
+ * @condition: if true, report test success, otherwise failure.
+ */
+#define ksft_test_result_report(result, fmt, ...) do { \
+ switch (result) { \
+ case KSFT_PASS: \
+ ksft_test_result_pass(fmt, ##__VA_ARGS__); \
+ break; \
+ case KSFT_FAIL: \
+ ksft_test_result_fail(fmt, ##__VA_ARGS__); \
+ break; \
+ case KSFT_XFAIL: \
+ ksft_test_result_xfail(fmt, ##__VA_ARGS__); \
+ break; \
+ case KSFT_SKIP: \
+ ksft_test_result_skip(fmt, ##__VA_ARGS__); \
+ break; \
+ } } while (0)
+
+static inline __noreturn void ksft_exit_pass(void)
{
ksft_print_cnts();
exit(KSFT_PASS);
}
-static inline __noreturn int ksft_exit_fail(void)
+static inline __noreturn void ksft_exit_fail(void)
{
ksft_print_cnts();
exit(KSFT_FAIL);
@@ -338,7 +362,7 @@ static inline __noreturn int ksft_exit_fail(void)
ksft_cnt.ksft_xfail + \
ksft_cnt.ksft_xskip)
-static inline __noreturn __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
+static inline __noreturn __printf(1, 2) void ksft_exit_fail_msg(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -353,19 +377,32 @@ static inline __noreturn __printf(1, 2) int ksft_exit_fail_msg(const char *msg,
exit(KSFT_FAIL);
}
-static inline __noreturn int ksft_exit_xfail(void)
+static inline __noreturn void ksft_exit_fail_perror(const char *msg)
+{
+#ifndef NOLIBC
+ ksft_exit_fail_msg("%s: %s (%d)\n", msg, strerror(errno), errno);
+#else
+ /*
+ * nolibc doesn't provide strerror() and it seems
+ * inappropriate to add one, just print the errno.
+ */
+ ksft_exit_fail_msg("%s: %d)\n", msg, errno);
+#endif
+}
+
+static inline __noreturn void ksft_exit_xfail(void)
{
ksft_print_cnts();
exit(KSFT_XFAIL);
}
-static inline __noreturn int ksft_exit_xpass(void)
+static inline __noreturn void ksft_exit_xpass(void)
{
ksft_print_cnts();
exit(KSFT_XPASS);
}
-static inline __noreturn __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
+static inline __noreturn __printf(1, 2) void ksft_exit_skip(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
diff --git a/tools/testing/selftests/kselftest_deps.sh b/tools/testing/selftests/kselftest_deps.sh
index de59cc8f03..487e49fdf2 100755
--- a/tools/testing/selftests/kselftest_deps.sh
+++ b/tools/testing/selftests/kselftest_deps.sh
@@ -244,6 +244,7 @@ l4_test()
l5_test()
{
tests=$(find $(dirname "$test") -type f -name "*.mk")
+ [[ -z "${tests// }" ]] && return
test_libs=$(grep "^IOURING_EXTRA_LIBS +\?=" $tests | \
cut -d "=" -f 2)
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 741c7dc16a..ac280dcba9 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -45,6 +45,7 @@ LIBKVM_x86_64 += lib/x86_64/vmx.c
LIBKVM_aarch64 += lib/aarch64/gic.c
LIBKVM_aarch64 += lib/aarch64/gic_v3.c
+LIBKVM_aarch64 += lib/aarch64/gic_v3_its.c
LIBKVM_aarch64 += lib/aarch64/handlers.S
LIBKVM_aarch64 += lib/aarch64/processor.c
LIBKVM_aarch64 += lib/aarch64/spinlock.c
@@ -120,6 +121,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test
TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
+TEST_GEN_PROGS_x86_64 += x86_64/sev_init2_tests
TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
TEST_GEN_PROGS_x86_64 += x86_64/sev_smoke_test
TEST_GEN_PROGS_x86_64 += x86_64/amx_test
@@ -157,6 +159,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter
TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
+TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress
TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access
TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
TEST_GEN_PROGS_aarch64 += arch_timer
@@ -180,6 +183,7 @@ TEST_GEN_PROGS_s390x += s390x/sync_regs_test
TEST_GEN_PROGS_s390x += s390x/tprot
TEST_GEN_PROGS_s390x += s390x/cmma_test
TEST_GEN_PROGS_s390x += s390x/debug_test
+TEST_GEN_PROGS_s390x += s390x/shared_zeropage_test
TEST_GEN_PROGS_s390x += demand_paging_test
TEST_GEN_PROGS_s390x += dirty_log_test
TEST_GEN_PROGS_s390x += guest_print_test
@@ -189,6 +193,8 @@ TEST_GEN_PROGS_s390x += rseq_test
TEST_GEN_PROGS_s390x += set_memory_region_test
TEST_GEN_PROGS_s390x += kvm_binary_stats_test
+TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
+TEST_GEN_PROGS_riscv += riscv/ebreak_test
TEST_GEN_PROGS_riscv += arch_timer
TEST_GEN_PROGS_riscv += demand_paging_test
TEST_GEN_PROGS_riscv += dirty_log_test
@@ -225,8 +231,8 @@ LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
endif
CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
- -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \
- -fno-builtin-strnlen \
+ -D_GNU_SOURCE -fno-builtin-memcmp -fno-builtin-memcpy \
+ -fno-builtin-memset -fno-builtin-strnlen \
-fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
-I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
-I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
index 4eaba83cdc..eeba1cc87f 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -5,18 +5,14 @@
*
* Copyright (c) 2021, Google LLC.
*/
-#define _GNU_SOURCE
-
#include "arch_timer.h"
#include "delay.h"
#include "gic.h"
#include "processor.h"
#include "timer_test.h"
+#include "ucall_common.h"
#include "vgic.h"
-#define GICD_BASE_GPA 0x8000000ULL
-#define GICR_BASE_GPA 0x80A0000ULL
-
enum guest_stage {
GUEST_STAGE_VTIMER_CVAL = 1,
GUEST_STAGE_VTIMER_TVAL,
@@ -149,8 +145,7 @@ static void guest_code(void)
local_irq_disable();
- gic_init(GIC_V3, test_args.nr_vcpus,
- (void *)GICD_BASE_GPA, (void *)GICR_BASE_GPA);
+ gic_init(GIC_V3, test_args.nr_vcpus);
timer_set_ctl(VIRTUAL, CTL_IMASK);
timer_set_ctl(PHYSICAL, CTL_IMASK);
@@ -209,7 +204,7 @@ struct kvm_vm *test_vm_create(void)
vcpu_init_descriptor_tables(vcpus[i]);
test_init_timer_irq(vm);
- gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
+ gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);
__TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3");
/* Make all the test's cmdline args visible to the guest */
diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
index 5972905275..d29b08198b 100644
--- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
@@ -7,7 +7,6 @@
* hugetlbfs with a hole). It checks that the expected handling method is
* called (e.g., uffd faults with the right address and write/read flag).
*/
-#define _GNU_SOURCE
#include <linux/bitmap.h>
#include <fcntl.h>
#include <test_util.h>
@@ -375,14 +374,14 @@ static void setup_uffd(struct kvm_vm *vm, struct test_params *p,
*pt_uffd = uffd_setup_demand_paging(uffd_mode, 0,
pt_args.hva,
pt_args.paging_size,
- test->uffd_pt_handler);
+ 1, test->uffd_pt_handler);
*data_uffd = NULL;
if (test->uffd_data_handler)
*data_uffd = uffd_setup_demand_paging(uffd_mode, 0,
data_args.hva,
data_args.paging_size,
- test->uffd_data_handler);
+ 1, test->uffd_data_handler);
}
static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd,
diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c
index 9b004905d1..61731a950d 100644
--- a/tools/testing/selftests/kvm/aarch64/psci_test.c
+++ b/tools/testing/selftests/kvm/aarch64/psci_test.c
@@ -11,9 +11,9 @@
* KVM_SYSTEM_EVENT_SUSPEND UAPI.
*/
-#define _GNU_SOURCE
-
+#include <linux/kernel.h>
#include <linux/psci.h>
+#include <asm/cputype.h>
#include "kvm_util.h"
#include "processor.h"
diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c
index 16e2338686..a7de39fa2a 100644
--- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c
@@ -327,8 +327,8 @@ uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
return ftr;
}
-static void test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
- const struct reg_ftr_bits *ftr_bits)
+static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
+ const struct reg_ftr_bits *ftr_bits)
{
uint8_t shift = ftr_bits->shift;
uint64_t mask = ftr_bits->mask;
@@ -346,6 +346,8 @@ static void test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
vcpu_set_reg(vcpu, reg, val);
vcpu_get_reg(vcpu, reg, &new_val);
TEST_ASSERT_EQ(new_val, val);
+
+ return new_val;
}
static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg,
@@ -374,7 +376,15 @@ static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg,
TEST_ASSERT_EQ(val, old_val);
}
-static void test_user_set_reg(struct kvm_vcpu *vcpu, bool aarch64_only)
+static uint64_t test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+
+#define encoding_to_range_idx(encoding) \
+ KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(encoding), sys_reg_Op1(encoding), \
+ sys_reg_CRn(encoding), sys_reg_CRm(encoding), \
+ sys_reg_Op2(encoding))
+
+
+static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only)
{
uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
struct reg_mask_range range = {
@@ -398,9 +408,7 @@ static void test_user_set_reg(struct kvm_vcpu *vcpu, bool aarch64_only)
int idx;
/* Get the index to masks array for the idreg */
- idx = KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(reg_id), sys_reg_Op1(reg_id),
- sys_reg_CRn(reg_id), sys_reg_CRm(reg_id),
- sys_reg_Op2(reg_id));
+ idx = encoding_to_range_idx(reg_id);
for (int j = 0; ftr_bits[j].type != FTR_END; j++) {
/* Skip aarch32 reg on aarch64 only system, since they are RAZ/WI. */
@@ -414,7 +422,9 @@ static void test_user_set_reg(struct kvm_vcpu *vcpu, bool aarch64_only)
TEST_ASSERT_EQ(masks[idx] & ftr_bits[j].mask, ftr_bits[j].mask);
test_reg_set_fail(vcpu, reg, &ftr_bits[j]);
- test_reg_set_success(vcpu, reg, &ftr_bits[j]);
+
+ test_reg_vals[idx] = test_reg_set_success(vcpu, reg,
+ &ftr_bits[j]);
ksft_test_result_pass("%s\n", ftr_bits[j].name);
}
@@ -425,7 +435,6 @@ static void test_guest_reg_read(struct kvm_vcpu *vcpu)
{
bool done = false;
struct ucall uc;
- uint64_t val;
while (!done) {
vcpu_run(vcpu);
@@ -436,8 +445,8 @@ static void test_guest_reg_read(struct kvm_vcpu *vcpu)
break;
case UCALL_SYNC:
/* Make sure the written values are seen by guest */
- vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(uc.args[2]), &val);
- TEST_ASSERT_EQ(val, uc.args[3]);
+ TEST_ASSERT_EQ(test_reg_vals[encoding_to_range_idx(uc.args[2])],
+ uc.args[3]);
break;
case UCALL_DONE:
done = true;
@@ -448,13 +457,85 @@ static void test_guest_reg_read(struct kvm_vcpu *vcpu)
}
}
+/* Politely lifted from arch/arm64/include/asm/cache.h */
+/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */
+#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1))
+#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level))
+#define CLIDR_CTYPE(clidr, level) \
+ (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
+
+static void test_clidr(struct kvm_vcpu *vcpu)
+{
+ uint64_t clidr;
+ int level;
+
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), &clidr);
+
+ /* find the first empty level in the cache hierarchy */
+ for (level = 1; level < 7; level++) {
+ if (!CLIDR_CTYPE(clidr, level))
+ break;
+ }
+
+ /*
+ * If you have a mind-boggling 7 levels of cache, congratulations, you
+ * get to fix this.
+ */
+ TEST_ASSERT(level <= 7, "can't find an empty level in cache hierarchy");
+
+ /* stick in a unified cache level */
+ clidr |= BIT(2) << CLIDR_CTYPE_SHIFT(level);
+
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), clidr);
+ test_reg_vals[encoding_to_range_idx(SYS_CLIDR_EL1)] = clidr;
+}
+
+static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ test_clidr(vcpu);
+
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &val);
+ val++;
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val);
+
+ test_reg_vals[encoding_to_range_idx(SYS_MPIDR_EL1)] = val;
+ ksft_test_result_pass("%s\n", __func__);
+}
+
+static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encoding)
+{
+ size_t idx = encoding_to_range_idx(encoding);
+ uint64_t observed;
+
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding), &observed);
+ TEST_ASSERT_EQ(test_reg_vals[idx], observed);
+}
+
+static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Calls KVM_ARM_VCPU_INIT behind the scenes, which will do an
+ * architectural reset of the vCPU.
+ */
+ aarch64_vcpu_setup(vcpu, NULL);
+
+ for (int i = 0; i < ARRAY_SIZE(test_regs); i++)
+ test_assert_id_reg_unchanged(vcpu, test_regs[i].reg);
+
+ test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1);
+
+ ksft_test_result_pass("%s\n", __func__);
+}
+
int main(void)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
bool aarch64_only;
uint64_t val, el0;
- int ftr_cnt;
+ int test_cnt;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES));
@@ -467,18 +548,22 @@ int main(void)
ksft_print_header();
- ftr_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) +
- ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) +
- ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
- ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) +
- ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) -
- ARRAY_SIZE(test_regs);
+ test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) +
+ ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) +
+ ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
+ ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) +
+ ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) -
+ ARRAY_SIZE(test_regs) + 2;
- ksft_set_plan(ftr_cnt);
+ ksft_set_plan(test_cnt);
+
+ test_vm_ftr_id_regs(vcpu, aarch64_only);
+ test_vcpu_ftr_id_regs(vcpu);
- test_user_set_reg(vcpu, aarch64_only);
test_guest_reg_read(vcpu);
+ test_reset_preserves_id_regs(vcpu);
+
kvm_vm_free(vm);
ksft_finished();
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/aarch64/vgic_init.c
index ca917c71ff..b3b5fb0ff0 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_init.c
+++ b/tools/testing/selftests/kvm/aarch64/vgic_init.c
@@ -4,7 +4,6 @@
*
* Copyright (C) 2020, Red Hat, Inc.
*/
-#define _GNU_SOURCE
#include <linux/kernel.h>
#include <sys/syscall.h>
#include <asm/kvm.h>
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
index 2e64b4856e..a51dbd2a5f 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
@@ -19,9 +19,6 @@
#include "gic_v3.h"
#include "vgic.h"
-#define GICD_BASE_GPA 0x08000000ULL
-#define GICR_BASE_GPA 0x080A0000ULL
-
/*
* Stores the user specified args; it's passed to the guest and to every test
* function.
@@ -49,9 +46,6 @@ struct test_args {
#define IRQ_DEFAULT_PRIO (LOWEST_PRIO - 1)
#define IRQ_DEFAULT_PRIO_REG (IRQ_DEFAULT_PRIO << KVM_PRIO_SHIFT) /* 0xf0 */
-static void *dist = (void *)GICD_BASE_GPA;
-static void *redist = (void *)GICR_BASE_GPA;
-
/*
* The kvm_inject_* utilities are used by the guest to ask the host to inject
* interrupts (e.g., using the KVM_IRQ_LINE ioctl).
@@ -152,7 +146,7 @@ static void reset_stats(void)
static uint64_t gic_read_ap1r0(void)
{
- uint64_t reg = read_sysreg_s(SYS_ICV_AP1R0_EL1);
+ uint64_t reg = read_sysreg_s(SYS_ICC_AP1R0_EL1);
dsb(sy);
return reg;
@@ -160,7 +154,7 @@ static uint64_t gic_read_ap1r0(void)
static void gic_write_ap1r0(uint64_t val)
{
- write_sysreg_s(val, SYS_ICV_AP1R0_EL1);
+ write_sysreg_s(val, SYS_ICC_AP1R0_EL1);
isb();
}
@@ -478,7 +472,7 @@ static void guest_code(struct test_args *args)
bool level_sensitive = args->level_sensitive;
struct kvm_inject_desc *f, *inject_fns;
- gic_init(GIC_V3, 1, dist, redist);
+ gic_init(GIC_V3, 1);
for (i = 0; i < nr_irqs; i++)
gic_irq_enable(i);
@@ -764,8 +758,7 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
vcpu_args_set(vcpu, 1, args_gva);
- gic_fd = vgic_v3_setup(vm, 1, nr_irqs,
- GICD_BASE_GPA, GICR_BASE_GPA);
+ gic_fd = vgic_v3_setup(vm, 1, nr_irqs);
__TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping");
vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT,
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c
new file mode 100644
index 0000000000..fc4fe52fb6
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c
@@ -0,0 +1,410 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vgic_lpi_stress - Stress test for KVM's ITS emulation
+ *
+ * Copyright (c) 2024 Google LLC
+ */
+
+#include <linux/sizes.h>
+#include <pthread.h>
+#include <stdatomic.h>
+#include <sys/sysinfo.h>
+
+#include "kvm_util.h"
+#include "gic.h"
+#include "gic_v3.h"
+#include "gic_v3_its.h"
+#include "processor.h"
+#include "ucall.h"
+#include "vgic.h"
+
+#define TEST_MEMSLOT_INDEX 1
+
+#define GIC_LPI_OFFSET 8192
+
+static size_t nr_iterations = 1000;
+static vm_paddr_t gpa_base;
+
+static struct kvm_vm *vm;
+static struct kvm_vcpu **vcpus;
+static int gic_fd, its_fd;
+
+static struct test_data {
+ bool request_vcpus_stop;
+ u32 nr_cpus;
+ u32 nr_devices;
+ u32 nr_event_ids;
+
+ vm_paddr_t device_table;
+ vm_paddr_t collection_table;
+ vm_paddr_t cmdq_base;
+ void *cmdq_base_va;
+ vm_paddr_t itt_tables;
+
+ vm_paddr_t lpi_prop_table;
+ vm_paddr_t lpi_pend_tables;
+} test_data = {
+ .nr_cpus = 1,
+ .nr_devices = 1,
+ .nr_event_ids = 16,
+};
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ u32 intid = gic_get_and_ack_irq();
+
+ if (intid == IAR_SPURIOUS)
+ return;
+
+ GUEST_ASSERT(intid >= GIC_LPI_OFFSET);
+ gic_set_eoi(intid);
+}
+
+static void guest_setup_its_mappings(void)
+{
+ u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET;
+ u32 nr_events = test_data.nr_event_ids;
+ u32 nr_devices = test_data.nr_devices;
+ u32 nr_cpus = test_data.nr_cpus;
+
+ for (coll_id = 0; coll_id < nr_cpus; coll_id++)
+ its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true);
+
+ /* Round-robin the LPIs to all of the vCPUs in the VM */
+ coll_id = 0;
+ for (device_id = 0; device_id < nr_devices; device_id++) {
+ vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K);
+
+ its_send_mapd_cmd(test_data.cmdq_base_va, device_id,
+ itt_base, SZ_64K, true);
+
+ for (event_id = 0; event_id < nr_events; event_id++) {
+ its_send_mapti_cmd(test_data.cmdq_base_va, device_id,
+ event_id, coll_id, intid++);
+
+ coll_id = (coll_id + 1) % test_data.nr_cpus;
+ }
+ }
+}
+
+static void guest_invalidate_all_rdists(void)
+{
+ int i;
+
+ for (i = 0; i < test_data.nr_cpus; i++)
+ its_send_invall_cmd(test_data.cmdq_base_va, i);
+}
+
+static void guest_setup_gic(void)
+{
+ static atomic_int nr_cpus_ready = 0;
+ u32 cpuid = guest_get_vcpuid();
+
+ gic_init(GIC_V3, test_data.nr_cpus);
+ gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K,
+ test_data.lpi_pend_tables + (cpuid * SZ_64K));
+
+ atomic_fetch_add(&nr_cpus_ready, 1);
+
+ if (cpuid > 0)
+ return;
+
+ while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus)
+ cpu_relax();
+
+ its_init(test_data.collection_table, SZ_64K,
+ test_data.device_table, SZ_64K,
+ test_data.cmdq_base, SZ_64K);
+
+ guest_setup_its_mappings();
+ guest_invalidate_all_rdists();
+}
+
+static void guest_code(size_t nr_lpis)
+{
+ guest_setup_gic();
+
+ GUEST_SYNC(0);
+
+ /*
+ * Don't use WFI here to avoid blocking the vCPU thread indefinitely and
+ * never getting the stop signal.
+ */
+ while (!READ_ONCE(test_data.request_vcpus_stop))
+ cpu_relax();
+
+ GUEST_DONE();
+}
+
+static void setup_memslot(void)
+{
+ size_t pages;
+ size_t sz;
+
+ /*
+ * For the ITS:
+ * - A single level device table
+ * - A single level collection table
+ * - The command queue
+ * - An ITT for each device
+ */
+ sz = (3 + test_data.nr_devices) * SZ_64K;
+
+ /*
+ * For the redistributors:
+ * - A shared LPI configuration table
+ * - An LPI pending table for each vCPU
+ */
+ sz += (1 + test_data.nr_cpus) * SZ_64K;
+
+ pages = sz / vm->page_size;
+ gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz;
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base,
+ TEST_MEMSLOT_INDEX, pages, 0);
+}
+
+#define LPI_PROP_DEFAULT_PRIO 0xa0
+
+static void configure_lpis(void)
+{
+ size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids;
+ u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table);
+ size_t i;
+
+ for (i = 0; i < nr_lpis; i++) {
+ tbl[i] = LPI_PROP_DEFAULT_PRIO |
+ LPI_PROP_GROUP1 |
+ LPI_PROP_ENABLED;
+ }
+}
+
+static void setup_test_data(void)
+{
+ size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K);
+ u32 nr_devices = test_data.nr_devices;
+ u32 nr_cpus = test_data.nr_cpus;
+ vm_paddr_t cmdq_base;
+
+ test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k,
+ gpa_base,
+ TEST_MEMSLOT_INDEX);
+
+ test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k,
+ gpa_base,
+ TEST_MEMSLOT_INDEX);
+
+ cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base,
+ TEST_MEMSLOT_INDEX);
+ virt_map(vm, cmdq_base, cmdq_base, pages_per_64k);
+ test_data.cmdq_base = cmdq_base;
+ test_data.cmdq_base_va = (void *)cmdq_base;
+
+ test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices,
+ gpa_base, TEST_MEMSLOT_INDEX);
+
+ test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k,
+ gpa_base, TEST_MEMSLOT_INDEX);
+ configure_lpis();
+
+ test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus,
+ gpa_base, TEST_MEMSLOT_INDEX);
+
+ sync_global_to_guest(vm, test_data);
+}
+
+static void setup_gic(void)
+{
+ gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64);
+ __TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3");
+
+ its_fd = vgic_its_setup(vm);
+}
+
+static void signal_lpi(u32 device_id, u32 event_id)
+{
+ vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER;
+
+ struct kvm_msi msi = {
+ .address_lo = db_addr,
+ .address_hi = db_addr >> 32,
+ .data = event_id,
+ .devid = device_id,
+ .flags = KVM_MSI_VALID_DEVID,
+ };
+
+ /*
+ * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM,
+ * which for arm64 implies having a valid translation in the ITS.
+ */
+ TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1,
+ "KVM_SIGNAL_MSI ioctl failed");
+}
+
+static pthread_barrier_t test_setup_barrier;
+
+static void *lpi_worker_thread(void *data)
+{
+ u32 device_id = (size_t)data;
+ u32 event_id;
+ size_t i;
+
+ pthread_barrier_wait(&test_setup_barrier);
+
+ for (i = 0; i < nr_iterations; i++)
+ for (event_id = 0; event_id < test_data.nr_event_ids; event_id++)
+ signal_lpi(device_id, event_id);
+
+ return NULL;
+}
+
+static void *vcpu_worker_thread(void *data)
+{
+ struct kvm_vcpu *vcpu = data;
+ struct ucall uc;
+
+ while (true) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ pthread_barrier_wait(&test_setup_barrier);
+ continue;
+ case UCALL_DONE:
+ return NULL;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unknown ucall: %lu", uc.cmd);
+ }
+ }
+
+ return NULL;
+}
+
+static void report_stats(struct timespec delta)
+{
+ double nr_lpis;
+ double time;
+
+ nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations;
+
+ time = delta.tv_sec;
+ time += ((double)delta.tv_nsec) / NSEC_PER_SEC;
+
+ pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time);
+}
+
+static void run_test(void)
+{
+ u32 nr_devices = test_data.nr_devices;
+ u32 nr_vcpus = test_data.nr_cpus;
+ pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t));
+ pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t));
+ struct timespec start, delta;
+ size_t i;
+
+ TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays");
+
+ pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1);
+
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]);
+
+ for (i = 0; i < nr_devices; i++)
+ pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i);
+
+ pthread_barrier_wait(&test_setup_barrier);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+
+ for (i = 0; i < nr_devices; i++)
+ pthread_join(lpi_threads[i], NULL);
+
+ delta = timespec_elapsed(start);
+ write_guest_global(vm, test_data.request_vcpus_stop, true);
+
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_join(vcpu_threads[i], NULL);
+
+ report_stats(delta);
+}
+
+static void setup_vm(void)
+{
+ int i;
+
+ vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu));
+ TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
+
+ vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);
+
+ vm_init_descriptor_tables(vm);
+ for (i = 0; i < test_data.nr_cpus; i++)
+ vcpu_init_descriptor_tables(vcpus[i]);
+
+ vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
+
+ setup_memslot();
+
+ setup_gic();
+
+ setup_test_data();
+}
+
+static void destroy_vm(void)
+{
+ close(its_fd);
+ close(gic_fd);
+ kvm_vm_free(vm);
+ free(vcpus);
+}
+
+static void pr_usage(const char *name)
+{
+ pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] -h\n", name);
+ pr_info(" -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus);
+ pr_info(" -d:\tnumber of devices (default: %u)\n", test_data.nr_devices);
+ pr_info(" -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids);
+ pr_info(" -i:\tnumber of iterations (default: %lu)\n", nr_iterations);
+}
+
+int main(int argc, char **argv)
+{
+ u32 nr_threads;
+ int c;
+
+ while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {
+ switch (c) {
+ case 'v':
+ test_data.nr_cpus = atoi(optarg);
+ break;
+ case 'd':
+ test_data.nr_devices = atoi(optarg);
+ break;
+ case 'e':
+ test_data.nr_event_ids = atoi(optarg);
+ break;
+ case 'i':
+ nr_iterations = strtoul(optarg, NULL, 0);
+ break;
+ case 'h':
+ default:
+ pr_usage(argv[0]);
+ return 1;
+ }
+ }
+
+ nr_threads = test_data.nr_cpus + test_data.nr_devices;
+ if (nr_threads > get_nprocs())
+ pr_info("WARNING: running %u threads on %d CPUs; performance is degraded.\n",
+ nr_threads, get_nprocs());
+
+ setup_vm();
+
+ run_test();
+
+ destroy_vm();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
index f2fb0e3f14..d31b9f64ba 100644
--- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
+++ b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
@@ -404,9 +404,6 @@ static void guest_code(uint64_t expected_pmcr_n)
GUEST_DONE();
}
-#define GICD_BASE_GPA 0x8000000ULL
-#define GICR_BASE_GPA 0x80A0000ULL
-
/* Create a VM that has one vCPU with PMUv3 configured. */
static void create_vpmu_vm(void *guest_code)
{
@@ -438,8 +435,7 @@ static void create_vpmu_vm(void *guest_code)
init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code);
vcpu_init_descriptor_tables(vpmu_vm.vcpu);
- vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64,
- GICD_BASE_GPA, GICR_BASE_GPA);
+ vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64);
__TEST_REQUIRE(vpmu_vm.gic_fd >= 0,
"Failed to create vgic-v3, skipping");
diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c
index ae1f1a6d83..acb2cb5963 100644
--- a/tools/testing/selftests/kvm/arch_timer.c
+++ b/tools/testing/selftests/kvm/arch_timer.c
@@ -19,9 +19,6 @@
*
* Copyright (c) 2021, Google LLC.
*/
-
-#define _GNU_SOURCE
-
#include <stdlib.h>
#include <pthread.h>
#include <linux/sizes.h>
@@ -29,6 +26,7 @@
#include <sys/sysinfo.h>
#include "timer_test.h"
+#include "ucall_common.h"
struct test_args test_args = {
.nr_vcpus = NR_VCPUS_DEF,
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index bf3609f718..0202b78f86 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -6,14 +6,10 @@
* Copyright (C) 2018, Red Hat, Inc.
* Copyright (C) 2019, Google, Inc.
*/
-
-#define _GNU_SOURCE /* for pipe2 */
-
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
-#include <poll.h>
#include <pthread.h>
#include <linux/userfaultfd.h>
#include <sys/syscall.h>
@@ -22,6 +18,7 @@
#include "test_util.h"
#include "memstress.h"
#include "guest_modes.h"
+#include "ucall_common.h"
#include "userfaultfd_util.h"
#ifdef __NR_userfaultfd
@@ -77,8 +74,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
copy.mode = 0;
r = ioctl(uffd, UFFDIO_COPY, &copy);
- if (r == -1) {
- pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n",
+ /*
+ * With multiple vCPU threads fault on a single page and there are
+ * multiple readers for the UFFD, at least one of the UFFDIO_COPYs
+ * will fail with EEXIST: handle that case without signaling an
+ * error.
+ *
+ * Note that this also suppress any EEXISTs occurring from,
+ * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never
+ * happens here, but a realistic VMM might potentially maintain
+ * some external state to correctly surface EEXISTs to userspace
+ * (or prevent duplicate COPY/CONTINUEs in the first place).
+ */
+ if (r == -1 && errno != EEXIST) {
+ pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d, errno = %d\n",
addr, tid, errno);
return r;
}
@@ -89,8 +98,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
cont.range.len = demand_paging_size;
r = ioctl(uffd, UFFDIO_CONTINUE, &cont);
- if (r == -1) {
- pr_info("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n",
+ /*
+ * With multiple vCPU threads fault on a single page and there are
+ * multiple readers for the UFFD, at least one of the UFFDIO_COPYs
+ * will fail with EEXIST: handle that case without signaling an
+ * error.
+ *
+ * Note that this also suppress any EEXISTs occurring from,
+ * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never
+ * happens here, but a realistic VMM might potentially maintain
+ * some external state to correctly surface EEXISTs to userspace
+ * (or prevent duplicate COPY/CONTINUEs in the first place).
+ */
+ if (r == -1 && errno != EEXIST) {
+ pr_info("Failed UFFDIO_CONTINUE in 0x%lx, thread %d, errno = %d\n",
addr, tid, errno);
return r;
}
@@ -110,7 +131,9 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
struct test_params {
int uffd_mode;
+ bool single_uffd;
useconds_t uffd_delay;
+ int readers_per_uffd;
enum vm_mem_backing_src_type src_type;
bool partition_vcpu_memory_access;
};
@@ -131,10 +154,12 @@ static void run_test(enum vm_guest_mode mode, void *arg)
struct memstress_vcpu_args *vcpu_args;
struct test_params *p = arg;
struct uffd_desc **uffd_descs = NULL;
+ uint64_t uffd_region_size;
struct timespec start;
struct timespec ts_diff;
+ double vcpu_paging_rate;
struct kvm_vm *vm;
- int i;
+ int i, num_uffds = 0;
vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
p->src_type, p->partition_vcpu_memory_access);
@@ -147,7 +172,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
memset(guest_data_prototype, 0xAB, demand_paging_size);
if (p->uffd_mode == UFFDIO_REGISTER_MODE_MINOR) {
- for (i = 0; i < nr_vcpus; i++) {
+ num_uffds = p->single_uffd ? 1 : nr_vcpus;
+ for (i = 0; i < num_uffds; i++) {
vcpu_args = &memstress_args.vcpu_args[i];
prefault_mem(addr_gpa2alias(vm, vcpu_args->gpa),
vcpu_args->pages * memstress_args.guest_page_size);
@@ -155,9 +181,13 @@ static void run_test(enum vm_guest_mode mode, void *arg)
}
if (p->uffd_mode) {
- uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *));
+ num_uffds = p->single_uffd ? 1 : nr_vcpus;
+ uffd_region_size = nr_vcpus * guest_percpu_mem_size / num_uffds;
+
+ uffd_descs = malloc(num_uffds * sizeof(struct uffd_desc *));
TEST_ASSERT(uffd_descs, "Memory allocation failed");
- for (i = 0; i < nr_vcpus; i++) {
+ for (i = 0; i < num_uffds; i++) {
+ struct memstress_vcpu_args *vcpu_args;
void *vcpu_hva;
vcpu_args = &memstress_args.vcpu_args[i];
@@ -170,7 +200,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
*/
uffd_descs[i] = uffd_setup_demand_paging(
p->uffd_mode, p->uffd_delay, vcpu_hva,
- vcpu_args->pages * memstress_args.guest_page_size,
+ uffd_region_size,
+ p->readers_per_uffd,
&handle_uffd_page_request);
}
}
@@ -187,15 +218,19 @@ static void run_test(enum vm_guest_mode mode, void *arg)
if (p->uffd_mode) {
/* Tell the user fault fd handler threads to quit */
- for (i = 0; i < nr_vcpus; i++)
+ for (i = 0; i < num_uffds; i++)
uffd_stop_demand_paging(uffd_descs[i]);
}
- pr_info("Total guest execution time: %ld.%.9lds\n",
+ pr_info("Total guest execution time:\t%ld.%.9lds\n",
ts_diff.tv_sec, ts_diff.tv_nsec);
- pr_info("Overall demand paging rate: %f pgs/sec\n",
- memstress_args.vcpu_args[0].pages * nr_vcpus /
- ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC));
+
+ vcpu_paging_rate = memstress_args.vcpu_args[0].pages /
+ ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC);
+ pr_info("Per-vcpu demand paging rate:\t%f pgs/sec/vcpu\n",
+ vcpu_paging_rate);
+ pr_info("Overall demand paging rate:\t%f pgs/sec\n",
+ vcpu_paging_rate * nr_vcpus);
memstress_destroy_vm(vm);
@@ -207,15 +242,20 @@ static void run_test(enum vm_guest_mode mode, void *arg)
static void help(char *name)
{
puts("");
- printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
- " [-b memory] [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name);
+ printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-a]\n"
+ " [-d uffd_delay_usec] [-r readers_per_uffd] [-b memory]\n"
+ " [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name);
guest_modes_help();
printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
" UFFD registration mode: 'MISSING' or 'MINOR'.\n");
kvm_print_vcpu_pinning_help();
+ printf(" -a: Use a single userfaultfd for all of guest memory, instead of\n"
+ " creating one for each region paged by a unique vCPU\n"
+ " Set implicitly with -o, and no effect without -u.\n");
printf(" -d: add a delay in usec to the User Fault\n"
" FD handler to simulate demand paging\n"
" overheads. Ignored without -u.\n");
+ printf(" -r: Set the number of reader threads per uffd.\n");
printf(" -b: specify the size of the memory region which should be\n"
" demand paged by each vCPU. e.g. 10M or 3G.\n"
" Default: 1G\n");
@@ -234,12 +274,14 @@ int main(int argc, char *argv[])
struct test_params p = {
.src_type = DEFAULT_VM_MEM_SRC,
.partition_vcpu_memory_access = true,
+ .readers_per_uffd = 1,
+ .single_uffd = false,
};
int opt;
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:c:o")) != -1) {
+ while ((opt = getopt(argc, argv, "ahom:u:d:b:s:v:c:r:")) != -1) {
switch (opt) {
case 'm':
guest_modes_cmdline(optarg);
@@ -251,6 +293,9 @@ int main(int argc, char *argv[])
p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR;
TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'.");
break;
+ case 'a':
+ p.single_uffd = true;
+ break;
case 'd':
p.uffd_delay = strtoul(optarg, NULL, 0);
TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported.");
@@ -271,6 +316,13 @@ int main(int argc, char *argv[])
break;
case 'o':
p.partition_vcpu_memory_access = false;
+ p.single_uffd = true;
+ break;
+ case 'r':
+ p.readers_per_uffd = atoi(optarg);
+ TEST_ASSERT(p.readers_per_uffd >= 1,
+ "Invalid number of readers per uffd %d: must be >=1",
+ p.readers_per_uffd);
break;
case 'h':
default:
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 504f6fe980..9f24303acb 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -18,13 +18,11 @@
#include "test_util.h"
#include "memstress.h"
#include "guest_modes.h"
+#include "ucall_common.h"
#ifdef __aarch64__
#include "aarch64/vgic.h"
-#define GICD_BASE_GPA 0x8000000ULL
-#define GICR_BASE_GPA 0x80A0000ULL
-
static int gic_fd;
static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
@@ -33,7 +31,7 @@ static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
* The test can still run even if hardware does not support GICv3, as it
* is only an optimization to reduce guest exits.
*/
- gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
+ gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);
}
static void arch_cleanup_vm(struct kvm_vm *vm)
@@ -132,7 +130,6 @@ struct test_params {
enum vm_mem_backing_src_type backing_src;
int slots;
uint32_t write_percent;
- uint32_t random_seed;
bool random_access;
};
@@ -156,8 +153,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
p->slots, p->backing_src,
p->partition_vcpu_memory_access);
- pr_info("Random seed: %u\n", p->random_seed);
- memstress_set_random_seed(vm, p->random_seed);
memstress_set_write_percent(vm, p->write_percent);
guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift;
@@ -346,11 +341,13 @@ int main(int argc, char *argv[])
.partition_vcpu_memory_access = true,
.backing_src = DEFAULT_VM_MEM_SRC,
.slots = 1,
- .random_seed = 1,
.write_percent = 100,
};
int opt;
+ /* Override the seed to be deterministic by default. */
+ guest_random_seed = 1;
+
dirty_log_manual_caps =
kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
@@ -395,7 +392,7 @@ int main(int argc, char *argv[])
p.phys_offset = strtoull(optarg, NULL, 0);
break;
case 'r':
- p.random_seed = atoi_positive("Random seed", optarg);
+ guest_random_seed = atoi_positive("Random seed", optarg);
break;
case 's':
p.backing_src = parse_backing_src_type(optarg);
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index eaad5b2085..aacf80f574 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -4,9 +4,6 @@
*
* Copyright (C) 2018, Red Hat, Inc.
*/
-
-#define _GNU_SOURCE /* for program_invocation_name */
-
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
@@ -23,6 +20,7 @@
#include "test_util.h"
#include "guest_modes.h"
#include "processor.h"
+#include "ucall_common.h"
#define DIRTY_MEM_BITS 30 /* 1G */
#define PAGE_SHIFT_4K 12
@@ -76,7 +74,6 @@
static uint64_t host_page_size;
static uint64_t guest_page_size;
static uint64_t guest_num_pages;
-static uint64_t random_array[TEST_PAGES_PER_LOOP];
static uint64_t iteration;
/*
@@ -109,19 +106,19 @@ static void guest_code(void)
*/
for (i = 0; i < guest_num_pages; i++) {
addr = guest_test_virt_mem + i * guest_page_size;
- *(uint64_t *)addr = READ_ONCE(iteration);
+ vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration));
}
while (true) {
for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
addr = guest_test_virt_mem;
- addr += (READ_ONCE(random_array[i]) % guest_num_pages)
+ addr += (guest_random_u64(&guest_rng) % guest_num_pages)
* guest_page_size;
addr = align_down(addr, host_page_size);
- *(uint64_t *)addr = READ_ONCE(iteration);
+
+ vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration));
}
- /* Tell the host that we need more random numbers */
GUEST_SYNC(1);
}
}
@@ -508,20 +505,10 @@ static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
mode->after_vcpu_run(vcpu, ret, err);
}
-static void generate_random_array(uint64_t *guest_array, uint64_t size)
-{
- uint64_t i;
-
- for (i = 0; i < size; i++)
- guest_array[i] = random();
-}
-
static void *vcpu_worker(void *data)
{
int ret;
struct kvm_vcpu *vcpu = data;
- struct kvm_vm *vm = vcpu->vm;
- uint64_t *guest_array;
uint64_t pages_count = 0;
struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset)
+ sizeof(sigset_t));
@@ -540,11 +527,8 @@ static void *vcpu_worker(void *data)
sigemptyset(sigset);
sigaddset(sigset, SIG_IPI);
- guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array);
-
while (!READ_ONCE(host_quit)) {
/* Clear any existing kick signals */
- generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
pages_count += TEST_PAGES_PER_LOOP;
/* Let the guest dirty the random pages */
ret = __vcpu_run(vcpu);
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
index 92eae206ba..ba0c8e9960 100644
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -4,8 +4,6 @@
*
* Author: Chao Peng <chao.p.peng@linux.intel.com>
*/
-
-#define _GNU_SOURCE
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
@@ -19,8 +17,8 @@
#include <sys/types.h>
#include <sys/stat.h>
+#include "kvm_util.h"
#include "test_util.h"
-#include "kvm_util_base.h"
static void test_file_read_write(int fd)
{
diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c
index 3502caa359..8092c2d0f5 100644
--- a/tools/testing/selftests/kvm/guest_print_test.c
+++ b/tools/testing/selftests/kvm/guest_print_test.c
@@ -13,6 +13,7 @@
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
+#include "ucall_common.h"
struct guest_vals {
uint64_t a;
diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c
index decc521fc7..bce73bcb97 100644
--- a/tools/testing/selftests/kvm/hardware_disable_test.c
+++ b/tools/testing/selftests/kvm/hardware_disable_test.c
@@ -4,9 +4,6 @@
* kvm_arch_hardware_disable is called and it attempts to unregister the user
* return notifiers.
*/
-
-#define _GNU_SOURCE
-
#include <fcntl.h>
#include <pthread.h>
#include <semaphore.h>
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/aarch64/gic.h
index b217ea17ca..baeb3c8593 100644
--- a/tools/testing/selftests/kvm/include/aarch64/gic.h
+++ b/tools/testing/selftests/kvm/include/aarch64/gic.h
@@ -6,11 +6,26 @@
#ifndef SELFTEST_KVM_GIC_H
#define SELFTEST_KVM_GIC_H
+#include <asm/kvm.h>
+
enum gic_type {
GIC_V3,
GIC_TYPE_MAX,
};
+/*
+ * Note that the redistributor frames are at the end, as the range scales
+ * with the number of vCPUs in the VM.
+ */
+#define GITS_BASE_GPA 0x8000000ULL
+#define GICD_BASE_GPA (GITS_BASE_GPA + KVM_VGIC_V3_ITS_SIZE)
+#define GICR_BASE_GPA (GICD_BASE_GPA + KVM_VGIC_V3_DIST_SIZE)
+
+/* The GIC is identity-mapped into the guest at the time of setup. */
+#define GITS_BASE_GVA ((volatile void *)GITS_BASE_GPA)
+#define GICD_BASE_GVA ((volatile void *)GICD_BASE_GPA)
+#define GICR_BASE_GVA ((volatile void *)GICR_BASE_GPA)
+
#define MIN_SGI 0
#define MIN_PPI 16
#define MIN_SPI 32
@@ -21,8 +36,7 @@ enum gic_type {
#define INTID_IS_PPI(intid) (MIN_PPI <= (intid) && (intid) < MIN_SPI)
#define INTID_IS_SPI(intid) (MIN_SPI <= (intid) && (intid) <= MAX_SPI)
-void gic_init(enum gic_type type, unsigned int nr_cpus,
- void *dist_base, void *redist_base);
+void gic_init(enum gic_type type, unsigned int nr_cpus);
void gic_irq_enable(unsigned int intid);
void gic_irq_disable(unsigned int intid);
unsigned int gic_get_and_ack_irq(void);
@@ -44,4 +58,7 @@ void gic_irq_clear_pending(unsigned int intid);
bool gic_irq_get_pending(unsigned int intid);
void gic_irq_set_config(unsigned int intid, bool is_edge);
+void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
+ vm_paddr_t pend_table);
+
#endif /* SELFTEST_KVM_GIC_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h
index ba0886e8a2..a76615fa39 100644
--- a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h
+++ b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h
@@ -1,82 +1,604 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * ARM Generic Interrupt Controller (GIC) v3 specific defines
+ * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
*/
-
-#ifndef SELFTEST_KVM_GICV3_H
-#define SELFTEST_KVM_GICV3_H
-
-#include <asm/sysreg.h>
+#ifndef __SELFTESTS_GIC_V3_H
+#define __SELFTESTS_GIC_V3_H
/*
- * Distributor registers
+ * Distributor registers. We assume we're running non-secure, with ARE
+ * being set. Secure-only and non-ARE registers are not described.
*/
#define GICD_CTLR 0x0000
#define GICD_TYPER 0x0004
+#define GICD_IIDR 0x0008
+#define GICD_TYPER2 0x000C
+#define GICD_STATUSR 0x0010
+#define GICD_SETSPI_NSR 0x0040
+#define GICD_CLRSPI_NSR 0x0048
+#define GICD_SETSPI_SR 0x0050
+#define GICD_CLRSPI_SR 0x0058
#define GICD_IGROUPR 0x0080
#define GICD_ISENABLER 0x0100
#define GICD_ICENABLER 0x0180
#define GICD_ISPENDR 0x0200
#define GICD_ICPENDR 0x0280
-#define GICD_ICACTIVER 0x0380
#define GICD_ISACTIVER 0x0300
+#define GICD_ICACTIVER 0x0380
#define GICD_IPRIORITYR 0x0400
#define GICD_ICFGR 0x0C00
+#define GICD_IGRPMODR 0x0D00
+#define GICD_NSACR 0x0E00
+#define GICD_IGROUPRnE 0x1000
+#define GICD_ISENABLERnE 0x1200
+#define GICD_ICENABLERnE 0x1400
+#define GICD_ISPENDRnE 0x1600
+#define GICD_ICPENDRnE 0x1800
+#define GICD_ISACTIVERnE 0x1A00
+#define GICD_ICACTIVERnE 0x1C00
+#define GICD_IPRIORITYRnE 0x2000
+#define GICD_ICFGRnE 0x3000
+#define GICD_IROUTER 0x6000
+#define GICD_IROUTERnE 0x8000
+#define GICD_IDREGS 0xFFD0
+#define GICD_PIDR2 0xFFE8
+
+#define ESPI_BASE_INTID 4096
/*
- * The assumption is that the guest runs in a non-secure mode.
- * The following bits of GICD_CTLR are defined accordingly.
+ * Those registers are actually from GICv2, but the spec demands that they
+ * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3).
*/
+#define GICD_ITARGETSR 0x0800
+#define GICD_SGIR 0x0F00
+#define GICD_CPENDSGIR 0x0F10
+#define GICD_SPENDSGIR 0x0F20
+
#define GICD_CTLR_RWP (1U << 31)
#define GICD_CTLR_nASSGIreq (1U << 8)
+#define GICD_CTLR_DS (1U << 6)
#define GICD_CTLR_ARE_NS (1U << 4)
#define GICD_CTLR_ENABLE_G1A (1U << 1)
#define GICD_CTLR_ENABLE_G1 (1U << 0)
+#define GICD_IIDR_IMPLEMENTER_SHIFT 0
+#define GICD_IIDR_IMPLEMENTER_MASK (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
+#define GICD_IIDR_REVISION_SHIFT 12
+#define GICD_IIDR_REVISION_MASK (0xf << GICD_IIDR_REVISION_SHIFT)
+#define GICD_IIDR_VARIANT_SHIFT 16
+#define GICD_IIDR_VARIANT_MASK (0xf << GICD_IIDR_VARIANT_SHIFT)
+#define GICD_IIDR_PRODUCT_ID_SHIFT 24
+#define GICD_IIDR_PRODUCT_ID_MASK (0xff << GICD_IIDR_PRODUCT_ID_SHIFT)
+
+
+/*
+ * In systems with a single security state (what we emulate in KVM)
+ * the meaning of the interrupt group enable bits is slightly different
+ */
+#define GICD_CTLR_ENABLE_SS_G1 (1U << 1)
+#define GICD_CTLR_ENABLE_SS_G0 (1U << 0)
+
+#define GICD_TYPER_RSS (1U << 26)
+#define GICD_TYPER_LPIS (1U << 17)
+#define GICD_TYPER_MBIS (1U << 16)
+#define GICD_TYPER_ESPI (1U << 8)
+
+#define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1)
+#define GICD_TYPER_NUM_LPIS(typer) ((((typer) >> 11) & 0x1f) + 1)
#define GICD_TYPER_SPIS(typer) ((((typer) & 0x1f) + 1) * 32)
-#define GICD_INT_DEF_PRI_X4 0xa0a0a0a0
+#define GICD_TYPER_ESPIS(typer) \
+ (((typer) & GICD_TYPER_ESPI) ? GICD_TYPER_SPIS((typer) >> 27) : 0)
+
+#define GICD_TYPER2_nASSGIcap (1U << 8)
+#define GICD_TYPER2_VIL (1U << 7)
+#define GICD_TYPER2_VID GENMASK(4, 0)
+
+#define GICD_IROUTER_SPI_MODE_ONE (0U << 31)
+#define GICD_IROUTER_SPI_MODE_ANY (1U << 31)
+
+#define GIC_PIDR2_ARCH_MASK 0xf0
+#define GIC_PIDR2_ARCH_GICv3 0x30
+#define GIC_PIDR2_ARCH_GICv4 0x40
+
+#define GIC_V3_DIST_SIZE 0x10000
+
+#define GIC_PAGE_SIZE_4K 0ULL
+#define GIC_PAGE_SIZE_16K 1ULL
+#define GIC_PAGE_SIZE_64K 2ULL
+#define GIC_PAGE_SIZE_MASK 3ULL
/*
- * Redistributor registers
+ * Re-Distributor registers, offsets from RD_base
*/
-#define GICR_CTLR 0x000
-#define GICR_WAKER 0x014
+#define GICR_CTLR GICD_CTLR
+#define GICR_IIDR 0x0004
+#define GICR_TYPER 0x0008
+#define GICR_STATUSR GICD_STATUSR
+#define GICR_WAKER 0x0014
+#define GICR_SETLPIR 0x0040
+#define GICR_CLRLPIR 0x0048
+#define GICR_PROPBASER 0x0070
+#define GICR_PENDBASER 0x0078
+#define GICR_INVLPIR 0x00A0
+#define GICR_INVALLR 0x00B0
+#define GICR_SYNCR 0x00C0
+#define GICR_IDREGS GICD_IDREGS
+#define GICR_PIDR2 GICD_PIDR2
+
+#define GICR_CTLR_ENABLE_LPIS (1UL << 0)
+#define GICR_CTLR_CES (1UL << 1)
+#define GICR_CTLR_IR (1UL << 2)
+#define GICR_CTLR_RWP (1UL << 3)
-#define GICR_CTLR_RWP (1U << 3)
+#define GICR_TYPER_CPU_NUMBER(r) (((r) >> 8) & 0xffff)
+
+#define EPPI_BASE_INTID 1056
+
+#define GICR_TYPER_NR_PPIS(r) \
+ ({ \
+ unsigned int __ppinum = ((r) >> 27) & 0x1f; \
+ unsigned int __nr_ppis = 16; \
+ if (__ppinum == 1 || __ppinum == 2) \
+ __nr_ppis += __ppinum * 32; \
+ \
+ __nr_ppis; \
+ })
#define GICR_WAKER_ProcessorSleep (1U << 1)
#define GICR_WAKER_ChildrenAsleep (1U << 2)
+#define GIC_BASER_CACHE_nCnB 0ULL
+#define GIC_BASER_CACHE_SameAsInner 0ULL
+#define GIC_BASER_CACHE_nC 1ULL
+#define GIC_BASER_CACHE_RaWt 2ULL
+#define GIC_BASER_CACHE_RaWb 3ULL
+#define GIC_BASER_CACHE_WaWt 4ULL
+#define GIC_BASER_CACHE_WaWb 5ULL
+#define GIC_BASER_CACHE_RaWaWt 6ULL
+#define GIC_BASER_CACHE_RaWaWb 7ULL
+#define GIC_BASER_CACHE_MASK 7ULL
+#define GIC_BASER_NonShareable 0ULL
+#define GIC_BASER_InnerShareable 1ULL
+#define GIC_BASER_OuterShareable 2ULL
+#define GIC_BASER_SHAREABILITY_MASK 3ULL
+
+#define GIC_BASER_CACHEABILITY(reg, inner_outer, type) \
+ (GIC_BASER_CACHE_##type << reg##_##inner_outer##_CACHEABILITY_SHIFT)
+
+#define GIC_BASER_SHAREABILITY(reg, type) \
+ (GIC_BASER_##type << reg##_SHAREABILITY_SHIFT)
+
+/* encode a size field of width @w containing @n - 1 units */
+#define GIC_ENCODE_SZ(n, w) (((unsigned long)(n) - 1) & GENMASK_ULL(((w) - 1), 0))
+
+#define GICR_PROPBASER_SHAREABILITY_SHIFT (10)
+#define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT (7)
+#define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT (56)
+#define GICR_PROPBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GICR_PROPBASER, SHAREABILITY_MASK)
+#define GICR_PROPBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, MASK)
+#define GICR_PROPBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, MASK)
+#define GICR_PROPBASER_CACHEABILITY_MASK GICR_PROPBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_PROPBASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable)
+
+#define GICR_PROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nCnB)
+#define GICR_PROPBASER_nC GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nC)
+#define GICR_PROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWt)
+#define GICR_PROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb)
+#define GICR_PROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWt)
+#define GICR_PROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWb)
+#define GICR_PROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWt)
+#define GICR_PROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb)
+
+#define GICR_PROPBASER_IDBITS_MASK (0x1f)
+#define GICR_PROPBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 12))
+#define GICR_PENDBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 16))
+
+#define GICR_PENDBASER_SHAREABILITY_SHIFT (10)
+#define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT (7)
+#define GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT (56)
+#define GICR_PENDBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GICR_PENDBASER, SHAREABILITY_MASK)
+#define GICR_PENDBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, MASK)
+#define GICR_PENDBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, MASK)
+#define GICR_PENDBASER_CACHEABILITY_MASK GICR_PENDBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_PENDBASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable)
+
+#define GICR_PENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nCnB)
+#define GICR_PENDBASER_nC GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nC)
+#define GICR_PENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWt)
+#define GICR_PENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb)
+#define GICR_PENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWt)
+#define GICR_PENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWb)
+#define GICR_PENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWt)
+#define GICR_PENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWb)
+
+#define GICR_PENDBASER_PTZ BIT_ULL(62)
+
/*
- * Redistributor registers, offsets from SGI base
+ * Re-Distributor registers, offsets from SGI_base
*/
#define GICR_IGROUPR0 GICD_IGROUPR
#define GICR_ISENABLER0 GICD_ISENABLER
#define GICR_ICENABLER0 GICD_ICENABLER
#define GICR_ISPENDR0 GICD_ISPENDR
+#define GICR_ICPENDR0 GICD_ICPENDR
#define GICR_ISACTIVER0 GICD_ISACTIVER
#define GICR_ICACTIVER0 GICD_ICACTIVER
-#define GICR_ICENABLER GICD_ICENABLER
-#define GICR_ICACTIVER GICD_ICACTIVER
#define GICR_IPRIORITYR0 GICD_IPRIORITYR
+#define GICR_ICFGR0 GICD_ICFGR
+#define GICR_IGRPMODR0 GICD_IGRPMODR
+#define GICR_NSACR GICD_NSACR
+
+#define GICR_TYPER_PLPIS (1U << 0)
+#define GICR_TYPER_VLPIS (1U << 1)
+#define GICR_TYPER_DIRTY (1U << 2)
+#define GICR_TYPER_DirectLPIS (1U << 3)
+#define GICR_TYPER_LAST (1U << 4)
+#define GICR_TYPER_RVPEID (1U << 7)
+#define GICR_TYPER_COMMON_LPI_AFF GENMASK_ULL(25, 24)
+#define GICR_TYPER_AFFINITY GENMASK_ULL(63, 32)
+
+#define GICR_INVLPIR_INTID GENMASK_ULL(31, 0)
+#define GICR_INVLPIR_VPEID GENMASK_ULL(47, 32)
+#define GICR_INVLPIR_V GENMASK_ULL(63, 63)
+
+#define GICR_INVALLR_VPEID GICR_INVLPIR_VPEID
+#define GICR_INVALLR_V GICR_INVLPIR_V
+
+#define GIC_V3_REDIST_SIZE 0x20000
+
+#define LPI_PROP_GROUP1 (1 << 1)
+#define LPI_PROP_ENABLED (1 << 0)
+
+/*
+ * Re-Distributor registers, offsets from VLPI_base
+ */
+#define GICR_VPROPBASER 0x0070
+
+#define GICR_VPROPBASER_IDBITS_MASK 0x1f
+
+#define GICR_VPROPBASER_SHAREABILITY_SHIFT (10)
+#define GICR_VPROPBASER_INNER_CACHEABILITY_SHIFT (7)
+#define GICR_VPROPBASER_OUTER_CACHEABILITY_SHIFT (56)
+
+#define GICR_VPROPBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GICR_VPROPBASER, SHAREABILITY_MASK)
+#define GICR_VPROPBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, MASK)
+#define GICR_VPROPBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_VPROPBASER, OUTER, MASK)
+#define GICR_VPROPBASER_CACHEABILITY_MASK \
+ GICR_VPROPBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_VPROPBASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GICR_VPROPBASER, InnerShareable)
+
+#define GICR_VPROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nCnB)
+#define GICR_VPROPBASER_nC GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nC)
+#define GICR_VPROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt)
+#define GICR_VPROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWb)
+#define GICR_VPROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWt)
+#define GICR_VPROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWb)
+#define GICR_VPROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWt)
+#define GICR_VPROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWb)
+
+/*
+ * GICv4.1 VPROPBASER reinvention. A subtle mix between the old
+ * VPROPBASER and ITS_BASER. Just not quite any of the two.
+ */
+#define GICR_VPROPBASER_4_1_VALID (1ULL << 63)
+#define GICR_VPROPBASER_4_1_ENTRY_SIZE GENMASK_ULL(61, 59)
+#define GICR_VPROPBASER_4_1_INDIRECT (1ULL << 55)
+#define GICR_VPROPBASER_4_1_PAGE_SIZE GENMASK_ULL(54, 53)
+#define GICR_VPROPBASER_4_1_Z (1ULL << 52)
+#define GICR_VPROPBASER_4_1_ADDR GENMASK_ULL(51, 12)
+#define GICR_VPROPBASER_4_1_SIZE GENMASK_ULL(6, 0)
+
+#define GICR_VPENDBASER 0x0078
+
+#define GICR_VPENDBASER_SHAREABILITY_SHIFT (10)
+#define GICR_VPENDBASER_INNER_CACHEABILITY_SHIFT (7)
+#define GICR_VPENDBASER_OUTER_CACHEABILITY_SHIFT (56)
+#define GICR_VPENDBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GICR_VPENDBASER, SHAREABILITY_MASK)
+#define GICR_VPENDBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, MASK)
+#define GICR_VPENDBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_VPENDBASER, OUTER, MASK)
+#define GICR_VPENDBASER_CACHEABILITY_MASK \
+ GICR_VPENDBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_VPENDBASER_NonShareable \
+ GIC_BASER_SHAREABILITY(GICR_VPENDBASER, NonShareable)
+
+#define GICR_VPENDBASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GICR_VPENDBASER, InnerShareable)
+
+#define GICR_VPENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nCnB)
+#define GICR_VPENDBASER_nC GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nC)
+#define GICR_VPENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt)
+#define GICR_VPENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWb)
+#define GICR_VPENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWt)
+#define GICR_VPENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWb)
+#define GICR_VPENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWt)
+#define GICR_VPENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWb)
+
+#define GICR_VPENDBASER_Dirty (1ULL << 60)
+#define GICR_VPENDBASER_PendingLast (1ULL << 61)
+#define GICR_VPENDBASER_IDAI (1ULL << 62)
+#define GICR_VPENDBASER_Valid (1ULL << 63)
+
+/*
+ * GICv4.1 VPENDBASER, used for VPE residency. On top of these fields,
+ * also use the above Valid, PendingLast and Dirty.
+ */
+#define GICR_VPENDBASER_4_1_DB (1ULL << 62)
+#define GICR_VPENDBASER_4_1_VGRP0EN (1ULL << 59)
+#define GICR_VPENDBASER_4_1_VGRP1EN (1ULL << 58)
+#define GICR_VPENDBASER_4_1_VPEID GENMASK_ULL(15, 0)
+
+#define GICR_VSGIR 0x0080
+
+#define GICR_VSGIR_VPEID GENMASK(15, 0)
+
+#define GICR_VSGIPENDR 0x0088
+
+#define GICR_VSGIPENDR_BUSY (1U << 31)
+#define GICR_VSGIPENDR_PENDING GENMASK(15, 0)
+
+/*
+ * ITS registers, offsets from ITS_base
+ */
+#define GITS_CTLR 0x0000
+#define GITS_IIDR 0x0004
+#define GITS_TYPER 0x0008
+#define GITS_MPIDR 0x0018
+#define GITS_CBASER 0x0080
+#define GITS_CWRITER 0x0088
+#define GITS_CREADR 0x0090
+#define GITS_BASER 0x0100
+#define GITS_IDREGS_BASE 0xffd0
+#define GITS_PIDR0 0xffe0
+#define GITS_PIDR1 0xffe4
+#define GITS_PIDR2 GICR_PIDR2
+#define GITS_PIDR4 0xffd0
+#define GITS_CIDR0 0xfff0
+#define GITS_CIDR1 0xfff4
+#define GITS_CIDR2 0xfff8
+#define GITS_CIDR3 0xfffc
+
+#define GITS_TRANSLATER 0x10040
+
+#define GITS_SGIR 0x20020
+
+#define GITS_SGIR_VPEID GENMASK_ULL(47, 32)
+#define GITS_SGIR_VINTID GENMASK_ULL(3, 0)
+
+#define GITS_CTLR_ENABLE (1U << 0)
+#define GITS_CTLR_ImDe (1U << 1)
+#define GITS_CTLR_ITS_NUMBER_SHIFT 4
+#define GITS_CTLR_ITS_NUMBER (0xFU << GITS_CTLR_ITS_NUMBER_SHIFT)
+#define GITS_CTLR_QUIESCENT (1U << 31)
+
+#define GITS_TYPER_PLPIS (1UL << 0)
+#define GITS_TYPER_VLPIS (1UL << 1)
+#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4
+#define GITS_TYPER_ITT_ENTRY_SIZE GENMASK_ULL(7, 4)
+#define GITS_TYPER_IDBITS_SHIFT 8
+#define GITS_TYPER_DEVBITS_SHIFT 13
+#define GITS_TYPER_DEVBITS GENMASK_ULL(17, 13)
+#define GITS_TYPER_PTA (1UL << 19)
+#define GITS_TYPER_HCC_SHIFT 24
+#define GITS_TYPER_HCC(r) (((r) >> GITS_TYPER_HCC_SHIFT) & 0xff)
+#define GITS_TYPER_VMOVP (1ULL << 37)
+#define GITS_TYPER_VMAPP (1ULL << 40)
+#define GITS_TYPER_SVPET GENMASK_ULL(42, 41)
-/* CPU interface registers */
-#define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0)
-#define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0)
-#define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1)
-#define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1)
-#define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4)
-#define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5)
-#define SYS_ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
+#define GITS_IIDR_REV_SHIFT 12
+#define GITS_IIDR_REV_MASK (0xf << GITS_IIDR_REV_SHIFT)
+#define GITS_IIDR_REV(r) (((r) >> GITS_IIDR_REV_SHIFT) & 0xf)
+#define GITS_IIDR_PRODUCTID_SHIFT 24
-#define SYS_ICV_AP1R0_EL1 sys_reg(3, 0, 12, 9, 0)
+#define GITS_CBASER_VALID (1ULL << 63)
+#define GITS_CBASER_SHAREABILITY_SHIFT (10)
+#define GITS_CBASER_INNER_CACHEABILITY_SHIFT (59)
+#define GITS_CBASER_OUTER_CACHEABILITY_SHIFT (53)
+#define GITS_CBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GITS_CBASER, SHAREABILITY_MASK)
+#define GITS_CBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, MASK)
+#define GITS_CBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GITS_CBASER, OUTER, MASK)
+#define GITS_CBASER_CACHEABILITY_MASK GITS_CBASER_INNER_CACHEABILITY_MASK
-#define ICC_PMR_DEF_PRIO 0xf0
+#define GITS_CBASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GITS_CBASER, InnerShareable)
+#define GITS_CBASER_nCnB GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nCnB)
+#define GITS_CBASER_nC GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nC)
+#define GITS_CBASER_RaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWt)
+#define GITS_CBASER_RaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWb)
+#define GITS_CBASER_WaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWt)
+#define GITS_CBASER_WaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWb)
+#define GITS_CBASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt)
+#define GITS_CBASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb)
+
+#define GITS_CBASER_ADDRESS(cbaser) ((cbaser) & GENMASK_ULL(51, 12))
+
+#define GITS_BASER_NR_REGS 8
+
+#define GITS_BASER_VALID (1ULL << 63)
+#define GITS_BASER_INDIRECT (1ULL << 62)
+
+#define GITS_BASER_INNER_CACHEABILITY_SHIFT (59)
+#define GITS_BASER_OUTER_CACHEABILITY_SHIFT (53)
+#define GITS_BASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GITS_BASER, INNER, MASK)
+#define GITS_BASER_CACHEABILITY_MASK GITS_BASER_INNER_CACHEABILITY_MASK
+#define GITS_BASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, MASK)
+#define GITS_BASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GITS_BASER, SHAREABILITY_MASK)
+
+#define GITS_BASER_nCnB GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nCnB)
+#define GITS_BASER_nC GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nC)
+#define GITS_BASER_RaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWt)
+#define GITS_BASER_RaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb)
+#define GITS_BASER_WaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWt)
+#define GITS_BASER_WaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWb)
+#define GITS_BASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWt)
+#define GITS_BASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWb)
+
+#define GITS_BASER_TYPE_SHIFT (56)
+#define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7)
+#define GITS_BASER_ENTRY_SIZE_SHIFT (48)
+#define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1)
+#define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48)
+#define GITS_BASER_PHYS_52_to_48(phys) \
+ (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12)
+#define GITS_BASER_ADDR_48_to_52(baser) \
+ (((baser) & GENMASK_ULL(47, 16)) | (((baser) >> 12) & 0xf) << 48)
+
+#define GITS_BASER_SHAREABILITY_SHIFT (10)
+#define GITS_BASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)
+#define GITS_BASER_PAGE_SIZE_SHIFT (8)
+#define __GITS_BASER_PSZ(sz) (GIC_PAGE_SIZE_ ## sz << GITS_BASER_PAGE_SIZE_SHIFT)
+#define GITS_BASER_PAGE_SIZE_4K __GITS_BASER_PSZ(4K)
+#define GITS_BASER_PAGE_SIZE_16K __GITS_BASER_PSZ(16K)
+#define GITS_BASER_PAGE_SIZE_64K __GITS_BASER_PSZ(64K)
+#define GITS_BASER_PAGE_SIZE_MASK __GITS_BASER_PSZ(MASK)
+#define GITS_BASER_PAGES_MAX 256
+#define GITS_BASER_PAGES_SHIFT (0)
+#define GITS_BASER_NR_PAGES(r) (((r) & 0xff) + 1)
+
+#define GITS_BASER_TYPE_NONE 0
+#define GITS_BASER_TYPE_DEVICE 1
+#define GITS_BASER_TYPE_VCPU 2
+#define GITS_BASER_TYPE_RESERVED3 3
+#define GITS_BASER_TYPE_COLLECTION 4
+#define GITS_BASER_TYPE_RESERVED5 5
+#define GITS_BASER_TYPE_RESERVED6 6
+#define GITS_BASER_TYPE_RESERVED7 7
+
+#define GITS_LVL1_ENTRY_SIZE (8UL)
+
+/*
+ * ITS commands
+ */
+#define GITS_CMD_MAPD 0x08
+#define GITS_CMD_MAPC 0x09
+#define GITS_CMD_MAPTI 0x0a
+#define GITS_CMD_MAPI 0x0b
+#define GITS_CMD_MOVI 0x01
+#define GITS_CMD_DISCARD 0x0f
+#define GITS_CMD_INV 0x0c
+#define GITS_CMD_MOVALL 0x0e
+#define GITS_CMD_INVALL 0x0d
+#define GITS_CMD_INT 0x03
+#define GITS_CMD_CLEAR 0x04
+#define GITS_CMD_SYNC 0x05
+
+/*
+ * GICv4 ITS specific commands
+ */
+#define GITS_CMD_GICv4(x) ((x) | 0x20)
+#define GITS_CMD_VINVALL GITS_CMD_GICv4(GITS_CMD_INVALL)
+#define GITS_CMD_VMAPP GITS_CMD_GICv4(GITS_CMD_MAPC)
+#define GITS_CMD_VMAPTI GITS_CMD_GICv4(GITS_CMD_MAPTI)
+#define GITS_CMD_VMOVI GITS_CMD_GICv4(GITS_CMD_MOVI)
+#define GITS_CMD_VSYNC GITS_CMD_GICv4(GITS_CMD_SYNC)
+/* VMOVP, VSGI and INVDB are the odd ones, as they dont have a physical counterpart */
+#define GITS_CMD_VMOVP GITS_CMD_GICv4(2)
+#define GITS_CMD_VSGI GITS_CMD_GICv4(3)
+#define GITS_CMD_INVDB GITS_CMD_GICv4(0xe)
+
+/*
+ * ITS error numbers
+ */
+#define E_ITS_MOVI_UNMAPPED_INTERRUPT 0x010107
+#define E_ITS_MOVI_UNMAPPED_COLLECTION 0x010109
+#define E_ITS_INT_UNMAPPED_INTERRUPT 0x010307
+#define E_ITS_CLEAR_UNMAPPED_INTERRUPT 0x010507
+#define E_ITS_MAPD_DEVICE_OOR 0x010801
+#define E_ITS_MAPD_ITTSIZE_OOR 0x010802
+#define E_ITS_MAPC_PROCNUM_OOR 0x010902
+#define E_ITS_MAPC_COLLECTION_OOR 0x010903
+#define E_ITS_MAPTI_UNMAPPED_DEVICE 0x010a04
+#define E_ITS_MAPTI_ID_OOR 0x010a05
+#define E_ITS_MAPTI_PHYSICALID_OOR 0x010a06
+#define E_ITS_INV_UNMAPPED_INTERRUPT 0x010c07
+#define E_ITS_INVALL_UNMAPPED_COLLECTION 0x010d09
+#define E_ITS_MOVALL_PROCNUM_OOR 0x010e01
+#define E_ITS_DISCARD_UNMAPPED_INTERRUPT 0x010f07
+
+/*
+ * CPU interface registers
+ */
+#define ICC_CTLR_EL1_EOImode_SHIFT (1)
+#define ICC_CTLR_EL1_EOImode_drop_dir (0U << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_EOImode_drop (1U << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_CBPR_SHIFT 0
+#define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT)
+#define ICC_CTLR_EL1_PMHE_SHIFT 6
+#define ICC_CTLR_EL1_PMHE_MASK (1 << ICC_CTLR_EL1_PMHE_SHIFT)
+#define ICC_CTLR_EL1_PRI_BITS_SHIFT 8
+#define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT)
+#define ICC_CTLR_EL1_ID_BITS_SHIFT 11
+#define ICC_CTLR_EL1_ID_BITS_MASK (0x7 << ICC_CTLR_EL1_ID_BITS_SHIFT)
+#define ICC_CTLR_EL1_SEIS_SHIFT 14
+#define ICC_CTLR_EL1_SEIS_MASK (0x1 << ICC_CTLR_EL1_SEIS_SHIFT)
+#define ICC_CTLR_EL1_A3V_SHIFT 15
+#define ICC_CTLR_EL1_A3V_MASK (0x1 << ICC_CTLR_EL1_A3V_SHIFT)
+#define ICC_CTLR_EL1_RSS (0x1 << 18)
+#define ICC_CTLR_EL1_ExtRange (0x1 << 19)
+#define ICC_PMR_EL1_SHIFT 0
+#define ICC_PMR_EL1_MASK (0xff << ICC_PMR_EL1_SHIFT)
+#define ICC_BPR0_EL1_SHIFT 0
+#define ICC_BPR0_EL1_MASK (0x7 << ICC_BPR0_EL1_SHIFT)
+#define ICC_BPR1_EL1_SHIFT 0
+#define ICC_BPR1_EL1_MASK (0x7 << ICC_BPR1_EL1_SHIFT)
+#define ICC_IGRPEN0_EL1_SHIFT 0
+#define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT)
+#define ICC_IGRPEN1_EL1_SHIFT 0
+#define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT)
+#define ICC_SRE_EL1_DIB (1U << 2)
+#define ICC_SRE_EL1_DFB (1U << 1)
#define ICC_SRE_EL1_SRE (1U << 0)
-#define ICC_IGRPEN1_EL1_ENABLE (1U << 0)
+/* These are for GICv2 emulation only */
+#define GICH_LR_VIRTUALID (0x3ffUL << 0)
+#define GICH_LR_PHYSID_CPUID_SHIFT (10)
+#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT)
+
+#define ICC_IAR1_EL1_SPURIOUS 0x3ff
+
+#define ICC_SRE_EL2_SRE (1 << 0)
+#define ICC_SRE_EL2_ENABLE (1 << 3)
-#define GICV3_MAX_CPUS 512
+#define ICC_SGI1R_TARGET_LIST_SHIFT 0
+#define ICC_SGI1R_TARGET_LIST_MASK (0xffff << ICC_SGI1R_TARGET_LIST_SHIFT)
+#define ICC_SGI1R_AFFINITY_1_SHIFT 16
+#define ICC_SGI1R_AFFINITY_1_MASK (0xff << ICC_SGI1R_AFFINITY_1_SHIFT)
+#define ICC_SGI1R_SGI_ID_SHIFT 24
+#define ICC_SGI1R_SGI_ID_MASK (0xfULL << ICC_SGI1R_SGI_ID_SHIFT)
+#define ICC_SGI1R_AFFINITY_2_SHIFT 32
+#define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT)
+#define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40
+#define ICC_SGI1R_RS_SHIFT 44
+#define ICC_SGI1R_RS_MASK (0xfULL << ICC_SGI1R_RS_SHIFT)
+#define ICC_SGI1R_AFFINITY_3_SHIFT 48
+#define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT)
-#endif /* SELFTEST_KVM_GICV3_H */
+#endif
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h b/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h
new file mode 100644
index 0000000000..3722ed9c8f
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __SELFTESTS_GIC_V3_ITS_H__
+#define __SELFTESTS_GIC_V3_ITS_H__
+
+#include <linux/sizes.h>
+
+void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz,
+ vm_paddr_t device_tbl, size_t device_tbl_sz,
+ vm_paddr_t cmdq, size_t cmdq_size);
+
+void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base,
+ size_t itt_size, bool valid);
+void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid);
+void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
+ u32 collection_id, u32 intid);
+void its_send_invall_cmd(void *cmdq_base, u32 collection_id);
+
+#endif // __SELFTESTS_GIC_V3_ITS_H__
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index 9e518b5628..9b20a355d8 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -8,6 +8,8 @@
#define SELFTEST_KVM_PROCESSOR_H
#include "kvm_util.h"
+#include "ucall_common.h"
+
#include <linux/stringify.h>
#include <linux/types.h>
#include <asm/sysreg.h>
@@ -58,8 +60,6 @@
MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \
MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))
-#define MPIDR_HWID_BITMASK (0xff00fffffful)
-
void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init);
struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
struct kvm_vcpu_init *init, void *guest_code);
@@ -177,11 +177,28 @@ static __always_inline u32 __raw_readl(const volatile void *addr)
return val;
}
+static __always_inline void __raw_writeq(u64 val, volatile void *addr)
+{
+ asm volatile("str %0, [%1]" : : "rZ" (val), "r" (addr));
+}
+
+static __always_inline u64 __raw_readq(const volatile void *addr)
+{
+ u64 val;
+ asm volatile("ldr %0, [%1]" : "=r" (val) : "r" (addr));
+ return val;
+}
+
#define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
#define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; })
+#define writeq_relaxed(v,c) ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c)))
+#define readq_relaxed(c) ({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; })
#define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c));})
#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })
+#define writeq(v,c) ({ __iowmb(); writeq_relaxed((v),(c));})
+#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; })
+
static inline void local_irq_enable(void)
{
diff --git a/tools/testing/selftests/kvm/include/aarch64/ucall.h b/tools/testing/selftests/kvm/include/aarch64/ucall.h
index 4b68f37efd..4ec801f37f 100644
--- a/tools/testing/selftests/kvm/include/aarch64/ucall.h
+++ b/tools/testing/selftests/kvm/include/aarch64/ucall.h
@@ -2,7 +2,7 @@
#ifndef SELFTEST_KVM_UCALL_H
#define SELFTEST_KVM_UCALL_H
-#include "kvm_util_base.h"
+#include "kvm_util.h"
#define UCALL_EXIT_REASON KVM_EXIT_MMIO
diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/aarch64/vgic.h
index 0ac6f05c63..c481d0c00a 100644
--- a/tools/testing/selftests/kvm/include/aarch64/vgic.h
+++ b/tools/testing/selftests/kvm/include/aarch64/vgic.h
@@ -16,8 +16,7 @@
((uint64_t)(flags) << 12) | \
index)
-int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
- uint64_t gicd_base_gpa, uint64_t gicr_base_gpa);
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs);
#define VGIC_MAX_RESERVED 1023
@@ -33,4 +32,6 @@ void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
#define KVM_IRQCHIP_NUM_PINS (1020 - 32)
+int vgic_its_setup(struct kvm_vm *vm);
+
#endif // SELFTEST_KVM_VGIC_H
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index c9286811a4..63c2aaae51 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -1,13 +1,1116 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * tools/testing/selftests/kvm/include/kvm_util.h
- *
* Copyright (C) 2018, Google LLC.
*/
#ifndef SELFTEST_KVM_UTIL_H
#define SELFTEST_KVM_UTIL_H
-#include "kvm_util_base.h"
-#include "ucall_common.h"
+#include "test_util.h"
+
+#include <linux/compiler.h>
+#include "linux/hashtable.h"
+#include "linux/list.h"
+#include <linux/kernel.h>
+#include <linux/kvm.h>
+#include "linux/rbtree.h"
+#include <linux/types.h>
+
+#include <asm/atomic.h>
+#include <asm/kvm.h>
+
+#include <sys/ioctl.h>
+
+#include "kvm_util_arch.h"
+#include "kvm_util_types.h"
+#include "sparsebit.h"
+
+#define KVM_DEV_PATH "/dev/kvm"
+#define KVM_MAX_VCPUS 512
+
+#define NSEC_PER_SEC 1000000000L
+
+struct userspace_mem_region {
+ struct kvm_userspace_memory_region2 region;
+ struct sparsebit *unused_phy_pages;
+ struct sparsebit *protected_phy_pages;
+ int fd;
+ off_t offset;
+ enum vm_mem_backing_src_type backing_src_type;
+ void *host_mem;
+ void *host_alias;
+ void *mmap_start;
+ void *mmap_alias;
+ size_t mmap_size;
+ struct rb_node gpa_node;
+ struct rb_node hva_node;
+ struct hlist_node slot_node;
+};
+
+struct kvm_vcpu {
+ struct list_head list;
+ uint32_t id;
+ int fd;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+#ifdef __x86_64__
+ struct kvm_cpuid2 *cpuid;
+#endif
+ struct kvm_dirty_gfn *dirty_gfns;
+ uint32_t fetch_index;
+ uint32_t dirty_gfns_count;
+};
+
+struct userspace_mem_regions {
+ struct rb_root gpa_tree;
+ struct rb_root hva_tree;
+ DECLARE_HASHTABLE(slot_hash, 9);
+};
+
+enum kvm_mem_region_type {
+ MEM_REGION_CODE,
+ MEM_REGION_DATA,
+ MEM_REGION_PT,
+ MEM_REGION_TEST_DATA,
+ NR_MEM_REGIONS,
+};
+
+struct kvm_vm {
+ int mode;
+ unsigned long type;
+ int kvm_fd;
+ int fd;
+ unsigned int pgtable_levels;
+ unsigned int page_size;
+ unsigned int page_shift;
+ unsigned int pa_bits;
+ unsigned int va_bits;
+ uint64_t max_gfn;
+ struct list_head vcpus;
+ struct userspace_mem_regions regions;
+ struct sparsebit *vpages_valid;
+ struct sparsebit *vpages_mapped;
+ bool has_irqchip;
+ bool pgd_created;
+ vm_paddr_t ucall_mmio_addr;
+ vm_paddr_t pgd;
+ vm_vaddr_t handlers;
+ uint32_t dirty_ring_size;
+ uint64_t gpa_tag_mask;
+
+ struct kvm_vm_arch arch;
+
+ /* Cache of information for binary stats interface */
+ int stats_fd;
+ struct kvm_stats_header stats_header;
+ struct kvm_stats_desc *stats_desc;
+
+ /*
+ * KVM region slots. These are the default memslots used by page
+ * allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE]
+ * memslot.
+ */
+ uint32_t memslots[NR_MEM_REGIONS];
+};
+
+struct vcpu_reg_sublist {
+ const char *name;
+ long capability;
+ int feature;
+ int feature_type;
+ bool finalize;
+ __u64 *regs;
+ __u64 regs_n;
+ __u64 *rejects_set;
+ __u64 rejects_set_n;
+ __u64 *skips_set;
+ __u64 skips_set_n;
+};
+
+struct vcpu_reg_list {
+ char *name;
+ struct vcpu_reg_sublist sublists[];
+};
+
+#define for_each_sublist(c, s) \
+ for ((s) = &(c)->sublists[0]; (s)->regs; ++(s))
+
+#define kvm_for_each_vcpu(vm, i, vcpu) \
+ for ((i) = 0; (i) <= (vm)->last_vcpu_id; (i)++) \
+ if (!((vcpu) = vm->vcpus[i])) \
+ continue; \
+ else
+
+struct userspace_mem_region *
+memslot2region(struct kvm_vm *vm, uint32_t memslot);
+
+static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm,
+ enum kvm_mem_region_type type)
+{
+ assert(type < NR_MEM_REGIONS);
+ return memslot2region(vm, vm->memslots[type]);
+}
+
+/* Minimum allocated guest virtual and physical addresses */
+#define KVM_UTIL_MIN_VADDR 0x2000
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+
+#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
+#define DEFAULT_STACK_PGS 5
+
+enum vm_guest_mode {
+ VM_MODE_P52V48_4K,
+ VM_MODE_P52V48_16K,
+ VM_MODE_P52V48_64K,
+ VM_MODE_P48V48_4K,
+ VM_MODE_P48V48_16K,
+ VM_MODE_P48V48_64K,
+ VM_MODE_P40V48_4K,
+ VM_MODE_P40V48_16K,
+ VM_MODE_P40V48_64K,
+ VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
+ VM_MODE_P47V64_4K,
+ VM_MODE_P44V64_4K,
+ VM_MODE_P36V48_4K,
+ VM_MODE_P36V48_16K,
+ VM_MODE_P36V48_64K,
+ VM_MODE_P36V47_16K,
+ NUM_VM_MODES,
+};
+
+struct vm_shape {
+ uint32_t type;
+ uint8_t mode;
+ uint8_t pad0;
+ uint16_t pad1;
+};
+
+kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t));
+
+#define VM_TYPE_DEFAULT 0
+
+#define VM_SHAPE(__mode) \
+({ \
+ struct vm_shape shape = { \
+ .mode = (__mode), \
+ .type = VM_TYPE_DEFAULT \
+ }; \
+ \
+ shape; \
+})
+
+#if defined(__aarch64__)
+
+extern enum vm_guest_mode vm_mode_default;
+
+#define VM_MODE_DEFAULT vm_mode_default
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
+#elif defined(__x86_64__)
+
+#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
+#elif defined(__s390x__)
+
+#define VM_MODE_DEFAULT VM_MODE_P44V64_4K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 16)
+
+#elif defined(__riscv)
+
+#if __riscv_xlen == 32
+#error "RISC-V 32-bit kvm selftests not supported"
+#endif
+
+#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
+#endif
+
+#define VM_SHAPE_DEFAULT VM_SHAPE(VM_MODE_DEFAULT)
+
+#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT)
+#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE)
+
+struct vm_guest_mode_params {
+ unsigned int pa_bits;
+ unsigned int va_bits;
+ unsigned int page_size;
+ unsigned int page_shift;
+};
+extern const struct vm_guest_mode_params vm_guest_mode_params[];
+
+int open_path_or_exit(const char *path, int flags);
+int open_kvm_dev_path_or_exit(void);
+
+bool get_kvm_param_bool(const char *param);
+bool get_kvm_intel_param_bool(const char *param);
+bool get_kvm_amd_param_bool(const char *param);
+
+int get_kvm_param_integer(const char *param);
+int get_kvm_intel_param_integer(const char *param);
+int get_kvm_amd_param_integer(const char *param);
+
+unsigned int kvm_check_cap(long cap);
+
+static inline bool kvm_has_cap(long cap)
+{
+ return kvm_check_cap(cap);
+}
+
+#define __KVM_SYSCALL_ERROR(_name, _ret) \
+ "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno)
+
+/*
+ * Use the "inner", double-underscore macro when reporting errors from within
+ * other macros so that the name of ioctl() and not its literal numeric value
+ * is printed on error. The "outer" macro is strongly preferred when reporting
+ * errors "directly", i.e. without an additional layer of macros, as it reduces
+ * the probability of passing in the wrong string.
+ */
+#define __KVM_IOCTL_ERROR(_name, _ret) __KVM_SYSCALL_ERROR(_name, _ret)
+#define KVM_IOCTL_ERROR(_ioctl, _ret) __KVM_IOCTL_ERROR(#_ioctl, _ret)
+
+#define kvm_do_ioctl(fd, cmd, arg) \
+({ \
+ kvm_static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd)); \
+ ioctl(fd, cmd, arg); \
+})
+
+#define __kvm_ioctl(kvm_fd, cmd, arg) \
+ kvm_do_ioctl(kvm_fd, cmd, arg)
+
+#define kvm_ioctl(kvm_fd, cmd, arg) \
+({ \
+ int ret = __kvm_ioctl(kvm_fd, cmd, arg); \
+ \
+ TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(#cmd, ret)); \
+})
+
+static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { }
+
+#define __vm_ioctl(vm, cmd, arg) \
+({ \
+ static_assert_is_vm(vm); \
+ kvm_do_ioctl((vm)->fd, cmd, arg); \
+})
+
+/*
+ * Assert that a VM or vCPU ioctl() succeeded, with extra magic to detect if
+ * the ioctl() failed because KVM killed/bugged the VM. To detect a dead VM,
+ * probe KVM_CAP_USER_MEMORY, which (a) has been supported by KVM since before
+ * selftests existed and (b) should never outright fail, i.e. is supposed to
+ * return 0 or 1. If KVM kills a VM, KVM returns -EIO for all ioctl()s for the
+ * VM and its vCPUs, including KVM_CHECK_EXTENSION.
+ */
+#define __TEST_ASSERT_VM_VCPU_IOCTL(cond, name, ret, vm) \
+do { \
+ int __errno = errno; \
+ \
+ static_assert_is_vm(vm); \
+ \
+ if (cond) \
+ break; \
+ \
+ if (errno == EIO && \
+ __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)KVM_CAP_USER_MEMORY) < 0) { \
+ TEST_ASSERT(errno == EIO, "KVM killed the VM, should return -EIO"); \
+ TEST_FAIL("KVM killed/bugged the VM, check the kernel log for clues"); \
+ } \
+ errno = __errno; \
+ TEST_ASSERT(cond, __KVM_IOCTL_ERROR(name, ret)); \
+} while (0)
+
+#define TEST_ASSERT_VM_VCPU_IOCTL(cond, cmd, ret, vm) \
+ __TEST_ASSERT_VM_VCPU_IOCTL(cond, #cmd, ret, vm)
+
+#define vm_ioctl(vm, cmd, arg) \
+({ \
+ int ret = __vm_ioctl(vm, cmd, arg); \
+ \
+ __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \
+})
+
+static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { }
+
+#define __vcpu_ioctl(vcpu, cmd, arg) \
+({ \
+ static_assert_is_vcpu(vcpu); \
+ kvm_do_ioctl((vcpu)->fd, cmd, arg); \
+})
+
+#define vcpu_ioctl(vcpu, cmd, arg) \
+({ \
+ int ret = __vcpu_ioctl(vcpu, cmd, arg); \
+ \
+ __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, (vcpu)->vm); \
+})
+
+/*
+ * Looks up and returns the value corresponding to the capability
+ * (KVM_CAP_*) given by cap.
+ */
+static inline int vm_check_cap(struct kvm_vm *vm, long cap)
+{
+ int ret = __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)cap);
+
+ TEST_ASSERT_VM_VCPU_IOCTL(ret >= 0, KVM_CHECK_EXTENSION, ret, vm);
+ return ret;
+}
+
+static inline int __vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0)
+{
+ struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
+
+ return __vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap);
+}
+static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0)
+{
+ struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
+
+ vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap);
+}
+
+static inline void vm_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa,
+ uint64_t size, uint64_t attributes)
+{
+ struct kvm_memory_attributes attr = {
+ .attributes = attributes,
+ .address = gpa,
+ .size = size,
+ .flags = 0,
+ };
+
+ /*
+ * KVM_SET_MEMORY_ATTRIBUTES overwrites _all_ attributes. These flows
+ * need significant enhancements to support multiple attributes.
+ */
+ TEST_ASSERT(!attributes || attributes == KVM_MEMORY_ATTRIBUTE_PRIVATE,
+ "Update me to support multiple attributes!");
+
+ vm_ioctl(vm, KVM_SET_MEMORY_ATTRIBUTES, &attr);
+}
+
+
+static inline void vm_mem_set_private(struct kvm_vm *vm, uint64_t gpa,
+ uint64_t size)
+{
+ vm_set_memory_attributes(vm, gpa, size, KVM_MEMORY_ATTRIBUTE_PRIVATE);
+}
+
+static inline void vm_mem_set_shared(struct kvm_vm *vm, uint64_t gpa,
+ uint64_t size)
+{
+ vm_set_memory_attributes(vm, gpa, size, 0);
+}
+
+void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t gpa, uint64_t size,
+ bool punch_hole);
+
+static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, uint64_t gpa,
+ uint64_t size)
+{
+ vm_guest_mem_fallocate(vm, gpa, size, true);
+}
+
+static inline void vm_guest_mem_allocate(struct kvm_vm *vm, uint64_t gpa,
+ uint64_t size)
+{
+ vm_guest_mem_fallocate(vm, gpa, size, false);
+}
+
+void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
+const char *vm_guest_mode_string(uint32_t i);
+
+void kvm_vm_free(struct kvm_vm *vmp);
+void kvm_vm_restart(struct kvm_vm *vmp);
+void kvm_vm_release(struct kvm_vm *vmp);
+int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
+ size_t len);
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename);
+int kvm_memfd_alloc(size_t size, bool hugepages);
+
+void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+
+static inline void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
+{
+ struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
+
+ vm_ioctl(vm, KVM_GET_DIRTY_LOG, &args);
+}
+
+static inline void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
+ uint64_t first_page, uint32_t num_pages)
+{
+ struct kvm_clear_dirty_log args = {
+ .dirty_bitmap = log,
+ .slot = slot,
+ .first_page = first_page,
+ .num_pages = num_pages
+ };
+
+ vm_ioctl(vm, KVM_CLEAR_DIRTY_LOG, &args);
+}
+
+static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
+{
+ return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL);
+}
+
+static inline int vm_get_stats_fd(struct kvm_vm *vm)
+{
+ int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL);
+
+ TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_GET_STATS_FD, fd, vm);
+ return fd;
+}
+
+static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header)
+{
+ ssize_t ret;
+
+ ret = pread(stats_fd, header, sizeof(*header), 0);
+ TEST_ASSERT(ret == sizeof(*header),
+ "Failed to read '%lu' header bytes, ret = '%ld'",
+ sizeof(*header), ret);
+}
+
+struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
+ struct kvm_stats_header *header);
+
+static inline ssize_t get_stats_descriptor_size(struct kvm_stats_header *header)
+{
+ /*
+ * The base size of the descriptor is defined by KVM's ABI, but the
+ * size of the name field is variable, as far as KVM's ABI is
+ * concerned. For a given instance of KVM, the name field is the same
+ * size for all stats and is provided in the overall stats header.
+ */
+ return sizeof(struct kvm_stats_desc) + header->name_size;
+}
+
+static inline struct kvm_stats_desc *get_stats_descriptor(struct kvm_stats_desc *stats,
+ int index,
+ struct kvm_stats_header *header)
+{
+ /*
+ * Note, size_desc includes the size of the name field, which is
+ * variable. i.e. this is NOT equivalent to &stats_desc[i].
+ */
+ return (void *)stats + index * get_stats_descriptor_size(header);
+}
+
+void read_stat_data(int stats_fd, struct kvm_stats_header *header,
+ struct kvm_stats_desc *desc, uint64_t *data,
+ size_t max_elements);
+
+void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
+ size_t max_elements);
+
+static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name)
+{
+ uint64_t data;
+
+ __vm_get_stat(vm, stat_name, &data, 1);
+ return data;
+}
+
+void vm_create_irqchip(struct kvm_vm *vm);
+
+static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
+ uint64_t flags)
+{
+ struct kvm_create_guest_memfd guest_memfd = {
+ .size = size,
+ .flags = flags,
+ };
+
+ return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd);
+}
+
+static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
+ uint64_t flags)
+{
+ int fd = __vm_create_guest_memfd(vm, size, flags);
+
+ TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_GUEST_MEMFD, fd));
+ return fd;
+}
+
+void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva);
+int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva);
+void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva,
+ uint32_t guest_memfd, uint64_t guest_memfd_offset);
+int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva,
+ uint32_t guest_memfd, uint64_t guest_memfd_offset);
+
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+ enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags);
+void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset);
+
+#ifndef vm_arch_has_protected_memory
+static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
+{
+ return false;
+}
+#endif
+
+void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
+void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
+struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
+void vm_populate_vaddr_bitmap(struct kvm_vm *vm);
+vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
+vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+ enum kvm_mem_region_type type);
+vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz,
+ vm_vaddr_t vaddr_min,
+ enum kvm_mem_region_type type);
+vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
+vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm,
+ enum kvm_mem_region_type type);
+vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
+
+void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ unsigned int npages);
+void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
+void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
+vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
+void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa);
+
+#ifndef vcpu_arch_put_guest
+#define vcpu_arch_put_guest(mem, val) do { (mem) = (val); } while (0)
+#endif
+
+static inline vm_paddr_t vm_untag_gpa(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+ return gpa & ~vm->gpa_tag_mask;
+}
+
+void vcpu_run(struct kvm_vcpu *vcpu);
+int _vcpu_run(struct kvm_vcpu *vcpu);
+
+static inline int __vcpu_run(struct kvm_vcpu *vcpu)
+{
+ return __vcpu_ioctl(vcpu, KVM_RUN, NULL);
+}
+
+void vcpu_run_complete_io(struct kvm_vcpu *vcpu);
+struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu);
+
+static inline void vcpu_enable_cap(struct kvm_vcpu *vcpu, uint32_t cap,
+ uint64_t arg0)
+{
+ struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
+
+ vcpu_ioctl(vcpu, KVM_ENABLE_CAP, &enable_cap);
+}
+
+static inline void vcpu_guest_debug_set(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *debug)
+{
+ vcpu_ioctl(vcpu, KVM_SET_GUEST_DEBUG, debug);
+}
+
+static inline void vcpu_mp_state_get(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ vcpu_ioctl(vcpu, KVM_GET_MP_STATE, mp_state);
+}
+static inline void vcpu_mp_state_set(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ vcpu_ioctl(vcpu, KVM_SET_MP_STATE, mp_state);
+}
+
+static inline void vcpu_regs_get(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ vcpu_ioctl(vcpu, KVM_GET_REGS, regs);
+}
+
+static inline void vcpu_regs_set(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ vcpu_ioctl(vcpu, KVM_SET_REGS, regs);
+}
+static inline void vcpu_sregs_get(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ vcpu_ioctl(vcpu, KVM_GET_SREGS, sregs);
+
+}
+static inline void vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs);
+}
+static inline int _vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ return __vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs);
+}
+static inline void vcpu_fpu_get(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ vcpu_ioctl(vcpu, KVM_GET_FPU, fpu);
+}
+static inline void vcpu_fpu_set(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ vcpu_ioctl(vcpu, KVM_SET_FPU, fpu);
+}
+
+static inline int __vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
+{
+ struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr };
+
+ return __vcpu_ioctl(vcpu, KVM_GET_ONE_REG, &reg);
+}
+static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+{
+ struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
+
+ return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+}
+static inline void vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
+{
+ struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr };
+
+ vcpu_ioctl(vcpu, KVM_GET_ONE_REG, &reg);
+}
+static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+{
+ struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
+
+ vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+}
+
+#ifdef __KVM_HAVE_VCPU_EVENTS
+static inline void vcpu_events_get(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events)
+{
+ vcpu_ioctl(vcpu, KVM_GET_VCPU_EVENTS, events);
+}
+static inline void vcpu_events_set(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events)
+{
+ vcpu_ioctl(vcpu, KVM_SET_VCPU_EVENTS, events);
+}
+#endif
+#ifdef __x86_64__
+static inline void vcpu_nested_state_get(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state *state)
+{
+ vcpu_ioctl(vcpu, KVM_GET_NESTED_STATE, state);
+}
+static inline int __vcpu_nested_state_set(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state *state)
+{
+ return __vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state);
+}
+
+static inline void vcpu_nested_state_set(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state *state)
+{
+ vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state);
+}
+#endif
+static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu)
+{
+ int fd = __vcpu_ioctl(vcpu, KVM_GET_STATS_FD, NULL);
+
+ TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_CHECK_EXTENSION, fd, vcpu->vm);
+ return fd;
+}
+
+int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr);
+
+static inline void kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
+{
+ int ret = __kvm_has_device_attr(dev_fd, group, attr);
+
+ TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno);
+}
+
+int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val);
+
+static inline void kvm_device_attr_get(int dev_fd, uint32_t group,
+ uint64_t attr, void *val)
+{
+ int ret = __kvm_device_attr_get(dev_fd, group, attr, val);
+
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_GET_DEVICE_ATTR, ret));
+}
+
+int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val);
+
+static inline void kvm_device_attr_set(int dev_fd, uint32_t group,
+ uint64_t attr, void *val)
+{
+ int ret = __kvm_device_attr_set(dev_fd, group, attr, val);
+
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret));
+}
+
+static inline int __vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr)
+{
+ return __kvm_has_device_attr(vcpu->fd, group, attr);
+}
+
+static inline void vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr)
+{
+ kvm_has_device_attr(vcpu->fd, group, attr);
+}
+
+static inline int __vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr, void *val)
+{
+ return __kvm_device_attr_get(vcpu->fd, group, attr, val);
+}
+
+static inline void vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr, void *val)
+{
+ kvm_device_attr_get(vcpu->fd, group, attr, val);
+}
+
+static inline int __vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr, void *val)
+{
+ return __kvm_device_attr_set(vcpu->fd, group, attr, val);
+}
+
+static inline void vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr, void *val)
+{
+ kvm_device_attr_set(vcpu->fd, group, attr, val);
+}
+
+int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type);
+int __kvm_create_device(struct kvm_vm *vm, uint64_t type);
+
+static inline int kvm_create_device(struct kvm_vm *vm, uint64_t type)
+{
+ int fd = __kvm_create_device(vm, type);
+
+ TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_DEVICE, fd));
+ return fd;
+}
+
+void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu);
+
+/*
+ * VM VCPU Args Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * num - number of arguments
+ * ... - arguments, each of type uint64_t
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the first @num input parameters for the function at @vcpu's entry point,
+ * per the C calling convention of the architecture, to the values given as
+ * variable args. Each of the variable args is expected to be of type uint64_t.
+ * The maximum @num can be is specific to the architecture.
+ */
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...);
+
+void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
+int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
+
+#define KVM_MAX_IRQ_ROUTES 4096
+
+struct kvm_irq_routing *kvm_gsi_routing_create(void);
+void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
+ uint32_t gsi, uint32_t pin);
+int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
+void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
+
+const char *exit_reason_str(unsigned int exit_reason);
+
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
+ uint32_t memslot);
+vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+ vm_paddr_t paddr_min, uint32_t memslot,
+ bool protected);
+vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
+
+static inline vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+ vm_paddr_t paddr_min, uint32_t memslot)
+{
+ /*
+ * By default, allocate memory as protected for VMs that support
+ * protected memory, as the majority of memory for such VMs is
+ * protected, i.e. using shared memory is effectively opt-in.
+ */
+ return __vm_phy_pages_alloc(vm, num, paddr_min, memslot,
+ vm_arch_has_protected_memory(vm));
+}
+
+/*
+ * ____vm_create() does KVM_CREATE_VM and little else. __vm_create() also
+ * loads the test binary into guest memory and creates an IRQ chip (x86 only).
+ * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to
+ * calculate the amount of memory needed for per-vCPU data, e.g. stacks.
+ */
+struct kvm_vm *____vm_create(struct vm_shape shape);
+struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
+ uint64_t nr_extra_pages);
+
+static inline struct kvm_vm *vm_create_barebones(void)
+{
+ return ____vm_create(VM_SHAPE_DEFAULT);
+}
+
+static inline struct kvm_vm *vm_create_barebones_type(unsigned long type)
+{
+ const struct vm_shape shape = {
+ .mode = VM_MODE_DEFAULT,
+ .type = type,
+ };
+
+ return ____vm_create(shape);
+}
+
+static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus)
+{
+ return __vm_create(VM_SHAPE_DEFAULT, nr_runnable_vcpus, 0);
+}
+
+struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus,
+ uint64_t extra_mem_pages,
+ void *guest_code, struct kvm_vcpu *vcpus[]);
+
+static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus,
+ void *guest_code,
+ struct kvm_vcpu *vcpus[])
+{
+ return __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus, 0,
+ guest_code, vcpus);
+}
+
+
+struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape,
+ struct kvm_vcpu **vcpu,
+ uint64_t extra_mem_pages,
+ void *guest_code);
+
+/*
+ * Create a VM with a single vCPU with reasonable defaults and @extra_mem_pages
+ * additional pages of guest memory. Returns the VM and vCPU (via out param).
+ */
+static inline struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
+ uint64_t extra_mem_pages,
+ void *guest_code)
+{
+ return __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, vcpu,
+ extra_mem_pages, guest_code);
+}
+
+static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
+ void *guest_code)
+{
+ return __vm_create_with_one_vcpu(vcpu, 0, guest_code);
+}
+
+static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape,
+ struct kvm_vcpu **vcpu,
+ void *guest_code)
+{
+ return __vm_create_shape_with_one_vcpu(shape, vcpu, 0, guest_code);
+}
+
+struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm);
+
+void kvm_pin_this_task_to_pcpu(uint32_t pcpu);
+void kvm_print_vcpu_pinning_help(void);
+void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
+ int nr_vcpus);
+
+unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
+unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
+unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
+unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages);
+static inline unsigned int
+vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
+{
+ unsigned int n;
+ n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages));
+#ifdef __s390x__
+ /* s390 requires 1M aligned guest sizes */
+ n = (n + 255) & ~255;
+#endif
+ return n;
+}
+
+#define sync_global_to_guest(vm, g) ({ \
+ typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
+ memcpy(_p, &(g), sizeof(g)); \
+})
+
+#define sync_global_from_guest(vm, g) ({ \
+ typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
+ memcpy(&(g), _p, sizeof(g)); \
+})
+
+/*
+ * Write a global value, but only in the VM's (guest's) domain. Primarily used
+ * for "globals" that hold per-VM values (VMs always duplicate code and global
+ * data into their own region of physical memory), but can be used anytime it's
+ * undesirable to change the host's copy of the global.
+ */
+#define write_guest_global(vm, g, val) ({ \
+ typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
+ typeof(g) _val = val; \
+ \
+ memcpy(_p, &(_val), sizeof(g)); \
+})
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu);
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu,
+ uint8_t indent);
+
+static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu,
+ uint8_t indent)
+{
+ vcpu_arch_dump(stream, vcpu, indent);
+}
+
+/*
+ * Adds a vCPU with reasonable defaults (e.g. a stack)
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpu_id - The id of the VCPU to add to the VM.
+ */
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code);
+
+static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ void *guest_code)
+{
+ struct kvm_vcpu *vcpu = vm_arch_vcpu_add(vm, vcpu_id);
+
+ vcpu_arch_set_entry_point(vcpu, guest_code);
+
+ return vcpu;
+}
+
+/* Re-create a vCPU after restarting a VM, e.g. for state save/restore tests. */
+struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id);
+
+static inline struct kvm_vcpu *vm_vcpu_recreate(struct kvm_vm *vm,
+ uint32_t vcpu_id)
+{
+ return vm_arch_vcpu_recreate(vm, vcpu_id);
+}
+
+void vcpu_arch_free(struct kvm_vcpu *vcpu);
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm);
+
+static inline void virt_pgd_alloc(struct kvm_vm *vm)
+{
+ virt_arch_pgd_alloc(vm);
+}
+
+/*
+ * VM Virtual Page Map
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vaddr - VM Virtual Address
+ * paddr - VM Physical Address
+ * memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within @vm, creates a virtual translation for the page starting
+ * at @vaddr to the page starting at @paddr.
+ */
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
+
+static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ virt_arch_pg_map(vm, vaddr, paddr);
+}
+
+
+/*
+ * Address Guest Virtual to Guest Physical
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gva - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent VM physical address
+ *
+ * Returns the VM physical address of the translated VM virtual
+ * address given by @gva.
+ */
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
+
+static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ return addr_arch_gva2gpa(vm, gva);
+}
+
+/*
+ * Virtual Translation Tables Dump
+ *
+ * Input Args:
+ * stream - Output FILE stream
+ * vm - Virtual Machine
+ * indent - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Dumps to the FILE stream given by @stream, the contents of all the
+ * virtual translation tables for the VM given by @vm.
+ */
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+
+static inline void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ virt_arch_dump(stream, vm, indent);
+}
+
+
+static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm)
+{
+ return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0);
+}
+
+/*
+ * Arch hook that is invoked via a constructor, i.e. before exeucting main(),
+ * to allow for arch-specific setup that is common to all tests, e.g. computing
+ * the default guest "mode".
+ */
+void kvm_selftest_arch_init(void);
+
+void kvm_arch_vm_post_create(struct kvm_vm *vm);
+
+bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr);
+
+uint32_t guest_get_vcpuid(void);
#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
deleted file mode 100644
index 3e0db283a4..0000000000
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ /dev/null
@@ -1,1135 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/kvm_util_base.h
- *
- * Copyright (C) 2018, Google LLC.
- */
-#ifndef SELFTEST_KVM_UTIL_BASE_H
-#define SELFTEST_KVM_UTIL_BASE_H
-
-#include "test_util.h"
-
-#include <linux/compiler.h>
-#include "linux/hashtable.h"
-#include "linux/list.h"
-#include <linux/kernel.h>
-#include <linux/kvm.h>
-#include "linux/rbtree.h"
-#include <linux/types.h>
-
-#include <asm/atomic.h>
-#include <asm/kvm.h>
-
-#include <sys/ioctl.h>
-
-#include "kvm_util_arch.h"
-#include "sparsebit.h"
-
-/*
- * Provide a version of static_assert() that is guaranteed to have an optional
- * message param. If _ISOC11_SOURCE is defined, glibc (/usr/include/assert.h)
- * #undefs and #defines static_assert() as a direct alias to _Static_assert(),
- * i.e. effectively makes the message mandatory. Many KVM selftests #define
- * _GNU_SOURCE for various reasons, and _GNU_SOURCE implies _ISOC11_SOURCE. As
- * a result, static_assert() behavior is non-deterministic and may or may not
- * require a message depending on #include order.
- */
-#define __kvm_static_assert(expr, msg, ...) _Static_assert(expr, msg)
-#define kvm_static_assert(expr, ...) __kvm_static_assert(expr, ##__VA_ARGS__, #expr)
-
-#define KVM_DEV_PATH "/dev/kvm"
-#define KVM_MAX_VCPUS 512
-
-#define NSEC_PER_SEC 1000000000L
-
-typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
-typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
-
-struct userspace_mem_region {
- struct kvm_userspace_memory_region2 region;
- struct sparsebit *unused_phy_pages;
- struct sparsebit *protected_phy_pages;
- int fd;
- off_t offset;
- enum vm_mem_backing_src_type backing_src_type;
- void *host_mem;
- void *host_alias;
- void *mmap_start;
- void *mmap_alias;
- size_t mmap_size;
- struct rb_node gpa_node;
- struct rb_node hva_node;
- struct hlist_node slot_node;
-};
-
-struct kvm_vcpu {
- struct list_head list;
- uint32_t id;
- int fd;
- struct kvm_vm *vm;
- struct kvm_run *run;
-#ifdef __x86_64__
- struct kvm_cpuid2 *cpuid;
-#endif
- struct kvm_dirty_gfn *dirty_gfns;
- uint32_t fetch_index;
- uint32_t dirty_gfns_count;
-};
-
-struct userspace_mem_regions {
- struct rb_root gpa_tree;
- struct rb_root hva_tree;
- DECLARE_HASHTABLE(slot_hash, 9);
-};
-
-enum kvm_mem_region_type {
- MEM_REGION_CODE,
- MEM_REGION_DATA,
- MEM_REGION_PT,
- MEM_REGION_TEST_DATA,
- NR_MEM_REGIONS,
-};
-
-struct kvm_vm {
- int mode;
- unsigned long type;
- uint8_t subtype;
- int kvm_fd;
- int fd;
- unsigned int pgtable_levels;
- unsigned int page_size;
- unsigned int page_shift;
- unsigned int pa_bits;
- unsigned int va_bits;
- uint64_t max_gfn;
- struct list_head vcpus;
- struct userspace_mem_regions regions;
- struct sparsebit *vpages_valid;
- struct sparsebit *vpages_mapped;
- bool has_irqchip;
- bool pgd_created;
- vm_paddr_t ucall_mmio_addr;
- vm_paddr_t pgd;
- vm_vaddr_t gdt;
- vm_vaddr_t tss;
- vm_vaddr_t idt;
- vm_vaddr_t handlers;
- uint32_t dirty_ring_size;
- uint64_t gpa_tag_mask;
-
- struct kvm_vm_arch arch;
-
- /* Cache of information for binary stats interface */
- int stats_fd;
- struct kvm_stats_header stats_header;
- struct kvm_stats_desc *stats_desc;
-
- /*
- * KVM region slots. These are the default memslots used by page
- * allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE]
- * memslot.
- */
- uint32_t memslots[NR_MEM_REGIONS];
-};
-
-struct vcpu_reg_sublist {
- const char *name;
- long capability;
- int feature;
- int feature_type;
- bool finalize;
- __u64 *regs;
- __u64 regs_n;
- __u64 *rejects_set;
- __u64 rejects_set_n;
- __u64 *skips_set;
- __u64 skips_set_n;
-};
-
-struct vcpu_reg_list {
- char *name;
- struct vcpu_reg_sublist sublists[];
-};
-
-#define for_each_sublist(c, s) \
- for ((s) = &(c)->sublists[0]; (s)->regs; ++(s))
-
-#define kvm_for_each_vcpu(vm, i, vcpu) \
- for ((i) = 0; (i) <= (vm)->last_vcpu_id; (i)++) \
- if (!((vcpu) = vm->vcpus[i])) \
- continue; \
- else
-
-struct userspace_mem_region *
-memslot2region(struct kvm_vm *vm, uint32_t memslot);
-
-static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm,
- enum kvm_mem_region_type type)
-{
- assert(type < NR_MEM_REGIONS);
- return memslot2region(vm, vm->memslots[type]);
-}
-
-/* Minimum allocated guest virtual and physical addresses */
-#define KVM_UTIL_MIN_VADDR 0x2000
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
-
-#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
-#define DEFAULT_STACK_PGS 5
-
-enum vm_guest_mode {
- VM_MODE_P52V48_4K,
- VM_MODE_P52V48_16K,
- VM_MODE_P52V48_64K,
- VM_MODE_P48V48_4K,
- VM_MODE_P48V48_16K,
- VM_MODE_P48V48_64K,
- VM_MODE_P40V48_4K,
- VM_MODE_P40V48_16K,
- VM_MODE_P40V48_64K,
- VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
- VM_MODE_P47V64_4K,
- VM_MODE_P44V64_4K,
- VM_MODE_P36V48_4K,
- VM_MODE_P36V48_16K,
- VM_MODE_P36V48_64K,
- VM_MODE_P36V47_16K,
- NUM_VM_MODES,
-};
-
-struct vm_shape {
- uint32_t type;
- uint8_t mode;
- uint8_t subtype;
- uint16_t padding;
-};
-
-kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t));
-
-#define VM_TYPE_DEFAULT 0
-
-#define VM_SHAPE(__mode) \
-({ \
- struct vm_shape shape = { \
- .mode = (__mode), \
- .type = VM_TYPE_DEFAULT \
- }; \
- \
- shape; \
-})
-
-#if defined(__aarch64__)
-
-extern enum vm_guest_mode vm_mode_default;
-
-#define VM_MODE_DEFAULT vm_mode_default
-#define MIN_PAGE_SHIFT 12U
-#define ptes_per_page(page_size) ((page_size) / 8)
-
-#elif defined(__x86_64__)
-
-#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
-#define MIN_PAGE_SHIFT 12U
-#define ptes_per_page(page_size) ((page_size) / 8)
-
-#elif defined(__s390x__)
-
-#define VM_MODE_DEFAULT VM_MODE_P44V64_4K
-#define MIN_PAGE_SHIFT 12U
-#define ptes_per_page(page_size) ((page_size) / 16)
-
-#elif defined(__riscv)
-
-#if __riscv_xlen == 32
-#error "RISC-V 32-bit kvm selftests not supported"
-#endif
-
-#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
-#define MIN_PAGE_SHIFT 12U
-#define ptes_per_page(page_size) ((page_size) / 8)
-
-#endif
-
-#define VM_SHAPE_DEFAULT VM_SHAPE(VM_MODE_DEFAULT)
-
-#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT)
-#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE)
-
-struct vm_guest_mode_params {
- unsigned int pa_bits;
- unsigned int va_bits;
- unsigned int page_size;
- unsigned int page_shift;
-};
-extern const struct vm_guest_mode_params vm_guest_mode_params[];
-
-int open_path_or_exit(const char *path, int flags);
-int open_kvm_dev_path_or_exit(void);
-
-bool get_kvm_param_bool(const char *param);
-bool get_kvm_intel_param_bool(const char *param);
-bool get_kvm_amd_param_bool(const char *param);
-
-int get_kvm_param_integer(const char *param);
-int get_kvm_intel_param_integer(const char *param);
-int get_kvm_amd_param_integer(const char *param);
-
-unsigned int kvm_check_cap(long cap);
-
-static inline bool kvm_has_cap(long cap)
-{
- return kvm_check_cap(cap);
-}
-
-#define __KVM_SYSCALL_ERROR(_name, _ret) \
- "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno)
-
-/*
- * Use the "inner", double-underscore macro when reporting errors from within
- * other macros so that the name of ioctl() and not its literal numeric value
- * is printed on error. The "outer" macro is strongly preferred when reporting
- * errors "directly", i.e. without an additional layer of macros, as it reduces
- * the probability of passing in the wrong string.
- */
-#define __KVM_IOCTL_ERROR(_name, _ret) __KVM_SYSCALL_ERROR(_name, _ret)
-#define KVM_IOCTL_ERROR(_ioctl, _ret) __KVM_IOCTL_ERROR(#_ioctl, _ret)
-
-#define kvm_do_ioctl(fd, cmd, arg) \
-({ \
- kvm_static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd)); \
- ioctl(fd, cmd, arg); \
-})
-
-#define __kvm_ioctl(kvm_fd, cmd, arg) \
- kvm_do_ioctl(kvm_fd, cmd, arg)
-
-#define kvm_ioctl(kvm_fd, cmd, arg) \
-({ \
- int ret = __kvm_ioctl(kvm_fd, cmd, arg); \
- \
- TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(#cmd, ret)); \
-})
-
-static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { }
-
-#define __vm_ioctl(vm, cmd, arg) \
-({ \
- static_assert_is_vm(vm); \
- kvm_do_ioctl((vm)->fd, cmd, arg); \
-})
-
-/*
- * Assert that a VM or vCPU ioctl() succeeded, with extra magic to detect if
- * the ioctl() failed because KVM killed/bugged the VM. To detect a dead VM,
- * probe KVM_CAP_USER_MEMORY, which (a) has been supported by KVM since before
- * selftests existed and (b) should never outright fail, i.e. is supposed to
- * return 0 or 1. If KVM kills a VM, KVM returns -EIO for all ioctl()s for the
- * VM and its vCPUs, including KVM_CHECK_EXTENSION.
- */
-#define __TEST_ASSERT_VM_VCPU_IOCTL(cond, name, ret, vm) \
-do { \
- int __errno = errno; \
- \
- static_assert_is_vm(vm); \
- \
- if (cond) \
- break; \
- \
- if (errno == EIO && \
- __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)KVM_CAP_USER_MEMORY) < 0) { \
- TEST_ASSERT(errno == EIO, "KVM killed the VM, should return -EIO"); \
- TEST_FAIL("KVM killed/bugged the VM, check the kernel log for clues"); \
- } \
- errno = __errno; \
- TEST_ASSERT(cond, __KVM_IOCTL_ERROR(name, ret)); \
-} while (0)
-
-#define TEST_ASSERT_VM_VCPU_IOCTL(cond, cmd, ret, vm) \
- __TEST_ASSERT_VM_VCPU_IOCTL(cond, #cmd, ret, vm)
-
-#define vm_ioctl(vm, cmd, arg) \
-({ \
- int ret = __vm_ioctl(vm, cmd, arg); \
- \
- __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \
-})
-
-static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { }
-
-#define __vcpu_ioctl(vcpu, cmd, arg) \
-({ \
- static_assert_is_vcpu(vcpu); \
- kvm_do_ioctl((vcpu)->fd, cmd, arg); \
-})
-
-#define vcpu_ioctl(vcpu, cmd, arg) \
-({ \
- int ret = __vcpu_ioctl(vcpu, cmd, arg); \
- \
- __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, (vcpu)->vm); \
-})
-
-/*
- * Looks up and returns the value corresponding to the capability
- * (KVM_CAP_*) given by cap.
- */
-static inline int vm_check_cap(struct kvm_vm *vm, long cap)
-{
- int ret = __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)cap);
-
- TEST_ASSERT_VM_VCPU_IOCTL(ret >= 0, KVM_CHECK_EXTENSION, ret, vm);
- return ret;
-}
-
-static inline int __vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0)
-{
- struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
-
- return __vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap);
-}
-static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0)
-{
- struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
-
- vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap);
-}
-
-static inline void vm_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa,
- uint64_t size, uint64_t attributes)
-{
- struct kvm_memory_attributes attr = {
- .attributes = attributes,
- .address = gpa,
- .size = size,
- .flags = 0,
- };
-
- /*
- * KVM_SET_MEMORY_ATTRIBUTES overwrites _all_ attributes. These flows
- * need significant enhancements to support multiple attributes.
- */
- TEST_ASSERT(!attributes || attributes == KVM_MEMORY_ATTRIBUTE_PRIVATE,
- "Update me to support multiple attributes!");
-
- vm_ioctl(vm, KVM_SET_MEMORY_ATTRIBUTES, &attr);
-}
-
-
-static inline void vm_mem_set_private(struct kvm_vm *vm, uint64_t gpa,
- uint64_t size)
-{
- vm_set_memory_attributes(vm, gpa, size, KVM_MEMORY_ATTRIBUTE_PRIVATE);
-}
-
-static inline void vm_mem_set_shared(struct kvm_vm *vm, uint64_t gpa,
- uint64_t size)
-{
- vm_set_memory_attributes(vm, gpa, size, 0);
-}
-
-void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t gpa, uint64_t size,
- bool punch_hole);
-
-static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, uint64_t gpa,
- uint64_t size)
-{
- vm_guest_mem_fallocate(vm, gpa, size, true);
-}
-
-static inline void vm_guest_mem_allocate(struct kvm_vm *vm, uint64_t gpa,
- uint64_t size)
-{
- vm_guest_mem_fallocate(vm, gpa, size, false);
-}
-
-void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
-const char *vm_guest_mode_string(uint32_t i);
-
-void kvm_vm_free(struct kvm_vm *vmp);
-void kvm_vm_restart(struct kvm_vm *vmp);
-void kvm_vm_release(struct kvm_vm *vmp);
-int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
- size_t len);
-void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename);
-int kvm_memfd_alloc(size_t size, bool hugepages);
-
-void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-
-static inline void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
-{
- struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
-
- vm_ioctl(vm, KVM_GET_DIRTY_LOG, &args);
-}
-
-static inline void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
- uint64_t first_page, uint32_t num_pages)
-{
- struct kvm_clear_dirty_log args = {
- .dirty_bitmap = log,
- .slot = slot,
- .first_page = first_page,
- .num_pages = num_pages
- };
-
- vm_ioctl(vm, KVM_CLEAR_DIRTY_LOG, &args);
-}
-
-static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
-{
- return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL);
-}
-
-static inline int vm_get_stats_fd(struct kvm_vm *vm)
-{
- int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL);
-
- TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_GET_STATS_FD, fd, vm);
- return fd;
-}
-
-static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header)
-{
- ssize_t ret;
-
- ret = pread(stats_fd, header, sizeof(*header), 0);
- TEST_ASSERT(ret == sizeof(*header),
- "Failed to read '%lu' header bytes, ret = '%ld'",
- sizeof(*header), ret);
-}
-
-struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
- struct kvm_stats_header *header);
-
-static inline ssize_t get_stats_descriptor_size(struct kvm_stats_header *header)
-{
- /*
- * The base size of the descriptor is defined by KVM's ABI, but the
- * size of the name field is variable, as far as KVM's ABI is
- * concerned. For a given instance of KVM, the name field is the same
- * size for all stats and is provided in the overall stats header.
- */
- return sizeof(struct kvm_stats_desc) + header->name_size;
-}
-
-static inline struct kvm_stats_desc *get_stats_descriptor(struct kvm_stats_desc *stats,
- int index,
- struct kvm_stats_header *header)
-{
- /*
- * Note, size_desc includes the size of the name field, which is
- * variable. i.e. this is NOT equivalent to &stats_desc[i].
- */
- return (void *)stats + index * get_stats_descriptor_size(header);
-}
-
-void read_stat_data(int stats_fd, struct kvm_stats_header *header,
- struct kvm_stats_desc *desc, uint64_t *data,
- size_t max_elements);
-
-void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
- size_t max_elements);
-
-static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name)
-{
- uint64_t data;
-
- __vm_get_stat(vm, stat_name, &data, 1);
- return data;
-}
-
-void vm_create_irqchip(struct kvm_vm *vm);
-
-static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
- uint64_t flags)
-{
- struct kvm_create_guest_memfd guest_memfd = {
- .size = size,
- .flags = flags,
- };
-
- return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd);
-}
-
-static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
- uint64_t flags)
-{
- int fd = __vm_create_guest_memfd(vm, size, flags);
-
- TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_GUEST_MEMFD, fd));
- return fd;
-}
-
-void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
- uint64_t gpa, uint64_t size, void *hva);
-int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
- uint64_t gpa, uint64_t size, void *hva);
-void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
- uint64_t gpa, uint64_t size, void *hva,
- uint32_t guest_memfd, uint64_t guest_memfd_offset);
-int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
- uint64_t gpa, uint64_t size, void *hva,
- uint32_t guest_memfd, uint64_t guest_memfd_offset);
-
-void vm_userspace_mem_region_add(struct kvm_vm *vm,
- enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags);
-void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset);
-
-#ifndef vm_arch_has_protected_memory
-static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
-{
- return false;
-}
-#endif
-
-void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
-void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
-void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
-struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
-void vm_populate_vaddr_bitmap(struct kvm_vm *vm);
-vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
-vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
- enum kvm_mem_region_type type);
-vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz,
- vm_vaddr_t vaddr_min,
- enum kvm_mem_region_type type);
-vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
-vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm,
- enum kvm_mem_region_type type);
-vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
-
-void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- unsigned int npages);
-void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
-void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
-vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
-void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa);
-
-
-static inline vm_paddr_t vm_untag_gpa(struct kvm_vm *vm, vm_paddr_t gpa)
-{
- return gpa & ~vm->gpa_tag_mask;
-}
-
-void vcpu_run(struct kvm_vcpu *vcpu);
-int _vcpu_run(struct kvm_vcpu *vcpu);
-
-static inline int __vcpu_run(struct kvm_vcpu *vcpu)
-{
- return __vcpu_ioctl(vcpu, KVM_RUN, NULL);
-}
-
-void vcpu_run_complete_io(struct kvm_vcpu *vcpu);
-struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu);
-
-static inline void vcpu_enable_cap(struct kvm_vcpu *vcpu, uint32_t cap,
- uint64_t arg0)
-{
- struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
-
- vcpu_ioctl(vcpu, KVM_ENABLE_CAP, &enable_cap);
-}
-
-static inline void vcpu_guest_debug_set(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug *debug)
-{
- vcpu_ioctl(vcpu, KVM_SET_GUEST_DEBUG, debug);
-}
-
-static inline void vcpu_mp_state_get(struct kvm_vcpu *vcpu,
- struct kvm_mp_state *mp_state)
-{
- vcpu_ioctl(vcpu, KVM_GET_MP_STATE, mp_state);
-}
-static inline void vcpu_mp_state_set(struct kvm_vcpu *vcpu,
- struct kvm_mp_state *mp_state)
-{
- vcpu_ioctl(vcpu, KVM_SET_MP_STATE, mp_state);
-}
-
-static inline void vcpu_regs_get(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
-{
- vcpu_ioctl(vcpu, KVM_GET_REGS, regs);
-}
-
-static inline void vcpu_regs_set(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
-{
- vcpu_ioctl(vcpu, KVM_SET_REGS, regs);
-}
-static inline void vcpu_sregs_get(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
-{
- vcpu_ioctl(vcpu, KVM_GET_SREGS, sregs);
-
-}
-static inline void vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
-{
- vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs);
-}
-static inline int _vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
-{
- return __vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs);
-}
-static inline void vcpu_fpu_get(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
-{
- vcpu_ioctl(vcpu, KVM_GET_FPU, fpu);
-}
-static inline void vcpu_fpu_set(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
-{
- vcpu_ioctl(vcpu, KVM_SET_FPU, fpu);
-}
-
-static inline int __vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
-{
- struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr };
-
- return __vcpu_ioctl(vcpu, KVM_GET_ONE_REG, &reg);
-}
-static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
-{
- struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
-
- return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
-}
-static inline void vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
-{
- struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr };
-
- vcpu_ioctl(vcpu, KVM_GET_ONE_REG, &reg);
-}
-static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
-{
- struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
-
- vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
-}
-
-#ifdef __KVM_HAVE_VCPU_EVENTS
-static inline void vcpu_events_get(struct kvm_vcpu *vcpu,
- struct kvm_vcpu_events *events)
-{
- vcpu_ioctl(vcpu, KVM_GET_VCPU_EVENTS, events);
-}
-static inline void vcpu_events_set(struct kvm_vcpu *vcpu,
- struct kvm_vcpu_events *events)
-{
- vcpu_ioctl(vcpu, KVM_SET_VCPU_EVENTS, events);
-}
-#endif
-#ifdef __x86_64__
-static inline void vcpu_nested_state_get(struct kvm_vcpu *vcpu,
- struct kvm_nested_state *state)
-{
- vcpu_ioctl(vcpu, KVM_GET_NESTED_STATE, state);
-}
-static inline int __vcpu_nested_state_set(struct kvm_vcpu *vcpu,
- struct kvm_nested_state *state)
-{
- return __vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state);
-}
-
-static inline void vcpu_nested_state_set(struct kvm_vcpu *vcpu,
- struct kvm_nested_state *state)
-{
- vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state);
-}
-#endif
-static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu)
-{
- int fd = __vcpu_ioctl(vcpu, KVM_GET_STATS_FD, NULL);
-
- TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_CHECK_EXTENSION, fd, vcpu->vm);
- return fd;
-}
-
-int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr);
-
-static inline void kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
-{
- int ret = __kvm_has_device_attr(dev_fd, group, attr);
-
- TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno);
-}
-
-int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val);
-
-static inline void kvm_device_attr_get(int dev_fd, uint32_t group,
- uint64_t attr, void *val)
-{
- int ret = __kvm_device_attr_get(dev_fd, group, attr, val);
-
- TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_GET_DEVICE_ATTR, ret));
-}
-
-int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val);
-
-static inline void kvm_device_attr_set(int dev_fd, uint32_t group,
- uint64_t attr, void *val)
-{
- int ret = __kvm_device_attr_set(dev_fd, group, attr, val);
-
- TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret));
-}
-
-static inline int __vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group,
- uint64_t attr)
-{
- return __kvm_has_device_attr(vcpu->fd, group, attr);
-}
-
-static inline void vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group,
- uint64_t attr)
-{
- kvm_has_device_attr(vcpu->fd, group, attr);
-}
-
-static inline int __vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group,
- uint64_t attr, void *val)
-{
- return __kvm_device_attr_get(vcpu->fd, group, attr, val);
-}
-
-static inline void vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group,
- uint64_t attr, void *val)
-{
- kvm_device_attr_get(vcpu->fd, group, attr, val);
-}
-
-static inline int __vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group,
- uint64_t attr, void *val)
-{
- return __kvm_device_attr_set(vcpu->fd, group, attr, val);
-}
-
-static inline void vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group,
- uint64_t attr, void *val)
-{
- kvm_device_attr_set(vcpu->fd, group, attr, val);
-}
-
-int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type);
-int __kvm_create_device(struct kvm_vm *vm, uint64_t type);
-
-static inline int kvm_create_device(struct kvm_vm *vm, uint64_t type)
-{
- int fd = __kvm_create_device(vm, type);
-
- TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_DEVICE, fd));
- return fd;
-}
-
-void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu);
-
-/*
- * VM VCPU Args Set
- *
- * Input Args:
- * vm - Virtual Machine
- * num - number of arguments
- * ... - arguments, each of type uint64_t
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the first @num input parameters for the function at @vcpu's entry point,
- * per the C calling convention of the architecture, to the values given as
- * variable args. Each of the variable args is expected to be of type uint64_t.
- * The maximum @num can be is specific to the architecture.
- */
-void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...);
-
-void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
-int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
-
-#define KVM_MAX_IRQ_ROUTES 4096
-
-struct kvm_irq_routing *kvm_gsi_routing_create(void);
-void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
- uint32_t gsi, uint32_t pin);
-int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
-void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
-
-const char *exit_reason_str(unsigned int exit_reason);
-
-vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
- uint32_t memslot);
-vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
- vm_paddr_t paddr_min, uint32_t memslot,
- bool protected);
-vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
-
-static inline vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
- vm_paddr_t paddr_min, uint32_t memslot)
-{
- /*
- * By default, allocate memory as protected for VMs that support
- * protected memory, as the majority of memory for such VMs is
- * protected, i.e. using shared memory is effectively opt-in.
- */
- return __vm_phy_pages_alloc(vm, num, paddr_min, memslot,
- vm_arch_has_protected_memory(vm));
-}
-
-/*
- * ____vm_create() does KVM_CREATE_VM and little else. __vm_create() also
- * loads the test binary into guest memory and creates an IRQ chip (x86 only).
- * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to
- * calculate the amount of memory needed for per-vCPU data, e.g. stacks.
- */
-struct kvm_vm *____vm_create(struct vm_shape shape);
-struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
- uint64_t nr_extra_pages);
-
-static inline struct kvm_vm *vm_create_barebones(void)
-{
- return ____vm_create(VM_SHAPE_DEFAULT);
-}
-
-#ifdef __x86_64__
-static inline struct kvm_vm *vm_create_barebones_protected_vm(void)
-{
- const struct vm_shape shape = {
- .mode = VM_MODE_DEFAULT,
- .type = KVM_X86_SW_PROTECTED_VM,
- };
-
- return ____vm_create(shape);
-}
-#endif
-
-static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus)
-{
- return __vm_create(VM_SHAPE_DEFAULT, nr_runnable_vcpus, 0);
-}
-
-struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus,
- uint64_t extra_mem_pages,
- void *guest_code, struct kvm_vcpu *vcpus[]);
-
-static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus,
- void *guest_code,
- struct kvm_vcpu *vcpus[])
-{
- return __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus, 0,
- guest_code, vcpus);
-}
-
-
-struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape,
- struct kvm_vcpu **vcpu,
- uint64_t extra_mem_pages,
- void *guest_code);
-
-/*
- * Create a VM with a single vCPU with reasonable defaults and @extra_mem_pages
- * additional pages of guest memory. Returns the VM and vCPU (via out param).
- */
-static inline struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
- uint64_t extra_mem_pages,
- void *guest_code)
-{
- return __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, vcpu,
- extra_mem_pages, guest_code);
-}
-
-static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
- void *guest_code)
-{
- return __vm_create_with_one_vcpu(vcpu, 0, guest_code);
-}
-
-static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape,
- struct kvm_vcpu **vcpu,
- void *guest_code)
-{
- return __vm_create_shape_with_one_vcpu(shape, vcpu, 0, guest_code);
-}
-
-struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm);
-
-void kvm_pin_this_task_to_pcpu(uint32_t pcpu);
-void kvm_print_vcpu_pinning_help(void);
-void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
- int nr_vcpus);
-
-unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
-unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
-unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
-unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages);
-static inline unsigned int
-vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
-{
- unsigned int n;
- n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages));
-#ifdef __s390x__
- /* s390 requires 1M aligned guest sizes */
- n = (n + 255) & ~255;
-#endif
- return n;
-}
-
-#define sync_global_to_guest(vm, g) ({ \
- typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
- memcpy(_p, &(g), sizeof(g)); \
-})
-
-#define sync_global_from_guest(vm, g) ({ \
- typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
- memcpy(&(g), _p, sizeof(g)); \
-})
-
-/*
- * Write a global value, but only in the VM's (guest's) domain. Primarily used
- * for "globals" that hold per-VM values (VMs always duplicate code and global
- * data into their own region of physical memory), but can be used anytime it's
- * undesirable to change the host's copy of the global.
- */
-#define write_guest_global(vm, g, val) ({ \
- typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
- typeof(g) _val = val; \
- \
- memcpy(_p, &(_val), sizeof(g)); \
-})
-
-void assert_on_unhandled_exception(struct kvm_vcpu *vcpu);
-
-void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu,
- uint8_t indent);
-
-static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu,
- uint8_t indent)
-{
- vcpu_arch_dump(stream, vcpu, indent);
-}
-
-/*
- * Adds a vCPU with reasonable defaults (e.g. a stack)
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpu_id - The id of the VCPU to add to the VM.
- */
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
-void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code);
-
-static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
- void *guest_code)
-{
- struct kvm_vcpu *vcpu = vm_arch_vcpu_add(vm, vcpu_id);
-
- vcpu_arch_set_entry_point(vcpu, guest_code);
-
- return vcpu;
-}
-
-/* Re-create a vCPU after restarting a VM, e.g. for state save/restore tests. */
-struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id);
-
-static inline struct kvm_vcpu *vm_vcpu_recreate(struct kvm_vm *vm,
- uint32_t vcpu_id)
-{
- return vm_arch_vcpu_recreate(vm, vcpu_id);
-}
-
-void vcpu_arch_free(struct kvm_vcpu *vcpu);
-
-void virt_arch_pgd_alloc(struct kvm_vm *vm);
-
-static inline void virt_pgd_alloc(struct kvm_vm *vm)
-{
- virt_arch_pgd_alloc(vm);
-}
-
-/*
- * VM Virtual Page Map
- *
- * Input Args:
- * vm - Virtual Machine
- * vaddr - VM Virtual Address
- * paddr - VM Physical Address
- * memslot - Memory region slot for new virtual translation tables
- *
- * Output Args: None
- *
- * Return: None
- *
- * Within @vm, creates a virtual translation for the page starting
- * at @vaddr to the page starting at @paddr.
- */
-void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
-
-static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
-{
- virt_arch_pg_map(vm, vaddr, paddr);
-}
-
-
-/*
- * Address Guest Virtual to Guest Physical
- *
- * Input Args:
- * vm - Virtual Machine
- * gva - VM virtual address
- *
- * Output Args: None
- *
- * Return:
- * Equivalent VM physical address
- *
- * Returns the VM physical address of the translated VM virtual
- * address given by @gva.
- */
-vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
-
-static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
-{
- return addr_arch_gva2gpa(vm, gva);
-}
-
-/*
- * Virtual Translation Tables Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * vm - Virtual Machine
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps to the FILE stream given by @stream, the contents of all the
- * virtual translation tables for the VM given by @vm.
- */
-void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-
-static inline void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
-{
- virt_arch_dump(stream, vm, indent);
-}
-
-
-static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm)
-{
- return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0);
-}
-
-/*
- * Arch hook that is invoked via a constructor, i.e. before exeucting main(),
- * to allow for arch-specific setup that is common to all tests, e.g. computing
- * the default guest "mode".
- */
-void kvm_selftest_arch_init(void);
-
-void kvm_arch_vm_post_create(struct kvm_vm *vm);
-
-bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr);
-
-uint32_t guest_get_vcpuid(void);
-
-#endif /* SELFTEST_KVM_UTIL_BASE_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util_types.h b/tools/testing/selftests/kvm/include/kvm_util_types.h
new file mode 100644
index 0000000000..ec787b97cf
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_util_types.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_TYPES_H
+#define SELFTEST_KVM_UTIL_TYPES_H
+
+/*
+ * Provide a version of static_assert() that is guaranteed to have an optional
+ * message param. _GNU_SOURCE is defined for all KVM selftests, _GNU_SOURCE
+ * implies _ISOC11_SOURCE, and if _ISOC11_SOURCE is defined, glibc #undefs and
+ * #defines static_assert() as a direct alias to _Static_assert() (see
+ * usr/include/assert.h). Define a custom macro instead of redefining
+ * static_assert() to avoid creating non-deterministic behavior that is
+ * dependent on include order.
+ */
+#define __kvm_static_assert(expr, msg, ...) _Static_assert(expr, msg)
+#define kvm_static_assert(expr, ...) __kvm_static_assert(expr, ##__VA_ARGS__, #expr)
+
+typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
+typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
+
+#endif /* SELFTEST_KVM_UTIL_TYPES_H */
diff --git a/tools/testing/selftests/kvm/include/memstress.h b/tools/testing/selftests/kvm/include/memstress.h
index ce4e603050..9071eb6dea 100644
--- a/tools/testing/selftests/kvm/include/memstress.h
+++ b/tools/testing/selftests/kvm/include/memstress.h
@@ -62,7 +62,6 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus,
void memstress_destroy_vm(struct kvm_vm *vm);
void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent);
-void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed);
void memstress_set_random_access(struct kvm_vm *vm, bool random_access);
void memstress_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct memstress_vcpu_args *));
diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h
index ce473fe251..5f38916633 100644
--- a/tools/testing/selftests/kvm/include/riscv/processor.h
+++ b/tools/testing/selftests/kvm/include/riscv/processor.h
@@ -50,6 +50,16 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype,
bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext);
+static inline bool __vcpu_has_isa_ext(struct kvm_vcpu *vcpu, uint64_t isa_ext)
+{
+ return __vcpu_has_ext(vcpu, RISCV_ISA_EXT_REG(isa_ext));
+}
+
+static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, uint64_t sbi_ext)
+{
+ return __vcpu_has_ext(vcpu, RISCV_SBI_EXT_REG(sbi_ext));
+}
+
struct ex_regs {
unsigned long ra;
unsigned long sp;
@@ -154,45 +164,6 @@ void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handle
#define PGTBL_PAGE_SIZE PGTBL_L0_BLOCK_SIZE
#define PGTBL_PAGE_SIZE_SHIFT PGTBL_L0_BLOCK_SHIFT
-/* SBI return error codes */
-#define SBI_SUCCESS 0
-#define SBI_ERR_FAILURE -1
-#define SBI_ERR_NOT_SUPPORTED -2
-#define SBI_ERR_INVALID_PARAM -3
-#define SBI_ERR_DENIED -4
-#define SBI_ERR_INVALID_ADDRESS -5
-#define SBI_ERR_ALREADY_AVAILABLE -6
-#define SBI_ERR_ALREADY_STARTED -7
-#define SBI_ERR_ALREADY_STOPPED -8
-
-#define SBI_EXT_EXPERIMENTAL_START 0x08000000
-#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF
-
-#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END
-#define KVM_RISCV_SELFTESTS_SBI_UCALL 0
-#define KVM_RISCV_SELFTESTS_SBI_UNEXP 1
-
-enum sbi_ext_id {
- SBI_EXT_BASE = 0x10,
- SBI_EXT_STA = 0x535441,
-};
-
-enum sbi_ext_base_fid {
- SBI_EXT_BASE_PROBE_EXT = 3,
-};
-
-struct sbiret {
- long error;
- long value;
-};
-
-struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
- unsigned long arg1, unsigned long arg2,
- unsigned long arg3, unsigned long arg4,
- unsigned long arg5);
-
-bool guest_sbi_probe_extension(int extid, long *out_val);
-
static inline void local_irq_enable(void)
{
csr_set(CSR_SSTATUS, SR_SIE);
diff --git a/tools/testing/selftests/kvm/include/riscv/sbi.h b/tools/testing/selftests/kvm/include/riscv/sbi.h
new file mode 100644
index 0000000000..046b432ae8
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/riscv/sbi.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * RISC-V SBI specific definitions
+ *
+ * Copyright (C) 2024 Rivos Inc.
+ */
+
+#ifndef SELFTEST_KVM_SBI_H
+#define SELFTEST_KVM_SBI_H
+
+/* SBI spec version fields */
+#define SBI_SPEC_VERSION_DEFAULT 0x1
+#define SBI_SPEC_VERSION_MAJOR_SHIFT 24
+#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f
+#define SBI_SPEC_VERSION_MINOR_MASK 0xffffff
+
+/* SBI return error codes */
+#define SBI_SUCCESS 0
+#define SBI_ERR_FAILURE -1
+#define SBI_ERR_NOT_SUPPORTED -2
+#define SBI_ERR_INVALID_PARAM -3
+#define SBI_ERR_DENIED -4
+#define SBI_ERR_INVALID_ADDRESS -5
+#define SBI_ERR_ALREADY_AVAILABLE -6
+#define SBI_ERR_ALREADY_STARTED -7
+#define SBI_ERR_ALREADY_STOPPED -8
+
+#define SBI_EXT_EXPERIMENTAL_START 0x08000000
+#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF
+
+#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END
+#define KVM_RISCV_SELFTESTS_SBI_UCALL 0
+#define KVM_RISCV_SELFTESTS_SBI_UNEXP 1
+
+enum sbi_ext_id {
+ SBI_EXT_BASE = 0x10,
+ SBI_EXT_STA = 0x535441,
+ SBI_EXT_PMU = 0x504D55,
+};
+
+enum sbi_ext_base_fid {
+ SBI_EXT_BASE_GET_SPEC_VERSION = 0,
+ SBI_EXT_BASE_GET_IMP_ID,
+ SBI_EXT_BASE_GET_IMP_VERSION,
+ SBI_EXT_BASE_PROBE_EXT = 3,
+};
+enum sbi_ext_pmu_fid {
+ SBI_EXT_PMU_NUM_COUNTERS = 0,
+ SBI_EXT_PMU_COUNTER_GET_INFO,
+ SBI_EXT_PMU_COUNTER_CFG_MATCH,
+ SBI_EXT_PMU_COUNTER_START,
+ SBI_EXT_PMU_COUNTER_STOP,
+ SBI_EXT_PMU_COUNTER_FW_READ,
+ SBI_EXT_PMU_COUNTER_FW_READ_HI,
+ SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
+};
+
+union sbi_pmu_ctr_info {
+ unsigned long value;
+ struct {
+ unsigned long csr:12;
+ unsigned long width:6;
+#if __riscv_xlen == 32
+ unsigned long reserved:13;
+#else
+ unsigned long reserved:45;
+#endif
+ unsigned long type:1;
+ };
+};
+
+struct riscv_pmu_snapshot_data {
+ u64 ctr_overflow_mask;
+ u64 ctr_values[64];
+ u64 reserved[447];
+};
+
+struct sbiret {
+ long error;
+ long value;
+};
+
+/** General pmu event codes specified in SBI PMU extension */
+enum sbi_pmu_hw_generic_events_t {
+ SBI_PMU_HW_NO_EVENT = 0,
+ SBI_PMU_HW_CPU_CYCLES = 1,
+ SBI_PMU_HW_INSTRUCTIONS = 2,
+ SBI_PMU_HW_CACHE_REFERENCES = 3,
+ SBI_PMU_HW_CACHE_MISSES = 4,
+ SBI_PMU_HW_BRANCH_INSTRUCTIONS = 5,
+ SBI_PMU_HW_BRANCH_MISSES = 6,
+ SBI_PMU_HW_BUS_CYCLES = 7,
+ SBI_PMU_HW_STALLED_CYCLES_FRONTEND = 8,
+ SBI_PMU_HW_STALLED_CYCLES_BACKEND = 9,
+ SBI_PMU_HW_REF_CPU_CYCLES = 10,
+
+ SBI_PMU_HW_GENERAL_MAX,
+};
+
+/* SBI PMU counter types */
+enum sbi_pmu_ctr_type {
+ SBI_PMU_CTR_TYPE_HW = 0x0,
+ SBI_PMU_CTR_TYPE_FW,
+};
+
+/* Flags defined for config matching function */
+#define SBI_PMU_CFG_FLAG_SKIP_MATCH BIT(0)
+#define SBI_PMU_CFG_FLAG_CLEAR_VALUE BIT(1)
+#define SBI_PMU_CFG_FLAG_AUTO_START BIT(2)
+#define SBI_PMU_CFG_FLAG_SET_VUINH BIT(3)
+#define SBI_PMU_CFG_FLAG_SET_VSINH BIT(4)
+#define SBI_PMU_CFG_FLAG_SET_UINH BIT(5)
+#define SBI_PMU_CFG_FLAG_SET_SINH BIT(6)
+#define SBI_PMU_CFG_FLAG_SET_MINH BIT(7)
+
+/* Flags defined for counter start function */
+#define SBI_PMU_START_FLAG_SET_INIT_VALUE BIT(0)
+#define SBI_PMU_START_FLAG_INIT_SNAPSHOT BIT(1)
+
+/* Flags defined for counter stop function */
+#define SBI_PMU_STOP_FLAG_RESET BIT(0)
+#define SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT BIT(1)
+
+struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
+ unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4,
+ unsigned long arg5);
+
+bool guest_sbi_probe_extension(int extid, long *out_val);
+
+/* Make SBI version */
+static inline unsigned long sbi_mk_version(unsigned long major,
+ unsigned long minor)
+{
+ return ((major & SBI_SPEC_VERSION_MAJOR_MASK) << SBI_SPEC_VERSION_MAJOR_SHIFT)
+ | (minor & SBI_SPEC_VERSION_MINOR_MASK);
+}
+
+unsigned long get_host_sbi_spec_version(void);
+
+#endif /* SELFTEST_KVM_SBI_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/ucall.h b/tools/testing/selftests/kvm/include/riscv/ucall.h
index be46eb32ec..a695ae36f3 100644
--- a/tools/testing/selftests/kvm/include/riscv/ucall.h
+++ b/tools/testing/selftests/kvm/include/riscv/ucall.h
@@ -3,6 +3,7 @@
#define SELFTEST_KVM_UCALL_H
#include "processor.h"
+#include "sbi.h"
#define UCALL_EXIT_REASON KVM_EXIT_RISCV_SBI
diff --git a/tools/testing/selftests/kvm/include/s390x/ucall.h b/tools/testing/selftests/kvm/include/s390x/ucall.h
index b231bf2e49..8035a872a3 100644
--- a/tools/testing/selftests/kvm/include/s390x/ucall.h
+++ b/tools/testing/selftests/kvm/include/s390x/ucall.h
@@ -2,7 +2,7 @@
#ifndef SELFTEST_KVM_UCALL_H
#define SELFTEST_KVM_UCALL_H
-#include "kvm_util_base.h"
+#include "kvm_util.h"
#define UCALL_EXIT_REASON KVM_EXIT_S390_SIEIC
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 8a6e30612c..3e47305884 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -91,9 +91,28 @@ struct guest_random_state {
uint32_t seed;
};
+extern uint32_t guest_random_seed;
+extern struct guest_random_state guest_rng;
+
struct guest_random_state new_guest_random_state(uint32_t seed);
uint32_t guest_random_u32(struct guest_random_state *state);
+static inline bool __guest_random_bool(struct guest_random_state *state,
+ uint8_t percent)
+{
+ return (guest_random_u32(state) % 100) < percent;
+}
+
+static inline bool guest_random_bool(struct guest_random_state *state)
+{
+ return __guest_random_bool(state, 50);
+}
+
+static inline uint64_t guest_random_u64(struct guest_random_state *state)
+{
+ return ((uint64_t)guest_random_u32(state) << 32) | guest_random_u32(state);
+}
+
enum vm_mem_backing_src_type {
VM_MEM_SRC_ANONYMOUS,
VM_MEM_SRC_ANONYMOUS_THP,
diff --git a/tools/testing/selftests/kvm/include/userfaultfd_util.h b/tools/testing/selftests/kvm/include/userfaultfd_util.h
index 877449c345..60f7f9d435 100644
--- a/tools/testing/selftests/kvm/include/userfaultfd_util.h
+++ b/tools/testing/selftests/kvm/include/userfaultfd_util.h
@@ -5,9 +5,6 @@
* Copyright (C) 2018, Red Hat, Inc.
* Copyright (C) 2019-2022 Google LLC
*/
-
-#define _GNU_SOURCE /* for pipe2 */
-
#include <inttypes.h>
#include <time.h>
#include <pthread.h>
@@ -17,17 +14,27 @@
typedef int (*uffd_handler_t)(int uffd_mode, int uffd, struct uffd_msg *msg);
-struct uffd_desc {
+struct uffd_reader_args {
int uffd_mode;
int uffd;
- int pipefds[2];
useconds_t delay;
uffd_handler_t handler;
- pthread_t thread;
+ /* Holds the read end of the pipe for killing the reader. */
+ int pipe;
+};
+
+struct uffd_desc {
+ int uffd;
+ uint64_t num_readers;
+ /* Holds the write ends of the pipes for killing the readers. */
+ int *pipefds;
+ pthread_t *readers;
+ struct uffd_reader_args *reader_args;
};
struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
void *hva, uint64_t len,
+ uint64_t num_readers,
uffd_handler_t handler);
void uffd_stop_demand_paging(struct uffd_desc *uffd);
diff --git a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h
index 9f1725192a..972bb1c4ab 100644
--- a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h
@@ -5,7 +5,16 @@
#include <stdbool.h>
#include <stdint.h>
+#include "kvm_util_types.h"
+#include "test_util.h"
+
+extern bool is_forced_emulation_enabled;
+
struct kvm_vm_arch {
+ vm_vaddr_t gdt;
+ vm_vaddr_t tss;
+ vm_vaddr_t idt;
+
uint64_t c_bit;
uint64_t s_bit;
int sev_fd;
@@ -20,4 +29,23 @@ static inline bool __vm_arch_has_protected_memory(struct kvm_vm_arch *arch)
#define vm_arch_has_protected_memory(vm) \
__vm_arch_has_protected_memory(&(vm)->arch)
+#define vcpu_arch_put_guest(mem, __val) \
+do { \
+ const typeof(mem) val = (__val); \
+ \
+ if (!is_forced_emulation_enabled || guest_random_bool(&guest_rng)) { \
+ (mem) = val; \
+ } else if (guest_random_bool(&guest_rng)) { \
+ __asm__ __volatile__(KVM_FEP "mov %1, %0" \
+ : "+m" (mem) \
+ : "r" (val) : "memory"); \
+ } else { \
+ uint64_t __old = READ_ONCE(mem); \
+ \
+ __asm__ __volatile__(KVM_FEP LOCK_PREFIX "cmpxchg %[new], %[ptr]" \
+ : [ptr] "+m" (mem), [old] "+a" (__old) \
+ : [new]"r" (val) : "memory", "cc"); \
+ } \
+} while (0)
+
#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 81ce37ec40..c0c7c1fe93 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -18,17 +18,12 @@
#include <linux/kvm_para.h>
#include <linux/stringify.h>
-#include "../kvm_util.h"
+#include "kvm_util.h"
+#include "ucall_common.h"
extern bool host_cpu_is_intel;
extern bool host_cpu_is_amd;
-enum vm_guest_x86_subtype {
- VM_SUBTYPE_NONE = 0,
- VM_SUBTYPE_SEV,
- VM_SUBTYPE_SEV_ES,
-};
-
/* Forced emulation prefix, used to invoke the emulator unconditionally. */
#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
@@ -282,6 +277,7 @@ struct kvm_x86_cpu_property {
#define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31)
#define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7)
#define X86_PROPERTY_MAX_VIRT_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15)
+#define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23)
#define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
#define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
@@ -1139,8 +1135,6 @@ struct idt_entry {
uint32_t offset2; uint32_t reserved;
};
-void vm_init_descriptor_tables(struct kvm_vm *vm);
-void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu);
void vm_install_exception_handler(struct kvm_vm *vm, int vector,
void (*handler)(struct ex_regs *));
diff --git a/tools/testing/selftests/kvm/include/x86_64/sev.h b/tools/testing/selftests/kvm/include/x86_64/sev.h
index 8a1bf88474..82c11c81a9 100644
--- a/tools/testing/selftests/kvm/include/x86_64/sev.h
+++ b/tools/testing/selftests/kvm/include/x86_64/sev.h
@@ -31,8 +31,9 @@ void sev_vm_launch(struct kvm_vm *vm, uint32_t policy);
void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement);
void sev_vm_launch_finish(struct kvm_vm *vm);
-struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t policy, void *guest_code,
+struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
struct kvm_vcpu **cpu);
+void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement);
kvm_static_assert(SEV_RET_SUCCESS == 0);
@@ -67,20 +68,8 @@ kvm_static_assert(SEV_RET_SUCCESS == 0);
__TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \
})
-static inline void sev_vm_init(struct kvm_vm *vm)
-{
- vm->arch.sev_fd = open_sev_dev_path_or_exit();
-
- vm_sev_ioctl(vm, KVM_SEV_INIT, NULL);
-}
-
-
-static inline void sev_es_vm_init(struct kvm_vm *vm)
-{
- vm->arch.sev_fd = open_sev_dev_path_or_exit();
-
- vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL);
-}
+void sev_vm_init(struct kvm_vm *vm);
+void sev_es_vm_init(struct kvm_vm *vm);
static inline void sev_register_encrypted_memory(struct kvm_vm *vm,
struct userspace_mem_region *region)
diff --git a/tools/testing/selftests/kvm/include/x86_64/ucall.h b/tools/testing/selftests/kvm/include/x86_64/ucall.h
index 06b244bd06..d3825dcc3c 100644
--- a/tools/testing/selftests/kvm/include/x86_64/ucall.h
+++ b/tools/testing/selftests/kvm/include/x86_64/ucall.h
@@ -2,7 +2,7 @@
#ifndef SELFTEST_KVM_UCALL_H
#define SELFTEST_KVM_UCALL_H
-#include "kvm_util_base.h"
+#include "kvm_util.h"
#define UCALL_EXIT_REASON KVM_EXIT_IO
diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
index 698c1cfa31..f02355c3c4 100644
--- a/tools/testing/selftests/kvm/kvm_binary_stats_test.c
+++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
@@ -6,8 +6,6 @@
*
* Test the fd-based interface for KVM statistics.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index b9e23265e4..c78f34699f 100644
--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -6,8 +6,6 @@
*
* Test for KVM_CAP_MAX_VCPUS and KVM_CAP_MAX_VCPU_ID.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index e0ba97ac1c..dd8b12f626 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -8,9 +8,6 @@
* page size have been pre-allocated on your system, if you are planning to
* use hugepages to back the guest memory for testing.
*/
-
-#define _GNU_SOURCE /* for program_invocation_name */
-
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
@@ -21,6 +18,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "guest_modes.h"
+#include "ucall_common.h"
#define TEST_MEM_SLOT_INDEX 1
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/aarch64/gic.c
index 55668631d5..7abbf88665 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic.c
@@ -17,13 +17,12 @@
static const struct gic_common_ops *gic_common_ops;
static struct spinlock gic_lock;
-static void gic_cpu_init(unsigned int cpu, void *redist_base)
+static void gic_cpu_init(unsigned int cpu)
{
- gic_common_ops->gic_cpu_init(cpu, redist_base);
+ gic_common_ops->gic_cpu_init(cpu);
}
-static void
-gic_dist_init(enum gic_type type, unsigned int nr_cpus, void *dist_base)
+static void gic_dist_init(enum gic_type type, unsigned int nr_cpus)
{
const struct gic_common_ops *gic_ops = NULL;
@@ -40,7 +39,7 @@ gic_dist_init(enum gic_type type, unsigned int nr_cpus, void *dist_base)
GUEST_ASSERT(gic_ops);
- gic_ops->gic_init(nr_cpus, dist_base);
+ gic_ops->gic_init(nr_cpus);
gic_common_ops = gic_ops;
/* Make sure that the initialized data is visible to all the vCPUs */
@@ -49,18 +48,15 @@ gic_dist_init(enum gic_type type, unsigned int nr_cpus, void *dist_base)
spin_unlock(&gic_lock);
}
-void gic_init(enum gic_type type, unsigned int nr_cpus,
- void *dist_base, void *redist_base)
+void gic_init(enum gic_type type, unsigned int nr_cpus)
{
uint32_t cpu = guest_get_vcpuid();
GUEST_ASSERT(type < GIC_TYPE_MAX);
- GUEST_ASSERT(dist_base);
- GUEST_ASSERT(redist_base);
GUEST_ASSERT(nr_cpus);
- gic_dist_init(type, nr_cpus, dist_base);
- gic_cpu_init(cpu, redist_base);
+ gic_dist_init(type, nr_cpus);
+ gic_cpu_init(cpu);
}
void gic_irq_enable(unsigned int intid)
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
index 75d07313c8..d24e9ecc96 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
@@ -8,8 +8,8 @@
#define SELFTEST_KVM_GIC_PRIVATE_H
struct gic_common_ops {
- void (*gic_init)(unsigned int nr_cpus, void *dist_base);
- void (*gic_cpu_init)(unsigned int cpu, void *redist_base);
+ void (*gic_init)(unsigned int nr_cpus);
+ void (*gic_cpu_init)(unsigned int cpu);
void (*gic_irq_enable)(unsigned int intid);
void (*gic_irq_disable)(unsigned int intid);
uint64_t (*gic_read_iar)(void);
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
index 263bf3ed8f..66d05506f7 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
@@ -9,12 +9,21 @@
#include "processor.h"
#include "delay.h"
+#include "gic.h"
#include "gic_v3.h"
#include "gic_private.h"
+#define GICV3_MAX_CPUS 512
+
+#define GICD_INT_DEF_PRI 0xa0
+#define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\
+ (GICD_INT_DEF_PRI << 16) |\
+ (GICD_INT_DEF_PRI << 8) |\
+ GICD_INT_DEF_PRI)
+
+#define ICC_PMR_DEF_PRIO 0xf0
+
struct gicv3_data {
- void *dist_base;
- void *redist_base[GICV3_MAX_CPUS];
unsigned int nr_cpus;
unsigned int nr_spis;
};
@@ -35,17 +44,23 @@ static void gicv3_gicd_wait_for_rwp(void)
{
unsigned int count = 100000; /* 1s */
- while (readl(gicv3_data.dist_base + GICD_CTLR) & GICD_CTLR_RWP) {
+ while (readl(GICD_BASE_GVA + GICD_CTLR) & GICD_CTLR_RWP) {
GUEST_ASSERT(count--);
udelay(10);
}
}
-static void gicv3_gicr_wait_for_rwp(void *redist_base)
+static inline volatile void *gicr_base_cpu(uint32_t cpu)
+{
+ /* Align all the redistributors sequentially */
+ return GICR_BASE_GVA + cpu * SZ_64K * 2;
+}
+
+static void gicv3_gicr_wait_for_rwp(uint32_t cpu)
{
unsigned int count = 100000; /* 1s */
- while (readl(redist_base + GICR_CTLR) & GICR_CTLR_RWP) {
+ while (readl(gicr_base_cpu(cpu) + GICR_CTLR) & GICR_CTLR_RWP) {
GUEST_ASSERT(count--);
udelay(10);
}
@@ -56,7 +71,7 @@ static void gicv3_wait_for_rwp(uint32_t cpu_or_dist)
if (cpu_or_dist & DIST_BIT)
gicv3_gicd_wait_for_rwp();
else
- gicv3_gicr_wait_for_rwp(gicv3_data.redist_base[cpu_or_dist]);
+ gicv3_gicr_wait_for_rwp(cpu_or_dist);
}
static enum gicv3_intid_range get_intid_range(unsigned int intid)
@@ -116,15 +131,15 @@ static void gicv3_set_eoi_split(bool split)
uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset)
{
- void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base
- : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]);
+ volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA
+ : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist));
return readl(base + offset);
}
void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val)
{
- void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base
- : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]);
+ volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA
+ : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist));
writel(reg_val, base + offset);
}
@@ -263,7 +278,7 @@ static bool gicv3_irq_get_pending(uint32_t intid)
return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1);
}
-static void gicv3_enable_redist(void *redist_base)
+static void gicv3_enable_redist(volatile void *redist_base)
{
uint32_t val = readl(redist_base + GICR_WAKER);
unsigned int count = 100000; /* 1s */
@@ -278,21 +293,15 @@ static void gicv3_enable_redist(void *redist_base)
}
}
-static inline void *gicr_base_cpu(void *redist_base, uint32_t cpu)
+static void gicv3_cpu_init(unsigned int cpu)
{
- /* Align all the redistributors sequentially */
- return redist_base + cpu * SZ_64K * 2;
-}
-
-static void gicv3_cpu_init(unsigned int cpu, void *redist_base)
-{
- void *sgi_base;
+ volatile void *sgi_base;
unsigned int i;
- void *redist_base_cpu;
+ volatile void *redist_base_cpu;
GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
- redist_base_cpu = gicr_base_cpu(redist_base, cpu);
+ redist_base_cpu = gicr_base_cpu(cpu);
sgi_base = sgi_base_from_redist(redist_base_cpu);
gicv3_enable_redist(redist_base_cpu);
@@ -310,7 +319,7 @@ static void gicv3_cpu_init(unsigned int cpu, void *redist_base)
writel(GICD_INT_DEF_PRI_X4,
sgi_base + GICR_IPRIORITYR0 + i);
- gicv3_gicr_wait_for_rwp(redist_base_cpu);
+ gicv3_gicr_wait_for_rwp(cpu);
/* Enable the GIC system register (ICC_*) access */
write_sysreg_s(read_sysreg_s(SYS_ICC_SRE_EL1) | ICC_SRE_EL1_SRE,
@@ -320,18 +329,15 @@ static void gicv3_cpu_init(unsigned int cpu, void *redist_base)
write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1);
/* Enable non-secure Group-1 interrupts */
- write_sysreg_s(ICC_IGRPEN1_EL1_ENABLE, SYS_ICC_GRPEN1_EL1);
-
- gicv3_data.redist_base[cpu] = redist_base_cpu;
+ write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
}
static void gicv3_dist_init(void)
{
- void *dist_base = gicv3_data.dist_base;
unsigned int i;
/* Disable the distributor until we set things up */
- writel(0, dist_base + GICD_CTLR);
+ writel(0, GICD_BASE_GVA + GICD_CTLR);
gicv3_gicd_wait_for_rwp();
/*
@@ -339,33 +345,32 @@ static void gicv3_dist_init(void)
* Also, deactivate and disable them.
*/
for (i = 32; i < gicv3_data.nr_spis; i += 32) {
- writel(~0, dist_base + GICD_IGROUPR + i / 8);
- writel(~0, dist_base + GICD_ICACTIVER + i / 8);
- writel(~0, dist_base + GICD_ICENABLER + i / 8);
+ writel(~0, GICD_BASE_GVA + GICD_IGROUPR + i / 8);
+ writel(~0, GICD_BASE_GVA + GICD_ICACTIVER + i / 8);
+ writel(~0, GICD_BASE_GVA + GICD_ICENABLER + i / 8);
}
/* Set a default priority for all the SPIs */
for (i = 32; i < gicv3_data.nr_spis; i += 4)
writel(GICD_INT_DEF_PRI_X4,
- dist_base + GICD_IPRIORITYR + i);
+ GICD_BASE_GVA + GICD_IPRIORITYR + i);
/* Wait for the settings to sync-in */
gicv3_gicd_wait_for_rwp();
/* Finally, enable the distributor globally with ARE */
writel(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A |
- GICD_CTLR_ENABLE_G1, dist_base + GICD_CTLR);
+ GICD_CTLR_ENABLE_G1, GICD_BASE_GVA + GICD_CTLR);
gicv3_gicd_wait_for_rwp();
}
-static void gicv3_init(unsigned int nr_cpus, void *dist_base)
+static void gicv3_init(unsigned int nr_cpus)
{
GUEST_ASSERT(nr_cpus <= GICV3_MAX_CPUS);
gicv3_data.nr_cpus = nr_cpus;
- gicv3_data.dist_base = dist_base;
gicv3_data.nr_spis = GICD_TYPER_SPIS(
- readl(gicv3_data.dist_base + GICD_TYPER));
+ readl(GICD_BASE_GVA + GICD_TYPER));
if (gicv3_data.nr_spis > 1020)
gicv3_data.nr_spis = 1020;
@@ -396,3 +401,27 @@ const struct gic_common_ops gicv3_ops = {
.gic_irq_get_pending = gicv3_irq_get_pending,
.gic_irq_set_config = gicv3_irq_set_config,
};
+
+void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
+ vm_paddr_t pend_table)
+{
+ volatile void *rdist_base = gicr_base_cpu(guest_get_vcpuid());
+
+ u32 ctlr;
+ u64 val;
+
+ val = (cfg_table |
+ GICR_PROPBASER_InnerShareable |
+ GICR_PROPBASER_RaWaWb |
+ ((ilog2(cfg_table_size) - 1) & GICR_PROPBASER_IDBITS_MASK));
+ writeq_relaxed(val, rdist_base + GICR_PROPBASER);
+
+ val = (pend_table |
+ GICR_PENDBASER_InnerShareable |
+ GICR_PENDBASER_RaWaWb);
+ writeq_relaxed(val, rdist_base + GICR_PENDBASER);
+
+ ctlr = readl_relaxed(rdist_base + GICR_CTLR);
+ ctlr |= GICR_CTLR_ENABLE_LPIS;
+ writel_relaxed(ctlr, rdist_base + GICR_CTLR);
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c
new file mode 100644
index 0000000000..09f2705456
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Guest ITS library, generously donated by drivers/irqchip/irq-gic-v3-its.c
+ * over in the kernel tree.
+ */
+
+#include <linux/kvm.h>
+#include <linux/sizes.h>
+#include <asm/kvm_para.h>
+#include <asm/kvm.h>
+
+#include "kvm_util.h"
+#include "vgic.h"
+#include "gic.h"
+#include "gic_v3.h"
+#include "processor.h"
+
+static u64 its_read_u64(unsigned long offset)
+{
+ return readq_relaxed(GITS_BASE_GVA + offset);
+}
+
+static void its_write_u64(unsigned long offset, u64 val)
+{
+ writeq_relaxed(val, GITS_BASE_GVA + offset);
+}
+
+static u32 its_read_u32(unsigned long offset)
+{
+ return readl_relaxed(GITS_BASE_GVA + offset);
+}
+
+static void its_write_u32(unsigned long offset, u32 val)
+{
+ writel_relaxed(val, GITS_BASE_GVA + offset);
+}
+
+static unsigned long its_find_baser(unsigned int type)
+{
+ int i;
+
+ for (i = 0; i < GITS_BASER_NR_REGS; i++) {
+ u64 baser;
+ unsigned long offset = GITS_BASER + (i * sizeof(baser));
+
+ baser = its_read_u64(offset);
+ if (GITS_BASER_TYPE(baser) == type)
+ return offset;
+ }
+
+ GUEST_FAIL("Couldn't find an ITS BASER of type %u", type);
+ return -1;
+}
+
+static void its_install_table(unsigned int type, vm_paddr_t base, size_t size)
+{
+ unsigned long offset = its_find_baser(type);
+ u64 baser;
+
+ baser = ((size / SZ_64K) - 1) |
+ GITS_BASER_PAGE_SIZE_64K |
+ GITS_BASER_InnerShareable |
+ base |
+ GITS_BASER_RaWaWb |
+ GITS_BASER_VALID;
+
+ its_write_u64(offset, baser);
+}
+
+static void its_install_cmdq(vm_paddr_t base, size_t size)
+{
+ u64 cbaser;
+
+ cbaser = ((size / SZ_4K) - 1) |
+ GITS_CBASER_InnerShareable |
+ base |
+ GITS_CBASER_RaWaWb |
+ GITS_CBASER_VALID;
+
+ its_write_u64(GITS_CBASER, cbaser);
+}
+
+void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz,
+ vm_paddr_t device_tbl, size_t device_tbl_sz,
+ vm_paddr_t cmdq, size_t cmdq_size)
+{
+ u32 ctlr;
+
+ its_install_table(GITS_BASER_TYPE_COLLECTION, coll_tbl, coll_tbl_sz);
+ its_install_table(GITS_BASER_TYPE_DEVICE, device_tbl, device_tbl_sz);
+ its_install_cmdq(cmdq, cmdq_size);
+
+ ctlr = its_read_u32(GITS_CTLR);
+ ctlr |= GITS_CTLR_ENABLE;
+ its_write_u32(GITS_CTLR, ctlr);
+}
+
+struct its_cmd_block {
+ union {
+ u64 raw_cmd[4];
+ __le64 raw_cmd_le[4];
+ };
+};
+
+static inline void its_fixup_cmd(struct its_cmd_block *cmd)
+{
+ /* Let's fixup BE commands */
+ cmd->raw_cmd_le[0] = cpu_to_le64(cmd->raw_cmd[0]);
+ cmd->raw_cmd_le[1] = cpu_to_le64(cmd->raw_cmd[1]);
+ cmd->raw_cmd_le[2] = cpu_to_le64(cmd->raw_cmd[2]);
+ cmd->raw_cmd_le[3] = cpu_to_le64(cmd->raw_cmd[3]);
+}
+
+static void its_mask_encode(u64 *raw_cmd, u64 val, int h, int l)
+{
+ u64 mask = GENMASK_ULL(h, l);
+ *raw_cmd &= ~mask;
+ *raw_cmd |= (val << l) & mask;
+}
+
+static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr)
+{
+ its_mask_encode(&cmd->raw_cmd[0], cmd_nr, 7, 0);
+}
+
+static void its_encode_devid(struct its_cmd_block *cmd, u32 devid)
+{
+ its_mask_encode(&cmd->raw_cmd[0], devid, 63, 32);
+}
+
+static void its_encode_event_id(struct its_cmd_block *cmd, u32 id)
+{
+ its_mask_encode(&cmd->raw_cmd[1], id, 31, 0);
+}
+
+static void its_encode_phys_id(struct its_cmd_block *cmd, u32 phys_id)
+{
+ its_mask_encode(&cmd->raw_cmd[1], phys_id, 63, 32);
+}
+
+static void its_encode_size(struct its_cmd_block *cmd, u8 size)
+{
+ its_mask_encode(&cmd->raw_cmd[1], size, 4, 0);
+}
+
+static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr)
+{
+ its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 51, 8);
+}
+
+static void its_encode_valid(struct its_cmd_block *cmd, int valid)
+{
+ its_mask_encode(&cmd->raw_cmd[2], !!valid, 63, 63);
+}
+
+static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr)
+{
+ its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16);
+}
+
+static void its_encode_collection(struct its_cmd_block *cmd, u16 col)
+{
+ its_mask_encode(&cmd->raw_cmd[2], col, 15, 0);
+}
+
+#define GITS_CMDQ_POLL_ITERATIONS 0
+
+static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd)
+{
+ u64 cwriter = its_read_u64(GITS_CWRITER);
+ struct its_cmd_block *dst = cmdq_base + cwriter;
+ u64 cbaser = its_read_u64(GITS_CBASER);
+ size_t cmdq_size;
+ u64 next;
+ int i;
+
+ cmdq_size = ((cbaser & 0xFF) + 1) * SZ_4K;
+
+ its_fixup_cmd(cmd);
+
+ WRITE_ONCE(*dst, *cmd);
+ dsb(ishst);
+ next = (cwriter + sizeof(*cmd)) % cmdq_size;
+ its_write_u64(GITS_CWRITER, next);
+
+ /*
+ * Polling isn't necessary considering KVM's ITS emulation at the time
+ * of writing this, as the CMDQ is processed synchronously after a write
+ * to CWRITER.
+ */
+ for (i = 0; its_read_u64(GITS_CREADR) != next; i++) {
+ __GUEST_ASSERT(i < GITS_CMDQ_POLL_ITERATIONS,
+ "ITS didn't process command at offset %lu after %d iterations\n",
+ cwriter, i);
+
+ cpu_relax();
+ }
+}
+
+void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base,
+ size_t itt_size, bool valid)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_MAPD);
+ its_encode_devid(&cmd, device_id);
+ its_encode_size(&cmd, ilog2(itt_size) - 1);
+ its_encode_itt(&cmd, itt_base);
+ its_encode_valid(&cmd, valid);
+
+ its_send_cmd(cmdq_base, &cmd);
+}
+
+void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_MAPC);
+ its_encode_collection(&cmd, collection_id);
+ its_encode_target(&cmd, vcpu_id);
+ its_encode_valid(&cmd, valid);
+
+ its_send_cmd(cmdq_base, &cmd);
+}
+
+void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
+ u32 collection_id, u32 intid)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_MAPTI);
+ its_encode_devid(&cmd, device_id);
+ its_encode_event_id(&cmd, event_id);
+ its_encode_phys_id(&cmd, intid);
+ its_encode_collection(&cmd, collection_id);
+
+ its_send_cmd(cmdq_base, &cmd);
+}
+
+void its_send_invall_cmd(void *cmdq_base, u32 collection_id)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_INVALL);
+ its_encode_collection(&cmd, collection_id);
+
+ its_send_cmd(cmdq_base, &cmd);
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index a9eb17295b..0ac7cc89f3 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -11,6 +11,8 @@
#include "guest_modes.h"
#include "kvm_util.h"
#include "processor.h"
+#include "ucall_common.h"
+
#include <linux/bitfield.h>
#include <linux/sizes.h>
diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
index 184378d593..4427f43f73 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
@@ -3,8 +3,10 @@
* ARM Generic Interrupt Controller (GIC) v3 host support
*/
+#include <linux/kernel.h>
#include <linux/kvm.h>
#include <linux/sizes.h>
+#include <asm/cputype.h>
#include <asm/kvm_para.h>
#include <asm/kvm.h>
@@ -19,8 +21,6 @@
* Input args:
* vm - KVM VM
* nr_vcpus - Number of vCPUs supported by this VM
- * gicd_base_gpa - Guest Physical Address of the Distributor region
- * gicr_base_gpa - Guest Physical Address of the Redistributor region
*
* Output args: None
*
@@ -30,11 +30,10 @@
* redistributor regions of the guest. Since it depends on the number of
* vCPUs for the VM, it must be called after all the vCPUs have been created.
*/
-int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
- uint64_t gicd_base_gpa, uint64_t gicr_base_gpa)
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
{
int gic_fd;
- uint64_t redist_attr;
+ uint64_t attr;
struct list_head *iter;
unsigned int nr_gic_pages, nr_vcpus_created = 0;
@@ -60,18 +59,19 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+ attr = GICD_BASE_GPA;
kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_DIST, &gicd_base_gpa);
+ KVM_VGIC_V3_ADDR_TYPE_DIST, &attr);
nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE);
- virt_map(vm, gicd_base_gpa, gicd_base_gpa, nr_gic_pages);
+ virt_map(vm, GICD_BASE_GPA, GICD_BASE_GPA, nr_gic_pages);
/* Redistributor setup */
- redist_attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, gicr_base_gpa, 0, 0);
+ attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, GICR_BASE_GPA, 0, 0);
kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &redist_attr);
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &attr);
nr_gic_pages = vm_calc_num_guest_pages(vm->mode,
KVM_VGIC_V3_REDIST_SIZE * nr_vcpus);
- virt_map(vm, gicr_base_gpa, gicr_base_gpa, nr_gic_pages);
+ virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages);
kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
@@ -168,3 +168,21 @@ void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
{
vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER);
}
+
+int vgic_its_setup(struct kvm_vm *vm)
+{
+ int its_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_ITS);
+ u64 attr;
+
+ attr = GITS_BASE_GPA;
+ kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &attr);
+
+ kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ virt_map(vm, GITS_BASE_GPA, GITS_BASE_GPA,
+ vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_ITS_SIZE));
+
+ return its_fd;
+}
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
index 2bd25b191d..b49690658c 100644
--- a/tools/testing/selftests/kvm/lib/assert.c
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -4,9 +4,6 @@
*
* Copyright (C) 2018, Google LLC.
*/
-
-#define _GNU_SOURCE /* for getline(3) and strchrnul(3)*/
-
#include "test_util.h"
#include <execinfo.h>
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index b2262b5fad..ad00e47618 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -4,11 +4,10 @@
*
* Copyright (C) 2018, Google LLC.
*/
-
-#define _GNU_SOURCE /* for program_invocation_name */
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
+#include "ucall_common.h"
#include <assert.h>
#include <sched.h>
@@ -20,6 +19,9 @@
#define KVM_UTIL_MIN_PFN 2
+uint32_t guest_random_seed;
+struct guest_random_state guest_rng;
+
static int vcpu_mmap_sz(void);
int open_path_or_exit(const char *path, int flags)
@@ -276,7 +278,6 @@ struct kvm_vm *____vm_create(struct vm_shape shape)
vm->mode = shape.mode;
vm->type = shape.type;
- vm->subtype = shape.subtype;
vm->pa_bits = vm_guest_mode_params[vm->mode].pa_bits;
vm->va_bits = vm_guest_mode_params[vm->mode].va_bits;
@@ -433,6 +434,10 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
slot0 = memslot2region(vm, 0);
ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
+ pr_info("Random seed: 0x%x\n", guest_random_seed);
+ guest_rng = new_guest_random_state(guest_random_seed);
+ sync_global_to_guest(vm, guest_rng);
+
kvm_arch_vm_post_create(vm);
return vm;
@@ -930,6 +935,10 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
errno, strerror(errno));
}
+#define TEST_REQUIRE_SET_USER_MEMORY_REGION2() \
+ __TEST_REQUIRE(kvm_has_cap(KVM_CAP_USER_MEMORY2), \
+ "KVM selftests now require KVM_SET_USER_MEMORY_REGION2 (introduced in v6.8)")
+
int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
uint64_t gpa, uint64_t size, void *hva,
uint32_t guest_memfd, uint64_t guest_memfd_offset)
@@ -944,6 +953,8 @@ int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flag
.guest_memfd_offset = guest_memfd_offset,
};
+ TEST_REQUIRE_SET_USER_MEMORY_REGION2();
+
return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION2, &region);
}
@@ -970,6 +981,8 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
size_t mem_size = npages * vm->page_size;
size_t alignment;
+ TEST_REQUIRE_SET_USER_MEMORY_REGION2();
+
TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
"Number of guest pages is not compatible with the host. "
"Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));
@@ -2306,6 +2319,8 @@ void __attribute((constructor)) kvm_selftest_init(void)
/* Tell stdout not to buffer its content. */
setbuf(stdout, NULL);
+ guest_random_seed = random();
+
kvm_selftest_arch_init();
}
diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c
index cf2c739713..313277486a 100644
--- a/tools/testing/selftests/kvm/lib/memstress.c
+++ b/tools/testing/selftests/kvm/lib/memstress.c
@@ -2,14 +2,13 @@
/*
* Copyright (C) 2020, Google LLC.
*/
-#define _GNU_SOURCE
-
#include <inttypes.h>
#include <linux/bitmap.h>
#include "kvm_util.h"
#include "memstress.h"
#include "processor.h"
+#include "ucall_common.h"
struct memstress_args memstress_args;
@@ -56,7 +55,7 @@ void memstress_guest_code(uint32_t vcpu_idx)
uint64_t page;
int i;
- rand_state = new_guest_random_state(args->random_seed + vcpu_idx);
+ rand_state = new_guest_random_state(guest_random_seed + vcpu_idx);
gva = vcpu_args->gva;
pages = vcpu_args->pages;
@@ -76,7 +75,7 @@ void memstress_guest_code(uint32_t vcpu_idx)
addr = gva + (page * args->guest_page_size);
- if (guest_random_u32(&rand_state) % 100 < args->write_percent)
+ if (__guest_random_bool(&rand_state, args->write_percent))
*(uint64_t *)addr = 0x0123456789ABCDEF;
else
READ_ONCE(*(uint64_t *)addr);
@@ -243,12 +242,6 @@ void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent)
sync_global_to_guest(vm, memstress_args.write_percent);
}
-void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed)
-{
- memstress_args.random_seed = random_seed;
- sync_global_to_guest(vm, memstress_args.random_seed);
-}
-
void memstress_set_random_access(struct kvm_vm *vm, bool random_access)
{
memstress_args.random_access = random_access;
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index e8211f5d68..6ae47b3d6b 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -10,6 +10,7 @@
#include "kvm_util.h"
#include "processor.h"
+#include "ucall_common.h"
#define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000
@@ -502,3 +503,15 @@ bool guest_sbi_probe_extension(int extid, long *out_val)
return true;
}
+
+unsigned long get_host_sbi_spec_version(void)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_GET_SPEC_VERSION, 0,
+ 0, 0, 0, 0, 0);
+
+ GUEST_ASSERT(!ret.error);
+
+ return ret.value;
+}
diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c
index 14ee17151a..b5035c63d5 100644
--- a/tools/testing/selftests/kvm/lib/riscv/ucall.c
+++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c
@@ -9,6 +9,7 @@
#include "kvm_util.h"
#include "processor.h"
+#include "sbi.h"
void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
{
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 5a8f8becb1..8ed0b74ae8 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -4,8 +4,6 @@
*
* Copyright (C) 2020, Google LLC.
*/
-
-#define _GNU_SOURCE
#include <stdio.h>
#include <stdarg.h>
#include <assert.h>
diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c
index f5af65a41c..42151e5719 100644
--- a/tools/testing/selftests/kvm/lib/ucall_common.c
+++ b/tools/testing/selftests/kvm/lib/ucall_common.c
@@ -1,9 +1,12 @@
// SPDX-License-Identifier: GPL-2.0-only
-#include "kvm_util.h"
#include "linux/types.h"
#include "linux/bitmap.h"
#include "linux/atomic.h"
+#include "kvm_util.h"
+#include "ucall_common.h"
+
+
#define GUEST_UCALL_FAILED -1
struct ucall_header {
diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
index f4eef6eb2d..7c9de84144 100644
--- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c
+++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
@@ -6,9 +6,6 @@
* Copyright (C) 2018, Red Hat, Inc.
* Copyright (C) 2019-2022 Google LLC
*/
-
-#define _GNU_SOURCE /* for pipe2 */
-
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
@@ -16,6 +13,7 @@
#include <poll.h>
#include <pthread.h>
#include <linux/userfaultfd.h>
+#include <sys/epoll.h>
#include <sys/syscall.h>
#include "kvm_util.h"
@@ -27,76 +25,69 @@
static void *uffd_handler_thread_fn(void *arg)
{
- struct uffd_desc *uffd_desc = (struct uffd_desc *)arg;
- int uffd = uffd_desc->uffd;
- int pipefd = uffd_desc->pipefds[0];
- useconds_t delay = uffd_desc->delay;
+ struct uffd_reader_args *reader_args = (struct uffd_reader_args *)arg;
+ int uffd = reader_args->uffd;
int64_t pages = 0;
struct timespec start;
struct timespec ts_diff;
+ struct epoll_event evt;
+ int epollfd;
+
+ epollfd = epoll_create(1);
+ TEST_ASSERT(epollfd >= 0, "Failed to create epollfd.");
+
+ evt.events = EPOLLIN | EPOLLEXCLUSIVE;
+ evt.data.u32 = 0;
+ TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, uffd, &evt),
+ "Failed to add uffd to epollfd");
+
+ evt.events = EPOLLIN;
+ evt.data.u32 = 1;
+ TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, reader_args->pipe, &evt),
+ "Failed to add pipe to epollfd");
clock_gettime(CLOCK_MONOTONIC, &start);
while (1) {
struct uffd_msg msg;
- struct pollfd pollfd[2];
- char tmp_chr;
int r;
- pollfd[0].fd = uffd;
- pollfd[0].events = POLLIN;
- pollfd[1].fd = pipefd;
- pollfd[1].events = POLLIN;
+ r = epoll_wait(epollfd, &evt, 1, -1);
+ TEST_ASSERT(r == 1,
+ "Unexpected number of events (%d) from epoll, errno = %d",
+ r, errno);
- r = poll(pollfd, 2, -1);
- switch (r) {
- case -1:
- pr_info("poll err");
- continue;
- case 0:
- continue;
- case 1:
- break;
- default:
- pr_info("Polling uffd returned %d", r);
- return NULL;
- }
-
- if (pollfd[0].revents & POLLERR) {
- pr_info("uffd revents has POLLERR");
- return NULL;
- }
+ if (evt.data.u32 == 1) {
+ char tmp_chr;
- if (pollfd[1].revents & POLLIN) {
- r = read(pollfd[1].fd, &tmp_chr, 1);
+ TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)),
+ "Reader thread received EPOLLERR or EPOLLHUP on pipe.");
+ r = read(reader_args->pipe, &tmp_chr, 1);
TEST_ASSERT(r == 1,
- "Error reading pipefd in UFFD thread");
+ "Error reading pipefd in uffd reader thread");
break;
}
- if (!(pollfd[0].revents & POLLIN))
- continue;
+ TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)),
+ "Reader thread received EPOLLERR or EPOLLHUP on uffd.");
r = read(uffd, &msg, sizeof(msg));
if (r == -1) {
- if (errno == EAGAIN)
- continue;
- pr_info("Read of uffd got errno %d\n", errno);
- return NULL;
+ TEST_ASSERT(errno == EAGAIN,
+ "Error reading from UFFD: errno = %d", errno);
+ continue;
}
- if (r != sizeof(msg)) {
- pr_info("Read on uffd returned unexpected size: %d bytes", r);
- return NULL;
- }
+ TEST_ASSERT(r == sizeof(msg),
+ "Read on uffd returned unexpected number of bytes (%d)", r);
if (!(msg.event & UFFD_EVENT_PAGEFAULT))
continue;
- if (delay)
- usleep(delay);
- r = uffd_desc->handler(uffd_desc->uffd_mode, uffd, &msg);
- if (r < 0)
- return NULL;
+ if (reader_args->delay)
+ usleep(reader_args->delay);
+ r = reader_args->handler(reader_args->uffd_mode, uffd, &msg);
+ TEST_ASSERT(r >= 0,
+ "Reader thread handler fn returned negative value %d", r);
pages++;
}
@@ -110,6 +101,7 @@ static void *uffd_handler_thread_fn(void *arg)
struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
void *hva, uint64_t len,
+ uint64_t num_readers,
uffd_handler_t handler)
{
struct uffd_desc *uffd_desc;
@@ -118,14 +110,25 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
struct uffdio_api uffdio_api;
struct uffdio_register uffdio_register;
uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
- int ret;
+ int ret, i;
PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
is_minor ? "MINOR" : "MISSING",
is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
uffd_desc = malloc(sizeof(struct uffd_desc));
- TEST_ASSERT(uffd_desc, "malloc failed");
+ TEST_ASSERT(uffd_desc, "Failed to malloc uffd descriptor");
+
+ uffd_desc->pipefds = calloc(sizeof(int), num_readers);
+ TEST_ASSERT(uffd_desc->pipefds, "Failed to alloc pipes");
+
+ uffd_desc->readers = calloc(sizeof(pthread_t), num_readers);
+ TEST_ASSERT(uffd_desc->readers, "Failed to alloc reader threads");
+
+ uffd_desc->reader_args = calloc(sizeof(struct uffd_reader_args), num_readers);
+ TEST_ASSERT(uffd_desc->reader_args, "Failed to alloc reader_args");
+
+ uffd_desc->num_readers = num_readers;
/* In order to get minor faults, prefault via the alias. */
if (is_minor)
@@ -148,18 +151,28 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
expected_ioctls, "missing userfaultfd ioctls");
- ret = pipe2(uffd_desc->pipefds, O_CLOEXEC | O_NONBLOCK);
- TEST_ASSERT(!ret, "Failed to set up pipefd");
-
- uffd_desc->uffd_mode = uffd_mode;
uffd_desc->uffd = uffd;
- uffd_desc->delay = delay;
- uffd_desc->handler = handler;
- pthread_create(&uffd_desc->thread, NULL, uffd_handler_thread_fn,
- uffd_desc);
+ for (i = 0; i < uffd_desc->num_readers; ++i) {
+ int pipes[2];
- PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
- hva, hva + len);
+ ret = pipe2((int *) &pipes, O_CLOEXEC | O_NONBLOCK);
+ TEST_ASSERT(!ret, "Failed to set up pipefd %i for uffd_desc %p",
+ i, uffd_desc);
+
+ uffd_desc->pipefds[i] = pipes[1];
+
+ uffd_desc->reader_args[i].uffd_mode = uffd_mode;
+ uffd_desc->reader_args[i].uffd = uffd;
+ uffd_desc->reader_args[i].delay = delay;
+ uffd_desc->reader_args[i].handler = handler;
+ uffd_desc->reader_args[i].pipe = pipes[0];
+
+ pthread_create(&uffd_desc->readers[i], NULL, uffd_handler_thread_fn,
+ &uffd_desc->reader_args[i]);
+
+ PER_VCPU_DEBUG("Created uffd thread %i for HVA range [%p, %p)\n",
+ i, hva, hva + len);
+ }
return uffd_desc;
}
@@ -167,19 +180,26 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
void uffd_stop_demand_paging(struct uffd_desc *uffd)
{
char c = 0;
- int ret;
+ int i;
- ret = write(uffd->pipefds[1], &c, 1);
- TEST_ASSERT(ret == 1, "Unable to write to pipefd");
+ for (i = 0; i < uffd->num_readers; ++i)
+ TEST_ASSERT(write(uffd->pipefds[i], &c, 1) == 1,
+ "Unable to write to pipefd %i for uffd_desc %p", i, uffd);
- ret = pthread_join(uffd->thread, NULL);
- TEST_ASSERT(ret == 0, "Pthread_join failed.");
+ for (i = 0; i < uffd->num_readers; ++i)
+ TEST_ASSERT(!pthread_join(uffd->readers[i], NULL),
+ "Pthread_join failed on reader %i for uffd_desc %p", i, uffd);
close(uffd->uffd);
- close(uffd->pipefds[1]);
- close(uffd->pipefds[0]);
+ for (i = 0; i < uffd->num_readers; ++i) {
+ close(uffd->pipefds[i]);
+ close(uffd->reader_args[i].pipe);
+ }
+ free(uffd->pipefds);
+ free(uffd->readers);
+ free(uffd->reader_args);
free(uffd);
}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 74a4c736c9..594b061aef 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -15,14 +15,16 @@
#define NUM_INTERRUPTS 256
#endif
-#define DEFAULT_CODE_SELECTOR 0x8
-#define DEFAULT_DATA_SELECTOR 0x10
+#define KERNEL_CS 0x8
+#define KERNEL_DS 0x10
+#define KERNEL_TSS 0x18
#define MAX_NR_CPUID_ENTRIES 100
vm_vaddr_t exception_handlers;
bool host_cpu_is_amd;
bool host_cpu_is_intel;
+bool is_forced_emulation_enabled;
static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent)
{
@@ -417,7 +419,7 @@ static void kvm_seg_set_unusable(struct kvm_segment *segp)
static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
{
- void *gdt = addr_gva2hva(vm, vm->gdt);
+ void *gdt = addr_gva2hva(vm, vm->arch.gdt);
struct desc64 *desc = gdt + (segp->selector >> 3) * 8;
desc->limit0 = segp->limit & 0xFFFF;
@@ -437,27 +439,10 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
desc->base3 = segp->base >> 32;
}
-
-/*
- * Set Long Mode Flat Kernel Code Segment
- *
- * Input Args:
- * vm - VM whose GDT is being filled, or NULL to only write segp
- * selector - selector value
- *
- * Output Args:
- * segp - Pointer to KVM segment
- *
- * Return: None
- *
- * Sets up the KVM segment pointed to by @segp, to be a code segment
- * with the selector value given by @selector.
- */
-static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
- struct kvm_segment *segp)
+static void kvm_seg_set_kernel_code_64bit(struct kvm_segment *segp)
{
memset(segp, 0, sizeof(*segp));
- segp->selector = selector;
+ segp->selector = KERNEL_CS;
segp->limit = 0xFFFFFFFFu;
segp->s = 0x1; /* kTypeCodeData */
segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
@@ -466,30 +451,12 @@ static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
segp->g = true;
segp->l = true;
segp->present = 1;
- if (vm)
- kvm_seg_fill_gdt_64bit(vm, segp);
}
-/*
- * Set Long Mode Flat Kernel Data Segment
- *
- * Input Args:
- * vm - VM whose GDT is being filled, or NULL to only write segp
- * selector - selector value
- *
- * Output Args:
- * segp - Pointer to KVM segment
- *
- * Return: None
- *
- * Sets up the KVM segment pointed to by @segp, to be a data segment
- * with the selector value given by @selector.
- */
-static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
- struct kvm_segment *segp)
+static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp)
{
memset(segp, 0, sizeof(*segp));
- segp->selector = selector;
+ segp->selector = KERNEL_DS;
segp->limit = 0xFFFFFFFFu;
segp->s = 0x1; /* kTypeCodeData */
segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
@@ -497,8 +464,6 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
*/
segp->g = true;
segp->present = true;
- if (vm)
- kvm_seg_fill_gdt_64bit(vm, segp);
}
vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
@@ -516,72 +481,153 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level));
}
-static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt)
-{
- if (!vm->gdt)
- vm->gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
-
- dt->base = vm->gdt;
- dt->limit = getpagesize();
-}
-
-static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
- int selector)
+static void kvm_seg_set_tss_64bit(vm_vaddr_t base, struct kvm_segment *segp)
{
- if (!vm->tss)
- vm->tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
-
memset(segp, 0, sizeof(*segp));
- segp->base = vm->tss;
+ segp->base = base;
segp->limit = 0x67;
- segp->selector = selector;
+ segp->selector = KERNEL_TSS;
segp->type = 0xb;
segp->present = 1;
- kvm_seg_fill_gdt_64bit(vm, segp);
}
-static void vcpu_setup(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
struct kvm_sregs sregs;
+ TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K);
+
/* Set mode specific system register values. */
vcpu_sregs_get(vcpu, &sregs);
- sregs.idt.limit = 0;
+ sregs.idt.base = vm->arch.idt;
+ sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
+ sregs.gdt.base = vm->arch.gdt;
+ sregs.gdt.limit = getpagesize() - 1;
+
+ sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
+ sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
+ sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
+
+ kvm_seg_set_unusable(&sregs.ldt);
+ kvm_seg_set_kernel_code_64bit(&sregs.cs);
+ kvm_seg_set_kernel_data_64bit(&sregs.ds);
+ kvm_seg_set_kernel_data_64bit(&sregs.es);
+ kvm_seg_set_kernel_data_64bit(&sregs.gs);
+ kvm_seg_set_tss_64bit(vm->arch.tss, &sregs.tr);
+
+ sregs.cr3 = vm->pgd;
+ vcpu_sregs_set(vcpu, &sregs);
+}
+
+static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
+ int dpl, unsigned short selector)
+{
+ struct idt_entry *base =
+ (struct idt_entry *)addr_gva2hva(vm, vm->arch.idt);
+ struct idt_entry *e = &base[vector];
+
+ memset(e, 0, sizeof(*e));
+ e->offset0 = addr;
+ e->selector = selector;
+ e->ist = 0;
+ e->type = 14;
+ e->dpl = dpl;
+ e->p = 1;
+ e->offset1 = addr >> 16;
+ e->offset2 = addr >> 32;
+}
+
+static bool kvm_fixup_exception(struct ex_regs *regs)
+{
+ if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10)
+ return false;
- kvm_setup_gdt(vm, &sregs.gdt);
+ if (regs->vector == DE_VECTOR)
+ return false;
- switch (vm->mode) {
- case VM_MODE_PXXV48_4K:
- sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
- sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
- sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
+ regs->rip = regs->r11;
+ regs->r9 = regs->vector;
+ regs->r10 = regs->error_code;
+ return true;
+}
- kvm_seg_set_unusable(&sregs.ldt);
- kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs);
- kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds);
- kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es);
- kvm_setup_tss_64bit(vm, &sregs.tr, 0x18);
- break;
+void route_exception(struct ex_regs *regs)
+{
+ typedef void(*handler)(struct ex_regs *);
+ handler *handlers = (handler *)exception_handlers;
- default:
- TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ if (handlers && handlers[regs->vector]) {
+ handlers[regs->vector](regs);
+ return;
}
- sregs.cr3 = vm->pgd;
- vcpu_sregs_set(vcpu, &sregs);
+ if (kvm_fixup_exception(regs))
+ return;
+
+ ucall_assert(UCALL_UNHANDLED,
+ "Unhandled exception in guest", __FILE__, __LINE__,
+ "Unhandled exception '0x%lx' at guest RIP '0x%lx'",
+ regs->vector, regs->rip);
+}
+
+static void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+ extern void *idt_handlers;
+ struct kvm_segment seg;
+ int i;
+
+ vm->arch.gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+ vm->arch.idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+ vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+ vm->arch.tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+
+ /* Handlers have the same address in both address spaces.*/
+ for (i = 0; i < NUM_INTERRUPTS; i++)
+ set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, KERNEL_CS);
+
+ *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+
+ kvm_seg_set_kernel_code_64bit(&seg);
+ kvm_seg_fill_gdt_64bit(vm, &seg);
+
+ kvm_seg_set_kernel_data_64bit(&seg);
+ kvm_seg_fill_gdt_64bit(vm, &seg);
+
+ kvm_seg_set_tss_64bit(vm->arch.tss, &seg);
+ kvm_seg_fill_gdt_64bit(vm, &seg);
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+ void (*handler)(struct ex_regs *))
+{
+ vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
+
+ handlers[vector] = (vm_vaddr_t)handler;
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED)
+ REPORT_GUEST_ASSERT(uc);
}
void kvm_arch_vm_post_create(struct kvm_vm *vm)
{
vm_create_irqchip(vm);
+ vm_init_descriptor_tables(vm);
+
sync_global_to_guest(vm, host_cpu_is_intel);
sync_global_to_guest(vm, host_cpu_is_amd);
+ sync_global_to_guest(vm, is_forced_emulation_enabled);
+
+ if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
+ struct kvm_sev_init init = { 0 };
- if (vm->subtype == VM_SUBTYPE_SEV)
- sev_vm_init(vm);
- else if (vm->subtype == VM_SUBTYPE_SEV_ES)
- sev_es_vm_init(vm);
+ vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+ }
}
void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
@@ -621,7 +667,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
vcpu = __vm_vcpu_add(vm, vcpu_id);
vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
- vcpu_setup(vm, vcpu);
+ vcpu_init_sregs(vm, vcpu);
/* Setup guest general purpose registers */
vcpu_regs_get(vcpu, &regs);
@@ -1081,108 +1127,15 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
void kvm_init_vm_address_properties(struct kvm_vm *vm)
{
- if (vm->subtype == VM_SUBTYPE_SEV || vm->subtype == VM_SUBTYPE_SEV_ES) {
+ if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
+ vm->arch.sev_fd = open_sev_dev_path_or_exit();
vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT));
vm->gpa_tag_mask = vm->arch.c_bit;
+ } else {
+ vm->arch.sev_fd = -1;
}
}
-static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
- int dpl, unsigned short selector)
-{
- struct idt_entry *base =
- (struct idt_entry *)addr_gva2hva(vm, vm->idt);
- struct idt_entry *e = &base[vector];
-
- memset(e, 0, sizeof(*e));
- e->offset0 = addr;
- e->selector = selector;
- e->ist = 0;
- e->type = 14;
- e->dpl = dpl;
- e->p = 1;
- e->offset1 = addr >> 16;
- e->offset2 = addr >> 32;
-}
-
-
-static bool kvm_fixup_exception(struct ex_regs *regs)
-{
- if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10)
- return false;
-
- if (regs->vector == DE_VECTOR)
- return false;
-
- regs->rip = regs->r11;
- regs->r9 = regs->vector;
- regs->r10 = regs->error_code;
- return true;
-}
-
-void route_exception(struct ex_regs *regs)
-{
- typedef void(*handler)(struct ex_regs *);
- handler *handlers = (handler *)exception_handlers;
-
- if (handlers && handlers[regs->vector]) {
- handlers[regs->vector](regs);
- return;
- }
-
- if (kvm_fixup_exception(regs))
- return;
-
- ucall_assert(UCALL_UNHANDLED,
- "Unhandled exception in guest", __FILE__, __LINE__,
- "Unhandled exception '0x%lx' at guest RIP '0x%lx'",
- regs->vector, regs->rip);
-}
-
-void vm_init_descriptor_tables(struct kvm_vm *vm)
-{
- extern void *idt_handlers;
- int i;
-
- vm->idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
- vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
- /* Handlers have the same address in both address spaces.*/
- for (i = 0; i < NUM_INTERRUPTS; i++)
- set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
- DEFAULT_CODE_SELECTOR);
-}
-
-void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu)
-{
- struct kvm_vm *vm = vcpu->vm;
- struct kvm_sregs sregs;
-
- vcpu_sregs_get(vcpu, &sregs);
- sregs.idt.base = vm->idt;
- sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
- sregs.gdt.base = vm->gdt;
- sregs.gdt.limit = getpagesize() - 1;
- kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs);
- vcpu_sregs_set(vcpu, &sregs);
- *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
-}
-
-void vm_install_exception_handler(struct kvm_vm *vm, int vector,
- void (*handler)(struct ex_regs *))
-{
- vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
-
- handlers[vector] = (vm_vaddr_t)handler;
-}
-
-void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED)
- REPORT_GUEST_ASSERT(uc);
-}
-
const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
uint32_t function, uint32_t index)
{
@@ -1294,9 +1247,20 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
{
const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
unsigned long ht_gfn, max_gfn, max_pfn;
- uint8_t maxphyaddr;
+ uint8_t maxphyaddr, guest_maxphyaddr;
+
+ /*
+ * Use "guest MAXPHYADDR" from KVM if it's available. Guest MAXPHYADDR
+ * enumerates the max _mappable_ GPA, which can be less than the raw
+ * MAXPHYADDR, e.g. if MAXPHYADDR=52, KVM is using TDP, and the CPU
+ * doesn't support 5-level TDP.
+ */
+ guest_maxphyaddr = kvm_cpu_property(X86_PROPERTY_GUEST_MAX_PHY_ADDR);
+ guest_maxphyaddr = guest_maxphyaddr ?: vm->pa_bits;
+ TEST_ASSERT(guest_maxphyaddr <= vm->pa_bits,
+ "Guest MAXPHYADDR should never be greater than raw MAXPHYADDR");
- max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1;
+ max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1;
/* Avoid reserved HyperTransport region on AMD processors. */
if (!host_cpu_is_amd)
@@ -1344,6 +1308,7 @@ void kvm_selftest_arch_init(void)
{
host_cpu_is_intel = this_cpu_is_intel();
host_cpu_is_amd = this_cpu_is_amd();
+ is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
}
bool sys_clocksource_is_based_on_tsc(void)
diff --git a/tools/testing/selftests/kvm/lib/x86_64/sev.c b/tools/testing/selftests/kvm/lib/x86_64/sev.c
index e248d3364b..e9535ee20b 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/sev.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/sev.c
@@ -1,5 +1,4 @@
// SPDX-License-Identifier: GPL-2.0-only
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <stdint.h>
#include <stdbool.h>
@@ -35,6 +34,32 @@ static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *regio
}
}
+void sev_vm_init(struct kvm_vm *vm)
+{
+ if (vm->type == KVM_X86_DEFAULT_VM) {
+ assert(vm->arch.sev_fd == -1);
+ vm->arch.sev_fd = open_sev_dev_path_or_exit();
+ vm_sev_ioctl(vm, KVM_SEV_INIT, NULL);
+ } else {
+ struct kvm_sev_init init = { 0 };
+ assert(vm->type == KVM_X86_SEV_VM);
+ vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+ }
+}
+
+void sev_es_vm_init(struct kvm_vm *vm)
+{
+ if (vm->type == KVM_X86_DEFAULT_VM) {
+ assert(vm->arch.sev_fd == -1);
+ vm->arch.sev_fd = open_sev_dev_path_or_exit();
+ vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL);
+ } else {
+ struct kvm_sev_init init = { 0 };
+ assert(vm->type == KVM_X86_SEV_ES_VM);
+ vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+ }
+}
+
void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
{
struct kvm_sev_launch_start launch_start = {
@@ -87,28 +112,30 @@ void sev_vm_launch_finish(struct kvm_vm *vm)
TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING);
}
-struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t policy, void *guest_code,
+struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
struct kvm_vcpu **cpu)
{
struct vm_shape shape = {
- .type = VM_TYPE_DEFAULT,
.mode = VM_MODE_DEFAULT,
- .subtype = policy & SEV_POLICY_ES ? VM_SUBTYPE_SEV_ES :
- VM_SUBTYPE_SEV,
+ .type = type,
};
struct kvm_vm *vm;
struct kvm_vcpu *cpus[1];
- uint8_t measurement[512];
vm = __vm_create_with_vcpus(shape, 1, 0, guest_code, cpus);
*cpu = cpus[0];
+ return vm;
+}
+
+void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement)
+{
sev_vm_launch(vm, policy);
- /* TODO: Validate the measurement is as expected. */
+ if (!measurement)
+ measurement = alloca(256);
+
sev_vm_launch_measure(vm, measurement);
sev_vm_launch_finish(vm);
-
- return vm;
}
diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c
index 1a6da7389b..0b9678858b 100644
--- a/tools/testing/selftests/kvm/max_guest_memory_test.c
+++ b/tools/testing/selftests/kvm/max_guest_memory_test.c
@@ -1,6 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE
-
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index 1563619666..05fcf902e0 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -6,9 +6,6 @@
* Copyright (C) 2018, Red Hat, Inc.
* Copyright (C) 2020, Google, Inc.
*/
-
-#define _GNU_SOURCE /* for program_invocation_name */
-
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c
index 0f9cabd99f..2c792228ac 100644
--- a/tools/testing/selftests/kvm/riscv/arch_timer.c
+++ b/tools/testing/selftests/kvm/riscv/arch_timer.c
@@ -7,13 +7,11 @@
*
* Copyright (c) 2024, Intel Corporation.
*/
-
-#define _GNU_SOURCE
-
#include "arch_timer.h"
#include "kvm_util.h"
#include "processor.h"
#include "timer_test.h"
+#include "ucall_common.h"
static int timer_irq = IRQ_S_TIMER;
@@ -85,7 +83,7 @@ struct kvm_vm *test_vm_create(void)
int nr_vcpus = test_args.nr_vcpus;
vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
- __TEST_REQUIRE(__vcpu_has_ext(vcpus[0], RISCV_ISA_EXT_REG(KVM_RISCV_ISA_EXT_SSTC)),
+ __TEST_REQUIRE(__vcpu_has_isa_ext(vcpus[0], KVM_RISCV_ISA_EXT_SSTC),
"SSTC not available, skipping test\n");
vm_init_vector_tables(vm);
diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c
new file mode 100644
index 0000000000..0e07128549
--- /dev/null
+++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RISC-V KVM ebreak test.
+ *
+ * Copyright 2024 Beijing ESWIN Computing Technology Co., Ltd.
+ *
+ */
+#include "kvm_util.h"
+#include "ucall_common.h"
+
+#define LABEL_ADDRESS(v) ((uint64_t)&(v))
+
+extern unsigned char sw_bp_1, sw_bp_2;
+static uint64_t sw_bp_addr;
+
+static void guest_code(void)
+{
+ asm volatile(
+ ".option push\n"
+ ".option norvc\n"
+ "sw_bp_1: ebreak\n"
+ "sw_bp_2: ebreak\n"
+ ".option pop\n"
+ );
+ GUEST_ASSERT_EQ(READ_ONCE(sw_bp_addr), LABEL_ADDRESS(sw_bp_2));
+
+ GUEST_DONE();
+}
+
+static void guest_breakpoint_handler(struct ex_regs *regs)
+{
+ WRITE_ONCE(sw_bp_addr, regs->epc);
+ regs->epc += 4;
+}
+
+int main(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ uint64_t pc;
+ struct kvm_guest_debug debug = {
+ .control = KVM_GUESTDBG_ENABLE,
+ };
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_vector_tables(vm);
+ vcpu_init_vector_tables(vcpu);
+ vm_install_exception_handler(vm, EXC_BREAKPOINT,
+ guest_breakpoint_handler);
+
+ /*
+ * Enable the guest debug.
+ * ebreak should exit to the VMM with KVM_EXIT_DEBUG reason.
+ */
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &pc);
+ TEST_ASSERT_EQ(pc, LABEL_ADDRESS(sw_bp_1));
+
+ /* skip sw_bp_1 */
+ vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), pc + 4);
+
+ /*
+ * Disable all debug controls.
+ * Guest should handle the ebreak without exiting to the VMM.
+ */
+ memset(&debug, 0, sizeof(debug));
+ vcpu_guest_debug_set(vcpu, &debug);
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
index b882b7b9b7..222198dd6d 100644
--- a/tools/testing/selftests/kvm/riscv/get-reg-list.c
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -43,6 +43,7 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMSTATEEN:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSAIA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSCOFPMF:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSTC:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT:
@@ -408,6 +409,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(V),
KVM_ISA_EXT_ARR(SMSTATEEN),
KVM_ISA_EXT_ARR(SSAIA),
+ KVM_ISA_EXT_ARR(SSCOFPMF),
KVM_ISA_EXT_ARR(SSTC),
KVM_ISA_EXT_ARR(SVINVAL),
KVM_ISA_EXT_ARR(SVNAPOT),
@@ -931,6 +933,7 @@ KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F);
KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D);
KVM_ISA_EXT_SIMPLE_CONFIG(h, H);
KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN);
+KVM_ISA_EXT_SIMPLE_CONFIG(sscofpmf, SSCOFPMF);
KVM_ISA_EXT_SIMPLE_CONFIG(sstc, SSTC);
KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL);
KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT);
@@ -986,6 +989,7 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_fp_d,
&config_h,
&config_smstateen,
+ &config_sscofpmf,
&config_sstc,
&config_svinval,
&config_svnapot,
diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
new file mode 100644
index 0000000000..f299cbfd23
--- /dev/null
+++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
@@ -0,0 +1,682 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * sbi_pmu_test.c - Tests the riscv64 SBI PMU functionality.
+ *
+ * Copyright (c) 2024, Rivos Inc.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include "kvm_util.h"
+#include "test_util.h"
+#include "processor.h"
+#include "sbi.h"
+#include "arch_timer.h"
+#include "ucall_common.h"
+
+/* Maximum counters(firmware + hardware) */
+#define RISCV_MAX_PMU_COUNTERS 64
+union sbi_pmu_ctr_info ctrinfo_arr[RISCV_MAX_PMU_COUNTERS];
+
+/* Snapshot shared memory data */
+#define PMU_SNAPSHOT_GPA_BASE BIT(30)
+static void *snapshot_gva;
+static vm_paddr_t snapshot_gpa;
+
+static int vcpu_shared_irq_count;
+static int counter_in_use;
+
+/* Cache the available counters in a bitmask */
+static unsigned long counter_mask_available;
+
+static bool illegal_handler_invoked;
+
+#define SBI_PMU_TEST_BASIC BIT(0)
+#define SBI_PMU_TEST_EVENTS BIT(1)
+#define SBI_PMU_TEST_SNAPSHOT BIT(2)
+#define SBI_PMU_TEST_OVERFLOW BIT(3)
+
+static int disabled_tests;
+
+unsigned long pmu_csr_read_num(int csr_num)
+{
+#define switchcase_csr_read(__csr_num, __val) {\
+ case __csr_num: \
+ __val = csr_read(__csr_num); \
+ break; }
+#define switchcase_csr_read_2(__csr_num, __val) {\
+ switchcase_csr_read(__csr_num + 0, __val) \
+ switchcase_csr_read(__csr_num + 1, __val)}
+#define switchcase_csr_read_4(__csr_num, __val) {\
+ switchcase_csr_read_2(__csr_num + 0, __val) \
+ switchcase_csr_read_2(__csr_num + 2, __val)}
+#define switchcase_csr_read_8(__csr_num, __val) {\
+ switchcase_csr_read_4(__csr_num + 0, __val) \
+ switchcase_csr_read_4(__csr_num + 4, __val)}
+#define switchcase_csr_read_16(__csr_num, __val) {\
+ switchcase_csr_read_8(__csr_num + 0, __val) \
+ switchcase_csr_read_8(__csr_num + 8, __val)}
+#define switchcase_csr_read_32(__csr_num, __val) {\
+ switchcase_csr_read_16(__csr_num + 0, __val) \
+ switchcase_csr_read_16(__csr_num + 16, __val)}
+
+ unsigned long ret = 0;
+
+ switch (csr_num) {
+ switchcase_csr_read_32(CSR_CYCLE, ret)
+ switchcase_csr_read_32(CSR_CYCLEH, ret)
+ default :
+ break;
+ }
+
+ return ret;
+#undef switchcase_csr_read_32
+#undef switchcase_csr_read_16
+#undef switchcase_csr_read_8
+#undef switchcase_csr_read_4
+#undef switchcase_csr_read_2
+#undef switchcase_csr_read
+}
+
+static inline void dummy_func_loop(uint64_t iter)
+{
+ int i = 0;
+
+ while (i < iter) {
+ asm volatile("nop");
+ i++;
+ }
+}
+
+static void start_counter(unsigned long counter, unsigned long start_flags,
+ unsigned long ival)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, counter, 1, start_flags,
+ ival, 0, 0);
+ __GUEST_ASSERT(ret.error == 0, "Unable to start counter %ld\n", counter);
+}
+
+/* This should be invoked only for reset counter use case */
+static void stop_reset_counter(unsigned long counter, unsigned long stop_flags)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1,
+ stop_flags | SBI_PMU_STOP_FLAG_RESET, 0, 0, 0);
+ __GUEST_ASSERT(ret.error == SBI_ERR_ALREADY_STOPPED,
+ "Unable to stop counter %ld\n", counter);
+}
+
+static void stop_counter(unsigned long counter, unsigned long stop_flags)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1, stop_flags,
+ 0, 0, 0);
+ __GUEST_ASSERT(ret.error == 0, "Unable to stop counter %ld error %ld\n",
+ counter, ret.error);
+}
+
+static void guest_illegal_exception_handler(struct ex_regs *regs)
+{
+ __GUEST_ASSERT(regs->cause == EXC_INST_ILLEGAL,
+ "Unexpected exception handler %lx\n", regs->cause);
+
+ illegal_handler_invoked = true;
+ /* skip the trapping instruction */
+ regs->epc += 4;
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG;
+ struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
+ unsigned long overflown_mask;
+ unsigned long counter_val = 0;
+
+ /* Validate that we are in the correct irq handler */
+ GUEST_ASSERT_EQ(irq_num, IRQ_PMU_OVF);
+
+ /* Stop all counters first to avoid further interrupts */
+ stop_counter(counter_in_use, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+
+ csr_clear(CSR_SIP, BIT(IRQ_PMU_OVF));
+
+ overflown_mask = READ_ONCE(snapshot_data->ctr_overflow_mask);
+ GUEST_ASSERT(overflown_mask & 0x01);
+
+ WRITE_ONCE(vcpu_shared_irq_count, vcpu_shared_irq_count+1);
+
+ counter_val = READ_ONCE(snapshot_data->ctr_values[0]);
+ /* Now start the counter to mimick the real driver behavior */
+ start_counter(counter_in_use, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_val);
+}
+
+static unsigned long get_counter_index(unsigned long cbase, unsigned long cmask,
+ unsigned long cflags,
+ unsigned long event)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase, cmask,
+ cflags, event, 0, 0);
+ __GUEST_ASSERT(ret.error == 0, "config matching failed %ld\n", ret.error);
+ GUEST_ASSERT(ret.value < RISCV_MAX_PMU_COUNTERS);
+ GUEST_ASSERT(BIT(ret.value) & counter_mask_available);
+
+ return ret.value;
+}
+
+static unsigned long get_num_counters(void)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_NUM_COUNTERS, 0, 0, 0, 0, 0, 0);
+
+ __GUEST_ASSERT(ret.error == 0, "Unable to retrieve number of counters from SBI PMU");
+ __GUEST_ASSERT(ret.value < RISCV_MAX_PMU_COUNTERS,
+ "Invalid number of counters %ld\n", ret.value);
+
+ return ret.value;
+}
+
+static void update_counter_info(int num_counters)
+{
+ int i = 0;
+ struct sbiret ret;
+
+ for (i = 0; i < num_counters; i++) {
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, 0, 0, 0, 0, 0);
+
+ /* There can be gaps in logical counter indicies*/
+ if (ret.error)
+ continue;
+ GUEST_ASSERT_NE(ret.value, 0);
+
+ ctrinfo_arr[i].value = ret.value;
+ counter_mask_available |= BIT(i);
+ }
+
+ GUEST_ASSERT(counter_mask_available > 0);
+}
+
+static unsigned long read_fw_counter(int idx, union sbi_pmu_ctr_info ctrinfo)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ, idx, 0, 0, 0, 0, 0);
+ GUEST_ASSERT(ret.error == 0);
+ return ret.value;
+}
+
+static unsigned long read_counter(int idx, union sbi_pmu_ctr_info ctrinfo)
+{
+ unsigned long counter_val = 0;
+
+ __GUEST_ASSERT(ctrinfo.type < 2, "Invalid counter type %d", ctrinfo.type);
+
+ if (ctrinfo.type == SBI_PMU_CTR_TYPE_HW)
+ counter_val = pmu_csr_read_num(ctrinfo.csr);
+ else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW)
+ counter_val = read_fw_counter(idx, ctrinfo);
+
+ return counter_val;
+}
+
+static inline void verify_sbi_requirement_assert(void)
+{
+ long out_val = 0;
+ bool probe;
+
+ probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val);
+ GUEST_ASSERT(probe && out_val == 1);
+
+ if (get_host_sbi_spec_version() < sbi_mk_version(2, 0))
+ __GUEST_ASSERT(0, "SBI implementation version doesn't support PMU Snapshot");
+}
+
+static void snapshot_set_shmem(vm_paddr_t gpa, unsigned long flags)
+{
+ unsigned long lo = (unsigned long)gpa;
+#if __riscv_xlen == 32
+ unsigned long hi = (unsigned long)(gpa >> 32);
+#else
+ unsigned long hi = gpa == -1 ? -1 : 0;
+#endif
+ struct sbiret ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
+ lo, hi, flags, 0, 0, 0);
+
+ GUEST_ASSERT(ret.value == 0 && ret.error == 0);
+}
+
+static void test_pmu_event(unsigned long event)
+{
+ unsigned long counter;
+ unsigned long counter_value_pre, counter_value_post;
+ unsigned long counter_init_value = 100;
+
+ counter = get_counter_index(0, counter_mask_available, 0, event);
+ counter_value_pre = read_counter(counter, ctrinfo_arr[counter]);
+
+ /* Do not set the initial value */
+ start_counter(counter, 0, 0);
+ dummy_func_loop(10000);
+ stop_counter(counter, 0);
+
+ counter_value_post = read_counter(counter, ctrinfo_arr[counter]);
+ __GUEST_ASSERT(counter_value_post > counter_value_pre,
+ "Event update verification failed: post [%lx] pre [%lx]\n",
+ counter_value_post, counter_value_pre);
+
+ /*
+ * We can't just update the counter without starting it.
+ * Do start/stop twice to simulate that by first initializing to a very
+ * high value and a low value after that.
+ */
+ start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, ULONG_MAX/2);
+ stop_counter(counter, 0);
+ counter_value_pre = read_counter(counter, ctrinfo_arr[counter]);
+
+ start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_init_value);
+ stop_counter(counter, 0);
+ counter_value_post = read_counter(counter, ctrinfo_arr[counter]);
+ __GUEST_ASSERT(counter_value_pre > counter_value_post,
+ "Counter reinitialization verification failed : post [%lx] pre [%lx]\n",
+ counter_value_post, counter_value_pre);
+
+ /* Now set the initial value and compare */
+ start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_init_value);
+ dummy_func_loop(10000);
+ stop_counter(counter, 0);
+
+ counter_value_post = read_counter(counter, ctrinfo_arr[counter]);
+ __GUEST_ASSERT(counter_value_post > counter_init_value,
+ "Event update verification failed: post [%lx] pre [%lx]\n",
+ counter_value_post, counter_init_value);
+
+ stop_reset_counter(counter, 0);
+}
+
+static void test_pmu_event_snapshot(unsigned long event)
+{
+ unsigned long counter;
+ unsigned long counter_value_pre, counter_value_post;
+ unsigned long counter_init_value = 100;
+ struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
+
+ counter = get_counter_index(0, counter_mask_available, 0, event);
+ counter_value_pre = read_counter(counter, ctrinfo_arr[counter]);
+
+ /* Do not set the initial value */
+ start_counter(counter, 0, 0);
+ dummy_func_loop(10000);
+ stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+
+ /* The counter value is updated w.r.t relative index of cbase */
+ counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
+ __GUEST_ASSERT(counter_value_post > counter_value_pre,
+ "Event update verification failed: post [%lx] pre [%lx]\n",
+ counter_value_post, counter_value_pre);
+
+ /*
+ * We can't just update the counter without starting it.
+ * Do start/stop twice to simulate that by first initializing to a very
+ * high value and a low value after that.
+ */
+ WRITE_ONCE(snapshot_data->ctr_values[0], ULONG_MAX/2);
+ start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
+ stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+ counter_value_pre = READ_ONCE(snapshot_data->ctr_values[0]);
+
+ WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value);
+ start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
+ stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+ counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
+ __GUEST_ASSERT(counter_value_pre > counter_value_post,
+ "Counter reinitialization verification failed : post [%lx] pre [%lx]\n",
+ counter_value_post, counter_value_pre);
+
+ /* Now set the initial value and compare */
+ WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value);
+ start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
+ dummy_func_loop(10000);
+ stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+
+ counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
+ __GUEST_ASSERT(counter_value_post > counter_init_value,
+ "Event update verification failed: post [%lx] pre [%lx]\n",
+ counter_value_post, counter_init_value);
+
+ stop_reset_counter(counter, 0);
+}
+
+static void test_pmu_event_overflow(unsigned long event)
+{
+ unsigned long counter;
+ unsigned long counter_value_post;
+ unsigned long counter_init_value = ULONG_MAX - 10000;
+ struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
+
+ counter = get_counter_index(0, counter_mask_available, 0, event);
+ counter_in_use = counter;
+
+ /* The counter value is updated w.r.t relative index of cbase passed to start/stop */
+ WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value);
+ start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
+ dummy_func_loop(10000);
+ udelay(msecs_to_usecs(2000));
+ /* irq handler should have stopped the counter */
+ stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+
+ counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
+ /* The counter value after stopping should be less the init value due to overflow */
+ __GUEST_ASSERT(counter_value_post < counter_init_value,
+ "counter_value_post %lx counter_init_value %lx for counter\n",
+ counter_value_post, counter_init_value);
+
+ stop_reset_counter(counter, 0);
+}
+
+static void test_invalid_event(void)
+{
+ struct sbiret ret;
+ unsigned long event = 0x1234; /* A random event */
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, 0,
+ counter_mask_available, 0, event, 0, 0);
+ GUEST_ASSERT_EQ(ret.error, SBI_ERR_NOT_SUPPORTED);
+}
+
+static void test_pmu_events(void)
+{
+ int num_counters = 0;
+
+ /* Get the counter details */
+ num_counters = get_num_counters();
+ update_counter_info(num_counters);
+
+ /* Sanity testing for any random invalid event */
+ test_invalid_event();
+
+ /* Only these two events are guaranteed to be present */
+ test_pmu_event(SBI_PMU_HW_CPU_CYCLES);
+ test_pmu_event(SBI_PMU_HW_INSTRUCTIONS);
+
+ GUEST_DONE();
+}
+
+static void test_pmu_basic_sanity(void)
+{
+ long out_val = 0;
+ bool probe;
+ struct sbiret ret;
+ int num_counters = 0, i;
+ union sbi_pmu_ctr_info ctrinfo;
+
+ probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val);
+ GUEST_ASSERT(probe && out_val == 1);
+
+ num_counters = get_num_counters();
+
+ for (i = 0; i < num_counters; i++) {
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i,
+ 0, 0, 0, 0, 0);
+
+ /* There can be gaps in logical counter indicies*/
+ if (ret.error)
+ continue;
+ GUEST_ASSERT_NE(ret.value, 0);
+
+ ctrinfo.value = ret.value;
+
+ /**
+ * Accessibility check of hardware and read capability of firmware counters.
+ * The spec doesn't mandate any initial value. No need to check any value.
+ */
+ if (ctrinfo.type == SBI_PMU_CTR_TYPE_HW) {
+ pmu_csr_read_num(ctrinfo.csr);
+ GUEST_ASSERT(illegal_handler_invoked);
+ } else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW) {
+ read_fw_counter(i, ctrinfo);
+ }
+ }
+
+ GUEST_DONE();
+}
+
+static void test_pmu_events_snaphost(void)
+{
+ int num_counters = 0;
+ struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
+ int i;
+
+ /* Verify presence of SBI PMU and minimum requrired SBI version */
+ verify_sbi_requirement_assert();
+
+ snapshot_set_shmem(snapshot_gpa, 0);
+
+ /* Get the counter details */
+ num_counters = get_num_counters();
+ update_counter_info(num_counters);
+
+ /* Validate shared memory access */
+ GUEST_ASSERT_EQ(READ_ONCE(snapshot_data->ctr_overflow_mask), 0);
+ for (i = 0; i < num_counters; i++) {
+ if (counter_mask_available & (BIT(i)))
+ GUEST_ASSERT_EQ(READ_ONCE(snapshot_data->ctr_values[i]), 0);
+ }
+ /* Only these two events are guranteed to be present */
+ test_pmu_event_snapshot(SBI_PMU_HW_CPU_CYCLES);
+ test_pmu_event_snapshot(SBI_PMU_HW_INSTRUCTIONS);
+
+ GUEST_DONE();
+}
+
+static void test_pmu_events_overflow(void)
+{
+ int num_counters = 0;
+
+ /* Verify presence of SBI PMU and minimum requrired SBI version */
+ verify_sbi_requirement_assert();
+
+ snapshot_set_shmem(snapshot_gpa, 0);
+ csr_set(CSR_IE, BIT(IRQ_PMU_OVF));
+ local_irq_enable();
+
+ /* Get the counter details */
+ num_counters = get_num_counters();
+ update_counter_info(num_counters);
+
+ /*
+ * Qemu supports overflow for cycle/instruction.
+ * This test may fail on any platform that do not support overflow for these two events.
+ */
+ test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES);
+ GUEST_ASSERT_EQ(vcpu_shared_irq_count, 1);
+
+ test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS);
+ GUEST_ASSERT_EQ(vcpu_shared_irq_count, 2);
+
+ GUEST_DONE();
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ case UCALL_SYNC:
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ break;
+ }
+}
+
+void test_vm_destroy(struct kvm_vm *vm)
+{
+ memset(ctrinfo_arr, 0, sizeof(union sbi_pmu_ctr_info) * RISCV_MAX_PMU_COUNTERS);
+ counter_mask_available = 0;
+ kvm_vm_free(vm);
+}
+
+static void test_vm_basic_test(void *guest_code)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
+ "SBI PMU not available, skipping test");
+ vm_init_vector_tables(vm);
+ /* Illegal instruction handler is required to verify read access without configuration */
+ vm_install_exception_handler(vm, EXC_INST_ILLEGAL, guest_illegal_exception_handler);
+
+ vcpu_init_vector_tables(vcpu);
+ run_vcpu(vcpu);
+
+ test_vm_destroy(vm);
+}
+
+static void test_vm_events_test(void *guest_code)
+{
+ struct kvm_vm *vm = NULL;
+ struct kvm_vcpu *vcpu = NULL;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
+ "SBI PMU not available, skipping test");
+ run_vcpu(vcpu);
+
+ test_vm_destroy(vm);
+}
+
+static void test_vm_setup_snapshot_mem(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ /* PMU Snapshot requires single page only */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, PMU_SNAPSHOT_GPA_BASE, 1, 1, 0);
+ /* PMU_SNAPSHOT_GPA_BASE is identity mapped */
+ virt_map(vm, PMU_SNAPSHOT_GPA_BASE, PMU_SNAPSHOT_GPA_BASE, 1);
+
+ snapshot_gva = (void *)(PMU_SNAPSHOT_GPA_BASE);
+ snapshot_gpa = addr_gva2gpa(vcpu->vm, (vm_vaddr_t)snapshot_gva);
+ sync_global_to_guest(vcpu->vm, snapshot_gva);
+ sync_global_to_guest(vcpu->vm, snapshot_gpa);
+}
+
+static void test_vm_events_snapshot_test(void *guest_code)
+{
+ struct kvm_vm *vm = NULL;
+ struct kvm_vcpu *vcpu;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
+ "SBI PMU not available, skipping test");
+
+ test_vm_setup_snapshot_mem(vm, vcpu);
+
+ run_vcpu(vcpu);
+
+ test_vm_destroy(vm);
+}
+
+static void test_vm_events_overflow(void *guest_code)
+{
+ struct kvm_vm *vm = NULL;
+ struct kvm_vcpu *vcpu;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
+ "SBI PMU not available, skipping test");
+
+ __TEST_REQUIRE(__vcpu_has_isa_ext(vcpu, KVM_RISCV_ISA_EXT_SSCOFPMF),
+ "Sscofpmf is not available, skipping overflow test");
+
+ test_vm_setup_snapshot_mem(vm, vcpu);
+ vm_init_vector_tables(vm);
+ vm_install_interrupt_handler(vm, guest_irq_handler);
+
+ vcpu_init_vector_tables(vcpu);
+ /* Initialize guest timer frequency. */
+ vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency), &timer_freq);
+ sync_global_to_guest(vm, timer_freq);
+
+ run_vcpu(vcpu);
+
+ test_vm_destroy(vm);
+}
+
+static void test_print_help(char *name)
+{
+ pr_info("Usage: %s [-h] [-d <test name>]\n", name);
+ pr_info("\t-d: Test to disable. Available tests are 'basic', 'events', 'snapshot', 'overflow'\n");
+ pr_info("\t-h: print this help screen\n");
+}
+
+static bool parse_args(int argc, char *argv[])
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "hd:")) != -1) {
+ switch (opt) {
+ case 'd':
+ if (!strncmp("basic", optarg, 5))
+ disabled_tests |= SBI_PMU_TEST_BASIC;
+ else if (!strncmp("events", optarg, 6))
+ disabled_tests |= SBI_PMU_TEST_EVENTS;
+ else if (!strncmp("snapshot", optarg, 8))
+ disabled_tests |= SBI_PMU_TEST_SNAPSHOT;
+ else if (!strncmp("overflow", optarg, 8))
+ disabled_tests |= SBI_PMU_TEST_OVERFLOW;
+ else
+ goto done;
+ break;
+ case 'h':
+ default:
+ goto done;
+ }
+ }
+
+ return true;
+done:
+ test_print_help(argv[0]);
+ return false;
+}
+
+int main(int argc, char *argv[])
+{
+ if (!parse_args(argc, argv))
+ exit(KSFT_SKIP);
+
+ if (!(disabled_tests & SBI_PMU_TEST_BASIC)) {
+ test_vm_basic_test(test_pmu_basic_sanity);
+ pr_info("SBI PMU basic test : PASS\n");
+ }
+
+ if (!(disabled_tests & SBI_PMU_TEST_EVENTS)) {
+ test_vm_events_test(test_pmu_events);
+ pr_info("SBI PMU event verification test : PASS\n");
+ }
+
+ if (!(disabled_tests & SBI_PMU_TEST_SNAPSHOT)) {
+ test_vm_events_snapshot_test(test_pmu_events_snaphost);
+ pr_info("SBI PMU event verification with snapshot test : PASS\n");
+ }
+
+ if (!(disabled_tests & SBI_PMU_TEST_OVERFLOW)) {
+ test_vm_events_overflow(test_pmu_events_overflow);
+ pr_info("SBI PMU event verification with overflow test : PASS\n");
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index 28f97fb520..e5898678bf 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -1,5 +1,13 @@
// SPDX-License-Identifier: GPL-2.0-only
-#define _GNU_SOURCE /* for program_invocation_short_name */
+
+/*
+ * Include rseq.c without _GNU_SOURCE defined, before including any headers, so
+ * that rseq.c is compiled with its configuration, not KVM selftests' config.
+ */
+#undef _GNU_SOURCE
+#include "../rseq/rseq.c"
+#define _GNU_SOURCE
+
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
@@ -19,8 +27,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "test_util.h"
-
-#include "../rseq/rseq.c"
+#include "ucall_common.h"
/*
* Any bug related to task migration is likely to be timing-dependent; perform
@@ -186,12 +193,35 @@ static void calc_min_max_cpu(void)
"Only one usable CPU, task migration not possible");
}
+static void help(const char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-u]\n", name);
+ printf(" -u: Don't sanity check the number of successful KVM_RUNs\n");
+ puts("");
+ exit(0);
+}
+
int main(int argc, char *argv[])
{
+ bool skip_sanity_check = false;
int r, i, snapshot;
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
u32 cpu, rseq_cpu;
+ int opt;
+
+ while ((opt = getopt(argc, argv, "hu")) != -1) {
+ switch (opt) {
+ case 'u':
+ skip_sanity_check = true;
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask);
TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
@@ -254,9 +284,17 @@ int main(int argc, char *argv[])
* getcpu() to stabilize. A 2:1 migration:KVM_RUN ratio is a fairly
* conservative ratio on x86-64, which can do _more_ KVM_RUNs than
* migrations given the 1us+ delay in the migration task.
+ *
+ * Another reason why it may have small migration:KVM_RUN ratio is that,
+ * on systems with large low power mode wakeup latency, it may happen
+ * quite often that the scheduler is not able to wake up the target CPU
+ * before the vCPU thread is scheduled to another CPU.
*/
- TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2),
- "Only performed %d KVM_RUNs, task stalled too much?", i);
+ TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2),
+ "Only performed %d KVM_RUNs, task stalled too much?\n\n"
+ " Try disabling deep sleep states to reduce CPU wakeup latency,\n"
+ " e.g. via cpuidle.off=1 or setting /dev/cpu_dma_latency to '0',\n"
+ " or run with -u to disable this sanity check.", i);
pthread_join(migration_thread, NULL);
diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390x/cmma_test.c
index 626a2b8a20..b390338447 100644
--- a/tools/testing/selftests/kvm/s390x/cmma_test.c
+++ b/tools/testing/selftests/kvm/s390x/cmma_test.c
@@ -7,8 +7,6 @@
* Authors:
* Nico Boehr <nrb@linux.ibm.com>
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@@ -18,6 +16,7 @@
#include "test_util.h"
#include "kvm_util.h"
#include "kselftest.h"
+#include "ucall_common.h"
#define MAIN_PAGE_COUNT 512
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
index 48cb910e66..f2df7416be 100644
--- a/tools/testing/selftests/kvm/s390x/memop.c
+++ b/tools/testing/selftests/kvm/s390x/memop.c
@@ -15,6 +15,7 @@
#include "test_util.h"
#include "kvm_util.h"
#include "kselftest.h"
+#include "ucall_common.h"
enum mop_target {
LOGICAL,
diff --git a/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c
new file mode 100644
index 0000000000..bba0d9a6dc
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test shared zeropage handling (with/without storage keys)
+ *
+ * Copyright (C) 2024, Red Hat, Inc.
+ */
+#include <sys/mman.h>
+
+#include <linux/fs.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+
+static void set_storage_key(void *addr, uint8_t skey)
+{
+ asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
+}
+
+static void guest_code(void)
+{
+ /* Issue some storage key instruction. */
+ set_storage_key((void *)0, 0x98);
+ GUEST_DONE();
+}
+
+/*
+ * Returns 1 if the shared zeropage is mapped, 0 if something else is mapped.
+ * Returns < 0 on error or if nothing is mapped.
+ */
+static int maps_shared_zeropage(int pagemap_fd, void *addr)
+{
+ struct page_region region;
+ struct pm_scan_arg arg = {
+ .start = (uintptr_t)addr,
+ .end = (uintptr_t)addr + 4096,
+ .vec = (uintptr_t)&region,
+ .vec_len = 1,
+ .size = sizeof(struct pm_scan_arg),
+ .category_mask = PAGE_IS_PFNZERO,
+ .category_anyof_mask = PAGE_IS_PRESENT,
+ .return_mask = PAGE_IS_PFNZERO,
+ };
+ return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
+}
+
+int main(int argc, char *argv[])
+{
+ char *mem, *page0, *page1, *page2, tmp;
+ const size_t pagesize = getpagesize();
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int pagemap_fd;
+
+ ksft_print_header();
+ ksft_set_plan(3);
+
+ /*
+ * We'll use memory that is not mapped into the VM for simplicity.
+ * Shared zeropages are enabled/disabled per-process.
+ */
+ mem = mmap(0, 3 * pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
+ TEST_ASSERT(mem != MAP_FAILED, "mmap() failed");
+
+ /* Disable THP. Ignore errors on older kernels. */
+ madvise(mem, 3 * pagesize, MADV_NOHUGEPAGE);
+
+ page0 = mem;
+ page1 = page0 + pagesize;
+ page2 = page1 + pagesize;
+
+ /* Can we even detect shared zeropages? */
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ TEST_REQUIRE(pagemap_fd >= 0);
+
+ tmp = *page0;
+ asm volatile("" : "+r" (tmp));
+ TEST_REQUIRE(maps_shared_zeropage(pagemap_fd, page0) == 1);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ /* Verify that we get the shared zeropage after VM creation. */
+ tmp = *page1;
+ asm volatile("" : "+r" (tmp));
+ ksft_test_result(maps_shared_zeropage(pagemap_fd, page1) == 1,
+ "Shared zeropages should be enabled\n");
+
+ /*
+ * Let our VM execute a storage key instruction that should
+ * unshare all shared zeropages.
+ */
+ vcpu_run(vcpu);
+ get_ucall(vcpu, &uc);
+ TEST_ASSERT_EQ(uc.cmd, UCALL_DONE);
+
+ /* Verify that we don't have a shared zeropage anymore. */
+ ksft_test_result(!maps_shared_zeropage(pagemap_fd, page1),
+ "Shared zeropage should be gone\n");
+
+ /* Verify that we don't get any new shared zeropages. */
+ tmp = *page2;
+ asm volatile("" : "+r" (tmp));
+ ksft_test_result(!maps_shared_zeropage(pagemap_fd, page2),
+ "Shared zeropages should be disabled\n");
+
+ kvm_vm_free(vm);
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
index 43fb25ddc3..53def355cc 100644
--- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
@@ -10,8 +10,6 @@
*
* Test expected behavior of the KVM_CAP_SYNC_REGS functionality.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390x/tprot.c
index c73f948c9b..7a742a673b 100644
--- a/tools/testing/selftests/kvm/s390x/tprot.c
+++ b/tools/testing/selftests/kvm/s390x/tprot.c
@@ -8,6 +8,7 @@
#include "test_util.h"
#include "kvm_util.h"
#include "kselftest.h"
+#include "ucall_common.h"
#define PAGE_SHIFT 12
#define PAGE_SIZE (1 << PAGE_SHIFT)
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index bd57d991e2..bb8002084f 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -1,5 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <pthread.h>
#include <sched.h>
@@ -221,8 +220,20 @@ static void test_move_memory_region(void)
static void guest_code_delete_memory_region(void)
{
+ struct desc_ptr idt;
uint64_t val;
+ /*
+ * Clobber the IDT so that a #PF due to the memory region being deleted
+ * escalates to triple-fault shutdown. Because the memory region is
+ * deleted, there will be no valid mappings. As a result, KVM will
+ * repeatedly intercepts the state-2 page fault that occurs when trying
+ * to vector the guest's #PF. I.e. trying to actually handle the #PF
+ * in the guest will never succeed, and so isn't an option.
+ */
+ memset(&idt, 0, sizeof(idt));
+ __asm__ __volatile__("lidt %0" :: "m"(idt));
+
GUEST_SYNC(0);
/* Spin until the memory region is deleted. */
@@ -339,7 +350,7 @@ static void test_invalid_memory_region_flags(void)
#ifdef __x86_64__
if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))
- vm = vm_create_barebones_protected_vm();
+ vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM);
else
#endif
vm = vm_create_barebones();
@@ -462,7 +473,7 @@ static void test_add_private_memory_region(void)
pr_info("Testing ADD of KVM_MEM_GUEST_MEMFD memory regions\n");
- vm = vm_create_barebones_protected_vm();
+ vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM);
test_invalid_guest_memfd(vm, vm->kvm_fd, 0, "KVM fd should fail");
test_invalid_guest_memfd(vm, vm->fd, 0, "VM's fd should fail");
@@ -471,7 +482,7 @@ static void test_add_private_memory_region(void)
test_invalid_guest_memfd(vm, memfd, 0, "Regular memfd() should fail");
close(memfd);
- vm2 = vm_create_barebones_protected_vm();
+ vm2 = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM);
memfd = vm_create_guest_memfd(vm2, MEM_REGION_SIZE, 0);
test_invalid_guest_memfd(vm, memfd, 0, "Other VM's guest_memfd() should fail");
@@ -499,7 +510,7 @@ static void test_add_overlapping_private_memory_regions(void)
pr_info("Testing ADD of overlapping KVM_MEM_GUEST_MEMFD memory regions\n");
- vm = vm_create_barebones_protected_vm();
+ vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM);
memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE * 4, 0);
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index bae0c5026f..a8d3afa0b8 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -4,20 +4,22 @@
*
* Copyright (C) 2020, Red Hat, Inc.
*/
-#define _GNU_SOURCE
#include <stdio.h>
#include <time.h>
#include <sched.h>
#include <pthread.h>
#include <linux/kernel.h>
#include <asm/kvm.h>
-#ifndef __riscv
+#ifdef __riscv
+#include "sbi.h"
+#else
#include <asm/kvm_para.h>
#endif
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
+#include "ucall_common.h"
#define NR_VCPUS 4
#define ST_GPA_BASE (1 << 30)
@@ -83,20 +85,18 @@ static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
{
struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]);
- int i;
- pr_info("VCPU%d:\n", vcpu_idx);
- pr_info(" steal: %lld\n", st->steal);
- pr_info(" version: %d\n", st->version);
- pr_info(" flags: %d\n", st->flags);
- pr_info(" preempted: %d\n", st->preempted);
- pr_info(" u8_pad: ");
- for (i = 0; i < 3; ++i)
- pr_info("%d", st->u8_pad[i]);
- pr_info("\n pad: ");
- for (i = 0; i < 11; ++i)
- pr_info("%d", st->pad[i]);
- pr_info("\n");
+ ksft_print_msg("VCPU%d:\n", vcpu_idx);
+ ksft_print_msg(" steal: %lld\n", st->steal);
+ ksft_print_msg(" version: %d\n", st->version);
+ ksft_print_msg(" flags: %d\n", st->flags);
+ ksft_print_msg(" preempted: %d\n", st->preempted);
+ ksft_print_msg(" u8_pad: %d %d %d\n",
+ st->u8_pad[0], st->u8_pad[1], st->u8_pad[2]);
+ ksft_print_msg(" pad: %d %d %d %d %d %d %d %d %d %d %d\n",
+ st->pad[0], st->pad[1], st->pad[2], st->pad[3],
+ st->pad[4], st->pad[5], st->pad[6], st->pad[7],
+ st->pad[8], st->pad[9], st->pad[10]);
}
#elif defined(__aarch64__)
@@ -199,10 +199,10 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
{
struct st_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]);
- pr_info("VCPU%d:\n", vcpu_idx);
- pr_info(" rev: %d\n", st->rev);
- pr_info(" attr: %d\n", st->attr);
- pr_info(" st_time: %ld\n", st->st_time);
+ ksft_print_msg("VCPU%d:\n", vcpu_idx);
+ ksft_print_msg(" rev: %d\n", st->rev);
+ ksft_print_msg(" attr: %d\n", st->attr);
+ ksft_print_msg(" st_time: %ld\n", st->st_time);
}
#elif defined(__riscv)
@@ -366,7 +366,9 @@ int main(int ac, char **av)
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages);
+ ksft_print_header();
TEST_REQUIRE(is_steal_time_supported(vcpus[0]));
+ ksft_set_plan(NR_VCPUS);
/* Run test on each VCPU */
for (i = 0; i < NR_VCPUS; ++i) {
@@ -407,14 +409,15 @@ int main(int ac, char **av)
run_delay, stolen_time);
if (verbose) {
- pr_info("VCPU%d: total-stolen-time=%ld test-stolen-time=%ld", i,
- guest_stolen_time[i], stolen_time);
- if (stolen_time == run_delay)
- pr_info(" (BONUS: guest test-stolen-time even exactly matches test-run_delay)");
- pr_info("\n");
+ ksft_print_msg("VCPU%d: total-stolen-time=%ld test-stolen-time=%ld%s\n",
+ i, guest_stolen_time[i], stolen_time,
+ stolen_time == run_delay ?
+ " (BONUS: guest test-stolen-time even exactly matches test-run_delay)" : "");
steal_time_dump(vm, i);
}
+ ksft_test_result_pass("vcpu%d\n", i);
}
- return 0;
+ /* Print results and exit() accordingly */
+ ksft_finished();
}
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
index eae521f050..903940c54d 100644
--- a/tools/testing/selftests/kvm/x86_64/amx_test.c
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -6,8 +6,6 @@
*
* Tests for amx #NM exception and save/restore.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@@ -246,8 +244,6 @@ int main(int argc, char *argv[])
vcpu_regs_get(vcpu, &regs1);
/* Register #NM handler */
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler);
/* amx cfg for guest_code */
diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
index ee3b384b99..2929c067c2 100644
--- a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
+++ b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
@@ -17,6 +17,7 @@
#include "test_util.h"
#include "memstress.h"
#include "guest_modes.h"
+#include "ucall_common.h"
#define VCPUS 2
#define SLOTS 2
diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c
index 6c2e5e0ceb..81055476d3 100644
--- a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c
+++ b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c
@@ -4,12 +4,9 @@
*
* Test for KVM_CAP_EXIT_ON_EMULATION_FAILURE.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
-
#include "flds_emulation.h"
-
#include "test_util.h"
+#include "ucall_common.h"
#define MMIO_GPA 0x700000000
#define MMIO_GVA MMIO_GPA
diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
index f3c2239228..762628f7d4 100644
--- a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
@@ -110,8 +110,6 @@ static void test_fix_hypercall(struct kvm_vcpu *vcpu, bool disable_quirk)
{
struct kvm_vm *vm = vcpu->vm;
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler);
if (disable_quirk)
diff --git a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c
index df351ae170..10b1b0ba37 100644
--- a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c
+++ b/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c
@@ -2,8 +2,6 @@
/*
* Copyright (C) 2023, Google LLC.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <sys/ioctl.h>
#include "test_util.h"
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
index 5c27efbf40..4f5881d4ef 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
@@ -7,8 +7,6 @@
* This work is licensed under the terms of the GNU GPL, version 2.
*
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
index 4c7257ecd2..e192720bfe 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
@@ -4,7 +4,6 @@
*
* Tests for Enlightened VMCS, including nested guest state.
*/
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@@ -258,8 +257,6 @@ int main(int argc, char *argv[])
vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
index b923a285e9..068e9c6971 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -156,9 +156,6 @@ static void guest_test_msrs_access(void)
vcpu_init_cpuid(vcpu, prev_cpuid);
}
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
/* TODO: Make this entire test easier to maintain. */
if (stage >= 21)
vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_SYNIC2, 0);
@@ -532,9 +529,6 @@ static void guest_test_hcalls_access(void)
while (true) {
vm = vm_create_with_one_vcpu(&vcpu, guest_hcall);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
/* Hypercall input/output */
hcall_page = vm_vaddr_alloc_pages(vm, 2);
memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
index f1617762c2..22c0c12458 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
@@ -5,8 +5,6 @@
* Copyright (C) 2022, Red Hat, Inc.
*
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <pthread.h>
#include <inttypes.h>
@@ -256,16 +254,13 @@ int main(int argc, char *argv[])
hcall_page = vm_vaddr_alloc_pages(vm, 2);
memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
- vm_init_descriptor_tables(vm);
vcpu[1] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_1, receiver_code);
- vcpu_init_descriptor_tables(vcpu[1]);
vcpu_args_set(vcpu[1], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_1);
vcpu_set_hv_cpuid(vcpu[1]);
vcpu[2] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_2, receiver_code);
- vcpu_init_descriptor_tables(vcpu[2]);
vcpu_args_set(vcpu[2], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_2);
vcpu_set_hv_cpuid(vcpu[2]);
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
index c9b18707ed..b987a3d797 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
@@ -4,7 +4,6 @@
*
* Tests for Hyper-V extensions to SVM.
*/
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
index 05b56095cf..077cd0ec30 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
@@ -5,8 +5,6 @@
* Copyright (C) 2022, Red Hat, Inc.
*
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <asm/barrier.h>
#include <pthread.h>
#include <inttypes.h>
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
index 40cc59f4e6..78878b3a27 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
@@ -183,9 +183,6 @@ int main(void)
vcpu_clear_cpuid_entry(vcpu, KVM_CPUID_FEATURES);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
enter_guest(vcpu);
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
index 853802641e..2b550eff35 100644
--- a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
+++ b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
@@ -75,14 +75,12 @@ int main(int argc, char *argv[])
struct ucall uc;
int testcase;
+ TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
while (1) {
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
diff --git a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
index 3670331adf..3eb0313ffa 100644
--- a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
+++ b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
@@ -1,6 +1,4 @@
// SPDX-License-Identifier: GPL-2.0-only
-#define _GNU_SOURCE /* for program_invocation_short_name */
-
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
index 17bbb96fc4..e7efb2b35f 100644
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
@@ -5,9 +5,6 @@
*
* Copyright (C) 2022, Google LLC.
*/
-
-#define _GNU_SOURCE
-
#include <fcntl.h>
#include <stdint.h>
#include <time.h>
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
index 7cbb409801..caad084b8b 100755
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
@@ -13,10 +13,21 @@ NX_HUGE_PAGES_RECOVERY_RATIO=$(cat /sys/module/kvm/parameters/nx_huge_pages_reco
NX_HUGE_PAGES_RECOVERY_PERIOD=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms)
HUGE_PAGES=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages)
+# If we're already root, the host might not have sudo.
+if [ $(whoami) == "root" ]; then
+ function do_sudo () {
+ "$@"
+ }
+else
+ function do_sudo () {
+ sudo "$@"
+ }
+fi
+
set +e
function sudo_echo () {
- echo "$1" | sudo tee -a "$2" > /dev/null
+ echo "$1" | do_sudo tee -a "$2" > /dev/null
}
NXECUTABLE="$(dirname $0)/nx_huge_pages_test"
diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
index 87011965dc..eda88080c1 100644
--- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c
+++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
@@ -9,8 +9,6 @@
* Verifies expected behavior of controlling guest access to
* MSR_PLATFORM_INFO.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@@ -26,36 +24,18 @@
static void guest_code(void)
{
uint64_t msr_platform_info;
+ uint8_t vector;
- for (;;) {
- msr_platform_info = rdmsr(MSR_PLATFORM_INFO);
- GUEST_SYNC(msr_platform_info);
- asm volatile ("inc %r11");
- }
-}
-
-static void test_msr_platform_info_enabled(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, true);
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ GUEST_SYNC(true);
+ msr_platform_info = rdmsr(MSR_PLATFORM_INFO);
+ GUEST_ASSERT_EQ(msr_platform_info & MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
+ MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
- get_ucall(vcpu, &uc);
- TEST_ASSERT(uc.cmd == UCALL_SYNC,
- "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
- TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
- MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
- "Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.",
- MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
-}
+ GUEST_SYNC(false);
+ vector = rdmsr_safe(MSR_PLATFORM_INFO, &msr_platform_info);
+ GUEST_ASSERT_EQ(vector, GP_VECTOR);
-static void test_msr_platform_info_disabled(struct kvm_vcpu *vcpu)
-{
- vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, false);
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+ GUEST_DONE();
}
int main(int argc, char *argv[])
@@ -63,6 +43,7 @@ int main(int argc, char *argv[])
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
uint64_t msr_platform_info;
+ struct ucall uc;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO));
@@ -71,8 +52,26 @@ int main(int argc, char *argv[])
msr_platform_info = vcpu_get_msr(vcpu, MSR_PLATFORM_INFO);
vcpu_set_msr(vcpu, MSR_PLATFORM_INFO,
msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
- test_msr_platform_info_enabled(vcpu);
- test_msr_platform_info_disabled(vcpu);
+
+ for (;;) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ vm_enable_cap(vm, KVM_CAP_MSR_PLATFORM_INFO, uc.args[1]);
+ break;
+ case UCALL_DONE:
+ goto done;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unexpected ucall %lu", uc.cmd);
+ break;
+ }
+ }
+
+done:
vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, msr_platform_info);
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
index 26c85815f7..96446134c0 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
@@ -2,8 +2,6 @@
/*
* Copyright (C) 2023, Tencent, Inc.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <x86intrin.h>
#include "pmu.h"
@@ -21,7 +19,6 @@
static uint8_t kvm_pmu_version;
static bool kvm_has_perf_caps;
-static bool is_forced_emulation_enabled;
static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
void *guest_code,
@@ -31,11 +28,7 @@ static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
struct kvm_vm *vm;
vm = vm_create_with_one_vcpu(vcpu, guest_code);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(*vcpu);
-
sync_global_to_guest(vm, kvm_pmu_version);
- sync_global_to_guest(vm, is_forced_emulation_enabled);
/*
* Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
@@ -630,7 +623,6 @@ int main(int argc, char *argv[])
kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);
- is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
test_intel_counters();
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
index 3c85d1ae98..26b3e7efe5 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -9,9 +9,6 @@
* Verifies the expected behavior of allow lists and deny lists for
* virtual PMU events.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
-
#include "kvm_util.h"
#include "pmu.h"
#include "processor.h"
@@ -337,9 +334,6 @@ static void test_pmu_config_disable(void (*guest_code)(void))
vm_enable_cap(vm, KVM_CAP_PMU_CAPABILITY, KVM_PMU_CAP_DISABLE);
vcpu = vm_vcpu_add(vm, 0, guest_code);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
TEST_ASSERT(!sanity_check_pmu(vcpu),
"Guest should not be able to use disabled PMU.");
@@ -876,9 +870,6 @@ int main(int argc, char *argv[])
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
TEST_REQUIRE(sanity_check_pmu(vcpu));
if (use_amd_pmu())
diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c
index e0f642d2a3..82a8d88b53 100644
--- a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c
+++ b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c
@@ -2,7 +2,6 @@
/*
* Copyright (C) 2022, Google LLC.
*/
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
index 366cf18600..d691d86e5b 100644
--- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
+++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
@@ -4,7 +4,6 @@
*
* Copyright (C) 2020, Red Hat, Inc.
*/
-#define _GNU_SOURCE /* for program_invocation_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
index 3610981d91..c021c0795a 100644
--- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
@@ -10,7 +10,6 @@
* That bug allowed a user-mode program that called the KVM_SET_SREGS
* ioctl to put a VCPU's local APIC into an invalid state.
*/
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c
new file mode 100644
index 0000000000..3fb967f40c
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kvm.h>
+#include <linux/psp-sev.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+#define SVM_SEV_FEAT_DEBUG_SWAP 32u
+
+/*
+ * Some features may have hidden dependencies, or may only work
+ * for certain VM types. Err on the side of safety and don't
+ * expect that all supported features can be passed one by one
+ * to KVM_SEV_INIT2.
+ *
+ * (Well, right now there's only one...)
+ */
+#define KNOWN_FEATURES SVM_SEV_FEAT_DEBUG_SWAP
+
+int kvm_fd;
+u64 supported_vmsa_features;
+bool have_sev_es;
+
+static int __sev_ioctl(int vm_fd, int cmd_id, void *data)
+{
+ struct kvm_sev_cmd cmd = {
+ .id = cmd_id,
+ .data = (uint64_t)data,
+ .sev_fd = open_sev_dev_path_or_exit(),
+ };
+ int ret;
+
+ ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
+ TEST_ASSERT(ret < 0 || cmd.error == SEV_RET_SUCCESS,
+ "%d failed: fw error: %d\n",
+ cmd_id, cmd.error);
+
+ return ret;
+}
+
+static void test_init2(unsigned long vm_type, struct kvm_sev_init *init)
+{
+ struct kvm_vm *vm;
+ int ret;
+
+ vm = vm_create_barebones_type(vm_type);
+ ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init);
+ TEST_ASSERT(ret == 0,
+ "KVM_SEV_INIT2 return code is %d (expected 0), errno: %d",
+ ret, errno);
+ kvm_vm_free(vm);
+}
+
+static void test_init2_invalid(unsigned long vm_type, struct kvm_sev_init *init, const char *msg)
+{
+ struct kvm_vm *vm;
+ int ret;
+
+ vm = vm_create_barebones_type(vm_type);
+ ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "KVM_SEV_INIT2 should fail, %s.",
+ msg);
+ kvm_vm_free(vm);
+}
+
+void test_vm_types(void)
+{
+ test_init2(KVM_X86_SEV_VM, &(struct kvm_sev_init){});
+
+ /*
+ * TODO: check that unsupported types cannot be created. Probably
+ * a separate selftest.
+ */
+ if (have_sev_es)
+ test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){});
+
+ test_init2_invalid(0, &(struct kvm_sev_init){},
+ "VM type is KVM_X86_DEFAULT_VM");
+ if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))
+ test_init2_invalid(KVM_X86_SW_PROTECTED_VM, &(struct kvm_sev_init){},
+ "VM type is KVM_X86_SW_PROTECTED_VM");
+}
+
+void test_flags(uint32_t vm_type)
+{
+ int i;
+
+ for (i = 0; i < 32; i++)
+ test_init2_invalid(vm_type,
+ &(struct kvm_sev_init){ .flags = BIT(i) },
+ "invalid flag");
+}
+
+void test_features(uint32_t vm_type, uint64_t supported_features)
+{
+ int i;
+
+ for (i = 0; i < 64; i++) {
+ if (!(supported_features & BIT_ULL(i)))
+ test_init2_invalid(vm_type,
+ &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) },
+ "unknown feature");
+ else if (KNOWN_FEATURES & BIT_ULL(i))
+ test_init2(vm_type,
+ &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) });
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ int kvm_fd = open_kvm_dev_path_or_exit();
+ bool have_sev;
+
+ TEST_REQUIRE(__kvm_has_device_attr(kvm_fd, KVM_X86_GRP_SEV,
+ KVM_X86_SEV_VMSA_FEATURES) == 0);
+ kvm_device_attr_get(kvm_fd, KVM_X86_GRP_SEV,
+ KVM_X86_SEV_VMSA_FEATURES,
+ &supported_vmsa_features);
+
+ have_sev = kvm_cpu_has(X86_FEATURE_SEV);
+ TEST_ASSERT(have_sev == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM)),
+ "sev: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
+ kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_VM);
+
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM));
+ have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
+
+ TEST_ASSERT(have_sev_es == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_ES_VM)),
+ "sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
+ kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM);
+
+ test_vm_types();
+
+ test_flags(KVM_X86_SEV_VM);
+ if (have_sev_es)
+ test_flags(KVM_X86_SEV_ES_VM);
+
+ test_features(KVM_X86_SEV_VM, 0);
+ if (have_sev_es)
+ test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c
index 026779f3ed..7c70c0da4f 100644
--- a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c
@@ -4,6 +4,7 @@
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
+#include <math.h>
#include "test_util.h"
#include "kvm_util.h"
@@ -13,6 +14,8 @@
#include "sev.h"
+#define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM)
+
static void guest_sev_es_code(void)
{
/* TODO: Check CPUID after GHCB-based hypercall support is added. */
@@ -35,13 +38,98 @@ static void guest_sev_code(void)
GUEST_DONE();
}
+/* Stash state passed via VMSA before any compiled code runs. */
+extern void guest_code_xsave(void);
+asm("guest_code_xsave:\n"
+ "mov $-1, %eax\n"
+ "mov $-1, %edx\n"
+ "xsave (%rdi)\n"
+ "jmp guest_sev_es_code");
+
+static void compare_xsave(u8 *from_host, u8 *from_guest)
+{
+ int i;
+ bool bad = false;
+ for (i = 0; i < 4095; i++) {
+ if (from_host[i] != from_guest[i]) {
+ printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]);
+ bad = true;
+ }
+ }
+
+ if (bad)
+ abort();
+}
+
+static void test_sync_vmsa(uint32_t policy)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ vm_vaddr_t gva;
+ void *hva;
+
+ double x87val = M_PI;
+ struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 };
+ struct kvm_sregs sregs;
+ struct kvm_xcrs xcrs = {
+ .nr_xcrs = 1,
+ .xcrs[0].xcr = 0,
+ .xcrs[0].value = XFEATURE_MASK_X87_AVX,
+ };
+
+ vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu);
+ gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR,
+ MEM_REGION_TEST_DATA);
+ hva = addr_gva2hva(vm, gva);
+
+ vcpu_args_set(vcpu, 1, gva);
+
+ vcpu_sregs_get(vcpu, &sregs);
+ sregs.cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXSAVE;
+ vcpu_sregs_set(vcpu, &sregs);
+
+ vcpu_xcrs_set(vcpu, &xcrs);
+ asm("fninit\n"
+ "vpcmpeqb %%ymm4, %%ymm4, %%ymm4\n"
+ "fldl %3\n"
+ "xsave (%2)\n"
+ "fstp %%st\n"
+ : "=m"(xsave)
+ : "A"(XFEATURE_MASK_X87_AVX), "r"(&xsave), "m" (x87val)
+ : "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");
+ vcpu_xsave_set(vcpu, &xsave);
+
+ vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL);
+
+ /* This page is shared, so make it decrypted. */
+ memset(hva, 0, 4096);
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
+ "Wanted SYSTEM_EVENT, got %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM);
+ TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1);
+ TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ);
+
+ compare_xsave((u8 *)&xsave, (u8 *)hva);
+
+ kvm_vm_free(vm);
+}
+
static void test_sev(void *guest_code, uint64_t policy)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct ucall uc;
- vm = vm_sev_create_with_one_vcpu(policy, guest_code, &vcpu);
+ uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
+
+ vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu);
+
+ /* TODO: Validate the measurement is as expected. */
+ vm_sev_launch(vm, policy, NULL);
for (;;) {
vcpu_run(vcpu);
@@ -82,6 +170,12 @@ int main(int argc, char *argv[])
if (kvm_cpu_has(X86_FEATURE_SEV_ES)) {
test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG);
test_sev(guest_sev_es_code, SEV_POLICY_ES);
+
+ if (kvm_has_cap(KVM_CAP_XCRS) &&
+ (xgetbv(0) & XFEATURE_MASK_X87_AVX) == XFEATURE_MASK_X87_AVX) {
+ test_sync_vmsa(0);
+ test_sync_vmsa(SEV_POLICY_NO_DBG);
+ }
}
return 0;
diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
index 416207c38a..fabeeaddfb 100644
--- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
@@ -5,9 +5,6 @@
* Test that KVM emulates instructions in response to EPT violations when
* allow_smaller_maxphyaddr is enabled and guest.MAXPHYADDR < host.MAXPHYADDR.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
-
#include "flds_emulation.h"
#include "test_util.h"
@@ -60,9 +57,6 @@ int main(int argc, char *argv[])
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
vcpu_args_set(vcpu, 1, kvm_is_tdp_enabled());
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
vcpu_set_cpuid_property(vcpu, X86_PROPERTY_MAX_PHY_ADDR, MAXPHYADDR);
rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE);
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index e18b86666e..55c88d664a 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -4,7 +4,6 @@
*
* Tests for SMM.
*/
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index 88b58aab72..1c756db329 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -6,7 +6,6 @@
*
* Tests for vCPU state save/restore, including nested guest state.
*/
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
index 32bef39bec..916e04248f 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
+++ b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
@@ -93,9 +93,6 @@ int main(int argc, char *argv[])
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler);
vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler);
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c
index d6fcdcc3af..00135cbba3 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c
+++ b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c
@@ -48,12 +48,9 @@ int main(int argc, char *argv[])
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
vcpu_alloc_svm(vm, &svm_gva);
- vcpu_args_set(vcpu, 2, svm_gva, vm->idt);
+ vcpu_args_set(vcpu, 2, svm_gva, vm->arch.idt);
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
index 0c7ce3d4e8..7b6481d6c0 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
+++ b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
@@ -152,9 +152,6 @@ static void run_test(bool is_nmi)
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
vm_install_exception_handler(vm, BP_VECTOR, guest_bp_handler);
vm_install_exception_handler(vm, INT_NR, guest_int_handler);
@@ -166,7 +163,7 @@ static void run_test(bool is_nmi)
idt_alt_vm = vm_vaddr_alloc_page(vm);
idt_alt = addr_gva2hva(vm, idt_alt_vm);
- idt = addr_gva2hva(vm, vm->idt);
+ idt = addr_gva2hva(vm, vm->arch.idt);
memcpy(idt_alt, idt, getpagesize());
} else {
idt_alt_vm = 0;
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
index adb5593daf..8fa3948b01 100644
--- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -8,8 +8,6 @@
* including requesting an invalid register set, updates to/from values
* in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
index dcbb3c29fb..57f157c06b 100644
--- a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
+++ b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
@@ -17,14 +17,11 @@
* delivered into the guest or not.
*
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <pthread.h>
#include <inttypes.h>
#include <string.h>
#include <time.h>
-#include "kvm_util_base.h"
#include "kvm_util.h"
#include "mce.h"
#include "processor.h"
@@ -285,10 +282,6 @@ int main(int argc, char *argv[])
cmcidis_vcpu = create_vcpu_with_mce_cap(vm, 1, false, cmci_disabled_guest_code);
cmci_vcpu = create_vcpu_with_mce_cap(vm, 2, true, cmci_enabled_guest_code);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(ucna_vcpu);
- vcpu_init_descriptor_tables(cmcidis_vcpu);
- vcpu_init_descriptor_tables(cmci_vcpu);
vm_install_exception_handler(vm, CMCI_VECTOR, guest_cmci_handler);
vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
index f4f61a2d24..32b2794b78 100644
--- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
+++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
@@ -4,8 +4,6 @@
*
* Tests for exiting into userspace on registered MSRs
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <sys/ioctl.h>
#include "kvm_test_harness.h"
@@ -13,8 +11,6 @@
#include "kvm_util.h"
#include "vmx.h"
-static bool fep_available;
-
#define MSR_NON_EXISTENT 0x474f4f00
static u64 deny_bits = 0;
@@ -258,7 +254,7 @@ static void guest_code_filter_allow(void)
GUEST_ASSERT(data == 2);
GUEST_ASSERT(guest_exception_count == 0);
- if (fep_available) {
+ if (is_forced_emulation_enabled) {
/* Let userspace know we aren't done. */
GUEST_SYNC(0);
@@ -520,8 +516,6 @@ KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
uint64_t cmd;
int rc;
- sync_global_to_guest(vm, fep_available);
-
rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
@@ -531,9 +525,6 @@ KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
/* Process guest code userspace exits. */
@@ -551,7 +542,7 @@ KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
vcpu_run(vcpu);
cmd = process_ucall(vcpu);
- if (fep_available) {
+ if (is_forced_emulation_enabled) {
TEST_ASSERT_EQ(cmd, UCALL_SYNC);
vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler);
@@ -774,7 +765,5 @@ KVM_ONE_VCPU_TEST(user_msr, user_exit_msr_flags, NULL)
int main(int argc, char *argv[])
{
- fep_available = kvm_is_forced_emulation_enabled();
-
return test_harness_run(argc, argv);
}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
index 977948fd52..fa512d0332 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -4,9 +4,6 @@
*
* Copyright (C) 2018, Red Hat, Inc.
*/
-
-#define _GNU_SOURCE /* for program_invocation_name */
-
#include <stdio.h>
#include <stdlib.h>
#include <linux/bitmap.h>
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
index fad3634fd9..3fd6eceab4 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
@@ -115,9 +115,6 @@ int main(int argc, char *argv[])
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
get_set_sigalrm_vcpu(vcpu);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
/*
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
index ea0cb3cae0..7c92536551 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
@@ -10,7 +10,6 @@
* and check it can be retrieved with KVM_GET_MSR, also test
* the invalid LBR formats are rejected.
*/
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <sys/ioctl.h>
#include <linux/bitmap.h>
@@ -86,9 +85,6 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, guest_wrmsr_perf_capabilities, guest_code)
struct ucall uc;
int r, i;
- vm_init_descriptor_tables(vcpu->vm);
- vcpu_init_descriptor_tables(vcpu);
-
vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
vcpu_args_set(vcpu, 1, host_cap.capabilities);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
index affc328001..00dd2ac07a 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
@@ -9,7 +9,6 @@
* value instead of partially decayed timer value
*
*/
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
index 725c206ba0..a76078a08f 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
@@ -19,8 +19,6 @@
* Migration is a command line option. When used on non-numa machines will
* exit with error. Test is still usefull on non-numa for testing IPIs.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <getopt.h>
#include <pthread.h>
#include <inttypes.h>
@@ -410,8 +408,6 @@ int main(int argc, char *argv[])
vm = vm_create_with_one_vcpu(&params[0].vcpu, halter_guest_code);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(params[0].vcpu);
vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
index ab75b873a4..69849acd95 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
@@ -1,5 +1,4 @@
// SPDX-License-Identifier: GPL-2.0-only
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
index 25a0b0db5c..95ce192d07 100644
--- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
@@ -109,9 +109,6 @@ int main(int argc, char *argv[])
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
run = vcpu->run;
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
while (1) {
vcpu_run(vcpu);
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index d2ea0435f4..a59b3c799b 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -125,7 +125,7 @@ struct compat_vcpu_runstate_info {
uint32_t state;
uint64_t state_entry_time;
uint64_t time[5];
-} __attribute__((__packed__));;
+} __attribute__((__packed__));
struct arch_vcpu_info {
unsigned long cr2;
@@ -171,8 +171,9 @@ static volatile bool guest_saw_irq;
static void evtchn_handler(struct ex_regs *regs)
{
struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
- vi->evtchn_upcall_pending = 0;
- vi->evtchn_pending_sel = 0;
+
+ vcpu_arch_put_guest(vi->evtchn_upcall_pending, 0);
+ vcpu_arch_put_guest(vi->evtchn_pending_sel, 0);
guest_saw_irq = true;
GUEST_SYNC(TEST_GUEST_SAW_IRQ);
@@ -380,20 +381,6 @@ wait_for_timer:
GUEST_SYNC(TEST_DONE);
}
-static int cmp_timespec(struct timespec *a, struct timespec *b)
-{
- if (a->tv_sec > b->tv_sec)
- return 1;
- else if (a->tv_sec < b->tv_sec)
- return -1;
- else if (a->tv_nsec > b->tv_nsec)
- return 1;
- else if (a->tv_nsec < b->tv_nsec)
- return -1;
- else
- return 0;
-}
-
static struct shared_info *shinfo;
static struct vcpu_info *vinfo;
static struct kvm_vcpu *vcpu;
@@ -449,7 +436,6 @@ static void *juggle_shinfo_state(void *arg)
int main(int argc, char *argv[])
{
- struct timespec min_ts, max_ts, vm_ts;
struct kvm_xen_hvm_attr evt_reset;
struct kvm_vm *vm;
pthread_t thread;
@@ -468,8 +454,6 @@ int main(int argc, char *argv[])
bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);
bool has_shinfo_hva = !!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA);
- clock_gettime(CLOCK_REALTIME, &min_ts);
-
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
/* Map a region for the shared_info page */
@@ -553,8 +537,6 @@ int main(int argc, char *argv[])
};
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);
if (do_runstate_tests) {
@@ -1010,7 +992,6 @@ int main(int argc, char *argv[])
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset);
alarm(0);
- clock_gettime(CLOCK_REALTIME, &max_ts);
/*
* Just a *really* basic check that things are being put in the
@@ -1019,6 +1000,8 @@ int main(int argc, char *argv[])
*/
struct pvclock_wall_clock *wc;
struct pvclock_vcpu_time_info *ti, *ti2;
+ struct kvm_clock_data kcdata;
+ long long delta;
wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
@@ -1034,12 +1017,34 @@ int main(int argc, char *argv[])
ti2->tsc_shift, ti2->flags);
}
- vm_ts.tv_sec = wc->sec;
- vm_ts.tv_nsec = wc->nsec;
TEST_ASSERT(wc->version && !(wc->version & 1),
"Bad wallclock version %x", wc->version);
- TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old");
- TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new");
+
+ vm_ioctl(vm, KVM_GET_CLOCK, &kcdata);
+
+ if (kcdata.flags & KVM_CLOCK_REALTIME) {
+ if (verbose) {
+ printf("KVM_GET_CLOCK clock: %lld.%09lld\n",
+ kcdata.clock / NSEC_PER_SEC, kcdata.clock % NSEC_PER_SEC);
+ printf("KVM_GET_CLOCK realtime: %lld.%09lld\n",
+ kcdata.realtime / NSEC_PER_SEC, kcdata.realtime % NSEC_PER_SEC);
+ }
+
+ delta = (wc->sec * NSEC_PER_SEC + wc->nsec) - (kcdata.realtime - kcdata.clock);
+
+ /*
+ * KVM_GET_CLOCK gives CLOCK_REALTIME which jumps on leap seconds updates but
+ * unfortunately KVM doesn't currently offer a CLOCK_TAI alternative. Accept 1s
+ * delta as testing clock accuracy is not the goal here. The test just needs to
+ * check that the value in shinfo is somewhat sane.
+ */
+ TEST_ASSERT(llabs(delta) < NSEC_PER_SEC,
+ "Guest's epoch from shinfo %d.%09d differs from KVM_GET_CLOCK %lld.%lld",
+ wc->sec, wc->nsec, (kcdata.realtime - kcdata.clock) / NSEC_PER_SEC,
+ (kcdata.realtime - kcdata.clock) % NSEC_PER_SEC);
+ } else {
+ pr_info("Missing KVM_CLOCK_REALTIME, skipping shinfo epoch sanity check\n");
+ }
TEST_ASSERT(ti->version && !(ti->version & 1),
"Bad time_info version %x", ti->version);
diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
index 167c97abff..f331a4e9ba 100644
--- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
@@ -4,8 +4,6 @@
*
* Tests for the IA32_XSS MSR.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <sys/ioctl.h>
#include "test_util.h"
diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index a6f89aaea7..3b26bf3cf5 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -9,6 +9,7 @@
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
+#include <linux/keyctl.h>
#include <linux/landlock.h>
#include <string.h>
#include <sys/prctl.h>
@@ -75,7 +76,7 @@ TEST(abi_version)
const struct landlock_ruleset_attr ruleset_attr = {
.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
};
- ASSERT_EQ(4, landlock_create_ruleset(NULL, 0,
+ ASSERT_EQ(5, landlock_create_ruleset(NULL, 0,
LANDLOCK_CREATE_RULESET_VERSION));
ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0,
@@ -326,4 +327,77 @@ TEST(ruleset_fd_transfer)
ASSERT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
}
+TEST(cred_transfer)
+{
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR,
+ };
+ int ruleset_fd, dir_fd;
+ pid_t child;
+ int status;
+
+ drop_caps(_metadata);
+
+ dir_fd = open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
+ EXPECT_LE(0, dir_fd);
+ EXPECT_EQ(0, close(dir_fd));
+
+ /* Denies opening directories. */
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ /* Checks ruleset enforcement. */
+ EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC));
+ EXPECT_EQ(EACCES, errno);
+
+ /* Needed for KEYCTL_SESSION_TO_PARENT permission checks */
+ EXPECT_NE(-1, syscall(__NR_keyctl, KEYCTL_JOIN_SESSION_KEYRING, NULL, 0,
+ 0, 0))
+ {
+ TH_LOG("Failed to join session keyring: %s", strerror(errno));
+ }
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ /* Checks ruleset enforcement. */
+ EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC));
+ EXPECT_EQ(EACCES, errno);
+
+ /*
+ * KEYCTL_SESSION_TO_PARENT is a no-op unless we have a
+ * different session keyring in the child, so make that happen.
+ */
+ EXPECT_NE(-1, syscall(__NR_keyctl, KEYCTL_JOIN_SESSION_KEYRING,
+ NULL, 0, 0, 0));
+
+ /*
+ * KEYCTL_SESSION_TO_PARENT installs credentials on the parent
+ * that never go through the cred_prepare hook, this path uses
+ * cred_transfer instead.
+ */
+ EXPECT_EQ(0, syscall(__NR_keyctl, KEYCTL_SESSION_TO_PARENT, 0,
+ 0, 0, 0));
+
+ /* Re-checks ruleset enforcement. */
+ EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC));
+ EXPECT_EQ(EACCES, errno);
+
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(1, WIFEXITED(status));
+ EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ /* Re-checks ruleset enforcement. */
+ EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC));
+ EXPECT_EQ(EACCES, errno);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config
index 0086efaa7b..29af19c4e9 100644
--- a/tools/testing/selftests/landlock/config
+++ b/tools/testing/selftests/landlock/config
@@ -2,6 +2,7 @@ CONFIG_CGROUPS=y
CONFIG_CGROUP_SCHED=y
CONFIG_INET=y
CONFIG_IPV6=y
+CONFIG_KEYS=y
CONFIG_NET=y
CONFIG_NET_NS=y
CONFIG_OVERLAY_FS=y
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index 27744524df..7d063c652b 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -8,22 +8,35 @@
*/
#define _GNU_SOURCE
+#include <asm/termbits.h>
#include <fcntl.h>
#include <libgen.h>
+#include <linux/fiemap.h>
#include <linux/landlock.h>
#include <linux/magic.h>
#include <sched.h>
+#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/capability.h>
+#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/sendfile.h>
+#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
+#include <sys/un.h>
#include <sys/vfs.h>
#include <unistd.h>
+/*
+ * Intentionally included last to work around header conflict.
+ * See https://sourceware.org/glibc/wiki/Synchronizing_Headers.
+ */
+#include <linux/fs.h>
+#include <linux/mount.h>
+
#include "common.h"
#ifndef renameat2
@@ -35,6 +48,13 @@ int renameat2(int olddirfd, const char *oldpath, int newdirfd,
}
#endif
+#ifndef open_tree
+int open_tree(int dfd, const char *filename, unsigned int flags)
+{
+ return syscall(__NR_open_tree, dfd, filename, flags);
+}
+#endif
+
#ifndef RENAME_EXCHANGE
#define RENAME_EXCHANGE (1 << 1)
#endif
@@ -536,9 +556,10 @@ TEST_F_FORK(layout1, inval)
LANDLOCK_ACCESS_FS_EXECUTE | \
LANDLOCK_ACCESS_FS_WRITE_FILE | \
LANDLOCK_ACCESS_FS_READ_FILE | \
- LANDLOCK_ACCESS_FS_TRUNCATE)
+ LANDLOCK_ACCESS_FS_TRUNCATE | \
+ LANDLOCK_ACCESS_FS_IOCTL_DEV)
-#define ACCESS_LAST LANDLOCK_ACCESS_FS_TRUNCATE
+#define ACCESS_LAST LANDLOCK_ACCESS_FS_IOCTL_DEV
#define ACCESS_ALL ( \
ACCESS_FILE | \
@@ -743,6 +764,9 @@ static int create_ruleset(struct __test_metadata *const _metadata,
}
for (i = 0; rules[i].path; i++) {
+ if (!rules[i].access)
+ continue;
+
add_path_beneath(_metadata, ruleset_fd, rules[i].access,
rules[i].path);
}
@@ -2384,6 +2408,43 @@ TEST_F_FORK(layout1, refer_denied_by_default4)
layer_dir_s1d1_refer);
}
+/*
+ * Tests walking through a denied root mount.
+ */
+TEST_F_FORK(layout1, refer_mount_root_deny)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_MAKE_DIR,
+ };
+ int root_fd, ruleset_fd;
+
+ /* Creates a mount object from a non-mount point. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ root_fd =
+ open_tree(AT_FDCWD, dir_s1d1,
+ AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_LE(0, root_fd);
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ /* Link denied by Landlock: EACCES. */
+ EXPECT_EQ(-1, linkat(root_fd, ".", root_fd, "does_not_exist", 0));
+ EXPECT_EQ(EACCES, errno);
+
+ /* renameat2() always returns EBUSY. */
+ EXPECT_EQ(-1, renameat2(root_fd, ".", root_fd, "does_not_exist", 0));
+ EXPECT_EQ(EBUSY, errno);
+
+ EXPECT_EQ(0, close(root_fd));
+}
+
TEST_F_FORK(layout1, reparent_link)
{
const struct rule layer1[] = {
@@ -3451,7 +3512,7 @@ TEST_F_FORK(layout1, truncate_unhandled)
LANDLOCK_ACCESS_FS_WRITE_FILE;
int ruleset_fd;
- /* Enable Landlock. */
+ /* Enables Landlock. */
ruleset_fd = create_ruleset(_metadata, handled, rules);
ASSERT_LE(0, ruleset_fd);
@@ -3534,7 +3595,7 @@ TEST_F_FORK(layout1, truncate)
LANDLOCK_ACCESS_FS_TRUNCATE;
int ruleset_fd;
- /* Enable Landlock. */
+ /* Enables Landlock. */
ruleset_fd = create_ruleset(_metadata, handled, rules);
ASSERT_LE(0, ruleset_fd);
@@ -3760,7 +3821,7 @@ TEST_F_FORK(ftruncate, open_and_ftruncate)
};
int fd, ruleset_fd;
- /* Enable Landlock. */
+ /* Enables Landlock. */
ruleset_fd = create_ruleset(_metadata, variant->handled, rules);
ASSERT_LE(0, ruleset_fd);
enforce_ruleset(_metadata, ruleset_fd);
@@ -3837,20 +3898,469 @@ TEST_F_FORK(ftruncate, open_and_ftruncate_in_different_processes)
ASSERT_EQ(0, close(socket_fds[1]));
}
-TEST(memfd_ftruncate)
+/* Invokes the FS_IOC_GETFLAGS IOCTL and returns its errno or 0. */
+static int test_fs_ioc_getflags_ioctl(int fd)
{
- int fd;
+ uint32_t flags;
+
+ if (ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0)
+ return errno;
+ return 0;
+}
+
+TEST(memfd_ftruncate_and_ioctl)
+{
+ const struct landlock_ruleset_attr attr = {
+ .handled_access_fs = ACCESS_ALL,
+ };
+ int ruleset_fd, fd, i;
+
+ /*
+ * We exercise the same test both with and without Landlock enabled, to
+ * ensure that it behaves the same in both cases.
+ */
+ for (i = 0; i < 2; i++) {
+ /* Creates a new memfd. */
+ fd = memfd_create("name", MFD_CLOEXEC);
+ ASSERT_LE(0, fd);
+
+ /*
+ * Checks that operations associated with the opened file
+ * (ftruncate, ioctl) are permitted on file descriptors that are
+ * created in ways other than open(2).
+ */
+ EXPECT_EQ(0, test_ftruncate(fd));
+ EXPECT_EQ(0, test_fs_ioc_getflags_ioctl(fd));
+
+ ASSERT_EQ(0, close(fd));
+
+ /* Enables Landlock. */
+ ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+ }
+}
+
+static int test_fionread_ioctl(int fd)
+{
+ size_t sz = 0;
+
+ if (ioctl(fd, FIONREAD, &sz) < 0 && errno == EACCES)
+ return errno;
+ return 0;
+}
+
+TEST_F_FORK(layout1, o_path_ftruncate_and_ioctl)
+{
+ const struct landlock_ruleset_attr attr = {
+ .handled_access_fs = ACCESS_ALL,
+ };
+ int ruleset_fd, fd;
+
+ /*
+ * Checks that for files opened with O_PATH, both ioctl(2) and
+ * ftruncate(2) yield EBADF, as it is documented in open(2) for the
+ * O_PATH flag.
+ */
+ fd = open(dir_s1d1, O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, fd);
+
+ EXPECT_EQ(EBADF, test_ftruncate(fd));
+ EXPECT_EQ(EBADF, test_fs_ioc_getflags_ioctl(fd));
+
+ ASSERT_EQ(0, close(fd));
+
+ /* Enables Landlock. */
+ ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * Checks that after enabling Landlock,
+ * - the file can still be opened with O_PATH
+ * - both ioctl and truncate still yield EBADF (not EACCES).
+ */
+ fd = open(dir_s1d1, O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, fd);
+
+ EXPECT_EQ(EBADF, test_ftruncate(fd));
+ EXPECT_EQ(EBADF, test_fs_ioc_getflags_ioctl(fd));
+
+ ASSERT_EQ(0, close(fd));
+}
+
+/*
+ * ioctl_error - generically call the given ioctl with a pointer to a
+ * sufficiently large zeroed-out memory region.
+ *
+ * Returns the IOCTLs error, or 0.
+ */
+static int ioctl_error(struct __test_metadata *const _metadata, int fd,
+ unsigned int cmd)
+{
+ char buf[128]; /* sufficiently large */
+ int res, stdinbak_fd;
+
+ /*
+ * Depending on the IOCTL command, parts of the zeroed-out buffer might
+ * be interpreted as file descriptor numbers. We do not want to
+ * accidentally operate on file descriptor 0 (stdin), so we temporarily
+ * move stdin to a different FD and close FD 0 for the IOCTL call.
+ */
+ stdinbak_fd = dup(0);
+ ASSERT_LT(0, stdinbak_fd);
+ ASSERT_EQ(0, close(0));
+
+ /* Invokes the IOCTL with a zeroed-out buffer. */
+ bzero(&buf, sizeof(buf));
+ res = ioctl(fd, cmd, &buf);
+
+ /* Restores the old FD 0 and closes the backup FD. */
+ ASSERT_EQ(0, dup2(stdinbak_fd, 0));
+ ASSERT_EQ(0, close(stdinbak_fd));
+
+ if (res < 0)
+ return errno;
+
+ return 0;
+}
+
+/* Define some linux/falloc.h IOCTL commands which are not available in uapi headers. */
+struct space_resv {
+ __s16 l_type;
+ __s16 l_whence;
+ __s64 l_start;
+ __s64 l_len; /* len == 0 means until end of file */
+ __s32 l_sysid;
+ __u32 l_pid;
+ __s32 l_pad[4]; /* reserved area */
+};
+
+#define FS_IOC_RESVSP _IOW('X', 40, struct space_resv)
+#define FS_IOC_UNRESVSP _IOW('X', 41, struct space_resv)
+#define FS_IOC_RESVSP64 _IOW('X', 42, struct space_resv)
+#define FS_IOC_UNRESVSP64 _IOW('X', 43, struct space_resv)
+#define FS_IOC_ZERO_RANGE _IOW('X', 57, struct space_resv)
+
+/*
+ * Tests a series of blanket-permitted and denied IOCTLs.
+ */
+TEST_F_FORK(layout1, blanket_permitted_ioctls)
+{
+ const struct landlock_ruleset_attr attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ };
+ int ruleset_fd, fd;
+
+ /* Enables Landlock. */
+ ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
- fd = memfd_create("name", MFD_CLOEXEC);
+ fd = open("/dev/null", O_RDWR | O_CLOEXEC);
ASSERT_LE(0, fd);
/*
- * Checks that ftruncate is permitted on file descriptors that are
- * created in ways other than open(2).
+ * Checks permitted commands.
+ * These ones may return errors, but should not be blocked by Landlock.
*/
- EXPECT_EQ(0, test_ftruncate(fd));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIOCLEX));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIONCLEX));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIONBIO));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIOASYNC));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIOQSIZE));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIFREEZE));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FITHAW));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FS_IOC_FIEMAP));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIGETBSZ));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FICLONE));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FICLONERANGE));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIDEDUPERANGE));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FS_IOC_GETFSUUID));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FS_IOC_GETFSSYSFSPATH));
+
+ /*
+ * Checks blocked commands.
+ * A call to a blocked IOCTL command always returns EACCES.
+ */
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FIONREAD));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_GETFLAGS));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_SETFLAGS));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_FSGETXATTR));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_FSSETXATTR));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FIBMAP));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_RESVSP));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_RESVSP64));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_UNRESVSP));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_UNRESVSP64));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_ZERO_RANGE));
+
+ /* Default case is also blocked. */
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, 0xc00ffeee));
+
+ ASSERT_EQ(0, close(fd));
+}
+
+/*
+ * Named pipes are not governed by the LANDLOCK_ACCESS_FS_IOCTL_DEV right,
+ * because they are not character or block devices.
+ */
+TEST_F_FORK(layout1, named_pipe_ioctl)
+{
+ pid_t child_pid;
+ int fd, ruleset_fd;
+ const char *const path = file1_s1d1;
+ const struct landlock_ruleset_attr attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ };
+
+ ASSERT_EQ(0, unlink(path));
+ ASSERT_EQ(0, mkfifo(path, 0600));
+
+ /* Enables Landlock. */
+ ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* The child process opens the pipe for writing. */
+ child_pid = fork();
+ ASSERT_NE(-1, child_pid);
+ if (child_pid == 0) {
+ fd = open(path, O_WRONLY);
+ close(fd);
+ exit(0);
+ }
+
+ fd = open(path, O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ /* FIONREAD is implemented by pipefifo_fops. */
+ EXPECT_EQ(0, test_fionread_ioctl(fd));
ASSERT_EQ(0, close(fd));
+ ASSERT_EQ(0, unlink(path));
+
+ ASSERT_EQ(child_pid, waitpid(child_pid, NULL, 0));
+}
+
+/* For named UNIX domain sockets, no IOCTL restrictions apply. */
+TEST_F_FORK(layout1, named_unix_domain_socket_ioctl)
+{
+ const char *const path = file1_s1d1;
+ int srv_fd, cli_fd, ruleset_fd;
+ socklen_t size;
+ struct sockaddr_un srv_un, cli_un;
+ const struct landlock_ruleset_attr attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ };
+
+ /* Sets up a server */
+ srv_un.sun_family = AF_UNIX;
+ strncpy(srv_un.sun_path, path, sizeof(srv_un.sun_path));
+
+ ASSERT_EQ(0, unlink(path));
+ srv_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, srv_fd);
+
+ size = offsetof(struct sockaddr_un, sun_path) + strlen(srv_un.sun_path);
+ ASSERT_EQ(0, bind(srv_fd, (struct sockaddr *)&srv_un, size));
+ ASSERT_EQ(0, listen(srv_fd, 10 /* qlen */));
+
+ /* Enables Landlock. */
+ ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Sets up a client connection to it */
+ cli_un.sun_family = AF_UNIX;
+ cli_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, cli_fd);
+
+ size = offsetof(struct sockaddr_un, sun_path) + strlen(cli_un.sun_path);
+ ASSERT_EQ(0, bind(cli_fd, (struct sockaddr *)&cli_un, size));
+
+ bzero(&cli_un, sizeof(cli_un));
+ cli_un.sun_family = AF_UNIX;
+ strncpy(cli_un.sun_path, path, sizeof(cli_un.sun_path));
+ size = offsetof(struct sockaddr_un, sun_path) + strlen(cli_un.sun_path);
+
+ ASSERT_EQ(0, connect(cli_fd, (struct sockaddr *)&cli_un, size));
+
+ /* FIONREAD and other IOCTLs should not be forbidden. */
+ EXPECT_EQ(0, test_fionread_ioctl(cli_fd));
+
+ ASSERT_EQ(0, close(cli_fd));
+}
+
+/* clang-format off */
+FIXTURE(ioctl) {};
+
+FIXTURE_SETUP(ioctl) {};
+
+FIXTURE_TEARDOWN(ioctl) {};
+/* clang-format on */
+
+FIXTURE_VARIANT(ioctl)
+{
+ const __u64 handled;
+ const __u64 allowed;
+ const mode_t open_mode;
+ /*
+ * FIONREAD is used as a characteristic device-specific IOCTL command.
+ * It is implemented in fs/ioctl.c for regular files,
+ * but we do not blanket-permit it for devices.
+ */
+ const int expected_fionread_result;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ioctl, handled_i_allowed_none) {
+ /* clang-format on */
+ .handled = LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ .allowed = 0,
+ .open_mode = O_RDWR,
+ .expected_fionread_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ioctl, handled_i_allowed_i) {
+ /* clang-format on */
+ .handled = LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ .allowed = LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ .open_mode = O_RDWR,
+ .expected_fionread_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ioctl, unhandled) {
+ /* clang-format on */
+ .handled = LANDLOCK_ACCESS_FS_EXECUTE,
+ .allowed = LANDLOCK_ACCESS_FS_EXECUTE,
+ .open_mode = O_RDWR,
+ .expected_fionread_result = 0,
+};
+
+TEST_F_FORK(ioctl, handle_dir_access_file)
+{
+ const int flag = 0;
+ const struct rule rules[] = {
+ {
+ .path = "/dev",
+ .access = variant->allowed,
+ },
+ {},
+ };
+ int file_fd, ruleset_fd;
+
+ /* Enables Landlock. */
+ ruleset_fd = create_ruleset(_metadata, variant->handled, rules);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ file_fd = open("/dev/zero", variant->open_mode);
+ ASSERT_LE(0, file_fd);
+
+ /* Checks that IOCTL commands return the expected errors. */
+ EXPECT_EQ(variant->expected_fionread_result,
+ test_fionread_ioctl(file_fd));
+
+ /* Checks that unrestrictable commands are unrestricted. */
+ EXPECT_EQ(0, ioctl(file_fd, FIOCLEX));
+ EXPECT_EQ(0, ioctl(file_fd, FIONCLEX));
+ EXPECT_EQ(0, ioctl(file_fd, FIONBIO, &flag));
+ EXPECT_EQ(0, ioctl(file_fd, FIOASYNC, &flag));
+ EXPECT_EQ(0, ioctl(file_fd, FIGETBSZ, &flag));
+
+ ASSERT_EQ(0, close(file_fd));
+}
+
+TEST_F_FORK(ioctl, handle_dir_access_dir)
+{
+ const int flag = 0;
+ const struct rule rules[] = {
+ {
+ .path = "/dev",
+ .access = variant->allowed,
+ },
+ {},
+ };
+ int dir_fd, ruleset_fd;
+
+ /* Enables Landlock. */
+ ruleset_fd = create_ruleset(_metadata, variant->handled, rules);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * Ignore variant->open_mode for this test, as we intend to open a
+ * directory. If the directory can not be opened, the variant is
+ * infeasible to test with an opened directory.
+ */
+ dir_fd = open("/dev", O_RDONLY);
+ if (dir_fd < 0)
+ return;
+
+ /*
+ * Checks that IOCTL commands return the expected errors.
+ * We do not use the expected values from the fixture here.
+ *
+ * When using IOCTL on a directory, no Landlock restrictions apply.
+ */
+ EXPECT_EQ(0, test_fionread_ioctl(dir_fd));
+
+ /* Checks that unrestrictable commands are unrestricted. */
+ EXPECT_EQ(0, ioctl(dir_fd, FIOCLEX));
+ EXPECT_EQ(0, ioctl(dir_fd, FIONCLEX));
+ EXPECT_EQ(0, ioctl(dir_fd, FIONBIO, &flag));
+ EXPECT_EQ(0, ioctl(dir_fd, FIOASYNC, &flag));
+ EXPECT_EQ(0, ioctl(dir_fd, FIGETBSZ, &flag));
+
+ ASSERT_EQ(0, close(dir_fd));
+}
+
+TEST_F_FORK(ioctl, handle_file_access_file)
+{
+ const int flag = 0;
+ const struct rule rules[] = {
+ {
+ .path = "/dev/zero",
+ .access = variant->allowed,
+ },
+ {},
+ };
+ int file_fd, ruleset_fd;
+
+ /* Enables Landlock. */
+ ruleset_fd = create_ruleset(_metadata, variant->handled, rules);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ file_fd = open("/dev/zero", variant->open_mode);
+ ASSERT_LE(0, file_fd)
+ {
+ TH_LOG("Failed to open /dev/zero: %s", strerror(errno));
+ }
+
+ /* Checks that IOCTL commands return the expected errors. */
+ EXPECT_EQ(variant->expected_fionread_result,
+ test_fionread_ioctl(file_fd));
+
+ /* Checks that unrestrictable commands are unrestricted. */
+ EXPECT_EQ(0, ioctl(file_fd, FIOCLEX));
+ EXPECT_EQ(0, ioctl(file_fd, FIONCLEX));
+ EXPECT_EQ(0, ioctl(file_fd, FIONBIO, &flag));
+ EXPECT_EQ(0, ioctl(file_fd, FIOASYNC, &flag));
+ EXPECT_EQ(0, ioctl(file_fd, FIGETBSZ, &flag));
+
+ ASSERT_EQ(0, close(file_fd));
}
/* clang-format off */
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 8ae203d8ed..429535816d 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -52,10 +52,33 @@ endif
selfdir = $(realpath $(dir $(filter %/lib.mk,$(MAKEFILE_LIST))))
top_srcdir = $(selfdir)/../../..
+# msg: emit succinct information message describing current building step
+# $1 - generic step name (e.g., CC, LINK, etc);
+# $2 - optional "flavor" specifier; if provided, will be emitted as [flavor];
+# $3 - target (assumed to be file); only file name will be emitted;
+# $4 - optional extra arg, emitted as-is, if provided.
+ifeq ($(V),1)
+Q =
+msg =
+else
+Q = @
+msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))";
+MAKEFLAGS += --no-print-directory
+endif
+
ifeq ($(KHDR_INCLUDES),)
KHDR_INCLUDES := -isystem $(top_srcdir)/usr/include
endif
+# In order to use newer items that haven't yet been added to the user's system
+# header files, add $(TOOLS_INCLUDES) to the compiler invocation in each
+# each selftest.
+# You may need to add files to that location, or to refresh an existing file. In
+# order to do that, run "make headers" from $(top_srcdir), then copy the
+# header file that you want from $(top_srcdir)/usr/include/... , to the matching
+# subdir in $(TOOLS_INCLUDE).
+TOOLS_INCLUDES := -isystem $(top_srcdir)/tools/include/uapi
+
# The following are built by lib.mk common compile rules.
# TEST_CUSTOM_PROGS should be used by tests that require
# custom build rule and prevent common build rule use.
@@ -184,7 +207,8 @@ endif
ifeq ($(OVERRIDE_TARGETS),)
LOCAL_HDRS += $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h
$(OUTPUT)/%:%.c $(LOCAL_HDRS)
- $(LINK.c) $(filter-out $(LOCAL_HDRS),$^) $(LDLIBS) -o $@
+ $(call msg,CC,,$@)
+ $(Q)$(LINK.c) $(filter-out $(LOCAL_HDRS),$^) $(LDLIBS) -o $@
$(OUTPUT)/%.o:%.S
$(COMPILE.S) $^ -o $@
diff --git a/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c
index a9cc17facf..4e14dba812 100644
--- a/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c
+++ b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c
@@ -69,5 +69,5 @@ int main(int argc, char **argv)
/* Multi-threaded */
test_mt_membarrier();
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/membarrier/membarrier_test_single_thread.c b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c
index 4cdc8b1d12..fa3f1d6c37 100644
--- a/tools/testing/selftests/membarrier/membarrier_test_single_thread.c
+++ b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c
@@ -24,5 +24,5 @@ int main(int argc, char **argv)
test_membarrier_get_registrations(/*cmd=*/0);
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c
index 93798c8c5d..dbc171a380 100644
--- a/tools/testing/selftests/memfd/fuse_test.c
+++ b/tools/testing/selftests/memfd/fuse_test.c
@@ -306,7 +306,7 @@ int main(int argc, char **argv)
* then the kernel did a page-replacement or canceled the read() (or
* whatever magic it did..). In that case, the memfd object is still
* all zero.
- * In case the memfd-object was *not* sealed, the read() was successfull
+ * In case the memfd-object was *not* sealed, the read() was successful
* and the memfd object must *not* be all zero.
* Note that in real scenarios, there might be a mixture of both, but
* in this test-cases, we have explicit 200ms delays which should be
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 18f585684e..95af2d78fd 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -1528,7 +1528,7 @@ static void test_share_open(char *banner, char *b_suffix)
/*
* Test sharing via fork()
- * Test whether seal-modifications work as expected with forked childs.
+ * Test whether seal-modifications work as expected with forked children.
*/
static void test_share_fork(char *banner, char *b_suffix)
{
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
index d26e962f2a..0b9ab98760 100644
--- a/tools/testing/selftests/mm/.gitignore
+++ b/tools/testing/selftests/mm/.gitignore
@@ -47,3 +47,5 @@ mkdirty
va_high_addr_switch
hugetlb_fault_after_madv
hugetlb_madv_vs_map
+mseal_test
+seal_elf
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 410495e0a6..3b49bc3d0a 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -32,7 +32,7 @@ endif
# LDLIBS.
MAKEFLAGS += --no-builtin-rules
-CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
+CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
LDLIBS = -lrt -lpthread -lm
TEST_GEN_FILES = cow
@@ -59,6 +59,8 @@ TEST_GEN_FILES += mlock2-tests
TEST_GEN_FILES += mrelease_test
TEST_GEN_FILES += mremap_dontunmap
TEST_GEN_FILES += mremap_test
+TEST_GEN_FILES += mseal_test
+TEST_GEN_FILES += seal_elf
TEST_GEN_FILES += on-fault-limit
TEST_GEN_FILES += pagemap_ioctl
TEST_GEN_FILES += thuge-gen
diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
index fe078d6e18..32c6ccc2a6 100644
--- a/tools/testing/selftests/mm/cow.c
+++ b/tools/testing/selftests/mm/cow.c
@@ -199,7 +199,7 @@ static int child_vmsplice_memcmp_fn(char *mem, size_t size,
typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes);
static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
- child_fn fn)
+ child_fn fn, bool xfail)
{
struct comm_pipes comm_pipes;
char buf;
@@ -247,33 +247,47 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
else
ret = -EINVAL;
- ksft_test_result(!ret, "No leak from parent into child\n");
+ if (!ret) {
+ ksft_test_result_pass("No leak from parent into child\n");
+ } else if (xfail) {
+ /*
+ * With hugetlb, some vmsplice() tests are currently expected to
+ * fail because (a) harder to fix and (b) nobody really cares.
+ * Flag them as expected failure for now.
+ */
+ ksft_test_result_xfail("Leak from parent into child\n");
+ } else {
+ ksft_test_result_fail("Leak from parent into child\n");
+ }
close_comm_pipes:
close_comm_pipes(&comm_pipes);
}
-static void test_cow_in_parent(char *mem, size_t size)
+static void test_cow_in_parent(char *mem, size_t size, bool is_hugetlb)
{
- do_test_cow_in_parent(mem, size, false, child_memcmp_fn);
+ do_test_cow_in_parent(mem, size, false, child_memcmp_fn, false);
}
-static void test_cow_in_parent_mprotect(char *mem, size_t size)
+static void test_cow_in_parent_mprotect(char *mem, size_t size, bool is_hugetlb)
{
- do_test_cow_in_parent(mem, size, true, child_memcmp_fn);
+ do_test_cow_in_parent(mem, size, true, child_memcmp_fn, false);
}
-static void test_vmsplice_in_child(char *mem, size_t size)
+static void test_vmsplice_in_child(char *mem, size_t size, bool is_hugetlb)
{
- do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn);
+ do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn,
+ is_hugetlb);
}
-static void test_vmsplice_in_child_mprotect(char *mem, size_t size)
+static void test_vmsplice_in_child_mprotect(char *mem, size_t size,
+ bool is_hugetlb)
{
- do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn);
+ do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn,
+ is_hugetlb);
}
static void do_test_vmsplice_in_parent(char *mem, size_t size,
- bool before_fork)
+ bool before_fork, bool xfail)
{
struct iovec iov = {
.iov_base = mem,
@@ -355,8 +369,18 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
}
}
- ksft_test_result(!memcmp(old, new, transferred),
- "No leak from child into parent\n");
+ if (!memcmp(old, new, transferred)) {
+ ksft_test_result_pass("No leak from child into parent\n");
+ } else if (xfail) {
+ /*
+ * With hugetlb, some vmsplice() tests are currently expected to
+ * fail because (a) harder to fix and (b) nobody really cares.
+ * Flag them as expected failure for now.
+ */
+ ksft_test_result_xfail("Leak from child into parent\n");
+ } else {
+ ksft_test_result_fail("Leak from child into parent\n");
+ }
close_pipe:
close(fds[0]);
close(fds[1]);
@@ -367,14 +391,14 @@ free:
free(new);
}
-static void test_vmsplice_before_fork(char *mem, size_t size)
+static void test_vmsplice_before_fork(char *mem, size_t size, bool is_hugetlb)
{
- do_test_vmsplice_in_parent(mem, size, true);
+ do_test_vmsplice_in_parent(mem, size, true, is_hugetlb);
}
-static void test_vmsplice_after_fork(char *mem, size_t size)
+static void test_vmsplice_after_fork(char *mem, size_t size, bool is_hugetlb)
{
- do_test_vmsplice_in_parent(mem, size, false);
+ do_test_vmsplice_in_parent(mem, size, false, is_hugetlb);
}
#ifdef LOCAL_CONFIG_HAVE_LIBURING
@@ -529,12 +553,12 @@ close_comm_pipes:
close_comm_pipes(&comm_pipes);
}
-static void test_iouring_ro(char *mem, size_t size)
+static void test_iouring_ro(char *mem, size_t size, bool is_hugetlb)
{
do_test_iouring(mem, size, false);
}
-static void test_iouring_fork(char *mem, size_t size)
+static void test_iouring_fork(char *mem, size_t size, bool is_hugetlb)
{
do_test_iouring(mem, size, true);
}
@@ -678,37 +702,41 @@ free_tmp:
free(tmp);
}
-static void test_ro_pin_on_shared(char *mem, size_t size)
+static void test_ro_pin_on_shared(char *mem, size_t size, bool is_hugetlb)
{
do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false);
}
-static void test_ro_fast_pin_on_shared(char *mem, size_t size)
+static void test_ro_fast_pin_on_shared(char *mem, size_t size, bool is_hugetlb)
{
do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true);
}
-static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size)
+static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size,
+ bool is_hugetlb)
{
do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false);
}
-static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size)
+static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size,
+ bool is_hugetlb)
{
do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true);
}
-static void test_ro_pin_on_ro_exclusive(char *mem, size_t size)
+static void test_ro_pin_on_ro_exclusive(char *mem, size_t size,
+ bool is_hugetlb)
{
do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false);
}
-static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size)
+static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size,
+ bool is_hugetlb)
{
do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true);
}
-typedef void (*test_fn)(char *mem, size_t size);
+typedef void (*test_fn)(char *mem, size_t size, bool hugetlb);
static void do_run_with_base_page(test_fn fn, bool swapout)
{
@@ -740,7 +768,7 @@ static void do_run_with_base_page(test_fn fn, bool swapout)
}
}
- fn(mem, pagesize);
+ fn(mem, pagesize, false);
munmap:
munmap(mem, pagesize);
}
@@ -904,7 +932,7 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
break;
}
- fn(mem, size);
+ fn(mem, size, false);
munmap:
munmap(mmap_mem, mmap_size);
if (mremap_mem != MAP_FAILED)
@@ -997,7 +1025,7 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
}
munmap(dummy, hugetlbsize);
- fn(mem, hugetlbsize);
+ fn(mem, hugetlbsize, true);
munmap:
munmap(mem, hugetlbsize);
}
@@ -1036,7 +1064,7 @@ static const struct test_case anon_test_cases[] = {
*/
{
"vmsplice() + unmap in child",
- test_vmsplice_in_child
+ test_vmsplice_in_child,
},
/*
* vmsplice() test, but do an additional mprotect(PROT_READ)+
@@ -1044,7 +1072,7 @@ static const struct test_case anon_test_cases[] = {
*/
{
"vmsplice() + unmap in child with mprotect() optimization",
- test_vmsplice_in_child_mprotect
+ test_vmsplice_in_child_mprotect,
},
/*
* vmsplice() [R/O GUP] in parent before fork(), unmap in parent after
@@ -1322,23 +1350,31 @@ close_comm_pipes:
close_comm_pipes(&comm_pipes);
}
-static void test_anon_thp_collapse_unshared(char *mem, size_t size)
+static void test_anon_thp_collapse_unshared(char *mem, size_t size,
+ bool is_hugetlb)
{
+ assert(!is_hugetlb);
do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED);
}
-static void test_anon_thp_collapse_fully_shared(char *mem, size_t size)
+static void test_anon_thp_collapse_fully_shared(char *mem, size_t size,
+ bool is_hugetlb)
{
+ assert(!is_hugetlb);
do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED);
}
-static void test_anon_thp_collapse_lower_shared(char *mem, size_t size)
+static void test_anon_thp_collapse_lower_shared(char *mem, size_t size,
+ bool is_hugetlb)
{
+ assert(!is_hugetlb);
do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED);
}
-static void test_anon_thp_collapse_upper_shared(char *mem, size_t size)
+static void test_anon_thp_collapse_upper_shared(char *mem, size_t size,
+ bool is_hugetlb)
{
+ assert(!is_hugetlb);
do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED);
}
diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
index d7eaca5bbe..9423ad439a 100644
--- a/tools/testing/selftests/mm/gup_longterm.c
+++ b/tools/testing/selftests/mm/gup_longterm.c
@@ -118,15 +118,22 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
return;
}
- /*
- * Fault in the page writable such that GUP-fast can eventually pin
- * it immediately.
- */
+ /* Fault in the page such that GUP-fast can pin it directly. */
memset(mem, 0, size);
switch (type) {
case TEST_TYPE_RO:
case TEST_TYPE_RO_FAST:
+ /*
+ * Cover more cases regarding unsharing decisions when
+ * long-term R/O pinning by mapping the page R/O.
+ */
+ ret = mprotect(mem, size, PROT_READ);
+ if (ret) {
+ ksft_test_result_fail("mprotect() failed\n");
+ goto munmap;
+ }
+ /* FALLTHROUGH */
case TEST_TYPE_RW:
case TEST_TYPE_RW_FAST: {
struct pin_longterm_test args;
@@ -228,6 +235,7 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
assert(false);
}
+munmap:
munmap(mem, size);
}
diff --git a/tools/testing/selftests/mm/hugetlb_madv_vs_map.c b/tools/testing/selftests/mm/hugetlb_madv_vs_map.c
index d01e8d4901..8f122a0f08 100644
--- a/tools/testing/selftests/mm/hugetlb_madv_vs_map.c
+++ b/tools/testing/selftests/mm/hugetlb_madv_vs_map.c
@@ -27,9 +27,9 @@
#include "vm_util.h"
#include "../kselftest.h"
-#define MMAP_SIZE (1 << 21)
#define INLOOP_ITER 100
+size_t mmap_size;
char *huge_ptr;
/* Touch the memory while it is being madvised() */
@@ -44,7 +44,7 @@ void *touch(void *unused)
void *madv(void *unused)
{
for (int i = 0; i < INLOOP_ITER; i++)
- madvise(huge_ptr, MMAP_SIZE, MADV_DONTNEED);
+ madvise(huge_ptr, mmap_size, MADV_DONTNEED);
return NULL;
}
@@ -59,7 +59,7 @@ void *map_extra(void *unused)
void *ptr;
for (int i = 0; i < INLOOP_ITER; i++) {
- ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE,
+ ptr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
-1, 0);
@@ -93,14 +93,16 @@ int main(void)
free_hugepages);
}
+ mmap_size = default_huge_page_size();
+
while (max--) {
- huge_ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE,
+ huge_ptr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
-1, 0);
if ((unsigned long)huge_ptr == -1) {
- ksft_exit_skip("Failed to allocated huge page\n");
- return KSFT_SKIP;
+ ksft_test_result_fail("Failed to allocate huge page\n");
+ return KSFT_FAIL;
}
pthread_create(&thread1, NULL, madv, NULL);
@@ -117,7 +119,7 @@ int main(void)
}
/* Unmap and restart */
- munmap(huge_ptr, MMAP_SIZE);
+ munmap(huge_ptr, mmap_size);
}
return KSFT_PASS;
diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c
index 508287560c..b61803e36d 100644
--- a/tools/testing/selftests/mm/ksm_functional_tests.c
+++ b/tools/testing/selftests/mm/ksm_functional_tests.c
@@ -28,6 +28,15 @@
#define MiB (1024 * KiB)
#define FORK_EXEC_CHILD_PRG_NAME "ksm_fork_exec_child"
+#define MAP_MERGE_FAIL ((void *)-1)
+#define MAP_MERGE_SKIP ((void *)-2)
+
+enum ksm_merge_mode {
+ KSM_MERGE_PRCTL,
+ KSM_MERGE_MADVISE,
+ KSM_MERGE_NONE, /* PRCTL already set */
+};
+
static int mem_fd;
static int ksm_fd;
static int ksm_full_scans_fd;
@@ -146,33 +155,34 @@ static int ksm_unmerge(void)
return 0;
}
-static char *mmap_and_merge_range(char val, unsigned long size, int prot,
- bool use_prctl)
+static char *__mmap_and_merge_range(char val, unsigned long size, int prot,
+ enum ksm_merge_mode mode)
{
char *map;
+ char *err_map = MAP_MERGE_FAIL;
int ret;
/* Stabilize accounting by disabling KSM completely. */
if (ksm_unmerge()) {
- ksft_test_result_fail("Disabling (unmerging) KSM failed\n");
- return MAP_FAILED;
+ ksft_print_msg("Disabling (unmerging) KSM failed\n");
+ return err_map;
}
if (get_my_merging_pages() > 0) {
- ksft_test_result_fail("Still pages merged\n");
- return MAP_FAILED;
+ ksft_print_msg("Still pages merged\n");
+ return err_map;
}
map = mmap(NULL, size, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANON, -1, 0);
if (map == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
- return MAP_FAILED;
+ ksft_print_msg("mmap() failed\n");
+ return err_map;
}
/* Don't use THP. Ignore if THP are not around on a kernel. */
if (madvise(map, size, MADV_NOHUGEPAGE) && errno != EINVAL) {
- ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
+ ksft_print_msg("MADV_NOHUGEPAGE failed\n");
goto unmap;
}
@@ -180,27 +190,36 @@ static char *mmap_and_merge_range(char val, unsigned long size, int prot,
memset(map, val, size);
if (mprotect(map, size, prot)) {
- ksft_test_result_skip("mprotect() failed\n");
+ ksft_print_msg("mprotect() failed\n");
+ err_map = MAP_MERGE_SKIP;
goto unmap;
}
- if (use_prctl) {
+ switch (mode) {
+ case KSM_MERGE_PRCTL:
ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
if (ret < 0 && errno == EINVAL) {
- ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
+ ksft_print_msg("PR_SET_MEMORY_MERGE not supported\n");
+ err_map = MAP_MERGE_SKIP;
goto unmap;
} else if (ret) {
- ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n");
+ ksft_print_msg("PR_SET_MEMORY_MERGE=1 failed\n");
goto unmap;
}
- } else if (madvise(map, size, MADV_MERGEABLE)) {
- ksft_test_result_fail("MADV_MERGEABLE failed\n");
- goto unmap;
+ break;
+ case KSM_MERGE_MADVISE:
+ if (madvise(map, size, MADV_MERGEABLE)) {
+ ksft_print_msg("MADV_MERGEABLE failed\n");
+ goto unmap;
+ }
+ break;
+ case KSM_MERGE_NONE:
+ break;
}
/* Run KSM to trigger merging and wait. */
if (ksm_merge()) {
- ksft_test_result_fail("Running KSM failed\n");
+ ksft_print_msg("Running KSM failed\n");
goto unmap;
}
@@ -209,14 +228,31 @@ static char *mmap_and_merge_range(char val, unsigned long size, int prot,
* accounted differently (depending on kernel support).
*/
if (val && !get_my_merging_pages()) {
- ksft_test_result_fail("No pages got merged\n");
+ ksft_print_msg("No pages got merged\n");
goto unmap;
}
return map;
unmap:
munmap(map, size);
- return MAP_FAILED;
+ return err_map;
+}
+
+static char *mmap_and_merge_range(char val, unsigned long size, int prot,
+ enum ksm_merge_mode mode)
+{
+ char *map;
+ char *ret = MAP_FAILED;
+
+ map = __mmap_and_merge_range(val, size, prot, mode);
+ if (map == MAP_MERGE_FAIL)
+ ksft_test_result_fail("Merging memory failed");
+ else if (map == MAP_MERGE_SKIP)
+ ksft_test_result_skip("Merging memory skipped");
+ else
+ ret = map;
+
+ return ret;
}
static void test_unmerge(void)
@@ -226,7 +262,7 @@ static void test_unmerge(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false);
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE);
if (map == MAP_FAILED)
return;
@@ -264,7 +300,7 @@ static void test_unmerge_zero_pages(void)
}
/* Let KSM deduplicate zero pages. */
- map = mmap_and_merge_range(0x00, size, PROT_READ | PROT_WRITE, false);
+ map = mmap_and_merge_range(0x00, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE);
if (map == MAP_FAILED)
return;
@@ -312,7 +348,7 @@ static void test_unmerge_discarded(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false);
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE);
if (map == MAP_FAILED)
return;
@@ -344,7 +380,7 @@ static void test_unmerge_uffd_wp(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false);
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE);
if (map == MAP_FAILED)
return;
@@ -439,6 +475,36 @@ static void test_prctl(void)
ksft_test_result_pass("Setting/clearing PR_SET_MEMORY_MERGE works\n");
}
+static int test_child_ksm(void)
+{
+ const unsigned int size = 2 * MiB;
+ char *map;
+
+ /* Test if KSM is enabled for the process. */
+ if (prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) != 1)
+ return -1;
+
+ /* Test if merge could really happen. */
+ map = __mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_NONE);
+ if (map == MAP_MERGE_FAIL)
+ return -2;
+ else if (map == MAP_MERGE_SKIP)
+ return -3;
+
+ munmap(map, size);
+ return 0;
+}
+
+static void test_child_ksm_err(int status)
+{
+ if (status == -1)
+ ksft_test_result_fail("unexpected PR_GET_MEMORY_MERGE result in child\n");
+ else if (status == -2)
+ ksft_test_result_fail("Merge in child failed\n");
+ else if (status == -3)
+ ksft_test_result_skip("Merge in child skipped\n");
+}
+
/* Verify that prctl ksm flag is inherited. */
static void test_prctl_fork(void)
{
@@ -458,7 +524,7 @@ static void test_prctl_fork(void)
child_pid = fork();
if (!child_pid) {
- exit(prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0));
+ exit(test_child_ksm());
} else if (child_pid < 0) {
ksft_test_result_fail("fork() failed\n");
return;
@@ -467,8 +533,11 @@ static void test_prctl_fork(void)
if (waitpid(child_pid, &status, 0) < 0) {
ksft_test_result_fail("waitpid() failed\n");
return;
- } else if (WEXITSTATUS(status) != 1) {
- ksft_test_result_fail("unexpected PR_GET_MEMORY_MERGE result in child\n");
+ }
+
+ status = WEXITSTATUS(status);
+ if (status) {
+ test_child_ksm_err(status);
return;
}
@@ -480,12 +549,6 @@ static void test_prctl_fork(void)
ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n");
}
-static int ksm_fork_exec_child(void)
-{
- /* Test if KSM is enabled for the process. */
- return prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) == 1;
-}
-
static void test_prctl_fork_exec(void)
{
int ret, status;
@@ -518,7 +581,7 @@ static void test_prctl_fork_exec(void)
if (WIFEXITED(status)) {
status = WEXITSTATUS(status);
if (status) {
- ksft_test_result_fail("KSM not enabled\n");
+ test_child_ksm_err(status);
return;
}
} else {
@@ -545,7 +608,7 @@ static void test_prctl_unmerge(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, true);
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_PRCTL);
if (map == MAP_FAILED)
return;
@@ -568,7 +631,7 @@ static void test_prot_none(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0x11, size, PROT_NONE, false);
+ map = mmap_and_merge_range(0x11, size, PROT_NONE, KSM_MERGE_MADVISE);
if (map == MAP_FAILED)
goto unmap;
@@ -593,13 +656,34 @@ unmap:
munmap(map, size);
}
+static void init_global_file_handles(void)
+{
+ mem_fd = open("/proc/self/mem", O_RDWR);
+ if (mem_fd < 0)
+ ksft_exit_fail_msg("opening /proc/self/mem failed\n");
+ ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
+ if (ksm_fd < 0)
+ ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n");
+ ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY);
+ if (ksm_full_scans_fd < 0)
+ ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n");
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ if (pagemap_fd < 0)
+ ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n");
+ proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY);
+ proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages",
+ O_RDONLY);
+ ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR);
+}
+
int main(int argc, char **argv)
{
unsigned int tests = 8;
int err;
if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) {
- exit(ksm_fork_exec_child() == 1 ? 0 : 1);
+ init_global_file_handles();
+ exit(test_child_ksm());
}
#ifdef __NR_userfaultfd
@@ -611,22 +695,7 @@ int main(int argc, char **argv)
pagesize = getpagesize();
- mem_fd = open("/proc/self/mem", O_RDWR);
- if (mem_fd < 0)
- ksft_exit_fail_msg("opening /proc/self/mem failed\n");
- ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
- if (ksm_fd < 0)
- ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n");
- ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY);
- if (ksm_full_scans_fd < 0)
- ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n");
- pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
- if (pagemap_fd < 0)
- ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n");
- proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY);
- proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages",
- O_RDONLY);
- ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR);
+ init_global_file_handles();
test_unmerge();
test_unmerge_zero_pages();
diff --git a/tools/testing/selftests/mm/map_fixed_noreplace.c b/tools/testing/selftests/mm/map_fixed_noreplace.c
index b74813fdc9..d53de24860 100644
--- a/tools/testing/selftests/mm/map_fixed_noreplace.c
+++ b/tools/testing/selftests/mm/map_fixed_noreplace.c
@@ -67,7 +67,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error: munmap failed!?\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n");
addr = base_addr + page_size;
size = 3 * page_size;
@@ -76,7 +77,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error: first mmap() failed unexpectedly\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 3*PAGE_SIZE at base+PAGE_SIZE\n");
/*
* Exact same mapping again:
@@ -93,7 +95,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error:1: mmap() succeeded when it shouldn't have\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n");
/*
* Second mapping contained within first:
@@ -111,7 +114,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error:2: mmap() succeeded when it shouldn't have\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 2*PAGE_SIZE at base+PAGE_SIZE\n");
/*
* Overlap end of existing mapping:
@@ -128,7 +132,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error:3: mmap() succeeded when it shouldn't have\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 2*PAGE_SIZE at base+(3*PAGE_SIZE)\n");
/*
* Overlap start of existing mapping:
@@ -145,7 +150,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error:4: mmap() succeeded when it shouldn't have\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 2*PAGE_SIZE bytes at base\n");
/*
* Adjacent to start of existing mapping:
@@ -162,7 +168,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error:5: mmap() failed when it shouldn't have\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() PAGE_SIZE at base\n");
/*
* Adjacent to end of existing mapping:
@@ -179,7 +186,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error:6: mmap() failed when it shouldn't have\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() PAGE_SIZE at base+(4*PAGE_SIZE)\n");
addr = base_addr;
size = 5 * page_size;
diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c
index 9b298f6a04..9a0597310a 100644
--- a/tools/testing/selftests/mm/memfd_secret.c
+++ b/tools/testing/selftests/mm/memfd_secret.c
@@ -20,6 +20,7 @@
#include <unistd.h>
#include <errno.h>
#include <stdio.h>
+#include <fcntl.h>
#include "../kselftest.h"
@@ -83,6 +84,45 @@ static void test_mlock_limit(int fd)
pass("mlock limit is respected\n");
}
+static void test_vmsplice(int fd, const char *desc)
+{
+ ssize_t transferred;
+ struct iovec iov;
+ int pipefd[2];
+ char *mem;
+
+ if (pipe(pipefd)) {
+ fail("pipe failed: %s\n", strerror(errno));
+ return;
+ }
+
+ mem = mmap(NULL, page_size, prot, mode, fd, 0);
+ if (mem == MAP_FAILED) {
+ fail("Unable to mmap secret memory\n");
+ goto close_pipe;
+ }
+
+ /*
+ * vmsplice() may use GUP-fast, which must also fail. Prefault the
+ * page table, so GUP-fast could find it.
+ */
+ memset(mem, PATTERN, page_size);
+
+ iov.iov_base = mem;
+ iov.iov_len = page_size;
+ transferred = vmsplice(pipefd[1], &iov, 1, 0);
+
+ if (transferred < 0 && errno == EFAULT)
+ pass("vmsplice is blocked as expected with %s\n", desc);
+ else
+ fail("vmsplice: unexpected memory access with %s\n", desc);
+
+ munmap(mem, page_size);
+close_pipe:
+ close(pipefd[0]);
+ close(pipefd[1]);
+}
+
static void try_process_vm_read(int fd, int pipefd[2])
{
struct iovec liov, riov;
@@ -187,7 +227,6 @@ static void test_remote_access(int fd, const char *name,
return;
}
- ftruncate(fd, page_size);
memset(mem, PATTERN, page_size);
if (write(pipefd[1], &mem, sizeof(mem)) < 0) {
@@ -258,7 +297,7 @@ static void prepare(void)
strerror(errno));
}
-#define NUM_TESTS 4
+#define NUM_TESTS 6
int main(int argc, char *argv[])
{
@@ -277,9 +316,17 @@ int main(int argc, char *argv[])
ksft_exit_fail_msg("memfd_secret failed: %s\n",
strerror(errno));
}
+ if (ftruncate(fd, page_size))
+ ksft_exit_fail_msg("ftruncate failed: %s\n", strerror(errno));
test_mlock_limit(fd);
test_file_apis(fd);
+ /*
+ * We have to run the first vmsplice test before any secretmem page was
+ * allocated for this fd.
+ */
+ test_vmsplice(fd, "fresh page");
+ test_vmsplice(fd, "existing page");
test_process_vm_read(fd);
test_ptrace(fd);
diff --git a/tools/testing/selftests/mm/mlock2-tests.c b/tools/testing/selftests/mm/mlock2-tests.c
index 26f744188a..7f0d50fa36 100644
--- a/tools/testing/selftests/mm/mlock2-tests.c
+++ b/tools/testing/selftests/mm/mlock2-tests.c
@@ -20,8 +20,6 @@ static int get_vm_area(unsigned long addr, struct vm_boundaries *area)
FILE *file;
int ret = 1;
char line[1024] = {0};
- char *end_addr;
- char *stop;
unsigned long start;
unsigned long end;
@@ -37,21 +35,10 @@ static int get_vm_area(unsigned long addr, struct vm_boundaries *area)
memset(area, 0, sizeof(struct vm_boundaries));
while(fgets(line, 1024, file)) {
- end_addr = strchr(line, '-');
- if (!end_addr) {
+ if (sscanf(line, "%lx-%lx", &start, &end) != 2) {
ksft_print_msg("cannot parse /proc/self/maps\n");
goto out;
}
- *end_addr = '\0';
- end_addr++;
- stop = strchr(end_addr, ' ');
- if (!stop) {
- ksft_print_msg("cannot parse /proc/self/maps\n");
- goto out;
- }
-
- sscanf(line, "%lx", &start);
- sscanf(end_addr, "%lx", &end);
if (start <= addr && end > addr) {
area->start = start;
diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c
index 2f8b991f78..1b03bcfaef 100644
--- a/tools/testing/selftests/mm/mremap_test.c
+++ b/tools/testing/selftests/mm/mremap_test.c
@@ -23,6 +23,7 @@
#define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+#define MAX(X, Y) ((X) > (Y) ? (X) : (Y))
#define SIZE_MB(m) ((size_t)m * (1024 * 1024))
#define SIZE_KB(k) ((size_t)k * 1024)
@@ -69,6 +70,27 @@ enum {
.expect_failure = should_fail \
}
+/* compute square root using binary search */
+static unsigned long get_sqrt(unsigned long val)
+{
+ unsigned long low = 1;
+
+ /* assuming rand_size is less than 1TB */
+ unsigned long high = (1UL << 20);
+
+ while (low <= high) {
+ unsigned long mid = low + (high - low) / 2;
+ unsigned long temp = mid * mid;
+
+ if (temp == val)
+ return mid;
+ if (temp < val)
+ low = mid + 1;
+ high = mid - 1;
+ }
+ return low;
+}
+
/*
* Returns false if the requested remap region overlaps with an
* existing mapping (e.g text, stack) else returns true.
@@ -126,19 +148,21 @@ static unsigned long long get_mmap_min_addr(void)
* Using /proc/self/maps, assert that the specified address range is contained
* within a single mapping.
*/
-static bool is_range_mapped(FILE *maps_fp, void *start, void *end)
+static bool is_range_mapped(FILE *maps_fp, unsigned long start,
+ unsigned long end)
{
char *line = NULL;
size_t len = 0;
bool success = false;
+ unsigned long first_val, second_val;
rewind(maps_fp);
while (getline(&line, &len, maps_fp) != -1) {
- char *first = strtok(line, "- ");
- void *first_val = (void *)strtol(first, NULL, 16);
- char *second = strtok(NULL, "- ");
- void *second_val = (void *) strtol(second, NULL, 16);
+ if (sscanf(line, "%lx-%lx", &first_val, &second_val) != 2) {
+ ksft_exit_fail_msg("cannot parse /proc/self/maps\n");
+ break;
+ }
if (first_val <= start && second_val >= end) {
success = true;
@@ -233,7 +257,8 @@ static void mremap_expand_merge(FILE *maps_fp, unsigned long page_size)
goto out;
}
- success = is_range_mapped(maps_fp, start, start + 3 * page_size);
+ success = is_range_mapped(maps_fp, (unsigned long)start,
+ (unsigned long)(start + 3 * page_size));
munmap(start, 3 * page_size);
out:
@@ -272,7 +297,8 @@ static void mremap_expand_merge_offset(FILE *maps_fp, unsigned long page_size)
goto out;
}
- success = is_range_mapped(maps_fp, start, start + 3 * page_size);
+ success = is_range_mapped(maps_fp, (unsigned long)start,
+ (unsigned long)(start + 3 * page_size));
munmap(start, 3 * page_size);
out:
@@ -296,7 +322,7 @@ out:
*
* |DDDDddddSSSSssss|
*/
-static void mremap_move_within_range(char pattern_seed)
+static void mremap_move_within_range(unsigned int pattern_seed, char *rand_addr)
{
char *test_name = "mremap mremap move within range";
void *src, *dest;
@@ -316,10 +342,7 @@ static void mremap_move_within_range(char pattern_seed)
src = (void *)((unsigned long)src & ~(SIZE_MB(2) - 1));
/* Set byte pattern for source block. */
- srand(pattern_seed);
- for (i = 0; i < SIZE_MB(2); i++) {
- ((char *)src)[i] = (char) rand();
- }
+ memcpy(src, rand_addr, SIZE_MB(2));
dest = src - SIZE_MB(2);
@@ -357,14 +380,14 @@ out:
/* Returns the time taken for the remap on success else returns -1. */
static long long remap_region(struct config c, unsigned int threshold_mb,
- char pattern_seed)
+ char *rand_addr)
{
void *addr, *src_addr, *dest_addr, *dest_preamble_addr;
- int d;
- unsigned long long t;
+ unsigned long long t, d;
struct timespec t_start = {0, 0}, t_end = {0, 0};
long long start_ns, end_ns, align_mask, ret, offset;
unsigned long long threshold;
+ unsigned long num_chunks;
if (threshold_mb == VALIDATION_NO_THRESHOLD)
threshold = c.region_size;
@@ -378,9 +401,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
}
/* Set byte pattern for source block. */
- srand(pattern_seed);
- for (t = 0; t < threshold; t++)
- memset((char *) src_addr + t, (char) rand(), 1);
+ memcpy(src_addr, rand_addr, threshold);
/* Mask to zero out lower bits of address for alignment */
align_mask = ~(c.dest_alignment - 1);
@@ -420,9 +441,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
}
/* Set byte pattern for the dest preamble block. */
- srand(pattern_seed);
- for (d = 0; d < c.dest_preamble_size; d++)
- memset((char *) dest_preamble_addr + d, (char) rand(), 1);
+ memcpy(dest_preamble_addr, rand_addr, c.dest_preamble_size);
}
clock_gettime(CLOCK_MONOTONIC, &t_start);
@@ -436,15 +455,42 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
goto clean_up_dest_preamble;
}
- /* Verify byte pattern after remapping */
- srand(pattern_seed);
- for (t = 0; t < threshold; t++) {
- char c = (char) rand();
+ /*
+ * Verify byte pattern after remapping. Employ an algorithm with a
+ * square root time complexity in threshold: divide the range into
+ * chunks, if memcmp() returns non-zero, only then perform an
+ * iteration in that chunk to find the mismatch index.
+ */
+ num_chunks = get_sqrt(threshold);
+ for (unsigned long i = 0; i < num_chunks; ++i) {
+ size_t chunk_size = threshold / num_chunks;
+ unsigned long shift = i * chunk_size;
+
+ if (!memcmp(dest_addr + shift, rand_addr + shift, chunk_size))
+ continue;
+
+ /* brute force iteration only over mismatch segment */
+ for (t = shift; t < shift + chunk_size; ++t) {
+ if (((char *) dest_addr)[t] != rand_addr[t]) {
+ ksft_print_msg("Data after remap doesn't match at offset %llu\n",
+ t);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[t] & 0xff,
+ ((char *) dest_addr)[t] & 0xff);
+ ret = -1;
+ goto clean_up_dest;
+ }
+ }
+ }
- if (((char *) dest_addr)[t] != c) {
+ /*
+ * if threshold is not divisible by num_chunks, then check the
+ * last chunk
+ */
+ for (t = num_chunks * (threshold / num_chunks); t < threshold; ++t) {
+ if (((char *) dest_addr)[t] != rand_addr[t]) {
ksft_print_msg("Data after remap doesn't match at offset %llu\n",
- t);
- ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
+ t);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[t] & 0xff,
((char *) dest_addr)[t] & 0xff);
ret = -1;
goto clean_up_dest;
@@ -452,22 +498,44 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
}
/* Verify the dest preamble byte pattern after remapping */
- if (c.dest_preamble_size) {
- srand(pattern_seed);
- for (d = 0; d < c.dest_preamble_size; d++) {
- char c = (char) rand();
-
- if (((char *) dest_preamble_addr)[d] != c) {
- ksft_print_msg("Preamble data after remap doesn't match at offset %d\n",
- d);
- ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
- ((char *) dest_preamble_addr)[d] & 0xff);
+ if (!c.dest_preamble_size)
+ goto no_preamble;
+
+ num_chunks = get_sqrt(c.dest_preamble_size);
+
+ for (unsigned long i = 0; i < num_chunks; ++i) {
+ size_t chunk_size = c.dest_preamble_size / num_chunks;
+ unsigned long shift = i * chunk_size;
+
+ if (!memcmp(dest_preamble_addr + shift, rand_addr + shift,
+ chunk_size))
+ continue;
+
+ /* brute force iteration only over mismatched segment */
+ for (d = shift; d < shift + chunk_size; ++d) {
+ if (((char *) dest_preamble_addr)[d] != rand_addr[d]) {
+ ksft_print_msg("Preamble data after remap doesn't match at offset %llu\n",
+ d);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[d] & 0xff,
+ ((char *) dest_preamble_addr)[d] & 0xff);
ret = -1;
goto clean_up_dest;
}
}
}
+ for (d = num_chunks * (c.dest_preamble_size / num_chunks); d < c.dest_preamble_size; ++d) {
+ if (((char *) dest_preamble_addr)[d] != rand_addr[d]) {
+ ksft_print_msg("Preamble data after remap doesn't match at offset %llu\n",
+ d);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[d] & 0xff,
+ ((char *) dest_preamble_addr)[d] & 0xff);
+ ret = -1;
+ goto clean_up_dest;
+ }
+ }
+
+no_preamble:
start_ns = t_start.tv_sec * NS_PER_SEC + t_start.tv_nsec;
end_ns = t_end.tv_sec * NS_PER_SEC + t_end.tv_nsec;
ret = end_ns - start_ns;
@@ -494,7 +562,8 @@ out:
* the beginning of the mapping just because the aligned
* down address landed on a mapping that maybe does not exist.
*/
-static void mremap_move_1mb_from_start(char pattern_seed)
+static void mremap_move_1mb_from_start(unsigned int pattern_seed,
+ char *rand_addr)
{
char *test_name = "mremap move 1mb from start at 1MB+256KB aligned src";
void *src = NULL, *dest = NULL;
@@ -520,10 +589,7 @@ static void mremap_move_1mb_from_start(char pattern_seed)
}
/* Set byte pattern for source block. */
- srand(pattern_seed);
- for (i = 0; i < SIZE_MB(2); i++) {
- ((char *)src)[i] = (char) rand();
- }
+ memcpy(src, rand_addr, SIZE_MB(2));
/*
* Unmap the beginning of dest so that the aligned address
@@ -568,10 +634,10 @@ out:
static void run_mremap_test_case(struct test test_case, int *failures,
unsigned int threshold_mb,
- unsigned int pattern_seed)
+ unsigned int pattern_seed, char *rand_addr)
{
long long remap_time = remap_region(test_case.config, threshold_mb,
- pattern_seed);
+ rand_addr);
if (remap_time < 0) {
if (test_case.expect_failure)
@@ -642,7 +708,15 @@ int main(int argc, char **argv)
int failures = 0;
int i, run_perf_tests;
unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD;
+
+ /* hard-coded test configs */
+ size_t max_test_variable_region_size = _2GB;
+ size_t max_test_constant_region_size = _2MB;
+ size_t dest_preamble_size = 10 * _4MB;
+
unsigned int pattern_seed;
+ char *rand_addr;
+ size_t rand_size;
int num_expand_tests = 2;
int num_misc_tests = 2;
struct test test_cases[MAX_TEST] = {};
@@ -659,6 +733,31 @@ int main(int argc, char **argv)
ksft_print_msg("Test configs:\n\tthreshold_mb=%u\n\tpattern_seed=%u\n\n",
threshold_mb, pattern_seed);
+ /*
+ * set preallocated random array according to test configs; see the
+ * functions for the logic of setting the size
+ */
+ if (!threshold_mb)
+ rand_size = MAX(max_test_variable_region_size,
+ max_test_constant_region_size);
+ else
+ rand_size = MAX(MIN(threshold_mb * _1MB,
+ max_test_variable_region_size),
+ max_test_constant_region_size);
+ rand_size = MAX(dest_preamble_size, rand_size);
+
+ rand_addr = (char *)mmap(NULL, rand_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (rand_addr == MAP_FAILED) {
+ perror("mmap");
+ ksft_exit_fail_msg("cannot mmap rand_addr\n");
+ }
+
+ /* fill stream of random bytes */
+ srand(pattern_seed);
+ for (unsigned long i = 0; i < rand_size; ++i)
+ rand_addr[i] = (char) rand();
+
page_size = sysconf(_SC_PAGESIZE);
/* Expected mremap failures */
@@ -730,13 +829,13 @@ int main(int argc, char **argv)
for (i = 0; i < ARRAY_SIZE(test_cases); i++)
run_mremap_test_case(test_cases[i], &failures, threshold_mb,
- pattern_seed);
+ pattern_seed, rand_addr);
maps_fp = fopen("/proc/self/maps", "r");
if (maps_fp == NULL) {
- ksft_print_msg("Failed to read /proc/self/maps: %s\n", strerror(errno));
- exit(KSFT_FAIL);
+ munmap(rand_addr, rand_size);
+ ksft_exit_fail_msg("Failed to read /proc/self/maps: %s\n", strerror(errno));
}
mremap_expand_merge(maps_fp, page_size);
@@ -744,17 +843,20 @@ int main(int argc, char **argv)
fclose(maps_fp);
- mremap_move_within_range(pattern_seed);
- mremap_move_1mb_from_start(pattern_seed);
+ mremap_move_within_range(pattern_seed, rand_addr);
+ mremap_move_1mb_from_start(pattern_seed, rand_addr);
if (run_perf_tests) {
ksft_print_msg("\n%s\n",
"mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:");
for (i = 0; i < ARRAY_SIZE(perf_test_cases); i++)
run_mremap_test_case(perf_test_cases[i], &failures,
- threshold_mb, pattern_seed);
+ threshold_mb, pattern_seed,
+ rand_addr);
}
+ munmap(rand_addr, rand_size);
+
if (failures > 0)
ksft_exit_fail();
else
diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c
new file mode 100644
index 0000000000..41998cf1dc
--- /dev/null
+++ b/tools/testing/selftests/mm/mseal_test.c
@@ -0,0 +1,1894 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <stdbool.h>
+#include "../kselftest.h"
+#include <syscall.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/vfs.h>
+#include <sys/stat.h>
+
+/*
+ * need those definition for manually build using gcc.
+ * gcc -I ../../../../usr/include -DDEBUG -O3 -DDEBUG -O3 mseal_test.c -o mseal_test
+ */
+#ifndef PKEY_DISABLE_ACCESS
+# define PKEY_DISABLE_ACCESS 0x1
+#endif
+
+#ifndef PKEY_DISABLE_WRITE
+# define PKEY_DISABLE_WRITE 0x2
+#endif
+
+#ifndef PKEY_BITS_PER_PKEY
+#define PKEY_BITS_PER_PKEY 2
+#endif
+
+#ifndef PKEY_MASK
+#define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)
+#endif
+
+#define FAIL_TEST_IF_FALSE(c) do {\
+ if (!(c)) {\
+ ksft_test_result_fail("%s, line:%d\n", __func__, __LINE__);\
+ goto test_end;\
+ } \
+ } \
+ while (0)
+
+#define SKIP_TEST_IF_FALSE(c) do {\
+ if (!(c)) {\
+ ksft_test_result_skip("%s, line:%d\n", __func__, __LINE__);\
+ goto test_end;\
+ } \
+ } \
+ while (0)
+
+
+#define TEST_END_CHECK() {\
+ ksft_test_result_pass("%s\n", __func__);\
+ return;\
+test_end:\
+ return;\
+}
+
+#ifndef u64
+#define u64 unsigned long long
+#endif
+
+static unsigned long get_vma_size(void *addr, int *prot)
+{
+ FILE *maps;
+ char line[256];
+ int size = 0;
+ uintptr_t addr_start, addr_end;
+ char protstr[5];
+ *prot = 0;
+
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps)
+ return 0;
+
+ while (fgets(line, sizeof(line), maps)) {
+ if (sscanf(line, "%lx-%lx %4s", &addr_start, &addr_end, protstr) == 3) {
+ if (addr_start == (uintptr_t) addr) {
+ size = addr_end - addr_start;
+ if (protstr[0] == 'r')
+ *prot |= 0x4;
+ if (protstr[1] == 'w')
+ *prot |= 0x2;
+ if (protstr[2] == 'x')
+ *prot |= 0x1;
+ break;
+ }
+ }
+ }
+ fclose(maps);
+ return size;
+}
+
+/*
+ * define sys_xyx to call syscall directly.
+ */
+static int sys_mseal(void *start, size_t len)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_mseal, start, len, 0);
+ return sret;
+}
+
+static int sys_mprotect(void *ptr, size_t size, unsigned long prot)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_mprotect, ptr, size, prot);
+ return sret;
+}
+
+static int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+ unsigned long pkey)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey);
+ return sret;
+}
+
+static void *sys_mmap(void *addr, unsigned long len, unsigned long prot,
+ unsigned long flags, unsigned long fd, unsigned long offset)
+{
+ void *sret;
+
+ errno = 0;
+ sret = (void *) syscall(__NR_mmap, addr, len, prot,
+ flags, fd, offset);
+ return sret;
+}
+
+static int sys_munmap(void *ptr, size_t size)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_munmap, ptr, size);
+ return sret;
+}
+
+static int sys_madvise(void *start, size_t len, int types)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_madvise, start, len, types);
+ return sret;
+}
+
+static int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
+{
+ int ret = syscall(__NR_pkey_alloc, flags, init_val);
+
+ return ret;
+}
+
+static unsigned int __read_pkey_reg(void)
+{
+ unsigned int pkey_reg = 0;
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ unsigned int eax, edx;
+ unsigned int ecx = 0;
+
+ asm volatile(".byte 0x0f,0x01,0xee\n\t"
+ : "=a" (eax), "=d" (edx)
+ : "c" (ecx));
+ pkey_reg = eax;
+#endif
+ return pkey_reg;
+}
+
+static void __write_pkey_reg(u64 pkey_reg)
+{
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ unsigned int eax = pkey_reg;
+ unsigned int ecx = 0;
+ unsigned int edx = 0;
+
+ asm volatile(".byte 0x0f,0x01,0xef\n\t"
+ : : "a" (eax), "c" (ecx), "d" (edx));
+#endif
+}
+
+static unsigned long pkey_bit_position(int pkey)
+{
+ return pkey * PKEY_BITS_PER_PKEY;
+}
+
+static u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
+{
+ unsigned long shift = pkey_bit_position(pkey);
+
+ /* mask out bits from pkey in old value */
+ reg &= ~((u64)PKEY_MASK << shift);
+ /* OR in new bits for pkey */
+ reg |= (flags & PKEY_MASK) << shift;
+ return reg;
+}
+
+static void set_pkey(int pkey, unsigned long pkey_value)
+{
+ u64 new_pkey_reg;
+
+ new_pkey_reg = set_pkey_bits(__read_pkey_reg(), pkey, pkey_value);
+ __write_pkey_reg(new_pkey_reg);
+}
+
+static void setup_single_address(int size, void **ptrOut)
+{
+ void *ptr;
+
+ ptr = sys_mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ *ptrOut = ptr;
+}
+
+static void setup_single_address_rw(int size, void **ptrOut)
+{
+ void *ptr;
+ unsigned long mapflags = MAP_ANONYMOUS | MAP_PRIVATE;
+
+ ptr = sys_mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0);
+ *ptrOut = ptr;
+}
+
+static int clean_single_address(void *ptr, int size)
+{
+ int ret;
+ ret = munmap(ptr, size);
+ return ret;
+}
+
+static int seal_single_address(void *ptr, int size)
+{
+ int ret;
+ ret = sys_mseal(ptr, size);
+ return ret;
+}
+
+bool seal_support(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+
+ ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (ptr == (void *) -1)
+ return false;
+
+ ret = sys_mseal(ptr, page_size);
+ if (ret < 0)
+ return false;
+
+ return true;
+}
+
+bool pkey_supported(void)
+{
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ int pkey = sys_pkey_alloc(0, 0);
+
+ if (pkey > 0)
+ return true;
+#endif
+ return false;
+}
+
+static void test_seal_addseal(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_unmapped_start(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* munmap 2 pages from ptr. */
+ ret = sys_munmap(ptr, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mprotect will fail because 2 pages from ptr are unmapped. */
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* mseal will fail because 2 pages from ptr are unmapped. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ ret = sys_mseal(ptr + 2 * page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_unmapped_middle(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* munmap 2 pages from ptr + page. */
+ ret = sys_munmap(ptr + page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mprotect will fail, since middle 2 pages are unmapped. */
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* mseal will fail as well. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* we still can add seal to the first page and last page*/
+ ret = sys_mseal(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_mseal(ptr + 3 * page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_unmapped_end(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* unmap last 2 pages. */
+ ret = sys_munmap(ptr + 2 * page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mprotect will fail since last 2 pages are unmapped. */
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* mseal will fail as well. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* The first 2 pages is not sealed, and can add seals */
+ ret = sys_mseal(ptr, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_multiple_vmas(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split the vma into 3. */
+ ret = sys_mprotect(ptr + page_size, 2 * page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mprotect will get applied to all 4 pages - 3 VMAs. */
+ ret = sys_mprotect(ptr, size, PROT_READ);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* use mprotect to split the vma into 3. */
+ ret = sys_mprotect(ptr + page_size, 2 * page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mseal get applied to all 4 pages - 3 VMAs. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_split_start(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split at middle */
+ ret = sys_mprotect(ptr, 2 * page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal the first page, this will split the VMA */
+ ret = sys_mseal(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* add seal to the remain 3 pages */
+ ret = sys_mseal(ptr + page_size, 3 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_split_end(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split at middle */
+ ret = sys_mprotect(ptr, 2 * page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal the last page */
+ ret = sys_mseal(ptr + 3 * page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* Adding seals to the first 3 pages */
+ ret = sys_mseal(ptr, 3 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_invalid_input(void)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(8 * page_size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ ret = clean_single_address(ptr + 4 * page_size, 4 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* invalid flag */
+ ret = syscall(__NR_mseal, ptr, size, 0x20);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* unaligned address */
+ ret = sys_mseal(ptr + 1, 2 * page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* length too big */
+ ret = sys_mseal(ptr, 5 * page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* length overflow */
+ ret = sys_mseal(ptr, UINT64_MAX/page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* start is not in a valid VMA */
+ ret = sys_mseal(ptr - page_size, 5 * page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_zero_length(void)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ ret = sys_mprotect(ptr, 0, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal 0 length will be OK, same as mprotect */
+ ret = sys_mseal(ptr, 0);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* verify the 4 pages are not sealed by previous call. */
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_zero_address(void)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ int prot;
+
+ /* use mmap to change protection. */
+ ptr = sys_mmap(0, size, PROT_NONE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ FAIL_TEST_IF_FALSE(ptr == 0);
+
+ size = get_vma_size(ptr, &prot);
+ FAIL_TEST_IF_FALSE(size == 4 * page_size);
+
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* verify the 4 pages are sealed by previous call. */
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_twice(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* apply the same seal will be OK. idempotent. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mprotect(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_start_mprotect(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* the first page is sealed. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* pages after the first page is not sealed. */
+ ret = sys_mprotect(ptr + page_size, page_size * 3,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_end_mprotect(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr + page_size, 3 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* first page is not sealed */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* last 3 page are sealed */
+ ret = sys_mprotect(ptr + page_size, page_size * 3,
+ PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mprotect_unalign_len(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr, page_size * 2 - 1);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* 2 pages are sealed. */
+ ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_mprotect(ptr + page_size * 2, page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mprotect_unalign_len_variant_2(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ if (seal) {
+ ret = seal_single_address(ptr, page_size * 2 + 1);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* 3 pages are sealed. */
+ ret = sys_mprotect(ptr, page_size * 3, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_mprotect(ptr + page_size * 3, page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mprotect_two_vma(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split */
+ ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = seal_single_address(ptr, page_size * 4);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_mprotect(ptr + page_size * 2, page_size * 2,
+ PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mprotect_two_vma_with_split(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split as two vma. */
+ ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mseal can apply across 2 vma, also split them. */
+ if (seal) {
+ ret = seal_single_address(ptr + page_size, page_size * 2);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* the first page is not sealed. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* the second page is sealed. */
+ ret = sys_mprotect(ptr + page_size, page_size, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* the third page is sealed. */
+ ret = sys_mprotect(ptr + 2 * page_size, page_size,
+ PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* the fouth page is not sealed. */
+ ret = sys_mprotect(ptr + 3 * page_size, page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mprotect_partial_mprotect(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* seal one page. */
+ if (seal) {
+ ret = seal_single_address(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mprotect first 2 page will fail, since the first page are sealed. */
+ ret = sys_mprotect(ptr, 2 * page_size, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mprotect_two_vma_with_gap(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* use mprotect to split. */
+ ret = sys_mprotect(ptr + 3 * page_size, page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* use munmap to free two pages in the middle */
+ ret = sys_munmap(ptr + page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mprotect will fail, because there is a gap in the address. */
+ /* notes, internally mprotect still updated the first page. */
+ ret = sys_mprotect(ptr, 4 * page_size, PROT_READ);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* mseal will fail as well. */
+ ret = sys_mseal(ptr, 4 * page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* the first page is not sealed. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ);
+ FAIL_TEST_IF_FALSE(ret == 0);
+
+ /* the last page is not sealed. */
+ ret = sys_mprotect(ptr + 3 * page_size, page_size, PROT_READ);
+ FAIL_TEST_IF_FALSE(ret == 0);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mprotect_split(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal all 4 pages. */
+ if (seal) {
+ ret = sys_mseal(ptr, 4 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mprotect is sealed. */
+ ret = sys_mprotect(ptr, 2 * page_size, PROT_READ);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+
+ ret = sys_mprotect(ptr + 2 * page_size, 2 * page_size, PROT_READ);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mprotect_merge(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split one page. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal first two pages. */
+ if (seal) {
+ ret = sys_mseal(ptr, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* 2 pages are sealed. */
+ ret = sys_mprotect(ptr, 2 * page_size, PROT_READ);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* last 2 pages are not sealed. */
+ ret = sys_mprotect(ptr + 2 * page_size, 2 * page_size, PROT_READ);
+ FAIL_TEST_IF_FALSE(ret == 0);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_munmap(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* 4 pages are sealed. */
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+/*
+ * allocate 4 pages,
+ * use mprotect to split it as two VMAs
+ * seal the whole range
+ * munmap will fail on both
+ */
+static void test_seal_munmap_two_vma(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split */
+ ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ ret = sys_munmap(ptr, page_size * 2);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr + page_size, page_size * 2);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+/*
+ * allocate a VMA with 4 pages.
+ * munmap the middle 2 pages.
+ * seal the whole 4 pages, will fail.
+ * munmap the first page will be OK.
+ * munmap the last page will be OK.
+ */
+static void test_seal_munmap_vma_with_gap(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ ret = sys_munmap(ptr + page_size, page_size * 2);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ /* can't have gap in the middle. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+ }
+
+ ret = sys_munmap(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr + page_size * 2, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_munmap_start_freed(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ int prot;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* unmap the first page. */
+ ret = sys_munmap(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal the last 3 pages. */
+ if (seal) {
+ ret = sys_mseal(ptr + page_size, 3 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* unmap from the first page. */
+ ret = sys_munmap(ptr, size);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == page_size * 3);
+ } else {
+ /* note: this will be OK, even the first page is */
+ /* already unmapped. */
+ FAIL_TEST_IF_FALSE(!ret);
+
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 0);
+ }
+
+ TEST_END_CHECK();
+}
+
+static void test_munmap_end_freed(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* unmap last page. */
+ ret = sys_munmap(ptr + page_size * 3, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal the first 3 pages. */
+ if (seal) {
+ ret = sys_mseal(ptr, 3 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* unmap all pages. */
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_munmap_middle_freed(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ int prot;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* unmap 2 pages in the middle. */
+ ret = sys_munmap(ptr + page_size, page_size * 2);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal the first page. */
+ if (seal) {
+ ret = sys_mseal(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* munmap all 4 pages. */
+ ret = sys_munmap(ptr, size);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ size = get_vma_size(ptr, &prot);
+ FAIL_TEST_IF_FALSE(size == page_size);
+
+ size = get_vma_size(ptr + page_size * 3, &prot);
+ FAIL_TEST_IF_FALSE(size == page_size);
+ } else {
+ FAIL_TEST_IF_FALSE(!ret);
+
+ size = get_vma_size(ptr, &prot);
+ FAIL_TEST_IF_FALSE(size == 0);
+
+ size = get_vma_size(ptr + page_size * 3, &prot);
+ FAIL_TEST_IF_FALSE(size == 0);
+ }
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mremap_shrink(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* shrink from 4 pages to 2 pages. */
+ ret2 = mremap(ptr, size, 2 * page_size, 0, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED);
+
+ }
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mremap_expand(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ /* ummap last 2 pages. */
+ ret = sys_munmap(ptr + 2 * page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = sys_mseal(ptr, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* expand from 2 page to 4 pages. */
+ ret2 = mremap(ptr, 2 * page_size, 4 * page_size, 0, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 == ptr);
+
+ }
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mremap_move(bool seal)
+{
+ void *ptr, *newPtr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ setup_single_address(size, &newPtr);
+ FAIL_TEST_IF_FALSE(newPtr != (void *)-1);
+ ret = clean_single_address(newPtr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* move from ptr to fixed address. */
+ ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newPtr);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED);
+
+ }
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mmap_overwrite_prot(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* use mmap to change protection. */
+ ret2 = sys_mmap(ptr, size, PROT_NONE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == ptr);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mmap_expand(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 12 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ /* ummap last 4 pages. */
+ ret = sys_munmap(ptr + 8 * page_size, 4 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = sys_mseal(ptr, 8 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* use mmap to expand. */
+ ret2 = sys_mmap(ptr, size, PROT_READ,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == ptr);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mmap_shrink(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 12 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* use mmap to shrink. */
+ ret2 = sys_mmap(ptr, 8 * page_size, PROT_READ,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == ptr);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mremap_shrink_fixed(bool seal)
+{
+ void *ptr;
+ void *newAddr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ setup_single_address(size, &newAddr);
+ FAIL_TEST_IF_FALSE(newAddr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mremap to move and shrink to fixed address */
+ ret2 = mremap(ptr, size, 2 * page_size, MREMAP_MAYMOVE | MREMAP_FIXED,
+ newAddr);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == newAddr);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mremap_expand_fixed(bool seal)
+{
+ void *ptr;
+ void *newAddr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(page_size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ setup_single_address(size, &newAddr);
+ FAIL_TEST_IF_FALSE(newAddr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(newAddr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mremap to move and expand to fixed address */
+ ret2 = mremap(ptr, page_size, size, MREMAP_MAYMOVE | MREMAP_FIXED,
+ newAddr);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == newAddr);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mremap_move_fixed(bool seal)
+{
+ void *ptr;
+ void *newAddr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ setup_single_address(size, &newAddr);
+ FAIL_TEST_IF_FALSE(newAddr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(newAddr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mremap to move to fixed address */
+ ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newAddr);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == newAddr);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mremap_move_fixed_zero(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /*
+ * MREMAP_FIXED can move the mapping to zero address
+ */
+ ret2 = mremap(ptr, size, 2 * page_size, MREMAP_MAYMOVE | MREMAP_FIXED,
+ 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 == 0);
+
+ }
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mremap_move_dontunmap(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mremap to move, and don't unmap src addr. */
+ ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED);
+
+ }
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_mremap_move_dontunmap_anyaddr(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /*
+ * The 0xdeaddead should not have effect on dest addr
+ * when MREMAP_DONTUNMAP is set.
+ */
+ ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP,
+ 0xdeaddead);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED);
+ FAIL_TEST_IF_FALSE((long)ret2 != 0xdeaddead);
+
+ }
+
+ TEST_END_CHECK();
+}
+
+
+static void test_seal_merge_and_split(void)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size;
+ int ret;
+ int prot;
+
+ /* (24 RO) */
+ setup_single_address(24 * page_size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect(NONE) to set out boundary */
+ /* (1 NONE) (22 RO) (1 NONE) */
+ ret = sys_mprotect(ptr, page_size, PROT_NONE);
+ FAIL_TEST_IF_FALSE(!ret);
+ ret = sys_mprotect(ptr + 23 * page_size, page_size, PROT_NONE);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 22 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 4);
+
+ /* use mseal to split from beginning */
+ /* (1 NONE) (1 RO_SEAL) (21 RO) (1 NONE) */
+ ret = sys_mseal(ptr + page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+ size = get_vma_size(ptr + 2 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 21 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ /* use mseal to split from the end. */
+ /* (1 NONE) (1 RO_SEAL) (20 RO) (1 RO_SEAL) (1 NONE) */
+ ret = sys_mseal(ptr + 22 * page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + 22 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+ size = get_vma_size(ptr + 2 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 20 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ /* merge with prev. */
+ /* (1 NONE) (2 RO_SEAL) (19 RO) (1 RO_SEAL) (1 NONE) */
+ ret = sys_mseal(ptr + 2 * page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 2 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ /* merge with after. */
+ /* (1 NONE) (2 RO_SEAL) (18 RO) (2 RO_SEALS) (1 NONE) */
+ ret = sys_mseal(ptr + 21 * page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + 21 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 2 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ /* split and merge from prev */
+ /* (1 NONE) (3 RO_SEAL) (17 RO) (2 RO_SEALS) (1 NONE) */
+ ret = sys_mseal(ptr + 2 * page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + 1 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 3 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+ ret = sys_munmap(ptr + page_size, page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+ ret = sys_mprotect(ptr + 2 * page_size, page_size, PROT_NONE);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* split and merge from next */
+ /* (1 NONE) (3 RO_SEAL) (16 RO) (3 RO_SEALS) (1 NONE) */
+ ret = sys_mseal(ptr + 20 * page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+ size = get_vma_size(ptr + 20 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 3 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ /* merge from middle of prev and middle of next. */
+ /* (1 NONE) (22 RO_SEAL) (1 NONE) */
+ ret = sys_mseal(ptr + 2 * page_size, 20 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 22 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_discard_ro_anon_on_rw(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address_rw(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* sealing doesn't take effect on RW memory. */
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* base seal still apply. */
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_discard_ro_anon_on_pkey(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ int pkey;
+
+ SKIP_TEST_IF_FALSE(pkey_supported());
+
+ setup_single_address_rw(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ pkey = sys_pkey_alloc(0, 0);
+ FAIL_TEST_IF_FALSE(pkey > 0);
+
+ ret = sys_mprotect_pkey((void *)ptr, size, PROT_READ | PROT_WRITE, pkey);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* sealing doesn't take effect if PKRU allow write. */
+ set_pkey(pkey, 0);
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* sealing will take effect if PKRU deny write. */
+ set_pkey(pkey, PKEY_DISABLE_WRITE);
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* base seal still apply. */
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_discard_ro_anon_on_filebacked(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ int fd;
+ unsigned long mapflags = MAP_PRIVATE;
+
+ fd = memfd_create("test", 0);
+ FAIL_TEST_IF_FALSE(fd > 0);
+
+ ret = fallocate(fd, 0, 0, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ptr = sys_mmap(NULL, size, PROT_READ, mapflags, fd, 0);
+ FAIL_TEST_IF_FALSE(ptr != MAP_FAILED);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* sealing doesn't apply for file backed mapping. */
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+ close(fd);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_discard_ro_anon_on_shared(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ unsigned long mapflags = MAP_ANONYMOUS | MAP_SHARED;
+
+ ptr = sys_mmap(NULL, size, PROT_READ, mapflags, -1, 0);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* sealing doesn't apply for shared mapping. */
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+static void test_seal_discard_ro_anon(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ TEST_END_CHECK();
+}
+
+int main(int argc, char **argv)
+{
+ bool test_seal = seal_support();
+
+ ksft_print_header();
+
+ if (!test_seal)
+ ksft_exit_skip("sealing not supported, check CONFIG_64BIT\n");
+
+ if (!pkey_supported())
+ ksft_print_msg("PKEY not supported\n");
+
+ ksft_set_plan(80);
+
+ test_seal_addseal();
+ test_seal_unmapped_start();
+ test_seal_unmapped_middle();
+ test_seal_unmapped_end();
+ test_seal_multiple_vmas();
+ test_seal_split_start();
+ test_seal_split_end();
+ test_seal_invalid_input();
+ test_seal_zero_length();
+ test_seal_twice();
+
+ test_seal_mprotect(false);
+ test_seal_mprotect(true);
+
+ test_seal_start_mprotect(false);
+ test_seal_start_mprotect(true);
+
+ test_seal_end_mprotect(false);
+ test_seal_end_mprotect(true);
+
+ test_seal_mprotect_unalign_len(false);
+ test_seal_mprotect_unalign_len(true);
+
+ test_seal_mprotect_unalign_len_variant_2(false);
+ test_seal_mprotect_unalign_len_variant_2(true);
+
+ test_seal_mprotect_two_vma(false);
+ test_seal_mprotect_two_vma(true);
+
+ test_seal_mprotect_two_vma_with_split(false);
+ test_seal_mprotect_two_vma_with_split(true);
+
+ test_seal_mprotect_partial_mprotect(false);
+ test_seal_mprotect_partial_mprotect(true);
+
+ test_seal_mprotect_two_vma_with_gap(false);
+ test_seal_mprotect_two_vma_with_gap(true);
+
+ test_seal_mprotect_merge(false);
+ test_seal_mprotect_merge(true);
+
+ test_seal_mprotect_split(false);
+ test_seal_mprotect_split(true);
+
+ test_seal_munmap(false);
+ test_seal_munmap(true);
+ test_seal_munmap_two_vma(false);
+ test_seal_munmap_two_vma(true);
+ test_seal_munmap_vma_with_gap(false);
+ test_seal_munmap_vma_with_gap(true);
+
+ test_munmap_start_freed(false);
+ test_munmap_start_freed(true);
+ test_munmap_middle_freed(false);
+ test_munmap_middle_freed(true);
+ test_munmap_end_freed(false);
+ test_munmap_end_freed(true);
+
+ test_seal_mremap_shrink(false);
+ test_seal_mremap_shrink(true);
+ test_seal_mremap_expand(false);
+ test_seal_mremap_expand(true);
+ test_seal_mremap_move(false);
+ test_seal_mremap_move(true);
+
+ test_seal_mremap_shrink_fixed(false);
+ test_seal_mremap_shrink_fixed(true);
+ test_seal_mremap_expand_fixed(false);
+ test_seal_mremap_expand_fixed(true);
+ test_seal_mremap_move_fixed(false);
+ test_seal_mremap_move_fixed(true);
+ test_seal_mremap_move_dontunmap(false);
+ test_seal_mremap_move_dontunmap(true);
+ test_seal_mremap_move_fixed_zero(false);
+ test_seal_mremap_move_fixed_zero(true);
+ test_seal_mremap_move_dontunmap_anyaddr(false);
+ test_seal_mremap_move_dontunmap_anyaddr(true);
+ test_seal_discard_ro_anon(false);
+ test_seal_discard_ro_anon(true);
+ test_seal_discard_ro_anon_on_rw(false);
+ test_seal_discard_ro_anon_on_rw(true);
+ test_seal_discard_ro_anon_on_shared(false);
+ test_seal_discard_ro_anon_on_shared(true);
+ test_seal_discard_ro_anon_on_filebacked(false);
+ test_seal_discard_ro_anon_on_filebacked(true);
+ test_seal_mmap_overwrite_prot(false);
+ test_seal_mmap_overwrite_prot(true);
+ test_seal_mmap_expand(false);
+ test_seal_mmap_expand(true);
+ test_seal_mmap_shrink(false);
+ test_seal_mmap_shrink(true);
+
+ test_seal_merge_and_split();
+ test_seal_zero_address();
+
+ test_seal_discard_ro_anon_on_pkey(false);
+ test_seal_discard_ro_anon_on_pkey(true);
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index 4bdb3a0c7a..3157204b90 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -152,9 +152,13 @@ done < /proc/meminfo
# both of these requirements into account and attempt to increase
# number of huge pages available.
nr_cpus=$(nproc)
-hpgsize_MB=$((hpgsize_KB / 1024))
-half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128))
-needmem_KB=$((half_ufd_size_MB * 2 * 1024))
+uffd_min_KB=$((hpgsize_KB * nr_cpus * 2))
+hugetlb_min_KB=$((256 * 1024))
+if [[ $uffd_min_KB -gt $hugetlb_min_KB ]]; then
+ needmem_KB=$uffd_min_KB
+else
+ needmem_KB=$hugetlb_min_KB
+fi
# set proper nr_hugepages
if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
@@ -294,7 +298,8 @@ CATEGORY="userfaultfd" run_test ./uffd-unit-tests
uffd_stress_bin=./uffd-stress
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16
# Hugetlb tests require source and destination huge pages. Pass in half
-# the size ($half_ufd_size_MB), which is used for *each*.
+# the size of the free pages we have, which is used for *each*.
+half_ufd_size_MB=$((freepgs / 2))
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 32
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16
diff --git a/tools/testing/selftests/mm/seal_elf.c b/tools/testing/selftests/mm/seal_elf.c
new file mode 100644
index 0000000000..f2babec79b
--- /dev/null
+++ b/tools/testing/selftests/mm/seal_elf.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sys/mman.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <stdbool.h>
+#include "../kselftest.h"
+#include <syscall.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/vfs.h>
+#include <sys/stat.h>
+
+/*
+ * need those definition for manually build using gcc.
+ * gcc -I ../../../../usr/include -DDEBUG -O3 -DDEBUG -O3 seal_elf.c -o seal_elf
+ */
+#define FAIL_TEST_IF_FALSE(c) do {\
+ if (!(c)) {\
+ ksft_test_result_fail("%s, line:%d\n", __func__, __LINE__);\
+ goto test_end;\
+ } \
+ } \
+ while (0)
+
+#define SKIP_TEST_IF_FALSE(c) do {\
+ if (!(c)) {\
+ ksft_test_result_skip("%s, line:%d\n", __func__, __LINE__);\
+ goto test_end;\
+ } \
+ } \
+ while (0)
+
+
+#define TEST_END_CHECK() {\
+ ksft_test_result_pass("%s\n", __func__);\
+ return;\
+test_end:\
+ return;\
+}
+
+#ifndef u64
+#define u64 unsigned long long
+#endif
+
+/*
+ * define sys_xyx to call syscall directly.
+ */
+static int sys_mseal(void *start, size_t len)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_mseal, start, len, 0);
+ return sret;
+}
+
+static void *sys_mmap(void *addr, unsigned long len, unsigned long prot,
+ unsigned long flags, unsigned long fd, unsigned long offset)
+{
+ void *sret;
+
+ errno = 0;
+ sret = (void *) syscall(__NR_mmap, addr, len, prot,
+ flags, fd, offset);
+ return sret;
+}
+
+static inline int sys_mprotect(void *ptr, size_t size, unsigned long prot)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_mprotect, ptr, size, prot);
+ return sret;
+}
+
+static bool seal_support(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+
+ ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (ptr == (void *) -1)
+ return false;
+
+ ret = sys_mseal(ptr, page_size);
+ if (ret < 0)
+ return false;
+
+ return true;
+}
+
+const char somestr[4096] = {"READONLY"};
+
+static void test_seal_elf(void)
+{
+ int ret;
+ FILE *maps;
+ char line[512];
+ uintptr_t addr_start, addr_end;
+ char prot[5];
+ char filename[256];
+ unsigned long page_size = getpagesize();
+ unsigned long long ptr = (unsigned long long) somestr;
+ char *somestr2 = (char *)somestr;
+
+ /*
+ * Modify the protection of readonly somestr
+ */
+ if (((unsigned long long)ptr % page_size) != 0)
+ ptr = (unsigned long long)ptr & ~(page_size - 1);
+
+ ksft_print_msg("somestr = %s\n", somestr);
+ ksft_print_msg("change protection to rw\n");
+ ret = sys_mprotect((void *)ptr, page_size, PROT_READ|PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+ *somestr2 = 'A';
+ ksft_print_msg("somestr is modified to: %s\n", somestr);
+ ret = sys_mprotect((void *)ptr, page_size, PROT_READ);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ maps = fopen("/proc/self/maps", "r");
+ FAIL_TEST_IF_FALSE(maps);
+
+ /*
+ * apply sealing to elf binary
+ */
+ while (fgets(line, sizeof(line), maps)) {
+ if (sscanf(line, "%lx-%lx %4s %*x %*x:%*x %*u %255[^\n]",
+ &addr_start, &addr_end, prot, filename) == 4) {
+ if (strlen(filename)) {
+ /*
+ * seal the mapping if read only.
+ */
+ if (strstr(prot, "r-")) {
+ ret = sys_mseal((void *)addr_start, addr_end - addr_start);
+ FAIL_TEST_IF_FALSE(!ret);
+ ksft_print_msg("sealed: %lx-%lx %s %s\n",
+ addr_start, addr_end, prot, filename);
+ if ((uintptr_t) somestr >= addr_start &&
+ (uintptr_t) somestr <= addr_end)
+ ksft_print_msg("mapping for somestr found\n");
+ }
+ }
+ }
+ }
+ fclose(maps);
+
+ ret = sys_mprotect((void *)ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(ret < 0);
+ ksft_print_msg("somestr is sealed, mprotect is rejected\n");
+
+ TEST_END_CHECK();
+}
+
+int main(int argc, char **argv)
+{
+ bool test_seal = seal_support();
+
+ ksft_print_header();
+ ksft_print_msg("pid=%d\n", getpid());
+
+ if (!test_seal)
+ ksft_exit_skip("sealing not supported, check CONFIG_64BIT\n");
+
+ ksft_set_plan(1);
+
+ test_seal_elf();
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c
index d9dbf87974..bdfa5d085f 100644
--- a/tools/testing/selftests/mm/soft-dirty.c
+++ b/tools/testing/selftests/mm/soft-dirty.c
@@ -209,5 +209,5 @@ int main(int argc, char **argv)
close(pagemap_fd);
- ksft_exit_pass();
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c
index 7bcf8d4825..4e4c1e3112 100644
--- a/tools/testing/selftests/mm/virtual_address_range.c
+++ b/tools/testing/selftests/mm/virtual_address_range.c
@@ -12,6 +12,8 @@
#include <errno.h>
#include <sys/mman.h>
#include <sys/time.h>
+#include <fcntl.h>
+
#include "../kselftest.h"
/*
@@ -85,7 +87,7 @@ static int validate_lower_address_hint(void)
char *ptr;
ptr = mmap((void *) (1UL << 45), MAP_CHUNK_SIZE, PROT_READ |
- PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ptr == MAP_FAILED)
return 0;
@@ -93,6 +95,66 @@ static int validate_lower_address_hint(void)
return 1;
}
+static int validate_complete_va_space(void)
+{
+ unsigned long start_addr, end_addr, prev_end_addr;
+ char line[400];
+ char prot[6];
+ FILE *file;
+ int fd;
+
+ fd = open("va_dump", O_CREAT | O_WRONLY, 0600);
+ unlink("va_dump");
+ if (fd < 0) {
+ ksft_test_result_skip("cannot create or open dump file\n");
+ ksft_finished();
+ }
+
+ file = fopen("/proc/self/maps", "r");
+ if (file == NULL)
+ ksft_exit_fail_msg("cannot open /proc/self/maps\n");
+
+ prev_end_addr = 0;
+ while (fgets(line, sizeof(line), file)) {
+ unsigned long hop;
+
+ if (sscanf(line, "%lx-%lx %s[rwxp-]",
+ &start_addr, &end_addr, prot) != 3)
+ ksft_exit_fail_msg("cannot parse /proc/self/maps\n");
+
+ /* end of userspace mappings; ignore vsyscall mapping */
+ if (start_addr & (1UL << 63))
+ return 0;
+
+ /* /proc/self/maps must have gaps less than MAP_CHUNK_SIZE */
+ if (start_addr - prev_end_addr >= MAP_CHUNK_SIZE)
+ return 1;
+
+ prev_end_addr = end_addr;
+
+ if (prot[0] != 'r')
+ continue;
+
+ /*
+ * Confirm whether MAP_CHUNK_SIZE chunk can be found or not.
+ * If write succeeds, no need to check MAP_CHUNK_SIZE - 1
+ * addresses after that. If the address was not held by this
+ * process, write would fail with errno set to EFAULT.
+ * Anyways, if write returns anything apart from 1, exit the
+ * program since that would mean a bug in /proc/self/maps.
+ */
+ hop = 0;
+ while (start_addr + hop < end_addr) {
+ if (write(fd, (void *)(start_addr + hop), 1) != 1)
+ return 1;
+ lseek(fd, 0, SEEK_SET);
+
+ hop += MAP_CHUNK_SIZE;
+ }
+ }
+ return 0;
+}
+
int main(int argc, char *argv[])
{
char *ptr[NR_CHUNKS_LOW];
@@ -105,13 +167,11 @@ int main(int argc, char *argv[])
for (i = 0; i < NR_CHUNKS_LOW; i++) {
ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ptr[i] == MAP_FAILED) {
- if (validate_lower_address_hint()) {
- ksft_test_result_skip("Memory constraint not fulfilled\n");
- ksft_finished();
- }
+ if (validate_lower_address_hint())
+ ksft_exit_fail_msg("mmap unexpectedly succeeded with hint\n");
break;
}
@@ -127,7 +187,7 @@ int main(int argc, char *argv[])
for (i = 0; i < NR_CHUNKS_HIGH; i++) {
hint = hind_addr();
hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (hptr[i] == MAP_FAILED)
break;
@@ -135,6 +195,10 @@ int main(int argc, char *argv[])
validate_addr(hptr[i], 1);
}
hchunks = i;
+ if (validate_complete_va_space()) {
+ ksft_test_result_fail("BUG in mmap() or /proc/self/maps\n");
+ ksft_finished();
+ }
for (i = 0; i < lchunks; i++)
munmap(ptr[i], MAP_CHUNK_SIZE);
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 2f9d378ede..666ab7d939 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -2,9 +2,9 @@
bind_bhash
bind_timewait
bind_wildcard
-csum
cmsg_sender
diag_uid
+epoll_busy_poll
fin_ack_lat
gro
hwtstamp_config
@@ -31,6 +31,7 @@ reuseport_dualstack
rxtimestamp
sctp_hello
scm_pidfd
+scm_rights
sk_bind_sendto_listen
sk_connect_zero_addr
socket
@@ -42,7 +43,6 @@ tap
tcp_fastopen_backup_key
tcp_inq
tcp_mmap
-test_unix_oob
timestamping
tls
toeplitz
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 7b6918d5f4..d9393569d0 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -20,7 +20,6 @@ TEST_PROGS += reuseaddr_ports_exhausted.sh
TEST_PROGS += txtimestamp.sh
TEST_PROGS += vrf-xfrm-tests.sh
TEST_PROGS += rxtimestamp.sh
-TEST_PROGS += devlink_port_split.py
TEST_PROGS += drop_monitor_tests.sh
TEST_PROGS += vrf_route_leaking.sh
TEST_PROGS += bareudp.sh
@@ -35,6 +34,7 @@ TEST_PROGS += gre_gso.sh
TEST_PROGS += cmsg_so_mark.sh
TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh
TEST_PROGS += netns-name.sh
+TEST_PROGS += nl_netdev.py
TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
@@ -43,6 +43,8 @@ TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh
TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh
TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh
TEST_PROGS += srv6_end_flavors_test.sh
+TEST_PROGS += srv6_end_dx4_netfilter_test.sh
+TEST_PROGS += srv6_end_dx6_netfilter_test.sh
TEST_PROGS += vrf_strict_mode_test.sh
TEST_PROGS += arp_ndisc_evict_nocarrier.sh
TEST_PROGS += ndisc_unsolicited_na_test.sh
@@ -67,7 +69,7 @@ TEST_GEN_FILES += ipsec
TEST_GEN_FILES += ioam6_parser
TEST_GEN_FILES += gro
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap
+TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap epoll_busy_poll
TEST_GEN_FILES += toeplitz
TEST_GEN_FILES += cmsg_sender
TEST_GEN_FILES += stress_reuseport_listen
@@ -81,9 +83,6 @@ TEST_PROGS += test_ingress_egress_chaining.sh
TEST_GEN_PROGS += so_incoming_cpu
TEST_PROGS += sctp_vrf.sh
TEST_GEN_FILES += sctp_hello
-TEST_GEN_FILES += csum
-TEST_GEN_FILES += nat6to4.o
-TEST_GEN_FILES += xdp_dummy.o
TEST_GEN_FILES += ip_local_port_range
TEST_GEN_FILES += bind_wildcard
TEST_PROGS += test_vxlan_mdb.sh
@@ -93,63 +92,22 @@ TEST_PROGS += test_bridge_backup_port.sh
TEST_PROGS += fdb_flush.sh
TEST_PROGS += fq_band_pktlimit.sh
TEST_PROGS += vlan_hw_filter.sh
+TEST_PROGS += bpf_offload.py
TEST_FILES := settings
TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh
+TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
+
TEST_INCLUDES := forwarding/lib.sh
include ../lib.mk
+$(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap
$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto
$(OUTPUT)/tcp_inq: LDLIBS += -lpthread
$(OUTPUT)/bind_bhash: LDLIBS += -lpthread
$(OUTPUT)/io_uring_zerocopy_tx: CFLAGS += -I../../../include/
-# Rules to generate bpf objs
-CLANG ?= clang
-SCRATCH_DIR := $(OUTPUT)/tools
-BUILD_DIR := $(SCRATCH_DIR)/build
-BPFDIR := $(abspath ../../../lib/bpf)
-APIDIR := $(abspath ../../../include/uapi)
-
-CCINCLUDE += -I../bpf
-CCINCLUDE += -I../../../../usr/include/
-CCINCLUDE += -I$(SCRATCH_DIR)/include
-
-BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
-
-MAKE_DIRS := $(BUILD_DIR)/libbpf
-$(MAKE_DIRS):
- mkdir -p $@
-
-# Get Clang's default includes on this system, as opposed to those seen by
-# '--target=bpf'. This fixes "missing" files on some architectures/distros,
-# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
-#
-# Use '-idirafter': Don't interfere with include mechanics except where the
-# build would have failed anyways.
-define get_sys_includes
-$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
- | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
-$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
-endef
-
-ifneq ($(CROSS_COMPILE),)
-CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
-endif
-
-CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
-
-$(OUTPUT)/nat6to4.o $(OUTPUT)/xdp_dummy.o: $(OUTPUT)/%.o : %.c $(BPFOBJ) | $(MAKE_DIRS)
- $(CLANG) -O2 --target=bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@
-
-$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
- $(APIDIR)/linux/bpf.h \
- | $(BUILD_DIR)/libbpf
- $(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
- EXTRA_CFLAGS='-g -O0' \
- DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
-
-EXTRA_CLEAN := $(SCRATCH_DIR)
+include bpf.mk
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 221c387a7d..5058447954 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,4 +1,4 @@
CFLAGS += $(KHDR_INCLUDES)
-TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd
+TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect
include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/config b/tools/testing/selftests/net/af_unix/config
new file mode 100644
index 0000000000..3736856776
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/config
@@ -0,0 +1,3 @@
+CONFIG_UNIX=y
+CONFIG_AF_UNIX_OOB=y
+CONFIG_UNIX_DIAG=m
diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c
new file mode 100644
index 0000000000..16d0c172ea
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/msg_oob.c
@@ -0,0 +1,734 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <netinet/in.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/signalfd.h>
+#include <sys/socket.h>
+
+#include "../../kselftest_harness.h"
+
+#define BUF_SZ 32
+
+FIXTURE(msg_oob)
+{
+ int fd[4]; /* 0: AF_UNIX sender
+ * 1: AF_UNIX receiver
+ * 2: TCP sender
+ * 3: TCP receiver
+ */
+ int signal_fd;
+ int epoll_fd[2]; /* 0: AF_UNIX receiver
+ * 1: TCP receiver
+ */
+ bool tcp_compliant;
+};
+
+FIXTURE_VARIANT(msg_oob)
+{
+ bool peek;
+};
+
+FIXTURE_VARIANT_ADD(msg_oob, no_peek)
+{
+ .peek = false,
+};
+
+FIXTURE_VARIANT_ADD(msg_oob, peek)
+{
+ .peek = true
+};
+
+static void create_unix_socketpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ int ret;
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, self->fd);
+ ASSERT_EQ(ret, 0);
+}
+
+static void create_tcp_socketpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ struct sockaddr_in addr;
+ socklen_t addrlen;
+ int listen_fd;
+ int ret;
+
+ listen_fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GE(listen_fd, 0);
+
+ ret = listen(listen_fd, -1);
+ ASSERT_EQ(ret, 0);
+
+ addrlen = sizeof(addr);
+ ret = getsockname(listen_fd, (struct sockaddr *)&addr, &addrlen);
+ ASSERT_EQ(ret, 0);
+
+ self->fd[2] = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GE(self->fd[2], 0);
+
+ ret = connect(self->fd[2], (struct sockaddr *)&addr, addrlen);
+ ASSERT_EQ(ret, 0);
+
+ self->fd[3] = accept(listen_fd, (struct sockaddr *)&addr, &addrlen);
+ ASSERT_GE(self->fd[3], 0);
+
+ ret = fcntl(self->fd[3], F_SETFL, O_NONBLOCK);
+ ASSERT_EQ(ret, 0);
+}
+
+static void setup_sigurg(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ struct signalfd_siginfo siginfo;
+ int pid = getpid();
+ sigset_t mask;
+ int i, ret;
+
+ for (i = 0; i < 2; i++) {
+ ret = ioctl(self->fd[i * 2 + 1], FIOSETOWN, &pid);
+ ASSERT_EQ(ret, 0);
+ }
+
+ ret = sigemptyset(&mask);
+ ASSERT_EQ(ret, 0);
+
+ ret = sigaddset(&mask, SIGURG);
+ ASSERT_EQ(ret, 0);
+
+ ret = sigprocmask(SIG_BLOCK, &mask, NULL);
+ ASSERT_EQ(ret, 0);
+
+ self->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK);
+ ASSERT_GE(self->signal_fd, 0);
+
+ ret = read(self->signal_fd, &siginfo, sizeof(siginfo));
+ ASSERT_EQ(ret, -1);
+}
+
+static void setup_epollpri(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ struct epoll_event event = {
+ .events = EPOLLPRI,
+ };
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ int ret;
+
+ self->epoll_fd[i] = epoll_create1(0);
+ ASSERT_GE(self->epoll_fd[i], 0);
+
+ ret = epoll_ctl(self->epoll_fd[i], EPOLL_CTL_ADD, self->fd[i * 2 + 1], &event);
+ ASSERT_EQ(ret, 0);
+ }
+}
+
+static void close_sockets(FIXTURE_DATA(msg_oob) *self)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ close(self->fd[i]);
+}
+
+FIXTURE_SETUP(msg_oob)
+{
+ create_unix_socketpair(_metadata, self);
+ create_tcp_socketpair(_metadata, self);
+
+ setup_sigurg(_metadata, self);
+ setup_epollpri(_metadata, self);
+
+ self->tcp_compliant = true;
+}
+
+FIXTURE_TEARDOWN(msg_oob)
+{
+ close_sockets(self);
+}
+
+static void __epollpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ bool oob_remaining)
+{
+ struct epoll_event event[2] = {};
+ int i, ret[2];
+
+ for (i = 0; i < 2; i++)
+ ret[i] = epoll_wait(self->epoll_fd[i], &event[i], 1, 0);
+
+ ASSERT_EQ(ret[0], oob_remaining);
+
+ if (self->tcp_compliant)
+ ASSERT_EQ(ret[0], ret[1]);
+
+ if (oob_remaining) {
+ ASSERT_EQ(event[0].events, EPOLLPRI);
+
+ if (self->tcp_compliant)
+ ASSERT_EQ(event[0].events, event[1].events);
+ }
+}
+
+static void __sendpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ const void *buf, size_t len, int flags)
+{
+ int i, ret[2];
+
+ for (i = 0; i < 2; i++) {
+ struct signalfd_siginfo siginfo = {};
+ int bytes;
+
+ ret[i] = send(self->fd[i * 2], buf, len, flags);
+
+ bytes = read(self->signal_fd, &siginfo, sizeof(siginfo));
+
+ if (flags & MSG_OOB) {
+ ASSERT_EQ(bytes, sizeof(siginfo));
+ ASSERT_EQ(siginfo.ssi_signo, SIGURG);
+
+ bytes = read(self->signal_fd, &siginfo, sizeof(siginfo));
+ }
+
+ ASSERT_EQ(bytes, -1);
+ }
+
+ ASSERT_EQ(ret[0], len);
+ ASSERT_EQ(ret[0], ret[1]);
+}
+
+static void __recvpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ const void *expected_buf, int expected_len,
+ int buf_len, int flags)
+{
+ int i, ret[2], recv_errno[2], expected_errno = 0;
+ char recv_buf[2][BUF_SZ] = {};
+ bool printed = false;
+
+ ASSERT_GE(BUF_SZ, buf_len);
+
+ errno = 0;
+
+ for (i = 0; i < 2; i++) {
+ ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags);
+ recv_errno[i] = errno;
+ }
+
+ if (expected_len < 0) {
+ expected_errno = -expected_len;
+ expected_len = -1;
+ }
+
+ if (ret[0] != expected_len || recv_errno[0] != expected_errno) {
+ TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]);
+ TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf);
+
+ ASSERT_EQ(ret[0], expected_len);
+ ASSERT_EQ(recv_errno[0], expected_errno);
+ }
+
+ if (ret[0] != ret[1] || recv_errno[0] != recv_errno[1]) {
+ TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]);
+ TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]);
+
+ printed = true;
+
+ if (self->tcp_compliant) {
+ ASSERT_EQ(ret[0], ret[1]);
+ ASSERT_EQ(recv_errno[0], recv_errno[1]);
+ }
+ }
+
+ if (expected_len >= 0) {
+ int cmp;
+
+ cmp = strncmp(expected_buf, recv_buf[0], expected_len);
+ if (cmp) {
+ TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]);
+ TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf);
+
+ ASSERT_EQ(cmp, 0);
+ }
+
+ cmp = strncmp(recv_buf[0], recv_buf[1], expected_len);
+ if (cmp) {
+ if (!printed) {
+ TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]);
+ TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]);
+ }
+
+ if (self->tcp_compliant)
+ ASSERT_EQ(cmp, 0);
+ }
+ }
+}
+
+static void __setinlinepair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ int i, oob_inline = 1;
+
+ for (i = 0; i < 2; i++) {
+ int ret;
+
+ ret = setsockopt(self->fd[i * 2 + 1], SOL_SOCKET, SO_OOBINLINE,
+ &oob_inline, sizeof(oob_inline));
+ ASSERT_EQ(ret, 0);
+ }
+}
+
+static void __siocatmarkpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ bool oob_head)
+{
+ int answ[2] = {};
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ int ret;
+
+ ret = ioctl(self->fd[i * 2 + 1], SIOCATMARK, &answ[i]);
+ ASSERT_EQ(ret, 0);
+ }
+
+ ASSERT_EQ(answ[0], oob_head);
+
+ if (self->tcp_compliant)
+ ASSERT_EQ(answ[0], answ[1]);
+}
+
+#define sendpair(buf, len, flags) \
+ __sendpair(_metadata, self, buf, len, flags)
+
+#define recvpair(expected_buf, expected_len, buf_len, flags) \
+ do { \
+ if (variant->peek) \
+ __recvpair(_metadata, self, \
+ expected_buf, expected_len, \
+ buf_len, (flags) | MSG_PEEK); \
+ __recvpair(_metadata, self, \
+ expected_buf, expected_len, buf_len, flags); \
+ } while (0)
+
+#define epollpair(oob_remaining) \
+ __epollpair(_metadata, self, oob_remaining)
+
+#define siocatmarkpair(oob_head) \
+ __siocatmarkpair(_metadata, self, oob_head)
+
+#define setinlinepair() \
+ __setinlinepair(_metadata, self)
+
+#define tcp_incompliant \
+ for (self->tcp_compliant = false; \
+ self->tcp_compliant == false; \
+ self->tcp_compliant = true)
+
+TEST_F(msg_oob, non_oob)
+{
+ sendpair("x", 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, oob)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("x", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+}
+
+TEST_F(msg_oob, oob_drop)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("", -EAGAIN, 1, 0); /* Drop OOB. */
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, oob_ahead)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 4, 0);
+ epollpair(false);
+ siocatmarkpair(true);
+}
+
+TEST_F(msg_oob, oob_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 5, 0); /* Break at OOB even with enough buffer. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ recvpair("", -EAGAIN, 1, 0);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, oob_ahead_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("world", 5, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 9, 0); /* Break at OOB even after it's recv()ed. */
+ epollpair(false);
+ siocatmarkpair(true);
+
+ recvpair("world", 5, 5, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, oob_break_drop)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("world", 5, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 10, 0); /* Break at OOB even with enough buffer. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("world", 5, 10, 0); /* Drop OOB and recv() the next skb. */
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, ex_oob_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("wor", 3, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("ld", 2, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("r", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ recvpair("ld", 2, 2, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, ex_oob_drop)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */
+ epollpair(true);
+
+ tcp_incompliant {
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0); /* TCP drops "y" by passing through it. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, MSG_OOB); /* TCP returns -EINVAL. */
+ epollpair(false);
+ siocatmarkpair(true);
+ }
+}
+
+TEST_F(msg_oob, ex_oob_drop_2)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */
+ epollpair(true);
+
+ tcp_incompliant {
+ siocatmarkpair(false);
+ }
+
+ recvpair("y", 1, 1, MSG_OOB);
+ epollpair(false);
+
+ tcp_incompliant {
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0); /* TCP returns -EAGAIN. */
+ epollpair(false);
+ siocatmarkpair(true);
+ }
+}
+
+TEST_F(msg_oob, ex_oob_ahead_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("wor", 3, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("r", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ sendpair("ld", 2, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ tcp_incompliant {
+ recvpair("hellowol", 8, 10, 0); /* TCP recv()s "helloworl", why "r" ?? */
+ }
+
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("d", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+}
+
+TEST_F(msg_oob, ex_oob_siocatmark)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ sendpair("world", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_oob)
+{
+ setinlinepair();
+
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("x", 1, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_oob_break)
+{
+ setinlinepair();
+
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 5, 0); /* Break at OOB but not at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("o", 1, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_oob_ahead_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("world", 5, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ setinlinepair();
+
+ recvpair("hell", 4, 9, 0); /* Break at OOB even with enough buffer. */
+ epollpair(false);
+ siocatmarkpair(true);
+
+ tcp_incompliant {
+ recvpair("world", 5, 6, 0); /* TCP recv()s "oworld", ... "o" ??? */
+ }
+
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_ex_oob_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("wor", 3, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("ld", 2, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ setinlinepair();
+
+ recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("rld", 3, 3, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_ex_oob_no_drop)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ setinlinepair();
+
+ sendpair("y", 1, MSG_OOB); /* TCP does NOT drops "x" at this moment. */
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_ex_oob_drop)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */
+ epollpair(true);
+
+ setinlinepair();
+
+ tcp_incompliant {
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0); /* TCP recv()s "y". */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, 0); /* TCP returns -EAGAIN. */
+ epollpair(false);
+ siocatmarkpair(false);
+ }
+}
+
+TEST_F(msg_oob, inline_ex_oob_siocatmark)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ setinlinepair();
+
+ sendpair("world", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(false);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c
new file mode 100644
index 0000000000..d663362565
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/scm_rights.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "../../kselftest_harness.h"
+
+FIXTURE(scm_rights)
+{
+ int fd[32];
+};
+
+FIXTURE_VARIANT(scm_rights)
+{
+ char name[32];
+ int type;
+ int flags;
+ bool test_listener;
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, dgram)
+{
+ .name = "UNIX ",
+ .type = SOCK_DGRAM,
+ .flags = 0,
+ .test_listener = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = 0,
+ .test_listener = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_oob)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = MSG_OOB,
+ .test_listener = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = 0,
+ .test_listener = true,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = MSG_OOB,
+ .test_listener = true,
+};
+
+static int count_sockets(struct __test_metadata *_metadata,
+ const FIXTURE_VARIANT(scm_rights) *variant)
+{
+ int sockets = -1, len, ret;
+ char *line = NULL;
+ size_t unused;
+ FILE *f;
+
+ f = fopen("/proc/net/protocols", "r");
+ ASSERT_NE(NULL, f);
+
+ len = strlen(variant->name);
+
+ while (getline(&line, &unused, f) != -1) {
+ int unused2;
+
+ if (strncmp(line, variant->name, len))
+ continue;
+
+ ret = sscanf(line + len, "%d %d", &unused2, &sockets);
+ ASSERT_EQ(2, ret);
+
+ break;
+ }
+
+ free(line);
+
+ ret = fclose(f);
+ ASSERT_EQ(0, ret);
+
+ return sockets;
+}
+
+FIXTURE_SETUP(scm_rights)
+{
+ int ret;
+
+ ret = unshare(CLONE_NEWNET);
+ ASSERT_EQ(0, ret);
+
+ ret = count_sockets(_metadata, variant);
+ ASSERT_EQ(0, ret);
+}
+
+FIXTURE_TEARDOWN(scm_rights)
+{
+ int ret;
+
+ sleep(1);
+
+ ret = count_sockets(_metadata, variant);
+ ASSERT_EQ(0, ret);
+}
+
+static void create_listeners(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_rights) *self,
+ int n)
+{
+ struct sockaddr_un addr = {
+ .sun_family = AF_UNIX,
+ };
+ socklen_t addrlen;
+ int i, ret;
+
+ for (i = 0; i < n * 2; i += 2) {
+ self->fd[i] = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, self->fd[i]);
+
+ addrlen = sizeof(addr.sun_family);
+ ret = bind(self->fd[i], (struct sockaddr *)&addr, addrlen);
+ ASSERT_EQ(0, ret);
+
+ ret = listen(self->fd[i], -1);
+ ASSERT_EQ(0, ret);
+
+ addrlen = sizeof(addr);
+ ret = getsockname(self->fd[i], (struct sockaddr *)&addr, &addrlen);
+ ASSERT_EQ(0, ret);
+
+ self->fd[i + 1] = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, self->fd[i + 1]);
+
+ ret = connect(self->fd[i + 1], (struct sockaddr *)&addr, addrlen);
+ ASSERT_EQ(0, ret);
+ }
+}
+
+static void create_socketpairs(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_rights) *self,
+ const FIXTURE_VARIANT(scm_rights) *variant,
+ int n)
+{
+ int i, ret;
+
+ ASSERT_GE(sizeof(self->fd) / sizeof(int), n);
+
+ for (i = 0; i < n * 2; i += 2) {
+ ret = socketpair(AF_UNIX, variant->type, 0, self->fd + i);
+ ASSERT_EQ(0, ret);
+ }
+}
+
+static void __create_sockets(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_rights) *self,
+ const FIXTURE_VARIANT(scm_rights) *variant,
+ int n)
+{
+ ASSERT_LE(n * 2, sizeof(self->fd) / sizeof(self->fd[0]));
+
+ if (variant->test_listener)
+ create_listeners(_metadata, self, n);
+ else
+ create_socketpairs(_metadata, self, variant, n);
+}
+
+static void __close_sockets(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_rights) *self,
+ int n)
+{
+ int i, ret;
+
+ ASSERT_GE(sizeof(self->fd) / sizeof(int), n);
+
+ for (i = 0; i < n * 2; i++) {
+ ret = close(self->fd[i]);
+ ASSERT_EQ(0, ret);
+ }
+}
+
+void __send_fd(struct __test_metadata *_metadata,
+ const FIXTURE_DATA(scm_rights) *self,
+ const FIXTURE_VARIANT(scm_rights) *variant,
+ int inflight, int receiver)
+{
+#define MSG "x"
+#define MSGLEN 1
+ struct {
+ struct cmsghdr cmsghdr;
+ int fd[2];
+ } cmsg = {
+ .cmsghdr = {
+ .cmsg_len = CMSG_LEN(sizeof(cmsg.fd)),
+ .cmsg_level = SOL_SOCKET,
+ .cmsg_type = SCM_RIGHTS,
+ },
+ .fd = {
+ self->fd[inflight * 2],
+ self->fd[inflight * 2],
+ },
+ };
+ struct iovec iov = {
+ .iov_base = MSG,
+ .iov_len = MSGLEN,
+ };
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &cmsg,
+ .msg_controllen = CMSG_SPACE(sizeof(cmsg.fd)),
+ };
+ int ret;
+
+ ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags);
+ ASSERT_EQ(MSGLEN, ret);
+}
+
+#define create_sockets(n) \
+ __create_sockets(_metadata, self, variant, n)
+#define close_sockets(n) \
+ __close_sockets(_metadata, self, n)
+#define send_fd(inflight, receiver) \
+ __send_fd(_metadata, self, variant, inflight, receiver)
+
+TEST_F(scm_rights, self_ref)
+{
+ create_sockets(2);
+
+ send_fd(0, 0);
+
+ send_fd(1, 1);
+
+ close_sockets(2);
+}
+
+TEST_F(scm_rights, triangle)
+{
+ create_sockets(6);
+
+ send_fd(0, 1);
+ send_fd(1, 2);
+ send_fd(2, 0);
+
+ send_fd(3, 4);
+ send_fd(4, 5);
+ send_fd(5, 3);
+
+ close_sockets(6);
+}
+
+TEST_F(scm_rights, cross_edge)
+{
+ create_sockets(8);
+
+ send_fd(0, 1);
+ send_fd(1, 2);
+ send_fd(2, 0);
+ send_fd(1, 3);
+ send_fd(3, 2);
+
+ send_fd(4, 5);
+ send_fd(5, 6);
+ send_fd(6, 4);
+ send_fd(5, 7);
+ send_fd(7, 6);
+
+ close_sockets(8);
+}
+
+TEST_F(scm_rights, backtrack_from_scc)
+{
+ create_sockets(10);
+
+ send_fd(0, 1);
+ send_fd(0, 4);
+ send_fd(1, 2);
+ send_fd(2, 3);
+ send_fd(3, 1);
+
+ send_fd(5, 6);
+ send_fd(5, 9);
+ send_fd(6, 7);
+ send_fd(7, 8);
+ send_fd(8, 6);
+
+ close_sockets(10);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
deleted file mode 100644
index a7c51889ac..0000000000
--- a/tools/testing/selftests/net/af_unix/test_unix_oob.c
+++ /dev/null
@@ -1,436 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/socket.h>
-#include <arpa/inet.h>
-#include <unistd.h>
-#include <string.h>
-#include <fcntl.h>
-#include <sys/ioctl.h>
-#include <errno.h>
-#include <netinet/tcp.h>
-#include <sys/un.h>
-#include <sys/signal.h>
-#include <sys/poll.h>
-
-static int pipefd[2];
-static int signal_recvd;
-static pid_t producer_id;
-static char sock_name[32];
-
-static void sig_hand(int sn, siginfo_t *si, void *p)
-{
- signal_recvd = sn;
-}
-
-static int set_sig_handler(int signal)
-{
- struct sigaction sa;
-
- sa.sa_sigaction = sig_hand;
- sigemptyset(&sa.sa_mask);
- sa.sa_flags = SA_SIGINFO | SA_RESTART;
-
- return sigaction(signal, &sa, NULL);
-}
-
-static void set_filemode(int fd, int set)
-{
- int flags = fcntl(fd, F_GETFL, 0);
-
- if (set)
- flags &= ~O_NONBLOCK;
- else
- flags |= O_NONBLOCK;
- fcntl(fd, F_SETFL, flags);
-}
-
-static void signal_producer(int fd)
-{
- char cmd;
-
- cmd = 'S';
- write(fd, &cmd, sizeof(cmd));
-}
-
-static void wait_for_signal(int fd)
-{
- char buf[5];
-
- read(fd, buf, 5);
-}
-
-static void die(int status)
-{
- fflush(NULL);
- unlink(sock_name);
- kill(producer_id, SIGTERM);
- exit(status);
-}
-
-int is_sioctatmark(int fd)
-{
- int ans = -1;
-
- if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) {
-#ifdef DEBUG
- perror("SIOCATMARK Failed");
-#endif
- }
- return ans;
-}
-
-void read_oob(int fd, char *c)
-{
-
- *c = ' ';
- if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) {
-#ifdef DEBUG
- perror("Reading MSG_OOB Failed");
-#endif
- }
-}
-
-int read_data(int pfd, char *buf, int size)
-{
- int len = 0;
-
- memset(buf, size, '0');
- len = read(pfd, buf, size);
-#ifdef DEBUG
- if (len < 0)
- perror("read failed");
-#endif
- return len;
-}
-
-static void wait_for_data(int pfd, int event)
-{
- struct pollfd pfds[1];
-
- pfds[0].fd = pfd;
- pfds[0].events = event;
- poll(pfds, 1, -1);
-}
-
-void producer(struct sockaddr_un *consumer_addr)
-{
- int cfd;
- char buf[64];
- int i;
-
- memset(buf, 'x', sizeof(buf));
- cfd = socket(AF_UNIX, SOCK_STREAM, 0);
-
- wait_for_signal(pipefd[0]);
- if (connect(cfd, (struct sockaddr *)consumer_addr,
- sizeof(*consumer_addr)) != 0) {
- perror("Connect failed");
- kill(0, SIGTERM);
- exit(1);
- }
-
- for (i = 0; i < 2; i++) {
- /* Test 1: Test for SIGURG and OOB */
- wait_for_signal(pipefd[0]);
- memset(buf, 'x', sizeof(buf));
- buf[63] = '@';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- wait_for_signal(pipefd[0]);
-
- /* Test 2: Test for OOB being overwitten */
- memset(buf, 'x', sizeof(buf));
- buf[63] = '%';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- memset(buf, 'x', sizeof(buf));
- buf[63] = '#';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- wait_for_signal(pipefd[0]);
-
- /* Test 3: Test for SIOCATMARK */
- memset(buf, 'x', sizeof(buf));
- buf[63] = '@';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- memset(buf, 'x', sizeof(buf));
- buf[63] = '%';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- memset(buf, 'x', sizeof(buf));
- send(cfd, buf, sizeof(buf), 0);
-
- wait_for_signal(pipefd[0]);
-
- /* Test 4: Test for 1byte OOB msg */
- memset(buf, 'x', sizeof(buf));
- buf[0] = '@';
- send(cfd, buf, 1, MSG_OOB);
- }
-}
-
-int
-main(int argc, char **argv)
-{
- int lfd, pfd;
- struct sockaddr_un consumer_addr, paddr;
- socklen_t len = sizeof(consumer_addr);
- char buf[1024];
- int on = 0;
- char oob;
- int atmark;
-
- lfd = socket(AF_UNIX, SOCK_STREAM, 0);
- memset(&consumer_addr, 0, sizeof(consumer_addr));
- consumer_addr.sun_family = AF_UNIX;
- sprintf(sock_name, "unix_oob_%d", getpid());
- unlink(sock_name);
- strcpy(consumer_addr.sun_path, sock_name);
-
- if ((bind(lfd, (struct sockaddr *)&consumer_addr,
- sizeof(consumer_addr))) != 0) {
- perror("socket bind failed");
- exit(1);
- }
-
- pipe(pipefd);
-
- listen(lfd, 1);
-
- producer_id = fork();
- if (producer_id == 0) {
- producer(&consumer_addr);
- exit(0);
- }
-
- set_sig_handler(SIGURG);
- signal_producer(pipefd[1]);
-
- pfd = accept(lfd, (struct sockaddr *) &paddr, &len);
- fcntl(pfd, F_SETOWN, getpid());
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 1:
- * veriyf that SIGURG is
- * delivered, 63 bytes are
- * read, oob is '@', and POLLPRI works.
- */
- wait_for_data(pfd, POLLPRI);
- read_oob(pfd, &oob);
- len = read_data(pfd, buf, 1024);
- if (!signal_recvd || len != 63 || oob != '@') {
- fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 2:
- * Verify that the first OOB is over written by
- * the 2nd one and the first OOB is returned as
- * part of the read, and sigurg is received.
- */
- wait_for_data(pfd, POLLIN | POLLPRI);
- len = 0;
- while (len < 70)
- len = recv(pfd, buf, 1024, MSG_PEEK);
- len = read_data(pfd, buf, 1024);
- read_oob(pfd, &oob);
- if (!signal_recvd || len != 127 || oob != '#') {
- fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 3:
- * verify that 2nd oob over writes
- * the first one and read breaks at
- * oob boundary returning 127 bytes
- * and sigurg is received and atmark
- * is set.
- * oob is '%' and second read returns
- * 64 bytes.
- */
- len = 0;
- wait_for_data(pfd, POLLIN | POLLPRI);
- while (len < 150)
- len = recv(pfd, buf, 1024, MSG_PEEK);
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- read_oob(pfd, &oob);
-
- if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
- fprintf(stderr,
- "Test 3 failed, sigurg %d len %d OOB %c atmark %d\n",
- signal_recvd, len, oob, atmark);
- die(1);
- }
-
- signal_recvd = 0;
-
- len = read_data(pfd, buf, 1024);
- if (len != 64) {
- fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 4:
- * verify that a single byte
- * oob message is delivered.
- * set non blocking mode and
- * check proper error is
- * returned and sigurg is
- * received and correct
- * oob is read.
- */
-
- set_filemode(pfd, 0);
-
- wait_for_data(pfd, POLLIN | POLLPRI);
- len = read_data(pfd, buf, 1024);
- if ((len == -1) && (errno == 11))
- len = 0;
-
- read_oob(pfd, &oob);
-
- if (!signal_recvd || len != 0 || oob != '@') {
- fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- set_filemode(pfd, 1);
-
- /* Inline Testing */
-
- on = 1;
- if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) {
- perror("SO_OOBINLINE");
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 1 -- Inline:
- * Check that SIGURG is
- * delivered and 63 bytes are
- * read and oob is '@'
- */
-
- wait_for_data(pfd, POLLIN | POLLPRI);
- len = read_data(pfd, buf, 1024);
-
- if (!signal_recvd || len != 63) {
- fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n",
- signal_recvd, len);
- die(1);
- }
-
- len = read_data(pfd, buf, 1024);
-
- if (len != 1) {
- fprintf(stderr,
- "Test 1.1 Inline failed, sigurg %d len %d oob %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 2 -- Inline:
- * Verify that the first OOB is over written by
- * the 2nd one and read breaks correctly on
- * 2nd OOB boundary with the first OOB returned as
- * part of the read, and sigurg is delivered and
- * siocatmark returns true.
- * next read returns one byte, the oob byte
- * and siocatmark returns false.
- */
- len = 0;
- wait_for_data(pfd, POLLIN | POLLPRI);
- while (len < 70)
- len = recv(pfd, buf, 1024, MSG_PEEK);
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- if (len != 127 || atmark != 1 || !signal_recvd) {
- fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n",
- len, atmark);
- die(1);
- }
-
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- if (len != 1 || buf[0] != '#' || atmark == 1) {
- fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n",
- len, buf[0], atmark);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 3 -- Inline:
- * verify that 2nd oob over writes
- * the first one and read breaks at
- * oob boundary returning 127 bytes
- * and sigurg is received and siocatmark
- * is true after the read.
- * subsequent read returns 65 bytes
- * because of oob which should be '%'.
- */
- len = 0;
- wait_for_data(pfd, POLLIN | POLLPRI);
- while (len < 126)
- len = recv(pfd, buf, 1024, MSG_PEEK);
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- if (!signal_recvd || len != 127 || !atmark) {
- fprintf(stderr,
- "Test 3 Inline failed, sigurg %d len %d data %c\n",
- signal_recvd, len, buf[0]);
- die(1);
- }
-
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- if (len != 65 || buf[0] != '%' || atmark != 0) {
- fprintf(stderr,
- "Test 3.1 Inline failed, len %d oob %c atmark %d\n",
- len, buf[0], atmark);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 4 -- Inline:
- * verify that a single
- * byte oob message is delivered
- * and read returns one byte, the oob
- * byte and sigurg is received
- */
- wait_for_data(pfd, POLLIN | POLLPRI);
- len = read_data(pfd, buf, 1024);
- if (!signal_recvd || len != 1 || buf[0] != '@') {
- fprintf(stderr,
- "Test 4 Inline failed, signal %d len %d data %c\n",
- signal_recvd, len, buf[0]);
- die(1);
- }
- die(0);
-}
diff --git a/tools/testing/selftests/net/bpf.mk b/tools/testing/selftests/net/bpf.mk
new file mode 100644
index 0000000000..a4f6755dd8
--- /dev/null
+++ b/tools/testing/selftests/net/bpf.mk
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0
+# Rules to generate bpf objs
+CLANG ?= clang
+SCRATCH_DIR := $(OUTPUT)/tools
+BUILD_DIR := $(SCRATCH_DIR)/build
+BPFDIR := $(top_srcdir)/tools/lib/bpf
+APIDIR := $(top_srcdir)/tools/include/uapi
+
+CCINCLUDE += -I$(selfdir)/bpf
+CCINCLUDE += -I$(top_srcdir)/usr/include/
+CCINCLUDE += -I$(SCRATCH_DIR)/include
+
+BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
+
+MAKE_DIRS := $(BUILD_DIR)/libbpf
+$(MAKE_DIRS):
+ $(call msg,MKDIR,,$@)
+ $(Q)mkdir -p $@
+
+# Get Clang's default includes on this system, as opposed to those seen by
+# '--target=bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+define get_sys_includes
+$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
+ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
+$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
+endef
+
+ifneq ($(CROSS_COMPILE),)
+CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif
+
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
+
+BPF_PROG_OBJS := $(patsubst %.c,$(OUTPUT)/%.o,$(wildcard *.bpf.c))
+
+$(BPF_PROG_OBJS): $(OUTPUT)/%.o : %.c $(BPFOBJ) | $(MAKE_DIRS)
+ $(call msg,BPF_PROG,,$@)
+ $(Q)$(CLANG) -O2 -g --target=bpf $(CCINCLUDE) $(CLANG_SYS_INCLUDES) \
+ -c $< -o $@
+
+$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
+ $(APIDIR)/linux/bpf.h \
+ | $(BUILD_DIR)/libbpf
+ $(call msg,MAKE,,$@)
+ $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
+ EXTRA_CFLAGS='-g -O0' \
+ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
+
+EXTRA_CLEAN += $(SCRATCH_DIR)
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/net/bpf_offload.py
index 6157f884d0..3efe44f6e9 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/net/bpf_offload.py
@@ -29,6 +29,9 @@ import subprocess
import time
import traceback
+from lib.py import NetdevSim, NetdevSimDev
+
+
logfile = None
log_level = 1
skip_extack = False
@@ -145,8 +148,10 @@ def tool(name, args, flags, JSON=True, ns="", fail=True, include_stderr=False):
if JSON:
params += "%s " % (flags["json"])
- if ns != "":
+ if ns:
ns = "ip netns exec %s " % (ns)
+ elif ns is None:
+ ns = ""
if include_stderr:
ret, stdout, stderr = cmd(ns + name + " " + params + args,
@@ -201,11 +206,11 @@ def bpftool_prog_list_wait(expected=0, n_retry=20):
time.sleep(0.05)
raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs))
-def bpftool_map_list_wait(expected=0, n_retry=20):
+def bpftool_map_list_wait(expected=0, n_retry=20, ns=""):
for i in range(n_retry):
- nmaps = len(bpftool_map_list())
- if nmaps == expected:
- return
+ maps = bpftool_map_list(ns=ns)
+ if len(maps) == expected:
+ return maps
time.sleep(0.05)
raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps))
@@ -237,7 +242,7 @@ def tc(args, JSON=True, ns="", fail=True, include_stderr=False):
def ethtool(dev, opt, args, fail=True):
return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail)
-def bpf_obj(name, sec=".text", path=bpf_test_dir,):
+def bpf_obj(name, sec="xdp", path=bpf_test_dir,):
return "obj %s sec %s" % (os.path.join(path, name), sec)
def bpf_pinned(name):
@@ -334,72 +339,16 @@ class DebugfsDir:
return dfs
-class NetdevSimDev:
+class BpfNetdevSimDev(NetdevSimDev):
"""
Class for netdevsim bus device and its attributes.
"""
- @staticmethod
- def ctrl_write(path, val):
- fullpath = os.path.join("/sys/bus/netdevsim/", path)
- try:
- with open(fullpath, "w") as f:
- f.write(val)
- except OSError as e:
- log("WRITE %s: %r" % (fullpath, val), -e.errno)
- raise e
- log("WRITE %s: %r" % (fullpath, val), 0)
-
- def __init__(self, port_count=1):
- addr = 0
- while True:
- try:
- self.ctrl_write("new_device", "%u %u" % (addr, port_count))
- except OSError as e:
- if e.errno == errno.ENOSPC:
- addr += 1
- continue
- raise e
- break
- self.addr = addr
-
- # As probe of netdevsim device might happen from a workqueue,
- # so wait here until all netdevs appear.
- self.wait_for_netdevs(port_count)
-
- ret, out = cmd("udevadm settle", fail=False)
- if ret:
- raise Exception("udevadm settle failed")
- ifnames = self.get_ifnames()
-
+ def __init__(self, port_count=1, ns=None):
+ super().__init__(port_count, ns=ns)
devs.append(self)
- self.dfs_dir = "/sys/kernel/debug/netdevsim/netdevsim%u/" % addr
-
- self.nsims = []
- for port_index in range(port_count):
- self.nsims.append(NetdevSim(self, port_index, ifnames[port_index]))
-
- def get_ifnames(self):
- ifnames = []
- listdir = os.listdir("/sys/bus/netdevsim/devices/netdevsim%u/net/" % self.addr)
- for ifname in listdir:
- ifnames.append(ifname)
- ifnames.sort()
- return ifnames
-
- def wait_for_netdevs(self, port_count):
- timeout = 5
- timeout_start = time.time()
-
- while True:
- try:
- ifnames = self.get_ifnames()
- except FileNotFoundError as e:
- ifnames = []
- if len(ifnames) == port_count:
- break
- if time.time() < timeout_start + timeout:
- continue
- raise Exception("netdevices did not appear within timeout")
+
+ def _make_port(self, port_index, ifname):
+ return BpfNetdevSim(self, port_index, ifname, self.ns)
def dfs_num_bound_progs(self):
path = os.path.join(self.dfs_dir, "bpf_bound_progs")
@@ -415,33 +364,20 @@ class NetdevSimDev:
return progs
def remove(self):
- self.ctrl_write("del_device", "%u" % (self.addr, ))
+ super().remove()
devs.remove(self)
- def remove_nsim(self, nsim):
- self.nsims.remove(nsim)
- self.ctrl_write("devices/netdevsim%u/del_port" % (self.addr, ),
- "%u" % (nsim.port_index, ))
-class NetdevSim:
+class BpfNetdevSim(NetdevSim):
"""
Class for netdevsim netdevice and its attributes.
"""
- def __init__(self, nsimdev, port_index, ifname):
- # In case udev renamed the netdev to according to new schema,
- # check if the name matches the port_index.
- nsimnamere = re.compile("eni\d+np(\d+)")
- match = nsimnamere.match(ifname)
- if match and int(match.groups()[0]) != port_index + 1:
- raise Exception("netdevice name mismatches the expected one")
-
- self.nsimdev = nsimdev
- self.port_index = port_index
- self.ns = ""
+ def __init__(self, nsimdev, port_index, ifname, ns=None):
+ super().__init__(nsimdev, port_index, ifname, ns=ns)
+
self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index)
self.dfs_refresh()
- _, [self.dev] = ip("link show dev %s" % ifname)
def __getitem__(self, key):
return self.dev[key]
@@ -468,7 +404,7 @@ class NetdevSim:
raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs))
def set_ns(self, ns):
- name = "1" if ns == "" else ns
+ name = ns if ns else "1"
ip("link set dev %s netns %s" % (self.dev["ifname"], name), ns=self.ns)
self.ns = ns
@@ -605,7 +541,7 @@ def pin_prog(file_name, idx=0):
return file_name, bpf_pinned(file_name)
def pin_map(file_name, idx=0, expected=1):
- maps = bpftool_map_list(expected=expected)
+ maps = bpftool_map_list_wait(expected=expected)
m = maps[idx]
bpftool("map pin id %d %s" % (m["id"], file_name))
files.append(file_name)
@@ -618,7 +554,7 @@ def check_dev_info_removed(prog_file=None, map_file=None):
ret, err = bpftool("prog show pin %s" % (prog_file), fail=False)
fail(ret != 0, "failed to show prog with removed device")
- bpftool_map_list(expected=0)
+ bpftool_map_list_wait(expected=0)
ret, err = bpftool("map show pin %s" % (map_file), fail=False)
fail(ret == 0, "Showing map with removed device did not fail")
fail(err["error"].find("No such device") == -1,
@@ -642,7 +578,7 @@ def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False):
else:
fail("ifname" in dev.keys(), "Ifname is reported for other ns")
- maps = bpftool_map_list(expected=2, ns=ns)
+ maps = bpftool_map_list_wait(expected=2, ns=ns)
for m in maps:
fail("dev" not in m.keys(), "Device parameters not reported")
fail(dev != m["dev"], "Map's device different than program's")
@@ -744,7 +680,7 @@ def test_multi_prog(simdev, sim, obj, modename, modeid):
start_test("Test multi-attachment XDP - device remove...")
simdev.remove()
- simdev = NetdevSimDev()
+ simdev = BpfNetdevSimDev()
sim, = simdev.nsims
sim.set_ethtool_tc_offloads(True)
return [simdev, sim]
@@ -809,13 +745,13 @@ try:
bytecode = bpf_bytecode("1,6 0 0 4294967295,")
start_test("Test destruction of generic XDP...")
- simdev = NetdevSimDev()
+ simdev = BpfNetdevSimDev()
sim, = simdev.nsims
sim.set_xdp(obj, "generic")
simdev.remove()
bpftool_prog_list_wait(expected=0)
- simdev = NetdevSimDev()
+ simdev = BpfNetdevSimDev()
sim, = simdev.nsims
sim.tc_add_ingress()
@@ -967,7 +903,7 @@ try:
simdev.remove()
bpftool_prog_list_wait(expected=0)
- simdev = NetdevSimDev()
+ simdev = BpfNetdevSimDev()
sim, = simdev.nsims
sim.set_ethtool_tc_offloads(True)
@@ -976,7 +912,7 @@ try:
simdev.remove()
bpftool_prog_list_wait(expected=0)
- simdev = NetdevSimDev()
+ simdev = BpfNetdevSimDev()
sim, = simdev.nsims
sim.set_ethtool_tc_offloads(True)
@@ -1080,7 +1016,7 @@ try:
bpftool_prog_list_wait(expected=0)
start_test("Test attempt to use a program for a wrong device...")
- simdev2 = NetdevSimDev()
+ simdev2 = BpfNetdevSimDev()
sim2, = simdev2.nsims
sim2.set_xdp(obj, "offload")
pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
@@ -1169,7 +1105,7 @@ try:
clean_up()
bpftool_prog_list_wait(expected=0)
- simdev = NetdevSimDev()
+ simdev = BpfNetdevSimDev()
sim, = simdev.nsims
map_obj = bpf_obj("sample_map_ret0.bpf.o")
start_test("Test loading program with maps...")
@@ -1201,12 +1137,12 @@ try:
clean_up()
bpftool_prog_list_wait(expected=0)
- simdev = NetdevSimDev()
+ simdev = BpfNetdevSimDev()
sim, = simdev.nsims
start_test("Test map update (no flags)...")
sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
- maps = bpftool_map_list(expected=2)
+ maps = bpftool_map_list_wait(expected=2)
array = maps[0] if maps[0]["type"] == "array" else maps[1]
htab = maps[0] if maps[0]["type"] == "hash" else maps[1]
for m in maps:
@@ -1285,14 +1221,14 @@ try:
bpftool_map_list_wait(expected=0)
simdev.remove()
- simdev = NetdevSimDev()
+ simdev = BpfNetdevSimDev()
sim, = simdev.nsims
sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
simdev.remove()
bpftool_map_list_wait(expected=0)
start_test("Test map creation fail path...")
- simdev = NetdevSimDev()
+ simdev = BpfNetdevSimDev()
sim, = simdev.nsims
sim.dfs["bpf_map_accept"] = "N"
ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False)
@@ -1302,9 +1238,9 @@ try:
simdev.remove()
start_test("Test multi-dev ASIC program reuse...")
- simdevA = NetdevSimDev()
+ simdevA = BpfNetdevSimDev()
simA, = simdevA.nsims
- simdevB = NetdevSimDev(3)
+ simdevB = BpfNetdevSimDev(3)
simB1, simB2, simB3 = simdevB.nsims
sims = (simA, simB1, simB2, simB3)
simB = (simB1, simB2, simB3)
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index 161db24e3c..876c2db02a 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -260,15 +260,8 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
SOL_IPV6, IPV6_HOPLIMIT, &opt.v6.hlimit);
if (opt.txtime.ena) {
- struct sock_txtime so_txtime = {
- .clockid = CLOCK_MONOTONIC,
- };
__u64 txtime;
- if (setsockopt(fd, SOL_SOCKET, SO_TXTIME,
- &so_txtime, sizeof(so_txtime)))
- error(ERN_SOCKOPT, errno, "setsockopt TXTIME");
-
txtime = time_start_mono.tv_sec * (1000ULL * 1000 * 1000) +
time_start_mono.tv_nsec +
opt.txtime.delay * 1000;
@@ -284,13 +277,6 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
memcpy(CMSG_DATA(cmsg), &txtime, sizeof(txtime));
}
if (opt.ts.ena) {
- __u32 val = SOF_TIMESTAMPING_SOFTWARE |
- SOF_TIMESTAMPING_OPT_TSONLY;
-
- if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
- &val, sizeof(val)))
- error(ERN_SOCKOPT, errno, "setsockopt TIMESTAMPING");
-
cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
cmsg_len += CMSG_SPACE(sizeof(__u32));
if (cbuf_sz < cmsg_len)
@@ -426,6 +412,24 @@ static void ca_set_sockopts(int fd)
setsockopt(fd, SOL_SOCKET, SO_PRIORITY,
&opt.sockopt.priority, sizeof(opt.sockopt.priority)))
error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY");
+
+ if (opt.txtime.ena) {
+ struct sock_txtime so_txtime = {
+ .clockid = CLOCK_MONOTONIC,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_TXTIME,
+ &so_txtime, sizeof(so_txtime)))
+ error(ERN_SOCKOPT, errno, "setsockopt TXTIME");
+ }
+ if (opt.ts.ena) {
+ __u32 val = SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_TSONLY;
+
+ if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
+ &val, sizeof(val)))
+ error(ERN_SOCKOPT, errno, "setsockopt TIMESTAMPING");
+ }
}
int main(int argc, char *argv[])
diff --git a/tools/testing/selftests/net/cmsg_time.sh b/tools/testing/selftests/net/cmsg_time.sh
index af85267ad1..1d7e756644 100755
--- a/tools/testing/selftests/net/cmsg_time.sh
+++ b/tools/testing/selftests/net/cmsg_time.sh
@@ -66,10 +66,13 @@ for i in "-4 $TGT4" "-6 $TGT6"; do
awk '/SND/ { if ($3 > 1000) print "OK"; }')
check_result $? "$ts" "OK" "$prot - TXTIME abs"
- ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 |
+ [ "$KSFT_MACHINE_SLOW" = yes ] && delay=8000 || delay=1000
+
+ ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d $delay |
awk '/SND/ {snd=$3}
/SCHED/ {sch=$3}
- END { if (snd - sch > 500) print "OK"; }')
+ END { if (snd - sch > '$((delay/2))') print "OK";
+ else print snd, "-", sch, "<", '$((delay/2))'; }')
check_result $? "$ts" "OK" "$prot - TXTIME rel"
done
done
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 04de7a6ba6..d4891f7a2b 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -101,3 +101,5 @@ CONFIG_NETFILTER_XT_MATCH_POLICY=m
CONFIG_CRYPTO_ARIA=y
CONFIG_XFRM_INTERFACE=m
CONFIG_XFRM_USER=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
diff --git a/tools/testing/selftests/net/epoll_busy_poll.c b/tools/testing/selftests/net/epoll_busy_poll.c
new file mode 100644
index 0000000000..16e457c2f8
--- /dev/null
+++ b/tools/testing/selftests/net/epoll_busy_poll.c
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* Basic per-epoll context busy poll test.
+ *
+ * Only tests the ioctls, but should be expanded to test two connected hosts in
+ * the future
+ */
+
+#define _GNU_SOURCE
+
+#include <error.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/capability.h>
+
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include "../kselftest_harness.h"
+
+/* if the headers haven't been updated, we need to define some things */
+#if !defined(EPOLL_IOC_TYPE)
+struct epoll_params {
+ uint32_t busy_poll_usecs;
+ uint16_t busy_poll_budget;
+ uint8_t prefer_busy_poll;
+
+ /* pad the struct to a multiple of 64bits */
+ uint8_t __pad;
+};
+
+#define EPOLL_IOC_TYPE 0x8A
+#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params)
+#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
+#endif
+
+FIXTURE(invalid_fd)
+{
+ int invalid_fd;
+ struct epoll_params params;
+};
+
+FIXTURE_SETUP(invalid_fd)
+{
+ int ret;
+
+ ret = socket(AF_UNIX, SOCK_DGRAM, 0);
+ EXPECT_NE(-1, ret)
+ TH_LOG("error creating unix socket");
+
+ self->invalid_fd = ret;
+}
+
+FIXTURE_TEARDOWN(invalid_fd)
+{
+ int ret;
+
+ ret = close(self->invalid_fd);
+ EXPECT_EQ(0, ret);
+}
+
+TEST_F(invalid_fd, test_invalid_fd)
+{
+ int ret;
+
+ ret = ioctl(self->invalid_fd, EPIOCGPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCGPARAMS on invalid epoll FD should error");
+
+ EXPECT_EQ(ENOTTY, errno)
+ TH_LOG("EPIOCGPARAMS on invalid epoll FD should set errno to ENOTTY");
+
+ memset(&self->params, 0, sizeof(struct epoll_params));
+
+ ret = ioctl(self->invalid_fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS on invalid epoll FD should error");
+
+ EXPECT_EQ(ENOTTY, errno)
+ TH_LOG("EPIOCSPARAMS on invalid epoll FD should set errno to ENOTTY");
+}
+
+FIXTURE(epoll_busy_poll)
+{
+ int fd;
+ struct epoll_params params;
+ struct epoll_params *invalid_params;
+ cap_t caps;
+};
+
+FIXTURE_SETUP(epoll_busy_poll)
+{
+ int ret;
+
+ ret = epoll_create1(0);
+ EXPECT_NE(-1, ret)
+ TH_LOG("epoll_create1 failed?");
+
+ self->fd = ret;
+
+ self->caps = cap_get_proc();
+ EXPECT_NE(NULL, self->caps);
+}
+
+FIXTURE_TEARDOWN(epoll_busy_poll)
+{
+ int ret;
+
+ ret = close(self->fd);
+ EXPECT_EQ(0, ret);
+
+ ret = cap_free(self->caps);
+ EXPECT_NE(-1, ret)
+ TH_LOG("unable to free capabilities");
+}
+
+TEST_F(epoll_busy_poll, test_get_params)
+{
+ /* begin by getting the epoll params from the kernel
+ *
+ * the default should be default and all fields should be zero'd by the
+ * kernel, so set params fields to garbage to test this.
+ */
+ int ret = 0;
+
+ self->params.busy_poll_usecs = 0xff;
+ self->params.busy_poll_budget = 0xff;
+ self->params.prefer_busy_poll = 1;
+ self->params.__pad = 0xf;
+
+ ret = ioctl(self->fd, EPIOCGPARAMS, &self->params);
+ EXPECT_EQ(0, ret)
+ TH_LOG("ioctl EPIOCGPARAMS should succeed");
+
+ EXPECT_EQ(0, self->params.busy_poll_usecs)
+ TH_LOG("EPIOCGPARAMS busy_poll_usecs should have been 0");
+
+ EXPECT_EQ(0, self->params.busy_poll_budget)
+ TH_LOG("EPIOCGPARAMS busy_poll_budget should have been 0");
+
+ EXPECT_EQ(0, self->params.prefer_busy_poll)
+ TH_LOG("EPIOCGPARAMS prefer_busy_poll should have been 0");
+
+ EXPECT_EQ(0, self->params.__pad)
+ TH_LOG("EPIOCGPARAMS __pad should have been 0");
+
+ self->invalid_params = (struct epoll_params *)0xdeadbeef;
+ ret = ioctl(self->fd, EPIOCGPARAMS, self->invalid_params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCGPARAMS should error with invalid params");
+
+ EXPECT_EQ(EFAULT, errno)
+ TH_LOG("EPIOCGPARAMS with invalid params should set errno to EFAULT");
+}
+
+TEST_F(epoll_busy_poll, test_set_invalid)
+{
+ int ret;
+
+ memset(&self->params, 0, sizeof(struct epoll_params));
+
+ self->params.__pad = 1;
+
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS non-zero __pad should error");
+
+ EXPECT_EQ(EINVAL, errno)
+ TH_LOG("EPIOCSPARAMS non-zero __pad errno should be EINVAL");
+
+ self->params.__pad = 0;
+ self->params.busy_poll_usecs = (uint32_t)INT_MAX + 1;
+
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS should error busy_poll_usecs > S32_MAX");
+
+ EXPECT_EQ(EINVAL, errno)
+ TH_LOG("EPIOCSPARAMS busy_poll_usecs > S32_MAX errno should be EINVAL");
+
+ self->params.__pad = 0;
+ self->params.busy_poll_usecs = 32;
+ self->params.prefer_busy_poll = 2;
+
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS should error prefer_busy_poll > 1");
+
+ EXPECT_EQ(EINVAL, errno)
+ TH_LOG("EPIOCSPARAMS prefer_busy_poll > 1 errno should be EINVAL");
+
+ self->params.__pad = 0;
+ self->params.busy_poll_usecs = 32;
+ self->params.prefer_busy_poll = 1;
+
+ /* set budget well above kernel's NAPI_POLL_WEIGHT of 64 */
+ self->params.busy_poll_budget = UINT16_MAX;
+
+ /* test harness should run with CAP_NET_ADMIN, but let's make sure */
+ cap_flag_value_t tmp;
+
+ ret = cap_get_flag(self->caps, CAP_NET_ADMIN, CAP_EFFECTIVE, &tmp);
+ EXPECT_EQ(0, ret)
+ TH_LOG("unable to get CAP_NET_ADMIN cap flag");
+
+ EXPECT_EQ(CAP_SET, tmp)
+ TH_LOG("expecting CAP_NET_ADMIN to be set for the test harness");
+
+ /* at this point we know CAP_NET_ADMIN is available, so setting the
+ * params with a busy_poll_budget > NAPI_POLL_WEIGHT should succeed
+ */
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(0, ret)
+ TH_LOG("EPIOCSPARAMS should allow busy_poll_budget > NAPI_POLL_WEIGHT");
+
+ /* remove CAP_NET_ADMIN from our effective set */
+ cap_value_t net_admin[] = { CAP_NET_ADMIN };
+
+ ret = cap_set_flag(self->caps, CAP_EFFECTIVE, 1, net_admin, CAP_CLEAR);
+ EXPECT_EQ(0, ret)
+ TH_LOG("couldn't clear CAP_NET_ADMIN");
+
+ ret = cap_set_proc(self->caps);
+ EXPECT_EQ(0, ret)
+ TH_LOG("cap_set_proc should drop CAP_NET_ADMIN");
+
+ /* this is now expected to fail */
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS should error busy_poll_budget > NAPI_POLL_WEIGHT");
+
+ EXPECT_EQ(EPERM, errno)
+ TH_LOG("EPIOCSPARAMS errno should be EPERM busy_poll_budget > NAPI_POLL_WEIGHT");
+
+ /* restore CAP_NET_ADMIN to our effective set */
+ ret = cap_set_flag(self->caps, CAP_EFFECTIVE, 1, net_admin, CAP_SET);
+ EXPECT_EQ(0, ret)
+ TH_LOG("couldn't restore CAP_NET_ADMIN");
+
+ ret = cap_set_proc(self->caps);
+ EXPECT_EQ(0, ret)
+ TH_LOG("cap_set_proc should set CAP_NET_ADMIN");
+
+ self->invalid_params = (struct epoll_params *)0xdeadbeef;
+ ret = ioctl(self->fd, EPIOCSPARAMS, self->invalid_params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS should error when epoll_params is invalid");
+
+ EXPECT_EQ(EFAULT, errno)
+ TH_LOG("EPIOCSPARAMS should set errno to EFAULT when epoll_params is invalid");
+}
+
+TEST_F(epoll_busy_poll, test_set_and_get_valid)
+{
+ int ret;
+
+ memset(&self->params, 0, sizeof(struct epoll_params));
+
+ self->params.busy_poll_usecs = 25;
+ self->params.busy_poll_budget = 16;
+ self->params.prefer_busy_poll = 1;
+
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(0, ret)
+ TH_LOG("EPIOCSPARAMS with valid params should not error");
+
+ /* check that the kernel returns the same values back */
+
+ memset(&self->params, 0, sizeof(struct epoll_params));
+
+ ret = ioctl(self->fd, EPIOCGPARAMS, &self->params);
+
+ EXPECT_EQ(0, ret)
+ TH_LOG("EPIOCGPARAMS should not error");
+
+ EXPECT_EQ(25, self->params.busy_poll_usecs)
+ TH_LOG("params.busy_poll_usecs incorrect");
+
+ EXPECT_EQ(16, self->params.busy_poll_budget)
+ TH_LOG("params.busy_poll_budget incorrect");
+
+ EXPECT_EQ(1, self->params.prefer_busy_poll)
+ TH_LOG("params.prefer_busy_poll incorrect");
+
+ EXPECT_EQ(0, self->params.__pad)
+ TH_LOG("params.__pad was not 0");
+}
+
+TEST_F(epoll_busy_poll, test_invalid_ioctl)
+{
+ int invalid_ioctl = EPIOCGPARAMS + 10;
+ int ret;
+
+ ret = ioctl(self->fd, invalid_ioctl, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("invalid ioctl should return error");
+
+ EXPECT_EQ(EINVAL, errno)
+ TH_LOG("invalid ioctl should set errno to EINVAL");
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 51157a5559..7c01f58a20 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -9,6 +9,7 @@ PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
RTABLE=100
RTABLE_PEER=101
+RTABLE_VRF=102
GW_IP4=192.51.100.2
SRC_IP=192.51.100.3
GW_IP6=2001:db8:1::2
@@ -17,7 +18,14 @@ SRC_IP6=2001:db8:1::3
DEV_ADDR=192.51.100.1
DEV_ADDR6=2001:db8:1::1
DEV=dummy0
-TESTS="fib_rule6 fib_rule4 fib_rule6_connect fib_rule4_connect"
+TESTS="
+ fib_rule6
+ fib_rule4
+ fib_rule6_connect
+ fib_rule4_connect
+ fib_rule6_vrf
+ fib_rule4_vrf
+"
SELFTEST_PATH=""
@@ -27,13 +35,18 @@ log_test()
local expected=$2
local msg="$3"
+ $IP rule show | grep -q l3mdev
+ if [ $? -eq 0 ]; then
+ msg="$msg (VRF)"
+ fi
+
if [ ${rc} -eq ${expected} ]; then
nsuccess=$((nsuccess+1))
- printf "\n TEST: %-50s [ OK ]\n" "${msg}"
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
else
ret=1
nfail=$((nfail+1))
- printf "\n TEST: %-50s [FAIL]\n" "${msg}"
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
echo
echo "hit enter to continue, 'q' to quit"
@@ -130,6 +143,17 @@ cleanup_peer()
ip netns del $peerns
}
+setup_vrf()
+{
+ $IP link add name vrf0 up type vrf table $RTABLE_VRF
+ $IP link set dev $DEV master vrf0
+}
+
+cleanup_vrf()
+{
+ $IP link del dev vrf0
+}
+
fib_check_iproute_support()
{
ip rule help 2>&1 | grep -q $1
@@ -248,6 +272,13 @@ fib_rule6_test()
fi
}
+fib_rule6_vrf_test()
+{
+ setup_vrf
+ fib_rule6_test
+ cleanup_vrf
+}
+
# Verify that the IPV6_TCLASS option of UDPv6 and TCPv6 sockets is properly
# taken into account when connecting the socket and when sending packets.
fib_rule6_connect_test()
@@ -385,6 +416,13 @@ fib_rule4_test()
fi
}
+fib_rule4_vrf_test()
+{
+ setup_vrf
+ fib_rule4_test
+ cleanup_vrf
+}
+
# Verify that the IP_TOS option of UDPv4 and TCPv4 sockets is properly taken
# into account when connecting the socket and when sending packets.
fib_rule4_connect_test()
@@ -467,6 +505,8 @@ do
fib_rule4_test|fib_rule4) fib_rule4_test;;
fib_rule6_connect_test|fib_rule6_connect) fib_rule6_connect_test;;
fib_rule4_connect_test|fib_rule4_connect) fib_rule4_connect_test;;
+ fib_rule6_vrf_test|fib_rule6_vrf) fib_rule6_vrf_test;;
+ fib_rule4_vrf_test|fib_rule4_vrf) fib_rule4_vrf_test;;
help) echo "Test names: $TESTS"; exit 0;;
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 73895711cd..5f3c28fc86 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -1737,53 +1737,53 @@ ipv4_rt_dsfield()
# DSCP 0x10 should match the specific route, no matter the ECN bits
$IP route get fibmatch 172.16.102.1 dsfield 0x10 | \
- grep -q "via 172.16.103.2"
+ grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2"
log_test $? 0 "IPv4 route with DSCP and ECN:Not-ECT"
$IP route get fibmatch 172.16.102.1 dsfield 0x11 | \
- grep -q "via 172.16.103.2"
+ grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2"
log_test $? 0 "IPv4 route with DSCP and ECN:ECT(1)"
$IP route get fibmatch 172.16.102.1 dsfield 0x12 | \
- grep -q "via 172.16.103.2"
+ grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2"
log_test $? 0 "IPv4 route with DSCP and ECN:ECT(0)"
$IP route get fibmatch 172.16.102.1 dsfield 0x13 | \
- grep -q "via 172.16.103.2"
+ grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2"
log_test $? 0 "IPv4 route with DSCP and ECN:CE"
# Unknown DSCP should match the generic route, no matter the ECN bits
$IP route get fibmatch 172.16.102.1 dsfield 0x14 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with unknown DSCP and ECN:Not-ECT"
$IP route get fibmatch 172.16.102.1 dsfield 0x15 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(1)"
$IP route get fibmatch 172.16.102.1 dsfield 0x16 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(0)"
$IP route get fibmatch 172.16.102.1 dsfield 0x17 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with unknown DSCP and ECN:CE"
# Null DSCP should match the generic route, no matter the ECN bits
$IP route get fibmatch 172.16.102.1 dsfield 0x00 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with no DSCP and ECN:Not-ECT"
$IP route get fibmatch 172.16.102.1 dsfield 0x01 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(1)"
$IP route get fibmatch 172.16.102.1 dsfield 0x02 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(0)"
$IP route get fibmatch 172.16.102.1 dsfield 0x03 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with no DSCP and ECN:CE"
}
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 535865b3d1..fa7b59ff40 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -15,18 +15,12 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
bridge_vlan_unaware.sh \
custom_multipath_hash.sh \
dual_vxlan_bridge.sh \
- ethtool_extended_state.sh \
- ethtool_mm.sh \
- ethtool_rmon.sh \
- ethtool.sh \
gre_custom_multipath_hash.sh \
gre_inner_v4_multipath.sh \
gre_inner_v6_multipath.sh \
gre_multipath_nh_res.sh \
gre_multipath_nh.sh \
gre_multipath.sh \
- hw_stats_l3.sh \
- hw_stats_l3_gre.sh \
ip6_forward_instats_vrf.sh \
ip6gre_custom_multipath_hash.sh \
ip6gre_flat_key.sh \
@@ -43,8 +37,8 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
ipip_hier_gre_key.sh \
ipip_hier_gre_keys.sh \
ipip_hier_gre.sh \
+ lib_sh_test.sh \
local_termination.sh \
- loopback.sh \
mirror_gre_bound.sh \
mirror_gre_bridge_1d.sh \
mirror_gre_bridge_1d_vlan.sh \
@@ -113,7 +107,6 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
vxlan_symmetric.sh
TEST_FILES := devlink_lib.sh \
- ethtool_lib.sh \
fib_offload_lib.sh \
forwarding.config.sample \
ip6gre_lib.sh \
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
index b8a2af8fcf..7fdb6a9ca5 100644
--- a/tools/testing/selftests/net/forwarding/README
+++ b/tools/testing/selftests/net/forwarding/README
@@ -56,3 +56,36 @@ o Checks shall be added to lib.sh for any external dependencies.
o Code shall be checked using ShellCheck [1] prior to submission.
1. https://www.shellcheck.net/
+
+Customization
+=============
+
+The forwarding selftests framework uses a number of variables that
+influence its behavior and tools it invokes, and how it invokes them, in
+various ways. A number of these variables can be overridden. The way these
+overridable variables are specified is typically one of the following two
+syntaxes:
+
+ : "${VARIABLE:=default_value}"
+ VARIABLE=${VARIABLE:=default_value}
+
+Any of these variables can be overridden. Notably net/forwarding/lib.sh and
+net/lib.sh contain a number of overridable variables.
+
+One way of overriding these variables is through the environment:
+
+ PAUSE_ON_FAIL=yes ./some_test.sh
+
+The variable NETIFS is special. Since it is an array variable, there is no
+way to pass it through the environment. Its value can instead be given as
+consecutive arguments to the selftest:
+
+ ./some_test.sh swp{1..8}
+
+A way to customize variables in a persistent fashion is to create a file
+named forwarding.config in this directory. lib.sh sources the file if
+present, so it can contain any shell code. Typically it will contain
+assignments of variables whose value should be overridden.
+
+forwarding.config.sample is available in the directory as an example of
+how forwarding.config might look.
diff --git a/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh
index 0760a34b71..a21b7085da 100755
--- a/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh
@@ -178,6 +178,22 @@ fdb_del()
check_err $? "Failed to remove a FDB entry of type ${type}"
}
+check_fdb_n_learned_support()
+{
+ if ! ip link help bridge 2>&1 | grep -q "fdb_max_learned"; then
+ echo "SKIP: iproute2 too old, missing bridge max learned support"
+ exit $ksft_skip
+ fi
+
+ ip link add dev br0 type bridge
+ local learned=$(fdb_get_n_learned)
+ ip link del dev br0
+ if [ "$learned" == "null" ]; then
+ echo "SKIP: kernel too old; bridge fdb_n_learned feature not supported."
+ exit $ksft_skip
+ fi
+}
+
check_accounting_one_type()
{
local type=$1 is_counted=$2 overrides_learned=$3
@@ -274,6 +290,8 @@ check_limit()
done
}
+check_fdb_n_learned_support
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index f1de525cfa..62a05bca1e 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -122,6 +122,8 @@ devlink_reload()
still_pending=$(devlink resource show "$DEVLINK_DEV" | \
grep -c "size_new")
check_err $still_pending "Failed reload - There are still unset sizes"
+
+ udevadm settle
}
declare -A DEVLINK_ORIG
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
index 1fc4f0242f..f1ca95e79a 100644
--- a/tools/testing/selftests/net/forwarding/forwarding.config.sample
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -3,51 +3,28 @@
##############################################################################
# Topology description. p1 looped back to p2, p3 to p4 and so on.
-declare -A NETIFS
-NETIFS[p1]=veth0
-NETIFS[p2]=veth1
-NETIFS[p3]=veth2
-NETIFS[p4]=veth3
-NETIFS[p5]=veth4
-NETIFS[p6]=veth5
-NETIFS[p7]=veth6
-NETIFS[p8]=veth7
-NETIFS[p9]=veth8
-NETIFS[p10]=veth9
+NETIFS=(
+ [p1]=veth0
+ [p2]=veth1
+ [p3]=veth2
+ [p4]=veth3
+ [p5]=veth4
+ [p6]=veth5
+ [p7]=veth6
+ [p8]=veth7
+ [p9]=veth8
+ [p10]=veth9
+)
# Port that does not have a cable connected.
NETIF_NO_CABLE=eth8
##############################################################################
-# Defines
+# In addition to the topology-related variables, it is also possible to override
+# in this file other variables that net/lib.sh, net/forwarding/lib.sh or other
+# libraries or selftests use. E.g.:
-# IPv4 ping utility name
-PING=ping
-# IPv6 ping utility name. Some distributions use 'ping' for IPv6.
PING6=ping6
-# Packet generator. Some distributions use 'mz'.
MZ=mausezahn
-# mausezahn delay between transmissions in microseconds.
-MZ_DELAY=0
-# Time to wait after interfaces participating in the test are all UP
WAIT_TIME=5
-# Whether to pause on failure or not.
-PAUSE_ON_FAIL=no
-# Whether to pause on cleanup or not.
-PAUSE_ON_CLEANUP=no
-# Type of network interface to create
-NETIF_TYPE=veth
-# Whether to create virtual interfaces (veth) or not
-NETIF_CREATE=yes
-# Timeout (in seconds) before ping exits regardless of how many packets have
-# been sent or received
-PING_TIMEOUT=5
-# Minimum ageing_time (in centiseconds) supported by hardware
-LOW_AGEING_TIME=1000
-# Flag for tc match, supposed to be skip_sw/skip_hw which means do not process
-# filter by software/hardware
-TC_FLAG=skip_hw
-# IPv6 traceroute utility name.
-TROUTE6=traceroute6
-
diff --git a/tools/testing/selftests/net/forwarding/ipip_lib.sh b/tools/testing/selftests/net/forwarding/ipip_lib.sh
index 30f36a57ba..01e62c4ac9 100644
--- a/tools/testing/selftests/net/forwarding/ipip_lib.sh
+++ b/tools/testing/selftests/net/forwarding/ipip_lib.sh
@@ -141,7 +141,6 @@
# | $h2 + |
# | 192.0.2.18/28 |
# +---------------------------+
-source lib.sh
h1_create()
{
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index e78f11140e..eabbdf00d8 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -2,33 +2,124 @@
# SPDX-License-Identifier: GPL-2.0
##############################################################################
+# Topology description. p1 looped back to p2, p3 to p4 and so on.
+
+declare -A NETIFS=(
+ [p1]=veth0
+ [p2]=veth1
+ [p3]=veth2
+ [p4]=veth3
+ [p5]=veth4
+ [p6]=veth5
+ [p7]=veth6
+ [p8]=veth7
+ [p9]=veth8
+ [p10]=veth9
+)
+
+# Port that does not have a cable connected.
+: "${NETIF_NO_CABLE:=eth8}"
+
+##############################################################################
# Defines
-# Can be overridden by the configuration file.
-PING=${PING:=ping}
-PING6=${PING6:=ping6}
-MZ=${MZ:=mausezahn}
-MZ_DELAY=${MZ_DELAY:=0}
-ARPING=${ARPING:=arping}
-TEAMD=${TEAMD:=teamd}
-WAIT_TIME=${WAIT_TIME:=5}
-PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
-PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
-NETIF_TYPE=${NETIF_TYPE:=veth}
-NETIF_CREATE=${NETIF_CREATE:=yes}
-MCD=${MCD:=smcrouted}
-MC_CLI=${MC_CLI:=smcroutectl}
-PING_COUNT=${PING_COUNT:=10}
-PING_TIMEOUT=${PING_TIMEOUT:=5}
-WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
-INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600}
-LOW_AGEING_TIME=${LOW_AGEING_TIME:=1000}
-REQUIRE_JQ=${REQUIRE_JQ:=yes}
-REQUIRE_MZ=${REQUIRE_MZ:=yes}
-REQUIRE_MTOOLS=${REQUIRE_MTOOLS:=no}
-STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no}
-TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=}
-TROUTE6=${TROUTE6:=traceroute6}
+# Networking utilities.
+: "${PING:=ping}"
+: "${PING6:=ping6}" # Some distros just use ping.
+: "${ARPING:=arping}"
+: "${TROUTE6:=traceroute6}"
+
+# Packet generator.
+: "${MZ:=mausezahn}" # Some distributions use 'mz'.
+: "${MZ_DELAY:=0}"
+
+# Host configuration tools.
+: "${TEAMD:=teamd}"
+: "${MCD:=smcrouted}"
+: "${MC_CLI:=smcroutectl}"
+
+# Constants for netdevice bring-up:
+# Default time in seconds to wait for an interface to come up before giving up
+# and bailing out. Used during initial setup.
+: "${INTERFACE_TIMEOUT:=600}"
+# Like INTERFACE_TIMEOUT, but default for ad-hoc waiting in testing scripts.
+: "${WAIT_TIMEOUT:=20}"
+# Time to wait after interfaces participating in the test are all UP.
+: "${WAIT_TIME:=5}"
+
+# Whether to pause on, respectively, after a failure and before cleanup.
+: "${PAUSE_ON_FAIL:=no}"
+: "${PAUSE_ON_CLEANUP:=no}"
+
+# Whether to create virtual interfaces, and what netdevice type they should be.
+: "${NETIF_CREATE:=yes}"
+: "${NETIF_TYPE:=veth}"
+
+# Constants for ping tests:
+# How many packets should be sent.
+: "${PING_COUNT:=10}"
+# Timeout (in seconds) before ping exits regardless of how many packets have
+# been sent or received
+: "${PING_TIMEOUT:=5}"
+
+# Minimum ageing_time (in centiseconds) supported by hardware
+: "${LOW_AGEING_TIME:=1000}"
+
+# Whether to check for availability of certain tools.
+: "${REQUIRE_JQ:=yes}"
+: "${REQUIRE_MZ:=yes}"
+: "${REQUIRE_MTOOLS:=no}"
+
+# Whether to override MAC addresses on interfaces participating in the test.
+: "${STABLE_MAC_ADDRS:=no}"
+
+# Flags for tcpdump
+: "${TCPDUMP_EXTRA_FLAGS:=}"
+
+# Flags for TC filters.
+: "${TC_FLAG:=skip_hw}"
+
+# Whether the machine is "slow" -- i.e. might be incapable of running tests
+# involving heavy traffic. This might be the case on a debug kernel, a VM, or
+# e.g. a low-power board.
+: "${KSFT_MACHINE_SLOW:=no}"
+
+##############################################################################
+# Find netifs by test-specified driver name
+
+driver_name_get()
+{
+ local dev=$1; shift
+ local driver_path="/sys/class/net/$dev/device/driver"
+
+ if [[ -L $driver_path ]]; then
+ basename `realpath $driver_path`
+ fi
+}
+
+netif_find_driver()
+{
+ local ifnames=`ip -j link show | jq -r ".[].ifname"`
+ local count=0
+
+ for ifname in $ifnames
+ do
+ local driver_name=`driver_name_get $ifname`
+ if [[ ! -z $driver_name && $driver_name == $NETIF_FIND_DRIVER ]]; then
+ count=$((count + 1))
+ NETIFS[p$count]="$ifname"
+ fi
+ done
+}
+
+# Whether to find netdevice according to the driver speficied by the importer
+: "${NETIF_FIND_DRIVER:=}"
+
+if [[ $NETIF_FIND_DRIVER ]]; then
+ unset NETIFS
+ declare -A NETIFS
+ netif_find_driver
+fi
net_forwarding_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
@@ -179,22 +270,23 @@ check_port_mab_support()
fi
}
-skip_on_veth()
+if [[ "$(id -u)" -ne 0 ]]; then
+ echo "SKIP: need root privileges"
+ exit $ksft_skip
+fi
+
+check_driver()
{
- local kind=$(ip -j -d link show dev ${NETIFS[p1]} |
- jq -r '.[].linkinfo.info_kind')
+ local dev=$1; shift
+ local expected=$1; shift
+ local driver_name=`driver_name_get $dev`
- if [[ $kind == veth ]]; then
- echo "SKIP: Test cannot be run with veth pairs"
+ if [[ $driver_name != $expected ]]; then
+ echo "SKIP: expected driver $expected for $dev, got $driver_name instead"
exit $ksft_skip
fi
}
-if [[ "$(id -u)" -ne 0 ]]; then
- echo "SKIP: need root privileges"
- exit $ksft_skip
-fi
-
if [[ "$CHECK_TC" = "yes" ]]; then
check_tc_version
fi
@@ -209,6 +301,21 @@ require_command()
fi
}
+# IPv6 support was added in v3.0
+check_mtools_version()
+{
+ local version="$(msend -v)"
+ local major
+
+ version=${version##msend version }
+ major=$(echo $version | cut -d. -f1)
+
+ if [ $major -lt 3 ]; then
+ echo "SKIP: expected mtools version 3.0, got $version"
+ exit $ksft_skip
+ fi
+}
+
if [[ "$REQUIRE_JQ" = "yes" ]]; then
require_command jq
fi
@@ -216,15 +323,10 @@ if [[ "$REQUIRE_MZ" = "yes" ]]; then
require_command $MZ
fi
if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then
- # https://github.com/vladimiroltean/mtools/
- # patched for IPv6 support
+ # https://github.com/troglobit/mtools
require_command msend
require_command mreceive
-fi
-
-if [[ ! -v NUM_NETIFS ]]; then
- echo "SKIP: importer does not define \"NUM_NETIFS\""
- exit $ksft_skip
+ check_mtools_version
fi
##############################################################################
@@ -245,6 +347,23 @@ done
##############################################################################
# Network interfaces configuration
+if [[ ! -v NUM_NETIFS ]]; then
+ echo "SKIP: importer does not define \"NUM_NETIFS\""
+ exit $ksft_skip
+fi
+
+if (( NUM_NETIFS > ${#NETIFS[@]} )); then
+ echo "SKIP: Importer requires $NUM_NETIFS NETIFS, but only ${#NETIFS[@]} are defined (${NETIFS[@]})"
+ exit $ksft_skip
+fi
+
+for i in $(seq ${#NETIFS[@]}); do
+ if [[ ! ${NETIFS[p$i]} ]]; then
+ echo "SKIP: NETIFS[p$i] not given"
+ exit $ksft_skip
+ fi
+done
+
create_netif_veth()
{
local i
@@ -343,13 +462,20 @@ ret_set_ksft_status()
fi
}
+# Whether FAILs should be interpreted as XFAILs. Internal.
+FAIL_TO_XFAIL=
+
check_err()
{
local err=$1
local msg=$2
if ((err)); then
- ret_set_ksft_status $ksft_fail "$msg"
+ if [[ $FAIL_TO_XFAIL = yes ]]; then
+ ret_set_ksft_status $ksft_xfail "$msg"
+ else
+ ret_set_ksft_status $ksft_fail "$msg"
+ fi
fi
}
@@ -374,6 +500,29 @@ check_err_fail()
fi
}
+xfail_on_slow()
+{
+ if [[ $KSFT_MACHINE_SLOW = yes ]]; then
+ FAIL_TO_XFAIL=yes "$@"
+ else
+ "$@"
+ fi
+}
+
+xfail_on_veth()
+{
+ local dev=$1; shift
+ local kind
+
+ kind=$(ip -j -d link show dev $dev |
+ jq -r '.[].linkinfo.info_kind')
+ if [[ $kind = veth ]]; then
+ FAIL_TO_XFAIL=yes "$@"
+ else
+ "$@"
+ fi
+}
+
log_test_result()
{
local test_name=$1; shift
@@ -569,6 +718,19 @@ setup_wait()
sleep $WAIT_TIME
}
+wait_for_dev()
+{
+ local dev=$1; shift
+ local timeout=${1:-$WAIT_TIMEOUT}; shift
+
+ slowwait $timeout ip link show dev $dev &> /dev/null
+ if (( $? )); then
+ check_err 1
+ log_test wait_for_dev "Interface $dev did not appear."
+ exit $EXIT_STATUS
+ fi
+}
+
cmd_jq()
{
local cmd=$1
@@ -1995,6 +2157,8 @@ bail_on_lldpad()
{
local reason1="$1"; shift
local reason2="$1"; shift
+ local caller=${FUNCNAME[1]}
+ local src=${BASH_SOURCE[1]}
if systemctl is-active --quiet lldpad; then
@@ -2015,7 +2179,8 @@ bail_on_lldpad()
an environment variable ALLOW_LLDPAD to a
non-empty string.
EOF
- exit 1
+ log_test_skip $src:$caller
+ exit $EXIT_STATUS
else
return
fi
diff --git a/tools/testing/selftests/net/forwarding/lib_sh_test.sh b/tools/testing/selftests/net/forwarding/lib_sh_test.sh
new file mode 100755
index 0000000000..ff2accccaf
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/lib_sh_test.sh
@@ -0,0 +1,208 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This tests the operation of lib.sh itself.
+
+ALL_TESTS="
+ test_ret
+ test_exit_status
+"
+NUM_NETIFS=0
+source lib.sh
+
+# Simulated checks.
+
+do_test()
+{
+ local msg=$1; shift
+
+ "$@"
+ check_err $? "$msg"
+}
+
+tpass()
+{
+ do_test "tpass" true
+}
+
+tfail()
+{
+ do_test "tfail" false
+}
+
+txfail()
+{
+ FAIL_TO_XFAIL=yes do_test "txfail" false
+}
+
+# Simulated tests.
+
+pass()
+{
+ RET=0
+ do_test "true" true
+ log_test "true"
+}
+
+fail()
+{
+ RET=0
+ do_test "false" false
+ log_test "false"
+}
+
+xfail()
+{
+ RET=0
+ FAIL_TO_XFAIL=yes do_test "xfalse" false
+ log_test "xfalse"
+}
+
+skip()
+{
+ RET=0
+ log_test_skip "skip"
+}
+
+slow_xfail()
+{
+ RET=0
+ xfail_on_slow do_test "slow_false" false
+ log_test "slow_false"
+}
+
+# lib.sh tests.
+
+ret_tests_run()
+{
+ local t
+
+ RET=0
+ retmsg=
+ for t in "$@"; do
+ $t
+ done
+ echo "$retmsg"
+ return $RET
+}
+
+ret_subtest()
+{
+ local expect_ret=$1; shift
+ local expect_retmsg=$1; shift
+ local -a tests=( "$@" )
+
+ local status_names=(pass fail xfail xpass skip)
+ local ret
+ local out
+
+ RET=0
+
+ # Run this in a subshell, so that our environment is intact.
+ out=$(ret_tests_run "${tests[@]}")
+ ret=$?
+
+ (( ret == expect_ret ))
+ check_err $? "RET=$ret expected $expect_ret"
+
+ [[ $out == $expect_retmsg ]]
+ check_err $? "retmsg=$out expected $expect_retmsg"
+
+ log_test "RET $(echo ${tests[@]}) -> ${status_names[$ret]}"
+}
+
+test_ret()
+{
+ ret_subtest $ksft_pass ""
+
+ ret_subtest $ksft_pass "" tpass
+ ret_subtest $ksft_fail "tfail" tfail
+ ret_subtest $ksft_xfail "txfail" txfail
+
+ ret_subtest $ksft_pass "" tpass tpass
+ ret_subtest $ksft_fail "tfail" tpass tfail
+ ret_subtest $ksft_xfail "txfail" tpass txfail
+
+ ret_subtest $ksft_fail "tfail" tfail tpass
+ ret_subtest $ksft_xfail "txfail" txfail tpass
+
+ ret_subtest $ksft_fail "tfail" tfail tfail
+ ret_subtest $ksft_fail "tfail" tfail txfail
+
+ ret_subtest $ksft_fail "tfail" txfail tfail
+
+ ret_subtest $ksft_xfail "txfail" txfail txfail
+}
+
+exit_status_tests_run()
+{
+ EXIT_STATUS=0
+ tests_run > /dev/null
+ return $EXIT_STATUS
+}
+
+exit_status_subtest()
+{
+ local expect_exit_status=$1; shift
+ local tests=$1; shift
+ local what=$1; shift
+
+ local status_names=(pass fail xfail xpass skip)
+ local exit_status
+ local out
+
+ RET=0
+
+ # Run this in a subshell, so that our environment is intact.
+ out=$(TESTS="$tests" exit_status_tests_run)
+ exit_status=$?
+
+ (( exit_status == expect_exit_status ))
+ check_err $? "EXIT_STATUS=$exit_status, expected $expect_exit_status"
+
+ log_test "EXIT_STATUS $tests$what -> ${status_names[$exit_status]}"
+}
+
+test_exit_status()
+{
+ exit_status_subtest $ksft_pass ":"
+
+ exit_status_subtest $ksft_pass "pass"
+ exit_status_subtest $ksft_fail "fail"
+ exit_status_subtest $ksft_pass "xfail"
+ exit_status_subtest $ksft_skip "skip"
+
+ exit_status_subtest $ksft_pass "pass pass"
+ exit_status_subtest $ksft_fail "pass fail"
+ exit_status_subtest $ksft_pass "pass xfail"
+ exit_status_subtest $ksft_skip "pass skip"
+
+ exit_status_subtest $ksft_fail "fail pass"
+ exit_status_subtest $ksft_pass "xfail pass"
+ exit_status_subtest $ksft_skip "skip pass"
+
+ exit_status_subtest $ksft_fail "fail fail"
+ exit_status_subtest $ksft_fail "fail xfail"
+ exit_status_subtest $ksft_fail "fail skip"
+
+ exit_status_subtest $ksft_fail "xfail fail"
+ exit_status_subtest $ksft_fail "skip fail"
+
+ exit_status_subtest $ksft_pass "xfail xfail"
+ exit_status_subtest $ksft_skip "xfail skip"
+ exit_status_subtest $ksft_skip "skip xfail"
+
+ exit_status_subtest $ksft_skip "skip skip"
+
+ KSFT_MACHINE_SLOW=yes \
+ exit_status_subtest $ksft_pass "slow_xfail" ": slow"
+
+ KSFT_MACHINE_SLOW=no \
+ exit_status_subtest $ksft_fail "slow_xfail" ": fast"
+}
+
+trap pre_cleanup EXIT
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh
index c5b0cbc85b..4b364cdf3e 100755
--- a/tools/testing/selftests/net/forwarding/local_termination.sh
+++ b/tools/testing/selftests/net/forwarding/local_termination.sh
@@ -155,25 +155,30 @@ run_test()
"$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \
true
- check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \
- "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \
- false
+ xfail_on_veth $h1 \
+ check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \
+ "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \
+ false
check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \
"$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \
true
- check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, allmulti" \
- "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \
- false
+ xfail_on_veth $h1 \
+ check_rcv $rcv_if_name \
+ "Unicast IPv4 to unknown MAC address, allmulti" \
+ "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \
+ false
check_rcv $rcv_if_name "Multicast IPv4 to joined group" \
"$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \
true
- check_rcv $rcv_if_name "Multicast IPv4 to unknown group" \
- "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \
- false
+ xfail_on_veth $h1 \
+ check_rcv $rcv_if_name \
+ "Multicast IPv4 to unknown group" \
+ "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \
+ false
check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \
"$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \
@@ -187,9 +192,10 @@ run_test()
"$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \
true
- check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \
- "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \
- false
+ xfail_on_veth $h1 \
+ check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \
+ "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \
+ false
check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \
"$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index 3f0f5dc955..2ba44247c6 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -1,6 +1,41 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# +-------------------------+
+# | H1 |
+# | $h1 + |
+# | 192.0.2.2/24 | |
+# | 2001:db8:1::2/64 | |
+# +-------------------|-----+
+# |
+# +-------------------|----------------------+
+# | | R1 |
+# | $rp11 + |
+# | 192.0.2.1/24 |
+# | 2001:db8:1::1/64 |
+# | |
+# | + $rp12 + $rp13 |
+# | | 169.254.2.12/24 | 169.254.3.13/24 |
+# | | fe80:2::12/64 | fe80:3::13/64 |
+# +--|--------------------|------------------+
+# | |
+# +--|--------------------|------------------+
+# | + $rp22 + $rp23 |
+# | 169.254.2.22/24 169.254.3.23/24 |
+# | fe80:2::22/64 fe80:3::23/64 |
+# | |
+# | $rp21 + |
+# | 198.51.100.1/24 | |
+# | 2001:db8:2::1/64 | R2 |
+# +-------------------|----------------------+
+# |
+# +-------------------|-----+
+# | | |
+# | $h2 + |
+# | 198.51.100.2/24 |
+# | 2001:db8:2::2/64 H2 |
+# +-------------------------+
+
ALL_TESTS="
ping_ipv4
ping_ipv6
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
index b2d2c6cecc..2903294d8b 100644
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
@@ -56,21 +56,12 @@ nh_stats_test_dispatch_swhw()
local group_id=$1; shift
local mz="$@"
- local used
-
nh_stats_do_test "$what" "$nh1_id" "$nh2_id" "$group_id" \
nh_stats_get "${mz[@]}"
- used=$(ip -s -j -d nexthop show id $group_id |
- jq '.[].hw_stats.used')
- kind=$(ip -j -d link show dev $rp11 |
- jq -r '.[].linkinfo.info_kind')
- if [[ $used == true ]]; then
+ xfail_on_veth $rp11 \
nh_stats_do_test "HW $what" "$nh1_id" "$nh2_id" "$group_id" \
nh_stats_get_hw "${mz[@]}"
- elif [[ $kind == veth ]]; then
- log_test_xfail "HW stats not offloaded on veth topology"
- fi
}
nh_stats_test_dispatch()
@@ -83,7 +74,6 @@ nh_stats_test_dispatch()
local mz="$@"
local enabled
- local kind
if ! ip nexthop help 2>&1 | grep -q hw_stats; then
log_test_skip "NH stats test: ip doesn't support HW stats"
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
index 4b483d24ad..cd9e346436 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -1,6 +1,41 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# +-------------------------+
+# | H1 |
+# | $h1 + |
+# | 192.0.2.2/24 | |
+# | 2001:db8:1::2/64 | |
+# +-------------------|-----+
+# |
+# +-------------------|----------------------+
+# | | R1 |
+# | $rp11 + |
+# | 192.0.2.1/24 |
+# | 2001:db8:1::1/64 |
+# | |
+# | + $rp12 + $rp13 |
+# | | 169.254.2.12/24 | 169.254.3.13/24 |
+# | | fe80:2::12/64 | fe80:3::13/64 |
+# +--|--------------------|------------------+
+# | |
+# +--|--------------------|------------------+
+# | + $rp22 + $rp23 |
+# | 169.254.2.22/24 169.254.3.23/24 |
+# | fe80:2::22/64 fe80:3::23/64 |
+# | |
+# | $rp21 + |
+# | 198.51.100.1/24 | |
+# | 2001:db8:2::1/64 | R2 |
+# +-------------------|----------------------+
+# |
+# +-------------------|-----+
+# | | |
+# | $h2 + |
+# | 198.51.100.2/24 |
+# | 2001:db8:2::2/64 H2 |
+# +-------------------------+
+
ALL_TESTS="
ping_ipv4
ping_ipv6
diff --git a/tools/testing/selftests/net/forwarding/router_nh.sh b/tools/testing/selftests/net/forwarding/router_nh.sh
index f3a53738bd..92904b01ea 100755
--- a/tools/testing/selftests/net/forwarding/router_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_nh.sh
@@ -1,6 +1,20 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# +-------------------------+ +-------------------------+
+# | H1 | | H2 |
+# | $h1 + | | $h2 + |
+# | 192.0.2.2/24 | | | 198.51.100.2/24 | |
+# | 2001:db8:1::2/64 | | | 2001:db8:2::2/64 | |
+# +-------------------|-----+ +-------------------|-----+
+# | |
+# +-------------------|----------------------------|-----+
+# | R1 | | |
+# | $rp1 + $rp2 + |
+# | 192.0.2.1/24 198.51.100.1/24 |
+# | 2001:db8:1::1/64 2001:db8:2::1/64 |
+# +------------------------------------------------------+
+
ALL_TESTS="
ping_ipv4
ping_ipv6
diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
index cdf689e994..f9d26a7911 100644
--- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
@@ -199,25 +199,28 @@ ets_set_dwrr_two_bands()
ets_test_strict()
{
ets_set_strict
- ets_dwrr_test_01
- ets_dwrr_test_12
+ xfail_on_slow ets_dwrr_test_01
+ xfail_on_slow ets_dwrr_test_12
}
ets_test_mixed()
{
ets_set_mixed
- ets_dwrr_test_01
- ets_dwrr_test_12
+ xfail_on_slow ets_dwrr_test_01
+ xfail_on_slow ets_dwrr_test_12
}
ets_test_dwrr()
{
ets_set_dwrr_uniform
- ets_dwrr_test_012
+ xfail_on_slow ets_dwrr_test_012
+
ets_set_dwrr_varying
- ets_dwrr_test_012
+ xfail_on_slow ets_dwrr_test_012
+
ets_change_quantum
- ets_dwrr_test_012
+ xfail_on_slow ets_dwrr_test_012
+
ets_set_dwrr_two_bands
- ets_dwrr_test_01
+ xfail_on_slow ets_dwrr_test_01
}
diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh
index 81f31179ac..17f2864456 100755
--- a/tools/testing/selftests/net/forwarding/sch_red.sh
+++ b/tools/testing/selftests/net/forwarding/sch_red.sh
@@ -451,35 +451,35 @@ uninstall_qdisc()
ecn_test()
{
install_qdisc ecn
- do_ecn_test $BACKLOG
+ xfail_on_slow do_ecn_test $BACKLOG
uninstall_qdisc
}
ecn_nodrop_test()
{
install_qdisc ecn nodrop
- do_ecn_nodrop_test $BACKLOG
+ xfail_on_slow do_ecn_nodrop_test $BACKLOG
uninstall_qdisc
}
red_test()
{
install_qdisc
- do_red_test $BACKLOG
+ xfail_on_slow do_red_test $BACKLOG
uninstall_qdisc
}
red_qevent_test()
{
install_qdisc qevent early_drop block 10
- do_red_qevent_test $BACKLOG
+ xfail_on_slow do_red_qevent_test $BACKLOG
uninstall_qdisc
}
ecn_qevent_test()
{
install_qdisc ecn qevent mark block 10
- do_ecn_qevent_test $BACKLOG
+ xfail_on_slow do_ecn_qevent_test $BACKLOG
uninstall_qdisc
}
diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
index d1f26cb7cd..9cd884d4a5 100644
--- a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
+++ b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
@@ -227,7 +227,7 @@ do_tbf_test()
local nr=$(rate $t2 $t3 10)
local nr_pct=$((100 * (nr - er) / er))
((-5 <= nr_pct && nr_pct <= 5))
- check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-5%."
+ xfail_on_slow check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-5%."
log_test "TC $((vlan - 10)): TBF rate ${mbit}Mbit"
}
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
index bce8bb8d2b..2e3326edfa 100644
--- a/tools/testing/selftests/net/forwarding/tc_common.sh
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -4,7 +4,7 @@
CHECK_TC="yes"
# Can be overridden by the configuration file. See lib.sh
-TC_HIT_TIMEOUT=${TC_HIT_TIMEOUT:=1000} # ms
+: "${TC_HIT_TIMEOUT:=1000}" # ms
tc_check_packets()
{
diff --git a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
index 5a5dd90348..79775b10b9 100755
--- a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
+++ b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
@@ -1,7 +1,5 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
ALL_TESTS="tunnel_key_nofrag_test"
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
index 6038b96ece..b2184847e3 100644
--- a/tools/testing/selftests/net/gro.c
+++ b/tools/testing/selftests/net/gro.c
@@ -93,6 +93,7 @@ static bool tx_socket = true;
static int tcp_offset = -1;
static int total_hdr_len = -1;
static int ethhdr_proto = -1;
+static const int num_flush_id_cases = 6;
static void vlog(const char *fmt, ...)
{
@@ -620,6 +621,113 @@ static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type, char *ext
iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE);
}
+static void fix_ip4_checksum(struct iphdr *iph)
+{
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+}
+
+static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
+{
+ static char buf1[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char buf2[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char buf3[MAX_HDR_LEN + PAYLOAD_LEN];
+ bool send_three = false;
+ struct iphdr *iph1;
+ struct iphdr *iph2;
+ struct iphdr *iph3;
+
+ iph1 = (struct iphdr *)(buf1 + ETH_HLEN);
+ iph2 = (struct iphdr *)(buf2 + ETH_HLEN);
+ iph3 = (struct iphdr *)(buf3 + ETH_HLEN);
+
+ create_packet(buf1, 0, 0, PAYLOAD_LEN, 0);
+ create_packet(buf2, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ create_packet(buf3, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+
+ switch (tcase) {
+ case 0: /* DF=1, Incrementing - should coalesce */
+ iph1->frag_off |= htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off |= htons(IP_DF);
+ iph2->id = htons(9);
+ break;
+
+ case 1: /* DF=1, Fixed - should coalesce */
+ iph1->frag_off |= htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off |= htons(IP_DF);
+ iph2->id = htons(8);
+ break;
+
+ case 2: /* DF=0, Incrementing - should coalesce */
+ iph1->frag_off &= ~htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off &= ~htons(IP_DF);
+ iph2->id = htons(9);
+ break;
+
+ case 3: /* DF=0, Fixed - should not coalesce */
+ iph1->frag_off &= ~htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off &= ~htons(IP_DF);
+ iph2->id = htons(8);
+ break;
+
+ case 4: /* DF=1, two packets incrementing, and one fixed - should
+ * coalesce only the first two packets
+ */
+ iph1->frag_off |= htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off |= htons(IP_DF);
+ iph2->id = htons(9);
+
+ iph3->frag_off |= htons(IP_DF);
+ iph3->id = htons(9);
+ send_three = true;
+ break;
+
+ case 5: /* DF=1, two packets fixed, and one incrementing - should
+ * coalesce only the first two packets
+ */
+ iph1->frag_off |= htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off |= htons(IP_DF);
+ iph2->id = htons(8);
+
+ iph3->frag_off |= htons(IP_DF);
+ iph3->id = htons(9);
+ send_three = true;
+ break;
+ }
+
+ fix_ip4_checksum(iph1);
+ fix_ip4_checksum(iph2);
+ write_packet(fd, buf1, total_hdr_len + PAYLOAD_LEN, daddr);
+ write_packet(fd, buf2, total_hdr_len + PAYLOAD_LEN, daddr);
+
+ if (send_three) {
+ fix_ip4_checksum(iph3);
+ write_packet(fd, buf3, total_hdr_len + PAYLOAD_LEN, daddr);
+ }
+}
+
+static void test_flush_id(int fd, struct sockaddr_ll *daddr, char *fin_pkt)
+{
+ for (int i = 0; i < num_flush_id_cases; i++) {
+ sleep(1);
+ send_flush_id_case(fd, daddr, i);
+ sleep(1);
+ write_packet(fd, fin_pkt, total_hdr_len, daddr);
+ }
+}
+
static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2)
{
static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
@@ -938,6 +1046,8 @@ static void gro_sender(void)
send_fragment4(txfd, &daddr);
sleep(1);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ test_flush_id(txfd, &daddr, fin_pkt);
} else if (proto == PF_INET6) {
sleep(1);
send_fragment6(txfd, &daddr);
@@ -1064,6 +1174,34 @@ static void gro_receiver(void)
printf("fragmented ip4 doesn't coalesce: ");
check_recv_pkts(rxfd, correct_payload, 2);
+
+ /* is_atomic checks */
+ printf("DF=1, Incrementing - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("DF=1, Fixed - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("DF=0, Incrementing - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("DF=0, Fixed - should not coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
} else if (proto == PF_INET6) {
/* GRO doesn't check for ipv6 hop limit when flushing.
* Hence no corresponding test to the ipv4 case.
diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile
index 92c1d9d080..884cd2cc06 100644
--- a/tools/testing/selftests/net/hsr/Makefile
+++ b/tools/testing/selftests/net/hsr/Makefile
@@ -2,6 +2,7 @@
top_srcdir = ../../../../..
-TEST_PROGS := hsr_ping.sh
+TEST_PROGS := hsr_ping.sh hsr_redbox.sh
+TEST_FILES += hsr_common.sh
include ../../lib.mk
diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config
index 22061204fb..241542441c 100644
--- a/tools/testing/selftests/net/hsr/config
+++ b/tools/testing/selftests/net/hsr/config
@@ -2,3 +2,4 @@ CONFIG_IPV6=y
CONFIG_NET_SCH_NETEM=m
CONFIG_HSR=y
CONFIG_VETH=y
+CONFIG_BRIDGE=y
diff --git a/tools/testing/selftests/net/hsr/hsr_common.sh b/tools/testing/selftests/net/hsr/hsr_common.sh
new file mode 100644
index 0000000000..8e97b1f2e7
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/hsr_common.sh
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: GPL-2.0
+# Common code for HSR testing scripts
+
+source ../lib.sh
+ret=0
+ksft_skip=4
+
+# $1: IP address
+is_v6()
+{
+ [ -z "${1##*:*}" ]
+}
+
+do_ping()
+{
+ local netns="$1"
+ local connect_addr="$2"
+ local ping_args="-q -c 2"
+
+ if is_v6 "${connect_addr}"; then
+ $ipv6 || return 0
+ ping_args="${ping_args} -6"
+ fi
+
+ ip netns exec ${netns} ping ${ping_args} $connect_addr >/dev/null
+ if [ $? -ne 0 ] ; then
+ echo "$netns -> $connect_addr connectivity [ FAIL ]" 1>&2
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
+do_ping_long()
+{
+ local netns="$1"
+ local connect_addr="$2"
+ local ping_args="-q -c 10"
+
+ if is_v6 "${connect_addr}"; then
+ $ipv6 || return 0
+ ping_args="${ping_args} -6"
+ fi
+
+ OUT="$(LANG=C ip netns exec ${netns} ping ${ping_args} $connect_addr | grep received)"
+ if [ $? -ne 0 ] ; then
+ echo "$netns -> $connect_addr ping [ FAIL ]" 1>&2
+ ret=1
+ return 1
+ fi
+
+ VAL="$(echo $OUT | cut -d' ' -f1-8)"
+ SED_VAL="$(echo ${VAL} | sed -r -e 's/([0-9]{2}).*([0-9]{2}).*[[:space:]]([0-9]+%).*/\1 transmitted \2 received \3 loss/')"
+ if [ "${SED_VAL}" != "10 transmitted 10 received 0% loss" ]
+ then
+ echo "$netns -> $connect_addr ping TEST [ FAIL ]"
+ echo "Expect to send and receive 10 packets and no duplicates."
+ echo "Full message: ${OUT}."
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
+stop_if_error()
+{
+ local msg="$1"
+
+ if [ ${ret} -ne 0 ]; then
+ echo "FAIL: ${msg}" 1>&2
+ exit ${ret}
+ fi
+}
+
+check_prerequisites()
+{
+ ip -Version > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+ fi
+}
diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh
index 1c6457e546..3684b813b0 100755
--- a/tools/testing/selftests/net/hsr/hsr_ping.sh
+++ b/tools/testing/selftests/net/hsr/hsr_ping.sh
@@ -1,10 +1,10 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ret=0
-ksft_skip=4
ipv6=true
+source ./hsr_common.sh
+
optstring="h4"
usage() {
echo "Usage: $0 [OPTION]"
@@ -27,88 +27,6 @@ while getopts "$optstring" option;do
esac
done
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
-ns2="ns2-$rndh"
-ns3="ns3-$rndh"
-
-cleanup()
-{
- local netns
- for netns in "$ns1" "$ns2" "$ns3" ;do
- ip netns del $netns
- done
-}
-
-# $1: IP address
-is_v6()
-{
- [ -z "${1##*:*}" ]
-}
-
-do_ping()
-{
- local netns="$1"
- local connect_addr="$2"
- local ping_args="-q -c 2"
-
- if is_v6 "${connect_addr}"; then
- $ipv6 || return 0
- ping_args="${ping_args} -6"
- fi
-
- ip netns exec ${netns} ping ${ping_args} $connect_addr >/dev/null
- if [ $? -ne 0 ] ; then
- echo "$netns -> $connect_addr connectivity [ FAIL ]" 1>&2
- ret=1
- return 1
- fi
-
- return 0
-}
-
-do_ping_long()
-{
- local netns="$1"
- local connect_addr="$2"
- local ping_args="-q -c 10"
-
- if is_v6 "${connect_addr}"; then
- $ipv6 || return 0
- ping_args="${ping_args} -6"
- fi
-
- OUT="$(LANG=C ip netns exec ${netns} ping ${ping_args} $connect_addr | grep received)"
- if [ $? -ne 0 ] ; then
- echo "$netns -> $connect_addr ping [ FAIL ]" 1>&2
- ret=1
- return 1
- fi
-
- VAL="$(echo $OUT | cut -d' ' -f1-8)"
- if [ "$VAL" != "10 packets transmitted, 10 received, 0% packet loss," ]
- then
- echo "$netns -> $connect_addr ping TEST [ FAIL ]"
- echo "Expect to send and receive 10 packets and no duplicates."
- echo "Full message: ${OUT}."
- ret=1
- return 1
- fi
-
- return 0
-}
-
-stop_if_error()
-{
- local msg="$1"
-
- if [ ${ret} -ne 0 ]; then
- echo "FAIL: ${msg}" 1>&2
- exit ${ret}
- fi
-}
-
do_complete_ping_test()
{
echo "INFO: Initial validation ping."
@@ -248,27 +166,15 @@ setup_hsr_interfaces()
ip -net "$ns3" link set hsr3 up
}
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
+check_prerequisites
+setup_ns ns1 ns2 ns3
-trap cleanup EXIT
-
-for i in "$ns1" "$ns2" "$ns3" ;do
- ip netns add $i || exit $ksft_skip
- ip -net $i link set lo up
-done
+trap cleanup_all_ns EXIT
setup_hsr_interfaces 0
do_complete_ping_test
-cleanup
-for i in "$ns1" "$ns2" "$ns3" ;do
- ip netns add $i || exit $ksft_skip
- ip -net $i link set lo up
-done
+setup_ns ns1 ns2 ns3
setup_hsr_interfaces 1
do_complete_ping_test
diff --git a/tools/testing/selftests/net/hsr/hsr_redbox.sh b/tools/testing/selftests/net/hsr/hsr_redbox.sh
new file mode 100755
index 0000000000..1f36785347
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/hsr_redbox.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ipv6=false
+
+source ./hsr_common.sh
+
+do_complete_ping_test()
+{
+ echo "INFO: Initial validation ping (HSR-SAN/RedBox)."
+ # Each node has to be able to reach each one.
+ do_ping "${ns1}" 100.64.0.2
+ do_ping "${ns2}" 100.64.0.1
+ # Ping between SANs (test bridge)
+ do_ping "${ns4}" 100.64.0.51
+ do_ping "${ns5}" 100.64.0.41
+ # Ping from SANs to hsr1 (via hsr2) (and opposite)
+ do_ping "${ns3}" 100.64.0.1
+ do_ping "${ns1}" 100.64.0.3
+ do_ping "${ns1}" 100.64.0.41
+ do_ping "${ns4}" 100.64.0.1
+ do_ping "${ns1}" 100.64.0.51
+ do_ping "${ns5}" 100.64.0.1
+ stop_if_error "Initial validation failed."
+
+ # Wait for MGNT HSR frames being received and nodes being
+ # merged.
+ sleep 5
+
+ echo "INFO: Longer ping test (HSR-SAN/RedBox)."
+ # Ping from SAN to hsr1 (via hsr2)
+ do_ping_long "${ns3}" 100.64.0.1
+ # Ping from hsr1 (via hsr2) to SANs (and opposite)
+ do_ping_long "${ns1}" 100.64.0.3
+ do_ping_long "${ns1}" 100.64.0.41
+ do_ping_long "${ns4}" 100.64.0.1
+ do_ping_long "${ns1}" 100.64.0.51
+ do_ping_long "${ns5}" 100.64.0.1
+ stop_if_error "Longer ping test failed."
+
+ echo "INFO: All good."
+}
+
+setup_hsr_interfaces()
+{
+ local HSRv="$1"
+
+ echo "INFO: preparing interfaces for HSRv${HSRv} (HSR-SAN/RedBox)."
+#
+# IPv4 addresses (100.64.X.Y/24), and [X.Y] is presented on below diagram:
+#
+#
+# |NS1 | |NS4 |
+# | [0.1] | | |
+# | /-- hsr1 --\ | | [0.41] |
+# | ns1eth1 ns1eth2 | | ns4eth1 (SAN) |
+# |------------------------| |-------------------|
+# | | |
+# | | |
+# | | |
+# |------------------------| |-------------------------------|
+# | ns2eth1 ns2eth2 | | ns3eth2 |
+# | \-- hsr2 --/ | | / |
+# | [0.2] \ | | / | |------------|
+# | ns2eth3 |---| ns3eth1 -- ns3br1 -- ns3eth3--|--| ns5eth1 |
+# | (interlink)| | [0.3] [0.11] | | [0.51] |
+# |NS2 (RedBOX) | |NS3 (BR) | | NS5 (SAN) |
+#
+#
+ # Check if iproute2 supports adding interlink port to hsrX device
+ ip link help hsr | grep -q INTERLINK
+ [ $? -ne 0 ] && { echo "iproute2: HSR interlink interface not supported!"; exit 0; }
+
+ # Create interfaces for name spaces
+ ip link add ns1eth1 netns "${ns1}" type veth peer name ns2eth1 netns "${ns2}"
+ ip link add ns1eth2 netns "${ns1}" type veth peer name ns2eth2 netns "${ns2}"
+ ip link add ns2eth3 netns "${ns2}" type veth peer name ns3eth1 netns "${ns3}"
+ ip link add ns3eth2 netns "${ns3}" type veth peer name ns4eth1 netns "${ns4}"
+ ip link add ns3eth3 netns "${ns3}" type veth peer name ns5eth1 netns "${ns5}"
+
+ sleep 1
+
+ ip -n "${ns1}" link set ns1eth1 up
+ ip -n "${ns1}" link set ns1eth2 up
+
+ ip -n "${ns2}" link set ns2eth1 up
+ ip -n "${ns2}" link set ns2eth2 up
+ ip -n "${ns2}" link set ns2eth3 up
+
+ ip -n "${ns3}" link add name ns3br1 type bridge
+ ip -n "${ns3}" link set ns3br1 up
+ ip -n "${ns3}" link set ns3eth1 master ns3br1 up
+ ip -n "${ns3}" link set ns3eth2 master ns3br1 up
+ ip -n "${ns3}" link set ns3eth3 master ns3br1 up
+
+ ip -n "${ns4}" link set ns4eth1 up
+ ip -n "${ns5}" link set ns5eth1 up
+
+ ip -net "${ns1}" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version ${HSRv} proto 0
+ ip -net "${ns2}" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 interlink ns2eth3 supervision 45 version ${HSRv} proto 0
+
+ ip -n "${ns1}" addr add 100.64.0.1/24 dev hsr1
+ ip -n "${ns2}" addr add 100.64.0.2/24 dev hsr2
+ ip -n "${ns3}" addr add 100.64.0.11/24 dev ns3br1
+ ip -n "${ns3}" addr add 100.64.0.3/24 dev ns3eth1
+ ip -n "${ns4}" addr add 100.64.0.41/24 dev ns4eth1
+ ip -n "${ns5}" addr add 100.64.0.51/24 dev ns5eth1
+
+ ip -n "${ns1}" link set hsr1 up
+ ip -n "${ns2}" link set hsr2 up
+}
+
+check_prerequisites
+setup_ns ns1 ns2 ns3 ns4 ns5
+
+trap cleanup_all_ns EXIT
+
+setup_hsr_interfaces 1
+do_complete_ping_test
+
+exit $ret
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 16372ca167..9155c914c0 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -4,11 +4,16 @@
##############################################################################
# Defines
-WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
+: "${WAIT_TIMEOUT:=20}"
+
BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms
-# Kselftest framework requirement - SKIP code is 4.
+# Kselftest framework constants.
+ksft_pass=0
+ksft_fail=1
+ksft_xfail=2
ksft_skip=4
+
# namespace list created by setup_ns
NS_LIST=()
diff --git a/tools/testing/selftests/net/lib/.gitignore b/tools/testing/selftests/net/lib/.gitignore
new file mode 100644
index 0000000000..1ebc6187f4
--- /dev/null
+++ b/tools/testing/selftests/net/lib/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+csum
diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile
new file mode 100644
index 0000000000..82c3264b11
--- /dev/null
+++ b/tools/testing/selftests/net/lib/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
+CFLAGS += -I../../../../../usr/include/ $(KHDR_INCLUDES)
+# Additional include paths needed by kselftest.h
+CFLAGS += -I../../
+
+TEST_FILES := ../../../../../Documentation/netlink/specs
+TEST_FILES += ../../../../net/ynl
+
+TEST_GEN_FILES += csum
+
+TEST_INCLUDES := $(wildcard py/*.py)
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/csum.c b/tools/testing/selftests/net/lib/csum.c
index 90eb06fefa..b9f3fc3c34 100644
--- a/tools/testing/selftests/net/csum.c
+++ b/tools/testing/selftests/net/lib/csum.c
@@ -682,7 +682,7 @@ static int recv_verify_packet_ipv6(void *nh, int len)
}
/* return whether auxdata includes TP_STATUS_CSUM_VALID */
-static bool recv_verify_packet_csum(struct msghdr *msg)
+static uint32_t recv_get_packet_csum_status(struct msghdr *msg)
{
struct tpacket_auxdata *aux = NULL;
struct cmsghdr *cm;
@@ -706,7 +706,7 @@ static bool recv_verify_packet_csum(struct msghdr *msg)
if (!aux)
error(1, 0, "cmsg: no auxdata");
- return aux->tp_status & TP_STATUS_CSUM_VALID;
+ return aux->tp_status;
}
static int recv_packet(int fd)
@@ -716,6 +716,7 @@ static int recv_packet(int fd)
char ctrl[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
struct pkt *buf = (void *)_buf;
struct msghdr msg = {0};
+ uint32_t tp_status;
struct iovec iov;
int len, ret;
@@ -737,6 +738,17 @@ static int recv_packet(int fd)
if (len == -1)
error(1, errno, "recv p");
+ tp_status = recv_get_packet_csum_status(&msg);
+
+ /* GRO might coalesce randomized packets. Such GSO packets are
+ * then reinitialized for csum offload (CHECKSUM_PARTIAL), with
+ * a pseudo csum. Do not try to validate these checksums.
+ */
+ if (tp_status & TP_STATUS_CSUMNOTREADY) {
+ fprintf(stderr, "cmsg: GSO packet has partial csum: skip\n");
+ continue;
+ }
+
if (cfg_family == PF_INET6)
ret = recv_verify_packet_ipv6(buf, len);
else
@@ -753,7 +765,7 @@ static int recv_packet(int fd)
* Do not fail if kernel does not validate a good csum:
* Absence of validation does not imply invalid.
*/
- if (recv_verify_packet_csum(&msg) && cfg_bad_csum) {
+ if (tp_status & TP_STATUS_CSUM_VALID && cfg_bad_csum) {
fprintf(stderr, "cmsg: expected bad csum, pf_packet returns valid\n");
bad_validations++;
}
diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py
new file mode 100644
index 0000000000..b6d498d125
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/__init__.py
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from .consts import KSRC
+from .ksft import *
+from .netns import NetNS
+from .nsim import *
+from .utils import *
+from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily
diff --git a/tools/testing/selftests/net/lib/py/consts.py b/tools/testing/selftests/net/lib/py/consts.py
new file mode 100644
index 0000000000..f518ce79d8
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/consts.py
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import sys
+from pathlib import Path
+
+KSFT_DIR = (Path(__file__).parent / "../../..").resolve()
+KSRC = (Path(__file__).parent / "../../../../../..").resolve()
+
+KSFT_MAIN_NAME = Path(sys.argv[0]).with_suffix("").name
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
new file mode 100644
index 0000000000..4769b4eb1e
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -0,0 +1,159 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import builtins
+import inspect
+import sys
+import time
+import traceback
+from .consts import KSFT_MAIN_NAME
+
+KSFT_RESULT = None
+KSFT_RESULT_ALL = True
+
+
+class KsftFailEx(Exception):
+ pass
+
+
+class KsftSkipEx(Exception):
+ pass
+
+
+class KsftXfailEx(Exception):
+ pass
+
+
+def ksft_pr(*objs, **kwargs):
+ print("#", *objs, **kwargs)
+
+
+def _fail(*args):
+ global KSFT_RESULT
+ KSFT_RESULT = False
+
+ frame = inspect.stack()[2]
+ ksft_pr("At " + frame.filename + " line " + str(frame.lineno) + ":")
+ ksft_pr(*args)
+
+
+def ksft_eq(a, b, comment=""):
+ global KSFT_RESULT
+ if a != b:
+ _fail("Check failed", a, "!=", b, comment)
+
+
+def ksft_true(a, comment=""):
+ if not a:
+ _fail("Check failed", a, "does not eval to True", comment)
+
+
+def ksft_in(a, b, comment=""):
+ if a not in b:
+ _fail("Check failed", a, "not in", b, comment)
+
+
+def ksft_ge(a, b, comment=""):
+ if a < b:
+ _fail("Check failed", a, "<", b, comment)
+
+
+class ksft_raises:
+ def __init__(self, expected_type):
+ self.exception = None
+ self.expected_type = expected_type
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ if exc_type is None:
+ _fail(f"Expected exception {str(self.expected_type.__name__)}, none raised")
+ elif self.expected_type != exc_type:
+ _fail(f"Expected exception {str(self.expected_type.__name__)}, raised {str(exc_type.__name__)}")
+ self.exception = exc_val
+ # Suppress the exception if its the expected one
+ return self.expected_type == exc_type
+
+
+def ksft_busy_wait(cond, sleep=0.005, deadline=1, comment=""):
+ end = time.monotonic() + deadline
+ while True:
+ if cond():
+ return
+ if time.monotonic() > end:
+ _fail("Waiting for condition timed out", comment)
+ return
+ time.sleep(sleep)
+
+
+def ktap_result(ok, cnt=1, case="", comment=""):
+ global KSFT_RESULT_ALL
+ KSFT_RESULT_ALL = KSFT_RESULT_ALL and ok
+
+ res = ""
+ if not ok:
+ res += "not "
+ res += "ok "
+ res += str(cnt) + " "
+ res += KSFT_MAIN_NAME
+ if case:
+ res += "." + str(case.__name__)
+ if comment:
+ res += " # " + comment
+ print(res)
+
+
+def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
+ cases = cases or []
+
+ if globs and case_pfx:
+ for key, value in globs.items():
+ if not callable(value):
+ continue
+ for prefix in case_pfx:
+ if key.startswith(prefix):
+ cases.append(value)
+ break
+
+ totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
+
+ print("KTAP version 1")
+ print("1.." + str(len(cases)))
+
+ global KSFT_RESULT
+ cnt = 0
+ for case in cases:
+ KSFT_RESULT = True
+ cnt += 1
+ try:
+ case(*args)
+ except KsftSkipEx as e:
+ ktap_result(True, cnt, case, comment="SKIP " + str(e))
+ totals['skip'] += 1
+ continue
+ except KsftXfailEx as e:
+ ktap_result(True, cnt, case, comment="XFAIL " + str(e))
+ totals['xfail'] += 1
+ continue
+ except Exception as e:
+ tb = traceback.format_exc()
+ for line in tb.strip().split('\n'):
+ ksft_pr("Exception|", line)
+ ktap_result(False, cnt, case)
+ totals['fail'] += 1
+ continue
+
+ ktap_result(KSFT_RESULT, cnt, case)
+ if KSFT_RESULT:
+ totals['pass'] += 1
+ else:
+ totals['fail'] += 1
+
+ print(
+ f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0"
+ )
+
+
+def ksft_exit():
+ global KSFT_RESULT_ALL
+ sys.exit(0 if KSFT_RESULT_ALL else 1)
diff --git a/tools/testing/selftests/net/lib/py/netns.py b/tools/testing/selftests/net/lib/py/netns.py
new file mode 100644
index 0000000000..ecff85f907
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/netns.py
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from .utils import ip
+import random
+import string
+
+
+class NetNS:
+ def __init__(self, name=None):
+ if name:
+ self.name = name
+ else:
+ self.name = ''.join(random.choice(string.ascii_lowercase) for _ in range(8))
+ ip('netns add ' + self.name)
+
+ def __del__(self):
+ if self.name:
+ ip('netns del ' + self.name)
+ self.name = None
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, ex_type, ex_value, ex_tb):
+ self.__del__()
+
+ def __str__(self):
+ return self.name
+
+ def __repr__(self):
+ return f"NetNS({self.name})"
diff --git a/tools/testing/selftests/net/lib/py/nsim.py b/tools/testing/selftests/net/lib/py/nsim.py
new file mode 100644
index 0000000000..f571a8b313
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/nsim.py
@@ -0,0 +1,134 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import json
+import os
+import random
+import re
+import time
+from .utils import cmd, ip
+
+
+class NetdevSim:
+ """
+ Class for netdevsim netdevice and its attributes.
+ """
+
+ def __init__(self, nsimdev, port_index, ifname, ns=None):
+ # In case udev renamed the netdev to according to new schema,
+ # check if the name matches the port_index.
+ nsimnamere = re.compile(r"eni\d+np(\d+)")
+ match = nsimnamere.match(ifname)
+ if match and int(match.groups()[0]) != port_index + 1:
+ raise Exception("netdevice name mismatches the expected one")
+
+ self.ifname = ifname
+ self.nsimdev = nsimdev
+ self.port_index = port_index
+ self.ns = ns
+ self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index)
+ ret = ip("-j link show dev %s" % ifname, ns=ns)
+ self.dev = json.loads(ret.stdout)[0]
+ self.ifindex = self.dev["ifindex"]
+
+ def dfs_write(self, path, val):
+ self.nsimdev.dfs_write(f'ports/{self.port_index}/' + path, val)
+
+
+class NetdevSimDev:
+ """
+ Class for netdevsim bus device and its attributes.
+ """
+ @staticmethod
+ def ctrl_write(path, val):
+ fullpath = os.path.join("/sys/bus/netdevsim/", path)
+ with open(fullpath, "w") as f:
+ f.write(val)
+
+ def dfs_write(self, path, val):
+ fullpath = os.path.join(f"/sys/kernel/debug/netdevsim/netdevsim{self.addr}/", path)
+ with open(fullpath, "w") as f:
+ f.write(val)
+
+ def __init__(self, port_count=1, queue_count=1, ns=None):
+ # nsim will spawn in init_net, we'll set to actual ns once we switch it there
+ self.ns = None
+
+ if not os.path.exists("/sys/bus/netdevsim"):
+ cmd("modprobe netdevsim")
+
+ addr = random.randrange(1 << 15)
+ while True:
+ try:
+ self.ctrl_write("new_device", "%u %u %u" % (addr, port_count, queue_count))
+ except OSError as e:
+ if e.errno == errno.ENOSPC:
+ addr = random.randrange(1 << 15)
+ continue
+ raise e
+ break
+ self.addr = addr
+
+ # As probe of netdevsim device might happen from a workqueue,
+ # so wait here until all netdevs appear.
+ self.wait_for_netdevs(port_count)
+
+ if ns:
+ cmd(f"devlink dev reload netdevsim/netdevsim{addr} netns {ns.name}")
+ self.ns = ns
+
+ cmd("udevadm settle", ns=self.ns)
+ ifnames = self.get_ifnames()
+
+ self.dfs_dir = "/sys/kernel/debug/netdevsim/netdevsim%u/" % addr
+
+ self.nsims = []
+ for port_index in range(port_count):
+ self.nsims.append(self._make_port(port_index, ifnames[port_index]))
+
+ self.removed = False
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, ex_type, ex_value, ex_tb):
+ """
+ __exit__ gets called at the end of a "with" block.
+ """
+ self.remove()
+
+ def _make_port(self, port_index, ifname):
+ return NetdevSim(self, port_index, ifname, self.ns)
+
+ def get_ifnames(self):
+ ifnames = []
+ listdir = cmd(f"ls /sys/bus/netdevsim/devices/netdevsim{self.addr}/net/",
+ ns=self.ns).stdout.split()
+ for ifname in listdir:
+ ifnames.append(ifname)
+ ifnames.sort()
+ return ifnames
+
+ def wait_for_netdevs(self, port_count):
+ timeout = 5
+ timeout_start = time.time()
+
+ while True:
+ try:
+ ifnames = self.get_ifnames()
+ except FileNotFoundError as e:
+ ifnames = []
+ if len(ifnames) == port_count:
+ break
+ if time.time() < timeout_start + timeout:
+ continue
+ raise Exception("netdevices did not appear within timeout")
+
+ def remove(self):
+ if not self.removed:
+ self.ctrl_write("del_device", "%u" % (self.addr, ))
+ self.removed = True
+
+ def remove_nsim(self, nsim):
+ self.nsims.remove(nsim)
+ self.ctrl_write("devices/netdevsim%u/del_port" % (self.addr, ),
+ "%u" % (nsim.port_index, ))
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
new file mode 100644
index 0000000000..0540ea2492
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import json as _json
+import random
+import re
+import subprocess
+import time
+
+
+class cmd:
+ def __init__(self, comm, shell=True, fail=True, ns=None, background=False, host=None, timeout=5):
+ if ns:
+ comm = f'ip netns exec {ns} ' + comm
+
+ self.stdout = None
+ self.stderr = None
+ self.ret = None
+
+ self.comm = comm
+ if host:
+ self.proc = host.cmd(comm)
+ else:
+ self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ if not background:
+ self.process(terminate=False, fail=fail, timeout=timeout)
+
+ def process(self, terminate=True, fail=None, timeout=5):
+ if fail is None:
+ fail = not terminate
+
+ if terminate:
+ self.proc.terminate()
+ stdout, stderr = self.proc.communicate(timeout)
+ self.stdout = stdout.decode("utf-8")
+ self.stderr = stderr.decode("utf-8")
+ self.proc.stdout.close()
+ self.proc.stderr.close()
+ self.ret = self.proc.returncode
+
+ if self.proc.returncode != 0 and fail:
+ if len(stderr) > 0 and stderr[-1] == "\n":
+ stderr = stderr[:-1]
+ raise Exception("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" %
+ (self.proc.args, stdout, stderr))
+
+
+class bkg(cmd):
+ def __init__(self, comm, shell=True, fail=None, ns=None, host=None,
+ exit_wait=False):
+ super().__init__(comm, background=True,
+ shell=shell, fail=fail, ns=ns, host=host)
+ self.terminate = not exit_wait
+ self.check_fail = fail
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, ex_type, ex_value, ex_tb):
+ return self.process(terminate=self.terminate, fail=self.check_fail)
+
+
+def tool(name, args, json=None, ns=None, host=None):
+ cmd_str = name + ' '
+ if json:
+ cmd_str += '--json '
+ cmd_str += args
+ cmd_obj = cmd(cmd_str, ns=ns, host=host)
+ if json:
+ return _json.loads(cmd_obj.stdout)
+ return cmd_obj
+
+
+def ip(args, json=None, ns=None, host=None):
+ if ns:
+ args = f'-netns {ns} ' + args
+ return tool('ip', args, json=json, host=host)
+
+
+def rand_port():
+ """
+ Get unprivileged port, for now just random, one day we may decide to check if used.
+ """
+ return random.randint(10000, 65535)
+
+
+def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadline=5):
+ end = time.monotonic() + deadline
+
+ pattern = f":{port:04X} .* "
+ if proto == "tcp": # for tcp protocol additionally check the socket state
+ pattern += "0A"
+ pattern = re.compile(pattern)
+
+ while True:
+ data = cmd(f'cat /proc/net/{proto}*', ns=ns, host=host, shell=True).stdout
+ for row in data.split("\n"):
+ if pattern.search(row):
+ return
+ if time.monotonic() > end:
+ raise Exception("Waiting for port listen timed out")
+ time.sleep(sleep)
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
new file mode 100644
index 0000000000..1ace58370c
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import sys
+from pathlib import Path
+from .consts import KSRC, KSFT_DIR
+from .ksft import ksft_pr, ktap_result
+
+# Resolve paths
+try:
+ if (KSFT_DIR / "kselftest-list.txt").exists():
+ # Running in "installed" selftests
+ tools_full_path = KSFT_DIR
+ SPEC_PATH = KSFT_DIR / "net/lib/specs"
+
+ sys.path.append(tools_full_path.as_posix())
+ from net.lib.ynl.lib import YnlFamily, NlError
+ else:
+ # Running in tree
+ tools_full_path = KSRC / "tools"
+ SPEC_PATH = KSRC / "Documentation/netlink/specs"
+
+ sys.path.append(tools_full_path.as_posix())
+ from net.ynl.lib import YnlFamily, NlError
+except ModuleNotFoundError as e:
+ ksft_pr("Failed importing `ynl` library from kernel sources")
+ ksft_pr(str(e))
+ ktap_result(True, comment="SKIP")
+ sys.exit(4)
+
+#
+# Wrapper classes, loading the right specs
+# Set schema='' to avoid jsonschema validation, it's slow
+#
+class EthtoolFamily(YnlFamily):
+ def __init__(self):
+ super().__init__((SPEC_PATH / Path('ethtool.yaml')).as_posix(),
+ schema='')
+
+
+class RtnlFamily(YnlFamily):
+ def __init__(self):
+ super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(),
+ schema='')
+
+
+class NetdevFamily(YnlFamily):
+ def __init__(self):
+ super().__init__((SPEC_PATH / Path('netdev.yaml')).as_posix(),
+ schema='')
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index bc97ab33a0..776d43a692 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -200,6 +200,58 @@ chk_msk_cestab()
"${expected}" "${msg}" ""
}
+msk_info_get_value()
+{
+ local port="${1}"
+ local info="${2}"
+
+ ss -N "${ns}" -inHM dport "${port}" | \
+ mptcp_lib_get_info_value "${info}" "${info}"
+}
+
+chk_msk_info()
+{
+ local port="${1}"
+ local info="${2}"
+ local cnt="${3}"
+ local msg="....chk ${info}"
+ local delta_ms=250 # half what we waited before, just to be sure
+ local now
+
+ now=$(msk_info_get_value "${port}" "${info}")
+
+ mptcp_lib_print_title "${msg}"
+ if { [ -z "${cnt}" ] || [ -z "${now}" ]; } &&
+ ! mptcp_lib_expect_all_features; then
+ mptcp_lib_pr_skip "Feature probably not supported"
+ mptcp_lib_result_skip "${msg}"
+ elif [ "$((cnt + delta_ms))" -lt "${now}" ]; then
+ mptcp_lib_pr_ok
+ mptcp_lib_result_pass "${msg}"
+ else
+ mptcp_lib_pr_fail "value of ${info} changed by $((now - cnt))ms," \
+ "expected at least ${delta_ms}ms"
+ mptcp_lib_result_fail "${msg}"
+ ret=${KSFT_FAIL}
+ fi
+}
+
+chk_last_time_info()
+{
+ local port="${1}"
+ local data_sent data_recv ack_recv
+
+ data_sent=$(msk_info_get_value "${port}" "last_data_sent")
+ data_recv=$(msk_info_get_value "${port}" "last_data_recv")
+ ack_recv=$(msk_info_get_value "${port}" "last_ack_recv")
+
+ sleep 0.5 # wait to check after if the timestamps difference
+
+ chk_msk_info "${port}" "last_data_sent" "${data_sent}"
+ chk_msk_info "${port}" "last_data_recv" "${data_recv}"
+ chk_msk_info "${port}" "last_ack_recv" "${ack_recv}"
+}
+
wait_connected()
{
local listener_ns="${1}"
@@ -233,6 +285,7 @@ echo "b" | \
127.0.0.1 >/dev/null &
wait_connected $ns 10000
chk_msk_nr 2 "after MPC handshake "
+chk_last_time_info 10000
chk_msk_remote_key_nr 2 "....chk remote_key"
chk_msk_fallback_nr 0 "....chk no fallback"
chk_msk_inuse 2
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 4131f3263a..b77fb7065b 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -147,7 +147,7 @@ cleanup()
mptcp_lib_check_mptcp
mptcp_lib_check_kallsyms
-mptcp_lib_check_tools ip
+mptcp_lib_check_tools ip tc
sin=$(mktemp)
sout=$(mktemp)
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 2290125f6d..108aeeb84e 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -31,7 +31,6 @@ timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
capture=false
checksum=false
-ip_mptcp=0
check_invert=0
validate_checksum=false
init=0
@@ -142,7 +141,7 @@ init() {
mptcp_lib_check_mptcp
mptcp_lib_check_kallsyms
- mptcp_lib_check_tools ip ss "${iptables}" "${ip6tables}"
+ mptcp_lib_check_tools ip tc ss "${iptables}" "${ip6tables}"
sin=$(mktemp)
sout=$(mktemp)
@@ -610,173 +609,65 @@ kill_events_pids()
pm_nl_set_limits()
{
- local ns=$1
- local addrs=$2
- local subflows=$3
-
- if [ $ip_mptcp -eq 1 ]; then
- ip -n $ns mptcp limits set add_addr_accepted $addrs subflows $subflows
- else
- ip netns exec $ns ./pm_nl_ctl limits $addrs $subflows
- fi
+ mptcp_lib_pm_nl_set_limits "${@}"
}
pm_nl_add_endpoint()
{
- local ns=$1
- local addr=$2
- local flags _flags
- local port _port
- local dev _dev
- local id _id
- local nr=2
-
- local p
- for p in "${@}"
- do
- if [ $p = "flags" ]; then
- eval _flags=\$"$nr"
- [ -n "$_flags" ]; flags="flags $_flags"
- fi
- if [ $p = "dev" ]; then
- eval _dev=\$"$nr"
- [ -n "$_dev" ]; dev="dev $_dev"
- fi
- if [ $p = "id" ]; then
- eval _id=\$"$nr"
- [ -n "$_id" ]; id="id $_id"
- fi
- if [ $p = "port" ]; then
- eval _port=\$"$nr"
- [ -n "$_port" ]; port="port $_port"
- fi
-
- nr=$((nr + 1))
- done
-
- if [ $ip_mptcp -eq 1 ]; then
- ip -n $ns mptcp endpoint add $addr ${_flags//","/" "} $dev $id $port
- else
- ip netns exec $ns ./pm_nl_ctl add $addr $flags $dev $id $port
- fi
+ mptcp_lib_pm_nl_add_endpoint "${@}"
}
pm_nl_del_endpoint()
{
- local ns=$1
- local id=$2
- local addr=$3
-
- if [ $ip_mptcp -eq 1 ]; then
- [ $id -ne 0 ] && addr=''
- ip -n $ns mptcp endpoint delete id $id $addr
- else
- ip netns exec $ns ./pm_nl_ctl del $id $addr
- fi
+ mptcp_lib_pm_nl_del_endpoint "${@}"
}
pm_nl_flush_endpoint()
{
- local ns=$1
-
- if [ $ip_mptcp -eq 1 ]; then
- ip -n $ns mptcp endpoint flush
- else
- ip netns exec $ns ./pm_nl_ctl flush
- fi
+ mptcp_lib_pm_nl_flush_endpoint "${@}"
}
pm_nl_show_endpoints()
{
- local ns=$1
-
- if [ $ip_mptcp -eq 1 ]; then
- ip -n $ns mptcp endpoint show
- else
- ip netns exec $ns ./pm_nl_ctl dump
- fi
+ mptcp_lib_pm_nl_show_endpoints "${@}"
}
pm_nl_change_endpoint()
{
- local ns=$1
- local id=$2
- local flags=$3
-
- if [ $ip_mptcp -eq 1 ]; then
- ip -n $ns mptcp endpoint change id $id ${flags//","/" "}
- else
- ip netns exec $ns ./pm_nl_ctl set id $id flags $flags
- fi
+ mptcp_lib_pm_nl_change_endpoint "${@}"
}
pm_nl_check_endpoint()
{
- local line expected_line
local msg="$1"
local ns=$2
local addr=$3
- local _flags=""
- local flags
- local _port
- local port
- local dev
- local _id
- local id
+ local flags dev id port
print_check "${msg}"
shift 3
while [ -n "$1" ]; do
- if [ $1 = "flags" ]; then
- _flags=$2
- [ -n "$_flags" ]; flags="flags $_flags"
- shift
- elif [ $1 = "dev" ]; then
- [ -n "$2" ]; dev="dev $2"
+ case "${1}" in
+ "flags" | "dev" | "id" | "port")
+ eval "${1}"="${2}"
shift
- elif [ $1 = "id" ]; then
- _id=$2
- [ -n "$_id" ]; id="id $_id"
- shift
- elif [ $1 = "port" ]; then
- _port=$2
- [ -n "$_port" ]; port=" port $_port"
- shift
- fi
+ ;;
+ *)
+ ;;
+ esac
shift
done
- if [ -z "$id" ]; then
+ if [ -z "${id}" ]; then
test_fail "bad test - missing endpoint id"
return
fi
- if [ $ip_mptcp -eq 1 ]; then
- # get line and trim trailing whitespace
- line=$(ip -n $ns mptcp endpoint show $id)
- line="${line% }"
- # the dump order is: address id flags port dev
- [ -n "$addr" ] && expected_line="$addr"
- expected_line+=" $id"
- [ -n "$_flags" ] && expected_line+=" ${_flags//","/" "}"
- [ -n "$dev" ] && expected_line+=" $dev"
- [ -n "$port" ] && expected_line+=" $port"
- else
- line=$(ip netns exec $ns ./pm_nl_ctl get $_id)
- # the dump order is: id flags dev address port
- expected_line="$id"
- [ -n "$flags" ] && expected_line+=" $flags"
- [ -n "$dev" ] && expected_line+=" $dev"
- [ -n "$addr" ] && expected_line+=" $addr"
- [ -n "$_port" ] && expected_line+=" $_port"
- fi
- if [ "$line" = "$expected_line" ]; then
- print_ok
- else
- fail_test "expected '$expected_line' found '$line'"
- fi
+ check_output "mptcp_lib_pm_nl_get_endpoint ${ns} ${id}" \
+ "$(mptcp_lib_pm_nl_format_endpoints \
+ "${id},${addr},${flags//","/" "},${dev},${port}")"
}
pm_nl_set_endpoint()
@@ -3711,7 +3602,7 @@ while getopts "${all_tests_args}cCih" opt; do
checksum=true
;;
i)
- ip_mptcp=1
+ mptcp_lib_set_ip_mptcp
;;
h)
usage
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index d529b4b37a..6ffa9b7a32 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -21,8 +21,10 @@ declare -rx MPTCP_LIB_AF_INET6=10
MPTCP_LIB_SUBTESTS=()
MPTCP_LIB_SUBTESTS_DUPLICATED=0
+MPTCP_LIB_SUBTEST_FLAKY=0
MPTCP_LIB_TEST_COUNTER=0
MPTCP_LIB_TEST_FORMAT="%02u %-50s"
+MPTCP_LIB_IP_MPTCP=0
# only if supported (or forced) and not disabled, see no-color.org
if { [ -t 1 ] || [ "${SELFTESTS_MPTCP_LIB_COLOR_FORCE:-}" = "1" ]; } &&
@@ -40,6 +42,16 @@ else
readonly MPTCP_LIB_COLOR_RESET=
fi
+# SELFTESTS_MPTCP_LIB_OVERRIDE_FLAKY env var can be set not to ignore errors
+# from subtests marked as flaky
+mptcp_lib_override_flaky() {
+ [ "${SELFTESTS_MPTCP_LIB_OVERRIDE_FLAKY:-}" = 1 ]
+}
+
+mptcp_lib_subtest_is_flaky() {
+ [ "${MPTCP_LIB_SUBTEST_FLAKY}" = 1 ] && ! mptcp_lib_override_flaky
+}
+
# $1: color, $2: text
mptcp_lib_print_color() {
echo -e "${MPTCP_LIB_START_PRINT:-}${*}${MPTCP_LIB_COLOR_RESET}"
@@ -71,7 +83,16 @@ mptcp_lib_pr_skip() {
}
mptcp_lib_pr_fail() {
- mptcp_lib_print_err "[FAIL]${1:+ ${*}}"
+ local title cmt
+
+ if mptcp_lib_subtest_is_flaky; then
+ title="IGNO"
+ cmt=" (flaky)"
+ else
+ title="FAIL"
+ fi
+
+ mptcp_lib_print_err "[${title}]${cmt}${1:+ ${*}}"
}
mptcp_lib_pr_info() {
@@ -207,7 +228,13 @@ mptcp_lib_result_pass() {
# $1: test name
mptcp_lib_result_fail() {
- __mptcp_lib_result_add "not ok" "${1}"
+ if mptcp_lib_subtest_is_flaky; then
+ # It might sound better to use 'not ok # TODO' or 'ok # SKIP',
+ # but some CIs don't understand 'TODO' and treat SKIP as errors.
+ __mptcp_lib_result_add "ok" "${1} # IGNORE Flaky"
+ else
+ __mptcp_lib_result_add "not ok" "${1}"
+ fi
}
# $1: test name
@@ -384,6 +411,12 @@ mptcp_lib_check_tools() {
exit ${KSFT_SKIP}
fi
;;
+ "tc")
+ if ! tc -help &> /dev/null; then
+ mptcp_lib_pr_skip "Could not run test without tc tool"
+ exit ${KSFT_SKIP}
+ fi
+ ;;
"ss")
if ! ss -h | grep -q MPTCP; then
mptcp_lib_pr_skip "ss tool does not support MPTCP"
@@ -505,3 +538,131 @@ mptcp_lib_verify_listener_events() {
mptcp_lib_check_expected "type" "family" "saddr" "sport" || rc="${?}"
return "${rc}"
}
+
+mptcp_lib_set_ip_mptcp() {
+ MPTCP_LIB_IP_MPTCP=1
+}
+
+mptcp_lib_is_ip_mptcp() {
+ [ "${MPTCP_LIB_IP_MPTCP}" = "1" ]
+}
+
+# format: <id>,<ip>,<flags>,<dev>
+mptcp_lib_pm_nl_format_endpoints() {
+ local entry id ip flags dev port
+
+ for entry in "${@}"; do
+ IFS=, read -r id ip flags dev port <<< "${entry}"
+ if mptcp_lib_is_ip_mptcp; then
+ echo -n "${ip}"
+ [ -n "${port}" ] && echo -n " port ${port}"
+ echo -n " id ${id}"
+ [ -n "${flags}" ] && echo -n " ${flags}"
+ [ -n "${dev}" ] && echo -n " dev ${dev}"
+ echo " " # always a space at the end
+ else
+ echo -n "id ${id}"
+ echo -n " flags ${flags//" "/","}"
+ [ -n "${dev}" ] && echo -n " dev ${dev}"
+ echo -n " ${ip}"
+ [ -n "${port}" ] && echo -n " ${port}"
+ echo ""
+ fi
+ done
+}
+
+mptcp_lib_pm_nl_get_endpoint() {
+ local ns=${1}
+ local id=${2}
+
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns}" mptcp endpoint show id "${id}"
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl get "${id}"
+ fi
+}
+
+mptcp_lib_pm_nl_set_limits() {
+ local ns=${1}
+ local addrs=${2}
+ local subflows=${3}
+
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns}" mptcp limits set add_addr_accepted "${addrs}" subflows "${subflows}"
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl limits "${addrs}" "${subflows}"
+ fi
+}
+
+mptcp_lib_pm_nl_add_endpoint() {
+ local ns=${1}
+ local addr=${2}
+ local flags dev id port
+ local nr=2
+
+ local p
+ for p in "${@}"; do
+ case "${p}" in
+ "flags" | "dev" | "id" | "port")
+ eval "${p}"=\$"${nr}"
+ ;;
+ esac
+
+ nr=$((nr + 1))
+ done
+
+ if mptcp_lib_is_ip_mptcp; then
+ # shellcheck disable=SC2086 # blanks in flags, no double quote
+ ip -n "${ns}" mptcp endpoint add "${addr}" ${flags//","/" "} \
+ ${dev:+dev "${dev}"} ${id:+id "${id}"} ${port:+port "${port}"}
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl add "${addr}" ${flags:+flags "${flags}"} \
+ ${dev:+dev "${dev}"} ${id:+id "${id}"} ${port:+port "${port}"}
+ fi
+}
+
+mptcp_lib_pm_nl_del_endpoint() {
+ local ns=${1}
+ local id=${2}
+ local addr=${3}
+
+ if mptcp_lib_is_ip_mptcp; then
+ [ "${id}" -ne 0 ] && addr=''
+ ip -n "${ns}" mptcp endpoint delete id "${id}" ${addr:+"${addr}"}
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl del "${id}" "${addr}"
+ fi
+}
+
+mptcp_lib_pm_nl_flush_endpoint() {
+ local ns=${1}
+
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns}" mptcp endpoint flush
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl flush
+ fi
+}
+
+mptcp_lib_pm_nl_show_endpoints() {
+ local ns=${1}
+
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns}" mptcp endpoint show
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl dump
+ fi
+}
+
+mptcp_lib_pm_nl_change_endpoint() {
+ local ns=${1}
+ local id=${2}
+ local flags=${3}
+
+ if mptcp_lib_is_ip_mptcp; then
+ # shellcheck disable=SC2086 # blanks in flags, no double quote
+ ip -n "${ns}" mptcp endpoint change id "${id}" ${flags//","/" "}
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl set id "${id}" flags "${flags}"
+ fi
+}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index e2d70c1878..68899a303a 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -22,6 +22,28 @@ ns1=""
ns2=""
ns_sbox=""
+usage() {
+ echo "Usage: $0 [ -i ] [ -h ]"
+ echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'"
+ echo -e "\t-h: help"
+}
+
+while getopts "hi" option;do
+ case "$option" in
+ "h")
+ usage "$0"
+ exit ${KSFT_PASS}
+ ;;
+ "i")
+ mptcp_lib_set_ip_mptcp
+ ;;
+ "?")
+ usage "$0"
+ exit ${KSFT_FAIL}
+ ;;
+ esac
+done
+
add_mark_rules()
{
local ns=$1
@@ -58,15 +80,15 @@ init()
# let $ns2 reach any $ns1 address from any interface
ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.$i.1 flags signal
- ip netns exec $ns1 ./pm_nl_ctl add dead:beef:$i::1 flags signal
+ mptcp_lib_pm_nl_add_endpoint "${ns1}" "10.0.${i}.1" flags signal
+ mptcp_lib_pm_nl_add_endpoint "${ns1}" "dead:beef:${i}::1" flags signal
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.$i.2 flags signal
- ip netns exec $ns2 ./pm_nl_ctl add dead:beef:$i::2 flags signal
+ mptcp_lib_pm_nl_add_endpoint "${ns2}" "10.0.${i}.2" flags signal
+ mptcp_lib_pm_nl_add_endpoint "${ns2}" "dead:beef:${i}::2" flags signal
done
- ip netns exec $ns1 ./pm_nl_ctl limits 8 8
- ip netns exec $ns2 ./pm_nl_ctl limits 8 8
+ mptcp_lib_pm_nl_set_limits "${ns1}" 8 8
+ mptcp_lib_pm_nl_set_limits "${ns2}" 8 8
add_mark_rules $ns1 1
add_mark_rules $ns2 2
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index 6ab8c5d363..2757378b1b 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -1,28 +1,28 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# Double quotes to prevent globbing and word splitting is recommended in new
-# code but we accept it, especially because there were too many before having
-# address all other issues detected by shellcheck.
-#shellcheck disable=SC2086
-
. "$(dirname "${0}")/mptcp_lib.sh"
ret=0
usage() {
- echo "Usage: $0 [ -h ]"
+ echo "Usage: $0 [ -i ] [ -h ]"
+ echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'"
+ echo -e "\t-h: help"
}
-optstring=h
+optstring=hi
while getopts "$optstring" option;do
case "$option" in
"h")
- usage $0
+ usage "$0"
exit ${KSFT_PASS}
;;
+ "i")
+ mptcp_lib_set_ip_mptcp
+ ;;
"?")
- usage $0
+ usage "$0"
exit ${KSFT_FAIL}
;;
esac
@@ -35,7 +35,7 @@ err=$(mktemp)
#shellcheck disable=SC2317
cleanup()
{
- rm -f $err
+ rm -f "${err}"
mptcp_lib_ns_exit "${ns1}"
}
@@ -46,6 +46,76 @@ trap cleanup EXIT
mptcp_lib_ns_init ns1
+format_limits() {
+ local accept="${1}"
+ local subflows="${2}"
+
+ if mptcp_lib_is_ip_mptcp; then
+ # with a space at the end
+ printf "add_addr_accepted %d subflows %d \n" "${accept}" "${subflows}"
+ else
+ printf "accept %d\nsubflows %d\n" "${accept}" "${subflows}"
+ fi
+}
+
+get_limits() {
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns1}" mptcp limits
+ else
+ ip netns exec "${ns1}" ./pm_nl_ctl limits
+ fi
+}
+
+format_endpoints() {
+ mptcp_lib_pm_nl_format_endpoints "${@}"
+}
+
+get_endpoint() {
+ # shellcheck disable=SC2317 # invoked indirectly
+ mptcp_lib_pm_nl_get_endpoint "${ns1}" "${@}"
+}
+
+change_address() {
+ local addr=${1}
+ local flags=${2}
+
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns1}" mptcp endpoint change "${addr}" "${flags}"
+ else
+ ip netns exec "${ns1}" ./pm_nl_ctl set "${addr}" flags "${flags}"
+ fi
+}
+
+set_limits()
+{
+ mptcp_lib_pm_nl_set_limits "${ns1}" "${@}"
+}
+
+add_endpoint()
+{
+ mptcp_lib_pm_nl_add_endpoint "${ns1}" "${@}"
+}
+
+del_endpoint()
+{
+ mptcp_lib_pm_nl_del_endpoint "${ns1}" "${@}"
+}
+
+flush_endpoint()
+{
+ mptcp_lib_pm_nl_flush_endpoint "${ns1}"
+}
+
+show_endpoints()
+{
+ mptcp_lib_pm_nl_show_endpoints "${ns1}"
+}
+
+change_endpoint()
+{
+ mptcp_lib_pm_nl_change_endpoint "${ns1}" "${@}"
+}
+
check()
{
local cmd="$1"
@@ -67,125 +137,126 @@ check()
fi
}
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list"
+check "show_endpoints" "" "defaults addr list"
-default_limits="$(ip netns exec $ns1 ./pm_nl_ctl limits)"
+default_limits="$(get_limits)"
if mptcp_lib_expect_all_features; then
- check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
-subflows 2" "defaults limits"
+ check "get_limits" "$(format_limits 0 2)" "defaults limits"
fi
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 flags signal,backup
-check "ip netns exec $ns1 ./pm_nl_ctl get 1" "id 1 flags 10.0.1.1" "simple add/get addr"
+add_endpoint 10.0.1.1
+add_endpoint 10.0.1.2 flags subflow dev lo
+add_endpoint 10.0.1.3 flags signal,backup
+check "get_endpoint 1" "$(format_endpoints "1,10.0.1.1")" "simple add/get addr"
-check "ip netns exec $ns1 ./pm_nl_ctl dump" \
-"id 1 flags 10.0.1.1
-id 2 flags subflow dev lo 10.0.1.2
-id 3 flags signal,backup 10.0.1.3" "dump addrs"
+check "show_endpoints" \
+ "$(format_endpoints "1,10.0.1.1" \
+ "2,10.0.1.2,subflow,lo" \
+ "3,10.0.1.3,signal backup")" "dump addrs"
-ip netns exec $ns1 ./pm_nl_ctl del 2
-check "ip netns exec $ns1 ./pm_nl_ctl get 2" "" "simple del addr"
-check "ip netns exec $ns1 ./pm_nl_ctl dump" \
-"id 1 flags 10.0.1.1
-id 3 flags signal,backup 10.0.1.3" "dump addrs after del"
+del_endpoint 2
+check "get_endpoint 2" "" "simple del addr"
+check "show_endpoints" \
+ "$(format_endpoints "1,10.0.1.1" \
+ "3,10.0.1.3,signal backup")" "dump addrs after del"
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 2>/dev/null
-check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr"
+add_endpoint 10.0.1.3 2>/dev/null
+check "get_endpoint 4" "" "duplicate addr"
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 flags signal
-check "ip netns exec $ns1 ./pm_nl_ctl get 4" "id 4 flags signal 10.0.1.4" "id addr increment"
+add_endpoint 10.0.1.4 flags signal
+check "get_endpoint 4" "$(format_endpoints "4,10.0.1.4,signal")" "id addr increment"
for i in $(seq 5 9); do
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.$i flags signal >/dev/null 2>&1
+ add_endpoint "10.0.1.${i}" flags signal >/dev/null 2>&1
done
-check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit"
-check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit"
+check "get_endpoint 9" "$(format_endpoints "9,10.0.1.9,signal")" "hard addr limit"
+check "get_endpoint 10" "" "above hard addr limit"
-ip netns exec $ns1 ./pm_nl_ctl del 9
+del_endpoint 9
for i in $(seq 10 255); do
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $i
- ip netns exec $ns1 ./pm_nl_ctl del $i
+ add_endpoint 10.0.0.9 id "${i}"
+ del_endpoint "${i}"
done
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1
-id 3 flags signal,backup 10.0.1.3
-id 4 flags signal 10.0.1.4
-id 5 flags signal 10.0.1.5
-id 6 flags signal 10.0.1.6
-id 7 flags signal 10.0.1.7
-id 8 flags signal 10.0.1.8" "id limit"
-
-ip netns exec $ns1 ./pm_nl_ctl flush
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs"
-
-ip netns exec $ns1 ./pm_nl_ctl limits 9 1 2>/dev/null
-check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "rcv addrs above hard limit"
-
-ip netns exec $ns1 ./pm_nl_ctl limits 1 9 2>/dev/null
-check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "subflows above hard limit"
-
-ip netns exec $ns1 ./pm_nl_ctl limits 8 8
-check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8
-subflows 8" "set limits"
-
-ip netns exec $ns1 ./pm_nl_ctl flush
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 id 100
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.5 id 254
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.6
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.7
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.8
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1
-id 2 flags 10.0.1.2
-id 3 flags 10.0.1.7
-id 4 flags 10.0.1.8
-id 100 flags 10.0.1.3
-id 101 flags 10.0.1.4
-id 254 flags 10.0.1.5
-id 255 flags 10.0.1.6" "set ids"
-
-ip netns exec $ns1 ./pm_nl_ctl flush
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.1
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.2 id 254
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.3
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.4
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.5 id 253
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.6
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.7
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.8
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.0.1
-id 2 flags 10.0.0.4
-id 3 flags 10.0.0.6
-id 4 flags 10.0.0.7
-id 5 flags 10.0.0.8
-id 253 flags 10.0.0.5
-id 254 flags 10.0.0.2
-id 255 flags 10.0.0.3" "wrap-around ids"
-
-ip netns exec $ns1 ./pm_nl_ctl flush
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags subflow
-ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags backup
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
-subflow,backup 10.0.1.1" "set flags (backup)"
-ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags nobackup
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
-subflow 10.0.1.1" " (nobackup)"
+check "show_endpoints" \
+ "$(format_endpoints "1,10.0.1.1" \
+ "3,10.0.1.3,signal backup" \
+ "4,10.0.1.4,signal" \
+ "5,10.0.1.5,signal" \
+ "6,10.0.1.6,signal" \
+ "7,10.0.1.7,signal" \
+ "8,10.0.1.8,signal")" "id limit"
+
+flush_endpoint
+check "show_endpoints" "" "flush addrs"
+
+set_limits 9 1 2>/dev/null
+check "get_limits" "${default_limits}" "rcv addrs above hard limit"
+
+set_limits 1 9 2>/dev/null
+check "get_limits" "${default_limits}" "subflows above hard limit"
+
+set_limits 8 8
+check "get_limits" "$(format_limits 8 8)" "set limits"
+
+flush_endpoint
+add_endpoint 10.0.1.1
+add_endpoint 10.0.1.2
+add_endpoint 10.0.1.3 id 100
+add_endpoint 10.0.1.4
+add_endpoint 10.0.1.5 id 254
+add_endpoint 10.0.1.6
+add_endpoint 10.0.1.7
+add_endpoint 10.0.1.8
+check "show_endpoints" \
+ "$(format_endpoints "1,10.0.1.1" \
+ "2,10.0.1.2" \
+ "3,10.0.1.7" \
+ "4,10.0.1.8" \
+ "100,10.0.1.3" \
+ "101,10.0.1.4" \
+ "254,10.0.1.5" \
+ "255,10.0.1.6")" "set ids"
+
+flush_endpoint
+add_endpoint 10.0.0.1
+add_endpoint 10.0.0.2 id 254
+add_endpoint 10.0.0.3
+add_endpoint 10.0.0.4
+add_endpoint 10.0.0.5 id 253
+add_endpoint 10.0.0.6
+add_endpoint 10.0.0.7
+add_endpoint 10.0.0.8
+check "show_endpoints" \
+ "$(format_endpoints "1,10.0.0.1" \
+ "2,10.0.0.4" \
+ "3,10.0.0.6" \
+ "4,10.0.0.7" \
+ "5,10.0.0.8" \
+ "253,10.0.0.5" \
+ "254,10.0.0.2" \
+ "255,10.0.0.3")" "wrap-around ids"
+
+flush_endpoint
+add_endpoint 10.0.1.1 flags subflow
+change_address 10.0.1.1 backup
+check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow backup")" \
+ "set flags (backup)"
+change_address 10.0.1.1 nobackup
+check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow")" \
+ " (nobackup)"
# fullmesh support has been added later
-ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh 2>/dev/null
-if ip netns exec $ns1 ./pm_nl_ctl dump | grep -q "fullmesh" ||
+change_endpoint 1 fullmesh 2>/dev/null
+if show_endpoints | grep -q "fullmesh" ||
mptcp_lib_expect_all_features; then
- check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
-subflow,fullmesh 10.0.1.1" " (fullmesh)"
- ip netns exec $ns1 ./pm_nl_ctl set id 1 flags nofullmesh
- check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
-subflow 10.0.1.1" " (nofullmesh)"
- ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh
- check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
-subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)"
+ check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow fullmesh")" \
+ " (fullmesh)"
+ change_endpoint 1 nofullmesh
+ check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow")" \
+ " (nofullmesh)"
+ change_endpoint 1 backup,fullmesh
+ check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow backup fullmesh")" \
+ " (backup,fullmesh)"
else
for st in fullmesh nofullmesh backup,fullmesh; do
st=" (${st})"
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 7322e1e4e5..f74e1c3c12 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -27,10 +27,11 @@ capout=""
size=0
usage() {
- echo "Usage: $0 [ -b ] [ -c ] [ -d ]"
+ echo "Usage: $0 [ -b ] [ -c ] [ -d ] [ -i]"
echo -e "\t-b: bail out after first error, otherwise runs al testcases"
echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
echo -e "\t-d: debug this script"
+ echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'"
}
# This function is used in the cleanup trap
@@ -45,7 +46,7 @@ cleanup()
}
mptcp_lib_check_mptcp
-mptcp_lib_check_tools ip
+mptcp_lib_check_tools ip tc
# "$ns1" ns2 ns3
# ns1eth1 ns2eth1 ns2eth3 ns3eth1
@@ -85,8 +86,8 @@ setup()
ip -net "$ns1" route add default via 10.0.2.2 metric 101
ip -net "$ns1" route add default via dead:beef:2::2 metric 101
- ip netns exec "$ns1" ./pm_nl_ctl limits 1 1
- ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow
+ mptcp_lib_pm_nl_set_limits "${ns1}" 1 1
+ mptcp_lib_pm_nl_add_endpoint "${ns1}" 10.0.2.1 dev ns1eth2 flags subflow
ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
@@ -108,7 +109,7 @@ setup()
ip -net "$ns3" route add default via 10.0.3.2
ip -net "$ns3" route add default via dead:beef:3::2
- ip netns exec "$ns3" ./pm_nl_ctl limits 1 1
+ mptcp_lib_pm_nl_set_limits "${ns3}" 1 1
# debug build can slow down measurably the test program
# we use quite tight time limit on the run-time, to ensure
@@ -259,7 +260,7 @@ run_test()
fi
}
-while getopts "bcdh" option;do
+while getopts "bcdhi" option;do
case "$option" in
"h")
usage $0
@@ -274,6 +275,9 @@ while getopts "bcdh" option;do
"d")
set -x
;;
+ "i")
+ mptcp_lib_set_ip_mptcp
+ ;;
"?")
usage $0
exit ${KSFT_FAIL}
diff --git a/tools/testing/selftests/net/nat6to4.c b/tools/testing/selftests/net/nat6to4.bpf.c
index ac54c36b25..ac54c36b25 100644
--- a/tools/testing/selftests/net/nat6to4.c
+++ b/tools/testing/selftests/net/nat6to4.bpf.c
diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/net/netfilter/.gitignore
index c2229b3e40..0a64d6d0e2 100644
--- a/tools/testing/selftests/netfilter/.gitignore
+++ b/tools/testing/selftests/net/netfilter/.gitignore
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
-nf-queue
-connect_close
audit_logread
+connect_close
conntrack_dump_flush
sctp_collision
+nf_queue
diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile
new file mode 100644
index 0000000000..47945b2b3f
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/Makefile
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+
+top_srcdir = ../../../../..
+
+HOSTPKG_CONFIG := pkg-config
+MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
+MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
+
+TEST_PROGS := br_netfilter.sh bridge_brouter.sh
+TEST_PROGS += conntrack_icmp_related.sh
+TEST_PROGS += conntrack_ipip_mtu.sh
+TEST_PROGS += conntrack_tcp_unreplied.sh
+TEST_PROGS += conntrack_sctp_collision.sh
+TEST_PROGS += conntrack_vrf.sh
+TEST_PROGS += ipvs.sh
+TEST_PROGS += nf_conntrack_packetdrill.sh
+TEST_PROGS += nf_nat_edemux.sh
+TEST_PROGS += nft_audit.sh
+TEST_PROGS += nft_concat_range.sh
+TEST_PROGS += nft_conntrack_helper.sh
+TEST_PROGS += nft_fib.sh
+TEST_PROGS += nft_flowtable.sh
+TEST_PROGS += nft_meta.sh
+TEST_PROGS += nft_nat.sh
+TEST_PROGS += nft_nat_zones.sh
+TEST_PROGS += nft_queue.sh
+TEST_PROGS += nft_synproxy.sh
+TEST_PROGS += nft_zones_many.sh
+TEST_PROGS += rpath.sh
+TEST_PROGS += xt_string.sh
+
+TEST_PROGS_EXTENDED = nft_concat_range_perf.sh
+
+TEST_GEN_PROGS = conntrack_dump_flush
+
+TEST_GEN_FILES = audit_logread
+TEST_GEN_FILES += connect_close nf_queue
+TEST_GEN_FILES += sctp_collision
+
+include ../../lib.mk
+
+$(OUTPUT)/nf_queue: CFLAGS += $(MNL_CFLAGS)
+$(OUTPUT)/nf_queue: LDLIBS += $(MNL_LDLIBS)
+
+$(OUTPUT)/conntrack_dump_flush: CFLAGS += $(MNL_CFLAGS)
+$(OUTPUT)/conntrack_dump_flush: LDLIBS += $(MNL_LDLIBS)
+
+TEST_FILES := lib.sh
+TEST_FILES += packetdrill
+
+TEST_INCLUDES := \
+ ../lib.sh
diff --git a/tools/testing/selftests/netfilter/audit_logread.c b/tools/testing/selftests/net/netfilter/audit_logread.c
index a0a880fc2d..a0a880fc2d 100644
--- a/tools/testing/selftests/netfilter/audit_logread.c
+++ b/tools/testing/selftests/net/netfilter/audit_logread.c
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh
new file mode 100755
index 0000000000..c28379a965
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh
@@ -0,0 +1,171 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test for legacy br_netfilter module combined with connection tracking,
+# a combination that doesn't really work.
+# Multicast/broadcast packets race for hash table insertion.
+
+# eth0 br0 eth0
+# setup is: ns1 <->,ns0 <-> ns3
+# ns2 <-' `'-> ns4
+
+source lib.sh
+
+checktool "nft --version" "run test without nft tool"
+
+cleanup() {
+ cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+setup_ns ns0 ns1 ns2 ns3 ns4
+
+ret=0
+
+do_ping()
+{
+ fromns="$1"
+ dstip="$2"
+
+ if ! ip netns exec "$fromns" ping -c 1 -q "$dstip" > /dev/null; then
+ echo "ERROR: ping from $fromns to $dstip"
+ ip netns exec "$ns0" nft list ruleset
+ ret=1
+ fi
+}
+
+bcast_ping()
+{
+ fromns="$1"
+ dstip="$2"
+
+ local packets=500
+
+ [ "$KSFT_MACHINE_SLOW" = yes ] && packets=100
+
+ for i in $(seq 1 $packets); do
+ if ! ip netns exec "$fromns" ping -q -f -b -c 1 -q "$dstip" > /dev/null 2>&1; then
+ echo "ERROR: ping -b from $fromns to $dstip"
+ ip netns exec "$ns0" nft list ruleset
+ ret=1
+ break
+ fi
+ done
+}
+
+ip netns exec "$ns0" sysctl -q net.ipv4.conf.all.rp_filter=0
+ip netns exec "$ns0" sysctl -q net.ipv4.conf.default.rp_filter=0
+
+if ! ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns1"; then
+ echo "SKIP: Can't create veth device"
+ exit $ksft_skip
+fi
+
+ip link add veth2 netns "$ns0" type veth peer name eth0 netns "$ns2"
+ip link add veth3 netns "$ns0" type veth peer name eth0 netns "$ns3"
+ip link add veth4 netns "$ns0" type veth peer name eth0 netns "$ns4"
+
+for i in $(seq 1 4); do
+ ip -net "$ns0" link set "veth$i" up
+done
+
+if ! ip -net "$ns0" link add br0 type bridge stp_state 0 forward_delay 0 nf_call_iptables 1 nf_call_ip6tables 1 nf_call_arptables 1; then
+ echo "SKIP: Can't create bridge br0"
+ exit $ksft_skip
+fi
+
+# make veth0,1,2 part of bridge.
+for i in $(seq 1 3); do
+ ip -net "$ns0" link set "veth$i" master br0
+done
+
+# add a macvlan on top of the bridge.
+MACVLAN_ADDR=ba:f3:13:37:42:23
+ip -net "$ns0" link add link br0 name macvlan0 type macvlan mode private
+ip -net "$ns0" link set macvlan0 address ${MACVLAN_ADDR}
+ip -net "$ns0" link set macvlan0 up
+ip -net "$ns0" addr add 10.23.0.1/24 dev macvlan0
+
+# add a macvlan on top of veth4.
+MACVLAN_ADDR=ba:f3:13:37:42:24
+ip -net "$ns0" link add link veth4 name macvlan4 type macvlan mode passthru
+ip -net "$ns0" link set macvlan4 address ${MACVLAN_ADDR}
+ip -net "$ns0" link set macvlan4 up
+
+# make the macvlan part of the bridge.
+# veth4 is not a bridge port, only the macvlan on top of it.
+ip -net "$ns0" link set macvlan4 master br0
+
+ip -net "$ns0" link set br0 up
+ip -net "$ns0" addr add 10.0.0.1/24 dev br0
+
+modprobe -q br_netfilter
+if ! ip netns exec "$ns0" sysctl -q net.bridge.bridge-nf-call-iptables=1; then
+ echo "SKIP: bridge netfilter not available"
+ ret=$ksft_skip
+fi
+
+# for testing, so namespaces will reply to ping -b probes.
+ip netns exec "$ns0" sysctl -q net.ipv4.icmp_echo_ignore_broadcasts=0
+
+# enable conntrack in ns0 and drop broadcast packets in forward to
+# avoid them from getting confirmed in the postrouting hook before
+# the cloned skb is passed up the stack.
+ip netns exec "$ns0" nft -f - <<EOF
+table ip filter {
+ chain input {
+ type filter hook input priority 1; policy accept
+ iifname br0 counter
+ ct state new accept
+ }
+}
+
+table bridge filter {
+ chain forward {
+ type filter hook forward priority 0; policy accept
+ meta pkttype broadcast ip protocol icmp counter drop
+ }
+}
+EOF
+if [ "$?" -ne 0 ];then
+ echo "SKIP: could not add nftables ruleset"
+ exit $ksft_skip
+fi
+
+# place 1, 2 & 3 in same subnet, connected via ns0:br0.
+# ns4 is placed in same subnet as well, but its not
+# part of the bridge: the corresponding veth4 is not
+# part of the bridge, only its macvlan interface.
+for i in $(seq 1 4); do
+ eval ip -net \$ns"$i" link set eth0 up
+done
+for i in $(seq 1 2); do
+ eval ip -net \$ns"$i" addr add "10.0.0.1$i/24" dev eth0
+done
+
+ip -net "$ns3" addr add 10.23.0.13/24 dev eth0
+ip -net "$ns4" addr add 10.23.0.14/24 dev eth0
+
+# test basic connectivity
+do_ping "$ns1" 10.0.0.12
+do_ping "$ns3" 10.23.0.1
+do_ping "$ns4" 10.23.0.1
+
+bcast_ping "$ns1" 10.0.0.255
+
+# This should deliver broadcast to macvlan0, which is on top of ns0:br0.
+bcast_ping "$ns3" 10.23.0.255
+
+# same, this time via veth4:macvlan4.
+bcast_ping "$ns4" 10.23.0.255
+
+read t < /proc/sys/kernel/tainted
+if [ "$t" -eq 0 ];then
+ echo PASS: kernel not tainted
+else
+ echo ERROR: kernel is tainted
+ ret=1
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/bridge_brouter.sh b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
new file mode 100755
index 0000000000..2549b65906
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+#
+# This test is for bridge 'brouting', i.e. make some packets being routed
+# rather than getting bridged even though they arrive on interface that is
+# part of a bridge.
+
+# eth0 br0 eth0
+# setup is: ns1 <-> nsbr <-> ns2
+
+source lib.sh
+
+if ! ebtables -V > /dev/null 2>&1;then
+ echo "SKIP: Could not run test without ebtables"
+ exit $ksft_skip
+fi
+
+cleanup() {
+ cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+setup_ns nsbr ns1 ns2
+
+ip netns exec "$nsbr" sysctl -q net.ipv4.conf.default.rp_filter=0
+ip netns exec "$nsbr" sysctl -q net.ipv4.conf.all.rp_filter=0
+if ! ip link add veth0 netns "$nsbr" type veth peer name eth0 netns "$ns1"; then
+ echo "SKIP: Can't create veth device"
+ exit $ksft_skip
+fi
+ip link add veth1 netns "$nsbr" type veth peer name eth0 netns "$ns2"
+
+if ! ip -net "$nsbr" link add br0 type bridge; then
+ echo "SKIP: Can't create bridge br0"
+ exit $ksft_skip
+fi
+
+ip -net "$nsbr" link set veth0 up
+ip -net "$nsbr" link set veth1 up
+
+ip -net "$nsbr" link set veth0 master br0
+ip -net "$nsbr" link set veth1 master br0
+ip -net "$nsbr" link set br0 up
+ip -net "$nsbr" addr add 10.0.0.1/24 dev br0
+
+# place both in same subnet, ${ns1} and ${ns2} connected via ${nsbr}:br0
+ip -net "$ns1" link set eth0 up
+ip -net "$ns2" link set eth0 up
+ip -net "$ns1" addr add 10.0.0.11/24 dev eth0
+ip -net "$ns2" addr add 10.0.0.12/24 dev eth0
+
+test_ebtables_broute()
+{
+ # redirect is needed so the dstmac is rewritten to the bridge itself,
+ # ip stack won't process OTHERHOST (foreign unicast mac) packets.
+ if ! ip netns exec "$nsbr" ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP; then
+ echo "SKIP: Could not add ebtables broute redirect rule"
+ return $ksft_skip
+ fi
+
+ ip netns exec "$nsbr" sysctl -q net.ipv4.conf.veth0.forwarding=0
+
+ # ping net${ns1}, expected to not work (ip forwarding is off)
+ if ip netns exec "$ns1" ping -q -c 1 10.0.0.12 -W 0.5 > /dev/null 2>&1; then
+ echo "ERROR: ping works, should have failed" 1>&2
+ return 1
+ fi
+
+ # enable forwarding on both interfaces.
+ # neither needs an ip address, but at least the bridge needs
+ # an ip address in same network segment as ${ns1} and ${ns2} (${nsbr}
+ # needs to be able to determine route for to-be-forwarded packet).
+ ip netns exec "$nsbr" sysctl -q net.ipv4.conf.veth0.forwarding=1
+ ip netns exec "$nsbr" sysctl -q net.ipv4.conf.veth1.forwarding=1
+
+ if ! ip netns exec "$ns1" ping -q -c 1 10.0.0.12 > /dev/null; then
+ echo "ERROR: ping did not work, but it should (broute+forward)" 1>&2
+ return 1
+ fi
+
+ echo "PASS: ${ns1}/${ns2} connectivity with active broute rule"
+ ip netns exec "$nsbr" ebtables -t broute -F
+
+ # ping net${ns1}, expected to work (frames are bridged)
+ if ! ip netns exec "$ns1" ping -q -c 1 10.0.0.12 > /dev/null; then
+ echo "ERROR: ping did not work, but it should (bridged)" 1>&2
+ return 1
+ fi
+
+ ip netns exec "$nsbr" ebtables -t filter -A FORWARD -p ipv4 --ip-protocol icmp -j DROP
+
+ # ping net${ns1}, expected to not work (DROP in bridge forward)
+ if ip netns exec "$ns1" ping -q -c 1 10.0.0.12 -W 0.5 > /dev/null 2>&1; then
+ echo "ERROR: ping works, should have failed (icmp forward drop)" 1>&2
+ return 1
+ fi
+
+ # re-activate brouter
+ ip netns exec "$nsbr" ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP
+
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.0.11 > /dev/null; then
+ echo "ERROR: ping did not work, but it should (broute+forward 2)" 1>&2
+ return 1
+ fi
+
+ echo "PASS: ${ns1}/${ns2} connectivity with active broute rule and bridge forward drop"
+ return 0
+}
+
+# test basic connectivity
+if ! ip netns exec "$ns1" ping -c 1 -q 10.0.0.12 > /dev/null; then
+ echo "ERROR: Could not reach ${ns2} from ${ns1}" 1>&2
+ exit 1
+fi
+
+if ! ip netns exec "$ns2" ping -c 1 -q 10.0.0.11 > /dev/null; then
+ echo "ERROR: Could not reach ${ns1} from ${ns2}" 1>&2
+ exit 1
+fi
+
+test_ebtables_broute
+exit $?
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
new file mode 100644
index 0000000000..63ef80ef47
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/config
@@ -0,0 +1,89 @@
+CONFIG_AUDIT=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_BRIDGE=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_IP=m
+CONFIG_BRIDGE_EBT_REDIRECT=m
+CONFIG_BRIDGE_EBT_T_FILTER=m
+CONFIG_BRIDGE_NETFILTER=m
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_CGROUP_BPF=y
+CONFIG_DUMMY=m
+CONFIG_INET_ESP=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP_SCTP=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_RR=m
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_MACVLAN=m
+CONFIG_NAMESPACES=y
+CONFIG_NET_CLS_U32=m
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_NS=y
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_IPIP=m
+CONFIG_NET_VRF=y
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_NETLINK=m
+CONFIG_NETFILTER_NETLINK_QUEUE=m
+CONFIG_NETFILTER_SYNPROXY=m
+CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XT_NAT=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_PROTO_SCTP=y
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_LOG_IPV4=m
+CONFIG_NF_LOG_IPV6=m
+CONFIG_NF_NAT=m
+CONFIG_NF_NAT_REDIRECT=y
+CONFIG_NF_NAT_MASQUERADE=y
+CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_IPV4=y
+CONFIG_NF_TABLES_IPV6=y
+CONFIG_NF_TABLES_NETDEV=y
+CONFIG_NF_FLOW_TABLE_INET=m
+CONFIG_NFT_BRIDGE_META=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_FIB=m
+CONFIG_NFT_FIB_INET=m
+CONFIG_NFT_FIB_IPV4=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NFT_FLOW_OFFLOAD=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_MASQ=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_NUMGEN=m
+CONFIG_NFT_QUEUE=m
+CONFIG_NFT_QUOTA=m
+CONFIG_NFT_REDIR=m
+CONFIG_NFT_SYNPROXY=m
+CONFIG_VETH=m
+CONFIG_VLAN_8021Q=m
+CONFIG_XFRM_USER=m
+CONFIG_XFRM_STATISTICS=y
+CONFIG_NET_PKTGEN=m
+CONFIG_TUN=m
diff --git a/tools/testing/selftests/netfilter/connect_close.c b/tools/testing/selftests/net/netfilter/connect_close.c
index 1c3b0add54..1c3b0add54 100644
--- a/tools/testing/selftests/netfilter/connect_close.c
+++ b/tools/testing/selftests/net/netfilter/connect_close.c
diff --git a/tools/testing/selftests/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
index b11ea8ee67..bd9317bf5a 100644
--- a/tools/testing/selftests/netfilter/conntrack_dump_flush.c
+++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
@@ -10,7 +10,7 @@
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
-#include "../kselftest_harness.h"
+#include "../../kselftest_harness.h"
#define TEST_ZONE_ID 123
#define NF_CT_DEFAULT_ZONE_ID 0
@@ -313,13 +313,11 @@ FIXTURE_SETUP(conntrack_dump_flush)
self->sock = mnl_socket_open(NETLINK_NETFILTER);
if (!self->sock) {
perror("mnl_socket_open");
- exit(EXIT_FAILURE);
+ SKIP(return, "cannot open netlink_netfilter socket");
}
- if (mnl_socket_bind(self->sock, 0, MNL_SOCKET_AUTOPID) < 0) {
- perror("mnl_socket_bind");
- exit(EXIT_FAILURE);
- }
+ ret = mnl_socket_bind(self->sock, 0, MNL_SOCKET_AUTOPID);
+ EXPECT_EQ(ret, 0);
ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
if (ret < 0 && errno == EPERM)
diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh
index 76645aaf2b..c63d840ead 100755
--- a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh
@@ -14,35 +14,32 @@
# check the icmp errors are propagated to the correct host as per
# nat of "established" icmp-echo "connection".
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-ret=0
+source lib.sh
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
+if ! nft --version > /dev/null 2>&1;then
echo "SKIP: Could not run test without nft tool"
exit $ksft_skip
fi
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
cleanup() {
- for i in 1 2;do ip netns del nsclient$i;done
- for i in 1 2;do ip netns del nsrouter$i;done
+ cleanup_all_ns
}
trap cleanup EXIT
-ipv4() {
- echo -n 192.168.$1.2
-}
+setup_ns nsclient1 nsclient2 nsrouter1 nsrouter2
+
+ret=0
+
+add_addr()
+{
+ ns=$1
+ dev=$2
+ i=$3
-ipv6 () {
- echo -n dead:$1::2
+ ip -net "$ns" link set "$dev" up
+ ip -net "$ns" addr add "192.168.$i.2/24" dev "$dev"
+ ip -net "$ns" addr add "dead:$i::2/64" dev "$dev" nodad
}
check_counter()
@@ -52,10 +49,9 @@ check_counter()
expect=$3
local lret=0
- cnt=$(ip netns exec $ns nft list counter inet filter "$name" | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns" nft list counter inet filter "$name" | grep -q "$expect"; then
echo "ERROR: counter $name in $ns has unexpected value (expected $expect)" 1>&2
- ip netns exec $ns nft list counter inet filter "$name" 1>&2
+ ip netns exec "$ns" nft list counter inet filter "$name" 1>&2
lret=1
fi
@@ -65,9 +61,8 @@ check_counter()
check_unknown()
{
expect="packets 0 bytes 0"
- for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do
- check_counter $n "unknown" "$expect"
- if [ $? -ne 0 ] ;then
+ for n in ${nsclient1} ${nsclient2} ${nsrouter1} ${nsrouter2}; do
+ if ! check_counter "$n" "unknown" "$expect"; then
return 1
fi
done
@@ -75,61 +70,48 @@ check_unknown()
return 0
}
-for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do
- ip netns add $n
- ip -net $n link set lo up
-done
-
-DEV=veth0
-ip link add $DEV netns nsclient1 type veth peer name eth1 netns nsrouter1
DEV=veth0
-ip link add $DEV netns nsclient2 type veth peer name eth1 netns nsrouter2
+ip link add "$DEV" netns "$nsclient1" type veth peer name eth1 netns "$nsrouter1"
+ip link add "$DEV" netns "$nsclient2" type veth peer name eth1 netns "$nsrouter2"
+ip link add "$DEV" netns "$nsrouter1" type veth peer name eth2 netns "$nsrouter2"
-DEV=veth0
-ip link add $DEV netns nsrouter1 type veth peer name eth2 netns nsrouter2
+add_addr "$nsclient1" $DEV 1
+add_addr "$nsclient2" $DEV 2
-DEV=veth0
-for i in 1 2; do
- ip -net nsclient$i link set $DEV up
- ip -net nsclient$i addr add $(ipv4 $i)/24 dev $DEV
- ip -net nsclient$i addr add $(ipv6 $i)/64 dev $DEV
-done
-
-ip -net nsrouter1 link set eth1 up
-ip -net nsrouter1 link set veth0 up
+ip -net "$nsrouter1" link set eth1 up
+ip -net "$nsrouter1" link set $DEV up
-ip -net nsrouter2 link set eth1 up
-ip -net nsrouter2 link set eth2 up
+ip -net "$nsrouter2" link set eth1 mtu 1280 up
+ip -net "$nsrouter2" link set eth2 up
-ip -net nsclient1 route add default via 192.168.1.1
-ip -net nsclient1 -6 route add default via dead:1::1
+ip -net "$nsclient1" route add default via 192.168.1.1
+ip -net "$nsclient1" -6 route add default via dead:1::1
-ip -net nsclient2 route add default via 192.168.2.1
-ip -net nsclient2 route add default via dead:2::1
+ip -net "$nsclient2" route add default via 192.168.2.1
+ip -net "$nsclient2" route add default via dead:2::1
+ip -net "$nsclient2" link set veth0 mtu 1280
-i=3
-ip -net nsrouter1 addr add 192.168.1.1/24 dev eth1
-ip -net nsrouter1 addr add 192.168.3.1/24 dev veth0
-ip -net nsrouter1 addr add dead:1::1/64 dev eth1
-ip -net nsrouter1 addr add dead:3::1/64 dev veth0
-ip -net nsrouter1 route add default via 192.168.3.10
-ip -net nsrouter1 -6 route add default via dead:3::10
+ip -net "$nsrouter1" addr add 192.168.1.1/24 dev eth1
+ip -net "$nsrouter1" addr add 192.168.3.1/24 dev veth0
+ip -net "$nsrouter1" addr add dead:1::1/64 dev eth1 nodad
+ip -net "$nsrouter1" addr add dead:3::1/64 dev veth0 nodad
+ip -net "$nsrouter1" route add default via 192.168.3.10
+ip -net "$nsrouter1" -6 route add default via dead:3::10
-ip -net nsrouter2 addr add 192.168.2.1/24 dev eth1
-ip -net nsrouter2 addr add 192.168.3.10/24 dev eth2
-ip -net nsrouter2 addr add dead:2::1/64 dev eth1
-ip -net nsrouter2 addr add dead:3::10/64 dev eth2
-ip -net nsrouter2 route add default via 192.168.3.1
-ip -net nsrouter2 route add default via dead:3::1
+ip -net "$nsrouter2" addr add 192.168.2.1/24 dev eth1
+ip -net "$nsrouter2" addr add 192.168.3.10/24 dev eth2
+ip -net "$nsrouter2" addr add dead:2::1/64 dev eth1 nodad
+ip -net "$nsrouter2" addr add dead:3::10/64 dev eth2 nodad
+ip -net "$nsrouter2" route add default via 192.168.3.1
+ip -net "$nsrouter2" route add default via dead:3::1
-sleep 2
for i in 4 6; do
- ip netns exec nsrouter1 sysctl -q net.ipv$i.conf.all.forwarding=1
- ip netns exec nsrouter2 sysctl -q net.ipv$i.conf.all.forwarding=1
+ ip netns exec "$nsrouter1" sysctl -q net.ipv$i.conf.all.forwarding=1
+ ip netns exec "$nsrouter2" sysctl -q net.ipv$i.conf.all.forwarding=1
done
-for netns in nsrouter1 nsrouter2; do
-ip netns exec $netns nft -f - <<EOF
+for netns in "$nsrouter1" "$nsrouter2"; do
+ip netns exec "$netns" nft -f - <<EOF
table inet filter {
counter unknown { }
counter related { }
@@ -144,7 +126,7 @@ table inet filter {
EOF
done
-ip netns exec nsclient1 nft -f - <<EOF
+ip netns exec "$nsclient1" nft -f - <<EOF
table inet filter {
counter unknown { }
counter related { }
@@ -164,7 +146,7 @@ table inet filter {
}
EOF
-ip netns exec nsclient2 nft -f - <<EOF
+ip netns exec "$nsclient2" nft -f - <<EOF
table inet filter {
counter unknown { }
counter new { }
@@ -189,11 +171,10 @@ table inet filter {
}
EOF
-
# make sure NAT core rewrites adress of icmp error if nat is used according to
# conntrack nat information (icmp error will be directed at nsrouter1 address,
# but it needs to be routed to nsclient1 address).
-ip netns exec nsrouter1 nft -f - <<EOF
+ip netns exec "$nsrouter1" nft -f - <<EOF
table ip nat {
chain postrouting {
type nat hook postrouting priority 0; policy accept;
@@ -208,44 +189,32 @@ table ip6 nat {
}
EOF
-ip netns exec nsrouter2 ip link set eth1 mtu 1280
-ip netns exec nsclient2 ip link set veth0 mtu 1280
-sleep 1
-
-ip netns exec nsclient1 ping -c 1 -s 1000 -q -M do 192.168.2.2 >/dev/null
-if [ $? -ne 0 ]; then
+if ! ip netns exec "$nsclient1" ping -c 1 -s 1000 -q -M "do" 192.168.2.2 >/dev/null; then
echo "ERROR: netns ip routing/connectivity broken" 1>&2
- cleanup
exit 1
fi
-ip netns exec nsclient1 ping6 -q -c 1 -s 1000 dead:2::2 >/dev/null
-if [ $? -ne 0 ]; then
+if ! ip netns exec "$nsclient1" ping -c 1 -s 1000 -q dead:2::2 >/dev/null; then
echo "ERROR: netns ipv6 routing/connectivity broken" 1>&2
- cleanup
exit 1
fi
-check_unknown
-if [ $? -ne 0 ]; then
+if ! check_unknown; then
ret=1
fi
expect="packets 0 bytes 0"
-for netns in nsrouter1 nsrouter2 nsclient1;do
- check_counter "$netns" "related" "$expect"
- if [ $? -ne 0 ]; then
+for netns in "$nsrouter1" "$nsrouter2" "$nsclient1";do
+ if ! check_counter "$netns" "related" "$expect"; then
ret=1
fi
done
expect="packets 2 bytes 2076"
-check_counter nsclient2 "new" "$expect"
-if [ $? -ne 0 ]; then
+if ! check_counter "$nsclient2" "new" "$expect"; then
ret=1
fi
-ip netns exec nsclient1 ping -q -c 1 -s 1300 -M do 192.168.2.2 > /dev/null
-if [ $? -eq 0 ]; then
+if ip netns exec "$nsclient1" ping -W 0.5 -q -c 1 -s 1300 -M "do" 192.168.2.2 > /dev/null; then
echo "ERROR: ping should have failed with PMTU too big error" 1>&2
ret=1
fi
@@ -253,30 +222,26 @@ fi
# nsrouter2 should have generated the icmp error, so
# related counter should be 0 (its in forward).
expect="packets 0 bytes 0"
-check_counter "nsrouter2" "related" "$expect"
-if [ $? -ne 0 ]; then
+if ! check_counter "$nsrouter2" "related" "$expect"; then
ret=1
fi
# but nsrouter1 should have seen it, same for nsclient1.
expect="packets 1 bytes 576"
-for netns in nsrouter1 nsclient1;do
- check_counter "$netns" "related" "$expect"
- if [ $? -ne 0 ]; then
+for netns in ${nsrouter1} ${nsclient1};do
+ if ! check_counter "$netns" "related" "$expect"; then
ret=1
fi
done
-ip netns exec nsclient1 ping6 -c 1 -s 1300 dead:2::2 > /dev/null
-if [ $? -eq 0 ]; then
+if ip netns exec "${nsclient1}" ping6 -W 0.5 -c 1 -s 1300 dead:2::2 > /dev/null; then
echo "ERROR: ping6 should have failed with PMTU too big error" 1>&2
ret=1
fi
expect="packets 2 bytes 1856"
-for netns in nsrouter1 nsclient1;do
- check_counter "$netns" "related" "$expect"
- if [ $? -ne 0 ]; then
+for netns in "${nsrouter1}" "${nsclient1}";do
+ if ! check_counter "$netns" "related" "$expect"; then
ret=1
fi
done
@@ -288,21 +253,19 @@ else
fi
# add 'bad' route, expect icmp REDIRECT to be generated
-ip netns exec nsclient1 ip route add 192.168.1.42 via 192.168.1.1
-ip netns exec nsclient1 ip route add dead:1::42 via dead:1::1
+ip netns exec "${nsclient1}" ip route add 192.168.1.42 via 192.168.1.1
+ip netns exec "${nsclient1}" ip route add dead:1::42 via dead:1::1
-ip netns exec "nsclient1" ping -q -c 2 192.168.1.42 > /dev/null
+ip netns exec "$nsclient1" ping -W 1 -q -i 0.5 -c 2 192.168.1.42 > /dev/null
expect="packets 1 bytes 112"
-check_counter nsclient1 "redir4" "$expect"
-if [ $? -ne 0 ];then
+if ! check_counter "$nsclient1" "redir4" "$expect"; then
ret=1
fi
-ip netns exec "nsclient1" ping -c 1 dead:1::42 > /dev/null
+ip netns exec "$nsclient1" ping -W 1 -c 1 dead:1::42 > /dev/null
expect="packets 1 bytes 192"
-check_counter nsclient1 "redir6" "$expect"
-if [ $? -ne 0 ];then
+if ! check_counter "$nsclient1" "redir6" "$expect"; then
ret=1
fi
diff --git a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh b/tools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh
index eb9553e498..9832a5d019 100755
--- a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh
@@ -1,8 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
# Conntrack needs to reassemble fragments in order to have complete
# packets for rule matching. Reassembly can lead to packet loss.
@@ -23,56 +22,44 @@ ksft_skip=4
# between Client A and Client B over WAN. Wanrouter has MTU 1400 set
# on its interfaces.
-rnd=$(mktemp -u XXXXXXXX)
rx=$(mktemp)
-r_a="ns-ra-$rnd"
-r_b="ns-rb-$rnd"
-r_w="ns-rw-$rnd"
-c_a="ns-ca-$rnd"
-c_b="ns-cb-$rnd"
-
-checktool (){
- if ! $1 > /dev/null 2>&1; then
- echo "SKIP: Could not $2"
- exit $ksft_skip
- fi
-}
-
checktool "iptables --version" "run test without iptables"
-checktool "ip -Version" "run test without ip tool"
-checktool "which socat" "run test without socat"
-checktool "ip netns add ${r_a}" "create net namespace"
+checktool "socat -h" "run test without socat"
-for n in ${r_b} ${r_w} ${c_a} ${c_b};do
- ip netns add ${n}
-done
+setup_ns r_a r_b r_w c_a c_b
cleanup() {
- for n in ${r_a} ${r_b} ${r_w} ${c_a} ${c_b};do
- ip netns del ${n}
- done
- rm -f ${rx}
+ cleanup_all_ns
+ rm -f "$rx"
}
trap cleanup EXIT
+listener_ready()
+{
+ ns="$1"
+ port="$2"
+ ss -N "$ns" -lnu -o "sport = :$port" | grep -q "$port"
+}
+
test_path() {
msg="$1"
- ip netns exec ${c_b} socat -t 3 - udp4-listen:5000,reuseaddr > ${rx} < /dev/null &
+ ip netns exec "$c_b" socat -t 3 - udp4-listen:5000,reuseaddr > "$rx" < /dev/null &
+
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$c_b" 5000
- sleep 1
for i in 1 2 3; do
head -c1400 /dev/zero | tr "\000" "a" | \
- ip netns exec ${c_a} socat -t 1 -u STDIN UDP:192.168.20.2:5000
+ ip netns exec "$c_a" socat -t 1 -u STDIN UDP:192.168.20.2:5000
done
wait
- bytes=$(wc -c < ${rx})
+ bytes=$(wc -c < "$rx")
- if [ $bytes -eq 1400 ];then
+ if [ "$bytes" -eq 1400 ];then
echo "OK: PMTU $msg connection tracking"
else
echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400"
@@ -91,24 +78,24 @@ test_path() {
# 10.4.4.1 via 10.2.2.254 (Router B via Wanrouter)
# No iptables rules at all.
-ip link add veth0 netns ${r_a} type veth peer name veth0 netns ${r_w}
-ip link add veth1 netns ${r_a} type veth peer name veth0 netns ${c_a}
+ip link add veth0 netns "$r_a" type veth peer name veth0 netns "$r_w"
+ip link add veth1 netns "$r_a" type veth peer name veth0 netns "$c_a"
l_addr="10.2.2.1"
r_addr="10.4.4.1"
-ip netns exec ${r_a} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip
+ip netns exec "$r_a" ip link add ipip0 type ipip local "$l_addr" remote "$r_addr" mode ipip || exit $ksft_skip
for dev in lo veth0 veth1 ipip0; do
- ip -net ${r_a} link set $dev up
+ ip -net "$r_a" link set "$dev" up
done
-ip -net ${r_a} addr add 10.2.2.1/24 dev veth0
-ip -net ${r_a} addr add 192.168.10.1/24 dev veth1
+ip -net "$r_a" addr add 10.2.2.1/24 dev veth0
+ip -net "$r_a" addr add 192.168.10.1/24 dev veth1
-ip -net ${r_a} route add 192.168.20.0/24 dev ipip0
-ip -net ${r_a} route add 10.4.4.0/24 via 10.2.2.254
+ip -net "$r_a" route add 192.168.20.0/24 dev ipip0
+ip -net "$r_a" route add 10.4.4.0/24 via 10.2.2.254
-ip netns exec ${r_a} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+ip netns exec "$r_a" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
# Detailed setup for Router B
# ---------------------------
@@ -121,49 +108,46 @@ ip netns exec ${r_a} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
# 10.2.2.1 via 10.4.4.254 (Router A via Wanrouter)
# No iptables rules at all.
-ip link add veth0 netns ${r_b} type veth peer name veth1 netns ${r_w}
-ip link add veth1 netns ${r_b} type veth peer name veth0 netns ${c_b}
+ip link add veth0 netns "$r_b" type veth peer name veth1 netns "$r_w"
+ip link add veth1 netns "$r_b" type veth peer name veth0 netns "$c_b"
l_addr="10.4.4.1"
r_addr="10.2.2.1"
-ip netns exec ${r_b} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip
+ip netns exec "$r_b" ip link add ipip0 type ipip local "${l_addr}" remote "${r_addr}" mode ipip || exit $ksft_skip
-for dev in lo veth0 veth1 ipip0; do
- ip -net ${r_b} link set $dev up
+for dev in veth0 veth1 ipip0; do
+ ip -net "$r_b" link set $dev up
done
-ip -net ${r_b} addr add 10.4.4.1/24 dev veth0
-ip -net ${r_b} addr add 192.168.20.1/24 dev veth1
+ip -net "$r_b" addr add 10.4.4.1/24 dev veth0
+ip -net "$r_b" addr add 192.168.20.1/24 dev veth1
-ip -net ${r_b} route add 192.168.10.0/24 dev ipip0
-ip -net ${r_b} route add 10.2.2.0/24 via 10.4.4.254
-ip netns exec ${r_b} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+ip -net "$r_b" route add 192.168.10.0/24 dev ipip0
+ip -net "$r_b" route add 10.2.2.0/24 via 10.4.4.254
+ip netns exec "$r_b" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
# Client A
-ip -net ${c_a} addr add 192.168.10.2/24 dev veth0
-ip -net ${c_a} link set dev lo up
-ip -net ${c_a} link set dev veth0 up
-ip -net ${c_a} route add default via 192.168.10.1
+ip -net "$c_a" addr add 192.168.10.2/24 dev veth0
+ip -net "$c_a" link set dev veth0 up
+ip -net "$c_a" route add default via 192.168.10.1
# Client A
-ip -net ${c_b} addr add 192.168.20.2/24 dev veth0
-ip -net ${c_b} link set dev veth0 up
-ip -net ${c_b} link set dev lo up
-ip -net ${c_b} route add default via 192.168.20.1
+ip -net "$c_b" addr add 192.168.20.2/24 dev veth0
+ip -net "$c_b" link set dev veth0 up
+ip -net "$c_b" route add default via 192.168.20.1
# Wan
-ip -net ${r_w} addr add 10.2.2.254/24 dev veth0
-ip -net ${r_w} addr add 10.4.4.254/24 dev veth1
+ip -net "$r_w" addr add 10.2.2.254/24 dev veth0
+ip -net "$r_w" addr add 10.4.4.254/24 dev veth1
-ip -net ${r_w} link set dev lo up
-ip -net ${r_w} link set dev veth0 up mtu 1400
-ip -net ${r_w} link set dev veth1 up mtu 1400
+ip -net "$r_w" link set dev veth0 up mtu 1400
+ip -net "$r_w" link set dev veth1 up mtu 1400
-ip -net ${r_a} link set dev veth0 mtu 1400
-ip -net ${r_b} link set dev veth0 mtu 1400
+ip -net "$r_a" link set dev veth0 mtu 1400
+ip -net "$r_b" link set dev veth0 mtu 1400
-ip netns exec ${r_w} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+ip netns exec "$r_w" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
# Path MTU discovery
# ------------------
@@ -203,5 +187,5 @@ test_path "without"
#packet is too big (1400) for the tunnel PMTU (1380) to Router B, it is
#dropped on Router A before sending.
-ip netns exec ${r_a} iptables -A FORWARD -m conntrack --ctstate NEW
+ip netns exec "$r_a" iptables -A FORWARD -m conntrack --ctstate NEW
test_path "with"
diff --git a/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh b/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh
new file mode 100755
index 0000000000..d860f7d974
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Testing For SCTP COLLISION SCENARIO as Below:
+#
+# 14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359]
+# 14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187]
+# 14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359]
+# 14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO]
+# 14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK]
+# 14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970]
+#
+# TOPO: SERVER_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) CLIENT_NS
+
+source lib.sh
+
+CLIENT_IP="198.51.200.1"
+CLIENT_PORT=1234
+
+SERVER_IP="198.51.100.1"
+SERVER_PORT=1234
+
+CLIENT_GW="198.51.200.2"
+SERVER_GW="198.51.100.2"
+
+# setup the topo
+setup() {
+ setup_ns CLIENT_NS SERVER_NS ROUTER_NS
+ ip -n "$SERVER_NS" link add link0 type veth peer name link1 netns "$ROUTER_NS"
+ ip -n "$CLIENT_NS" link add link3 type veth peer name link2 netns "$ROUTER_NS"
+
+ ip -n "$SERVER_NS" link set link0 up
+ ip -n "$SERVER_NS" addr add $SERVER_IP/24 dev link0
+ ip -n "$SERVER_NS" route add $CLIENT_IP dev link0 via $SERVER_GW
+
+ ip -n "$ROUTER_NS" link set link1 up
+ ip -n "$ROUTER_NS" link set link2 up
+ ip -n "$ROUTER_NS" addr add $SERVER_GW/24 dev link1
+ ip -n "$ROUTER_NS" addr add $CLIENT_GW/24 dev link2
+ ip net exec "$ROUTER_NS" sysctl -wq net.ipv4.ip_forward=1
+
+ ip -n "$CLIENT_NS" link set link3 up
+ ip -n "$CLIENT_NS" addr add $CLIENT_IP/24 dev link3
+ ip -n "$CLIENT_NS" route add $SERVER_IP dev link3 via $CLIENT_GW
+
+ # simulate the delay on OVS upcall by setting up a delay for INIT_ACK with
+ # tc on $SERVER_NS side
+ tc -n "$SERVER_NS" qdisc add dev link0 root handle 1: htb r2q 64
+ tc -n "$SERVER_NS" class add dev link0 parent 1: classid 1:1 htb rate 100mbit
+ tc -n "$SERVER_NS" filter add dev link0 parent 1: protocol ip u32 match ip protocol 132 \
+ 0xff match u8 2 0xff at 32 flowid 1:1
+ if ! tc -n "$SERVER_NS" qdisc add dev link0 parent 1:1 handle 10: netem delay 1200ms; then
+ echo "SKIP: Cannot add netem qdisc"
+ exit $ksft_skip
+ fi
+
+ # simulate the ctstate check on OVS nf_conntrack
+ ip net exec "$ROUTER_NS" iptables -A FORWARD -m state --state INVALID,UNTRACKED -j DROP
+ ip net exec "$ROUTER_NS" iptables -A INPUT -p sctp -j DROP
+
+ # use a smaller number for assoc's max_retrans to reproduce the issue
+ modprobe -q sctp
+ ip net exec "$CLIENT_NS" sysctl -wq net.sctp.association_max_retrans=3
+}
+
+cleanup() {
+ ip net exec "$CLIENT_NS" pkill sctp_collision >/dev/null 2>&1
+ ip net exec "$SERVER_NS" pkill sctp_collision >/dev/null 2>&1
+ cleanup_all_ns
+}
+
+do_test() {
+ ip net exec "$SERVER_NS" ./sctp_collision server \
+ $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT &
+ ip net exec "$CLIENT_NS" ./sctp_collision client \
+ $CLIENT_IP $CLIENT_PORT $SERVER_IP $SERVER_PORT
+}
+
+# NOTE: one way to work around the issue is set a smaller hb_interval
+# ip net exec $CLIENT_NS sysctl -wq net.sctp.hb_interval=3500
+
+# run the test case
+trap cleanup EXIT
+setup && \
+echo "Test for SCTP Collision in nf_conntrack:" && \
+do_test && echo "PASS!"
+exit $?
diff --git a/tools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh b/tools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh
new file mode 100755
index 0000000000..121ea93c01
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh
@@ -0,0 +1,164 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that UNREPLIED tcp conntrack will eventually timeout.
+#
+
+source lib.sh
+
+if ! nft --version > /dev/null 2>&1;then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+if ! conntrack --version > /dev/null 2>&1;then
+ echo "SKIP: Could not run test without conntrack tool"
+ exit $ksft_skip
+fi
+
+ret=0
+
+cleanup() {
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
+
+ cleanup_all_ns
+}
+
+ipv4() {
+ echo -n 192.168."$1".2
+}
+
+check_counter()
+{
+ ns=$1
+ name=$2
+ expect=$3
+ local lret=0
+
+ if ! ip netns exec "$ns2" nft list counter inet filter "$name" | grep -q "$expect"; then
+ echo "ERROR: counter $name in $ns2 has unexpected value (expected $expect)" 1>&2
+ ip netns exec "$ns2" nft list counter inet filter "$name" 1>&2
+ lret=1
+ fi
+
+ return $lret
+}
+
+trap cleanup EXIT
+
+# Create test namespaces
+setup_ns ns1 ns2
+
+# Connect the namespace to the host using a veth pair
+ip -net "$ns1" link add name veth1 type veth peer name veth2
+ip -net "$ns1" link set netns "$ns2" dev veth2
+
+ip -net "$ns1" link set up dev lo
+ip -net "$ns2" link set up dev lo
+ip -net "$ns1" link set up dev veth1
+ip -net "$ns2" link set up dev veth2
+
+ip -net "$ns2" addr add 10.11.11.2/24 dev veth2
+ip -net "$ns2" route add default via 10.11.11.1
+
+ip netns exec "$ns2" sysctl -q net.ipv4.conf.veth2.forwarding=1
+
+# add a rule inside NS so we enable conntrack
+ip netns exec "$ns1" nft -f - <<EOF
+table inet filter {
+ chain input {
+ type filter hook input priority 0; policy accept;
+ ct state established accept
+ }
+}
+EOF
+
+ip -net "$ns1" addr add 10.11.11.1/24 dev veth1
+ip -net "$ns1" route add 10.99.99.99 via 10.11.11.2
+
+# Check connectivity works
+ip netns exec "$ns1" ping -q -c 2 10.11.11.2 >/dev/null || exit 1
+
+ip netns exec "$ns2" socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT &
+
+ip netns exec "$ns2" nft -f - <<EOF
+table inet filter {
+ counter connreq { }
+ counter redir { }
+ chain input {
+ type filter hook input priority 0; policy accept;
+ ct state new tcp flags syn ip daddr 10.99.99.99 tcp dport 80 counter name "connreq" accept
+ ct state new ct status dnat tcp dport 8080 counter name "redir" accept
+ }
+}
+EOF
+if [ $? -ne 0 ]; then
+ echo "ERROR: Could not load nft rules"
+ exit 1
+fi
+
+ip netns exec "$ns2" sysctl -q net.netfilter.nf_conntrack_tcp_timeout_syn_sent=10
+
+echo "INFO: connect $ns1 -> $ns2 to the virtual ip"
+ip netns exec "$ns1" bash -c 'for i in $(seq 1 $BUSYWAIT_TIMEOUT) ; do
+ socat -u STDIN TCP:10.99.99.99:80 < /dev/null
+ sleep 0.1
+ done' &
+
+wait_for_attempt()
+{
+ count=$(ip netns exec "$ns2" conntrack -L -p tcp --dport 80 2>/dev/null | wc -l)
+ if [ "$count" -gt 0 ]; then
+ return 0
+ fi
+
+ return 1
+}
+
+# wait for conntrack to pick the new connection request up before loading
+# the nat redirect rule.
+if ! busywait "$BUSYWAIT_TIMEOUT" wait_for_attempt; then
+ echo "ERROR: $ns2 did not pick up tcp connection from peer"
+ exit 1
+fi
+
+ip netns exec "$ns2" nft -f - <<EOF
+table inet nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ ip daddr 10.99.99.99 tcp dport 80 redirect to :8080
+ }
+}
+EOF
+if [ $? -ne 0 ]; then
+ echo "ERROR: Could not load nat redirect"
+ exit 1
+fi
+
+wait_for_redirect()
+{
+ count=$(ip netns exec "$ns2" conntrack -L -p tcp --reply-port-src 8080 2>/dev/null | wc -l)
+ if [ "$count" -gt 0 ]; then
+ return 0
+ fi
+
+ return 1
+}
+echo "INFO: NAT redirect added in ns $ns2, waiting for $BUSYWAIT_TIMEOUT ms for nat to take effect"
+
+busywait "$BUSYWAIT_TIMEOUT" wait_for_redirect
+ret=$?
+
+expect="packets 1 bytes 60"
+if ! check_counter "$ns2" "redir" "$expect"; then
+ ret=1
+fi
+
+if [ $ret -eq 0 ];then
+ echo "PASS: redirection counter has expected values"
+else
+ echo "ERROR: no tcp connection was redirected"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
index 8b5ea92345..073e8e62d3 100755
--- a/tools/testing/selftests/netfilter/conntrack_vrf.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# This script demonstrates interaction of conntrack and vrf.
# The vrf driver calls the netfilter hooks again, with oif/iif
@@ -28,84 +28,67 @@
# that was supposed to be fixed by the commit mentioned above to make sure
# that any fix to test case 1 won't break masquerade again.
-ksft_skip=4
+source lib.sh
IP0=172.30.30.1
IP1=172.30.30.2
PFXL=30
ret=0
-sfx=$(mktemp -u "XXXXXXXX")
-ns0="ns0-$sfx"
-ns1="ns1-$sfx"
-
cleanup()
{
ip netns pids $ns0 | xargs kill 2>/dev/null
ip netns pids $ns1 | xargs kill 2>/dev/null
- ip netns del $ns0 $ns1
+ cleanup_all_ns
}
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nft tool"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-ip netns add "$ns0"
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace $ns0"
- exit $ksft_skip
-fi
-ip netns add "$ns1"
+checktool "nft --version" "run test without nft"
+checktool "conntrack --version" "run test without conntrack"
+checktool "socat -h" "run test without socat"
trap cleanup EXIT
-ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0
-ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
-ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+setup_ns ns0 ns1
+
+ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0
+ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
+ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
-ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1
-if [ $? -ne 0 ];then
+if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then
echo "SKIP: Could not add veth device"
exit $ksft_skip
fi
-ip -net $ns0 li add tvrf type vrf table 9876
-if [ $? -ne 0 ];then
+if ! ip -net "$ns0" li add tvrf type vrf table 9876; then
echo "SKIP: Could not add vrf device"
exit $ksft_skip
fi
-ip -net $ns0 li set lo up
+ip -net "$ns0" li set veth0 master tvrf
+ip -net "$ns0" li set tvrf up
+ip -net "$ns0" li set veth0 up
+ip -net "$ns1" li set veth0 up
-ip -net $ns0 li set veth0 master tvrf
-ip -net $ns0 li set tvrf up
-ip -net $ns0 li set veth0 up
-ip -net $ns1 li set veth0 up
+ip -net "$ns0" addr add $IP0/$PFXL dev veth0
+ip -net "$ns1" addr add $IP1/$PFXL dev veth0
-ip -net $ns0 addr add $IP0/$PFXL dev veth0
-ip -net $ns1 addr add $IP1/$PFXL dev veth0
+listener_ready()
+{
+ local ns="$1"
-ip netns exec $ns1 iperf3 -s > /dev/null 2>&1&
-if [ $? -ne 0 ];then
- echo "SKIP: Could not start iperf3"
- exit $ksft_skip
-fi
+ ss -N "$ns" -l -n -t -o "sport = :55555" | grep -q "55555"
+}
+
+ip netns exec "$ns1" socat -u -4 TCP-LISTEN:55555,reuseaddr,fork STDOUT > /dev/null &
+busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1"
# test vrf ingress handling.
# The incoming connection should be placed in conntrack zone 1,
# as decided by the first iteration of the ruleset.
test_ct_zone_in()
{
-ip netns exec $ns0 nft -f - <<EOF
+ip netns exec "$ns0" nft -f - <<EOF
table testct {
chain rawpre {
type filter hook prerouting priority raw;
@@ -126,21 +109,21 @@ table testct {
}
}
EOF
- ip netns exec $ns1 ping -W 1 -c 1 -I veth0 $IP0 > /dev/null
+ ip netns exec "$ns1" ping -W 1 -c 1 -I veth0 "$IP0" > /dev/null
# should be in zone 1, not zone 2
- count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l)
- if [ $count -eq 1 ]; then
+ count=$(ip netns exec "$ns0" conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l)
+ if [ "$count" -eq 1 ]; then
echo "PASS: entry found in conntrack zone 1"
else
echo "FAIL: entry not found in conntrack zone 1"
- count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l)
- if [ $count -eq 1 ]; then
+ count=$(ip netns exec "$ns0" conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l)
+ if [ "$count" -eq 1 ]; then
echo "FAIL: entry found in zone 2 instead"
else
echo "FAIL: entry not in zone 1 or 2, dumping table"
- ip netns exec $ns0 conntrack -L
- ip netns exec $ns0 nft list ruleset
+ ip netns exec "$ns0" conntrack -L
+ ip netns exec "$ns0" nft list ruleset
fi
fi
}
@@ -153,12 +136,12 @@ test_masquerade_vrf()
local qdisc=$1
if [ "$qdisc" != "default" ]; then
- tc -net $ns0 qdisc add dev tvrf root $qdisc
+ tc -net "$ns0" qdisc add dev tvrf root "$qdisc"
fi
- ip netns exec $ns0 conntrack -F 2>/dev/null
+ ip netns exec "$ns0" conntrack -F 2>/dev/null
-ip netns exec $ns0 nft -f - <<EOF
+ip netns exec "$ns0" nft -f - <<EOF
flush ruleset
table ip nat {
chain rawout {
@@ -179,25 +162,23 @@ table ip nat {
}
}
EOF
- ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 >/dev/null
- if [ $? -ne 0 ]; then
- echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device"
+ if ! ip netns exec "$ns0" ip vrf exec tvrf socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null;then
+ echo "FAIL: connect failure with masquerade + sport rewrite on vrf device"
ret=1
return
fi
# must also check that nat table was evaluated on second (lower device) iteration.
- ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' &&
- ip netns exec $ns0 nft list table ip nat |grep -q 'untracked counter packets [1-9]'
- if [ $? -eq 0 ]; then
- echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
+ if ip netns exec "$ns0" nft list table ip nat |grep -q 'counter packets 1' &&
+ ip netns exec "$ns0" nft list table ip nat |grep -q 'untracked counter packets [1-9]'; then
+ echo "PASS: connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
else
echo "FAIL: vrf rules have unexpected counter value"
ret=1
fi
if [ "$qdisc" != "default" ]; then
- tc -net $ns0 qdisc del dev tvrf root
+ tc -net "$ns0" qdisc del dev tvrf root
fi
}
@@ -206,8 +187,8 @@ EOF
# oifname is the lower device (veth0 in this case).
test_masquerade_veth()
{
- ip netns exec $ns0 conntrack -F 2>/dev/null
-ip netns exec $ns0 nft -f - <<EOF
+ ip netns exec "$ns0" conntrack -F 2>/dev/null
+ip netns exec "$ns0" nft -f - <<EOF
flush ruleset
table ip nat {
chain postrouting {
@@ -216,17 +197,15 @@ table ip nat {
}
}
EOF
- ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 > /dev/null
- if [ $? -ne 0 ]; then
- echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device"
+ if ! ip netns exec "$ns0" ip vrf exec tvrf socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null;then
+ echo "FAIL: connect failure with masquerade + sport rewrite on veth device"
ret=1
return
fi
# must also check that nat table was evaluated on second (lower device) iteration.
- ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
- if [ $? -eq 0 ]; then
- echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device"
+ if ip netns exec "$ns0" nft list table ip nat |grep -q 'counter packets 1'; then
+ echo "PASS: connect with masquerade + sport rewrite on veth device"
else
echo "FAIL: vrf masq rule has unexpected counter value"
ret=1
diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh
new file mode 100755
index 0000000000..4ceee9fb39
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/ipvs.sh
@@ -0,0 +1,211 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# End-to-end ipvs test suite
+# Topology:
+#--------------------------------------------------------------+
+# | |
+# ns0 | ns1 |
+# ----------- | ----------- ----------- |
+# | veth01 | --------- | veth10 | | veth12 | |
+# ----------- peer ----------- ----------- |
+# | | | |
+# ----------- | | |
+# | br0 | |----------------- peer |--------------|
+# ----------- | | |
+# | | | |
+# ---------- peer ---------- ----------- |
+# | veth02 | --------- | veth20 | | veth21 | |
+# ---------- | ---------- ----------- |
+# | ns2 |
+# | |
+#--------------------------------------------------------------+
+#
+# We assume that all network driver are loaded
+#
+
+source lib.sh
+
+ret=0
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+readonly port=8080
+
+readonly vip_v4=207.175.44.110
+readonly cip_v4=10.0.0.2
+readonly gip_v4=10.0.0.1
+readonly dip_v4=172.16.0.1
+readonly rip_v4=172.16.0.2
+readonly sip_v4=10.0.0.3
+
+readonly infile="$(mktemp)"
+readonly outfile="$(mktemp)"
+readonly datalen=32
+
+sysipvsnet="/proc/sys/net/ipv4/vs/"
+if [ ! -d $sysipvsnet ]; then
+ if ! modprobe -q ip_vs; then
+ echo "skip: could not run test without ipvs module"
+ exit $ksft_skip
+ fi
+fi
+
+checktool "ipvsadm -v" "run test without ipvsadm"
+checktool "socat -h" "run test without socat"
+
+setup() {
+ setup_ns ns0 ns1 ns2
+
+ ip link add veth01 netns "${ns0}" type veth peer name veth10 netns "${ns1}"
+ ip link add veth02 netns "${ns0}" type veth peer name veth20 netns "${ns2}"
+ ip link add veth12 netns "${ns1}" type veth peer name veth21 netns "${ns2}"
+
+ ip netns exec "${ns0}" ip link set veth01 up
+ ip netns exec "${ns0}" ip link set veth02 up
+ ip netns exec "${ns0}" ip link add br0 type bridge
+ ip netns exec "${ns0}" ip link set veth01 master br0
+ ip netns exec "${ns0}" ip link set veth02 master br0
+ ip netns exec "${ns0}" ip link set br0 up
+ ip netns exec "${ns0}" ip addr add "${cip_v4}/24" dev br0
+
+ ip netns exec "${ns1}" ip link set veth10 up
+ ip netns exec "${ns1}" ip addr add "${gip_v4}/24" dev veth10
+ ip netns exec "${ns1}" ip link set veth12 up
+ ip netns exec "${ns1}" ip addr add "${dip_v4}/24" dev veth12
+
+ ip netns exec "${ns2}" ip link set veth21 up
+ ip netns exec "${ns2}" ip addr add "${rip_v4}/24" dev veth21
+ ip netns exec "${ns2}" ip link set veth20 up
+ ip netns exec "${ns2}" ip addr add "${sip_v4}/24" dev veth20
+
+ sleep 1
+
+ dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
+}
+
+cleanup() {
+ cleanup_all_ns
+
+ if [ -f "${outfile}" ]; then
+ rm "${outfile}"
+ fi
+ if [ -f "${infile}" ]; then
+ rm "${infile}"
+ fi
+}
+
+server_listen() {
+ ip netns exec "$ns2" socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT > "${outfile}" &
+ server_pid=$!
+ sleep 0.2
+}
+
+client_connect() {
+ ip netns exec "${ns0}" timeout 2 socat -u -4 STDIN TCP:"${vip_v4}":"${port}" < "${infile}"
+}
+
+verify_data() {
+ wait "${server_pid}"
+ cmp "$infile" "$outfile" 2>/dev/null
+}
+
+test_service() {
+ server_listen
+ client_connect
+ verify_data
+}
+
+
+test_dr() {
+ ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
+
+ ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec "${ns1}" ipvsadm -A -t "${vip_v4}:${port}" -s rr
+ ip netns exec "${ns1}" ipvsadm -a -t "${vip_v4}:${port}" -r "${rip_v4}:${port}"
+ ip netns exec "${ns1}" ip addr add "${vip_v4}/32" dev lo:1
+
+ # avoid incorrect arp response
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
+ # avoid reverse route lookup
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0
+ ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
+
+ test_service
+}
+
+test_nat() {
+ ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
+
+ ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec "${ns1}" ipvsadm -A -t "${vip_v4}:${port}" -s rr
+ ip netns exec "${ns1}" ipvsadm -a -m -t "${vip_v4}:${port}" -r "${rip_v4}:${port}"
+ ip netns exec "${ns1}" ip addr add "${vip_v4}/32" dev lo:1
+
+ ip netns exec "${ns2}" ip link del veth20
+ ip netns exec "${ns2}" ip route add default via "${dip_v4}" dev veth21
+
+ test_service
+}
+
+test_tun() {
+ ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
+
+ ip netns exec "${ns1}" modprobe -q ipip
+ ip netns exec "${ns1}" ip link set tunl0 up
+ ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=0
+ ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.all.send_redirects=0
+ ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.default.send_redirects=0
+ ip netns exec "${ns1}" ipvsadm -A -t "${vip_v4}:${port}" -s rr
+ ip netns exec "${ns1}" ipvsadm -a -i -t "${vip_v4}:${port}" -r ${rip_v4}:${port}
+ ip netns exec "${ns1}" ip addr add ${vip_v4}/32 dev lo:1
+
+ ip netns exec "${ns2}" modprobe -q ipip
+ ip netns exec "${ns2}" ip link set tunl0 up
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0
+ ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
+
+ test_service
+}
+
+run_tests() {
+ local errors=
+
+ echo "Testing DR mode..."
+ cleanup
+ setup
+ test_dr
+ errors=$(( $errors + $? ))
+
+ echo "Testing NAT mode..."
+ cleanup
+ setup
+ test_nat
+ errors=$(( $errors + $? ))
+
+ echo "Testing Tunnel mode..."
+ cleanup
+ setup
+ test_tun
+ errors=$(( $errors + $? ))
+
+ return $errors
+}
+
+trap cleanup EXIT
+
+run_tests
+
+if [ $? -ne 0 ]; then
+ echo -e "$(basename $0): ${RED}FAIL${NC}"
+ exit 1
+fi
+echo -e "$(basename $0): ${GREEN}PASS${NC}"
+exit 0
diff --git a/tools/testing/selftests/net/netfilter/lib.sh b/tools/testing/selftests/net/netfilter/lib.sh
new file mode 100644
index 0000000000..bedd35298e
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/lib.sh
@@ -0,0 +1,10 @@
+net_netfilter_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "$net_netfilter_dir/../lib.sh"
+
+checktool (){
+ if ! $1 > /dev/null 2>&1; then
+ echo "SKIP: Could not $2"
+ exit $ksft_skip
+ fi
+}
diff --git a/tools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh b/tools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh
new file mode 100755
index 0000000000..c6fdd2079f
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+checktool "conntrack --version" "run test without conntrack"
+checktool "iptables --version" "run test without iptables"
+checktool "ip6tables --version" "run test without ip6tables"
+
+modprobe -q tun
+modprobe -q nf_conntrack
+# echo 1 > /proc/sys/net/netfilter/nf_log_all_netns
+
+PDRILL_TIMEOUT=10
+
+files="
+conntrack_ack_loss_stall.pkt
+conntrack_inexact_rst.pkt
+conntrack_syn_challenge_ack.pkt
+conntrack_synack_old.pkt
+conntrack_synack_reuse.pkt
+conntrack_rst_invalid.pkt
+"
+
+if ! packetdrill --dry_run --verbose "packetdrill/conntrack_ack_loss_stall.pkt";then
+ echo "SKIP: packetdrill not installed"
+ exit ${ksft_skip}
+fi
+
+ret=0
+
+run_packetdrill()
+{
+ filename="$1"
+ ipver="$2"
+ local mtu=1500
+
+ export NFCT_IP_VERSION="$ipver"
+
+ if [ "$ipver" = "ipv4" ];then
+ export xtables="iptables"
+ elif [ "$ipver" = "ipv6" ];then
+ export xtables="ip6tables"
+ mtu=1520
+ fi
+
+ timeout "$PDRILL_TIMEOUT" unshare -n packetdrill --ip_version="$ipver" --mtu=$mtu \
+ --tolerance_usecs=1000000 --non_fatal packet "$filename"
+}
+
+run_one_test_file()
+{
+ filename="$1"
+
+ for v in ipv4 ipv6;do
+ printf "%-50s(%s)%-20s" "$filename" "$v" ""
+ if run_packetdrill packetdrill/"$f" "$v";then
+ echo OK
+ else
+ echo FAIL
+ ret=1
+ fi
+ done
+}
+
+echo "Replaying packetdrill test cases:"
+for f in $files;do
+ run_one_test_file packetdrill/"$f"
+done
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
new file mode 100755
index 0000000000..1014551dd7
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test NAT source port clash resolution
+#
+
+source lib.sh
+ret=0
+socatpid=0
+
+cleanup()
+{
+ [ "$socatpid" -gt 0 ] && kill "$socatpid"
+
+ cleanup_all_ns
+}
+
+checktool "socat -h" "run test without socat"
+checktool "iptables --version" "run test without iptables"
+
+trap cleanup EXIT
+
+setup_ns ns1 ns2
+
+# Connect the namespaces using a veth pair
+ip link add name veth2 type veth peer name veth1
+ip link set netns "$ns1" dev veth1
+ip link set netns "$ns2" dev veth2
+
+ip netns exec "$ns1" ip link set up dev lo
+ip netns exec "$ns1" ip link set up dev veth1
+ip netns exec "$ns1" ip addr add 192.168.1.1/24 dev veth1
+
+ip netns exec "$ns2" ip link set up dev lo
+ip netns exec "$ns2" ip link set up dev veth2
+ip netns exec "$ns2" ip addr add 192.168.1.2/24 dev veth2
+
+# Create a server in one namespace
+ip netns exec "$ns1" socat -u TCP-LISTEN:5201,fork OPEN:/dev/null,wronly=1 &
+socatpid=$!
+
+# Restrict source port to just one so we don't have to exhaust
+# all others.
+ip netns exec "$ns2" sysctl -q net.ipv4.ip_local_port_range="10000 10000"
+
+# add a virtual IP using DNAT
+ip netns exec "$ns2" iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201
+
+# ... and route it to the other namespace
+ip netns exec "$ns2" ip route add 10.96.0.1 via 192.168.1.1
+
+# add a persistent connection from the other namespace
+ip netns exec "$ns2" socat -t 10 - TCP:192.168.1.1:5201 > /dev/null &
+
+sleep 1
+
+# ip daddr:dport will be rewritten to 192.168.1.1 5201
+# NAT must reallocate source port 10000 because
+# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use
+echo test | ip netns exec "$ns2" socat -t 3 -u STDIN TCP:10.96.0.1:443,connect-timeout=3 >/dev/null
+ret=$?
+
+# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201).
+if [ $ret -eq 0 ]; then
+ echo "PASS: socat can connect via NAT'd address"
+else
+ echo "FAIL: socat cannot connect via NAT'd address"
+fi
+
+# check sport clashres.
+ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201
+ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201
+
+sleep 5 | ip netns exec "$ns2" socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
+
+# if connect succeeds, client closes instantly due to EOF on stdin.
+# if connect hangs, it will time out after 5s.
+echo | ip netns exec "$ns2" socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
+cpid2=$!
+
+time_then=$(date +%s)
+wait $cpid2
+rv=$?
+time_now=$(date +%s)
+
+# Check how much time has elapsed, expectation is for
+# 'cpid2' to connect and then exit (and no connect delay).
+delta=$((time_now - time_then))
+
+if [ $delta -lt 2 ] && [ $rv -eq 0 ]; then
+ echo "PASS: could connect to service via redirected ports"
+else
+ echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)"
+ ret=1
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nf-queue.c b/tools/testing/selftests/net/netfilter/nf_queue.c
index 9e56b9d470..9e56b9d470 100644
--- a/tools/testing/selftests/netfilter/nf-queue.c
+++ b/tools/testing/selftests/net/netfilter/nf_queue.c
diff --git a/tools/testing/selftests/netfilter/nft_audit.sh b/tools/testing/selftests/net/netfilter/nft_audit.sh
index 99ed5bd6e8..902f8114bc 100755
--- a/tools/testing/selftests/netfilter/nft_audit.sh
+++ b/tools/testing/selftests/net/netfilter/nft_audit.sh
@@ -6,11 +6,34 @@
SKIP_RC=4
RC=0
+if [ -r /var/run/auditd.pid ];then
+ read pid < /var/run/auditd.pid
+ p=$(pgrep ^auditd$)
+
+ if [ "$pid" -eq "$p" ]; then
+ echo "SKIP: auditd is running"
+ exit $SKIP_RC
+ fi
+fi
+
nft --version >/dev/null 2>&1 || {
echo "SKIP: missing nft tool"
exit $SKIP_RC
}
+# nft must be recent enough to support "reset" keyword.
+nft --check -f /dev/stdin >/dev/null 2>&1 <<EOF
+add table t
+add chain t c
+reset rules t c
+EOF
+
+if [ "$?" -ne 0 ];then
+ echo -n "SKIP: nft reset feature test failed: "
+ nft --version
+ exit $SKIP_RC
+fi
+
# Run everything in a separate network namespace
[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
@@ -73,7 +96,7 @@ done
for ((i = 0; i < 500; i++)); do
echo "add rule t2 c3 counter accept comment \"rule $i\""
-done >$rulefile
+done > "$rulefile"
do_test "nft -f $rulefile" \
'table=t2 family=2 entries=500 op=nft_register_rule'
@@ -101,7 +124,7 @@ do_test 'nft add counter t2 c1; add counter t2 c2' \
for ((i = 3; i <= 500; i++)); do
echo "add counter t2 c$i"
-done >$rulefile
+done > "$rulefile"
do_test "nft -f $rulefile" \
'table=t2 family=2 entries=498 op=nft_register_obj'
@@ -115,7 +138,7 @@ do_test 'nft add quota t2 q1 { 10 bytes }; add quota t2 q2 { 10 bytes }' \
for ((i = 3; i <= 500; i++)); do
echo "add quota t2 q$i { 10 bytes }"
-done >$rulefile
+done > "$rulefile"
do_test "nft -f $rulefile" \
'table=t2 family=2 entries=498 op=nft_register_obj'
@@ -157,7 +180,7 @@ table=t2 family=2 entries=135 op=nft_reset_rule'
# resetting sets and elements
-elem=(22 ,80 ,443)
+elem=(22 ",80" ",443")
relem=""
for i in {1..3}; do
relem+="${elem[((i - 1))]}"
diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index e908009576..6d66240e14 100755
--- a/tools/testing/selftests/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# nft_concat_range.sh - Tests for sets with concatenation of ranged fields
@@ -7,10 +7,10 @@
#
# Author: Stefano Brivio <sbrivio@redhat.com>
#
-# shellcheck disable=SC2154,SC2034,SC2016,SC2030,SC2031
+# shellcheck disable=SC2154,SC2034,SC2016,SC2030,SC2031,SC2317
# ^ Configuration and templates sourced with eval, counters reused in subshells
-KSELFTEST_SKIP=4
+source lib.sh
# Available test groups:
# - reported_issues: check for issues that were reported in the past
@@ -19,7 +19,7 @@ KSELFTEST_SKIP=4
# - timeout: check that packets match entries until they expire
# - performance: estimate matching rate, compare with rbtree and hash baselines
TESTS="reported_issues correctness concurrency timeout"
-[ "${quicktest}" != "1" ] && TESTS="${TESTS} performance"
+[ -n "$NFT_CONCAT_RANGE_TESTS" ] && TESTS="${NFT_CONCAT_RANGE_TESTS}"
# Set types, defined by TYPE_ variables below
TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
@@ -31,7 +31,7 @@ BUGS="flush_remove_add reload"
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
- ../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+ ../../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
pktgen/pktgen_bench_xmit_mode_netif_receive.sh"
# Definition of set types:
@@ -66,7 +66,7 @@ src
start 1
count 5
src_delta 2000
-tools sendip nc bash
+tools sendip bash
proto udp
race_repeat 3
@@ -91,7 +91,7 @@ src
start 1
count 5
src_delta 2000
-tools sendip socat nc bash
+tools sendip socat bash
proto udp
race_repeat 3
@@ -116,7 +116,7 @@ src
start 10
count 5
src_delta 2000
-tools sendip socat nc bash
+tools sendip socat bash
proto udp6
race_repeat 3
@@ -141,7 +141,7 @@ src
start 1
count 5
src_delta 2000
-tools sendip socat nc bash
+tools sendip socat bash
proto udp
race_repeat 0
@@ -163,7 +163,7 @@ src mac
start 10
count 5
src_delta 2000
-tools sendip socat nc bash
+tools sendip socat bash
proto udp6
race_repeat 0
@@ -185,7 +185,7 @@ src mac proto
start 10
count 5
src_delta 2000
-tools sendip socat nc bash
+tools sendip socat bash
proto udp6
race_repeat 0
@@ -207,7 +207,7 @@ src addr4
start 1
count 5
src_delta 2000
-tools sendip socat nc bash
+tools sendip socat bash
proto udp
race_repeat 3
@@ -227,7 +227,7 @@ src addr6 port
start 10
count 5
src_delta 2000
-tools sendip socat nc
+tools sendip socat
proto udp6
race_repeat 3
@@ -247,7 +247,7 @@ src mac proto addr4
start 1
count 5
src_delta 2000
-tools sendip socat nc bash
+tools sendip socat bash
proto udp
race_repeat 0
@@ -264,7 +264,7 @@ src mac
start 1
count 5
src_delta 2000
-tools sendip socat nc bash
+tools sendip socat bash
proto udp
race_repeat 0
@@ -286,7 +286,7 @@ src mac addr4
start 1
count 5
src_delta 2000
-tools sendip socat nc bash
+tools sendip socat bash
proto udp
race_repeat 0
@@ -337,7 +337,7 @@ src addr4
start 1
count 5
src_delta 2000
-tools sendip socat nc
+tools sendip socat
proto udp
race_repeat 3
@@ -363,7 +363,7 @@ src mac
start 1
count 1
src_delta 2000
-tools sendip socat nc bash
+tools sendip socat bash
proto udp
race_repeat 0
@@ -473,8 +473,6 @@ setup_veth() {
B() {
ip netns exec B "$@" >/dev/null 2>&1
}
-
- sleep 2
}
# Fill in set template and initialise set
@@ -488,12 +486,6 @@ check_tools() {
__tools=
for tool in ${tools}; do
- if [ "${tool}" = "nc" ] && [ "${proto}" = "udp6" ] && \
- ! nc -u -w0 1.1.1.1 1 2>/dev/null; then
- # Some GNU netcat builds might not support IPv6
- __tools="${__tools} netcat-openbsd"
- continue
- fi
__tools="${__tools} ${tool}"
command -v "${tool}" >/dev/null && return 0
@@ -554,30 +546,7 @@ setup_send_udp() {
ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
[ -z "${dst_port}" ] && dst_port=12345
- echo "test4" | B socat -t 0.01 STDIN UDP4-DATAGRAM:${dst_addr4}:${dst_port}"${__socatbind}"
-
- src_addr4=
- src_port=
- }
- elif command -v nc >/dev/null; then
- if nc -u -w0 1.1.1.1 1 2>/dev/null; then
- # OpenBSD netcat
- nc_opt="-w0"
- else
- # GNU netcat
- nc_opt="-q0"
- fi
-
- send_udp() {
- if [ -n "${src_addr4}" ]; then
- B ip addr add "${src_addr4}" dev veth_b
- __src_addr4="-s ${src_addr4}"
- fi
- ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
- [ -n "${src_port}" ] && src_port="-p ${src_port}"
-
- echo "" | B nc -u "${nc_opt}" "${__src_addr4}" \
- "${src_port}" "${dst_addr4}" "${dst_port}"
+ echo "test4" | B socat -t 0.01 STDIN UDP4-DATAGRAM:"$dst_addr4":"$dst_port""${__socatbind}"
src_addr4=
src_port=
@@ -632,11 +601,7 @@ setup_send_udp6() {
__socatbind6=
if [ -n "${src_addr6}" ]; then
- if [ -n "${src_addr6} != "${src_addr6_added} ]; then
- B ip addr add "${src_addr6}" dev veth_b nodad
-
- src_addr6_added=${src_addr6}
- fi
+ B ip addr add "${src_addr6}" dev veth_b nodad
__socatbind6=",bind=[${src_addr6}]"
@@ -645,26 +610,7 @@ setup_send_udp6() {
fi
fi
- echo "test6" | B socat -t 0.01 STDIN UDP6-DATAGRAM:[${dst_addr6}]:${dst_port}"${__socatbind6}"
- }
- elif command -v nc >/dev/null && nc -u -w0 1.1.1.1 1 2>/dev/null; then
- # GNU netcat might not work with IPv6, try next tool
- send_udp6() {
- ip -6 addr add "${dst_addr6}" dev veth_a nodad \
- 2>/dev/null
- if [ -n "${src_addr6}" ]; then
- B ip addr add "${src_addr6}" dev veth_b nodad
- else
- src_addr6="2001:db8::2"
- fi
- [ -n "${src_port}" ] && src_port="-p ${src_port}"
-
- # shellcheck disable=SC2086 # this needs split options
- echo "" | B nc -u w0 "-s${src_addr6}" ${src_port} \
- ${dst_addr6} ${dst_port}
-
- src_addr6=
- src_port=
+ echo "test6" | B socat -t 0.01 STDIN UDP6-DATAGRAM:["$dst_addr6"]:"$dst_port""${__socatbind6}"
}
elif [ -z "$(bash -c 'type -p')" ]; then
send_udp6() {
@@ -679,10 +625,17 @@ setup_send_udp6() {
fi
}
+listener_ready()
+{
+ port="$1"
+ ss -lnt -o "sport = :$port" | grep -q "$port"
+}
+
# Set up function to send TCP traffic on IPv4
setup_flood_tcp() {
if command -v iperf3 >/dev/null; then
flood_tcp() {
+ local n_port="${dst_port}"
[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
if [ -n "${src_addr4}" ]; then
B ip addr add "${src_addr4}/16" dev veth_b
@@ -699,7 +652,7 @@ setup_flood_tcp() {
# shellcheck disable=SC2086 # this needs split options
iperf3 -s -DB "${dst_addr4}" ${dst_port} >/dev/null 2>&1
- sleep 2
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
# shellcheck disable=SC2086 # this needs split options
B iperf3 -c "${dst_addr4}" ${dst_port} ${src_port} \
@@ -711,6 +664,7 @@ setup_flood_tcp() {
}
elif command -v iperf >/dev/null; then
flood_tcp() {
+ local n_port="${dst_port}"
[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
if [ -n "${src_addr4}" ]; then
B ip addr add "${src_addr4}/16" dev veth_b
@@ -727,7 +681,7 @@ setup_flood_tcp() {
# shellcheck disable=SC2086 # this needs split options
iperf -s -DB "${dst_addr4}" ${dst_port} >/dev/null 2>&1
- sleep 2
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
# shellcheck disable=SC2086 # this needs split options
B iperf -c "${dst_addr4}" ${dst_port} ${src_addr4} \
@@ -739,6 +693,7 @@ setup_flood_tcp() {
}
elif command -v netperf >/dev/null; then
flood_tcp() {
+ local n_port="${dst_port}"
[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
if [ -n "${src_addr4}" ]; then
B ip addr add "${src_addr4}/16" dev veth_b
@@ -755,7 +710,7 @@ setup_flood_tcp() {
# shellcheck disable=SC2086 # this needs split options
netserver -4 ${dst_port} -L "${dst_addr4}" \
>/dev/null 2>&1
- sleep 2
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "${n_port}"
# shellcheck disable=SC2086 # this needs split options
B netperf -4 -H "${dst_addr4}" ${dst_port} \
@@ -774,6 +729,7 @@ setup_flood_tcp() {
setup_flood_tcp6() {
if command -v iperf3 >/dev/null; then
flood_tcp6() {
+ local n_port="${dst_port}"
[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
if [ -n "${src_addr6}" ]; then
B ip addr add "${src_addr6}" dev veth_b nodad
@@ -790,7 +746,7 @@ setup_flood_tcp6() {
# shellcheck disable=SC2086 # this needs split options
iperf3 -s -DB "${dst_addr6}" ${dst_port} >/dev/null 2>&1
- sleep 2
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "${n_port}"
# shellcheck disable=SC2086 # this needs split options
B iperf3 -c "${dst_addr6}" ${dst_port} \
@@ -802,6 +758,7 @@ setup_flood_tcp6() {
}
elif command -v iperf >/dev/null; then
flood_tcp6() {
+ local n_port="${dst_port}"
[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
if [ -n "${src_addr6}" ]; then
B ip addr add "${src_addr6}" dev veth_b nodad
@@ -818,7 +775,7 @@ setup_flood_tcp6() {
# shellcheck disable=SC2086 # this needs split options
iperf -s -VDB "${dst_addr6}" ${dst_port} >/dev/null 2>&1
- sleep 2
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
# shellcheck disable=SC2086 # this needs split options
B iperf -c "${dst_addr6}" -V ${dst_port} \
@@ -830,6 +787,7 @@ setup_flood_tcp6() {
}
elif command -v netperf >/dev/null; then
flood_tcp6() {
+ local n_port="${dst_port}"
[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
if [ -n "${src_addr6}" ]; then
B ip addr add "${src_addr6}" dev veth_b nodad
@@ -846,7 +804,7 @@ setup_flood_tcp6() {
# shellcheck disable=SC2086 # this needs split options
netserver -6 ${dst_port} -L "${dst_addr6}" \
>/dev/null 2>&1
- sleep 2
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
# shellcheck disable=SC2086 # this needs split options
B netperf -6 -H "${dst_addr6}" ${dst_port} \
@@ -865,6 +823,7 @@ setup_flood_tcp6() {
setup_flood_udp() {
if command -v iperf3 >/dev/null; then
flood_udp() {
+ local n_port="${dst_port}"
[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
if [ -n "${src_addr4}" ]; then
B ip addr add "${src_addr4}/16" dev veth_b
@@ -881,7 +840,7 @@ setup_flood_udp() {
# shellcheck disable=SC2086 # this needs split options
iperf3 -s -DB "${dst_addr4}" ${dst_port}
- sleep 2
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
# shellcheck disable=SC2086 # this needs split options
B iperf3 -u -c "${dst_addr4}" -Z -b 100M -l16 -t1000 \
@@ -893,6 +852,7 @@ setup_flood_udp() {
}
elif command -v iperf >/dev/null; then
flood_udp() {
+ local n_port="${dst_port}"
[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
if [ -n "${src_addr4}" ]; then
B ip addr add "${src_addr4}/16" dev veth_b
@@ -909,7 +869,7 @@ setup_flood_udp() {
# shellcheck disable=SC2086 # this needs split options
iperf -u -sDB "${dst_addr4}" ${dst_port} >/dev/null 2>&1
- sleep 2
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
# shellcheck disable=SC2086 # this needs split options
B iperf -u -c "${dst_addr4}" -b 100M -l1 -t1000 \
@@ -921,6 +881,7 @@ setup_flood_udp() {
}
elif command -v netperf >/dev/null; then
flood_udp() {
+ local n_port="${dst_port}"
[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
if [ -n "${src_addr4}" ]; then
B ip addr add "${src_addr4}/16" dev veth_b
@@ -937,7 +898,7 @@ setup_flood_udp() {
# shellcheck disable=SC2086 # this needs split options
netserver -4 ${dst_port} -L "${dst_addr4}" \
>/dev/null 2>&1
- sleep 2
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
# shellcheck disable=SC2086 # this needs split options
B netperf -4 -H "${dst_addr4}" ${dst_port} \
@@ -982,6 +943,7 @@ cleanup() {
ip link del dummy0 2>/dev/null
ip route del default 2>/dev/null
ip -6 route del default 2>/dev/null
+ ip netns pids B 2>/dev/null | xargs kill 2>/dev/null
ip netns del B 2>/dev/null
ip link del veth_a 2>/dev/null
timeout=
@@ -989,15 +951,18 @@ cleanup() {
killall iperf 2>/dev/null
killall netperf 2>/dev/null
killall netserver 2>/dev/null
- rm -f ${tmp}
- sleep 2
+}
+
+cleanup_exit() {
+ cleanup
+ rm -f "$tmp"
}
# Entry point for setup functions
setup() {
if [ "$(id -u)" -ne 0 ]; then
echo " need to run as root"
- exit ${KSELFTEST_SKIP}
+ exit ${ksft_skip}
fi
cleanup
@@ -1258,7 +1223,7 @@ send_nomatch() {
# - check that packets outside range don't match it
# - remove some elements, check that packets don't match anymore
test_correctness() {
- setup veth send_"${proto}" set || return ${KSELFTEST_SKIP}
+ setup veth send_"${proto}" set || return ${ksft_skip}
range_size=1
for i in $(seq "${start}" $((start + count))); do
@@ -1273,7 +1238,7 @@ test_correctness() {
srcend=$((end + src_delta))
add "$(format)" || return 1
- for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do
+ for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
send_match "${j}" $((j + src_delta)) || return 1
done
send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1
@@ -1281,7 +1246,7 @@ test_correctness() {
# Delete elements now and then
if [ $((i % 3)) -eq 0 ]; then
del "$(format)" || return 1
- for j in $(seq ${start} \
+ for j in $(seq "$start" \
$((range_size / 2 + 1)) ${end}); do
send_nomatch "${j}" $((j + src_delta)) \
|| return 1
@@ -1307,12 +1272,12 @@ test_concurrency() {
proto=${flood_proto}
tools=${flood_tools}
chain_spec=${flood_spec}
- setup veth flood_"${proto}" set || return ${KSELFTEST_SKIP}
+ setup veth flood_"${proto}" set || return ${ksft_skip}
range_size=1
cstart=${start}
flood_pids=
- for i in $(seq ${start} $((start + count))); do
+ for i in $(seq "$start" $((start + count))); do
end=$((start + range_size))
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
@@ -1325,7 +1290,7 @@ test_concurrency() {
start=$((end + range_size))
done
- sleep 10
+ sleep $((RANDOM%10))
pids=
for c in $(seq 1 "$(nproc)"); do (
@@ -1335,7 +1300,7 @@ test_concurrency() {
# $start needs to be local to this subshell
# shellcheck disable=SC2030
start=${cstart}
- for i in $(seq ${start} $((start + count))); do
+ for i in $(seq "$start" $((start + count))); do
end=$((start + range_size))
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
@@ -1350,7 +1315,7 @@ test_concurrency() {
range_size=1
start=${cstart}
- for i in $(seq ${start} $((start + count))); do
+ for i in $(seq "$start" $((start + count))); do
end=$((start + range_size))
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
@@ -1366,7 +1331,7 @@ test_concurrency() {
range_size=1
start=${cstart}
- for i in $(seq ${start} $((start + count))); do
+ for i in $(seq "$start" $((start + count))); do
end=$((start + range_size))
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
@@ -1379,7 +1344,7 @@ test_concurrency() {
range_size=1
start=${cstart}
- for i in $(seq ${start} $((start + count))); do
+ for i in $(seq "$start" $((start + count))); do
end=$((start + range_size))
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
@@ -1407,31 +1372,34 @@ test_concurrency() {
# - add all the elements with 3s timeout while checking that packets match
# - wait 3s after the last insertion, check that packets don't match any entry
test_timeout() {
- setup veth send_"${proto}" set || return ${KSELFTEST_SKIP}
+ setup veth send_"${proto}" set || return ${ksft_skip}
timeout=3
+
+ [ "$KSFT_MACHINE_SLOW" = "yes" ] && timeout=8
+
range_size=1
- for i in $(seq "${start}" $((start + count))); do
+ for i in $(seq "$start" $((start + count))); do
end=$((start + range_size))
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
add "$(format)" || return 1
- for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do
+ for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
send_match "${j}" $((j + src_delta)) || return 1
done
range_size=$((range_size + 1))
start=$((end + range_size))
done
- sleep 3
- for i in $(seq ${start} $((start + count))); do
+ sleep $timeout
+ for i in $(seq "$start" $((start + count))); do
end=$((start + range_size))
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
- for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do
+ for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
send_nomatch "${j}" $((j + src_delta)) || return 1
done
@@ -1450,13 +1418,13 @@ test_performance() {
chain_spec=${perf_spec}
dst="${perf_dst}"
src="${perf_src}"
- setup veth perf set || return ${KSELFTEST_SKIP}
+ setup veth perf set || return ${ksft_skip}
first=${start}
range_size=1
for set in test norange noconcat; do
start=${first}
- for i in $(seq ${start} $((start + perf_entries))); do
+ for i in $(seq "$start" $((start + perf_entries))); do
end=$((start + range_size))
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
@@ -1464,7 +1432,7 @@ test_performance() {
if [ $((end / 65534)) -gt $((start / 65534)) ]; then
start=${end}
end=$((end + 1))
- elif [ ${start} -eq ${end} ]; then
+ elif [ "$start" -eq "$end" ]; then
end=$((start + 1))
fi
@@ -1475,7 +1443,7 @@ test_performance() {
nft -f "${tmp}"
done
- perf $((end - 1)) ${srcstart}
+ perf $((end - 1)) "$srcstart"
sleep 2
@@ -1519,14 +1487,17 @@ test_performance() {
}
test_bug_flush_remove_add() {
+ rounds=100
+ [ "$KSFT_MACHINE_SLOW" = "yes" ] && rounds=10
+
set_cmd='{ set s { type ipv4_addr . inet_service; flags interval; }; }'
elem1='{ 10.0.0.1 . 22-25, 10.0.0.1 . 10-20 }'
elem2='{ 10.0.0.1 . 10-20, 10.0.0.1 . 22-25 }'
- for i in `seq 1 100`; do
- nft add table t ${set_cmd} || return ${KSELFTEST_SKIP}
- nft add element t s ${elem1} 2>/dev/null || return 1
+ for i in $(seq 1 $rounds); do
+ nft add table t "$set_cmd" || return ${ksft_skip}
+ nft add element t s "$elem1" 2>/dev/null || return 1
nft flush set t s 2>/dev/null || return 1
- nft add element t s ${elem2} 2>/dev/null || return 1
+ nft add element t s "$elem2" 2>/dev/null || return 1
done
nft flush ruleset
}
@@ -1534,7 +1505,7 @@ test_bug_flush_remove_add() {
# - add ranged element, check that packets match it
# - reload the set, check packets still match
test_bug_reload() {
- setup veth send_"${proto}" set || return ${KSELFTEST_SKIP}
+ setup veth send_"${proto}" set || return ${ksft_skip}
rstart=${start}
range_size=1
@@ -1573,7 +1544,7 @@ test_bug_reload() {
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
- for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do
+ for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
send_match "${j}" $((j + src_delta)) || return 1
done
@@ -1591,12 +1562,12 @@ test_reported_issues() {
# Run everything in a separate network namespace
[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
tmp="$(mktemp)"
-trap cleanup EXIT
+trap cleanup_exit EXIT
# Entry point for test runs
passed=0
for name in ${TESTS}; do
- printf "TEST: %s\n" "$(echo ${name} | tr '_' ' ')"
+ printf "TEST: %s\n" "$(echo "$name" | tr '_' ' ')"
if [ "${name}" = "reported_issues" ]; then
SUBTESTS="${BUGS}"
else
@@ -1623,10 +1594,16 @@ for name in ${TESTS}; do
continue
fi
- printf " %-60s " "${display}"
+ [ "$KSFT_MACHINE_SLOW" = "yes" ] && count=1
+
+ printf " %-32s " "${display}"
+ tthen=$(date +%s)
eval test_"${name}"
ret=$?
+ tnow=$(date +%s)
+ printf "%5ds%-30s" $((tnow-tthen))
+
if [ $ret -eq 0 ]; then
printf "[ OK ]\n"
info_flush
@@ -1635,11 +1612,11 @@ for name in ${TESTS}; do
printf "[FAIL]\n"
err_flush
exit 1
- elif [ $ret -eq ${KSELFTEST_SKIP} ]; then
+ elif [ $ret -eq ${ksft_skip} ]; then
printf "[SKIP]\n"
err_flush
fi
done
done
-[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP} || exit 0
+[ ${passed} -eq 0 ] && exit ${ksft_skip} || exit 0
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh b/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh
new file mode 100755
index 0000000000..5d276995a5
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+
+source lib.sh
+
+[ "$KSFT_MACHINE_SLOW" = yes ] && exit ${ksft_skip}
+
+NFT_CONCAT_RANGE_TESTS="performance" exec ./nft_concat_range.sh
diff --git a/tools/testing/selftests/net/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/net/netfilter/nft_conntrack_helper.sh
new file mode 100755
index 0000000000..abcaa73371
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_conntrack_helper.sh
@@ -0,0 +1,171 @@
+#!/bin/bash
+#
+# This tests connection tracking helper assignment:
+# 1. can attach ftp helper to a connection from nft ruleset.
+# 2. auto-assign still works.
+#
+# Kselftest framework requirement - SKIP code is 4.
+
+source lib.sh
+
+ret=0
+
+testipv6=1
+
+checktool "socat -h" "run test without socat"
+checktool "conntrack --version" "run test without conntrack"
+checktool "nft --version" "run test without nft"
+
+cleanup()
+{
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+
+ ip netns del "$ns1"
+ ip netns del "$ns2"
+}
+
+trap cleanup EXIT
+
+setup_ns ns1 ns2
+
+if ! ip link add veth0 netns "$ns1" type veth peer name veth0 netns "$ns2" > /dev/null 2>&1;then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+
+ip -net "$ns1" link set veth0 up
+ip -net "$ns2" link set veth0 up
+
+ip -net "$ns1" addr add 10.0.1.1/24 dev veth0
+ip -net "$ns1" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$ns2" addr add 10.0.1.2/24 dev veth0
+ip -net "$ns2" addr add dead:1::2/64 dev veth0 nodad
+
+load_ruleset_family() {
+ local family=$1
+ local ns=$2
+
+ip netns exec "$ns" nft -f - <<EOF
+table $family raw {
+ ct helper ftp {
+ type "ftp" protocol tcp
+ }
+ chain pre {
+ type filter hook prerouting priority 0; policy accept;
+ tcp dport 2121 ct helper set "ftp"
+ }
+ chain output {
+ type filter hook output priority 0; policy accept;
+ tcp dport 2121 ct helper set "ftp"
+ }
+}
+EOF
+ return $?
+}
+
+check_for_helper()
+{
+ local netns=$1
+ local message=$2
+ local port=$3
+
+ if echo "$message" |grep -q 'ipv6';then
+ local family="ipv6"
+ else
+ local family="ipv4"
+ fi
+
+ if ! ip netns exec "$netns" conntrack -L -f $family -p tcp --dport "$port" 2> /dev/null |grep -q 'helper=ftp';then
+ if [ "$autoassign" -eq 0 ] ;then
+ echo "FAIL: ${netns} did not show attached helper $message" 1>&2
+ ret=1
+ else
+ echo "PASS: ${netns} did not show attached helper $message" 1>&2
+ fi
+ else
+ if [ "$autoassign" -eq 0 ] ;then
+ echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2
+ else
+ echo "FAIL: ${netns} connection on port $port has ftp helper attached" 1>&2
+ ret=1
+ fi
+ fi
+
+ return 0
+}
+
+listener_ready()
+{
+ ns="$1"
+ port="$2"
+ proto="$3"
+ ss -N "$ns" -lnt -o "sport = :$port" | grep -q "$port"
+}
+
+test_helper()
+{
+ local port=$1
+ local autoassign=$2
+
+ if [ "$autoassign" -eq 0 ] ;then
+ msg="set via ruleset"
+ else
+ msg="auto-assign"
+ fi
+
+ ip netns exec "$ns2" socat -t 3 -u -4 TCP-LISTEN:"$port",reuseaddr STDOUT > /dev/null &
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" "$port" "-4"
+
+ ip netns exec "$ns1" socat -u -4 STDIN TCP:10.0.1.2:"$port" < /dev/null > /dev/null
+
+ check_for_helper "$ns1" "ip $msg" "$port" "$autoassign"
+ check_for_helper "$ns2" "ip $msg" "$port" "$autoassign"
+
+ if [ $testipv6 -eq 0 ] ;then
+ return 0
+ fi
+
+ ip netns exec "$ns1" conntrack -F 2> /dev/null
+ ip netns exec "$ns2" conntrack -F 2> /dev/null
+
+ ip netns exec "$ns2" socat -t 3 -u -6 TCP-LISTEN:"$port",reuseaddr STDOUT > /dev/null &
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns2" "$port" "-6"
+
+ ip netns exec "$ns1" socat -t 3 -u -6 STDIN TCP:"[dead:1::2]":"$port" < /dev/null > /dev/null
+
+ check_for_helper "$ns1" "ipv6 $msg" "$port"
+ check_for_helper "$ns2" "ipv6 $msg" "$port"
+}
+
+if ! load_ruleset_family ip "$ns1"; then
+ echo "FAIL: ${ns1} cannot load ip ruleset" 1>&2
+ exit 1
+fi
+
+if ! load_ruleset_family ip6 "$ns1"; then
+ echo "SKIP: ${ns1} cannot load ip6 ruleset" 1>&2
+ testipv6=0
+fi
+
+if ! load_ruleset_family inet "${ns2}"; then
+ echo "SKIP: ${ns1} cannot load inet ruleset" 1>&2
+ if ! load_ruleset_family ip "${ns2}"; then
+ echo "FAIL: ${ns2} cannot load ip ruleset" 1>&2
+ exit 1
+ fi
+
+ if [ "$testipv6" -eq 1 ] ;then
+ if ! load_ruleset_family ip6 "$ns2"; then
+ echo "FAIL: ${ns2} cannot load ip6 ruleset" 1>&2
+ exit 1
+ fi
+ fi
+fi
+
+test_helper 2121 0
+ip netns exec "$ns1" sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
+ip netns exec "$ns2" sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
+test_helper 21 1
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_fib.sh b/tools/testing/selftests/net/netfilter/nft_fib.sh
new file mode 100755
index 0000000000..ce1451c275
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_fib.sh
@@ -0,0 +1,234 @@
+#!/bin/bash
+#
+# This tests the fib expression.
+#
+# Kselftest framework requirement - SKIP code is 4.
+
+source lib.sh
+
+ret=0
+
+timeout=4
+
+log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
+
+cleanup()
+{
+ cleanup_all_ns
+
+ [ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
+}
+
+checktool "nft --version" "run test without nft"
+
+setup_ns nsrouter ns1 ns2
+
+trap cleanup EXIT
+
+if dmesg | grep -q ' nft_rpfilter: ';then
+ dmesg -c | grep ' nft_rpfilter: '
+ echo "WARN: a previous test run has failed" 1>&2
+fi
+
+sysctl -q net.netfilter.nf_log_all_netns=1
+
+load_ruleset() {
+ local netns=$1
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority 0; policy accept;
+ fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
+ }
+}
+EOF
+}
+
+load_pbr_ruleset() {
+ local netns=$1
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+ chain forward {
+ type filter hook forward priority raw;
+ fib saddr . iif oif gt 0 accept
+ log drop
+ }
+}
+EOF
+}
+
+load_ruleset_count() {
+ local netns=$1
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority 0; policy accept;
+ ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop
+ ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop
+ }
+}
+EOF
+}
+
+check_drops() {
+ if dmesg | grep -q ' nft_rpfilter: ';then
+ dmesg | grep ' nft_rpfilter: '
+ echo "FAIL: rpfilter did drop packets"
+ return 1
+ fi
+
+ return 0
+}
+
+check_fib_counter() {
+ local want=$1
+ local ns=$2
+ local address=$3
+
+ if ! ip netns exec "$ns" nft list table inet filter | grep 'fib saddr . iif' | grep "$address" | grep -q "packets $want";then
+ echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2
+ ip netns exec "$ns" nft list table inet filter
+ return 1
+ fi
+
+ if [ "$want" -gt 0 ]; then
+ echo "PASS: fib expression did drop packets for $address"
+ fi
+
+ return 0
+}
+
+load_ruleset "$nsrouter"
+load_ruleset "$ns1"
+load_ruleset "$ns2"
+
+if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2"
+
+ip -net "$nsrouter" link set veth0 up
+ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$nsrouter" link set veth1 up
+ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1
+ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad
+
+ip -net "$ns1" link set eth0 up
+ip -net "$ns2" link set eth0 up
+
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" route add default via dead:1::1
+
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns2" route add default via dead:2::1
+
+test_ping() {
+ local daddr4=$1
+ local daddr6=$2
+
+ if ! ip netns exec "$ns1" ping -c 1 -q "$daddr4" > /dev/null; then
+ check_drops
+ echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2
+ return 1
+ fi
+
+ if ! ip netns exec "$ns1" ping -c 1 -q "$daddr6" > /dev/null; then
+ check_drops
+ echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2
+ return 1
+ fi
+
+ return 0
+}
+
+ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null
+
+test_ping 10.0.2.1 dead:2::1 || exit 1
+check_drops || exit 1
+
+test_ping 10.0.2.99 dead:2::99 || exit 1
+check_drops || exit 1
+
+echo "PASS: fib expression did not cause unwanted packet drops"
+
+ip netns exec "$nsrouter" nft flush table inet filter
+
+ip -net "$ns1" route del default
+ip -net "$ns1" -6 route del default
+
+ip -net "$ns1" addr del 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr del dead:1::99/64 dev eth0
+
+ip -net "$ns1" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns1" addr add dead:2::99/64 dev eth0 nodad
+
+ip -net "$ns1" route add default via 10.0.2.1
+ip -net "$ns1" -6 route add default via dead:2::1
+
+ip -net "$nsrouter" addr add dead:2::1/64 dev veth0 nodad
+
+# switch to ruleset that doesn't log, this time
+# its expected that this does drop the packets.
+load_ruleset_count "$nsrouter"
+
+# ns1 has a default route, but nsrouter does not.
+# must not check return value, ping to 1.1.1.1 will
+# fail.
+check_fib_counter 0 "$nsrouter" 1.1.1.1 || exit 1
+check_fib_counter 0 "$nsrouter" 1c3::c01d || exit 1
+
+ip netns exec "$ns1" ping -W 0.5 -c 1 -q 1.1.1.1 > /dev/null
+check_fib_counter 1 "$nsrouter" 1.1.1.1 || exit 1
+
+ip netns exec "$ns1" ping -W 0.5 -i 0.1 -c 3 -q 1c3::c01d > /dev/null
+check_fib_counter 3 "$nsrouter" 1c3::c01d || exit 1
+
+# delete all rules
+ip netns exec "$ns1" nft flush ruleset
+ip netns exec "$ns2" nft flush ruleset
+ip netns exec "$nsrouter" nft flush ruleset
+
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+
+ip -net "$ns1" addr del 10.0.2.99/24 dev eth0
+ip -net "$ns1" addr del dead:2::99/64 dev eth0
+
+ip -net "$nsrouter" addr del dead:2::1/64 dev veth0
+
+# ... pbr ruleset for the router, check iif+oif.
+if ! load_pbr_ruleset "$nsrouter";then
+ echo "SKIP: Could not load fib forward ruleset"
+ exit $ksft_skip
+fi
+
+ip -net "$nsrouter" rule add from all table 128
+ip -net "$nsrouter" rule add from all iif veth0 table 129
+ip -net "$nsrouter" route add table 128 to 10.0.1.0/24 dev veth0
+ip -net "$nsrouter" route add table 129 to 10.0.2.0/24 dev veth1
+
+# drop main ipv4 table
+ip -net "$nsrouter" -4 rule delete table main
+
+if ! test_ping 10.0.2.99 dead:2::99;then
+ ip -net "$nsrouter" nft list ruleset
+ echo "FAIL: fib mismatch in pbr setup"
+ exit 1
+fi
+
+echo "PASS: fib expression forward check with policy based routing"
+exit 0
diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
index a32f490f75..b399555085 100755
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
@@ -14,15 +14,10 @@
# nft_flowtable.sh -o8000 -l1500 -r2000
#
-sfx=$(mktemp -u "XXXXXXXX")
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
-nsr1="nsr1-$sfx"
-nsr2="nsr2-$sfx"
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
+
ret=0
+SOCAT_TIMEOUT=60
nsin=""
ns1out=""
@@ -30,52 +25,41 @@ ns2out=""
log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
-checktool (){
- if ! $1 > /dev/null 2>&1; then
- echo "SKIP: Could not $2"
- exit $ksft_skip
- fi
-}
-
checktool "nft --version" "run test without nft tool"
-checktool "ip -Version" "run test without ip tool"
-checktool "which nc" "run test without nc (netcat)"
-checktool "ip netns add $nsr1" "create net namespace $nsr1"
+checktool "socat -h" "run test without socat"
-ip netns add $ns1
-ip netns add $ns2
-ip netns add $nsr2
+setup_ns ns1 ns2 nsr1 nsr2
cleanup() {
- ip netns del $ns1
- ip netns del $ns2
- ip netns del $nsr1
- ip netns del $nsr2
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
+
+ cleanup_all_ns
rm -f "$nsin" "$ns1out" "$ns2out"
- [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
+ [ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns"
}
trap cleanup EXIT
sysctl -q net.netfilter.nf_log_all_netns=1
-ip link add veth0 netns $nsr1 type veth peer name eth0 netns $ns1
-ip link add veth1 netns $nsr1 type veth peer name veth0 netns $nsr2
+ip link add veth0 netns "$nsr1" type veth peer name eth0 netns "$ns1"
+ip link add veth1 netns "$nsr1" type veth peer name veth0 netns "$nsr2"
-ip link add veth1 netns $nsr2 type veth peer name eth0 netns $ns2
+ip link add veth1 netns "$nsr2" type veth peer name eth0 netns "$ns2"
-for dev in lo veth0 veth1; do
- ip -net $nsr1 link set $dev up
- ip -net $nsr2 link set $dev up
+for dev in veth0 veth1; do
+ ip -net "$nsr1" link set "$dev" up
+ ip -net "$nsr2" link set "$dev" up
done
-ip -net $nsr1 addr add 10.0.1.1/24 dev veth0
-ip -net $nsr1 addr add dead:1::1/64 dev veth0
+ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad
-ip -net $nsr2 addr add 10.0.2.1/24 dev veth1
-ip -net $nsr2 addr add dead:2::1/64 dev veth1
+ip -net "$nsr2" addr add 10.0.2.1/24 dev veth1
+ip -net "$nsr2" addr add dead:2::1/64 dev veth1 nodad
# set different MTUs so we need to push packets coming from ns1 (large MTU)
# to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
@@ -107,56 +91,63 @@ do
esac
done
-if ! ip -net $nsr1 link set veth0 mtu $omtu; then
+if ! ip -net "$nsr1" link set veth0 mtu "$omtu"; then
+ exit 1
+fi
+
+ip -net "$ns1" link set eth0 mtu "$omtu"
+
+if ! ip -net "$nsr2" link set veth1 mtu "$rmtu"; then
exit 1
fi
-ip -net $ns1 link set eth0 mtu $omtu
+if ! ip -net "$nsr1" link set veth1 mtu "$lmtu"; then
+ exit 1
+fi
-if ! ip -net $nsr2 link set veth1 mtu $rmtu; then
+if ! ip -net "$nsr2" link set veth0 mtu "$lmtu"; then
exit 1
fi
-ip -net $ns2 link set eth0 mtu $rmtu
+ip -net "$ns2" link set eth0 mtu "$rmtu"
# transfer-net between nsr1 and nsr2.
# these addresses are not used for connections.
-ip -net $nsr1 addr add 192.168.10.1/24 dev veth1
-ip -net $nsr1 addr add fee1:2::1/64 dev veth1
+ip -net "$nsr1" addr add 192.168.10.1/24 dev veth1
+ip -net "$nsr1" addr add fee1:2::1/64 dev veth1 nodad
-ip -net $nsr2 addr add 192.168.10.2/24 dev veth0
-ip -net $nsr2 addr add fee1:2::2/64 dev veth0
+ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0
+ip -net "$nsr2" addr add fee1:2::2/64 dev veth0 nodad
for i in 0 1; do
- ip netns exec $nsr1 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
- ip netns exec $nsr2 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
+ ip netns exec "$nsr1" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
+ ip netns exec "$nsr2" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
done
-for ns in $ns1 $ns2;do
- ip -net $ns link set lo up
- ip -net $ns link set eth0 up
+for ns in "$ns1" "$ns2";do
+ ip -net "$ns" link set eth0 up
- if ! ip netns exec $ns sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
+ if ! ip netns exec "$ns" sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
echo "ERROR: Check Originator/Responder values (problem during address addition)"
exit 1
fi
# don't set ip DF bit for first two tests
- ip netns exec $ns sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
+ ip netns exec "$ns" sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
done
-ip -net $ns1 addr add 10.0.1.99/24 dev eth0
-ip -net $ns2 addr add 10.0.2.99/24 dev eth0
-ip -net $ns1 route add default via 10.0.1.1
-ip -net $ns2 route add default via 10.0.2.1
-ip -net $ns1 addr add dead:1::99/64 dev eth0
-ip -net $ns2 addr add dead:2::99/64 dev eth0
-ip -net $ns1 route add default via dead:1::1
-ip -net $ns2 route add default via dead:2::1
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via dead:1::1
+ip -net "$ns2" route add default via dead:2::1
-ip -net $nsr1 route add default via 192.168.10.2
-ip -net $nsr2 route add default via 192.168.10.1
+ip -net "$nsr1" route add default via 192.168.10.2
+ip -net "$nsr2" route add default via 192.168.10.1
-ip netns exec $nsr1 nft -f - <<EOF
+ip netns exec "$nsr1" nft -f - <<EOF
table inet filter {
flowtable f1 {
hook ingress priority 0
@@ -188,7 +179,7 @@ if [ $? -ne 0 ]; then
exit $ksft_skip
fi
-ip netns exec $ns2 nft -f - <<EOF
+ip netns exec "$ns2" nft -f - <<EOF
table inet filter {
counter ip4dscp0 { }
counter ip4dscp3 { }
@@ -204,25 +195,22 @@ table inet filter {
EOF
if [ $? -ne 0 ]; then
- echo "SKIP: Could not load nft ruleset"
+ echo -n "SKIP: Could not load ruleset: "
+ nft --version
exit $ksft_skip
fi
# test basic connectivity
-if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
+if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
echo "ERROR: $ns1 cannot reach ns2" 1>&2
exit 1
fi
-if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
+if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
echo "ERROR: $ns2 cannot reach $ns1" 1>&2
exit 1
fi
-if [ $ret -eq 0 ];then
- echo "PASS: netns routing/connectivity: $ns1 can reach $ns2"
-fi
-
nsin=$(mktemp)
ns1out=$(mktemp)
ns2out=$(mktemp)
@@ -248,23 +236,27 @@ check_counters()
local what=$1
local ok=1
- local orig=$(ip netns exec $nsr1 nft reset counter inet filter routed_orig | grep packets)
- local repl=$(ip netns exec $nsr1 nft reset counter inet filter routed_repl | grep packets)
+ local orig repl
+ orig=$(ip netns exec "$nsr1" nft reset counter inet filter routed_orig | grep packets)
+ repl=$(ip netns exec "$nsr1" nft reset counter inet filter routed_repl | grep packets)
local orig_cnt=${orig#*bytes}
local repl_cnt=${repl#*bytes}
- local fs=$(du -sb $nsin)
+ local fs
+ fs=$(du -sb "$nsin")
local max_orig=${fs%%/*}
local max_repl=$((max_orig/4))
- if [ $orig_cnt -gt $max_orig ];then
+ # flowtable fastpath should bypass normal routing one, i.e. the counters in forward hook
+ # should always be lower than the size of the transmitted file (max_orig).
+ if [ "$orig_cnt" -gt "$max_orig" ];then
echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig" 1>&2
ret=1
ok=0
fi
- if [ $repl_cnt -gt $max_repl ];then
+ if [ "$repl_cnt" -gt $max_repl ];then
echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl" 1>&2
ret=1
ok=0
@@ -280,39 +272,40 @@ check_dscp()
local what=$1
local ok=1
- local counter=$(ip netns exec $ns2 nft reset counter inet filter ip4dscp3 | grep packets)
+ local counter
+ counter=$(ip netns exec "$ns2" nft reset counter inet filter ip4dscp3 | grep packets)
local pc4=${counter%*bytes*}
local pc4=${pc4#*packets}
- local counter=$(ip netns exec $ns2 nft reset counter inet filter ip4dscp0 | grep packets)
+ counter=$(ip netns exec "$ns2" nft reset counter inet filter ip4dscp0 | grep packets)
local pc4z=${counter%*bytes*}
local pc4z=${pc4z#*packets}
case "$what" in
"dscp_none")
- if [ $pc4 -gt 0 ] || [ $pc4z -eq 0 ]; then
+ if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then
echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
"dscp_fwd")
- if [ $pc4 -eq 0 ] || [ $pc4z -eq 0 ]; then
+ if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then
echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
"dscp_ingress")
- if [ $pc4 -eq 0 ] || [ $pc4z -gt 0 ]; then
+ if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
"dscp_egress")
- if [ $pc4 -eq 0 ] || [ $pc4z -gt 0 ]; then
+ if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
ret=1
ok=0
@@ -324,7 +317,7 @@ check_dscp()
ok=0
esac
- if [ $ok -eq 1 ] ;then
+ if [ "$ok" -eq 1 ] ;then
echo "PASS: $what: dscp packet counters match"
fi
}
@@ -345,6 +338,11 @@ check_transfer()
return 0
}
+listener_ready()
+{
+ ss -N "$nsb" -lnt -o "sport = :12345" | grep -q 12345
+}
+
test_tcp_forwarding_ip()
{
local nsa=$1
@@ -353,40 +351,23 @@ test_tcp_forwarding_ip()
local dstport=$4
local lret=0
- ip netns exec $nsb nc -w 5 -l -p 12345 < "$nsin" > "$ns2out" &
+ timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$nsin" > "$ns2out" &
lpid=$!
- sleep 1
- ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$nsin" > "$ns1out" &
- cpid=$!
-
- sleep 1
-
- prev="$(ls -l $ns1out $ns2out)"
- sleep 1
-
- while [[ "$prev" != "$(ls -l $ns1out $ns2out)" ]]; do
- sleep 1;
- prev="$(ls -l $ns1out $ns2out)"
- done
+ busywait 1000 listener_ready
- if test -d /proc/"$lpid"/; then
- kill $lpid
- fi
-
- if test -d /proc/"$cpid"/; then
- kill $cpid
- fi
+ timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$nsin" > "$ns1out"
wait $lpid
- wait $cpid
if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then
lret=1
+ ret=1
fi
if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then
lret=1
+ ret=1
fi
return $lret
@@ -403,7 +384,7 @@ test_tcp_forwarding_set_dscp()
{
check_dscp "dscp_none"
-ip netns exec $nsr1 nft -f - <<EOF
+ip netns exec "$nsr1" nft -f - <<EOF
table netdev dscpmangle {
chain setdscp0 {
type filter hook ingress device "veth0" priority 0; policy accept
@@ -415,12 +396,12 @@ if [ $? -eq 0 ]; then
test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
check_dscp "dscp_ingress"
- ip netns exec $nsr1 nft delete table netdev dscpmangle
+ ip netns exec "$nsr1" nft delete table netdev dscpmangle
else
echo "SKIP: Could not load netdev:ingress for veth0"
fi
-ip netns exec $nsr1 nft -f - <<EOF
+ip netns exec "$nsr1" nft -f - <<EOF
table netdev dscpmangle {
chain setdscp0 {
type filter hook egress device "veth1" priority 0; policy accept
@@ -432,14 +413,14 @@ if [ $? -eq 0 ]; then
test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
check_dscp "dscp_egress"
- ip netns exec $nsr1 nft flush table netdev dscpmangle
+ ip netns exec "$nsr1" nft flush table netdev dscpmangle
else
echo "SKIP: Could not load netdev:egress for veth1"
fi
# partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3
# counters should have seen packets (before and after ft offload kicks in).
- ip netns exec $nsr1 nft -a insert rule inet filter forward ip dscp set cs3
+ ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3
test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
check_dscp "dscp_fwd"
}
@@ -455,8 +436,8 @@ test_tcp_forwarding_nat()
pmtu=$3
what=$4
- if [ $lret -eq 0 ] ; then
- if [ $pmtu -eq 1 ] ;then
+ if [ "$lret" -eq 0 ] ; then
+ if [ "$pmtu" -eq 1 ] ;then
check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what"
else
echo "PASS: flow offload for ns1/ns2 with masquerade $what"
@@ -464,9 +445,9 @@ test_tcp_forwarding_nat()
test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
lret=$?
- if [ $pmtu -eq 1 ] ;then
+ if [ "$pmtu" -eq 1 ] ;then
check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what"
- elif [ $lret -eq 0 ] ; then
+ elif [ "$lret" -eq 0 ] ; then
echo "PASS: flow offload for ns1/ns2 with dnat $what"
fi
fi
@@ -481,25 +462,25 @@ make_file "$nsin"
# Due to MTU mismatch in both directions, all packets (except small packets like pure
# acks) have to be handled by normal forwarding path. Therefore, packet counters
# are not checked.
-if test_tcp_forwarding $ns1 $ns2; then
+if test_tcp_forwarding "$ns1" "$ns2"; then
echo "PASS: flow offloaded for ns1/ns2"
else
echo "FAIL: flow offload for ns1/ns2:" 1>&2
- ip netns exec $nsr1 nft list ruleset
+ ip netns exec "$nsr1" nft list ruleset
ret=1
fi
# delete default route, i.e. ns2 won't be able to reach ns1 and
# will depend on ns1 being masqueraded in nsr1.
# expect ns1 has nsr1 address.
-ip -net $ns2 route del default via 10.0.2.1
-ip -net $ns2 route del default via dead:2::1
-ip -net $ns2 route add 192.168.10.1 via 10.0.2.1
+ip -net "$ns2" route del default via 10.0.2.1
+ip -net "$ns2" route del default via dead:2::1
+ip -net "$ns2" route add 192.168.10.1 via 10.0.2.1
# Second test:
# Same, but with NAT enabled. Same as in first test: we expect normal forward path
# to handle most packets.
-ip netns exec $nsr1 nft -f - <<EOF
+ip netns exec "$nsr1" nft -f - <<EOF
table ip nat {
chain prerouting {
type nat hook prerouting priority 0; policy accept;
@@ -513,14 +494,14 @@ table ip nat {
}
EOF
-if ! test_tcp_forwarding_set_dscp $ns1 $ns2 0 ""; then
+if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then
echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2
exit 0
fi
-if ! test_tcp_forwarding_nat $ns1 $ns2 0 ""; then
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 0 ""; then
echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
- ip netns exec $nsr1 nft list ruleset
+ ip netns exec "$nsr1" nft list ruleset
ret=1
fi
@@ -528,35 +509,40 @@ fi
# Same as second test, but with PMTU discovery enabled. This
# means that we expect the fastpath to handle packets as soon
# as the endpoints adjust the packet size.
-ip netns exec $ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
-ip netns exec $ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+ip netns exec "$ns1" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
# reset counters.
# With pmtu in-place we'll also check that nft counters
# are lower than file size and packets were forwarded via flowtable layer.
# For earlier tests (large mtus), packets cannot be handled via flowtable
# (except pure acks and other small packets).
-ip netns exec $nsr1 nft reset counters table inet filter >/dev/null
+ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
-if ! test_tcp_forwarding_nat $ns1 $ns2 1 ""; then
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then
echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
- ip netns exec $nsr1 nft list ruleset
+ ip netns exec "$nsr1" nft list ruleset
fi
# Another test:
# Add bridge interface br0 to Router1, with NAT enabled.
-ip -net $nsr1 link add name br0 type bridge
-ip -net $nsr1 addr flush dev veth0
-ip -net $nsr1 link set up dev veth0
-ip -net $nsr1 link set veth0 master br0
-ip -net $nsr1 addr add 10.0.1.1/24 dev br0
-ip -net $nsr1 addr add dead:1::1/64 dev br0
-ip -net $nsr1 link set up dev br0
+test_bridge() {
+if ! ip -net "$nsr1" link add name br0 type bridge 2>/dev/null;then
+ echo "SKIP: could not add bridge br0"
+ [ "$ret" -eq 0 ] && ret=$ksft_skip
+ return
+fi
+ip -net "$nsr1" addr flush dev veth0
+ip -net "$nsr1" link set up dev veth0
+ip -net "$nsr1" link set veth0 master br0
+ip -net "$nsr1" addr add 10.0.1.1/24 dev br0
+ip -net "$nsr1" addr add dead:1::1/64 dev br0 nodad
+ip -net "$nsr1" link set up dev br0
-ip netns exec $nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
+ip netns exec "$nsr1" sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
# br0 with NAT enabled.
-ip netns exec $nsr1 nft -f - <<EOF
+ip netns exec "$nsr1" nft -f - <<EOF
flush table ip nat
table ip nat {
chain prerouting {
@@ -571,56 +557,59 @@ table ip nat {
}
EOF
-if ! test_tcp_forwarding_nat $ns1 $ns2 1 "on bridge"; then
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "on bridge"; then
echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
- ip netns exec $nsr1 nft list ruleset
+ ip netns exec "$nsr1" nft list ruleset
ret=1
fi
# Another test:
# Add bridge interface br0 to Router1, with NAT and VLAN.
-ip -net $nsr1 link set veth0 nomaster
-ip -net $nsr1 link set down dev veth0
-ip -net $nsr1 link add link veth0 name veth0.10 type vlan id 10
-ip -net $nsr1 link set up dev veth0
-ip -net $nsr1 link set up dev veth0.10
-ip -net $nsr1 link set veth0.10 master br0
-
-ip -net $ns1 addr flush dev eth0
-ip -net $ns1 link add link eth0 name eth0.10 type vlan id 10
-ip -net $ns1 link set eth0 up
-ip -net $ns1 link set eth0.10 up
-ip -net $ns1 addr add 10.0.1.99/24 dev eth0.10
-ip -net $ns1 route add default via 10.0.1.1
-ip -net $ns1 addr add dead:1::99/64 dev eth0.10
-
-if ! test_tcp_forwarding_nat $ns1 $ns2 1 "bridge and VLAN"; then
+ip -net "$nsr1" link set veth0 nomaster
+ip -net "$nsr1" link set down dev veth0
+ip -net "$nsr1" link add link veth0 name veth0.10 type vlan id 10
+ip -net "$nsr1" link set up dev veth0
+ip -net "$nsr1" link set up dev veth0.10
+ip -net "$nsr1" link set veth0.10 master br0
+
+ip -net "$ns1" addr flush dev eth0
+ip -net "$ns1" link add link eth0 name eth0.10 type vlan id 10
+ip -net "$ns1" link set eth0 up
+ip -net "$ns1" link set eth0.10 up
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0.10
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" addr add dead:1::99/64 dev eth0.10 nodad
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "bridge and VLAN"; then
echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
- ip netns exec $nsr1 nft list ruleset
+ ip netns exec "$nsr1" nft list ruleset
ret=1
fi
# restore test topology (remove bridge and VLAN)
-ip -net $nsr1 link set veth0 nomaster
-ip -net $nsr1 link set veth0 down
-ip -net $nsr1 link set veth0.10 down
-ip -net $nsr1 link delete veth0.10 type vlan
-ip -net $nsr1 link delete br0 type bridge
-ip -net $ns1 addr flush dev eth0.10
-ip -net $ns1 link set eth0.10 down
-ip -net $ns1 link set eth0 down
-ip -net $ns1 link delete eth0.10 type vlan
+ip -net "$nsr1" link set veth0 nomaster
+ip -net "$nsr1" link set veth0 down
+ip -net "$nsr1" link set veth0.10 down
+ip -net "$nsr1" link delete veth0.10 type vlan
+ip -net "$nsr1" link delete br0 type bridge
+ip -net "$ns1" addr flush dev eth0.10
+ip -net "$ns1" link set eth0.10 down
+ip -net "$ns1" link set eth0 down
+ip -net "$ns1" link delete eth0.10 type vlan
# restore address in ns1 and nsr1
-ip -net $ns1 link set eth0 up
-ip -net $ns1 addr add 10.0.1.99/24 dev eth0
-ip -net $ns1 route add default via 10.0.1.1
-ip -net $ns1 addr add dead:1::99/64 dev eth0
-ip -net $ns1 route add default via dead:1::1
-ip -net $nsr1 addr add 10.0.1.1/24 dev veth0
-ip -net $nsr1 addr add dead:1::1/64 dev veth0
-ip -net $nsr1 link set up dev veth0
+ip -net "$ns1" link set eth0 up
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via dead:1::1
+ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad
+ip -net "$nsr1" link set up dev veth0
+}
+
+test_bridge
KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1)
KEY_AES="0x"$(ps -af | md5sum | cut -d " " -f 1)
@@ -640,33 +629,43 @@ do_esp() {
local spi_out=$6
local spi_in=$7
- ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet
- ip -net $ns xfrm state add src $me dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet
+ ip -net "$ns" xfrm state add src "$remote" dst "$me" proto esp spi "$spi_in" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$rnet" dst "$lnet"
+ ip -net "$ns" xfrm state add src "$me" dst "$remote" proto esp spi "$spi_out" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$lnet" dst "$rnet"
# to encrypt packets as they go out (includes forwarded packets that need encapsulation)
- ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 1 action allow
+ ip -net "$ns" xfrm policy add src "$lnet" dst "$rnet" dir out tmpl src "$me" dst "$remote" proto esp mode tunnel priority 1 action allow
# to fwd decrypted packets after esp processing:
- ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 1 action allow
-
+ ip -net "$ns" xfrm policy add src "$rnet" dst "$lnet" dir fwd tmpl src "$remote" dst "$me" proto esp mode tunnel priority 1 action allow
}
-do_esp $nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
+do_esp "$nsr1" 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 "$SPI1" "$SPI2"
-do_esp $nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
+do_esp "$nsr2" 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 "$SPI2" "$SPI1"
-ip netns exec $nsr1 nft delete table ip nat
+ip netns exec "$nsr1" nft delete table ip nat
# restore default routes
-ip -net $ns2 route del 192.168.10.1 via 10.0.2.1
-ip -net $ns2 route add default via 10.0.2.1
-ip -net $ns2 route add default via dead:2::1
+ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns2" route add default via dead:2::1
-if test_tcp_forwarding $ns1 $ns2; then
+if test_tcp_forwarding "$ns1" "$ns2"; then
check_counters "ipsec tunnel mode for ns1/ns2"
else
echo "FAIL: ipsec tunnel mode for ns1/ns2"
- ip netns exec $nsr1 nft list ruleset 1>&2
- ip netns exec $nsr1 cat /proc/net/xfrm_stat 1>&2
+ ip netns exec "$nsr1" nft list ruleset 1>&2
+ ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
+fi
+
+if [ "$1" = "" ]; then
+ low=1280
+ mtu=$((65536 - low))
+ o=$(((RANDOM%mtu) + low))
+ l=$(((RANDOM%mtu) + low))
+ r=$(((RANDOM%mtu) + low))
+
+ echo "re-run with random mtus: -o $o -l $l -r $r"
+ $0 -o "$o" -l "$l" -r "$r"
fi
exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/net/netfilter/nft_meta.sh
index f33154c04d..71505b6cb2 100755
--- a/tools/testing/selftests/netfilter/nft_meta.sh
+++ b/tools/testing/selftests/net/netfilter/nft_meta.sh
@@ -91,10 +91,10 @@ check_one_counter()
local want="packets $2"
local verbose="$3"
- if ! ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want"; then
+ if ! ip netns exec "$ns0" nft list counter inet filter "$cname" | grep -q "$want"; then
echo "FAIL: $cname, want \"$want\", got"
ret=1
- ip netns exec "$ns0" nft list counter inet filter $cname
+ ip netns exec "$ns0" nft list counter inet filter "$cname"
fi
}
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/net/netfilter/nft_nat.sh
index dd40d9f6f2..9e39de2645 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/net/netfilter/nft_nat.sh
@@ -3,77 +3,60 @@
# This test is for basic NAT functionality: snat, dnat, redirect, masquerade.
#
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
+
ret=0
test_inet_nat=true
-sfx=$(mktemp -u "XXXXXXXX")
-ns0="ns0-$sfx"
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
+checktool "nft --version" "run test without nft tool"
+checktool "socat -h" "run test without socat"
cleanup()
{
- for i in 0 1 2; do ip netns del ns$i-"$sfx";done
-}
+ ip netns pids "$ns0" | xargs kill 2>/dev/null
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nft tool"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
+ rm -f "$INFILE" "$OUTFILE"
-ip netns add "$ns0"
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace $ns0"
- exit $ksft_skip
-fi
+ cleanup_all_ns
+}
trap cleanup EXIT
-ip netns add "$ns1"
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace $ns1"
- exit $ksft_skip
-fi
+INFILE=$(mktemp)
+OUTFILE=$(mktemp)
-ip netns add "$ns2"
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace $ns2"
- exit $ksft_skip
-fi
+setup_ns ns0 ns1 ns2
-ip link add veth0 netns "$ns0" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1
-if [ $? -ne 0 ];then
+if ! ip link add veth0 netns "$ns0" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1;then
echo "SKIP: No virtual ethernet pair device support in kernel"
exit $ksft_skip
fi
ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns2"
-ip -net "$ns0" link set lo up
ip -net "$ns0" link set veth0 up
ip -net "$ns0" addr add 10.0.1.1/24 dev veth0
-ip -net "$ns0" addr add dead:1::1/64 dev veth0
+ip -net "$ns0" addr add dead:1::1/64 dev veth0 nodad
ip -net "$ns0" link set veth1 up
ip -net "$ns0" addr add 10.0.2.1/24 dev veth1
-ip -net "$ns0" addr add dead:2::1/64 dev veth1
-
-for i in 1 2; do
- ip -net ns$i-$sfx link set lo up
- ip -net ns$i-$sfx link set eth0 up
- ip -net ns$i-$sfx addr add 10.0.$i.99/24 dev eth0
- ip -net ns$i-$sfx route add default via 10.0.$i.1
- ip -net ns$i-$sfx addr add dead:$i::99/64 dev eth0
- ip -net ns$i-$sfx route add default via dead:$i::1
-done
+ip -net "$ns0" addr add dead:2::1/64 dev veth1 nodad
+
+do_config()
+{
+ ns="$1"
+ subnet="$2"
+
+ ip -net "$ns" link set eth0 up
+ ip -net "$ns" addr add "10.0.$subnet.99/24" dev eth0
+ ip -net "$ns" route add default via "10.0.$subnet.1"
+ ip -net "$ns" addr add "dead:$subnet::99/64" dev eth0 nodad
+ ip -net "$ns" route add default via "dead:$subnet::1"
+}
+
+do_config "$ns1" 1
+do_config "$ns2" 2
bad_counter()
{
@@ -83,7 +66,7 @@ bad_counter()
local tag=$4
echo "ERROR: $counter counter in $ns has unexpected value (expected $expect) at $tag" 1>&2
- ip netns exec $ns nft list counter inet filter $counter 1>&2
+ ip netns exec "$ns" nft list counter inet filter "$counter" 1>&2
}
check_counters()
@@ -91,26 +74,23 @@ check_counters()
ns=$1
local lret=0
- cnt=$(ip netns exec $ns nft list counter inet filter ns0in | grep -q "packets 1 bytes 84")
- if [ $? -ne 0 ]; then
- bad_counter $ns ns0in "packets 1 bytes 84" "check_counters 1"
+ if ! ip netns exec "$ns" nft list counter inet filter ns0in | grep -q "packets 1 bytes 84";then
+ bad_counter "$ns" ns0in "packets 1 bytes 84" "check_counters 1"
lret=1
fi
- cnt=$(ip netns exec $ns nft list counter inet filter ns0out | grep -q "packets 1 bytes 84")
- if [ $? -ne 0 ]; then
- bad_counter $ns ns0out "packets 1 bytes 84" "check_counters 2"
+
+ if ! ip netns exec "$ns" nft list counter inet filter ns0out | grep -q "packets 1 bytes 84";then
+ bad_counter "$ns" ns0out "packets 1 bytes 84" "check_counters 2"
lret=1
fi
expect="packets 1 bytes 104"
- cnt=$(ip netns exec $ns nft list counter inet filter ns0in6 | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter $ns ns0in6 "$expect" "check_counters 3"
+ if ! ip netns exec "$ns" nft list counter inet filter ns0in6 | grep -q "$expect";then
+ bad_counter "$ns" ns0in6 "$expect" "check_counters 3"
lret=1
fi
- cnt=$(ip netns exec $ns nft list counter inet filter ns0out6 | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter $ns ns0out6 "$expect" "check_counters 4"
+ if ! ip netns exec "$ns" nft list counter inet filter ns0out6 | grep -q "$expect";then
+ bad_counter "$ns" ns0out6 "$expect" "check_counters 4"
lret=1
fi
@@ -122,41 +102,35 @@ check_ns0_counters()
local ns=$1
local lret=0
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0in | grep -q "packets 0 bytes 0")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter ns0in | grep -q "packets 0 bytes 0";then
bad_counter "$ns0" ns0in "packets 0 bytes 0" "check_ns0_counters 1"
lret=1
fi
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0";then
bad_counter "$ns0" ns0in6 "packets 0 bytes 0"
lret=1
fi
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0out | grep -q "packets 0 bytes 0")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter ns0out | grep -q "packets 0 bytes 0";then
bad_counter "$ns0" ns0out "packets 0 bytes 0" "check_ns0_counters 2"
lret=1
fi
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0";then
bad_counter "$ns0" ns0out6 "packets 0 bytes 0" "check_ns0_counters3 "
lret=1
fi
for dir in "in" "out" ; do
expect="packets 1 bytes 84"
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ${ns}${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns0" $ns$dir "$expect" "check_ns0_counters 4"
+ if ! ip netns exec "$ns0" nft list counter inet filter "${ns}${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" "$ns${dir}" "$expect" "check_ns0_counters 4"
lret=1
fi
expect="packets 1 bytes 104"
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ${ns}${dir}6 | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns0" $ns$dir6 "$expect" "check_ns0_counters 5"
+ if ! ip netns exec "$ns0" nft list counter inet filter "${ns}${dir}6" | grep -q "$expect";then
+ bad_counter "$ns0" "$ns${dir}6" "$expect" "check_ns0_counters 5"
lret=1
fi
done
@@ -166,8 +140,8 @@ check_ns0_counters()
reset_counters()
{
- for i in 0 1 2;do
- ip netns exec ns$i-$sfx nft reset counters inet > /dev/null
+ for i in "$ns0" "$ns1" "$ns2" ;do
+ ip netns exec "$i" nft reset counters inet > /dev/null
done
}
@@ -177,7 +151,7 @@ test_local_dnat6()
local lret=0
local IPF=""
- if [ $family = "inet" ];then
+ if [ "$family" = "inet" ];then
IPF="ip6"
fi
@@ -195,8 +169,7 @@ EOF
fi
# ping netns1, expect rewrite to netns2
- ip netns exec "$ns0" ping -q -c 1 dead:1::99 > /dev/null
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" ping -q -c 1 dead:1::99 > /dev/null;then
lret=1
echo "ERROR: ping6 failed"
return $lret
@@ -204,8 +177,7 @@ EOF
expect="packets 0 bytes 0"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
bad_counter "$ns0" ns1$dir "$expect" "test_local_dnat6 1"
lret=1
fi
@@ -213,8 +185,7 @@ EOF
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat6 2"
lret=1
fi
@@ -223,8 +194,7 @@ EOF
# expect 0 count in ns1
expect="packets 0 bytes 0"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
bad_counter "$ns1" ns0$dir "$expect" "test_local_dnat6 3"
lret=1
fi
@@ -233,8 +203,7 @@ EOF
# expect 1 packet in ns2
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat6 4"
lret=1
fi
@@ -252,7 +221,7 @@ test_local_dnat()
local lret=0
local IPF=""
- if [ $family = "inet" ];then
+ if [ "$family" = "inet" ];then
IPF="ip"
fi
@@ -265,7 +234,7 @@ table $family nat {
}
EOF
if [ $? -ne 0 ]; then
- if [ $family = "inet" ];then
+ if [ "$family" = "inet" ];then
echo "SKIP: inet nat tests"
test_inet_nat=false
return $ksft_skip
@@ -275,8 +244,7 @@ EOF
fi
# ping netns1, expect rewrite to netns2
- ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null;then
lret=1
echo "ERROR: ping failed"
return $lret
@@ -284,18 +252,16 @@ EOF
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns0" ns1$dir "$expect" "test_local_dnat 1"
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" "ns1$dir" "$expect" "test_local_dnat 1"
lret=1
fi
done
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat 2"
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" "ns2$dir" "$expect" "test_local_dnat 2"
lret=1
fi
done
@@ -303,9 +269,8 @@ EOF
# expect 0 count in ns1
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns1" ns0$dir "$expect" "test_local_dnat 3"
+ if ! ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect";then
+ bad_counter "$ns1" "ns0$dir" "$expect" "test_local_dnat 3"
lret=1
fi
done
@@ -313,20 +278,18 @@ EOF
# expect 1 packet in ns2
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat 4"
+ if ! ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect";then
+ bad_counter "$ns2" "ns0$dir" "$expect" "test_local_dnat 4"
lret=1
fi
done
test $lret -eq 0 && echo "PASS: ping to $ns1 was $family NATted to $ns2"
- ip netns exec "$ns0" nft flush chain $family nat output
+ ip netns exec "$ns0" nft flush chain "$family" nat output
reset_counters
- ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null;then
lret=1
echo "ERROR: ping failed"
return $lret
@@ -334,16 +297,14 @@ EOF
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
bad_counter "$ns1" ns1$dir "$expect" "test_local_dnat 5"
lret=1
fi
done
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat 6"
lret=1
fi
@@ -352,8 +313,7 @@ EOF
# expect 1 count in ns1
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
bad_counter "$ns0" ns0$dir "$expect" "test_local_dnat 7"
lret=1
fi
@@ -362,8 +322,7 @@ EOF
# expect 0 packet in ns2
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat 8"
lret=1
fi
@@ -374,13 +333,19 @@ EOF
return $lret
}
+listener_ready()
+{
+ local ns="$1"
+ local port="$2"
+ local proto="$3"
+ ss -N "$ns" -ln "$proto" -o "sport = :$port" | grep -q "$port"
+}
+
test_local_dnat_portonly()
{
local family=$1
local daddr=$2
local lret=0
- local sr_s
- local sr_r
ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table $family nat {
@@ -392,7 +357,7 @@ table $family nat {
}
EOF
if [ $? -ne 0 ]; then
- if [ $family = "inet" ];then
+ if [ "$family" = "inet" ];then
echo "SKIP: inet port test"
test_inet_nat=false
return
@@ -401,17 +366,16 @@ EOF
return
fi
- echo SERVER-$family | ip netns exec "$ns1" timeout 5 socat -u STDIN TCP-LISTEN:2000 &
- sc_s=$!
+ echo "SERVER-$family" | ip netns exec "$ns1" timeout 3 socat -u STDIN TCP-LISTEN:2000 &
- sleep 1
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1" 2000 "-t"
- result=$(ip netns exec "$ns0" timeout 1 socat TCP:$daddr:2000 STDOUT)
+ result=$(ip netns exec "$ns0" timeout 1 socat -u TCP:"$daddr":2000 STDOUT)
if [ "$result" = "SERVER-inet" ];then
echo "PASS: inet port rewrite without l3 address"
else
- echo "ERROR: inet port rewrite"
+ echo "ERROR: inet port rewrite without l3 address, got $result"
ret=1
fi
}
@@ -424,24 +388,20 @@ test_masquerade6()
ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
- ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
echo "ERROR: cannot ping $ns1 from $ns2 via ipv6"
return 1
- lret=1
fi
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns1" ns2$dir "$expect" "test_masquerade6 1"
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" "ns2$dir" "$expect" "test_masquerade6 1"
lret=1
fi
- cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns2" ns1$dir "$expect" "test_masquerade6 2"
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns2" "ns1$dir" "$expect" "test_masquerade6 2"
lret=1
fi
done
@@ -462,8 +422,7 @@ EOF
return $ksft_skip
fi
- ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
echo "ERROR: cannot ping $ns1 from $ns2 with active $family masquerade $natflags"
lret=1
fi
@@ -471,14 +430,12 @@ EOF
# ns1 should have seen packets from ns0, due to masquerade
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
bad_counter "$ns1" ns0$dir "$expect" "test_masquerade6 3"
lret=1
fi
- cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
bad_counter "$ns2" ns1$dir "$expect" "test_masquerade6 4"
lret=1
fi
@@ -487,27 +444,23 @@ EOF
# ns1 should not have seen packets from ns2, due to masquerade
expect="packets 0 bytes 0"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
bad_counter "$ns1" ns0$dir "$expect" "test_masquerade6 5"
lret=1
fi
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns0" ns1$dir "$expect" "test_masquerade6 6"
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" "ns1$dir" "$expect" "test_masquerade6 6"
lret=1
fi
done
- ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
echo "ERROR: cannot ping $ns1 from $ns2 with active ipv6 masquerade $natflags (attempt 2)"
lret=1
fi
- ip netns exec "$ns0" nft flush chain $family nat postrouting
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft flush chain "$family" nat postrouting;then
echo "ERROR: Could not flush $family nat postrouting" 1>&2
lret=1
fi
@@ -526,23 +479,20 @@ test_masquerade()
ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
- ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping $ns1 from "$ns2" $natflags"
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2 $natflags"
lret=1
fi
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns1" ns2$dir "$expect" "test_masquerade 1"
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" "ns2$dir" "$expect" "test_masquerade 1"
lret=1
fi
- cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns2" ns1$dir "$expect" "test_masquerade 2"
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns2" "ns1$dir" "$expect" "test_masquerade 2"
lret=1
fi
done
@@ -563,8 +513,7 @@ EOF
return $ksft_skip
fi
- ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
echo "ERROR: cannot ping $ns1 from $ns2 with active $family masquerade $natflags"
lret=1
fi
@@ -572,15 +521,13 @@ EOF
# ns1 should have seen packets from ns0, due to masquerade
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns1" ns0$dir "$expect" "test_masquerade 3"
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" "ns0$dir" "$expect" "test_masquerade 3"
lret=1
fi
- cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns2" ns1$dir "$expect" "test_masquerade 4"
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns2" "ns1$dir" "$expect" "test_masquerade 4"
lret=1
fi
done
@@ -588,27 +535,23 @@ EOF
# ns1 should not have seen packets from ns2, due to masquerade
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns1" ns0$dir "$expect" "test_masquerade 5"
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" "ns0$dir" "$expect" "test_masquerade 5"
lret=1
fi
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns0" ns1$dir "$expect" "test_masquerade 6"
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" "ns1$dir" "$expect" "test_masquerade 6"
lret=1
fi
done
- ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
echo "ERROR: cannot ping $ns1 from $ns2 with active ip masquerade $natflags (attempt 2)"
lret=1
fi
- ip netns exec "$ns0" nft flush chain $family nat postrouting
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft flush chain "$family" nat postrouting; then
echo "ERROR: Could not flush $family nat postrouting" 1>&2
lret=1
fi
@@ -625,22 +568,19 @@ test_redirect6()
ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
- ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
echo "ERROR: cannnot ping $ns1 from $ns2 via ipv6"
lret=1
fi
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
bad_counter "$ns1" ns2$dir "$expect" "test_redirect6 1"
lret=1
fi
- cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
bad_counter "$ns2" ns1$dir "$expect" "test_redirect6 2"
lret=1
fi
@@ -662,8 +602,7 @@ EOF
return $ksft_skip
fi
- ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
echo "ERROR: cannot ping $ns1 from $ns2 via ipv6 with active $family redirect"
lret=1
fi
@@ -671,8 +610,7 @@ EOF
# ns1 should have seen no packets from ns2, due to redirection
expect="packets 0 bytes 0"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
bad_counter "$ns1" ns0$dir "$expect" "test_redirect6 3"
lret=1
fi
@@ -681,15 +619,13 @@ EOF
# ns0 should have seen packets from ns2, due to masquerade
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
bad_counter "$ns1" ns0$dir "$expect" "test_redirect6 4"
lret=1
fi
done
- ip netns exec "$ns0" nft delete table $family nat
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft delete table "$family" nat;then
echo "ERROR: Could not delete $family nat table" 1>&2
lret=1
fi
@@ -707,22 +643,19 @@ test_redirect()
ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
- ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
echo "ERROR: cannot ping $ns1 from $ns2"
lret=1
fi
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns1" $ns2$dir "$expect" "test_redirect 1"
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" "$ns2$dir" "$expect" "test_redirect 1"
lret=1
fi
- cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect";then
bad_counter "$ns2" ns1$dir "$expect" "test_redirect 2"
lret=1
fi
@@ -744,8 +677,7 @@ EOF
return $ksft_skip
fi
- ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
echo "ERROR: cannot ping $ns1 from $ns2 with active $family ip redirect"
lret=1
fi
@@ -754,8 +686,7 @@ EOF
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
bad_counter "$ns1" ns0$dir "$expect" "test_redirect 3"
lret=1
fi
@@ -764,15 +695,13 @@ EOF
# ns0 should have seen packets from ns2, due to masquerade
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
bad_counter "$ns0" ns0$dir "$expect" "test_redirect 4"
lret=1
fi
done
- ip netns exec "$ns0" nft delete table $family nat
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft delete table "$family" nat;then
echo "ERROR: Could not delete $family nat table" 1>&2
lret=1
fi
@@ -803,13 +732,13 @@ test_port_shadow()
# make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
echo "fake-entry" | ip netns exec "$ns2" timeout 1 socat -u STDIN UDP:"$daddrc":41404,sourceport=1405
- echo ROUTER | ip netns exec "$ns0" timeout 5 socat -u STDIN UDP4-LISTEN:1405 &
- sc_r=$!
+ echo ROUTER | ip netns exec "$ns0" timeout 3 socat -T 3 -u STDIN UDP4-LISTEN:1405 2>/dev/null &
+ local sc_r=$!
+ echo CLIENT | ip netns exec "$ns2" timeout 3 socat -T 3 -u STDIN UDP4-LISTEN:1405,reuseport 2>/dev/null &
+ local sc_c=$!
- echo CLIENT | ip netns exec "$ns2" timeout 5 socat -u STDIN UDP4-LISTEN:1405,reuseport &
- sc_c=$!
-
- sleep 0.3
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns0" 1405 "-u"
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns2" 1405 "-u"
# ns1 tries to connect to ns0:1405. With default settings this should connect
# to client, it matches the conntrack entry created above.
@@ -846,7 +775,7 @@ table $family filter {
EOF
test_port_shadow "port-filter" "ROUTER"
- ip netns exec "$ns0" nft delete table $family filter
+ ip netns exec "$ns0" nft delete table "$family" filter
}
# This prevents port shadow of router service via notrack.
@@ -868,7 +797,7 @@ table $family raw {
EOF
test_port_shadow "port-notrack" "ROUTER"
- ip netns exec "$ns0" nft delete table $family raw
+ ip netns exec "$ns0" nft delete table "$family" raw
}
# This prevents port shadow of router service via sport remap.
@@ -886,21 +815,19 @@ table $family pat {
EOF
test_port_shadow "pat" "ROUTER"
- ip netns exec "$ns0" nft delete table $family pat
+ ip netns exec "$ns0" nft delete table "$family" pat
}
test_port_shadowing()
{
local family="ip"
- conntrack -h >/dev/null 2>&1
- if [ $? -ne 0 ];then
+ if ! conntrack -h >/dev/null 2>&1;then
echo "SKIP: Could not run nat port shadowing test without conntrack tool"
return
fi
- socat -h > /dev/null 2>&1
- if [ $? -ne 0 ];then
+ if ! socat -h > /dev/null 2>&1;then
echo "SKIP: Could not run nat port shadowing test without socat tool"
return
fi
@@ -946,8 +873,7 @@ test_stateless_nat_ip()
ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
- ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
echo "ERROR: cannot ping $ns1 from $ns2 before loading stateless rules"
return 1
fi
@@ -981,23 +907,20 @@ EOF
reset_counters
- ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null; then
echo "ERROR: cannot ping $ns1 from $ns2 with stateless rules"
lret=1
fi
# ns1 should have seen packets from .2.2, due to stateless rewrite.
expect="packets 1 bytes 84"
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect";then
bad_counter "$ns1" ns0insl "$expect" "test_stateless 1"
lret=1
fi
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect";then
bad_counter "$ns2" ns1$dir "$expect" "test_stateless 2"
lret=1
fi
@@ -1006,14 +929,12 @@ EOF
# ns1 should not have seen packets from ns2, due to masquerade
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect";then
bad_counter "$ns1" ns0$dir "$expect" "test_stateless 3"
lret=1
fi
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect";then
bad_counter "$ns0" ns1$dir "$expect" "test_stateless 4"
lret=1
fi
@@ -1021,8 +942,7 @@ EOF
reset_counters
- socat -h > /dev/null 2>&1
- if [ $? -ne 0 ];then
+ if ! socat -h > /dev/null 2>&1;then
echo "SKIP: Could not run stateless nat frag test without socat tool"
if [ $lret -eq 0 ]; then
return $ksft_skip
@@ -1032,42 +952,36 @@ EOF
return $lret
fi
- local tmpfile=$(mktemp)
- dd if=/dev/urandom of=$tmpfile bs=4096 count=1 2>/dev/null
+ dd if=/dev/urandom of="$INFILE" bs=4096 count=1 2>/dev/null
- local outfile=$(mktemp)
- ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:$outfile < /dev/null &
- sc_r=$!
+ ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:"$OUTFILE" < /dev/null 2>/dev/null &
+
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1" 4233 "-u"
- sleep 1
# re-do with large ping -> ip fragmentation
- ip netns exec "$ns2" timeout 3 socat - UDP4-SENDTO:"10.0.1.99:4233" < "$tmpfile" > /dev/null
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" timeout 3 socat -u STDIN UDP4-SENDTO:"10.0.1.99:4233" < "$INFILE" > /dev/null;then
echo "ERROR: failed to test udp $ns1 to $ns2 with stateless ip nat" 1>&2
lret=1
fi
wait
- cmp "$tmpfile" "$outfile"
- if [ $? -ne 0 ]; then
- ls -l "$tmpfile" "$outfile"
+ if ! cmp "$INFILE" "$OUTFILE";then
+ ls -l "$INFILE" "$OUTFILE"
echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2
lret=1
fi
- rm -f "$tmpfile" "$outfile"
+ :> "$OUTFILE"
# ns1 should have seen packets from 2.2, due to stateless rewrite.
expect="packets 3 bytes 4164"
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect";then
bad_counter "$ns1" ns0insl "$expect" "test_stateless 5"
lret=1
fi
- ip netns exec "$ns0" nft delete table ip stateless
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft delete table ip stateless; then
echo "ERROR: Could not delete table ip stateless" 1>&2
lret=1
fi
@@ -1078,8 +992,8 @@ EOF
}
# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
-for i in 0 1 2; do
-ip netns exec ns$i-$sfx nft -f /dev/stdin <<EOF
+for i in "$ns0" "$ns1" "$ns2" ;do
+ip netns exec "$i" nft -f /dev/stdin <<EOF
table inet filter {
counter ns0in {}
counter ns1in {}
@@ -1145,7 +1059,7 @@ done
# special case for stateless nat check, counter needs to
# be done before (input) ip defragmentation
-ip netns exec ns1-$sfx nft -f /dev/stdin <<EOF
+ip netns exec "$ns1" nft -f /dev/stdin <<EOF
table inet filter {
counter ns0insl {}
@@ -1156,31 +1070,49 @@ table inet filter {
}
EOF
-sleep 3
-# test basic connectivity
-for i in 1 2; do
- ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99 > /dev/null
- if [ $? -ne 0 ];then
- echo "ERROR: Could not reach other namespace(s)" 1>&2
- ret=1
- fi
-
- ip netns exec "$ns0" ping -c 1 -q dead:$i::99 > /dev/null
- if [ $? -ne 0 ];then
- echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2
- ret=1
- fi
- check_counters ns$i-$sfx
- if [ $? -ne 0 ]; then
- ret=1
- fi
-
- check_ns0_counters ns$i
- if [ $? -ne 0 ]; then
- ret=1
- fi
- reset_counters
-done
+ping_basic()
+{
+ i="$1"
+ if ! ip netns exec "$ns0" ping -c 1 -q 10.0."$i".99 > /dev/null;then
+ echo "ERROR: Could not reach other namespace(s)" 1>&2
+ ret=1
+ fi
+
+ if ! ip netns exec "$ns0" ping -c 1 -q dead:"$i"::99 > /dev/null;then
+ echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2
+ ret=1
+ fi
+}
+
+test_basic_conn()
+{
+ local nsexec
+ name="$1"
+
+ nsexec=$(eval echo \$"$1")
+
+ ping_basic 1
+ ping_basic 2
+
+ if ! check_counters "$nsexec";then
+ return 1
+ fi
+
+ if ! check_ns0_counters "$name";then
+ return 1
+ fi
+
+ reset_counters
+ return 0
+}
+
+if ! test_basic_conn "ns1" ; then
+ echo "ERROR: basic test for ns1 failed" 1>&2
+ exit 1
+fi
+if ! test_basic_conn "ns2"; then
+ echo "ERROR: basic test for ns1 failed" 1>&2
+fi
if [ $ret -eq 0 ];then
echo "PASS: netns routing/connectivity: $ns0 can reach $ns1 and $ns2"
diff --git a/tools/testing/selftests/netfilter/nft_nat_zones.sh b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
index b9ab37380f..3b81d88bdd 100755
--- a/tools/testing/selftests/netfilter/nft_nat_zones.sh
+++ b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
@@ -3,17 +3,17 @@
# Test connection tracking zone and NAT source port reallocation support.
#
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
# Don't increase too much, 2000 clients should work
# just fine but script can then take several minutes with
# KASAN/debug builds.
maxclients=100
-have_iperf=1
+have_socat=0
ret=0
+[ "$KSFT_MACHINE_SLOW" = yes ] && maxclients=40
# client1---.
# veth1-.
# |
@@ -31,12 +31,6 @@ ret=0
# NAT Gateway is supposed to do port reallocation for each of the
# connections.
-sfx=$(mktemp -u "XXXXXXXX")
-gw="ns-gw-$sfx"
-cl1="ns-cl1-$sfx"
-cl2="ns-cl2-$sfx"
-srv="ns-srv-$sfx"
-
v4gc1=$(sysctl -n net.ipv4.neigh.default.gc_thresh1 2>/dev/null)
v4gc2=$(sysctl -n net.ipv4.neigh.default.gc_thresh2 2>/dev/null)
v4gc3=$(sysctl -n net.ipv4.neigh.default.gc_thresh3 2>/dev/null)
@@ -46,61 +40,29 @@ v6gc3=$(sysctl -n net.ipv6.neigh.default.gc_thresh3 2>/dev/null)
cleanup()
{
- ip netns del $gw
- ip netns del $srv
- for i in $(seq 1 $maxclients); do
- ip netns del ns-cl$i-$sfx 2>/dev/null
- done
-
- sysctl -q net.ipv4.neigh.default.gc_thresh1=$v4gc1 2>/dev/null
- sysctl -q net.ipv4.neigh.default.gc_thresh2=$v4gc2 2>/dev/null
- sysctl -q net.ipv4.neigh.default.gc_thresh3=$v4gc3 2>/dev/null
- sysctl -q net.ipv6.neigh.default.gc_thresh1=$v6gc1 2>/dev/null
- sysctl -q net.ipv6.neigh.default.gc_thresh2=$v6gc2 2>/dev/null
- sysctl -q net.ipv6.neigh.default.gc_thresh3=$v6gc3 2>/dev/null
+ cleanup_all_ns
+
+ sysctl -q net.ipv4.neigh.default.gc_thresh1="$v4gc1" 2>/dev/null
+ sysctl -q net.ipv4.neigh.default.gc_thresh2="$v4gc2" 2>/dev/null
+ sysctl -q net.ipv4.neigh.default.gc_thresh3="$v4gc3" 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh1="$v6gc1" 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh2="$v6gc2" 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh3="$v6gc3" 2>/dev/null
}
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nft tool"
- exit $ksft_skip
-fi
+checktool "nft --version" echo "run test without nft tool"
+checktool "conntrack -V" "run test without conntrack tool"
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
+if socat -h >/dev/null 2>&1; then
+ have_socat=1
fi
-conntrack -V > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without conntrack tool"
- exit $ksft_skip
-fi
-
-iperf3 -v >/dev/null 2>&1
-if [ $? -ne 0 ];then
- have_iperf=0
-fi
-
-ip netns add "$gw"
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace $gw"
- exit $ksft_skip
-fi
-ip -net "$gw" link set lo up
+setup_ns gw srv
trap cleanup EXIT
-ip netns add "$srv"
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create server netns $srv"
- exit $ksft_skip
-fi
-
ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv"
ip -net "$gw" link set veth0 up
-ip -net "$srv" link set lo up
ip -net "$srv" link set eth0 up
sysctl -q net.ipv6.neigh.default.gc_thresh1=512 2>/dev/null
@@ -110,55 +72,49 @@ sysctl -q net.ipv4.neigh.default.gc_thresh1=512 2>/dev/null
sysctl -q net.ipv4.neigh.default.gc_thresh2=1024 2>/dev/null
sysctl -q net.ipv4.neigh.default.gc_thresh3=4096 2>/dev/null
-for i in $(seq 1 $maxclients);do
- cl="ns-cl$i-$sfx"
+for i in $(seq 1 "$maxclients");do
+ setup_ns "cl$i"
- ip netns add "$cl"
- if [ $? -ne 0 ];then
- echo "SKIP: Could not create client netns $cl"
- exit $ksft_skip
- fi
- ip link add veth$i netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1
- if [ $? -ne 0 ];then
+ cl=$(eval echo \$cl"$i")
+ if ! ip link add veth"$i" netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1;then
echo "SKIP: No virtual ethernet pair device support in kernel"
exit $ksft_skip
fi
done
-for i in $(seq 1 $maxclients);do
- cl="ns-cl$i-$sfx"
- echo netns exec "$cl" ip link set lo up
+for i in $(seq 1 "$maxclients");do
+ cl=$(eval echo \$cl"$i")
echo netns exec "$cl" ip link set eth0 up
echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
- echo netns exec "$gw" ip link set veth$i up
- echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.arp_ignore=2
- echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.rp_filter=0
+ echo netns exec "$gw" ip link set "veth$i" up
+ echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".arp_ignore=2
+ echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".rp_filter=0
# clients have same IP addresses.
echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
- echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0
+ echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0 nodad
echo netns exec "$cl" ip route add default via 10.1.0.2 dev eth0
echo netns exec "$cl" ip route add default via dead:1::2 dev eth0
# NB: same addresses on client-facing interfaces.
- echo netns exec "$gw" ip addr add 10.1.0.2/24 dev veth$i
- echo netns exec "$gw" ip addr add dead:1::2/64 dev veth$i
+ echo netns exec "$gw" ip addr add 10.1.0.2/24 dev "veth$i"
+ echo netns exec "$gw" ip addr add dead:1::2/64 dev "veth$i" nodad
# gw: policy routing
- echo netns exec "$gw" ip route add 10.1.0.0/24 dev veth$i table $((1000+i))
- echo netns exec "$gw" ip route add dead:1::0/64 dev veth$i table $((1000+i))
+ echo netns exec "$gw" ip route add 10.1.0.0/24 dev "veth$i" table $((1000+i))
+ echo netns exec "$gw" ip route add dead:1::0/64 dev "veth$i" table $((1000+i))
echo netns exec "$gw" ip route add 10.3.0.0/24 dev veth0 table $((1000+i))
echo netns exec "$gw" ip route add dead:3::0/64 dev veth0 table $((1000+i))
- echo netns exec "$gw" ip rule add fwmark $i lookup $((1000+i))
+ echo netns exec "$gw" ip rule add fwmark "$i" lookup $((1000+i))
done | ip -batch /dev/stdin
ip -net "$gw" addr add 10.3.0.1/24 dev veth0
-ip -net "$gw" addr add dead:3::1/64 dev veth0
+ip -net "$gw" addr add dead:3::1/64 dev veth0 nodad
ip -net "$srv" addr add 10.3.0.99/24 dev eth0
-ip -net "$srv" addr add dead:3::99/64 dev eth0
+ip -net "$srv" addr add dead:3::99/64 dev eth0 nodad
-ip netns exec $gw nft -f /dev/stdin<<EOF
+ip netns exec "$gw" nft -f /dev/stdin<<EOF
table inet raw {
map iiftomark {
type ifname : mark
@@ -203,18 +159,22 @@ table inet raw {
}
}
EOF
+if [ "$?" -ne 0 ];then
+ echo "SKIP: Could not add nftables rules"
+ exit $ksft_skip
+fi
( echo add element inet raw iiftomark \{
for i in $(seq 1 $((maxclients-1))); do
- echo \"veth$i\" : $i,
+ echo \"veth"$i"\" : "$i",
done
- echo \"veth$maxclients\" : $maxclients \}
+ echo \"veth"$maxclients"\" : "$maxclients" \}
echo add element inet raw iiftozone \{
for i in $(seq 1 $((maxclients-1))); do
- echo \"veth$i\" : $i,
+ echo \"veth"$i"\" : "$i",
done
echo \"veth$maxclients\" : $maxclients \}
-) | ip netns exec $gw nft -f /dev/stdin
+) | ip netns exec "$gw" nft -f /dev/stdin
ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null
@@ -224,73 +184,72 @@ ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null
ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null
ip netns exec "$gw" sysctl -q net.ipv6.fwmark_reflect=1 > /dev/null
-for i in $(seq 1 $maxclients); do
- cl="ns-cl$i-$sfx"
- ip netns exec $cl ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 &
- if [ $? -ne 0 ]; then
- echo FAIL: Ping failure from $cl 1>&2
- ret=1
- break
- fi
+for i in $(seq 1 "$maxclients"); do
+ cl=$(eval echo \$cl"$i")
+ ip netns exec "$cl" ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 &
done
-wait
+wait || ret=1
-for i in $(seq 1 $maxclients); do
- ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }"
- if [ $? -ne 0 ];then
+[ "$ret" -ne 0 ] && "FAIL: Ping failure from $cl" 1>&2
+
+for i in $(seq 1 "$maxclients"); do
+ if ! ip netns exec "$gw" nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }"; then
ret=1
echo "FAIL: counter icmp mismatch for veth$i" 1>&2
- ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2
+ ip netns exec "$gw" nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2
break
fi
done
-ip netns exec $gw nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
-if [ $? -ne 0 ];then
+if ! ip netns exec "$gw" nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * maxclients)) bytes $((252 * maxclients)) }"; then
ret=1
- echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
- ip netns exec $gw nft get element inet raw inicmp "{ 10.3.99 . \"veth0\" . 10.3.0.1 }" 1>&2
+ echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * maxclients)) bytes $((252 * maxclients)) }"
+ ip netns exec "$gw" nft get element inet raw inicmp "{ 10.3.99 . \"veth0\" . 10.3.0.1 }" 1>&2
fi
-if [ $ret -eq 0 ]; then
+if [ $ret -eq 0 ]; then
echo "PASS: ping test from all $maxclients namespaces"
fi
-if [ $have_iperf -eq 0 ];then
- echo "SKIP: iperf3 not installed"
+if [ $have_socat -eq 0 ];then
+ echo "SKIP: socat not installed"
if [ $ret -ne 0 ];then
exit $ret
fi
exit $ksft_skip
fi
-ip netns exec $srv iperf3 -s > /dev/null 2>&1 &
-iperfpid=$!
-sleep 1
+listener_ready()
+{
+ ss -N "$1" -lnt -o "sport = :5201" | grep -q 5201
+}
+
+ip netns exec "$srv" socat -u TCP-LISTEN:5201,fork STDOUT > /dev/null 2>/dev/null &
+socatpid=$!
+
+busywait 1000 listener_ready "$srv"
-for i in $(seq 1 $maxclients); do
+for i in $(seq 1 "$maxclients"); do
if [ $ret -ne 0 ]; then
break
fi
- cl="ns-cl$i-$sfx"
- ip netns exec $cl iperf3 -c 10.3.0.99 --cport 10000 -n 1 > /dev/null
- if [ $? -ne 0 ]; then
- echo FAIL: Failure to connect for $cl 1>&2
- ip netns exec $gw conntrack -S 1>&2
+ cl=$(eval echo \$cl"$i")
+ if ! ip netns exec "$cl" socat -4 -u STDIN TCP:10.3.0.99:5201,sourceport=10000 < /dev/null > /dev/null; then
+ echo "FAIL: Failure to connect for $cl" 1>&2
+ ip netns exec "$gw" conntrack -S 1>&2
ret=1
fi
done
if [ $ret -eq 0 ];then
- echo "PASS: iperf3 connections for all $maxclients net namespaces"
+ echo "PASS: socat connections for all $maxclients net namespaces"
fi
-kill $iperfpid
+kill $socatpid
wait
-for i in $(seq 1 $maxclients); do
- ip netns exec $gw nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null
- if [ $? -ne 0 ];then
+for i in $(seq 1 "$maxclients"); do
+ if ! ip netns exec "$gw" nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null;then
ret=1
echo "FAIL: can't find expected tcp entry for veth$i" 1>&2
break
@@ -300,8 +259,7 @@ if [ $ret -eq 0 ];then
echo "PASS: Found client connection for all $maxclients net namespaces"
fi
-ip netns exec $gw nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null
-if [ $? -ne 0 ];then
+if ! ip netns exec "$gw" nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null;then
ret=1
echo "FAIL: cannot find return entry on veth0" 1>&2
fi
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
new file mode 100755
index 0000000000..8538f08c64
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -0,0 +1,417 @@
+#!/bin/bash
+#
+# This tests nf_queue:
+# 1. can process packets from all hooks
+# 2. support running nfqueue from more than one base chain
+#
+# shellcheck disable=SC2162,SC2317
+
+source lib.sh
+ret=0
+timeout=2
+
+cleanup()
+{
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
+ ip netns pids "$nsrouter" | xargs kill 2>/dev/null
+
+ cleanup_all_ns
+
+ rm -f "$TMPINPUT"
+ rm -f "$TMPFILE0"
+ rm -f "$TMPFILE1"
+ rm -f "$TMPFILE2" "$TMPFILE3"
+}
+
+checktool "nft --version" "test without nft tool"
+
+trap cleanup EXIT
+
+setup_ns ns1 ns2 nsrouter
+
+TMPFILE0=$(mktemp)
+TMPFILE1=$(mktemp)
+TMPFILE2=$(mktemp)
+TMPFILE3=$(mktemp)
+
+TMPINPUT=$(mktemp)
+dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT"
+
+if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2"
+
+ip -net "$nsrouter" link set veth0 up
+ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$nsrouter" link set veth1 up
+ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1
+ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad
+
+ip -net "$ns1" link set eth0 up
+ip -net "$ns2" link set eth0 up
+
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" route add default via dead:1::1
+
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns2" route add default via dead:2::1
+
+load_ruleset() {
+ local name=$1
+ local prio=$2
+
+ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+table inet $name {
+ chain nfq {
+ ip protocol icmp queue bypass
+ icmpv6 type { "echo-request", "echo-reply" } queue num 1 bypass
+ }
+ chain pre {
+ type filter hook prerouting priority $prio; policy accept;
+ jump nfq
+ }
+ chain input {
+ type filter hook input priority $prio; policy accept;
+ jump nfq
+ }
+ chain forward {
+ type filter hook forward priority $prio; policy accept;
+ tcp dport 12345 queue num 2
+ jump nfq
+ }
+ chain output {
+ type filter hook output priority $prio; policy accept;
+ tcp dport 12345 queue num 3
+ tcp sport 23456 queue num 3
+ jump nfq
+ }
+ chain post {
+ type filter hook postrouting priority $prio; policy accept;
+ jump nfq
+ }
+}
+EOF
+}
+
+load_counter_ruleset() {
+ local prio=$1
+
+ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+table inet countrules {
+ chain pre {
+ type filter hook prerouting priority $prio; policy accept;
+ counter
+ }
+ chain input {
+ type filter hook input priority $prio; policy accept;
+ counter
+ }
+ chain forward {
+ type filter hook forward priority $prio; policy accept;
+ counter
+ }
+ chain output {
+ type filter hook output priority $prio; policy accept;
+ counter
+ }
+ chain post {
+ type filter hook postrouting priority $prio; policy accept;
+ counter
+ }
+}
+EOF
+}
+
+test_ping() {
+ if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
+ return 1
+ fi
+
+ if ! ip netns exec "$ns1" ping -c 1 -q dead:2::99 > /dev/null; then
+ return 2
+ fi
+
+ return 0
+}
+
+test_ping_router() {
+ if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.1 > /dev/null; then
+ return 3
+ fi
+
+ if ! ip netns exec "$ns1" ping -c 1 -q dead:2::1 > /dev/null; then
+ return 4
+ fi
+
+ return 0
+}
+
+test_queue_blackhole() {
+ local proto=$1
+
+ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+table $proto blackh {
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ queue num 600
+ }
+}
+EOF
+ if [ "$proto" = "ip" ] ;then
+ ip netns exec "$ns1" ping -W 2 -c 1 -q 10.0.2.99 > /dev/null
+ lret=$?
+ elif [ "$proto" = "ip6" ]; then
+ ip netns exec "$ns1" ping -W 2 -c 1 -q dead:2::99 > /dev/null
+ lret=$?
+ else
+ lret=111
+ fi
+
+ # queue without bypass keyword should drop traffic if no listener exists.
+ if [ "$lret" -eq 0 ];then
+ echo "FAIL: $proto expected failure, got $lret" 1>&2
+ exit 1
+ fi
+
+ if ! ip netns exec "$nsrouter" nft delete table "$proto" blackh; then
+ echo "FAIL: $proto: Could not delete blackh table"
+ exit 1
+ fi
+
+ echo "PASS: $proto: statement with no listener results in packet drop"
+}
+
+nf_queue_wait()
+{
+ local procfile="/proc/self/net/netfilter/nfnetlink_queue"
+ local netns id
+
+ netns="$1"
+ id="$2"
+
+ # if this file doesn't exist, nfnetlink_module isn't loaded.
+ # rather than loading it ourselves, wait for kernel module autoload
+ # completion, nfnetlink should do so automatically because nf_queue
+ # helper program, spawned in the background, asked for this functionality.
+ test -f "$procfile" &&
+ ip netns exec "$netns" cat "$procfile" | grep -q "^ *$id "
+}
+
+test_queue()
+{
+ local expected="$1"
+ local last=""
+
+ # spawn nf_queue listeners
+ ip netns exec "$nsrouter" ./nf_queue -c -q 0 -t $timeout > "$TMPFILE0" &
+ ip netns exec "$nsrouter" ./nf_queue -c -q 1 -t $timeout > "$TMPFILE1" &
+
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 0
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 1
+
+ if ! test_ping;then
+ echo "FAIL: netns routing/connectivity with active listener on queues 0 and 1: $ret" 1>&2
+ exit $ret
+ fi
+
+ if ! test_ping_router;then
+ echo "FAIL: netns router unreachable listener on queue 0 and 1: $ret" 1>&2
+ exit $ret
+ fi
+
+ wait
+ ret=$?
+
+ for file in $TMPFILE0 $TMPFILE1; do
+ last=$(tail -n1 "$file")
+ if [ x"$last" != x"$expected packets total" ]; then
+ echo "FAIL: Expected $expected packets total, but got $last" 1>&2
+ ip netns exec "$nsrouter" nft list ruleset
+ exit 1
+ fi
+ done
+
+ echo "PASS: Expected and received $last"
+}
+
+listener_ready()
+{
+ ss -N "$1" -lnt -o "sport = :12345" | grep -q 12345
+}
+
+test_tcp_forward()
+{
+ ip netns exec "$nsrouter" ./nf_queue -q 2 -t "$timeout" &
+ local nfqpid=$!
+
+ timeout 5 ip netns exec "$ns2" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
+ local rpid=$!
+
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2"
+
+ ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
+
+ wait "$rpid" && echo "PASS: tcp and nfqueue in forward chain"
+}
+
+test_tcp_localhost()
+{
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT"
+ timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
+ local rpid=$!
+
+ ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" &
+ local nfqpid=$!
+
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
+
+ ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null
+
+ wait "$rpid" && echo "PASS: tcp via loopback"
+ wait 2>/dev/null
+}
+
+test_tcp_localhost_connectclose()
+{
+ ip netns exec "$nsrouter" ./connect_close -p 23456 -t "$timeout" &
+ ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" &
+
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3
+
+ wait && echo "PASS: tcp via loopback with connect/close"
+ wait 2>/dev/null
+}
+
+test_tcp_localhost_requeue()
+{
+ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+ }
+ chain post {
+ type filter hook postrouting priority 0; policy accept;
+ tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+ }
+}
+EOF
+ timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
+ local rpid=$!
+
+ ip netns exec "$nsrouter" ./nf_queue -c -q 1 -t "$timeout" > "$TMPFILE2" &
+
+ # nfqueue 1 will be called via output hook. But this time,
+ # re-queue the packet to nfqueue program on queue 2.
+ ip netns exec "$nsrouter" ./nf_queue -G -d 150 -c -q 0 -Q 1 -t "$timeout" > "$TMPFILE3" &
+
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
+ ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" > /dev/null
+
+ wait
+
+ if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then
+ echo "FAIL: lost packets during requeue?!" 1>&2
+ return
+ fi
+
+ echo "PASS: tcp via loopback and re-queueing"
+}
+
+test_icmp_vrf() {
+ if ! ip -net "$ns1" link add tvrf type vrf table 9876;then
+ echo "SKIP: Could not add vrf device"
+ return
+ fi
+
+ ip -net "$ns1" li set eth0 master tvrf
+ ip -net "$ns1" li set tvrf up
+
+ ip -net "$ns1" route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876
+ip netns exec "$ns1" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ meta oifname "tvrf" icmp type echo-request counter queue num 1
+ meta oifname "eth0" icmp type echo-request counter queue num 1
+ }
+ chain post {
+ type filter hook postrouting priority 0; policy accept;
+ meta oifname "tvrf" icmp type echo-request counter queue num 1
+ meta oifname "eth0" icmp type echo-request counter queue num 1
+ }
+}
+EOF
+ ip netns exec "$ns1" ./nf_queue -q 1 -t "$timeout" &
+ local nfqpid=$!
+
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 1
+
+ ip netns exec "$ns1" ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null
+
+ for n in output post; do
+ for d in tvrf eth0; do
+ if ! ip netns exec "$ns1" nft list chain inet filter "$n" | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"; then
+ echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
+ ip netns exec "$ns1" nft list ruleset
+ ret=1
+ return
+ fi
+ done
+ done
+
+ wait "$nfqpid" && echo "PASS: icmp+nfqueue via vrf"
+ wait 2>/dev/null
+}
+
+ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+load_ruleset "filter" 0
+
+if test_ping; then
+ # queue bypass works (rules were skipped, no listener)
+ echo "PASS: ${ns1} can reach ${ns2}"
+else
+ echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2
+ exit $ret
+fi
+
+test_queue_blackhole ip
+test_queue_blackhole ip6
+
+# dummy ruleset to add base chains between the
+# queueing rules. We don't want the second reinject
+# to re-execute the old hooks.
+load_counter_ruleset 10
+
+# we are hooking all: prerouting/input/forward/output/postrouting.
+# we ping ${ns2} from ${ns1} via ${nsrouter} using ipv4 and ipv6, so:
+# 1x icmp prerouting,forward,postrouting -> 3 queue events (6 incl. reply).
+# 1x icmp prerouting,input,output postrouting -> 4 queue events incl. reply.
+# so we expect that userspace program receives 10 packets.
+test_queue 10
+
+# same. We queue to a second program as well.
+load_ruleset "filter2" 20
+test_queue 20
+
+test_tcp_forward
+test_tcp_localhost
+test_tcp_localhost_connectclose
+test_tcp_localhost_requeue
+test_icmp_vrf
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_synproxy.sh b/tools/testing/selftests/net/netfilter/nft_synproxy.sh
new file mode 100755
index 0000000000..293f667a6a
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_synproxy.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+ret=0
+
+checktool "nft --version" "run test without nft tool"
+checktool "iperf3 --version" "run test without iperf3"
+
+setup_ns nsr ns1 ns2
+
+modprobe -q nf_conntrack
+
+cleanup() {
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
+
+ cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+ip link add veth0 netns "$nsr" type veth peer name eth0 netns "$ns1"
+ip link add veth1 netns "$nsr" type veth peer name eth0 netns "$ns2"
+
+for dev in veth0 veth1; do
+ ip -net "$nsr" link set "$dev" up
+done
+
+ip -net "$nsr" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsr" addr add 10.0.2.1/24 dev veth1
+
+ip netns exec "$nsr" sysctl -q net.ipv4.conf.veth0.forwarding=1
+ip netns exec "$nsr" sysctl -q net.ipv4.conf.veth1.forwarding=1
+ip netns exec "$nsr" sysctl -q net.netfilter.nf_conntrack_tcp_loose=0
+
+for n in $ns1 $ns2; do
+ ip -net "$n" link set eth0 up
+done
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns2" route add default via 10.0.2.1
+
+# test basic connectivity
+if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
+ echo "ERROR: $ns1 cannot reach $ns2" 1>&2
+ exit 1
+fi
+
+if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
+ echo "ERROR: $ns2 cannot reach $ns1" 1>&2
+ exit 1
+fi
+
+ip netns exec "$ns2" iperf3 -s > /dev/null 2>&1 &
+# ip netns exec $nsr tcpdump -vvv -n -i veth1 tcp | head -n 10 &
+
+sleep 1
+
+ip netns exec "$nsr" nft -f - <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority -300; policy accept;
+ meta iif veth0 tcp flags syn counter notrack
+ }
+
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+
+ ct state new,established counter accept
+
+ meta iif veth0 meta l4proto tcp ct state untracked,invalid synproxy mss 1460 sack-perm timestamp
+
+ ct state invalid counter drop
+
+ # make ns2 unreachable w.o. tcp synproxy
+ tcp flags syn counter drop
+ }
+}
+EOF
+if [ $? -ne 0 ]; then
+ echo "SKIP: Cannot add nft synproxy"
+ exit $ksft_skip
+fi
+
+if ! ip netns exec "$ns1" timeout 5 iperf3 -c 10.0.2.99 -n $((1 * 1024 * 1024)) > /dev/null; then
+ echo "FAIL: iperf3 returned an error" 1>&2
+ ret=1
+ ip netns exec "$nsr" nft list ruleset
+else
+ echo "PASS: synproxy connection successful"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/net/netfilter/nft_zones_many.sh
index 5a8db0b489..7db9982ba5 100755
--- a/tools/testing/selftests/netfilter/nft_zones_many.sh
+++ b/tools/testing/selftests/net/netfilter/nft_zones_many.sh
@@ -3,47 +3,34 @@
# Test insertion speed for packets with identical addresses/ports
# that are all placed in distinct conntrack zones.
-sfx=$(mktemp -u "XXXXXXXX")
-ns="ns-$sfx"
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
zones=2000
+[ "$KSFT_MACHINE_SLOW" = yes ] && zones=500
+
have_ct_tool=0
ret=0
cleanup()
{
- ip netns del $ns
-}
-
-checktool (){
- if ! $1 > /dev/null 2>&1; then
- echo "SKIP: Could not $2"
- exit $ksft_skip
- fi
+ cleanup_all_ns
}
checktool "nft --version" "run test without nft tool"
-checktool "ip -Version" "run test without ip tool"
checktool "socat -V" "run test without socat tool"
-checktool "ip netns add $ns" "create net namespace"
+
+setup_ns ns1
trap cleanup EXIT
-conntrack -V > /dev/null 2>&1
-if [ $? -eq 0 ];then
+if conntrack -V > /dev/null 2>&1; then
have_ct_tool=1
fi
-ip -net "$ns" link set lo up
-
test_zones() {
local max_zones=$1
-ip netns exec $ns sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600
-ip netns exec $ns nft -f /dev/stdin<<EOF
+ip netns exec "$ns1" nft -f /dev/stdin<<EOF
flush ruleset
table inet raw {
map rndzone {
@@ -56,29 +43,39 @@ table inet raw {
}
}
EOF
+if [ "$?" -ne 0 ];then
+ echo "SKIP: Cannot add nftables rules"
+ exit $ksft_skip
+fi
+
+ ip netns exec "$ns1" sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600
+
(
echo "add element inet raw rndzone {"
- for i in $(seq 1 $max_zones);do
+ for i in $(seq 1 "$max_zones");do
echo -n "$i : $i"
- if [ $i -lt $max_zones ]; then
+ if [ "$i" -lt "$max_zones" ]; then
echo ","
else
echo "}"
fi
done
- ) | ip netns exec $ns nft -f /dev/stdin
+ ) | ip netns exec "$ns1" nft -f /dev/stdin
local i=0
local j=0
- local outerstart=$(date +%s%3N)
- local stop=$outerstart
-
- while [ $i -lt $max_zones ]; do
- local start=$(date +%s%3N)
+ local outerstart
+ local stop
+ outerstart=$(date +%s%3N)
+ stop=$outerstart
+
+ while [ "$i" -lt "$max_zones" ]; do
+ local start
+ start=$(date +%s%3N)
i=$((i + 1000))
j=$((j + 1))
# nft rule in output places each packet in a different zone.
- dd if=/dev/zero of=/dev/stdout bs=8k count=1000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345
+ dd if=/dev/zero bs=8k count=1000 2>/dev/null | ip netns exec "$ns1" socat -u STDIN UDP:127.0.0.1:12345,sourceport=12345
if [ $? -ne 0 ] ;then
ret=1
break
@@ -89,14 +86,15 @@ EOF
echo "PASS: added 1000 entries in $duration ms (now $i total, loop $j)"
done
- if [ $have_ct_tool -eq 1 ]; then
- local count=$(ip netns exec "$ns" conntrack -C)
- local duration=$((stop-outerstart))
+ if [ "$have_ct_tool" -eq 1 ]; then
+ local count duration
+ count=$(ip netns exec "$ns1" conntrack -C)
+ duration=$((stop-outerstart))
- if [ $count -eq $max_zones ]; then
+ if [ "$count" -ge "$max_zones" ]; then
echo "PASS: inserted $count entries from packet path in $duration ms total"
else
- ip netns exec $ns conntrack -S 1>&2
+ ip netns exec "$ns1" conntrack -S 1>&2
echo "FAIL: inserted $count entries from packet path in $duration ms total, expected $max_zones entries"
ret=1
fi
@@ -110,18 +108,19 @@ EOF
test_conntrack_tool() {
local max_zones=$1
- ip netns exec $ns conntrack -F >/dev/null 2>/dev/null
+ ip netns exec "$ns1" conntrack -F >/dev/null 2>/dev/null
- local outerstart=$(date +%s%3N)
- local start=$(date +%s%3N)
- local stop=$start
- local i=0
- while [ $i -lt $max_zones ]; do
+ local outerstart start stop i
+ outerstart=$(date +%s%3N)
+ start=$(date +%s%3N)
+ stop="$start"
+ i=0
+ while [ "$i" -lt "$max_zones" ]; do
i=$((i + 1))
- ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+ ip netns exec "$ns1" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
--timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i >/dev/null 2>&1
if [ $? -ne 0 ];then
- ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+ ip netns exec "$ns1" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
--timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i > /dev/null
echo "FAIL: conntrack -I returned an error"
ret=1
@@ -137,13 +136,15 @@ test_conntrack_tool() {
fi
done
- local count=$(ip netns exec "$ns" conntrack -C)
- local duration=$((stop-outerstart))
+ local count
+ local duration
+ count=$(ip netns exec "$ns1" conntrack -C)
+ duration=$((stop-outerstart))
- if [ $count -eq $max_zones ]; then
+ if [ "$count" -eq "$max_zones" ]; then
echo "PASS: inserted $count entries via ctnetlink in $duration ms"
else
- ip netns exec $ns conntrack -S 1>&2
+ ip netns exec "$ns1" conntrack -S 1>&2
echo "FAIL: inserted $count entries via ctnetlink in $duration ms, expected $max_zones entries ($duration ms)"
ret=1
fi
@@ -151,7 +152,7 @@ test_conntrack_tool() {
test_zones $zones
-if [ $have_ct_tool -eq 1 ];then
+if [ "$have_ct_tool" -eq 1 ];then
test_conntrack_tool $zones
else
echo "SKIP: Could not run ctnetlink insertion test without conntrack tool"
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/common.sh b/tools/testing/selftests/net/netfilter/packetdrill/common.sh
new file mode 100755
index 0000000000..ed36d53519
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/common.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# for debugging set net.netfilter.nf_log_all_netns=1 in init_net
+# or do not use net namespaces.
+modprobe -q nf_conntrack
+sysctl -q net.netfilter.nf_conntrack_log_invalid=6
+
+# Flush old cached data (fastopen cookies).
+ip tcp_metrics flush all > /dev/null 2>&1
+
+# TCP min, default, and max receive and send buffer sizes.
+sysctl -q net.ipv4.tcp_rmem="4096 540000 $((15*1024*1024))"
+sysctl -q net.ipv4.tcp_wmem="4096 $((256*1024)) 4194304"
+
+# TCP congestion control.
+sysctl -q net.ipv4.tcp_congestion_control=cubic
+
+# TCP slow start after idle.
+sysctl -q net.ipv4.tcp_slow_start_after_idle=0
+
+# TCP Explicit Congestion Notification (ECN)
+sysctl -q net.ipv4.tcp_ecn=0
+
+sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1
+
+# Override the default qdisc on the tun device.
+# Many tests fail with timing errors if the default
+# is FQ and that paces their flows.
+tc qdisc add dev tun0 root pfifo
+
+# Enable conntrack
+$xtables -A INPUT -m conntrack --ctstate NEW -p tcp --syn
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt
new file mode 100644
index 0000000000..d755bd64c5
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt
@@ -0,0 +1,118 @@
+// check that already-acked (retransmitted) packet is let through rather
+// than tagged as INVALID.
+
+`packetdrill/common.sh`
+
+// should set -P DROP but it disconnects VM w.o. extra netns
++0 `$xtables -A INPUT -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 10) = 0
+
++0 < S 0:0(0) win 32792 <mss 1000>
++0 > S. 0:0(0) ack 1 <mss 1460>
++.01 < . 1:1(0) ack 1 win 65535
++0 accept(3, ..., ...) = 4
+
++0.0001 < P. 1:1461(1460) ack 1 win 257
++.0 > . 1:1(0) ack 1461 win 65535
++0.0001 < P. 1461:2921(1460) ack 1 win 257
++.0 > . 1:1(0) ack 2921 win 65535
++0.0001 < P. 2921:4381(1460) ack 1 win 257
++.0 > . 1:1(0) ack 4381 win 65535
++0.0001 < P. 4381:5841(1460) ack 1 win 257
++.0 > . 1:1(0) ack 5841 win 65535
++0.0001 < P. 5841:7301(1460) ack 1 win 257
++.0 > . 1:1(0) ack 7301 win 65535
++0.0001 < P. 7301:8761(1460) ack 1 win 257
++.0 > . 1:1(0) ack 8761 win 65535
++0.0001 < P. 8761:10221(1460) ack 1 win 257
++.0 > . 1:1(0) ack 10221 win 65535
++0.0001 < P. 10221:11681(1460) ack 1 win 257
++.0 > . 1:1(0) ack 11681 win 65535
++0.0001 < P. 11681:13141(1460) ack 1 win 257
++.0 > . 1:1(0) ack 13141 win 65535
++0.0001 < P. 13141:14601(1460) ack 1 win 257
++.0 > . 1:1(0) ack 14601 win 65535
++0.0001 < P. 14601:16061(1460) ack 1 win 257
++.0 > . 1:1(0) ack 16061 win 65535
++0.0001 < P. 16061:17521(1460) ack 1 win 257
++.0 > . 1:1(0) ack 17521 win 65535
++0.0001 < P. 17521:18981(1460) ack 1 win 257
++.0 > . 1:1(0) ack 18981 win 65535
++0.0001 < P. 18981:20441(1460) ack 1 win 257
++.0 > . 1:1(0) ack 20441 win 65535
++0.0001 < P. 20441:21901(1460) ack 1 win 257
++.0 > . 1:1(0) ack 21901 win 65535
++0.0001 < P. 21901:23361(1460) ack 1 win 257
++.0 > . 1:1(0) ack 23361 win 65535
++0.0001 < P. 23361:24821(1460) ack 1 win 257
+0.055 > . 1:1(0) ack 24821 win 65535
++0.0001 < P. 24821:26281(1460) ack 1 win 257
++.0 > . 1:1(0) ack 26281 win 65535
++0.0001 < P. 26281:27741(1460) ack 1 win 257
++.0 > . 1:1(0) ack 27741 win 65535
++0.0001 < P. 27741:29201(1460) ack 1 win 257
++.0 > . 1:1(0) ack 29201 win 65535
++0.0001 < P. 29201:30661(1460) ack 1 win 257
++.0 > . 1:1(0) ack 30661 win 65535
++0.0001 < P. 30661:32121(1460) ack 1 win 257
++.0 > . 1:1(0) ack 32121 win 65535
++0.0001 < P. 32121:33581(1460) ack 1 win 257
++.0 > . 1:1(0) ack 33581 win 65535
++0.0001 < P. 33581:35041(1460) ack 1 win 257
++.0 > . 1:1(0) ack 35041 win 65535
++0.0001 < P. 35041:36501(1460) ack 1 win 257
++.0 > . 1:1(0) ack 36501 win 65535
++0.0001 < P. 36501:37961(1460) ack 1 win 257
++.0 > . 1:1(0) ack 37961 win 65535
++0.0001 < P. 37961:39421(1460) ack 1 win 257
++.0 > . 1:1(0) ack 39421 win 65535
++0.0001 < P. 39421:40881(1460) ack 1 win 257
++.0 > . 1:1(0) ack 40881 win 65535
++0.0001 < P. 40881:42341(1460) ack 1 win 257
++.0 > . 1:1(0) ack 42341 win 65535
++0.0001 < P. 42341:43801(1460) ack 1 win 257
++.0 > . 1:1(0) ack 43801 win 65535
++0.0001 < P. 43801:45261(1460) ack 1 win 257
++.0 > . 1:1(0) ack 45261 win 65535
++0.0001 < P. 45261:46721(1460) ack 1 win 257
++.0 > . 1:1(0) ack 46721 win 65535
++0.0001 < P. 46721:48181(1460) ack 1 win 257
++.0 > . 1:1(0) ack 48181 win 65535
++0.0001 < P. 48181:49641(1460) ack 1 win 257
++.0 > . 1:1(0) ack 49641 win 65535
++0.0001 < P. 49641:51101(1460) ack 1 win 257
++.0 > . 1:1(0) ack 51101 win 65535
++0.0001 < P. 51101:52561(1460) ack 1 win 257
++.0 > . 1:1(0) ack 52561 win 65535
++0.0001 < P. 52561:54021(1460) ack 1 win 257
++.0 > . 1:1(0) ack 54021 win 65535
++0.0001 < P. 54021:55481(1460) ack 1 win 257
++.0 > . 1:1(0) ack 55481 win 65535
++0.0001 < P. 55481:56941(1460) ack 1 win 257
++.0 > . 1:1(0) ack 56941 win 65535
++0.0001 < P. 56941:58401(1460) ack 1 win 257
++.0 > . 1:1(0) ack 58401 win 65535
++0.0001 < P. 58401:59861(1460) ack 1 win 257
++.0 > . 1:1(0) ack 59861 win 65535
++0.0001 < P. 59861:61321(1460) ack 1 win 257
++.0 > . 1:1(0) ack 61321 win 65535
++0.0001 < P. 61321:62781(1460) ack 1 win 257
++.0 > . 1:1(0) ack 62781 win 65535
++0.0001 < P. 62781:64241(1460) ack 1 win 257
++.0 > . 1:1(0) ack 64241 win 65535
++0.0001 < P. 64241:65701(1460) ack 1 win 257
++.0 > . 1:1(0) ack 65701 win 65535
++0.0001 < P. 65701:67161(1460) ack 1 win 257
++.0 > . 1:1(0) ack 67161 win 65535
+
+// nf_ct_proto_6: SEQ is under the lower bound (already ACKed data retransmitted) IN=tun0 OUT= MAC= SRC=192.0.2.1 DST=192.168.24.72 LEN=1500 TOS=0x00 PREC=0x00 TTL=255 ID=0 PROTO=TCP SPT=34375 DPT=8080 SEQ=1 ACK=4162510439 WINDOW=257 RES=0x00 ACK PSH URGP=0
++0.0001 < P. 1:1461(1460) ack 1 win 257
+
+// only sent if above packet isn't flagged as invalid
++.0 > . 1:1(0) ack 67161 win 65535
+
++0 `$xtables -D INPUT -m conntrack --ctstate INVALID -j DROP`
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt
new file mode 100644
index 0000000000..dccdd4c009
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt
@@ -0,0 +1,62 @@
+// check RST packet that doesn't exactly match expected next sequence
+// number still transitions conntrack state to CLOSE iff its already in
+// FIN/CLOSE_WAIT.
+
+`packetdrill/common.sh`
+
+// 5.771921 server_ip > client_ip TLSv1.2 337 [Packet size limited during capture]
+// 5.771994 server_ip > client_ip TLSv1.2 337 [Packet size limited during capture]
+// 5.772212 client_ip > server_ip TCP 66 45020 > 443 [ACK] Seq=1905874048 Ack=781810658 Win=36352 Len=0 TSval=3317842872 TSecr=675936334
+// 5.787924 server_ip > client_ip TLSv1.2 1300 [Packet size limited during capture]
+// 5.788126 server_ip > client_ip TLSv1.2 90 Application Data
+// 5.788207 server_ip > client_ip TCP 66 443 > 45020 [FIN, ACK] Seq=781811916 Ack=1905874048 Win=31104 Len=0 TSval=675936350 TSecr=3317842872
+// 5.788447 client_ip > server_ip TLSv1.2 90 Application Data
+// 5.788479 client_ip > server_ip TCP 66 45020 > 443 [RST, ACK] Seq=1905874072 Ack=781811917 Win=39040 Len=0 TSval=3317842889 TSecr=675936350
+// 5.788581 server_ip > client_ip TCP 54 8443 > 45020 [RST] Seq=781811892 Win=0 Len=0
+
++0 `iptables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `iptables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+
+0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
++0.1 < S. 1:1(0) ack 1 win 65535 <mss 1460>
+
++0 > . 1:1(0) ack 1 win 65535
++0 < . 1:1001(1000) ack 1 win 65535
++0 < . 1001:2001(1000) ack 1 win 65535
++0 < . 2001:3001(1000) ack 1 win 65535
+
++0 > . 1:1(0) ack 1001 win 65535
++0 > . 1:1(0) ack 2001 win 65535
++0 > . 1:1(0) ack 3001 win 65535
+
++0 write(3, ..., 1000) = 1000
+
++0.0 > P. 1:1001(1000) ack 3001 win 65535
+
++0.1 read(3, ..., 1000) = 1000
+
+// Conntrack should move to FIN_WAIT, then CLOSE_WAIT.
++0 < F. 3001:3001(0) ack 1001 win 65535
++0 > . 1001:1001(0) ack 3002 win 65535
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q CLOSE_WAIT`
+
++1 close(3) = 0
+// RST: unread data. FIN was seen, hence ack + 1
++0 > R. 1001:1001(0) ack 3002 win 65535
+// ... and then, CLOSE.
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q CLOSE\ `
+
+// Spurious RST from peer -- no sk state. Should NOT get
+// marked INVALID, because conntrack is already closing.
++0.1 < R 2001:2001(0) win 0
+
+// No packets should have been marked INVALID
++0 `iptables -v -S INPUT | grep INVALID | grep -q -- "-c 0 0"`
++0 `iptables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"`
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt
new file mode 100644
index 0000000000..686f18a3d9
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt
@@ -0,0 +1,59 @@
+// check that out of window resets are marked as INVALID and conntrack remains
+// in ESTABLISHED state.
+
+`packetdrill/common.sh`
+
++0 `$xtables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `$xtables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+
+0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
++0.1 < S. 1:1(0) ack 1 win 65535 <mss 1460>
+
++0 > . 1:1(0) ack 1 win 65535
++0 < . 1:1001(1000) ack 1 win 65535
++0 < . 1001:2001(1000) ack 1 win 65535
++0 < . 2001:3001(1000) ack 1 win 65535
+
++0 > . 1:1(0) ack 1001 win 65535
++0 > . 1:1(0) ack 2001 win 65535
++0 > . 1:1(0) ack 3001 win 65535
+
++0 write(3, ..., 1000) = 1000
+
+// out of window
++0.0 < R 0:0(0) win 0
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
+// out of window
++0.0 < R 1000000:1000000(0) win 0
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
+// in-window but not exact match
++0.0 < R 42:42(0) win 0
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
++0.0 > P. 1:1001(1000) ack 3001 win 65535
+
++0.1 read(3, ..., 1000) = 1000
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
++0 < . 3001:3001(0) ack 1001 win 65535
+
++0.0 < R. 3000:3000(0) ack 1001 win 0
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
+// exact next sequence
++0.0 < R. 3001:3001(0) ack 1001 win 0
+// Conntrack should move to CLOSE
+
+// Expect four invalid RSTs
++0 `$xtables -v -S INPUT | grep INVALID | grep -q -- "-c 4 "`
++0 `$xtables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"`
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q CLOSE\ `
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt
new file mode 100644
index 0000000000..3442cd29bc
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt
@@ -0,0 +1,44 @@
+// Check connection re-use, i.e. peer that receives the SYN answers with
+// a challenge-ACK.
+// Check that conntrack lets all packets pass, including the challenge ack,
+// and that a new connection is established.
+
+`packetdrill/common.sh`
+
+// S >
+// . < (challnge-ack)
+// R. >
+// S >
+// S. <
+// Expected outcome: established connection.
+
++0 `$xtables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `$xtables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
+// Challenge ACK, old incarnation.
+0.1 < . 145824453:145824453(0) ack 643160523 win 240 <mss 1460,nop,nop,TS val 1 ecr 1,nop,wscale 0>
+
++0.01 > R 643160523:643160523(0) win 0
+
++0.01 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT`
+
+// Must go through.
++0.01 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
+// correct synack
++0.1 < S. 0:0(0) ack 1 win 250 <mss 1460,nop,nop,TS val 1 ecr 1,nop,wscale 0>
+
+// 3whs completes.
++0.01 > . 1:1(0) ack 1 win 256 <nop,nop,TS val 1 ecr 1>
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep ESTABLISHED | grep -q ASSURED`
+
+// No packets should have been marked INVALID
++0 `$xtables -v -S INPUT | grep INVALID | grep -q -- "-c 0 0"`
++0 `$xtables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"`
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt
new file mode 100644
index 0000000000..3047160c4b
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt
@@ -0,0 +1,51 @@
+// Check conntrack copes with syn/ack reply for a previous, old incarnation.
+
+// tcpdump with buggy sequence
+// 10.176.25.8.829 > 10.192.171.30.2049: Flags [S], seq 2375731741, win 29200, options [mss 1460,sackOK,TS val 2083107423 ecr 0,nop,wscale 7], length 0
+// OLD synack, for old/previous S
+// 10.192.171.30.2049 > 10.176.25.8.829: Flags [S.], seq 145824453, ack 643160523, win 65535, options [mss 8952,nop,wscale 5,TS val 3215437785 ecr 2082921663,nop,nop], length 0
+// This reset never makes it to the endpoint, elided in the packetdrill script
+// 10.192.171.30.2049 > 10.176.25.8.829: Flags [R.], seq 1, ack 1, win 65535, options [mss 8952,nop,wscale 5,TS val 3215443451 ecr 2082921663,nop,nop], length 0
+// Syn retransmit, no change
+// 10.176.25.8.829 > 10.192.171.30.2049: Flags [S], seq 2375731741, win 29200, options [mss 1460,sackOK,TS val 2083115583 ecr 0,nop,wscale 7], length 0
+// CORRECT synack, should be accepted, but conntrack classified this as INVALID:
+// SEQ is over the upper bound (over the window of the receiver) IN=tun0 OUT= MAC= SRC=192.0.2.1 DST=192.168.37.78 LEN=40 TOS=0x00 PREC=0x00 TTL=255 ID=0 PROTO=TCP SPT=8080 DPT=34500 SEQ=162602411 ACK=2124350315 ..
+// 10.192.171.30.2049 > 10.176.25.8.829: Flags [S.], seq 162602410, ack 2375731742, win 65535, options [mss 8952,nop,wscale 5,TS val 3215445754 ecr 2083115583,nop,nop], length 0
+
+`packetdrill/common.sh`
+
++0 `$xtables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `$xtables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
+// bogus/outdated synack, invalid ack value
+0.1 < S. 145824453:145824453(0) ack 643160523 win 240 <mss 1440,nop,nop,TS val 1 ecr 1,nop,wscale 0>
+
+// syn retransmitted
+1.01 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1015 ecr 0,nop,wscale 8>
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT`
+
+// correct synack
++0 < S. 145758918:145758918(0) ack 1 win 250 <mss 1460,nop,nop,TS val 1 ecr 1,nop,wscale 0>
++0 write(3, ..., 1) = 1
+
+// with buggy conntrack above packet is dropped, so SYN rtx is seen:
+// script packet: 1.054007 . 1:1(0) ack 16777958 win 256 <nop,nop,TS val 1033 ecr 1>
+// actual packet: 3.010000 S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1015 ecr 0,nop,wscale 8>
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep ESTABLISHED | grep -q ASSURED`
+
++0 > P. 1:2(1) ack 4294901762 win 256 <nop,nop,TS val 1067 ecr 1>
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep ASSURED | grep -q ESTABLISHED`
+
+// No packets should have been marked INVALID in OUTPUT direction, 1 in INPUT
++0 `$xtables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"`
++0 `$xtables -v -S INPUT | grep INVALID | grep -q -- "-c 1 "`
+
++0 `$xtables -D INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `$xtables -D OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt
new file mode 100644
index 0000000000..842242f8cc
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt
@@ -0,0 +1,34 @@
+// Check reception of another SYN while we have an established conntrack state.
+// Challenge ACK is supposed to pass through, RST reply should clear conntrack
+// state and SYN retransmit should give us new 'SYN_RECV' connection state.
+
+`packetdrill/common.sh`
+
+// should show a match if bug is present:
++0 `iptables -A INPUT -m conntrack --ctstate INVALID -p tcp --tcp-flags SYN,ACK SYN,ACK`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 10) = 0
+
++0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7, TS val 1 ecr 0,nop,nop>
++0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,TS val 100 ecr 1,nop,wscale 8>
++.01 < . 1:1(0) ack 1 win 257 <TS val 1 ecr 100,nop,nop>
++0 accept(3, ..., ...) = 4
+
++0 < P. 1:101(100) ack 1 win 257 <TS val 2 ecr 100,nop,nop>
++.001 > . 1:1(0) ack 101 win 256 <nop,nop,TS val 110 ecr 2>
++0 read(4, ..., 101) = 100
+
+1.0 < S 2000:2000(0) win 32792 <mss 1000,nop,wscale 7, TS val 233 ecr 0,nop,nop>
+// Won't expect this: challenge ack.
+
++0 > . 1:1(0) ack 101 win 256 <nop,nop,TS val 112 ecr 2>
++0 < R. 101:101(0) ack 1 win 257
++0 close(4) = 0
+
+1.5 < S 2000:2000(0) win 32792 <mss 1000,nop,wscale 0, TS val 233 ecr 0,nop,nop>
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep -q SYN_RECV`
++0 `iptables -v -S INPUT | grep INVALID | grep -q -- "-c 0 0"`
diff --git a/tools/testing/selftests/netfilter/rpath.sh b/tools/testing/selftests/net/netfilter/rpath.sh
index 5289c8447a..4485fd7675 100755
--- a/tools/testing/selftests/netfilter/rpath.sh
+++ b/tools/testing/selftests/net/netfilter/rpath.sh
@@ -64,12 +64,18 @@ ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad
# firewall matches to test
[ -n "$iptables" ] && {
common='-t raw -A PREROUTING -s 192.168.0.0/16'
- ip netns exec "$ns2" "$iptables" $common -m rpfilter
+ if ! ip netns exec "$ns2" "$iptables" $common -m rpfilter;then
+ echo "Cannot add rpfilter rule"
+ exit $ksft_skip
+ fi
ip netns exec "$ns2" "$iptables" $common -m rpfilter --invert
}
[ -n "$ip6tables" ] && {
common='-t raw -A PREROUTING -s fec0::/16'
- ip netns exec "$ns2" "$ip6tables" $common -m rpfilter
+ if ! ip netns exec "$ns2" "$ip6tables" $common -m rpfilter;then
+ echo "Cannot add rpfilter rule"
+ exit $ksft_skip
+ fi
ip netns exec "$ns2" "$ip6tables" $common -m rpfilter --invert
}
[ -n "$nft" ] && ip netns exec "$ns2" $nft -f - <<EOF
diff --git a/tools/testing/selftests/netfilter/sctp_collision.c b/tools/testing/selftests/net/netfilter/sctp_collision.c
index 21bb1cfd8a..21bb1cfd8a 100644
--- a/tools/testing/selftests/netfilter/sctp_collision.c
+++ b/tools/testing/selftests/net/netfilter/sctp_collision.c
diff --git a/tools/testing/selftests/net/netfilter/settings b/tools/testing/selftests/net/netfilter/settings
new file mode 100644
index 0000000000..abc5648b59
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/settings
@@ -0,0 +1 @@
+timeout=1800
diff --git a/tools/testing/selftests/netfilter/xt_string.sh b/tools/testing/selftests/net/netfilter/xt_string.sh
index 1802653a47..8d401c69e3 100755
--- a/tools/testing/selftests/netfilter/xt_string.sh
+++ b/tools/testing/selftests/net/netfilter/xt_string.sh
@@ -5,53 +5,57 @@
ksft_skip=4
rc=0
-if ! iptables --version >/dev/null 2>&1; then
- echo "SKIP: Test needs iptables"
- exit $ksft_skip
-fi
-if ! ip -V >/dev/null 2>&1; then
- echo "SKIP: Test needs iproute2"
- exit $ksft_skip
-fi
-if ! nc -h >/dev/null 2>&1; then
- echo "SKIP: Test needs netcat"
- exit $ksft_skip
-fi
+source lib.sh
+
+checktool "socat -h" "run test without socat"
+checktool "iptables --version" "test needs iptables"
+
+infile=$(mktemp)
+
+cleanup()
+{
+ ip netns del "$netns"
+ rm -f "$infile"
+}
+
+trap cleanup EXIT
+
+setup_ns netns
+
+ip -net "$netns" link add d0 type dummy
+ip -net "$netns" link set d0 up
+ip -net "$netns" addr add 10.1.2.1/24 dev d0
pattern="foo bar baz"
patlen=11
hdrlen=$((20 + 8)) # IPv4 + UDP
-ns="ns-$(mktemp -u XXXXXXXX)"
-trap 'ip netns del $ns' EXIT
-ip netns add "$ns"
-ip -net "$ns" link add d0 type dummy
-ip -net "$ns" link set d0 up
-ip -net "$ns" addr add 10.1.2.1/24 dev d0
-
-#ip netns exec "$ns" tcpdump -npXi d0 &
+
+#ip netns exec "$netns" tcpdump -npXi d0 &
#tcpdump_pid=$!
-#trap 'kill $tcpdump_pid; ip netns del $ns' EXIT
+#trap 'kill $tcpdump_pid; ip netns del $netns' EXIT
add_rule() { # (alg, from, to)
- ip netns exec "$ns" \
+ ip netns exec "$netns" \
iptables -A OUTPUT -o d0 -m string \
- --string "$pattern" --algo $1 --from $2 --to $3
+ --string "$pattern" --algo "$1" --from "$2" --to "$3"
}
showrules() { # ()
- ip netns exec "$ns" iptables -v -S OUTPUT | grep '^-A'
+ ip netns exec "$netns" iptables -v -S OUTPUT | grep '^-A'
}
zerorules() {
- ip netns exec "$ns" iptables -Z OUTPUT
+ ip netns exec "$netns" iptables -Z OUTPUT
}
countrule() { # (pattern)
showrules | grep -c -- "$*"
}
send() { # (offset)
- ( for ((i = 0; i < $1 - $hdrlen; i++)); do
- printf " "
+ ( for ((i = 0; i < $1 - hdrlen; i++)); do
+ echo -n " "
done
- printf "$pattern"
- ) | ip netns exec "$ns" nc -w 1 -u 10.1.2.2 27374
+ echo -n "$pattern"
+ ) > "$infile"
+
+ ip netns exec "$netns" socat -t 1 -u STDIN UDP-SENDTO:10.1.2.2:27374 < "$infile"
}
add_rule bm 1000 1500
@@ -61,8 +65,8 @@ add_rule kmp 1400 1600
zerorules
send 0
-send $((1000 - $patlen))
-if [ $(countrule -c 0 0) -ne 4 ]; then
+send $((1000 - patlen))
+if [ "$(countrule -c 0 0)" -ne 4 ]; then
echo "FAIL: rules match data before --from"
showrules
((rc--))
@@ -70,16 +74,16 @@ fi
zerorules
send 1000
-send $((1400 - $patlen))
-if [ $(countrule -c 2) -ne 2 ]; then
+send $((1400 - patlen))
+if [ "$(countrule -c 2)" -ne 2 ]; then
echo "FAIL: only two rules should match at low offset"
showrules
((rc--))
fi
zerorules
-send $((1500 - $patlen))
-if [ $(countrule -c 1) -ne 4 ]; then
+send $((1500 - patlen))
+if [ "$(countrule -c 1)" -ne 4 ]; then
echo "FAIL: all rules should match at end of packet"
showrules
((rc--))
@@ -87,7 +91,7 @@ fi
zerorules
send 1495
-if [ $(countrule -c 1) -ne 1 ]; then
+if [ "$(countrule -c 1)" -ne 1 ]; then
echo "FAIL: only kmp with proper --to should match pattern spanning fragments"
showrules
((rc--))
@@ -95,23 +99,23 @@ fi
zerorules
send 1500
-if [ $(countrule -c 1) -ne 2 ]; then
+if [ "$(countrule -c 1)" -ne 2 ]; then
echo "FAIL: two rules should match pattern at start of second fragment"
showrules
((rc--))
fi
zerorules
-send $((1600 - $patlen))
-if [ $(countrule -c 1) -ne 2 ]; then
+send $((1600 - patlen))
+if [ "$(countrule -c 1)" -ne 2 ]; then
echo "FAIL: two rules should match pattern at end of largest --to"
showrules
((rc--))
fi
zerorules
-send $((1600 - $patlen + 1))
-if [ $(countrule -c 1) -ne 0 ]; then
+send $((1600 - patlen + 1))
+if [ "$(countrule -c 1)" -ne 0 ]; then
echo "FAIL: no rules should match pattern extending largest --to"
showrules
((rc--))
@@ -119,10 +123,11 @@ fi
zerorules
send 1600
-if [ $(countrule -c 1) -ne 0 ]; then
+if [ "$(countrule -c 1)" -ne 0 ]; then
echo "FAIL: no rule should match pattern past largest --to"
showrules
((rc--))
fi
+[ $rc -eq 0 ] && echo "PASS: string match tests"
exit $rc
diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py
new file mode 100755
index 0000000000..93d9d91452
--- /dev/null
+++ b/tools/testing/selftests/net/nl_netdev.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import time
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import ksft_eq, ksft_ge, ksft_busy_wait
+from lib.py import NetdevFamily, NetdevSimDev, ip
+
+
+def empty_check(nf) -> None:
+ devs = nf.dev_get({}, dump=True)
+ ksft_ge(len(devs), 1)
+
+
+def lo_check(nf) -> None:
+ lo_info = nf.dev_get({"ifindex": 1})
+ ksft_eq(len(lo_info['xdp-features']), 0)
+ ksft_eq(len(lo_info['xdp-rx-metadata-features']), 0)
+
+
+def page_pool_check(nf) -> None:
+ with NetdevSimDev() as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ def up():
+ ip(f"link set dev {nsim.ifname} up")
+
+ def down():
+ ip(f"link set dev {nsim.ifname} down")
+
+ def get_pp():
+ pp_list = nf.page_pool_get({}, dump=True)
+ return [pp for pp in pp_list if pp.get("ifindex") == nsim.ifindex]
+
+ # No page pools when down
+ down()
+ ksft_eq(len(get_pp()), 0)
+
+ # Up, empty page pool appears
+ up()
+ pp_list = get_pp()
+ ksft_ge(len(pp_list), 0)
+ refs = sum([pp["inflight"] for pp in pp_list])
+ ksft_eq(refs, 0)
+
+ # Down, it disappears, again
+ down()
+ pp_list = get_pp()
+ ksft_eq(len(pp_list), 0)
+
+ # Up, allocate a page
+ up()
+ nsim.dfs_write("pp_hold", "y")
+ pp_list = nf.page_pool_get({}, dump=True)
+ refs = sum([pp["inflight"] for pp in pp_list if pp.get("ifindex") == nsim.ifindex])
+ ksft_ge(refs, 1)
+
+ # Now let's leak a page
+ down()
+ pp_list = get_pp()
+ ksft_eq(len(pp_list), 1)
+ refs = sum([pp["inflight"] for pp in pp_list])
+ ksft_eq(refs, 1)
+ attached = [pp for pp in pp_list if "detach-time" not in pp]
+ ksft_eq(len(attached), 0)
+
+ # New pp can get created, and we'll have two
+ up()
+ pp_list = get_pp()
+ attached = [pp for pp in pp_list if "detach-time" not in pp]
+ detached = [pp for pp in pp_list if "detach-time" in pp]
+ ksft_eq(len(attached), 1)
+ ksft_eq(len(detached), 1)
+
+ # Free the old page and the old pp is gone
+ nsim.dfs_write("pp_hold", "n")
+ # Freeing check is once a second so we may need to retry
+ ksft_busy_wait(lambda: len(get_pp()) == 1, deadline=2)
+
+ # And down...
+ down()
+ ksft_eq(len(get_pp()), 0)
+
+ # Last, leave the page hanging for destroy, nothing to check
+ # we're trying to exercise the orphaning path in the kernel
+ up()
+ nsim.dfs_write("pp_hold", "y")
+
+
+def main() -> None:
+ nf = NetdevFamily()
+ ksft_run([empty_check, lo_check, page_pool_check],
+ args=(nf, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 8b12071876..9f8dec2f65 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -489,7 +489,7 @@ class ovsactions(nla):
actstr, reason = parse_extract_field(
actstr,
"drop(",
- "([0-9]+)",
+ r"([0-9]+)",
lambda x: int(x, 0),
False,
None,
@@ -502,9 +502,9 @@ class ovsactions(nla):
actstr = actstr[len("drop"): ]
return (totallen - len(actstr))
- elif parse_starts_block(actstr, "^(\d+)", False, True):
+ elif parse_starts_block(actstr, r"^(\d+)", False, True):
actstr, output = parse_extract_field(
- actstr, None, "(\d+)", lambda x: int(x), False, "0"
+ actstr, None, r"(\d+)", lambda x: int(x), False, "0"
)
self["attrs"].append(["OVS_ACTION_ATTR_OUTPUT", output])
parsed = True
@@ -512,7 +512,7 @@ class ovsactions(nla):
actstr, recircid = parse_extract_field(
actstr,
"recirc(",
- "([0-9a-fA-Fx]+)",
+ r"([0-9a-fA-Fx]+)",
lambda x: int(x, 0),
False,
0,
@@ -588,17 +588,17 @@ class ovsactions(nla):
actstr = actstr[3:]
actstr, ip_block_min = parse_extract_field(
- actstr, "=", "([0-9a-fA-F\.]+)", str, False
+ actstr, "=", r"([0-9a-fA-F\.]+)", str, False
)
actstr, ip_block_max = parse_extract_field(
- actstr, "-", "([0-9a-fA-F\.]+)", str, False
+ actstr, "-", r"([0-9a-fA-F\.]+)", str, False
)
actstr, proto_min = parse_extract_field(
- actstr, ":", "(\d+)", int, False
+ actstr, ":", r"(\d+)", int, False
)
actstr, proto_max = parse_extract_field(
- actstr, "-", "(\d+)", int, False
+ actstr, "-", r"(\d+)", int, False
)
if t is not None:
diff --git a/tools/testing/selftests/bpf/progs/sample_map_ret0.c b/tools/testing/selftests/net/sample_map_ret0.bpf.c
index 495990d355..43ca925949 100644
--- a/tools/testing/selftests/bpf/progs/sample_map_ret0.c
+++ b/tools/testing/selftests/net/sample_map_ret0.bpf.c
@@ -17,7 +17,7 @@ struct {
} array SEC(".maps");
/* Sample program which should always load for testing control paths. */
-SEC(".text") int func()
+SEC("xdp") int func()
{
__u64 key64 = 0;
__u32 key = 0;
diff --git a/tools/testing/selftests/bpf/progs/sample_ret0.c b/tools/testing/selftests/net/sample_ret0.bpf.c
index fec99750d6..1df5ca98bb 100644
--- a/tools/testing/selftests/bpf/progs/sample_ret0.c
+++ b/tools/testing/selftests/net/sample_ret0.bpf.c
@@ -1,6 +1,9 @@
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+#define SEC(name) __attribute__((section(name), used))
+
/* Sample program which should always load for testing control paths. */
+SEC("xdp")
int func()
{
return 0;
diff --git a/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh b/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh
new file mode 100755
index 0000000000..e23210aa54
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh
@@ -0,0 +1,335 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Jianguo Wu <wujianguo@chinatelecom.cn>
+#
+# Mostly copied from tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh.
+#
+# This script is designed for testing the support of netfilter hooks for
+# SRv6 End.DX4 behavior.
+#
+# Hereafter a network diagram is shown, where one tenants (named 100) offer
+# IPv4 L3 VPN services allowing hosts to communicate with each other across
+# an IPv6 network.
+#
+# Routers rt-1 and rt-2 implement IPv4 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DX4 behavior.
+#
+# To explain how an IPv4 L3 VPN based on SRv6 works, let us briefly consider an
+# example where, within the same domain of tenant 100, the host hs-1 pings
+# the host hs-2.
+#
+# First of all, L2 reachability of the host hs-2 is taken into account by
+# the router rt-1 which acts as an arp proxy.
+#
+# When the host hs-1 sends an IPv4 packet destined to hs-2, the router rt-1
+# receives the packet on the internal veth-t100 interface, rt-1 contains the
+# SRv6 Encap route for encapsulating the IPv4 packet in a IPv6 plus the Segment
+# Routing Header (SRH) packet. This packet is sent through the (IPv6) core
+# network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DX4 behavior removes the outer IPv6+SRH headers and
+# routs the packet to the specified nexthop. Afterwards, the packet is sent to
+# the host hs-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the role of rt-1
+# and rt-2 are swapped.
+#
+# And when net.netfilter.nf_hooks_lwtunnel is set to 1 in rt-1 or rt-2, and a
+# rpfilter iptables rule is added, SRv6 packets will go through netfilter PREROUTING
+# hooks.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | hs-1 netns | | hs-2 netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +-----------------------------------+ +-----------------------------------+
+# | . | | . |
+# | +---------------+ | | +---------------- |
+# | | veth-t100 | | | | veth-t100 | |
+# | | 10.0.0.11/24 | +----------+ | | +----------+ | 10.0.0.22/24 | |
+# | +-------+-------+ | route | | | | route | +-------+-------- |
+# | | table | | | | table | |
+# | +----------+ | | +----------+ |
+# | +--------------+ | | +--------------+ |
+# | | veth0 | | | | veth0 | |
+# | | 2001:11::1/64 |.|...|.| 2001:11::2/64 | |
+# | +--------------+ | | +--------------+ |
+# | | | |
+# | rt-1 netns | | rt-2 netns |
+# | | | |
+# +-----------------------------------+ +-----------------------------------+
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table
+# +----------------------------------------------------------------+
+# |SID |Action |
+# +----------------------------------------------------------------+
+# |fc00:21:100::6004|apply SRv6 End.DX4 nh4 10.0.0.1 dev veth-t100 |
+# +----------------------------------------------------------------+
+#
+# rt-1: route table
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t100 |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table
+# +---------------------------------------------------------------+
+# |SID |Action |
+# +---------------------------------------------------------------+
+# |fc00:12:100::6004|apply SRv6 End.DX4 nh4 10.0.0.2 dev veth-t100|
+# +---------------------------------------------------------------+
+#
+# rt-2: route table
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t100 |
+# +---------------------------------------------------+
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+readonly IPv6_RT_NETWORK=2001:11
+readonly IPv4_HS_NETWORK=10.0.0
+readonly SID_LOCATOR=fc00
+
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+cleanup()
+{
+ ip link del veth-rt-1 2>/dev/null || true
+ ip link del veth-rt-2 2>/dev/null || true
+
+ # destroy routers rt-* and hosts hs-*
+ for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
+ ip netns del ${ns} || true
+ done
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns add ${nsname}
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip link set veth-rt-${rt} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${rt} name veth0
+
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad
+ ip -netns ${nsname} link set veth0 up
+ ip -netns ${nsname} link set lo up
+
+ ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_rt_netfilter()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns exec ${nsname} sysctl -wq net.netfilter.nf_hooks_lwtunnel=1
+ ip netns exec ${nsname} iptables -t raw -A PREROUTING -m rpfilter --invert -j DROP
+}
+
+setup_hs()
+{
+ local hs=$1
+ local rt=$2
+ local tid=$3
+ local hsname=hs-${hs}
+ local rtname=rt-${rt}
+ local rtveth=veth-t${tid}
+
+ # set the networking for the host
+ ip netns add ${hsname}
+
+ ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+ ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+ ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0
+ ip -netns ${hsname} link set veth0 up
+ ip -netns ${hsname} link set lo up
+
+ ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.${rt}${hs}/24 dev ${rtveth}
+ ip -netns ${rtname} link set ${rtveth} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1
+}
+
+setup_vpn_config()
+{
+ local hssrc=$1
+ local rtsrc=$2
+ local hsdst=$3
+ local rtdst=$4
+ local tid=$5
+
+ local hssrc_name=hs-t${tid}-${hssrc}
+ local hsdst_name=hs-t${tid}-${hsdst}
+ local rtsrc_name=rt-${rtsrc}
+ local rtdst_name=rt-${rtdst}
+ local vpn_sid=${SID_LOCATOR}:${hssrc}${hsdst}:${tid}::6004
+
+ # set the encap route for encapsulating packets which arrive from the
+ # host hssrc and destined to the access router rtsrc.
+ ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 \
+ via 2001:11::${rtdst} dev veth0
+
+ # set the decap route for decapsulating packets which arrive from
+ # the rtdst router and destined to the hsdst host.
+ ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 \
+ encap seg6local action End.DX4 nh4 ${IPv4_HS_NETWORK}.${hsdst} dev veth-t${tid}
+}
+
+setup()
+{
+ ip link add veth-rt-1 type veth peer name veth-rt-2
+ # setup the networking for router rt-1 and router rt-2
+ setup_rt_networking 1
+ setup_rt_networking 2
+
+ # setup two hosts for the tenant 100.
+ # - host hs-1 is directly connected to the router rt-1;
+ # - host hs-2 is directly connected to the router rt-2.
+ setup_hs 1 1 100
+ setup_hs 2 2 100
+
+ # setup the IPv4 L3 VPN which connects the host hs-1 and host hs-2.
+ setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant
+ setup_vpn_config 2 2 1 1 100
+}
+
+check_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ ip netns exec hs-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ check_hs_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "Hosts connectivity: hs-${hssrc} -> hs-${hsdst} (tenant ${tid})"
+}
+
+host_tests()
+{
+ log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+}
+
+router_netfilter_tests()
+{
+ log_section "SRv6 VPN connectivity test with netfilter enabled in routers"
+ setup_rt_netfilter 1
+ setup_rt_netfilter 2
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+cleanup &>/dev/null
+
+setup
+
+host_tests
+router_netfilter_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
diff --git a/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh b/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh
new file mode 100755
index 0000000000..9e69a2ed5b
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh
@@ -0,0 +1,340 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Jianguo Wu <wujianguo@chinatelecom.cn>
+#
+# Mostly copied from tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh.
+#
+# This script is designed for testing the support of netfilter hooks for
+# SRv6 End.DX4 behavior.
+#
+# Hereafter a network diagram is shown, where one tenants (named 100) offer
+# IPv6 L3 VPN services allowing hosts to communicate with each other across
+# an IPv6 network.
+#
+# Routers rt-1 and rt-2 implement IPv6 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DX4 behavior.
+#
+# To explain how an IPv6 L3 VPN based on SRv6 works, let us briefly consider an
+# example where, within the same domain of tenant 100, the host hs-1 pings
+# the host hs-2.
+#
+# First of all, L2 reachability of the host hs-2 is taken into account by
+# the router rt-1 which acts as an arp proxy.
+#
+# When the host hs-1 sends an IPv6 packet destined to hs-2, the router rt-1
+# receives the packet on the internal veth-t100 interface, rt-1 contains the
+# SRv6 Encap route for encapsulating the IPv6 packet in a IPv6 plus the Segment
+# Routing Header (SRH) packet. This packet is sent through the (IPv6) core
+# network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DX4 behavior removes the outer IPv6+SRH headers and
+# routs the packet to the specified nexthop. Afterwards, the packet is sent to
+# the host hs-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the role of rt-1
+# and rt-2 are swapped.
+#
+# And when net.netfilter.nf_hooks_lwtunnel is set to 1 in rt-1 or rt-2, and a
+# rpfilter iptables rule is added, SRv6 packets will go through netfilter PREROUTING
+# hooks.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | hs-1 netns | | hs-2 netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | cafe::1/64 | | | | cafe::2/64 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +-----------------------------------+ +-----------------------------------+
+# | . | | . |
+# | +---------------+ | | +---------------- |
+# | | veth-t100 | | | | veth-t100 | |
+# | | cafe::11/64 | +----------+ | | +----------+ | cafe::22/64 | |
+# | +-------+-------+ | route | | | | route | +-------+-------- |
+# | | table | | | | table | |
+# | +----------+ | | +----------+ |
+# | +--------------+ | | +--------------+ |
+# | | veth0 | | | | veth0 | |
+# | | 2001:11::1/64 |.|...|.| 2001:11::2/64 | |
+# | +--------------+ | | +--------------+ |
+# | | | |
+# | rt-1 netns | | rt-2 netns |
+# | | | |
+# +-----------------------------------+ +-----------------------------------+
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table
+# +----------------------------------------------------------------+
+# |SID |Action |
+# +----------------------------------------------------------------+
+# |fc00:21:100::6004|apply SRv6 End.DX6 nh6 cafe::1 dev veth-t100 |
+# +----------------------------------------------------------------+
+#
+# rt-1: route table
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::2 |apply seg6 encap segs fc00:12:100::6004|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth_t100 |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table
+# +---------------------------------------------------------------+
+# |SID |Action |
+# +---------------------------------------------------------------+
+# |fc00:12:100::6004|apply SRv6 End.DX6 nh6 cafe::2 dev veth-t100 |
+# +---------------------------------------------------------------+
+#
+# rt-2: route table
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::1 |apply seg6 encap segs fc00:21:100::6004|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth_t100 |
+# +---------------------------------------------------+
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+readonly IPv6_RT_NETWORK=2001:11
+readonly IPv6_HS_NETWORK=cafe
+readonly SID_LOCATOR=fc00
+
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+cleanup()
+{
+ ip link del veth-rt-1 2>/dev/null || true
+ ip link del veth-rt-2 2>/dev/null || true
+
+ # destroy routers rt-* and hosts hs-*
+ for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
+ ip netns del ${ns} || true
+ done
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns add ${nsname}
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip link set veth-rt-${rt} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${rt} name veth0
+
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad
+ ip -netns ${nsname} link set veth0 up
+ ip -netns ${nsname} link set lo up
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_rt_netfilter()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns exec ${nsname} sysctl -wq net.netfilter.nf_hooks_lwtunnel=1
+ ip netns exec ${nsname} ip6tables -t raw -A PREROUTING -m rpfilter --invert -j DROP
+}
+
+setup_hs()
+{
+ local hs=$1
+ local rt=$2
+ local tid=$3
+ local hsname=hs-${hs}
+ local rtname=rt-${rt}
+ local rtveth=veth-t${tid}
+
+ # set the networking for the host
+ ip netns add ${hsname}
+
+ ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+ ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+ ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad
+ ip -netns ${hsname} link set veth0 up
+ ip -netns ${hsname} link set lo up
+
+ ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::${rt}${hs}/64 dev ${rtveth}
+ ip -netns ${rtname} link set ${rtveth} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1
+}
+
+setup_vpn_config()
+{
+ local hssrc=$1
+ local rtsrc=$2
+ local hsdst=$3
+ local rtdst=$4
+ local tid=$5
+
+ local hssrc_name=hs-t${tid}-${hssrc}
+ local hsdst_name=hs-t${tid}-${hsdst}
+ local rtsrc_name=rt-${rtsrc}
+ local rtdst_name=rt-${rtdst}
+ local rtveth=veth-t${tid}
+ local vpn_sid=${SID_LOCATOR}:${hssrc}${hsdst}:${tid}::6004
+
+ ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth}
+
+ # set the encap route for encapsulating packets which arrive from the
+ # host hssrc and destined to the access router rtsrc.
+ ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 \
+ via 2001:11::${rtdst} dev veth0
+
+ # set the decap route for decapsulating packets which arrive from
+ # the rtdst router and destined to the hsdst host.
+ ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 \
+ encap seg6local action End.DX6 nh6 ${IPv6_HS_NETWORK}::${hsdst} dev veth-t${tid}
+}
+
+setup()
+{
+ ip link add veth-rt-1 type veth peer name veth-rt-2
+ # setup the networking for router rt-1 and router rt-2
+ setup_rt_networking 1
+ setup_rt_networking 2
+
+ # setup two hosts for the tenant 100.
+ # - host hs-1 is directly connected to the router rt-1;
+ # - host hs-2 is directly connected to the router rt-2.
+ setup_hs 1 1 100
+ setup_hs 2 2 100
+
+ # setup the IPv4 L3 VPN which connects the host hs-1 and host hs-2.
+ setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant
+ setup_vpn_config 2 2 1 1 100
+}
+
+check_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ ip netns exec hs-${hssrc} ping -6 -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ check_hs_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "Hosts connectivity: hs-${hssrc} -> hs-${hsdst} (tenant ${tid})"
+}
+
+host_tests()
+{
+ log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+}
+
+router_netfilter_tests()
+{
+ log_section "SRv6 VPN connectivity test with netfilter enabled in routers"
+ setup_rt_netfilter 1
+ setup_rt_netfilter 2
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+cleanup &>/dev/null
+
+setup
+
+host_tests
+router_netfilter_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index 8802604148..11a1ebda56 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -7,7 +7,7 @@ source net_helper.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="xdp_dummy.o"
+BPF_FILE="xdp_dummy.bpf.o"
# set global exit status, but never reset nonzero one.
check_err()
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
index 7080eae531..c51ea90a13 100755
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -7,7 +7,7 @@ source net_helper.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="xdp_dummy.o"
+BPF_FILE="xdp_dummy.bpf.o"
cleanup() {
local -r jobs="$(jobs -p)"
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
index e1ff645bd3..17404f49cd 100755
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -7,7 +7,7 @@ source net_helper.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="xdp_dummy.o"
+BPF_FILE="xdp_dummy.bpf.o"
cleanup() {
local -r jobs="$(jobs -p)"
@@ -42,8 +42,8 @@ run_one() {
ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp
tc -n "${PEER_NS}" qdisc add dev veth1 clsact
- tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file nat6to4.o section schedcls/ingress6/nat_6 direct-action
- tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file nat6to4.o section schedcls/egress4/snat4 direct-action
+ tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file nat6to4.bpf.o section schedcls/ingress6/nat_6 direct-action
+ tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file nat6to4.bpf.o section schedcls/egress4/snat4 direct-action
echo ${rx_args}
ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
@@ -89,7 +89,7 @@ if [ ! -f ${BPF_FILE} ]; then
exit -1
fi
-if [ ! -f nat6to4.o ]; then
+if [ ! -f nat6to4.bpf.o ]; then
echo "Missing nat6to4 helper. Run 'make' first"
exit -1
fi
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index 83ed987cff..550d8eb3e2 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -3,7 +3,7 @@
source net_helper.sh
-BPF_FILE="xdp_dummy.o"
+BPF_FILE="xdp_dummy.bpf.o"
readonly BASE="ns-$(mktemp -u XXXXXX)"
readonly SRC=2
readonly DST=1
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
index 3a394b43e2..4f1edbafb9 100755
--- a/tools/testing/selftests/net/veth.sh
+++ b/tools/testing/selftests/net/veth.sh
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
-BPF_FILE="xdp_dummy.o"
+BPF_FILE="xdp_dummy.bpf.o"
readonly STATS="$(mktemp -p /tmp ns-XXXXXX)"
readonly BASE=`basename $STATS`
readonly SRC=2
diff --git a/tools/testing/selftests/net/xdp_dummy.c b/tools/testing/selftests/net/xdp_dummy.bpf.c
index d988b2e0ce..d988b2e0ce 100644
--- a/tools/testing/selftests/net/xdp_dummy.c
+++ b/tools/testing/selftests/net/xdp_dummy.bpf.c
diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh
index 4577895306..3eeeeffb40 100755
--- a/tools/testing/selftests/net/xfrm_policy.sh
+++ b/tools/testing/selftests/net/xfrm_policy.sh
@@ -293,7 +293,7 @@ check_random_order()
local ns=$1
local log=$2
- for i in $(seq 100); do
+ for i in $(seq 50); do
ip -net $ns xfrm policy flush
for j in $(seq 0 16 255 | sort -R); do
ip -net $ns xfrm policy add dst $j.0.0.0/24 dir out priority 10 action allow
@@ -306,7 +306,7 @@ check_random_order()
done
done
- for i in $(seq 100); do
+ for i in $(seq 50); do
ip -net $ns xfrm policy flush
for j in $(seq 0 16 255 | sort -R); do
local addr=$(printf "e000:0000:%02x00::/56" $j)
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
deleted file mode 100644
index 936c3085bb..0000000000
--- a/tools/testing/selftests/netfilter/Makefile
+++ /dev/null
@@ -1,21 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# Makefile for netfilter selftests
-
-TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
- conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
- nft_concat_range.sh nft_conntrack_helper.sh \
- nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
- ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
- conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \
- conntrack_sctp_collision.sh xt_string.sh \
- bridge_netfilter.sh
-
-HOSTPKG_CONFIG := pkg-config
-
-CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
-LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
-
-TEST_GEN_FILES = nf-queue connect_close audit_logread sctp_collision \
- conntrack_dump_flush
-
-include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/bridge_brouter.sh b/tools/testing/selftests/netfilter/bridge_brouter.sh
deleted file mode 100755
index 29f3955b9a..0000000000
--- a/tools/testing/selftests/netfilter/bridge_brouter.sh
+++ /dev/null
@@ -1,146 +0,0 @@
-#!/bin/bash
-#
-# This test is for bridge 'brouting', i.e. make some packets being routed
-# rather than getting bridged even though they arrive on interface that is
-# part of a bridge.
-
-# eth0 br0 eth0
-# setup is: ns1 <-> ns0 <-> ns2
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-ret=0
-
-ebtables -V > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ebtables"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-ip netns add ns0
-ip netns add ns1
-ip netns add ns2
-
-ip link add veth0 netns ns0 type veth peer name eth0 netns ns1
-if [ $? -ne 0 ]; then
- echo "SKIP: Can't create veth device"
- exit $ksft_skip
-fi
-ip link add veth1 netns ns0 type veth peer name eth0 netns ns2
-
-ip -net ns0 link set lo up
-ip -net ns0 link set veth0 up
-ip -net ns0 link set veth1 up
-
-ip -net ns0 link add br0 type bridge
-if [ $? -ne 0 ]; then
- echo "SKIP: Can't create bridge br0"
- exit $ksft_skip
-fi
-
-ip -net ns0 link set veth0 master br0
-ip -net ns0 link set veth1 master br0
-ip -net ns0 link set br0 up
-ip -net ns0 addr add 10.0.0.1/24 dev br0
-
-# place both in same subnet, ns1 and ns2 connected via ns0:br0
-for i in 1 2; do
- ip -net ns$i link set lo up
- ip -net ns$i link set eth0 up
- ip -net ns$i addr add 10.0.0.1$i/24 dev eth0
-done
-
-test_ebtables_broute()
-{
- local cipt
-
- # redirect is needed so the dstmac is rewritten to the bridge itself,
- # ip stack won't process OTHERHOST (foreign unicast mac) packets.
- ip netns exec ns0 ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP
- if [ $? -ne 0 ]; then
- echo "SKIP: Could not add ebtables broute redirect rule"
- return $ksft_skip
- fi
-
- # ping netns1, expected to not work (ip forwarding is off)
- ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null 2>&1
- if [ $? -eq 0 ]; then
- echo "ERROR: ping works, should have failed" 1>&2
- return 1
- fi
-
- # enable forwarding on both interfaces.
- # neither needs an ip address, but at least the bridge needs
- # an ip address in same network segment as ns1 and ns2 (ns0
- # needs to be able to determine route for to-be-forwarded packet).
- ip netns exec ns0 sysctl -q net.ipv4.conf.veth0.forwarding=1
- ip netns exec ns0 sysctl -q net.ipv4.conf.veth1.forwarding=1
-
- sleep 1
-
- ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null
- if [ $? -ne 0 ]; then
- echo "ERROR: ping did not work, but it should (broute+forward)" 1>&2
- return 1
- fi
-
- echo "PASS: ns1/ns2 connectivity with active broute rule"
- ip netns exec ns0 ebtables -t broute -F
-
- # ping netns1, expected to work (frames are bridged)
- ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null
- if [ $? -ne 0 ]; then
- echo "ERROR: ping did not work, but it should (bridged)" 1>&2
- return 1
- fi
-
- ip netns exec ns0 ebtables -t filter -A FORWARD -p ipv4 --ip-protocol icmp -j DROP
-
- # ping netns1, expected to not work (DROP in bridge forward)
- ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null 2>&1
- if [ $? -eq 0 ]; then
- echo "ERROR: ping works, should have failed (icmp forward drop)" 1>&2
- return 1
- fi
-
- # re-activate brouter
- ip netns exec ns0 ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP
-
- ip netns exec ns2 ping -q -c 1 10.0.0.11 > /dev/null
- if [ $? -ne 0 ]; then
- echo "ERROR: ping did not work, but it should (broute+forward 2)" 1>&2
- return 1
- fi
-
- echo "PASS: ns1/ns2 connectivity with active broute rule and bridge forward drop"
- return 0
-}
-
-# test basic connectivity
-ip netns exec ns1 ping -c 1 -q 10.0.0.12 > /dev/null
-if [ $? -ne 0 ]; then
- echo "ERROR: Could not reach ns2 from ns1" 1>&2
- ret=1
-fi
-
-ip netns exec ns2 ping -c 1 -q 10.0.0.11 > /dev/null
-if [ $? -ne 0 ]; then
- echo "ERROR: Could not reach ns1 from ns2" 1>&2
- ret=1
-fi
-
-if [ $ret -eq 0 ];then
- echo "PASS: netns connectivity: ns1 and ns2 can reach each other"
-fi
-
-test_ebtables_broute
-ret=$?
-for i in 0 1 2; do ip netns del ns$i;done
-
-exit $ret
diff --git a/tools/testing/selftests/netfilter/bridge_netfilter.sh b/tools/testing/selftests/netfilter/bridge_netfilter.sh
deleted file mode 100644
index 659b3ab02c..0000000000
--- a/tools/testing/selftests/netfilter/bridge_netfilter.sh
+++ /dev/null
@@ -1,188 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Test bridge netfilter + conntrack, a combination that doesn't really work,
-# with multicast/broadcast packets racing for hash table insertion.
-
-# eth0 br0 eth0
-# setup is: ns1 <->,ns0 <-> ns3
-# ns2 <-' `'-> ns4
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-ret=0
-
-sfx=$(mktemp -u "XXXXXXXX")
-ns0="ns0-$sfx"
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
-ns3="ns3-$sfx"
-ns4="ns4-$sfx"
-
-ebtables -V > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ebtables"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-for i in $(seq 0 4); do
- eval ip netns add \$ns$i
-done
-
-cleanup() {
- for i in $(seq 0 4); do eval ip netns del \$ns$i;done
-}
-
-trap cleanup EXIT
-
-do_ping()
-{
- fromns="$1"
- dstip="$2"
-
- ip netns exec $fromns ping -c 1 -q $dstip > /dev/null
- if [ $? -ne 0 ]; then
- echo "ERROR: ping from $fromns to $dstip"
- ip netns exec ${ns0} nft list ruleset
- ret=1
- fi
-}
-
-bcast_ping()
-{
- fromns="$1"
- dstip="$2"
-
- for i in $(seq 1 1000); do
- ip netns exec $fromns ping -q -f -b -c 1 -q $dstip > /dev/null 2>&1
- if [ $? -ne 0 ]; then
- echo "ERROR: ping -b from $fromns to $dstip"
- ip netns exec ${ns0} nft list ruleset
- fi
- done
-}
-
-ip link add veth1 netns ${ns0} type veth peer name eth0 netns ${ns1}
-if [ $? -ne 0 ]; then
- echo "SKIP: Can't create veth device"
- exit $ksft_skip
-fi
-
-ip link add veth2 netns ${ns0} type veth peer name eth0 netns $ns2
-ip link add veth3 netns ${ns0} type veth peer name eth0 netns $ns3
-ip link add veth4 netns ${ns0} type veth peer name eth0 netns $ns4
-
-ip -net ${ns0} link set lo up
-
-for i in $(seq 1 4); do
- ip -net ${ns0} link set veth$i up
-done
-
-ip -net ${ns0} link add br0 type bridge stp_state 0 forward_delay 0 nf_call_iptables 1 nf_call_ip6tables 1 nf_call_arptables 1
-if [ $? -ne 0 ]; then
- echo "SKIP: Can't create bridge br0"
- exit $ksft_skip
-fi
-
-# make veth0,1,2 part of bridge.
-for i in $(seq 1 3); do
- ip -net ${ns0} link set veth$i master br0
-done
-
-# add a macvlan on top of the bridge.
-MACVLAN_ADDR=ba:f3:13:37:42:23
-ip -net ${ns0} link add link br0 name macvlan0 type macvlan mode private
-ip -net ${ns0} link set macvlan0 address ${MACVLAN_ADDR}
-ip -net ${ns0} link set macvlan0 up
-ip -net ${ns0} addr add 10.23.0.1/24 dev macvlan0
-
-# add a macvlan on top of veth4.
-MACVLAN_ADDR=ba:f3:13:37:42:24
-ip -net ${ns0} link add link veth4 name macvlan4 type macvlan mode vepa
-ip -net ${ns0} link set macvlan4 address ${MACVLAN_ADDR}
-ip -net ${ns0} link set macvlan4 up
-
-# make the macvlan part of the bridge.
-# veth4 is not a bridge port, only the macvlan on top of it.
-ip -net ${ns0} link set macvlan4 master br0
-
-ip -net ${ns0} link set br0 up
-ip -net ${ns0} addr add 10.0.0.1/24 dev br0
-ip netns exec ${ns0} sysctl -q net.bridge.bridge-nf-call-iptables=1
-ret=$?
-if [ $ret -ne 0 ] ; then
- echo "SKIP: bridge netfilter not available"
- ret=$ksft_skip
-fi
-
-# for testing, so namespaces will reply to ping -b probes.
-ip netns exec ${ns0} sysctl -q net.ipv4.icmp_echo_ignore_broadcasts=0
-
-# enable conntrack in ns0 and drop broadcast packets in forward to
-# avoid them from getting confirmed in the postrouting hook before
-# the cloned skb is passed up the stack.
-ip netns exec ${ns0} nft -f - <<EOF
-table ip filter {
- chain input {
- type filter hook input priority 1; policy accept
- iifname br0 counter
- ct state new accept
- }
-}
-
-table bridge filter {
- chain forward {
- type filter hook forward priority 0; policy accept
- meta pkttype broadcast ip protocol icmp counter drop
- }
-}
-EOF
-
-# place 1, 2 & 3 in same subnet, connected via ns0:br0.
-# ns4 is placed in same subnet as well, but its not
-# part of the bridge: the corresponding veth4 is not
-# part of the bridge, only its macvlan interface.
-for i in $(seq 1 4); do
- eval ip -net \$ns$i link set lo up
- eval ip -net \$ns$i link set eth0 up
-done
-for i in $(seq 1 2); do
- eval ip -net \$ns$i addr add 10.0.0.1$i/24 dev eth0
-done
-
-ip -net ${ns3} addr add 10.23.0.13/24 dev eth0
-ip -net ${ns4} addr add 10.23.0.14/24 dev eth0
-
-# test basic connectivity
-do_ping ${ns1} 10.0.0.12
-do_ping ${ns3} 10.23.0.1
-do_ping ${ns4} 10.23.0.1
-
-if [ $ret -eq 0 ];then
- echo "PASS: netns connectivity: ns1 can reach ns2, ns3 and ns4 can reach ns0"
-fi
-
-bcast_ping ${ns1} 10.0.0.255
-
-# This should deliver broadcast to macvlan0, which is on top of ns0:br0.
-bcast_ping ${ns3} 10.23.0.255
-
-# same, this time via veth4:macvlan4.
-bcast_ping ${ns4} 10.23.0.255
-
-read t < /proc/sys/kernel/tainted
-
-if [ $t -eq 0 ];then
- echo PASS: kernel not tainted
-else
- echo ERROR: kernel is tainted
- ret=1
-fi
-
-exit $ret
diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config
deleted file mode 100644
index 7c42b1b2c6..0000000000
--- a/tools/testing/selftests/netfilter/config
+++ /dev/null
@@ -1,9 +0,0 @@
-CONFIG_NET_NS=y
-CONFIG_NF_TABLES_INET=y
-CONFIG_NFT_QUEUE=m
-CONFIG_NFT_NAT=m
-CONFIG_NFT_REDIR=m
-CONFIG_NFT_MASQ=m
-CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NF_CT_NETLINK=m
-CONFIG_AUDIT=y
diff --git a/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh b/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh
deleted file mode 100755
index a924e595cf..0000000000
--- a/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh
+++ /dev/null
@@ -1,89 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Testing For SCTP COLLISION SCENARIO as Below:
-#
-# 14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359]
-# 14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187]
-# 14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359]
-# 14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO]
-# 14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK]
-# 14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970]
-#
-# TOPO: SERVER_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) CLIENT_NS
-
-CLIENT_NS=$(mktemp -u client-XXXXXXXX)
-CLIENT_IP="198.51.200.1"
-CLIENT_PORT=1234
-
-SERVER_NS=$(mktemp -u server-XXXXXXXX)
-SERVER_IP="198.51.100.1"
-SERVER_PORT=1234
-
-ROUTER_NS=$(mktemp -u router-XXXXXXXX)
-CLIENT_GW="198.51.200.2"
-SERVER_GW="198.51.100.2"
-
-# setup the topo
-setup() {
- ip net add $CLIENT_NS
- ip net add $SERVER_NS
- ip net add $ROUTER_NS
- ip -n $SERVER_NS link add link0 type veth peer name link1 netns $ROUTER_NS
- ip -n $CLIENT_NS link add link3 type veth peer name link2 netns $ROUTER_NS
-
- ip -n $SERVER_NS link set link0 up
- ip -n $SERVER_NS addr add $SERVER_IP/24 dev link0
- ip -n $SERVER_NS route add $CLIENT_IP dev link0 via $SERVER_GW
-
- ip -n $ROUTER_NS link set link1 up
- ip -n $ROUTER_NS link set link2 up
- ip -n $ROUTER_NS addr add $SERVER_GW/24 dev link1
- ip -n $ROUTER_NS addr add $CLIENT_GW/24 dev link2
- ip net exec $ROUTER_NS sysctl -wq net.ipv4.ip_forward=1
-
- ip -n $CLIENT_NS link set link3 up
- ip -n $CLIENT_NS addr add $CLIENT_IP/24 dev link3
- ip -n $CLIENT_NS route add $SERVER_IP dev link3 via $CLIENT_GW
-
- # simulate the delay on OVS upcall by setting up a delay for INIT_ACK with
- # tc on $SERVER_NS side
- tc -n $SERVER_NS qdisc add dev link0 root handle 1: htb
- tc -n $SERVER_NS class add dev link0 parent 1: classid 1:1 htb rate 100mbit
- tc -n $SERVER_NS filter add dev link0 parent 1: protocol ip u32 match ip protocol 132 \
- 0xff match u8 2 0xff at 32 flowid 1:1
- tc -n $SERVER_NS qdisc add dev link0 parent 1:1 handle 10: netem delay 1200ms
-
- # simulate the ctstate check on OVS nf_conntrack
- ip net exec $ROUTER_NS iptables -A FORWARD -m state --state INVALID,UNTRACKED -j DROP
- ip net exec $ROUTER_NS iptables -A INPUT -p sctp -j DROP
-
- # use a smaller number for assoc's max_retrans to reproduce the issue
- modprobe sctp
- ip net exec $CLIENT_NS sysctl -wq net.sctp.association_max_retrans=3
-}
-
-cleanup() {
- ip net exec $CLIENT_NS pkill sctp_collision 2>&1 >/dev/null
- ip net exec $SERVER_NS pkill sctp_collision 2>&1 >/dev/null
- ip net del "$CLIENT_NS"
- ip net del "$SERVER_NS"
- ip net del "$ROUTER_NS"
-}
-
-do_test() {
- ip net exec $SERVER_NS ./sctp_collision server \
- $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT &
- ip net exec $CLIENT_NS ./sctp_collision client \
- $CLIENT_IP $CLIENT_PORT $SERVER_IP $SERVER_PORT
-}
-
-# NOTE: one way to work around the issue is set a smaller hb_interval
-# ip net exec $CLIENT_NS sysctl -wq net.sctp.hb_interval=3500
-
-# run the test case
-trap cleanup EXIT
-setup && \
-echo "Test for SCTP Collision in nf_conntrack:" && \
-do_test && echo "PASS!"
-exit $?
diff --git a/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh b/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh
deleted file mode 100755
index e7d7bf13cf..0000000000
--- a/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh
+++ /dev/null
@@ -1,167 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Check that UNREPLIED tcp conntrack will eventually timeout.
-#
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-ret=0
-
-waittime=20
-sfx=$(mktemp -u "XXXXXXXX")
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
-
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nft tool"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-cleanup() {
- ip netns pids $ns1 | xargs kill 2>/dev/null
- ip netns pids $ns2 | xargs kill 2>/dev/null
-
- ip netns del $ns1
- ip netns del $ns2
-}
-
-ipv4() {
- echo -n 192.168.$1.2
-}
-
-check_counter()
-{
- ns=$1
- name=$2
- expect=$3
- local lret=0
-
- cnt=$(ip netns exec $ns2 nft list counter inet filter "$name" | grep -q "$expect")
- if [ $? -ne 0 ]; then
- echo "ERROR: counter $name in $ns2 has unexpected value (expected $expect)" 1>&2
- ip netns exec $ns2 nft list counter inet filter "$name" 1>&2
- lret=1
- fi
-
- return $lret
-}
-
-# Create test namespaces
-ip netns add $ns1 || exit 1
-
-trap cleanup EXIT
-
-ip netns add $ns2 || exit 1
-
-# Connect the namespace to the host using a veth pair
-ip -net $ns1 link add name veth1 type veth peer name veth2
-ip -net $ns1 link set netns $ns2 dev veth2
-
-ip -net $ns1 link set up dev lo
-ip -net $ns2 link set up dev lo
-ip -net $ns1 link set up dev veth1
-ip -net $ns2 link set up dev veth2
-
-ip -net $ns2 addr add 10.11.11.2/24 dev veth2
-ip -net $ns2 route add default via 10.11.11.1
-
-ip netns exec $ns2 sysctl -q net.ipv4.conf.veth2.forwarding=1
-
-# add a rule inside NS so we enable conntrack
-ip netns exec $ns1 iptables -A INPUT -m state --state established,related -j ACCEPT
-
-ip -net $ns1 addr add 10.11.11.1/24 dev veth1
-ip -net $ns1 route add 10.99.99.99 via 10.11.11.2
-
-# Check connectivity works
-ip netns exec $ns1 ping -q -c 2 10.11.11.2 >/dev/null || exit 1
-
-ip netns exec $ns2 nc -l -p 8080 < /dev/null &
-
-# however, conntrack entries are there
-
-ip netns exec $ns2 nft -f - <<EOF
-table inet filter {
- counter connreq { }
- counter redir { }
- chain input {
- type filter hook input priority 0; policy accept;
- ct state new tcp flags syn ip daddr 10.99.99.99 tcp dport 80 counter name "connreq" accept
- ct state new ct status dnat tcp dport 8080 counter name "redir" accept
- }
-}
-EOF
-if [ $? -ne 0 ]; then
- echo "ERROR: Could not load nft rules"
- exit 1
-fi
-
-ip netns exec $ns2 sysctl -q net.netfilter.nf_conntrack_tcp_timeout_syn_sent=10
-
-echo "INFO: connect $ns1 -> $ns2 to the virtual ip"
-ip netns exec $ns1 bash -c 'while true ; do
- nc -p 60000 10.99.99.99 80
- sleep 1
- done' &
-
-sleep 1
-
-ip netns exec $ns2 nft -f - <<EOF
-table inet nat {
- chain prerouting {
- type nat hook prerouting priority 0; policy accept;
- ip daddr 10.99.99.99 tcp dport 80 redirect to :8080
- }
-}
-EOF
-if [ $? -ne 0 ]; then
- echo "ERROR: Could not load nat redirect"
- exit 1
-fi
-
-count=$(ip netns exec $ns2 conntrack -L -p tcp --dport 80 2>/dev/null | wc -l)
-if [ $count -eq 0 ]; then
- echo "ERROR: $ns2 did not pick up tcp connection from peer"
- exit 1
-fi
-
-echo "INFO: NAT redirect added in ns $ns2, waiting for $waittime seconds for nat to take effect"
-for i in $(seq 1 $waittime); do
- echo -n "."
-
- sleep 1
-
- count=$(ip netns exec $ns2 conntrack -L -p tcp --reply-port-src 8080 2>/dev/null | wc -l)
- if [ $count -gt 0 ]; then
- echo
- echo "PASS: redirection took effect after $i seconds"
- break
- fi
-
- m=$((i%20))
- if [ $m -eq 0 ]; then
- echo " waited for $i seconds"
- fi
-done
-
-expect="packets 1 bytes 60"
-check_counter "$ns2" "redir" "$expect"
-if [ $? -ne 0 ]; then
- ret=1
-fi
-
-if [ $ret -eq 0 ];then
- echo "PASS: redirection counter has expected values"
-else
- echo "ERROR: no tcp connection was redirected"
-fi
-
-exit $ret
diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh
deleted file mode 100755
index c3b8f90c49..0000000000
--- a/tools/testing/selftests/netfilter/ipvs.sh
+++ /dev/null
@@ -1,228 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-#
-# End-to-end ipvs test suite
-# Topology:
-#--------------------------------------------------------------+
-# | |
-# ns0 | ns1 |
-# ----------- | ----------- ----------- |
-# | veth01 | --------- | veth10 | | veth12 | |
-# ----------- peer ----------- ----------- |
-# | | | |
-# ----------- | | |
-# | br0 | |----------------- peer |--------------|
-# ----------- | | |
-# | | | |
-# ---------- peer ---------- ----------- |
-# | veth02 | --------- | veth20 | | veth21 | |
-# ---------- | ---------- ----------- |
-# | ns2 |
-# | |
-#--------------------------------------------------------------+
-#
-# We assume that all network driver are loaded
-#
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-ret=0
-GREEN='\033[0;92m'
-RED='\033[0;31m'
-NC='\033[0m' # No Color
-
-readonly port=8080
-
-readonly vip_v4=207.175.44.110
-readonly cip_v4=10.0.0.2
-readonly gip_v4=10.0.0.1
-readonly dip_v4=172.16.0.1
-readonly rip_v4=172.16.0.2
-readonly sip_v4=10.0.0.3
-
-readonly infile="$(mktemp)"
-readonly outfile="$(mktemp)"
-readonly datalen=32
-
-sysipvsnet="/proc/sys/net/ipv4/vs/"
-if [ ! -d $sysipvsnet ]; then
- modprobe -q ip_vs
- if [ $? -ne 0 ]; then
- echo "skip: could not run test without ipvs module"
- exit $ksft_skip
- fi
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ]; then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-ipvsadm -v > /dev/null 2>&1
-if [ $? -ne 0 ]; then
- echo "SKIP: Could not run test without ipvsadm"
- exit $ksft_skip
-fi
-
-setup() {
- ip netns add ns0
- ip netns add ns1
- ip netns add ns2
-
- ip link add veth01 netns ns0 type veth peer name veth10 netns ns1
- ip link add veth02 netns ns0 type veth peer name veth20 netns ns2
- ip link add veth12 netns ns1 type veth peer name veth21 netns ns2
-
- ip netns exec ns0 ip link set veth01 up
- ip netns exec ns0 ip link set veth02 up
- ip netns exec ns0 ip link add br0 type bridge
- ip netns exec ns0 ip link set veth01 master br0
- ip netns exec ns0 ip link set veth02 master br0
- ip netns exec ns0 ip link set br0 up
- ip netns exec ns0 ip addr add ${cip_v4}/24 dev br0
-
- ip netns exec ns1 ip link set lo up
- ip netns exec ns1 ip link set veth10 up
- ip netns exec ns1 ip addr add ${gip_v4}/24 dev veth10
- ip netns exec ns1 ip link set veth12 up
- ip netns exec ns1 ip addr add ${dip_v4}/24 dev veth12
-
- ip netns exec ns2 ip link set lo up
- ip netns exec ns2 ip link set veth21 up
- ip netns exec ns2 ip addr add ${rip_v4}/24 dev veth21
- ip netns exec ns2 ip link set veth20 up
- ip netns exec ns2 ip addr add ${sip_v4}/24 dev veth20
-
- sleep 1
-
- dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
-}
-
-cleanup() {
- for i in 0 1 2
- do
- ip netns del ns$i > /dev/null 2>&1
- done
-
- if [ -f "${outfile}" ]; then
- rm "${outfile}"
- fi
- if [ -f "${infile}" ]; then
- rm "${infile}"
- fi
-}
-
-server_listen() {
- ip netns exec ns2 nc -l -p 8080 > "${outfile}" &
- server_pid=$!
- sleep 0.2
-}
-
-client_connect() {
- ip netns exec ns0 timeout 2 nc -w 1 ${vip_v4} ${port} < "${infile}"
-}
-
-verify_data() {
- wait "${server_pid}"
- cmp "$infile" "$outfile" 2>/dev/null
-}
-
-test_service() {
- server_listen
- client_connect
- verify_data
-}
-
-
-test_dr() {
- ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
-
- ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
- ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
- ip netns exec ns1 ipvsadm -a -t ${vip_v4}:${port} -r ${rip_v4}:${port}
- ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
-
- # avoid incorrect arp response
- ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1
- ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2
- # avoid reverse route lookup
- ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0
- ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0
- ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1
-
- test_service
-}
-
-test_nat() {
- ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
-
- ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
- ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
- ip netns exec ns1 ipvsadm -a -m -t ${vip_v4}:${port} -r ${rip_v4}:${port}
- ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
-
- ip netns exec ns2 ip link del veth20
- ip netns exec ns2 ip route add default via ${dip_v4} dev veth21
-
- test_service
-}
-
-test_tun() {
- ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
-
- ip netns exec ns1 modprobe ipip
- ip netns exec ns1 ip link set tunl0 up
- ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=0
- ip netns exec ns1 sysctl -qw net.ipv4.conf.all.send_redirects=0
- ip netns exec ns1 sysctl -qw net.ipv4.conf.default.send_redirects=0
- ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
- ip netns exec ns1 ipvsadm -a -i -t ${vip_v4}:${port} -r ${rip_v4}:${port}
- ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
-
- ip netns exec ns2 modprobe ipip
- ip netns exec ns2 ip link set tunl0 up
- ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1
- ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2
- ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0
- ip netns exec ns2 sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
- ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0
- ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1
-
- test_service
-}
-
-run_tests() {
- local errors=
-
- echo "Testing DR mode..."
- cleanup
- setup
- test_dr
- errors=$(( $errors + $? ))
-
- echo "Testing NAT mode..."
- cleanup
- setup
- test_nat
- errors=$(( $errors + $? ))
-
- echo "Testing Tunnel mode..."
- cleanup
- setup
- test_tun
- errors=$(( $errors + $? ))
-
- return $errors
-}
-
-trap cleanup EXIT
-
-run_tests
-
-if [ $? -ne 0 ]; then
- echo -e "$(basename $0): ${RED}FAIL${NC}"
- exit 1
-fi
-echo -e "$(basename $0): ${GREEN}PASS${NC}"
-exit 0
diff --git a/tools/testing/selftests/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/netfilter/nf_nat_edemux.sh
deleted file mode 100755
index a1aa8f4a58..0000000000
--- a/tools/testing/selftests/netfilter/nf_nat_edemux.sh
+++ /dev/null
@@ -1,127 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Test NAT source port clash resolution
-#
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-ret=0
-
-sfx=$(mktemp -u "XXXXXXXX")
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
-socatpid=0
-
-cleanup()
-{
- [ $socatpid -gt 0 ] && kill $socatpid
- ip netns del $ns1
- ip netns del $ns2
-}
-
-socat -h > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without socat"
- exit $ksft_skip
-fi
-
-iptables --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without iptables"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-ip netns add "$ns1"
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace $ns1"
- exit $ksft_skip
-fi
-
-trap cleanup EXIT
-
-ip netns add $ns2
-
-# Connect the namespaces using a veth pair
-ip link add name veth2 type veth peer name veth1
-ip link set netns $ns1 dev veth1
-ip link set netns $ns2 dev veth2
-
-ip netns exec $ns1 ip link set up dev lo
-ip netns exec $ns1 ip link set up dev veth1
-ip netns exec $ns1 ip addr add 192.168.1.1/24 dev veth1
-
-ip netns exec $ns2 ip link set up dev lo
-ip netns exec $ns2 ip link set up dev veth2
-ip netns exec $ns2 ip addr add 192.168.1.2/24 dev veth2
-
-# Create a server in one namespace
-ip netns exec $ns1 socat -u TCP-LISTEN:5201,fork OPEN:/dev/null,wronly=1 &
-socatpid=$!
-
-# Restrict source port to just one so we don't have to exhaust
-# all others.
-ip netns exec $ns2 sysctl -q net.ipv4.ip_local_port_range="10000 10000"
-
-# add a virtual IP using DNAT
-ip netns exec $ns2 iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201
-
-# ... and route it to the other namespace
-ip netns exec $ns2 ip route add 10.96.0.1 via 192.168.1.1
-
-sleep 1
-
-# add a persistent connection from the other namespace
-ip netns exec $ns2 socat -t 10 - TCP:192.168.1.1:5201 > /dev/null &
-
-sleep 1
-
-# ip daddr:dport will be rewritten to 192.168.1.1 5201
-# NAT must reallocate source port 10000 because
-# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use
-echo test | ip netns exec $ns2 socat -t 3 -u STDIN TCP:10.96.0.1:443,connect-timeout=3 >/dev/null
-ret=$?
-
-# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201).
-if [ $ret -eq 0 ]; then
- echo "PASS: socat can connect via NAT'd address"
-else
- echo "FAIL: socat cannot connect via NAT'd address"
-fi
-
-# check sport clashres.
-ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201
-ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201
-
-sleep 5 | ip netns exec $ns2 socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
-cpid1=$!
-sleep 1
-
-# if connect succeeds, client closes instantly due to EOF on stdin.
-# if connect hangs, it will time out after 5s.
-echo | ip netns exec $ns2 socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
-cpid2=$!
-
-time_then=$(date +%s)
-wait $cpid2
-rv=$?
-time_now=$(date +%s)
-
-# Check how much time has elapsed, expectation is for
-# 'cpid2' to connect and then exit (and no connect delay).
-delta=$((time_now - time_then))
-
-if [ $delta -lt 2 -a $rv -eq 0 ]; then
- echo "PASS: could connect to service via redirected ports"
-else
- echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)"
- ret=1
-fi
-
-exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh
deleted file mode 100755
index faa7778d7b..0000000000
--- a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh
+++ /dev/null
@@ -1,197 +0,0 @@
-#!/bin/bash
-#
-# This tests connection tracking helper assignment:
-# 1. can attach ftp helper to a connection from nft ruleset.
-# 2. auto-assign still works.
-#
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-ret=0
-
-sfx=$(mktemp -u "XXXXXXXX")
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
-testipv6=1
-
-cleanup()
-{
- ip netns del ${ns1}
- ip netns del ${ns2}
-}
-
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nft tool"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-conntrack -V > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without conntrack tool"
- exit $ksft_skip
-fi
-
-which nc >/dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without netcat tool"
- exit $ksft_skip
-fi
-
-trap cleanup EXIT
-
-ip netns add ${ns1}
-ip netns add ${ns2}
-
-ip link add veth0 netns ${ns1} type veth peer name veth0 netns ${ns2} > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: No virtual ethernet pair device support in kernel"
- exit $ksft_skip
-fi
-
-ip -net ${ns1} link set lo up
-ip -net ${ns1} link set veth0 up
-
-ip -net ${ns2} link set lo up
-ip -net ${ns2} link set veth0 up
-
-ip -net ${ns1} addr add 10.0.1.1/24 dev veth0
-ip -net ${ns1} addr add dead:1::1/64 dev veth0
-
-ip -net ${ns2} addr add 10.0.1.2/24 dev veth0
-ip -net ${ns2} addr add dead:1::2/64 dev veth0
-
-load_ruleset_family() {
- local family=$1
- local ns=$2
-
-ip netns exec ${ns} nft -f - <<EOF
-table $family raw {
- ct helper ftp {
- type "ftp" protocol tcp
- }
- chain pre {
- type filter hook prerouting priority 0; policy accept;
- tcp dport 2121 ct helper set "ftp"
- }
- chain output {
- type filter hook output priority 0; policy accept;
- tcp dport 2121 ct helper set "ftp"
- }
-}
-EOF
- return $?
-}
-
-check_for_helper()
-{
- local netns=$1
- local message=$2
- local port=$3
-
- if echo $message |grep -q 'ipv6';then
- local family="ipv6"
- else
- local family="ipv4"
- fi
-
- ip netns exec ${netns} conntrack -L -f $family -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp'
- if [ $? -ne 0 ] ; then
- if [ $autoassign -eq 0 ] ;then
- echo "FAIL: ${netns} did not show attached helper $message" 1>&2
- ret=1
- else
- echo "PASS: ${netns} did not show attached helper $message" 1>&2
- fi
- else
- if [ $autoassign -eq 0 ] ;then
- echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2
- else
- echo "FAIL: ${netns} connection on port $port has ftp helper attached" 1>&2
- ret=1
- fi
- fi
-
- return 0
-}
-
-test_helper()
-{
- local port=$1
- local autoassign=$2
-
- if [ $autoassign -eq 0 ] ;then
- msg="set via ruleset"
- else
- msg="auto-assign"
- fi
-
- sleep 3 | ip netns exec ${ns2} nc -w 2 -l -p $port > /dev/null &
-
- sleep 1 | ip netns exec ${ns1} nc -w 2 10.0.1.2 $port > /dev/null &
- sleep 1
-
- check_for_helper "$ns1" "ip $msg" $port $autoassign
- check_for_helper "$ns2" "ip $msg" $port $autoassign
-
- wait
-
- if [ $testipv6 -eq 0 ] ;then
- return 0
- fi
-
- ip netns exec ${ns1} conntrack -F 2> /dev/null
- ip netns exec ${ns2} conntrack -F 2> /dev/null
-
- sleep 3 | ip netns exec ${ns2} nc -w 2 -6 -l -p $port > /dev/null &
-
- sleep 1 | ip netns exec ${ns1} nc -w 2 -6 dead:1::2 $port > /dev/null &
- sleep 1
-
- check_for_helper "$ns1" "ipv6 $msg" $port
- check_for_helper "$ns2" "ipv6 $msg" $port
-
- wait
-}
-
-load_ruleset_family ip ${ns1}
-if [ $? -ne 0 ];then
- echo "FAIL: ${ns1} cannot load ip ruleset" 1>&2
- exit 1
-fi
-
-load_ruleset_family ip6 ${ns1}
-if [ $? -ne 0 ];then
- echo "SKIP: ${ns1} cannot load ip6 ruleset" 1>&2
- testipv6=0
-fi
-
-load_ruleset_family inet ${ns2}
-if [ $? -ne 0 ];then
- echo "SKIP: ${ns1} cannot load inet ruleset" 1>&2
- load_ruleset_family ip ${ns2}
- if [ $? -ne 0 ];then
- echo "FAIL: ${ns2} cannot load ip ruleset" 1>&2
- exit 1
- fi
-
- if [ $testipv6 -eq 1 ] ;then
- load_ruleset_family ip6 ${ns2}
- if [ $? -ne 0 ];then
- echo "FAIL: ${ns2} cannot load ip6 ruleset" 1>&2
- exit 1
- fi
- fi
-fi
-
-test_helper 2121 0
-ip netns exec ${ns1} sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
-ip netns exec ${ns2} sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
-test_helper 21 1
-
-exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh
deleted file mode 100755
index dff476e45e..0000000000
--- a/tools/testing/selftests/netfilter/nft_fib.sh
+++ /dev/null
@@ -1,273 +0,0 @@
-#!/bin/bash
-#
-# This tests the fib expression.
-#
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-ret=0
-
-sfx=$(mktemp -u "XXXXXXXX")
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
-nsrouter="nsrouter-$sfx"
-timeout=4
-
-log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
-
-cleanup()
-{
- ip netns del ${ns1}
- ip netns del ${ns2}
- ip netns del ${nsrouter}
-
- [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
-}
-
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nft tool"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-ip netns add ${nsrouter}
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace"
- exit $ksft_skip
-fi
-
-trap cleanup EXIT
-
-dmesg | grep -q ' nft_rpfilter: '
-if [ $? -eq 0 ]; then
- dmesg -c | grep ' nft_rpfilter: '
- echo "WARN: a previous test run has failed" 1>&2
-fi
-
-sysctl -q net.netfilter.nf_log_all_netns=1
-ip netns add ${ns1}
-ip netns add ${ns2}
-
-load_ruleset() {
- local netns=$1
-
-ip netns exec ${netns} nft -f /dev/stdin <<EOF
-table inet filter {
- chain prerouting {
- type filter hook prerouting priority 0; policy accept;
- fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
- }
-}
-EOF
-}
-
-load_pbr_ruleset() {
- local netns=$1
-
-ip netns exec ${netns} nft -f /dev/stdin <<EOF
-table inet filter {
- chain forward {
- type filter hook forward priority raw;
- fib saddr . iif oif gt 0 accept
- log drop
- }
-}
-EOF
-}
-
-load_ruleset_count() {
- local netns=$1
-
-ip netns exec ${netns} nft -f /dev/stdin <<EOF
-table inet filter {
- chain prerouting {
- type filter hook prerouting priority 0; policy accept;
- ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop
- ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop
- }
-}
-EOF
-}
-
-check_drops() {
- dmesg | grep -q ' nft_rpfilter: '
- if [ $? -eq 0 ]; then
- dmesg | grep ' nft_rpfilter: '
- echo "FAIL: rpfilter did drop packets"
- return 1
- fi
-
- return 0
-}
-
-check_fib_counter() {
- local want=$1
- local ns=$2
- local address=$3
-
- line=$(ip netns exec ${ns} nft list table inet filter | grep 'fib saddr . iif' | grep $address | grep "packets $want" )
- ret=$?
-
- if [ $ret -ne 0 ];then
- echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2
- ip netns exec ${ns} nft list table inet filter
- return 1
- fi
-
- if [ $want -gt 0 ]; then
- echo "PASS: fib expression did drop packets for $address"
- fi
-
- return 0
-}
-
-load_ruleset ${nsrouter}
-load_ruleset ${ns1}
-load_ruleset ${ns2}
-
-ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: No virtual ethernet pair device support in kernel"
- exit $ksft_skip
-fi
-ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2}
-
-ip -net ${nsrouter} link set lo up
-ip -net ${nsrouter} link set veth0 up
-ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0
-ip -net ${nsrouter} addr add dead:1::1/64 dev veth0
-
-ip -net ${nsrouter} link set veth1 up
-ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1
-ip -net ${nsrouter} addr add dead:2::1/64 dev veth1
-
-ip -net ${ns1} link set lo up
-ip -net ${ns1} link set eth0 up
-
-ip -net ${ns2} link set lo up
-ip -net ${ns2} link set eth0 up
-
-ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
-ip -net ${ns1} addr add dead:1::99/64 dev eth0
-ip -net ${ns1} route add default via 10.0.1.1
-ip -net ${ns1} route add default via dead:1::1
-
-ip -net ${ns2} addr add 10.0.2.99/24 dev eth0
-ip -net ${ns2} addr add dead:2::99/64 dev eth0
-ip -net ${ns2} route add default via 10.0.2.1
-ip -net ${ns2} route add default via dead:2::1
-
-test_ping() {
- local daddr4=$1
- local daddr6=$2
-
- ip netns exec ${ns1} ping -c 1 -q $daddr4 > /dev/null
- ret=$?
- if [ $ret -ne 0 ];then
- check_drops
- echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2
- return 1
- fi
-
- ip netns exec ${ns1} ping -c 3 -q $daddr6 > /dev/null
- ret=$?
- if [ $ret -ne 0 ];then
- check_drops
- echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2
- return 1
- fi
-
- return 0
-}
-
-ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
-ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
-ip netns exec ${nsrouter} sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null
-ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null
-
-sleep 3
-
-test_ping 10.0.2.1 dead:2::1 || exit 1
-check_drops || exit 1
-
-test_ping 10.0.2.99 dead:2::99 || exit 1
-check_drops || exit 1
-
-echo "PASS: fib expression did not cause unwanted packet drops"
-
-ip netns exec ${nsrouter} nft flush table inet filter
-
-ip -net ${ns1} route del default
-ip -net ${ns1} -6 route del default
-
-ip -net ${ns1} addr del 10.0.1.99/24 dev eth0
-ip -net ${ns1} addr del dead:1::99/64 dev eth0
-
-ip -net ${ns1} addr add 10.0.2.99/24 dev eth0
-ip -net ${ns1} addr add dead:2::99/64 dev eth0
-
-ip -net ${ns1} route add default via 10.0.2.1
-ip -net ${ns1} -6 route add default via dead:2::1
-
-ip -net ${nsrouter} addr add dead:2::1/64 dev veth0
-
-# switch to ruleset that doesn't log, this time
-# its expected that this does drop the packets.
-load_ruleset_count ${nsrouter}
-
-# ns1 has a default route, but nsrouter does not.
-# must not check return value, ping to 1.1.1.1 will
-# fail.
-check_fib_counter 0 ${nsrouter} 1.1.1.1 || exit 1
-check_fib_counter 0 ${nsrouter} 1c3::c01d || exit 1
-
-ip netns exec ${ns1} ping -c 1 -W 1 -q 1.1.1.1 > /dev/null
-check_fib_counter 1 ${nsrouter} 1.1.1.1 || exit 1
-
-sleep 2
-ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null
-check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1
-
-# delete all rules
-ip netns exec ${ns1} nft flush ruleset
-ip netns exec ${ns2} nft flush ruleset
-ip netns exec ${nsrouter} nft flush ruleset
-
-ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
-ip -net ${ns1} addr add dead:1::99/64 dev eth0
-
-ip -net ${ns1} addr del 10.0.2.99/24 dev eth0
-ip -net ${ns1} addr del dead:2::99/64 dev eth0
-
-ip -net ${nsrouter} addr del dead:2::1/64 dev veth0
-
-# ... pbr ruleset for the router, check iif+oif.
-load_pbr_ruleset ${nsrouter}
-if [ $? -ne 0 ] ; then
- echo "SKIP: Could not load fib forward ruleset"
- exit $ksft_skip
-fi
-
-ip -net ${nsrouter} rule add from all table 128
-ip -net ${nsrouter} rule add from all iif veth0 table 129
-ip -net ${nsrouter} route add table 128 to 10.0.1.0/24 dev veth0
-ip -net ${nsrouter} route add table 129 to 10.0.2.0/24 dev veth1
-
-# drop main ipv4 table
-ip -net ${nsrouter} -4 rule delete table main
-
-test_ping 10.0.2.99 dead:2::99
-if [ $? -ne 0 ] ; then
- ip -net ${nsrouter} nft list ruleset
- echo "FAIL: fib mismatch in pbr setup"
- exit 1
-fi
-
-echo "PASS: fib expression forward check with policy based routing"
-exit 0
diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh
deleted file mode 100755
index e127297533..0000000000
--- a/tools/testing/selftests/netfilter/nft_queue.sh
+++ /dev/null
@@ -1,449 +0,0 @@
-#!/bin/bash
-#
-# This tests nf_queue:
-# 1. can process packets from all hooks
-# 2. support running nfqueue from more than one base chain
-#
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-ret=0
-
-sfx=$(mktemp -u "XXXXXXXX")
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
-nsrouter="nsrouter-$sfx"
-timeout=4
-
-cleanup()
-{
- ip netns pids ${ns1} | xargs kill 2>/dev/null
- ip netns pids ${ns2} | xargs kill 2>/dev/null
- ip netns pids ${nsrouter} | xargs kill 2>/dev/null
-
- ip netns del ${ns1}
- ip netns del ${ns2}
- ip netns del ${nsrouter}
- rm -f "$TMPFILE0"
- rm -f "$TMPFILE1"
- rm -f "$TMPFILE2" "$TMPFILE3"
-}
-
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nft tool"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-ip netns add ${nsrouter}
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace"
- exit $ksft_skip
-fi
-
-TMPFILE0=$(mktemp)
-TMPFILE1=$(mktemp)
-TMPFILE2=$(mktemp)
-TMPFILE3=$(mktemp)
-trap cleanup EXIT
-
-ip netns add ${ns1}
-ip netns add ${ns2}
-
-ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: No virtual ethernet pair device support in kernel"
- exit $ksft_skip
-fi
-ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2}
-
-ip -net ${nsrouter} link set lo up
-ip -net ${nsrouter} link set veth0 up
-ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0
-ip -net ${nsrouter} addr add dead:1::1/64 dev veth0
-
-ip -net ${nsrouter} link set veth1 up
-ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1
-ip -net ${nsrouter} addr add dead:2::1/64 dev veth1
-
-ip -net ${ns1} link set lo up
-ip -net ${ns1} link set eth0 up
-
-ip -net ${ns2} link set lo up
-ip -net ${ns2} link set eth0 up
-
-ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
-ip -net ${ns1} addr add dead:1::99/64 dev eth0
-ip -net ${ns1} route add default via 10.0.1.1
-ip -net ${ns1} route add default via dead:1::1
-
-ip -net ${ns2} addr add 10.0.2.99/24 dev eth0
-ip -net ${ns2} addr add dead:2::99/64 dev eth0
-ip -net ${ns2} route add default via 10.0.2.1
-ip -net ${ns2} route add default via dead:2::1
-
-load_ruleset() {
- local name=$1
- local prio=$2
-
-ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
-table inet $name {
- chain nfq {
- ip protocol icmp queue bypass
- icmpv6 type { "echo-request", "echo-reply" } queue num 1 bypass
- }
- chain pre {
- type filter hook prerouting priority $prio; policy accept;
- jump nfq
- }
- chain input {
- type filter hook input priority $prio; policy accept;
- jump nfq
- }
- chain forward {
- type filter hook forward priority $prio; policy accept;
- tcp dport 12345 queue num 2
- jump nfq
- }
- chain output {
- type filter hook output priority $prio; policy accept;
- tcp dport 12345 queue num 3
- tcp sport 23456 queue num 3
- jump nfq
- }
- chain post {
- type filter hook postrouting priority $prio; policy accept;
- jump nfq
- }
-}
-EOF
-}
-
-load_counter_ruleset() {
- local prio=$1
-
-ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
-table inet countrules {
- chain pre {
- type filter hook prerouting priority $prio; policy accept;
- counter
- }
- chain input {
- type filter hook input priority $prio; policy accept;
- counter
- }
- chain forward {
- type filter hook forward priority $prio; policy accept;
- counter
- }
- chain output {
- type filter hook output priority $prio; policy accept;
- counter
- }
- chain post {
- type filter hook postrouting priority $prio; policy accept;
- counter
- }
-}
-EOF
-}
-
-test_ping() {
- ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null
- if [ $? -ne 0 ];then
- return 1
- fi
-
- ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null
- if [ $? -ne 0 ];then
- return 1
- fi
-
- return 0
-}
-
-test_ping_router() {
- ip netns exec ${ns1} ping -c 1 -q 10.0.2.1 > /dev/null
- if [ $? -ne 0 ];then
- return 1
- fi
-
- ip netns exec ${ns1} ping -c 1 -q dead:2::1 > /dev/null
- if [ $? -ne 0 ];then
- return 1
- fi
-
- return 0
-}
-
-test_queue_blackhole() {
- local proto=$1
-
-ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
-table $proto blackh {
- chain forward {
- type filter hook forward priority 0; policy accept;
- queue num 600
- }
-}
-EOF
- if [ $proto = "ip" ] ;then
- ip netns exec ${ns1} ping -W 2 -c 1 -q 10.0.2.99 > /dev/null
- lret=$?
- elif [ $proto = "ip6" ]; then
- ip netns exec ${ns1} ping -W 2 -c 1 -q dead:2::99 > /dev/null
- lret=$?
- else
- lret=111
- fi
-
- # queue without bypass keyword should drop traffic if no listener exists.
- if [ $lret -eq 0 ];then
- echo "FAIL: $proto expected failure, got $lret" 1>&2
- exit 1
- fi
-
- ip netns exec ${nsrouter} nft delete table $proto blackh
- if [ $? -ne 0 ] ;then
- echo "FAIL: $proto: Could not delete blackh table"
- exit 1
- fi
-
- echo "PASS: $proto: statement with no listener results in packet drop"
-}
-
-test_queue()
-{
- local expected=$1
- local last=""
-
- # spawn nf-queue listeners
- ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t $timeout > "$TMPFILE0" &
- ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE1" &
- sleep 1
- test_ping
- ret=$?
- if [ $ret -ne 0 ];then
- echo "FAIL: netns routing/connectivity with active listener on queue $queue: $ret" 1>&2
- exit $ret
- fi
-
- test_ping_router
- ret=$?
- if [ $ret -ne 0 ];then
- echo "FAIL: netns router unreachable listener on queue $queue: $ret" 1>&2
- exit $ret
- fi
-
- wait
- ret=$?
-
- for file in $TMPFILE0 $TMPFILE1; do
- last=$(tail -n1 "$file")
- if [ x"$last" != x"$expected packets total" ]; then
- echo "FAIL: Expected $expected packets total, but got $last" 1>&2
- cat "$file" 1>&2
-
- ip netns exec ${nsrouter} nft list ruleset
- exit 1
- fi
- done
-
- echo "PASS: Expected and received $last"
-}
-
-test_tcp_forward()
-{
- ip netns exec ${nsrouter} ./nf-queue -q 2 -t $timeout &
- local nfqpid=$!
-
- tmpfile=$(mktemp) || exit 1
- dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
- ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
- local rpid=$!
-
- sleep 1
- ip netns exec ${ns1} nc -w 5 10.0.2.99 12345 <"$tmpfile" >/dev/null &
-
- rm -f "$tmpfile"
-
- wait $rpid
- wait $lpid
- [ $? -eq 0 ] && echo "PASS: tcp and nfqueue in forward chain"
-}
-
-test_tcp_localhost()
-{
- tmpfile=$(mktemp) || exit 1
-
- dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
- ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
- local rpid=$!
-
- ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout &
- local nfqpid=$!
-
- sleep 1
- ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null
- rm -f "$tmpfile"
-
- wait $rpid
- [ $? -eq 0 ] && echo "PASS: tcp via loopback"
- wait 2>/dev/null
-}
-
-test_tcp_localhost_connectclose()
-{
- tmpfile=$(mktemp) || exit 1
-
- ip netns exec ${nsrouter} ./connect_close -p 23456 -t $timeout &
-
- ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout &
- local nfqpid=$!
-
- sleep 1
- rm -f "$tmpfile"
-
- wait $rpid
- [ $? -eq 0 ] && echo "PASS: tcp via loopback with connect/close"
- wait 2>/dev/null
-}
-
-test_tcp_localhost_requeue()
-{
-ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
-flush ruleset
-table inet filter {
- chain output {
- type filter hook output priority 0; policy accept;
- tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
- }
- chain post {
- type filter hook postrouting priority 0; policy accept;
- tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
- }
-}
-EOF
- tmpfile=$(mktemp) || exit 1
- dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
- ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
- local rpid=$!
-
- ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE2" &
-
- # nfqueue 1 will be called via output hook. But this time,
- # re-queue the packet to nfqueue program on queue 2.
- ip netns exec ${nsrouter} ./nf-queue -G -d 150 -c -q 0 -Q 1 -t $timeout > "$TMPFILE3" &
-
- sleep 1
- ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null
- rm -f "$tmpfile"
-
- wait
-
- if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then
- echo "FAIL: lost packets during requeue?!" 1>&2
- return
- fi
-
- echo "PASS: tcp via loopback and re-queueing"
-}
-
-test_icmp_vrf() {
- ip -net $ns1 link add tvrf type vrf table 9876
- if [ $? -ne 0 ];then
- echo "SKIP: Could not add vrf device"
- return
- fi
-
- ip -net $ns1 li set eth0 master tvrf
- ip -net $ns1 li set tvrf up
-
- ip -net $ns1 route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876
-ip netns exec ${ns1} nft -f /dev/stdin <<EOF
-flush ruleset
-table inet filter {
- chain output {
- type filter hook output priority 0; policy accept;
- meta oifname "tvrf" icmp type echo-request counter queue num 1
- meta oifname "eth0" icmp type echo-request counter queue num 1
- }
- chain post {
- type filter hook postrouting priority 0; policy accept;
- meta oifname "tvrf" icmp type echo-request counter queue num 1
- meta oifname "eth0" icmp type echo-request counter queue num 1
- }
-}
-EOF
- ip netns exec ${ns1} ./nf-queue -q 1 -t $timeout &
- local nfqpid=$!
-
- sleep 1
- ip netns exec ${ns1} ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null
-
- for n in output post; do
- for d in tvrf eth0; do
- ip netns exec ${ns1} nft list chain inet filter $n | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"
- if [ $? -ne 0 ] ; then
- echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
- ip netns exec ${ns1} nft list ruleset
- ret=1
- return
- fi
- done
- done
-
- wait $nfqpid
- [ $? -eq 0 ] && echo "PASS: icmp+nfqueue via vrf"
- wait 2>/dev/null
-}
-
-ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
-ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
-
-load_ruleset "filter" 0
-
-sleep 3
-
-test_ping
-ret=$?
-if [ $ret -eq 0 ];then
- # queue bypass works (rules were skipped, no listener)
- echo "PASS: ${ns1} can reach ${ns2}"
-else
- echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2
- exit $ret
-fi
-
-test_queue_blackhole ip
-test_queue_blackhole ip6
-
-# dummy ruleset to add base chains between the
-# queueing rules. We don't want the second reinject
-# to re-execute the old hooks.
-load_counter_ruleset 10
-
-# we are hooking all: prerouting/input/forward/output/postrouting.
-# we ping ${ns2} from ${ns1} via ${nsrouter} using ipv4 and ipv6, so:
-# 1x icmp prerouting,forward,postrouting -> 3 queue events (6 incl. reply).
-# 1x icmp prerouting,input,output postrouting -> 4 queue events incl. reply.
-# so we expect that userspace program receives 10 packets.
-test_queue 10
-
-# same. We queue to a second program as well.
-load_ruleset "filter2" 20
-test_queue 20
-
-test_tcp_forward
-test_tcp_localhost
-test_tcp_localhost_connectclose
-test_tcp_localhost_requeue
-test_icmp_vrf
-
-exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_synproxy.sh b/tools/testing/selftests/netfilter/nft_synproxy.sh
deleted file mode 100755
index b62933b680..0000000000
--- a/tools/testing/selftests/netfilter/nft_synproxy.sh
+++ /dev/null
@@ -1,117 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-ret=0
-
-rnd=$(mktemp -u XXXXXXXX)
-nsr="nsr-$rnd" # synproxy machine
-ns1="ns1-$rnd" # iperf client
-ns2="ns2-$rnd" # iperf server
-
-checktool (){
- if ! $1 > /dev/null 2>&1; then
- echo "SKIP: Could not $2"
- exit $ksft_skip
- fi
-}
-
-checktool "nft --version" "run test without nft tool"
-checktool "ip -Version" "run test without ip tool"
-checktool "iperf3 --version" "run test without iperf3"
-checktool "ip netns add $nsr" "create net namespace"
-
-modprobe -q nf_conntrack
-
-ip netns add $ns1
-ip netns add $ns2
-
-cleanup() {
- ip netns pids $ns1 | xargs kill 2>/dev/null
- ip netns pids $ns2 | xargs kill 2>/dev/null
- ip netns del $ns1
- ip netns del $ns2
-
- ip netns del $nsr
-}
-
-trap cleanup EXIT
-
-ip link add veth0 netns $nsr type veth peer name eth0 netns $ns1
-ip link add veth1 netns $nsr type veth peer name eth0 netns $ns2
-
-for dev in lo veth0 veth1; do
-ip -net $nsr link set $dev up
-done
-
-ip -net $nsr addr add 10.0.1.1/24 dev veth0
-ip -net $nsr addr add 10.0.2.1/24 dev veth1
-
-ip netns exec $nsr sysctl -q net.ipv4.conf.veth0.forwarding=1
-ip netns exec $nsr sysctl -q net.ipv4.conf.veth1.forwarding=1
-ip netns exec $nsr sysctl -q net.netfilter.nf_conntrack_tcp_loose=0
-
-for n in $ns1 $ns2; do
- ip -net $n link set lo up
- ip -net $n link set eth0 up
-done
-ip -net $ns1 addr add 10.0.1.99/24 dev eth0
-ip -net $ns2 addr add 10.0.2.99/24 dev eth0
-ip -net $ns1 route add default via 10.0.1.1
-ip -net $ns2 route add default via 10.0.2.1
-
-# test basic connectivity
-if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
- echo "ERROR: $ns1 cannot reach $ns2" 1>&2
- exit 1
-fi
-
-if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
- echo "ERROR: $ns2 cannot reach $ns1" 1>&2
- exit 1
-fi
-
-ip netns exec $ns2 iperf3 -s > /dev/null 2>&1 &
-# ip netns exec $nsr tcpdump -vvv -n -i veth1 tcp | head -n 10 &
-
-sleep 1
-
-ip netns exec $nsr nft -f - <<EOF
-table inet filter {
- chain prerouting {
- type filter hook prerouting priority -300; policy accept;
- meta iif veth0 tcp flags syn counter notrack
- }
-
- chain forward {
- type filter hook forward priority 0; policy accept;
-
- ct state new,established counter accept
-
- meta iif veth0 meta l4proto tcp ct state untracked,invalid synproxy mss 1460 sack-perm timestamp
-
- ct state invalid counter drop
-
- # make ns2 unreachable w.o. tcp synproxy
- tcp flags syn counter drop
- }
-}
-EOF
-if [ $? -ne 0 ]; then
- echo "SKIP: Cannot add nft synproxy"
- exit $ksft_skip
-fi
-
-ip netns exec $ns1 timeout 5 iperf3 -c 10.0.2.99 -n $((1 * 1024 * 1024)) > /dev/null
-
-if [ $? -ne 0 ]; then
- echo "FAIL: iperf3 returned an error" 1>&2
- ret=$?
- ip netns exec $nsr nft list ruleset
-else
- echo "PASS: synproxy connection successful"
-fi
-
-exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh
deleted file mode 100755
index 2ffba45a78..0000000000
--- a/tools/testing/selftests/netfilter/nft_trans_stress.sh
+++ /dev/null
@@ -1,151 +0,0 @@
-#!/bin/bash
-#
-# This test is for stress-testing the nf_tables config plane path vs.
-# packet path processing: Make sure we never release rules that are
-# still visible to other cpus.
-#
-# set -e
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-testns=testns-$(mktemp -u "XXXXXXXX")
-tmp=""
-
-tables="foo bar baz quux"
-global_ret=0
-eret=0
-lret=0
-
-cleanup() {
- ip netns pids "$testns" | xargs kill 2>/dev/null
- ip netns del "$testns"
-
- rm -f "$tmp"
-}
-
-check_result()
-{
- local r=$1
- local OK="PASS"
-
- if [ $r -ne 0 ] ;then
- OK="FAIL"
- global_ret=$r
- fi
-
- echo "$OK: nft $2 test returned $r"
-
- eret=0
-}
-
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nft tool"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-trap cleanup EXIT
-tmp=$(mktemp)
-
-for table in $tables; do
- echo add table inet "$table" >> "$tmp"
- echo flush table inet "$table" >> "$tmp"
-
- echo "add chain inet $table INPUT { type filter hook input priority 0; }" >> "$tmp"
- echo "add chain inet $table OUTPUT { type filter hook output priority 0; }" >> "$tmp"
- for c in $(seq 1 400); do
- chain=$(printf "chain%03u" "$c")
- echo "add chain inet $table $chain" >> "$tmp"
- done
-
- for c in $(seq 1 400); do
- chain=$(printf "chain%03u" "$c")
- for BASE in INPUT OUTPUT; do
- echo "add rule inet $table $BASE counter jump $chain" >> "$tmp"
- done
- echo "add rule inet $table $chain counter return" >> "$tmp"
- done
-done
-
-ip netns add "$testns"
-ip -netns "$testns" link set lo up
-
-lscpu | grep ^CPU\(s\): | ( read cpu cpunum ;
-cpunum=$((cpunum-1))
-for i in $(seq 0 $cpunum);do
- mask=$(printf 0x%x $((1<<$i)))
- ip netns exec "$testns" taskset $mask ping -4 127.0.0.1 -fq > /dev/null &
- ip netns exec "$testns" taskset $mask ping -6 ::1 -fq > /dev/null &
-done)
-
-sleep 1
-
-ip netns exec "$testns" nft -f "$tmp"
-for i in $(seq 1 10) ; do ip netns exec "$testns" nft -f "$tmp" & done
-
-for table in $tables;do
- randsleep=$((RANDOM%2))
- sleep $randsleep
- ip netns exec "$testns" nft delete table inet $table
- lret=$?
- if [ $lret -ne 0 ]; then
- eret=$lret
- fi
-done
-
-check_result $eret "add/delete"
-
-for i in $(seq 1 10) ; do
- (echo "flush ruleset"; cat "$tmp") | ip netns exec "$testns" nft -f /dev/stdin
-
- lret=$?
- if [ $lret -ne 0 ]; then
- eret=$lret
- fi
-done
-
-check_result $eret "reload"
-
-for i in $(seq 1 10) ; do
- (echo "flush ruleset"; cat "$tmp"
- echo "insert rule inet foo INPUT meta nftrace set 1"
- echo "insert rule inet foo OUTPUT meta nftrace set 1"
- ) | ip netns exec "$testns" nft -f /dev/stdin
- lret=$?
- if [ $lret -ne 0 ]; then
- eret=$lret
- fi
-
- (echo "flush ruleset"; cat "$tmp"
- ) | ip netns exec "$testns" nft -f /dev/stdin
-
- lret=$?
- if [ $lret -ne 0 ]; then
- eret=$lret
- fi
-done
-
-check_result $eret "add/delete with nftrace enabled"
-
-echo "insert rule inet foo INPUT meta nftrace set 1" >> $tmp
-echo "insert rule inet foo OUTPUT meta nftrace set 1" >> $tmp
-
-for i in $(seq 1 10) ; do
- (echo "flush ruleset"; cat "$tmp") | ip netns exec "$testns" nft -f /dev/stdin
-
- lret=$?
- if [ $lret -ne 0 ]; then
- eret=1
- fi
-done
-
-check_result $lret "add/delete with nftrace enabled"
-
-exit $global_ret
diff --git a/tools/testing/selftests/netfilter/settings b/tools/testing/selftests/netfilter/settings
deleted file mode 100644
index 6091b45d22..0000000000
--- a/tools/testing/selftests/netfilter/settings
+++ /dev/null
@@ -1 +0,0 @@
-timeout=120
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 6ba4f8275a..994477ee87 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -27,6 +27,7 @@
#include <sys/syscall.h>
#include <sys/sysmacros.h>
#include <sys/time.h>
+#include <sys/utsname.h>
#include <sys/wait.h>
#include <dirent.h>
#include <errno.h>
@@ -600,6 +601,25 @@ int expect_strne(const char *expr, int llen, const char *cmp)
return ret;
}
+#define EXPECT_STRBUFEQ(cond, expr, buf, val, cmp) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_str_buf_eq(expr, buf, val, llen, cmp); } while (0)
+
+static __attribute__((unused))
+int expect_str_buf_eq(size_t expr, const char *buf, size_t val, int llen, const char *cmp)
+{
+ llen += printf(" = %lu <%s> ", (unsigned long)expr, buf);
+ if (strcmp(buf, cmp) != 0) {
+ result(llen, FAIL);
+ return 1;
+ }
+ if (expr != val) {
+ result(llen, FAIL);
+ return 1;
+ }
+
+ result(llen, OK);
+ return 0;
+}
/* declare tests based on line numbers. There must be exactly one test per line. */
#define CASE_TEST(name) \
@@ -761,6 +781,45 @@ int test_stat_timestamps(void)
return 0;
}
+int test_uname(void)
+{
+ struct utsname buf;
+ char osrelease[sizeof(buf.release)];
+ ssize_t r;
+ int fd;
+
+ memset(&buf.domainname, 'P', sizeof(buf.domainname));
+
+ if (uname(&buf))
+ return 1;
+
+ if (strncmp("Linux", buf.sysname, sizeof(buf.sysname)))
+ return 1;
+
+ fd = open("/proc/sys/kernel/osrelease", O_RDONLY);
+ if (fd == -1)
+ return 1;
+
+ r = read(fd, osrelease, sizeof(osrelease));
+ if (r == -1)
+ return 1;
+
+ close(fd);
+
+ if (osrelease[r - 1] == '\n')
+ r--;
+
+ /* Validate one of the later fields to ensure field sizes are correct */
+ if (strncmp(osrelease, buf.release, r))
+ return 1;
+
+ /* Ensure the field domainname is set, it is missing from struct old_utsname */
+ if (strnlen(buf.domainname, sizeof(buf.domainname)) == sizeof(buf.domainname))
+ return 1;
+
+ return 0;
+}
+
int test_mmap_munmap(void)
{
int ret, fd, i, page_size;
@@ -966,6 +1025,8 @@ int run_syscall(int min, int max)
CASE_TEST(stat_fault); EXPECT_SYSER(1, stat(NULL, &stat_buf), -1, EFAULT); break;
CASE_TEST(stat_timestamps); EXPECT_SYSZR(1, test_stat_timestamps()); break;
CASE_TEST(symlink_root); EXPECT_SYSER(1, symlink("/", "/"), -1, EEXIST); break;
+ CASE_TEST(uname); EXPECT_SYSZR(proc, test_uname()); break;
+ CASE_TEST(uname_fault); EXPECT_SYSER(1, uname(NULL), -1, EFAULT); break;
CASE_TEST(unlink_root); EXPECT_SYSER(1, unlink("/"), -1, EISDIR); break;
CASE_TEST(unlink_blah); EXPECT_SYSER(1, unlink("/proc/self/blah"), -1, ENOENT); break;
CASE_TEST(wait_child); EXPECT_SYSER(1, wait(&tmp), -1, ECHILD); break;
@@ -991,6 +1052,14 @@ int run_stdlib(int min, int max)
for (test = min; test >= 0 && test <= max; test++) {
int llen = 0; /* line length */
+ /* For functions that take a long buffer, like strlcat()
+ * Add some more chars after the \0, to test functions that overwrite the buffer set
+ * the \0 at the exact right position.
+ */
+ char buf[10] = "test123456";
+ buf[4] = '\0';
+
+
/* avoid leaving empty lines below, this will insert holes into
* test numbers.
*/
@@ -1007,6 +1076,19 @@ int run_stdlib(int min, int max)
CASE_TEST(strchr_foobar_z); EXPECT_STRZR(1, strchr("foobar", 'z')); break;
CASE_TEST(strrchr_foobar_o); EXPECT_STREQ(1, strrchr("foobar", 'o'), "obar"); break;
CASE_TEST(strrchr_foobar_z); EXPECT_STRZR(1, strrchr("foobar", 'z')); break;
+#ifdef NOLIBC
+ CASE_TEST(strlcat_0); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 0), buf, 3, "test"); break;
+ CASE_TEST(strlcat_1); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 1), buf, 4, "test"); break;
+ CASE_TEST(strlcat_5); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 5), buf, 7, "test"); break;
+ CASE_TEST(strlcat_6); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 6), buf, 7, "testb"); break;
+ CASE_TEST(strlcat_7); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 7), buf, 7, "testba"); break;
+ CASE_TEST(strlcat_8); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 8), buf, 7, "testbar"); break;
+ CASE_TEST(strlcpy_0); EXPECT_STRBUFEQ(1, strlcpy(buf, "bar", 0), buf, 3, "test"); break;
+ CASE_TEST(strlcpy_1); EXPECT_STRBUFEQ(1, strlcpy(buf, "bar", 1), buf, 3, ""); break;
+ CASE_TEST(strlcpy_2); EXPECT_STRBUFEQ(1, strlcpy(buf, "bar", 2), buf, 3, "b"); break;
+ CASE_TEST(strlcpy_3); EXPECT_STRBUFEQ(1, strlcpy(buf, "bar", 3), buf, 3, "ba"); break;
+ CASE_TEST(strlcpy_4); EXPECT_STRBUFEQ(1, strlcpy(buf, "bar", 4), buf, 3, "bar"); break;
+#endif
CASE_TEST(memcmp_20_20); EXPECT_EQ(1, memcmp("aaa\x20", "aaa\x20", 4), 0); break;
CASE_TEST(memcmp_20_60); EXPECT_LT(1, memcmp("aaa\x20", "aaa\x60", 4), 0); break;
CASE_TEST(memcmp_60_20); EXPECT_GT(1, memcmp("aaa\x60", "aaa\x20", 4), 0); break;
diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/openat2/Makefile
index 254d676a26..185dc76ebb 100644
--- a/tools/testing/selftests/openat2/Makefile
+++ b/tools/testing/selftests/openat2/Makefile
@@ -1,8 +1,18 @@
# SPDX-License-Identifier: GPL-2.0-or-later
-CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan
+CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined
TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test
+# gcc requires -static-libasan in order to ensure that Address Sanitizer's
+# library is the first one loaded. However, clang already statically links the
+# Address Sanitizer if -fsanitize is specified. Therefore, simply omit
+# -static-libasan for clang builds.
+ifeq ($(LLVM),)
+ CFLAGS += -static-libasan
+endif
+
+LOCAL_HDRS += helpers.h
+
include ../lib.mk
-$(TEST_GEN_PROGS): helpers.c helpers.h
+$(TEST_GEN_PROGS): helpers.c
diff --git a/tools/testing/selftests/perf_events/.gitignore b/tools/testing/selftests/perf_events/.gitignore
index 790c47001e..ee93dc4969 100644
--- a/tools/testing/selftests/perf_events/.gitignore
+++ b/tools/testing/selftests/perf_events/.gitignore
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
sigtrap_threads
remove_on_exec
+watermark_signal
diff --git a/tools/testing/selftests/perf_events/Makefile b/tools/testing/selftests/perf_events/Makefile
index db93c4ff08..70e3ff2112 100644
--- a/tools/testing/selftests/perf_events/Makefile
+++ b/tools/testing/selftests/perf_events/Makefile
@@ -2,5 +2,5 @@
CFLAGS += -Wl,-no-as-needed -Wall $(KHDR_INCLUDES)
LDFLAGS += -lpthread
-TEST_GEN_PROGS := sigtrap_threads remove_on_exec
+TEST_GEN_PROGS := sigtrap_threads remove_on_exec watermark_signal
include ../lib.mk
diff --git a/tools/testing/selftests/perf_events/watermark_signal.c b/tools/testing/selftests/perf_events/watermark_signal.c
new file mode 100644
index 0000000000..49dc1e8311
--- /dev/null
+++ b/tools/testing/selftests/perf_events/watermark_signal.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/perf_event.h>
+#include <stddef.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+
+#define __maybe_unused __attribute__((__unused__))
+
+static int sigio_count;
+
+static void handle_sigio(int signum __maybe_unused,
+ siginfo_t *oh __maybe_unused,
+ void *uc __maybe_unused)
+{
+ ++sigio_count;
+}
+
+static void do_child(void)
+{
+ raise(SIGSTOP);
+
+ for (int i = 0; i < 20; ++i)
+ sleep(1);
+
+ raise(SIGSTOP);
+
+ exit(0);
+}
+
+TEST(watermark_signal)
+{
+ struct perf_event_attr attr;
+ struct perf_event_mmap_page *p = NULL;
+ struct sigaction previous_sigio, sigio = { 0 };
+ pid_t child = -1;
+ int child_status;
+ int fd = -1;
+ long page_size = sysconf(_SC_PAGE_SIZE);
+
+ sigio.sa_sigaction = handle_sigio;
+ EXPECT_EQ(sigaction(SIGIO, &sigio, &previous_sigio), 0);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_DUMMY;
+ attr.sample_period = 1;
+ attr.disabled = 1;
+ attr.watermark = 1;
+ attr.context_switch = 1;
+ attr.wakeup_watermark = 1;
+
+ child = fork();
+ EXPECT_GE(child, 0);
+ if (child == 0)
+ do_child();
+ else if (child < 0) {
+ perror("fork()");
+ goto cleanup;
+ }
+
+ if (waitpid(child, &child_status, WSTOPPED) != child ||
+ !(WIFSTOPPED(child_status) && WSTOPSIG(child_status) == SIGSTOP)) {
+ fprintf(stderr,
+ "failed to sycnhronize with child errno=%d status=%x\n",
+ errno,
+ child_status);
+ goto cleanup;
+ }
+
+ fd = syscall(__NR_perf_event_open, &attr, child, -1, -1,
+ PERF_FLAG_FD_CLOEXEC);
+ if (fd < 0) {
+ fprintf(stderr, "failed opening event %llx\n", attr.config);
+ goto cleanup;
+ }
+
+ if (fcntl(fd, F_SETFL, FASYNC)) {
+ perror("F_SETFL FASYNC");
+ goto cleanup;
+ }
+
+ if (fcntl(fd, F_SETOWN, getpid())) {
+ perror("F_SETOWN getpid()");
+ goto cleanup;
+ }
+
+ if (fcntl(fd, F_SETSIG, SIGIO)) {
+ perror("F_SETSIG SIGIO");
+ goto cleanup;
+ }
+
+ p = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (p == NULL) {
+ perror("mmap");
+ goto cleanup;
+ }
+
+ if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0)) {
+ perror("PERF_EVENT_IOC_ENABLE");
+ goto cleanup;
+ }
+
+ if (kill(child, SIGCONT) < 0) {
+ perror("SIGCONT");
+ goto cleanup;
+ }
+
+ if (waitpid(child, &child_status, WSTOPPED) != -1 || errno != EINTR)
+ fprintf(stderr,
+ "expected SIGIO to terminate wait errno=%d status=%x\n%d",
+ errno,
+ child_status,
+ sigio_count);
+
+ EXPECT_GE(sigio_count, 1);
+
+cleanup:
+ if (p != NULL)
+ munmap(p, 2 * page_size);
+
+ if (fd >= 0)
+ close(fd);
+
+ if (child > 0) {
+ kill(child, SIGKILL);
+ waitpid(child, NULL, 0);
+ }
+
+ sigaction(SIGIO, &previous_sigio, NULL);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
index 01cc37bf61..f062a986e3 100644
--- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
@@ -307,5 +307,5 @@ int main(int argc, char **argv)
test_pidfd_fdinfo_nspid();
test_pidfd_dead_fdinfo();
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c
index 8a59438ccc..c62564c264 100644
--- a/tools/testing/selftests/pidfd/pidfd_open_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_open_test.c
@@ -159,5 +159,7 @@ on_error:
if (pidfd >= 0)
close(pidfd);
- return !ret ? ksft_exit_pass() : ksft_exit_fail();
+ if (ret)
+ ksft_exit_fail();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/pidfd/pidfd_poll_test.c b/tools/testing/selftests/pidfd/pidfd_poll_test.c
index 6108112753..55d74a5035 100644
--- a/tools/testing/selftests/pidfd/pidfd_poll_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_poll_test.c
@@ -112,5 +112,5 @@ int main(int argc, char **argv)
}
ksft_test_result_pass("pidfd poll test: pass\n");
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c
index c081ae9131..9faa686f90 100644
--- a/tools/testing/selftests/pidfd/pidfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_test.c
@@ -572,5 +572,5 @@ int main(int argc, char **argv)
test_pidfd_send_signal_exited_fail();
test_pidfd_send_signal_recycled_pid_fail();
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index c376151982..b175e94e19 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -7,12 +7,6 @@ ARCH := $(shell echo $(ARCH) | sed -e s/ppc.*/powerpc/)
ifeq ($(ARCH),powerpc)
-GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown")
-
-CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR)/include $(CFLAGS)
-
-export CFLAGS
-
SUB_DIRS = alignment \
benchmarks \
cache_shape \
@@ -46,6 +40,7 @@ $(SUB_DIRS):
BUILD_TARGET=$(OUTPUT)/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all
include ../lib.mk
+include ./flags.mk
override define RUN_TESTS
+@for TARGET in $(SUB_DIRS); do \
@@ -57,14 +52,14 @@ endef
override define INSTALL_RULE
+@for TARGET in $(SUB_DIRS); do \
BUILD_TARGET=$(OUTPUT)/$$TARGET; \
- $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install;\
+ $(MAKE) OUTPUT=$$BUILD_TARGET INSTALL_PATH=$$INSTALL_PATH/$$TARGET -C $$TARGET install;\
done;
endef
emit_tests:
+@for TARGET in $(SUB_DIRS); do \
BUILD_TARGET=$(OUTPUT)/$$TARGET; \
- $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET $@;\
+ $(MAKE) OUTPUT=$$BUILD_TARGET COLLECTION=$(COLLECTION)/$$TARGET -s -C $$TARGET $@;\
done;
override define CLEAN
diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile
index 93e9af3744..66d5d7aaeb 100644
--- a/tools/testing/selftests/powerpc/alignment/Makefile
+++ b/tools/testing/selftests/powerpc/alignment/Makefile
@@ -3,5 +3,6 @@ TEST_GEN_PROGS := copy_first_unaligned alignment_handler
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile
index a32a6ab899..1321922038 100644
--- a/tools/testing/selftests/powerpc/benchmarks/Makefile
+++ b/tools/testing/selftests/powerpc/benchmarks/Makefile
@@ -4,10 +4,11 @@ TEST_GEN_FILES := exec_target
TEST_FILES := settings
-CFLAGS += -O2
-
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
+
+CFLAGS += -O2
$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/cache_shape/Makefile b/tools/testing/selftests/powerpc/cache_shape/Makefile
index 689f6c8ebc..3a3ca956ac 100644
--- a/tools/testing/selftests/powerpc/cache_shape/Makefile
+++ b/tools/testing/selftests/powerpc/cache_shape/Makefile
@@ -3,5 +3,6 @@ TEST_GEN_PROGS := cache_shape
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index 77594e697f..42940f92d8 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -1,14 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-# The loops are all 64-bit code
-CFLAGS += -m64
-CFLAGS += -I$(CURDIR)
-CFLAGS += -D SELFTEST
-CFLAGS += -maltivec
-CFLAGS += -mcpu=power4
-
-# Use our CFLAGS for the implicit .S rule & set the asm machine type
-ASFLAGS = $(CFLAGS) -Wa,-mpower4
-
TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
copyuser_p7_t0 copyuser_p7_t1 \
memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
@@ -20,6 +10,17 @@ EXTRA_SOURCES := validate.c ../harness.c stubs.S
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
+
+# The loops are all 64-bit code
+CFLAGS += -m64
+CFLAGS += -I$(CURDIR)
+CFLAGS += -D SELFTEST
+CFLAGS += -maltivec
+CFLAGS += -mcpu=power4
+
+# Use our CFLAGS for the implicit .S rule & set the asm machine type
+ASFLAGS = $(CFLAGS) -Wa,-mpower4
$(OUTPUT)/copyuser_64_t%: copyuser_64.S $(EXTRA_SOURCES)
$(CC) $(CPPFLAGS) $(CFLAGS) \
diff --git a/tools/testing/selftests/powerpc/dexcr/.gitignore b/tools/testing/selftests/powerpc/dexcr/.gitignore
index b82f45dd46..11eefb4b9f 100644
--- a/tools/testing/selftests/powerpc/dexcr/.gitignore
+++ b/tools/testing/selftests/powerpc/dexcr/.gitignore
@@ -1,2 +1,4 @@
+dexcr_test
hashchk_test
+chdexcr
lsdexcr
diff --git a/tools/testing/selftests/powerpc/dexcr/Makefile b/tools/testing/selftests/powerpc/dexcr/Makefile
index 829ad075b4..58cf9f7229 100644
--- a/tools/testing/selftests/powerpc/dexcr/Makefile
+++ b/tools/testing/selftests/powerpc/dexcr/Makefile
@@ -1,7 +1,10 @@
-TEST_GEN_PROGS := hashchk_test
-TEST_GEN_FILES := lsdexcr
+TEST_GEN_PROGS := dexcr_test hashchk_test
+TEST_GEN_FILES := lsdexcr chdexcr
include ../../lib.mk
+include ../flags.mk
+
+CFLAGS += $(KHDR_INCLUDES)
$(OUTPUT)/hashchk_test: CFLAGS += -fno-pie -no-pie $(call cc-option,-mno-rop-protect)
diff --git a/tools/testing/selftests/powerpc/dexcr/chdexcr.c b/tools/testing/selftests/powerpc/dexcr/chdexcr.c
new file mode 100644
index 0000000000..c548d7a5bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dexcr/chdexcr.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include "dexcr.h"
+#include "utils.h"
+
+static void die(const char *msg)
+{
+ printf("%s\n", msg);
+ exit(1);
+}
+
+static void help(void)
+{
+ printf("Invoke a provided program with a custom DEXCR on-exec reset value\n"
+ "\n"
+ "usage: chdexcr [CHDEXCR OPTIONS] -- PROGRAM [ARGS...]\n"
+ "\n"
+ "Each configurable DEXCR aspect is exposed as an option.\n"
+ "\n"
+ "The normal option sets the aspect in the DEXCR. The --no- variant\n"
+ "clears that aspect. For example, --ibrtpd sets the IBRTPD aspect bit,\n"
+ "so indirect branch prediction will be disabled in the provided program.\n"
+ "Conversely, --no-ibrtpd clears the aspect bit, so indirect branch\n"
+ "prediction may occur.\n"
+ "\n"
+ "CHDEXCR OPTIONS:\n");
+
+ for (int i = 0; i < ARRAY_SIZE(aspects); i++) {
+ const struct dexcr_aspect *aspect = &aspects[i];
+
+ if (aspect->prctl == -1)
+ continue;
+
+ printf(" --%-6s / --no-%-6s : %s\n", aspect->opt, aspect->opt, aspect->desc);
+ }
+}
+
+static const struct dexcr_aspect *opt_to_aspect(const char *opt)
+{
+ for (int i = 0; i < ARRAY_SIZE(aspects); i++)
+ if (aspects[i].prctl != -1 && !strcmp(aspects[i].opt, opt))
+ return &aspects[i];
+
+ return NULL;
+}
+
+static int apply_option(const char *option)
+{
+ const struct dexcr_aspect *aspect;
+ const char *opt = NULL;
+ const char *set_prefix = "--";
+ const char *clear_prefix = "--no-";
+ unsigned long ctrl = 0;
+ int err;
+
+ if (!strcmp(option, "-h") || !strcmp(option, "--help")) {
+ help();
+ exit(0);
+ }
+
+ /* Strip out --(no-) prefix and determine ctrl value */
+ if (!strncmp(option, clear_prefix, strlen(clear_prefix))) {
+ opt = &option[strlen(clear_prefix)];
+ ctrl |= PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC;
+ } else if (!strncmp(option, set_prefix, strlen(set_prefix))) {
+ opt = &option[strlen(set_prefix)];
+ ctrl |= PR_PPC_DEXCR_CTRL_SET_ONEXEC;
+ }
+
+ if (!opt || !*opt)
+ return 1;
+
+ aspect = opt_to_aspect(opt);
+ if (!aspect)
+ die("unknown aspect");
+
+ err = pr_set_dexcr(aspect->prctl, ctrl);
+ if (err)
+ die("failed to apply option");
+
+ return 0;
+}
+
+int main(int argc, char *const argv[])
+{
+ int i;
+
+ if (!dexcr_exists())
+ die("DEXCR not detected on this hardware");
+
+ for (i = 1; i < argc; i++)
+ if (apply_option(argv[i]))
+ break;
+
+ if (i < argc && !strcmp(argv[i], "--"))
+ i++;
+
+ if (i >= argc)
+ die("missing command");
+
+ execvp(argv[i], &argv[i]);
+ perror("execve");
+
+ return errno;
+}
diff --git a/tools/testing/selftests/powerpc/dexcr/dexcr.c b/tools/testing/selftests/powerpc/dexcr/dexcr.c
index 65ec5347de..468fd0dc99 100644
--- a/tools/testing/selftests/powerpc/dexcr/dexcr.c
+++ b/tools/testing/selftests/powerpc/dexcr/dexcr.c
@@ -3,6 +3,7 @@
#include <errno.h>
#include <setjmp.h>
#include <signal.h>
+#include <sys/prctl.h>
#include <sys/types.h>
#include <sys/wait.h>
@@ -43,6 +44,45 @@ out:
return exists;
}
+unsigned int pr_which_to_aspect(unsigned long which)
+{
+ switch (which) {
+ case PR_PPC_DEXCR_SBHE:
+ return DEXCR_PR_SBHE;
+ case PR_PPC_DEXCR_IBRTPD:
+ return DEXCR_PR_IBRTPD;
+ case PR_PPC_DEXCR_SRAPD:
+ return DEXCR_PR_SRAPD;
+ case PR_PPC_DEXCR_NPHIE:
+ return DEXCR_PR_NPHIE;
+ default:
+ FAIL_IF_EXIT_MSG(true, "unknown PR aspect");
+ }
+}
+
+int pr_get_dexcr(unsigned long which)
+{
+ return prctl(PR_PPC_GET_DEXCR, which, 0UL, 0UL, 0UL);
+}
+
+int pr_set_dexcr(unsigned long which, unsigned long ctrl)
+{
+ return prctl(PR_PPC_SET_DEXCR, which, ctrl, 0UL, 0UL);
+}
+
+bool pr_dexcr_aspect_supported(unsigned long which)
+{
+ if (pr_get_dexcr(which) == -1)
+ return errno == ENODEV;
+
+ return true;
+}
+
+bool pr_dexcr_aspect_editable(unsigned long which)
+{
+ return pr_get_dexcr(which) & PR_PPC_DEXCR_CTRL_EDITABLE;
+}
+
/*
* Just test if a bad hashchk triggers a signal, without checking
* for support or if the NPHIE aspect is enabled.
diff --git a/tools/testing/selftests/powerpc/dexcr/dexcr.h b/tools/testing/selftests/powerpc/dexcr/dexcr.h
index f55cbbc864..51e9ba3b09 100644
--- a/tools/testing/selftests/powerpc/dexcr/dexcr.h
+++ b/tools/testing/selftests/powerpc/dexcr/dexcr.h
@@ -9,6 +9,7 @@
#define _SELFTESTS_POWERPC_DEXCR_DEXCR_H
#include <stdbool.h>
+#include <sys/prctl.h>
#include <sys/types.h>
#include "reg.h"
@@ -26,8 +27,64 @@
#define PPC_RAW_HASHCHK(b, i, a) \
str(.long (0x7C0005E4 | PPC_RAW_HASH_ARGS(b, i, a));)
+struct dexcr_aspect {
+ const char *name; /* Short display name */
+ const char *opt; /* Option name for chdexcr */
+ const char *desc; /* Expanded aspect meaning */
+ unsigned int index; /* Aspect bit index in DEXCR */
+ unsigned long prctl; /* 'which' value for get/set prctl */
+};
+
+static const struct dexcr_aspect aspects[] = {
+ {
+ .name = "SBHE",
+ .opt = "sbhe",
+ .desc = "Speculative branch hint enable",
+ .index = 0,
+ .prctl = PR_PPC_DEXCR_SBHE,
+ },
+ {
+ .name = "IBRTPD",
+ .opt = "ibrtpd",
+ .desc = "Indirect branch recurrent target prediction disable",
+ .index = 3,
+ .prctl = PR_PPC_DEXCR_IBRTPD,
+ },
+ {
+ .name = "SRAPD",
+ .opt = "srapd",
+ .desc = "Subroutine return address prediction disable",
+ .index = 4,
+ .prctl = PR_PPC_DEXCR_SRAPD,
+ },
+ {
+ .name = "NPHIE",
+ .opt = "nphie",
+ .desc = "Non-privileged hash instruction enable",
+ .index = 5,
+ .prctl = PR_PPC_DEXCR_NPHIE,
+ },
+ {
+ .name = "PHIE",
+ .opt = "phie",
+ .desc = "Privileged hash instruction enable",
+ .index = 6,
+ .prctl = -1,
+ },
+};
+
bool dexcr_exists(void);
+bool pr_dexcr_aspect_supported(unsigned long which);
+
+bool pr_dexcr_aspect_editable(unsigned long which);
+
+int pr_get_dexcr(unsigned long pr_aspect);
+
+int pr_set_dexcr(unsigned long pr_aspect, unsigned long ctrl);
+
+unsigned int pr_which_to_aspect(unsigned long which);
+
bool hashchk_triggers(void);
enum dexcr_source {
diff --git a/tools/testing/selftests/powerpc/dexcr/dexcr_test.c b/tools/testing/selftests/powerpc/dexcr/dexcr_test.c
new file mode 100644
index 0000000000..7a86571649
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dexcr/dexcr_test.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "dexcr.h"
+#include "utils.h"
+
+/*
+ * Helper function for testing the behaviour of a newly exec-ed process
+ */
+static int dexcr_prctl_onexec_test_child(unsigned long which, const char *status)
+{
+ unsigned long dexcr = mfspr(SPRN_DEXCR_RO);
+ unsigned long aspect = pr_which_to_aspect(which);
+ int ctrl = pr_get_dexcr(which);
+
+ if (!strcmp(status, "set")) {
+ FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET),
+ "setting aspect across exec not applied");
+
+ FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC),
+ "setting aspect across exec not inherited");
+
+ FAIL_IF_EXIT_MSG(!(aspect & dexcr), "setting aspect across exec did not take effect");
+ } else if (!strcmp(status, "clear")) {
+ FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR),
+ "clearing aspect across exec not applied");
+
+ FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC),
+ "clearing aspect across exec not inherited");
+
+ FAIL_IF_EXIT_MSG(aspect & dexcr, "clearing aspect across exec did not take effect");
+ } else {
+ FAIL_IF_EXIT_MSG(true, "unknown expected status");
+ }
+
+ return 0;
+}
+
+/*
+ * Test that the given prctl value can be manipulated freely
+ */
+static int dexcr_prctl_aspect_test(unsigned long which)
+{
+ unsigned long aspect = pr_which_to_aspect(which);
+ pid_t pid;
+ int ctrl;
+ int err;
+ int errno_save;
+
+ SKIP_IF_MSG(!dexcr_exists(), "DEXCR not supported");
+ SKIP_IF_MSG(!pr_dexcr_aspect_supported(which), "DEXCR aspect not supported");
+ SKIP_IF_MSG(!pr_dexcr_aspect_editable(which), "DEXCR aspect not editable with prctl");
+
+ /* We reject invalid combinations of arguments */
+ err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR);
+ errno_save = errno;
+ FAIL_IF_MSG(err != -1, "simultaneous set and clear should be rejected");
+ FAIL_IF_MSG(errno_save != EINVAL, "simultaneous set and clear should be rejected with EINVAL");
+
+ err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET_ONEXEC | PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC);
+ errno_save = errno;
+ FAIL_IF_MSG(err != -1, "simultaneous set and clear on exec should be rejected");
+ FAIL_IF_MSG(errno_save != EINVAL, "simultaneous set and clear on exec should be rejected with EINVAL");
+
+ /* We set the aspect */
+ err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET);
+ FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_SET failed");
+
+ ctrl = pr_get_dexcr(which);
+ FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET), "config value not PR_PPC_DEXCR_CTRL_SET");
+ FAIL_IF_MSG(ctrl & PR_PPC_DEXCR_CTRL_CLEAR, "config value unexpected clear flag");
+ FAIL_IF_MSG(!(aspect & mfspr(SPRN_DEXCR_RO)), "setting aspect did not take effect");
+
+ /* We clear the aspect */
+ err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_CLEAR);
+ FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_CLEAR failed");
+
+ ctrl = pr_get_dexcr(which);
+ FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR), "config value not PR_PPC_DEXCR_CTRL_CLEAR");
+ FAIL_IF_MSG(ctrl & PR_PPC_DEXCR_CTRL_SET, "config value unexpected set flag");
+ FAIL_IF_MSG(aspect & mfspr(SPRN_DEXCR_RO), "clearing aspect did not take effect");
+
+ /* We make it set on exec (doesn't change our current value) */
+ err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET_ONEXEC);
+ FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_SET_ONEXEC failed");
+
+ ctrl = pr_get_dexcr(which);
+ FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR), "process aspect should still be cleared");
+ FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC), "config value not PR_PPC_DEXCR_CTRL_SET_ONEXEC");
+ FAIL_IF_MSG(ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC, "config value unexpected clear on exec flag");
+ FAIL_IF_MSG(aspect & mfspr(SPRN_DEXCR_RO), "scheduling aspect to set on exec should not change it now");
+
+ /* We make it clear on exec (doesn't change our current value) */
+ err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC);
+ FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC failed");
+
+ ctrl = pr_get_dexcr(which);
+ FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR), "process aspect config should still be cleared");
+ FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC), "config value not PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC");
+ FAIL_IF_MSG(ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC, "config value unexpected set on exec flag");
+ FAIL_IF_MSG(aspect & mfspr(SPRN_DEXCR_RO), "process aspect should still be cleared");
+
+ /* We allow setting the current and on-exec value in a single call */
+ err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC);
+ FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC failed");
+
+ ctrl = pr_get_dexcr(which);
+ FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET), "config value not PR_PPC_DEXCR_CTRL_SET");
+ FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC), "config value not PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC");
+ FAIL_IF_MSG(!(aspect & mfspr(SPRN_DEXCR_RO)), "process aspect should be set");
+
+ err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_CLEAR | PR_PPC_DEXCR_CTRL_SET_ONEXEC);
+ FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_CLEAR | PR_PPC_DEXCR_CTRL_SET_ONEXEC failed");
+
+ ctrl = pr_get_dexcr(which);
+ FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR), "config value not PR_PPC_DEXCR_CTRL_CLEAR");
+ FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC), "config value not PR_PPC_DEXCR_CTRL_SET_ONEXEC");
+ FAIL_IF_MSG(aspect & mfspr(SPRN_DEXCR_RO), "process aspect should be clear");
+
+ /* Verify the onexec value is applied across exec */
+ pid = fork();
+ if (!pid) {
+ char which_str[32] = {};
+ char *args[] = { "dexcr_prctl_onexec_test_child", which_str, "set", NULL };
+ unsigned int ctrl = pr_get_dexcr(which);
+
+ sprintf(which_str, "%lu", which);
+
+ FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC),
+ "setting aspect on exec not copied across fork");
+
+ FAIL_IF_EXIT_MSG(mfspr(SPRN_DEXCR_RO) & aspect,
+ "setting aspect on exec wrongly applied to fork");
+
+ execve("/proc/self/exe", args, NULL);
+ _exit(errno);
+ }
+ await_child_success(pid);
+
+ err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC);
+ FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC failed");
+
+ pid = fork();
+ if (!pid) {
+ char which_str[32] = {};
+ char *args[] = { "dexcr_prctl_onexec_test_child", which_str, "clear", NULL };
+ unsigned int ctrl = pr_get_dexcr(which);
+
+ sprintf(which_str, "%lu", which);
+
+ FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC),
+ "clearing aspect on exec not copied across fork");
+
+ FAIL_IF_EXIT_MSG(!(mfspr(SPRN_DEXCR_RO) & aspect),
+ "clearing aspect on exec wrongly applied to fork");
+
+ execve("/proc/self/exe", args, NULL);
+ _exit(errno);
+ }
+ await_child_success(pid);
+
+ return 0;
+}
+
+static int dexcr_prctl_ibrtpd_test(void)
+{
+ return dexcr_prctl_aspect_test(PR_PPC_DEXCR_IBRTPD);
+}
+
+static int dexcr_prctl_srapd_test(void)
+{
+ return dexcr_prctl_aspect_test(PR_PPC_DEXCR_SRAPD);
+}
+
+static int dexcr_prctl_nphie_test(void)
+{
+ return dexcr_prctl_aspect_test(PR_PPC_DEXCR_NPHIE);
+}
+
+int main(int argc, char *argv[])
+{
+ int err = 0;
+
+ /*
+ * Some tests require checking what happens across exec, so we may be
+ * invoked as the child of a particular test
+ */
+ if (argc > 1) {
+ if (argc == 3 && !strcmp(argv[0], "dexcr_prctl_onexec_test_child")) {
+ unsigned long which;
+
+ err = parse_ulong(argv[1], strlen(argv[1]), &which, 10);
+ FAIL_IF_MSG(err, "failed to parse which value for child");
+
+ return dexcr_prctl_onexec_test_child(which, argv[2]);
+ }
+
+ FAIL_IF_MSG(true, "unknown test case");
+ }
+
+ /*
+ * Otherwise we are the main test invocation and run the full suite
+ */
+ err |= test_harness(dexcr_prctl_ibrtpd_test, "dexcr_prctl_ibrtpd");
+ err |= test_harness(dexcr_prctl_srapd_test, "dexcr_prctl_srapd");
+ err |= test_harness(dexcr_prctl_nphie_test, "dexcr_prctl_nphie");
+
+ return err;
+}
diff --git a/tools/testing/selftests/powerpc/dexcr/hashchk_test.c b/tools/testing/selftests/powerpc/dexcr/hashchk_test.c
index 7d5658c9eb..645224bdc1 100644
--- a/tools/testing/selftests/powerpc/dexcr/hashchk_test.c
+++ b/tools/testing/selftests/powerpc/dexcr/hashchk_test.c
@@ -21,8 +21,14 @@
static int require_nphie(void)
{
SKIP_IF_MSG(!dexcr_exists(), "DEXCR not supported");
+
+ pr_set_dexcr(PR_PPC_DEXCR_NPHIE, PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_SET_ONEXEC);
+
+ if (get_dexcr(EFFECTIVE) & DEXCR_PR_NPHIE)
+ return 0;
+
SKIP_IF_MSG(!(get_dexcr(EFFECTIVE) & DEXCR_PR_NPHIE),
- "DEXCR[NPHIE] not enabled");
+ "Failed to enable DEXCR[NPHIE]");
return 0;
}
diff --git a/tools/testing/selftests/powerpc/dexcr/lsdexcr.c b/tools/testing/selftests/powerpc/dexcr/lsdexcr.c
index 94abbfcc38..7588929180 100644
--- a/tools/testing/selftests/powerpc/dexcr/lsdexcr.c
+++ b/tools/testing/selftests/powerpc/dexcr/lsdexcr.c
@@ -1,9 +1,9 @@
// SPDX-License-Identifier: GPL-2.0+
-#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
+#include <sys/prctl.h>
#include "dexcr.h"
#include "utils.h"
@@ -12,40 +12,6 @@ static unsigned int dexcr;
static unsigned int hdexcr;
static unsigned int effective;
-struct dexcr_aspect {
- const char *name;
- const char *desc;
- unsigned int index;
-};
-
-static const struct dexcr_aspect aspects[] = {
- {
- .name = "SBHE",
- .desc = "Speculative branch hint enable",
- .index = 0,
- },
- {
- .name = "IBRTPD",
- .desc = "Indirect branch recurrent target prediction disable",
- .index = 3,
- },
- {
- .name = "SRAPD",
- .desc = "Subroutine return address prediction disable",
- .index = 4,
- },
- {
- .name = "NPHIE",
- .desc = "Non-privileged hash instruction enable",
- .index = 5,
- },
- {
- .name = "PHIE",
- .desc = "Privileged hash instruction enable",
- .index = 6,
- },
-};
-
static void print_list(const char *list[], size_t len)
{
for (size_t i = 0; i < len; i++) {
@@ -60,7 +26,7 @@ static void print_dexcr(char *name, unsigned int bits)
const char *enabled_aspects[ARRAY_SIZE(aspects) + 1] = {NULL};
size_t j = 0;
- printf("%s: %08x", name, bits);
+ printf("%s: 0x%08x", name, bits);
if (bits == 0) {
printf("\n");
@@ -103,6 +69,63 @@ static void print_aspect(const struct dexcr_aspect *aspect)
printf(" \t(%s)\n", aspect->desc);
}
+static void print_aspect_config(const struct dexcr_aspect *aspect)
+{
+ const char *reason = NULL;
+ const char *reason_hyp = NULL;
+ const char *reason_prctl = "no prctl";
+ bool actual = effective & DEXCR_PR_BIT(aspect->index);
+ bool expected = actual; /* Assume it's fine if we don't expect a specific set/clear value */
+
+ if (actual)
+ reason = "set by unknown";
+ else
+ reason = "cleared by unknown";
+
+ if (aspect->prctl != -1) {
+ int ctrl = pr_get_dexcr(aspect->prctl);
+
+ if (ctrl < 0) {
+ reason_prctl = "failed to read prctl";
+ } else {
+ if (ctrl & PR_PPC_DEXCR_CTRL_SET) {
+ reason_prctl = "set by prctl";
+ expected = true;
+ } else if (ctrl & PR_PPC_DEXCR_CTRL_CLEAR) {
+ reason_prctl = "cleared by prctl";
+ expected = false;
+ } else {
+ reason_prctl = "unknown prctl";
+ }
+
+ reason = reason_prctl;
+ }
+ }
+
+ if (hdexcr & DEXCR_PR_BIT(aspect->index)) {
+ reason_hyp = "set by hypervisor";
+ reason = reason_hyp;
+ expected = true;
+ } else {
+ reason_hyp = "not modified by hypervisor";
+ }
+
+ printf("%12s (%d): %-28s (%s, %s)\n",
+ aspect->name,
+ aspect->index,
+ reason,
+ reason_hyp,
+ reason_prctl);
+
+ /*
+ * The checks are not atomic, so this can technically trigger if the
+ * hypervisor makes a change while we are checking each source. It's
+ * far more likely to be a bug if we see this though.
+ */
+ if (actual != expected)
+ printf(" : ! actual %s does not match config\n", aspect->name);
+}
+
int main(int argc, char *argv[])
{
if (!dexcr_exists()) {
@@ -114,6 +137,8 @@ int main(int argc, char *argv[])
hdexcr = get_dexcr(HDEXCR);
effective = dexcr | hdexcr;
+ printf("current status:\n");
+
print_dexcr(" DEXCR", dexcr);
print_dexcr(" HDEXCR", hdexcr);
print_dexcr("Effective", effective);
@@ -136,6 +161,12 @@ int main(int argc, char *argv[])
else
printf("ignored\n");
}
+ printf("\n");
+
+ printf("configuration:\n");
+ for (size_t i = 0; i < ARRAY_SIZE(aspects); i++)
+ print_aspect_config(&aspects[i]);
+ printf("\n");
return 0;
}
diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile
index 9289d5febe..9fa9cb5bd9 100644
--- a/tools/testing/selftests/powerpc/dscr/Makefile
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -5,6 +5,7 @@ TEST_GEN_PROGS := dscr_default_test dscr_explicit_test dscr_user_test \
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
$(OUTPUT)/dscr_default_test: LDLIBS += -lpthread
$(OUTPUT)/dscr_explicit_test: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/powerpc/eeh/Makefile b/tools/testing/selftests/powerpc/eeh/Makefile
index ae963eb2dc..70797716f2 100644
--- a/tools/testing/selftests/powerpc/eeh/Makefile
+++ b/tools/testing/selftests/powerpc/eeh/Makefile
@@ -7,3 +7,4 @@ TEST_FILES := eeh-functions.sh settings
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
diff --git a/tools/testing/selftests/powerpc/flags.mk b/tools/testing/selftests/powerpc/flags.mk
new file mode 100644
index 0000000000..abb9e58d95
--- /dev/null
+++ b/tools/testing/selftests/powerpc/flags.mk
@@ -0,0 +1,9 @@
+#This checks for any ENV variables and add those.
+
+ifeq ($(GIT_VERSION),)
+GIT_VERSION := $(shell git describe --always --long --dirty || echo "unknown")
+export GIT_VERSION
+endif
+
+CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(USERCFLAGS)
+export CFLAGS
diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile
index 3948f7c510..b14fd2e0c6 100644
--- a/tools/testing/selftests/powerpc/math/Makefile
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -3,6 +3,7 @@ TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vm
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
$(TEST_GEN_PROGS): ../harness.c
$(TEST_GEN_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec
diff --git a/tools/testing/selftests/powerpc/mce/Makefile b/tools/testing/selftests/powerpc/mce/Makefile
index 2424513982..ce4ed679aa 100644
--- a/tools/testing/selftests/powerpc/mce/Makefile
+++ b/tools/testing/selftests/powerpc/mce/Makefile
@@ -3,5 +3,6 @@
TEST_GEN_PROGS := inject-ra-err
include ../../lib.mk
+include ../flags.mk
$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index 4a6608beef..aab058ecb3 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -13,6 +13,7 @@ TEST_GEN_FILES := tempfile
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile
index 0785c2e99d..480d8ba94c 100644
--- a/tools/testing/selftests/powerpc/nx-gzip/Makefile
+++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile
@@ -1,8 +1,9 @@
-CFLAGS = -O3 -m64 -I./include -I../include
-
TEST_GEN_FILES := gzfht_test gunz_test
TEST_PROGS := nx-gzip-test.sh
include ../../lib.mk
+include ../flags.mk
+
+CFLAGS = -O3 -m64 -I./include -I../include
$(TEST_GEN_FILES): gzip_vas.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/papr_attributes/Makefile b/tools/testing/selftests/powerpc/papr_attributes/Makefile
index e899712d49..4064294990 100644
--- a/tools/testing/selftests/powerpc/papr_attributes/Makefile
+++ b/tools/testing/selftests/powerpc/papr_attributes/Makefile
@@ -3,5 +3,6 @@ TEST_GEN_PROGS := attr_test
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
-$(TEST_GEN_PROGS): ../harness.c ../utils.c \ No newline at end of file
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/Makefile b/tools/testing/selftests/powerpc/papr_sysparm/Makefile
index 7f79e43763..fed4f2414d 100644
--- a/tools/testing/selftests/powerpc/papr_sysparm/Makefile
+++ b/tools/testing/selftests/powerpc/papr_sysparm/Makefile
@@ -6,6 +6,7 @@ TEST_GEN_PROGS := papr_sysparm
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/papr_vpd/Makefile b/tools/testing/selftests/powerpc/papr_vpd/Makefile
index 06b719703b..b09852e408 100644
--- a/tools/testing/selftests/powerpc/papr_vpd/Makefile
+++ b/tools/testing/selftests/powerpc/papr_vpd/Makefile
@@ -6,6 +6,7 @@ TEST_GEN_PROGS := papr_vpd
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index a284fa874a..7e9dbf3d0d 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -7,8 +7,11 @@ EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
-all: $(TEST_GEN_PROGS) ebb sampling_tests event_code_tests
+SUB_DIRS := ebb sampling_tests event_code_tests
+
+all: $(TEST_GEN_PROGS) $(SUB_DIRS)
$(TEST_GEN_PROGS): $(EXTRA_SOURCES)
@@ -22,12 +25,16 @@ $(OUTPUT)/count_stcx_fail: loop.S $(EXTRA_SOURCES)
$(OUTPUT)/per_event_excludes: ../utils.c
+$(SUB_DIRS):
+ BUILD_TARGET=$(OUTPUT)/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all
+
DEFAULT_RUN_TESTS := $(RUN_TESTS)
override define RUN_TESTS
$(DEFAULT_RUN_TESTS)
- +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests
- +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests
- +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests
+ +@for TARGET in $(SUB_DIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$TARGET; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests; \
+ done;
endef
emit_tests:
@@ -35,34 +42,29 @@ emit_tests:
BASENAME_TEST=`basename $$TEST`; \
echo "$(COLLECTION):$$BASENAME_TEST"; \
done
- +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
- +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
- +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
+ +@for TARGET in $(SUB_DIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$TARGET; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET COLLECTION=$(COLLECTION)/$$TARGET -s -C $$TARGET emit_tests; \
+ done;
DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
override define INSTALL_RULE
$(DEFAULT_INSTALL_RULE)
- +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install
- +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install
- +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install
+ +@for TARGET in $(SUB_DIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$TARGET; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET INSTALL_PATH=$$INSTALL_PATH/$$TARGET -C $$TARGET install; \
+ done;
endef
DEFAULT_CLEAN := $(CLEAN)
override define CLEAN
$(DEFAULT_CLEAN)
$(RM) $(TEST_GEN_PROGS) $(OUTPUT)/loop.o
- +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean
- +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean
- +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean
+ +@for TARGET in $(SUB_DIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$TARGET; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean; \
+ done;
endef
-ebb:
- TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all
-
-sampling_tests:
- TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all
-
-event_code_tests:
- TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all
.PHONY: all run_tests ebb sampling_tests event_code_tests emit_tests
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index 0101606902..1b39af7c10 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -4,16 +4,6 @@ include ../../../../../build/Build.include
noarg:
$(MAKE) -C ../../
-# The EBB handler is 64-bit code and everything links against it
-CFLAGS += -m64
-
-TMPOUT = $(OUTPUT)/TMPDIR/
-# Toolchains may build PIE by default which breaks the assembly
-no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
- $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
-
-LDFLAGS += $(no-pie-option)
-
TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \
cycles_with_freeze_test pmc56_overflow_test \
ebb_vs_cpu_event_test cpu_event_vs_ebb_test \
@@ -28,6 +18,17 @@ TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \
top_srcdir = ../../../../../..
include ../../../lib.mk
+include ../../flags.mk
+
+# The EBB handler is 64-bit code and everything links against it
+CFLAGS += -m64
+
+TMPOUT = $(OUTPUT)/TMPDIR/
+# Toolchains may build PIE by default which breaks the assembly
+no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
+ $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
+
+LDFLAGS += $(no-pie-option)
$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c \
ebb.c ebb_handler.S trace.c busy_loop.S
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile
index 4e07d70464..fdb080b3fa 100644
--- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile
@@ -1,6 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -m64
-
TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test group_constraint_pmc_count_test \
group_constraint_repeat_test group_constraint_radix_scope_qual_test reserved_bits_mmcra_sample_elig_mode_test \
group_constraint_mmcra_sample_test invalid_event_code_test reserved_bits_mmcra_thresh_ctl_test \
@@ -11,5 +9,8 @@ TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_te
top_srcdir = ../../../../../..
include ../../../lib.mk
+include ../../flags.mk
+
+CFLAGS += -m64
$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c ../sampling_tests/misc.h ../sampling_tests/misc.c
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
index 9e67351fb2..9f79bec5fc 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
@@ -1,6 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -m64
-
TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \
mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \
mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \
@@ -11,5 +9,8 @@ TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test
top_srcdir = ../../../../../..
include ../../../lib.mk
+include ../../flags.mk
+
+CFLAGS += -m64
$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h ../loop.S ../branch_loops.S
diff --git a/tools/testing/selftests/powerpc/primitives/Makefile b/tools/testing/selftests/powerpc/primitives/Makefile
index 9b9491a632..23bd9a7590 100644
--- a/tools/testing/selftests/powerpc/primitives/Makefile
+++ b/tools/testing/selftests/powerpc/primitives/Makefile
@@ -1,9 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only
-CFLAGS += -I$(CURDIR)
-
TEST_GEN_PROGS := load_unaligned_zeropad
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
+
+CFLAGS += -I$(CURDIR)
$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
index 1b39b86849..59ca01d856 100644
--- a/tools/testing/selftests/powerpc/ptrace/Makefile
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -26,6 +26,7 @@ LOCAL_HDRS += $(patsubst %,$(selfdir)/powerpc/ptrace/%,$(wildcard *.h))
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
TM_TESTS := $(patsubst %,$(OUTPUT)/%,$(TM_TESTS))
TESTS_64 := $(patsubst %,$(OUTPUT)/%,$(TESTS_64))
diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile
index e0d979ab02..3328603972 100644
--- a/tools/testing/selftests/powerpc/security/Makefile
+++ b/tools/testing/selftests/powerpc/security/Makefile
@@ -5,9 +5,10 @@ TEST_PROGS := mitigation-patching.sh
top_srcdir = ../../../../..
-CFLAGS += $(KHDR_INCLUDES)
-
include ../../lib.mk
+include ../flags.mk
+
+CFLAGS += $(KHDR_INCLUDES)
$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile
index f679d260af..ece95bd52b 100644
--- a/tools/testing/selftests/powerpc/signal/Makefile
+++ b/tools/testing/selftests/powerpc/signal/Makefile
@@ -3,7 +3,6 @@ TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso sig_sc_double_restart
TEST_GEN_PROGS += sigreturn_kernel
TEST_GEN_PROGS += sigreturn_unaligned
-CFLAGS += -maltivec
$(OUTPUT)/signal_tm: CFLAGS += -mhtm
$(OUTPUT)/sigfuz: CFLAGS += -pthread -m64
@@ -11,5 +10,8 @@ TEST_FILES := settings
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
+
+CFLAGS += -maltivec
$(TEST_GEN_PROGS): ../harness.c ../utils.c signal.S
diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile
index 9c39f55a58..4c9d9a58c9 100644
--- a/tools/testing/selftests/powerpc/stringloops/Makefile
+++ b/tools/testing/selftests/powerpc/stringloops/Makefile
@@ -1,7 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-# The loops are all 64-bit code
-CFLAGS += -I$(CURDIR)
-
EXTRA_SOURCES := ../harness.c
build_32bit = $(shell if ($(CC) $(CFLAGS) -m32 -o /dev/null memcmp.c >/dev/null 2>&1) then echo "1"; fi)
@@ -27,9 +24,13 @@ $(OUTPUT)/strlen_32: CFLAGS += -m32
TEST_GEN_PROGS += strlen_32
endif
-ASFLAGS = $(CFLAGS)
-
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
+
+# The loops are all 64-bit code
+CFLAGS += -I$(CURDIR)
+
+ASFLAGS = $(CFLAGS)
$(TEST_GEN_PROGS): $(EXTRA_SOURCES)
diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile
index bdc081afed..0da2e0a742 100644
--- a/tools/testing/selftests/powerpc/switch_endian/Makefile
+++ b/tools/testing/selftests/powerpc/switch_endian/Makefile
@@ -1,12 +1,13 @@
# SPDX-License-Identifier: GPL-2.0
TEST_GEN_PROGS := switch_endian_test
-ASFLAGS += -O2 -Wall -g -nostdlib -m64
-
EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
+
+ASFLAGS += -O2 -Wall -g -nostdlib -m64
$(OUTPUT)/switch_endian_test: ASFLAGS += -I $(OUTPUT)
$(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S
diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile
index ee1740ddfb..3bc07af88f 100644
--- a/tools/testing/selftests/powerpc/syscalls/Makefile
+++ b/tools/testing/selftests/powerpc/syscalls/Makefile
@@ -1,9 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only
TEST_GEN_PROGS := ipc_unmuxed rtas_filter
-CFLAGS += $(KHDR_INCLUDES)
-
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
+
+CFLAGS += $(KHDR_INCLUDES)
$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index 3876805c2f..f13f0ab360 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -11,6 +11,7 @@ TEST_FILES := settings
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/vphn/Makefile b/tools/testing/selftests/powerpc/vphn/Makefile
index cf65cbf330..61d519a076 100644
--- a/tools/testing/selftests/powerpc/vphn/Makefile
+++ b/tools/testing/selftests/powerpc/vphn/Makefile
@@ -1,10 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-only
TEST_GEN_PROGS := test-vphn
-CFLAGS += -m64 -I$(CURDIR)
-
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
+
+CFLAGS += -m64 -I$(CURDIR)
$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index bbac5f4b03..990d24696f 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -391,7 +391,7 @@ __EOF__
forceflavor="`echo $flavor | sed -e 's/^CONFIG/CONFIG_FORCE/'`"
deselectedflavors="`grep -v $flavor $T/rcutasksflavors | tr '\012' ' ' | tr -s ' ' | sed -e 's/ *$//'`"
echo " --- Running RCU Tasks Trace flavor $flavor `date`" >> $rtfdir/log
- tools/testing/selftests/rcutorture/bin/kvm.sh --datestamp "$ds/results-rcutasksflavors/$flavor" --buildonly --configs "TINY01 TREE04" --kconfig "CONFIG_RCU_EXPERT=y CONFIG_RCU_SCALE_TEST=y $forceflavor=y $deselectedflavors" --trust-make > $T/$flavor.out 2>&1
+ tools/testing/selftests/rcutorture/bin/kvm.sh --datestamp "$ds/results-rcutasksflavors/$flavor" --buildonly --configs "TINY01 TREE04" --kconfig "CONFIG_RCU_EXPERT=y CONFIG_RCU_SCALE_TEST=y CONFIG_KPROBES=n CONFIG_RCU_TRACE=n CONFIG_TRACING=n CONFIG_BLK_DEV_IO_TRACE=n CONFIG_UPROBE_EVENTS=n $forceflavor=y $deselectedflavors" --trust-make > $T/$flavor.out 2>&1
retcode=$?
if test "$retcode" -ne 0
then
@@ -425,7 +425,7 @@ fi
if test "$do_scftorture" = "yes"
then
# Scale memory based on the number of CPUs.
- scfmem=$((2+HALF_ALLOTED_CPUS/16))
+ scfmem=$((3+HALF_ALLOTED_CPUS/16))
torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot csdlock_debug=1"
torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory ${scfmem}G --trust-make
fi
@@ -559,7 +559,7 @@ do_kcsan="$do_kcsan_save"
if test "$do_kvfree" = "yes"
then
torture_bootargs="rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot"
- torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
+ torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration $duration_rcutorture --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
fi
if test "$do_clocksourcewd" = "yes"
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
index fc45645bb5..9ecd1b4e65 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE09
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
@@ -10,8 +10,9 @@ CONFIG_NO_HZ_FULL=n
CONFIG_RCU_TRACE=n
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_RCU_BOOST=n
+CONFIG_RCU_BOOST=y
+CONFIG_RCU_BOOST_DELAY=100
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
-#CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_RCU_EXPERT=y
CONFIG_KPROBES=n
CONFIG_FTRACE=n
diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
index 178a41d4bb..55315ed695 100644
--- a/tools/testing/selftests/resctrl/cat_test.c
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -128,7 +128,7 @@ static int check_results(struct resctrl_val_param *param, const char *cache_type
return fail;
}
-void cat_test_cleanup(void)
+static void cat_test_cleanup(void)
{
remove(RESULT_FILE_NAME);
}
@@ -284,13 +284,10 @@ static int cat_run_test(const struct resctrl_test *test, const struct user_param
ret = cat_test(test, uparams, &param, span, start_mask);
if (ret)
- goto out;
+ return ret;
ret = check_results(&param, test->resource,
cache_total_size, full_cache_mask, start_mask);
-out:
- cat_test_cleanup();
-
return ret;
}
@@ -385,6 +382,7 @@ struct resctrl_test l3_cat_test = {
.resource = "L3",
.feature_check = test_resource_feature_check,
.run_test = cat_run_test,
+ .cleanup = cat_test_cleanup,
};
struct resctrl_test l3_noncont_cat_test = {
diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c
index a81f91222a..0105afec61 100644
--- a/tools/testing/selftests/resctrl/cmt_test.c
+++ b/tools/testing/selftests/resctrl/cmt_test.c
@@ -40,11 +40,11 @@ static int show_results_info(unsigned long sum_llc_val, int no_of_bits,
int ret;
avg_llc_val = sum_llc_val / num_of_runs;
- avg_diff = (long)abs(cache_span - avg_llc_val);
+ avg_diff = (long)(cache_span - avg_llc_val);
diff_percent = ((float)cache_span - avg_llc_val) / cache_span * 100;
ret = platform && abs((int)diff_percent) > max_diff_percent &&
- abs(avg_diff) > max_diff;
+ labs(avg_diff) > max_diff;
ksft_print_msg("%s Check cache miss rate within %lu%%\n",
ret ? "Fail:" : "Pass:", max_diff_percent);
@@ -91,7 +91,7 @@ static int check_results(struct resctrl_val_param *param, size_t span, int no_of
MAX_DIFF, MAX_DIFF_PERCENT, runs - 1, true);
}
-void cmt_test_cleanup(void)
+static void cmt_test_cleanup(void)
{
remove(RESULT_FILE_NAME);
}
@@ -161,7 +161,6 @@ static int cmt_run_test(const struct resctrl_test *test, const struct user_param
ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
out:
- cmt_test_cleanup();
free(span_str);
return ret;
@@ -178,4 +177,5 @@ struct resctrl_test cmt_test = {
.resource = "L3",
.feature_check = cmt_feature_check,
.run_test = cmt_run_test,
+ .cleanup = cmt_test_cleanup,
};
diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c
index 7946e32e85..a6ad39aae1 100644
--- a/tools/testing/selftests/resctrl/mba_test.c
+++ b/tools/testing/selftests/resctrl/mba_test.c
@@ -60,8 +60,8 @@ static bool show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc)
/* Memory bandwidth from 100% down to 10% */
for (allocation = 0; allocation < ALLOCATION_MAX / ALLOCATION_STEP;
allocation++) {
- unsigned long avg_bw_imc, avg_bw_resc;
unsigned long sum_bw_imc = 0, sum_bw_resc = 0;
+ long avg_bw_imc, avg_bw_resc;
int avg_diff_per;
float avg_diff;
@@ -137,7 +137,7 @@ static int check_results(void)
return show_mba_info(bw_imc, bw_resc);
}
-void mba_test_cleanup(void)
+static void mba_test_cleanup(void)
{
remove(RESULT_FILE_NAME);
}
@@ -158,13 +158,10 @@ static int mba_run_test(const struct resctrl_test *test, const struct user_param
ret = resctrl_val(test, uparams, uparams->benchmark_cmd, &param);
if (ret)
- goto out;
+ return ret;
ret = check_results();
-out:
- mba_test_cleanup();
-
return ret;
}
@@ -180,4 +177,5 @@ struct resctrl_test mba_test = {
.vendor_specific = ARCH_INTEL,
.feature_check = mba_feature_check,
.run_test = mba_run_test,
+ .cleanup = mba_test_cleanup,
};
diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c
index d67ffa3ec6..6fec51e1ff 100644
--- a/tools/testing/selftests/resctrl/mbm_test.c
+++ b/tools/testing/selftests/resctrl/mbm_test.c
@@ -17,8 +17,8 @@
static int
show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, size_t span)
{
- unsigned long avg_bw_imc = 0, avg_bw_resc = 0;
unsigned long sum_bw_imc = 0, sum_bw_resc = 0;
+ long avg_bw_imc = 0, avg_bw_resc = 0;
int runs, ret, avg_diff_per;
float avg_diff = 0;
@@ -105,7 +105,7 @@ static int mbm_setup(const struct resctrl_test *test,
return ret;
}
-void mbm_test_cleanup(void)
+static void mbm_test_cleanup(void)
{
remove(RESULT_FILE_NAME);
}
@@ -126,15 +126,12 @@ static int mbm_run_test(const struct resctrl_test *test, const struct user_param
ret = resctrl_val(test, uparams, uparams->benchmark_cmd, &param);
if (ret)
- goto out;
+ return ret;
ret = check_results(DEFAULT_SPAN);
if (ret && (get_vendor() == ARCH_INTEL))
ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
-out:
- mbm_test_cleanup();
-
return ret;
}
@@ -150,4 +147,5 @@ struct resctrl_test mbm_test = {
.vendor_specific = ARCH_INTEL,
.feature_check = mbm_feature_check,
.run_test = mbm_run_test,
+ .cleanup = mbm_test_cleanup,
};
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 2051bd135e..00d51fa753 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -72,6 +72,7 @@ struct user_params {
* @disabled: Test is disabled
* @feature_check: Callback to check required resctrl features
* @run_test: Callback to run the test
+ * @cleanup: Callback to cleanup after the test
*/
struct resctrl_test {
const char *name;
@@ -82,6 +83,7 @@ struct resctrl_test {
bool (*feature_check)(const struct resctrl_test *test);
int (*run_test)(const struct resctrl_test *test,
const struct user_params *uparams);
+ void (*cleanup)(void);
};
/*
@@ -156,9 +158,6 @@ int resctrl_val(const struct resctrl_test *test,
const struct user_params *uparams,
const char * const *benchmark_cmd,
struct resctrl_val_param *param);
-void tests_cleanup(void);
-void mbm_test_cleanup(void);
-void mba_test_cleanup(void);
unsigned long create_bit_mask(unsigned int start, unsigned int len);
unsigned int count_contiguous_bits(unsigned long val, unsigned int *start);
int get_full_cbm(const char *cache_type, unsigned long *mask);
@@ -166,11 +165,9 @@ int get_mask_no_shareable(const char *cache_type, unsigned long *mask);
int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size);
int resource_info_unsigned_get(const char *resource, const char *filename, unsigned int *val);
void ctrlc_handler(int signum, siginfo_t *info, void *ptr);
-int signal_handler_register(void);
+int signal_handler_register(const struct resctrl_test *test);
void signal_handler_unregister(void);
-void cat_test_cleanup(void);
unsigned int count_bits(unsigned long n);
-void cmt_test_cleanup(void);
void perf_event_attr_initialize(struct perf_event_attr *pea, __u64 config);
void perf_event_initialize_read_format(struct perf_event_read *pe_read);
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index f3dc1b9696..ecbb7605a9 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -81,19 +81,11 @@ static void cmd_help(void)
printf("\t-h: help\n");
}
-void tests_cleanup(void)
-{
- mbm_test_cleanup();
- mba_test_cleanup();
- cmt_test_cleanup();
- cat_test_cleanup();
-}
-
-static int test_prepare(void)
+static int test_prepare(const struct resctrl_test *test)
{
int res;
- res = signal_handler_register();
+ res = signal_handler_register(test);
if (res) {
ksft_print_msg("Failed to register signal handler\n");
return res;
@@ -108,8 +100,10 @@ static int test_prepare(void)
return 0;
}
-static void test_cleanup(void)
+static void test_cleanup(const struct resctrl_test *test)
{
+ if (test->cleanup)
+ test->cleanup();
umount_resctrlfs();
signal_handler_unregister();
}
@@ -136,7 +130,7 @@ static void run_single_test(const struct resctrl_test *test, const struct user_p
ksft_print_msg("Starting %s test ...\n", test->name);
- if (test_prepare()) {
+ if (test_prepare(test)) {
ksft_exit_fail_msg("Abnormal failure when preparing for the test\n");
return;
}
@@ -151,7 +145,7 @@ static void run_single_test(const struct resctrl_test *test, const struct user_p
ksft_test_result(!ret, "%s: test\n", test->name);
cleanup:
- test_cleanup();
+ test_cleanup(test);
}
static void init_user_params(struct user_params *uparams)
@@ -253,13 +247,13 @@ last_arg:
* 2. We execute perf commands
*/
if (geteuid() != 0)
- return ksft_exit_skip("Not running as root. Skipping...\n");
+ ksft_exit_skip("Not running as root. Skipping...\n");
if (!check_resctrlfs_support())
- return ksft_exit_skip("resctrl FS does not exist. Enable X86_CPU_RESCTRL config option.\n");
+ ksft_exit_skip("resctrl FS does not exist. Enable X86_CPU_RESCTRL config option.\n");
if (umount_resctrlfs())
- return ksft_exit_skip("resctrl FS unmount failed.\n");
+ ksft_exit_skip("resctrl FS unmount failed.\n");
filter_dmesg();
diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
index 5a49f07a6c..f55f5989de 100644
--- a/tools/testing/selftests/resctrl/resctrl_val.c
+++ b/tools/testing/selftests/resctrl/resctrl_val.c
@@ -62,6 +62,7 @@ struct imc_counter_config {
static char mbm_total_path[1024];
static int imcs;
static struct imc_counter_config imc_counters_config[MAX_IMCS][2];
+static const struct resctrl_test *current_test;
void membw_initialize_perf_event_attr(int i, int j)
{
@@ -292,6 +293,18 @@ static int initialize_mem_bw_imc(void)
return 0;
}
+static void perf_close_imc_mem_bw(void)
+{
+ int mc;
+
+ for (mc = 0; mc < imcs; mc++) {
+ if (imc_counters_config[mc][READ].fd != -1)
+ close(imc_counters_config[mc][READ].fd);
+ if (imc_counters_config[mc][WRITE].fd != -1)
+ close(imc_counters_config[mc][WRITE].fd);
+ }
+}
+
/*
* get_mem_bw_imc: Memory band width as reported by iMC counters
* @cpu_no: CPU number that the benchmark PID is binded to
@@ -305,26 +318,33 @@ static int initialize_mem_bw_imc(void)
static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
{
float reads, writes, of_mul_read, of_mul_write;
- int imc, j, ret;
+ int imc, ret;
+
+ for (imc = 0; imc < imcs; imc++) {
+ imc_counters_config[imc][READ].fd = -1;
+ imc_counters_config[imc][WRITE].fd = -1;
+ }
/* Start all iMC counters to log values (both read and write) */
reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1;
for (imc = 0; imc < imcs; imc++) {
- for (j = 0; j < 2; j++) {
- ret = open_perf_event(imc, cpu_no, j);
- if (ret)
- return -1;
- }
- for (j = 0; j < 2; j++)
- membw_ioctl_perf_event_ioc_reset_enable(imc, j);
+ ret = open_perf_event(imc, cpu_no, READ);
+ if (ret)
+ goto close_fds;
+ ret = open_perf_event(imc, cpu_no, WRITE);
+ if (ret)
+ goto close_fds;
+
+ membw_ioctl_perf_event_ioc_reset_enable(imc, READ);
+ membw_ioctl_perf_event_ioc_reset_enable(imc, WRITE);
}
sleep(1);
/* Stop counters after a second to get results (both read and write) */
for (imc = 0; imc < imcs; imc++) {
- for (j = 0; j < 2; j++)
- membw_ioctl_perf_event_ioc_disable(imc, j);
+ membw_ioctl_perf_event_ioc_disable(imc, READ);
+ membw_ioctl_perf_event_ioc_disable(imc, WRITE);
}
/*
@@ -340,15 +360,13 @@ static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
if (read(r->fd, &r->return_value,
sizeof(struct membw_read_format)) == -1) {
ksft_perror("Couldn't get read b/w through iMC");
-
- return -1;
+ goto close_fds;
}
if (read(w->fd, &w->return_value,
sizeof(struct membw_read_format)) == -1) {
ksft_perror("Couldn't get write bw through iMC");
-
- return -1;
+ goto close_fds;
}
__u64 r_time_enabled = r->return_value.time_enabled;
@@ -368,10 +386,7 @@ static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
writes += w->return_value.value * of_mul_write * SCALE;
}
- for (imc = 0; imc < imcs; imc++) {
- close(imc_counters_config[imc][READ].fd);
- close(imc_counters_config[imc][WRITE].fd);
- }
+ perf_close_imc_mem_bw();
if (strcmp(bw_report, "reads") == 0) {
*bw_imc = reads;
@@ -385,6 +400,10 @@ static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
*bw_imc = reads + writes;
return 0;
+
+close_fds:
+ perf_close_imc_mem_bw();
+ return -1;
}
void set_mbm_path(const char *ctrlgrp, const char *mongrp, int domain_id)
@@ -472,7 +491,8 @@ void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
if (bm_pid)
kill(bm_pid, SIGKILL);
umount_resctrlfs();
- tests_cleanup();
+ if (current_test && current_test->cleanup)
+ current_test->cleanup();
ksft_print_msg("Ending\n\n");
exit(EXIT_SUCCESS);
@@ -482,13 +502,14 @@ void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
* Register CTRL-C handler for parent, as it has to kill
* child process before exiting.
*/
-int signal_handler_register(void)
+int signal_handler_register(const struct resctrl_test *test)
{
struct sigaction sigact = {};
int ret = 0;
bm_pid = 0;
+ current_test = test;
sigact.sa_sigaction = ctrlc_handler;
sigemptyset(&sigact.sa_mask);
sigact.sa_flags = SA_SIGINFO;
@@ -510,6 +531,7 @@ void signal_handler_unregister(void)
{
struct sigaction sigact = {};
+ current_test = NULL;
sigact.sa_handler = SIG_DFL;
sigemptyset(&sigact.sa_mask);
if (sigaction(SIGINT, &sigact, NULL) ||
diff --git a/tools/testing/selftests/ring-buffer/.gitignore b/tools/testing/selftests/ring-buffer/.gitignore
new file mode 100644
index 0000000000..3aed1a2a6c
--- /dev/null
+++ b/tools/testing/selftests/ring-buffer/.gitignore
@@ -0,0 +1 @@
+map_test
diff --git a/tools/testing/selftests/ring-buffer/Makefile b/tools/testing/selftests/ring-buffer/Makefile
new file mode 100644
index 0000000000..627c5fa6d1
--- /dev/null
+++ b/tools/testing/selftests/ring-buffer/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -Wl,-no-as-needed -Wall
+CFLAGS += $(KHDR_INCLUDES)
+CFLAGS += -D_GNU_SOURCE
+
+TEST_GEN_PROGS = map_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/ring-buffer/config b/tools/testing/selftests/ring-buffer/config
new file mode 100644
index 0000000000..d936f8f00e
--- /dev/null
+++ b/tools/testing/selftests/ring-buffer/config
@@ -0,0 +1,2 @@
+CONFIG_FTRACE=y
+CONFIG_TRACER_SNAPSHOT=y
diff --git a/tools/testing/selftests/ring-buffer/map_test.c b/tools/testing/selftests/ring-buffer/map_test.c
new file mode 100644
index 0000000000..a9006fa709
--- /dev/null
+++ b/tools/testing/selftests/ring-buffer/map_test.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Ring-buffer memory mapping tests
+ *
+ * Copyright (c) 2024 Vincent Donnefort <vdonnefort@google.com>
+ */
+#include <fcntl.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/trace_mmap.h>
+
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+
+#include "../user_events/user_events_selftests.h" /* share tracefs setup */
+#include "../kselftest_harness.h"
+
+#define TRACEFS_ROOT "/sys/kernel/tracing"
+
+static int __tracefs_write(const char *path, const char *value)
+{
+ int fd, ret;
+
+ fd = open(path, O_WRONLY | O_TRUNC);
+ if (fd < 0)
+ return fd;
+
+ ret = write(fd, value, strlen(value));
+
+ close(fd);
+
+ return ret == -1 ? -errno : 0;
+}
+
+static int __tracefs_write_int(const char *path, int value)
+{
+ char *str;
+ int ret;
+
+ if (asprintf(&str, "%d", value) < 0)
+ return -1;
+
+ ret = __tracefs_write(path, str);
+
+ free(str);
+
+ return ret;
+}
+
+#define tracefs_write_int(path, value) \
+ ASSERT_EQ(__tracefs_write_int((path), (value)), 0)
+
+#define tracefs_write(path, value) \
+ ASSERT_EQ(__tracefs_write((path), (value)), 0)
+
+static int tracefs_reset(void)
+{
+ if (__tracefs_write_int(TRACEFS_ROOT"/tracing_on", 0))
+ return -1;
+ if (__tracefs_write(TRACEFS_ROOT"/trace", ""))
+ return -1;
+ if (__tracefs_write(TRACEFS_ROOT"/set_event", ""))
+ return -1;
+ if (__tracefs_write(TRACEFS_ROOT"/current_tracer", "nop"))
+ return -1;
+
+ return 0;
+}
+
+struct tracefs_cpu_map_desc {
+ struct trace_buffer_meta *meta;
+ int cpu_fd;
+};
+
+int tracefs_cpu_map(struct tracefs_cpu_map_desc *desc, int cpu)
+{
+ int page_size = getpagesize();
+ char *cpu_path;
+ void *map;
+
+ if (asprintf(&cpu_path,
+ TRACEFS_ROOT"/per_cpu/cpu%d/trace_pipe_raw",
+ cpu) < 0)
+ return -ENOMEM;
+
+ desc->cpu_fd = open(cpu_path, O_RDONLY | O_NONBLOCK);
+ free(cpu_path);
+ if (desc->cpu_fd < 0)
+ return -ENODEV;
+
+ map = mmap(NULL, page_size, PROT_READ, MAP_SHARED, desc->cpu_fd, 0);
+ if (map == MAP_FAILED)
+ return -errno;
+
+ desc->meta = (struct trace_buffer_meta *)map;
+
+ return 0;
+}
+
+void tracefs_cpu_unmap(struct tracefs_cpu_map_desc *desc)
+{
+ munmap(desc->meta, desc->meta->meta_page_size);
+ close(desc->cpu_fd);
+}
+
+FIXTURE(map) {
+ struct tracefs_cpu_map_desc map_desc;
+ bool umount;
+};
+
+FIXTURE_VARIANT(map) {
+ int subbuf_size;
+};
+
+FIXTURE_VARIANT_ADD(map, subbuf_size_4k) {
+ .subbuf_size = 4,
+};
+
+FIXTURE_VARIANT_ADD(map, subbuf_size_8k) {
+ .subbuf_size = 8,
+};
+
+FIXTURE_SETUP(map)
+{
+ int cpu = sched_getcpu();
+ cpu_set_t cpu_mask;
+ bool fail, umount;
+ char *message;
+
+ if (getuid() != 0)
+ SKIP(return, "Skipping: %s", "Please run the test as root");
+
+ if (!tracefs_enabled(&message, &fail, &umount)) {
+ if (fail) {
+ TH_LOG("Tracefs setup failed: %s", message);
+ ASSERT_FALSE(fail);
+ }
+ SKIP(return, "Skipping: %s", message);
+ }
+
+ self->umount = umount;
+
+ ASSERT_GE(cpu, 0);
+
+ ASSERT_EQ(tracefs_reset(), 0);
+
+ tracefs_write_int(TRACEFS_ROOT"/buffer_subbuf_size_kb", variant->subbuf_size);
+
+ ASSERT_EQ(tracefs_cpu_map(&self->map_desc, cpu), 0);
+
+ /*
+ * Ensure generated events will be found on this very same ring-buffer.
+ */
+ CPU_ZERO(&cpu_mask);
+ CPU_SET(cpu, &cpu_mask);
+ ASSERT_EQ(sched_setaffinity(0, sizeof(cpu_mask), &cpu_mask), 0);
+}
+
+FIXTURE_TEARDOWN(map)
+{
+ tracefs_reset();
+
+ if (self->umount)
+ tracefs_unmount();
+
+ tracefs_cpu_unmap(&self->map_desc);
+}
+
+TEST_F(map, meta_page_check)
+{
+ struct tracefs_cpu_map_desc *desc = &self->map_desc;
+ int cnt = 0;
+
+ ASSERT_EQ(desc->meta->entries, 0);
+ ASSERT_EQ(desc->meta->overrun, 0);
+ ASSERT_EQ(desc->meta->read, 0);
+
+ ASSERT_EQ(desc->meta->reader.id, 0);
+ ASSERT_EQ(desc->meta->reader.read, 0);
+
+ ASSERT_EQ(ioctl(desc->cpu_fd, TRACE_MMAP_IOCTL_GET_READER), 0);
+ ASSERT_EQ(desc->meta->reader.id, 0);
+
+ tracefs_write_int(TRACEFS_ROOT"/tracing_on", 1);
+ for (int i = 0; i < 16; i++)
+ tracefs_write_int(TRACEFS_ROOT"/trace_marker", i);
+again:
+ ASSERT_EQ(ioctl(desc->cpu_fd, TRACE_MMAP_IOCTL_GET_READER), 0);
+
+ ASSERT_EQ(desc->meta->entries, 16);
+ ASSERT_EQ(desc->meta->overrun, 0);
+ ASSERT_EQ(desc->meta->read, 16);
+
+ ASSERT_EQ(desc->meta->reader.id, 1);
+
+ if (!(cnt++))
+ goto again;
+}
+
+TEST_F(map, data_mmap)
+{
+ struct tracefs_cpu_map_desc *desc = &self->map_desc;
+ unsigned long meta_len, data_len;
+ void *data;
+
+ meta_len = desc->meta->meta_page_size;
+ data_len = desc->meta->subbuf_size * desc->meta->nr_subbufs;
+
+ /* Map all the available subbufs */
+ data = mmap(NULL, data_len, PROT_READ, MAP_SHARED,
+ desc->cpu_fd, meta_len);
+ ASSERT_NE(data, MAP_FAILED);
+ munmap(data, data_len);
+
+ /* Map all the available subbufs - 1 */
+ data_len -= desc->meta->subbuf_size;
+ data = mmap(NULL, data_len, PROT_READ, MAP_SHARED,
+ desc->cpu_fd, meta_len);
+ ASSERT_NE(data, MAP_FAILED);
+ munmap(data, data_len);
+
+ /* Overflow the available subbufs by 1 */
+ meta_len += desc->meta->subbuf_size * 2;
+ data = mmap(NULL, data_len, PROT_READ, MAP_SHARED,
+ desc->cpu_fd, meta_len);
+ ASSERT_EQ(data, MAP_FAILED);
+}
+
+FIXTURE(snapshot) {
+ bool umount;
+};
+
+FIXTURE_SETUP(snapshot)
+{
+ bool fail, umount;
+ struct stat sb;
+ char *message;
+
+ if (getuid() != 0)
+ SKIP(return, "Skipping: %s", "Please run the test as root");
+
+ if (stat(TRACEFS_ROOT"/snapshot", &sb))
+ SKIP(return, "Skipping: %s", "snapshot not available");
+
+ if (!tracefs_enabled(&message, &fail, &umount)) {
+ if (fail) {
+ TH_LOG("Tracefs setup failed: %s", message);
+ ASSERT_FALSE(fail);
+ }
+ SKIP(return, "Skipping: %s", message);
+ }
+
+ self->umount = umount;
+}
+
+FIXTURE_TEARDOWN(snapshot)
+{
+ __tracefs_write(TRACEFS_ROOT"/events/sched/sched_switch/trigger",
+ "!snapshot");
+ tracefs_reset();
+
+ if (self->umount)
+ tracefs_unmount();
+}
+
+TEST_F(snapshot, excludes_map)
+{
+ struct tracefs_cpu_map_desc map_desc;
+ int cpu = sched_getcpu();
+
+ ASSERT_GE(cpu, 0);
+ tracefs_write(TRACEFS_ROOT"/events/sched/sched_switch/trigger",
+ "snapshot");
+ ASSERT_EQ(tracefs_cpu_map(&map_desc, cpu), -EBUSY);
+}
+
+TEST_F(snapshot, excluded_by_map)
+{
+ struct tracefs_cpu_map_desc map_desc;
+ int cpu = sched_getcpu();
+
+ ASSERT_EQ(tracefs_cpu_map(&map_desc, cpu), 0);
+
+ ASSERT_EQ(__tracefs_write(TRACEFS_ROOT"/events/sched/sched_switch/trigger",
+ "snapshot"), -EBUSY);
+ ASSERT_EQ(__tracefs_write(TRACEFS_ROOT"/snapshot",
+ "1"), -EBUSY);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/riscv/Makefile b/tools/testing/selftests/riscv/Makefile
index 4a9ff515a3..7ce03d832b 100644
--- a/tools/testing/selftests/riscv/Makefile
+++ b/tools/testing/selftests/riscv/Makefile
@@ -5,7 +5,7 @@
ARCH ?= $(shell uname -m 2>/dev/null || echo not)
ifneq (,$(filter $(ARCH),riscv))
-RISCV_SUBTARGETS ?= hwprobe vector mm
+RISCV_SUBTARGETS ?= hwprobe vector mm sigreturn
else
RISCV_SUBTARGETS :=
endif
diff --git a/tools/testing/selftests/riscv/sigreturn/.gitignore b/tools/testing/selftests/riscv/sigreturn/.gitignore
new file mode 100644
index 0000000000..35002b8ae7
--- /dev/null
+++ b/tools/testing/selftests/riscv/sigreturn/.gitignore
@@ -0,0 +1 @@
+sigreturn
diff --git a/tools/testing/selftests/riscv/sigreturn/Makefile b/tools/testing/selftests/riscv/sigreturn/Makefile
new file mode 100644
index 0000000000..eb8bac9279
--- /dev/null
+++ b/tools/testing/selftests/riscv/sigreturn/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2021 ARM Limited
+# Originally tools/testing/arm64/abi/Makefile
+
+CFLAGS += -I$(top_srcdir)/tools/include
+
+TEST_GEN_PROGS := sigreturn
+
+include ../../lib.mk
+
+$(OUTPUT)/sigreturn: sigreturn.c
+ $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
diff --git a/tools/testing/selftests/riscv/sigreturn/sigreturn.c b/tools/testing/selftests/riscv/sigreturn/sigreturn.c
new file mode 100644
index 0000000000..ed351a1cb9
--- /dev/null
+++ b/tools/testing/selftests/riscv/sigreturn/sigreturn.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <linux/ptrace.h>
+#include "../../kselftest_harness.h"
+
+#define RISCV_V_MAGIC 0x53465457
+#define DEFAULT_VALUE 2
+#define SIGNAL_HANDLER_OVERRIDE 3
+
+static void simple_handle(int sig_no, siginfo_t *info, void *vcontext)
+{
+ ucontext_t *context = vcontext;
+
+ context->uc_mcontext.__gregs[REG_PC] = context->uc_mcontext.__gregs[REG_PC] + 4;
+}
+
+static void vector_override(int sig_no, siginfo_t *info, void *vcontext)
+{
+ ucontext_t *context = vcontext;
+
+ // vector state
+ struct __riscv_extra_ext_header *ext;
+ struct __riscv_v_ext_state *v_ext_state;
+
+ /* Find the vector context. */
+ ext = (void *)(&context->uc_mcontext.__fpregs);
+ if (ext->hdr.magic != RISCV_V_MAGIC) {
+ fprintf(stderr, "bad vector magic: %x\n", ext->hdr.magic);
+ abort();
+ }
+
+ v_ext_state = (void *)((char *)(ext) + sizeof(*ext));
+
+ *(int *)v_ext_state->datap = SIGNAL_HANDLER_OVERRIDE;
+
+ context->uc_mcontext.__gregs[REG_PC] = context->uc_mcontext.__gregs[REG_PC] + 4;
+}
+
+static int vector_sigreturn(int data, void (*handler)(int, siginfo_t *, void *))
+{
+ int after_sigreturn;
+ struct sigaction sig_action = {
+ .sa_sigaction = handler,
+ .sa_flags = SA_SIGINFO
+ };
+
+ sigaction(SIGSEGV, &sig_action, 0);
+
+ asm(".option push \n\
+ .option arch, +v \n\
+ vsetivli x0, 1, e32, m1, ta, ma \n\
+ vmv.s.x v0, %1 \n\
+ # Generate SIGSEGV \n\
+ lw a0, 0(x0) \n\
+ vmv.x.s %0, v0 \n\
+ .option pop" : "=r" (after_sigreturn) : "r" (data));
+
+ return after_sigreturn;
+}
+
+TEST(vector_restore)
+{
+ int result;
+
+ result = vector_sigreturn(DEFAULT_VALUE, &simple_handle);
+
+ EXPECT_EQ(DEFAULT_VALUE, result);
+}
+
+TEST(vector_restore_signal_handler_override)
+{
+ int result;
+
+ result = vector_sigreturn(DEFAULT_VALUE, &vector_override);
+
+ EXPECT_EQ(SIGNAL_HANDLER_OVERRIDE, result);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
index b83099160f..94886c82ae 100644
--- a/tools/testing/selftests/seccomp/seccomp_benchmark.c
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -194,14 +194,14 @@ int main(int argc, char *argv[])
ksft_set_plan(7);
ksft_print_msg("Running on:\n");
- ksft_print_msg("");
+ ksft_print_msg("%s", "");
system("uname -a");
ksft_print_msg("Current BPF sysctl settings:\n");
/* Avoid using "sysctl" which may not be installed. */
- ksft_print_msg("");
+ ksft_print_msg("%s", "");
system("grep -H . /proc/sys/net/core/bpf_jit_enable");
- ksft_print_msg("");
+ ksft_print_msg("%s", "");
system("grep -H . /proc/sys/net/core/bpf_jit_harden");
affinity();
diff --git a/tools/testing/selftests/sigaltstack/current_stack_pointer.h b/tools/testing/selftests/sigaltstack/current_stack_pointer.h
index ea9bdf3a90..09da8f1011 100644
--- a/tools/testing/selftests/sigaltstack/current_stack_pointer.h
+++ b/tools/testing/selftests/sigaltstack/current_stack_pointer.h
@@ -8,7 +8,7 @@ register unsigned long sp asm("sp");
register unsigned long sp asm("esp");
#elif __loongarch64
register unsigned long sp asm("$sp");
-#elif __ppc__
+#elif __powerpc__
register unsigned long sp asm("r1");
#elif __s390x__
register unsigned long sp asm("%15");
diff --git a/tools/testing/selftests/sync/sync_test.c b/tools/testing/selftests/sync/sync_test.c
index 414a617db9..93db5aa246 100644
--- a/tools/testing/selftests/sync/sync_test.c
+++ b/tools/testing/selftests/sync/sync_test.c
@@ -109,6 +109,5 @@ int main(void)
ksft_exit_fail_msg("%d out of %d sync tests failed\n",
err, ksft_test_num());
- /* need this return to keep gcc happy */
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/adjtick.c b/tools/testing/selftests/timers/adjtick.c
index 47e05fdc32..205b76a4ab 100644
--- a/tools/testing/selftests/timers/adjtick.c
+++ b/tools/testing/selftests/timers/adjtick.c
@@ -205,7 +205,7 @@ int main(int argc, char **argv)
adjtimex(&tx1);
if (err)
- return ksft_exit_fail();
+ ksft_exit_fail();
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c
index 4332b49410..ad52e608b8 100644
--- a/tools/testing/selftests/timers/alarmtimer-suspend.c
+++ b/tools/testing/selftests/timers/alarmtimer-suspend.c
@@ -173,6 +173,6 @@ int main(void)
timer_delete(tm1);
}
if (final_ret)
- return ksft_exit_fail();
- return ksft_exit_pass();
+ ksft_exit_fail();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c
index 992a77f2a7..4421cd562c 100644
--- a/tools/testing/selftests/timers/change_skew.c
+++ b/tools/testing/selftests/timers/change_skew.c
@@ -89,8 +89,8 @@ int main(int argc, char **argv)
if (ret) {
printf("[FAIL]");
- return ksft_exit_fail();
+ ksft_exit_fail();
}
printf("[OK]");
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/freq-step.c b/tools/testing/selftests/timers/freq-step.c
index 4b76450d78..73b636f89f 100644
--- a/tools/testing/selftests/timers/freq-step.c
+++ b/tools/testing/selftests/timers/freq-step.c
@@ -257,7 +257,7 @@ int main(int argc, char **argv)
set_frequency(0.0);
if (fails)
- return ksft_exit_fail();
+ ksft_exit_fail();
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c
index 23eb398c81..986abbdb15 100644
--- a/tools/testing/selftests/timers/leap-a-day.c
+++ b/tools/testing/selftests/timers/leap-a-day.c
@@ -268,7 +268,7 @@ int main(int argc, char **argv)
if (ret < 0) {
printf("Error: Problem setting STA_INS/STA_DEL!: %s\n",
time_state_str(ret));
- return ksft_exit_fail();
+ ksft_exit_fail();
}
/* Validate STA_INS was set */
@@ -277,7 +277,7 @@ int main(int argc, char **argv)
if (tx.status != STA_INS && tx.status != STA_DEL) {
printf("Error: STA_INS/STA_DEL not set!: %s\n",
time_state_str(ret));
- return ksft_exit_fail();
+ ksft_exit_fail();
}
if (tai_time) {
@@ -295,7 +295,7 @@ int main(int argc, char **argv)
se.sigev_value.sival_int = 0;
if (timer_create(CLOCK_REALTIME, &se, &tm1) == -1) {
printf("Error: timer_create failed\n");
- return ksft_exit_fail();
+ ksft_exit_fail();
}
its1.it_value.tv_sec = next_leap;
its1.it_value.tv_nsec = 0;
@@ -366,7 +366,7 @@ int main(int argc, char **argv)
if (error_found) {
printf("Errors observed\n");
clear_time_state();
- return ksft_exit_fail();
+ ksft_exit_fail();
}
printf("\n");
if ((iterations != -1) && !(--iterations))
@@ -374,5 +374,5 @@ int main(int argc, char **argv)
}
clear_time_state();
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c
index f70802c5dd..8fd065eec9 100644
--- a/tools/testing/selftests/timers/leapcrash.c
+++ b/tools/testing/selftests/timers/leapcrash.c
@@ -87,7 +87,7 @@ int main(void)
tv.tv_usec = 0;
if (settimeofday(&tv, NULL)) {
printf("Error: You're likely not running with proper (ie: root) permissions\n");
- return ksft_exit_fail();
+ ksft_exit_fail();
}
tx.modes = 0;
adjtimex(&tx);
@@ -104,5 +104,5 @@ int main(void)
fflush(stdout);
}
printf("[OK]\n");
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/mqueue-lat.c b/tools/testing/selftests/timers/mqueue-lat.c
index 7916cf5cc6..f3179a605b 100644
--- a/tools/testing/selftests/timers/mqueue-lat.c
+++ b/tools/testing/selftests/timers/mqueue-lat.c
@@ -107,8 +107,8 @@ int main(int argc, char **argv)
ret = mqueue_lat_test();
if (ret < 0) {
printf("[FAILED]\n");
- return ksft_exit_fail();
+ ksft_exit_fail();
}
printf("[OK]\n");
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
index c001dd7917..07c81c0093 100644
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -260,16 +260,16 @@ int main(int argc, char **argv)
ksft_print_msg("based timers if other threads run on the CPU...\n");
if (check_itimer(ITIMER_VIRTUAL) < 0)
- return ksft_exit_fail();
+ ksft_exit_fail();
if (check_itimer(ITIMER_PROF) < 0)
- return ksft_exit_fail();
+ ksft_exit_fail();
if (check_itimer(ITIMER_REAL) < 0)
- return ksft_exit_fail();
+ ksft_exit_fail();
if (check_timer_create(CLOCK_THREAD_CPUTIME_ID) < 0)
- return ksft_exit_fail();
+ ksft_exit_fail();
/*
* It's unfortunately hard to reliably test a timer expiration
@@ -281,10 +281,10 @@ int main(int argc, char **argv)
* find a better solution.
*/
if (check_timer_create(CLOCK_PROCESS_CPUTIME_ID) < 0)
- return ksft_exit_fail();
+ ksft_exit_fail();
if (check_timer_distribution() < 0)
- return ksft_exit_fail();
+ ksft_exit_fail();
ksft_finished();
}
diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c
index 6eba203f9d..030143eb09 100644
--- a/tools/testing/selftests/timers/raw_skew.c
+++ b/tools/testing/selftests/timers/raw_skew.c
@@ -137,11 +137,11 @@ int main(int argc, char **argv)
if (tx1.offset || tx2.offset ||
tx1.freq != tx2.freq || tx1.tick != tx2.tick) {
printf(" [SKIP]\n");
- return ksft_exit_skip("The clock was adjusted externally. Shutdown NTPd or other time sync daemons\n");
+ ksft_exit_skip("The clock was adjusted externally. Shutdown NTPd or other time sync daemons\n");
}
printf(" [FAILED]\n");
- return ksft_exit_fail();
+ ksft_exit_fail();
}
printf(" [OK]\n");
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/set-2038.c b/tools/testing/selftests/timers/set-2038.c
index 688cfd81b5..f7d978721b 100644
--- a/tools/testing/selftests/timers/set-2038.c
+++ b/tools/testing/selftests/timers/set-2038.c
@@ -128,6 +128,6 @@ out:
/* restore clock */
settime(start);
if (ret)
- return ksft_exit_fail();
- return ksft_exit_pass();
+ ksft_exit_fail();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/set-tai.c b/tools/testing/selftests/timers/set-tai.c
index 8c4179ee2c..5b67462efc 100644
--- a/tools/testing/selftests/timers/set-tai.c
+++ b/tools/testing/selftests/timers/set-tai.c
@@ -61,9 +61,9 @@ int main(int argc, char **argv)
ret = get_tai();
if (ret != i) {
printf("[FAILED] expected: %i got %i\n", i, ret);
- return ksft_exit_fail();
+ ksft_exit_fail();
}
}
printf("[OK]\n");
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c
index 50da45437d..7ce240c89b 100644
--- a/tools/testing/selftests/timers/set-timer-lat.c
+++ b/tools/testing/selftests/timers/set-timer-lat.c
@@ -278,6 +278,6 @@ int main(void)
ret |= do_timer_oneshot(clock_id, 0);
}
if (ret)
- return ksft_exit_fail();
- return ksft_exit_pass();
+ ksft_exit_fail();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/set-tz.c b/tools/testing/selftests/timers/set-tz.c
index 62bd33eb16..20daaf1782 100644
--- a/tools/testing/selftests/timers/set-tz.c
+++ b/tools/testing/selftests/timers/set-tz.c
@@ -102,9 +102,9 @@ int main(int argc, char **argv)
printf("[OK]\n");
set_tz(min, dst);
- return ksft_exit_pass();
+ ksft_exit_pass();
err:
set_tz(min, dst);
- return ksft_exit_fail();
+ ksft_exit_fail();
}
diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c
index 63913f75b3..c8e6bffe4e 100644
--- a/tools/testing/selftests/timers/skew_consistency.c
+++ b/tools/testing/selftests/timers/skew_consistency.c
@@ -70,8 +70,8 @@ int main(int argc, char **argv)
if (ret) {
printf("[FAILED]\n");
- return ksft_exit_fail();
+ ksft_exit_fail();
}
printf("[OK]\n");
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c
index 80aed4bf06..76b38e41d9 100644
--- a/tools/testing/selftests/timers/threadtest.c
+++ b/tools/testing/selftests/timers/threadtest.c
@@ -189,5 +189,5 @@ out:
/* die */
if (ret)
ksft_exit_fail();
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c
index d13ebde203..d500884801 100644
--- a/tools/testing/selftests/timers/valid-adjtimex.c
+++ b/tools/testing/selftests/timers/valid-adjtimex.c
@@ -320,10 +320,10 @@ int validate_set_offset(void)
int main(int argc, char **argv)
{
if (validate_freq())
- return ksft_exit_fail();
+ ksft_exit_fail();
if (validate_set_offset())
- return ksft_exit_fail();
+ ksft_exit_fail();
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/tty/tty_tstamp_update.c b/tools/testing/selftests/tty/tty_tstamp_update.c
index 0ee97943dc..9e1a40f5db 100644
--- a/tools/testing/selftests/tty/tty_tstamp_update.c
+++ b/tools/testing/selftests/tty/tty_tstamp_update.c
@@ -47,42 +47,60 @@ int main(int argc, char **argv)
int r;
char tty[PATH_MAX] = {};
struct stat st1, st2;
+ int result = KSFT_FAIL;
ksft_print_header();
ksft_set_plan(1);
r = readlink("/proc/self/fd/0", tty, PATH_MAX);
- if (r < 0)
- ksft_exit_fail_msg("readlink on /proc/self/fd/0 failed: %m\n");
+ if (r < 0) {
+ ksft_print_msg("readlink on /proc/self/fd/0 failed: %m\n");
+ goto out;
+ }
+
+ if (!tty_valid(tty)) {
+ ksft_print_msg("invalid tty path '%s'\n", tty);
+ result = KSFT_SKIP;
+ goto out;
- if (!tty_valid(tty))
- ksft_exit_skip("invalid tty path '%s'\n", tty);
+ }
r = stat(tty, &st1);
- if (r < 0)
- ksft_exit_fail_msg("stat failed on tty path '%s': %m\n", tty);
+ if (r < 0) {
+ ksft_print_msg("stat failed on tty path '%s': %m\n", tty);
+ goto out;
+ }
/* We need to wait at least 8 seconds in order to observe timestamp change */
/* https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=fbf47635315ab308c9b58a1ea0906e711a9228de */
sleep(10);
r = write_dev_tty();
- if (r < 0)
- ksft_exit_fail_msg("failed to write to /dev/tty: %s\n",
- strerror(-r));
+ if (r < 0) {
+ ksft_print_msg("failed to write to /dev/tty: %s\n",
+ strerror(-r));
+ goto out;
+ }
r = stat(tty, &st2);
- if (r < 0)
- ksft_exit_fail_msg("stat failed on tty path '%s': %m\n", tty);
+ if (r < 0) {
+ ksft_print_msg("stat failed on tty path '%s': %m\n", tty);
+ goto out;
+ }
/* We wrote to the terminal so timestamps should have been updated */
if (st1.st_atim.tv_sec == st2.st_atim.tv_sec &&
st1.st_mtim.tv_sec == st2.st_mtim.tv_sec) {
- ksft_test_result_fail("tty timestamps not updated\n");
- ksft_exit_fail();
+ ksft_print_msg("tty timestamps not updated\n");
+ goto out;
}
- ksft_test_result_pass(
+ ksft_print_msg(
"timestamps of terminal '%s' updated after write to /dev/tty\n", tty);
- return EXIT_SUCCESS;
+ result = KSFT_PASS;
+
+out:
+ ksft_test_result_report(result, "tty_tstamp_update\n");
+
+ ksft_finished();
}
diff --git a/tools/testing/selftests/user_events/ftrace_test.c b/tools/testing/selftests/user_events/ftrace_test.c
index dcd7509fe2..0bb46793dc 100644
--- a/tools/testing/selftests/user_events/ftrace_test.c
+++ b/tools/testing/selftests/user_events/ftrace_test.c
@@ -261,6 +261,12 @@ TEST_F(user, register_events) {
ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
ASSERT_EQ(0, reg.write_index);
+ /* Register without separator spacing should still match */
+ reg.enable_bit = 29;
+ reg.name_args = (__u64)"__test_event u32 field1;u32 field2";
+ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+ ASSERT_EQ(0, reg.write_index);
+
/* Multiple registers to same name but different args should fail */
reg.enable_bit = 29;
reg.name_args = (__u64)"__test_event u32 field1;";
@@ -288,6 +294,8 @@ TEST_F(user, register_events) {
ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSUNREG, &unreg));
unreg.disable_bit = 30;
ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSUNREG, &unreg));
+ unreg.disable_bit = 29;
+ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSUNREG, &unreg));
/* Delete should have been auto-done after close and unregister */
close(self->data_fd);
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index d53a4d8008..98d8ba2afa 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -1,35 +1,30 @@
# SPDX-License-Identifier: GPL-2.0
-include ../lib.mk
-
uname_M := $(shell uname -m 2>/dev/null || echo not)
ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
-TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu
-TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi
-TEST_GEN_PROGS += $(OUTPUT)/vdso_test_clock_getres
+TEST_GEN_PROGS := vdso_test_gettimeofday
+TEST_GEN_PROGS += vdso_test_getcpu
+TEST_GEN_PROGS += vdso_test_abi
+TEST_GEN_PROGS += vdso_test_clock_getres
ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
-TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
+TEST_GEN_PROGS += vdso_standalone_test_x86
endif
-TEST_GEN_PROGS += $(OUTPUT)/vdso_test_correctness
+TEST_GEN_PROGS += vdso_test_correctness
CFLAGS := -std=gnu99
-CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
-LDFLAGS_vdso_test_correctness := -ldl
+
ifeq ($(CONFIG_X86_32),y)
LDLIBS += -lgcc_s
endif
-all: $(TEST_GEN_PROGS)
+include ../lib.mk
$(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c
$(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c
$(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c
$(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c
+
$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
- $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
- vdso_standalone_test_x86.c parse_vdso.c \
- -o $@
+$(OUTPUT)/vdso_standalone_test_x86: CFLAGS +=-nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
+
$(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c
- $(CC) $(CFLAGS) \
- vdso_test_correctness.c \
- -o $@ \
- $(LDFLAGS_vdso_test_correctness)
+$(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl
diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv32.config b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config
index a7f8e8a956..66290cf289 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/riscv32.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config
@@ -2,7 +2,7 @@ CONFIG_NONPORTABLE=y
CONFIG_ARCH_RV32I=y
CONFIG_MMU=y
CONFIG_FPU=y
-CONFIG_SOC_VIRT=y
+CONFIG_ARCH_VIRT=y
CONFIG_RISCV_ISA_FALLBACK=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv64.config b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config
index daeb3e5e09..db1aa9f388 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/riscv64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config
@@ -1,7 +1,7 @@
CONFIG_ARCH_RV64I=y
CONFIG_MMU=y
CONFIG_FPU=y
-CONFIG_SOC_VIRT=y
+CONFIG_ARCH_VIRT=y
CONFIG_RISCV_ISA_FALLBACK=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
index 507555714b..f314d3789f 100644
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -41,7 +41,6 @@ CONFIG_KALLSYMS=y
CONFIG_BUG=y
CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y
CONFIG_JUMP_LABEL=y
-CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_SHMEM=y
CONFIG_SLUB=y
diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c
index d884fd69dd..95aad6d884 100644
--- a/tools/testing/selftests/x86/amx.c
+++ b/tools/testing/selftests/x86/amx.c
@@ -103,21 +103,6 @@ static void clearhandler(int sig)
#define CPUID_LEAF1_ECX_XSAVE_MASK (1 << 26)
#define CPUID_LEAF1_ECX_OSXSAVE_MASK (1 << 27)
-static inline void check_cpuid_xsave(void)
-{
- uint32_t eax, ebx, ecx, edx;
-
- /*
- * CPUID.1:ECX.XSAVE[bit 26] enumerates general
- * support for the XSAVE feature set, including
- * XGETBV.
- */
- __cpuid_count(1, 0, eax, ebx, ecx, edx);
- if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK))
- fatal_error("cpuid: no CPU xsave support");
- if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK))
- fatal_error("cpuid: no OS xsave support");
-}
static uint32_t xbuf_size;
@@ -350,6 +335,7 @@ enum expected_result { FAIL_EXPECTED, SUCCESS_EXPECTED };
/* arch_prctl() and sigaltstack() test */
+#define ARCH_GET_XCOMP_SUPP 0x1021
#define ARCH_GET_XCOMP_PERM 0x1022
#define ARCH_REQ_XCOMP_PERM 0x1023
@@ -928,8 +914,15 @@ static void test_ptrace(void)
int main(void)
{
- /* Check hardware availability at first */
- check_cpuid_xsave();
+ unsigned long features;
+ long rc;
+
+ rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_SUPP, &features);
+ if (rc || (features & XFEATURE_MASK_XTILE) != XFEATURE_MASK_XTILE) {
+ ksft_print_msg("no AMX support\n");
+ return KSFT_SKIP;
+ }
+
check_cpuid_xtiledata();
init_stashed_xsave();
diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c
index 215b8150b7..0ea4f68139 100644
--- a/tools/testing/selftests/x86/lam.c
+++ b/tools/testing/selftests/x86/lam.c
@@ -1183,7 +1183,7 @@ int main(int argc, char **argv)
if (!cpu_has_lam()) {
ksft_print_msg("Unsupported LAM feature!\n");
- return -1;
+ return KSFT_SKIP;
}
while ((c = getopt(argc, argv, "ht:")) != -1) {
@@ -1237,5 +1237,5 @@ int main(int argc, char **argv)
ksft_set_plan(tests_cnt);
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c
index f0d876d482..d53959e035 100644
--- a/tools/testing/selftests/x86/test_mremap_vdso.c
+++ b/tools/testing/selftests/x86/test_mremap_vdso.c
@@ -19,6 +19,7 @@
#include <sys/auxv.h>
#include <sys/syscall.h>
#include <sys/wait.h>
+#include "../kselftest.h"
#define PAGE_SIZE 4096
@@ -29,13 +30,13 @@ static int try_to_remap(void *vdso_addr, unsigned long size)
/* Searching for memory location where to remap */
dest_addr = mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (dest_addr == MAP_FAILED) {
- printf("[WARN]\tmmap failed (%d): %m\n", errno);
+ ksft_print_msg("WARN: mmap failed (%d): %m\n", errno);
return 0;
}
- printf("[NOTE]\tMoving vDSO: [%p, %#lx] -> [%p, %#lx]\n",
- vdso_addr, (unsigned long)vdso_addr + size,
- dest_addr, (unsigned long)dest_addr + size);
+ ksft_print_msg("Moving vDSO: [%p, %#lx] -> [%p, %#lx]\n",
+ vdso_addr, (unsigned long)vdso_addr + size,
+ dest_addr, (unsigned long)dest_addr + size);
fflush(stdout);
new_addr = mremap(vdso_addr, size, size,
@@ -43,10 +44,10 @@ static int try_to_remap(void *vdso_addr, unsigned long size)
if ((unsigned long)new_addr == (unsigned long)-1) {
munmap(dest_addr, size);
if (errno == EINVAL) {
- printf("[NOTE]\tvDSO partial move failed, will try with bigger size\n");
+ ksft_print_msg("vDSO partial move failed, will try with bigger size\n");
return -1; /* Retry with larger */
}
- printf("[FAIL]\tmremap failed (%d): %m\n", errno);
+ ksft_print_msg("[FAIL]\tmremap failed (%d): %m\n", errno);
return 1;
}
@@ -58,11 +59,12 @@ int main(int argc, char **argv, char **envp)
{
pid_t child;
+ ksft_print_header();
+ ksft_set_plan(1);
+
child = fork();
- if (child == -1) {
- printf("[WARN]\tfailed to fork (%d): %m\n", errno);
- return 1;
- }
+ if (child == -1)
+ ksft_exit_fail_msg("failed to fork (%d): %m\n", errno);
if (child == 0) {
unsigned long vdso_size = PAGE_SIZE;
@@ -70,9 +72,9 @@ int main(int argc, char **argv, char **envp)
int ret = -1;
auxval = getauxval(AT_SYSINFO_EHDR);
- printf("\tAT_SYSINFO_EHDR is %#lx\n", auxval);
+ ksft_print_msg("AT_SYSINFO_EHDR is %#lx\n", auxval);
if (!auxval || auxval == -ENOENT) {
- printf("[WARN]\tgetauxval failed\n");
+ ksft_print_msg("WARN: getauxval failed\n");
return 0;
}
@@ -92,16 +94,13 @@ int main(int argc, char **argv, char **envp)
int status;
if (waitpid(child, &status, 0) != child ||
- !WIFEXITED(status)) {
- printf("[FAIL]\tmremap() of the vDSO does not work on this kernel!\n");
- return 1;
- } else if (WEXITSTATUS(status) != 0) {
- printf("[FAIL]\tChild failed with %d\n",
- WEXITSTATUS(status));
- return 1;
- }
- printf("[OK]\n");
+ !WIFEXITED(status))
+ ksft_test_result_fail("mremap() of the vDSO does not work on this kernel!\n");
+ else if (WEXITSTATUS(status) != 0)
+ ksft_test_result_fail("Child failed with %d\n", WEXITSTATUS(status));
+ else
+ ksft_test_result_pass("%s\n", __func__);
}
- return 0;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/x86/test_shadow_stack.c b/tools/testing/selftests/x86/test_shadow_stack.c
index 757e6527f6..ee909a7927 100644
--- a/tools/testing/selftests/x86/test_shadow_stack.c
+++ b/tools/testing/selftests/x86/test_shadow_stack.c
@@ -556,7 +556,7 @@ struct node {
* looked at the shadow stack gaps.
* 5. See if it landed in the gap.
*/
-int test_guard_gap(void)
+int test_guard_gap_other_gaps(void)
{
void *free_area, *shstk, *test_map = (void *)0xFFFFFFFFFFFFFFFF;
struct node *head = NULL, *cur;
@@ -593,11 +593,64 @@ int test_guard_gap(void)
if (shstk - test_map - PAGE_SIZE != PAGE_SIZE)
return 1;
- printf("[OK]\tGuard gap test\n");
+ printf("[OK]\tGuard gap test, other mapping's gaps\n");
return 0;
}
+/* Tests respecting the guard gap of the mapping getting placed */
+int test_guard_gap_new_mappings_gaps(void)
+{
+ void *free_area, *shstk_start, *test_map = (void *)0xFFFFFFFFFFFFFFFF;
+ struct node *head = NULL, *cur;
+ int ret = 0;
+
+ free_area = mmap(0, PAGE_SIZE * 4, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ munmap(free_area, PAGE_SIZE * 4);
+
+ /* Test letting map_shadow_stack find a free space */
+ shstk_start = mmap(free_area, PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (shstk_start == MAP_FAILED || shstk_start != free_area)
+ return 1;
+
+ while (test_map > shstk_start) {
+ test_map = (void *)syscall(__NR_map_shadow_stack, 0, PAGE_SIZE, 0);
+ if (test_map == MAP_FAILED) {
+ printf("[INFO]\tmap_shadow_stack MAP_FAILED\n");
+ ret = 1;
+ break;
+ }
+
+ cur = malloc(sizeof(*cur));
+ cur->mapping = test_map;
+
+ cur->next = head;
+ head = cur;
+
+ if (test_map == free_area + PAGE_SIZE) {
+ printf("[INFO]\tNew mapping has other mapping in guard gap!\n");
+ ret = 1;
+ break;
+ }
+ }
+
+ while (head) {
+ cur = head;
+ head = cur->next;
+ munmap(cur->mapping, PAGE_SIZE);
+ free(cur);
+ }
+
+ munmap(shstk_start, PAGE_SIZE);
+
+ if (!ret)
+ printf("[OK]\tGuard gap test, placement mapping's gaps\n");
+
+ return ret;
+}
+
/*
* Too complicated to pull it out of the 32 bit header, but also get the
* 64 bit one needed above. Just define a copy here.
@@ -850,9 +903,15 @@ int main(int argc, char *argv[])
goto out;
}
- if (test_guard_gap()) {
+ if (test_guard_gap_other_gaps()) {
+ ret = 1;
+ printf("[FAIL]\tGuard gap test, other mappings' gaps\n");
+ goto out;
+ }
+
+ if (test_guard_gap_new_mappings_gaps()) {
ret = 1;
- printf("[FAIL]\tGuard gap test\n");
+ printf("[FAIL]\tGuard gap test, placement mapping's gaps\n");
goto out;
}
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index 47cab97280..d4c8e8d79d 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -21,6 +21,13 @@
#include <sys/uio.h>
#include "helpers.h"
+#include "../kselftest.h"
+
+#ifdef __x86_64__
+#define TOTAL_TESTS 13
+#else
+#define TOTAL_TESTS 8
+#endif
#ifdef __x86_64__
# define VSYS(x) (x)
@@ -39,18 +46,6 @@
/* max length of lines in /proc/self/maps - anything longer is skipped here */
#define MAPS_LINE_LEN 128
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
/* vsyscalls and vDSO */
bool vsyscall_map_r = false, vsyscall_map_x = false;
@@ -75,83 +70,25 @@ static void init_vdso(void)
if (!vdso)
vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
if (!vdso) {
- printf("[WARN]\tfailed to find vDSO\n");
+ ksft_print_msg("[WARN] failed to find vDSO\n");
return;
}
vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday");
if (!vdso_gtod)
- printf("[WARN]\tfailed to find gettimeofday in vDSO\n");
+ ksft_print_msg("[WARN] failed to find gettimeofday in vDSO\n");
vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
if (!vdso_gettime)
- printf("[WARN]\tfailed to find clock_gettime in vDSO\n");
+ ksft_print_msg("[WARN] failed to find clock_gettime in vDSO\n");
vdso_time = (time_func_t)dlsym(vdso, "__vdso_time");
if (!vdso_time)
- printf("[WARN]\tfailed to find time in vDSO\n");
+ ksft_print_msg("[WARN] failed to find time in vDSO\n");
vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
if (!vdso_getcpu)
- printf("[WARN]\tfailed to find getcpu in vDSO\n");
-}
-
-static int init_vsys(void)
-{
-#ifdef __x86_64__
- int nerrs = 0;
- FILE *maps;
- char line[MAPS_LINE_LEN];
- bool found = false;
-
- maps = fopen("/proc/self/maps", "r");
- if (!maps) {
- printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
- vsyscall_map_r = true;
- return 0;
- }
-
- while (fgets(line, MAPS_LINE_LEN, maps)) {
- char r, x;
- void *start, *end;
- char name[MAPS_LINE_LEN];
-
- /* sscanf() is safe here as strlen(name) >= strlen(line) */
- if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
- &start, &end, &r, &x, name) != 5)
- continue;
-
- if (strcmp(name, "[vsyscall]"))
- continue;
-
- printf("\tvsyscall map: %s", line);
-
- if (start != (void *)0xffffffffff600000 ||
- end != (void *)0xffffffffff601000) {
- printf("[FAIL]\taddress range is nonsense\n");
- nerrs++;
- }
-
- printf("\tvsyscall permissions are %c-%c\n", r, x);
- vsyscall_map_r = (r == 'r');
- vsyscall_map_x = (x == 'x');
-
- found = true;
- break;
- }
-
- fclose(maps);
-
- if (!found) {
- printf("\tno vsyscall map in /proc/self/maps\n");
- vsyscall_map_r = false;
- vsyscall_map_x = false;
- }
-
- return nerrs;
-#else
- return 0;
-#endif
+ ksft_print_msg("[WARN] failed to find getcpu in vDSO\n");
}
/* syscalls */
@@ -176,98 +113,76 @@ static inline long sys_getcpu(unsigned * cpu, unsigned * node,
return syscall(SYS_getcpu, cpu, node, cache);
}
-static jmp_buf jmpbuf;
-static volatile unsigned long segv_err;
-
-static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
-{
- ucontext_t *ctx = (ucontext_t *)ctx_void;
-
- segv_err = ctx->uc_mcontext.gregs[REG_ERR];
- siglongjmp(jmpbuf, 1);
-}
-
static double tv_diff(const struct timeval *a, const struct timeval *b)
{
return (double)(a->tv_sec - b->tv_sec) +
(double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6;
}
-static int check_gtod(const struct timeval *tv_sys1,
- const struct timeval *tv_sys2,
- const struct timezone *tz_sys,
- const char *which,
- const struct timeval *tv_other,
- const struct timezone *tz_other)
+static void check_gtod(const struct timeval *tv_sys1,
+ const struct timeval *tv_sys2,
+ const struct timezone *tz_sys,
+ const char *which,
+ const struct timeval *tv_other,
+ const struct timezone *tz_other)
{
- int nerrs = 0;
double d1, d2;
- if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) {
- printf("[FAIL] %s tz mismatch\n", which);
- nerrs++;
- }
+ if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest ||
+ tz_sys->tz_dsttime != tz_other->tz_dsttime))
+ ksft_print_msg("%s tz mismatch\n", which);
d1 = tv_diff(tv_other, tv_sys1);
d2 = tv_diff(tv_sys2, tv_other);
- printf("\t%s time offsets: %lf %lf\n", which, d1, d2);
- if (d1 < 0 || d2 < 0) {
- printf("[FAIL]\t%s time was inconsistent with the syscall\n", which);
- nerrs++;
- } else {
- printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which);
- }
+ ksft_print_msg("%s time offsets: %lf %lf\n", which, d1, d2);
- return nerrs;
+ ksft_test_result(!(d1 < 0 || d2 < 0), "%s gettimeofday()'s timeval\n", which);
}
-static int test_gtod(void)
+static void test_gtod(void)
{
struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys;
struct timezone tz_sys, tz_vdso, tz_vsys;
long ret_vdso = -1;
long ret_vsys = -1;
- int nerrs = 0;
- printf("[RUN]\ttest gettimeofday()\n");
+ ksft_print_msg("test gettimeofday()\n");
if (sys_gtod(&tv_sys1, &tz_sys) != 0)
- err(1, "syscall gettimeofday");
+ ksft_exit_fail_msg("syscall gettimeofday: %s\n", strerror(errno));
if (vdso_gtod)
ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
if (vsyscall_map_x)
ret_vsys = vgtod(&tv_vsys, &tz_vsys);
if (sys_gtod(&tv_sys2, &tz_sys) != 0)
- err(1, "syscall gettimeofday");
+ ksft_exit_fail_msg("syscall gettimeofday: %s\n", strerror(errno));
if (vdso_gtod) {
- if (ret_vdso == 0) {
- nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso);
- } else {
- printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso);
- nerrs++;
- }
+ if (ret_vdso == 0)
+ check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso);
+ else
+ ksft_test_result_fail("vDSO gettimeofday() failed: %ld\n", ret_vdso);
+ } else {
+ ksft_test_result_skip("vdso_gtod isn't set\n");
}
if (vsyscall_map_x) {
- if (ret_vsys == 0) {
- nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
- } else {
- printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys);
- nerrs++;
- }
+ if (ret_vsys == 0)
+ check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
+ else
+ ksft_test_result_fail("vsys gettimeofday() failed: %ld\n", ret_vsys);
+ } else {
+ ksft_test_result_skip("vsyscall_map_x isn't set\n");
}
-
- return nerrs;
}
-static int test_time(void) {
- int nerrs = 0;
-
- printf("[RUN]\ttest time()\n");
+static void test_time(void)
+{
long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0;
long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1;
+
+ ksft_print_msg("test time()\n");
t_sys1 = sys_time(&t2_sys1);
if (vdso_time)
t_vdso = vdso_time(&t2_vdso);
@@ -275,56 +190,60 @@ static int test_time(void) {
t_vsys = vtime(&t2_vsys);
t_sys2 = sys_time(&t2_sys2);
if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
- printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2);
- nerrs++;
- return nerrs;
+ ksft_print_msg("syscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n",
+ t_sys1, t2_sys1, t_sys2, t2_sys2);
+ ksft_test_result_skip("vdso_time\n");
+ ksft_test_result_skip("vdso_time\n");
+ return;
}
if (vdso_time) {
- if (t_vdso < 0 || t_vdso != t2_vdso) {
- printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso);
- nerrs++;
- } else if (t_vdso < t_sys1 || t_vdso > t_sys2) {
- printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2);
- nerrs++;
- } else {
- printf("[OK]\tvDSO time() is okay\n");
- }
+ if (t_vdso < 0 || t_vdso != t2_vdso)
+ ksft_test_result_fail("vDSO failed (ret:%ld output:%ld)\n",
+ t_vdso, t2_vdso);
+ else if (t_vdso < t_sys1 || t_vdso > t_sys2)
+ ksft_test_result_fail("vDSO returned the wrong time (%ld %ld %ld)\n",
+ t_sys1, t_vdso, t_sys2);
+ else
+ ksft_test_result_pass("vDSO time() is okay\n");
+ } else {
+ ksft_test_result_skip("vdso_time isn't set\n");
}
if (vsyscall_map_x) {
- if (t_vsys < 0 || t_vsys != t2_vsys) {
- printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
- nerrs++;
- } else if (t_vsys < t_sys1 || t_vsys > t_sys2) {
- printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2);
- nerrs++;
- } else {
- printf("[OK]\tvsyscall time() is okay\n");
- }
+ if (t_vsys < 0 || t_vsys != t2_vsys)
+ ksft_test_result_fail("vsyscall failed (ret:%ld output:%ld)\n",
+ t_vsys, t2_vsys);
+ else if (t_vsys < t_sys1 || t_vsys > t_sys2)
+ ksft_test_result_fail("vsyscall returned the wrong time (%ld %ld %ld)\n",
+ t_sys1, t_vsys, t_sys2);
+ else
+ ksft_test_result_pass("vsyscall time() is okay\n");
+ } else {
+ ksft_test_result_skip("vsyscall_map_x isn't set\n");
}
-
- return nerrs;
}
-static int test_getcpu(int cpu)
+static void test_getcpu(int cpu)
{
- int nerrs = 0;
+ unsigned int cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys;
long ret_sys, ret_vdso = -1, ret_vsys = -1;
+ unsigned int node = 0;
+ bool have_node = false;
+ cpu_set_t cpuset;
- printf("[RUN]\tgetcpu() on CPU %d\n", cpu);
+ ksft_print_msg("getcpu() on CPU %d\n", cpu);
- cpu_set_t cpuset;
CPU_ZERO(&cpuset);
CPU_SET(cpu, &cpuset);
if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
- printf("[SKIP]\tfailed to force CPU %d\n", cpu);
- return nerrs;
+ ksft_print_msg("failed to force CPU %d\n", cpu);
+ ksft_test_result_skip("vdso_getcpu\n");
+ ksft_test_result_skip("vsyscall_map_x\n");
+
+ return;
}
- unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys;
- unsigned node = 0;
- bool have_node = false;
ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
if (vdso_getcpu)
ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
@@ -332,10 +251,9 @@ static int test_getcpu(int cpu)
ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
if (ret_sys == 0) {
- if (cpu_sys != cpu) {
- printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu);
- nerrs++;
- }
+ if (cpu_sys != cpu)
+ ksft_print_msg("syscall reported CPU %hu but should be %d\n",
+ cpu_sys, cpu);
have_node = true;
node = node_sys;
@@ -343,63 +261,84 @@ static int test_getcpu(int cpu)
if (vdso_getcpu) {
if (ret_vdso) {
- printf("[FAIL]\tvDSO getcpu() failed\n");
- nerrs++;
+ ksft_test_result_fail("vDSO getcpu() failed\n");
} else {
if (!have_node) {
have_node = true;
node = node_vdso;
}
- if (cpu_vdso != cpu) {
- printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu);
- nerrs++;
- } else {
- printf("[OK]\tvDSO reported correct CPU\n");
- }
-
- if (node_vdso != node) {
- printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node);
- nerrs++;
+ if (cpu_vdso != cpu || node_vdso != node) {
+ if (cpu_vdso != cpu)
+ ksft_print_msg("vDSO reported CPU %hu but should be %d\n",
+ cpu_vdso, cpu);
+ if (node_vdso != node)
+ ksft_print_msg("vDSO reported node %hu but should be %hu\n",
+ node_vdso, node);
+ ksft_test_result_fail("Wrong values\n");
} else {
- printf("[OK]\tvDSO reported correct node\n");
+ ksft_test_result_pass("vDSO reported correct CPU and node\n");
}
}
+ } else {
+ ksft_test_result_skip("vdso_getcpu isn't set\n");
}
if (vsyscall_map_x) {
if (ret_vsys) {
- printf("[FAIL]\tvsyscall getcpu() failed\n");
- nerrs++;
+ ksft_test_result_fail("vsyscall getcpu() failed\n");
} else {
if (!have_node) {
have_node = true;
node = node_vsys;
}
- if (cpu_vsys != cpu) {
- printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu);
- nerrs++;
+ if (cpu_vsys != cpu || node_vsys != node) {
+ if (cpu_vsys != cpu)
+ ksft_print_msg("vsyscall reported CPU %hu but should be %d\n",
+ cpu_vsys, cpu);
+ if (node_vsys != node)
+ ksft_print_msg("vsyscall reported node %hu but should be %hu\n",
+ node_vsys, node);
+ ksft_test_result_fail("Wrong values\n");
} else {
- printf("[OK]\tvsyscall reported correct CPU\n");
- }
-
- if (node_vsys != node) {
- printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node);
- nerrs++;
- } else {
- printf("[OK]\tvsyscall reported correct node\n");
+ ksft_test_result_pass("vsyscall reported correct CPU and node\n");
}
}
+ } else {
+ ksft_test_result_skip("vsyscall_map_x isn't set\n");
}
+}
+
+#ifdef __x86_64__
+
+static jmp_buf jmpbuf;
+static volatile unsigned long segv_err;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
- return nerrs;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ ksft_exit_fail_msg("sigaction failed\n");
}
-static int test_vsys_r(void)
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
{
-#ifdef __x86_64__
- printf("[RUN]\tChecking read access to the vsyscall page\n");
+ ucontext_t *ctx = (ucontext_t *)ctx_void;
+
+ segv_err = ctx->uc_mcontext.gregs[REG_ERR];
+ siglongjmp(jmpbuf, 1);
+}
+
+static void test_vsys_r(void)
+{
+ ksft_print_msg("Checking read access to the vsyscall page\n");
bool can_read;
if (sigsetjmp(jmpbuf, 1) == 0) {
*(volatile int *)0xffffffffff600000;
@@ -408,32 +347,25 @@ static int test_vsys_r(void)
can_read = false;
}
- if (can_read && !vsyscall_map_r) {
- printf("[FAIL]\tWe have read access, but we shouldn't\n");
- return 1;
- } else if (!can_read && vsyscall_map_r) {
- printf("[FAIL]\tWe don't have read access, but we should\n");
- return 1;
- } else if (can_read) {
- printf("[OK]\tWe have read access\n");
- } else {
- printf("[OK]\tWe do not have read access: #PF(0x%lx)\n",
- segv_err);
- }
-#endif
-
- return 0;
+ if (can_read && !vsyscall_map_r)
+ ksft_test_result_fail("We have read access, but we shouldn't\n");
+ else if (!can_read && vsyscall_map_r)
+ ksft_test_result_fail("We don't have read access, but we should\n");
+ else if (can_read)
+ ksft_test_result_pass("We have read access\n");
+ else
+ ksft_test_result_pass("We do not have read access: #PF(0x%lx)\n", segv_err);
}
-static int test_vsys_x(void)
+static void test_vsys_x(void)
{
-#ifdef __x86_64__
if (vsyscall_map_x) {
/* We already tested this adequately. */
- return 0;
+ ksft_test_result_pass("vsyscall_map_x is true\n");
+ return;
}
- printf("[RUN]\tMake sure that vsyscalls really page fault\n");
+ ksft_print_msg("Make sure that vsyscalls really page fault\n");
bool can_exec;
if (sigsetjmp(jmpbuf, 1) == 0) {
@@ -443,20 +375,14 @@ static int test_vsys_x(void)
can_exec = false;
}
- if (can_exec) {
- printf("[FAIL]\tExecuting the vsyscall did not page fault\n");
- return 1;
- } else if (segv_err & (1 << 4)) { /* INSTR */
- printf("[OK]\tExecuting the vsyscall page failed: #PF(0x%lx)\n",
- segv_err);
- } else {
- printf("[FAIL]\tExecution failed with the wrong error: #PF(0x%lx)\n",
- segv_err);
- return 1;
- }
-#endif
-
- return 0;
+ if (can_exec)
+ ksft_test_result_fail("Executing the vsyscall did not page fault\n");
+ else if (segv_err & (1 << 4)) /* INSTR */
+ ksft_test_result_pass("Executing the vsyscall page failed: #PF(0x%lx)\n",
+ segv_err);
+ else
+ ksft_test_result_fail("Execution failed with the wrong error: #PF(0x%lx)\n",
+ segv_err);
}
/*
@@ -470,14 +396,13 @@ static int test_vsys_x(void)
* fact that ptrace() ever worked was a nice courtesy of old kernels,
* but the code to support it is fairly gross.
*/
-static int test_process_vm_readv(void)
+static void test_process_vm_readv(void)
{
-#ifdef __x86_64__
char buf[4096];
struct iovec local, remote;
int ret;
- printf("[RUN]\tprocess_vm_readv() from vsyscall page\n");
+ ksft_print_msg("process_vm_readv() from vsyscall page\n");
local.iov_base = buf;
local.iov_len = 4096;
@@ -489,27 +414,71 @@ static int test_process_vm_readv(void)
* We expect process_vm_readv() to work if and only if the
* vsyscall page is readable.
*/
- printf("[%s]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", vsyscall_map_r ? "FAIL" : "OK", ret, errno);
- return vsyscall_map_r ? 1 : 0;
+ ksft_test_result(!vsyscall_map_r,
+ "process_vm_readv() failed (ret = %d, errno = %d)\n", ret, errno);
+ return;
}
- if (vsyscall_map_r) {
- if (!memcmp(buf, remote.iov_base, sizeof(buf))) {
- printf("[OK]\tIt worked and read correct data\n");
- } else {
- printf("[FAIL]\tIt worked but returned incorrect data\n");
- return 1;
+ if (vsyscall_map_r)
+ ksft_test_result(!memcmp(buf, remote.iov_base, sizeof(buf)), "Read data\n");
+ else
+ ksft_test_result_fail("process_rm_readv() succeeded, but it should have failed in this configuration\n");
+}
+
+static void init_vsys(void)
+{
+ int nerrs = 0;
+ FILE *maps;
+ char line[MAPS_LINE_LEN];
+ bool found = false;
+
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps) {
+ ksft_test_result_skip("Could not open /proc/self/maps -- assuming vsyscall is r-x\n");
+ vsyscall_map_r = true;
+ return;
+ }
+
+ while (fgets(line, MAPS_LINE_LEN, maps)) {
+ char r, x;
+ void *start, *end;
+ char name[MAPS_LINE_LEN];
+
+ /* sscanf() is safe here as strlen(name) >= strlen(line) */
+ if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+ &start, &end, &r, &x, name) != 5)
+ continue;
+
+ if (strcmp(name, "[vsyscall]"))
+ continue;
+
+ ksft_print_msg("vsyscall map: %s", line);
+
+ if (start != (void *)0xffffffffff600000 ||
+ end != (void *)0xffffffffff601000) {
+ ksft_print_msg("address range is nonsense\n");
+ nerrs++;
}
- } else {
- printf("[FAIL]\tprocess_rm_readv() succeeded, but it should have failed in this configuration\n");
- return 1;
+
+ ksft_print_msg("vsyscall permissions are %c-%c\n", r, x);
+ vsyscall_map_r = (r == 'r');
+ vsyscall_map_x = (x == 'x');
+
+ found = true;
+ break;
}
-#endif
- return 0;
+ fclose(maps);
+
+ if (!found) {
+ ksft_print_msg("no vsyscall map in /proc/self/maps\n");
+ vsyscall_map_r = false;
+ vsyscall_map_x = false;
+ }
+
+ ksft_test_result(!nerrs, "vsyscall map\n");
}
-#ifdef __x86_64__
static volatile sig_atomic_t num_vsyscall_traps;
static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
@@ -521,15 +490,17 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
num_vsyscall_traps++;
}
-static int test_emulation(void)
+static void test_emulation(void)
{
time_t tmp;
bool is_native;
- if (!vsyscall_map_x)
- return 0;
+ if (!vsyscall_map_x) {
+ ksft_test_result_skip("vsyscall_map_x isn't set\n");
+ return;
+ }
- printf("[RUN]\tchecking that vsyscalls are emulated\n");
+ ksft_print_msg("checking that vsyscalls are emulated\n");
sethandler(SIGTRAP, sigtrap, 0);
set_eflags(get_eflags() | X86_EFLAGS_TF);
vtime(&tmp);
@@ -545,36 +516,35 @@ static int test_emulation(void)
*/
is_native = (num_vsyscall_traps > 1);
- printf("[%s]\tvsyscalls are %s (%d instructions in vsyscall page)\n",
- (is_native ? "FAIL" : "OK"),
- (is_native ? "native" : "emulated"),
- (int)num_vsyscall_traps);
-
- return is_native;
+ ksft_test_result(!is_native, "vsyscalls are %s (%d instructions in vsyscall page)\n",
+ (is_native ? "native" : "emulated"), (int)num_vsyscall_traps);
}
#endif
int main(int argc, char **argv)
{
- int nerrs = 0;
+ int total_tests = TOTAL_TESTS;
- init_vdso();
- nerrs += init_vsys();
+ ksft_print_header();
+ ksft_set_plan(total_tests);
- nerrs += test_gtod();
- nerrs += test_time();
- nerrs += test_getcpu(0);
- nerrs += test_getcpu(1);
-
- sethandler(SIGSEGV, sigsegv, 0);
- nerrs += test_vsys_r();
- nerrs += test_vsys_x();
+ init_vdso();
+#ifdef __x86_64__
+ init_vsys();
+#endif
- nerrs += test_process_vm_readv();
+ test_gtod();
+ test_time();
+ test_getcpu(0);
+ test_getcpu(1);
#ifdef __x86_64__
- nerrs += test_emulation();
+ sethandler(SIGSEGV, sigsegv, 0);
+ test_vsys_r();
+ test_vsys_x();
+ test_process_vm_readv();
+ test_emulation();
#endif
- return nerrs ? 1 : 0;
+ ksft_finished();
}