diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-07 13:17:46 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-07 13:17:46 +0000 |
commit | 7f3a4257159dea8e7ef66d1a539dc6df708b8ed3 (patch) | |
tree | bcc69b5f4609f348fac49e2f59e210b29eaea783 /tools/testing | |
parent | Adding upstream version 6.9.12. (diff) | |
download | linux-7f3a4257159dea8e7ef66d1a539dc6df708b8ed3.tar.xz linux-7f3a4257159dea8e7ef66d1a539dc6df708b8ed3.zip |
Adding upstream version 6.10.3.upstream/6.10.3
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tools/testing')
605 files changed, 31647 insertions, 10756 deletions
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 0d0ca63de8..eaf091a3d3 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -128,7 +128,7 @@ static struct { #define CXL_TEST_EVENT_CNT_MAX 15 /* Set a number of events to return at a time for simulation. */ -#define CXL_TEST_EVENT_CNT 3 +#define CXL_TEST_EVENT_RET_MAX 4 struct mock_event_log { u16 clear_idx; @@ -223,6 +223,12 @@ static void mes_add_event(struct mock_event_store *mes, log->nr_events++; } +/* + * Vary the number of events returned to simulate events occuring while the + * logs are being read. + */ +static int ret_limit = 0; + static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd) { struct cxl_get_event_payload *pl; @@ -234,14 +240,18 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd) if (cmd->size_in != sizeof(log_type)) return -EINVAL; - if (cmd->size_out < struct_size(pl, records, CXL_TEST_EVENT_CNT)) + ret_limit = (ret_limit + 1) % CXL_TEST_EVENT_RET_MAX; + if (!ret_limit) + ret_limit = 1; + + if (cmd->size_out < struct_size(pl, records, ret_limit)) return -EINVAL; log_type = *((u8 *)cmd->payload_in); if (log_type >= CXL_EVENT_TYPE_MAX) return -EINVAL; - memset(cmd->payload_out, 0, cmd->size_out); + memset(cmd->payload_out, 0, struct_size(pl, records, 0)); log = event_find_log(dev, log_type); if (!log || event_log_empty(log)) @@ -249,7 +259,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd) pl = cmd->payload_out; - for (i = 0; i < CXL_TEST_EVENT_CNT && !event_log_empty(log); i++) { + for (i = 0; i < ret_limit && !event_log_empty(log); i++) { memcpy(&pl->records[i], event_get_current(log), sizeof(pl->records[i])); pl->records[i].event.generic.hdr.handle = @@ -257,6 +267,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd) log->cur_idx++; } + cmd->size_out = struct_size(pl, records, i); pl->record_count = cpu_to_le16(i); if (!event_log_empty(log)) pl->flags |= CXL_GET_EVENT_FLAG_MORE_RECORDS; diff --git a/tools/testing/kunit/qemu_configs/riscv.py b/tools/testing/kunit/qemu_configs/riscv.py index 12a1d52597..c87758030f 100644 --- a/tools/testing/kunit/qemu_configs/riscv.py +++ b/tools/testing/kunit/qemu_configs/riscv.py @@ -13,7 +13,7 @@ if not os.path.isfile(OPENSBI_PATH): QEMU_ARCH = QemuArchParams(linux_arch='riscv', kconfig=''' -CONFIG_SOC_VIRT=y +CONFIG_ARCH_VIRT=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c index b8419f4603..b438f3d053 100644 --- a/tools/testing/nvdimm/test/ndtest.c +++ b/tools/testing/nvdimm/test/ndtest.c @@ -13,6 +13,8 @@ #include <nd-core.h> #include <linux/printk.h> #include <linux/seq_buf.h> +#include <linux/papr_scm.h> +#include <uapi/linux/papr_pdsm.h> #include "../watermark.h" #include "nfit_test.h" @@ -830,12 +832,11 @@ static int ndtest_bus_register(struct ndtest_priv *p) return 0; } -static int ndtest_remove(struct platform_device *pdev) +static void ndtest_remove(struct platform_device *pdev) { struct ndtest_priv *p = to_ndtest_priv(&pdev->dev); nvdimm_bus_unregister(p->bus); - return 0; } static int ndtest_probe(struct platform_device *pdev) @@ -882,7 +883,7 @@ static const struct platform_device_id ndtest_id[] = { static struct platform_driver ndtest_driver = { .probe = ndtest_probe, - .remove = ndtest_remove, + .remove_new = ndtest_remove, .driver = { .name = KBUILD_MODNAME, }, diff --git a/tools/testing/nvdimm/test/ndtest.h b/tools/testing/nvdimm/test/ndtest.h index 2c54c9cbb9..8f27ad6f73 100644 --- a/tools/testing/nvdimm/test/ndtest.h +++ b/tools/testing/nvdimm/test/ndtest.h @@ -5,37 +5,6 @@ #include <linux/platform_device.h> #include <linux/libnvdimm.h> -/* SCM device is unable to persist memory contents */ -#define PAPR_PMEM_UNARMED (1ULL << (63 - 0)) -/* SCM device failed to persist memory contents */ -#define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1)) -/* SCM device contents are not persisted from previous IPL */ -#define PAPR_PMEM_EMPTY (1ULL << (63 - 3)) -#define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4)) -/* SCM device will be garded off next IPL due to failure */ -#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5)) -/* SCM contents cannot persist due to current platform health status */ -#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6)) - -/* Bits status indicators for health bitmap indicating unarmed dimm */ -#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \ - PAPR_PMEM_HEALTH_UNHEALTHY) - -#define PAPR_PMEM_SAVE_FAILED (1ULL << (63 - 10)) - -/* Bits status indicators for health bitmap indicating unflushed dimm */ -#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY) - -/* Bits status indicators for health bitmap indicating unrestored dimm */ -#define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY) - -/* Bit status indicators for smart event notification */ -#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \ - PAPR_PMEM_HEALTH_FATAL | \ - PAPR_PMEM_HEALTH_UNHEALTHY) - -#define PAPR_PMEM_SAVE_MASK (PAPR_PMEM_SAVE_FAILED) - struct ndtest_config; struct ndtest_priv { diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index e150483365..9039f3709a 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -17,8 +17,10 @@ TARGETS += devices TARGETS += dmabuf-heaps TARGETS += drivers/dma-buf TARGETS += drivers/s390x/uvdevice +TARGETS += drivers/net TARGETS += drivers/net/bonding TARGETS += drivers/net/team +TARGETS += drivers/net/virtio_net TARGETS += dt TARGETS += efivarfs TARGETS += exec @@ -63,7 +65,7 @@ TARGETS += net/hsr TARGETS += net/mptcp TARGETS += net/openvswitch TARGETS += net/tcp_ao -TARGETS += netfilter +TARGETS += net/netfilter TARGETS += nsfs TARGETS += perf_events TARGETS += pidfd @@ -116,6 +118,13 @@ TARGETS += zram TARGETS_HOTPLUG = cpu-hotplug TARGETS_HOTPLUG += memory-hotplug +# Networking tests want the net/lib target, include it automatically +ifneq ($(filter net drivers/net drivers/net/hw,$(TARGETS)),) +ifeq ($(filter net/lib,$(TARGETS)),) + INSTALL_DEP_TARGETS := net/lib +endif +endif + # User can optionally provide a TARGETS skiplist. By default we skip # BPF since it has cutting edge build time dependencies which require # more effort to install. @@ -245,7 +254,7 @@ ifdef INSTALL_PATH install -m 744 run_kselftest.sh $(INSTALL_PATH)/ rm -f $(TEST_LIST) @ret=1; \ - for TARGET in $(TARGETS); do \ + for TARGET in $(TARGETS) $(INSTALL_DEP_TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install \ INSTALL_PATH=$(INSTALL_PATH)/$$TARGET \ diff --git a/tools/testing/selftests/alsa/conf.c b/tools/testing/selftests/alsa/conf.c index 89e3656a04..e2b3a5810f 100644 --- a/tools/testing/selftests/alsa/conf.c +++ b/tools/testing/selftests/alsa/conf.c @@ -105,7 +105,7 @@ static struct card_cfg_data *conf_data_by_card(int card, bool msg) return NULL; } -static int dump_config_tree(snd_config_t *top) +static void dump_config_tree(snd_config_t *top) { snd_output_t *out; int err; diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c index 02ee3a91b7..285c47dd42 100644 --- a/tools/testing/selftests/arm64/abi/tpidr2.c +++ b/tools/testing/selftests/arm64/abi/tpidr2.c @@ -262,7 +262,7 @@ static int write_clone_read(void) int main(int argc, char **argv) { - int ret, i; + int ret; putstr("TAP version 13\n"); putstr("1.."); diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index f1aebabfb0..5025401323 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -17,7 +17,6 @@ test_dev_cgroup test_verifier_log feature test_sock -test_sock_addr urandom_read test_sockmap test_lirc_mode2_user diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index d8ade15e27..0445ac38bc 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -10,5 +10,3 @@ fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_mu fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95 missed/kprobe_recursion # missed_kprobe_recursion__attach unexpected error: -95 (errno 95) -verifier_arena # JIT does not support arena -arena_htab # JIT does not support arena diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index f4a2f66a68..c34adf39ee 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -6,3 +6,4 @@ stacktrace_build_id # compare_map_keys stackid_hmap vs. sta verifier_iterating_callbacks verifier_arena # JIT does not support arena arena_htab # JIT does not support arena +arena_atomics diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 3b9eb40d63..dd49c1d23a 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -53,6 +53,7 @@ progs/syscall.c-CFLAGS := -fno-strict-aliasing progs/test_pkt_md_access.c-CFLAGS := -fno-strict-aliasing progs/test_sk_lookup.c-CFLAGS := -fno-strict-aliasing progs/timer_crash.c-CFLAGS := -fno-strict-aliasing +progs/test_global_func9.c-CFLAGS := -fno-strict-aliasing ifneq ($(LLVM),) # Silence some warnings when compiled with clang @@ -81,11 +82,24 @@ TEST_INST_SUBDIRS += bpf_gcc # The following tests contain C code that, although technically legal, # triggers GCC warnings that cannot be disabled: declaration of # anonymous struct types in function parameter lists. -progs/btf_dump_test_case_bitfields.c-CFLAGS := -Wno-error -progs/btf_dump_test_case_namespacing.c-CFLAGS := -Wno-error -progs/btf_dump_test_case_packing.c-CFLAGS := -Wno-error -progs/btf_dump_test_case_padding.c-CFLAGS := -Wno-error -progs/btf_dump_test_case_syntax.c-CFLAGS := -Wno-error +progs/btf_dump_test_case_bitfields.c-bpf_gcc-CFLAGS := -Wno-error +progs/btf_dump_test_case_namespacing.c-bpf_gcc-CFLAGS := -Wno-error +progs/btf_dump_test_case_packing.c-bpf_gcc-CFLAGS := -Wno-error +progs/btf_dump_test_case_padding.c-bpf_gcc-CFLAGS := -Wno-error +progs/btf_dump_test_case_syntax.c-bpf_gcc-CFLAGS := -Wno-error + +# The following tests do type-punning, via the __imm_insn macro, from +# `struct bpf_insn' to long and then uses the value. This triggers an +# "is used uninitialized" warning in GCC due to strict-aliasing +# rules. +progs/verifier_ref_tracking.c-bpf_gcc-CFLAGS := -fno-strict-aliasing +progs/verifier_unpriv.c-bpf_gcc-CFLAGS := -fno-strict-aliasing +progs/verifier_cgroup_storage.c-bpf_gcc-CFLAGS := -fno-strict-aliasing +progs/verifier_ld_ind.c-bpf_gcc-CFLAGS := -fno-strict-aliasing +progs/verifier_map_ret_val.c-bpf_gcc-CFLAGS := -fno-strict-aliasing +progs/verifier_spill_fill.c-bpf_gcc-CFLAGS := -fno-strict-aliasing +progs/verifier_subprog_precision.c-bpf_gcc-CFLAGS := -fno-strict-aliasing +progs/verifier_uninit.c-bpf_gcc-CFLAGS := -fno-strict-aliasing endif ifneq ($(CLANG_CPUV4),) @@ -102,8 +116,6 @@ TEST_PROGS := test_kmod.sh \ test_xdp_redirect_multi.sh \ test_xdp_meta.sh \ test_xdp_veth.sh \ - test_offload.py \ - test_sock_addr.sh \ test_tunnel.sh \ test_lwt_seg6local.sh \ test_lirc_mode2.sh \ @@ -128,7 +140,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \ test_xdp_vlan.sh test_bpftool.py # Compile but not part of 'make run_tests' -TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ +TEST_GEN_PROGS_EXTENDED = test_skb_cgroup_id_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \ xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \ @@ -136,18 +148,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi -# Emit succinct information message describing current building step -# $1 - generic step name (e.g., CC, LINK, etc); -# $2 - optional "flavor" specifier; if provided, will be emitted as [flavor]; -# $3 - target (assumed to be file); only file name will be emitted; -# $4 - optional extra arg, emitted as-is, if provided. -ifeq ($(V),1) -Q = -msg = -else -Q = @ -msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))"; -MAKEFLAGS += --no-print-directory +ifneq ($(V),1) submake_extras := feature_display=0 endif @@ -274,7 +275,7 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT) $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF) \ BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ - BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf \ + BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf/ \ BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) \ EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS)' \ EXTRA_LDFLAGS='$(SAN_LDFLAGS)' && \ @@ -290,11 +291,11 @@ UNPRIV_HELPERS := $(OUTPUT)/unpriv_helpers.o TRACE_HELPERS := $(OUTPUT)/trace_helpers.o JSON_WRITER := $(OUTPUT)/json_writer.o CAP_HELPERS := $(OUTPUT)/cap_helpers.o +NETWORK_HELPERS := $(OUTPUT)/network_helpers.o $(OUTPUT)/test_dev_cgroup: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(OUTPUT)/test_skb_cgroup_id_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(OUTPUT)/test_sock: $(CGROUP_HELPERS) $(TESTING_HELPERS) -$(OUTPUT)/test_sock_addr: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(OUTPUT)/test_sockmap: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELPERS) $(OUTPUT)/get_cgroup_id_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) @@ -308,6 +309,7 @@ $(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS) $(OUTPUT)/test_maps: $(TESTING_HELPERS) $(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS) $(UNPRIV_HELPERS) $(OUTPUT)/xsk.o: $(BPFOBJ) +$(OUTPUT)/test_tcp_check_syncookie_user: $(NETWORK_HELPERS) BPFTOOL ?= $(DEFAULT_BPFTOOL) $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ @@ -442,7 +444,7 @@ endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2) - $(Q)$(BPF_GCC) $3 -O2 -c $1 -o $2 + $(Q)$(BPF_GCC) $3 -DBPF_NO_PRESERVE_ACCESS_INDEX -Wno-attributes -O2 -c $1 -o $2 endef SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c @@ -455,7 +457,7 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \ LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \ trace_printk.c trace_vprintk.c map_ptr_kern.c \ core_kern.c core_kern_overflow.c test_ringbuf.c \ - test_ringbuf_map_key.c + test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c # Generate both light skeleton and libbpf skeleton for these LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \ @@ -481,7 +483,7 @@ LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(ske # $eval()) and pass control to DEFINE_TEST_RUNNER_RULES. # Parameters: # $1 - test runner base binary name (e.g., test_progs) -# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, gcc-bpf, etc) +# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, bpf_gcc, etc) define DEFINE_TEST_RUNNER TRUNNER_OUTPUT := $(OUTPUT)$(if $2,/)$2 @@ -509,7 +511,7 @@ endef # Using TRUNNER_XXX variables, provided by callers of DEFINE_TEST_RUNNER and # set up by DEFINE_TEST_RUNNER itself, create test runner build rules with: # $1 - test runner base binary name (e.g., test_progs) -# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, gcc-bpf, etc) +# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, bpf_gcc, etc) define DEFINE_TEST_RUNNER_RULES ifeq ($($(TRUNNER_OUTPUT)-dir),) @@ -532,7 +534,8 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.bpf.o: \ | $(TRUNNER_OUTPUT) $$(BPFOBJ) $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ $(TRUNNER_BPF_CFLAGS) \ - $$($$<-CFLAGS)) + $$($$<-CFLAGS) \ + $$($$<-$2-CFLAGS)) $(TRUNNER_BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT) $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) @@ -658,7 +661,7 @@ $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32)) # Define test_progs-cpuv4 test runner. ifneq ($(CLANG_CPUV4),) TRUNNER_BPF_BUILD_RULE := CLANG_CPUV4_BPF_BUILD_RULE -TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) +TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) -DENABLE_ATOMICS_TESTS $(eval $(call DEFINE_TEST_RUNNER,test_progs,cpuv4)) endif @@ -695,7 +698,7 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) # Include find_bit.c to compile xskxceiver. EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c -$(OUTPUT)/xskxceiver: $(EXTRA_SRC) xskxceiver.c xskxceiver.h $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT) +$(OUTPUT)/xskxceiver: $(EXTRA_SRC) xskxceiver.c xskxceiver.h $(OUTPUT)/network_helpers.o $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT) $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ @@ -729,6 +732,7 @@ $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tas $(OUTPUT)/bench_local_storage_create.o: $(OUTPUT)/bench_local_storage_create.skel.h $(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h $(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h +$(OUTPUT)/bench_bpf_crypto.o: $(OUTPUT)/crypto_bench.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o \ @@ -748,6 +752,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/bench_bpf_hashmap_lookup.o \ $(OUTPUT)/bench_local_storage_create.o \ $(OUTPUT)/bench_htab_mem.o \ + $(OUTPUT)/bench_bpf_crypto.o \ # $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ @@ -759,7 +764,7 @@ $(OUTPUT)/veristat: $(OUTPUT)/veristat.o $(OUTPUT)/uprobe_multi: uprobe_multi.c $(call msg,BINARY,,$@) - $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@ + $(Q)$(CC) $(CFLAGS) -O0 $(LDFLAGS) $^ $(LDLIBS) -o $@ EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index b2b4c391eb..627b74ae04 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -280,6 +280,8 @@ extern struct argp bench_strncmp_argp; extern struct argp bench_hashmap_lookup_argp; extern struct argp bench_local_storage_create_argp; extern struct argp bench_htab_mem_argp; +extern struct argp bench_trigger_batch_argp; +extern struct argp bench_crypto_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, @@ -292,6 +294,8 @@ static const struct argp_child bench_parsers[] = { { &bench_hashmap_lookup_argp, 0, "Hashmap lookup benchmark", 0 }, { &bench_local_storage_create_argp, 0, "local-storage-create benchmark", 0 }, { &bench_htab_mem_argp, 0, "hash map memory benchmark", 0 }, + { &bench_trigger_batch_argp, 0, "BPF triggering benchmark", 0 }, + { &bench_crypto_argp, 0, "bpf crypto benchmark", 0 }, {}, }; @@ -491,24 +495,31 @@ extern const struct bench bench_rename_kretprobe; extern const struct bench bench_rename_rawtp; extern const struct bench bench_rename_fentry; extern const struct bench bench_rename_fexit; -extern const struct bench bench_trig_base; -extern const struct bench bench_trig_tp; -extern const struct bench bench_trig_rawtp; + +/* pure counting benchmarks to establish theoretical lmits */ +extern const struct bench bench_trig_usermode_count; +extern const struct bench bench_trig_syscall_count; +extern const struct bench bench_trig_kernel_count; + +/* batched, staying mostly in-kernel benchmarks */ extern const struct bench bench_trig_kprobe; extern const struct bench bench_trig_kretprobe; extern const struct bench bench_trig_kprobe_multi; extern const struct bench bench_trig_kretprobe_multi; extern const struct bench bench_trig_fentry; extern const struct bench bench_trig_fexit; -extern const struct bench bench_trig_fentry_sleep; extern const struct bench bench_trig_fmodret; -extern const struct bench bench_trig_uprobe_base; +extern const struct bench bench_trig_tp; +extern const struct bench bench_trig_rawtp; + +/* uprobe/uretprobe benchmarks */ extern const struct bench bench_trig_uprobe_nop; extern const struct bench bench_trig_uretprobe_nop; extern const struct bench bench_trig_uprobe_push; extern const struct bench bench_trig_uretprobe_push; extern const struct bench bench_trig_uprobe_ret; extern const struct bench bench_trig_uretprobe_ret; + extern const struct bench bench_rb_libbpf; extern const struct bench bench_rb_custom; extern const struct bench bench_pb_libbpf; @@ -529,6 +540,8 @@ extern const struct bench bench_local_storage_tasks_trace; extern const struct bench bench_bpf_hashmap_lookup; extern const struct bench bench_local_storage_create; extern const struct bench bench_htab_mem; +extern const struct bench bench_crypto_encrypt; +extern const struct bench bench_crypto_decrypt; static const struct bench *benchs[] = { &bench_count_global, @@ -539,24 +552,28 @@ static const struct bench *benchs[] = { &bench_rename_rawtp, &bench_rename_fentry, &bench_rename_fexit, - &bench_trig_base, - &bench_trig_tp, - &bench_trig_rawtp, + /* pure counting benchmarks for establishing theoretical limits */ + &bench_trig_usermode_count, + &bench_trig_kernel_count, + &bench_trig_syscall_count, + /* batched, staying mostly in-kernel triggers */ &bench_trig_kprobe, &bench_trig_kretprobe, &bench_trig_kprobe_multi, &bench_trig_kretprobe_multi, &bench_trig_fentry, &bench_trig_fexit, - &bench_trig_fentry_sleep, &bench_trig_fmodret, - &bench_trig_uprobe_base, + &bench_trig_tp, + &bench_trig_rawtp, + /* uprobes */ &bench_trig_uprobe_nop, &bench_trig_uretprobe_nop, &bench_trig_uprobe_push, &bench_trig_uretprobe_push, &bench_trig_uprobe_ret, &bench_trig_uretprobe_ret, + /* ringbuf/perfbuf benchmarks */ &bench_rb_libbpf, &bench_rb_custom, &bench_pb_libbpf, @@ -577,6 +594,8 @@ static const struct bench *benchs[] = { &bench_bpf_hashmap_lookup, &bench_local_storage_create, &bench_htab_mem, + &bench_crypto_encrypt, + &bench_crypto_decrypt, }; static void find_benchmark(void) diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_crypto.c b/tools/testing/selftests/bpf/benchs/bench_bpf_crypto.c new file mode 100644 index 0000000000..2845edaba8 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_crypto.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <argp.h> +#include "bench.h" +#include "crypto_bench.skel.h" + +#define MAX_CIPHER_LEN 32 +static char *input; +static struct crypto_ctx { + struct crypto_bench *skel; + int pfd; +} ctx; + +static struct crypto_args { + u32 crypto_len; + char *crypto_cipher; +} args = { + .crypto_len = 16, + .crypto_cipher = "ecb(aes)", +}; + +enum { + ARG_CRYPTO_LEN = 5000, + ARG_CRYPTO_CIPHER = 5001, +}; + +static const struct argp_option opts[] = { + { "crypto-len", ARG_CRYPTO_LEN, "CRYPTO_LEN", 0, + "Set the length of crypto buffer" }, + { "crypto-cipher", ARG_CRYPTO_CIPHER, "CRYPTO_CIPHER", 0, + "Set the cipher to use (default:ecb(aes))" }, + {}, +}; + +static error_t crypto_parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case ARG_CRYPTO_LEN: + args.crypto_len = strtoul(arg, NULL, 10); + if (!args.crypto_len || + args.crypto_len > sizeof(ctx.skel->bss->dst)) { + fprintf(stderr, "Invalid crypto buffer len (limit %zu)\n", + sizeof(ctx.skel->bss->dst)); + argp_usage(state); + } + break; + case ARG_CRYPTO_CIPHER: + args.crypto_cipher = strdup(arg); + if (!strlen(args.crypto_cipher) || + strlen(args.crypto_cipher) > MAX_CIPHER_LEN) { + fprintf(stderr, "Invalid crypto cipher len (limit %d)\n", + MAX_CIPHER_LEN); + argp_usage(state); + } + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_crypto_argp = { + .options = opts, + .parser = crypto_parse_arg, +}; + +static void crypto_validate(void) +{ + if (env.consumer_cnt != 0) { + fprintf(stderr, "bpf crypto benchmark doesn't support consumer!\n"); + exit(1); + } +} + +static void crypto_setup(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + + int err, pfd; + size_t i, sz; + + sz = args.crypto_len; + if (!sz || sz > sizeof(ctx.skel->bss->dst)) { + fprintf(stderr, "invalid encrypt buffer size (source %zu, target %zu)\n", + sz, sizeof(ctx.skel->bss->dst)); + exit(1); + } + + setup_libbpf(); + + ctx.skel = crypto_bench__open(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + snprintf(ctx.skel->bss->cipher, 128, "%s", args.crypto_cipher); + memcpy(ctx.skel->bss->key, "12345678testtest", 16); + ctx.skel->bss->key_len = 16; + ctx.skel->bss->authsize = 0; + + srandom(time(NULL)); + input = malloc(sz); + for (i = 0; i < sz - 1; i++) + input[i] = '1' + random() % 9; + input[sz - 1] = '\0'; + + ctx.skel->rodata->len = args.crypto_len; + + err = crypto_bench__load(ctx.skel); + if (err) { + fprintf(stderr, "failed to load skeleton\n"); + crypto_bench__destroy(ctx.skel); + exit(1); + } + + pfd = bpf_program__fd(ctx.skel->progs.crypto_setup); + if (pfd < 0) { + fprintf(stderr, "failed to get fd for setup prog\n"); + crypto_bench__destroy(ctx.skel); + exit(1); + } + + err = bpf_prog_test_run_opts(pfd, &opts); + if (err || ctx.skel->bss->status) { + fprintf(stderr, "failed to run setup prog: err %d, status %d\n", + err, ctx.skel->bss->status); + crypto_bench__destroy(ctx.skel); + exit(1); + } +} + +static void crypto_encrypt_setup(void) +{ + crypto_setup(); + ctx.pfd = bpf_program__fd(ctx.skel->progs.crypto_encrypt); +} + +static void crypto_decrypt_setup(void) +{ + crypto_setup(); + ctx.pfd = bpf_program__fd(ctx.skel->progs.crypto_decrypt); +} + +static void crypto_measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.skel->bss->hits, 0); +} + +static void *crypto_producer(void *unused) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .repeat = 64, + .data_in = input, + .data_size_in = args.crypto_len, + ); + + while (true) + (void)bpf_prog_test_run_opts(ctx.pfd, &opts); + return NULL; +} + +const struct bench bench_crypto_encrypt = { + .name = "crypto-encrypt", + .argp = &bench_crypto_argp, + .validate = crypto_validate, + .setup = crypto_encrypt_setup, + .producer_thread = crypto_producer, + .measure = crypto_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + +const struct bench bench_crypto_decrypt = { + .name = "crypto-decrypt", + .argp = &bench_crypto_argp, + .validate = crypto_validate, + .setup = crypto_decrypt_setup, + .producer_thread = crypto_producer, + .measure = crypto_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c index b36de42ee4..e2ff8ea1cb 100644 --- a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c @@ -186,7 +186,7 @@ static void *task_producer(void *input) for (i = 0; i < batch_sz; i++) { if (!pthd_results[i]) - pthread_join(pthds[i], NULL);; + pthread_join(pthds[i], NULL); } } diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index ace0d1011a..4b05539f16 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -1,15 +1,95 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ +#define _GNU_SOURCE +#include <argp.h> +#include <unistd.h> +#include <stdint.h> #include "bench.h" #include "trigger_bench.skel.h" #include "trace_helpers.h" +#define MAX_TRIG_BATCH_ITERS 1000 + +static struct { + __u32 batch_iters; +} args = { + .batch_iters = 100, +}; + +enum { + ARG_TRIG_BATCH_ITERS = 7000, +}; + +static const struct argp_option opts[] = { + { "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0, + "Number of in-kernel iterations per one driver test run"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_TRIG_BATCH_ITERS: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > MAX_TRIG_BATCH_ITERS) { + fprintf(stderr, "invalid --trig-batch-iters value (should be between %d and %d)\n", + 1, MAX_TRIG_BATCH_ITERS); + argp_usage(state); + } + args.batch_iters = ret; + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_trigger_batch_argp = { + .options = opts, + .parser = parse_arg, +}; + +/* adjust slot shift in inc_hits() if changing */ +#define MAX_BUCKETS 256 + +#pragma GCC diagnostic ignored "-Wattributes" + /* BPF triggering benchmarks */ static struct trigger_ctx { struct trigger_bench *skel; + bool usermode_counters; + int driver_prog_fd; } ctx; -static struct counter base_hits; +static struct counter base_hits[MAX_BUCKETS]; + +static __always_inline void inc_counter(struct counter *counters) +{ + static __thread int tid = 0; + unsigned slot; + + if (unlikely(tid == 0)) + tid = syscall(SYS_gettid); + + /* multiplicative hashing, it's fast */ + slot = 2654435769U * tid; + slot >>= 24; + + atomic_inc(&base_hits[slot].value); /* use highest byte as an index */ +} + +static long sum_and_reset_counters(struct counter *counters) +{ + int i; + long sum = 0; + + for (i = 0; i < MAX_BUCKETS; i++) + sum += atomic_swap(&counters[i].value, 0); + return sum; +} static void trigger_validate(void) { @@ -19,41 +99,63 @@ static void trigger_validate(void) } } -static void *trigger_base_producer(void *input) +static void *trigger_producer(void *input) { - while (true) { - (void)syscall(__NR_getpgid); - atomic_inc(&base_hits.value); + if (ctx.usermode_counters) { + while (true) { + (void)syscall(__NR_getpgid); + inc_counter(base_hits); + } + } else { + while (true) + (void)syscall(__NR_getpgid); } return NULL; } -static void trigger_base_measure(struct bench_res *res) +static void *trigger_producer_batch(void *input) { - res->hits = atomic_swap(&base_hits.value, 0); -} + int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver); -static void *trigger_producer(void *input) -{ while (true) - (void)syscall(__NR_getpgid); + bpf_prog_test_run_opts(fd, NULL); + return NULL; } static void trigger_measure(struct bench_res *res) { - res->hits = atomic_swap(&ctx.skel->bss->hits, 0); + if (ctx.usermode_counters) + res->hits = sum_and_reset_counters(base_hits); + else + res->hits = sum_and_reset_counters(ctx.skel->bss->hits); } static void setup_ctx(void) { setup_libbpf(); - ctx.skel = trigger_bench__open_and_load(); + ctx.skel = trigger_bench__open(); if (!ctx.skel) { fprintf(stderr, "failed to open skeleton\n"); exit(1); } + + /* default "driver" BPF program */ + bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true); + + ctx.skel->rodata->batch_iters = args.batch_iters; +} + +static void load_ctx(void) +{ + int err; + + err = trigger_bench__load(ctx.skel); + if (err) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } } static void attach_bpf(struct bpf_program *prog) @@ -67,64 +169,104 @@ static void attach_bpf(struct bpf_program *prog) } } -static void trigger_tp_setup(void) +static void trigger_syscall_count_setup(void) { - setup_ctx(); - attach_bpf(ctx.skel->progs.bench_trigger_tp); + ctx.usermode_counters = true; } -static void trigger_rawtp_setup(void) +/* Batched, staying mostly in-kernel triggering setups */ +static void trigger_kernel_count_setup(void) { setup_ctx(); - attach_bpf(ctx.skel->progs.bench_trigger_raw_tp); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); + bpf_program__set_autoload(ctx.skel->progs.trigger_count, true); + load_ctx(); + /* override driver program */ + ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count); } static void trigger_kprobe_setup(void) { setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true); + load_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_kprobe); } static void trigger_kretprobe_setup(void) { setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe, true); + load_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_kretprobe); } static void trigger_kprobe_multi_setup(void) { setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe_multi, true); + load_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi); } static void trigger_kretprobe_multi_setup(void) { setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe_multi, true); + load_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi); } static void trigger_fentry_setup(void) { setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry, true); + load_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_fentry); } static void trigger_fexit_setup(void) { setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit, true); + load_ctx(); attach_bpf(ctx.skel->progs.bench_trigger_fexit); } -static void trigger_fentry_sleep_setup(void) +static void trigger_fmodret_setup(void) { setup_ctx(); - attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fmodret, true); + load_ctx(); + /* override driver program */ + ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); + attach_bpf(ctx.skel->progs.bench_trigger_fmodret); } -static void trigger_fmodret_setup(void) +static void trigger_tp_setup(void) { setup_ctx(); - attach_bpf(ctx.skel->progs.bench_trigger_fmodret); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_tp, true); + load_ctx(); + /* override driver program */ + ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); + attach_bpf(ctx.skel->progs.bench_trigger_tp); +} + +static void trigger_rawtp_setup(void) +{ + setup_ctx(); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false); + bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true); + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_rawtp, true); + load_ctx(); + /* override driver program */ + ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc); + attach_bpf(ctx.skel->progs.bench_trigger_rawtp); } /* make sure call is not inlined and not avoided by compiler, so __weak and @@ -137,7 +279,7 @@ static void trigger_fmodret_setup(void) * GCC doesn't generate stack setup preample for these functions due to them * having no input arguments and doing nothing in the body. */ -__weak void uprobe_target_nop(void) +__nocf_check __weak void uprobe_target_nop(void) { asm volatile ("nop"); } @@ -146,7 +288,7 @@ __weak void opaque_noop_func(void) { } -__weak int uprobe_target_push(void) +__nocf_check __weak int uprobe_target_push(void) { /* overhead of function call is negligible compared to uprobe * triggering, so this shouldn't affect benchmark results much @@ -155,16 +297,16 @@ __weak int uprobe_target_push(void) return 1; } -__weak void uprobe_target_ret(void) +__nocf_check __weak void uprobe_target_ret(void) { asm volatile (""); } -static void *uprobe_base_producer(void *input) +static void *uprobe_producer_count(void *input) { while (true) { uprobe_target_nop(); - atomic_inc(&base_hits.value); + inc_counter(base_hits); } return NULL; } @@ -194,15 +336,24 @@ static void usetup(bool use_retprobe, void *target_addr) { size_t uprobe_offset; struct bpf_link *link; + int err; setup_libbpf(); - ctx.skel = trigger_bench__open_and_load(); + ctx.skel = trigger_bench__open(); if (!ctx.skel) { fprintf(stderr, "failed to open skeleton\n"); exit(1); } + bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true); + + err = trigger_bench__load(ctx.skel); + if (err) { + fprintf(stderr, "failed to load skeleton\n"); + exit(1); + } + uprobe_offset = get_uprobe_offset(target_addr); link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe, use_retprobe, @@ -216,204 +367,90 @@ static void usetup(bool use_retprobe, void *target_addr) ctx.skel->links.bench_trigger_uprobe = link; } -static void uprobe_setup_nop(void) +static void usermode_count_setup(void) +{ + ctx.usermode_counters = true; +} + +static void uprobe_nop_setup(void) { usetup(false, &uprobe_target_nop); } -static void uretprobe_setup_nop(void) +static void uretprobe_nop_setup(void) { usetup(true, &uprobe_target_nop); } -static void uprobe_setup_push(void) +static void uprobe_push_setup(void) { usetup(false, &uprobe_target_push); } -static void uretprobe_setup_push(void) +static void uretprobe_push_setup(void) { usetup(true, &uprobe_target_push); } -static void uprobe_setup_ret(void) +static void uprobe_ret_setup(void) { usetup(false, &uprobe_target_ret); } -static void uretprobe_setup_ret(void) +static void uretprobe_ret_setup(void) { usetup(true, &uprobe_target_ret); } -const struct bench bench_trig_base = { - .name = "trig-base", +const struct bench bench_trig_syscall_count = { + .name = "trig-syscall-count", .validate = trigger_validate, - .producer_thread = trigger_base_producer, - .measure = trigger_base_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; - -const struct bench bench_trig_tp = { - .name = "trig-tp", - .validate = trigger_validate, - .setup = trigger_tp_setup, - .producer_thread = trigger_producer, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; - -const struct bench bench_trig_rawtp = { - .name = "trig-rawtp", - .validate = trigger_validate, - .setup = trigger_rawtp_setup, - .producer_thread = trigger_producer, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; - -const struct bench bench_trig_kprobe = { - .name = "trig-kprobe", - .validate = trigger_validate, - .setup = trigger_kprobe_setup, + .setup = trigger_syscall_count_setup, .producer_thread = trigger_producer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, }; -const struct bench bench_trig_kretprobe = { - .name = "trig-kretprobe", - .validate = trigger_validate, - .setup = trigger_kretprobe_setup, - .producer_thread = trigger_producer, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; - -const struct bench bench_trig_kprobe_multi = { - .name = "trig-kprobe-multi", - .validate = trigger_validate, - .setup = trigger_kprobe_multi_setup, - .producer_thread = trigger_producer, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; - -const struct bench bench_trig_kretprobe_multi = { - .name = "trig-kretprobe-multi", - .validate = trigger_validate, - .setup = trigger_kretprobe_multi_setup, - .producer_thread = trigger_producer, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; - -const struct bench bench_trig_fentry = { - .name = "trig-fentry", - .validate = trigger_validate, - .setup = trigger_fentry_setup, - .producer_thread = trigger_producer, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; - -const struct bench bench_trig_fexit = { - .name = "trig-fexit", - .validate = trigger_validate, - .setup = trigger_fexit_setup, - .producer_thread = trigger_producer, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; - -const struct bench bench_trig_fentry_sleep = { - .name = "trig-fentry-sleep", - .validate = trigger_validate, - .setup = trigger_fentry_sleep_setup, - .producer_thread = trigger_producer, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; - -const struct bench bench_trig_fmodret = { - .name = "trig-fmodret", - .validate = trigger_validate, - .setup = trigger_fmodret_setup, - .producer_thread = trigger_producer, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; - -const struct bench bench_trig_uprobe_base = { - .name = "trig-uprobe-base", - .setup = NULL, /* no uprobe/uretprobe is attached */ - .producer_thread = uprobe_base_producer, - .measure = trigger_base_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; - -const struct bench bench_trig_uprobe_nop = { - .name = "trig-uprobe-nop", - .setup = uprobe_setup_nop, - .producer_thread = uprobe_producer_nop, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; - -const struct bench bench_trig_uretprobe_nop = { - .name = "trig-uretprobe-nop", - .setup = uretprobe_setup_nop, - .producer_thread = uprobe_producer_nop, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; - -const struct bench bench_trig_uprobe_push = { - .name = "trig-uprobe-push", - .setup = uprobe_setup_push, - .producer_thread = uprobe_producer_push, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; - -const struct bench bench_trig_uretprobe_push = { - .name = "trig-uretprobe-push", - .setup = uretprobe_setup_push, - .producer_thread = uprobe_producer_push, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; - -const struct bench bench_trig_uprobe_ret = { - .name = "trig-uprobe-ret", - .setup = uprobe_setup_ret, - .producer_thread = uprobe_producer_ret, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; - -const struct bench bench_trig_uretprobe_ret = { - .name = "trig-uretprobe-ret", - .setup = uretprobe_setup_ret, - .producer_thread = uprobe_producer_ret, - .measure = trigger_measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; +/* batched (staying mostly in kernel) kprobe/fentry benchmarks */ +#define BENCH_TRIG_KERNEL(KIND, NAME) \ +const struct bench bench_trig_##KIND = { \ + .name = "trig-" NAME, \ + .setup = trigger_##KIND##_setup, \ + .producer_thread = trigger_producer_batch, \ + .measure = trigger_measure, \ + .report_progress = hits_drops_report_progress, \ + .report_final = hits_drops_report_final, \ + .argp = &bench_trigger_batch_argp, \ +} + +BENCH_TRIG_KERNEL(kernel_count, "kernel-count"); +BENCH_TRIG_KERNEL(kprobe, "kprobe"); +BENCH_TRIG_KERNEL(kretprobe, "kretprobe"); +BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi"); +BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi"); +BENCH_TRIG_KERNEL(fentry, "fentry"); +BENCH_TRIG_KERNEL(fexit, "fexit"); +BENCH_TRIG_KERNEL(fmodret, "fmodret"); +BENCH_TRIG_KERNEL(tp, "tp"); +BENCH_TRIG_KERNEL(rawtp, "rawtp"); + +/* uprobe benchmarks */ +#define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME) \ +const struct bench bench_trig_##KIND = { \ + .name = "trig-" NAME, \ + .validate = trigger_validate, \ + .setup = KIND##_setup, \ + .producer_thread = uprobe_producer_##PRODUCER, \ + .measure = trigger_measure, \ + .report_progress = hits_drops_report_progress, \ + .report_final = hits_drops_report_final, \ +} + +BENCH_TRIG_USERMODE(usermode_count, count, "usermode-count"); +BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop"); +BENCH_TRIG_USERMODE(uprobe_push, push, "uprobe-push"); +BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret"); +BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop"); +BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push"); +BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret"); diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh index 78e83f2432..a690f5a68b 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh @@ -2,8 +2,22 @@ set -eufo pipefail -for i in base tp rawtp kprobe fentry fmodret -do - summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-) - printf "%-10s: %s\n" $i "$summary" +def_tests=( \ + usermode-count kernel-count syscall-count \ + fentry fexit fmodret \ + rawtp tp \ + kprobe kprobe-multi \ + kretprobe kretprobe-multi \ +) + +tests=("$@") +if [ ${#tests[@]} -eq 0 ]; then + tests=("${def_tests[@]}") +fi + +p=${PROD_CNT:-1} + +for t in "${tests[@]}"; do + summary=$(sudo ./bench -w2 -d5 -a -p$p trig-$t | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-) + printf "%-15s: %s\n" $t "$summary" done diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh index 9bdcc74e03..af169f831f 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh @@ -2,7 +2,7 @@ set -eufo pipefail -for i in base {uprobe,uretprobe}-{nop,push,ret} +for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret} do summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-) printf "%-15s: %s\n" $i "$summary" diff --git a/tools/testing/selftests/bpf/bpf_arena_list.h b/tools/testing/selftests/bpf/bpf_arena_list.h index b99b9f408e..85dbc3ea4d 100644 --- a/tools/testing/selftests/bpf/bpf_arena_list.h +++ b/tools/testing/selftests/bpf/bpf_arena_list.h @@ -29,6 +29,7 @@ static inline void *bpf_iter_num_new(struct bpf_iter_num *it, int i, int j) { re static inline void bpf_iter_num_destroy(struct bpf_iter_num *it) {} static inline bool bpf_iter_num_next(struct bpf_iter_num *it) { return true; } #define cond_break ({}) +#define can_loop true #endif /* Safely walk link list elements. Deletion of elements is allowed. */ @@ -36,8 +37,7 @@ static inline bool bpf_iter_num_next(struct bpf_iter_num *it) { return true; } for (void * ___tmp = (pos = list_entry_safe((head)->first, \ typeof(*(pos)), member), \ (void *)0); \ - pos && ({ ___tmp = (void *)pos->member.next; 1; }); \ - cond_break, \ + pos && ({ ___tmp = (void *)pos->member.next; 1; }) && can_loop; \ pos = list_entry_safe((void __arena *)___tmp, typeof(*(pos)), member)) static inline void list_add_head(arena_list_node_t *n, arena_list_head_t *h) diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index a5b9df38c1..3d9e4b8c6b 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -326,9 +326,48 @@ l_true: \ }) #endif +/* + * Note that cond_break can only be portably used in the body of a breakable + * construct, whereas can_loop can be used anywhere. + */ +#ifdef __BPF_FEATURE_MAY_GOTO +#define can_loop \ + ({ __label__ l_break, l_continue; \ + bool ret = true; \ + asm volatile goto("may_goto %l[l_break]" \ + :::: l_break); \ + goto l_continue; \ + l_break: ret = false; \ + l_continue:; \ + ret; \ + }) + +#define cond_break \ + ({ __label__ l_break, l_continue; \ + asm volatile goto("may_goto %l[l_break]" \ + :::: l_break); \ + goto l_continue; \ + l_break: break; \ + l_continue:; \ + }) +#else +#define can_loop \ + ({ __label__ l_break, l_continue; \ + bool ret = true; \ + asm volatile goto("1:.byte 0xe5; \ + .byte 0; \ + .long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \ + .short 0" \ + :::: l_break); \ + goto l_continue; \ + l_break: ret = false; \ + l_continue:; \ + ret; \ + }) + #define cond_break \ ({ __label__ l_break, l_continue; \ - asm volatile goto("1:.byte 0xe5; \ + asm volatile goto("1:.byte 0xe5; \ .byte 0; \ .long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \ .short 0" \ @@ -337,6 +376,7 @@ l_true: \ l_break: break; \ l_continue:; \ }) +#endif #ifndef bpf_nop_mov #define bpf_nop_mov(var) \ @@ -386,6 +426,28 @@ l_true: \ , [as]"i"((dst_as << 16) | src_as)); #endif +void bpf_preempt_disable(void) __weak __ksym; +void bpf_preempt_enable(void) __weak __ksym; + +typedef struct { +} __bpf_preempt_t; + +static inline __bpf_preempt_t __bpf_preempt_constructor(void) +{ + __bpf_preempt_t ret = {}; + + bpf_preempt_disable(); + return ret; +} +static inline void __bpf_preempt_destructor(__bpf_preempt_t *t) +{ + bpf_preempt_enable(); +} +#define bpf_guard_preempt() \ + __bpf_preempt_t ___bpf_apply(preempt, __COUNTER__) \ + __attribute__((__unused__, __cleanup__(__bpf_preempt_destructor))) = \ + __bpf_preempt_constructor() + /* Description * Assert that a conditional expression is true. * Returns @@ -459,4 +521,11 @@ extern int bpf_iter_css_new(struct bpf_iter_css *it, extern struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *it) __weak __ksym; extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym; +extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym; +extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym; +extern int bpf_wq_set_callback_impl(struct bpf_wq *wq, + int (callback_fn)(void *map, int *key, struct bpf_wq *wq), + unsigned int flags__k, void *aux__ign) __ksym; +#define bpf_wq_set_callback(timer, cb, flags) \ + bpf_wq_set_callback_impl(timer, cb, flags, NULL) #endif diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 14ebe7d9e1..3b6675ab40 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -75,4 +75,7 @@ extern void bpf_key_put(struct bpf_key *key) __ksym; extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr, struct bpf_dynptr *sig_ptr, struct bpf_key *trusted_keyring) __ksym; + +extern bool bpf_session_is_return(void) __ksym __weak; +extern __u64 *bpf_session_cookie(void) __ksym __weak; #endif diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h deleted file mode 100644 index 82a7c9de95..0000000000 --- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h +++ /dev/null @@ -1,241 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __BPF_TCP_HELPERS_H -#define __BPF_TCP_HELPERS_H - -#include <stdbool.h> -#include <linux/types.h> -#include <bpf/bpf_helpers.h> -#include <bpf/bpf_core_read.h> -#include <bpf/bpf_tracing.h> - -#define BPF_STRUCT_OPS(name, args...) \ -SEC("struct_ops/"#name) \ -BPF_PROG(name, args) - -#ifndef SOL_TCP -#define SOL_TCP 6 -#endif - -#ifndef TCP_CA_NAME_MAX -#define TCP_CA_NAME_MAX 16 -#endif - -#define tcp_jiffies32 ((__u32)bpf_jiffies64()) - -struct sock_common { - unsigned char skc_state; - __u16 skc_num; -} __attribute__((preserve_access_index)); - -enum sk_pacing { - SK_PACING_NONE = 0, - SK_PACING_NEEDED = 1, - SK_PACING_FQ = 2, -}; - -struct sock { - struct sock_common __sk_common; -#define sk_state __sk_common.skc_state - unsigned long sk_pacing_rate; - __u32 sk_pacing_status; /* see enum sk_pacing */ -} __attribute__((preserve_access_index)); - -struct inet_sock { - struct sock sk; -} __attribute__((preserve_access_index)); - -struct inet_connection_sock { - struct inet_sock icsk_inet; - __u8 icsk_ca_state:6, - icsk_ca_setsockopt:1, - icsk_ca_dst_locked:1; - struct { - __u8 pending; - } icsk_ack; - __u64 icsk_ca_priv[104 / sizeof(__u64)]; -} __attribute__((preserve_access_index)); - -struct request_sock { - struct sock_common __req_common; -} __attribute__((preserve_access_index)); - -struct tcp_sock { - struct inet_connection_sock inet_conn; - - __u32 rcv_nxt; - __u32 snd_nxt; - __u32 snd_una; - __u32 window_clamp; - __u8 ecn_flags; - __u32 delivered; - __u32 delivered_ce; - __u32 snd_cwnd; - __u32 snd_cwnd_cnt; - __u32 snd_cwnd_clamp; - __u32 snd_ssthresh; - __u8 syn_data:1, /* SYN includes data */ - syn_fastopen:1, /* SYN includes Fast Open option */ - syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ - syn_fastopen_ch:1, /* Active TFO re-enabling probe */ - syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ - save_syn:1, /* Save headers of SYN packet */ - is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */ - syn_smc:1; /* SYN includes SMC */ - __u32 max_packets_out; - __u32 lsndtime; - __u32 prior_cwnd; - __u64 tcp_mstamp; /* most recent packet received/sent */ - bool is_mptcp; -} __attribute__((preserve_access_index)); - -static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk) -{ - return (struct inet_connection_sock *)sk; -} - -static __always_inline void *inet_csk_ca(const struct sock *sk) -{ - return (void *)inet_csk(sk)->icsk_ca_priv; -} - -static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk) -{ - return (struct tcp_sock *)sk; -} - -static __always_inline bool before(__u32 seq1, __u32 seq2) -{ - return (__s32)(seq1-seq2) < 0; -} -#define after(seq2, seq1) before(seq1, seq2) - -#define TCP_ECN_OK 1 -#define TCP_ECN_QUEUE_CWR 2 -#define TCP_ECN_DEMAND_CWR 4 -#define TCP_ECN_SEEN 8 - -enum inet_csk_ack_state_t { - ICSK_ACK_SCHED = 1, - ICSK_ACK_TIMER = 2, - ICSK_ACK_PUSHED = 4, - ICSK_ACK_PUSHED2 = 8, - ICSK_ACK_NOW = 16 /* Send the next ACK immediately (once) */ -}; - -enum tcp_ca_event { - CA_EVENT_TX_START = 0, - CA_EVENT_CWND_RESTART = 1, - CA_EVENT_COMPLETE_CWR = 2, - CA_EVENT_LOSS = 3, - CA_EVENT_ECN_NO_CE = 4, - CA_EVENT_ECN_IS_CE = 5, -}; - -struct ack_sample { - __u32 pkts_acked; - __s32 rtt_us; - __u32 in_flight; -} __attribute__((preserve_access_index)); - -struct rate_sample { - __u64 prior_mstamp; /* starting timestamp for interval */ - __u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ - __s32 delivered; /* number of packets delivered over interval */ - long interval_us; /* time for tp->delivered to incr "delivered" */ - __u32 snd_interval_us; /* snd interval for delivered packets */ - __u32 rcv_interval_us; /* rcv interval for delivered packets */ - long rtt_us; /* RTT of last (S)ACKed packet (or -1) */ - int losses; /* number of packets marked lost upon ACK */ - __u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ - __u32 prior_in_flight; /* in flight before this ACK */ - bool is_app_limited; /* is sample from packet with bubble in pipe? */ - bool is_retrans; /* is sample from retransmission? */ - bool is_ack_delayed; /* is this (likely) a delayed ACK? */ -} __attribute__((preserve_access_index)); - -#define TCP_CA_NAME_MAX 16 -#define TCP_CONG_NEEDS_ECN 0x2 - -struct tcp_congestion_ops { - char name[TCP_CA_NAME_MAX]; - __u32 flags; - - /* initialize private data (optional) */ - void (*init)(struct sock *sk); - /* cleanup private data (optional) */ - void (*release)(struct sock *sk); - - /* return slow start threshold (required) */ - __u32 (*ssthresh)(struct sock *sk); - /* do new cwnd calculation (required) */ - void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked); - /* call before changing ca_state (optional) */ - void (*set_state)(struct sock *sk, __u8 new_state); - /* call when cwnd event occurs (optional) */ - void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); - /* call when ack arrives (optional) */ - void (*in_ack_event)(struct sock *sk, __u32 flags); - /* new value of cwnd after loss (required) */ - __u32 (*undo_cwnd)(struct sock *sk); - /* hook for packet ack accounting (optional) */ - void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); - /* override sysctl_tcp_min_tso_segs */ - __u32 (*min_tso_segs)(struct sock *sk); - /* returns the multiplier used in tcp_sndbuf_expand (optional) */ - __u32 (*sndbuf_expand)(struct sock *sk); - /* call when packets are delivered to update cwnd and pacing rate, - * after all the ca_state processing. (optional) - */ - void (*cong_control)(struct sock *sk, const struct rate_sample *rs); - void *owner; -}; - -#define min(a, b) ((a) < (b) ? (a) : (b)) -#define max(a, b) ((a) > (b) ? (a) : (b)) -#define min_not_zero(x, y) ({ \ - typeof(x) __x = (x); \ - typeof(y) __y = (y); \ - __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) - -static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp) -{ - return tp->snd_cwnd < tp->snd_ssthresh; -} - -static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - - /* If in slow start, ensure cwnd grows to twice what was ACKed. */ - if (tcp_in_slow_start(tp)) - return tp->snd_cwnd < 2 * tp->max_packets_out; - - return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited); -} - -static __always_inline bool tcp_cc_eq(const char *a, const char *b) -{ - int i; - - for (i = 0; i < TCP_CA_NAME_MAX; i++) { - if (a[i] != b[i]) - return false; - if (!a[i]) - break; - } - - return true; -} - -extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym; -extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym; - -struct mptcp_sock { - struct inet_connection_sock sk; - - __u32 token; - struct sock *first; - char ca_name[TCP_CA_NAME_MAX]; -} __attribute__((preserve_access_index)); - -#endif diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index edcd261065..2a18bd320e 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -10,18 +10,30 @@ #include <linux/percpu-defs.h> #include <linux/sysfs.h> #include <linux/tracepoint.h> +#include <linux/net.h> +#include <linux/socket.h> +#include <linux/nsproxy.h> +#include <linux/inet.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/un.h> +#include <net/sock.h> #include "bpf_testmod.h" #include "bpf_testmod_kfunc.h" #define CREATE_TRACE_POINTS #include "bpf_testmod-events.h" +#define CONNECT_TIMEOUT_SEC 1 + typedef int (*func_proto_typedef)(long); typedef int (*func_proto_typedef_nested1)(func_proto_typedef); typedef int (*func_proto_typedef_nested2)(func_proto_typedef_nested1); DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123; long bpf_testmod_test_struct_arg_result; +static DEFINE_MUTEX(sock_lock); +static struct socket *sock; struct bpf_testmod_struct_arg_1 { int a; @@ -497,6 +509,241 @@ __bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused return arg; } +__bpf_kfunc void bpf_kfunc_call_test_sleepable(void) +{ +} + +__bpf_kfunc int bpf_kfunc_init_sock(struct init_sock_args *args) +{ + int proto; + int err; + + mutex_lock(&sock_lock); + + if (sock) { + pr_err("%s called without releasing old sock", __func__); + err = -EPERM; + goto out; + } + + switch (args->af) { + case AF_INET: + case AF_INET6: + proto = args->type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP; + break; + case AF_UNIX: + proto = PF_UNIX; + break; + default: + pr_err("invalid address family %d\n", args->af); + err = -EINVAL; + goto out; + } + + err = sock_create_kern(current->nsproxy->net_ns, args->af, args->type, + proto, &sock); + + if (!err) + /* Set timeout for call to kernel_connect() to prevent it from hanging, + * and consider the connection attempt failed if it returns + * -EINPROGRESS. + */ + sock->sk->sk_sndtimeo = CONNECT_TIMEOUT_SEC * HZ; +out: + mutex_unlock(&sock_lock); + + return err; +} + +__bpf_kfunc void bpf_kfunc_close_sock(void) +{ + mutex_lock(&sock_lock); + + if (sock) { + sock_release(sock); + sock = NULL; + } + + mutex_unlock(&sock_lock); +} + +__bpf_kfunc int bpf_kfunc_call_kernel_connect(struct addr_args *args) +{ + int err; + + if (args->addrlen > sizeof(args->addr)) + return -EINVAL; + + mutex_lock(&sock_lock); + + if (!sock) { + pr_err("%s called without initializing sock", __func__); + err = -EPERM; + goto out; + } + + err = kernel_connect(sock, (struct sockaddr *)&args->addr, + args->addrlen, 0); +out: + mutex_unlock(&sock_lock); + + return err; +} + +__bpf_kfunc int bpf_kfunc_call_kernel_bind(struct addr_args *args) +{ + int err; + + if (args->addrlen > sizeof(args->addr)) + return -EINVAL; + + mutex_lock(&sock_lock); + + if (!sock) { + pr_err("%s called without initializing sock", __func__); + err = -EPERM; + goto out; + } + + err = kernel_bind(sock, (struct sockaddr *)&args->addr, args->addrlen); +out: + mutex_unlock(&sock_lock); + + return err; +} + +__bpf_kfunc int bpf_kfunc_call_kernel_listen(void) +{ + int err; + + mutex_lock(&sock_lock); + + if (!sock) { + pr_err("%s called without initializing sock", __func__); + err = -EPERM; + goto out; + } + + err = kernel_listen(sock, 128); +out: + mutex_unlock(&sock_lock); + + return err; +} + +__bpf_kfunc int bpf_kfunc_call_kernel_sendmsg(struct sendmsg_args *args) +{ + struct msghdr msg = { + .msg_name = &args->addr.addr, + .msg_namelen = args->addr.addrlen, + }; + struct kvec iov; + int err; + + if (args->addr.addrlen > sizeof(args->addr.addr) || + args->msglen > sizeof(args->msg)) + return -EINVAL; + + iov.iov_base = args->msg; + iov.iov_len = args->msglen; + + mutex_lock(&sock_lock); + + if (!sock) { + pr_err("%s called without initializing sock", __func__); + err = -EPERM; + goto out; + } + + err = kernel_sendmsg(sock, &msg, &iov, 1, args->msglen); + args->addr.addrlen = msg.msg_namelen; +out: + mutex_unlock(&sock_lock); + + return err; +} + +__bpf_kfunc int bpf_kfunc_call_sock_sendmsg(struct sendmsg_args *args) +{ + struct msghdr msg = { + .msg_name = &args->addr.addr, + .msg_namelen = args->addr.addrlen, + }; + struct kvec iov; + int err; + + if (args->addr.addrlen > sizeof(args->addr.addr) || + args->msglen > sizeof(args->msg)) + return -EINVAL; + + iov.iov_base = args->msg; + iov.iov_len = args->msglen; + + iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &iov, 1, args->msglen); + mutex_lock(&sock_lock); + + if (!sock) { + pr_err("%s called without initializing sock", __func__); + err = -EPERM; + goto out; + } + + err = sock_sendmsg(sock, &msg); + args->addr.addrlen = msg.msg_namelen; +out: + mutex_unlock(&sock_lock); + + return err; +} + +__bpf_kfunc int bpf_kfunc_call_kernel_getsockname(struct addr_args *args) +{ + int err; + + mutex_lock(&sock_lock); + + if (!sock) { + pr_err("%s called without initializing sock", __func__); + err = -EPERM; + goto out; + } + + err = kernel_getsockname(sock, (struct sockaddr *)&args->addr); + if (err < 0) + goto out; + + args->addrlen = err; + err = 0; +out: + mutex_unlock(&sock_lock); + + return err; +} + +__bpf_kfunc int bpf_kfunc_call_kernel_getpeername(struct addr_args *args) +{ + int err; + + mutex_lock(&sock_lock); + + if (!sock) { + pr_err("%s called without initializing sock", __func__); + err = -EPERM; + goto out; + } + + err = kernel_getpeername(sock, (struct sockaddr *)&args->addr); + if (err < 0) + goto out; + + args->addrlen = err; + err = 0; +out: + mutex_unlock(&sock_lock); + + return err; +} + BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids) BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) BTF_ID_FLAGS(func, bpf_kfunc_call_test1) @@ -523,6 +770,16 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU) BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_sleepable, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_kfunc_init_sock, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_kfunc_close_sock, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_connect, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_bind, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_listen, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_sendmsg, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_kfunc_call_sock_sendmsg, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_getsockname, KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_getpeername, KF_SLEEPABLE) BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids) static int bpf_testmod_ops_init(struct btf *btf) @@ -653,6 +910,8 @@ static int bpf_testmod_init(void) return ret; if (bpf_fentry_test1(0) < 0) return -EINVAL; + sock = NULL; + mutex_init(&sock_lock); return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); } @@ -666,6 +925,7 @@ static void bpf_testmod_exit(void) while (refcount_read(&prog_test_struct.cnt) > 1) msleep(20); + bpf_kfunc_close_sock(); sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); } diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h index 7c664dd610..b0d586a675 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h @@ -64,6 +64,22 @@ struct prog_test_fail3 { char arr2[]; }; +struct init_sock_args { + int af; + int type; +}; + +struct addr_args { + char addr[sizeof(struct __kernel_sockaddr_storage)]; + int addrlen; +}; + +struct sendmsg_args { + struct addr_args addr; + char msg[10]; + int msglen; +}; + struct prog_test_ref_kfunc * bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym; void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; @@ -96,6 +112,7 @@ void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym; void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym; void bpf_kfunc_call_test_destructive(void) __ksym; +void bpf_kfunc_call_test_sleepable(void) __ksym; void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p); struct prog_test_member *bpf_kfunc_call_memb_acquire(void); @@ -106,4 +123,15 @@ void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p); void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len); void bpf_kfunc_common_test(void) __ksym; + +int bpf_kfunc_init_sock(struct init_sock_args *args) __ksym; +void bpf_kfunc_close_sock(void) __ksym; +int bpf_kfunc_call_kernel_connect(struct addr_args *args) __ksym; +int bpf_kfunc_call_kernel_bind(struct addr_args *args) __ksym; +int bpf_kfunc_call_kernel_listen(void) __ksym; +int bpf_kfunc_call_kernel_sendmsg(struct sendmsg_args *args) __ksym; +int bpf_kfunc_call_sock_sendmsg(struct sendmsg_args *args) __ksym; +int bpf_kfunc_call_kernel_getsockname(struct addr_args *args) __ksym; +int bpf_kfunc_call_kernel_getpeername(struct addr_args *args) __ksym; + #endif /* _BPF_TESTMOD_KFUNC_H */ diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index e812876d79..23bb9a9e6a 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -429,7 +429,7 @@ int create_and_get_cgroup(const char *relative_path) * which is an invalid cgroup id. * If there is a failure, it prints the error to stderr. */ -unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir) +static unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir) { int dirfd, err, flags, mount_id, fhsize; union { diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index dec9fd7ebb..98b6b6a886 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -13,7 +13,12 @@ CONFIG_BPF_SYSCALL=y CONFIG_CGROUP_BPF=y CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_SHA256=y +CONFIG_CRYPTO_USER_API=y CONFIG_CRYPTO_USER_API_HASH=y +CONFIG_CRYPTO_USER_API_SKCIPHER=y +CONFIG_CRYPTO_SKCIPHER=y +CONFIG_CRYPTO_ECB=y +CONFIG_CRYPTO_AES=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_DWARF4=y @@ -91,3 +96,5 @@ CONFIG_VSOCKETS=y CONFIG_VXLAN=y CONFIG_XDP_SOCKETS=y CONFIG_XFRM_INTERFACE=y +CONFIG_TCP_CONG_DCTCP=y +CONFIG_TCP_CONG_BBR=y diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index be96bf0223..35250e6cde 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -52,6 +52,8 @@ struct ipv6_packet pkt_v6 = { .tcp.doff = 5, }; +static const struct network_helper_opts default_opts; + int settimeo(int fd, int timeout_ms) { struct timeval timeout = { .tv_sec = 3 }; @@ -78,24 +80,22 @@ int settimeo(int fd, int timeout_ms) #define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; }) -static int __start_server(int type, int protocol, const struct sockaddr *addr, - socklen_t addrlen, int timeout_ms, bool reuseport) +static int __start_server(int type, const struct sockaddr *addr, socklen_t addrlen, + const struct network_helper_opts *opts) { - int on = 1; int fd; - fd = socket(addr->sa_family, type, protocol); + fd = socket(addr->sa_family, type, opts->proto); if (fd < 0) { log_err("Failed to create server socket"); return -1; } - if (settimeo(fd, timeout_ms)) + if (settimeo(fd, opts->timeout_ms)) goto error_close; - if (reuseport && - setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on))) { - log_err("Failed to set SO_REUSEPORT"); + if (opts->post_socket_cb && opts->post_socket_cb(fd, NULL)) { + log_err("Failed to call post_socket_cb"); goto error_close; } @@ -118,35 +118,35 @@ error_close: return -1; } -static int start_server_proto(int family, int type, int protocol, - const char *addr_str, __u16 port, int timeout_ms) +int start_server(int family, int type, const char *addr_str, __u16 port, + int timeout_ms) { + struct network_helper_opts opts = { + .timeout_ms = timeout_ms, + }; struct sockaddr_storage addr; socklen_t addrlen; if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) return -1; - return __start_server(type, protocol, (struct sockaddr *)&addr, - addrlen, timeout_ms, false); + return __start_server(type, (struct sockaddr *)&addr, addrlen, &opts); } -int start_server(int family, int type, const char *addr_str, __u16 port, - int timeout_ms) +static int reuseport_cb(int fd, const struct post_socket_opts *opts) { - return start_server_proto(family, type, 0, addr_str, port, timeout_ms); -} + int on = 1; -int start_mptcp_server(int family, const char *addr_str, __u16 port, - int timeout_ms) -{ - return start_server_proto(family, SOCK_STREAM, IPPROTO_MPTCP, addr_str, - port, timeout_ms); + return setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on)); } int *start_reuseport_server(int family, int type, const char *addr_str, __u16 port, int timeout_ms, unsigned int nr_listens) { + struct network_helper_opts opts = { + .timeout_ms = timeout_ms, + .post_socket_cb = reuseport_cb, + }; struct sockaddr_storage addr; unsigned int nr_fds = 0; socklen_t addrlen; @@ -162,8 +162,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str, if (!fds) return NULL; - fds[0] = __start_server(type, 0, (struct sockaddr *)&addr, addrlen, - timeout_ms, true); + fds[0] = __start_server(type, (struct sockaddr *)&addr, addrlen, &opts); if (fds[0] == -1) goto close_fds; nr_fds = 1; @@ -172,8 +171,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str, goto close_fds; for (; nr_fds < nr_listens; nr_fds++) { - fds[nr_fds] = __start_server(type, 0, (struct sockaddr *)&addr, - addrlen, timeout_ms, true); + fds[nr_fds] = __start_server(type, (struct sockaddr *)&addr, addrlen, &opts); if (fds[nr_fds] == -1) goto close_fds; } @@ -185,6 +183,15 @@ close_fds: return NULL; } +int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t len, + const struct network_helper_opts *opts) +{ + if (!opts) + opts = &default_opts; + + return __start_server(type, (struct sockaddr *)addr, len, opts); +} + void free_fds(int *fds, unsigned int nr_close_fds) { if (fds) { @@ -258,17 +265,24 @@ static int connect_fd_to_addr(int fd, return 0; } -int connect_to_addr(const struct sockaddr_storage *addr, socklen_t addrlen, int type) +int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen, + const struct network_helper_opts *opts) { int fd; - fd = socket(addr->ss_family, type, 0); + if (!opts) + opts = &default_opts; + + fd = socket(addr->ss_family, type, opts->proto); if (fd < 0) { log_err("Failed to create client socket"); return -1; } - if (connect_fd_to_addr(fd, addr, addrlen, false)) + if (settimeo(fd, opts->timeout_ms)) + goto error_close; + + if (connect_fd_to_addr(fd, addr, addrlen, opts->must_fail)) goto error_close; return fd; @@ -278,8 +292,6 @@ error_close: return -1; } -static const struct network_helper_opts default_opts; - int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) { struct sockaddr_storage addr; @@ -442,22 +454,30 @@ struct nstoken *open_netns(const char *name) struct nstoken *token; token = calloc(1, sizeof(struct nstoken)); - if (!ASSERT_OK_PTR(token, "malloc token")) + if (!token) { + log_err("Failed to malloc token"); return NULL; + } token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY); - if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net")) + if (token->orig_netns_fd == -1) { + log_err("Failed to open(/proc/self/ns/net)"); goto fail; + } snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name); nsfd = open(nspath, O_RDONLY | O_CLOEXEC); - if (!ASSERT_GE(nsfd, 0, "open netns fd")) + if (nsfd == -1) { + log_err("Failed to open(%s)", nspath); goto fail; + } err = setns(nsfd, CLONE_NEWNET); close(nsfd); - if (!ASSERT_OK(err, "setns")) + if (err) { + log_err("Failed to setns(nsfd)"); goto fail; + } return token; fail: @@ -472,7 +492,8 @@ void close_netns(struct nstoken *token) if (!token) return; - ASSERT_OK(setns(token->orig_netns_fd, CLONE_NEWNET), "setns"); + if (setns(token->orig_netns_fd, CLONE_NEWNET)) + log_err("Failed to setns(orig_netns_fd)"); close(token->orig_netns_fd); free(token); } @@ -499,3 +520,153 @@ int get_socket_local_port(int sock_fd) return -1; } + +int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param) +{ + struct ifreq ifr = {0}; + int sockfd, err; + + sockfd = socket(AF_INET, SOCK_DGRAM, 0); + if (sockfd < 0) + return -errno; + + memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); + + ring_param->cmd = ETHTOOL_GRINGPARAM; + ifr.ifr_data = (char *)ring_param; + + if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) { + err = errno; + close(sockfd); + return -err; + } + + close(sockfd); + return 0; +} + +int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param) +{ + struct ifreq ifr = {0}; + int sockfd, err; + + sockfd = socket(AF_INET, SOCK_DGRAM, 0); + if (sockfd < 0) + return -errno; + + memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); + + ring_param->cmd = ETHTOOL_SRINGPARAM; + ifr.ifr_data = (char *)ring_param; + + if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) { + err = errno; + close(sockfd); + return -err; + } + + close(sockfd); + return 0; +} + +struct send_recv_arg { + int fd; + uint32_t bytes; + int stop; +}; + +static void *send_recv_server(void *arg) +{ + struct send_recv_arg *a = (struct send_recv_arg *)arg; + ssize_t nr_sent = 0, bytes = 0; + char batch[1500]; + int err = 0, fd; + + fd = accept(a->fd, NULL, NULL); + while (fd == -1) { + if (errno == EINTR) + continue; + err = -errno; + goto done; + } + + if (settimeo(fd, 0)) { + err = -errno; + goto done; + } + + while (bytes < a->bytes && !READ_ONCE(a->stop)) { + nr_sent = send(fd, &batch, + MIN(a->bytes - bytes, sizeof(batch)), 0); + if (nr_sent == -1 && errno == EINTR) + continue; + if (nr_sent == -1) { + err = -errno; + break; + } + bytes += nr_sent; + } + + if (bytes != a->bytes) { + log_err("send %zd expected %u", bytes, a->bytes); + if (!err) + err = bytes > a->bytes ? -E2BIG : -EINTR; + } + +done: + if (fd >= 0) + close(fd); + if (err) { + WRITE_ONCE(a->stop, 1); + return ERR_PTR(err); + } + return NULL; +} + +int send_recv_data(int lfd, int fd, uint32_t total_bytes) +{ + ssize_t nr_recv = 0, bytes = 0; + struct send_recv_arg arg = { + .fd = lfd, + .bytes = total_bytes, + .stop = 0, + }; + pthread_t srv_thread; + void *thread_ret; + char batch[1500]; + int err = 0; + + err = pthread_create(&srv_thread, NULL, send_recv_server, (void *)&arg); + if (err) { + log_err("Failed to pthread_create"); + return err; + } + + /* recv total_bytes */ + while (bytes < total_bytes && !READ_ONCE(arg.stop)) { + nr_recv = recv(fd, &batch, + MIN(total_bytes - bytes, sizeof(batch)), 0); + if (nr_recv == -1 && errno == EINTR) + continue; + if (nr_recv == -1) { + err = -errno; + break; + } + bytes += nr_recv; + } + + if (bytes != total_bytes) { + log_err("recv %zd expected %u", bytes, total_bytes); + if (!err) + err = bytes > total_bytes ? -E2BIG : -EINTR; + } + + WRITE_ONCE(arg.stop, 1); + pthread_join(srv_thread, &thread_ret); + if (IS_ERR(thread_ret)) { + log_err("Failed in thread_ret %ld", PTR_ERR(thread_ret)); + err = err ? : PTR_ERR(thread_ret); + } + + return err; +} diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 94b9be24e3..883c7ea9d8 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -9,14 +9,20 @@ typedef __u16 __sum16; #include <linux/if_packet.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/ethtool.h> +#include <linux/sockios.h> +#include <linux/err.h> #include <netinet/tcp.h> #include <bpf/bpf_endian.h> +#include <net/if.h> #define MAGIC_VAL 0x1234 #define NUM_ITER 100000 #define VIP_NUM 5 #define MAGIC_BYTES 123 +struct post_socket_opts {}; + struct network_helper_opts { const char *cc; int timeout_ms; @@ -24,6 +30,7 @@ struct network_helper_opts { bool noconnect; int type; int proto; + int (*post_socket_cb)(int fd, const struct post_socket_opts *opts); }; /* ipv4 test vector */ @@ -45,13 +52,14 @@ extern struct ipv6_packet pkt_v6; int settimeo(int fd, int timeout_ms); int start_server(int family, int type, const char *addr, __u16 port, int timeout_ms); -int start_mptcp_server(int family, const char *addr, __u16 port, - int timeout_ms); int *start_reuseport_server(int family, int type, const char *addr_str, __u16 port, int timeout_ms, unsigned int nr_listens); +int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t len, + const struct network_helper_opts *opts); void free_fds(int *fds, unsigned int nr_close_fds); -int connect_to_addr(const struct sockaddr_storage *addr, socklen_t len, int type); +int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t len, + const struct network_helper_opts *opts); int connect_to_fd(int server_fd, int timeout_ms); int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts); int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms); @@ -61,6 +69,8 @@ int make_sockaddr(int family, const char *addr_str, __u16 port, struct sockaddr_storage *addr, socklen_t *len); char *ping_command(int family); int get_socket_local_port(int sock_fd); +int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param); +int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param); struct nstoken; /** @@ -71,6 +81,7 @@ struct nstoken; */ struct nstoken *open_netns(const char *name); void close_netns(struct nstoken *token); +int send_recv_data(int lfd, int fd, uint32_t total_bytes); static __u16 csum_fold(__u32 csum) { diff --git a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c new file mode 100644 index 0000000000..0807a48a58 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include "arena_atomics.skel.h" + +static void test_add(struct arena_atomics *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, prog_fd; + + /* No need to attach it, just run it directly */ + prog_fd = bpf_program__fd(skel->progs.add); + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return; + + ASSERT_EQ(skel->arena->add64_value, 3, "add64_value"); + ASSERT_EQ(skel->arena->add64_result, 1, "add64_result"); + + ASSERT_EQ(skel->arena->add32_value, 3, "add32_value"); + ASSERT_EQ(skel->arena->add32_result, 1, "add32_result"); + + ASSERT_EQ(skel->arena->add_stack_value_copy, 3, "add_stack_value"); + ASSERT_EQ(skel->arena->add_stack_result, 1, "add_stack_result"); + + ASSERT_EQ(skel->arena->add_noreturn_value, 3, "add_noreturn_value"); +} + +static void test_sub(struct arena_atomics *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, prog_fd; + + /* No need to attach it, just run it directly */ + prog_fd = bpf_program__fd(skel->progs.sub); + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return; + + ASSERT_EQ(skel->arena->sub64_value, -1, "sub64_value"); + ASSERT_EQ(skel->arena->sub64_result, 1, "sub64_result"); + + ASSERT_EQ(skel->arena->sub32_value, -1, "sub32_value"); + ASSERT_EQ(skel->arena->sub32_result, 1, "sub32_result"); + + ASSERT_EQ(skel->arena->sub_stack_value_copy, -1, "sub_stack_value"); + ASSERT_EQ(skel->arena->sub_stack_result, 1, "sub_stack_result"); + + ASSERT_EQ(skel->arena->sub_noreturn_value, -1, "sub_noreturn_value"); +} + +static void test_and(struct arena_atomics *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, prog_fd; + + /* No need to attach it, just run it directly */ + prog_fd = bpf_program__fd(skel->progs.and); + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return; + + ASSERT_EQ(skel->arena->and64_value, 0x010ull << 32, "and64_value"); + ASSERT_EQ(skel->arena->and32_value, 0x010, "and32_value"); +} + +static void test_or(struct arena_atomics *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, prog_fd; + + /* No need to attach it, just run it directly */ + prog_fd = bpf_program__fd(skel->progs.or); + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return; + + ASSERT_EQ(skel->arena->or64_value, 0x111ull << 32, "or64_value"); + ASSERT_EQ(skel->arena->or32_value, 0x111, "or32_value"); +} + +static void test_xor(struct arena_atomics *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, prog_fd; + + /* No need to attach it, just run it directly */ + prog_fd = bpf_program__fd(skel->progs.xor); + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return; + + ASSERT_EQ(skel->arena->xor64_value, 0x101ull << 32, "xor64_value"); + ASSERT_EQ(skel->arena->xor32_value, 0x101, "xor32_value"); +} + +static void test_cmpxchg(struct arena_atomics *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, prog_fd; + + /* No need to attach it, just run it directly */ + prog_fd = bpf_program__fd(skel->progs.cmpxchg); + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return; + + ASSERT_EQ(skel->arena->cmpxchg64_value, 2, "cmpxchg64_value"); + ASSERT_EQ(skel->arena->cmpxchg64_result_fail, 1, "cmpxchg_result_fail"); + ASSERT_EQ(skel->arena->cmpxchg64_result_succeed, 1, "cmpxchg_result_succeed"); + + ASSERT_EQ(skel->arena->cmpxchg32_value, 2, "lcmpxchg32_value"); + ASSERT_EQ(skel->arena->cmpxchg32_result_fail, 1, "cmpxchg_result_fail"); + ASSERT_EQ(skel->arena->cmpxchg32_result_succeed, 1, "cmpxchg_result_succeed"); +} + +static void test_xchg(struct arena_atomics *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, prog_fd; + + /* No need to attach it, just run it directly */ + prog_fd = bpf_program__fd(skel->progs.xchg); + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return; + + ASSERT_EQ(skel->arena->xchg64_value, 2, "xchg64_value"); + ASSERT_EQ(skel->arena->xchg64_result, 1, "xchg64_result"); + + ASSERT_EQ(skel->arena->xchg32_value, 2, "xchg32_value"); + ASSERT_EQ(skel->arena->xchg32_result, 1, "xchg32_result"); +} + +void test_arena_atomics(void) +{ + struct arena_atomics *skel; + int err; + + skel = arena_atomics__open(); + if (!ASSERT_OK_PTR(skel, "arena atomics skeleton open")) + return; + + if (skel->data->skip_tests) { + printf("%s:SKIP:no ENABLE_ATOMICS_TESTS or no addr_space_cast support in clang", + __func__); + test__skip(); + goto cleanup; + } + err = arena_atomics__load(skel); + if (!ASSERT_OK(err, "arena atomics skeleton load")) + return; + skel->bss->pid = getpid(); + + if (test__start_subtest("add")) + test_add(skel); + if (test__start_subtest("sub")) + test_sub(skel); + if (test__start_subtest("and")) + test_and(skel); + if (test__start_subtest("or")) + test_or(skel); + if (test__start_subtest("xor")) + test_xor(skel); + if (test__start_subtest("cmpxchg")) + test_cmpxchg(skel); + if (test__start_subtest("xchg")) + test_xchg(skel); + +cleanup: + arena_atomics__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 1454cebc26..4407ea428e 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -573,6 +573,115 @@ cleanup: close(lsm_fd); } +static void tp_btf_subtest(struct test_bpf_cookie *skel) +{ + __u64 cookie; + int prog_fd, link_fd = -1; + struct bpf_link *link = NULL; + LIBBPF_OPTS(bpf_link_create_opts, link_opts); + LIBBPF_OPTS(bpf_raw_tp_opts, raw_tp_opts); + LIBBPF_OPTS(bpf_trace_opts, trace_opts); + + /* There are three different ways to attach tp_btf (BTF-aware raw + * tracepoint) programs. Let's test all of them. + */ + prog_fd = bpf_program__fd(skel->progs.handle_tp_btf); + + /* low-level BPF_RAW_TRACEPOINT_OPEN command wrapper */ + skel->bss->tp_btf_res = 0; + + raw_tp_opts.cookie = cookie = 0x11000000000000L; + link_fd = bpf_raw_tracepoint_open_opts(prog_fd, &raw_tp_opts); + if (!ASSERT_GE(link_fd, 0, "bpf_raw_tracepoint_open_opts")) + goto cleanup; + + usleep(1); /* trigger */ + close(link_fd); /* detach */ + link_fd = -1; + + ASSERT_EQ(skel->bss->tp_btf_res, cookie, "raw_tp_open_res"); + + /* low-level generic bpf_link_create() API */ + skel->bss->tp_btf_res = 0; + + link_opts.tracing.cookie = cookie = 0x22000000000000L; + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_RAW_TP, &link_opts); + if (!ASSERT_GE(link_fd, 0, "bpf_link_create")) + goto cleanup; + + usleep(1); /* trigger */ + close(link_fd); /* detach */ + link_fd = -1; + + ASSERT_EQ(skel->bss->tp_btf_res, cookie, "link_create_res"); + + /* high-level bpf_link-based bpf_program__attach_trace_opts() API */ + skel->bss->tp_btf_res = 0; + + trace_opts.cookie = cookie = 0x33000000000000L; + link = bpf_program__attach_trace_opts(skel->progs.handle_tp_btf, &trace_opts); + if (!ASSERT_OK_PTR(link, "attach_trace_opts")) + goto cleanup; + + usleep(1); /* trigger */ + bpf_link__destroy(link); /* detach */ + link = NULL; + + ASSERT_EQ(skel->bss->tp_btf_res, cookie, "attach_trace_opts_res"); + +cleanup: + if (link_fd >= 0) + close(link_fd); + bpf_link__destroy(link); +} + +static void raw_tp_subtest(struct test_bpf_cookie *skel) +{ + __u64 cookie; + int prog_fd, link_fd = -1; + struct bpf_link *link = NULL; + LIBBPF_OPTS(bpf_raw_tp_opts, raw_tp_opts); + LIBBPF_OPTS(bpf_raw_tracepoint_opts, opts); + + /* There are two different ways to attach raw_tp programs */ + prog_fd = bpf_program__fd(skel->progs.handle_raw_tp); + + /* low-level BPF_RAW_TRACEPOINT_OPEN command wrapper */ + skel->bss->raw_tp_res = 0; + + raw_tp_opts.tp_name = "sys_enter"; + raw_tp_opts.cookie = cookie = 0x55000000000000L; + link_fd = bpf_raw_tracepoint_open_opts(prog_fd, &raw_tp_opts); + if (!ASSERT_GE(link_fd, 0, "bpf_raw_tracepoint_open_opts")) + goto cleanup; + + usleep(1); /* trigger */ + close(link_fd); /* detach */ + link_fd = -1; + + ASSERT_EQ(skel->bss->raw_tp_res, cookie, "raw_tp_open_res"); + + /* high-level bpf_link-based bpf_program__attach_raw_tracepoint_opts() API */ + skel->bss->raw_tp_res = 0; + + opts.cookie = cookie = 0x66000000000000L; + link = bpf_program__attach_raw_tracepoint_opts(skel->progs.handle_raw_tp, + "sys_enter", &opts); + if (!ASSERT_OK_PTR(link, "attach_raw_tp_opts")) + goto cleanup; + + usleep(1); /* trigger */ + bpf_link__destroy(link); /* detach */ + link = NULL; + + ASSERT_EQ(skel->bss->raw_tp_res, cookie, "attach_raw_tp_opts_res"); + +cleanup: + if (link_fd >= 0) + close(link_fd); + bpf_link__destroy(link); +} + void test_bpf_cookie(void) { struct test_bpf_cookie *skel; @@ -601,6 +710,9 @@ void test_bpf_cookie(void) tracing_subtest(skel); if (test__start_subtest("lsm")) lsm_subtest(skel); - + if (test__start_subtest("tp_btf")) + tp_btf_subtest(skel); + if (test__start_subtest("raw_tp")) + raw_tp_subtest(skel); test_bpf_cookie__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index a88e6e07e4..3f0daf6607 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -13,6 +13,8 @@ #include "tcp_ca_write_sk_pacing.skel.h" #include "tcp_ca_incompl_cong_ops.skel.h" #include "tcp_ca_unsupp_cong_op.skel.h" +#include "tcp_ca_kfunc.skel.h" +#include "bpf_cc_cubic.skel.h" #ifndef ENOTSUPP #define ENOTSUPP 524 @@ -20,7 +22,6 @@ static const unsigned int total_bytes = 10 * 1024 * 1024; static int expected_stg = 0xeB9F; -static int stop; static int settcpca(int fd, const char *tcp_ca) { @@ -33,63 +34,12 @@ static int settcpca(int fd, const char *tcp_ca) return 0; } -static void *server(void *arg) -{ - int lfd = (int)(long)arg, err = 0, fd; - ssize_t nr_sent = 0, bytes = 0; - char batch[1500]; - - fd = accept(lfd, NULL, NULL); - while (fd == -1) { - if (errno == EINTR) - continue; - err = -errno; - goto done; - } - - if (settimeo(fd, 0)) { - err = -errno; - goto done; - } - - while (bytes < total_bytes && !READ_ONCE(stop)) { - nr_sent = send(fd, &batch, - MIN(total_bytes - bytes, sizeof(batch)), 0); - if (nr_sent == -1 && errno == EINTR) - continue; - if (nr_sent == -1) { - err = -errno; - break; - } - bytes += nr_sent; - } - - ASSERT_EQ(bytes, total_bytes, "send"); - -done: - if (fd >= 0) - close(fd); - if (err) { - WRITE_ONCE(stop, 1); - return ERR_PTR(err); - } - return NULL; -} - static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) { - struct sockaddr_in6 sa6 = {}; - ssize_t nr_recv = 0, bytes = 0; int lfd = -1, fd = -1; - pthread_t srv_thread; - socklen_t addrlen = sizeof(sa6); - void *thread_ret; - char batch[1500]; int err; - WRITE_ONCE(stop, 0); - - lfd = socket(AF_INET6, SOCK_STREAM, 0); + lfd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); if (!ASSERT_NEQ(lfd, -1, "socket")) return; @@ -99,23 +49,7 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) return; } - if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca) || - settimeo(lfd, 0) || settimeo(fd, 0)) - goto done; - - /* bind, listen and start server thread to accept */ - sa6.sin6_family = AF_INET6; - sa6.sin6_addr = in6addr_loopback; - err = bind(lfd, (struct sockaddr *)&sa6, addrlen); - if (!ASSERT_NEQ(err, -1, "bind")) - goto done; - - err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen); - if (!ASSERT_NEQ(err, -1, "getsockname")) - goto done; - - err = listen(lfd, 1); - if (!ASSERT_NEQ(err, -1, "listen")) + if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca)) goto done; if (sk_stg_map) { @@ -126,7 +60,7 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) } /* connect to server */ - err = connect(fd, (struct sockaddr *)&sa6, addrlen); + err = connect_fd_to_fd(fd, lfd, 0); if (!ASSERT_NEQ(err, -1, "connect")) goto done; @@ -140,26 +74,7 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) goto done; } - err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd); - if (!ASSERT_OK(err, "pthread_create")) - goto done; - - /* recv total_bytes */ - while (bytes < total_bytes && !READ_ONCE(stop)) { - nr_recv = recv(fd, &batch, - MIN(total_bytes - bytes, sizeof(batch)), 0); - if (nr_recv == -1 && errno == EINTR) - continue; - if (nr_recv == -1) - break; - bytes += nr_recv; - } - - ASSERT_EQ(bytes, total_bytes, "recv"); - - WRITE_ONCE(stop, 1); - pthread_join(srv_thread, &thread_ret); - ASSERT_OK(IS_ERR(thread_ret), "thread_ret"); + ASSERT_OK(send_recv_data(lfd, fd, total_bytes), "send_recv_data"); done: close(lfd); @@ -315,7 +230,7 @@ static void test_rel_setsockopt(void) struct bpf_dctcp_release *rel_skel; libbpf_print_fn_t old_print_fn; - err_str = "unknown func bpf_setsockopt"; + err_str = "program of this type cannot use helper bpf_setsockopt"; found = false; old_print_fn = libbpf_set_print(libbpf_debug_print); @@ -392,7 +307,8 @@ static void test_update_ca(void) return; link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); - ASSERT_OK_PTR(link, "attach_struct_ops"); + if (!ASSERT_OK_PTR(link, "attach_struct_ops")) + goto out; do_test("tcp_ca_update", NULL); saved_ca1_cnt = skel->bss->ca1_cnt; @@ -406,6 +322,7 @@ static void test_update_ca(void) ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt"); bpf_link__destroy(link); +out: tcp_ca_update__destroy(skel); } @@ -421,7 +338,8 @@ static void test_update_wrong(void) return; link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); - ASSERT_OK_PTR(link, "attach_struct_ops"); + if (!ASSERT_OK_PTR(link, "attach_struct_ops")) + goto out; do_test("tcp_ca_update", NULL); saved_ca1_cnt = skel->bss->ca1_cnt; @@ -434,6 +352,7 @@ static void test_update_wrong(void) ASSERT_GT(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt"); bpf_link__destroy(link); +out: tcp_ca_update__destroy(skel); } @@ -448,7 +367,8 @@ static void test_mixed_links(void) return; link_nl = bpf_map__attach_struct_ops(skel->maps.ca_no_link); - ASSERT_OK_PTR(link_nl, "attach_struct_ops_nl"); + if (!ASSERT_OK_PTR(link_nl, "attach_struct_ops_nl")) + goto out; link = bpf_map__attach_struct_ops(skel->maps.ca_update_1); ASSERT_OK_PTR(link, "attach_struct_ops"); @@ -461,6 +381,7 @@ static void test_mixed_links(void) bpf_link__destroy(link); bpf_link__destroy(link_nl); +out: tcp_ca_update__destroy(skel); } @@ -503,7 +424,8 @@ static void test_link_replace(void) bpf_link__destroy(link); link = bpf_map__attach_struct_ops(skel->maps.ca_update_2); - ASSERT_OK_PTR(link, "attach_struct_ops_2nd"); + if (!ASSERT_OK_PTR(link, "attach_struct_ops_2nd")) + goto out; /* BPF_F_REPLACE with a wrong old map Fd. It should fail! * @@ -526,9 +448,40 @@ static void test_link_replace(void) bpf_link__destroy(link); +out: tcp_ca_update__destroy(skel); } +static void test_tcp_ca_kfunc(void) +{ + struct tcp_ca_kfunc *skel; + + skel = tcp_ca_kfunc__open_and_load(); + ASSERT_OK_PTR(skel, "tcp_ca_kfunc__open_and_load"); + tcp_ca_kfunc__destroy(skel); +} + +static void test_cc_cubic(void) +{ + struct bpf_cc_cubic *cc_cubic_skel; + struct bpf_link *link; + + cc_cubic_skel = bpf_cc_cubic__open_and_load(); + if (!ASSERT_OK_PTR(cc_cubic_skel, "bpf_cc_cubic__open_and_load")) + return; + + link = bpf_map__attach_struct_ops(cc_cubic_skel->maps.cc_cubic); + if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) { + bpf_cc_cubic__destroy(cc_cubic_skel); + return; + } + + do_test("bpf_cc_cubic", NULL); + + bpf_link__destroy(link); + bpf_cc_cubic__destroy(cc_cubic_skel); +} + void test_bpf_tcp_ca(void) { if (test__start_subtest("dctcp")) @@ -557,4 +510,8 @@ void test_bpf_tcp_ca(void) test_multi_links(); if (test__start_subtest("link_replace")) test_link_replace(); + if (test__start_subtest("tcp_ca_kfunc")) + test_tcp_ca_kfunc(); + if (test__start_subtest("cc_cubic")) + test_cc_cubic(); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index e9ea38aa82..09a8e6f9b3 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -653,7 +653,7 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d, cmpstr = "(struct file_operations){\n" " .owner = (struct module *)0xffffffffffffffff,\n" -" .llseek = (loff_t (*)(struct file *, loff_t, int))0xffffffffffffffff,"; +" .fop_flags = (fop_flags_t)4294967295,"; ASSERT_STRNEQ(str, cmpstr, strlen(cmpstr), "file_operations"); } diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c index 2a55f717fc..34b59f6bac 100644 --- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c @@ -10,6 +10,7 @@ #include <netinet/tcp.h> #include <test_progs.h> +#include "network_helpers.h" #include "progs/test_cls_redirect.h" #include "test_cls_redirect.skel.h" @@ -35,39 +36,6 @@ struct tuple { struct addr_port dst; }; -static int start_server(const struct sockaddr *addr, socklen_t len, int type) -{ - int fd = socket(addr->sa_family, type, 0); - if (CHECK_FAIL(fd == -1)) - return -1; - if (CHECK_FAIL(bind(fd, addr, len) == -1)) - goto err; - if (type == SOCK_STREAM && CHECK_FAIL(listen(fd, 128) == -1)) - goto err; - - return fd; - -err: - close(fd); - return -1; -} - -static int connect_to_server(const struct sockaddr *addr, socklen_t len, - int type) -{ - int fd = socket(addr->sa_family, type, 0); - if (CHECK_FAIL(fd == -1)) - return -1; - if (CHECK_FAIL(connect(fd, addr, len))) - goto err; - - return fd; - -err: - close(fd); - return -1; -} - static bool fill_addr_port(const struct sockaddr *sa, struct addr_port *ap) { const struct sockaddr_in6 *in6; @@ -98,14 +66,14 @@ static bool set_up_conn(const struct sockaddr *addr, socklen_t len, int type, socklen_t slen = sizeof(ss); struct sockaddr *sa = (struct sockaddr *)&ss; - *server = start_server(addr, len, type); + *server = start_server_addr(type, (struct sockaddr_storage *)addr, len, NULL); if (*server < 0) return false; if (CHECK_FAIL(getsockname(*server, sa, &slen))) goto close_server; - *conn = connect_to_server(sa, slen, type); + *conn = connect_to_addr(type, (struct sockaddr_storage *)sa, slen, NULL); if (*conn < 0) goto close_server; diff --git a/tools/testing/selftests/bpf/prog_tests/crypto_sanity.c b/tools/testing/selftests/bpf/prog_tests/crypto_sanity.c new file mode 100644 index 0000000000..b1a3a49a82 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/crypto_sanity.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <sys/types.h> +#include <sys/socket.h> +#include <net/if.h> +#include <linux/in6.h> +#include <linux/if_alg.h> + +#include "test_progs.h" +#include "network_helpers.h" +#include "crypto_sanity.skel.h" +#include "crypto_basic.skel.h" + +#define NS_TEST "crypto_sanity_ns" +#define IPV6_IFACE_ADDR "face::1" +static const unsigned char crypto_key[] = "testtest12345678"; +static const char plain_text[] = "stringtoencrypt0"; +static int opfd = -1, tfmfd = -1; +static const char algo[] = "ecb(aes)"; +static int init_afalg(void) +{ + struct sockaddr_alg sa = { + .salg_family = AF_ALG, + .salg_type = "skcipher", + .salg_name = "ecb(aes)" + }; + + tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0); + if (tfmfd == -1) + return errno; + if (bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa)) == -1) + return errno; + if (setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, crypto_key, 16) == -1) + return errno; + opfd = accept(tfmfd, NULL, 0); + if (opfd == -1) + return errno; + return 0; +} + +static void deinit_afalg(void) +{ + if (tfmfd != -1) + close(tfmfd); + if (opfd != -1) + close(opfd); +} + +static void do_crypt_afalg(const void *src, void *dst, int size, bool encrypt) +{ + struct msghdr msg = {}; + struct cmsghdr *cmsg; + char cbuf[CMSG_SPACE(4)] = {0}; + struct iovec iov; + + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_ALG; + cmsg->cmsg_type = ALG_SET_OP; + cmsg->cmsg_len = CMSG_LEN(4); + *(__u32 *)CMSG_DATA(cmsg) = encrypt ? ALG_OP_ENCRYPT : ALG_OP_DECRYPT; + + iov.iov_base = (char *)src; + iov.iov_len = size; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + sendmsg(opfd, &msg, 0); + read(opfd, dst, size); +} + +void test_crypto_basic(void) +{ + RUN_TESTS(crypto_basic); +} + +void test_crypto_sanity(void) +{ + LIBBPF_OPTS(bpf_tc_hook, qdisc_hook, .attach_point = BPF_TC_EGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_attach_enc); + LIBBPF_OPTS(bpf_tc_opts, tc_attach_dec); + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct nstoken *nstoken = NULL; + struct crypto_sanity *skel; + char afalg_plain[16] = {0}; + char afalg_dst[16] = {0}; + struct sockaddr_in6 addr; + int sockfd, err, pfd; + socklen_t addrlen; + u16 udp_test_port; + + skel = crypto_sanity__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel open")) + return; + + SYS(fail, "ip netns add %s", NS_TEST); + SYS(fail, "ip -net %s -6 addr add %s/128 dev lo nodad", NS_TEST, IPV6_IFACE_ADDR); + SYS(fail, "ip -net %s link set dev lo up", NS_TEST); + + nstoken = open_netns(NS_TEST); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto fail; + + err = init_afalg(); + if (!ASSERT_OK(err, "AF_ALG init fail")) + goto fail; + + qdisc_hook.ifindex = if_nametoindex("lo"); + if (!ASSERT_GT(qdisc_hook.ifindex, 0, "if_nametoindex lo")) + goto fail; + + skel->bss->key_len = 16; + skel->bss->authsize = 0; + udp_test_port = skel->data->udp_test_port; + memcpy(skel->bss->key, crypto_key, sizeof(crypto_key)); + snprintf(skel->bss->algo, 128, "%s", algo); + pfd = bpf_program__fd(skel->progs.skb_crypto_setup); + if (!ASSERT_GT(pfd, 0, "skb_crypto_setup fd")) + goto fail; + + err = bpf_prog_test_run_opts(pfd, &opts); + if (!ASSERT_OK(err, "skb_crypto_setup") || + !ASSERT_OK(opts.retval, "skb_crypto_setup retval")) + goto fail; + + if (!ASSERT_OK(skel->bss->status, "skb_crypto_setup status")) + goto fail; + + err = bpf_tc_hook_create(&qdisc_hook); + if (!ASSERT_OK(err, "create qdisc hook")) + goto fail; + + addrlen = sizeof(addr); + err = make_sockaddr(AF_INET6, IPV6_IFACE_ADDR, udp_test_port, + (void *)&addr, &addrlen); + if (!ASSERT_OK(err, "make_sockaddr")) + goto fail; + + tc_attach_enc.prog_fd = bpf_program__fd(skel->progs.encrypt_sanity); + err = bpf_tc_attach(&qdisc_hook, &tc_attach_enc); + if (!ASSERT_OK(err, "attach encrypt filter")) + goto fail; + + sockfd = socket(AF_INET6, SOCK_DGRAM, 0); + if (!ASSERT_NEQ(sockfd, -1, "encrypt socket")) + goto fail; + err = sendto(sockfd, plain_text, sizeof(plain_text), 0, (void *)&addr, addrlen); + close(sockfd); + if (!ASSERT_EQ(err, sizeof(plain_text), "encrypt send")) + goto fail; + + do_crypt_afalg(plain_text, afalg_dst, sizeof(afalg_dst), true); + + if (!ASSERT_OK(skel->bss->status, "encrypt status")) + goto fail; + if (!ASSERT_STRNEQ(skel->bss->dst, afalg_dst, sizeof(afalg_dst), "encrypt AF_ALG")) + goto fail; + + tc_attach_enc.flags = tc_attach_enc.prog_fd = tc_attach_enc.prog_id = 0; + err = bpf_tc_detach(&qdisc_hook, &tc_attach_enc); + if (!ASSERT_OK(err, "bpf_tc_detach encrypt")) + goto fail; + + tc_attach_dec.prog_fd = bpf_program__fd(skel->progs.decrypt_sanity); + err = bpf_tc_attach(&qdisc_hook, &tc_attach_dec); + if (!ASSERT_OK(err, "attach decrypt filter")) + goto fail; + + sockfd = socket(AF_INET6, SOCK_DGRAM, 0); + if (!ASSERT_NEQ(sockfd, -1, "decrypt socket")) + goto fail; + err = sendto(sockfd, afalg_dst, sizeof(afalg_dst), 0, (void *)&addr, addrlen); + close(sockfd); + if (!ASSERT_EQ(err, sizeof(afalg_dst), "decrypt send")) + goto fail; + + do_crypt_afalg(afalg_dst, afalg_plain, sizeof(afalg_plain), false); + + if (!ASSERT_OK(skel->bss->status, "decrypt status")) + goto fail; + if (!ASSERT_STRNEQ(skel->bss->dst, afalg_plain, sizeof(afalg_plain), "decrypt AF_ALG")) + goto fail; + + tc_attach_dec.flags = tc_attach_dec.prog_fd = tc_attach_dec.prog_id = 0; + err = bpf_tc_detach(&qdisc_hook, &tc_attach_dec); + ASSERT_OK(err, "bpf_tc_detach decrypt"); + +fail: + close_netns(nstoken); + deinit_afalg(); + SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null"); + crypto_sanity__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c index 261228eb68..438583e1f2 100644 --- a/tools/testing/selftests/bpf/prog_tests/empty_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c @@ -94,6 +94,8 @@ void test_empty_skb(void) SYS(out, "ip netns add empty_skb"); tok = open_netns("empty_skb"); + if (!ASSERT_OK_PTR(tok, "setns")) + goto out; SYS(out, "ip link add veth0 type veth peer veth1"); SYS(out, "ip link set dev veth0 up"); SYS(out, "ip link set dev veth1 up"); diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c index f949647dbb..552a0875ca 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c @@ -21,13 +21,13 @@ static int do_sleep(void *skel) } #define STACK_SIZE (1024 * 1024) -static char child_stack[STACK_SIZE]; void test_fexit_sleep(void) { struct fexit_sleep_lskel *fexit_skel = NULL; int wstatus, duration = 0; pid_t cpid; + char *child_stack = NULL; int err, fexit_cnt; fexit_skel = fexit_sleep_lskel__open_and_load(); @@ -38,6 +38,11 @@ void test_fexit_sleep(void) if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) goto cleanup; + child_stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | + MAP_ANONYMOUS | MAP_STACK, -1, 0); + if (!ASSERT_NEQ(child_stack, MAP_FAILED, "mmap")) + goto cleanup; + cpid = clone(do_sleep, child_stack + STACK_SIZE, CLONE_FILES | SIGCHLD, fexit_skel); if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno))) goto cleanup; @@ -78,5 +83,6 @@ void test_fexit_sleep(void) goto cleanup; cleanup: + munmap(child_stack, STACK_SIZE); fexit_sleep_lskel__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c index 3379df2d4c..bd76589580 100644 --- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c @@ -26,6 +26,17 @@ #define IPV6_TBID_ADDR "fd00::FFFF" #define IPV6_TBID_NET "fd00::" #define IPV6_TBID_DST "fd00::2" +#define MARK_NO_POLICY 33 +#define MARK 42 +#define MARK_TABLE "200" +#define IPV4_REMOTE_DST "1.2.3.4" +#define IPV4_LOCAL "10.4.0.3" +#define IPV4_GW1 "10.4.0.1" +#define IPV4_GW2 "10.4.0.2" +#define IPV6_REMOTE_DST "be:ef::b0:10" +#define IPV6_LOCAL "fd01::3" +#define IPV6_GW1 "fd01::1" +#define IPV6_GW2 "fd01::2" #define DMAC "11:11:11:11:11:11" #define DMAC_INIT { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, } #define DMAC2 "01:01:01:01:01:01" @@ -36,9 +47,11 @@ struct fib_lookup_test { const char *daddr; int expected_ret; const char *expected_src; + const char *expected_dst; int lookup_flags; __u32 tbid; __u8 dmac[6]; + __u32 mark; }; static const struct fib_lookup_test tests[] = { @@ -90,10 +103,47 @@ static const struct fib_lookup_test tests[] = { .daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, .expected_src = IPV6_IFACE_ADDR_SEC, .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, }, + /* policy routing */ + { .desc = "IPv4 policy routing, default", + .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_dst = IPV4_GW1, + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, }, + { .desc = "IPv4 policy routing, mark doesn't point to a policy", + .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_dst = IPV4_GW1, + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, + .mark = MARK_NO_POLICY, }, + { .desc = "IPv4 policy routing, mark points to a policy", + .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_dst = IPV4_GW2, + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, + .mark = MARK, }, + { .desc = "IPv4 policy routing, mark points to a policy, but no flag", + .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_dst = IPV4_GW1, + .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH, + .mark = MARK, }, + { .desc = "IPv6 policy routing, default", + .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_dst = IPV6_GW1, + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, }, + { .desc = "IPv6 policy routing, mark doesn't point to a policy", + .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_dst = IPV6_GW1, + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, + .mark = MARK_NO_POLICY, }, + { .desc = "IPv6 policy routing, mark points to a policy", + .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_dst = IPV6_GW2, + .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, + .mark = MARK, }, + { .desc = "IPv6 policy routing, mark points to a policy, but no flag", + .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .expected_dst = IPV6_GW1, + .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH, + .mark = MARK, }, }; -static int ifindex; - static int setup_netns(void) { int err; @@ -144,12 +194,24 @@ static int setup_netns(void) if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth1.forwarding)")) goto fail; + /* Setup for policy routing tests */ + SYS(fail, "ip addr add %s/24 dev veth1", IPV4_LOCAL); + SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_LOCAL); + SYS(fail, "ip route add %s/32 via %s", IPV4_REMOTE_DST, IPV4_GW1); + SYS(fail, "ip route add %s/32 via %s table %s", IPV4_REMOTE_DST, IPV4_GW2, MARK_TABLE); + SYS(fail, "ip -6 route add %s/128 via %s", IPV6_REMOTE_DST, IPV6_GW1); + SYS(fail, "ip -6 route add %s/128 via %s table %s", IPV6_REMOTE_DST, IPV6_GW2, MARK_TABLE); + SYS(fail, "ip rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE); + SYS(fail, "ip -6 rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE); + return 0; fail: return -1; } -static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_lookup_test *test) +static int set_lookup_params(struct bpf_fib_lookup *params, + const struct fib_lookup_test *test, + int ifindex) { int ret; @@ -158,6 +220,7 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo params->l4_protocol = IPPROTO_TCP; params->ifindex = ifindex; params->tbid = test->tbid; + params->mark = test->mark; if (inet_pton(AF_INET6, test->daddr, params->ipv6_dst) == 1) { params->family = AF_INET6; @@ -190,40 +253,45 @@ static void mac_str(char *b, const __u8 *mac) mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); } -static void assert_src_ip(struct bpf_fib_lookup *fib_params, const char *expected_src) +static void assert_ip_address(int family, void *addr, const char *expected_str) { + char str[INET6_ADDRSTRLEN]; + u8 expected_addr[16]; + int addr_len = 0; int ret; - __u32 src6[4]; - __be32 src4; - switch (fib_params->family) { + switch (family) { case AF_INET6: - ret = inet_pton(AF_INET6, expected_src, src6); - ASSERT_EQ(ret, 1, "inet_pton(expected_src)"); - - ret = memcmp(src6, fib_params->ipv6_src, sizeof(fib_params->ipv6_src)); - if (!ASSERT_EQ(ret, 0, "fib_lookup ipv6 src")) { - char str_src6[64]; - - inet_ntop(AF_INET6, fib_params->ipv6_src, str_src6, - sizeof(str_src6)); - printf("ipv6 expected %s actual %s ", expected_src, - str_src6); - } - + ret = inet_pton(AF_INET6, expected_str, expected_addr); + ASSERT_EQ(ret, 1, "inet_pton(AF_INET6, expected_str)"); + addr_len = 16; break; case AF_INET: - ret = inet_pton(AF_INET, expected_src, &src4); - ASSERT_EQ(ret, 1, "inet_pton(expected_src)"); - - ASSERT_EQ(fib_params->ipv4_src, src4, "fib_lookup ipv4 src"); - + ret = inet_pton(AF_INET, expected_str, expected_addr); + ASSERT_EQ(ret, 1, "inet_pton(AF_INET, expected_str)"); + addr_len = 4; break; default: - PRINT_FAIL("invalid addr family: %d", fib_params->family); + PRINT_FAIL("invalid address family: %d", family); + break; + } + + if (memcmp(addr, expected_addr, addr_len)) { + inet_ntop(family, addr, str, sizeof(str)); + PRINT_FAIL("expected %s actual %s ", expected_str, str); } } +static void assert_src_ip(struct bpf_fib_lookup *params, const char *expected) +{ + assert_ip_address(params->family, params->ipv6_src, expected); +} + +static void assert_dst_ip(struct bpf_fib_lookup *params, const char *expected) +{ + assert_ip_address(params->family, params->ipv6_dst, expected); +} + void test_fib_lookup(void) { struct bpf_fib_lookup *fib_params; @@ -256,15 +324,18 @@ void test_fib_lookup(void) if (setup_netns()) goto fail; - ifindex = if_nametoindex("veth1"); - skb.ifindex = ifindex; + skb.ifindex = if_nametoindex("veth1"); + if (!ASSERT_NEQ(skb.ifindex, 0, "if_nametoindex(veth1)")) + goto fail; + fib_params = &skel->bss->fib_params; for (i = 0; i < ARRAY_SIZE(tests); i++) { printf("Testing %s ", tests[i].desc); - if (set_lookup_params(fib_params, &tests[i])) + if (set_lookup_params(fib_params, &tests[i], skb.ifindex)) continue; + skel->bss->fib_lookup_ret = -1; skel->bss->lookup_flags = tests[i].lookup_flags; @@ -278,6 +349,9 @@ void test_fib_lookup(void) if (tests[i].expected_src) assert_src_ip(fib_params, tests[i].expected_src); + if (tests[i].expected_dst) + assert_dst_ip(fib_params, tests[i].expected_dst); + ret = memcmp(tests[i].dmac, fib_params->dmac, sizeof(tests[i].dmac)); if (!ASSERT_EQ(ret, 0, "dmac not match")) { char expected[18], actual[18]; diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index c4773173a4..9e5f387391 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -2,7 +2,6 @@ #include <test_progs.h> #include <network_helpers.h> #include <error.h> -#include <linux/if.h> #include <linux/if_tun.h> #include <sys/uio.h> diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c index 8963f8a549..09f6487f58 100644 --- a/tools/testing/selftests/bpf/prog_tests/for_each.c +++ b/tools/testing/selftests/bpf/prog_tests/for_each.c @@ -5,6 +5,7 @@ #include "for_each_hash_map_elem.skel.h" #include "for_each_array_map_elem.skel.h" #include "for_each_map_elem_write_key.skel.h" +#include "for_each_multi_maps.skel.h" static unsigned int duration; @@ -143,6 +144,65 @@ static void test_write_map_key(void) for_each_map_elem_write_key__destroy(skel); } +static void test_multi_maps(void) +{ + struct for_each_multi_maps *skel; + __u64 val, array_total, hash_total; + __u32 key, max_entries; + int i, err; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + + skel = for_each_multi_maps__open_and_load(); + if (!ASSERT_OK_PTR(skel, "for_each_multi_maps__open_and_load")) + return; + + array_total = 0; + max_entries = bpf_map__max_entries(skel->maps.arraymap); + for (i = 0; i < max_entries; i++) { + key = i; + val = i + 1; + array_total += val; + err = bpf_map__update_elem(skel->maps.arraymap, &key, sizeof(key), + &val, sizeof(val), BPF_ANY); + if (!ASSERT_OK(err, "array_map_update")) + goto out; + } + + hash_total = 0; + max_entries = bpf_map__max_entries(skel->maps.hashmap); + for (i = 0; i < max_entries; i++) { + key = i + 100; + val = i + 1; + hash_total += val; + err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key), + &val, sizeof(val), BPF_ANY); + if (!ASSERT_OK(err, "hash_map_update")) + goto out; + } + + skel->bss->data_output = 0; + skel->bss->use_array = 1; + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + ASSERT_OK(topts.retval, "retval"); + ASSERT_EQ(skel->bss->data_output, array_total, "array output"); + + skel->bss->data_output = 0; + skel->bss->use_array = 0; + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + ASSERT_OK(topts.retval, "retval"); + ASSERT_EQ(skel->bss->data_output, hash_total, "hash output"); + +out: + for_each_multi_maps__destroy(skel); +} + void test_for_each(void) { if (test__start_subtest("hash_map")) @@ -151,4 +211,6 @@ void test_for_each(void) test_array_map(); if (test__start_subtest("write_map_key")) test_write_map_key(); + if (test__start_subtest("multi_maps")) + test_multi_maps(); } diff --git a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c index 8dd2af9081..284764e717 100644 --- a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c +++ b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c @@ -88,6 +88,8 @@ static int attach(struct ip_check_defrag *skel, bool ipv6) int err = -1; nstoken = open_netns(NS1); + if (!ASSERT_OK_PTR(nstoken, "setns")) + goto out; skel->links.defrag = bpf_program__attach_netfilter(skel->progs.defrag, &opts); if (!ASSERT_OK_PTR(skel->links.defrag, "program attach")) diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 05000810e2..960c9323d1 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -4,6 +4,8 @@ #include "trace_helpers.h" #include "kprobe_multi_empty.skel.h" #include "kprobe_multi_override.skel.h" +#include "kprobe_multi_session.skel.h" +#include "kprobe_multi_session_cookie.skel.h" #include "bpf/libbpf_internal.h" #include "bpf/hashmap.h" @@ -326,6 +328,74 @@ cleanup: kprobe_multi__destroy(skel); } +static void test_session_skel_api(void) +{ + struct kprobe_multi_session *skel = NULL; + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct bpf_link *link = NULL; + int i, err, prog_fd; + + skel = kprobe_multi_session__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kprobe_multi_session__open_and_load")) + return; + + skel->bss->pid = getpid(); + + err = kprobe_multi_session__attach(skel); + if (!ASSERT_OK(err, " kprobe_multi_session__attach")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.trigger); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); + + /* bpf_fentry_test1-4 trigger return probe, result is 2 */ + for (i = 0; i < 4; i++) + ASSERT_EQ(skel->bss->kprobe_session_result[i], 2, "kprobe_session_result"); + + /* bpf_fentry_test5-8 trigger only entry probe, result is 1 */ + for (i = 4; i < 8; i++) + ASSERT_EQ(skel->bss->kprobe_session_result[i], 1, "kprobe_session_result"); + +cleanup: + bpf_link__destroy(link); + kprobe_multi_session__destroy(skel); +} + +static void test_session_cookie_skel_api(void) +{ + struct kprobe_multi_session_cookie *skel = NULL; + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct bpf_link *link = NULL; + int err, prog_fd; + + skel = kprobe_multi_session_cookie__open_and_load(); + if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load")) + return; + + skel->bss->pid = getpid(); + + err = kprobe_multi_session_cookie__attach(skel); + if (!ASSERT_OK(err, " kprobe_multi_wrapper__attach")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.trigger); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); + + ASSERT_EQ(skel->bss->test_kprobe_1_result, 1, "test_kprobe_1_result"); + ASSERT_EQ(skel->bss->test_kprobe_2_result, 2, "test_kprobe_2_result"); + ASSERT_EQ(skel->bss->test_kprobe_3_result, 3, "test_kprobe_3_result"); + +cleanup: + bpf_link__destroy(link); + kprobe_multi_session_cookie__destroy(skel); +} + static size_t symbol_hash(long key, void *ctx __maybe_unused) { return str_hash((const char *) key); @@ -336,15 +406,80 @@ static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused) return strcmp((const char *) key1, (const char *) key2) == 0; } +static bool is_invalid_entry(char *buf, bool kernel) +{ + if (kernel && strchr(buf, '[')) + return true; + if (!kernel && !strchr(buf, '[')) + return true; + return false; +} + +static bool skip_entry(char *name) +{ + /* + * We attach to almost all kernel functions and some of them + * will cause 'suspicious RCU usage' when fprobe is attached + * to them. Filter out the current culprits - arch_cpu_idle + * default_idle and rcu_* functions. + */ + if (!strcmp(name, "arch_cpu_idle")) + return true; + if (!strcmp(name, "default_idle")) + return true; + if (!strncmp(name, "rcu_", 4)) + return true; + if (!strcmp(name, "bpf_dispatcher_xdp_func")) + return true; + if (!strncmp(name, "__ftrace_invalid_address__", + sizeof("__ftrace_invalid_address__") - 1)) + return true; + return false; +} + +/* Do comparision by ignoring '.llvm.<hash>' suffixes. */ +static int compare_name(const char *name1, const char *name2) +{ + const char *res1, *res2; + int len1, len2; + + res1 = strstr(name1, ".llvm."); + res2 = strstr(name2, ".llvm."); + len1 = res1 ? res1 - name1 : strlen(name1); + len2 = res2 ? res2 - name2 : strlen(name2); + + if (len1 == len2) + return strncmp(name1, name2, len1); + if (len1 < len2) + return strncmp(name1, name2, len1) <= 0 ? -1 : 1; + return strncmp(name1, name2, len2) >= 0 ? 1 : -1; +} + +static int load_kallsyms_compare(const void *p1, const void *p2) +{ + return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name); +} + +static int search_kallsyms_compare(const void *p1, const struct ksym *p2) +{ + return compare_name(p1, p2->name); +} + static int get_syms(char ***symsp, size_t *cntp, bool kernel) { - size_t cap = 0, cnt = 0, i; - char *name = NULL, **syms = NULL; + size_t cap = 0, cnt = 0; + char *name = NULL, *ksym_name, **syms = NULL; struct hashmap *map; + struct ksyms *ksyms; + struct ksym *ks; char buf[256]; FILE *f; int err = 0; + ksyms = load_kallsyms_custom_local(load_kallsyms_compare); + if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_custom_local")) + return -EINVAL; + /* * The available_filter_functions contains many duplicates, * but other than that all symbols are usable in kprobe multi @@ -368,33 +503,23 @@ static int get_syms(char ***symsp, size_t *cntp, bool kernel) } while (fgets(buf, sizeof(buf), f)) { - if (kernel && strchr(buf, '[')) - continue; - if (!kernel && !strchr(buf, '[')) + if (is_invalid_entry(buf, kernel)) continue; free(name); if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1) continue; - /* - * We attach to almost all kernel functions and some of them - * will cause 'suspicious RCU usage' when fprobe is attached - * to them. Filter out the current culprits - arch_cpu_idle - * default_idle and rcu_* functions. - */ - if (!strcmp(name, "arch_cpu_idle")) - continue; - if (!strcmp(name, "default_idle")) - continue; - if (!strncmp(name, "rcu_", 4)) - continue; - if (!strcmp(name, "bpf_dispatcher_xdp_func")) - continue; - if (!strncmp(name, "__ftrace_invalid_address__", - sizeof("__ftrace_invalid_address__") - 1)) + if (skip_entry(name)) continue; - err = hashmap__add(map, name, 0); + ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare); + if (!ks) { + err = -EINVAL; + goto error; + } + + ksym_name = ks->name; + err = hashmap__add(map, ksym_name, 0); if (err == -EEXIST) { err = 0; continue; @@ -407,8 +532,7 @@ static int get_syms(char ***symsp, size_t *cntp, bool kernel) if (err) goto error; - syms[cnt++] = name; - name = NULL; + syms[cnt++] = ksym_name; } *symsp = syms; @@ -418,42 +542,88 @@ error: free(name); fclose(f); hashmap__free(map); - if (err) { - for (i = 0; i < cnt; i++) - free(syms[i]); + if (err) free(syms); + return err; +} + +static int get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel) +{ + unsigned long *addr, *addrs, *tmp_addrs; + int err = 0, max_cnt, inc_cnt; + char *name = NULL; + size_t cnt = 0; + char buf[256]; + FILE *f; + + if (access("/sys/kernel/tracing/trace", F_OK) == 0) + f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r"); + else + f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r"); + + if (!f) + return -ENOENT; + + /* In my local setup, the number of entries is 50k+ so Let us initially + * allocate space to hold 64k entries. If 64k is not enough, incrementally + * increase 1k each time. + */ + max_cnt = 65536; + inc_cnt = 1024; + addrs = malloc(max_cnt * sizeof(long)); + if (addrs == NULL) { + err = -ENOMEM; + goto error; + } + + while (fgets(buf, sizeof(buf), f)) { + if (is_invalid_entry(buf, kernel)) + continue; + + free(name); + if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2) + continue; + if (skip_entry(name)) + continue; + + if (cnt == max_cnt) { + max_cnt += inc_cnt; + tmp_addrs = realloc(addrs, max_cnt); + if (!tmp_addrs) { + err = -ENOMEM; + goto error; + } + addrs = tmp_addrs; + } + + addrs[cnt++] = (unsigned long)addr; } + + *addrsp = addrs; + *cntp = cnt; + +error: + free(name); + fclose(f); + if (err) + free(addrs); return err; } -static void test_kprobe_multi_bench_attach(bool kernel) +static void do_bench_test(struct kprobe_multi_empty *skel, struct bpf_kprobe_multi_opts *opts) { - LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); - struct kprobe_multi_empty *skel = NULL; long attach_start_ns, attach_end_ns; long detach_start_ns, detach_end_ns; double attach_delta, detach_delta; struct bpf_link *link = NULL; - char **syms = NULL; - size_t cnt = 0, i; - - if (!ASSERT_OK(get_syms(&syms, &cnt, kernel), "get_syms")) - return; - - skel = kprobe_multi_empty__open_and_load(); - if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load")) - goto cleanup; - - opts.syms = (const char **) syms; - opts.cnt = cnt; attach_start_ns = get_time_ns(); link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_empty, - NULL, &opts); + NULL, opts); attach_end_ns = get_time_ns(); if (!ASSERT_OK_PTR(link, "bpf_program__attach_kprobe_multi_opts")) - goto cleanup; + return; detach_start_ns = get_time_ns(); bpf_link__destroy(link); @@ -462,17 +632,65 @@ static void test_kprobe_multi_bench_attach(bool kernel) attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0; detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0; - printf("%s: found %lu functions\n", __func__, cnt); + printf("%s: found %lu functions\n", __func__, opts->cnt); printf("%s: attached in %7.3lfs\n", __func__, attach_delta); printf("%s: detached in %7.3lfs\n", __func__, detach_delta); +} + +static void test_kprobe_multi_bench_attach(bool kernel) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + struct kprobe_multi_empty *skel = NULL; + char **syms = NULL; + size_t cnt = 0; + + if (!ASSERT_OK(get_syms(&syms, &cnt, kernel), "get_syms")) + return; + + skel = kprobe_multi_empty__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load")) + goto cleanup; + + opts.syms = (const char **) syms; + opts.cnt = cnt; + + do_bench_test(skel, &opts); cleanup: kprobe_multi_empty__destroy(skel); - if (syms) { - for (i = 0; i < cnt; i++) - free(syms[i]); + if (syms) free(syms); +} + +static void test_kprobe_multi_bench_attach_addr(bool kernel) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + struct kprobe_multi_empty *skel = NULL; + unsigned long *addrs = NULL; + size_t cnt = 0; + int err; + + err = get_addrs(&addrs, &cnt, kernel); + if (err == -ENOENT) { + test__skip(); + return; } + + if (!ASSERT_OK(err, "get_addrs")) + return; + + skel = kprobe_multi_empty__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load")) + goto cleanup; + + opts.addrs = addrs; + opts.cnt = cnt; + + do_bench_test(skel, &opts); + +cleanup: + kprobe_multi_empty__destroy(skel); + free(addrs); } static void test_attach_override(void) @@ -515,6 +733,10 @@ void serial_test_kprobe_multi_bench_attach(void) test_kprobe_multi_bench_attach(true); if (test__start_subtest("modules")) test_kprobe_multi_bench_attach(false); + if (test__start_subtest("kernel")) + test_kprobe_multi_bench_attach_addr(true); + if (test__start_subtest("modules")) + test_kprobe_multi_bench_attach_addr(false); } void test_kprobe_multi_test(void) @@ -538,4 +760,8 @@ void test_kprobe_multi_test(void) test_attach_api_fails(); if (test__start_subtest("attach_override")) test_attach_override(); + if (test__start_subtest("session")) + test_session_skel_api(); + if (test__start_subtest("session_cookie")) + test_session_cookie_skel_api(); } diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c index b295969b26..dc7aab532f 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c @@ -5,8 +5,6 @@ #include "test_ksyms.skel.h" #include <sys/stat.h> -static int duration; - void test_ksyms(void) { const char *btf_path = "/sys/kernel/btf/vmlinux"; @@ -18,43 +16,37 @@ void test_ksyms(void) int err; err = kallsyms_find("bpf_link_fops", &link_fops_addr); - if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno)) + if (!ASSERT_NEQ(err, -EINVAL, "bpf_link_fops: kallsyms_fopen")) return; - if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_link_fops' not found\n")) + if (!ASSERT_NEQ(err, -ENOENT, "bpf_link_fops: ksym_find")) return; err = kallsyms_find("__per_cpu_start", &per_cpu_start_addr); - if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno)) + if (!ASSERT_NEQ(err, -EINVAL, "__per_cpu_start: kallsyms_fopen")) return; - if (CHECK(err == -ENOENT, "ksym_find", "symbol 'per_cpu_start' not found\n")) + if (!ASSERT_NEQ(err, -ENOENT, "__per_cpu_start: ksym_find")) return; - if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno)) + if (!ASSERT_OK(stat(btf_path, &st), "stat_btf")) return; btf_size = st.st_size; skel = test_ksyms__open_and_load(); - if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n")) + if (!ASSERT_OK_PTR(skel, "test_ksyms__open_and_load")) return; err = test_ksyms__attach(skel); - if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + if (!ASSERT_OK(err, "test_ksyms__attach")) goto cleanup; /* trigger tracepoint */ usleep(1); data = skel->data; - CHECK(data->out__bpf_link_fops != link_fops_addr, "bpf_link_fops", - "got 0x%llx, exp 0x%llx\n", - data->out__bpf_link_fops, link_fops_addr); - CHECK(data->out__bpf_link_fops1 != 0, "bpf_link_fops1", - "got %llu, exp %llu\n", data->out__bpf_link_fops1, (__u64)0); - CHECK(data->out__btf_size != btf_size, "btf_size", - "got %llu, exp %llu\n", data->out__btf_size, btf_size); - CHECK(data->out__per_cpu_start != per_cpu_start_addr, "__per_cpu_start", - "got %llu, exp %llu\n", data->out__per_cpu_start, - per_cpu_start_addr); + ASSERT_EQ(data->out__bpf_link_fops, link_fops_addr, "bpf_link_fops"); + ASSERT_EQ(data->out__bpf_link_fops1, 0, "bpf_link_fops1"); + ASSERT_EQ(data->out__btf_size, btf_size, "btf_size"); + ASSERT_EQ(data->out__per_cpu_start, per_cpu_start_addr, "__per_cpu_start"); cleanup: test_ksyms__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c index f53d658ed0..6d391d95f9 100644 --- a/tools/testing/selftests/bpf/prog_tests/module_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c @@ -51,6 +51,10 @@ void test_module_attach(void) 0, "bpf_testmod_test_read"); ASSERT_OK(err, "set_attach_target"); + err = bpf_program__set_attach_target(skel->progs.handle_fentry_explicit_manual, + 0, "bpf_testmod:bpf_testmod_test_read"); + ASSERT_OK(err, "set_attach_target_explicit"); + err = test_module_attach__load(skel); if (CHECK(err, "skel_load", "failed to load skeleton\n")) return; @@ -70,6 +74,8 @@ void test_module_attach(void) ASSERT_EQ(bss->tp_btf_read_sz, READ_SZ, "tp_btf"); ASSERT_EQ(bss->fentry_read_sz, READ_SZ, "fentry"); ASSERT_EQ(bss->fentry_manual_read_sz, READ_SZ, "fentry_manual"); + ASSERT_EQ(bss->fentry_explicit_read_sz, READ_SZ, "fentry_explicit"); + ASSERT_EQ(bss->fentry_explicit_manual_read_sz, READ_SZ, "fentry_explicit_manual"); ASSERT_EQ(bss->fexit_read_sz, READ_SZ, "fexit"); ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet"); ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret"); diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index 8f8d792307..274d2e033e 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -82,6 +82,22 @@ static void cleanup_netns(struct nstoken *nstoken) SYS_NOFAIL("ip netns del %s", NS_TEST); } +static int start_mptcp_server(int family, const char *addr_str, __u16 port, + int timeout_ms) +{ + struct network_helper_opts opts = { + .timeout_ms = timeout_ms, + .proto = IPPROTO_MPTCP, + }; + struct sockaddr_storage addr; + socklen_t addrlen; + + if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) + return -1; + + return start_server_addr(SOCK_STREAM, &addr, addrlen, &opts); +} + static int verify_tsk(int map_fd, int client_fd) { int err, cfd = client_fd; @@ -273,6 +289,8 @@ static int run_mptcpify(int cgroup_fd) if (!ASSERT_OK_PTR(mptcpify_skel, "skel_open_load")) return libbpf_get_error(mptcpify_skel); + mptcpify_skel->bss->pid = getpid(); + err = mptcpify__attach(mptcpify_skel); if (!ASSERT_OK(err, "skel_attach")) goto out; diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c index 24d493482f..e72d75d6ba 100644 --- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -12,77 +12,229 @@ #include <sys/wait.h> #include <sys/mount.h> #include <sys/fcntl.h> +#include "network_helpers.h" #define STACK_SIZE (1024 * 1024) static char child_stack[STACK_SIZE]; -static int test_current_pid_tgid(void *args) +static int get_pid_tgid(pid_t *pid, pid_t *tgid, + struct test_ns_current_pid_tgid__bss *bss) { - struct test_ns_current_pid_tgid__bss *bss; - struct test_ns_current_pid_tgid *skel; - int err = -1, duration = 0; - pid_t tgid, pid; struct stat st; + int err; - skel = test_ns_current_pid_tgid__open_and_load(); - if (CHECK(!skel, "skel_open_load", "failed to load skeleton\n")) - goto cleanup; - - pid = syscall(SYS_gettid); - tgid = getpid(); + *pid = syscall(SYS_gettid); + *tgid = getpid(); err = stat("/proc/self/ns/pid", &st); - if (CHECK(err, "stat", "failed /proc/self/ns/pid: %d\n", err)) - goto cleanup; + if (!ASSERT_OK(err, "stat /proc/self/ns/pid")) + return err; - bss = skel->bss; bss->dev = st.st_dev; bss->ino = st.st_ino; bss->user_pid = 0; bss->user_tgid = 0; + return 0; +} + +static int test_current_pid_tgid_tp(void *args) +{ + struct test_ns_current_pid_tgid__bss *bss; + struct test_ns_current_pid_tgid *skel; + int ret = -1, err; + pid_t tgid, pid; + + skel = test_ns_current_pid_tgid__open(); + if (!ASSERT_OK_PTR(skel, "test_ns_current_pid_tgid__open")) + return ret; + + bpf_program__set_autoload(skel->progs.tp_handler, true); + + err = test_ns_current_pid_tgid__load(skel); + if (!ASSERT_OK(err, "test_ns_current_pid_tgid__load")) + goto cleanup; + + bss = skel->bss; + if (get_pid_tgid(&pid, &tgid, bss)) + goto cleanup; err = test_ns_current_pid_tgid__attach(skel); - if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + if (!ASSERT_OK(err, "test_ns_current_pid_tgid__attach")) goto cleanup; /* trigger tracepoint */ usleep(1); - ASSERT_EQ(bss->user_pid, pid, "pid"); - ASSERT_EQ(bss->user_tgid, tgid, "tgid"); - err = 0; + if (!ASSERT_EQ(bss->user_pid, pid, "pid")) + goto cleanup; + if (!ASSERT_EQ(bss->user_tgid, tgid, "tgid")) + goto cleanup; + ret = 0; + +cleanup: + test_ns_current_pid_tgid__destroy(skel); + return ret; +} + +static int test_current_pid_tgid_cgrp(void *args) +{ + struct test_ns_current_pid_tgid__bss *bss; + struct test_ns_current_pid_tgid *skel; + int server_fd = -1, ret = -1, err; + int cgroup_fd = *(int *)args; + pid_t tgid, pid; + + skel = test_ns_current_pid_tgid__open(); + if (!ASSERT_OK_PTR(skel, "test_ns_current_pid_tgid__open")) + return ret; + + bpf_program__set_autoload(skel->progs.cgroup_bind4, true); + + err = test_ns_current_pid_tgid__load(skel); + if (!ASSERT_OK(err, "test_ns_current_pid_tgid__load")) + goto cleanup; + + bss = skel->bss; + if (get_pid_tgid(&pid, &tgid, bss)) + goto cleanup; + + skel->links.cgroup_bind4 = bpf_program__attach_cgroup( + skel->progs.cgroup_bind4, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.cgroup_bind4, "bpf_program__attach_cgroup")) + goto cleanup; + + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); + if (!ASSERT_GE(server_fd, 0, "start_server")) + goto cleanup; + + if (!ASSERT_EQ(bss->user_pid, pid, "pid")) + goto cleanup; + if (!ASSERT_EQ(bss->user_tgid, tgid, "tgid")) + goto cleanup; + ret = 0; cleanup: - test_ns_current_pid_tgid__destroy(skel); + if (server_fd >= 0) + close(server_fd); + test_ns_current_pid_tgid__destroy(skel); + return ret; +} + +static int test_current_pid_tgid_sk_msg(void *args) +{ + int verdict, map, server_fd = -1, client_fd = -1; + struct test_ns_current_pid_tgid__bss *bss; + static const char send_msg[] = "message"; + struct test_ns_current_pid_tgid *skel; + int ret = -1, err, key = 0; + pid_t tgid, pid; + + skel = test_ns_current_pid_tgid__open(); + if (!ASSERT_OK_PTR(skel, "test_ns_current_pid_tgid__open")) + return ret; + + bpf_program__set_autoload(skel->progs.sk_msg, true); + + err = test_ns_current_pid_tgid__load(skel); + if (!ASSERT_OK(err, "test_ns_current_pid_tgid__load")) + goto cleanup; + + bss = skel->bss; + if (get_pid_tgid(&pid, &tgid, skel->bss)) + goto cleanup; + + verdict = bpf_program__fd(skel->progs.sk_msg); + map = bpf_map__fd(skel->maps.sock_map); + err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0); + if (!ASSERT_OK(err, "prog_attach")) + goto cleanup; + + server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + if (!ASSERT_GE(server_fd, 0, "start_server")) + goto cleanup; - return err; + client_fd = connect_to_fd(server_fd, 0); + if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) + goto cleanup; + + err = bpf_map_update_elem(map, &key, &client_fd, BPF_ANY); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto cleanup; + + err = send(client_fd, send_msg, sizeof(send_msg), 0); + if (!ASSERT_EQ(err, sizeof(send_msg), "send(msg)")) + goto cleanup; + + if (!ASSERT_EQ(bss->user_pid, pid, "pid")) + goto cleanup; + if (!ASSERT_EQ(bss->user_tgid, tgid, "tgid")) + goto cleanup; + ret = 0; + +cleanup: + if (server_fd >= 0) + close(server_fd); + if (client_fd >= 0) + close(client_fd); + test_ns_current_pid_tgid__destroy(skel); + return ret; } -static void test_ns_current_pid_tgid_new_ns(void) +static void test_ns_current_pid_tgid_new_ns(int (*fn)(void *), void *arg) { - int wstatus, duration = 0; + int wstatus; pid_t cpid; /* Create a process in a new namespace, this process * will be the init process of this new namespace hence will be pid 1. */ - cpid = clone(test_current_pid_tgid, child_stack + STACK_SIZE, - CLONE_NEWPID | SIGCHLD, NULL); + cpid = clone(fn, child_stack + STACK_SIZE, + CLONE_NEWPID | SIGCHLD, arg); - if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno))) + if (!ASSERT_NEQ(cpid, -1, "clone")) return; - if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", "%s\n", strerror(errno))) + if (!ASSERT_NEQ(waitpid(cpid, &wstatus, 0), -1, "waitpid")) return; - if (CHECK(WEXITSTATUS(wstatus) != 0, "newns_pidtgid", "failed")) + if (!ASSERT_OK(WEXITSTATUS(wstatus), "newns_pidtgid")) return; } +static void test_in_netns(int (*fn)(void *), void *arg) +{ + struct nstoken *nstoken = NULL; + + SYS(cleanup, "ip netns add ns_current_pid_tgid"); + SYS(cleanup, "ip -net ns_current_pid_tgid link set dev lo up"); + + nstoken = open_netns("ns_current_pid_tgid"); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto cleanup; + + test_ns_current_pid_tgid_new_ns(fn, arg); + +cleanup: + if (nstoken) + close_netns(nstoken); + SYS_NOFAIL("ip netns del ns_current_pid_tgid"); +} + /* TODO: use a different tracepoint */ void serial_test_ns_current_pid_tgid(void) { - if (test__start_subtest("ns_current_pid_tgid_root_ns")) - test_current_pid_tgid(NULL); - if (test__start_subtest("ns_current_pid_tgid_new_ns")) - test_ns_current_pid_tgid_new_ns(); + if (test__start_subtest("root_ns_tp")) + test_current_pid_tgid_tp(NULL); + if (test__start_subtest("new_ns_tp")) + test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_tp, NULL); + if (test__start_subtest("new_ns_cgrp")) { + int cgroup_fd = -1; + + cgroup_fd = test__join_cgroup("/sock_addr"); + if (ASSERT_GE(cgroup_fd, 0, "join_cgroup")) { + test_in_netns(test_current_pid_tgid_cgrp, &cgroup_fd); + close(cgroup_fd); + } + } + if (test__start_subtest("new_ns_sk_msg")) + test_in_netns(test_current_pid_tgid_sk_msg, NULL); } diff --git a/tools/testing/selftests/bpf/prog_tests/perf_skip.c b/tools/testing/selftests/bpf/prog_tests/perf_skip.c new file mode 100644 index 0000000000..37d8618800 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/perf_skip.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include <test_progs.h> +#include "test_perf_skip.skel.h" +#include <linux/compiler.h> +#include <linux/hw_breakpoint.h> +#include <sys/mman.h> + +#ifndef TRAP_PERF +#define TRAP_PERF 6 +#endif + +int sigio_count, sigtrap_count; + +static void handle_sigio(int sig __always_unused) +{ + ++sigio_count; +} + +static void handle_sigtrap(int signum __always_unused, + siginfo_t *info, + void *ucontext __always_unused) +{ + ASSERT_EQ(info->si_code, TRAP_PERF, "si_code"); + ++sigtrap_count; +} + +static noinline int test_function(void) +{ + asm volatile (""); + return 0; +} + +void serial_test_perf_skip(void) +{ + struct sigaction action = {}; + struct sigaction previous_sigtrap; + sighandler_t previous_sigio = SIG_ERR; + struct test_perf_skip *skel = NULL; + struct perf_event_attr attr = {}; + int perf_fd = -1; + int err; + struct f_owner_ex owner; + struct bpf_link *prog_link = NULL; + + action.sa_flags = SA_SIGINFO | SA_NODEFER; + action.sa_sigaction = handle_sigtrap; + sigemptyset(&action.sa_mask); + if (!ASSERT_OK(sigaction(SIGTRAP, &action, &previous_sigtrap), "sigaction")) + return; + + previous_sigio = signal(SIGIO, handle_sigio); + if (!ASSERT_NEQ(previous_sigio, SIG_ERR, "signal")) + goto cleanup; + + skel = test_perf_skip__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + attr.type = PERF_TYPE_BREAKPOINT; + attr.size = sizeof(attr); + attr.bp_type = HW_BREAKPOINT_X; + attr.bp_addr = (uintptr_t)test_function; + attr.bp_len = sizeof(long); + attr.sample_period = 1; + attr.sample_type = PERF_SAMPLE_IP; + attr.pinned = 1; + attr.exclude_kernel = 1; + attr.exclude_hv = 1; + attr.precise_ip = 3; + attr.sigtrap = 1; + attr.remove_on_exec = 1; + + perf_fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0); + if (perf_fd < 0 && (errno == ENOENT || errno == EOPNOTSUPP)) { + printf("SKIP:no PERF_TYPE_BREAKPOINT/HW_BREAKPOINT_X\n"); + test__skip(); + goto cleanup; + } + if (!ASSERT_OK(perf_fd < 0, "perf_event_open")) + goto cleanup; + + /* Configure the perf event to signal on sample. */ + err = fcntl(perf_fd, F_SETFL, O_ASYNC); + if (!ASSERT_OK(err, "fcntl(F_SETFL, O_ASYNC)")) + goto cleanup; + + owner.type = F_OWNER_TID; + owner.pid = syscall(__NR_gettid); + err = fcntl(perf_fd, F_SETOWN_EX, &owner); + if (!ASSERT_OK(err, "fcntl(F_SETOWN_EX)")) + goto cleanup; + + /* Allow at most one sample. A sample rejected by bpf should + * not count against this. + */ + err = ioctl(perf_fd, PERF_EVENT_IOC_REFRESH, 1); + if (!ASSERT_OK(err, "ioctl(PERF_EVENT_IOC_REFRESH)")) + goto cleanup; + + prog_link = bpf_program__attach_perf_event(skel->progs.handler, perf_fd); + if (!ASSERT_OK_PTR(prog_link, "bpf_program__attach_perf_event")) + goto cleanup; + + /* Configure the bpf program to suppress the sample. */ + skel->bss->ip = (uintptr_t)test_function; + test_function(); + + ASSERT_EQ(sigio_count, 0, "sigio_count"); + ASSERT_EQ(sigtrap_count, 0, "sigtrap_count"); + + /* Configure the bpf program to allow the sample. */ + skel->bss->ip = 0; + test_function(); + + ASSERT_EQ(sigio_count, 1, "sigio_count"); + ASSERT_EQ(sigtrap_count, 1, "sigtrap_count"); + + /* Test that the sample above is the only one allowed (by perf, not + * by bpf) + */ + test_function(); + + ASSERT_EQ(sigio_count, 1, "sigio_count"); + ASSERT_EQ(sigtrap_count, 1, "sigtrap_count"); + +cleanup: + bpf_link__destroy(prog_link); + if (perf_fd >= 0) + close(perf_fd); + test_perf_skip__destroy(skel); + + if (previous_sigio != SIG_ERR) + signal(SIGIO, previous_sigio); + sigaction(SIGTRAP, &previous_sigtrap, NULL); +} diff --git a/tools/testing/selftests/bpf/prog_tests/preempt_lock.c b/tools/testing/selftests/bpf/prog_tests/preempt_lock.c new file mode 100644 index 0000000000..02917c6724 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/preempt_lock.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <network_helpers.h> +#include <preempt_lock.skel.h> + +void test_preempt_lock(void) +{ + RUN_TESTS(preempt_lock); +} diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index 48c5695b7a..da430df45a 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -12,8 +12,11 @@ #include <sys/sysinfo.h> #include <linux/perf_event.h> #include <linux/ring_buffer.h> + #include "test_ringbuf.lskel.h" +#include "test_ringbuf_n.lskel.h" #include "test_ringbuf_map_key.lskel.h" +#include "test_ringbuf_write.lskel.h" #define EDONE 7777 @@ -83,6 +86,58 @@ static void *poll_thread(void *input) return (void *)(long)ring_buffer__poll(ringbuf, timeout); } +static void ringbuf_write_subtest(void) +{ + struct test_ringbuf_write_lskel *skel; + int page_size = getpagesize(); + size_t *mmap_ptr; + int err, rb_fd; + + skel = test_ringbuf_write_lskel__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->maps.ringbuf.max_entries = 0x4000; + + err = test_ringbuf_write_lskel__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + rb_fd = skel->maps.ringbuf.map_fd; + + mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0); + if (!ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos")) + goto cleanup; + *mmap_ptr = 0x3000; + ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw"); + + skel->bss->pid = getpid(); + + ringbuf = ring_buffer__new(rb_fd, process_sample, NULL, NULL); + if (!ASSERT_OK_PTR(ringbuf, "ringbuf_new")) + goto cleanup; + + err = test_ringbuf_write_lskel__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto cleanup_ringbuf; + + skel->bss->discarded = 0; + skel->bss->passed = 0; + + /* trigger exactly two samples */ + syscall(__NR_getpgid); + syscall(__NR_getpgid); + + ASSERT_EQ(skel->bss->discarded, 2, "discarded"); + ASSERT_EQ(skel->bss->passed, 0, "passed"); + + test_ringbuf_write_lskel__detach(skel); +cleanup_ringbuf: + ring_buffer__free(ringbuf); +cleanup: + test_ringbuf_write_lskel__destroy(skel); +} + static void ringbuf_subtest(void) { const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample); @@ -326,6 +381,68 @@ cleanup: test_ringbuf_lskel__destroy(skel); } +/* + * Test ring_buffer__consume_n() by producing N_TOT_SAMPLES samples in the ring + * buffer, via getpid(), and consuming them in chunks of N_SAMPLES. + */ +#define N_TOT_SAMPLES 32 +#define N_SAMPLES 4 + +/* Sample value to verify the callback validity */ +#define SAMPLE_VALUE 42L + +static int process_n_sample(void *ctx, void *data, size_t len) +{ + struct sample *s = data; + + ASSERT_EQ(s->value, SAMPLE_VALUE, "sample_value"); + + return 0; +} + +static void ringbuf_n_subtest(void) +{ + struct test_ringbuf_n_lskel *skel_n; + int err, i; + + skel_n = test_ringbuf_n_lskel__open(); + if (!ASSERT_OK_PTR(skel_n, "test_ringbuf_n_lskel__open")) + return; + + skel_n->maps.ringbuf.max_entries = getpagesize(); + skel_n->bss->pid = getpid(); + + err = test_ringbuf_n_lskel__load(skel_n); + if (!ASSERT_OK(err, "test_ringbuf_n_lskel__load")) + goto cleanup; + + ringbuf = ring_buffer__new(skel_n->maps.ringbuf.map_fd, + process_n_sample, NULL, NULL); + if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new")) + goto cleanup; + + err = test_ringbuf_n_lskel__attach(skel_n); + if (!ASSERT_OK(err, "test_ringbuf_n_lskel__attach")) + goto cleanup_ringbuf; + + /* Produce N_TOT_SAMPLES samples in the ring buffer by calling getpid() */ + skel_n->bss->value = SAMPLE_VALUE; + for (i = 0; i < N_TOT_SAMPLES; i++) + syscall(__NR_getpgid); + + /* Consume all samples from the ring buffer in batches of N_SAMPLES */ + for (i = 0; i < N_TOT_SAMPLES; i += err) { + err = ring_buffer__consume_n(ringbuf, N_SAMPLES); + if (!ASSERT_EQ(err, N_SAMPLES, "rb_consume")) + goto cleanup_ringbuf; + } + +cleanup_ringbuf: + ring_buffer__free(ringbuf); +cleanup: + test_ringbuf_n_lskel__destroy(skel_n); +} + static int process_map_key_sample(void *ctx, void *data, size_t len) { struct sample *s; @@ -384,6 +501,10 @@ void test_ringbuf(void) { if (test__start_subtest("ringbuf")) ringbuf_subtest(); + if (test__start_subtest("ringbuf_n")) + ringbuf_n_subtest(); if (test__start_subtest("ringbuf_map_key")) ringbuf_map_key_subtest(); + if (test__start_subtest("ringbuf_write")) + ringbuf_write_subtest(); } diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index b15b343ebb..920aee41bd 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -179,7 +179,7 @@ static void test_send_signal_nmi(bool signal_thread) pmu_fd = syscall(__NR_perf_event_open, &attr, 0 /* pid */, -1 /* cpu */, -1 /* group_fd */, 0 /* flags */); if (pmu_fd == -1) { - if (errno == ENOENT) { + if (errno == ENOENT || errno == EOPNOTSUPP) { printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__); test__skip(); diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c index 1374b626a9..0b9bd1d6f7 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c @@ -15,6 +15,7 @@ #include <unistd.h> #include "test_progs.h" +#include "network_helpers.h" #define BIND_PORT 1234 #define CONNECT_PORT 4321 @@ -22,8 +23,6 @@ #define NS_SELF "/proc/self/ns/net" #define SERVER_MAP_PATH "/sys/fs/bpf/tc/globals/server_map" -static const struct timeval timeo_sec = { .tv_sec = 3 }; -static const size_t timeo_optlen = sizeof(timeo_sec); static int stop, duration; static bool @@ -73,52 +72,6 @@ configure_stack(void) return true; } -static int -start_server(const struct sockaddr *addr, socklen_t len, int type) -{ - int fd; - - fd = socket(addr->sa_family, type, 0); - if (CHECK_FAIL(fd == -1)) - goto out; - if (CHECK_FAIL(setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec, - timeo_optlen))) - goto close_out; - if (CHECK_FAIL(bind(fd, addr, len) == -1)) - goto close_out; - if (type == SOCK_STREAM && CHECK_FAIL(listen(fd, 128) == -1)) - goto close_out; - - goto out; -close_out: - close(fd); - fd = -1; -out: - return fd; -} - -static int -connect_to_server(const struct sockaddr *addr, socklen_t len, int type) -{ - int fd = -1; - - fd = socket(addr->sa_family, type, 0); - if (CHECK_FAIL(fd == -1)) - goto out; - if (CHECK_FAIL(setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo_sec, - timeo_optlen))) - goto close_out; - if (CHECK_FAIL(connect(fd, addr, len))) - goto close_out; - - goto out; -close_out: - close(fd); - fd = -1; -out: - return fd; -} - static in_port_t get_port(int fd) { @@ -161,7 +114,7 @@ run_test(int server_fd, const struct sockaddr *addr, socklen_t len, int type) in_port_t port; int ret = 1; - client = connect_to_server(addr, len, type); + client = connect_to_addr(type, (struct sockaddr_storage *)addr, len, NULL); if (client == -1) { perror("Cannot connect to server"); goto out; @@ -310,7 +263,9 @@ void test_sk_assign(void) continue; prepare_addr(test->addr, test->family, BIND_PORT, false); addr = (const struct sockaddr *)test->addr; - server = start_server(addr, test->len, test->type); + server = start_server_addr(test->type, + (const struct sockaddr_storage *)addr, + test->len, NULL); if (server == -1) goto close; diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c index 597d0467a9..de2466547e 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c @@ -994,7 +994,7 @@ static void drop_on_reuseport(const struct test *t) err = update_lookup_map(t->sock_map, SERVER_A, server1); if (err) - goto detach; + goto close_srv1; /* second server on destination address we should never reach */ server2 = make_server(t->sotype, t->connect_to.ip, t->connect_to.port, diff --git a/tools/testing/selftests/bpf/prog_tests/sock_addr.c b/tools/testing/selftests/bpf/prog_tests/sock_addr.c index 5fd6177189..b880c564a2 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_addr.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_addr.c @@ -3,16 +3,56 @@ #include "test_progs.h" +#include "sock_addr_kern.skel.h" +#include "bind4_prog.skel.h" +#include "bind6_prog.skel.h" #include "connect_unix_prog.skel.h" +#include "connect4_prog.skel.h" +#include "connect6_prog.skel.h" +#include "sendmsg4_prog.skel.h" +#include "sendmsg6_prog.skel.h" +#include "recvmsg4_prog.skel.h" +#include "recvmsg6_prog.skel.h" #include "sendmsg_unix_prog.skel.h" #include "recvmsg_unix_prog.skel.h" +#include "getsockname4_prog.skel.h" +#include "getsockname6_prog.skel.h" #include "getsockname_unix_prog.skel.h" +#include "getpeername4_prog.skel.h" +#include "getpeername6_prog.skel.h" #include "getpeername_unix_prog.skel.h" #include "network_helpers.h" +#ifndef ENOTSUPP +# define ENOTSUPP 524 +#endif + +#define TEST_NS "sock_addr" +#define TEST_IF_PREFIX "test_sock_addr" +#define TEST_IPV4 "127.0.0.4" +#define TEST_IPV6 "::6" + +#define SERV4_IP "192.168.1.254" +#define SERV4_REWRITE_IP "127.0.0.1" +#define SRC4_IP "172.16.0.1" +#define SRC4_REWRITE_IP TEST_IPV4 +#define SERV4_PORT 4040 +#define SERV4_REWRITE_PORT 4444 + +#define SERV6_IP "face:b00c:1234:5678::abcd" +#define SERV6_REWRITE_IP "::1" +#define SERV6_V4MAPPED_IP "::ffff:192.168.0.4" +#define SRC6_IP "::1" +#define SRC6_REWRITE_IP TEST_IPV6 +#define WILDCARD6_IP "::" +#define SERV6_PORT 6060 +#define SERV6_REWRITE_PORT 6666 + #define SERVUN_ADDRESS "bpf_cgroup_unix_test" #define SERVUN_REWRITE_ADDRESS "bpf_cgroup_unix_test_rewrite" -#define SRCUN_ADDRESS "bpf_cgroup_unix_test_src" +#define SRCUN_ADDRESS "bpf_cgroup_unix_test_src" + +#define save_errno_do(op) ({ int __save = errno; op; errno = __save; }) enum sock_addr_test_type { SOCK_ADDR_TEST_BIND, @@ -23,152 +63,955 @@ enum sock_addr_test_type { SOCK_ADDR_TEST_GETPEERNAME, }; -typedef void *(*load_fn)(int cgroup_fd); +typedef void *(*load_fn)(int cgroup_fd, + enum bpf_attach_type attach_type, + bool expect_reject); typedef void (*destroy_fn)(void *skel); -struct sock_addr_test { - enum sock_addr_test_type type; - const char *name; - /* BPF prog properties */ - load_fn loadfn; - destroy_fn destroyfn; - /* Socket properties */ - int socket_family; - int socket_type; - /* IP:port pairs for BPF prog to override */ - const char *requested_addr; - unsigned short requested_port; - const char *expected_addr; - unsigned short expected_port; - const char *expected_src_addr; +static int cmp_addr(const struct sockaddr_storage *addr1, socklen_t addr1_len, + const struct sockaddr_storage *addr2, socklen_t addr2_len, + bool cmp_port); + +struct init_sock_args { + int af; + int type; }; -static void *connect_unix_prog_load(int cgroup_fd) -{ - struct connect_unix_prog *skel; +struct addr_args { + char addr[sizeof(struct sockaddr_storage)]; + int addrlen; +}; - skel = connect_unix_prog__open_and_load(); - if (!ASSERT_OK_PTR(skel, "skel_open")) - goto cleanup; +struct sendmsg_args { + struct addr_args addr; + char msg[10]; + int msglen; +}; - skel->links.connect_unix_prog = bpf_program__attach_cgroup( - skel->progs.connect_unix_prog, cgroup_fd); - if (!ASSERT_OK_PTR(skel->links.connect_unix_prog, "prog_attach")) - goto cleanup; +static struct sock_addr_kern *skel; - return skel; -cleanup: - connect_unix_prog__destroy(skel); - return NULL; +static int run_bpf_prog(const char *prog_name, void *ctx, int ctx_size) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct bpf_program *prog; + int prog_fd, err; + + topts.ctx_in = ctx; + topts.ctx_size_in = ctx_size; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto err; + + prog_fd = bpf_program__fd(prog); + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, prog_name)) + goto err; + + err = topts.retval; + errno = -topts.retval; + goto out; +err: + err = -1; +out: + return err; } -static void connect_unix_prog_destroy(void *skel) +static int kernel_init_sock(int af, int type, int protocol) { - connect_unix_prog__destroy(skel); + struct init_sock_args args = { + .af = af, + .type = type, + }; + + return run_bpf_prog("init_sock", &args, sizeof(args)); } -static void *sendmsg_unix_prog_load(int cgroup_fd) +static int kernel_close_sock(int fd) { - struct sendmsg_unix_prog *skel; + return run_bpf_prog("close_sock", NULL, 0); +} - skel = sendmsg_unix_prog__open_and_load(); - if (!ASSERT_OK_PTR(skel, "skel_open")) - goto cleanup; +static int sock_addr_op(const char *name, struct sockaddr *addr, + socklen_t *addrlen, bool expect_change) +{ + struct addr_args args; + int err; - skel->links.sendmsg_unix_prog = bpf_program__attach_cgroup( - skel->progs.sendmsg_unix_prog, cgroup_fd); - if (!ASSERT_OK_PTR(skel->links.sendmsg_unix_prog, "prog_attach")) - goto cleanup; + if (addrlen) + args.addrlen = *addrlen; - return skel; -cleanup: - sendmsg_unix_prog__destroy(skel); - return NULL; + if (addr) + memcpy(&args.addr, addr, *addrlen); + + err = run_bpf_prog(name, &args, sizeof(args)); + + if (!expect_change && addr) + if (!ASSERT_EQ(cmp_addr((struct sockaddr_storage *)addr, + *addrlen, + (struct sockaddr_storage *)&args.addr, + args.addrlen, 1), + 0, "address_param_modified")) + return -1; + + if (addrlen) + *addrlen = args.addrlen; + + if (addr) + memcpy(addr, &args.addr, *addrlen); + + return err; } -static void sendmsg_unix_prog_destroy(void *skel) +static int send_msg_op(const char *name, struct sockaddr *addr, + socklen_t addrlen, const char *msg, int msglen) { - sendmsg_unix_prog__destroy(skel); + struct sendmsg_args args; + int err; + + memset(&args, 0, sizeof(args)); + memcpy(&args.addr.addr, addr, addrlen); + args.addr.addrlen = addrlen; + memcpy(args.msg, msg, msglen); + args.msglen = msglen; + + err = run_bpf_prog(name, &args, sizeof(args)); + + if (!ASSERT_EQ(cmp_addr((struct sockaddr_storage *)addr, + addrlen, + (struct sockaddr_storage *)&args.addr.addr, + args.addr.addrlen, 1), + 0, "address_param_modified")) + return -1; + + return err; } -static void *recvmsg_unix_prog_load(int cgroup_fd) +static int kernel_connect(struct sockaddr *addr, socklen_t addrlen) { - struct recvmsg_unix_prog *skel; - - skel = recvmsg_unix_prog__open_and_load(); - if (!ASSERT_OK_PTR(skel, "skel_open")) - goto cleanup; + return sock_addr_op("kernel_connect", addr, &addrlen, false); +} - skel->links.recvmsg_unix_prog = bpf_program__attach_cgroup( - skel->progs.recvmsg_unix_prog, cgroup_fd); - if (!ASSERT_OK_PTR(skel->links.recvmsg_unix_prog, "prog_attach")) - goto cleanup; +static int kernel_bind(int fd, struct sockaddr *addr, socklen_t addrlen) +{ + return sock_addr_op("kernel_bind", addr, &addrlen, false); +} - return skel; -cleanup: - recvmsg_unix_prog__destroy(skel); - return NULL; +static int kernel_listen(void) +{ + return sock_addr_op("kernel_listen", NULL, NULL, false); } -static void recvmsg_unix_prog_destroy(void *skel) +static int kernel_sendmsg(int fd, struct sockaddr *addr, socklen_t addrlen, + char *msg, int msglen) { - recvmsg_unix_prog__destroy(skel); + return send_msg_op("kernel_sendmsg", addr, addrlen, msg, msglen); } -static void *getsockname_unix_prog_load(int cgroup_fd) +static int sock_sendmsg(int fd, struct sockaddr *addr, socklen_t addrlen, + char *msg, int msglen) { - struct getsockname_unix_prog *skel; + return send_msg_op("sock_sendmsg", addr, addrlen, msg, msglen); +} - skel = getsockname_unix_prog__open_and_load(); - if (!ASSERT_OK_PTR(skel, "skel_open")) - goto cleanup; +static int kernel_getsockname(int fd, struct sockaddr *addr, socklen_t *addrlen) +{ + return sock_addr_op("kernel_getsockname", addr, addrlen, true); +} - skel->links.getsockname_unix_prog = bpf_program__attach_cgroup( - skel->progs.getsockname_unix_prog, cgroup_fd); - if (!ASSERT_OK_PTR(skel->links.getsockname_unix_prog, "prog_attach")) - goto cleanup; +static int kernel_getpeername(int fd, struct sockaddr *addr, socklen_t *addrlen) +{ + return sock_addr_op("kernel_getpeername", addr, addrlen, true); +} - return skel; -cleanup: - getsockname_unix_prog__destroy(skel); - return NULL; +int kernel_connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen, + const struct network_helper_opts *opts) +{ + int err; + + if (!ASSERT_OK(kernel_init_sock(addr->ss_family, type, 0), + "kernel_init_sock")) + goto err; + + if (kernel_connect((struct sockaddr *)addr, addrlen) < 0) + goto err; + + /* Test code expects a "file descriptor" on success. */ + err = 1; + goto out; +err: + err = -1; + save_errno_do(ASSERT_OK(kernel_close_sock(0), "kernel_close_sock")); +out: + return err; } -static void getsockname_unix_prog_destroy(void *skel) +int kernel_start_server(int family, int type, const char *addr_str, __u16 port, + int timeout_ms) { - getsockname_unix_prog__destroy(skel); + struct sockaddr_storage addr; + socklen_t addrlen; + int err; + + if (!ASSERT_OK(kernel_init_sock(family, type, 0), "kernel_init_sock")) + goto err; + + if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) + goto err; + + if (kernel_bind(0, (struct sockaddr *)&addr, addrlen) < 0) + goto err; + + if (type == SOCK_STREAM) { + if (!ASSERT_OK(kernel_listen(), "kernel_listen")) + goto err; + } + + /* Test code expects a "file descriptor" on success. */ + err = 1; + goto out; +err: + err = -1; + save_errno_do(ASSERT_OK(kernel_close_sock(0), "kernel_close_sock")); +out: + return err; } -static void *getpeername_unix_prog_load(int cgroup_fd) +struct sock_ops { + int (*connect_to_addr)(int type, const struct sockaddr_storage *addr, + socklen_t addrlen, + const struct network_helper_opts *opts); + int (*start_server)(int family, int type, const char *addr_str, + __u16 port, int timeout_ms); + int (*socket)(int famil, int type, int protocol); + int (*bind)(int fd, struct sockaddr *addr, socklen_t addrlen); + int (*getsockname)(int fd, struct sockaddr *addr, socklen_t *addrlen); + int (*getpeername)(int fd, struct sockaddr *addr, socklen_t *addrlen); + int (*sendmsg)(int fd, struct sockaddr *addr, socklen_t addrlen, + char *msg, int msglen); + int (*close)(int fd); +}; + +static int user_sendmsg(int fd, struct sockaddr *addr, socklen_t addrlen, + char *msg, int msglen) { - struct getpeername_unix_prog *skel; + struct msghdr hdr; + struct iovec iov; - skel = getpeername_unix_prog__open_and_load(); - if (!ASSERT_OK_PTR(skel, "skel_open")) - goto cleanup; + memset(&iov, 0, sizeof(iov)); + iov.iov_base = msg; + iov.iov_len = msglen; - skel->links.getpeername_unix_prog = bpf_program__attach_cgroup( - skel->progs.getpeername_unix_prog, cgroup_fd); - if (!ASSERT_OK_PTR(skel->links.getpeername_unix_prog, "prog_attach")) - goto cleanup; + memset(&hdr, 0, sizeof(hdr)); + hdr.msg_name = (void *)addr; + hdr.msg_namelen = addrlen; + hdr.msg_iov = &iov; + hdr.msg_iovlen = 1; - return skel; -cleanup: - getpeername_unix_prog__destroy(skel); - return NULL; + return sendmsg(fd, &hdr, 0); } -static void getpeername_unix_prog_destroy(void *skel) +static int user_bind(int fd, struct sockaddr *addr, socklen_t addrlen) { - getpeername_unix_prog__destroy(skel); + return bind(fd, (const struct sockaddr *)addr, addrlen); +} + +struct sock_ops user_ops = { + .connect_to_addr = connect_to_addr, + .start_server = start_server, + .socket = socket, + .bind = user_bind, + .getsockname = getsockname, + .getpeername = getpeername, + .sendmsg = user_sendmsg, + .close = close, +}; + +struct sock_ops kern_ops_sock_sendmsg = { + .connect_to_addr = kernel_connect_to_addr, + .start_server = kernel_start_server, + .socket = kernel_init_sock, + .bind = kernel_bind, + .getsockname = kernel_getsockname, + .getpeername = kernel_getpeername, + .sendmsg = sock_sendmsg, + .close = kernel_close_sock, +}; + +struct sock_ops kern_ops_kernel_sendmsg = { + .connect_to_addr = kernel_connect_to_addr, + .start_server = kernel_start_server, + .socket = kernel_init_sock, + .bind = kernel_bind, + .getsockname = kernel_getsockname, + .getpeername = kernel_getpeername, + .sendmsg = kernel_sendmsg, + .close = kernel_close_sock, +}; + +struct sock_addr_test { + enum sock_addr_test_type type; + const char *name; + /* BPF prog properties */ + load_fn loadfn; + destroy_fn destroyfn; + enum bpf_attach_type attach_type; + /* Socket operations */ + struct sock_ops *ops; + /* Socket properties */ + int socket_family; + int socket_type; + /* IP:port pairs for BPF prog to override */ + const char *requested_addr; + unsigned short requested_port; + const char *expected_addr; + unsigned short expected_port; + const char *expected_src_addr; + /* Expected test result */ + enum { + LOAD_REJECT, + ATTACH_REJECT, + SYSCALL_EPERM, + SYSCALL_ENOTSUPP, + SUCCESS, + } expected_result; +}; + +#define BPF_SKEL_FUNCS_RAW(skel_name, prog_name) \ +static void *prog_name##_load_raw(int cgroup_fd, \ + enum bpf_attach_type attach_type, \ + bool expect_reject) \ +{ \ + struct skel_name *skel = skel_name##__open(); \ + int prog_fd = -1; \ + if (!ASSERT_OK_PTR(skel, "skel_open")) \ + goto cleanup; \ + if (!ASSERT_OK(skel_name##__load(skel), "load")) \ + goto cleanup; \ + prog_fd = bpf_program__fd(skel->progs.prog_name); \ + if (!ASSERT_GT(prog_fd, 0, "prog_fd")) \ + goto cleanup; \ + if (bpf_prog_attach(prog_fd, cgroup_fd, attach_type, \ + BPF_F_ALLOW_OVERRIDE), "bpf_prog_attach") { \ + ASSERT_TRUE(expect_reject, "unexpected rejection"); \ + goto cleanup; \ + } \ + if (!ASSERT_FALSE(expect_reject, "expected rejection")) \ + goto cleanup; \ +cleanup: \ + if (prog_fd > 0) \ + bpf_prog_detach(cgroup_fd, attach_type); \ + skel_name##__destroy(skel); \ + return NULL; \ +} \ +static void prog_name##_destroy_raw(void *progfd) \ +{ \ + /* No-op. *_load_raw does all cleanup. */ \ +} \ + +#define BPF_SKEL_FUNCS(skel_name, prog_name) \ +static void *prog_name##_load(int cgroup_fd, \ + enum bpf_attach_type attach_type, \ + bool expect_reject) \ +{ \ + struct skel_name *skel = skel_name##__open(); \ + if (!ASSERT_OK_PTR(skel, "skel_open")) \ + goto cleanup; \ + if (!ASSERT_OK(bpf_program__set_expected_attach_type(skel->progs.prog_name, \ + attach_type), \ + "set_expected_attach_type")) \ + goto cleanup; \ + if (skel_name##__load(skel)) { \ + ASSERT_TRUE(expect_reject, "unexpected rejection"); \ + goto cleanup; \ + } \ + if (!ASSERT_FALSE(expect_reject, "expected rejection")) \ + goto cleanup; \ + skel->links.prog_name = bpf_program__attach_cgroup( \ + skel->progs.prog_name, cgroup_fd); \ + if (!ASSERT_OK_PTR(skel->links.prog_name, "prog_attach")) \ + goto cleanup; \ + return skel; \ +cleanup: \ + skel_name##__destroy(skel); \ + return NULL; \ +} \ +static void prog_name##_destroy(void *skel) \ +{ \ + skel_name##__destroy(skel); \ } +BPF_SKEL_FUNCS(bind4_prog, bind_v4_prog); +BPF_SKEL_FUNCS_RAW(bind4_prog, bind_v4_prog); +BPF_SKEL_FUNCS(bind4_prog, bind_v4_deny_prog); +BPF_SKEL_FUNCS(bind6_prog, bind_v6_prog); +BPF_SKEL_FUNCS_RAW(bind6_prog, bind_v6_prog); +BPF_SKEL_FUNCS(bind6_prog, bind_v6_deny_prog); +BPF_SKEL_FUNCS(connect4_prog, connect_v4_prog); +BPF_SKEL_FUNCS_RAW(connect4_prog, connect_v4_prog); +BPF_SKEL_FUNCS(connect4_prog, connect_v4_deny_prog); +BPF_SKEL_FUNCS(connect6_prog, connect_v6_prog); +BPF_SKEL_FUNCS_RAW(connect6_prog, connect_v6_prog); +BPF_SKEL_FUNCS(connect6_prog, connect_v6_deny_prog); +BPF_SKEL_FUNCS(connect_unix_prog, connect_unix_prog); +BPF_SKEL_FUNCS_RAW(connect_unix_prog, connect_unix_prog); +BPF_SKEL_FUNCS(connect_unix_prog, connect_unix_deny_prog); +BPF_SKEL_FUNCS(sendmsg4_prog, sendmsg_v4_prog); +BPF_SKEL_FUNCS_RAW(sendmsg4_prog, sendmsg_v4_prog); +BPF_SKEL_FUNCS(sendmsg4_prog, sendmsg_v4_deny_prog); +BPF_SKEL_FUNCS(sendmsg6_prog, sendmsg_v6_prog); +BPF_SKEL_FUNCS_RAW(sendmsg6_prog, sendmsg_v6_prog); +BPF_SKEL_FUNCS(sendmsg6_prog, sendmsg_v6_deny_prog); +BPF_SKEL_FUNCS(sendmsg6_prog, sendmsg_v6_preserve_dst_prog); +BPF_SKEL_FUNCS(sendmsg6_prog, sendmsg_v6_v4mapped_prog); +BPF_SKEL_FUNCS(sendmsg6_prog, sendmsg_v6_wildcard_prog); +BPF_SKEL_FUNCS(sendmsg_unix_prog, sendmsg_unix_prog); +BPF_SKEL_FUNCS_RAW(sendmsg_unix_prog, sendmsg_unix_prog); +BPF_SKEL_FUNCS(sendmsg_unix_prog, sendmsg_unix_deny_prog); +BPF_SKEL_FUNCS(recvmsg4_prog, recvmsg4_prog); +BPF_SKEL_FUNCS_RAW(recvmsg4_prog, recvmsg4_prog); +BPF_SKEL_FUNCS(recvmsg6_prog, recvmsg6_prog); +BPF_SKEL_FUNCS_RAW(recvmsg6_prog, recvmsg6_prog); +BPF_SKEL_FUNCS(recvmsg_unix_prog, recvmsg_unix_prog); +BPF_SKEL_FUNCS_RAW(recvmsg_unix_prog, recvmsg_unix_prog); +BPF_SKEL_FUNCS(getsockname_unix_prog, getsockname_unix_prog); +BPF_SKEL_FUNCS_RAW(getsockname_unix_prog, getsockname_unix_prog); +BPF_SKEL_FUNCS(getsockname4_prog, getsockname_v4_prog); +BPF_SKEL_FUNCS_RAW(getsockname4_prog, getsockname_v4_prog); +BPF_SKEL_FUNCS(getsockname6_prog, getsockname_v6_prog); +BPF_SKEL_FUNCS_RAW(getsockname6_prog, getsockname_v6_prog); +BPF_SKEL_FUNCS(getpeername_unix_prog, getpeername_unix_prog); +BPF_SKEL_FUNCS_RAW(getpeername_unix_prog, getpeername_unix_prog); +BPF_SKEL_FUNCS(getpeername4_prog, getpeername_v4_prog); +BPF_SKEL_FUNCS_RAW(getpeername4_prog, getpeername_v4_prog); +BPF_SKEL_FUNCS(getpeername6_prog, getpeername_v6_prog); +BPF_SKEL_FUNCS_RAW(getpeername6_prog, getpeername_v6_prog); + static struct sock_addr_test tests[] = { + /* bind - system calls */ + { + SOCK_ADDR_TEST_BIND, + "bind4: bind (stream)", + bind_v4_prog_load, + bind_v4_prog_destroy, + BPF_CGROUP_INET4_BIND, + &user_ops, + AF_INET, + SOCK_STREAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_BIND, + "bind4: bind deny (stream)", + bind_v4_deny_prog_load, + bind_v4_deny_prog_destroy, + BPF_CGROUP_INET4_BIND, + &user_ops, + AF_INET, + SOCK_STREAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + NULL, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_BIND, + "bind4: bind (dgram)", + bind_v4_prog_load, + bind_v4_prog_destroy, + BPF_CGROUP_INET4_BIND, + &user_ops, + AF_INET, + SOCK_DGRAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_BIND, + "bind4: bind deny (dgram)", + bind_v4_deny_prog_load, + bind_v4_deny_prog_destroy, + BPF_CGROUP_INET4_BIND, + &user_ops, + AF_INET, + SOCK_DGRAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + NULL, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_BIND, + "bind4: load prog with wrong expected attach type", + bind_v4_prog_load, + bind_v4_prog_destroy, + BPF_CGROUP_INET6_BIND, + &user_ops, + AF_INET, + SOCK_STREAM, + NULL, + 0, + NULL, + 0, + NULL, + LOAD_REJECT, + }, + { + SOCK_ADDR_TEST_BIND, + "bind4: attach prog with wrong attach type", + bind_v4_prog_load_raw, + bind_v4_prog_destroy_raw, + BPF_CGROUP_INET6_BIND, + &user_ops, + AF_INET, + SOCK_STREAM, + NULL, + 0, + NULL, + 0, + NULL, + ATTACH_REJECT, + }, + { + SOCK_ADDR_TEST_BIND, + "bind6: bind (stream)", + bind_v6_prog_load, + bind_v6_prog_destroy, + BPF_CGROUP_INET6_BIND, + &user_ops, + AF_INET6, + SOCK_STREAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_BIND, + "bind6: bind deny (stream)", + bind_v6_deny_prog_load, + bind_v6_deny_prog_destroy, + BPF_CGROUP_INET6_BIND, + &user_ops, + AF_INET6, + SOCK_STREAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + NULL, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_BIND, + "bind6: bind (dgram)", + bind_v6_prog_load, + bind_v6_prog_destroy, + BPF_CGROUP_INET6_BIND, + &user_ops, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_BIND, + "bind6: bind deny (dgram)", + bind_v6_deny_prog_load, + bind_v6_deny_prog_destroy, + BPF_CGROUP_INET6_BIND, + &user_ops, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + NULL, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_BIND, + "bind6: load prog with wrong expected attach type", + bind_v6_prog_load, + bind_v6_prog_destroy, + BPF_CGROUP_INET4_BIND, + &user_ops, + AF_INET6, + SOCK_STREAM, + NULL, + 0, + NULL, + 0, + NULL, + LOAD_REJECT, + }, + { + SOCK_ADDR_TEST_BIND, + "bind6: attach prog with wrong attach type", + bind_v6_prog_load_raw, + bind_v6_prog_destroy_raw, + BPF_CGROUP_INET4_BIND, + &user_ops, + AF_INET, + SOCK_STREAM, + NULL, + 0, + NULL, + 0, + NULL, + ATTACH_REJECT, + }, + + /* bind - kernel calls */ + { + SOCK_ADDR_TEST_BIND, + "bind4: kernel_bind (stream)", + bind_v4_prog_load, + bind_v4_prog_destroy, + BPF_CGROUP_INET4_BIND, + &kern_ops_sock_sendmsg, + AF_INET, + SOCK_STREAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_BIND, + "bind4: kernel_bind deny (stream)", + bind_v4_deny_prog_load, + bind_v4_deny_prog_destroy, + BPF_CGROUP_INET4_BIND, + &kern_ops_sock_sendmsg, + AF_INET, + SOCK_STREAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + NULL, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_BIND, + "bind4: kernel_bind (dgram)", + bind_v4_prog_load, + bind_v4_prog_destroy, + BPF_CGROUP_INET4_BIND, + &kern_ops_sock_sendmsg, + AF_INET, + SOCK_DGRAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_BIND, + "bind4: kernel_bind deny (dgram)", + bind_v4_deny_prog_load, + bind_v4_deny_prog_destroy, + BPF_CGROUP_INET4_BIND, + &kern_ops_sock_sendmsg, + AF_INET, + SOCK_DGRAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + NULL, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_BIND, + "bind6: kernel_bind (stream)", + bind_v6_prog_load, + bind_v6_prog_destroy, + BPF_CGROUP_INET6_BIND, + &kern_ops_sock_sendmsg, + AF_INET6, + SOCK_STREAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_BIND, + "bind6: kernel_bind deny (stream)", + bind_v6_deny_prog_load, + bind_v6_deny_prog_destroy, + BPF_CGROUP_INET6_BIND, + &kern_ops_sock_sendmsg, + AF_INET6, + SOCK_STREAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + NULL, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_BIND, + "bind6: kernel_bind (dgram)", + bind_v6_prog_load, + bind_v6_prog_destroy, + BPF_CGROUP_INET6_BIND, + &kern_ops_sock_sendmsg, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_BIND, + "bind6: kernel_bind deny (dgram)", + bind_v6_deny_prog_load, + bind_v6_deny_prog_destroy, + BPF_CGROUP_INET6_BIND, + &kern_ops_sock_sendmsg, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + NULL, + SYSCALL_EPERM, + }, + + /* connect - system calls */ + { + SOCK_ADDR_TEST_CONNECT, + "connect4: connect (stream)", + connect_v4_prog_load, + connect_v4_prog_destroy, + BPF_CGROUP_INET4_CONNECT, + &user_ops, + AF_INET, + SOCK_STREAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SRC4_REWRITE_IP, + SUCCESS, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect4: connect deny (stream)", + connect_v4_deny_prog_load, + connect_v4_deny_prog_destroy, + BPF_CGROUP_INET4_CONNECT, + &user_ops, + AF_INET, + SOCK_STREAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SRC4_REWRITE_IP, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect4: connect (dgram)", + connect_v4_prog_load, + connect_v4_prog_destroy, + BPF_CGROUP_INET4_CONNECT, + &user_ops, + AF_INET, + SOCK_DGRAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SRC4_REWRITE_IP, + SUCCESS, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect4: connect deny (dgram)", + connect_v4_deny_prog_load, + connect_v4_deny_prog_destroy, + BPF_CGROUP_INET4_CONNECT, + &user_ops, + AF_INET, + SOCK_DGRAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SRC4_REWRITE_IP, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect4: load prog with wrong expected attach type", + connect_v4_prog_load, + connect_v4_prog_destroy, + BPF_CGROUP_INET6_CONNECT, + &user_ops, + AF_INET, + SOCK_STREAM, + NULL, + 0, + NULL, + 0, + NULL, + LOAD_REJECT, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect4: attach prog with wrong attach type", + connect_v4_prog_load_raw, + connect_v4_prog_destroy_raw, + BPF_CGROUP_INET6_CONNECT, + &user_ops, + AF_INET, + SOCK_STREAM, + NULL, + 0, + NULL, + 0, + NULL, + ATTACH_REJECT, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect6: connect (stream)", + connect_v6_prog_load, + connect_v6_prog_destroy, + BPF_CGROUP_INET6_CONNECT, + &user_ops, + AF_INET6, + SOCK_STREAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SUCCESS, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect6: connect deny (stream)", + connect_v6_deny_prog_load, + connect_v6_deny_prog_destroy, + BPF_CGROUP_INET6_CONNECT, + &user_ops, + AF_INET6, + SOCK_STREAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SYSCALL_EPERM, + }, { SOCK_ADDR_TEST_CONNECT, - "connect_unix", + "connect6: connect (dgram)", + connect_v6_prog_load, + connect_v6_prog_destroy, + BPF_CGROUP_INET6_CONNECT, + &user_ops, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SUCCESS, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect6: connect deny (dgram)", + connect_v6_deny_prog_load, + connect_v6_deny_prog_destroy, + BPF_CGROUP_INET6_CONNECT, + &user_ops, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect6: load prog with wrong expected attach type", + connect_v6_prog_load, + connect_v6_prog_destroy, + BPF_CGROUP_INET4_CONNECT, + &user_ops, + AF_INET6, + SOCK_STREAM, + NULL, + 0, + NULL, + 0, + NULL, + LOAD_REJECT, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect6: attach prog with wrong attach type", + connect_v6_prog_load_raw, + connect_v6_prog_destroy_raw, + BPF_CGROUP_INET4_CONNECT, + &user_ops, + AF_INET, + SOCK_STREAM, + NULL, + 0, + NULL, + 0, + NULL, + ATTACH_REJECT, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect_unix: connect (stream)", connect_unix_prog_load, connect_unix_prog_destroy, + BPF_CGROUP_UNIX_CONNECT, + &user_ops, AF_UNIX, SOCK_STREAM, SERVUN_ADDRESS, @@ -176,12 +1019,631 @@ static struct sock_addr_test tests[] = { SERVUN_REWRITE_ADDRESS, 0, NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect_unix: connect deny (stream)", + connect_unix_deny_prog_load, + connect_unix_deny_prog_destroy, + BPF_CGROUP_UNIX_CONNECT, + &user_ops, + AF_UNIX, + SOCK_STREAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect_unix: attach prog with wrong attach type", + connect_unix_prog_load_raw, + connect_unix_prog_destroy_raw, + BPF_CGROUP_INET4_CONNECT, + &user_ops, + AF_UNIX, + SOCK_STREAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + ATTACH_REJECT, + }, + + /* connect - kernel calls */ + { + SOCK_ADDR_TEST_CONNECT, + "connect4: kernel_connect (stream)", + connect_v4_prog_load, + connect_v4_prog_destroy, + BPF_CGROUP_INET4_CONNECT, + &kern_ops_sock_sendmsg, + AF_INET, + SOCK_STREAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SRC4_REWRITE_IP, + SUCCESS, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect4: kernel_connect deny (stream)", + connect_v4_deny_prog_load, + connect_v4_deny_prog_destroy, + BPF_CGROUP_INET4_CONNECT, + &kern_ops_sock_sendmsg, + AF_INET, + SOCK_STREAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SRC4_REWRITE_IP, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect4: kernel_connect (dgram)", + connect_v4_prog_load, + connect_v4_prog_destroy, + BPF_CGROUP_INET4_CONNECT, + &kern_ops_sock_sendmsg, + AF_INET, + SOCK_DGRAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SRC4_REWRITE_IP, + SUCCESS, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect4: kernel_connect deny (dgram)", + connect_v4_deny_prog_load, + connect_v4_deny_prog_destroy, + BPF_CGROUP_INET4_CONNECT, + &kern_ops_sock_sendmsg, + AF_INET, + SOCK_DGRAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SRC4_REWRITE_IP, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect6: kernel_connect (stream)", + connect_v6_prog_load, + connect_v6_prog_destroy, + BPF_CGROUP_INET6_CONNECT, + &kern_ops_sock_sendmsg, + AF_INET6, + SOCK_STREAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SUCCESS, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect6: kernel_connect deny (stream)", + connect_v6_deny_prog_load, + connect_v6_deny_prog_destroy, + BPF_CGROUP_INET6_CONNECT, + &kern_ops_sock_sendmsg, + AF_INET6, + SOCK_STREAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect6: kernel_connect (dgram)", + connect_v6_prog_load, + connect_v6_prog_destroy, + BPF_CGROUP_INET6_CONNECT, + &kern_ops_sock_sendmsg, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SUCCESS, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect6: kernel_connect deny (dgram)", + connect_v6_deny_prog_load, + connect_v6_deny_prog_destroy, + BPF_CGROUP_INET6_CONNECT, + &kern_ops_sock_sendmsg, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect_unix: kernel_connect (dgram)", + connect_unix_prog_load, + connect_unix_prog_destroy, + BPF_CGROUP_UNIX_CONNECT, + &kern_ops_sock_sendmsg, + AF_UNIX, + SOCK_STREAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_CONNECT, + "connect_unix: kernel_connect deny (dgram)", + connect_unix_deny_prog_load, + connect_unix_deny_prog_destroy, + BPF_CGROUP_UNIX_CONNECT, + &kern_ops_sock_sendmsg, + AF_UNIX, + SOCK_STREAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + SYSCALL_EPERM, + }, + + /* sendmsg - system calls */ + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg4: sendmsg (dgram)", + sendmsg_v4_prog_load, + sendmsg_v4_prog_destroy, + BPF_CGROUP_UDP4_SENDMSG, + &user_ops, + AF_INET, + SOCK_DGRAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SRC4_REWRITE_IP, + SUCCESS, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg4: sendmsg deny (dgram)", + sendmsg_v4_deny_prog_load, + sendmsg_v4_deny_prog_destroy, + BPF_CGROUP_UDP4_SENDMSG, + &user_ops, + AF_INET, + SOCK_DGRAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SRC4_REWRITE_IP, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg4: load prog with wrong expected attach type", + sendmsg_v4_prog_load, + sendmsg_v4_prog_destroy, + BPF_CGROUP_UDP6_SENDMSG, + &user_ops, + AF_INET, + SOCK_DGRAM, + NULL, + 0, + NULL, + 0, + NULL, + LOAD_REJECT, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg4: attach prog with wrong attach type", + sendmsg_v4_prog_load_raw, + sendmsg_v4_prog_destroy_raw, + BPF_CGROUP_UDP6_SENDMSG, + &user_ops, + AF_INET, + SOCK_DGRAM, + NULL, + 0, + NULL, + 0, + NULL, + ATTACH_REJECT, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg6: sendmsg (dgram)", + sendmsg_v6_prog_load, + sendmsg_v6_prog_destroy, + BPF_CGROUP_UDP6_SENDMSG, + &user_ops, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SUCCESS, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg6: sendmsg [::] (BSD'ism) (dgram)", + sendmsg_v6_preserve_dst_prog_load, + sendmsg_v6_preserve_dst_prog_destroy, + BPF_CGROUP_UDP6_SENDMSG, + &user_ops, + AF_INET6, + SOCK_DGRAM, + WILDCARD6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_PORT, + SRC6_IP, + SUCCESS, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg6: sendmsg deny (dgram)", + sendmsg_v6_deny_prog_load, + sendmsg_v6_deny_prog_destroy, + BPF_CGROUP_UDP6_SENDMSG, + &user_ops, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg6: sendmsg IPv4-mapped IPv6 (dgram)", + sendmsg_v6_v4mapped_prog_load, + sendmsg_v6_v4mapped_prog_destroy, + BPF_CGROUP_UDP6_SENDMSG, + &user_ops, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SYSCALL_ENOTSUPP, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg6: sendmsg dst IP = [::] (BSD'ism) (dgram)", + sendmsg_v6_wildcard_prog_load, + sendmsg_v6_wildcard_prog_destroy, + BPF_CGROUP_UDP6_SENDMSG, + &user_ops, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SUCCESS, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg6: load prog with wrong expected attach type", + sendmsg_v6_prog_load, + sendmsg_v6_prog_destroy, + BPF_CGROUP_UDP4_SENDMSG, + &user_ops, + AF_INET6, + SOCK_DGRAM, + NULL, + 0, + NULL, + 0, + NULL, + LOAD_REJECT, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg6: attach prog with wrong attach type", + sendmsg_v6_prog_load_raw, + sendmsg_v6_prog_destroy_raw, + BPF_CGROUP_UDP4_SENDMSG, + &user_ops, + AF_INET6, + SOCK_DGRAM, + NULL, + 0, + NULL, + 0, + NULL, + ATTACH_REJECT, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg_unix: sendmsg (dgram)", + sendmsg_unix_prog_load, + sendmsg_unix_prog_destroy, + BPF_CGROUP_UNIX_SENDMSG, + &user_ops, + AF_UNIX, + SOCK_DGRAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg_unix: sendmsg deny (dgram)", + sendmsg_unix_deny_prog_load, + sendmsg_unix_deny_prog_destroy, + BPF_CGROUP_UNIX_SENDMSG, + &user_ops, + AF_UNIX, + SOCK_DGRAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg_unix: attach prog with wrong attach type", + sendmsg_unix_prog_load_raw, + sendmsg_unix_prog_destroy_raw, + BPF_CGROUP_UDP4_SENDMSG, + &user_ops, + AF_UNIX, + SOCK_DGRAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + ATTACH_REJECT, + }, + + /* sendmsg - kernel calls (sock_sendmsg) */ + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg4: sock_sendmsg (dgram)", + sendmsg_v4_prog_load, + sendmsg_v4_prog_destroy, + BPF_CGROUP_UDP4_SENDMSG, + &kern_ops_sock_sendmsg, + AF_INET, + SOCK_DGRAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SRC4_REWRITE_IP, + SUCCESS, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg4: sock_sendmsg deny (dgram)", + sendmsg_v4_deny_prog_load, + sendmsg_v4_deny_prog_destroy, + BPF_CGROUP_UDP4_SENDMSG, + &kern_ops_sock_sendmsg, + AF_INET, + SOCK_DGRAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SRC4_REWRITE_IP, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg6: sock_sendmsg (dgram)", + sendmsg_v6_prog_load, + sendmsg_v6_prog_destroy, + BPF_CGROUP_UDP6_SENDMSG, + &kern_ops_sock_sendmsg, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SUCCESS, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg6: sock_sendmsg [::] (BSD'ism) (dgram)", + sendmsg_v6_preserve_dst_prog_load, + sendmsg_v6_preserve_dst_prog_destroy, + BPF_CGROUP_UDP6_SENDMSG, + &kern_ops_sock_sendmsg, + AF_INET6, + SOCK_DGRAM, + WILDCARD6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_PORT, + SRC6_IP, + SUCCESS, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg6: sock_sendmsg deny (dgram)", + sendmsg_v6_deny_prog_load, + sendmsg_v6_deny_prog_destroy, + BPF_CGROUP_UDP6_SENDMSG, + &kern_ops_sock_sendmsg, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg_unix: sock_sendmsg (dgram)", + sendmsg_unix_prog_load, + sendmsg_unix_prog_destroy, + BPF_CGROUP_UNIX_SENDMSG, + &kern_ops_sock_sendmsg, + AF_UNIX, + SOCK_DGRAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg_unix: sock_sendmsg deny (dgram)", + sendmsg_unix_deny_prog_load, + sendmsg_unix_deny_prog_destroy, + BPF_CGROUP_UNIX_SENDMSG, + &kern_ops_sock_sendmsg, + AF_UNIX, + SOCK_DGRAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + SYSCALL_EPERM, + }, + + /* sendmsg - kernel calls (kernel_sendmsg) */ + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg4: kernel_sendmsg (dgram)", + sendmsg_v4_prog_load, + sendmsg_v4_prog_destroy, + BPF_CGROUP_UDP4_SENDMSG, + &kern_ops_kernel_sendmsg, + AF_INET, + SOCK_DGRAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SRC4_REWRITE_IP, + SUCCESS, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg4: kernel_sendmsg deny (dgram)", + sendmsg_v4_deny_prog_load, + sendmsg_v4_deny_prog_destroy, + BPF_CGROUP_UDP4_SENDMSG, + &kern_ops_kernel_sendmsg, + AF_INET, + SOCK_DGRAM, + SERV4_IP, + SERV4_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SRC4_REWRITE_IP, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg6: kernel_sendmsg (dgram)", + sendmsg_v6_prog_load, + sendmsg_v6_prog_destroy, + BPF_CGROUP_UDP6_SENDMSG, + &kern_ops_kernel_sendmsg, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SUCCESS, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg6: kernel_sendmsg [::] (BSD'ism) (dgram)", + sendmsg_v6_preserve_dst_prog_load, + sendmsg_v6_preserve_dst_prog_destroy, + BPF_CGROUP_UDP6_SENDMSG, + &kern_ops_kernel_sendmsg, + AF_INET6, + SOCK_DGRAM, + WILDCARD6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_PORT, + SRC6_IP, + SUCCESS, }, { SOCK_ADDR_TEST_SENDMSG, - "sendmsg_unix", + "sendmsg6: kernel_sendmsg deny (dgram)", + sendmsg_v6_deny_prog_load, + sendmsg_v6_deny_prog_destroy, + BPF_CGROUP_UDP6_SENDMSG, + &kern_ops_kernel_sendmsg, + AF_INET6, + SOCK_DGRAM, + SERV6_IP, + SERV6_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SRC6_REWRITE_IP, + SYSCALL_EPERM, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg_unix: sock_sendmsg (dgram)", sendmsg_unix_prog_load, sendmsg_unix_prog_destroy, + BPF_CGROUP_UNIX_SENDMSG, + &kern_ops_kernel_sendmsg, AF_UNIX, SOCK_DGRAM, SERVUN_ADDRESS, @@ -189,12 +1651,97 @@ static struct sock_addr_test tests[] = { SERVUN_REWRITE_ADDRESS, 0, NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_SENDMSG, + "sendmsg_unix: kernel_sendmsg deny (dgram)", + sendmsg_unix_deny_prog_load, + sendmsg_unix_deny_prog_destroy, + BPF_CGROUP_UNIX_SENDMSG, + &kern_ops_kernel_sendmsg, + AF_UNIX, + SOCK_DGRAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + SYSCALL_EPERM, + }, + + /* recvmsg - system calls */ + { + SOCK_ADDR_TEST_RECVMSG, + "recvmsg4: recvfrom (dgram)", + recvmsg4_prog_load, + recvmsg4_prog_destroy, + BPF_CGROUP_UDP4_RECVMSG, + &user_ops, + AF_INET, + SOCK_DGRAM, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SERV4_IP, + SUCCESS, + }, + { + SOCK_ADDR_TEST_RECVMSG, + "recvmsg4: attach prog with wrong attach type", + recvmsg4_prog_load_raw, + recvmsg4_prog_destroy_raw, + BPF_CGROUP_UDP6_RECVMSG, + &user_ops, + AF_INET, + SOCK_DGRAM, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SERV4_IP, + ATTACH_REJECT, }, { SOCK_ADDR_TEST_RECVMSG, - "recvmsg_unix-dgram", + "recvmsg6: recvfrom (dgram)", + recvmsg6_prog_load, + recvmsg6_prog_destroy, + BPF_CGROUP_UDP6_RECVMSG, + &user_ops, + AF_INET6, + SOCK_DGRAM, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SERV6_IP, + SUCCESS, + }, + { + SOCK_ADDR_TEST_RECVMSG, + "recvmsg6: attach prog with wrong attach type", + recvmsg6_prog_load_raw, + recvmsg6_prog_destroy_raw, + BPF_CGROUP_UDP4_RECVMSG, + &user_ops, + AF_INET6, + SOCK_DGRAM, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SERV6_IP, + ATTACH_REJECT, + }, + { + SOCK_ADDR_TEST_RECVMSG, + "recvmsg_unix: recvfrom (dgram)", recvmsg_unix_prog_load, recvmsg_unix_prog_destroy, + BPF_CGROUP_UNIX_RECVMSG, + &user_ops, AF_UNIX, SOCK_DGRAM, SERVUN_REWRITE_ADDRESS, @@ -202,12 +1749,15 @@ static struct sock_addr_test tests[] = { SERVUN_REWRITE_ADDRESS, 0, SERVUN_ADDRESS, + SUCCESS, }, { SOCK_ADDR_TEST_RECVMSG, - "recvmsg_unix-stream", + "recvmsg_unix: recvfrom (stream)", recvmsg_unix_prog_load, recvmsg_unix_prog_destroy, + BPF_CGROUP_UNIX_RECVMSG, + &user_ops, AF_UNIX, SOCK_STREAM, SERVUN_REWRITE_ADDRESS, @@ -215,12 +1765,227 @@ static struct sock_addr_test tests[] = { SERVUN_REWRITE_ADDRESS, 0, SERVUN_ADDRESS, + SUCCESS, + }, + { + SOCK_ADDR_TEST_RECVMSG, + "recvmsg_unix: attach prog with wrong attach type", + recvmsg_unix_prog_load_raw, + recvmsg_unix_prog_destroy_raw, + BPF_CGROUP_UDP4_RECVMSG, + &user_ops, + AF_INET6, + SOCK_STREAM, + SERVUN_REWRITE_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + SERVUN_ADDRESS, + ATTACH_REJECT, }, + + /* getsockname - system calls */ { SOCK_ADDR_TEST_GETSOCKNAME, - "getsockname_unix", + "getsockname4: getsockname (stream)", + getsockname_v4_prog_load, + getsockname_v4_prog_destroy, + BPF_CGROUP_INET4_GETSOCKNAME, + &user_ops, + AF_INET, + SOCK_STREAM, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SERV4_IP, + SERV4_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_GETSOCKNAME, + "getsockname4: getsockname (dgram)", + getsockname_v4_prog_load, + getsockname_v4_prog_destroy, + BPF_CGROUP_INET4_GETSOCKNAME, + &user_ops, + AF_INET, + SOCK_DGRAM, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SERV4_IP, + SERV4_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_GETSOCKNAME, + "getsockname4: attach prog with wrong attach type", + getsockname_v4_prog_load_raw, + getsockname_v4_prog_destroy_raw, + BPF_CGROUP_INET6_GETSOCKNAME, + &user_ops, + AF_INET, + SOCK_DGRAM, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SERV4_IP, + SERV4_PORT, + NULL, + ATTACH_REJECT, + }, + { + SOCK_ADDR_TEST_GETSOCKNAME, + "getsockname6: getsockname (stream)", + getsockname_v6_prog_load, + getsockname_v6_prog_destroy, + BPF_CGROUP_INET6_GETSOCKNAME, + &user_ops, + AF_INET6, + SOCK_STREAM, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SERV6_IP, + SERV6_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_GETSOCKNAME, + "getsockname6: getsockname (dgram)", + getsockname_v6_prog_load, + getsockname_v6_prog_destroy, + BPF_CGROUP_INET6_GETSOCKNAME, + &user_ops, + AF_INET6, + SOCK_DGRAM, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SERV6_IP, + SERV6_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_GETSOCKNAME, + "getsockname6: attach prog with wrong attach type", + getsockname_v6_prog_load_raw, + getsockname_v6_prog_destroy_raw, + BPF_CGROUP_INET4_GETSOCKNAME, + &user_ops, + AF_INET6, + SOCK_DGRAM, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SERV6_IP, + SERV6_PORT, + NULL, + ATTACH_REJECT, + }, + { + SOCK_ADDR_TEST_GETSOCKNAME, + "getsockname_unix: getsockname", + getsockname_unix_prog_load, + getsockname_unix_prog_destroy, + BPF_CGROUP_UNIX_GETSOCKNAME, + &user_ops, + AF_UNIX, + SOCK_STREAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_GETSOCKNAME, + "getsockname_unix: attach prog with wrong attach type", + getsockname_unix_prog_load_raw, + getsockname_unix_prog_destroy_raw, + BPF_CGROUP_INET4_GETSOCKNAME, + &user_ops, + AF_UNIX, + SOCK_STREAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + ATTACH_REJECT, + }, + + /* getsockname - kernel calls */ + { + SOCK_ADDR_TEST_GETSOCKNAME, + "getsockname4: kernel_getsockname (stream)", + getsockname_v4_prog_load, + getsockname_v4_prog_destroy, + BPF_CGROUP_INET4_GETSOCKNAME, + &kern_ops_kernel_sendmsg, + AF_INET, + SOCK_STREAM, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SERV4_IP, + SERV4_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_GETSOCKNAME, + "getsockname4: kernel_getsockname (dgram)", + getsockname_v4_prog_load, + getsockname_v4_prog_destroy, + BPF_CGROUP_INET4_GETSOCKNAME, + &kern_ops_kernel_sendmsg, + AF_INET, + SOCK_DGRAM, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SERV4_IP, + SERV4_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_GETSOCKNAME, + "getsockname6: kernel_getsockname (stream)", + getsockname_v6_prog_load, + getsockname_v6_prog_destroy, + BPF_CGROUP_INET6_GETSOCKNAME, + &kern_ops_kernel_sendmsg, + AF_INET6, + SOCK_STREAM, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SERV6_IP, + SERV6_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_GETSOCKNAME, + "getsockname6: kernel_getsockname (dgram)", + getsockname_v6_prog_load, + getsockname_v6_prog_destroy, + BPF_CGROUP_INET6_GETSOCKNAME, + &kern_ops_kernel_sendmsg, + AF_INET6, + SOCK_DGRAM, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SERV6_IP, + SERV6_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_GETSOCKNAME, + "getsockname_unix: kernel_getsockname", getsockname_unix_prog_load, getsockname_unix_prog_destroy, + BPF_CGROUP_UNIX_GETSOCKNAME, + &kern_ops_kernel_sendmsg, AF_UNIX, SOCK_STREAM, SERVUN_ADDRESS, @@ -228,12 +1993,113 @@ static struct sock_addr_test tests[] = { SERVUN_REWRITE_ADDRESS, 0, NULL, + SUCCESS, + }, + + /* getpeername - system calls */ + { + SOCK_ADDR_TEST_GETPEERNAME, + "getpeername4: getpeername (stream)", + getpeername_v4_prog_load, + getpeername_v4_prog_destroy, + BPF_CGROUP_INET4_GETPEERNAME, + &user_ops, + AF_INET, + SOCK_STREAM, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SERV4_IP, + SERV4_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_GETPEERNAME, + "getpeername4: getpeername (dgram)", + getpeername_v4_prog_load, + getpeername_v4_prog_destroy, + BPF_CGROUP_INET4_GETPEERNAME, + &user_ops, + AF_INET, + SOCK_DGRAM, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SERV4_IP, + SERV4_PORT, + NULL, + SUCCESS, }, { SOCK_ADDR_TEST_GETPEERNAME, - "getpeername_unix", + "getpeername4: attach prog with wrong attach type", + getpeername_v4_prog_load_raw, + getpeername_v4_prog_destroy_raw, + BPF_CGROUP_INET6_GETSOCKNAME, + &user_ops, + AF_UNIX, + SOCK_DGRAM, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SERV4_IP, + SERV4_PORT, + NULL, + ATTACH_REJECT, + }, + { + SOCK_ADDR_TEST_GETPEERNAME, + "getpeername6: getpeername (stream)", + getpeername_v6_prog_load, + getpeername_v6_prog_destroy, + BPF_CGROUP_INET6_GETPEERNAME, + &user_ops, + AF_INET6, + SOCK_STREAM, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SERV6_IP, + SERV6_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_GETPEERNAME, + "getpeername6: getpeername (dgram)", + getpeername_v6_prog_load, + getpeername_v6_prog_destroy, + BPF_CGROUP_INET6_GETPEERNAME, + &user_ops, + AF_INET6, + SOCK_DGRAM, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SERV6_IP, + SERV6_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_GETPEERNAME, + "getpeername6: attach prog with wrong attach type", + getpeername_v6_prog_load_raw, + getpeername_v6_prog_destroy_raw, + BPF_CGROUP_INET4_GETSOCKNAME, + &user_ops, + AF_INET6, + SOCK_DGRAM, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SERV6_IP, + SERV6_PORT, + NULL, + ATTACH_REJECT, + }, + { + SOCK_ADDR_TEST_GETPEERNAME, + "getpeername_unix: getpeername", getpeername_unix_prog_load, getpeername_unix_prog_destroy, + BPF_CGROUP_UNIX_GETPEERNAME, + &user_ops, AF_UNIX, SOCK_STREAM, SERVUN_ADDRESS, @@ -241,6 +2107,105 @@ static struct sock_addr_test tests[] = { SERVUN_REWRITE_ADDRESS, 0, NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_GETPEERNAME, + "getpeername_unix: attach prog with wrong attach type", + getpeername_unix_prog_load_raw, + getpeername_unix_prog_destroy_raw, + BPF_CGROUP_INET4_GETSOCKNAME, + &user_ops, + AF_UNIX, + SOCK_STREAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + ATTACH_REJECT, + }, + + /* getpeername - kernel calls */ + { + SOCK_ADDR_TEST_GETPEERNAME, + "getpeername4: kernel_getpeername (stream)", + getpeername_v4_prog_load, + getpeername_v4_prog_destroy, + BPF_CGROUP_INET4_GETPEERNAME, + &kern_ops_kernel_sendmsg, + AF_INET, + SOCK_STREAM, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SERV4_IP, + SERV4_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_GETPEERNAME, + "getpeername4: kernel_getpeername (dgram)", + getpeername_v4_prog_load, + getpeername_v4_prog_destroy, + BPF_CGROUP_INET4_GETPEERNAME, + &kern_ops_kernel_sendmsg, + AF_INET, + SOCK_DGRAM, + SERV4_REWRITE_IP, + SERV4_REWRITE_PORT, + SERV4_IP, + SERV4_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_GETPEERNAME, + "getpeername6: kernel_getpeername (stream)", + getpeername_v6_prog_load, + getpeername_v6_prog_destroy, + BPF_CGROUP_INET6_GETPEERNAME, + &kern_ops_kernel_sendmsg, + AF_INET6, + SOCK_STREAM, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SERV6_IP, + SERV6_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_GETPEERNAME, + "getpeername6: kernel_getpeername (dgram)", + getpeername_v6_prog_load, + getpeername_v6_prog_destroy, + BPF_CGROUP_INET6_GETPEERNAME, + &kern_ops_kernel_sendmsg, + AF_INET6, + SOCK_DGRAM, + SERV6_REWRITE_IP, + SERV6_REWRITE_PORT, + SERV6_IP, + SERV6_PORT, + NULL, + SUCCESS, + }, + { + SOCK_ADDR_TEST_GETPEERNAME, + "getpeername_unix: kernel_getpeername", + getpeername_unix_prog_load, + getpeername_unix_prog_destroy, + BPF_CGROUP_UNIX_GETPEERNAME, + &kern_ops_kernel_sendmsg, + AF_UNIX, + SOCK_STREAM, + SERVUN_ADDRESS, + 0, + SERVUN_REWRITE_ADDRESS, + 0, + NULL, + SUCCESS, }, }; @@ -294,28 +2259,40 @@ static int cmp_sock_addr(info_fn fn, int sock1, return cmp_addr(&addr1, len1, addr2, addr2_len, cmp_port); } -static int cmp_local_addr(int sock1, const struct sockaddr_storage *addr2, - socklen_t addr2_len, bool cmp_port) +static int load_sock_addr_kern(void) { - return cmp_sock_addr(getsockname, sock1, addr2, addr2_len, cmp_port); + int err; + + skel = sock_addr_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel")) + goto err; + + err = 0; + goto out; +err: + err = -1; +out: + return err; } -static int cmp_peer_addr(int sock1, const struct sockaddr_storage *addr2, - socklen_t addr2_len, bool cmp_port) +static void unload_sock_addr_kern(void) { - return cmp_sock_addr(getpeername, sock1, addr2, addr2_len, cmp_port); + sock_addr_kern__destroy(skel); } -static void test_bind(struct sock_addr_test *test) +static int test_bind(struct sock_addr_test *test) { struct sockaddr_storage expected_addr; socklen_t expected_addr_len = sizeof(struct sockaddr_storage); int serv = -1, client = -1, err; - serv = start_server(test->socket_family, test->socket_type, - test->requested_addr, test->requested_port, 0); - if (!ASSERT_GE(serv, 0, "start_server")) - goto cleanup; + serv = test->ops->start_server(test->socket_family, test->socket_type, + test->requested_addr, + test->requested_port, 0); + if (serv < 0) { + err = errno; + goto err; + } err = make_sockaddr(test->socket_family, test->expected_addr, test->expected_port, @@ -323,23 +2300,28 @@ static void test_bind(struct sock_addr_test *test) if (!ASSERT_EQ(err, 0, "make_sockaddr")) goto cleanup; - err = cmp_local_addr(serv, &expected_addr, expected_addr_len, true); + err = cmp_sock_addr(test->ops->getsockname, serv, &expected_addr, + expected_addr_len, true); if (!ASSERT_EQ(err, 0, "cmp_local_addr")) goto cleanup; /* Try to connect to server just in case */ - client = connect_to_addr(&expected_addr, expected_addr_len, test->socket_type); + client = connect_to_addr(test->socket_type, &expected_addr, expected_addr_len, NULL); if (!ASSERT_GE(client, 0, "connect_to_addr")) goto cleanup; cleanup: + err = 0; +err: if (client != -1) close(client); if (serv != -1) - close(serv); + test->ops->close(serv); + + return err; } -static void test_connect(struct sock_addr_test *test) +static int test_connect(struct sock_addr_test *test) { struct sockaddr_storage addr, expected_addr, expected_src_addr; socklen_t addr_len = sizeof(struct sockaddr_storage), @@ -357,9 +2339,12 @@ static void test_connect(struct sock_addr_test *test) if (!ASSERT_EQ(err, 0, "make_sockaddr")) goto cleanup; - client = connect_to_addr(&addr, addr_len, test->socket_type); - if (!ASSERT_GE(client, 0, "connect_to_addr")) - goto cleanup; + client = test->ops->connect_to_addr(test->socket_type, &addr, addr_len, + NULL); + if (client < 0) { + err = errno; + goto err; + } err = make_sockaddr(test->socket_family, test->expected_addr, test->expected_port, &expected_addr, &expected_addr_len); @@ -373,29 +2358,34 @@ static void test_connect(struct sock_addr_test *test) goto cleanup; } - err = cmp_peer_addr(client, &expected_addr, expected_addr_len, true); + err = cmp_sock_addr(test->ops->getpeername, client, &expected_addr, + expected_addr_len, true); if (!ASSERT_EQ(err, 0, "cmp_peer_addr")) goto cleanup; if (test->expected_src_addr) { - err = cmp_local_addr(client, &expected_src_addr, expected_src_addr_len, false); + err = cmp_sock_addr(test->ops->getsockname, client, + &expected_src_addr, expected_src_addr_len, + false); if (!ASSERT_EQ(err, 0, "cmp_local_addr")) goto cleanup; } cleanup: + err = 0; +err: if (client != -1) - close(client); + test->ops->close(client); if (serv != -1) close(serv); + + return err; } -static void test_xmsg(struct sock_addr_test *test) +static int test_xmsg(struct sock_addr_test *test) { struct sockaddr_storage addr, src_addr; socklen_t addr_len = sizeof(struct sockaddr_storage), src_addr_len = sizeof(struct sockaddr_storage); - struct msghdr hdr; - struct iovec iov; char data = 'a'; int serv = -1, client = -1, err; @@ -408,7 +2398,7 @@ static void test_xmsg(struct sock_addr_test *test) if (!ASSERT_GE(serv, 0, "start_server")) goto cleanup; - client = socket(test->socket_family, test->socket_type, 0); + client = test->ops->socket(test->socket_family, test->socket_type, 0); if (!ASSERT_GE(client, 0, "socket")) goto cleanup; @@ -418,7 +2408,8 @@ static void test_xmsg(struct sock_addr_test *test) if (!ASSERT_EQ(err, 0, "make_sockaddr")) goto cleanup; - err = bind(client, (const struct sockaddr *) &src_addr, src_addr_len); + err = test->ops->bind(client, (struct sockaddr *)&src_addr, + src_addr_len); if (!ASSERT_OK(err, "bind")) goto cleanup; } @@ -429,17 +2420,13 @@ static void test_xmsg(struct sock_addr_test *test) goto cleanup; if (test->socket_type == SOCK_DGRAM) { - memset(&iov, 0, sizeof(iov)); - iov.iov_base = &data; - iov.iov_len = sizeof(data); - - memset(&hdr, 0, sizeof(hdr)); - hdr.msg_name = (void *)&addr; - hdr.msg_namelen = addr_len; - hdr.msg_iov = &iov; - hdr.msg_iovlen = 1; + err = test->ops->sendmsg(client, (struct sockaddr *)&addr, + addr_len, &data, sizeof(data)); + if (err < 0) { + err = errno; + goto err; + } - err = sendmsg(client, &hdr, 0); if (!ASSERT_EQ(err, sizeof(data), "sendmsg")) goto cleanup; } else { @@ -489,19 +2476,23 @@ static void test_xmsg(struct sock_addr_test *test) } cleanup: + err = 0; +err: if (client != -1) - close(client); + test->ops->close(client); if (serv != -1) close(serv); + + return err; } -static void test_getsockname(struct sock_addr_test *test) +static int test_getsockname(struct sock_addr_test *test) { struct sockaddr_storage expected_addr; socklen_t expected_addr_len = sizeof(struct sockaddr_storage); int serv = -1, err; - serv = start_server(test->socket_family, test->socket_type, + serv = test->ops->start_server(test->socket_family, test->socket_type, test->requested_addr, test->requested_port, 0); if (!ASSERT_GE(serv, 0, "start_server")) goto cleanup; @@ -512,16 +2503,18 @@ static void test_getsockname(struct sock_addr_test *test) if (!ASSERT_EQ(err, 0, "make_sockaddr")) goto cleanup; - err = cmp_local_addr(serv, &expected_addr, expected_addr_len, true); + err = cmp_sock_addr(test->ops->getsockname, serv, &expected_addr, expected_addr_len, true); if (!ASSERT_EQ(err, 0, "cmp_local_addr")) goto cleanup; cleanup: if (serv != -1) - close(serv); + test->ops->close(serv); + + return 0; } -static void test_getpeername(struct sock_addr_test *test) +static int test_getpeername(struct sock_addr_test *test) { struct sockaddr_storage addr, expected_addr; socklen_t addr_len = sizeof(struct sockaddr_storage), @@ -538,7 +2531,8 @@ static void test_getpeername(struct sock_addr_test *test) if (!ASSERT_EQ(err, 0, "make_sockaddr")) goto cleanup; - client = connect_to_addr(&addr, addr_len, test->socket_type); + client = test->ops->connect_to_addr(test->socket_type, &addr, addr_len, + NULL); if (!ASSERT_GE(client, 0, "connect_to_addr")) goto cleanup; @@ -547,19 +2541,58 @@ static void test_getpeername(struct sock_addr_test *test) if (!ASSERT_EQ(err, 0, "make_sockaddr")) goto cleanup; - err = cmp_peer_addr(client, &expected_addr, expected_addr_len, true); + err = cmp_sock_addr(test->ops->getpeername, client, &expected_addr, + expected_addr_len, true); if (!ASSERT_EQ(err, 0, "cmp_peer_addr")) goto cleanup; cleanup: if (client != -1) - close(client); + test->ops->close(client); if (serv != -1) close(serv); + + return 0; +} + +static int setup_test_env(struct nstoken **tok) +{ + int err; + + SYS_NOFAIL("ip netns delete %s", TEST_NS); + SYS(fail, "ip netns add %s", TEST_NS); + *tok = open_netns(TEST_NS); + if (!ASSERT_OK_PTR(*tok, "netns token")) + goto fail; + + SYS(fail, "ip link add dev %s1 type veth peer name %s2", TEST_IF_PREFIX, + TEST_IF_PREFIX); + SYS(fail, "ip link set lo up"); + SYS(fail, "ip link set %s1 up", TEST_IF_PREFIX); + SYS(fail, "ip link set %s2 up", TEST_IF_PREFIX); + SYS(fail, "ip -4 addr add %s/8 dev %s1", TEST_IPV4, TEST_IF_PREFIX); + SYS(fail, "ip -6 addr add %s/128 nodad dev %s1", TEST_IPV6, TEST_IF_PREFIX); + + err = 0; + goto out; +fail: + err = -1; + close_netns(*tok); + *tok = NULL; + SYS_NOFAIL("ip netns delete %s", TEST_NS); +out: + return err; +} + +static void cleanup_test_env(struct nstoken *tok) +{ + close_netns(tok); + SYS_NOFAIL("ip netns delete %s", TEST_NS); } void test_sock_addr(void) { + struct nstoken *tok = NULL; int cgroup_fd = -1; void *skel; @@ -567,13 +2600,22 @@ void test_sock_addr(void) if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup")) goto cleanup; + if (!ASSERT_OK(setup_test_env(&tok), "setup_test_env")) + goto cleanup; + + if (!ASSERT_OK(load_sock_addr_kern(), "load_sock_addr_kern")) + goto cleanup; + for (size_t i = 0; i < ARRAY_SIZE(tests); ++i) { struct sock_addr_test *test = &tests[i]; + int err; if (!test__start_subtest(test->name)) continue; - skel = test->loadfn(cgroup_fd); + skel = test->loadfn(cgroup_fd, test->attach_type, + test->expected_result == LOAD_REJECT || + test->expected_result == ATTACH_REJECT); if (!skel) continue; @@ -583,30 +2625,39 @@ void test_sock_addr(void) * the future. */ case SOCK_ADDR_TEST_BIND: - test_bind(test); + err = test_bind(test); break; case SOCK_ADDR_TEST_CONNECT: - test_connect(test); + err = test_connect(test); break; case SOCK_ADDR_TEST_SENDMSG: case SOCK_ADDR_TEST_RECVMSG: - test_xmsg(test); + err = test_xmsg(test); break; case SOCK_ADDR_TEST_GETSOCKNAME: - test_getsockname(test); + err = test_getsockname(test); break; case SOCK_ADDR_TEST_GETPEERNAME: - test_getpeername(test); + err = test_getpeername(test); break; default: ASSERT_TRUE(false, "Unknown sock addr test type"); break; } + if (test->expected_result == SYSCALL_EPERM) + ASSERT_EQ(err, EPERM, "socket operation returns EPERM"); + else if (test->expected_result == SYSCALL_ENOTSUPP) + ASSERT_EQ(err, ENOTSUPP, "socket operation returns ENOTSUPP"); + else if (test->expected_result == SUCCESS) + ASSERT_OK(err, "socket operation succeeds"); + test->destroyfn(skel); } cleanup: + unload_sock_addr_kern(); + cleanup_test_env(tok); if (cgroup_fd >= 0) close(cgroup_fd); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 77e26ecffa..1337153eb0 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -131,6 +131,65 @@ out: test_skmsg_load_helpers__destroy(skel); } +static void test_skmsg_helpers_with_link(enum bpf_map_type map_type) +{ + struct bpf_program *prog, *prog_clone, *prog_clone2; + DECLARE_LIBBPF_OPTS(bpf_link_update_opts, opts); + struct test_skmsg_load_helpers *skel; + struct bpf_link *link, *link2; + int err, map; + + skel = test_skmsg_load_helpers__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_skmsg_load_helpers__open_and_load")) + return; + + prog = skel->progs.prog_msg_verdict; + prog_clone = skel->progs.prog_msg_verdict_clone; + prog_clone2 = skel->progs.prog_msg_verdict_clone2; + map = bpf_map__fd(skel->maps.sock_map); + + link = bpf_program__attach_sockmap(prog, map); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap")) + goto out; + + /* Fail since bpf_link for the same prog has been created. */ + err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_MSG_VERDICT, 0); + if (!ASSERT_ERR(err, "bpf_prog_attach")) + goto out; + + /* Fail since bpf_link for the same prog type has been created. */ + link2 = bpf_program__attach_sockmap(prog_clone, map); + if (!ASSERT_ERR_PTR(link2, "bpf_program__attach_sockmap")) { + bpf_link__detach(link2); + goto out; + } + + err = bpf_link__update_program(link, prog_clone); + if (!ASSERT_OK(err, "bpf_link__update_program")) + goto out; + + /* Fail since a prog with different type attempts to do update. */ + err = bpf_link__update_program(link, skel->progs.prog_skb_verdict); + if (!ASSERT_ERR(err, "bpf_link__update_program")) + goto out; + + /* Fail since the old prog does not match the one in the kernel. */ + opts.old_prog_fd = bpf_program__fd(prog_clone2); + opts.flags = BPF_F_REPLACE; + err = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), &opts); + if (!ASSERT_ERR(err, "bpf_link_update")) + goto out; + + opts.old_prog_fd = bpf_program__fd(prog_clone); + opts.flags = BPF_F_REPLACE; + err = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), &opts); + if (!ASSERT_OK(err, "bpf_link_update")) + goto out; +out: + bpf_link__detach(link); + test_skmsg_load_helpers__destroy(skel); +} + static void test_sockmap_update(enum bpf_map_type map_type) { int err, prog, src; @@ -298,6 +357,40 @@ out: test_sockmap_skb_verdict_attach__destroy(skel); } +static void test_sockmap_skb_verdict_attach_with_link(void) +{ + struct test_sockmap_skb_verdict_attach *skel; + struct bpf_program *prog; + struct bpf_link *link; + int err, map; + + skel = test_sockmap_skb_verdict_attach__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + prog = skel->progs.prog_skb_verdict; + map = bpf_map__fd(skel->maps.sock_map); + link = bpf_program__attach_sockmap(prog, map); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap")) + goto out; + + bpf_link__detach(link); + + err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach")) + goto out; + + /* Fail since attaching with the same prog/map has been done. */ + link = bpf_program__attach_sockmap(prog, map); + if (!ASSERT_ERR_PTR(link, "bpf_program__attach_sockmap")) + bpf_link__detach(link); + + err = bpf_prog_detach2(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT); + if (!ASSERT_OK(err, "bpf_prog_detach2")) + goto out; +out: + test_sockmap_skb_verdict_attach__destroy(skel); +} + static __u32 query_prog_id(int prog_fd) { struct bpf_prog_info info = {}; @@ -475,30 +568,19 @@ out: test_sockmap_drop_prog__destroy(drop); } -static void test_sockmap_skb_verdict_peek(void) +static void test_sockmap_skb_verdict_peek_helper(int map) { - int err, map, verdict, s, c1, p1, zero = 0, sent, recvd, avail; - struct test_sockmap_pass_prog *pass; + int err, s, c1, p1, zero = 0, sent, recvd, avail; char snd[256] = "0123456789"; char rcv[256] = "0"; - pass = test_sockmap_pass_prog__open_and_load(); - if (!ASSERT_OK_PTR(pass, "open_and_load")) - return; - verdict = bpf_program__fd(pass->progs.prog_skb_verdict); - map = bpf_map__fd(pass->maps.sock_map_rx); - - err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0); - if (!ASSERT_OK(err, "bpf_prog_attach")) - goto out; - s = socket_loopback(AF_INET, SOCK_STREAM); if (!ASSERT_GT(s, -1, "socket_loopback(s)")) - goto out; + return; err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1); if (!ASSERT_OK(err, "create_pairs(s)")) - goto out; + return; err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST); if (!ASSERT_OK(err, "bpf_map_update_elem(c1)")) @@ -520,7 +602,58 @@ static void test_sockmap_skb_verdict_peek(void) out_close: close(c1); close(p1); +} + +static void test_sockmap_skb_verdict_peek(void) +{ + struct test_sockmap_pass_prog *pass; + int err, map, verdict; + + pass = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(pass, "open_and_load")) + return; + verdict = bpf_program__fd(pass->progs.prog_skb_verdict); + map = bpf_map__fd(pass->maps.sock_map_rx); + + err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach")) + goto out; + + test_sockmap_skb_verdict_peek_helper(map); + +out: + test_sockmap_pass_prog__destroy(pass); +} + +static void test_sockmap_skb_verdict_peek_with_link(void) +{ + struct test_sockmap_pass_prog *pass; + struct bpf_program *prog; + struct bpf_link *link; + int err, map; + + pass = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(pass, "open_and_load")) + return; + prog = pass->progs.prog_skb_verdict; + map = bpf_map__fd(pass->maps.sock_map_rx); + link = bpf_program__attach_sockmap(prog, map); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap")) + goto out; + + err = bpf_link__update_program(link, pass->progs.prog_skb_verdict_clone); + if (!ASSERT_OK(err, "bpf_link__update_program")) + goto out; + + /* Fail since a prog with different attach type attempts to do update. */ + err = bpf_link__update_program(link, pass->progs.prog_skb_parser); + if (!ASSERT_ERR(err, "bpf_link__update_program")) + goto out; + + test_sockmap_skb_verdict_peek_helper(map); + ASSERT_EQ(pass->bss->clone_called, 1, "clone_called"); out: + bpf_link__detach(link); test_sockmap_pass_prog__destroy(pass); } @@ -788,6 +921,8 @@ void test_sockmap_basic(void) test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT, BPF_SK_SKB_VERDICT); } + if (test__start_subtest("sockmap skb_verdict attach_with_link")) + test_sockmap_skb_verdict_attach_with_link(); if (test__start_subtest("sockmap msg_verdict progs query")) test_sockmap_progs_query(BPF_SK_MSG_VERDICT); if (test__start_subtest("sockmap stream_parser progs query")) @@ -804,6 +939,8 @@ void test_sockmap_basic(void) test_sockmap_skb_verdict_fionread(false); if (test__start_subtest("sockmap skb_verdict msg_f_peek")) test_sockmap_skb_verdict_peek(); + if (test__start_subtest("sockmap skb_verdict msg_f_peek with link")) + test_sockmap_skb_verdict_peek_with_link(); if (test__start_subtest("sockmap unconnected af_unix")) test_sockmap_unconnected_unix(); if (test__start_subtest("sockmap one socket to many map entries")) @@ -812,4 +949,8 @@ void test_sockmap_basic(void) test_sockmap_many_maps(); if (test__start_subtest("sockmap same socket replace")) test_sockmap_same_sock(); + if (test__start_subtest("sockmap sk_msg attach sockmap helpers with link")) + test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKMAP); + if (test__start_subtest("sockhash sk_msg attach sockhash helpers with link")) + test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKHASH); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index a92807bfcd..e91b593660 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -767,6 +767,24 @@ static void test_msg_redir_to_connected(struct test_sockmap_listen *skel, xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT); } +static void test_msg_redir_to_connected_with_link(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, int family, + int sotype) +{ + int prog_msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict); + int verdict_map = bpf_map__fd(skel->maps.verdict_map); + int sock_map = bpf_map__fd(inner_map); + int link_fd; + + link_fd = bpf_link_create(prog_msg_verdict, sock_map, BPF_SK_MSG_VERDICT, NULL); + if (!ASSERT_GE(link_fd, 0, "bpf_link_create")) + return; + + redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS); + + close(link_fd); +} + static void redir_to_listening(int family, int sotype, int sock_mapfd, int verd_mapfd, enum redir_mode mode) { @@ -869,6 +887,24 @@ static void test_msg_redir_to_listening(struct test_sockmap_listen *skel, xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT); } +static void test_msg_redir_to_listening_with_link(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, int family, + int sotype) +{ + struct bpf_program *verdict = skel->progs.prog_msg_verdict; + int verdict_map = bpf_map__fd(skel->maps.verdict_map); + int sock_map = bpf_map__fd(inner_map); + struct bpf_link *link; + + link = bpf_program__attach_sockmap(verdict, sock_map); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap")) + return; + + redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS); + + bpf_link__detach(link); +} + static void redir_partial(int family, int sotype, int sock_map, int parser_map) { int s, c0 = -1, c1 = -1, p0 = -1, p1 = -1; @@ -1316,7 +1352,9 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map, TEST(test_skb_redir_to_listening), TEST(test_skb_redir_partial), TEST(test_msg_redir_to_connected), + TEST(test_msg_redir_to_connected_with_link), TEST(test_msg_redir_to_listening), + TEST(test_msg_redir_to_listening_with_link), }; const char *family_name, *map_name; const struct redir_test *t; diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c index 5a4491d4ed..eaac83a7f3 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c @@ -24,6 +24,7 @@ enum sockopt_test_error { static struct sockopt_test { const char *descr; const struct bpf_insn insns[64]; + enum bpf_prog_type prog_type; enum bpf_attach_type attach_type; enum bpf_attach_type expected_attach_type; @@ -928,9 +929,40 @@ static struct sockopt_test { .error = EPERM_SETSOCKOPT, }, + + /* ==================== prog_type ==================== */ + + { + .descr = "can attach only BPF_CGROUP_SETSOCKOP", + .insns = { + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = 0, + .error = DENY_ATTACH, + }, + + { + .descr = "can attach only BPF_CGROUP_GETSOCKOP", + .insns = { + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + + }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .attach_type = BPF_CGROUP_GETSOCKOPT, + .expected_attach_type = 0, + .error = DENY_ATTACH, + }, }; static int load_prog(const struct bpf_insn *insns, + enum bpf_prog_type prog_type, enum bpf_attach_type expected_attach_type) { LIBBPF_OPTS(bpf_prog_load_opts, opts, @@ -947,7 +979,7 @@ static int load_prog(const struct bpf_insn *insns, } insns_cnt++; - fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCKOPT, NULL, "GPL", insns, insns_cnt, &opts); + fd = bpf_prog_load(prog_type, NULL, "GPL", insns, insns_cnt, &opts); if (verbose && fd < 0) fprintf(stderr, "%s\n", bpf_log_buf); @@ -1036,13 +1068,18 @@ static int call_getsockopt(bool use_io_uring, int fd, int level, int optname, return getsockopt(fd, level, optname, optval, optlen); } -static int run_test(int cgroup_fd, struct sockopt_test *test, bool use_io_uring) +static int run_test(int cgroup_fd, struct sockopt_test *test, bool use_io_uring, + bool use_link) { - int sock_fd, err, prog_fd; + int prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT; + int sock_fd, err, prog_fd, link_fd = -1; void *optval = NULL; int ret = 0; - prog_fd = load_prog(test->insns, test->expected_attach_type); + if (test->prog_type) + prog_type = test->prog_type; + + prog_fd = load_prog(test->insns, prog_type, test->expected_attach_type); if (prog_fd < 0) { if (test->error == DENY_LOAD) return 0; @@ -1051,7 +1088,12 @@ static int run_test(int cgroup_fd, struct sockopt_test *test, bool use_io_uring) return -1; } - err = bpf_prog_attach(prog_fd, cgroup_fd, test->attach_type, 0); + if (use_link) { + err = bpf_link_create(prog_fd, cgroup_fd, test->attach_type, NULL); + link_fd = err; + } else { + err = bpf_prog_attach(prog_fd, cgroup_fd, test->attach_type, 0); + } if (err < 0) { if (test->error == DENY_ATTACH) goto close_prog_fd; @@ -1142,7 +1184,12 @@ free_optval: close_sock_fd: close(sock_fd); detach_prog: - bpf_prog_detach2(prog_fd, cgroup_fd, test->attach_type); + if (use_link) { + if (link_fd >= 0) + close(link_fd); + } else { + bpf_prog_detach2(prog_fd, cgroup_fd, test->attach_type); + } close_prog_fd: close(prog_fd); return ret; @@ -1160,10 +1207,12 @@ void test_sockopt(void) if (!test__start_subtest(tests[i].descr)) continue; - ASSERT_OK(run_test(cgroup_fd, &tests[i], false), + ASSERT_OK(run_test(cgroup_fd, &tests[i], false, false), + tests[i].descr); + ASSERT_OK(run_test(cgroup_fd, &tests[i], false, true), tests[i].descr); if (tests[i].io_uring_support) - ASSERT_OK(run_test(cgroup_fd, &tests[i], true), + ASSERT_OK(run_test(cgroup_fd, &tests[i], true, false), tests[i].descr); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c index 917f486db8..1d3a20f01b 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> #include "cgroup_helpers.h" +#include "network_helpers.h" #include "sockopt_inherit.skel.h" @@ -9,35 +10,6 @@ #define CUSTOM_INHERIT2 1 #define CUSTOM_LISTENER 2 -static int connect_to_server(int server_fd) -{ - struct sockaddr_storage addr; - socklen_t len = sizeof(addr); - int fd; - - fd = socket(AF_INET, SOCK_STREAM, 0); - if (fd < 0) { - log_err("Failed to create client socket"); - return -1; - } - - if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { - log_err("Failed to get server addr"); - goto out; - } - - if (connect(fd, (const struct sockaddr *)&addr, len) < 0) { - log_err("Fail to connect to server"); - goto out; - } - - return fd; - -out: - close(fd); - return -1; -} - static int verify_sockopt(int fd, int optname, const char *msg, char expected) { socklen_t optlen = 1; @@ -98,47 +70,36 @@ static void *server_thread(void *arg) return (void *)(long)err; } -static int start_server(void) +static int custom_cb(int fd, const struct post_socket_opts *opts) { - struct sockaddr_in addr = { - .sin_family = AF_INET, - .sin_addr.s_addr = htonl(INADDR_LOOPBACK), - }; char buf; int err; - int fd; int i; - fd = socket(AF_INET, SOCK_STREAM, 0); - if (fd < 0) { - log_err("Failed to create server socket"); - return -1; - } - for (i = CUSTOM_INHERIT1; i <= CUSTOM_LISTENER; i++) { buf = 0x01; err = setsockopt(fd, SOL_CUSTOM, i, &buf, 1); if (err) { log_err("Failed to call setsockopt(%d)", i); - close(fd); return -1; } } - if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) { - log_err("Failed to bind socket"); - close(fd); - return -1; - } - - return fd; + return 0; } static void run_test(int cgroup_fd) { struct bpf_link *link_getsockopt = NULL; struct bpf_link *link_setsockopt = NULL; + struct network_helper_opts opts = { + .post_socket_cb = custom_cb, + }; int server_fd = -1, client_fd; + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + }; struct sockopt_inherit *obj; void *server_err; pthread_t tid; @@ -160,7 +121,8 @@ static void run_test(int cgroup_fd) if (!ASSERT_OK_PTR(link_setsockopt, "cg-attach-setsockopt")) goto close_bpf_object; - server_fd = start_server(); + server_fd = start_server_addr(SOCK_STREAM, (struct sockaddr_storage *)&addr, + sizeof(addr), &opts); if (!ASSERT_GE(server_fd, 0, "start_server")) goto close_bpf_object; @@ -173,7 +135,7 @@ static void run_test(int cgroup_fd) pthread_cond_wait(&server_started, &server_started_mtx); pthread_mutex_unlock(&server_started_mtx); - client_fd = connect_to_server(server_fd); + client_fd = connect_to_fd(server_fd, 0); if (!ASSERT_GE(client_fd, 0, "connect_to_server")) goto close_server_fd; diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index 5db9eec24b..0832fd7874 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -35,7 +35,7 @@ retry: pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu 0 */, -1 /* group id */, 0 /* flags */); - if (pmu_fd < 0 && errno == ENOENT) { + if (pmu_fd < 0 && (errno == ENOENT || errno == EOPNOTSUPP)) { printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__); test__skip(); goto cleanup; diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c index 15ee7b2fc4..b913572002 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c @@ -73,6 +73,16 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex, "up primary"); ASSERT_OK(system("ip addr add dev " netkit_name " 10.0.0.1/24"), "addr primary"); + + if (mode == NETKIT_L3) { + ASSERT_EQ(system("ip link set dev " netkit_name + " addr ee:ff:bb:cc:aa:dd 2> /dev/null"), 512, + "set hwaddress"); + } else { + ASSERT_OK(system("ip link set dev " netkit_name + " addr ee:ff:bb:cc:aa:dd"), + "set hwaddress"); + } if (same_netns) { ASSERT_OK(system("ip link set dev " netkit_peer " up"), "up peer"); @@ -89,6 +99,16 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex, return err; } +static void move_netkit(void) +{ + ASSERT_OK(system("ip link set " netkit_peer " netns foo"), + "move peer"); + ASSERT_OK(system("ip netns exec foo ip link set dev " + netkit_peer " up"), "up peer"); + ASSERT_OK(system("ip netns exec foo ip addr add dev " + netkit_peer " 10.0.0.2/24"), "addr peer"); +} + static void destroy_netkit(void) { ASSERT_OK(system("ip link del dev " netkit_name), "del primary"); @@ -685,3 +705,77 @@ void serial_test_tc_netkit_neigh_links(void) serial_test_tc_netkit_neigh_links_target(NETKIT_L2, BPF_NETKIT_PRIMARY); serial_test_tc_netkit_neigh_links_target(NETKIT_L3, BPF_NETKIT_PRIMARY); } + +static void serial_test_tc_netkit_pkt_type_mode(int mode) +{ + LIBBPF_OPTS(bpf_netkit_opts, optl_nk); + LIBBPF_OPTS(bpf_tcx_opts, optl_tcx); + int err, ifindex, ifindex2; + struct test_tc_link *skel; + struct bpf_link *link; + + err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, + &ifindex, true); + if (err) + return; + + ifindex2 = if_nametoindex(netkit_peer); + ASSERT_NEQ(ifindex, ifindex2, "ifindex_1_2"); + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, + BPF_NETKIT_PRIMARY), 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc7, + BPF_TCX_INGRESS), 0, "tc7_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0); + assert_mprog_count_ifindex(ifindex2, BPF_TCX_INGRESS, 0); + + link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl_nk); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1); + assert_mprog_count_ifindex(ifindex2, BPF_TCX_INGRESS, 0); + + link = bpf_program__attach_tcx(skel->progs.tc7, ifindex2, &optl_tcx); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc7 = link; + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1); + assert_mprog_count_ifindex(ifindex2, BPF_TCX_INGRESS, 1); + + move_netkit(); + + tc_skel_reset_all_seen(skel); + skel->bss->set_type = true; + ASSERT_EQ(send_icmp(), 0, "icmp_pkt"); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc7, true, "seen_tc7"); + + ASSERT_EQ(skel->bss->seen_host, true, "seen_host"); + ASSERT_EQ(skel->bss->seen_mcast, true, "seen_mcast"); +cleanup: + test_tc_link__destroy(skel); + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0); + destroy_netkit(); +} + +void serial_test_tc_netkit_pkt_type(void) +{ + serial_test_tc_netkit_pkt_type_mode(NETKIT_L2); + serial_test_tc_netkit_pkt_type_mode(NETKIT_L3); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index dbe06aeaa2..b1073d36d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -530,7 +530,7 @@ static int wait_netstamp_needed_key(void) __u64 tstamp = 0; nstoken = open_netns(NS_DST); - if (!nstoken) + if (!ASSERT_OK_PTR(nstoken, "setns dst")) return -1; srv_fd = start_server(AF_INET6, SOCK_DGRAM, "::1", 0, 0); diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c index 8fe84da1b9..f2b99d95d9 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c @@ -10,6 +10,9 @@ struct tcp_rtt_storage { __u32 delivered; __u32 delivered_ce; __u32 icsk_retransmits; + + __u32 mrtt_us; /* args[0] */ + __u32 srtt; /* args[1] */ }; static void send_byte(int fd) @@ -83,6 +86,17 @@ static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked, err++; } + /* Precise values of mrtt and srtt are unavailable, just make sure they are nonzero */ + if (val.mrtt_us == 0) { + log_err("%s: unexpected bpf_tcp_sock.args[0] (mrtt_us) %u == 0", msg, val.mrtt_us); + err++; + } + + if (val.srtt == 0) { + log_err("%s: unexpected bpf_tcp_sock.args[1] (srtt) %u == 0", msg, val.srtt); + err++; + } + return err; } diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c index ee5372c7f2..29e183a80f 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c @@ -4,6 +4,8 @@ #include <time.h> #include "struct_ops_module.skel.h" +#include "struct_ops_nulled_out_cb.skel.h" +#include "struct_ops_forgotten_cb.skel.h" static void check_map_info(struct bpf_map_info *info) { @@ -66,6 +68,7 @@ static void test_struct_ops_load(void) * auto-loading, or it will fail to load. */ bpf_program__set_autoload(skel->progs.test_2, false); + bpf_map__set_autocreate(skel->maps.testmod_zeroed, false); err = struct_ops_module__load(skel); if (!ASSERT_OK(err, "struct_ops_module_load")) @@ -93,9 +96,163 @@ cleanup: struct_ops_module__destroy(skel); } +static void test_struct_ops_not_zeroed(void) +{ + struct struct_ops_module *skel; + int err; + + /* zeroed is 0, and zeroed_op is null */ + skel = struct_ops_module__open(); + if (!ASSERT_OK_PTR(skel, "struct_ops_module_open")) + return; + + skel->struct_ops.testmod_zeroed->zeroed = 0; + /* zeroed_op prog should be not loaded automatically now */ + skel->struct_ops.testmod_zeroed->zeroed_op = NULL; + + err = struct_ops_module__load(skel); + ASSERT_OK(err, "struct_ops_module_load"); + + struct_ops_module__destroy(skel); + + /* zeroed is not 0 */ + skel = struct_ops_module__open(); + if (!ASSERT_OK_PTR(skel, "struct_ops_module_open_not_zeroed")) + return; + + /* libbpf should reject the testmod_zeroed since struct + * bpf_testmod_ops in the kernel has no "zeroed" field and the + * value of "zeroed" is non-zero. + */ + skel->struct_ops.testmod_zeroed->zeroed = 0xdeadbeef; + skel->struct_ops.testmod_zeroed->zeroed_op = NULL; + err = struct_ops_module__load(skel); + ASSERT_ERR(err, "struct_ops_module_load_not_zeroed"); + + struct_ops_module__destroy(skel); + + /* zeroed_op is not null */ + skel = struct_ops_module__open(); + if (!ASSERT_OK_PTR(skel, "struct_ops_module_open_not_zeroed_op")) + return; + + /* libbpf should reject the testmod_zeroed since the value of its + * "zeroed_op" is not null. + */ + skel->struct_ops.testmod_zeroed->zeroed_op = skel->progs.test_3; + err = struct_ops_module__load(skel); + ASSERT_ERR(err, "struct_ops_module_load_not_zeroed_op"); + + struct_ops_module__destroy(skel); +} + +/* The signature of an implementation might not match the signature of the + * function pointer prototype defined in the BPF program. This mismatch + * should be allowed as long as the behavior of the operator program + * adheres to the signature in the kernel. Libbpf should not enforce the + * signature; rather, let the kernel verifier handle the enforcement. + */ +static void test_struct_ops_incompatible(void) +{ + struct struct_ops_module *skel; + struct bpf_link *link; + int err; + + skel = struct_ops_module__open(); + if (!ASSERT_OK_PTR(skel, "struct_ops_module_open")) + return; + + bpf_map__set_autocreate(skel->maps.testmod_zeroed, false); + + err = struct_ops_module__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + link = bpf_map__attach_struct_ops(skel->maps.testmod_incompatible); + if (ASSERT_OK_PTR(link, "attach_struct_ops")) + bpf_link__destroy(link); + +cleanup: + struct_ops_module__destroy(skel); +} + +/* validate that it's ok to "turn off" callback that kernel supports */ +static void test_struct_ops_nulled_out_cb(void) +{ + struct struct_ops_nulled_out_cb *skel; + int err; + + skel = struct_ops_nulled_out_cb__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + /* kernel knows about test_1, but we still null it out */ + skel->struct_ops.ops->test_1 = NULL; + + err = struct_ops_nulled_out_cb__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + ASSERT_FALSE(bpf_program__autoload(skel->progs.test_1_turn_off), "prog_autoload"); + ASSERT_LT(bpf_program__fd(skel->progs.test_1_turn_off), 0, "prog_fd"); + +cleanup: + struct_ops_nulled_out_cb__destroy(skel); +} + +/* validate that libbpf generates reasonable error message if struct_ops is + * not referenced in any struct_ops map + */ +static void test_struct_ops_forgotten_cb(void) +{ + struct struct_ops_forgotten_cb *skel; + char *log; + int err; + + skel = struct_ops_forgotten_cb__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + start_libbpf_log_capture(); + + err = struct_ops_forgotten_cb__load(skel); + if (!ASSERT_ERR(err, "skel_load")) + goto cleanup; + + log = stop_libbpf_log_capture(); + ASSERT_HAS_SUBSTR(log, + "prog 'test_1_forgotten': SEC(\"struct_ops\") program isn't referenced anywhere, did you forget to use it?", + "libbpf_log"); + free(log); + + struct_ops_forgotten_cb__destroy(skel); + + /* now let's programmatically use it, we should be fine now */ + skel = struct_ops_forgotten_cb__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + skel->struct_ops.ops->test_1 = skel->progs.test_1_forgotten; /* not anymore */ + + err = struct_ops_forgotten_cb__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + +cleanup: + struct_ops_forgotten_cb__destroy(skel); +} + void serial_test_struct_ops_module(void) { - if (test__start_subtest("test_struct_ops_load")) + if (test__start_subtest("struct_ops_load")) test_struct_ops_load(); + if (test__start_subtest("struct_ops_not_zeroed")) + test_struct_ops_not_zeroed(); + if (test__start_subtest("struct_ops_incompatible")) + test_struct_ops_incompatible(); + if (test__start_subtest("struct_ops_null_out_cb")) + test_struct_ops_nulled_out_cb(); + if (test__start_subtest("struct_ops_forgotten_cb")) + test_struct_ops_forgotten_cb(); } diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index 5f1fb0a2ea..cec746e77c 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -612,6 +612,8 @@ static void test_ipip_tunnel(enum ipip_encap encap) /* ping from at_ns0 namespace test */ nstoken = open_netns("at_ns0"); + if (!ASSERT_OK_PTR(nstoken, "setns")) + goto done; err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); if (!ASSERT_OK(err, "test_ping")) goto done; @@ -666,6 +668,8 @@ static void test_xfrm_tunnel(void) /* ping from at_ns0 namespace test */ nstoken = open_netns("at_ns0"); + if (!ASSERT_OK_PTR(nstoken, "setns")) + goto done; err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); close_netns(nstoken); if (!ASSERT_OK(err, "test_ping")) diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c new file mode 100644 index 0000000000..871d16cb95 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <sched.h> +#include <test_progs.h> +#include <pthread.h> +#include <network_helpers.h> + +#include "timer_lockup.skel.h" + +static long cpu; +static int *timer1_err; +static int *timer2_err; +static bool skip; + +volatile int k = 0; + +static void *timer_lockup_thread(void *arg) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1000, + ); + int i, prog_fd = *(int *)arg; + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset); + ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset), + &cpuset), + "cpu affinity"); + + for (i = 0; !READ_ONCE(*timer1_err) && !READ_ONCE(*timer2_err); i++) { + bpf_prog_test_run_opts(prog_fd, &opts); + /* Skip the test if we can't reproduce the race in a reasonable + * amount of time. + */ + if (i > 50) { + WRITE_ONCE(skip, true); + break; + } + } + + return NULL; +} + +void test_timer_lockup(void) +{ + int timer1_prog, timer2_prog; + struct timer_lockup *skel; + pthread_t thrds[2]; + void *ret; + + skel = timer_lockup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load")) + return; + + timer1_prog = bpf_program__fd(skel->progs.timer1_prog); + timer2_prog = bpf_program__fd(skel->progs.timer2_prog); + + timer1_err = &skel->bss->timer1_err; + timer2_err = &skel->bss->timer2_err; + + if (!ASSERT_OK(pthread_create(&thrds[0], NULL, timer_lockup_thread, + &timer1_prog), + "pthread_create thread1")) + goto out; + if (!ASSERT_OK(pthread_create(&thrds[1], NULL, timer_lockup_thread, + &timer2_prog), + "pthread_create thread2")) { + pthread_exit(&thrds[0]); + goto out; + } + + pthread_join(thrds[1], &ret); + pthread_join(thrds[0], &ret); + + if (skip) { + test__skip(); + goto out; + } + + if (*timer1_err != -EDEADLK && *timer1_err != 0) + ASSERT_FAIL("timer1_err bad value"); + if (*timer2_err != -EDEADLK && *timer2_err != 0) + ASSERT_FAIL("timer2_err bad value"); +out: + timer_lockup__destroy(skel); + return; +} diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c index 7b9124d506..e56e88596d 100644 --- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c +++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c @@ -5,18 +5,19 @@ #include "trace_printk.lskel.h" -#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe" -#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe" #define SEARCHMSG "testing,testing" +static void trace_pipe_cb(const char *str, void *data) +{ + if (strstr(str, SEARCHMSG) != NULL) + (*(int *)data)++; +} + void serial_test_trace_printk(void) { struct trace_printk_lskel__bss *bss; - int err = 0, iter = 0, found = 0; struct trace_printk_lskel *skel; - char *buf = NULL; - FILE *fp = NULL; - size_t buflen; + int err = 0, found = 0; skel = trace_printk_lskel__open(); if (!ASSERT_OK_PTR(skel, "trace_printk__open")) @@ -35,16 +36,6 @@ void serial_test_trace_printk(void) if (!ASSERT_OK(err, "trace_printk__attach")) goto cleanup; - if (access(TRACEFS_PIPE, F_OK) == 0) - fp = fopen(TRACEFS_PIPE, "r"); - else - fp = fopen(DEBUGFS_PIPE, "r"); - if (!ASSERT_OK_PTR(fp, "fopen(TRACE_PIPE)")) - goto cleanup; - - /* We do not want to wait forever if this test fails... */ - fcntl(fileno(fp), F_SETFL, O_NONBLOCK); - /* wait for tracepoint to trigger */ usleep(1); trace_printk_lskel__detach(skel); @@ -56,21 +47,12 @@ void serial_test_trace_printk(void) goto cleanup; /* verify our search string is in the trace buffer */ - while (getline(&buf, &buflen, fp) >= 0 || errno == EAGAIN) { - if (strstr(buf, SEARCHMSG) != NULL) - found++; - if (found == bss->trace_printk_ran) - break; - if (++iter > 1000) - break; - } + ASSERT_OK(read_trace_pipe_iter(trace_pipe_cb, &found, 1000), + "read_trace_pipe_iter"); if (!ASSERT_EQ(found, bss->trace_printk_ran, "found")) goto cleanup; cleanup: trace_printk_lskel__destroy(skel); - free(buf); - if (fp) - fclose(fp); } diff --git a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c index 44ea2fd88f..2af6a6f209 100644 --- a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c +++ b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c @@ -5,18 +5,19 @@ #include "trace_vprintk.lskel.h" -#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe" -#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe" #define SEARCHMSG "1,2,3,4,5,6,7,8,9,10" +static void trace_pipe_cb(const char *str, void *data) +{ + if (strstr(str, SEARCHMSG) != NULL) + (*(int *)data)++; +} + void serial_test_trace_vprintk(void) { struct trace_vprintk_lskel__bss *bss; - int err = 0, iter = 0, found = 0; struct trace_vprintk_lskel *skel; - char *buf = NULL; - FILE *fp = NULL; - size_t buflen; + int err = 0, found = 0; skel = trace_vprintk_lskel__open_and_load(); if (!ASSERT_OK_PTR(skel, "trace_vprintk__open_and_load")) @@ -28,16 +29,6 @@ void serial_test_trace_vprintk(void) if (!ASSERT_OK(err, "trace_vprintk__attach")) goto cleanup; - if (access(TRACEFS_PIPE, F_OK) == 0) - fp = fopen(TRACEFS_PIPE, "r"); - else - fp = fopen(DEBUGFS_PIPE, "r"); - if (!ASSERT_OK_PTR(fp, "fopen(TRACE_PIPE)")) - goto cleanup; - - /* We do not want to wait forever if this test fails... */ - fcntl(fileno(fp), F_SETFL, O_NONBLOCK); - /* wait for tracepoint to trigger */ usleep(1); trace_vprintk_lskel__detach(skel); @@ -49,14 +40,8 @@ void serial_test_trace_vprintk(void) goto cleanup; /* verify our search string is in the trace buffer */ - while (getline(&buf, &buflen, fp) >= 0 || errno == EAGAIN) { - if (strstr(buf, SEARCHMSG) != NULL) - found++; - if (found == bss->trace_vprintk_ran) - break; - if (++iter > 1000) - break; - } + ASSERT_OK(read_trace_pipe_iter(trace_pipe_cb, &found, 1000), + "read_trace_pipe_iter"); if (!ASSERT_EQ(found, bss->trace_vprintk_ran, "found")) goto cleanup; @@ -66,7 +51,4 @@ void serial_test_trace_vprintk(void) cleanup: trace_vprintk_lskel__destroy(skel); - free(buf); - if (fp) - fclose(fp); } diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c index 38fda42fd7..bf6ca8e3eb 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c @@ -1,12 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 #include <unistd.h> +#include <pthread.h> #include <test_progs.h> #include "uprobe_multi.skel.h" #include "uprobe_multi_bench.skel.h" #include "uprobe_multi_usdt.skel.h" #include "bpf/libbpf_internal.h" #include "testing_helpers.h" +#include "../sdt.h" static char test_data[] = "test_data"; @@ -25,9 +27,17 @@ noinline void uprobe_multi_func_3(void) asm volatile (""); } +noinline void usdt_trigger(void) +{ + STAP_PROBE(test, pid_filter_usdt); +} + struct child { int go[2]; + int c2p[2]; /* child -> parent channel */ int pid; + int tid; + pthread_t thread; }; static void release_child(struct child *child) @@ -38,6 +48,10 @@ static void release_child(struct child *child) return; close(child->go[1]); close(child->go[0]); + if (child->thread) + pthread_join(child->thread, NULL); + close(child->c2p[0]); + close(child->c2p[1]); if (child->pid > 0) waitpid(child->pid, &child_status, 0); } @@ -63,7 +77,7 @@ static struct child *spawn_child(void) if (pipe(child.go)) return NULL; - child.pid = fork(); + child.pid = child.tid = fork(); if (child.pid < 0) { release_child(&child); errno = EINVAL; @@ -82,6 +96,7 @@ static struct child *spawn_child(void) uprobe_multi_func_1(); uprobe_multi_func_2(); uprobe_multi_func_3(); + usdt_trigger(); exit(errno); } @@ -89,6 +104,67 @@ static struct child *spawn_child(void) return &child; } +static void *child_thread(void *ctx) +{ + struct child *child = ctx; + int c = 0, err; + + child->tid = syscall(SYS_gettid); + + /* let parent know we are ready */ + err = write(child->c2p[1], &c, 1); + if (err != 1) + pthread_exit(&err); + + /* wait for parent's kick */ + err = read(child->go[0], &c, 1); + if (err != 1) + pthread_exit(&err); + + uprobe_multi_func_1(); + uprobe_multi_func_2(); + uprobe_multi_func_3(); + usdt_trigger(); + + err = 0; + pthread_exit(&err); +} + +static struct child *spawn_thread(void) +{ + static struct child child; + int c, err; + + /* pipe to notify child to execute the trigger functions */ + if (pipe(child.go)) + return NULL; + /* pipe to notify parent that child thread is ready */ + if (pipe(child.c2p)) { + close(child.go[0]); + close(child.go[1]); + return NULL; + } + + child.pid = getpid(); + + err = pthread_create(&child.thread, NULL, child_thread, &child); + if (err) { + err = -errno; + close(child.go[0]); + close(child.go[1]); + close(child.c2p[0]); + close(child.c2p[1]); + errno = -err; + return NULL; + } + + err = read(child.c2p[0], &c, 1); + if (!ASSERT_EQ(err, 1, "child_thread_ready")) + return NULL; + + return &child; +} + static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child) { skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1; @@ -103,15 +179,23 @@ static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child * passed at the probe attach. */ skel->bss->pid = child ? 0 : getpid(); + skel->bss->expect_pid = child ? child->pid : 0; + + /* trigger all probes, if we are testing child *process*, just to make + * sure that PID filtering doesn't let through activations from wrong + * PIDs; when we test child *thread*, we don't want to do this to + * avoid double counting number of triggering events + */ + if (!child || !child->thread) { + uprobe_multi_func_1(); + uprobe_multi_func_2(); + uprobe_multi_func_3(); + usdt_trigger(); + } if (child) kick_child(child); - /* trigger all probes */ - uprobe_multi_func_1(); - uprobe_multi_func_2(); - uprobe_multi_func_3(); - /* * There are 2 entry and 2 exit probe called for each uprobe_multi_func_[123] * function and each slepable probe (6) increments uprobe_multi_sleep_result. @@ -126,8 +210,12 @@ static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child ASSERT_EQ(skel->bss->uprobe_multi_sleep_result, 6, "uprobe_multi_sleep_result"); - if (child) + ASSERT_FALSE(skel->bss->bad_pid_seen, "bad_pid_seen"); + + if (child) { ASSERT_EQ(skel->bss->child_pid, child->pid, "uprobe_multi_child_pid"); + ASSERT_EQ(skel->bss->child_tid, child->tid, "uprobe_multi_child_tid"); + } } static void test_skel_api(void) @@ -190,8 +278,24 @@ __test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_mul if (!ASSERT_OK_PTR(skel->links.uprobe_extra, "bpf_program__attach_uprobe_multi")) goto cleanup; + /* Attach (uprobe-backed) USDTs */ + skel->links.usdt_pid = bpf_program__attach_usdt(skel->progs.usdt_pid, pid, binary, + "test", "pid_filter_usdt", NULL); + if (!ASSERT_OK_PTR(skel->links.usdt_pid, "attach_usdt_pid")) + goto cleanup; + + skel->links.usdt_extra = bpf_program__attach_usdt(skel->progs.usdt_extra, -1, binary, + "test", "pid_filter_usdt", NULL); + if (!ASSERT_OK_PTR(skel->links.usdt_extra, "attach_usdt_extra")) + goto cleanup; + uprobe_multi_test_run(skel, child); + ASSERT_FALSE(skel->bss->bad_pid_seen_usdt, "bad_pid_seen_usdt"); + if (child) { + ASSERT_EQ(skel->bss->child_pid_usdt, child->pid, "usdt_multi_child_pid"); + ASSERT_EQ(skel->bss->child_tid_usdt, child->tid, "usdt_multi_child_tid"); + } cleanup: uprobe_multi__destroy(skel); } @@ -210,6 +314,13 @@ test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi return; __test_attach_api(binary, pattern, opts, child); + + /* pid filter (thread) */ + child = spawn_thread(); + if (!ASSERT_OK_PTR(child, "spawn_thread")) + return; + + __test_attach_api(binary, pattern, opts, child); } static void test_attach_api_pattern(void) @@ -495,6 +606,13 @@ static void test_link_api(void) return; __test_link_api(child); + + /* pid filter (thread) */ + child = spawn_thread(); + if (!ASSERT_OK_PTR(child, "spawn_thread")) + return; + + __test_link_api(child); } static void test_bench_attach_uprobe(void) diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index c4f9f30664..98ef39efa7 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -53,6 +53,7 @@ #include "verifier_movsx.skel.h" #include "verifier_netfilter_ctx.skel.h" #include "verifier_netfilter_retcode.skel.h" +#include "verifier_or_jmp32_k.skel.h" #include "verifier_precision.skel.h" #include "verifier_prevent_map_lookup.skel.h" #include "verifier_raw_stack.skel.h" @@ -66,6 +67,8 @@ #include "verifier_sdiv.skel.h" #include "verifier_search_pruning.skel.h" #include "verifier_sock.skel.h" +#include "verifier_sock_addr.skel.h" +#include "verifier_sockmap_mutate.skel.h" #include "verifier_spill_fill.skel.h" #include "verifier_spin_lock.skel.h" #include "verifier_stack_ptr.skel.h" @@ -168,6 +171,7 @@ void test_verifier_meta_access(void) { RUN(verifier_meta_access); } void test_verifier_movsx(void) { RUN(verifier_movsx); } void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); } void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); } +void test_verifier_or_jmp32_k(void) { RUN(verifier_or_jmp32_k); } void test_verifier_precision(void) { RUN(verifier_precision); } void test_verifier_prevent_map_lookup(void) { RUN(verifier_prevent_map_lookup); } void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); } @@ -181,6 +185,8 @@ void test_verifier_scalar_ids(void) { RUN(verifier_scalar_ids); } void test_verifier_sdiv(void) { RUN(verifier_sdiv); } void test_verifier_search_pruning(void) { RUN(verifier_search_pruning); } void test_verifier_sock(void) { RUN(verifier_sock); } +void test_verifier_sock_addr(void) { RUN(verifier_sock_addr); } +void test_verifier_sockmap_mutate(void) { RUN(verifier_sockmap_mutate); } void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); } void test_verifier_spin_lock(void) { RUN(verifier_spin_lock); } void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); } diff --git a/tools/testing/selftests/bpf/prog_tests/verifier_kfunc_prog_types.c b/tools/testing/selftests/bpf/prog_tests/verifier_kfunc_prog_types.c new file mode 100644 index 0000000000..3918ecc2ee --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/verifier_kfunc_prog_types.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <test_progs.h> + +#include "verifier_kfunc_prog_types.skel.h" + +void test_verifier_kfunc_prog_types(void) +{ + RUN_TESTS(verifier_kfunc_prog_types); +} diff --git a/tools/testing/selftests/bpf/prog_tests/wq.c b/tools/testing/selftests/bpf/prog_tests/wq.c new file mode 100644 index 0000000000..99e438fe12 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/wq.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Benjamin Tissoires */ +#include <test_progs.h> +#include "wq.skel.h" +#include "wq_failures.skel.h" + +void serial_test_wq(void) +{ + struct wq *wq_skel = NULL; + int err, prog_fd; + + LIBBPF_OPTS(bpf_test_run_opts, topts); + + RUN_TESTS(wq); + + /* re-run the success test to check if the timer was actually executed */ + + wq_skel = wq__open_and_load(); + if (!ASSERT_OK_PTR(wq_skel, "wq_skel_load")) + return; + + err = wq__attach(wq_skel); + if (!ASSERT_OK(err, "wq_attach")) + return; + + prog_fd = bpf_program__fd(wq_skel->progs.test_syscall_array_sleepable); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + ASSERT_EQ(topts.retval, 0, "test_run"); + + usleep(50); /* 10 usecs should be enough, but give it extra */ + + ASSERT_EQ(wq_skel->bss->ok_sleepable, (1 << 1), "ok_sleepable"); + wq__destroy(wq_skel); +} + +void serial_test_failures_wq(void) +{ + RUN_TESTS(wq_failures); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index f09505f8b0..53d6ad8c22 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -222,7 +222,7 @@ static void test_xdp_adjust_frags_tail_grow(void) prog = bpf_object__next_program(obj, NULL); if (bpf_object__load(obj)) - return; + goto out; prog_fd = bpf_program__fd(prog); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index 05edcf32f5..f76b5d67a3 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -384,6 +384,8 @@ void test_xdp_metadata(void) SYS(out, "ip netns add " RX_NETNS_NAME); tok = open_netns(TX_NETNS_NAME); + if (!ASSERT_OK_PTR(tok, "setns")) + goto out; SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME " type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1"); SYS(out, "ip link set " RX_NAME " netns " RX_NETNS_NAME); @@ -400,6 +402,8 @@ void test_xdp_metadata(void) SYS(out, "ip -4 neigh add " RX_ADDR " lladdr " RX_MAC " dev " TX_NAME_VLAN); switch_ns_to_rx(&tok); + if (!ASSERT_OK_PTR(tok, "setns rx")) + goto out; SYS(out, "ip link set dev " RX_NAME " address " RX_MAC); SYS(out, "ip link set dev " RX_NAME " up"); @@ -449,6 +453,8 @@ void test_xdp_metadata(void) goto out; switch_ns_to_tx(&tok); + if (!ASSERT_OK_PTR(tok, "setns tx")) + goto out; /* Setup separate AF_XDP for TX interface nad send packet to the RX socket. */ tx_ifindex = if_nametoindex(TX_NAME); @@ -461,6 +467,8 @@ void test_xdp_metadata(void) goto out; switch_ns_to_rx(&tok); + if (!ASSERT_OK_PTR(tok, "setns rx")) + goto out; /* Verify packet sent from AF_XDP has proper metadata. */ if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, true), 0, @@ -468,6 +476,8 @@ void test_xdp_metadata(void) goto out; switch_ns_to_tx(&tok); + if (!ASSERT_OK_PTR(tok, "setns tx")) + goto out; complete_tx(&tx_xsk); /* Now check metadata of packet, generated with network stack */ @@ -475,6 +485,8 @@ void test_xdp_metadata(void) goto out; switch_ns_to_rx(&tok); + if (!ASSERT_OK_PTR(tok, "setns rx")) + goto out; if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, false), 0, "verify_xsk_metadata")) @@ -498,6 +510,8 @@ void test_xdp_metadata(void) goto out; switch_ns_to_tx(&tok); + if (!ASSERT_OK_PTR(tok, "setns tx")) + goto out; /* Send packet to trigger . */ if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0, @@ -505,6 +519,8 @@ void test_xdp_metadata(void) goto out; switch_ns_to_rx(&tok); + if (!ASSERT_OK_PTR(tok, "setns rx")) + goto out; while (!retries--) { if (bpf_obj2->bss->called) diff --git a/tools/testing/selftests/bpf/progs/arena_atomics.c b/tools/testing/selftests/bpf/progs/arena_atomics.c new file mode 100644 index 0000000000..55f1056320 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/arena_atomics.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <stdbool.h> +#include "bpf_arena_common.h" + +struct { + __uint(type, BPF_MAP_TYPE_ARENA); + __uint(map_flags, BPF_F_MMAPABLE); + __uint(max_entries, 10); /* number of pages */ +#ifdef __TARGET_ARCH_arm64 + __ulong(map_extra, 0x1ull << 32); /* start of mmap() region */ +#else + __ulong(map_extra, 0x1ull << 44); /* start of mmap() region */ +#endif +} arena SEC(".maps"); + +#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST) +bool skip_tests __attribute((__section__(".data"))) = false; +#else +bool skip_tests = true; +#endif + +__u32 pid = 0; + +#undef __arena +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) +#define __arena __attribute__((address_space(1))) +#else +#define __arena SEC(".addr_space.1") +#endif + +__u64 __arena add64_value = 1; +__u64 __arena add64_result = 0; +__u32 __arena add32_value = 1; +__u32 __arena add32_result = 0; +__u64 __arena add_stack_value_copy = 0; +__u64 __arena add_stack_result = 0; +__u64 __arena add_noreturn_value = 1; + +SEC("raw_tp/sys_enter") +int add(const void *ctx) +{ + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; +#ifdef ENABLE_ATOMICS_TESTS + __u64 add_stack_value = 1; + + add64_result = __sync_fetch_and_add(&add64_value, 2); + add32_result = __sync_fetch_and_add(&add32_value, 2); + add_stack_result = __sync_fetch_and_add(&add_stack_value, 2); + add_stack_value_copy = add_stack_value; + __sync_fetch_and_add(&add_noreturn_value, 2); +#endif + + return 0; +} + +__s64 __arena sub64_value = 1; +__s64 __arena sub64_result = 0; +__s32 __arena sub32_value = 1; +__s32 __arena sub32_result = 0; +__s64 __arena sub_stack_value_copy = 0; +__s64 __arena sub_stack_result = 0; +__s64 __arena sub_noreturn_value = 1; + +SEC("raw_tp/sys_enter") +int sub(const void *ctx) +{ + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; +#ifdef ENABLE_ATOMICS_TESTS + __u64 sub_stack_value = 1; + + sub64_result = __sync_fetch_and_sub(&sub64_value, 2); + sub32_result = __sync_fetch_and_sub(&sub32_value, 2); + sub_stack_result = __sync_fetch_and_sub(&sub_stack_value, 2); + sub_stack_value_copy = sub_stack_value; + __sync_fetch_and_sub(&sub_noreturn_value, 2); +#endif + + return 0; +} + +__u64 __arena and64_value = (0x110ull << 32); +__u32 __arena and32_value = 0x110; + +SEC("raw_tp/sys_enter") +int and(const void *ctx) +{ + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; +#ifdef ENABLE_ATOMICS_TESTS + + __sync_fetch_and_and(&and64_value, 0x011ull << 32); + __sync_fetch_and_and(&and32_value, 0x011); +#endif + + return 0; +} + +__u32 __arena or32_value = 0x110; +__u64 __arena or64_value = (0x110ull << 32); + +SEC("raw_tp/sys_enter") +int or(const void *ctx) +{ + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; +#ifdef ENABLE_ATOMICS_TESTS + __sync_fetch_and_or(&or64_value, 0x011ull << 32); + __sync_fetch_and_or(&or32_value, 0x011); +#endif + + return 0; +} + +__u64 __arena xor64_value = (0x110ull << 32); +__u32 __arena xor32_value = 0x110; + +SEC("raw_tp/sys_enter") +int xor(const void *ctx) +{ + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; +#ifdef ENABLE_ATOMICS_TESTS + __sync_fetch_and_xor(&xor64_value, 0x011ull << 32); + __sync_fetch_and_xor(&xor32_value, 0x011); +#endif + + return 0; +} + +__u32 __arena cmpxchg32_value = 1; +__u32 __arena cmpxchg32_result_fail = 0; +__u32 __arena cmpxchg32_result_succeed = 0; +__u64 __arena cmpxchg64_value = 1; +__u64 __arena cmpxchg64_result_fail = 0; +__u64 __arena cmpxchg64_result_succeed = 0; + +SEC("raw_tp/sys_enter") +int cmpxchg(const void *ctx) +{ + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; +#ifdef ENABLE_ATOMICS_TESTS + cmpxchg64_result_fail = __sync_val_compare_and_swap(&cmpxchg64_value, 0, 3); + cmpxchg64_result_succeed = __sync_val_compare_and_swap(&cmpxchg64_value, 1, 2); + + cmpxchg32_result_fail = __sync_val_compare_and_swap(&cmpxchg32_value, 0, 3); + cmpxchg32_result_succeed = __sync_val_compare_and_swap(&cmpxchg32_value, 1, 2); +#endif + + return 0; +} + +__u64 __arena xchg64_value = 1; +__u64 __arena xchg64_result = 0; +__u32 __arena xchg32_value = 1; +__u32 __arena xchg32_result = 0; + +SEC("raw_tp/sys_enter") +int xchg(const void *ctx) +{ + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; +#ifdef ENABLE_ATOMICS_TESTS + __u64 val64 = 2; + __u32 val32 = 2; + + xchg64_result = __sync_lock_test_and_set(&xchg64_value, val64); + xchg32_result = __sync_lock_test_and_set(&xchg32_value, val32); +#endif + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/arena_list.c b/tools/testing/selftests/bpf/progs/arena_list.c index c0422c58ce..93bd0600eb 100644 --- a/tools/testing/selftests/bpf/progs/arena_list.c +++ b/tools/testing/selftests/bpf/progs/arena_list.c @@ -49,7 +49,7 @@ int arena_list_add(void *ctx) list_head = &global_head; - for (i = zero; i < cnt; cond_break, i++) { + for (i = zero; i < cnt && can_loop; i++) { struct elem __arena *n = bpf_alloc(sizeof(*n)); test_val++; diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c index a487f60b73..b7ddf8ec4e 100644 --- a/tools/testing/selftests/bpf/progs/bind4_prog.c +++ b/tools/testing/selftests/bpf/progs/bind4_prog.c @@ -12,6 +12,8 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> +#include "bind_prog.h" + #define SERV4_IP 0xc0a801feU /* 192.168.1.254 */ #define SERV4_PORT 4040 #define SERV4_REWRITE_IP 0x7f000001U /* 127.0.0.1 */ @@ -118,23 +120,23 @@ int bind_v4_prog(struct bpf_sock_addr *ctx) // u8 narrow loads: user_ip4 = 0; - user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[0] << 0; - user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[1] << 8; - user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[2] << 16; - user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[3] << 24; + user_ip4 |= load_byte(ctx->user_ip4, 0, sizeof(user_ip4)); + user_ip4 |= load_byte(ctx->user_ip4, 1, sizeof(user_ip4)); + user_ip4 |= load_byte(ctx->user_ip4, 2, sizeof(user_ip4)); + user_ip4 |= load_byte(ctx->user_ip4, 3, sizeof(user_ip4)); if (ctx->user_ip4 != user_ip4) return 0; user_port = 0; - user_port |= ((volatile __u8 *)&ctx->user_port)[0] << 0; - user_port |= ((volatile __u8 *)&ctx->user_port)[1] << 8; + user_port |= load_byte(ctx->user_port, 0, sizeof(user_port)); + user_port |= load_byte(ctx->user_port, 1, sizeof(user_port)); if (ctx->user_port != user_port) return 0; // u16 narrow loads: user_ip4 = 0; - user_ip4 |= ((volatile __u16 *)&ctx->user_ip4)[0] << 0; - user_ip4 |= ((volatile __u16 *)&ctx->user_ip4)[1] << 16; + user_ip4 |= load_word(ctx->user_ip4, 0, sizeof(user_ip4)); + user_ip4 |= load_word(ctx->user_ip4, 1, sizeof(user_ip4)); if (ctx->user_ip4 != user_ip4) return 0; @@ -156,4 +158,10 @@ int bind_v4_prog(struct bpf_sock_addr *ctx) return 1; } +SEC("cgroup/bind4") +int bind_v4_deny_prog(struct bpf_sock_addr *ctx) +{ + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c index d62cd9e9cf..501c3fc11d 100644 --- a/tools/testing/selftests/bpf/progs/bind6_prog.c +++ b/tools/testing/selftests/bpf/progs/bind6_prog.c @@ -12,6 +12,8 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> +#include "bind_prog.h" + #define SERV6_IP_0 0xfaceb00c /* face:b00c:1234:5678::abcd */ #define SERV6_IP_1 0x12345678 #define SERV6_IP_2 0x00000000 @@ -129,25 +131,25 @@ int bind_v6_prog(struct bpf_sock_addr *ctx) // u8 narrow loads: for (i = 0; i < 4; i++) { user_ip6 = 0; - user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[0] << 0; - user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[1] << 8; - user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[2] << 16; - user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[3] << 24; + user_ip6 |= load_byte(ctx->user_ip6[i], 0, sizeof(user_ip6)); + user_ip6 |= load_byte(ctx->user_ip6[i], 1, sizeof(user_ip6)); + user_ip6 |= load_byte(ctx->user_ip6[i], 2, sizeof(user_ip6)); + user_ip6 |= load_byte(ctx->user_ip6[i], 3, sizeof(user_ip6)); if (ctx->user_ip6[i] != user_ip6) return 0; } user_port = 0; - user_port |= ((volatile __u8 *)&ctx->user_port)[0] << 0; - user_port |= ((volatile __u8 *)&ctx->user_port)[1] << 8; + user_port |= load_byte(ctx->user_port, 0, sizeof(user_port)); + user_port |= load_byte(ctx->user_port, 1, sizeof(user_port)); if (ctx->user_port != user_port) return 0; // u16 narrow loads: for (i = 0; i < 4; i++) { user_ip6 = 0; - user_ip6 |= ((volatile __u16 *)&ctx->user_ip6[i])[0] << 0; - user_ip6 |= ((volatile __u16 *)&ctx->user_ip6[i])[1] << 16; + user_ip6 |= load_word(ctx->user_ip6[i], 0, sizeof(user_ip6)); + user_ip6 |= load_word(ctx->user_ip6[i], 1, sizeof(user_ip6)); if (ctx->user_ip6[i] != user_ip6) return 0; } @@ -173,4 +175,10 @@ int bind_v6_prog(struct bpf_sock_addr *ctx) return 1; } +SEC("cgroup/bind6") +int bind_v6_deny_prog(struct bpf_sock_addr *ctx) +{ + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bind_prog.h b/tools/testing/selftests/bpf/progs/bind_prog.h new file mode 100644 index 0000000000..e830caa940 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bind_prog.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __BIND_PROG_H__ +#define __BIND_PROG_H__ + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define load_byte(src, b, s) \ + (((volatile __u8 *)&(src))[b] << 8 * b) +#define load_word(src, w, s) \ + (((volatile __u16 *)&(src))[w] << 16 * w) +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define load_byte(src, b, s) \ + (((volatile __u8 *)&(src))[(b) + (sizeof(src) - (s))] << 8 * ((s) - (b) - 1)) +#define load_word(src, w, s) \ + (((volatile __u16 *)&(src))[w] << 16 * (((s) / 2) - (w) - 1)) +#else +# error "Fix your compiler's __BYTE_ORDER__?!" +#endif + +#endif diff --git a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c new file mode 100644 index 0000000000..1654a530aa --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* Highlights: + * 1. The major difference between this bpf program and tcp_cubic.c + * is that this bpf program relies on `cong_control` rather than + * `cong_avoid` in the struct tcp_congestion_ops. + * 2. Logic such as tcp_cwnd_reduction, tcp_cong_avoid, and + * tcp_update_pacing_rate is bypassed when `cong_control` is + * defined, so moving these logic to `cong_control`. + * 3. WARNING: This bpf program is NOT the same as tcp_cubic.c. + * The main purpose is to show use cases of the arguments in + * `cong_control`. For simplicity's sake, it reuses tcp cubic's + * kernel functions. + */ + +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#define USEC_PER_SEC 1000000UL +#define TCP_PACING_SS_RATIO (200) +#define TCP_PACING_CA_RATIO (120) +#define TCP_REORDERING (12) + +#define min(a, b) ((a) < (b) ? (a) : (b)) +#define max(a, b) ((a) > (b) ? (a) : (b)) +#define after(seq2, seq1) before(seq1, seq2) + +extern void cubictcp_init(struct sock *sk) __ksym; +extern void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym; +extern __u32 cubictcp_recalc_ssthresh(struct sock *sk) __ksym; +extern void cubictcp_state(struct sock *sk, __u8 new_state) __ksym; +extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym; +extern void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) __ksym; +extern void cubictcp_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym; + +static bool before(__u32 seq1, __u32 seq2) +{ + return (__s32)(seq1-seq2) < 0; +} + +static __u64 div64_u64(__u64 dividend, __u64 divisor) +{ + return dividend / divisor; +} + +static void tcp_update_pacing_rate(struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + __u64 rate; + + /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */ + rate = (__u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3); + + /* current rate is (cwnd * mss) / srtt + * In Slow Start [1], set sk_pacing_rate to 200 % the current rate. + * In Congestion Avoidance phase, set it to 120 % the current rate. + * + * [1] : Normal Slow Start condition is (tp->snd_cwnd < tp->snd_ssthresh) + * If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching + * end of slow start and should slow down. + */ + if (tp->snd_cwnd < tp->snd_ssthresh / 2) + rate *= TCP_PACING_SS_RATIO; + else + rate *= TCP_PACING_CA_RATIO; + + rate *= max(tp->snd_cwnd, tp->packets_out); + + if (tp->srtt_us) + rate = div64_u64(rate, (__u64)tp->srtt_us); + + sk->sk_pacing_rate = min(rate, sk->sk_max_pacing_rate); +} + +static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, + int newly_lost, int flag) +{ + struct tcp_sock *tp = tcp_sk(sk); + int sndcnt = 0; + __u32 pkts_in_flight = tp->packets_out - (tp->sacked_out + tp->lost_out) + tp->retrans_out; + int delta = tp->snd_ssthresh - pkts_in_flight; + + if (newly_acked_sacked <= 0 || !tp->prior_cwnd) + return; + + __u32 prr_delivered = tp->prr_delivered + newly_acked_sacked; + + if (delta < 0) { + __u64 dividend = + (__u64)tp->snd_ssthresh * prr_delivered + tp->prior_cwnd - 1; + sndcnt = (__u32)div64_u64(dividend, (__u64)tp->prior_cwnd) - tp->prr_out; + } else { + sndcnt = max(prr_delivered - tp->prr_out, newly_acked_sacked); + if (flag & FLAG_SND_UNA_ADVANCED && !newly_lost) + sndcnt++; + sndcnt = min(delta, sndcnt); + } + /* Force a fast retransmit upon entering fast recovery */ + sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1)); + tp->snd_cwnd = pkts_in_flight + sndcnt; +} + +/* Decide wheather to run the increase function of congestion control. */ +static bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) +{ + if (tcp_sk(sk)->reordering > TCP_REORDERING) + return flag & FLAG_FORWARD_PROGRESS; + + return flag & FLAG_DATA_ACKED; +} + +SEC("struct_ops") +void BPF_PROG(bpf_cubic_init, struct sock *sk) +{ + cubictcp_init(sk); +} + +SEC("struct_ops") +void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event) +{ + cubictcp_cwnd_event(sk, event); +} + +SEC("struct_ops") +void BPF_PROG(bpf_cubic_cong_control, struct sock *sk, __u32 ack, int flag, + const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (((1<<TCP_CA_CWR) | (1<<TCP_CA_Recovery)) & + (1 << inet_csk(sk)->icsk_ca_state)) { + /* Reduce cwnd if state mandates */ + tcp_cwnd_reduction(sk, rs->acked_sacked, rs->losses, flag); + + if (!before(tp->snd_una, tp->high_seq)) { + /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ + if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH && + inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) { + tp->snd_cwnd = tp->snd_ssthresh; + tp->snd_cwnd_stamp = tcp_jiffies32; + } + } + } else if (tcp_may_raise_cwnd(sk, flag)) { + /* Advance cwnd if state allows */ + cubictcp_cong_avoid(sk, ack, rs->acked_sacked); + tp->snd_cwnd_stamp = tcp_jiffies32; + } + + tcp_update_pacing_rate(sk); +} + +SEC("struct_ops") +__u32 BPF_PROG(bpf_cubic_recalc_ssthresh, struct sock *sk) +{ + return cubictcp_recalc_ssthresh(sk); +} + +SEC("struct_ops") +void BPF_PROG(bpf_cubic_state, struct sock *sk, __u8 new_state) +{ + cubictcp_state(sk, new_state); +} + +SEC("struct_ops") +void BPF_PROG(bpf_cubic_acked, struct sock *sk, const struct ack_sample *sample) +{ + cubictcp_acked(sk, sample); +} + +SEC("struct_ops") +__u32 BPF_PROG(bpf_cubic_undo_cwnd, struct sock *sk) +{ + return tcp_reno_undo_cwnd(sk); +} + +SEC(".struct_ops") +struct tcp_congestion_ops cc_cubic = { + .init = (void *)bpf_cubic_init, + .ssthresh = (void *)bpf_cubic_recalc_ssthresh, + .cong_control = (void *)bpf_cubic_cong_control, + .set_state = (void *)bpf_cubic_state, + .undo_cwnd = (void *)bpf_cubic_undo_cwnd, + .cwnd_event = (void *)bpf_cubic_cwnd_event, + .pkts_acked = (void *)bpf_cubic_acked, + .name = "bpf_cc_cubic", +}; + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c index c997e3e3d3..d665b8a15c 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cubic.c +++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c @@ -14,14 +14,22 @@ * "ca->ack_cnt / delta" operation. */ -#include <linux/bpf.h> -#include <linux/stddef.h> -#include <linux/tcp.h> -#include "bpf_tcp_helpers.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; #define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) +#define min(a, b) ((a) < (b) ? (a) : (b)) +#define max(a, b) ((a) > (b) ? (a) : (b)) +static bool before(__u32 seq1, __u32 seq2) +{ + return (__s32)(seq1-seq2) < 0; +} +#define after(seq2, seq1) before(seq1, seq2) + +extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym; +extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym; #define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation * max_cwnd = snd_cwnd * beta @@ -70,7 +78,7 @@ static const __u64 cube_factor = (__u64)(1ull << (10+3*BICTCP_HZ)) / (bic_scale * 10); /* BIC TCP Parameters */ -struct bictcp { +struct bpf_bictcp { __u32 cnt; /* increase cwnd by 1 after ACKs */ __u32 last_max_cwnd; /* last maximum snd_cwnd */ __u32 last_cwnd; /* the last snd_cwnd */ @@ -91,7 +99,7 @@ struct bictcp { __u32 curr_rtt; /* the minimum rtt of current round */ }; -static inline void bictcp_reset(struct bictcp *ca) +static void bictcp_reset(struct bpf_bictcp *ca) { ca->cnt = 0; ca->last_max_cwnd = 0; @@ -112,7 +120,7 @@ extern unsigned long CONFIG_HZ __kconfig; #define USEC_PER_SEC 1000000UL #define USEC_PER_JIFFY (USEC_PER_SEC / HZ) -static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor) +static __u64 div64_u64(__u64 dividend, __u64 divisor) { return dividend / divisor; } @@ -120,7 +128,7 @@ static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor) #define div64_ul div64_u64 #define BITS_PER_U64 (sizeof(__u64) * 8) -static __always_inline int fls64(__u64 x) +static int fls64(__u64 x) { int num = BITS_PER_U64 - 1; @@ -153,15 +161,15 @@ static __always_inline int fls64(__u64 x) return num + 1; } -static __always_inline __u32 bictcp_clock_us(const struct sock *sk) +static __u32 bictcp_clock_us(const struct sock *sk) { return tcp_sk(sk)->tcp_mstamp; } -static __always_inline void bictcp_hystart_reset(struct sock *sk) +static void bictcp_hystart_reset(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct bictcp *ca = inet_csk_ca(sk); + struct bpf_bictcp *ca = inet_csk_ca(sk); ca->round_start = ca->last_ack = bictcp_clock_us(sk); ca->end_seq = tp->snd_nxt; @@ -169,11 +177,10 @@ static __always_inline void bictcp_hystart_reset(struct sock *sk) ca->sample_cnt = 0; } -/* "struct_ops/" prefix is a requirement */ -SEC("struct_ops/bpf_cubic_init") +SEC("struct_ops") void BPF_PROG(bpf_cubic_init, struct sock *sk) { - struct bictcp *ca = inet_csk_ca(sk); + struct bpf_bictcp *ca = inet_csk_ca(sk); bictcp_reset(ca); @@ -184,12 +191,11 @@ void BPF_PROG(bpf_cubic_init, struct sock *sk) tcp_sk(sk)->snd_ssthresh = initial_ssthresh; } -/* "struct_ops" prefix is a requirement */ -SEC("struct_ops/bpf_cubic_cwnd_event") +SEC("struct_ops") void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event) { if (event == CA_EVENT_TX_START) { - struct bictcp *ca = inet_csk_ca(sk); + struct bpf_bictcp *ca = inet_csk_ca(sk); __u32 now = tcp_jiffies32; __s32 delta; @@ -230,7 +236,7 @@ static const __u8 v[] = { * Newton-Raphson iteration. * Avg err ~= 0.195% */ -static __always_inline __u32 cubic_root(__u64 a) +static __u32 cubic_root(__u64 a) { __u32 x, b, shift; @@ -263,8 +269,7 @@ static __always_inline __u32 cubic_root(__u64 a) /* * Compute congestion window to use. */ -static __always_inline void bictcp_update(struct bictcp *ca, __u32 cwnd, - __u32 acked) +static void bictcp_update(struct bpf_bictcp *ca, __u32 cwnd, __u32 acked) { __u32 delta, bic_target, max_cnt; __u64 offs, t; @@ -377,11 +382,11 @@ tcp_friendliness: ca->cnt = max(ca->cnt, 2U); } -/* Or simply use the BPF_STRUCT_OPS to avoid the SEC boiler plate. */ -void BPF_STRUCT_OPS(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) +SEC("struct_ops") +void BPF_PROG(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) { struct tcp_sock *tp = tcp_sk(sk); - struct bictcp *ca = inet_csk_ca(sk); + struct bpf_bictcp *ca = inet_csk_ca(sk); if (!tcp_is_cwnd_limited(sk)) return; @@ -397,10 +402,11 @@ void BPF_STRUCT_OPS(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acke tcp_cong_avoid_ai(tp, ca->cnt, acked); } -__u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk) +SEC("struct_ops") +__u32 BPF_PROG(bpf_cubic_recalc_ssthresh, struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - struct bictcp *ca = inet_csk_ca(sk); + struct bpf_bictcp *ca = inet_csk_ca(sk); ca->epoch_start = 0; /* end of epoch */ @@ -414,7 +420,8 @@ __u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk) return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); } -void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state) +SEC("struct_ops") +void BPF_PROG(bpf_cubic_state, struct sock *sk, __u8 new_state) { if (new_state == TCP_CA_Loss) { bictcp_reset(inet_csk_ca(sk)); @@ -433,7 +440,7 @@ void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state) * We apply another 100% factor because @rate is doubled at this point. * We cap the cushion to 1ms. */ -static __always_inline __u32 hystart_ack_delay(struct sock *sk) +static __u32 hystart_ack_delay(struct sock *sk) { unsigned long rate; @@ -444,10 +451,10 @@ static __always_inline __u32 hystart_ack_delay(struct sock *sk) div64_ul((__u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate)); } -static __always_inline void hystart_update(struct sock *sk, __u32 delay) +static void hystart_update(struct sock *sk, __u32 delay) { struct tcp_sock *tp = tcp_sk(sk); - struct bictcp *ca = inet_csk_ca(sk); + struct bpf_bictcp *ca = inet_csk_ca(sk); __u32 threshold; if (hystart_detect & HYSTART_ACK_TRAIN) { @@ -492,11 +499,11 @@ static __always_inline void hystart_update(struct sock *sk, __u32 delay) int bpf_cubic_acked_called = 0; -void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk, - const struct ack_sample *sample) +SEC("struct_ops") +void BPF_PROG(bpf_cubic_acked, struct sock *sk, const struct ack_sample *sample) { const struct tcp_sock *tp = tcp_sk(sk); - struct bictcp *ca = inet_csk_ca(sk); + struct bpf_bictcp *ca = inet_csk_ca(sk); __u32 delay; bpf_cubic_acked_called = 1; @@ -524,7 +531,8 @@ void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk, extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym; -__u32 BPF_STRUCT_OPS(bpf_cubic_undo_cwnd, struct sock *sk) +SEC("struct_ops") +__u32 BPF_PROG(bpf_cubic_undo_cwnd, struct sock *sk) { return tcp_reno_undo_cwnd(sk); } diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c index 460682759a..3c9ffe3403 100644 --- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c +++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c @@ -6,15 +6,23 @@ * the kernel BPF logic. */ -#include <stddef.h> -#include <linux/bpf.h> -#include <linux/types.h> -#include <linux/stddef.h> -#include <linux/tcp.h> -#include <errno.h> +#include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include "bpf_tcp_helpers.h" + +#ifndef EBUSY +#define EBUSY 16 +#endif +#define min(a, b) ((a) < (b) ? (a) : (b)) +#define max(a, b) ((a) > (b) ? (a) : (b)) +#define min_not_zero(x, y) ({ \ + typeof(x) __x = (x); \ + typeof(y) __y = (y); \ + __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) +static bool before(__u32 seq1, __u32 seq2) +{ + return (__s32)(seq1-seq2) < 0; +} char _license[] SEC("license") = "GPL"; @@ -35,7 +43,7 @@ struct { #define DCTCP_MAX_ALPHA 1024U -struct dctcp { +struct bpf_dctcp { __u32 old_delivered; __u32 old_delivered_ce; __u32 prior_rcv_nxt; @@ -48,8 +56,7 @@ struct dctcp { static unsigned int dctcp_shift_g = 4; /* g = 1/2^4 */ static unsigned int dctcp_alpha_on_init = DCTCP_MAX_ALPHA; -static __always_inline void dctcp_reset(const struct tcp_sock *tp, - struct dctcp *ca) +static void dctcp_reset(const struct tcp_sock *tp, struct bpf_dctcp *ca) { ca->next_seq = tp->snd_nxt; @@ -57,11 +64,11 @@ static __always_inline void dctcp_reset(const struct tcp_sock *tp, ca->old_delivered_ce = tp->delivered_ce; } -SEC("struct_ops/dctcp_init") +SEC("struct_ops") void BPF_PROG(dctcp_init, struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - struct dctcp *ca = inet_csk_ca(sk); + struct bpf_dctcp *ca = inet_csk_ca(sk); int *stg; if (!(tp->ecn_flags & TCP_ECN_OK) && fallback[0]) { @@ -104,21 +111,21 @@ void BPF_PROG(dctcp_init, struct sock *sk) dctcp_reset(tp, ca); } -SEC("struct_ops/dctcp_ssthresh") +SEC("struct_ops") __u32 BPF_PROG(dctcp_ssthresh, struct sock *sk) { - struct dctcp *ca = inet_csk_ca(sk); + struct bpf_dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); ca->loss_cwnd = tp->snd_cwnd; return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U); } -SEC("struct_ops/dctcp_update_alpha") +SEC("struct_ops") void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags) { const struct tcp_sock *tp = tcp_sk(sk); - struct dctcp *ca = inet_csk_ca(sk); + struct bpf_dctcp *ca = inet_csk_ca(sk); /* Expired RTT */ if (!before(tp->snd_una, ca->next_seq)) { @@ -144,16 +151,16 @@ void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags) } } -static __always_inline void dctcp_react_to_loss(struct sock *sk) +static void dctcp_react_to_loss(struct sock *sk) { - struct dctcp *ca = inet_csk_ca(sk); + struct bpf_dctcp *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); ca->loss_cwnd = tp->snd_cwnd; tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U); } -SEC("struct_ops/dctcp_state") +SEC("struct_ops") void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state) { if (new_state == TCP_CA_Recovery && @@ -164,7 +171,7 @@ void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state) */ } -static __always_inline void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state) +static void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state) { struct tcp_sock *tp = tcp_sk(sk); @@ -179,9 +186,8 @@ static __always_inline void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state) * S: 0 <- last pkt was non-CE * 1 <- last pkt was CE */ -static __always_inline -void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt, - __u32 *prior_rcv_nxt, __u32 *ce_state) +static void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt, + __u32 *prior_rcv_nxt, __u32 *ce_state) { __u32 new_ce_state = (evt == CA_EVENT_ECN_IS_CE) ? 1 : 0; @@ -201,10 +207,10 @@ void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt, dctcp_ece_ack_cwr(sk, new_ce_state); } -SEC("struct_ops/dctcp_cwnd_event") +SEC("struct_ops") void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev) { - struct dctcp *ca = inet_csk_ca(sk); + struct bpf_dctcp *ca = inet_csk_ca(sk); switch (ev) { case CA_EVENT_ECN_IS_CE: @@ -220,17 +226,17 @@ void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev) } } -SEC("struct_ops/dctcp_cwnd_undo") +SEC("struct_ops") __u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk) { - const struct dctcp *ca = inet_csk_ca(sk); + const struct bpf_dctcp *ca = inet_csk_ca(sk); return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); } extern void tcp_reno_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym; -SEC("struct_ops/dctcp_reno_cong_avoid") +SEC("struct_ops") void BPF_PROG(dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) { tcp_reno_cong_avoid(sk, ack, acked); diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c b/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c index d836f7c372..c91763f248 100644 --- a/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c +++ b/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c @@ -1,19 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021 Facebook */ -#include <stddef.h> -#include <linux/bpf.h> -#include <linux/types.h> -#include <linux/stddef.h> -#include <linux/tcp.h> +#include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include "bpf_tcp_helpers.h" char _license[] SEC("license") = "GPL"; const char cubic[] = "cubic"; -void BPF_STRUCT_OPS(dctcp_nouse_release, struct sock *sk) +SEC("struct_ops") +void BPF_PROG(dctcp_nouse_release, struct sock *sk) { bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION, (void *)cubic, sizeof(cubic)); diff --git a/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c b/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c index 2ecd833dcd..8a7a4c1b54 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c +++ b/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c @@ -1,14 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/bpf.h> -#include <linux/types.h> -#include <bpf/bpf_helpers.h> +#include "bpf_tracing_net.h" #include <bpf/bpf_tracing.h> -#include "bpf_tcp_helpers.h" char _license[] SEC("license") = "X"; -void BPF_STRUCT_OPS(nogpltcp_init, struct sock *sk) +SEC("struct_ops") +void BPF_PROG(nogpltcp_init, struct sock *sk) { } diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index 7001965d1c..59843b430f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -2,6 +2,9 @@ #ifndef __BPF_TRACING_NET_H__ #define __BPF_TRACING_NET_H__ +#include <vmlinux.h> +#include <bpf/bpf_core_read.h> + #define AF_INET 2 #define AF_INET6 10 @@ -22,6 +25,7 @@ #define IP_TOS 1 +#define SOL_IPV6 41 #define IPV6_TCLASS 67 #define IPV6_AUTOFLOWLABEL 70 @@ -46,6 +50,13 @@ #define TCP_CA_NAME_MAX 16 #define TCP_NAGLE_OFF 1 +#define TCP_ECN_OK 1 +#define TCP_ECN_QUEUE_CWR 2 +#define TCP_ECN_DEMAND_CWR 4 +#define TCP_ECN_SEEN 8 + +#define TCP_CONG_NEEDS_ECN 0x2 + #define ICSK_TIME_RETRANS 1 #define ICSK_TIME_PROBE0 3 #define ICSK_TIME_LOSS_PROBE 5 @@ -80,6 +91,14 @@ #define TCP_INFINITE_SSTHRESH 0x7fffffff #define TCP_PINGPONG_THRESH 3 +#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */ +#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */ +#define FLAG_DATA_SACKED 0x20 /* New SACK. */ +#define FLAG_SND_UNA_ADVANCED \ + 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ +#define FLAG_ACKED (FLAG_DATA_ACKED | FLAG_SYN_ACKED) +#define FLAG_FORWARD_PROGRESS (FLAG_ACKED | FLAG_DATA_SACKED) + #define fib_nh_dev nh_common.nhc_dev #define fib_nh_gw_family nh_common.nhc_gw_family #define fib_nh_gw6 nh_common.nhc_gw.ipv6 @@ -119,4 +138,37 @@ #define tw_v6_daddr __tw_common.skc_v6_daddr #define tw_v6_rcv_saddr __tw_common.skc_v6_rcv_saddr +#define tcp_jiffies32 ((__u32)bpf_jiffies64()) + +static inline struct inet_connection_sock *inet_csk(const struct sock *sk) +{ + return (struct inet_connection_sock *)sk; +} + +static inline void *inet_csk_ca(const struct sock *sk) +{ + return (void *)inet_csk(sk)->icsk_ca_priv; +} + +static inline struct tcp_sock *tcp_sk(const struct sock *sk) +{ + return (struct tcp_sock *)sk; +} + +static inline bool tcp_in_slow_start(const struct tcp_sock *tp) +{ + return tp->snd_cwnd < tp->snd_ssthresh; +} + +static inline bool tcp_is_cwnd_limited(const struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + + /* If in slow start, ensure cwnd grows to twice what was ACKed. */ + if (tcp_in_slow_start(tp)) + return tp->snd_cwnd < 2 * tp->max_packets_out; + + return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited); +} + #endif diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c index ba97165bdb..a657651eba 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c @@ -14,9 +14,9 @@ typedef int *ptr_arr_t[6]; typedef int *ptr_multiarr_t[7][8][9][10]; -typedef int * (*fn_ptr_arr_t[11])(); +typedef int * (*fn_ptr_arr_t[11])(void); -typedef int * (*fn_ptr_multiarr_t[12][13])(); +typedef int * (*fn_ptr_multiarr_t[12][13])(void); struct root_struct { arr_t _1; diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c index ad21ee8c7e..29d01fff32 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c @@ -100,7 +100,7 @@ typedef void (*printf_fn_t)(const char *, ...); * `int -> char *` function and returns pointer to a char. Equivalent: * typedef char * (*fn_input_t)(int); * typedef char * (*fn_output_outer_t)(fn_input_t); - * typedef const fn_output_outer_t (* fn_output_inner_t)(); + * typedef const fn_output_outer_t (* fn_output_inner_t)(void); * typedef const fn_output_inner_t fn_ptr_arr2_t[5]; */ /* ----- START-EXPECTED-OUTPUT ----- */ @@ -127,7 +127,7 @@ typedef void (* (*signal_t)(int, void (*)(int)))(int); typedef char * (*fn_ptr_arr1_t[10])(int **); -typedef char * (* (* const fn_ptr_arr2_t[5])())(char * (*)(int)); +typedef char * (* (* const fn_ptr_arr2_t[5])(void))(char * (*)(int)); struct struct_w_typedefs { int_t a; diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h index 22914a70db..73ba32e9a6 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h @@ -13,7 +13,7 @@ struct __cgrps_kfunc_map_value { struct cgroup __kptr * cgrp; }; -struct hash_map { +struct { __uint(type, BPF_MAP_TYPE_HASH); __type(key, int); __type(value, struct __cgrps_kfunc_map_value); diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index 7ef49ec048..9e9ebf27b8 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -14,8 +14,6 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -#include "bpf_tcp_helpers.h" - #define SRC_REWRITE_IP4 0x7f000004U #define DST_REWRITE_IP4 0x7f000001U #define DST_REWRITE_PORT4 4444 @@ -32,6 +30,10 @@ #define IFNAMSIZ 16 #endif +#ifndef SOL_TCP +#define SOL_TCP 6 +#endif + __attribute__ ((noinline)) __weak int do_bind(struct bpf_sock_addr *ctx) { @@ -197,4 +199,10 @@ int connect_v4_prog(struct bpf_sock_addr *ctx) return do_bind(ctx) ? 1 : 0; } +SEC("cgroup/connect4") +int connect_v4_deny_prog(struct bpf_sock_addr *ctx) +{ + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/connect6_prog.c b/tools/testing/selftests/bpf/progs/connect6_prog.c index 40266d2c73..e98573b00d 100644 --- a/tools/testing/selftests/bpf/progs/connect6_prog.c +++ b/tools/testing/selftests/bpf/progs/connect6_prog.c @@ -90,4 +90,10 @@ int connect_v6_prog(struct bpf_sock_addr *ctx) return 1; } +SEC("cgroup/connect6") +int connect_v6_deny_prog(struct bpf_sock_addr *ctx) +{ + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/connect_unix_prog.c b/tools/testing/selftests/bpf/progs/connect_unix_prog.c index 2ef0e0c46d..ba60adadb3 100644 --- a/tools/testing/selftests/bpf/progs/connect_unix_prog.c +++ b/tools/testing/selftests/bpf/progs/connect_unix_prog.c @@ -36,4 +36,10 @@ int connect_unix_prog(struct bpf_sock_addr *ctx) return 1; } +SEC("cgroup/connect_unix") +int connect_unix_deny_prog(struct bpf_sock_addr *ctx) +{ + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h index c705d8112a..b979e91f55 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_common.h +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h @@ -9,7 +9,7 @@ int err; -#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8))) +#define private(name) SEC(".bss." #name) __attribute__((aligned(8))) private(MASK) static struct bpf_cpumask __kptr * global_mask; struct __cpumask_map_value { diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c index a9bf6ea336..a988d2823b 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_failure.c +++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c @@ -61,11 +61,8 @@ SEC("tp_btf/task_newtask") __failure __msg("bpf_cpumask_set_cpu args#1 expected pointer to STRUCT bpf_cpumask") int BPF_PROG(test_mutate_cpumask, struct task_struct *task, u64 clone_flags) { - struct bpf_cpumask *cpumask; - /* Can't set the CPU of a non-struct bpf_cpumask. */ bpf_cpumask_set_cpu(0, (struct bpf_cpumask *)task->cpus_ptr); - __sink(cpumask); return 0; } diff --git a/tools/testing/selftests/bpf/progs/crypto_basic.c b/tools/testing/selftests/bpf/progs/crypto_basic.c new file mode 100644 index 0000000000..8cf7168b42 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/crypto_basic.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" +#include "bpf_kfuncs.h" +#include "crypto_common.h" + +int status; +SEC("syscall") +int crypto_release(void *ctx) +{ + struct bpf_crypto_params params = { + .type = "skcipher", + .algo = "ecb(aes)", + .key_len = 16, + }; + + struct bpf_crypto_ctx *cctx; + int err = 0; + + status = 0; + + cctx = bpf_crypto_ctx_create(¶ms, sizeof(params), &err); + + if (!cctx) { + status = err; + return 0; + } + + bpf_crypto_ctx_release(cctx); + + return 0; +} + +SEC("syscall") +__failure __msg("Unreleased reference") +int crypto_acquire(void *ctx) +{ + struct bpf_crypto_params params = { + .type = "skcipher", + .algo = "ecb(aes)", + .key_len = 16, + }; + struct bpf_crypto_ctx *cctx; + int err = 0; + + status = 0; + + cctx = bpf_crypto_ctx_create(¶ms, sizeof(params), &err); + + if (!cctx) { + status = err; + return 0; + } + + cctx = bpf_crypto_ctx_acquire(cctx); + if (!cctx) + return -EINVAL; + + bpf_crypto_ctx_release(cctx); + + return 0; +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/crypto_bench.c b/tools/testing/selftests/bpf/progs/crypto_bench.c new file mode 100644 index 0000000000..e61fe08822 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/crypto_bench.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" +#include "bpf_kfuncs.h" +#include "crypto_common.h" + +const volatile unsigned int len = 16; +char cipher[128] = {}; +u32 key_len, authsize; +char dst[256] = {}; +u8 key[256] = {}; +long hits = 0; +int status; + +SEC("syscall") +int crypto_setup(void *args) +{ + struct bpf_crypto_ctx *cctx; + struct bpf_crypto_params params = { + .type = "skcipher", + .key_len = key_len, + .authsize = authsize, + }; + int err = 0; + + status = 0; + + if (!cipher[0] || !key_len || key_len > 256) { + status = -EINVAL; + return 0; + } + + __builtin_memcpy(¶ms.algo, cipher, sizeof(cipher)); + __builtin_memcpy(¶ms.key, key, sizeof(key)); + cctx = bpf_crypto_ctx_create(¶ms, sizeof(params), &err); + + if (!cctx) { + status = err; + return 0; + } + + err = crypto_ctx_insert(cctx); + if (err && err != -EEXIST) + status = err; + + return 0; +} + +SEC("tc") +int crypto_encrypt(struct __sk_buff *skb) +{ + struct __crypto_ctx_value *v; + struct bpf_crypto_ctx *ctx; + struct bpf_dynptr psrc, pdst, iv; + + v = crypto_ctx_value_lookup(); + if (!v) { + status = -ENOENT; + return 0; + } + + ctx = v->ctx; + if (!ctx) { + status = -ENOENT; + return 0; + } + + bpf_dynptr_from_skb(skb, 0, &psrc); + bpf_dynptr_from_mem(dst, len, 0, &pdst); + bpf_dynptr_from_mem(dst, 0, 0, &iv); + + status = bpf_crypto_encrypt(ctx, &psrc, &pdst, &iv); + __sync_add_and_fetch(&hits, 1); + + return 0; +} + +SEC("tc") +int crypto_decrypt(struct __sk_buff *skb) +{ + struct bpf_dynptr psrc, pdst, iv; + struct __crypto_ctx_value *v; + struct bpf_crypto_ctx *ctx; + + v = crypto_ctx_value_lookup(); + if (!v) + return -ENOENT; + + ctx = v->ctx; + if (!ctx) + return -ENOENT; + + bpf_dynptr_from_skb(skb, 0, &psrc); + bpf_dynptr_from_mem(dst, len, 0, &pdst); + bpf_dynptr_from_mem(dst, 0, 0, &iv); + + status = bpf_crypto_decrypt(ctx, &psrc, &pdst, &iv); + __sync_add_and_fetch(&hits, 1); + + return 0; +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/crypto_common.h b/tools/testing/selftests/bpf/progs/crypto_common.h new file mode 100644 index 0000000000..57dd7a68a8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/crypto_common.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#ifndef _CRYPTO_COMMON_H +#define _CRYPTO_COMMON_H + +#include "errno.h" +#include <stdbool.h> + +struct bpf_crypto_ctx *bpf_crypto_ctx_create(const struct bpf_crypto_params *params, + u32 params__sz, int *err) __ksym; +struct bpf_crypto_ctx *bpf_crypto_ctx_acquire(struct bpf_crypto_ctx *ctx) __ksym; +void bpf_crypto_ctx_release(struct bpf_crypto_ctx *ctx) __ksym; +int bpf_crypto_encrypt(struct bpf_crypto_ctx *ctx, const struct bpf_dynptr *src, + const struct bpf_dynptr *dst, const struct bpf_dynptr *iv) __ksym; +int bpf_crypto_decrypt(struct bpf_crypto_ctx *ctx, const struct bpf_dynptr *src, + const struct bpf_dynptr *dst, const struct bpf_dynptr *iv) __ksym; + +struct __crypto_ctx_value { + struct bpf_crypto_ctx __kptr * ctx; +}; + +struct array_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct __crypto_ctx_value); + __uint(max_entries, 1); +} __crypto_ctx_map SEC(".maps"); + +static inline struct __crypto_ctx_value *crypto_ctx_value_lookup(void) +{ + u32 key = 0; + + return bpf_map_lookup_elem(&__crypto_ctx_map, &key); +} + +static inline int crypto_ctx_insert(struct bpf_crypto_ctx *ctx) +{ + struct __crypto_ctx_value local, *v; + struct bpf_crypto_ctx *old; + u32 key = 0; + int err; + + local.ctx = NULL; + err = bpf_map_update_elem(&__crypto_ctx_map, &key, &local, 0); + if (err) { + bpf_crypto_ctx_release(ctx); + return err; + } + + v = bpf_map_lookup_elem(&__crypto_ctx_map, &key); + if (!v) { + bpf_crypto_ctx_release(ctx); + return -ENOENT; + } + + old = bpf_kptr_xchg(&v->ctx, ctx); + if (old) { + bpf_crypto_ctx_release(old); + return -EEXIST; + } + + return 0; +} + +#endif /* _CRYPTO_COMMON_H */ diff --git a/tools/testing/selftests/bpf/progs/crypto_sanity.c b/tools/testing/selftests/bpf/progs/crypto_sanity.c new file mode 100644 index 0000000000..1be0a3fa5e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/crypto_sanity.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" +#include "bpf_kfuncs.h" +#include "crypto_common.h" + +unsigned char key[256] = {}; +u16 udp_test_port = 7777; +u32 authsize, key_len; +char algo[128] = {}; +char dst[16] = {}; +int status; + +static int skb_dynptr_validate(struct __sk_buff *skb, struct bpf_dynptr *psrc) +{ + struct ipv6hdr ip6h; + struct udphdr udph; + u32 offset; + + if (skb->protocol != __bpf_constant_htons(ETH_P_IPV6)) + return -1; + + if (bpf_skb_load_bytes(skb, ETH_HLEN, &ip6h, sizeof(ip6h))) + return -1; + + if (ip6h.nexthdr != IPPROTO_UDP) + return -1; + + if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(ip6h), &udph, sizeof(udph))) + return -1; + + if (udph.dest != __bpf_htons(udp_test_port)) + return -1; + + offset = ETH_HLEN + sizeof(ip6h) + sizeof(udph); + if (skb->len < offset + 16) + return -1; + + /* let's make sure that 16 bytes of payload are in the linear part of skb */ + bpf_skb_pull_data(skb, offset + 16); + bpf_dynptr_from_skb(skb, 0, psrc); + bpf_dynptr_adjust(psrc, offset, offset + 16); + + return 0; +} + +SEC("syscall") +int skb_crypto_setup(void *ctx) +{ + struct bpf_crypto_params params = { + .type = "skcipher", + .key_len = key_len, + .authsize = authsize, + }; + struct bpf_crypto_ctx *cctx; + int err = 0; + + status = 0; + + if (key_len > 256) { + status = -EINVAL; + return 0; + } + + __builtin_memcpy(¶ms.algo, algo, sizeof(algo)); + __builtin_memcpy(¶ms.key, key, sizeof(key)); + cctx = bpf_crypto_ctx_create(¶ms, sizeof(params), &err); + + if (!cctx) { + status = err; + return 0; + } + + err = crypto_ctx_insert(cctx); + if (err && err != -EEXIST) + status = err; + + return 0; +} + +SEC("tc") +int decrypt_sanity(struct __sk_buff *skb) +{ + struct __crypto_ctx_value *v; + struct bpf_crypto_ctx *ctx; + struct bpf_dynptr psrc, pdst, iv; + int err; + + err = skb_dynptr_validate(skb, &psrc); + if (err < 0) { + status = err; + return TC_ACT_SHOT; + } + + v = crypto_ctx_value_lookup(); + if (!v) { + status = -ENOENT; + return TC_ACT_SHOT; + } + + ctx = v->ctx; + if (!ctx) { + status = -ENOENT; + return TC_ACT_SHOT; + } + + /* dst is a global variable to make testing part easier to check. In real + * production code, a percpu map should be used to store the result. + */ + bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst); + /* iv dynptr has to be initialized with 0 size, but proper memory region + * has to be provided anyway + */ + bpf_dynptr_from_mem(dst, 0, 0, &iv); + + status = bpf_crypto_decrypt(ctx, &psrc, &pdst, &iv); + + return TC_ACT_SHOT; +} + +SEC("tc") +int encrypt_sanity(struct __sk_buff *skb) +{ + struct __crypto_ctx_value *v; + struct bpf_crypto_ctx *ctx; + struct bpf_dynptr psrc, pdst, iv; + int err; + + status = 0; + + err = skb_dynptr_validate(skb, &psrc); + if (err < 0) { + status = err; + return TC_ACT_SHOT; + } + + v = crypto_ctx_value_lookup(); + if (!v) { + status = -ENOENT; + return TC_ACT_SHOT; + } + + ctx = v->ctx; + if (!ctx) { + status = -ENOENT; + return TC_ACT_SHOT; + } + + /* dst is a global variable to make testing part easier to check. In real + * production code, a percpu map should be used to store the result. + */ + bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst); + /* iv dynptr has to be initialized with 0 size, but proper memory region + * has to be provided anyway + */ + bpf_dynptr_from_mem(dst, 0, 0, &iv); + + status = bpf_crypto_encrypt(ctx, &psrc, &pdst, &iv); + + return TC_ACT_SHOT; +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index 7ce7e827d5..66a60bfb58 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -80,7 +80,7 @@ SEC("?raw_tp") __failure __msg("Unreleased reference id=2") int ringbuf_missing_release1(void *ctx) { - struct bpf_dynptr ptr; + struct bpf_dynptr ptr = {}; bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); @@ -1385,7 +1385,7 @@ SEC("?raw_tp") __failure __msg("Expected an initialized dynptr as arg #1") int dynptr_adjust_invalid(void *ctx) { - struct bpf_dynptr ptr; + struct bpf_dynptr ptr = {}; /* this should fail */ bpf_dynptr_adjust(&ptr, 1, 2); @@ -1398,7 +1398,7 @@ SEC("?raw_tp") __failure __msg("Expected an initialized dynptr as arg #1") int dynptr_is_null_invalid(void *ctx) { - struct bpf_dynptr ptr; + struct bpf_dynptr ptr = {}; /* this should fail */ bpf_dynptr_is_null(&ptr); @@ -1411,7 +1411,7 @@ SEC("?raw_tp") __failure __msg("Expected an initialized dynptr as arg #1") int dynptr_is_rdonly_invalid(void *ctx) { - struct bpf_dynptr ptr; + struct bpf_dynptr ptr = {}; /* this should fail */ bpf_dynptr_is_rdonly(&ptr); @@ -1424,7 +1424,7 @@ SEC("?raw_tp") __failure __msg("Expected an initialized dynptr as arg #1") int dynptr_size_invalid(void *ctx) { - struct bpf_dynptr ptr; + struct bpf_dynptr ptr = {}; /* this should fail */ bpf_dynptr_size(&ptr); @@ -1437,7 +1437,7 @@ SEC("?raw_tp") __failure __msg("Expected an initialized dynptr as arg #1") int clone_invalid1(void *ctx) { - struct bpf_dynptr ptr1; + struct bpf_dynptr ptr1 = {}; struct bpf_dynptr ptr2; /* this should fail */ diff --git a/tools/testing/selftests/bpf/progs/fib_lookup.c b/tools/testing/selftests/bpf/progs/fib_lookup.c index c4514dd58c..7b5dd2214f 100644 --- a/tools/testing/selftests/bpf/progs/fib_lookup.c +++ b/tools/testing/selftests/bpf/progs/fib_lookup.c @@ -3,8 +3,8 @@ #include <linux/types.h> #include <linux/bpf.h> +#include <linux/pkt_cls.h> #include <bpf/bpf_helpers.h> -#include "bpf_tracing_net.h" struct bpf_fib_lookup fib_params = {}; int fib_lookup_ret = 0; diff --git a/tools/testing/selftests/bpf/progs/for_each_multi_maps.c b/tools/testing/selftests/bpf/progs/for_each_multi_maps.c new file mode 100644 index 0000000000..ff0bed7d44 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/for_each_multi_maps.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 3); + __type(key, __u32); + __type(value, __u64); +} arraymap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 5); + __type(key, __u32); + __type(value, __u64); +} hashmap SEC(".maps"); + +struct callback_ctx { + int output; +}; + +u32 data_output = 0; +int use_array = 0; + +static __u64 +check_map_elem(struct bpf_map *map, __u32 *key, __u64 *val, + struct callback_ctx *data) +{ + data->output += *val; + return 0; +} + +SEC("tc") +int test_pkt_access(struct __sk_buff *skb) +{ + struct callback_ctx data; + + data.output = 0; + if (use_array) + bpf_for_each_map_elem(&arraymap, check_map_elem, &data, 0); + else + bpf_for_each_map_elem(&hashmap, check_map_elem, &data, 0); + data_output = data.output; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/getpeername4_prog.c b/tools/testing/selftests/bpf/progs/getpeername4_prog.c new file mode 100644 index 0000000000..4c97208cd2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/getpeername4_prog.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Google LLC */ + +#include "vmlinux.h" + +#include <string.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_core_read.h> +#include "bpf_kfuncs.h" + +#define REWRITE_ADDRESS_IP4 0xc0a801fe // 192.168.1.254 +#define REWRITE_ADDRESS_PORT4 4040 + +SEC("cgroup/getpeername4") +int getpeername_v4_prog(struct bpf_sock_addr *ctx) +{ + ctx->user_ip4 = bpf_htonl(REWRITE_ADDRESS_IP4); + ctx->user_port = bpf_htons(REWRITE_ADDRESS_PORT4); + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/getpeername6_prog.c b/tools/testing/selftests/bpf/progs/getpeername6_prog.c new file mode 100644 index 0000000000..070e4d7f63 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/getpeername6_prog.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Google LLC */ + +#include "vmlinux.h" + +#include <string.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_core_read.h> +#include "bpf_kfuncs.h" + +#define REWRITE_ADDRESS_IP6_0 0xfaceb00c +#define REWRITE_ADDRESS_IP6_1 0x12345678 +#define REWRITE_ADDRESS_IP6_2 0x00000000 +#define REWRITE_ADDRESS_IP6_3 0x0000abcd + +#define REWRITE_ADDRESS_PORT6 6060 + +SEC("cgroup/getpeername6") +int getpeername_v6_prog(struct bpf_sock_addr *ctx) +{ + ctx->user_ip6[0] = bpf_htonl(REWRITE_ADDRESS_IP6_0); + ctx->user_ip6[1] = bpf_htonl(REWRITE_ADDRESS_IP6_1); + ctx->user_ip6[2] = bpf_htonl(REWRITE_ADDRESS_IP6_2); + ctx->user_ip6[3] = bpf_htonl(REWRITE_ADDRESS_IP6_3); + ctx->user_port = bpf_htons(REWRITE_ADDRESS_PORT6); + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/getsockname4_prog.c b/tools/testing/selftests/bpf/progs/getsockname4_prog.c new file mode 100644 index 0000000000..e298487c63 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/getsockname4_prog.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Google LLC */ + +#include "vmlinux.h" + +#include <string.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_core_read.h> +#include "bpf_kfuncs.h" + +#define REWRITE_ADDRESS_IP4 0xc0a801fe // 192.168.1.254 +#define REWRITE_ADDRESS_PORT4 4040 + +SEC("cgroup/getsockname4") +int getsockname_v4_prog(struct bpf_sock_addr *ctx) +{ + ctx->user_ip4 = bpf_htonl(REWRITE_ADDRESS_IP4); + ctx->user_port = bpf_htons(REWRITE_ADDRESS_PORT4); + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/getsockname6_prog.c b/tools/testing/selftests/bpf/progs/getsockname6_prog.c new file mode 100644 index 0000000000..811d10cd55 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/getsockname6_prog.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Google LLC */ + +#include "vmlinux.h" + +#include <string.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_core_read.h> +#include "bpf_kfuncs.h" + +#define REWRITE_ADDRESS_IP6_0 0xfaceb00c +#define REWRITE_ADDRESS_IP6_1 0x12345678 +#define REWRITE_ADDRESS_IP6_2 0x00000000 +#define REWRITE_ADDRESS_IP6_3 0x0000abcd + +#define REWRITE_ADDRESS_PORT6 6060 + +SEC("cgroup/getsockname6") +int getsockname_v6_prog(struct bpf_sock_addr *ctx) +{ + ctx->user_ip6[0] = bpf_htonl(REWRITE_ADDRESS_IP6_0); + ctx->user_ip6[1] = bpf_htonl(REWRITE_ADDRESS_IP6_1); + ctx->user_ip6[2] = bpf_htonl(REWRITE_ADDRESS_IP6_2); + ctx->user_ip6[3] = bpf_htonl(REWRITE_ADDRESS_IP6_3); + ctx->user_port = bpf_htons(REWRITE_ADDRESS_PORT6); + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 3db416606f..fe65e0952a 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -673,7 +673,7 @@ static __noinline void fill(struct bpf_iter_num *it, int *arr, __u32 n, int mul) static __noinline int sum(struct bpf_iter_num *it, int *arr, __u32 n) { - int *t, i, sum = 0;; + int *t, i, sum = 0; while ((t = bpf_iter_num_next(it))) { i = *t; diff --git a/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c b/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c index f46965053a..4d619bea9c 100644 --- a/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c +++ b/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c @@ -4,6 +4,10 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" +#ifndef __clang__ +#pragma GCC diagnostic ignored "-Warray-bounds" +#endif + char _license[] SEC("license") = "GPL"; struct { diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_session.c b/tools/testing/selftests/bpf/progs/kprobe_multi_session.c new file mode 100644 index 0000000000..bbba9eb465 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kprobe_multi_session.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <stdbool.h> +#include "bpf_kfuncs.h" + +#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0])) + +char _license[] SEC("license") = "GPL"; + +extern const void bpf_fentry_test1 __ksym; +extern const void bpf_fentry_test2 __ksym; +extern const void bpf_fentry_test3 __ksym; +extern const void bpf_fentry_test4 __ksym; +extern const void bpf_fentry_test5 __ksym; +extern const void bpf_fentry_test6 __ksym; +extern const void bpf_fentry_test7 __ksym; +extern const void bpf_fentry_test8 __ksym; + +int pid = 0; + +__u64 kprobe_session_result[8]; + +static int session_check(void *ctx) +{ + unsigned int i; + __u64 addr; + const void *kfuncs[] = { + &bpf_fentry_test1, + &bpf_fentry_test2, + &bpf_fentry_test3, + &bpf_fentry_test4, + &bpf_fentry_test5, + &bpf_fentry_test6, + &bpf_fentry_test7, + &bpf_fentry_test8, + }; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 1; + + addr = bpf_get_func_ip(ctx); + + for (i = 0; i < ARRAY_SIZE(kfuncs); i++) { + if (kfuncs[i] == (void *) addr) { + kprobe_session_result[i]++; + break; + } + } + + /* + * Force probes for function bpf_fentry_test[5-8] not to + * install and execute the return probe + */ + if (((const void *) addr == &bpf_fentry_test5) || + ((const void *) addr == &bpf_fentry_test6) || + ((const void *) addr == &bpf_fentry_test7) || + ((const void *) addr == &bpf_fentry_test8)) + return 1; + + return 0; +} + +/* + * No tests in here, just to trigger 'bpf_fentry_test*' + * through tracing test_run + */ +SEC("fentry/bpf_modify_return_test") +int BPF_PROG(trigger) +{ + return 0; +} + +SEC("kprobe.session/bpf_fentry_test*") +int test_kprobe(struct pt_regs *ctx) +{ + return session_check(ctx); +} diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c b/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c new file mode 100644 index 0000000000..0835b5edf6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <stdbool.h> +#include "bpf_kfuncs.h" + +char _license[] SEC("license") = "GPL"; + +int pid = 0; + +__u64 test_kprobe_1_result = 0; +__u64 test_kprobe_2_result = 0; +__u64 test_kprobe_3_result = 0; + +/* + * No tests in here, just to trigger 'bpf_fentry_test*' + * through tracing test_run + */ +SEC("fentry/bpf_modify_return_test") +int BPF_PROG(trigger) +{ + return 0; +} + +static int check_cookie(__u64 val, __u64 *result) +{ + __u64 *cookie; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 1; + + cookie = bpf_session_cookie(); + + if (bpf_session_is_return()) + *result = *cookie == val ? val : 0; + else + *cookie = val; + return 0; +} + +SEC("kprobe.session/bpf_fentry_test1") +int test_kprobe_1(struct pt_regs *ctx) +{ + return check_cookie(1, &test_kprobe_1_result); +} + +SEC("kprobe.session/bpf_fentry_test1") +int test_kprobe_2(struct pt_regs *ctx) +{ + return check_cookie(2, &test_kprobe_2_result); +} + +SEC("kprobe.session/bpf_fentry_test1") +int test_kprobe_3(struct pt_regs *ctx) +{ + return check_cookie(3, &test_kprobe_3_result); +} diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c index da30f0d593..ab0ce1d01a 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr.c +++ b/tools/testing/selftests/bpf/progs/map_kptr.c @@ -110,10 +110,14 @@ DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_map, array_of_array_maps); DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_map, array_of_hash_maps); DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_malloc_map, array_of_hash_malloc_maps); DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, lru_hash_map, array_of_lru_hash_maps); +DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, pcpu_array_map, array_of_pcpu_array_maps); +DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, pcpu_hash_map, array_of_pcpu_hash_maps); DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, array_map, hash_of_array_maps); DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_map, hash_of_hash_maps); DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_malloc_map, hash_of_hash_malloc_maps); DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, lru_hash_map, hash_of_lru_hash_maps); +DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, pcpu_array_map, hash_of_pcpu_array_maps); +DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, pcpu_hash_map, hash_of_pcpu_hash_maps); #define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val)) @@ -204,6 +208,8 @@ int test_map_kptr(struct __sk_buff *ctx) TEST(hash_map); TEST(hash_malloc_map); TEST(lru_hash_map); + TEST(pcpu_array_map); + TEST(pcpu_hash_map); #undef TEST return 0; @@ -281,10 +287,14 @@ int test_map_in_map_kptr(struct __sk_buff *ctx) TEST(array_of_hash_maps); TEST(array_of_hash_malloc_maps); TEST(array_of_lru_hash_maps); + TEST(array_of_pcpu_array_maps); + TEST(array_of_pcpu_hash_maps); TEST(hash_of_array_maps); TEST(hash_of_hash_maps); TEST(hash_of_hash_malloc_maps); TEST(hash_of_lru_hash_maps); + TEST(hash_of_pcpu_array_maps); + TEST(hash_of_pcpu_hash_maps); #undef TEST return 0; diff --git a/tools/testing/selftests/bpf/progs/mptcp_sock.c b/tools/testing/selftests/bpf/progs/mptcp_sock.c index 91a0d7eff2..f3acb90588 100644 --- a/tools/testing/selftests/bpf/progs/mptcp_sock.c +++ b/tools/testing/selftests/bpf/progs/mptcp_sock.c @@ -2,9 +2,9 @@ /* Copyright (c) 2020, Tessares SA. */ /* Copyright (c) 2022, SUSE. */ -#include <linux/bpf.h> +#include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> -#include "bpf_tcp_helpers.h" +#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; __u32 token = 0; diff --git a/tools/testing/selftests/bpf/progs/mptcpify.c b/tools/testing/selftests/bpf/progs/mptcpify.c index 53301ae8a8..cbdc730c3a 100644 --- a/tools/testing/selftests/bpf/progs/mptcpify.c +++ b/tools/testing/selftests/bpf/progs/mptcpify.c @@ -6,10 +6,14 @@ #include "bpf_tracing_net.h" char _license[] SEC("license") = "GPL"; +int pid; SEC("fmod_ret/update_socket_protocol") int BPF_PROG(mptcpify, int family, int type, int protocol) { + if (bpf_get_current_pid_tgid() >> 32 != pid) + return protocol; + if ((family == AF_INET || family == AF_INET6) && type == SOCK_STREAM && (!protocol || protocol == IPPROTO_TCP)) { diff --git a/tools/testing/selftests/bpf/progs/preempt_lock.c b/tools/testing/selftests/bpf/progs/preempt_lock.c new file mode 100644 index 0000000000..672fc368d9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/preempt_lock.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" + +SEC("?tc") +__failure __msg("1 bpf_preempt_enable is missing") +int preempt_lock_missing_1(struct __sk_buff *ctx) +{ + bpf_preempt_disable(); + return 0; +} + +SEC("?tc") +__failure __msg("2 bpf_preempt_enable(s) are missing") +int preempt_lock_missing_2(struct __sk_buff *ctx) +{ + bpf_preempt_disable(); + bpf_preempt_disable(); + return 0; +} + +SEC("?tc") +__failure __msg("3 bpf_preempt_enable(s) are missing") +int preempt_lock_missing_3(struct __sk_buff *ctx) +{ + bpf_preempt_disable(); + bpf_preempt_disable(); + bpf_preempt_disable(); + return 0; +} + +SEC("?tc") +__failure __msg("1 bpf_preempt_enable is missing") +int preempt_lock_missing_3_minus_2(struct __sk_buff *ctx) +{ + bpf_preempt_disable(); + bpf_preempt_disable(); + bpf_preempt_disable(); + bpf_preempt_enable(); + bpf_preempt_enable(); + return 0; +} + +static __noinline void preempt_disable(void) +{ + bpf_preempt_disable(); +} + +static __noinline void preempt_enable(void) +{ + bpf_preempt_enable(); +} + +SEC("?tc") +__failure __msg("1 bpf_preempt_enable is missing") +int preempt_lock_missing_1_subprog(struct __sk_buff *ctx) +{ + preempt_disable(); + return 0; +} + +SEC("?tc") +__failure __msg("2 bpf_preempt_enable(s) are missing") +int preempt_lock_missing_2_subprog(struct __sk_buff *ctx) +{ + preempt_disable(); + preempt_disable(); + return 0; +} + +SEC("?tc") +__failure __msg("1 bpf_preempt_enable is missing") +int preempt_lock_missing_2_minus_1_subprog(struct __sk_buff *ctx) +{ + preempt_disable(); + preempt_disable(); + preempt_enable(); + return 0; +} + +static __noinline void preempt_balance_subprog(void) +{ + preempt_disable(); + preempt_enable(); +} + +SEC("?tc") +__success int preempt_balance(struct __sk_buff *ctx) +{ + bpf_guard_preempt(); + return 0; +} + +SEC("?tc") +__success int preempt_balance_subprog_test(struct __sk_buff *ctx) +{ + preempt_balance_subprog(); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure __msg("sleepable helper bpf_copy_from_user#") +int preempt_sleepable_helper(void *ctx) +{ + u32 data; + + bpf_preempt_disable(); + bpf_copy_from_user(&data, sizeof(data), NULL); + bpf_preempt_enable(); + return 0; +} + +int __noinline preempt_global_subprog(void) +{ + preempt_balance_subprog(); + return 0; +} + +SEC("?tc") +__failure __msg("global function calls are not allowed with preemption disabled") +int preempt_global_subprog_test(struct __sk_buff *ctx) +{ + preempt_disable(); + preempt_global_subprog(); + preempt_enable(); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c index 351e79aef2..edc159598a 100644 --- a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c @@ -49,4 +49,10 @@ int sendmsg_v4_prog(struct bpf_sock_addr *ctx) return 1; } +SEC("cgroup/sendmsg4") +int sendmsg_v4_deny_prog(struct bpf_sock_addr *ctx) +{ + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c index bf9b46b806..36a7f96079 100644 --- a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c @@ -20,6 +20,11 @@ #define DST_REWRITE_IP6_2 0 #define DST_REWRITE_IP6_3 1 +#define DST_REWRITE_IP6_V4_MAPPED_0 0 +#define DST_REWRITE_IP6_V4_MAPPED_1 0 +#define DST_REWRITE_IP6_V4_MAPPED_2 0x0000FFFF +#define DST_REWRITE_IP6_V4_MAPPED_3 0xc0a80004 // 192.168.0.4 + #define DST_REWRITE_PORT6 6666 SEC("cgroup/sendmsg6") @@ -59,4 +64,56 @@ int sendmsg_v6_prog(struct bpf_sock_addr *ctx) return 1; } +SEC("cgroup/sendmsg6") +int sendmsg_v6_v4mapped_prog(struct bpf_sock_addr *ctx) +{ + /* Rewrite source. */ + ctx->msg_src_ip6[0] = bpf_htonl(SRC_REWRITE_IP6_0); + ctx->msg_src_ip6[1] = bpf_htonl(SRC_REWRITE_IP6_1); + ctx->msg_src_ip6[2] = bpf_htonl(SRC_REWRITE_IP6_2); + ctx->msg_src_ip6[3] = bpf_htonl(SRC_REWRITE_IP6_3); + + /* Rewrite destination. */ + ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_V4_MAPPED_0); + ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_V4_MAPPED_1); + ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_V4_MAPPED_2); + ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_V4_MAPPED_3); + + ctx->user_port = bpf_htons(DST_REWRITE_PORT6); + + return 1; +} + +SEC("cgroup/sendmsg6") +int sendmsg_v6_wildcard_prog(struct bpf_sock_addr *ctx) +{ + /* Rewrite source. */ + ctx->msg_src_ip6[0] = bpf_htonl(SRC_REWRITE_IP6_0); + ctx->msg_src_ip6[1] = bpf_htonl(SRC_REWRITE_IP6_1); + ctx->msg_src_ip6[2] = bpf_htonl(SRC_REWRITE_IP6_2); + ctx->msg_src_ip6[3] = bpf_htonl(SRC_REWRITE_IP6_3); + + /* Rewrite destination. */ + ctx->user_ip6[0] = bpf_htonl(0); + ctx->user_ip6[1] = bpf_htonl(0); + ctx->user_ip6[2] = bpf_htonl(0); + ctx->user_ip6[3] = bpf_htonl(0); + + ctx->user_port = bpf_htons(DST_REWRITE_PORT6); + + return 1; +} + +SEC("cgroup/sendmsg6") +int sendmsg_v6_preserve_dst_prog(struct bpf_sock_addr *ctx) +{ + return 1; +} + +SEC("cgroup/sendmsg6") +int sendmsg_v6_deny_prog(struct bpf_sock_addr *ctx) +{ + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c index d8869b03dd..332d0eb111 100644 --- a/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c @@ -36,4 +36,10 @@ int sendmsg_unix_prog(struct bpf_sock_addr *ctx) return 1; } +SEC("cgroup/sendmsg_unix") +int sendmsg_unix_deny_prog(struct bpf_sock_addr *ctx) +{ + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/skb_pkt_end.c b/tools/testing/selftests/bpf/progs/skb_pkt_end.c index 992b786100..db4abd2682 100644 --- a/tools/testing/selftests/bpf/progs/skb_pkt_end.c +++ b/tools/testing/selftests/bpf/progs/skb_pkt_end.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 +#ifndef BPF_NO_PRESERVE_ACCESS_INDEX #define BPF_NO_PRESERVE_ACCESS_INDEX +#endif #include <vmlinux.h> #include <bpf/bpf_core_read.h> #include <bpf/bpf_helpers.h> diff --git a/tools/testing/selftests/bpf/progs/sock_addr_kern.c b/tools/testing/selftests/bpf/progs/sock_addr_kern.c new file mode 100644 index 0000000000..8386bb15cc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sock_addr_kern.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Google LLC */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "../bpf_testmod/bpf_testmod_kfunc.h" + +SEC("syscall") +int init_sock(struct init_sock_args *args) +{ + bpf_kfunc_init_sock(args); + + return 0; +} + +SEC("syscall") +int close_sock(void *ctx) +{ + bpf_kfunc_close_sock(); + + return 0; +} + +SEC("syscall") +int kernel_connect(struct addr_args *args) +{ + return bpf_kfunc_call_kernel_connect(args); +} + +SEC("syscall") +int kernel_bind(struct addr_args *args) +{ + return bpf_kfunc_call_kernel_bind(args); +} + +SEC("syscall") +int kernel_listen(struct addr_args *args) +{ + return bpf_kfunc_call_kernel_listen(); +} + +SEC("syscall") +int kernel_sendmsg(struct sendmsg_args *args) +{ + return bpf_kfunc_call_kernel_sendmsg(args); +} + +SEC("syscall") +int sock_sendmsg(struct sendmsg_args *args) +{ + return bpf_kfunc_call_sock_sendmsg(args); +} + +SEC("syscall") +int kernel_getsockname(struct addr_args *args) +{ + return bpf_kfunc_call_kernel_getsockname(args); +} + +SEC("syscall") +int kernel_getpeername(struct addr_args *args) +{ + return bpf_kfunc_call_kernel_getpeername(args); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c index 83753b00a5..5c3614333b 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c +++ b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c @@ -1,24 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021 Facebook */ -#include <string.h> -#include <linux/tcp.h> -#include <netinet/in.h> -#include <linux/bpf.h> -#include <bpf/bpf_helpers.h> -#include "bpf_tcp_helpers.h" +#include "bpf_tracing_net.h" char _license[] SEC("license") = "GPL"; __s32 page_size = 0; +const char cc_reno[TCP_CA_NAME_MAX] = "reno"; +const char cc_cubic[TCP_CA_NAME_MAX] = "cubic"; + SEC("cgroup/setsockopt") int sockopt_qos_to_cc(struct bpf_sockopt *ctx) { void *optval_end = ctx->optval_end; int *optval = ctx->optval; char buf[TCP_CA_NAME_MAX]; - char cc_reno[TCP_CA_NAME_MAX] = "reno"; - char cc_cubic[TCP_CA_NAME_MAX] = "cubic"; if (ctx->level != SOL_IPV6 || ctx->optname != IPV6_TCLASS) goto out; @@ -29,11 +25,11 @@ int sockopt_qos_to_cc(struct bpf_sockopt *ctx) if (bpf_getsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf))) return 0; - if (!tcp_cc_eq(buf, cc_cubic)) + if (bpf_strncmp(buf, sizeof(buf), cc_cubic)) return 0; if (*optval == 0x2d) { - if (bpf_setsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &cc_reno, + if (bpf_setsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, (void *)&cc_reno, sizeof(cc_reno))) return 0; } diff --git a/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c b/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c new file mode 100644 index 0000000000..3c822103bd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +SEC("struct_ops/test_1") +int BPF_PROG(test_1_forgotten) +{ + return 0; +} + +SEC(".struct_ops.link") +struct bpf_testmod_ops ops = { + /* we forgot to reference test_1_forgotten above, oops */ +}; + diff --git a/tools/testing/selftests/bpf/progs/struct_ops_module.c b/tools/testing/selftests/bpf/progs/struct_ops_module.c index 026cabfa7f..4c56d4a9d9 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_module.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_module.c @@ -23,7 +23,7 @@ void BPF_PROG(test_2, int a, int b) test_2_result = a + b; } -SEC("struct_ops/test_3") +SEC("?struct_ops/test_3") int BPF_PROG(test_3, int a, int b) { test_2_result = a + b + 3; @@ -54,3 +54,37 @@ struct bpf_testmod_ops___v2 testmod_2 = { .test_1 = (void *)test_1, .test_2 = (void *)test_2_v2, }; + +struct bpf_testmod_ops___zeroed { + int (*test_1)(void); + void (*test_2)(int a, int b); + int (*test_maybe_null)(int dummy, struct task_struct *task); + void (*zeroed_op)(int a, int b); + int zeroed; +}; + +SEC("struct_ops/test_3") +int BPF_PROG(zeroed_op) +{ + return 1; +} + +SEC(".struct_ops.link") +struct bpf_testmod_ops___zeroed testmod_zeroed = { + .test_1 = (void *)test_1, + .test_2 = (void *)test_2_v2, + .zeroed_op = (void *)zeroed_op, +}; + +struct bpf_testmod_ops___incompatible { + int (*test_1)(void); + void (*test_2)(int *a); + int data; +}; + +SEC(".struct_ops.link") +struct bpf_testmod_ops___incompatible testmod_incompatible = { + .test_1 = (void *)test_1, + .test_2 = (void *)test_2, + .data = 3, +}; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c b/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c new file mode 100644 index 0000000000..fa20213884 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +int rand; +int arr[1]; + +SEC("struct_ops/test_1") +int BPF_PROG(test_1_turn_off) +{ + return arr[rand]; /* potentially way out of range access */ +} + +SEC(".struct_ops.link") +struct bpf_testmod_ops ops = { + .test_1 = (void *)test_1_turn_off, +}; + diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h index 41f2d44f49..6720c4b5be 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_common.h +++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h @@ -13,7 +13,7 @@ struct __tasks_kfunc_map_value { struct task_struct __kptr * task; }; -struct hash_map { +struct { __uint(type, BPF_MAP_TYPE_HASH); __type(key, int); __type(value, struct __tasks_kfunc_map_value); diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c index 7bb872fb22..0016c90e9c 100644 --- a/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c +++ b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c @@ -1,24 +1,18 @@ // SPDX-License-Identifier: GPL-2.0 -#include "vmlinux.h" - +#include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; -static inline struct tcp_sock *tcp_sk(const struct sock *sk) -{ - return (struct tcp_sock *)sk; -} - -SEC("struct_ops/incompl_cong_ops_ssthresh") +SEC("struct_ops") __u32 BPF_PROG(incompl_cong_ops_ssthresh, struct sock *sk) { return tcp_sk(sk)->snd_ssthresh; } -SEC("struct_ops/incompl_cong_ops_undo_cwnd") +SEC("struct_ops") __u32 BPF_PROG(incompl_cong_ops_undo_cwnd, struct sock *sk) { return tcp_sk(sk)->snd_cwnd; diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c b/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c new file mode 100644 index 0000000000..f95862f570 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_tracing.h> + +extern void bbr_init(struct sock *sk) __ksym; +extern void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs) __ksym; +extern u32 bbr_sndbuf_expand(struct sock *sk) __ksym; +extern u32 bbr_undo_cwnd(struct sock *sk) __ksym; +extern void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym; +extern u32 bbr_ssthresh(struct sock *sk) __ksym; +extern u32 bbr_min_tso_segs(struct sock *sk) __ksym; +extern void bbr_set_state(struct sock *sk, u8 new_state) __ksym; + +extern void dctcp_init(struct sock *sk) __ksym; +extern void dctcp_update_alpha(struct sock *sk, u32 flags) __ksym; +extern void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) __ksym; +extern u32 dctcp_ssthresh(struct sock *sk) __ksym; +extern u32 dctcp_cwnd_undo(struct sock *sk) __ksym; +extern void dctcp_state(struct sock *sk, u8 new_state) __ksym; + +extern void cubictcp_init(struct sock *sk) __ksym; +extern u32 cubictcp_recalc_ssthresh(struct sock *sk) __ksym; +extern void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) __ksym; +extern void cubictcp_state(struct sock *sk, u8 new_state) __ksym; +extern void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym; +extern void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) __ksym; + +SEC("struct_ops") +void BPF_PROG(init, struct sock *sk) +{ + bbr_init(sk); + dctcp_init(sk); + cubictcp_init(sk); +} + +SEC("struct_ops") +void BPF_PROG(in_ack_event, struct sock *sk, u32 flags) +{ + dctcp_update_alpha(sk, flags); +} + +SEC("struct_ops") +void BPF_PROG(cong_control, struct sock *sk, u32 ack, int flag, const struct rate_sample *rs) +{ + bbr_main(sk, ack, flag, rs); +} + +SEC("struct_ops") +void BPF_PROG(cong_avoid, struct sock *sk, u32 ack, u32 acked) +{ + cubictcp_cong_avoid(sk, ack, acked); +} + +SEC("struct_ops") +u32 BPF_PROG(sndbuf_expand, struct sock *sk) +{ + return bbr_sndbuf_expand(sk); +} + +SEC("struct_ops") +u32 BPF_PROG(undo_cwnd, struct sock *sk) +{ + bbr_undo_cwnd(sk); + return dctcp_cwnd_undo(sk); +} + +SEC("struct_ops") +void BPF_PROG(cwnd_event, struct sock *sk, enum tcp_ca_event event) +{ + bbr_cwnd_event(sk, event); + dctcp_cwnd_event(sk, event); + cubictcp_cwnd_event(sk, event); +} + +SEC("struct_ops") +u32 BPF_PROG(ssthresh, struct sock *sk) +{ + bbr_ssthresh(sk); + dctcp_ssthresh(sk); + return cubictcp_recalc_ssthresh(sk); +} + +SEC("struct_ops") +u32 BPF_PROG(min_tso_segs, struct sock *sk) +{ + return bbr_min_tso_segs(sk); +} + +SEC("struct_ops") +void BPF_PROG(set_state, struct sock *sk, u8 new_state) +{ + bbr_set_state(sk, new_state); + dctcp_state(sk, new_state); + cubictcp_state(sk, new_state); +} + +SEC("struct_ops") +void BPF_PROG(pkts_acked, struct sock *sk, const struct ack_sample *sample) +{ + cubictcp_acked(sk, sample); +} + +SEC(".struct_ops") +struct tcp_congestion_ops tcp_ca_kfunc = { + .init = (void *)init, + .in_ack_event = (void *)in_ack_event, + .cong_control = (void *)cong_control, + .cong_avoid = (void *)cong_avoid, + .sndbuf_expand = (void *)sndbuf_expand, + .undo_cwnd = (void *)undo_cwnd, + .cwnd_event = (void *)cwnd_event, + .ssthresh = (void *)ssthresh, + .min_tso_segs = (void *)min_tso_segs, + .set_state = (void *)set_state, + .pkts_acked = (void *)pkts_acked, + .name = "tcp_ca_kfunc", +}; + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c index c06f4a41c2..54f916a931 100644 --- a/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c +++ b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c @@ -7,7 +7,7 @@ char _license[] SEC("license") = "GPL"; -SEC("struct_ops/unsupp_cong_op_get_info") +SEC("struct_ops") size_t BPF_PROG(unsupp_cong_op_get_info, struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info) { diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_update.c b/tools/testing/selftests/bpf/progs/tcp_ca_update.c index b93a0ed330..e4bd82bc0d 100644 --- a/tools/testing/selftests/bpf/progs/tcp_ca_update.c +++ b/tools/testing/selftests/bpf/progs/tcp_ca_update.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -#include "vmlinux.h" - +#include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> @@ -10,36 +9,31 @@ char _license[] SEC("license") = "GPL"; int ca1_cnt = 0; int ca2_cnt = 0; -static inline struct tcp_sock *tcp_sk(const struct sock *sk) -{ - return (struct tcp_sock *)sk; -} - -SEC("struct_ops/ca_update_1_init") +SEC("struct_ops") void BPF_PROG(ca_update_1_init, struct sock *sk) { ca1_cnt++; } -SEC("struct_ops/ca_update_2_init") +SEC("struct_ops") void BPF_PROG(ca_update_2_init, struct sock *sk) { ca2_cnt++; } -SEC("struct_ops/ca_update_cong_control") +SEC("struct_ops") void BPF_PROG(ca_update_cong_control, struct sock *sk, const struct rate_sample *rs) { } -SEC("struct_ops/ca_update_ssthresh") +SEC("struct_ops") __u32 BPF_PROG(ca_update_ssthresh, struct sock *sk) { return tcp_sk(sk)->snd_ssthresh; } -SEC("struct_ops/ca_update_undo_cwnd") +SEC("struct_ops") __u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk) { return tcp_sk(sk)->snd_cwnd; diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c index 0724a79cec..a58b5194fc 100644 --- a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c +++ b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -#include "vmlinux.h" - +#include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> @@ -11,22 +10,17 @@ char _license[] SEC("license") = "GPL"; #define min(a, b) ((a) < (b) ? (a) : (b)) -static inline struct tcp_sock *tcp_sk(const struct sock *sk) -{ - return (struct tcp_sock *)sk; -} - -static inline unsigned int tcp_left_out(const struct tcp_sock *tp) +static unsigned int tcp_left_out(const struct tcp_sock *tp) { return tp->sacked_out + tp->lost_out; } -static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) +static unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) { return tp->packets_out - tcp_left_out(tp) + tp->retrans_out; } -SEC("struct_ops/write_sk_pacing_init") +SEC("struct_ops") void BPF_PROG(write_sk_pacing_init, struct sock *sk) { #ifdef ENABLE_ATOMICS_TESTS @@ -37,7 +31,7 @@ void BPF_PROG(write_sk_pacing_init, struct sock *sk) #endif } -SEC("struct_ops/write_sk_pacing_cong_control") +SEC("struct_ops") void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk, const struct rate_sample *rs) { @@ -49,13 +43,13 @@ void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk, tp->app_limited = (tp->delivered + tcp_packets_in_flight(tp)) ?: 1; } -SEC("struct_ops/write_sk_pacing_ssthresh") +SEC("struct_ops") __u32 BPF_PROG(write_sk_pacing_ssthresh, struct sock *sk) { return tcp_sk(sk)->snd_ssthresh; } -SEC("struct_ops/write_sk_pacing_undo_cwnd") +SEC("struct_ops") __u32 BPF_PROG(write_sk_pacing_undo_cwnd, struct sock *sk) { return tcp_sk(sk)->snd_cwnd; diff --git a/tools/testing/selftests/bpf/progs/tcp_rtt.c b/tools/testing/selftests/bpf/progs/tcp_rtt.c index 0988d79f15..42c729f855 100644 --- a/tools/testing/selftests/bpf/progs/tcp_rtt.c +++ b/tools/testing/selftests/bpf/progs/tcp_rtt.c @@ -10,6 +10,9 @@ struct tcp_rtt_storage { __u32 delivered; __u32 delivered_ce; __u32 icsk_retransmits; + + __u32 mrtt_us; /* args[0] */ + __u32 srtt; /* args[1] */ }; struct { @@ -55,5 +58,8 @@ int _sockops(struct bpf_sock_ops *ctx) storage->delivered_ce = tcp_sk->delivered_ce; storage->icsk_retransmits = tcp_sk->icsk_retransmits; + storage->mrtt_us = ctx->args[0]; + storage->srtt = ctx->args[1]; + return 1; } diff --git a/tools/testing/selftests/bpf/progs/test_access_variable_array.c b/tools/testing/selftests/bpf/progs/test_access_variable_array.c index 808c49b798..326b7d1f49 100644 --- a/tools/testing/selftests/bpf/progs/test_access_variable_array.c +++ b/tools/testing/selftests/bpf/progs/test_access_variable_array.c @@ -7,7 +7,7 @@ unsigned long span = 0; -SEC("fentry/load_balance") +SEC("fentry/sched_balance_rq") int BPF_PROG(fentry_fentry, int this_cpu, struct rq *this_rq, struct sched_domain *sd) { diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c index 5a3a80f751..c83142b55f 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c @@ -15,6 +15,8 @@ __u64 uprobe_res; __u64 uretprobe_res; __u64 tp_res; __u64 pe_res; +__u64 raw_tp_res; +__u64 tp_btf_res; __u64 fentry_res; __u64 fexit_res; __u64 fmod_ret_res; @@ -87,6 +89,20 @@ int handle_pe(struct pt_regs *ctx) return 0; } +SEC("raw_tp/sys_enter") +int handle_raw_tp(void *ctx) +{ + update(ctx, &raw_tp_res); + return 0; +} + +SEC("tp_btf/sys_enter") +int handle_tp_btf(void *ctx) +{ + update(ctx, &tp_btf_res); + return 0; +} + SEC("fentry/bpf_fentry_test1") int BPF_PROG(fentry_test1, int a) { diff --git a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c index e2bea4da19..f0759efff6 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c +++ b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c @@ -1,19 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include <string.h> -#include <errno.h> -#include <netinet/in.h> -#include <linux/stddef.h> -#include <linux/bpf.h> -#include <linux/ipv6.h> -#include <linux/tcp.h> -#include <linux/if_ether.h> -#include <linux/pkt_cls.h> - +#include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -#include "bpf_tcp_helpers.h" + +#ifndef ENOENT +#define ENOENT 2 +#endif struct sockaddr_in6 srv_sa6 = {}; __u16 listen_tp_sport = 0; diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c index 8fba3f3649..5da001ca57 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func10.c +++ b/tools/testing/selftests/bpf/progs/test_global_func10.c @@ -4,6 +4,10 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" +#if !defined(__clang__) +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif + struct Small { long x; }; diff --git a/tools/testing/selftests/bpf/progs/test_lwt_redirect.c b/tools/testing/selftests/bpf/progs/test_lwt_redirect.c index 8c895122f2..83439b87b7 100644 --- a/tools/testing/selftests/bpf/progs/test_lwt_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_lwt_redirect.c @@ -3,7 +3,7 @@ #include <bpf/bpf_endian.h> #include <bpf/bpf_helpers.h> #include <linux/ip.h> -#include "bpf_tracing_net.h" +#include <linux/if_ether.h> /* We don't care about whether the packet can be received by network stack. * Just care if the packet is sent to the correct device at correct direction diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c index 8a1b50f3a0..cc1a012d03 100644 --- a/tools/testing/selftests/bpf/progs/test_module_attach.c +++ b/tools/testing/selftests/bpf/progs/test_module_attach.c @@ -73,6 +73,29 @@ int BPF_PROG(handle_fentry_manual, return 0; } +__u32 fentry_explicit_read_sz = 0; + +SEC("fentry/bpf_testmod:bpf_testmod_test_read") +int BPF_PROG(handle_fentry_explicit, + struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) +{ + fentry_explicit_read_sz = len; + return 0; +} + + +__u32 fentry_explicit_manual_read_sz = 0; + +SEC("fentry") +int BPF_PROG(handle_fentry_explicit_manual, + struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) +{ + fentry_explicit_manual_read_sz = len; + return 0; +} + __u32 fexit_read_sz = 0; int fexit_ret = 0; diff --git a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c index 0763d49f9c..386315afad 100644 --- a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c @@ -5,23 +5,48 @@ #include <stdint.h> #include <bpf/bpf_helpers.h> +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 2); + __type(key, __u32); + __type(value, __u32); +} sock_map SEC(".maps"); + __u64 user_pid = 0; __u64 user_tgid = 0; __u64 dev = 0; __u64 ino = 0; -SEC("tracepoint/syscalls/sys_enter_nanosleep") -int handler(const void *ctx) +static void get_pid_tgid(void) { struct bpf_pidns_info nsdata; if (bpf_get_ns_current_pid_tgid(dev, ino, &nsdata, sizeof(struct bpf_pidns_info))) - return 0; + return; user_pid = nsdata.pid; user_tgid = nsdata.tgid; +} +SEC("?tracepoint/syscalls/sys_enter_nanosleep") +int tp_handler(const void *ctx) +{ + get_pid_tgid(); return 0; } +SEC("?cgroup/bind4") +int cgroup_bind4(struct bpf_sock_addr *ctx) +{ + get_pid_tgid(); + return 1; +} + +SEC("?sk_msg") +int sk_msg(struct sk_msg_md *msg) +{ + get_pid_tgid(); + return SK_PASS; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_perf_skip.c b/tools/testing/selftests/bpf/progs/test_perf_skip.c new file mode 100644 index 0000000000..7eb8b6de7a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_perf_skip.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +uintptr_t ip; + +SEC("perf_event") +int handler(struct bpf_perf_event_data *data) +{ + /* Skip events that have the correct ip. */ + return ip != PT_REGS_IP(&data->regs); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_n.c b/tools/testing/selftests/bpf/progs/test_ringbuf_n.c new file mode 100644 index 0000000000..8669eb42db --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_n.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2024 Andrea Righi <andrea.righi@canonical.com> + +#include <linux/bpf.h> +#include <sched.h> +#include <unistd.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +#define TASK_COMM_LEN 16 + +struct sample { + int pid; + long value; + char comm[16]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); +} ringbuf SEC(".maps"); + +int pid = 0; +long value = 0; + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int test_ringbuf_n(void *ctx) +{ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + struct sample *sample; + + if (cur_pid != pid) + return 0; + + sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0); + if (!sample) + return 0; + + sample->pid = pid; + sample->value = value; + bpf_get_current_comm(sample->comm, sizeof(sample->comm)); + + bpf_ringbuf_submit(sample, 0); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c new file mode 100644 index 0000000000..350513c0e4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); +} ringbuf SEC(".maps"); + +/* inputs */ +int pid = 0; + +/* outputs */ +long passed = 0; +long discarded = 0; + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int test_ringbuf_write(void *ctx) +{ + int *foo, cur_pid = bpf_get_current_pid_tgid() >> 32; + void *sample1, *sample2; + + if (cur_pid != pid) + return 0; + + sample1 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0); + if (!sample1) + return 0; + /* first one can pass */ + sample2 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0); + if (!sample2) { + bpf_ringbuf_discard(sample1, 0); + __sync_fetch_and_add(&discarded, 1); + return 0; + } + /* second one must not */ + __sync_fetch_and_add(&passed, 1); + foo = sample2 + 4084; + *foo = 256; + bpf_ringbuf_discard(sample1, 0); + bpf_ringbuf_discard(sample2, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c index 02e718f06e..40531e5677 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c +++ b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c @@ -84,7 +84,7 @@ int BPF_PROG(trace_tcp_connect, struct sock *sk) } SEC("fexit/inet_csk_accept") -int BPF_PROG(inet_csk_accept, struct sock *sk, int flags, int *err, bool kern, +int BPF_PROG(inet_csk_accept, struct sock *sk, struct proto_accept_arg *arg, struct sock *accepted_sk) { set_task_info(accepted_sk); diff --git a/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c b/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c index 45e8fc75a7..996b177324 100644 --- a/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c @@ -24,8 +24,7 @@ struct { __type(value, __u64); } socket_storage SEC(".maps"); -SEC("sk_msg") -int prog_msg_verdict(struct sk_msg_md *msg) +static int prog_msg_verdict_common(struct sk_msg_md *msg) { struct task_struct *task = (struct task_struct *)bpf_get_current_task(); int verdict = SK_PASS; @@ -44,4 +43,28 @@ int prog_msg_verdict(struct sk_msg_md *msg) return verdict; } +SEC("sk_msg") +int prog_msg_verdict(struct sk_msg_md *msg) +{ + return prog_msg_verdict_common(msg); +} + +SEC("sk_msg") +int prog_msg_verdict_clone(struct sk_msg_md *msg) +{ + return prog_msg_verdict_common(msg); +} + +SEC("sk_msg") +int prog_msg_verdict_clone2(struct sk_msg_md *msg) +{ + return prog_msg_verdict_common(msg); +} + +SEC("sk_skb/stream_verdict") +int prog_skb_verdict(struct __sk_buff *skb) +{ + return SK_PASS; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c index f75e531bf3..196844be34 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c @@ -7,7 +7,6 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -#include "bpf_tcp_helpers.h" enum bpf_linum_array_idx { EGRESS_LINUM_IDX, @@ -42,6 +41,10 @@ struct { __type(value, struct bpf_spinlock_cnt); } sk_pkt_out_cnt10 SEC(".maps"); +struct tcp_sock { + __u32 lsndtime; +} __attribute__((preserve_access_index)); + struct bpf_tcp_sock listen_tp = {}; struct sockaddr_in6 srv_sa6 = {}; struct bpf_tcp_sock cli_tp = {}; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c b/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c index 1d86a717a2..69aacc96db 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c @@ -23,10 +23,25 @@ struct { __type(value, int); } sock_map_msg SEC(".maps"); -SEC("sk_skb") +SEC("sk_skb/stream_verdict") int prog_skb_verdict(struct __sk_buff *skb) { return SK_PASS; } +int clone_called; + +SEC("sk_skb/stream_verdict") +int prog_skb_verdict_clone(struct __sk_buff *skb) +{ + clone_called = 1; + return SK_PASS; +} + +SEC("sk_skb/stream_parser") +int prog_skb_parser(struct __sk_buff *skb) +{ + return SK_PASS; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c index 3c69aa9717..d25b0bb30f 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c @@ -9,7 +9,7 @@ struct { __type(value, __u64); } sock_map SEC(".maps"); -SEC("sk_skb") +SEC("sk_skb/verdict") int prog_skb_verdict(struct __sk_buff *skb) { return SK_DROP; diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c index 992400acb9..ab3eae3d6a 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_link.c +++ b/tools/testing/selftests/bpf/progs/test_tc_link.c @@ -4,7 +4,8 @@ #include <linux/bpf.h> #include <linux/if_ether.h> - +#include <linux/stddef.h> +#include <linux/if_packet.h> #include <bpf/bpf_endian.h> #include <bpf/bpf_helpers.h> @@ -16,7 +17,13 @@ bool seen_tc3; bool seen_tc4; bool seen_tc5; bool seen_tc6; +bool seen_tc7; + +bool set_type; + bool seen_eth; +bool seen_host; +bool seen_mcast; SEC("tc/ingress") int tc1(struct __sk_buff *skb) @@ -28,8 +35,16 @@ int tc1(struct __sk_buff *skb) if (bpf_skb_load_bytes(skb, 0, ð, sizeof(eth))) goto out; seen_eth = eth.h_proto == bpf_htons(ETH_P_IP); + seen_host = skb->pkt_type == PACKET_HOST; + if (seen_host && set_type) { + eth.h_dest[0] = 4; + if (bpf_skb_store_bytes(skb, 0, ð, sizeof(eth), 0)) + goto fail; + bpf_skb_change_type(skb, PACKET_MULTICAST); + } out: seen_tc1 = true; +fail: return TCX_NEXT; } @@ -67,3 +82,21 @@ int tc6(struct __sk_buff *skb) seen_tc6 = true; return TCX_PASS; } + +SEC("tc/ingress") +int tc7(struct __sk_buff *skb) +{ + struct ethhdr eth = {}; + + if (skb->protocol != __bpf_constant_htons(ETH_P_IP)) + goto out; + if (bpf_skb_load_bytes(skb, 0, ð, sizeof(eth))) + goto out; + if (eth.h_dest[0] == 4 && set_type) { + seen_mcast = skb->pkt_type == PACKET_MULTICAST; + bpf_skb_change_type(skb, PACKET_HOST); + } +out: + seen_tc7 = true; + return TCX_PASS; +} diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c index a3f3f43fc1..6935f32eeb 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c @@ -1,18 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 -#include <stddef.h> -#include <string.h> -#include <netinet/in.h> -#include <linux/bpf.h> -#include <linux/if_ether.h> -#include <linux/if_packet.h> -#include <linux/ip.h> -#include <linux/ipv6.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/tcp.h> +#include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -#include "bpf_tcp_helpers.h" #include "test_tcpbpf.h" struct tcpbpf_globals global = {}; diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 3e436e6f73..3f5abcf3ff 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -567,12 +567,18 @@ int ip6vxlan_get_tunnel_src(struct __sk_buff *skb) return TC_ACT_OK; } +struct local_geneve_opt { + struct geneve_opt gopt; + int data; +}; + SEC("tc") int geneve_set_tunnel(struct __sk_buff *skb) { int ret; struct bpf_tunnel_key key; - struct geneve_opt gopt; + struct local_geneve_opt local_gopt; + struct geneve_opt *gopt = (struct geneve_opt *) &local_gopt; __builtin_memset(&key, 0x0, sizeof(key)); key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ @@ -580,14 +586,14 @@ int geneve_set_tunnel(struct __sk_buff *skb) key.tunnel_tos = 0; key.tunnel_ttl = 64; - __builtin_memset(&gopt, 0x0, sizeof(gopt)); - gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */ - gopt.type = 0x08; - gopt.r1 = 0; - gopt.r2 = 0; - gopt.r3 = 0; - gopt.length = 2; /* 4-byte multiple */ - *(int *) &gopt.opt_data = bpf_htonl(0xdeadbeef); + __builtin_memset(gopt, 0x0, sizeof(local_gopt)); + gopt->opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */ + gopt->type = 0x08; + gopt->r1 = 0; + gopt->r2 = 0; + gopt->r3 = 0; + gopt->length = 2; /* 4-byte multiple */ + *(int *) &gopt->opt_data = bpf_htonl(0xdeadbeef); ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX); @@ -596,7 +602,7 @@ int geneve_set_tunnel(struct __sk_buff *skb) return TC_ACT_SHOT; } - ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt)); + ret = bpf_skb_set_tunnel_opt(skb, gopt, sizeof(local_gopt)); if (ret < 0) { log_err(ret); return TC_ACT_SHOT; @@ -631,7 +637,8 @@ SEC("tc") int ip6geneve_set_tunnel(struct __sk_buff *skb) { struct bpf_tunnel_key key; - struct geneve_opt gopt; + struct local_geneve_opt local_gopt; + struct geneve_opt *gopt = (struct geneve_opt *) &local_gopt; int ret; __builtin_memset(&key, 0x0, sizeof(key)); @@ -647,16 +654,16 @@ int ip6geneve_set_tunnel(struct __sk_buff *skb) return TC_ACT_SHOT; } - __builtin_memset(&gopt, 0x0, sizeof(gopt)); - gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */ - gopt.type = 0x08; - gopt.r1 = 0; - gopt.r2 = 0; - gopt.r3 = 0; - gopt.length = 2; /* 4-byte multiple */ - *(int *) &gopt.opt_data = bpf_htonl(0xfeedbeef); + __builtin_memset(gopt, 0x0, sizeof(local_gopt)); + gopt->opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */ + gopt->type = 0x08; + gopt->r1 = 0; + gopt->r2 = 0; + gopt->r3 = 0; + gopt->length = 2; /* 4-byte multiple */ + *(int *) &gopt->opt_data = bpf_htonl(0xfeedbeef); - ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt)); + ret = bpf_skb_set_tunnel_opt(skb, gopt, sizeof(gopt)); if (ret < 0) { log_err(ret); return TC_ACT_SHOT; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index 5c7e4758a0..fad94e41ce 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -318,6 +318,14 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, return true; } +#ifndef __clang__ +#pragma GCC push_options +/* GCC optimization collapses functions and increases the number of arguments + * beyond the compatible amount supported by BPF. + */ +#pragma GCC optimize("-fno-ipa-sra") +#endif + static __attribute__ ((noinline)) bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, struct packet_description *pckt, @@ -372,6 +380,10 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, return true; } +#ifndef __clang__ +#pragma GCC pop_options +#endif + static __attribute__ ((noinline)) int swap_mac_and_send(void *data, void *data_end) { @@ -588,12 +600,13 @@ static void connection_table_lookup(struct real_definition **real, __attribute__ ((noinline)) static int process_l3_headers_v6(struct packet_description *pckt, __u8 *protocol, __u64 off, - __u16 *pkt_bytes, void *data, - void *data_end) + __u16 *pkt_bytes, void *extra_args[2]) { struct ipv6hdr *ip6h; __u64 iph_len; int action; + void *data = extra_args[0]; + void *data_end = extra_args[1]; ip6h = data + off; if (ip6h + 1 > data_end) @@ -619,11 +632,12 @@ static int process_l3_headers_v6(struct packet_description *pckt, __attribute__ ((noinline)) static int process_l3_headers_v4(struct packet_description *pckt, __u8 *protocol, __u64 off, - __u16 *pkt_bytes, void *data, - void *data_end) + __u16 *pkt_bytes, void *extra_args[2]) { struct iphdr *iph; int action; + void *data = extra_args[0]; + void *data_end = extra_args[1]; iph = data + off; if (iph + 1 > data_end) @@ -666,13 +680,14 @@ static int process_packet(void *data, __u64 off, void *data_end, __u8 protocol; __u32 vip_num; int action; + void *extra_args[2] = { data, data_end }; if (is_ipv6) action = process_l3_headers_v6(&pckt, &protocol, off, - &pkt_bytes, data, data_end); + &pkt_bytes, extra_args); else action = process_l3_headers_v4(&pckt, &protocol, off, - &pkt_bytes, data, data_end); + &pkt_bytes, extra_args); if (action >= 0) return action; protocol = pckt.flow.proto; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c index f3ec808648..a758830226 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c @@ -160,7 +160,7 @@ int xdp_prognum1(struct xdp_md *ctx) /* Modifying VLAN, preserve top 4 bits */ vlan_hdr->h_vlan_TCI = - bpf_htons((bpf_ntohs(vlan_hdr->h_vlan_TCI) & 0xf000) + bpf_htons((bpf_ntohs(vlan_hdr->h_vlan_TCI) & 0xf000U) | TO_VLAN); } diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c index f615da97df..4c677c0012 100644 --- a/tools/testing/selftests/bpf/progs/timer.c +++ b/tools/testing/selftests/bpf/progs/timer.c @@ -2,9 +2,10 @@ /* Copyright (c) 2021 Facebook */ #include <linux/bpf.h> #include <time.h> +#include <stdbool.h> #include <errno.h> #include <bpf/bpf_helpers.h> -#include "bpf_tcp_helpers.h" +#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; struct hmap_elem { diff --git a/tools/testing/selftests/bpf/progs/timer_failure.c b/tools/testing/selftests/bpf/progs/timer_failure.c index 0996c2486f..5a2e9dabf1 100644 --- a/tools/testing/selftests/bpf/progs/timer_failure.c +++ b/tools/testing/selftests/bpf/progs/timer_failure.c @@ -5,8 +5,8 @@ #include <time.h> #include <errno.h> #include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> #include "bpf_misc.h" -#include "bpf_tcp_helpers.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/timer_lockup.c b/tools/testing/selftests/bpf/progs/timer_lockup.c new file mode 100644 index 0000000000..3e52013328 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/timer_lockup.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <time.h> +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct elem { + struct bpf_timer t; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} timer1_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} timer2_map SEC(".maps"); + +int timer1_err; +int timer2_err; + +static int timer_cb1(void *map, int *k, struct elem *v) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer2_map, &key); + if (timer) + timer2_err = bpf_timer_cancel(timer); + + return 0; +} + +static int timer_cb2(void *map, int *k, struct elem *v) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer1_map, &key); + if (timer) + timer1_err = bpf_timer_cancel(timer); + + return 0; +} + +SEC("tc") +int timer1_prog(void *ctx) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer1_map, &key); + if (timer) { + bpf_timer_init(timer, &timer1_map, CLOCK_BOOTTIME); + bpf_timer_set_callback(timer, timer_cb1); + bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN); + } + + return 0; +} + +SEC("tc") +int timer2_prog(void *ctx) +{ + struct bpf_timer *timer; + int key = 0; + + timer = bpf_map_lookup_elem(&timer2_map, &key); + if (timer) { + bpf_timer_init(timer, &timer2_map, CLOCK_BOOTTIME); + bpf_timer_set_callback(timer, timer_cb2); + bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN); + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/timer_mim.c b/tools/testing/selftests/bpf/progs/timer_mim.c index 2fee7ab105..50ebc3f685 100644 --- a/tools/testing/selftests/bpf/progs/timer_mim.c +++ b/tools/testing/selftests/bpf/progs/timer_mim.c @@ -4,7 +4,7 @@ #include <time.h> #include <errno.h> #include <bpf/bpf_helpers.h> -#include "bpf_tcp_helpers.h" +#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; struct hmap_elem { diff --git a/tools/testing/selftests/bpf/progs/timer_mim_reject.c b/tools/testing/selftests/bpf/progs/timer_mim_reject.c index 5d648e3d8a..dd3f1ed6d6 100644 --- a/tools/testing/selftests/bpf/progs/timer_mim_reject.c +++ b/tools/testing/selftests/bpf/progs/timer_mim_reject.c @@ -4,7 +4,7 @@ #include <time.h> #include <errno.h> #include <bpf/bpf_helpers.h> -#include "bpf_tcp_helpers.h" +#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; struct hmap_elem { diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c index 5fda439010..2619ed193c 100644 --- a/tools/testing/selftests/bpf/progs/trigger_bench.c +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2020 Facebook - #include <linux/bpf.h> #include <asm/unistd.h> #include <bpf/bpf_helpers.h> @@ -9,82 +8,126 @@ char _license[] SEC("license") = "GPL"; -long hits = 0; +#define CPU_MASK 255 +#define MAX_CPUS (CPU_MASK + 1) /* should match MAX_BUCKETS in benchs/bench_trigger.c */ -SEC("tp/syscalls/sys_enter_getpgid") -int bench_trigger_tp(void *ctx) +/* matches struct counter in bench.h */ +struct counter { + long value; +} __attribute__((aligned(128))); + +struct counter hits[MAX_CPUS]; + +static __always_inline void inc_counter(void) +{ + int cpu = bpf_get_smp_processor_id(); + + __sync_add_and_fetch(&hits[cpu & CPU_MASK].value, 1); +} + +SEC("?uprobe") +int bench_trigger_uprobe(void *ctx) { - __sync_add_and_fetch(&hits, 1); + inc_counter(); return 0; } -SEC("raw_tp/sys_enter") -int BPF_PROG(bench_trigger_raw_tp, struct pt_regs *regs, long id) +const volatile int batch_iters = 0; + +SEC("?raw_tp") +int trigger_count(void *ctx) { - if (id == __NR_getpgid) - __sync_add_and_fetch(&hits, 1); + int i; + + for (i = 0; i < batch_iters; i++) + inc_counter(); + return 0; } -SEC("kprobe/" SYS_PREFIX "sys_getpgid") +SEC("?raw_tp") +int trigger_driver(void *ctx) +{ + int i; + + for (i = 0; i < batch_iters; i++) + (void)bpf_get_numa_node_id(); /* attach point for benchmarking */ + + return 0; +} + +extern int bpf_modify_return_test_tp(int nonce) __ksym __weak; + +SEC("?raw_tp") +int trigger_driver_kfunc(void *ctx) +{ + int i; + + for (i = 0; i < batch_iters; i++) + (void)bpf_modify_return_test_tp(0); /* attach point for benchmarking */ + + return 0; +} + +SEC("?kprobe/bpf_get_numa_node_id") int bench_trigger_kprobe(void *ctx) { - __sync_add_and_fetch(&hits, 1); + inc_counter(); return 0; } -SEC("kretprobe/" SYS_PREFIX "sys_getpgid") +SEC("?kretprobe/bpf_get_numa_node_id") int bench_trigger_kretprobe(void *ctx) { - __sync_add_and_fetch(&hits, 1); + inc_counter(); return 0; } -SEC("kprobe.multi/" SYS_PREFIX "sys_getpgid") +SEC("?kprobe.multi/bpf_get_numa_node_id") int bench_trigger_kprobe_multi(void *ctx) { - __sync_add_and_fetch(&hits, 1); + inc_counter(); return 0; } -SEC("kretprobe.multi/" SYS_PREFIX "sys_getpgid") +SEC("?kretprobe.multi/bpf_get_numa_node_id") int bench_trigger_kretprobe_multi(void *ctx) { - __sync_add_and_fetch(&hits, 1); + inc_counter(); return 0; } -SEC("fentry/" SYS_PREFIX "sys_getpgid") +SEC("?fentry/bpf_get_numa_node_id") int bench_trigger_fentry(void *ctx) { - __sync_add_and_fetch(&hits, 1); + inc_counter(); return 0; } -SEC("fexit/" SYS_PREFIX "sys_getpgid") +SEC("?fexit/bpf_get_numa_node_id") int bench_trigger_fexit(void *ctx) { - __sync_add_and_fetch(&hits, 1); + inc_counter(); return 0; } -SEC("fentry.s/" SYS_PREFIX "sys_getpgid") -int bench_trigger_fentry_sleep(void *ctx) +SEC("?fmod_ret/bpf_modify_return_test_tp") +int bench_trigger_fmodret(void *ctx) { - __sync_add_and_fetch(&hits, 1); - return 0; + inc_counter(); + return -22; } -SEC("fmod_ret/" SYS_PREFIX "sys_getpgid") -int bench_trigger_fmodret(void *ctx) +SEC("?tp/bpf_test_run/bpf_trigger_tp") +int bench_trigger_tp(void *ctx) { - __sync_add_and_fetch(&hits, 1); - return -22; + inc_counter(); + return 0; } -SEC("uprobe") -int bench_trigger_uprobe(void *ctx) +SEC("?raw_tp/bpf_trigger_tp") +int bench_trigger_rawtp(void *ctx) { - __sync_add_and_fetch(&hits, 1); + inc_counter(); return 0; } diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi.c b/tools/testing/selftests/bpf/progs/uprobe_multi.c index 419d9aa28f..44190efcdb 100644 --- a/tools/testing/selftests/bpf/progs/uprobe_multi.c +++ b/tools/testing/selftests/bpf/progs/uprobe_multi.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include <stdbool.h> +#include <bpf/usdt.bpf.h> char _license[] SEC("license") = "GPL"; @@ -22,6 +22,13 @@ __u64 uprobe_multi_sleep_result = 0; int pid = 0; int child_pid = 0; +int child_tid = 0; +int child_pid_usdt = 0; +int child_tid_usdt = 0; + +int expect_pid = 0; +bool bad_pid_seen = false; +bool bad_pid_seen_usdt = false; bool test_cookie = false; void *user_ptr = 0; @@ -36,11 +43,19 @@ static __always_inline bool verify_sleepable_user_copy(void) static void uprobe_multi_check(void *ctx, bool is_return, bool is_sleep) { - child_pid = bpf_get_current_pid_tgid() >> 32; + __u64 cur_pid_tgid = bpf_get_current_pid_tgid(); + __u32 cur_pid; - if (pid && child_pid != pid) + cur_pid = cur_pid_tgid >> 32; + if (pid && cur_pid != pid) return; + if (expect_pid && cur_pid != expect_pid) + bad_pid_seen = true; + + child_pid = cur_pid_tgid >> 32; + child_tid = (__u32)cur_pid_tgid; + __u64 cookie = test_cookie ? bpf_get_attach_cookie(ctx) : 0; __u64 addr = bpf_get_func_ip(ctx); @@ -97,5 +112,32 @@ int uretprobe_sleep(struct pt_regs *ctx) SEC("uprobe.multi//proc/self/exe:uprobe_multi_func_*") int uprobe_extra(struct pt_regs *ctx) { + /* we need this one just to mix PID-filtered and global uprobes */ + return 0; +} + +SEC("usdt") +int usdt_pid(struct pt_regs *ctx) +{ + __u64 cur_pid_tgid = bpf_get_current_pid_tgid(); + __u32 cur_pid; + + cur_pid = cur_pid_tgid >> 32; + if (pid && cur_pid != pid) + return 0; + + if (expect_pid && cur_pid != expect_pid) + bad_pid_seen_usdt = true; + + child_pid_usdt = cur_pid_tgid >> 32; + child_tid_usdt = (__u32)cur_pid_tgid; + + return 0; +} + +SEC("usdt") +int usdt_extra(struct pt_regs *ctx) +{ + /* we need this one just to mix PID-filtered and global USDT probes */ return 0; } diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index 960998f163..a0bb7fb40e 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -886,6 +886,69 @@ l1_%=: r0 = 0; \ } SEC("socket") +__description("bounds check for non const xor src dst") +__success __log_level(2) +__msg("5: (af) r0 ^= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))") +__naked void non_const_xor_src_dst(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r6 = r0; \ + call %[bpf_get_prandom_u32]; \ + r6 &= 0xaf; \ + r0 &= 0x1a0; \ + r0 ^= r6; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b), + __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("bounds check for non const or src dst") +__success __log_level(2) +__msg("5: (4f) r0 |= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))") +__naked void non_const_or_src_dst(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r6 = r0; \ + call %[bpf_get_prandom_u32]; \ + r6 &= 0xaf; \ + r0 &= 0x1a0; \ + r0 |= r6; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b), + __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("bounds check for non const mul regs") +__success __log_level(2) +__msg("5: (2f) r0 *= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=3825,var_off=(0x0; 0xfff))") +__naked void non_const_mul_regs(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r6 = r0; \ + call %[bpf_get_prandom_u32]; \ + r6 &= 0xff; \ + r0 &= 0x0f; \ + r0 *= r6; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b), + __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") __description("bounds checks after 32-bit truncation. test 1") __success __failure_unpriv __msg_unpriv("R0 leaks addr") __retval(0) diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c b/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c index 0ede0ccd09..059aa716e3 100644 --- a/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c +++ b/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c @@ -30,7 +30,7 @@ struct { SEC("kprobe") __description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_KPROBE") -__failure __msg("unknown func bpf_ktime_get_coarse_ns") +__failure __msg("program of this type cannot use helper bpf_ktime_get_coarse_ns") __naked void in_bpf_prog_type_kprobe_1(void) { asm volatile (" \ @@ -44,7 +44,7 @@ __naked void in_bpf_prog_type_kprobe_1(void) SEC("tracepoint") __description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_TRACEPOINT") -__failure __msg("unknown func bpf_ktime_get_coarse_ns") +__failure __msg("program of this type cannot use helper bpf_ktime_get_coarse_ns") __naked void in_bpf_prog_type_tracepoint_1(void) { asm volatile (" \ @@ -58,7 +58,7 @@ __naked void in_bpf_prog_type_tracepoint_1(void) SEC("perf_event") __description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_PERF_EVENT") -__failure __msg("unknown func bpf_ktime_get_coarse_ns") +__failure __msg("program of this type cannot use helper bpf_ktime_get_coarse_ns") __naked void bpf_prog_type_perf_event_1(void) { asm volatile (" \ @@ -72,7 +72,7 @@ __naked void bpf_prog_type_perf_event_1(void) SEC("raw_tracepoint") __description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT") -__failure __msg("unknown func bpf_ktime_get_coarse_ns") +__failure __msg("program of this type cannot use helper bpf_ktime_get_coarse_ns") __naked void bpf_prog_type_raw_tracepoint_1(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c index 99e561f18f..80c737b6d3 100644 --- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c +++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c @@ -274,6 +274,58 @@ static __naked void iter_limit_bug_cb(void) ); } +int tmp_var; +SEC("socket") +__failure __msg("infinite loop detected at insn 2") +__naked void jgt_imm64_and_may_goto(void) +{ + asm volatile (" \ + r0 = %[tmp_var] ll; \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short -3; /* off -3 */ \ + .long 0; /* imm */ \ + if r0 > 10 goto l0_%=; \ + r0 = 0; \ + exit; \ +" :: __imm_addr(tmp_var) + : __clobber_all); +} + +SEC("socket") +__failure __msg("infinite loop detected at insn 1") +__naked void may_goto_self(void) +{ + asm volatile (" \ + r0 = *(u32 *)(r10 - 4); \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short -1; /* off -1 */ \ + .long 0; /* imm */ \ + if r0 > 10 goto l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__success __retval(0) +__naked void may_goto_neg_off(void) +{ + asm volatile (" \ + r0 = *(u32 *)(r10 - 4); \ + goto l0_%=; \ + goto l1_%=; \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short -2; /* off -2 */ \ + .long 0; /* imm */ \ + if r0 > 10 goto l0_%=; \ +l1_%=: r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + SEC("tc") __failure __flag(BPF_F_TEST_STATE_FREQ) @@ -307,6 +359,100 @@ int iter_limit_bug(struct __sk_buff *skb) return 0; } +SEC("socket") +__success __retval(0) +__naked void ja_and_may_goto(void) +{ + asm volatile (" \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + goto l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_common); +} + +SEC("socket") +__success __retval(0) +__naked void ja_and_may_goto2(void) +{ + asm volatile (" \ +l0_%=: r0 = 0; \ + .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + goto l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_common); +} + +SEC("socket") +__success __retval(0) +__naked void jlt_and_may_goto(void) +{ + asm volatile (" \ +l0_%=: call %[bpf_jiffies64]; \ + .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + if r0 < 10 goto l0_%=; \ + r0 = 0; \ + exit; \ +" :: __imm(bpf_jiffies64) + : __clobber_all); +} + +#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \ + defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \ + defined(__TARGET_ARCH_loongarch)) && \ + __clang_major__ >= 18 +SEC("socket") +__success __retval(0) +__naked void gotol_and_may_goto(void) +{ + asm volatile (" \ +l0_%=: r0 = 0; \ + .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + gotol l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_common); +} +#endif + +SEC("socket") +__success __retval(0) +__naked void ja_and_may_goto_subprog(void) +{ + asm volatile (" \ + call subprog_with_may_goto; \ + exit; \ +" ::: __clobber_all); +} + +static __naked __noinline __used +void subprog_with_may_goto(void) +{ + asm volatile (" \ +l0_%=: .byte 0xe5; /* may_goto */ \ + .byte 0; /* regs */ \ + .short 1; /* off 1 */ \ + .long 0; /* imm */ \ + goto l0_%=; \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + #define ARR_SZ 1000000 int zero; char arr[ARR_SZ]; @@ -318,7 +464,7 @@ int cond_break1(const void *ctx) unsigned long i; unsigned int sum = 0; - for (i = zero; i < ARR_SZ; cond_break, i++) + for (i = zero; i < ARR_SZ && can_loop; i++) sum += i; for (i = zero; i < ARR_SZ; i++) { barrier_var(i); @@ -336,12 +482,11 @@ int cond_break2(const void *ctx) int i, j; int sum = 0; - for (i = zero; i < 1000; cond_break, i++) + for (i = zero; i < 1000 && can_loop; i++) for (j = zero; j < 1000; j++) { sum += i + j; cond_break; - } - + } return sum; } @@ -349,7 +494,7 @@ static __noinline int loop(void) { int i, sum = 0; - for (i = zero; i <= 1000000; i++, cond_break) + for (i = zero; i <= 1000000 && can_loop; i++) sum += i; return sum; diff --git a/tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c b/tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c new file mode 100644 index 0000000000..cb32b0cfc8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_misc.h" +#include "cgrp_kfunc_common.h" +#include "cpumask_common.h" +#include "task_kfunc_common.h" + +char _license[] SEC("license") = "GPL"; + +/*************** + * Task kfuncs * + ***************/ + +static void task_kfunc_load_test(void) +{ + struct task_struct *current, *ref_1, *ref_2; + + current = bpf_get_current_task_btf(); + ref_1 = bpf_task_from_pid(current->pid); + if (!ref_1) + return; + + ref_2 = bpf_task_acquire(ref_1); + if (ref_2) + bpf_task_release(ref_2); + bpf_task_release(ref_1); +} + +SEC("raw_tp") +__failure __msg("calling kernel function") +int BPF_PROG(task_kfunc_raw_tp) +{ + task_kfunc_load_test(); + return 0; +} + +SEC("syscall") +__success +int BPF_PROG(task_kfunc_syscall) +{ + task_kfunc_load_test(); + return 0; +} + +/***************** + * cgroup kfuncs * + *****************/ + +static void cgrp_kfunc_load_test(void) +{ + struct cgroup *cgrp, *ref; + + cgrp = bpf_cgroup_from_id(0); + if (!cgrp) + return; + + ref = bpf_cgroup_acquire(cgrp); + if (!ref) { + bpf_cgroup_release(cgrp); + return; + } + + bpf_cgroup_release(ref); + bpf_cgroup_release(cgrp); +} + +SEC("raw_tp") +__failure __msg("calling kernel function") +int BPF_PROG(cgrp_kfunc_raw_tp) +{ + cgrp_kfunc_load_test(); + return 0; +} + +SEC("syscall") +__success +int BPF_PROG(cgrp_kfunc_syscall) +{ + cgrp_kfunc_load_test(); + return 0; +} + +/****************** + * cpumask kfuncs * + ******************/ + +static void cpumask_kfunc_load_test(void) +{ + struct bpf_cpumask *alloc, *ref; + + alloc = bpf_cpumask_create(); + if (!alloc) + return; + + ref = bpf_cpumask_acquire(alloc); + bpf_cpumask_set_cpu(0, alloc); + bpf_cpumask_test_cpu(0, (const struct cpumask *)ref); + + bpf_cpumask_release(ref); + bpf_cpumask_release(alloc); +} + +SEC("raw_tp") +__failure __msg("calling kernel function") +int BPF_PROG(cpumask_kfunc_raw_tp) +{ + cpumask_kfunc_load_test(); + return 0; +} + +SEC("syscall") +__success +int BPF_PROG(cpumask_kfunc_syscall) +{ + cpumask_kfunc_load_test(); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c index cbb9d6714f..028ec85558 100644 --- a/tools/testing/selftests/bpf/progs/verifier_movsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c @@ -224,6 +224,69 @@ l0_%=: \ : __clobber_all); } +SEC("socket") +__description("MOV32SX, S8, var_off u32_max") +__failure __msg("infinite loop detected") +__failure_unpriv __msg_unpriv("back-edge from insn 2 to 0") +__naked void mov64sx_s32_varoff_1(void) +{ + asm volatile (" \ +l0_%=: \ + r3 = *(u8 *)(r10 -387); \ + w7 = (s8)w3; \ + if w7 >= 0x2533823b goto l0_%=; \ + w0 = 0; \ + exit; \ +" : + : + : __clobber_all); +} + +SEC("socket") +__description("MOV32SX, S8, var_off not u32_max, positive after s8 extension") +__success __retval(0) +__failure_unpriv __msg_unpriv("frame pointer is read only") +__naked void mov64sx_s32_varoff_2(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r3 = r0; \ + r3 &= 0xf; \ + w7 = (s8)w3; \ + if w7 s>= 16 goto l0_%=; \ + w0 = 0; \ + exit; \ +l0_%=: \ + r10 = 1; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("MOV32SX, S8, var_off not u32_max, negative after s8 extension") +__success __retval(0) +__failure_unpriv __msg_unpriv("frame pointer is read only") +__naked void mov64sx_s32_varoff_3(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r3 = r0; \ + r3 &= 0xf; \ + r3 |= 0x80; \ + w7 = (s8)w3; \ + if w7 s>= -5 goto l0_%=; \ + w0 = 0; \ + exit; \ +l0_%=: \ + r10 = 1; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + #else SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c b/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c new file mode 100644 index 0000000000..f37713a265 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +SEC("socket") +__description("or_jmp32_k: bit ops + branch on unknown value") +__failure +__msg("R0 invalid mem access 'scalar'") +__naked void or_jmp32_k(void) +{ + asm volatile (" \ + r0 = 0xffffffff; \ + r0 /= 1; \ + r1 = 0; \ + w1 = -1; \ + w1 >>= 1; \ + w0 &= w1; \ + w0 |= 2; \ + if w0 != 0x7ffffffd goto l1; \ + r0 = 1; \ + exit; \ +l3: \ + r0 = 5; \ + *(u64*)(r0 - 8) = r0; \ + exit; \ +l2: \ + w0 -= 0xe; \ + if w0 == 1 goto l3; \ + r0 = 4; \ + exit; \ +l1: \ + w0 -= 0x7ffffff0; \ + if w0 s>= 0xe goto l2; \ + r0 = 3; \ + exit; \ +" ::: __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_sock_addr.c b/tools/testing/selftests/bpf/progs/verifier_sock_addr.c new file mode 100644 index 0000000000..9c31448a0f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_sock_addr.c @@ -0,0 +1,331 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Google LLC */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf_sockopt_helpers.h> +#include "bpf_misc.h" + +SEC("cgroup/recvmsg4") +__success +int recvmsg4_good_return_code(struct bpf_sock_addr *ctx) +{ + return 1; +} + +SEC("cgroup/recvmsg4") +__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]") +int recvmsg4_bad_return_code(struct bpf_sock_addr *ctx) +{ + return 0; +} + +SEC("cgroup/recvmsg6") +__success +int recvmsg6_good_return_code(struct bpf_sock_addr *ctx) +{ + return 1; +} + +SEC("cgroup/recvmsg6") +__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]") +int recvmsg6_bad_return_code(struct bpf_sock_addr *ctx) +{ + return 0; +} + +SEC("cgroup/recvmsg_unix") +__success +int recvmsg_unix_good_return_code(struct bpf_sock_addr *ctx) +{ + return 1; +} + +SEC("cgroup/recvmsg_unix") +__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]") +int recvmsg_unix_bad_return_code(struct bpf_sock_addr *ctx) +{ + return 0; +} + +SEC("cgroup/sendmsg4") +__success +int sendmsg4_good_return_code_0(struct bpf_sock_addr *ctx) +{ + return 0; +} + +SEC("cgroup/sendmsg4") +__success +int sendmsg4_good_return_code_1(struct bpf_sock_addr *ctx) +{ + return 1; +} + +SEC("cgroup/sendmsg4") +__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]") +int sendmsg4_bad_return_code(struct bpf_sock_addr *ctx) +{ + return 2; +} + +SEC("cgroup/sendmsg6") +__success +int sendmsg6_good_return_code_0(struct bpf_sock_addr *ctx) +{ + return 0; +} + +SEC("cgroup/sendmsg6") +__success +int sendmsg6_good_return_code_1(struct bpf_sock_addr *ctx) +{ + return 1; +} + +SEC("cgroup/sendmsg6") +__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]") +int sendmsg6_bad_return_code(struct bpf_sock_addr *ctx) +{ + return 2; +} + +SEC("cgroup/sendmsg_unix") +__success +int sendmsg_unix_good_return_code_0(struct bpf_sock_addr *ctx) +{ + return 0; +} + +SEC("cgroup/sendmsg_unix") +__success +int sendmsg_unix_good_return_code_1(struct bpf_sock_addr *ctx) +{ + return 1; +} + +SEC("cgroup/sendmsg_unix") +__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]") +int sendmsg_unix_bad_return_code(struct bpf_sock_addr *ctx) +{ + return 2; +} + +SEC("cgroup/getpeername4") +__success +int getpeername4_good_return_code(struct bpf_sock_addr *ctx) +{ + return 1; +} + +SEC("cgroup/getpeername4") +__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]") +int getpeername4_bad_return_code(struct bpf_sock_addr *ctx) +{ + return 0; +} + +SEC("cgroup/getpeername6") +__success +int getpeername6_good_return_code(struct bpf_sock_addr *ctx) +{ + return 1; +} + +SEC("cgroup/getpeername6") +__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]") +int getpeername6_bad_return_code(struct bpf_sock_addr *ctx) +{ + return 0; +} + +SEC("cgroup/getpeername_unix") +__success +int getpeername_unix_good_return_code(struct bpf_sock_addr *ctx) +{ + return 1; +} + +SEC("cgroup/getpeername_unix") +__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]") +int getpeername_unix_bad_return_code(struct bpf_sock_addr *ctx) +{ + return 0; +} + +SEC("cgroup/getsockname4") +__success +int getsockname4_good_return_code(struct bpf_sock_addr *ctx) +{ + return 1; +} + +SEC("cgroup/getsockname4") +__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]") +int getsockname4_bad_return_code(struct bpf_sock_addr *ctx) +{ + return 0; +} + +SEC("cgroup/getsockname6") +__success +int getsockname6_good_return_code(struct bpf_sock_addr *ctx) +{ + return 1; +} + +SEC("cgroup/getsockname6") +__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]") +int getsockname6_bad_return_code(struct bpf_sock_addr *ctx) +{ + return 0; +} + +SEC("cgroup/getsockname_unix") +__success +int getsockname_unix_good_return_code(struct bpf_sock_addr *ctx) +{ + return 1; +} + +SEC("cgroup/getsockname_unix") +__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]") +int getsockname_unix_unix_bad_return_code(struct bpf_sock_addr *ctx) +{ + return 0; +} + +SEC("cgroup/bind4") +__success +int bind4_good_return_code_0(struct bpf_sock_addr *ctx) +{ + return 0; +} + +SEC("cgroup/bind4") +__success +int bind4_good_return_code_1(struct bpf_sock_addr *ctx) +{ + return 1; +} + +SEC("cgroup/bind4") +__success +int bind4_good_return_code_2(struct bpf_sock_addr *ctx) +{ + return 2; +} + +SEC("cgroup/bind4") +__success +int bind4_good_return_code_3(struct bpf_sock_addr *ctx) +{ + return 3; +} + +SEC("cgroup/bind4") +__failure __msg("At program exit the register R0 has smin=4 smax=4 should have been in [0, 3]") +int bind4_bad_return_code(struct bpf_sock_addr *ctx) +{ + return 4; +} + +SEC("cgroup/bind6") +__success +int bind6_good_return_code_0(struct bpf_sock_addr *ctx) +{ + return 0; +} + +SEC("cgroup/bind6") +__success +int bind6_good_return_code_1(struct bpf_sock_addr *ctx) +{ + return 1; +} + +SEC("cgroup/bind6") +__success +int bind6_good_return_code_2(struct bpf_sock_addr *ctx) +{ + return 2; +} + +SEC("cgroup/bind6") +__success +int bind6_good_return_code_3(struct bpf_sock_addr *ctx) +{ + return 3; +} + +SEC("cgroup/bind6") +__failure __msg("At program exit the register R0 has smin=4 smax=4 should have been in [0, 3]") +int bind6_bad_return_code(struct bpf_sock_addr *ctx) +{ + return 4; +} + +SEC("cgroup/connect4") +__success +int connect4_good_return_code_0(struct bpf_sock_addr *ctx) +{ + return 0; +} + +SEC("cgroup/connect4") +__success +int connect4_good_return_code_1(struct bpf_sock_addr *ctx) +{ + return 1; +} + +SEC("cgroup/connect4") +__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]") +int connect4_bad_return_code(struct bpf_sock_addr *ctx) +{ + return 2; +} + +SEC("cgroup/connect6") +__success +int connect6_good_return_code_0(struct bpf_sock_addr *ctx) +{ + return 0; +} + +SEC("cgroup/connect6") +__success +int connect6_good_return_code_1(struct bpf_sock_addr *ctx) +{ + return 1; +} + +SEC("cgroup/connect6") +__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]") +int connect6_bad_return_code(struct bpf_sock_addr *ctx) +{ + return 2; +} + +SEC("cgroup/connect_unix") +__success +int connect_unix_good_return_code_0(struct bpf_sock_addr *ctx) +{ + return 0; +} + +SEC("cgroup/connect_unix") +__success +int connect_unix_good_return_code_1(struct bpf_sock_addr *ctx) +{ + return 1; +} + +SEC("cgroup/connect_unix") +__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]") +int connect_unix_bad_return_code(struct bpf_sock_addr *ctx) +{ + return 2; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c b/tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c new file mode 100644 index 0000000000..fe4b123187 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#include "bpf_misc.h" + +#define __always_unused __attribute__((unused)) + +char _license[] SEC("license") = "GPL"; + +struct sock { +} __attribute__((preserve_access_index)); + +struct bpf_iter__sockmap { + union { + struct sock *sk; + }; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} sockhash SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} sockmap SEC(".maps"); + +enum { CG_OK = 1 }; + +int zero = 0; + +static __always_inline void test_sockmap_delete(void) +{ + bpf_map_delete_elem(&sockmap, &zero); + bpf_map_delete_elem(&sockhash, &zero); +} + +static __always_inline void test_sockmap_update(void *sk) +{ + if (sk) { + bpf_map_update_elem(&sockmap, &zero, sk, BPF_ANY); + bpf_map_update_elem(&sockhash, &zero, sk, BPF_ANY); + } +} + +static __always_inline void test_sockmap_lookup_and_update(void) +{ + struct bpf_sock *sk = bpf_map_lookup_elem(&sockmap, &zero); + + if (sk) { + test_sockmap_update(sk); + bpf_sk_release(sk); + } +} + +static __always_inline void test_sockmap_mutate(void *sk) +{ + test_sockmap_delete(); + test_sockmap_update(sk); +} + +static __always_inline void test_sockmap_lookup_and_mutate(void) +{ + test_sockmap_delete(); + test_sockmap_lookup_and_update(); +} + +SEC("action") +__success +int test_sched_act(struct __sk_buff *skb) +{ + test_sockmap_mutate(skb->sk); + return 0; +} + +SEC("classifier") +__success +int test_sched_cls(struct __sk_buff *skb) +{ + test_sockmap_mutate(skb->sk); + return 0; +} + +SEC("flow_dissector") +__success +int test_flow_dissector_delete(struct __sk_buff *skb __always_unused) +{ + test_sockmap_delete(); + return 0; +} + +SEC("flow_dissector") +__failure __msg("program of this type cannot use helper bpf_sk_release") +int test_flow_dissector_update(struct __sk_buff *skb __always_unused) +{ + test_sockmap_lookup_and_update(); /* no access to skb->sk */ + return 0; +} + +SEC("iter/sockmap") +__success +int test_trace_iter(struct bpf_iter__sockmap *ctx) +{ + test_sockmap_mutate(ctx->sk); + return 0; +} + +SEC("raw_tp/kfree") +__failure __msg("cannot update sockmap in this context") +int test_raw_tp_delete(const void *ctx __always_unused) +{ + test_sockmap_delete(); + return 0; +} + +SEC("raw_tp/kfree") +__failure __msg("cannot update sockmap in this context") +int test_raw_tp_update(const void *ctx __always_unused) +{ + test_sockmap_lookup_and_update(); + return 0; +} + +SEC("sk_lookup") +__success +int test_sk_lookup(struct bpf_sk_lookup *ctx) +{ + test_sockmap_mutate(ctx->sk); + return 0; +} + +SEC("sk_reuseport") +__success +int test_sk_reuseport(struct sk_reuseport_md *ctx) +{ + test_sockmap_mutate(ctx->sk); + return 0; +} + +SEC("socket") +__success +int test_socket_filter(struct __sk_buff *skb) +{ + test_sockmap_mutate(skb->sk); + return 0; +} + +SEC("sockops") +__success +int test_sockops_delete(struct bpf_sock_ops *ctx __always_unused) +{ + test_sockmap_delete(); + return CG_OK; +} + +SEC("sockops") +__failure __msg("cannot update sockmap in this context") +int test_sockops_update(struct bpf_sock_ops *ctx) +{ + test_sockmap_update(ctx->sk); + return CG_OK; +} + +SEC("sockops") +__success +int test_sockops_update_dedicated(struct bpf_sock_ops *ctx) +{ + bpf_sock_map_update(ctx, &sockmap, &zero, BPF_ANY); + bpf_sock_hash_update(ctx, &sockhash, &zero, BPF_ANY); + return CG_OK; +} + +SEC("xdp") +__success +int test_xdp(struct xdp_md *ctx __always_unused) +{ + test_sockmap_lookup_and_mutate(); + return XDP_PASS; +} diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index 6f5d19665c..4a58e0398e 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -6,6 +6,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" +#include <../../../tools/include/linux/filter.h> #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) @@ -76,6 +77,94 @@ __naked int subprog_result_precise(void) ); } +__naked __noinline __used +static unsigned long fp_leaking_subprog() +{ + asm volatile ( + ".8byte %[r0_eq_r10_cast_s8];" + "exit;" + :: __imm_insn(r0_eq_r10_cast_s8, BPF_MOVSX64_REG(BPF_REG_0, BPF_REG_10, 8)) + ); +} + +__naked __noinline __used +static unsigned long sneaky_fp_leaking_subprog() +{ + asm volatile ( + "r1 = r10;" + ".8byte %[r0_eq_r1_cast_s8];" + "exit;" + :: __imm_insn(r0_eq_r1_cast_s8, BPF_MOVSX64_REG(BPF_REG_0, BPF_REG_1, 8)) + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("6: (0f) r1 += r0") +__msg("mark_precise: frame0: last_idx 6 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r1 = r6") +__msg("mark_precise: frame0: regs=r0 stack= before 4: (27) r0 *= 4") +__msg("mark_precise: frame0: regs=r0 stack= before 3: (57) r0 &= 3") +__msg("mark_precise: frame0: regs=r0 stack= before 10: (95) exit") +__msg("mark_precise: frame1: regs=r0 stack= before 9: (bf) r0 = (s8)r10") +__msg("7: R0_w=scalar") +__naked int fp_precise_subprog_result(void) +{ + asm volatile ( + "call fp_leaking_subprog;" + /* use subprog's returned value (which is derived from r10=fp + * register), as index into vals array, forcing all of that to + * be known precisely + */ + "r0 &= 3;" + "r0 *= 4;" + "r1 = %[vals];" + /* force precision marking */ + "r1 += r0;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals) + : __clobber_common + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("6: (0f) r1 += r0") +__msg("mark_precise: frame0: last_idx 6 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r1 = r6") +__msg("mark_precise: frame0: regs=r0 stack= before 4: (27) r0 *= 4") +__msg("mark_precise: frame0: regs=r0 stack= before 3: (57) r0 &= 3") +__msg("mark_precise: frame0: regs=r0 stack= before 11: (95) exit") +__msg("mark_precise: frame1: regs=r0 stack= before 10: (bf) r0 = (s8)r1") +/* here r1 is marked precise, even though it's fp register, but that's fine + * because by the time we get out of subprogram it has to be derived from r10 + * anyways, at which point we'll break precision chain + */ +__msg("mark_precise: frame1: regs=r1 stack= before 9: (bf) r1 = r10") +__msg("7: R0_w=scalar") +__naked int sneaky_fp_precise_subprog_result(void) +{ + asm volatile ( + "call sneaky_fp_leaking_subprog;" + /* use subprog's returned value (which is derived from r10=fp + * register), as index into vals array, forcing all of that to + * be known precisely + */ + "r0 &= 3;" + "r0 *= 4;" + "r1 = %[vals];" + /* force precision marking */ + "r1 += r0;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals) + : __clobber_common + ); +} + SEC("?raw_tp") __success __log_level(2) __msg("9: (0f) r1 += r0") diff --git a/tools/testing/selftests/bpf/progs/wq.c b/tools/testing/selftests/bpf/progs/wq.c new file mode 100644 index 0000000000..49e712acbf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/wq.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Benjamin Tissoires + */ + +#include "bpf_experimental.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "../bpf_testmod/bpf_testmod_kfunc.h" + +char _license[] SEC("license") = "GPL"; + +struct hmap_elem { + int counter; + struct bpf_timer timer; /* unused */ + struct bpf_spin_lock lock; /* unused */ + struct bpf_wq work; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1000); + __type(key, int); + __type(value, struct hmap_elem); +} hmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(map_flags, BPF_F_NO_PREALLOC); + __uint(max_entries, 1000); + __type(key, int); + __type(value, struct hmap_elem); +} hmap_malloc SEC(".maps"); + +struct elem { + struct bpf_wq w; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 2); + __type(key, int); + __type(value, struct elem); +} array SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, 4); + __type(key, int); + __type(value, struct elem); +} lru SEC(".maps"); + +__u32 ok; +__u32 ok_sleepable; + +static int test_elem_callback(void *map, int *key, + int (callback_fn)(void *map, int *key, struct bpf_wq *wq)) +{ + struct elem init = {}, *val; + struct bpf_wq *wq; + + if ((ok & (1 << *key) || + (ok_sleepable & (1 << *key)))) + return -22; + + if (map == &lru && + bpf_map_update_elem(map, key, &init, 0)) + return -1; + + val = bpf_map_lookup_elem(map, key); + if (!val) + return -2; + + wq = &val->w; + if (bpf_wq_init(wq, map, 0) != 0) + return -3; + + if (bpf_wq_set_callback(wq, callback_fn, 0)) + return -4; + + if (bpf_wq_start(wq, 0)) + return -5; + + return 0; +} + +static int test_hmap_elem_callback(void *map, int *key, + int (callback_fn)(void *map, int *key, struct bpf_wq *wq)) +{ + struct hmap_elem init = {}, *val; + struct bpf_wq *wq; + + if ((ok & (1 << *key) || + (ok_sleepable & (1 << *key)))) + return -22; + + if (bpf_map_update_elem(map, key, &init, 0)) + return -1; + + val = bpf_map_lookup_elem(map, key); + if (!val) + return -2; + + wq = &val->work; + if (bpf_wq_init(wq, map, 0) != 0) + return -3; + + if (bpf_wq_set_callback(wq, callback_fn, 0)) + return -4; + + if (bpf_wq_start(wq, 0)) + return -5; + + return 0; +} + +/* callback for non sleepable workqueue */ +static int wq_callback(void *map, int *key, struct bpf_wq *work) +{ + bpf_kfunc_common_test(); + ok |= (1 << *key); + return 0; +} + +/* callback for sleepable workqueue */ +static int wq_cb_sleepable(void *map, int *key, struct bpf_wq *work) +{ + bpf_kfunc_call_test_sleepable(); + ok_sleepable |= (1 << *key); + return 0; +} + +SEC("tc") +/* test that workqueues can be used from an array */ +__retval(0) +long test_call_array_sleepable(void *ctx) +{ + int key = 0; + + return test_elem_callback(&array, &key, wq_cb_sleepable); +} + +SEC("syscall") +/* Same test than above but from a sleepable context. */ +__retval(0) +long test_syscall_array_sleepable(void *ctx) +{ + int key = 1; + + return test_elem_callback(&array, &key, wq_cb_sleepable); +} + +SEC("tc") +/* test that workqueues can be used from a hashmap */ +__retval(0) +long test_call_hash_sleepable(void *ctx) +{ + int key = 2; + + return test_hmap_elem_callback(&hmap, &key, wq_callback); +} + +SEC("tc") +/* test that workqueues can be used from a hashmap with NO_PREALLOC. */ +__retval(0) +long test_call_hash_malloc_sleepable(void *ctx) +{ + int key = 3; + + return test_hmap_elem_callback(&hmap_malloc, &key, wq_callback); +} + +SEC("tc") +/* test that workqueues can be used from a LRU map */ +__retval(0) +long test_call_lru_sleepable(void *ctx) +{ + int key = 4; + + return test_elem_callback(&lru, &key, wq_callback); +} diff --git a/tools/testing/selftests/bpf/progs/wq_failures.c b/tools/testing/selftests/bpf/progs/wq_failures.c new file mode 100644 index 0000000000..4cbdb425f2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/wq_failures.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Benjamin Tissoires + */ + +#include "bpf_experimental.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "../bpf_testmod/bpf_testmod_kfunc.h" + +char _license[] SEC("license") = "GPL"; + +struct elem { + struct bpf_wq w; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 2); + __type(key, int); + __type(value, struct elem); +} array SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, 4); + __type(key, int); + __type(value, struct elem); +} lru SEC(".maps"); + +/* callback for non sleepable workqueue */ +static int wq_callback(void *map, int *key, struct bpf_wq *work) +{ + bpf_kfunc_common_test(); + return 0; +} + +/* callback for sleepable workqueue */ +static int wq_cb_sleepable(void *map, int *key, struct bpf_wq *work) +{ + bpf_kfunc_call_test_sleepable(); + return 0; +} + +SEC("tc") +/* test that bpf_wq_init takes a map as a second argument + */ +__log_level(2) +__flag(BPF_F_TEST_STATE_FREQ) +__failure +__msg(": (85) call bpf_wq_init#") /* anchor message */ +__msg("pointer in R2 isn't map pointer") +long test_wq_init_nomap(void *ctx) +{ + struct bpf_wq *wq; + struct elem *val; + int key = 0; + + val = bpf_map_lookup_elem(&array, &key); + if (!val) + return -1; + + wq = &val->w; + if (bpf_wq_init(wq, &key, 0) != 0) + return -3; + + return 0; +} + +SEC("tc") +/* test that the workqueue is part of the map in bpf_wq_init + */ +__log_level(2) +__flag(BPF_F_TEST_STATE_FREQ) +__failure +__msg(": (85) call bpf_wq_init#") /* anchor message */ +__msg("workqueue pointer in R1 map_uid=0 doesn't match map pointer in R2 map_uid=0") +long test_wq_init_wrong_map(void *ctx) +{ + struct bpf_wq *wq; + struct elem *val; + int key = 0; + + val = bpf_map_lookup_elem(&array, &key); + if (!val) + return -1; + + wq = &val->w; + if (bpf_wq_init(wq, &lru, 0) != 0) + return -3; + + return 0; +} + +SEC("?tc") +__log_level(2) +__failure +/* check that the first argument of bpf_wq_set_callback() + * is a correct bpf_wq pointer. + */ +__msg(": (85) call bpf_wq_set_callback_impl#") /* anchor message */ +__msg("arg#0 doesn't point to a map value") +long test_wrong_wq_pointer(void *ctx) +{ + int key = 0; + struct bpf_wq *wq; + + wq = bpf_map_lookup_elem(&array, &key); + if (!wq) + return 1; + + if (bpf_wq_init(wq, &array, 0)) + return 2; + + if (bpf_wq_set_callback((void *)&wq, wq_callback, 0)) + return 3; + + return -22; +} + +SEC("?tc") +__log_level(2) +__failure +/* check that the first argument of bpf_wq_set_callback() + * is a correct bpf_wq pointer. + */ +__msg(": (85) call bpf_wq_set_callback_impl#") /* anchor message */ +__msg("off 1 doesn't point to 'struct bpf_wq' that is at 0") +long test_wrong_wq_pointer_offset(void *ctx) +{ + int key = 0; + struct bpf_wq *wq; + + wq = bpf_map_lookup_elem(&array, &key); + if (!wq) + return 1; + + if (bpf_wq_init(wq, &array, 0)) + return 2; + + if (bpf_wq_set_callback((void *)wq + 1, wq_cb_sleepable, 0)) + return 3; + + return -22; +} diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp index f4936834f7..dde0bb16e7 100644 --- a/tools/testing/selftests/bpf/test_cpp.cpp +++ b/tools/testing/selftests/bpf/test_cpp.cpp @@ -7,6 +7,7 @@ #include <bpf/bpf.h> #include <bpf/btf.h> #include "test_core_extern.skel.h" +#include "struct_ops_module.skel.h" template <typename T> class Skeleton { @@ -98,6 +99,7 @@ int main(int argc, char *argv[]) { struct btf_dump_opts opts = { }; struct test_core_extern *skel; + struct struct_ops_module *skel2; struct btf *btf; int fd; @@ -118,6 +120,9 @@ int main(int argc, char *argv[]) skel = test_core_extern__open_and_load(); test_core_extern__destroy(skel); + skel2 = struct_ops_module__open_and_load(); + struct_ops_module__destroy(skel2); + fd = bpf_enable_stats(BPF_STATS_RUN_TIME); if (fd < 0) std::cout << "FAILED to enable stats: " << fd << std::endl; diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c deleted file mode 100644 index 80c42583f5..0000000000 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ /dev/null @@ -1,1434 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (c) 2018 Facebook - -#define _GNU_SOURCE - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> - -#include <arpa/inet.h> -#include <netinet/in.h> -#include <sys/types.h> -#include <sys/select.h> -#include <sys/socket.h> - -#include <linux/filter.h> - -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -#include "cgroup_helpers.h" -#include "testing_helpers.h" -#include "bpf_util.h" - -#ifndef ENOTSUPP -# define ENOTSUPP 524 -#endif - -#define CG_PATH "/foo" -#define CONNECT4_PROG_PATH "./connect4_prog.bpf.o" -#define CONNECT6_PROG_PATH "./connect6_prog.bpf.o" -#define SENDMSG4_PROG_PATH "./sendmsg4_prog.bpf.o" -#define SENDMSG6_PROG_PATH "./sendmsg6_prog.bpf.o" -#define RECVMSG4_PROG_PATH "./recvmsg4_prog.bpf.o" -#define RECVMSG6_PROG_PATH "./recvmsg6_prog.bpf.o" -#define BIND4_PROG_PATH "./bind4_prog.bpf.o" -#define BIND6_PROG_PATH "./bind6_prog.bpf.o" - -#define SERV4_IP "192.168.1.254" -#define SERV4_REWRITE_IP "127.0.0.1" -#define SRC4_IP "172.16.0.1" -#define SRC4_REWRITE_IP "127.0.0.4" -#define SERV4_PORT 4040 -#define SERV4_REWRITE_PORT 4444 - -#define SERV6_IP "face:b00c:1234:5678::abcd" -#define SERV6_REWRITE_IP "::1" -#define SERV6_V4MAPPED_IP "::ffff:192.168.0.4" -#define SRC6_IP "::1" -#define SRC6_REWRITE_IP "::6" -#define WILDCARD6_IP "::" -#define SERV6_PORT 6060 -#define SERV6_REWRITE_PORT 6666 - -#define INET_NTOP_BUF 40 - -struct sock_addr_test; - -typedef int (*load_fn)(const struct sock_addr_test *test); -typedef int (*info_fn)(int, struct sockaddr *, socklen_t *); - -char bpf_log_buf[BPF_LOG_BUF_SIZE]; - -struct sock_addr_test { - const char *descr; - /* BPF prog properties */ - load_fn loadfn; - enum bpf_attach_type expected_attach_type; - enum bpf_attach_type attach_type; - /* Socket properties */ - int domain; - int type; - /* IP:port pairs for BPF prog to override */ - const char *requested_ip; - unsigned short requested_port; - const char *expected_ip; - unsigned short expected_port; - const char *expected_src_ip; - /* Expected test result */ - enum { - LOAD_REJECT, - ATTACH_REJECT, - ATTACH_OKAY, - SYSCALL_EPERM, - SYSCALL_ENOTSUPP, - SUCCESS, - } expected_result; -}; - -static int bind4_prog_load(const struct sock_addr_test *test); -static int bind6_prog_load(const struct sock_addr_test *test); -static int connect4_prog_load(const struct sock_addr_test *test); -static int connect6_prog_load(const struct sock_addr_test *test); -static int sendmsg_allow_prog_load(const struct sock_addr_test *test); -static int sendmsg_deny_prog_load(const struct sock_addr_test *test); -static int recvmsg_allow_prog_load(const struct sock_addr_test *test); -static int recvmsg_deny_prog_load(const struct sock_addr_test *test); -static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test); -static int recvmsg4_rw_c_prog_load(const struct sock_addr_test *test); -static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test); -static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test); -static int recvmsg6_rw_c_prog_load(const struct sock_addr_test *test); -static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test); -static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test); -static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test); - -static struct sock_addr_test tests[] = { - /* bind */ - { - "bind4: load prog with wrong expected attach type", - bind4_prog_load, - BPF_CGROUP_INET6_BIND, - BPF_CGROUP_INET4_BIND, - AF_INET, - SOCK_STREAM, - NULL, - 0, - NULL, - 0, - NULL, - LOAD_REJECT, - }, - { - "bind4: attach prog with wrong attach type", - bind4_prog_load, - BPF_CGROUP_INET4_BIND, - BPF_CGROUP_INET6_BIND, - AF_INET, - SOCK_STREAM, - NULL, - 0, - NULL, - 0, - NULL, - ATTACH_REJECT, - }, - { - "bind4: rewrite IP & TCP port in", - bind4_prog_load, - BPF_CGROUP_INET4_BIND, - BPF_CGROUP_INET4_BIND, - AF_INET, - SOCK_STREAM, - SERV4_IP, - SERV4_PORT, - SERV4_REWRITE_IP, - SERV4_REWRITE_PORT, - NULL, - SUCCESS, - }, - { - "bind4: rewrite IP & UDP port in", - bind4_prog_load, - BPF_CGROUP_INET4_BIND, - BPF_CGROUP_INET4_BIND, - AF_INET, - SOCK_DGRAM, - SERV4_IP, - SERV4_PORT, - SERV4_REWRITE_IP, - SERV4_REWRITE_PORT, - NULL, - SUCCESS, - }, - { - "bind6: load prog with wrong expected attach type", - bind6_prog_load, - BPF_CGROUP_INET4_BIND, - BPF_CGROUP_INET6_BIND, - AF_INET6, - SOCK_STREAM, - NULL, - 0, - NULL, - 0, - NULL, - LOAD_REJECT, - }, - { - "bind6: attach prog with wrong attach type", - bind6_prog_load, - BPF_CGROUP_INET6_BIND, - BPF_CGROUP_INET4_BIND, - AF_INET, - SOCK_STREAM, - NULL, - 0, - NULL, - 0, - NULL, - ATTACH_REJECT, - }, - { - "bind6: rewrite IP & TCP port in", - bind6_prog_load, - BPF_CGROUP_INET6_BIND, - BPF_CGROUP_INET6_BIND, - AF_INET6, - SOCK_STREAM, - SERV6_IP, - SERV6_PORT, - SERV6_REWRITE_IP, - SERV6_REWRITE_PORT, - NULL, - SUCCESS, - }, - { - "bind6: rewrite IP & UDP port in", - bind6_prog_load, - BPF_CGROUP_INET6_BIND, - BPF_CGROUP_INET6_BIND, - AF_INET6, - SOCK_DGRAM, - SERV6_IP, - SERV6_PORT, - SERV6_REWRITE_IP, - SERV6_REWRITE_PORT, - NULL, - SUCCESS, - }, - - /* connect */ - { - "connect4: load prog with wrong expected attach type", - connect4_prog_load, - BPF_CGROUP_INET6_CONNECT, - BPF_CGROUP_INET4_CONNECT, - AF_INET, - SOCK_STREAM, - NULL, - 0, - NULL, - 0, - NULL, - LOAD_REJECT, - }, - { - "connect4: attach prog with wrong attach type", - connect4_prog_load, - BPF_CGROUP_INET4_CONNECT, - BPF_CGROUP_INET6_CONNECT, - AF_INET, - SOCK_STREAM, - NULL, - 0, - NULL, - 0, - NULL, - ATTACH_REJECT, - }, - { - "connect4: rewrite IP & TCP port", - connect4_prog_load, - BPF_CGROUP_INET4_CONNECT, - BPF_CGROUP_INET4_CONNECT, - AF_INET, - SOCK_STREAM, - SERV4_IP, - SERV4_PORT, - SERV4_REWRITE_IP, - SERV4_REWRITE_PORT, - SRC4_REWRITE_IP, - SUCCESS, - }, - { - "connect4: rewrite IP & UDP port", - connect4_prog_load, - BPF_CGROUP_INET4_CONNECT, - BPF_CGROUP_INET4_CONNECT, - AF_INET, - SOCK_DGRAM, - SERV4_IP, - SERV4_PORT, - SERV4_REWRITE_IP, - SERV4_REWRITE_PORT, - SRC4_REWRITE_IP, - SUCCESS, - }, - { - "connect6: load prog with wrong expected attach type", - connect6_prog_load, - BPF_CGROUP_INET4_CONNECT, - BPF_CGROUP_INET6_CONNECT, - AF_INET6, - SOCK_STREAM, - NULL, - 0, - NULL, - 0, - NULL, - LOAD_REJECT, - }, - { - "connect6: attach prog with wrong attach type", - connect6_prog_load, - BPF_CGROUP_INET6_CONNECT, - BPF_CGROUP_INET4_CONNECT, - AF_INET, - SOCK_STREAM, - NULL, - 0, - NULL, - 0, - NULL, - ATTACH_REJECT, - }, - { - "connect6: rewrite IP & TCP port", - connect6_prog_load, - BPF_CGROUP_INET6_CONNECT, - BPF_CGROUP_INET6_CONNECT, - AF_INET6, - SOCK_STREAM, - SERV6_IP, - SERV6_PORT, - SERV6_REWRITE_IP, - SERV6_REWRITE_PORT, - SRC6_REWRITE_IP, - SUCCESS, - }, - { - "connect6: rewrite IP & UDP port", - connect6_prog_load, - BPF_CGROUP_INET6_CONNECT, - BPF_CGROUP_INET6_CONNECT, - AF_INET6, - SOCK_DGRAM, - SERV6_IP, - SERV6_PORT, - SERV6_REWRITE_IP, - SERV6_REWRITE_PORT, - SRC6_REWRITE_IP, - SUCCESS, - }, - - /* sendmsg */ - { - "sendmsg4: load prog with wrong expected attach type", - sendmsg4_rw_asm_prog_load, - BPF_CGROUP_UDP6_SENDMSG, - BPF_CGROUP_UDP4_SENDMSG, - AF_INET, - SOCK_DGRAM, - NULL, - 0, - NULL, - 0, - NULL, - LOAD_REJECT, - }, - { - "sendmsg4: attach prog with wrong attach type", - sendmsg4_rw_asm_prog_load, - BPF_CGROUP_UDP4_SENDMSG, - BPF_CGROUP_UDP6_SENDMSG, - AF_INET, - SOCK_DGRAM, - NULL, - 0, - NULL, - 0, - NULL, - ATTACH_REJECT, - }, - { - "sendmsg4: rewrite IP & port (asm)", - sendmsg4_rw_asm_prog_load, - BPF_CGROUP_UDP4_SENDMSG, - BPF_CGROUP_UDP4_SENDMSG, - AF_INET, - SOCK_DGRAM, - SERV4_IP, - SERV4_PORT, - SERV4_REWRITE_IP, - SERV4_REWRITE_PORT, - SRC4_REWRITE_IP, - SUCCESS, - }, - { - "sendmsg4: rewrite IP & port (C)", - sendmsg4_rw_c_prog_load, - BPF_CGROUP_UDP4_SENDMSG, - BPF_CGROUP_UDP4_SENDMSG, - AF_INET, - SOCK_DGRAM, - SERV4_IP, - SERV4_PORT, - SERV4_REWRITE_IP, - SERV4_REWRITE_PORT, - SRC4_REWRITE_IP, - SUCCESS, - }, - { - "sendmsg4: deny call", - sendmsg_deny_prog_load, - BPF_CGROUP_UDP4_SENDMSG, - BPF_CGROUP_UDP4_SENDMSG, - AF_INET, - SOCK_DGRAM, - SERV4_IP, - SERV4_PORT, - SERV4_REWRITE_IP, - SERV4_REWRITE_PORT, - SRC4_REWRITE_IP, - SYSCALL_EPERM, - }, - { - "sendmsg6: load prog with wrong expected attach type", - sendmsg6_rw_asm_prog_load, - BPF_CGROUP_UDP4_SENDMSG, - BPF_CGROUP_UDP6_SENDMSG, - AF_INET6, - SOCK_DGRAM, - NULL, - 0, - NULL, - 0, - NULL, - LOAD_REJECT, - }, - { - "sendmsg6: attach prog with wrong attach type", - sendmsg6_rw_asm_prog_load, - BPF_CGROUP_UDP6_SENDMSG, - BPF_CGROUP_UDP4_SENDMSG, - AF_INET6, - SOCK_DGRAM, - NULL, - 0, - NULL, - 0, - NULL, - ATTACH_REJECT, - }, - { - "sendmsg6: rewrite IP & port (asm)", - sendmsg6_rw_asm_prog_load, - BPF_CGROUP_UDP6_SENDMSG, - BPF_CGROUP_UDP6_SENDMSG, - AF_INET6, - SOCK_DGRAM, - SERV6_IP, - SERV6_PORT, - SERV6_REWRITE_IP, - SERV6_REWRITE_PORT, - SRC6_REWRITE_IP, - SUCCESS, - }, - { - "sendmsg6: rewrite IP & port (C)", - sendmsg6_rw_c_prog_load, - BPF_CGROUP_UDP6_SENDMSG, - BPF_CGROUP_UDP6_SENDMSG, - AF_INET6, - SOCK_DGRAM, - SERV6_IP, - SERV6_PORT, - SERV6_REWRITE_IP, - SERV6_REWRITE_PORT, - SRC6_REWRITE_IP, - SUCCESS, - }, - { - "sendmsg6: IPv4-mapped IPv6", - sendmsg6_rw_v4mapped_prog_load, - BPF_CGROUP_UDP6_SENDMSG, - BPF_CGROUP_UDP6_SENDMSG, - AF_INET6, - SOCK_DGRAM, - SERV6_IP, - SERV6_PORT, - SERV6_REWRITE_IP, - SERV6_REWRITE_PORT, - SRC6_REWRITE_IP, - SYSCALL_ENOTSUPP, - }, - { - "sendmsg6: set dst IP = [::] (BSD'ism)", - sendmsg6_rw_wildcard_prog_load, - BPF_CGROUP_UDP6_SENDMSG, - BPF_CGROUP_UDP6_SENDMSG, - AF_INET6, - SOCK_DGRAM, - SERV6_IP, - SERV6_PORT, - SERV6_REWRITE_IP, - SERV6_REWRITE_PORT, - SRC6_REWRITE_IP, - SUCCESS, - }, - { - "sendmsg6: preserve dst IP = [::] (BSD'ism)", - sendmsg_allow_prog_load, - BPF_CGROUP_UDP6_SENDMSG, - BPF_CGROUP_UDP6_SENDMSG, - AF_INET6, - SOCK_DGRAM, - WILDCARD6_IP, - SERV6_PORT, - SERV6_REWRITE_IP, - SERV6_PORT, - SRC6_IP, - SUCCESS, - }, - { - "sendmsg6: deny call", - sendmsg_deny_prog_load, - BPF_CGROUP_UDP6_SENDMSG, - BPF_CGROUP_UDP6_SENDMSG, - AF_INET6, - SOCK_DGRAM, - SERV6_IP, - SERV6_PORT, - SERV6_REWRITE_IP, - SERV6_REWRITE_PORT, - SRC6_REWRITE_IP, - SYSCALL_EPERM, - }, - - /* recvmsg */ - { - "recvmsg4: return code ok", - recvmsg_allow_prog_load, - BPF_CGROUP_UDP4_RECVMSG, - BPF_CGROUP_UDP4_RECVMSG, - AF_INET, - SOCK_DGRAM, - NULL, - 0, - NULL, - 0, - NULL, - ATTACH_OKAY, - }, - { - "recvmsg4: return code !ok", - recvmsg_deny_prog_load, - BPF_CGROUP_UDP4_RECVMSG, - BPF_CGROUP_UDP4_RECVMSG, - AF_INET, - SOCK_DGRAM, - NULL, - 0, - NULL, - 0, - NULL, - LOAD_REJECT, - }, - { - "recvmsg6: return code ok", - recvmsg_allow_prog_load, - BPF_CGROUP_UDP6_RECVMSG, - BPF_CGROUP_UDP6_RECVMSG, - AF_INET6, - SOCK_DGRAM, - NULL, - 0, - NULL, - 0, - NULL, - ATTACH_OKAY, - }, - { - "recvmsg6: return code !ok", - recvmsg_deny_prog_load, - BPF_CGROUP_UDP6_RECVMSG, - BPF_CGROUP_UDP6_RECVMSG, - AF_INET6, - SOCK_DGRAM, - NULL, - 0, - NULL, - 0, - NULL, - LOAD_REJECT, - }, - { - "recvmsg4: rewrite IP & port (C)", - recvmsg4_rw_c_prog_load, - BPF_CGROUP_UDP4_RECVMSG, - BPF_CGROUP_UDP4_RECVMSG, - AF_INET, - SOCK_DGRAM, - SERV4_REWRITE_IP, - SERV4_REWRITE_PORT, - SERV4_REWRITE_IP, - SERV4_REWRITE_PORT, - SERV4_IP, - SUCCESS, - }, - { - "recvmsg6: rewrite IP & port (C)", - recvmsg6_rw_c_prog_load, - BPF_CGROUP_UDP6_RECVMSG, - BPF_CGROUP_UDP6_RECVMSG, - AF_INET6, - SOCK_DGRAM, - SERV6_REWRITE_IP, - SERV6_REWRITE_PORT, - SERV6_REWRITE_IP, - SERV6_REWRITE_PORT, - SERV6_IP, - SUCCESS, - }, -}; - -static int mk_sockaddr(int domain, const char *ip, unsigned short port, - struct sockaddr *addr, socklen_t addr_len) -{ - struct sockaddr_in6 *addr6; - struct sockaddr_in *addr4; - - if (domain != AF_INET && domain != AF_INET6) { - log_err("Unsupported address family"); - return -1; - } - - memset(addr, 0, addr_len); - - if (domain == AF_INET) { - if (addr_len < sizeof(struct sockaddr_in)) - return -1; - addr4 = (struct sockaddr_in *)addr; - addr4->sin_family = domain; - addr4->sin_port = htons(port); - if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1) { - log_err("Invalid IPv4: %s", ip); - return -1; - } - } else if (domain == AF_INET6) { - if (addr_len < sizeof(struct sockaddr_in6)) - return -1; - addr6 = (struct sockaddr_in6 *)addr; - addr6->sin6_family = domain; - addr6->sin6_port = htons(port); - if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1) { - log_err("Invalid IPv6: %s", ip); - return -1; - } - } - - return 0; -} - -static int load_insns(const struct sock_addr_test *test, - const struct bpf_insn *insns, size_t insns_cnt) -{ - LIBBPF_OPTS(bpf_prog_load_opts, opts); - int ret; - - opts.expected_attach_type = test->expected_attach_type; - opts.log_buf = bpf_log_buf; - opts.log_size = BPF_LOG_BUF_SIZE; - - ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, NULL, "GPL", insns, insns_cnt, &opts); - if (ret < 0 && test->expected_result != LOAD_REJECT) { - log_err(">>> Loading program error.\n" - ">>> Verifier output:\n%s\n-------\n", bpf_log_buf); - } - - return ret; -} - -static int load_path(const struct sock_addr_test *test, const char *path) -{ - struct bpf_object *obj; - struct bpf_program *prog; - int err; - - obj = bpf_object__open_file(path, NULL); - err = libbpf_get_error(obj); - if (err) { - log_err(">>> Opening BPF object (%s) error.\n", path); - return -1; - } - - prog = bpf_object__next_program(obj, NULL); - if (!prog) - goto err_out; - - bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR); - bpf_program__set_expected_attach_type(prog, test->expected_attach_type); - bpf_program__set_flags(prog, testing_prog_flags()); - - err = bpf_object__load(obj); - if (err) { - if (test->expected_result != LOAD_REJECT) - log_err(">>> Loading program (%s) error.\n", path); - goto err_out; - } - - return bpf_program__fd(prog); -err_out: - bpf_object__close(obj); - return -1; -} - -static int bind4_prog_load(const struct sock_addr_test *test) -{ - return load_path(test, BIND4_PROG_PATH); -} - -static int bind6_prog_load(const struct sock_addr_test *test) -{ - return load_path(test, BIND6_PROG_PATH); -} - -static int connect4_prog_load(const struct sock_addr_test *test) -{ - return load_path(test, CONNECT4_PROG_PATH); -} - -static int connect6_prog_load(const struct sock_addr_test *test) -{ - return load_path(test, CONNECT6_PROG_PATH); -} - -static int xmsg_ret_only_prog_load(const struct sock_addr_test *test, - int32_t rc) -{ - struct bpf_insn insns[] = { - /* return rc */ - BPF_MOV64_IMM(BPF_REG_0, rc), - BPF_EXIT_INSN(), - }; - return load_insns(test, insns, ARRAY_SIZE(insns)); -} - -static int sendmsg_allow_prog_load(const struct sock_addr_test *test) -{ - return xmsg_ret_only_prog_load(test, /*rc*/ 1); -} - -static int sendmsg_deny_prog_load(const struct sock_addr_test *test) -{ - return xmsg_ret_only_prog_load(test, /*rc*/ 0); -} - -static int recvmsg_allow_prog_load(const struct sock_addr_test *test) -{ - return xmsg_ret_only_prog_load(test, /*rc*/ 1); -} - -static int recvmsg_deny_prog_load(const struct sock_addr_test *test) -{ - return xmsg_ret_only_prog_load(test, /*rc*/ 0); -} - -static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test) -{ - struct sockaddr_in dst4_rw_addr; - struct in_addr src4_rw_ip; - - if (inet_pton(AF_INET, SRC4_REWRITE_IP, (void *)&src4_rw_ip) != 1) { - log_err("Invalid IPv4: %s", SRC4_REWRITE_IP); - return -1; - } - - if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT, - (struct sockaddr *)&dst4_rw_addr, - sizeof(dst4_rw_addr)) == -1) - return -1; - - struct bpf_insn insns[] = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - - /* if (sk.family == AF_INET && */ - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, family)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 8), - - /* sk.type == SOCK_DGRAM) { */ - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, type)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 6), - - /* msg_src_ip4 = src4_rw_ip */ - BPF_MOV32_IMM(BPF_REG_7, src4_rw_ip.s_addr), - BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, - offsetof(struct bpf_sock_addr, msg_src_ip4)), - - /* user_ip4 = dst4_rw_addr.sin_addr */ - BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_addr.s_addr), - BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, - offsetof(struct bpf_sock_addr, user_ip4)), - - /* user_port = dst4_rw_addr.sin_port */ - BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_port), - BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, - offsetof(struct bpf_sock_addr, user_port)), - /* } */ - - /* return 1 */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }; - - return load_insns(test, insns, ARRAY_SIZE(insns)); -} - -static int recvmsg4_rw_c_prog_load(const struct sock_addr_test *test) -{ - return load_path(test, RECVMSG4_PROG_PATH); -} - -static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test) -{ - return load_path(test, SENDMSG4_PROG_PATH); -} - -static int sendmsg6_rw_dst_asm_prog_load(const struct sock_addr_test *test, - const char *rw_dst_ip) -{ - struct sockaddr_in6 dst6_rw_addr; - struct in6_addr src6_rw_ip; - - if (inet_pton(AF_INET6, SRC6_REWRITE_IP, (void *)&src6_rw_ip) != 1) { - log_err("Invalid IPv6: %s", SRC6_REWRITE_IP); - return -1; - } - - if (mk_sockaddr(AF_INET6, rw_dst_ip, SERV6_REWRITE_PORT, - (struct sockaddr *)&dst6_rw_addr, - sizeof(dst6_rw_addr)) == -1) - return -1; - - struct bpf_insn insns[] = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - - /* if (sk.family == AF_INET6) { */ - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock_addr, family)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18), - -#define STORE_IPV6_WORD_N(DST, SRC, N) \ - BPF_MOV32_IMM(BPF_REG_7, SRC[N]), \ - BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, \ - offsetof(struct bpf_sock_addr, DST[N])) - -#define STORE_IPV6(DST, SRC) \ - STORE_IPV6_WORD_N(DST, SRC, 0), \ - STORE_IPV6_WORD_N(DST, SRC, 1), \ - STORE_IPV6_WORD_N(DST, SRC, 2), \ - STORE_IPV6_WORD_N(DST, SRC, 3) - - STORE_IPV6(msg_src_ip6, src6_rw_ip.s6_addr32), - STORE_IPV6(user_ip6, dst6_rw_addr.sin6_addr.s6_addr32), - - /* user_port = dst6_rw_addr.sin6_port */ - BPF_MOV32_IMM(BPF_REG_7, dst6_rw_addr.sin6_port), - BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, - offsetof(struct bpf_sock_addr, user_port)), - - /* } */ - - /* return 1 */ - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }; - - return load_insns(test, insns, ARRAY_SIZE(insns)); -} - -static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test) -{ - return sendmsg6_rw_dst_asm_prog_load(test, SERV6_REWRITE_IP); -} - -static int recvmsg6_rw_c_prog_load(const struct sock_addr_test *test) -{ - return load_path(test, RECVMSG6_PROG_PATH); -} - -static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test) -{ - return sendmsg6_rw_dst_asm_prog_load(test, SERV6_V4MAPPED_IP); -} - -static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test) -{ - return sendmsg6_rw_dst_asm_prog_load(test, WILDCARD6_IP); -} - -static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test) -{ - return load_path(test, SENDMSG6_PROG_PATH); -} - -static int cmp_addr(const struct sockaddr_storage *addr1, - const struct sockaddr_storage *addr2, int cmp_port) -{ - const struct sockaddr_in *four1, *four2; - const struct sockaddr_in6 *six1, *six2; - - if (addr1->ss_family != addr2->ss_family) - return -1; - - if (addr1->ss_family == AF_INET) { - four1 = (const struct sockaddr_in *)addr1; - four2 = (const struct sockaddr_in *)addr2; - return !((four1->sin_port == four2->sin_port || !cmp_port) && - four1->sin_addr.s_addr == four2->sin_addr.s_addr); - } else if (addr1->ss_family == AF_INET6) { - six1 = (const struct sockaddr_in6 *)addr1; - six2 = (const struct sockaddr_in6 *)addr2; - return !((six1->sin6_port == six2->sin6_port || !cmp_port) && - !memcmp(&six1->sin6_addr, &six2->sin6_addr, - sizeof(struct in6_addr))); - } - - return -1; -} - -static int cmp_sock_addr(info_fn fn, int sock1, - const struct sockaddr_storage *addr2, int cmp_port) -{ - struct sockaddr_storage addr1; - socklen_t len1 = sizeof(addr1); - - memset(&addr1, 0, len1); - if (fn(sock1, (struct sockaddr *)&addr1, (socklen_t *)&len1) != 0) - return -1; - - return cmp_addr(&addr1, addr2, cmp_port); -} - -static int cmp_local_ip(int sock1, const struct sockaddr_storage *addr2) -{ - return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 0); -} - -static int cmp_local_addr(int sock1, const struct sockaddr_storage *addr2) -{ - return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 1); -} - -static int cmp_peer_addr(int sock1, const struct sockaddr_storage *addr2) -{ - return cmp_sock_addr(getpeername, sock1, addr2, /*cmp_port*/ 1); -} - -static int start_server(int type, const struct sockaddr_storage *addr, - socklen_t addr_len) -{ - int fd; - - fd = socket(addr->ss_family, type, 0); - if (fd == -1) { - log_err("Failed to create server socket"); - goto out; - } - - if (bind(fd, (const struct sockaddr *)addr, addr_len) == -1) { - log_err("Failed to bind server socket"); - goto close_out; - } - - if (type == SOCK_STREAM) { - if (listen(fd, 128) == -1) { - log_err("Failed to listen on server socket"); - goto close_out; - } - } - - goto out; -close_out: - close(fd); - fd = -1; -out: - return fd; -} - -static int connect_to_server(int type, const struct sockaddr_storage *addr, - socklen_t addr_len) -{ - int domain; - int fd = -1; - - domain = addr->ss_family; - - if (domain != AF_INET && domain != AF_INET6) { - log_err("Unsupported address family"); - goto err; - } - - fd = socket(domain, type, 0); - if (fd == -1) { - log_err("Failed to create client socket"); - goto err; - } - - if (connect(fd, (const struct sockaddr *)addr, addr_len) == -1) { - log_err("Fail to connect to server"); - goto err; - } - - goto out; -err: - close(fd); - fd = -1; -out: - return fd; -} - -int init_pktinfo(int domain, struct cmsghdr *cmsg) -{ - struct in6_pktinfo *pktinfo6; - struct in_pktinfo *pktinfo4; - - if (domain == AF_INET) { - cmsg->cmsg_level = SOL_IP; - cmsg->cmsg_type = IP_PKTINFO; - cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo)); - pktinfo4 = (struct in_pktinfo *)CMSG_DATA(cmsg); - memset(pktinfo4, 0, sizeof(struct in_pktinfo)); - if (inet_pton(domain, SRC4_IP, - (void *)&pktinfo4->ipi_spec_dst) != 1) - return -1; - } else if (domain == AF_INET6) { - cmsg->cmsg_level = SOL_IPV6; - cmsg->cmsg_type = IPV6_PKTINFO; - cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); - pktinfo6 = (struct in6_pktinfo *)CMSG_DATA(cmsg); - memset(pktinfo6, 0, sizeof(struct in6_pktinfo)); - if (inet_pton(domain, SRC6_IP, - (void *)&pktinfo6->ipi6_addr) != 1) - return -1; - } else { - return -1; - } - - return 0; -} - -static int sendmsg_to_server(int type, const struct sockaddr_storage *addr, - socklen_t addr_len, int set_cmsg, int flags, - int *syscall_err) -{ - union { - char buf[CMSG_SPACE(sizeof(struct in6_pktinfo))]; - struct cmsghdr align; - } control6; - union { - char buf[CMSG_SPACE(sizeof(struct in_pktinfo))]; - struct cmsghdr align; - } control4; - struct msghdr hdr; - struct iovec iov; - char data = 'a'; - int domain; - int fd = -1; - - domain = addr->ss_family; - - if (domain != AF_INET && domain != AF_INET6) { - log_err("Unsupported address family"); - goto err; - } - - fd = socket(domain, type, 0); - if (fd == -1) { - log_err("Failed to create client socket"); - goto err; - } - - memset(&iov, 0, sizeof(iov)); - iov.iov_base = &data; - iov.iov_len = sizeof(data); - - memset(&hdr, 0, sizeof(hdr)); - hdr.msg_name = (void *)addr; - hdr.msg_namelen = addr_len; - hdr.msg_iov = &iov; - hdr.msg_iovlen = 1; - - if (set_cmsg) { - if (domain == AF_INET) { - hdr.msg_control = &control4; - hdr.msg_controllen = sizeof(control4.buf); - } else if (domain == AF_INET6) { - hdr.msg_control = &control6; - hdr.msg_controllen = sizeof(control6.buf); - } - if (init_pktinfo(domain, CMSG_FIRSTHDR(&hdr))) { - log_err("Fail to init pktinfo"); - goto err; - } - } - - if (sendmsg(fd, &hdr, flags) != sizeof(data)) { - log_err("Fail to send message to server"); - *syscall_err = errno; - goto err; - } - - goto out; -err: - close(fd); - fd = -1; -out: - return fd; -} - -static int fastconnect_to_server(const struct sockaddr_storage *addr, - socklen_t addr_len) -{ - int sendmsg_err; - - return sendmsg_to_server(SOCK_STREAM, addr, addr_len, /*set_cmsg*/0, - MSG_FASTOPEN, &sendmsg_err); -} - -static int recvmsg_from_client(int sockfd, struct sockaddr_storage *src_addr) -{ - struct timeval tv; - struct msghdr hdr; - struct iovec iov; - char data[64]; - fd_set rfds; - - FD_ZERO(&rfds); - FD_SET(sockfd, &rfds); - - tv.tv_sec = 2; - tv.tv_usec = 0; - - if (select(sockfd + 1, &rfds, NULL, NULL, &tv) <= 0 || - !FD_ISSET(sockfd, &rfds)) - return -1; - - memset(&iov, 0, sizeof(iov)); - iov.iov_base = data; - iov.iov_len = sizeof(data); - - memset(&hdr, 0, sizeof(hdr)); - hdr.msg_name = src_addr; - hdr.msg_namelen = sizeof(struct sockaddr_storage); - hdr.msg_iov = &iov; - hdr.msg_iovlen = 1; - - return recvmsg(sockfd, &hdr, 0); -} - -static int init_addrs(const struct sock_addr_test *test, - struct sockaddr_storage *requested_addr, - struct sockaddr_storage *expected_addr, - struct sockaddr_storage *expected_src_addr) -{ - socklen_t addr_len = sizeof(struct sockaddr_storage); - - if (mk_sockaddr(test->domain, test->expected_ip, test->expected_port, - (struct sockaddr *)expected_addr, addr_len) == -1) - goto err; - - if (mk_sockaddr(test->domain, test->requested_ip, test->requested_port, - (struct sockaddr *)requested_addr, addr_len) == -1) - goto err; - - if (test->expected_src_ip && - mk_sockaddr(test->domain, test->expected_src_ip, 0, - (struct sockaddr *)expected_src_addr, addr_len) == -1) - goto err; - - return 0; -err: - return -1; -} - -static int run_bind_test_case(const struct sock_addr_test *test) -{ - socklen_t addr_len = sizeof(struct sockaddr_storage); - struct sockaddr_storage requested_addr; - struct sockaddr_storage expected_addr; - int clientfd = -1; - int servfd = -1; - int err = 0; - - if (init_addrs(test, &requested_addr, &expected_addr, NULL)) - goto err; - - servfd = start_server(test->type, &requested_addr, addr_len); - if (servfd == -1) - goto err; - - if (cmp_local_addr(servfd, &expected_addr)) - goto err; - - /* Try to connect to server just in case */ - clientfd = connect_to_server(test->type, &expected_addr, addr_len); - if (clientfd == -1) - goto err; - - goto out; -err: - err = -1; -out: - close(clientfd); - close(servfd); - return err; -} - -static int run_connect_test_case(const struct sock_addr_test *test) -{ - socklen_t addr_len = sizeof(struct sockaddr_storage); - struct sockaddr_storage expected_src_addr; - struct sockaddr_storage requested_addr; - struct sockaddr_storage expected_addr; - int clientfd = -1; - int servfd = -1; - int err = 0; - - if (init_addrs(test, &requested_addr, &expected_addr, - &expected_src_addr)) - goto err; - - /* Prepare server to connect to */ - servfd = start_server(test->type, &expected_addr, addr_len); - if (servfd == -1) - goto err; - - clientfd = connect_to_server(test->type, &requested_addr, addr_len); - if (clientfd == -1) - goto err; - - /* Make sure src and dst addrs were overridden properly */ - if (cmp_peer_addr(clientfd, &expected_addr)) - goto err; - - if (cmp_local_ip(clientfd, &expected_src_addr)) - goto err; - - if (test->type == SOCK_STREAM) { - /* Test TCP Fast Open scenario */ - clientfd = fastconnect_to_server(&requested_addr, addr_len); - if (clientfd == -1) - goto err; - - /* Make sure src and dst addrs were overridden properly */ - if (cmp_peer_addr(clientfd, &expected_addr)) - goto err; - - if (cmp_local_ip(clientfd, &expected_src_addr)) - goto err; - } - - goto out; -err: - err = -1; -out: - close(clientfd); - close(servfd); - return err; -} - -static int run_xmsg_test_case(const struct sock_addr_test *test, int max_cmsg) -{ - socklen_t addr_len = sizeof(struct sockaddr_storage); - struct sockaddr_storage expected_addr; - struct sockaddr_storage server_addr; - struct sockaddr_storage sendmsg_addr; - struct sockaddr_storage recvmsg_addr; - int clientfd = -1; - int servfd = -1; - int set_cmsg; - int err = 0; - - if (test->type != SOCK_DGRAM) - goto err; - - if (init_addrs(test, &sendmsg_addr, &server_addr, &expected_addr)) - goto err; - - /* Prepare server to sendmsg to */ - servfd = start_server(test->type, &server_addr, addr_len); - if (servfd == -1) - goto err; - - for (set_cmsg = 0; set_cmsg <= max_cmsg; ++set_cmsg) { - if (clientfd >= 0) - close(clientfd); - - clientfd = sendmsg_to_server(test->type, &sendmsg_addr, - addr_len, set_cmsg, /*flags*/0, - &err); - if (err) - goto out; - else if (clientfd == -1) - goto err; - - /* Try to receive message on server instead of using - * getpeername(2) on client socket, to check that client's - * destination address was rewritten properly, since - * getpeername(2) doesn't work with unconnected datagram - * sockets. - * - * Get source address from recvmsg(2) as well to make sure - * source was rewritten properly: getsockname(2) can't be used - * since socket is unconnected and source defined for one - * specific packet may differ from the one used by default and - * returned by getsockname(2). - */ - if (recvmsg_from_client(servfd, &recvmsg_addr) == -1) - goto err; - - if (cmp_addr(&recvmsg_addr, &expected_addr, /*cmp_port*/0)) - goto err; - } - - goto out; -err: - err = -1; -out: - close(clientfd); - close(servfd); - return err; -} - -static int run_test_case(int cgfd, const struct sock_addr_test *test) -{ - int progfd = -1; - int err = 0; - - printf("Test case: %s .. ", test->descr); - - progfd = test->loadfn(test); - if (test->expected_result == LOAD_REJECT && progfd < 0) - goto out; - else if (test->expected_result == LOAD_REJECT || progfd < 0) - goto err; - - err = bpf_prog_attach(progfd, cgfd, test->attach_type, - BPF_F_ALLOW_OVERRIDE); - if (test->expected_result == ATTACH_REJECT && err) { - err = 0; /* error was expected, reset it */ - goto out; - } else if (test->expected_result == ATTACH_REJECT || err) { - goto err; - } else if (test->expected_result == ATTACH_OKAY) { - err = 0; - goto out; - } - - switch (test->attach_type) { - case BPF_CGROUP_INET4_BIND: - case BPF_CGROUP_INET6_BIND: - err = run_bind_test_case(test); - break; - case BPF_CGROUP_INET4_CONNECT: - case BPF_CGROUP_INET6_CONNECT: - err = run_connect_test_case(test); - break; - case BPF_CGROUP_UDP4_SENDMSG: - case BPF_CGROUP_UDP6_SENDMSG: - err = run_xmsg_test_case(test, 1); - break; - case BPF_CGROUP_UDP4_RECVMSG: - case BPF_CGROUP_UDP6_RECVMSG: - err = run_xmsg_test_case(test, 0); - break; - default: - goto err; - } - - if (test->expected_result == SYSCALL_EPERM && err == EPERM) { - err = 0; /* error was expected, reset it */ - goto out; - } - - if (test->expected_result == SYSCALL_ENOTSUPP && err == ENOTSUPP) { - err = 0; /* error was expected, reset it */ - goto out; - } - - if (err || test->expected_result != SUCCESS) - goto err; - - goto out; -err: - err = -1; -out: - /* Detaching w/o checking return code: best effort attempt. */ - if (progfd != -1) - bpf_prog_detach(cgfd, test->attach_type); - close(progfd); - printf("[%s]\n", err ? "FAIL" : "PASS"); - return err; -} - -static int run_tests(int cgfd) -{ - int passes = 0; - int fails = 0; - int i; - - for (i = 0; i < ARRAY_SIZE(tests); ++i) { - if (run_test_case(cgfd, &tests[i])) - ++fails; - else - ++passes; - } - printf("Summary: %d PASSED, %d FAILED\n", passes, fails); - return fails ? -1 : 0; -} - -int main(int argc, char **argv) -{ - int cgfd = -1; - int err = 0; - - if (argc < 2) { - fprintf(stderr, - "%s has to be run via %s.sh. Skip direct run.\n", - argv[0], argv[0]); - exit(err); - } - - cgfd = cgroup_setup_and_join(CG_PATH); - if (cgfd < 0) - goto err; - - /* Use libbpf 1.0 API mode */ - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - - if (run_tests(cgfd)) - goto err; - - goto out; -err: - err = -1; -out: - close(cgfd); - cleanup_cgroup_environment(); - return err; -} diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh deleted file mode 100755 index 3b9fdb8094..0000000000 --- a/tools/testing/selftests/bpf/test_sock_addr.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/sh - -set -eu - -ping_once() -{ - type ping${1} >/dev/null 2>&1 && PING="ping${1}" || PING="ping -${1}" - $PING -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1 -} - -wait_for_ip() -{ - local _i - echo -n "Wait for testing IPv4/IPv6 to become available " - for _i in $(seq ${MAX_PING_TRIES}); do - echo -n "." - if ping_once 4 ${TEST_IPv4} && ping_once 6 ${TEST_IPv6}; then - echo " OK" - return - fi - done - echo 1>&2 "ERROR: Timeout waiting for test IP to become available." - exit 1 -} - -setup() -{ - # Create testing interfaces not to interfere with current environment. - ip link add dev ${TEST_IF} type veth peer name ${TEST_IF_PEER} - ip link set ${TEST_IF} up - ip link set ${TEST_IF_PEER} up - - ip -4 addr add ${TEST_IPv4} dev ${TEST_IF} - ip -6 addr add ${TEST_IPv6} dev ${TEST_IF} - wait_for_ip -} - -cleanup() -{ - ip link del ${TEST_IF} 2>/dev/null || : - ip link del ${TEST_IF_PEER} 2>/dev/null || : -} - -main() -{ - trap cleanup EXIT 2 3 6 15 - setup - ./test_sock_addr setup_done -} - -BASENAME=$(basename $0 .sh) -TEST_IF="${BASENAME}1" -TEST_IF_PEER="${BASENAME}2" -TEST_IPv4="127.0.0.4/8" -TEST_IPv6="::6/128" -MAX_PING_TRIES=5 - -main diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 43612de44f..a709911cdd 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -63,7 +63,7 @@ int passed; int failed; int map_fd[9]; struct bpf_map *maps[9]; -int prog_fd[11]; +int prog_fd[9]; int txmsg_pass; int txmsg_redir; @@ -680,7 +680,8 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, } } - s->bytes_recvd += recv; + if (recv > 0) + s->bytes_recvd += recv; if (opt->check_recved_len && s->bytes_recvd > total_bytes) { errno = EMSGSIZE; @@ -1793,8 +1794,6 @@ int prog_attach_type[] = { BPF_SK_MSG_VERDICT, BPF_SK_MSG_VERDICT, BPF_SK_MSG_VERDICT, - BPF_SK_MSG_VERDICT, - BPF_SK_MSG_VERDICT, }; int prog_type[] = { @@ -1807,8 +1806,6 @@ int prog_type[] = { BPF_PROG_TYPE_SK_MSG, BPF_PROG_TYPE_SK_MSG, BPF_PROG_TYPE_SK_MSG, - BPF_PROG_TYPE_SK_MSG, - BPF_PROG_TYPE_SK_MSG, }; static int populate_progs(char *bpf_file) @@ -1887,10 +1884,13 @@ static int check_whitelist(struct _test *t, struct sockmap_options *opt) while (entry) { if ((opt->prepend && strstr(opt->prepend, entry) != 0) || strstr(opt->map, entry) != 0 || - strstr(t->title, entry) != 0) + strstr(t->title, entry) != 0) { + free(ptr); return 0; + } entry = strtok(NULL, ","); } + free(ptr); return -EINVAL; } @@ -1907,10 +1907,13 @@ static int check_blacklist(struct _test *t, struct sockmap_options *opt) while (entry) { if ((opt->prepend && strstr(opt->prepend, entry) != 0) || strstr(opt->map, entry) != 0 || - strstr(t->title, entry) != 0) + strstr(t->title, entry) != 0) { + free(ptr); return 0; + } entry = strtok(NULL, ","); } + free(ptr); return -EINVAL; } diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c index 32df937470..7b5fc98838 100644 --- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c +++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c @@ -16,68 +16,7 @@ #include <bpf/libbpf.h> #include "cgroup_helpers.h" - -static int start_server(const struct sockaddr *addr, socklen_t len, bool dual) -{ - int mode = !dual; - int fd; - - fd = socket(addr->sa_family, SOCK_STREAM, 0); - if (fd == -1) { - log_err("Failed to create server socket"); - goto out; - } - - if (addr->sa_family == AF_INET6) { - if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (char *)&mode, - sizeof(mode)) == -1) { - log_err("Failed to set the dual-stack mode"); - goto close_out; - } - } - - if (bind(fd, addr, len) == -1) { - log_err("Failed to bind server socket"); - goto close_out; - } - - if (listen(fd, 128) == -1) { - log_err("Failed to listen on server socket"); - goto close_out; - } - - goto out; - -close_out: - close(fd); - fd = -1; -out: - return fd; -} - -static int connect_to_server(const struct sockaddr *addr, socklen_t len) -{ - int fd = -1; - - fd = socket(addr->sa_family, SOCK_STREAM, 0); - if (fd == -1) { - log_err("Failed to create client socket"); - goto out; - } - - if (connect(fd, (const struct sockaddr *)addr, len) == -1) { - log_err("Fail to connect to server"); - goto close_out; - } - - goto out; - -close_out: - close(fd); - fd = -1; -out: - return fd; -} +#include "network_helpers.h" static int get_map_fd_by_prog_id(int prog_id, bool *xdp) { @@ -117,8 +56,7 @@ err: return map_fd; } -static int run_test(int server_fd, int results_fd, bool xdp, - const struct sockaddr *addr, socklen_t len) +static int run_test(int server_fd, int results_fd, bool xdp) { int client = -1, srv_client = -1; int ret = 0; @@ -144,7 +82,7 @@ static int run_test(int server_fd, int results_fd, bool xdp, goto err; } - client = connect_to_server(addr, len); + client = connect_to_fd(server_fd, 0); if (client == -1) goto err; @@ -201,23 +139,23 @@ out: return ret; } -static bool get_port(int server_fd, in_port_t *port) +static int v6only_true(int fd, const struct post_socket_opts *opts) { - struct sockaddr_in addr; - socklen_t len = sizeof(addr); + int mode = true; - if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { - log_err("Failed to get server addr"); - return false; - } + return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &mode, sizeof(mode)); +} + +static int v6only_false(int fd, const struct post_socket_opts *opts) +{ + int mode = false; - /* sin_port and sin6_port are located at the same offset. */ - *port = addr.sin_port; - return true; + return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &mode, sizeof(mode)); } int main(int argc, char **argv) { + struct network_helper_opts opts = { 0 }; struct sockaddr_in addr4; struct sockaddr_in6 addr6; struct sockaddr_in addr4dual; @@ -259,31 +197,30 @@ int main(int argc, char **argv) addr6dual.sin6_addr = in6addr_any; addr6dual.sin6_port = 0; - server = start_server((const struct sockaddr *)&addr4, sizeof(addr4), - false); - if (server == -1 || !get_port(server, &addr4.sin_port)) + server = start_server_addr(SOCK_STREAM, (struct sockaddr_storage *)&addr4, + sizeof(addr4), NULL); + if (server == -1) goto err; - server_v6 = start_server((const struct sockaddr *)&addr6, - sizeof(addr6), false); - if (server_v6 == -1 || !get_port(server_v6, &addr6.sin6_port)) + opts.post_socket_cb = v6only_true; + server_v6 = start_server_addr(SOCK_STREAM, (struct sockaddr_storage *)&addr6, + sizeof(addr6), &opts); + if (server_v6 == -1) goto err; - server_dual = start_server((const struct sockaddr *)&addr6dual, - sizeof(addr6dual), true); - if (server_dual == -1 || !get_port(server_dual, &addr4dual.sin_port)) + opts.post_socket_cb = v6only_false; + server_dual = start_server_addr(SOCK_STREAM, (struct sockaddr_storage *)&addr6dual, + sizeof(addr6dual), &opts); + if (server_dual == -1) goto err; - if (run_test(server, results, xdp, - (const struct sockaddr *)&addr4, sizeof(addr4))) + if (run_test(server, results, xdp)) goto err; - if (run_test(server_v6, results, xdp, - (const struct sockaddr *)&addr6, sizeof(addr6))) + if (run_test(server_v6, results, xdp)) goto err; - if (run_test(server_dual, results, xdp, - (const struct sockaddr *)&addr4dual, sizeof(addr4dual))) + if (run_test(server_dual, results, xdp)) goto err; printf("ok\n"); diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 28b6646662..d5379a0e6d 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -368,9 +368,23 @@ int delete_module(const char *name, int flags) int unload_bpf_testmod(bool verbose) { + int ret, cnt = 0; + if (kern_sync_rcu()) fprintf(stdout, "Failed to trigger kernel-side RCU sync!\n"); - if (delete_module("bpf_testmod", 0)) { + + for (;;) { + ret = delete_module("bpf_testmod", 0); + if (!ret || errno != EAGAIN) + break; + if (++cnt > 10000) { + fprintf(stdout, "Unload of bpf_testmod timed out\n"); + break; + } + usleep(100); + } + + if (ret) { if (errno == ENOENT) { if (verbose) fprintf(stdout, "bpf_testmod.ko is already unloaded.\n"); diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 27fd7ed3e4..70e29f316f 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -61,12 +61,7 @@ void free_kallsyms_local(struct ksyms *ksyms) free(ksyms); } -static int ksym_cmp(const void *p1, const void *p2) -{ - return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr; -} - -struct ksyms *load_kallsyms_local(void) +static struct ksyms *load_kallsyms_local_common(ksym_cmp_t cmp_cb) { FILE *f; char func[256], buf[256]; @@ -100,7 +95,7 @@ struct ksyms *load_kallsyms_local(void) goto error; } fclose(f); - qsort(ksyms->syms, ksyms->sym_cnt, sizeof(struct ksym), ksym_cmp); + qsort(ksyms->syms, ksyms->sym_cnt, sizeof(struct ksym), cmp_cb); return ksyms; error: @@ -109,6 +104,21 @@ error: return NULL; } +static int ksym_cmp(const void *p1, const void *p2) +{ + return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr; +} + +struct ksyms *load_kallsyms_local(void) +{ + return load_kallsyms_local_common(ksym_cmp); +} + +struct ksyms *load_kallsyms_custom_local(ksym_cmp_t cmp_cb) +{ + return load_kallsyms_local_common(cmp_cb); +} + int load_kallsyms(void) { pthread_mutex_lock(&ksyms_mutex); @@ -148,6 +158,28 @@ struct ksym *ksym_search_local(struct ksyms *ksyms, long key) return &ksyms->syms[0]; } +struct ksym *search_kallsyms_custom_local(struct ksyms *ksyms, const void *p, + ksym_search_cmp_t cmp_cb) +{ + int start = 0, mid, end = ksyms->sym_cnt; + struct ksym *ks; + int result; + + while (start < end) { + mid = start + (end - start) / 2; + ks = &ksyms->syms[mid]; + result = cmp_cb(p, ks); + if (result < 0) + end = mid; + else if (result > 0) + start = mid + 1; + else + return ks; + } + + return NULL; +} + struct ksym *ksym_search(long key) { if (!ksyms) @@ -201,29 +233,6 @@ out: return err; } -void read_trace_pipe(void) -{ - int trace_fd; - - if (access(TRACEFS_PIPE, F_OK) == 0) - trace_fd = open(TRACEFS_PIPE, O_RDONLY, 0); - else - trace_fd = open(DEBUGFS_PIPE, O_RDONLY, 0); - if (trace_fd < 0) - return; - - while (1) { - static char buf[4096]; - ssize_t sz; - - sz = read(trace_fd, buf, sizeof(buf) - 1); - if (sz > 0) { - buf[sz] = 0; - puts(buf); - } - } -} - ssize_t get_uprobe_offset(const void *addr) { size_t start, end, base; @@ -381,3 +390,43 @@ out: close(fd); return err; } + +int read_trace_pipe_iter(void (*cb)(const char *str, void *data), void *data, int iter) +{ + size_t buflen, n; + char *buf = NULL; + FILE *fp = NULL; + + if (access(TRACEFS_PIPE, F_OK) == 0) + fp = fopen(TRACEFS_PIPE, "r"); + else + fp = fopen(DEBUGFS_PIPE, "r"); + if (!fp) + return -1; + + /* We do not want to wait forever when iter is specified. */ + if (iter) + fcntl(fileno(fp), F_SETFL, O_NONBLOCK); + + while ((n = getline(&buf, &buflen, fp) >= 0) || errno == EAGAIN) { + if (n > 0) + cb(buf, data); + if (iter && !(--iter)) + break; + } + + free(buf); + if (fp) + fclose(fp); + return 0; +} + +static void trace_pipe_cb(const char *str, void *data) +{ + printf("%s", str); +} + +void read_trace_pipe(void) +{ + read_trace_pipe_iter(trace_pipe_cb, NULL, 0); +} diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index 04fd1da707..2ce873c9f9 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -13,6 +13,9 @@ struct ksym { }; struct ksyms; +typedef int (*ksym_cmp_t)(const void *p1, const void *p2); +typedef int (*ksym_search_cmp_t)(const void *p1, const struct ksym *p2); + int load_kallsyms(void); struct ksym *ksym_search(long key); long ksym_get_addr(const char *name); @@ -22,10 +25,16 @@ struct ksym *ksym_search_local(struct ksyms *ksyms, long key); long ksym_get_addr_local(struct ksyms *ksyms, const char *name); void free_kallsyms_local(struct ksyms *ksyms); +struct ksyms *load_kallsyms_custom_local(ksym_cmp_t cmp_cb); +struct ksym *search_kallsyms_custom_local(struct ksyms *ksyms, const void *p1, + ksym_search_cmp_t cmp_cb); + /* open kallsyms and find addresses on the fly, faster than load + search. */ int kallsyms_find(const char *sym, unsigned long long *addr); void read_trace_pipe(void); +int read_trace_pipe_iter(void (*cb)(const char *str, void *data), + void *data, int iter); ssize_t get_uprobe_offset(const void *addr); ssize_t get_rel_offset(uintptr_t addr); diff --git a/tools/testing/selftests/bpf/uprobe_multi.c b/tools/testing/selftests/bpf/uprobe_multi.c index a61ceab60b..7ffa563ffe 100644 --- a/tools/testing/selftests/bpf/uprobe_multi.c +++ b/tools/testing/selftests/bpf/uprobe_multi.c @@ -9,7 +9,7 @@ #define NAME(name, idx) PASTE(name, idx) -#define DEF(name, idx) int NAME(name, idx)(void) { return 0; } +#define DEF(name, idx) int __attribute__((weak)) NAME(name, idx)(void) { return 0; } #define CALL(name, idx) NAME(name, idx)(); #define F(body, name, idx) body(name, idx) diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 244d4996e0..b2854238d4 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -792,10 +792,13 @@ static int parse_stats(const char *stats_str, struct stat_specs *specs) while ((next = strtok_r(state ? NULL : input, ",", &state))) { err = parse_stat(next, specs); - if (err) + if (err) { + free(input); return err; + } } + free(input); return 0; } diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index bdf5d81800..6f9956eed7 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -495,20 +495,6 @@ peek: return 0; } -struct ethtool_channels { - __u32 cmd; - __u32 max_rx; - __u32 max_tx; - __u32 max_other; - __u32 max_combined; - __u32 rx_count; - __u32 tx_count; - __u32 other_count; - __u32 combined_count; -}; - -#define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */ - static int rxq_num(const char *ifname) { struct ethtool_channels ch = { @@ -595,6 +581,8 @@ static void cleanup(void) if (bpf_obj) xdp_hw_metadata__destroy(bpf_obj); + + free((void *)saved_hwtstamp_ifname); } static void handle_signal(int sig) diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index b1102ee13f..2eac0895b0 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -81,6 +81,7 @@ #include <linux/mman.h> #include <linux/netdev.h> #include <linux/bitmap.h> +#include <linux/ethtool.h> #include <arpa/inet.h> #include <net/if.h> #include <locale.h> @@ -105,11 +106,15 @@ #include "../kselftest.h" #include "xsk_xdp_common.h" +#include <network_helpers.h> + static bool opt_verbose; static bool opt_print_tests; static enum test_mode opt_mode = TEST_MODE_ALL; static u32 opt_run_test = RUN_ALL_TESTS; +void test__fail(void) { /* for network_helpers.c */ } + static void __exit_with_error(int error, const char *file, const char *func, int line) { ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, @@ -239,7 +244,7 @@ static void enable_busy_poll(struct xsk_socket_info *xsk) (void *)&sock_opt, sizeof(sock_opt)) < 0) exit_with_error(errno); - sock_opt = BATCH_SIZE; + sock_opt = xsk->batch_size; if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET, (void *)&sock_opt, sizeof(sock_opt)) < 0) exit_with_error(errno); @@ -409,6 +414,33 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj } } +static int set_ring_size(struct ifobject *ifobj) +{ + int ret; + u32 ctr = 0; + + while (ctr++ < SOCK_RECONF_CTR) { + ret = set_hw_ring_size(ifobj->ifname, &ifobj->ring); + if (!ret) + break; + + /* Retry if it fails */ + if (ctr >= SOCK_RECONF_CTR || errno != EBUSY) + return -errno; + + usleep(USLEEP_MAX); + } + + return ret; +} + +static int hw_ring_size_reset(struct ifobject *ifobj) +{ + ifobj->ring.tx_pending = ifobj->set_ring.default_tx; + ifobj->ring.rx_pending = ifobj->set_ring.default_rx; + return set_ring_size(ifobj); +} + static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, struct ifobject *ifobj_rx) { @@ -439,6 +471,7 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, for (j = 0; j < MAX_SOCKETS; j++) { memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j])); ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS; + ifobj->xsk_arr[j].batch_size = DEFAULT_BATCH_SIZE; if (i == 0) ifobj->xsk_arr[j].pkt_stream = test->tx_pkt_stream_default; else @@ -451,12 +484,16 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, } } + if (ifobj_tx->hw_ring_size_supp) + hw_ring_size_reset(ifobj_tx); + test->ifobj_tx = ifobj_tx; test->ifobj_rx = ifobj_rx; test->current_step = 0; test->total_steps = 1; test->nb_sockets = 1; test->fail = false; + test->set_ring = false; test->mtu = MAX_ETH_PKT_SIZE; test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog; test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk; @@ -1087,7 +1124,7 @@ static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk) return TEST_CONTINUE; } - rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); + rcvd = xsk_ring_cons__peek(&xsk->rx, xsk->batch_size, &idx_rx); if (!rcvd) return TEST_CONTINUE; @@ -1239,7 +1276,8 @@ static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, b buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len); /* pkts_in_flight might be negative if many invalid packets are sent */ - if (pkts_in_flight >= (int)((umem_size(umem) - BATCH_SIZE * buffer_len) / buffer_len)) { + if (pkts_in_flight >= (int)((umem_size(umem) - xsk->batch_size * buffer_len) / + buffer_len)) { ret = kick_tx(xsk); if (ret) return TEST_FAILURE; @@ -1249,7 +1287,7 @@ static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, b fds.fd = xsk_socket__fd(xsk->xsk); fds.events = POLLOUT; - while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) { + while (xsk_ring_prod__reserve(&xsk->tx, xsk->batch_size, &idx) < xsk->batch_size) { if (use_poll) { ret = poll(&fds, 1, POLL_TMOUT); if (timeout) { @@ -1269,10 +1307,10 @@ static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, b } } - complete_pkts(xsk, BATCH_SIZE); + complete_pkts(xsk, xsk->batch_size); } - for (i = 0; i < BATCH_SIZE; i++) { + for (i = 0; i < xsk->batch_size; i++) { struct pkt *pkt = pkt_stream_get_next_tx_pkt(pkt_stream); u32 nb_frags_left, nb_frags, bytes_written = 0; @@ -1280,9 +1318,9 @@ static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, b break; nb_frags = pkt_nb_frags(umem->frame_size, pkt_stream, pkt); - if (nb_frags > BATCH_SIZE - i) { + if (nb_frags > xsk->batch_size - i) { pkt_stream_cancel(pkt_stream); - xsk_ring_prod__cancel(&xsk->tx, BATCH_SIZE - i); + xsk_ring_prod__cancel(&xsk->tx, xsk->batch_size - i); break; } nb_frags_left = nb_frags; @@ -1370,7 +1408,7 @@ static int wait_for_tx_completion(struct xsk_socket_info *xsk) return TEST_FAILURE; } - complete_pkts(xsk, BATCH_SIZE); + complete_pkts(xsk, xsk->batch_size); } return TEST_PASS; @@ -1860,6 +1898,14 @@ static int testapp_validate_traffic(struct test_spec *test) return TEST_SKIP; } + if (test->set_ring) { + if (ifobj_tx->hw_ring_size_supp) + return set_ring_size(ifobj_tx); + + ksft_test_result_skip("Changing HW ring size not supported.\n"); + return TEST_SKIP; + } + xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx); return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx); } @@ -2373,6 +2419,50 @@ static int testapp_xdp_metadata_mb(struct test_spec *test) return testapp_xdp_metadata_copy(test); } +static int testapp_hw_sw_min_ring_size(struct test_spec *test) +{ + int ret; + + test->set_ring = true; + test->total_steps = 2; + test->ifobj_tx->ring.tx_pending = DEFAULT_BATCH_SIZE; + test->ifobj_tx->ring.rx_pending = DEFAULT_BATCH_SIZE * 2; + test->ifobj_tx->xsk->batch_size = 1; + test->ifobj_rx->xsk->batch_size = 1; + ret = testapp_validate_traffic(test); + if (ret) + return ret; + + /* Set batch size to hw_ring_size - 1 */ + test->ifobj_tx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1; + test->ifobj_rx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1; + return testapp_validate_traffic(test); +} + +static int testapp_hw_sw_max_ring_size(struct test_spec *test) +{ + u32 max_descs = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2; + int ret; + + test->set_ring = true; + test->total_steps = 2; + test->ifobj_tx->ring.tx_pending = test->ifobj_tx->ring.tx_max_pending; + test->ifobj_tx->ring.rx_pending = test->ifobj_tx->ring.rx_max_pending; + test->ifobj_rx->umem->num_frames = max_descs; + test->ifobj_rx->xsk->rxqsize = max_descs; + test->ifobj_tx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + test->ifobj_rx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + + ret = testapp_validate_traffic(test); + if (ret) + return ret; + + /* Set batch_size to 4095 */ + test->ifobj_tx->xsk->batch_size = max_descs - 1; + test->ifobj_rx->xsk->batch_size = max_descs - 1; + return testapp_validate_traffic(test); +} + static void run_pkt_test(struct test_spec *test) { int ret; @@ -2477,7 +2567,9 @@ static const struct test_spec tests[] = { {.name = "ALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_aligned_inv_desc_mb}, {.name = "UNALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_unaligned_inv_desc_mb}, {.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags}, -}; + {.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size}, + {.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size}, + }; static void print_tests(void) { @@ -2497,6 +2589,7 @@ int main(int argc, char **argv) int modes = TEST_MODE_SKB + 1; struct test_spec test; bool shared_netdev; + int ret; /* Use libbpf 1.0 API mode */ libbpf_set_strict_mode(LIBBPF_STRICT_ALL); @@ -2534,6 +2627,13 @@ int main(int argc, char **argv) modes++; } + ret = get_hw_ring_size(ifobj_tx->ifname, &ifobj_tx->ring); + if (!ret) { + ifobj_tx->hw_ring_size_supp = true; + ifobj_tx->set_ring.default_tx = ifobj_tx->ring.tx_pending; + ifobj_tx->set_ring.default_rx = ifobj_tx->ring.rx_pending; + } + init_iface(ifobj_rx, worker_testapp_validate_rx); init_iface(ifobj_tx, worker_testapp_validate_tx); @@ -2581,6 +2681,9 @@ int main(int argc, char **argv) } } + if (ifobj_tx->hw_ring_size_supp) + hw_ring_size_reset(ifobj_tx); + pkt_stream_delete(tx_pkt_stream_default); pkt_stream_delete(rx_pkt_stream_default); xsk_unload_xdp_programs(ifobj_tx); diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index f174df2d69..906de5fab7 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -44,7 +44,7 @@ #define MAX_ETH_JUMBO_SIZE 9000 #define USLEEP_MAX 10000 #define SOCK_RECONF_CTR 10 -#define BATCH_SIZE 64 +#define DEFAULT_BATCH_SIZE 64 #define POLL_TMOUT 1000 #define THREAD_TMOUT 3 #define DEFAULT_PKT_CNT (4 * 1024) @@ -91,6 +91,7 @@ struct xsk_socket_info { struct pkt_stream *pkt_stream; u32 outstanding_tx; u32 rxqsize; + u32 batch_size; u8 dst_mac[ETH_ALEN]; u8 src_mac[ETH_ALEN]; }; @@ -113,6 +114,11 @@ struct pkt_stream { bool verbatim; }; +struct set_hw_ring { + u32 default_tx; + u32 default_rx; +}; + struct ifobject; struct test_spec; typedef int (*validation_func_t)(struct ifobject *ifobj); @@ -129,6 +135,8 @@ struct ifobject { struct xsk_xdp_progs *xdp_progs; struct bpf_map *xskmap; struct bpf_program *xdp_prog; + struct ethtool_ringparam ring; + struct set_hw_ring set_ring; enum test_mode mode; int ifindex; int mtu; @@ -145,6 +153,7 @@ struct ifobject { bool unaligned_supp; bool multi_buff_supp; bool multi_buff_zc_supp; + bool hw_ring_size_supp; }; struct test_spec { @@ -162,6 +171,7 @@ struct test_spec { u16 current_step; u16 nb_sockets; bool fail; + bool set_ring; enum test_mode mode; char name[MAX_TEST_NAME_SIZE]; }; diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c index 7cde07a5df..47bad7ddc5 100644 --- a/tools/testing/selftests/capabilities/test_execve.c +++ b/tools/testing/selftests/capabilities/test_execve.c @@ -82,7 +82,7 @@ static bool create_and_enter_ns(uid_t inner_uid) { uid_t outer_uid; gid_t outer_gid; - int i; + int i, ret; bool have_outer_privilege; outer_uid = getuid(); @@ -97,7 +97,10 @@ static bool create_and_enter_ns(uid_t inner_uid) ksft_exit_fail_msg("setresuid - %s\n", strerror(errno)); // Re-enable effective caps - capng_get_caps_process(); + ret = capng_get_caps_process(); + if (ret == -1) + ksft_exit_fail_msg("capng_get_caps_process failed\n"); + for (i = 0; i < CAP_LAST_CAP; i++) if (capng_have_capability(CAPNG_PERMITTED, i)) capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, i); @@ -207,6 +210,7 @@ static void exec_validate_cap(bool eff, bool perm, bool inh, bool ambient) static int do_tests(int uid, const char *our_path) { + int ret; bool have_outer_privilege = create_and_enter_ns(uid); int ourpath_fd = open(our_path, O_RDONLY | O_DIRECTORY); @@ -250,7 +254,9 @@ static int do_tests(int uid, const char *our_path) ksft_exit_fail_msg("chmod - %s\n", strerror(errno)); } - capng_get_caps_process(); + ret = capng_get_caps_process(); + if (ret == -1) + ksft_exit_fail_msg("capng_get_caps_process failed\n"); /* Make sure that i starts out clear */ capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); diff --git a/tools/testing/selftests/capabilities/validate_cap.c b/tools/testing/selftests/capabilities/validate_cap.c index 60b4e7b716..65f2a1c892 100644 --- a/tools/testing/selftests/capabilities/validate_cap.c +++ b/tools/testing/selftests/capabilities/validate_cap.c @@ -28,6 +28,7 @@ static bool bool_arg(char **argv, int i) int main(int argc, char **argv) { const char *atsec = ""; + int ret; /* * Be careful just in case a setgid or setcapped copy of this @@ -44,7 +45,11 @@ int main(int argc, char **argv) atsec = " (AT_SECURE is not set)"; #endif - capng_get_caps_process(); + ret = capng_get_caps_process(); + if (ret == -1) { + ksft_print_msg("capng_get_caps_process failed\n"); + return 1; + } if (capng_have_capability(CAPNG_EFFECTIVE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 1)) { ksft_print_msg("Wrong effective state%s\n", atsec); diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 00b4419289..16461dc0ff 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -4,7 +4,7 @@ CFLAGS += -Wall -pthread all: ${HELPER_PROGS} TEST_FILES := with_stress.sh -TEST_PROGS := test_stress.sh test_cpuset_prs.sh +TEST_PROGS := test_stress.sh test_cpuset_prs.sh test_cpuset_v1_hp.sh TEST_GEN_FILES := wait_inotify TEST_GEN_PROGS = test_memcontrol TEST_GEN_PROGS += test_kmem diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h index 89e8519fb2..e8d04ac9e3 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -18,7 +18,7 @@ */ static inline int values_close(long a, long b, int err) { - return abs(a - b) <= (a + b) / 100 * err; + return labs(a - b) <= (a + b) / 100 * err; } extern int cg_find_unified_root(char *root, size_t len, bool *nsdelegate); diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c index 186bf96f6a..dad2ed82f3 100644 --- a/tools/testing/selftests/cgroup/test_cpu.c +++ b/tools/testing/selftests/cgroup/test_cpu.c @@ -237,7 +237,7 @@ run_cpucg_weight_test( { int ret = KSFT_FAIL, i; char *parent = NULL; - struct cpu_hogger children[3] = {NULL}; + struct cpu_hogger children[3] = {}; parent = cg_name(root, "cpucg_test_0"); if (!parent) @@ -408,7 +408,7 @@ run_cpucg_nested_weight_test(const char *root, bool overprovisioned) { int ret = KSFT_FAIL, i; char *parent = NULL, *child = NULL; - struct cpu_hogger leaf[3] = {NULL}; + struct cpu_hogger leaf[3] = {}; long nested_leaf_usage, child_usage; int nprocs = get_nprocs(); diff --git a/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh new file mode 100755 index 0000000000..3f45512fb5 --- /dev/null +++ b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh @@ -0,0 +1,46 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Test the special cpuset v1 hotplug case where a cpuset become empty of +# CPUs will force migration of tasks out to an ancestor. +# + +skip_test() { + echo "$1" + echo "Test SKIPPED" + exit 4 # ksft_skip +} + +[[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!" + +# Find cpuset v1 mount point +CPUSET=$(mount -t cgroup | grep cpuset | head -1 | awk -e '{print $3}') +[[ -n "$CPUSET" ]] || skip_test "cpuset v1 mount point not found!" + +# +# Create a test cpuset, put a CPU and a task there and offline that CPU +# +TDIR=test$$ +[[ -d $CPUSET/$TDIR ]] || mkdir $CPUSET/$TDIR +echo 1 > $CPUSET/$TDIR/cpuset.cpus +echo 0 > $CPUSET/$TDIR/cpuset.mems +sleep 10& +TASK=$! +echo $TASK > $CPUSET/$TDIR/tasks +NEWCS=$(cat /proc/$TASK/cpuset) +[[ $NEWCS != "/$TDIR" ]] && { + echo "Unexpected cpuset $NEWCS, test FAILED!" + exit 1 +} + +echo 0 > /sys/devices/system/cpu/cpu1/online +sleep 0.5 +echo 1 > /sys/devices/system/cpu/cpu1/online +NEWCS=$(cat /proc/$TASK/cpuset) +rmdir $CPUSET/$TDIR +[[ $NEWCS != "/" ]] && { + echo "cpuset $NEWCS, test FAILED!" + exit 1 +} +echo "Test PASSED" +exit 0 diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c index 137506db03..96693d8772 100644 --- a/tools/testing/selftests/cgroup/test_kmem.c +++ b/tools/testing/selftests/cgroup/test_kmem.c @@ -192,7 +192,7 @@ static int test_kmem_memcg_deletion(const char *root) goto cleanup; sum = anon + file + kernel + sock; - if (abs(sum - current) < MAX_VMSTAT_ERROR) { + if (labs(sum - current) < MAX_VMSTAT_ERROR) { ret = KSFT_PASS; } else { printf("memory.current = %ld\n", current); @@ -380,7 +380,7 @@ static int test_percpu_basic(const char *root) current = cg_read_long(parent, "memory.current"); percpu = cg_read_key_long(parent, "memory.stat", "percpu "); - if (current > 0 && percpu > 0 && abs(current - percpu) < + if (current > 0 && percpu > 0 && labs(current - percpu) < MAX_VMSTAT_ERROR) ret = KSFT_PASS; else diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index b462416b38..41ae8047b8 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -716,7 +716,9 @@ static bool reclaim_until(const char *memcg, long goal) */ static int test_memcg_reclaim(const char *root) { - int ret = KSFT_FAIL, fd, retries; + int ret = KSFT_FAIL; + int fd = -1; + int retries; char *memcg; long current, expected_usage; diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c index ef7f395453..190096017f 100644 --- a/tools/testing/selftests/cgroup/test_zswap.c +++ b/tools/testing/selftests/cgroup/test_zswap.c @@ -50,7 +50,7 @@ static int get_zswap_stored_pages(size_t *value) return read_int("/sys/kernel/debug/zswap/stored_pages", value); } -static int get_cg_wb_count(const char *cg) +static long get_cg_wb_count(const char *cg) { return cg_read_key_long(cg, "memory.stat", "zswpwb"); } @@ -249,6 +249,132 @@ out: } /* + * Attempt writeback with the following steps: + * 1. Allocate memory. + * 2. Reclaim memory equal to the amount that was allocated in step 1. + This will move it into zswap. + * 3. Save current zswap usage. + * 4. Move the memory allocated in step 1 back in from zswap. + * 5. Set zswap.max to half the amount that was recorded in step 3. + * 6. Attempt to reclaim memory equal to the amount that was allocated, + this will either trigger writeback if it's enabled, or reclamation + will fail if writeback is disabled as there isn't enough zswap space. + */ +static int attempt_writeback(const char *cgroup, void *arg) +{ + long pagesize = sysconf(_SC_PAGESIZE); + char *test_group = arg; + size_t memsize = MB(4); + char buf[pagesize]; + long zswap_usage; + bool wb_enabled; + int ret = -1; + char *mem; + + wb_enabled = cg_read_long(test_group, "memory.zswap.writeback"); + mem = (char *)malloc(memsize); + if (!mem) + return ret; + + /* + * Fill half of each page with increasing data, and keep other + * half empty, this will result in data that is still compressible + * and ends up in zswap, with material zswap usage. + */ + for (int i = 0; i < pagesize; i++) + buf[i] = i < pagesize/2 ? (char) i : 0; + + for (int i = 0; i < memsize; i += pagesize) + memcpy(&mem[i], buf, pagesize); + + /* Try and reclaim allocated memory */ + if (cg_write_numeric(test_group, "memory.reclaim", memsize)) { + ksft_print_msg("Failed to reclaim all of the requested memory\n"); + goto out; + } + + zswap_usage = cg_read_long(test_group, "memory.zswap.current"); + + /* zswpin */ + for (int i = 0; i < memsize; i += pagesize) { + if (memcmp(&mem[i], buf, pagesize)) { + ksft_print_msg("invalid memory\n"); + goto out; + } + } + + if (cg_write_numeric(test_group, "memory.zswap.max", zswap_usage/2)) + goto out; + + /* + * If writeback is enabled, trying to reclaim memory now will trigger a + * writeback as zswap.max is half of what was needed when reclaim ran the first time. + * If writeback is disabled, memory reclaim will fail as zswap is limited and + * it can't writeback to swap. + */ + ret = cg_write_numeric(test_group, "memory.reclaim", memsize); + if (!wb_enabled) + ret = (ret == -EAGAIN) ? 0 : -1; + +out: + free(mem); + return ret; +} + +/* Test to verify the zswap writeback path */ +static int test_zswap_writeback(const char *root, bool wb) +{ + long zswpwb_before, zswpwb_after; + int ret = KSFT_FAIL; + char *test_group; + + test_group = cg_name(root, "zswap_writeback_test"); + if (!test_group) + goto out; + if (cg_create(test_group)) + goto out; + if (cg_write(test_group, "memory.zswap.writeback", wb ? "1" : "0")) + goto out; + + zswpwb_before = get_cg_wb_count(test_group); + if (zswpwb_before != 0) { + ksft_print_msg("zswpwb_before = %ld instead of 0\n", zswpwb_before); + goto out; + } + + if (cg_run(test_group, attempt_writeback, (void *) test_group)) + goto out; + + /* Verify that zswap writeback occurred only if writeback was enabled */ + zswpwb_after = get_cg_wb_count(test_group); + if (zswpwb_after < 0) + goto out; + + if (wb != !!zswpwb_after) { + ksft_print_msg("zswpwb_after is %ld while wb is %s", + zswpwb_after, wb ? "enabled" : "disabled"); + goto out; + } + + ret = KSFT_PASS; + +out: + cg_destroy(test_group); + free(test_group); + return ret; +} + +static int test_zswap_writeback_enabled(const char *root) +{ + return test_zswap_writeback(root, true); +} + +static int test_zswap_writeback_disabled(const char *root) +{ + return test_zswap_writeback(root, false); +} + +/* * When trying to store a memcg page in zswap, if the memcg hits its memory * limit in zswap, writeback should affect only the zswapped pages of that * memcg. @@ -257,7 +383,7 @@ static int test_no_invasive_cgroup_shrink(const char *root) { int ret = KSFT_FAIL; size_t control_allocation_size = MB(10); - char *control_allocation, *wb_group = NULL, *control_group = NULL; + char *control_allocation = NULL, *wb_group = NULL, *control_group = NULL; wb_group = setup_test_group_1M(root, "per_memcg_wb_test1"); if (!wb_group) @@ -342,7 +468,7 @@ static int test_no_kmem_bypass(const char *root) struct sysinfo sys_info; int ret = KSFT_FAIL; int child_status; - char *test_group; + char *test_group = NULL; pid_t child_pid; /* Read sys info and compute test values accordingly */ @@ -364,8 +490,6 @@ static int test_no_kmem_bypass(const char *root) trigger_allocation_size = sys_info.totalram / 20; /* Set up test memcg */ - if (cg_write(root, "cgroup.subtree_control", "+memory")) - goto out; test_group = cg_name(root, "kmem_bypass_test"); if (!test_group) goto out; @@ -425,6 +549,8 @@ struct zswap_test { T(test_zswap_usage), T(test_swapin_nozswap), T(test_zswapin), + T(test_zswap_writeback_enabled), + T(test_zswap_writeback_disabled), T(test_no_kmem_bypass), T(test_no_invasive_cgroup_shrink), }; diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c index 3c9bf0cd82..e61f07973c 100644 --- a/tools/testing/selftests/clone3/clone3.c +++ b/tools/testing/selftests/clone3/clone3.c @@ -95,9 +95,14 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) getpid(), pid); if (waitpid(-1, &status, __WALL) < 0) { - ksft_print_msg("Child returned %s\n", strerror(errno)); + ksft_print_msg("waitpid() returned %s\n", strerror(errno)); return -errno; } + if (!WIFEXITED(status)) { + ksft_print_msg("Child did not exit normally, status 0x%x\n", + status); + return EXIT_FAILURE; + } if (WEXITSTATUS(status)) return WEXITSTATUS(status); diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c index 54a8b2445b..ce04267868 100644 --- a/tools/testing/selftests/clone3/clone3_clear_sighand.c +++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c @@ -120,5 +120,5 @@ int main(int argc, char **argv) test_clone3_clear_sighand(); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c index ed785afb60..bfb0da2b4f 100644 --- a/tools/testing/selftests/clone3/clone3_set_tid.c +++ b/tools/testing/selftests/clone3/clone3_set_tid.c @@ -114,7 +114,8 @@ static int call_clone3_set_tid(pid_t *set_tid, return WEXITSTATUS(status); } -static void test_clone3_set_tid(pid_t *set_tid, +static void test_clone3_set_tid(const char *desc, + pid_t *set_tid, size_t set_tid_size, int flags, int expected, @@ -129,17 +130,13 @@ static void test_clone3_set_tid(pid_t *set_tid, ret = call_clone3_set_tid(set_tid, set_tid_size, flags, expected_pid, wait_for_it); ksft_print_msg( - "[%d] clone3() with CLONE_SET_TID %d says :%d - expected %d\n", + "[%d] clone3() with CLONE_SET_TID %d says: %d - expected %d\n", getpid(), set_tid[0], ret, expected); - if (ret != expected) - ksft_test_result_fail( - "[%d] Result (%d) is different than expected (%d)\n", - getpid(), ret, expected); - else - ksft_test_result_pass( - "[%d] Result (%d) matches expectation (%d)\n", - getpid(), ret, expected); + + ksft_test_result(ret == expected, "%s with %zu TIDs and flags 0x%x\n", + desc, set_tid_size, flags); } + int main(int argc, char *argv[]) { FILE *f; @@ -172,73 +169,91 @@ int main(int argc, char *argv[]) /* Try invalid settings */ memset(&set_tid, 0, sizeof(set_tid)); - test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0); + test_clone3_set_tid("invalid size, 0 TID", + set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0); - test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0); + test_clone3_set_tid("invalid size, 0 TID", + set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0); - test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0, - -EINVAL, 0, 0); + test_clone3_set_tid("invalid size, 0 TID", + set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0, + -EINVAL, 0, 0); - test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0); + test_clone3_set_tid("invalid size, 0 TID", + set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0); /* * This can actually work if this test running in a MAX_PID_NS_LEVEL - 1 * nested PID namespace. */ - test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0); + test_clone3_set_tid("invalid size, 0 TID", + set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0); memset(&set_tid, 0xff, sizeof(set_tid)); - test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0); + test_clone3_set_tid("invalid size, TID all 1s", + set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0); - test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0); + test_clone3_set_tid("invalid size, TID all 1s", + set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0); - test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0, - -EINVAL, 0, 0); + test_clone3_set_tid("invalid size, TID all 1s", + set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0, + -EINVAL, 0, 0); - test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0); + test_clone3_set_tid("invalid size, TID all 1s", + set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0); /* * This can actually work if this test running in a MAX_PID_NS_LEVEL - 1 * nested PID namespace. */ - test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0); + test_clone3_set_tid("invalid size, TID all 1s", + set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0); memset(&set_tid, 0, sizeof(set_tid)); /* Try with an invalid PID */ set_tid[0] = 0; - test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0); + test_clone3_set_tid("valid size, 0 TID", + set_tid, 1, 0, -EINVAL, 0, 0); set_tid[0] = -1; - test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0); + test_clone3_set_tid("valid size, -1 TID", + set_tid, 1, 0, -EINVAL, 0, 0); /* Claim that the set_tid array actually contains 2 elements. */ - test_clone3_set_tid(set_tid, 2, 0, -EINVAL, 0, 0); + test_clone3_set_tid("2 TIDs, -1 and 0", + set_tid, 2, 0, -EINVAL, 0, 0); /* Try it in a new PID namespace */ if (uid == 0) - test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0); + test_clone3_set_tid("valid size, -1 TID", + set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0); else ksft_test_result_skip("Clone3() with set_tid requires root\n"); /* Try with a valid PID (1) this should return -EEXIST. */ set_tid[0] = 1; if (uid == 0) - test_clone3_set_tid(set_tid, 1, 0, -EEXIST, 0, 0); + test_clone3_set_tid("duplicate PID 1", + set_tid, 1, 0, -EEXIST, 0, 0); else ksft_test_result_skip("Clone3() with set_tid requires root\n"); /* Try it in a new PID namespace */ if (uid == 0) - test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, 0, 0, 0); + test_clone3_set_tid("duplicate PID 1", + set_tid, 1, CLONE_NEWPID, 0, 0, 0); else ksft_test_result_skip("Clone3() with set_tid requires root\n"); /* pid_max should fail everywhere */ set_tid[0] = pid_max; - test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0); + test_clone3_set_tid("set TID to maximum", + set_tid, 1, 0, -EINVAL, 0, 0); if (uid == 0) - test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0); + test_clone3_set_tid("set TID to maximum", + set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0); else ksft_test_result_skip("Clone3() with set_tid requires root\n"); @@ -262,10 +277,12 @@ int main(int argc, char *argv[]) /* After the child has finished, its PID should be free. */ set_tid[0] = pid; - test_clone3_set_tid(set_tid, 1, 0, 0, 0, 0); + test_clone3_set_tid("reallocate child TID", + set_tid, 1, 0, 0, 0, 0); /* This should fail as there is no PID 1 in that namespace */ - test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0); + test_clone3_set_tid("duplicate child TID", + set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0); /* * Creating a process with PID 1 in the newly created most nested @@ -274,7 +291,8 @@ int main(int argc, char *argv[]) */ set_tid[0] = 1; set_tid[1] = pid; - test_clone3_set_tid(set_tid, 2, CLONE_NEWPID, 0, pid, 0); + test_clone3_set_tid("create PID 1 in new NS", + set_tid, 2, CLONE_NEWPID, 0, pid, 0); ksft_print_msg("unshare PID namespace\n"); if (unshare(CLONE_NEWPID) == -1) @@ -284,7 +302,8 @@ int main(int argc, char *argv[]) set_tid[0] = pid; /* This should fail as there is no PID 1 in that namespace */ - test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0); + test_clone3_set_tid("duplicate PID 1", + set_tid, 1, 0, -EINVAL, 0, 0); /* Let's create a PID 1 */ ns_pid = fork(); @@ -295,21 +314,25 @@ int main(int argc, char *argv[]) */ set_tid[0] = 43; set_tid[1] = -1; - test_clone3_set_tid(set_tid, 2, 0, -EINVAL, 0, 0); + test_clone3_set_tid("check leak on invalid TID -1", + set_tid, 2, 0, -EINVAL, 0, 0); set_tid[0] = 43; set_tid[1] = pid; - test_clone3_set_tid(set_tid, 2, 0, 0, 43, 0); + test_clone3_set_tid("check leak on invalid specific TID", + set_tid, 2, 0, 0, 43, 0); ksft_print_msg("Child in PID namespace has PID %d\n", getpid()); set_tid[0] = 2; - test_clone3_set_tid(set_tid, 1, 0, 0, 2, 0); + test_clone3_set_tid("create PID 2 in child NS", + set_tid, 1, 0, 0, 2, 0); set_tid[0] = 1; set_tid[1] = -1; set_tid[2] = pid; /* This should fail as there is invalid PID at level '1'. */ - test_clone3_set_tid(set_tid, 3, CLONE_NEWPID, -EINVAL, 0, 0); + test_clone3_set_tid("fail due to invalid TID at level 1", + set_tid, 3, CLONE_NEWPID, -EINVAL, 0, 0); set_tid[0] = 1; set_tid[1] = 42; @@ -319,13 +342,15 @@ int main(int argc, char *argv[]) * namespaces. Again assuming this is running in the host's * PID namespace. Not yet nested. */ - test_clone3_set_tid(set_tid, 4, CLONE_NEWPID, -EINVAL, 0, 0); + test_clone3_set_tid("fail due to too few active PID NSs", + set_tid, 4, CLONE_NEWPID, -EINVAL, 0, 0); /* * This should work and from the parent we should see * something like 'NSpid: pid 42 1'. */ - test_clone3_set_tid(set_tid, 3, CLONE_NEWPID, 0, 42, true); + test_clone3_set_tid("verify that we have 3 PID NSs", + set_tid, 3, CLONE_NEWPID, 0, 42, true); child_exit(ksft_cnt.ksft_fail); } @@ -380,16 +405,14 @@ int main(int argc, char *argv[]) ksft_cnt.ksft_pass += 6 - (ksft_cnt.ksft_fail - WEXITSTATUS(status)); ksft_cnt.ksft_fail = WEXITSTATUS(status); - if (ns3 == pid && ns2 == 42 && ns1 == 1) - ksft_test_result_pass( - "PIDs in all namespaces as expected (%d,%d,%d)\n", - ns3, ns2, ns1); - else - ksft_test_result_fail( - "PIDs in all namespaces not as expected (%d,%d,%d)\n", - ns3, ns2, ns1); + ksft_print_msg("Expecting PIDs %d, 42, 1\n", pid); + ksft_print_msg("Have PIDs in namespaces: %d, %d, %d\n", ns3, ns2, ns1); + ksft_test_result(ns3 == pid && ns2 == 42 && ns1 == 1, + "PIDs in all namespaces as expected\n"); out: ret = 0; - return !ret ? ksft_exit_pass() : ksft_exit_fail(); + if (ret) + ksft_exit_fail(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index c59e4adb90..991c473e38 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -17,6 +17,15 @@ #include "../kselftest_harness.h" #include "../clone3/clone3_selftests.h" + +#ifndef F_LINUX_SPECIFIC_BASE +#define F_LINUX_SPECIFIC_BASE 1024 +#endif + +#ifndef F_DUPFD_QUERY +#define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3) +#endif + static inline int sys_close_range(unsigned int fd, unsigned int max_fd, unsigned int flags) { @@ -45,6 +54,15 @@ TEST(core_close_range) SKIP(return, "close_range() syscall not supported"); } + for (i = 0; i < 100; i++) { + ret = fcntl(open_fds[i], F_DUPFD_QUERY, open_fds[i + 1]); + if (ret < 0) { + EXPECT_EQ(errno, EINVAL); + } else { + EXPECT_EQ(ret, 0); + } + } + EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0)); for (i = 0; i <= 50; i++) @@ -358,7 +376,7 @@ TEST(close_range_cloexec_unshare) */ TEST(close_range_cloexec_syzbot) { - int fd1, fd2, fd3, flags, ret, status; + int fd1, fd2, fd3, fd4, flags, ret, status; pid_t pid; struct __clone_args args = { .flags = CLONE_FILES, @@ -372,6 +390,13 @@ TEST(close_range_cloexec_syzbot) fd2 = dup2(fd1, 1000); EXPECT_GT(fd2, 0); + flags = fcntl(fd1, F_DUPFD_QUERY, fd2); + if (flags < 0) { + EXPECT_EQ(errno, EINVAL); + } else { + EXPECT_EQ(flags, 1); + } + pid = sys_clone3(&args, sizeof(args)); ASSERT_GE(pid, 0); @@ -396,6 +421,15 @@ TEST(close_range_cloexec_syzbot) fd3 = dup2(fd1, 42); EXPECT_GT(fd3, 0); + flags = fcntl(fd1, F_DUPFD_QUERY, fd3); + if (flags < 0) { + EXPECT_EQ(errno, EINVAL); + } else { + EXPECT_EQ(flags, 1); + } + + + /* * Duplicating the file descriptor must remove the * FD_CLOEXEC flag. @@ -426,6 +460,24 @@ TEST(close_range_cloexec_syzbot) fd3 = dup2(fd1, 42); EXPECT_GT(fd3, 0); + flags = fcntl(fd1, F_DUPFD_QUERY, fd3); + if (flags < 0) { + EXPECT_EQ(errno, EINVAL); + } else { + EXPECT_EQ(flags, 1); + } + + fd4 = open("/dev/null", O_RDWR); + EXPECT_GT(fd4, 0); + + /* Same inode, different file pointers. */ + flags = fcntl(fd1, F_DUPFD_QUERY, fd4); + if (flags < 0) { + EXPECT_EQ(errno, EINVAL); + } else { + EXPECT_EQ(flags, 0); + } + flags = fcntl(fd3, F_GETFD); EXPECT_GT(flags, -1); EXPECT_EQ(flags & FD_CLOEXEC, 0); @@ -433,6 +485,7 @@ TEST(close_range_cloexec_syzbot) EXPECT_EQ(close(fd1), 0); EXPECT_EQ(close(fd2), 0); EXPECT_EQ(close(fd3), 0); + EXPECT_EQ(close(fd4), 0); } /* diff --git a/tools/testing/selftests/cpufreq/cpufreq.sh b/tools/testing/selftests/cpufreq/cpufreq.sh index b583a2fb45..a8b1dbc0a3 100755 --- a/tools/testing/selftests/cpufreq/cpufreq.sh +++ b/tools/testing/selftests/cpufreq/cpufreq.sh @@ -178,8 +178,7 @@ cpufreq_basic_tests() count=$(count_cpufreq_managed_cpus) if [ $count = 0 ]; then - printf "No cpu is managed by cpufreq core, exiting\n" - exit; + ktap_exit_fail_msg "No cpu is managed by cpufreq core, exiting\n" else printf "CPUFreq manages: $count CPUs\n\n" fi diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh index 60ce18ed06..a0eb84cf71 100755 --- a/tools/testing/selftests/cpufreq/main.sh +++ b/tools/testing/selftests/cpufreq/main.sh @@ -7,15 +7,15 @@ source governor.sh source module.sh source special-tests.sh +DIR="$(dirname $(readlink -f "$0"))" +source "${DIR}"/../kselftest/ktap_helpers.sh + FUNC=basic # do basic tests by default OUTFILE=cpufreq_selftest SYSFS= CPUROOT= CPUFREQROOT= -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - helpme() { printf "Usage: $0 [-h] [-todg args] @@ -32,7 +32,7 @@ helpme() [-d <driver's module name: only with \"-t modtest>\"] [-g <governor's module name: only with \"-t modtest>\"] \n" - exit 2 + exit "${KSFT_FAIL}" } prerequisite() @@ -40,8 +40,8 @@ prerequisite() msg="skip all tests:" if [ $UID != 0 ]; then - echo $msg must be run as root >&2 - exit $ksft_skip + ktap_skip_all "$msg must be run as root" + exit "${KSFT_SKIP}" fi taskset -p 01 $$ @@ -49,21 +49,21 @@ prerequisite() SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` if [ ! -d "$SYSFS" ]; then - echo $msg sysfs is not mounted >&2 - exit 2 + ktap_skip_all "$msg sysfs is not mounted" + exit "${KSFT_SKIP}" fi CPUROOT=$SYSFS/devices/system/cpu CPUFREQROOT="$CPUROOT/cpufreq" if ! ls $CPUROOT/cpu* > /dev/null 2>&1; then - echo $msg cpus not available in sysfs >&2 - exit 2 + ktap_skip_all "$msg cpus not available in sysfs" + exit "${KSFT_SKIP}" fi if ! ls $CPUROOT/cpufreq > /dev/null 2>&1; then - echo $msg cpufreq directory not available in sysfs >&2 - exit 2 + ktap_skip_all "$msg cpufreq directory not available in sysfs" + exit "${KSFT_SKIP}" fi } @@ -105,8 +105,7 @@ do_test() count=$(count_cpufreq_managed_cpus) if [ $count = 0 -a $FUNC != "modtest" ]; then - echo "No cpu is managed by cpufreq core, exiting" - exit 2; + ktap_exit_fail_msg "No cpu is managed by cpufreq core, exiting" fi case "$FUNC" in @@ -125,8 +124,7 @@ do_test() "modtest") # Do we have modules in place? if [ -z $DRIVER_MOD ] && [ -z $GOVERNOR_MOD ]; then - echo "No driver or governor module passed with -d or -g" - exit 2; + ktap_exit_fail_msg "No driver or governor module passed with -d or -g" fi if [ $DRIVER_MOD ]; then @@ -137,8 +135,7 @@ do_test() fi else if [ $count = 0 ]; then - echo "No cpu is managed by cpufreq core, exiting" - exit 2; + ktap_exit_fail_msg "No cpu is managed by cpufreq core, exiting" fi module_governor_test $GOVERNOR_MOD @@ -162,7 +159,7 @@ do_test() ;; *) - echo "Invalid [-f] function type" + ktap_print_msg "Invalid [-f] function type" helpme ;; esac @@ -186,13 +183,25 @@ dmesg_dumps() dmesg >> $1.dmesg_full.txt } +ktap_print_header + # Parse arguments parse_arguments $@ +ktap_set_plan 1 + # Make sure all requirements are met prerequisite # Run requested functions clear_dumps $OUTFILE do_test | tee -a $OUTFILE.txt +if [ "${PIPESTATUS[0]}" -ne 0 ]; then + exit ${PIPESTATUS[0]}; +fi dmesg_dumps $OUTFILE + +ktap_test_pass "Completed successfully" + +ktap_print_totals +exit "${KSFT_PASS}" diff --git a/tools/testing/selftests/cpufreq/module.sh b/tools/testing/selftests/cpufreq/module.sh index 22563cd122..7f2667e0ae 100755 --- a/tools/testing/selftests/cpufreq/module.sh +++ b/tools/testing/selftests/cpufreq/module.sh @@ -24,16 +24,14 @@ test_basic_insmod_rmmod() # insert module insmod $1 if [ $? != 0 ]; then - printf "Insmod $1 failed\n" - exit; + ktap_exit_fail_msg "Insmod $1 failed\n" fi printf "Removing $1 module\n" # remove module rmmod $1 if [ $? != 0 ]; then - printf "rmmod $1 failed\n" - exit; + ktap_exit_fail_msg "rmmod $1 failed\n" fi printf "\n" diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 789d6949c2..29a22f50e7 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -7,16 +7,21 @@ TEST_GEN_FILES += debugfs_target_ids_pid_leak TEST_GEN_FILES += access_memory TEST_FILES = _chk_dependency.sh _debugfs_common.sh + +# functionality tests TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh +TEST_PROGS += sysfs.sh +TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py +TEST_PROGS += damos_quota.py damos_quota_goal.py damos_apply_interval.py +TEST_PROGS += reclaim.sh lru_sort.sh + +# regression tests (reproducers of previously found bugs) TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh TEST_PROGS += debugfs_duplicate_context_creation.sh TEST_PROGS += debugfs_rm_non_contexts.sh TEST_PROGS += debugfs_target_ids_read_before_terminate_race.sh TEST_PROGS += debugfs_target_ids_pid_leak.sh -TEST_PROGS += sysfs.sh sysfs_update_removed_scheme_dir.sh +TEST_PROGS += sysfs_update_removed_scheme_dir.sh TEST_PROGS += sysfs_update_schemes_tried_regions_hang.py -TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py -TEST_PROGS += damos_quota.py damos_apply_interval.py -TEST_PROGS += reclaim.sh lru_sort.sh include ../lib.mk diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py index fe77d7e73a..2bd44c32be 100644 --- a/tools/testing/selftests/damon/_damon_sysfs.py +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -2,7 +2,18 @@ import os -sysfs_root = '/sys/kernel/mm/damon/admin' +ksft_skip=4 + +sysfs_root = None +with open('/proc/mounts', 'r') as f: + for line in f: + dev_name, mount_point, dev_fs = line.split()[:3] + if dev_fs == 'sysfs': + sysfs_root = '%s/kernel/mm/damon/admin' % mount_point + break +if sysfs_root is None: + print('Seems sysfs not mounted?') + exit(ksft_skip) def write_file(path, string): "Returns error string if failed, or None otherwise" @@ -34,11 +45,11 @@ class DamosAccessPattern: self.nr_accesses = nr_accesses self.age = age - if self.size == None: + if self.size is None: self.size = [0, 2**64 - 1] - if self.nr_accesses == None: + if self.nr_accesses is None: self.nr_accesses = [0, 2**64 - 1] - if self.age == None: + if self.age is None: self.age = [0, 2**64 - 1] def sysfs_dir(self): @@ -47,55 +58,109 @@ class DamosAccessPattern: def stage(self): err = write_file( os.path.join(self.sysfs_dir(), 'sz', 'min'), self.size[0]) - if err != None: + if err is not None: return err err = write_file( os.path.join(self.sysfs_dir(), 'sz', 'max'), self.size[1]) - if err != None: + if err is not None: return err err = write_file(os.path.join(self.sysfs_dir(), 'nr_accesses', 'min'), self.nr_accesses[0]) - if err != None: + if err is not None: return err err = write_file(os.path.join(self.sysfs_dir(), 'nr_accesses', 'max'), self.nr_accesses[1]) - if err != None: + if err is not None: return err err = write_file( os.path.join(self.sysfs_dir(), 'age', 'min'), self.age[0]) - if err != None: + if err is not None: return err err = write_file( os.path.join(self.sysfs_dir(), 'age', 'max'), self.age[1]) - if err != None: + if err is not None: return err +qgoal_metric_user_input = 'user_input' +qgoal_metric_some_mem_psi_us = 'some_mem_psi_us' +qgoal_metrics = [qgoal_metric_user_input, qgoal_metric_some_mem_psi_us] + +class DamosQuotaGoal: + metric = None + target_value = None + current_value = None + effective_bytes = None + quota = None # owner quota + idx = None + + def __init__(self, metric, target_value=10000, current_value=0): + self.metric = metric + self.target_value = target_value + self.current_value = current_value + + def sysfs_dir(self): + return os.path.join(self.quota.sysfs_dir(), 'goals', '%d' % self.idx) + + def stage(self): + err = write_file(os.path.join(self.sysfs_dir(), 'target_metric'), + self.metric) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'target_value'), + self.target_value) + if err is not None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'current_value'), + self.current_value) + if err is not None: + return err + return None + class DamosQuota: sz = None # size quota, in bytes ms = None # time quota + goals = None # quota goals reset_interval_ms = None # quota reset interval scheme = None # owner scheme - def __init__(self, sz=0, ms=0, reset_interval_ms=0): + def __init__(self, sz=0, ms=0, goals=None, reset_interval_ms=0): self.sz = sz self.ms = ms self.reset_interval_ms = reset_interval_ms + self.goals = goals if goals is not None else [] + for idx, goal in enumerate(self.goals): + goal.idx = idx + goal.quota = self def sysfs_dir(self): return os.path.join(self.scheme.sysfs_dir(), 'quotas') def stage(self): err = write_file(os.path.join(self.sysfs_dir(), 'bytes'), self.sz) - if err != None: + if err is not None: return err err = write_file(os.path.join(self.sysfs_dir(), 'ms'), self.ms) - if err != None: + if err is not None: return err err = write_file(os.path.join(self.sysfs_dir(), 'reset_interval_ms'), self.reset_interval_ms) - if err != None: + if err is not None: return err + nr_goals_file = os.path.join(self.sysfs_dir(), 'goals', 'nr_goals') + content, err = read_file(nr_goals_file) + if err is not None: + return err + if int(content) != len(self.goals): + err = write_file(nr_goals_file, len(self.goals)) + if err is not None: + return err + for goal in self.goals: + err = goal.stage() + if err is not None: + return err + return None + class DamosStats: nr_tried = None sz_tried = None @@ -136,30 +201,30 @@ class Damos: def stage(self): err = write_file(os.path.join(self.sysfs_dir(), 'action'), self.action) - if err != None: + if err is not None: return err err = self.access_pattern.stage() - if err != None: + if err is not None: return err err = write_file(os.path.join(self.sysfs_dir(), 'apply_interval_us'), '%d' % self.apply_interval_us) - if err != None: + if err is not None: return err err = self.quota.stage() - if err != None: + if err is not None: return err # disable watermarks err = write_file( os.path.join(self.sysfs_dir(), 'watermarks', 'metric'), 'none') - if err != None: + if err is not None: return err # disable filters err = write_file( os.path.join(self.sysfs_dir(), 'filters', 'nr_filters'), '0') - if err != None: + if err is not None: return err class DamonTarget: @@ -178,7 +243,7 @@ class DamonTarget: def stage(self): err = write_file( os.path.join(self.sysfs_dir(), 'regions', 'nr_regions'), '0') - if err != None: + if err is not None: return err return write_file( os.path.join(self.sysfs_dir(), 'pid_target'), self.pid) @@ -210,27 +275,27 @@ class DamonAttrs: def stage(self): err = write_file(os.path.join(self.interval_sysfs_dir(), 'sample_us'), self.sample_us) - if err != None: + if err is not None: return err err = write_file(os.path.join(self.interval_sysfs_dir(), 'aggr_us'), self.aggr_us) - if err != None: + if err is not None: return err err = write_file(os.path.join(self.interval_sysfs_dir(), 'update_us'), self.update_us) - if err != None: + if err is not None: return err err = write_file( os.path.join(self.nr_regions_range_sysfs_dir(), 'min'), self.min_nr_regions) - if err != None: + if err is not None: return err err = write_file( os.path.join(self.nr_regions_range_sysfs_dir(), 'max'), self.max_nr_regions) - if err != None: + if err is not None: return err class DamonCtx: @@ -264,24 +329,24 @@ class DamonCtx: def stage(self): err = write_file( os.path.join(self.sysfs_dir(), 'operations'), self.ops) - if err != None: + if err is not None: return err err = self.monitoring_attrs.stage() - if err != None: + if err is not None: return err nr_targets_file = os.path.join( self.sysfs_dir(), 'targets', 'nr_targets') content, err = read_file(nr_targets_file) - if err != None: + if err is not None: return err if int(content) != len(self.targets): err = write_file(nr_targets_file, '%d' % len(self.targets)) - if err != None: + if err is not None: return err for target in self.targets: err = target.stage() - if err != None: + if err is not None: return err nr_schemes_file = os.path.join( @@ -291,11 +356,11 @@ class DamonCtx: return err if int(content) != len(self.schemes): err = write_file(nr_schemes_file, '%d' % len(self.schemes)) - if err != None: + if err is not None: return err for scheme in self.schemes: err = scheme.stage() - if err != None: + if err is not None: return err return None @@ -319,16 +384,16 @@ class Kdamond: nr_contexts_file = os.path.join(self.sysfs_dir(), 'contexts', 'nr_contexts') content, err = read_file(nr_contexts_file) - if err != None: + if err is not None: return err if int(content) != len(self.contexts): err = write_file(nr_contexts_file, '%d' % len(self.contexts)) - if err != None: + if err is not None: return err for context in self.contexts: err = context.stage() - if err != None: + if err is not None: return err err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'on') return err @@ -336,20 +401,20 @@ class Kdamond: def update_schemes_tried_bytes(self): err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'update_schemes_tried_bytes') - if err != None: + if err is not None: return err for context in self.contexts: for scheme in context.schemes: content, err = read_file(os.path.join(scheme.sysfs_dir(), 'tried_regions', 'total_bytes')) - if err != None: + if err is not None: return err scheme.tried_bytes = int(content) def update_schemes_stats(self): err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'update_schemes_stats') - if err != None: + if err is not None: return err for context in self.contexts: for scheme in context.schemes: @@ -358,11 +423,39 @@ class Kdamond: 'sz_applied', 'qt_exceeds']: content, err = read_file( os.path.join(scheme.sysfs_dir(), 'stats', stat)) - if err != None: + if err is not None: return err stat_values.append(int(content)) scheme.stats = DamosStats(*stat_values) + def update_schemes_effective_quotas(self): + err = write_file(os.path.join(self.sysfs_dir(), 'state'), + 'update_schemes_effective_quotas') + if err is not None: + return err + for context in self.contexts: + for scheme in context.schemes: + for goal in scheme.quota.goals: + content, err = read_file( + os.path.join(scheme.quota.sysfs_dir(), + 'effective_bytes')) + if err is not None: + return err + goal.effective_bytes = int(content) + return None + + def commit_schemes_quota_goals(self): + for context in self.contexts: + for scheme in context.schemes: + for goal in scheme.quota.goals: + err = goal.stage() + if err is not None: + print('commit_schemes_quota_goals failed stagign: %s'% + err) + exit(1) + return write_file(os.path.join(self.sysfs_dir(), 'state'), + 'commit_schemes_quota_goals') + class Kdamonds: kdamonds = [] @@ -378,10 +471,10 @@ class Kdamonds: def start(self): err = write_file(os.path.join(self.sysfs_dir(), 'nr_kdamonds'), '%s' % len(self.kdamonds)) - if err != None: + if err is not None: return err for kdamond in self.kdamonds: err = kdamond.start() - if err != None: + if err is not None: return err return None diff --git a/tools/testing/selftests/damon/access_memory.c b/tools/testing/selftests/damon/access_memory.c index 585a2fa543..56b17e8fe1 100644 --- a/tools/testing/selftests/damon/access_memory.c +++ b/tools/testing/selftests/damon/access_memory.c @@ -35,7 +35,7 @@ int main(int argc, char *argv[]) start_clock = clock(); while ((clock() - start_clock) * 1000 / CLOCKS_PER_SEC < access_time_ms) - memset(regions[i], i, 1024 * 1024 * 10); + memset(regions[i], i, sz_region); } return 0; } diff --git a/tools/testing/selftests/damon/damos_quota_goal.py b/tools/testing/selftests/damon/damos_quota_goal.py new file mode 100644 index 0000000000..18246f3b62 --- /dev/null +++ b/tools/testing/selftests/damon/damos_quota_goal.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import subprocess +import time + +import _damon_sysfs + +def main(): + # access two 10 MiB memory regions, 2 second per each + sz_region = 10 * 1024 * 1024 + proc = subprocess.Popen(['./access_memory', '2', '%d' % sz_region, '2000']) + + goal = _damon_sysfs.DamosQuotaGoal( + metric=_damon_sysfs.qgoal_metric_user_input, target_value=10000) + kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond( + contexts=[_damon_sysfs.DamonCtx( + ops='vaddr', + targets=[_damon_sysfs.DamonTarget(pid=proc.pid)], + schemes=[_damon_sysfs.Damos( + action='stat', + quota=_damon_sysfs.DamosQuota( + goals=[goal], reset_interval_ms=100), + )] # schemes + )] # contexts + )]) # kdamonds + + err = kdamonds.start() + if err != None: + print('kdamond start failed: %s' % err) + exit(1) + + score_values_to_test = [0, 15000, 5000, 18000] + while proc.poll() == None: + if len(score_values_to_test) == 0: + time.sleep(0.1) + continue + + goal.current_value = score_values_to_test.pop(0) + expect_increase = goal.current_value < goal.target_value + + err = kdamonds.kdamonds[0].commit_schemes_quota_goals() + if err is not None: + print('commit_schemes_quota_goals failed: %s' % err) + exit(1) + + err = kdamonds.kdamonds[0].update_schemes_effective_quotas() + if err is not None: + print('before-update_schemes_effective_quotas failed: %s' % err) + exit(1) + last_effective_bytes = goal.effective_bytes + + time.sleep(0.5) + + err = kdamonds.kdamonds[0].update_schemes_effective_quotas() + if err is not None: + print('after-update_schemes_effective_quotas failed: %s' % err) + exit(1) + + print('score: %s, effective quota: %d -> %d (%.3fx)' % ( + goal.current_value, last_effective_bytes, goal.effective_bytes, + goal.effective_bytes / last_effective_bytes + if last_effective_bytes != 0 else -1.0)) + + if last_effective_bytes == goal.effective_bytes: + print('efective bytes not changed: %d' % goal.effective_bytes) + exit(1) + + increased = last_effective_bytes < goal.effective_bytes + if expect_increase != increased: + print('expectation of increase (%s) != increased (%s)' % + (expect_increase, increased)) + exit(1) + last_effective_bytes = goal.effective_bytes + +if __name__ == '__main__': + main() diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c index 890a8236a8..5f54152236 100644 --- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c +++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c @@ -15,6 +15,7 @@ #include <linux/dma-buf.h> #include <linux/dma-heap.h> #include <drm/drm.h> +#include "../kselftest.h" #define DEVPATH "/dev/dma_heap" @@ -90,14 +91,13 @@ static int dmabuf_heap_open(char *name) char buf[256]; ret = snprintf(buf, 256, "%s/%s", DEVPATH, name); - if (ret < 0) { - printf("snprintf failed!\n"); - return ret; - } + if (ret < 0) + ksft_exit_fail_msg("snprintf failed! %d\n", ret); fd = open(buf, O_RDWR); if (fd < 0) - printf("open %s failed!\n", buf); + ksft_exit_fail_msg("open %s failed: %s\n", buf, strerror(errno)); + return fd; } @@ -140,7 +140,7 @@ static int dmabuf_sync(int fd, int start_stop) #define ONE_MEG (1024 * 1024) -static int test_alloc_and_import(char *heap_name) +static void test_alloc_and_import(char *heap_name) { int heap_fd = -1, dmabuf_fd = -1, importer_fd = -1; uint32_t handle = 0; @@ -148,27 +148,19 @@ static int test_alloc_and_import(char *heap_name) int ret; heap_fd = dmabuf_heap_open(heap_name); - if (heap_fd < 0) - return -1; - printf(" Testing allocation and importing: "); + ksft_print_msg("Testing allocation and importing:\n"); ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0, &dmabuf_fd); if (ret) { - printf("FAIL (Allocation Failed!)\n"); - ret = -1; - goto out; + ksft_test_result_fail("FAIL (Allocation Failed!) %d\n", ret); + return; } + /* mmap and write a simple pattern */ - p = mmap(NULL, - ONE_MEG, - PROT_READ | PROT_WRITE, - MAP_SHARED, - dmabuf_fd, - 0); + p = mmap(NULL, ONE_MEG, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd, 0); if (p == MAP_FAILED) { - printf("FAIL (mmap() failed)\n"); - ret = -1; - goto out; + ksft_test_result_fail("FAIL (mmap() failed): %s\n", strerror(errno)); + goto close_and_return; } dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START); @@ -178,71 +170,64 @@ static int test_alloc_and_import(char *heap_name) importer_fd = open_vgem(); if (importer_fd < 0) { - ret = importer_fd; - printf("(Could not open vgem - skipping): "); + ksft_test_result_skip("Could not open vgem %d\n", importer_fd); } else { ret = import_vgem_fd(importer_fd, dmabuf_fd, &handle); - if (ret < 0) { - printf("FAIL (Failed to import buffer)\n"); - goto out; - } + ksft_test_result(ret >= 0, "Import buffer %d\n", ret); } ret = dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START); if (ret < 0) { - printf("FAIL (DMA_BUF_SYNC_START failed!)\n"); + ksft_print_msg("FAIL (DMA_BUF_SYNC_START failed!) %d\n", ret); goto out; } memset(p, 0xff, ONE_MEG); ret = dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_END); if (ret < 0) { - printf("FAIL (DMA_BUF_SYNC_END failed!)\n"); + ksft_print_msg("FAIL (DMA_BUF_SYNC_END failed!) %d\n", ret); goto out; } close_handle(importer_fd, handle); - ret = 0; - printf(" OK\n"); + ksft_test_result_pass("%s dmabuf sync succeeded\n", __func__); + return; + out: - if (p) - munmap(p, ONE_MEG); - if (importer_fd >= 0) - close(importer_fd); - if (dmabuf_fd >= 0) - close(dmabuf_fd); - if (heap_fd >= 0) - close(heap_fd); + ksft_test_result_fail("%s dmabuf sync failed\n", __func__); + munmap(p, ONE_MEG); + close(importer_fd); - return ret; +close_and_return: + close(dmabuf_fd); + close(heap_fd); } -static int test_alloc_zeroed(char *heap_name, size_t size) +static void test_alloc_zeroed(char *heap_name, size_t size) { int heap_fd = -1, dmabuf_fd[32]; - int i, j, ret; + int i, j, k, ret; void *p = NULL; char *c; - printf(" Testing alloced %ldk buffers are zeroed: ", size / 1024); + ksft_print_msg("Testing alloced %ldk buffers are zeroed:\n", size / 1024); heap_fd = dmabuf_heap_open(heap_name); - if (heap_fd < 0) - return -1; /* Allocate and fill a bunch of buffers */ for (i = 0; i < 32; i++) { ret = dmabuf_heap_alloc(heap_fd, size, 0, &dmabuf_fd[i]); - if (ret < 0) { - printf("FAIL (Allocation (%i) failed)\n", i); - goto out; + if (ret) { + ksft_test_result_fail("FAIL (Allocation (%i) failed) %d\n", i, ret); + goto close_and_return; } + /* mmap and fill with simple pattern */ p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd[i], 0); if (p == MAP_FAILED) { - printf("FAIL (mmap() failed!)\n"); - ret = -1; - goto out; + ksft_test_result_fail("FAIL (mmap() failed!): %s\n", strerror(errno)); + goto close_and_return; } + dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_START); memset(p, 0xff, size); dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END); @@ -251,48 +236,47 @@ static int test_alloc_zeroed(char *heap_name, size_t size) /* close them all */ for (i = 0; i < 32; i++) close(dmabuf_fd[i]); + ksft_test_result_pass("Allocate and fill a bunch of buffers\n"); /* Allocate and validate all buffers are zeroed */ for (i = 0; i < 32; i++) { ret = dmabuf_heap_alloc(heap_fd, size, 0, &dmabuf_fd[i]); if (ret < 0) { - printf("FAIL (Allocation (%i) failed)\n", i); - goto out; + ksft_test_result_fail("FAIL (Allocation (%i) failed) %d\n", i, ret); + goto close_and_return; } /* mmap and validate everything is zero */ p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd[i], 0); if (p == MAP_FAILED) { - printf("FAIL (mmap() failed!)\n"); - ret = -1; - goto out; + ksft_test_result_fail("FAIL (mmap() failed!): %s\n", strerror(errno)); + goto close_and_return; } + dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_START); c = (char *)p; for (j = 0; j < size; j++) { if (c[j] != 0) { - printf("FAIL (Allocated buffer not zeroed @ %i)\n", j); - break; + ksft_print_msg("FAIL (Allocated buffer not zeroed @ %i)\n", j); + dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END); + munmap(p, size); + goto out; } } dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END); munmap(p, size); } - /* close them all */ - for (i = 0; i < 32; i++) - close(dmabuf_fd[i]); - - close(heap_fd); - printf("OK\n"); - return 0; out: - while (i > 0) { - close(dmabuf_fd[i]); - i--; - } + ksft_test_result(i == 32, "Allocate and validate all buffers are zeroed\n"); + +close_and_return: + /* close them all */ + for (k = 0; k < i; k++) + close(dmabuf_fd[k]); + close(heap_fd); - return ret; + return; } /* Test the ioctl version compatibility w/ a smaller structure then expected */ @@ -360,126 +344,97 @@ static int dmabuf_heap_alloc_newer(int fd, size_t len, unsigned int flags, return ret; } -static int test_alloc_compat(char *heap_name) +static void test_alloc_compat(char *heap_name) { - int heap_fd = -1, dmabuf_fd = -1; - int ret; + int ret, heap_fd = -1, dmabuf_fd = -1; heap_fd = dmabuf_heap_open(heap_name); - if (heap_fd < 0) - return -1; - printf(" Testing (theoretical)older alloc compat: "); + ksft_print_msg("Testing (theoretical) older alloc compat:\n"); ret = dmabuf_heap_alloc_older(heap_fd, ONE_MEG, 0, &dmabuf_fd); - if (ret) { - printf("FAIL (Older compat allocation failed!)\n"); - ret = -1; - goto out; - } - close(dmabuf_fd); - printf("OK\n"); + if (dmabuf_fd >= 0) + close(dmabuf_fd); + ksft_test_result(!ret, "dmabuf_heap_alloc_older\n"); - printf(" Testing (theoretical)newer alloc compat: "); + ksft_print_msg("Testing (theoretical) newer alloc compat:\n"); ret = dmabuf_heap_alloc_newer(heap_fd, ONE_MEG, 0, &dmabuf_fd); - if (ret) { - printf("FAIL (Newer compat allocation failed!)\n"); - ret = -1; - goto out; - } - printf("OK\n"); -out: if (dmabuf_fd >= 0) close(dmabuf_fd); - if (heap_fd >= 0) - close(heap_fd); + ksft_test_result(!ret, "dmabuf_heap_alloc_newer\n"); - return ret; + close(heap_fd); } -static int test_alloc_errors(char *heap_name) +static void test_alloc_errors(char *heap_name) { int heap_fd = -1, dmabuf_fd = -1; int ret; heap_fd = dmabuf_heap_open(heap_name); - if (heap_fd < 0) - return -1; - printf(" Testing expected error cases: "); + ksft_print_msg("Testing expected error cases:\n"); ret = dmabuf_heap_alloc(0, ONE_MEG, 0x111111, &dmabuf_fd); - if (!ret) { - printf("FAIL (Did not see expected error (invalid fd)!)\n"); - ret = -1; - goto out; - } + ksft_test_result(ret, "Error expected on invalid fd %d\n", ret); ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0x111111, &dmabuf_fd); - if (!ret) { - printf("FAIL (Did not see expected error (invalid heap flags)!)\n"); - ret = -1; - goto out; - } + ksft_test_result(ret, "Error expected on invalid heap flags %d\n", ret); ret = dmabuf_heap_alloc_fdflags(heap_fd, ONE_MEG, ~(O_RDWR | O_CLOEXEC), 0, &dmabuf_fd); - if (!ret) { - printf("FAIL (Did not see expected error (invalid fd flags)!)\n"); - ret = -1; - goto out; - } + ksft_test_result(ret, "Error expected on invalid heap flags %d\n", ret); - printf("OK\n"); - ret = 0; -out: if (dmabuf_fd >= 0) close(dmabuf_fd); - if (heap_fd >= 0) - close(heap_fd); + close(heap_fd); +} - return ret; +static int numer_of_heaps(void) +{ + DIR *d = opendir(DEVPATH); + struct dirent *dir; + int heaps = 0; + + while ((dir = readdir(d))) { + if (!strncmp(dir->d_name, ".", 2)) + continue; + if (!strncmp(dir->d_name, "..", 3)) + continue; + heaps++; + } + + return heaps; } int main(void) { - DIR *d; struct dirent *dir; - int ret = -1; + DIR *d; + + ksft_print_header(); d = opendir(DEVPATH); if (!d) { - printf("No %s directory?\n", DEVPATH); - return -1; + ksft_print_msg("No %s directory?\n", DEVPATH); + return KSFT_SKIP; } - while ((dir = readdir(d)) != NULL) { + ksft_set_plan(11 * numer_of_heaps()); + + while ((dir = readdir(d))) { if (!strncmp(dir->d_name, ".", 2)) continue; if (!strncmp(dir->d_name, "..", 3)) continue; - printf("Testing heap: %s\n", dir->d_name); - printf("=======================================\n"); - ret = test_alloc_and_import(dir->d_name); - if (ret) - break; - - ret = test_alloc_zeroed(dir->d_name, 4 * 1024); - if (ret) - break; - - ret = test_alloc_zeroed(dir->d_name, ONE_MEG); - if (ret) - break; - - ret = test_alloc_compat(dir->d_name); - if (ret) - break; - - ret = test_alloc_errors(dir->d_name); - if (ret) - break; + ksft_print_msg("Testing heap: %s\n", dir->d_name); + ksft_print_msg("=======================================\n"); + test_alloc_and_import(dir->d_name); + test_alloc_zeroed(dir->d_name, 4 * 1024); + test_alloc_zeroed(dir->d_name, ONE_MEG); + test_alloc_compat(dir->d_name); + test_alloc_errors(dir->d_name); } closedir(d); - return ret; + ksft_finished(); } diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile new file mode 100644 index 0000000000..e54f382bcb --- /dev/null +++ b/tools/testing/selftests/drivers/net/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_INCLUDES := $(wildcard lib/py/*.py) + +TEST_PROGS := \ + ping.py \ + queues.py \ + stats.py \ +# end of TEST_PROGS + +include ../../lib.mk diff --git a/tools/testing/selftests/drivers/net/README.rst b/tools/testing/selftests/drivers/net/README.rst new file mode 100644 index 0000000000..3b6a29e656 --- /dev/null +++ b/tools/testing/selftests/drivers/net/README.rst @@ -0,0 +1,136 @@ +.. SPDX-License-Identifier: GPL-2.0 + +Running driver tests +==================== + +Networking driver tests are executed within kselftest framework like any +other tests. They support testing both real device drivers and emulated / +software drivers (latter mostly to test the core parts of the stack). + +SW mode +~~~~~~~ + +By default, when no extra parameters are set or exported, tests execute +against software drivers such as netdevsim. No extra preparation is required +the software devices are created and destroyed as part of the test. +In this mode the tests are indistinguishable from other selftests and +(for example) can be run under ``virtme-ng`` like the core networking selftests. + +HW mode +~~~~~~~ + +Executing tests against a real device requires external preparation. +The netdevice against which tests will be run must exist, be running +(in UP state) and be configured with an IP address. + +Refer to list of :ref:`Variables` later in this file to set up running +the tests against a real device. + +Both modes required +~~~~~~~~~~~~~~~~~~~ + +All tests in drivers/net must support running both against a software device +and a real device. SW-only tests should instead be placed in net/ or +drivers/net/netdevsim, HW-only tests in drivers/net/hw. + +Variables +========= + +The variables can be set in the environment or by creating a net.config +file in the same directory as this README file. Example:: + + $ NETIF=eth0 ./some_test.sh + +or:: + + $ cat tools/testing/selftests/drivers/net/net.config + # Variable set in a file + NETIF=eth0 + +Local test (which don't require endpoint for sending / receiving traffic) +need only the ``NETIF`` variable. Remaining variables define the endpoint +and communication method. + +NETIF +~~~~~ + +Name of the netdevice against which the test should be executed. +When empty or not set software devices will be used. + +LOCAL_V4, LOCAL_V6, REMOTE_V4, REMOTE_V6 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Local and remote endpoint IP addresses. + +REMOTE_TYPE +~~~~~~~~~~~ + +Communication method used to run commands on the remote endpoint. +Test framework has built-in support for ``netns`` and ``ssh`` channels. +``netns`` assumes the "remote" interface is part of the same +host, just moved to the specified netns. +``ssh`` communicates with remote endpoint over ``ssh`` and ``scp``. +Using persistent SSH connections is strongly encouraged to avoid +the latency of SSH connection setup on every command. + +Communication methods are defined by classes in ``lib/py/remote_{name}.py``. +It should be possible to add a new method without modifying any of +the framework, by simply adding an appropriately named file to ``lib/py``. + +REMOTE_ARGS +~~~~~~~~~~~ + +Arguments used to construct the communication channel. +Communication channel dependent:: + + for netns - name of the "remote" namespace + for ssh - name/address of the remote host + +Example +======= + +Build the selftests:: + + # make -C tools/testing/selftests/ TARGETS="drivers/net drivers/net/hw" + +"Install" the tests and copy them over to the target machine:: + + # make -C tools/testing/selftests/ TARGETS="drivers/net drivers/net/hw" \ + install INSTALL_PATH=/tmp/ksft-net-drv + + # rsync -ra --delete /tmp/ksft-net-drv root@192.168.1.1:/root/ + +On the target machine, running the tests will use netdevsim by default:: + + [/root] # ./ksft-net-drv/run_kselftest.sh -t drivers/net:ping.py + TAP version 13 + 1..1 + # timeout set to 45 + # selftests: drivers/net: ping.py + # KTAP version 1 + # 1..3 + # ok 1 ping.test_v4 + # ok 2 ping.test_v6 + # ok 3 ping.test_tcp + # # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0 + ok 1 selftests: drivers/net: ping.py + +Create a config with remote info:: + + [/root] # cat > ./ksft-net-drv/drivers/net/net.config <<EOF + NETIF=eth0 + LOCAL_V4=192.168.1.1 + REMOTE_V4=192.168.1.2 + REMOTE_TYPE=ssh + REMOTE_ARGS=root@192.168.1.2 + EOF + +Run the test:: + + [/root] # ./ksft-net-drv/drivers/net/ping.py + KTAP version 1 + 1..3 + ok 1 ping.test_v4 + ok 2 ping.test_v6 # SKIP Test requires IPv6 connectivity + ok 3 ping.test_tcp + # Totals: pass:2 fail:0 xfail:0 xpass:0 skip:1 error:0 diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config new file mode 100644 index 0000000000..f6a58ce8a2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/config @@ -0,0 +1,2 @@ +CONFIG_IPV6=y +CONFIG_NETDEVSIM=m diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile new file mode 100644 index 0000000000..4933d045ab --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: GPL-2.0+ OR MIT + +TEST_PROGS = \ + csum.py \ + devlink_port_split.py \ + ethtool.sh \ + ethtool_extended_state.sh \ + ethtool_mm.sh \ + ethtool_rmon.sh \ + hw_stats_l3.sh \ + hw_stats_l3_gre.sh \ + loopback.sh \ + pp_alloc_fail.py \ + # + +TEST_FILES := \ + ethtool_lib.sh \ + # + +TEST_INCLUDES := \ + $(wildcard lib/py/*.py ../lib/py/*.py) \ + ../../../net/lib.sh \ + ../../../net/forwarding/lib.sh \ + ../../../net/forwarding/ipip_lib.sh \ + ../../../net/forwarding/tc_common.sh \ + # + +include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py new file mode 100755 index 0000000000..cb40497fae --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/csum.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Run the tools/testing/selftests/net/csum testsuite.""" + +from os import path + +from lib.py import ksft_run, ksft_exit, KsftSkipEx +from lib.py import EthtoolFamily, NetDrvEpEnv +from lib.py import bkg, cmd, wait_port_listen + +def test_receive(cfg, ipv4=False, extra_args=None): + """Test local nic checksum receive. Remote host sends crafted packets.""" + if not cfg.have_rx_csum: + raise KsftSkipEx(f"Test requires rx checksum offload on {cfg.ifname}") + + if ipv4: + ip_args = f"-4 -S {cfg.remote_v4} -D {cfg.v4}" + else: + ip_args = f"-6 -S {cfg.remote_v6} -D {cfg.v6}" + + rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}" + tx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -n 100 {ip_args} -r 1 -T {extra_args}" + + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(34000, proto="udp") + cmd(tx_cmd, host=cfg.remote) + + +def test_transmit(cfg, ipv4=False, extra_args=None): + """Test local nic checksum transmit. Remote host verifies packets.""" + if (not cfg.have_tx_csum_generic and + not (cfg.have_tx_csum_ipv4 and ipv4) and + not (cfg.have_tx_csum_ipv6 and not ipv4)): + raise KsftSkipEx(f"Test requires tx checksum offload on {cfg.ifname}") + + if ipv4: + ip_args = f"-4 -S {cfg.v4} -D {cfg.remote_v4}" + else: + ip_args = f"-6 -S {cfg.v6} -D {cfg.remote_v6}" + + # Cannot randomize input when calculating zero checksum + if extra_args != "-U -Z": + extra_args += " -r 1" + + rx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -L 1 -n 100 {ip_args} -R {extra_args}" + tx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -L 1 -n 100 {ip_args} -T {extra_args}" + + with bkg(rx_cmd, host=cfg.remote, exit_wait=True): + wait_port_listen(34000, proto="udp", host=cfg.remote) + cmd(tx_cmd) + + +def test_builder(name, cfg, ipv4=False, tx=False, extra_args=""): + """Construct specific tests from the common template. + + Most tests follow the same basic pattern, differing only in + Direction of the test and optional flags passed to csum.""" + def f(cfg): + if ipv4: + cfg.require_v4() + else: + cfg.require_v6() + + if tx: + test_transmit(cfg, ipv4, extra_args) + else: + test_receive(cfg, ipv4, extra_args) + + if ipv4: + f.__name__ = "ipv4_" + name + else: + f.__name__ = "ipv6_" + name + return f + + +def check_nic_features(cfg) -> None: + """Test whether Tx and Rx checksum offload are enabled. + + If the device under test has either off, then skip the relevant tests.""" + cfg.have_tx_csum_generic = False + cfg.have_tx_csum_ipv4 = False + cfg.have_tx_csum_ipv6 = False + cfg.have_rx_csum = False + + ethnl = EthtoolFamily() + features = ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) + for f in features["active"]["bits"]["bit"]: + if f["name"] == "tx-checksum-ip-generic": + cfg.have_tx_csum_generic = True + elif f["name"] == "tx-checksum-ipv4": + cfg.have_tx_csum_ipv4 = True + elif f["name"] == "tx-checksum-ipv6": + cfg.have_tx_csum_ipv6 = True + elif f["name"] == "rx-checksum": + cfg.have_rx_csum = True + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + check_nic_features(cfg) + + cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../net/lib/csum") + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + cases = [] + for ipv4 in [True, False]: + cases.append(test_builder("rx_tcp", cfg, ipv4, False, "-t")) + cases.append(test_builder("rx_tcp_invalid", cfg, ipv4, False, "-t -E")) + + cases.append(test_builder("rx_udp", cfg, ipv4, False, "")) + cases.append(test_builder("rx_udp_invalid", cfg, ipv4, False, "-E")) + + cases.append(test_builder("tx_udp_csum_offload", cfg, ipv4, True, "-U")) + cases.append(test_builder("tx_udp_zero_checksum", cfg, ipv4, True, "-U -Z")) + + ksft_run(cases=cases, args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/drivers/net/hw/devlink_port_split.py index 2d84c7a0be..2d84c7a0be 100755 --- a/tools/testing/selftests/net/devlink_port_split.py +++ b/tools/testing/selftests/drivers/net/hw/devlink_port_split.py diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/drivers/net/hw/ethtool.sh index aa2eafb7b2..fa6953de6b 100755 --- a/tools/testing/selftests/net/forwarding/ethtool.sh +++ b/tools/testing/selftests/drivers/net/hw/ethtool.sh @@ -10,7 +10,8 @@ ALL_TESTS=" different_speeds_autoneg_on " NUM_NETIFS=2 -source lib.sh +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh source ethtool_lib.sh h1_create() @@ -64,9 +65,8 @@ same_speeds_autoneg_off() setup_wait_dev_with_timeout $h1 setup_wait_dev_with_timeout $h2 ping_do $h1 192.0.2.2 - check_err $? "speed $speed autoneg off" - log_test "force of same speed autoneg off" - log_info "speed = $speed" + check_err $? "ping with speed $speed autoneg off" + log_test "force speed $speed on both ends" done ethtool -s $h2 autoneg on @@ -111,9 +111,8 @@ combination_of_neg_on_and_off() setup_wait_dev_with_timeout $h1 setup_wait_dev_with_timeout $h2 ping_do $h1 192.0.2.2 - check_err $? "h1-speed=$speed autoneg off, h2 autoneg on" - log_test "one side with autoneg off and another with autoneg on" - log_info "force speed = $speed" + check_err $? "ping with h1-speed=$speed autoneg off, h2 autoneg on" + log_test "force speed $speed vs. autoneg" done ethtool -s $h1 autoneg on @@ -206,10 +205,9 @@ advertise_subset_of_speeds() setup_wait_dev_with_timeout $h1 setup_wait_dev_with_timeout $h2 ping_do $h1 192.0.2.2 - check_err $? "h1=$speed_1_to_advertise, h2=$speed_2_to_advertise ($speed_value)" + check_err $? "ping with h1=$speed_1_to_advertise, h2=$speed_2_to_advertise ($speed_value)" - log_test "advertise subset of speeds" - log_info "h1=$speed_1_to_advertise, h2=$speed_2_to_advertise" + log_test "advertise $speed_1_to_advertise vs. $speed_2_to_advertise" done ethtool -s $h2 autoneg on @@ -286,8 +284,6 @@ different_speeds_autoneg_on() ethtool -s $h1 autoneg on } -skip_on_veth - trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh index 17f89c3b7c..a758444841 100755 --- a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh +++ b/tools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh @@ -8,7 +8,8 @@ ALL_TESTS=" " NUM_NETIFS=2 -source lib.sh +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh source ethtool_lib.sh TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms @@ -108,8 +109,6 @@ no_cable() ip link set dev $swp3 down } -skip_on_veth - setup_prepare tests_run diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/drivers/net/hw/ethtool_lib.sh index b9bfb45085..b9bfb45085 100644 --- a/tools/testing/selftests/net/forwarding/ethtool_lib.sh +++ b/tools/testing/selftests/drivers/net/hw/ethtool_lib.sh diff --git a/tools/testing/selftests/net/forwarding/ethtool_mm.sh b/tools/testing/selftests/drivers/net/hw/ethtool_mm.sh index 50d5bfb17e..c301e735c8 100755 --- a/tools/testing/selftests/net/forwarding/ethtool_mm.sh +++ b/tools/testing/selftests/drivers/net/hw/ethtool_mm.sh @@ -14,7 +14,8 @@ ALL_TESTS=" NUM_NETIFS=2 REQUIRE_MZ=no PREEMPTIBLE_PRIO=0 -source lib.sh +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh traffic_test() { diff --git a/tools/testing/selftests/net/forwarding/ethtool_rmon.sh b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh index e78776db85..8f60c1685a 100755 --- a/tools/testing/selftests/net/forwarding/ethtool_rmon.sh +++ b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh @@ -7,7 +7,8 @@ ALL_TESTS=" " NUM_NETIFS=2 -source lib.sh +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh ETH_FCS_LEN=4 ETH_HLEN=$((6+6+2)) @@ -43,6 +44,7 @@ bucket_test() # Mausezahn does not include FCS bytes in its length - but the # histogram counters do len=$((len - ETH_FCS_LEN)) + len=$((len > 0 ? len : 0)) before=$(ethtool --json -S $iface --groups rmon | \ jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val") diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/hw/hw_stats_l3.sh index 48584a5138..67fafefc80 100755 --- a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh +++ b/tools/testing/selftests/drivers/net/hw/hw_stats_l3.sh @@ -48,7 +48,9 @@ ALL_TESTS=" test_double_enable " NUM_NETIFS=4 -source lib.sh +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh +source "$lib_dir"/../../../net/forwarding/tc_common.sh h1_create() { @@ -324,17 +326,9 @@ setup_wait used=$(ip -j stats show dev $rp1.200 group offload subgroup hw_stats_info | jq '.[].info.l3_stats.used') -kind=$(ip -j -d link show dev $rp1 | - jq -r '.[].linkinfo.info_kind') -if [[ $used != true ]]; then - if [[ $kind == veth ]]; then - log_test_skip "l3_stats not offloaded on veth interface" - EXIT_STATUS=$ksft_skip - else - RET=1 log_test "l3_stats not offloaded" - fi -else - tests_run -fi +[[ $used = true ]] +check_err $? "hw_stats_info.used=$used" +log_test "l3_stats offloaded" +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh b/tools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh index 7594bbb490..a94d92e1ab 100755 --- a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh +++ b/tools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh @@ -12,8 +12,10 @@ ALL_TESTS=" test_stats_tx " NUM_NETIFS=6 -source lib.sh -source ipip_lib.sh +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh +source "$lib_dir"/../../../net/forwarding/ipip_lib.sh +source "$lib_dir"/../../../net/forwarding/tc_common.sh setup_prepare() { @@ -99,8 +101,6 @@ test_stats_rx() test_stats g2a rx } -skip_on_veth - trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py new file mode 100644 index 0000000000..b582885786 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 + +import sys +from pathlib import Path + +KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve() + +try: + sys.path.append(KSFT_DIR.as_posix()) + from net.lib.py import * + from drivers.net.lib.py import * +except ModuleNotFoundError as e: + ksft_pr("Failed importing `net` library from kernel sources") + ksft_pr(str(e)) + ktap_result(True, comment="SKIP") + sys.exit(4) diff --git a/tools/testing/selftests/net/forwarding/loopback.sh b/tools/testing/selftests/drivers/net/hw/loopback.sh index 8f4057310b..5acc3ff820 100755 --- a/tools/testing/selftests/net/forwarding/loopback.sh +++ b/tools/testing/selftests/drivers/net/hw/loopback.sh @@ -6,8 +6,9 @@ ksft_skip=4 ALL_TESTS="loopback_test" NUM_NETIFS=2 -source tc_common.sh -source lib.sh +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/tc_common.sh +source "$lib_dir"/../../../net/forwarding/lib.sh h1_create() { diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py new file mode 100755 index 0000000000..026d98976c --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import time +import os +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import KsftSkipEx, KsftFailEx +from lib.py import NetdevFamily, NlError +from lib.py import NetDrvEpEnv +from lib.py import cmd, tool, GenerateTraffic + + +def _write_fail_config(config): + for key, value in config.items(): + with open("/sys/kernel/debug/fail_function/" + key, "w") as fp: + fp.write(str(value) + "\n") + + +def _enable_pp_allocation_fail(): + if not os.path.exists("/sys/kernel/debug/fail_function"): + raise KsftSkipEx("Kernel built without function error injection (or DebugFS)") + + if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_pages"): + with open("/sys/kernel/debug/fail_function/inject", "w") as fp: + fp.write("page_pool_alloc_pages\n") + + _write_fail_config({ + "verbose": 0, + "interval": 511, + "probability": 100, + "times": -1, + }) + + +def _disable_pp_allocation_fail(): + if not os.path.exists("/sys/kernel/debug/fail_function"): + return + + if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_pages"): + with open("/sys/kernel/debug/fail_function/inject", "w") as fp: + fp.write("\n") + + _write_fail_config({ + "probability": 0, + "times": 0, + }) + + +def test_pp_alloc(cfg, netdevnl): + def get_stats(): + return netdevnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + + def check_traffic_flowing(): + stat1 = get_stats() + time.sleep(1) + stat2 = get_stats() + if stat2['rx-packets'] - stat1['rx-packets'] < 15000: + raise KsftFailEx("Traffic seems low:", stat2['rx-packets'] - stat1['rx-packets']) + + + try: + stats = get_stats() + except NlError as e: + if e.nl_msg.error == -95: + stats = {} + else: + raise + if 'rx-alloc-fail' not in stats: + raise KsftSkipEx("Driver does not report 'rx-alloc-fail' via qstats") + + set_g = False + traffic = None + try: + traffic = GenerateTraffic(cfg) + + check_traffic_flowing() + + _enable_pp_allocation_fail() + + s1 = get_stats() + time.sleep(3) + s2 = get_stats() + + if s2['rx-alloc-fail'] - s1['rx-alloc-fail'] < 1: + raise KsftSkipEx("Allocation failures not increasing") + if s2['rx-alloc-fail'] - s1['rx-alloc-fail'] < 100: + raise KsftSkipEx("Allocation increasing too slowly", s2['rx-alloc-fail'] - s1['rx-alloc-fail'], + "packets:", s2['rx-packets'] - s1['rx-packets']) + + # Basic failures are fine, try to wobble some settings to catch extra failures + check_traffic_flowing() + g = tool("ethtool", "-g " + cfg.ifname, json=True)[0] + if 'rx' in g and g["rx"] * 2 <= g["rx-max"]: + new_g = g['rx'] * 2 + elif 'rx' in g: + new_g = g['rx'] // 2 + else: + new_g = None + + if new_g: + set_g = cmd(f"ethtool -G {cfg.ifname} rx {new_g}", fail=False).ret == 0 + if set_g: + ksft_pr("ethtool -G change retval: success") + else: + ksft_pr("ethtool -G change retval: did not succeed", new_g) + else: + ksft_pr("ethtool -G change retval: did not try") + + time.sleep(0.1) + check_traffic_flowing() + finally: + _disable_pp_allocation_fail() + if traffic: + traffic.stop() + time.sleep(0.1) + if set_g: + cmd(f"ethtool -G {cfg.ifname} rx {g['rx']}") + + +def main() -> None: + netdevnl = NetdevFamily() + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + + ksft_run([test_pp_alloc], args=(cfg, netdevnl, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/settings b/tools/testing/selftests/drivers/net/hw/settings new file mode 100644 index 0000000000..e7b9417537 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py new file mode 100644 index 0000000000..401e70f7f1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 + +import sys +from pathlib import Path + +KSFT_DIR = (Path(__file__).parent / "../../../..").resolve() + +try: + sys.path.append(KSFT_DIR.as_posix()) + from net.lib.py import * +except ModuleNotFoundError as e: + ksft_pr("Failed importing `net` library from kernel sources") + ksft_pr(str(e)) + ktap_result(True, comment="SKIP") + sys.exit(4) + +from .env import * +from .load import * +from .remote import Remote diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py new file mode 100644 index 0000000000..edcedd7bff --- /dev/null +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -0,0 +1,224 @@ +# SPDX-License-Identifier: GPL-2.0 + +import os +from pathlib import Path +from lib.py import KsftSkipEx, KsftXfailEx +from lib.py import cmd, ip +from lib.py import NetNS, NetdevSimDev +from .remote import Remote + + +def _load_env_file(src_path): + env = os.environ.copy() + + src_dir = Path(src_path).parent.resolve() + if not (src_dir / "net.config").exists(): + return env + + with open((src_dir / "net.config").as_posix(), 'r') as fp: + for line in fp.readlines(): + full_file = line + # Strip comments + pos = line.find("#") + if pos >= 0: + line = line[:pos] + line = line.strip() + if not line: + continue + pair = line.split('=', maxsplit=1) + if len(pair) != 2: + raise Exception("Can't parse configuration line:", full_file) + env[pair[0]] = pair[1] + return env + + +class NetDrvEnv: + """ + Class for a single NIC / host env, with no remote end + """ + def __init__(self, src_path, **kwargs): + self._ns = None + + self.env = _load_env_file(src_path) + + if 'NETIF' in self.env: + self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0] + else: + self._ns = NetdevSimDev(**kwargs) + self.dev = self._ns.nsims[0].dev + self.ifindex = self.dev['ifindex'] + + def __enter__(self): + ip(f"link set dev {self.dev['ifname']} up") + + return self + + def __exit__(self, ex_type, ex_value, ex_tb): + """ + __exit__ gets called at the end of a "with" block. + """ + self.__del__() + + def __del__(self): + if self._ns: + self._ns.remove() + self._ns = None + + +class NetDrvEpEnv: + """ + Class for an environment with a local device and "remote endpoint" + which can be used to send traffic in. + + For local testing it creates two network namespaces and a pair + of netdevsim devices. + """ + + # Network prefixes used for local tests + nsim_v4_pfx = "192.0.2." + nsim_v6_pfx = "2001:db8::" + + def __init__(self, src_path, nsim_test=None): + + self.env = _load_env_file(src_path) + + # Things we try to destroy + self.remote = None + # These are for local testing state + self._netns = None + self._ns = None + self._ns_peer = None + + if "NETIF" in self.env: + if nsim_test is True: + raise KsftXfailEx("Test only works on netdevsim") + self._check_env() + + self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0] + + self.v4 = self.env.get("LOCAL_V4") + self.v6 = self.env.get("LOCAL_V6") + self.remote_v4 = self.env.get("REMOTE_V4") + self.remote_v6 = self.env.get("REMOTE_V6") + kind = self.env["REMOTE_TYPE"] + args = self.env["REMOTE_ARGS"] + else: + if nsim_test is False: + raise KsftXfailEx("Test does not work on netdevsim") + + self.create_local() + + self.dev = self._ns.nsims[0].dev + + self.v4 = self.nsim_v4_pfx + "1" + self.v6 = self.nsim_v6_pfx + "1" + self.remote_v4 = self.nsim_v4_pfx + "2" + self.remote_v6 = self.nsim_v6_pfx + "2" + kind = "netns" + args = self._netns.name + + self.remote = Remote(kind, args, src_path) + + self.addr = self.v6 if self.v6 else self.v4 + self.remote_addr = self.remote_v6 if self.remote_v6 else self.remote_v4 + + self.addr_ipver = "6" if self.v6 else "4" + # Bracketed addresses, some commands need IPv6 to be inside [] + self.baddr = f"[{self.v6}]" if self.v6 else self.v4 + self.remote_baddr = f"[{self.remote_v6}]" if self.remote_v6 else self.remote_v4 + + self.ifname = self.dev['ifname'] + self.ifindex = self.dev['ifindex'] + + self._required_cmd = {} + + def create_local(self): + self._netns = NetNS() + self._ns = NetdevSimDev() + self._ns_peer = NetdevSimDev(ns=self._netns) + + with open("/proc/self/ns/net") as nsfd0, \ + open("/var/run/netns/" + self._netns.name) as nsfd1: + ifi0 = self._ns.nsims[0].ifindex + ifi1 = self._ns_peer.nsims[0].ifindex + NetdevSimDev.ctrl_write('link_device', + f'{nsfd0.fileno()}:{ifi0} {nsfd1.fileno()}:{ifi1}') + + ip(f" addr add dev {self._ns.nsims[0].ifname} {self.nsim_v4_pfx}1/24") + ip(f"-6 addr add dev {self._ns.nsims[0].ifname} {self.nsim_v6_pfx}1/64 nodad") + ip(f" link set dev {self._ns.nsims[0].ifname} up") + + ip(f" addr add dev {self._ns_peer.nsims[0].ifname} {self.nsim_v4_pfx}2/24", ns=self._netns) + ip(f"-6 addr add dev {self._ns_peer.nsims[0].ifname} {self.nsim_v6_pfx}2/64 nodad", ns=self._netns) + ip(f" link set dev {self._ns_peer.nsims[0].ifname} up", ns=self._netns) + + def _check_env(self): + vars_needed = [ + ["LOCAL_V4", "LOCAL_V6"], + ["REMOTE_V4", "REMOTE_V6"], + ["REMOTE_TYPE"], + ["REMOTE_ARGS"] + ] + missing = [] + + for choice in vars_needed: + for entry in choice: + if entry in self.env: + break + else: + missing.append(choice) + # Make sure v4 / v6 configs are symmetric + if ("LOCAL_V6" in self.env) != ("REMOTE_V6" in self.env): + missing.append(["LOCAL_V6", "REMOTE_V6"]) + if ("LOCAL_V4" in self.env) != ("REMOTE_V4" in self.env): + missing.append(["LOCAL_V4", "REMOTE_V4"]) + if missing: + raise Exception("Invalid environment, missing configuration:", missing, + "Please see tools/testing/selftests/drivers/net/README.rst") + + def __enter__(self): + return self + + def __exit__(self, ex_type, ex_value, ex_tb): + """ + __exit__ gets called at the end of a "with" block. + """ + self.__del__() + + def __del__(self): + if self._ns: + self._ns.remove() + self._ns = None + if self._ns_peer: + self._ns_peer.remove() + self._ns_peer = None + if self._netns: + del self._netns + self._netns = None + if self.remote: + del self.remote + self.remote = None + + def require_v4(self): + if not self.v4 or not self.remote_v4: + raise KsftSkipEx("Test requires IPv4 connectivity") + + def require_v6(self): + if not self.v6 or not self.remote_v6: + raise KsftSkipEx("Test requires IPv6 connectivity") + + def _require_cmd(self, comm, key, host=None): + cached = self._required_cmd.get(comm, {}) + if cached.get(key) is None: + cached[key] = cmd("command -v -- " + comm, fail=False, + shell=True, host=host).ret == 0 + self._required_cmd[comm] = cached + return cached[key] + + def require_cmd(self, comm, local=True, remote=False): + if local: + if not self._require_cmd(comm, "local"): + raise KsftSkipEx("Test requires command: " + comm) + if remote: + if not self._require_cmd(comm, "remote"): + raise KsftSkipEx("Test requires (remote) command: " + comm) diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py new file mode 100644 index 0000000000..abdb677bdb --- /dev/null +++ b/tools/testing/selftests/drivers/net/lib/py/load.py @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: GPL-2.0 + +import time + +from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen + +class GenerateTraffic: + def __init__(self, env): + env.require_cmd("iperf3", remote=True) + + self.env = env + + port = rand_port() + self._iperf_server = cmd(f"iperf3 -s -p {port}", background=True) + wait_port_listen(port) + time.sleep(0.1) + self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {port} -t 86400", + background=True, host=env.remote) + + # Wait for traffic to ramp up + pkt = ip("-s link show dev " + env.ifname, json=True)[0]["stats64"]["rx"]["packets"] + for _ in range(50): + time.sleep(0.1) + now = ip("-s link show dev " + env.ifname, json=True)[0]["stats64"]["rx"]["packets"] + if now - pkt > 1000: + return + pkt = now + self.stop(verbose=True) + raise Exception("iperf3 traffic did not ramp up") + + def stop(self, verbose=None): + self._iperf_client.process(terminate=True) + if verbose: + ksft_pr(">> Client:") + ksft_pr(self._iperf_client.stdout) + ksft_pr(self._iperf_client.stderr) + self._iperf_server.process(terminate=True) + if verbose: + ksft_pr(">> Server:") + ksft_pr(self._iperf_server.stdout) + ksft_pr(self._iperf_server.stderr) diff --git a/tools/testing/selftests/drivers/net/lib/py/remote.py b/tools/testing/selftests/drivers/net/lib/py/remote.py new file mode 100644 index 0000000000..b1780b9877 --- /dev/null +++ b/tools/testing/selftests/drivers/net/lib/py/remote.py @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 + +import os +import importlib + +_modules = {} + +def Remote(kind, args, src_path): + global _modules + + if kind not in _modules: + _modules[kind] = importlib.import_module("..remote_" + kind, __name__) + + dir_path = os.path.abspath(src_path + "/../") + return getattr(_modules[kind], "Remote")(args, dir_path) diff --git a/tools/testing/selftests/drivers/net/lib/py/remote_netns.py b/tools/testing/selftests/drivers/net/lib/py/remote_netns.py new file mode 100644 index 0000000000..7d5eeb0271 --- /dev/null +++ b/tools/testing/selftests/drivers/net/lib/py/remote_netns.py @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 + +import os +import subprocess + +from lib.py import cmd + + +class Remote: + def __init__(self, name, dir_path): + self.name = name + self.dir_path = dir_path + + def cmd(self, comm): + return subprocess.Popen(["ip", "netns", "exec", self.name, "bash", "-c", comm], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + def deploy(self, what): + if os.path.isabs(what): + return what + return os.path.abspath(self.dir_path + "/" + what) diff --git a/tools/testing/selftests/drivers/net/lib/py/remote_ssh.py b/tools/testing/selftests/drivers/net/lib/py/remote_ssh.py new file mode 100644 index 0000000000..924addde19 --- /dev/null +++ b/tools/testing/selftests/drivers/net/lib/py/remote_ssh.py @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: GPL-2.0 + +import os +import string +import subprocess +import random + +from lib.py import cmd + + +class Remote: + def __init__(self, name, dir_path): + self.name = name + self.dir_path = dir_path + self._tmpdir = None + + def __del__(self): + if self._tmpdir: + cmd("rm -rf " + self._tmpdir, host=self) + self._tmpdir = None + + def cmd(self, comm): + return subprocess.Popen(["ssh", "-q", self.name, comm], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + def _mktmp(self): + return ''.join(random.choice(string.ascii_lowercase) for _ in range(8)) + + def deploy(self, what): + if not self._tmpdir: + self._tmpdir = "/tmp/" + self._mktmp() + cmd("mkdir " + self._tmpdir, host=self) + file_name = self._tmpdir + "/" + self._mktmp() + os.path.basename(what) + + if not os.path.isabs(what): + what = os.path.abspath(self.dir_path + "/" + what) + + cmd(f"scp {what} {self.name}:{file_name}") + return file_name diff --git a/tools/testing/selftests/drivers/net/microchip/ksz9477_qos.sh b/tools/testing/selftests/drivers/net/microchip/ksz9477_qos.sh new file mode 100755 index 0000000000..82be5d0133 --- /dev/null +++ b/tools/testing/selftests/drivers/net/microchip/ksz9477_qos.sh @@ -0,0 +1,668 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2024 Pengutronix, Oleksij Rempel <kernel@pengutronix.de> + +# The script is adopted to work with the Microchip KSZ switch driver. + +ETH_FCS_LEN=4 + +WAIT_TIME=1 +NUM_NETIFS=4 +REQUIRE_JQ="yes" +REQUIRE_MZ="yes" +STABLE_MAC_ADDRS=yes +NETIF_CREATE=no +lib_dir=$(dirname $0)/../../../net/forwarding +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +require_command dcb + +h1=${NETIFS[p1]} +swp1=${NETIFS[p2]} +swp2=${NETIFS[p3]} +h2=${NETIFS[p4]} + +H1_IPV4="192.0.2.1" +H2_IPV4="192.0.2.2" +H1_IPV6="2001:db8:1::1" +H2_IPV6="2001:db8:1::2" + +# On h1_ and h2_create do not set IP addresses to avoid interaction with the +# system, to keep packet counters clean. +h1_create() +{ + simple_if_init $h1 + sysctl_set net.ipv6.conf.${h1}.disable_ipv6 1 + # Get the MAC address of the interface to use it with mausezahn + h1_mac=$(ip -j link show dev ${h1} | jq -e '.[].address') +} + +h1_destroy() +{ + sysctl_restore net.ipv6.conf.${h1}.disable_ipv6 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + sysctl_set net.ipv6.conf.${h2}.disable_ipv6 1 + h2_mac=$(ip -j link show dev ${h2} | jq -e '.[].address') +} + +h2_destroy() +{ + sysctl_restore net.ipv6.conf.${h2}.disable_ipv6 + simple_if_fini $h2 +} + +switch_create() +{ + ip link set ${swp1} up + ip link set ${swp2} up + sysctl_set net.ipv6.conf.${swp1}.disable_ipv6 1 + sysctl_set net.ipv6.conf.${swp2}.disable_ipv6 1 + + # Ports should trust VLAN PCP even with vlan_filtering=0 + ip link add br0 type bridge + ip link set ${swp1} master br0 + ip link set ${swp2} master br0 + ip link set br0 up + sysctl_set net.ipv6.conf.br0.disable_ipv6 1 +} + +switch_destroy() +{ + sysctl_restore net.ipv6.conf.${swp2}.disable_ipv6 + sysctl_restore net.ipv6.conf.${swp1}.disable_ipv6 + + ip link del br0 +} + +setup_prepare() +{ + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy + switch_destroy + + vrf_cleanup +} + +set_apptrust_order() +{ + local if_name=$1 + local order=$2 + + dcb apptrust set dev ${if_name} order ${order} +} + +# Function to extract a specified field from a given JSON stats string +extract_network_stat() { + local stats_json=$1 + local field_name=$2 + + echo $(echo "$stats_json" | jq -r "$field_name") +} + +run_test() +{ + local test_name=$1; + local apptrust_order=$2; + local port_prio=$3; + local dscp_ipv=$4; + local dscp=$5; + local have_vlan=$6; + local pcp_ipv=$7; + local vlan_pcp=$8; + local ip_v6=$9 + + local rx_ipv + local tx_ipv + + RET=0 + + # Send some packet to populate the switch MAC table + $MZ ${h2} -a ${h2_mac} -b ${h1_mac} -p 64 -t icmp echores -c 1 + + # Based on the apptrust order, set the expected Internal Priority values + # for the RX and TX paths. + if [ "${apptrust_order}" == "" ]; then + echo "Apptrust order not set." + rx_ipv=${port_prio} + tx_ipv=${port_prio} + elif [ "${apptrust_order}" == "dscp" ]; then + echo "Apptrust order is DSCP." + rx_ipv=${dscp_ipv} + tx_ipv=${dscp_ipv} + elif [ "${apptrust_order}" == "pcp" ]; then + echo "Apptrust order is PCP." + rx_ipv=${pcp_ipv} + tx_ipv=${pcp_ipv} + elif [ "${apptrust_order}" == "pcp dscp" ]; then + echo "Apptrust order is PCP DSCP." + if [ ${have_vlan} -eq 1 ]; then + rx_ipv=$((dscp_ipv > pcp_ipv ? dscp_ipv : pcp_ipv)) + tx_ipv=${pcp_ipv} + else + rx_ipv=${dscp_ipv} + tx_ipv=${dscp_ipv} + fi + else + RET=1 + echo "Error: Unknown apptrust order ${apptrust_order}" + log_test "${test_name}" + return + fi + + # Most/all? of the KSZ switches do not provide per-TC counters. There + # are only tx_hi and rx_hi counters, which are used to count packets + # which are considered as high priority and most likely not assigned + # to the queue 0. + # On the ingress path, packets seem to get high priority status + # independently of the DSCP or PCP global mapping. On the egress path, + # the high priority status is assigned based on the DSCP or PCP global + # map configuration. + # The thresholds for the high priority status are not documented, but + # it seems that the switch considers packets as high priority on the + # ingress path if detected Internal Priority is greater than 0. On the + # egress path, the switch considers packets as high priority if + # detected Internal Priority is greater than 1. + if [ ${rx_ipv} -ge 1 ]; then + local expect_rx_high_prio=1 + else + local expect_rx_high_prio=0 + fi + + if [ ${tx_ipv} -ge 2 ]; then + local expect_tx_high_prio=1 + else + local expect_tx_high_prio=0 + fi + + # Use ip tool to get the current switch packet counters. ethool stats + # need to be recalculated to get the correct values. + local swp1_stats=$(ip -s -j link show dev ${swp1}) + local swp2_stats=$(ip -s -j link show dev ${swp2}) + local swp1_rx_packets_before=$(extract_network_stat "$swp1_stats" \ + '.[0].stats64.rx.packets') + local swp1_rx_bytes_before=$(extract_network_stat "$swp1_stats" \ + '.[0].stats64.rx.bytes') + local swp2_tx_packets_before=$(extract_network_stat "$swp2_stats" \ + '.[0].stats64.tx.packets') + local swp2_tx_bytes_before=$(extract_network_stat "$swp2_stats" \ + '.[0].stats64.tx.bytes') + local swp1_rx_hi_before=$(ethtool_stats_get ${swp1} "rx_hi") + local swp2_tx_hi_before=$(ethtool_stats_get ${swp2} "tx_hi") + + # Assamble the mausezahn command based on the test parameters + # For the testis with ipv4 or ipv6, use icmp response packets, + # to avoid interaction with the system, to keep packet counters + # clean. + if [ ${ip_v6} -eq 0 ]; then + local ip="-a ${h1_mac} -b ${h2_mac} -A ${H1_IPV4} \ + -B ${H2_IPV4} -t icmp unreach,code=1,dscp=${dscp}" + else + local ip="-6 -a ${h1_mac} -b ${h2_mac} -A ${H1_IPV6} \ + -B ${H2_IPV6} -t icmp6 type=1,code=0,dscp=${dscp}" + fi + + if [ ${have_vlan} -eq 1 ]; then + local vlan_pcp_opt="-Q ${vlan_pcp}:0" + else + local vlan_pcp_opt="" + fi + $MZ ${h1} ${ip} -c ${PING_COUNT} -d 10msec ${vlan_pcp_opt} + + # Wait until the switch packet counters are updated + sleep 6 + + local swp1_stats=$(ip -s -j link show dev ${swp1}) + local swp2_stats=$(ip -s -j link show dev ${swp2}) + + local swp1_rx_packets_after=$(extract_network_stat "$swp1_stats" \ + '.[0].stats64.rx.packets') + local swp1_rx_bytes_after=$(extract_network_stat "$swp1_stats" \ + '.[0].stats64.rx.bytes') + local swp2_tx_packets_after=$(extract_network_stat "$swp2_stats" \ + '.[0].stats64.tx.packets') + local swp2_tx_bytes_after=$(extract_network_stat "$swp2_stats" \ + '.[0].stats64.tx.bytes') + + local swp1_rx_packets_diff=$((${swp1_rx_packets_after} - \ + ${swp1_rx_packets_before})) + local swp2_tx_packets_diff=$((${swp2_tx_packets_after} - \ + ${swp2_tx_packets_before})) + + local swp1_rx_hi_after=$(ethtool_stats_get ${swp1} "rx_hi") + local swp2_tx_hi_after=$(ethtool_stats_get ${swp2} "tx_hi") + + # Test if any packets were received on swp1, we will rx before and after + if [ ${swp1_rx_packets_diff} -lt ${PING_COUNT} ]; then + echo "Not expected amount of received packets on ${swp1}" + echo "before ${swp1_rx_packets_before} after ${swp1_rx_packets_after}" + RET=1 + fi + + # Test if any packets were transmitted on swp2, we will tx before and after + if [ ${swp2_tx_packets_diff} -lt ${PING_COUNT} ]; then + echo "Not expected amount of transmitted packets on ${swp2}" + echo "before ${swp2_tx_packets_before} after ${swp2_tx_packets_after}" + RET=1 + fi + + # tx/rx_hi counted in bytes. So, we need to compare the difference in bytes + local swp1_rx_bytes_diff=$(($swp1_rx_bytes_after - $swp1_rx_bytes_before)) + local swp2_tx_bytes_diff=$(($swp2_tx_bytes_after - $swp2_tx_bytes_before)) + local swp1_rx_hi_diff=$(($swp1_rx_hi_after - $swp1_rx_hi_before)) + local swp2_tx_hi_diff=$(($swp2_tx_hi_after - $swp2_tx_hi_before)) + + if [ ${expect_rx_high_prio} -eq 1 ]; then + swp1_rx_hi_diff=$((${swp1_rx_hi_diff} - \ + ${swp1_rx_packets_diff} * ${ETH_FCS_LEN})) + if [ ${swp1_rx_hi_diff} -ne ${swp1_rx_bytes_diff} ]; then + echo "Not expected amount of high priority packets received on ${swp1}" + echo "RX hi diff: ${swp1_rx_hi_diff}, expected RX bytes diff: ${swp1_rx_bytes_diff}" + RET=1 + fi + else + if [ ${swp1_rx_hi_diff} -ne 0 ]; then + echo "Unexpected amount of high priority packets received on ${swp1}" + echo "RX hi diff: ${swp1_rx_hi_diff}, expected 0" + RET=1 + fi + fi + + if [ ${expect_tx_high_prio} -eq 1 ]; then + swp2_tx_hi_diff=$((${swp2_tx_hi_diff} - \ + ${swp2_tx_packets_diff} * ${ETH_FCS_LEN})) + if [ ${swp2_tx_hi_diff} -ne ${swp2_tx_bytes_diff} ]; then + echo "Not expected amount of high priority packets transmitted on ${swp2}" + echo "TX hi diff: ${swp2_tx_hi_diff}, expected TX bytes diff: ${swp2_tx_bytes_diff}" + RET=1 + fi + else + if [ ${swp2_tx_hi_diff} -ne 0 ]; then + echo "Unexpected amount of high priority packets transmitted on ${swp2}" + echo "TX hi diff: ${swp2_tx_hi_diff}, expected 0" + RET=1 + fi + fi + + log_test "${test_name}" +} + +run_test_dscp() +{ + # IPv4 test + run_test "$1" "$2" "$3" "$4" "$5" 0 0 0 0 + # IPv6 test + run_test "$1" "$2" "$3" "$4" "$5" 0 0 0 1 +} + +run_test_dscp_pcp() +{ + # IPv4 test + run_test "$1" "$2" "$3" "$4" "$5" 1 "$6" "$7" 0 + # IPv6 test + run_test "$1" "$2" "$3" "$4" "$5" 1 "$6" "$7" 1 +} + +port_default_prio_get() +{ + local if_name=$1 + local prio + + prio="$(dcb -j app show dev ${if_name} default-prio | \ + jq '.default_prio[]')" + if [ -z "${prio}" ]; then + prio=0 + fi + + echo ${prio} +} + +test_port_default() +{ + local orig_apptrust=$(port_get_default_apptrust ${swp1}) + local orig_prio=$(port_default_prio_get ${swp1}) + local apptrust_order="" + + RET=0 + + # Make sure no other priority sources will interfere with the test + set_apptrust_order ${swp1} "${apptrust_order}" + + for val in $(seq 0 7); do + dcb app replace dev ${swp1} default-prio ${val} + if [ $val -ne $(port_default_prio_get ${swp1}) ]; then + RET=1 + break + fi + + run_test_dscp "Port-default QoS classification, prio: ${val}" \ + "${apptrust_order}" ${val} 0 0 + done + + set_apptrust_order ${swp1} "${orig_apptrust}" + if [[ "$orig_apptrust" != "$(port_get_default_apptrust ${swp1})" ]]; then + RET=1 + fi + + dcb app replace dev ${swp1} default-prio ${orig_prio} + if [ $orig_prio -ne $(port_default_prio_get ${swp1}) ]; then + RET=1 + fi + + log_test "Port-default QoS classification" +} + +port_get_default_apptrust() +{ + local if_name=$1 + + dcb -j apptrust show dev ${if_name} | jq -r '.order[]' | \ + tr '\n' ' ' | xargs +} + +test_port_apptrust() +{ + local original_dscp_prios_swp1=$(get_dscp_prios ${swp1}) + local orig_apptrust=$(port_get_default_apptrust ${swp1}) + local orig_port_prio=$(port_default_prio_get ${swp1}) + local order_variants=("pcp dscp" "dscp" "pcp") + local apptrust_order + local port_prio + local dscp_prio + local pcp_prio + local dscp + local pcp + + RET=0 + + # First, test if apptrust configuration as taken by the kernel + for order in "${order_variants[@]}"; do + set_apptrust_order ${swp1} "${order}" + if [[ "$order" != "$(port_get_default_apptrust ${swp1})" ]]; then + RET=1 + break + fi + done + + log_test "Apptrust, supported variants" + + # To test if the apptrust configuration is working as expected, we need + # to set DSCP priorities for the switch port. + init_dscp_prios "${swp1}" "${original_dscp_prios_swp1}" + + # Start with a simple test where all apptrust sources are disabled + # default port priority is 0, DSCP priority is mapped to 7. + # No high priority packets should be received or transmitted. + port_prio=0 + dscp_prio=7 + dscp=4 + + dcb app replace dev ${swp1} default-prio ${port_prio} + dcb app replace dev ${swp1} dscp-prio ${dscp}:${dscp_prio} + + apptrust_order="" + set_apptrust_order ${swp1} "${apptrust_order}" + # Test with apptrust sources disabled, Packets should get port default + # priority which is 0 + run_test_dscp "Apptrust, all disabled. DSCP-prio ${dscp}:${dscp_prio}" \ + "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} + + apptrust_order="pcp" + set_apptrust_order ${swp1} "${apptrust_order}" + # If PCP is enabled, packets should get PCP priority, which is not + # set in this test (no VLAN tags are present in the packet). No high + # priority packets should be received or transmitted. + run_test_dscp "Apptrust, PCP enabled. DSCP-prio ${dscp}:${dscp_prio}" \ + "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} + + apptrust_order="dscp" + set_apptrust_order ${swp1} "${apptrust_order}" + # If DSCP is enabled, packets should get DSCP priority which is set to 7 + # in this test. High priority packets should be received and transmitted. + run_test_dscp "Apptrust, DSCP enabled. DSCP-prio ${dscp}:${dscp_prio}" \ + "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} + + apptrust_order="pcp dscp" + set_apptrust_order ${swp1} "${apptrust_order}" + # If PCP and DSCP are enabled, PCP would have higher apptrust priority + # so packets should get PCP priority. But in this test VLAN PCP is not + # set, so it should get DSCP priority which is set to 7. High priority + # packets should be received and transmitted. + run_test_dscp "Apptrust, PCP and DSCP are enabled. DSCP-prio ${dscp}:${dscp_prio}" \ + "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} + + # If VLAN PCP is set, it should have higher apptrust priority than DSCP + # so packets should get VLAN PCP priority. Send packets with VLAN PCP + # set to 0, DSCP set to 7. Packets should get VLAN PCP priority. + # No high priority packets should be transmitted. Due to nature of the + # switch, high priority packets will be received. + pcp_prio=0 + pcp=0 + run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \ + "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp} + + # If VLAN PCP is set to 7, it should have higher apptrust priority than + # DSCP so packets should get VLAN PCP priority. Send packets with VLAN + # PCP set to 7, DSCP set to 7. Packets should get VLAN PCP priority. + # High priority packets should be received and transmitted. + pcp_prio=7 + pcp=7 + run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \ + "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp} + # Now make sure that the switch is able to handle the case where DSCP + # priority is set to 0 and PCP priority is set to 7. Packets should get + # PCP priority. High priority packets should be received and transmitted. + dscp_prio=0 + dcb app replace dev ${swp1} dscp-prio ${dscp}:${dscp_prio} + run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \ + "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp} + # If both VLAN PCP and DSCP are set to 0, packets should get 0 priority. + # No high priority packets should be received or transmitted. + pcp_prio=0 + pcp=0 + run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \ + "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp} + + # Restore original priorities + if ! restore_priorities "${swp1}" "${original_dscp_prios_swp1}"; then + RET=1 + fi + + set_apptrust_order ${swp1} "${orig_apptrust}" + if [ "$orig_apptrust" != "$(port_get_default_apptrust ${swp1})" ]; then + RET=1 + fi + + dcb app replace dev ${swp1} default-prio ${orig_port_prio} + if [ $orig_port_prio -ne $(port_default_prio_get ${swp1}) ]; then + RET=1 + fi + + log_test "Apptrust, restore original settings" +} + +# Function to get current DSCP priorities +get_dscp_prios() { + local if_name=$1 + dcb -j app show dev ${if_name} | jq -c '.dscp_prio' +} + +# Function to set a specific DSCP priority on a device +replace_dscp_prio() { + local if_name=$1 + local dscp=$2 + local prio=$3 + dcb app replace dev ${if_name} dscp-prio ${dscp}:${prio} +} + +# Function to compare DSCP maps +compare_dscp_maps() { + local old_json=$1 + local new_json=$2 + local dscp=$3 + local prio=$4 + + # Create a modified old_json with the expected change for comparison + local modified_old_json=$(echo "$old_json" | + jq --argjson dscp $dscp --argjson prio $prio \ + 'map(if .[0] == $dscp then [$dscp, $prio] else . end)' | + tr -d " \n") + + # Compare new_json with the modified_old_json + if [[ "$modified_old_json" == "$new_json" ]]; then + return 0 + else + return 1 + fi +} + +# Function to set DSCP priorities +set_and_verify_dscp() { + local port=$1 + local dscp=$2 + local new_prio=$3 + + local old_prios=$(get_dscp_prios $port) + + replace_dscp_prio "$port" $dscp $new_prio + + # Fetch current settings and compare + local current_prios=$(get_dscp_prios $port) + if ! compare_dscp_maps "$old_prios" "$current_prios" $dscp $new_prio; then + echo "Error: Unintended changes detected in DSCP map for $port after setting DSCP $dscp to $new_prio." + return 1 + fi + return 0 +} + +# Function to restore original priorities +restore_priorities() { + local port=$1 + local original_prios=$2 + + echo "Removing test artifacts for $port" + local current_prios=$(get_dscp_prios $port) + local prio_str=$(echo "$current_prios" | + jq -r 'map("\(.[0]):\(.[1])") | join(" ")') + dcb app del dev $port dscp-prio $prio_str + + echo "Restoring original DSCP priorities for $port" + local restore_str=$(echo "$original_prios" | + jq -r 'map("\(.[0]):\(.[1])") | join(" ")') + dcb app add dev $port dscp-prio $restore_str + + local current_prios=$(get_dscp_prios $port) + if [[ "$original_prios" != "$current_prios" ]]; then + echo "Error: Failed to restore original DSCP priorities for $port" + return 1 + fi + return 0 +} + +# Initialize DSCP priorities. Set them to predictable values for testing. +init_dscp_prios() { + local port=$1 + local original_prios=$2 + + echo "Removing any existing DSCP priority mappins for $port" + local prio_str=$(echo "$original_prios" | + jq -r 'map("\(.[0]):\(.[1])") | join(" ")') + dcb app del dev $port dscp-prio $prio_str + + # Initialize DSCP priorities list + local dscp_prios="" + for dscp in {0..63}; do + dscp_prios+=("$dscp:0") + done + + echo "Setting initial DSCP priorities map to 0 for $port" + dcb app add dev $port dscp-prio ${dscp_prios[@]} +} + +# Main function to test global DSCP map across specified ports +test_global_dscp_map() { + local ports=("$swp1" "$swp2") + local original_dscp_prios_port0=$(get_dscp_prios ${ports[0]}) + local orig_apptrust=$(port_get_default_apptrust ${swp1}) + local orig_port_prio=$(port_default_prio_get ${swp1}) + local apptrust_order="dscp" + local port_prio=0 + local dscp_prio + local dscp + + RET=0 + + set_apptrust_order ${swp1} "${apptrust_order}" + dcb app replace dev ${swp1} default-prio ${port_prio} + + # Initialize DSCP priorities + init_dscp_prios "${ports[0]}" "$original_dscp_prios_port0" + + # Loop over each DSCP index + for dscp in {0..63}; do + # and test each Internal Priority value + for dscp_prio in {0..7}; do + # do it for each port. This is to test if the global DSCP map + # is accessible from all ports. + for port in "${ports[@]}"; do + if ! set_and_verify_dscp "$port" $dscp $dscp_prio; then + RET=1 + fi + done + + # Test if the DSCP priority is correctly applied to the packets + run_test_dscp "DSCP (${dscp}) QoS classification, prio: ${dscp_prio}" \ + "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} + if [ ${RET} -eq 1 ]; then + break + fi + done + done + + # Restore original priorities + if ! restore_priorities "${ports[0]}" "${original_dscp_prios_port0}"; then + RET=1 + fi + + set_apptrust_order ${swp1} "${orig_apptrust}" + if [[ "$orig_apptrust" != "$(port_get_default_apptrust ${swp1})" ]]; then + RET=1 + fi + + dcb app replace dev ${swp1} default-prio ${orig_port_prio} + if [ $orig_port_prio -ne $(port_default_prio_get ${swp1}) ]; then + RET=1 + fi + + log_test "DSCP global map" +} + +trap cleanup EXIT + +ALL_TESTS=" + test_port_default + test_port_apptrust + test_global_dscp_map +" + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh index 91891b9418..877cd6df94 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh @@ -24,8 +24,8 @@ setup_prepare() busywait "$TIMEOUT" wait_for_port_up ethtool $swp2 check_err $? "ports did not come up" - local lanes_exist=$(ethtool $swp1 | grep 'Lanes:') - if [[ -z $lanes_exist ]]; then + busywait $TIMEOUT sh -c "ethtool $swp1 | grep -q Lanes:" + if [[ $? -ne 0 ]]; then log_test "SKIP: driver does not support lanes setting" exit 1 fi @@ -122,8 +122,9 @@ autoneg() ethtool_set $swp1 speed $max_speed lanes $lanes ip link set dev $swp1 up ip link set dev $swp2 up - busywait "$TIMEOUT" wait_for_port_up ethtool $swp2 - check_err $? "ports did not come up" + + busywait $TIMEOUT sh -c "ethtool $swp1 | grep -q Lanes:" + check_err $? "Lanes parameter is not presented on time" check_lanes $swp1 $lanes $max_speed log_test "$lanes lanes is autonegotiated" @@ -160,8 +161,9 @@ autoneg_force_mode() ethtool_set $swp2 speed $max_speed lanes $lanes autoneg off ip link set dev $swp1 up ip link set dev $swp2 up - busywait "$TIMEOUT" wait_for_port_up ethtool $swp2 - check_err $? "ports did not come up" + + busywait $TIMEOUT sh -c "ethtool $swp1 | grep -q Lanes:" + check_err $? "Lanes parameter is not presented on time" check_lanes $swp1 $lanes $max_speed log_test "Autoneg off, $lanes lanes detected during force mode" diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index 31252bc877..4994bea5da 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -11,7 +11,7 @@ ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ multiple_masks_test ctcam_edge_cases_test delta_simple_test \ delta_two_masks_one_key_test delta_simple_rehash_test \ bloom_simple_test bloom_complex_test bloom_delta_test \ - max_erp_entries_test max_group_size_test" + max_erp_entries_test max_group_size_test collision_test" NUM_NETIFS=2 source $lib_dir/lib.sh source $lib_dir/tc_common.sh @@ -457,7 +457,7 @@ delta_two_masks_one_key_test() { # If 2 keys are the same and only differ in mask in a way that # they belong under the same ERP (second is delta of the first), - # there should be no C-TCAM spill. + # there should be C-TCAM spill. RET=0 @@ -474,8 +474,8 @@ delta_two_masks_one_key_test() tp_record "mlxsw:*" "tc filter add dev $h2 ingress protocol ip \ pref 2 handle 102 flower $tcflags dst_ip 192.0.2.2 \ action drop" - tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" 0 - check_err $? "incorrect C-TCAM spill while inserting the second rule" + tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" 1 + check_err $? "C-TCAM spill did not happen while inserting the second rule" $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ -t ip -q @@ -1087,6 +1087,53 @@ max_group_size_test() log_test "max ACL group size test ($tcflags). max size $max_size" } +collision_test() +{ + # Filters cannot share an eRP if in the common unmasked part (i.e., + # without the delta bits) they have the same values. If the driver does + # not prevent such configuration (by spilling into the C-TCAM), then + # multiple entries will be present in the device with the same key, + # leading to collisions and a reduced scale. + # + # Create such a scenario and make sure all the filters are successfully + # added. + + RET=0 + + local ret + + if [[ "$tcflags" != "skip_sw" ]]; then + return 0; + fi + + # Add a single dst_ip/24 filter and multiple dst_ip/32 filters that all + # have the same values in the common unmasked part (dst_ip/24). + + tc filter add dev $h2 ingress pref 1 proto ipv4 handle 101 \ + flower $tcflags dst_ip 198.51.100.0/24 \ + action drop + + for i in {0..255}; do + tc filter add dev $h2 ingress pref 2 proto ipv4 \ + handle $((102 + i)) \ + flower $tcflags dst_ip 198.51.100.${i}/32 \ + action drop + ret=$? + [[ $ret -ne 0 ]] && break + done + + check_err $ret "failed to add all the filters" + + for i in {255..0}; do + tc filter del dev $h2 ingress pref 2 proto ipv4 \ + handle $((102 + i)) flower + done + + tc filter del dev $h2 ingress pref 1 proto ipv4 handle 101 flower + + log_test "collision test ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py new file mode 100755 index 0000000000..eb83e7b487 --- /dev/null +++ b/tools/testing/selftests/drivers/net/ping.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq +from lib.py import NetDrvEpEnv +from lib.py import bkg, cmd, wait_port_listen, rand_port + + +def test_v4(cfg) -> None: + cfg.require_v4() + + cmd(f"ping -c 1 -W0.5 {cfg.remote_v4}") + cmd(f"ping -c 1 -W0.5 {cfg.v4}", host=cfg.remote) + + +def test_v6(cfg) -> None: + cfg.require_v6() + + cmd(f"ping -c 1 -W0.5 {cfg.remote_v6}") + cmd(f"ping -c 1 -W0.5 {cfg.v6}", host=cfg.remote) + + +def test_tcp(cfg) -> None: + cfg.require_cmd("socat", remote=True) + + port = rand_port() + listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT" + + with bkg(listen_cmd, exit_wait=True) as nc: + wait_port_listen(port) + + cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}", + shell=True, host=cfg.remote) + ksft_eq(nc.stdout.strip(), "ping") + + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc: + wait_port_listen(port, host=cfg.remote) + + cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True) + ksft_eq(nc.stdout.strip(), "ping") + + +def main() -> None: + with NetDrvEpEnv(__file__) as cfg: + ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py new file mode 100755 index 0000000000..30f29096e2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/queues.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_run, ksft_exit, ksft_eq, KsftSkipEx +from lib.py import EthtoolFamily, NetdevFamily +from lib.py import NetDrvEnv +from lib.py import cmd +import glob + + +def sys_get_queues(ifname) -> int: + folders = glob.glob(f'/sys/class/net/{ifname}/queues/rx-*') + return len(folders) + + +def nl_get_queues(cfg, nl): + queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True) + if queues: + return len([q for q in queues if q['type'] == 'rx']) + return None + + +def get_queues(cfg, nl) -> None: + queues = nl_get_queues(cfg, nl) + if not queues: + raise KsftSkipEx('queue-get not supported by device') + + expected = sys_get_queues(cfg.dev['ifname']) + ksft_eq(queues, expected) + + +def addremove_queues(cfg, nl) -> None: + queues = nl_get_queues(cfg, nl) + if not queues: + raise KsftSkipEx('queue-get not supported by device') + + curr_queues = sys_get_queues(cfg.dev['ifname']) + if curr_queues == 1: + raise KsftSkipEx('cannot decrement queue: already at 1') + + netnl = EthtoolFamily() + channels = netnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + if channels['combined-count'] == 0: + rx_type = 'rx' + else: + rx_type = 'combined' + + expected = curr_queues - 1 + cmd(f"ethtool -L {cfg.dev['ifname']} {rx_type} {expected}", timeout=10) + queues = nl_get_queues(cfg, nl) + ksft_eq(queues, expected) + + expected = curr_queues + cmd(f"ethtool -L {cfg.dev['ifname']} {rx_type} {expected}", timeout=10) + queues = nl_get_queues(cfg, nl) + ksft_eq(queues, expected) + + +def main() -> None: + with NetDrvEnv(__file__, queue_count=3) as cfg: + ksft_run([get_queues, addremove_queues], args=(cfg, NetdevFamily())) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py new file mode 100755 index 0000000000..820b8e0a22 --- /dev/null +++ b/tools/testing/selftests/drivers/net/stats.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import ksft_ge, ksft_eq, ksft_in, ksft_true, ksft_raises, KsftSkipEx, KsftXfailEx +from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError +from lib.py import NetDrvEnv + +ethnl = EthtoolFamily() +netfam = NetdevFamily() +rtnl = RtnlFamily() + + +def check_pause(cfg) -> None: + global ethnl + + try: + ethnl.pause_get({"header": {"dev-index": cfg.ifindex}}) + except NlError as e: + if e.error == 95: + raise KsftXfailEx("pause not supported by the device") + raise + + data = ethnl.pause_get({"header": {"dev-index": cfg.ifindex, + "flags": {'stats'}}}) + ksft_true(data['stats'], "driver does not report stats") + + +def check_fec(cfg) -> None: + global ethnl + + try: + ethnl.fec_get({"header": {"dev-index": cfg.ifindex}}) + except NlError as e: + if e.error == 95: + raise KsftXfailEx("FEC not supported by the device") + raise + + data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex, + "flags": {'stats'}}}) + ksft_true(data['stats'], "driver does not report stats") + + +def pkt_byte_sum(cfg) -> None: + global netfam, rtnl + + def get_qstat(test): + global netfam + stats = netfam.qstats_get({}, dump=True) + if stats: + for qs in stats: + if qs["ifindex"]== test.ifindex: + return qs + + qstat = get_qstat(cfg) + if qstat is None: + raise KsftSkipEx("qstats not supported by the device") + + for key in ['tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes']: + ksft_in(key, qstat, "Drivers should always report basic keys") + + # Compare stats, rtnl stats and qstats must match, + # but the interface may be up, so do a series of dumps + # each time the more "recent" stats must be higher or same. + def stat_cmp(rstat, qstat): + for key in ['tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes']: + if rstat[key] != qstat[key]: + return rstat[key] - qstat[key] + return 0 + + for _ in range(10): + rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] + if stat_cmp(rtstat, qstat) < 0: + raise Exception("RTNL stats are lower, fetched later") + qstat = get_qstat(cfg) + if stat_cmp(rtstat, qstat) > 0: + raise Exception("Qstats are lower, fetched later") + + +def qstat_by_ifindex(cfg) -> None: + global netfam + global rtnl + + # Construct a map ifindex -> [dump, by-index, dump] + ifindexes = {} + stats = netfam.qstats_get({}, dump=True) + for entry in stats: + ifindexes[entry['ifindex']] = [entry, None, None] + + for ifindex in ifindexes.keys(): + entry = netfam.qstats_get({"ifindex": ifindex}, dump=True) + ksft_eq(len(entry), 1) + ifindexes[entry[0]['ifindex']][1] = entry[0] + + stats = netfam.qstats_get({}, dump=True) + for entry in stats: + ifindexes[entry['ifindex']][2] = entry + + if len(ifindexes) == 0: + raise KsftSkipEx("No ifindex supports qstats") + + # Now make sure the stats match/make sense + for ifindex, triple in ifindexes.items(): + all_keys = triple[0].keys() | triple[1].keys() | triple[2].keys() + + for key in all_keys: + ksft_ge(triple[1][key], triple[0][key], comment="bad key: " + key) + ksft_ge(triple[2][key], triple[1][key], comment="bad key: " + key) + + # Test invalid dumps + # 0 is invalid + with ksft_raises(NlError) as cm: + netfam.qstats_get({"ifindex": 0}, dump=True) + ksft_eq(cm.exception.nl_msg.error, -34) + ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex') + + # loopback has no stats + with ksft_raises(NlError) as cm: + netfam.qstats_get({"ifindex": 1}, dump=True) + ksft_eq(cm.exception.nl_msg.error, -95) + ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex') + + # Try to get stats for lowest unused ifindex but not 0 + devs = rtnl.getlink({}, dump=True) + all_ifindexes = set([dev["ifi-index"] for dev in devs]) + lowest = 2 + while lowest in all_ifindexes: + lowest += 1 + + with ksft_raises(NlError) as cm: + netfam.qstats_get({"ifindex": lowest}, dump=True) + ksft_eq(cm.exception.nl_msg.error, -19) + ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex') + + +def main() -> None: + with NetDrvEnv(__file__) as cfg: + ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/virtio_net/Makefile b/tools/testing/selftests/drivers/net/virtio_net/Makefile new file mode 100644 index 0000000000..7ec7cd3ab2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/virtio_net/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0+ OR MIT + +TEST_PROGS = basic_features.sh \ + # + +TEST_FILES = \ + virtio_net_common.sh \ + # + +TEST_INCLUDES = \ + ../../../net/forwarding/lib.sh \ + ../../../net/lib.sh \ + # + +include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/virtio_net/basic_features.sh b/tools/testing/selftests/drivers/net/virtio_net/basic_features.sh new file mode 100755 index 0000000000..cf8cf816ed --- /dev/null +++ b/tools/testing/selftests/drivers/net/virtio_net/basic_features.sh @@ -0,0 +1,131 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# See virtio_net_common.sh comments for more details about assumed setup + +ALL_TESTS=" + initial_ping_test + f_mac_test +" + +source virtio_net_common.sh + +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh + +h1=${NETIFS[p1]} +h2=${NETIFS[p2]} + +h1_create() +{ + simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_destroy() +{ + simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h2_create() +{ + simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_destroy() +{ + simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +initial_ping_test() +{ + setup_cleanup + setup_prepare + ping_test $h1 $H2_IPV4 " simple" +} + +f_mac_test() +{ + RET=0 + local test_name="mac feature filtered" + + virtio_feature_present $h1 $VIRTIO_NET_F_MAC + if [ $? -ne 0 ]; then + log_test_skip "$test_name" "Device $h1 is missing feature $VIRTIO_NET_F_MAC." + return 0 + fi + virtio_feature_present $h1 $VIRTIO_NET_F_MAC + if [ $? -ne 0 ]; then + log_test_skip "$test_name" "Device $h2 is missing feature $VIRTIO_NET_F_MAC." + return 0 + fi + + setup_cleanup + setup_prepare + + grep -q 0 /sys/class/net/$h1/addr_assign_type + check_err $? "Permanent address assign type for $h1 is not set" + grep -q 0 /sys/class/net/$h2/addr_assign_type + check_err $? "Permanent address assign type for $h2 is not set" + + setup_cleanup + virtio_filter_feature_add $h1 $VIRTIO_NET_F_MAC + virtio_filter_feature_add $h2 $VIRTIO_NET_F_MAC + setup_prepare + + grep -q 0 /sys/class/net/$h1/addr_assign_type + check_fail $? "Permanent address assign type for $h1 is set when F_MAC feature is filtered" + grep -q 0 /sys/class/net/$h2/addr_assign_type + check_fail $? "Permanent address assign type for $h2 is set when F_MAC feature is filtered" + + ping_do $h1 $H2_IPV4 + check_err $? "Ping failed" + + log_test "$test_name" +} + +setup_prepare() +{ + virtio_device_rebind $h1 + virtio_device_rebind $h2 + wait_for_dev $h1 + wait_for_dev $h2 + + vrf_prepare + + h1_create + h2_create +} + +setup_cleanup() +{ + h2_destroy + h1_destroy + + vrf_cleanup + + virtio_filter_features_clear $h1 + virtio_filter_features_clear $h2 + virtio_device_rebind $h1 + virtio_device_rebind $h2 + wait_for_dev $h1 + wait_for_dev $h2 +} + +cleanup() +{ + pre_cleanup + setup_cleanup +} + +check_driver $h1 "virtio_net" +check_driver $h2 "virtio_net" +check_virtio_debugfs $h1 +check_virtio_debugfs $h2 + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/drivers/net/virtio_net/config b/tools/testing/selftests/drivers/net/virtio_net/config new file mode 100644 index 0000000000..bcf7555eaf --- /dev/null +++ b/tools/testing/selftests/drivers/net/virtio_net/config @@ -0,0 +1,8 @@ +CONFIG_BPF_SYSCALL=y +CONFIG_CGROUP_BPF=y +CONFIG_IPV6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_VRF=m +CONFIG_VIRTIO_DEBUG=y +CONFIG_VIRTIO_NET=y diff --git a/tools/testing/selftests/drivers/net/virtio_net/virtio_net_common.sh b/tools/testing/selftests/drivers/net/virtio_net/virtio_net_common.sh new file mode 100644 index 0000000000..57bd8055e2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/virtio_net/virtio_net_common.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This assumes running on a host with two virtio interfaces connected +# back to back. Example script to do such wire-up of tap devices would +# look like this: +# +# ======================================================================================================= +# #!/bin/bash +# +# DEV1="$1" +# DEV2="$2" +# +# sudo tc qdisc add dev $DEV1 clsact +# sudo tc qdisc add dev $DEV2 clsact +# sudo tc filter add dev $DEV1 ingress protocol all pref 1 matchall action mirred egress redirect dev $DEV2 +# sudo tc filter add dev $DEV2 ingress protocol all pref 1 matchall action mirred egress redirect dev $DEV1 +# sudo ip link set $DEV1 up +# sudo ip link set $DEV2 up +# ======================================================================================================= + +REQUIRE_MZ="no" +NETIF_CREATE="no" +NETIF_FIND_DRIVER="virtio_net" +NUM_NETIFS=2 + +H1_IPV4="192.0.2.1" +H2_IPV4="192.0.2.2" +H1_IPV6="2001:db8:1::1" +H2_IPV6="2001:db8:1::2" + +VIRTIO_NET_F_MAC=5 + +virtio_device_get() +{ + local dev=$1; shift + local device_path="/sys/class/net/$dev/device/" + + basename `realpath $device_path` +} + +virtio_device_rebind() +{ + local dev=$1; shift + local device=`virtio_device_get $dev` + + echo "$device" > /sys/bus/virtio/drivers/virtio_net/unbind + echo "$device" > /sys/bus/virtio/drivers/virtio_net/bind +} + +virtio_debugfs_get() +{ + local dev=$1; shift + local device=`virtio_device_get $dev` + + echo /sys/kernel/debug/virtio/$device/ +} + +check_virtio_debugfs() +{ + local dev=$1; shift + local debugfs=`virtio_debugfs_get $dev` + + if [ ! -f "$debugfs/device_features" ] || + [ ! -f "$debugfs/filter_feature_add" ] || + [ ! -f "$debugfs/filter_feature_del" ] || + [ ! -f "$debugfs/filter_features" ] || + [ ! -f "$debugfs/filter_features_clear" ]; then + echo "SKIP: not possible to access debugfs for $dev" + exit $ksft_skip + fi +} + +virtio_feature_present() +{ + local dev=$1; shift + local feature=$1; shift + local debugfs=`virtio_debugfs_get $dev` + + cat $debugfs/device_features |grep "^$feature$" &> /dev/null + return $? +} + +virtio_filter_features_clear() +{ + local dev=$1; shift + local debugfs=`virtio_debugfs_get $dev` + + echo "1" > $debugfs/filter_features_clear +} + +virtio_filter_feature_add() +{ + local dev=$1; shift + local feature=$1; shift + local debugfs=`virtio_debugfs_get $dev` + + echo "$feature" > $debugfs/filter_feature_add +} diff --git a/tools/testing/selftests/exec/recursion-depth.c b/tools/testing/selftests/exec/recursion-depth.c index b2f37d86a5..438c8ff2fd 100644 --- a/tools/testing/selftests/exec/recursion-depth.c +++ b/tools/testing/selftests/exec/recursion-depth.c @@ -37,25 +37,25 @@ int main(void) ksft_test_result_skip("error: unshare, errno %d\n", errno); ksft_finished(); } - ksft_exit_fail_msg("error: unshare, errno %d\n", errno); + ksft_exit_fail_perror("error: unshare"); } if (mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL) == -1) - ksft_exit_fail_msg("error: mount '/', errno %d\n", errno); + ksft_exit_fail_perror("error: mount '/'"); /* Require "exec" filesystem. */ if (mount(NULL, "/tmp", "ramfs", 0, NULL) == -1) - ksft_exit_fail_msg("error: mount ramfs, errno %d\n", errno); + ksft_exit_fail_perror("error: mount ramfs"); #define FILENAME "/tmp/1" fd = creat(FILENAME, 0700); if (fd == -1) - ksft_exit_fail_msg("error: creat, errno %d\n", errno); + ksft_exit_fail_perror("error: creat"); #define S "#!" FILENAME "\n" if (write(fd, S, strlen(S)) != strlen(S)) - ksft_exit_fail_msg("error: write, errno %d\n", errno); + ksft_exit_fail_perror("error: write"); close(fd); diff --git a/tools/testing/selftests/fchmodat2/Makefile b/tools/testing/selftests/fchmodat2/Makefile index 71ec34bf15..4373cea79b 100644 --- a/tools/testing/selftests/fchmodat2/Makefile +++ b/tools/testing/selftests/fchmodat2/Makefile @@ -1,6 +1,15 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan $(KHDR_INCLUDES) +CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined $(KHDR_INCLUDES) + +# gcc requires -static-libasan in order to ensure that Address Sanitizer's +# library is the first one loaded. However, clang already statically links the +# Address Sanitizer if -fsanitize is specified. Therefore, simply omit +# -static-libasan for clang builds. +ifeq ($(LLVM),) + CFLAGS += -static-libasan +endif + TEST_GEN_PROGS := fchmodat2_test include ../lib.mk diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c index 3eafd7da58..e8c019d72c 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c @@ -3,6 +3,7 @@ #define _GNU_SOURCE #include <assert.h> +#include <stddef.h> #include <stdint.h> #include <sched.h> #include <fcntl.h> @@ -124,8 +125,16 @@ static uint32_t old_root_id, old_parent_id; static void cleanup_namespace(void) { - fchdir(orig_root); - chroot("."); + int ret; + + ret = fchdir(orig_root); + if (ret == -1) + ksft_perror("fchdir to original root"); + + ret = chroot("."); + if (ret == -1) + ksft_perror("chroot to original root"); + umount2(root_mntpoint, MNT_DETACH); rmdir(root_mntpoint); } diff --git a/tools/testing/selftests/ftrace/config b/tools/testing/selftests/ftrace/config index e59d985eef..048a312abf 100644 --- a/tools/testing/selftests/ftrace/config +++ b/tools/testing/selftests/ftrace/config @@ -1,16 +1,28 @@ -CONFIG_KPROBES=y +CONFIG_BPF_SYSCALL=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_EPROBE_EVENTS=y +CONFIG_FPROBE=y +CONFIG_FPROBE_EVENTS=y CONFIG_FTRACE=y +CONFIG_FTRACE_SYSCALLS=y +CONFIG_FUNCTION_GRAPH_RETVAL=y CONFIG_FUNCTION_PROFILER=y -CONFIG_TRACER_SNAPSHOT=y -CONFIG_STACK_TRACER=y CONFIG_HIST_TRIGGERS=y -CONFIG_SCHED_TRACER=y -CONFIG_PREEMPT_TRACER=y CONFIG_IRQSOFF_TRACER=y -CONFIG_PREEMPTIRQ_DELAY_TEST=m +CONFIG_KALLSYMS_ALL=y +CONFIG_KPROBES=y +CONFIG_KPROBE_EVENTS=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y +CONFIG_PREEMPTIRQ_DELAY_TEST=m +CONFIG_PREEMPT_TRACER=y +CONFIG_PROBE_EVENTS_BTF_ARGS=y CONFIG_SAMPLES=y CONFIG_SAMPLE_FTRACE_DIRECT=m CONFIG_SAMPLE_TRACE_PRINTK=m -CONFIG_KALLSYMS_ALL=y +CONFIG_SCHED_TRACER=y +CONFIG_STACK_TRACER=y +CONFIG_TRACER_SNAPSHOT=y +CONFIG_UPROBES=y +CONFIG_UPROBE_EVENTS=y diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index 25d4e0fca3..cce72f8b03 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -255,7 +255,13 @@ prlog() { # messages [ "$LOG_FILE" ] && printf "$*$newline" | strip_esc >> $LOG_FILE } catlog() { #file - cat $1 + if [ "${KTAP}" = "1" ]; then + cat $1 | while read line ; do + echo "# $line" + done + else + cat $1 + fi [ "$LOG_FILE" ] && cat $1 | strip_esc >> $LOG_FILE } prlog "=== Ftrace unit tests ===" diff --git a/tools/testing/selftests/ftrace/ftracetest-ktap b/tools/testing/selftests/ftrace/ftracetest-ktap index b3284679ef..14e62ef3f3 100755 --- a/tools/testing/selftests/ftrace/ftracetest-ktap +++ b/tools/testing/selftests/ftrace/ftracetest-ktap @@ -5,4 +5,4 @@ # # Copyright (C) Arm Ltd., 2023 -./ftracetest -K +./ftracetest -K -v diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_args_vfs.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_args_vfs.tc new file mode 100644 index 0000000000..c6a9d2466a --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_args_vfs.tc @@ -0,0 +1,41 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Fprobe event VFS type argument +# requires: dynamic_events "%pd/%pD":README "f[:[<group>/][<event>]] <func-name>[%return] [<args>]":README + + +: "Test argument %pd with name for fprobe" +echo 'f:testprobe dput name=$arg1:%pd' > dynamic_events +echo 1 > events/fprobes/testprobe/enable +grep -q "1" events/fprobes/testprobe/enable +echo 0 > events/fprobes/testprobe/enable +grep "dput" trace | grep -q "enable" +echo "" > dynamic_events +echo "" > trace + +: "Test argument %pd without name for fprobe" +echo 'f:testprobe dput $arg1:%pd' > dynamic_events +echo 1 > events/fprobes/testprobe/enable +grep -q "1" events/fprobes/testprobe/enable +echo 0 > events/fprobes/testprobe/enable +grep "dput" trace | grep -q "enable" +echo "" > dynamic_events +echo "" > trace + +: "Test argument %pD with name for fprobe" +echo 'f:testprobe vfs_read name=$arg1:%pD' > dynamic_events +echo 1 > events/fprobes/testprobe/enable +grep -q "1" events/fprobes/testprobe/enable +echo 0 > events/fprobes/testprobe/enable +grep "vfs_read" trace | grep -q "enable" +echo "" > dynamic_events +echo "" > trace + +: "Test argument %pD without name for fprobe" +echo 'f:testprobe vfs_read $arg1:%pD' > dynamic_events +echo 1 > events/fprobes/testprobe/enable +grep -q "1" events/fprobes/testprobe/enable +echo 0 > events/fprobes/testprobe/enable +grep "vfs_read" trace | grep -q "enable" +echo "" > dynamic_events +echo "" > trace diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc index 25432b8cd5..073a748b93 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc @@ -19,7 +19,7 @@ fail() { # mesg FILTER=set_ftrace_filter FUNC1="schedule" -FUNC2="scheduler_tick" +FUNC2="sched_tick" ALL_FUNCS="#### all functions enabled ####" diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc new file mode 100644 index 0000000000..21a54be689 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc @@ -0,0 +1,40 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Kprobe event VFS type argument +# requires: kprobe_events "%pd/%pD":README + +: "Test argument %pd with name" +echo 'p:testprobe dput name=$arg1:%pd' > kprobe_events +echo 1 > events/kprobes/testprobe/enable +grep -q "1" events/kprobes/testprobe/enable +echo 0 > events/kprobes/testprobe/enable +grep "dput" trace | grep -q "enable" +echo "" > kprobe_events +echo "" > trace + +: "Test argument %pd without name" +echo 'p:testprobe dput $arg1:%pd' > kprobe_events +echo 1 > events/kprobes/testprobe/enable +grep -q "1" events/kprobes/testprobe/enable +echo 0 > events/kprobes/testprobe/enable +grep "dput" trace | grep -q "enable" +echo "" > kprobe_events +echo "" > trace + +: "Test argument %pD with name" +echo 'p:testprobe vfs_read name=$arg1:%pD' > kprobe_events +echo 1 > events/kprobes/testprobe/enable +grep -q "1" events/kprobes/testprobe/enable +echo 0 > events/kprobes/testprobe/enable +grep "vfs_read" trace | grep -q "enable" +echo "" > kprobe_events +echo "" > trace + +: "Test argument %pD without name" +echo 'p:testprobe vfs_read $arg1:%pD' > kprobe_events +echo 1 > events/kprobes/testprobe/enable +grep -q "1" events/kprobes/testprobe/enable +echo 0 > events/kprobes/testprobe/enable +grep "vfs_read" trace | grep -q "enable" +echo "" > kprobe_events +echo "" > trace diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile index 11e157d753..78ab2cd111 100644 --- a/tools/testing/selftests/futex/Makefile +++ b/tools/testing/selftests/futex/Makefile @@ -3,8 +3,6 @@ SUBDIRS := functional TEST_PROGS := run.sh -.PHONY: all clean - include ../lib.mk all: diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common index 0f456dbab6..45b5570441 100644 --- a/tools/testing/selftests/hid/config.common +++ b/tools/testing/selftests/hid/config.common @@ -238,3 +238,4 @@ CONFIG_VLAN_8021Q=y CONFIG_XFRM_SUB_POLICY=y CONFIG_XFRM_USER=y CONFIG_ZEROPLUS_FF=y +CONFIG_KASAN=y diff --git a/tools/testing/selftests/hid/hid_bpf.c b/tools/testing/selftests/hid/hid_bpf.c index 2cf96f818f..f825623e3e 100644 --- a/tools/testing/selftests/hid/hid_bpf.c +++ b/tools/testing/selftests/hid/hid_bpf.c @@ -16,6 +16,11 @@ #define SHOW_UHID_DEBUG 0 +#define min(a, b) \ + ({ __typeof__(a) _a = (a); \ + __typeof__(b) _b = (b); \ + _a < _b ? _a : _b; }) + static unsigned char rdesc[] = { 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */ 0x09, 0x21, /* Usage (Vendor Usage 0x21) */ @@ -111,6 +116,10 @@ struct hid_hw_request_syscall_args { static pthread_mutex_t uhid_started_mtx = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t uhid_started = PTHREAD_COND_INITIALIZER; +static pthread_mutex_t uhid_output_mtx = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t uhid_output_cond = PTHREAD_COND_INITIALIZER; +static unsigned char output_report[10]; + /* no need to protect uhid_stopped, only one thread accesses it */ static bool uhid_stopped; @@ -205,6 +214,13 @@ static int uhid_event(struct __test_metadata *_metadata, int fd) break; case UHID_OUTPUT: UHID_LOG("UHID_OUTPUT from uhid-dev"); + + pthread_mutex_lock(&uhid_output_mtx); + memcpy(output_report, + ev.u.output.data, + min(ev.u.output.size, sizeof(output_report))); + pthread_cond_signal(&uhid_output_cond); + pthread_mutex_unlock(&uhid_output_mtx); break; case UHID_GET_REPORT: UHID_LOG("UHID_GET_REPORT from uhid-dev"); @@ -734,8 +750,100 @@ TEST_F(hid_bpf, test_hid_change_report) } /* - * Attach hid_user_raw_request to the given uhid device, - * call the bpf program from userspace + * Call hid_bpf_input_report against the given uhid device, + * check that the program is called and does the expected. + */ +TEST_F(hid_bpf, test_hid_user_input_report_call) +{ + struct hid_hw_request_syscall_args args = { + .retval = -1, + .size = 10, + }; + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, tattrs, + .ctx_in = &args, + .ctx_size_in = sizeof(args), + ); + __u8 buf[10] = {0}; + int err, prog_fd; + + LOAD_BPF; + + args.hid = self->hid_id; + args.data[0] = 1; /* report ID */ + args.data[1] = 2; /* report ID */ + args.data[2] = 42; /* report ID */ + + prog_fd = bpf_program__fd(self->skel->progs.hid_user_input_report); + + /* check that there is no data to read from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, -1) TH_LOG("read_hidraw"); + + err = bpf_prog_test_run_opts(prog_fd, &tattrs); + + ASSERT_OK(err) TH_LOG("error while calling bpf_prog_test_run_opts"); + + ASSERT_EQ(args.retval, 0); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 1); + ASSERT_EQ(buf[1], 2); + ASSERT_EQ(buf[2], 42); +} + +/* + * Call hid_bpf_hw_output_report against the given uhid device, + * check that the program is called and does the expected. + */ +TEST_F(hid_bpf, test_hid_user_output_report_call) +{ + struct hid_hw_request_syscall_args args = { + .retval = -1, + .size = 10, + }; + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, tattrs, + .ctx_in = &args, + .ctx_size_in = sizeof(args), + ); + int err, cond_err, prog_fd; + struct timespec time_to_wait; + + LOAD_BPF; + + args.hid = self->hid_id; + args.data[0] = 1; /* report ID */ + args.data[1] = 2; /* report ID */ + args.data[2] = 42; /* report ID */ + + prog_fd = bpf_program__fd(self->skel->progs.hid_user_output_report); + + pthread_mutex_lock(&uhid_output_mtx); + + memset(output_report, 0, sizeof(output_report)); + clock_gettime(CLOCK_REALTIME, &time_to_wait); + time_to_wait.tv_sec += 2; + + err = bpf_prog_test_run_opts(prog_fd, &tattrs); + cond_err = pthread_cond_timedwait(&uhid_output_cond, &uhid_output_mtx, &time_to_wait); + + ASSERT_OK(err) TH_LOG("error while calling bpf_prog_test_run_opts"); + ASSERT_OK(cond_err) TH_LOG("error while calling waiting for the condition"); + + ASSERT_EQ(args.retval, 3); + + ASSERT_EQ(output_report[0], 1); + ASSERT_EQ(output_report[1], 2); + ASSERT_EQ(output_report[2], 42); + + pthread_mutex_unlock(&uhid_output_mtx); +} + +/* + * Call hid_hw_raw_request against the given uhid device, * check that the program is called and does the expected. */ TEST_F(hid_bpf, test_hid_user_raw_request_call) diff --git a/tools/testing/selftests/hid/progs/hid.c b/tools/testing/selftests/hid/progs/hid.c index 1e558826b8..f67d35def1 100644 --- a/tools/testing/selftests/hid/progs/hid.c +++ b/tools/testing/selftests/hid/progs/hid.c @@ -101,6 +101,52 @@ int hid_user_raw_request(struct hid_hw_request_syscall_args *args) return 0; } +SEC("syscall") +int hid_user_output_report(struct hid_hw_request_syscall_args *args) +{ + struct hid_bpf_ctx *ctx; + const size_t size = args->size; + int i, ret = 0; + + if (size > sizeof(args->data)) + return -7; /* -E2BIG */ + + ctx = hid_bpf_allocate_context(args->hid); + if (!ctx) + return -1; /* EPERM check */ + + ret = hid_bpf_hw_output_report(ctx, + args->data, + size); + args->retval = ret; + + hid_bpf_release_context(ctx); + + return 0; +} + +SEC("syscall") +int hid_user_input_report(struct hid_hw_request_syscall_args *args) +{ + struct hid_bpf_ctx *ctx; + const size_t size = args->size; + int i, ret = 0; + + if (size > sizeof(args->data)) + return -7; /* -E2BIG */ + + ctx = hid_bpf_allocate_context(args->hid); + if (!ctx) + return -1; /* EPERM check */ + + ret = hid_bpf_input_report(ctx, HID_INPUT_REPORT, args->data, size); + args->retval = ret; + + hid_bpf_release_context(ctx); + + return 0; +} + static const __u8 rdesc[] = { 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */ 0x09, 0x32, /* USAGE (Z) */ diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h index 65e657ac11..9cd56821d0 100644 --- a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h +++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h @@ -94,5 +94,11 @@ extern int hid_bpf_hw_request(struct hid_bpf_ctx *ctx, size_t buf__sz, enum hid_report_type type, enum hid_class_request reqtype) __ksym; +extern int hid_bpf_hw_output_report(struct hid_bpf_ctx *ctx, + __u8 *buf, size_t buf__sz) __ksym; +extern int hid_bpf_input_report(struct hid_bpf_ctx *ctx, + enum hid_report_type type, + __u8 *data, + size_t buf__sz) __ksym; #endif /* __HID_BPF_HELPERS_H */ diff --git a/tools/testing/selftests/hid/tests/base.py b/tools/testing/selftests/hid/tests/base.py index 51433063b2..3a465768e5 100644 --- a/tools/testing/selftests/hid/tests/base.py +++ b/tools/testing/selftests/hid/tests/base.py @@ -8,11 +8,13 @@ import libevdev import os import pytest +import shutil +import subprocess import time import logging -from hidtools.device.base_device import BaseDevice, EvdevMatch, SysfsFile +from .base_device import BaseDevice, EvdevMatch, SysfsFile from pathlib import Path from typing import Final, List, Tuple @@ -157,6 +159,17 @@ class BaseTestCase: # for example ("playstation", "hid-playstation") kernel_modules: List[Tuple[str, str]] = [] + # List of in kernel HID-BPF object files to load + # before starting the test + # Any existing pre-loaded HID-BPF module will be removed + # before the ones in this list will be manually loaded. + # Each Element is a tuple '(hid_bpf_object, rdesc_fixup_present)', + # for example '("xppen-ArtistPro16Gen2.bpf.o", True)' + # If 'rdesc_fixup_present' is True, the test needs to wait + # for one unbind and rebind before it can be sure the kernel is + # ready + hid_bpfs: List[Tuple[str, bool]] = [] + def assertInputEventsIn(self, expected_events, effective_events): effective_events = effective_events.copy() for ev in expected_events: @@ -211,8 +224,6 @@ class BaseTestCase: # we don't know beforehand the name of the module from modinfo sysfs_path = Path("/sys/module") / kernel_module.replace("-", "_") if not sysfs_path.exists(): - import subprocess - ret = subprocess.run(["/usr/sbin/modprobe", kernel_module]) if ret.returncode != 0: pytest.skip( @@ -225,6 +236,64 @@ class BaseTestCase: self._load_kernel_module(kernel_driver, kernel_module) yield + def load_hid_bpfs(self): + script_dir = Path(os.path.dirname(os.path.realpath(__file__))) + root_dir = (script_dir / "../../../../..").resolve() + bpf_dir = root_dir / "drivers/hid/bpf/progs" + + udev_hid_bpf = shutil.which("udev-hid-bpf") + if not udev_hid_bpf: + pytest.skip("udev-hid-bpf not found in $PATH, skipping") + + wait = False + for _, rdesc_fixup in self.hid_bpfs: + if rdesc_fixup: + wait = True + + for hid_bpf, _ in self.hid_bpfs: + # We need to start `udev-hid-bpf` in the background + # and dispatch uhid events in case the kernel needs + # to fetch features on the device + process = subprocess.Popen( + [ + "udev-hid-bpf", + "--verbose", + "add", + str(self.uhdev.sys_path), + str(bpf_dir / hid_bpf), + ], + ) + while process.poll() is None: + self.uhdev.dispatch(1) + + if process.poll() != 0: + pytest.fail( + f"Couldn't insert hid-bpf program '{hid_bpf}', marking the test as failed" + ) + + if wait: + # the HID-BPF program exports a rdesc fixup, so it needs to be + # unbound by the kernel and then rebound. + # Ensure we get the bound event exactly 2 times (one for the normal + # uhid loading, and then the reload from HID-BPF) + now = time.time() + while self.uhdev.kernel_ready_count < 2 and time.time() - now < 2: + self.uhdev.dispatch(1) + + if self.uhdev.kernel_ready_count < 2: + pytest.fail( + f"Couldn't insert hid-bpf programs, marking the test as failed" + ) + + def unload_hid_bpfs(self): + ret = subprocess.run( + ["udev-hid-bpf", "--verbose", "remove", str(self.uhdev.sys_path)], + ) + if ret.returncode != 0: + pytest.fail( + f"Couldn't unload hid-bpf programs, marking the test as failed" + ) + @pytest.fixture() def new_uhdev(self, load_kernel_module): return self.create_device() @@ -248,12 +317,18 @@ class BaseTestCase: now = time.time() while not self.uhdev.is_ready() and time.time() - now < 5: self.uhdev.dispatch(1) + + if self.hid_bpfs: + self.load_hid_bpfs() + if self.uhdev.get_evdev() is None: logger.warning( f"available list of input nodes: (default application is '{self.uhdev.application}')" ) logger.warning(self.uhdev.input_nodes) yield + if self.hid_bpfs: + self.unload_hid_bpfs() self.uhdev = None except PermissionError: pytest.skip("Insufficient permissions, run me as root") @@ -313,8 +388,6 @@ class HIDTestUdevRule(object): self.reload_udev_rules() def reload_udev_rules(self): - import subprocess - subprocess.run("udevadm control --reload-rules".split()) subprocess.run("systemd-hwdb update".split()) @@ -330,10 +403,11 @@ class HIDTestUdevRule(object): delete=False, ) as f: f.write( - 'KERNELS=="*input*", ATTRS{name}=="*uhid test *", ENV{LIBINPUT_IGNORE_DEVICE}="1"\n' - ) - f.write( - 'KERNELS=="*input*", ATTRS{name}=="*uhid test * System Multi Axis", ENV{ID_INPUT_TOUCHSCREEN}="", ENV{ID_INPUT_SYSTEM_MULTIAXIS}="1"\n' + """ +KERNELS=="*input*", ATTRS{name}=="*uhid test *", ENV{LIBINPUT_IGNORE_DEVICE}="1" +KERNELS=="*hid*", ENV{HID_NAME}=="*uhid test *", ENV{HID_BPF_IGNORE_DEVICE}="1" +KERNELS=="*input*", ATTRS{name}=="*uhid test * System Multi Axis", ENV{ID_INPUT_TOUCHSCREEN}="", ENV{ID_INPUT_SYSTEM_MULTIAXIS}="1" +""" ) self.rulesfile = f diff --git a/tools/testing/selftests/hid/tests/base_device.py b/tools/testing/selftests/hid/tests/base_device.py new file mode 100644 index 0000000000..e0515be97f --- /dev/null +++ b/tools/testing/selftests/hid/tests/base_device.py @@ -0,0 +1,421 @@ +#!/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# -*- coding: utf-8 -*- +# +# Copyright (c) 2017 Benjamin Tissoires <benjamin.tissoires@gmail.com> +# Copyright (c) 2017 Red Hat, Inc. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import fcntl +import functools +import libevdev +import os + +try: + import pyudev +except ImportError: + raise ImportError("UHID is not supported due to missing pyudev dependency") + +import logging + +import hidtools.hid as hid +from hidtools.uhid import UHIDDevice +from hidtools.util import BusType + +from pathlib import Path +from typing import Any, ClassVar, Dict, List, Optional, Tuple, Type, Union + +logger = logging.getLogger("hidtools.device.base_device") + + +class SysfsFile(object): + def __init__(self, path): + self.path = path + + def __set_value(self, value): + with open(self.path, "w") as f: + return f.write(f"{value}\n") + + def __get_value(self): + with open(self.path) as f: + return f.read().strip() + + @property + def int_value(self) -> int: + return int(self.__get_value()) + + @int_value.setter + def int_value(self, v: int) -> None: + self.__set_value(v) + + @property + def str_value(self) -> str: + return self.__get_value() + + @str_value.setter + def str_value(self, v: str) -> None: + self.__set_value(v) + + +class LED(object): + def __init__(self, sys_path): + self.max_brightness = SysfsFile(sys_path / "max_brightness").int_value + self.__brightness = SysfsFile(sys_path / "brightness") + + @property + def brightness(self) -> int: + return self.__brightness.int_value + + @brightness.setter + def brightness(self, value: int) -> None: + self.__brightness.int_value = value + + +class PowerSupply(object): + """Represents Linux power_supply_class sysfs nodes.""" + + def __init__(self, sys_path): + self._capacity = SysfsFile(sys_path / "capacity") + self._status = SysfsFile(sys_path / "status") + self._type = SysfsFile(sys_path / "type") + + @property + def capacity(self) -> int: + return self._capacity.int_value + + @property + def status(self) -> str: + return self._status.str_value + + @property + def type(self) -> str: + return self._type.str_value + + +class HIDIsReady(object): + """ + Companion class that binds to a kernel mechanism + and that allows to know when a uhid device is ready or not. + + See :meth:`is_ready` for details. + """ + + def __init__(self: "HIDIsReady", uhid: UHIDDevice) -> None: + self.uhid = uhid + + def is_ready(self: "HIDIsReady") -> bool: + """ + Overwrite in subclasses: should return True or False whether + the attached uhid device is ready or not. + """ + return False + + +class UdevHIDIsReady(HIDIsReady): + _pyudev_context: ClassVar[Optional[pyudev.Context]] = None + _pyudev_monitor: ClassVar[Optional[pyudev.Monitor]] = None + _uhid_devices: ClassVar[Dict[int, Tuple[bool, int]]] = {} + + def __init__(self: "UdevHIDIsReady", uhid: UHIDDevice) -> None: + super().__init__(uhid) + self._init_pyudev() + + @classmethod + def _init_pyudev(cls: Type["UdevHIDIsReady"]) -> None: + if cls._pyudev_context is None: + cls._pyudev_context = pyudev.Context() + cls._pyudev_monitor = pyudev.Monitor.from_netlink(cls._pyudev_context) + cls._pyudev_monitor.filter_by("hid") + cls._pyudev_monitor.start() + + UHIDDevice._append_fd_to_poll( + cls._pyudev_monitor.fileno(), cls._cls_udev_event_callback + ) + + @classmethod + def _cls_udev_event_callback(cls: Type["UdevHIDIsReady"]) -> None: + if cls._pyudev_monitor is None: + return + event: pyudev.Device + for event in iter(functools.partial(cls._pyudev_monitor.poll, 0.02), None): + if event.action not in ["bind", "remove", "unbind"]: + return + + logger.debug(f"udev event: {event.action} -> {event}") + + id = int(event.sys_path.strip().split(".")[-1], 16) + + device_ready, count = cls._uhid_devices.get(id, (False, 0)) + + ready = event.action == "bind" + if not device_ready and ready: + count += 1 + cls._uhid_devices[id] = (ready, count) + + def is_ready(self: "UdevHIDIsReady") -> Tuple[bool, int]: + try: + return self._uhid_devices[self.uhid.hid_id] + except KeyError: + return (False, 0) + + +class EvdevMatch(object): + def __init__( + self: "EvdevMatch", + *, + requires: List[Any] = [], + excludes: List[Any] = [], + req_properties: List[Any] = [], + excl_properties: List[Any] = [], + ) -> None: + self.requires = requires + self.excludes = excludes + self.req_properties = req_properties + self.excl_properties = excl_properties + + def is_a_match(self: "EvdevMatch", evdev: libevdev.Device) -> bool: + for m in self.requires: + if not evdev.has(m): + return False + for m in self.excludes: + if evdev.has(m): + return False + for p in self.req_properties: + if not evdev.has_property(p): + return False + for p in self.excl_properties: + if evdev.has_property(p): + return False + return True + + +class EvdevDevice(object): + """ + Represents an Evdev node and its properties. + This is a stub for the libevdev devices, as they are relying on + uevent to get the data, saving us some ioctls to fetch the names + and properties. + """ + + def __init__(self: "EvdevDevice", sysfs: Path) -> None: + self.sysfs = sysfs + self.event_node: Any = None + self.libevdev: Optional[libevdev.Device] = None + + self.uevents = {} + # all of the interesting properties are stored in the input uevent, so in the parent + # so convert the uevent file of the parent input node into a dict + with open(sysfs.parent / "uevent") as f: + for line in f.readlines(): + key, value = line.strip().split("=") + self.uevents[key] = value.strip('"') + + # we open all evdev nodes in order to not miss any event + self.open() + + @property + def name(self: "EvdevDevice") -> str: + assert "NAME" in self.uevents + + return self.uevents["NAME"] + + @property + def evdev(self: "EvdevDevice") -> Path: + return Path("/dev/input") / self.sysfs.name + + def matches_application( + self: "EvdevDevice", application: str, matches: Dict[str, EvdevMatch] + ) -> bool: + if self.libevdev is None: + return False + + if application in matches: + return matches[application].is_a_match(self.libevdev) + + logger.error( + f"application '{application}' is unknown, please update/fix hid-tools" + ) + assert False # hid-tools likely needs an update + + def open(self: "EvdevDevice") -> libevdev.Device: + self.event_node = open(self.evdev, "rb") + self.libevdev = libevdev.Device(self.event_node) + + assert self.libevdev.fd is not None + + fd = self.libevdev.fd.fileno() + flag = fcntl.fcntl(fd, fcntl.F_GETFD) + fcntl.fcntl(fd, fcntl.F_SETFL, flag | os.O_NONBLOCK) + + return self.libevdev + + def close(self: "EvdevDevice") -> None: + if self.libevdev is not None and self.libevdev.fd is not None: + self.libevdev.fd.close() + self.libevdev = None + if self.event_node is not None: + self.event_node.close() + self.event_node = None + + +class BaseDevice(UHIDDevice): + # default _application_matches that matches nothing. This needs + # to be set in the subclasses to have get_evdev() working + _application_matches: Dict[str, EvdevMatch] = {} + + def __init__( + self, + name, + application, + rdesc_str: Optional[str] = None, + rdesc: Optional[Union[hid.ReportDescriptor, str, bytes]] = None, + input_info=None, + ) -> None: + self._kernel_is_ready: HIDIsReady = UdevHIDIsReady(self) + if rdesc_str is None and rdesc is None: + raise Exception("Please provide at least a rdesc or rdesc_str") + super().__init__() + if name is None: + name = f"uhid gamepad test {self.__class__.__name__}" + if input_info is None: + input_info = (BusType.USB, 1, 2) + self.name = name + self.info = input_info + self.default_reportID = None + self.opened = False + self.started = False + self.application = application + self._input_nodes: Optional[list[EvdevDevice]] = None + if rdesc is None: + assert rdesc_str is not None + self.rdesc = hid.ReportDescriptor.from_human_descr(rdesc_str) # type: ignore + else: + self.rdesc = rdesc # type: ignore + + @property + def power_supply_class(self: "BaseDevice") -> Optional[PowerSupply]: + ps = self.walk_sysfs("power_supply", "power_supply/*") + if ps is None or len(ps) < 1: + return None + + return PowerSupply(ps[0]) + + @property + def led_classes(self: "BaseDevice") -> List[LED]: + leds = self.walk_sysfs("led", "**/max_brightness") + if leds is None: + return [] + + return [LED(led.parent) for led in leds] + + @property + def kernel_is_ready(self: "BaseDevice") -> bool: + return self._kernel_is_ready.is_ready()[0] and self.started + + @property + def kernel_ready_count(self: "BaseDevice") -> int: + return self._kernel_is_ready.is_ready()[1] + + @property + def input_nodes(self: "BaseDevice") -> List[EvdevDevice]: + if self._input_nodes is not None: + return self._input_nodes + + if not self.kernel_is_ready or not self.started: + return [] + + self._input_nodes = [ + EvdevDevice(path) + for path in self.walk_sysfs("input", "input/input*/event*") + ] + return self._input_nodes + + def match_evdev_rule(self, application, evdev): + """Replace this in subclasses if the device has multiple reports + of the same type and we need to filter based on the actual evdev + node. + + returning True will append the corresponding report to + `self.input_nodes[type]` + returning False will ignore this report / type combination + for the device. + """ + return True + + def open(self): + self.opened = True + + def _close_all_opened_evdev(self): + if self._input_nodes is not None: + for e in self._input_nodes: + e.close() + + def __del__(self): + self._close_all_opened_evdev() + + def close(self): + self.opened = False + + def start(self, flags): + self.started = True + + def stop(self): + self.started = False + self._close_all_opened_evdev() + + def next_sync_events(self, application=None): + evdev = self.get_evdev(application) + if evdev is not None: + return list(evdev.events()) + return [] + + @property + def application_matches(self: "BaseDevice") -> Dict[str, EvdevMatch]: + return self._application_matches + + @application_matches.setter + def application_matches(self: "BaseDevice", data: Dict[str, EvdevMatch]) -> None: + self._application_matches = data + + def get_evdev(self, application=None): + if application is None: + application = self.application + + if len(self.input_nodes) == 0: + return None + + assert self._input_nodes is not None + + if len(self._input_nodes) == 1: + evdev = self._input_nodes[0] + if self.match_evdev_rule(application, evdev.libevdev): + return evdev.libevdev + else: + for _evdev in self._input_nodes: + if _evdev.matches_application(application, self.application_matches): + if self.match_evdev_rule(application, _evdev.libevdev): + return _evdev.libevdev + + def is_ready(self): + """Returns whether a UHID device is ready. Can be overwritten in + subclasses to add extra conditions on when to consider a UHID + device ready. This can be: + + - we need to wait on different types of input devices to be ready + (Touch Screen and Pen for example) + - we need to have at least 4 LEDs present + (len(self.uhdev.leds_classes) == 4) + - or any other combinations""" + return self.kernel_is_ready diff --git a/tools/testing/selftests/hid/tests/base_gamepad.py b/tools/testing/selftests/hid/tests/base_gamepad.py new file mode 100644 index 0000000000..ec74d75767 --- /dev/null +++ b/tools/testing/selftests/hid/tests/base_gamepad.py @@ -0,0 +1,238 @@ +# SPDX-License-Identifier: GPL-2.0 +import libevdev + +from .base_device import BaseDevice +from hidtools.util import BusType + + +class InvalidHIDCommunication(Exception): + pass + + +class GamepadData(object): + pass + + +class AxisMapping(object): + """Represents a mapping between a HID type + and an evdev event""" + + def __init__(self, hid, evdev=None): + self.hid = hid.lower() + + if evdev is None: + evdev = f"ABS_{hid.upper()}" + + self.evdev = libevdev.evbit("EV_ABS", evdev) + + +class BaseGamepad(BaseDevice): + buttons_map = { + 1: "BTN_SOUTH", + 2: "BTN_EAST", + 3: "BTN_C", + 4: "BTN_NORTH", + 5: "BTN_WEST", + 6: "BTN_Z", + 7: "BTN_TL", + 8: "BTN_TR", + 9: "BTN_TL2", + 10: "BTN_TR2", + 11: "BTN_SELECT", + 12: "BTN_START", + 13: "BTN_MODE", + 14: "BTN_THUMBL", + 15: "BTN_THUMBR", + } + + axes_map = { + "left_stick": { + "x": AxisMapping("x"), + "y": AxisMapping("y"), + }, + "right_stick": { + "x": AxisMapping("z"), + "y": AxisMapping("Rz"), + }, + } + + def __init__(self, rdesc, application="Game Pad", name=None, input_info=None): + assert rdesc is not None + super().__init__(name, application, input_info=input_info, rdesc=rdesc) + self.buttons = (1, 2, 3) + self._buttons = {} + self.left = (127, 127) + self.right = (127, 127) + self.hat_switch = 15 + assert self.parsed_rdesc is not None + + self.fields = [] + for r in self.parsed_rdesc.input_reports.values(): + if r.application_name == self.application: + self.fields.extend([f.usage_name for f in r]) + + def store_axes(self, which, gamepad, data): + amap = self.axes_map[which] + x, y = data + setattr(gamepad, amap["x"].hid, x) + setattr(gamepad, amap["y"].hid, y) + + def create_report( + self, + *, + left=(None, None), + right=(None, None), + hat_switch=None, + buttons=None, + reportID=None, + application="Game Pad", + ): + """ + Return an input report for this device. + + :param left: a tuple of absolute (x, y) value of the left joypad + where ``None`` is "leave unchanged" + :param right: a tuple of absolute (x, y) value of the right joypad + where ``None`` is "leave unchanged" + :param hat_switch: an absolute angular value of the hat switch + (expressed in 1/8 of circle, 0 being North, 2 East) + where ``None`` is "leave unchanged" + :param buttons: a dict of index/bool for the button states, + where ``None`` is "leave unchanged" + :param reportID: the numeric report ID for this report, if needed + :param application: the application used to report the values + """ + if buttons is not None: + for i, b in buttons.items(): + if i not in self.buttons: + raise InvalidHIDCommunication( + f"button {i} is not part of this {self.application}" + ) + if b is not None: + self._buttons[i] = b + + def replace_none_in_tuple(item, default): + if item is None: + item = (None, None) + + if None in item: + if item[0] is None: + item = (default[0], item[1]) + if item[1] is None: + item = (item[0], default[1]) + + return item + + right = replace_none_in_tuple(right, self.right) + self.right = right + left = replace_none_in_tuple(left, self.left) + self.left = left + + if hat_switch is None: + hat_switch = self.hat_switch + else: + self.hat_switch = hat_switch + + reportID = reportID or self.default_reportID + + gamepad = GamepadData() + for i, b in self._buttons.items(): + gamepad.__setattr__(f"b{i}", int(b) if b is not None else 0) + + self.store_axes("left_stick", gamepad, left) + self.store_axes("right_stick", gamepad, right) + gamepad.hatswitch = hat_switch # type: ignore ### gamepad is by default empty + return super().create_report( + gamepad, reportID=reportID, application=application + ) + + def event( + self, *, left=(None, None), right=(None, None), hat_switch=None, buttons=None + ): + """ + Send an input event on the default report ID. + + :param left: a tuple of absolute (x, y) value of the left joypad + where ``None`` is "leave unchanged" + :param right: a tuple of absolute (x, y) value of the right joypad + where ``None`` is "leave unchanged" + :param hat_switch: an absolute angular value of the hat switch + where ``None`` is "leave unchanged" + :param buttons: a dict of index/bool for the button states, + where ``None`` is "leave unchanged" + """ + r = self.create_report( + left=left, right=right, hat_switch=hat_switch, buttons=buttons + ) + self.call_input_event(r) + return [r] + + +class JoystickGamepad(BaseGamepad): + buttons_map = { + 1: "BTN_TRIGGER", + 2: "BTN_THUMB", + 3: "BTN_THUMB2", + 4: "BTN_TOP", + 5: "BTN_TOP2", + 6: "BTN_PINKIE", + 7: "BTN_BASE", + 8: "BTN_BASE2", + 9: "BTN_BASE3", + 10: "BTN_BASE4", + 11: "BTN_BASE5", + 12: "BTN_BASE6", + 13: "BTN_DEAD", + } + + axes_map = { + "left_stick": { + "x": AxisMapping("x"), + "y": AxisMapping("y"), + }, + "right_stick": { + "x": AxisMapping("rudder"), + "y": AxisMapping("throttle"), + }, + } + + def __init__(self, rdesc, application="Joystick", name=None, input_info=None): + super().__init__(rdesc, application, name, input_info) + + def create_report( + self, + *, + left=(None, None), + right=(None, None), + hat_switch=None, + buttons=None, + reportID=None, + application=None, + ): + """ + Return an input report for this device. + + :param left: a tuple of absolute (x, y) value of the left joypad + where ``None`` is "leave unchanged" + :param right: a tuple of absolute (x, y) value of the right joypad + where ``None`` is "leave unchanged" + :param hat_switch: an absolute angular value of the hat switch + where ``None`` is "leave unchanged" + :param buttons: a dict of index/bool for the button states, + where ``None`` is "leave unchanged" + :param reportID: the numeric report ID for this report, if needed + :param application: the application for this report, if needed + """ + if application is None: + application = "Joystick" + return super().create_report( + left=left, + right=right, + hat_switch=hat_switch, + buttons=buttons, + reportID=reportID, + application=application, + ) + + def store_right_joystick(self, gamepad, data): + gamepad.rudder, gamepad.throttle = data diff --git a/tools/testing/selftests/hid/tests/test_gamepad.py b/tools/testing/selftests/hid/tests/test_gamepad.py index 26c74040b7..8d5b5ffdae 100644 --- a/tools/testing/selftests/hid/tests/test_gamepad.py +++ b/tools/testing/selftests/hid/tests/test_gamepad.py @@ -10,7 +10,8 @@ from . import base import libevdev import pytest -from hidtools.device.base_gamepad import AsusGamepad, SaitekGamepad +from .base_gamepad import BaseGamepad, JoystickGamepad, AxisMapping +from hidtools.util import BusType import logging @@ -199,6 +200,449 @@ class BaseTest: ) +class SaitekGamepad(JoystickGamepad): + # fmt: off + report_descriptor = [ + 0x05, 0x01, # Usage Page (Generic Desktop) 0 + 0x09, 0x04, # Usage (Joystick) 2 + 0xa1, 0x01, # Collection (Application) 4 + 0x09, 0x01, # .Usage (Pointer) 6 + 0xa1, 0x00, # .Collection (Physical) 8 + 0x85, 0x01, # ..Report ID (1) 10 + 0x09, 0x30, # ..Usage (X) 12 + 0x15, 0x00, # ..Logical Minimum (0) 14 + 0x26, 0xff, 0x00, # ..Logical Maximum (255) 16 + 0x35, 0x00, # ..Physical Minimum (0) 19 + 0x46, 0xff, 0x00, # ..Physical Maximum (255) 21 + 0x75, 0x08, # ..Report Size (8) 24 + 0x95, 0x01, # ..Report Count (1) 26 + 0x81, 0x02, # ..Input (Data,Var,Abs) 28 + 0x09, 0x31, # ..Usage (Y) 30 + 0x81, 0x02, # ..Input (Data,Var,Abs) 32 + 0x05, 0x02, # ..Usage Page (Simulation Controls) 34 + 0x09, 0xba, # ..Usage (Rudder) 36 + 0x81, 0x02, # ..Input (Data,Var,Abs) 38 + 0x09, 0xbb, # ..Usage (Throttle) 40 + 0x81, 0x02, # ..Input (Data,Var,Abs) 42 + 0x05, 0x09, # ..Usage Page (Button) 44 + 0x19, 0x01, # ..Usage Minimum (1) 46 + 0x29, 0x0c, # ..Usage Maximum (12) 48 + 0x25, 0x01, # ..Logical Maximum (1) 50 + 0x45, 0x01, # ..Physical Maximum (1) 52 + 0x75, 0x01, # ..Report Size (1) 54 + 0x95, 0x0c, # ..Report Count (12) 56 + 0x81, 0x02, # ..Input (Data,Var,Abs) 58 + 0x95, 0x01, # ..Report Count (1) 60 + 0x75, 0x00, # ..Report Size (0) 62 + 0x81, 0x03, # ..Input (Cnst,Var,Abs) 64 + 0x05, 0x01, # ..Usage Page (Generic Desktop) 66 + 0x09, 0x39, # ..Usage (Hat switch) 68 + 0x25, 0x07, # ..Logical Maximum (7) 70 + 0x46, 0x3b, 0x01, # ..Physical Maximum (315) 72 + 0x55, 0x00, # ..Unit Exponent (0) 75 + 0x65, 0x44, # ..Unit (Degrees^4,EngRotation) 77 + 0x75, 0x04, # ..Report Size (4) 79 + 0x81, 0x42, # ..Input (Data,Var,Abs,Null) 81 + 0x65, 0x00, # ..Unit (None) 83 + 0xc0, # .End Collection 85 + 0x05, 0x0f, # .Usage Page (Vendor Usage Page 0x0f) 86 + 0x09, 0x92, # .Usage (Vendor Usage 0x92) 88 + 0xa1, 0x02, # .Collection (Logical) 90 + 0x85, 0x02, # ..Report ID (2) 92 + 0x09, 0xa0, # ..Usage (Vendor Usage 0xa0) 94 + 0x09, 0x9f, # ..Usage (Vendor Usage 0x9f) 96 + 0x25, 0x01, # ..Logical Maximum (1) 98 + 0x45, 0x00, # ..Physical Maximum (0) 100 + 0x75, 0x01, # ..Report Size (1) 102 + 0x95, 0x02, # ..Report Count (2) 104 + 0x81, 0x02, # ..Input (Data,Var,Abs) 106 + 0x75, 0x06, # ..Report Size (6) 108 + 0x95, 0x01, # ..Report Count (1) 110 + 0x81, 0x03, # ..Input (Cnst,Var,Abs) 112 + 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 114 + 0x75, 0x07, # ..Report Size (7) 116 + 0x25, 0x7f, # ..Logical Maximum (127) 118 + 0x81, 0x02, # ..Input (Data,Var,Abs) 120 + 0x09, 0x94, # ..Usage (Vendor Usage 0x94) 122 + 0x75, 0x01, # ..Report Size (1) 124 + 0x25, 0x01, # ..Logical Maximum (1) 126 + 0x81, 0x02, # ..Input (Data,Var,Abs) 128 + 0xc0, # .End Collection 130 + 0x09, 0x21, # .Usage (Vendor Usage 0x21) 131 + 0xa1, 0x02, # .Collection (Logical) 133 + 0x85, 0x0b, # ..Report ID (11) 135 + 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 137 + 0x26, 0xff, 0x00, # ..Logical Maximum (255) 139 + 0x75, 0x08, # ..Report Size (8) 142 + 0x91, 0x02, # ..Output (Data,Var,Abs) 144 + 0x09, 0x53, # ..Usage (Vendor Usage 0x53) 146 + 0x25, 0x0a, # ..Logical Maximum (10) 148 + 0x91, 0x02, # ..Output (Data,Var,Abs) 150 + 0x09, 0x50, # ..Usage (Vendor Usage 0x50) 152 + 0x27, 0xfe, 0xff, 0x00, 0x00, # ..Logical Maximum (65534) 154 + 0x47, 0xfe, 0xff, 0x00, 0x00, # ..Physical Maximum (65534) 159 + 0x75, 0x10, # ..Report Size (16) 164 + 0x55, 0xfd, # ..Unit Exponent (237) 166 + 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 168 + 0x91, 0x02, # ..Output (Data,Var,Abs) 171 + 0x55, 0x00, # ..Unit Exponent (0) 173 + 0x65, 0x00, # ..Unit (None) 175 + 0x09, 0x54, # ..Usage (Vendor Usage 0x54) 177 + 0x55, 0xfd, # ..Unit Exponent (237) 179 + 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 181 + 0x91, 0x02, # ..Output (Data,Var,Abs) 184 + 0x55, 0x00, # ..Unit Exponent (0) 186 + 0x65, 0x00, # ..Unit (None) 188 + 0x09, 0xa7, # ..Usage (Vendor Usage 0xa7) 190 + 0x55, 0xfd, # ..Unit Exponent (237) 192 + 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 194 + 0x91, 0x02, # ..Output (Data,Var,Abs) 197 + 0x55, 0x00, # ..Unit Exponent (0) 199 + 0x65, 0x00, # ..Unit (None) 201 + 0xc0, # .End Collection 203 + 0x09, 0x5a, # .Usage (Vendor Usage 0x5a) 204 + 0xa1, 0x02, # .Collection (Logical) 206 + 0x85, 0x0c, # ..Report ID (12) 208 + 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 210 + 0x26, 0xff, 0x00, # ..Logical Maximum (255) 212 + 0x45, 0x00, # ..Physical Maximum (0) 215 + 0x75, 0x08, # ..Report Size (8) 217 + 0x91, 0x02, # ..Output (Data,Var,Abs) 219 + 0x09, 0x5c, # ..Usage (Vendor Usage 0x5c) 221 + 0x26, 0x10, 0x27, # ..Logical Maximum (10000) 223 + 0x46, 0x10, 0x27, # ..Physical Maximum (10000) 226 + 0x75, 0x10, # ..Report Size (16) 229 + 0x55, 0xfd, # ..Unit Exponent (237) 231 + 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 233 + 0x91, 0x02, # ..Output (Data,Var,Abs) 236 + 0x55, 0x00, # ..Unit Exponent (0) 238 + 0x65, 0x00, # ..Unit (None) 240 + 0x09, 0x5b, # ..Usage (Vendor Usage 0x5b) 242 + 0x25, 0x7f, # ..Logical Maximum (127) 244 + 0x75, 0x08, # ..Report Size (8) 246 + 0x91, 0x02, # ..Output (Data,Var,Abs) 248 + 0x09, 0x5e, # ..Usage (Vendor Usage 0x5e) 250 + 0x26, 0x10, 0x27, # ..Logical Maximum (10000) 252 + 0x75, 0x10, # ..Report Size (16) 255 + 0x55, 0xfd, # ..Unit Exponent (237) 257 + 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 259 + 0x91, 0x02, # ..Output (Data,Var,Abs) 262 + 0x55, 0x00, # ..Unit Exponent (0) 264 + 0x65, 0x00, # ..Unit (None) 266 + 0x09, 0x5d, # ..Usage (Vendor Usage 0x5d) 268 + 0x25, 0x7f, # ..Logical Maximum (127) 270 + 0x75, 0x08, # ..Report Size (8) 272 + 0x91, 0x02, # ..Output (Data,Var,Abs) 274 + 0xc0, # .End Collection 276 + 0x09, 0x73, # .Usage (Vendor Usage 0x73) 277 + 0xa1, 0x02, # .Collection (Logical) 279 + 0x85, 0x0d, # ..Report ID (13) 281 + 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 283 + 0x26, 0xff, 0x00, # ..Logical Maximum (255) 285 + 0x45, 0x00, # ..Physical Maximum (0) 288 + 0x91, 0x02, # ..Output (Data,Var,Abs) 290 + 0x09, 0x70, # ..Usage (Vendor Usage 0x70) 292 + 0x15, 0x81, # ..Logical Minimum (-127) 294 + 0x25, 0x7f, # ..Logical Maximum (127) 296 + 0x36, 0xf0, 0xd8, # ..Physical Minimum (-10000) 298 + 0x46, 0x10, 0x27, # ..Physical Maximum (10000) 301 + 0x91, 0x02, # ..Output (Data,Var,Abs) 304 + 0xc0, # .End Collection 306 + 0x09, 0x6e, # .Usage (Vendor Usage 0x6e) 307 + 0xa1, 0x02, # .Collection (Logical) 309 + 0x85, 0x0e, # ..Report ID (14) 311 + 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 313 + 0x15, 0x00, # ..Logical Minimum (0) 315 + 0x26, 0xff, 0x00, # ..Logical Maximum (255) 317 + 0x35, 0x00, # ..Physical Minimum (0) 320 + 0x45, 0x00, # ..Physical Maximum (0) 322 + 0x91, 0x02, # ..Output (Data,Var,Abs) 324 + 0x09, 0x70, # ..Usage (Vendor Usage 0x70) 326 + 0x25, 0x7f, # ..Logical Maximum (127) 328 + 0x46, 0x10, 0x27, # ..Physical Maximum (10000) 330 + 0x91, 0x02, # ..Output (Data,Var,Abs) 333 + 0x09, 0x6f, # ..Usage (Vendor Usage 0x6f) 335 + 0x15, 0x81, # ..Logical Minimum (-127) 337 + 0x36, 0xf0, 0xd8, # ..Physical Minimum (-10000) 339 + 0x91, 0x02, # ..Output (Data,Var,Abs) 342 + 0x09, 0x71, # ..Usage (Vendor Usage 0x71) 344 + 0x15, 0x00, # ..Logical Minimum (0) 346 + 0x26, 0xff, 0x00, # ..Logical Maximum (255) 348 + 0x35, 0x00, # ..Physical Minimum (0) 351 + 0x46, 0x68, 0x01, # ..Physical Maximum (360) 353 + 0x91, 0x02, # ..Output (Data,Var,Abs) 356 + 0x09, 0x72, # ..Usage (Vendor Usage 0x72) 358 + 0x75, 0x10, # ..Report Size (16) 360 + 0x26, 0x10, 0x27, # ..Logical Maximum (10000) 362 + 0x46, 0x10, 0x27, # ..Physical Maximum (10000) 365 + 0x55, 0xfd, # ..Unit Exponent (237) 368 + 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 370 + 0x91, 0x02, # ..Output (Data,Var,Abs) 373 + 0x55, 0x00, # ..Unit Exponent (0) 375 + 0x65, 0x00, # ..Unit (None) 377 + 0xc0, # .End Collection 379 + 0x09, 0x77, # .Usage (Vendor Usage 0x77) 380 + 0xa1, 0x02, # .Collection (Logical) 382 + 0x85, 0x51, # ..Report ID (81) 384 + 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 386 + 0x25, 0x7f, # ..Logical Maximum (127) 388 + 0x45, 0x00, # ..Physical Maximum (0) 390 + 0x75, 0x08, # ..Report Size (8) 392 + 0x91, 0x02, # ..Output (Data,Var,Abs) 394 + 0x09, 0x78, # ..Usage (Vendor Usage 0x78) 396 + 0xa1, 0x02, # ..Collection (Logical) 398 + 0x09, 0x7b, # ...Usage (Vendor Usage 0x7b) 400 + 0x09, 0x79, # ...Usage (Vendor Usage 0x79) 402 + 0x09, 0x7a, # ...Usage (Vendor Usage 0x7a) 404 + 0x15, 0x01, # ...Logical Minimum (1) 406 + 0x25, 0x03, # ...Logical Maximum (3) 408 + 0x91, 0x00, # ...Output (Data,Arr,Abs) 410 + 0xc0, # ..End Collection 412 + 0x09, 0x7c, # ..Usage (Vendor Usage 0x7c) 413 + 0x15, 0x00, # ..Logical Minimum (0) 415 + 0x26, 0xfe, 0x00, # ..Logical Maximum (254) 417 + 0x91, 0x02, # ..Output (Data,Var,Abs) 420 + 0xc0, # .End Collection 422 + 0x09, 0x92, # .Usage (Vendor Usage 0x92) 423 + 0xa1, 0x02, # .Collection (Logical) 425 + 0x85, 0x52, # ..Report ID (82) 427 + 0x09, 0x96, # ..Usage (Vendor Usage 0x96) 429 + 0xa1, 0x02, # ..Collection (Logical) 431 + 0x09, 0x9a, # ...Usage (Vendor Usage 0x9a) 433 + 0x09, 0x99, # ...Usage (Vendor Usage 0x99) 435 + 0x09, 0x97, # ...Usage (Vendor Usage 0x97) 437 + 0x09, 0x98, # ...Usage (Vendor Usage 0x98) 439 + 0x09, 0x9b, # ...Usage (Vendor Usage 0x9b) 441 + 0x09, 0x9c, # ...Usage (Vendor Usage 0x9c) 443 + 0x15, 0x01, # ...Logical Minimum (1) 445 + 0x25, 0x06, # ...Logical Maximum (6) 447 + 0x91, 0x00, # ...Output (Data,Arr,Abs) 449 + 0xc0, # ..End Collection 451 + 0xc0, # .End Collection 452 + 0x05, 0xff, # .Usage Page (Vendor Usage Page 0xff) 453 + 0x0a, 0x01, 0x03, # .Usage (Vendor Usage 0x301) 455 + 0xa1, 0x02, # .Collection (Logical) 458 + 0x85, 0x40, # ..Report ID (64) 460 + 0x0a, 0x02, 0x03, # ..Usage (Vendor Usage 0x302) 462 + 0xa1, 0x02, # ..Collection (Logical) 465 + 0x1a, 0x11, 0x03, # ...Usage Minimum (785) 467 + 0x2a, 0x20, 0x03, # ...Usage Maximum (800) 470 + 0x25, 0x10, # ...Logical Maximum (16) 473 + 0x91, 0x00, # ...Output (Data,Arr,Abs) 475 + 0xc0, # ..End Collection 477 + 0x0a, 0x03, 0x03, # ..Usage (Vendor Usage 0x303) 478 + 0x15, 0x00, # ..Logical Minimum (0) 481 + 0x27, 0xff, 0xff, 0x00, 0x00, # ..Logical Maximum (65535) 483 + 0x75, 0x10, # ..Report Size (16) 488 + 0x91, 0x02, # ..Output (Data,Var,Abs) 490 + 0xc0, # .End Collection 492 + 0x05, 0x0f, # .Usage Page (Vendor Usage Page 0x0f) 493 + 0x09, 0x7d, # .Usage (Vendor Usage 0x7d) 495 + 0xa1, 0x02, # .Collection (Logical) 497 + 0x85, 0x43, # ..Report ID (67) 499 + 0x09, 0x7e, # ..Usage (Vendor Usage 0x7e) 501 + 0x26, 0x80, 0x00, # ..Logical Maximum (128) 503 + 0x46, 0x10, 0x27, # ..Physical Maximum (10000) 506 + 0x75, 0x08, # ..Report Size (8) 509 + 0x91, 0x02, # ..Output (Data,Var,Abs) 511 + 0xc0, # .End Collection 513 + 0x09, 0x7f, # .Usage (Vendor Usage 0x7f) 514 + 0xa1, 0x02, # .Collection (Logical) 516 + 0x85, 0x0b, # ..Report ID (11) 518 + 0x09, 0x80, # ..Usage (Vendor Usage 0x80) 520 + 0x26, 0xff, 0x7f, # ..Logical Maximum (32767) 522 + 0x45, 0x00, # ..Physical Maximum (0) 525 + 0x75, 0x0f, # ..Report Size (15) 527 + 0xb1, 0x03, # ..Feature (Cnst,Var,Abs) 529 + 0x09, 0xa9, # ..Usage (Vendor Usage 0xa9) 531 + 0x25, 0x01, # ..Logical Maximum (1) 533 + 0x75, 0x01, # ..Report Size (1) 535 + 0xb1, 0x03, # ..Feature (Cnst,Var,Abs) 537 + 0x09, 0x83, # ..Usage (Vendor Usage 0x83) 539 + 0x26, 0xff, 0x00, # ..Logical Maximum (255) 541 + 0x75, 0x08, # ..Report Size (8) 544 + 0xb1, 0x03, # ..Feature (Cnst,Var,Abs) 546 + 0xc0, # .End Collection 548 + 0x09, 0xab, # .Usage (Vendor Usage 0xab) 549 + 0xa1, 0x03, # .Collection (Report) 551 + 0x85, 0x15, # ..Report ID (21) 553 + 0x09, 0x25, # ..Usage (Vendor Usage 0x25) 555 + 0xa1, 0x02, # ..Collection (Logical) 557 + 0x09, 0x26, # ...Usage (Vendor Usage 0x26) 559 + 0x09, 0x30, # ...Usage (Vendor Usage 0x30) 561 + 0x09, 0x32, # ...Usage (Vendor Usage 0x32) 563 + 0x09, 0x31, # ...Usage (Vendor Usage 0x31) 565 + 0x09, 0x33, # ...Usage (Vendor Usage 0x33) 567 + 0x09, 0x34, # ...Usage (Vendor Usage 0x34) 569 + 0x15, 0x01, # ...Logical Minimum (1) 571 + 0x25, 0x06, # ...Logical Maximum (6) 573 + 0xb1, 0x00, # ...Feature (Data,Arr,Abs) 575 + 0xc0, # ..End Collection 577 + 0xc0, # .End Collection 578 + 0x09, 0x89, # .Usage (Vendor Usage 0x89) 579 + 0xa1, 0x03, # .Collection (Report) 581 + 0x85, 0x16, # ..Report ID (22) 583 + 0x09, 0x8b, # ..Usage (Vendor Usage 0x8b) 585 + 0xa1, 0x02, # ..Collection (Logical) 587 + 0x09, 0x8c, # ...Usage (Vendor Usage 0x8c) 589 + 0x09, 0x8d, # ...Usage (Vendor Usage 0x8d) 591 + 0x09, 0x8e, # ...Usage (Vendor Usage 0x8e) 593 + 0x25, 0x03, # ...Logical Maximum (3) 595 + 0xb1, 0x00, # ...Feature (Data,Arr,Abs) 597 + 0xc0, # ..End Collection 599 + 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 600 + 0x15, 0x00, # ..Logical Minimum (0) 602 + 0x26, 0xfe, 0x00, # ..Logical Maximum (254) 604 + 0xb1, 0x02, # ..Feature (Data,Var,Abs) 607 + 0xc0, # .End Collection 609 + 0x09, 0x90, # .Usage (Vendor Usage 0x90) 610 + 0xa1, 0x03, # .Collection (Report) 612 + 0x85, 0x50, # ..Report ID (80) 614 + 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 616 + 0x26, 0xff, 0x00, # ..Logical Maximum (255) 618 + 0x91, 0x02, # ..Output (Data,Var,Abs) 621 + 0xc0, # .End Collection 623 + 0xc0, # End Collection 624 + ] + # fmt: on + + def __init__(self, rdesc=report_descriptor, name=None): + super().__init__(rdesc, name=name, input_info=(BusType.USB, 0x06A3, 0xFF0D)) + self.buttons = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12) + + +class AsusGamepad(BaseGamepad): + # fmt: off + report_descriptor = [ + 0x05, 0x01, # Usage Page (Generic Desktop) 0 + 0x09, 0x05, # Usage (Game Pad) 2 + 0xa1, 0x01, # Collection (Application) 4 + 0x85, 0x01, # .Report ID (1) 6 + 0x05, 0x09, # .Usage Page (Button) 8 + 0x0a, 0x01, 0x00, # .Usage (Vendor Usage 0x01) 10 + 0x0a, 0x02, 0x00, # .Usage (Vendor Usage 0x02) 13 + 0x0a, 0x04, 0x00, # .Usage (Vendor Usage 0x04) 16 + 0x0a, 0x05, 0x00, # .Usage (Vendor Usage 0x05) 19 + 0x0a, 0x07, 0x00, # .Usage (Vendor Usage 0x07) 22 + 0x0a, 0x08, 0x00, # .Usage (Vendor Usage 0x08) 25 + 0x0a, 0x0e, 0x00, # .Usage (Vendor Usage 0x0e) 28 + 0x0a, 0x0f, 0x00, # .Usage (Vendor Usage 0x0f) 31 + 0x0a, 0x0d, 0x00, # .Usage (Vendor Usage 0x0d) 34 + 0x05, 0x0c, # .Usage Page (Consumer Devices) 37 + 0x0a, 0x24, 0x02, # .Usage (AC Back) 39 + 0x0a, 0x23, 0x02, # .Usage (AC Home) 42 + 0x15, 0x00, # .Logical Minimum (0) 45 + 0x25, 0x01, # .Logical Maximum (1) 47 + 0x75, 0x01, # .Report Size (1) 49 + 0x95, 0x0b, # .Report Count (11) 51 + 0x81, 0x02, # .Input (Data,Var,Abs) 53 + 0x75, 0x01, # .Report Size (1) 55 + 0x95, 0x01, # .Report Count (1) 57 + 0x81, 0x03, # .Input (Cnst,Var,Abs) 59 + 0x05, 0x01, # .Usage Page (Generic Desktop) 61 + 0x75, 0x04, # .Report Size (4) 63 + 0x95, 0x01, # .Report Count (1) 65 + 0x25, 0x07, # .Logical Maximum (7) 67 + 0x46, 0x3b, 0x01, # .Physical Maximum (315) 69 + 0x66, 0x14, 0x00, # .Unit (Degrees,EngRotation) 72 + 0x09, 0x39, # .Usage (Hat switch) 75 + 0x81, 0x42, # .Input (Data,Var,Abs,Null) 77 + 0x66, 0x00, 0x00, # .Unit (None) 79 + 0x09, 0x01, # .Usage (Pointer) 82 + 0xa1, 0x00, # .Collection (Physical) 84 + 0x09, 0x30, # ..Usage (X) 86 + 0x09, 0x31, # ..Usage (Y) 88 + 0x09, 0x32, # ..Usage (Z) 90 + 0x09, 0x35, # ..Usage (Rz) 92 + 0x05, 0x02, # ..Usage Page (Simulation Controls) 94 + 0x09, 0xc5, # ..Usage (Brake) 96 + 0x09, 0xc4, # ..Usage (Accelerator) 98 + 0x15, 0x00, # ..Logical Minimum (0) 100 + 0x26, 0xff, 0x00, # ..Logical Maximum (255) 102 + 0x35, 0x00, # ..Physical Minimum (0) 105 + 0x46, 0xff, 0x00, # ..Physical Maximum (255) 107 + 0x75, 0x08, # ..Report Size (8) 110 + 0x95, 0x06, # ..Report Count (6) 112 + 0x81, 0x02, # ..Input (Data,Var,Abs) 114 + 0xc0, # .End Collection 116 + 0x85, 0x02, # .Report ID (2) 117 + 0x05, 0x08, # .Usage Page (LEDs) 119 + 0x0a, 0x01, 0x00, # .Usage (Num Lock) 121 + 0x0a, 0x02, 0x00, # .Usage (Caps Lock) 124 + 0x0a, 0x03, 0x00, # .Usage (Scroll Lock) 127 + 0x0a, 0x04, 0x00, # .Usage (Compose) 130 + 0x15, 0x00, # .Logical Minimum (0) 133 + 0x25, 0x01, # .Logical Maximum (1) 135 + 0x75, 0x01, # .Report Size (1) 137 + 0x95, 0x04, # .Report Count (4) 139 + 0x91, 0x02, # .Output (Data,Var,Abs) 141 + 0x75, 0x04, # .Report Size (4) 143 + 0x95, 0x01, # .Report Count (1) 145 + 0x91, 0x03, # .Output (Cnst,Var,Abs) 147 + 0xc0, # End Collection 149 + 0x05, 0x0c, # Usage Page (Consumer Devices) 150 + 0x09, 0x01, # Usage (Consumer Control) 152 + 0xa1, 0x01, # Collection (Application) 154 + 0x85, 0x03, # .Report ID (3) 156 + 0x05, 0x01, # .Usage Page (Generic Desktop) 158 + 0x09, 0x06, # .Usage (Keyboard) 160 + 0xa1, 0x02, # .Collection (Logical) 162 + 0x05, 0x06, # ..Usage Page (Generic Device Controls) 164 + 0x09, 0x20, # ..Usage (Battery Strength) 166 + 0x15, 0x00, # ..Logical Minimum (0) 168 + 0x26, 0xff, 0x00, # ..Logical Maximum (255) 170 + 0x75, 0x08, # ..Report Size (8) 173 + 0x95, 0x01, # ..Report Count (1) 175 + 0x81, 0x02, # ..Input (Data,Var,Abs) 177 + 0x06, 0xbc, 0xff, # ..Usage Page (Vendor Usage Page 0xffbc) 179 + 0x0a, 0xad, 0xbd, # ..Usage (Vendor Usage 0xbdad) 182 + 0x75, 0x08, # ..Report Size (8) 185 + 0x95, 0x06, # ..Report Count (6) 187 + 0x81, 0x02, # ..Input (Data,Var,Abs) 189 + 0xc0, # .End Collection 191 + 0xc0, # End Collection 192 + ] + # fmt: on + + def __init__(self, rdesc=report_descriptor, name=None): + super().__init__(rdesc, name=name, input_info=(BusType.USB, 0x18D1, 0x2C40)) + self.buttons = (1, 2, 4, 5, 7, 8, 14, 15, 13) + + +class RaptorMach2Joystick(JoystickGamepad): + axes_map = { + "left_stick": { + "x": AxisMapping("x"), + "y": AxisMapping("y"), + }, + "right_stick": { + "x": AxisMapping("z"), + "y": AxisMapping("Rz"), + }, + } + + def __init__( + self, + name, + rdesc=None, + application="Joystick", + input_info=(BusType.USB, 0x11C0, 0x5606), + ): + super().__init__(rdesc, application, name, input_info) + self.buttons = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12) + self.hat_switch = 240 # null value is 240 as max is 239 + + def event( + self, *, left=(None, None), right=(None, None), hat_switch=None, buttons=None + ): + if hat_switch is not None: + hat_switch *= 30 + + return super().event( + left=left, right=right, hat_switch=hat_switch, buttons=buttons + ) + + class TestSaitekGamepad(BaseTest.TestGamepad): def create_device(self): return SaitekGamepad() @@ -207,3 +651,14 @@ class TestSaitekGamepad(BaseTest.TestGamepad): class TestAsusGamepad(BaseTest.TestGamepad): def create_device(self): return AsusGamepad() + + +class TestRaptorMach2Joystick(BaseTest.TestGamepad): + hid_bpfs = [("FR-TEC__Raptor-Mach-2.bpf.o", True)] + + def create_device(self): + return RaptorMach2Joystick( + "uhid test Sanmos Group FR-TEC Raptor MACH 2", + rdesc="05 01 09 04 a1 01 05 01 85 01 05 01 09 30 75 10 95 01 15 00 26 ff 07 46 ff 07 81 02 05 01 09 31 75 10 95 01 15 00 26 ff 07 46 ff 07 81 02 05 01 09 33 75 10 95 01 15 00 26 ff 03 46 ff 03 81 02 05 00 09 00 75 10 95 01 15 00 26 ff 03 46 ff 03 81 02 05 01 09 32 75 10 95 01 15 00 26 ff 03 46 ff 03 81 02 05 01 09 35 75 10 95 01 15 00 26 ff 03 46 ff 03 81 02 05 01 09 34 75 10 95 01 15 00 26 ff 07 46 ff 07 81 02 05 01 09 36 75 10 95 01 15 00 26 ff 03 46 ff 03 81 02 05 09 19 01 2a 1d 00 15 00 25 01 75 01 96 80 00 81 02 05 01 09 39 26 ef 00 46 68 01 65 14 75 10 95 01 81 42 05 01 09 00 75 08 95 1d 81 01 15 00 26 ef 00 85 58 26 ff 00 46 ff 00 75 08 95 3f 09 00 91 02 85 59 75 08 95 80 09 00 b1 02 c0", + input_info=(BusType.USB, 0x11C0, 0x5606), + ) diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py index 903f19f7cb..a9e2de1e88 100644 --- a/tools/testing/selftests/hid/tests/test_tablet.py +++ b/tools/testing/selftests/hid/tests/test_tablet.py @@ -35,6 +35,7 @@ class BtnPressed(Enum): PRIMARY_PRESSED = libevdev.EV_KEY.BTN_STYLUS SECONDARY_PRESSED = libevdev.EV_KEY.BTN_STYLUS2 + THIRD_PRESSED = libevdev.EV_KEY.BTN_STYLUS3 class PenState(Enum): @@ -44,58 +45,28 @@ class PenState(Enum): We extend it with the various buttons when we need to check them. """ - PEN_IS_OUT_OF_RANGE = BtnTouch.UP, None, None - PEN_IS_IN_RANGE = BtnTouch.UP, ToolType.PEN, None - PEN_IS_IN_RANGE_WITH_BUTTON = BtnTouch.UP, ToolType.PEN, BtnPressed.PRIMARY_PRESSED - PEN_IS_IN_RANGE_WITH_SECOND_BUTTON = ( - BtnTouch.UP, - ToolType.PEN, - BtnPressed.SECONDARY_PRESSED, - ) - PEN_IS_IN_CONTACT = BtnTouch.DOWN, ToolType.PEN, None - PEN_IS_IN_CONTACT_WITH_BUTTON = ( - BtnTouch.DOWN, - ToolType.PEN, - BtnPressed.PRIMARY_PRESSED, - ) - PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON = ( - BtnTouch.DOWN, - ToolType.PEN, - BtnPressed.SECONDARY_PRESSED, - ) - PEN_IS_IN_RANGE_WITH_ERASING_INTENT = BtnTouch.UP, ToolType.RUBBER, None - PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_BUTTON = ( - BtnTouch.UP, - ToolType.RUBBER, - BtnPressed.PRIMARY_PRESSED, - ) - PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_SECOND_BUTTON = ( - BtnTouch.UP, - ToolType.RUBBER, - BtnPressed.SECONDARY_PRESSED, - ) - PEN_IS_ERASING = BtnTouch.DOWN, ToolType.RUBBER, None - PEN_IS_ERASING_WITH_BUTTON = ( - BtnTouch.DOWN, - ToolType.RUBBER, - BtnPressed.PRIMARY_PRESSED, - ) - PEN_IS_ERASING_WITH_SECOND_BUTTON = ( - BtnTouch.DOWN, - ToolType.RUBBER, - BtnPressed.SECONDARY_PRESSED, - ) - - def __init__(self, touch: BtnTouch, tool: Optional[ToolType], button: Optional[BtnPressed]): + PEN_IS_OUT_OF_RANGE = BtnTouch.UP, None, False + PEN_IS_IN_RANGE = BtnTouch.UP, ToolType.PEN, False + PEN_IS_IN_RANGE_WITH_BUTTON = BtnTouch.UP, ToolType.PEN, True + PEN_IS_IN_CONTACT = BtnTouch.DOWN, ToolType.PEN, False + PEN_IS_IN_CONTACT_WITH_BUTTON = BtnTouch.DOWN, ToolType.PEN, True + PEN_IS_IN_RANGE_WITH_ERASING_INTENT = BtnTouch.UP, ToolType.RUBBER, False + PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_BUTTON = BtnTouch.UP, ToolType.RUBBER, True + PEN_IS_ERASING = BtnTouch.DOWN, ToolType.RUBBER, False + PEN_IS_ERASING_WITH_BUTTON = BtnTouch.DOWN, ToolType.RUBBER, True + + def __init__( + self, touch: BtnTouch, tool: Optional[ToolType], button: Optional[bool] + ): self.touch = touch # type: ignore self.tool = tool # type: ignore self.button = button # type: ignore @classmethod - def from_evdev(cls, evdev) -> "PenState": + def from_evdev(cls, evdev, test_button) -> "PenState": touch = BtnTouch(evdev.value[libevdev.EV_KEY.BTN_TOUCH]) tool = None - button = None + button = False if ( evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER] and not evdev.value[libevdev.EV_KEY.BTN_TOOL_PEN] @@ -112,19 +83,20 @@ class PenState(Enum): ): raise ValueError("2 tools are not allowed") - # we take only the highest button in account - for b in [libevdev.EV_KEY.BTN_STYLUS, libevdev.EV_KEY.BTN_STYLUS2]: - if bool(evdev.value[b]): - button = BtnPressed(b) + # we take only the provided button into account + if test_button is not None: + button = bool(evdev.value[test_button.value]) # the kernel tends to insert an EV_SYN once removing the tool, so # the button will be released after if tool is None: - button = None + button = False return cls((touch, tool, button)) # type: ignore - def apply(self, events: List[libevdev.InputEvent], strict: bool) -> "PenState": + def apply( + self, events: List[libevdev.InputEvent], strict: bool, test_button: BtnPressed + ) -> "PenState": if libevdev.EV_SYN.SYN_REPORT in events: raise ValueError("EV_SYN is in the event sequence") touch = self.touch @@ -148,19 +120,16 @@ class PenState(Enum): raise ValueError(f"duplicated BTN_TOOL_* in {events}") tool_found = True tool = ToolType(ev.code) if ev.value else None - elif ev in ( - libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS), - libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS2), - ): + elif test_button is not None and ev in (test_button.value,): if button_found: raise ValueError(f"duplicated BTN_STYLUS* in {events}") button_found = True - button = BtnPressed(ev.code) if ev.value else None + button = bool(ev.value) # the kernel tends to insert an EV_SYN once removing the tool, so # the button will be released after if tool is None: - button = None + button = False new_state = PenState((touch, tool, button)) # type: ignore if strict: @@ -183,11 +152,9 @@ class PenState(Enum): PenState.PEN_IS_OUT_OF_RANGE, PenState.PEN_IS_IN_RANGE, PenState.PEN_IS_IN_RANGE_WITH_BUTTON, - PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT, PenState.PEN_IS_IN_CONTACT, PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, - PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, PenState.PEN_IS_ERASING, ) @@ -195,7 +162,6 @@ class PenState(Enum): return ( PenState.PEN_IS_IN_RANGE, PenState.PEN_IS_IN_RANGE_WITH_BUTTON, - PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, PenState.PEN_IS_OUT_OF_RANGE, PenState.PEN_IS_IN_CONTACT, ) @@ -204,7 +170,6 @@ class PenState(Enum): return ( PenState.PEN_IS_IN_CONTACT, PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, - PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, PenState.PEN_IS_IN_RANGE, ) @@ -236,21 +201,6 @@ class PenState(Enum): PenState.PEN_IS_IN_RANGE_WITH_BUTTON, ) - if self == PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON: - return ( - PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, - PenState.PEN_IS_IN_RANGE, - PenState.PEN_IS_OUT_OF_RANGE, - PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, - ) - - if self == PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON: - return ( - PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, - PenState.PEN_IS_IN_CONTACT, - PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, - ) - return tuple() def historically_tolerated_transitions(self) -> Tuple["PenState", ...]: @@ -263,11 +213,9 @@ class PenState(Enum): PenState.PEN_IS_OUT_OF_RANGE, PenState.PEN_IS_IN_RANGE, PenState.PEN_IS_IN_RANGE_WITH_BUTTON, - PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT, PenState.PEN_IS_IN_CONTACT, PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, - PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, PenState.PEN_IS_ERASING, ) @@ -275,7 +223,6 @@ class PenState(Enum): return ( PenState.PEN_IS_IN_RANGE, PenState.PEN_IS_IN_RANGE_WITH_BUTTON, - PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, PenState.PEN_IS_OUT_OF_RANGE, PenState.PEN_IS_IN_CONTACT, ) @@ -284,7 +231,6 @@ class PenState(Enum): return ( PenState.PEN_IS_IN_CONTACT, PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, - PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, PenState.PEN_IS_IN_RANGE, PenState.PEN_IS_OUT_OF_RANGE, ) @@ -319,22 +265,6 @@ class PenState(Enum): PenState.PEN_IS_OUT_OF_RANGE, ) - if self == PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON: - return ( - PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, - PenState.PEN_IS_IN_RANGE, - PenState.PEN_IS_OUT_OF_RANGE, - PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, - ) - - if self == PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON: - return ( - PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, - PenState.PEN_IS_IN_CONTACT, - PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, - PenState.PEN_IS_OUT_OF_RANGE, - ) - return tuple() @staticmethod @@ -402,9 +332,9 @@ class PenState(Enum): } @staticmethod - def legal_transitions_with_primary_button() -> Dict[str, Tuple["PenState", ...]]: + def legal_transitions_with_button() -> Dict[str, Tuple["PenState", ...]]: """We revisit the Windows Pen Implementation state machine: - we now have a primary button. + we now have a button. """ return { "hover-button": (PenState.PEN_IS_IN_RANGE_WITH_BUTTON,), @@ -451,56 +381,6 @@ class PenState(Enum): } @staticmethod - def legal_transitions_with_secondary_button() -> Dict[str, Tuple["PenState", ...]]: - """We revisit the Windows Pen Implementation state machine: - we now have a secondary button. - Note: we don't looks for 2 buttons interactions. - """ - return { - "hover-button": (PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,), - "hover-button -> out-of-range": ( - PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, - PenState.PEN_IS_OUT_OF_RANGE, - ), - "in-range -> button-press": ( - PenState.PEN_IS_IN_RANGE, - PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, - ), - "in-range -> button-press -> button-release": ( - PenState.PEN_IS_IN_RANGE, - PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, - PenState.PEN_IS_IN_RANGE, - ), - "in-range -> touch -> button-press -> button-release": ( - PenState.PEN_IS_IN_RANGE, - PenState.PEN_IS_IN_CONTACT, - PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, - PenState.PEN_IS_IN_CONTACT, - ), - "in-range -> touch -> button-press -> release -> button-release": ( - PenState.PEN_IS_IN_RANGE, - PenState.PEN_IS_IN_CONTACT, - PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, - PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, - PenState.PEN_IS_IN_RANGE, - ), - "in-range -> button-press -> touch -> release -> button-release": ( - PenState.PEN_IS_IN_RANGE, - PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, - PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, - PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, - PenState.PEN_IS_IN_RANGE, - ), - "in-range -> button-press -> touch -> button-release -> release": ( - PenState.PEN_IS_IN_RANGE, - PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, - PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, - PenState.PEN_IS_IN_CONTACT, - PenState.PEN_IS_IN_RANGE, - ), - } - - @staticmethod def tolerated_transitions() -> Dict[str, Tuple["PenState", ...]]: """This is not adhering to the Windows Pen Implementation state machine but we should expect the kernel to behave properly, mostly for historical @@ -616,10 +496,22 @@ class Pen(object): evdev.value[axis] == value ), f"assert evdev.value[{axis}] ({evdev.value[axis]}) != {value}" - def assert_expected_input_events(self, evdev): + def assert_expected_input_events(self, evdev, button): assert evdev.value[libevdev.EV_ABS.ABS_X] == self.x assert evdev.value[libevdev.EV_ABS.ABS_Y] == self.y - assert self.current_state == PenState.from_evdev(evdev) + + # assert no other buttons than the tested ones are set + buttons = [ + BtnPressed.PRIMARY_PRESSED, + BtnPressed.SECONDARY_PRESSED, + BtnPressed.THIRD_PRESSED, + ] + if button is not None: + buttons.remove(button) + for b in buttons: + assert evdev.value[b.value] is None or evdev.value[b.value] == False + + assert self.current_state == PenState.from_evdev(evdev, button) class PenDigitizer(base.UHIDTestDevice): @@ -647,7 +539,7 @@ class PenDigitizer(base.UHIDTestDevice): continue self.fields = [f.usage_name for f in r] - def move_to(self, pen, state): + def move_to(self, pen, state, button): # fill in the previous values if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE: pen.restore() @@ -690,29 +582,17 @@ class PenDigitizer(base.UHIDTestDevice): pen.inrange = True pen.invert = False pen.eraser = False - pen.barrelswitch = True - pen.secondarybarrelswitch = False + assert button is not None + pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED + pen.secondarybarrelswitch = button == BtnPressed.SECONDARY_PRESSED elif state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON: pen.tipswitch = True pen.inrange = True pen.invert = False pen.eraser = False - pen.barrelswitch = True - pen.secondarybarrelswitch = False - elif state == PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON: - pen.tipswitch = False - pen.inrange = True - pen.invert = False - pen.eraser = False - pen.barrelswitch = False - pen.secondarybarrelswitch = True - elif state == PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON: - pen.tipswitch = True - pen.inrange = True - pen.invert = False - pen.eraser = False - pen.barrelswitch = False - pen.secondarybarrelswitch = True + assert button is not None + pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED + pen.secondarybarrelswitch = button == BtnPressed.SECONDARY_PRESSED elif state == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT: pen.tipswitch = False pen.inrange = True @@ -730,7 +610,7 @@ class PenDigitizer(base.UHIDTestDevice): pen.current_state = state - def event(self, pen): + def event(self, pen, button): rs = [] r = self.create_report(application=self.cur_application, data=pen) self.call_input_event(r) @@ -771,17 +651,17 @@ class BaseTest: def create_device(self): raise Exception("please reimplement me in subclasses") - def post(self, uhdev, pen): - r = uhdev.event(pen) + def post(self, uhdev, pen, test_button): + r = uhdev.event(pen, test_button) events = uhdev.next_sync_events() self.debug_reports(r, uhdev, events) return events def validate_transitions( - self, from_state, pen, evdev, events, allow_intermediate_states + self, from_state, pen, evdev, events, allow_intermediate_states, button ): # check that the final state is correct - pen.assert_expected_input_events(evdev) + pen.assert_expected_input_events(evdev, button) state = from_state @@ -794,12 +674,14 @@ class BaseTest: events = events[idx + 1 :] # now check for a valid transition - state = state.apply(sync_events, not allow_intermediate_states) + state = state.apply(sync_events, not allow_intermediate_states, button) if events: - state = state.apply(sync_events, not allow_intermediate_states) + state = state.apply(sync_events, not allow_intermediate_states, button) - def _test_states(self, state_list, scribble, allow_intermediate_states): + def _test_states( + self, state_list, scribble, allow_intermediate_states, button=None + ): """Internal method to test against a list of transition between states. state_list is a list of PenState objects @@ -812,10 +694,10 @@ class BaseTest: cur_state = PenState.PEN_IS_OUT_OF_RANGE p = Pen(50, 60) - uhdev.move_to(p, PenState.PEN_IS_OUT_OF_RANGE) - events = self.post(uhdev, p) + uhdev.move_to(p, PenState.PEN_IS_OUT_OF_RANGE, button) + events = self.post(uhdev, p, button) self.validate_transitions( - cur_state, p, evdev, events, allow_intermediate_states + cur_state, p, evdev, events, allow_intermediate_states, button ) cur_state = p.current_state @@ -824,18 +706,18 @@ class BaseTest: if scribble and cur_state != PenState.PEN_IS_OUT_OF_RANGE: p.x += 1 p.y -= 1 - events = self.post(uhdev, p) + events = self.post(uhdev, p, button) self.validate_transitions( - cur_state, p, evdev, events, allow_intermediate_states + cur_state, p, evdev, events, allow_intermediate_states, button ) assert len(events) >= 3 # X, Y, SYN - uhdev.move_to(p, state) + uhdev.move_to(p, state, button) if scribble and state != PenState.PEN_IS_OUT_OF_RANGE: p.x += 1 p.y -= 1 - events = self.post(uhdev, p) + events = self.post(uhdev, p, button) self.validate_transitions( - cur_state, p, evdev, events, allow_intermediate_states + cur_state, p, evdev, events, allow_intermediate_states, button ) cur_state = p.current_state @@ -874,12 +756,17 @@ class BaseTest: "state_list", [ pytest.param(v, id=k) - for k, v in PenState.legal_transitions_with_primary_button().items() + for k, v in PenState.legal_transitions_with_button().items() ], ) def test_valid_primary_button_pen_states(self, state_list, scribble): """Rework the transition state machine by adding the primary button.""" - self._test_states(state_list, scribble, allow_intermediate_states=False) + self._test_states( + state_list, + scribble, + allow_intermediate_states=False, + button=BtnPressed.PRIMARY_PRESSED, + ) @pytest.mark.skip_if_uhdev( lambda uhdev: "Secondary Barrel Switch" not in uhdev.fields, @@ -890,12 +777,38 @@ class BaseTest: "state_list", [ pytest.param(v, id=k) - for k, v in PenState.legal_transitions_with_secondary_button().items() + for k, v in PenState.legal_transitions_with_button().items() ], ) def test_valid_secondary_button_pen_states(self, state_list, scribble): """Rework the transition state machine by adding the secondary button.""" - self._test_states(state_list, scribble, allow_intermediate_states=False) + self._test_states( + state_list, + scribble, + allow_intermediate_states=False, + button=BtnPressed.SECONDARY_PRESSED, + ) + + @pytest.mark.skip_if_uhdev( + lambda uhdev: "Third Barrel Switch" not in uhdev.fields, + "Device not compatible, missing Third Barrel Switch usage", + ) + @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"]) + @pytest.mark.parametrize( + "state_list", + [ + pytest.param(v, id=k) + for k, v in PenState.legal_transitions_with_button().items() + ], + ) + def test_valid_third_button_pen_states(self, state_list, scribble): + """Rework the transition state machine by adding the secondary button.""" + self._test_states( + state_list, + scribble, + allow_intermediate_states=False, + button=BtnPressed.THIRD_PRESSED, + ) @pytest.mark.skip_if_uhdev( lambda uhdev: "Invert" not in uhdev.fields, @@ -956,7 +869,7 @@ class BaseTest: class GXTP_pen(PenDigitizer): - def event(self, pen): + def event(self, pen, test_button): if not hasattr(self, "prev_tip_state"): self.prev_tip_state = False @@ -977,13 +890,407 @@ class GXTP_pen(PenDigitizer): if pen.eraser: internal_pen.invert = False - return super().event(internal_pen) + return super().event(internal_pen, test_button) class USIPen(PenDigitizer): pass +class XPPen_ArtistPro16Gen2_28bd_095b(PenDigitizer): + """ + Pen with two buttons and a rubber end, but which reports + the second button as an eraser + """ + + def __init__( + self, + name, + rdesc_str=None, + rdesc=None, + application="Pen", + physical="Stylus", + input_info=(BusType.USB, 0x28BD, 0x095B), + evdev_name_suffix=None, + ): + super().__init__( + name, rdesc_str, rdesc, application, physical, input_info, evdev_name_suffix + ) + self.fields.append("Secondary Barrel Switch") + + def move_to(self, pen, state, button): + # fill in the previous values + if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE: + pen.restore() + + print(f"\n *** pen is moving to {state} ***") + + if state == PenState.PEN_IS_OUT_OF_RANGE: + pen.backup() + pen.x = 0 + pen.y = 0 + pen.tipswitch = False + pen.tippressure = 0 + pen.azimuth = 0 + pen.inrange = False + pen.width = 0 + pen.height = 0 + pen.invert = False + pen.eraser = False + pen.xtilt = 0 + pen.ytilt = 0 + pen.twist = 0 + pen.barrelswitch = False + elif state == PenState.PEN_IS_IN_RANGE: + pen.tipswitch = False + pen.inrange = True + pen.invert = False + pen.eraser = False + pen.barrelswitch = False + elif state == PenState.PEN_IS_IN_CONTACT: + pen.tipswitch = True + pen.inrange = True + pen.invert = False + pen.eraser = False + pen.barrelswitch = False + elif state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON: + pen.tipswitch = False + pen.inrange = True + pen.invert = False + assert button is not None + pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED + pen.eraser = button == BtnPressed.SECONDARY_PRESSED + elif state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON: + pen.tipswitch = True + pen.inrange = True + pen.invert = False + assert button is not None + pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED + pen.eraser = button == BtnPressed.SECONDARY_PRESSED + elif state == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT: + pen.tipswitch = False + pen.inrange = True + pen.invert = True + pen.eraser = False + pen.barrelswitch = False + elif state == PenState.PEN_IS_ERASING: + pen.tipswitch = True + pen.inrange = True + pen.invert = True + pen.eraser = False + pen.barrelswitch = False + + pen.current_state = state + + def event(self, pen, test_button): + import math + + pen_copy = copy.copy(pen) + width = 13.567 + height = 8.480 + tip_height = 0.055677699 + hx = tip_height * (32767 / width) + hy = tip_height * (32767 / height) + if pen_copy.xtilt != 0: + pen_copy.x += round(hx * math.sin(math.radians(pen_copy.xtilt))) + if pen_copy.ytilt != 0: + pen_copy.y += round(hy * math.sin(math.radians(pen_copy.ytilt))) + + return super().event(pen_copy, test_button) + + +class XPPen_Artist24_28bd_093a(PenDigitizer): + """ + Pen that reports secondary barrel switch through eraser + """ + + def __init__( + self, + name, + rdesc_str=None, + rdesc=None, + application="Pen", + physical="Stylus", + input_info=(BusType.USB, 0x28BD, 0x093A), + evdev_name_suffix=None, + ): + super().__init__( + name, rdesc_str, rdesc, application, physical, input_info, evdev_name_suffix + ) + self.fields.append("Secondary Barrel Switch") + self.previous_state = PenState.PEN_IS_OUT_OF_RANGE + + def move_to(self, pen, state, button, debug=True): + # fill in the previous values + if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE: + pen.restore() + + if debug: + print(f"\n *** pen is moving to {state} ***") + + if state == PenState.PEN_IS_OUT_OF_RANGE: + pen.backup() + pen.tipswitch = False + pen.tippressure = 0 + pen.azimuth = 0 + pen.inrange = False + pen.width = 0 + pen.height = 0 + pen.invert = False + pen.eraser = False + pen.xtilt = 0 + pen.ytilt = 0 + pen.twist = 0 + pen.barrelswitch = False + elif state == PenState.PEN_IS_IN_RANGE: + pen.tipswitch = False + pen.inrange = True + pen.invert = False + pen.eraser = False + pen.barrelswitch = False + elif state == PenState.PEN_IS_IN_CONTACT: + pen.tipswitch = True + pen.inrange = True + pen.invert = False + pen.eraser = False + pen.barrelswitch = False + elif state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON: + pen.tipswitch = False + pen.inrange = True + pen.invert = False + assert button is not None + pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED + pen.eraser = button == BtnPressed.SECONDARY_PRESSED + elif state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON: + pen.tipswitch = True + pen.inrange = True + pen.invert = False + assert button is not None + pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED + pen.eraser = button == BtnPressed.SECONDARY_PRESSED + + pen.current_state = state + + def send_intermediate_state(self, pen, state, button): + intermediate_pen = copy.copy(pen) + self.move_to(intermediate_pen, state, button, debug=False) + return super().event(intermediate_pen, button) + + def event(self, pen, button): + rs = [] + + # the pen reliably sends in-range events in a normal case (non emulation of eraser mode) + if self.previous_state == PenState.PEN_IS_IN_CONTACT: + if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE: + rs.extend( + self.send_intermediate_state(pen, PenState.PEN_IS_IN_RANGE, button) + ) + + if button == BtnPressed.SECONDARY_PRESSED: + if self.previous_state == PenState.PEN_IS_IN_RANGE: + if pen.current_state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON: + rs.extend( + self.send_intermediate_state( + pen, PenState.PEN_IS_OUT_OF_RANGE, button + ) + ) + + if self.previous_state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON: + if pen.current_state == PenState.PEN_IS_IN_RANGE: + rs.extend( + self.send_intermediate_state( + pen, PenState.PEN_IS_OUT_OF_RANGE, button + ) + ) + + if self.previous_state == PenState.PEN_IS_IN_CONTACT: + if pen.current_state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON: + rs.extend( + self.send_intermediate_state( + pen, PenState.PEN_IS_OUT_OF_RANGE, button + ) + ) + rs.extend( + self.send_intermediate_state( + pen, PenState.PEN_IS_IN_RANGE_WITH_BUTTON, button + ) + ) + + if self.previous_state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON: + if pen.current_state == PenState.PEN_IS_IN_CONTACT: + rs.extend( + self.send_intermediate_state( + pen, PenState.PEN_IS_OUT_OF_RANGE, button + ) + ) + rs.extend( + self.send_intermediate_state( + pen, PenState.PEN_IS_IN_RANGE, button + ) + ) + + rs.extend(super().event(pen, button)) + self.previous_state = pen.current_state + return rs + + +class Huion_Kamvas_Pro_19_256c_006b(PenDigitizer): + """ + Pen that reports secondary barrel switch through secondary TipSwtich + and 3rd button through Invert + """ + + def __init__( + self, + name, + rdesc_str=None, + rdesc=None, + application="Stylus", + physical=None, + input_info=(BusType.USB, 0x256C, 0x006B), + evdev_name_suffix=None, + ): + super().__init__( + name, rdesc_str, rdesc, application, physical, input_info, evdev_name_suffix + ) + self.fields.append("Secondary Barrel Switch") + self.fields.append("Third Barrel Switch") + self.previous_state = PenState.PEN_IS_OUT_OF_RANGE + + def move_to(self, pen, state, button, debug=True): + # fill in the previous values + if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE: + pen.restore() + + if debug: + print(f"\n *** pen is moving to {state} ***") + + if state == PenState.PEN_IS_OUT_OF_RANGE: + pen.backup() + pen.tipswitch = False + pen.tippressure = 0 + pen.azimuth = 0 + pen.inrange = False + pen.width = 0 + pen.height = 0 + pen.invert = False + pen.eraser = False + pen.xtilt = 0 + pen.ytilt = 0 + pen.twist = 0 + pen.barrelswitch = False + pen.secondarytipswitch = False + elif state == PenState.PEN_IS_IN_RANGE: + pen.tipswitch = False + pen.inrange = True + pen.invert = False + pen.eraser = False + pen.barrelswitch = False + pen.secondarytipswitch = False + elif state == PenState.PEN_IS_IN_CONTACT: + pen.tipswitch = True + pen.inrange = True + pen.invert = False + pen.eraser = False + pen.barrelswitch = False + pen.secondarytipswitch = False + elif state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON: + pen.tipswitch = False + pen.inrange = True + pen.eraser = False + assert button is not None + pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED + pen.secondarytipswitch = button == BtnPressed.SECONDARY_PRESSED + pen.invert = button == BtnPressed.THIRD_PRESSED + elif state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON: + pen.tipswitch = True + pen.inrange = True + pen.eraser = False + assert button is not None + pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED + pen.secondarytipswitch = button == BtnPressed.SECONDARY_PRESSED + pen.invert = button == BtnPressed.THIRD_PRESSED + elif state == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT: + pen.tipswitch = False + pen.inrange = True + pen.invert = True + pen.eraser = False + pen.barrelswitch = False + pen.secondarytipswitch = False + elif state == PenState.PEN_IS_ERASING: + pen.tipswitch = False + pen.inrange = True + pen.invert = False + pen.eraser = True + pen.barrelswitch = False + pen.secondarytipswitch = False + + pen.current_state = state + + def call_input_event(self, report): + if report[0] == 0x0a: + # ensures the original second Eraser usage is null + report[1] &= 0xdf + + # ensures the original last bit is equal to bit 6 (In Range) + if report[1] & 0x40: + report[1] |= 0x80 + + super().call_input_event(report) + + def send_intermediate_state(self, pen, state, test_button): + intermediate_pen = copy.copy(pen) + self.move_to(intermediate_pen, state, test_button, debug=False) + return super().event(intermediate_pen, test_button) + + def event(self, pen, button): + rs = [] + + # it's not possible to go between eraser mode or not without + # going out-of-prox: the eraser mode is activated by presenting + # the tail of the pen + if self.previous_state in ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + ) and pen.current_state in ( + PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT, + PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_BUTTON, + PenState.PEN_IS_ERASING, + PenState.PEN_IS_ERASING_WITH_BUTTON, + ): + rs.extend( + self.send_intermediate_state(pen, PenState.PEN_IS_OUT_OF_RANGE, button) + ) + + # same than above except from eraser to normal + if self.previous_state in ( + PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT, + PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_BUTTON, + PenState.PEN_IS_ERASING, + PenState.PEN_IS_ERASING_WITH_BUTTON, + ) and pen.current_state in ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + ): + rs.extend( + self.send_intermediate_state(pen, PenState.PEN_IS_OUT_OF_RANGE, button) + ) + + if self.previous_state == PenState.PEN_IS_OUT_OF_RANGE: + if pen.current_state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON: + rs.extend( + self.send_intermediate_state(pen, PenState.PEN_IS_IN_RANGE, button) + ) + + rs.extend(super().event(pen, button)) + self.previous_state = pen.current_state + return rs + + ################################################################################ # # Windows 7 compatible devices @@ -1162,3 +1469,37 @@ class TestGoodix_27c6_0e00(BaseTest.TestTablet): rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 55 0e 65 11 35 00 15 00 09 42 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 95 01 75 08 09 51 81 02 75 10 05 01 26 04 20 46 e6 09 09 30 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 22 a1 02 09 42 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 95 01 75 08 09 51 81 02 75 10 05 01 26 04 20 46 e6 09 09 30 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 22 a1 02 09 42 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 95 01 75 08 09 51 81 02 75 10 05 01 26 04 20 46 e6 09 09 30 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 75 08 09 51 95 01 81 02 05 01 26 04 20 75 10 55 0e 65 11 09 30 35 00 46 e6 09 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 75 08 09 51 95 01 81 02 05 01 26 04 20 75 10 55 0e 65 11 09 30 35 00 46 e6 09 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 54 15 00 25 7f 75 08 95 01 81 02 85 02 09 55 95 01 25 0a b1 02 85 03 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 02 a1 01 09 20 a1 00 85 08 05 01 a4 09 30 35 00 46 e6 09 15 00 26 04 20 55 0d 65 13 75 10 95 01 81 02 09 31 46 9a 06 26 60 15 81 02 b4 05 0d 09 38 95 01 75 08 15 00 25 01 81 02 09 30 75 10 26 ff 0f 81 02 09 31 81 02 09 42 09 44 09 5a 09 3c 09 45 09 32 75 01 95 06 25 01 81 02 95 02 81 03 09 3d 55 0e 65 14 36 d8 dc 46 28 23 16 d8 dc 26 28 23 95 01 75 10 81 02 09 3e 81 02 09 41 15 00 27 a0 8c 00 00 35 00 47 a0 8c 00 00 81 02 05 20 0a 53 04 65 00 16 01 f8 26 ff 07 75 10 95 01 81 02 0a 54 04 81 02 0a 55 04 81 02 0a 57 04 81 02 0a 58 04 81 02 0a 59 04 81 02 0a 72 04 81 02 0a 73 04 81 02 0a 74 04 81 02 05 0d 09 3b 15 00 25 64 75 08 81 02 09 5b 25 ff 75 40 81 02 06 00 ff 09 5b 75 20 81 02 05 0d 09 5c 26 ff 00 75 08 81 02 09 5e 81 02 09 70 a1 02 15 01 25 06 09 72 09 73 09 74 09 75 09 76 09 77 81 20 c0 06 00 ff 09 01 15 00 27 ff ff 00 00 75 10 95 01 81 02 85 09 09 81 a1 02 09 81 15 01 25 04 09 82 09 83 09 84 09 85 81 20 c0 85 10 09 5c a1 02 15 00 25 01 75 08 95 01 09 38 b1 02 09 5c 26 ff 00 b1 02 09 5d 75 01 95 01 25 01 b1 02 95 07 b1 03 c0 85 11 09 5e a1 02 09 38 15 00 25 01 75 08 95 01 b1 02 09 5e 26 ff 00 b1 02 09 5f 75 01 25 01 b1 02 75 07 b1 03 c0 85 12 09 70 a1 02 75 08 95 01 15 00 25 01 09 38 b1 02 09 70 a1 02 25 06 09 72 09 73 09 74 09 75 09 76 09 77 b1 20 c0 09 71 75 01 25 01 b1 02 75 07 b1 03 c0 85 13 09 80 15 00 25 ff 75 40 95 01 b1 02 85 14 09 44 a1 02 09 38 75 08 95 01 25 01 b1 02 15 01 25 03 09 44 a1 02 09 a4 09 44 09 5a 09 45 09 a3 b1 20 c0 09 5a a1 02 09 a4 09 44 09 5a 09 45 09 a3 b1 20 c0 09 45 a1 02 09 a4 09 44 09 5a 09 45 09 a3 b1 20 c0 c0 85 15 75 08 95 01 05 0d 09 90 a1 02 09 38 25 01 b1 02 09 91 75 10 26 ff 0f b1 02 09 92 75 40 25 ff b1 02 05 06 09 2a 75 08 26 ff 00 a1 02 09 2d b1 02 09 2e b1 02 c0 c0 85 16 05 06 09 2b a1 02 05 0d 25 01 09 38 b1 02 05 06 09 2b a1 02 09 2d 26 ff 00 b1 02 09 2e b1 02 c0 c0 85 17 06 00 ff 09 01 a1 02 05 0d 09 38 75 08 95 01 25 01 b1 02 06 00 ff 09 01 75 10 27 ff ff 00 00 b1 02 c0 85 18 05 0d 09 38 75 08 95 01 15 00 25 01 b1 02 c0 c0 06 f0 ff 09 01 a1 01 85 0e 09 01 15 00 25 ff 75 08 95 40 91 02 09 01 15 00 25 ff 75 08 95 40 81 02 c0", input_info=(BusType.I2C, 0x27C6, 0x0E00), ) + + +class TestXPPen_ArtistPro16Gen2_28bd_095b(BaseTest.TestTablet): + hid_bpfs = [("XPPen__ArtistPro16Gen2.bpf.o", True)] + + def create_device(self): + dev = XPPen_ArtistPro16Gen2_28bd_095b( + "uhid test XPPen Artist Pro 16 Gen2 28bd 095b", + rdesc="05 0d 09 02 a1 01 85 07 09 20 a1 00 09 42 09 44 09 45 09 3c 15 00 25 01 75 01 95 04 81 02 95 01 81 03 09 32 15 00 25 01 95 01 81 02 95 02 81 03 75 10 95 01 35 00 a4 05 01 09 30 65 13 55 0d 46 ff 34 26 ff 7f 81 02 09 31 46 20 21 26 ff 7f 81 02 b4 09 30 45 00 26 ff 3f 81 42 09 3d 15 81 25 7f 75 08 95 01 81 02 09 3e 15 81 25 7f 81 02 c0 c0", + input_info=(BusType.USB, 0x28BD, 0x095B), + ) + return dev + + +class TestXPPen_Artist24_28bd_093a(BaseTest.TestTablet): + hid_bpfs = [("XPPen__Artist24.bpf.o", True)] + + def create_device(self): + return XPPen_Artist24_28bd_093a( + "uhid test XPPen Artist 24 28bd 093a", + rdesc="05 0d 09 02 a1 01 85 07 09 20 a1 00 09 42 09 44 09 45 15 00 25 01 75 01 95 03 81 02 95 02 81 03 09 32 95 01 81 02 95 02 81 03 75 10 95 01 35 00 a4 05 01 09 30 65 13 55 0d 46 f0 50 26 ff 7f 81 02 09 31 46 91 2d 26 ff 7f 81 02 b4 09 30 45 00 26 ff 1f 81 42 09 3d 15 81 25 7f 75 08 95 01 81 02 09 3e 15 81 25 7f 81 02 c0 c0", + input_info=(BusType.USB, 0x28BD, 0x093A), + ) + + +class TestHuion_Kamvas_Pro_19_256c_006b(BaseTest.TestTablet): + hid_bpfs = [("Huion__Kamvas-Pro-19.bpf.o", True)] + + def create_device(self): + return Huion_Kamvas_Pro_19_256c_006b( + "uhid test HUION Huion Tablet_GT1902", + rdesc="05 0d 09 02 a1 01 85 0a 09 20 a1 01 09 42 09 44 09 43 09 3c 09 45 15 00 25 01 75 01 95 06 81 02 09 32 75 01 95 01 81 02 81 03 05 01 09 30 09 31 55 0d 65 33 26 ff 7f 35 00 46 00 08 75 10 95 02 81 02 05 0d 09 30 26 ff 3f 75 10 95 01 81 02 09 3d 09 3e 15 a6 25 5a 75 08 95 02 81 02 c0 c0 05 0d 09 04 a1 01 85 04 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 ff 7f 35 00 46 15 0c 81 42 09 31 26 ff 7f 46 cb 06 81 42 05 0d 09 30 26 ff 1f 75 10 95 01 81 02 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 ff 7f 35 00 46 15 0c 81 42 09 31 26 ff 7f 46 cb 06 81 42 05 0d 09 30 26 ff 1f 75 10 95 01 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 75 08 95 08 81 03 85 05 09 55 25 0a 75 08 95 01 b1 02 06 00 ff 09 c5 85 06 15 00 26 ff 00 75 08 96 00 01 b1 02 c0", + input_info=(BusType.USB, 0x256C, 0x006B), + ) diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index edf1c99c99..5f7d5a5ba8 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -1722,10 +1722,17 @@ FIXTURE_VARIANT(iommufd_dirty_tracking) FIXTURE_SETUP(iommufd_dirty_tracking) { + unsigned long size; int mmap_flags; void *vrc; int rc; + if (variant->buffer_size < MOCK_PAGE_SIZE) { + SKIP(return, + "Skipping buffer_size=%lu, less than MOCK_PAGE_SIZE=%lu", + variant->buffer_size, MOCK_PAGE_SIZE); + } + self->fd = open("/dev/iommu", O_RDWR); ASSERT_NE(-1, self->fd); @@ -1749,12 +1756,11 @@ FIXTURE_SETUP(iommufd_dirty_tracking) assert(vrc == self->buffer); self->page_size = MOCK_PAGE_SIZE; - self->bitmap_size = - variant->buffer_size / self->page_size / BITS_PER_BYTE; + self->bitmap_size = variant->buffer_size / self->page_size; /* Provision with an extra (PAGE_SIZE) for the unaligned case */ - rc = posix_memalign(&self->bitmap, PAGE_SIZE, - self->bitmap_size + PAGE_SIZE); + size = DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE); + rc = posix_memalign(&self->bitmap, PAGE_SIZE, size + PAGE_SIZE); assert(!rc); assert(self->bitmap); assert((uintptr_t)self->bitmap % PAGE_SIZE == 0); @@ -1775,51 +1781,63 @@ FIXTURE_SETUP(iommufd_dirty_tracking) FIXTURE_TEARDOWN(iommufd_dirty_tracking) { munmap(self->buffer, variant->buffer_size); - munmap(self->bitmap, self->bitmap_size); + munmap(self->bitmap, DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE)); teardown_iommufd(self->fd, _metadata); } -FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128k) +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty8k) +{ + /* half of an u8 index bitmap */ + .buffer_size = 8UL * 1024UL, +}; + +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty16k) +{ + /* one u8 index bitmap */ + .buffer_size = 16UL * 1024UL, +}; + +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64k) { /* one u32 index bitmap */ - .buffer_size = 128UL * 1024UL, + .buffer_size = 64UL * 1024UL, }; -FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256k) +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128k) { /* one u64 index bitmap */ - .buffer_size = 256UL * 1024UL, + .buffer_size = 128UL * 1024UL, }; -FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty640k) +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty320k) { /* two u64 index and trailing end bitmap */ - .buffer_size = 640UL * 1024UL, + .buffer_size = 320UL * 1024UL, }; -FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M) +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64M) { - /* 4K bitmap (128M IOVA range) */ - .buffer_size = 128UL * 1024UL * 1024UL, + /* 4K bitmap (64M IOVA range) */ + .buffer_size = 64UL * 1024UL * 1024UL, }; -FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M_huge) +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64M_huge) { - /* 4K bitmap (128M IOVA range) */ - .buffer_size = 128UL * 1024UL * 1024UL, + /* 4K bitmap (64M IOVA range) */ + .buffer_size = 64UL * 1024UL * 1024UL, .hugepages = true, }; -FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M) +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M) { - /* 8K bitmap (256M IOVA range) */ - .buffer_size = 256UL * 1024UL * 1024UL, + /* 8K bitmap (128M IOVA range) */ + .buffer_size = 128UL * 1024UL * 1024UL, }; -FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M_huge) +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M_huge) { - /* 8K bitmap (256M IOVA range) */ - .buffer_size = 256UL * 1024UL * 1024UL, + /* 8K bitmap (128M IOVA range) */ + .buffer_size = 128UL * 1024UL * 1024UL, .hugepages = true, }; diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 8d2b46b211..c612fbf019 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -22,6 +22,8 @@ #define BIT_MASK(nr) (1UL << ((nr) % __BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / __BITS_PER_LONG) +#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) + static inline void set_bit(unsigned int nr, unsigned long *addr) { unsigned long mask = BIT_MASK(nr); @@ -346,12 +348,12 @@ static int _test_cmd_mock_domain_set_dirty(int fd, __u32 hwpt_id, size_t length, static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, __u64 iova, size_t page_size, size_t pte_page_size, __u64 *bitmap, - __u64 bitmap_size, __u32 flags, + __u64 nbits, __u32 flags, struct __test_metadata *_metadata) { unsigned long npte = pte_page_size / page_size, pteset = 2 * npte; - unsigned long nbits = bitmap_size * BITS_PER_BYTE; unsigned long j, i, nr = nbits / pteset ?: 1; + unsigned long bitmap_size = DIV_ROUND_UP(nbits, BITS_PER_BYTE); __u64 out_dirty = 0; /* Mark all even bits as dirty in the mock domain */ diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c index 656c43c240..c75ea40948 100644 --- a/tools/testing/selftests/ipc/msgque.c +++ b/tools/testing/selftests/ipc/msgque.c @@ -198,13 +198,12 @@ int main(int argc, char **argv) struct msgque_data msgque; if (getuid() != 0) - return ksft_exit_skip( - "Please run the test as root - Exiting.\n"); + ksft_exit_skip("Please run the test as root - Exiting.\n"); msgque.key = ftok(argv[0], 822155650); if (msgque.key == -1) { printf("Can't make key: %d\n", -errno); - return ksft_exit_fail(); + ksft_exit_fail(); } msgque.msq_id = msgget(msgque.key, IPC_CREAT | IPC_EXCL | 0666); @@ -243,13 +242,13 @@ int main(int argc, char **argv) printf("Failed to test queue: %d\n", err); goto err_out; } - return ksft_exit_pass(); + ksft_exit_pass(); err_destroy: if (msgctl(msgque.msq_id, IPC_RMID, NULL)) { printf("Failed to destroy queue: %d\n", -errno); - return ksft_exit_fail(); + ksft_exit_fail(); } err_out: - return ksft_exit_fail(); + ksft_exit_fail(); } diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 14bbab0cce..76c2a6945d 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -16,10 +16,12 @@ * For each test, report any progress, debugging, etc with: * * ksft_print_msg(fmt, ...); + * ksft_perror(msg); * * and finally report the pass/fail/skip/xfail state of the test with one of: * * ksft_test_result(condition, fmt, ...); + * ksft_test_result_report(result, fmt, ...); * ksft_test_result_pass(fmt, ...); * ksft_test_result_fail(fmt, ...); * ksft_test_result_skip(fmt, ...); @@ -39,6 +41,7 @@ * the program is aborting before finishing all tests): * * ksft_exit_fail_msg(fmt, ...); + * ksft_exit_fail_perror(msg); * */ #ifndef __KSELFTEST_H @@ -305,13 +308,34 @@ void ksft_test_result_code(int exit_code, const char *test_name, printf("\n"); } -static inline __noreturn int ksft_exit_pass(void) +/** + * ksft_test_result() - Report test success based on truth of condition + * + * @condition: if true, report test success, otherwise failure. + */ +#define ksft_test_result_report(result, fmt, ...) do { \ + switch (result) { \ + case KSFT_PASS: \ + ksft_test_result_pass(fmt, ##__VA_ARGS__); \ + break; \ + case KSFT_FAIL: \ + ksft_test_result_fail(fmt, ##__VA_ARGS__); \ + break; \ + case KSFT_XFAIL: \ + ksft_test_result_xfail(fmt, ##__VA_ARGS__); \ + break; \ + case KSFT_SKIP: \ + ksft_test_result_skip(fmt, ##__VA_ARGS__); \ + break; \ + } } while (0) + +static inline __noreturn void ksft_exit_pass(void) { ksft_print_cnts(); exit(KSFT_PASS); } -static inline __noreturn int ksft_exit_fail(void) +static inline __noreturn void ksft_exit_fail(void) { ksft_print_cnts(); exit(KSFT_FAIL); @@ -338,7 +362,7 @@ static inline __noreturn int ksft_exit_fail(void) ksft_cnt.ksft_xfail + \ ksft_cnt.ksft_xskip) -static inline __noreturn __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...) +static inline __noreturn __printf(1, 2) void ksft_exit_fail_msg(const char *msg, ...) { int saved_errno = errno; va_list args; @@ -353,19 +377,32 @@ static inline __noreturn __printf(1, 2) int ksft_exit_fail_msg(const char *msg, exit(KSFT_FAIL); } -static inline __noreturn int ksft_exit_xfail(void) +static inline __noreturn void ksft_exit_fail_perror(const char *msg) +{ +#ifndef NOLIBC + ksft_exit_fail_msg("%s: %s (%d)\n", msg, strerror(errno), errno); +#else + /* + * nolibc doesn't provide strerror() and it seems + * inappropriate to add one, just print the errno. + */ + ksft_exit_fail_msg("%s: %d)\n", msg, errno); +#endif +} + +static inline __noreturn void ksft_exit_xfail(void) { ksft_print_cnts(); exit(KSFT_XFAIL); } -static inline __noreturn int ksft_exit_xpass(void) +static inline __noreturn void ksft_exit_xpass(void) { ksft_print_cnts(); exit(KSFT_XPASS); } -static inline __noreturn __printf(1, 2) int ksft_exit_skip(const char *msg, ...) +static inline __noreturn __printf(1, 2) void ksft_exit_skip(const char *msg, ...) { int saved_errno = errno; va_list args; diff --git a/tools/testing/selftests/kselftest_deps.sh b/tools/testing/selftests/kselftest_deps.sh index de59cc8f03..487e49fdf2 100755 --- a/tools/testing/selftests/kselftest_deps.sh +++ b/tools/testing/selftests/kselftest_deps.sh @@ -244,6 +244,7 @@ l4_test() l5_test() { tests=$(find $(dirname "$test") -type f -name "*.mk") + [[ -z "${tests// }" ]] && return test_libs=$(grep "^IOURING_EXTRA_LIBS +\?=" $tests | \ cut -d "=" -f 2) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 741c7dc16a..ac280dcba9 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -45,6 +45,7 @@ LIBKVM_x86_64 += lib/x86_64/vmx.c LIBKVM_aarch64 += lib/aarch64/gic.c LIBKVM_aarch64 += lib/aarch64/gic_v3.c +LIBKVM_aarch64 += lib/aarch64/gic_v3_its.c LIBKVM_aarch64 += lib/aarch64/handlers.S LIBKVM_aarch64 += lib/aarch64/processor.c LIBKVM_aarch64 += lib/aarch64/spinlock.c @@ -120,6 +121,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test +TEST_GEN_PROGS_x86_64 += x86_64/sev_init2_tests TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests TEST_GEN_PROGS_x86_64 += x86_64/sev_smoke_test TEST_GEN_PROGS_x86_64 += x86_64/amx_test @@ -157,6 +159,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config TEST_GEN_PROGS_aarch64 += aarch64/vgic_init TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq +TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access TEST_GEN_PROGS_aarch64 += access_tracking_perf_test TEST_GEN_PROGS_aarch64 += arch_timer @@ -180,6 +183,7 @@ TEST_GEN_PROGS_s390x += s390x/sync_regs_test TEST_GEN_PROGS_s390x += s390x/tprot TEST_GEN_PROGS_s390x += s390x/cmma_test TEST_GEN_PROGS_s390x += s390x/debug_test +TEST_GEN_PROGS_s390x += s390x/shared_zeropage_test TEST_GEN_PROGS_s390x += demand_paging_test TEST_GEN_PROGS_s390x += dirty_log_test TEST_GEN_PROGS_s390x += guest_print_test @@ -189,6 +193,8 @@ TEST_GEN_PROGS_s390x += rseq_test TEST_GEN_PROGS_s390x += set_memory_region_test TEST_GEN_PROGS_s390x += kvm_binary_stats_test +TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test +TEST_GEN_PROGS_riscv += riscv/ebreak_test TEST_GEN_PROGS_riscv += arch_timer TEST_GEN_PROGS_riscv += demand_paging_test TEST_GEN_PROGS_riscv += dirty_log_test @@ -225,8 +231,8 @@ LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include endif CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ - -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \ - -fno-builtin-strnlen \ + -D_GNU_SOURCE -fno-builtin-memcmp -fno-builtin-memcpy \ + -fno-builtin-memset -fno-builtin-strnlen \ -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ -I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \ diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index 4eaba83cdc..eeba1cc87f 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -5,18 +5,14 @@ * * Copyright (c) 2021, Google LLC. */ -#define _GNU_SOURCE - #include "arch_timer.h" #include "delay.h" #include "gic.h" #include "processor.h" #include "timer_test.h" +#include "ucall_common.h" #include "vgic.h" -#define GICD_BASE_GPA 0x8000000ULL -#define GICR_BASE_GPA 0x80A0000ULL - enum guest_stage { GUEST_STAGE_VTIMER_CVAL = 1, GUEST_STAGE_VTIMER_TVAL, @@ -149,8 +145,7 @@ static void guest_code(void) local_irq_disable(); - gic_init(GIC_V3, test_args.nr_vcpus, - (void *)GICD_BASE_GPA, (void *)GICR_BASE_GPA); + gic_init(GIC_V3, test_args.nr_vcpus); timer_set_ctl(VIRTUAL, CTL_IMASK); timer_set_ctl(PHYSICAL, CTL_IMASK); @@ -209,7 +204,7 @@ struct kvm_vm *test_vm_create(void) vcpu_init_descriptor_tables(vcpus[i]); test_init_timer_irq(vm); - gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); + gic_fd = vgic_v3_setup(vm, nr_vcpus, 64); __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); /* Make all the test's cmdline args visible to the guest */ diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index 5972905275..d29b08198b 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -7,7 +7,6 @@ * hugetlbfs with a hole). It checks that the expected handling method is * called (e.g., uffd faults with the right address and write/read flag). */ -#define _GNU_SOURCE #include <linux/bitmap.h> #include <fcntl.h> #include <test_util.h> @@ -375,14 +374,14 @@ static void setup_uffd(struct kvm_vm *vm, struct test_params *p, *pt_uffd = uffd_setup_demand_paging(uffd_mode, 0, pt_args.hva, pt_args.paging_size, - test->uffd_pt_handler); + 1, test->uffd_pt_handler); *data_uffd = NULL; if (test->uffd_data_handler) *data_uffd = uffd_setup_demand_paging(uffd_mode, 0, data_args.hva, data_args.paging_size, - test->uffd_data_handler); + 1, test->uffd_data_handler); } static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd, diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c index 9b004905d1..61731a950d 100644 --- a/tools/testing/selftests/kvm/aarch64/psci_test.c +++ b/tools/testing/selftests/kvm/aarch64/psci_test.c @@ -11,9 +11,9 @@ * KVM_SYSTEM_EVENT_SUSPEND UAPI. */ -#define _GNU_SOURCE - +#include <linux/kernel.h> #include <linux/psci.h> +#include <asm/cputype.h> #include "kvm_util.h" #include "processor.h" diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c index 16e2338686..a7de39fa2a 100644 --- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c @@ -327,8 +327,8 @@ uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr) return ftr; } -static void test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, - const struct reg_ftr_bits *ftr_bits) +static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, + const struct reg_ftr_bits *ftr_bits) { uint8_t shift = ftr_bits->shift; uint64_t mask = ftr_bits->mask; @@ -346,6 +346,8 @@ static void test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg, vcpu_set_reg(vcpu, reg, val); vcpu_get_reg(vcpu, reg, &new_val); TEST_ASSERT_EQ(new_val, val); + + return new_val; } static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg, @@ -374,7 +376,15 @@ static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg, TEST_ASSERT_EQ(val, old_val); } -static void test_user_set_reg(struct kvm_vcpu *vcpu, bool aarch64_only) +static uint64_t test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + +#define encoding_to_range_idx(encoding) \ + KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(encoding), sys_reg_Op1(encoding), \ + sys_reg_CRn(encoding), sys_reg_CRm(encoding), \ + sys_reg_Op2(encoding)) + + +static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only) { uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; struct reg_mask_range range = { @@ -398,9 +408,7 @@ static void test_user_set_reg(struct kvm_vcpu *vcpu, bool aarch64_only) int idx; /* Get the index to masks array for the idreg */ - idx = KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(reg_id), sys_reg_Op1(reg_id), - sys_reg_CRn(reg_id), sys_reg_CRm(reg_id), - sys_reg_Op2(reg_id)); + idx = encoding_to_range_idx(reg_id); for (int j = 0; ftr_bits[j].type != FTR_END; j++) { /* Skip aarch32 reg on aarch64 only system, since they are RAZ/WI. */ @@ -414,7 +422,9 @@ static void test_user_set_reg(struct kvm_vcpu *vcpu, bool aarch64_only) TEST_ASSERT_EQ(masks[idx] & ftr_bits[j].mask, ftr_bits[j].mask); test_reg_set_fail(vcpu, reg, &ftr_bits[j]); - test_reg_set_success(vcpu, reg, &ftr_bits[j]); + + test_reg_vals[idx] = test_reg_set_success(vcpu, reg, + &ftr_bits[j]); ksft_test_result_pass("%s\n", ftr_bits[j].name); } @@ -425,7 +435,6 @@ static void test_guest_reg_read(struct kvm_vcpu *vcpu) { bool done = false; struct ucall uc; - uint64_t val; while (!done) { vcpu_run(vcpu); @@ -436,8 +445,8 @@ static void test_guest_reg_read(struct kvm_vcpu *vcpu) break; case UCALL_SYNC: /* Make sure the written values are seen by guest */ - vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(uc.args[2]), &val); - TEST_ASSERT_EQ(val, uc.args[3]); + TEST_ASSERT_EQ(test_reg_vals[encoding_to_range_idx(uc.args[2])], + uc.args[3]); break; case UCALL_DONE: done = true; @@ -448,13 +457,85 @@ static void test_guest_reg_read(struct kvm_vcpu *vcpu) } } +/* Politely lifted from arch/arm64/include/asm/cache.h */ +/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */ +#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1)) +#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level)) +#define CLIDR_CTYPE(clidr, level) \ + (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level)) + +static void test_clidr(struct kvm_vcpu *vcpu) +{ + uint64_t clidr; + int level; + + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), &clidr); + + /* find the first empty level in the cache hierarchy */ + for (level = 1; level < 7; level++) { + if (!CLIDR_CTYPE(clidr, level)) + break; + } + + /* + * If you have a mind-boggling 7 levels of cache, congratulations, you + * get to fix this. + */ + TEST_ASSERT(level <= 7, "can't find an empty level in cache hierarchy"); + + /* stick in a unified cache level */ + clidr |= BIT(2) << CLIDR_CTYPE_SHIFT(level); + + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), clidr); + test_reg_vals[encoding_to_range_idx(SYS_CLIDR_EL1)] = clidr; +} + +static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu) +{ + u64 val; + + test_clidr(vcpu); + + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &val); + val++; + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val); + + test_reg_vals[encoding_to_range_idx(SYS_MPIDR_EL1)] = val; + ksft_test_result_pass("%s\n", __func__); +} + +static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encoding) +{ + size_t idx = encoding_to_range_idx(encoding); + uint64_t observed; + + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding), &observed); + TEST_ASSERT_EQ(test_reg_vals[idx], observed); +} + +static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu) +{ + /* + * Calls KVM_ARM_VCPU_INIT behind the scenes, which will do an + * architectural reset of the vCPU. + */ + aarch64_vcpu_setup(vcpu, NULL); + + for (int i = 0; i < ARRAY_SIZE(test_regs); i++) + test_assert_id_reg_unchanged(vcpu, test_regs[i].reg); + + test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1); + + ksft_test_result_pass("%s\n", __func__); +} + int main(void) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; bool aarch64_only; uint64_t val, el0; - int ftr_cnt; + int test_cnt; TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES)); @@ -467,18 +548,22 @@ int main(void) ksft_print_header(); - ftr_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) + - ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) + - ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) + - ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + - ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - - ARRAY_SIZE(test_regs); + test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) + + ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) + + ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) + + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - + ARRAY_SIZE(test_regs) + 2; - ksft_set_plan(ftr_cnt); + ksft_set_plan(test_cnt); + + test_vm_ftr_id_regs(vcpu, aarch64_only); + test_vcpu_ftr_id_regs(vcpu); - test_user_set_reg(vcpu, aarch64_only); test_guest_reg_read(vcpu); + test_reset_preserves_id_regs(vcpu); + kvm_vm_free(vm); ksft_finished(); diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/aarch64/vgic_init.c index ca917c71ff..b3b5fb0ff0 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_init.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_init.c @@ -4,7 +4,6 @@ * * Copyright (C) 2020, Red Hat, Inc. */ -#define _GNU_SOURCE #include <linux/kernel.h> #include <sys/syscall.h> #include <asm/kvm.h> diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c index 2e64b4856e..a51dbd2a5f 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c @@ -19,9 +19,6 @@ #include "gic_v3.h" #include "vgic.h" -#define GICD_BASE_GPA 0x08000000ULL -#define GICR_BASE_GPA 0x080A0000ULL - /* * Stores the user specified args; it's passed to the guest and to every test * function. @@ -49,9 +46,6 @@ struct test_args { #define IRQ_DEFAULT_PRIO (LOWEST_PRIO - 1) #define IRQ_DEFAULT_PRIO_REG (IRQ_DEFAULT_PRIO << KVM_PRIO_SHIFT) /* 0xf0 */ -static void *dist = (void *)GICD_BASE_GPA; -static void *redist = (void *)GICR_BASE_GPA; - /* * The kvm_inject_* utilities are used by the guest to ask the host to inject * interrupts (e.g., using the KVM_IRQ_LINE ioctl). @@ -152,7 +146,7 @@ static void reset_stats(void) static uint64_t gic_read_ap1r0(void) { - uint64_t reg = read_sysreg_s(SYS_ICV_AP1R0_EL1); + uint64_t reg = read_sysreg_s(SYS_ICC_AP1R0_EL1); dsb(sy); return reg; @@ -160,7 +154,7 @@ static uint64_t gic_read_ap1r0(void) static void gic_write_ap1r0(uint64_t val) { - write_sysreg_s(val, SYS_ICV_AP1R0_EL1); + write_sysreg_s(val, SYS_ICC_AP1R0_EL1); isb(); } @@ -478,7 +472,7 @@ static void guest_code(struct test_args *args) bool level_sensitive = args->level_sensitive; struct kvm_inject_desc *f, *inject_fns; - gic_init(GIC_V3, 1, dist, redist); + gic_init(GIC_V3, 1); for (i = 0; i < nr_irqs; i++) gic_irq_enable(i); @@ -764,8 +758,7 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args)); vcpu_args_set(vcpu, 1, args_gva); - gic_fd = vgic_v3_setup(vm, 1, nr_irqs, - GICD_BASE_GPA, GICR_BASE_GPA); + gic_fd = vgic_v3_setup(vm, 1, nr_irqs); __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping"); vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, diff --git a/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c new file mode 100644 index 0000000000..fc4fe52fb6 --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c @@ -0,0 +1,410 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * vgic_lpi_stress - Stress test for KVM's ITS emulation + * + * Copyright (c) 2024 Google LLC + */ + +#include <linux/sizes.h> +#include <pthread.h> +#include <stdatomic.h> +#include <sys/sysinfo.h> + +#include "kvm_util.h" +#include "gic.h" +#include "gic_v3.h" +#include "gic_v3_its.h" +#include "processor.h" +#include "ucall.h" +#include "vgic.h" + +#define TEST_MEMSLOT_INDEX 1 + +#define GIC_LPI_OFFSET 8192 + +static size_t nr_iterations = 1000; +static vm_paddr_t gpa_base; + +static struct kvm_vm *vm; +static struct kvm_vcpu **vcpus; +static int gic_fd, its_fd; + +static struct test_data { + bool request_vcpus_stop; + u32 nr_cpus; + u32 nr_devices; + u32 nr_event_ids; + + vm_paddr_t device_table; + vm_paddr_t collection_table; + vm_paddr_t cmdq_base; + void *cmdq_base_va; + vm_paddr_t itt_tables; + + vm_paddr_t lpi_prop_table; + vm_paddr_t lpi_pend_tables; +} test_data = { + .nr_cpus = 1, + .nr_devices = 1, + .nr_event_ids = 16, +}; + +static void guest_irq_handler(struct ex_regs *regs) +{ + u32 intid = gic_get_and_ack_irq(); + + if (intid == IAR_SPURIOUS) + return; + + GUEST_ASSERT(intid >= GIC_LPI_OFFSET); + gic_set_eoi(intid); +} + +static void guest_setup_its_mappings(void) +{ + u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET; + u32 nr_events = test_data.nr_event_ids; + u32 nr_devices = test_data.nr_devices; + u32 nr_cpus = test_data.nr_cpus; + + for (coll_id = 0; coll_id < nr_cpus; coll_id++) + its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true); + + /* Round-robin the LPIs to all of the vCPUs in the VM */ + coll_id = 0; + for (device_id = 0; device_id < nr_devices; device_id++) { + vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K); + + its_send_mapd_cmd(test_data.cmdq_base_va, device_id, + itt_base, SZ_64K, true); + + for (event_id = 0; event_id < nr_events; event_id++) { + its_send_mapti_cmd(test_data.cmdq_base_va, device_id, + event_id, coll_id, intid++); + + coll_id = (coll_id + 1) % test_data.nr_cpus; + } + } +} + +static void guest_invalidate_all_rdists(void) +{ + int i; + + for (i = 0; i < test_data.nr_cpus; i++) + its_send_invall_cmd(test_data.cmdq_base_va, i); +} + +static void guest_setup_gic(void) +{ + static atomic_int nr_cpus_ready = 0; + u32 cpuid = guest_get_vcpuid(); + + gic_init(GIC_V3, test_data.nr_cpus); + gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K, + test_data.lpi_pend_tables + (cpuid * SZ_64K)); + + atomic_fetch_add(&nr_cpus_ready, 1); + + if (cpuid > 0) + return; + + while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus) + cpu_relax(); + + its_init(test_data.collection_table, SZ_64K, + test_data.device_table, SZ_64K, + test_data.cmdq_base, SZ_64K); + + guest_setup_its_mappings(); + guest_invalidate_all_rdists(); +} + +static void guest_code(size_t nr_lpis) +{ + guest_setup_gic(); + + GUEST_SYNC(0); + + /* + * Don't use WFI here to avoid blocking the vCPU thread indefinitely and + * never getting the stop signal. + */ + while (!READ_ONCE(test_data.request_vcpus_stop)) + cpu_relax(); + + GUEST_DONE(); +} + +static void setup_memslot(void) +{ + size_t pages; + size_t sz; + + /* + * For the ITS: + * - A single level device table + * - A single level collection table + * - The command queue + * - An ITT for each device + */ + sz = (3 + test_data.nr_devices) * SZ_64K; + + /* + * For the redistributors: + * - A shared LPI configuration table + * - An LPI pending table for each vCPU + */ + sz += (1 + test_data.nr_cpus) * SZ_64K; + + pages = sz / vm->page_size; + gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz; + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base, + TEST_MEMSLOT_INDEX, pages, 0); +} + +#define LPI_PROP_DEFAULT_PRIO 0xa0 + +static void configure_lpis(void) +{ + size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids; + u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table); + size_t i; + + for (i = 0; i < nr_lpis; i++) { + tbl[i] = LPI_PROP_DEFAULT_PRIO | + LPI_PROP_GROUP1 | + LPI_PROP_ENABLED; + } +} + +static void setup_test_data(void) +{ + size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K); + u32 nr_devices = test_data.nr_devices; + u32 nr_cpus = test_data.nr_cpus; + vm_paddr_t cmdq_base; + + test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k, + gpa_base, + TEST_MEMSLOT_INDEX); + + test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k, + gpa_base, + TEST_MEMSLOT_INDEX); + + cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base, + TEST_MEMSLOT_INDEX); + virt_map(vm, cmdq_base, cmdq_base, pages_per_64k); + test_data.cmdq_base = cmdq_base; + test_data.cmdq_base_va = (void *)cmdq_base; + + test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices, + gpa_base, TEST_MEMSLOT_INDEX); + + test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k, + gpa_base, TEST_MEMSLOT_INDEX); + configure_lpis(); + + test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus, + gpa_base, TEST_MEMSLOT_INDEX); + + sync_global_to_guest(vm, test_data); +} + +static void setup_gic(void) +{ + gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64); + __TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3"); + + its_fd = vgic_its_setup(vm); +} + +static void signal_lpi(u32 device_id, u32 event_id) +{ + vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER; + + struct kvm_msi msi = { + .address_lo = db_addr, + .address_hi = db_addr >> 32, + .data = event_id, + .devid = device_id, + .flags = KVM_MSI_VALID_DEVID, + }; + + /* + * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM, + * which for arm64 implies having a valid translation in the ITS. + */ + TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1, + "KVM_SIGNAL_MSI ioctl failed"); +} + +static pthread_barrier_t test_setup_barrier; + +static void *lpi_worker_thread(void *data) +{ + u32 device_id = (size_t)data; + u32 event_id; + size_t i; + + pthread_barrier_wait(&test_setup_barrier); + + for (i = 0; i < nr_iterations; i++) + for (event_id = 0; event_id < test_data.nr_event_ids; event_id++) + signal_lpi(device_id, event_id); + + return NULL; +} + +static void *vcpu_worker_thread(void *data) +{ + struct kvm_vcpu *vcpu = data; + struct ucall uc; + + while (true) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + pthread_barrier_wait(&test_setup_barrier); + continue; + case UCALL_DONE: + return NULL; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + TEST_FAIL("Unknown ucall: %lu", uc.cmd); + } + } + + return NULL; +} + +static void report_stats(struct timespec delta) +{ + double nr_lpis; + double time; + + nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations; + + time = delta.tv_sec; + time += ((double)delta.tv_nsec) / NSEC_PER_SEC; + + pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time); +} + +static void run_test(void) +{ + u32 nr_devices = test_data.nr_devices; + u32 nr_vcpus = test_data.nr_cpus; + pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t)); + pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t)); + struct timespec start, delta; + size_t i; + + TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays"); + + pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1); + + for (i = 0; i < nr_vcpus; i++) + pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]); + + for (i = 0; i < nr_devices; i++) + pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i); + + pthread_barrier_wait(&test_setup_barrier); + + clock_gettime(CLOCK_MONOTONIC, &start); + + for (i = 0; i < nr_devices; i++) + pthread_join(lpi_threads[i], NULL); + + delta = timespec_elapsed(start); + write_guest_global(vm, test_data.request_vcpus_stop, true); + + for (i = 0; i < nr_vcpus; i++) + pthread_join(vcpu_threads[i], NULL); + + report_stats(delta); +} + +static void setup_vm(void) +{ + int i; + + vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu)); + TEST_ASSERT(vcpus, "Failed to allocate vCPU array"); + + vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus); + + vm_init_descriptor_tables(vm); + for (i = 0; i < test_data.nr_cpus; i++) + vcpu_init_descriptor_tables(vcpus[i]); + + vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler); + + setup_memslot(); + + setup_gic(); + + setup_test_data(); +} + +static void destroy_vm(void) +{ + close(its_fd); + close(gic_fd); + kvm_vm_free(vm); + free(vcpus); +} + +static void pr_usage(const char *name) +{ + pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] -h\n", name); + pr_info(" -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus); + pr_info(" -d:\tnumber of devices (default: %u)\n", test_data.nr_devices); + pr_info(" -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids); + pr_info(" -i:\tnumber of iterations (default: %lu)\n", nr_iterations); +} + +int main(int argc, char **argv) +{ + u32 nr_threads; + int c; + + while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) { + switch (c) { + case 'v': + test_data.nr_cpus = atoi(optarg); + break; + case 'd': + test_data.nr_devices = atoi(optarg); + break; + case 'e': + test_data.nr_event_ids = atoi(optarg); + break; + case 'i': + nr_iterations = strtoul(optarg, NULL, 0); + break; + case 'h': + default: + pr_usage(argv[0]); + return 1; + } + } + + nr_threads = test_data.nr_cpus + test_data.nr_devices; + if (nr_threads > get_nprocs()) + pr_info("WARNING: running %u threads on %d CPUs; performance is degraded.\n", + nr_threads, get_nprocs()); + + setup_vm(); + + run_test(); + + destroy_vm(); + + return 0; +} diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c index f2fb0e3f14..d31b9f64ba 100644 --- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c @@ -404,9 +404,6 @@ static void guest_code(uint64_t expected_pmcr_n) GUEST_DONE(); } -#define GICD_BASE_GPA 0x8000000ULL -#define GICR_BASE_GPA 0x80A0000ULL - /* Create a VM that has one vCPU with PMUv3 configured. */ static void create_vpmu_vm(void *guest_code) { @@ -438,8 +435,7 @@ static void create_vpmu_vm(void *guest_code) init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3); vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code); vcpu_init_descriptor_tables(vpmu_vm.vcpu); - vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64, - GICD_BASE_GPA, GICR_BASE_GPA); + vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64); __TEST_REQUIRE(vpmu_vm.gic_fd >= 0, "Failed to create vgic-v3, skipping"); diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c index ae1f1a6d83..acb2cb5963 100644 --- a/tools/testing/selftests/kvm/arch_timer.c +++ b/tools/testing/selftests/kvm/arch_timer.c @@ -19,9 +19,6 @@ * * Copyright (c) 2021, Google LLC. */ - -#define _GNU_SOURCE - #include <stdlib.h> #include <pthread.h> #include <linux/sizes.h> @@ -29,6 +26,7 @@ #include <sys/sysinfo.h> #include "timer_test.h" +#include "ucall_common.h" struct test_args test_args = { .nr_vcpus = NR_VCPUS_DEF, diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index bf3609f718..0202b78f86 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -6,14 +6,10 @@ * Copyright (C) 2018, Red Hat, Inc. * Copyright (C) 2019, Google, Inc. */ - -#define _GNU_SOURCE /* for pipe2 */ - #include <inttypes.h> #include <stdio.h> #include <stdlib.h> #include <time.h> -#include <poll.h> #include <pthread.h> #include <linux/userfaultfd.h> #include <sys/syscall.h> @@ -22,6 +18,7 @@ #include "test_util.h" #include "memstress.h" #include "guest_modes.h" +#include "ucall_common.h" #include "userfaultfd_util.h" #ifdef __NR_userfaultfd @@ -77,8 +74,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, copy.mode = 0; r = ioctl(uffd, UFFDIO_COPY, ©); - if (r == -1) { - pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n", + /* + * With multiple vCPU threads fault on a single page and there are + * multiple readers for the UFFD, at least one of the UFFDIO_COPYs + * will fail with EEXIST: handle that case without signaling an + * error. + * + * Note that this also suppress any EEXISTs occurring from, + * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never + * happens here, but a realistic VMM might potentially maintain + * some external state to correctly surface EEXISTs to userspace + * (or prevent duplicate COPY/CONTINUEs in the first place). + */ + if (r == -1 && errno != EEXIST) { + pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d, errno = %d\n", addr, tid, errno); return r; } @@ -89,8 +98,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, cont.range.len = demand_paging_size; r = ioctl(uffd, UFFDIO_CONTINUE, &cont); - if (r == -1) { - pr_info("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n", + /* + * With multiple vCPU threads fault on a single page and there are + * multiple readers for the UFFD, at least one of the UFFDIO_COPYs + * will fail with EEXIST: handle that case without signaling an + * error. + * + * Note that this also suppress any EEXISTs occurring from, + * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never + * happens here, but a realistic VMM might potentially maintain + * some external state to correctly surface EEXISTs to userspace + * (or prevent duplicate COPY/CONTINUEs in the first place). + */ + if (r == -1 && errno != EEXIST) { + pr_info("Failed UFFDIO_CONTINUE in 0x%lx, thread %d, errno = %d\n", addr, tid, errno); return r; } @@ -110,7 +131,9 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, struct test_params { int uffd_mode; + bool single_uffd; useconds_t uffd_delay; + int readers_per_uffd; enum vm_mem_backing_src_type src_type; bool partition_vcpu_memory_access; }; @@ -131,10 +154,12 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct memstress_vcpu_args *vcpu_args; struct test_params *p = arg; struct uffd_desc **uffd_descs = NULL; + uint64_t uffd_region_size; struct timespec start; struct timespec ts_diff; + double vcpu_paging_rate; struct kvm_vm *vm; - int i; + int i, num_uffds = 0; vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, p->src_type, p->partition_vcpu_memory_access); @@ -147,7 +172,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) memset(guest_data_prototype, 0xAB, demand_paging_size); if (p->uffd_mode == UFFDIO_REGISTER_MODE_MINOR) { - for (i = 0; i < nr_vcpus; i++) { + num_uffds = p->single_uffd ? 1 : nr_vcpus; + for (i = 0; i < num_uffds; i++) { vcpu_args = &memstress_args.vcpu_args[i]; prefault_mem(addr_gpa2alias(vm, vcpu_args->gpa), vcpu_args->pages * memstress_args.guest_page_size); @@ -155,9 +181,13 @@ static void run_test(enum vm_guest_mode mode, void *arg) } if (p->uffd_mode) { - uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *)); + num_uffds = p->single_uffd ? 1 : nr_vcpus; + uffd_region_size = nr_vcpus * guest_percpu_mem_size / num_uffds; + + uffd_descs = malloc(num_uffds * sizeof(struct uffd_desc *)); TEST_ASSERT(uffd_descs, "Memory allocation failed"); - for (i = 0; i < nr_vcpus; i++) { + for (i = 0; i < num_uffds; i++) { + struct memstress_vcpu_args *vcpu_args; void *vcpu_hva; vcpu_args = &memstress_args.vcpu_args[i]; @@ -170,7 +200,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) */ uffd_descs[i] = uffd_setup_demand_paging( p->uffd_mode, p->uffd_delay, vcpu_hva, - vcpu_args->pages * memstress_args.guest_page_size, + uffd_region_size, + p->readers_per_uffd, &handle_uffd_page_request); } } @@ -187,15 +218,19 @@ static void run_test(enum vm_guest_mode mode, void *arg) if (p->uffd_mode) { /* Tell the user fault fd handler threads to quit */ - for (i = 0; i < nr_vcpus; i++) + for (i = 0; i < num_uffds; i++) uffd_stop_demand_paging(uffd_descs[i]); } - pr_info("Total guest execution time: %ld.%.9lds\n", + pr_info("Total guest execution time:\t%ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); - pr_info("Overall demand paging rate: %f pgs/sec\n", - memstress_args.vcpu_args[0].pages * nr_vcpus / - ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC)); + + vcpu_paging_rate = memstress_args.vcpu_args[0].pages / + ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC); + pr_info("Per-vcpu demand paging rate:\t%f pgs/sec/vcpu\n", + vcpu_paging_rate); + pr_info("Overall demand paging rate:\t%f pgs/sec\n", + vcpu_paging_rate * nr_vcpus); memstress_destroy_vm(vm); @@ -207,15 +242,20 @@ static void run_test(enum vm_guest_mode mode, void *arg) static void help(char *name) { puts(""); - printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n" - " [-b memory] [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name); + printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-a]\n" + " [-d uffd_delay_usec] [-r readers_per_uffd] [-b memory]\n" + " [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name); guest_modes_help(); printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n" " UFFD registration mode: 'MISSING' or 'MINOR'.\n"); kvm_print_vcpu_pinning_help(); + printf(" -a: Use a single userfaultfd for all of guest memory, instead of\n" + " creating one for each region paged by a unique vCPU\n" + " Set implicitly with -o, and no effect without -u.\n"); printf(" -d: add a delay in usec to the User Fault\n" " FD handler to simulate demand paging\n" " overheads. Ignored without -u.\n"); + printf(" -r: Set the number of reader threads per uffd.\n"); printf(" -b: specify the size of the memory region which should be\n" " demand paged by each vCPU. e.g. 10M or 3G.\n" " Default: 1G\n"); @@ -234,12 +274,14 @@ int main(int argc, char *argv[]) struct test_params p = { .src_type = DEFAULT_VM_MEM_SRC, .partition_vcpu_memory_access = true, + .readers_per_uffd = 1, + .single_uffd = false, }; int opt; guest_modes_append_default(); - while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:c:o")) != -1) { + while ((opt = getopt(argc, argv, "ahom:u:d:b:s:v:c:r:")) != -1) { switch (opt) { case 'm': guest_modes_cmdline(optarg); @@ -251,6 +293,9 @@ int main(int argc, char *argv[]) p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR; TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'."); break; + case 'a': + p.single_uffd = true; + break; case 'd': p.uffd_delay = strtoul(optarg, NULL, 0); TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported."); @@ -271,6 +316,13 @@ int main(int argc, char *argv[]) break; case 'o': p.partition_vcpu_memory_access = false; + p.single_uffd = true; + break; + case 'r': + p.readers_per_uffd = atoi(optarg); + TEST_ASSERT(p.readers_per_uffd >= 1, + "Invalid number of readers per uffd %d: must be >=1", + p.readers_per_uffd); break; case 'h': default: diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 504f6fe980..9f24303acb 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -18,13 +18,11 @@ #include "test_util.h" #include "memstress.h" #include "guest_modes.h" +#include "ucall_common.h" #ifdef __aarch64__ #include "aarch64/vgic.h" -#define GICD_BASE_GPA 0x8000000ULL -#define GICR_BASE_GPA 0x80A0000ULL - static int gic_fd; static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) @@ -33,7 +31,7 @@ static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus) * The test can still run even if hardware does not support GICv3, as it * is only an optimization to reduce guest exits. */ - gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); + gic_fd = vgic_v3_setup(vm, nr_vcpus, 64); } static void arch_cleanup_vm(struct kvm_vm *vm) @@ -132,7 +130,6 @@ struct test_params { enum vm_mem_backing_src_type backing_src; int slots; uint32_t write_percent; - uint32_t random_seed; bool random_access; }; @@ -156,8 +153,6 @@ static void run_test(enum vm_guest_mode mode, void *arg) p->slots, p->backing_src, p->partition_vcpu_memory_access); - pr_info("Random seed: %u\n", p->random_seed); - memstress_set_random_seed(vm, p->random_seed); memstress_set_write_percent(vm, p->write_percent); guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift; @@ -346,11 +341,13 @@ int main(int argc, char *argv[]) .partition_vcpu_memory_access = true, .backing_src = DEFAULT_VM_MEM_SRC, .slots = 1, - .random_seed = 1, .write_percent = 100, }; int opt; + /* Override the seed to be deterministic by default. */ + guest_random_seed = 1; + dirty_log_manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | @@ -395,7 +392,7 @@ int main(int argc, char *argv[]) p.phys_offset = strtoull(optarg, NULL, 0); break; case 'r': - p.random_seed = atoi_positive("Random seed", optarg); + guest_random_seed = atoi_positive("Random seed", optarg); break; case 's': p.backing_src = parse_backing_src_type(optarg); diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index eaad5b2085..aacf80f574 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -4,9 +4,6 @@ * * Copyright (C) 2018, Red Hat, Inc. */ - -#define _GNU_SOURCE /* for program_invocation_name */ - #include <stdio.h> #include <stdlib.h> #include <pthread.h> @@ -23,6 +20,7 @@ #include "test_util.h" #include "guest_modes.h" #include "processor.h" +#include "ucall_common.h" #define DIRTY_MEM_BITS 30 /* 1G */ #define PAGE_SHIFT_4K 12 @@ -76,7 +74,6 @@ static uint64_t host_page_size; static uint64_t guest_page_size; static uint64_t guest_num_pages; -static uint64_t random_array[TEST_PAGES_PER_LOOP]; static uint64_t iteration; /* @@ -109,19 +106,19 @@ static void guest_code(void) */ for (i = 0; i < guest_num_pages; i++) { addr = guest_test_virt_mem + i * guest_page_size; - *(uint64_t *)addr = READ_ONCE(iteration); + vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration)); } while (true) { for (i = 0; i < TEST_PAGES_PER_LOOP; i++) { addr = guest_test_virt_mem; - addr += (READ_ONCE(random_array[i]) % guest_num_pages) + addr += (guest_random_u64(&guest_rng) % guest_num_pages) * guest_page_size; addr = align_down(addr, host_page_size); - *(uint64_t *)addr = READ_ONCE(iteration); + + vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration)); } - /* Tell the host that we need more random numbers */ GUEST_SYNC(1); } } @@ -508,20 +505,10 @@ static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) mode->after_vcpu_run(vcpu, ret, err); } -static void generate_random_array(uint64_t *guest_array, uint64_t size) -{ - uint64_t i; - - for (i = 0; i < size; i++) - guest_array[i] = random(); -} - static void *vcpu_worker(void *data) { int ret; struct kvm_vcpu *vcpu = data; - struct kvm_vm *vm = vcpu->vm; - uint64_t *guest_array; uint64_t pages_count = 0; struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset) + sizeof(sigset_t)); @@ -540,11 +527,8 @@ static void *vcpu_worker(void *data) sigemptyset(sigset); sigaddset(sigset, SIG_IPI); - guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array); - while (!READ_ONCE(host_quit)) { /* Clear any existing kick signals */ - generate_random_array(guest_array, TEST_PAGES_PER_LOOP); pages_count += TEST_PAGES_PER_LOOP; /* Let the guest dirty the random pages */ ret = __vcpu_run(vcpu); diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index 92eae206ba..ba0c8e9960 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -4,8 +4,6 @@ * * Author: Chao Peng <chao.p.peng@linux.intel.com> */ - -#define _GNU_SOURCE #include <stdlib.h> #include <string.h> #include <unistd.h> @@ -19,8 +17,8 @@ #include <sys/types.h> #include <sys/stat.h> +#include "kvm_util.h" #include "test_util.h" -#include "kvm_util_base.h" static void test_file_read_write(int fd) { diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c index 3502caa359..8092c2d0f5 100644 --- a/tools/testing/selftests/kvm/guest_print_test.c +++ b/tools/testing/selftests/kvm/guest_print_test.c @@ -13,6 +13,7 @@ #include "test_util.h" #include "kvm_util.h" #include "processor.h" +#include "ucall_common.h" struct guest_vals { uint64_t a; diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c index decc521fc7..bce73bcb97 100644 --- a/tools/testing/selftests/kvm/hardware_disable_test.c +++ b/tools/testing/selftests/kvm/hardware_disable_test.c @@ -4,9 +4,6 @@ * kvm_arch_hardware_disable is called and it attempts to unregister the user * return notifiers. */ - -#define _GNU_SOURCE - #include <fcntl.h> #include <pthread.h> #include <semaphore.h> diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/aarch64/gic.h index b217ea17ca..baeb3c8593 100644 --- a/tools/testing/selftests/kvm/include/aarch64/gic.h +++ b/tools/testing/selftests/kvm/include/aarch64/gic.h @@ -6,11 +6,26 @@ #ifndef SELFTEST_KVM_GIC_H #define SELFTEST_KVM_GIC_H +#include <asm/kvm.h> + enum gic_type { GIC_V3, GIC_TYPE_MAX, }; +/* + * Note that the redistributor frames are at the end, as the range scales + * with the number of vCPUs in the VM. + */ +#define GITS_BASE_GPA 0x8000000ULL +#define GICD_BASE_GPA (GITS_BASE_GPA + KVM_VGIC_V3_ITS_SIZE) +#define GICR_BASE_GPA (GICD_BASE_GPA + KVM_VGIC_V3_DIST_SIZE) + +/* The GIC is identity-mapped into the guest at the time of setup. */ +#define GITS_BASE_GVA ((volatile void *)GITS_BASE_GPA) +#define GICD_BASE_GVA ((volatile void *)GICD_BASE_GPA) +#define GICR_BASE_GVA ((volatile void *)GICR_BASE_GPA) + #define MIN_SGI 0 #define MIN_PPI 16 #define MIN_SPI 32 @@ -21,8 +36,7 @@ enum gic_type { #define INTID_IS_PPI(intid) (MIN_PPI <= (intid) && (intid) < MIN_SPI) #define INTID_IS_SPI(intid) (MIN_SPI <= (intid) && (intid) <= MAX_SPI) -void gic_init(enum gic_type type, unsigned int nr_cpus, - void *dist_base, void *redist_base); +void gic_init(enum gic_type type, unsigned int nr_cpus); void gic_irq_enable(unsigned int intid); void gic_irq_disable(unsigned int intid); unsigned int gic_get_and_ack_irq(void); @@ -44,4 +58,7 @@ void gic_irq_clear_pending(unsigned int intid); bool gic_irq_get_pending(unsigned int intid); void gic_irq_set_config(unsigned int intid, bool is_edge); +void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size, + vm_paddr_t pend_table); + #endif /* SELFTEST_KVM_GIC_H */ diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h index ba0886e8a2..a76615fa39 100644 --- a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h +++ b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h @@ -1,82 +1,604 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +/* SPDX-License-Identifier: GPL-2.0-only */ /* - * ARM Generic Interrupt Controller (GIC) v3 specific defines + * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier <marc.zyngier@arm.com> */ - -#ifndef SELFTEST_KVM_GICV3_H -#define SELFTEST_KVM_GICV3_H - -#include <asm/sysreg.h> +#ifndef __SELFTESTS_GIC_V3_H +#define __SELFTESTS_GIC_V3_H /* - * Distributor registers + * Distributor registers. We assume we're running non-secure, with ARE + * being set. Secure-only and non-ARE registers are not described. */ #define GICD_CTLR 0x0000 #define GICD_TYPER 0x0004 +#define GICD_IIDR 0x0008 +#define GICD_TYPER2 0x000C +#define GICD_STATUSR 0x0010 +#define GICD_SETSPI_NSR 0x0040 +#define GICD_CLRSPI_NSR 0x0048 +#define GICD_SETSPI_SR 0x0050 +#define GICD_CLRSPI_SR 0x0058 #define GICD_IGROUPR 0x0080 #define GICD_ISENABLER 0x0100 #define GICD_ICENABLER 0x0180 #define GICD_ISPENDR 0x0200 #define GICD_ICPENDR 0x0280 -#define GICD_ICACTIVER 0x0380 #define GICD_ISACTIVER 0x0300 +#define GICD_ICACTIVER 0x0380 #define GICD_IPRIORITYR 0x0400 #define GICD_ICFGR 0x0C00 +#define GICD_IGRPMODR 0x0D00 +#define GICD_NSACR 0x0E00 +#define GICD_IGROUPRnE 0x1000 +#define GICD_ISENABLERnE 0x1200 +#define GICD_ICENABLERnE 0x1400 +#define GICD_ISPENDRnE 0x1600 +#define GICD_ICPENDRnE 0x1800 +#define GICD_ISACTIVERnE 0x1A00 +#define GICD_ICACTIVERnE 0x1C00 +#define GICD_IPRIORITYRnE 0x2000 +#define GICD_ICFGRnE 0x3000 +#define GICD_IROUTER 0x6000 +#define GICD_IROUTERnE 0x8000 +#define GICD_IDREGS 0xFFD0 +#define GICD_PIDR2 0xFFE8 + +#define ESPI_BASE_INTID 4096 /* - * The assumption is that the guest runs in a non-secure mode. - * The following bits of GICD_CTLR are defined accordingly. + * Those registers are actually from GICv2, but the spec demands that they + * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3). */ +#define GICD_ITARGETSR 0x0800 +#define GICD_SGIR 0x0F00 +#define GICD_CPENDSGIR 0x0F10 +#define GICD_SPENDSGIR 0x0F20 + #define GICD_CTLR_RWP (1U << 31) #define GICD_CTLR_nASSGIreq (1U << 8) +#define GICD_CTLR_DS (1U << 6) #define GICD_CTLR_ARE_NS (1U << 4) #define GICD_CTLR_ENABLE_G1A (1U << 1) #define GICD_CTLR_ENABLE_G1 (1U << 0) +#define GICD_IIDR_IMPLEMENTER_SHIFT 0 +#define GICD_IIDR_IMPLEMENTER_MASK (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT) +#define GICD_IIDR_REVISION_SHIFT 12 +#define GICD_IIDR_REVISION_MASK (0xf << GICD_IIDR_REVISION_SHIFT) +#define GICD_IIDR_VARIANT_SHIFT 16 +#define GICD_IIDR_VARIANT_MASK (0xf << GICD_IIDR_VARIANT_SHIFT) +#define GICD_IIDR_PRODUCT_ID_SHIFT 24 +#define GICD_IIDR_PRODUCT_ID_MASK (0xff << GICD_IIDR_PRODUCT_ID_SHIFT) + + +/* + * In systems with a single security state (what we emulate in KVM) + * the meaning of the interrupt group enable bits is slightly different + */ +#define GICD_CTLR_ENABLE_SS_G1 (1U << 1) +#define GICD_CTLR_ENABLE_SS_G0 (1U << 0) + +#define GICD_TYPER_RSS (1U << 26) +#define GICD_TYPER_LPIS (1U << 17) +#define GICD_TYPER_MBIS (1U << 16) +#define GICD_TYPER_ESPI (1U << 8) + +#define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1) +#define GICD_TYPER_NUM_LPIS(typer) ((((typer) >> 11) & 0x1f) + 1) #define GICD_TYPER_SPIS(typer) ((((typer) & 0x1f) + 1) * 32) -#define GICD_INT_DEF_PRI_X4 0xa0a0a0a0 +#define GICD_TYPER_ESPIS(typer) \ + (((typer) & GICD_TYPER_ESPI) ? GICD_TYPER_SPIS((typer) >> 27) : 0) + +#define GICD_TYPER2_nASSGIcap (1U << 8) +#define GICD_TYPER2_VIL (1U << 7) +#define GICD_TYPER2_VID GENMASK(4, 0) + +#define GICD_IROUTER_SPI_MODE_ONE (0U << 31) +#define GICD_IROUTER_SPI_MODE_ANY (1U << 31) + +#define GIC_PIDR2_ARCH_MASK 0xf0 +#define GIC_PIDR2_ARCH_GICv3 0x30 +#define GIC_PIDR2_ARCH_GICv4 0x40 + +#define GIC_V3_DIST_SIZE 0x10000 + +#define GIC_PAGE_SIZE_4K 0ULL +#define GIC_PAGE_SIZE_16K 1ULL +#define GIC_PAGE_SIZE_64K 2ULL +#define GIC_PAGE_SIZE_MASK 3ULL /* - * Redistributor registers + * Re-Distributor registers, offsets from RD_base */ -#define GICR_CTLR 0x000 -#define GICR_WAKER 0x014 +#define GICR_CTLR GICD_CTLR +#define GICR_IIDR 0x0004 +#define GICR_TYPER 0x0008 +#define GICR_STATUSR GICD_STATUSR +#define GICR_WAKER 0x0014 +#define GICR_SETLPIR 0x0040 +#define GICR_CLRLPIR 0x0048 +#define GICR_PROPBASER 0x0070 +#define GICR_PENDBASER 0x0078 +#define GICR_INVLPIR 0x00A0 +#define GICR_INVALLR 0x00B0 +#define GICR_SYNCR 0x00C0 +#define GICR_IDREGS GICD_IDREGS +#define GICR_PIDR2 GICD_PIDR2 + +#define GICR_CTLR_ENABLE_LPIS (1UL << 0) +#define GICR_CTLR_CES (1UL << 1) +#define GICR_CTLR_IR (1UL << 2) +#define GICR_CTLR_RWP (1UL << 3) -#define GICR_CTLR_RWP (1U << 3) +#define GICR_TYPER_CPU_NUMBER(r) (((r) >> 8) & 0xffff) + +#define EPPI_BASE_INTID 1056 + +#define GICR_TYPER_NR_PPIS(r) \ + ({ \ + unsigned int __ppinum = ((r) >> 27) & 0x1f; \ + unsigned int __nr_ppis = 16; \ + if (__ppinum == 1 || __ppinum == 2) \ + __nr_ppis += __ppinum * 32; \ + \ + __nr_ppis; \ + }) #define GICR_WAKER_ProcessorSleep (1U << 1) #define GICR_WAKER_ChildrenAsleep (1U << 2) +#define GIC_BASER_CACHE_nCnB 0ULL +#define GIC_BASER_CACHE_SameAsInner 0ULL +#define GIC_BASER_CACHE_nC 1ULL +#define GIC_BASER_CACHE_RaWt 2ULL +#define GIC_BASER_CACHE_RaWb 3ULL +#define GIC_BASER_CACHE_WaWt 4ULL +#define GIC_BASER_CACHE_WaWb 5ULL +#define GIC_BASER_CACHE_RaWaWt 6ULL +#define GIC_BASER_CACHE_RaWaWb 7ULL +#define GIC_BASER_CACHE_MASK 7ULL +#define GIC_BASER_NonShareable 0ULL +#define GIC_BASER_InnerShareable 1ULL +#define GIC_BASER_OuterShareable 2ULL +#define GIC_BASER_SHAREABILITY_MASK 3ULL + +#define GIC_BASER_CACHEABILITY(reg, inner_outer, type) \ + (GIC_BASER_CACHE_##type << reg##_##inner_outer##_CACHEABILITY_SHIFT) + +#define GIC_BASER_SHAREABILITY(reg, type) \ + (GIC_BASER_##type << reg##_SHAREABILITY_SHIFT) + +/* encode a size field of width @w containing @n - 1 units */ +#define GIC_ENCODE_SZ(n, w) (((unsigned long)(n) - 1) & GENMASK_ULL(((w) - 1), 0)) + +#define GICR_PROPBASER_SHAREABILITY_SHIFT (10) +#define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT (56) +#define GICR_PROPBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_PROPBASER, SHAREABILITY_MASK) +#define GICR_PROPBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, MASK) +#define GICR_PROPBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, MASK) +#define GICR_PROPBASER_CACHEABILITY_MASK GICR_PROPBASER_INNER_CACHEABILITY_MASK + +#define GICR_PROPBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable) + +#define GICR_PROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nCnB) +#define GICR_PROPBASER_nC GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nC) +#define GICR_PROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWt) +#define GICR_PROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb) +#define GICR_PROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWt) +#define GICR_PROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWb) +#define GICR_PROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWt) +#define GICR_PROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb) + +#define GICR_PROPBASER_IDBITS_MASK (0x1f) +#define GICR_PROPBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 12)) +#define GICR_PENDBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 16)) + +#define GICR_PENDBASER_SHAREABILITY_SHIFT (10) +#define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT (56) +#define GICR_PENDBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_PENDBASER, SHAREABILITY_MASK) +#define GICR_PENDBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, MASK) +#define GICR_PENDBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, MASK) +#define GICR_PENDBASER_CACHEABILITY_MASK GICR_PENDBASER_INNER_CACHEABILITY_MASK + +#define GICR_PENDBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable) + +#define GICR_PENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nCnB) +#define GICR_PENDBASER_nC GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nC) +#define GICR_PENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWt) +#define GICR_PENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb) +#define GICR_PENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWt) +#define GICR_PENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWb) +#define GICR_PENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWt) +#define GICR_PENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWb) + +#define GICR_PENDBASER_PTZ BIT_ULL(62) + /* - * Redistributor registers, offsets from SGI base + * Re-Distributor registers, offsets from SGI_base */ #define GICR_IGROUPR0 GICD_IGROUPR #define GICR_ISENABLER0 GICD_ISENABLER #define GICR_ICENABLER0 GICD_ICENABLER #define GICR_ISPENDR0 GICD_ISPENDR +#define GICR_ICPENDR0 GICD_ICPENDR #define GICR_ISACTIVER0 GICD_ISACTIVER #define GICR_ICACTIVER0 GICD_ICACTIVER -#define GICR_ICENABLER GICD_ICENABLER -#define GICR_ICACTIVER GICD_ICACTIVER #define GICR_IPRIORITYR0 GICD_IPRIORITYR +#define GICR_ICFGR0 GICD_ICFGR +#define GICR_IGRPMODR0 GICD_IGRPMODR +#define GICR_NSACR GICD_NSACR + +#define GICR_TYPER_PLPIS (1U << 0) +#define GICR_TYPER_VLPIS (1U << 1) +#define GICR_TYPER_DIRTY (1U << 2) +#define GICR_TYPER_DirectLPIS (1U << 3) +#define GICR_TYPER_LAST (1U << 4) +#define GICR_TYPER_RVPEID (1U << 7) +#define GICR_TYPER_COMMON_LPI_AFF GENMASK_ULL(25, 24) +#define GICR_TYPER_AFFINITY GENMASK_ULL(63, 32) + +#define GICR_INVLPIR_INTID GENMASK_ULL(31, 0) +#define GICR_INVLPIR_VPEID GENMASK_ULL(47, 32) +#define GICR_INVLPIR_V GENMASK_ULL(63, 63) + +#define GICR_INVALLR_VPEID GICR_INVLPIR_VPEID +#define GICR_INVALLR_V GICR_INVLPIR_V + +#define GIC_V3_REDIST_SIZE 0x20000 + +#define LPI_PROP_GROUP1 (1 << 1) +#define LPI_PROP_ENABLED (1 << 0) + +/* + * Re-Distributor registers, offsets from VLPI_base + */ +#define GICR_VPROPBASER 0x0070 + +#define GICR_VPROPBASER_IDBITS_MASK 0x1f + +#define GICR_VPROPBASER_SHAREABILITY_SHIFT (10) +#define GICR_VPROPBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_VPROPBASER_OUTER_CACHEABILITY_SHIFT (56) + +#define GICR_VPROPBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_VPROPBASER, SHAREABILITY_MASK) +#define GICR_VPROPBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, MASK) +#define GICR_VPROPBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPROPBASER, OUTER, MASK) +#define GICR_VPROPBASER_CACHEABILITY_MASK \ + GICR_VPROPBASER_INNER_CACHEABILITY_MASK + +#define GICR_VPROPBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_VPROPBASER, InnerShareable) + +#define GICR_VPROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nCnB) +#define GICR_VPROPBASER_nC GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nC) +#define GICR_VPROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt) +#define GICR_VPROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWb) +#define GICR_VPROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWt) +#define GICR_VPROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWb) +#define GICR_VPROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWt) +#define GICR_VPROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWb) + +/* + * GICv4.1 VPROPBASER reinvention. A subtle mix between the old + * VPROPBASER and ITS_BASER. Just not quite any of the two. + */ +#define GICR_VPROPBASER_4_1_VALID (1ULL << 63) +#define GICR_VPROPBASER_4_1_ENTRY_SIZE GENMASK_ULL(61, 59) +#define GICR_VPROPBASER_4_1_INDIRECT (1ULL << 55) +#define GICR_VPROPBASER_4_1_PAGE_SIZE GENMASK_ULL(54, 53) +#define GICR_VPROPBASER_4_1_Z (1ULL << 52) +#define GICR_VPROPBASER_4_1_ADDR GENMASK_ULL(51, 12) +#define GICR_VPROPBASER_4_1_SIZE GENMASK_ULL(6, 0) + +#define GICR_VPENDBASER 0x0078 + +#define GICR_VPENDBASER_SHAREABILITY_SHIFT (10) +#define GICR_VPENDBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_VPENDBASER_OUTER_CACHEABILITY_SHIFT (56) +#define GICR_VPENDBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_VPENDBASER, SHAREABILITY_MASK) +#define GICR_VPENDBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, MASK) +#define GICR_VPENDBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPENDBASER, OUTER, MASK) +#define GICR_VPENDBASER_CACHEABILITY_MASK \ + GICR_VPENDBASER_INNER_CACHEABILITY_MASK + +#define GICR_VPENDBASER_NonShareable \ + GIC_BASER_SHAREABILITY(GICR_VPENDBASER, NonShareable) + +#define GICR_VPENDBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_VPENDBASER, InnerShareable) + +#define GICR_VPENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nCnB) +#define GICR_VPENDBASER_nC GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nC) +#define GICR_VPENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt) +#define GICR_VPENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWb) +#define GICR_VPENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWt) +#define GICR_VPENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWb) +#define GICR_VPENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWt) +#define GICR_VPENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWb) + +#define GICR_VPENDBASER_Dirty (1ULL << 60) +#define GICR_VPENDBASER_PendingLast (1ULL << 61) +#define GICR_VPENDBASER_IDAI (1ULL << 62) +#define GICR_VPENDBASER_Valid (1ULL << 63) + +/* + * GICv4.1 VPENDBASER, used for VPE residency. On top of these fields, + * also use the above Valid, PendingLast and Dirty. + */ +#define GICR_VPENDBASER_4_1_DB (1ULL << 62) +#define GICR_VPENDBASER_4_1_VGRP0EN (1ULL << 59) +#define GICR_VPENDBASER_4_1_VGRP1EN (1ULL << 58) +#define GICR_VPENDBASER_4_1_VPEID GENMASK_ULL(15, 0) + +#define GICR_VSGIR 0x0080 + +#define GICR_VSGIR_VPEID GENMASK(15, 0) + +#define GICR_VSGIPENDR 0x0088 + +#define GICR_VSGIPENDR_BUSY (1U << 31) +#define GICR_VSGIPENDR_PENDING GENMASK(15, 0) + +/* + * ITS registers, offsets from ITS_base + */ +#define GITS_CTLR 0x0000 +#define GITS_IIDR 0x0004 +#define GITS_TYPER 0x0008 +#define GITS_MPIDR 0x0018 +#define GITS_CBASER 0x0080 +#define GITS_CWRITER 0x0088 +#define GITS_CREADR 0x0090 +#define GITS_BASER 0x0100 +#define GITS_IDREGS_BASE 0xffd0 +#define GITS_PIDR0 0xffe0 +#define GITS_PIDR1 0xffe4 +#define GITS_PIDR2 GICR_PIDR2 +#define GITS_PIDR4 0xffd0 +#define GITS_CIDR0 0xfff0 +#define GITS_CIDR1 0xfff4 +#define GITS_CIDR2 0xfff8 +#define GITS_CIDR3 0xfffc + +#define GITS_TRANSLATER 0x10040 + +#define GITS_SGIR 0x20020 + +#define GITS_SGIR_VPEID GENMASK_ULL(47, 32) +#define GITS_SGIR_VINTID GENMASK_ULL(3, 0) + +#define GITS_CTLR_ENABLE (1U << 0) +#define GITS_CTLR_ImDe (1U << 1) +#define GITS_CTLR_ITS_NUMBER_SHIFT 4 +#define GITS_CTLR_ITS_NUMBER (0xFU << GITS_CTLR_ITS_NUMBER_SHIFT) +#define GITS_CTLR_QUIESCENT (1U << 31) + +#define GITS_TYPER_PLPIS (1UL << 0) +#define GITS_TYPER_VLPIS (1UL << 1) +#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4 +#define GITS_TYPER_ITT_ENTRY_SIZE GENMASK_ULL(7, 4) +#define GITS_TYPER_IDBITS_SHIFT 8 +#define GITS_TYPER_DEVBITS_SHIFT 13 +#define GITS_TYPER_DEVBITS GENMASK_ULL(17, 13) +#define GITS_TYPER_PTA (1UL << 19) +#define GITS_TYPER_HCC_SHIFT 24 +#define GITS_TYPER_HCC(r) (((r) >> GITS_TYPER_HCC_SHIFT) & 0xff) +#define GITS_TYPER_VMOVP (1ULL << 37) +#define GITS_TYPER_VMAPP (1ULL << 40) +#define GITS_TYPER_SVPET GENMASK_ULL(42, 41) -/* CPU interface registers */ -#define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) -#define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) -#define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) -#define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) -#define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) -#define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) -#define SYS_ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) +#define GITS_IIDR_REV_SHIFT 12 +#define GITS_IIDR_REV_MASK (0xf << GITS_IIDR_REV_SHIFT) +#define GITS_IIDR_REV(r) (((r) >> GITS_IIDR_REV_SHIFT) & 0xf) +#define GITS_IIDR_PRODUCTID_SHIFT 24 -#define SYS_ICV_AP1R0_EL1 sys_reg(3, 0, 12, 9, 0) +#define GITS_CBASER_VALID (1ULL << 63) +#define GITS_CBASER_SHAREABILITY_SHIFT (10) +#define GITS_CBASER_INNER_CACHEABILITY_SHIFT (59) +#define GITS_CBASER_OUTER_CACHEABILITY_SHIFT (53) +#define GITS_CBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GITS_CBASER, SHAREABILITY_MASK) +#define GITS_CBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, MASK) +#define GITS_CBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_CBASER, OUTER, MASK) +#define GITS_CBASER_CACHEABILITY_MASK GITS_CBASER_INNER_CACHEABILITY_MASK -#define ICC_PMR_DEF_PRIO 0xf0 +#define GITS_CBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GITS_CBASER, InnerShareable) +#define GITS_CBASER_nCnB GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nCnB) +#define GITS_CBASER_nC GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nC) +#define GITS_CBASER_RaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWt) +#define GITS_CBASER_RaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWb) +#define GITS_CBASER_WaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWt) +#define GITS_CBASER_WaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWb) +#define GITS_CBASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt) +#define GITS_CBASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb) + +#define GITS_CBASER_ADDRESS(cbaser) ((cbaser) & GENMASK_ULL(51, 12)) + +#define GITS_BASER_NR_REGS 8 + +#define GITS_BASER_VALID (1ULL << 63) +#define GITS_BASER_INDIRECT (1ULL << 62) + +#define GITS_BASER_INNER_CACHEABILITY_SHIFT (59) +#define GITS_BASER_OUTER_CACHEABILITY_SHIFT (53) +#define GITS_BASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_BASER, INNER, MASK) +#define GITS_BASER_CACHEABILITY_MASK GITS_BASER_INNER_CACHEABILITY_MASK +#define GITS_BASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, MASK) +#define GITS_BASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GITS_BASER, SHAREABILITY_MASK) + +#define GITS_BASER_nCnB GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nCnB) +#define GITS_BASER_nC GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nC) +#define GITS_BASER_RaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWt) +#define GITS_BASER_RaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) +#define GITS_BASER_WaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWt) +#define GITS_BASER_WaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWb) +#define GITS_BASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWt) +#define GITS_BASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWb) + +#define GITS_BASER_TYPE_SHIFT (56) +#define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7) +#define GITS_BASER_ENTRY_SIZE_SHIFT (48) +#define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1) +#define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48) +#define GITS_BASER_PHYS_52_to_48(phys) \ + (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12) +#define GITS_BASER_ADDR_48_to_52(baser) \ + (((baser) & GENMASK_ULL(47, 16)) | (((baser) >> 12) & 0xf) << 48) + +#define GITS_BASER_SHAREABILITY_SHIFT (10) +#define GITS_BASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) +#define GITS_BASER_PAGE_SIZE_SHIFT (8) +#define __GITS_BASER_PSZ(sz) (GIC_PAGE_SIZE_ ## sz << GITS_BASER_PAGE_SIZE_SHIFT) +#define GITS_BASER_PAGE_SIZE_4K __GITS_BASER_PSZ(4K) +#define GITS_BASER_PAGE_SIZE_16K __GITS_BASER_PSZ(16K) +#define GITS_BASER_PAGE_SIZE_64K __GITS_BASER_PSZ(64K) +#define GITS_BASER_PAGE_SIZE_MASK __GITS_BASER_PSZ(MASK) +#define GITS_BASER_PAGES_MAX 256 +#define GITS_BASER_PAGES_SHIFT (0) +#define GITS_BASER_NR_PAGES(r) (((r) & 0xff) + 1) + +#define GITS_BASER_TYPE_NONE 0 +#define GITS_BASER_TYPE_DEVICE 1 +#define GITS_BASER_TYPE_VCPU 2 +#define GITS_BASER_TYPE_RESERVED3 3 +#define GITS_BASER_TYPE_COLLECTION 4 +#define GITS_BASER_TYPE_RESERVED5 5 +#define GITS_BASER_TYPE_RESERVED6 6 +#define GITS_BASER_TYPE_RESERVED7 7 + +#define GITS_LVL1_ENTRY_SIZE (8UL) + +/* + * ITS commands + */ +#define GITS_CMD_MAPD 0x08 +#define GITS_CMD_MAPC 0x09 +#define GITS_CMD_MAPTI 0x0a +#define GITS_CMD_MAPI 0x0b +#define GITS_CMD_MOVI 0x01 +#define GITS_CMD_DISCARD 0x0f +#define GITS_CMD_INV 0x0c +#define GITS_CMD_MOVALL 0x0e +#define GITS_CMD_INVALL 0x0d +#define GITS_CMD_INT 0x03 +#define GITS_CMD_CLEAR 0x04 +#define GITS_CMD_SYNC 0x05 + +/* + * GICv4 ITS specific commands + */ +#define GITS_CMD_GICv4(x) ((x) | 0x20) +#define GITS_CMD_VINVALL GITS_CMD_GICv4(GITS_CMD_INVALL) +#define GITS_CMD_VMAPP GITS_CMD_GICv4(GITS_CMD_MAPC) +#define GITS_CMD_VMAPTI GITS_CMD_GICv4(GITS_CMD_MAPTI) +#define GITS_CMD_VMOVI GITS_CMD_GICv4(GITS_CMD_MOVI) +#define GITS_CMD_VSYNC GITS_CMD_GICv4(GITS_CMD_SYNC) +/* VMOVP, VSGI and INVDB are the odd ones, as they dont have a physical counterpart */ +#define GITS_CMD_VMOVP GITS_CMD_GICv4(2) +#define GITS_CMD_VSGI GITS_CMD_GICv4(3) +#define GITS_CMD_INVDB GITS_CMD_GICv4(0xe) + +/* + * ITS error numbers + */ +#define E_ITS_MOVI_UNMAPPED_INTERRUPT 0x010107 +#define E_ITS_MOVI_UNMAPPED_COLLECTION 0x010109 +#define E_ITS_INT_UNMAPPED_INTERRUPT 0x010307 +#define E_ITS_CLEAR_UNMAPPED_INTERRUPT 0x010507 +#define E_ITS_MAPD_DEVICE_OOR 0x010801 +#define E_ITS_MAPD_ITTSIZE_OOR 0x010802 +#define E_ITS_MAPC_PROCNUM_OOR 0x010902 +#define E_ITS_MAPC_COLLECTION_OOR 0x010903 +#define E_ITS_MAPTI_UNMAPPED_DEVICE 0x010a04 +#define E_ITS_MAPTI_ID_OOR 0x010a05 +#define E_ITS_MAPTI_PHYSICALID_OOR 0x010a06 +#define E_ITS_INV_UNMAPPED_INTERRUPT 0x010c07 +#define E_ITS_INVALL_UNMAPPED_COLLECTION 0x010d09 +#define E_ITS_MOVALL_PROCNUM_OOR 0x010e01 +#define E_ITS_DISCARD_UNMAPPED_INTERRUPT 0x010f07 + +/* + * CPU interface registers + */ +#define ICC_CTLR_EL1_EOImode_SHIFT (1) +#define ICC_CTLR_EL1_EOImode_drop_dir (0U << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_EOImode_drop (1U << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_CBPR_SHIFT 0 +#define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT) +#define ICC_CTLR_EL1_PMHE_SHIFT 6 +#define ICC_CTLR_EL1_PMHE_MASK (1 << ICC_CTLR_EL1_PMHE_SHIFT) +#define ICC_CTLR_EL1_PRI_BITS_SHIFT 8 +#define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT) +#define ICC_CTLR_EL1_ID_BITS_SHIFT 11 +#define ICC_CTLR_EL1_ID_BITS_MASK (0x7 << ICC_CTLR_EL1_ID_BITS_SHIFT) +#define ICC_CTLR_EL1_SEIS_SHIFT 14 +#define ICC_CTLR_EL1_SEIS_MASK (0x1 << ICC_CTLR_EL1_SEIS_SHIFT) +#define ICC_CTLR_EL1_A3V_SHIFT 15 +#define ICC_CTLR_EL1_A3V_MASK (0x1 << ICC_CTLR_EL1_A3V_SHIFT) +#define ICC_CTLR_EL1_RSS (0x1 << 18) +#define ICC_CTLR_EL1_ExtRange (0x1 << 19) +#define ICC_PMR_EL1_SHIFT 0 +#define ICC_PMR_EL1_MASK (0xff << ICC_PMR_EL1_SHIFT) +#define ICC_BPR0_EL1_SHIFT 0 +#define ICC_BPR0_EL1_MASK (0x7 << ICC_BPR0_EL1_SHIFT) +#define ICC_BPR1_EL1_SHIFT 0 +#define ICC_BPR1_EL1_MASK (0x7 << ICC_BPR1_EL1_SHIFT) +#define ICC_IGRPEN0_EL1_SHIFT 0 +#define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT) +#define ICC_IGRPEN1_EL1_SHIFT 0 +#define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT) +#define ICC_SRE_EL1_DIB (1U << 2) +#define ICC_SRE_EL1_DFB (1U << 1) #define ICC_SRE_EL1_SRE (1U << 0) -#define ICC_IGRPEN1_EL1_ENABLE (1U << 0) +/* These are for GICv2 emulation only */ +#define GICH_LR_VIRTUALID (0x3ffUL << 0) +#define GICH_LR_PHYSID_CPUID_SHIFT (10) +#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) + +#define ICC_IAR1_EL1_SPURIOUS 0x3ff + +#define ICC_SRE_EL2_SRE (1 << 0) +#define ICC_SRE_EL2_ENABLE (1 << 3) -#define GICV3_MAX_CPUS 512 +#define ICC_SGI1R_TARGET_LIST_SHIFT 0 +#define ICC_SGI1R_TARGET_LIST_MASK (0xffff << ICC_SGI1R_TARGET_LIST_SHIFT) +#define ICC_SGI1R_AFFINITY_1_SHIFT 16 +#define ICC_SGI1R_AFFINITY_1_MASK (0xff << ICC_SGI1R_AFFINITY_1_SHIFT) +#define ICC_SGI1R_SGI_ID_SHIFT 24 +#define ICC_SGI1R_SGI_ID_MASK (0xfULL << ICC_SGI1R_SGI_ID_SHIFT) +#define ICC_SGI1R_AFFINITY_2_SHIFT 32 +#define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT) +#define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40 +#define ICC_SGI1R_RS_SHIFT 44 +#define ICC_SGI1R_RS_MASK (0xfULL << ICC_SGI1R_RS_SHIFT) +#define ICC_SGI1R_AFFINITY_3_SHIFT 48 +#define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT) -#endif /* SELFTEST_KVM_GICV3_H */ +#endif diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h b/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h new file mode 100644 index 0000000000..3722ed9c8f --- /dev/null +++ b/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __SELFTESTS_GIC_V3_ITS_H__ +#define __SELFTESTS_GIC_V3_ITS_H__ + +#include <linux/sizes.h> + +void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz, + vm_paddr_t device_tbl, size_t device_tbl_sz, + vm_paddr_t cmdq, size_t cmdq_size); + +void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base, + size_t itt_size, bool valid); +void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid); +void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id, + u32 collection_id, u32 intid); +void its_send_invall_cmd(void *cmdq_base, u32 collection_id); + +#endif // __SELFTESTS_GIC_V3_ITS_H__ diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index 9e518b5628..9b20a355d8 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -8,6 +8,8 @@ #define SELFTEST_KVM_PROCESSOR_H #include "kvm_util.h" +#include "ucall_common.h" + #include <linux/stringify.h> #include <linux/types.h> #include <asm/sysreg.h> @@ -58,8 +60,6 @@ MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) -#define MPIDR_HWID_BITMASK (0xff00fffffful) - void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init); struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, struct kvm_vcpu_init *init, void *guest_code); @@ -177,11 +177,28 @@ static __always_inline u32 __raw_readl(const volatile void *addr) return val; } +static __always_inline void __raw_writeq(u64 val, volatile void *addr) +{ + asm volatile("str %0, [%1]" : : "rZ" (val), "r" (addr)); +} + +static __always_inline u64 __raw_readq(const volatile void *addr) +{ + u64 val; + asm volatile("ldr %0, [%1]" : "=r" (val) : "r" (addr)); + return val; +} + #define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c))) #define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; }) +#define writeq_relaxed(v,c) ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c))) +#define readq_relaxed(c) ({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; }) #define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c));}) #define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; }) +#define writeq(v,c) ({ __iowmb(); writeq_relaxed((v),(c));}) +#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; }) + static inline void local_irq_enable(void) { diff --git a/tools/testing/selftests/kvm/include/aarch64/ucall.h b/tools/testing/selftests/kvm/include/aarch64/ucall.h index 4b68f37efd..4ec801f37f 100644 --- a/tools/testing/selftests/kvm/include/aarch64/ucall.h +++ b/tools/testing/selftests/kvm/include/aarch64/ucall.h @@ -2,7 +2,7 @@ #ifndef SELFTEST_KVM_UCALL_H #define SELFTEST_KVM_UCALL_H -#include "kvm_util_base.h" +#include "kvm_util.h" #define UCALL_EXIT_REASON KVM_EXIT_MMIO diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/aarch64/vgic.h index 0ac6f05c63..c481d0c00a 100644 --- a/tools/testing/selftests/kvm/include/aarch64/vgic.h +++ b/tools/testing/selftests/kvm/include/aarch64/vgic.h @@ -16,8 +16,7 @@ ((uint64_t)(flags) << 12) | \ index) -int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs, - uint64_t gicd_base_gpa, uint64_t gicr_base_gpa); +int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs); #define VGIC_MAX_RESERVED 1023 @@ -33,4 +32,6 @@ void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu); #define KVM_IRQCHIP_NUM_PINS (1020 - 32) +int vgic_its_setup(struct kvm_vm *vm); + #endif // SELFTEST_KVM_VGIC_H diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index c9286811a4..63c2aaae51 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -1,13 +1,1116 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * tools/testing/selftests/kvm/include/kvm_util.h - * * Copyright (C) 2018, Google LLC. */ #ifndef SELFTEST_KVM_UTIL_H #define SELFTEST_KVM_UTIL_H -#include "kvm_util_base.h" -#include "ucall_common.h" +#include "test_util.h" + +#include <linux/compiler.h> +#include "linux/hashtable.h" +#include "linux/list.h" +#include <linux/kernel.h> +#include <linux/kvm.h> +#include "linux/rbtree.h" +#include <linux/types.h> + +#include <asm/atomic.h> +#include <asm/kvm.h> + +#include <sys/ioctl.h> + +#include "kvm_util_arch.h" +#include "kvm_util_types.h" +#include "sparsebit.h" + +#define KVM_DEV_PATH "/dev/kvm" +#define KVM_MAX_VCPUS 512 + +#define NSEC_PER_SEC 1000000000L + +struct userspace_mem_region { + struct kvm_userspace_memory_region2 region; + struct sparsebit *unused_phy_pages; + struct sparsebit *protected_phy_pages; + int fd; + off_t offset; + enum vm_mem_backing_src_type backing_src_type; + void *host_mem; + void *host_alias; + void *mmap_start; + void *mmap_alias; + size_t mmap_size; + struct rb_node gpa_node; + struct rb_node hva_node; + struct hlist_node slot_node; +}; + +struct kvm_vcpu { + struct list_head list; + uint32_t id; + int fd; + struct kvm_vm *vm; + struct kvm_run *run; +#ifdef __x86_64__ + struct kvm_cpuid2 *cpuid; +#endif + struct kvm_dirty_gfn *dirty_gfns; + uint32_t fetch_index; + uint32_t dirty_gfns_count; +}; + +struct userspace_mem_regions { + struct rb_root gpa_tree; + struct rb_root hva_tree; + DECLARE_HASHTABLE(slot_hash, 9); +}; + +enum kvm_mem_region_type { + MEM_REGION_CODE, + MEM_REGION_DATA, + MEM_REGION_PT, + MEM_REGION_TEST_DATA, + NR_MEM_REGIONS, +}; + +struct kvm_vm { + int mode; + unsigned long type; + int kvm_fd; + int fd; + unsigned int pgtable_levels; + unsigned int page_size; + unsigned int page_shift; + unsigned int pa_bits; + unsigned int va_bits; + uint64_t max_gfn; + struct list_head vcpus; + struct userspace_mem_regions regions; + struct sparsebit *vpages_valid; + struct sparsebit *vpages_mapped; + bool has_irqchip; + bool pgd_created; + vm_paddr_t ucall_mmio_addr; + vm_paddr_t pgd; + vm_vaddr_t handlers; + uint32_t dirty_ring_size; + uint64_t gpa_tag_mask; + + struct kvm_vm_arch arch; + + /* Cache of information for binary stats interface */ + int stats_fd; + struct kvm_stats_header stats_header; + struct kvm_stats_desc *stats_desc; + + /* + * KVM region slots. These are the default memslots used by page + * allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE] + * memslot. + */ + uint32_t memslots[NR_MEM_REGIONS]; +}; + +struct vcpu_reg_sublist { + const char *name; + long capability; + int feature; + int feature_type; + bool finalize; + __u64 *regs; + __u64 regs_n; + __u64 *rejects_set; + __u64 rejects_set_n; + __u64 *skips_set; + __u64 skips_set_n; +}; + +struct vcpu_reg_list { + char *name; + struct vcpu_reg_sublist sublists[]; +}; + +#define for_each_sublist(c, s) \ + for ((s) = &(c)->sublists[0]; (s)->regs; ++(s)) + +#define kvm_for_each_vcpu(vm, i, vcpu) \ + for ((i) = 0; (i) <= (vm)->last_vcpu_id; (i)++) \ + if (!((vcpu) = vm->vcpus[i])) \ + continue; \ + else + +struct userspace_mem_region * +memslot2region(struct kvm_vm *vm, uint32_t memslot); + +static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm, + enum kvm_mem_region_type type) +{ + assert(type < NR_MEM_REGIONS); + return memslot2region(vm, vm->memslots[type]); +} + +/* Minimum allocated guest virtual and physical addresses */ +#define KVM_UTIL_MIN_VADDR 0x2000 +#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 + +#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000 +#define DEFAULT_STACK_PGS 5 + +enum vm_guest_mode { + VM_MODE_P52V48_4K, + VM_MODE_P52V48_16K, + VM_MODE_P52V48_64K, + VM_MODE_P48V48_4K, + VM_MODE_P48V48_16K, + VM_MODE_P48V48_64K, + VM_MODE_P40V48_4K, + VM_MODE_P40V48_16K, + VM_MODE_P40V48_64K, + VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ + VM_MODE_P47V64_4K, + VM_MODE_P44V64_4K, + VM_MODE_P36V48_4K, + VM_MODE_P36V48_16K, + VM_MODE_P36V48_64K, + VM_MODE_P36V47_16K, + NUM_VM_MODES, +}; + +struct vm_shape { + uint32_t type; + uint8_t mode; + uint8_t pad0; + uint16_t pad1; +}; + +kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t)); + +#define VM_TYPE_DEFAULT 0 + +#define VM_SHAPE(__mode) \ +({ \ + struct vm_shape shape = { \ + .mode = (__mode), \ + .type = VM_TYPE_DEFAULT \ + }; \ + \ + shape; \ +}) + +#if defined(__aarch64__) + +extern enum vm_guest_mode vm_mode_default; + +#define VM_MODE_DEFAULT vm_mode_default +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + +#elif defined(__x86_64__) + +#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + +#elif defined(__s390x__) + +#define VM_MODE_DEFAULT VM_MODE_P44V64_4K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 16) + +#elif defined(__riscv) + +#if __riscv_xlen == 32 +#error "RISC-V 32-bit kvm selftests not supported" +#endif + +#define VM_MODE_DEFAULT VM_MODE_P40V48_4K +#define MIN_PAGE_SHIFT 12U +#define ptes_per_page(page_size) ((page_size) / 8) + +#endif + +#define VM_SHAPE_DEFAULT VM_SHAPE(VM_MODE_DEFAULT) + +#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT) +#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE) + +struct vm_guest_mode_params { + unsigned int pa_bits; + unsigned int va_bits; + unsigned int page_size; + unsigned int page_shift; +}; +extern const struct vm_guest_mode_params vm_guest_mode_params[]; + +int open_path_or_exit(const char *path, int flags); +int open_kvm_dev_path_or_exit(void); + +bool get_kvm_param_bool(const char *param); +bool get_kvm_intel_param_bool(const char *param); +bool get_kvm_amd_param_bool(const char *param); + +int get_kvm_param_integer(const char *param); +int get_kvm_intel_param_integer(const char *param); +int get_kvm_amd_param_integer(const char *param); + +unsigned int kvm_check_cap(long cap); + +static inline bool kvm_has_cap(long cap) +{ + return kvm_check_cap(cap); +} + +#define __KVM_SYSCALL_ERROR(_name, _ret) \ + "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno) + +/* + * Use the "inner", double-underscore macro when reporting errors from within + * other macros so that the name of ioctl() and not its literal numeric value + * is printed on error. The "outer" macro is strongly preferred when reporting + * errors "directly", i.e. without an additional layer of macros, as it reduces + * the probability of passing in the wrong string. + */ +#define __KVM_IOCTL_ERROR(_name, _ret) __KVM_SYSCALL_ERROR(_name, _ret) +#define KVM_IOCTL_ERROR(_ioctl, _ret) __KVM_IOCTL_ERROR(#_ioctl, _ret) + +#define kvm_do_ioctl(fd, cmd, arg) \ +({ \ + kvm_static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd)); \ + ioctl(fd, cmd, arg); \ +}) + +#define __kvm_ioctl(kvm_fd, cmd, arg) \ + kvm_do_ioctl(kvm_fd, cmd, arg) + +#define kvm_ioctl(kvm_fd, cmd, arg) \ +({ \ + int ret = __kvm_ioctl(kvm_fd, cmd, arg); \ + \ + TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(#cmd, ret)); \ +}) + +static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { } + +#define __vm_ioctl(vm, cmd, arg) \ +({ \ + static_assert_is_vm(vm); \ + kvm_do_ioctl((vm)->fd, cmd, arg); \ +}) + +/* + * Assert that a VM or vCPU ioctl() succeeded, with extra magic to detect if + * the ioctl() failed because KVM killed/bugged the VM. To detect a dead VM, + * probe KVM_CAP_USER_MEMORY, which (a) has been supported by KVM since before + * selftests existed and (b) should never outright fail, i.e. is supposed to + * return 0 or 1. If KVM kills a VM, KVM returns -EIO for all ioctl()s for the + * VM and its vCPUs, including KVM_CHECK_EXTENSION. + */ +#define __TEST_ASSERT_VM_VCPU_IOCTL(cond, name, ret, vm) \ +do { \ + int __errno = errno; \ + \ + static_assert_is_vm(vm); \ + \ + if (cond) \ + break; \ + \ + if (errno == EIO && \ + __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)KVM_CAP_USER_MEMORY) < 0) { \ + TEST_ASSERT(errno == EIO, "KVM killed the VM, should return -EIO"); \ + TEST_FAIL("KVM killed/bugged the VM, check the kernel log for clues"); \ + } \ + errno = __errno; \ + TEST_ASSERT(cond, __KVM_IOCTL_ERROR(name, ret)); \ +} while (0) + +#define TEST_ASSERT_VM_VCPU_IOCTL(cond, cmd, ret, vm) \ + __TEST_ASSERT_VM_VCPU_IOCTL(cond, #cmd, ret, vm) + +#define vm_ioctl(vm, cmd, arg) \ +({ \ + int ret = __vm_ioctl(vm, cmd, arg); \ + \ + __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \ +}) + +static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { } + +#define __vcpu_ioctl(vcpu, cmd, arg) \ +({ \ + static_assert_is_vcpu(vcpu); \ + kvm_do_ioctl((vcpu)->fd, cmd, arg); \ +}) + +#define vcpu_ioctl(vcpu, cmd, arg) \ +({ \ + int ret = __vcpu_ioctl(vcpu, cmd, arg); \ + \ + __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, (vcpu)->vm); \ +}) + +/* + * Looks up and returns the value corresponding to the capability + * (KVM_CAP_*) given by cap. + */ +static inline int vm_check_cap(struct kvm_vm *vm, long cap) +{ + int ret = __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)cap); + + TEST_ASSERT_VM_VCPU_IOCTL(ret >= 0, KVM_CHECK_EXTENSION, ret, vm); + return ret; +} + +static inline int __vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0) +{ + struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; + + return __vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); +} +static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0) +{ + struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; + + vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); +} + +static inline void vm_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa, + uint64_t size, uint64_t attributes) +{ + struct kvm_memory_attributes attr = { + .attributes = attributes, + .address = gpa, + .size = size, + .flags = 0, + }; + + /* + * KVM_SET_MEMORY_ATTRIBUTES overwrites _all_ attributes. These flows + * need significant enhancements to support multiple attributes. + */ + TEST_ASSERT(!attributes || attributes == KVM_MEMORY_ATTRIBUTE_PRIVATE, + "Update me to support multiple attributes!"); + + vm_ioctl(vm, KVM_SET_MEMORY_ATTRIBUTES, &attr); +} + + +static inline void vm_mem_set_private(struct kvm_vm *vm, uint64_t gpa, + uint64_t size) +{ + vm_set_memory_attributes(vm, gpa, size, KVM_MEMORY_ATTRIBUTE_PRIVATE); +} + +static inline void vm_mem_set_shared(struct kvm_vm *vm, uint64_t gpa, + uint64_t size) +{ + vm_set_memory_attributes(vm, gpa, size, 0); +} + +void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t gpa, uint64_t size, + bool punch_hole); + +static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, uint64_t gpa, + uint64_t size) +{ + vm_guest_mem_fallocate(vm, gpa, size, true); +} + +static inline void vm_guest_mem_allocate(struct kvm_vm *vm, uint64_t gpa, + uint64_t size) +{ + vm_guest_mem_fallocate(vm, gpa, size, false); +} + +void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); +const char *vm_guest_mode_string(uint32_t i); + +void kvm_vm_free(struct kvm_vm *vmp); +void kvm_vm_restart(struct kvm_vm *vmp); +void kvm_vm_release(struct kvm_vm *vmp); +int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, + size_t len); +void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename); +int kvm_memfd_alloc(size_t size, bool hugepages); + +void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); + +static inline void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) +{ + struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot }; + + vm_ioctl(vm, KVM_GET_DIRTY_LOG, &args); +} + +static inline void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, + uint64_t first_page, uint32_t num_pages) +{ + struct kvm_clear_dirty_log args = { + .dirty_bitmap = log, + .slot = slot, + .first_page = first_page, + .num_pages = num_pages + }; + + vm_ioctl(vm, KVM_CLEAR_DIRTY_LOG, &args); +} + +static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm) +{ + return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL); +} + +static inline int vm_get_stats_fd(struct kvm_vm *vm) +{ + int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL); + + TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_GET_STATS_FD, fd, vm); + return fd; +} + +static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header) +{ + ssize_t ret; + + ret = pread(stats_fd, header, sizeof(*header), 0); + TEST_ASSERT(ret == sizeof(*header), + "Failed to read '%lu' header bytes, ret = '%ld'", + sizeof(*header), ret); +} + +struct kvm_stats_desc *read_stats_descriptors(int stats_fd, + struct kvm_stats_header *header); + +static inline ssize_t get_stats_descriptor_size(struct kvm_stats_header *header) +{ + /* + * The base size of the descriptor is defined by KVM's ABI, but the + * size of the name field is variable, as far as KVM's ABI is + * concerned. For a given instance of KVM, the name field is the same + * size for all stats and is provided in the overall stats header. + */ + return sizeof(struct kvm_stats_desc) + header->name_size; +} + +static inline struct kvm_stats_desc *get_stats_descriptor(struct kvm_stats_desc *stats, + int index, + struct kvm_stats_header *header) +{ + /* + * Note, size_desc includes the size of the name field, which is + * variable. i.e. this is NOT equivalent to &stats_desc[i]. + */ + return (void *)stats + index * get_stats_descriptor_size(header); +} + +void read_stat_data(int stats_fd, struct kvm_stats_header *header, + struct kvm_stats_desc *desc, uint64_t *data, + size_t max_elements); + +void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, + size_t max_elements); + +static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name) +{ + uint64_t data; + + __vm_get_stat(vm, stat_name, &data, 1); + return data; +} + +void vm_create_irqchip(struct kvm_vm *vm); + +static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, + uint64_t flags) +{ + struct kvm_create_guest_memfd guest_memfd = { + .size = size, + .flags = flags, + }; + + return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd); +} + +static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, + uint64_t flags) +{ + int fd = __vm_create_guest_memfd(vm, size, flags); + + TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_GUEST_MEMFD, fd)); + return fd; +} + +void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva); +int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva); +void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva, + uint32_t guest_memfd, uint64_t guest_memfd_offset); +int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva, + uint32_t guest_memfd, uint64_t guest_memfd_offset); + +void vm_userspace_mem_region_add(struct kvm_vm *vm, + enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, uint64_t npages, + uint32_t flags); +void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, uint64_t npages, + uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset); + +#ifndef vm_arch_has_protected_memory +static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm) +{ + return false; +} +#endif + +void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); +void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); +void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); +struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); +void vm_populate_vaddr_bitmap(struct kvm_vm *vm); +vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); +vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); +vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, + enum kvm_mem_region_type type); +vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz, + vm_vaddr_t vaddr_min, + enum kvm_mem_region_type type); +vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); +vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, + enum kvm_mem_region_type type); +vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm); + +void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + unsigned int npages); +void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); +void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva); +vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva); +void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa); + +#ifndef vcpu_arch_put_guest +#define vcpu_arch_put_guest(mem, val) do { (mem) = (val); } while (0) +#endif + +static inline vm_paddr_t vm_untag_gpa(struct kvm_vm *vm, vm_paddr_t gpa) +{ + return gpa & ~vm->gpa_tag_mask; +} + +void vcpu_run(struct kvm_vcpu *vcpu); +int _vcpu_run(struct kvm_vcpu *vcpu); + +static inline int __vcpu_run(struct kvm_vcpu *vcpu) +{ + return __vcpu_ioctl(vcpu, KVM_RUN, NULL); +} + +void vcpu_run_complete_io(struct kvm_vcpu *vcpu); +struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu); + +static inline void vcpu_enable_cap(struct kvm_vcpu *vcpu, uint32_t cap, + uint64_t arg0) +{ + struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; + + vcpu_ioctl(vcpu, KVM_ENABLE_CAP, &enable_cap); +} + +static inline void vcpu_guest_debug_set(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *debug) +{ + vcpu_ioctl(vcpu, KVM_SET_GUEST_DEBUG, debug); +} + +static inline void vcpu_mp_state_get(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + vcpu_ioctl(vcpu, KVM_GET_MP_STATE, mp_state); +} +static inline void vcpu_mp_state_set(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + vcpu_ioctl(vcpu, KVM_SET_MP_STATE, mp_state); +} + +static inline void vcpu_regs_get(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + vcpu_ioctl(vcpu, KVM_GET_REGS, regs); +} + +static inline void vcpu_regs_set(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + vcpu_ioctl(vcpu, KVM_SET_REGS, regs); +} +static inline void vcpu_sregs_get(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + vcpu_ioctl(vcpu, KVM_GET_SREGS, sregs); + +} +static inline void vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs); +} +static inline int _vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + return __vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs); +} +static inline void vcpu_fpu_get(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + vcpu_ioctl(vcpu, KVM_GET_FPU, fpu); +} +static inline void vcpu_fpu_set(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + vcpu_ioctl(vcpu, KVM_SET_FPU, fpu); +} + +static inline int __vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr) +{ + struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr }; + + return __vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); +} +static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) +{ + struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + + return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); +} +static inline void vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr) +{ + struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr }; + + vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); +} +static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) +{ + struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + + vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); +} + +#ifdef __KVM_HAVE_VCPU_EVENTS +static inline void vcpu_events_get(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + vcpu_ioctl(vcpu, KVM_GET_VCPU_EVENTS, events); +} +static inline void vcpu_events_set(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + vcpu_ioctl(vcpu, KVM_SET_VCPU_EVENTS, events); +} +#endif +#ifdef __x86_64__ +static inline void vcpu_nested_state_get(struct kvm_vcpu *vcpu, + struct kvm_nested_state *state) +{ + vcpu_ioctl(vcpu, KVM_GET_NESTED_STATE, state); +} +static inline int __vcpu_nested_state_set(struct kvm_vcpu *vcpu, + struct kvm_nested_state *state) +{ + return __vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state); +} + +static inline void vcpu_nested_state_set(struct kvm_vcpu *vcpu, + struct kvm_nested_state *state) +{ + vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state); +} +#endif +static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu) +{ + int fd = __vcpu_ioctl(vcpu, KVM_GET_STATS_FD, NULL); + + TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_CHECK_EXTENSION, fd, vcpu->vm); + return fd; +} + +int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr); + +static inline void kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr) +{ + int ret = __kvm_has_device_attr(dev_fd, group, attr); + + TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno); +} + +int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val); + +static inline void kvm_device_attr_get(int dev_fd, uint32_t group, + uint64_t attr, void *val) +{ + int ret = __kvm_device_attr_get(dev_fd, group, attr, val); + + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_GET_DEVICE_ATTR, ret)); +} + +int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val); + +static inline void kvm_device_attr_set(int dev_fd, uint32_t group, + uint64_t attr, void *val) +{ + int ret = __kvm_device_attr_set(dev_fd, group, attr, val); + + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret)); +} + +static inline int __vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr) +{ + return __kvm_has_device_attr(vcpu->fd, group, attr); +} + +static inline void vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr) +{ + kvm_has_device_attr(vcpu->fd, group, attr); +} + +static inline int __vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr, void *val) +{ + return __kvm_device_attr_get(vcpu->fd, group, attr, val); +} + +static inline void vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr, void *val) +{ + kvm_device_attr_get(vcpu->fd, group, attr, val); +} + +static inline int __vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr, void *val) +{ + return __kvm_device_attr_set(vcpu->fd, group, attr, val); +} + +static inline void vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr, void *val) +{ + kvm_device_attr_set(vcpu->fd, group, attr, val); +} + +int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type); +int __kvm_create_device(struct kvm_vm *vm, uint64_t type); + +static inline int kvm_create_device(struct kvm_vm *vm, uint64_t type) +{ + int fd = __kvm_create_device(vm, type); + + TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_DEVICE, fd)); + return fd; +} + +void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu); + +/* + * VM VCPU Args Set + * + * Input Args: + * vm - Virtual Machine + * num - number of arguments + * ... - arguments, each of type uint64_t + * + * Output Args: None + * + * Return: None + * + * Sets the first @num input parameters for the function at @vcpu's entry point, + * per the C calling convention of the architecture, to the values given as + * variable args. Each of the variable args is expected to be of type uint64_t. + * The maximum @num can be is specific to the architecture. + */ +void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...); + +void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level); +int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level); + +#define KVM_MAX_IRQ_ROUTES 4096 + +struct kvm_irq_routing *kvm_gsi_routing_create(void); +void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing, + uint32_t gsi, uint32_t pin); +int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing); +void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing); + +const char *exit_reason_str(unsigned int exit_reason); + +vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, + uint32_t memslot); +vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, + vm_paddr_t paddr_min, uint32_t memslot, + bool protected); +vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm); + +static inline vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, + vm_paddr_t paddr_min, uint32_t memslot) +{ + /* + * By default, allocate memory as protected for VMs that support + * protected memory, as the majority of memory for such VMs is + * protected, i.e. using shared memory is effectively opt-in. + */ + return __vm_phy_pages_alloc(vm, num, paddr_min, memslot, + vm_arch_has_protected_memory(vm)); +} + +/* + * ____vm_create() does KVM_CREATE_VM and little else. __vm_create() also + * loads the test binary into guest memory and creates an IRQ chip (x86 only). + * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to + * calculate the amount of memory needed for per-vCPU data, e.g. stacks. + */ +struct kvm_vm *____vm_create(struct vm_shape shape); +struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, + uint64_t nr_extra_pages); + +static inline struct kvm_vm *vm_create_barebones(void) +{ + return ____vm_create(VM_SHAPE_DEFAULT); +} + +static inline struct kvm_vm *vm_create_barebones_type(unsigned long type) +{ + const struct vm_shape shape = { + .mode = VM_MODE_DEFAULT, + .type = type, + }; + + return ____vm_create(shape); +} + +static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus) +{ + return __vm_create(VM_SHAPE_DEFAULT, nr_runnable_vcpus, 0); +} + +struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, + uint64_t extra_mem_pages, + void *guest_code, struct kvm_vcpu *vcpus[]); + +static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus, + void *guest_code, + struct kvm_vcpu *vcpus[]) +{ + return __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus, 0, + guest_code, vcpus); +} + + +struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape, + struct kvm_vcpu **vcpu, + uint64_t extra_mem_pages, + void *guest_code); + +/* + * Create a VM with a single vCPU with reasonable defaults and @extra_mem_pages + * additional pages of guest memory. Returns the VM and vCPU (via out param). + */ +static inline struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, + uint64_t extra_mem_pages, + void *guest_code) +{ + return __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, vcpu, + extra_mem_pages, guest_code); +} + +static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, + void *guest_code) +{ + return __vm_create_with_one_vcpu(vcpu, 0, guest_code); +} + +static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape, + struct kvm_vcpu **vcpu, + void *guest_code) +{ + return __vm_create_shape_with_one_vcpu(shape, vcpu, 0, guest_code); +} + +struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); + +void kvm_pin_this_task_to_pcpu(uint32_t pcpu); +void kvm_print_vcpu_pinning_help(void); +void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], + int nr_vcpus); + +unsigned long vm_compute_max_gfn(struct kvm_vm *vm); +unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size); +unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages); +unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages); +static inline unsigned int +vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) +{ + unsigned int n; + n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages)); +#ifdef __s390x__ + /* s390 requires 1M aligned guest sizes */ + n = (n + 255) & ~255; +#endif + return n; +} + +#define sync_global_to_guest(vm, g) ({ \ + typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + memcpy(_p, &(g), sizeof(g)); \ +}) + +#define sync_global_from_guest(vm, g) ({ \ + typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + memcpy(&(g), _p, sizeof(g)); \ +}) + +/* + * Write a global value, but only in the VM's (guest's) domain. Primarily used + * for "globals" that hold per-VM values (VMs always duplicate code and global + * data into their own region of physical memory), but can be used anytime it's + * undesirable to change the host's copy of the global. + */ +#define write_guest_global(vm, g, val) ({ \ + typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ + typeof(g) _val = val; \ + \ + memcpy(_p, &(_val), sizeof(g)); \ +}) + +void assert_on_unhandled_exception(struct kvm_vcpu *vcpu); + +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, + uint8_t indent); + +static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu, + uint8_t indent) +{ + vcpu_arch_dump(stream, vcpu, indent); +} + +/* + * Adds a vCPU with reasonable defaults (e.g. a stack) + * + * Input Args: + * vm - Virtual Machine + * vcpu_id - The id of the VCPU to add to the VM. + */ +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); +void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code); + +static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, + void *guest_code) +{ + struct kvm_vcpu *vcpu = vm_arch_vcpu_add(vm, vcpu_id); + + vcpu_arch_set_entry_point(vcpu, guest_code); + + return vcpu; +} + +/* Re-create a vCPU after restarting a VM, e.g. for state save/restore tests. */ +struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id); + +static inline struct kvm_vcpu *vm_vcpu_recreate(struct kvm_vm *vm, + uint32_t vcpu_id) +{ + return vm_arch_vcpu_recreate(vm, vcpu_id); +} + +void vcpu_arch_free(struct kvm_vcpu *vcpu); + +void virt_arch_pgd_alloc(struct kvm_vm *vm); + +static inline void virt_pgd_alloc(struct kvm_vm *vm) +{ + virt_arch_pgd_alloc(vm); +} + +/* + * VM Virtual Page Map + * + * Input Args: + * vm - Virtual Machine + * vaddr - VM Virtual Address + * paddr - VM Physical Address + * memslot - Memory region slot for new virtual translation tables + * + * Output Args: None + * + * Return: None + * + * Within @vm, creates a virtual translation for the page starting + * at @vaddr to the page starting at @paddr. + */ +void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr); + +static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +{ + virt_arch_pg_map(vm, vaddr, paddr); +} + + +/* + * Address Guest Virtual to Guest Physical + * + * Input Args: + * vm - Virtual Machine + * gva - VM virtual address + * + * Output Args: None + * + * Return: + * Equivalent VM physical address + * + * Returns the VM physical address of the translated VM virtual + * address given by @gva. + */ +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); + +static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + return addr_arch_gva2gpa(vm, gva); +} + +/* + * Virtual Translation Tables Dump + * + * Input Args: + * stream - Output FILE stream + * vm - Virtual Machine + * indent - Left margin indent amount + * + * Output Args: None + * + * Return: None + * + * Dumps to the FILE stream given by @stream, the contents of all the + * virtual translation tables for the VM given by @vm. + */ +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); + +static inline void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + virt_arch_dump(stream, vm, indent); +} + + +static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm) +{ + return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0); +} + +/* + * Arch hook that is invoked via a constructor, i.e. before exeucting main(), + * to allow for arch-specific setup that is common to all tests, e.g. computing + * the default guest "mode". + */ +void kvm_selftest_arch_init(void); + +void kvm_arch_vm_post_create(struct kvm_vm *vm); + +bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr); + +uint32_t guest_get_vcpuid(void); #endif /* SELFTEST_KVM_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h deleted file mode 100644 index 3e0db283a4..0000000000 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ /dev/null @@ -1,1135 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * tools/testing/selftests/kvm/include/kvm_util_base.h - * - * Copyright (C) 2018, Google LLC. - */ -#ifndef SELFTEST_KVM_UTIL_BASE_H -#define SELFTEST_KVM_UTIL_BASE_H - -#include "test_util.h" - -#include <linux/compiler.h> -#include "linux/hashtable.h" -#include "linux/list.h" -#include <linux/kernel.h> -#include <linux/kvm.h> -#include "linux/rbtree.h" -#include <linux/types.h> - -#include <asm/atomic.h> -#include <asm/kvm.h> - -#include <sys/ioctl.h> - -#include "kvm_util_arch.h" -#include "sparsebit.h" - -/* - * Provide a version of static_assert() that is guaranteed to have an optional - * message param. If _ISOC11_SOURCE is defined, glibc (/usr/include/assert.h) - * #undefs and #defines static_assert() as a direct alias to _Static_assert(), - * i.e. effectively makes the message mandatory. Many KVM selftests #define - * _GNU_SOURCE for various reasons, and _GNU_SOURCE implies _ISOC11_SOURCE. As - * a result, static_assert() behavior is non-deterministic and may or may not - * require a message depending on #include order. - */ -#define __kvm_static_assert(expr, msg, ...) _Static_assert(expr, msg) -#define kvm_static_assert(expr, ...) __kvm_static_assert(expr, ##__VA_ARGS__, #expr) - -#define KVM_DEV_PATH "/dev/kvm" -#define KVM_MAX_VCPUS 512 - -#define NSEC_PER_SEC 1000000000L - -typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ -typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ - -struct userspace_mem_region { - struct kvm_userspace_memory_region2 region; - struct sparsebit *unused_phy_pages; - struct sparsebit *protected_phy_pages; - int fd; - off_t offset; - enum vm_mem_backing_src_type backing_src_type; - void *host_mem; - void *host_alias; - void *mmap_start; - void *mmap_alias; - size_t mmap_size; - struct rb_node gpa_node; - struct rb_node hva_node; - struct hlist_node slot_node; -}; - -struct kvm_vcpu { - struct list_head list; - uint32_t id; - int fd; - struct kvm_vm *vm; - struct kvm_run *run; -#ifdef __x86_64__ - struct kvm_cpuid2 *cpuid; -#endif - struct kvm_dirty_gfn *dirty_gfns; - uint32_t fetch_index; - uint32_t dirty_gfns_count; -}; - -struct userspace_mem_regions { - struct rb_root gpa_tree; - struct rb_root hva_tree; - DECLARE_HASHTABLE(slot_hash, 9); -}; - -enum kvm_mem_region_type { - MEM_REGION_CODE, - MEM_REGION_DATA, - MEM_REGION_PT, - MEM_REGION_TEST_DATA, - NR_MEM_REGIONS, -}; - -struct kvm_vm { - int mode; - unsigned long type; - uint8_t subtype; - int kvm_fd; - int fd; - unsigned int pgtable_levels; - unsigned int page_size; - unsigned int page_shift; - unsigned int pa_bits; - unsigned int va_bits; - uint64_t max_gfn; - struct list_head vcpus; - struct userspace_mem_regions regions; - struct sparsebit *vpages_valid; - struct sparsebit *vpages_mapped; - bool has_irqchip; - bool pgd_created; - vm_paddr_t ucall_mmio_addr; - vm_paddr_t pgd; - vm_vaddr_t gdt; - vm_vaddr_t tss; - vm_vaddr_t idt; - vm_vaddr_t handlers; - uint32_t dirty_ring_size; - uint64_t gpa_tag_mask; - - struct kvm_vm_arch arch; - - /* Cache of information for binary stats interface */ - int stats_fd; - struct kvm_stats_header stats_header; - struct kvm_stats_desc *stats_desc; - - /* - * KVM region slots. These are the default memslots used by page - * allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE] - * memslot. - */ - uint32_t memslots[NR_MEM_REGIONS]; -}; - -struct vcpu_reg_sublist { - const char *name; - long capability; - int feature; - int feature_type; - bool finalize; - __u64 *regs; - __u64 regs_n; - __u64 *rejects_set; - __u64 rejects_set_n; - __u64 *skips_set; - __u64 skips_set_n; -}; - -struct vcpu_reg_list { - char *name; - struct vcpu_reg_sublist sublists[]; -}; - -#define for_each_sublist(c, s) \ - for ((s) = &(c)->sublists[0]; (s)->regs; ++(s)) - -#define kvm_for_each_vcpu(vm, i, vcpu) \ - for ((i) = 0; (i) <= (vm)->last_vcpu_id; (i)++) \ - if (!((vcpu) = vm->vcpus[i])) \ - continue; \ - else - -struct userspace_mem_region * -memslot2region(struct kvm_vm *vm, uint32_t memslot); - -static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm, - enum kvm_mem_region_type type) -{ - assert(type < NR_MEM_REGIONS); - return memslot2region(vm, vm->memslots[type]); -} - -/* Minimum allocated guest virtual and physical addresses */ -#define KVM_UTIL_MIN_VADDR 0x2000 -#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 - -#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000 -#define DEFAULT_STACK_PGS 5 - -enum vm_guest_mode { - VM_MODE_P52V48_4K, - VM_MODE_P52V48_16K, - VM_MODE_P52V48_64K, - VM_MODE_P48V48_4K, - VM_MODE_P48V48_16K, - VM_MODE_P48V48_64K, - VM_MODE_P40V48_4K, - VM_MODE_P40V48_16K, - VM_MODE_P40V48_64K, - VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ - VM_MODE_P47V64_4K, - VM_MODE_P44V64_4K, - VM_MODE_P36V48_4K, - VM_MODE_P36V48_16K, - VM_MODE_P36V48_64K, - VM_MODE_P36V47_16K, - NUM_VM_MODES, -}; - -struct vm_shape { - uint32_t type; - uint8_t mode; - uint8_t subtype; - uint16_t padding; -}; - -kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t)); - -#define VM_TYPE_DEFAULT 0 - -#define VM_SHAPE(__mode) \ -({ \ - struct vm_shape shape = { \ - .mode = (__mode), \ - .type = VM_TYPE_DEFAULT \ - }; \ - \ - shape; \ -}) - -#if defined(__aarch64__) - -extern enum vm_guest_mode vm_mode_default; - -#define VM_MODE_DEFAULT vm_mode_default -#define MIN_PAGE_SHIFT 12U -#define ptes_per_page(page_size) ((page_size) / 8) - -#elif defined(__x86_64__) - -#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K -#define MIN_PAGE_SHIFT 12U -#define ptes_per_page(page_size) ((page_size) / 8) - -#elif defined(__s390x__) - -#define VM_MODE_DEFAULT VM_MODE_P44V64_4K -#define MIN_PAGE_SHIFT 12U -#define ptes_per_page(page_size) ((page_size) / 16) - -#elif defined(__riscv) - -#if __riscv_xlen == 32 -#error "RISC-V 32-bit kvm selftests not supported" -#endif - -#define VM_MODE_DEFAULT VM_MODE_P40V48_4K -#define MIN_PAGE_SHIFT 12U -#define ptes_per_page(page_size) ((page_size) / 8) - -#endif - -#define VM_SHAPE_DEFAULT VM_SHAPE(VM_MODE_DEFAULT) - -#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT) -#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE) - -struct vm_guest_mode_params { - unsigned int pa_bits; - unsigned int va_bits; - unsigned int page_size; - unsigned int page_shift; -}; -extern const struct vm_guest_mode_params vm_guest_mode_params[]; - -int open_path_or_exit(const char *path, int flags); -int open_kvm_dev_path_or_exit(void); - -bool get_kvm_param_bool(const char *param); -bool get_kvm_intel_param_bool(const char *param); -bool get_kvm_amd_param_bool(const char *param); - -int get_kvm_param_integer(const char *param); -int get_kvm_intel_param_integer(const char *param); -int get_kvm_amd_param_integer(const char *param); - -unsigned int kvm_check_cap(long cap); - -static inline bool kvm_has_cap(long cap) -{ - return kvm_check_cap(cap); -} - -#define __KVM_SYSCALL_ERROR(_name, _ret) \ - "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno) - -/* - * Use the "inner", double-underscore macro when reporting errors from within - * other macros so that the name of ioctl() and not its literal numeric value - * is printed on error. The "outer" macro is strongly preferred when reporting - * errors "directly", i.e. without an additional layer of macros, as it reduces - * the probability of passing in the wrong string. - */ -#define __KVM_IOCTL_ERROR(_name, _ret) __KVM_SYSCALL_ERROR(_name, _ret) -#define KVM_IOCTL_ERROR(_ioctl, _ret) __KVM_IOCTL_ERROR(#_ioctl, _ret) - -#define kvm_do_ioctl(fd, cmd, arg) \ -({ \ - kvm_static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd)); \ - ioctl(fd, cmd, arg); \ -}) - -#define __kvm_ioctl(kvm_fd, cmd, arg) \ - kvm_do_ioctl(kvm_fd, cmd, arg) - -#define kvm_ioctl(kvm_fd, cmd, arg) \ -({ \ - int ret = __kvm_ioctl(kvm_fd, cmd, arg); \ - \ - TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(#cmd, ret)); \ -}) - -static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { } - -#define __vm_ioctl(vm, cmd, arg) \ -({ \ - static_assert_is_vm(vm); \ - kvm_do_ioctl((vm)->fd, cmd, arg); \ -}) - -/* - * Assert that a VM or vCPU ioctl() succeeded, with extra magic to detect if - * the ioctl() failed because KVM killed/bugged the VM. To detect a dead VM, - * probe KVM_CAP_USER_MEMORY, which (a) has been supported by KVM since before - * selftests existed and (b) should never outright fail, i.e. is supposed to - * return 0 or 1. If KVM kills a VM, KVM returns -EIO for all ioctl()s for the - * VM and its vCPUs, including KVM_CHECK_EXTENSION. - */ -#define __TEST_ASSERT_VM_VCPU_IOCTL(cond, name, ret, vm) \ -do { \ - int __errno = errno; \ - \ - static_assert_is_vm(vm); \ - \ - if (cond) \ - break; \ - \ - if (errno == EIO && \ - __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)KVM_CAP_USER_MEMORY) < 0) { \ - TEST_ASSERT(errno == EIO, "KVM killed the VM, should return -EIO"); \ - TEST_FAIL("KVM killed/bugged the VM, check the kernel log for clues"); \ - } \ - errno = __errno; \ - TEST_ASSERT(cond, __KVM_IOCTL_ERROR(name, ret)); \ -} while (0) - -#define TEST_ASSERT_VM_VCPU_IOCTL(cond, cmd, ret, vm) \ - __TEST_ASSERT_VM_VCPU_IOCTL(cond, #cmd, ret, vm) - -#define vm_ioctl(vm, cmd, arg) \ -({ \ - int ret = __vm_ioctl(vm, cmd, arg); \ - \ - __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \ -}) - -static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { } - -#define __vcpu_ioctl(vcpu, cmd, arg) \ -({ \ - static_assert_is_vcpu(vcpu); \ - kvm_do_ioctl((vcpu)->fd, cmd, arg); \ -}) - -#define vcpu_ioctl(vcpu, cmd, arg) \ -({ \ - int ret = __vcpu_ioctl(vcpu, cmd, arg); \ - \ - __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, (vcpu)->vm); \ -}) - -/* - * Looks up and returns the value corresponding to the capability - * (KVM_CAP_*) given by cap. - */ -static inline int vm_check_cap(struct kvm_vm *vm, long cap) -{ - int ret = __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)cap); - - TEST_ASSERT_VM_VCPU_IOCTL(ret >= 0, KVM_CHECK_EXTENSION, ret, vm); - return ret; -} - -static inline int __vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0) -{ - struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; - - return __vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); -} -static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0) -{ - struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; - - vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); -} - -static inline void vm_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa, - uint64_t size, uint64_t attributes) -{ - struct kvm_memory_attributes attr = { - .attributes = attributes, - .address = gpa, - .size = size, - .flags = 0, - }; - - /* - * KVM_SET_MEMORY_ATTRIBUTES overwrites _all_ attributes. These flows - * need significant enhancements to support multiple attributes. - */ - TEST_ASSERT(!attributes || attributes == KVM_MEMORY_ATTRIBUTE_PRIVATE, - "Update me to support multiple attributes!"); - - vm_ioctl(vm, KVM_SET_MEMORY_ATTRIBUTES, &attr); -} - - -static inline void vm_mem_set_private(struct kvm_vm *vm, uint64_t gpa, - uint64_t size) -{ - vm_set_memory_attributes(vm, gpa, size, KVM_MEMORY_ATTRIBUTE_PRIVATE); -} - -static inline void vm_mem_set_shared(struct kvm_vm *vm, uint64_t gpa, - uint64_t size) -{ - vm_set_memory_attributes(vm, gpa, size, 0); -} - -void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t gpa, uint64_t size, - bool punch_hole); - -static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, uint64_t gpa, - uint64_t size) -{ - vm_guest_mem_fallocate(vm, gpa, size, true); -} - -static inline void vm_guest_mem_allocate(struct kvm_vm *vm, uint64_t gpa, - uint64_t size) -{ - vm_guest_mem_fallocate(vm, gpa, size, false); -} - -void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); -const char *vm_guest_mode_string(uint32_t i); - -void kvm_vm_free(struct kvm_vm *vmp); -void kvm_vm_restart(struct kvm_vm *vmp); -void kvm_vm_release(struct kvm_vm *vmp); -int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, - size_t len); -void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename); -int kvm_memfd_alloc(size_t size, bool hugepages); - -void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); - -static inline void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) -{ - struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot }; - - vm_ioctl(vm, KVM_GET_DIRTY_LOG, &args); -} - -static inline void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, - uint64_t first_page, uint32_t num_pages) -{ - struct kvm_clear_dirty_log args = { - .dirty_bitmap = log, - .slot = slot, - .first_page = first_page, - .num_pages = num_pages - }; - - vm_ioctl(vm, KVM_CLEAR_DIRTY_LOG, &args); -} - -static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm) -{ - return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL); -} - -static inline int vm_get_stats_fd(struct kvm_vm *vm) -{ - int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL); - - TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_GET_STATS_FD, fd, vm); - return fd; -} - -static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header) -{ - ssize_t ret; - - ret = pread(stats_fd, header, sizeof(*header), 0); - TEST_ASSERT(ret == sizeof(*header), - "Failed to read '%lu' header bytes, ret = '%ld'", - sizeof(*header), ret); -} - -struct kvm_stats_desc *read_stats_descriptors(int stats_fd, - struct kvm_stats_header *header); - -static inline ssize_t get_stats_descriptor_size(struct kvm_stats_header *header) -{ - /* - * The base size of the descriptor is defined by KVM's ABI, but the - * size of the name field is variable, as far as KVM's ABI is - * concerned. For a given instance of KVM, the name field is the same - * size for all stats and is provided in the overall stats header. - */ - return sizeof(struct kvm_stats_desc) + header->name_size; -} - -static inline struct kvm_stats_desc *get_stats_descriptor(struct kvm_stats_desc *stats, - int index, - struct kvm_stats_header *header) -{ - /* - * Note, size_desc includes the size of the name field, which is - * variable. i.e. this is NOT equivalent to &stats_desc[i]. - */ - return (void *)stats + index * get_stats_descriptor_size(header); -} - -void read_stat_data(int stats_fd, struct kvm_stats_header *header, - struct kvm_stats_desc *desc, uint64_t *data, - size_t max_elements); - -void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, - size_t max_elements); - -static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name) -{ - uint64_t data; - - __vm_get_stat(vm, stat_name, &data, 1); - return data; -} - -void vm_create_irqchip(struct kvm_vm *vm); - -static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, - uint64_t flags) -{ - struct kvm_create_guest_memfd guest_memfd = { - .size = size, - .flags = flags, - }; - - return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd); -} - -static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, - uint64_t flags) -{ - int fd = __vm_create_guest_memfd(vm, size, flags); - - TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_GUEST_MEMFD, fd)); - return fd; -} - -void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva); -int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva); -void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva, - uint32_t guest_memfd, uint64_t guest_memfd_offset); -int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, - uint64_t gpa, uint64_t size, void *hva, - uint32_t guest_memfd, uint64_t guest_memfd_offset); - -void vm_userspace_mem_region_add(struct kvm_vm *vm, - enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, uint64_t npages, - uint32_t flags); -void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, uint64_t npages, - uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset); - -#ifndef vm_arch_has_protected_memory -static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm) -{ - return false; -} -#endif - -void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); -void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); -void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); -struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); -void vm_populate_vaddr_bitmap(struct kvm_vm *vm); -vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); -vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); -vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, - enum kvm_mem_region_type type); -vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz, - vm_vaddr_t vaddr_min, - enum kvm_mem_region_type type); -vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); -vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, - enum kvm_mem_region_type type); -vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm); - -void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, - unsigned int npages); -void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); -void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva); -vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva); -void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa); - - -static inline vm_paddr_t vm_untag_gpa(struct kvm_vm *vm, vm_paddr_t gpa) -{ - return gpa & ~vm->gpa_tag_mask; -} - -void vcpu_run(struct kvm_vcpu *vcpu); -int _vcpu_run(struct kvm_vcpu *vcpu); - -static inline int __vcpu_run(struct kvm_vcpu *vcpu) -{ - return __vcpu_ioctl(vcpu, KVM_RUN, NULL); -} - -void vcpu_run_complete_io(struct kvm_vcpu *vcpu); -struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu); - -static inline void vcpu_enable_cap(struct kvm_vcpu *vcpu, uint32_t cap, - uint64_t arg0) -{ - struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; - - vcpu_ioctl(vcpu, KVM_ENABLE_CAP, &enable_cap); -} - -static inline void vcpu_guest_debug_set(struct kvm_vcpu *vcpu, - struct kvm_guest_debug *debug) -{ - vcpu_ioctl(vcpu, KVM_SET_GUEST_DEBUG, debug); -} - -static inline void vcpu_mp_state_get(struct kvm_vcpu *vcpu, - struct kvm_mp_state *mp_state) -{ - vcpu_ioctl(vcpu, KVM_GET_MP_STATE, mp_state); -} -static inline void vcpu_mp_state_set(struct kvm_vcpu *vcpu, - struct kvm_mp_state *mp_state) -{ - vcpu_ioctl(vcpu, KVM_SET_MP_STATE, mp_state); -} - -static inline void vcpu_regs_get(struct kvm_vcpu *vcpu, struct kvm_regs *regs) -{ - vcpu_ioctl(vcpu, KVM_GET_REGS, regs); -} - -static inline void vcpu_regs_set(struct kvm_vcpu *vcpu, struct kvm_regs *regs) -{ - vcpu_ioctl(vcpu, KVM_SET_REGS, regs); -} -static inline void vcpu_sregs_get(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) -{ - vcpu_ioctl(vcpu, KVM_GET_SREGS, sregs); - -} -static inline void vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) -{ - vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs); -} -static inline int _vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) -{ - return __vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs); -} -static inline void vcpu_fpu_get(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) -{ - vcpu_ioctl(vcpu, KVM_GET_FPU, fpu); -} -static inline void vcpu_fpu_set(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) -{ - vcpu_ioctl(vcpu, KVM_SET_FPU, fpu); -} - -static inline int __vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr) -{ - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr }; - - return __vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); -} -static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) -{ - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; - - return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); -} -static inline void vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr) -{ - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr }; - - vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); -} -static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) -{ - struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; - - vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); -} - -#ifdef __KVM_HAVE_VCPU_EVENTS -static inline void vcpu_events_get(struct kvm_vcpu *vcpu, - struct kvm_vcpu_events *events) -{ - vcpu_ioctl(vcpu, KVM_GET_VCPU_EVENTS, events); -} -static inline void vcpu_events_set(struct kvm_vcpu *vcpu, - struct kvm_vcpu_events *events) -{ - vcpu_ioctl(vcpu, KVM_SET_VCPU_EVENTS, events); -} -#endif -#ifdef __x86_64__ -static inline void vcpu_nested_state_get(struct kvm_vcpu *vcpu, - struct kvm_nested_state *state) -{ - vcpu_ioctl(vcpu, KVM_GET_NESTED_STATE, state); -} -static inline int __vcpu_nested_state_set(struct kvm_vcpu *vcpu, - struct kvm_nested_state *state) -{ - return __vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state); -} - -static inline void vcpu_nested_state_set(struct kvm_vcpu *vcpu, - struct kvm_nested_state *state) -{ - vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state); -} -#endif -static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu) -{ - int fd = __vcpu_ioctl(vcpu, KVM_GET_STATS_FD, NULL); - - TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_CHECK_EXTENSION, fd, vcpu->vm); - return fd; -} - -int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr); - -static inline void kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr) -{ - int ret = __kvm_has_device_attr(dev_fd, group, attr); - - TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno); -} - -int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val); - -static inline void kvm_device_attr_get(int dev_fd, uint32_t group, - uint64_t attr, void *val) -{ - int ret = __kvm_device_attr_get(dev_fd, group, attr, val); - - TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_GET_DEVICE_ATTR, ret)); -} - -int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val); - -static inline void kvm_device_attr_set(int dev_fd, uint32_t group, - uint64_t attr, void *val) -{ - int ret = __kvm_device_attr_set(dev_fd, group, attr, val); - - TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret)); -} - -static inline int __vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr) -{ - return __kvm_has_device_attr(vcpu->fd, group, attr); -} - -static inline void vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr) -{ - kvm_has_device_attr(vcpu->fd, group, attr); -} - -static inline int __vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr, void *val) -{ - return __kvm_device_attr_get(vcpu->fd, group, attr, val); -} - -static inline void vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr, void *val) -{ - kvm_device_attr_get(vcpu->fd, group, attr, val); -} - -static inline int __vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr, void *val) -{ - return __kvm_device_attr_set(vcpu->fd, group, attr, val); -} - -static inline void vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group, - uint64_t attr, void *val) -{ - kvm_device_attr_set(vcpu->fd, group, attr, val); -} - -int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type); -int __kvm_create_device(struct kvm_vm *vm, uint64_t type); - -static inline int kvm_create_device(struct kvm_vm *vm, uint64_t type) -{ - int fd = __kvm_create_device(vm, type); - - TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_DEVICE, fd)); - return fd; -} - -void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu); - -/* - * VM VCPU Args Set - * - * Input Args: - * vm - Virtual Machine - * num - number of arguments - * ... - arguments, each of type uint64_t - * - * Output Args: None - * - * Return: None - * - * Sets the first @num input parameters for the function at @vcpu's entry point, - * per the C calling convention of the architecture, to the values given as - * variable args. Each of the variable args is expected to be of type uint64_t. - * The maximum @num can be is specific to the architecture. - */ -void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...); - -void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level); -int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level); - -#define KVM_MAX_IRQ_ROUTES 4096 - -struct kvm_irq_routing *kvm_gsi_routing_create(void); -void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing, - uint32_t gsi, uint32_t pin); -int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing); -void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing); - -const char *exit_reason_str(unsigned int exit_reason); - -vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, - uint32_t memslot); -vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, - vm_paddr_t paddr_min, uint32_t memslot, - bool protected); -vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm); - -static inline vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, - vm_paddr_t paddr_min, uint32_t memslot) -{ - /* - * By default, allocate memory as protected for VMs that support - * protected memory, as the majority of memory for such VMs is - * protected, i.e. using shared memory is effectively opt-in. - */ - return __vm_phy_pages_alloc(vm, num, paddr_min, memslot, - vm_arch_has_protected_memory(vm)); -} - -/* - * ____vm_create() does KVM_CREATE_VM and little else. __vm_create() also - * loads the test binary into guest memory and creates an IRQ chip (x86 only). - * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to - * calculate the amount of memory needed for per-vCPU data, e.g. stacks. - */ -struct kvm_vm *____vm_create(struct vm_shape shape); -struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, - uint64_t nr_extra_pages); - -static inline struct kvm_vm *vm_create_barebones(void) -{ - return ____vm_create(VM_SHAPE_DEFAULT); -} - -#ifdef __x86_64__ -static inline struct kvm_vm *vm_create_barebones_protected_vm(void) -{ - const struct vm_shape shape = { - .mode = VM_MODE_DEFAULT, - .type = KVM_X86_SW_PROTECTED_VM, - }; - - return ____vm_create(shape); -} -#endif - -static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus) -{ - return __vm_create(VM_SHAPE_DEFAULT, nr_runnable_vcpus, 0); -} - -struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, - uint64_t extra_mem_pages, - void *guest_code, struct kvm_vcpu *vcpus[]); - -static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus, - void *guest_code, - struct kvm_vcpu *vcpus[]) -{ - return __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus, 0, - guest_code, vcpus); -} - - -struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape, - struct kvm_vcpu **vcpu, - uint64_t extra_mem_pages, - void *guest_code); - -/* - * Create a VM with a single vCPU with reasonable defaults and @extra_mem_pages - * additional pages of guest memory. Returns the VM and vCPU (via out param). - */ -static inline struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, - uint64_t extra_mem_pages, - void *guest_code) -{ - return __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, vcpu, - extra_mem_pages, guest_code); -} - -static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, - void *guest_code) -{ - return __vm_create_with_one_vcpu(vcpu, 0, guest_code); -} - -static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape, - struct kvm_vcpu **vcpu, - void *guest_code) -{ - return __vm_create_shape_with_one_vcpu(shape, vcpu, 0, guest_code); -} - -struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); - -void kvm_pin_this_task_to_pcpu(uint32_t pcpu); -void kvm_print_vcpu_pinning_help(void); -void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], - int nr_vcpus); - -unsigned long vm_compute_max_gfn(struct kvm_vm *vm); -unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size); -unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages); -unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages); -static inline unsigned int -vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) -{ - unsigned int n; - n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages)); -#ifdef __s390x__ - /* s390 requires 1M aligned guest sizes */ - n = (n + 255) & ~255; -#endif - return n; -} - -#define sync_global_to_guest(vm, g) ({ \ - typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ - memcpy(_p, &(g), sizeof(g)); \ -}) - -#define sync_global_from_guest(vm, g) ({ \ - typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ - memcpy(&(g), _p, sizeof(g)); \ -}) - -/* - * Write a global value, but only in the VM's (guest's) domain. Primarily used - * for "globals" that hold per-VM values (VMs always duplicate code and global - * data into their own region of physical memory), but can be used anytime it's - * undesirable to change the host's copy of the global. - */ -#define write_guest_global(vm, g, val) ({ \ - typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ - typeof(g) _val = val; \ - \ - memcpy(_p, &(_val), sizeof(g)); \ -}) - -void assert_on_unhandled_exception(struct kvm_vcpu *vcpu); - -void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, - uint8_t indent); - -static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu, - uint8_t indent) -{ - vcpu_arch_dump(stream, vcpu, indent); -} - -/* - * Adds a vCPU with reasonable defaults (e.g. a stack) - * - * Input Args: - * vm - Virtual Machine - * vcpu_id - The id of the VCPU to add to the VM. - */ -struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); -void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code); - -static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, - void *guest_code) -{ - struct kvm_vcpu *vcpu = vm_arch_vcpu_add(vm, vcpu_id); - - vcpu_arch_set_entry_point(vcpu, guest_code); - - return vcpu; -} - -/* Re-create a vCPU after restarting a VM, e.g. for state save/restore tests. */ -struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id); - -static inline struct kvm_vcpu *vm_vcpu_recreate(struct kvm_vm *vm, - uint32_t vcpu_id) -{ - return vm_arch_vcpu_recreate(vm, vcpu_id); -} - -void vcpu_arch_free(struct kvm_vcpu *vcpu); - -void virt_arch_pgd_alloc(struct kvm_vm *vm); - -static inline void virt_pgd_alloc(struct kvm_vm *vm) -{ - virt_arch_pgd_alloc(vm); -} - -/* - * VM Virtual Page Map - * - * Input Args: - * vm - Virtual Machine - * vaddr - VM Virtual Address - * paddr - VM Physical Address - * memslot - Memory region slot for new virtual translation tables - * - * Output Args: None - * - * Return: None - * - * Within @vm, creates a virtual translation for the page starting - * at @vaddr to the page starting at @paddr. - */ -void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr); - -static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) -{ - virt_arch_pg_map(vm, vaddr, paddr); -} - - -/* - * Address Guest Virtual to Guest Physical - * - * Input Args: - * vm - Virtual Machine - * gva - VM virtual address - * - * Output Args: None - * - * Return: - * Equivalent VM physical address - * - * Returns the VM physical address of the translated VM virtual - * address given by @gva. - */ -vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); - -static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) -{ - return addr_arch_gva2gpa(vm, gva); -} - -/* - * Virtual Translation Tables Dump - * - * Input Args: - * stream - Output FILE stream - * vm - Virtual Machine - * indent - Left margin indent amount - * - * Output Args: None - * - * Return: None - * - * Dumps to the FILE stream given by @stream, the contents of all the - * virtual translation tables for the VM given by @vm. - */ -void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); - -static inline void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) -{ - virt_arch_dump(stream, vm, indent); -} - - -static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm) -{ - return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0); -} - -/* - * Arch hook that is invoked via a constructor, i.e. before exeucting main(), - * to allow for arch-specific setup that is common to all tests, e.g. computing - * the default guest "mode". - */ -void kvm_selftest_arch_init(void); - -void kvm_arch_vm_post_create(struct kvm_vm *vm); - -bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr); - -uint32_t guest_get_vcpuid(void); - -#endif /* SELFTEST_KVM_UTIL_BASE_H */ diff --git a/tools/testing/selftests/kvm/include/kvm_util_types.h b/tools/testing/selftests/kvm/include/kvm_util_types.h new file mode 100644 index 0000000000..ec787b97cf --- /dev/null +++ b/tools/testing/selftests/kvm/include/kvm_util_types.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UTIL_TYPES_H +#define SELFTEST_KVM_UTIL_TYPES_H + +/* + * Provide a version of static_assert() that is guaranteed to have an optional + * message param. _GNU_SOURCE is defined for all KVM selftests, _GNU_SOURCE + * implies _ISOC11_SOURCE, and if _ISOC11_SOURCE is defined, glibc #undefs and + * #defines static_assert() as a direct alias to _Static_assert() (see + * usr/include/assert.h). Define a custom macro instead of redefining + * static_assert() to avoid creating non-deterministic behavior that is + * dependent on include order. + */ +#define __kvm_static_assert(expr, msg, ...) _Static_assert(expr, msg) +#define kvm_static_assert(expr, ...) __kvm_static_assert(expr, ##__VA_ARGS__, #expr) + +typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ +typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ + +#endif /* SELFTEST_KVM_UTIL_TYPES_H */ diff --git a/tools/testing/selftests/kvm/include/memstress.h b/tools/testing/selftests/kvm/include/memstress.h index ce4e603050..9071eb6dea 100644 --- a/tools/testing/selftests/kvm/include/memstress.h +++ b/tools/testing/selftests/kvm/include/memstress.h @@ -62,7 +62,6 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, void memstress_destroy_vm(struct kvm_vm *vm); void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent); -void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed); void memstress_set_random_access(struct kvm_vm *vm, bool random_access); void memstress_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct memstress_vcpu_args *)); diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h index ce473fe251..5f38916633 100644 --- a/tools/testing/selftests/kvm/include/riscv/processor.h +++ b/tools/testing/selftests/kvm/include/riscv/processor.h @@ -50,6 +50,16 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype, bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext); +static inline bool __vcpu_has_isa_ext(struct kvm_vcpu *vcpu, uint64_t isa_ext) +{ + return __vcpu_has_ext(vcpu, RISCV_ISA_EXT_REG(isa_ext)); +} + +static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, uint64_t sbi_ext) +{ + return __vcpu_has_ext(vcpu, RISCV_SBI_EXT_REG(sbi_ext)); +} + struct ex_regs { unsigned long ra; unsigned long sp; @@ -154,45 +164,6 @@ void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handle #define PGTBL_PAGE_SIZE PGTBL_L0_BLOCK_SIZE #define PGTBL_PAGE_SIZE_SHIFT PGTBL_L0_BLOCK_SHIFT -/* SBI return error codes */ -#define SBI_SUCCESS 0 -#define SBI_ERR_FAILURE -1 -#define SBI_ERR_NOT_SUPPORTED -2 -#define SBI_ERR_INVALID_PARAM -3 -#define SBI_ERR_DENIED -4 -#define SBI_ERR_INVALID_ADDRESS -5 -#define SBI_ERR_ALREADY_AVAILABLE -6 -#define SBI_ERR_ALREADY_STARTED -7 -#define SBI_ERR_ALREADY_STOPPED -8 - -#define SBI_EXT_EXPERIMENTAL_START 0x08000000 -#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF - -#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END -#define KVM_RISCV_SELFTESTS_SBI_UCALL 0 -#define KVM_RISCV_SELFTESTS_SBI_UNEXP 1 - -enum sbi_ext_id { - SBI_EXT_BASE = 0x10, - SBI_EXT_STA = 0x535441, -}; - -enum sbi_ext_base_fid { - SBI_EXT_BASE_PROBE_EXT = 3, -}; - -struct sbiret { - long error; - long value; -}; - -struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, - unsigned long arg1, unsigned long arg2, - unsigned long arg3, unsigned long arg4, - unsigned long arg5); - -bool guest_sbi_probe_extension(int extid, long *out_val); - static inline void local_irq_enable(void) { csr_set(CSR_SSTATUS, SR_SIE); diff --git a/tools/testing/selftests/kvm/include/riscv/sbi.h b/tools/testing/selftests/kvm/include/riscv/sbi.h new file mode 100644 index 0000000000..046b432ae8 --- /dev/null +++ b/tools/testing/selftests/kvm/include/riscv/sbi.h @@ -0,0 +1,141 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * RISC-V SBI specific definitions + * + * Copyright (C) 2024 Rivos Inc. + */ + +#ifndef SELFTEST_KVM_SBI_H +#define SELFTEST_KVM_SBI_H + +/* SBI spec version fields */ +#define SBI_SPEC_VERSION_DEFAULT 0x1 +#define SBI_SPEC_VERSION_MAJOR_SHIFT 24 +#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f +#define SBI_SPEC_VERSION_MINOR_MASK 0xffffff + +/* SBI return error codes */ +#define SBI_SUCCESS 0 +#define SBI_ERR_FAILURE -1 +#define SBI_ERR_NOT_SUPPORTED -2 +#define SBI_ERR_INVALID_PARAM -3 +#define SBI_ERR_DENIED -4 +#define SBI_ERR_INVALID_ADDRESS -5 +#define SBI_ERR_ALREADY_AVAILABLE -6 +#define SBI_ERR_ALREADY_STARTED -7 +#define SBI_ERR_ALREADY_STOPPED -8 + +#define SBI_EXT_EXPERIMENTAL_START 0x08000000 +#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF + +#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END +#define KVM_RISCV_SELFTESTS_SBI_UCALL 0 +#define KVM_RISCV_SELFTESTS_SBI_UNEXP 1 + +enum sbi_ext_id { + SBI_EXT_BASE = 0x10, + SBI_EXT_STA = 0x535441, + SBI_EXT_PMU = 0x504D55, +}; + +enum sbi_ext_base_fid { + SBI_EXT_BASE_GET_SPEC_VERSION = 0, + SBI_EXT_BASE_GET_IMP_ID, + SBI_EXT_BASE_GET_IMP_VERSION, + SBI_EXT_BASE_PROBE_EXT = 3, +}; +enum sbi_ext_pmu_fid { + SBI_EXT_PMU_NUM_COUNTERS = 0, + SBI_EXT_PMU_COUNTER_GET_INFO, + SBI_EXT_PMU_COUNTER_CFG_MATCH, + SBI_EXT_PMU_COUNTER_START, + SBI_EXT_PMU_COUNTER_STOP, + SBI_EXT_PMU_COUNTER_FW_READ, + SBI_EXT_PMU_COUNTER_FW_READ_HI, + SBI_EXT_PMU_SNAPSHOT_SET_SHMEM, +}; + +union sbi_pmu_ctr_info { + unsigned long value; + struct { + unsigned long csr:12; + unsigned long width:6; +#if __riscv_xlen == 32 + unsigned long reserved:13; +#else + unsigned long reserved:45; +#endif + unsigned long type:1; + }; +}; + +struct riscv_pmu_snapshot_data { + u64 ctr_overflow_mask; + u64 ctr_values[64]; + u64 reserved[447]; +}; + +struct sbiret { + long error; + long value; +}; + +/** General pmu event codes specified in SBI PMU extension */ +enum sbi_pmu_hw_generic_events_t { + SBI_PMU_HW_NO_EVENT = 0, + SBI_PMU_HW_CPU_CYCLES = 1, + SBI_PMU_HW_INSTRUCTIONS = 2, + SBI_PMU_HW_CACHE_REFERENCES = 3, + SBI_PMU_HW_CACHE_MISSES = 4, + SBI_PMU_HW_BRANCH_INSTRUCTIONS = 5, + SBI_PMU_HW_BRANCH_MISSES = 6, + SBI_PMU_HW_BUS_CYCLES = 7, + SBI_PMU_HW_STALLED_CYCLES_FRONTEND = 8, + SBI_PMU_HW_STALLED_CYCLES_BACKEND = 9, + SBI_PMU_HW_REF_CPU_CYCLES = 10, + + SBI_PMU_HW_GENERAL_MAX, +}; + +/* SBI PMU counter types */ +enum sbi_pmu_ctr_type { + SBI_PMU_CTR_TYPE_HW = 0x0, + SBI_PMU_CTR_TYPE_FW, +}; + +/* Flags defined for config matching function */ +#define SBI_PMU_CFG_FLAG_SKIP_MATCH BIT(0) +#define SBI_PMU_CFG_FLAG_CLEAR_VALUE BIT(1) +#define SBI_PMU_CFG_FLAG_AUTO_START BIT(2) +#define SBI_PMU_CFG_FLAG_SET_VUINH BIT(3) +#define SBI_PMU_CFG_FLAG_SET_VSINH BIT(4) +#define SBI_PMU_CFG_FLAG_SET_UINH BIT(5) +#define SBI_PMU_CFG_FLAG_SET_SINH BIT(6) +#define SBI_PMU_CFG_FLAG_SET_MINH BIT(7) + +/* Flags defined for counter start function */ +#define SBI_PMU_START_FLAG_SET_INIT_VALUE BIT(0) +#define SBI_PMU_START_FLAG_INIT_SNAPSHOT BIT(1) + +/* Flags defined for counter stop function */ +#define SBI_PMU_STOP_FLAG_RESET BIT(0) +#define SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT BIT(1) + +struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, + unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4, + unsigned long arg5); + +bool guest_sbi_probe_extension(int extid, long *out_val); + +/* Make SBI version */ +static inline unsigned long sbi_mk_version(unsigned long major, + unsigned long minor) +{ + return ((major & SBI_SPEC_VERSION_MAJOR_MASK) << SBI_SPEC_VERSION_MAJOR_SHIFT) + | (minor & SBI_SPEC_VERSION_MINOR_MASK); +} + +unsigned long get_host_sbi_spec_version(void); + +#endif /* SELFTEST_KVM_SBI_H */ diff --git a/tools/testing/selftests/kvm/include/riscv/ucall.h b/tools/testing/selftests/kvm/include/riscv/ucall.h index be46eb32ec..a695ae36f3 100644 --- a/tools/testing/selftests/kvm/include/riscv/ucall.h +++ b/tools/testing/selftests/kvm/include/riscv/ucall.h @@ -3,6 +3,7 @@ #define SELFTEST_KVM_UCALL_H #include "processor.h" +#include "sbi.h" #define UCALL_EXIT_REASON KVM_EXIT_RISCV_SBI diff --git a/tools/testing/selftests/kvm/include/s390x/ucall.h b/tools/testing/selftests/kvm/include/s390x/ucall.h index b231bf2e49..8035a872a3 100644 --- a/tools/testing/selftests/kvm/include/s390x/ucall.h +++ b/tools/testing/selftests/kvm/include/s390x/ucall.h @@ -2,7 +2,7 @@ #ifndef SELFTEST_KVM_UCALL_H #define SELFTEST_KVM_UCALL_H -#include "kvm_util_base.h" +#include "kvm_util.h" #define UCALL_EXIT_REASON KVM_EXIT_S390_SIEIC diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 8a6e30612c..3e47305884 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -91,9 +91,28 @@ struct guest_random_state { uint32_t seed; }; +extern uint32_t guest_random_seed; +extern struct guest_random_state guest_rng; + struct guest_random_state new_guest_random_state(uint32_t seed); uint32_t guest_random_u32(struct guest_random_state *state); +static inline bool __guest_random_bool(struct guest_random_state *state, + uint8_t percent) +{ + return (guest_random_u32(state) % 100) < percent; +} + +static inline bool guest_random_bool(struct guest_random_state *state) +{ + return __guest_random_bool(state, 50); +} + +static inline uint64_t guest_random_u64(struct guest_random_state *state) +{ + return ((uint64_t)guest_random_u32(state) << 32) | guest_random_u32(state); +} + enum vm_mem_backing_src_type { VM_MEM_SRC_ANONYMOUS, VM_MEM_SRC_ANONYMOUS_THP, diff --git a/tools/testing/selftests/kvm/include/userfaultfd_util.h b/tools/testing/selftests/kvm/include/userfaultfd_util.h index 877449c345..60f7f9d435 100644 --- a/tools/testing/selftests/kvm/include/userfaultfd_util.h +++ b/tools/testing/selftests/kvm/include/userfaultfd_util.h @@ -5,9 +5,6 @@ * Copyright (C) 2018, Red Hat, Inc. * Copyright (C) 2019-2022 Google LLC */ - -#define _GNU_SOURCE /* for pipe2 */ - #include <inttypes.h> #include <time.h> #include <pthread.h> @@ -17,17 +14,27 @@ typedef int (*uffd_handler_t)(int uffd_mode, int uffd, struct uffd_msg *msg); -struct uffd_desc { +struct uffd_reader_args { int uffd_mode; int uffd; - int pipefds[2]; useconds_t delay; uffd_handler_t handler; - pthread_t thread; + /* Holds the read end of the pipe for killing the reader. */ + int pipe; +}; + +struct uffd_desc { + int uffd; + uint64_t num_readers; + /* Holds the write ends of the pipes for killing the readers. */ + int *pipefds; + pthread_t *readers; + struct uffd_reader_args *reader_args; }; struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, void *hva, uint64_t len, + uint64_t num_readers, uffd_handler_t handler); void uffd_stop_demand_paging(struct uffd_desc *uffd); diff --git a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h index 9f1725192a..972bb1c4ab 100644 --- a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h +++ b/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h @@ -5,7 +5,16 @@ #include <stdbool.h> #include <stdint.h> +#include "kvm_util_types.h" +#include "test_util.h" + +extern bool is_forced_emulation_enabled; + struct kvm_vm_arch { + vm_vaddr_t gdt; + vm_vaddr_t tss; + vm_vaddr_t idt; + uint64_t c_bit; uint64_t s_bit; int sev_fd; @@ -20,4 +29,23 @@ static inline bool __vm_arch_has_protected_memory(struct kvm_vm_arch *arch) #define vm_arch_has_protected_memory(vm) \ __vm_arch_has_protected_memory(&(vm)->arch) +#define vcpu_arch_put_guest(mem, __val) \ +do { \ + const typeof(mem) val = (__val); \ + \ + if (!is_forced_emulation_enabled || guest_random_bool(&guest_rng)) { \ + (mem) = val; \ + } else if (guest_random_bool(&guest_rng)) { \ + __asm__ __volatile__(KVM_FEP "mov %1, %0" \ + : "+m" (mem) \ + : "r" (val) : "memory"); \ + } else { \ + uint64_t __old = READ_ONCE(mem); \ + \ + __asm__ __volatile__(KVM_FEP LOCK_PREFIX "cmpxchg %[new], %[ptr]" \ + : [ptr] "+m" (mem), [old] "+a" (__old) \ + : [new]"r" (val) : "memory", "cc"); \ + } \ +} while (0) + #endif // SELFTEST_KVM_UTIL_ARCH_H diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 81ce37ec40..c0c7c1fe93 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -18,17 +18,12 @@ #include <linux/kvm_para.h> #include <linux/stringify.h> -#include "../kvm_util.h" +#include "kvm_util.h" +#include "ucall_common.h" extern bool host_cpu_is_intel; extern bool host_cpu_is_amd; -enum vm_guest_x86_subtype { - VM_SUBTYPE_NONE = 0, - VM_SUBTYPE_SEV, - VM_SUBTYPE_SEV_ES, -}; - /* Forced emulation prefix, used to invoke the emulator unconditionally. */ #define KVM_FEP "ud2; .byte 'k', 'v', 'm';" @@ -282,6 +277,7 @@ struct kvm_x86_cpu_property { #define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31) #define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7) #define X86_PROPERTY_MAX_VIRT_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15) +#define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23) #define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5) #define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11) @@ -1139,8 +1135,6 @@ struct idt_entry { uint32_t offset2; uint32_t reserved; }; -void vm_init_descriptor_tables(struct kvm_vm *vm); -void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu); void vm_install_exception_handler(struct kvm_vm *vm, int vector, void (*handler)(struct ex_regs *)); diff --git a/tools/testing/selftests/kvm/include/x86_64/sev.h b/tools/testing/selftests/kvm/include/x86_64/sev.h index 8a1bf88474..82c11c81a9 100644 --- a/tools/testing/selftests/kvm/include/x86_64/sev.h +++ b/tools/testing/selftests/kvm/include/x86_64/sev.h @@ -31,8 +31,9 @@ void sev_vm_launch(struct kvm_vm *vm, uint32_t policy); void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement); void sev_vm_launch_finish(struct kvm_vm *vm); -struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t policy, void *guest_code, +struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, struct kvm_vcpu **cpu); +void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement); kvm_static_assert(SEV_RET_SUCCESS == 0); @@ -67,20 +68,8 @@ kvm_static_assert(SEV_RET_SUCCESS == 0); __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \ }) -static inline void sev_vm_init(struct kvm_vm *vm) -{ - vm->arch.sev_fd = open_sev_dev_path_or_exit(); - - vm_sev_ioctl(vm, KVM_SEV_INIT, NULL); -} - - -static inline void sev_es_vm_init(struct kvm_vm *vm) -{ - vm->arch.sev_fd = open_sev_dev_path_or_exit(); - - vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL); -} +void sev_vm_init(struct kvm_vm *vm); +void sev_es_vm_init(struct kvm_vm *vm); static inline void sev_register_encrypted_memory(struct kvm_vm *vm, struct userspace_mem_region *region) diff --git a/tools/testing/selftests/kvm/include/x86_64/ucall.h b/tools/testing/selftests/kvm/include/x86_64/ucall.h index 06b244bd06..d3825dcc3c 100644 --- a/tools/testing/selftests/kvm/include/x86_64/ucall.h +++ b/tools/testing/selftests/kvm/include/x86_64/ucall.h @@ -2,7 +2,7 @@ #ifndef SELFTEST_KVM_UCALL_H #define SELFTEST_KVM_UCALL_H -#include "kvm_util_base.h" +#include "kvm_util.h" #define UCALL_EXIT_REASON KVM_EXIT_IO diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c index 698c1cfa31..f02355c3c4 100644 --- a/tools/testing/selftests/kvm/kvm_binary_stats_test.c +++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c @@ -6,8 +6,6 @@ * * Test the fd-based interface for KVM statistics. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index b9e23265e4..c78f34699f 100644 --- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -6,8 +6,6 @@ * * Test for KVM_CAP_MAX_VCPUS and KVM_CAP_MAX_VCPU_ID. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index e0ba97ac1c..dd8b12f626 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -8,9 +8,6 @@ * page size have been pre-allocated on your system, if you are planning to * use hugepages to back the guest memory for testing. */ - -#define _GNU_SOURCE /* for program_invocation_name */ - #include <stdio.h> #include <stdlib.h> #include <time.h> @@ -21,6 +18,7 @@ #include "kvm_util.h" #include "processor.h" #include "guest_modes.h" +#include "ucall_common.h" #define TEST_MEM_SLOT_INDEX 1 diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/aarch64/gic.c index 55668631d5..7abbf88665 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic.c +++ b/tools/testing/selftests/kvm/lib/aarch64/gic.c @@ -17,13 +17,12 @@ static const struct gic_common_ops *gic_common_ops; static struct spinlock gic_lock; -static void gic_cpu_init(unsigned int cpu, void *redist_base) +static void gic_cpu_init(unsigned int cpu) { - gic_common_ops->gic_cpu_init(cpu, redist_base); + gic_common_ops->gic_cpu_init(cpu); } -static void -gic_dist_init(enum gic_type type, unsigned int nr_cpus, void *dist_base) +static void gic_dist_init(enum gic_type type, unsigned int nr_cpus) { const struct gic_common_ops *gic_ops = NULL; @@ -40,7 +39,7 @@ gic_dist_init(enum gic_type type, unsigned int nr_cpus, void *dist_base) GUEST_ASSERT(gic_ops); - gic_ops->gic_init(nr_cpus, dist_base); + gic_ops->gic_init(nr_cpus); gic_common_ops = gic_ops; /* Make sure that the initialized data is visible to all the vCPUs */ @@ -49,18 +48,15 @@ gic_dist_init(enum gic_type type, unsigned int nr_cpus, void *dist_base) spin_unlock(&gic_lock); } -void gic_init(enum gic_type type, unsigned int nr_cpus, - void *dist_base, void *redist_base) +void gic_init(enum gic_type type, unsigned int nr_cpus) { uint32_t cpu = guest_get_vcpuid(); GUEST_ASSERT(type < GIC_TYPE_MAX); - GUEST_ASSERT(dist_base); - GUEST_ASSERT(redist_base); GUEST_ASSERT(nr_cpus); - gic_dist_init(type, nr_cpus, dist_base); - gic_cpu_init(cpu, redist_base); + gic_dist_init(type, nr_cpus); + gic_cpu_init(cpu); } void gic_irq_enable(unsigned int intid) diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h index 75d07313c8..d24e9ecc96 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h +++ b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h @@ -8,8 +8,8 @@ #define SELFTEST_KVM_GIC_PRIVATE_H struct gic_common_ops { - void (*gic_init)(unsigned int nr_cpus, void *dist_base); - void (*gic_cpu_init)(unsigned int cpu, void *redist_base); + void (*gic_init)(unsigned int nr_cpus); + void (*gic_cpu_init)(unsigned int cpu); void (*gic_irq_enable)(unsigned int intid); void (*gic_irq_disable)(unsigned int intid); uint64_t (*gic_read_iar)(void); diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c index 263bf3ed8f..66d05506f7 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c +++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c @@ -9,12 +9,21 @@ #include "processor.h" #include "delay.h" +#include "gic.h" #include "gic_v3.h" #include "gic_private.h" +#define GICV3_MAX_CPUS 512 + +#define GICD_INT_DEF_PRI 0xa0 +#define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\ + (GICD_INT_DEF_PRI << 16) |\ + (GICD_INT_DEF_PRI << 8) |\ + GICD_INT_DEF_PRI) + +#define ICC_PMR_DEF_PRIO 0xf0 + struct gicv3_data { - void *dist_base; - void *redist_base[GICV3_MAX_CPUS]; unsigned int nr_cpus; unsigned int nr_spis; }; @@ -35,17 +44,23 @@ static void gicv3_gicd_wait_for_rwp(void) { unsigned int count = 100000; /* 1s */ - while (readl(gicv3_data.dist_base + GICD_CTLR) & GICD_CTLR_RWP) { + while (readl(GICD_BASE_GVA + GICD_CTLR) & GICD_CTLR_RWP) { GUEST_ASSERT(count--); udelay(10); } } -static void gicv3_gicr_wait_for_rwp(void *redist_base) +static inline volatile void *gicr_base_cpu(uint32_t cpu) +{ + /* Align all the redistributors sequentially */ + return GICR_BASE_GVA + cpu * SZ_64K * 2; +} + +static void gicv3_gicr_wait_for_rwp(uint32_t cpu) { unsigned int count = 100000; /* 1s */ - while (readl(redist_base + GICR_CTLR) & GICR_CTLR_RWP) { + while (readl(gicr_base_cpu(cpu) + GICR_CTLR) & GICR_CTLR_RWP) { GUEST_ASSERT(count--); udelay(10); } @@ -56,7 +71,7 @@ static void gicv3_wait_for_rwp(uint32_t cpu_or_dist) if (cpu_or_dist & DIST_BIT) gicv3_gicd_wait_for_rwp(); else - gicv3_gicr_wait_for_rwp(gicv3_data.redist_base[cpu_or_dist]); + gicv3_gicr_wait_for_rwp(cpu_or_dist); } static enum gicv3_intid_range get_intid_range(unsigned int intid) @@ -116,15 +131,15 @@ static void gicv3_set_eoi_split(bool split) uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset) { - void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base - : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]); + volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA + : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist)); return readl(base + offset); } void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val) { - void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base - : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]); + volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA + : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist)); writel(reg_val, base + offset); } @@ -263,7 +278,7 @@ static bool gicv3_irq_get_pending(uint32_t intid) return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1); } -static void gicv3_enable_redist(void *redist_base) +static void gicv3_enable_redist(volatile void *redist_base) { uint32_t val = readl(redist_base + GICR_WAKER); unsigned int count = 100000; /* 1s */ @@ -278,21 +293,15 @@ static void gicv3_enable_redist(void *redist_base) } } -static inline void *gicr_base_cpu(void *redist_base, uint32_t cpu) +static void gicv3_cpu_init(unsigned int cpu) { - /* Align all the redistributors sequentially */ - return redist_base + cpu * SZ_64K * 2; -} - -static void gicv3_cpu_init(unsigned int cpu, void *redist_base) -{ - void *sgi_base; + volatile void *sgi_base; unsigned int i; - void *redist_base_cpu; + volatile void *redist_base_cpu; GUEST_ASSERT(cpu < gicv3_data.nr_cpus); - redist_base_cpu = gicr_base_cpu(redist_base, cpu); + redist_base_cpu = gicr_base_cpu(cpu); sgi_base = sgi_base_from_redist(redist_base_cpu); gicv3_enable_redist(redist_base_cpu); @@ -310,7 +319,7 @@ static void gicv3_cpu_init(unsigned int cpu, void *redist_base) writel(GICD_INT_DEF_PRI_X4, sgi_base + GICR_IPRIORITYR0 + i); - gicv3_gicr_wait_for_rwp(redist_base_cpu); + gicv3_gicr_wait_for_rwp(cpu); /* Enable the GIC system register (ICC_*) access */ write_sysreg_s(read_sysreg_s(SYS_ICC_SRE_EL1) | ICC_SRE_EL1_SRE, @@ -320,18 +329,15 @@ static void gicv3_cpu_init(unsigned int cpu, void *redist_base) write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1); /* Enable non-secure Group-1 interrupts */ - write_sysreg_s(ICC_IGRPEN1_EL1_ENABLE, SYS_ICC_GRPEN1_EL1); - - gicv3_data.redist_base[cpu] = redist_base_cpu; + write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1); } static void gicv3_dist_init(void) { - void *dist_base = gicv3_data.dist_base; unsigned int i; /* Disable the distributor until we set things up */ - writel(0, dist_base + GICD_CTLR); + writel(0, GICD_BASE_GVA + GICD_CTLR); gicv3_gicd_wait_for_rwp(); /* @@ -339,33 +345,32 @@ static void gicv3_dist_init(void) * Also, deactivate and disable them. */ for (i = 32; i < gicv3_data.nr_spis; i += 32) { - writel(~0, dist_base + GICD_IGROUPR + i / 8); - writel(~0, dist_base + GICD_ICACTIVER + i / 8); - writel(~0, dist_base + GICD_ICENABLER + i / 8); + writel(~0, GICD_BASE_GVA + GICD_IGROUPR + i / 8); + writel(~0, GICD_BASE_GVA + GICD_ICACTIVER + i / 8); + writel(~0, GICD_BASE_GVA + GICD_ICENABLER + i / 8); } /* Set a default priority for all the SPIs */ for (i = 32; i < gicv3_data.nr_spis; i += 4) writel(GICD_INT_DEF_PRI_X4, - dist_base + GICD_IPRIORITYR + i); + GICD_BASE_GVA + GICD_IPRIORITYR + i); /* Wait for the settings to sync-in */ gicv3_gicd_wait_for_rwp(); /* Finally, enable the distributor globally with ARE */ writel(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | - GICD_CTLR_ENABLE_G1, dist_base + GICD_CTLR); + GICD_CTLR_ENABLE_G1, GICD_BASE_GVA + GICD_CTLR); gicv3_gicd_wait_for_rwp(); } -static void gicv3_init(unsigned int nr_cpus, void *dist_base) +static void gicv3_init(unsigned int nr_cpus) { GUEST_ASSERT(nr_cpus <= GICV3_MAX_CPUS); gicv3_data.nr_cpus = nr_cpus; - gicv3_data.dist_base = dist_base; gicv3_data.nr_spis = GICD_TYPER_SPIS( - readl(gicv3_data.dist_base + GICD_TYPER)); + readl(GICD_BASE_GVA + GICD_TYPER)); if (gicv3_data.nr_spis > 1020) gicv3_data.nr_spis = 1020; @@ -396,3 +401,27 @@ const struct gic_common_ops gicv3_ops = { .gic_irq_get_pending = gicv3_irq_get_pending, .gic_irq_set_config = gicv3_irq_set_config, }; + +void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size, + vm_paddr_t pend_table) +{ + volatile void *rdist_base = gicr_base_cpu(guest_get_vcpuid()); + + u32 ctlr; + u64 val; + + val = (cfg_table | + GICR_PROPBASER_InnerShareable | + GICR_PROPBASER_RaWaWb | + ((ilog2(cfg_table_size) - 1) & GICR_PROPBASER_IDBITS_MASK)); + writeq_relaxed(val, rdist_base + GICR_PROPBASER); + + val = (pend_table | + GICR_PENDBASER_InnerShareable | + GICR_PENDBASER_RaWaWb); + writeq_relaxed(val, rdist_base + GICR_PENDBASER); + + ctlr = readl_relaxed(rdist_base + GICR_CTLR); + ctlr |= GICR_CTLR_ENABLE_LPIS; + writel_relaxed(ctlr, rdist_base + GICR_CTLR); +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c new file mode 100644 index 0000000000..09f2705456 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Guest ITS library, generously donated by drivers/irqchip/irq-gic-v3-its.c + * over in the kernel tree. + */ + +#include <linux/kvm.h> +#include <linux/sizes.h> +#include <asm/kvm_para.h> +#include <asm/kvm.h> + +#include "kvm_util.h" +#include "vgic.h" +#include "gic.h" +#include "gic_v3.h" +#include "processor.h" + +static u64 its_read_u64(unsigned long offset) +{ + return readq_relaxed(GITS_BASE_GVA + offset); +} + +static void its_write_u64(unsigned long offset, u64 val) +{ + writeq_relaxed(val, GITS_BASE_GVA + offset); +} + +static u32 its_read_u32(unsigned long offset) +{ + return readl_relaxed(GITS_BASE_GVA + offset); +} + +static void its_write_u32(unsigned long offset, u32 val) +{ + writel_relaxed(val, GITS_BASE_GVA + offset); +} + +static unsigned long its_find_baser(unsigned int type) +{ + int i; + + for (i = 0; i < GITS_BASER_NR_REGS; i++) { + u64 baser; + unsigned long offset = GITS_BASER + (i * sizeof(baser)); + + baser = its_read_u64(offset); + if (GITS_BASER_TYPE(baser) == type) + return offset; + } + + GUEST_FAIL("Couldn't find an ITS BASER of type %u", type); + return -1; +} + +static void its_install_table(unsigned int type, vm_paddr_t base, size_t size) +{ + unsigned long offset = its_find_baser(type); + u64 baser; + + baser = ((size / SZ_64K) - 1) | + GITS_BASER_PAGE_SIZE_64K | + GITS_BASER_InnerShareable | + base | + GITS_BASER_RaWaWb | + GITS_BASER_VALID; + + its_write_u64(offset, baser); +} + +static void its_install_cmdq(vm_paddr_t base, size_t size) +{ + u64 cbaser; + + cbaser = ((size / SZ_4K) - 1) | + GITS_CBASER_InnerShareable | + base | + GITS_CBASER_RaWaWb | + GITS_CBASER_VALID; + + its_write_u64(GITS_CBASER, cbaser); +} + +void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz, + vm_paddr_t device_tbl, size_t device_tbl_sz, + vm_paddr_t cmdq, size_t cmdq_size) +{ + u32 ctlr; + + its_install_table(GITS_BASER_TYPE_COLLECTION, coll_tbl, coll_tbl_sz); + its_install_table(GITS_BASER_TYPE_DEVICE, device_tbl, device_tbl_sz); + its_install_cmdq(cmdq, cmdq_size); + + ctlr = its_read_u32(GITS_CTLR); + ctlr |= GITS_CTLR_ENABLE; + its_write_u32(GITS_CTLR, ctlr); +} + +struct its_cmd_block { + union { + u64 raw_cmd[4]; + __le64 raw_cmd_le[4]; + }; +}; + +static inline void its_fixup_cmd(struct its_cmd_block *cmd) +{ + /* Let's fixup BE commands */ + cmd->raw_cmd_le[0] = cpu_to_le64(cmd->raw_cmd[0]); + cmd->raw_cmd_le[1] = cpu_to_le64(cmd->raw_cmd[1]); + cmd->raw_cmd_le[2] = cpu_to_le64(cmd->raw_cmd[2]); + cmd->raw_cmd_le[3] = cpu_to_le64(cmd->raw_cmd[3]); +} + +static void its_mask_encode(u64 *raw_cmd, u64 val, int h, int l) +{ + u64 mask = GENMASK_ULL(h, l); + *raw_cmd &= ~mask; + *raw_cmd |= (val << l) & mask; +} + +static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr) +{ + its_mask_encode(&cmd->raw_cmd[0], cmd_nr, 7, 0); +} + +static void its_encode_devid(struct its_cmd_block *cmd, u32 devid) +{ + its_mask_encode(&cmd->raw_cmd[0], devid, 63, 32); +} + +static void its_encode_event_id(struct its_cmd_block *cmd, u32 id) +{ + its_mask_encode(&cmd->raw_cmd[1], id, 31, 0); +} + +static void its_encode_phys_id(struct its_cmd_block *cmd, u32 phys_id) +{ + its_mask_encode(&cmd->raw_cmd[1], phys_id, 63, 32); +} + +static void its_encode_size(struct its_cmd_block *cmd, u8 size) +{ + its_mask_encode(&cmd->raw_cmd[1], size, 4, 0); +} + +static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr) +{ + its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 51, 8); +} + +static void its_encode_valid(struct its_cmd_block *cmd, int valid) +{ + its_mask_encode(&cmd->raw_cmd[2], !!valid, 63, 63); +} + +static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr) +{ + its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16); +} + +static void its_encode_collection(struct its_cmd_block *cmd, u16 col) +{ + its_mask_encode(&cmd->raw_cmd[2], col, 15, 0); +} + +#define GITS_CMDQ_POLL_ITERATIONS 0 + +static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd) +{ + u64 cwriter = its_read_u64(GITS_CWRITER); + struct its_cmd_block *dst = cmdq_base + cwriter; + u64 cbaser = its_read_u64(GITS_CBASER); + size_t cmdq_size; + u64 next; + int i; + + cmdq_size = ((cbaser & 0xFF) + 1) * SZ_4K; + + its_fixup_cmd(cmd); + + WRITE_ONCE(*dst, *cmd); + dsb(ishst); + next = (cwriter + sizeof(*cmd)) % cmdq_size; + its_write_u64(GITS_CWRITER, next); + + /* + * Polling isn't necessary considering KVM's ITS emulation at the time + * of writing this, as the CMDQ is processed synchronously after a write + * to CWRITER. + */ + for (i = 0; its_read_u64(GITS_CREADR) != next; i++) { + __GUEST_ASSERT(i < GITS_CMDQ_POLL_ITERATIONS, + "ITS didn't process command at offset %lu after %d iterations\n", + cwriter, i); + + cpu_relax(); + } +} + +void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base, + size_t itt_size, bool valid) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_MAPD); + its_encode_devid(&cmd, device_id); + its_encode_size(&cmd, ilog2(itt_size) - 1); + its_encode_itt(&cmd, itt_base); + its_encode_valid(&cmd, valid); + + its_send_cmd(cmdq_base, &cmd); +} + +void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_MAPC); + its_encode_collection(&cmd, collection_id); + its_encode_target(&cmd, vcpu_id); + its_encode_valid(&cmd, valid); + + its_send_cmd(cmdq_base, &cmd); +} + +void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id, + u32 collection_id, u32 intid) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_MAPTI); + its_encode_devid(&cmd, device_id); + its_encode_event_id(&cmd, event_id); + its_encode_phys_id(&cmd, intid); + its_encode_collection(&cmd, collection_id); + + its_send_cmd(cmdq_base, &cmd); +} + +void its_send_invall_cmd(void *cmdq_base, u32 collection_id) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_INVALL); + its_encode_collection(&cmd, collection_id); + + its_send_cmd(cmdq_base, &cmd); +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index a9eb17295b..0ac7cc89f3 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -11,6 +11,8 @@ #include "guest_modes.h" #include "kvm_util.h" #include "processor.h" +#include "ucall_common.h" + #include <linux/bitfield.h> #include <linux/sizes.h> diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c index 184378d593..4427f43f73 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c +++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c @@ -3,8 +3,10 @@ * ARM Generic Interrupt Controller (GIC) v3 host support */ +#include <linux/kernel.h> #include <linux/kvm.h> #include <linux/sizes.h> +#include <asm/cputype.h> #include <asm/kvm_para.h> #include <asm/kvm.h> @@ -19,8 +21,6 @@ * Input args: * vm - KVM VM * nr_vcpus - Number of vCPUs supported by this VM - * gicd_base_gpa - Guest Physical Address of the Distributor region - * gicr_base_gpa - Guest Physical Address of the Redistributor region * * Output args: None * @@ -30,11 +30,10 @@ * redistributor regions of the guest. Since it depends on the number of * vCPUs for the VM, it must be called after all the vCPUs have been created. */ -int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs, - uint64_t gicd_base_gpa, uint64_t gicr_base_gpa) +int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs) { int gic_fd; - uint64_t redist_attr; + uint64_t attr; struct list_head *iter; unsigned int nr_gic_pages, nr_vcpus_created = 0; @@ -60,18 +59,19 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs, kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + attr = GICD_BASE_GPA; kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_DIST, &gicd_base_gpa); + KVM_VGIC_V3_ADDR_TYPE_DIST, &attr); nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE); - virt_map(vm, gicd_base_gpa, gicd_base_gpa, nr_gic_pages); + virt_map(vm, GICD_BASE_GPA, GICD_BASE_GPA, nr_gic_pages); /* Redistributor setup */ - redist_attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, gicr_base_gpa, 0, 0); + attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, GICR_BASE_GPA, 0, 0); kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &redist_attr); + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &attr); nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_REDIST_SIZE * nr_vcpus); - virt_map(vm, gicr_base_gpa, gicr_base_gpa, nr_gic_pages); + virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages); kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); @@ -168,3 +168,21 @@ void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu) { vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER); } + +int vgic_its_setup(struct kvm_vm *vm) +{ + int its_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_ITS); + u64 attr; + + attr = GITS_BASE_GPA; + kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_ITS_ADDR_TYPE, &attr); + + kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + virt_map(vm, GITS_BASE_GPA, GITS_BASE_GPA, + vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_ITS_SIZE)); + + return its_fd; +} diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c index 2bd25b191d..b49690658c 100644 --- a/tools/testing/selftests/kvm/lib/assert.c +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -4,9 +4,6 @@ * * Copyright (C) 2018, Google LLC. */ - -#define _GNU_SOURCE /* for getline(3) and strchrnul(3)*/ - #include "test_util.h" #include <execinfo.h> diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index b2262b5fad..ad00e47618 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -4,11 +4,10 @@ * * Copyright (C) 2018, Google LLC. */ - -#define _GNU_SOURCE /* for program_invocation_name */ #include "test_util.h" #include "kvm_util.h" #include "processor.h" +#include "ucall_common.h" #include <assert.h> #include <sched.h> @@ -20,6 +19,9 @@ #define KVM_UTIL_MIN_PFN 2 +uint32_t guest_random_seed; +struct guest_random_state guest_rng; + static int vcpu_mmap_sz(void); int open_path_or_exit(const char *path, int flags) @@ -276,7 +278,6 @@ struct kvm_vm *____vm_create(struct vm_shape shape) vm->mode = shape.mode; vm->type = shape.type; - vm->subtype = shape.subtype; vm->pa_bits = vm_guest_mode_params[vm->mode].pa_bits; vm->va_bits = vm_guest_mode_params[vm->mode].va_bits; @@ -433,6 +434,10 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, slot0 = memslot2region(vm, 0); ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); + pr_info("Random seed: 0x%x\n", guest_random_seed); + guest_rng = new_guest_random_state(guest_random_seed); + sync_global_to_guest(vm, guest_rng); + kvm_arch_vm_post_create(vm); return vm; @@ -930,6 +935,10 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, errno, strerror(errno)); } +#define TEST_REQUIRE_SET_USER_MEMORY_REGION2() \ + __TEST_REQUIRE(kvm_has_cap(KVM_CAP_USER_MEMORY2), \ + "KVM selftests now require KVM_SET_USER_MEMORY_REGION2 (introduced in v6.8)") + int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, uint64_t gpa, uint64_t size, void *hva, uint32_t guest_memfd, uint64_t guest_memfd_offset) @@ -944,6 +953,8 @@ int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flag .guest_memfd_offset = guest_memfd_offset, }; + TEST_REQUIRE_SET_USER_MEMORY_REGION2(); + return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION2, ®ion); } @@ -970,6 +981,8 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, size_t mem_size = npages * vm->page_size; size_t alignment; + TEST_REQUIRE_SET_USER_MEMORY_REGION2(); + TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages, "Number of guest pages is not compatible with the host. " "Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages)); @@ -2306,6 +2319,8 @@ void __attribute((constructor)) kvm_selftest_init(void) /* Tell stdout not to buffer its content. */ setbuf(stdout, NULL); + guest_random_seed = random(); + kvm_selftest_arch_init(); } diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index cf2c739713..313277486a 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -2,14 +2,13 @@ /* * Copyright (C) 2020, Google LLC. */ -#define _GNU_SOURCE - #include <inttypes.h> #include <linux/bitmap.h> #include "kvm_util.h" #include "memstress.h" #include "processor.h" +#include "ucall_common.h" struct memstress_args memstress_args; @@ -56,7 +55,7 @@ void memstress_guest_code(uint32_t vcpu_idx) uint64_t page; int i; - rand_state = new_guest_random_state(args->random_seed + vcpu_idx); + rand_state = new_guest_random_state(guest_random_seed + vcpu_idx); gva = vcpu_args->gva; pages = vcpu_args->pages; @@ -76,7 +75,7 @@ void memstress_guest_code(uint32_t vcpu_idx) addr = gva + (page * args->guest_page_size); - if (guest_random_u32(&rand_state) % 100 < args->write_percent) + if (__guest_random_bool(&rand_state, args->write_percent)) *(uint64_t *)addr = 0x0123456789ABCDEF; else READ_ONCE(*(uint64_t *)addr); @@ -243,12 +242,6 @@ void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent) sync_global_to_guest(vm, memstress_args.write_percent); } -void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed) -{ - memstress_args.random_seed = random_seed; - sync_global_to_guest(vm, memstress_args.random_seed); -} - void memstress_set_random_access(struct kvm_vm *vm, bool random_access) { memstress_args.random_access = random_access; diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index e8211f5d68..6ae47b3d6b 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -10,6 +10,7 @@ #include "kvm_util.h" #include "processor.h" +#include "ucall_common.h" #define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000 @@ -502,3 +503,15 @@ bool guest_sbi_probe_extension(int extid, long *out_val) return true; } + +unsigned long get_host_sbi_spec_version(void) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_GET_SPEC_VERSION, 0, + 0, 0, 0, 0, 0); + + GUEST_ASSERT(!ret.error); + + return ret.value; +} diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c index 14ee17151a..b5035c63d5 100644 --- a/tools/testing/selftests/kvm/lib/riscv/ucall.c +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c @@ -9,6 +9,7 @@ #include "kvm_util.h" #include "processor.h" +#include "sbi.h" void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 5a8f8becb1..8ed0b74ae8 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -4,8 +4,6 @@ * * Copyright (C) 2020, Google LLC. */ - -#define _GNU_SOURCE #include <stdio.h> #include <stdarg.h> #include <assert.h> diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c index f5af65a41c..42151e5719 100644 --- a/tools/testing/selftests/kvm/lib/ucall_common.c +++ b/tools/testing/selftests/kvm/lib/ucall_common.c @@ -1,9 +1,12 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "kvm_util.h" #include "linux/types.h" #include "linux/bitmap.h" #include "linux/atomic.h" +#include "kvm_util.h" +#include "ucall_common.h" + + #define GUEST_UCALL_FAILED -1 struct ucall_header { diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c index f4eef6eb2d..7c9de84144 100644 --- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c +++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c @@ -6,9 +6,6 @@ * Copyright (C) 2018, Red Hat, Inc. * Copyright (C) 2019-2022 Google LLC */ - -#define _GNU_SOURCE /* for pipe2 */ - #include <inttypes.h> #include <stdio.h> #include <stdlib.h> @@ -16,6 +13,7 @@ #include <poll.h> #include <pthread.h> #include <linux/userfaultfd.h> +#include <sys/epoll.h> #include <sys/syscall.h> #include "kvm_util.h" @@ -27,76 +25,69 @@ static void *uffd_handler_thread_fn(void *arg) { - struct uffd_desc *uffd_desc = (struct uffd_desc *)arg; - int uffd = uffd_desc->uffd; - int pipefd = uffd_desc->pipefds[0]; - useconds_t delay = uffd_desc->delay; + struct uffd_reader_args *reader_args = (struct uffd_reader_args *)arg; + int uffd = reader_args->uffd; int64_t pages = 0; struct timespec start; struct timespec ts_diff; + struct epoll_event evt; + int epollfd; + + epollfd = epoll_create(1); + TEST_ASSERT(epollfd >= 0, "Failed to create epollfd."); + + evt.events = EPOLLIN | EPOLLEXCLUSIVE; + evt.data.u32 = 0; + TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, uffd, &evt), + "Failed to add uffd to epollfd"); + + evt.events = EPOLLIN; + evt.data.u32 = 1; + TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, reader_args->pipe, &evt), + "Failed to add pipe to epollfd"); clock_gettime(CLOCK_MONOTONIC, &start); while (1) { struct uffd_msg msg; - struct pollfd pollfd[2]; - char tmp_chr; int r; - pollfd[0].fd = uffd; - pollfd[0].events = POLLIN; - pollfd[1].fd = pipefd; - pollfd[1].events = POLLIN; + r = epoll_wait(epollfd, &evt, 1, -1); + TEST_ASSERT(r == 1, + "Unexpected number of events (%d) from epoll, errno = %d", + r, errno); - r = poll(pollfd, 2, -1); - switch (r) { - case -1: - pr_info("poll err"); - continue; - case 0: - continue; - case 1: - break; - default: - pr_info("Polling uffd returned %d", r); - return NULL; - } - - if (pollfd[0].revents & POLLERR) { - pr_info("uffd revents has POLLERR"); - return NULL; - } + if (evt.data.u32 == 1) { + char tmp_chr; - if (pollfd[1].revents & POLLIN) { - r = read(pollfd[1].fd, &tmp_chr, 1); + TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)), + "Reader thread received EPOLLERR or EPOLLHUP on pipe."); + r = read(reader_args->pipe, &tmp_chr, 1); TEST_ASSERT(r == 1, - "Error reading pipefd in UFFD thread"); + "Error reading pipefd in uffd reader thread"); break; } - if (!(pollfd[0].revents & POLLIN)) - continue; + TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)), + "Reader thread received EPOLLERR or EPOLLHUP on uffd."); r = read(uffd, &msg, sizeof(msg)); if (r == -1) { - if (errno == EAGAIN) - continue; - pr_info("Read of uffd got errno %d\n", errno); - return NULL; + TEST_ASSERT(errno == EAGAIN, + "Error reading from UFFD: errno = %d", errno); + continue; } - if (r != sizeof(msg)) { - pr_info("Read on uffd returned unexpected size: %d bytes", r); - return NULL; - } + TEST_ASSERT(r == sizeof(msg), + "Read on uffd returned unexpected number of bytes (%d)", r); if (!(msg.event & UFFD_EVENT_PAGEFAULT)) continue; - if (delay) - usleep(delay); - r = uffd_desc->handler(uffd_desc->uffd_mode, uffd, &msg); - if (r < 0) - return NULL; + if (reader_args->delay) + usleep(reader_args->delay); + r = reader_args->handler(reader_args->uffd_mode, uffd, &msg); + TEST_ASSERT(r >= 0, + "Reader thread handler fn returned negative value %d", r); pages++; } @@ -110,6 +101,7 @@ static void *uffd_handler_thread_fn(void *arg) struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, void *hva, uint64_t len, + uint64_t num_readers, uffd_handler_t handler) { struct uffd_desc *uffd_desc; @@ -118,14 +110,25 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, struct uffdio_api uffdio_api; struct uffdio_register uffdio_register; uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; - int ret; + int ret, i; PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", is_minor ? "MINOR" : "MISSING", is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY"); uffd_desc = malloc(sizeof(struct uffd_desc)); - TEST_ASSERT(uffd_desc, "malloc failed"); + TEST_ASSERT(uffd_desc, "Failed to malloc uffd descriptor"); + + uffd_desc->pipefds = calloc(sizeof(int), num_readers); + TEST_ASSERT(uffd_desc->pipefds, "Failed to alloc pipes"); + + uffd_desc->readers = calloc(sizeof(pthread_t), num_readers); + TEST_ASSERT(uffd_desc->readers, "Failed to alloc reader threads"); + + uffd_desc->reader_args = calloc(sizeof(struct uffd_reader_args), num_readers); + TEST_ASSERT(uffd_desc->reader_args, "Failed to alloc reader_args"); + + uffd_desc->num_readers = num_readers; /* In order to get minor faults, prefault via the alias. */ if (is_minor) @@ -148,18 +151,28 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) == expected_ioctls, "missing userfaultfd ioctls"); - ret = pipe2(uffd_desc->pipefds, O_CLOEXEC | O_NONBLOCK); - TEST_ASSERT(!ret, "Failed to set up pipefd"); - - uffd_desc->uffd_mode = uffd_mode; uffd_desc->uffd = uffd; - uffd_desc->delay = delay; - uffd_desc->handler = handler; - pthread_create(&uffd_desc->thread, NULL, uffd_handler_thread_fn, - uffd_desc); + for (i = 0; i < uffd_desc->num_readers; ++i) { + int pipes[2]; - PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n", - hva, hva + len); + ret = pipe2((int *) &pipes, O_CLOEXEC | O_NONBLOCK); + TEST_ASSERT(!ret, "Failed to set up pipefd %i for uffd_desc %p", + i, uffd_desc); + + uffd_desc->pipefds[i] = pipes[1]; + + uffd_desc->reader_args[i].uffd_mode = uffd_mode; + uffd_desc->reader_args[i].uffd = uffd; + uffd_desc->reader_args[i].delay = delay; + uffd_desc->reader_args[i].handler = handler; + uffd_desc->reader_args[i].pipe = pipes[0]; + + pthread_create(&uffd_desc->readers[i], NULL, uffd_handler_thread_fn, + &uffd_desc->reader_args[i]); + + PER_VCPU_DEBUG("Created uffd thread %i for HVA range [%p, %p)\n", + i, hva, hva + len); + } return uffd_desc; } @@ -167,19 +180,26 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay, void uffd_stop_demand_paging(struct uffd_desc *uffd) { char c = 0; - int ret; + int i; - ret = write(uffd->pipefds[1], &c, 1); - TEST_ASSERT(ret == 1, "Unable to write to pipefd"); + for (i = 0; i < uffd->num_readers; ++i) + TEST_ASSERT(write(uffd->pipefds[i], &c, 1) == 1, + "Unable to write to pipefd %i for uffd_desc %p", i, uffd); - ret = pthread_join(uffd->thread, NULL); - TEST_ASSERT(ret == 0, "Pthread_join failed."); + for (i = 0; i < uffd->num_readers; ++i) + TEST_ASSERT(!pthread_join(uffd->readers[i], NULL), + "Pthread_join failed on reader %i for uffd_desc %p", i, uffd); close(uffd->uffd); - close(uffd->pipefds[1]); - close(uffd->pipefds[0]); + for (i = 0; i < uffd->num_readers; ++i) { + close(uffd->pipefds[i]); + close(uffd->reader_args[i].pipe); + } + free(uffd->pipefds); + free(uffd->readers); + free(uffd->reader_args); free(uffd); } diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 74a4c736c9..594b061aef 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -15,14 +15,16 @@ #define NUM_INTERRUPTS 256 #endif -#define DEFAULT_CODE_SELECTOR 0x8 -#define DEFAULT_DATA_SELECTOR 0x10 +#define KERNEL_CS 0x8 +#define KERNEL_DS 0x10 +#define KERNEL_TSS 0x18 #define MAX_NR_CPUID_ENTRIES 100 vm_vaddr_t exception_handlers; bool host_cpu_is_amd; bool host_cpu_is_intel; +bool is_forced_emulation_enabled; static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent) { @@ -417,7 +419,7 @@ static void kvm_seg_set_unusable(struct kvm_segment *segp) static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) { - void *gdt = addr_gva2hva(vm, vm->gdt); + void *gdt = addr_gva2hva(vm, vm->arch.gdt); struct desc64 *desc = gdt + (segp->selector >> 3) * 8; desc->limit0 = segp->limit & 0xFFFF; @@ -437,27 +439,10 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) desc->base3 = segp->base >> 32; } - -/* - * Set Long Mode Flat Kernel Code Segment - * - * Input Args: - * vm - VM whose GDT is being filled, or NULL to only write segp - * selector - selector value - * - * Output Args: - * segp - Pointer to KVM segment - * - * Return: None - * - * Sets up the KVM segment pointed to by @segp, to be a code segment - * with the selector value given by @selector. - */ -static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector, - struct kvm_segment *segp) +static void kvm_seg_set_kernel_code_64bit(struct kvm_segment *segp) { memset(segp, 0, sizeof(*segp)); - segp->selector = selector; + segp->selector = KERNEL_CS; segp->limit = 0xFFFFFFFFu; segp->s = 0x1; /* kTypeCodeData */ segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed @@ -466,30 +451,12 @@ static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector, segp->g = true; segp->l = true; segp->present = 1; - if (vm) - kvm_seg_fill_gdt_64bit(vm, segp); } -/* - * Set Long Mode Flat Kernel Data Segment - * - * Input Args: - * vm - VM whose GDT is being filled, or NULL to only write segp - * selector - selector value - * - * Output Args: - * segp - Pointer to KVM segment - * - * Return: None - * - * Sets up the KVM segment pointed to by @segp, to be a data segment - * with the selector value given by @selector. - */ -static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, - struct kvm_segment *segp) +static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp) { memset(segp, 0, sizeof(*segp)); - segp->selector = selector; + segp->selector = KERNEL_DS; segp->limit = 0xFFFFFFFFu; segp->s = 0x1; /* kTypeCodeData */ segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed @@ -497,8 +464,6 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, */ segp->g = true; segp->present = true; - if (vm) - kvm_seg_fill_gdt_64bit(vm, segp); } vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) @@ -516,72 +481,153 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level)); } -static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt) -{ - if (!vm->gdt) - vm->gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - - dt->base = vm->gdt; - dt->limit = getpagesize(); -} - -static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp, - int selector) +static void kvm_seg_set_tss_64bit(vm_vaddr_t base, struct kvm_segment *segp) { - if (!vm->tss) - vm->tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - memset(segp, 0, sizeof(*segp)); - segp->base = vm->tss; + segp->base = base; segp->limit = 0x67; - segp->selector = selector; + segp->selector = KERNEL_TSS; segp->type = 0xb; segp->present = 1; - kvm_seg_fill_gdt_64bit(vm, segp); } -static void vcpu_setup(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { struct kvm_sregs sregs; + TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K); + /* Set mode specific system register values. */ vcpu_sregs_get(vcpu, &sregs); - sregs.idt.limit = 0; + sregs.idt.base = vm->arch.idt; + sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1; + sregs.gdt.base = vm->arch.gdt; + sregs.gdt.limit = getpagesize() - 1; + + sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; + sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; + sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); + + kvm_seg_set_unusable(&sregs.ldt); + kvm_seg_set_kernel_code_64bit(&sregs.cs); + kvm_seg_set_kernel_data_64bit(&sregs.ds); + kvm_seg_set_kernel_data_64bit(&sregs.es); + kvm_seg_set_kernel_data_64bit(&sregs.gs); + kvm_seg_set_tss_64bit(vm->arch.tss, &sregs.tr); + + sregs.cr3 = vm->pgd; + vcpu_sregs_set(vcpu, &sregs); +} + +static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, + int dpl, unsigned short selector) +{ + struct idt_entry *base = + (struct idt_entry *)addr_gva2hva(vm, vm->arch.idt); + struct idt_entry *e = &base[vector]; + + memset(e, 0, sizeof(*e)); + e->offset0 = addr; + e->selector = selector; + e->ist = 0; + e->type = 14; + e->dpl = dpl; + e->p = 1; + e->offset1 = addr >> 16; + e->offset2 = addr >> 32; +} + +static bool kvm_fixup_exception(struct ex_regs *regs) +{ + if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10) + return false; - kvm_setup_gdt(vm, &sregs.gdt); + if (regs->vector == DE_VECTOR) + return false; - switch (vm->mode) { - case VM_MODE_PXXV48_4K: - sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; - sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; - sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); + regs->rip = regs->r11; + regs->r9 = regs->vector; + regs->r10 = regs->error_code; + return true; +} - kvm_seg_set_unusable(&sregs.ldt); - kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs); - kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds); - kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es); - kvm_setup_tss_64bit(vm, &sregs.tr, 0x18); - break; +void route_exception(struct ex_regs *regs) +{ + typedef void(*handler)(struct ex_regs *); + handler *handlers = (handler *)exception_handlers; - default: - TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); + if (handlers && handlers[regs->vector]) { + handlers[regs->vector](regs); + return; } - sregs.cr3 = vm->pgd; - vcpu_sregs_set(vcpu, &sregs); + if (kvm_fixup_exception(regs)) + return; + + ucall_assert(UCALL_UNHANDLED, + "Unhandled exception in guest", __FILE__, __LINE__, + "Unhandled exception '0x%lx' at guest RIP '0x%lx'", + regs->vector, regs->rip); +} + +static void vm_init_descriptor_tables(struct kvm_vm *vm) +{ + extern void *idt_handlers; + struct kvm_segment seg; + int i; + + vm->arch.gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + vm->arch.idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + vm->arch.tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); + + /* Handlers have the same address in both address spaces.*/ + for (i = 0; i < NUM_INTERRUPTS; i++) + set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, KERNEL_CS); + + *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; + + kvm_seg_set_kernel_code_64bit(&seg); + kvm_seg_fill_gdt_64bit(vm, &seg); + + kvm_seg_set_kernel_data_64bit(&seg); + kvm_seg_fill_gdt_64bit(vm, &seg); + + kvm_seg_set_tss_64bit(vm->arch.tss, &seg); + kvm_seg_fill_gdt_64bit(vm, &seg); +} + +void vm_install_exception_handler(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)) +{ + vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); + + handlers[vector] = (vm_vaddr_t)handler; +} + +void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) + REPORT_GUEST_ASSERT(uc); } void kvm_arch_vm_post_create(struct kvm_vm *vm) { vm_create_irqchip(vm); + vm_init_descriptor_tables(vm); + sync_global_to_guest(vm, host_cpu_is_intel); sync_global_to_guest(vm, host_cpu_is_amd); + sync_global_to_guest(vm, is_forced_emulation_enabled); + + if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) { + struct kvm_sev_init init = { 0 }; - if (vm->subtype == VM_SUBTYPE_SEV) - sev_vm_init(vm); - else if (vm->subtype == VM_SUBTYPE_SEV_ES) - sev_es_vm_init(vm); + vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); + } } void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code) @@ -621,7 +667,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) vcpu = __vm_vcpu_add(vm, vcpu_id); vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); - vcpu_setup(vm, vcpu); + vcpu_init_sregs(vm, vcpu); /* Setup guest general purpose registers */ vcpu_regs_get(vcpu, ®s); @@ -1081,108 +1127,15 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) void kvm_init_vm_address_properties(struct kvm_vm *vm) { - if (vm->subtype == VM_SUBTYPE_SEV || vm->subtype == VM_SUBTYPE_SEV_ES) { + if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) { + vm->arch.sev_fd = open_sev_dev_path_or_exit(); vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT)); vm->gpa_tag_mask = vm->arch.c_bit; + } else { + vm->arch.sev_fd = -1; } } -static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, - int dpl, unsigned short selector) -{ - struct idt_entry *base = - (struct idt_entry *)addr_gva2hva(vm, vm->idt); - struct idt_entry *e = &base[vector]; - - memset(e, 0, sizeof(*e)); - e->offset0 = addr; - e->selector = selector; - e->ist = 0; - e->type = 14; - e->dpl = dpl; - e->p = 1; - e->offset1 = addr >> 16; - e->offset2 = addr >> 32; -} - - -static bool kvm_fixup_exception(struct ex_regs *regs) -{ - if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10) - return false; - - if (regs->vector == DE_VECTOR) - return false; - - regs->rip = regs->r11; - regs->r9 = regs->vector; - regs->r10 = regs->error_code; - return true; -} - -void route_exception(struct ex_regs *regs) -{ - typedef void(*handler)(struct ex_regs *); - handler *handlers = (handler *)exception_handlers; - - if (handlers && handlers[regs->vector]) { - handlers[regs->vector](regs); - return; - } - - if (kvm_fixup_exception(regs)) - return; - - ucall_assert(UCALL_UNHANDLED, - "Unhandled exception in guest", __FILE__, __LINE__, - "Unhandled exception '0x%lx' at guest RIP '0x%lx'", - regs->vector, regs->rip); -} - -void vm_init_descriptor_tables(struct kvm_vm *vm) -{ - extern void *idt_handlers; - int i; - - vm->idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA); - /* Handlers have the same address in both address spaces.*/ - for (i = 0; i < NUM_INTERRUPTS; i++) - set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, - DEFAULT_CODE_SELECTOR); -} - -void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu) -{ - struct kvm_vm *vm = vcpu->vm; - struct kvm_sregs sregs; - - vcpu_sregs_get(vcpu, &sregs); - sregs.idt.base = vm->idt; - sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1; - sregs.gdt.base = vm->gdt; - sregs.gdt.limit = getpagesize() - 1; - kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs); - vcpu_sregs_set(vcpu, &sregs); - *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; -} - -void vm_install_exception_handler(struct kvm_vm *vm, int vector, - void (*handler)(struct ex_regs *)) -{ - vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); - - handlers[vector] = (vm_vaddr_t)handler; -} - -void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) - REPORT_GUEST_ASSERT(uc); -} - const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, uint32_t function, uint32_t index) { @@ -1294,9 +1247,20 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) { const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ unsigned long ht_gfn, max_gfn, max_pfn; - uint8_t maxphyaddr; + uint8_t maxphyaddr, guest_maxphyaddr; + + /* + * Use "guest MAXPHYADDR" from KVM if it's available. Guest MAXPHYADDR + * enumerates the max _mappable_ GPA, which can be less than the raw + * MAXPHYADDR, e.g. if MAXPHYADDR=52, KVM is using TDP, and the CPU + * doesn't support 5-level TDP. + */ + guest_maxphyaddr = kvm_cpu_property(X86_PROPERTY_GUEST_MAX_PHY_ADDR); + guest_maxphyaddr = guest_maxphyaddr ?: vm->pa_bits; + TEST_ASSERT(guest_maxphyaddr <= vm->pa_bits, + "Guest MAXPHYADDR should never be greater than raw MAXPHYADDR"); - max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1; + max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1; /* Avoid reserved HyperTransport region on AMD processors. */ if (!host_cpu_is_amd) @@ -1344,6 +1308,7 @@ void kvm_selftest_arch_init(void) { host_cpu_is_intel = this_cpu_is_intel(); host_cpu_is_amd = this_cpu_is_amd(); + is_forced_emulation_enabled = kvm_is_forced_emulation_enabled(); } bool sys_clocksource_is_based_on_tsc(void) diff --git a/tools/testing/selftests/kvm/lib/x86_64/sev.c b/tools/testing/selftests/kvm/lib/x86_64/sev.c index e248d3364b..e9535ee20b 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/sev.c +++ b/tools/testing/selftests/kvm/lib/x86_64/sev.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0-only -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <stdint.h> #include <stdbool.h> @@ -35,6 +34,32 @@ static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *regio } } +void sev_vm_init(struct kvm_vm *vm) +{ + if (vm->type == KVM_X86_DEFAULT_VM) { + assert(vm->arch.sev_fd == -1); + vm->arch.sev_fd = open_sev_dev_path_or_exit(); + vm_sev_ioctl(vm, KVM_SEV_INIT, NULL); + } else { + struct kvm_sev_init init = { 0 }; + assert(vm->type == KVM_X86_SEV_VM); + vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); + } +} + +void sev_es_vm_init(struct kvm_vm *vm) +{ + if (vm->type == KVM_X86_DEFAULT_VM) { + assert(vm->arch.sev_fd == -1); + vm->arch.sev_fd = open_sev_dev_path_or_exit(); + vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL); + } else { + struct kvm_sev_init init = { 0 }; + assert(vm->type == KVM_X86_SEV_ES_VM); + vm_sev_ioctl(vm, KVM_SEV_INIT2, &init); + } +} + void sev_vm_launch(struct kvm_vm *vm, uint32_t policy) { struct kvm_sev_launch_start launch_start = { @@ -87,28 +112,30 @@ void sev_vm_launch_finish(struct kvm_vm *vm) TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING); } -struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t policy, void *guest_code, +struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code, struct kvm_vcpu **cpu) { struct vm_shape shape = { - .type = VM_TYPE_DEFAULT, .mode = VM_MODE_DEFAULT, - .subtype = policy & SEV_POLICY_ES ? VM_SUBTYPE_SEV_ES : - VM_SUBTYPE_SEV, + .type = type, }; struct kvm_vm *vm; struct kvm_vcpu *cpus[1]; - uint8_t measurement[512]; vm = __vm_create_with_vcpus(shape, 1, 0, guest_code, cpus); *cpu = cpus[0]; + return vm; +} + +void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement) +{ sev_vm_launch(vm, policy); - /* TODO: Validate the measurement is as expected. */ + if (!measurement) + measurement = alloca(256); + sev_vm_launch_measure(vm, measurement); sev_vm_launch_finish(vm); - - return vm; } diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c index 1a6da7389b..0b9678858b 100644 --- a/tools/testing/selftests/kvm/max_guest_memory_test.c +++ b/tools/testing/selftests/kvm/max_guest_memory_test.c @@ -1,6 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE - #include <stdio.h> #include <stdlib.h> #include <pthread.h> diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 1563619666..05fcf902e0 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -6,9 +6,6 @@ * Copyright (C) 2018, Red Hat, Inc. * Copyright (C) 2020, Google, Inc. */ - -#define _GNU_SOURCE /* for program_invocation_name */ - #include <stdio.h> #include <stdlib.h> #include <sys/syscall.h> diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c index 0f9cabd99f..2c792228ac 100644 --- a/tools/testing/selftests/kvm/riscv/arch_timer.c +++ b/tools/testing/selftests/kvm/riscv/arch_timer.c @@ -7,13 +7,11 @@ * * Copyright (c) 2024, Intel Corporation. */ - -#define _GNU_SOURCE - #include "arch_timer.h" #include "kvm_util.h" #include "processor.h" #include "timer_test.h" +#include "ucall_common.h" static int timer_irq = IRQ_S_TIMER; @@ -85,7 +83,7 @@ struct kvm_vm *test_vm_create(void) int nr_vcpus = test_args.nr_vcpus; vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); - __TEST_REQUIRE(__vcpu_has_ext(vcpus[0], RISCV_ISA_EXT_REG(KVM_RISCV_ISA_EXT_SSTC)), + __TEST_REQUIRE(__vcpu_has_isa_ext(vcpus[0], KVM_RISCV_ISA_EXT_SSTC), "SSTC not available, skipping test\n"); vm_init_vector_tables(vm); diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c new file mode 100644 index 0000000000..0e07128549 --- /dev/null +++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * RISC-V KVM ebreak test. + * + * Copyright 2024 Beijing ESWIN Computing Technology Co., Ltd. + * + */ +#include "kvm_util.h" +#include "ucall_common.h" + +#define LABEL_ADDRESS(v) ((uint64_t)&(v)) + +extern unsigned char sw_bp_1, sw_bp_2; +static uint64_t sw_bp_addr; + +static void guest_code(void) +{ + asm volatile( + ".option push\n" + ".option norvc\n" + "sw_bp_1: ebreak\n" + "sw_bp_2: ebreak\n" + ".option pop\n" + ); + GUEST_ASSERT_EQ(READ_ONCE(sw_bp_addr), LABEL_ADDRESS(sw_bp_2)); + + GUEST_DONE(); +} + +static void guest_breakpoint_handler(struct ex_regs *regs) +{ + WRITE_ONCE(sw_bp_addr, regs->epc); + regs->epc += 4; +} + +int main(void) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + uint64_t pc; + struct kvm_guest_debug debug = { + .control = KVM_GUESTDBG_ENABLE, + }; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vm_init_vector_tables(vm); + vcpu_init_vector_tables(vcpu); + vm_install_exception_handler(vm, EXC_BREAKPOINT, + guest_breakpoint_handler); + + /* + * Enable the guest debug. + * ebreak should exit to the VMM with KVM_EXIT_DEBUG reason. + */ + vcpu_guest_debug_set(vcpu, &debug); + vcpu_run(vcpu); + + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG); + + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &pc); + TEST_ASSERT_EQ(pc, LABEL_ADDRESS(sw_bp_1)); + + /* skip sw_bp_1 */ + vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), pc + 4); + + /* + * Disable all debug controls. + * Guest should handle the ebreak without exiting to the VMM. + */ + memset(&debug, 0, sizeof(debug)); + vcpu_guest_debug_set(vcpu, &debug); + + vcpu_run(vcpu); + + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index b882b7b9b7..222198dd6d 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -43,6 +43,7 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMSTATEEN: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSAIA: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSCOFPMF: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSTC: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT: @@ -408,6 +409,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(V), KVM_ISA_EXT_ARR(SMSTATEEN), KVM_ISA_EXT_ARR(SSAIA), + KVM_ISA_EXT_ARR(SSCOFPMF), KVM_ISA_EXT_ARR(SSTC), KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVNAPOT), @@ -931,6 +933,7 @@ KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F); KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D); KVM_ISA_EXT_SIMPLE_CONFIG(h, H); KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN); +KVM_ISA_EXT_SIMPLE_CONFIG(sscofpmf, SSCOFPMF); KVM_ISA_EXT_SIMPLE_CONFIG(sstc, SSTC); KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL); KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT); @@ -986,6 +989,7 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_fp_d, &config_h, &config_smstateen, + &config_sscofpmf, &config_sstc, &config_svinval, &config_svnapot, diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c new file mode 100644 index 0000000000..f299cbfd23 --- /dev/null +++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c @@ -0,0 +1,682 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * sbi_pmu_test.c - Tests the riscv64 SBI PMU functionality. + * + * Copyright (c) 2024, Rivos Inc. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include "kvm_util.h" +#include "test_util.h" +#include "processor.h" +#include "sbi.h" +#include "arch_timer.h" +#include "ucall_common.h" + +/* Maximum counters(firmware + hardware) */ +#define RISCV_MAX_PMU_COUNTERS 64 +union sbi_pmu_ctr_info ctrinfo_arr[RISCV_MAX_PMU_COUNTERS]; + +/* Snapshot shared memory data */ +#define PMU_SNAPSHOT_GPA_BASE BIT(30) +static void *snapshot_gva; +static vm_paddr_t snapshot_gpa; + +static int vcpu_shared_irq_count; +static int counter_in_use; + +/* Cache the available counters in a bitmask */ +static unsigned long counter_mask_available; + +static bool illegal_handler_invoked; + +#define SBI_PMU_TEST_BASIC BIT(0) +#define SBI_PMU_TEST_EVENTS BIT(1) +#define SBI_PMU_TEST_SNAPSHOT BIT(2) +#define SBI_PMU_TEST_OVERFLOW BIT(3) + +static int disabled_tests; + +unsigned long pmu_csr_read_num(int csr_num) +{ +#define switchcase_csr_read(__csr_num, __val) {\ + case __csr_num: \ + __val = csr_read(__csr_num); \ + break; } +#define switchcase_csr_read_2(__csr_num, __val) {\ + switchcase_csr_read(__csr_num + 0, __val) \ + switchcase_csr_read(__csr_num + 1, __val)} +#define switchcase_csr_read_4(__csr_num, __val) {\ + switchcase_csr_read_2(__csr_num + 0, __val) \ + switchcase_csr_read_2(__csr_num + 2, __val)} +#define switchcase_csr_read_8(__csr_num, __val) {\ + switchcase_csr_read_4(__csr_num + 0, __val) \ + switchcase_csr_read_4(__csr_num + 4, __val)} +#define switchcase_csr_read_16(__csr_num, __val) {\ + switchcase_csr_read_8(__csr_num + 0, __val) \ + switchcase_csr_read_8(__csr_num + 8, __val)} +#define switchcase_csr_read_32(__csr_num, __val) {\ + switchcase_csr_read_16(__csr_num + 0, __val) \ + switchcase_csr_read_16(__csr_num + 16, __val)} + + unsigned long ret = 0; + + switch (csr_num) { + switchcase_csr_read_32(CSR_CYCLE, ret) + switchcase_csr_read_32(CSR_CYCLEH, ret) + default : + break; + } + + return ret; +#undef switchcase_csr_read_32 +#undef switchcase_csr_read_16 +#undef switchcase_csr_read_8 +#undef switchcase_csr_read_4 +#undef switchcase_csr_read_2 +#undef switchcase_csr_read +} + +static inline void dummy_func_loop(uint64_t iter) +{ + int i = 0; + + while (i < iter) { + asm volatile("nop"); + i++; + } +} + +static void start_counter(unsigned long counter, unsigned long start_flags, + unsigned long ival) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, counter, 1, start_flags, + ival, 0, 0); + __GUEST_ASSERT(ret.error == 0, "Unable to start counter %ld\n", counter); +} + +/* This should be invoked only for reset counter use case */ +static void stop_reset_counter(unsigned long counter, unsigned long stop_flags) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1, + stop_flags | SBI_PMU_STOP_FLAG_RESET, 0, 0, 0); + __GUEST_ASSERT(ret.error == SBI_ERR_ALREADY_STOPPED, + "Unable to stop counter %ld\n", counter); +} + +static void stop_counter(unsigned long counter, unsigned long stop_flags) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1, stop_flags, + 0, 0, 0); + __GUEST_ASSERT(ret.error == 0, "Unable to stop counter %ld error %ld\n", + counter, ret.error); +} + +static void guest_illegal_exception_handler(struct ex_regs *regs) +{ + __GUEST_ASSERT(regs->cause == EXC_INST_ILLEGAL, + "Unexpected exception handler %lx\n", regs->cause); + + illegal_handler_invoked = true; + /* skip the trapping instruction */ + regs->epc += 4; +} + +static void guest_irq_handler(struct ex_regs *regs) +{ + unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG; + struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva; + unsigned long overflown_mask; + unsigned long counter_val = 0; + + /* Validate that we are in the correct irq handler */ + GUEST_ASSERT_EQ(irq_num, IRQ_PMU_OVF); + + /* Stop all counters first to avoid further interrupts */ + stop_counter(counter_in_use, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT); + + csr_clear(CSR_SIP, BIT(IRQ_PMU_OVF)); + + overflown_mask = READ_ONCE(snapshot_data->ctr_overflow_mask); + GUEST_ASSERT(overflown_mask & 0x01); + + WRITE_ONCE(vcpu_shared_irq_count, vcpu_shared_irq_count+1); + + counter_val = READ_ONCE(snapshot_data->ctr_values[0]); + /* Now start the counter to mimick the real driver behavior */ + start_counter(counter_in_use, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_val); +} + +static unsigned long get_counter_index(unsigned long cbase, unsigned long cmask, + unsigned long cflags, + unsigned long event) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase, cmask, + cflags, event, 0, 0); + __GUEST_ASSERT(ret.error == 0, "config matching failed %ld\n", ret.error); + GUEST_ASSERT(ret.value < RISCV_MAX_PMU_COUNTERS); + GUEST_ASSERT(BIT(ret.value) & counter_mask_available); + + return ret.value; +} + +static unsigned long get_num_counters(void) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_NUM_COUNTERS, 0, 0, 0, 0, 0, 0); + + __GUEST_ASSERT(ret.error == 0, "Unable to retrieve number of counters from SBI PMU"); + __GUEST_ASSERT(ret.value < RISCV_MAX_PMU_COUNTERS, + "Invalid number of counters %ld\n", ret.value); + + return ret.value; +} + +static void update_counter_info(int num_counters) +{ + int i = 0; + struct sbiret ret; + + for (i = 0; i < num_counters; i++) { + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, 0, 0, 0, 0, 0); + + /* There can be gaps in logical counter indicies*/ + if (ret.error) + continue; + GUEST_ASSERT_NE(ret.value, 0); + + ctrinfo_arr[i].value = ret.value; + counter_mask_available |= BIT(i); + } + + GUEST_ASSERT(counter_mask_available > 0); +} + +static unsigned long read_fw_counter(int idx, union sbi_pmu_ctr_info ctrinfo) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ, idx, 0, 0, 0, 0, 0); + GUEST_ASSERT(ret.error == 0); + return ret.value; +} + +static unsigned long read_counter(int idx, union sbi_pmu_ctr_info ctrinfo) +{ + unsigned long counter_val = 0; + + __GUEST_ASSERT(ctrinfo.type < 2, "Invalid counter type %d", ctrinfo.type); + + if (ctrinfo.type == SBI_PMU_CTR_TYPE_HW) + counter_val = pmu_csr_read_num(ctrinfo.csr); + else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW) + counter_val = read_fw_counter(idx, ctrinfo); + + return counter_val; +} + +static inline void verify_sbi_requirement_assert(void) +{ + long out_val = 0; + bool probe; + + probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val); + GUEST_ASSERT(probe && out_val == 1); + + if (get_host_sbi_spec_version() < sbi_mk_version(2, 0)) + __GUEST_ASSERT(0, "SBI implementation version doesn't support PMU Snapshot"); +} + +static void snapshot_set_shmem(vm_paddr_t gpa, unsigned long flags) +{ + unsigned long lo = (unsigned long)gpa; +#if __riscv_xlen == 32 + unsigned long hi = (unsigned long)(gpa >> 32); +#else + unsigned long hi = gpa == -1 ? -1 : 0; +#endif + struct sbiret ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM, + lo, hi, flags, 0, 0, 0); + + GUEST_ASSERT(ret.value == 0 && ret.error == 0); +} + +static void test_pmu_event(unsigned long event) +{ + unsigned long counter; + unsigned long counter_value_pre, counter_value_post; + unsigned long counter_init_value = 100; + + counter = get_counter_index(0, counter_mask_available, 0, event); + counter_value_pre = read_counter(counter, ctrinfo_arr[counter]); + + /* Do not set the initial value */ + start_counter(counter, 0, 0); + dummy_func_loop(10000); + stop_counter(counter, 0); + + counter_value_post = read_counter(counter, ctrinfo_arr[counter]); + __GUEST_ASSERT(counter_value_post > counter_value_pre, + "Event update verification failed: post [%lx] pre [%lx]\n", + counter_value_post, counter_value_pre); + + /* + * We can't just update the counter without starting it. + * Do start/stop twice to simulate that by first initializing to a very + * high value and a low value after that. + */ + start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, ULONG_MAX/2); + stop_counter(counter, 0); + counter_value_pre = read_counter(counter, ctrinfo_arr[counter]); + + start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_init_value); + stop_counter(counter, 0); + counter_value_post = read_counter(counter, ctrinfo_arr[counter]); + __GUEST_ASSERT(counter_value_pre > counter_value_post, + "Counter reinitialization verification failed : post [%lx] pre [%lx]\n", + counter_value_post, counter_value_pre); + + /* Now set the initial value and compare */ + start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_init_value); + dummy_func_loop(10000); + stop_counter(counter, 0); + + counter_value_post = read_counter(counter, ctrinfo_arr[counter]); + __GUEST_ASSERT(counter_value_post > counter_init_value, + "Event update verification failed: post [%lx] pre [%lx]\n", + counter_value_post, counter_init_value); + + stop_reset_counter(counter, 0); +} + +static void test_pmu_event_snapshot(unsigned long event) +{ + unsigned long counter; + unsigned long counter_value_pre, counter_value_post; + unsigned long counter_init_value = 100; + struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva; + + counter = get_counter_index(0, counter_mask_available, 0, event); + counter_value_pre = read_counter(counter, ctrinfo_arr[counter]); + + /* Do not set the initial value */ + start_counter(counter, 0, 0); + dummy_func_loop(10000); + stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT); + + /* The counter value is updated w.r.t relative index of cbase */ + counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]); + __GUEST_ASSERT(counter_value_post > counter_value_pre, + "Event update verification failed: post [%lx] pre [%lx]\n", + counter_value_post, counter_value_pre); + + /* + * We can't just update the counter without starting it. + * Do start/stop twice to simulate that by first initializing to a very + * high value and a low value after that. + */ + WRITE_ONCE(snapshot_data->ctr_values[0], ULONG_MAX/2); + start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0); + stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT); + counter_value_pre = READ_ONCE(snapshot_data->ctr_values[0]); + + WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value); + start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0); + stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT); + counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]); + __GUEST_ASSERT(counter_value_pre > counter_value_post, + "Counter reinitialization verification failed : post [%lx] pre [%lx]\n", + counter_value_post, counter_value_pre); + + /* Now set the initial value and compare */ + WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value); + start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0); + dummy_func_loop(10000); + stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT); + + counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]); + __GUEST_ASSERT(counter_value_post > counter_init_value, + "Event update verification failed: post [%lx] pre [%lx]\n", + counter_value_post, counter_init_value); + + stop_reset_counter(counter, 0); +} + +static void test_pmu_event_overflow(unsigned long event) +{ + unsigned long counter; + unsigned long counter_value_post; + unsigned long counter_init_value = ULONG_MAX - 10000; + struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva; + + counter = get_counter_index(0, counter_mask_available, 0, event); + counter_in_use = counter; + + /* The counter value is updated w.r.t relative index of cbase passed to start/stop */ + WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value); + start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0); + dummy_func_loop(10000); + udelay(msecs_to_usecs(2000)); + /* irq handler should have stopped the counter */ + stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT); + + counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]); + /* The counter value after stopping should be less the init value due to overflow */ + __GUEST_ASSERT(counter_value_post < counter_init_value, + "counter_value_post %lx counter_init_value %lx for counter\n", + counter_value_post, counter_init_value); + + stop_reset_counter(counter, 0); +} + +static void test_invalid_event(void) +{ + struct sbiret ret; + unsigned long event = 0x1234; /* A random event */ + + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, 0, + counter_mask_available, 0, event, 0, 0); + GUEST_ASSERT_EQ(ret.error, SBI_ERR_NOT_SUPPORTED); +} + +static void test_pmu_events(void) +{ + int num_counters = 0; + + /* Get the counter details */ + num_counters = get_num_counters(); + update_counter_info(num_counters); + + /* Sanity testing for any random invalid event */ + test_invalid_event(); + + /* Only these two events are guaranteed to be present */ + test_pmu_event(SBI_PMU_HW_CPU_CYCLES); + test_pmu_event(SBI_PMU_HW_INSTRUCTIONS); + + GUEST_DONE(); +} + +static void test_pmu_basic_sanity(void) +{ + long out_val = 0; + bool probe; + struct sbiret ret; + int num_counters = 0, i; + union sbi_pmu_ctr_info ctrinfo; + + probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val); + GUEST_ASSERT(probe && out_val == 1); + + num_counters = get_num_counters(); + + for (i = 0; i < num_counters; i++) { + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, + 0, 0, 0, 0, 0); + + /* There can be gaps in logical counter indicies*/ + if (ret.error) + continue; + GUEST_ASSERT_NE(ret.value, 0); + + ctrinfo.value = ret.value; + + /** + * Accessibility check of hardware and read capability of firmware counters. + * The spec doesn't mandate any initial value. No need to check any value. + */ + if (ctrinfo.type == SBI_PMU_CTR_TYPE_HW) { + pmu_csr_read_num(ctrinfo.csr); + GUEST_ASSERT(illegal_handler_invoked); + } else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW) { + read_fw_counter(i, ctrinfo); + } + } + + GUEST_DONE(); +} + +static void test_pmu_events_snaphost(void) +{ + int num_counters = 0; + struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva; + int i; + + /* Verify presence of SBI PMU and minimum requrired SBI version */ + verify_sbi_requirement_assert(); + + snapshot_set_shmem(snapshot_gpa, 0); + + /* Get the counter details */ + num_counters = get_num_counters(); + update_counter_info(num_counters); + + /* Validate shared memory access */ + GUEST_ASSERT_EQ(READ_ONCE(snapshot_data->ctr_overflow_mask), 0); + for (i = 0; i < num_counters; i++) { + if (counter_mask_available & (BIT(i))) + GUEST_ASSERT_EQ(READ_ONCE(snapshot_data->ctr_values[i]), 0); + } + /* Only these two events are guranteed to be present */ + test_pmu_event_snapshot(SBI_PMU_HW_CPU_CYCLES); + test_pmu_event_snapshot(SBI_PMU_HW_INSTRUCTIONS); + + GUEST_DONE(); +} + +static void test_pmu_events_overflow(void) +{ + int num_counters = 0; + + /* Verify presence of SBI PMU and minimum requrired SBI version */ + verify_sbi_requirement_assert(); + + snapshot_set_shmem(snapshot_gpa, 0); + csr_set(CSR_IE, BIT(IRQ_PMU_OVF)); + local_irq_enable(); + + /* Get the counter details */ + num_counters = get_num_counters(); + update_counter_info(num_counters); + + /* + * Qemu supports overflow for cycle/instruction. + * This test may fail on any platform that do not support overflow for these two events. + */ + test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES); + GUEST_ASSERT_EQ(vcpu_shared_irq_count, 1); + + test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS); + GUEST_ASSERT_EQ(vcpu_shared_irq_count, 2); + + GUEST_DONE(); +} + +static void run_vcpu(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + case UCALL_SYNC: + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + break; + } +} + +void test_vm_destroy(struct kvm_vm *vm) +{ + memset(ctrinfo_arr, 0, sizeof(union sbi_pmu_ctr_info) * RISCV_MAX_PMU_COUNTERS); + counter_mask_available = 0; + kvm_vm_free(vm); +} + +static void test_vm_basic_test(void *guest_code) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU), + "SBI PMU not available, skipping test"); + vm_init_vector_tables(vm); + /* Illegal instruction handler is required to verify read access without configuration */ + vm_install_exception_handler(vm, EXC_INST_ILLEGAL, guest_illegal_exception_handler); + + vcpu_init_vector_tables(vcpu); + run_vcpu(vcpu); + + test_vm_destroy(vm); +} + +static void test_vm_events_test(void *guest_code) +{ + struct kvm_vm *vm = NULL; + struct kvm_vcpu *vcpu = NULL; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU), + "SBI PMU not available, skipping test"); + run_vcpu(vcpu); + + test_vm_destroy(vm); +} + +static void test_vm_setup_snapshot_mem(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + /* PMU Snapshot requires single page only */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, PMU_SNAPSHOT_GPA_BASE, 1, 1, 0); + /* PMU_SNAPSHOT_GPA_BASE is identity mapped */ + virt_map(vm, PMU_SNAPSHOT_GPA_BASE, PMU_SNAPSHOT_GPA_BASE, 1); + + snapshot_gva = (void *)(PMU_SNAPSHOT_GPA_BASE); + snapshot_gpa = addr_gva2gpa(vcpu->vm, (vm_vaddr_t)snapshot_gva); + sync_global_to_guest(vcpu->vm, snapshot_gva); + sync_global_to_guest(vcpu->vm, snapshot_gpa); +} + +static void test_vm_events_snapshot_test(void *guest_code) +{ + struct kvm_vm *vm = NULL; + struct kvm_vcpu *vcpu; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU), + "SBI PMU not available, skipping test"); + + test_vm_setup_snapshot_mem(vm, vcpu); + + run_vcpu(vcpu); + + test_vm_destroy(vm); +} + +static void test_vm_events_overflow(void *guest_code) +{ + struct kvm_vm *vm = NULL; + struct kvm_vcpu *vcpu; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU), + "SBI PMU not available, skipping test"); + + __TEST_REQUIRE(__vcpu_has_isa_ext(vcpu, KVM_RISCV_ISA_EXT_SSCOFPMF), + "Sscofpmf is not available, skipping overflow test"); + + test_vm_setup_snapshot_mem(vm, vcpu); + vm_init_vector_tables(vm); + vm_install_interrupt_handler(vm, guest_irq_handler); + + vcpu_init_vector_tables(vcpu); + /* Initialize guest timer frequency. */ + vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency), &timer_freq); + sync_global_to_guest(vm, timer_freq); + + run_vcpu(vcpu); + + test_vm_destroy(vm); +} + +static void test_print_help(char *name) +{ + pr_info("Usage: %s [-h] [-d <test name>]\n", name); + pr_info("\t-d: Test to disable. Available tests are 'basic', 'events', 'snapshot', 'overflow'\n"); + pr_info("\t-h: print this help screen\n"); +} + +static bool parse_args(int argc, char *argv[]) +{ + int opt; + + while ((opt = getopt(argc, argv, "hd:")) != -1) { + switch (opt) { + case 'd': + if (!strncmp("basic", optarg, 5)) + disabled_tests |= SBI_PMU_TEST_BASIC; + else if (!strncmp("events", optarg, 6)) + disabled_tests |= SBI_PMU_TEST_EVENTS; + else if (!strncmp("snapshot", optarg, 8)) + disabled_tests |= SBI_PMU_TEST_SNAPSHOT; + else if (!strncmp("overflow", optarg, 8)) + disabled_tests |= SBI_PMU_TEST_OVERFLOW; + else + goto done; + break; + case 'h': + default: + goto done; + } + } + + return true; +done: + test_print_help(argv[0]); + return false; +} + +int main(int argc, char *argv[]) +{ + if (!parse_args(argc, argv)) + exit(KSFT_SKIP); + + if (!(disabled_tests & SBI_PMU_TEST_BASIC)) { + test_vm_basic_test(test_pmu_basic_sanity); + pr_info("SBI PMU basic test : PASS\n"); + } + + if (!(disabled_tests & SBI_PMU_TEST_EVENTS)) { + test_vm_events_test(test_pmu_events); + pr_info("SBI PMU event verification test : PASS\n"); + } + + if (!(disabled_tests & SBI_PMU_TEST_SNAPSHOT)) { + test_vm_events_snapshot_test(test_pmu_events_snaphost); + pr_info("SBI PMU event verification with snapshot test : PASS\n"); + } + + if (!(disabled_tests & SBI_PMU_TEST_OVERFLOW)) { + test_vm_events_overflow(test_pmu_events_overflow); + pr_info("SBI PMU event verification with overflow test : PASS\n"); + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index 28f97fb520..e5898678bf 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -1,5 +1,13 @@ // SPDX-License-Identifier: GPL-2.0-only -#define _GNU_SOURCE /* for program_invocation_short_name */ + +/* + * Include rseq.c without _GNU_SOURCE defined, before including any headers, so + * that rseq.c is compiled with its configuration, not KVM selftests' config. + */ +#undef _GNU_SOURCE +#include "../rseq/rseq.c" +#define _GNU_SOURCE + #include <errno.h> #include <fcntl.h> #include <pthread.h> @@ -19,8 +27,7 @@ #include "kvm_util.h" #include "processor.h" #include "test_util.h" - -#include "../rseq/rseq.c" +#include "ucall_common.h" /* * Any bug related to task migration is likely to be timing-dependent; perform @@ -186,12 +193,35 @@ static void calc_min_max_cpu(void) "Only one usable CPU, task migration not possible"); } +static void help(const char *name) +{ + puts(""); + printf("usage: %s [-h] [-u]\n", name); + printf(" -u: Don't sanity check the number of successful KVM_RUNs\n"); + puts(""); + exit(0); +} + int main(int argc, char *argv[]) { + bool skip_sanity_check = false; int r, i, snapshot; struct kvm_vm *vm; struct kvm_vcpu *vcpu; u32 cpu, rseq_cpu; + int opt; + + while ((opt = getopt(argc, argv, "hu")) != -1) { + switch (opt) { + case 'u': + skip_sanity_check = true; + break; + case 'h': + default: + help(argv[0]); + break; + } + } r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask); TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno, @@ -254,9 +284,17 @@ int main(int argc, char *argv[]) * getcpu() to stabilize. A 2:1 migration:KVM_RUN ratio is a fairly * conservative ratio on x86-64, which can do _more_ KVM_RUNs than * migrations given the 1us+ delay in the migration task. + * + * Another reason why it may have small migration:KVM_RUN ratio is that, + * on systems with large low power mode wakeup latency, it may happen + * quite often that the scheduler is not able to wake up the target CPU + * before the vCPU thread is scheduled to another CPU. */ - TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2), - "Only performed %d KVM_RUNs, task stalled too much?", i); + TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2), + "Only performed %d KVM_RUNs, task stalled too much?\n\n" + " Try disabling deep sleep states to reduce CPU wakeup latency,\n" + " e.g. via cpuidle.off=1 or setting /dev/cpu_dma_latency to '0',\n" + " or run with -u to disable this sanity check.", i); pthread_join(migration_thread, NULL); diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390x/cmma_test.c index 626a2b8a20..b390338447 100644 --- a/tools/testing/selftests/kvm/s390x/cmma_test.c +++ b/tools/testing/selftests/kvm/s390x/cmma_test.c @@ -7,8 +7,6 @@ * Authors: * Nico Boehr <nrb@linux.ibm.com> */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -18,6 +16,7 @@ #include "test_util.h" #include "kvm_util.h" #include "kselftest.h" +#include "ucall_common.h" #define MAIN_PAGE_COUNT 512 diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index 48cb910e66..f2df7416be 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -15,6 +15,7 @@ #include "test_util.h" #include "kvm_util.h" #include "kselftest.h" +#include "ucall_common.h" enum mop_target { LOGICAL, diff --git a/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c new file mode 100644 index 0000000000..bba0d9a6dc --- /dev/null +++ b/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test shared zeropage handling (with/without storage keys) + * + * Copyright (C) 2024, Red Hat, Inc. + */ +#include <sys/mman.h> + +#include <linux/fs.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "kselftest.h" +#include "ucall_common.h" + +static void set_storage_key(void *addr, uint8_t skey) +{ + asm volatile("sske %0,%1" : : "d" (skey), "a" (addr)); +} + +static void guest_code(void) +{ + /* Issue some storage key instruction. */ + set_storage_key((void *)0, 0x98); + GUEST_DONE(); +} + +/* + * Returns 1 if the shared zeropage is mapped, 0 if something else is mapped. + * Returns < 0 on error or if nothing is mapped. + */ +static int maps_shared_zeropage(int pagemap_fd, void *addr) +{ + struct page_region region; + struct pm_scan_arg arg = { + .start = (uintptr_t)addr, + .end = (uintptr_t)addr + 4096, + .vec = (uintptr_t)®ion, + .vec_len = 1, + .size = sizeof(struct pm_scan_arg), + .category_mask = PAGE_IS_PFNZERO, + .category_anyof_mask = PAGE_IS_PRESENT, + .return_mask = PAGE_IS_PFNZERO, + }; + return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg); +} + +int main(int argc, char *argv[]) +{ + char *mem, *page0, *page1, *page2, tmp; + const size_t pagesize = getpagesize(); + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + int pagemap_fd; + + ksft_print_header(); + ksft_set_plan(3); + + /* + * We'll use memory that is not mapped into the VM for simplicity. + * Shared zeropages are enabled/disabled per-process. + */ + mem = mmap(0, 3 * pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); + TEST_ASSERT(mem != MAP_FAILED, "mmap() failed"); + + /* Disable THP. Ignore errors on older kernels. */ + madvise(mem, 3 * pagesize, MADV_NOHUGEPAGE); + + page0 = mem; + page1 = page0 + pagesize; + page2 = page1 + pagesize; + + /* Can we even detect shared zeropages? */ + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + TEST_REQUIRE(pagemap_fd >= 0); + + tmp = *page0; + asm volatile("" : "+r" (tmp)); + TEST_REQUIRE(maps_shared_zeropage(pagemap_fd, page0) == 1); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + /* Verify that we get the shared zeropage after VM creation. */ + tmp = *page1; + asm volatile("" : "+r" (tmp)); + ksft_test_result(maps_shared_zeropage(pagemap_fd, page1) == 1, + "Shared zeropages should be enabled\n"); + + /* + * Let our VM execute a storage key instruction that should + * unshare all shared zeropages. + */ + vcpu_run(vcpu); + get_ucall(vcpu, &uc); + TEST_ASSERT_EQ(uc.cmd, UCALL_DONE); + + /* Verify that we don't have a shared zeropage anymore. */ + ksft_test_result(!maps_shared_zeropage(pagemap_fd, page1), + "Shared zeropage should be gone\n"); + + /* Verify that we don't get any new shared zeropages. */ + tmp = *page2; + asm volatile("" : "+r" (tmp)); + ksft_test_result(!maps_shared_zeropage(pagemap_fd, page2), + "Shared zeropages should be disabled\n"); + + kvm_vm_free(vm); + + ksft_finished(); +} diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c index 43fb25ddc3..53def355cc 100644 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c @@ -10,8 +10,6 @@ * * Test expected behavior of the KVM_CAP_SYNC_REGS functionality. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390x/tprot.c index c73f948c9b..7a742a673b 100644 --- a/tools/testing/selftests/kvm/s390x/tprot.c +++ b/tools/testing/selftests/kvm/s390x/tprot.c @@ -8,6 +8,7 @@ #include "test_util.h" #include "kvm_util.h" #include "kselftest.h" +#include "ucall_common.h" #define PAGE_SHIFT 12 #define PAGE_SIZE (1 << PAGE_SHIFT) diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index bd57d991e2..bb8002084f 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <pthread.h> #include <sched.h> @@ -221,8 +220,20 @@ static void test_move_memory_region(void) static void guest_code_delete_memory_region(void) { + struct desc_ptr idt; uint64_t val; + /* + * Clobber the IDT so that a #PF due to the memory region being deleted + * escalates to triple-fault shutdown. Because the memory region is + * deleted, there will be no valid mappings. As a result, KVM will + * repeatedly intercepts the state-2 page fault that occurs when trying + * to vector the guest's #PF. I.e. trying to actually handle the #PF + * in the guest will never succeed, and so isn't an option. + */ + memset(&idt, 0, sizeof(idt)); + __asm__ __volatile__("lidt %0" :: "m"(idt)); + GUEST_SYNC(0); /* Spin until the memory region is deleted. */ @@ -339,7 +350,7 @@ static void test_invalid_memory_region_flags(void) #ifdef __x86_64__ if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)) - vm = vm_create_barebones_protected_vm(); + vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM); else #endif vm = vm_create_barebones(); @@ -462,7 +473,7 @@ static void test_add_private_memory_region(void) pr_info("Testing ADD of KVM_MEM_GUEST_MEMFD memory regions\n"); - vm = vm_create_barebones_protected_vm(); + vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM); test_invalid_guest_memfd(vm, vm->kvm_fd, 0, "KVM fd should fail"); test_invalid_guest_memfd(vm, vm->fd, 0, "VM's fd should fail"); @@ -471,7 +482,7 @@ static void test_add_private_memory_region(void) test_invalid_guest_memfd(vm, memfd, 0, "Regular memfd() should fail"); close(memfd); - vm2 = vm_create_barebones_protected_vm(); + vm2 = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM); memfd = vm_create_guest_memfd(vm2, MEM_REGION_SIZE, 0); test_invalid_guest_memfd(vm, memfd, 0, "Other VM's guest_memfd() should fail"); @@ -499,7 +510,7 @@ static void test_add_overlapping_private_memory_regions(void) pr_info("Testing ADD of overlapping KVM_MEM_GUEST_MEMFD memory regions\n"); - vm = vm_create_barebones_protected_vm(); + vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM); memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE * 4, 0); diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index bae0c5026f..a8d3afa0b8 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -4,20 +4,22 @@ * * Copyright (C) 2020, Red Hat, Inc. */ -#define _GNU_SOURCE #include <stdio.h> #include <time.h> #include <sched.h> #include <pthread.h> #include <linux/kernel.h> #include <asm/kvm.h> -#ifndef __riscv +#ifdef __riscv +#include "sbi.h" +#else #include <asm/kvm_para.h> #endif #include "test_util.h" #include "kvm_util.h" #include "processor.h" +#include "ucall_common.h" #define NR_VCPUS 4 #define ST_GPA_BASE (1 << 30) @@ -83,20 +85,18 @@ static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) { struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]); - int i; - pr_info("VCPU%d:\n", vcpu_idx); - pr_info(" steal: %lld\n", st->steal); - pr_info(" version: %d\n", st->version); - pr_info(" flags: %d\n", st->flags); - pr_info(" preempted: %d\n", st->preempted); - pr_info(" u8_pad: "); - for (i = 0; i < 3; ++i) - pr_info("%d", st->u8_pad[i]); - pr_info("\n pad: "); - for (i = 0; i < 11; ++i) - pr_info("%d", st->pad[i]); - pr_info("\n"); + ksft_print_msg("VCPU%d:\n", vcpu_idx); + ksft_print_msg(" steal: %lld\n", st->steal); + ksft_print_msg(" version: %d\n", st->version); + ksft_print_msg(" flags: %d\n", st->flags); + ksft_print_msg(" preempted: %d\n", st->preempted); + ksft_print_msg(" u8_pad: %d %d %d\n", + st->u8_pad[0], st->u8_pad[1], st->u8_pad[2]); + ksft_print_msg(" pad: %d %d %d %d %d %d %d %d %d %d %d\n", + st->pad[0], st->pad[1], st->pad[2], st->pad[3], + st->pad[4], st->pad[5], st->pad[6], st->pad[7], + st->pad[8], st->pad[9], st->pad[10]); } #elif defined(__aarch64__) @@ -199,10 +199,10 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) { struct st_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]); - pr_info("VCPU%d:\n", vcpu_idx); - pr_info(" rev: %d\n", st->rev); - pr_info(" attr: %d\n", st->attr); - pr_info(" st_time: %ld\n", st->st_time); + ksft_print_msg("VCPU%d:\n", vcpu_idx); + ksft_print_msg(" rev: %d\n", st->rev); + ksft_print_msg(" attr: %d\n", st->attr); + ksft_print_msg(" st_time: %ld\n", st->st_time); } #elif defined(__riscv) @@ -366,7 +366,9 @@ int main(int ac, char **av) vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0); virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages); + ksft_print_header(); TEST_REQUIRE(is_steal_time_supported(vcpus[0])); + ksft_set_plan(NR_VCPUS); /* Run test on each VCPU */ for (i = 0; i < NR_VCPUS; ++i) { @@ -407,14 +409,15 @@ int main(int ac, char **av) run_delay, stolen_time); if (verbose) { - pr_info("VCPU%d: total-stolen-time=%ld test-stolen-time=%ld", i, - guest_stolen_time[i], stolen_time); - if (stolen_time == run_delay) - pr_info(" (BONUS: guest test-stolen-time even exactly matches test-run_delay)"); - pr_info("\n"); + ksft_print_msg("VCPU%d: total-stolen-time=%ld test-stolen-time=%ld%s\n", + i, guest_stolen_time[i], stolen_time, + stolen_time == run_delay ? + " (BONUS: guest test-stolen-time even exactly matches test-run_delay)" : ""); steal_time_dump(vm, i); } + ksft_test_result_pass("vcpu%d\n", i); } - return 0; + /* Print results and exit() accordingly */ + ksft_finished(); } diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index eae521f050..903940c54d 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -6,8 +6,6 @@ * * Tests for amx #NM exception and save/restore. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -246,8 +244,6 @@ int main(int argc, char *argv[]) vcpu_regs_get(vcpu, ®s1); /* Register #NM handler */ - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler); /* amx cfg for guest_code */ diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c index ee3b384b99..2929c067c2 100644 --- a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c +++ b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c @@ -17,6 +17,7 @@ #include "test_util.h" #include "memstress.h" #include "guest_modes.h" +#include "ucall_common.h" #define VCPUS 2 #define SLOTS 2 diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c index 6c2e5e0ceb..81055476d3 100644 --- a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c +++ b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c @@ -4,12 +4,9 @@ * * Test for KVM_CAP_EXIT_ON_EMULATION_FAILURE. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ - #include "flds_emulation.h" - #include "test_util.h" +#include "ucall_common.h" #define MMIO_GPA 0x700000000 #define MMIO_GVA MMIO_GPA diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c index f3c2239228..762628f7d4 100644 --- a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c +++ b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c @@ -110,8 +110,6 @@ static void test_fix_hypercall(struct kvm_vcpu *vcpu, bool disable_quirk) { struct kvm_vm *vm = vcpu->vm; - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler); if (disable_quirk) diff --git a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c index df351ae170..10b1b0ba37 100644 --- a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c +++ b/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c @@ -2,8 +2,6 @@ /* * Copyright (C) 2023, Google LLC. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <sys/ioctl.h> #include "test_util.h" diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index 5c27efbf40..4f5881d4ef 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -7,8 +7,6 @@ * This work is licensed under the terms of the GNU GPL, version 2. * */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c index 4c7257ecd2..e192720bfe 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c @@ -4,7 +4,6 @@ * * Tests for Enlightened VMCS, including nested guest state. */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -258,8 +257,6 @@ int main(int argc, char *argv[]) vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page)); vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index b923a285e9..068e9c6971 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -156,9 +156,6 @@ static void guest_test_msrs_access(void) vcpu_init_cpuid(vcpu, prev_cpuid); } - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - /* TODO: Make this entire test easier to maintain. */ if (stage >= 21) vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_SYNIC2, 0); @@ -532,9 +529,6 @@ static void guest_test_hcalls_access(void) while (true) { vm = vm_create_with_one_vcpu(&vcpu, guest_hcall); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - /* Hypercall input/output */ hcall_page = vm_vaddr_alloc_pages(vm, 2); memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c index f1617762c2..22c0c12458 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c @@ -5,8 +5,6 @@ * Copyright (C) 2022, Red Hat, Inc. * */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <pthread.h> #include <inttypes.h> @@ -256,16 +254,13 @@ int main(int argc, char *argv[]) hcall_page = vm_vaddr_alloc_pages(vm, 2); memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); - vm_init_descriptor_tables(vm); vcpu[1] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_1, receiver_code); - vcpu_init_descriptor_tables(vcpu[1]); vcpu_args_set(vcpu[1], 2, hcall_page, addr_gva2gpa(vm, hcall_page)); vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_1); vcpu_set_hv_cpuid(vcpu[1]); vcpu[2] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_2, receiver_code); - vcpu_init_descriptor_tables(vcpu[2]); vcpu_args_set(vcpu[2], 2, hcall_page, addr_gva2gpa(vm, hcall_page)); vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_2); vcpu_set_hv_cpuid(vcpu[2]); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index c9b18707ed..b987a3d797 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -4,7 +4,6 @@ * * Tests for Hyper-V extensions to SVM. */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c index 05b56095cf..077cd0ec30 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c @@ -5,8 +5,6 @@ * Copyright (C) 2022, Red Hat, Inc. * */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <asm/barrier.h> #include <pthread.h> #include <inttypes.h> diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c index 40cc59f4e6..78878b3a27 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c @@ -183,9 +183,6 @@ int main(void) vcpu_clear_cpuid_entry(vcpu, KVM_CPUID_FEATURES); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - enter_guest(vcpu); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c index 853802641e..2b550eff35 100644 --- a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c @@ -75,14 +75,12 @@ int main(int argc, char *argv[]) struct ucall uc; int testcase; + TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); vm = vm_create_with_one_vcpu(&vcpu, guest_code); vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - while (1) { vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); diff --git a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c index 3670331adf..3eb0313ffa 100644 --- a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c +++ b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c @@ -1,6 +1,4 @@ // SPDX-License-Identifier: GPL-2.0-only -#define _GNU_SOURCE /* for program_invocation_short_name */ - #include "test_util.h" #include "kvm_util.h" #include "processor.h" diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index 17bbb96fc4..e7efb2b35f 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -5,9 +5,6 @@ * * Copyright (C) 2022, Google LLC. */ - -#define _GNU_SOURCE - #include <fcntl.h> #include <stdint.h> #include <time.h> diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh index 7cbb409801..caad084b8b 100755 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh @@ -13,10 +13,21 @@ NX_HUGE_PAGES_RECOVERY_RATIO=$(cat /sys/module/kvm/parameters/nx_huge_pages_reco NX_HUGE_PAGES_RECOVERY_PERIOD=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms) HUGE_PAGES=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages) +# If we're already root, the host might not have sudo. +if [ $(whoami) == "root" ]; then + function do_sudo () { + "$@" + } +else + function do_sudo () { + sudo "$@" + } +fi + set +e function sudo_echo () { - echo "$1" | sudo tee -a "$2" > /dev/null + echo "$1" | do_sudo tee -a "$2" > /dev/null } NXECUTABLE="$(dirname $0)/nx_huge_pages_test" diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c index 87011965dc..eda88080c1 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -9,8 +9,6 @@ * Verifies expected behavior of controlling guest access to * MSR_PLATFORM_INFO. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -26,36 +24,18 @@ static void guest_code(void) { uint64_t msr_platform_info; + uint8_t vector; - for (;;) { - msr_platform_info = rdmsr(MSR_PLATFORM_INFO); - GUEST_SYNC(msr_platform_info); - asm volatile ("inc %r11"); - } -} - -static void test_msr_platform_info_enabled(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, true); - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + GUEST_SYNC(true); + msr_platform_info = rdmsr(MSR_PLATFORM_INFO); + GUEST_ASSERT_EQ(msr_platform_info & MSR_PLATFORM_INFO_MAX_TURBO_RATIO, + MSR_PLATFORM_INFO_MAX_TURBO_RATIO); - get_ucall(vcpu, &uc); - TEST_ASSERT(uc.cmd == UCALL_SYNC, - "Received ucall other than UCALL_SYNC: %lu", uc.cmd); - TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) == - MSR_PLATFORM_INFO_MAX_TURBO_RATIO, - "Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.", - MSR_PLATFORM_INFO_MAX_TURBO_RATIO); -} + GUEST_SYNC(false); + vector = rdmsr_safe(MSR_PLATFORM_INFO, &msr_platform_info); + GUEST_ASSERT_EQ(vector, GP_VECTOR); -static void test_msr_platform_info_disabled(struct kvm_vcpu *vcpu) -{ - vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, false); - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN); + GUEST_DONE(); } int main(int argc, char *argv[]) @@ -63,6 +43,7 @@ int main(int argc, char *argv[]) struct kvm_vcpu *vcpu; struct kvm_vm *vm; uint64_t msr_platform_info; + struct ucall uc; TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO)); @@ -71,8 +52,26 @@ int main(int argc, char *argv[]) msr_platform_info = vcpu_get_msr(vcpu, MSR_PLATFORM_INFO); vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO); - test_msr_platform_info_enabled(vcpu); - test_msr_platform_info_disabled(vcpu); + + for (;;) { + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + vm_enable_cap(vm, KVM_CAP_MSR_PLATFORM_INFO, uc.args[1]); + break; + case UCALL_DONE: + goto done; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + default: + TEST_FAIL("Unexpected ucall %lu", uc.cmd); + break; + } + } + +done: vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, msr_platform_info); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c index 26c85815f7..96446134c0 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c @@ -2,8 +2,6 @@ /* * Copyright (C) 2023, Tencent, Inc. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <x86intrin.h> #include "pmu.h" @@ -21,7 +19,6 @@ static uint8_t kvm_pmu_version; static bool kvm_has_perf_caps; -static bool is_forced_emulation_enabled; static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, void *guest_code, @@ -31,11 +28,7 @@ static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, struct kvm_vm *vm; vm = vm_create_with_one_vcpu(vcpu, guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(*vcpu); - sync_global_to_guest(vm, kvm_pmu_version); - sync_global_to_guest(vm, is_forced_emulation_enabled); /* * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling @@ -630,7 +623,6 @@ int main(int argc, char *argv[]) kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION); kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM); - is_forced_emulation_enabled = kvm_is_forced_emulation_enabled(); test_intel_counters(); diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index 3c85d1ae98..26b3e7efe5 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -9,9 +9,6 @@ * Verifies the expected behavior of allow lists and deny lists for * virtual PMU events. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ - #include "kvm_util.h" #include "pmu.h" #include "processor.h" @@ -337,9 +334,6 @@ static void test_pmu_config_disable(void (*guest_code)(void)) vm_enable_cap(vm, KVM_CAP_PMU_CAPABILITY, KVM_PMU_CAP_DISABLE); vcpu = vm_vcpu_add(vm, 0, guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - TEST_ASSERT(!sanity_check_pmu(vcpu), "Guest should not be able to use disabled PMU."); @@ -876,9 +870,6 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - TEST_REQUIRE(sanity_check_pmu(vcpu)); if (use_amd_pmu()) diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c index e0f642d2a3..82a8d88b53 100644 --- a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c +++ b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c @@ -2,7 +2,6 @@ /* * Copyright (C) 2022, Google LLC. */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <limits.h> #include <pthread.h> diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c index 366cf18600..d691d86e5b 100644 --- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c +++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c @@ -4,7 +4,6 @@ * * Copyright (C) 2020, Red Hat, Inc. */ -#define _GNU_SOURCE /* for program_invocation_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c index 3610981d91..c021c0795a 100644 --- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c +++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c @@ -10,7 +10,6 @@ * That bug allowed a user-mode program that called the KVM_SET_SREGS * ioctl to put a VCPU's local APIC into an invalid state. */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c new file mode 100644 index 0000000000..3fb967f40c --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/kvm.h> +#include <linux/psp-sev.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <stdlib.h> +#include <errno.h> +#include <pthread.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "kselftest.h" + +#define SVM_SEV_FEAT_DEBUG_SWAP 32u + +/* + * Some features may have hidden dependencies, or may only work + * for certain VM types. Err on the side of safety and don't + * expect that all supported features can be passed one by one + * to KVM_SEV_INIT2. + * + * (Well, right now there's only one...) + */ +#define KNOWN_FEATURES SVM_SEV_FEAT_DEBUG_SWAP + +int kvm_fd; +u64 supported_vmsa_features; +bool have_sev_es; + +static int __sev_ioctl(int vm_fd, int cmd_id, void *data) +{ + struct kvm_sev_cmd cmd = { + .id = cmd_id, + .data = (uint64_t)data, + .sev_fd = open_sev_dev_path_or_exit(), + }; + int ret; + + ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd); + TEST_ASSERT(ret < 0 || cmd.error == SEV_RET_SUCCESS, + "%d failed: fw error: %d\n", + cmd_id, cmd.error); + + return ret; +} + +static void test_init2(unsigned long vm_type, struct kvm_sev_init *init) +{ + struct kvm_vm *vm; + int ret; + + vm = vm_create_barebones_type(vm_type); + ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init); + TEST_ASSERT(ret == 0, + "KVM_SEV_INIT2 return code is %d (expected 0), errno: %d", + ret, errno); + kvm_vm_free(vm); +} + +static void test_init2_invalid(unsigned long vm_type, struct kvm_sev_init *init, const char *msg) +{ + struct kvm_vm *vm; + int ret; + + vm = vm_create_barebones_type(vm_type); + ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init); + TEST_ASSERT(ret == -1 && errno == EINVAL, + "KVM_SEV_INIT2 should fail, %s.", + msg); + kvm_vm_free(vm); +} + +void test_vm_types(void) +{ + test_init2(KVM_X86_SEV_VM, &(struct kvm_sev_init){}); + + /* + * TODO: check that unsupported types cannot be created. Probably + * a separate selftest. + */ + if (have_sev_es) + test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){}); + + test_init2_invalid(0, &(struct kvm_sev_init){}, + "VM type is KVM_X86_DEFAULT_VM"); + if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)) + test_init2_invalid(KVM_X86_SW_PROTECTED_VM, &(struct kvm_sev_init){}, + "VM type is KVM_X86_SW_PROTECTED_VM"); +} + +void test_flags(uint32_t vm_type) +{ + int i; + + for (i = 0; i < 32; i++) + test_init2_invalid(vm_type, + &(struct kvm_sev_init){ .flags = BIT(i) }, + "invalid flag"); +} + +void test_features(uint32_t vm_type, uint64_t supported_features) +{ + int i; + + for (i = 0; i < 64; i++) { + if (!(supported_features & BIT_ULL(i))) + test_init2_invalid(vm_type, + &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) }, + "unknown feature"); + else if (KNOWN_FEATURES & BIT_ULL(i)) + test_init2(vm_type, + &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) }); + } +} + +int main(int argc, char *argv[]) +{ + int kvm_fd = open_kvm_dev_path_or_exit(); + bool have_sev; + + TEST_REQUIRE(__kvm_has_device_attr(kvm_fd, KVM_X86_GRP_SEV, + KVM_X86_SEV_VMSA_FEATURES) == 0); + kvm_device_attr_get(kvm_fd, KVM_X86_GRP_SEV, + KVM_X86_SEV_VMSA_FEATURES, + &supported_vmsa_features); + + have_sev = kvm_cpu_has(X86_FEATURE_SEV); + TEST_ASSERT(have_sev == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM)), + "sev: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)", + kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_VM); + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM)); + have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES); + + TEST_ASSERT(have_sev_es == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_ES_VM)), + "sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)", + kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM); + + test_vm_types(); + + test_flags(KVM_X86_SEV_VM); + if (have_sev_es) + test_flags(KVM_X86_SEV_ES_VM); + + test_features(KVM_X86_SEV_VM, 0); + if (have_sev_es) + test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c index 026779f3ed..7c70c0da4f 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c @@ -4,6 +4,7 @@ #include <stdlib.h> #include <string.h> #include <sys/ioctl.h> +#include <math.h> #include "test_util.h" #include "kvm_util.h" @@ -13,6 +14,8 @@ #include "sev.h" +#define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM) + static void guest_sev_es_code(void) { /* TODO: Check CPUID after GHCB-based hypercall support is added. */ @@ -35,13 +38,98 @@ static void guest_sev_code(void) GUEST_DONE(); } +/* Stash state passed via VMSA before any compiled code runs. */ +extern void guest_code_xsave(void); +asm("guest_code_xsave:\n" + "mov $-1, %eax\n" + "mov $-1, %edx\n" + "xsave (%rdi)\n" + "jmp guest_sev_es_code"); + +static void compare_xsave(u8 *from_host, u8 *from_guest) +{ + int i; + bool bad = false; + for (i = 0; i < 4095; i++) { + if (from_host[i] != from_guest[i]) { + printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]); + bad = true; + } + } + + if (bad) + abort(); +} + +static void test_sync_vmsa(uint32_t policy) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + vm_vaddr_t gva; + void *hva; + + double x87val = M_PI; + struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 }; + struct kvm_sregs sregs; + struct kvm_xcrs xcrs = { + .nr_xcrs = 1, + .xcrs[0].xcr = 0, + .xcrs[0].value = XFEATURE_MASK_X87_AVX, + }; + + vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu); + gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR, + MEM_REGION_TEST_DATA); + hva = addr_gva2hva(vm, gva); + + vcpu_args_set(vcpu, 1, gva); + + vcpu_sregs_get(vcpu, &sregs); + sregs.cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXSAVE; + vcpu_sregs_set(vcpu, &sregs); + + vcpu_xcrs_set(vcpu, &xcrs); + asm("fninit\n" + "vpcmpeqb %%ymm4, %%ymm4, %%ymm4\n" + "fldl %3\n" + "xsave (%2)\n" + "fstp %%st\n" + : "=m"(xsave) + : "A"(XFEATURE_MASK_X87_AVX), "r"(&xsave), "m" (x87val) + : "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"); + vcpu_xsave_set(vcpu, &xsave); + + vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL); + + /* This page is shared, so make it decrypted. */ + memset(hva, 0, 4096); + + vcpu_run(vcpu); + + TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT, + "Wanted SYSTEM_EVENT, got %s", + exit_reason_str(vcpu->run->exit_reason)); + TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM); + TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1); + TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ); + + compare_xsave((u8 *)&xsave, (u8 *)hva); + + kvm_vm_free(vm); +} + static void test_sev(void *guest_code, uint64_t policy) { struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; - vm = vm_sev_create_with_one_vcpu(policy, guest_code, &vcpu); + uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; + + vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu); + + /* TODO: Validate the measurement is as expected. */ + vm_sev_launch(vm, policy, NULL); for (;;) { vcpu_run(vcpu); @@ -82,6 +170,12 @@ int main(int argc, char *argv[]) if (kvm_cpu_has(X86_FEATURE_SEV_ES)) { test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG); test_sev(guest_sev_es_code, SEV_POLICY_ES); + + if (kvm_has_cap(KVM_CAP_XCRS) && + (xgetbv(0) & XFEATURE_MASK_X87_AVX) == XFEATURE_MASK_X87_AVX) { + test_sync_vmsa(0); + test_sync_vmsa(SEV_POLICY_NO_DBG); + } } return 0; diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c index 416207c38a..fabeeaddfb 100644 --- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c +++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c @@ -5,9 +5,6 @@ * Test that KVM emulates instructions in response to EPT violations when * allow_smaller_maxphyaddr is enabled and guest.MAXPHYADDR < host.MAXPHYADDR. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ - #include "flds_emulation.h" #include "test_util.h" @@ -60,9 +57,6 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu, guest_code); vcpu_args_set(vcpu, 1, kvm_is_tdp_enabled()); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - vcpu_set_cpuid_property(vcpu, X86_PROPERTY_MAX_PHY_ADDR, MAXPHYADDR); rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE); diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index e18b86666e..55c88d664a 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -4,7 +4,6 @@ * * Tests for SMM. */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 88b58aab72..1c756db329 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -6,7 +6,6 @@ * * Tests for vCPU state save/restore, including nested guest state. */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c index 32bef39bec..916e04248f 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c @@ -93,9 +93,6 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler); vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler); diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c index d6fcdcc3af..00135cbba3 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c @@ -48,12 +48,9 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - vcpu_alloc_svm(vm, &svm_gva); - vcpu_args_set(vcpu, 2, svm_gva, vm->idt); + vcpu_args_set(vcpu, 2, svm_gva, vm->arch.idt); vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN); diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c index 0c7ce3d4e8..7b6481d6c0 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c @@ -152,9 +152,6 @@ static void run_test(bool is_nmi) vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler); vm_install_exception_handler(vm, BP_VECTOR, guest_bp_handler); vm_install_exception_handler(vm, INT_NR, guest_int_handler); @@ -166,7 +163,7 @@ static void run_test(bool is_nmi) idt_alt_vm = vm_vaddr_alloc_page(vm); idt_alt = addr_gva2hva(vm, idt_alt_vm); - idt = addr_gva2hva(vm, vm->idt); + idt = addr_gva2hva(vm, vm->arch.idt); memcpy(idt_alt, idt, getpagesize()); } else { idt_alt_vm = 0; diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index adb5593daf..8fa3948b01 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -8,8 +8,6 @@ * including requesting an invalid register set, updates to/from values * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c index dcbb3c29fb..57f157c06b 100644 --- a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c +++ b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c @@ -17,14 +17,11 @@ * delivered into the guest or not. * */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <pthread.h> #include <inttypes.h> #include <string.h> #include <time.h> -#include "kvm_util_base.h" #include "kvm_util.h" #include "mce.h" #include "processor.h" @@ -285,10 +282,6 @@ int main(int argc, char *argv[]) cmcidis_vcpu = create_vcpu_with_mce_cap(vm, 1, false, cmci_disabled_guest_code); cmci_vcpu = create_vcpu_with_mce_cap(vm, 2, true, cmci_enabled_guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(ucna_vcpu); - vcpu_init_descriptor_tables(cmcidis_vcpu); - vcpu_init_descriptor_tables(cmci_vcpu); vm_install_exception_handler(vm, CMCI_VECTOR, guest_cmci_handler); vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c index f4f61a2d24..32b2794b78 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c @@ -4,8 +4,6 @@ * * Tests for exiting into userspace on registered MSRs */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <sys/ioctl.h> #include "kvm_test_harness.h" @@ -13,8 +11,6 @@ #include "kvm_util.h" #include "vmx.h" -static bool fep_available; - #define MSR_NON_EXISTENT 0x474f4f00 static u64 deny_bits = 0; @@ -258,7 +254,7 @@ static void guest_code_filter_allow(void) GUEST_ASSERT(data == 2); GUEST_ASSERT(guest_exception_count == 0); - if (fep_available) { + if (is_forced_emulation_enabled) { /* Let userspace know we aren't done. */ GUEST_SYNC(0); @@ -520,8 +516,6 @@ KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow) uint64_t cmd; int rc; - sync_global_to_guest(vm, fep_available); - rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER); @@ -531,9 +525,6 @@ KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow) vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); /* Process guest code userspace exits. */ @@ -551,7 +542,7 @@ KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow) vcpu_run(vcpu); cmd = process_ucall(vcpu); - if (fep_available) { + if (is_forced_emulation_enabled) { TEST_ASSERT_EQ(cmd, UCALL_SYNC); vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler); @@ -774,7 +765,5 @@ KVM_ONE_VCPU_TEST(user_msr, user_exit_msr_flags, NULL) int main(int argc, char *argv[]) { - fep_available = kvm_is_forced_emulation_enabled(); - return test_harness_run(argc, argv); } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c index 977948fd52..fa512d0332 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -4,9 +4,6 @@ * * Copyright (C) 2018, Red Hat, Inc. */ - -#define _GNU_SOURCE /* for program_invocation_name */ - #include <stdio.h> #include <stdlib.h> #include <linux/bitmap.h> diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c index fad3634fd9..3fd6eceab4 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c @@ -115,9 +115,6 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu, guest_code); get_set_sigalrm_vcpu(vcpu); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); /* diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c index ea0cb3cae0..7c92536551 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c @@ -10,7 +10,6 @@ * and check it can be retrieved with KVM_GET_MSR, also test * the invalid LBR formats are rejected. */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <sys/ioctl.h> #include <linux/bitmap.h> @@ -86,9 +85,6 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, guest_wrmsr_perf_capabilities, guest_code) struct ucall uc; int r, i; - vm_init_descriptor_tables(vcpu->vm); - vcpu_init_descriptor_tables(vcpu); - vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); vcpu_args_set(vcpu, 1, host_cap.capabilities); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c index affc328001..00dd2ac07a 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c @@ -9,7 +9,6 @@ * value instead of partially decayed timer value * */ -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c index 725c206ba0..a76078a08f 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c @@ -19,8 +19,6 @@ * Migration is a command line option. When used on non-numa machines will * exit with error. Test is still usefull on non-numa for testing IPIs. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <getopt.h> #include <pthread.h> #include <inttypes.h> @@ -410,8 +408,6 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(¶ms[0].vcpu, halter_guest_code); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(params[0].vcpu); vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler); virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c index ab75b873a4..69849acd95 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0-only -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c index 25a0b0db5c..95ce192d07 100644 --- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c @@ -109,9 +109,6 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&vcpu, guest_code); run = vcpu->run; - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); - while (1) { vcpu_run(vcpu); diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index d2ea0435f4..a59b3c799b 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -125,7 +125,7 @@ struct compat_vcpu_runstate_info { uint32_t state; uint64_t state_entry_time; uint64_t time[5]; -} __attribute__((__packed__));; +} __attribute__((__packed__)); struct arch_vcpu_info { unsigned long cr2; @@ -171,8 +171,9 @@ static volatile bool guest_saw_irq; static void evtchn_handler(struct ex_regs *regs) { struct vcpu_info *vi = (void *)VCPU_INFO_VADDR; - vi->evtchn_upcall_pending = 0; - vi->evtchn_pending_sel = 0; + + vcpu_arch_put_guest(vi->evtchn_upcall_pending, 0); + vcpu_arch_put_guest(vi->evtchn_pending_sel, 0); guest_saw_irq = true; GUEST_SYNC(TEST_GUEST_SAW_IRQ); @@ -380,20 +381,6 @@ wait_for_timer: GUEST_SYNC(TEST_DONE); } -static int cmp_timespec(struct timespec *a, struct timespec *b) -{ - if (a->tv_sec > b->tv_sec) - return 1; - else if (a->tv_sec < b->tv_sec) - return -1; - else if (a->tv_nsec > b->tv_nsec) - return 1; - else if (a->tv_nsec < b->tv_nsec) - return -1; - else - return 0; -} - static struct shared_info *shinfo; static struct vcpu_info *vinfo; static struct kvm_vcpu *vcpu; @@ -449,7 +436,6 @@ static void *juggle_shinfo_state(void *arg) int main(int argc, char *argv[]) { - struct timespec min_ts, max_ts, vm_ts; struct kvm_xen_hvm_attr evt_reset; struct kvm_vm *vm; pthread_t thread; @@ -468,8 +454,6 @@ int main(int argc, char *argv[]) bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND); bool has_shinfo_hva = !!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA); - clock_gettime(CLOCK_REALTIME, &min_ts); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); /* Map a region for the shared_info page */ @@ -553,8 +537,6 @@ int main(int argc, char *argv[]) }; vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec); - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vcpu); vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler); if (do_runstate_tests) { @@ -1010,7 +992,6 @@ int main(int argc, char *argv[]) vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset); alarm(0); - clock_gettime(CLOCK_REALTIME, &max_ts); /* * Just a *really* basic check that things are being put in the @@ -1019,6 +1000,8 @@ int main(int argc, char *argv[]) */ struct pvclock_wall_clock *wc; struct pvclock_vcpu_time_info *ti, *ti2; + struct kvm_clock_data kcdata; + long long delta; wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00); ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20); @@ -1034,12 +1017,34 @@ int main(int argc, char *argv[]) ti2->tsc_shift, ti2->flags); } - vm_ts.tv_sec = wc->sec; - vm_ts.tv_nsec = wc->nsec; TEST_ASSERT(wc->version && !(wc->version & 1), "Bad wallclock version %x", wc->version); - TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old"); - TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new"); + + vm_ioctl(vm, KVM_GET_CLOCK, &kcdata); + + if (kcdata.flags & KVM_CLOCK_REALTIME) { + if (verbose) { + printf("KVM_GET_CLOCK clock: %lld.%09lld\n", + kcdata.clock / NSEC_PER_SEC, kcdata.clock % NSEC_PER_SEC); + printf("KVM_GET_CLOCK realtime: %lld.%09lld\n", + kcdata.realtime / NSEC_PER_SEC, kcdata.realtime % NSEC_PER_SEC); + } + + delta = (wc->sec * NSEC_PER_SEC + wc->nsec) - (kcdata.realtime - kcdata.clock); + + /* + * KVM_GET_CLOCK gives CLOCK_REALTIME which jumps on leap seconds updates but + * unfortunately KVM doesn't currently offer a CLOCK_TAI alternative. Accept 1s + * delta as testing clock accuracy is not the goal here. The test just needs to + * check that the value in shinfo is somewhat sane. + */ + TEST_ASSERT(llabs(delta) < NSEC_PER_SEC, + "Guest's epoch from shinfo %d.%09d differs from KVM_GET_CLOCK %lld.%lld", + wc->sec, wc->nsec, (kcdata.realtime - kcdata.clock) / NSEC_PER_SEC, + (kcdata.realtime - kcdata.clock) % NSEC_PER_SEC); + } else { + pr_info("Missing KVM_CLOCK_REALTIME, skipping shinfo epoch sanity check\n"); + } TEST_ASSERT(ti->version && !(ti->version & 1), "Bad time_info version %x", ti->version); diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c index 167c97abff..f331a4e9ba 100644 --- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c +++ b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c @@ -4,8 +4,6 @@ * * Tests for the IA32_XSS MSR. */ - -#define _GNU_SOURCE /* for program_invocation_short_name */ #include <sys/ioctl.h> #include "test_util.h" diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c index a6f89aaea7..3b26bf3cf5 100644 --- a/tools/testing/selftests/landlock/base_test.c +++ b/tools/testing/selftests/landlock/base_test.c @@ -9,6 +9,7 @@ #define _GNU_SOURCE #include <errno.h> #include <fcntl.h> +#include <linux/keyctl.h> #include <linux/landlock.h> #include <string.h> #include <sys/prctl.h> @@ -75,7 +76,7 @@ TEST(abi_version) const struct landlock_ruleset_attr ruleset_attr = { .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE, }; - ASSERT_EQ(4, landlock_create_ruleset(NULL, 0, + ASSERT_EQ(5, landlock_create_ruleset(NULL, 0, LANDLOCK_CREATE_RULESET_VERSION)); ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, @@ -326,4 +327,77 @@ TEST(ruleset_fd_transfer) ASSERT_EQ(EXIT_SUCCESS, WEXITSTATUS(status)); } +TEST(cred_transfer) +{ + struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR, + }; + int ruleset_fd, dir_fd; + pid_t child; + int status; + + drop_caps(_metadata); + + dir_fd = open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC); + EXPECT_LE(0, dir_fd); + EXPECT_EQ(0, close(dir_fd)); + + /* Denies opening directories. */ + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)); + EXPECT_EQ(0, close(ruleset_fd)); + + /* Checks ruleset enforcement. */ + EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC)); + EXPECT_EQ(EACCES, errno); + + /* Needed for KEYCTL_SESSION_TO_PARENT permission checks */ + EXPECT_NE(-1, syscall(__NR_keyctl, KEYCTL_JOIN_SESSION_KEYRING, NULL, 0, + 0, 0)) + { + TH_LOG("Failed to join session keyring: %s", strerror(errno)); + } + + child = fork(); + ASSERT_LE(0, child); + if (child == 0) { + /* Checks ruleset enforcement. */ + EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC)); + EXPECT_EQ(EACCES, errno); + + /* + * KEYCTL_SESSION_TO_PARENT is a no-op unless we have a + * different session keyring in the child, so make that happen. + */ + EXPECT_NE(-1, syscall(__NR_keyctl, KEYCTL_JOIN_SESSION_KEYRING, + NULL, 0, 0, 0)); + + /* + * KEYCTL_SESSION_TO_PARENT installs credentials on the parent + * that never go through the cred_prepare hook, this path uses + * cred_transfer instead. + */ + EXPECT_EQ(0, syscall(__NR_keyctl, KEYCTL_SESSION_TO_PARENT, 0, + 0, 0, 0)); + + /* Re-checks ruleset enforcement. */ + EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC)); + EXPECT_EQ(EACCES, errno); + + _exit(_metadata->exit_code); + return; + } + + EXPECT_EQ(child, waitpid(child, &status, 0)); + EXPECT_EQ(1, WIFEXITED(status)); + EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status)); + + /* Re-checks ruleset enforcement. */ + EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC)); + EXPECT_EQ(EACCES, errno); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config index 0086efaa7b..29af19c4e9 100644 --- a/tools/testing/selftests/landlock/config +++ b/tools/testing/selftests/landlock/config @@ -2,6 +2,7 @@ CONFIG_CGROUPS=y CONFIG_CGROUP_SCHED=y CONFIG_INET=y CONFIG_IPV6=y +CONFIG_KEYS=y CONFIG_NET=y CONFIG_NET_NS=y CONFIG_OVERLAY_FS=y diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index 27744524df..7d063c652b 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -8,22 +8,35 @@ */ #define _GNU_SOURCE +#include <asm/termbits.h> #include <fcntl.h> #include <libgen.h> +#include <linux/fiemap.h> #include <linux/landlock.h> #include <linux/magic.h> #include <sched.h> +#include <stddef.h> #include <stdio.h> #include <string.h> #include <sys/capability.h> +#include <sys/ioctl.h> #include <sys/mount.h> #include <sys/prctl.h> #include <sys/sendfile.h> +#include <sys/socket.h> #include <sys/stat.h> #include <sys/sysmacros.h> +#include <sys/un.h> #include <sys/vfs.h> #include <unistd.h> +/* + * Intentionally included last to work around header conflict. + * See https://sourceware.org/glibc/wiki/Synchronizing_Headers. + */ +#include <linux/fs.h> +#include <linux/mount.h> + #include "common.h" #ifndef renameat2 @@ -35,6 +48,13 @@ int renameat2(int olddirfd, const char *oldpath, int newdirfd, } #endif +#ifndef open_tree +int open_tree(int dfd, const char *filename, unsigned int flags) +{ + return syscall(__NR_open_tree, dfd, filename, flags); +} +#endif + #ifndef RENAME_EXCHANGE #define RENAME_EXCHANGE (1 << 1) #endif @@ -536,9 +556,10 @@ TEST_F_FORK(layout1, inval) LANDLOCK_ACCESS_FS_EXECUTE | \ LANDLOCK_ACCESS_FS_WRITE_FILE | \ LANDLOCK_ACCESS_FS_READ_FILE | \ - LANDLOCK_ACCESS_FS_TRUNCATE) + LANDLOCK_ACCESS_FS_TRUNCATE | \ + LANDLOCK_ACCESS_FS_IOCTL_DEV) -#define ACCESS_LAST LANDLOCK_ACCESS_FS_TRUNCATE +#define ACCESS_LAST LANDLOCK_ACCESS_FS_IOCTL_DEV #define ACCESS_ALL ( \ ACCESS_FILE | \ @@ -743,6 +764,9 @@ static int create_ruleset(struct __test_metadata *const _metadata, } for (i = 0; rules[i].path; i++) { + if (!rules[i].access) + continue; + add_path_beneath(_metadata, ruleset_fd, rules[i].access, rules[i].path); } @@ -2384,6 +2408,43 @@ TEST_F_FORK(layout1, refer_denied_by_default4) layer_dir_s1d1_refer); } +/* + * Tests walking through a denied root mount. + */ +TEST_F_FORK(layout1, refer_mount_root_deny) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_MAKE_DIR, + }; + int root_fd, ruleset_fd; + + /* Creates a mount object from a non-mount point. */ + set_cap(_metadata, CAP_SYS_ADMIN); + root_fd = + open_tree(AT_FDCWD, dir_s1d1, + AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); + clear_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_LE(0, root_fd); + + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)); + EXPECT_EQ(0, close(ruleset_fd)); + + /* Link denied by Landlock: EACCES. */ + EXPECT_EQ(-1, linkat(root_fd, ".", root_fd, "does_not_exist", 0)); + EXPECT_EQ(EACCES, errno); + + /* renameat2() always returns EBUSY. */ + EXPECT_EQ(-1, renameat2(root_fd, ".", root_fd, "does_not_exist", 0)); + EXPECT_EQ(EBUSY, errno); + + EXPECT_EQ(0, close(root_fd)); +} + TEST_F_FORK(layout1, reparent_link) { const struct rule layer1[] = { @@ -3451,7 +3512,7 @@ TEST_F_FORK(layout1, truncate_unhandled) LANDLOCK_ACCESS_FS_WRITE_FILE; int ruleset_fd; - /* Enable Landlock. */ + /* Enables Landlock. */ ruleset_fd = create_ruleset(_metadata, handled, rules); ASSERT_LE(0, ruleset_fd); @@ -3534,7 +3595,7 @@ TEST_F_FORK(layout1, truncate) LANDLOCK_ACCESS_FS_TRUNCATE; int ruleset_fd; - /* Enable Landlock. */ + /* Enables Landlock. */ ruleset_fd = create_ruleset(_metadata, handled, rules); ASSERT_LE(0, ruleset_fd); @@ -3760,7 +3821,7 @@ TEST_F_FORK(ftruncate, open_and_ftruncate) }; int fd, ruleset_fd; - /* Enable Landlock. */ + /* Enables Landlock. */ ruleset_fd = create_ruleset(_metadata, variant->handled, rules); ASSERT_LE(0, ruleset_fd); enforce_ruleset(_metadata, ruleset_fd); @@ -3837,20 +3898,469 @@ TEST_F_FORK(ftruncate, open_and_ftruncate_in_different_processes) ASSERT_EQ(0, close(socket_fds[1])); } -TEST(memfd_ftruncate) +/* Invokes the FS_IOC_GETFLAGS IOCTL and returns its errno or 0. */ +static int test_fs_ioc_getflags_ioctl(int fd) { - int fd; + uint32_t flags; + + if (ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0) + return errno; + return 0; +} + +TEST(memfd_ftruncate_and_ioctl) +{ + const struct landlock_ruleset_attr attr = { + .handled_access_fs = ACCESS_ALL, + }; + int ruleset_fd, fd, i; + + /* + * We exercise the same test both with and without Landlock enabled, to + * ensure that it behaves the same in both cases. + */ + for (i = 0; i < 2; i++) { + /* Creates a new memfd. */ + fd = memfd_create("name", MFD_CLOEXEC); + ASSERT_LE(0, fd); + + /* + * Checks that operations associated with the opened file + * (ftruncate, ioctl) are permitted on file descriptors that are + * created in ways other than open(2). + */ + EXPECT_EQ(0, test_ftruncate(fd)); + EXPECT_EQ(0, test_fs_ioc_getflags_ioctl(fd)); + + ASSERT_EQ(0, close(fd)); + + /* Enables Landlock. */ + ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + } +} + +static int test_fionread_ioctl(int fd) +{ + size_t sz = 0; + + if (ioctl(fd, FIONREAD, &sz) < 0 && errno == EACCES) + return errno; + return 0; +} + +TEST_F_FORK(layout1, o_path_ftruncate_and_ioctl) +{ + const struct landlock_ruleset_attr attr = { + .handled_access_fs = ACCESS_ALL, + }; + int ruleset_fd, fd; + + /* + * Checks that for files opened with O_PATH, both ioctl(2) and + * ftruncate(2) yield EBADF, as it is documented in open(2) for the + * O_PATH flag. + */ + fd = open(dir_s1d1, O_PATH | O_CLOEXEC); + ASSERT_LE(0, fd); + + EXPECT_EQ(EBADF, test_ftruncate(fd)); + EXPECT_EQ(EBADF, test_fs_ioc_getflags_ioctl(fd)); + + ASSERT_EQ(0, close(fd)); + + /* Enables Landlock. */ + ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* + * Checks that after enabling Landlock, + * - the file can still be opened with O_PATH + * - both ioctl and truncate still yield EBADF (not EACCES). + */ + fd = open(dir_s1d1, O_PATH | O_CLOEXEC); + ASSERT_LE(0, fd); + + EXPECT_EQ(EBADF, test_ftruncate(fd)); + EXPECT_EQ(EBADF, test_fs_ioc_getflags_ioctl(fd)); + + ASSERT_EQ(0, close(fd)); +} + +/* + * ioctl_error - generically call the given ioctl with a pointer to a + * sufficiently large zeroed-out memory region. + * + * Returns the IOCTLs error, or 0. + */ +static int ioctl_error(struct __test_metadata *const _metadata, int fd, + unsigned int cmd) +{ + char buf[128]; /* sufficiently large */ + int res, stdinbak_fd; + + /* + * Depending on the IOCTL command, parts of the zeroed-out buffer might + * be interpreted as file descriptor numbers. We do not want to + * accidentally operate on file descriptor 0 (stdin), so we temporarily + * move stdin to a different FD and close FD 0 for the IOCTL call. + */ + stdinbak_fd = dup(0); + ASSERT_LT(0, stdinbak_fd); + ASSERT_EQ(0, close(0)); + + /* Invokes the IOCTL with a zeroed-out buffer. */ + bzero(&buf, sizeof(buf)); + res = ioctl(fd, cmd, &buf); + + /* Restores the old FD 0 and closes the backup FD. */ + ASSERT_EQ(0, dup2(stdinbak_fd, 0)); + ASSERT_EQ(0, close(stdinbak_fd)); + + if (res < 0) + return errno; + + return 0; +} + +/* Define some linux/falloc.h IOCTL commands which are not available in uapi headers. */ +struct space_resv { + __s16 l_type; + __s16 l_whence; + __s64 l_start; + __s64 l_len; /* len == 0 means until end of file */ + __s32 l_sysid; + __u32 l_pid; + __s32 l_pad[4]; /* reserved area */ +}; + +#define FS_IOC_RESVSP _IOW('X', 40, struct space_resv) +#define FS_IOC_UNRESVSP _IOW('X', 41, struct space_resv) +#define FS_IOC_RESVSP64 _IOW('X', 42, struct space_resv) +#define FS_IOC_UNRESVSP64 _IOW('X', 43, struct space_resv) +#define FS_IOC_ZERO_RANGE _IOW('X', 57, struct space_resv) + +/* + * Tests a series of blanket-permitted and denied IOCTLs. + */ +TEST_F_FORK(layout1, blanket_permitted_ioctls) +{ + const struct landlock_ruleset_attr attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_IOCTL_DEV, + }; + int ruleset_fd, fd; + + /* Enables Landlock. */ + ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); - fd = memfd_create("name", MFD_CLOEXEC); + fd = open("/dev/null", O_RDWR | O_CLOEXEC); ASSERT_LE(0, fd); /* - * Checks that ftruncate is permitted on file descriptors that are - * created in ways other than open(2). + * Checks permitted commands. + * These ones may return errors, but should not be blocked by Landlock. */ - EXPECT_EQ(0, test_ftruncate(fd)); + EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIOCLEX)); + EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIONCLEX)); + EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIONBIO)); + EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIOASYNC)); + EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIOQSIZE)); + EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIFREEZE)); + EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FITHAW)); + EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FS_IOC_FIEMAP)); + EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIGETBSZ)); + EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FICLONE)); + EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FICLONERANGE)); + EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIDEDUPERANGE)); + EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FS_IOC_GETFSUUID)); + EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FS_IOC_GETFSSYSFSPATH)); + + /* + * Checks blocked commands. + * A call to a blocked IOCTL command always returns EACCES. + */ + EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FIONREAD)); + EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_GETFLAGS)); + EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_SETFLAGS)); + EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_FSGETXATTR)); + EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_FSSETXATTR)); + EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FIBMAP)); + EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_RESVSP)); + EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_RESVSP64)); + EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_UNRESVSP)); + EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_UNRESVSP64)); + EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_ZERO_RANGE)); + + /* Default case is also blocked. */ + EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, 0xc00ffeee)); + + ASSERT_EQ(0, close(fd)); +} + +/* + * Named pipes are not governed by the LANDLOCK_ACCESS_FS_IOCTL_DEV right, + * because they are not character or block devices. + */ +TEST_F_FORK(layout1, named_pipe_ioctl) +{ + pid_t child_pid; + int fd, ruleset_fd; + const char *const path = file1_s1d1; + const struct landlock_ruleset_attr attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_IOCTL_DEV, + }; + + ASSERT_EQ(0, unlink(path)); + ASSERT_EQ(0, mkfifo(path, 0600)); + + /* Enables Landlock. */ + ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* The child process opens the pipe for writing. */ + child_pid = fork(); + ASSERT_NE(-1, child_pid); + if (child_pid == 0) { + fd = open(path, O_WRONLY); + close(fd); + exit(0); + } + + fd = open(path, O_RDONLY); + ASSERT_LE(0, fd); + + /* FIONREAD is implemented by pipefifo_fops. */ + EXPECT_EQ(0, test_fionread_ioctl(fd)); ASSERT_EQ(0, close(fd)); + ASSERT_EQ(0, unlink(path)); + + ASSERT_EQ(child_pid, waitpid(child_pid, NULL, 0)); +} + +/* For named UNIX domain sockets, no IOCTL restrictions apply. */ +TEST_F_FORK(layout1, named_unix_domain_socket_ioctl) +{ + const char *const path = file1_s1d1; + int srv_fd, cli_fd, ruleset_fd; + socklen_t size; + struct sockaddr_un srv_un, cli_un; + const struct landlock_ruleset_attr attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_IOCTL_DEV, + }; + + /* Sets up a server */ + srv_un.sun_family = AF_UNIX; + strncpy(srv_un.sun_path, path, sizeof(srv_un.sun_path)); + + ASSERT_EQ(0, unlink(path)); + srv_fd = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_LE(0, srv_fd); + + size = offsetof(struct sockaddr_un, sun_path) + strlen(srv_un.sun_path); + ASSERT_EQ(0, bind(srv_fd, (struct sockaddr *)&srv_un, size)); + ASSERT_EQ(0, listen(srv_fd, 10 /* qlen */)); + + /* Enables Landlock. */ + ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Sets up a client connection to it */ + cli_un.sun_family = AF_UNIX; + cli_fd = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_LE(0, cli_fd); + + size = offsetof(struct sockaddr_un, sun_path) + strlen(cli_un.sun_path); + ASSERT_EQ(0, bind(cli_fd, (struct sockaddr *)&cli_un, size)); + + bzero(&cli_un, sizeof(cli_un)); + cli_un.sun_family = AF_UNIX; + strncpy(cli_un.sun_path, path, sizeof(cli_un.sun_path)); + size = offsetof(struct sockaddr_un, sun_path) + strlen(cli_un.sun_path); + + ASSERT_EQ(0, connect(cli_fd, (struct sockaddr *)&cli_un, size)); + + /* FIONREAD and other IOCTLs should not be forbidden. */ + EXPECT_EQ(0, test_fionread_ioctl(cli_fd)); + + ASSERT_EQ(0, close(cli_fd)); +} + +/* clang-format off */ +FIXTURE(ioctl) {}; + +FIXTURE_SETUP(ioctl) {}; + +FIXTURE_TEARDOWN(ioctl) {}; +/* clang-format on */ + +FIXTURE_VARIANT(ioctl) +{ + const __u64 handled; + const __u64 allowed; + const mode_t open_mode; + /* + * FIONREAD is used as a characteristic device-specific IOCTL command. + * It is implemented in fs/ioctl.c for regular files, + * but we do not blanket-permit it for devices. + */ + const int expected_fionread_result; +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(ioctl, handled_i_allowed_none) { + /* clang-format on */ + .handled = LANDLOCK_ACCESS_FS_IOCTL_DEV, + .allowed = 0, + .open_mode = O_RDWR, + .expected_fionread_result = EACCES, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(ioctl, handled_i_allowed_i) { + /* clang-format on */ + .handled = LANDLOCK_ACCESS_FS_IOCTL_DEV, + .allowed = LANDLOCK_ACCESS_FS_IOCTL_DEV, + .open_mode = O_RDWR, + .expected_fionread_result = 0, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(ioctl, unhandled) { + /* clang-format on */ + .handled = LANDLOCK_ACCESS_FS_EXECUTE, + .allowed = LANDLOCK_ACCESS_FS_EXECUTE, + .open_mode = O_RDWR, + .expected_fionread_result = 0, +}; + +TEST_F_FORK(ioctl, handle_dir_access_file) +{ + const int flag = 0; + const struct rule rules[] = { + { + .path = "/dev", + .access = variant->allowed, + }, + {}, + }; + int file_fd, ruleset_fd; + + /* Enables Landlock. */ + ruleset_fd = create_ruleset(_metadata, variant->handled, rules); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + file_fd = open("/dev/zero", variant->open_mode); + ASSERT_LE(0, file_fd); + + /* Checks that IOCTL commands return the expected errors. */ + EXPECT_EQ(variant->expected_fionread_result, + test_fionread_ioctl(file_fd)); + + /* Checks that unrestrictable commands are unrestricted. */ + EXPECT_EQ(0, ioctl(file_fd, FIOCLEX)); + EXPECT_EQ(0, ioctl(file_fd, FIONCLEX)); + EXPECT_EQ(0, ioctl(file_fd, FIONBIO, &flag)); + EXPECT_EQ(0, ioctl(file_fd, FIOASYNC, &flag)); + EXPECT_EQ(0, ioctl(file_fd, FIGETBSZ, &flag)); + + ASSERT_EQ(0, close(file_fd)); +} + +TEST_F_FORK(ioctl, handle_dir_access_dir) +{ + const int flag = 0; + const struct rule rules[] = { + { + .path = "/dev", + .access = variant->allowed, + }, + {}, + }; + int dir_fd, ruleset_fd; + + /* Enables Landlock. */ + ruleset_fd = create_ruleset(_metadata, variant->handled, rules); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* + * Ignore variant->open_mode for this test, as we intend to open a + * directory. If the directory can not be opened, the variant is + * infeasible to test with an opened directory. + */ + dir_fd = open("/dev", O_RDONLY); + if (dir_fd < 0) + return; + + /* + * Checks that IOCTL commands return the expected errors. + * We do not use the expected values from the fixture here. + * + * When using IOCTL on a directory, no Landlock restrictions apply. + */ + EXPECT_EQ(0, test_fionread_ioctl(dir_fd)); + + /* Checks that unrestrictable commands are unrestricted. */ + EXPECT_EQ(0, ioctl(dir_fd, FIOCLEX)); + EXPECT_EQ(0, ioctl(dir_fd, FIONCLEX)); + EXPECT_EQ(0, ioctl(dir_fd, FIONBIO, &flag)); + EXPECT_EQ(0, ioctl(dir_fd, FIOASYNC, &flag)); + EXPECT_EQ(0, ioctl(dir_fd, FIGETBSZ, &flag)); + + ASSERT_EQ(0, close(dir_fd)); +} + +TEST_F_FORK(ioctl, handle_file_access_file) +{ + const int flag = 0; + const struct rule rules[] = { + { + .path = "/dev/zero", + .access = variant->allowed, + }, + {}, + }; + int file_fd, ruleset_fd; + + /* Enables Landlock. */ + ruleset_fd = create_ruleset(_metadata, variant->handled, rules); + ASSERT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + file_fd = open("/dev/zero", variant->open_mode); + ASSERT_LE(0, file_fd) + { + TH_LOG("Failed to open /dev/zero: %s", strerror(errno)); + } + + /* Checks that IOCTL commands return the expected errors. */ + EXPECT_EQ(variant->expected_fionread_result, + test_fionread_ioctl(file_fd)); + + /* Checks that unrestrictable commands are unrestricted. */ + EXPECT_EQ(0, ioctl(file_fd, FIOCLEX)); + EXPECT_EQ(0, ioctl(file_fd, FIONCLEX)); + EXPECT_EQ(0, ioctl(file_fd, FIONBIO, &flag)); + EXPECT_EQ(0, ioctl(file_fd, FIOASYNC, &flag)); + EXPECT_EQ(0, ioctl(file_fd, FIGETBSZ, &flag)); + + ASSERT_EQ(0, close(file_fd)); } /* clang-format off */ diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 8ae203d8ed..429535816d 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -52,10 +52,33 @@ endif selfdir = $(realpath $(dir $(filter %/lib.mk,$(MAKEFILE_LIST)))) top_srcdir = $(selfdir)/../../.. +# msg: emit succinct information message describing current building step +# $1 - generic step name (e.g., CC, LINK, etc); +# $2 - optional "flavor" specifier; if provided, will be emitted as [flavor]; +# $3 - target (assumed to be file); only file name will be emitted; +# $4 - optional extra arg, emitted as-is, if provided. +ifeq ($(V),1) +Q = +msg = +else +Q = @ +msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))"; +MAKEFLAGS += --no-print-directory +endif + ifeq ($(KHDR_INCLUDES),) KHDR_INCLUDES := -isystem $(top_srcdir)/usr/include endif +# In order to use newer items that haven't yet been added to the user's system +# header files, add $(TOOLS_INCLUDES) to the compiler invocation in each +# each selftest. +# You may need to add files to that location, or to refresh an existing file. In +# order to do that, run "make headers" from $(top_srcdir), then copy the +# header file that you want from $(top_srcdir)/usr/include/... , to the matching +# subdir in $(TOOLS_INCLUDE). +TOOLS_INCLUDES := -isystem $(top_srcdir)/tools/include/uapi + # The following are built by lib.mk common compile rules. # TEST_CUSTOM_PROGS should be used by tests that require # custom build rule and prevent common build rule use. @@ -184,7 +207,8 @@ endif ifeq ($(OVERRIDE_TARGETS),) LOCAL_HDRS += $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h $(OUTPUT)/%:%.c $(LOCAL_HDRS) - $(LINK.c) $(filter-out $(LOCAL_HDRS),$^) $(LDLIBS) -o $@ + $(call msg,CC,,$@) + $(Q)$(LINK.c) $(filter-out $(LOCAL_HDRS),$^) $(LDLIBS) -o $@ $(OUTPUT)/%.o:%.S $(COMPILE.S) $^ -o $@ diff --git a/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c index a9cc17facf..4e14dba812 100644 --- a/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c +++ b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c @@ -69,5 +69,5 @@ int main(int argc, char **argv) /* Multi-threaded */ test_mt_membarrier(); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/membarrier/membarrier_test_single_thread.c b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c index 4cdc8b1d12..fa3f1d6c37 100644 --- a/tools/testing/selftests/membarrier/membarrier_test_single_thread.c +++ b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c @@ -24,5 +24,5 @@ int main(int argc, char **argv) test_membarrier_get_registrations(/*cmd=*/0); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c index 93798c8c5d..dbc171a380 100644 --- a/tools/testing/selftests/memfd/fuse_test.c +++ b/tools/testing/selftests/memfd/fuse_test.c @@ -306,7 +306,7 @@ int main(int argc, char **argv) * then the kernel did a page-replacement or canceled the read() (or * whatever magic it did..). In that case, the memfd object is still * all zero. - * In case the memfd-object was *not* sealed, the read() was successfull + * In case the memfd-object was *not* sealed, the read() was successful * and the memfd object must *not* be all zero. * Note that in real scenarios, there might be a mixture of both, but * in this test-cases, we have explicit 200ms delays which should be diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 18f585684e..95af2d78fd 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -1528,7 +1528,7 @@ static void test_share_open(char *banner, char *b_suffix) /* * Test sharing via fork() - * Test whether seal-modifications work as expected with forked childs. + * Test whether seal-modifications work as expected with forked children. */ static void test_share_fork(char *banner, char *b_suffix) { diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index d26e962f2a..0b9ab98760 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -47,3 +47,5 @@ mkdirty va_high_addr_switch hugetlb_fault_after_madv hugetlb_madv_vs_map +mseal_test +seal_elf diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 410495e0a6..3b49bc3d0a 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -32,7 +32,7 @@ endif # LDLIBS. MAKEFLAGS += --no-builtin-rules -CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) +CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES) LDLIBS = -lrt -lpthread -lm TEST_GEN_FILES = cow @@ -59,6 +59,8 @@ TEST_GEN_FILES += mlock2-tests TEST_GEN_FILES += mrelease_test TEST_GEN_FILES += mremap_dontunmap TEST_GEN_FILES += mremap_test +TEST_GEN_FILES += mseal_test +TEST_GEN_FILES += seal_elf TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += pagemap_ioctl TEST_GEN_FILES += thuge-gen diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index fe078d6e18..32c6ccc2a6 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -199,7 +199,7 @@ static int child_vmsplice_memcmp_fn(char *mem, size_t size, typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes); static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, - child_fn fn) + child_fn fn, bool xfail) { struct comm_pipes comm_pipes; char buf; @@ -247,33 +247,47 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, else ret = -EINVAL; - ksft_test_result(!ret, "No leak from parent into child\n"); + if (!ret) { + ksft_test_result_pass("No leak from parent into child\n"); + } else if (xfail) { + /* + * With hugetlb, some vmsplice() tests are currently expected to + * fail because (a) harder to fix and (b) nobody really cares. + * Flag them as expected failure for now. + */ + ksft_test_result_xfail("Leak from parent into child\n"); + } else { + ksft_test_result_fail("Leak from parent into child\n"); + } close_comm_pipes: close_comm_pipes(&comm_pipes); } -static void test_cow_in_parent(char *mem, size_t size) +static void test_cow_in_parent(char *mem, size_t size, bool is_hugetlb) { - do_test_cow_in_parent(mem, size, false, child_memcmp_fn); + do_test_cow_in_parent(mem, size, false, child_memcmp_fn, false); } -static void test_cow_in_parent_mprotect(char *mem, size_t size) +static void test_cow_in_parent_mprotect(char *mem, size_t size, bool is_hugetlb) { - do_test_cow_in_parent(mem, size, true, child_memcmp_fn); + do_test_cow_in_parent(mem, size, true, child_memcmp_fn, false); } -static void test_vmsplice_in_child(char *mem, size_t size) +static void test_vmsplice_in_child(char *mem, size_t size, bool is_hugetlb) { - do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn); + do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn, + is_hugetlb); } -static void test_vmsplice_in_child_mprotect(char *mem, size_t size) +static void test_vmsplice_in_child_mprotect(char *mem, size_t size, + bool is_hugetlb) { - do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn); + do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn, + is_hugetlb); } static void do_test_vmsplice_in_parent(char *mem, size_t size, - bool before_fork) + bool before_fork, bool xfail) { struct iovec iov = { .iov_base = mem, @@ -355,8 +369,18 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, } } - ksft_test_result(!memcmp(old, new, transferred), - "No leak from child into parent\n"); + if (!memcmp(old, new, transferred)) { + ksft_test_result_pass("No leak from child into parent\n"); + } else if (xfail) { + /* + * With hugetlb, some vmsplice() tests are currently expected to + * fail because (a) harder to fix and (b) nobody really cares. + * Flag them as expected failure for now. + */ + ksft_test_result_xfail("Leak from child into parent\n"); + } else { + ksft_test_result_fail("Leak from child into parent\n"); + } close_pipe: close(fds[0]); close(fds[1]); @@ -367,14 +391,14 @@ free: free(new); } -static void test_vmsplice_before_fork(char *mem, size_t size) +static void test_vmsplice_before_fork(char *mem, size_t size, bool is_hugetlb) { - do_test_vmsplice_in_parent(mem, size, true); + do_test_vmsplice_in_parent(mem, size, true, is_hugetlb); } -static void test_vmsplice_after_fork(char *mem, size_t size) +static void test_vmsplice_after_fork(char *mem, size_t size, bool is_hugetlb) { - do_test_vmsplice_in_parent(mem, size, false); + do_test_vmsplice_in_parent(mem, size, false, is_hugetlb); } #ifdef LOCAL_CONFIG_HAVE_LIBURING @@ -529,12 +553,12 @@ close_comm_pipes: close_comm_pipes(&comm_pipes); } -static void test_iouring_ro(char *mem, size_t size) +static void test_iouring_ro(char *mem, size_t size, bool is_hugetlb) { do_test_iouring(mem, size, false); } -static void test_iouring_fork(char *mem, size_t size) +static void test_iouring_fork(char *mem, size_t size, bool is_hugetlb) { do_test_iouring(mem, size, true); } @@ -678,37 +702,41 @@ free_tmp: free(tmp); } -static void test_ro_pin_on_shared(char *mem, size_t size) +static void test_ro_pin_on_shared(char *mem, size_t size, bool is_hugetlb) { do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false); } -static void test_ro_fast_pin_on_shared(char *mem, size_t size) +static void test_ro_fast_pin_on_shared(char *mem, size_t size, bool is_hugetlb) { do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true); } -static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size) +static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size, + bool is_hugetlb) { do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false); } -static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size) +static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size, + bool is_hugetlb) { do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true); } -static void test_ro_pin_on_ro_exclusive(char *mem, size_t size) +static void test_ro_pin_on_ro_exclusive(char *mem, size_t size, + bool is_hugetlb) { do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false); } -static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size) +static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size, + bool is_hugetlb) { do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true); } -typedef void (*test_fn)(char *mem, size_t size); +typedef void (*test_fn)(char *mem, size_t size, bool hugetlb); static void do_run_with_base_page(test_fn fn, bool swapout) { @@ -740,7 +768,7 @@ static void do_run_with_base_page(test_fn fn, bool swapout) } } - fn(mem, pagesize); + fn(mem, pagesize, false); munmap: munmap(mem, pagesize); } @@ -904,7 +932,7 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) break; } - fn(mem, size); + fn(mem, size, false); munmap: munmap(mmap_mem, mmap_size); if (mremap_mem != MAP_FAILED) @@ -997,7 +1025,7 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) } munmap(dummy, hugetlbsize); - fn(mem, hugetlbsize); + fn(mem, hugetlbsize, true); munmap: munmap(mem, hugetlbsize); } @@ -1036,7 +1064,7 @@ static const struct test_case anon_test_cases[] = { */ { "vmsplice() + unmap in child", - test_vmsplice_in_child + test_vmsplice_in_child, }, /* * vmsplice() test, but do an additional mprotect(PROT_READ)+ @@ -1044,7 +1072,7 @@ static const struct test_case anon_test_cases[] = { */ { "vmsplice() + unmap in child with mprotect() optimization", - test_vmsplice_in_child_mprotect + test_vmsplice_in_child_mprotect, }, /* * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after @@ -1322,23 +1350,31 @@ close_comm_pipes: close_comm_pipes(&comm_pipes); } -static void test_anon_thp_collapse_unshared(char *mem, size_t size) +static void test_anon_thp_collapse_unshared(char *mem, size_t size, + bool is_hugetlb) { + assert(!is_hugetlb); do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED); } -static void test_anon_thp_collapse_fully_shared(char *mem, size_t size) +static void test_anon_thp_collapse_fully_shared(char *mem, size_t size, + bool is_hugetlb) { + assert(!is_hugetlb); do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED); } -static void test_anon_thp_collapse_lower_shared(char *mem, size_t size) +static void test_anon_thp_collapse_lower_shared(char *mem, size_t size, + bool is_hugetlb) { + assert(!is_hugetlb); do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED); } -static void test_anon_thp_collapse_upper_shared(char *mem, size_t size) +static void test_anon_thp_collapse_upper_shared(char *mem, size_t size, + bool is_hugetlb) { + assert(!is_hugetlb); do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED); } diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index d7eaca5bbe..9423ad439a 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -118,15 +118,22 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) return; } - /* - * Fault in the page writable such that GUP-fast can eventually pin - * it immediately. - */ + /* Fault in the page such that GUP-fast can pin it directly. */ memset(mem, 0, size); switch (type) { case TEST_TYPE_RO: case TEST_TYPE_RO_FAST: + /* + * Cover more cases regarding unsharing decisions when + * long-term R/O pinning by mapping the page R/O. + */ + ret = mprotect(mem, size, PROT_READ); + if (ret) { + ksft_test_result_fail("mprotect() failed\n"); + goto munmap; + } + /* FALLTHROUGH */ case TEST_TYPE_RW: case TEST_TYPE_RW_FAST: { struct pin_longterm_test args; @@ -228,6 +235,7 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) assert(false); } +munmap: munmap(mem, size); } diff --git a/tools/testing/selftests/mm/hugetlb_madv_vs_map.c b/tools/testing/selftests/mm/hugetlb_madv_vs_map.c index d01e8d4901..8f122a0f08 100644 --- a/tools/testing/selftests/mm/hugetlb_madv_vs_map.c +++ b/tools/testing/selftests/mm/hugetlb_madv_vs_map.c @@ -27,9 +27,9 @@ #include "vm_util.h" #include "../kselftest.h" -#define MMAP_SIZE (1 << 21) #define INLOOP_ITER 100 +size_t mmap_size; char *huge_ptr; /* Touch the memory while it is being madvised() */ @@ -44,7 +44,7 @@ void *touch(void *unused) void *madv(void *unused) { for (int i = 0; i < INLOOP_ITER; i++) - madvise(huge_ptr, MMAP_SIZE, MADV_DONTNEED); + madvise(huge_ptr, mmap_size, MADV_DONTNEED); return NULL; } @@ -59,7 +59,7 @@ void *map_extra(void *unused) void *ptr; for (int i = 0; i < INLOOP_ITER; i++) { - ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE, + ptr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); @@ -93,14 +93,16 @@ int main(void) free_hugepages); } + mmap_size = default_huge_page_size(); + while (max--) { - huge_ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE, + huge_ptr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); if ((unsigned long)huge_ptr == -1) { - ksft_exit_skip("Failed to allocated huge page\n"); - return KSFT_SKIP; + ksft_test_result_fail("Failed to allocate huge page\n"); + return KSFT_FAIL; } pthread_create(&thread1, NULL, madv, NULL); @@ -117,7 +119,7 @@ int main(void) } /* Unmap and restart */ - munmap(huge_ptr, MMAP_SIZE); + munmap(huge_ptr, mmap_size); } return KSFT_PASS; diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index 508287560c..b61803e36d 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -28,6 +28,15 @@ #define MiB (1024 * KiB) #define FORK_EXEC_CHILD_PRG_NAME "ksm_fork_exec_child" +#define MAP_MERGE_FAIL ((void *)-1) +#define MAP_MERGE_SKIP ((void *)-2) + +enum ksm_merge_mode { + KSM_MERGE_PRCTL, + KSM_MERGE_MADVISE, + KSM_MERGE_NONE, /* PRCTL already set */ +}; + static int mem_fd; static int ksm_fd; static int ksm_full_scans_fd; @@ -146,33 +155,34 @@ static int ksm_unmerge(void) return 0; } -static char *mmap_and_merge_range(char val, unsigned long size, int prot, - bool use_prctl) +static char *__mmap_and_merge_range(char val, unsigned long size, int prot, + enum ksm_merge_mode mode) { char *map; + char *err_map = MAP_MERGE_FAIL; int ret; /* Stabilize accounting by disabling KSM completely. */ if (ksm_unmerge()) { - ksft_test_result_fail("Disabling (unmerging) KSM failed\n"); - return MAP_FAILED; + ksft_print_msg("Disabling (unmerging) KSM failed\n"); + return err_map; } if (get_my_merging_pages() > 0) { - ksft_test_result_fail("Still pages merged\n"); - return MAP_FAILED; + ksft_print_msg("Still pages merged\n"); + return err_map; } map = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); if (map == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); - return MAP_FAILED; + ksft_print_msg("mmap() failed\n"); + return err_map; } /* Don't use THP. Ignore if THP are not around on a kernel. */ if (madvise(map, size, MADV_NOHUGEPAGE) && errno != EINVAL) { - ksft_test_result_fail("MADV_NOHUGEPAGE failed\n"); + ksft_print_msg("MADV_NOHUGEPAGE failed\n"); goto unmap; } @@ -180,27 +190,36 @@ static char *mmap_and_merge_range(char val, unsigned long size, int prot, memset(map, val, size); if (mprotect(map, size, prot)) { - ksft_test_result_skip("mprotect() failed\n"); + ksft_print_msg("mprotect() failed\n"); + err_map = MAP_MERGE_SKIP; goto unmap; } - if (use_prctl) { + switch (mode) { + case KSM_MERGE_PRCTL: ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0); if (ret < 0 && errno == EINVAL) { - ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n"); + ksft_print_msg("PR_SET_MEMORY_MERGE not supported\n"); + err_map = MAP_MERGE_SKIP; goto unmap; } else if (ret) { - ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n"); + ksft_print_msg("PR_SET_MEMORY_MERGE=1 failed\n"); goto unmap; } - } else if (madvise(map, size, MADV_MERGEABLE)) { - ksft_test_result_fail("MADV_MERGEABLE failed\n"); - goto unmap; + break; + case KSM_MERGE_MADVISE: + if (madvise(map, size, MADV_MERGEABLE)) { + ksft_print_msg("MADV_MERGEABLE failed\n"); + goto unmap; + } + break; + case KSM_MERGE_NONE: + break; } /* Run KSM to trigger merging and wait. */ if (ksm_merge()) { - ksft_test_result_fail("Running KSM failed\n"); + ksft_print_msg("Running KSM failed\n"); goto unmap; } @@ -209,14 +228,31 @@ static char *mmap_and_merge_range(char val, unsigned long size, int prot, * accounted differently (depending on kernel support). */ if (val && !get_my_merging_pages()) { - ksft_test_result_fail("No pages got merged\n"); + ksft_print_msg("No pages got merged\n"); goto unmap; } return map; unmap: munmap(map, size); - return MAP_FAILED; + return err_map; +} + +static char *mmap_and_merge_range(char val, unsigned long size, int prot, + enum ksm_merge_mode mode) +{ + char *map; + char *ret = MAP_FAILED; + + map = __mmap_and_merge_range(val, size, prot, mode); + if (map == MAP_MERGE_FAIL) + ksft_test_result_fail("Merging memory failed"); + else if (map == MAP_MERGE_SKIP) + ksft_test_result_skip("Merging memory skipped"); + else + ret = map; + + return ret; } static void test_unmerge(void) @@ -226,7 +262,7 @@ static void test_unmerge(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false); + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE); if (map == MAP_FAILED) return; @@ -264,7 +300,7 @@ static void test_unmerge_zero_pages(void) } /* Let KSM deduplicate zero pages. */ - map = mmap_and_merge_range(0x00, size, PROT_READ | PROT_WRITE, false); + map = mmap_and_merge_range(0x00, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE); if (map == MAP_FAILED) return; @@ -312,7 +348,7 @@ static void test_unmerge_discarded(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false); + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE); if (map == MAP_FAILED) return; @@ -344,7 +380,7 @@ static void test_unmerge_uffd_wp(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false); + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE); if (map == MAP_FAILED) return; @@ -439,6 +475,36 @@ static void test_prctl(void) ksft_test_result_pass("Setting/clearing PR_SET_MEMORY_MERGE works\n"); } +static int test_child_ksm(void) +{ + const unsigned int size = 2 * MiB; + char *map; + + /* Test if KSM is enabled for the process. */ + if (prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) != 1) + return -1; + + /* Test if merge could really happen. */ + map = __mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_NONE); + if (map == MAP_MERGE_FAIL) + return -2; + else if (map == MAP_MERGE_SKIP) + return -3; + + munmap(map, size); + return 0; +} + +static void test_child_ksm_err(int status) +{ + if (status == -1) + ksft_test_result_fail("unexpected PR_GET_MEMORY_MERGE result in child\n"); + else if (status == -2) + ksft_test_result_fail("Merge in child failed\n"); + else if (status == -3) + ksft_test_result_skip("Merge in child skipped\n"); +} + /* Verify that prctl ksm flag is inherited. */ static void test_prctl_fork(void) { @@ -458,7 +524,7 @@ static void test_prctl_fork(void) child_pid = fork(); if (!child_pid) { - exit(prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0)); + exit(test_child_ksm()); } else if (child_pid < 0) { ksft_test_result_fail("fork() failed\n"); return; @@ -467,8 +533,11 @@ static void test_prctl_fork(void) if (waitpid(child_pid, &status, 0) < 0) { ksft_test_result_fail("waitpid() failed\n"); return; - } else if (WEXITSTATUS(status) != 1) { - ksft_test_result_fail("unexpected PR_GET_MEMORY_MERGE result in child\n"); + } + + status = WEXITSTATUS(status); + if (status) { + test_child_ksm_err(status); return; } @@ -480,12 +549,6 @@ static void test_prctl_fork(void) ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n"); } -static int ksm_fork_exec_child(void) -{ - /* Test if KSM is enabled for the process. */ - return prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) == 1; -} - static void test_prctl_fork_exec(void) { int ret, status; @@ -518,7 +581,7 @@ static void test_prctl_fork_exec(void) if (WIFEXITED(status)) { status = WEXITSTATUS(status); if (status) { - ksft_test_result_fail("KSM not enabled\n"); + test_child_ksm_err(status); return; } } else { @@ -545,7 +608,7 @@ static void test_prctl_unmerge(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, true); + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_PRCTL); if (map == MAP_FAILED) return; @@ -568,7 +631,7 @@ static void test_prot_none(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0x11, size, PROT_NONE, false); + map = mmap_and_merge_range(0x11, size, PROT_NONE, KSM_MERGE_MADVISE); if (map == MAP_FAILED) goto unmap; @@ -593,13 +656,34 @@ unmap: munmap(map, size); } +static void init_global_file_handles(void) +{ + mem_fd = open("/proc/self/mem", O_RDWR); + if (mem_fd < 0) + ksft_exit_fail_msg("opening /proc/self/mem failed\n"); + ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); + if (ksm_fd < 0) + ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n"); + ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY); + if (ksm_full_scans_fd < 0) + ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n"); + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd < 0) + ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n"); + proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY); + proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages", + O_RDONLY); + ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR); +} + int main(int argc, char **argv) { unsigned int tests = 8; int err; if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) { - exit(ksm_fork_exec_child() == 1 ? 0 : 1); + init_global_file_handles(); + exit(test_child_ksm()); } #ifdef __NR_userfaultfd @@ -611,22 +695,7 @@ int main(int argc, char **argv) pagesize = getpagesize(); - mem_fd = open("/proc/self/mem", O_RDWR); - if (mem_fd < 0) - ksft_exit_fail_msg("opening /proc/self/mem failed\n"); - ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); - if (ksm_fd < 0) - ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n"); - ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY); - if (ksm_full_scans_fd < 0) - ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n"); - pagemap_fd = open("/proc/self/pagemap", O_RDONLY); - if (pagemap_fd < 0) - ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n"); - proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY); - proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages", - O_RDONLY); - ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR); + init_global_file_handles(); test_unmerge(); test_unmerge_zero_pages(); diff --git a/tools/testing/selftests/mm/map_fixed_noreplace.c b/tools/testing/selftests/mm/map_fixed_noreplace.c index b74813fdc9..d53de24860 100644 --- a/tools/testing/selftests/mm/map_fixed_noreplace.c +++ b/tools/testing/selftests/mm/map_fixed_noreplace.c @@ -67,7 +67,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error: munmap failed!?\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n"); addr = base_addr + page_size; size = 3 * page_size; @@ -76,7 +77,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error: first mmap() failed unexpectedly\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 3*PAGE_SIZE at base+PAGE_SIZE\n"); /* * Exact same mapping again: @@ -93,7 +95,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:1: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n"); /* * Second mapping contained within first: @@ -111,7 +114,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:2: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 2*PAGE_SIZE at base+PAGE_SIZE\n"); /* * Overlap end of existing mapping: @@ -128,7 +132,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:3: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 2*PAGE_SIZE at base+(3*PAGE_SIZE)\n"); /* * Overlap start of existing mapping: @@ -145,7 +150,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:4: mmap() succeeded when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() 2*PAGE_SIZE bytes at base\n"); /* * Adjacent to start of existing mapping: @@ -162,7 +168,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:5: mmap() failed when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() PAGE_SIZE at base\n"); /* * Adjacent to end of existing mapping: @@ -179,7 +186,8 @@ int main(void) dump_maps(); ksft_exit_fail_msg("Error:6: mmap() failed when it shouldn't have\n"); } - ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); + ksft_test_result_pass("mmap() PAGE_SIZE at base+(4*PAGE_SIZE)\n"); addr = base_addr; size = 5 * page_size; diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c index 9b298f6a04..9a0597310a 100644 --- a/tools/testing/selftests/mm/memfd_secret.c +++ b/tools/testing/selftests/mm/memfd_secret.c @@ -20,6 +20,7 @@ #include <unistd.h> #include <errno.h> #include <stdio.h> +#include <fcntl.h> #include "../kselftest.h" @@ -83,6 +84,45 @@ static void test_mlock_limit(int fd) pass("mlock limit is respected\n"); } +static void test_vmsplice(int fd, const char *desc) +{ + ssize_t transferred; + struct iovec iov; + int pipefd[2]; + char *mem; + + if (pipe(pipefd)) { + fail("pipe failed: %s\n", strerror(errno)); + return; + } + + mem = mmap(NULL, page_size, prot, mode, fd, 0); + if (mem == MAP_FAILED) { + fail("Unable to mmap secret memory\n"); + goto close_pipe; + } + + /* + * vmsplice() may use GUP-fast, which must also fail. Prefault the + * page table, so GUP-fast could find it. + */ + memset(mem, PATTERN, page_size); + + iov.iov_base = mem; + iov.iov_len = page_size; + transferred = vmsplice(pipefd[1], &iov, 1, 0); + + if (transferred < 0 && errno == EFAULT) + pass("vmsplice is blocked as expected with %s\n", desc); + else + fail("vmsplice: unexpected memory access with %s\n", desc); + + munmap(mem, page_size); +close_pipe: + close(pipefd[0]); + close(pipefd[1]); +} + static void try_process_vm_read(int fd, int pipefd[2]) { struct iovec liov, riov; @@ -187,7 +227,6 @@ static void test_remote_access(int fd, const char *name, return; } - ftruncate(fd, page_size); memset(mem, PATTERN, page_size); if (write(pipefd[1], &mem, sizeof(mem)) < 0) { @@ -258,7 +297,7 @@ static void prepare(void) strerror(errno)); } -#define NUM_TESTS 4 +#define NUM_TESTS 6 int main(int argc, char *argv[]) { @@ -277,9 +316,17 @@ int main(int argc, char *argv[]) ksft_exit_fail_msg("memfd_secret failed: %s\n", strerror(errno)); } + if (ftruncate(fd, page_size)) + ksft_exit_fail_msg("ftruncate failed: %s\n", strerror(errno)); test_mlock_limit(fd); test_file_apis(fd); + /* + * We have to run the first vmsplice test before any secretmem page was + * allocated for this fd. + */ + test_vmsplice(fd, "fresh page"); + test_vmsplice(fd, "existing page"); test_process_vm_read(fd); test_ptrace(fd); diff --git a/tools/testing/selftests/mm/mlock2-tests.c b/tools/testing/selftests/mm/mlock2-tests.c index 26f744188a..7f0d50fa36 100644 --- a/tools/testing/selftests/mm/mlock2-tests.c +++ b/tools/testing/selftests/mm/mlock2-tests.c @@ -20,8 +20,6 @@ static int get_vm_area(unsigned long addr, struct vm_boundaries *area) FILE *file; int ret = 1; char line[1024] = {0}; - char *end_addr; - char *stop; unsigned long start; unsigned long end; @@ -37,21 +35,10 @@ static int get_vm_area(unsigned long addr, struct vm_boundaries *area) memset(area, 0, sizeof(struct vm_boundaries)); while(fgets(line, 1024, file)) { - end_addr = strchr(line, '-'); - if (!end_addr) { + if (sscanf(line, "%lx-%lx", &start, &end) != 2) { ksft_print_msg("cannot parse /proc/self/maps\n"); goto out; } - *end_addr = '\0'; - end_addr++; - stop = strchr(end_addr, ' '); - if (!stop) { - ksft_print_msg("cannot parse /proc/self/maps\n"); - goto out; - } - - sscanf(line, "%lx", &start); - sscanf(end_addr, "%lx", &end); if (start <= addr && end > addr) { area->start = start; diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index 2f8b991f78..1b03bcfaef 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -23,6 +23,7 @@ #define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */ #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) +#define MAX(X, Y) ((X) > (Y) ? (X) : (Y)) #define SIZE_MB(m) ((size_t)m * (1024 * 1024)) #define SIZE_KB(k) ((size_t)k * 1024) @@ -69,6 +70,27 @@ enum { .expect_failure = should_fail \ } +/* compute square root using binary search */ +static unsigned long get_sqrt(unsigned long val) +{ + unsigned long low = 1; + + /* assuming rand_size is less than 1TB */ + unsigned long high = (1UL << 20); + + while (low <= high) { + unsigned long mid = low + (high - low) / 2; + unsigned long temp = mid * mid; + + if (temp == val) + return mid; + if (temp < val) + low = mid + 1; + high = mid - 1; + } + return low; +} + /* * Returns false if the requested remap region overlaps with an * existing mapping (e.g text, stack) else returns true. @@ -126,19 +148,21 @@ static unsigned long long get_mmap_min_addr(void) * Using /proc/self/maps, assert that the specified address range is contained * within a single mapping. */ -static bool is_range_mapped(FILE *maps_fp, void *start, void *end) +static bool is_range_mapped(FILE *maps_fp, unsigned long start, + unsigned long end) { char *line = NULL; size_t len = 0; bool success = false; + unsigned long first_val, second_val; rewind(maps_fp); while (getline(&line, &len, maps_fp) != -1) { - char *first = strtok(line, "- "); - void *first_val = (void *)strtol(first, NULL, 16); - char *second = strtok(NULL, "- "); - void *second_val = (void *) strtol(second, NULL, 16); + if (sscanf(line, "%lx-%lx", &first_val, &second_val) != 2) { + ksft_exit_fail_msg("cannot parse /proc/self/maps\n"); + break; + } if (first_val <= start && second_val >= end) { success = true; @@ -233,7 +257,8 @@ static void mremap_expand_merge(FILE *maps_fp, unsigned long page_size) goto out; } - success = is_range_mapped(maps_fp, start, start + 3 * page_size); + success = is_range_mapped(maps_fp, (unsigned long)start, + (unsigned long)(start + 3 * page_size)); munmap(start, 3 * page_size); out: @@ -272,7 +297,8 @@ static void mremap_expand_merge_offset(FILE *maps_fp, unsigned long page_size) goto out; } - success = is_range_mapped(maps_fp, start, start + 3 * page_size); + success = is_range_mapped(maps_fp, (unsigned long)start, + (unsigned long)(start + 3 * page_size)); munmap(start, 3 * page_size); out: @@ -296,7 +322,7 @@ out: * * |DDDDddddSSSSssss| */ -static void mremap_move_within_range(char pattern_seed) +static void mremap_move_within_range(unsigned int pattern_seed, char *rand_addr) { char *test_name = "mremap mremap move within range"; void *src, *dest; @@ -316,10 +342,7 @@ static void mremap_move_within_range(char pattern_seed) src = (void *)((unsigned long)src & ~(SIZE_MB(2) - 1)); /* Set byte pattern for source block. */ - srand(pattern_seed); - for (i = 0; i < SIZE_MB(2); i++) { - ((char *)src)[i] = (char) rand(); - } + memcpy(src, rand_addr, SIZE_MB(2)); dest = src - SIZE_MB(2); @@ -357,14 +380,14 @@ out: /* Returns the time taken for the remap on success else returns -1. */ static long long remap_region(struct config c, unsigned int threshold_mb, - char pattern_seed) + char *rand_addr) { void *addr, *src_addr, *dest_addr, *dest_preamble_addr; - int d; - unsigned long long t; + unsigned long long t, d; struct timespec t_start = {0, 0}, t_end = {0, 0}; long long start_ns, end_ns, align_mask, ret, offset; unsigned long long threshold; + unsigned long num_chunks; if (threshold_mb == VALIDATION_NO_THRESHOLD) threshold = c.region_size; @@ -378,9 +401,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb, } /* Set byte pattern for source block. */ - srand(pattern_seed); - for (t = 0; t < threshold; t++) - memset((char *) src_addr + t, (char) rand(), 1); + memcpy(src_addr, rand_addr, threshold); /* Mask to zero out lower bits of address for alignment */ align_mask = ~(c.dest_alignment - 1); @@ -420,9 +441,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb, } /* Set byte pattern for the dest preamble block. */ - srand(pattern_seed); - for (d = 0; d < c.dest_preamble_size; d++) - memset((char *) dest_preamble_addr + d, (char) rand(), 1); + memcpy(dest_preamble_addr, rand_addr, c.dest_preamble_size); } clock_gettime(CLOCK_MONOTONIC, &t_start); @@ -436,15 +455,42 @@ static long long remap_region(struct config c, unsigned int threshold_mb, goto clean_up_dest_preamble; } - /* Verify byte pattern after remapping */ - srand(pattern_seed); - for (t = 0; t < threshold; t++) { - char c = (char) rand(); + /* + * Verify byte pattern after remapping. Employ an algorithm with a + * square root time complexity in threshold: divide the range into + * chunks, if memcmp() returns non-zero, only then perform an + * iteration in that chunk to find the mismatch index. + */ + num_chunks = get_sqrt(threshold); + for (unsigned long i = 0; i < num_chunks; ++i) { + size_t chunk_size = threshold / num_chunks; + unsigned long shift = i * chunk_size; + + if (!memcmp(dest_addr + shift, rand_addr + shift, chunk_size)) + continue; + + /* brute force iteration only over mismatch segment */ + for (t = shift; t < shift + chunk_size; ++t) { + if (((char *) dest_addr)[t] != rand_addr[t]) { + ksft_print_msg("Data after remap doesn't match at offset %llu\n", + t); + ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[t] & 0xff, + ((char *) dest_addr)[t] & 0xff); + ret = -1; + goto clean_up_dest; + } + } + } - if (((char *) dest_addr)[t] != c) { + /* + * if threshold is not divisible by num_chunks, then check the + * last chunk + */ + for (t = num_chunks * (threshold / num_chunks); t < threshold; ++t) { + if (((char *) dest_addr)[t] != rand_addr[t]) { ksft_print_msg("Data after remap doesn't match at offset %llu\n", - t); - ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, + t); + ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[t] & 0xff, ((char *) dest_addr)[t] & 0xff); ret = -1; goto clean_up_dest; @@ -452,22 +498,44 @@ static long long remap_region(struct config c, unsigned int threshold_mb, } /* Verify the dest preamble byte pattern after remapping */ - if (c.dest_preamble_size) { - srand(pattern_seed); - for (d = 0; d < c.dest_preamble_size; d++) { - char c = (char) rand(); - - if (((char *) dest_preamble_addr)[d] != c) { - ksft_print_msg("Preamble data after remap doesn't match at offset %d\n", - d); - ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, - ((char *) dest_preamble_addr)[d] & 0xff); + if (!c.dest_preamble_size) + goto no_preamble; + + num_chunks = get_sqrt(c.dest_preamble_size); + + for (unsigned long i = 0; i < num_chunks; ++i) { + size_t chunk_size = c.dest_preamble_size / num_chunks; + unsigned long shift = i * chunk_size; + + if (!memcmp(dest_preamble_addr + shift, rand_addr + shift, + chunk_size)) + continue; + + /* brute force iteration only over mismatched segment */ + for (d = shift; d < shift + chunk_size; ++d) { + if (((char *) dest_preamble_addr)[d] != rand_addr[d]) { + ksft_print_msg("Preamble data after remap doesn't match at offset %llu\n", + d); + ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[d] & 0xff, + ((char *) dest_preamble_addr)[d] & 0xff); ret = -1; goto clean_up_dest; } } } + for (d = num_chunks * (c.dest_preamble_size / num_chunks); d < c.dest_preamble_size; ++d) { + if (((char *) dest_preamble_addr)[d] != rand_addr[d]) { + ksft_print_msg("Preamble data after remap doesn't match at offset %llu\n", + d); + ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[d] & 0xff, + ((char *) dest_preamble_addr)[d] & 0xff); + ret = -1; + goto clean_up_dest; + } + } + +no_preamble: start_ns = t_start.tv_sec * NS_PER_SEC + t_start.tv_nsec; end_ns = t_end.tv_sec * NS_PER_SEC + t_end.tv_nsec; ret = end_ns - start_ns; @@ -494,7 +562,8 @@ out: * the beginning of the mapping just because the aligned * down address landed on a mapping that maybe does not exist. */ -static void mremap_move_1mb_from_start(char pattern_seed) +static void mremap_move_1mb_from_start(unsigned int pattern_seed, + char *rand_addr) { char *test_name = "mremap move 1mb from start at 1MB+256KB aligned src"; void *src = NULL, *dest = NULL; @@ -520,10 +589,7 @@ static void mremap_move_1mb_from_start(char pattern_seed) } /* Set byte pattern for source block. */ - srand(pattern_seed); - for (i = 0; i < SIZE_MB(2); i++) { - ((char *)src)[i] = (char) rand(); - } + memcpy(src, rand_addr, SIZE_MB(2)); /* * Unmap the beginning of dest so that the aligned address @@ -568,10 +634,10 @@ out: static void run_mremap_test_case(struct test test_case, int *failures, unsigned int threshold_mb, - unsigned int pattern_seed) + unsigned int pattern_seed, char *rand_addr) { long long remap_time = remap_region(test_case.config, threshold_mb, - pattern_seed); + rand_addr); if (remap_time < 0) { if (test_case.expect_failure) @@ -642,7 +708,15 @@ int main(int argc, char **argv) int failures = 0; int i, run_perf_tests; unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD; + + /* hard-coded test configs */ + size_t max_test_variable_region_size = _2GB; + size_t max_test_constant_region_size = _2MB; + size_t dest_preamble_size = 10 * _4MB; + unsigned int pattern_seed; + char *rand_addr; + size_t rand_size; int num_expand_tests = 2; int num_misc_tests = 2; struct test test_cases[MAX_TEST] = {}; @@ -659,6 +733,31 @@ int main(int argc, char **argv) ksft_print_msg("Test configs:\n\tthreshold_mb=%u\n\tpattern_seed=%u\n\n", threshold_mb, pattern_seed); + /* + * set preallocated random array according to test configs; see the + * functions for the logic of setting the size + */ + if (!threshold_mb) + rand_size = MAX(max_test_variable_region_size, + max_test_constant_region_size); + else + rand_size = MAX(MIN(threshold_mb * _1MB, + max_test_variable_region_size), + max_test_constant_region_size); + rand_size = MAX(dest_preamble_size, rand_size); + + rand_addr = (char *)mmap(NULL, rand_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (rand_addr == MAP_FAILED) { + perror("mmap"); + ksft_exit_fail_msg("cannot mmap rand_addr\n"); + } + + /* fill stream of random bytes */ + srand(pattern_seed); + for (unsigned long i = 0; i < rand_size; ++i) + rand_addr[i] = (char) rand(); + page_size = sysconf(_SC_PAGESIZE); /* Expected mremap failures */ @@ -730,13 +829,13 @@ int main(int argc, char **argv) for (i = 0; i < ARRAY_SIZE(test_cases); i++) run_mremap_test_case(test_cases[i], &failures, threshold_mb, - pattern_seed); + pattern_seed, rand_addr); maps_fp = fopen("/proc/self/maps", "r"); if (maps_fp == NULL) { - ksft_print_msg("Failed to read /proc/self/maps: %s\n", strerror(errno)); - exit(KSFT_FAIL); + munmap(rand_addr, rand_size); + ksft_exit_fail_msg("Failed to read /proc/self/maps: %s\n", strerror(errno)); } mremap_expand_merge(maps_fp, page_size); @@ -744,17 +843,20 @@ int main(int argc, char **argv) fclose(maps_fp); - mremap_move_within_range(pattern_seed); - mremap_move_1mb_from_start(pattern_seed); + mremap_move_within_range(pattern_seed, rand_addr); + mremap_move_1mb_from_start(pattern_seed, rand_addr); if (run_perf_tests) { ksft_print_msg("\n%s\n", "mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:"); for (i = 0; i < ARRAY_SIZE(perf_test_cases); i++) run_mremap_test_case(perf_test_cases[i], &failures, - threshold_mb, pattern_seed); + threshold_mb, pattern_seed, + rand_addr); } + munmap(rand_addr, rand_size); + if (failures > 0) ksft_exit_fail(); else diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c new file mode 100644 index 0000000000..41998cf1dc --- /dev/null +++ b/tools/testing/selftests/mm/mseal_test.c @@ -0,0 +1,1894 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <linux/mman.h> +#include <sys/mman.h> +#include <stdint.h> +#include <unistd.h> +#include <string.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <stdbool.h> +#include "../kselftest.h" +#include <syscall.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/vfs.h> +#include <sys/stat.h> + +/* + * need those definition for manually build using gcc. + * gcc -I ../../../../usr/include -DDEBUG -O3 -DDEBUG -O3 mseal_test.c -o mseal_test + */ +#ifndef PKEY_DISABLE_ACCESS +# define PKEY_DISABLE_ACCESS 0x1 +#endif + +#ifndef PKEY_DISABLE_WRITE +# define PKEY_DISABLE_WRITE 0x2 +#endif + +#ifndef PKEY_BITS_PER_PKEY +#define PKEY_BITS_PER_PKEY 2 +#endif + +#ifndef PKEY_MASK +#define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) +#endif + +#define FAIL_TEST_IF_FALSE(c) do {\ + if (!(c)) {\ + ksft_test_result_fail("%s, line:%d\n", __func__, __LINE__);\ + goto test_end;\ + } \ + } \ + while (0) + +#define SKIP_TEST_IF_FALSE(c) do {\ + if (!(c)) {\ + ksft_test_result_skip("%s, line:%d\n", __func__, __LINE__);\ + goto test_end;\ + } \ + } \ + while (0) + + +#define TEST_END_CHECK() {\ + ksft_test_result_pass("%s\n", __func__);\ + return;\ +test_end:\ + return;\ +} + +#ifndef u64 +#define u64 unsigned long long +#endif + +static unsigned long get_vma_size(void *addr, int *prot) +{ + FILE *maps; + char line[256]; + int size = 0; + uintptr_t addr_start, addr_end; + char protstr[5]; + *prot = 0; + + maps = fopen("/proc/self/maps", "r"); + if (!maps) + return 0; + + while (fgets(line, sizeof(line), maps)) { + if (sscanf(line, "%lx-%lx %4s", &addr_start, &addr_end, protstr) == 3) { + if (addr_start == (uintptr_t) addr) { + size = addr_end - addr_start; + if (protstr[0] == 'r') + *prot |= 0x4; + if (protstr[1] == 'w') + *prot |= 0x2; + if (protstr[2] == 'x') + *prot |= 0x1; + break; + } + } + } + fclose(maps); + return size; +} + +/* + * define sys_xyx to call syscall directly. + */ +static int sys_mseal(void *start, size_t len) +{ + int sret; + + errno = 0; + sret = syscall(__NR_mseal, start, len, 0); + return sret; +} + +static int sys_mprotect(void *ptr, size_t size, unsigned long prot) +{ + int sret; + + errno = 0; + sret = syscall(__NR_mprotect, ptr, size, prot); + return sret; +} + +static int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, + unsigned long pkey) +{ + int sret; + + errno = 0; + sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey); + return sret; +} + +static void *sys_mmap(void *addr, unsigned long len, unsigned long prot, + unsigned long flags, unsigned long fd, unsigned long offset) +{ + void *sret; + + errno = 0; + sret = (void *) syscall(__NR_mmap, addr, len, prot, + flags, fd, offset); + return sret; +} + +static int sys_munmap(void *ptr, size_t size) +{ + int sret; + + errno = 0; + sret = syscall(__NR_munmap, ptr, size); + return sret; +} + +static int sys_madvise(void *start, size_t len, int types) +{ + int sret; + + errno = 0; + sret = syscall(__NR_madvise, start, len, types); + return sret; +} + +static int sys_pkey_alloc(unsigned long flags, unsigned long init_val) +{ + int ret = syscall(__NR_pkey_alloc, flags, init_val); + + return ret; +} + +static unsigned int __read_pkey_reg(void) +{ + unsigned int pkey_reg = 0; +#if defined(__i386__) || defined(__x86_64__) /* arch */ + unsigned int eax, edx; + unsigned int ecx = 0; + + asm volatile(".byte 0x0f,0x01,0xee\n\t" + : "=a" (eax), "=d" (edx) + : "c" (ecx)); + pkey_reg = eax; +#endif + return pkey_reg; +} + +static void __write_pkey_reg(u64 pkey_reg) +{ +#if defined(__i386__) || defined(__x86_64__) /* arch */ + unsigned int eax = pkey_reg; + unsigned int ecx = 0; + unsigned int edx = 0; + + asm volatile(".byte 0x0f,0x01,0xef\n\t" + : : "a" (eax), "c" (ecx), "d" (edx)); +#endif +} + +static unsigned long pkey_bit_position(int pkey) +{ + return pkey * PKEY_BITS_PER_PKEY; +} + +static u64 set_pkey_bits(u64 reg, int pkey, u64 flags) +{ + unsigned long shift = pkey_bit_position(pkey); + + /* mask out bits from pkey in old value */ + reg &= ~((u64)PKEY_MASK << shift); + /* OR in new bits for pkey */ + reg |= (flags & PKEY_MASK) << shift; + return reg; +} + +static void set_pkey(int pkey, unsigned long pkey_value) +{ + u64 new_pkey_reg; + + new_pkey_reg = set_pkey_bits(__read_pkey_reg(), pkey, pkey_value); + __write_pkey_reg(new_pkey_reg); +} + +static void setup_single_address(int size, void **ptrOut) +{ + void *ptr; + + ptr = sys_mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + *ptrOut = ptr; +} + +static void setup_single_address_rw(int size, void **ptrOut) +{ + void *ptr; + unsigned long mapflags = MAP_ANONYMOUS | MAP_PRIVATE; + + ptr = sys_mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0); + *ptrOut = ptr; +} + +static int clean_single_address(void *ptr, int size) +{ + int ret; + ret = munmap(ptr, size); + return ret; +} + +static int seal_single_address(void *ptr, int size) +{ + int ret; + ret = sys_mseal(ptr, size); + return ret; +} + +bool seal_support(void) +{ + int ret; + void *ptr; + unsigned long page_size = getpagesize(); + + ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (ptr == (void *) -1) + return false; + + ret = sys_mseal(ptr, page_size); + if (ret < 0) + return false; + + return true; +} + +bool pkey_supported(void) +{ +#if defined(__i386__) || defined(__x86_64__) /* arch */ + int pkey = sys_pkey_alloc(0, 0); + + if (pkey > 0) + return true; +#endif + return false; +} + +static void test_seal_addseal(void) +{ + int ret; + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_seal_unmapped_start(void) +{ + int ret; + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* munmap 2 pages from ptr. */ + ret = sys_munmap(ptr, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + /* mprotect will fail because 2 pages from ptr are unmapped. */ + ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(ret < 0); + + /* mseal will fail because 2 pages from ptr are unmapped. */ + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(ret < 0); + + ret = sys_mseal(ptr + 2 * page_size, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_seal_unmapped_middle(void) +{ + int ret; + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* munmap 2 pages from ptr + page. */ + ret = sys_munmap(ptr + page_size, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + /* mprotect will fail, since middle 2 pages are unmapped. */ + ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(ret < 0); + + /* mseal will fail as well. */ + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(ret < 0); + + /* we still can add seal to the first page and last page*/ + ret = sys_mseal(ptr, page_size); + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_mseal(ptr + 3 * page_size, page_size); + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_seal_unmapped_end(void) +{ + int ret; + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* unmap last 2 pages. */ + ret = sys_munmap(ptr + 2 * page_size, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + /* mprotect will fail since last 2 pages are unmapped. */ + ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(ret < 0); + + /* mseal will fail as well. */ + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(ret < 0); + + /* The first 2 pages is not sealed, and can add seals */ + ret = sys_mseal(ptr, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_seal_multiple_vmas(void) +{ + int ret; + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* use mprotect to split the vma into 3. */ + ret = sys_mprotect(ptr + page_size, 2 * page_size, + PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* mprotect will get applied to all 4 pages - 3 VMAs. */ + ret = sys_mprotect(ptr, size, PROT_READ); + FAIL_TEST_IF_FALSE(!ret); + + /* use mprotect to split the vma into 3. */ + ret = sys_mprotect(ptr + page_size, 2 * page_size, + PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* mseal get applied to all 4 pages - 3 VMAs. */ + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_seal_split_start(void) +{ + int ret; + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* use mprotect to split at middle */ + ret = sys_mprotect(ptr, 2 * page_size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* seal the first page, this will split the VMA */ + ret = sys_mseal(ptr, page_size); + FAIL_TEST_IF_FALSE(!ret); + + /* add seal to the remain 3 pages */ + ret = sys_mseal(ptr + page_size, 3 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_seal_split_end(void) +{ + int ret; + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* use mprotect to split at middle */ + ret = sys_mprotect(ptr, 2 * page_size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* seal the last page */ + ret = sys_mseal(ptr + 3 * page_size, page_size); + FAIL_TEST_IF_FALSE(!ret); + + /* Adding seals to the first 3 pages */ + ret = sys_mseal(ptr, 3 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_seal_invalid_input(void) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(8 * page_size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + ret = clean_single_address(ptr + 4 * page_size, 4 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + /* invalid flag */ + ret = syscall(__NR_mseal, ptr, size, 0x20); + FAIL_TEST_IF_FALSE(ret < 0); + + /* unaligned address */ + ret = sys_mseal(ptr + 1, 2 * page_size); + FAIL_TEST_IF_FALSE(ret < 0); + + /* length too big */ + ret = sys_mseal(ptr, 5 * page_size); + FAIL_TEST_IF_FALSE(ret < 0); + + /* length overflow */ + ret = sys_mseal(ptr, UINT64_MAX/page_size); + FAIL_TEST_IF_FALSE(ret < 0); + + /* start is not in a valid VMA */ + ret = sys_mseal(ptr - page_size, 5 * page_size); + FAIL_TEST_IF_FALSE(ret < 0); + + TEST_END_CHECK(); +} + +static void test_seal_zero_length(void) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + ret = sys_mprotect(ptr, 0, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* seal 0 length will be OK, same as mprotect */ + ret = sys_mseal(ptr, 0); + FAIL_TEST_IF_FALSE(!ret); + + /* verify the 4 pages are not sealed by previous call. */ + ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_seal_zero_address(void) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + int prot; + + /* use mmap to change protection. */ + ptr = sys_mmap(0, size, PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + FAIL_TEST_IF_FALSE(ptr == 0); + + size = get_vma_size(ptr, &prot); + FAIL_TEST_IF_FALSE(size == 4 * page_size); + + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + + /* verify the 4 pages are sealed by previous call. */ + ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(ret); + + TEST_END_CHECK(); +} + +static void test_seal_twice(void) +{ + int ret; + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + + /* apply the same seal will be OK. idempotent. */ + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_seal_mprotect(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = seal_single_address(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_seal_start_mprotect(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = seal_single_address(ptr, page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* the first page is sealed. */ + ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + /* pages after the first page is not sealed. */ + ret = sys_mprotect(ptr + page_size, page_size * 3, + PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_seal_end_mprotect(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = seal_single_address(ptr + page_size, 3 * page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* first page is not sealed */ + ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* last 3 page are sealed */ + ret = sys_mprotect(ptr + page_size, page_size * 3, + PROT_READ | PROT_WRITE); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_seal_mprotect_unalign_len(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = seal_single_address(ptr, page_size * 2 - 1); + FAIL_TEST_IF_FALSE(!ret); + } + + /* 2 pages are sealed. */ + ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_mprotect(ptr + page_size * 2, page_size, + PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_seal_mprotect_unalign_len_variant_2(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + if (seal) { + ret = seal_single_address(ptr, page_size * 2 + 1); + FAIL_TEST_IF_FALSE(!ret); + } + + /* 3 pages are sealed. */ + ret = sys_mprotect(ptr, page_size * 3, PROT_READ | PROT_WRITE); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_mprotect(ptr + page_size * 3, page_size, + PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_seal_mprotect_two_vma(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* use mprotect to split */ + ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + if (seal) { + ret = seal_single_address(ptr, page_size * 4); + FAIL_TEST_IF_FALSE(!ret); + } + + ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_mprotect(ptr + page_size * 2, page_size * 2, + PROT_READ | PROT_WRITE); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_seal_mprotect_two_vma_with_split(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* use mprotect to split as two vma. */ + ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* mseal can apply across 2 vma, also split them. */ + if (seal) { + ret = seal_single_address(ptr + page_size, page_size * 2); + FAIL_TEST_IF_FALSE(!ret); + } + + /* the first page is not sealed. */ + ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* the second page is sealed. */ + ret = sys_mprotect(ptr + page_size, page_size, PROT_READ | PROT_WRITE); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + /* the third page is sealed. */ + ret = sys_mprotect(ptr + 2 * page_size, page_size, + PROT_READ | PROT_WRITE); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + /* the fouth page is not sealed. */ + ret = sys_mprotect(ptr + 3 * page_size, page_size, + PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_seal_mprotect_partial_mprotect(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* seal one page. */ + if (seal) { + ret = seal_single_address(ptr, page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* mprotect first 2 page will fail, since the first page are sealed. */ + ret = sys_mprotect(ptr, 2 * page_size, PROT_READ | PROT_WRITE); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_seal_mprotect_two_vma_with_gap(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* use mprotect to split. */ + ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* use mprotect to split. */ + ret = sys_mprotect(ptr + 3 * page_size, page_size, + PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* use munmap to free two pages in the middle */ + ret = sys_munmap(ptr + page_size, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + /* mprotect will fail, because there is a gap in the address. */ + /* notes, internally mprotect still updated the first page. */ + ret = sys_mprotect(ptr, 4 * page_size, PROT_READ); + FAIL_TEST_IF_FALSE(ret < 0); + + /* mseal will fail as well. */ + ret = sys_mseal(ptr, 4 * page_size); + FAIL_TEST_IF_FALSE(ret < 0); + + /* the first page is not sealed. */ + ret = sys_mprotect(ptr, page_size, PROT_READ); + FAIL_TEST_IF_FALSE(ret == 0); + + /* the last page is not sealed. */ + ret = sys_mprotect(ptr + 3 * page_size, page_size, PROT_READ); + FAIL_TEST_IF_FALSE(ret == 0); + + TEST_END_CHECK(); +} + +static void test_seal_mprotect_split(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* use mprotect to split. */ + ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* seal all 4 pages. */ + if (seal) { + ret = sys_mseal(ptr, 4 * page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* mprotect is sealed. */ + ret = sys_mprotect(ptr, 2 * page_size, PROT_READ); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + + ret = sys_mprotect(ptr + 2 * page_size, 2 * page_size, PROT_READ); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_seal_mprotect_merge(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* use mprotect to split one page. */ + ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + /* seal first two pages. */ + if (seal) { + ret = sys_mseal(ptr, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* 2 pages are sealed. */ + ret = sys_mprotect(ptr, 2 * page_size, PROT_READ); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + /* last 2 pages are not sealed. */ + ret = sys_mprotect(ptr + 2 * page_size, 2 * page_size, PROT_READ); + FAIL_TEST_IF_FALSE(ret == 0); + + TEST_END_CHECK(); +} + +static void test_seal_munmap(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* 4 pages are sealed. */ + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +/* + * allocate 4 pages, + * use mprotect to split it as two VMAs + * seal the whole range + * munmap will fail on both + */ +static void test_seal_munmap_two_vma(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* use mprotect to split */ + ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + ret = sys_munmap(ptr, page_size * 2); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_munmap(ptr + page_size, page_size * 2); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +/* + * allocate a VMA with 4 pages. + * munmap the middle 2 pages. + * seal the whole 4 pages, will fail. + * munmap the first page will be OK. + * munmap the last page will be OK. + */ +static void test_seal_munmap_vma_with_gap(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + ret = sys_munmap(ptr + page_size, page_size * 2); + FAIL_TEST_IF_FALSE(!ret); + + if (seal) { + /* can't have gap in the middle. */ + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(ret < 0); + } + + ret = sys_munmap(ptr, page_size); + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_munmap(ptr + page_size * 2, page_size); + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_munmap(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_munmap_start_freed(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + int prot; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* unmap the first page. */ + ret = sys_munmap(ptr, page_size); + FAIL_TEST_IF_FALSE(!ret); + + /* seal the last 3 pages. */ + if (seal) { + ret = sys_mseal(ptr + page_size, 3 * page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* unmap from the first page. */ + ret = sys_munmap(ptr, size); + if (seal) { + FAIL_TEST_IF_FALSE(ret < 0); + + size = get_vma_size(ptr + page_size, &prot); + FAIL_TEST_IF_FALSE(size == page_size * 3); + } else { + /* note: this will be OK, even the first page is */ + /* already unmapped. */ + FAIL_TEST_IF_FALSE(!ret); + + size = get_vma_size(ptr + page_size, &prot); + FAIL_TEST_IF_FALSE(size == 0); + } + + TEST_END_CHECK(); +} + +static void test_munmap_end_freed(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* unmap last page. */ + ret = sys_munmap(ptr + page_size * 3, page_size); + FAIL_TEST_IF_FALSE(!ret); + + /* seal the first 3 pages. */ + if (seal) { + ret = sys_mseal(ptr, 3 * page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* unmap all pages. */ + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_munmap_middle_freed(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + int prot; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* unmap 2 pages in the middle. */ + ret = sys_munmap(ptr + page_size, page_size * 2); + FAIL_TEST_IF_FALSE(!ret); + + /* seal the first page. */ + if (seal) { + ret = sys_mseal(ptr, page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* munmap all 4 pages. */ + ret = sys_munmap(ptr, size); + if (seal) { + FAIL_TEST_IF_FALSE(ret < 0); + + size = get_vma_size(ptr, &prot); + FAIL_TEST_IF_FALSE(size == page_size); + + size = get_vma_size(ptr + page_size * 3, &prot); + FAIL_TEST_IF_FALSE(size == page_size); + } else { + FAIL_TEST_IF_FALSE(!ret); + + size = get_vma_size(ptr, &prot); + FAIL_TEST_IF_FALSE(size == 0); + + size = get_vma_size(ptr + page_size * 3, &prot); + FAIL_TEST_IF_FALSE(size == 0); + } + + TEST_END_CHECK(); +} + +static void test_seal_mremap_shrink(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* shrink from 4 pages to 2 pages. */ + ret2 = mremap(ptr, size, 2 * page_size, 0, 0); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else { + FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED); + + } + + TEST_END_CHECK(); +} + +static void test_seal_mremap_expand(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + /* ummap last 2 pages. */ + ret = sys_munmap(ptr + 2 * page_size, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + if (seal) { + ret = sys_mseal(ptr, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* expand from 2 page to 4 pages. */ + ret2 = mremap(ptr, 2 * page_size, 4 * page_size, 0, 0); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else { + FAIL_TEST_IF_FALSE(ret2 == ptr); + + } + + TEST_END_CHECK(); +} + +static void test_seal_mremap_move(bool seal) +{ + void *ptr, *newPtr; + unsigned long page_size = getpagesize(); + unsigned long size = page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + setup_single_address(size, &newPtr); + FAIL_TEST_IF_FALSE(newPtr != (void *)-1); + ret = clean_single_address(newPtr, size); + FAIL_TEST_IF_FALSE(!ret); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* move from ptr to fixed address. */ + ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newPtr); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else { + FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED); + + } + + TEST_END_CHECK(); +} + +static void test_seal_mmap_overwrite_prot(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* use mmap to change protection. */ + ret2 = sys_mmap(ptr, size, PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else + FAIL_TEST_IF_FALSE(ret2 == ptr); + + TEST_END_CHECK(); +} + +static void test_seal_mmap_expand(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 12 * page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + /* ummap last 4 pages. */ + ret = sys_munmap(ptr + 8 * page_size, 4 * page_size); + FAIL_TEST_IF_FALSE(!ret); + + if (seal) { + ret = sys_mseal(ptr, 8 * page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* use mmap to expand. */ + ret2 = sys_mmap(ptr, size, PROT_READ, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else + FAIL_TEST_IF_FALSE(ret2 == ptr); + + TEST_END_CHECK(); +} + +static void test_seal_mmap_shrink(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 12 * page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* use mmap to shrink. */ + ret2 = sys_mmap(ptr, 8 * page_size, PROT_READ, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else + FAIL_TEST_IF_FALSE(ret2 == ptr); + + TEST_END_CHECK(); +} + +static void test_seal_mremap_shrink_fixed(bool seal) +{ + void *ptr; + void *newAddr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + setup_single_address(size, &newAddr); + FAIL_TEST_IF_FALSE(newAddr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* mremap to move and shrink to fixed address */ + ret2 = mremap(ptr, size, 2 * page_size, MREMAP_MAYMOVE | MREMAP_FIXED, + newAddr); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else + FAIL_TEST_IF_FALSE(ret2 == newAddr); + + TEST_END_CHECK(); +} + +static void test_seal_mremap_expand_fixed(bool seal) +{ + void *ptr; + void *newAddr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + void *ret2; + + setup_single_address(page_size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + setup_single_address(size, &newAddr); + FAIL_TEST_IF_FALSE(newAddr != (void *)-1); + + if (seal) { + ret = sys_mseal(newAddr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* mremap to move and expand to fixed address */ + ret2 = mremap(ptr, page_size, size, MREMAP_MAYMOVE | MREMAP_FIXED, + newAddr); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else + FAIL_TEST_IF_FALSE(ret2 == newAddr); + + TEST_END_CHECK(); +} + +static void test_seal_mremap_move_fixed(bool seal) +{ + void *ptr; + void *newAddr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + setup_single_address(size, &newAddr); + FAIL_TEST_IF_FALSE(newAddr != (void *)-1); + + if (seal) { + ret = sys_mseal(newAddr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* mremap to move to fixed address */ + ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newAddr); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else + FAIL_TEST_IF_FALSE(ret2 == newAddr); + + TEST_END_CHECK(); +} + +static void test_seal_mremap_move_fixed_zero(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* + * MREMAP_FIXED can move the mapping to zero address + */ + ret2 = mremap(ptr, size, 2 * page_size, MREMAP_MAYMOVE | MREMAP_FIXED, + 0); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else { + FAIL_TEST_IF_FALSE(ret2 == 0); + + } + + TEST_END_CHECK(); +} + +static void test_seal_mremap_move_dontunmap(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* mremap to move, and don't unmap src addr. */ + ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, 0); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else { + FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED); + + } + + TEST_END_CHECK(); +} + +static void test_seal_mremap_move_dontunmap_anyaddr(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + void *ret2; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* + * The 0xdeaddead should not have effect on dest addr + * when MREMAP_DONTUNMAP is set. + */ + ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, + 0xdeaddead); + if (seal) { + FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(errno == EPERM); + } else { + FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED); + FAIL_TEST_IF_FALSE((long)ret2 != 0xdeaddead); + + } + + TEST_END_CHECK(); +} + + +static void test_seal_merge_and_split(void) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size; + int ret; + int prot; + + /* (24 RO) */ + setup_single_address(24 * page_size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + /* use mprotect(NONE) to set out boundary */ + /* (1 NONE) (22 RO) (1 NONE) */ + ret = sys_mprotect(ptr, page_size, PROT_NONE); + FAIL_TEST_IF_FALSE(!ret); + ret = sys_mprotect(ptr + 23 * page_size, page_size, PROT_NONE); + FAIL_TEST_IF_FALSE(!ret); + size = get_vma_size(ptr + page_size, &prot); + FAIL_TEST_IF_FALSE(size == 22 * page_size); + FAIL_TEST_IF_FALSE(prot == 4); + + /* use mseal to split from beginning */ + /* (1 NONE) (1 RO_SEAL) (21 RO) (1 NONE) */ + ret = sys_mseal(ptr + page_size, page_size); + FAIL_TEST_IF_FALSE(!ret); + size = get_vma_size(ptr + page_size, &prot); + FAIL_TEST_IF_FALSE(size == page_size); + FAIL_TEST_IF_FALSE(prot == 0x4); + size = get_vma_size(ptr + 2 * page_size, &prot); + FAIL_TEST_IF_FALSE(size == 21 * page_size); + FAIL_TEST_IF_FALSE(prot == 0x4); + + /* use mseal to split from the end. */ + /* (1 NONE) (1 RO_SEAL) (20 RO) (1 RO_SEAL) (1 NONE) */ + ret = sys_mseal(ptr + 22 * page_size, page_size); + FAIL_TEST_IF_FALSE(!ret); + size = get_vma_size(ptr + 22 * page_size, &prot); + FAIL_TEST_IF_FALSE(size == page_size); + FAIL_TEST_IF_FALSE(prot == 0x4); + size = get_vma_size(ptr + 2 * page_size, &prot); + FAIL_TEST_IF_FALSE(size == 20 * page_size); + FAIL_TEST_IF_FALSE(prot == 0x4); + + /* merge with prev. */ + /* (1 NONE) (2 RO_SEAL) (19 RO) (1 RO_SEAL) (1 NONE) */ + ret = sys_mseal(ptr + 2 * page_size, page_size); + FAIL_TEST_IF_FALSE(!ret); + size = get_vma_size(ptr + page_size, &prot); + FAIL_TEST_IF_FALSE(size == 2 * page_size); + FAIL_TEST_IF_FALSE(prot == 0x4); + + /* merge with after. */ + /* (1 NONE) (2 RO_SEAL) (18 RO) (2 RO_SEALS) (1 NONE) */ + ret = sys_mseal(ptr + 21 * page_size, page_size); + FAIL_TEST_IF_FALSE(!ret); + size = get_vma_size(ptr + 21 * page_size, &prot); + FAIL_TEST_IF_FALSE(size == 2 * page_size); + FAIL_TEST_IF_FALSE(prot == 0x4); + + /* split and merge from prev */ + /* (1 NONE) (3 RO_SEAL) (17 RO) (2 RO_SEALS) (1 NONE) */ + ret = sys_mseal(ptr + 2 * page_size, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + size = get_vma_size(ptr + 1 * page_size, &prot); + FAIL_TEST_IF_FALSE(size == 3 * page_size); + FAIL_TEST_IF_FALSE(prot == 0x4); + ret = sys_munmap(ptr + page_size, page_size); + FAIL_TEST_IF_FALSE(ret < 0); + ret = sys_mprotect(ptr + 2 * page_size, page_size, PROT_NONE); + FAIL_TEST_IF_FALSE(ret < 0); + + /* split and merge from next */ + /* (1 NONE) (3 RO_SEAL) (16 RO) (3 RO_SEALS) (1 NONE) */ + ret = sys_mseal(ptr + 20 * page_size, 2 * page_size); + FAIL_TEST_IF_FALSE(!ret); + FAIL_TEST_IF_FALSE(prot == 0x4); + size = get_vma_size(ptr + 20 * page_size, &prot); + FAIL_TEST_IF_FALSE(size == 3 * page_size); + FAIL_TEST_IF_FALSE(prot == 0x4); + + /* merge from middle of prev and middle of next. */ + /* (1 NONE) (22 RO_SEAL) (1 NONE) */ + ret = sys_mseal(ptr + 2 * page_size, 20 * page_size); + FAIL_TEST_IF_FALSE(!ret); + size = get_vma_size(ptr + page_size, &prot); + FAIL_TEST_IF_FALSE(size == 22 * page_size); + FAIL_TEST_IF_FALSE(prot == 0x4); + + TEST_END_CHECK(); +} + +static void test_seal_discard_ro_anon_on_rw(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address_rw(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* sealing doesn't take effect on RW memory. */ + ret = sys_madvise(ptr, size, MADV_DONTNEED); + FAIL_TEST_IF_FALSE(!ret); + + /* base seal still apply. */ + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_seal_discard_ro_anon_on_pkey(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + int pkey; + + SKIP_TEST_IF_FALSE(pkey_supported()); + + setup_single_address_rw(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + pkey = sys_pkey_alloc(0, 0); + FAIL_TEST_IF_FALSE(pkey > 0); + + ret = sys_mprotect_pkey((void *)ptr, size, PROT_READ | PROT_WRITE, pkey); + FAIL_TEST_IF_FALSE(!ret); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* sealing doesn't take effect if PKRU allow write. */ + set_pkey(pkey, 0); + ret = sys_madvise(ptr, size, MADV_DONTNEED); + FAIL_TEST_IF_FALSE(!ret); + + /* sealing will take effect if PKRU deny write. */ + set_pkey(pkey, PKEY_DISABLE_WRITE); + ret = sys_madvise(ptr, size, MADV_DONTNEED); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + /* base seal still apply. */ + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_seal_discard_ro_anon_on_filebacked(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + int fd; + unsigned long mapflags = MAP_PRIVATE; + + fd = memfd_create("test", 0); + FAIL_TEST_IF_FALSE(fd > 0); + + ret = fallocate(fd, 0, 0, size); + FAIL_TEST_IF_FALSE(!ret); + + ptr = sys_mmap(NULL, size, PROT_READ, mapflags, fd, 0); + FAIL_TEST_IF_FALSE(ptr != MAP_FAILED); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* sealing doesn't apply for file backed mapping. */ + ret = sys_madvise(ptr, size, MADV_DONTNEED); + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + close(fd); + + TEST_END_CHECK(); +} + +static void test_seal_discard_ro_anon_on_shared(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + unsigned long mapflags = MAP_ANONYMOUS | MAP_SHARED; + + ptr = sys_mmap(NULL, size, PROT_READ, mapflags, -1, 0); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* sealing doesn't apply for shared mapping. */ + ret = sys_madvise(ptr, size, MADV_DONTNEED); + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +static void test_seal_discard_ro_anon(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = seal_single_address(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + ret = sys_madvise(ptr, size, MADV_DONTNEED); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + TEST_END_CHECK(); +} + +int main(int argc, char **argv) +{ + bool test_seal = seal_support(); + + ksft_print_header(); + + if (!test_seal) + ksft_exit_skip("sealing not supported, check CONFIG_64BIT\n"); + + if (!pkey_supported()) + ksft_print_msg("PKEY not supported\n"); + + ksft_set_plan(80); + + test_seal_addseal(); + test_seal_unmapped_start(); + test_seal_unmapped_middle(); + test_seal_unmapped_end(); + test_seal_multiple_vmas(); + test_seal_split_start(); + test_seal_split_end(); + test_seal_invalid_input(); + test_seal_zero_length(); + test_seal_twice(); + + test_seal_mprotect(false); + test_seal_mprotect(true); + + test_seal_start_mprotect(false); + test_seal_start_mprotect(true); + + test_seal_end_mprotect(false); + test_seal_end_mprotect(true); + + test_seal_mprotect_unalign_len(false); + test_seal_mprotect_unalign_len(true); + + test_seal_mprotect_unalign_len_variant_2(false); + test_seal_mprotect_unalign_len_variant_2(true); + + test_seal_mprotect_two_vma(false); + test_seal_mprotect_two_vma(true); + + test_seal_mprotect_two_vma_with_split(false); + test_seal_mprotect_two_vma_with_split(true); + + test_seal_mprotect_partial_mprotect(false); + test_seal_mprotect_partial_mprotect(true); + + test_seal_mprotect_two_vma_with_gap(false); + test_seal_mprotect_two_vma_with_gap(true); + + test_seal_mprotect_merge(false); + test_seal_mprotect_merge(true); + + test_seal_mprotect_split(false); + test_seal_mprotect_split(true); + + test_seal_munmap(false); + test_seal_munmap(true); + test_seal_munmap_two_vma(false); + test_seal_munmap_two_vma(true); + test_seal_munmap_vma_with_gap(false); + test_seal_munmap_vma_with_gap(true); + + test_munmap_start_freed(false); + test_munmap_start_freed(true); + test_munmap_middle_freed(false); + test_munmap_middle_freed(true); + test_munmap_end_freed(false); + test_munmap_end_freed(true); + + test_seal_mremap_shrink(false); + test_seal_mremap_shrink(true); + test_seal_mremap_expand(false); + test_seal_mremap_expand(true); + test_seal_mremap_move(false); + test_seal_mremap_move(true); + + test_seal_mremap_shrink_fixed(false); + test_seal_mremap_shrink_fixed(true); + test_seal_mremap_expand_fixed(false); + test_seal_mremap_expand_fixed(true); + test_seal_mremap_move_fixed(false); + test_seal_mremap_move_fixed(true); + test_seal_mremap_move_dontunmap(false); + test_seal_mremap_move_dontunmap(true); + test_seal_mremap_move_fixed_zero(false); + test_seal_mremap_move_fixed_zero(true); + test_seal_mremap_move_dontunmap_anyaddr(false); + test_seal_mremap_move_dontunmap_anyaddr(true); + test_seal_discard_ro_anon(false); + test_seal_discard_ro_anon(true); + test_seal_discard_ro_anon_on_rw(false); + test_seal_discard_ro_anon_on_rw(true); + test_seal_discard_ro_anon_on_shared(false); + test_seal_discard_ro_anon_on_shared(true); + test_seal_discard_ro_anon_on_filebacked(false); + test_seal_discard_ro_anon_on_filebacked(true); + test_seal_mmap_overwrite_prot(false); + test_seal_mmap_overwrite_prot(true); + test_seal_mmap_expand(false); + test_seal_mmap_expand(true); + test_seal_mmap_shrink(false); + test_seal_mmap_shrink(true); + + test_seal_merge_and_split(); + test_seal_zero_address(); + + test_seal_discard_ro_anon_on_pkey(false); + test_seal_discard_ro_anon_on_pkey(true); + + ksft_finished(); +} diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 4bdb3a0c7a..3157204b90 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -152,9 +152,13 @@ done < /proc/meminfo # both of these requirements into account and attempt to increase # number of huge pages available. nr_cpus=$(nproc) -hpgsize_MB=$((hpgsize_KB / 1024)) -half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128)) -needmem_KB=$((half_ufd_size_MB * 2 * 1024)) +uffd_min_KB=$((hpgsize_KB * nr_cpus * 2)) +hugetlb_min_KB=$((256 * 1024)) +if [[ $uffd_min_KB -gt $hugetlb_min_KB ]]; then + needmem_KB=$uffd_min_KB +else + needmem_KB=$hugetlb_min_KB +fi # set proper nr_hugepages if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then @@ -294,7 +298,8 @@ CATEGORY="userfaultfd" run_test ./uffd-unit-tests uffd_stress_bin=./uffd-stress CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16 # Hugetlb tests require source and destination huge pages. Pass in half -# the size ($half_ufd_size_MB), which is used for *each*. +# the size of the free pages we have, which is used for *each*. +half_ufd_size_MB=$((freepgs / 2)) CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16 diff --git a/tools/testing/selftests/mm/seal_elf.c b/tools/testing/selftests/mm/seal_elf.c new file mode 100644 index 0000000000..f2babec79b --- /dev/null +++ b/tools/testing/selftests/mm/seal_elf.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <sys/mman.h> +#include <stdint.h> +#include <unistd.h> +#include <string.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <stdbool.h> +#include "../kselftest.h" +#include <syscall.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/vfs.h> +#include <sys/stat.h> + +/* + * need those definition for manually build using gcc. + * gcc -I ../../../../usr/include -DDEBUG -O3 -DDEBUG -O3 seal_elf.c -o seal_elf + */ +#define FAIL_TEST_IF_FALSE(c) do {\ + if (!(c)) {\ + ksft_test_result_fail("%s, line:%d\n", __func__, __LINE__);\ + goto test_end;\ + } \ + } \ + while (0) + +#define SKIP_TEST_IF_FALSE(c) do {\ + if (!(c)) {\ + ksft_test_result_skip("%s, line:%d\n", __func__, __LINE__);\ + goto test_end;\ + } \ + } \ + while (0) + + +#define TEST_END_CHECK() {\ + ksft_test_result_pass("%s\n", __func__);\ + return;\ +test_end:\ + return;\ +} + +#ifndef u64 +#define u64 unsigned long long +#endif + +/* + * define sys_xyx to call syscall directly. + */ +static int sys_mseal(void *start, size_t len) +{ + int sret; + + errno = 0; + sret = syscall(__NR_mseal, start, len, 0); + return sret; +} + +static void *sys_mmap(void *addr, unsigned long len, unsigned long prot, + unsigned long flags, unsigned long fd, unsigned long offset) +{ + void *sret; + + errno = 0; + sret = (void *) syscall(__NR_mmap, addr, len, prot, + flags, fd, offset); + return sret; +} + +static inline int sys_mprotect(void *ptr, size_t size, unsigned long prot) +{ + int sret; + + errno = 0; + sret = syscall(__NR_mprotect, ptr, size, prot); + return sret; +} + +static bool seal_support(void) +{ + int ret; + void *ptr; + unsigned long page_size = getpagesize(); + + ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (ptr == (void *) -1) + return false; + + ret = sys_mseal(ptr, page_size); + if (ret < 0) + return false; + + return true; +} + +const char somestr[4096] = {"READONLY"}; + +static void test_seal_elf(void) +{ + int ret; + FILE *maps; + char line[512]; + uintptr_t addr_start, addr_end; + char prot[5]; + char filename[256]; + unsigned long page_size = getpagesize(); + unsigned long long ptr = (unsigned long long) somestr; + char *somestr2 = (char *)somestr; + + /* + * Modify the protection of readonly somestr + */ + if (((unsigned long long)ptr % page_size) != 0) + ptr = (unsigned long long)ptr & ~(page_size - 1); + + ksft_print_msg("somestr = %s\n", somestr); + ksft_print_msg("change protection to rw\n"); + ret = sys_mprotect((void *)ptr, page_size, PROT_READ|PROT_WRITE); + FAIL_TEST_IF_FALSE(!ret); + *somestr2 = 'A'; + ksft_print_msg("somestr is modified to: %s\n", somestr); + ret = sys_mprotect((void *)ptr, page_size, PROT_READ); + FAIL_TEST_IF_FALSE(!ret); + + maps = fopen("/proc/self/maps", "r"); + FAIL_TEST_IF_FALSE(maps); + + /* + * apply sealing to elf binary + */ + while (fgets(line, sizeof(line), maps)) { + if (sscanf(line, "%lx-%lx %4s %*x %*x:%*x %*u %255[^\n]", + &addr_start, &addr_end, prot, filename) == 4) { + if (strlen(filename)) { + /* + * seal the mapping if read only. + */ + if (strstr(prot, "r-")) { + ret = sys_mseal((void *)addr_start, addr_end - addr_start); + FAIL_TEST_IF_FALSE(!ret); + ksft_print_msg("sealed: %lx-%lx %s %s\n", + addr_start, addr_end, prot, filename); + if ((uintptr_t) somestr >= addr_start && + (uintptr_t) somestr <= addr_end) + ksft_print_msg("mapping for somestr found\n"); + } + } + } + } + fclose(maps); + + ret = sys_mprotect((void *)ptr, page_size, PROT_READ | PROT_WRITE); + FAIL_TEST_IF_FALSE(ret < 0); + ksft_print_msg("somestr is sealed, mprotect is rejected\n"); + + TEST_END_CHECK(); +} + +int main(int argc, char **argv) +{ + bool test_seal = seal_support(); + + ksft_print_header(); + ksft_print_msg("pid=%d\n", getpid()); + + if (!test_seal) + ksft_exit_skip("sealing not supported, check CONFIG_64BIT\n"); + + ksft_set_plan(1); + + test_seal_elf(); + + ksft_finished(); +} diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c index d9dbf87974..bdfa5d085f 100644 --- a/tools/testing/selftests/mm/soft-dirty.c +++ b/tools/testing/selftests/mm/soft-dirty.c @@ -209,5 +209,5 @@ int main(int argc, char **argv) close(pagemap_fd); - ksft_exit_pass(); + ksft_finished(); } diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c index 7bcf8d4825..4e4c1e3112 100644 --- a/tools/testing/selftests/mm/virtual_address_range.c +++ b/tools/testing/selftests/mm/virtual_address_range.c @@ -12,6 +12,8 @@ #include <errno.h> #include <sys/mman.h> #include <sys/time.h> +#include <fcntl.h> + #include "../kselftest.h" /* @@ -85,7 +87,7 @@ static int validate_lower_address_hint(void) char *ptr; ptr = mmap((void *) (1UL << 45), MAP_CHUNK_SIZE, PROT_READ | - PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (ptr == MAP_FAILED) return 0; @@ -93,6 +95,66 @@ static int validate_lower_address_hint(void) return 1; } +static int validate_complete_va_space(void) +{ + unsigned long start_addr, end_addr, prev_end_addr; + char line[400]; + char prot[6]; + FILE *file; + int fd; + + fd = open("va_dump", O_CREAT | O_WRONLY, 0600); + unlink("va_dump"); + if (fd < 0) { + ksft_test_result_skip("cannot create or open dump file\n"); + ksft_finished(); + } + + file = fopen("/proc/self/maps", "r"); + if (file == NULL) + ksft_exit_fail_msg("cannot open /proc/self/maps\n"); + + prev_end_addr = 0; + while (fgets(line, sizeof(line), file)) { + unsigned long hop; + + if (sscanf(line, "%lx-%lx %s[rwxp-]", + &start_addr, &end_addr, prot) != 3) + ksft_exit_fail_msg("cannot parse /proc/self/maps\n"); + + /* end of userspace mappings; ignore vsyscall mapping */ + if (start_addr & (1UL << 63)) + return 0; + + /* /proc/self/maps must have gaps less than MAP_CHUNK_SIZE */ + if (start_addr - prev_end_addr >= MAP_CHUNK_SIZE) + return 1; + + prev_end_addr = end_addr; + + if (prot[0] != 'r') + continue; + + /* + * Confirm whether MAP_CHUNK_SIZE chunk can be found or not. + * If write succeeds, no need to check MAP_CHUNK_SIZE - 1 + * addresses after that. If the address was not held by this + * process, write would fail with errno set to EFAULT. + * Anyways, if write returns anything apart from 1, exit the + * program since that would mean a bug in /proc/self/maps. + */ + hop = 0; + while (start_addr + hop < end_addr) { + if (write(fd, (void *)(start_addr + hop), 1) != 1) + return 1; + lseek(fd, 0, SEEK_SET); + + hop += MAP_CHUNK_SIZE; + } + } + return 0; +} + int main(int argc, char *argv[]) { char *ptr[NR_CHUNKS_LOW]; @@ -105,13 +167,11 @@ int main(int argc, char *argv[]) for (i = 0; i < NR_CHUNKS_LOW; i++) { ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (ptr[i] == MAP_FAILED) { - if (validate_lower_address_hint()) { - ksft_test_result_skip("Memory constraint not fulfilled\n"); - ksft_finished(); - } + if (validate_lower_address_hint()) + ksft_exit_fail_msg("mmap unexpectedly succeeded with hint\n"); break; } @@ -127,7 +187,7 @@ int main(int argc, char *argv[]) for (i = 0; i < NR_CHUNKS_HIGH; i++) { hint = hind_addr(); hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (hptr[i] == MAP_FAILED) break; @@ -135,6 +195,10 @@ int main(int argc, char *argv[]) validate_addr(hptr[i], 1); } hchunks = i; + if (validate_complete_va_space()) { + ksft_test_result_fail("BUG in mmap() or /proc/self/maps\n"); + ksft_finished(); + } for (i = 0; i < lchunks; i++) munmap(ptr[i], MAP_CHUNK_SIZE); diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 2f9d378ede..666ab7d939 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -2,9 +2,9 @@ bind_bhash bind_timewait bind_wildcard -csum cmsg_sender diag_uid +epoll_busy_poll fin_ack_lat gro hwtstamp_config @@ -31,6 +31,7 @@ reuseport_dualstack rxtimestamp sctp_hello scm_pidfd +scm_rights sk_bind_sendto_listen sk_connect_zero_addr socket @@ -42,7 +43,6 @@ tap tcp_fastopen_backup_key tcp_inq tcp_mmap -test_unix_oob timestamping tls toeplitz diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 7b6918d5f4..d9393569d0 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -20,7 +20,6 @@ TEST_PROGS += reuseaddr_ports_exhausted.sh TEST_PROGS += txtimestamp.sh TEST_PROGS += vrf-xfrm-tests.sh TEST_PROGS += rxtimestamp.sh -TEST_PROGS += devlink_port_split.py TEST_PROGS += drop_monitor_tests.sh TEST_PROGS += vrf_route_leaking.sh TEST_PROGS += bareudp.sh @@ -35,6 +34,7 @@ TEST_PROGS += gre_gso.sh TEST_PROGS += cmsg_so_mark.sh TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh TEST_PROGS += netns-name.sh +TEST_PROGS += nl_netdev.py TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh @@ -43,6 +43,8 @@ TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh TEST_PROGS += srv6_end_flavors_test.sh +TEST_PROGS += srv6_end_dx4_netfilter_test.sh +TEST_PROGS += srv6_end_dx6_netfilter_test.sh TEST_PROGS += vrf_strict_mode_test.sh TEST_PROGS += arp_ndisc_evict_nocarrier.sh TEST_PROGS += ndisc_unsolicited_na_test.sh @@ -67,7 +69,7 @@ TEST_GEN_FILES += ipsec TEST_GEN_FILES += ioam6_parser TEST_GEN_FILES += gro TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa -TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap +TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap epoll_busy_poll TEST_GEN_FILES += toeplitz TEST_GEN_FILES += cmsg_sender TEST_GEN_FILES += stress_reuseport_listen @@ -81,9 +83,6 @@ TEST_PROGS += test_ingress_egress_chaining.sh TEST_GEN_PROGS += so_incoming_cpu TEST_PROGS += sctp_vrf.sh TEST_GEN_FILES += sctp_hello -TEST_GEN_FILES += csum -TEST_GEN_FILES += nat6to4.o -TEST_GEN_FILES += xdp_dummy.o TEST_GEN_FILES += ip_local_port_range TEST_GEN_FILES += bind_wildcard TEST_PROGS += test_vxlan_mdb.sh @@ -93,63 +92,22 @@ TEST_PROGS += test_bridge_backup_port.sh TEST_PROGS += fdb_flush.sh TEST_PROGS += fq_band_pktlimit.sh TEST_PROGS += vlan_hw_filter.sh +TEST_PROGS += bpf_offload.py TEST_FILES := settings TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh +TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) + TEST_INCLUDES := forwarding/lib.sh include ../lib.mk +$(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto $(OUTPUT)/tcp_inq: LDLIBS += -lpthread $(OUTPUT)/bind_bhash: LDLIBS += -lpthread $(OUTPUT)/io_uring_zerocopy_tx: CFLAGS += -I../../../include/ -# Rules to generate bpf objs -CLANG ?= clang -SCRATCH_DIR := $(OUTPUT)/tools -BUILD_DIR := $(SCRATCH_DIR)/build -BPFDIR := $(abspath ../../../lib/bpf) -APIDIR := $(abspath ../../../include/uapi) - -CCINCLUDE += -I../bpf -CCINCLUDE += -I../../../../usr/include/ -CCINCLUDE += -I$(SCRATCH_DIR)/include - -BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a - -MAKE_DIRS := $(BUILD_DIR)/libbpf -$(MAKE_DIRS): - mkdir -p $@ - -# Get Clang's default includes on this system, as opposed to those seen by -# '--target=bpf'. This fixes "missing" files on some architectures/distros, -# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. -# -# Use '-idirafter': Don't interfere with include mechanics except where the -# build would have failed anyways. -define get_sys_includes -$(shell $(1) $(2) -v -E - </dev/null 2>&1 \ - | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ -$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') -endef - -ifneq ($(CROSS_COMPILE),) -CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%)) -endif - -CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) - -$(OUTPUT)/nat6to4.o $(OUTPUT)/xdp_dummy.o: $(OUTPUT)/%.o : %.c $(BPFOBJ) | $(MAKE_DIRS) - $(CLANG) -O2 --target=bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@ - -$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ - $(APIDIR)/linux/bpf.h \ - | $(BUILD_DIR)/libbpf - $(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ - EXTRA_CFLAGS='-g -O0' \ - DESTDIR=$(SCRATCH_DIR) prefix= all install_headers - -EXTRA_CLEAN := $(SCRATCH_DIR) +include bpf.mk diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 221c387a7d..5058447954 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,4 @@ CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd +TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/config b/tools/testing/selftests/net/af_unix/config new file mode 100644 index 0000000000..3736856776 --- /dev/null +++ b/tools/testing/selftests/net/af_unix/config @@ -0,0 +1,3 @@ +CONFIG_UNIX=y +CONFIG_AF_UNIX_OOB=y +CONFIG_UNIX_DIAG=m diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c new file mode 100644 index 0000000000..16d0c172ea --- /dev/null +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -0,0 +1,734 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#include <fcntl.h> +#include <string.h> +#include <unistd.h> + +#include <netinet/in.h> +#include <sys/epoll.h> +#include <sys/ioctl.h> +#include <sys/signalfd.h> +#include <sys/socket.h> + +#include "../../kselftest_harness.h" + +#define BUF_SZ 32 + +FIXTURE(msg_oob) +{ + int fd[4]; /* 0: AF_UNIX sender + * 1: AF_UNIX receiver + * 2: TCP sender + * 3: TCP receiver + */ + int signal_fd; + int epoll_fd[2]; /* 0: AF_UNIX receiver + * 1: TCP receiver + */ + bool tcp_compliant; +}; + +FIXTURE_VARIANT(msg_oob) +{ + bool peek; +}; + +FIXTURE_VARIANT_ADD(msg_oob, no_peek) +{ + .peek = false, +}; + +FIXTURE_VARIANT_ADD(msg_oob, peek) +{ + .peek = true +}; + +static void create_unix_socketpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + int ret; + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, self->fd); + ASSERT_EQ(ret, 0); +} + +static void create_tcp_socketpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + struct sockaddr_in addr; + socklen_t addrlen; + int listen_fd; + int ret; + + listen_fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GE(listen_fd, 0); + + ret = listen(listen_fd, -1); + ASSERT_EQ(ret, 0); + + addrlen = sizeof(addr); + ret = getsockname(listen_fd, (struct sockaddr *)&addr, &addrlen); + ASSERT_EQ(ret, 0); + + self->fd[2] = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_GE(self->fd[2], 0); + + ret = connect(self->fd[2], (struct sockaddr *)&addr, addrlen); + ASSERT_EQ(ret, 0); + + self->fd[3] = accept(listen_fd, (struct sockaddr *)&addr, &addrlen); + ASSERT_GE(self->fd[3], 0); + + ret = fcntl(self->fd[3], F_SETFL, O_NONBLOCK); + ASSERT_EQ(ret, 0); +} + +static void setup_sigurg(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + struct signalfd_siginfo siginfo; + int pid = getpid(); + sigset_t mask; + int i, ret; + + for (i = 0; i < 2; i++) { + ret = ioctl(self->fd[i * 2 + 1], FIOSETOWN, &pid); + ASSERT_EQ(ret, 0); + } + + ret = sigemptyset(&mask); + ASSERT_EQ(ret, 0); + + ret = sigaddset(&mask, SIGURG); + ASSERT_EQ(ret, 0); + + ret = sigprocmask(SIG_BLOCK, &mask, NULL); + ASSERT_EQ(ret, 0); + + self->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK); + ASSERT_GE(self->signal_fd, 0); + + ret = read(self->signal_fd, &siginfo, sizeof(siginfo)); + ASSERT_EQ(ret, -1); +} + +static void setup_epollpri(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + struct epoll_event event = { + .events = EPOLLPRI, + }; + int i; + + for (i = 0; i < 2; i++) { + int ret; + + self->epoll_fd[i] = epoll_create1(0); + ASSERT_GE(self->epoll_fd[i], 0); + + ret = epoll_ctl(self->epoll_fd[i], EPOLL_CTL_ADD, self->fd[i * 2 + 1], &event); + ASSERT_EQ(ret, 0); + } +} + +static void close_sockets(FIXTURE_DATA(msg_oob) *self) +{ + int i; + + for (i = 0; i < 4; i++) + close(self->fd[i]); +} + +FIXTURE_SETUP(msg_oob) +{ + create_unix_socketpair(_metadata, self); + create_tcp_socketpair(_metadata, self); + + setup_sigurg(_metadata, self); + setup_epollpri(_metadata, self); + + self->tcp_compliant = true; +} + +FIXTURE_TEARDOWN(msg_oob) +{ + close_sockets(self); +} + +static void __epollpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + bool oob_remaining) +{ + struct epoll_event event[2] = {}; + int i, ret[2]; + + for (i = 0; i < 2; i++) + ret[i] = epoll_wait(self->epoll_fd[i], &event[i], 1, 0); + + ASSERT_EQ(ret[0], oob_remaining); + + if (self->tcp_compliant) + ASSERT_EQ(ret[0], ret[1]); + + if (oob_remaining) { + ASSERT_EQ(event[0].events, EPOLLPRI); + + if (self->tcp_compliant) + ASSERT_EQ(event[0].events, event[1].events); + } +} + +static void __sendpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + const void *buf, size_t len, int flags) +{ + int i, ret[2]; + + for (i = 0; i < 2; i++) { + struct signalfd_siginfo siginfo = {}; + int bytes; + + ret[i] = send(self->fd[i * 2], buf, len, flags); + + bytes = read(self->signal_fd, &siginfo, sizeof(siginfo)); + + if (flags & MSG_OOB) { + ASSERT_EQ(bytes, sizeof(siginfo)); + ASSERT_EQ(siginfo.ssi_signo, SIGURG); + + bytes = read(self->signal_fd, &siginfo, sizeof(siginfo)); + } + + ASSERT_EQ(bytes, -1); + } + + ASSERT_EQ(ret[0], len); + ASSERT_EQ(ret[0], ret[1]); +} + +static void __recvpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + const void *expected_buf, int expected_len, + int buf_len, int flags) +{ + int i, ret[2], recv_errno[2], expected_errno = 0; + char recv_buf[2][BUF_SZ] = {}; + bool printed = false; + + ASSERT_GE(BUF_SZ, buf_len); + + errno = 0; + + for (i = 0; i < 2; i++) { + ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags); + recv_errno[i] = errno; + } + + if (expected_len < 0) { + expected_errno = -expected_len; + expected_len = -1; + } + + if (ret[0] != expected_len || recv_errno[0] != expected_errno) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf); + + ASSERT_EQ(ret[0], expected_len); + ASSERT_EQ(recv_errno[0], expected_errno); + } + + if (ret[0] != ret[1] || recv_errno[0] != recv_errno[1]) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]); + + printed = true; + + if (self->tcp_compliant) { + ASSERT_EQ(ret[0], ret[1]); + ASSERT_EQ(recv_errno[0], recv_errno[1]); + } + } + + if (expected_len >= 0) { + int cmp; + + cmp = strncmp(expected_buf, recv_buf[0], expected_len); + if (cmp) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf); + + ASSERT_EQ(cmp, 0); + } + + cmp = strncmp(recv_buf[0], recv_buf[1], expected_len); + if (cmp) { + if (!printed) { + TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]); + TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]); + } + + if (self->tcp_compliant) + ASSERT_EQ(cmp, 0); + } + } +} + +static void __setinlinepair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self) +{ + int i, oob_inline = 1; + + for (i = 0; i < 2; i++) { + int ret; + + ret = setsockopt(self->fd[i * 2 + 1], SOL_SOCKET, SO_OOBINLINE, + &oob_inline, sizeof(oob_inline)); + ASSERT_EQ(ret, 0); + } +} + +static void __siocatmarkpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + bool oob_head) +{ + int answ[2] = {}; + int i; + + for (i = 0; i < 2; i++) { + int ret; + + ret = ioctl(self->fd[i * 2 + 1], SIOCATMARK, &answ[i]); + ASSERT_EQ(ret, 0); + } + + ASSERT_EQ(answ[0], oob_head); + + if (self->tcp_compliant) + ASSERT_EQ(answ[0], answ[1]); +} + +#define sendpair(buf, len, flags) \ + __sendpair(_metadata, self, buf, len, flags) + +#define recvpair(expected_buf, expected_len, buf_len, flags) \ + do { \ + if (variant->peek) \ + __recvpair(_metadata, self, \ + expected_buf, expected_len, \ + buf_len, (flags) | MSG_PEEK); \ + __recvpair(_metadata, self, \ + expected_buf, expected_len, buf_len, flags); \ + } while (0) + +#define epollpair(oob_remaining) \ + __epollpair(_metadata, self, oob_remaining) + +#define siocatmarkpair(oob_head) \ + __siocatmarkpair(_metadata, self, oob_head) + +#define setinlinepair() \ + __setinlinepair(_metadata, self) + +#define tcp_incompliant \ + for (self->tcp_compliant = false; \ + self->tcp_compliant == false; \ + self->tcp_compliant = true) + +TEST_F(msg_oob, non_oob) +{ + sendpair("x", 1, 0); + epollpair(false); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); +} + +TEST_F(msg_oob, oob_drop) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("", -EAGAIN, 1, 0); /* Drop OOB. */ + epollpair(false); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, oob_ahead) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + recvpair("hell", 4, 4, 0); + epollpair(false); + siocatmarkpair(true); +} + +TEST_F(msg_oob, oob_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 5, 0); /* Break at OOB even with enough buffer. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + recvpair("", -EAGAIN, 1, 0); + siocatmarkpair(false); +} + +TEST_F(msg_oob, oob_ahead_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("world", 5, 0); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + recvpair("hell", 4, 9, 0); /* Break at OOB even after it's recv()ed. */ + epollpair(false); + siocatmarkpair(true); + + recvpair("world", 5, 5, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, oob_break_drop) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("world", 5, 0); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 10, 0); /* Break at OOB even with enough buffer. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("world", 5, 10, 0); /* Drop OOB and recv() the next skb. */ + epollpair(false); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, ex_oob_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("wor", 3, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("ld", 2, 0); + epollpair(true); + siocatmarkpair(false); + + recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("r", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + recvpair("ld", 2, 2, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, ex_oob_drop) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ + epollpair(true); + + tcp_incompliant { + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); /* TCP drops "y" by passing through it. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, MSG_OOB); /* TCP returns -EINVAL. */ + epollpair(false); + siocatmarkpair(true); + } +} + +TEST_F(msg_oob, ex_oob_drop_2) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ + epollpair(true); + + tcp_incompliant { + siocatmarkpair(false); + } + + recvpair("y", 1, 1, MSG_OOB); + epollpair(false); + + tcp_incompliant { + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); /* TCP returns -EAGAIN. */ + epollpair(false); + siocatmarkpair(true); + } +} + +TEST_F(msg_oob, ex_oob_ahead_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("wor", 3, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("r", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + sendpair("ld", 2, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + tcp_incompliant { + recvpair("hellowol", 8, 10, 0); /* TCP recv()s "helloworl", why "r" ?? */ + } + + epollpair(true); + siocatmarkpair(true); + + recvpair("d", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); +} + +TEST_F(msg_oob, ex_oob_siocatmark) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + sendpair("world", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ + epollpair(true); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_oob) +{ + setinlinepair(); + + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_oob_break) +{ + setinlinepair(); + + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 5, 0); /* Break at OOB but not at ex-OOB. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("o", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_oob_ahead_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("world", 5, 0); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + setinlinepair(); + + recvpair("hell", 4, 9, 0); /* Break at OOB even with enough buffer. */ + epollpair(false); + siocatmarkpair(true); + + tcp_incompliant { + recvpair("world", 5, 6, 0); /* TCP recv()s "oworld", ... "o" ??? */ + } + + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_ex_oob_break) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("wor", 3, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + sendpair("ld", 2, 0); + epollpair(true); + siocatmarkpair(false); + + setinlinepair(); + + recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */ + epollpair(true); + siocatmarkpair(true); + + recvpair("rld", 3, 3, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_ex_oob_no_drop) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + setinlinepair(); + + sendpair("y", 1, MSG_OOB); /* TCP does NOT drops "x" at this moment. */ + epollpair(true); + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); +} + +TEST_F(msg_oob, inline_ex_oob_drop) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */ + epollpair(true); + + setinlinepair(); + + tcp_incompliant { + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); /* TCP recv()s "y". */ + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, 0); /* TCP returns -EAGAIN. */ + epollpair(false); + siocatmarkpair(false); + } +} + +TEST_F(msg_oob, inline_ex_oob_siocatmark) +{ + sendpair("hello", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("o", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + setinlinepair(); + + sendpair("world", 5, MSG_OOB); + epollpair(true); + siocatmarkpair(false); + + recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ + epollpair(true); + siocatmarkpair(false); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c new file mode 100644 index 0000000000..d663362565 --- /dev/null +++ b/tools/testing/selftests/net/af_unix/scm_rights.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ +#define _GNU_SOURCE +#include <sched.h> + +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> + +#include "../../kselftest_harness.h" + +FIXTURE(scm_rights) +{ + int fd[32]; +}; + +FIXTURE_VARIANT(scm_rights) +{ + char name[32]; + int type; + int flags; + bool test_listener; +}; + +FIXTURE_VARIANT_ADD(scm_rights, dgram) +{ + .name = "UNIX ", + .type = SOCK_DGRAM, + .flags = 0, + .test_listener = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = 0, + .test_listener = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_oob) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = MSG_OOB, + .test_listener = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_listener) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = 0, + .test_listener = true, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = MSG_OOB, + .test_listener = true, +}; + +static int count_sockets(struct __test_metadata *_metadata, + const FIXTURE_VARIANT(scm_rights) *variant) +{ + int sockets = -1, len, ret; + char *line = NULL; + size_t unused; + FILE *f; + + f = fopen("/proc/net/protocols", "r"); + ASSERT_NE(NULL, f); + + len = strlen(variant->name); + + while (getline(&line, &unused, f) != -1) { + int unused2; + + if (strncmp(line, variant->name, len)) + continue; + + ret = sscanf(line + len, "%d %d", &unused2, &sockets); + ASSERT_EQ(2, ret); + + break; + } + + free(line); + + ret = fclose(f); + ASSERT_EQ(0, ret); + + return sockets; +} + +FIXTURE_SETUP(scm_rights) +{ + int ret; + + ret = unshare(CLONE_NEWNET); + ASSERT_EQ(0, ret); + + ret = count_sockets(_metadata, variant); + ASSERT_EQ(0, ret); +} + +FIXTURE_TEARDOWN(scm_rights) +{ + int ret; + + sleep(1); + + ret = count_sockets(_metadata, variant); + ASSERT_EQ(0, ret); +} + +static void create_listeners(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_rights) *self, + int n) +{ + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + }; + socklen_t addrlen; + int i, ret; + + for (i = 0; i < n * 2; i += 2) { + self->fd[i] = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_LE(0, self->fd[i]); + + addrlen = sizeof(addr.sun_family); + ret = bind(self->fd[i], (struct sockaddr *)&addr, addrlen); + ASSERT_EQ(0, ret); + + ret = listen(self->fd[i], -1); + ASSERT_EQ(0, ret); + + addrlen = sizeof(addr); + ret = getsockname(self->fd[i], (struct sockaddr *)&addr, &addrlen); + ASSERT_EQ(0, ret); + + self->fd[i + 1] = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_LE(0, self->fd[i + 1]); + + ret = connect(self->fd[i + 1], (struct sockaddr *)&addr, addrlen); + ASSERT_EQ(0, ret); + } +} + +static void create_socketpairs(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_rights) *self, + const FIXTURE_VARIANT(scm_rights) *variant, + int n) +{ + int i, ret; + + ASSERT_GE(sizeof(self->fd) / sizeof(int), n); + + for (i = 0; i < n * 2; i += 2) { + ret = socketpair(AF_UNIX, variant->type, 0, self->fd + i); + ASSERT_EQ(0, ret); + } +} + +static void __create_sockets(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_rights) *self, + const FIXTURE_VARIANT(scm_rights) *variant, + int n) +{ + ASSERT_LE(n * 2, sizeof(self->fd) / sizeof(self->fd[0])); + + if (variant->test_listener) + create_listeners(_metadata, self, n); + else + create_socketpairs(_metadata, self, variant, n); +} + +static void __close_sockets(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_rights) *self, + int n) +{ + int i, ret; + + ASSERT_GE(sizeof(self->fd) / sizeof(int), n); + + for (i = 0; i < n * 2; i++) { + ret = close(self->fd[i]); + ASSERT_EQ(0, ret); + } +} + +void __send_fd(struct __test_metadata *_metadata, + const FIXTURE_DATA(scm_rights) *self, + const FIXTURE_VARIANT(scm_rights) *variant, + int inflight, int receiver) +{ +#define MSG "x" +#define MSGLEN 1 + struct { + struct cmsghdr cmsghdr; + int fd[2]; + } cmsg = { + .cmsghdr = { + .cmsg_len = CMSG_LEN(sizeof(cmsg.fd)), + .cmsg_level = SOL_SOCKET, + .cmsg_type = SCM_RIGHTS, + }, + .fd = { + self->fd[inflight * 2], + self->fd[inflight * 2], + }, + }; + struct iovec iov = { + .iov_base = MSG, + .iov_len = MSGLEN, + }; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = &cmsg, + .msg_controllen = CMSG_SPACE(sizeof(cmsg.fd)), + }; + int ret; + + ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags); + ASSERT_EQ(MSGLEN, ret); +} + +#define create_sockets(n) \ + __create_sockets(_metadata, self, variant, n) +#define close_sockets(n) \ + __close_sockets(_metadata, self, n) +#define send_fd(inflight, receiver) \ + __send_fd(_metadata, self, variant, inflight, receiver) + +TEST_F(scm_rights, self_ref) +{ + create_sockets(2); + + send_fd(0, 0); + + send_fd(1, 1); + + close_sockets(2); +} + +TEST_F(scm_rights, triangle) +{ + create_sockets(6); + + send_fd(0, 1); + send_fd(1, 2); + send_fd(2, 0); + + send_fd(3, 4); + send_fd(4, 5); + send_fd(5, 3); + + close_sockets(6); +} + +TEST_F(scm_rights, cross_edge) +{ + create_sockets(8); + + send_fd(0, 1); + send_fd(1, 2); + send_fd(2, 0); + send_fd(1, 3); + send_fd(3, 2); + + send_fd(4, 5); + send_fd(5, 6); + send_fd(6, 4); + send_fd(5, 7); + send_fd(7, 6); + + close_sockets(8); +} + +TEST_F(scm_rights, backtrack_from_scc) +{ + create_sockets(10); + + send_fd(0, 1); + send_fd(0, 4); + send_fd(1, 2); + send_fd(2, 3); + send_fd(3, 1); + + send_fd(5, 6); + send_fd(5, 9); + send_fd(6, 7); + send_fd(7, 8); + send_fd(8, 6); + + close_sockets(10); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c deleted file mode 100644 index a7c51889ac..0000000000 --- a/tools/testing/selftests/net/af_unix/test_unix_oob.c +++ /dev/null @@ -1,436 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -#include <stdio.h> -#include <stdlib.h> -#include <sys/socket.h> -#include <arpa/inet.h> -#include <unistd.h> -#include <string.h> -#include <fcntl.h> -#include <sys/ioctl.h> -#include <errno.h> -#include <netinet/tcp.h> -#include <sys/un.h> -#include <sys/signal.h> -#include <sys/poll.h> - -static int pipefd[2]; -static int signal_recvd; -static pid_t producer_id; -static char sock_name[32]; - -static void sig_hand(int sn, siginfo_t *si, void *p) -{ - signal_recvd = sn; -} - -static int set_sig_handler(int signal) -{ - struct sigaction sa; - - sa.sa_sigaction = sig_hand; - sigemptyset(&sa.sa_mask); - sa.sa_flags = SA_SIGINFO | SA_RESTART; - - return sigaction(signal, &sa, NULL); -} - -static void set_filemode(int fd, int set) -{ - int flags = fcntl(fd, F_GETFL, 0); - - if (set) - flags &= ~O_NONBLOCK; - else - flags |= O_NONBLOCK; - fcntl(fd, F_SETFL, flags); -} - -static void signal_producer(int fd) -{ - char cmd; - - cmd = 'S'; - write(fd, &cmd, sizeof(cmd)); -} - -static void wait_for_signal(int fd) -{ - char buf[5]; - - read(fd, buf, 5); -} - -static void die(int status) -{ - fflush(NULL); - unlink(sock_name); - kill(producer_id, SIGTERM); - exit(status); -} - -int is_sioctatmark(int fd) -{ - int ans = -1; - - if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) { -#ifdef DEBUG - perror("SIOCATMARK Failed"); -#endif - } - return ans; -} - -void read_oob(int fd, char *c) -{ - - *c = ' '; - if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) { -#ifdef DEBUG - perror("Reading MSG_OOB Failed"); -#endif - } -} - -int read_data(int pfd, char *buf, int size) -{ - int len = 0; - - memset(buf, size, '0'); - len = read(pfd, buf, size); -#ifdef DEBUG - if (len < 0) - perror("read failed"); -#endif - return len; -} - -static void wait_for_data(int pfd, int event) -{ - struct pollfd pfds[1]; - - pfds[0].fd = pfd; - pfds[0].events = event; - poll(pfds, 1, -1); -} - -void producer(struct sockaddr_un *consumer_addr) -{ - int cfd; - char buf[64]; - int i; - - memset(buf, 'x', sizeof(buf)); - cfd = socket(AF_UNIX, SOCK_STREAM, 0); - - wait_for_signal(pipefd[0]); - if (connect(cfd, (struct sockaddr *)consumer_addr, - sizeof(*consumer_addr)) != 0) { - perror("Connect failed"); - kill(0, SIGTERM); - exit(1); - } - - for (i = 0; i < 2; i++) { - /* Test 1: Test for SIGURG and OOB */ - wait_for_signal(pipefd[0]); - memset(buf, 'x', sizeof(buf)); - buf[63] = '@'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - wait_for_signal(pipefd[0]); - - /* Test 2: Test for OOB being overwitten */ - memset(buf, 'x', sizeof(buf)); - buf[63] = '%'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - memset(buf, 'x', sizeof(buf)); - buf[63] = '#'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - wait_for_signal(pipefd[0]); - - /* Test 3: Test for SIOCATMARK */ - memset(buf, 'x', sizeof(buf)); - buf[63] = '@'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - memset(buf, 'x', sizeof(buf)); - buf[63] = '%'; - send(cfd, buf, sizeof(buf), MSG_OOB); - - memset(buf, 'x', sizeof(buf)); - send(cfd, buf, sizeof(buf), 0); - - wait_for_signal(pipefd[0]); - - /* Test 4: Test for 1byte OOB msg */ - memset(buf, 'x', sizeof(buf)); - buf[0] = '@'; - send(cfd, buf, 1, MSG_OOB); - } -} - -int -main(int argc, char **argv) -{ - int lfd, pfd; - struct sockaddr_un consumer_addr, paddr; - socklen_t len = sizeof(consumer_addr); - char buf[1024]; - int on = 0; - char oob; - int atmark; - - lfd = socket(AF_UNIX, SOCK_STREAM, 0); - memset(&consumer_addr, 0, sizeof(consumer_addr)); - consumer_addr.sun_family = AF_UNIX; - sprintf(sock_name, "unix_oob_%d", getpid()); - unlink(sock_name); - strcpy(consumer_addr.sun_path, sock_name); - - if ((bind(lfd, (struct sockaddr *)&consumer_addr, - sizeof(consumer_addr))) != 0) { - perror("socket bind failed"); - exit(1); - } - - pipe(pipefd); - - listen(lfd, 1); - - producer_id = fork(); - if (producer_id == 0) { - producer(&consumer_addr); - exit(0); - } - - set_sig_handler(SIGURG); - signal_producer(pipefd[1]); - - pfd = accept(lfd, (struct sockaddr *) &paddr, &len); - fcntl(pfd, F_SETOWN, getpid()); - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 1: - * veriyf that SIGURG is - * delivered, 63 bytes are - * read, oob is '@', and POLLPRI works. - */ - wait_for_data(pfd, POLLPRI); - read_oob(pfd, &oob); - len = read_data(pfd, buf, 1024); - if (!signal_recvd || len != 63 || oob != '@') { - fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 2: - * Verify that the first OOB is over written by - * the 2nd one and the first OOB is returned as - * part of the read, and sigurg is received. - */ - wait_for_data(pfd, POLLIN | POLLPRI); - len = 0; - while (len < 70) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - read_oob(pfd, &oob); - if (!signal_recvd || len != 127 || oob != '#') { - fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 3: - * verify that 2nd oob over writes - * the first one and read breaks at - * oob boundary returning 127 bytes - * and sigurg is received and atmark - * is set. - * oob is '%' and second read returns - * 64 bytes. - */ - len = 0; - wait_for_data(pfd, POLLIN | POLLPRI); - while (len < 150) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - read_oob(pfd, &oob); - - if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) { - fprintf(stderr, - "Test 3 failed, sigurg %d len %d OOB %c atmark %d\n", - signal_recvd, len, oob, atmark); - die(1); - } - - signal_recvd = 0; - - len = read_data(pfd, buf, 1024); - if (len != 64) { - fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 4: - * verify that a single byte - * oob message is delivered. - * set non blocking mode and - * check proper error is - * returned and sigurg is - * received and correct - * oob is read. - */ - - set_filemode(pfd, 0); - - wait_for_data(pfd, POLLIN | POLLPRI); - len = read_data(pfd, buf, 1024); - if ((len == -1) && (errno == 11)) - len = 0; - - read_oob(pfd, &oob); - - if (!signal_recvd || len != 0 || oob != '@') { - fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n", - signal_recvd, len, oob); - die(1); - } - - set_filemode(pfd, 1); - - /* Inline Testing */ - - on = 1; - if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) { - perror("SO_OOBINLINE"); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 1 -- Inline: - * Check that SIGURG is - * delivered and 63 bytes are - * read and oob is '@' - */ - - wait_for_data(pfd, POLLIN | POLLPRI); - len = read_data(pfd, buf, 1024); - - if (!signal_recvd || len != 63) { - fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n", - signal_recvd, len); - die(1); - } - - len = read_data(pfd, buf, 1024); - - if (len != 1) { - fprintf(stderr, - "Test 1.1 Inline failed, sigurg %d len %d oob %c\n", - signal_recvd, len, oob); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 2 -- Inline: - * Verify that the first OOB is over written by - * the 2nd one and read breaks correctly on - * 2nd OOB boundary with the first OOB returned as - * part of the read, and sigurg is delivered and - * siocatmark returns true. - * next read returns one byte, the oob byte - * and siocatmark returns false. - */ - len = 0; - wait_for_data(pfd, POLLIN | POLLPRI); - while (len < 70) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (len != 127 || atmark != 1 || !signal_recvd) { - fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n", - len, atmark); - die(1); - } - - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (len != 1 || buf[0] != '#' || atmark == 1) { - fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n", - len, buf[0], atmark); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 3 -- Inline: - * verify that 2nd oob over writes - * the first one and read breaks at - * oob boundary returning 127 bytes - * and sigurg is received and siocatmark - * is true after the read. - * subsequent read returns 65 bytes - * because of oob which should be '%'. - */ - len = 0; - wait_for_data(pfd, POLLIN | POLLPRI); - while (len < 126) - len = recv(pfd, buf, 1024, MSG_PEEK); - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (!signal_recvd || len != 127 || !atmark) { - fprintf(stderr, - "Test 3 Inline failed, sigurg %d len %d data %c\n", - signal_recvd, len, buf[0]); - die(1); - } - - len = read_data(pfd, buf, 1024); - atmark = is_sioctatmark(pfd); - if (len != 65 || buf[0] != '%' || atmark != 0) { - fprintf(stderr, - "Test 3.1 Inline failed, len %d oob %c atmark %d\n", - len, buf[0], atmark); - die(1); - } - - signal_recvd = 0; - signal_producer(pipefd[1]); - - /* Test 4 -- Inline: - * verify that a single - * byte oob message is delivered - * and read returns one byte, the oob - * byte and sigurg is received - */ - wait_for_data(pfd, POLLIN | POLLPRI); - len = read_data(pfd, buf, 1024); - if (!signal_recvd || len != 1 || buf[0] != '@') { - fprintf(stderr, - "Test 4 Inline failed, signal %d len %d data %c\n", - signal_recvd, len, buf[0]); - die(1); - } - die(0); -} diff --git a/tools/testing/selftests/net/bpf.mk b/tools/testing/selftests/net/bpf.mk new file mode 100644 index 0000000000..a4f6755dd8 --- /dev/null +++ b/tools/testing/selftests/net/bpf.mk @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: GPL-2.0 +# Rules to generate bpf objs +CLANG ?= clang +SCRATCH_DIR := $(OUTPUT)/tools +BUILD_DIR := $(SCRATCH_DIR)/build +BPFDIR := $(top_srcdir)/tools/lib/bpf +APIDIR := $(top_srcdir)/tools/include/uapi + +CCINCLUDE += -I$(selfdir)/bpf +CCINCLUDE += -I$(top_srcdir)/usr/include/ +CCINCLUDE += -I$(SCRATCH_DIR)/include + +BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a + +MAKE_DIRS := $(BUILD_DIR)/libbpf +$(MAKE_DIRS): + $(call msg,MKDIR,,$@) + $(Q)mkdir -p $@ + +# Get Clang's default includes on this system, as opposed to those seen by +# '--target=bpf'. This fixes "missing" files on some architectures/distros, +# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. +# +# Use '-idirafter': Don't interfere with include mechanics except where the +# build would have failed anyways. +define get_sys_includes +$(shell $(1) $(2) -v -E - </dev/null 2>&1 \ + | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ +$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') +endef + +ifneq ($(CROSS_COMPILE),) +CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%)) +endif + +CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) + +BPF_PROG_OBJS := $(patsubst %.c,$(OUTPUT)/%.o,$(wildcard *.bpf.c)) + +$(BPF_PROG_OBJS): $(OUTPUT)/%.o : %.c $(BPFOBJ) | $(MAKE_DIRS) + $(call msg,BPF_PROG,,$@) + $(Q)$(CLANG) -O2 -g --target=bpf $(CCINCLUDE) $(CLANG_SYS_INCLUDES) \ + -c $< -o $@ + +$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ + $(APIDIR)/linux/bpf.h \ + | $(BUILD_DIR)/libbpf + $(call msg,MAKE,,$@) + $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ + EXTRA_CFLAGS='-g -O0' \ + DESTDIR=$(SCRATCH_DIR) prefix= all install_headers + +EXTRA_CLEAN += $(SCRATCH_DIR) diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/net/bpf_offload.py index 6157f884d0..3efe44f6e9 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/net/bpf_offload.py @@ -29,6 +29,9 @@ import subprocess import time import traceback +from lib.py import NetdevSim, NetdevSimDev + + logfile = None log_level = 1 skip_extack = False @@ -145,8 +148,10 @@ def tool(name, args, flags, JSON=True, ns="", fail=True, include_stderr=False): if JSON: params += "%s " % (flags["json"]) - if ns != "": + if ns: ns = "ip netns exec %s " % (ns) + elif ns is None: + ns = "" if include_stderr: ret, stdout, stderr = cmd(ns + name + " " + params + args, @@ -201,11 +206,11 @@ def bpftool_prog_list_wait(expected=0, n_retry=20): time.sleep(0.05) raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs)) -def bpftool_map_list_wait(expected=0, n_retry=20): +def bpftool_map_list_wait(expected=0, n_retry=20, ns=""): for i in range(n_retry): - nmaps = len(bpftool_map_list()) - if nmaps == expected: - return + maps = bpftool_map_list(ns=ns) + if len(maps) == expected: + return maps time.sleep(0.05) raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps)) @@ -237,7 +242,7 @@ def tc(args, JSON=True, ns="", fail=True, include_stderr=False): def ethtool(dev, opt, args, fail=True): return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail) -def bpf_obj(name, sec=".text", path=bpf_test_dir,): +def bpf_obj(name, sec="xdp", path=bpf_test_dir,): return "obj %s sec %s" % (os.path.join(path, name), sec) def bpf_pinned(name): @@ -334,72 +339,16 @@ class DebugfsDir: return dfs -class NetdevSimDev: +class BpfNetdevSimDev(NetdevSimDev): """ Class for netdevsim bus device and its attributes. """ - @staticmethod - def ctrl_write(path, val): - fullpath = os.path.join("/sys/bus/netdevsim/", path) - try: - with open(fullpath, "w") as f: - f.write(val) - except OSError as e: - log("WRITE %s: %r" % (fullpath, val), -e.errno) - raise e - log("WRITE %s: %r" % (fullpath, val), 0) - - def __init__(self, port_count=1): - addr = 0 - while True: - try: - self.ctrl_write("new_device", "%u %u" % (addr, port_count)) - except OSError as e: - if e.errno == errno.ENOSPC: - addr += 1 - continue - raise e - break - self.addr = addr - - # As probe of netdevsim device might happen from a workqueue, - # so wait here until all netdevs appear. - self.wait_for_netdevs(port_count) - - ret, out = cmd("udevadm settle", fail=False) - if ret: - raise Exception("udevadm settle failed") - ifnames = self.get_ifnames() - + def __init__(self, port_count=1, ns=None): + super().__init__(port_count, ns=ns) devs.append(self) - self.dfs_dir = "/sys/kernel/debug/netdevsim/netdevsim%u/" % addr - - self.nsims = [] - for port_index in range(port_count): - self.nsims.append(NetdevSim(self, port_index, ifnames[port_index])) - - def get_ifnames(self): - ifnames = [] - listdir = os.listdir("/sys/bus/netdevsim/devices/netdevsim%u/net/" % self.addr) - for ifname in listdir: - ifnames.append(ifname) - ifnames.sort() - return ifnames - - def wait_for_netdevs(self, port_count): - timeout = 5 - timeout_start = time.time() - - while True: - try: - ifnames = self.get_ifnames() - except FileNotFoundError as e: - ifnames = [] - if len(ifnames) == port_count: - break - if time.time() < timeout_start + timeout: - continue - raise Exception("netdevices did not appear within timeout") + + def _make_port(self, port_index, ifname): + return BpfNetdevSim(self, port_index, ifname, self.ns) def dfs_num_bound_progs(self): path = os.path.join(self.dfs_dir, "bpf_bound_progs") @@ -415,33 +364,20 @@ class NetdevSimDev: return progs def remove(self): - self.ctrl_write("del_device", "%u" % (self.addr, )) + super().remove() devs.remove(self) - def remove_nsim(self, nsim): - self.nsims.remove(nsim) - self.ctrl_write("devices/netdevsim%u/del_port" % (self.addr, ), - "%u" % (nsim.port_index, )) -class NetdevSim: +class BpfNetdevSim(NetdevSim): """ Class for netdevsim netdevice and its attributes. """ - def __init__(self, nsimdev, port_index, ifname): - # In case udev renamed the netdev to according to new schema, - # check if the name matches the port_index. - nsimnamere = re.compile("eni\d+np(\d+)") - match = nsimnamere.match(ifname) - if match and int(match.groups()[0]) != port_index + 1: - raise Exception("netdevice name mismatches the expected one") - - self.nsimdev = nsimdev - self.port_index = port_index - self.ns = "" + def __init__(self, nsimdev, port_index, ifname, ns=None): + super().__init__(nsimdev, port_index, ifname, ns=ns) + self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index) self.dfs_refresh() - _, [self.dev] = ip("link show dev %s" % ifname) def __getitem__(self, key): return self.dev[key] @@ -468,7 +404,7 @@ class NetdevSim: raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs)) def set_ns(self, ns): - name = "1" if ns == "" else ns + name = ns if ns else "1" ip("link set dev %s netns %s" % (self.dev["ifname"], name), ns=self.ns) self.ns = ns @@ -605,7 +541,7 @@ def pin_prog(file_name, idx=0): return file_name, bpf_pinned(file_name) def pin_map(file_name, idx=0, expected=1): - maps = bpftool_map_list(expected=expected) + maps = bpftool_map_list_wait(expected=expected) m = maps[idx] bpftool("map pin id %d %s" % (m["id"], file_name)) files.append(file_name) @@ -618,7 +554,7 @@ def check_dev_info_removed(prog_file=None, map_file=None): ret, err = bpftool("prog show pin %s" % (prog_file), fail=False) fail(ret != 0, "failed to show prog with removed device") - bpftool_map_list(expected=0) + bpftool_map_list_wait(expected=0) ret, err = bpftool("map show pin %s" % (map_file), fail=False) fail(ret == 0, "Showing map with removed device did not fail") fail(err["error"].find("No such device") == -1, @@ -642,7 +578,7 @@ def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False): else: fail("ifname" in dev.keys(), "Ifname is reported for other ns") - maps = bpftool_map_list(expected=2, ns=ns) + maps = bpftool_map_list_wait(expected=2, ns=ns) for m in maps: fail("dev" not in m.keys(), "Device parameters not reported") fail(dev != m["dev"], "Map's device different than program's") @@ -744,7 +680,7 @@ def test_multi_prog(simdev, sim, obj, modename, modeid): start_test("Test multi-attachment XDP - device remove...") simdev.remove() - simdev = NetdevSimDev() + simdev = BpfNetdevSimDev() sim, = simdev.nsims sim.set_ethtool_tc_offloads(True) return [simdev, sim] @@ -809,13 +745,13 @@ try: bytecode = bpf_bytecode("1,6 0 0 4294967295,") start_test("Test destruction of generic XDP...") - simdev = NetdevSimDev() + simdev = BpfNetdevSimDev() sim, = simdev.nsims sim.set_xdp(obj, "generic") simdev.remove() bpftool_prog_list_wait(expected=0) - simdev = NetdevSimDev() + simdev = BpfNetdevSimDev() sim, = simdev.nsims sim.tc_add_ingress() @@ -967,7 +903,7 @@ try: simdev.remove() bpftool_prog_list_wait(expected=0) - simdev = NetdevSimDev() + simdev = BpfNetdevSimDev() sim, = simdev.nsims sim.set_ethtool_tc_offloads(True) @@ -976,7 +912,7 @@ try: simdev.remove() bpftool_prog_list_wait(expected=0) - simdev = NetdevSimDev() + simdev = BpfNetdevSimDev() sim, = simdev.nsims sim.set_ethtool_tc_offloads(True) @@ -1080,7 +1016,7 @@ try: bpftool_prog_list_wait(expected=0) start_test("Test attempt to use a program for a wrong device...") - simdev2 = NetdevSimDev() + simdev2 = BpfNetdevSimDev() sim2, = simdev2.nsims sim2.set_xdp(obj, "offload") pin_file, pinned = pin_prog("/sys/fs/bpf/tmp") @@ -1169,7 +1105,7 @@ try: clean_up() bpftool_prog_list_wait(expected=0) - simdev = NetdevSimDev() + simdev = BpfNetdevSimDev() sim, = simdev.nsims map_obj = bpf_obj("sample_map_ret0.bpf.o") start_test("Test loading program with maps...") @@ -1201,12 +1137,12 @@ try: clean_up() bpftool_prog_list_wait(expected=0) - simdev = NetdevSimDev() + simdev = BpfNetdevSimDev() sim, = simdev.nsims start_test("Test map update (no flags)...") sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON - maps = bpftool_map_list(expected=2) + maps = bpftool_map_list_wait(expected=2) array = maps[0] if maps[0]["type"] == "array" else maps[1] htab = maps[0] if maps[0]["type"] == "hash" else maps[1] for m in maps: @@ -1285,14 +1221,14 @@ try: bpftool_map_list_wait(expected=0) simdev.remove() - simdev = NetdevSimDev() + simdev = BpfNetdevSimDev() sim, = simdev.nsims sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON simdev.remove() bpftool_map_list_wait(expected=0) start_test("Test map creation fail path...") - simdev = NetdevSimDev() + simdev = BpfNetdevSimDev() sim, = simdev.nsims sim.dfs["bpf_map_accept"] = "N" ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False) @@ -1302,9 +1238,9 @@ try: simdev.remove() start_test("Test multi-dev ASIC program reuse...") - simdevA = NetdevSimDev() + simdevA = BpfNetdevSimDev() simA, = simdevA.nsims - simdevB = NetdevSimDev(3) + simdevB = BpfNetdevSimDev(3) simB1, simB2, simB3 = simdevB.nsims sims = (simA, simB1, simB2, simB3) simB = (simB1, simB2, simB3) diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index 161db24e3c..876c2db02a 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -260,15 +260,8 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz) SOL_IPV6, IPV6_HOPLIMIT, &opt.v6.hlimit); if (opt.txtime.ena) { - struct sock_txtime so_txtime = { - .clockid = CLOCK_MONOTONIC, - }; __u64 txtime; - if (setsockopt(fd, SOL_SOCKET, SO_TXTIME, - &so_txtime, sizeof(so_txtime))) - error(ERN_SOCKOPT, errno, "setsockopt TXTIME"); - txtime = time_start_mono.tv_sec * (1000ULL * 1000 * 1000) + time_start_mono.tv_nsec + opt.txtime.delay * 1000; @@ -284,13 +277,6 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz) memcpy(CMSG_DATA(cmsg), &txtime, sizeof(txtime)); } if (opt.ts.ena) { - __u32 val = SOF_TIMESTAMPING_SOFTWARE | - SOF_TIMESTAMPING_OPT_TSONLY; - - if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, - &val, sizeof(val))) - error(ERN_SOCKOPT, errno, "setsockopt TIMESTAMPING"); - cmsg = (struct cmsghdr *)(cbuf + cmsg_len); cmsg_len += CMSG_SPACE(sizeof(__u32)); if (cbuf_sz < cmsg_len) @@ -426,6 +412,24 @@ static void ca_set_sockopts(int fd) setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &opt.sockopt.priority, sizeof(opt.sockopt.priority))) error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY"); + + if (opt.txtime.ena) { + struct sock_txtime so_txtime = { + .clockid = CLOCK_MONOTONIC, + }; + + if (setsockopt(fd, SOL_SOCKET, SO_TXTIME, + &so_txtime, sizeof(so_txtime))) + error(ERN_SOCKOPT, errno, "setsockopt TXTIME"); + } + if (opt.ts.ena) { + __u32 val = SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_TSONLY; + + if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, + &val, sizeof(val))) + error(ERN_SOCKOPT, errno, "setsockopt TIMESTAMPING"); + } } int main(int argc, char *argv[]) diff --git a/tools/testing/selftests/net/cmsg_time.sh b/tools/testing/selftests/net/cmsg_time.sh index af85267ad1..1d7e756644 100755 --- a/tools/testing/selftests/net/cmsg_time.sh +++ b/tools/testing/selftests/net/cmsg_time.sh @@ -66,10 +66,13 @@ for i in "-4 $TGT4" "-6 $TGT6"; do awk '/SND/ { if ($3 > 1000) print "OK"; }') check_result $? "$ts" "OK" "$prot - TXTIME abs" - ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 | + [ "$KSFT_MACHINE_SLOW" = yes ] && delay=8000 || delay=1000 + + ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d $delay | awk '/SND/ {snd=$3} /SCHED/ {sch=$3} - END { if (snd - sch > 500) print "OK"; }') + END { if (snd - sch > '$((delay/2))') print "OK"; + else print snd, "-", sch, "<", '$((delay/2))'; }') check_result $? "$ts" "OK" "$prot - TXTIME rel" done done diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 04de7a6ba6..d4891f7a2b 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -101,3 +101,5 @@ CONFIG_NETFILTER_XT_MATCH_POLICY=m CONFIG_CRYPTO_ARIA=y CONFIG_XFRM_INTERFACE=m CONFIG_XFRM_USER=m +CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_MATCH_RPFILTER=m diff --git a/tools/testing/selftests/net/epoll_busy_poll.c b/tools/testing/selftests/net/epoll_busy_poll.c new file mode 100644 index 0000000000..16e457c2f8 --- /dev/null +++ b/tools/testing/selftests/net/epoll_busy_poll.c @@ -0,0 +1,320 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +/* Basic per-epoll context busy poll test. + * + * Only tests the ioctls, but should be expanded to test two connected hosts in + * the future + */ + +#define _GNU_SOURCE + +#include <error.h> +#include <errno.h> +#include <inttypes.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <sys/capability.h> + +#include <sys/epoll.h> +#include <sys/ioctl.h> +#include <sys/socket.h> + +#include "../kselftest_harness.h" + +/* if the headers haven't been updated, we need to define some things */ +#if !defined(EPOLL_IOC_TYPE) +struct epoll_params { + uint32_t busy_poll_usecs; + uint16_t busy_poll_budget; + uint8_t prefer_busy_poll; + + /* pad the struct to a multiple of 64bits */ + uint8_t __pad; +}; + +#define EPOLL_IOC_TYPE 0x8A +#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params) +#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params) +#endif + +FIXTURE(invalid_fd) +{ + int invalid_fd; + struct epoll_params params; +}; + +FIXTURE_SETUP(invalid_fd) +{ + int ret; + + ret = socket(AF_UNIX, SOCK_DGRAM, 0); + EXPECT_NE(-1, ret) + TH_LOG("error creating unix socket"); + + self->invalid_fd = ret; +} + +FIXTURE_TEARDOWN(invalid_fd) +{ + int ret; + + ret = close(self->invalid_fd); + EXPECT_EQ(0, ret); +} + +TEST_F(invalid_fd, test_invalid_fd) +{ + int ret; + + ret = ioctl(self->invalid_fd, EPIOCGPARAMS, &self->params); + + EXPECT_EQ(-1, ret) + TH_LOG("EPIOCGPARAMS on invalid epoll FD should error"); + + EXPECT_EQ(ENOTTY, errno) + TH_LOG("EPIOCGPARAMS on invalid epoll FD should set errno to ENOTTY"); + + memset(&self->params, 0, sizeof(struct epoll_params)); + + ret = ioctl(self->invalid_fd, EPIOCSPARAMS, &self->params); + + EXPECT_EQ(-1, ret) + TH_LOG("EPIOCSPARAMS on invalid epoll FD should error"); + + EXPECT_EQ(ENOTTY, errno) + TH_LOG("EPIOCSPARAMS on invalid epoll FD should set errno to ENOTTY"); +} + +FIXTURE(epoll_busy_poll) +{ + int fd; + struct epoll_params params; + struct epoll_params *invalid_params; + cap_t caps; +}; + +FIXTURE_SETUP(epoll_busy_poll) +{ + int ret; + + ret = epoll_create1(0); + EXPECT_NE(-1, ret) + TH_LOG("epoll_create1 failed?"); + + self->fd = ret; + + self->caps = cap_get_proc(); + EXPECT_NE(NULL, self->caps); +} + +FIXTURE_TEARDOWN(epoll_busy_poll) +{ + int ret; + + ret = close(self->fd); + EXPECT_EQ(0, ret); + + ret = cap_free(self->caps); + EXPECT_NE(-1, ret) + TH_LOG("unable to free capabilities"); +} + +TEST_F(epoll_busy_poll, test_get_params) +{ + /* begin by getting the epoll params from the kernel + * + * the default should be default and all fields should be zero'd by the + * kernel, so set params fields to garbage to test this. + */ + int ret = 0; + + self->params.busy_poll_usecs = 0xff; + self->params.busy_poll_budget = 0xff; + self->params.prefer_busy_poll = 1; + self->params.__pad = 0xf; + + ret = ioctl(self->fd, EPIOCGPARAMS, &self->params); + EXPECT_EQ(0, ret) + TH_LOG("ioctl EPIOCGPARAMS should succeed"); + + EXPECT_EQ(0, self->params.busy_poll_usecs) + TH_LOG("EPIOCGPARAMS busy_poll_usecs should have been 0"); + + EXPECT_EQ(0, self->params.busy_poll_budget) + TH_LOG("EPIOCGPARAMS busy_poll_budget should have been 0"); + + EXPECT_EQ(0, self->params.prefer_busy_poll) + TH_LOG("EPIOCGPARAMS prefer_busy_poll should have been 0"); + + EXPECT_EQ(0, self->params.__pad) + TH_LOG("EPIOCGPARAMS __pad should have been 0"); + + self->invalid_params = (struct epoll_params *)0xdeadbeef; + ret = ioctl(self->fd, EPIOCGPARAMS, self->invalid_params); + + EXPECT_EQ(-1, ret) + TH_LOG("EPIOCGPARAMS should error with invalid params"); + + EXPECT_EQ(EFAULT, errno) + TH_LOG("EPIOCGPARAMS with invalid params should set errno to EFAULT"); +} + +TEST_F(epoll_busy_poll, test_set_invalid) +{ + int ret; + + memset(&self->params, 0, sizeof(struct epoll_params)); + + self->params.__pad = 1; + + ret = ioctl(self->fd, EPIOCSPARAMS, &self->params); + + EXPECT_EQ(-1, ret) + TH_LOG("EPIOCSPARAMS non-zero __pad should error"); + + EXPECT_EQ(EINVAL, errno) + TH_LOG("EPIOCSPARAMS non-zero __pad errno should be EINVAL"); + + self->params.__pad = 0; + self->params.busy_poll_usecs = (uint32_t)INT_MAX + 1; + + ret = ioctl(self->fd, EPIOCSPARAMS, &self->params); + + EXPECT_EQ(-1, ret) + TH_LOG("EPIOCSPARAMS should error busy_poll_usecs > S32_MAX"); + + EXPECT_EQ(EINVAL, errno) + TH_LOG("EPIOCSPARAMS busy_poll_usecs > S32_MAX errno should be EINVAL"); + + self->params.__pad = 0; + self->params.busy_poll_usecs = 32; + self->params.prefer_busy_poll = 2; + + ret = ioctl(self->fd, EPIOCSPARAMS, &self->params); + + EXPECT_EQ(-1, ret) + TH_LOG("EPIOCSPARAMS should error prefer_busy_poll > 1"); + + EXPECT_EQ(EINVAL, errno) + TH_LOG("EPIOCSPARAMS prefer_busy_poll > 1 errno should be EINVAL"); + + self->params.__pad = 0; + self->params.busy_poll_usecs = 32; + self->params.prefer_busy_poll = 1; + + /* set budget well above kernel's NAPI_POLL_WEIGHT of 64 */ + self->params.busy_poll_budget = UINT16_MAX; + + /* test harness should run with CAP_NET_ADMIN, but let's make sure */ + cap_flag_value_t tmp; + + ret = cap_get_flag(self->caps, CAP_NET_ADMIN, CAP_EFFECTIVE, &tmp); + EXPECT_EQ(0, ret) + TH_LOG("unable to get CAP_NET_ADMIN cap flag"); + + EXPECT_EQ(CAP_SET, tmp) + TH_LOG("expecting CAP_NET_ADMIN to be set for the test harness"); + + /* at this point we know CAP_NET_ADMIN is available, so setting the + * params with a busy_poll_budget > NAPI_POLL_WEIGHT should succeed + */ + ret = ioctl(self->fd, EPIOCSPARAMS, &self->params); + + EXPECT_EQ(0, ret) + TH_LOG("EPIOCSPARAMS should allow busy_poll_budget > NAPI_POLL_WEIGHT"); + + /* remove CAP_NET_ADMIN from our effective set */ + cap_value_t net_admin[] = { CAP_NET_ADMIN }; + + ret = cap_set_flag(self->caps, CAP_EFFECTIVE, 1, net_admin, CAP_CLEAR); + EXPECT_EQ(0, ret) + TH_LOG("couldn't clear CAP_NET_ADMIN"); + + ret = cap_set_proc(self->caps); + EXPECT_EQ(0, ret) + TH_LOG("cap_set_proc should drop CAP_NET_ADMIN"); + + /* this is now expected to fail */ + ret = ioctl(self->fd, EPIOCSPARAMS, &self->params); + + EXPECT_EQ(-1, ret) + TH_LOG("EPIOCSPARAMS should error busy_poll_budget > NAPI_POLL_WEIGHT"); + + EXPECT_EQ(EPERM, errno) + TH_LOG("EPIOCSPARAMS errno should be EPERM busy_poll_budget > NAPI_POLL_WEIGHT"); + + /* restore CAP_NET_ADMIN to our effective set */ + ret = cap_set_flag(self->caps, CAP_EFFECTIVE, 1, net_admin, CAP_SET); + EXPECT_EQ(0, ret) + TH_LOG("couldn't restore CAP_NET_ADMIN"); + + ret = cap_set_proc(self->caps); + EXPECT_EQ(0, ret) + TH_LOG("cap_set_proc should set CAP_NET_ADMIN"); + + self->invalid_params = (struct epoll_params *)0xdeadbeef; + ret = ioctl(self->fd, EPIOCSPARAMS, self->invalid_params); + + EXPECT_EQ(-1, ret) + TH_LOG("EPIOCSPARAMS should error when epoll_params is invalid"); + + EXPECT_EQ(EFAULT, errno) + TH_LOG("EPIOCSPARAMS should set errno to EFAULT when epoll_params is invalid"); +} + +TEST_F(epoll_busy_poll, test_set_and_get_valid) +{ + int ret; + + memset(&self->params, 0, sizeof(struct epoll_params)); + + self->params.busy_poll_usecs = 25; + self->params.busy_poll_budget = 16; + self->params.prefer_busy_poll = 1; + + ret = ioctl(self->fd, EPIOCSPARAMS, &self->params); + + EXPECT_EQ(0, ret) + TH_LOG("EPIOCSPARAMS with valid params should not error"); + + /* check that the kernel returns the same values back */ + + memset(&self->params, 0, sizeof(struct epoll_params)); + + ret = ioctl(self->fd, EPIOCGPARAMS, &self->params); + + EXPECT_EQ(0, ret) + TH_LOG("EPIOCGPARAMS should not error"); + + EXPECT_EQ(25, self->params.busy_poll_usecs) + TH_LOG("params.busy_poll_usecs incorrect"); + + EXPECT_EQ(16, self->params.busy_poll_budget) + TH_LOG("params.busy_poll_budget incorrect"); + + EXPECT_EQ(1, self->params.prefer_busy_poll) + TH_LOG("params.prefer_busy_poll incorrect"); + + EXPECT_EQ(0, self->params.__pad) + TH_LOG("params.__pad was not 0"); +} + +TEST_F(epoll_busy_poll, test_invalid_ioctl) +{ + int invalid_ioctl = EPIOCGPARAMS + 10; + int ret; + + ret = ioctl(self->fd, invalid_ioctl, &self->params); + + EXPECT_EQ(-1, ret) + TH_LOG("invalid ioctl should return error"); + + EXPECT_EQ(EINVAL, errno) + TH_LOG("invalid ioctl should set errno to EINVAL"); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 51157a5559..7c01f58a20 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -9,6 +9,7 @@ PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} RTABLE=100 RTABLE_PEER=101 +RTABLE_VRF=102 GW_IP4=192.51.100.2 SRC_IP=192.51.100.3 GW_IP6=2001:db8:1::2 @@ -17,7 +18,14 @@ SRC_IP6=2001:db8:1::3 DEV_ADDR=192.51.100.1 DEV_ADDR6=2001:db8:1::1 DEV=dummy0 -TESTS="fib_rule6 fib_rule4 fib_rule6_connect fib_rule4_connect" +TESTS=" + fib_rule6 + fib_rule4 + fib_rule6_connect + fib_rule4_connect + fib_rule6_vrf + fib_rule4_vrf +" SELFTEST_PATH="" @@ -27,13 +35,18 @@ log_test() local expected=$2 local msg="$3" + $IP rule show | grep -q l3mdev + if [ $? -eq 0 ]; then + msg="$msg (VRF)" + fi + if [ ${rc} -eq ${expected} ]; then nsuccess=$((nsuccess+1)) - printf "\n TEST: %-50s [ OK ]\n" "${msg}" + printf "\n TEST: %-60s [ OK ]\n" "${msg}" else ret=1 nfail=$((nfail+1)) - printf "\n TEST: %-50s [FAIL]\n" "${msg}" + printf "\n TEST: %-60s [FAIL]\n" "${msg}" if [ "${PAUSE_ON_FAIL}" = "yes" ]; then echo echo "hit enter to continue, 'q' to quit" @@ -130,6 +143,17 @@ cleanup_peer() ip netns del $peerns } +setup_vrf() +{ + $IP link add name vrf0 up type vrf table $RTABLE_VRF + $IP link set dev $DEV master vrf0 +} + +cleanup_vrf() +{ + $IP link del dev vrf0 +} + fib_check_iproute_support() { ip rule help 2>&1 | grep -q $1 @@ -248,6 +272,13 @@ fib_rule6_test() fi } +fib_rule6_vrf_test() +{ + setup_vrf + fib_rule6_test + cleanup_vrf +} + # Verify that the IPV6_TCLASS option of UDPv6 and TCPv6 sockets is properly # taken into account when connecting the socket and when sending packets. fib_rule6_connect_test() @@ -385,6 +416,13 @@ fib_rule4_test() fi } +fib_rule4_vrf_test() +{ + setup_vrf + fib_rule4_test + cleanup_vrf +} + # Verify that the IP_TOS option of UDPv4 and TCPv4 sockets is properly taken # into account when connecting the socket and when sending packets. fib_rule4_connect_test() @@ -467,6 +505,8 @@ do fib_rule4_test|fib_rule4) fib_rule4_test;; fib_rule6_connect_test|fib_rule6_connect) fib_rule6_connect_test;; fib_rule4_connect_test|fib_rule4_connect) fib_rule4_connect_test;; + fib_rule6_vrf_test|fib_rule6_vrf) fib_rule6_vrf_test;; + fib_rule4_vrf_test|fib_rule4_vrf) fib_rule4_vrf_test;; help) echo "Test names: $TESTS"; exit 0;; diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 73895711cd..5f3c28fc86 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1737,53 +1737,53 @@ ipv4_rt_dsfield() # DSCP 0x10 should match the specific route, no matter the ECN bits $IP route get fibmatch 172.16.102.1 dsfield 0x10 | \ - grep -q "via 172.16.103.2" + grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2" log_test $? 0 "IPv4 route with DSCP and ECN:Not-ECT" $IP route get fibmatch 172.16.102.1 dsfield 0x11 | \ - grep -q "via 172.16.103.2" + grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2" log_test $? 0 "IPv4 route with DSCP and ECN:ECT(1)" $IP route get fibmatch 172.16.102.1 dsfield 0x12 | \ - grep -q "via 172.16.103.2" + grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2" log_test $? 0 "IPv4 route with DSCP and ECN:ECT(0)" $IP route get fibmatch 172.16.102.1 dsfield 0x13 | \ - grep -q "via 172.16.103.2" + grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2" log_test $? 0 "IPv4 route with DSCP and ECN:CE" # Unknown DSCP should match the generic route, no matter the ECN bits $IP route get fibmatch 172.16.102.1 dsfield 0x14 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with unknown DSCP and ECN:Not-ECT" $IP route get fibmatch 172.16.102.1 dsfield 0x15 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(1)" $IP route get fibmatch 172.16.102.1 dsfield 0x16 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(0)" $IP route get fibmatch 172.16.102.1 dsfield 0x17 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with unknown DSCP and ECN:CE" # Null DSCP should match the generic route, no matter the ECN bits $IP route get fibmatch 172.16.102.1 dsfield 0x00 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with no DSCP and ECN:Not-ECT" $IP route get fibmatch 172.16.102.1 dsfield 0x01 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(1)" $IP route get fibmatch 172.16.102.1 dsfield 0x02 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(0)" $IP route get fibmatch 172.16.102.1 dsfield 0x03 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with no DSCP and ECN:CE" } diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 535865b3d1..fa7b59ff40 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -15,18 +15,12 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ bridge_vlan_unaware.sh \ custom_multipath_hash.sh \ dual_vxlan_bridge.sh \ - ethtool_extended_state.sh \ - ethtool_mm.sh \ - ethtool_rmon.sh \ - ethtool.sh \ gre_custom_multipath_hash.sh \ gre_inner_v4_multipath.sh \ gre_inner_v6_multipath.sh \ gre_multipath_nh_res.sh \ gre_multipath_nh.sh \ gre_multipath.sh \ - hw_stats_l3.sh \ - hw_stats_l3_gre.sh \ ip6_forward_instats_vrf.sh \ ip6gre_custom_multipath_hash.sh \ ip6gre_flat_key.sh \ @@ -43,8 +37,8 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ ipip_hier_gre_key.sh \ ipip_hier_gre_keys.sh \ ipip_hier_gre.sh \ + lib_sh_test.sh \ local_termination.sh \ - loopback.sh \ mirror_gre_bound.sh \ mirror_gre_bridge_1d.sh \ mirror_gre_bridge_1d_vlan.sh \ @@ -113,7 +107,6 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ vxlan_symmetric.sh TEST_FILES := devlink_lib.sh \ - ethtool_lib.sh \ fib_offload_lib.sh \ forwarding.config.sample \ ip6gre_lib.sh \ diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README index b8a2af8fcf..7fdb6a9ca5 100644 --- a/tools/testing/selftests/net/forwarding/README +++ b/tools/testing/selftests/net/forwarding/README @@ -56,3 +56,36 @@ o Checks shall be added to lib.sh for any external dependencies. o Code shall be checked using ShellCheck [1] prior to submission. 1. https://www.shellcheck.net/ + +Customization +============= + +The forwarding selftests framework uses a number of variables that +influence its behavior and tools it invokes, and how it invokes them, in +various ways. A number of these variables can be overridden. The way these +overridable variables are specified is typically one of the following two +syntaxes: + + : "${VARIABLE:=default_value}" + VARIABLE=${VARIABLE:=default_value} + +Any of these variables can be overridden. Notably net/forwarding/lib.sh and +net/lib.sh contain a number of overridable variables. + +One way of overriding these variables is through the environment: + + PAUSE_ON_FAIL=yes ./some_test.sh + +The variable NETIFS is special. Since it is an array variable, there is no +way to pass it through the environment. Its value can instead be given as +consecutive arguments to the selftest: + + ./some_test.sh swp{1..8} + +A way to customize variables in a persistent fashion is to create a file +named forwarding.config in this directory. lib.sh sources the file if +present, so it can contain any shell code. Typically it will contain +assignments of variables whose value should be overridden. + +forwarding.config.sample is available in the directory as an example of +how forwarding.config might look. diff --git a/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh index 0760a34b71..a21b7085da 100755 --- a/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh +++ b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh @@ -178,6 +178,22 @@ fdb_del() check_err $? "Failed to remove a FDB entry of type ${type}" } +check_fdb_n_learned_support() +{ + if ! ip link help bridge 2>&1 | grep -q "fdb_max_learned"; then + echo "SKIP: iproute2 too old, missing bridge max learned support" + exit $ksft_skip + fi + + ip link add dev br0 type bridge + local learned=$(fdb_get_n_learned) + ip link del dev br0 + if [ "$learned" == "null" ]; then + echo "SKIP: kernel too old; bridge fdb_n_learned feature not supported." + exit $ksft_skip + fi +} + check_accounting_one_type() { local type=$1 is_counted=$2 overrides_learned=$3 @@ -274,6 +290,8 @@ check_limit() done } +check_fdb_n_learned_support + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index f1de525cfa..62a05bca1e 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -122,6 +122,8 @@ devlink_reload() still_pending=$(devlink resource show "$DEVLINK_DEV" | \ grep -c "size_new") check_err $still_pending "Failed reload - There are still unset sizes" + + udevadm settle } declare -A DEVLINK_ORIG diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample index 1fc4f0242f..f1ca95e79a 100644 --- a/tools/testing/selftests/net/forwarding/forwarding.config.sample +++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample @@ -3,51 +3,28 @@ ############################################################################## # Topology description. p1 looped back to p2, p3 to p4 and so on. -declare -A NETIFS -NETIFS[p1]=veth0 -NETIFS[p2]=veth1 -NETIFS[p3]=veth2 -NETIFS[p4]=veth3 -NETIFS[p5]=veth4 -NETIFS[p6]=veth5 -NETIFS[p7]=veth6 -NETIFS[p8]=veth7 -NETIFS[p9]=veth8 -NETIFS[p10]=veth9 +NETIFS=( + [p1]=veth0 + [p2]=veth1 + [p3]=veth2 + [p4]=veth3 + [p5]=veth4 + [p6]=veth5 + [p7]=veth6 + [p8]=veth7 + [p9]=veth8 + [p10]=veth9 +) # Port that does not have a cable connected. NETIF_NO_CABLE=eth8 ############################################################################## -# Defines +# In addition to the topology-related variables, it is also possible to override +# in this file other variables that net/lib.sh, net/forwarding/lib.sh or other +# libraries or selftests use. E.g.: -# IPv4 ping utility name -PING=ping -# IPv6 ping utility name. Some distributions use 'ping' for IPv6. PING6=ping6 -# Packet generator. Some distributions use 'mz'. MZ=mausezahn -# mausezahn delay between transmissions in microseconds. -MZ_DELAY=0 -# Time to wait after interfaces participating in the test are all UP WAIT_TIME=5 -# Whether to pause on failure or not. -PAUSE_ON_FAIL=no -# Whether to pause on cleanup or not. -PAUSE_ON_CLEANUP=no -# Type of network interface to create -NETIF_TYPE=veth -# Whether to create virtual interfaces (veth) or not -NETIF_CREATE=yes -# Timeout (in seconds) before ping exits regardless of how many packets have -# been sent or received -PING_TIMEOUT=5 -# Minimum ageing_time (in centiseconds) supported by hardware -LOW_AGEING_TIME=1000 -# Flag for tc match, supposed to be skip_sw/skip_hw which means do not process -# filter by software/hardware -TC_FLAG=skip_hw -# IPv6 traceroute utility name. -TROUTE6=traceroute6 - diff --git a/tools/testing/selftests/net/forwarding/ipip_lib.sh b/tools/testing/selftests/net/forwarding/ipip_lib.sh index 30f36a57ba..01e62c4ac9 100644 --- a/tools/testing/selftests/net/forwarding/ipip_lib.sh +++ b/tools/testing/selftests/net/forwarding/ipip_lib.sh @@ -141,7 +141,6 @@ # | $h2 + | # | 192.0.2.18/28 | # +---------------------------+ -source lib.sh h1_create() { diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index e78f11140e..eabbdf00d8 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -2,33 +2,124 @@ # SPDX-License-Identifier: GPL-2.0 ############################################################################## +# Topology description. p1 looped back to p2, p3 to p4 and so on. + +declare -A NETIFS=( + [p1]=veth0 + [p2]=veth1 + [p3]=veth2 + [p4]=veth3 + [p5]=veth4 + [p6]=veth5 + [p7]=veth6 + [p8]=veth7 + [p9]=veth8 + [p10]=veth9 +) + +# Port that does not have a cable connected. +: "${NETIF_NO_CABLE:=eth8}" + +############################################################################## # Defines -# Can be overridden by the configuration file. -PING=${PING:=ping} -PING6=${PING6:=ping6} -MZ=${MZ:=mausezahn} -MZ_DELAY=${MZ_DELAY:=0} -ARPING=${ARPING:=arping} -TEAMD=${TEAMD:=teamd} -WAIT_TIME=${WAIT_TIME:=5} -PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} -PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no} -NETIF_TYPE=${NETIF_TYPE:=veth} -NETIF_CREATE=${NETIF_CREATE:=yes} -MCD=${MCD:=smcrouted} -MC_CLI=${MC_CLI:=smcroutectl} -PING_COUNT=${PING_COUNT:=10} -PING_TIMEOUT=${PING_TIMEOUT:=5} -WAIT_TIMEOUT=${WAIT_TIMEOUT:=20} -INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600} -LOW_AGEING_TIME=${LOW_AGEING_TIME:=1000} -REQUIRE_JQ=${REQUIRE_JQ:=yes} -REQUIRE_MZ=${REQUIRE_MZ:=yes} -REQUIRE_MTOOLS=${REQUIRE_MTOOLS:=no} -STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no} -TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=} -TROUTE6=${TROUTE6:=traceroute6} +# Networking utilities. +: "${PING:=ping}" +: "${PING6:=ping6}" # Some distros just use ping. +: "${ARPING:=arping}" +: "${TROUTE6:=traceroute6}" + +# Packet generator. +: "${MZ:=mausezahn}" # Some distributions use 'mz'. +: "${MZ_DELAY:=0}" + +# Host configuration tools. +: "${TEAMD:=teamd}" +: "${MCD:=smcrouted}" +: "${MC_CLI:=smcroutectl}" + +# Constants for netdevice bring-up: +# Default time in seconds to wait for an interface to come up before giving up +# and bailing out. Used during initial setup. +: "${INTERFACE_TIMEOUT:=600}" +# Like INTERFACE_TIMEOUT, but default for ad-hoc waiting in testing scripts. +: "${WAIT_TIMEOUT:=20}" +# Time to wait after interfaces participating in the test are all UP. +: "${WAIT_TIME:=5}" + +# Whether to pause on, respectively, after a failure and before cleanup. +: "${PAUSE_ON_FAIL:=no}" +: "${PAUSE_ON_CLEANUP:=no}" + +# Whether to create virtual interfaces, and what netdevice type they should be. +: "${NETIF_CREATE:=yes}" +: "${NETIF_TYPE:=veth}" + +# Constants for ping tests: +# How many packets should be sent. +: "${PING_COUNT:=10}" +# Timeout (in seconds) before ping exits regardless of how many packets have +# been sent or received +: "${PING_TIMEOUT:=5}" + +# Minimum ageing_time (in centiseconds) supported by hardware +: "${LOW_AGEING_TIME:=1000}" + +# Whether to check for availability of certain tools. +: "${REQUIRE_JQ:=yes}" +: "${REQUIRE_MZ:=yes}" +: "${REQUIRE_MTOOLS:=no}" + +# Whether to override MAC addresses on interfaces participating in the test. +: "${STABLE_MAC_ADDRS:=no}" + +# Flags for tcpdump +: "${TCPDUMP_EXTRA_FLAGS:=}" + +# Flags for TC filters. +: "${TC_FLAG:=skip_hw}" + +# Whether the machine is "slow" -- i.e. might be incapable of running tests +# involving heavy traffic. This might be the case on a debug kernel, a VM, or +# e.g. a low-power board. +: "${KSFT_MACHINE_SLOW:=no}" + +############################################################################## +# Find netifs by test-specified driver name + +driver_name_get() +{ + local dev=$1; shift + local driver_path="/sys/class/net/$dev/device/driver" + + if [[ -L $driver_path ]]; then + basename `realpath $driver_path` + fi +} + +netif_find_driver() +{ + local ifnames=`ip -j link show | jq -r ".[].ifname"` + local count=0 + + for ifname in $ifnames + do + local driver_name=`driver_name_get $ifname` + if [[ ! -z $driver_name && $driver_name == $NETIF_FIND_DRIVER ]]; then + count=$((count + 1)) + NETIFS[p$count]="$ifname" + fi + done +} + +# Whether to find netdevice according to the driver speficied by the importer +: "${NETIF_FIND_DRIVER:=}" + +if [[ $NETIF_FIND_DRIVER ]]; then + unset NETIFS + declare -A NETIFS + netif_find_driver +fi net_forwarding_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") @@ -179,22 +270,23 @@ check_port_mab_support() fi } -skip_on_veth() +if [[ "$(id -u)" -ne 0 ]]; then + echo "SKIP: need root privileges" + exit $ksft_skip +fi + +check_driver() { - local kind=$(ip -j -d link show dev ${NETIFS[p1]} | - jq -r '.[].linkinfo.info_kind') + local dev=$1; shift + local expected=$1; shift + local driver_name=`driver_name_get $dev` - if [[ $kind == veth ]]; then - echo "SKIP: Test cannot be run with veth pairs" + if [[ $driver_name != $expected ]]; then + echo "SKIP: expected driver $expected for $dev, got $driver_name instead" exit $ksft_skip fi } -if [[ "$(id -u)" -ne 0 ]]; then - echo "SKIP: need root privileges" - exit $ksft_skip -fi - if [[ "$CHECK_TC" = "yes" ]]; then check_tc_version fi @@ -209,6 +301,21 @@ require_command() fi } +# IPv6 support was added in v3.0 +check_mtools_version() +{ + local version="$(msend -v)" + local major + + version=${version##msend version } + major=$(echo $version | cut -d. -f1) + + if [ $major -lt 3 ]; then + echo "SKIP: expected mtools version 3.0, got $version" + exit $ksft_skip + fi +} + if [[ "$REQUIRE_JQ" = "yes" ]]; then require_command jq fi @@ -216,15 +323,10 @@ if [[ "$REQUIRE_MZ" = "yes" ]]; then require_command $MZ fi if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then - # https://github.com/vladimiroltean/mtools/ - # patched for IPv6 support + # https://github.com/troglobit/mtools require_command msend require_command mreceive -fi - -if [[ ! -v NUM_NETIFS ]]; then - echo "SKIP: importer does not define \"NUM_NETIFS\"" - exit $ksft_skip + check_mtools_version fi ############################################################################## @@ -245,6 +347,23 @@ done ############################################################################## # Network interfaces configuration +if [[ ! -v NUM_NETIFS ]]; then + echo "SKIP: importer does not define \"NUM_NETIFS\"" + exit $ksft_skip +fi + +if (( NUM_NETIFS > ${#NETIFS[@]} )); then + echo "SKIP: Importer requires $NUM_NETIFS NETIFS, but only ${#NETIFS[@]} are defined (${NETIFS[@]})" + exit $ksft_skip +fi + +for i in $(seq ${#NETIFS[@]}); do + if [[ ! ${NETIFS[p$i]} ]]; then + echo "SKIP: NETIFS[p$i] not given" + exit $ksft_skip + fi +done + create_netif_veth() { local i @@ -343,13 +462,20 @@ ret_set_ksft_status() fi } +# Whether FAILs should be interpreted as XFAILs. Internal. +FAIL_TO_XFAIL= + check_err() { local err=$1 local msg=$2 if ((err)); then - ret_set_ksft_status $ksft_fail "$msg" + if [[ $FAIL_TO_XFAIL = yes ]]; then + ret_set_ksft_status $ksft_xfail "$msg" + else + ret_set_ksft_status $ksft_fail "$msg" + fi fi } @@ -374,6 +500,29 @@ check_err_fail() fi } +xfail_on_slow() +{ + if [[ $KSFT_MACHINE_SLOW = yes ]]; then + FAIL_TO_XFAIL=yes "$@" + else + "$@" + fi +} + +xfail_on_veth() +{ + local dev=$1; shift + local kind + + kind=$(ip -j -d link show dev $dev | + jq -r '.[].linkinfo.info_kind') + if [[ $kind = veth ]]; then + FAIL_TO_XFAIL=yes "$@" + else + "$@" + fi +} + log_test_result() { local test_name=$1; shift @@ -569,6 +718,19 @@ setup_wait() sleep $WAIT_TIME } +wait_for_dev() +{ + local dev=$1; shift + local timeout=${1:-$WAIT_TIMEOUT}; shift + + slowwait $timeout ip link show dev $dev &> /dev/null + if (( $? )); then + check_err 1 + log_test wait_for_dev "Interface $dev did not appear." + exit $EXIT_STATUS + fi +} + cmd_jq() { local cmd=$1 @@ -1995,6 +2157,8 @@ bail_on_lldpad() { local reason1="$1"; shift local reason2="$1"; shift + local caller=${FUNCNAME[1]} + local src=${BASH_SOURCE[1]} if systemctl is-active --quiet lldpad; then @@ -2015,7 +2179,8 @@ bail_on_lldpad() an environment variable ALLOW_LLDPAD to a non-empty string. EOF - exit 1 + log_test_skip $src:$caller + exit $EXIT_STATUS else return fi diff --git a/tools/testing/selftests/net/forwarding/lib_sh_test.sh b/tools/testing/selftests/net/forwarding/lib_sh_test.sh new file mode 100755 index 0000000000..ff2accccaf --- /dev/null +++ b/tools/testing/selftests/net/forwarding/lib_sh_test.sh @@ -0,0 +1,208 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This tests the operation of lib.sh itself. + +ALL_TESTS=" + test_ret + test_exit_status +" +NUM_NETIFS=0 +source lib.sh + +# Simulated checks. + +do_test() +{ + local msg=$1; shift + + "$@" + check_err $? "$msg" +} + +tpass() +{ + do_test "tpass" true +} + +tfail() +{ + do_test "tfail" false +} + +txfail() +{ + FAIL_TO_XFAIL=yes do_test "txfail" false +} + +# Simulated tests. + +pass() +{ + RET=0 + do_test "true" true + log_test "true" +} + +fail() +{ + RET=0 + do_test "false" false + log_test "false" +} + +xfail() +{ + RET=0 + FAIL_TO_XFAIL=yes do_test "xfalse" false + log_test "xfalse" +} + +skip() +{ + RET=0 + log_test_skip "skip" +} + +slow_xfail() +{ + RET=0 + xfail_on_slow do_test "slow_false" false + log_test "slow_false" +} + +# lib.sh tests. + +ret_tests_run() +{ + local t + + RET=0 + retmsg= + for t in "$@"; do + $t + done + echo "$retmsg" + return $RET +} + +ret_subtest() +{ + local expect_ret=$1; shift + local expect_retmsg=$1; shift + local -a tests=( "$@" ) + + local status_names=(pass fail xfail xpass skip) + local ret + local out + + RET=0 + + # Run this in a subshell, so that our environment is intact. + out=$(ret_tests_run "${tests[@]}") + ret=$? + + (( ret == expect_ret )) + check_err $? "RET=$ret expected $expect_ret" + + [[ $out == $expect_retmsg ]] + check_err $? "retmsg=$out expected $expect_retmsg" + + log_test "RET $(echo ${tests[@]}) -> ${status_names[$ret]}" +} + +test_ret() +{ + ret_subtest $ksft_pass "" + + ret_subtest $ksft_pass "" tpass + ret_subtest $ksft_fail "tfail" tfail + ret_subtest $ksft_xfail "txfail" txfail + + ret_subtest $ksft_pass "" tpass tpass + ret_subtest $ksft_fail "tfail" tpass tfail + ret_subtest $ksft_xfail "txfail" tpass txfail + + ret_subtest $ksft_fail "tfail" tfail tpass + ret_subtest $ksft_xfail "txfail" txfail tpass + + ret_subtest $ksft_fail "tfail" tfail tfail + ret_subtest $ksft_fail "tfail" tfail txfail + + ret_subtest $ksft_fail "tfail" txfail tfail + + ret_subtest $ksft_xfail "txfail" txfail txfail +} + +exit_status_tests_run() +{ + EXIT_STATUS=0 + tests_run > /dev/null + return $EXIT_STATUS +} + +exit_status_subtest() +{ + local expect_exit_status=$1; shift + local tests=$1; shift + local what=$1; shift + + local status_names=(pass fail xfail xpass skip) + local exit_status + local out + + RET=0 + + # Run this in a subshell, so that our environment is intact. + out=$(TESTS="$tests" exit_status_tests_run) + exit_status=$? + + (( exit_status == expect_exit_status )) + check_err $? "EXIT_STATUS=$exit_status, expected $expect_exit_status" + + log_test "EXIT_STATUS $tests$what -> ${status_names[$exit_status]}" +} + +test_exit_status() +{ + exit_status_subtest $ksft_pass ":" + + exit_status_subtest $ksft_pass "pass" + exit_status_subtest $ksft_fail "fail" + exit_status_subtest $ksft_pass "xfail" + exit_status_subtest $ksft_skip "skip" + + exit_status_subtest $ksft_pass "pass pass" + exit_status_subtest $ksft_fail "pass fail" + exit_status_subtest $ksft_pass "pass xfail" + exit_status_subtest $ksft_skip "pass skip" + + exit_status_subtest $ksft_fail "fail pass" + exit_status_subtest $ksft_pass "xfail pass" + exit_status_subtest $ksft_skip "skip pass" + + exit_status_subtest $ksft_fail "fail fail" + exit_status_subtest $ksft_fail "fail xfail" + exit_status_subtest $ksft_fail "fail skip" + + exit_status_subtest $ksft_fail "xfail fail" + exit_status_subtest $ksft_fail "skip fail" + + exit_status_subtest $ksft_pass "xfail xfail" + exit_status_subtest $ksft_skip "xfail skip" + exit_status_subtest $ksft_skip "skip xfail" + + exit_status_subtest $ksft_skip "skip skip" + + KSFT_MACHINE_SLOW=yes \ + exit_status_subtest $ksft_pass "slow_xfail" ": slow" + + KSFT_MACHINE_SLOW=no \ + exit_status_subtest $ksft_fail "slow_xfail" ": fast" +} + +trap pre_cleanup EXIT + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index c5b0cbc85b..4b364cdf3e 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -155,25 +155,30 @@ run_test() "$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \ true - check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \ - "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \ - false + xfail_on_veth $h1 \ + check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \ + "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \ + false check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \ "$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \ true - check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, allmulti" \ - "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \ - false + xfail_on_veth $h1 \ + check_rcv $rcv_if_name \ + "Unicast IPv4 to unknown MAC address, allmulti" \ + "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \ + false check_rcv $rcv_if_name "Multicast IPv4 to joined group" \ "$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \ true - check_rcv $rcv_if_name "Multicast IPv4 to unknown group" \ - "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \ - false + xfail_on_veth $h1 \ + check_rcv $rcv_if_name \ + "Multicast IPv4 to unknown group" \ + "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \ + false check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \ "$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \ @@ -187,9 +192,10 @@ run_test() "$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \ true - check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \ - "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \ - false + xfail_on_veth $h1 \ + check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \ + "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \ + false check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \ "$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \ diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh index 3f0f5dc955..2ba44247c6 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh @@ -1,6 +1,41 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# +-------------------------+ +# | H1 | +# | $h1 + | +# | 192.0.2.2/24 | | +# | 2001:db8:1::2/64 | | +# +-------------------|-----+ +# | +# +-------------------|----------------------+ +# | | R1 | +# | $rp11 + | +# | 192.0.2.1/24 | +# | 2001:db8:1::1/64 | +# | | +# | + $rp12 + $rp13 | +# | | 169.254.2.12/24 | 169.254.3.13/24 | +# | | fe80:2::12/64 | fe80:3::13/64 | +# +--|--------------------|------------------+ +# | | +# +--|--------------------|------------------+ +# | + $rp22 + $rp23 | +# | 169.254.2.22/24 169.254.3.23/24 | +# | fe80:2::22/64 fe80:3::23/64 | +# | | +# | $rp21 + | +# | 198.51.100.1/24 | | +# | 2001:db8:2::1/64 | R2 | +# +-------------------|----------------------+ +# | +# +-------------------|-----+ +# | | | +# | $h2 + | +# | 198.51.100.2/24 | +# | 2001:db8:2::2/64 H2 | +# +-------------------------+ + ALL_TESTS=" ping_ipv4 ping_ipv6 diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh index b2d2c6cecc..2903294d8b 100644 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh @@ -56,21 +56,12 @@ nh_stats_test_dispatch_swhw() local group_id=$1; shift local mz="$@" - local used - nh_stats_do_test "$what" "$nh1_id" "$nh2_id" "$group_id" \ nh_stats_get "${mz[@]}" - used=$(ip -s -j -d nexthop show id $group_id | - jq '.[].hw_stats.used') - kind=$(ip -j -d link show dev $rp11 | - jq -r '.[].linkinfo.info_kind') - if [[ $used == true ]]; then + xfail_on_veth $rp11 \ nh_stats_do_test "HW $what" "$nh1_id" "$nh2_id" "$group_id" \ nh_stats_get_hw "${mz[@]}" - elif [[ $kind == veth ]]; then - log_test_xfail "HW stats not offloaded on veth topology" - fi } nh_stats_test_dispatch() @@ -83,7 +74,6 @@ nh_stats_test_dispatch() local mz="$@" local enabled - local kind if ! ip nexthop help 2>&1 | grep -q hw_stats; then log_test_skip "NH stats test: ip doesn't support HW stats" diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh index 4b483d24ad..cd9e346436 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh @@ -1,6 +1,41 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# +-------------------------+ +# | H1 | +# | $h1 + | +# | 192.0.2.2/24 | | +# | 2001:db8:1::2/64 | | +# +-------------------|-----+ +# | +# +-------------------|----------------------+ +# | | R1 | +# | $rp11 + | +# | 192.0.2.1/24 | +# | 2001:db8:1::1/64 | +# | | +# | + $rp12 + $rp13 | +# | | 169.254.2.12/24 | 169.254.3.13/24 | +# | | fe80:2::12/64 | fe80:3::13/64 | +# +--|--------------------|------------------+ +# | | +# +--|--------------------|------------------+ +# | + $rp22 + $rp23 | +# | 169.254.2.22/24 169.254.3.23/24 | +# | fe80:2::22/64 fe80:3::23/64 | +# | | +# | $rp21 + | +# | 198.51.100.1/24 | | +# | 2001:db8:2::1/64 | R2 | +# +-------------------|----------------------+ +# | +# +-------------------|-----+ +# | | | +# | $h2 + | +# | 198.51.100.2/24 | +# | 2001:db8:2::2/64 H2 | +# +-------------------------+ + ALL_TESTS=" ping_ipv4 ping_ipv6 diff --git a/tools/testing/selftests/net/forwarding/router_nh.sh b/tools/testing/selftests/net/forwarding/router_nh.sh index f3a53738bd..92904b01ea 100755 --- a/tools/testing/selftests/net/forwarding/router_nh.sh +++ b/tools/testing/selftests/net/forwarding/router_nh.sh @@ -1,6 +1,20 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# +-------------------------+ +-------------------------+ +# | H1 | | H2 | +# | $h1 + | | $h2 + | +# | 192.0.2.2/24 | | | 198.51.100.2/24 | | +# | 2001:db8:1::2/64 | | | 2001:db8:2::2/64 | | +# +-------------------|-----+ +-------------------|-----+ +# | | +# +-------------------|----------------------------|-----+ +# | R1 | | | +# | $rp1 + $rp2 + | +# | 192.0.2.1/24 198.51.100.1/24 | +# | 2001:db8:1::1/64 2001:db8:2::1/64 | +# +------------------------------------------------------+ + ALL_TESTS=" ping_ipv4 ping_ipv6 diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh index cdf689e994..f9d26a7911 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh @@ -199,25 +199,28 @@ ets_set_dwrr_two_bands() ets_test_strict() { ets_set_strict - ets_dwrr_test_01 - ets_dwrr_test_12 + xfail_on_slow ets_dwrr_test_01 + xfail_on_slow ets_dwrr_test_12 } ets_test_mixed() { ets_set_mixed - ets_dwrr_test_01 - ets_dwrr_test_12 + xfail_on_slow ets_dwrr_test_01 + xfail_on_slow ets_dwrr_test_12 } ets_test_dwrr() { ets_set_dwrr_uniform - ets_dwrr_test_012 + xfail_on_slow ets_dwrr_test_012 + ets_set_dwrr_varying - ets_dwrr_test_012 + xfail_on_slow ets_dwrr_test_012 + ets_change_quantum - ets_dwrr_test_012 + xfail_on_slow ets_dwrr_test_012 + ets_set_dwrr_two_bands - ets_dwrr_test_01 + xfail_on_slow ets_dwrr_test_01 } diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh index 81f31179ac..17f2864456 100755 --- a/tools/testing/selftests/net/forwarding/sch_red.sh +++ b/tools/testing/selftests/net/forwarding/sch_red.sh @@ -451,35 +451,35 @@ uninstall_qdisc() ecn_test() { install_qdisc ecn - do_ecn_test $BACKLOG + xfail_on_slow do_ecn_test $BACKLOG uninstall_qdisc } ecn_nodrop_test() { install_qdisc ecn nodrop - do_ecn_nodrop_test $BACKLOG + xfail_on_slow do_ecn_nodrop_test $BACKLOG uninstall_qdisc } red_test() { install_qdisc - do_red_test $BACKLOG + xfail_on_slow do_red_test $BACKLOG uninstall_qdisc } red_qevent_test() { install_qdisc qevent early_drop block 10 - do_red_qevent_test $BACKLOG + xfail_on_slow do_red_qevent_test $BACKLOG uninstall_qdisc } ecn_qevent_test() { install_qdisc ecn qevent mark block 10 - do_ecn_qevent_test $BACKLOG + xfail_on_slow do_ecn_qevent_test $BACKLOG uninstall_qdisc } diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh index d1f26cb7cd..9cd884d4a5 100644 --- a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh @@ -227,7 +227,7 @@ do_tbf_test() local nr=$(rate $t2 $t3 10) local nr_pct=$((100 * (nr - er) / er)) ((-5 <= nr_pct && nr_pct <= 5)) - check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-5%." + xfail_on_slow check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-5%." log_test "TC $((vlan - 10)): TBF rate ${mbit}Mbit" } diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh index bce8bb8d2b..2e3326edfa 100644 --- a/tools/testing/selftests/net/forwarding/tc_common.sh +++ b/tools/testing/selftests/net/forwarding/tc_common.sh @@ -4,7 +4,7 @@ CHECK_TC="yes" # Can be overridden by the configuration file. See lib.sh -TC_HIT_TIMEOUT=${TC_HIT_TIMEOUT:=1000} # ms +: "${TC_HIT_TIMEOUT:=1000}" # ms tc_check_packets() { diff --git a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh index 5a5dd90348..79775b10b9 100755 --- a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh +++ b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh @@ -1,7 +1,5 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 ALL_TESTS="tunnel_key_nofrag_test" diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c index 6038b96ece..b2184847e3 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/gro.c @@ -93,6 +93,7 @@ static bool tx_socket = true; static int tcp_offset = -1; static int total_hdr_len = -1; static int ethhdr_proto = -1; +static const int num_flush_id_cases = 6; static void vlog(const char *fmt, ...) { @@ -620,6 +621,113 @@ static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type, char *ext iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE); } +static void fix_ip4_checksum(struct iphdr *iph) +{ + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); +} + +static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) +{ + static char buf1[MAX_HDR_LEN + PAYLOAD_LEN]; + static char buf2[MAX_HDR_LEN + PAYLOAD_LEN]; + static char buf3[MAX_HDR_LEN + PAYLOAD_LEN]; + bool send_three = false; + struct iphdr *iph1; + struct iphdr *iph2; + struct iphdr *iph3; + + iph1 = (struct iphdr *)(buf1 + ETH_HLEN); + iph2 = (struct iphdr *)(buf2 + ETH_HLEN); + iph3 = (struct iphdr *)(buf3 + ETH_HLEN); + + create_packet(buf1, 0, 0, PAYLOAD_LEN, 0); + create_packet(buf2, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + create_packet(buf3, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + + switch (tcase) { + case 0: /* DF=1, Incrementing - should coalesce */ + iph1->frag_off |= htons(IP_DF); + iph1->id = htons(8); + + iph2->frag_off |= htons(IP_DF); + iph2->id = htons(9); + break; + + case 1: /* DF=1, Fixed - should coalesce */ + iph1->frag_off |= htons(IP_DF); + iph1->id = htons(8); + + iph2->frag_off |= htons(IP_DF); + iph2->id = htons(8); + break; + + case 2: /* DF=0, Incrementing - should coalesce */ + iph1->frag_off &= ~htons(IP_DF); + iph1->id = htons(8); + + iph2->frag_off &= ~htons(IP_DF); + iph2->id = htons(9); + break; + + case 3: /* DF=0, Fixed - should not coalesce */ + iph1->frag_off &= ~htons(IP_DF); + iph1->id = htons(8); + + iph2->frag_off &= ~htons(IP_DF); + iph2->id = htons(8); + break; + + case 4: /* DF=1, two packets incrementing, and one fixed - should + * coalesce only the first two packets + */ + iph1->frag_off |= htons(IP_DF); + iph1->id = htons(8); + + iph2->frag_off |= htons(IP_DF); + iph2->id = htons(9); + + iph3->frag_off |= htons(IP_DF); + iph3->id = htons(9); + send_three = true; + break; + + case 5: /* DF=1, two packets fixed, and one incrementing - should + * coalesce only the first two packets + */ + iph1->frag_off |= htons(IP_DF); + iph1->id = htons(8); + + iph2->frag_off |= htons(IP_DF); + iph2->id = htons(8); + + iph3->frag_off |= htons(IP_DF); + iph3->id = htons(9); + send_three = true; + break; + } + + fix_ip4_checksum(iph1); + fix_ip4_checksum(iph2); + write_packet(fd, buf1, total_hdr_len + PAYLOAD_LEN, daddr); + write_packet(fd, buf2, total_hdr_len + PAYLOAD_LEN, daddr); + + if (send_three) { + fix_ip4_checksum(iph3); + write_packet(fd, buf3, total_hdr_len + PAYLOAD_LEN, daddr); + } +} + +static void test_flush_id(int fd, struct sockaddr_ll *daddr, char *fin_pkt) +{ + for (int i = 0; i < num_flush_id_cases; i++) { + sleep(1); + send_flush_id_case(fd, daddr, i); + sleep(1); + write_packet(fd, fin_pkt, total_hdr_len, daddr); + } +} + static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2) { static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; @@ -938,6 +1046,8 @@ static void gro_sender(void) send_fragment4(txfd, &daddr); sleep(1); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + test_flush_id(txfd, &daddr, fin_pkt); } else if (proto == PF_INET6) { sleep(1); send_fragment6(txfd, &daddr); @@ -1064,6 +1174,34 @@ static void gro_receiver(void) printf("fragmented ip4 doesn't coalesce: "); check_recv_pkts(rxfd, correct_payload, 2); + + /* is_atomic checks */ + printf("DF=1, Incrementing - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("DF=1, Fixed - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("DF=0, Incrementing - should coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("DF=0, Fixed - should not coalesce: "); + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 2); + + printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: "); + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 2); + + printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: "); + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 2); } else if (proto == PF_INET6) { /* GRO doesn't check for ipv6 hop limit when flushing. * Hence no corresponding test to the ipv4 case. diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile index 92c1d9d080..884cd2cc06 100644 --- a/tools/testing/selftests/net/hsr/Makefile +++ b/tools/testing/selftests/net/hsr/Makefile @@ -2,6 +2,7 @@ top_srcdir = ../../../../.. -TEST_PROGS := hsr_ping.sh +TEST_PROGS := hsr_ping.sh hsr_redbox.sh +TEST_FILES += hsr_common.sh include ../../lib.mk diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config index 22061204fb..241542441c 100644 --- a/tools/testing/selftests/net/hsr/config +++ b/tools/testing/selftests/net/hsr/config @@ -2,3 +2,4 @@ CONFIG_IPV6=y CONFIG_NET_SCH_NETEM=m CONFIG_HSR=y CONFIG_VETH=y +CONFIG_BRIDGE=y diff --git a/tools/testing/selftests/net/hsr/hsr_common.sh b/tools/testing/selftests/net/hsr/hsr_common.sh new file mode 100644 index 0000000000..8e97b1f2e7 --- /dev/null +++ b/tools/testing/selftests/net/hsr/hsr_common.sh @@ -0,0 +1,84 @@ +# SPDX-License-Identifier: GPL-2.0 +# Common code for HSR testing scripts + +source ../lib.sh +ret=0 +ksft_skip=4 + +# $1: IP address +is_v6() +{ + [ -z "${1##*:*}" ] +} + +do_ping() +{ + local netns="$1" + local connect_addr="$2" + local ping_args="-q -c 2" + + if is_v6 "${connect_addr}"; then + $ipv6 || return 0 + ping_args="${ping_args} -6" + fi + + ip netns exec ${netns} ping ${ping_args} $connect_addr >/dev/null + if [ $? -ne 0 ] ; then + echo "$netns -> $connect_addr connectivity [ FAIL ]" 1>&2 + ret=1 + return 1 + fi + + return 0 +} + +do_ping_long() +{ + local netns="$1" + local connect_addr="$2" + local ping_args="-q -c 10" + + if is_v6 "${connect_addr}"; then + $ipv6 || return 0 + ping_args="${ping_args} -6" + fi + + OUT="$(LANG=C ip netns exec ${netns} ping ${ping_args} $connect_addr | grep received)" + if [ $? -ne 0 ] ; then + echo "$netns -> $connect_addr ping [ FAIL ]" 1>&2 + ret=1 + return 1 + fi + + VAL="$(echo $OUT | cut -d' ' -f1-8)" + SED_VAL="$(echo ${VAL} | sed -r -e 's/([0-9]{2}).*([0-9]{2}).*[[:space:]]([0-9]+%).*/\1 transmitted \2 received \3 loss/')" + if [ "${SED_VAL}" != "10 transmitted 10 received 0% loss" ] + then + echo "$netns -> $connect_addr ping TEST [ FAIL ]" + echo "Expect to send and receive 10 packets and no duplicates." + echo "Full message: ${OUT}." + ret=1 + return 1 + fi + + return 0 +} + +stop_if_error() +{ + local msg="$1" + + if [ ${ret} -ne 0 ]; then + echo "FAIL: ${msg}" 1>&2 + exit ${ret} + fi +} + +check_prerequisites() +{ + ip -Version > /dev/null 2>&1 + if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip + fi +} diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh index 1c6457e546..3684b813b0 100755 --- a/tools/testing/selftests/net/hsr/hsr_ping.sh +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -1,10 +1,10 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ret=0 -ksft_skip=4 ipv6=true +source ./hsr_common.sh + optstring="h4" usage() { echo "Usage: $0 [OPTION]" @@ -27,88 +27,6 @@ while getopts "$optstring" option;do esac done -sec=$(date +%s) -rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) -ns1="ns1-$rndh" -ns2="ns2-$rndh" -ns3="ns3-$rndh" - -cleanup() -{ - local netns - for netns in "$ns1" "$ns2" "$ns3" ;do - ip netns del $netns - done -} - -# $1: IP address -is_v6() -{ - [ -z "${1##*:*}" ] -} - -do_ping() -{ - local netns="$1" - local connect_addr="$2" - local ping_args="-q -c 2" - - if is_v6 "${connect_addr}"; then - $ipv6 || return 0 - ping_args="${ping_args} -6" - fi - - ip netns exec ${netns} ping ${ping_args} $connect_addr >/dev/null - if [ $? -ne 0 ] ; then - echo "$netns -> $connect_addr connectivity [ FAIL ]" 1>&2 - ret=1 - return 1 - fi - - return 0 -} - -do_ping_long() -{ - local netns="$1" - local connect_addr="$2" - local ping_args="-q -c 10" - - if is_v6 "${connect_addr}"; then - $ipv6 || return 0 - ping_args="${ping_args} -6" - fi - - OUT="$(LANG=C ip netns exec ${netns} ping ${ping_args} $connect_addr | grep received)" - if [ $? -ne 0 ] ; then - echo "$netns -> $connect_addr ping [ FAIL ]" 1>&2 - ret=1 - return 1 - fi - - VAL="$(echo $OUT | cut -d' ' -f1-8)" - if [ "$VAL" != "10 packets transmitted, 10 received, 0% packet loss," ] - then - echo "$netns -> $connect_addr ping TEST [ FAIL ]" - echo "Expect to send and receive 10 packets and no duplicates." - echo "Full message: ${OUT}." - ret=1 - return 1 - fi - - return 0 -} - -stop_if_error() -{ - local msg="$1" - - if [ ${ret} -ne 0 ]; then - echo "FAIL: ${msg}" 1>&2 - exit ${ret} - fi -} - do_complete_ping_test() { echo "INFO: Initial validation ping." @@ -248,27 +166,15 @@ setup_hsr_interfaces() ip -net "$ns3" link set hsr3 up } -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi +check_prerequisites +setup_ns ns1 ns2 ns3 -trap cleanup EXIT - -for i in "$ns1" "$ns2" "$ns3" ;do - ip netns add $i || exit $ksft_skip - ip -net $i link set lo up -done +trap cleanup_all_ns EXIT setup_hsr_interfaces 0 do_complete_ping_test -cleanup -for i in "$ns1" "$ns2" "$ns3" ;do - ip netns add $i || exit $ksft_skip - ip -net $i link set lo up -done +setup_ns ns1 ns2 ns3 setup_hsr_interfaces 1 do_complete_ping_test diff --git a/tools/testing/selftests/net/hsr/hsr_redbox.sh b/tools/testing/selftests/net/hsr/hsr_redbox.sh new file mode 100755 index 0000000000..1f36785347 --- /dev/null +++ b/tools/testing/selftests/net/hsr/hsr_redbox.sh @@ -0,0 +1,121 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ipv6=false + +source ./hsr_common.sh + +do_complete_ping_test() +{ + echo "INFO: Initial validation ping (HSR-SAN/RedBox)." + # Each node has to be able to reach each one. + do_ping "${ns1}" 100.64.0.2 + do_ping "${ns2}" 100.64.0.1 + # Ping between SANs (test bridge) + do_ping "${ns4}" 100.64.0.51 + do_ping "${ns5}" 100.64.0.41 + # Ping from SANs to hsr1 (via hsr2) (and opposite) + do_ping "${ns3}" 100.64.0.1 + do_ping "${ns1}" 100.64.0.3 + do_ping "${ns1}" 100.64.0.41 + do_ping "${ns4}" 100.64.0.1 + do_ping "${ns1}" 100.64.0.51 + do_ping "${ns5}" 100.64.0.1 + stop_if_error "Initial validation failed." + + # Wait for MGNT HSR frames being received and nodes being + # merged. + sleep 5 + + echo "INFO: Longer ping test (HSR-SAN/RedBox)." + # Ping from SAN to hsr1 (via hsr2) + do_ping_long "${ns3}" 100.64.0.1 + # Ping from hsr1 (via hsr2) to SANs (and opposite) + do_ping_long "${ns1}" 100.64.0.3 + do_ping_long "${ns1}" 100.64.0.41 + do_ping_long "${ns4}" 100.64.0.1 + do_ping_long "${ns1}" 100.64.0.51 + do_ping_long "${ns5}" 100.64.0.1 + stop_if_error "Longer ping test failed." + + echo "INFO: All good." +} + +setup_hsr_interfaces() +{ + local HSRv="$1" + + echo "INFO: preparing interfaces for HSRv${HSRv} (HSR-SAN/RedBox)." +# +# IPv4 addresses (100.64.X.Y/24), and [X.Y] is presented on below diagram: +# +# +# |NS1 | |NS4 | +# | [0.1] | | | +# | /-- hsr1 --\ | | [0.41] | +# | ns1eth1 ns1eth2 | | ns4eth1 (SAN) | +# |------------------------| |-------------------| +# | | | +# | | | +# | | | +# |------------------------| |-------------------------------| +# | ns2eth1 ns2eth2 | | ns3eth2 | +# | \-- hsr2 --/ | | / | +# | [0.2] \ | | / | |------------| +# | ns2eth3 |---| ns3eth1 -- ns3br1 -- ns3eth3--|--| ns5eth1 | +# | (interlink)| | [0.3] [0.11] | | [0.51] | +# |NS2 (RedBOX) | |NS3 (BR) | | NS5 (SAN) | +# +# + # Check if iproute2 supports adding interlink port to hsrX device + ip link help hsr | grep -q INTERLINK + [ $? -ne 0 ] && { echo "iproute2: HSR interlink interface not supported!"; exit 0; } + + # Create interfaces for name spaces + ip link add ns1eth1 netns "${ns1}" type veth peer name ns2eth1 netns "${ns2}" + ip link add ns1eth2 netns "${ns1}" type veth peer name ns2eth2 netns "${ns2}" + ip link add ns2eth3 netns "${ns2}" type veth peer name ns3eth1 netns "${ns3}" + ip link add ns3eth2 netns "${ns3}" type veth peer name ns4eth1 netns "${ns4}" + ip link add ns3eth3 netns "${ns3}" type veth peer name ns5eth1 netns "${ns5}" + + sleep 1 + + ip -n "${ns1}" link set ns1eth1 up + ip -n "${ns1}" link set ns1eth2 up + + ip -n "${ns2}" link set ns2eth1 up + ip -n "${ns2}" link set ns2eth2 up + ip -n "${ns2}" link set ns2eth3 up + + ip -n "${ns3}" link add name ns3br1 type bridge + ip -n "${ns3}" link set ns3br1 up + ip -n "${ns3}" link set ns3eth1 master ns3br1 up + ip -n "${ns3}" link set ns3eth2 master ns3br1 up + ip -n "${ns3}" link set ns3eth3 master ns3br1 up + + ip -n "${ns4}" link set ns4eth1 up + ip -n "${ns5}" link set ns5eth1 up + + ip -net "${ns1}" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version ${HSRv} proto 0 + ip -net "${ns2}" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 interlink ns2eth3 supervision 45 version ${HSRv} proto 0 + + ip -n "${ns1}" addr add 100.64.0.1/24 dev hsr1 + ip -n "${ns2}" addr add 100.64.0.2/24 dev hsr2 + ip -n "${ns3}" addr add 100.64.0.11/24 dev ns3br1 + ip -n "${ns3}" addr add 100.64.0.3/24 dev ns3eth1 + ip -n "${ns4}" addr add 100.64.0.41/24 dev ns4eth1 + ip -n "${ns5}" addr add 100.64.0.51/24 dev ns5eth1 + + ip -n "${ns1}" link set hsr1 up + ip -n "${ns2}" link set hsr2 up +} + +check_prerequisites +setup_ns ns1 ns2 ns3 ns4 ns5 + +trap cleanup_all_ns EXIT + +setup_hsr_interfaces 1 +do_complete_ping_test + +exit $ret diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 16372ca167..9155c914c0 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -4,11 +4,16 @@ ############################################################################## # Defines -WAIT_TIMEOUT=${WAIT_TIMEOUT:=20} +: "${WAIT_TIMEOUT:=20}" + BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms -# Kselftest framework requirement - SKIP code is 4. +# Kselftest framework constants. +ksft_pass=0 +ksft_fail=1 +ksft_xfail=2 ksft_skip=4 + # namespace list created by setup_ns NS_LIST=() diff --git a/tools/testing/selftests/net/lib/.gitignore b/tools/testing/selftests/net/lib/.gitignore new file mode 100644 index 0000000000..1ebc6187f4 --- /dev/null +++ b/tools/testing/selftests/net/lib/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +csum diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile new file mode 100644 index 0000000000..82c3264b11 --- /dev/null +++ b/tools/testing/selftests/net/lib/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS = -Wall -Wl,--no-as-needed -O2 -g +CFLAGS += -I../../../../../usr/include/ $(KHDR_INCLUDES) +# Additional include paths needed by kselftest.h +CFLAGS += -I../../ + +TEST_FILES := ../../../../../Documentation/netlink/specs +TEST_FILES += ../../../../net/ynl + +TEST_GEN_FILES += csum + +TEST_INCLUDES := $(wildcard py/*.py) + +include ../../lib.mk diff --git a/tools/testing/selftests/net/csum.c b/tools/testing/selftests/net/lib/csum.c index 90eb06fefa..b9f3fc3c34 100644 --- a/tools/testing/selftests/net/csum.c +++ b/tools/testing/selftests/net/lib/csum.c @@ -682,7 +682,7 @@ static int recv_verify_packet_ipv6(void *nh, int len) } /* return whether auxdata includes TP_STATUS_CSUM_VALID */ -static bool recv_verify_packet_csum(struct msghdr *msg) +static uint32_t recv_get_packet_csum_status(struct msghdr *msg) { struct tpacket_auxdata *aux = NULL; struct cmsghdr *cm; @@ -706,7 +706,7 @@ static bool recv_verify_packet_csum(struct msghdr *msg) if (!aux) error(1, 0, "cmsg: no auxdata"); - return aux->tp_status & TP_STATUS_CSUM_VALID; + return aux->tp_status; } static int recv_packet(int fd) @@ -716,6 +716,7 @@ static int recv_packet(int fd) char ctrl[CMSG_SPACE(sizeof(struct tpacket_auxdata))]; struct pkt *buf = (void *)_buf; struct msghdr msg = {0}; + uint32_t tp_status; struct iovec iov; int len, ret; @@ -737,6 +738,17 @@ static int recv_packet(int fd) if (len == -1) error(1, errno, "recv p"); + tp_status = recv_get_packet_csum_status(&msg); + + /* GRO might coalesce randomized packets. Such GSO packets are + * then reinitialized for csum offload (CHECKSUM_PARTIAL), with + * a pseudo csum. Do not try to validate these checksums. + */ + if (tp_status & TP_STATUS_CSUMNOTREADY) { + fprintf(stderr, "cmsg: GSO packet has partial csum: skip\n"); + continue; + } + if (cfg_family == PF_INET6) ret = recv_verify_packet_ipv6(buf, len); else @@ -753,7 +765,7 @@ static int recv_packet(int fd) * Do not fail if kernel does not validate a good csum: * Absence of validation does not imply invalid. */ - if (recv_verify_packet_csum(&msg) && cfg_bad_csum) { + if (tp_status & TP_STATUS_CSUM_VALID && cfg_bad_csum) { fprintf(stderr, "cmsg: expected bad csum, pf_packet returns valid\n"); bad_validations++; } diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py new file mode 100644 index 0000000000..b6d498d125 --- /dev/null +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 + +from .consts import KSRC +from .ksft import * +from .netns import NetNS +from .nsim import * +from .utils import * +from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily diff --git a/tools/testing/selftests/net/lib/py/consts.py b/tools/testing/selftests/net/lib/py/consts.py new file mode 100644 index 0000000000..f518ce79d8 --- /dev/null +++ b/tools/testing/selftests/net/lib/py/consts.py @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 + +import sys +from pathlib import Path + +KSFT_DIR = (Path(__file__).parent / "../../..").resolve() +KSRC = (Path(__file__).parent / "../../../../../..").resolve() + +KSFT_MAIN_NAME = Path(sys.argv[0]).with_suffix("").name diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py new file mode 100644 index 0000000000..4769b4eb1e --- /dev/null +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -0,0 +1,159 @@ +# SPDX-License-Identifier: GPL-2.0 + +import builtins +import inspect +import sys +import time +import traceback +from .consts import KSFT_MAIN_NAME + +KSFT_RESULT = None +KSFT_RESULT_ALL = True + + +class KsftFailEx(Exception): + pass + + +class KsftSkipEx(Exception): + pass + + +class KsftXfailEx(Exception): + pass + + +def ksft_pr(*objs, **kwargs): + print("#", *objs, **kwargs) + + +def _fail(*args): + global KSFT_RESULT + KSFT_RESULT = False + + frame = inspect.stack()[2] + ksft_pr("At " + frame.filename + " line " + str(frame.lineno) + ":") + ksft_pr(*args) + + +def ksft_eq(a, b, comment=""): + global KSFT_RESULT + if a != b: + _fail("Check failed", a, "!=", b, comment) + + +def ksft_true(a, comment=""): + if not a: + _fail("Check failed", a, "does not eval to True", comment) + + +def ksft_in(a, b, comment=""): + if a not in b: + _fail("Check failed", a, "not in", b, comment) + + +def ksft_ge(a, b, comment=""): + if a < b: + _fail("Check failed", a, "<", b, comment) + + +class ksft_raises: + def __init__(self, expected_type): + self.exception = None + self.expected_type = expected_type + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + if exc_type is None: + _fail(f"Expected exception {str(self.expected_type.__name__)}, none raised") + elif self.expected_type != exc_type: + _fail(f"Expected exception {str(self.expected_type.__name__)}, raised {str(exc_type.__name__)}") + self.exception = exc_val + # Suppress the exception if its the expected one + return self.expected_type == exc_type + + +def ksft_busy_wait(cond, sleep=0.005, deadline=1, comment=""): + end = time.monotonic() + deadline + while True: + if cond(): + return + if time.monotonic() > end: + _fail("Waiting for condition timed out", comment) + return + time.sleep(sleep) + + +def ktap_result(ok, cnt=1, case="", comment=""): + global KSFT_RESULT_ALL + KSFT_RESULT_ALL = KSFT_RESULT_ALL and ok + + res = "" + if not ok: + res += "not " + res += "ok " + res += str(cnt) + " " + res += KSFT_MAIN_NAME + if case: + res += "." + str(case.__name__) + if comment: + res += " # " + comment + print(res) + + +def ksft_run(cases=None, globs=None, case_pfx=None, args=()): + cases = cases or [] + + if globs and case_pfx: + for key, value in globs.items(): + if not callable(value): + continue + for prefix in case_pfx: + if key.startswith(prefix): + cases.append(value) + break + + totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0} + + print("KTAP version 1") + print("1.." + str(len(cases))) + + global KSFT_RESULT + cnt = 0 + for case in cases: + KSFT_RESULT = True + cnt += 1 + try: + case(*args) + except KsftSkipEx as e: + ktap_result(True, cnt, case, comment="SKIP " + str(e)) + totals['skip'] += 1 + continue + except KsftXfailEx as e: + ktap_result(True, cnt, case, comment="XFAIL " + str(e)) + totals['xfail'] += 1 + continue + except Exception as e: + tb = traceback.format_exc() + for line in tb.strip().split('\n'): + ksft_pr("Exception|", line) + ktap_result(False, cnt, case) + totals['fail'] += 1 + continue + + ktap_result(KSFT_RESULT, cnt, case) + if KSFT_RESULT: + totals['pass'] += 1 + else: + totals['fail'] += 1 + + print( + f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0" + ) + + +def ksft_exit(): + global KSFT_RESULT_ALL + sys.exit(0 if KSFT_RESULT_ALL else 1) diff --git a/tools/testing/selftests/net/lib/py/netns.py b/tools/testing/selftests/net/lib/py/netns.py new file mode 100644 index 0000000000..ecff85f907 --- /dev/null +++ b/tools/testing/selftests/net/lib/py/netns.py @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: GPL-2.0 + +from .utils import ip +import random +import string + + +class NetNS: + def __init__(self, name=None): + if name: + self.name = name + else: + self.name = ''.join(random.choice(string.ascii_lowercase) for _ in range(8)) + ip('netns add ' + self.name) + + def __del__(self): + if self.name: + ip('netns del ' + self.name) + self.name = None + + def __enter__(self): + return self + + def __exit__(self, ex_type, ex_value, ex_tb): + self.__del__() + + def __str__(self): + return self.name + + def __repr__(self): + return f"NetNS({self.name})" diff --git a/tools/testing/selftests/net/lib/py/nsim.py b/tools/testing/selftests/net/lib/py/nsim.py new file mode 100644 index 0000000000..f571a8b313 --- /dev/null +++ b/tools/testing/selftests/net/lib/py/nsim.py @@ -0,0 +1,134 @@ +# SPDX-License-Identifier: GPL-2.0 + +import json +import os +import random +import re +import time +from .utils import cmd, ip + + +class NetdevSim: + """ + Class for netdevsim netdevice and its attributes. + """ + + def __init__(self, nsimdev, port_index, ifname, ns=None): + # In case udev renamed the netdev to according to new schema, + # check if the name matches the port_index. + nsimnamere = re.compile(r"eni\d+np(\d+)") + match = nsimnamere.match(ifname) + if match and int(match.groups()[0]) != port_index + 1: + raise Exception("netdevice name mismatches the expected one") + + self.ifname = ifname + self.nsimdev = nsimdev + self.port_index = port_index + self.ns = ns + self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index) + ret = ip("-j link show dev %s" % ifname, ns=ns) + self.dev = json.loads(ret.stdout)[0] + self.ifindex = self.dev["ifindex"] + + def dfs_write(self, path, val): + self.nsimdev.dfs_write(f'ports/{self.port_index}/' + path, val) + + +class NetdevSimDev: + """ + Class for netdevsim bus device and its attributes. + """ + @staticmethod + def ctrl_write(path, val): + fullpath = os.path.join("/sys/bus/netdevsim/", path) + with open(fullpath, "w") as f: + f.write(val) + + def dfs_write(self, path, val): + fullpath = os.path.join(f"/sys/kernel/debug/netdevsim/netdevsim{self.addr}/", path) + with open(fullpath, "w") as f: + f.write(val) + + def __init__(self, port_count=1, queue_count=1, ns=None): + # nsim will spawn in init_net, we'll set to actual ns once we switch it there + self.ns = None + + if not os.path.exists("/sys/bus/netdevsim"): + cmd("modprobe netdevsim") + + addr = random.randrange(1 << 15) + while True: + try: + self.ctrl_write("new_device", "%u %u %u" % (addr, port_count, queue_count)) + except OSError as e: + if e.errno == errno.ENOSPC: + addr = random.randrange(1 << 15) + continue + raise e + break + self.addr = addr + + # As probe of netdevsim device might happen from a workqueue, + # so wait here until all netdevs appear. + self.wait_for_netdevs(port_count) + + if ns: + cmd(f"devlink dev reload netdevsim/netdevsim{addr} netns {ns.name}") + self.ns = ns + + cmd("udevadm settle", ns=self.ns) + ifnames = self.get_ifnames() + + self.dfs_dir = "/sys/kernel/debug/netdevsim/netdevsim%u/" % addr + + self.nsims = [] + for port_index in range(port_count): + self.nsims.append(self._make_port(port_index, ifnames[port_index])) + + self.removed = False + + def __enter__(self): + return self + + def __exit__(self, ex_type, ex_value, ex_tb): + """ + __exit__ gets called at the end of a "with" block. + """ + self.remove() + + def _make_port(self, port_index, ifname): + return NetdevSim(self, port_index, ifname, self.ns) + + def get_ifnames(self): + ifnames = [] + listdir = cmd(f"ls /sys/bus/netdevsim/devices/netdevsim{self.addr}/net/", + ns=self.ns).stdout.split() + for ifname in listdir: + ifnames.append(ifname) + ifnames.sort() + return ifnames + + def wait_for_netdevs(self, port_count): + timeout = 5 + timeout_start = time.time() + + while True: + try: + ifnames = self.get_ifnames() + except FileNotFoundError as e: + ifnames = [] + if len(ifnames) == port_count: + break + if time.time() < timeout_start + timeout: + continue + raise Exception("netdevices did not appear within timeout") + + def remove(self): + if not self.removed: + self.ctrl_write("del_device", "%u" % (self.addr, )) + self.removed = True + + def remove_nsim(self, nsim): + self.nsims.remove(nsim) + self.ctrl_write("devices/netdevsim%u/del_port" % (self.addr, ), + "%u" % (nsim.port_index, )) diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py new file mode 100644 index 0000000000..0540ea2492 --- /dev/null +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -0,0 +1,102 @@ +# SPDX-License-Identifier: GPL-2.0 + +import json as _json +import random +import re +import subprocess +import time + + +class cmd: + def __init__(self, comm, shell=True, fail=True, ns=None, background=False, host=None, timeout=5): + if ns: + comm = f'ip netns exec {ns} ' + comm + + self.stdout = None + self.stderr = None + self.ret = None + + self.comm = comm + if host: + self.proc = host.cmd(comm) + else: + self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + if not background: + self.process(terminate=False, fail=fail, timeout=timeout) + + def process(self, terminate=True, fail=None, timeout=5): + if fail is None: + fail = not terminate + + if terminate: + self.proc.terminate() + stdout, stderr = self.proc.communicate(timeout) + self.stdout = stdout.decode("utf-8") + self.stderr = stderr.decode("utf-8") + self.proc.stdout.close() + self.proc.stderr.close() + self.ret = self.proc.returncode + + if self.proc.returncode != 0 and fail: + if len(stderr) > 0 and stderr[-1] == "\n": + stderr = stderr[:-1] + raise Exception("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" % + (self.proc.args, stdout, stderr)) + + +class bkg(cmd): + def __init__(self, comm, shell=True, fail=None, ns=None, host=None, + exit_wait=False): + super().__init__(comm, background=True, + shell=shell, fail=fail, ns=ns, host=host) + self.terminate = not exit_wait + self.check_fail = fail + + def __enter__(self): + return self + + def __exit__(self, ex_type, ex_value, ex_tb): + return self.process(terminate=self.terminate, fail=self.check_fail) + + +def tool(name, args, json=None, ns=None, host=None): + cmd_str = name + ' ' + if json: + cmd_str += '--json ' + cmd_str += args + cmd_obj = cmd(cmd_str, ns=ns, host=host) + if json: + return _json.loads(cmd_obj.stdout) + return cmd_obj + + +def ip(args, json=None, ns=None, host=None): + if ns: + args = f'-netns {ns} ' + args + return tool('ip', args, json=json, host=host) + + +def rand_port(): + """ + Get unprivileged port, for now just random, one day we may decide to check if used. + """ + return random.randint(10000, 65535) + + +def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadline=5): + end = time.monotonic() + deadline + + pattern = f":{port:04X} .* " + if proto == "tcp": # for tcp protocol additionally check the socket state + pattern += "0A" + pattern = re.compile(pattern) + + while True: + data = cmd(f'cat /proc/net/{proto}*', ns=ns, host=host, shell=True).stdout + for row in data.split("\n"): + if pattern.search(row): + return + if time.monotonic() > end: + raise Exception("Waiting for port listen timed out") + time.sleep(sleep) diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py new file mode 100644 index 0000000000..1ace58370c --- /dev/null +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -0,0 +1,49 @@ +# SPDX-License-Identifier: GPL-2.0 + +import sys +from pathlib import Path +from .consts import KSRC, KSFT_DIR +from .ksft import ksft_pr, ktap_result + +# Resolve paths +try: + if (KSFT_DIR / "kselftest-list.txt").exists(): + # Running in "installed" selftests + tools_full_path = KSFT_DIR + SPEC_PATH = KSFT_DIR / "net/lib/specs" + + sys.path.append(tools_full_path.as_posix()) + from net.lib.ynl.lib import YnlFamily, NlError + else: + # Running in tree + tools_full_path = KSRC / "tools" + SPEC_PATH = KSRC / "Documentation/netlink/specs" + + sys.path.append(tools_full_path.as_posix()) + from net.ynl.lib import YnlFamily, NlError +except ModuleNotFoundError as e: + ksft_pr("Failed importing `ynl` library from kernel sources") + ksft_pr(str(e)) + ktap_result(True, comment="SKIP") + sys.exit(4) + +# +# Wrapper classes, loading the right specs +# Set schema='' to avoid jsonschema validation, it's slow +# +class EthtoolFamily(YnlFamily): + def __init__(self): + super().__init__((SPEC_PATH / Path('ethtool.yaml')).as_posix(), + schema='') + + +class RtnlFamily(YnlFamily): + def __init__(self): + super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(), + schema='') + + +class NetdevFamily(YnlFamily): + def __init__(self): + super().__init__((SPEC_PATH / Path('netdev.yaml')).as_posix(), + schema='') diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index bc97ab33a0..776d43a692 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -200,6 +200,58 @@ chk_msk_cestab() "${expected}" "${msg}" "" } +msk_info_get_value() +{ + local port="${1}" + local info="${2}" + + ss -N "${ns}" -inHM dport "${port}" | \ + mptcp_lib_get_info_value "${info}" "${info}" +} + +chk_msk_info() +{ + local port="${1}" + local info="${2}" + local cnt="${3}" + local msg="....chk ${info}" + local delta_ms=250 # half what we waited before, just to be sure + local now + + now=$(msk_info_get_value "${port}" "${info}") + + mptcp_lib_print_title "${msg}" + if { [ -z "${cnt}" ] || [ -z "${now}" ]; } && + ! mptcp_lib_expect_all_features; then + mptcp_lib_pr_skip "Feature probably not supported" + mptcp_lib_result_skip "${msg}" + elif [ "$((cnt + delta_ms))" -lt "${now}" ]; then + mptcp_lib_pr_ok + mptcp_lib_result_pass "${msg}" + else + mptcp_lib_pr_fail "value of ${info} changed by $((now - cnt))ms," \ + "expected at least ${delta_ms}ms" + mptcp_lib_result_fail "${msg}" + ret=${KSFT_FAIL} + fi +} + +chk_last_time_info() +{ + local port="${1}" + local data_sent data_recv ack_recv + + data_sent=$(msk_info_get_value "${port}" "last_data_sent") + data_recv=$(msk_info_get_value "${port}" "last_data_recv") + ack_recv=$(msk_info_get_value "${port}" "last_ack_recv") + + sleep 0.5 # wait to check after if the timestamps difference + + chk_msk_info "${port}" "last_data_sent" "${data_sent}" + chk_msk_info "${port}" "last_data_recv" "${data_recv}" + chk_msk_info "${port}" "last_ack_recv" "${ack_recv}" +} + wait_connected() { local listener_ns="${1}" @@ -233,6 +285,7 @@ echo "b" | \ 127.0.0.1 >/dev/null & wait_connected $ns 10000 chk_msk_nr 2 "after MPC handshake " +chk_last_time_info 10000 chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" chk_msk_inuse 2 diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 4131f3263a..b77fb7065b 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -147,7 +147,7 @@ cleanup() mptcp_lib_check_mptcp mptcp_lib_check_kallsyms -mptcp_lib_check_tools ip +mptcp_lib_check_tools ip tc sin=$(mktemp) sout=$(mktemp) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 2290125f6d..108aeeb84e 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -31,7 +31,6 @@ timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) capture=false checksum=false -ip_mptcp=0 check_invert=0 validate_checksum=false init=0 @@ -142,7 +141,7 @@ init() { mptcp_lib_check_mptcp mptcp_lib_check_kallsyms - mptcp_lib_check_tools ip ss "${iptables}" "${ip6tables}" + mptcp_lib_check_tools ip tc ss "${iptables}" "${ip6tables}" sin=$(mktemp) sout=$(mktemp) @@ -610,173 +609,65 @@ kill_events_pids() pm_nl_set_limits() { - local ns=$1 - local addrs=$2 - local subflows=$3 - - if [ $ip_mptcp -eq 1 ]; then - ip -n $ns mptcp limits set add_addr_accepted $addrs subflows $subflows - else - ip netns exec $ns ./pm_nl_ctl limits $addrs $subflows - fi + mptcp_lib_pm_nl_set_limits "${@}" } pm_nl_add_endpoint() { - local ns=$1 - local addr=$2 - local flags _flags - local port _port - local dev _dev - local id _id - local nr=2 - - local p - for p in "${@}" - do - if [ $p = "flags" ]; then - eval _flags=\$"$nr" - [ -n "$_flags" ]; flags="flags $_flags" - fi - if [ $p = "dev" ]; then - eval _dev=\$"$nr" - [ -n "$_dev" ]; dev="dev $_dev" - fi - if [ $p = "id" ]; then - eval _id=\$"$nr" - [ -n "$_id" ]; id="id $_id" - fi - if [ $p = "port" ]; then - eval _port=\$"$nr" - [ -n "$_port" ]; port="port $_port" - fi - - nr=$((nr + 1)) - done - - if [ $ip_mptcp -eq 1 ]; then - ip -n $ns mptcp endpoint add $addr ${_flags//","/" "} $dev $id $port - else - ip netns exec $ns ./pm_nl_ctl add $addr $flags $dev $id $port - fi + mptcp_lib_pm_nl_add_endpoint "${@}" } pm_nl_del_endpoint() { - local ns=$1 - local id=$2 - local addr=$3 - - if [ $ip_mptcp -eq 1 ]; then - [ $id -ne 0 ] && addr='' - ip -n $ns mptcp endpoint delete id $id $addr - else - ip netns exec $ns ./pm_nl_ctl del $id $addr - fi + mptcp_lib_pm_nl_del_endpoint "${@}" } pm_nl_flush_endpoint() { - local ns=$1 - - if [ $ip_mptcp -eq 1 ]; then - ip -n $ns mptcp endpoint flush - else - ip netns exec $ns ./pm_nl_ctl flush - fi + mptcp_lib_pm_nl_flush_endpoint "${@}" } pm_nl_show_endpoints() { - local ns=$1 - - if [ $ip_mptcp -eq 1 ]; then - ip -n $ns mptcp endpoint show - else - ip netns exec $ns ./pm_nl_ctl dump - fi + mptcp_lib_pm_nl_show_endpoints "${@}" } pm_nl_change_endpoint() { - local ns=$1 - local id=$2 - local flags=$3 - - if [ $ip_mptcp -eq 1 ]; then - ip -n $ns mptcp endpoint change id $id ${flags//","/" "} - else - ip netns exec $ns ./pm_nl_ctl set id $id flags $flags - fi + mptcp_lib_pm_nl_change_endpoint "${@}" } pm_nl_check_endpoint() { - local line expected_line local msg="$1" local ns=$2 local addr=$3 - local _flags="" - local flags - local _port - local port - local dev - local _id - local id + local flags dev id port print_check "${msg}" shift 3 while [ -n "$1" ]; do - if [ $1 = "flags" ]; then - _flags=$2 - [ -n "$_flags" ]; flags="flags $_flags" - shift - elif [ $1 = "dev" ]; then - [ -n "$2" ]; dev="dev $2" + case "${1}" in + "flags" | "dev" | "id" | "port") + eval "${1}"="${2}" shift - elif [ $1 = "id" ]; then - _id=$2 - [ -n "$_id" ]; id="id $_id" - shift - elif [ $1 = "port" ]; then - _port=$2 - [ -n "$_port" ]; port=" port $_port" - shift - fi + ;; + *) + ;; + esac shift done - if [ -z "$id" ]; then + if [ -z "${id}" ]; then test_fail "bad test - missing endpoint id" return fi - if [ $ip_mptcp -eq 1 ]; then - # get line and trim trailing whitespace - line=$(ip -n $ns mptcp endpoint show $id) - line="${line% }" - # the dump order is: address id flags port dev - [ -n "$addr" ] && expected_line="$addr" - expected_line+=" $id" - [ -n "$_flags" ] && expected_line+=" ${_flags//","/" "}" - [ -n "$dev" ] && expected_line+=" $dev" - [ -n "$port" ] && expected_line+=" $port" - else - line=$(ip netns exec $ns ./pm_nl_ctl get $_id) - # the dump order is: id flags dev address port - expected_line="$id" - [ -n "$flags" ] && expected_line+=" $flags" - [ -n "$dev" ] && expected_line+=" $dev" - [ -n "$addr" ] && expected_line+=" $addr" - [ -n "$_port" ] && expected_line+=" $_port" - fi - if [ "$line" = "$expected_line" ]; then - print_ok - else - fail_test "expected '$expected_line' found '$line'" - fi + check_output "mptcp_lib_pm_nl_get_endpoint ${ns} ${id}" \ + "$(mptcp_lib_pm_nl_format_endpoints \ + "${id},${addr},${flags//","/" "},${dev},${port}")" } pm_nl_set_endpoint() @@ -3711,7 +3602,7 @@ while getopts "${all_tests_args}cCih" opt; do checksum=true ;; i) - ip_mptcp=1 + mptcp_lib_set_ip_mptcp ;; h) usage diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index d529b4b37a..6ffa9b7a32 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -21,8 +21,10 @@ declare -rx MPTCP_LIB_AF_INET6=10 MPTCP_LIB_SUBTESTS=() MPTCP_LIB_SUBTESTS_DUPLICATED=0 +MPTCP_LIB_SUBTEST_FLAKY=0 MPTCP_LIB_TEST_COUNTER=0 MPTCP_LIB_TEST_FORMAT="%02u %-50s" +MPTCP_LIB_IP_MPTCP=0 # only if supported (or forced) and not disabled, see no-color.org if { [ -t 1 ] || [ "${SELFTESTS_MPTCP_LIB_COLOR_FORCE:-}" = "1" ]; } && @@ -40,6 +42,16 @@ else readonly MPTCP_LIB_COLOR_RESET= fi +# SELFTESTS_MPTCP_LIB_OVERRIDE_FLAKY env var can be set not to ignore errors +# from subtests marked as flaky +mptcp_lib_override_flaky() { + [ "${SELFTESTS_MPTCP_LIB_OVERRIDE_FLAKY:-}" = 1 ] +} + +mptcp_lib_subtest_is_flaky() { + [ "${MPTCP_LIB_SUBTEST_FLAKY}" = 1 ] && ! mptcp_lib_override_flaky +} + # $1: color, $2: text mptcp_lib_print_color() { echo -e "${MPTCP_LIB_START_PRINT:-}${*}${MPTCP_LIB_COLOR_RESET}" @@ -71,7 +83,16 @@ mptcp_lib_pr_skip() { } mptcp_lib_pr_fail() { - mptcp_lib_print_err "[FAIL]${1:+ ${*}}" + local title cmt + + if mptcp_lib_subtest_is_flaky; then + title="IGNO" + cmt=" (flaky)" + else + title="FAIL" + fi + + mptcp_lib_print_err "[${title}]${cmt}${1:+ ${*}}" } mptcp_lib_pr_info() { @@ -207,7 +228,13 @@ mptcp_lib_result_pass() { # $1: test name mptcp_lib_result_fail() { - __mptcp_lib_result_add "not ok" "${1}" + if mptcp_lib_subtest_is_flaky; then + # It might sound better to use 'not ok # TODO' or 'ok # SKIP', + # but some CIs don't understand 'TODO' and treat SKIP as errors. + __mptcp_lib_result_add "ok" "${1} # IGNORE Flaky" + else + __mptcp_lib_result_add "not ok" "${1}" + fi } # $1: test name @@ -384,6 +411,12 @@ mptcp_lib_check_tools() { exit ${KSFT_SKIP} fi ;; + "tc") + if ! tc -help &> /dev/null; then + mptcp_lib_pr_skip "Could not run test without tc tool" + exit ${KSFT_SKIP} + fi + ;; "ss") if ! ss -h | grep -q MPTCP; then mptcp_lib_pr_skip "ss tool does not support MPTCP" @@ -505,3 +538,131 @@ mptcp_lib_verify_listener_events() { mptcp_lib_check_expected "type" "family" "saddr" "sport" || rc="${?}" return "${rc}" } + +mptcp_lib_set_ip_mptcp() { + MPTCP_LIB_IP_MPTCP=1 +} + +mptcp_lib_is_ip_mptcp() { + [ "${MPTCP_LIB_IP_MPTCP}" = "1" ] +} + +# format: <id>,<ip>,<flags>,<dev> +mptcp_lib_pm_nl_format_endpoints() { + local entry id ip flags dev port + + for entry in "${@}"; do + IFS=, read -r id ip flags dev port <<< "${entry}" + if mptcp_lib_is_ip_mptcp; then + echo -n "${ip}" + [ -n "${port}" ] && echo -n " port ${port}" + echo -n " id ${id}" + [ -n "${flags}" ] && echo -n " ${flags}" + [ -n "${dev}" ] && echo -n " dev ${dev}" + echo " " # always a space at the end + else + echo -n "id ${id}" + echo -n " flags ${flags//" "/","}" + [ -n "${dev}" ] && echo -n " dev ${dev}" + echo -n " ${ip}" + [ -n "${port}" ] && echo -n " ${port}" + echo "" + fi + done +} + +mptcp_lib_pm_nl_get_endpoint() { + local ns=${1} + local id=${2} + + if mptcp_lib_is_ip_mptcp; then + ip -n "${ns}" mptcp endpoint show id "${id}" + else + ip netns exec "${ns}" ./pm_nl_ctl get "${id}" + fi +} + +mptcp_lib_pm_nl_set_limits() { + local ns=${1} + local addrs=${2} + local subflows=${3} + + if mptcp_lib_is_ip_mptcp; then + ip -n "${ns}" mptcp limits set add_addr_accepted "${addrs}" subflows "${subflows}" + else + ip netns exec "${ns}" ./pm_nl_ctl limits "${addrs}" "${subflows}" + fi +} + +mptcp_lib_pm_nl_add_endpoint() { + local ns=${1} + local addr=${2} + local flags dev id port + local nr=2 + + local p + for p in "${@}"; do + case "${p}" in + "flags" | "dev" | "id" | "port") + eval "${p}"=\$"${nr}" + ;; + esac + + nr=$((nr + 1)) + done + + if mptcp_lib_is_ip_mptcp; then + # shellcheck disable=SC2086 # blanks in flags, no double quote + ip -n "${ns}" mptcp endpoint add "${addr}" ${flags//","/" "} \ + ${dev:+dev "${dev}"} ${id:+id "${id}"} ${port:+port "${port}"} + else + ip netns exec "${ns}" ./pm_nl_ctl add "${addr}" ${flags:+flags "${flags}"} \ + ${dev:+dev "${dev}"} ${id:+id "${id}"} ${port:+port "${port}"} + fi +} + +mptcp_lib_pm_nl_del_endpoint() { + local ns=${1} + local id=${2} + local addr=${3} + + if mptcp_lib_is_ip_mptcp; then + [ "${id}" -ne 0 ] && addr='' + ip -n "${ns}" mptcp endpoint delete id "${id}" ${addr:+"${addr}"} + else + ip netns exec "${ns}" ./pm_nl_ctl del "${id}" "${addr}" + fi +} + +mptcp_lib_pm_nl_flush_endpoint() { + local ns=${1} + + if mptcp_lib_is_ip_mptcp; then + ip -n "${ns}" mptcp endpoint flush + else + ip netns exec "${ns}" ./pm_nl_ctl flush + fi +} + +mptcp_lib_pm_nl_show_endpoints() { + local ns=${1} + + if mptcp_lib_is_ip_mptcp; then + ip -n "${ns}" mptcp endpoint show + else + ip netns exec "${ns}" ./pm_nl_ctl dump + fi +} + +mptcp_lib_pm_nl_change_endpoint() { + local ns=${1} + local id=${2} + local flags=${3} + + if mptcp_lib_is_ip_mptcp; then + # shellcheck disable=SC2086 # blanks in flags, no double quote + ip -n "${ns}" mptcp endpoint change id "${id}" ${flags//","/" "} + else + ip netns exec "${ns}" ./pm_nl_ctl set id "${id}" flags "${flags}" + fi +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index e2d70c1878..68899a303a 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -22,6 +22,28 @@ ns1="" ns2="" ns_sbox="" +usage() { + echo "Usage: $0 [ -i ] [ -h ]" + echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'" + echo -e "\t-h: help" +} + +while getopts "hi" option;do + case "$option" in + "h") + usage "$0" + exit ${KSFT_PASS} + ;; + "i") + mptcp_lib_set_ip_mptcp + ;; + "?") + usage "$0" + exit ${KSFT_FAIL} + ;; + esac +done + add_mark_rules() { local ns=$1 @@ -58,15 +80,15 @@ init() # let $ns2 reach any $ns1 address from any interface ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i - ip netns exec $ns1 ./pm_nl_ctl add 10.0.$i.1 flags signal - ip netns exec $ns1 ./pm_nl_ctl add dead:beef:$i::1 flags signal + mptcp_lib_pm_nl_add_endpoint "${ns1}" "10.0.${i}.1" flags signal + mptcp_lib_pm_nl_add_endpoint "${ns1}" "dead:beef:${i}::1" flags signal - ip netns exec $ns2 ./pm_nl_ctl add 10.0.$i.2 flags signal - ip netns exec $ns2 ./pm_nl_ctl add dead:beef:$i::2 flags signal + mptcp_lib_pm_nl_add_endpoint "${ns2}" "10.0.${i}.2" flags signal + mptcp_lib_pm_nl_add_endpoint "${ns2}" "dead:beef:${i}::2" flags signal done - ip netns exec $ns1 ./pm_nl_ctl limits 8 8 - ip netns exec $ns2 ./pm_nl_ctl limits 8 8 + mptcp_lib_pm_nl_set_limits "${ns1}" 8 8 + mptcp_lib_pm_nl_set_limits "${ns2}" 8 8 add_mark_rules $ns1 1 add_mark_rules $ns2 2 diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 6ab8c5d363..2757378b1b 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -1,28 +1,28 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -# Double quotes to prevent globbing and word splitting is recommended in new -# code but we accept it, especially because there were too many before having -# address all other issues detected by shellcheck. -#shellcheck disable=SC2086 - . "$(dirname "${0}")/mptcp_lib.sh" ret=0 usage() { - echo "Usage: $0 [ -h ]" + echo "Usage: $0 [ -i ] [ -h ]" + echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'" + echo -e "\t-h: help" } -optstring=h +optstring=hi while getopts "$optstring" option;do case "$option" in "h") - usage $0 + usage "$0" exit ${KSFT_PASS} ;; + "i") + mptcp_lib_set_ip_mptcp + ;; "?") - usage $0 + usage "$0" exit ${KSFT_FAIL} ;; esac @@ -35,7 +35,7 @@ err=$(mktemp) #shellcheck disable=SC2317 cleanup() { - rm -f $err + rm -f "${err}" mptcp_lib_ns_exit "${ns1}" } @@ -46,6 +46,76 @@ trap cleanup EXIT mptcp_lib_ns_init ns1 +format_limits() { + local accept="${1}" + local subflows="${2}" + + if mptcp_lib_is_ip_mptcp; then + # with a space at the end + printf "add_addr_accepted %d subflows %d \n" "${accept}" "${subflows}" + else + printf "accept %d\nsubflows %d\n" "${accept}" "${subflows}" + fi +} + +get_limits() { + if mptcp_lib_is_ip_mptcp; then + ip -n "${ns1}" mptcp limits + else + ip netns exec "${ns1}" ./pm_nl_ctl limits + fi +} + +format_endpoints() { + mptcp_lib_pm_nl_format_endpoints "${@}" +} + +get_endpoint() { + # shellcheck disable=SC2317 # invoked indirectly + mptcp_lib_pm_nl_get_endpoint "${ns1}" "${@}" +} + +change_address() { + local addr=${1} + local flags=${2} + + if mptcp_lib_is_ip_mptcp; then + ip -n "${ns1}" mptcp endpoint change "${addr}" "${flags}" + else + ip netns exec "${ns1}" ./pm_nl_ctl set "${addr}" flags "${flags}" + fi +} + +set_limits() +{ + mptcp_lib_pm_nl_set_limits "${ns1}" "${@}" +} + +add_endpoint() +{ + mptcp_lib_pm_nl_add_endpoint "${ns1}" "${@}" +} + +del_endpoint() +{ + mptcp_lib_pm_nl_del_endpoint "${ns1}" "${@}" +} + +flush_endpoint() +{ + mptcp_lib_pm_nl_flush_endpoint "${ns1}" +} + +show_endpoints() +{ + mptcp_lib_pm_nl_show_endpoints "${ns1}" +} + +change_endpoint() +{ + mptcp_lib_pm_nl_change_endpoint "${ns1}" "${@}" +} + check() { local cmd="$1" @@ -67,125 +137,126 @@ check() fi } -check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list" +check "show_endpoints" "" "defaults addr list" -default_limits="$(ip netns exec $ns1 ./pm_nl_ctl limits)" +default_limits="$(get_limits)" if mptcp_lib_expect_all_features; then - check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 -subflows 2" "defaults limits" + check "get_limits" "$(format_limits 0 2)" "defaults limits" fi -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 flags signal,backup -check "ip netns exec $ns1 ./pm_nl_ctl get 1" "id 1 flags 10.0.1.1" "simple add/get addr" +add_endpoint 10.0.1.1 +add_endpoint 10.0.1.2 flags subflow dev lo +add_endpoint 10.0.1.3 flags signal,backup +check "get_endpoint 1" "$(format_endpoints "1,10.0.1.1")" "simple add/get addr" -check "ip netns exec $ns1 ./pm_nl_ctl dump" \ -"id 1 flags 10.0.1.1 -id 2 flags subflow dev lo 10.0.1.2 -id 3 flags signal,backup 10.0.1.3" "dump addrs" +check "show_endpoints" \ + "$(format_endpoints "1,10.0.1.1" \ + "2,10.0.1.2,subflow,lo" \ + "3,10.0.1.3,signal backup")" "dump addrs" -ip netns exec $ns1 ./pm_nl_ctl del 2 -check "ip netns exec $ns1 ./pm_nl_ctl get 2" "" "simple del addr" -check "ip netns exec $ns1 ./pm_nl_ctl dump" \ -"id 1 flags 10.0.1.1 -id 3 flags signal,backup 10.0.1.3" "dump addrs after del" +del_endpoint 2 +check "get_endpoint 2" "" "simple del addr" +check "show_endpoints" \ + "$(format_endpoints "1,10.0.1.1" \ + "3,10.0.1.3,signal backup")" "dump addrs after del" -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 2>/dev/null -check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr" +add_endpoint 10.0.1.3 2>/dev/null +check "get_endpoint 4" "" "duplicate addr" -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 flags signal -check "ip netns exec $ns1 ./pm_nl_ctl get 4" "id 4 flags signal 10.0.1.4" "id addr increment" +add_endpoint 10.0.1.4 flags signal +check "get_endpoint 4" "$(format_endpoints "4,10.0.1.4,signal")" "id addr increment" for i in $(seq 5 9); do - ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.$i flags signal >/dev/null 2>&1 + add_endpoint "10.0.1.${i}" flags signal >/dev/null 2>&1 done -check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit" -check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit" +check "get_endpoint 9" "$(format_endpoints "9,10.0.1.9,signal")" "hard addr limit" +check "get_endpoint 10" "" "above hard addr limit" -ip netns exec $ns1 ./pm_nl_ctl del 9 +del_endpoint 9 for i in $(seq 10 255); do - ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $i - ip netns exec $ns1 ./pm_nl_ctl del $i + add_endpoint 10.0.0.9 id "${i}" + del_endpoint "${i}" done -check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1 -id 3 flags signal,backup 10.0.1.3 -id 4 flags signal 10.0.1.4 -id 5 flags signal 10.0.1.5 -id 6 flags signal 10.0.1.6 -id 7 flags signal 10.0.1.7 -id 8 flags signal 10.0.1.8" "id limit" - -ip netns exec $ns1 ./pm_nl_ctl flush -check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs" - -ip netns exec $ns1 ./pm_nl_ctl limits 9 1 2>/dev/null -check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "rcv addrs above hard limit" - -ip netns exec $ns1 ./pm_nl_ctl limits 1 9 2>/dev/null -check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "subflows above hard limit" - -ip netns exec $ns1 ./pm_nl_ctl limits 8 8 -check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8 -subflows 8" "set limits" - -ip netns exec $ns1 ./pm_nl_ctl flush -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 id 100 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.5 id 254 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.6 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.7 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.8 -check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1 -id 2 flags 10.0.1.2 -id 3 flags 10.0.1.7 -id 4 flags 10.0.1.8 -id 100 flags 10.0.1.3 -id 101 flags 10.0.1.4 -id 254 flags 10.0.1.5 -id 255 flags 10.0.1.6" "set ids" - -ip netns exec $ns1 ./pm_nl_ctl flush -ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.1 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.2 id 254 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.3 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.4 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.5 id 253 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.6 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.7 -ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.8 -check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.0.1 -id 2 flags 10.0.0.4 -id 3 flags 10.0.0.6 -id 4 flags 10.0.0.7 -id 5 flags 10.0.0.8 -id 253 flags 10.0.0.5 -id 254 flags 10.0.0.2 -id 255 flags 10.0.0.3" "wrap-around ids" - -ip netns exec $ns1 ./pm_nl_ctl flush -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags subflow -ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags backup -check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ -subflow,backup 10.0.1.1" "set flags (backup)" -ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags nobackup -check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ -subflow 10.0.1.1" " (nobackup)" +check "show_endpoints" \ + "$(format_endpoints "1,10.0.1.1" \ + "3,10.0.1.3,signal backup" \ + "4,10.0.1.4,signal" \ + "5,10.0.1.5,signal" \ + "6,10.0.1.6,signal" \ + "7,10.0.1.7,signal" \ + "8,10.0.1.8,signal")" "id limit" + +flush_endpoint +check "show_endpoints" "" "flush addrs" + +set_limits 9 1 2>/dev/null +check "get_limits" "${default_limits}" "rcv addrs above hard limit" + +set_limits 1 9 2>/dev/null +check "get_limits" "${default_limits}" "subflows above hard limit" + +set_limits 8 8 +check "get_limits" "$(format_limits 8 8)" "set limits" + +flush_endpoint +add_endpoint 10.0.1.1 +add_endpoint 10.0.1.2 +add_endpoint 10.0.1.3 id 100 +add_endpoint 10.0.1.4 +add_endpoint 10.0.1.5 id 254 +add_endpoint 10.0.1.6 +add_endpoint 10.0.1.7 +add_endpoint 10.0.1.8 +check "show_endpoints" \ + "$(format_endpoints "1,10.0.1.1" \ + "2,10.0.1.2" \ + "3,10.0.1.7" \ + "4,10.0.1.8" \ + "100,10.0.1.3" \ + "101,10.0.1.4" \ + "254,10.0.1.5" \ + "255,10.0.1.6")" "set ids" + +flush_endpoint +add_endpoint 10.0.0.1 +add_endpoint 10.0.0.2 id 254 +add_endpoint 10.0.0.3 +add_endpoint 10.0.0.4 +add_endpoint 10.0.0.5 id 253 +add_endpoint 10.0.0.6 +add_endpoint 10.0.0.7 +add_endpoint 10.0.0.8 +check "show_endpoints" \ + "$(format_endpoints "1,10.0.0.1" \ + "2,10.0.0.4" \ + "3,10.0.0.6" \ + "4,10.0.0.7" \ + "5,10.0.0.8" \ + "253,10.0.0.5" \ + "254,10.0.0.2" \ + "255,10.0.0.3")" "wrap-around ids" + +flush_endpoint +add_endpoint 10.0.1.1 flags subflow +change_address 10.0.1.1 backup +check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow backup")" \ + "set flags (backup)" +change_address 10.0.1.1 nobackup +check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow")" \ + " (nobackup)" # fullmesh support has been added later -ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh 2>/dev/null -if ip netns exec $ns1 ./pm_nl_ctl dump | grep -q "fullmesh" || +change_endpoint 1 fullmesh 2>/dev/null +if show_endpoints | grep -q "fullmesh" || mptcp_lib_expect_all_features; then - check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ -subflow,fullmesh 10.0.1.1" " (fullmesh)" - ip netns exec $ns1 ./pm_nl_ctl set id 1 flags nofullmesh - check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ -subflow 10.0.1.1" " (nofullmesh)" - ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh - check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \ -subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)" + check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow fullmesh")" \ + " (fullmesh)" + change_endpoint 1 nofullmesh + check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow")" \ + " (nofullmesh)" + change_endpoint 1 backup,fullmesh + check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow backup fullmesh")" \ + " (backup,fullmesh)" else for st in fullmesh nofullmesh backup,fullmesh; do st=" (${st})" diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 7322e1e4e5..f74e1c3c12 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -27,10 +27,11 @@ capout="" size=0 usage() { - echo "Usage: $0 [ -b ] [ -c ] [ -d ]" + echo "Usage: $0 [ -b ] [ -c ] [ -d ] [ -i]" echo -e "\t-b: bail out after first error, otherwise runs al testcases" echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)" echo -e "\t-d: debug this script" + echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'" } # This function is used in the cleanup trap @@ -45,7 +46,7 @@ cleanup() } mptcp_lib_check_mptcp -mptcp_lib_check_tools ip +mptcp_lib_check_tools ip tc # "$ns1" ns2 ns3 # ns1eth1 ns2eth1 ns2eth3 ns3eth1 @@ -85,8 +86,8 @@ setup() ip -net "$ns1" route add default via 10.0.2.2 metric 101 ip -net "$ns1" route add default via dead:beef:2::2 metric 101 - ip netns exec "$ns1" ./pm_nl_ctl limits 1 1 - ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow + mptcp_lib_pm_nl_set_limits "${ns1}" 1 1 + mptcp_lib_pm_nl_add_endpoint "${ns1}" 10.0.2.1 dev ns1eth2 flags subflow ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1 ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad @@ -108,7 +109,7 @@ setup() ip -net "$ns3" route add default via 10.0.3.2 ip -net "$ns3" route add default via dead:beef:3::2 - ip netns exec "$ns3" ./pm_nl_ctl limits 1 1 + mptcp_lib_pm_nl_set_limits "${ns3}" 1 1 # debug build can slow down measurably the test program # we use quite tight time limit on the run-time, to ensure @@ -259,7 +260,7 @@ run_test() fi } -while getopts "bcdh" option;do +while getopts "bcdhi" option;do case "$option" in "h") usage $0 @@ -274,6 +275,9 @@ while getopts "bcdh" option;do "d") set -x ;; + "i") + mptcp_lib_set_ip_mptcp + ;; "?") usage $0 exit ${KSFT_FAIL} diff --git a/tools/testing/selftests/net/nat6to4.c b/tools/testing/selftests/net/nat6to4.bpf.c index ac54c36b25..ac54c36b25 100644 --- a/tools/testing/selftests/net/nat6to4.c +++ b/tools/testing/selftests/net/nat6to4.bpf.c diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/net/netfilter/.gitignore index c2229b3e40..0a64d6d0e2 100644 --- a/tools/testing/selftests/netfilter/.gitignore +++ b/tools/testing/selftests/net/netfilter/.gitignore @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only -nf-queue -connect_close audit_logread +connect_close conntrack_dump_flush sctp_collision +nf_queue diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile new file mode 100644 index 0000000000..47945b2b3f --- /dev/null +++ b/tools/testing/selftests/net/netfilter/Makefile @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: GPL-2.0 + +top_srcdir = ../../../../.. + +HOSTPKG_CONFIG := pkg-config +MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) +MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) + +TEST_PROGS := br_netfilter.sh bridge_brouter.sh +TEST_PROGS += conntrack_icmp_related.sh +TEST_PROGS += conntrack_ipip_mtu.sh +TEST_PROGS += conntrack_tcp_unreplied.sh +TEST_PROGS += conntrack_sctp_collision.sh +TEST_PROGS += conntrack_vrf.sh +TEST_PROGS += ipvs.sh +TEST_PROGS += nf_conntrack_packetdrill.sh +TEST_PROGS += nf_nat_edemux.sh +TEST_PROGS += nft_audit.sh +TEST_PROGS += nft_concat_range.sh +TEST_PROGS += nft_conntrack_helper.sh +TEST_PROGS += nft_fib.sh +TEST_PROGS += nft_flowtable.sh +TEST_PROGS += nft_meta.sh +TEST_PROGS += nft_nat.sh +TEST_PROGS += nft_nat_zones.sh +TEST_PROGS += nft_queue.sh +TEST_PROGS += nft_synproxy.sh +TEST_PROGS += nft_zones_many.sh +TEST_PROGS += rpath.sh +TEST_PROGS += xt_string.sh + +TEST_PROGS_EXTENDED = nft_concat_range_perf.sh + +TEST_GEN_PROGS = conntrack_dump_flush + +TEST_GEN_FILES = audit_logread +TEST_GEN_FILES += connect_close nf_queue +TEST_GEN_FILES += sctp_collision + +include ../../lib.mk + +$(OUTPUT)/nf_queue: CFLAGS += $(MNL_CFLAGS) +$(OUTPUT)/nf_queue: LDLIBS += $(MNL_LDLIBS) + +$(OUTPUT)/conntrack_dump_flush: CFLAGS += $(MNL_CFLAGS) +$(OUTPUT)/conntrack_dump_flush: LDLIBS += $(MNL_LDLIBS) + +TEST_FILES := lib.sh +TEST_FILES += packetdrill + +TEST_INCLUDES := \ + ../lib.sh diff --git a/tools/testing/selftests/netfilter/audit_logread.c b/tools/testing/selftests/net/netfilter/audit_logread.c index a0a880fc2d..a0a880fc2d 100644 --- a/tools/testing/selftests/netfilter/audit_logread.c +++ b/tools/testing/selftests/net/netfilter/audit_logread.c diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh new file mode 100755 index 0000000000..c28379a965 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh @@ -0,0 +1,171 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test for legacy br_netfilter module combined with connection tracking, +# a combination that doesn't really work. +# Multicast/broadcast packets race for hash table insertion. + +# eth0 br0 eth0 +# setup is: ns1 <->,ns0 <-> ns3 +# ns2 <-' `'-> ns4 + +source lib.sh + +checktool "nft --version" "run test without nft tool" + +cleanup() { + cleanup_all_ns +} + +trap cleanup EXIT + +setup_ns ns0 ns1 ns2 ns3 ns4 + +ret=0 + +do_ping() +{ + fromns="$1" + dstip="$2" + + if ! ip netns exec "$fromns" ping -c 1 -q "$dstip" > /dev/null; then + echo "ERROR: ping from $fromns to $dstip" + ip netns exec "$ns0" nft list ruleset + ret=1 + fi +} + +bcast_ping() +{ + fromns="$1" + dstip="$2" + + local packets=500 + + [ "$KSFT_MACHINE_SLOW" = yes ] && packets=100 + + for i in $(seq 1 $packets); do + if ! ip netns exec "$fromns" ping -q -f -b -c 1 -q "$dstip" > /dev/null 2>&1; then + echo "ERROR: ping -b from $fromns to $dstip" + ip netns exec "$ns0" nft list ruleset + ret=1 + break + fi + done +} + +ip netns exec "$ns0" sysctl -q net.ipv4.conf.all.rp_filter=0 +ip netns exec "$ns0" sysctl -q net.ipv4.conf.default.rp_filter=0 + +if ! ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns1"; then + echo "SKIP: Can't create veth device" + exit $ksft_skip +fi + +ip link add veth2 netns "$ns0" type veth peer name eth0 netns "$ns2" +ip link add veth3 netns "$ns0" type veth peer name eth0 netns "$ns3" +ip link add veth4 netns "$ns0" type veth peer name eth0 netns "$ns4" + +for i in $(seq 1 4); do + ip -net "$ns0" link set "veth$i" up +done + +if ! ip -net "$ns0" link add br0 type bridge stp_state 0 forward_delay 0 nf_call_iptables 1 nf_call_ip6tables 1 nf_call_arptables 1; then + echo "SKIP: Can't create bridge br0" + exit $ksft_skip +fi + +# make veth0,1,2 part of bridge. +for i in $(seq 1 3); do + ip -net "$ns0" link set "veth$i" master br0 +done + +# add a macvlan on top of the bridge. +MACVLAN_ADDR=ba:f3:13:37:42:23 +ip -net "$ns0" link add link br0 name macvlan0 type macvlan mode private +ip -net "$ns0" link set macvlan0 address ${MACVLAN_ADDR} +ip -net "$ns0" link set macvlan0 up +ip -net "$ns0" addr add 10.23.0.1/24 dev macvlan0 + +# add a macvlan on top of veth4. +MACVLAN_ADDR=ba:f3:13:37:42:24 +ip -net "$ns0" link add link veth4 name macvlan4 type macvlan mode passthru +ip -net "$ns0" link set macvlan4 address ${MACVLAN_ADDR} +ip -net "$ns0" link set macvlan4 up + +# make the macvlan part of the bridge. +# veth4 is not a bridge port, only the macvlan on top of it. +ip -net "$ns0" link set macvlan4 master br0 + +ip -net "$ns0" link set br0 up +ip -net "$ns0" addr add 10.0.0.1/24 dev br0 + +modprobe -q br_netfilter +if ! ip netns exec "$ns0" sysctl -q net.bridge.bridge-nf-call-iptables=1; then + echo "SKIP: bridge netfilter not available" + ret=$ksft_skip +fi + +# for testing, so namespaces will reply to ping -b probes. +ip netns exec "$ns0" sysctl -q net.ipv4.icmp_echo_ignore_broadcasts=0 + +# enable conntrack in ns0 and drop broadcast packets in forward to +# avoid them from getting confirmed in the postrouting hook before +# the cloned skb is passed up the stack. +ip netns exec "$ns0" nft -f - <<EOF +table ip filter { + chain input { + type filter hook input priority 1; policy accept + iifname br0 counter + ct state new accept + } +} + +table bridge filter { + chain forward { + type filter hook forward priority 0; policy accept + meta pkttype broadcast ip protocol icmp counter drop + } +} +EOF +if [ "$?" -ne 0 ];then + echo "SKIP: could not add nftables ruleset" + exit $ksft_skip +fi + +# place 1, 2 & 3 in same subnet, connected via ns0:br0. +# ns4 is placed in same subnet as well, but its not +# part of the bridge: the corresponding veth4 is not +# part of the bridge, only its macvlan interface. +for i in $(seq 1 4); do + eval ip -net \$ns"$i" link set eth0 up +done +for i in $(seq 1 2); do + eval ip -net \$ns"$i" addr add "10.0.0.1$i/24" dev eth0 +done + +ip -net "$ns3" addr add 10.23.0.13/24 dev eth0 +ip -net "$ns4" addr add 10.23.0.14/24 dev eth0 + +# test basic connectivity +do_ping "$ns1" 10.0.0.12 +do_ping "$ns3" 10.23.0.1 +do_ping "$ns4" 10.23.0.1 + +bcast_ping "$ns1" 10.0.0.255 + +# This should deliver broadcast to macvlan0, which is on top of ns0:br0. +bcast_ping "$ns3" 10.23.0.255 + +# same, this time via veth4:macvlan4. +bcast_ping "$ns4" 10.23.0.255 + +read t < /proc/sys/kernel/tainted +if [ "$t" -eq 0 ];then + echo PASS: kernel not tainted +else + echo ERROR: kernel is tainted + ret=1 +fi + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/bridge_brouter.sh b/tools/testing/selftests/net/netfilter/bridge_brouter.sh new file mode 100755 index 0000000000..2549b65906 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/bridge_brouter.sh @@ -0,0 +1,122 @@ +#!/bin/bash +# +# This test is for bridge 'brouting', i.e. make some packets being routed +# rather than getting bridged even though they arrive on interface that is +# part of a bridge. + +# eth0 br0 eth0 +# setup is: ns1 <-> nsbr <-> ns2 + +source lib.sh + +if ! ebtables -V > /dev/null 2>&1;then + echo "SKIP: Could not run test without ebtables" + exit $ksft_skip +fi + +cleanup() { + cleanup_all_ns +} + +trap cleanup EXIT + +setup_ns nsbr ns1 ns2 + +ip netns exec "$nsbr" sysctl -q net.ipv4.conf.default.rp_filter=0 +ip netns exec "$nsbr" sysctl -q net.ipv4.conf.all.rp_filter=0 +if ! ip link add veth0 netns "$nsbr" type veth peer name eth0 netns "$ns1"; then + echo "SKIP: Can't create veth device" + exit $ksft_skip +fi +ip link add veth1 netns "$nsbr" type veth peer name eth0 netns "$ns2" + +if ! ip -net "$nsbr" link add br0 type bridge; then + echo "SKIP: Can't create bridge br0" + exit $ksft_skip +fi + +ip -net "$nsbr" link set veth0 up +ip -net "$nsbr" link set veth1 up + +ip -net "$nsbr" link set veth0 master br0 +ip -net "$nsbr" link set veth1 master br0 +ip -net "$nsbr" link set br0 up +ip -net "$nsbr" addr add 10.0.0.1/24 dev br0 + +# place both in same subnet, ${ns1} and ${ns2} connected via ${nsbr}:br0 +ip -net "$ns1" link set eth0 up +ip -net "$ns2" link set eth0 up +ip -net "$ns1" addr add 10.0.0.11/24 dev eth0 +ip -net "$ns2" addr add 10.0.0.12/24 dev eth0 + +test_ebtables_broute() +{ + # redirect is needed so the dstmac is rewritten to the bridge itself, + # ip stack won't process OTHERHOST (foreign unicast mac) packets. + if ! ip netns exec "$nsbr" ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP; then + echo "SKIP: Could not add ebtables broute redirect rule" + return $ksft_skip + fi + + ip netns exec "$nsbr" sysctl -q net.ipv4.conf.veth0.forwarding=0 + + # ping net${ns1}, expected to not work (ip forwarding is off) + if ip netns exec "$ns1" ping -q -c 1 10.0.0.12 -W 0.5 > /dev/null 2>&1; then + echo "ERROR: ping works, should have failed" 1>&2 + return 1 + fi + + # enable forwarding on both interfaces. + # neither needs an ip address, but at least the bridge needs + # an ip address in same network segment as ${ns1} and ${ns2} (${nsbr} + # needs to be able to determine route for to-be-forwarded packet). + ip netns exec "$nsbr" sysctl -q net.ipv4.conf.veth0.forwarding=1 + ip netns exec "$nsbr" sysctl -q net.ipv4.conf.veth1.forwarding=1 + + if ! ip netns exec "$ns1" ping -q -c 1 10.0.0.12 > /dev/null; then + echo "ERROR: ping did not work, but it should (broute+forward)" 1>&2 + return 1 + fi + + echo "PASS: ${ns1}/${ns2} connectivity with active broute rule" + ip netns exec "$nsbr" ebtables -t broute -F + + # ping net${ns1}, expected to work (frames are bridged) + if ! ip netns exec "$ns1" ping -q -c 1 10.0.0.12 > /dev/null; then + echo "ERROR: ping did not work, but it should (bridged)" 1>&2 + return 1 + fi + + ip netns exec "$nsbr" ebtables -t filter -A FORWARD -p ipv4 --ip-protocol icmp -j DROP + + # ping net${ns1}, expected to not work (DROP in bridge forward) + if ip netns exec "$ns1" ping -q -c 1 10.0.0.12 -W 0.5 > /dev/null 2>&1; then + echo "ERROR: ping works, should have failed (icmp forward drop)" 1>&2 + return 1 + fi + + # re-activate brouter + ip netns exec "$nsbr" ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP + + if ! ip netns exec "$ns2" ping -q -c 1 10.0.0.11 > /dev/null; then + echo "ERROR: ping did not work, but it should (broute+forward 2)" 1>&2 + return 1 + fi + + echo "PASS: ${ns1}/${ns2} connectivity with active broute rule and bridge forward drop" + return 0 +} + +# test basic connectivity +if ! ip netns exec "$ns1" ping -c 1 -q 10.0.0.12 > /dev/null; then + echo "ERROR: Could not reach ${ns2} from ${ns1}" 1>&2 + exit 1 +fi + +if ! ip netns exec "$ns2" ping -c 1 -q 10.0.0.11 > /dev/null; then + echo "ERROR: Could not reach ${ns1} from ${ns2}" 1>&2 + exit 1 +fi + +test_ebtables_broute +exit $? diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config new file mode 100644 index 0000000000..63ef80ef47 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/config @@ -0,0 +1,89 @@ +CONFIG_AUDIT=y +CONFIG_BPF_SYSCALL=y +CONFIG_BRIDGE=m +CONFIG_BRIDGE_EBT_BROUTE=m +CONFIG_BRIDGE_EBT_IP=m +CONFIG_BRIDGE_EBT_REDIRECT=m +CONFIG_BRIDGE_EBT_T_FILTER=m +CONFIG_BRIDGE_NETFILTER=m +CONFIG_BRIDGE_NF_EBTABLES=m +CONFIG_CGROUP_BPF=y +CONFIG_DUMMY=m +CONFIG_INET_ESP=m +CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_MATCH_RPFILTER=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP_NF_RAW=m +CONFIG_IP6_NF_RAW=m +CONFIG_IP_SCTP=m +CONFIG_IP_VS=m +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_RR=m +CONFIG_IPV6=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_MACVLAN=m +CONFIG_NAMESPACES=y +CONFIG_NET_CLS_U32=m +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_NS=y +CONFIG_NET_SCH_NETEM=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_IPIP=m +CONFIG_NET_VRF=y +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NETFILTER_NETLINK=m +CONFIG_NETFILTER_NETLINK_QUEUE=m +CONFIG_NETFILTER_SYNPROXY=m +CONFIG_NETFILTER_XTABLES=m +CONFIG_NETFILTER_XT_NAT=m +CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m +CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_TARGET_REDIRECT=m +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_EVENTS=y +CONFIG_NF_CONNTRACK_FTP=m +CONFIG_NF_CONNTRACK_MARK=y +CONFIG_NF_CONNTRACK_ZONES=y +CONFIG_NF_CT_NETLINK=m +CONFIG_NF_CT_PROTO_SCTP=y +CONFIG_NF_FLOW_TABLE=m +CONFIG_NF_LOG_IPV4=m +CONFIG_NF_LOG_IPV6=m +CONFIG_NF_NAT=m +CONFIG_NF_NAT_REDIRECT=y +CONFIG_NF_NAT_MASQUERADE=y +CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NF_TABLES_INET=y +CONFIG_NF_TABLES_IPV4=y +CONFIG_NF_TABLES_IPV6=y +CONFIG_NF_TABLES_NETDEV=y +CONFIG_NF_FLOW_TABLE_INET=m +CONFIG_NFT_BRIDGE_META=m +CONFIG_NFT_COMPAT=m +CONFIG_NFT_CT=m +CONFIG_NFT_FIB=m +CONFIG_NFT_FIB_INET=m +CONFIG_NFT_FIB_IPV4=m +CONFIG_NFT_FIB_IPV6=m +CONFIG_NFT_FLOW_OFFLOAD=m +CONFIG_NFT_LIMIT=m +CONFIG_NFT_LOG=m +CONFIG_NFT_MASQ=m +CONFIG_NFT_NAT=m +CONFIG_NFT_NUMGEN=m +CONFIG_NFT_QUEUE=m +CONFIG_NFT_QUOTA=m +CONFIG_NFT_REDIR=m +CONFIG_NFT_SYNPROXY=m +CONFIG_VETH=m +CONFIG_VLAN_8021Q=m +CONFIG_XFRM_USER=m +CONFIG_XFRM_STATISTICS=y +CONFIG_NET_PKTGEN=m +CONFIG_TUN=m diff --git a/tools/testing/selftests/netfilter/connect_close.c b/tools/testing/selftests/net/netfilter/connect_close.c index 1c3b0add54..1c3b0add54 100644 --- a/tools/testing/selftests/netfilter/connect_close.c +++ b/tools/testing/selftests/net/netfilter/connect_close.c diff --git a/tools/testing/selftests/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c index b11ea8ee67..bd9317bf5a 100644 --- a/tools/testing/selftests/netfilter/conntrack_dump_flush.c +++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c @@ -10,7 +10,7 @@ #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> #include <linux/netfilter/nf_conntrack_tcp.h> -#include "../kselftest_harness.h" +#include "../../kselftest_harness.h" #define TEST_ZONE_ID 123 #define NF_CT_DEFAULT_ZONE_ID 0 @@ -313,13 +313,11 @@ FIXTURE_SETUP(conntrack_dump_flush) self->sock = mnl_socket_open(NETLINK_NETFILTER); if (!self->sock) { perror("mnl_socket_open"); - exit(EXIT_FAILURE); + SKIP(return, "cannot open netlink_netfilter socket"); } - if (mnl_socket_bind(self->sock, 0, MNL_SOCKET_AUTOPID) < 0) { - perror("mnl_socket_bind"); - exit(EXIT_FAILURE); - } + ret = mnl_socket_bind(self->sock, 0, MNL_SOCKET_AUTOPID); + EXPECT_EQ(ret, 0); ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); if (ret < 0 && errno == EPERM) diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh index 76645aaf2b..c63d840ead 100755 --- a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh @@ -14,35 +14,32 @@ # check the icmp errors are propagated to the correct host as per # nat of "established" icmp-echo "connection". -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 -ret=0 +source lib.sh -nft --version > /dev/null 2>&1 -if [ $? -ne 0 ];then +if ! nft --version > /dev/null 2>&1;then echo "SKIP: Could not run test without nft tool" exit $ksft_skip fi -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi - cleanup() { - for i in 1 2;do ip netns del nsclient$i;done - for i in 1 2;do ip netns del nsrouter$i;done + cleanup_all_ns } trap cleanup EXIT -ipv4() { - echo -n 192.168.$1.2 -} +setup_ns nsclient1 nsclient2 nsrouter1 nsrouter2 + +ret=0 + +add_addr() +{ + ns=$1 + dev=$2 + i=$3 -ipv6 () { - echo -n dead:$1::2 + ip -net "$ns" link set "$dev" up + ip -net "$ns" addr add "192.168.$i.2/24" dev "$dev" + ip -net "$ns" addr add "dead:$i::2/64" dev "$dev" nodad } check_counter() @@ -52,10 +49,9 @@ check_counter() expect=$3 local lret=0 - cnt=$(ip netns exec $ns nft list counter inet filter "$name" | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns" nft list counter inet filter "$name" | grep -q "$expect"; then echo "ERROR: counter $name in $ns has unexpected value (expected $expect)" 1>&2 - ip netns exec $ns nft list counter inet filter "$name" 1>&2 + ip netns exec "$ns" nft list counter inet filter "$name" 1>&2 lret=1 fi @@ -65,9 +61,8 @@ check_counter() check_unknown() { expect="packets 0 bytes 0" - for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do - check_counter $n "unknown" "$expect" - if [ $? -ne 0 ] ;then + for n in ${nsclient1} ${nsclient2} ${nsrouter1} ${nsrouter2}; do + if ! check_counter "$n" "unknown" "$expect"; then return 1 fi done @@ -75,61 +70,48 @@ check_unknown() return 0 } -for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do - ip netns add $n - ip -net $n link set lo up -done - -DEV=veth0 -ip link add $DEV netns nsclient1 type veth peer name eth1 netns nsrouter1 DEV=veth0 -ip link add $DEV netns nsclient2 type veth peer name eth1 netns nsrouter2 +ip link add "$DEV" netns "$nsclient1" type veth peer name eth1 netns "$nsrouter1" +ip link add "$DEV" netns "$nsclient2" type veth peer name eth1 netns "$nsrouter2" +ip link add "$DEV" netns "$nsrouter1" type veth peer name eth2 netns "$nsrouter2" -DEV=veth0 -ip link add $DEV netns nsrouter1 type veth peer name eth2 netns nsrouter2 +add_addr "$nsclient1" $DEV 1 +add_addr "$nsclient2" $DEV 2 -DEV=veth0 -for i in 1 2; do - ip -net nsclient$i link set $DEV up - ip -net nsclient$i addr add $(ipv4 $i)/24 dev $DEV - ip -net nsclient$i addr add $(ipv6 $i)/64 dev $DEV -done - -ip -net nsrouter1 link set eth1 up -ip -net nsrouter1 link set veth0 up +ip -net "$nsrouter1" link set eth1 up +ip -net "$nsrouter1" link set $DEV up -ip -net nsrouter2 link set eth1 up -ip -net nsrouter2 link set eth2 up +ip -net "$nsrouter2" link set eth1 mtu 1280 up +ip -net "$nsrouter2" link set eth2 up -ip -net nsclient1 route add default via 192.168.1.1 -ip -net nsclient1 -6 route add default via dead:1::1 +ip -net "$nsclient1" route add default via 192.168.1.1 +ip -net "$nsclient1" -6 route add default via dead:1::1 -ip -net nsclient2 route add default via 192.168.2.1 -ip -net nsclient2 route add default via dead:2::1 +ip -net "$nsclient2" route add default via 192.168.2.1 +ip -net "$nsclient2" route add default via dead:2::1 +ip -net "$nsclient2" link set veth0 mtu 1280 -i=3 -ip -net nsrouter1 addr add 192.168.1.1/24 dev eth1 -ip -net nsrouter1 addr add 192.168.3.1/24 dev veth0 -ip -net nsrouter1 addr add dead:1::1/64 dev eth1 -ip -net nsrouter1 addr add dead:3::1/64 dev veth0 -ip -net nsrouter1 route add default via 192.168.3.10 -ip -net nsrouter1 -6 route add default via dead:3::10 +ip -net "$nsrouter1" addr add 192.168.1.1/24 dev eth1 +ip -net "$nsrouter1" addr add 192.168.3.1/24 dev veth0 +ip -net "$nsrouter1" addr add dead:1::1/64 dev eth1 nodad +ip -net "$nsrouter1" addr add dead:3::1/64 dev veth0 nodad +ip -net "$nsrouter1" route add default via 192.168.3.10 +ip -net "$nsrouter1" -6 route add default via dead:3::10 -ip -net nsrouter2 addr add 192.168.2.1/24 dev eth1 -ip -net nsrouter2 addr add 192.168.3.10/24 dev eth2 -ip -net nsrouter2 addr add dead:2::1/64 dev eth1 -ip -net nsrouter2 addr add dead:3::10/64 dev eth2 -ip -net nsrouter2 route add default via 192.168.3.1 -ip -net nsrouter2 route add default via dead:3::1 +ip -net "$nsrouter2" addr add 192.168.2.1/24 dev eth1 +ip -net "$nsrouter2" addr add 192.168.3.10/24 dev eth2 +ip -net "$nsrouter2" addr add dead:2::1/64 dev eth1 nodad +ip -net "$nsrouter2" addr add dead:3::10/64 dev eth2 nodad +ip -net "$nsrouter2" route add default via 192.168.3.1 +ip -net "$nsrouter2" route add default via dead:3::1 -sleep 2 for i in 4 6; do - ip netns exec nsrouter1 sysctl -q net.ipv$i.conf.all.forwarding=1 - ip netns exec nsrouter2 sysctl -q net.ipv$i.conf.all.forwarding=1 + ip netns exec "$nsrouter1" sysctl -q net.ipv$i.conf.all.forwarding=1 + ip netns exec "$nsrouter2" sysctl -q net.ipv$i.conf.all.forwarding=1 done -for netns in nsrouter1 nsrouter2; do -ip netns exec $netns nft -f - <<EOF +for netns in "$nsrouter1" "$nsrouter2"; do +ip netns exec "$netns" nft -f - <<EOF table inet filter { counter unknown { } counter related { } @@ -144,7 +126,7 @@ table inet filter { EOF done -ip netns exec nsclient1 nft -f - <<EOF +ip netns exec "$nsclient1" nft -f - <<EOF table inet filter { counter unknown { } counter related { } @@ -164,7 +146,7 @@ table inet filter { } EOF -ip netns exec nsclient2 nft -f - <<EOF +ip netns exec "$nsclient2" nft -f - <<EOF table inet filter { counter unknown { } counter new { } @@ -189,11 +171,10 @@ table inet filter { } EOF - # make sure NAT core rewrites adress of icmp error if nat is used according to # conntrack nat information (icmp error will be directed at nsrouter1 address, # but it needs to be routed to nsclient1 address). -ip netns exec nsrouter1 nft -f - <<EOF +ip netns exec "$nsrouter1" nft -f - <<EOF table ip nat { chain postrouting { type nat hook postrouting priority 0; policy accept; @@ -208,44 +189,32 @@ table ip6 nat { } EOF -ip netns exec nsrouter2 ip link set eth1 mtu 1280 -ip netns exec nsclient2 ip link set veth0 mtu 1280 -sleep 1 - -ip netns exec nsclient1 ping -c 1 -s 1000 -q -M do 192.168.2.2 >/dev/null -if [ $? -ne 0 ]; then +if ! ip netns exec "$nsclient1" ping -c 1 -s 1000 -q -M "do" 192.168.2.2 >/dev/null; then echo "ERROR: netns ip routing/connectivity broken" 1>&2 - cleanup exit 1 fi -ip netns exec nsclient1 ping6 -q -c 1 -s 1000 dead:2::2 >/dev/null -if [ $? -ne 0 ]; then +if ! ip netns exec "$nsclient1" ping -c 1 -s 1000 -q dead:2::2 >/dev/null; then echo "ERROR: netns ipv6 routing/connectivity broken" 1>&2 - cleanup exit 1 fi -check_unknown -if [ $? -ne 0 ]; then +if ! check_unknown; then ret=1 fi expect="packets 0 bytes 0" -for netns in nsrouter1 nsrouter2 nsclient1;do - check_counter "$netns" "related" "$expect" - if [ $? -ne 0 ]; then +for netns in "$nsrouter1" "$nsrouter2" "$nsclient1";do + if ! check_counter "$netns" "related" "$expect"; then ret=1 fi done expect="packets 2 bytes 2076" -check_counter nsclient2 "new" "$expect" -if [ $? -ne 0 ]; then +if ! check_counter "$nsclient2" "new" "$expect"; then ret=1 fi -ip netns exec nsclient1 ping -q -c 1 -s 1300 -M do 192.168.2.2 > /dev/null -if [ $? -eq 0 ]; then +if ip netns exec "$nsclient1" ping -W 0.5 -q -c 1 -s 1300 -M "do" 192.168.2.2 > /dev/null; then echo "ERROR: ping should have failed with PMTU too big error" 1>&2 ret=1 fi @@ -253,30 +222,26 @@ fi # nsrouter2 should have generated the icmp error, so # related counter should be 0 (its in forward). expect="packets 0 bytes 0" -check_counter "nsrouter2" "related" "$expect" -if [ $? -ne 0 ]; then +if ! check_counter "$nsrouter2" "related" "$expect"; then ret=1 fi # but nsrouter1 should have seen it, same for nsclient1. expect="packets 1 bytes 576" -for netns in nsrouter1 nsclient1;do - check_counter "$netns" "related" "$expect" - if [ $? -ne 0 ]; then +for netns in ${nsrouter1} ${nsclient1};do + if ! check_counter "$netns" "related" "$expect"; then ret=1 fi done -ip netns exec nsclient1 ping6 -c 1 -s 1300 dead:2::2 > /dev/null -if [ $? -eq 0 ]; then +if ip netns exec "${nsclient1}" ping6 -W 0.5 -c 1 -s 1300 dead:2::2 > /dev/null; then echo "ERROR: ping6 should have failed with PMTU too big error" 1>&2 ret=1 fi expect="packets 2 bytes 1856" -for netns in nsrouter1 nsclient1;do - check_counter "$netns" "related" "$expect" - if [ $? -ne 0 ]; then +for netns in "${nsrouter1}" "${nsclient1}";do + if ! check_counter "$netns" "related" "$expect"; then ret=1 fi done @@ -288,21 +253,19 @@ else fi # add 'bad' route, expect icmp REDIRECT to be generated -ip netns exec nsclient1 ip route add 192.168.1.42 via 192.168.1.1 -ip netns exec nsclient1 ip route add dead:1::42 via dead:1::1 +ip netns exec "${nsclient1}" ip route add 192.168.1.42 via 192.168.1.1 +ip netns exec "${nsclient1}" ip route add dead:1::42 via dead:1::1 -ip netns exec "nsclient1" ping -q -c 2 192.168.1.42 > /dev/null +ip netns exec "$nsclient1" ping -W 1 -q -i 0.5 -c 2 192.168.1.42 > /dev/null expect="packets 1 bytes 112" -check_counter nsclient1 "redir4" "$expect" -if [ $? -ne 0 ];then +if ! check_counter "$nsclient1" "redir4" "$expect"; then ret=1 fi -ip netns exec "nsclient1" ping -c 1 dead:1::42 > /dev/null +ip netns exec "$nsclient1" ping -W 1 -c 1 dead:1::42 > /dev/null expect="packets 1 bytes 192" -check_counter nsclient1 "redir6" "$expect" -if [ $? -ne 0 ];then +if ! check_counter "$nsclient1" "redir6" "$expect"; then ret=1 fi diff --git a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh b/tools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh index eb9553e498..9832a5d019 100755 --- a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh @@ -1,8 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source lib.sh # Conntrack needs to reassemble fragments in order to have complete # packets for rule matching. Reassembly can lead to packet loss. @@ -23,56 +22,44 @@ ksft_skip=4 # between Client A and Client B over WAN. Wanrouter has MTU 1400 set # on its interfaces. -rnd=$(mktemp -u XXXXXXXX) rx=$(mktemp) -r_a="ns-ra-$rnd" -r_b="ns-rb-$rnd" -r_w="ns-rw-$rnd" -c_a="ns-ca-$rnd" -c_b="ns-cb-$rnd" - -checktool (){ - if ! $1 > /dev/null 2>&1; then - echo "SKIP: Could not $2" - exit $ksft_skip - fi -} - checktool "iptables --version" "run test without iptables" -checktool "ip -Version" "run test without ip tool" -checktool "which socat" "run test without socat" -checktool "ip netns add ${r_a}" "create net namespace" +checktool "socat -h" "run test without socat" -for n in ${r_b} ${r_w} ${c_a} ${c_b};do - ip netns add ${n} -done +setup_ns r_a r_b r_w c_a c_b cleanup() { - for n in ${r_a} ${r_b} ${r_w} ${c_a} ${c_b};do - ip netns del ${n} - done - rm -f ${rx} + cleanup_all_ns + rm -f "$rx" } trap cleanup EXIT +listener_ready() +{ + ns="$1" + port="$2" + ss -N "$ns" -lnu -o "sport = :$port" | grep -q "$port" +} + test_path() { msg="$1" - ip netns exec ${c_b} socat -t 3 - udp4-listen:5000,reuseaddr > ${rx} < /dev/null & + ip netns exec "$c_b" socat -t 3 - udp4-listen:5000,reuseaddr > "$rx" < /dev/null & + + busywait $BUSYWAIT_TIMEOUT listener_ready "$c_b" 5000 - sleep 1 for i in 1 2 3; do head -c1400 /dev/zero | tr "\000" "a" | \ - ip netns exec ${c_a} socat -t 1 -u STDIN UDP:192.168.20.2:5000 + ip netns exec "$c_a" socat -t 1 -u STDIN UDP:192.168.20.2:5000 done wait - bytes=$(wc -c < ${rx}) + bytes=$(wc -c < "$rx") - if [ $bytes -eq 1400 ];then + if [ "$bytes" -eq 1400 ];then echo "OK: PMTU $msg connection tracking" else echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400" @@ -91,24 +78,24 @@ test_path() { # 10.4.4.1 via 10.2.2.254 (Router B via Wanrouter) # No iptables rules at all. -ip link add veth0 netns ${r_a} type veth peer name veth0 netns ${r_w} -ip link add veth1 netns ${r_a} type veth peer name veth0 netns ${c_a} +ip link add veth0 netns "$r_a" type veth peer name veth0 netns "$r_w" +ip link add veth1 netns "$r_a" type veth peer name veth0 netns "$c_a" l_addr="10.2.2.1" r_addr="10.4.4.1" -ip netns exec ${r_a} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip +ip netns exec "$r_a" ip link add ipip0 type ipip local "$l_addr" remote "$r_addr" mode ipip || exit $ksft_skip for dev in lo veth0 veth1 ipip0; do - ip -net ${r_a} link set $dev up + ip -net "$r_a" link set "$dev" up done -ip -net ${r_a} addr add 10.2.2.1/24 dev veth0 -ip -net ${r_a} addr add 192.168.10.1/24 dev veth1 +ip -net "$r_a" addr add 10.2.2.1/24 dev veth0 +ip -net "$r_a" addr add 192.168.10.1/24 dev veth1 -ip -net ${r_a} route add 192.168.20.0/24 dev ipip0 -ip -net ${r_a} route add 10.4.4.0/24 via 10.2.2.254 +ip -net "$r_a" route add 192.168.20.0/24 dev ipip0 +ip -net "$r_a" route add 10.4.4.0/24 via 10.2.2.254 -ip netns exec ${r_a} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null +ip netns exec "$r_a" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null # Detailed setup for Router B # --------------------------- @@ -121,49 +108,46 @@ ip netns exec ${r_a} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null # 10.2.2.1 via 10.4.4.254 (Router A via Wanrouter) # No iptables rules at all. -ip link add veth0 netns ${r_b} type veth peer name veth1 netns ${r_w} -ip link add veth1 netns ${r_b} type veth peer name veth0 netns ${c_b} +ip link add veth0 netns "$r_b" type veth peer name veth1 netns "$r_w" +ip link add veth1 netns "$r_b" type veth peer name veth0 netns "$c_b" l_addr="10.4.4.1" r_addr="10.2.2.1" -ip netns exec ${r_b} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip +ip netns exec "$r_b" ip link add ipip0 type ipip local "${l_addr}" remote "${r_addr}" mode ipip || exit $ksft_skip -for dev in lo veth0 veth1 ipip0; do - ip -net ${r_b} link set $dev up +for dev in veth0 veth1 ipip0; do + ip -net "$r_b" link set $dev up done -ip -net ${r_b} addr add 10.4.4.1/24 dev veth0 -ip -net ${r_b} addr add 192.168.20.1/24 dev veth1 +ip -net "$r_b" addr add 10.4.4.1/24 dev veth0 +ip -net "$r_b" addr add 192.168.20.1/24 dev veth1 -ip -net ${r_b} route add 192.168.10.0/24 dev ipip0 -ip -net ${r_b} route add 10.2.2.0/24 via 10.4.4.254 -ip netns exec ${r_b} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null +ip -net "$r_b" route add 192.168.10.0/24 dev ipip0 +ip -net "$r_b" route add 10.2.2.0/24 via 10.4.4.254 +ip netns exec "$r_b" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null # Client A -ip -net ${c_a} addr add 192.168.10.2/24 dev veth0 -ip -net ${c_a} link set dev lo up -ip -net ${c_a} link set dev veth0 up -ip -net ${c_a} route add default via 192.168.10.1 +ip -net "$c_a" addr add 192.168.10.2/24 dev veth0 +ip -net "$c_a" link set dev veth0 up +ip -net "$c_a" route add default via 192.168.10.1 # Client A -ip -net ${c_b} addr add 192.168.20.2/24 dev veth0 -ip -net ${c_b} link set dev veth0 up -ip -net ${c_b} link set dev lo up -ip -net ${c_b} route add default via 192.168.20.1 +ip -net "$c_b" addr add 192.168.20.2/24 dev veth0 +ip -net "$c_b" link set dev veth0 up +ip -net "$c_b" route add default via 192.168.20.1 # Wan -ip -net ${r_w} addr add 10.2.2.254/24 dev veth0 -ip -net ${r_w} addr add 10.4.4.254/24 dev veth1 +ip -net "$r_w" addr add 10.2.2.254/24 dev veth0 +ip -net "$r_w" addr add 10.4.4.254/24 dev veth1 -ip -net ${r_w} link set dev lo up -ip -net ${r_w} link set dev veth0 up mtu 1400 -ip -net ${r_w} link set dev veth1 up mtu 1400 +ip -net "$r_w" link set dev veth0 up mtu 1400 +ip -net "$r_w" link set dev veth1 up mtu 1400 -ip -net ${r_a} link set dev veth0 mtu 1400 -ip -net ${r_b} link set dev veth0 mtu 1400 +ip -net "$r_a" link set dev veth0 mtu 1400 +ip -net "$r_b" link set dev veth0 mtu 1400 -ip netns exec ${r_w} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null +ip netns exec "$r_w" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null # Path MTU discovery # ------------------ @@ -203,5 +187,5 @@ test_path "without" #packet is too big (1400) for the tunnel PMTU (1380) to Router B, it is #dropped on Router A before sending. -ip netns exec ${r_a} iptables -A FORWARD -m conntrack --ctstate NEW +ip netns exec "$r_a" iptables -A FORWARD -m conntrack --ctstate NEW test_path "with" diff --git a/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh b/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh new file mode 100755 index 0000000000..d860f7d974 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh @@ -0,0 +1,87 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing For SCTP COLLISION SCENARIO as Below: +# +# 14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359] +# 14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187] +# 14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359] +# 14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO] +# 14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK] +# 14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970] +# +# TOPO: SERVER_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) CLIENT_NS + +source lib.sh + +CLIENT_IP="198.51.200.1" +CLIENT_PORT=1234 + +SERVER_IP="198.51.100.1" +SERVER_PORT=1234 + +CLIENT_GW="198.51.200.2" +SERVER_GW="198.51.100.2" + +# setup the topo +setup() { + setup_ns CLIENT_NS SERVER_NS ROUTER_NS + ip -n "$SERVER_NS" link add link0 type veth peer name link1 netns "$ROUTER_NS" + ip -n "$CLIENT_NS" link add link3 type veth peer name link2 netns "$ROUTER_NS" + + ip -n "$SERVER_NS" link set link0 up + ip -n "$SERVER_NS" addr add $SERVER_IP/24 dev link0 + ip -n "$SERVER_NS" route add $CLIENT_IP dev link0 via $SERVER_GW + + ip -n "$ROUTER_NS" link set link1 up + ip -n "$ROUTER_NS" link set link2 up + ip -n "$ROUTER_NS" addr add $SERVER_GW/24 dev link1 + ip -n "$ROUTER_NS" addr add $CLIENT_GW/24 dev link2 + ip net exec "$ROUTER_NS" sysctl -wq net.ipv4.ip_forward=1 + + ip -n "$CLIENT_NS" link set link3 up + ip -n "$CLIENT_NS" addr add $CLIENT_IP/24 dev link3 + ip -n "$CLIENT_NS" route add $SERVER_IP dev link3 via $CLIENT_GW + + # simulate the delay on OVS upcall by setting up a delay for INIT_ACK with + # tc on $SERVER_NS side + tc -n "$SERVER_NS" qdisc add dev link0 root handle 1: htb r2q 64 + tc -n "$SERVER_NS" class add dev link0 parent 1: classid 1:1 htb rate 100mbit + tc -n "$SERVER_NS" filter add dev link0 parent 1: protocol ip u32 match ip protocol 132 \ + 0xff match u8 2 0xff at 32 flowid 1:1 + if ! tc -n "$SERVER_NS" qdisc add dev link0 parent 1:1 handle 10: netem delay 1200ms; then + echo "SKIP: Cannot add netem qdisc" + exit $ksft_skip + fi + + # simulate the ctstate check on OVS nf_conntrack + ip net exec "$ROUTER_NS" iptables -A FORWARD -m state --state INVALID,UNTRACKED -j DROP + ip net exec "$ROUTER_NS" iptables -A INPUT -p sctp -j DROP + + # use a smaller number for assoc's max_retrans to reproduce the issue + modprobe -q sctp + ip net exec "$CLIENT_NS" sysctl -wq net.sctp.association_max_retrans=3 +} + +cleanup() { + ip net exec "$CLIENT_NS" pkill sctp_collision >/dev/null 2>&1 + ip net exec "$SERVER_NS" pkill sctp_collision >/dev/null 2>&1 + cleanup_all_ns +} + +do_test() { + ip net exec "$SERVER_NS" ./sctp_collision server \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT & + ip net exec "$CLIENT_NS" ./sctp_collision client \ + $CLIENT_IP $CLIENT_PORT $SERVER_IP $SERVER_PORT +} + +# NOTE: one way to work around the issue is set a smaller hb_interval +# ip net exec $CLIENT_NS sysctl -wq net.sctp.hb_interval=3500 + +# run the test case +trap cleanup EXIT +setup && \ +echo "Test for SCTP Collision in nf_conntrack:" && \ +do_test && echo "PASS!" +exit $? diff --git a/tools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh b/tools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh new file mode 100755 index 0000000000..121ea93c01 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh @@ -0,0 +1,164 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Check that UNREPLIED tcp conntrack will eventually timeout. +# + +source lib.sh + +if ! nft --version > /dev/null 2>&1;then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +if ! conntrack --version > /dev/null 2>&1;then + echo "SKIP: Could not run test without conntrack tool" + exit $ksft_skip +fi + +ret=0 + +cleanup() { + ip netns pids "$ns1" | xargs kill 2>/dev/null + ip netns pids "$ns2" | xargs kill 2>/dev/null + + cleanup_all_ns +} + +ipv4() { + echo -n 192.168."$1".2 +} + +check_counter() +{ + ns=$1 + name=$2 + expect=$3 + local lret=0 + + if ! ip netns exec "$ns2" nft list counter inet filter "$name" | grep -q "$expect"; then + echo "ERROR: counter $name in $ns2 has unexpected value (expected $expect)" 1>&2 + ip netns exec "$ns2" nft list counter inet filter "$name" 1>&2 + lret=1 + fi + + return $lret +} + +trap cleanup EXIT + +# Create test namespaces +setup_ns ns1 ns2 + +# Connect the namespace to the host using a veth pair +ip -net "$ns1" link add name veth1 type veth peer name veth2 +ip -net "$ns1" link set netns "$ns2" dev veth2 + +ip -net "$ns1" link set up dev lo +ip -net "$ns2" link set up dev lo +ip -net "$ns1" link set up dev veth1 +ip -net "$ns2" link set up dev veth2 + +ip -net "$ns2" addr add 10.11.11.2/24 dev veth2 +ip -net "$ns2" route add default via 10.11.11.1 + +ip netns exec "$ns2" sysctl -q net.ipv4.conf.veth2.forwarding=1 + +# add a rule inside NS so we enable conntrack +ip netns exec "$ns1" nft -f - <<EOF +table inet filter { + chain input { + type filter hook input priority 0; policy accept; + ct state established accept + } +} +EOF + +ip -net "$ns1" addr add 10.11.11.1/24 dev veth1 +ip -net "$ns1" route add 10.99.99.99 via 10.11.11.2 + +# Check connectivity works +ip netns exec "$ns1" ping -q -c 2 10.11.11.2 >/dev/null || exit 1 + +ip netns exec "$ns2" socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT & + +ip netns exec "$ns2" nft -f - <<EOF +table inet filter { + counter connreq { } + counter redir { } + chain input { + type filter hook input priority 0; policy accept; + ct state new tcp flags syn ip daddr 10.99.99.99 tcp dport 80 counter name "connreq" accept + ct state new ct status dnat tcp dport 8080 counter name "redir" accept + } +} +EOF +if [ $? -ne 0 ]; then + echo "ERROR: Could not load nft rules" + exit 1 +fi + +ip netns exec "$ns2" sysctl -q net.netfilter.nf_conntrack_tcp_timeout_syn_sent=10 + +echo "INFO: connect $ns1 -> $ns2 to the virtual ip" +ip netns exec "$ns1" bash -c 'for i in $(seq 1 $BUSYWAIT_TIMEOUT) ; do + socat -u STDIN TCP:10.99.99.99:80 < /dev/null + sleep 0.1 + done' & + +wait_for_attempt() +{ + count=$(ip netns exec "$ns2" conntrack -L -p tcp --dport 80 2>/dev/null | wc -l) + if [ "$count" -gt 0 ]; then + return 0 + fi + + return 1 +} + +# wait for conntrack to pick the new connection request up before loading +# the nat redirect rule. +if ! busywait "$BUSYWAIT_TIMEOUT" wait_for_attempt; then + echo "ERROR: $ns2 did not pick up tcp connection from peer" + exit 1 +fi + +ip netns exec "$ns2" nft -f - <<EOF +table inet nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + ip daddr 10.99.99.99 tcp dport 80 redirect to :8080 + } +} +EOF +if [ $? -ne 0 ]; then + echo "ERROR: Could not load nat redirect" + exit 1 +fi + +wait_for_redirect() +{ + count=$(ip netns exec "$ns2" conntrack -L -p tcp --reply-port-src 8080 2>/dev/null | wc -l) + if [ "$count" -gt 0 ]; then + return 0 + fi + + return 1 +} +echo "INFO: NAT redirect added in ns $ns2, waiting for $BUSYWAIT_TIMEOUT ms for nat to take effect" + +busywait "$BUSYWAIT_TIMEOUT" wait_for_redirect +ret=$? + +expect="packets 1 bytes 60" +if ! check_counter "$ns2" "redir" "$expect"; then + ret=1 +fi + +if [ $ret -eq 0 ];then + echo "PASS: redirection counter has expected values" +else + echo "ERROR: no tcp connection was redirected" +fi + +exit $ret diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh index 8b5ea92345..073e8e62d3 100755 --- a/tools/testing/selftests/netfilter/conntrack_vrf.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # This script demonstrates interaction of conntrack and vrf. # The vrf driver calls the netfilter hooks again, with oif/iif @@ -28,84 +28,67 @@ # that was supposed to be fixed by the commit mentioned above to make sure # that any fix to test case 1 won't break masquerade again. -ksft_skip=4 +source lib.sh IP0=172.30.30.1 IP1=172.30.30.2 PFXL=30 ret=0 -sfx=$(mktemp -u "XXXXXXXX") -ns0="ns0-$sfx" -ns1="ns1-$sfx" - cleanup() { ip netns pids $ns0 | xargs kill 2>/dev/null ip netns pids $ns1 | xargs kill 2>/dev/null - ip netns del $ns0 $ns1 + cleanup_all_ns } -nft --version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without nft tool" - exit $ksft_skip -fi - -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi - -ip netns add "$ns0" -if [ $? -ne 0 ];then - echo "SKIP: Could not create net namespace $ns0" - exit $ksft_skip -fi -ip netns add "$ns1" +checktool "nft --version" "run test without nft" +checktool "conntrack --version" "run test without conntrack" +checktool "socat -h" "run test without socat" trap cleanup EXIT -ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0 -ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0 -ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0 +setup_ns ns0 ns1 + +ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0 +ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0 +ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0 -ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1 -if [ $? -ne 0 ];then +if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then echo "SKIP: Could not add veth device" exit $ksft_skip fi -ip -net $ns0 li add tvrf type vrf table 9876 -if [ $? -ne 0 ];then +if ! ip -net "$ns0" li add tvrf type vrf table 9876; then echo "SKIP: Could not add vrf device" exit $ksft_skip fi -ip -net $ns0 li set lo up +ip -net "$ns0" li set veth0 master tvrf +ip -net "$ns0" li set tvrf up +ip -net "$ns0" li set veth0 up +ip -net "$ns1" li set veth0 up -ip -net $ns0 li set veth0 master tvrf -ip -net $ns0 li set tvrf up -ip -net $ns0 li set veth0 up -ip -net $ns1 li set veth0 up +ip -net "$ns0" addr add $IP0/$PFXL dev veth0 +ip -net "$ns1" addr add $IP1/$PFXL dev veth0 -ip -net $ns0 addr add $IP0/$PFXL dev veth0 -ip -net $ns1 addr add $IP1/$PFXL dev veth0 +listener_ready() +{ + local ns="$1" -ip netns exec $ns1 iperf3 -s > /dev/null 2>&1& -if [ $? -ne 0 ];then - echo "SKIP: Could not start iperf3" - exit $ksft_skip -fi + ss -N "$ns" -l -n -t -o "sport = :55555" | grep -q "55555" +} + +ip netns exec "$ns1" socat -u -4 TCP-LISTEN:55555,reuseaddr,fork STDOUT > /dev/null & +busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1" # test vrf ingress handling. # The incoming connection should be placed in conntrack zone 1, # as decided by the first iteration of the ruleset. test_ct_zone_in() { -ip netns exec $ns0 nft -f - <<EOF +ip netns exec "$ns0" nft -f - <<EOF table testct { chain rawpre { type filter hook prerouting priority raw; @@ -126,21 +109,21 @@ table testct { } } EOF - ip netns exec $ns1 ping -W 1 -c 1 -I veth0 $IP0 > /dev/null + ip netns exec "$ns1" ping -W 1 -c 1 -I veth0 "$IP0" > /dev/null # should be in zone 1, not zone 2 - count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l) - if [ $count -eq 1 ]; then + count=$(ip netns exec "$ns0" conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l) + if [ "$count" -eq 1 ]; then echo "PASS: entry found in conntrack zone 1" else echo "FAIL: entry not found in conntrack zone 1" - count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l) - if [ $count -eq 1 ]; then + count=$(ip netns exec "$ns0" conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l) + if [ "$count" -eq 1 ]; then echo "FAIL: entry found in zone 2 instead" else echo "FAIL: entry not in zone 1 or 2, dumping table" - ip netns exec $ns0 conntrack -L - ip netns exec $ns0 nft list ruleset + ip netns exec "$ns0" conntrack -L + ip netns exec "$ns0" nft list ruleset fi fi } @@ -153,12 +136,12 @@ test_masquerade_vrf() local qdisc=$1 if [ "$qdisc" != "default" ]; then - tc -net $ns0 qdisc add dev tvrf root $qdisc + tc -net "$ns0" qdisc add dev tvrf root "$qdisc" fi - ip netns exec $ns0 conntrack -F 2>/dev/null + ip netns exec "$ns0" conntrack -F 2>/dev/null -ip netns exec $ns0 nft -f - <<EOF +ip netns exec "$ns0" nft -f - <<EOF flush ruleset table ip nat { chain rawout { @@ -179,25 +162,23 @@ table ip nat { } } EOF - ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 >/dev/null - if [ $? -ne 0 ]; then - echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device" + if ! ip netns exec "$ns0" ip vrf exec tvrf socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null;then + echo "FAIL: connect failure with masquerade + sport rewrite on vrf device" ret=1 return fi # must also check that nat table was evaluated on second (lower device) iteration. - ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' && - ip netns exec $ns0 nft list table ip nat |grep -q 'untracked counter packets [1-9]' - if [ $? -eq 0 ]; then - echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)" + if ip netns exec "$ns0" nft list table ip nat |grep -q 'counter packets 1' && + ip netns exec "$ns0" nft list table ip nat |grep -q 'untracked counter packets [1-9]'; then + echo "PASS: connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)" else echo "FAIL: vrf rules have unexpected counter value" ret=1 fi if [ "$qdisc" != "default" ]; then - tc -net $ns0 qdisc del dev tvrf root + tc -net "$ns0" qdisc del dev tvrf root fi } @@ -206,8 +187,8 @@ EOF # oifname is the lower device (veth0 in this case). test_masquerade_veth() { - ip netns exec $ns0 conntrack -F 2>/dev/null -ip netns exec $ns0 nft -f - <<EOF + ip netns exec "$ns0" conntrack -F 2>/dev/null +ip netns exec "$ns0" nft -f - <<EOF flush ruleset table ip nat { chain postrouting { @@ -216,17 +197,15 @@ table ip nat { } } EOF - ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 > /dev/null - if [ $? -ne 0 ]; then - echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device" + if ! ip netns exec "$ns0" ip vrf exec tvrf socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null;then + echo "FAIL: connect failure with masquerade + sport rewrite on veth device" ret=1 return fi # must also check that nat table was evaluated on second (lower device) iteration. - ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' - if [ $? -eq 0 ]; then - echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device" + if ip netns exec "$ns0" nft list table ip nat |grep -q 'counter packets 1'; then + echo "PASS: connect with masquerade + sport rewrite on veth device" else echo "FAIL: vrf masq rule has unexpected counter value" ret=1 diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh new file mode 100755 index 0000000000..4ceee9fb39 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/ipvs.sh @@ -0,0 +1,211 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# End-to-end ipvs test suite +# Topology: +#--------------------------------------------------------------+ +# | | +# ns0 | ns1 | +# ----------- | ----------- ----------- | +# | veth01 | --------- | veth10 | | veth12 | | +# ----------- peer ----------- ----------- | +# | | | | +# ----------- | | | +# | br0 | |----------------- peer |--------------| +# ----------- | | | +# | | | | +# ---------- peer ---------- ----------- | +# | veth02 | --------- | veth20 | | veth21 | | +# ---------- | ---------- ----------- | +# | ns2 | +# | | +#--------------------------------------------------------------+ +# +# We assume that all network driver are loaded +# + +source lib.sh + +ret=0 +GREEN='\033[0;92m' +RED='\033[0;31m' +NC='\033[0m' # No Color + +readonly port=8080 + +readonly vip_v4=207.175.44.110 +readonly cip_v4=10.0.0.2 +readonly gip_v4=10.0.0.1 +readonly dip_v4=172.16.0.1 +readonly rip_v4=172.16.0.2 +readonly sip_v4=10.0.0.3 + +readonly infile="$(mktemp)" +readonly outfile="$(mktemp)" +readonly datalen=32 + +sysipvsnet="/proc/sys/net/ipv4/vs/" +if [ ! -d $sysipvsnet ]; then + if ! modprobe -q ip_vs; then + echo "skip: could not run test without ipvs module" + exit $ksft_skip + fi +fi + +checktool "ipvsadm -v" "run test without ipvsadm" +checktool "socat -h" "run test without socat" + +setup() { + setup_ns ns0 ns1 ns2 + + ip link add veth01 netns "${ns0}" type veth peer name veth10 netns "${ns1}" + ip link add veth02 netns "${ns0}" type veth peer name veth20 netns "${ns2}" + ip link add veth12 netns "${ns1}" type veth peer name veth21 netns "${ns2}" + + ip netns exec "${ns0}" ip link set veth01 up + ip netns exec "${ns0}" ip link set veth02 up + ip netns exec "${ns0}" ip link add br0 type bridge + ip netns exec "${ns0}" ip link set veth01 master br0 + ip netns exec "${ns0}" ip link set veth02 master br0 + ip netns exec "${ns0}" ip link set br0 up + ip netns exec "${ns0}" ip addr add "${cip_v4}/24" dev br0 + + ip netns exec "${ns1}" ip link set veth10 up + ip netns exec "${ns1}" ip addr add "${gip_v4}/24" dev veth10 + ip netns exec "${ns1}" ip link set veth12 up + ip netns exec "${ns1}" ip addr add "${dip_v4}/24" dev veth12 + + ip netns exec "${ns2}" ip link set veth21 up + ip netns exec "${ns2}" ip addr add "${rip_v4}/24" dev veth21 + ip netns exec "${ns2}" ip link set veth20 up + ip netns exec "${ns2}" ip addr add "${sip_v4}/24" dev veth20 + + sleep 1 + + dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none +} + +cleanup() { + cleanup_all_ns + + if [ -f "${outfile}" ]; then + rm "${outfile}" + fi + if [ -f "${infile}" ]; then + rm "${infile}" + fi +} + +server_listen() { + ip netns exec "$ns2" socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT > "${outfile}" & + server_pid=$! + sleep 0.2 +} + +client_connect() { + ip netns exec "${ns0}" timeout 2 socat -u -4 STDIN TCP:"${vip_v4}":"${port}" < "${infile}" +} + +verify_data() { + wait "${server_pid}" + cmp "$infile" "$outfile" 2>/dev/null +} + +test_service() { + server_listen + client_connect + verify_data +} + + +test_dr() { + ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0 + + ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=1 + ip netns exec "${ns1}" ipvsadm -A -t "${vip_v4}:${port}" -s rr + ip netns exec "${ns1}" ipvsadm -a -t "${vip_v4}:${port}" -r "${rip_v4}:${port}" + ip netns exec "${ns1}" ip addr add "${vip_v4}/32" dev lo:1 + + # avoid incorrect arp response + ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1 + ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2 + # avoid reverse route lookup + ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 + ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0 + ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1 + + test_service +} + +test_nat() { + ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0 + + ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=1 + ip netns exec "${ns1}" ipvsadm -A -t "${vip_v4}:${port}" -s rr + ip netns exec "${ns1}" ipvsadm -a -m -t "${vip_v4}:${port}" -r "${rip_v4}:${port}" + ip netns exec "${ns1}" ip addr add "${vip_v4}/32" dev lo:1 + + ip netns exec "${ns2}" ip link del veth20 + ip netns exec "${ns2}" ip route add default via "${dip_v4}" dev veth21 + + test_service +} + +test_tun() { + ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0 + + ip netns exec "${ns1}" modprobe -q ipip + ip netns exec "${ns1}" ip link set tunl0 up + ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=0 + ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.all.send_redirects=0 + ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.default.send_redirects=0 + ip netns exec "${ns1}" ipvsadm -A -t "${vip_v4}:${port}" -s rr + ip netns exec "${ns1}" ipvsadm -a -i -t "${vip_v4}:${port}" -r ${rip_v4}:${port} + ip netns exec "${ns1}" ip addr add ${vip_v4}/32 dev lo:1 + + ip netns exec "${ns2}" modprobe -q ipip + ip netns exec "${ns2}" ip link set tunl0 up + ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1 + ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2 + ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 + ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0 + ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0 + ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1 + + test_service +} + +run_tests() { + local errors= + + echo "Testing DR mode..." + cleanup + setup + test_dr + errors=$(( $errors + $? )) + + echo "Testing NAT mode..." + cleanup + setup + test_nat + errors=$(( $errors + $? )) + + echo "Testing Tunnel mode..." + cleanup + setup + test_tun + errors=$(( $errors + $? )) + + return $errors +} + +trap cleanup EXIT + +run_tests + +if [ $? -ne 0 ]; then + echo -e "$(basename $0): ${RED}FAIL${NC}" + exit 1 +fi +echo -e "$(basename $0): ${GREEN}PASS${NC}" +exit 0 diff --git a/tools/testing/selftests/net/netfilter/lib.sh b/tools/testing/selftests/net/netfilter/lib.sh new file mode 100644 index 0000000000..bedd35298e --- /dev/null +++ b/tools/testing/selftests/net/netfilter/lib.sh @@ -0,0 +1,10 @@ +net_netfilter_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "$net_netfilter_dir/../lib.sh" + +checktool (){ + if ! $1 > /dev/null 2>&1; then + echo "SKIP: Could not $2" + exit $ksft_skip + fi +} diff --git a/tools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh b/tools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh new file mode 100755 index 0000000000..c6fdd2079f --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +checktool "conntrack --version" "run test without conntrack" +checktool "iptables --version" "run test without iptables" +checktool "ip6tables --version" "run test without ip6tables" + +modprobe -q tun +modprobe -q nf_conntrack +# echo 1 > /proc/sys/net/netfilter/nf_log_all_netns + +PDRILL_TIMEOUT=10 + +files=" +conntrack_ack_loss_stall.pkt +conntrack_inexact_rst.pkt +conntrack_syn_challenge_ack.pkt +conntrack_synack_old.pkt +conntrack_synack_reuse.pkt +conntrack_rst_invalid.pkt +" + +if ! packetdrill --dry_run --verbose "packetdrill/conntrack_ack_loss_stall.pkt";then + echo "SKIP: packetdrill not installed" + exit ${ksft_skip} +fi + +ret=0 + +run_packetdrill() +{ + filename="$1" + ipver="$2" + local mtu=1500 + + export NFCT_IP_VERSION="$ipver" + + if [ "$ipver" = "ipv4" ];then + export xtables="iptables" + elif [ "$ipver" = "ipv6" ];then + export xtables="ip6tables" + mtu=1520 + fi + + timeout "$PDRILL_TIMEOUT" unshare -n packetdrill --ip_version="$ipver" --mtu=$mtu \ + --tolerance_usecs=1000000 --non_fatal packet "$filename" +} + +run_one_test_file() +{ + filename="$1" + + for v in ipv4 ipv6;do + printf "%-50s(%s)%-20s" "$filename" "$v" "" + if run_packetdrill packetdrill/"$f" "$v";then + echo OK + else + echo FAIL + ret=1 + fi + done +} + +echo "Replaying packetdrill test cases:" +for f in $files;do + run_one_test_file packetdrill/"$f" +done + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh new file mode 100755 index 0000000000..1014551dd7 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh @@ -0,0 +1,97 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test NAT source port clash resolution +# + +source lib.sh +ret=0 +socatpid=0 + +cleanup() +{ + [ "$socatpid" -gt 0 ] && kill "$socatpid" + + cleanup_all_ns +} + +checktool "socat -h" "run test without socat" +checktool "iptables --version" "run test without iptables" + +trap cleanup EXIT + +setup_ns ns1 ns2 + +# Connect the namespaces using a veth pair +ip link add name veth2 type veth peer name veth1 +ip link set netns "$ns1" dev veth1 +ip link set netns "$ns2" dev veth2 + +ip netns exec "$ns1" ip link set up dev lo +ip netns exec "$ns1" ip link set up dev veth1 +ip netns exec "$ns1" ip addr add 192.168.1.1/24 dev veth1 + +ip netns exec "$ns2" ip link set up dev lo +ip netns exec "$ns2" ip link set up dev veth2 +ip netns exec "$ns2" ip addr add 192.168.1.2/24 dev veth2 + +# Create a server in one namespace +ip netns exec "$ns1" socat -u TCP-LISTEN:5201,fork OPEN:/dev/null,wronly=1 & +socatpid=$! + +# Restrict source port to just one so we don't have to exhaust +# all others. +ip netns exec "$ns2" sysctl -q net.ipv4.ip_local_port_range="10000 10000" + +# add a virtual IP using DNAT +ip netns exec "$ns2" iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201 + +# ... and route it to the other namespace +ip netns exec "$ns2" ip route add 10.96.0.1 via 192.168.1.1 + +# add a persistent connection from the other namespace +ip netns exec "$ns2" socat -t 10 - TCP:192.168.1.1:5201 > /dev/null & + +sleep 1 + +# ip daddr:dport will be rewritten to 192.168.1.1 5201 +# NAT must reallocate source port 10000 because +# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use +echo test | ip netns exec "$ns2" socat -t 3 -u STDIN TCP:10.96.0.1:443,connect-timeout=3 >/dev/null +ret=$? + +# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201). +if [ $ret -eq 0 ]; then + echo "PASS: socat can connect via NAT'd address" +else + echo "FAIL: socat cannot connect via NAT'd address" +fi + +# check sport clashres. +ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201 +ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201 + +sleep 5 | ip netns exec "$ns2" socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null & + +# if connect succeeds, client closes instantly due to EOF on stdin. +# if connect hangs, it will time out after 5s. +echo | ip netns exec "$ns2" socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null & +cpid2=$! + +time_then=$(date +%s) +wait $cpid2 +rv=$? +time_now=$(date +%s) + +# Check how much time has elapsed, expectation is for +# 'cpid2' to connect and then exit (and no connect delay). +delta=$((time_now - time_then)) + +if [ $delta -lt 2 ] && [ $rv -eq 0 ]; then + echo "PASS: could connect to service via redirected ports" +else + echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)" + ret=1 +fi + +exit $ret diff --git a/tools/testing/selftests/netfilter/nf-queue.c b/tools/testing/selftests/net/netfilter/nf_queue.c index 9e56b9d470..9e56b9d470 100644 --- a/tools/testing/selftests/netfilter/nf-queue.c +++ b/tools/testing/selftests/net/netfilter/nf_queue.c diff --git a/tools/testing/selftests/netfilter/nft_audit.sh b/tools/testing/selftests/net/netfilter/nft_audit.sh index 99ed5bd6e8..902f8114bc 100755 --- a/tools/testing/selftests/netfilter/nft_audit.sh +++ b/tools/testing/selftests/net/netfilter/nft_audit.sh @@ -6,11 +6,34 @@ SKIP_RC=4 RC=0 +if [ -r /var/run/auditd.pid ];then + read pid < /var/run/auditd.pid + p=$(pgrep ^auditd$) + + if [ "$pid" -eq "$p" ]; then + echo "SKIP: auditd is running" + exit $SKIP_RC + fi +fi + nft --version >/dev/null 2>&1 || { echo "SKIP: missing nft tool" exit $SKIP_RC } +# nft must be recent enough to support "reset" keyword. +nft --check -f /dev/stdin >/dev/null 2>&1 <<EOF +add table t +add chain t c +reset rules t c +EOF + +if [ "$?" -ne 0 ];then + echo -n "SKIP: nft reset feature test failed: " + nft --version + exit $SKIP_RC +fi + # Run everything in a separate network namespace [ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; } @@ -73,7 +96,7 @@ done for ((i = 0; i < 500; i++)); do echo "add rule t2 c3 counter accept comment \"rule $i\"" -done >$rulefile +done > "$rulefile" do_test "nft -f $rulefile" \ 'table=t2 family=2 entries=500 op=nft_register_rule' @@ -101,7 +124,7 @@ do_test 'nft add counter t2 c1; add counter t2 c2' \ for ((i = 3; i <= 500; i++)); do echo "add counter t2 c$i" -done >$rulefile +done > "$rulefile" do_test "nft -f $rulefile" \ 'table=t2 family=2 entries=498 op=nft_register_obj' @@ -115,7 +138,7 @@ do_test 'nft add quota t2 q1 { 10 bytes }; add quota t2 q2 { 10 bytes }' \ for ((i = 3; i <= 500; i++)); do echo "add quota t2 q$i { 10 bytes }" -done >$rulefile +done > "$rulefile" do_test "nft -f $rulefile" \ 'table=t2 family=2 entries=498 op=nft_register_obj' @@ -157,7 +180,7 @@ table=t2 family=2 entries=135 op=nft_reset_rule' # resetting sets and elements -elem=(22 ,80 ,443) +elem=(22 ",80" ",443") relem="" for i in {1..3}; do relem+="${elem[((i - 1))]}" diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index e908009576..6d66240e14 100755 --- a/tools/testing/selftests/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # # nft_concat_range.sh - Tests for sets with concatenation of ranged fields @@ -7,10 +7,10 @@ # # Author: Stefano Brivio <sbrivio@redhat.com> # -# shellcheck disable=SC2154,SC2034,SC2016,SC2030,SC2031 +# shellcheck disable=SC2154,SC2034,SC2016,SC2030,SC2031,SC2317 # ^ Configuration and templates sourced with eval, counters reused in subshells -KSELFTEST_SKIP=4 +source lib.sh # Available test groups: # - reported_issues: check for issues that were reported in the past @@ -19,7 +19,7 @@ KSELFTEST_SKIP=4 # - timeout: check that packets match entries until they expire # - performance: estimate matching rate, compare with rbtree and hash baselines TESTS="reported_issues correctness concurrency timeout" -[ "${quicktest}" != "1" ] && TESTS="${TESTS} performance" +[ -n "$NFT_CONCAT_RANGE_TESTS" ] && TESTS="${NFT_CONCAT_RANGE_TESTS}" # Set types, defined by TYPE_ variables below TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto @@ -31,7 +31,7 @@ BUGS="flush_remove_add reload" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" - ../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh + ../../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh pktgen/pktgen_bench_xmit_mode_netif_receive.sh" # Definition of set types: @@ -66,7 +66,7 @@ src start 1 count 5 src_delta 2000 -tools sendip nc bash +tools sendip bash proto udp race_repeat 3 @@ -91,7 +91,7 @@ src start 1 count 5 src_delta 2000 -tools sendip socat nc bash +tools sendip socat bash proto udp race_repeat 3 @@ -116,7 +116,7 @@ src start 10 count 5 src_delta 2000 -tools sendip socat nc bash +tools sendip socat bash proto udp6 race_repeat 3 @@ -141,7 +141,7 @@ src start 1 count 5 src_delta 2000 -tools sendip socat nc bash +tools sendip socat bash proto udp race_repeat 0 @@ -163,7 +163,7 @@ src mac start 10 count 5 src_delta 2000 -tools sendip socat nc bash +tools sendip socat bash proto udp6 race_repeat 0 @@ -185,7 +185,7 @@ src mac proto start 10 count 5 src_delta 2000 -tools sendip socat nc bash +tools sendip socat bash proto udp6 race_repeat 0 @@ -207,7 +207,7 @@ src addr4 start 1 count 5 src_delta 2000 -tools sendip socat nc bash +tools sendip socat bash proto udp race_repeat 3 @@ -227,7 +227,7 @@ src addr6 port start 10 count 5 src_delta 2000 -tools sendip socat nc +tools sendip socat proto udp6 race_repeat 3 @@ -247,7 +247,7 @@ src mac proto addr4 start 1 count 5 src_delta 2000 -tools sendip socat nc bash +tools sendip socat bash proto udp race_repeat 0 @@ -264,7 +264,7 @@ src mac start 1 count 5 src_delta 2000 -tools sendip socat nc bash +tools sendip socat bash proto udp race_repeat 0 @@ -286,7 +286,7 @@ src mac addr4 start 1 count 5 src_delta 2000 -tools sendip socat nc bash +tools sendip socat bash proto udp race_repeat 0 @@ -337,7 +337,7 @@ src addr4 start 1 count 5 src_delta 2000 -tools sendip socat nc +tools sendip socat proto udp race_repeat 3 @@ -363,7 +363,7 @@ src mac start 1 count 1 src_delta 2000 -tools sendip socat nc bash +tools sendip socat bash proto udp race_repeat 0 @@ -473,8 +473,6 @@ setup_veth() { B() { ip netns exec B "$@" >/dev/null 2>&1 } - - sleep 2 } # Fill in set template and initialise set @@ -488,12 +486,6 @@ check_tools() { __tools= for tool in ${tools}; do - if [ "${tool}" = "nc" ] && [ "${proto}" = "udp6" ] && \ - ! nc -u -w0 1.1.1.1 1 2>/dev/null; then - # Some GNU netcat builds might not support IPv6 - __tools="${__tools} netcat-openbsd" - continue - fi __tools="${__tools} ${tool}" command -v "${tool}" >/dev/null && return 0 @@ -554,30 +546,7 @@ setup_send_udp() { ip addr add "${dst_addr4}" dev veth_a 2>/dev/null [ -z "${dst_port}" ] && dst_port=12345 - echo "test4" | B socat -t 0.01 STDIN UDP4-DATAGRAM:${dst_addr4}:${dst_port}"${__socatbind}" - - src_addr4= - src_port= - } - elif command -v nc >/dev/null; then - if nc -u -w0 1.1.1.1 1 2>/dev/null; then - # OpenBSD netcat - nc_opt="-w0" - else - # GNU netcat - nc_opt="-q0" - fi - - send_udp() { - if [ -n "${src_addr4}" ]; then - B ip addr add "${src_addr4}" dev veth_b - __src_addr4="-s ${src_addr4}" - fi - ip addr add "${dst_addr4}" dev veth_a 2>/dev/null - [ -n "${src_port}" ] && src_port="-p ${src_port}" - - echo "" | B nc -u "${nc_opt}" "${__src_addr4}" \ - "${src_port}" "${dst_addr4}" "${dst_port}" + echo "test4" | B socat -t 0.01 STDIN UDP4-DATAGRAM:"$dst_addr4":"$dst_port""${__socatbind}" src_addr4= src_port= @@ -632,11 +601,7 @@ setup_send_udp6() { __socatbind6= if [ -n "${src_addr6}" ]; then - if [ -n "${src_addr6} != "${src_addr6_added} ]; then - B ip addr add "${src_addr6}" dev veth_b nodad - - src_addr6_added=${src_addr6} - fi + B ip addr add "${src_addr6}" dev veth_b nodad __socatbind6=",bind=[${src_addr6}]" @@ -645,26 +610,7 @@ setup_send_udp6() { fi fi - echo "test6" | B socat -t 0.01 STDIN UDP6-DATAGRAM:[${dst_addr6}]:${dst_port}"${__socatbind6}" - } - elif command -v nc >/dev/null && nc -u -w0 1.1.1.1 1 2>/dev/null; then - # GNU netcat might not work with IPv6, try next tool - send_udp6() { - ip -6 addr add "${dst_addr6}" dev veth_a nodad \ - 2>/dev/null - if [ -n "${src_addr6}" ]; then - B ip addr add "${src_addr6}" dev veth_b nodad - else - src_addr6="2001:db8::2" - fi - [ -n "${src_port}" ] && src_port="-p ${src_port}" - - # shellcheck disable=SC2086 # this needs split options - echo "" | B nc -u w0 "-s${src_addr6}" ${src_port} \ - ${dst_addr6} ${dst_port} - - src_addr6= - src_port= + echo "test6" | B socat -t 0.01 STDIN UDP6-DATAGRAM:["$dst_addr6"]:"$dst_port""${__socatbind6}" } elif [ -z "$(bash -c 'type -p')" ]; then send_udp6() { @@ -679,10 +625,17 @@ setup_send_udp6() { fi } +listener_ready() +{ + port="$1" + ss -lnt -o "sport = :$port" | grep -q "$port" +} + # Set up function to send TCP traffic on IPv4 setup_flood_tcp() { if command -v iperf3 >/dev/null; then flood_tcp() { + local n_port="${dst_port}" [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" if [ -n "${src_addr4}" ]; then B ip addr add "${src_addr4}/16" dev veth_b @@ -699,7 +652,7 @@ setup_flood_tcp() { # shellcheck disable=SC2086 # this needs split options iperf3 -s -DB "${dst_addr4}" ${dst_port} >/dev/null 2>&1 - sleep 2 + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port" # shellcheck disable=SC2086 # this needs split options B iperf3 -c "${dst_addr4}" ${dst_port} ${src_port} \ @@ -711,6 +664,7 @@ setup_flood_tcp() { } elif command -v iperf >/dev/null; then flood_tcp() { + local n_port="${dst_port}" [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" if [ -n "${src_addr4}" ]; then B ip addr add "${src_addr4}/16" dev veth_b @@ -727,7 +681,7 @@ setup_flood_tcp() { # shellcheck disable=SC2086 # this needs split options iperf -s -DB "${dst_addr4}" ${dst_port} >/dev/null 2>&1 - sleep 2 + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port" # shellcheck disable=SC2086 # this needs split options B iperf -c "${dst_addr4}" ${dst_port} ${src_addr4} \ @@ -739,6 +693,7 @@ setup_flood_tcp() { } elif command -v netperf >/dev/null; then flood_tcp() { + local n_port="${dst_port}" [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" if [ -n "${src_addr4}" ]; then B ip addr add "${src_addr4}/16" dev veth_b @@ -755,7 +710,7 @@ setup_flood_tcp() { # shellcheck disable=SC2086 # this needs split options netserver -4 ${dst_port} -L "${dst_addr4}" \ >/dev/null 2>&1 - sleep 2 + busywait "$BUSYWAIT_TIMEOUT" listener_ready "${n_port}" # shellcheck disable=SC2086 # this needs split options B netperf -4 -H "${dst_addr4}" ${dst_port} \ @@ -774,6 +729,7 @@ setup_flood_tcp() { setup_flood_tcp6() { if command -v iperf3 >/dev/null; then flood_tcp6() { + local n_port="${dst_port}" [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" if [ -n "${src_addr6}" ]; then B ip addr add "${src_addr6}" dev veth_b nodad @@ -790,7 +746,7 @@ setup_flood_tcp6() { # shellcheck disable=SC2086 # this needs split options iperf3 -s -DB "${dst_addr6}" ${dst_port} >/dev/null 2>&1 - sleep 2 + busywait "$BUSYWAIT_TIMEOUT" listener_ready "${n_port}" # shellcheck disable=SC2086 # this needs split options B iperf3 -c "${dst_addr6}" ${dst_port} \ @@ -802,6 +758,7 @@ setup_flood_tcp6() { } elif command -v iperf >/dev/null; then flood_tcp6() { + local n_port="${dst_port}" [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" if [ -n "${src_addr6}" ]; then B ip addr add "${src_addr6}" dev veth_b nodad @@ -818,7 +775,7 @@ setup_flood_tcp6() { # shellcheck disable=SC2086 # this needs split options iperf -s -VDB "${dst_addr6}" ${dst_port} >/dev/null 2>&1 - sleep 2 + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port" # shellcheck disable=SC2086 # this needs split options B iperf -c "${dst_addr6}" -V ${dst_port} \ @@ -830,6 +787,7 @@ setup_flood_tcp6() { } elif command -v netperf >/dev/null; then flood_tcp6() { + local n_port="${dst_port}" [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" if [ -n "${src_addr6}" ]; then B ip addr add "${src_addr6}" dev veth_b nodad @@ -846,7 +804,7 @@ setup_flood_tcp6() { # shellcheck disable=SC2086 # this needs split options netserver -6 ${dst_port} -L "${dst_addr6}" \ >/dev/null 2>&1 - sleep 2 + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port" # shellcheck disable=SC2086 # this needs split options B netperf -6 -H "${dst_addr6}" ${dst_port} \ @@ -865,6 +823,7 @@ setup_flood_tcp6() { setup_flood_udp() { if command -v iperf3 >/dev/null; then flood_udp() { + local n_port="${dst_port}" [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" if [ -n "${src_addr4}" ]; then B ip addr add "${src_addr4}/16" dev veth_b @@ -881,7 +840,7 @@ setup_flood_udp() { # shellcheck disable=SC2086 # this needs split options iperf3 -s -DB "${dst_addr4}" ${dst_port} - sleep 2 + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port" # shellcheck disable=SC2086 # this needs split options B iperf3 -u -c "${dst_addr4}" -Z -b 100M -l16 -t1000 \ @@ -893,6 +852,7 @@ setup_flood_udp() { } elif command -v iperf >/dev/null; then flood_udp() { + local n_port="${dst_port}" [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" if [ -n "${src_addr4}" ]; then B ip addr add "${src_addr4}/16" dev veth_b @@ -909,7 +869,7 @@ setup_flood_udp() { # shellcheck disable=SC2086 # this needs split options iperf -u -sDB "${dst_addr4}" ${dst_port} >/dev/null 2>&1 - sleep 2 + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port" # shellcheck disable=SC2086 # this needs split options B iperf -u -c "${dst_addr4}" -b 100M -l1 -t1000 \ @@ -921,6 +881,7 @@ setup_flood_udp() { } elif command -v netperf >/dev/null; then flood_udp() { + local n_port="${dst_port}" [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" if [ -n "${src_addr4}" ]; then B ip addr add "${src_addr4}/16" dev veth_b @@ -937,7 +898,7 @@ setup_flood_udp() { # shellcheck disable=SC2086 # this needs split options netserver -4 ${dst_port} -L "${dst_addr4}" \ >/dev/null 2>&1 - sleep 2 + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port" # shellcheck disable=SC2086 # this needs split options B netperf -4 -H "${dst_addr4}" ${dst_port} \ @@ -982,6 +943,7 @@ cleanup() { ip link del dummy0 2>/dev/null ip route del default 2>/dev/null ip -6 route del default 2>/dev/null + ip netns pids B 2>/dev/null | xargs kill 2>/dev/null ip netns del B 2>/dev/null ip link del veth_a 2>/dev/null timeout= @@ -989,15 +951,18 @@ cleanup() { killall iperf 2>/dev/null killall netperf 2>/dev/null killall netserver 2>/dev/null - rm -f ${tmp} - sleep 2 +} + +cleanup_exit() { + cleanup + rm -f "$tmp" } # Entry point for setup functions setup() { if [ "$(id -u)" -ne 0 ]; then echo " need to run as root" - exit ${KSELFTEST_SKIP} + exit ${ksft_skip} fi cleanup @@ -1258,7 +1223,7 @@ send_nomatch() { # - check that packets outside range don't match it # - remove some elements, check that packets don't match anymore test_correctness() { - setup veth send_"${proto}" set || return ${KSELFTEST_SKIP} + setup veth send_"${proto}" set || return ${ksft_skip} range_size=1 for i in $(seq "${start}" $((start + count))); do @@ -1273,7 +1238,7 @@ test_correctness() { srcend=$((end + src_delta)) add "$(format)" || return 1 - for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do + for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do send_match "${j}" $((j + src_delta)) || return 1 done send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1 @@ -1281,7 +1246,7 @@ test_correctness() { # Delete elements now and then if [ $((i % 3)) -eq 0 ]; then del "$(format)" || return 1 - for j in $(seq ${start} \ + for j in $(seq "$start" \ $((range_size / 2 + 1)) ${end}); do send_nomatch "${j}" $((j + src_delta)) \ || return 1 @@ -1307,12 +1272,12 @@ test_concurrency() { proto=${flood_proto} tools=${flood_tools} chain_spec=${flood_spec} - setup veth flood_"${proto}" set || return ${KSELFTEST_SKIP} + setup veth flood_"${proto}" set || return ${ksft_skip} range_size=1 cstart=${start} flood_pids= - for i in $(seq ${start} $((start + count))); do + for i in $(seq "$start" $((start + count))); do end=$((start + range_size)) srcstart=$((start + src_delta)) srcend=$((end + src_delta)) @@ -1325,7 +1290,7 @@ test_concurrency() { start=$((end + range_size)) done - sleep 10 + sleep $((RANDOM%10)) pids= for c in $(seq 1 "$(nproc)"); do ( @@ -1335,7 +1300,7 @@ test_concurrency() { # $start needs to be local to this subshell # shellcheck disable=SC2030 start=${cstart} - for i in $(seq ${start} $((start + count))); do + for i in $(seq "$start" $((start + count))); do end=$((start + range_size)) srcstart=$((start + src_delta)) srcend=$((end + src_delta)) @@ -1350,7 +1315,7 @@ test_concurrency() { range_size=1 start=${cstart} - for i in $(seq ${start} $((start + count))); do + for i in $(seq "$start" $((start + count))); do end=$((start + range_size)) srcstart=$((start + src_delta)) srcend=$((end + src_delta)) @@ -1366,7 +1331,7 @@ test_concurrency() { range_size=1 start=${cstart} - for i in $(seq ${start} $((start + count))); do + for i in $(seq "$start" $((start + count))); do end=$((start + range_size)) srcstart=$((start + src_delta)) srcend=$((end + src_delta)) @@ -1379,7 +1344,7 @@ test_concurrency() { range_size=1 start=${cstart} - for i in $(seq ${start} $((start + count))); do + for i in $(seq "$start" $((start + count))); do end=$((start + range_size)) srcstart=$((start + src_delta)) srcend=$((end + src_delta)) @@ -1407,31 +1372,34 @@ test_concurrency() { # - add all the elements with 3s timeout while checking that packets match # - wait 3s after the last insertion, check that packets don't match any entry test_timeout() { - setup veth send_"${proto}" set || return ${KSELFTEST_SKIP} + setup veth send_"${proto}" set || return ${ksft_skip} timeout=3 + + [ "$KSFT_MACHINE_SLOW" = "yes" ] && timeout=8 + range_size=1 - for i in $(seq "${start}" $((start + count))); do + for i in $(seq "$start" $((start + count))); do end=$((start + range_size)) srcstart=$((start + src_delta)) srcend=$((end + src_delta)) add "$(format)" || return 1 - for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do + for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do send_match "${j}" $((j + src_delta)) || return 1 done range_size=$((range_size + 1)) start=$((end + range_size)) done - sleep 3 - for i in $(seq ${start} $((start + count))); do + sleep $timeout + for i in $(seq "$start" $((start + count))); do end=$((start + range_size)) srcstart=$((start + src_delta)) srcend=$((end + src_delta)) - for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do + for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do send_nomatch "${j}" $((j + src_delta)) || return 1 done @@ -1450,13 +1418,13 @@ test_performance() { chain_spec=${perf_spec} dst="${perf_dst}" src="${perf_src}" - setup veth perf set || return ${KSELFTEST_SKIP} + setup veth perf set || return ${ksft_skip} first=${start} range_size=1 for set in test norange noconcat; do start=${first} - for i in $(seq ${start} $((start + perf_entries))); do + for i in $(seq "$start" $((start + perf_entries))); do end=$((start + range_size)) srcstart=$((start + src_delta)) srcend=$((end + src_delta)) @@ -1464,7 +1432,7 @@ test_performance() { if [ $((end / 65534)) -gt $((start / 65534)) ]; then start=${end} end=$((end + 1)) - elif [ ${start} -eq ${end} ]; then + elif [ "$start" -eq "$end" ]; then end=$((start + 1)) fi @@ -1475,7 +1443,7 @@ test_performance() { nft -f "${tmp}" done - perf $((end - 1)) ${srcstart} + perf $((end - 1)) "$srcstart" sleep 2 @@ -1519,14 +1487,17 @@ test_performance() { } test_bug_flush_remove_add() { + rounds=100 + [ "$KSFT_MACHINE_SLOW" = "yes" ] && rounds=10 + set_cmd='{ set s { type ipv4_addr . inet_service; flags interval; }; }' elem1='{ 10.0.0.1 . 22-25, 10.0.0.1 . 10-20 }' elem2='{ 10.0.0.1 . 10-20, 10.0.0.1 . 22-25 }' - for i in `seq 1 100`; do - nft add table t ${set_cmd} || return ${KSELFTEST_SKIP} - nft add element t s ${elem1} 2>/dev/null || return 1 + for i in $(seq 1 $rounds); do + nft add table t "$set_cmd" || return ${ksft_skip} + nft add element t s "$elem1" 2>/dev/null || return 1 nft flush set t s 2>/dev/null || return 1 - nft add element t s ${elem2} 2>/dev/null || return 1 + nft add element t s "$elem2" 2>/dev/null || return 1 done nft flush ruleset } @@ -1534,7 +1505,7 @@ test_bug_flush_remove_add() { # - add ranged element, check that packets match it # - reload the set, check packets still match test_bug_reload() { - setup veth send_"${proto}" set || return ${KSELFTEST_SKIP} + setup veth send_"${proto}" set || return ${ksft_skip} rstart=${start} range_size=1 @@ -1573,7 +1544,7 @@ test_bug_reload() { srcstart=$((start + src_delta)) srcend=$((end + src_delta)) - for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do + for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do send_match "${j}" $((j + src_delta)) || return 1 done @@ -1591,12 +1562,12 @@ test_reported_issues() { # Run everything in a separate network namespace [ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; } tmp="$(mktemp)" -trap cleanup EXIT +trap cleanup_exit EXIT # Entry point for test runs passed=0 for name in ${TESTS}; do - printf "TEST: %s\n" "$(echo ${name} | tr '_' ' ')" + printf "TEST: %s\n" "$(echo "$name" | tr '_' ' ')" if [ "${name}" = "reported_issues" ]; then SUBTESTS="${BUGS}" else @@ -1623,10 +1594,16 @@ for name in ${TESTS}; do continue fi - printf " %-60s " "${display}" + [ "$KSFT_MACHINE_SLOW" = "yes" ] && count=1 + + printf " %-32s " "${display}" + tthen=$(date +%s) eval test_"${name}" ret=$? + tnow=$(date +%s) + printf "%5ds%-30s" $((tnow-tthen)) + if [ $ret -eq 0 ]; then printf "[ OK ]\n" info_flush @@ -1635,11 +1612,11 @@ for name in ${TESTS}; do printf "[FAIL]\n" err_flush exit 1 - elif [ $ret -eq ${KSELFTEST_SKIP} ]; then + elif [ $ret -eq ${ksft_skip} ]; then printf "[SKIP]\n" err_flush fi done done -[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP} || exit 0 +[ ${passed} -eq 0 ] && exit ${ksft_skip} || exit 0 diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh b/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh new file mode 100755 index 0000000000..5d276995a5 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# + +source lib.sh + +[ "$KSFT_MACHINE_SLOW" = yes ] && exit ${ksft_skip} + +NFT_CONCAT_RANGE_TESTS="performance" exec ./nft_concat_range.sh diff --git a/tools/testing/selftests/net/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/net/netfilter/nft_conntrack_helper.sh new file mode 100755 index 0000000000..abcaa73371 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_conntrack_helper.sh @@ -0,0 +1,171 @@ +#!/bin/bash +# +# This tests connection tracking helper assignment: +# 1. can attach ftp helper to a connection from nft ruleset. +# 2. auto-assign still works. +# +# Kselftest framework requirement - SKIP code is 4. + +source lib.sh + +ret=0 + +testipv6=1 + +checktool "socat -h" "run test without socat" +checktool "conntrack --version" "run test without conntrack" +checktool "nft --version" "run test without nft" + +cleanup() +{ + ip netns pids "$ns1" | xargs kill 2>/dev/null + + ip netns del "$ns1" + ip netns del "$ns2" +} + +trap cleanup EXIT + +setup_ns ns1 ns2 + +if ! ip link add veth0 netns "$ns1" type veth peer name veth0 netns "$ns2" > /dev/null 2>&1;then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi + +ip -net "$ns1" link set veth0 up +ip -net "$ns2" link set veth0 up + +ip -net "$ns1" addr add 10.0.1.1/24 dev veth0 +ip -net "$ns1" addr add dead:1::1/64 dev veth0 nodad + +ip -net "$ns2" addr add 10.0.1.2/24 dev veth0 +ip -net "$ns2" addr add dead:1::2/64 dev veth0 nodad + +load_ruleset_family() { + local family=$1 + local ns=$2 + +ip netns exec "$ns" nft -f - <<EOF +table $family raw { + ct helper ftp { + type "ftp" protocol tcp + } + chain pre { + type filter hook prerouting priority 0; policy accept; + tcp dport 2121 ct helper set "ftp" + } + chain output { + type filter hook output priority 0; policy accept; + tcp dport 2121 ct helper set "ftp" + } +} +EOF + return $? +} + +check_for_helper() +{ + local netns=$1 + local message=$2 + local port=$3 + + if echo "$message" |grep -q 'ipv6';then + local family="ipv6" + else + local family="ipv4" + fi + + if ! ip netns exec "$netns" conntrack -L -f $family -p tcp --dport "$port" 2> /dev/null |grep -q 'helper=ftp';then + if [ "$autoassign" -eq 0 ] ;then + echo "FAIL: ${netns} did not show attached helper $message" 1>&2 + ret=1 + else + echo "PASS: ${netns} did not show attached helper $message" 1>&2 + fi + else + if [ "$autoassign" -eq 0 ] ;then + echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2 + else + echo "FAIL: ${netns} connection on port $port has ftp helper attached" 1>&2 + ret=1 + fi + fi + + return 0 +} + +listener_ready() +{ + ns="$1" + port="$2" + proto="$3" + ss -N "$ns" -lnt -o "sport = :$port" | grep -q "$port" +} + +test_helper() +{ + local port=$1 + local autoassign=$2 + + if [ "$autoassign" -eq 0 ] ;then + msg="set via ruleset" + else + msg="auto-assign" + fi + + ip netns exec "$ns2" socat -t 3 -u -4 TCP-LISTEN:"$port",reuseaddr STDOUT > /dev/null & + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" "$port" "-4" + + ip netns exec "$ns1" socat -u -4 STDIN TCP:10.0.1.2:"$port" < /dev/null > /dev/null + + check_for_helper "$ns1" "ip $msg" "$port" "$autoassign" + check_for_helper "$ns2" "ip $msg" "$port" "$autoassign" + + if [ $testipv6 -eq 0 ] ;then + return 0 + fi + + ip netns exec "$ns1" conntrack -F 2> /dev/null + ip netns exec "$ns2" conntrack -F 2> /dev/null + + ip netns exec "$ns2" socat -t 3 -u -6 TCP-LISTEN:"$port",reuseaddr STDOUT > /dev/null & + busywait $BUSYWAIT_TIMEOUT listener_ready "$ns2" "$port" "-6" + + ip netns exec "$ns1" socat -t 3 -u -6 STDIN TCP:"[dead:1::2]":"$port" < /dev/null > /dev/null + + check_for_helper "$ns1" "ipv6 $msg" "$port" + check_for_helper "$ns2" "ipv6 $msg" "$port" +} + +if ! load_ruleset_family ip "$ns1"; then + echo "FAIL: ${ns1} cannot load ip ruleset" 1>&2 + exit 1 +fi + +if ! load_ruleset_family ip6 "$ns1"; then + echo "SKIP: ${ns1} cannot load ip6 ruleset" 1>&2 + testipv6=0 +fi + +if ! load_ruleset_family inet "${ns2}"; then + echo "SKIP: ${ns1} cannot load inet ruleset" 1>&2 + if ! load_ruleset_family ip "${ns2}"; then + echo "FAIL: ${ns2} cannot load ip ruleset" 1>&2 + exit 1 + fi + + if [ "$testipv6" -eq 1 ] ;then + if ! load_ruleset_family ip6 "$ns2"; then + echo "FAIL: ${ns2} cannot load ip6 ruleset" 1>&2 + exit 1 + fi + fi +fi + +test_helper 2121 0 +ip netns exec "$ns1" sysctl -qe 'net.netfilter.nf_conntrack_helper=1' +ip netns exec "$ns2" sysctl -qe 'net.netfilter.nf_conntrack_helper=1' +test_helper 21 1 + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_fib.sh b/tools/testing/selftests/net/netfilter/nft_fib.sh new file mode 100755 index 0000000000..ce1451c275 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_fib.sh @@ -0,0 +1,234 @@ +#!/bin/bash +# +# This tests the fib expression. +# +# Kselftest framework requirement - SKIP code is 4. + +source lib.sh + +ret=0 + +timeout=4 + +log_netns=$(sysctl -n net.netfilter.nf_log_all_netns) + +cleanup() +{ + cleanup_all_ns + + [ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns +} + +checktool "nft --version" "run test without nft" + +setup_ns nsrouter ns1 ns2 + +trap cleanup EXIT + +if dmesg | grep -q ' nft_rpfilter: ';then + dmesg -c | grep ' nft_rpfilter: ' + echo "WARN: a previous test run has failed" 1>&2 +fi + +sysctl -q net.netfilter.nf_log_all_netns=1 + +load_ruleset() { + local netns=$1 + +ip netns exec "$netns" nft -f /dev/stdin <<EOF +table inet filter { + chain prerouting { + type filter hook prerouting priority 0; policy accept; + fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop + } +} +EOF +} + +load_pbr_ruleset() { + local netns=$1 + +ip netns exec "$netns" nft -f /dev/stdin <<EOF +table inet filter { + chain forward { + type filter hook forward priority raw; + fib saddr . iif oif gt 0 accept + log drop + } +} +EOF +} + +load_ruleset_count() { + local netns=$1 + +ip netns exec "$netns" nft -f /dev/stdin <<EOF +table inet filter { + chain prerouting { + type filter hook prerouting priority 0; policy accept; + ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop + ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop + } +} +EOF +} + +check_drops() { + if dmesg | grep -q ' nft_rpfilter: ';then + dmesg | grep ' nft_rpfilter: ' + echo "FAIL: rpfilter did drop packets" + return 1 + fi + + return 0 +} + +check_fib_counter() { + local want=$1 + local ns=$2 + local address=$3 + + if ! ip netns exec "$ns" nft list table inet filter | grep 'fib saddr . iif' | grep "$address" | grep -q "packets $want";then + echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2 + ip netns exec "$ns" nft list table inet filter + return 1 + fi + + if [ "$want" -gt 0 ]; then + echo "PASS: fib expression did drop packets for $address" + fi + + return 0 +} + +load_ruleset "$nsrouter" +load_ruleset "$ns1" +load_ruleset "$ns2" + +if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi +ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2" + +ip -net "$nsrouter" link set veth0 up +ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0 +ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad + +ip -net "$nsrouter" link set veth1 up +ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1 +ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad + +ip -net "$ns1" link set eth0 up +ip -net "$ns2" link set eth0 up + +ip -net "$ns1" addr add 10.0.1.99/24 dev eth0 +ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad +ip -net "$ns1" route add default via 10.0.1.1 +ip -net "$ns1" route add default via dead:1::1 + +ip -net "$ns2" addr add 10.0.2.99/24 dev eth0 +ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad +ip -net "$ns2" route add default via 10.0.2.1 +ip -net "$ns2" route add default via dead:2::1 + +test_ping() { + local daddr4=$1 + local daddr6=$2 + + if ! ip netns exec "$ns1" ping -c 1 -q "$daddr4" > /dev/null; then + check_drops + echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2 + return 1 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q "$daddr6" > /dev/null; then + check_drops + echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2 + return 1 + fi + + return 0 +} + +ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null + +test_ping 10.0.2.1 dead:2::1 || exit 1 +check_drops || exit 1 + +test_ping 10.0.2.99 dead:2::99 || exit 1 +check_drops || exit 1 + +echo "PASS: fib expression did not cause unwanted packet drops" + +ip netns exec "$nsrouter" nft flush table inet filter + +ip -net "$ns1" route del default +ip -net "$ns1" -6 route del default + +ip -net "$ns1" addr del 10.0.1.99/24 dev eth0 +ip -net "$ns1" addr del dead:1::99/64 dev eth0 + +ip -net "$ns1" addr add 10.0.2.99/24 dev eth0 +ip -net "$ns1" addr add dead:2::99/64 dev eth0 nodad + +ip -net "$ns1" route add default via 10.0.2.1 +ip -net "$ns1" -6 route add default via dead:2::1 + +ip -net "$nsrouter" addr add dead:2::1/64 dev veth0 nodad + +# switch to ruleset that doesn't log, this time +# its expected that this does drop the packets. +load_ruleset_count "$nsrouter" + +# ns1 has a default route, but nsrouter does not. +# must not check return value, ping to 1.1.1.1 will +# fail. +check_fib_counter 0 "$nsrouter" 1.1.1.1 || exit 1 +check_fib_counter 0 "$nsrouter" 1c3::c01d || exit 1 + +ip netns exec "$ns1" ping -W 0.5 -c 1 -q 1.1.1.1 > /dev/null +check_fib_counter 1 "$nsrouter" 1.1.1.1 || exit 1 + +ip netns exec "$ns1" ping -W 0.5 -i 0.1 -c 3 -q 1c3::c01d > /dev/null +check_fib_counter 3 "$nsrouter" 1c3::c01d || exit 1 + +# delete all rules +ip netns exec "$ns1" nft flush ruleset +ip netns exec "$ns2" nft flush ruleset +ip netns exec "$nsrouter" nft flush ruleset + +ip -net "$ns1" addr add 10.0.1.99/24 dev eth0 +ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad + +ip -net "$ns1" addr del 10.0.2.99/24 dev eth0 +ip -net "$ns1" addr del dead:2::99/64 dev eth0 + +ip -net "$nsrouter" addr del dead:2::1/64 dev veth0 + +# ... pbr ruleset for the router, check iif+oif. +if ! load_pbr_ruleset "$nsrouter";then + echo "SKIP: Could not load fib forward ruleset" + exit $ksft_skip +fi + +ip -net "$nsrouter" rule add from all table 128 +ip -net "$nsrouter" rule add from all iif veth0 table 129 +ip -net "$nsrouter" route add table 128 to 10.0.1.0/24 dev veth0 +ip -net "$nsrouter" route add table 129 to 10.0.2.0/24 dev veth1 + +# drop main ipv4 table +ip -net "$nsrouter" -4 rule delete table main + +if ! test_ping 10.0.2.99 dead:2::99;then + ip -net "$nsrouter" nft list ruleset + echo "FAIL: fib mismatch in pbr setup" + exit 1 +fi + +echo "PASS: fib expression forward check with policy based routing" +exit 0 diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh index a32f490f75..b399555085 100755 --- a/tools/testing/selftests/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh @@ -14,15 +14,10 @@ # nft_flowtable.sh -o8000 -l1500 -r2000 # -sfx=$(mktemp -u "XXXXXXXX") -ns1="ns1-$sfx" -ns2="ns2-$sfx" -nsr1="nsr1-$sfx" -nsr2="nsr2-$sfx" - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source lib.sh + ret=0 +SOCAT_TIMEOUT=60 nsin="" ns1out="" @@ -30,52 +25,41 @@ ns2out="" log_netns=$(sysctl -n net.netfilter.nf_log_all_netns) -checktool (){ - if ! $1 > /dev/null 2>&1; then - echo "SKIP: Could not $2" - exit $ksft_skip - fi -} - checktool "nft --version" "run test without nft tool" -checktool "ip -Version" "run test without ip tool" -checktool "which nc" "run test without nc (netcat)" -checktool "ip netns add $nsr1" "create net namespace $nsr1" +checktool "socat -h" "run test without socat" -ip netns add $ns1 -ip netns add $ns2 -ip netns add $nsr2 +setup_ns ns1 ns2 nsr1 nsr2 cleanup() { - ip netns del $ns1 - ip netns del $ns2 - ip netns del $nsr1 - ip netns del $nsr2 + ip netns pids "$ns1" | xargs kill 2>/dev/null + ip netns pids "$ns2" | xargs kill 2>/dev/null + + cleanup_all_ns rm -f "$nsin" "$ns1out" "$ns2out" - [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns + [ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns" } trap cleanup EXIT sysctl -q net.netfilter.nf_log_all_netns=1 -ip link add veth0 netns $nsr1 type veth peer name eth0 netns $ns1 -ip link add veth1 netns $nsr1 type veth peer name veth0 netns $nsr2 +ip link add veth0 netns "$nsr1" type veth peer name eth0 netns "$ns1" +ip link add veth1 netns "$nsr1" type veth peer name veth0 netns "$nsr2" -ip link add veth1 netns $nsr2 type veth peer name eth0 netns $ns2 +ip link add veth1 netns "$nsr2" type veth peer name eth0 netns "$ns2" -for dev in lo veth0 veth1; do - ip -net $nsr1 link set $dev up - ip -net $nsr2 link set $dev up +for dev in veth0 veth1; do + ip -net "$nsr1" link set "$dev" up + ip -net "$nsr2" link set "$dev" up done -ip -net $nsr1 addr add 10.0.1.1/24 dev veth0 -ip -net $nsr1 addr add dead:1::1/64 dev veth0 +ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0 +ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad -ip -net $nsr2 addr add 10.0.2.1/24 dev veth1 -ip -net $nsr2 addr add dead:2::1/64 dev veth1 +ip -net "$nsr2" addr add 10.0.2.1/24 dev veth1 +ip -net "$nsr2" addr add dead:2::1/64 dev veth1 nodad # set different MTUs so we need to push packets coming from ns1 (large MTU) # to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1), @@ -107,56 +91,63 @@ do esac done -if ! ip -net $nsr1 link set veth0 mtu $omtu; then +if ! ip -net "$nsr1" link set veth0 mtu "$omtu"; then + exit 1 +fi + +ip -net "$ns1" link set eth0 mtu "$omtu" + +if ! ip -net "$nsr2" link set veth1 mtu "$rmtu"; then exit 1 fi -ip -net $ns1 link set eth0 mtu $omtu +if ! ip -net "$nsr1" link set veth1 mtu "$lmtu"; then + exit 1 +fi -if ! ip -net $nsr2 link set veth1 mtu $rmtu; then +if ! ip -net "$nsr2" link set veth0 mtu "$lmtu"; then exit 1 fi -ip -net $ns2 link set eth0 mtu $rmtu +ip -net "$ns2" link set eth0 mtu "$rmtu" # transfer-net between nsr1 and nsr2. # these addresses are not used for connections. -ip -net $nsr1 addr add 192.168.10.1/24 dev veth1 -ip -net $nsr1 addr add fee1:2::1/64 dev veth1 +ip -net "$nsr1" addr add 192.168.10.1/24 dev veth1 +ip -net "$nsr1" addr add fee1:2::1/64 dev veth1 nodad -ip -net $nsr2 addr add 192.168.10.2/24 dev veth0 -ip -net $nsr2 addr add fee1:2::2/64 dev veth0 +ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0 +ip -net "$nsr2" addr add fee1:2::2/64 dev veth0 nodad for i in 0 1; do - ip netns exec $nsr1 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null - ip netns exec $nsr2 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null + ip netns exec "$nsr1" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null + ip netns exec "$nsr2" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null done -for ns in $ns1 $ns2;do - ip -net $ns link set lo up - ip -net $ns link set eth0 up +for ns in "$ns1" "$ns2";do + ip -net "$ns" link set eth0 up - if ! ip netns exec $ns sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then + if ! ip netns exec "$ns" sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then echo "ERROR: Check Originator/Responder values (problem during address addition)" exit 1 fi # don't set ip DF bit for first two tests - ip netns exec $ns sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null + ip netns exec "$ns" sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null done -ip -net $ns1 addr add 10.0.1.99/24 dev eth0 -ip -net $ns2 addr add 10.0.2.99/24 dev eth0 -ip -net $ns1 route add default via 10.0.1.1 -ip -net $ns2 route add default via 10.0.2.1 -ip -net $ns1 addr add dead:1::99/64 dev eth0 -ip -net $ns2 addr add dead:2::99/64 dev eth0 -ip -net $ns1 route add default via dead:1::1 -ip -net $ns2 route add default via dead:2::1 +ip -net "$ns1" addr add 10.0.1.99/24 dev eth0 +ip -net "$ns2" addr add 10.0.2.99/24 dev eth0 +ip -net "$ns1" route add default via 10.0.1.1 +ip -net "$ns2" route add default via 10.0.2.1 +ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad +ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad +ip -net "$ns1" route add default via dead:1::1 +ip -net "$ns2" route add default via dead:2::1 -ip -net $nsr1 route add default via 192.168.10.2 -ip -net $nsr2 route add default via 192.168.10.1 +ip -net "$nsr1" route add default via 192.168.10.2 +ip -net "$nsr2" route add default via 192.168.10.1 -ip netns exec $nsr1 nft -f - <<EOF +ip netns exec "$nsr1" nft -f - <<EOF table inet filter { flowtable f1 { hook ingress priority 0 @@ -188,7 +179,7 @@ if [ $? -ne 0 ]; then exit $ksft_skip fi -ip netns exec $ns2 nft -f - <<EOF +ip netns exec "$ns2" nft -f - <<EOF table inet filter { counter ip4dscp0 { } counter ip4dscp3 { } @@ -204,25 +195,22 @@ table inet filter { EOF if [ $? -ne 0 ]; then - echo "SKIP: Could not load nft ruleset" + echo -n "SKIP: Could not load ruleset: " + nft --version exit $ksft_skip fi # test basic connectivity -if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then +if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then echo "ERROR: $ns1 cannot reach ns2" 1>&2 exit 1 fi -if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then +if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then echo "ERROR: $ns2 cannot reach $ns1" 1>&2 exit 1 fi -if [ $ret -eq 0 ];then - echo "PASS: netns routing/connectivity: $ns1 can reach $ns2" -fi - nsin=$(mktemp) ns1out=$(mktemp) ns2out=$(mktemp) @@ -248,23 +236,27 @@ check_counters() local what=$1 local ok=1 - local orig=$(ip netns exec $nsr1 nft reset counter inet filter routed_orig | grep packets) - local repl=$(ip netns exec $nsr1 nft reset counter inet filter routed_repl | grep packets) + local orig repl + orig=$(ip netns exec "$nsr1" nft reset counter inet filter routed_orig | grep packets) + repl=$(ip netns exec "$nsr1" nft reset counter inet filter routed_repl | grep packets) local orig_cnt=${orig#*bytes} local repl_cnt=${repl#*bytes} - local fs=$(du -sb $nsin) + local fs + fs=$(du -sb "$nsin") local max_orig=${fs%%/*} local max_repl=$((max_orig/4)) - if [ $orig_cnt -gt $max_orig ];then + # flowtable fastpath should bypass normal routing one, i.e. the counters in forward hook + # should always be lower than the size of the transmitted file (max_orig). + if [ "$orig_cnt" -gt "$max_orig" ];then echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig" 1>&2 ret=1 ok=0 fi - if [ $repl_cnt -gt $max_repl ];then + if [ "$repl_cnt" -gt $max_repl ];then echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl" 1>&2 ret=1 ok=0 @@ -280,39 +272,40 @@ check_dscp() local what=$1 local ok=1 - local counter=$(ip netns exec $ns2 nft reset counter inet filter ip4dscp3 | grep packets) + local counter + counter=$(ip netns exec "$ns2" nft reset counter inet filter ip4dscp3 | grep packets) local pc4=${counter%*bytes*} local pc4=${pc4#*packets} - local counter=$(ip netns exec $ns2 nft reset counter inet filter ip4dscp0 | grep packets) + counter=$(ip netns exec "$ns2" nft reset counter inet filter ip4dscp0 | grep packets) local pc4z=${counter%*bytes*} local pc4z=${pc4z#*packets} case "$what" in "dscp_none") - if [ $pc4 -gt 0 ] || [ $pc4z -eq 0 ]; then + if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; "dscp_fwd") - if [ $pc4 -eq 0 ] || [ $pc4z -eq 0 ]; then + if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; "dscp_ingress") - if [ $pc4 -eq 0 ] || [ $pc4z -gt 0 ]; then + if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 ret=1 ok=0 fi ;; "dscp_egress") - if [ $pc4 -eq 0 ] || [ $pc4z -gt 0 ]; then + if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2 ret=1 ok=0 @@ -324,7 +317,7 @@ check_dscp() ok=0 esac - if [ $ok -eq 1 ] ;then + if [ "$ok" -eq 1 ] ;then echo "PASS: $what: dscp packet counters match" fi } @@ -345,6 +338,11 @@ check_transfer() return 0 } +listener_ready() +{ + ss -N "$nsb" -lnt -o "sport = :12345" | grep -q 12345 +} + test_tcp_forwarding_ip() { local nsa=$1 @@ -353,40 +351,23 @@ test_tcp_forwarding_ip() local dstport=$4 local lret=0 - ip netns exec $nsb nc -w 5 -l -p 12345 < "$nsin" > "$ns2out" & + timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$nsin" > "$ns2out" & lpid=$! - sleep 1 - ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$nsin" > "$ns1out" & - cpid=$! - - sleep 1 - - prev="$(ls -l $ns1out $ns2out)" - sleep 1 - - while [[ "$prev" != "$(ls -l $ns1out $ns2out)" ]]; do - sleep 1; - prev="$(ls -l $ns1out $ns2out)" - done + busywait 1000 listener_ready - if test -d /proc/"$lpid"/; then - kill $lpid - fi - - if test -d /proc/"$cpid"/; then - kill $cpid - fi + timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$nsin" > "$ns1out" wait $lpid - wait $cpid if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then lret=1 + ret=1 fi if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then lret=1 + ret=1 fi return $lret @@ -403,7 +384,7 @@ test_tcp_forwarding_set_dscp() { check_dscp "dscp_none" -ip netns exec $nsr1 nft -f - <<EOF +ip netns exec "$nsr1" nft -f - <<EOF table netdev dscpmangle { chain setdscp0 { type filter hook ingress device "veth0" priority 0; policy accept @@ -415,12 +396,12 @@ if [ $? -eq 0 ]; then test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 check_dscp "dscp_ingress" - ip netns exec $nsr1 nft delete table netdev dscpmangle + ip netns exec "$nsr1" nft delete table netdev dscpmangle else echo "SKIP: Could not load netdev:ingress for veth0" fi -ip netns exec $nsr1 nft -f - <<EOF +ip netns exec "$nsr1" nft -f - <<EOF table netdev dscpmangle { chain setdscp0 { type filter hook egress device "veth1" priority 0; policy accept @@ -432,14 +413,14 @@ if [ $? -eq 0 ]; then test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 check_dscp "dscp_egress" - ip netns exec $nsr1 nft flush table netdev dscpmangle + ip netns exec "$nsr1" nft flush table netdev dscpmangle else echo "SKIP: Could not load netdev:egress for veth1" fi # partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3 # counters should have seen packets (before and after ft offload kicks in). - ip netns exec $nsr1 nft -a insert rule inet filter forward ip dscp set cs3 + ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3 test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 check_dscp "dscp_fwd" } @@ -455,8 +436,8 @@ test_tcp_forwarding_nat() pmtu=$3 what=$4 - if [ $lret -eq 0 ] ; then - if [ $pmtu -eq 1 ] ;then + if [ "$lret" -eq 0 ] ; then + if [ "$pmtu" -eq 1 ] ;then check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what" else echo "PASS: flow offload for ns1/ns2 with masquerade $what" @@ -464,9 +445,9 @@ test_tcp_forwarding_nat() test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666 lret=$? - if [ $pmtu -eq 1 ] ;then + if [ "$pmtu" -eq 1 ] ;then check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what" - elif [ $lret -eq 0 ] ; then + elif [ "$lret" -eq 0 ] ; then echo "PASS: flow offload for ns1/ns2 with dnat $what" fi fi @@ -481,25 +462,25 @@ make_file "$nsin" # Due to MTU mismatch in both directions, all packets (except small packets like pure # acks) have to be handled by normal forwarding path. Therefore, packet counters # are not checked. -if test_tcp_forwarding $ns1 $ns2; then +if test_tcp_forwarding "$ns1" "$ns2"; then echo "PASS: flow offloaded for ns1/ns2" else echo "FAIL: flow offload for ns1/ns2:" 1>&2 - ip netns exec $nsr1 nft list ruleset + ip netns exec "$nsr1" nft list ruleset ret=1 fi # delete default route, i.e. ns2 won't be able to reach ns1 and # will depend on ns1 being masqueraded in nsr1. # expect ns1 has nsr1 address. -ip -net $ns2 route del default via 10.0.2.1 -ip -net $ns2 route del default via dead:2::1 -ip -net $ns2 route add 192.168.10.1 via 10.0.2.1 +ip -net "$ns2" route del default via 10.0.2.1 +ip -net "$ns2" route del default via dead:2::1 +ip -net "$ns2" route add 192.168.10.1 via 10.0.2.1 # Second test: # Same, but with NAT enabled. Same as in first test: we expect normal forward path # to handle most packets. -ip netns exec $nsr1 nft -f - <<EOF +ip netns exec "$nsr1" nft -f - <<EOF table ip nat { chain prerouting { type nat hook prerouting priority 0; policy accept; @@ -513,14 +494,14 @@ table ip nat { } EOF -if ! test_tcp_forwarding_set_dscp $ns1 $ns2 0 ""; then +if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2 exit 0 fi -if ! test_tcp_forwarding_nat $ns1 $ns2 0 ""; then +if ! test_tcp_forwarding_nat "$ns1" "$ns2" 0 ""; then echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2 - ip netns exec $nsr1 nft list ruleset + ip netns exec "$nsr1" nft list ruleset ret=1 fi @@ -528,35 +509,40 @@ fi # Same as second test, but with PMTU discovery enabled. This # means that we expect the fastpath to handle packets as soon # as the endpoints adjust the packet size. -ip netns exec $ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null -ip netns exec $ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null +ip netns exec "$ns1" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null +ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null # reset counters. # With pmtu in-place we'll also check that nft counters # are lower than file size and packets were forwarded via flowtable layer. # For earlier tests (large mtus), packets cannot be handled via flowtable # (except pure acks and other small packets). -ip netns exec $nsr1 nft reset counters table inet filter >/dev/null +ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null -if ! test_tcp_forwarding_nat $ns1 $ns2 1 ""; then +if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2 - ip netns exec $nsr1 nft list ruleset + ip netns exec "$nsr1" nft list ruleset fi # Another test: # Add bridge interface br0 to Router1, with NAT enabled. -ip -net $nsr1 link add name br0 type bridge -ip -net $nsr1 addr flush dev veth0 -ip -net $nsr1 link set up dev veth0 -ip -net $nsr1 link set veth0 master br0 -ip -net $nsr1 addr add 10.0.1.1/24 dev br0 -ip -net $nsr1 addr add dead:1::1/64 dev br0 -ip -net $nsr1 link set up dev br0 +test_bridge() { +if ! ip -net "$nsr1" link add name br0 type bridge 2>/dev/null;then + echo "SKIP: could not add bridge br0" + [ "$ret" -eq 0 ] && ret=$ksft_skip + return +fi +ip -net "$nsr1" addr flush dev veth0 +ip -net "$nsr1" link set up dev veth0 +ip -net "$nsr1" link set veth0 master br0 +ip -net "$nsr1" addr add 10.0.1.1/24 dev br0 +ip -net "$nsr1" addr add dead:1::1/64 dev br0 nodad +ip -net "$nsr1" link set up dev br0 -ip netns exec $nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null +ip netns exec "$nsr1" sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null # br0 with NAT enabled. -ip netns exec $nsr1 nft -f - <<EOF +ip netns exec "$nsr1" nft -f - <<EOF flush table ip nat table ip nat { chain prerouting { @@ -571,56 +557,59 @@ table ip nat { } EOF -if ! test_tcp_forwarding_nat $ns1 $ns2 1 "on bridge"; then +if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "on bridge"; then echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2 - ip netns exec $nsr1 nft list ruleset + ip netns exec "$nsr1" nft list ruleset ret=1 fi # Another test: # Add bridge interface br0 to Router1, with NAT and VLAN. -ip -net $nsr1 link set veth0 nomaster -ip -net $nsr1 link set down dev veth0 -ip -net $nsr1 link add link veth0 name veth0.10 type vlan id 10 -ip -net $nsr1 link set up dev veth0 -ip -net $nsr1 link set up dev veth0.10 -ip -net $nsr1 link set veth0.10 master br0 - -ip -net $ns1 addr flush dev eth0 -ip -net $ns1 link add link eth0 name eth0.10 type vlan id 10 -ip -net $ns1 link set eth0 up -ip -net $ns1 link set eth0.10 up -ip -net $ns1 addr add 10.0.1.99/24 dev eth0.10 -ip -net $ns1 route add default via 10.0.1.1 -ip -net $ns1 addr add dead:1::99/64 dev eth0.10 - -if ! test_tcp_forwarding_nat $ns1 $ns2 1 "bridge and VLAN"; then +ip -net "$nsr1" link set veth0 nomaster +ip -net "$nsr1" link set down dev veth0 +ip -net "$nsr1" link add link veth0 name veth0.10 type vlan id 10 +ip -net "$nsr1" link set up dev veth0 +ip -net "$nsr1" link set up dev veth0.10 +ip -net "$nsr1" link set veth0.10 master br0 + +ip -net "$ns1" addr flush dev eth0 +ip -net "$ns1" link add link eth0 name eth0.10 type vlan id 10 +ip -net "$ns1" link set eth0 up +ip -net "$ns1" link set eth0.10 up +ip -net "$ns1" addr add 10.0.1.99/24 dev eth0.10 +ip -net "$ns1" route add default via 10.0.1.1 +ip -net "$ns1" addr add dead:1::99/64 dev eth0.10 nodad + +if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "bridge and VLAN"; then echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2 - ip netns exec $nsr1 nft list ruleset + ip netns exec "$nsr1" nft list ruleset ret=1 fi # restore test topology (remove bridge and VLAN) -ip -net $nsr1 link set veth0 nomaster -ip -net $nsr1 link set veth0 down -ip -net $nsr1 link set veth0.10 down -ip -net $nsr1 link delete veth0.10 type vlan -ip -net $nsr1 link delete br0 type bridge -ip -net $ns1 addr flush dev eth0.10 -ip -net $ns1 link set eth0.10 down -ip -net $ns1 link set eth0 down -ip -net $ns1 link delete eth0.10 type vlan +ip -net "$nsr1" link set veth0 nomaster +ip -net "$nsr1" link set veth0 down +ip -net "$nsr1" link set veth0.10 down +ip -net "$nsr1" link delete veth0.10 type vlan +ip -net "$nsr1" link delete br0 type bridge +ip -net "$ns1" addr flush dev eth0.10 +ip -net "$ns1" link set eth0.10 down +ip -net "$ns1" link set eth0 down +ip -net "$ns1" link delete eth0.10 type vlan # restore address in ns1 and nsr1 -ip -net $ns1 link set eth0 up -ip -net $ns1 addr add 10.0.1.99/24 dev eth0 -ip -net $ns1 route add default via 10.0.1.1 -ip -net $ns1 addr add dead:1::99/64 dev eth0 -ip -net $ns1 route add default via dead:1::1 -ip -net $nsr1 addr add 10.0.1.1/24 dev veth0 -ip -net $nsr1 addr add dead:1::1/64 dev veth0 -ip -net $nsr1 link set up dev veth0 +ip -net "$ns1" link set eth0 up +ip -net "$ns1" addr add 10.0.1.99/24 dev eth0 +ip -net "$ns1" route add default via 10.0.1.1 +ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad +ip -net "$ns1" route add default via dead:1::1 +ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0 +ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad +ip -net "$nsr1" link set up dev veth0 +} + +test_bridge KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1) KEY_AES="0x"$(ps -af | md5sum | cut -d " " -f 1) @@ -640,33 +629,43 @@ do_esp() { local spi_out=$6 local spi_in=$7 - ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet - ip -net $ns xfrm state add src $me dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet + ip -net "$ns" xfrm state add src "$remote" dst "$me" proto esp spi "$spi_in" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$rnet" dst "$lnet" + ip -net "$ns" xfrm state add src "$me" dst "$remote" proto esp spi "$spi_out" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$lnet" dst "$rnet" # to encrypt packets as they go out (includes forwarded packets that need encapsulation) - ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 1 action allow + ip -net "$ns" xfrm policy add src "$lnet" dst "$rnet" dir out tmpl src "$me" dst "$remote" proto esp mode tunnel priority 1 action allow # to fwd decrypted packets after esp processing: - ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 1 action allow - + ip -net "$ns" xfrm policy add src "$rnet" dst "$lnet" dir fwd tmpl src "$remote" dst "$me" proto esp mode tunnel priority 1 action allow } -do_esp $nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 +do_esp "$nsr1" 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 "$SPI1" "$SPI2" -do_esp $nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 +do_esp "$nsr2" 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 "$SPI2" "$SPI1" -ip netns exec $nsr1 nft delete table ip nat +ip netns exec "$nsr1" nft delete table ip nat # restore default routes -ip -net $ns2 route del 192.168.10.1 via 10.0.2.1 -ip -net $ns2 route add default via 10.0.2.1 -ip -net $ns2 route add default via dead:2::1 +ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1 +ip -net "$ns2" route add default via 10.0.2.1 +ip -net "$ns2" route add default via dead:2::1 -if test_tcp_forwarding $ns1 $ns2; then +if test_tcp_forwarding "$ns1" "$ns2"; then check_counters "ipsec tunnel mode for ns1/ns2" else echo "FAIL: ipsec tunnel mode for ns1/ns2" - ip netns exec $nsr1 nft list ruleset 1>&2 - ip netns exec $nsr1 cat /proc/net/xfrm_stat 1>&2 + ip netns exec "$nsr1" nft list ruleset 1>&2 + ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2 +fi + +if [ "$1" = "" ]; then + low=1280 + mtu=$((65536 - low)) + o=$(((RANDOM%mtu) + low)) + l=$(((RANDOM%mtu) + low)) + r=$(((RANDOM%mtu) + low)) + + echo "re-run with random mtus: -o $o -l $l -r $r" + $0 -o "$o" -l "$l" -r "$r" fi exit $ret diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/net/netfilter/nft_meta.sh index f33154c04d..71505b6cb2 100755 --- a/tools/testing/selftests/netfilter/nft_meta.sh +++ b/tools/testing/selftests/net/netfilter/nft_meta.sh @@ -91,10 +91,10 @@ check_one_counter() local want="packets $2" local verbose="$3" - if ! ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want"; then + if ! ip netns exec "$ns0" nft list counter inet filter "$cname" | grep -q "$want"; then echo "FAIL: $cname, want \"$want\", got" ret=1 - ip netns exec "$ns0" nft list counter inet filter $cname + ip netns exec "$ns0" nft list counter inet filter "$cname" fi } diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/net/netfilter/nft_nat.sh index dd40d9f6f2..9e39de2645 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/net/netfilter/nft_nat.sh @@ -3,77 +3,60 @@ # This test is for basic NAT functionality: snat, dnat, redirect, masquerade. # -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source lib.sh + ret=0 test_inet_nat=true -sfx=$(mktemp -u "XXXXXXXX") -ns0="ns0-$sfx" -ns1="ns1-$sfx" -ns2="ns2-$sfx" +checktool "nft --version" "run test without nft tool" +checktool "socat -h" "run test without socat" cleanup() { - for i in 0 1 2; do ip netns del ns$i-"$sfx";done -} + ip netns pids "$ns0" | xargs kill 2>/dev/null + ip netns pids "$ns1" | xargs kill 2>/dev/null + ip netns pids "$ns2" | xargs kill 2>/dev/null -nft --version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without nft tool" - exit $ksft_skip -fi - -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi + rm -f "$INFILE" "$OUTFILE" -ip netns add "$ns0" -if [ $? -ne 0 ];then - echo "SKIP: Could not create net namespace $ns0" - exit $ksft_skip -fi + cleanup_all_ns +} trap cleanup EXIT -ip netns add "$ns1" -if [ $? -ne 0 ];then - echo "SKIP: Could not create net namespace $ns1" - exit $ksft_skip -fi +INFILE=$(mktemp) +OUTFILE=$(mktemp) -ip netns add "$ns2" -if [ $? -ne 0 ];then - echo "SKIP: Could not create net namespace $ns2" - exit $ksft_skip -fi +setup_ns ns0 ns1 ns2 -ip link add veth0 netns "$ns0" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1 -if [ $? -ne 0 ];then +if ! ip link add veth0 netns "$ns0" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1;then echo "SKIP: No virtual ethernet pair device support in kernel" exit $ksft_skip fi ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns2" -ip -net "$ns0" link set lo up ip -net "$ns0" link set veth0 up ip -net "$ns0" addr add 10.0.1.1/24 dev veth0 -ip -net "$ns0" addr add dead:1::1/64 dev veth0 +ip -net "$ns0" addr add dead:1::1/64 dev veth0 nodad ip -net "$ns0" link set veth1 up ip -net "$ns0" addr add 10.0.2.1/24 dev veth1 -ip -net "$ns0" addr add dead:2::1/64 dev veth1 - -for i in 1 2; do - ip -net ns$i-$sfx link set lo up - ip -net ns$i-$sfx link set eth0 up - ip -net ns$i-$sfx addr add 10.0.$i.99/24 dev eth0 - ip -net ns$i-$sfx route add default via 10.0.$i.1 - ip -net ns$i-$sfx addr add dead:$i::99/64 dev eth0 - ip -net ns$i-$sfx route add default via dead:$i::1 -done +ip -net "$ns0" addr add dead:2::1/64 dev veth1 nodad + +do_config() +{ + ns="$1" + subnet="$2" + + ip -net "$ns" link set eth0 up + ip -net "$ns" addr add "10.0.$subnet.99/24" dev eth0 + ip -net "$ns" route add default via "10.0.$subnet.1" + ip -net "$ns" addr add "dead:$subnet::99/64" dev eth0 nodad + ip -net "$ns" route add default via "dead:$subnet::1" +} + +do_config "$ns1" 1 +do_config "$ns2" 2 bad_counter() { @@ -83,7 +66,7 @@ bad_counter() local tag=$4 echo "ERROR: $counter counter in $ns has unexpected value (expected $expect) at $tag" 1>&2 - ip netns exec $ns nft list counter inet filter $counter 1>&2 + ip netns exec "$ns" nft list counter inet filter "$counter" 1>&2 } check_counters() @@ -91,26 +74,23 @@ check_counters() ns=$1 local lret=0 - cnt=$(ip netns exec $ns nft list counter inet filter ns0in | grep -q "packets 1 bytes 84") - if [ $? -ne 0 ]; then - bad_counter $ns ns0in "packets 1 bytes 84" "check_counters 1" + if ! ip netns exec "$ns" nft list counter inet filter ns0in | grep -q "packets 1 bytes 84";then + bad_counter "$ns" ns0in "packets 1 bytes 84" "check_counters 1" lret=1 fi - cnt=$(ip netns exec $ns nft list counter inet filter ns0out | grep -q "packets 1 bytes 84") - if [ $? -ne 0 ]; then - bad_counter $ns ns0out "packets 1 bytes 84" "check_counters 2" + + if ! ip netns exec "$ns" nft list counter inet filter ns0out | grep -q "packets 1 bytes 84";then + bad_counter "$ns" ns0out "packets 1 bytes 84" "check_counters 2" lret=1 fi expect="packets 1 bytes 104" - cnt=$(ip netns exec $ns nft list counter inet filter ns0in6 | grep -q "$expect") - if [ $? -ne 0 ]; then - bad_counter $ns ns0in6 "$expect" "check_counters 3" + if ! ip netns exec "$ns" nft list counter inet filter ns0in6 | grep -q "$expect";then + bad_counter "$ns" ns0in6 "$expect" "check_counters 3" lret=1 fi - cnt=$(ip netns exec $ns nft list counter inet filter ns0out6 | grep -q "$expect") - if [ $? -ne 0 ]; then - bad_counter $ns ns0out6 "$expect" "check_counters 4" + if ! ip netns exec "$ns" nft list counter inet filter ns0out6 | grep -q "$expect";then + bad_counter "$ns" ns0out6 "$expect" "check_counters 4" lret=1 fi @@ -122,41 +102,35 @@ check_ns0_counters() local ns=$1 local lret=0 - cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0in | grep -q "packets 0 bytes 0") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns0" nft list counter inet filter ns0in | grep -q "packets 0 bytes 0";then bad_counter "$ns0" ns0in "packets 0 bytes 0" "check_ns0_counters 1" lret=1 fi - cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns0" nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0";then bad_counter "$ns0" ns0in6 "packets 0 bytes 0" lret=1 fi - cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0out | grep -q "packets 0 bytes 0") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns0" nft list counter inet filter ns0out | grep -q "packets 0 bytes 0";then bad_counter "$ns0" ns0out "packets 0 bytes 0" "check_ns0_counters 2" lret=1 fi - cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns0" nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0";then bad_counter "$ns0" ns0out6 "packets 0 bytes 0" "check_ns0_counters3 " lret=1 fi for dir in "in" "out" ; do expect="packets 1 bytes 84" - cnt=$(ip netns exec "$ns0" nft list counter inet filter ${ns}${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then - bad_counter "$ns0" $ns$dir "$expect" "check_ns0_counters 4" + if ! ip netns exec "$ns0" nft list counter inet filter "${ns}${dir}" | grep -q "$expect";then + bad_counter "$ns0" "$ns${dir}" "$expect" "check_ns0_counters 4" lret=1 fi expect="packets 1 bytes 104" - cnt=$(ip netns exec "$ns0" nft list counter inet filter ${ns}${dir}6 | grep -q "$expect") - if [ $? -ne 0 ]; then - bad_counter "$ns0" $ns$dir6 "$expect" "check_ns0_counters 5" + if ! ip netns exec "$ns0" nft list counter inet filter "${ns}${dir}6" | grep -q "$expect";then + bad_counter "$ns0" "$ns${dir}6" "$expect" "check_ns0_counters 5" lret=1 fi done @@ -166,8 +140,8 @@ check_ns0_counters() reset_counters() { - for i in 0 1 2;do - ip netns exec ns$i-$sfx nft reset counters inet > /dev/null + for i in "$ns0" "$ns1" "$ns2" ;do + ip netns exec "$i" nft reset counters inet > /dev/null done } @@ -177,7 +151,7 @@ test_local_dnat6() local lret=0 local IPF="" - if [ $family = "inet" ];then + if [ "$family" = "inet" ];then IPF="ip6" fi @@ -195,8 +169,7 @@ EOF fi # ping netns1, expect rewrite to netns2 - ip netns exec "$ns0" ping -q -c 1 dead:1::99 > /dev/null - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns0" ping -q -c 1 dead:1::99 > /dev/null;then lret=1 echo "ERROR: ping6 failed" return $lret @@ -204,8 +177,7 @@ EOF expect="packets 0 bytes 0" for dir in "in6" "out6" ; do - cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then bad_counter "$ns0" ns1$dir "$expect" "test_local_dnat6 1" lret=1 fi @@ -213,8 +185,7 @@ EOF expect="packets 1 bytes 104" for dir in "in6" "out6" ; do - cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat6 2" lret=1 fi @@ -223,8 +194,7 @@ EOF # expect 0 count in ns1 expect="packets 0 bytes 0" for dir in "in6" "out6" ; do - cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then bad_counter "$ns1" ns0$dir "$expect" "test_local_dnat6 3" lret=1 fi @@ -233,8 +203,7 @@ EOF # expect 1 packet in ns2 expect="packets 1 bytes 104" for dir in "in6" "out6" ; do - cnt=$(ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns2" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat6 4" lret=1 fi @@ -252,7 +221,7 @@ test_local_dnat() local lret=0 local IPF="" - if [ $family = "inet" ];then + if [ "$family" = "inet" ];then IPF="ip" fi @@ -265,7 +234,7 @@ table $family nat { } EOF if [ $? -ne 0 ]; then - if [ $family = "inet" ];then + if [ "$family" = "inet" ];then echo "SKIP: inet nat tests" test_inet_nat=false return $ksft_skip @@ -275,8 +244,7 @@ EOF fi # ping netns1, expect rewrite to netns2 - ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null;then lret=1 echo "ERROR: ping failed" return $lret @@ -284,18 +252,16 @@ EOF expect="packets 0 bytes 0" for dir in "in" "out" ; do - cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then - bad_counter "$ns0" ns1$dir "$expect" "test_local_dnat 1" + if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then + bad_counter "$ns0" "ns1$dir" "$expect" "test_local_dnat 1" lret=1 fi done expect="packets 1 bytes 84" for dir in "in" "out" ; do - cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then - bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat 2" + if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then + bad_counter "$ns0" "ns2$dir" "$expect" "test_local_dnat 2" lret=1 fi done @@ -303,9 +269,8 @@ EOF # expect 0 count in ns1 expect="packets 0 bytes 0" for dir in "in" "out" ; do - cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then - bad_counter "$ns1" ns0$dir "$expect" "test_local_dnat 3" + if ! ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect";then + bad_counter "$ns1" "ns0$dir" "$expect" "test_local_dnat 3" lret=1 fi done @@ -313,20 +278,18 @@ EOF # expect 1 packet in ns2 expect="packets 1 bytes 84" for dir in "in" "out" ; do - cnt=$(ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then - bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat 4" + if ! ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect";then + bad_counter "$ns2" "ns0$dir" "$expect" "test_local_dnat 4" lret=1 fi done test $lret -eq 0 && echo "PASS: ping to $ns1 was $family NATted to $ns2" - ip netns exec "$ns0" nft flush chain $family nat output + ip netns exec "$ns0" nft flush chain "$family" nat output reset_counters - ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null;then lret=1 echo "ERROR: ping failed" return $lret @@ -334,16 +297,14 @@ EOF expect="packets 1 bytes 84" for dir in "in" "out" ; do - cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then bad_counter "$ns1" ns1$dir "$expect" "test_local_dnat 5" lret=1 fi done expect="packets 0 bytes 0" for dir in "in" "out" ; do - cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat 6" lret=1 fi @@ -352,8 +313,7 @@ EOF # expect 1 count in ns1 expect="packets 1 bytes 84" for dir in "in" "out" ; do - cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then bad_counter "$ns0" ns0$dir "$expect" "test_local_dnat 7" lret=1 fi @@ -362,8 +322,7 @@ EOF # expect 0 packet in ns2 expect="packets 0 bytes 0" for dir in "in" "out" ; do - cnt=$(ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns2" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat 8" lret=1 fi @@ -374,13 +333,19 @@ EOF return $lret } +listener_ready() +{ + local ns="$1" + local port="$2" + local proto="$3" + ss -N "$ns" -ln "$proto" -o "sport = :$port" | grep -q "$port" +} + test_local_dnat_portonly() { local family=$1 local daddr=$2 local lret=0 - local sr_s - local sr_r ip netns exec "$ns0" nft -f /dev/stdin <<EOF table $family nat { @@ -392,7 +357,7 @@ table $family nat { } EOF if [ $? -ne 0 ]; then - if [ $family = "inet" ];then + if [ "$family" = "inet" ];then echo "SKIP: inet port test" test_inet_nat=false return @@ -401,17 +366,16 @@ EOF return fi - echo SERVER-$family | ip netns exec "$ns1" timeout 5 socat -u STDIN TCP-LISTEN:2000 & - sc_s=$! + echo "SERVER-$family" | ip netns exec "$ns1" timeout 3 socat -u STDIN TCP-LISTEN:2000 & - sleep 1 + busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1" 2000 "-t" - result=$(ip netns exec "$ns0" timeout 1 socat TCP:$daddr:2000 STDOUT) + result=$(ip netns exec "$ns0" timeout 1 socat -u TCP:"$daddr":2000 STDOUT) if [ "$result" = "SERVER-inet" ];then echo "PASS: inet port rewrite without l3 address" else - echo "ERROR: inet port rewrite" + echo "ERROR: inet port rewrite without l3 address, got $result" ret=1 fi } @@ -424,24 +388,20 @@ test_masquerade6() ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null - ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 - if [ $? -ne 0 ] ; then + if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then echo "ERROR: cannot ping $ns1 from $ns2 via ipv6" return 1 - lret=1 fi expect="packets 1 bytes 104" for dir in "in6" "out6" ; do - cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then - bad_counter "$ns1" ns2$dir "$expect" "test_masquerade6 1" + if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then + bad_counter "$ns1" "ns2$dir" "$expect" "test_masquerade6 1" lret=1 fi - cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then - bad_counter "$ns2" ns1$dir "$expect" "test_masquerade6 2" + if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then + bad_counter "$ns2" "ns1$dir" "$expect" "test_masquerade6 2" lret=1 fi done @@ -462,8 +422,7 @@ EOF return $ksft_skip fi - ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 - if [ $? -ne 0 ] ; then + if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then echo "ERROR: cannot ping $ns1 from $ns2 with active $family masquerade $natflags" lret=1 fi @@ -471,14 +430,12 @@ EOF # ns1 should have seen packets from ns0, due to masquerade expect="packets 1 bytes 104" for dir in "in6" "out6" ; do - cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then bad_counter "$ns1" ns0$dir "$expect" "test_masquerade6 3" lret=1 fi - cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then bad_counter "$ns2" ns1$dir "$expect" "test_masquerade6 4" lret=1 fi @@ -487,27 +444,23 @@ EOF # ns1 should not have seen packets from ns2, due to masquerade expect="packets 0 bytes 0" for dir in "in6" "out6" ; do - cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then bad_counter "$ns1" ns0$dir "$expect" "test_masquerade6 5" lret=1 fi - cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then - bad_counter "$ns0" ns1$dir "$expect" "test_masquerade6 6" + if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then + bad_counter "$ns0" "ns1$dir" "$expect" "test_masquerade6 6" lret=1 fi done - ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 - if [ $? -ne 0 ] ; then + if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then echo "ERROR: cannot ping $ns1 from $ns2 with active ipv6 masquerade $natflags (attempt 2)" lret=1 fi - ip netns exec "$ns0" nft flush chain $family nat postrouting - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns0" nft flush chain "$family" nat postrouting;then echo "ERROR: Could not flush $family nat postrouting" 1>&2 lret=1 fi @@ -526,23 +479,20 @@ test_masquerade() ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null - ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 - if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping $ns1 from "$ns2" $natflags" + if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then + echo "ERROR: cannot ping $ns1 from $ns2 $natflags" lret=1 fi expect="packets 1 bytes 84" for dir in "in" "out" ; do - cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then - bad_counter "$ns1" ns2$dir "$expect" "test_masquerade 1" + if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then + bad_counter "$ns1" "ns2$dir" "$expect" "test_masquerade 1" lret=1 fi - cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then - bad_counter "$ns2" ns1$dir "$expect" "test_masquerade 2" + if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then + bad_counter "$ns2" "ns1$dir" "$expect" "test_masquerade 2" lret=1 fi done @@ -563,8 +513,7 @@ EOF return $ksft_skip fi - ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 - if [ $? -ne 0 ] ; then + if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then echo "ERROR: cannot ping $ns1 from $ns2 with active $family masquerade $natflags" lret=1 fi @@ -572,15 +521,13 @@ EOF # ns1 should have seen packets from ns0, due to masquerade expect="packets 1 bytes 84" for dir in "in" "out" ; do - cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then - bad_counter "$ns1" ns0$dir "$expect" "test_masquerade 3" + if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then + bad_counter "$ns1" "ns0$dir" "$expect" "test_masquerade 3" lret=1 fi - cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then - bad_counter "$ns2" ns1$dir "$expect" "test_masquerade 4" + if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then + bad_counter "$ns2" "ns1$dir" "$expect" "test_masquerade 4" lret=1 fi done @@ -588,27 +535,23 @@ EOF # ns1 should not have seen packets from ns2, due to masquerade expect="packets 0 bytes 0" for dir in "in" "out" ; do - cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then - bad_counter "$ns1" ns0$dir "$expect" "test_masquerade 5" + if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then + bad_counter "$ns1" "ns0$dir" "$expect" "test_masquerade 5" lret=1 fi - cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then - bad_counter "$ns0" ns1$dir "$expect" "test_masquerade 6" + if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then + bad_counter "$ns0" "ns1$dir" "$expect" "test_masquerade 6" lret=1 fi done - ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 - if [ $? -ne 0 ] ; then + if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then echo "ERROR: cannot ping $ns1 from $ns2 with active ip masquerade $natflags (attempt 2)" lret=1 fi - ip netns exec "$ns0" nft flush chain $family nat postrouting - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns0" nft flush chain "$family" nat postrouting; then echo "ERROR: Could not flush $family nat postrouting" 1>&2 lret=1 fi @@ -625,22 +568,19 @@ test_redirect6() ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null - ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 - if [ $? -ne 0 ] ; then + if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then echo "ERROR: cannnot ping $ns1 from $ns2 via ipv6" lret=1 fi expect="packets 1 bytes 104" for dir in "in6" "out6" ; do - cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then bad_counter "$ns1" ns2$dir "$expect" "test_redirect6 1" lret=1 fi - cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then bad_counter "$ns2" ns1$dir "$expect" "test_redirect6 2" lret=1 fi @@ -662,8 +602,7 @@ EOF return $ksft_skip fi - ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 - if [ $? -ne 0 ] ; then + if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then echo "ERROR: cannot ping $ns1 from $ns2 via ipv6 with active $family redirect" lret=1 fi @@ -671,8 +610,7 @@ EOF # ns1 should have seen no packets from ns2, due to redirection expect="packets 0 bytes 0" for dir in "in6" "out6" ; do - cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then bad_counter "$ns1" ns0$dir "$expect" "test_redirect6 3" lret=1 fi @@ -681,15 +619,13 @@ EOF # ns0 should have seen packets from ns2, due to masquerade expect="packets 1 bytes 104" for dir in "in6" "out6" ; do - cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then bad_counter "$ns1" ns0$dir "$expect" "test_redirect6 4" lret=1 fi done - ip netns exec "$ns0" nft delete table $family nat - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns0" nft delete table "$family" nat;then echo "ERROR: Could not delete $family nat table" 1>&2 lret=1 fi @@ -707,22 +643,19 @@ test_redirect() ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null - ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 - if [ $? -ne 0 ] ; then + if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then echo "ERROR: cannot ping $ns1 from $ns2" lret=1 fi expect="packets 1 bytes 84" for dir in "in" "out" ; do - cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then - bad_counter "$ns1" $ns2$dir "$expect" "test_redirect 1" + if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then + bad_counter "$ns1" "$ns2$dir" "$expect" "test_redirect 1" lret=1 fi - cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect";then bad_counter "$ns2" ns1$dir "$expect" "test_redirect 2" lret=1 fi @@ -744,8 +677,7 @@ EOF return $ksft_skip fi - ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 - if [ $? -ne 0 ] ; then + if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then echo "ERROR: cannot ping $ns1 from $ns2 with active $family ip redirect" lret=1 fi @@ -754,8 +686,7 @@ EOF expect="packets 0 bytes 0" for dir in "in" "out" ; do - cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then bad_counter "$ns1" ns0$dir "$expect" "test_redirect 3" lret=1 fi @@ -764,15 +695,13 @@ EOF # ns0 should have seen packets from ns2, due to masquerade expect="packets 1 bytes 84" for dir in "in" "out" ; do - cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then bad_counter "$ns0" ns0$dir "$expect" "test_redirect 4" lret=1 fi done - ip netns exec "$ns0" nft delete table $family nat - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns0" nft delete table "$family" nat;then echo "ERROR: Could not delete $family nat table" 1>&2 lret=1 fi @@ -803,13 +732,13 @@ test_port_shadow() # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405. echo "fake-entry" | ip netns exec "$ns2" timeout 1 socat -u STDIN UDP:"$daddrc":41404,sourceport=1405 - echo ROUTER | ip netns exec "$ns0" timeout 5 socat -u STDIN UDP4-LISTEN:1405 & - sc_r=$! + echo ROUTER | ip netns exec "$ns0" timeout 3 socat -T 3 -u STDIN UDP4-LISTEN:1405 2>/dev/null & + local sc_r=$! + echo CLIENT | ip netns exec "$ns2" timeout 3 socat -T 3 -u STDIN UDP4-LISTEN:1405,reuseport 2>/dev/null & + local sc_c=$! - echo CLIENT | ip netns exec "$ns2" timeout 5 socat -u STDIN UDP4-LISTEN:1405,reuseport & - sc_c=$! - - sleep 0.3 + busywait $BUSYWAIT_TIMEOUT listener_ready "$ns0" 1405 "-u" + busywait $BUSYWAIT_TIMEOUT listener_ready "$ns2" 1405 "-u" # ns1 tries to connect to ns0:1405. With default settings this should connect # to client, it matches the conntrack entry created above. @@ -846,7 +775,7 @@ table $family filter { EOF test_port_shadow "port-filter" "ROUTER" - ip netns exec "$ns0" nft delete table $family filter + ip netns exec "$ns0" nft delete table "$family" filter } # This prevents port shadow of router service via notrack. @@ -868,7 +797,7 @@ table $family raw { EOF test_port_shadow "port-notrack" "ROUTER" - ip netns exec "$ns0" nft delete table $family raw + ip netns exec "$ns0" nft delete table "$family" raw } # This prevents port shadow of router service via sport remap. @@ -886,21 +815,19 @@ table $family pat { EOF test_port_shadow "pat" "ROUTER" - ip netns exec "$ns0" nft delete table $family pat + ip netns exec "$ns0" nft delete table "$family" pat } test_port_shadowing() { local family="ip" - conntrack -h >/dev/null 2>&1 - if [ $? -ne 0 ];then + if ! conntrack -h >/dev/null 2>&1;then echo "SKIP: Could not run nat port shadowing test without conntrack tool" return fi - socat -h > /dev/null 2>&1 - if [ $? -ne 0 ];then + if ! socat -h > /dev/null 2>&1;then echo "SKIP: Could not run nat port shadowing test without socat tool" return fi @@ -946,8 +873,7 @@ test_stateless_nat_ip() ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null - ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 - if [ $? -ne 0 ] ; then + if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then echo "ERROR: cannot ping $ns1 from $ns2 before loading stateless rules" return 1 fi @@ -981,23 +907,20 @@ EOF reset_counters - ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 - if [ $? -ne 0 ] ; then + if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null; then echo "ERROR: cannot ping $ns1 from $ns2 with stateless rules" lret=1 fi # ns1 should have seen packets from .2.2, due to stateless rewrite. expect="packets 1 bytes 84" - cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect";then bad_counter "$ns1" ns0insl "$expect" "test_stateless 1" lret=1 fi for dir in "in" "out" ; do - cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect";then bad_counter "$ns2" ns1$dir "$expect" "test_stateless 2" lret=1 fi @@ -1006,14 +929,12 @@ EOF # ns1 should not have seen packets from ns2, due to masquerade expect="packets 0 bytes 0" for dir in "in" "out" ; do - cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect";then bad_counter "$ns1" ns0$dir "$expect" "test_stateless 3" lret=1 fi - cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect";then bad_counter "$ns0" ns1$dir "$expect" "test_stateless 4" lret=1 fi @@ -1021,8 +942,7 @@ EOF reset_counters - socat -h > /dev/null 2>&1 - if [ $? -ne 0 ];then + if ! socat -h > /dev/null 2>&1;then echo "SKIP: Could not run stateless nat frag test without socat tool" if [ $lret -eq 0 ]; then return $ksft_skip @@ -1032,42 +952,36 @@ EOF return $lret fi - local tmpfile=$(mktemp) - dd if=/dev/urandom of=$tmpfile bs=4096 count=1 2>/dev/null + dd if=/dev/urandom of="$INFILE" bs=4096 count=1 2>/dev/null - local outfile=$(mktemp) - ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:$outfile < /dev/null & - sc_r=$! + ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:"$OUTFILE" < /dev/null 2>/dev/null & + + busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1" 4233 "-u" - sleep 1 # re-do with large ping -> ip fragmentation - ip netns exec "$ns2" timeout 3 socat - UDP4-SENDTO:"10.0.1.99:4233" < "$tmpfile" > /dev/null - if [ $? -ne 0 ] ; then + if ! ip netns exec "$ns2" timeout 3 socat -u STDIN UDP4-SENDTO:"10.0.1.99:4233" < "$INFILE" > /dev/null;then echo "ERROR: failed to test udp $ns1 to $ns2 with stateless ip nat" 1>&2 lret=1 fi wait - cmp "$tmpfile" "$outfile" - if [ $? -ne 0 ]; then - ls -l "$tmpfile" "$outfile" + if ! cmp "$INFILE" "$OUTFILE";then + ls -l "$INFILE" "$OUTFILE" echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2 lret=1 fi - rm -f "$tmpfile" "$outfile" + :> "$OUTFILE" # ns1 should have seen packets from 2.2, due to stateless rewrite. expect="packets 3 bytes 4164" - cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect") - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect";then bad_counter "$ns1" ns0insl "$expect" "test_stateless 5" lret=1 fi - ip netns exec "$ns0" nft delete table ip stateless - if [ $? -ne 0 ]; then + if ! ip netns exec "$ns0" nft delete table ip stateless; then echo "ERROR: Could not delete table ip stateless" 1>&2 lret=1 fi @@ -1078,8 +992,8 @@ EOF } # ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99 -for i in 0 1 2; do -ip netns exec ns$i-$sfx nft -f /dev/stdin <<EOF +for i in "$ns0" "$ns1" "$ns2" ;do +ip netns exec "$i" nft -f /dev/stdin <<EOF table inet filter { counter ns0in {} counter ns1in {} @@ -1145,7 +1059,7 @@ done # special case for stateless nat check, counter needs to # be done before (input) ip defragmentation -ip netns exec ns1-$sfx nft -f /dev/stdin <<EOF +ip netns exec "$ns1" nft -f /dev/stdin <<EOF table inet filter { counter ns0insl {} @@ -1156,31 +1070,49 @@ table inet filter { } EOF -sleep 3 -# test basic connectivity -for i in 1 2; do - ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99 > /dev/null - if [ $? -ne 0 ];then - echo "ERROR: Could not reach other namespace(s)" 1>&2 - ret=1 - fi - - ip netns exec "$ns0" ping -c 1 -q dead:$i::99 > /dev/null - if [ $? -ne 0 ];then - echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2 - ret=1 - fi - check_counters ns$i-$sfx - if [ $? -ne 0 ]; then - ret=1 - fi - - check_ns0_counters ns$i - if [ $? -ne 0 ]; then - ret=1 - fi - reset_counters -done +ping_basic() +{ + i="$1" + if ! ip netns exec "$ns0" ping -c 1 -q 10.0."$i".99 > /dev/null;then + echo "ERROR: Could not reach other namespace(s)" 1>&2 + ret=1 + fi + + if ! ip netns exec "$ns0" ping -c 1 -q dead:"$i"::99 > /dev/null;then + echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2 + ret=1 + fi +} + +test_basic_conn() +{ + local nsexec + name="$1" + + nsexec=$(eval echo \$"$1") + + ping_basic 1 + ping_basic 2 + + if ! check_counters "$nsexec";then + return 1 + fi + + if ! check_ns0_counters "$name";then + return 1 + fi + + reset_counters + return 0 +} + +if ! test_basic_conn "ns1" ; then + echo "ERROR: basic test for ns1 failed" 1>&2 + exit 1 +fi +if ! test_basic_conn "ns2"; then + echo "ERROR: basic test for ns1 failed" 1>&2 +fi if [ $ret -eq 0 ];then echo "PASS: netns routing/connectivity: $ns0 can reach $ns1 and $ns2" diff --git a/tools/testing/selftests/netfilter/nft_nat_zones.sh b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh index b9ab37380f..3b81d88bdd 100755 --- a/tools/testing/selftests/netfilter/nft_nat_zones.sh +++ b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh @@ -3,17 +3,17 @@ # Test connection tracking zone and NAT source port reallocation support. # -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source lib.sh # Don't increase too much, 2000 clients should work # just fine but script can then take several minutes with # KASAN/debug builds. maxclients=100 -have_iperf=1 +have_socat=0 ret=0 +[ "$KSFT_MACHINE_SLOW" = yes ] && maxclients=40 # client1---. # veth1-. # | @@ -31,12 +31,6 @@ ret=0 # NAT Gateway is supposed to do port reallocation for each of the # connections. -sfx=$(mktemp -u "XXXXXXXX") -gw="ns-gw-$sfx" -cl1="ns-cl1-$sfx" -cl2="ns-cl2-$sfx" -srv="ns-srv-$sfx" - v4gc1=$(sysctl -n net.ipv4.neigh.default.gc_thresh1 2>/dev/null) v4gc2=$(sysctl -n net.ipv4.neigh.default.gc_thresh2 2>/dev/null) v4gc3=$(sysctl -n net.ipv4.neigh.default.gc_thresh3 2>/dev/null) @@ -46,61 +40,29 @@ v6gc3=$(sysctl -n net.ipv6.neigh.default.gc_thresh3 2>/dev/null) cleanup() { - ip netns del $gw - ip netns del $srv - for i in $(seq 1 $maxclients); do - ip netns del ns-cl$i-$sfx 2>/dev/null - done - - sysctl -q net.ipv4.neigh.default.gc_thresh1=$v4gc1 2>/dev/null - sysctl -q net.ipv4.neigh.default.gc_thresh2=$v4gc2 2>/dev/null - sysctl -q net.ipv4.neigh.default.gc_thresh3=$v4gc3 2>/dev/null - sysctl -q net.ipv6.neigh.default.gc_thresh1=$v6gc1 2>/dev/null - sysctl -q net.ipv6.neigh.default.gc_thresh2=$v6gc2 2>/dev/null - sysctl -q net.ipv6.neigh.default.gc_thresh3=$v6gc3 2>/dev/null + cleanup_all_ns + + sysctl -q net.ipv4.neigh.default.gc_thresh1="$v4gc1" 2>/dev/null + sysctl -q net.ipv4.neigh.default.gc_thresh2="$v4gc2" 2>/dev/null + sysctl -q net.ipv4.neigh.default.gc_thresh3="$v4gc3" 2>/dev/null + sysctl -q net.ipv6.neigh.default.gc_thresh1="$v6gc1" 2>/dev/null + sysctl -q net.ipv6.neigh.default.gc_thresh2="$v6gc2" 2>/dev/null + sysctl -q net.ipv6.neigh.default.gc_thresh3="$v6gc3" 2>/dev/null } -nft --version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without nft tool" - exit $ksft_skip -fi +checktool "nft --version" echo "run test without nft tool" +checktool "conntrack -V" "run test without conntrack tool" -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip +if socat -h >/dev/null 2>&1; then + have_socat=1 fi -conntrack -V > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without conntrack tool" - exit $ksft_skip -fi - -iperf3 -v >/dev/null 2>&1 -if [ $? -ne 0 ];then - have_iperf=0 -fi - -ip netns add "$gw" -if [ $? -ne 0 ];then - echo "SKIP: Could not create net namespace $gw" - exit $ksft_skip -fi -ip -net "$gw" link set lo up +setup_ns gw srv trap cleanup EXIT -ip netns add "$srv" -if [ $? -ne 0 ];then - echo "SKIP: Could not create server netns $srv" - exit $ksft_skip -fi - ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv" ip -net "$gw" link set veth0 up -ip -net "$srv" link set lo up ip -net "$srv" link set eth0 up sysctl -q net.ipv6.neigh.default.gc_thresh1=512 2>/dev/null @@ -110,55 +72,49 @@ sysctl -q net.ipv4.neigh.default.gc_thresh1=512 2>/dev/null sysctl -q net.ipv4.neigh.default.gc_thresh2=1024 2>/dev/null sysctl -q net.ipv4.neigh.default.gc_thresh3=4096 2>/dev/null -for i in $(seq 1 $maxclients);do - cl="ns-cl$i-$sfx" +for i in $(seq 1 "$maxclients");do + setup_ns "cl$i" - ip netns add "$cl" - if [ $? -ne 0 ];then - echo "SKIP: Could not create client netns $cl" - exit $ksft_skip - fi - ip link add veth$i netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1 - if [ $? -ne 0 ];then + cl=$(eval echo \$cl"$i") + if ! ip link add veth"$i" netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1;then echo "SKIP: No virtual ethernet pair device support in kernel" exit $ksft_skip fi done -for i in $(seq 1 $maxclients);do - cl="ns-cl$i-$sfx" - echo netns exec "$cl" ip link set lo up +for i in $(seq 1 "$maxclients");do + cl=$(eval echo \$cl"$i") echo netns exec "$cl" ip link set eth0 up echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2 - echo netns exec "$gw" ip link set veth$i up - echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.arp_ignore=2 - echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.rp_filter=0 + echo netns exec "$gw" ip link set "veth$i" up + echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".arp_ignore=2 + echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".rp_filter=0 # clients have same IP addresses. echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0 - echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0 + echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0 nodad echo netns exec "$cl" ip route add default via 10.1.0.2 dev eth0 echo netns exec "$cl" ip route add default via dead:1::2 dev eth0 # NB: same addresses on client-facing interfaces. - echo netns exec "$gw" ip addr add 10.1.0.2/24 dev veth$i - echo netns exec "$gw" ip addr add dead:1::2/64 dev veth$i + echo netns exec "$gw" ip addr add 10.1.0.2/24 dev "veth$i" + echo netns exec "$gw" ip addr add dead:1::2/64 dev "veth$i" nodad # gw: policy routing - echo netns exec "$gw" ip route add 10.1.0.0/24 dev veth$i table $((1000+i)) - echo netns exec "$gw" ip route add dead:1::0/64 dev veth$i table $((1000+i)) + echo netns exec "$gw" ip route add 10.1.0.0/24 dev "veth$i" table $((1000+i)) + echo netns exec "$gw" ip route add dead:1::0/64 dev "veth$i" table $((1000+i)) echo netns exec "$gw" ip route add 10.3.0.0/24 dev veth0 table $((1000+i)) echo netns exec "$gw" ip route add dead:3::0/64 dev veth0 table $((1000+i)) - echo netns exec "$gw" ip rule add fwmark $i lookup $((1000+i)) + echo netns exec "$gw" ip rule add fwmark "$i" lookup $((1000+i)) done | ip -batch /dev/stdin ip -net "$gw" addr add 10.3.0.1/24 dev veth0 -ip -net "$gw" addr add dead:3::1/64 dev veth0 +ip -net "$gw" addr add dead:3::1/64 dev veth0 nodad ip -net "$srv" addr add 10.3.0.99/24 dev eth0 -ip -net "$srv" addr add dead:3::99/64 dev eth0 +ip -net "$srv" addr add dead:3::99/64 dev eth0 nodad -ip netns exec $gw nft -f /dev/stdin<<EOF +ip netns exec "$gw" nft -f /dev/stdin<<EOF table inet raw { map iiftomark { type ifname : mark @@ -203,18 +159,22 @@ table inet raw { } } EOF +if [ "$?" -ne 0 ];then + echo "SKIP: Could not add nftables rules" + exit $ksft_skip +fi ( echo add element inet raw iiftomark \{ for i in $(seq 1 $((maxclients-1))); do - echo \"veth$i\" : $i, + echo \"veth"$i"\" : "$i", done - echo \"veth$maxclients\" : $maxclients \} + echo \"veth"$maxclients"\" : "$maxclients" \} echo add element inet raw iiftozone \{ for i in $(seq 1 $((maxclients-1))); do - echo \"veth$i\" : $i, + echo \"veth"$i"\" : "$i", done echo \"veth$maxclients\" : $maxclients \} -) | ip netns exec $gw nft -f /dev/stdin +) | ip netns exec "$gw" nft -f /dev/stdin ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null @@ -224,73 +184,72 @@ ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null ip netns exec "$gw" sysctl -q net.ipv6.fwmark_reflect=1 > /dev/null -for i in $(seq 1 $maxclients); do - cl="ns-cl$i-$sfx" - ip netns exec $cl ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 & - if [ $? -ne 0 ]; then - echo FAIL: Ping failure from $cl 1>&2 - ret=1 - break - fi +for i in $(seq 1 "$maxclients"); do + cl=$(eval echo \$cl"$i") + ip netns exec "$cl" ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 & done -wait +wait || ret=1 -for i in $(seq 1 $maxclients); do - ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }" - if [ $? -ne 0 ];then +[ "$ret" -ne 0 ] && "FAIL: Ping failure from $cl" 1>&2 + +for i in $(seq 1 "$maxclients"); do + if ! ip netns exec "$gw" nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }"; then ret=1 echo "FAIL: counter icmp mismatch for veth$i" 1>&2 - ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2 + ip netns exec "$gw" nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2 break fi done -ip netns exec $gw nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }" -if [ $? -ne 0 ];then +if ! ip netns exec "$gw" nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * maxclients)) bytes $((252 * maxclients)) }"; then ret=1 - echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }" - ip netns exec $gw nft get element inet raw inicmp "{ 10.3.99 . \"veth0\" . 10.3.0.1 }" 1>&2 + echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * maxclients)) bytes $((252 * maxclients)) }" + ip netns exec "$gw" nft get element inet raw inicmp "{ 10.3.99 . \"veth0\" . 10.3.0.1 }" 1>&2 fi -if [ $ret -eq 0 ]; then +if [ $ret -eq 0 ]; then echo "PASS: ping test from all $maxclients namespaces" fi -if [ $have_iperf -eq 0 ];then - echo "SKIP: iperf3 not installed" +if [ $have_socat -eq 0 ];then + echo "SKIP: socat not installed" if [ $ret -ne 0 ];then exit $ret fi exit $ksft_skip fi -ip netns exec $srv iperf3 -s > /dev/null 2>&1 & -iperfpid=$! -sleep 1 +listener_ready() +{ + ss -N "$1" -lnt -o "sport = :5201" | grep -q 5201 +} + +ip netns exec "$srv" socat -u TCP-LISTEN:5201,fork STDOUT > /dev/null 2>/dev/null & +socatpid=$! + +busywait 1000 listener_ready "$srv" -for i in $(seq 1 $maxclients); do +for i in $(seq 1 "$maxclients"); do if [ $ret -ne 0 ]; then break fi - cl="ns-cl$i-$sfx" - ip netns exec $cl iperf3 -c 10.3.0.99 --cport 10000 -n 1 > /dev/null - if [ $? -ne 0 ]; then - echo FAIL: Failure to connect for $cl 1>&2 - ip netns exec $gw conntrack -S 1>&2 + cl=$(eval echo \$cl"$i") + if ! ip netns exec "$cl" socat -4 -u STDIN TCP:10.3.0.99:5201,sourceport=10000 < /dev/null > /dev/null; then + echo "FAIL: Failure to connect for $cl" 1>&2 + ip netns exec "$gw" conntrack -S 1>&2 ret=1 fi done if [ $ret -eq 0 ];then - echo "PASS: iperf3 connections for all $maxclients net namespaces" + echo "PASS: socat connections for all $maxclients net namespaces" fi -kill $iperfpid +kill $socatpid wait -for i in $(seq 1 $maxclients); do - ip netns exec $gw nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null - if [ $? -ne 0 ];then +for i in $(seq 1 "$maxclients"); do + if ! ip netns exec "$gw" nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null;then ret=1 echo "FAIL: can't find expected tcp entry for veth$i" 1>&2 break @@ -300,8 +259,7 @@ if [ $ret -eq 0 ];then echo "PASS: Found client connection for all $maxclients net namespaces" fi -ip netns exec $gw nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null -if [ $? -ne 0 ];then +if ! ip netns exec "$gw" nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null;then ret=1 echo "FAIL: cannot find return entry on veth0" 1>&2 fi diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh new file mode 100755 index 0000000000..8538f08c64 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -0,0 +1,417 @@ +#!/bin/bash +# +# This tests nf_queue: +# 1. can process packets from all hooks +# 2. support running nfqueue from more than one base chain +# +# shellcheck disable=SC2162,SC2317 + +source lib.sh +ret=0 +timeout=2 + +cleanup() +{ + ip netns pids "$ns1" | xargs kill 2>/dev/null + ip netns pids "$ns2" | xargs kill 2>/dev/null + ip netns pids "$nsrouter" | xargs kill 2>/dev/null + + cleanup_all_ns + + rm -f "$TMPINPUT" + rm -f "$TMPFILE0" + rm -f "$TMPFILE1" + rm -f "$TMPFILE2" "$TMPFILE3" +} + +checktool "nft --version" "test without nft tool" + +trap cleanup EXIT + +setup_ns ns1 ns2 nsrouter + +TMPFILE0=$(mktemp) +TMPFILE1=$(mktemp) +TMPFILE2=$(mktemp) +TMPFILE3=$(mktemp) + +TMPINPUT=$(mktemp) +dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT" + +if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi +ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2" + +ip -net "$nsrouter" link set veth0 up +ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0 +ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad + +ip -net "$nsrouter" link set veth1 up +ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1 +ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad + +ip -net "$ns1" link set eth0 up +ip -net "$ns2" link set eth0 up + +ip -net "$ns1" addr add 10.0.1.99/24 dev eth0 +ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad +ip -net "$ns1" route add default via 10.0.1.1 +ip -net "$ns1" route add default via dead:1::1 + +ip -net "$ns2" addr add 10.0.2.99/24 dev eth0 +ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad +ip -net "$ns2" route add default via 10.0.2.1 +ip -net "$ns2" route add default via dead:2::1 + +load_ruleset() { + local name=$1 + local prio=$2 + +ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +table inet $name { + chain nfq { + ip protocol icmp queue bypass + icmpv6 type { "echo-request", "echo-reply" } queue num 1 bypass + } + chain pre { + type filter hook prerouting priority $prio; policy accept; + jump nfq + } + chain input { + type filter hook input priority $prio; policy accept; + jump nfq + } + chain forward { + type filter hook forward priority $prio; policy accept; + tcp dport 12345 queue num 2 + jump nfq + } + chain output { + type filter hook output priority $prio; policy accept; + tcp dport 12345 queue num 3 + tcp sport 23456 queue num 3 + jump nfq + } + chain post { + type filter hook postrouting priority $prio; policy accept; + jump nfq + } +} +EOF +} + +load_counter_ruleset() { + local prio=$1 + +ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +table inet countrules { + chain pre { + type filter hook prerouting priority $prio; policy accept; + counter + } + chain input { + type filter hook input priority $prio; policy accept; + counter + } + chain forward { + type filter hook forward priority $prio; policy accept; + counter + } + chain output { + type filter hook output priority $prio; policy accept; + counter + } + chain post { + type filter hook postrouting priority $prio; policy accept; + counter + } +} +EOF +} + +test_ping() { + if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then + return 1 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:2::99 > /dev/null; then + return 2 + fi + + return 0 +} + +test_ping_router() { + if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.1 > /dev/null; then + return 3 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:2::1 > /dev/null; then + return 4 + fi + + return 0 +} + +test_queue_blackhole() { + local proto=$1 + +ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +table $proto blackh { + chain forward { + type filter hook forward priority 0; policy accept; + queue num 600 + } +} +EOF + if [ "$proto" = "ip" ] ;then + ip netns exec "$ns1" ping -W 2 -c 1 -q 10.0.2.99 > /dev/null + lret=$? + elif [ "$proto" = "ip6" ]; then + ip netns exec "$ns1" ping -W 2 -c 1 -q dead:2::99 > /dev/null + lret=$? + else + lret=111 + fi + + # queue without bypass keyword should drop traffic if no listener exists. + if [ "$lret" -eq 0 ];then + echo "FAIL: $proto expected failure, got $lret" 1>&2 + exit 1 + fi + + if ! ip netns exec "$nsrouter" nft delete table "$proto" blackh; then + echo "FAIL: $proto: Could not delete blackh table" + exit 1 + fi + + echo "PASS: $proto: statement with no listener results in packet drop" +} + +nf_queue_wait() +{ + local procfile="/proc/self/net/netfilter/nfnetlink_queue" + local netns id + + netns="$1" + id="$2" + + # if this file doesn't exist, nfnetlink_module isn't loaded. + # rather than loading it ourselves, wait for kernel module autoload + # completion, nfnetlink should do so automatically because nf_queue + # helper program, spawned in the background, asked for this functionality. + test -f "$procfile" && + ip netns exec "$netns" cat "$procfile" | grep -q "^ *$id " +} + +test_queue() +{ + local expected="$1" + local last="" + + # spawn nf_queue listeners + ip netns exec "$nsrouter" ./nf_queue -c -q 0 -t $timeout > "$TMPFILE0" & + ip netns exec "$nsrouter" ./nf_queue -c -q 1 -t $timeout > "$TMPFILE1" & + + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 0 + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 1 + + if ! test_ping;then + echo "FAIL: netns routing/connectivity with active listener on queues 0 and 1: $ret" 1>&2 + exit $ret + fi + + if ! test_ping_router;then + echo "FAIL: netns router unreachable listener on queue 0 and 1: $ret" 1>&2 + exit $ret + fi + + wait + ret=$? + + for file in $TMPFILE0 $TMPFILE1; do + last=$(tail -n1 "$file") + if [ x"$last" != x"$expected packets total" ]; then + echo "FAIL: Expected $expected packets total, but got $last" 1>&2 + ip netns exec "$nsrouter" nft list ruleset + exit 1 + fi + done + + echo "PASS: Expected and received $last" +} + +listener_ready() +{ + ss -N "$1" -lnt -o "sport = :12345" | grep -q 12345 +} + +test_tcp_forward() +{ + ip netns exec "$nsrouter" ./nf_queue -q 2 -t "$timeout" & + local nfqpid=$! + + timeout 5 ip netns exec "$ns2" socat -u TCP-LISTEN:12345 STDOUT >/dev/null & + local rpid=$! + + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" + + ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null + + wait "$rpid" && echo "PASS: tcp and nfqueue in forward chain" +} + +test_tcp_localhost() +{ + dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT" + timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null & + local rpid=$! + + ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" & + local nfqpid=$! + + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" + + ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null + + wait "$rpid" && echo "PASS: tcp via loopback" + wait 2>/dev/null +} + +test_tcp_localhost_connectclose() +{ + ip netns exec "$nsrouter" ./connect_close -p 23456 -t "$timeout" & + ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" & + + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3 + + wait && echo "PASS: tcp via loopback with connect/close" + wait 2>/dev/null +} + +test_tcp_localhost_requeue() +{ +ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +flush ruleset +table inet filter { + chain output { + type filter hook output priority 0; policy accept; + tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0 + } + chain post { + type filter hook postrouting priority 0; policy accept; + tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0 + } +} +EOF + timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null & + local rpid=$! + + ip netns exec "$nsrouter" ./nf_queue -c -q 1 -t "$timeout" > "$TMPFILE2" & + + # nfqueue 1 will be called via output hook. But this time, + # re-queue the packet to nfqueue program on queue 2. + ip netns exec "$nsrouter" ./nf_queue -G -d 150 -c -q 0 -Q 1 -t "$timeout" > "$TMPFILE3" & + + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" + ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" > /dev/null + + wait + + if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then + echo "FAIL: lost packets during requeue?!" 1>&2 + return + fi + + echo "PASS: tcp via loopback and re-queueing" +} + +test_icmp_vrf() { + if ! ip -net "$ns1" link add tvrf type vrf table 9876;then + echo "SKIP: Could not add vrf device" + return + fi + + ip -net "$ns1" li set eth0 master tvrf + ip -net "$ns1" li set tvrf up + + ip -net "$ns1" route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876 +ip netns exec "$ns1" nft -f /dev/stdin <<EOF +flush ruleset +table inet filter { + chain output { + type filter hook output priority 0; policy accept; + meta oifname "tvrf" icmp type echo-request counter queue num 1 + meta oifname "eth0" icmp type echo-request counter queue num 1 + } + chain post { + type filter hook postrouting priority 0; policy accept; + meta oifname "tvrf" icmp type echo-request counter queue num 1 + meta oifname "eth0" icmp type echo-request counter queue num 1 + } +} +EOF + ip netns exec "$ns1" ./nf_queue -q 1 -t "$timeout" & + local nfqpid=$! + + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 1 + + ip netns exec "$ns1" ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null + + for n in output post; do + for d in tvrf eth0; do + if ! ip netns exec "$ns1" nft list chain inet filter "$n" | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"; then + echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2 + ip netns exec "$ns1" nft list ruleset + ret=1 + return + fi + done + done + + wait "$nfqpid" && echo "PASS: icmp+nfqueue via vrf" + wait 2>/dev/null +} + +ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + +load_ruleset "filter" 0 + +if test_ping; then + # queue bypass works (rules were skipped, no listener) + echo "PASS: ${ns1} can reach ${ns2}" +else + echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2 + exit $ret +fi + +test_queue_blackhole ip +test_queue_blackhole ip6 + +# dummy ruleset to add base chains between the +# queueing rules. We don't want the second reinject +# to re-execute the old hooks. +load_counter_ruleset 10 + +# we are hooking all: prerouting/input/forward/output/postrouting. +# we ping ${ns2} from ${ns1} via ${nsrouter} using ipv4 and ipv6, so: +# 1x icmp prerouting,forward,postrouting -> 3 queue events (6 incl. reply). +# 1x icmp prerouting,input,output postrouting -> 4 queue events incl. reply. +# so we expect that userspace program receives 10 packets. +test_queue 10 + +# same. We queue to a second program as well. +load_ruleset "filter2" 20 +test_queue 20 + +test_tcp_forward +test_tcp_localhost +test_tcp_localhost_connectclose +test_tcp_localhost_requeue +test_icmp_vrf + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_synproxy.sh b/tools/testing/selftests/net/netfilter/nft_synproxy.sh new file mode 100755 index 0000000000..293f667a6a --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_synproxy.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +ret=0 + +checktool "nft --version" "run test without nft tool" +checktool "iperf3 --version" "run test without iperf3" + +setup_ns nsr ns1 ns2 + +modprobe -q nf_conntrack + +cleanup() { + ip netns pids "$ns1" | xargs kill 2>/dev/null + ip netns pids "$ns2" | xargs kill 2>/dev/null + + cleanup_all_ns +} + +trap cleanup EXIT + +ip link add veth0 netns "$nsr" type veth peer name eth0 netns "$ns1" +ip link add veth1 netns "$nsr" type veth peer name eth0 netns "$ns2" + +for dev in veth0 veth1; do + ip -net "$nsr" link set "$dev" up +done + +ip -net "$nsr" addr add 10.0.1.1/24 dev veth0 +ip -net "$nsr" addr add 10.0.2.1/24 dev veth1 + +ip netns exec "$nsr" sysctl -q net.ipv4.conf.veth0.forwarding=1 +ip netns exec "$nsr" sysctl -q net.ipv4.conf.veth1.forwarding=1 +ip netns exec "$nsr" sysctl -q net.netfilter.nf_conntrack_tcp_loose=0 + +for n in $ns1 $ns2; do + ip -net "$n" link set eth0 up +done +ip -net "$ns1" addr add 10.0.1.99/24 dev eth0 +ip -net "$ns2" addr add 10.0.2.99/24 dev eth0 +ip -net "$ns1" route add default via 10.0.1.1 +ip -net "$ns2" route add default via 10.0.2.1 + +# test basic connectivity +if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then + echo "ERROR: $ns1 cannot reach $ns2" 1>&2 + exit 1 +fi + +if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then + echo "ERROR: $ns2 cannot reach $ns1" 1>&2 + exit 1 +fi + +ip netns exec "$ns2" iperf3 -s > /dev/null 2>&1 & +# ip netns exec $nsr tcpdump -vvv -n -i veth1 tcp | head -n 10 & + +sleep 1 + +ip netns exec "$nsr" nft -f - <<EOF +table inet filter { + chain prerouting { + type filter hook prerouting priority -300; policy accept; + meta iif veth0 tcp flags syn counter notrack + } + + chain forward { + type filter hook forward priority 0; policy accept; + + ct state new,established counter accept + + meta iif veth0 meta l4proto tcp ct state untracked,invalid synproxy mss 1460 sack-perm timestamp + + ct state invalid counter drop + + # make ns2 unreachable w.o. tcp synproxy + tcp flags syn counter drop + } +} +EOF +if [ $? -ne 0 ]; then + echo "SKIP: Cannot add nft synproxy" + exit $ksft_skip +fi + +if ! ip netns exec "$ns1" timeout 5 iperf3 -c 10.0.2.99 -n $((1 * 1024 * 1024)) > /dev/null; then + echo "FAIL: iperf3 returned an error" 1>&2 + ret=1 + ip netns exec "$nsr" nft list ruleset +else + echo "PASS: synproxy connection successful" +fi + +exit $ret diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/net/netfilter/nft_zones_many.sh index 5a8db0b489..7db9982ba5 100755 --- a/tools/testing/selftests/netfilter/nft_zones_many.sh +++ b/tools/testing/selftests/net/netfilter/nft_zones_many.sh @@ -3,47 +3,34 @@ # Test insertion speed for packets with identical addresses/ports # that are all placed in distinct conntrack zones. -sfx=$(mktemp -u "XXXXXXXX") -ns="ns-$sfx" - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source lib.sh zones=2000 +[ "$KSFT_MACHINE_SLOW" = yes ] && zones=500 + have_ct_tool=0 ret=0 cleanup() { - ip netns del $ns -} - -checktool (){ - if ! $1 > /dev/null 2>&1; then - echo "SKIP: Could not $2" - exit $ksft_skip - fi + cleanup_all_ns } checktool "nft --version" "run test without nft tool" -checktool "ip -Version" "run test without ip tool" checktool "socat -V" "run test without socat tool" -checktool "ip netns add $ns" "create net namespace" + +setup_ns ns1 trap cleanup EXIT -conntrack -V > /dev/null 2>&1 -if [ $? -eq 0 ];then +if conntrack -V > /dev/null 2>&1; then have_ct_tool=1 fi -ip -net "$ns" link set lo up - test_zones() { local max_zones=$1 -ip netns exec $ns sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600 -ip netns exec $ns nft -f /dev/stdin<<EOF +ip netns exec "$ns1" nft -f /dev/stdin<<EOF flush ruleset table inet raw { map rndzone { @@ -56,29 +43,39 @@ table inet raw { } } EOF +if [ "$?" -ne 0 ];then + echo "SKIP: Cannot add nftables rules" + exit $ksft_skip +fi + + ip netns exec "$ns1" sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600 + ( echo "add element inet raw rndzone {" - for i in $(seq 1 $max_zones);do + for i in $(seq 1 "$max_zones");do echo -n "$i : $i" - if [ $i -lt $max_zones ]; then + if [ "$i" -lt "$max_zones" ]; then echo "," else echo "}" fi done - ) | ip netns exec $ns nft -f /dev/stdin + ) | ip netns exec "$ns1" nft -f /dev/stdin local i=0 local j=0 - local outerstart=$(date +%s%3N) - local stop=$outerstart - - while [ $i -lt $max_zones ]; do - local start=$(date +%s%3N) + local outerstart + local stop + outerstart=$(date +%s%3N) + stop=$outerstart + + while [ "$i" -lt "$max_zones" ]; do + local start + start=$(date +%s%3N) i=$((i + 1000)) j=$((j + 1)) # nft rule in output places each packet in a different zone. - dd if=/dev/zero of=/dev/stdout bs=8k count=1000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345 + dd if=/dev/zero bs=8k count=1000 2>/dev/null | ip netns exec "$ns1" socat -u STDIN UDP:127.0.0.1:12345,sourceport=12345 if [ $? -ne 0 ] ;then ret=1 break @@ -89,14 +86,15 @@ EOF echo "PASS: added 1000 entries in $duration ms (now $i total, loop $j)" done - if [ $have_ct_tool -eq 1 ]; then - local count=$(ip netns exec "$ns" conntrack -C) - local duration=$((stop-outerstart)) + if [ "$have_ct_tool" -eq 1 ]; then + local count duration + count=$(ip netns exec "$ns1" conntrack -C) + duration=$((stop-outerstart)) - if [ $count -eq $max_zones ]; then + if [ "$count" -ge "$max_zones" ]; then echo "PASS: inserted $count entries from packet path in $duration ms total" else - ip netns exec $ns conntrack -S 1>&2 + ip netns exec "$ns1" conntrack -S 1>&2 echo "FAIL: inserted $count entries from packet path in $duration ms total, expected $max_zones entries" ret=1 fi @@ -110,18 +108,19 @@ EOF test_conntrack_tool() { local max_zones=$1 - ip netns exec $ns conntrack -F >/dev/null 2>/dev/null + ip netns exec "$ns1" conntrack -F >/dev/null 2>/dev/null - local outerstart=$(date +%s%3N) - local start=$(date +%s%3N) - local stop=$start - local i=0 - while [ $i -lt $max_zones ]; do + local outerstart start stop i + outerstart=$(date +%s%3N) + start=$(date +%s%3N) + stop="$start" + i=0 + while [ "$i" -lt "$max_zones" ]; do i=$((i + 1)) - ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \ + ip netns exec "$ns1" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \ --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i >/dev/null 2>&1 if [ $? -ne 0 ];then - ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \ + ip netns exec "$ns1" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \ --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i > /dev/null echo "FAIL: conntrack -I returned an error" ret=1 @@ -137,13 +136,15 @@ test_conntrack_tool() { fi done - local count=$(ip netns exec "$ns" conntrack -C) - local duration=$((stop-outerstart)) + local count + local duration + count=$(ip netns exec "$ns1" conntrack -C) + duration=$((stop-outerstart)) - if [ $count -eq $max_zones ]; then + if [ "$count" -eq "$max_zones" ]; then echo "PASS: inserted $count entries via ctnetlink in $duration ms" else - ip netns exec $ns conntrack -S 1>&2 + ip netns exec "$ns1" conntrack -S 1>&2 echo "FAIL: inserted $count entries via ctnetlink in $duration ms, expected $max_zones entries ($duration ms)" ret=1 fi @@ -151,7 +152,7 @@ test_conntrack_tool() { test_zones $zones -if [ $have_ct_tool -eq 1 ];then +if [ "$have_ct_tool" -eq 1 ];then test_conntrack_tool $zones else echo "SKIP: Could not run ctnetlink insertion test without conntrack tool" diff --git a/tools/testing/selftests/net/netfilter/packetdrill/common.sh b/tools/testing/selftests/net/netfilter/packetdrill/common.sh new file mode 100755 index 0000000000..ed36d53519 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/packetdrill/common.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# for debugging set net.netfilter.nf_log_all_netns=1 in init_net +# or do not use net namespaces. +modprobe -q nf_conntrack +sysctl -q net.netfilter.nf_conntrack_log_invalid=6 + +# Flush old cached data (fastopen cookies). +ip tcp_metrics flush all > /dev/null 2>&1 + +# TCP min, default, and max receive and send buffer sizes. +sysctl -q net.ipv4.tcp_rmem="4096 540000 $((15*1024*1024))" +sysctl -q net.ipv4.tcp_wmem="4096 $((256*1024)) 4194304" + +# TCP congestion control. +sysctl -q net.ipv4.tcp_congestion_control=cubic + +# TCP slow start after idle. +sysctl -q net.ipv4.tcp_slow_start_after_idle=0 + +# TCP Explicit Congestion Notification (ECN) +sysctl -q net.ipv4.tcp_ecn=0 + +sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1 + +# Override the default qdisc on the tun device. +# Many tests fail with timing errors if the default +# is FQ and that paces their flows. +tc qdisc add dev tun0 root pfifo + +# Enable conntrack +$xtables -A INPUT -m conntrack --ctstate NEW -p tcp --syn diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt new file mode 100644 index 0000000000..d755bd64c5 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt @@ -0,0 +1,118 @@ +// check that already-acked (retransmitted) packet is let through rather +// than tagged as INVALID. + +`packetdrill/common.sh` + +// should set -P DROP but it disconnects VM w.o. extra netns ++0 `$xtables -A INPUT -m conntrack --ctstate INVALID -j DROP` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 10) = 0 + ++0 < S 0:0(0) win 32792 <mss 1000> ++0 > S. 0:0(0) ack 1 <mss 1460> ++.01 < . 1:1(0) ack 1 win 65535 ++0 accept(3, ..., ...) = 4 + ++0.0001 < P. 1:1461(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 1461 win 65535 ++0.0001 < P. 1461:2921(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 2921 win 65535 ++0.0001 < P. 2921:4381(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 4381 win 65535 ++0.0001 < P. 4381:5841(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 5841 win 65535 ++0.0001 < P. 5841:7301(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 7301 win 65535 ++0.0001 < P. 7301:8761(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 8761 win 65535 ++0.0001 < P. 8761:10221(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 10221 win 65535 ++0.0001 < P. 10221:11681(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 11681 win 65535 ++0.0001 < P. 11681:13141(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 13141 win 65535 ++0.0001 < P. 13141:14601(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 14601 win 65535 ++0.0001 < P. 14601:16061(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 16061 win 65535 ++0.0001 < P. 16061:17521(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 17521 win 65535 ++0.0001 < P. 17521:18981(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 18981 win 65535 ++0.0001 < P. 18981:20441(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 20441 win 65535 ++0.0001 < P. 20441:21901(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 21901 win 65535 ++0.0001 < P. 21901:23361(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 23361 win 65535 ++0.0001 < P. 23361:24821(1460) ack 1 win 257 +0.055 > . 1:1(0) ack 24821 win 65535 ++0.0001 < P. 24821:26281(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 26281 win 65535 ++0.0001 < P. 26281:27741(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 27741 win 65535 ++0.0001 < P. 27741:29201(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 29201 win 65535 ++0.0001 < P. 29201:30661(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 30661 win 65535 ++0.0001 < P. 30661:32121(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 32121 win 65535 ++0.0001 < P. 32121:33581(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 33581 win 65535 ++0.0001 < P. 33581:35041(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 35041 win 65535 ++0.0001 < P. 35041:36501(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 36501 win 65535 ++0.0001 < P. 36501:37961(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 37961 win 65535 ++0.0001 < P. 37961:39421(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 39421 win 65535 ++0.0001 < P. 39421:40881(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 40881 win 65535 ++0.0001 < P. 40881:42341(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 42341 win 65535 ++0.0001 < P. 42341:43801(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 43801 win 65535 ++0.0001 < P. 43801:45261(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 45261 win 65535 ++0.0001 < P. 45261:46721(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 46721 win 65535 ++0.0001 < P. 46721:48181(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 48181 win 65535 ++0.0001 < P. 48181:49641(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 49641 win 65535 ++0.0001 < P. 49641:51101(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 51101 win 65535 ++0.0001 < P. 51101:52561(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 52561 win 65535 ++0.0001 < P. 52561:54021(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 54021 win 65535 ++0.0001 < P. 54021:55481(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 55481 win 65535 ++0.0001 < P. 55481:56941(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 56941 win 65535 ++0.0001 < P. 56941:58401(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 58401 win 65535 ++0.0001 < P. 58401:59861(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 59861 win 65535 ++0.0001 < P. 59861:61321(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 61321 win 65535 ++0.0001 < P. 61321:62781(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 62781 win 65535 ++0.0001 < P. 62781:64241(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 64241 win 65535 ++0.0001 < P. 64241:65701(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 65701 win 65535 ++0.0001 < P. 65701:67161(1460) ack 1 win 257 ++.0 > . 1:1(0) ack 67161 win 65535 + +// nf_ct_proto_6: SEQ is under the lower bound (already ACKed data retransmitted) IN=tun0 OUT= MAC= SRC=192.0.2.1 DST=192.168.24.72 LEN=1500 TOS=0x00 PREC=0x00 TTL=255 ID=0 PROTO=TCP SPT=34375 DPT=8080 SEQ=1 ACK=4162510439 WINDOW=257 RES=0x00 ACK PSH URGP=0 ++0.0001 < P. 1:1461(1460) ack 1 win 257 + +// only sent if above packet isn't flagged as invalid ++.0 > . 1:1(0) ack 67161 win 65535 + ++0 `$xtables -D INPUT -m conntrack --ctstate INVALID -j DROP` diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt new file mode 100644 index 0000000000..dccdd4c009 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt @@ -0,0 +1,62 @@ +// check RST packet that doesn't exactly match expected next sequence +// number still transitions conntrack state to CLOSE iff its already in +// FIN/CLOSE_WAIT. + +`packetdrill/common.sh` + +// 5.771921 server_ip > client_ip TLSv1.2 337 [Packet size limited during capture] +// 5.771994 server_ip > client_ip TLSv1.2 337 [Packet size limited during capture] +// 5.772212 client_ip > server_ip TCP 66 45020 > 443 [ACK] Seq=1905874048 Ack=781810658 Win=36352 Len=0 TSval=3317842872 TSecr=675936334 +// 5.787924 server_ip > client_ip TLSv1.2 1300 [Packet size limited during capture] +// 5.788126 server_ip > client_ip TLSv1.2 90 Application Data +// 5.788207 server_ip > client_ip TCP 66 443 > 45020 [FIN, ACK] Seq=781811916 Ack=1905874048 Win=31104 Len=0 TSval=675936350 TSecr=3317842872 +// 5.788447 client_ip > server_ip TLSv1.2 90 Application Data +// 5.788479 client_ip > server_ip TCP 66 45020 > 443 [RST, ACK] Seq=1905874072 Ack=781811917 Win=39040 Len=0 TSval=3317842889 TSecr=675936350 +// 5.788581 server_ip > client_ip TCP 54 8443 > 45020 [RST] Seq=781811892 Win=0 Len=0 + ++0 `iptables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP` ++0 `iptables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8> + ++0.1 < S. 1:1(0) ack 1 win 65535 <mss 1460> + ++0 > . 1:1(0) ack 1 win 65535 ++0 < . 1:1001(1000) ack 1 win 65535 ++0 < . 1001:2001(1000) ack 1 win 65535 ++0 < . 2001:3001(1000) ack 1 win 65535 + ++0 > . 1:1(0) ack 1001 win 65535 ++0 > . 1:1(0) ack 2001 win 65535 ++0 > . 1:1(0) ack 3001 win 65535 + ++0 write(3, ..., 1000) = 1000 + ++0.0 > P. 1:1001(1000) ack 3001 win 65535 + ++0.1 read(3, ..., 1000) = 1000 + +// Conntrack should move to FIN_WAIT, then CLOSE_WAIT. ++0 < F. 3001:3001(0) ack 1001 win 65535 ++0 > . 1001:1001(0) ack 3002 win 65535 + ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q CLOSE_WAIT` + ++1 close(3) = 0 +// RST: unread data. FIN was seen, hence ack + 1 ++0 > R. 1001:1001(0) ack 3002 win 65535 +// ... and then, CLOSE. ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q CLOSE\ ` + +// Spurious RST from peer -- no sk state. Should NOT get +// marked INVALID, because conntrack is already closing. ++0.1 < R 2001:2001(0) win 0 + +// No packets should have been marked INVALID ++0 `iptables -v -S INPUT | grep INVALID | grep -q -- "-c 0 0"` ++0 `iptables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"` diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt new file mode 100644 index 0000000000..686f18a3d9 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt @@ -0,0 +1,59 @@ +// check that out of window resets are marked as INVALID and conntrack remains +// in ESTABLISHED state. + +`packetdrill/common.sh` + ++0 `$xtables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP` ++0 `$xtables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8> + ++0.1 < S. 1:1(0) ack 1 win 65535 <mss 1460> + ++0 > . 1:1(0) ack 1 win 65535 ++0 < . 1:1001(1000) ack 1 win 65535 ++0 < . 1001:2001(1000) ack 1 win 65535 ++0 < . 2001:3001(1000) ack 1 win 65535 + ++0 > . 1:1(0) ack 1001 win 65535 ++0 > . 1:1(0) ack 2001 win 65535 ++0 > . 1:1(0) ack 3001 win 65535 + ++0 write(3, ..., 1000) = 1000 + +// out of window ++0.0 < R 0:0(0) win 0 ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED` + +// out of window ++0.0 < R 1000000:1000000(0) win 0 ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED` + +// in-window but not exact match ++0.0 < R 42:42(0) win 0 ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED` + ++0.0 > P. 1:1001(1000) ack 3001 win 65535 + ++0.1 read(3, ..., 1000) = 1000 ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED` + ++0 < . 3001:3001(0) ack 1001 win 65535 + ++0.0 < R. 3000:3000(0) ack 1001 win 0 ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED` + +// exact next sequence ++0.0 < R. 3001:3001(0) ack 1001 win 0 +// Conntrack should move to CLOSE + +// Expect four invalid RSTs ++0 `$xtables -v -S INPUT | grep INVALID | grep -q -- "-c 4 "` ++0 `$xtables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"` + ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q CLOSE\ ` diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt new file mode 100644 index 0000000000..3442cd29bc --- /dev/null +++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt @@ -0,0 +1,44 @@ +// Check connection re-use, i.e. peer that receives the SYN answers with +// a challenge-ACK. +// Check that conntrack lets all packets pass, including the challenge ack, +// and that a new connection is established. + +`packetdrill/common.sh` + +// S > +// . < (challnge-ack) +// R. > +// S > +// S. < +// Expected outcome: established connection. + ++0 `$xtables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP` ++0 `$xtables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) +0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8> + +// Challenge ACK, old incarnation. +0.1 < . 145824453:145824453(0) ack 643160523 win 240 <mss 1460,nop,nop,TS val 1 ecr 1,nop,wscale 0> + ++0.01 > R 643160523:643160523(0) win 0 + ++0.01 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT` + +// Must go through. ++0.01 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8> + +// correct synack ++0.1 < S. 0:0(0) ack 1 win 250 <mss 1460,nop,nop,TS val 1 ecr 1,nop,wscale 0> + +// 3whs completes. ++0.01 > . 1:1(0) ack 1 win 256 <nop,nop,TS val 1 ecr 1> + ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep ESTABLISHED | grep -q ASSURED` + +// No packets should have been marked INVALID ++0 `$xtables -v -S INPUT | grep INVALID | grep -q -- "-c 0 0"` ++0 `$xtables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"` diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt new file mode 100644 index 0000000000..3047160c4b --- /dev/null +++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt @@ -0,0 +1,51 @@ +// Check conntrack copes with syn/ack reply for a previous, old incarnation. + +// tcpdump with buggy sequence +// 10.176.25.8.829 > 10.192.171.30.2049: Flags [S], seq 2375731741, win 29200, options [mss 1460,sackOK,TS val 2083107423 ecr 0,nop,wscale 7], length 0 +// OLD synack, for old/previous S +// 10.192.171.30.2049 > 10.176.25.8.829: Flags [S.], seq 145824453, ack 643160523, win 65535, options [mss 8952,nop,wscale 5,TS val 3215437785 ecr 2082921663,nop,nop], length 0 +// This reset never makes it to the endpoint, elided in the packetdrill script +// 10.192.171.30.2049 > 10.176.25.8.829: Flags [R.], seq 1, ack 1, win 65535, options [mss 8952,nop,wscale 5,TS val 3215443451 ecr 2082921663,nop,nop], length 0 +// Syn retransmit, no change +// 10.176.25.8.829 > 10.192.171.30.2049: Flags [S], seq 2375731741, win 29200, options [mss 1460,sackOK,TS val 2083115583 ecr 0,nop,wscale 7], length 0 +// CORRECT synack, should be accepted, but conntrack classified this as INVALID: +// SEQ is over the upper bound (over the window of the receiver) IN=tun0 OUT= MAC= SRC=192.0.2.1 DST=192.168.37.78 LEN=40 TOS=0x00 PREC=0x00 TTL=255 ID=0 PROTO=TCP SPT=8080 DPT=34500 SEQ=162602411 ACK=2124350315 .. +// 10.192.171.30.2049 > 10.176.25.8.829: Flags [S.], seq 162602410, ack 2375731742, win 65535, options [mss 8952,nop,wscale 5,TS val 3215445754 ecr 2083115583,nop,nop], length 0 + +`packetdrill/common.sh` + ++0 `$xtables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP` ++0 `$xtables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) +0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8> + +// bogus/outdated synack, invalid ack value +0.1 < S. 145824453:145824453(0) ack 643160523 win 240 <mss 1440,nop,nop,TS val 1 ecr 1,nop,wscale 0> + +// syn retransmitted +1.01 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1015 ecr 0,nop,wscale 8> ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT` + +// correct synack ++0 < S. 145758918:145758918(0) ack 1 win 250 <mss 1460,nop,nop,TS val 1 ecr 1,nop,wscale 0> ++0 write(3, ..., 1) = 1 + +// with buggy conntrack above packet is dropped, so SYN rtx is seen: +// script packet: 1.054007 . 1:1(0) ack 16777958 win 256 <nop,nop,TS val 1033 ecr 1> +// actual packet: 3.010000 S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1015 ecr 0,nop,wscale 8> ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep ESTABLISHED | grep -q ASSURED` + ++0 > P. 1:2(1) ack 4294901762 win 256 <nop,nop,TS val 1067 ecr 1> + ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep ASSURED | grep -q ESTABLISHED` + +// No packets should have been marked INVALID in OUTPUT direction, 1 in INPUT ++0 `$xtables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"` ++0 `$xtables -v -S INPUT | grep INVALID | grep -q -- "-c 1 "` + ++0 `$xtables -D INPUT -p tcp -m conntrack --ctstate INVALID -j DROP` ++0 `$xtables -D OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP` diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt new file mode 100644 index 0000000000..842242f8cc --- /dev/null +++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt @@ -0,0 +1,34 @@ +// Check reception of another SYN while we have an established conntrack state. +// Challenge ACK is supposed to pass through, RST reply should clear conntrack +// state and SYN retransmit should give us new 'SYN_RECV' connection state. + +`packetdrill/common.sh` + +// should show a match if bug is present: ++0 `iptables -A INPUT -m conntrack --ctstate INVALID -p tcp --tcp-flags SYN,ACK SYN,ACK` + ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 ++0 bind(3, ..., ...) = 0 ++0 listen(3, 10) = 0 + ++0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7, TS val 1 ecr 0,nop,nop> ++0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,TS val 100 ecr 1,nop,wscale 8> ++.01 < . 1:1(0) ack 1 win 257 <TS val 1 ecr 100,nop,nop> ++0 accept(3, ..., ...) = 4 + ++0 < P. 1:101(100) ack 1 win 257 <TS val 2 ecr 100,nop,nop> ++.001 > . 1:1(0) ack 101 win 256 <nop,nop,TS val 110 ecr 2> ++0 read(4, ..., 101) = 100 + +1.0 < S 2000:2000(0) win 32792 <mss 1000,nop,wscale 7, TS val 233 ecr 0,nop,nop> +// Won't expect this: challenge ack. + ++0 > . 1:1(0) ack 101 win 256 <nop,nop,TS val 112 ecr 2> ++0 < R. 101:101(0) ack 1 win 257 ++0 close(4) = 0 + +1.5 < S 2000:2000(0) win 32792 <mss 1000,nop,wscale 0, TS val 233 ecr 0,nop,nop> + ++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep -q SYN_RECV` ++0 `iptables -v -S INPUT | grep INVALID | grep -q -- "-c 0 0"` diff --git a/tools/testing/selftests/netfilter/rpath.sh b/tools/testing/selftests/net/netfilter/rpath.sh index 5289c8447a..4485fd7675 100755 --- a/tools/testing/selftests/netfilter/rpath.sh +++ b/tools/testing/selftests/net/netfilter/rpath.sh @@ -64,12 +64,18 @@ ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad # firewall matches to test [ -n "$iptables" ] && { common='-t raw -A PREROUTING -s 192.168.0.0/16' - ip netns exec "$ns2" "$iptables" $common -m rpfilter + if ! ip netns exec "$ns2" "$iptables" $common -m rpfilter;then + echo "Cannot add rpfilter rule" + exit $ksft_skip + fi ip netns exec "$ns2" "$iptables" $common -m rpfilter --invert } [ -n "$ip6tables" ] && { common='-t raw -A PREROUTING -s fec0::/16' - ip netns exec "$ns2" "$ip6tables" $common -m rpfilter + if ! ip netns exec "$ns2" "$ip6tables" $common -m rpfilter;then + echo "Cannot add rpfilter rule" + exit $ksft_skip + fi ip netns exec "$ns2" "$ip6tables" $common -m rpfilter --invert } [ -n "$nft" ] && ip netns exec "$ns2" $nft -f - <<EOF diff --git a/tools/testing/selftests/netfilter/sctp_collision.c b/tools/testing/selftests/net/netfilter/sctp_collision.c index 21bb1cfd8a..21bb1cfd8a 100644 --- a/tools/testing/selftests/netfilter/sctp_collision.c +++ b/tools/testing/selftests/net/netfilter/sctp_collision.c diff --git a/tools/testing/selftests/net/netfilter/settings b/tools/testing/selftests/net/netfilter/settings new file mode 100644 index 0000000000..abc5648b59 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/settings @@ -0,0 +1 @@ +timeout=1800 diff --git a/tools/testing/selftests/netfilter/xt_string.sh b/tools/testing/selftests/net/netfilter/xt_string.sh index 1802653a47..8d401c69e3 100755 --- a/tools/testing/selftests/netfilter/xt_string.sh +++ b/tools/testing/selftests/net/netfilter/xt_string.sh @@ -5,53 +5,57 @@ ksft_skip=4 rc=0 -if ! iptables --version >/dev/null 2>&1; then - echo "SKIP: Test needs iptables" - exit $ksft_skip -fi -if ! ip -V >/dev/null 2>&1; then - echo "SKIP: Test needs iproute2" - exit $ksft_skip -fi -if ! nc -h >/dev/null 2>&1; then - echo "SKIP: Test needs netcat" - exit $ksft_skip -fi +source lib.sh + +checktool "socat -h" "run test without socat" +checktool "iptables --version" "test needs iptables" + +infile=$(mktemp) + +cleanup() +{ + ip netns del "$netns" + rm -f "$infile" +} + +trap cleanup EXIT + +setup_ns netns + +ip -net "$netns" link add d0 type dummy +ip -net "$netns" link set d0 up +ip -net "$netns" addr add 10.1.2.1/24 dev d0 pattern="foo bar baz" patlen=11 hdrlen=$((20 + 8)) # IPv4 + UDP -ns="ns-$(mktemp -u XXXXXXXX)" -trap 'ip netns del $ns' EXIT -ip netns add "$ns" -ip -net "$ns" link add d0 type dummy -ip -net "$ns" link set d0 up -ip -net "$ns" addr add 10.1.2.1/24 dev d0 - -#ip netns exec "$ns" tcpdump -npXi d0 & + +#ip netns exec "$netns" tcpdump -npXi d0 & #tcpdump_pid=$! -#trap 'kill $tcpdump_pid; ip netns del $ns' EXIT +#trap 'kill $tcpdump_pid; ip netns del $netns' EXIT add_rule() { # (alg, from, to) - ip netns exec "$ns" \ + ip netns exec "$netns" \ iptables -A OUTPUT -o d0 -m string \ - --string "$pattern" --algo $1 --from $2 --to $3 + --string "$pattern" --algo "$1" --from "$2" --to "$3" } showrules() { # () - ip netns exec "$ns" iptables -v -S OUTPUT | grep '^-A' + ip netns exec "$netns" iptables -v -S OUTPUT | grep '^-A' } zerorules() { - ip netns exec "$ns" iptables -Z OUTPUT + ip netns exec "$netns" iptables -Z OUTPUT } countrule() { # (pattern) showrules | grep -c -- "$*" } send() { # (offset) - ( for ((i = 0; i < $1 - $hdrlen; i++)); do - printf " " + ( for ((i = 0; i < $1 - hdrlen; i++)); do + echo -n " " done - printf "$pattern" - ) | ip netns exec "$ns" nc -w 1 -u 10.1.2.2 27374 + echo -n "$pattern" + ) > "$infile" + + ip netns exec "$netns" socat -t 1 -u STDIN UDP-SENDTO:10.1.2.2:27374 < "$infile" } add_rule bm 1000 1500 @@ -61,8 +65,8 @@ add_rule kmp 1400 1600 zerorules send 0 -send $((1000 - $patlen)) -if [ $(countrule -c 0 0) -ne 4 ]; then +send $((1000 - patlen)) +if [ "$(countrule -c 0 0)" -ne 4 ]; then echo "FAIL: rules match data before --from" showrules ((rc--)) @@ -70,16 +74,16 @@ fi zerorules send 1000 -send $((1400 - $patlen)) -if [ $(countrule -c 2) -ne 2 ]; then +send $((1400 - patlen)) +if [ "$(countrule -c 2)" -ne 2 ]; then echo "FAIL: only two rules should match at low offset" showrules ((rc--)) fi zerorules -send $((1500 - $patlen)) -if [ $(countrule -c 1) -ne 4 ]; then +send $((1500 - patlen)) +if [ "$(countrule -c 1)" -ne 4 ]; then echo "FAIL: all rules should match at end of packet" showrules ((rc--)) @@ -87,7 +91,7 @@ fi zerorules send 1495 -if [ $(countrule -c 1) -ne 1 ]; then +if [ "$(countrule -c 1)" -ne 1 ]; then echo "FAIL: only kmp with proper --to should match pattern spanning fragments" showrules ((rc--)) @@ -95,23 +99,23 @@ fi zerorules send 1500 -if [ $(countrule -c 1) -ne 2 ]; then +if [ "$(countrule -c 1)" -ne 2 ]; then echo "FAIL: two rules should match pattern at start of second fragment" showrules ((rc--)) fi zerorules -send $((1600 - $patlen)) -if [ $(countrule -c 1) -ne 2 ]; then +send $((1600 - patlen)) +if [ "$(countrule -c 1)" -ne 2 ]; then echo "FAIL: two rules should match pattern at end of largest --to" showrules ((rc--)) fi zerorules -send $((1600 - $patlen + 1)) -if [ $(countrule -c 1) -ne 0 ]; then +send $((1600 - patlen + 1)) +if [ "$(countrule -c 1)" -ne 0 ]; then echo "FAIL: no rules should match pattern extending largest --to" showrules ((rc--)) @@ -119,10 +123,11 @@ fi zerorules send 1600 -if [ $(countrule -c 1) -ne 0 ]; then +if [ "$(countrule -c 1)" -ne 0 ]; then echo "FAIL: no rule should match pattern past largest --to" showrules ((rc--)) fi +[ $rc -eq 0 ] && echo "PASS: string match tests" exit $rc diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py new file mode 100755 index 0000000000..93d9d91452 --- /dev/null +++ b/tools/testing/selftests/net/nl_netdev.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import time +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import ksft_eq, ksft_ge, ksft_busy_wait +from lib.py import NetdevFamily, NetdevSimDev, ip + + +def empty_check(nf) -> None: + devs = nf.dev_get({}, dump=True) + ksft_ge(len(devs), 1) + + +def lo_check(nf) -> None: + lo_info = nf.dev_get({"ifindex": 1}) + ksft_eq(len(lo_info['xdp-features']), 0) + ksft_eq(len(lo_info['xdp-rx-metadata-features']), 0) + + +def page_pool_check(nf) -> None: + with NetdevSimDev() as nsimdev: + nsim = nsimdev.nsims[0] + + def up(): + ip(f"link set dev {nsim.ifname} up") + + def down(): + ip(f"link set dev {nsim.ifname} down") + + def get_pp(): + pp_list = nf.page_pool_get({}, dump=True) + return [pp for pp in pp_list if pp.get("ifindex") == nsim.ifindex] + + # No page pools when down + down() + ksft_eq(len(get_pp()), 0) + + # Up, empty page pool appears + up() + pp_list = get_pp() + ksft_ge(len(pp_list), 0) + refs = sum([pp["inflight"] for pp in pp_list]) + ksft_eq(refs, 0) + + # Down, it disappears, again + down() + pp_list = get_pp() + ksft_eq(len(pp_list), 0) + + # Up, allocate a page + up() + nsim.dfs_write("pp_hold", "y") + pp_list = nf.page_pool_get({}, dump=True) + refs = sum([pp["inflight"] for pp in pp_list if pp.get("ifindex") == nsim.ifindex]) + ksft_ge(refs, 1) + + # Now let's leak a page + down() + pp_list = get_pp() + ksft_eq(len(pp_list), 1) + refs = sum([pp["inflight"] for pp in pp_list]) + ksft_eq(refs, 1) + attached = [pp for pp in pp_list if "detach-time" not in pp] + ksft_eq(len(attached), 0) + + # New pp can get created, and we'll have two + up() + pp_list = get_pp() + attached = [pp for pp in pp_list if "detach-time" not in pp] + detached = [pp for pp in pp_list if "detach-time" in pp] + ksft_eq(len(attached), 1) + ksft_eq(len(detached), 1) + + # Free the old page and the old pp is gone + nsim.dfs_write("pp_hold", "n") + # Freeing check is once a second so we may need to retry + ksft_busy_wait(lambda: len(get_pp()) == 1, deadline=2) + + # And down... + down() + ksft_eq(len(get_pp()), 0) + + # Last, leave the page hanging for destroy, nothing to check + # we're trying to exercise the orphaning path in the kernel + up() + nsim.dfs_write("pp_hold", "y") + + +def main() -> None: + nf = NetdevFamily() + ksft_run([empty_check, lo_check, page_pool_check], + args=(nf, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 8b12071876..9f8dec2f65 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -489,7 +489,7 @@ class ovsactions(nla): actstr, reason = parse_extract_field( actstr, "drop(", - "([0-9]+)", + r"([0-9]+)", lambda x: int(x, 0), False, None, @@ -502,9 +502,9 @@ class ovsactions(nla): actstr = actstr[len("drop"): ] return (totallen - len(actstr)) - elif parse_starts_block(actstr, "^(\d+)", False, True): + elif parse_starts_block(actstr, r"^(\d+)", False, True): actstr, output = parse_extract_field( - actstr, None, "(\d+)", lambda x: int(x), False, "0" + actstr, None, r"(\d+)", lambda x: int(x), False, "0" ) self["attrs"].append(["OVS_ACTION_ATTR_OUTPUT", output]) parsed = True @@ -512,7 +512,7 @@ class ovsactions(nla): actstr, recircid = parse_extract_field( actstr, "recirc(", - "([0-9a-fA-Fx]+)", + r"([0-9a-fA-Fx]+)", lambda x: int(x, 0), False, 0, @@ -588,17 +588,17 @@ class ovsactions(nla): actstr = actstr[3:] actstr, ip_block_min = parse_extract_field( - actstr, "=", "([0-9a-fA-F\.]+)", str, False + actstr, "=", r"([0-9a-fA-F\.]+)", str, False ) actstr, ip_block_max = parse_extract_field( - actstr, "-", "([0-9a-fA-F\.]+)", str, False + actstr, "-", r"([0-9a-fA-F\.]+)", str, False ) actstr, proto_min = parse_extract_field( - actstr, ":", "(\d+)", int, False + actstr, ":", r"(\d+)", int, False ) actstr, proto_max = parse_extract_field( - actstr, "-", "(\d+)", int, False + actstr, "-", r"(\d+)", int, False ) if t is not None: diff --git a/tools/testing/selftests/bpf/progs/sample_map_ret0.c b/tools/testing/selftests/net/sample_map_ret0.bpf.c index 495990d355..43ca925949 100644 --- a/tools/testing/selftests/bpf/progs/sample_map_ret0.c +++ b/tools/testing/selftests/net/sample_map_ret0.bpf.c @@ -17,7 +17,7 @@ struct { } array SEC(".maps"); /* Sample program which should always load for testing control paths. */ -SEC(".text") int func() +SEC("xdp") int func() { __u64 key64 = 0; __u32 key = 0; diff --git a/tools/testing/selftests/bpf/progs/sample_ret0.c b/tools/testing/selftests/net/sample_ret0.bpf.c index fec99750d6..1df5ca98bb 100644 --- a/tools/testing/selftests/bpf/progs/sample_ret0.c +++ b/tools/testing/selftests/net/sample_ret0.bpf.c @@ -1,6 +1,9 @@ /* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ +#define SEC(name) __attribute__((section(name), used)) + /* Sample program which should always load for testing control paths. */ +SEC("xdp") int func() { return 0; diff --git a/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh b/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh new file mode 100755 index 0000000000..e23210aa54 --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh @@ -0,0 +1,335 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Jianguo Wu <wujianguo@chinatelecom.cn> +# +# Mostly copied from tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh. +# +# This script is designed for testing the support of netfilter hooks for +# SRv6 End.DX4 behavior. +# +# Hereafter a network diagram is shown, where one tenants (named 100) offer +# IPv4 L3 VPN services allowing hosts to communicate with each other across +# an IPv6 network. +# +# Routers rt-1 and rt-2 implement IPv4 L3 VPN services leveraging the SRv6 +# architecture. The key components for such VPNs are: a) SRv6 Encap behavior, +# b) SRv6 End.DX4 behavior. +# +# To explain how an IPv4 L3 VPN based on SRv6 works, let us briefly consider an +# example where, within the same domain of tenant 100, the host hs-1 pings +# the host hs-2. +# +# First of all, L2 reachability of the host hs-2 is taken into account by +# the router rt-1 which acts as an arp proxy. +# +# When the host hs-1 sends an IPv4 packet destined to hs-2, the router rt-1 +# receives the packet on the internal veth-t100 interface, rt-1 contains the +# SRv6 Encap route for encapsulating the IPv4 packet in a IPv6 plus the Segment +# Routing Header (SRH) packet. This packet is sent through the (IPv6) core +# network up to the router rt-2 that receives it on veth0 interface. +# +# The rt-2 router uses the 'localsid' routing table to process incoming +# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these +# packets, the SRv6 End.DX4 behavior removes the outer IPv6+SRH headers and +# routs the packet to the specified nexthop. Afterwards, the packet is sent to +# the host hs-2 through the veth-t100 interface. +# +# The ping response follows the same processing but this time the role of rt-1 +# and rt-2 are swapped. +# +# And when net.netfilter.nf_hooks_lwtunnel is set to 1 in rt-1 or rt-2, and a +# rpfilter iptables rule is added, SRv6 packets will go through netfilter PREROUTING +# hooks. +# +# +# +-------------------+ +-------------------+ +# | | | | +# | hs-1 netns | | hs-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +-----------------------------------+ +# | . | | . | +# | +---------------+ | | +---------------- | +# | | veth-t100 | | | | veth-t100 | | +# | | 10.0.0.11/24 | +----------+ | | +----------+ | 10.0.0.22/24 | | +# | +-------+-------+ | route | | | | route | +-------+-------- | +# | | table | | | | table | | +# | +----------+ | | +----------+ | +# | +--------------+ | | +--------------+ | +# | | veth0 | | | | veth0 | | +# | | 2001:11::1/64 |.|...|.| 2001:11::2/64 | | +# | +--------------+ | | +--------------+ | +# | | | | +# | rt-1 netns | | rt-2 netns | +# | | | | +# +-----------------------------------+ +-----------------------------------+ +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# | Network configuration | +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# rt-1: localsid table +# +----------------------------------------------------------------+ +# |SID |Action | +# +----------------------------------------------------------------+ +# |fc00:21:100::6004|apply SRv6 End.DX4 nh4 10.0.0.1 dev veth-t100 | +# +----------------------------------------------------------------+ +# +# rt-1: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6004| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth_t100 | +# +---------------------------------------------------+ +# +# +# rt-2: localsid table +# +---------------------------------------------------------------+ +# |SID |Action | +# +---------------------------------------------------------------+ +# |fc00:12:100::6004|apply SRv6 End.DX4 nh4 10.0.0.2 dev veth-t100| +# +---------------------------------------------------------------+ +# +# rt-2: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6004| +# +---------------------------------------------------+ +# |10.0.0.0/24|forward to dev veth_t100 | +# +---------------------------------------------------+ +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +readonly IPv6_RT_NETWORK=2001:11 +readonly IPv4_HS_NETWORK=10.0.0 +readonly SID_LOCATOR=fc00 + +PING_TIMEOUT_SEC=4 + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +cleanup() +{ + ip link del veth-rt-1 2>/dev/null || true + ip link del veth-rt-2 2>/dev/null || true + + # destroy routers rt-* and hosts hs-* + for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do + ip netns del ${ns} || true + done +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns add ${nsname} + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip link set veth-rt-${rt} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${rt} name veth0 + + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} link set veth0 up + ip -netns ${nsname} link set lo up + + ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1 +} + +setup_rt_netfilter() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns exec ${nsname} sysctl -wq net.netfilter.nf_hooks_lwtunnel=1 + ip netns exec ${nsname} iptables -t raw -A PREROUTING -m rpfilter --invert -j DROP +} + +setup_hs() +{ + local hs=$1 + local rt=$2 + local tid=$3 + local hsname=hs-${hs} + local rtname=rt-${rt} + local rtveth=veth-t${tid} + + # set the networking for the host + ip netns add ${hsname} + + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} + ip -netns ${hsname} link set ${rtveth} netns ${rtname} + ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0 + ip -netns ${hsname} link set veth0 up + ip -netns ${hsname} link set lo up + + ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.${rt}${hs}/24 dev ${rtveth} + ip -netns ${rtname} link set ${rtveth} up + + ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1 +} + +setup_vpn_config() +{ + local hssrc=$1 + local rtsrc=$2 + local hsdst=$3 + local rtdst=$4 + local tid=$5 + + local hssrc_name=hs-t${tid}-${hssrc} + local hsdst_name=hs-t${tid}-${hsdst} + local rtsrc_name=rt-${rtsrc} + local rtdst_name=rt-${rtdst} + local vpn_sid=${SID_LOCATOR}:${hssrc}${hsdst}:${tid}::6004 + + # set the encap route for encapsulating packets which arrive from the + # host hssrc and destined to the access router rtsrc. + ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 \ + via 2001:11::${rtdst} dev veth0 + + # set the decap route for decapsulating packets which arrive from + # the rtdst router and destined to the hsdst host. + ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 \ + encap seg6local action End.DX4 nh4 ${IPv4_HS_NETWORK}.${hsdst} dev veth-t${tid} +} + +setup() +{ + ip link add veth-rt-1 type veth peer name veth-rt-2 + # setup the networking for router rt-1 and router rt-2 + setup_rt_networking 1 + setup_rt_networking 2 + + # setup two hosts for the tenant 100. + # - host hs-1 is directly connected to the router rt-1; + # - host hs-2 is directly connected to the router rt-2. + setup_hs 1 1 100 + setup_hs 2 2 100 + + # setup the IPv4 L3 VPN which connects the host hs-1 and host hs-2. + setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant + setup_vpn_config 2 2 1 1 100 +} + +check_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + ip netns exec hs-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1 +} + +check_and_log_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + check_hs_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "Hosts connectivity: hs-${hssrc} -> hs-${hsdst} (tenant ${tid})" +} + +host_tests() +{ + log_section "SRv6 VPN connectivity test among hosts in the same tenant" + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +router_netfilter_tests() +{ + log_section "SRv6 VPN connectivity test with netfilter enabled in routers" + setup_rt_netfilter 1 + setup_rt_netfilter 2 + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup &>/dev/null + +setup + +host_tests +router_netfilter_tests + +print_log_test_results + +cleanup &>/dev/null + +exit ${ret} diff --git a/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh b/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh new file mode 100755 index 0000000000..9e69a2ed5b --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh @@ -0,0 +1,340 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Jianguo Wu <wujianguo@chinatelecom.cn> +# +# Mostly copied from tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh. +# +# This script is designed for testing the support of netfilter hooks for +# SRv6 End.DX4 behavior. +# +# Hereafter a network diagram is shown, where one tenants (named 100) offer +# IPv6 L3 VPN services allowing hosts to communicate with each other across +# an IPv6 network. +# +# Routers rt-1 and rt-2 implement IPv6 L3 VPN services leveraging the SRv6 +# architecture. The key components for such VPNs are: a) SRv6 Encap behavior, +# b) SRv6 End.DX4 behavior. +# +# To explain how an IPv6 L3 VPN based on SRv6 works, let us briefly consider an +# example where, within the same domain of tenant 100, the host hs-1 pings +# the host hs-2. +# +# First of all, L2 reachability of the host hs-2 is taken into account by +# the router rt-1 which acts as an arp proxy. +# +# When the host hs-1 sends an IPv6 packet destined to hs-2, the router rt-1 +# receives the packet on the internal veth-t100 interface, rt-1 contains the +# SRv6 Encap route for encapsulating the IPv6 packet in a IPv6 plus the Segment +# Routing Header (SRH) packet. This packet is sent through the (IPv6) core +# network up to the router rt-2 that receives it on veth0 interface. +# +# The rt-2 router uses the 'localsid' routing table to process incoming +# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these +# packets, the SRv6 End.DX4 behavior removes the outer IPv6+SRH headers and +# routs the packet to the specified nexthop. Afterwards, the packet is sent to +# the host hs-2 through the veth-t100 interface. +# +# The ping response follows the same processing but this time the role of rt-1 +# and rt-2 are swapped. +# +# And when net.netfilter.nf_hooks_lwtunnel is set to 1 in rt-1 or rt-2, and a +# rpfilter iptables rule is added, SRv6 packets will go through netfilter PREROUTING +# hooks. +# +# +# +-------------------+ +-------------------+ +# | | | | +# | hs-1 netns | | hs-2 netns | +# | | | | +# | +-------------+ | | +-------------+ | +# | | veth0 | | | | veth0 | | +# | | cafe::1/64 | | | | cafe::2/64 | | +# | +-------------+ | | +-------------+ | +# | . | | . | +# +-------------------+ +-------------------+ +# . . +# . . +# . . +# +-----------------------------------+ +-----------------------------------+ +# | . | | . | +# | +---------------+ | | +---------------- | +# | | veth-t100 | | | | veth-t100 | | +# | | cafe::11/64 | +----------+ | | +----------+ | cafe::22/64 | | +# | +-------+-------+ | route | | | | route | +-------+-------- | +# | | table | | | | table | | +# | +----------+ | | +----------+ | +# | +--------------+ | | +--------------+ | +# | | veth0 | | | | veth0 | | +# | | 2001:11::1/64 |.|...|.| 2001:11::2/64 | | +# | +--------------+ | | +--------------+ | +# | | | | +# | rt-1 netns | | rt-2 netns | +# | | | | +# +-----------------------------------+ +-----------------------------------+ +# +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# | Network configuration | +# ~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# rt-1: localsid table +# +----------------------------------------------------------------+ +# |SID |Action | +# +----------------------------------------------------------------+ +# |fc00:21:100::6004|apply SRv6 End.DX6 nh6 cafe::1 dev veth-t100 | +# +----------------------------------------------------------------+ +# +# rt-1: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::2 |apply seg6 encap segs fc00:12:100::6004| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth_t100 | +# +---------------------------------------------------+ +# +# +# rt-2: localsid table +# +---------------------------------------------------------------+ +# |SID |Action | +# +---------------------------------------------------------------+ +# |fc00:12:100::6004|apply SRv6 End.DX6 nh6 cafe::2 dev veth-t100 | +# +---------------------------------------------------------------+ +# +# rt-2: route table +# +---------------------------------------------------+ +# |host |Action | +# +---------------------------------------------------+ +# |cafe::1 |apply seg6 encap segs fc00:21:100::6004| +# +---------------------------------------------------+ +# |cafe::/64 |forward to dev veth_t100 | +# +---------------------------------------------------+ +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +readonly IPv6_RT_NETWORK=2001:11 +readonly IPv6_HS_NETWORK=cafe +readonly SID_LOCATOR=fc00 + +PING_TIMEOUT_SEC=4 + +ret=0 + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +cleanup() +{ + ip link del veth-rt-1 2>/dev/null || true + ip link del veth-rt-2 2>/dev/null || true + + # destroy routers rt-* and hosts hs-* + for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do + ip netns del ${ns} || true + done +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns add ${nsname} + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip link set veth-rt-${rt} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${rt} name veth0 + + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} link set veth0 up + ip -netns ${nsname} link set lo up + + ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1 +} + +setup_rt_netfilter() +{ + local rt=$1 + local nsname=rt-${rt} + + ip netns exec ${nsname} sysctl -wq net.netfilter.nf_hooks_lwtunnel=1 + ip netns exec ${nsname} ip6tables -t raw -A PREROUTING -m rpfilter --invert -j DROP +} + +setup_hs() +{ + local hs=$1 + local rt=$2 + local tid=$3 + local hsname=hs-${hs} + local rtname=rt-${rt} + local rtveth=veth-t${tid} + + # set the networking for the host + ip netns add ${hsname} + + ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} + ip -netns ${hsname} link set ${rtveth} netns ${rtname} + ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad + ip -netns ${hsname} link set veth0 up + ip -netns ${hsname} link set lo up + + ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::${rt}${hs}/64 dev ${rtveth} + ip -netns ${rtname} link set ${rtveth} up + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1 +} + +setup_vpn_config() +{ + local hssrc=$1 + local rtsrc=$2 + local hsdst=$3 + local rtdst=$4 + local tid=$5 + + local hssrc_name=hs-t${tid}-${hssrc} + local hsdst_name=hs-t${tid}-${hsdst} + local rtsrc_name=rt-${rtsrc} + local rtdst_name=rt-${rtdst} + local rtveth=veth-t${tid} + local vpn_sid=${SID_LOCATOR}:${hssrc}${hsdst}:${tid}::6004 + + ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth} + + # set the encap route for encapsulating packets which arrive from the + # host hssrc and destined to the access router rtsrc. + ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 \ + encap seg6 mode encap segs ${vpn_sid} dev veth0 + ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 \ + via 2001:11::${rtdst} dev veth0 + + # set the decap route for decapsulating packets which arrive from + # the rtdst router and destined to the hsdst host. + ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 \ + encap seg6local action End.DX6 nh6 ${IPv6_HS_NETWORK}::${hsdst} dev veth-t${tid} +} + +setup() +{ + ip link add veth-rt-1 type veth peer name veth-rt-2 + # setup the networking for router rt-1 and router rt-2 + setup_rt_networking 1 + setup_rt_networking 2 + + # setup two hosts for the tenant 100. + # - host hs-1 is directly connected to the router rt-1; + # - host hs-2 is directly connected to the router rt-2. + setup_hs 1 1 100 + setup_hs 2 2 100 + + # setup the IPv4 L3 VPN which connects the host hs-1 and host hs-2. + setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant + setup_vpn_config 2 2 1 1 100 +} + +check_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + ip netns exec hs-${hssrc} ping -6 -c 1 -W ${PING_TIMEOUT_SEC} \ + ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1 +} + +check_and_log_hs_connectivity() +{ + local hssrc=$1 + local hsdst=$2 + local tid=$3 + + check_hs_connectivity ${hssrc} ${hsdst} ${tid} + log_test $? 0 "Hosts connectivity: hs-${hssrc} -> hs-${hsdst} (tenant ${tid})" +} + +host_tests() +{ + log_section "SRv6 VPN connectivity test among hosts in the same tenant" + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +router_netfilter_tests() +{ + log_section "SRv6 VPN connectivity test with netfilter enabled in routers" + setup_rt_netfilter 1 + setup_rt_netfilter 2 + + check_and_log_hs_connectivity 1 2 100 + check_and_log_hs_connectivity 2 1 100 +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +cleanup &>/dev/null + +setup + +host_tests +router_netfilter_tests + +print_log_test_results + +cleanup &>/dev/null + +exit ${ret} diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 8802604148..11a1ebda56 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -7,7 +7,7 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="xdp_dummy.o" +BPF_FILE="xdp_dummy.bpf.o" # set global exit status, but never reset nonzero one. check_err() diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index 7080eae531..c51ea90a13 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -7,7 +7,7 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="xdp_dummy.o" +BPF_FILE="xdp_dummy.bpf.o" cleanup() { local -r jobs="$(jobs -p)" diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index e1ff645bd3..17404f49cd 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -7,7 +7,7 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="xdp_dummy.o" +BPF_FILE="xdp_dummy.bpf.o" cleanup() { local -r jobs="$(jobs -p)" @@ -42,8 +42,8 @@ run_one() { ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp tc -n "${PEER_NS}" qdisc add dev veth1 clsact - tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file nat6to4.o section schedcls/ingress6/nat_6 direct-action - tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file nat6to4.o section schedcls/egress4/snat4 direct-action + tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file nat6to4.bpf.o section schedcls/ingress6/nat_6 direct-action + tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file nat6to4.bpf.o section schedcls/egress4/snat4 direct-action echo ${rx_args} ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & @@ -89,7 +89,7 @@ if [ ! -f ${BPF_FILE} ]; then exit -1 fi -if [ ! -f nat6to4.o ]; then +if [ ! -f nat6to4.bpf.o ]; then echo "Missing nat6to4 helper. Run 'make' first" exit -1 fi diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 83ed987cff..550d8eb3e2 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -3,7 +3,7 @@ source net_helper.sh -BPF_FILE="xdp_dummy.o" +BPF_FILE="xdp_dummy.bpf.o" readonly BASE="ns-$(mktemp -u XXXXXX)" readonly SRC=2 readonly DST=1 diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 3a394b43e2..4f1edbafb9 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -BPF_FILE="xdp_dummy.o" +BPF_FILE="xdp_dummy.bpf.o" readonly STATS="$(mktemp -p /tmp ns-XXXXXX)" readonly BASE=`basename $STATS` readonly SRC=2 diff --git a/tools/testing/selftests/net/xdp_dummy.c b/tools/testing/selftests/net/xdp_dummy.bpf.c index d988b2e0ce..d988b2e0ce 100644 --- a/tools/testing/selftests/net/xdp_dummy.c +++ b/tools/testing/selftests/net/xdp_dummy.bpf.c diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh index 4577895306..3eeeeffb40 100755 --- a/tools/testing/selftests/net/xfrm_policy.sh +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -293,7 +293,7 @@ check_random_order() local ns=$1 local log=$2 - for i in $(seq 100); do + for i in $(seq 50); do ip -net $ns xfrm policy flush for j in $(seq 0 16 255 | sort -R); do ip -net $ns xfrm policy add dst $j.0.0.0/24 dir out priority 10 action allow @@ -306,7 +306,7 @@ check_random_order() done done - for i in $(seq 100); do + for i in $(seq 50); do ip -net $ns xfrm policy flush for j in $(seq 0 16 255 | sort -R); do local addr=$(printf "e000:0000:%02x00::/56" $j) diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile deleted file mode 100644 index 936c3085bb..0000000000 --- a/tools/testing/selftests/netfilter/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# Makefile for netfilter selftests - -TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ - conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ - nft_concat_range.sh nft_conntrack_helper.sh \ - nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ - ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ - conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \ - conntrack_sctp_collision.sh xt_string.sh \ - bridge_netfilter.sh - -HOSTPKG_CONFIG := pkg-config - -CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) -LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) - -TEST_GEN_FILES = nf-queue connect_close audit_logread sctp_collision \ - conntrack_dump_flush - -include ../lib.mk diff --git a/tools/testing/selftests/netfilter/bridge_brouter.sh b/tools/testing/selftests/netfilter/bridge_brouter.sh deleted file mode 100755 index 29f3955b9a..0000000000 --- a/tools/testing/selftests/netfilter/bridge_brouter.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/bash -# -# This test is for bridge 'brouting', i.e. make some packets being routed -# rather than getting bridged even though they arrive on interface that is -# part of a bridge. - -# eth0 br0 eth0 -# setup is: ns1 <-> ns0 <-> ns2 - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 -ret=0 - -ebtables -V > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ebtables" - exit $ksft_skip -fi - -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi - -ip netns add ns0 -ip netns add ns1 -ip netns add ns2 - -ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 -if [ $? -ne 0 ]; then - echo "SKIP: Can't create veth device" - exit $ksft_skip -fi -ip link add veth1 netns ns0 type veth peer name eth0 netns ns2 - -ip -net ns0 link set lo up -ip -net ns0 link set veth0 up -ip -net ns0 link set veth1 up - -ip -net ns0 link add br0 type bridge -if [ $? -ne 0 ]; then - echo "SKIP: Can't create bridge br0" - exit $ksft_skip -fi - -ip -net ns0 link set veth0 master br0 -ip -net ns0 link set veth1 master br0 -ip -net ns0 link set br0 up -ip -net ns0 addr add 10.0.0.1/24 dev br0 - -# place both in same subnet, ns1 and ns2 connected via ns0:br0 -for i in 1 2; do - ip -net ns$i link set lo up - ip -net ns$i link set eth0 up - ip -net ns$i addr add 10.0.0.1$i/24 dev eth0 -done - -test_ebtables_broute() -{ - local cipt - - # redirect is needed so the dstmac is rewritten to the bridge itself, - # ip stack won't process OTHERHOST (foreign unicast mac) packets. - ip netns exec ns0 ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP - if [ $? -ne 0 ]; then - echo "SKIP: Could not add ebtables broute redirect rule" - return $ksft_skip - fi - - # ping netns1, expected to not work (ip forwarding is off) - ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null 2>&1 - if [ $? -eq 0 ]; then - echo "ERROR: ping works, should have failed" 1>&2 - return 1 - fi - - # enable forwarding on both interfaces. - # neither needs an ip address, but at least the bridge needs - # an ip address in same network segment as ns1 and ns2 (ns0 - # needs to be able to determine route for to-be-forwarded packet). - ip netns exec ns0 sysctl -q net.ipv4.conf.veth0.forwarding=1 - ip netns exec ns0 sysctl -q net.ipv4.conf.veth1.forwarding=1 - - sleep 1 - - ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null - if [ $? -ne 0 ]; then - echo "ERROR: ping did not work, but it should (broute+forward)" 1>&2 - return 1 - fi - - echo "PASS: ns1/ns2 connectivity with active broute rule" - ip netns exec ns0 ebtables -t broute -F - - # ping netns1, expected to work (frames are bridged) - ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null - if [ $? -ne 0 ]; then - echo "ERROR: ping did not work, but it should (bridged)" 1>&2 - return 1 - fi - - ip netns exec ns0 ebtables -t filter -A FORWARD -p ipv4 --ip-protocol icmp -j DROP - - # ping netns1, expected to not work (DROP in bridge forward) - ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null 2>&1 - if [ $? -eq 0 ]; then - echo "ERROR: ping works, should have failed (icmp forward drop)" 1>&2 - return 1 - fi - - # re-activate brouter - ip netns exec ns0 ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP - - ip netns exec ns2 ping -q -c 1 10.0.0.11 > /dev/null - if [ $? -ne 0 ]; then - echo "ERROR: ping did not work, but it should (broute+forward 2)" 1>&2 - return 1 - fi - - echo "PASS: ns1/ns2 connectivity with active broute rule and bridge forward drop" - return 0 -} - -# test basic connectivity -ip netns exec ns1 ping -c 1 -q 10.0.0.12 > /dev/null -if [ $? -ne 0 ]; then - echo "ERROR: Could not reach ns2 from ns1" 1>&2 - ret=1 -fi - -ip netns exec ns2 ping -c 1 -q 10.0.0.11 > /dev/null -if [ $? -ne 0 ]; then - echo "ERROR: Could not reach ns1 from ns2" 1>&2 - ret=1 -fi - -if [ $ret -eq 0 ];then - echo "PASS: netns connectivity: ns1 and ns2 can reach each other" -fi - -test_ebtables_broute -ret=$? -for i in 0 1 2; do ip netns del ns$i;done - -exit $ret diff --git a/tools/testing/selftests/netfilter/bridge_netfilter.sh b/tools/testing/selftests/netfilter/bridge_netfilter.sh deleted file mode 100644 index 659b3ab02c..0000000000 --- a/tools/testing/selftests/netfilter/bridge_netfilter.sh +++ /dev/null @@ -1,188 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# Test bridge netfilter + conntrack, a combination that doesn't really work, -# with multicast/broadcast packets racing for hash table insertion. - -# eth0 br0 eth0 -# setup is: ns1 <->,ns0 <-> ns3 -# ns2 <-' `'-> ns4 - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 -ret=0 - -sfx=$(mktemp -u "XXXXXXXX") -ns0="ns0-$sfx" -ns1="ns1-$sfx" -ns2="ns2-$sfx" -ns3="ns3-$sfx" -ns4="ns4-$sfx" - -ebtables -V > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ebtables" - exit $ksft_skip -fi - -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi - -for i in $(seq 0 4); do - eval ip netns add \$ns$i -done - -cleanup() { - for i in $(seq 0 4); do eval ip netns del \$ns$i;done -} - -trap cleanup EXIT - -do_ping() -{ - fromns="$1" - dstip="$2" - - ip netns exec $fromns ping -c 1 -q $dstip > /dev/null - if [ $? -ne 0 ]; then - echo "ERROR: ping from $fromns to $dstip" - ip netns exec ${ns0} nft list ruleset - ret=1 - fi -} - -bcast_ping() -{ - fromns="$1" - dstip="$2" - - for i in $(seq 1 1000); do - ip netns exec $fromns ping -q -f -b -c 1 -q $dstip > /dev/null 2>&1 - if [ $? -ne 0 ]; then - echo "ERROR: ping -b from $fromns to $dstip" - ip netns exec ${ns0} nft list ruleset - fi - done -} - -ip link add veth1 netns ${ns0} type veth peer name eth0 netns ${ns1} -if [ $? -ne 0 ]; then - echo "SKIP: Can't create veth device" - exit $ksft_skip -fi - -ip link add veth2 netns ${ns0} type veth peer name eth0 netns $ns2 -ip link add veth3 netns ${ns0} type veth peer name eth0 netns $ns3 -ip link add veth4 netns ${ns0} type veth peer name eth0 netns $ns4 - -ip -net ${ns0} link set lo up - -for i in $(seq 1 4); do - ip -net ${ns0} link set veth$i up -done - -ip -net ${ns0} link add br0 type bridge stp_state 0 forward_delay 0 nf_call_iptables 1 nf_call_ip6tables 1 nf_call_arptables 1 -if [ $? -ne 0 ]; then - echo "SKIP: Can't create bridge br0" - exit $ksft_skip -fi - -# make veth0,1,2 part of bridge. -for i in $(seq 1 3); do - ip -net ${ns0} link set veth$i master br0 -done - -# add a macvlan on top of the bridge. -MACVLAN_ADDR=ba:f3:13:37:42:23 -ip -net ${ns0} link add link br0 name macvlan0 type macvlan mode private -ip -net ${ns0} link set macvlan0 address ${MACVLAN_ADDR} -ip -net ${ns0} link set macvlan0 up -ip -net ${ns0} addr add 10.23.0.1/24 dev macvlan0 - -# add a macvlan on top of veth4. -MACVLAN_ADDR=ba:f3:13:37:42:24 -ip -net ${ns0} link add link veth4 name macvlan4 type macvlan mode vepa -ip -net ${ns0} link set macvlan4 address ${MACVLAN_ADDR} -ip -net ${ns0} link set macvlan4 up - -# make the macvlan part of the bridge. -# veth4 is not a bridge port, only the macvlan on top of it. -ip -net ${ns0} link set macvlan4 master br0 - -ip -net ${ns0} link set br0 up -ip -net ${ns0} addr add 10.0.0.1/24 dev br0 -ip netns exec ${ns0} sysctl -q net.bridge.bridge-nf-call-iptables=1 -ret=$? -if [ $ret -ne 0 ] ; then - echo "SKIP: bridge netfilter not available" - ret=$ksft_skip -fi - -# for testing, so namespaces will reply to ping -b probes. -ip netns exec ${ns0} sysctl -q net.ipv4.icmp_echo_ignore_broadcasts=0 - -# enable conntrack in ns0 and drop broadcast packets in forward to -# avoid them from getting confirmed in the postrouting hook before -# the cloned skb is passed up the stack. -ip netns exec ${ns0} nft -f - <<EOF -table ip filter { - chain input { - type filter hook input priority 1; policy accept - iifname br0 counter - ct state new accept - } -} - -table bridge filter { - chain forward { - type filter hook forward priority 0; policy accept - meta pkttype broadcast ip protocol icmp counter drop - } -} -EOF - -# place 1, 2 & 3 in same subnet, connected via ns0:br0. -# ns4 is placed in same subnet as well, but its not -# part of the bridge: the corresponding veth4 is not -# part of the bridge, only its macvlan interface. -for i in $(seq 1 4); do - eval ip -net \$ns$i link set lo up - eval ip -net \$ns$i link set eth0 up -done -for i in $(seq 1 2); do - eval ip -net \$ns$i addr add 10.0.0.1$i/24 dev eth0 -done - -ip -net ${ns3} addr add 10.23.0.13/24 dev eth0 -ip -net ${ns4} addr add 10.23.0.14/24 dev eth0 - -# test basic connectivity -do_ping ${ns1} 10.0.0.12 -do_ping ${ns3} 10.23.0.1 -do_ping ${ns4} 10.23.0.1 - -if [ $ret -eq 0 ];then - echo "PASS: netns connectivity: ns1 can reach ns2, ns3 and ns4 can reach ns0" -fi - -bcast_ping ${ns1} 10.0.0.255 - -# This should deliver broadcast to macvlan0, which is on top of ns0:br0. -bcast_ping ${ns3} 10.23.0.255 - -# same, this time via veth4:macvlan4. -bcast_ping ${ns4} 10.23.0.255 - -read t < /proc/sys/kernel/tainted - -if [ $t -eq 0 ];then - echo PASS: kernel not tainted -else - echo ERROR: kernel is tainted - ret=1 -fi - -exit $ret diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config deleted file mode 100644 index 7c42b1b2c6..0000000000 --- a/tools/testing/selftests/netfilter/config +++ /dev/null @@ -1,9 +0,0 @@ -CONFIG_NET_NS=y -CONFIG_NF_TABLES_INET=y -CONFIG_NFT_QUEUE=m -CONFIG_NFT_NAT=m -CONFIG_NFT_REDIR=m -CONFIG_NFT_MASQ=m -CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NF_CT_NETLINK=m -CONFIG_AUDIT=y diff --git a/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh b/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh deleted file mode 100755 index a924e595cf..0000000000 --- a/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# Testing For SCTP COLLISION SCENARIO as Below: -# -# 14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359] -# 14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187] -# 14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359] -# 14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO] -# 14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK] -# 14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970] -# -# TOPO: SERVER_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) CLIENT_NS - -CLIENT_NS=$(mktemp -u client-XXXXXXXX) -CLIENT_IP="198.51.200.1" -CLIENT_PORT=1234 - -SERVER_NS=$(mktemp -u server-XXXXXXXX) -SERVER_IP="198.51.100.1" -SERVER_PORT=1234 - -ROUTER_NS=$(mktemp -u router-XXXXXXXX) -CLIENT_GW="198.51.200.2" -SERVER_GW="198.51.100.2" - -# setup the topo -setup() { - ip net add $CLIENT_NS - ip net add $SERVER_NS - ip net add $ROUTER_NS - ip -n $SERVER_NS link add link0 type veth peer name link1 netns $ROUTER_NS - ip -n $CLIENT_NS link add link3 type veth peer name link2 netns $ROUTER_NS - - ip -n $SERVER_NS link set link0 up - ip -n $SERVER_NS addr add $SERVER_IP/24 dev link0 - ip -n $SERVER_NS route add $CLIENT_IP dev link0 via $SERVER_GW - - ip -n $ROUTER_NS link set link1 up - ip -n $ROUTER_NS link set link2 up - ip -n $ROUTER_NS addr add $SERVER_GW/24 dev link1 - ip -n $ROUTER_NS addr add $CLIENT_GW/24 dev link2 - ip net exec $ROUTER_NS sysctl -wq net.ipv4.ip_forward=1 - - ip -n $CLIENT_NS link set link3 up - ip -n $CLIENT_NS addr add $CLIENT_IP/24 dev link3 - ip -n $CLIENT_NS route add $SERVER_IP dev link3 via $CLIENT_GW - - # simulate the delay on OVS upcall by setting up a delay for INIT_ACK with - # tc on $SERVER_NS side - tc -n $SERVER_NS qdisc add dev link0 root handle 1: htb - tc -n $SERVER_NS class add dev link0 parent 1: classid 1:1 htb rate 100mbit - tc -n $SERVER_NS filter add dev link0 parent 1: protocol ip u32 match ip protocol 132 \ - 0xff match u8 2 0xff at 32 flowid 1:1 - tc -n $SERVER_NS qdisc add dev link0 parent 1:1 handle 10: netem delay 1200ms - - # simulate the ctstate check on OVS nf_conntrack - ip net exec $ROUTER_NS iptables -A FORWARD -m state --state INVALID,UNTRACKED -j DROP - ip net exec $ROUTER_NS iptables -A INPUT -p sctp -j DROP - - # use a smaller number for assoc's max_retrans to reproduce the issue - modprobe sctp - ip net exec $CLIENT_NS sysctl -wq net.sctp.association_max_retrans=3 -} - -cleanup() { - ip net exec $CLIENT_NS pkill sctp_collision 2>&1 >/dev/null - ip net exec $SERVER_NS pkill sctp_collision 2>&1 >/dev/null - ip net del "$CLIENT_NS" - ip net del "$SERVER_NS" - ip net del "$ROUTER_NS" -} - -do_test() { - ip net exec $SERVER_NS ./sctp_collision server \ - $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT & - ip net exec $CLIENT_NS ./sctp_collision client \ - $CLIENT_IP $CLIENT_PORT $SERVER_IP $SERVER_PORT -} - -# NOTE: one way to work around the issue is set a smaller hb_interval -# ip net exec $CLIENT_NS sysctl -wq net.sctp.hb_interval=3500 - -# run the test case -trap cleanup EXIT -setup && \ -echo "Test for SCTP Collision in nf_conntrack:" && \ -do_test && echo "PASS!" -exit $? diff --git a/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh b/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh deleted file mode 100755 index e7d7bf13cf..0000000000 --- a/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh +++ /dev/null @@ -1,167 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# Check that UNREPLIED tcp conntrack will eventually timeout. -# - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 -ret=0 - -waittime=20 -sfx=$(mktemp -u "XXXXXXXX") -ns1="ns1-$sfx" -ns2="ns2-$sfx" - -nft --version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without nft tool" - exit $ksft_skip -fi - -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi - -cleanup() { - ip netns pids $ns1 | xargs kill 2>/dev/null - ip netns pids $ns2 | xargs kill 2>/dev/null - - ip netns del $ns1 - ip netns del $ns2 -} - -ipv4() { - echo -n 192.168.$1.2 -} - -check_counter() -{ - ns=$1 - name=$2 - expect=$3 - local lret=0 - - cnt=$(ip netns exec $ns2 nft list counter inet filter "$name" | grep -q "$expect") - if [ $? -ne 0 ]; then - echo "ERROR: counter $name in $ns2 has unexpected value (expected $expect)" 1>&2 - ip netns exec $ns2 nft list counter inet filter "$name" 1>&2 - lret=1 - fi - - return $lret -} - -# Create test namespaces -ip netns add $ns1 || exit 1 - -trap cleanup EXIT - -ip netns add $ns2 || exit 1 - -# Connect the namespace to the host using a veth pair -ip -net $ns1 link add name veth1 type veth peer name veth2 -ip -net $ns1 link set netns $ns2 dev veth2 - -ip -net $ns1 link set up dev lo -ip -net $ns2 link set up dev lo -ip -net $ns1 link set up dev veth1 -ip -net $ns2 link set up dev veth2 - -ip -net $ns2 addr add 10.11.11.2/24 dev veth2 -ip -net $ns2 route add default via 10.11.11.1 - -ip netns exec $ns2 sysctl -q net.ipv4.conf.veth2.forwarding=1 - -# add a rule inside NS so we enable conntrack -ip netns exec $ns1 iptables -A INPUT -m state --state established,related -j ACCEPT - -ip -net $ns1 addr add 10.11.11.1/24 dev veth1 -ip -net $ns1 route add 10.99.99.99 via 10.11.11.2 - -# Check connectivity works -ip netns exec $ns1 ping -q -c 2 10.11.11.2 >/dev/null || exit 1 - -ip netns exec $ns2 nc -l -p 8080 < /dev/null & - -# however, conntrack entries are there - -ip netns exec $ns2 nft -f - <<EOF -table inet filter { - counter connreq { } - counter redir { } - chain input { - type filter hook input priority 0; policy accept; - ct state new tcp flags syn ip daddr 10.99.99.99 tcp dport 80 counter name "connreq" accept - ct state new ct status dnat tcp dport 8080 counter name "redir" accept - } -} -EOF -if [ $? -ne 0 ]; then - echo "ERROR: Could not load nft rules" - exit 1 -fi - -ip netns exec $ns2 sysctl -q net.netfilter.nf_conntrack_tcp_timeout_syn_sent=10 - -echo "INFO: connect $ns1 -> $ns2 to the virtual ip" -ip netns exec $ns1 bash -c 'while true ; do - nc -p 60000 10.99.99.99 80 - sleep 1 - done' & - -sleep 1 - -ip netns exec $ns2 nft -f - <<EOF -table inet nat { - chain prerouting { - type nat hook prerouting priority 0; policy accept; - ip daddr 10.99.99.99 tcp dport 80 redirect to :8080 - } -} -EOF -if [ $? -ne 0 ]; then - echo "ERROR: Could not load nat redirect" - exit 1 -fi - -count=$(ip netns exec $ns2 conntrack -L -p tcp --dport 80 2>/dev/null | wc -l) -if [ $count -eq 0 ]; then - echo "ERROR: $ns2 did not pick up tcp connection from peer" - exit 1 -fi - -echo "INFO: NAT redirect added in ns $ns2, waiting for $waittime seconds for nat to take effect" -for i in $(seq 1 $waittime); do - echo -n "." - - sleep 1 - - count=$(ip netns exec $ns2 conntrack -L -p tcp --reply-port-src 8080 2>/dev/null | wc -l) - if [ $count -gt 0 ]; then - echo - echo "PASS: redirection took effect after $i seconds" - break - fi - - m=$((i%20)) - if [ $m -eq 0 ]; then - echo " waited for $i seconds" - fi -done - -expect="packets 1 bytes 60" -check_counter "$ns2" "redir" "$expect" -if [ $? -ne 0 ]; then - ret=1 -fi - -if [ $ret -eq 0 ];then - echo "PASS: redirection counter has expected values" -else - echo "ERROR: no tcp connection was redirected" -fi - -exit $ret diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh deleted file mode 100755 index c3b8f90c49..0000000000 --- a/tools/testing/selftests/netfilter/ipvs.sh +++ /dev/null @@ -1,228 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# -# End-to-end ipvs test suite -# Topology: -#--------------------------------------------------------------+ -# | | -# ns0 | ns1 | -# ----------- | ----------- ----------- | -# | veth01 | --------- | veth10 | | veth12 | | -# ----------- peer ----------- ----------- | -# | | | | -# ----------- | | | -# | br0 | |----------------- peer |--------------| -# ----------- | | | -# | | | | -# ---------- peer ---------- ----------- | -# | veth02 | --------- | veth20 | | veth21 | | -# ---------- | ---------- ----------- | -# | ns2 | -# | | -#--------------------------------------------------------------+ -# -# We assume that all network driver are loaded -# - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 -ret=0 -GREEN='\033[0;92m' -RED='\033[0;31m' -NC='\033[0m' # No Color - -readonly port=8080 - -readonly vip_v4=207.175.44.110 -readonly cip_v4=10.0.0.2 -readonly gip_v4=10.0.0.1 -readonly dip_v4=172.16.0.1 -readonly rip_v4=172.16.0.2 -readonly sip_v4=10.0.0.3 - -readonly infile="$(mktemp)" -readonly outfile="$(mktemp)" -readonly datalen=32 - -sysipvsnet="/proc/sys/net/ipv4/vs/" -if [ ! -d $sysipvsnet ]; then - modprobe -q ip_vs - if [ $? -ne 0 ]; then - echo "skip: could not run test without ipvs module" - exit $ksft_skip - fi -fi - -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ]; then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi - -ipvsadm -v > /dev/null 2>&1 -if [ $? -ne 0 ]; then - echo "SKIP: Could not run test without ipvsadm" - exit $ksft_skip -fi - -setup() { - ip netns add ns0 - ip netns add ns1 - ip netns add ns2 - - ip link add veth01 netns ns0 type veth peer name veth10 netns ns1 - ip link add veth02 netns ns0 type veth peer name veth20 netns ns2 - ip link add veth12 netns ns1 type veth peer name veth21 netns ns2 - - ip netns exec ns0 ip link set veth01 up - ip netns exec ns0 ip link set veth02 up - ip netns exec ns0 ip link add br0 type bridge - ip netns exec ns0 ip link set veth01 master br0 - ip netns exec ns0 ip link set veth02 master br0 - ip netns exec ns0 ip link set br0 up - ip netns exec ns0 ip addr add ${cip_v4}/24 dev br0 - - ip netns exec ns1 ip link set lo up - ip netns exec ns1 ip link set veth10 up - ip netns exec ns1 ip addr add ${gip_v4}/24 dev veth10 - ip netns exec ns1 ip link set veth12 up - ip netns exec ns1 ip addr add ${dip_v4}/24 dev veth12 - - ip netns exec ns2 ip link set lo up - ip netns exec ns2 ip link set veth21 up - ip netns exec ns2 ip addr add ${rip_v4}/24 dev veth21 - ip netns exec ns2 ip link set veth20 up - ip netns exec ns2 ip addr add ${sip_v4}/24 dev veth20 - - sleep 1 - - dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none -} - -cleanup() { - for i in 0 1 2 - do - ip netns del ns$i > /dev/null 2>&1 - done - - if [ -f "${outfile}" ]; then - rm "${outfile}" - fi - if [ -f "${infile}" ]; then - rm "${infile}" - fi -} - -server_listen() { - ip netns exec ns2 nc -l -p 8080 > "${outfile}" & - server_pid=$! - sleep 0.2 -} - -client_connect() { - ip netns exec ns0 timeout 2 nc -w 1 ${vip_v4} ${port} < "${infile}" -} - -verify_data() { - wait "${server_pid}" - cmp "$infile" "$outfile" 2>/dev/null -} - -test_service() { - server_listen - client_connect - verify_data -} - - -test_dr() { - ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0 - - ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1 - ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr - ip netns exec ns1 ipvsadm -a -t ${vip_v4}:${port} -r ${rip_v4}:${port} - ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1 - - # avoid incorrect arp response - ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1 - ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2 - # avoid reverse route lookup - ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0 - ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0 - ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1 - - test_service -} - -test_nat() { - ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0 - - ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1 - ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr - ip netns exec ns1 ipvsadm -a -m -t ${vip_v4}:${port} -r ${rip_v4}:${port} - ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1 - - ip netns exec ns2 ip link del veth20 - ip netns exec ns2 ip route add default via ${dip_v4} dev veth21 - - test_service -} - -test_tun() { - ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0 - - ip netns exec ns1 modprobe ipip - ip netns exec ns1 ip link set tunl0 up - ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=0 - ip netns exec ns1 sysctl -qw net.ipv4.conf.all.send_redirects=0 - ip netns exec ns1 sysctl -qw net.ipv4.conf.default.send_redirects=0 - ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr - ip netns exec ns1 ipvsadm -a -i -t ${vip_v4}:${port} -r ${rip_v4}:${port} - ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1 - - ip netns exec ns2 modprobe ipip - ip netns exec ns2 ip link set tunl0 up - ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1 - ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2 - ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0 - ip netns exec ns2 sysctl -qw net.ipv4.conf.tunl0.rp_filter=0 - ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0 - ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1 - - test_service -} - -run_tests() { - local errors= - - echo "Testing DR mode..." - cleanup - setup - test_dr - errors=$(( $errors + $? )) - - echo "Testing NAT mode..." - cleanup - setup - test_nat - errors=$(( $errors + $? )) - - echo "Testing Tunnel mode..." - cleanup - setup - test_tun - errors=$(( $errors + $? )) - - return $errors -} - -trap cleanup EXIT - -run_tests - -if [ $? -ne 0 ]; then - echo -e "$(basename $0): ${RED}FAIL${NC}" - exit 1 -fi -echo -e "$(basename $0): ${GREEN}PASS${NC}" -exit 0 diff --git a/tools/testing/selftests/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/netfilter/nf_nat_edemux.sh deleted file mode 100755 index a1aa8f4a58..0000000000 --- a/tools/testing/selftests/netfilter/nf_nat_edemux.sh +++ /dev/null @@ -1,127 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# Test NAT source port clash resolution -# - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 -ret=0 - -sfx=$(mktemp -u "XXXXXXXX") -ns1="ns1-$sfx" -ns2="ns2-$sfx" -socatpid=0 - -cleanup() -{ - [ $socatpid -gt 0 ] && kill $socatpid - ip netns del $ns1 - ip netns del $ns2 -} - -socat -h > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without socat" - exit $ksft_skip -fi - -iptables --version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without iptables" - exit $ksft_skip -fi - -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi - -ip netns add "$ns1" -if [ $? -ne 0 ];then - echo "SKIP: Could not create net namespace $ns1" - exit $ksft_skip -fi - -trap cleanup EXIT - -ip netns add $ns2 - -# Connect the namespaces using a veth pair -ip link add name veth2 type veth peer name veth1 -ip link set netns $ns1 dev veth1 -ip link set netns $ns2 dev veth2 - -ip netns exec $ns1 ip link set up dev lo -ip netns exec $ns1 ip link set up dev veth1 -ip netns exec $ns1 ip addr add 192.168.1.1/24 dev veth1 - -ip netns exec $ns2 ip link set up dev lo -ip netns exec $ns2 ip link set up dev veth2 -ip netns exec $ns2 ip addr add 192.168.1.2/24 dev veth2 - -# Create a server in one namespace -ip netns exec $ns1 socat -u TCP-LISTEN:5201,fork OPEN:/dev/null,wronly=1 & -socatpid=$! - -# Restrict source port to just one so we don't have to exhaust -# all others. -ip netns exec $ns2 sysctl -q net.ipv4.ip_local_port_range="10000 10000" - -# add a virtual IP using DNAT -ip netns exec $ns2 iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201 - -# ... and route it to the other namespace -ip netns exec $ns2 ip route add 10.96.0.1 via 192.168.1.1 - -sleep 1 - -# add a persistent connection from the other namespace -ip netns exec $ns2 socat -t 10 - TCP:192.168.1.1:5201 > /dev/null & - -sleep 1 - -# ip daddr:dport will be rewritten to 192.168.1.1 5201 -# NAT must reallocate source port 10000 because -# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use -echo test | ip netns exec $ns2 socat -t 3 -u STDIN TCP:10.96.0.1:443,connect-timeout=3 >/dev/null -ret=$? - -# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201). -if [ $ret -eq 0 ]; then - echo "PASS: socat can connect via NAT'd address" -else - echo "FAIL: socat cannot connect via NAT'd address" -fi - -# check sport clashres. -ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201 -ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201 - -sleep 5 | ip netns exec $ns2 socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null & -cpid1=$! -sleep 1 - -# if connect succeeds, client closes instantly due to EOF on stdin. -# if connect hangs, it will time out after 5s. -echo | ip netns exec $ns2 socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null & -cpid2=$! - -time_then=$(date +%s) -wait $cpid2 -rv=$? -time_now=$(date +%s) - -# Check how much time has elapsed, expectation is for -# 'cpid2' to connect and then exit (and no connect delay). -delta=$((time_now - time_then)) - -if [ $delta -lt 2 -a $rv -eq 0 ]; then - echo "PASS: could connect to service via redirected ports" -else - echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)" - ret=1 -fi - -exit $ret diff --git a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh deleted file mode 100755 index faa7778d7b..0000000000 --- a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh +++ /dev/null @@ -1,197 +0,0 @@ -#!/bin/bash -# -# This tests connection tracking helper assignment: -# 1. can attach ftp helper to a connection from nft ruleset. -# 2. auto-assign still works. -# -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 -ret=0 - -sfx=$(mktemp -u "XXXXXXXX") -ns1="ns1-$sfx" -ns2="ns2-$sfx" -testipv6=1 - -cleanup() -{ - ip netns del ${ns1} - ip netns del ${ns2} -} - -nft --version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without nft tool" - exit $ksft_skip -fi - -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi - -conntrack -V > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without conntrack tool" - exit $ksft_skip -fi - -which nc >/dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without netcat tool" - exit $ksft_skip -fi - -trap cleanup EXIT - -ip netns add ${ns1} -ip netns add ${ns2} - -ip link add veth0 netns ${ns1} type veth peer name veth0 netns ${ns2} > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: No virtual ethernet pair device support in kernel" - exit $ksft_skip -fi - -ip -net ${ns1} link set lo up -ip -net ${ns1} link set veth0 up - -ip -net ${ns2} link set lo up -ip -net ${ns2} link set veth0 up - -ip -net ${ns1} addr add 10.0.1.1/24 dev veth0 -ip -net ${ns1} addr add dead:1::1/64 dev veth0 - -ip -net ${ns2} addr add 10.0.1.2/24 dev veth0 -ip -net ${ns2} addr add dead:1::2/64 dev veth0 - -load_ruleset_family() { - local family=$1 - local ns=$2 - -ip netns exec ${ns} nft -f - <<EOF -table $family raw { - ct helper ftp { - type "ftp" protocol tcp - } - chain pre { - type filter hook prerouting priority 0; policy accept; - tcp dport 2121 ct helper set "ftp" - } - chain output { - type filter hook output priority 0; policy accept; - tcp dport 2121 ct helper set "ftp" - } -} -EOF - return $? -} - -check_for_helper() -{ - local netns=$1 - local message=$2 - local port=$3 - - if echo $message |grep -q 'ipv6';then - local family="ipv6" - else - local family="ipv4" - fi - - ip netns exec ${netns} conntrack -L -f $family -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp' - if [ $? -ne 0 ] ; then - if [ $autoassign -eq 0 ] ;then - echo "FAIL: ${netns} did not show attached helper $message" 1>&2 - ret=1 - else - echo "PASS: ${netns} did not show attached helper $message" 1>&2 - fi - else - if [ $autoassign -eq 0 ] ;then - echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2 - else - echo "FAIL: ${netns} connection on port $port has ftp helper attached" 1>&2 - ret=1 - fi - fi - - return 0 -} - -test_helper() -{ - local port=$1 - local autoassign=$2 - - if [ $autoassign -eq 0 ] ;then - msg="set via ruleset" - else - msg="auto-assign" - fi - - sleep 3 | ip netns exec ${ns2} nc -w 2 -l -p $port > /dev/null & - - sleep 1 | ip netns exec ${ns1} nc -w 2 10.0.1.2 $port > /dev/null & - sleep 1 - - check_for_helper "$ns1" "ip $msg" $port $autoassign - check_for_helper "$ns2" "ip $msg" $port $autoassign - - wait - - if [ $testipv6 -eq 0 ] ;then - return 0 - fi - - ip netns exec ${ns1} conntrack -F 2> /dev/null - ip netns exec ${ns2} conntrack -F 2> /dev/null - - sleep 3 | ip netns exec ${ns2} nc -w 2 -6 -l -p $port > /dev/null & - - sleep 1 | ip netns exec ${ns1} nc -w 2 -6 dead:1::2 $port > /dev/null & - sleep 1 - - check_for_helper "$ns1" "ipv6 $msg" $port - check_for_helper "$ns2" "ipv6 $msg" $port - - wait -} - -load_ruleset_family ip ${ns1} -if [ $? -ne 0 ];then - echo "FAIL: ${ns1} cannot load ip ruleset" 1>&2 - exit 1 -fi - -load_ruleset_family ip6 ${ns1} -if [ $? -ne 0 ];then - echo "SKIP: ${ns1} cannot load ip6 ruleset" 1>&2 - testipv6=0 -fi - -load_ruleset_family inet ${ns2} -if [ $? -ne 0 ];then - echo "SKIP: ${ns1} cannot load inet ruleset" 1>&2 - load_ruleset_family ip ${ns2} - if [ $? -ne 0 ];then - echo "FAIL: ${ns2} cannot load ip ruleset" 1>&2 - exit 1 - fi - - if [ $testipv6 -eq 1 ] ;then - load_ruleset_family ip6 ${ns2} - if [ $? -ne 0 ];then - echo "FAIL: ${ns2} cannot load ip6 ruleset" 1>&2 - exit 1 - fi - fi -fi - -test_helper 2121 0 -ip netns exec ${ns1} sysctl -qe 'net.netfilter.nf_conntrack_helper=1' -ip netns exec ${ns2} sysctl -qe 'net.netfilter.nf_conntrack_helper=1' -test_helper 21 1 - -exit $ret diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh deleted file mode 100755 index dff476e45e..0000000000 --- a/tools/testing/selftests/netfilter/nft_fib.sh +++ /dev/null @@ -1,273 +0,0 @@ -#!/bin/bash -# -# This tests the fib expression. -# -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 -ret=0 - -sfx=$(mktemp -u "XXXXXXXX") -ns1="ns1-$sfx" -ns2="ns2-$sfx" -nsrouter="nsrouter-$sfx" -timeout=4 - -log_netns=$(sysctl -n net.netfilter.nf_log_all_netns) - -cleanup() -{ - ip netns del ${ns1} - ip netns del ${ns2} - ip netns del ${nsrouter} - - [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns -} - -nft --version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without nft tool" - exit $ksft_skip -fi - -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi - -ip netns add ${nsrouter} -if [ $? -ne 0 ];then - echo "SKIP: Could not create net namespace" - exit $ksft_skip -fi - -trap cleanup EXIT - -dmesg | grep -q ' nft_rpfilter: ' -if [ $? -eq 0 ]; then - dmesg -c | grep ' nft_rpfilter: ' - echo "WARN: a previous test run has failed" 1>&2 -fi - -sysctl -q net.netfilter.nf_log_all_netns=1 -ip netns add ${ns1} -ip netns add ${ns2} - -load_ruleset() { - local netns=$1 - -ip netns exec ${netns} nft -f /dev/stdin <<EOF -table inet filter { - chain prerouting { - type filter hook prerouting priority 0; policy accept; - fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop - } -} -EOF -} - -load_pbr_ruleset() { - local netns=$1 - -ip netns exec ${netns} nft -f /dev/stdin <<EOF -table inet filter { - chain forward { - type filter hook forward priority raw; - fib saddr . iif oif gt 0 accept - log drop - } -} -EOF -} - -load_ruleset_count() { - local netns=$1 - -ip netns exec ${netns} nft -f /dev/stdin <<EOF -table inet filter { - chain prerouting { - type filter hook prerouting priority 0; policy accept; - ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop - ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop - } -} -EOF -} - -check_drops() { - dmesg | grep -q ' nft_rpfilter: ' - if [ $? -eq 0 ]; then - dmesg | grep ' nft_rpfilter: ' - echo "FAIL: rpfilter did drop packets" - return 1 - fi - - return 0 -} - -check_fib_counter() { - local want=$1 - local ns=$2 - local address=$3 - - line=$(ip netns exec ${ns} nft list table inet filter | grep 'fib saddr . iif' | grep $address | grep "packets $want" ) - ret=$? - - if [ $ret -ne 0 ];then - echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2 - ip netns exec ${ns} nft list table inet filter - return 1 - fi - - if [ $want -gt 0 ]; then - echo "PASS: fib expression did drop packets for $address" - fi - - return 0 -} - -load_ruleset ${nsrouter} -load_ruleset ${ns1} -load_ruleset ${ns2} - -ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: No virtual ethernet pair device support in kernel" - exit $ksft_skip -fi -ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2} - -ip -net ${nsrouter} link set lo up -ip -net ${nsrouter} link set veth0 up -ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0 -ip -net ${nsrouter} addr add dead:1::1/64 dev veth0 - -ip -net ${nsrouter} link set veth1 up -ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1 -ip -net ${nsrouter} addr add dead:2::1/64 dev veth1 - -ip -net ${ns1} link set lo up -ip -net ${ns1} link set eth0 up - -ip -net ${ns2} link set lo up -ip -net ${ns2} link set eth0 up - -ip -net ${ns1} addr add 10.0.1.99/24 dev eth0 -ip -net ${ns1} addr add dead:1::99/64 dev eth0 -ip -net ${ns1} route add default via 10.0.1.1 -ip -net ${ns1} route add default via dead:1::1 - -ip -net ${ns2} addr add 10.0.2.99/24 dev eth0 -ip -net ${ns2} addr add dead:2::99/64 dev eth0 -ip -net ${ns2} route add default via 10.0.2.1 -ip -net ${ns2} route add default via dead:2::1 - -test_ping() { - local daddr4=$1 - local daddr6=$2 - - ip netns exec ${ns1} ping -c 1 -q $daddr4 > /dev/null - ret=$? - if [ $ret -ne 0 ];then - check_drops - echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2 - return 1 - fi - - ip netns exec ${ns1} ping -c 3 -q $daddr6 > /dev/null - ret=$? - if [ $ret -ne 0 ];then - check_drops - echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2 - return 1 - fi - - return 0 -} - -ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null -ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null -ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null -ip netns exec ${nsrouter} sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null -ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null - -sleep 3 - -test_ping 10.0.2.1 dead:2::1 || exit 1 -check_drops || exit 1 - -test_ping 10.0.2.99 dead:2::99 || exit 1 -check_drops || exit 1 - -echo "PASS: fib expression did not cause unwanted packet drops" - -ip netns exec ${nsrouter} nft flush table inet filter - -ip -net ${ns1} route del default -ip -net ${ns1} -6 route del default - -ip -net ${ns1} addr del 10.0.1.99/24 dev eth0 -ip -net ${ns1} addr del dead:1::99/64 dev eth0 - -ip -net ${ns1} addr add 10.0.2.99/24 dev eth0 -ip -net ${ns1} addr add dead:2::99/64 dev eth0 - -ip -net ${ns1} route add default via 10.0.2.1 -ip -net ${ns1} -6 route add default via dead:2::1 - -ip -net ${nsrouter} addr add dead:2::1/64 dev veth0 - -# switch to ruleset that doesn't log, this time -# its expected that this does drop the packets. -load_ruleset_count ${nsrouter} - -# ns1 has a default route, but nsrouter does not. -# must not check return value, ping to 1.1.1.1 will -# fail. -check_fib_counter 0 ${nsrouter} 1.1.1.1 || exit 1 -check_fib_counter 0 ${nsrouter} 1c3::c01d || exit 1 - -ip netns exec ${ns1} ping -c 1 -W 1 -q 1.1.1.1 > /dev/null -check_fib_counter 1 ${nsrouter} 1.1.1.1 || exit 1 - -sleep 2 -ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null -check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1 - -# delete all rules -ip netns exec ${ns1} nft flush ruleset -ip netns exec ${ns2} nft flush ruleset -ip netns exec ${nsrouter} nft flush ruleset - -ip -net ${ns1} addr add 10.0.1.99/24 dev eth0 -ip -net ${ns1} addr add dead:1::99/64 dev eth0 - -ip -net ${ns1} addr del 10.0.2.99/24 dev eth0 -ip -net ${ns1} addr del dead:2::99/64 dev eth0 - -ip -net ${nsrouter} addr del dead:2::1/64 dev veth0 - -# ... pbr ruleset for the router, check iif+oif. -load_pbr_ruleset ${nsrouter} -if [ $? -ne 0 ] ; then - echo "SKIP: Could not load fib forward ruleset" - exit $ksft_skip -fi - -ip -net ${nsrouter} rule add from all table 128 -ip -net ${nsrouter} rule add from all iif veth0 table 129 -ip -net ${nsrouter} route add table 128 to 10.0.1.0/24 dev veth0 -ip -net ${nsrouter} route add table 129 to 10.0.2.0/24 dev veth1 - -# drop main ipv4 table -ip -net ${nsrouter} -4 rule delete table main - -test_ping 10.0.2.99 dead:2::99 -if [ $? -ne 0 ] ; then - ip -net ${nsrouter} nft list ruleset - echo "FAIL: fib mismatch in pbr setup" - exit 1 -fi - -echo "PASS: fib expression forward check with policy based routing" -exit 0 diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh deleted file mode 100755 index e127297533..0000000000 --- a/tools/testing/selftests/netfilter/nft_queue.sh +++ /dev/null @@ -1,449 +0,0 @@ -#!/bin/bash -# -# This tests nf_queue: -# 1. can process packets from all hooks -# 2. support running nfqueue from more than one base chain -# -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 -ret=0 - -sfx=$(mktemp -u "XXXXXXXX") -ns1="ns1-$sfx" -ns2="ns2-$sfx" -nsrouter="nsrouter-$sfx" -timeout=4 - -cleanup() -{ - ip netns pids ${ns1} | xargs kill 2>/dev/null - ip netns pids ${ns2} | xargs kill 2>/dev/null - ip netns pids ${nsrouter} | xargs kill 2>/dev/null - - ip netns del ${ns1} - ip netns del ${ns2} - ip netns del ${nsrouter} - rm -f "$TMPFILE0" - rm -f "$TMPFILE1" - rm -f "$TMPFILE2" "$TMPFILE3" -} - -nft --version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without nft tool" - exit $ksft_skip -fi - -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi - -ip netns add ${nsrouter} -if [ $? -ne 0 ];then - echo "SKIP: Could not create net namespace" - exit $ksft_skip -fi - -TMPFILE0=$(mktemp) -TMPFILE1=$(mktemp) -TMPFILE2=$(mktemp) -TMPFILE3=$(mktemp) -trap cleanup EXIT - -ip netns add ${ns1} -ip netns add ${ns2} - -ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: No virtual ethernet pair device support in kernel" - exit $ksft_skip -fi -ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2} - -ip -net ${nsrouter} link set lo up -ip -net ${nsrouter} link set veth0 up -ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0 -ip -net ${nsrouter} addr add dead:1::1/64 dev veth0 - -ip -net ${nsrouter} link set veth1 up -ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1 -ip -net ${nsrouter} addr add dead:2::1/64 dev veth1 - -ip -net ${ns1} link set lo up -ip -net ${ns1} link set eth0 up - -ip -net ${ns2} link set lo up -ip -net ${ns2} link set eth0 up - -ip -net ${ns1} addr add 10.0.1.99/24 dev eth0 -ip -net ${ns1} addr add dead:1::99/64 dev eth0 -ip -net ${ns1} route add default via 10.0.1.1 -ip -net ${ns1} route add default via dead:1::1 - -ip -net ${ns2} addr add 10.0.2.99/24 dev eth0 -ip -net ${ns2} addr add dead:2::99/64 dev eth0 -ip -net ${ns2} route add default via 10.0.2.1 -ip -net ${ns2} route add default via dead:2::1 - -load_ruleset() { - local name=$1 - local prio=$2 - -ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF -table inet $name { - chain nfq { - ip protocol icmp queue bypass - icmpv6 type { "echo-request", "echo-reply" } queue num 1 bypass - } - chain pre { - type filter hook prerouting priority $prio; policy accept; - jump nfq - } - chain input { - type filter hook input priority $prio; policy accept; - jump nfq - } - chain forward { - type filter hook forward priority $prio; policy accept; - tcp dport 12345 queue num 2 - jump nfq - } - chain output { - type filter hook output priority $prio; policy accept; - tcp dport 12345 queue num 3 - tcp sport 23456 queue num 3 - jump nfq - } - chain post { - type filter hook postrouting priority $prio; policy accept; - jump nfq - } -} -EOF -} - -load_counter_ruleset() { - local prio=$1 - -ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF -table inet countrules { - chain pre { - type filter hook prerouting priority $prio; policy accept; - counter - } - chain input { - type filter hook input priority $prio; policy accept; - counter - } - chain forward { - type filter hook forward priority $prio; policy accept; - counter - } - chain output { - type filter hook output priority $prio; policy accept; - counter - } - chain post { - type filter hook postrouting priority $prio; policy accept; - counter - } -} -EOF -} - -test_ping() { - ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null - if [ $? -ne 0 ];then - return 1 - fi - - ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null - if [ $? -ne 0 ];then - return 1 - fi - - return 0 -} - -test_ping_router() { - ip netns exec ${ns1} ping -c 1 -q 10.0.2.1 > /dev/null - if [ $? -ne 0 ];then - return 1 - fi - - ip netns exec ${ns1} ping -c 1 -q dead:2::1 > /dev/null - if [ $? -ne 0 ];then - return 1 - fi - - return 0 -} - -test_queue_blackhole() { - local proto=$1 - -ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF -table $proto blackh { - chain forward { - type filter hook forward priority 0; policy accept; - queue num 600 - } -} -EOF - if [ $proto = "ip" ] ;then - ip netns exec ${ns1} ping -W 2 -c 1 -q 10.0.2.99 > /dev/null - lret=$? - elif [ $proto = "ip6" ]; then - ip netns exec ${ns1} ping -W 2 -c 1 -q dead:2::99 > /dev/null - lret=$? - else - lret=111 - fi - - # queue without bypass keyword should drop traffic if no listener exists. - if [ $lret -eq 0 ];then - echo "FAIL: $proto expected failure, got $lret" 1>&2 - exit 1 - fi - - ip netns exec ${nsrouter} nft delete table $proto blackh - if [ $? -ne 0 ] ;then - echo "FAIL: $proto: Could not delete blackh table" - exit 1 - fi - - echo "PASS: $proto: statement with no listener results in packet drop" -} - -test_queue() -{ - local expected=$1 - local last="" - - # spawn nf-queue listeners - ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t $timeout > "$TMPFILE0" & - ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE1" & - sleep 1 - test_ping - ret=$? - if [ $ret -ne 0 ];then - echo "FAIL: netns routing/connectivity with active listener on queue $queue: $ret" 1>&2 - exit $ret - fi - - test_ping_router - ret=$? - if [ $ret -ne 0 ];then - echo "FAIL: netns router unreachable listener on queue $queue: $ret" 1>&2 - exit $ret - fi - - wait - ret=$? - - for file in $TMPFILE0 $TMPFILE1; do - last=$(tail -n1 "$file") - if [ x"$last" != x"$expected packets total" ]; then - echo "FAIL: Expected $expected packets total, but got $last" 1>&2 - cat "$file" 1>&2 - - ip netns exec ${nsrouter} nft list ruleset - exit 1 - fi - done - - echo "PASS: Expected and received $last" -} - -test_tcp_forward() -{ - ip netns exec ${nsrouter} ./nf-queue -q 2 -t $timeout & - local nfqpid=$! - - tmpfile=$(mktemp) || exit 1 - dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile - ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null & - local rpid=$! - - sleep 1 - ip netns exec ${ns1} nc -w 5 10.0.2.99 12345 <"$tmpfile" >/dev/null & - - rm -f "$tmpfile" - - wait $rpid - wait $lpid - [ $? -eq 0 ] && echo "PASS: tcp and nfqueue in forward chain" -} - -test_tcp_localhost() -{ - tmpfile=$(mktemp) || exit 1 - - dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile - ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null & - local rpid=$! - - ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout & - local nfqpid=$! - - sleep 1 - ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null - rm -f "$tmpfile" - - wait $rpid - [ $? -eq 0 ] && echo "PASS: tcp via loopback" - wait 2>/dev/null -} - -test_tcp_localhost_connectclose() -{ - tmpfile=$(mktemp) || exit 1 - - ip netns exec ${nsrouter} ./connect_close -p 23456 -t $timeout & - - ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout & - local nfqpid=$! - - sleep 1 - rm -f "$tmpfile" - - wait $rpid - [ $? -eq 0 ] && echo "PASS: tcp via loopback with connect/close" - wait 2>/dev/null -} - -test_tcp_localhost_requeue() -{ -ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF -flush ruleset -table inet filter { - chain output { - type filter hook output priority 0; policy accept; - tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0 - } - chain post { - type filter hook postrouting priority 0; policy accept; - tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0 - } -} -EOF - tmpfile=$(mktemp) || exit 1 - dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile - ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null & - local rpid=$! - - ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE2" & - - # nfqueue 1 will be called via output hook. But this time, - # re-queue the packet to nfqueue program on queue 2. - ip netns exec ${nsrouter} ./nf-queue -G -d 150 -c -q 0 -Q 1 -t $timeout > "$TMPFILE3" & - - sleep 1 - ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null - rm -f "$tmpfile" - - wait - - if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then - echo "FAIL: lost packets during requeue?!" 1>&2 - return - fi - - echo "PASS: tcp via loopback and re-queueing" -} - -test_icmp_vrf() { - ip -net $ns1 link add tvrf type vrf table 9876 - if [ $? -ne 0 ];then - echo "SKIP: Could not add vrf device" - return - fi - - ip -net $ns1 li set eth0 master tvrf - ip -net $ns1 li set tvrf up - - ip -net $ns1 route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876 -ip netns exec ${ns1} nft -f /dev/stdin <<EOF -flush ruleset -table inet filter { - chain output { - type filter hook output priority 0; policy accept; - meta oifname "tvrf" icmp type echo-request counter queue num 1 - meta oifname "eth0" icmp type echo-request counter queue num 1 - } - chain post { - type filter hook postrouting priority 0; policy accept; - meta oifname "tvrf" icmp type echo-request counter queue num 1 - meta oifname "eth0" icmp type echo-request counter queue num 1 - } -} -EOF - ip netns exec ${ns1} ./nf-queue -q 1 -t $timeout & - local nfqpid=$! - - sleep 1 - ip netns exec ${ns1} ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null - - for n in output post; do - for d in tvrf eth0; do - ip netns exec ${ns1} nft list chain inet filter $n | grep -q "oifname \"$d\" icmp type echo-request counter packets 1" - if [ $? -ne 0 ] ; then - echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2 - ip netns exec ${ns1} nft list ruleset - ret=1 - return - fi - done - done - - wait $nfqpid - [ $? -eq 0 ] && echo "PASS: icmp+nfqueue via vrf" - wait 2>/dev/null -} - -ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null -ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null -ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null - -load_ruleset "filter" 0 - -sleep 3 - -test_ping -ret=$? -if [ $ret -eq 0 ];then - # queue bypass works (rules were skipped, no listener) - echo "PASS: ${ns1} can reach ${ns2}" -else - echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2 - exit $ret -fi - -test_queue_blackhole ip -test_queue_blackhole ip6 - -# dummy ruleset to add base chains between the -# queueing rules. We don't want the second reinject -# to re-execute the old hooks. -load_counter_ruleset 10 - -# we are hooking all: prerouting/input/forward/output/postrouting. -# we ping ${ns2} from ${ns1} via ${nsrouter} using ipv4 and ipv6, so: -# 1x icmp prerouting,forward,postrouting -> 3 queue events (6 incl. reply). -# 1x icmp prerouting,input,output postrouting -> 4 queue events incl. reply. -# so we expect that userspace program receives 10 packets. -test_queue 10 - -# same. We queue to a second program as well. -load_ruleset "filter2" 20 -test_queue 20 - -test_tcp_forward -test_tcp_localhost -test_tcp_localhost_connectclose -test_tcp_localhost_requeue -test_icmp_vrf - -exit $ret diff --git a/tools/testing/selftests/netfilter/nft_synproxy.sh b/tools/testing/selftests/netfilter/nft_synproxy.sh deleted file mode 100755 index b62933b680..0000000000 --- a/tools/testing/selftests/netfilter/nft_synproxy.sh +++ /dev/null @@ -1,117 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 -ret=0 - -rnd=$(mktemp -u XXXXXXXX) -nsr="nsr-$rnd" # synproxy machine -ns1="ns1-$rnd" # iperf client -ns2="ns2-$rnd" # iperf server - -checktool (){ - if ! $1 > /dev/null 2>&1; then - echo "SKIP: Could not $2" - exit $ksft_skip - fi -} - -checktool "nft --version" "run test without nft tool" -checktool "ip -Version" "run test without ip tool" -checktool "iperf3 --version" "run test without iperf3" -checktool "ip netns add $nsr" "create net namespace" - -modprobe -q nf_conntrack - -ip netns add $ns1 -ip netns add $ns2 - -cleanup() { - ip netns pids $ns1 | xargs kill 2>/dev/null - ip netns pids $ns2 | xargs kill 2>/dev/null - ip netns del $ns1 - ip netns del $ns2 - - ip netns del $nsr -} - -trap cleanup EXIT - -ip link add veth0 netns $nsr type veth peer name eth0 netns $ns1 -ip link add veth1 netns $nsr type veth peer name eth0 netns $ns2 - -for dev in lo veth0 veth1; do -ip -net $nsr link set $dev up -done - -ip -net $nsr addr add 10.0.1.1/24 dev veth0 -ip -net $nsr addr add 10.0.2.1/24 dev veth1 - -ip netns exec $nsr sysctl -q net.ipv4.conf.veth0.forwarding=1 -ip netns exec $nsr sysctl -q net.ipv4.conf.veth1.forwarding=1 -ip netns exec $nsr sysctl -q net.netfilter.nf_conntrack_tcp_loose=0 - -for n in $ns1 $ns2; do - ip -net $n link set lo up - ip -net $n link set eth0 up -done -ip -net $ns1 addr add 10.0.1.99/24 dev eth0 -ip -net $ns2 addr add 10.0.2.99/24 dev eth0 -ip -net $ns1 route add default via 10.0.1.1 -ip -net $ns2 route add default via 10.0.2.1 - -# test basic connectivity -if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then - echo "ERROR: $ns1 cannot reach $ns2" 1>&2 - exit 1 -fi - -if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then - echo "ERROR: $ns2 cannot reach $ns1" 1>&2 - exit 1 -fi - -ip netns exec $ns2 iperf3 -s > /dev/null 2>&1 & -# ip netns exec $nsr tcpdump -vvv -n -i veth1 tcp | head -n 10 & - -sleep 1 - -ip netns exec $nsr nft -f - <<EOF -table inet filter { - chain prerouting { - type filter hook prerouting priority -300; policy accept; - meta iif veth0 tcp flags syn counter notrack - } - - chain forward { - type filter hook forward priority 0; policy accept; - - ct state new,established counter accept - - meta iif veth0 meta l4proto tcp ct state untracked,invalid synproxy mss 1460 sack-perm timestamp - - ct state invalid counter drop - - # make ns2 unreachable w.o. tcp synproxy - tcp flags syn counter drop - } -} -EOF -if [ $? -ne 0 ]; then - echo "SKIP: Cannot add nft synproxy" - exit $ksft_skip -fi - -ip netns exec $ns1 timeout 5 iperf3 -c 10.0.2.99 -n $((1 * 1024 * 1024)) > /dev/null - -if [ $? -ne 0 ]; then - echo "FAIL: iperf3 returned an error" 1>&2 - ret=$? - ip netns exec $nsr nft list ruleset -else - echo "PASS: synproxy connection successful" -fi - -exit $ret diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh deleted file mode 100755 index 2ffba45a78..0000000000 --- a/tools/testing/selftests/netfilter/nft_trans_stress.sh +++ /dev/null @@ -1,151 +0,0 @@ -#!/bin/bash -# -# This test is for stress-testing the nf_tables config plane path vs. -# packet path processing: Make sure we never release rules that are -# still visible to other cpus. -# -# set -e - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - -testns=testns-$(mktemp -u "XXXXXXXX") -tmp="" - -tables="foo bar baz quux" -global_ret=0 -eret=0 -lret=0 - -cleanup() { - ip netns pids "$testns" | xargs kill 2>/dev/null - ip netns del "$testns" - - rm -f "$tmp" -} - -check_result() -{ - local r=$1 - local OK="PASS" - - if [ $r -ne 0 ] ;then - OK="FAIL" - global_ret=$r - fi - - echo "$OK: nft $2 test returned $r" - - eret=0 -} - -nft --version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without nft tool" - exit $ksft_skip -fi - -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi - -trap cleanup EXIT -tmp=$(mktemp) - -for table in $tables; do - echo add table inet "$table" >> "$tmp" - echo flush table inet "$table" >> "$tmp" - - echo "add chain inet $table INPUT { type filter hook input priority 0; }" >> "$tmp" - echo "add chain inet $table OUTPUT { type filter hook output priority 0; }" >> "$tmp" - for c in $(seq 1 400); do - chain=$(printf "chain%03u" "$c") - echo "add chain inet $table $chain" >> "$tmp" - done - - for c in $(seq 1 400); do - chain=$(printf "chain%03u" "$c") - for BASE in INPUT OUTPUT; do - echo "add rule inet $table $BASE counter jump $chain" >> "$tmp" - done - echo "add rule inet $table $chain counter return" >> "$tmp" - done -done - -ip netns add "$testns" -ip -netns "$testns" link set lo up - -lscpu | grep ^CPU\(s\): | ( read cpu cpunum ; -cpunum=$((cpunum-1)) -for i in $(seq 0 $cpunum);do - mask=$(printf 0x%x $((1<<$i))) - ip netns exec "$testns" taskset $mask ping -4 127.0.0.1 -fq > /dev/null & - ip netns exec "$testns" taskset $mask ping -6 ::1 -fq > /dev/null & -done) - -sleep 1 - -ip netns exec "$testns" nft -f "$tmp" -for i in $(seq 1 10) ; do ip netns exec "$testns" nft -f "$tmp" & done - -for table in $tables;do - randsleep=$((RANDOM%2)) - sleep $randsleep - ip netns exec "$testns" nft delete table inet $table - lret=$? - if [ $lret -ne 0 ]; then - eret=$lret - fi -done - -check_result $eret "add/delete" - -for i in $(seq 1 10) ; do - (echo "flush ruleset"; cat "$tmp") | ip netns exec "$testns" nft -f /dev/stdin - - lret=$? - if [ $lret -ne 0 ]; then - eret=$lret - fi -done - -check_result $eret "reload" - -for i in $(seq 1 10) ; do - (echo "flush ruleset"; cat "$tmp" - echo "insert rule inet foo INPUT meta nftrace set 1" - echo "insert rule inet foo OUTPUT meta nftrace set 1" - ) | ip netns exec "$testns" nft -f /dev/stdin - lret=$? - if [ $lret -ne 0 ]; then - eret=$lret - fi - - (echo "flush ruleset"; cat "$tmp" - ) | ip netns exec "$testns" nft -f /dev/stdin - - lret=$? - if [ $lret -ne 0 ]; then - eret=$lret - fi -done - -check_result $eret "add/delete with nftrace enabled" - -echo "insert rule inet foo INPUT meta nftrace set 1" >> $tmp -echo "insert rule inet foo OUTPUT meta nftrace set 1" >> $tmp - -for i in $(seq 1 10) ; do - (echo "flush ruleset"; cat "$tmp") | ip netns exec "$testns" nft -f /dev/stdin - - lret=$? - if [ $lret -ne 0 ]; then - eret=1 - fi -done - -check_result $lret "add/delete with nftrace enabled" - -exit $global_ret diff --git a/tools/testing/selftests/netfilter/settings b/tools/testing/selftests/netfilter/settings deleted file mode 100644 index 6091b45d22..0000000000 --- a/tools/testing/selftests/netfilter/settings +++ /dev/null @@ -1 +0,0 @@ -timeout=120 diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 6ba4f8275a..994477ee87 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -27,6 +27,7 @@ #include <sys/syscall.h> #include <sys/sysmacros.h> #include <sys/time.h> +#include <sys/utsname.h> #include <sys/wait.h> #include <dirent.h> #include <errno.h> @@ -600,6 +601,25 @@ int expect_strne(const char *expr, int llen, const char *cmp) return ret; } +#define EXPECT_STRBUFEQ(cond, expr, buf, val, cmp) \ + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_str_buf_eq(expr, buf, val, llen, cmp); } while (0) + +static __attribute__((unused)) +int expect_str_buf_eq(size_t expr, const char *buf, size_t val, int llen, const char *cmp) +{ + llen += printf(" = %lu <%s> ", (unsigned long)expr, buf); + if (strcmp(buf, cmp) != 0) { + result(llen, FAIL); + return 1; + } + if (expr != val) { + result(llen, FAIL); + return 1; + } + + result(llen, OK); + return 0; +} /* declare tests based on line numbers. There must be exactly one test per line. */ #define CASE_TEST(name) \ @@ -761,6 +781,45 @@ int test_stat_timestamps(void) return 0; } +int test_uname(void) +{ + struct utsname buf; + char osrelease[sizeof(buf.release)]; + ssize_t r; + int fd; + + memset(&buf.domainname, 'P', sizeof(buf.domainname)); + + if (uname(&buf)) + return 1; + + if (strncmp("Linux", buf.sysname, sizeof(buf.sysname))) + return 1; + + fd = open("/proc/sys/kernel/osrelease", O_RDONLY); + if (fd == -1) + return 1; + + r = read(fd, osrelease, sizeof(osrelease)); + if (r == -1) + return 1; + + close(fd); + + if (osrelease[r - 1] == '\n') + r--; + + /* Validate one of the later fields to ensure field sizes are correct */ + if (strncmp(osrelease, buf.release, r)) + return 1; + + /* Ensure the field domainname is set, it is missing from struct old_utsname */ + if (strnlen(buf.domainname, sizeof(buf.domainname)) == sizeof(buf.domainname)) + return 1; + + return 0; +} + int test_mmap_munmap(void) { int ret, fd, i, page_size; @@ -966,6 +1025,8 @@ int run_syscall(int min, int max) CASE_TEST(stat_fault); EXPECT_SYSER(1, stat(NULL, &stat_buf), -1, EFAULT); break; CASE_TEST(stat_timestamps); EXPECT_SYSZR(1, test_stat_timestamps()); break; CASE_TEST(symlink_root); EXPECT_SYSER(1, symlink("/", "/"), -1, EEXIST); break; + CASE_TEST(uname); EXPECT_SYSZR(proc, test_uname()); break; + CASE_TEST(uname_fault); EXPECT_SYSER(1, uname(NULL), -1, EFAULT); break; CASE_TEST(unlink_root); EXPECT_SYSER(1, unlink("/"), -1, EISDIR); break; CASE_TEST(unlink_blah); EXPECT_SYSER(1, unlink("/proc/self/blah"), -1, ENOENT); break; CASE_TEST(wait_child); EXPECT_SYSER(1, wait(&tmp), -1, ECHILD); break; @@ -991,6 +1052,14 @@ int run_stdlib(int min, int max) for (test = min; test >= 0 && test <= max; test++) { int llen = 0; /* line length */ + /* For functions that take a long buffer, like strlcat() + * Add some more chars after the \0, to test functions that overwrite the buffer set + * the \0 at the exact right position. + */ + char buf[10] = "test123456"; + buf[4] = '\0'; + + /* avoid leaving empty lines below, this will insert holes into * test numbers. */ @@ -1007,6 +1076,19 @@ int run_stdlib(int min, int max) CASE_TEST(strchr_foobar_z); EXPECT_STRZR(1, strchr("foobar", 'z')); break; CASE_TEST(strrchr_foobar_o); EXPECT_STREQ(1, strrchr("foobar", 'o'), "obar"); break; CASE_TEST(strrchr_foobar_z); EXPECT_STRZR(1, strrchr("foobar", 'z')); break; +#ifdef NOLIBC + CASE_TEST(strlcat_0); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 0), buf, 3, "test"); break; + CASE_TEST(strlcat_1); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 1), buf, 4, "test"); break; + CASE_TEST(strlcat_5); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 5), buf, 7, "test"); break; + CASE_TEST(strlcat_6); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 6), buf, 7, "testb"); break; + CASE_TEST(strlcat_7); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 7), buf, 7, "testba"); break; + CASE_TEST(strlcat_8); EXPECT_STRBUFEQ(1, strlcat(buf, "bar", 8), buf, 7, "testbar"); break; + CASE_TEST(strlcpy_0); EXPECT_STRBUFEQ(1, strlcpy(buf, "bar", 0), buf, 3, "test"); break; + CASE_TEST(strlcpy_1); EXPECT_STRBUFEQ(1, strlcpy(buf, "bar", 1), buf, 3, ""); break; + CASE_TEST(strlcpy_2); EXPECT_STRBUFEQ(1, strlcpy(buf, "bar", 2), buf, 3, "b"); break; + CASE_TEST(strlcpy_3); EXPECT_STRBUFEQ(1, strlcpy(buf, "bar", 3), buf, 3, "ba"); break; + CASE_TEST(strlcpy_4); EXPECT_STRBUFEQ(1, strlcpy(buf, "bar", 4), buf, 3, "bar"); break; +#endif CASE_TEST(memcmp_20_20); EXPECT_EQ(1, memcmp("aaa\x20", "aaa\x20", 4), 0); break; CASE_TEST(memcmp_20_60); EXPECT_LT(1, memcmp("aaa\x20", "aaa\x60", 4), 0); break; CASE_TEST(memcmp_60_20); EXPECT_GT(1, memcmp("aaa\x60", "aaa\x20", 4), 0); break; diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/openat2/Makefile index 254d676a26..185dc76ebb 100644 --- a/tools/testing/selftests/openat2/Makefile +++ b/tools/testing/selftests/openat2/Makefile @@ -1,8 +1,18 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan +CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test +# gcc requires -static-libasan in order to ensure that Address Sanitizer's +# library is the first one loaded. However, clang already statically links the +# Address Sanitizer if -fsanitize is specified. Therefore, simply omit +# -static-libasan for clang builds. +ifeq ($(LLVM),) + CFLAGS += -static-libasan +endif + +LOCAL_HDRS += helpers.h + include ../lib.mk -$(TEST_GEN_PROGS): helpers.c helpers.h +$(TEST_GEN_PROGS): helpers.c diff --git a/tools/testing/selftests/perf_events/.gitignore b/tools/testing/selftests/perf_events/.gitignore index 790c47001e..ee93dc4969 100644 --- a/tools/testing/selftests/perf_events/.gitignore +++ b/tools/testing/selftests/perf_events/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only sigtrap_threads remove_on_exec +watermark_signal diff --git a/tools/testing/selftests/perf_events/Makefile b/tools/testing/selftests/perf_events/Makefile index db93c4ff08..70e3ff2112 100644 --- a/tools/testing/selftests/perf_events/Makefile +++ b/tools/testing/selftests/perf_events/Makefile @@ -2,5 +2,5 @@ CFLAGS += -Wl,-no-as-needed -Wall $(KHDR_INCLUDES) LDFLAGS += -lpthread -TEST_GEN_PROGS := sigtrap_threads remove_on_exec +TEST_GEN_PROGS := sigtrap_threads remove_on_exec watermark_signal include ../lib.mk diff --git a/tools/testing/selftests/perf_events/watermark_signal.c b/tools/testing/selftests/perf_events/watermark_signal.c new file mode 100644 index 0000000000..49dc1e8311 --- /dev/null +++ b/tools/testing/selftests/perf_events/watermark_signal.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <linux/perf_event.h> +#include <stddef.h> +#include <sched.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "../kselftest_harness.h" + +#define __maybe_unused __attribute__((__unused__)) + +static int sigio_count; + +static void handle_sigio(int signum __maybe_unused, + siginfo_t *oh __maybe_unused, + void *uc __maybe_unused) +{ + ++sigio_count; +} + +static void do_child(void) +{ + raise(SIGSTOP); + + for (int i = 0; i < 20; ++i) + sleep(1); + + raise(SIGSTOP); + + exit(0); +} + +TEST(watermark_signal) +{ + struct perf_event_attr attr; + struct perf_event_mmap_page *p = NULL; + struct sigaction previous_sigio, sigio = { 0 }; + pid_t child = -1; + int child_status; + int fd = -1; + long page_size = sysconf(_SC_PAGE_SIZE); + + sigio.sa_sigaction = handle_sigio; + EXPECT_EQ(sigaction(SIGIO, &sigio, &previous_sigio), 0); + + memset(&attr, 0, sizeof(attr)); + attr.size = sizeof(attr); + attr.type = PERF_TYPE_SOFTWARE; + attr.config = PERF_COUNT_SW_DUMMY; + attr.sample_period = 1; + attr.disabled = 1; + attr.watermark = 1; + attr.context_switch = 1; + attr.wakeup_watermark = 1; + + child = fork(); + EXPECT_GE(child, 0); + if (child == 0) + do_child(); + else if (child < 0) { + perror("fork()"); + goto cleanup; + } + + if (waitpid(child, &child_status, WSTOPPED) != child || + !(WIFSTOPPED(child_status) && WSTOPSIG(child_status) == SIGSTOP)) { + fprintf(stderr, + "failed to sycnhronize with child errno=%d status=%x\n", + errno, + child_status); + goto cleanup; + } + + fd = syscall(__NR_perf_event_open, &attr, child, -1, -1, + PERF_FLAG_FD_CLOEXEC); + if (fd < 0) { + fprintf(stderr, "failed opening event %llx\n", attr.config); + goto cleanup; + } + + if (fcntl(fd, F_SETFL, FASYNC)) { + perror("F_SETFL FASYNC"); + goto cleanup; + } + + if (fcntl(fd, F_SETOWN, getpid())) { + perror("F_SETOWN getpid()"); + goto cleanup; + } + + if (fcntl(fd, F_SETSIG, SIGIO)) { + perror("F_SETSIG SIGIO"); + goto cleanup; + } + + p = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (p == NULL) { + perror("mmap"); + goto cleanup; + } + + if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0)) { + perror("PERF_EVENT_IOC_ENABLE"); + goto cleanup; + } + + if (kill(child, SIGCONT) < 0) { + perror("SIGCONT"); + goto cleanup; + } + + if (waitpid(child, &child_status, WSTOPPED) != -1 || errno != EINTR) + fprintf(stderr, + "expected SIGIO to terminate wait errno=%d status=%x\n%d", + errno, + child_status, + sigio_count); + + EXPECT_GE(sigio_count, 1); + +cleanup: + if (p != NULL) + munmap(p, 2 * page_size); + + if (fd >= 0) + close(fd); + + if (child > 0) { + kill(child, SIGKILL); + waitpid(child, NULL, 0); + } + + sigaction(SIGIO, &previous_sigio, NULL); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c index 01cc37bf61..f062a986e3 100644 --- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c +++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c @@ -307,5 +307,5 @@ int main(int argc, char **argv) test_pidfd_fdinfo_nspid(); test_pidfd_dead_fdinfo(); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c index 8a59438ccc..c62564c264 100644 --- a/tools/testing/selftests/pidfd/pidfd_open_test.c +++ b/tools/testing/selftests/pidfd/pidfd_open_test.c @@ -159,5 +159,7 @@ on_error: if (pidfd >= 0) close(pidfd); - return !ret ? ksft_exit_pass() : ksft_exit_fail(); + if (ret) + ksft_exit_fail(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/pidfd/pidfd_poll_test.c b/tools/testing/selftests/pidfd/pidfd_poll_test.c index 6108112753..55d74a5035 100644 --- a/tools/testing/selftests/pidfd/pidfd_poll_test.c +++ b/tools/testing/selftests/pidfd/pidfd_poll_test.c @@ -112,5 +112,5 @@ int main(int argc, char **argv) } ksft_test_result_pass("pidfd poll test: pass\n"); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index c081ae9131..9faa686f90 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -572,5 +572,5 @@ int main(int argc, char **argv) test_pidfd_send_signal_exited_fail(); test_pidfd_send_signal_recycled_pid_fail(); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index c376151982..b175e94e19 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -7,12 +7,6 @@ ARCH := $(shell echo $(ARCH) | sed -e s/ppc.*/powerpc/) ifeq ($(ARCH),powerpc) -GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown") - -CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR)/include $(CFLAGS) - -export CFLAGS - SUB_DIRS = alignment \ benchmarks \ cache_shape \ @@ -46,6 +40,7 @@ $(SUB_DIRS): BUILD_TARGET=$(OUTPUT)/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all include ../lib.mk +include ./flags.mk override define RUN_TESTS +@for TARGET in $(SUB_DIRS); do \ @@ -57,14 +52,14 @@ endef override define INSTALL_RULE +@for TARGET in $(SUB_DIRS); do \ BUILD_TARGET=$(OUTPUT)/$$TARGET; \ - $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install;\ + $(MAKE) OUTPUT=$$BUILD_TARGET INSTALL_PATH=$$INSTALL_PATH/$$TARGET -C $$TARGET install;\ done; endef emit_tests: +@for TARGET in $(SUB_DIRS); do \ BUILD_TARGET=$(OUTPUT)/$$TARGET; \ - $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET $@;\ + $(MAKE) OUTPUT=$$BUILD_TARGET COLLECTION=$(COLLECTION)/$$TARGET -s -C $$TARGET $@;\ done; override define CLEAN diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile index 93e9af3744..66d5d7aaeb 100644 --- a/tools/testing/selftests/powerpc/alignment/Makefile +++ b/tools/testing/selftests/powerpc/alignment/Makefile @@ -3,5 +3,6 @@ TEST_GEN_PROGS := copy_first_unaligned alignment_handler top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile index a32a6ab899..1321922038 100644 --- a/tools/testing/selftests/powerpc/benchmarks/Makefile +++ b/tools/testing/selftests/powerpc/benchmarks/Makefile @@ -4,10 +4,11 @@ TEST_GEN_FILES := exec_target TEST_FILES := settings -CFLAGS += -O2 - top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk + +CFLAGS += -O2 $(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/cache_shape/Makefile b/tools/testing/selftests/powerpc/cache_shape/Makefile index 689f6c8ebc..3a3ca956ac 100644 --- a/tools/testing/selftests/powerpc/cache_shape/Makefile +++ b/tools/testing/selftests/powerpc/cache_shape/Makefile @@ -3,5 +3,6 @@ TEST_GEN_PROGS := cache_shape top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index 77594e697f..42940f92d8 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -1,14 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -# The loops are all 64-bit code -CFLAGS += -m64 -CFLAGS += -I$(CURDIR) -CFLAGS += -D SELFTEST -CFLAGS += -maltivec -CFLAGS += -mcpu=power4 - -# Use our CFLAGS for the implicit .S rule & set the asm machine type -ASFLAGS = $(CFLAGS) -Wa,-mpower4 - TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \ copyuser_p7_t0 copyuser_p7_t1 \ memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \ @@ -20,6 +10,17 @@ EXTRA_SOURCES := validate.c ../harness.c stubs.S top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk + +# The loops are all 64-bit code +CFLAGS += -m64 +CFLAGS += -I$(CURDIR) +CFLAGS += -D SELFTEST +CFLAGS += -maltivec +CFLAGS += -mcpu=power4 + +# Use our CFLAGS for the implicit .S rule & set the asm machine type +ASFLAGS = $(CFLAGS) -Wa,-mpower4 $(OUTPUT)/copyuser_64_t%: copyuser_64.S $(EXTRA_SOURCES) $(CC) $(CPPFLAGS) $(CFLAGS) \ diff --git a/tools/testing/selftests/powerpc/dexcr/.gitignore b/tools/testing/selftests/powerpc/dexcr/.gitignore index b82f45dd46..11eefb4b9f 100644 --- a/tools/testing/selftests/powerpc/dexcr/.gitignore +++ b/tools/testing/selftests/powerpc/dexcr/.gitignore @@ -1,2 +1,4 @@ +dexcr_test hashchk_test +chdexcr lsdexcr diff --git a/tools/testing/selftests/powerpc/dexcr/Makefile b/tools/testing/selftests/powerpc/dexcr/Makefile index 829ad075b4..58cf9f7229 100644 --- a/tools/testing/selftests/powerpc/dexcr/Makefile +++ b/tools/testing/selftests/powerpc/dexcr/Makefile @@ -1,7 +1,10 @@ -TEST_GEN_PROGS := hashchk_test -TEST_GEN_FILES := lsdexcr +TEST_GEN_PROGS := dexcr_test hashchk_test +TEST_GEN_FILES := lsdexcr chdexcr include ../../lib.mk +include ../flags.mk + +CFLAGS += $(KHDR_INCLUDES) $(OUTPUT)/hashchk_test: CFLAGS += -fno-pie -no-pie $(call cc-option,-mno-rop-protect) diff --git a/tools/testing/selftests/powerpc/dexcr/chdexcr.c b/tools/testing/selftests/powerpc/dexcr/chdexcr.c new file mode 100644 index 0000000000..c548d7a5bb --- /dev/null +++ b/tools/testing/selftests/powerpc/dexcr/chdexcr.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/prctl.h> + +#include "dexcr.h" +#include "utils.h" + +static void die(const char *msg) +{ + printf("%s\n", msg); + exit(1); +} + +static void help(void) +{ + printf("Invoke a provided program with a custom DEXCR on-exec reset value\n" + "\n" + "usage: chdexcr [CHDEXCR OPTIONS] -- PROGRAM [ARGS...]\n" + "\n" + "Each configurable DEXCR aspect is exposed as an option.\n" + "\n" + "The normal option sets the aspect in the DEXCR. The --no- variant\n" + "clears that aspect. For example, --ibrtpd sets the IBRTPD aspect bit,\n" + "so indirect branch prediction will be disabled in the provided program.\n" + "Conversely, --no-ibrtpd clears the aspect bit, so indirect branch\n" + "prediction may occur.\n" + "\n" + "CHDEXCR OPTIONS:\n"); + + for (int i = 0; i < ARRAY_SIZE(aspects); i++) { + const struct dexcr_aspect *aspect = &aspects[i]; + + if (aspect->prctl == -1) + continue; + + printf(" --%-6s / --no-%-6s : %s\n", aspect->opt, aspect->opt, aspect->desc); + } +} + +static const struct dexcr_aspect *opt_to_aspect(const char *opt) +{ + for (int i = 0; i < ARRAY_SIZE(aspects); i++) + if (aspects[i].prctl != -1 && !strcmp(aspects[i].opt, opt)) + return &aspects[i]; + + return NULL; +} + +static int apply_option(const char *option) +{ + const struct dexcr_aspect *aspect; + const char *opt = NULL; + const char *set_prefix = "--"; + const char *clear_prefix = "--no-"; + unsigned long ctrl = 0; + int err; + + if (!strcmp(option, "-h") || !strcmp(option, "--help")) { + help(); + exit(0); + } + + /* Strip out --(no-) prefix and determine ctrl value */ + if (!strncmp(option, clear_prefix, strlen(clear_prefix))) { + opt = &option[strlen(clear_prefix)]; + ctrl |= PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC; + } else if (!strncmp(option, set_prefix, strlen(set_prefix))) { + opt = &option[strlen(set_prefix)]; + ctrl |= PR_PPC_DEXCR_CTRL_SET_ONEXEC; + } + + if (!opt || !*opt) + return 1; + + aspect = opt_to_aspect(opt); + if (!aspect) + die("unknown aspect"); + + err = pr_set_dexcr(aspect->prctl, ctrl); + if (err) + die("failed to apply option"); + + return 0; +} + +int main(int argc, char *const argv[]) +{ + int i; + + if (!dexcr_exists()) + die("DEXCR not detected on this hardware"); + + for (i = 1; i < argc; i++) + if (apply_option(argv[i])) + break; + + if (i < argc && !strcmp(argv[i], "--")) + i++; + + if (i >= argc) + die("missing command"); + + execvp(argv[i], &argv[i]); + perror("execve"); + + return errno; +} diff --git a/tools/testing/selftests/powerpc/dexcr/dexcr.c b/tools/testing/selftests/powerpc/dexcr/dexcr.c index 65ec5347de..468fd0dc99 100644 --- a/tools/testing/selftests/powerpc/dexcr/dexcr.c +++ b/tools/testing/selftests/powerpc/dexcr/dexcr.c @@ -3,6 +3,7 @@ #include <errno.h> #include <setjmp.h> #include <signal.h> +#include <sys/prctl.h> #include <sys/types.h> #include <sys/wait.h> @@ -43,6 +44,45 @@ out: return exists; } +unsigned int pr_which_to_aspect(unsigned long which) +{ + switch (which) { + case PR_PPC_DEXCR_SBHE: + return DEXCR_PR_SBHE; + case PR_PPC_DEXCR_IBRTPD: + return DEXCR_PR_IBRTPD; + case PR_PPC_DEXCR_SRAPD: + return DEXCR_PR_SRAPD; + case PR_PPC_DEXCR_NPHIE: + return DEXCR_PR_NPHIE; + default: + FAIL_IF_EXIT_MSG(true, "unknown PR aspect"); + } +} + +int pr_get_dexcr(unsigned long which) +{ + return prctl(PR_PPC_GET_DEXCR, which, 0UL, 0UL, 0UL); +} + +int pr_set_dexcr(unsigned long which, unsigned long ctrl) +{ + return prctl(PR_PPC_SET_DEXCR, which, ctrl, 0UL, 0UL); +} + +bool pr_dexcr_aspect_supported(unsigned long which) +{ + if (pr_get_dexcr(which) == -1) + return errno == ENODEV; + + return true; +} + +bool pr_dexcr_aspect_editable(unsigned long which) +{ + return pr_get_dexcr(which) & PR_PPC_DEXCR_CTRL_EDITABLE; +} + /* * Just test if a bad hashchk triggers a signal, without checking * for support or if the NPHIE aspect is enabled. diff --git a/tools/testing/selftests/powerpc/dexcr/dexcr.h b/tools/testing/selftests/powerpc/dexcr/dexcr.h index f55cbbc864..51e9ba3b09 100644 --- a/tools/testing/selftests/powerpc/dexcr/dexcr.h +++ b/tools/testing/selftests/powerpc/dexcr/dexcr.h @@ -9,6 +9,7 @@ #define _SELFTESTS_POWERPC_DEXCR_DEXCR_H #include <stdbool.h> +#include <sys/prctl.h> #include <sys/types.h> #include "reg.h" @@ -26,8 +27,64 @@ #define PPC_RAW_HASHCHK(b, i, a) \ str(.long (0x7C0005E4 | PPC_RAW_HASH_ARGS(b, i, a));) +struct dexcr_aspect { + const char *name; /* Short display name */ + const char *opt; /* Option name for chdexcr */ + const char *desc; /* Expanded aspect meaning */ + unsigned int index; /* Aspect bit index in DEXCR */ + unsigned long prctl; /* 'which' value for get/set prctl */ +}; + +static const struct dexcr_aspect aspects[] = { + { + .name = "SBHE", + .opt = "sbhe", + .desc = "Speculative branch hint enable", + .index = 0, + .prctl = PR_PPC_DEXCR_SBHE, + }, + { + .name = "IBRTPD", + .opt = "ibrtpd", + .desc = "Indirect branch recurrent target prediction disable", + .index = 3, + .prctl = PR_PPC_DEXCR_IBRTPD, + }, + { + .name = "SRAPD", + .opt = "srapd", + .desc = "Subroutine return address prediction disable", + .index = 4, + .prctl = PR_PPC_DEXCR_SRAPD, + }, + { + .name = "NPHIE", + .opt = "nphie", + .desc = "Non-privileged hash instruction enable", + .index = 5, + .prctl = PR_PPC_DEXCR_NPHIE, + }, + { + .name = "PHIE", + .opt = "phie", + .desc = "Privileged hash instruction enable", + .index = 6, + .prctl = -1, + }, +}; + bool dexcr_exists(void); +bool pr_dexcr_aspect_supported(unsigned long which); + +bool pr_dexcr_aspect_editable(unsigned long which); + +int pr_get_dexcr(unsigned long pr_aspect); + +int pr_set_dexcr(unsigned long pr_aspect, unsigned long ctrl); + +unsigned int pr_which_to_aspect(unsigned long which); + bool hashchk_triggers(void); enum dexcr_source { diff --git a/tools/testing/selftests/powerpc/dexcr/dexcr_test.c b/tools/testing/selftests/powerpc/dexcr/dexcr_test.c new file mode 100644 index 0000000000..7a86571649 --- /dev/null +++ b/tools/testing/selftests/powerpc/dexcr/dexcr_test.c @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <sys/prctl.h> +#include <unistd.h> + +#include "dexcr.h" +#include "utils.h" + +/* + * Helper function for testing the behaviour of a newly exec-ed process + */ +static int dexcr_prctl_onexec_test_child(unsigned long which, const char *status) +{ + unsigned long dexcr = mfspr(SPRN_DEXCR_RO); + unsigned long aspect = pr_which_to_aspect(which); + int ctrl = pr_get_dexcr(which); + + if (!strcmp(status, "set")) { + FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET), + "setting aspect across exec not applied"); + + FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC), + "setting aspect across exec not inherited"); + + FAIL_IF_EXIT_MSG(!(aspect & dexcr), "setting aspect across exec did not take effect"); + } else if (!strcmp(status, "clear")) { + FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR), + "clearing aspect across exec not applied"); + + FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC), + "clearing aspect across exec not inherited"); + + FAIL_IF_EXIT_MSG(aspect & dexcr, "clearing aspect across exec did not take effect"); + } else { + FAIL_IF_EXIT_MSG(true, "unknown expected status"); + } + + return 0; +} + +/* + * Test that the given prctl value can be manipulated freely + */ +static int dexcr_prctl_aspect_test(unsigned long which) +{ + unsigned long aspect = pr_which_to_aspect(which); + pid_t pid; + int ctrl; + int err; + int errno_save; + + SKIP_IF_MSG(!dexcr_exists(), "DEXCR not supported"); + SKIP_IF_MSG(!pr_dexcr_aspect_supported(which), "DEXCR aspect not supported"); + SKIP_IF_MSG(!pr_dexcr_aspect_editable(which), "DEXCR aspect not editable with prctl"); + + /* We reject invalid combinations of arguments */ + err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR); + errno_save = errno; + FAIL_IF_MSG(err != -1, "simultaneous set and clear should be rejected"); + FAIL_IF_MSG(errno_save != EINVAL, "simultaneous set and clear should be rejected with EINVAL"); + + err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET_ONEXEC | PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC); + errno_save = errno; + FAIL_IF_MSG(err != -1, "simultaneous set and clear on exec should be rejected"); + FAIL_IF_MSG(errno_save != EINVAL, "simultaneous set and clear on exec should be rejected with EINVAL"); + + /* We set the aspect */ + err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET); + FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_SET failed"); + + ctrl = pr_get_dexcr(which); + FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET), "config value not PR_PPC_DEXCR_CTRL_SET"); + FAIL_IF_MSG(ctrl & PR_PPC_DEXCR_CTRL_CLEAR, "config value unexpected clear flag"); + FAIL_IF_MSG(!(aspect & mfspr(SPRN_DEXCR_RO)), "setting aspect did not take effect"); + + /* We clear the aspect */ + err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_CLEAR); + FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_CLEAR failed"); + + ctrl = pr_get_dexcr(which); + FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR), "config value not PR_PPC_DEXCR_CTRL_CLEAR"); + FAIL_IF_MSG(ctrl & PR_PPC_DEXCR_CTRL_SET, "config value unexpected set flag"); + FAIL_IF_MSG(aspect & mfspr(SPRN_DEXCR_RO), "clearing aspect did not take effect"); + + /* We make it set on exec (doesn't change our current value) */ + err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET_ONEXEC); + FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_SET_ONEXEC failed"); + + ctrl = pr_get_dexcr(which); + FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR), "process aspect should still be cleared"); + FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC), "config value not PR_PPC_DEXCR_CTRL_SET_ONEXEC"); + FAIL_IF_MSG(ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC, "config value unexpected clear on exec flag"); + FAIL_IF_MSG(aspect & mfspr(SPRN_DEXCR_RO), "scheduling aspect to set on exec should not change it now"); + + /* We make it clear on exec (doesn't change our current value) */ + err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC); + FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC failed"); + + ctrl = pr_get_dexcr(which); + FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR), "process aspect config should still be cleared"); + FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC), "config value not PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC"); + FAIL_IF_MSG(ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC, "config value unexpected set on exec flag"); + FAIL_IF_MSG(aspect & mfspr(SPRN_DEXCR_RO), "process aspect should still be cleared"); + + /* We allow setting the current and on-exec value in a single call */ + err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC); + FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC failed"); + + ctrl = pr_get_dexcr(which); + FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET), "config value not PR_PPC_DEXCR_CTRL_SET"); + FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC), "config value not PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC"); + FAIL_IF_MSG(!(aspect & mfspr(SPRN_DEXCR_RO)), "process aspect should be set"); + + err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_CLEAR | PR_PPC_DEXCR_CTRL_SET_ONEXEC); + FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_CLEAR | PR_PPC_DEXCR_CTRL_SET_ONEXEC failed"); + + ctrl = pr_get_dexcr(which); + FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR), "config value not PR_PPC_DEXCR_CTRL_CLEAR"); + FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC), "config value not PR_PPC_DEXCR_CTRL_SET_ONEXEC"); + FAIL_IF_MSG(aspect & mfspr(SPRN_DEXCR_RO), "process aspect should be clear"); + + /* Verify the onexec value is applied across exec */ + pid = fork(); + if (!pid) { + char which_str[32] = {}; + char *args[] = { "dexcr_prctl_onexec_test_child", which_str, "set", NULL }; + unsigned int ctrl = pr_get_dexcr(which); + + sprintf(which_str, "%lu", which); + + FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC), + "setting aspect on exec not copied across fork"); + + FAIL_IF_EXIT_MSG(mfspr(SPRN_DEXCR_RO) & aspect, + "setting aspect on exec wrongly applied to fork"); + + execve("/proc/self/exe", args, NULL); + _exit(errno); + } + await_child_success(pid); + + err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC); + FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC failed"); + + pid = fork(); + if (!pid) { + char which_str[32] = {}; + char *args[] = { "dexcr_prctl_onexec_test_child", which_str, "clear", NULL }; + unsigned int ctrl = pr_get_dexcr(which); + + sprintf(which_str, "%lu", which); + + FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC), + "clearing aspect on exec not copied across fork"); + + FAIL_IF_EXIT_MSG(!(mfspr(SPRN_DEXCR_RO) & aspect), + "clearing aspect on exec wrongly applied to fork"); + + execve("/proc/self/exe", args, NULL); + _exit(errno); + } + await_child_success(pid); + + return 0; +} + +static int dexcr_prctl_ibrtpd_test(void) +{ + return dexcr_prctl_aspect_test(PR_PPC_DEXCR_IBRTPD); +} + +static int dexcr_prctl_srapd_test(void) +{ + return dexcr_prctl_aspect_test(PR_PPC_DEXCR_SRAPD); +} + +static int dexcr_prctl_nphie_test(void) +{ + return dexcr_prctl_aspect_test(PR_PPC_DEXCR_NPHIE); +} + +int main(int argc, char *argv[]) +{ + int err = 0; + + /* + * Some tests require checking what happens across exec, so we may be + * invoked as the child of a particular test + */ + if (argc > 1) { + if (argc == 3 && !strcmp(argv[0], "dexcr_prctl_onexec_test_child")) { + unsigned long which; + + err = parse_ulong(argv[1], strlen(argv[1]), &which, 10); + FAIL_IF_MSG(err, "failed to parse which value for child"); + + return dexcr_prctl_onexec_test_child(which, argv[2]); + } + + FAIL_IF_MSG(true, "unknown test case"); + } + + /* + * Otherwise we are the main test invocation and run the full suite + */ + err |= test_harness(dexcr_prctl_ibrtpd_test, "dexcr_prctl_ibrtpd"); + err |= test_harness(dexcr_prctl_srapd_test, "dexcr_prctl_srapd"); + err |= test_harness(dexcr_prctl_nphie_test, "dexcr_prctl_nphie"); + + return err; +} diff --git a/tools/testing/selftests/powerpc/dexcr/hashchk_test.c b/tools/testing/selftests/powerpc/dexcr/hashchk_test.c index 7d5658c9eb..645224bdc1 100644 --- a/tools/testing/selftests/powerpc/dexcr/hashchk_test.c +++ b/tools/testing/selftests/powerpc/dexcr/hashchk_test.c @@ -21,8 +21,14 @@ static int require_nphie(void) { SKIP_IF_MSG(!dexcr_exists(), "DEXCR not supported"); + + pr_set_dexcr(PR_PPC_DEXCR_NPHIE, PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_SET_ONEXEC); + + if (get_dexcr(EFFECTIVE) & DEXCR_PR_NPHIE) + return 0; + SKIP_IF_MSG(!(get_dexcr(EFFECTIVE) & DEXCR_PR_NPHIE), - "DEXCR[NPHIE] not enabled"); + "Failed to enable DEXCR[NPHIE]"); return 0; } diff --git a/tools/testing/selftests/powerpc/dexcr/lsdexcr.c b/tools/testing/selftests/powerpc/dexcr/lsdexcr.c index 94abbfcc38..7588929180 100644 --- a/tools/testing/selftests/powerpc/dexcr/lsdexcr.c +++ b/tools/testing/selftests/powerpc/dexcr/lsdexcr.c @@ -1,9 +1,9 @@ // SPDX-License-Identifier: GPL-2.0+ -#include <errno.h> #include <stddef.h> #include <stdio.h> #include <string.h> +#include <sys/prctl.h> #include "dexcr.h" #include "utils.h" @@ -12,40 +12,6 @@ static unsigned int dexcr; static unsigned int hdexcr; static unsigned int effective; -struct dexcr_aspect { - const char *name; - const char *desc; - unsigned int index; -}; - -static const struct dexcr_aspect aspects[] = { - { - .name = "SBHE", - .desc = "Speculative branch hint enable", - .index = 0, - }, - { - .name = "IBRTPD", - .desc = "Indirect branch recurrent target prediction disable", - .index = 3, - }, - { - .name = "SRAPD", - .desc = "Subroutine return address prediction disable", - .index = 4, - }, - { - .name = "NPHIE", - .desc = "Non-privileged hash instruction enable", - .index = 5, - }, - { - .name = "PHIE", - .desc = "Privileged hash instruction enable", - .index = 6, - }, -}; - static void print_list(const char *list[], size_t len) { for (size_t i = 0; i < len; i++) { @@ -60,7 +26,7 @@ static void print_dexcr(char *name, unsigned int bits) const char *enabled_aspects[ARRAY_SIZE(aspects) + 1] = {NULL}; size_t j = 0; - printf("%s: %08x", name, bits); + printf("%s: 0x%08x", name, bits); if (bits == 0) { printf("\n"); @@ -103,6 +69,63 @@ static void print_aspect(const struct dexcr_aspect *aspect) printf(" \t(%s)\n", aspect->desc); } +static void print_aspect_config(const struct dexcr_aspect *aspect) +{ + const char *reason = NULL; + const char *reason_hyp = NULL; + const char *reason_prctl = "no prctl"; + bool actual = effective & DEXCR_PR_BIT(aspect->index); + bool expected = actual; /* Assume it's fine if we don't expect a specific set/clear value */ + + if (actual) + reason = "set by unknown"; + else + reason = "cleared by unknown"; + + if (aspect->prctl != -1) { + int ctrl = pr_get_dexcr(aspect->prctl); + + if (ctrl < 0) { + reason_prctl = "failed to read prctl"; + } else { + if (ctrl & PR_PPC_DEXCR_CTRL_SET) { + reason_prctl = "set by prctl"; + expected = true; + } else if (ctrl & PR_PPC_DEXCR_CTRL_CLEAR) { + reason_prctl = "cleared by prctl"; + expected = false; + } else { + reason_prctl = "unknown prctl"; + } + + reason = reason_prctl; + } + } + + if (hdexcr & DEXCR_PR_BIT(aspect->index)) { + reason_hyp = "set by hypervisor"; + reason = reason_hyp; + expected = true; + } else { + reason_hyp = "not modified by hypervisor"; + } + + printf("%12s (%d): %-28s (%s, %s)\n", + aspect->name, + aspect->index, + reason, + reason_hyp, + reason_prctl); + + /* + * The checks are not atomic, so this can technically trigger if the + * hypervisor makes a change while we are checking each source. It's + * far more likely to be a bug if we see this though. + */ + if (actual != expected) + printf(" : ! actual %s does not match config\n", aspect->name); +} + int main(int argc, char *argv[]) { if (!dexcr_exists()) { @@ -114,6 +137,8 @@ int main(int argc, char *argv[]) hdexcr = get_dexcr(HDEXCR); effective = dexcr | hdexcr; + printf("current status:\n"); + print_dexcr(" DEXCR", dexcr); print_dexcr(" HDEXCR", hdexcr); print_dexcr("Effective", effective); @@ -136,6 +161,12 @@ int main(int argc, char *argv[]) else printf("ignored\n"); } + printf("\n"); + + printf("configuration:\n"); + for (size_t i = 0; i < ARRAY_SIZE(aspects); i++) + print_aspect_config(&aspects[i]); + printf("\n"); return 0; } diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile index 9289d5febe..9fa9cb5bd9 100644 --- a/tools/testing/selftests/powerpc/dscr/Makefile +++ b/tools/testing/selftests/powerpc/dscr/Makefile @@ -5,6 +5,7 @@ TEST_GEN_PROGS := dscr_default_test dscr_explicit_test dscr_user_test \ top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(OUTPUT)/dscr_default_test: LDLIBS += -lpthread $(OUTPUT)/dscr_explicit_test: LDLIBS += -lpthread diff --git a/tools/testing/selftests/powerpc/eeh/Makefile b/tools/testing/selftests/powerpc/eeh/Makefile index ae963eb2dc..70797716f2 100644 --- a/tools/testing/selftests/powerpc/eeh/Makefile +++ b/tools/testing/selftests/powerpc/eeh/Makefile @@ -7,3 +7,4 @@ TEST_FILES := eeh-functions.sh settings top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk diff --git a/tools/testing/selftests/powerpc/flags.mk b/tools/testing/selftests/powerpc/flags.mk new file mode 100644 index 0000000000..abb9e58d95 --- /dev/null +++ b/tools/testing/selftests/powerpc/flags.mk @@ -0,0 +1,9 @@ +#This checks for any ENV variables and add those. + +ifeq ($(GIT_VERSION),) +GIT_VERSION := $(shell git describe --always --long --dirty || echo "unknown") +export GIT_VERSION +endif + +CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(USERCFLAGS) +export CFLAGS diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile index 3948f7c510..b14fd2e0c6 100644 --- a/tools/testing/selftests/powerpc/math/Makefile +++ b/tools/testing/selftests/powerpc/math/Makefile @@ -3,6 +3,7 @@ TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vm top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c $(TEST_GEN_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec diff --git a/tools/testing/selftests/powerpc/mce/Makefile b/tools/testing/selftests/powerpc/mce/Makefile index 2424513982..ce4ed679aa 100644 --- a/tools/testing/selftests/powerpc/mce/Makefile +++ b/tools/testing/selftests/powerpc/mce/Makefile @@ -3,5 +3,6 @@ TEST_GEN_PROGS := inject-ra-err include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index 4a6608beef..aab058ecb3 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -13,6 +13,7 @@ TEST_GEN_FILES := tempfile top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile index 0785c2e99d..480d8ba94c 100644 --- a/tools/testing/selftests/powerpc/nx-gzip/Makefile +++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile @@ -1,8 +1,9 @@ -CFLAGS = -O3 -m64 -I./include -I../include - TEST_GEN_FILES := gzfht_test gunz_test TEST_PROGS := nx-gzip-test.sh include ../../lib.mk +include ../flags.mk + +CFLAGS = -O3 -m64 -I./include -I../include $(TEST_GEN_FILES): gzip_vas.c ../utils.c diff --git a/tools/testing/selftests/powerpc/papr_attributes/Makefile b/tools/testing/selftests/powerpc/papr_attributes/Makefile index e899712d49..4064294990 100644 --- a/tools/testing/selftests/powerpc/papr_attributes/Makefile +++ b/tools/testing/selftests/powerpc/papr_attributes/Makefile @@ -3,5 +3,6 @@ TEST_GEN_PROGS := attr_test top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk -$(TEST_GEN_PROGS): ../harness.c ../utils.c
\ No newline at end of file +$(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/papr_sysparm/Makefile b/tools/testing/selftests/powerpc/papr_sysparm/Makefile index 7f79e43763..fed4f2414d 100644 --- a/tools/testing/selftests/powerpc/papr_sysparm/Makefile +++ b/tools/testing/selftests/powerpc/papr_sysparm/Makefile @@ -6,6 +6,7 @@ TEST_GEN_PROGS := papr_sysparm top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/papr_vpd/Makefile b/tools/testing/selftests/powerpc/papr_vpd/Makefile index 06b719703b..b09852e408 100644 --- a/tools/testing/selftests/powerpc/papr_vpd/Makefile +++ b/tools/testing/selftests/powerpc/papr_vpd/Makefile @@ -6,6 +6,7 @@ TEST_GEN_PROGS := papr_vpd top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index a284fa874a..7e9dbf3d0d 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -7,8 +7,11 @@ EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk -all: $(TEST_GEN_PROGS) ebb sampling_tests event_code_tests +SUB_DIRS := ebb sampling_tests event_code_tests + +all: $(TEST_GEN_PROGS) $(SUB_DIRS) $(TEST_GEN_PROGS): $(EXTRA_SOURCES) @@ -22,12 +25,16 @@ $(OUTPUT)/count_stcx_fail: loop.S $(EXTRA_SOURCES) $(OUTPUT)/per_event_excludes: ../utils.c +$(SUB_DIRS): + BUILD_TARGET=$(OUTPUT)/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all + DEFAULT_RUN_TESTS := $(RUN_TESTS) override define RUN_TESTS $(DEFAULT_RUN_TESTS) - +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests - +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests - +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests + +@for TARGET in $(SUB_DIRS); do \ + BUILD_TARGET=$(OUTPUT)/$$TARGET; \ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests; \ + done; endef emit_tests: @@ -35,34 +42,29 @@ emit_tests: BASENAME_TEST=`basename $$TEST`; \ echo "$(COLLECTION):$$BASENAME_TEST"; \ done - +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests - +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests - +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests + +@for TARGET in $(SUB_DIRS); do \ + BUILD_TARGET=$(OUTPUT)/$$TARGET; \ + $(MAKE) OUTPUT=$$BUILD_TARGET COLLECTION=$(COLLECTION)/$$TARGET -s -C $$TARGET emit_tests; \ + done; DEFAULT_INSTALL_RULE := $(INSTALL_RULE) override define INSTALL_RULE $(DEFAULT_INSTALL_RULE) - +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install - +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install - +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install + +@for TARGET in $(SUB_DIRS); do \ + BUILD_TARGET=$(OUTPUT)/$$TARGET; \ + $(MAKE) OUTPUT=$$BUILD_TARGET INSTALL_PATH=$$INSTALL_PATH/$$TARGET -C $$TARGET install; \ + done; endef DEFAULT_CLEAN := $(CLEAN) override define CLEAN $(DEFAULT_CLEAN) $(RM) $(TEST_GEN_PROGS) $(OUTPUT)/loop.o - +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean - +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean - +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean + +@for TARGET in $(SUB_DIRS); do \ + BUILD_TARGET=$(OUTPUT)/$$TARGET; \ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean; \ + done; endef -ebb: - TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all - -sampling_tests: - TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all - -event_code_tests: - TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all .PHONY: all run_tests ebb sampling_tests event_code_tests emit_tests diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile index 0101606902..1b39af7c10 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile +++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile @@ -4,16 +4,6 @@ include ../../../../../build/Build.include noarg: $(MAKE) -C ../../ -# The EBB handler is 64-bit code and everything links against it -CFLAGS += -m64 - -TMPOUT = $(OUTPUT)/TMPDIR/ -# Toolchains may build PIE by default which breaks the assembly -no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \ - $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) - -LDFLAGS += $(no-pie-option) - TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \ cycles_with_freeze_test pmc56_overflow_test \ ebb_vs_cpu_event_test cpu_event_vs_ebb_test \ @@ -28,6 +18,17 @@ TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \ top_srcdir = ../../../../../.. include ../../../lib.mk +include ../../flags.mk + +# The EBB handler is 64-bit code and everything links against it +CFLAGS += -m64 + +TMPOUT = $(OUTPUT)/TMPDIR/ +# Toolchains may build PIE by default which breaks the assembly +no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \ + $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) + +LDFLAGS += $(no-pie-option) $(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c \ ebb.c ebb_handler.S trace.c busy_loop.S diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile index 4e07d70464..fdb080b3fa 100644 --- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -1,6 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -m64 - TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test group_constraint_pmc_count_test \ group_constraint_repeat_test group_constraint_radix_scope_qual_test reserved_bits_mmcra_sample_elig_mode_test \ group_constraint_mmcra_sample_test invalid_event_code_test reserved_bits_mmcra_thresh_ctl_test \ @@ -11,5 +9,8 @@ TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_te top_srcdir = ../../../../../.. include ../../../lib.mk +include ../../flags.mk + +CFLAGS += -m64 $(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c ../sampling_tests/misc.h ../sampling_tests/misc.c diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index 9e67351fb2..9f79bec5fc 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -1,6 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -m64 - TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \ mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \ @@ -11,5 +9,8 @@ TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test top_srcdir = ../../../../../.. include ../../../lib.mk +include ../../flags.mk + +CFLAGS += -m64 $(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h ../loop.S ../branch_loops.S diff --git a/tools/testing/selftests/powerpc/primitives/Makefile b/tools/testing/selftests/powerpc/primitives/Makefile index 9b9491a632..23bd9a7590 100644 --- a/tools/testing/selftests/powerpc/primitives/Makefile +++ b/tools/testing/selftests/powerpc/primitives/Makefile @@ -1,9 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -I$(CURDIR) - TEST_GEN_PROGS := load_unaligned_zeropad top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk + +CFLAGS += -I$(CURDIR) $(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index 1b39b86849..59ca01d856 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -26,6 +26,7 @@ LOCAL_HDRS += $(patsubst %,$(selfdir)/powerpc/ptrace/%,$(wildcard *.h)) top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk TM_TESTS := $(patsubst %,$(OUTPUT)/%,$(TM_TESTS)) TESTS_64 := $(patsubst %,$(OUTPUT)/%,$(TESTS_64)) diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index e0d979ab02..3328603972 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -5,9 +5,10 @@ TEST_PROGS := mitigation-patching.sh top_srcdir = ../../../../.. -CFLAGS += $(KHDR_INCLUDES) - include ../../lib.mk +include ../flags.mk + +CFLAGS += $(KHDR_INCLUDES) $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile index f679d260af..ece95bd52b 100644 --- a/tools/testing/selftests/powerpc/signal/Makefile +++ b/tools/testing/selftests/powerpc/signal/Makefile @@ -3,7 +3,6 @@ TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso sig_sc_double_restart TEST_GEN_PROGS += sigreturn_kernel TEST_GEN_PROGS += sigreturn_unaligned -CFLAGS += -maltivec $(OUTPUT)/signal_tm: CFLAGS += -mhtm $(OUTPUT)/sigfuz: CFLAGS += -pthread -m64 @@ -11,5 +10,8 @@ TEST_FILES := settings top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk + +CFLAGS += -maltivec $(TEST_GEN_PROGS): ../harness.c ../utils.c signal.S diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile index 9c39f55a58..4c9d9a58c9 100644 --- a/tools/testing/selftests/powerpc/stringloops/Makefile +++ b/tools/testing/selftests/powerpc/stringloops/Makefile @@ -1,7 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -# The loops are all 64-bit code -CFLAGS += -I$(CURDIR) - EXTRA_SOURCES := ../harness.c build_32bit = $(shell if ($(CC) $(CFLAGS) -m32 -o /dev/null memcmp.c >/dev/null 2>&1) then echo "1"; fi) @@ -27,9 +24,13 @@ $(OUTPUT)/strlen_32: CFLAGS += -m32 TEST_GEN_PROGS += strlen_32 endif -ASFLAGS = $(CFLAGS) - top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk + +# The loops are all 64-bit code +CFLAGS += -I$(CURDIR) + +ASFLAGS = $(CFLAGS) $(TEST_GEN_PROGS): $(EXTRA_SOURCES) diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile index bdc081afed..0da2e0a742 100644 --- a/tools/testing/selftests/powerpc/switch_endian/Makefile +++ b/tools/testing/selftests/powerpc/switch_endian/Makefile @@ -1,12 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 TEST_GEN_PROGS := switch_endian_test -ASFLAGS += -O2 -Wall -g -nostdlib -m64 - EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk + +ASFLAGS += -O2 -Wall -g -nostdlib -m64 $(OUTPUT)/switch_endian_test: ASFLAGS += -I $(OUTPUT) $(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile index ee1740ddfb..3bc07af88f 100644 --- a/tools/testing/selftests/powerpc/syscalls/Makefile +++ b/tools/testing/selftests/powerpc/syscalls/Makefile @@ -1,9 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only TEST_GEN_PROGS := ipc_unmuxed rtas_filter -CFLAGS += $(KHDR_INCLUDES) - top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk + +CFLAGS += $(KHDR_INCLUDES) $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 3876805c2f..f13f0ab360 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -11,6 +11,7 @@ TEST_FILES := settings top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/vphn/Makefile b/tools/testing/selftests/powerpc/vphn/Makefile index cf65cbf330..61d519a076 100644 --- a/tools/testing/selftests/powerpc/vphn/Makefile +++ b/tools/testing/selftests/powerpc/vphn/Makefile @@ -1,10 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only TEST_GEN_PROGS := test-vphn -CFLAGS += -m64 -I$(CURDIR) - top_srcdir = ../../../../.. include ../../lib.mk +include ../flags.mk + +CFLAGS += -m64 -I$(CURDIR) $(TEST_GEN_PROGS): ../harness.c diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index bbac5f4b03..990d24696f 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -391,7 +391,7 @@ __EOF__ forceflavor="`echo $flavor | sed -e 's/^CONFIG/CONFIG_FORCE/'`" deselectedflavors="`grep -v $flavor $T/rcutasksflavors | tr '\012' ' ' | tr -s ' ' | sed -e 's/ *$//'`" echo " --- Running RCU Tasks Trace flavor $flavor `date`" >> $rtfdir/log - tools/testing/selftests/rcutorture/bin/kvm.sh --datestamp "$ds/results-rcutasksflavors/$flavor" --buildonly --configs "TINY01 TREE04" --kconfig "CONFIG_RCU_EXPERT=y CONFIG_RCU_SCALE_TEST=y $forceflavor=y $deselectedflavors" --trust-make > $T/$flavor.out 2>&1 + tools/testing/selftests/rcutorture/bin/kvm.sh --datestamp "$ds/results-rcutasksflavors/$flavor" --buildonly --configs "TINY01 TREE04" --kconfig "CONFIG_RCU_EXPERT=y CONFIG_RCU_SCALE_TEST=y CONFIG_KPROBES=n CONFIG_RCU_TRACE=n CONFIG_TRACING=n CONFIG_BLK_DEV_IO_TRACE=n CONFIG_UPROBE_EVENTS=n $forceflavor=y $deselectedflavors" --trust-make > $T/$flavor.out 2>&1 retcode=$? if test "$retcode" -ne 0 then @@ -425,7 +425,7 @@ fi if test "$do_scftorture" = "yes" then # Scale memory based on the number of CPUs. - scfmem=$((2+HALF_ALLOTED_CPUS/16)) + scfmem=$((3+HALF_ALLOTED_CPUS/16)) torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot csdlock_debug=1" torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory ${scfmem}G --trust-make fi @@ -559,7 +559,7 @@ do_kcsan="$do_kcsan_save" if test "$do_kvfree" = "yes" then torture_bootargs="rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot" - torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make + torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration $duration_rcutorture --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make fi if test "$do_clocksourcewd" = "yes" diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 b/tools/testing/selftests/rcutorture/configs/rcu/TREE09 index fc45645bb5..9ecd1b4e65 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE09 @@ -10,8 +10,9 @@ CONFIG_NO_HZ_FULL=n CONFIG_RCU_TRACE=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_BOOST=n +CONFIG_RCU_BOOST=y +CONFIG_RCU_BOOST_DELAY=100 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n -#CHECK#CONFIG_RCU_EXPERT=n +CONFIG_RCU_EXPERT=y CONFIG_KPROBES=n CONFIG_FTRACE=n diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c index 178a41d4bb..55315ed695 100644 --- a/tools/testing/selftests/resctrl/cat_test.c +++ b/tools/testing/selftests/resctrl/cat_test.c @@ -128,7 +128,7 @@ static int check_results(struct resctrl_val_param *param, const char *cache_type return fail; } -void cat_test_cleanup(void) +static void cat_test_cleanup(void) { remove(RESULT_FILE_NAME); } @@ -284,13 +284,10 @@ static int cat_run_test(const struct resctrl_test *test, const struct user_param ret = cat_test(test, uparams, ¶m, span, start_mask); if (ret) - goto out; + return ret; ret = check_results(¶m, test->resource, cache_total_size, full_cache_mask, start_mask); -out: - cat_test_cleanup(); - return ret; } @@ -385,6 +382,7 @@ struct resctrl_test l3_cat_test = { .resource = "L3", .feature_check = test_resource_feature_check, .run_test = cat_run_test, + .cleanup = cat_test_cleanup, }; struct resctrl_test l3_noncont_cat_test = { diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c index a81f91222a..0105afec61 100644 --- a/tools/testing/selftests/resctrl/cmt_test.c +++ b/tools/testing/selftests/resctrl/cmt_test.c @@ -40,11 +40,11 @@ static int show_results_info(unsigned long sum_llc_val, int no_of_bits, int ret; avg_llc_val = sum_llc_val / num_of_runs; - avg_diff = (long)abs(cache_span - avg_llc_val); + avg_diff = (long)(cache_span - avg_llc_val); diff_percent = ((float)cache_span - avg_llc_val) / cache_span * 100; ret = platform && abs((int)diff_percent) > max_diff_percent && - abs(avg_diff) > max_diff; + labs(avg_diff) > max_diff; ksft_print_msg("%s Check cache miss rate within %lu%%\n", ret ? "Fail:" : "Pass:", max_diff_percent); @@ -91,7 +91,7 @@ static int check_results(struct resctrl_val_param *param, size_t span, int no_of MAX_DIFF, MAX_DIFF_PERCENT, runs - 1, true); } -void cmt_test_cleanup(void) +static void cmt_test_cleanup(void) { remove(RESULT_FILE_NAME); } @@ -161,7 +161,6 @@ static int cmt_run_test(const struct resctrl_test *test, const struct user_param ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n"); out: - cmt_test_cleanup(); free(span_str); return ret; @@ -178,4 +177,5 @@ struct resctrl_test cmt_test = { .resource = "L3", .feature_check = cmt_feature_check, .run_test = cmt_run_test, + .cleanup = cmt_test_cleanup, }; diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c index 7946e32e85..a6ad39aae1 100644 --- a/tools/testing/selftests/resctrl/mba_test.c +++ b/tools/testing/selftests/resctrl/mba_test.c @@ -60,8 +60,8 @@ static bool show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc) /* Memory bandwidth from 100% down to 10% */ for (allocation = 0; allocation < ALLOCATION_MAX / ALLOCATION_STEP; allocation++) { - unsigned long avg_bw_imc, avg_bw_resc; unsigned long sum_bw_imc = 0, sum_bw_resc = 0; + long avg_bw_imc, avg_bw_resc; int avg_diff_per; float avg_diff; @@ -137,7 +137,7 @@ static int check_results(void) return show_mba_info(bw_imc, bw_resc); } -void mba_test_cleanup(void) +static void mba_test_cleanup(void) { remove(RESULT_FILE_NAME); } @@ -158,13 +158,10 @@ static int mba_run_test(const struct resctrl_test *test, const struct user_param ret = resctrl_val(test, uparams, uparams->benchmark_cmd, ¶m); if (ret) - goto out; + return ret; ret = check_results(); -out: - mba_test_cleanup(); - return ret; } @@ -180,4 +177,5 @@ struct resctrl_test mba_test = { .vendor_specific = ARCH_INTEL, .feature_check = mba_feature_check, .run_test = mba_run_test, + .cleanup = mba_test_cleanup, }; diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c index d67ffa3ec6..6fec51e1ff 100644 --- a/tools/testing/selftests/resctrl/mbm_test.c +++ b/tools/testing/selftests/resctrl/mbm_test.c @@ -17,8 +17,8 @@ static int show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, size_t span) { - unsigned long avg_bw_imc = 0, avg_bw_resc = 0; unsigned long sum_bw_imc = 0, sum_bw_resc = 0; + long avg_bw_imc = 0, avg_bw_resc = 0; int runs, ret, avg_diff_per; float avg_diff = 0; @@ -105,7 +105,7 @@ static int mbm_setup(const struct resctrl_test *test, return ret; } -void mbm_test_cleanup(void) +static void mbm_test_cleanup(void) { remove(RESULT_FILE_NAME); } @@ -126,15 +126,12 @@ static int mbm_run_test(const struct resctrl_test *test, const struct user_param ret = resctrl_val(test, uparams, uparams->benchmark_cmd, ¶m); if (ret) - goto out; + return ret; ret = check_results(DEFAULT_SPAN); if (ret && (get_vendor() == ARCH_INTEL)) ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n"); -out: - mbm_test_cleanup(); - return ret; } @@ -150,4 +147,5 @@ struct resctrl_test mbm_test = { .vendor_specific = ARCH_INTEL, .feature_check = mbm_feature_check, .run_test = mbm_run_test, + .cleanup = mbm_test_cleanup, }; diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index 2051bd135e..00d51fa753 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -72,6 +72,7 @@ struct user_params { * @disabled: Test is disabled * @feature_check: Callback to check required resctrl features * @run_test: Callback to run the test + * @cleanup: Callback to cleanup after the test */ struct resctrl_test { const char *name; @@ -82,6 +83,7 @@ struct resctrl_test { bool (*feature_check)(const struct resctrl_test *test); int (*run_test)(const struct resctrl_test *test, const struct user_params *uparams); + void (*cleanup)(void); }; /* @@ -156,9 +158,6 @@ int resctrl_val(const struct resctrl_test *test, const struct user_params *uparams, const char * const *benchmark_cmd, struct resctrl_val_param *param); -void tests_cleanup(void); -void mbm_test_cleanup(void); -void mba_test_cleanup(void); unsigned long create_bit_mask(unsigned int start, unsigned int len); unsigned int count_contiguous_bits(unsigned long val, unsigned int *start); int get_full_cbm(const char *cache_type, unsigned long *mask); @@ -166,11 +165,9 @@ int get_mask_no_shareable(const char *cache_type, unsigned long *mask); int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size); int resource_info_unsigned_get(const char *resource, const char *filename, unsigned int *val); void ctrlc_handler(int signum, siginfo_t *info, void *ptr); -int signal_handler_register(void); +int signal_handler_register(const struct resctrl_test *test); void signal_handler_unregister(void); -void cat_test_cleanup(void); unsigned int count_bits(unsigned long n); -void cmt_test_cleanup(void); void perf_event_attr_initialize(struct perf_event_attr *pea, __u64 config); void perf_event_initialize_read_format(struct perf_event_read *pe_read); diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c index f3dc1b9696..ecbb7605a9 100644 --- a/tools/testing/selftests/resctrl/resctrl_tests.c +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -81,19 +81,11 @@ static void cmd_help(void) printf("\t-h: help\n"); } -void tests_cleanup(void) -{ - mbm_test_cleanup(); - mba_test_cleanup(); - cmt_test_cleanup(); - cat_test_cleanup(); -} - -static int test_prepare(void) +static int test_prepare(const struct resctrl_test *test) { int res; - res = signal_handler_register(); + res = signal_handler_register(test); if (res) { ksft_print_msg("Failed to register signal handler\n"); return res; @@ -108,8 +100,10 @@ static int test_prepare(void) return 0; } -static void test_cleanup(void) +static void test_cleanup(const struct resctrl_test *test) { + if (test->cleanup) + test->cleanup(); umount_resctrlfs(); signal_handler_unregister(); } @@ -136,7 +130,7 @@ static void run_single_test(const struct resctrl_test *test, const struct user_p ksft_print_msg("Starting %s test ...\n", test->name); - if (test_prepare()) { + if (test_prepare(test)) { ksft_exit_fail_msg("Abnormal failure when preparing for the test\n"); return; } @@ -151,7 +145,7 @@ static void run_single_test(const struct resctrl_test *test, const struct user_p ksft_test_result(!ret, "%s: test\n", test->name); cleanup: - test_cleanup(); + test_cleanup(test); } static void init_user_params(struct user_params *uparams) @@ -253,13 +247,13 @@ last_arg: * 2. We execute perf commands */ if (geteuid() != 0) - return ksft_exit_skip("Not running as root. Skipping...\n"); + ksft_exit_skip("Not running as root. Skipping...\n"); if (!check_resctrlfs_support()) - return ksft_exit_skip("resctrl FS does not exist. Enable X86_CPU_RESCTRL config option.\n"); + ksft_exit_skip("resctrl FS does not exist. Enable X86_CPU_RESCTRL config option.\n"); if (umount_resctrlfs()) - return ksft_exit_skip("resctrl FS unmount failed.\n"); + ksft_exit_skip("resctrl FS unmount failed.\n"); filter_dmesg(); diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c index 5a49f07a6c..f55f5989de 100644 --- a/tools/testing/selftests/resctrl/resctrl_val.c +++ b/tools/testing/selftests/resctrl/resctrl_val.c @@ -62,6 +62,7 @@ struct imc_counter_config { static char mbm_total_path[1024]; static int imcs; static struct imc_counter_config imc_counters_config[MAX_IMCS][2]; +static const struct resctrl_test *current_test; void membw_initialize_perf_event_attr(int i, int j) { @@ -292,6 +293,18 @@ static int initialize_mem_bw_imc(void) return 0; } +static void perf_close_imc_mem_bw(void) +{ + int mc; + + for (mc = 0; mc < imcs; mc++) { + if (imc_counters_config[mc][READ].fd != -1) + close(imc_counters_config[mc][READ].fd); + if (imc_counters_config[mc][WRITE].fd != -1) + close(imc_counters_config[mc][WRITE].fd); + } +} + /* * get_mem_bw_imc: Memory band width as reported by iMC counters * @cpu_no: CPU number that the benchmark PID is binded to @@ -305,26 +318,33 @@ static int initialize_mem_bw_imc(void) static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc) { float reads, writes, of_mul_read, of_mul_write; - int imc, j, ret; + int imc, ret; + + for (imc = 0; imc < imcs; imc++) { + imc_counters_config[imc][READ].fd = -1; + imc_counters_config[imc][WRITE].fd = -1; + } /* Start all iMC counters to log values (both read and write) */ reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1; for (imc = 0; imc < imcs; imc++) { - for (j = 0; j < 2; j++) { - ret = open_perf_event(imc, cpu_no, j); - if (ret) - return -1; - } - for (j = 0; j < 2; j++) - membw_ioctl_perf_event_ioc_reset_enable(imc, j); + ret = open_perf_event(imc, cpu_no, READ); + if (ret) + goto close_fds; + ret = open_perf_event(imc, cpu_no, WRITE); + if (ret) + goto close_fds; + + membw_ioctl_perf_event_ioc_reset_enable(imc, READ); + membw_ioctl_perf_event_ioc_reset_enable(imc, WRITE); } sleep(1); /* Stop counters after a second to get results (both read and write) */ for (imc = 0; imc < imcs; imc++) { - for (j = 0; j < 2; j++) - membw_ioctl_perf_event_ioc_disable(imc, j); + membw_ioctl_perf_event_ioc_disable(imc, READ); + membw_ioctl_perf_event_ioc_disable(imc, WRITE); } /* @@ -340,15 +360,13 @@ static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc) if (read(r->fd, &r->return_value, sizeof(struct membw_read_format)) == -1) { ksft_perror("Couldn't get read b/w through iMC"); - - return -1; + goto close_fds; } if (read(w->fd, &w->return_value, sizeof(struct membw_read_format)) == -1) { ksft_perror("Couldn't get write bw through iMC"); - - return -1; + goto close_fds; } __u64 r_time_enabled = r->return_value.time_enabled; @@ -368,10 +386,7 @@ static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc) writes += w->return_value.value * of_mul_write * SCALE; } - for (imc = 0; imc < imcs; imc++) { - close(imc_counters_config[imc][READ].fd); - close(imc_counters_config[imc][WRITE].fd); - } + perf_close_imc_mem_bw(); if (strcmp(bw_report, "reads") == 0) { *bw_imc = reads; @@ -385,6 +400,10 @@ static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc) *bw_imc = reads + writes; return 0; + +close_fds: + perf_close_imc_mem_bw(); + return -1; } void set_mbm_path(const char *ctrlgrp, const char *mongrp, int domain_id) @@ -472,7 +491,8 @@ void ctrlc_handler(int signum, siginfo_t *info, void *ptr) if (bm_pid) kill(bm_pid, SIGKILL); umount_resctrlfs(); - tests_cleanup(); + if (current_test && current_test->cleanup) + current_test->cleanup(); ksft_print_msg("Ending\n\n"); exit(EXIT_SUCCESS); @@ -482,13 +502,14 @@ void ctrlc_handler(int signum, siginfo_t *info, void *ptr) * Register CTRL-C handler for parent, as it has to kill * child process before exiting. */ -int signal_handler_register(void) +int signal_handler_register(const struct resctrl_test *test) { struct sigaction sigact = {}; int ret = 0; bm_pid = 0; + current_test = test; sigact.sa_sigaction = ctrlc_handler; sigemptyset(&sigact.sa_mask); sigact.sa_flags = SA_SIGINFO; @@ -510,6 +531,7 @@ void signal_handler_unregister(void) { struct sigaction sigact = {}; + current_test = NULL; sigact.sa_handler = SIG_DFL; sigemptyset(&sigact.sa_mask); if (sigaction(SIGINT, &sigact, NULL) || diff --git a/tools/testing/selftests/ring-buffer/.gitignore b/tools/testing/selftests/ring-buffer/.gitignore new file mode 100644 index 0000000000..3aed1a2a6c --- /dev/null +++ b/tools/testing/selftests/ring-buffer/.gitignore @@ -0,0 +1 @@ +map_test diff --git a/tools/testing/selftests/ring-buffer/Makefile b/tools/testing/selftests/ring-buffer/Makefile new file mode 100644 index 0000000000..627c5fa6d1 --- /dev/null +++ b/tools/testing/selftests/ring-buffer/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -Wl,-no-as-needed -Wall +CFLAGS += $(KHDR_INCLUDES) +CFLAGS += -D_GNU_SOURCE + +TEST_GEN_PROGS = map_test + +include ../lib.mk diff --git a/tools/testing/selftests/ring-buffer/config b/tools/testing/selftests/ring-buffer/config new file mode 100644 index 0000000000..d936f8f00e --- /dev/null +++ b/tools/testing/selftests/ring-buffer/config @@ -0,0 +1,2 @@ +CONFIG_FTRACE=y +CONFIG_TRACER_SNAPSHOT=y diff --git a/tools/testing/selftests/ring-buffer/map_test.c b/tools/testing/selftests/ring-buffer/map_test.c new file mode 100644 index 0000000000..a9006fa709 --- /dev/null +++ b/tools/testing/selftests/ring-buffer/map_test.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Ring-buffer memory mapping tests + * + * Copyright (c) 2024 Vincent Donnefort <vdonnefort@google.com> + */ +#include <fcntl.h> +#include <sched.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <linux/trace_mmap.h> + +#include <sys/mman.h> +#include <sys/ioctl.h> + +#include "../user_events/user_events_selftests.h" /* share tracefs setup */ +#include "../kselftest_harness.h" + +#define TRACEFS_ROOT "/sys/kernel/tracing" + +static int __tracefs_write(const char *path, const char *value) +{ + int fd, ret; + + fd = open(path, O_WRONLY | O_TRUNC); + if (fd < 0) + return fd; + + ret = write(fd, value, strlen(value)); + + close(fd); + + return ret == -1 ? -errno : 0; +} + +static int __tracefs_write_int(const char *path, int value) +{ + char *str; + int ret; + + if (asprintf(&str, "%d", value) < 0) + return -1; + + ret = __tracefs_write(path, str); + + free(str); + + return ret; +} + +#define tracefs_write_int(path, value) \ + ASSERT_EQ(__tracefs_write_int((path), (value)), 0) + +#define tracefs_write(path, value) \ + ASSERT_EQ(__tracefs_write((path), (value)), 0) + +static int tracefs_reset(void) +{ + if (__tracefs_write_int(TRACEFS_ROOT"/tracing_on", 0)) + return -1; + if (__tracefs_write(TRACEFS_ROOT"/trace", "")) + return -1; + if (__tracefs_write(TRACEFS_ROOT"/set_event", "")) + return -1; + if (__tracefs_write(TRACEFS_ROOT"/current_tracer", "nop")) + return -1; + + return 0; +} + +struct tracefs_cpu_map_desc { + struct trace_buffer_meta *meta; + int cpu_fd; +}; + +int tracefs_cpu_map(struct tracefs_cpu_map_desc *desc, int cpu) +{ + int page_size = getpagesize(); + char *cpu_path; + void *map; + + if (asprintf(&cpu_path, + TRACEFS_ROOT"/per_cpu/cpu%d/trace_pipe_raw", + cpu) < 0) + return -ENOMEM; + + desc->cpu_fd = open(cpu_path, O_RDONLY | O_NONBLOCK); + free(cpu_path); + if (desc->cpu_fd < 0) + return -ENODEV; + + map = mmap(NULL, page_size, PROT_READ, MAP_SHARED, desc->cpu_fd, 0); + if (map == MAP_FAILED) + return -errno; + + desc->meta = (struct trace_buffer_meta *)map; + + return 0; +} + +void tracefs_cpu_unmap(struct tracefs_cpu_map_desc *desc) +{ + munmap(desc->meta, desc->meta->meta_page_size); + close(desc->cpu_fd); +} + +FIXTURE(map) { + struct tracefs_cpu_map_desc map_desc; + bool umount; +}; + +FIXTURE_VARIANT(map) { + int subbuf_size; +}; + +FIXTURE_VARIANT_ADD(map, subbuf_size_4k) { + .subbuf_size = 4, +}; + +FIXTURE_VARIANT_ADD(map, subbuf_size_8k) { + .subbuf_size = 8, +}; + +FIXTURE_SETUP(map) +{ + int cpu = sched_getcpu(); + cpu_set_t cpu_mask; + bool fail, umount; + char *message; + + if (getuid() != 0) + SKIP(return, "Skipping: %s", "Please run the test as root"); + + if (!tracefs_enabled(&message, &fail, &umount)) { + if (fail) { + TH_LOG("Tracefs setup failed: %s", message); + ASSERT_FALSE(fail); + } + SKIP(return, "Skipping: %s", message); + } + + self->umount = umount; + + ASSERT_GE(cpu, 0); + + ASSERT_EQ(tracefs_reset(), 0); + + tracefs_write_int(TRACEFS_ROOT"/buffer_subbuf_size_kb", variant->subbuf_size); + + ASSERT_EQ(tracefs_cpu_map(&self->map_desc, cpu), 0); + + /* + * Ensure generated events will be found on this very same ring-buffer. + */ + CPU_ZERO(&cpu_mask); + CPU_SET(cpu, &cpu_mask); + ASSERT_EQ(sched_setaffinity(0, sizeof(cpu_mask), &cpu_mask), 0); +} + +FIXTURE_TEARDOWN(map) +{ + tracefs_reset(); + + if (self->umount) + tracefs_unmount(); + + tracefs_cpu_unmap(&self->map_desc); +} + +TEST_F(map, meta_page_check) +{ + struct tracefs_cpu_map_desc *desc = &self->map_desc; + int cnt = 0; + + ASSERT_EQ(desc->meta->entries, 0); + ASSERT_EQ(desc->meta->overrun, 0); + ASSERT_EQ(desc->meta->read, 0); + + ASSERT_EQ(desc->meta->reader.id, 0); + ASSERT_EQ(desc->meta->reader.read, 0); + + ASSERT_EQ(ioctl(desc->cpu_fd, TRACE_MMAP_IOCTL_GET_READER), 0); + ASSERT_EQ(desc->meta->reader.id, 0); + + tracefs_write_int(TRACEFS_ROOT"/tracing_on", 1); + for (int i = 0; i < 16; i++) + tracefs_write_int(TRACEFS_ROOT"/trace_marker", i); +again: + ASSERT_EQ(ioctl(desc->cpu_fd, TRACE_MMAP_IOCTL_GET_READER), 0); + + ASSERT_EQ(desc->meta->entries, 16); + ASSERT_EQ(desc->meta->overrun, 0); + ASSERT_EQ(desc->meta->read, 16); + + ASSERT_EQ(desc->meta->reader.id, 1); + + if (!(cnt++)) + goto again; +} + +TEST_F(map, data_mmap) +{ + struct tracefs_cpu_map_desc *desc = &self->map_desc; + unsigned long meta_len, data_len; + void *data; + + meta_len = desc->meta->meta_page_size; + data_len = desc->meta->subbuf_size * desc->meta->nr_subbufs; + + /* Map all the available subbufs */ + data = mmap(NULL, data_len, PROT_READ, MAP_SHARED, + desc->cpu_fd, meta_len); + ASSERT_NE(data, MAP_FAILED); + munmap(data, data_len); + + /* Map all the available subbufs - 1 */ + data_len -= desc->meta->subbuf_size; + data = mmap(NULL, data_len, PROT_READ, MAP_SHARED, + desc->cpu_fd, meta_len); + ASSERT_NE(data, MAP_FAILED); + munmap(data, data_len); + + /* Overflow the available subbufs by 1 */ + meta_len += desc->meta->subbuf_size * 2; + data = mmap(NULL, data_len, PROT_READ, MAP_SHARED, + desc->cpu_fd, meta_len); + ASSERT_EQ(data, MAP_FAILED); +} + +FIXTURE(snapshot) { + bool umount; +}; + +FIXTURE_SETUP(snapshot) +{ + bool fail, umount; + struct stat sb; + char *message; + + if (getuid() != 0) + SKIP(return, "Skipping: %s", "Please run the test as root"); + + if (stat(TRACEFS_ROOT"/snapshot", &sb)) + SKIP(return, "Skipping: %s", "snapshot not available"); + + if (!tracefs_enabled(&message, &fail, &umount)) { + if (fail) { + TH_LOG("Tracefs setup failed: %s", message); + ASSERT_FALSE(fail); + } + SKIP(return, "Skipping: %s", message); + } + + self->umount = umount; +} + +FIXTURE_TEARDOWN(snapshot) +{ + __tracefs_write(TRACEFS_ROOT"/events/sched/sched_switch/trigger", + "!snapshot"); + tracefs_reset(); + + if (self->umount) + tracefs_unmount(); +} + +TEST_F(snapshot, excludes_map) +{ + struct tracefs_cpu_map_desc map_desc; + int cpu = sched_getcpu(); + + ASSERT_GE(cpu, 0); + tracefs_write(TRACEFS_ROOT"/events/sched/sched_switch/trigger", + "snapshot"); + ASSERT_EQ(tracefs_cpu_map(&map_desc, cpu), -EBUSY); +} + +TEST_F(snapshot, excluded_by_map) +{ + struct tracefs_cpu_map_desc map_desc; + int cpu = sched_getcpu(); + + ASSERT_EQ(tracefs_cpu_map(&map_desc, cpu), 0); + + ASSERT_EQ(__tracefs_write(TRACEFS_ROOT"/events/sched/sched_switch/trigger", + "snapshot"), -EBUSY); + ASSERT_EQ(__tracefs_write(TRACEFS_ROOT"/snapshot", + "1"), -EBUSY); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/Makefile b/tools/testing/selftests/riscv/Makefile index 4a9ff515a3..7ce03d832b 100644 --- a/tools/testing/selftests/riscv/Makefile +++ b/tools/testing/selftests/riscv/Makefile @@ -5,7 +5,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),riscv)) -RISCV_SUBTARGETS ?= hwprobe vector mm +RISCV_SUBTARGETS ?= hwprobe vector mm sigreturn else RISCV_SUBTARGETS := endif diff --git a/tools/testing/selftests/riscv/sigreturn/.gitignore b/tools/testing/selftests/riscv/sigreturn/.gitignore new file mode 100644 index 0000000000..35002b8ae7 --- /dev/null +++ b/tools/testing/selftests/riscv/sigreturn/.gitignore @@ -0,0 +1 @@ +sigreturn diff --git a/tools/testing/selftests/riscv/sigreturn/Makefile b/tools/testing/selftests/riscv/sigreturn/Makefile new file mode 100644 index 0000000000..eb8bac9279 --- /dev/null +++ b/tools/testing/selftests/riscv/sigreturn/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2021 ARM Limited +# Originally tools/testing/arm64/abi/Makefile + +CFLAGS += -I$(top_srcdir)/tools/include + +TEST_GEN_PROGS := sigreturn + +include ../../lib.mk + +$(OUTPUT)/sigreturn: sigreturn.c + $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^ diff --git a/tools/testing/selftests/riscv/sigreturn/sigreturn.c b/tools/testing/selftests/riscv/sigreturn/sigreturn.c new file mode 100644 index 0000000000..ed351a1cb9 --- /dev/null +++ b/tools/testing/selftests/riscv/sigreturn/sigreturn.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <ucontext.h> +#include <linux/ptrace.h> +#include "../../kselftest_harness.h" + +#define RISCV_V_MAGIC 0x53465457 +#define DEFAULT_VALUE 2 +#define SIGNAL_HANDLER_OVERRIDE 3 + +static void simple_handle(int sig_no, siginfo_t *info, void *vcontext) +{ + ucontext_t *context = vcontext; + + context->uc_mcontext.__gregs[REG_PC] = context->uc_mcontext.__gregs[REG_PC] + 4; +} + +static void vector_override(int sig_no, siginfo_t *info, void *vcontext) +{ + ucontext_t *context = vcontext; + + // vector state + struct __riscv_extra_ext_header *ext; + struct __riscv_v_ext_state *v_ext_state; + + /* Find the vector context. */ + ext = (void *)(&context->uc_mcontext.__fpregs); + if (ext->hdr.magic != RISCV_V_MAGIC) { + fprintf(stderr, "bad vector magic: %x\n", ext->hdr.magic); + abort(); + } + + v_ext_state = (void *)((char *)(ext) + sizeof(*ext)); + + *(int *)v_ext_state->datap = SIGNAL_HANDLER_OVERRIDE; + + context->uc_mcontext.__gregs[REG_PC] = context->uc_mcontext.__gregs[REG_PC] + 4; +} + +static int vector_sigreturn(int data, void (*handler)(int, siginfo_t *, void *)) +{ + int after_sigreturn; + struct sigaction sig_action = { + .sa_sigaction = handler, + .sa_flags = SA_SIGINFO + }; + + sigaction(SIGSEGV, &sig_action, 0); + + asm(".option push \n\ + .option arch, +v \n\ + vsetivli x0, 1, e32, m1, ta, ma \n\ + vmv.s.x v0, %1 \n\ + # Generate SIGSEGV \n\ + lw a0, 0(x0) \n\ + vmv.x.s %0, v0 \n\ + .option pop" : "=r" (after_sigreturn) : "r" (data)); + + return after_sigreturn; +} + +TEST(vector_restore) +{ + int result; + + result = vector_sigreturn(DEFAULT_VALUE, &simple_handle); + + EXPECT_EQ(DEFAULT_VALUE, result); +} + +TEST(vector_restore_signal_handler_override) +{ + int result; + + result = vector_sigreturn(DEFAULT_VALUE, &vector_override); + + EXPECT_EQ(SIGNAL_HANDLER_OVERRIDE, result); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index b83099160f..94886c82ae 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -194,14 +194,14 @@ int main(int argc, char *argv[]) ksft_set_plan(7); ksft_print_msg("Running on:\n"); - ksft_print_msg(""); + ksft_print_msg("%s", ""); system("uname -a"); ksft_print_msg("Current BPF sysctl settings:\n"); /* Avoid using "sysctl" which may not be installed. */ - ksft_print_msg(""); + ksft_print_msg("%s", ""); system("grep -H . /proc/sys/net/core/bpf_jit_enable"); - ksft_print_msg(""); + ksft_print_msg("%s", ""); system("grep -H . /proc/sys/net/core/bpf_jit_harden"); affinity(); diff --git a/tools/testing/selftests/sigaltstack/current_stack_pointer.h b/tools/testing/selftests/sigaltstack/current_stack_pointer.h index ea9bdf3a90..09da8f1011 100644 --- a/tools/testing/selftests/sigaltstack/current_stack_pointer.h +++ b/tools/testing/selftests/sigaltstack/current_stack_pointer.h @@ -8,7 +8,7 @@ register unsigned long sp asm("sp"); register unsigned long sp asm("esp"); #elif __loongarch64 register unsigned long sp asm("$sp"); -#elif __ppc__ +#elif __powerpc__ register unsigned long sp asm("r1"); #elif __s390x__ register unsigned long sp asm("%15"); diff --git a/tools/testing/selftests/sync/sync_test.c b/tools/testing/selftests/sync/sync_test.c index 414a617db9..93db5aa246 100644 --- a/tools/testing/selftests/sync/sync_test.c +++ b/tools/testing/selftests/sync/sync_test.c @@ -109,6 +109,5 @@ int main(void) ksft_exit_fail_msg("%d out of %d sync tests failed\n", err, ksft_test_num()); - /* need this return to keep gcc happy */ - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/timers/adjtick.c b/tools/testing/selftests/timers/adjtick.c index 47e05fdc32..205b76a4ab 100644 --- a/tools/testing/selftests/timers/adjtick.c +++ b/tools/testing/selftests/timers/adjtick.c @@ -205,7 +205,7 @@ int main(int argc, char **argv) adjtimex(&tx1); if (err) - return ksft_exit_fail(); + ksft_exit_fail(); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c index 4332b49410..ad52e608b8 100644 --- a/tools/testing/selftests/timers/alarmtimer-suspend.c +++ b/tools/testing/selftests/timers/alarmtimer-suspend.c @@ -173,6 +173,6 @@ int main(void) timer_delete(tm1); } if (final_ret) - return ksft_exit_fail(); - return ksft_exit_pass(); + ksft_exit_fail(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c index 992a77f2a7..4421cd562c 100644 --- a/tools/testing/selftests/timers/change_skew.c +++ b/tools/testing/selftests/timers/change_skew.c @@ -89,8 +89,8 @@ int main(int argc, char **argv) if (ret) { printf("[FAIL]"); - return ksft_exit_fail(); + ksft_exit_fail(); } printf("[OK]"); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/timers/freq-step.c b/tools/testing/selftests/timers/freq-step.c index 4b76450d78..73b636f89f 100644 --- a/tools/testing/selftests/timers/freq-step.c +++ b/tools/testing/selftests/timers/freq-step.c @@ -257,7 +257,7 @@ int main(int argc, char **argv) set_frequency(0.0); if (fails) - return ksft_exit_fail(); + ksft_exit_fail(); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c index 23eb398c81..986abbdb15 100644 --- a/tools/testing/selftests/timers/leap-a-day.c +++ b/tools/testing/selftests/timers/leap-a-day.c @@ -268,7 +268,7 @@ int main(int argc, char **argv) if (ret < 0) { printf("Error: Problem setting STA_INS/STA_DEL!: %s\n", time_state_str(ret)); - return ksft_exit_fail(); + ksft_exit_fail(); } /* Validate STA_INS was set */ @@ -277,7 +277,7 @@ int main(int argc, char **argv) if (tx.status != STA_INS && tx.status != STA_DEL) { printf("Error: STA_INS/STA_DEL not set!: %s\n", time_state_str(ret)); - return ksft_exit_fail(); + ksft_exit_fail(); } if (tai_time) { @@ -295,7 +295,7 @@ int main(int argc, char **argv) se.sigev_value.sival_int = 0; if (timer_create(CLOCK_REALTIME, &se, &tm1) == -1) { printf("Error: timer_create failed\n"); - return ksft_exit_fail(); + ksft_exit_fail(); } its1.it_value.tv_sec = next_leap; its1.it_value.tv_nsec = 0; @@ -366,7 +366,7 @@ int main(int argc, char **argv) if (error_found) { printf("Errors observed\n"); clear_time_state(); - return ksft_exit_fail(); + ksft_exit_fail(); } printf("\n"); if ((iterations != -1) && !(--iterations)) @@ -374,5 +374,5 @@ int main(int argc, char **argv) } clear_time_state(); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c index f70802c5dd..8fd065eec9 100644 --- a/tools/testing/selftests/timers/leapcrash.c +++ b/tools/testing/selftests/timers/leapcrash.c @@ -87,7 +87,7 @@ int main(void) tv.tv_usec = 0; if (settimeofday(&tv, NULL)) { printf("Error: You're likely not running with proper (ie: root) permissions\n"); - return ksft_exit_fail(); + ksft_exit_fail(); } tx.modes = 0; adjtimex(&tx); @@ -104,5 +104,5 @@ int main(void) fflush(stdout); } printf("[OK]\n"); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/timers/mqueue-lat.c b/tools/testing/selftests/timers/mqueue-lat.c index 7916cf5cc6..f3179a605b 100644 --- a/tools/testing/selftests/timers/mqueue-lat.c +++ b/tools/testing/selftests/timers/mqueue-lat.c @@ -107,8 +107,8 @@ int main(int argc, char **argv) ret = mqueue_lat_test(); if (ret < 0) { printf("[FAILED]\n"); - return ksft_exit_fail(); + ksft_exit_fail(); } printf("[OK]\n"); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index c001dd7917..07c81c0093 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -260,16 +260,16 @@ int main(int argc, char **argv) ksft_print_msg("based timers if other threads run on the CPU...\n"); if (check_itimer(ITIMER_VIRTUAL) < 0) - return ksft_exit_fail(); + ksft_exit_fail(); if (check_itimer(ITIMER_PROF) < 0) - return ksft_exit_fail(); + ksft_exit_fail(); if (check_itimer(ITIMER_REAL) < 0) - return ksft_exit_fail(); + ksft_exit_fail(); if (check_timer_create(CLOCK_THREAD_CPUTIME_ID) < 0) - return ksft_exit_fail(); + ksft_exit_fail(); /* * It's unfortunately hard to reliably test a timer expiration @@ -281,10 +281,10 @@ int main(int argc, char **argv) * find a better solution. */ if (check_timer_create(CLOCK_PROCESS_CPUTIME_ID) < 0) - return ksft_exit_fail(); + ksft_exit_fail(); if (check_timer_distribution() < 0) - return ksft_exit_fail(); + ksft_exit_fail(); ksft_finished(); } diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c index 6eba203f9d..030143eb09 100644 --- a/tools/testing/selftests/timers/raw_skew.c +++ b/tools/testing/selftests/timers/raw_skew.c @@ -137,11 +137,11 @@ int main(int argc, char **argv) if (tx1.offset || tx2.offset || tx1.freq != tx2.freq || tx1.tick != tx2.tick) { printf(" [SKIP]\n"); - return ksft_exit_skip("The clock was adjusted externally. Shutdown NTPd or other time sync daemons\n"); + ksft_exit_skip("The clock was adjusted externally. Shutdown NTPd or other time sync daemons\n"); } printf(" [FAILED]\n"); - return ksft_exit_fail(); + ksft_exit_fail(); } printf(" [OK]\n"); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/timers/set-2038.c b/tools/testing/selftests/timers/set-2038.c index 688cfd81b5..f7d978721b 100644 --- a/tools/testing/selftests/timers/set-2038.c +++ b/tools/testing/selftests/timers/set-2038.c @@ -128,6 +128,6 @@ out: /* restore clock */ settime(start); if (ret) - return ksft_exit_fail(); - return ksft_exit_pass(); + ksft_exit_fail(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/timers/set-tai.c b/tools/testing/selftests/timers/set-tai.c index 8c4179ee2c..5b67462efc 100644 --- a/tools/testing/selftests/timers/set-tai.c +++ b/tools/testing/selftests/timers/set-tai.c @@ -61,9 +61,9 @@ int main(int argc, char **argv) ret = get_tai(); if (ret != i) { printf("[FAILED] expected: %i got %i\n", i, ret); - return ksft_exit_fail(); + ksft_exit_fail(); } } printf("[OK]\n"); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c index 50da45437d..7ce240c89b 100644 --- a/tools/testing/selftests/timers/set-timer-lat.c +++ b/tools/testing/selftests/timers/set-timer-lat.c @@ -278,6 +278,6 @@ int main(void) ret |= do_timer_oneshot(clock_id, 0); } if (ret) - return ksft_exit_fail(); - return ksft_exit_pass(); + ksft_exit_fail(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/timers/set-tz.c b/tools/testing/selftests/timers/set-tz.c index 62bd33eb16..20daaf1782 100644 --- a/tools/testing/selftests/timers/set-tz.c +++ b/tools/testing/selftests/timers/set-tz.c @@ -102,9 +102,9 @@ int main(int argc, char **argv) printf("[OK]\n"); set_tz(min, dst); - return ksft_exit_pass(); + ksft_exit_pass(); err: set_tz(min, dst); - return ksft_exit_fail(); + ksft_exit_fail(); } diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c index 63913f75b3..c8e6bffe4e 100644 --- a/tools/testing/selftests/timers/skew_consistency.c +++ b/tools/testing/selftests/timers/skew_consistency.c @@ -70,8 +70,8 @@ int main(int argc, char **argv) if (ret) { printf("[FAILED]\n"); - return ksft_exit_fail(); + ksft_exit_fail(); } printf("[OK]\n"); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c index 80aed4bf06..76b38e41d9 100644 --- a/tools/testing/selftests/timers/threadtest.c +++ b/tools/testing/selftests/timers/threadtest.c @@ -189,5 +189,5 @@ out: /* die */ if (ret) ksft_exit_fail(); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c index d13ebde203..d500884801 100644 --- a/tools/testing/selftests/timers/valid-adjtimex.c +++ b/tools/testing/selftests/timers/valid-adjtimex.c @@ -320,10 +320,10 @@ int validate_set_offset(void) int main(int argc, char **argv) { if (validate_freq()) - return ksft_exit_fail(); + ksft_exit_fail(); if (validate_set_offset()) - return ksft_exit_fail(); + ksft_exit_fail(); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/tty/tty_tstamp_update.c b/tools/testing/selftests/tty/tty_tstamp_update.c index 0ee97943dc..9e1a40f5db 100644 --- a/tools/testing/selftests/tty/tty_tstamp_update.c +++ b/tools/testing/selftests/tty/tty_tstamp_update.c @@ -47,42 +47,60 @@ int main(int argc, char **argv) int r; char tty[PATH_MAX] = {}; struct stat st1, st2; + int result = KSFT_FAIL; ksft_print_header(); ksft_set_plan(1); r = readlink("/proc/self/fd/0", tty, PATH_MAX); - if (r < 0) - ksft_exit_fail_msg("readlink on /proc/self/fd/0 failed: %m\n"); + if (r < 0) { + ksft_print_msg("readlink on /proc/self/fd/0 failed: %m\n"); + goto out; + } + + if (!tty_valid(tty)) { + ksft_print_msg("invalid tty path '%s'\n", tty); + result = KSFT_SKIP; + goto out; - if (!tty_valid(tty)) - ksft_exit_skip("invalid tty path '%s'\n", tty); + } r = stat(tty, &st1); - if (r < 0) - ksft_exit_fail_msg("stat failed on tty path '%s': %m\n", tty); + if (r < 0) { + ksft_print_msg("stat failed on tty path '%s': %m\n", tty); + goto out; + } /* We need to wait at least 8 seconds in order to observe timestamp change */ /* https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=fbf47635315ab308c9b58a1ea0906e711a9228de */ sleep(10); r = write_dev_tty(); - if (r < 0) - ksft_exit_fail_msg("failed to write to /dev/tty: %s\n", - strerror(-r)); + if (r < 0) { + ksft_print_msg("failed to write to /dev/tty: %s\n", + strerror(-r)); + goto out; + } r = stat(tty, &st2); - if (r < 0) - ksft_exit_fail_msg("stat failed on tty path '%s': %m\n", tty); + if (r < 0) { + ksft_print_msg("stat failed on tty path '%s': %m\n", tty); + goto out; + } /* We wrote to the terminal so timestamps should have been updated */ if (st1.st_atim.tv_sec == st2.st_atim.tv_sec && st1.st_mtim.tv_sec == st2.st_mtim.tv_sec) { - ksft_test_result_fail("tty timestamps not updated\n"); - ksft_exit_fail(); + ksft_print_msg("tty timestamps not updated\n"); + goto out; } - ksft_test_result_pass( + ksft_print_msg( "timestamps of terminal '%s' updated after write to /dev/tty\n", tty); - return EXIT_SUCCESS; + result = KSFT_PASS; + +out: + ksft_test_result_report(result, "tty_tstamp_update\n"); + + ksft_finished(); } diff --git a/tools/testing/selftests/user_events/ftrace_test.c b/tools/testing/selftests/user_events/ftrace_test.c index dcd7509fe2..0bb46793dc 100644 --- a/tools/testing/selftests/user_events/ftrace_test.c +++ b/tools/testing/selftests/user_events/ftrace_test.c @@ -261,6 +261,12 @@ TEST_F(user, register_events) { ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, ®)); ASSERT_EQ(0, reg.write_index); + /* Register without separator spacing should still match */ + reg.enable_bit = 29; + reg.name_args = (__u64)"__test_event u32 field1;u32 field2"; + ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, ®)); + ASSERT_EQ(0, reg.write_index); + /* Multiple registers to same name but different args should fail */ reg.enable_bit = 29; reg.name_args = (__u64)"__test_event u32 field1;"; @@ -288,6 +294,8 @@ TEST_F(user, register_events) { ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSUNREG, &unreg)); unreg.disable_bit = 30; ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSUNREG, &unreg)); + unreg.disable_bit = 29; + ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSUNREG, &unreg)); /* Delete should have been auto-done after close and unregister */ close(self->data_fd); diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index d53a4d8008..98d8ba2afa 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -1,35 +1,30 @@ # SPDX-License-Identifier: GPL-2.0 -include ../lib.mk - uname_M := $(shell uname -m 2>/dev/null || echo not) ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) -TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu -TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi -TEST_GEN_PROGS += $(OUTPUT)/vdso_test_clock_getres +TEST_GEN_PROGS := vdso_test_gettimeofday +TEST_GEN_PROGS += vdso_test_getcpu +TEST_GEN_PROGS += vdso_test_abi +TEST_GEN_PROGS += vdso_test_clock_getres ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) -TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86 +TEST_GEN_PROGS += vdso_standalone_test_x86 endif -TEST_GEN_PROGS += $(OUTPUT)/vdso_test_correctness +TEST_GEN_PROGS += vdso_test_correctness CFLAGS := -std=gnu99 -CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector -LDFLAGS_vdso_test_correctness := -ldl + ifeq ($(CONFIG_X86_32),y) LDLIBS += -lgcc_s endif -all: $(TEST_GEN_PROGS) +include ../lib.mk $(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c $(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c $(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c $(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c + $(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c - $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \ - vdso_standalone_test_x86.c parse_vdso.c \ - -o $@ +$(OUTPUT)/vdso_standalone_test_x86: CFLAGS +=-nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector + $(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c - $(CC) $(CFLAGS) \ - vdso_test_correctness.c \ - -o $@ \ - $(LDFLAGS_vdso_test_correctness) +$(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv32.config b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config index a7f8e8a956..66290cf289 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/riscv32.config +++ b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config @@ -2,7 +2,7 @@ CONFIG_NONPORTABLE=y CONFIG_ARCH_RV32I=y CONFIG_MMU=y CONFIG_FPU=y -CONFIG_SOC_VIRT=y +CONFIG_ARCH_VIRT=y CONFIG_RISCV_ISA_FALLBACK=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv64.config b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config index daeb3e5e09..db1aa9f388 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/riscv64.config +++ b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config @@ -1,7 +1,7 @@ CONFIG_ARCH_RV64I=y CONFIG_MMU=y CONFIG_FPU=y -CONFIG_SOC_VIRT=y +CONFIG_ARCH_VIRT=y CONFIG_RISCV_ISA_FALLBACK=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index 507555714b..f314d3789f 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -41,7 +41,6 @@ CONFIG_KALLSYMS=y CONFIG_BUG=y CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y CONFIG_JUMP_LABEL=y -CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_SHMEM=y CONFIG_SLUB=y diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c index d884fd69dd..95aad6d884 100644 --- a/tools/testing/selftests/x86/amx.c +++ b/tools/testing/selftests/x86/amx.c @@ -103,21 +103,6 @@ static void clearhandler(int sig) #define CPUID_LEAF1_ECX_XSAVE_MASK (1 << 26) #define CPUID_LEAF1_ECX_OSXSAVE_MASK (1 << 27) -static inline void check_cpuid_xsave(void) -{ - uint32_t eax, ebx, ecx, edx; - - /* - * CPUID.1:ECX.XSAVE[bit 26] enumerates general - * support for the XSAVE feature set, including - * XGETBV. - */ - __cpuid_count(1, 0, eax, ebx, ecx, edx); - if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK)) - fatal_error("cpuid: no CPU xsave support"); - if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK)) - fatal_error("cpuid: no OS xsave support"); -} static uint32_t xbuf_size; @@ -350,6 +335,7 @@ enum expected_result { FAIL_EXPECTED, SUCCESS_EXPECTED }; /* arch_prctl() and sigaltstack() test */ +#define ARCH_GET_XCOMP_SUPP 0x1021 #define ARCH_GET_XCOMP_PERM 0x1022 #define ARCH_REQ_XCOMP_PERM 0x1023 @@ -928,8 +914,15 @@ static void test_ptrace(void) int main(void) { - /* Check hardware availability at first */ - check_cpuid_xsave(); + unsigned long features; + long rc; + + rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_SUPP, &features); + if (rc || (features & XFEATURE_MASK_XTILE) != XFEATURE_MASK_XTILE) { + ksft_print_msg("no AMX support\n"); + return KSFT_SKIP; + } + check_cpuid_xtiledata(); init_stashed_xsave(); diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c index 215b8150b7..0ea4f68139 100644 --- a/tools/testing/selftests/x86/lam.c +++ b/tools/testing/selftests/x86/lam.c @@ -1183,7 +1183,7 @@ int main(int argc, char **argv) if (!cpu_has_lam()) { ksft_print_msg("Unsupported LAM feature!\n"); - return -1; + return KSFT_SKIP; } while ((c = getopt(argc, argv, "ht:")) != -1) { @@ -1237,5 +1237,5 @@ int main(int argc, char **argv) ksft_set_plan(tests_cnt); - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c index f0d876d482..d53959e035 100644 --- a/tools/testing/selftests/x86/test_mremap_vdso.c +++ b/tools/testing/selftests/x86/test_mremap_vdso.c @@ -19,6 +19,7 @@ #include <sys/auxv.h> #include <sys/syscall.h> #include <sys/wait.h> +#include "../kselftest.h" #define PAGE_SIZE 4096 @@ -29,13 +30,13 @@ static int try_to_remap(void *vdso_addr, unsigned long size) /* Searching for memory location where to remap */ dest_addr = mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (dest_addr == MAP_FAILED) { - printf("[WARN]\tmmap failed (%d): %m\n", errno); + ksft_print_msg("WARN: mmap failed (%d): %m\n", errno); return 0; } - printf("[NOTE]\tMoving vDSO: [%p, %#lx] -> [%p, %#lx]\n", - vdso_addr, (unsigned long)vdso_addr + size, - dest_addr, (unsigned long)dest_addr + size); + ksft_print_msg("Moving vDSO: [%p, %#lx] -> [%p, %#lx]\n", + vdso_addr, (unsigned long)vdso_addr + size, + dest_addr, (unsigned long)dest_addr + size); fflush(stdout); new_addr = mremap(vdso_addr, size, size, @@ -43,10 +44,10 @@ static int try_to_remap(void *vdso_addr, unsigned long size) if ((unsigned long)new_addr == (unsigned long)-1) { munmap(dest_addr, size); if (errno == EINVAL) { - printf("[NOTE]\tvDSO partial move failed, will try with bigger size\n"); + ksft_print_msg("vDSO partial move failed, will try with bigger size\n"); return -1; /* Retry with larger */ } - printf("[FAIL]\tmremap failed (%d): %m\n", errno); + ksft_print_msg("[FAIL]\tmremap failed (%d): %m\n", errno); return 1; } @@ -58,11 +59,12 @@ int main(int argc, char **argv, char **envp) { pid_t child; + ksft_print_header(); + ksft_set_plan(1); + child = fork(); - if (child == -1) { - printf("[WARN]\tfailed to fork (%d): %m\n", errno); - return 1; - } + if (child == -1) + ksft_exit_fail_msg("failed to fork (%d): %m\n", errno); if (child == 0) { unsigned long vdso_size = PAGE_SIZE; @@ -70,9 +72,9 @@ int main(int argc, char **argv, char **envp) int ret = -1; auxval = getauxval(AT_SYSINFO_EHDR); - printf("\tAT_SYSINFO_EHDR is %#lx\n", auxval); + ksft_print_msg("AT_SYSINFO_EHDR is %#lx\n", auxval); if (!auxval || auxval == -ENOENT) { - printf("[WARN]\tgetauxval failed\n"); + ksft_print_msg("WARN: getauxval failed\n"); return 0; } @@ -92,16 +94,13 @@ int main(int argc, char **argv, char **envp) int status; if (waitpid(child, &status, 0) != child || - !WIFEXITED(status)) { - printf("[FAIL]\tmremap() of the vDSO does not work on this kernel!\n"); - return 1; - } else if (WEXITSTATUS(status) != 0) { - printf("[FAIL]\tChild failed with %d\n", - WEXITSTATUS(status)); - return 1; - } - printf("[OK]\n"); + !WIFEXITED(status)) + ksft_test_result_fail("mremap() of the vDSO does not work on this kernel!\n"); + else if (WEXITSTATUS(status) != 0) + ksft_test_result_fail("Child failed with %d\n", WEXITSTATUS(status)); + else + ksft_test_result_pass("%s\n", __func__); } - return 0; + ksft_finished(); } diff --git a/tools/testing/selftests/x86/test_shadow_stack.c b/tools/testing/selftests/x86/test_shadow_stack.c index 757e6527f6..ee909a7927 100644 --- a/tools/testing/selftests/x86/test_shadow_stack.c +++ b/tools/testing/selftests/x86/test_shadow_stack.c @@ -556,7 +556,7 @@ struct node { * looked at the shadow stack gaps. * 5. See if it landed in the gap. */ -int test_guard_gap(void) +int test_guard_gap_other_gaps(void) { void *free_area, *shstk, *test_map = (void *)0xFFFFFFFFFFFFFFFF; struct node *head = NULL, *cur; @@ -593,11 +593,64 @@ int test_guard_gap(void) if (shstk - test_map - PAGE_SIZE != PAGE_SIZE) return 1; - printf("[OK]\tGuard gap test\n"); + printf("[OK]\tGuard gap test, other mapping's gaps\n"); return 0; } +/* Tests respecting the guard gap of the mapping getting placed */ +int test_guard_gap_new_mappings_gaps(void) +{ + void *free_area, *shstk_start, *test_map = (void *)0xFFFFFFFFFFFFFFFF; + struct node *head = NULL, *cur; + int ret = 0; + + free_area = mmap(0, PAGE_SIZE * 4, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + munmap(free_area, PAGE_SIZE * 4); + + /* Test letting map_shadow_stack find a free space */ + shstk_start = mmap(free_area, PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (shstk_start == MAP_FAILED || shstk_start != free_area) + return 1; + + while (test_map > shstk_start) { + test_map = (void *)syscall(__NR_map_shadow_stack, 0, PAGE_SIZE, 0); + if (test_map == MAP_FAILED) { + printf("[INFO]\tmap_shadow_stack MAP_FAILED\n"); + ret = 1; + break; + } + + cur = malloc(sizeof(*cur)); + cur->mapping = test_map; + + cur->next = head; + head = cur; + + if (test_map == free_area + PAGE_SIZE) { + printf("[INFO]\tNew mapping has other mapping in guard gap!\n"); + ret = 1; + break; + } + } + + while (head) { + cur = head; + head = cur->next; + munmap(cur->mapping, PAGE_SIZE); + free(cur); + } + + munmap(shstk_start, PAGE_SIZE); + + if (!ret) + printf("[OK]\tGuard gap test, placement mapping's gaps\n"); + + return ret; +} + /* * Too complicated to pull it out of the 32 bit header, but also get the * 64 bit one needed above. Just define a copy here. @@ -850,9 +903,15 @@ int main(int argc, char *argv[]) goto out; } - if (test_guard_gap()) { + if (test_guard_gap_other_gaps()) { + ret = 1; + printf("[FAIL]\tGuard gap test, other mappings' gaps\n"); + goto out; + } + + if (test_guard_gap_new_mappings_gaps()) { ret = 1; - printf("[FAIL]\tGuard gap test\n"); + printf("[FAIL]\tGuard gap test, placement mapping's gaps\n"); goto out; } diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index 47cab97280..d4c8e8d79d 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -21,6 +21,13 @@ #include <sys/uio.h> #include "helpers.h" +#include "../kselftest.h" + +#ifdef __x86_64__ +#define TOTAL_TESTS 13 +#else +#define TOTAL_TESTS 8 +#endif #ifdef __x86_64__ # define VSYS(x) (x) @@ -39,18 +46,6 @@ /* max length of lines in /proc/self/maps - anything longer is skipped here */ #define MAPS_LINE_LEN 128 -static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), - int flags) -{ - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handler; - sa.sa_flags = SA_SIGINFO | flags; - sigemptyset(&sa.sa_mask); - if (sigaction(sig, &sa, 0)) - err(1, "sigaction"); -} - /* vsyscalls and vDSO */ bool vsyscall_map_r = false, vsyscall_map_x = false; @@ -75,83 +70,25 @@ static void init_vdso(void) if (!vdso) vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); if (!vdso) { - printf("[WARN]\tfailed to find vDSO\n"); + ksft_print_msg("[WARN] failed to find vDSO\n"); return; } vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday"); if (!vdso_gtod) - printf("[WARN]\tfailed to find gettimeofday in vDSO\n"); + ksft_print_msg("[WARN] failed to find gettimeofday in vDSO\n"); vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); if (!vdso_gettime) - printf("[WARN]\tfailed to find clock_gettime in vDSO\n"); + ksft_print_msg("[WARN] failed to find clock_gettime in vDSO\n"); vdso_time = (time_func_t)dlsym(vdso, "__vdso_time"); if (!vdso_time) - printf("[WARN]\tfailed to find time in vDSO\n"); + ksft_print_msg("[WARN] failed to find time in vDSO\n"); vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu"); if (!vdso_getcpu) - printf("[WARN]\tfailed to find getcpu in vDSO\n"); -} - -static int init_vsys(void) -{ -#ifdef __x86_64__ - int nerrs = 0; - FILE *maps; - char line[MAPS_LINE_LEN]; - bool found = false; - - maps = fopen("/proc/self/maps", "r"); - if (!maps) { - printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n"); - vsyscall_map_r = true; - return 0; - } - - while (fgets(line, MAPS_LINE_LEN, maps)) { - char r, x; - void *start, *end; - char name[MAPS_LINE_LEN]; - - /* sscanf() is safe here as strlen(name) >= strlen(line) */ - if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", - &start, &end, &r, &x, name) != 5) - continue; - - if (strcmp(name, "[vsyscall]")) - continue; - - printf("\tvsyscall map: %s", line); - - if (start != (void *)0xffffffffff600000 || - end != (void *)0xffffffffff601000) { - printf("[FAIL]\taddress range is nonsense\n"); - nerrs++; - } - - printf("\tvsyscall permissions are %c-%c\n", r, x); - vsyscall_map_r = (r == 'r'); - vsyscall_map_x = (x == 'x'); - - found = true; - break; - } - - fclose(maps); - - if (!found) { - printf("\tno vsyscall map in /proc/self/maps\n"); - vsyscall_map_r = false; - vsyscall_map_x = false; - } - - return nerrs; -#else - return 0; -#endif + ksft_print_msg("[WARN] failed to find getcpu in vDSO\n"); } /* syscalls */ @@ -176,98 +113,76 @@ static inline long sys_getcpu(unsigned * cpu, unsigned * node, return syscall(SYS_getcpu, cpu, node, cache); } -static jmp_buf jmpbuf; -static volatile unsigned long segv_err; - -static void sigsegv(int sig, siginfo_t *info, void *ctx_void) -{ - ucontext_t *ctx = (ucontext_t *)ctx_void; - - segv_err = ctx->uc_mcontext.gregs[REG_ERR]; - siglongjmp(jmpbuf, 1); -} - static double tv_diff(const struct timeval *a, const struct timeval *b) { return (double)(a->tv_sec - b->tv_sec) + (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6; } -static int check_gtod(const struct timeval *tv_sys1, - const struct timeval *tv_sys2, - const struct timezone *tz_sys, - const char *which, - const struct timeval *tv_other, - const struct timezone *tz_other) +static void check_gtod(const struct timeval *tv_sys1, + const struct timeval *tv_sys2, + const struct timezone *tz_sys, + const char *which, + const struct timeval *tv_other, + const struct timezone *tz_other) { - int nerrs = 0; double d1, d2; - if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) { - printf("[FAIL] %s tz mismatch\n", which); - nerrs++; - } + if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || + tz_sys->tz_dsttime != tz_other->tz_dsttime)) + ksft_print_msg("%s tz mismatch\n", which); d1 = tv_diff(tv_other, tv_sys1); d2 = tv_diff(tv_sys2, tv_other); - printf("\t%s time offsets: %lf %lf\n", which, d1, d2); - if (d1 < 0 || d2 < 0) { - printf("[FAIL]\t%s time was inconsistent with the syscall\n", which); - nerrs++; - } else { - printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which); - } + ksft_print_msg("%s time offsets: %lf %lf\n", which, d1, d2); - return nerrs; + ksft_test_result(!(d1 < 0 || d2 < 0), "%s gettimeofday()'s timeval\n", which); } -static int test_gtod(void) +static void test_gtod(void) { struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys; struct timezone tz_sys, tz_vdso, tz_vsys; long ret_vdso = -1; long ret_vsys = -1; - int nerrs = 0; - printf("[RUN]\ttest gettimeofday()\n"); + ksft_print_msg("test gettimeofday()\n"); if (sys_gtod(&tv_sys1, &tz_sys) != 0) - err(1, "syscall gettimeofday"); + ksft_exit_fail_msg("syscall gettimeofday: %s\n", strerror(errno)); if (vdso_gtod) ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso); if (vsyscall_map_x) ret_vsys = vgtod(&tv_vsys, &tz_vsys); if (sys_gtod(&tv_sys2, &tz_sys) != 0) - err(1, "syscall gettimeofday"); + ksft_exit_fail_msg("syscall gettimeofday: %s\n", strerror(errno)); if (vdso_gtod) { - if (ret_vdso == 0) { - nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso); - } else { - printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso); - nerrs++; - } + if (ret_vdso == 0) + check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso); + else + ksft_test_result_fail("vDSO gettimeofday() failed: %ld\n", ret_vdso); + } else { + ksft_test_result_skip("vdso_gtod isn't set\n"); } if (vsyscall_map_x) { - if (ret_vsys == 0) { - nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys); - } else { - printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys); - nerrs++; - } + if (ret_vsys == 0) + check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys); + else + ksft_test_result_fail("vsys gettimeofday() failed: %ld\n", ret_vsys); + } else { + ksft_test_result_skip("vsyscall_map_x isn't set\n"); } - - return nerrs; } -static int test_time(void) { - int nerrs = 0; - - printf("[RUN]\ttest time()\n"); +static void test_time(void) +{ long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0; long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1; + + ksft_print_msg("test time()\n"); t_sys1 = sys_time(&t2_sys1); if (vdso_time) t_vdso = vdso_time(&t2_vdso); @@ -275,56 +190,60 @@ static int test_time(void) { t_vsys = vtime(&t2_vsys); t_sys2 = sys_time(&t2_sys2); if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) { - printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2); - nerrs++; - return nerrs; + ksft_print_msg("syscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", + t_sys1, t2_sys1, t_sys2, t2_sys2); + ksft_test_result_skip("vdso_time\n"); + ksft_test_result_skip("vdso_time\n"); + return; } if (vdso_time) { - if (t_vdso < 0 || t_vdso != t2_vdso) { - printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso); - nerrs++; - } else if (t_vdso < t_sys1 || t_vdso > t_sys2) { - printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2); - nerrs++; - } else { - printf("[OK]\tvDSO time() is okay\n"); - } + if (t_vdso < 0 || t_vdso != t2_vdso) + ksft_test_result_fail("vDSO failed (ret:%ld output:%ld)\n", + t_vdso, t2_vdso); + else if (t_vdso < t_sys1 || t_vdso > t_sys2) + ksft_test_result_fail("vDSO returned the wrong time (%ld %ld %ld)\n", + t_sys1, t_vdso, t_sys2); + else + ksft_test_result_pass("vDSO time() is okay\n"); + } else { + ksft_test_result_skip("vdso_time isn't set\n"); } if (vsyscall_map_x) { - if (t_vsys < 0 || t_vsys != t2_vsys) { - printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys); - nerrs++; - } else if (t_vsys < t_sys1 || t_vsys > t_sys2) { - printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2); - nerrs++; - } else { - printf("[OK]\tvsyscall time() is okay\n"); - } + if (t_vsys < 0 || t_vsys != t2_vsys) + ksft_test_result_fail("vsyscall failed (ret:%ld output:%ld)\n", + t_vsys, t2_vsys); + else if (t_vsys < t_sys1 || t_vsys > t_sys2) + ksft_test_result_fail("vsyscall returned the wrong time (%ld %ld %ld)\n", + t_sys1, t_vsys, t_sys2); + else + ksft_test_result_pass("vsyscall time() is okay\n"); + } else { + ksft_test_result_skip("vsyscall_map_x isn't set\n"); } - - return nerrs; } -static int test_getcpu(int cpu) +static void test_getcpu(int cpu) { - int nerrs = 0; + unsigned int cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys; long ret_sys, ret_vdso = -1, ret_vsys = -1; + unsigned int node = 0; + bool have_node = false; + cpu_set_t cpuset; - printf("[RUN]\tgetcpu() on CPU %d\n", cpu); + ksft_print_msg("getcpu() on CPU %d\n", cpu); - cpu_set_t cpuset; CPU_ZERO(&cpuset); CPU_SET(cpu, &cpuset); if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) { - printf("[SKIP]\tfailed to force CPU %d\n", cpu); - return nerrs; + ksft_print_msg("failed to force CPU %d\n", cpu); + ksft_test_result_skip("vdso_getcpu\n"); + ksft_test_result_skip("vsyscall_map_x\n"); + + return; } - unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys; - unsigned node = 0; - bool have_node = false; ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0); if (vdso_getcpu) ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0); @@ -332,10 +251,9 @@ static int test_getcpu(int cpu) ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0); if (ret_sys == 0) { - if (cpu_sys != cpu) { - printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu); - nerrs++; - } + if (cpu_sys != cpu) + ksft_print_msg("syscall reported CPU %hu but should be %d\n", + cpu_sys, cpu); have_node = true; node = node_sys; @@ -343,63 +261,84 @@ static int test_getcpu(int cpu) if (vdso_getcpu) { if (ret_vdso) { - printf("[FAIL]\tvDSO getcpu() failed\n"); - nerrs++; + ksft_test_result_fail("vDSO getcpu() failed\n"); } else { if (!have_node) { have_node = true; node = node_vdso; } - if (cpu_vdso != cpu) { - printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu); - nerrs++; - } else { - printf("[OK]\tvDSO reported correct CPU\n"); - } - - if (node_vdso != node) { - printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node); - nerrs++; + if (cpu_vdso != cpu || node_vdso != node) { + if (cpu_vdso != cpu) + ksft_print_msg("vDSO reported CPU %hu but should be %d\n", + cpu_vdso, cpu); + if (node_vdso != node) + ksft_print_msg("vDSO reported node %hu but should be %hu\n", + node_vdso, node); + ksft_test_result_fail("Wrong values\n"); } else { - printf("[OK]\tvDSO reported correct node\n"); + ksft_test_result_pass("vDSO reported correct CPU and node\n"); } } + } else { + ksft_test_result_skip("vdso_getcpu isn't set\n"); } if (vsyscall_map_x) { if (ret_vsys) { - printf("[FAIL]\tvsyscall getcpu() failed\n"); - nerrs++; + ksft_test_result_fail("vsyscall getcpu() failed\n"); } else { if (!have_node) { have_node = true; node = node_vsys; } - if (cpu_vsys != cpu) { - printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu); - nerrs++; + if (cpu_vsys != cpu || node_vsys != node) { + if (cpu_vsys != cpu) + ksft_print_msg("vsyscall reported CPU %hu but should be %d\n", + cpu_vsys, cpu); + if (node_vsys != node) + ksft_print_msg("vsyscall reported node %hu but should be %hu\n", + node_vsys, node); + ksft_test_result_fail("Wrong values\n"); } else { - printf("[OK]\tvsyscall reported correct CPU\n"); - } - - if (node_vsys != node) { - printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node); - nerrs++; - } else { - printf("[OK]\tvsyscall reported correct node\n"); + ksft_test_result_pass("vsyscall reported correct CPU and node\n"); } } + } else { + ksft_test_result_skip("vsyscall_map_x isn't set\n"); } +} + +#ifdef __x86_64__ + +static jmp_buf jmpbuf; +static volatile unsigned long segv_err; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; - return nerrs; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + ksft_exit_fail_msg("sigaction failed\n"); } -static int test_vsys_r(void) +static void sigsegv(int sig, siginfo_t *info, void *ctx_void) { -#ifdef __x86_64__ - printf("[RUN]\tChecking read access to the vsyscall page\n"); + ucontext_t *ctx = (ucontext_t *)ctx_void; + + segv_err = ctx->uc_mcontext.gregs[REG_ERR]; + siglongjmp(jmpbuf, 1); +} + +static void test_vsys_r(void) +{ + ksft_print_msg("Checking read access to the vsyscall page\n"); bool can_read; if (sigsetjmp(jmpbuf, 1) == 0) { *(volatile int *)0xffffffffff600000; @@ -408,32 +347,25 @@ static int test_vsys_r(void) can_read = false; } - if (can_read && !vsyscall_map_r) { - printf("[FAIL]\tWe have read access, but we shouldn't\n"); - return 1; - } else if (!can_read && vsyscall_map_r) { - printf("[FAIL]\tWe don't have read access, but we should\n"); - return 1; - } else if (can_read) { - printf("[OK]\tWe have read access\n"); - } else { - printf("[OK]\tWe do not have read access: #PF(0x%lx)\n", - segv_err); - } -#endif - - return 0; + if (can_read && !vsyscall_map_r) + ksft_test_result_fail("We have read access, but we shouldn't\n"); + else if (!can_read && vsyscall_map_r) + ksft_test_result_fail("We don't have read access, but we should\n"); + else if (can_read) + ksft_test_result_pass("We have read access\n"); + else + ksft_test_result_pass("We do not have read access: #PF(0x%lx)\n", segv_err); } -static int test_vsys_x(void) +static void test_vsys_x(void) { -#ifdef __x86_64__ if (vsyscall_map_x) { /* We already tested this adequately. */ - return 0; + ksft_test_result_pass("vsyscall_map_x is true\n"); + return; } - printf("[RUN]\tMake sure that vsyscalls really page fault\n"); + ksft_print_msg("Make sure that vsyscalls really page fault\n"); bool can_exec; if (sigsetjmp(jmpbuf, 1) == 0) { @@ -443,20 +375,14 @@ static int test_vsys_x(void) can_exec = false; } - if (can_exec) { - printf("[FAIL]\tExecuting the vsyscall did not page fault\n"); - return 1; - } else if (segv_err & (1 << 4)) { /* INSTR */ - printf("[OK]\tExecuting the vsyscall page failed: #PF(0x%lx)\n", - segv_err); - } else { - printf("[FAIL]\tExecution failed with the wrong error: #PF(0x%lx)\n", - segv_err); - return 1; - } -#endif - - return 0; + if (can_exec) + ksft_test_result_fail("Executing the vsyscall did not page fault\n"); + else if (segv_err & (1 << 4)) /* INSTR */ + ksft_test_result_pass("Executing the vsyscall page failed: #PF(0x%lx)\n", + segv_err); + else + ksft_test_result_fail("Execution failed with the wrong error: #PF(0x%lx)\n", + segv_err); } /* @@ -470,14 +396,13 @@ static int test_vsys_x(void) * fact that ptrace() ever worked was a nice courtesy of old kernels, * but the code to support it is fairly gross. */ -static int test_process_vm_readv(void) +static void test_process_vm_readv(void) { -#ifdef __x86_64__ char buf[4096]; struct iovec local, remote; int ret; - printf("[RUN]\tprocess_vm_readv() from vsyscall page\n"); + ksft_print_msg("process_vm_readv() from vsyscall page\n"); local.iov_base = buf; local.iov_len = 4096; @@ -489,27 +414,71 @@ static int test_process_vm_readv(void) * We expect process_vm_readv() to work if and only if the * vsyscall page is readable. */ - printf("[%s]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", vsyscall_map_r ? "FAIL" : "OK", ret, errno); - return vsyscall_map_r ? 1 : 0; + ksft_test_result(!vsyscall_map_r, + "process_vm_readv() failed (ret = %d, errno = %d)\n", ret, errno); + return; } - if (vsyscall_map_r) { - if (!memcmp(buf, remote.iov_base, sizeof(buf))) { - printf("[OK]\tIt worked and read correct data\n"); - } else { - printf("[FAIL]\tIt worked but returned incorrect data\n"); - return 1; + if (vsyscall_map_r) + ksft_test_result(!memcmp(buf, remote.iov_base, sizeof(buf)), "Read data\n"); + else + ksft_test_result_fail("process_rm_readv() succeeded, but it should have failed in this configuration\n"); +} + +static void init_vsys(void) +{ + int nerrs = 0; + FILE *maps; + char line[MAPS_LINE_LEN]; + bool found = false; + + maps = fopen("/proc/self/maps", "r"); + if (!maps) { + ksft_test_result_skip("Could not open /proc/self/maps -- assuming vsyscall is r-x\n"); + vsyscall_map_r = true; + return; + } + + while (fgets(line, MAPS_LINE_LEN, maps)) { + char r, x; + void *start, *end; + char name[MAPS_LINE_LEN]; + + /* sscanf() is safe here as strlen(name) >= strlen(line) */ + if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", + &start, &end, &r, &x, name) != 5) + continue; + + if (strcmp(name, "[vsyscall]")) + continue; + + ksft_print_msg("vsyscall map: %s", line); + + if (start != (void *)0xffffffffff600000 || + end != (void *)0xffffffffff601000) { + ksft_print_msg("address range is nonsense\n"); + nerrs++; } - } else { - printf("[FAIL]\tprocess_rm_readv() succeeded, but it should have failed in this configuration\n"); - return 1; + + ksft_print_msg("vsyscall permissions are %c-%c\n", r, x); + vsyscall_map_r = (r == 'r'); + vsyscall_map_x = (x == 'x'); + + found = true; + break; } -#endif - return 0; + fclose(maps); + + if (!found) { + ksft_print_msg("no vsyscall map in /proc/self/maps\n"); + vsyscall_map_r = false; + vsyscall_map_x = false; + } + + ksft_test_result(!nerrs, "vsyscall map\n"); } -#ifdef __x86_64__ static volatile sig_atomic_t num_vsyscall_traps; static void sigtrap(int sig, siginfo_t *info, void *ctx_void) @@ -521,15 +490,17 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void) num_vsyscall_traps++; } -static int test_emulation(void) +static void test_emulation(void) { time_t tmp; bool is_native; - if (!vsyscall_map_x) - return 0; + if (!vsyscall_map_x) { + ksft_test_result_skip("vsyscall_map_x isn't set\n"); + return; + } - printf("[RUN]\tchecking that vsyscalls are emulated\n"); + ksft_print_msg("checking that vsyscalls are emulated\n"); sethandler(SIGTRAP, sigtrap, 0); set_eflags(get_eflags() | X86_EFLAGS_TF); vtime(&tmp); @@ -545,36 +516,35 @@ static int test_emulation(void) */ is_native = (num_vsyscall_traps > 1); - printf("[%s]\tvsyscalls are %s (%d instructions in vsyscall page)\n", - (is_native ? "FAIL" : "OK"), - (is_native ? "native" : "emulated"), - (int)num_vsyscall_traps); - - return is_native; + ksft_test_result(!is_native, "vsyscalls are %s (%d instructions in vsyscall page)\n", + (is_native ? "native" : "emulated"), (int)num_vsyscall_traps); } #endif int main(int argc, char **argv) { - int nerrs = 0; + int total_tests = TOTAL_TESTS; - init_vdso(); - nerrs += init_vsys(); + ksft_print_header(); + ksft_set_plan(total_tests); - nerrs += test_gtod(); - nerrs += test_time(); - nerrs += test_getcpu(0); - nerrs += test_getcpu(1); - - sethandler(SIGSEGV, sigsegv, 0); - nerrs += test_vsys_r(); - nerrs += test_vsys_x(); + init_vdso(); +#ifdef __x86_64__ + init_vsys(); +#endif - nerrs += test_process_vm_readv(); + test_gtod(); + test_time(); + test_getcpu(0); + test_getcpu(1); #ifdef __x86_64__ - nerrs += test_emulation(); + sethandler(SIGSEGV, sigsegv, 0); + test_vsys_r(); + test_vsys_x(); + test_process_vm_readv(); + test_emulation(); #endif - return nerrs ? 1 : 0; + ksft_finished(); } |