diff options
Diffstat (limited to 'tools/testing/selftests')
464 files changed, 28948 insertions, 5897 deletions
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 8247a7c69c..15b6a111c3 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -26,6 +26,8 @@ TARGETS += filesystems TARGETS += filesystems/binderfs TARGETS += filesystems/epoll TARGETS += filesystems/fat +TARGETS += filesystems/overlayfs +TARGETS += filesystems/statmount TARGETS += firmware TARGETS += fpu TARGETS += ftrace @@ -43,6 +45,7 @@ TARGETS += landlock TARGETS += lib TARGETS += livepatch TARGETS += lkdtm +TARGETS += lsm TARGETS += membarrier TARGETS += memfd TARGETS += memory-hotplug @@ -58,6 +61,7 @@ TARGETS += net/forwarding TARGETS += net/hsr TARGETS += net/mptcp TARGETS += net/openvswitch +TARGETS += net/tcp_ao TARGETS += netfilter TARGETS += nsfs TARGETS += perf_events diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c index df942149c6..1c04e5f638 100644 --- a/tools/testing/selftests/alsa/mixer-test.c +++ b/tools/testing/selftests/alsa/mixer-test.c @@ -347,7 +347,7 @@ static bool ctl_value_index_valid(struct ctl_data *ctl, } if (int_val >= snd_ctl_elem_info_get_items(ctl->info)) { - ksft_print_msg("%s.%d value %ld more than item count %ld\n", + ksft_print_msg("%s.%d value %ld more than item count %u\n", ctl->name, index, int_val, snd_ctl_elem_info_get_items(ctl->info)); return false; diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c index 351a098b50..02ee3a91b7 100644 --- a/tools/testing/selftests/arm64/abi/tpidr2.c +++ b/tools/testing/selftests/arm64/abi/tpidr2.c @@ -254,6 +254,12 @@ static int write_clone_read(void) putnum(++tests_run); \ putstr(" " #name "\n"); +#define skip_test(name) \ + tests_skipped++; \ + putstr("ok "); \ + putnum(++tests_run); \ + putstr(" # SKIP " #name "\n"); + int main(int argc, char **argv) { int ret, i; @@ -283,13 +289,11 @@ int main(int argc, char **argv) } else { putstr("# SME support not present\n"); - 
for (i = 0; i < EXPECTED_TESTS; i++) { - putstr("ok "); - putnum(i); - putstr(" skipped, TPIDR2 not supported\n"); - } - - tests_skipped += EXPECTED_TESTS; + skip_test(default_value); + skip_test(write_read); + skip_test(write_sleep_read); + skip_test(write_fork_read); + skip_test(write_clone_read); } print_summary(); diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index 547d077e35..fff60e2a25 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -515,6 +515,10 @@ function barf mov x11, x1 // actual data mov x12, x2 // data size +#ifdef SSVE + mrs x13, S3_3_C4_C2_2 +#endif + puts "Mismatch: PID=" mov x0, x20 bl putdec @@ -534,6 +538,12 @@ function barf bl dumphex puts "]\n" +#ifdef SSVE + puts "\tSVCR: " + mov x0, x13 + bl putdecn +#endif + mov x8, #__NR_getpid svc #0 // fpsimd.c acitivty log dump hack diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c index 5f648b97a0..ea9c7d4779 100644 --- a/tools/testing/selftests/arm64/fp/vec-syscfg.c +++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c @@ -66,6 +66,11 @@ static struct vec_data vec_data[] = { }, }; +static bool vec_type_supported(struct vec_data *data) +{ + return getauxval(data->hwcap_type) & data->hwcap; +} + static int stdio_read_integer(FILE *f, const char *what, int *val) { int n = 0; @@ -564,8 +569,11 @@ static void prctl_set_all_vqs(struct vec_data *data) return; } - for (i = 0; i < ARRAY_SIZE(vec_data); i++) + for (i = 0; i < ARRAY_SIZE(vec_data); i++) { + if (!vec_type_supported(&vec_data[i])) + continue; orig_vls[i] = vec_data[i].rdvl(); + } for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { vl = sve_vl_from_vq(vq); @@ -594,7 +602,7 @@ static void prctl_set_all_vqs(struct vec_data *data) if (&vec_data[i] == data) continue; - if (!(getauxval(vec_data[i].hwcap_type) & vec_data[i].hwcap)) + if (!vec_type_supported(&vec_data[i])) continue; if 
(vec_data[i].rdvl() != orig_vls[i]) { @@ -765,7 +773,7 @@ int main(void) struct vec_data *data = &vec_data[i]; unsigned long supported; - supported = getauxval(data->hwcap_type) & data->hwcap; + supported = vec_type_supported(data); if (!supported) all_supported = false; diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S index 9dcd709113..095b455316 100644 --- a/tools/testing/selftests/arm64/fp/za-test.S +++ b/tools/testing/selftests/arm64/fp/za-test.S @@ -333,6 +333,9 @@ function barf // mov w8, #__NR_exit // svc #0 // end hack + + mrs x13, S3_3_C4_C2_2 + smstop mov x10, x0 // expected data mov x11, x1 // actual data @@ -356,6 +359,9 @@ function barf mov x1, x12 bl dumphex puts "]\n" + puts "\tSVCR: " + mov x0, x13 + bl putdecn mov x8, #__NR_getpid svc #0 diff --git a/tools/testing/selftests/arm64/fp/zt-test.S b/tools/testing/selftests/arm64/fp/zt-test.S index d632863976..b5c81e81a3 100644 --- a/tools/testing/selftests/arm64/fp/zt-test.S +++ b/tools/testing/selftests/arm64/fp/zt-test.S @@ -267,6 +267,8 @@ function barf // mov w8, #__NR_exit // svc #0 // end hack + + mrs x13, S3_3_C4_C2_2 smstop mov x10, x0 // expected data mov x11, x1 // actual data @@ -287,6 +289,9 @@ function barf mov x1, x12 bl dumphex puts "]\n" + puts "\tSVCR: " + mov x0, x13 + bl putdecn mov x8, #__NR_getpid svc #0 diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 9c27b67bc7..fd15017ed3 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -18,7 +18,7 @@ else GENDIR := $(abspath ../../../../include/generated) endif GENHDR := $(GENDIR)/autoconf.h -HOSTPKG_CONFIG := pkg-config +PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config ifneq ($(wildcard $(GENHDR)),) GENFLAGS := -DHAVE_GENHDR @@ -29,13 +29,17 @@ SAN_CFLAGS ?= SAN_LDFLAGS ?= $(SAN_CFLAGS) RELEASE ?= OPT_FLAGS ?= $(if $(RELEASE),-O2,-O0) + +LIBELF_CFLAGS := $(shell $(PKG_CONFIG) libelf --cflags 2>/dev/null) 
+LIBELF_LIBS := $(shell $(PKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf) + CFLAGS += -g $(OPT_FLAGS) -rdynamic \ -Wall -Werror \ - $(GENFLAGS) $(SAN_CFLAGS) \ + $(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) LDFLAGS += $(SAN_LDFLAGS) -LDLIBS += -lelf -lz -lrt -lpthread +LDLIBS += $(LIBELF_LIBS) -lz -lrt -lpthread ifneq ($(LLVM),) # Silence some warnings when compiled with clang @@ -219,9 +223,9 @@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_r $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c $(call msg,SIGN-FILE,,$@) - $(Q)$(CC) $(shell $(HOSTPKG_CONFIG) --cflags libcrypto 2> /dev/null) \ + $(Q)$(CC) $(shell $(PKG_CONFIG) --cflags libcrypto 2> /dev/null) \ $< -o $@ \ - $(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto) + $(shell $(PKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto) $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) @@ -379,6 +383,7 @@ CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ -I$(abspath $(OUTPUT)/../usr/include) +# TODO: enable me -Wsign-compare CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ -Wno-compare-distinct-pointer-types diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index cb9b95702a..9af79c7a9b 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -77,7 +77,7 @@ In case of linker errors when running selftests, try using static linking: .. code-block:: console - $ LDLIBS=-static vmtest.sh + $ LDLIBS=-static PKG_CONFIG='pkg-config --static' vmtest.sh .. note:: Some distros may not support static linking. 
diff --git a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c index 9146d3f414..926ee82214 100644 --- a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c +++ b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c @@ -335,6 +335,7 @@ static void htab_mem_report_final(struct bench_res res[], int res_cnt) " peak memory usage %7.2lfMiB\n", loop_mean, loop_stddev, mem_mean, mem_stddev, peak_mem / 1048576.0); + close(ctx.fd); cleanup_cgroup_environment(); } diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 1386baf9ae..f44875f8b3 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -254,173 +254,97 @@ extern void bpf_throw(u64 cookie) __ksym; } \ }) -/* Description - * Assert that a conditional expression is true. - * Returns - * Void. - * Throws - * An exception with the value zero when the assertion fails. - */ -#define bpf_assert(cond) if (!(cond)) bpf_throw(0); - -/* Description - * Assert that a conditional expression is true. - * Returns - * Void. - * Throws - * An exception with the specified value when the assertion fails. - */ -#define bpf_assert_with(cond, value) if (!(cond)) bpf_throw(value); - -/* Description - * Assert that LHS is equal to RHS. This statement updates the known value - * of LHS during verification. Note that RHS must be a constant value, and - * must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the value zero when the assertion fails. - */ -#define bpf_assert_eq(LHS, RHS) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, ==, RHS, 0, true); \ - }) - -/* Description - * Assert that LHS is equal to RHS. This statement updates the known value - * of LHS during verification. Note that RHS must be a constant value, and - * must fit within the data type of LHS. - * Returns - * Void. 
- * Throws - * An exception with the specified value when the assertion fails. - */ -#define bpf_assert_eq_with(LHS, RHS, value) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, ==, RHS, value, true); \ - }) - -/* Description - * Assert that LHS is less than RHS. This statement updates the known - * bounds of LHS during verification. Note that RHS must be a constant - * value, and must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the value zero when the assertion fails. - */ -#define bpf_assert_lt(LHS, RHS) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, <, RHS, 0, false); \ - }) - -/* Description - * Assert that LHS is less than RHS. This statement updates the known - * bounds of LHS during verification. Note that RHS must be a constant - * value, and must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the specified value when the assertion fails. - */ -#define bpf_assert_lt_with(LHS, RHS, value) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, <, RHS, value, false); \ - }) +#define __cmp_cannot_be_signed(x) \ + __builtin_strcmp(#x, "==") == 0 || __builtin_strcmp(#x, "!=") == 0 || \ + __builtin_strcmp(#x, "&") == 0 -/* Description - * Assert that LHS is greater than RHS. This statement updates the known - * bounds of LHS during verification. Note that RHS must be a constant - * value, and must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the value zero when the assertion fails. - */ -#define bpf_assert_gt(LHS, RHS) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, >, RHS, 0, false); \ - }) +#define __is_signed_type(type) (((type)(-1)) < (type)1) -/* Description - * Assert that LHS is greater than RHS. This statement updates the known - * bounds of LHS during verification. Note that RHS must be a constant - * value, and must fit within the data type of LHS. - * Returns - * Void. 
- * Throws - * An exception with the specified value when the assertion fails. +#define __bpf_cmp(LHS, OP, SIGN, PRED, RHS, DEFAULT) \ + ({ \ + __label__ l_true; \ + bool ret = DEFAULT; \ + asm volatile goto("if %[lhs] " SIGN #OP " %[rhs] goto %l[l_true]" \ + :: [lhs] "r"((short)LHS), [rhs] PRED (RHS) :: l_true); \ + ret = !DEFAULT; \ +l_true: \ + ret; \ + }) + +/* C type conversions coupled with comparison operator are tricky. + * Make sure BPF program is compiled with -Wsign-compare then + * __lhs OP __rhs below will catch the mistake. + * Be aware that we check only __lhs to figure out the sign of compare. */ -#define bpf_assert_gt_with(LHS, RHS, value) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, >, RHS, value, false); \ - }) +#define _bpf_cmp(LHS, OP, RHS, NOFLIP) \ + ({ \ + typeof(LHS) __lhs = (LHS); \ + typeof(RHS) __rhs = (RHS); \ + bool ret; \ + _Static_assert(sizeof(&(LHS)), "1st argument must be an lvalue expression"); \ + (void)(__lhs OP __rhs); \ + if (__cmp_cannot_be_signed(OP) || !__is_signed_type(typeof(__lhs))) { \ + if (sizeof(__rhs) == 8) \ + ret = __bpf_cmp(__lhs, OP, "", "r", __rhs, NOFLIP); \ + else \ + ret = __bpf_cmp(__lhs, OP, "", "i", __rhs, NOFLIP); \ + } else { \ + if (sizeof(__rhs) == 8) \ + ret = __bpf_cmp(__lhs, OP, "s", "r", __rhs, NOFLIP); \ + else \ + ret = __bpf_cmp(__lhs, OP, "s", "i", __rhs, NOFLIP); \ + } \ + ret; \ + }) + +#ifndef bpf_cmp_unlikely +#define bpf_cmp_unlikely(LHS, OP, RHS) _bpf_cmp(LHS, OP, RHS, true) +#endif -/* Description - * Assert that LHS is less than or equal to RHS. This statement updates the - * known bounds of LHS during verification. Note that RHS must be a - * constant value, and must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the value zero when the assertion fails. 
- */ -#define bpf_assert_le(LHS, RHS) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, <=, RHS, 0, false); \ - }) +#ifndef bpf_cmp_likely +#define bpf_cmp_likely(LHS, OP, RHS) \ + ({ \ + bool ret; \ + if (__builtin_strcmp(#OP, "==") == 0) \ + ret = _bpf_cmp(LHS, !=, RHS, false); \ + else if (__builtin_strcmp(#OP, "!=") == 0) \ + ret = _bpf_cmp(LHS, ==, RHS, false); \ + else if (__builtin_strcmp(#OP, "<=") == 0) \ + ret = _bpf_cmp(LHS, >, RHS, false); \ + else if (__builtin_strcmp(#OP, "<") == 0) \ + ret = _bpf_cmp(LHS, >=, RHS, false); \ + else if (__builtin_strcmp(#OP, ">") == 0) \ + ret = _bpf_cmp(LHS, <=, RHS, false); \ + else if (__builtin_strcmp(#OP, ">=") == 0) \ + ret = _bpf_cmp(LHS, <, RHS, false); \ + else \ + (void) "bug"; \ + ret; \ + }) +#endif -/* Description - * Assert that LHS is less than or equal to RHS. This statement updates the - * known bounds of LHS during verification. Note that RHS must be a - * constant value, and must fit within the data type of LHS. - * Returns - * Void. - * Throws - * An exception with the specified value when the assertion fails. - */ -#define bpf_assert_le_with(LHS, RHS, value) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, <=, RHS, value, false); \ - }) +#ifndef bpf_nop_mov +#define bpf_nop_mov(var) \ + asm volatile("%[reg]=%[reg]"::[reg]"r"((short)var)) +#endif /* Description - * Assert that LHS is greater than or equal to RHS. This statement updates - * the known bounds of LHS during verification. Note that RHS must be a - * constant value, and must fit within the data type of LHS. + * Assert that a conditional expression is true. * Returns * Void. * Throws * An exception with the value zero when the assertion fails. */ -#define bpf_assert_ge(LHS, RHS) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, >=, RHS, 0, false); \ - }) +#define bpf_assert(cond) if (!(cond)) bpf_throw(0); /* Description - * Assert that LHS is greater than or equal to RHS. 
This statement updates - * the known bounds of LHS during verification. Note that RHS must be a - * constant value, and must fit within the data type of LHS. + * Assert that a conditional expression is true. * Returns * Void. * Throws * An exception with the specified value when the assertion fails. */ -#define bpf_assert_ge_with(LHS, RHS, value) \ - ({ \ - barrier_var(LHS); \ - __bpf_assert_op(LHS, >=, RHS, value, false); \ - }) +#define bpf_assert_with(cond, value) if (!(cond)) bpf_throw(value); /* Description * Assert that LHS is in the range [BEG, END] (inclusive of both). This diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 5ca68ff0b5..b4e78c1eb3 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -55,4 +55,14 @@ void *bpf_cast_to_kern_ctx(void *) __ksym; void *bpf_rdonly_cast(void *obj, __u32 btf_id) __ksym; +extern int bpf_get_file_xattr(struct file *file, const char *name, + struct bpf_dynptr *value_ptr) __ksym; +extern int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_ptr) __ksym; + +extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; +extern void bpf_key_put(struct bpf_key *key) __ksym; +extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr, + struct bpf_dynptr *sig_ptr, + struct bpf_key *trusted_keyring) __ksym; #endif diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 10b5f42e65..19be9c63d5 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -45,9 +45,12 @@ #define format_parent_cgroup_path(buf, path) \ format_cgroup_path_pid(buf, path, getppid()) -#define format_classid_path(buf) \ - snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ - CGROUP_WORK_DIR) +#define format_classid_path_pid(buf, pid) \ + 
snprintf(buf, sizeof(buf), "%s%s%d", NETCLS_MOUNT_PATH, \ + CGROUP_WORK_DIR, pid) + +#define format_classid_path(buf) \ + format_classid_path_pid(buf, getpid()) static __thread bool cgroup_workdir_mounted; @@ -419,26 +422,23 @@ int create_and_get_cgroup(const char *relative_path) } /** - * get_cgroup_id() - Get cgroup id for a particular cgroup path - * @relative_path: The cgroup path, relative to the workdir, to join + * get_cgroup_id_from_path - Get cgroup id for a particular cgroup path + * @cgroup_workdir: The absolute cgroup path * * On success, it returns the cgroup id. On failure it returns 0, * which is an invalid cgroup id. * If there is a failure, it prints the error to stderr. */ -unsigned long long get_cgroup_id(const char *relative_path) +unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir) { int dirfd, err, flags, mount_id, fhsize; union { unsigned long long cgid; unsigned char raw_bytes[8]; } id; - char cgroup_workdir[PATH_MAX + 1]; struct file_handle *fhp, *fhp2; unsigned long long ret = 0; - format_cgroup_path(cgroup_workdir, relative_path); - dirfd = AT_FDCWD; flags = 0; fhsize = sizeof(*fhp); @@ -474,6 +474,14 @@ free_mem: return ret; } +unsigned long long get_cgroup_id(const char *relative_path) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_cgroup_path(cgroup_workdir, relative_path); + return get_cgroup_id_from_path(cgroup_workdir); +} + int cgroup_setup_and_join(const char *path) { int cg_fd; @@ -551,15 +559,16 @@ int setup_classid_environment(void) /** * set_classid() - Set a cgroupv1 net_cls classid - * @id: the numeric classid * - * Writes the passed classid into the cgroup work dir's net_cls.classid + * Writes the classid into the cgroup work dir's net_cls.classid * file in order to later on trigger socket tagging. * + * We leverage the current pid as the classid, ensuring unique identification. + * * On success, it returns 0, otherwise on failure it returns 1. If there * is a failure, it prints the error to stderr. 
*/ -int set_classid(unsigned int id) +int set_classid(void) { char cgroup_workdir[PATH_MAX - 42]; char cgroup_classid_path[PATH_MAX + 1]; @@ -575,7 +584,7 @@ int set_classid(unsigned int id) return 1; } - if (dprintf(fd, "%u\n", id) < 0) { + if (dprintf(fd, "%u\n", getpid()) < 0) { log_err("Setting cgroup classid"); rc = 1; } @@ -617,3 +626,82 @@ void cleanup_classid_environment(void) join_cgroup_from_top(NETCLS_MOUNT_PATH); nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); } + +/** + * get_classid_cgroup_id - Get the cgroup id of a net_cls cgroup + */ +unsigned long long get_classid_cgroup_id(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_classid_path(cgroup_workdir); + return get_cgroup_id_from_path(cgroup_workdir); +} + +/** + * get_cgroup1_hierarchy_id - Retrieves the ID of a cgroup1 hierarchy from the cgroup1 subsys name. + * @subsys_name: The cgroup1 subsys name, which can be retrieved from /proc/self/cgroup. It can be + * a named cgroup like "name=systemd", a controller name like "net_cls", or multi-controllers like + * "net_cls,net_prio". + */ +int get_cgroup1_hierarchy_id(const char *subsys_name) +{ + char *c, *c2, *c3, *c4; + bool found = false; + char line[1024]; + FILE *file; + int i, id; + + if (!subsys_name) + return -1; + + file = fopen("/proc/self/cgroup", "r"); + if (!file) { + log_err("fopen /proc/self/cgroup"); + return -1; + } + + while (fgets(line, 1024, file)) { + i = 0; + for (c = strtok_r(line, ":", &c2); c && i < 2; c = strtok_r(NULL, ":", &c2)) { + if (i == 0) { + id = strtol(c, NULL, 10); + } else if (i == 1) { + if (!strcmp(c, subsys_name)) { + found = true; + break; + } + + /* Multiple subsystems may share one single mount point: match each comma-separated name (c3) */ + for (c3 = strtok_r(c, ",", &c4); c3; + c3 = strtok_r(NULL, ",", &c4)) { + if (!strcmp(c3, subsys_name)) { + found = true; + break; + } + } + } + i++; + } + if (found) + break; + } + fclose(file); + return found ? 
id : -1; +} + +/** + * open_classid() - Open a cgroupv1 net_cls classid + * + * This function expects the cgroup work dir to be already created, as we + * open it here. + * + * On success, it returns the file descriptor. On failure it returns -1. + */ +int open_classid(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_classid_path(cgroup_workdir); + return open(cgroup_workdir, O_RDONLY); +} diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 5c2cb9c8b5..502845160d 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -20,6 +20,7 @@ int get_root_cgroup(void); int create_and_get_cgroup(const char *relative_path); void remove_cgroup(const char *relative_path); unsigned long long get_cgroup_id(const char *relative_path); +int get_cgroup1_hierarchy_id(const char *subsys_name); int join_cgroup(const char *relative_path); int join_root_cgroup(void); @@ -29,8 +30,10 @@ int setup_cgroup_environment(void); void cleanup_cgroup_environment(void); /* cgroupv1 related */ -int set_classid(unsigned int id); +int set_classid(void); int join_classid(void); +unsigned long long get_classid_cgroup_id(void); +int open_classid(void); int setup_classid_environment(void); void cleanup_classid_environment(void); diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 3ec5927ec3..c125c441ab 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -23,6 +23,7 @@ CONFIG_FPROBE=y CONFIG_FTRACE_SYSCALLS=y CONFIG_FUNCTION_ERROR_INJECTION=y CONFIG_FUNCTION_TRACER=y +CONFIG_FS_VERITY=y CONFIG_GENEVE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y @@ -82,7 +83,7 @@ CONFIG_SECURITY=y CONFIG_SECURITYFS=y CONFIG_TEST_BPF=m CONFIG_USERFAULTFD=y +CONFIG_VSOCKETS=y CONFIG_VXLAN=y CONFIG_XDP_SOCKETS=y CONFIG_XFRM_INTERFACE=y -CONFIG_VSOCKETS=y diff --git a/tools/testing/selftests/bpf/config.aarch64 
b/tools/testing/selftests/bpf/config.aarch64 index 2538214948..3720b76115 100644 --- a/tools/testing/selftests/bpf/config.aarch64 +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -1,4 +1,3 @@ -CONFIG_9P_FS=y CONFIG_ARCH_VEXPRESS=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y CONFIG_ARM_SMMU_V3=y @@ -12,7 +11,6 @@ CONFIG_BLK_DEV_IO_TRACE=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_SD=y CONFIG_BONDING=y -CONFIG_BPFILTER=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_JIT_DEFAULT_ON=y CONFIG_BPF_PRELOAD_UMD=y @@ -37,6 +35,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=y CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_INFO_REDUCED=n CONFIG_DEBUG_LIST=y CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_NOTIFIERS=y @@ -46,7 +45,6 @@ CONFIG_DEBUG_SG=y CONFIG_DETECT_HUNG_TASK=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_DEVTMPFS=y -CONFIG_DRM_VIRTIO_GPU=y CONFIG_DRM=y CONFIG_DUMMY=y CONFIG_EXPERT=y @@ -67,7 +65,6 @@ CONFIG_HAVE_KRETPROBES=y CONFIG_HEADERS_INSTALL=y CONFIG_HIGH_RES_TIMERS=y CONFIG_HUGETLBFS=y -CONFIG_HW_RANDOM_VIRTIO=y CONFIG_HW_RANDOM=y CONFIG_HZ_100=y CONFIG_IDLE_PAGE_TRACKING=y @@ -99,8 +96,6 @@ CONFIG_MEMCG=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_NAMESPACES=y -CONFIG_NET_9P_VIRTIO=y -CONFIG_NET_9P=y CONFIG_NET_ACT_BPF=y CONFIG_NET_ACT_GACT=y CONFIG_NETDEVICES=y @@ -140,7 +135,6 @@ CONFIG_SCHED_TRACER=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_LOGGING=y CONFIG_SCSI_SCAN_ASYNC=y -CONFIG_SCSI_VIRTIO=y CONFIG_SCSI=y CONFIG_SECURITY_NETWORK=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y @@ -167,16 +161,6 @@ CONFIG_UPROBES=y CONFIG_USELIB=y CONFIG_USER_NS=y CONFIG_VETH=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_CONSOLE=y -CONFIG_VIRTIO_FS=y -CONFIG_VIRTIO_INPUT=y -CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y -CONFIG_VIRTIO_MMIO=y -CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y diff --git a/tools/testing/selftests/bpf/config.s390x 
b/tools/testing/selftests/bpf/config.s390x index 2ba92167be..706931a8c2 100644 --- a/tools/testing/selftests/bpf/config.s390x +++ b/tools/testing/selftests/bpf/config.s390x @@ -1,4 +1,3 @@ -CONFIG_9P_FS=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y CONFIG_AUDIT=y CONFIG_BLK_CGROUP=y @@ -10,7 +9,6 @@ CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_JIT_DEFAULT_ON=y CONFIG_BPF_PRELOAD=y CONFIG_BPF_PRELOAD_UMD=y -CONFIG_BPFILTER=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_FREEZER=y @@ -84,8 +82,6 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_NAMESPACES=y CONFIG_NET=y -CONFIG_NET_9P=y -CONFIG_NET_9P_VIRTIO=y CONFIG_NET_ACT_BPF=y CONFIG_NET_ACT_GACT=y CONFIG_NET_KEY=y @@ -114,7 +110,6 @@ CONFIG_SAMPLE_SECCOMP=y CONFIG_SAMPLES=y CONFIG_SCHED_TRACER=y CONFIG_SCSI=y -CONFIG_SCSI_VIRTIO=y CONFIG_SECURITY_NETWORK=y CONFIG_STACK_TRACER=y CONFIG_STATIC_KEYS_SELFTEST=y @@ -136,11 +131,6 @@ CONFIG_UPROBES=y CONFIG_USELIB=y CONFIG_USER_NS=y CONFIG_VETH=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y diff --git a/tools/testing/selftests/bpf/config.vm b/tools/testing/selftests/bpf/config.vm new file mode 100644 index 0000000000..a9746ca787 --- /dev/null +++ b/tools/testing/selftests/bpf/config.vm @@ -0,0 +1,12 @@ +CONFIG_9P_FS=y +CONFIG_9P_FS_POSIX_ACL=y +CONFIG_9P_FS_SECURITY=y +CONFIG_CRYPTO_DEV_VIRTIO=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_VSOCKETS_COMMON=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 49a29dbc19..5680befae8 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -1,6 +1,3 @@ -CONFIG_9P_FS=y -CONFIG_9P_FS_POSIX_ACL=y -CONFIG_9P_FS_SECURITY=y CONFIG_AGP=y CONFIG_AGP_AMD64=y 
CONFIG_AGP_INTEL=y @@ -22,7 +19,6 @@ CONFIG_BOOTTIME_TRACING=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_PRELOAD=y CONFIG_BPF_PRELOAD_UMD=y -CONFIG_BPFILTER=y CONFIG_BSD_DISKLABEL=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_CFS_BANDWIDTH=y @@ -45,7 +41,6 @@ CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPUSETS=y CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_BLAKE2B=y -CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y @@ -144,8 +139,6 @@ CONFIG_MEMORY_FAILURE=y CONFIG_MINIX_SUBPARTITION=y CONFIG_NAMESPACES=y CONFIG_NET=y -CONFIG_NET_9P=y -CONFIG_NET_9P_VIRTIO=y CONFIG_NET_ACT_BPF=y CONFIG_NET_CLS_CGROUP=y CONFIG_NET_EMATCH=y @@ -227,12 +220,6 @@ CONFIG_USER_NS=y CONFIG_VALIDATE_FS_PARSER=y CONFIG_VETH=y CONFIG_VIRT_DRIVERS=y -CONFIG_VIRTIO_BALLOON=y -CONFIG_VIRTIO_BLK=y -CONFIG_VIRTIO_CONSOLE=y -CONFIG_VIRTIO_NET=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_VSOCKETS_COMMON=y CONFIG_VLAN_8021Q=y CONFIG_VSOCKETS=y CONFIG_VSOCKETS_LOOPBACK=y diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c index 8bf497a984..2ea3640881 100644 --- a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c +++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c @@ -131,10 +131,17 @@ static bool is_lru(__u32 map_type) map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; } +static bool is_percpu(__u32 map_type) +{ + return map_type == BPF_MAP_TYPE_PERCPU_HASH || + map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; +} + struct upsert_opts { __u32 map_type; int map_fd; __u32 n; + bool retry_for_nomem; }; static int create_small_hash(void) @@ -148,19 +155,38 @@ static int create_small_hash(void) return map_fd; } +static bool retry_for_nomem_fn(int err) +{ + return err == ENOMEM; +} + static void *patch_map_thread(void *arg) { + /* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. 
*/ + static __u8 blob[8 << 10]; struct upsert_opts *opts = arg; + void *val_ptr; int val; int ret; int i; for (i = 0; i < opts->n; i++) { - if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) + if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { val = create_small_hash(); - else + val_ptr = &val; + } else if (is_percpu(opts->map_type)) { + val_ptr = blob; + } else { val = rand(); - ret = bpf_map_update_elem(opts->map_fd, &i, &val, 0); + val_ptr = &val; + } + + /* 2 seconds may be enough ? */ + if (opts->retry_for_nomem) + ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0, + 40, retry_for_nomem_fn); + else + ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0); CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno)); if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) @@ -281,6 +307,13 @@ static void __test(int map_fd) else opts.n /= 2; + /* per-cpu bpf memory allocator may not be able to allocate per-cpu + * pointer successfully and it can not refill free llist timely, and + * bpf_map_update_elem() will return -ENOMEM. so just retry to mitigate + * the problem temporarily. + */ + opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC); + /* * Upsert keys [0, n) under some competition: with random values from * N_THREADS threads. 
Check values, then delete all elements and check diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 34f1200a78..94b9be24e3 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -71,4 +71,47 @@ struct nstoken; */ struct nstoken *open_netns(const char *name); void close_netns(struct nstoken *token); + +static __u16 csum_fold(__u32 csum) +{ + csum = (csum & 0xffff) + (csum >> 16); + csum = (csum & 0xffff) + (csum >> 16); + + return (__u16)~csum; +} + +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, + __wsum csum) +{ + __u64 s = csum; + + s += (__u32)saddr; + s += (__u32)daddr; + s += htons(proto + len); + s = (s & 0xffffffff) + (s >> 32); + s = (s & 0xffffffff) + (s >> 32); + + return csum_fold((__u32)s); +} + +static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, + __wsum csum) +{ + __u64 s = csum; + int i; + + for (i = 0; i < 4; i++) + s += (__u32)saddr->s6_addr32[i]; + for (i = 0; i < 4; i++) + s += (__u32)daddr->s6_addr32[i]; + s += htons(proto + len); + s = (s & 0xffffffff) + (s >> 32); + s = (s & 0xffffffff) + (s >> 32); + + return csum_fold((__u32)s); +} + #endif diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 465c1c3a3d..4ebd0da898 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -40,7 +40,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "2"}, {1, "R3_w", "4"}, @@ -68,7 +68,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", 
"1"}, {1, "R3_w", "2"}, @@ -97,7 +97,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "4"}, {1, "R3_w", "8"}, @@ -119,7 +119,7 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {0, "R1", "ctx(off=0,imm=0)"}, + {0, "R1", "ctx()"}, {0, "R10", "fp0"}, {0, "R3_w", "7"}, {1, "R3_w", "7"}, @@ -162,13 +162,13 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {6, "R0_w", "pkt(off=8,r=8,imm=0)"}, + {6, "R0_w", "pkt(off=8,r=8)"}, {6, "R3_w", "var_off=(0x0; 0xff)"}, {7, "R3_w", "var_off=(0x0; 0x1fe)"}, {8, "R3_w", "var_off=(0x0; 0x3fc)"}, {9, "R3_w", "var_off=(0x0; 0x7f8)"}, {10, "R3_w", "var_off=(0x0; 0xff0)"}, - {12, "R3_w", "pkt_end(off=0,imm=0)"}, + {12, "R3_w", "pkt_end()"}, {17, "R4_w", "var_off=(0x0; 0xff)"}, {18, "R4_w", "var_off=(0x0; 0x1fe0)"}, {19, "R4_w", "var_off=(0x0; 0xff0)"}, @@ -235,11 +235,11 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {2, "R5_w", "pkt(off=0,r=0,imm=0)"}, - {4, "R5_w", "pkt(off=14,r=0,imm=0)"}, - {5, "R4_w", "pkt(off=14,r=0,imm=0)"}, - {9, "R2", "pkt(off=0,r=18,imm=0)"}, - {10, "R5", "pkt(off=14,r=18,imm=0)"}, + {2, "R5_w", "pkt(r=0)"}, + {4, "R5_w", "pkt(off=14,r=0)"}, + {5, "R4_w", "pkt(off=14,r=0)"}, + {9, "R2", "pkt(r=18)"}, + {10, "R5", "pkt(off=14,r=18)"}, {10, "R4_w", "var_off=(0x0; 0xff)"}, {13, "R4_w", "var_off=(0x0; 0xffff)"}, {14, "R4_w", "var_off=(0x0; 0xffff)"}, @@ -299,7 +299,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {7, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Offset is added to packet pointer R5, resulting in * known fixed offset, and variable offset from R6. 
@@ -337,7 +337,7 @@ static struct bpf_align_test tests[] = { /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {26, "R5_w", "pkt(off=14,r=8,"}, + {26, "R5_w", "pkt(off=14,r=8)"}, /* Variable offset is added to R5, resulting in a * variable offset of (4n). See comment for insn #18 * for R4 = R5 trick. @@ -397,7 +397,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {7, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ {8, "R6_w", "var_off=(0x2; 0x7fc)"}, @@ -459,7 +459,7 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .matches = { - {3, "R5_w", "pkt_end(off=0,imm=0)"}, + {3, "R5_w", "pkt_end()"}, /* (ptr - ptr) << 2 == unknown, (4n) */ {5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"}, /* (4n) + 14 == (4n+2). We blow our bounds, because @@ -513,7 +513,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {8, "R6_w", "var_off=(0x0; 0x3fc)"}, /* Adding 14 makes R6 be (4n+2) */ {9, "R6_w", "var_off=(0x2; 0x7fc)"}, @@ -566,7 +566,7 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - {6, "R2_w", "pkt(off=0,r=8,imm=0)"}, + {6, "R2_w", "pkt(r=8)"}, {9, "R6_w", "var_off=(0x0; 0x3c)"}, /* Adding 14 makes R6 be (4n+2) */ {10, "R6_w", "var_off=(0x2; 0x7c)"}, @@ -659,14 +659,14 @@ static int do_test_single(struct bpf_align_test *test) /* Check the next line as well in case the previous line * did not have a corresponding bpf insn. Example: * func#0 @0 - * 0: R1=ctx(off=0,imm=0) R10=fp0 + * 0: R1=ctx() R10=fp0 * 0: (b7) r3 = 2 ; R3_w=2 * * Sometimes it's actually two lines below, e.g. 
when * searching for "6: R3_w=scalar(umax=255,var_off=(0x0; 0xff))": - * from 4 to 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 - * 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0 - * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(off=0,r=8,imm=0) R3_w=scalar(umax=255,var_off=(0x0; 0xff)) + * from 4 to 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0 + * 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0 + * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(r=8) R3_w=scalar(umax=255,var_off=(0x0; 0xff)) */ while (!(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) { cur_line = -1; diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c index a1766a298b..f7cd129cb8 100644 --- a/tools/testing/selftests/bpf/prog_tests/bind_perm.c +++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c @@ -9,8 +9,6 @@ #include "cap_helpers.h" #include "bind_perm.skel.h" -static int duration; - static int create_netns(void) { if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) @@ -27,7 +25,7 @@ void try_bind(int family, int port, int expected_errno) int fd = -1; fd = socket(family, SOCK_STREAM, 0); - if (CHECK(fd < 0, "fd", "errno %d", errno)) + if (!ASSERT_GE(fd, 0, "socket")) goto close_socket; if (family == AF_INET) { @@ -60,7 +58,7 @@ void test_bind_perm(void) return; cgroup_fd = test__join_cgroup("/bind_perm"); - if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno)) + if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup")) return; skel = bind_perm__open_and_load(); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 2cacc8fa96..618af9dfae 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -34,8 +34,6 @@ #include "bpf_iter_ksym.skel.h" 
#include "bpf_iter_sockmap.skel.h" -static int duration; - static void test_btf_id_or_null(void) { struct bpf_iter_test_kern3 *skel; @@ -64,7 +62,7 @@ static void do_dummy_read_opts(struct bpf_program *prog, struct bpf_iter_attach_ /* not check contents, but ensure read() ends without error */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)); + ASSERT_GE(len, 0, "read"); close(iter_fd); @@ -415,7 +413,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel) goto free_link; } - if (CHECK(err < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(err, 0, "read")) goto free_link; ASSERT_HAS_SUBSTR(taskbuf, "(struct task_struct)", @@ -528,11 +526,11 @@ static int do_read_with_fd(int iter_fd, const char *expected, start = 0; while ((len = read(iter_fd, buf + start, read_buf_len)) > 0) { start += len; - if (CHECK(start >= 16, "read", "read len %d\n", len)) + if (!ASSERT_LT(start, 16, "read")) return -1; read_buf_len = read_one_char ? 1 : 16 - start; } - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) return -1; if (!ASSERT_STREQ(buf, expected, "read")) @@ -573,8 +571,7 @@ static int do_read(const char *path, const char *expected) int err, iter_fd; iter_fd = open(path, O_RDONLY); - if (CHECK(iter_fd < 0, "open", "open %s failed: %s\n", - path, strerror(errno))) + if (!ASSERT_GE(iter_fd, 0, "open")) return -1; err = do_read_with_fd(iter_fd, expected, false); @@ -602,7 +599,7 @@ static void test_file_iter(void) unlink(path); err = bpf_link__pin(link, path); - if (CHECK(err, "pin_iter", "pin_iter to %s failed: %d\n", path, err)) + if (!ASSERT_OK(err, "pin_iter")) goto free_link; err = do_read(path, "abcd"); @@ -653,12 +650,10 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) * overflow and needs restart. 
*/ map1_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); - if (CHECK(map1_fd < 0, "bpf_map_create", - "map_creation failed: %s\n", strerror(errno))) + if (!ASSERT_GE(map1_fd, 0, "bpf_map_create")) goto out; map2_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL); - if (CHECK(map2_fd < 0, "bpf_map_create", - "map_creation failed: %s\n", strerror(errno))) + if (!ASSERT_GE(map2_fd, 0, "bpf_map_create")) goto free_map1; /* bpf_seq_printf kernel buffer is 8 pages, so one map @@ -687,14 +682,12 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) /* setup filtering map_id in bpf program */ map_info_len = sizeof(map_info); err = bpf_map_get_info_by_fd(map1_fd, &map_info, &map_info_len); - if (CHECK(err, "get_map_info", "get map info failed: %s\n", - strerror(errno))) + if (!ASSERT_OK(err, "get_map_info")) goto free_map2; skel->bss->map1_id = map_info.id; err = bpf_map_get_info_by_fd(map2_fd, &map_info, &map_info_len); - if (CHECK(err, "get_map_info", "get map info failed: %s\n", - strerror(errno))) + if (!ASSERT_OK(err, "get_map_info")) goto free_map2; skel->bss->map2_id = map_info.id; @@ -707,7 +700,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) goto free_link; buf = malloc(expected_read_len); - if (!buf) + if (!ASSERT_OK_PTR(buf, "malloc")) goto close_iter; /* do read */ @@ -716,16 +709,14 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) while ((len = read(iter_fd, buf, expected_read_len)) > 0) total_read_len += len; - CHECK(len != -1 || errno != E2BIG, "read", - "expected ret -1, errno E2BIG, but get ret %d, error %s\n", - len, strerror(errno)); + ASSERT_EQ(len, -1, "read"); + ASSERT_EQ(errno, E2BIG, "read"); goto free_buf; } else if (!ret1) { while ((len = read(iter_fd, buf, expected_read_len)) > 0) total_read_len += len; - if (CHECK(len < 0, "read", "read failed: %s\n", - strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto free_buf; } else { do { @@ -734,8 +725,7 @@ static void 
test_overflow(bool test_e2big_overflow, bool ret1) total_read_len += len; } while (len > 0 || len == -EAGAIN); - if (CHECK(len < 0, "read", "read failed: %s\n", - strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto free_buf; } @@ -838,7 +828,7 @@ static void test_bpf_hash_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ @@ -880,6 +870,8 @@ static void test_bpf_percpu_hash_map(void) skel->rodata->num_cpus = bpf_num_possible_cpus(); val = malloc(8 * bpf_num_possible_cpus()); + if (!ASSERT_OK_PTR(val, "malloc")) + goto out; err = bpf_iter_bpf_percpu_hash_map__load(skel); if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_hash_map__load")) @@ -919,7 +911,7 @@ static void test_bpf_percpu_hash_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ @@ -985,17 +977,14 @@ static void test_bpf_array_map(void) start = 0; while ((len = read(iter_fd, buf + start, sizeof(buf) - start)) > 0) start += len; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ res_first_key = *(__u32 *)buf; res_first_val = *(__u64 *)(buf + sizeof(__u32)); - if (CHECK(res_first_key != 0 || res_first_val != first_val, - "bpf_seq_write", - "seq_write failure: first key %u vs expected 0, " - " first value %llu vs expected %llu\n", - res_first_key, res_first_val, first_val)) + if (!ASSERT_EQ(res_first_key, 0, "bpf_seq_write") || + !ASSERT_EQ(res_first_val, first_val, "bpf_seq_write")) goto close_iter; if (!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum")) @@ -1059,6 +1048,8 @@ static void test_bpf_percpu_array_map(void) skel->rodata->num_cpus = bpf_num_possible_cpus(); val = 
malloc(8 * bpf_num_possible_cpus()); + if (!ASSERT_OK_PTR(val, "malloc")) + goto out; err = bpf_iter_bpf_percpu_array_map__load(skel); if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_array_map__load")) @@ -1094,7 +1085,7 @@ static void test_bpf_percpu_array_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ @@ -1133,6 +1124,7 @@ static void test_bpf_sk_storage_delete(void) sock_fd = socket(AF_INET6, SOCK_STREAM, 0); if (!ASSERT_GE(sock_fd, 0, "socket")) goto out; + err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST); if (!ASSERT_OK(err, "map_update")) goto out; @@ -1153,14 +1145,19 @@ static void test_bpf_sk_storage_delete(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem", - "map value wasn't deleted (err=%d, errno=%d)\n", err, errno)) - goto close_iter; + + /* Note: The following assertions serve to ensure + * the value was deleted. It does so by asserting + * that bpf_map_lookup_elem has failed. This might + * seem counterintuitive at first. 
+ */ + ASSERT_ERR(err, "bpf_map_lookup_elem"); + ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem"); close_iter: close(iter_fd); @@ -1205,17 +1202,15 @@ static void test_bpf_sk_storage_get(void) do_dummy_read(skel->progs.fill_socket_owner); err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - if (CHECK(err || val != getpid(), "bpf_map_lookup_elem", - "map value wasn't set correctly (expected %d, got %d, err=%d)\n", - getpid(), val, err)) + if (!ASSERT_OK(err, "bpf_map_lookup_elem") || + !ASSERT_EQ(val, getpid(), "bpf_map_lookup_elem")) goto close_socket; do_dummy_read(skel->progs.negate_socket_local_storage); err = bpf_map_lookup_elem(map_fd, &sock_fd, &val); - CHECK(err || val != -getpid(), "bpf_map_lookup_elem", - "map value wasn't set correctly (expected %d, got %d, err=%d)\n", - -getpid(), val, err); + ASSERT_OK(err, "bpf_map_lookup_elem"); + ASSERT_EQ(val, -getpid(), "bpf_map_lookup_elem"); close_socket: close(sock_fd); @@ -1292,7 +1287,7 @@ static void test_bpf_sk_storage_map(void) /* do some tests */ while ((len = read(iter_fd, buf, sizeof(buf))) > 0) ; - if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + if (!ASSERT_GE(len, 0, "read")) goto close_iter; /* test results */ diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c index 675b90b152..f09d6ac2ef 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c @@ -25,7 +25,7 @@ void serial_test_bpf_obj_id(void) */ __u32 map_ids[nr_iters + 1]; char jited_insns[128], xlated_insns[128], zeros[128], tp_name[128]; - __u32 i, next_id, info_len, nr_id_found, duration = 0; + __u32 i, next_id, info_len, nr_id_found; struct timespec real_time_ts, boot_time_ts; int err = 0; __u64 array_value; @@ -33,16 +33,16 @@ void serial_test_bpf_obj_id(void) time_t now, load_time; err = bpf_prog_get_fd_by_id(0); - CHECK(err >= 0 || errno != ENOENT, - "get-fd-by-notexist-prog-id", "err %d 
errno %d\n", err, errno); + ASSERT_LT(err, 0, "bpf_prog_get_fd_by_id"); + ASSERT_EQ(errno, ENOENT, "bpf_prog_get_fd_by_id"); err = bpf_map_get_fd_by_id(0); - CHECK(err >= 0 || errno != ENOENT, - "get-fd-by-notexist-map-id", "err %d errno %d\n", err, errno); + ASSERT_LT(err, 0, "bpf_map_get_fd_by_id"); + ASSERT_EQ(errno, ENOENT, "bpf_map_get_fd_by_id"); err = bpf_link_get_fd_by_id(0); - CHECK(err >= 0 || errno != ENOENT, - "get-fd-by-notexist-link-id", "err %d errno %d\n", err, errno); + ASSERT_LT(err, 0, "bpf_map_get_fd_by_id"); + ASSERT_EQ(errno, ENOENT, "bpf_map_get_fd_by_id"); /* Check bpf_map_get_info_by_fd() */ bzero(zeros, sizeof(zeros)); @@ -53,25 +53,26 @@ void serial_test_bpf_obj_id(void) /* test_obj_id.o is a dumb prog. It should never fail * to load. */ - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_prog_test_load")) continue; /* Insert a magic value to the map */ map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id"); - if (CHECK_FAIL(map_fds[i] < 0)) + if (!ASSERT_GE(map_fds[i], 0, "bpf_find_map")) goto done; + err = bpf_map_update_elem(map_fds[i], &array_key, &array_magic_value, 0); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_map_update_elem")) goto done; - prog = bpf_object__find_program_by_name(objs[i], - "test_obj_id"); - if (CHECK_FAIL(!prog)) + prog = bpf_object__find_program_by_name(objs[i], "test_obj_id"); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) goto done; + links[i] = bpf_program__attach(prog); err = libbpf_get_error(links[i]); - if (CHECK(err, "prog_attach", "prog #%d, err %d\n", i, err)) { + if (!ASSERT_OK(err, "bpf_program__attach")) { links[i] = NULL; goto done; } @@ -81,24 +82,14 @@ void serial_test_bpf_obj_id(void) bzero(&map_infos[i], info_len); err = bpf_map_get_info_by_fd(map_fds[i], &map_infos[i], &info_len); - if (CHECK(err || - map_infos[i].type != BPF_MAP_TYPE_ARRAY || - map_infos[i].key_size != sizeof(__u32) || - map_infos[i].value_size != sizeof(__u64) || - map_infos[i].max_entries != 1 || - 
map_infos[i].map_flags != 0 || - info_len != sizeof(struct bpf_map_info) || - strcmp((char *)map_infos[i].name, expected_map_name), - "get-map-info(fd)", - "err %d errno %d type %d(%d) info_len %u(%zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n", - err, errno, - map_infos[i].type, BPF_MAP_TYPE_ARRAY, - info_len, sizeof(struct bpf_map_info), - map_infos[i].key_size, - map_infos[i].value_size, - map_infos[i].max_entries, - map_infos[i].map_flags, - map_infos[i].name, expected_map_name)) + if (!ASSERT_OK(err, "bpf_map_get_info_by_fd") || + !ASSERT_EQ(map_infos[i].type, BPF_MAP_TYPE_ARRAY, "map_type") || + !ASSERT_EQ(map_infos[i].key_size, sizeof(__u32), "key_size") || + !ASSERT_EQ(map_infos[i].value_size, sizeof(__u64), "value_size") || + !ASSERT_EQ(map_infos[i].max_entries, 1, "max_entries") || + !ASSERT_EQ(map_infos[i].map_flags, 0, "map_flags") || + !ASSERT_EQ(info_len, sizeof(struct bpf_map_info), "map_info_len") || + !ASSERT_STREQ((char *)map_infos[i].name, expected_map_name, "map_name")) goto done; /* Check getting prog info */ @@ -112,48 +103,34 @@ void serial_test_bpf_obj_id(void) prog_infos[i].xlated_prog_len = sizeof(xlated_insns); prog_infos[i].map_ids = ptr_to_u64(map_ids + i); prog_infos[i].nr_map_ids = 2; + err = clock_gettime(CLOCK_REALTIME, &real_time_ts); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "clock_gettime")) goto done; + err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "clock_gettime")) goto done; + err = bpf_prog_get_info_by_fd(prog_fds[i], &prog_infos[i], &info_len); load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec) + (prog_infos[i].load_time / nsec_per_sec); - if (CHECK(err || - prog_infos[i].type != BPF_PROG_TYPE_RAW_TRACEPOINT || - info_len != sizeof(struct bpf_prog_info) || - (env.jit_enabled && !prog_infos[i].jited_prog_len) || - (env.jit_enabled && - !memcmp(jited_insns, zeros, sizeof(zeros))) || - !prog_infos[i].xlated_prog_len || - !memcmp(xlated_insns, 
zeros, sizeof(zeros)) || - load_time < now - 60 || load_time > now + 60 || - prog_infos[i].created_by_uid != my_uid || - prog_infos[i].nr_map_ids != 1 || - *(int *)(long)prog_infos[i].map_ids != map_infos[i].id || - strcmp((char *)prog_infos[i].name, expected_prog_name), - "get-prog-info(fd)", - "err %d errno %d i %d type %d(%d) info_len %u(%zu) " - "jit_enabled %d jited_prog_len %u xlated_prog_len %u " - "jited_prog %d xlated_prog %d load_time %lu(%lu) " - "uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) " - "name %s(%s)\n", - err, errno, i, - prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, - info_len, sizeof(struct bpf_prog_info), - env.jit_enabled, - prog_infos[i].jited_prog_len, - prog_infos[i].xlated_prog_len, - !!memcmp(jited_insns, zeros, sizeof(zeros)), - !!memcmp(xlated_insns, zeros, sizeof(zeros)), - load_time, now, - prog_infos[i].created_by_uid, my_uid, - prog_infos[i].nr_map_ids, 1, - *(int *)(long)prog_infos[i].map_ids, map_infos[i].id, - prog_infos[i].name, expected_prog_name)) + + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd") || + !ASSERT_EQ(prog_infos[i].type, BPF_PROG_TYPE_RAW_TRACEPOINT, "prog_type") || + !ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len") || + !ASSERT_FALSE((env.jit_enabled && !prog_infos[i].jited_prog_len), "jited_prog_len") || + !ASSERT_FALSE((env.jit_enabled && !memcmp(jited_insns, zeros, sizeof(zeros))), + "jited_insns") || + !ASSERT_NEQ(prog_infos[i].xlated_prog_len, 0, "xlated_prog_len") || + !ASSERT_NEQ(memcmp(xlated_insns, zeros, sizeof(zeros)), 0, "xlated_insns") || + !ASSERT_GE(load_time, (now - 60), "load_time") || + !ASSERT_LE(load_time, (now + 60), "load_time") || + !ASSERT_EQ(prog_infos[i].created_by_uid, my_uid, "created_by_uid") || + !ASSERT_EQ(prog_infos[i].nr_map_ids, 1, "nr_map_ids") || + !ASSERT_EQ(*(int *)(long)prog_infos[i].map_ids, map_infos[i].id, "map_ids") || + !ASSERT_STREQ((char *)prog_infos[i].name, expected_prog_name, "prog_name")) goto done; /* Check getting link info */ @@ -163,25 
+140,12 @@ void serial_test_bpf_obj_id(void) link_infos[i].raw_tracepoint.tp_name_len = sizeof(tp_name); err = bpf_link_get_info_by_fd(bpf_link__fd(links[i]), &link_infos[i], &info_len); - if (CHECK(err || - link_infos[i].type != BPF_LINK_TYPE_RAW_TRACEPOINT || - link_infos[i].prog_id != prog_infos[i].id || - link_infos[i].raw_tracepoint.tp_name != ptr_to_u64(&tp_name) || - strcmp(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), - "sys_enter") || - info_len != sizeof(struct bpf_link_info), - "get-link-info(fd)", - "err %d errno %d info_len %u(%zu) type %d(%d) id %d " - "prog_id %d (%d) tp_name %s(%s)\n", - err, errno, - info_len, sizeof(struct bpf_link_info), - link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT, - link_infos[i].id, - link_infos[i].prog_id, prog_infos[i].id, - (const char *)u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), - "sys_enter")) + if (!ASSERT_OK(err, "bpf_link_get_info_by_fd") || + !ASSERT_EQ(link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT, "link_type") || + !ASSERT_EQ(link_infos[i].prog_id, prog_infos[i].id, "prog_id") || + !ASSERT_EQ(link_infos[i].raw_tracepoint.tp_name, ptr_to_u64(&tp_name), "&tp_name") || + !ASSERT_STREQ(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), "sys_enter", "tp_name")) goto done; - } /* Check bpf_prog_get_next_id() */ @@ -190,7 +154,7 @@ void serial_test_bpf_obj_id(void) while (!bpf_prog_get_next_id(next_id, &next_id)) { struct bpf_prog_info prog_info = {}; __u32 saved_map_id; - int prog_fd; + int prog_fd, cmp_res; info_len = sizeof(prog_info); @@ -198,9 +162,7 @@ void serial_test_bpf_obj_id(void) if (prog_fd < 0 && errno == ENOENT) /* The bpf_prog is in the dead row */ continue; - if (CHECK(prog_fd < 0, "get-prog-fd(next_id)", - "prog_fd %d next_id %d errno %d\n", - prog_fd, next_id, errno)) + if (!ASSERT_GE(prog_fd, 0, "bpf_prog_get_fd_by_id")) break; for (i = 0; i < nr_iters; i++) @@ -218,9 +180,8 @@ void serial_test_bpf_obj_id(void) */ prog_info.nr_map_ids = 1; err = bpf_prog_get_info_by_fd(prog_fd, 
&prog_info, &info_len); - if (CHECK(!err || errno != EFAULT, - "get-prog-fd-bad-nr-map-ids", "err %d errno %d(%d)", - err, errno, EFAULT)) + if (!ASSERT_ERR(err, "bpf_prog_get_info_by_fd") || + !ASSERT_EQ(errno, EFAULT, "bpf_prog_get_info_by_fd")) break; bzero(&prog_info, sizeof(prog_info)); info_len = sizeof(prog_info); @@ -231,27 +192,22 @@ void serial_test_bpf_obj_id(void) err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len); prog_infos[i].jited_prog_insns = 0; prog_infos[i].xlated_prog_insns = 0; - CHECK(err || info_len != sizeof(struct bpf_prog_info) || - memcmp(&prog_info, &prog_infos[i], info_len) || - *(int *)(long)prog_info.map_ids != saved_map_id, - "get-prog-info(next_id->fd)", - "err %d errno %d info_len %u(%zu) memcmp %d map_id %u(%u)\n", - err, errno, info_len, sizeof(struct bpf_prog_info), - memcmp(&prog_info, &prog_infos[i], info_len), - *(int *)(long)prog_info.map_ids, saved_map_id); + cmp_res = memcmp(&prog_info, &prog_infos[i], info_len); + + ASSERT_OK(err, "bpf_prog_get_info_by_fd"); + ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len"); + ASSERT_OK(cmp_res, "memcmp"); + ASSERT_EQ(*(int *)(long)prog_info.map_ids, saved_map_id, "map_id"); close(prog_fd); } - CHECK(nr_id_found != nr_iters, - "check total prog id found by get_next_id", - "nr_id_found %u(%u)\n", - nr_id_found, nr_iters); + ASSERT_EQ(nr_id_found, nr_iters, "prog_nr_id_found"); /* Check bpf_map_get_next_id() */ nr_id_found = 0; next_id = 0; while (!bpf_map_get_next_id(next_id, &next_id)) { struct bpf_map_info map_info = {}; - int map_fd; + int map_fd, cmp_res; info_len = sizeof(map_info); @@ -259,9 +215,7 @@ void serial_test_bpf_obj_id(void) if (map_fd < 0 && errno == ENOENT) /* The bpf_map is in the dead row */ continue; - if (CHECK(map_fd < 0, "get-map-fd(next_id)", - "map_fd %d next_id %u errno %d\n", - map_fd, next_id, errno)) + if (!ASSERT_GE(map_fd, 0, "bpf_map_get_fd_by_id")) break; for (i = 0; i < nr_iters; i++) @@ -274,25 +228,19 @@ void 
serial_test_bpf_obj_id(void) nr_id_found++; err = bpf_map_lookup_elem(map_fd, &array_key, &array_value); - if (CHECK_FAIL(err)) + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) goto done; err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len); - CHECK(err || info_len != sizeof(struct bpf_map_info) || - memcmp(&map_info, &map_infos[i], info_len) || - array_value != array_magic_value, - "check get-map-info(next_id->fd)", - "err %d errno %d info_len %u(%zu) memcmp %d array_value %llu(%llu)\n", - err, errno, info_len, sizeof(struct bpf_map_info), - memcmp(&map_info, &map_infos[i], info_len), - array_value, array_magic_value); + cmp_res = memcmp(&map_info, &map_infos[i], info_len); + ASSERT_OK(err, "bpf_map_get_info_by_fd"); + ASSERT_EQ(info_len, sizeof(struct bpf_map_info), "info_len"); + ASSERT_OK(cmp_res, "memcmp"); + ASSERT_EQ(array_value, array_magic_value, "array_value"); close(map_fd); } - CHECK(nr_id_found != nr_iters, - "check total map id found by get_next_id", - "nr_id_found %u(%u)\n", - nr_id_found, nr_iters); + ASSERT_EQ(nr_id_found, nr_iters, "map_nr_id_found"); /* Check bpf_link_get_next_id() */ nr_id_found = 0; @@ -308,9 +256,7 @@ void serial_test_bpf_obj_id(void) if (link_fd < 0 && errno == ENOENT) /* The bpf_link is in the dead row */ continue; - if (CHECK(link_fd < 0, "get-link-fd(next_id)", - "link_fd %d next_id %u errno %d\n", - link_fd, next_id, errno)) + if (!ASSERT_GE(link_fd, 0, "bpf_link_get_fd_by_id")) break; for (i = 0; i < nr_iters; i++) @@ -325,17 +271,13 @@ void serial_test_bpf_obj_id(void) err = bpf_link_get_info_by_fd(link_fd, &link_info, &info_len); cmp_res = memcmp(&link_info, &link_infos[i], offsetof(struct bpf_link_info, raw_tracepoint)); - CHECK(err || info_len != sizeof(link_info) || cmp_res, - "check get-link-info(next_id->fd)", - "err %d errno %d info_len %u(%zu) memcmp %d\n", - err, errno, info_len, sizeof(struct bpf_link_info), - cmp_res); + ASSERT_OK(err, "bpf_link_get_info_by_fd"); + ASSERT_EQ(info_len, sizeof(link_info), 
"info_len"); + ASSERT_OK(cmp_res, "memcmp"); close(link_fd); } - CHECK(nr_id_found != nr_iters, - "check total link id found by get_next_id", - "nr_id_found %u(%u)\n", nr_id_found, nr_iters); + ASSERT_EQ(nr_id_found, nr_iters, "link_nr_id_found"); done: for (i = 0; i < nr_iters; i++) { diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 4aabeaa525..a88e6e07e4 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -20,15 +20,14 @@ static const unsigned int total_bytes = 10 * 1024 * 1024; static int expected_stg = 0xeB9F; -static int stop, duration; +static int stop; static int settcpca(int fd, const char *tcp_ca) { int err; err = setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, tcp_ca, strlen(tcp_ca)); - if (CHECK(err == -1, "setsockopt(fd, TCP_CONGESTION)", "errno:%d\n", - errno)) + if (!ASSERT_NEQ(err, -1, "setsockopt")) return -1; return 0; @@ -65,8 +64,7 @@ static void *server(void *arg) bytes += nr_sent; } - CHECK(bytes != total_bytes, "send", "%zd != %u nr_sent:%zd errno:%d\n", - bytes, total_bytes, nr_sent, errno); + ASSERT_EQ(bytes, total_bytes, "send"); done: if (fd >= 0) @@ -92,10 +90,11 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) WRITE_ONCE(stop, 0); lfd = socket(AF_INET6, SOCK_STREAM, 0); - if (CHECK(lfd == -1, "socket", "errno:%d\n", errno)) + if (!ASSERT_NEQ(lfd, -1, "socket")) return; + fd = socket(AF_INET6, SOCK_STREAM, 0); - if (CHECK(fd == -1, "socket", "errno:%d\n", errno)) { + if (!ASSERT_NEQ(fd, -1, "socket")) { close(lfd); return; } @@ -108,26 +107,27 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) sa6.sin6_family = AF_INET6; sa6.sin6_addr = in6addr_loopback; err = bind(lfd, (struct sockaddr *)&sa6, addrlen); - if (CHECK(err == -1, "bind", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "bind")) goto done; + err = getsockname(lfd, (struct 
sockaddr *)&sa6, &addrlen); - if (CHECK(err == -1, "getsockname", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "getsockname")) goto done; + err = listen(lfd, 1); - if (CHECK(err == -1, "listen", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "listen")) goto done; if (sk_stg_map) { err = bpf_map_update_elem(bpf_map__fd(sk_stg_map), &fd, &expected_stg, BPF_NOEXIST); - if (CHECK(err, "bpf_map_update_elem(sk_stg_map)", - "err:%d errno:%d\n", err, errno)) + if (!ASSERT_OK(err, "bpf_map_update_elem(sk_stg_map)")) goto done; } /* connect to server */ err = connect(fd, (struct sockaddr *)&sa6, addrlen); - if (CHECK(err == -1, "connect", "errno:%d\n", errno)) + if (!ASSERT_NEQ(err, -1, "connect")) goto done; if (sk_stg_map) { @@ -135,14 +135,13 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) err = bpf_map_lookup_elem(bpf_map__fd(sk_stg_map), &fd, &tmp_stg); - if (CHECK(!err || errno != ENOENT, - "bpf_map_lookup_elem(sk_stg_map)", - "err:%d errno:%d\n", err, errno)) + if (!ASSERT_ERR(err, "bpf_map_lookup_elem(sk_stg_map)") || + !ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem(sk_stg_map)")) goto done; } err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd); - if (CHECK(err != 0, "pthread_create", "err:%d errno:%d\n", err, errno)) + if (!ASSERT_OK(err, "pthread_create")) goto done; /* recv total_bytes */ @@ -156,13 +155,12 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map) bytes += nr_recv; } - CHECK(bytes != total_bytes, "recv", "%zd != %u nr_recv:%zd errno:%d\n", - bytes, total_bytes, nr_recv, errno); + ASSERT_EQ(bytes, total_bytes, "recv"); WRITE_ONCE(stop, 1); pthread_join(srv_thread, &thread_ret); - CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld", - PTR_ERR(thread_ret)); + ASSERT_OK(IS_ERR(thread_ret), "thread_ret"); + done: close(lfd); close(fd); @@ -174,7 +172,7 @@ static void test_cubic(void) struct bpf_link *link; cubic_skel = bpf_cubic__open_and_load(); - if (CHECK(!cubic_skel, 
"bpf_cubic__open_and_load", "failed\n")) + if (!ASSERT_OK_PTR(cubic_skel, "bpf_cubic__open_and_load")) return; link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic); @@ -197,7 +195,7 @@ static void test_dctcp(void) struct bpf_link *link; dctcp_skel = bpf_dctcp__open_and_load(); - if (CHECK(!dctcp_skel, "bpf_dctcp__open_and_load", "failed\n")) + if (!ASSERT_OK_PTR(dctcp_skel, "bpf_dctcp__open_and_load")) return; link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp); @@ -207,9 +205,7 @@ static void test_dctcp(void) } do_test("bpf_dctcp", dctcp_skel->maps.sk_stg_map); - CHECK(dctcp_skel->bss->stg_result != expected_stg, - "Unexpected stg_result", "stg_result (%x) != expected_stg (%x)\n", - dctcp_skel->bss->stg_result, expected_stg); + ASSERT_EQ(dctcp_skel->bss->stg_result, expected_stg, "stg_result"); bpf_link__destroy(link); bpf_dctcp__destroy(dctcp_skel); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index 731c343897..e770912fc1 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -35,7 +35,7 @@ static int check_load(const char *file, enum bpf_prog_type type) } bpf_program__set_type(prog, type); - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); + bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS); bpf_program__set_log_level(prog, 4 | extra_prog_load_log_flags); err = bpf_object__load(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c new file mode 100644 index 0000000000..74d6d7546f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */ + +#include <sys/types.h> +#include <unistd.h> +#include <test_progs.h> +#include "cgroup_helpers.h" 
+#include "test_cgroup1_hierarchy.skel.h" + +static void bpf_cgroup1(struct test_cgroup1_hierarchy *skel) +{ + struct bpf_link *lsm_link, *fentry_link; + int err; + + /* Attach LSM prog first */ + lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run); + if (!ASSERT_OK_PTR(lsm_link, "lsm_attach")) + return; + + /* LSM prog will be triggered when attaching fentry */ + fentry_link = bpf_program__attach_trace(skel->progs.fentry_run); + ASSERT_NULL(fentry_link, "fentry_attach_fail"); + + err = bpf_link__destroy(lsm_link); + ASSERT_OK(err, "destroy_lsm"); +} + +static void bpf_cgroup1_sleepable(struct test_cgroup1_hierarchy *skel) +{ + struct bpf_link *lsm_link, *fentry_link; + int err; + + /* Attach LSM prog first */ + lsm_link = bpf_program__attach_lsm(skel->progs.lsm_s_run); + if (!ASSERT_OK_PTR(lsm_link, "lsm_attach")) + return; + + /* LSM prog will be triggered when attaching fentry */ + fentry_link = bpf_program__attach_trace(skel->progs.fentry_run); + ASSERT_NULL(fentry_link, "fentry_attach_fail"); + + err = bpf_link__destroy(lsm_link); + ASSERT_OK(err, "destroy_lsm"); +} + +static void bpf_cgroup1_invalid_id(struct test_cgroup1_hierarchy *skel) +{ + struct bpf_link *lsm_link, *fentry_link; + int err; + + /* Attach LSM prog first */ + lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run); + if (!ASSERT_OK_PTR(lsm_link, "lsm_attach")) + return; + + /* LSM prog will be triggered when attaching fentry */ + fentry_link = bpf_program__attach_trace(skel->progs.fentry_run); + if (!ASSERT_OK_PTR(fentry_link, "fentry_attach_success")) + goto cleanup; + + err = bpf_link__destroy(fentry_link); + ASSERT_OK(err, "destroy_lsm"); + +cleanup: + err = bpf_link__destroy(lsm_link); + ASSERT_OK(err, "destroy_fentry"); +} + +void test_cgroup1_hierarchy(void) +{ + struct test_cgroup1_hierarchy *skel; + __u64 current_cgid; + int hid, err; + + skel = test_cgroup1_hierarchy__open(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + skel->bss->target_pid = getpid(); + + err = 
bpf_program__set_attach_target(skel->progs.fentry_run, 0, "bpf_fentry_test1"); + if (!ASSERT_OK(err, "fentry_set_target")) + goto destroy; + + err = test_cgroup1_hierarchy__load(skel); + if (!ASSERT_OK(err, "load")) + goto destroy; + + /* Setup cgroup1 hierarchy */ + err = setup_classid_environment(); + if (!ASSERT_OK(err, "setup_classid_environment")) + goto destroy; + + err = join_classid(); + if (!ASSERT_OK(err, "join_cgroup1")) + goto cleanup; + + current_cgid = get_classid_cgroup_id(); + if (!ASSERT_GE(current_cgid, 0, "cgroup1 id")) + goto cleanup; + + hid = get_cgroup1_hierarchy_id("net_cls"); + if (!ASSERT_GE(hid, 0, "cgroup1 id")) + goto cleanup; + skel->bss->target_hid = hid; + + if (test__start_subtest("test_cgroup1_hierarchy")) { + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1(skel); + } + + if (test__start_subtest("test_root_cgid")) { + skel->bss->target_ancestor_cgid = 1; + skel->bss->target_ancestor_level = 0; + bpf_cgroup1(skel); + } + + if (test__start_subtest("test_invalid_level")) { + skel->bss->target_ancestor_cgid = 1; + skel->bss->target_ancestor_level = 1; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_cgid")) { + skel->bss->target_ancestor_cgid = 0; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_hid")) { + skel->bss->target_ancestor_cgid = 1; + skel->bss->target_ancestor_level = 0; + skel->bss->target_hid = -1; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_cgrp_name")) { + skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cl"); + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_invalid_cgrp_name2")) { + skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cls,"); + skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1_invalid_id(skel); + } + + if (test__start_subtest("test_sleepable_prog")) { + skel->bss->target_hid = hid; + 
skel->bss->target_ancestor_cgid = current_cgid; + bpf_cgroup1_sleepable(skel); + } + +cleanup: + cleanup_classid_environment(); +destroy: + test_cgroup1_hierarchy__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c index 9026b42914..addf720428 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c @@ -71,7 +71,7 @@ void test_cgroup_v1v2(void) } ASSERT_OK(run_test(cgroup_fd, server_fd, false), "cgroup-v2-only"); setup_classid_environment(); - set_classid(42); + set_classid(); ASSERT_OK(run_test(cgroup_fd, server_fd, true), "cgroup-v1v2"); cleanup_classid_environment(); close(server_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c index 63e776f417..7477615720 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c @@ -19,6 +19,21 @@ struct socket_cookie { __u64 cookie_value; }; +static bool is_cgroup1; +static int target_hid; + +#define CGROUP_MODE_SET(skel) \ +{ \ + skel->bss->is_cgroup1 = is_cgroup1; \ + skel->bss->target_hid = target_hid; \ +} + +static void cgroup_mode_value_init(bool cgroup, int hid) +{ + is_cgroup1 = cgroup; + target_hid = hid; +} + static void test_tp_btf(int cgroup_fd) { struct cgrp_ls_tp_btf *skel; @@ -29,6 +44,8 @@ static void test_tp_btf(int cgroup_fd) if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) return; + CGROUP_MODE_SET(skel); + /* populate a value in map_b */ err = bpf_map_update_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd, &val1, BPF_ANY); if (!ASSERT_OK(err, "map_update_elem")) @@ -130,6 +147,8 @@ static void test_recursion(int cgroup_fd) if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) return; + CGROUP_MODE_SET(skel); + err = cgrp_ls_recursion__attach(skel); if (!ASSERT_OK(err, "skel_attach")) goto out; 
@@ -165,6 +184,8 @@ static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id) if (!ASSERT_OK_PTR(skel, "skel_open")) return; + CGROUP_MODE_SET(skel); + bpf_program__set_autoload(skel->progs.cgroup_iter, true); err = cgrp_ls_sleepable__load(skel); if (!ASSERT_OK(err, "skel_load")) @@ -202,6 +223,7 @@ static void test_yes_rcu_lock(__u64 cgroup_id) if (!ASSERT_OK_PTR(skel, "skel_open")) return; + CGROUP_MODE_SET(skel); skel->bss->target_pid = syscall(SYS_gettid); bpf_program__set_autoload(skel->progs.yes_rcu_lock, true); @@ -229,6 +251,8 @@ static void test_no_rcu_lock(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; + CGROUP_MODE_SET(skel); + bpf_program__set_autoload(skel->progs.no_rcu_lock, true); err = cgrp_ls_sleepable__load(skel); ASSERT_ERR(err, "skel_load"); @@ -236,7 +260,25 @@ static void test_no_rcu_lock(void) cgrp_ls_sleepable__destroy(skel); } -void test_cgrp_local_storage(void) +static void test_cgrp1_no_rcu_lock(void) +{ + struct cgrp_ls_sleepable *skel; + int err; + + skel = cgrp_ls_sleepable__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + CGROUP_MODE_SET(skel); + + bpf_program__set_autoload(skel->progs.cgrp1_no_rcu_lock, true); + err = cgrp_ls_sleepable__load(skel); + ASSERT_OK(err, "skel_load"); + + cgrp_ls_sleepable__destroy(skel); +} + +static void cgrp2_local_storage(void) { __u64 cgroup_id; int cgroup_fd; @@ -245,6 +287,8 @@ void test_cgrp_local_storage(void) if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /cgrp_local_storage")) return; + cgroup_mode_value_init(0, -1); + cgroup_id = get_cgroup_id("/cgrp_local_storage"); if (test__start_subtest("tp_btf")) test_tp_btf(cgroup_fd); @@ -263,3 +307,55 @@ void test_cgrp_local_storage(void) close(cgroup_fd); } + +static void cgrp1_local_storage(void) +{ + int cgrp1_fd, cgrp1_hid, cgrp1_id, err; + + /* Setup cgroup1 hierarchy */ + err = setup_classid_environment(); + if (!ASSERT_OK(err, "setup_classid_environment")) + return; + + err = join_classid(); + if (!ASSERT_OK(err, 
"join_cgroup1")) + goto cleanup; + + cgrp1_fd = open_classid(); + if (!ASSERT_GE(cgrp1_fd, 0, "cgroup1 fd")) + goto cleanup; + + cgrp1_id = get_classid_cgroup_id(); + if (!ASSERT_GE(cgrp1_id, 0, "cgroup1 id")) + goto close_fd; + + cgrp1_hid = get_cgroup1_hierarchy_id("net_cls"); + if (!ASSERT_GE(cgrp1_hid, 0, "cgroup1 hid")) + goto close_fd; + + cgroup_mode_value_init(1, cgrp1_hid); + + if (test__start_subtest("cgrp1_tp_btf")) + test_tp_btf(cgrp1_fd); + if (test__start_subtest("cgrp1_recursion")) + test_recursion(cgrp1_fd); + if (test__start_subtest("cgrp1_negative")) + test_negative(); + if (test__start_subtest("cgrp1_iter_sleepable")) + test_cgroup_iter_sleepable(cgrp1_fd, cgrp1_id); + if (test__start_subtest("cgrp1_yes_rcu_lock")) + test_yes_rcu_lock(cgrp1_id); + if (test__start_subtest("cgrp1_no_rcu_lock")) + test_cgrp1_no_rcu_lock(); + +close_fd: + close(cgrp1_fd); +cleanup: + cleanup_classid_environment(); +} + +void test_cgrp_local_storage(void) +{ + cgrp2_local_storage(); + cgrp1_local_storage(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c index 756ea8b590..c2e886399e 100644 --- a/tools/testing/selftests/bpf/prog_tests/cpumask.c +++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c @@ -18,6 +18,7 @@ static const char * const cpumask_success_testcases[] = { "test_insert_leave", "test_insert_remove_release", "test_global_mask_rcu", + "test_cpumask_weight", }; static void verify_success(const char *prog_name) diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index 8ec73fdfcd..f29fc789c1 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -348,7 +348,8 @@ static void test_func_sockmap_update(void) } static void test_obj_load_failure_common(const char *obj_file, - const char *target_obj_file) + const char *target_obj_file, + const char 
*exp_msg) { /* * standalone test that asserts failure to load freplace prog @@ -356,6 +357,7 @@ static void test_obj_load_failure_common(const char *obj_file, */ struct bpf_object *obj = NULL, *pkt_obj; struct bpf_program *prog; + char log_buf[64 * 1024]; int err, pkt_fd; __u32 duration = 0; @@ -374,11 +376,21 @@ static void test_obj_load_failure_common(const char *obj_file, err = bpf_program__set_attach_target(prog, pkt_fd, NULL); ASSERT_OK(err, "set_attach_target"); + log_buf[0] = '\0'; + if (exp_msg) + bpf_program__set_log_buf(prog, log_buf, sizeof(log_buf)); + if (env.verbosity > VERBOSE_NONE) + bpf_program__set_log_level(prog, 2); + /* It should fail to load the program */ err = bpf_object__load(obj); + if (env.verbosity > VERBOSE_NONE && exp_msg) /* we overtook log */ + printf("VERIFIER LOG:\n================\n%s\n================\n", log_buf); if (CHECK(!err, "bpf_obj_load should fail", "err %d\n", err)) goto close_prog; + if (exp_msg) + ASSERT_HAS_SUBSTR(log_buf, exp_msg, "fail_msg"); close_prog: bpf_object__close(obj); bpf_object__close(pkt_obj); @@ -388,14 +400,24 @@ static void test_func_replace_return_code(void) { /* test invalid return code in the replaced program */ test_obj_load_failure_common("./freplace_connect_v4_prog.bpf.o", - "./connect4_prog.bpf.o"); + "./connect4_prog.bpf.o", NULL); } static void test_func_map_prog_compatibility(void) { /* test with spin lock map value in the replaced program */ test_obj_load_failure_common("./freplace_attach_probe.bpf.o", - "./test_attach_probe.bpf.o"); + "./test_attach_probe.bpf.o", NULL); +} + +static void test_func_replace_unreliable(void) +{ + /* freplace'ing unreliable main prog should fail with error + * "Cannot replace static functions" + */ + test_obj_load_failure_common("freplace_unreliable_prog.bpf.o", + "./verifier_btf_unreliable_prog.bpf.o", + "Cannot replace static functions"); } static void test_func_replace_global_func(void) @@ -563,6 +585,8 @@ void serial_test_fexit_bpf2bpf(void) 
test_func_replace_return_code(); if (test__start_subtest("func_map_prog_compatibility")) test_func_map_prog_compatibility(); + if (test__start_subtest("func_replace_unreliable")) + test_func_replace_unreliable(); if (test__start_subtest("func_replace_multi")) test_func_replace_multi(); if (test__start_subtest("fmod_ret_freplace")) diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index 97142a4db3..d4b1901f78 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -7,6 +7,7 @@ #include <test_progs.h> #include "trace_helpers.h" #include "test_fill_link_info.skel.h" +#include "bpf/libbpf_internal.h" #define TP_CAT "sched" #define TP_NAME "sched_switch" @@ -140,14 +141,14 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel, .retprobe = type == BPF_PERF_EVENT_KRETPROBE, ); ssize_t entry_offset = 0; + struct bpf_link *link; int link_fd, err; - skel->links.kprobe_run = bpf_program__attach_kprobe_opts(skel->progs.kprobe_run, - KPROBE_FUNC, &opts); - if (!ASSERT_OK_PTR(skel->links.kprobe_run, "attach_kprobe")) + link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_run, KPROBE_FUNC, &opts); + if (!ASSERT_OK_PTR(link, "attach_kprobe")) return; - link_fd = bpf_link__fd(skel->links.kprobe_run); + link_fd = bpf_link__fd(link); if (!invalid) { /* See also arch_adjust_kprobe_addr(). 
*/ if (skel->kconfig->CONFIG_X86_KERNEL_IBT) @@ -157,39 +158,41 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel, } else { kprobe_fill_invalid_user_buffer(link_fd); } - bpf_link__detach(skel->links.kprobe_run); + bpf_link__destroy(link); } static void test_tp_fill_link_info(struct test_fill_link_info *skel) { + struct bpf_link *link; int link_fd, err; - skel->links.tp_run = bpf_program__attach_tracepoint(skel->progs.tp_run, TP_CAT, TP_NAME); - if (!ASSERT_OK_PTR(skel->links.tp_run, "attach_tp")) + link = bpf_program__attach_tracepoint(skel->progs.tp_run, TP_CAT, TP_NAME); + if (!ASSERT_OK_PTR(link, "attach_tp")) return; - link_fd = bpf_link__fd(skel->links.tp_run); + link_fd = bpf_link__fd(link); err = verify_perf_link_info(link_fd, BPF_PERF_EVENT_TRACEPOINT, 0, 0, 0); ASSERT_OK(err, "verify_perf_link_info"); - bpf_link__detach(skel->links.tp_run); + bpf_link__destroy(link); } static void test_uprobe_fill_link_info(struct test_fill_link_info *skel, enum bpf_perf_event_type type) { + struct bpf_link *link; int link_fd, err; - skel->links.uprobe_run = bpf_program__attach_uprobe(skel->progs.uprobe_run, - type == BPF_PERF_EVENT_URETPROBE, - 0, /* self pid */ - UPROBE_FILE, uprobe_offset); - if (!ASSERT_OK_PTR(skel->links.uprobe_run, "attach_uprobe")) + link = bpf_program__attach_uprobe(skel->progs.uprobe_run, + type == BPF_PERF_EVENT_URETPROBE, + 0, /* self pid */ + UPROBE_FILE, uprobe_offset); + if (!ASSERT_OK_PTR(link, "attach_uprobe")) return; - link_fd = bpf_link__fd(skel->links.uprobe_run); + link_fd = bpf_link__fd(link); err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, 0); ASSERT_OK(err, "verify_perf_link_info"); - bpf_link__detach(skel->links.uprobe_run); + bpf_link__destroy(link); } static int verify_kmulti_link_info(int fd, bool retprobe) @@ -278,24 +281,214 @@ static void test_kprobe_multi_fill_link_info(struct test_fill_link_info *skel, bool retprobe, bool invalid) { LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + struct 
bpf_link *link; int link_fd, err; opts.syms = kmulti_syms; opts.cnt = KMULTI_CNT; opts.retprobe = retprobe; - skel->links.kmulti_run = bpf_program__attach_kprobe_multi_opts(skel->progs.kmulti_run, - NULL, &opts); - if (!ASSERT_OK_PTR(skel->links.kmulti_run, "attach_kprobe_multi")) + link = bpf_program__attach_kprobe_multi_opts(skel->progs.kmulti_run, NULL, &opts); + if (!ASSERT_OK_PTR(link, "attach_kprobe_multi")) return; - link_fd = bpf_link__fd(skel->links.kmulti_run); + link_fd = bpf_link__fd(link); if (!invalid) { err = verify_kmulti_link_info(link_fd, retprobe); ASSERT_OK(err, "verify_kmulti_link_info"); } else { verify_kmulti_invalid_user_buffer(link_fd); } - bpf_link__detach(skel->links.kmulti_run); + bpf_link__destroy(link); +} + +#define SEC(name) __attribute__((section(name), used)) + +static short uprobe_link_info_sema_1 SEC(".probes"); +static short uprobe_link_info_sema_2 SEC(".probes"); +static short uprobe_link_info_sema_3 SEC(".probes"); + +noinline void uprobe_link_info_func_1(void) +{ + asm volatile (""); + uprobe_link_info_sema_1++; +} + +noinline void uprobe_link_info_func_2(void) +{ + asm volatile (""); + uprobe_link_info_sema_2++; +} + +noinline void uprobe_link_info_func_3(void) +{ + asm volatile (""); + uprobe_link_info_sema_3++; +} + +static int +verify_umulti_link_info(int fd, bool retprobe, __u64 *offsets, + __u64 *cookies, __u64 *ref_ctr_offsets) +{ + char path[PATH_MAX], path_buf[PATH_MAX]; + struct bpf_link_info info; + __u32 len = sizeof(info); + __u64 ref_ctr_offsets_buf[3]; + __u64 offsets_buf[3]; + __u64 cookies_buf[3]; + int i, err, bit; + __u32 count = 0; + + memset(path, 0, sizeof(path)); + err = readlink("/proc/self/exe", path, sizeof(path)); + if (!ASSERT_NEQ(err, -1, "readlink")) + return -1; + + for (bit = 0; bit < 8; bit++) { + memset(&info, 0, sizeof(info)); + info.uprobe_multi.path = ptr_to_u64(path_buf); + info.uprobe_multi.path_size = sizeof(path_buf); + info.uprobe_multi.count = count; + + if (bit & 0x1) + 
info.uprobe_multi.offsets = ptr_to_u64(offsets_buf); + if (bit & 0x2) + info.uprobe_multi.cookies = ptr_to_u64(cookies_buf); + if (bit & 0x4) + info.uprobe_multi.ref_ctr_offsets = ptr_to_u64(ref_ctr_offsets_buf); + + err = bpf_link_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "bpf_link_get_info_by_fd")) + return -1; + + if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_UPROBE_MULTI, "info.type")) + return -1; + + ASSERT_EQ(info.uprobe_multi.pid, getpid(), "info.uprobe_multi.pid"); + ASSERT_EQ(info.uprobe_multi.count, 3, "info.uprobe_multi.count"); + ASSERT_EQ(info.uprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN, + retprobe, "info.uprobe_multi.flags.retprobe"); + ASSERT_EQ(info.uprobe_multi.path_size, strlen(path) + 1, "info.uprobe_multi.path_size"); + ASSERT_STREQ(path_buf, path, "info.uprobe_multi.path"); + + for (i = 0; i < info.uprobe_multi.count; i++) { + if (info.uprobe_multi.offsets) + ASSERT_EQ(offsets_buf[i], offsets[i], "info.uprobe_multi.offsets"); + if (info.uprobe_multi.cookies) + ASSERT_EQ(cookies_buf[i], cookies[i], "info.uprobe_multi.cookies"); + if (info.uprobe_multi.ref_ctr_offsets) { + ASSERT_EQ(ref_ctr_offsets_buf[i], ref_ctr_offsets[i], + "info.uprobe_multi.ref_ctr_offsets"); + } + } + count = count ?: info.uprobe_multi.count; + } + + return 0; +} + +static void verify_umulti_invalid_user_buffer(int fd) +{ + struct bpf_link_info info; + __u32 len = sizeof(info); + __u64 buf[3]; + int err; + + /* upath_size defined, not path */ + memset(&info, 0, sizeof(info)); + info.uprobe_multi.path_size = 3; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EINVAL, "failed_upath_size"); + + /* path defined, but small */ + memset(&info, 0, sizeof(info)); + info.uprobe_multi.path = ptr_to_u64(buf); + info.uprobe_multi.path_size = 3; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_LT(err, 0, "failed_upath_small"); + + /* path has wrong pointer */ + memset(&info, 0, sizeof(info)); + info.uprobe_multi.path_size = PATH_MAX; + 
info.uprobe_multi.path = 123; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EFAULT, "failed_bad_path_ptr"); + + /* count zero, with offsets */ + memset(&info, 0, sizeof(info)); + info.uprobe_multi.offsets = ptr_to_u64(buf); + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EINVAL, "failed_count"); + + /* offsets not big enough */ + memset(&info, 0, sizeof(info)); + info.uprobe_multi.offsets = ptr_to_u64(buf); + info.uprobe_multi.count = 2; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -ENOSPC, "failed_small_count"); + + /* offsets has wrong pointer */ + memset(&info, 0, sizeof(info)); + info.uprobe_multi.offsets = 123; + info.uprobe_multi.count = 3; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EFAULT, "failed_wrong_offsets"); +} + +static void test_uprobe_multi_fill_link_info(struct test_fill_link_info *skel, + bool retprobe, bool invalid) +{ + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts, + .retprobe = retprobe, + ); + const char *syms[3] = { + "uprobe_link_info_func_1", + "uprobe_link_info_func_2", + "uprobe_link_info_func_3", + }; + __u64 cookies[3] = { + 0xdead, + 0xbeef, + 0xcafe, + }; + const char *sema[3] = { + "uprobe_link_info_sema_1", + "uprobe_link_info_sema_2", + "uprobe_link_info_sema_3", + }; + __u64 *offsets = NULL, *ref_ctr_offsets; + struct bpf_link *link; + int link_fd, err; + + err = elf_resolve_syms_offsets("/proc/self/exe", 3, sema, + (unsigned long **) &ref_ctr_offsets, STT_OBJECT); + if (!ASSERT_OK(err, "elf_resolve_syms_offsets_object")) + return; + + err = elf_resolve_syms_offsets("/proc/self/exe", 3, syms, + (unsigned long **) &offsets, STT_FUNC); + if (!ASSERT_OK(err, "elf_resolve_syms_offsets_func")) + goto out; + + opts.syms = syms; + opts.cookies = &cookies[0]; + opts.ref_ctr_offsets = (unsigned long *) &ref_ctr_offsets[0]; + opts.cnt = ARRAY_SIZE(syms); + + link = bpf_program__attach_uprobe_multi(skel->progs.umulti_run, 0, + "/proc/self/exe", NULL, &opts); 
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi")) + goto out; + + link_fd = bpf_link__fd(link); + if (invalid) + verify_umulti_invalid_user_buffer(link_fd); + else + verify_umulti_link_info(link_fd, retprobe, offsets, cookies, ref_ctr_offsets); + + bpf_link__destroy(link); +out: + free(ref_ctr_offsets); + free(offsets); } void test_fill_link_info(void) @@ -337,6 +530,13 @@ void test_fill_link_info(void) if (test__start_subtest("kprobe_multi_invalid_ubuff")) test_kprobe_multi_fill_link_info(skel, true, true); + if (test__start_subtest("uprobe_multi_link_info")) + test_uprobe_multi_fill_link_info(skel, false, false); + if (test__start_subtest("uretprobe_multi_link_info")) + test_uprobe_multi_fill_link_info(skel, true, false); + if (test__start_subtest("uprobe_multi_invalid")) + test_uprobe_multi_fill_link_info(skel, false, true); + cleanup: test_fill_link_info__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c new file mode 100644 index 0000000000..37056ba738 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include <stdlib.h> +#include <sys/types.h> +#include <sys/xattr.h> +#include <linux/fsverity.h> +#include <unistd.h> +#include <test_progs.h> +#include "test_get_xattr.skel.h" +#include "test_fsverity.skel.h" + +static const char testfile[] = "/tmp/test_progs_fs_kfuncs"; + +static void test_xattr(void) +{ + struct test_get_xattr *skel = NULL; + int fd = -1, err; + + fd = open(testfile, O_CREAT | O_RDONLY, 0644); + if (!ASSERT_GE(fd, 0, "create_file")) + return; + + close(fd); + fd = -1; + + err = setxattr(testfile, "user.kfuncs", "hello", sizeof("hello"), 0); + if (err && errno == EOPNOTSUPP) { + printf("%s:SKIP:local fs doesn't support xattr (%d)\n" + "To run this test, make sure /tmp filesystem supports xattr.\n", + __func__, errno); + test__skip(); + goto out; + } + + if (!ASSERT_OK(err, "setxattr")) + goto out; + + skel = test_get_xattr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_get_xattr__open_and_load")) + goto out; + + skel->bss->monitored_pid = getpid(); + err = test_get_xattr__attach(skel); + + if (!ASSERT_OK(err, "test_get_xattr__attach")) + goto out; + + fd = open(testfile, O_RDONLY, 0644); + if (!ASSERT_GE(fd, 0, "open_file")) + goto out; + + ASSERT_EQ(skel->bss->found_xattr, 1, "found_xattr"); + +out: + close(fd); + test_get_xattr__destroy(skel); + remove(testfile); +} + +#ifndef SHA256_DIGEST_SIZE +#define SHA256_DIGEST_SIZE 32 +#endif + +static void test_fsverity(void) +{ + struct fsverity_enable_arg arg = {0}; + struct test_fsverity *skel = NULL; + struct fsverity_digest *d; + int fd, err; + char buffer[4096]; + + fd = open(testfile, O_CREAT | O_RDWR, 0644); + if (!ASSERT_GE(fd, 0, "create_file")) + return; + + /* Write random buffer, so the file is not empty */ + err = write(fd, buffer, 4096); + if (!ASSERT_EQ(err, 4096, "write_file")) + goto out; + close(fd); + + /* Reopen read-only, otherwise FS_IOC_ENABLE_VERITY will fail */ + fd = open(testfile, O_RDONLY, 0644); + if (!ASSERT_GE(fd, 0, "open_file1")) + return; + + /* Enable 
fsverity for the file. + * If the file system doesn't support verity, this will fail. Skip + * the test in such case. + */ + arg.version = 1; + arg.hash_algorithm = FS_VERITY_HASH_ALG_SHA256; + arg.block_size = 4096; + err = ioctl(fd, FS_IOC_ENABLE_VERITY, &arg); + if (err) { + printf("%s:SKIP:local fs doesn't support fsverity (%d)\n" + "To run this test, try enable CONFIG_FS_VERITY and enable FSVerity for the filesystem.\n", + __func__, errno); + test__skip(); + goto out; + } + + skel = test_fsverity__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_fsverity__open_and_load")) + goto out; + + /* Get fsverity_digest from ioctl */ + d = (struct fsverity_digest *)skel->bss->expected_digest; + d->digest_algorithm = FS_VERITY_HASH_ALG_SHA256; + d->digest_size = SHA256_DIGEST_SIZE; + err = ioctl(fd, FS_IOC_MEASURE_VERITY, skel->bss->expected_digest); + if (!ASSERT_OK(err, "ioctl_FS_IOC_MEASURE_VERITY")) + goto out; + + skel->bss->monitored_pid = getpid(); + err = test_fsverity__attach(skel); + if (!ASSERT_OK(err, "test_fsverity__attach")) + goto out; + + /* Reopen the file to trigger the program */ + close(fd); + fd = open(testfile, O_RDONLY); + if (!ASSERT_GE(fd, 0, "open_file2")) + goto out; + + ASSERT_EQ(skel->bss->got_fsverity, 1, "got_fsverity"); + ASSERT_EQ(skel->bss->digest_matches, 1, "digest_matches"); +out: + close(fd); + test_fsverity__destroy(skel); + remove(testfile); +} + +void test_fs_kfuncs(void) +{ + if (test__start_subtest("xattr")) + test_xattr(); + + if (test__start_subtest("fsverity")) + test_fsverity(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c b/tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c new file mode 100644 index 0000000000..65309894b2 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include <test_progs.h> +#include "verifier_global_subprogs.skel.h" +#include "freplace_dead_global_func.skel.h" + +void test_global_func_dead_code(void) +{ + struct verifier_global_subprogs *tgt_skel = NULL; + struct freplace_dead_global_func *skel = NULL; + char log_buf[4096]; + int err, tgt_fd; + + /* first, try to load target with good global subprog */ + tgt_skel = verifier_global_subprogs__open(); + if (!ASSERT_OK_PTR(tgt_skel, "tgt_skel_good_open")) + return; + + bpf_program__set_autoload(tgt_skel->progs.chained_global_func_calls_success, true); + + err = verifier_global_subprogs__load(tgt_skel); + if (!ASSERT_OK(err, "tgt_skel_good_load")) + goto out; + + tgt_fd = bpf_program__fd(tgt_skel->progs.chained_global_func_calls_success); + + /* Attach to good non-eliminated subprog */ + skel = freplace_dead_global_func__open(); + if (!ASSERT_OK_PTR(skel, "skel_good_open")) + goto out; + + err = bpf_program__set_attach_target(skel->progs.freplace_prog, tgt_fd, "global_good"); + ASSERT_OK(err, "attach_target_good"); + + err = freplace_dead_global_func__load(skel); + if (!ASSERT_OK(err, "skel_good_load")) + goto out; + + freplace_dead_global_func__destroy(skel); + + /* Try attaching to dead code-eliminated subprog */ + skel = freplace_dead_global_func__open(); + if (!ASSERT_OK_PTR(skel, "skel_dead_open")) + goto out; + + bpf_program__set_log_buf(skel->progs.freplace_prog, log_buf, sizeof(log_buf)); + err = bpf_program__set_attach_target(skel->progs.freplace_prog, tgt_fd, "global_dead"); + ASSERT_OK(err, "attach_target_dead"); + + err = freplace_dead_global_func__load(skel); + if (!ASSERT_ERR(err, "skel_dead_load")) + goto out; + + ASSERT_HAS_SUBSTR(log_buf, "Subprog global_dead doesn't exist", "dead_subprog_missing_msg"); + +out: + verifier_global_subprogs__destroy(tgt_skel); + freplace_dead_global_func__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c index bf84d4a1d9..3c440370c1 
100644 --- a/tools/testing/selftests/bpf/prog_tests/iters.c +++ b/tools/testing/selftests/bpf/prog_tests/iters.c @@ -193,6 +193,7 @@ static void subtest_task_iters(void) ASSERT_EQ(skel->bss->procs_cnt, 1, "procs_cnt"); ASSERT_EQ(skel->bss->threads_cnt, thread_num + 1, "threads_cnt"); ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 1, "proc_threads_cnt"); + ASSERT_EQ(skel->bss->invalid_cnt, 0, "invalid_cnt"); pthread_mutex_unlock(&do_nothing_mutex); for (int i = 0; i < thread_num; i++) ASSERT_OK(pthread_join(thread_ids[i], &ret), "pthread_join"); diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 4041cfa670..05000810e2 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -222,6 +222,7 @@ static void test_attach_api_fails(void) "bpf_fentry_test2", }; __u64 cookies[2]; + int saved_error; addrs[0] = ksym_get_addr("bpf_fentry_test1"); addrs[1] = ksym_get_addr("bpf_fentry_test2"); @@ -238,10 +239,11 @@ static void test_attach_api_fails(void) /* fail_1 - pattern and opts NULL */ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, NULL, NULL); + saved_error = -errno; if (!ASSERT_ERR_PTR(link, "fail_1")) goto cleanup; - if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_1_error")) + if (!ASSERT_EQ(saved_error, -EINVAL, "fail_1_error")) goto cleanup; /* fail_2 - both addrs and syms set */ @@ -252,10 +254,11 @@ static void test_attach_api_fails(void) link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, NULL, &opts); + saved_error = -errno; if (!ASSERT_ERR_PTR(link, "fail_2")) goto cleanup; - if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_2_error")) + if (!ASSERT_EQ(saved_error, -EINVAL, "fail_2_error")) goto cleanup; /* fail_3 - pattern and addrs set */ @@ -266,10 +269,11 @@ static void test_attach_api_fails(void) link = 
bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, "ksys_*", &opts); + saved_error = -errno; if (!ASSERT_ERR_PTR(link, "fail_3")) goto cleanup; - if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_3_error")) + if (!ASSERT_EQ(saved_error, -EINVAL, "fail_3_error")) goto cleanup; /* fail_4 - pattern and cnt set */ @@ -280,10 +284,11 @@ static void test_attach_api_fails(void) link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, "ksys_*", &opts); + saved_error = -errno; if (!ASSERT_ERR_PTR(link, "fail_4")) goto cleanup; - if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_4_error")) + if (!ASSERT_EQ(saved_error, -EINVAL, "fail_4_error")) goto cleanup; /* fail_5 - pattern and cookies */ @@ -294,10 +299,26 @@ static void test_attach_api_fails(void) link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, "ksys_*", &opts); + saved_error = -errno; if (!ASSERT_ERR_PTR(link, "fail_5")) goto cleanup; - if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_5_error")) + if (!ASSERT_EQ(saved_error, -EINVAL, "fail_5_error")) + goto cleanup; + + /* fail_6 - abnormal cnt */ + opts.addrs = (const unsigned long *) addrs; + opts.syms = NULL; + opts.cnt = INT_MAX; + opts.cookies = NULL; + + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, + NULL, &opts); + saved_error = -errno; + if (!ASSERT_ERR_PTR(link, "fail_6")) + goto cleanup; + + if (!ASSERT_EQ(saved_error, -E2BIG, "fail_6_error")) goto cleanup; cleanup: diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index c440ea3311..eb34d612d6 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -87,7 +87,7 @@ static void test_libbpf_bpf_link_type_str(void) const char *link_type_str; char buf[256]; - if (link_type == MAX_BPF_LINK_TYPE) + if (link_type == __MAX_BPF_LINK_TYPE) continue; link_type_name = 
btf__str_by_offset(btf, e->name_off); diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c index b25b870f87..827e713f6c 100644 --- a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c @@ -48,6 +48,27 @@ static void test_local_kptr_stash_plain(void) local_kptr_stash__destroy(skel); } +static void test_local_kptr_stash_local_with_root(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct local_kptr_stash *skel; + int ret; + + skel = local_kptr_stash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_local_with_root), &opts); + ASSERT_OK(ret, "local_kptr_stash_add_local_with_root run"); + ASSERT_OK(opts.retval, "local_kptr_stash_add_local_with_root retval"); + + local_kptr_stash__destroy(skel); +} + static void test_local_kptr_stash_unstash(void) { LIBBPF_OPTS(bpf_test_run_opts, opts, @@ -73,6 +94,37 @@ static void test_local_kptr_stash_unstash(void) local_kptr_stash__destroy(skel); } +static void test_refcount_acquire_without_unstash(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct local_kptr_stash *skel; + int ret; + + skel = local_kptr_stash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash), + &opts); + ASSERT_OK(ret, "refcount_acquire_without_unstash run"); + ASSERT_EQ(opts.retval, 2, "refcount_acquire_without_unstash retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_refcounted_node), &opts); + ASSERT_OK(ret, "stash_refcounted_node run"); + ASSERT_OK(opts.retval, "stash_refcounted_node 
retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash), + &opts); + ASSERT_OK(ret, "refcount_acquire_without_unstash (2) run"); + ASSERT_EQ(opts.retval, 42, "refcount_acquire_without_unstash (2) retval"); + + local_kptr_stash__destroy(skel); +} + static void test_local_kptr_stash_fail(void) { RUN_TESTS(local_kptr_stash_fail); @@ -84,8 +136,12 @@ void test_local_kptr_stash(void) test_local_kptr_stash_simple(); if (test__start_subtest("local_kptr_stash_plain")) test_local_kptr_stash_plain(); + if (test__start_subtest("local_kptr_stash_local_with_root")) + test_local_kptr_stash_local_with_root(); if (test__start_subtest("local_kptr_stash_unstash")) test_local_kptr_stash_unstash(); + if (test__start_subtest("refcount_acquire_without_unstash")) + test_refcount_acquire_without_unstash(); if (test__start_subtest("local_kptr_stash_fail")) test_local_kptr_stash_fail(); } diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c index fe9a23e65e..0f7ea4d7d9 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_buf.c +++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c @@ -78,7 +78,7 @@ static void obj_load_log_buf(void) ASSERT_OK_PTR(strstr(libbpf_log_buf, "prog 'bad_prog': BPF program load failed"), "libbpf_log_not_empty"); ASSERT_OK_PTR(strstr(obj_log_buf, "DATASEC license"), "obj_log_not_empty"); - ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx(off=0,imm=0) R10=fp0"), + ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx() R10=fp0"), "good_log_verbose"); ASSERT_OK_PTR(strstr(bad_log_buf, "invalid access to map value, value_size=16 off=16000 size=4"), "bad_log_not_empty"); @@ -175,7 +175,7 @@ static void bpf_prog_load_log_buf(void) opts.log_level = 2; fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "good_prog", "GPL", good_prog_insns, good_prog_insn_cnt, &opts); - ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx(off=0,imm=0) R10=fp0"), "good_log_2"); + 
ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx() R10=fp0"), "good_log_2"); ASSERT_GE(fd, 0, "good_fd2"); if (fd >= 0) close(fd); diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c index effd78b2a6..7a3fa2ff56 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_fixup.c +++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c @@ -169,9 +169,9 @@ void test_log_fixup(void) if (test__start_subtest("bad_core_relo_trunc_none")) bad_core_relo(0, TRUNC_NONE /* full buf */); if (test__start_subtest("bad_core_relo_trunc_partial")) - bad_core_relo(300, TRUNC_PARTIAL /* truncate original log a bit */); + bad_core_relo(280, TRUNC_PARTIAL /* truncate original log a bit */); if (test__start_subtest("bad_core_relo_trunc_full")) - bad_core_relo(210, TRUNC_FULL /* truncate also libbpf's message patch */); + bad_core_relo(220, TRUNC_FULL /* truncate also libbpf's message patch */); if (test__start_subtest("bad_core_relo_subprog")) bad_core_relo_subprog(); if (test__start_subtest("missing_map")) diff --git a/tools/testing/selftests/bpf/prog_tests/map_btf.c b/tools/testing/selftests/bpf/prog_tests/map_btf.c new file mode 100644 index 0000000000..2c4ef60375 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_btf.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include <test_progs.h> + +#include "normal_map_btf.skel.h" +#include "map_in_map_btf.skel.h" + +static void do_test_normal_map_btf(void) +{ + struct normal_map_btf *skel; + int i, err, new_fd = -1; + int map_fd_arr[64]; + + skel = normal_map_btf__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_load")) + return; + + err = normal_map_btf__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto out; + + skel->bss->pid = getpid(); + usleep(1); + ASSERT_TRUE(skel->bss->done, "done"); + + /* Use percpu_array to slow bpf_map_free_deferred() down. 
+ * The memory allocation may fail, so doesn't check the returned fd. + */ + for (i = 0; i < ARRAY_SIZE(map_fd_arr); i++) + map_fd_arr[i] = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, NULL, 4, 4, 256, NULL); + + /* Close array fd later */ + new_fd = dup(bpf_map__fd(skel->maps.array)); +out: + normal_map_btf__destroy(skel); + if (new_fd < 0) + return; + /* Use kern_sync_rcu() to wait for the start of the free of the bpf + * program and use an assumed delay to wait for the release of the map + * btf which is held by other maps (e.g, bss). After that, array map + * holds the last reference of map btf. + */ + kern_sync_rcu(); + usleep(4000); + /* Spawn multiple kworkers to delay the invocation of + * bpf_map_free_deferred() for array map. + */ + for (i = 0; i < ARRAY_SIZE(map_fd_arr); i++) { + if (map_fd_arr[i] < 0) + continue; + close(map_fd_arr[i]); + } + close(new_fd); +} + +static void do_test_map_in_map_btf(void) +{ + int err, zero = 0, new_fd = -1; + struct map_in_map_btf *skel; + + skel = map_in_map_btf__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_load")) + return; + + err = map_in_map_btf__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto out; + + skel->bss->pid = getpid(); + usleep(1); + ASSERT_TRUE(skel->bss->done, "done"); + + /* Close inner_array fd later */ + new_fd = dup(bpf_map__fd(skel->maps.inner_array)); + /* Defer the free of inner_array */ + err = bpf_map__delete_elem(skel->maps.outer_array, &zero, sizeof(zero), 0); + ASSERT_OK(err, "delete inner map"); +out: + map_in_map_btf__destroy(skel); + if (new_fd < 0) + return; + /* Use kern_sync_rcu() to wait for the start of the free of the bpf + * program and use an assumed delay to wait for the free of the outer + * map and the release of map btf. After that, inner map holds the last + * reference of map btf. 
+ */ + kern_sync_rcu(); + usleep(10000); + close(new_fd); +} + +void test_map_btf(void) +{ + if (test__start_subtest("array_btf")) + do_test_normal_map_btf(); + if (test__start_subtest("inner_array_btf")) + do_test_map_in_map_btf(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/map_in_map.c b/tools/testing/selftests/bpf/prog_tests/map_in_map.c new file mode 100644 index 0000000000..d2a10eb4e5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_in_map.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/syscall.h> +#include <test_progs.h> +#include <bpf/btf.h> +#include "access_map_in_map.skel.h" + +struct thread_ctx { + pthread_barrier_t barrier; + int outer_map_fd; + int start, abort; + int loop, err; +}; + +static int wait_for_start_or_abort(struct thread_ctx *ctx) +{ + while (!ctx->start && !ctx->abort) + usleep(1); + return ctx->abort ? -1 : 0; +} + +static void *update_map_fn(void *data) +{ + struct thread_ctx *ctx = data; + int loop = ctx->loop, err = 0; + + if (wait_for_start_or_abort(ctx) < 0) + return NULL; + pthread_barrier_wait(&ctx->barrier); + + while (loop-- > 0) { + int fd, zero = 0; + + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL); + if (fd < 0) { + err |= 1; + pthread_barrier_wait(&ctx->barrier); + continue; + } + + /* Remove the old inner map */ + if (bpf_map_update_elem(ctx->outer_map_fd, &zero, &fd, 0) < 0) + err |= 2; + close(fd); + pthread_barrier_wait(&ctx->barrier); + } + + ctx->err = err; + + return NULL; +} + +static void *access_map_fn(void *data) +{ + struct thread_ctx *ctx = data; + int loop = ctx->loop; + + if (wait_for_start_or_abort(ctx) < 0) + return NULL; + pthread_barrier_wait(&ctx->barrier); + + while (loop-- > 0) { + /* Access the old inner map */ + syscall(SYS_getpgid); + pthread_barrier_wait(&ctx->barrier); + } + + return NULL; +} + +static void test_map_in_map_access(const 
char *prog_name, const char *map_name) +{ + struct access_map_in_map *skel; + struct bpf_map *outer_map; + struct bpf_program *prog; + struct thread_ctx ctx; + pthread_t tid[2]; + int err; + + skel = access_map_in_map__open(); + if (!ASSERT_OK_PTR(skel, "access_map_in_map open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "find program")) + goto out; + bpf_program__set_autoload(prog, true); + + outer_map = bpf_object__find_map_by_name(skel->obj, map_name); + if (!ASSERT_OK_PTR(outer_map, "find map")) + goto out; + + err = access_map_in_map__load(skel); + if (!ASSERT_OK(err, "access_map_in_map load")) + goto out; + + err = access_map_in_map__attach(skel); + if (!ASSERT_OK(err, "access_map_in_map attach")) + goto out; + + skel->bss->tgid = getpid(); + + memset(&ctx, 0, sizeof(ctx)); + pthread_barrier_init(&ctx.barrier, NULL, 2); + ctx.outer_map_fd = bpf_map__fd(outer_map); + ctx.loop = 4; + + err = pthread_create(&tid[0], NULL, update_map_fn, &ctx); + if (!ASSERT_OK(err, "close_thread")) + goto out; + + err = pthread_create(&tid[1], NULL, access_map_fn, &ctx); + if (!ASSERT_OK(err, "read_thread")) { + ctx.abort = 1; + pthread_join(tid[0], NULL); + goto out; + } + + ctx.start = 1; + pthread_join(tid[0], NULL); + pthread_join(tid[1], NULL); + + ASSERT_OK(ctx.err, "err"); +out: + access_map_in_map__destroy(skel); +} + +void test_map_in_map(void) +{ + if (test__start_subtest("acc_map_in_array")) + test_map_in_map_access("access_map_in_array", "outer_array_map"); + if (test__start_subtest("sleepable_acc_map_in_array")) + test_map_in_map_access("sleepable_access_map_in_array", "outer_array_map"); + if (test__start_subtest("acc_map_in_htab")) + test_map_in_map_access("access_map_in_htab", "outer_htab_map"); + if (test__start_subtest("sleepable_acc_map_in_htab")) + test_map_in_map_access("sleepable_access_map_in_htab", "outer_htab_map"); +} + diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c 
b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c new file mode 100644 index 0000000000..3405923fe4 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2024. Huawei Technologies Co., Ltd */ +#include "test_progs.h" +#include "read_vsyscall.skel.h" + +#if defined(__x86_64__) +/* For VSYSCALL_ADDR */ +#include <asm/vsyscall.h> +#else +/* To prevent build failure on non-x86 arch */ +#define VSYSCALL_ADDR 0UL +#endif + +struct read_ret_desc { + const char *name; + int ret; +} all_read[] = { + { .name = "probe_read_kernel", .ret = -ERANGE }, + { .name = "probe_read_kernel_str", .ret = -ERANGE }, + { .name = "probe_read", .ret = -ERANGE }, + { .name = "probe_read_str", .ret = -ERANGE }, + { .name = "probe_read_user", .ret = -EFAULT }, + { .name = "probe_read_user_str", .ret = -EFAULT }, + { .name = "copy_from_user", .ret = -EFAULT }, + { .name = "copy_from_user_task", .ret = -EFAULT }, +}; + +void test_read_vsyscall(void) +{ + struct read_vsyscall *skel; + unsigned int i; + int err; + +#if !defined(__x86_64__) + test__skip(); + return; +#endif + skel = read_vsyscall__open_and_load(); + if (!ASSERT_OK_PTR(skel, "read_vsyscall open_load")) + return; + + skel->bss->target_pid = getpid(); + err = read_vsyscall__attach(skel); + if (!ASSERT_EQ(err, 0, "read_vsyscall attach")) + goto out; + + /* userspace may don't have vsyscall page due to LEGACY_VSYSCALL_NONE, + * but it doesn't affect the returned error codes. 
+ */ + skel->bss->user_ptr = (void *)VSYSCALL_ADDR; + usleep(1); + + for (i = 0; i < ARRAY_SIZE(all_read); i++) + ASSERT_EQ(skel->bss->read_ret[i], all_read[i].ret, all_read[i].name); +out: + read_vsyscall__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c new file mode 100644 index 0000000000..8100509e56 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Red Hat, Inc. */ +#include <test_progs.h> +#include "fentry_recursive.skel.h" +#include "fentry_recursive_target.skel.h" +#include <bpf/btf.h> +#include "bpf/libbpf_internal.h" + +/* Test recursive attachment of tracing progs with more than one nesting level + * is not possible. Create a chain of attachment, verify that the last prog + * will fail. Depending on the arguments, following cases are tested: + * + * - Recursive loading of tracing progs, without attaching (attach = false, + * detach = false). The chain looks like this: + * load target + * load fentry1 -> target + * load fentry2 -> fentry1 (fail) + * + * - Recursive attach of tracing progs (attach = true, detach = false). The + * chain looks like this: + * load target + * load fentry1 -> target + * attach fentry1 -> target + * load fentry2 -> fentry1 (fail) + * + * - Recursive attach and detach of tracing progs (attach = true, detach = + * true). This validates that attach_tracing_prog flag will be set throughout + * the whole lifecycle of an fentry prog, independently from whether it's + * detached. 
The chain looks like this: + * load target + * load fentry1 -> target + * attach fentry1 -> target + * detach fentry1 + * load fentry2 -> fentry1 (fail) + */ +static void test_recursive_fentry_chain(bool attach, bool detach) +{ + struct fentry_recursive_target *target_skel = NULL; + struct fentry_recursive *tracing_chain[2] = {}; + struct bpf_program *prog; + int prev_fd, err; + + target_skel = fentry_recursive_target__open_and_load(); + if (!ASSERT_OK_PTR(target_skel, "fentry_recursive_target__open_and_load")) + return; + + /* Create an attachment chain with two fentry progs */ + for (int i = 0; i < 2; i++) { + tracing_chain[i] = fentry_recursive__open(); + if (!ASSERT_OK_PTR(tracing_chain[i], "fentry_recursive__open")) + goto close_prog; + + /* The first prog in the chain is going to be attached to the target + * fentry program, the second one to the previous in the chain. + */ + prog = tracing_chain[i]->progs.recursive_attach; + if (i == 0) { + prev_fd = bpf_program__fd(target_skel->progs.test1); + err = bpf_program__set_attach_target(prog, prev_fd, "test1"); + } else { + prev_fd = bpf_program__fd(tracing_chain[i-1]->progs.recursive_attach); + err = bpf_program__set_attach_target(prog, prev_fd, "recursive_attach"); + } + + if (!ASSERT_OK(err, "bpf_program__set_attach_target")) + goto close_prog; + + err = fentry_recursive__load(tracing_chain[i]); + /* The first attach should succeed, the second fail */ + if (i == 0) { + if (!ASSERT_OK(err, "fentry_recursive__load")) + goto close_prog; + + if (attach) { + err = fentry_recursive__attach(tracing_chain[i]); + if (!ASSERT_OK(err, "fentry_recursive__attach")) + goto close_prog; + } + + if (detach) { + /* Flag attach_tracing_prog should still be set, preventing + * attachment of the following prog. 
+ */ + fentry_recursive__detach(tracing_chain[i]); + } + } else { + if (!ASSERT_ERR(err, "fentry_recursive__load")) + goto close_prog; + } + } + +close_prog: + fentry_recursive_target__destroy(target_skel); + for (int i = 0; i < 2; i++) { + fentry_recursive__destroy(tracing_chain[i]); + } +} + +void test_recursive_fentry(void) +{ + if (test__start_subtest("attach")) + test_recursive_fentry_chain(true, false); + if (test__start_subtest("load")) + test_recursive_fentry_chain(false, false); + if (test__start_subtest("detach")) + test_recursive_fentry_chain(true, true); +} + +/* Test that a tracing prog reattachment (when we land in + * "prog->aux->dst_trampoline and tgt_prog is NULL" branch in + * bpf_tracing_prog_attach) does not lead to a crash due to missing attach_btf + */ +void test_fentry_attach_btf_presence(void) +{ + struct fentry_recursive_target *target_skel = NULL; + struct fentry_recursive *tracing_skel = NULL; + struct bpf_program *prog; + int err, link_fd, tgt_prog_fd; + + target_skel = fentry_recursive_target__open_and_load(); + if (!ASSERT_OK_PTR(target_skel, "fentry_recursive_target__open_and_load")) + goto close_prog; + + tracing_skel = fentry_recursive__open(); + if (!ASSERT_OK_PTR(tracing_skel, "fentry_recursive__open")) + goto close_prog; + + prog = tracing_skel->progs.recursive_attach; + tgt_prog_fd = bpf_program__fd(target_skel->progs.fentry_target); + err = bpf_program__set_attach_target(prog, tgt_prog_fd, "fentry_target"); + if (!ASSERT_OK(err, "bpf_program__set_attach_target")) + goto close_prog; + + err = fentry_recursive__load(tracing_skel); + if (!ASSERT_OK(err, "fentry_recursive__load")) + goto close_prog; + + tgt_prog_fd = bpf_program__fd(tracing_skel->progs.recursive_attach); + link_fd = bpf_link_create(tgt_prog_fd, 0, BPF_TRACE_FENTRY, NULL); + if (!ASSERT_GE(link_fd, 0, "link_fd")) + goto close_prog; + + fentry_recursive__detach(tracing_skel); + + err = fentry_recursive__attach(tracing_skel); + ASSERT_ERR(err, 
"fentry_recursive__attach"); + +close_prog: + fentry_recursive_target__destroy(target_skel); + fentry_recursive__destroy(tracing_skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c new file mode 100644 index 0000000000..820d0bcfc4 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -0,0 +1,2131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#define _GNU_SOURCE +#include <limits.h> +#include <test_progs.h> +#include <linux/filter.h> +#include <linux/bpf.h> + +/* ================================= + * SHORT AND CONSISTENT NUMBER TYPES + * ================================= + */ +#define U64_MAX ((u64)UINT64_MAX) +#define U32_MAX ((u32)UINT_MAX) +#define U16_MAX ((u32)UINT_MAX) +#define S64_MIN ((s64)INT64_MIN) +#define S64_MAX ((s64)INT64_MAX) +#define S32_MIN ((s32)INT_MIN) +#define S32_MAX ((s32)INT_MAX) +#define S16_MIN ((s16)0x80000000) +#define S16_MAX ((s16)0x7fffffff) + +typedef unsigned long long ___u64; +typedef unsigned int ___u32; +typedef long long ___s64; +typedef int ___s32; + +/* avoid conflicts with already defined types in kernel headers */ +#define u64 ___u64 +#define u32 ___u32 +#define s64 ___s64 +#define s32 ___s32 + +/* ================================== + * STRING BUF ABSTRACTION AND HELPERS + * ================================== + */ +struct strbuf { + size_t buf_sz; + int pos; + char buf[0]; +}; + +#define DEFINE_STRBUF(name, N) \ + struct { struct strbuf buf; char data[(N)]; } ___##name; \ + struct strbuf *name = (___##name.buf.buf_sz = (N), ___##name.buf.pos = 0, &___##name.buf) + +__printf(2, 3) +static inline void snappendf(struct strbuf *s, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + s->pos += vsnprintf(s->buf + s->pos, + s->pos < s->buf_sz ? 
s->buf_sz - s->pos : 0, + fmt, args); + va_end(args); +} + +/* ================================== + * GENERIC NUMBER TYPE AND OPERATIONS + * ================================== + */ +enum num_t { U64, first_t = U64, U32, S64, S32, last_t = S32 }; + +static __always_inline u64 min_t(enum num_t t, u64 x, u64 y) +{ + switch (t) { + case U64: return (u64)x < (u64)y ? (u64)x : (u64)y; + case U32: return (u32)x < (u32)y ? (u32)x : (u32)y; + case S64: return (s64)x < (s64)y ? (s64)x : (s64)y; + case S32: return (s32)x < (s32)y ? (s32)x : (s32)y; + default: printf("min_t!\n"); exit(1); + } +} + +static __always_inline u64 max_t(enum num_t t, u64 x, u64 y) +{ + switch (t) { + case U64: return (u64)x > (u64)y ? (u64)x : (u64)y; + case U32: return (u32)x > (u32)y ? (u32)x : (u32)y; + case S64: return (s64)x > (s64)y ? (s64)x : (s64)y; + case S32: return (s32)x > (s32)y ? (u32)(s32)x : (u32)(s32)y; + default: printf("max_t!\n"); exit(1); + } +} + +static __always_inline u64 cast_t(enum num_t t, u64 x) +{ + switch (t) { + case U64: return (u64)x; + case U32: return (u32)x; + case S64: return (s64)x; + case S32: return (u32)(s32)x; + default: printf("cast_t!\n"); exit(1); + } +} + +static const char *t_str(enum num_t t) +{ + switch (t) { + case U64: return "u64"; + case U32: return "u32"; + case S64: return "s64"; + case S32: return "s32"; + default: printf("t_str!\n"); exit(1); + } +} + +static enum num_t t_is_32(enum num_t t) +{ + switch (t) { + case U64: return false; + case U32: return true; + case S64: return false; + case S32: return true; + default: printf("t_is_32!\n"); exit(1); + } +} + +static enum num_t t_signed(enum num_t t) +{ + switch (t) { + case U64: return S64; + case U32: return S32; + case S64: return S64; + case S32: return S32; + default: printf("t_signed!\n"); exit(1); + } +} + +static enum num_t t_unsigned(enum num_t t) +{ + switch (t) { + case U64: return U64; + case U32: return U32; + case S64: return U64; + case S32: return U32; + default: 
printf("t_unsigned!\n"); exit(1); + } +} + +#define UNUM_MAX_DECIMAL U16_MAX +#define SNUM_MAX_DECIMAL S16_MAX +#define SNUM_MIN_DECIMAL S16_MIN + +static bool num_is_small(enum num_t t, u64 x) +{ + switch (t) { + case U64: return (u64)x <= UNUM_MAX_DECIMAL; + case U32: return (u32)x <= UNUM_MAX_DECIMAL; + case S64: return (s64)x >= SNUM_MIN_DECIMAL && (s64)x <= SNUM_MAX_DECIMAL; + case S32: return (s32)x >= SNUM_MIN_DECIMAL && (s32)x <= SNUM_MAX_DECIMAL; + default: printf("num_is_small!\n"); exit(1); + } +} + +static void snprintf_num(enum num_t t, struct strbuf *sb, u64 x) +{ + bool is_small = num_is_small(t, x); + + if (is_small) { + switch (t) { + case U64: return snappendf(sb, "%llu", (u64)x); + case U32: return snappendf(sb, "%u", (u32)x); + case S64: return snappendf(sb, "%lld", (s64)x); + case S32: return snappendf(sb, "%d", (s32)x); + default: printf("snprintf_num!\n"); exit(1); + } + } else { + switch (t) { + case U64: + if (x == U64_MAX) + return snappendf(sb, "U64_MAX"); + else if (x >= U64_MAX - 256) + return snappendf(sb, "U64_MAX-%llu", U64_MAX - x); + else + return snappendf(sb, "%#llx", (u64)x); + case U32: + if ((u32)x == U32_MAX) + return snappendf(sb, "U32_MAX"); + else if ((u32)x >= U32_MAX - 256) + return snappendf(sb, "U32_MAX-%u", U32_MAX - (u32)x); + else + return snappendf(sb, "%#x", (u32)x); + case S64: + if ((s64)x == S64_MAX) + return snappendf(sb, "S64_MAX"); + else if ((s64)x >= S64_MAX - 256) + return snappendf(sb, "S64_MAX-%lld", S64_MAX - (s64)x); + else if ((s64)x == S64_MIN) + return snappendf(sb, "S64_MIN"); + else if ((s64)x <= S64_MIN + 256) + return snappendf(sb, "S64_MIN+%lld", (s64)x - S64_MIN); + else + return snappendf(sb, "%#llx", (s64)x); + case S32: + if ((s32)x == S32_MAX) + return snappendf(sb, "S32_MAX"); + else if ((s32)x >= S32_MAX - 256) + return snappendf(sb, "S32_MAX-%d", S32_MAX - (s32)x); + else if ((s32)x == S32_MIN) + return snappendf(sb, "S32_MIN"); + else if ((s32)x <= S32_MIN + 256) + return 
snappendf(sb, "S32_MIN+%d", (s32)x - S32_MIN); + else + return snappendf(sb, "%#x", (s32)x); + default: printf("snprintf_num!\n"); exit(1); + } + } +} + +/* =================================== + * GENERIC RANGE STRUCT AND OPERATIONS + * =================================== + */ +struct range { + u64 a, b; +}; + +static void snprintf_range(enum num_t t, struct strbuf *sb, struct range x) +{ + if (x.a == x.b) + return snprintf_num(t, sb, x.a); + + snappendf(sb, "["); + snprintf_num(t, sb, x.a); + snappendf(sb, "; "); + snprintf_num(t, sb, x.b); + snappendf(sb, "]"); +} + +static void print_range(enum num_t t, struct range x, const char *sfx) +{ + DEFINE_STRBUF(sb, 128); + + snprintf_range(t, sb, x); + printf("%s%s", sb->buf, sfx); +} + +static const struct range unkn[] = { + [U64] = { 0, U64_MAX }, + [U32] = { 0, U32_MAX }, + [S64] = { (u64)S64_MIN, (u64)S64_MAX }, + [S32] = { (u64)(u32)S32_MIN, (u64)(u32)S32_MAX }, +}; + +static struct range unkn_subreg(enum num_t t) +{ + switch (t) { + case U64: return unkn[U32]; + case U32: return unkn[U32]; + case S64: return unkn[U32]; + case S32: return unkn[S32]; + default: printf("unkn_subreg!\n"); exit(1); + } +} + +static struct range range(enum num_t t, u64 a, u64 b) +{ + switch (t) { + case U64: return (struct range){ (u64)a, (u64)b }; + case U32: return (struct range){ (u32)a, (u32)b }; + case S64: return (struct range){ (s64)a, (s64)b }; + case S32: return (struct range){ (u32)(s32)a, (u32)(s32)b }; + default: printf("range!\n"); exit(1); + } +} + +static __always_inline u32 sign64(u64 x) { return (x >> 63) & 1; } +static __always_inline u32 sign32(u64 x) { return ((u32)x >> 31) & 1; } +static __always_inline u32 upper32(u64 x) { return (u32)(x >> 32); } +static __always_inline u64 swap_low32(u64 x, u32 y) { return (x & 0xffffffff00000000ULL) | y; } + +static bool range_eq(struct range x, struct range y) +{ + return x.a == y.a && x.b == y.b; +} + +static struct range range_cast_to_s32(struct range x) +{ + u64 a = x.a, b 
= x.b; + + /* if upper 32 bits are constant, lower 32 bits should form a proper + * s32 range to be correct + */ + if (upper32(a) == upper32(b) && (s32)a <= (s32)b) + return range(S32, a, b); + + /* Special case where upper bits form a small sequence of two + * sequential numbers (in 32-bit unsigned space, so 0xffffffff to + * 0x00000000 is also valid), while lower bits form a proper s32 range + * going from negative numbers to positive numbers. + * + * E.g.: [0xfffffff0ffffff00; 0xfffffff100000010]. Iterating + * over full 64-bit numbers range will form a proper [-16, 16] + * ([0xffffff00; 0x00000010]) range in its lower 32 bits. + */ + if (upper32(a) + 1 == upper32(b) && (s32)a < 0 && (s32)b >= 0) + return range(S32, a, b); + + /* otherwise we can't derive much meaningful information */ + return unkn[S32]; +} + +static struct range range_cast_u64(enum num_t to_t, struct range x) +{ + u64 a = (u64)x.a, b = (u64)x.b; + + switch (to_t) { + case U64: + return x; + case U32: + if (upper32(a) != upper32(b)) + return unkn[U32]; + return range(U32, a, b); + case S64: + if (sign64(a) != sign64(b)) + return unkn[S64]; + return range(S64, a, b); + case S32: + return range_cast_to_s32(x); + default: printf("range_cast_u64!\n"); exit(1); + } +} + +static struct range range_cast_s64(enum num_t to_t, struct range x) +{ + s64 a = (s64)x.a, b = (s64)x.b; + + switch (to_t) { + case U64: + /* equivalent to (s64)a <= (s64)b check */ + if (sign64(a) != sign64(b)) + return unkn[U64]; + return range(U64, a, b); + case U32: + if (upper32(a) != upper32(b) || sign32(a) != sign32(b)) + return unkn[U32]; + return range(U32, a, b); + case S64: + return x; + case S32: + return range_cast_to_s32(x); + default: printf("range_cast_s64!\n"); exit(1); + } +} + +static struct range range_cast_u32(enum num_t to_t, struct range x) +{ + u32 a = (u32)x.a, b = (u32)x.b; + + switch (to_t) { + case U64: + case S64: + /* u32 is always a valid zero-extended u64/s64 */ + return range(to_t, a, b); + case U32: 
+ return x; + case S32: + return range_cast_to_s32(range(U32, a, b)); + default: printf("range_cast_u32!\n"); exit(1); + } +} + +static struct range range_cast_s32(enum num_t to_t, struct range x) +{ + s32 a = (s32)x.a, b = (s32)x.b; + + switch (to_t) { + case U64: + case U32: + case S64: + if (sign32(a) != sign32(b)) + return unkn[to_t]; + return range(to_t, a, b); + case S32: + return x; + default: printf("range_cast_s32!\n"); exit(1); + } +} + +/* Reinterpret range in *from_t* domain as a range in *to_t* domain preserving + * all possible information. Worst case, it will be unknown range within + * *to_t* domain, if nothing more specific can be guaranteed during the + * conversion + */ +static struct range range_cast(enum num_t from_t, enum num_t to_t, struct range from) +{ + switch (from_t) { + case U64: return range_cast_u64(to_t, from); + case U32: return range_cast_u32(to_t, from); + case S64: return range_cast_s64(to_t, from); + case S32: return range_cast_s32(to_t, from); + default: printf("range_cast!\n"); exit(1); + } +} + +static bool is_valid_num(enum num_t t, u64 x) +{ + switch (t) { + case U64: return true; + case U32: return upper32(x) == 0; + case S64: return true; + case S32: return upper32(x) == 0; + default: printf("is_valid_num!\n"); exit(1); + } +} + +static bool is_valid_range(enum num_t t, struct range x) +{ + if (!is_valid_num(t, x.a) || !is_valid_num(t, x.b)) + return false; + + switch (t) { + case U64: return (u64)x.a <= (u64)x.b; + case U32: return (u32)x.a <= (u32)x.b; + case S64: return (s64)x.a <= (s64)x.b; + case S32: return (s32)x.a <= (s32)x.b; + default: printf("is_valid_range!\n"); exit(1); + } +} + +static struct range range_improve(enum num_t t, struct range old, struct range new) +{ + return range(t, max_t(t, old.a, new.a), min_t(t, old.b, new.b)); +} + +static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, struct range y) +{ + struct range y_cast; + + y_cast = range_cast(y_t, x_t, y); + + /* the 
case when new range knowledge, *y*, is a 32-bit subregister + * range, while previous range knowledge, *x*, is a full register + * 64-bit range, needs special treatment to take into account upper 32 + * bits of full register range + */ + if (t_is_32(y_t) && !t_is_32(x_t)) { + struct range x_swap; + + /* some combinations of upper 32 bits and sign bit can lead to + * invalid ranges, in such cases it's easier to detect them + * after cast/swap than try to enumerate all the conditions + * under which transformation and knowledge transfer is valid + */ + x_swap = range(x_t, swap_low32(x.a, y_cast.a), swap_low32(x.b, y_cast.b)); + if (!is_valid_range(x_t, x_swap)) + return x; + return range_improve(x_t, x, x_swap); + } + + /* otherwise, plain range cast and intersection works */ + return range_improve(x_t, x, y_cast); +} + +/* ======================= + * GENERIC CONDITIONAL OPS + * ======================= + */ +enum op { OP_LT, OP_LE, OP_GT, OP_GE, OP_EQ, OP_NE, first_op = OP_LT, last_op = OP_NE }; + +static enum op complement_op(enum op op) +{ + switch (op) { + case OP_LT: return OP_GE; + case OP_LE: return OP_GT; + case OP_GT: return OP_LE; + case OP_GE: return OP_LT; + case OP_EQ: return OP_NE; + case OP_NE: return OP_EQ; + default: printf("complement_op!\n"); exit(1); + } +} + +static const char *op_str(enum op op) +{ + switch (op) { + case OP_LT: return "<"; + case OP_LE: return "<="; + case OP_GT: return ">"; + case OP_GE: return ">="; + case OP_EQ: return "=="; + case OP_NE: return "!="; + default: printf("op_str!\n"); exit(1); + } +} + +/* Can register with range [x.a, x.b] *EVER* satisfy + * OP (<, <=, >, >=, ==, !=) relation to + * a regsiter with range [y.a, y.b] + * _in *num_t* domain_ + */ +static bool range_canbe_op(enum num_t t, struct range x, struct range y, enum op op) +{ +#define range_canbe(T) do { \ + switch (op) { \ + case OP_LT: return (T)x.a < (T)y.b; \ + case OP_LE: return (T)x.a <= (T)y.b; \ + case OP_GT: return (T)x.b > (T)y.a; \ + case OP_GE: 
return (T)x.b >= (T)y.a; \ + case OP_EQ: return (T)max_t(t, x.a, y.a) <= (T)min_t(t, x.b, y.b); \ + case OP_NE: return !((T)x.a == (T)x.b && (T)y.a == (T)y.b && (T)x.a == (T)y.a); \ + default: printf("range_canbe op %d\n", op); exit(1); \ + } \ +} while (0) + + switch (t) { + case U64: { range_canbe(u64); } + case U32: { range_canbe(u32); } + case S64: { range_canbe(s64); } + case S32: { range_canbe(s32); } + default: printf("range_canbe!\n"); exit(1); + } +#undef range_canbe +} + +/* Does register with range [x.a, x.b] *ALWAYS* satisfy + * OP (<, <=, >, >=, ==, !=) relation to + * a regsiter with range [y.a, y.b] + * _in *num_t* domain_ + */ +static bool range_always_op(enum num_t t, struct range x, struct range y, enum op op) +{ + /* always op <=> ! canbe complement(op) */ + return !range_canbe_op(t, x, y, complement_op(op)); +} + +/* Does register with range [x.a, x.b] *NEVER* satisfy + * OP (<, <=, >, >=, ==, !=) relation to + * a regsiter with range [y.a, y.b] + * _in *num_t* domain_ + */ +static bool range_never_op(enum num_t t, struct range x, struct range y, enum op op) +{ + return !range_canbe_op(t, x, y, op); +} + +/* similar to verifier's is_branch_taken(): + * 1 - always taken; + * 0 - never taken, + * -1 - unsure. + */ +static int range_branch_taken_op(enum num_t t, struct range x, struct range y, enum op op) +{ + if (range_always_op(t, x, y, op)) + return 1; + if (range_never_op(t, x, y, op)) + return 0; + return -1; +} + +/* What would be the new estimates for register x and y ranges assuming truthful + * OP comparison between them. I.e., (x OP y == true) => x <- newx, y <- newy. + * + * We assume "interesting" cases where ranges overlap. Cases where it's + * obvious that (x OP y) is either always true or false should be filtered with + * range_never and range_always checks. 
+ */ +static void range_cond(enum num_t t, struct range x, struct range y, + enum op op, struct range *newx, struct range *newy) +{ + if (!range_canbe_op(t, x, y, op)) { + /* nothing to adjust, can't happen, return original values */ + *newx = x; + *newy = y; + return; + } + switch (op) { + case OP_LT: + *newx = range(t, x.a, min_t(t, x.b, y.b - 1)); + *newy = range(t, max_t(t, x.a + 1, y.a), y.b); + break; + case OP_LE: + *newx = range(t, x.a, min_t(t, x.b, y.b)); + *newy = range(t, max_t(t, x.a, y.a), y.b); + break; + case OP_GT: + *newx = range(t, max_t(t, x.a, y.a + 1), x.b); + *newy = range(t, y.a, min_t(t, x.b - 1, y.b)); + break; + case OP_GE: + *newx = range(t, max_t(t, x.a, y.a), x.b); + *newy = range(t, y.a, min_t(t, x.b, y.b)); + break; + case OP_EQ: + *newx = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b)); + *newy = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b)); + break; + case OP_NE: + /* below logic is supported by the verifier now */ + if (x.a == x.b && x.a == y.a) { + /* X is a constant matching left side of Y */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a + 1, y.b); + } else if (x.a == x.b && x.b == y.b) { + /* X is a constant matching rigth side of Y */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a, y.b - 1); + } else if (y.a == y.b && x.a == y.a) { + /* Y is a constant matching left side of X */ + *newx = range(t, x.a + 1, x.b); + *newy = range(t, y.a, y.b); + } else if (y.a == y.b && x.b == y.b) { + /* Y is a constant matching rigth side of X */ + *newx = range(t, x.a, x.b - 1); + *newy = range(t, y.a, y.b); + } else { + /* generic case, can't derive more information */ + *newx = range(t, x.a, x.b); + *newy = range(t, y.a, y.b); + } + + break; + default: + break; + } +} + +/* ======================= + * REGISTER STATE HANDLING + * ======================= + */ +struct reg_state { + struct range r[4]; /* indexed by enum num_t: U64, U32, S64, S32 */ + bool valid; +}; + +static void print_reg_state(struct reg_state *r, const 
char *sfx) +{ + DEFINE_STRBUF(sb, 512); + enum num_t t; + int cnt = 0; + + if (!r->valid) { + printf("<not found>%s", sfx); + return; + } + + snappendf(sb, "scalar("); + for (t = first_t; t <= last_t; t++) { + snappendf(sb, "%s%s=", cnt++ ? "," : "", t_str(t)); + snprintf_range(t, sb, r->r[t]); + } + snappendf(sb, ")"); + + printf("%s%s", sb->buf, sfx); +} + +static void print_refinement(enum num_t s_t, struct range src, + enum num_t d_t, struct range old, struct range new, + const char *ctx) +{ + printf("REFINING (%s) (%s)SRC=", ctx, t_str(s_t)); + print_range(s_t, src, ""); + printf(" (%s)DST_OLD=", t_str(d_t)); + print_range(d_t, old, ""); + printf(" (%s)DST_NEW=", t_str(d_t)); + print_range(d_t, new, "\n"); +} + +static void reg_state_refine(struct reg_state *r, enum num_t t, struct range x, const char *ctx) +{ + enum num_t d_t, s_t; + struct range old; + bool keep_going = false; + +again: + /* try to derive new knowledge from just learned range x of type t */ + for (d_t = first_t; d_t <= last_t; d_t++) { + old = r->r[d_t]; + r->r[d_t] = range_refine(d_t, r->r[d_t], t, x); + if (!range_eq(r->r[d_t], old)) { + keep_going = true; + if (env.verbosity >= VERBOSE_VERY) + print_refinement(t, x, d_t, old, r->r[d_t], ctx); + } + } + + /* now see if we can derive anything new from updated reg_state's ranges */ + for (s_t = first_t; s_t <= last_t; s_t++) { + for (d_t = first_t; d_t <= last_t; d_t++) { + old = r->r[d_t]; + r->r[d_t] = range_refine(d_t, r->r[d_t], s_t, r->r[s_t]); + if (!range_eq(r->r[d_t], old)) { + keep_going = true; + if (env.verbosity >= VERBOSE_VERY) + print_refinement(s_t, r->r[s_t], d_t, old, r->r[d_t], ctx); + } + } + } + + /* keep refining until we converge */ + if (keep_going) { + keep_going = false; + goto again; + } +} + +static void reg_state_set_const(struct reg_state *rs, enum num_t t, u64 val) +{ + enum num_t tt; + + rs->valid = true; + for (tt = first_t; tt <= last_t; tt++) + rs->r[tt] = tt == t ? 
range(t, val, val) : unkn[tt]; + + reg_state_refine(rs, t, rs->r[t], "CONST"); +} + +static void reg_state_cond(enum num_t t, struct reg_state *x, struct reg_state *y, enum op op, + struct reg_state *newx, struct reg_state *newy, const char *ctx) +{ + char buf[32]; + enum num_t ts[2]; + struct reg_state xx = *x, yy = *y; + int i, t_cnt; + struct range z1, z2; + + if (op == OP_EQ || op == OP_NE) { + /* OP_EQ and OP_NE are sign-agnostic, so we need to process + * both signed and unsigned domains at the same time + */ + ts[0] = t_unsigned(t); + ts[1] = t_signed(t); + t_cnt = 2; + } else { + ts[0] = t; + t_cnt = 1; + } + + for (i = 0; i < t_cnt; i++) { + t = ts[i]; + z1 = x->r[t]; + z2 = y->r[t]; + + range_cond(t, z1, z2, op, &z1, &z2); + + if (newx) { + snprintf(buf, sizeof(buf), "%s R1", ctx); + reg_state_refine(&xx, t, z1, buf); + } + if (newy) { + snprintf(buf, sizeof(buf), "%s R2", ctx); + reg_state_refine(&yy, t, z2, buf); + } + } + + if (newx) + *newx = xx; + if (newy) + *newy = yy; +} + +static int reg_state_branch_taken_op(enum num_t t, struct reg_state *x, struct reg_state *y, + enum op op) +{ + if (op == OP_EQ || op == OP_NE) { + /* OP_EQ and OP_NE are sign-agnostic */ + enum num_t tu = t_unsigned(t); + enum num_t ts = t_signed(t); + int br_u, br_s, br; + + br_u = range_branch_taken_op(tu, x->r[tu], y->r[tu], op); + br_s = range_branch_taken_op(ts, x->r[ts], y->r[ts], op); + + if (br_u >= 0 && br_s >= 0 && br_u != br_s) + ASSERT_FALSE(true, "branch taken inconsistency!\n"); + + /* if 64-bit ranges are indecisive, use 32-bit subranges to + * eliminate always/never taken branches, if possible + */ + if (br_u == -1 && (t == U64 || t == S64)) { + br = range_branch_taken_op(U32, x->r[U32], y->r[U32], op); + /* we can only reject for OP_EQ, never take branch + * based on lower 32 bits + */ + if (op == OP_EQ && br == 0) + return 0; + /* for OP_NEQ we can be conclusive only if lower 32 bits + * differ and thus inequality branch is always taken + */ + if (op == OP_NE 
&& br == 1) + return 1; + + br = range_branch_taken_op(S32, x->r[S32], y->r[S32], op); + if (op == OP_EQ && br == 0) + return 0; + if (op == OP_NE && br == 1) + return 1; + } + + return br_u >= 0 ? br_u : br_s; + } + return range_branch_taken_op(t, x->r[t], y->r[t], op); +} + +/* ===================================== + * BPF PROGS GENERATION AND VERIFICATION + * ===================================== + */ +struct case_spec { + /* whether to init full register (r1) or sub-register (w1) */ + bool init_subregs; + /* whether to establish initial value range on full register (r1) or + * sub-register (w1) + */ + bool setup_subregs; + /* whether to establish initial value range using signed or unsigned + * comparisons (i.e., initialize umin/umax or smin/smax directly) + */ + bool setup_signed; + /* whether to perform comparison on full registers or sub-registers */ + bool compare_subregs; + /* whether to perform comparison using signed or unsigned operations */ + bool compare_signed; +}; + +/* Generate test BPF program based on provided test ranges, operation, and + * specifications about register bitness and signedness. 
+ */ +static int load_range_cmp_prog(struct range x, struct range y, enum op op, + int branch_taken, struct case_spec spec, + char *log_buf, size_t log_sz, + int *false_pos, int *true_pos) +{ +#define emit(insn) ({ \ + struct bpf_insn __insns[] = { insn }; \ + int __i; \ + for (__i = 0; __i < ARRAY_SIZE(__insns); __i++) \ + insns[cur_pos + __i] = __insns[__i]; \ + cur_pos += __i; \ +}) +#define JMP_TO(target) (target - cur_pos - 1) + int cur_pos = 0, exit_pos, fd, op_code; + struct bpf_insn insns[64]; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .log_level = 2, + .log_buf = log_buf, + .log_size = log_sz, + .prog_flags = BPF_F_TEST_REG_INVARIANTS, + ); + + /* ; skip exit block below + * goto +2; + */ + emit(BPF_JMP_A(2)); + exit_pos = cur_pos; + /* ; exit block for all the preparatory conditionals + * out: + * r0 = 0; + * exit; + */ + emit(BPF_MOV64_IMM(BPF_REG_0, 0)); + emit(BPF_EXIT_INSN()); + /* + * ; assign r6/w6 and r7/w7 unpredictable u64/u32 value + * call bpf_get_current_pid_tgid; + * r6 = r0; | w6 = w0; + * call bpf_get_current_pid_tgid; + * r7 = r0; | w7 = w0; + */ + emit(BPF_EMIT_CALL(BPF_FUNC_get_current_pid_tgid)); + if (spec.init_subregs) + emit(BPF_MOV32_REG(BPF_REG_6, BPF_REG_0)); + else + emit(BPF_MOV64_REG(BPF_REG_6, BPF_REG_0)); + emit(BPF_EMIT_CALL(BPF_FUNC_get_current_pid_tgid)); + if (spec.init_subregs) + emit(BPF_MOV32_REG(BPF_REG_7, BPF_REG_0)); + else + emit(BPF_MOV64_REG(BPF_REG_7, BPF_REG_0)); + /* ; setup initial r6/w6 possible value range ([x.a, x.b]) + * r1 = %[x.a] ll; | w1 = %[x.a]; + * r2 = %[x.b] ll; | w2 = %[x.b]; + * if r6 < r1 goto out; | if w6 < w1 goto out; + * if r6 > r2 goto out; | if w6 > w2 goto out; + */ + if (spec.setup_subregs) { + emit(BPF_MOV32_IMM(BPF_REG_1, (s32)x.a)); + emit(BPF_MOV32_IMM(BPF_REG_2, (s32)x.b)); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_6, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP32_REG(spec.setup_signed ? 
BPF_JSGT : BPF_JGT, + BPF_REG_6, BPF_REG_2, JMP_TO(exit_pos))); + } else { + emit(BPF_LD_IMM64(BPF_REG_1, x.a)); + emit(BPF_LD_IMM64(BPF_REG_2, x.b)); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_6, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_6, BPF_REG_2, JMP_TO(exit_pos))); + } + /* ; setup initial r7/w7 possible value range ([y.a, y.b]) + * r1 = %[y.a] ll; | w1 = %[y.a]; + * r2 = %[y.b] ll; | w2 = %[y.b]; + * if r7 < r1 goto out; | if w7 < w1 goto out; + * if r7 > r2 goto out; | if w7 > w2 goto out; + */ + if (spec.setup_subregs) { + emit(BPF_MOV32_IMM(BPF_REG_1, (s32)y.a)); + emit(BPF_MOV32_IMM(BPF_REG_2, (s32)y.b)); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_7, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_7, BPF_REG_2, JMP_TO(exit_pos))); + } else { + emit(BPF_LD_IMM64(BPF_REG_1, y.a)); + emit(BPF_LD_IMM64(BPF_REG_2, y.b)); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT, + BPF_REG_7, BPF_REG_1, JMP_TO(exit_pos))); + emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT, + BPF_REG_7, BPF_REG_2, JMP_TO(exit_pos))); + } + /* ; range test instruction + * if r6 <op> r7 goto +3; | if w6 <op> w7 goto +3; + */ + switch (op) { + case OP_LT: op_code = spec.compare_signed ? BPF_JSLT : BPF_JLT; break; + case OP_LE: op_code = spec.compare_signed ? BPF_JSLE : BPF_JLE; break; + case OP_GT: op_code = spec.compare_signed ? BPF_JSGT : BPF_JGT; break; + case OP_GE: op_code = spec.compare_signed ? 
BPF_JSGE : BPF_JGE; break; + case OP_EQ: op_code = BPF_JEQ; break; + case OP_NE: op_code = BPF_JNE; break; + default: + printf("unrecognized op %d\n", op); + return -ENOTSUP; + } + /* ; BEFORE conditional, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably + * ; this is used for debugging, as verifier doesn't always print + * ; registers states as of condition jump instruction (e.g., when + * ; precision marking happens) + * r0 = r6; | w0 = w6; + * r0 = r7; | w0 = w7; + */ + if (spec.compare_subregs) { + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7)); + } else { + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); + } + if (spec.compare_subregs) + emit(BPF_JMP32_REG(op_code, BPF_REG_6, BPF_REG_7, 3)); + else + emit(BPF_JMP_REG(op_code, BPF_REG_6, BPF_REG_7, 3)); + /* ; FALSE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably + * r0 = r6; | w0 = w6; + * r0 = r7; | w0 = w7; + * exit; + */ + *false_pos = cur_pos; + if (spec.compare_subregs) { + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7)); + } else { + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); + } + if (branch_taken == 1) /* false branch is never taken */ + emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */ + else + emit(BPF_EXIT_INSN()); + /* ; TRUE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably + * r0 = r6; | w0 = w6; + * r0 = r7; | w0 = w7; + * exit; + */ + *true_pos = cur_pos; + if (spec.compare_subregs) { + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7)); + } else { + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6)); + emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7)); + } + if (branch_taken == 0) /* true branch is never taken */ + emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */ + emit(BPF_EXIT_INSN()); /* last instruction has to be exit */ + + fd = 
bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "reg_bounds_test", + "GPL", insns, cur_pos, &opts); + if (fd < 0) + return fd; + + close(fd); + return 0; +#undef emit +#undef JMP_TO +} + +#define str_has_pfx(str, pfx) (strncmp(str, pfx, strlen(pfx)) == 0) + +/* Parse register state from verifier log. + * `s` should point to the start of "Rx = ..." substring in the verifier log. + */ +static int parse_reg_state(const char *s, struct reg_state *reg) +{ + /* There are two generic forms for SCALAR register: + * - known constant: R6_rwD=P%lld + * - range: R6_rwD=scalar(id=1,...), where "..." is a comma-separated + * list of optional range specifiers: + * - umin=%llu, if missing, assumed 0; + * - umax=%llu, if missing, assumed U64_MAX; + * - smin=%lld, if missing, assumed S64_MIN; + * - smax=%lld, if missing, assumed S64_MAX; + * - umin32=%d, if missing, assumed 0; + * - umax32=%d, if missing, assumed U32_MAX; + * - smin32=%d, if missing, assumed S32_MIN; + * - smax32=%d, if missing, assumed S32_MAX; + * - var_off=(%#llx; %#llx), tnum part, we don't care about it. + * + * If some of the values are equal, they will be grouped (but min/max + * are not mixed together, and similarly negative values are not + * grouped with non-negative ones). E.g.: + * + * R6_w=Pscalar(smin=smin32=0, smax=umax=umax32=1000) + * + * _rwD part is optional (and any of the letters can be missing). + * P (precision mark) is optional as well. + * + * Anything inside scalar() is optional, including id, of course. 
+ */ + struct { + const char *pfx; + u64 *dst, def; + bool is_32, is_set; + } *f, fields[8] = { + {"smin=", &reg->r[S64].a, S64_MIN}, + {"smax=", &reg->r[S64].b, S64_MAX}, + {"umin=", &reg->r[U64].a, 0}, + {"umax=", &reg->r[U64].b, U64_MAX}, + {"smin32=", &reg->r[S32].a, (u32)S32_MIN, true}, + {"smax32=", &reg->r[S32].b, (u32)S32_MAX, true}, + {"umin32=", &reg->r[U32].a, 0, true}, + {"umax32=", &reg->r[U32].b, U32_MAX, true}, + }; + const char *p; + int i; + + p = strchr(s, '='); + if (!p) + return -EINVAL; + p++; + if (*p == 'P') + p++; + + if (!str_has_pfx(p, "scalar(")) { + long long sval; + enum num_t t; + + if (p[0] == '0' && p[1] == 'x') { + if (sscanf(p, "%llx", &sval) != 1) + return -EINVAL; + } else { + if (sscanf(p, "%lld", &sval) != 1) + return -EINVAL; + } + + reg->valid = true; + for (t = first_t; t <= last_t; t++) { + reg->r[t] = range(t, sval, sval); + } + return 0; + } + + p += sizeof("scalar"); + while (p) { + int midxs[ARRAY_SIZE(fields)], mcnt = 0; + u64 val; + + for (i = 0; i < ARRAY_SIZE(fields); i++) { + f = &fields[i]; + if (!str_has_pfx(p, f->pfx)) + continue; + midxs[mcnt++] = i; + p += strlen(f->pfx); + } + + if (mcnt) { + /* populate all matched fields */ + if (p[0] == '0' && p[1] == 'x') { + if (sscanf(p, "%llx", &val) != 1) + return -EINVAL; + } else { + if (sscanf(p, "%lld", &val) != 1) + return -EINVAL; + } + + for (i = 0; i < mcnt; i++) { + f = &fields[midxs[i]]; + f->is_set = true; + *f->dst = f->is_32 ? (u64)(u32)val : val; + } + } else if (str_has_pfx(p, "var_off")) { + /* skip "var_off=(0x0; 0x3f)" part completely */ + p = strchr(p, ')'); + if (!p) + return -EINVAL; + p++; + } + + p = strpbrk(p, ",)"); + if (*p == ')') + break; + if (p) + p++; + } + + reg->valid = true; + + for (i = 0; i < ARRAY_SIZE(fields); i++) { + f = &fields[i]; + if (!f->is_set) + *f->dst = f->def; + } + + return 0; +} + + +/* Parse all register states (TRUE/FALSE branches and DST/SRC registers) + * out of the verifier log for a corresponding test case BPF program. 
+ */ +static int parse_range_cmp_log(const char *log_buf, struct case_spec spec, + int false_pos, int true_pos, + struct reg_state *false1_reg, struct reg_state *false2_reg, + struct reg_state *true1_reg, struct reg_state *true2_reg) +{ + struct { + int insn_idx; + int reg_idx; + const char *reg_upper; + struct reg_state *state; + } specs[] = { + {false_pos, 6, "R6=", false1_reg}, + {false_pos + 1, 7, "R7=", false2_reg}, + {true_pos, 6, "R6=", true1_reg}, + {true_pos + 1, 7, "R7=", true2_reg}, + }; + char buf[32]; + const char *p = log_buf, *q; + int i, err; + + for (i = 0; i < 4; i++) { + sprintf(buf, "%d: (%s) %s = %s%d", specs[i].insn_idx, + spec.compare_subregs ? "bc" : "bf", + spec.compare_subregs ? "w0" : "r0", + spec.compare_subregs ? "w" : "r", specs[i].reg_idx); + + q = strstr(p, buf); + if (!q) { + *specs[i].state = (struct reg_state){.valid = false}; + continue; + } + p = strstr(q, specs[i].reg_upper); + if (!p) + return -EINVAL; + err = parse_reg_state(p, specs[i].state); + if (err) + return -EINVAL; + } + return 0; +} + +/* Validate ranges match, and print details if they don't */ +static bool assert_range_eq(enum num_t t, struct range x, struct range y, + const char *ctx1, const char *ctx2) +{ + DEFINE_STRBUF(sb, 512); + + if (range_eq(x, y)) + return true; + + snappendf(sb, "MISMATCH %s.%s: ", ctx1, ctx2); + snprintf_range(t, sb, x); + snappendf(sb, " != "); + snprintf_range(t, sb, y); + + printf("%s\n", sb->buf); + + return false; +} + +/* Validate that register states match, and print details if they don't */ +static bool assert_reg_state_eq(struct reg_state *r, struct reg_state *e, const char *ctx) +{ + bool ok = true; + enum num_t t; + + if (r->valid != e->valid) { + printf("MISMATCH %s: actual %s != expected %s\n", ctx, + r->valid ? "<valid>" : "<invalid>", + e->valid ? 
"<valid>" : "<invalid>"); + return false; + } + + if (!r->valid) + return true; + + for (t = first_t; t <= last_t; t++) { + if (!assert_range_eq(t, r->r[t], e->r[t], ctx, t_str(t))) + ok = false; + } + + return ok; +} + +/* Printf verifier log, filtering out irrelevant noise */ +static void print_verifier_log(const char *buf) +{ + const char *p; + + while (buf[0]) { + p = strchrnul(buf, '\n'); + + /* filter out irrelevant precision backtracking logs */ + if (str_has_pfx(buf, "mark_precise: ")) + goto skip_line; + + printf("%.*s\n", (int)(p - buf), buf); + +skip_line: + buf = *p == '\0' ? p : p + 1; + } +} + +/* Simulate provided test case purely with our own range-based logic. + * This is done to set up expectations for verifier's branch_taken logic and + * verifier's register states in the verifier log. + */ +static void sim_case(enum num_t init_t, enum num_t cond_t, + struct range x, struct range y, enum op op, + struct reg_state *fr1, struct reg_state *fr2, + struct reg_state *tr1, struct reg_state *tr2, + int *branch_taken) +{ + const u64 A = x.a; + const u64 B = x.b; + const u64 C = y.a; + const u64 D = y.b; + struct reg_state rc; + enum op rev_op = complement_op(op); + enum num_t t; + + fr1->valid = fr2->valid = true; + tr1->valid = tr2->valid = true; + for (t = first_t; t <= last_t; t++) { + /* if we are initializing using 32-bit subregisters, + * full registers get upper 32 bits zeroed automatically + */ + struct range z = t_is_32(init_t) ? 
unkn_subreg(t) : unkn[t]; + + fr1->r[t] = fr2->r[t] = tr1->r[t] = tr2->r[t] = z; + } + + /* step 1: r1 >= A, r2 >= C */ + reg_state_set_const(&rc, init_t, A); + reg_state_cond(init_t, fr1, &rc, OP_GE, fr1, NULL, "r1>=A"); + reg_state_set_const(&rc, init_t, C); + reg_state_cond(init_t, fr2, &rc, OP_GE, fr2, NULL, "r2>=C"); + *tr1 = *fr1; + *tr2 = *fr2; + if (env.verbosity >= VERBOSE_VERY) { + printf("STEP1 (%s) R1: ", t_str(init_t)); print_reg_state(fr1, "\n"); + printf("STEP1 (%s) R2: ", t_str(init_t)); print_reg_state(fr2, "\n"); + } + + /* step 2: r1 <= B, r2 <= D */ + reg_state_set_const(&rc, init_t, B); + reg_state_cond(init_t, fr1, &rc, OP_LE, fr1, NULL, "r1<=B"); + reg_state_set_const(&rc, init_t, D); + reg_state_cond(init_t, fr2, &rc, OP_LE, fr2, NULL, "r2<=D"); + *tr1 = *fr1; + *tr2 = *fr2; + if (env.verbosity >= VERBOSE_VERY) { + printf("STEP2 (%s) R1: ", t_str(init_t)); print_reg_state(fr1, "\n"); + printf("STEP2 (%s) R2: ", t_str(init_t)); print_reg_state(fr2, "\n"); + } + + /* step 3: r1 <op> r2 */ + *branch_taken = reg_state_branch_taken_op(cond_t, fr1, fr2, op); + fr1->valid = fr2->valid = false; + tr1->valid = tr2->valid = false; + if (*branch_taken != 1) { /* FALSE is possible */ + fr1->valid = fr2->valid = true; + reg_state_cond(cond_t, fr1, fr2, rev_op, fr1, fr2, "FALSE"); + } + if (*branch_taken != 0) { /* TRUE is possible */ + tr1->valid = tr2->valid = true; + reg_state_cond(cond_t, tr1, tr2, op, tr1, tr2, "TRUE"); + } + if (env.verbosity >= VERBOSE_VERY) { + printf("STEP3 (%s) FALSE R1:", t_str(cond_t)); print_reg_state(fr1, "\n"); + printf("STEP3 (%s) FALSE R2:", t_str(cond_t)); print_reg_state(fr2, "\n"); + printf("STEP3 (%s) TRUE R1:", t_str(cond_t)); print_reg_state(tr1, "\n"); + printf("STEP3 (%s) TRUE R2:", t_str(cond_t)); print_reg_state(tr2, "\n"); + } +} + +/* =============================== + * HIGH-LEVEL TEST CASE VALIDATION + * =============================== + */ +static u32 upper_seeds[] = { + 0, + 1, + U32_MAX, + U32_MAX - 1, + 
S32_MAX, + (u32)S32_MIN, +}; + +static u32 lower_seeds[] = { + 0, + 1, + 2, (u32)-2, + 255, (u32)-255, + UINT_MAX, + UINT_MAX - 1, + INT_MAX, + (u32)INT_MIN, +}; + +struct ctx { + int val_cnt, subval_cnt, range_cnt, subrange_cnt; + u64 uvals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)]; + s64 svals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)]; + u32 usubvals[ARRAY_SIZE(lower_seeds)]; + s32 ssubvals[ARRAY_SIZE(lower_seeds)]; + struct range *uranges, *sranges; + struct range *usubranges, *ssubranges; + int max_failure_cnt, cur_failure_cnt; + int total_case_cnt, case_cnt; + int rand_case_cnt; + unsigned rand_seed; + __u64 start_ns; + char progress_ctx[64]; +}; + +static void cleanup_ctx(struct ctx *ctx) +{ + free(ctx->uranges); + free(ctx->sranges); + free(ctx->usubranges); + free(ctx->ssubranges); +} + +struct subtest_case { + enum num_t init_t; + enum num_t cond_t; + struct range x; + struct range y; + enum op op; +}; + +static void subtest_case_str(struct strbuf *sb, struct subtest_case *t, bool use_op) +{ + snappendf(sb, "(%s)", t_str(t->init_t)); + snprintf_range(t->init_t, sb, t->x); + snappendf(sb, " (%s)%s ", t_str(t->cond_t), use_op ? 
op_str(t->op) : "<op>"); + snprintf_range(t->init_t, sb, t->y); +} + +/* Generate and validate test case based on specific combination of setup + * register ranges (including their expected num_t domain), and conditional + * operation to perform (including num_t domain in which it has to be + * performed) + */ +static int verify_case_op(enum num_t init_t, enum num_t cond_t, + struct range x, struct range y, enum op op) +{ + char log_buf[256 * 1024]; + size_t log_sz = sizeof(log_buf); + int err, false_pos = 0, true_pos = 0, branch_taken; + struct reg_state fr1, fr2, tr1, tr2; + struct reg_state fe1, fe2, te1, te2; + bool failed = false; + struct case_spec spec = { + .init_subregs = (init_t == U32 || init_t == S32), + .setup_subregs = (init_t == U32 || init_t == S32), + .setup_signed = (init_t == S64 || init_t == S32), + .compare_subregs = (cond_t == U32 || cond_t == S32), + .compare_signed = (cond_t == S64 || cond_t == S32), + }; + + log_buf[0] = '\0'; + + sim_case(init_t, cond_t, x, y, op, &fe1, &fe2, &te1, &te2, &branch_taken); + + err = load_range_cmp_prog(x, y, op, branch_taken, spec, + log_buf, log_sz, &false_pos, &true_pos); + if (err) { + ASSERT_OK(err, "load_range_cmp_prog"); + failed = true; + } + + err = parse_range_cmp_log(log_buf, spec, false_pos, true_pos, + &fr1, &fr2, &tr1, &tr2); + if (err) { + ASSERT_OK(err, "parse_range_cmp_log"); + failed = true; + } + + if (!assert_reg_state_eq(&fr1, &fe1, "false_reg1") || + !assert_reg_state_eq(&fr2, &fe2, "false_reg2") || + !assert_reg_state_eq(&tr1, &te1, "true_reg1") || + !assert_reg_state_eq(&tr2, &te2, "true_reg2")) { + failed = true; + } + + if (failed || env.verbosity >= VERBOSE_NORMAL) { + if (failed || env.verbosity >= VERBOSE_VERY) { + printf("VERIFIER LOG:\n========================\n"); + print_verifier_log(log_buf); + printf("=====================\n"); + } + printf("ACTUAL FALSE1: "); print_reg_state(&fr1, "\n"); + printf("EXPECTED FALSE1: "); print_reg_state(&fe1, "\n"); + printf("ACTUAL FALSE2: "); 
print_reg_state(&fr2, "\n"); + printf("EXPECTED FALSE2: "); print_reg_state(&fe2, "\n"); + printf("ACTUAL TRUE1: "); print_reg_state(&tr1, "\n"); + printf("EXPECTED TRUE1: "); print_reg_state(&te1, "\n"); + printf("ACTUAL TRUE2: "); print_reg_state(&tr2, "\n"); + printf("EXPECTED TRUE2: "); print_reg_state(&te2, "\n"); + + return failed ? -EINVAL : 0; + } + + return 0; +} + +/* Given setup ranges and number types, go over all supported operations, + * generating individual subtest for each allowed combination + */ +static int verify_case_opt(struct ctx *ctx, enum num_t init_t, enum num_t cond_t, + struct range x, struct range y, bool is_subtest) +{ + DEFINE_STRBUF(sb, 256); + int err; + struct subtest_case sub = { + .init_t = init_t, + .cond_t = cond_t, + .x = x, + .y = y, + }; + + sb->pos = 0; /* reset position in strbuf */ + subtest_case_str(sb, &sub, false /* ignore op */); + if (is_subtest && !test__start_subtest(sb->buf)) + return 0; + + for (sub.op = first_op; sub.op <= last_op; sub.op++) { + sb->pos = 0; /* reset position in strbuf */ + subtest_case_str(sb, &sub, true /* print op */); + + if (env.verbosity >= VERBOSE_NORMAL) /* this speeds up debugging */ + printf("TEST CASE: %s\n", sb->buf); + + err = verify_case_op(init_t, cond_t, x, y, sub.op); + if (err || env.verbosity >= VERBOSE_NORMAL) + ASSERT_OK(err, sb->buf); + if (err) { + ctx->cur_failure_cnt++; + if (ctx->cur_failure_cnt > ctx->max_failure_cnt) + return err; + return 0; /* keep testing other cases */ + } + ctx->case_cnt++; + if ((ctx->case_cnt % 10000) == 0) { + double progress = (ctx->case_cnt + 0.0) / ctx->total_case_cnt; + u64 elapsed_ns = get_time_ns() - ctx->start_ns; + double remain_ns = elapsed_ns / progress * (1 - progress); + + fprintf(env.stderr, "PROGRESS (%s): %d/%d (%.2lf%%), " + "elapsed %llu mins (%.2lf hrs), " + "ETA %.0lf mins (%.2lf hrs)\n", + ctx->progress_ctx, + ctx->case_cnt, ctx->total_case_cnt, 100.0 * progress, + elapsed_ns / 1000000000 / 60, + elapsed_ns / 1000000000.0 / 
3600, + remain_ns / 1000000000.0 / 60, + remain_ns / 1000000000.0 / 3600); + } + } + + return 0; +} + +static int verify_case(struct ctx *ctx, enum num_t init_t, enum num_t cond_t, + struct range x, struct range y) +{ + return verify_case_opt(ctx, init_t, cond_t, x, y, true /* is_subtest */); +} + +/* ================================ + * GENERATED CASES FROM SEED VALUES + * ================================ + */ +static int u64_cmp(const void *p1, const void *p2) +{ + u64 x1 = *(const u64 *)p1, x2 = *(const u64 *)p2; + + return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0; +} + +static int u32_cmp(const void *p1, const void *p2) +{ + u32 x1 = *(const u32 *)p1, x2 = *(const u32 *)p2; + + return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0; +} + +static int s64_cmp(const void *p1, const void *p2) +{ + s64 x1 = *(const s64 *)p1, x2 = *(const s64 *)p2; + + return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0; +} + +static int s32_cmp(const void *p1, const void *p2) +{ + s32 x1 = *(const s32 *)p1, x2 = *(const s32 *)p2; + + return x1 != x2 ? (x1 < x2 ? 
-1 : 1) : 0; +} + +/* Generate valid unique constants from seeds, both signed and unsigned */ +static void gen_vals(struct ctx *ctx) +{ + int i, j, cnt = 0; + + for (i = 0; i < ARRAY_SIZE(upper_seeds); i++) { + for (j = 0; j < ARRAY_SIZE(lower_seeds); j++) { + ctx->uvals[cnt++] = (((u64)upper_seeds[i]) << 32) | lower_seeds[j]; + } + } + + /* sort and compact uvals (i.e., it's `sort | uniq`) */ + qsort(ctx->uvals, cnt, sizeof(*ctx->uvals), u64_cmp); + for (i = 1, j = 0; i < cnt; i++) { + if (ctx->uvals[j] == ctx->uvals[i]) + continue; + j++; + ctx->uvals[j] = ctx->uvals[i]; + } + ctx->val_cnt = j + 1; + + /* we have exactly the same number of s64 values, they are just in + * a different order than u64s, so just sort them differently + */ + for (i = 0; i < ctx->val_cnt; i++) + ctx->svals[i] = ctx->uvals[i]; + qsort(ctx->svals, ctx->val_cnt, sizeof(*ctx->svals), s64_cmp); + + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + for (i = 0; i < ctx->val_cnt; i++) { + sb1->pos = sb2->pos = 0; + snprintf_num(U64, sb1, ctx->uvals[i]); + snprintf_num(S64, sb2, ctx->svals[i]); + printf("SEED #%d: u64=%-20s s64=%-20s\n", i, sb1->buf, sb2->buf); + } + } + + /* 32-bit values are generated separately */ + cnt = 0; + for (i = 0; i < ARRAY_SIZE(lower_seeds); i++) { + ctx->usubvals[cnt++] = lower_seeds[i]; + } + + /* sort and compact usubvals (i.e., it's `sort | uniq`) */ + qsort(ctx->usubvals, cnt, sizeof(*ctx->usubvals), u32_cmp); + for (i = 1, j = 0; i < cnt; i++) { + if (ctx->usubvals[j] == ctx->usubvals[i]) + continue; + j++; + ctx->usubvals[j] = ctx->usubvals[i]; + } + ctx->subval_cnt = j + 1; + + for (i = 0; i < ctx->subval_cnt; i++) + ctx->ssubvals[i] = ctx->usubvals[i]; + qsort(ctx->ssubvals, ctx->subval_cnt, sizeof(*ctx->ssubvals), s32_cmp); + + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + for (i = 0; i < ctx->subval_cnt; i++) { + sb1->pos = sb2->pos = 0; + snprintf_num(U32, 
sb1, ctx->usubvals[i]); + snprintf_num(S32, sb2, ctx->ssubvals[i]); + printf("SUBSEED #%d: u32=%-10s s32=%-10s\n", i, sb1->buf, sb2->buf); + } + } +} + +/* Generate valid ranges from upper/lower seeds */ +static int gen_ranges(struct ctx *ctx) +{ + int i, j, cnt = 0; + + for (i = 0; i < ctx->val_cnt; i++) { + for (j = i; j < ctx->val_cnt; j++) { + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + sb1->pos = sb2->pos = 0; + snprintf_range(U64, sb1, range(U64, ctx->uvals[i], ctx->uvals[j])); + snprintf_range(S64, sb2, range(S64, ctx->svals[i], ctx->svals[j])); + printf("RANGE #%d: u64=%-40s s64=%-40s\n", cnt, sb1->buf, sb2->buf); + } + cnt++; + } + } + ctx->range_cnt = cnt; + + ctx->uranges = calloc(ctx->range_cnt, sizeof(*ctx->uranges)); + if (!ASSERT_OK_PTR(ctx->uranges, "uranges_calloc")) + return -EINVAL; + ctx->sranges = calloc(ctx->range_cnt, sizeof(*ctx->sranges)); + if (!ASSERT_OK_PTR(ctx->sranges, "sranges_calloc")) + return -EINVAL; + + cnt = 0; + for (i = 0; i < ctx->val_cnt; i++) { + for (j = i; j < ctx->val_cnt; j++) { + ctx->uranges[cnt] = range(U64, ctx->uvals[i], ctx->uvals[j]); + ctx->sranges[cnt] = range(S64, ctx->svals[i], ctx->svals[j]); + cnt++; + } + } + + cnt = 0; + for (i = 0; i < ctx->subval_cnt; i++) { + for (j = i; j < ctx->subval_cnt; j++) { + if (env.verbosity >= VERBOSE_SUPER) { + DEFINE_STRBUF(sb1, 256); + DEFINE_STRBUF(sb2, 256); + + sb1->pos = sb2->pos = 0; + snprintf_range(U32, sb1, range(U32, ctx->usubvals[i], ctx->usubvals[j])); + snprintf_range(S32, sb2, range(S32, ctx->ssubvals[i], ctx->ssubvals[j])); + printf("SUBRANGE #%d: u32=%-20s s32=%-20s\n", cnt, sb1->buf, sb2->buf); + } + cnt++; + } + } + ctx->subrange_cnt = cnt; + + ctx->usubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->usubranges)); + if (!ASSERT_OK_PTR(ctx->usubranges, "usubranges_calloc")) + return -EINVAL; + ctx->ssubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->ssubranges)); + if (!ASSERT_OK_PTR(ctx->ssubranges, 
"ssubranges_calloc")) + return -EINVAL; + + cnt = 0; + for (i = 0; i < ctx->subval_cnt; i++) { + for (j = i; j < ctx->subval_cnt; j++) { + ctx->usubranges[cnt] = range(U32, ctx->usubvals[i], ctx->usubvals[j]); + ctx->ssubranges[cnt] = range(S32, ctx->ssubvals[i], ctx->ssubvals[j]); + cnt++; + } + } + + return 0; +} + +static int parse_env_vars(struct ctx *ctx) +{ + const char *s; + + if ((s = getenv("REG_BOUNDS_MAX_FAILURE_CNT"))) { + errno = 0; + ctx->max_failure_cnt = strtol(s, NULL, 10); + if (errno || ctx->max_failure_cnt < 0) { + ASSERT_OK(-errno, "REG_BOUNDS_MAX_FAILURE_CNT"); + return -EINVAL; + } + } + + if ((s = getenv("REG_BOUNDS_RAND_CASE_CNT"))) { + errno = 0; + ctx->rand_case_cnt = strtol(s, NULL, 10); + if (errno || ctx->rand_case_cnt < 0) { + ASSERT_OK(-errno, "REG_BOUNDS_RAND_CASE_CNT"); + return -EINVAL; + } + } + + if ((s = getenv("REG_BOUNDS_RAND_SEED"))) { + errno = 0; + ctx->rand_seed = strtoul(s, NULL, 10); + if (errno) { + ASSERT_OK(-errno, "REG_BOUNDS_RAND_SEED"); + return -EINVAL; + } + } + + return 0; +} + +static int prepare_gen_tests(struct ctx *ctx) +{ + const char *s; + int err; + + if (!(s = getenv("SLOW_TESTS")) || strcmp(s, "1") != 0) { + test__skip(); + return -ENOTSUP; + } + + err = parse_env_vars(ctx); + if (err) + return err; + + gen_vals(ctx); + err = gen_ranges(ctx); + if (err) { + ASSERT_OK(err, "gen_ranges"); + return err; + } + + return 0; +} + +/* Go over generated constants and ranges and validate various supported + * combinations of them + */ +static void validate_gen_range_vs_const_64(enum num_t init_t, enum num_t cond_t) +{ + struct ctx ctx; + struct range rconst; + const struct range *ranges; + const u64 *vals; + int i, j; + + memset(&ctx, 0, sizeof(ctx)); + + if (prepare_gen_tests(&ctx)) + goto cleanup; + + ranges = init_t == U64 ? ctx.uranges : ctx.sranges; + vals = init_t == U64 ? 
ctx.uvals : (const u64 *)ctx.svals; + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.range_cnt * ctx.val_cnt); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "RANGE x CONST, %s -> %s", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < ctx.val_cnt; i++) { + for (j = 0; j < ctx.range_cnt; j++) { + rconst = range(init_t, vals[i], vals[i]); + + /* (u64|s64)(<range> x <const>) */ + if (verify_case(&ctx, init_t, cond_t, ranges[j], rconst)) + goto cleanup; + /* (u64|s64)(<const> x <range>) */ + if (verify_case(&ctx, init_t, cond_t, rconst, ranges[j])) + goto cleanup; + } + } + +cleanup: + cleanup_ctx(&ctx); +} + +static void validate_gen_range_vs_const_32(enum num_t init_t, enum num_t cond_t) +{ + struct ctx ctx; + struct range rconst; + const struct range *ranges; + const u32 *vals; + int i, j; + + memset(&ctx, 0, sizeof(ctx)); + + if (prepare_gen_tests(&ctx)) + goto cleanup; + + ranges = init_t == U32 ? ctx.usubranges : ctx.ssubranges; + vals = init_t == U32 ? 
ctx.usubvals : (const u32 *)ctx.ssubvals; + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.subrange_cnt * ctx.subval_cnt); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "RANGE x CONST, %s -> %s", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < ctx.subval_cnt; i++) { + for (j = 0; j < ctx.subrange_cnt; j++) { + rconst = range(init_t, vals[i], vals[i]); + + /* (u32|s32)(<range> x <const>) */ + if (verify_case(&ctx, init_t, cond_t, ranges[j], rconst)) + goto cleanup; + /* (u32|s32)(<const> x <range>) */ + if (verify_case(&ctx, init_t, cond_t, rconst, ranges[j])) + goto cleanup; + } + } + +cleanup: + cleanup_ctx(&ctx); +} + +static void validate_gen_range_vs_range(enum num_t init_t, enum num_t cond_t) +{ + struct ctx ctx; + const struct range *ranges; + int i, j, rcnt; + + memset(&ctx, 0, sizeof(ctx)); + + if (prepare_gen_tests(&ctx)) + goto cleanup; + + switch (init_t) + { + case U64: + ranges = ctx.uranges; + rcnt = ctx.range_cnt; + break; + case U32: + ranges = ctx.usubranges; + rcnt = ctx.subrange_cnt; + break; + case S64: + ranges = ctx.sranges; + rcnt = ctx.range_cnt; + break; + case S32: + ranges = ctx.ssubranges; + rcnt = ctx.subrange_cnt; + break; + default: + printf("validate_gen_range_vs_range!\n"); + exit(1); + } + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * rcnt * (rcnt + 1) / 2); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "RANGE x RANGE, %s -> %s", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < rcnt; i++) { + for (j = i; j < rcnt; j++) { + /* (<range> x <range>) */ + if (verify_case(&ctx, init_t, cond_t, ranges[i], ranges[j])) + goto cleanup; + if (verify_case(&ctx, init_t, cond_t, ranges[j], ranges[i])) + goto cleanup; + } + } + +cleanup: + cleanup_ctx(&ctx); +} + +/* Go over thousands of test cases generated from initial seed values. + * Given this take a long time, guard this begind SLOW_TESTS=1 envvar. 
If + * envvar is not set, this test is skipped during test_progs testing. + * + * We split this up into smaller subsets based on initialization and + * conditiona numeric domains to get an easy parallelization with test_progs' + * -j argument. + */ + +/* RANGE x CONST, U64 initial range */ +void test_reg_bounds_gen_consts_u64_u64(void) { validate_gen_range_vs_const_64(U64, U64); } +void test_reg_bounds_gen_consts_u64_s64(void) { validate_gen_range_vs_const_64(U64, S64); } +void test_reg_bounds_gen_consts_u64_u32(void) { validate_gen_range_vs_const_64(U64, U32); } +void test_reg_bounds_gen_consts_u64_s32(void) { validate_gen_range_vs_const_64(U64, S32); } +/* RANGE x CONST, S64 initial range */ +void test_reg_bounds_gen_consts_s64_u64(void) { validate_gen_range_vs_const_64(S64, U64); } +void test_reg_bounds_gen_consts_s64_s64(void) { validate_gen_range_vs_const_64(S64, S64); } +void test_reg_bounds_gen_consts_s64_u32(void) { validate_gen_range_vs_const_64(S64, U32); } +void test_reg_bounds_gen_consts_s64_s32(void) { validate_gen_range_vs_const_64(S64, S32); } +/* RANGE x CONST, U32 initial range */ +void test_reg_bounds_gen_consts_u32_u64(void) { validate_gen_range_vs_const_32(U32, U64); } +void test_reg_bounds_gen_consts_u32_s64(void) { validate_gen_range_vs_const_32(U32, S64); } +void test_reg_bounds_gen_consts_u32_u32(void) { validate_gen_range_vs_const_32(U32, U32); } +void test_reg_bounds_gen_consts_u32_s32(void) { validate_gen_range_vs_const_32(U32, S32); } +/* RANGE x CONST, S32 initial range */ +void test_reg_bounds_gen_consts_s32_u64(void) { validate_gen_range_vs_const_32(S32, U64); } +void test_reg_bounds_gen_consts_s32_s64(void) { validate_gen_range_vs_const_32(S32, S64); } +void test_reg_bounds_gen_consts_s32_u32(void) { validate_gen_range_vs_const_32(S32, U32); } +void test_reg_bounds_gen_consts_s32_s32(void) { validate_gen_range_vs_const_32(S32, S32); } + +/* RANGE x RANGE, U64 initial range */ +void test_reg_bounds_gen_ranges_u64_u64(void) { 
validate_gen_range_vs_range(U64, U64); } +void test_reg_bounds_gen_ranges_u64_s64(void) { validate_gen_range_vs_range(U64, S64); } +void test_reg_bounds_gen_ranges_u64_u32(void) { validate_gen_range_vs_range(U64, U32); } +void test_reg_bounds_gen_ranges_u64_s32(void) { validate_gen_range_vs_range(U64, S32); } +/* RANGE x RANGE, S64 initial range */ +void test_reg_bounds_gen_ranges_s64_u64(void) { validate_gen_range_vs_range(S64, U64); } +void test_reg_bounds_gen_ranges_s64_s64(void) { validate_gen_range_vs_range(S64, S64); } +void test_reg_bounds_gen_ranges_s64_u32(void) { validate_gen_range_vs_range(S64, U32); } +void test_reg_bounds_gen_ranges_s64_s32(void) { validate_gen_range_vs_range(S64, S32); } +/* RANGE x RANGE, U32 initial range */ +void test_reg_bounds_gen_ranges_u32_u64(void) { validate_gen_range_vs_range(U32, U64); } +void test_reg_bounds_gen_ranges_u32_s64(void) { validate_gen_range_vs_range(U32, S64); } +void test_reg_bounds_gen_ranges_u32_u32(void) { validate_gen_range_vs_range(U32, U32); } +void test_reg_bounds_gen_ranges_u32_s32(void) { validate_gen_range_vs_range(U32, S32); } +/* RANGE x RANGE, S32 initial range */ +void test_reg_bounds_gen_ranges_s32_u64(void) { validate_gen_range_vs_range(S32, U64); } +void test_reg_bounds_gen_ranges_s32_s64(void) { validate_gen_range_vs_range(S32, S64); } +void test_reg_bounds_gen_ranges_s32_u32(void) { validate_gen_range_vs_range(S32, U32); } +void test_reg_bounds_gen_ranges_s32_s32(void) { validate_gen_range_vs_range(S32, S32); } + +#define DEFAULT_RAND_CASE_CNT 100 + +#define RAND_21BIT_MASK ((1 << 22) - 1) + +static u64 rand_u64() +{ + /* RAND_MAX is guaranteed to be at least 1<<15, but in practice it + * seems to be 1<<31, so we need to call it thrice to get full u64; + * we'll use rougly equal split: 22 + 21 + 21 bits + */ + return ((u64)random() << 42) | + (((u64)random() & RAND_21BIT_MASK) << 21) | + (random() & RAND_21BIT_MASK); +} + +static u64 rand_const(enum num_t t) +{ + return cast_t(t, 
rand_u64()); +} + +static struct range rand_range(enum num_t t) +{ + u64 x = rand_const(t), y = rand_const(t); + + return range(t, min_t(t, x, y), max_t(t, x, y)); +} + +static void validate_rand_ranges(enum num_t init_t, enum num_t cond_t, bool const_range) +{ + struct ctx ctx; + struct range range1, range2; + int err, i; + u64 t; + + memset(&ctx, 0, sizeof(ctx)); + + err = parse_env_vars(&ctx); + if (err) { + ASSERT_OK(err, "parse_env_vars"); + return; + } + + if (ctx.rand_case_cnt == 0) + ctx.rand_case_cnt = DEFAULT_RAND_CASE_CNT; + if (ctx.rand_seed == 0) + ctx.rand_seed = (unsigned)get_time_ns(); + + srandom(ctx.rand_seed); + + ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.rand_case_cnt); + ctx.start_ns = get_time_ns(); + snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx), + "[RANDOM SEED %u] RANGE x %s, %s -> %s", + ctx.rand_seed, const_range ? "CONST" : "RANGE", + t_str(init_t), t_str(cond_t)); + + for (i = 0; i < ctx.rand_case_cnt; i++) { + range1 = rand_range(init_t); + if (const_range) { + t = rand_const(init_t); + range2 = range(init_t, t, t); + } else { + range2 = rand_range(init_t); + } + + /* <range1> x <range2> */ + if (verify_case_opt(&ctx, init_t, cond_t, range1, range2, false /* !is_subtest */)) + goto cleanup; + /* <range2> x <range1> */ + if (verify_case_opt(&ctx, init_t, cond_t, range2, range1, false /* !is_subtest */)) + goto cleanup; + } + +cleanup: + /* make sure we report random seed for reproducing */ + ASSERT_TRUE(true, ctx.progress_ctx); + cleanup_ctx(&ctx); +} + +/* [RANDOM] RANGE x CONST, U64 initial range */ +void test_reg_bounds_rand_consts_u64_u64(void) { validate_rand_ranges(U64, U64, true /* const */); } +void test_reg_bounds_rand_consts_u64_s64(void) { validate_rand_ranges(U64, S64, true /* const */); } +void test_reg_bounds_rand_consts_u64_u32(void) { validate_rand_ranges(U64, U32, true /* const */); } +void test_reg_bounds_rand_consts_u64_s32(void) { validate_rand_ranges(U64, S32, true /* const */); } +/* [RANDOM] 
RANGE x CONST, S64 initial range */ +void test_reg_bounds_rand_consts_s64_u64(void) { validate_rand_ranges(S64, U64, true /* const */); } +void test_reg_bounds_rand_consts_s64_s64(void) { validate_rand_ranges(S64, S64, true /* const */); } +void test_reg_bounds_rand_consts_s64_u32(void) { validate_rand_ranges(S64, U32, true /* const */); } +void test_reg_bounds_rand_consts_s64_s32(void) { validate_rand_ranges(S64, S32, true /* const */); } +/* [RANDOM] RANGE x CONST, U32 initial range */ +void test_reg_bounds_rand_consts_u32_u64(void) { validate_rand_ranges(U32, U64, true /* const */); } +void test_reg_bounds_rand_consts_u32_s64(void) { validate_rand_ranges(U32, S64, true /* const */); } +void test_reg_bounds_rand_consts_u32_u32(void) { validate_rand_ranges(U32, U32, true /* const */); } +void test_reg_bounds_rand_consts_u32_s32(void) { validate_rand_ranges(U32, S32, true /* const */); } +/* [RANDOM] RANGE x CONST, S32 initial range */ +void test_reg_bounds_rand_consts_s32_u64(void) { validate_rand_ranges(S32, U64, true /* const */); } +void test_reg_bounds_rand_consts_s32_s64(void) { validate_rand_ranges(S32, S64, true /* const */); } +void test_reg_bounds_rand_consts_s32_u32(void) { validate_rand_ranges(S32, U32, true /* const */); } +void test_reg_bounds_rand_consts_s32_s32(void) { validate_rand_ranges(S32, S32, true /* const */); } + +/* [RANDOM] RANGE x RANGE, U64 initial range */ +void test_reg_bounds_rand_ranges_u64_u64(void) { validate_rand_ranges(U64, U64, false /* range */); } +void test_reg_bounds_rand_ranges_u64_s64(void) { validate_rand_ranges(U64, S64, false /* range */); } +void test_reg_bounds_rand_ranges_u64_u32(void) { validate_rand_ranges(U64, U32, false /* range */); } +void test_reg_bounds_rand_ranges_u64_s32(void) { validate_rand_ranges(U64, S32, false /* range */); } +/* [RANDOM] RANGE x RANGE, S64 initial range */ +void test_reg_bounds_rand_ranges_s64_u64(void) { validate_rand_ranges(S64, U64, false /* range */); } +void 
test_reg_bounds_rand_ranges_s64_s64(void) { validate_rand_ranges(S64, S64, false /* range */); } +void test_reg_bounds_rand_ranges_s64_u32(void) { validate_rand_ranges(S64, U32, false /* range */); } +void test_reg_bounds_rand_ranges_s64_s32(void) { validate_rand_ranges(S64, S32, false /* range */); } +/* [RANDOM] RANGE x RANGE, U32 initial range */ +void test_reg_bounds_rand_ranges_u32_u64(void) { validate_rand_ranges(U32, U64, false /* range */); } +void test_reg_bounds_rand_ranges_u32_s64(void) { validate_rand_ranges(U32, S64, false /* range */); } +void test_reg_bounds_rand_ranges_u32_u32(void) { validate_rand_ranges(U32, U32, false /* range */); } +void test_reg_bounds_rand_ranges_u32_s32(void) { validate_rand_ranges(U32, S32, false /* range */); } +/* [RANDOM] RANGE x RANGE, S32 initial range */ +void test_reg_bounds_rand_ranges_s32_u64(void) { validate_rand_ranges(S32, U64, false /* range */); } +void test_reg_bounds_rand_ranges_s32_s64(void) { validate_rand_ranges(S32, S64, false /* range */); } +void test_reg_bounds_rand_ranges_s32_u32(void) { validate_rand_ranges(S32, U32, false /* range */); } +void test_reg_bounds_rand_ranges_s32_s32(void) { validate_rand_ranges(S32, S32, false /* range */); } + +/* A set of hard-coded "interesting" cases to validate as part of normal + * test_progs test runs + */ +static struct subtest_case crafted_cases[] = { + {U64, U64, {0, 0xffffffff}, {0, 0}}, + {U64, U64, {0, 0x80000000}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x100000100ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x180000000ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x1ffffff00ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x1ffffff01ULL}, {0, 0}}, + {U64, U64, {0x100000000ULL, 0x1fffffffeULL}, {0, 0}}, + {U64, U64, {0x100000001ULL, 0x1000000ffULL}, {0, 0}}, + + /* single point overlap, interesting BPF_EQ and BPF_NE interactions */ + {U64, U64, {0, 1}, {1, 0x80000000}}, + {U64, S64, {0, 1}, {1, 0x80000000}}, + {U64, U32, {0, 1}, {1, 0x80000000}}, + {U64, S32, 
{0, 1}, {1, 0x80000000}}, + + {U64, S64, {0, 0xffffffff00000000ULL}, {0, 0}}, + {U64, S64, {0x7fffffffffffffffULL, 0xffffffff00000000ULL}, {0, 0}}, + {U64, S64, {0x7fffffff00000001ULL, 0xffffffff00000000ULL}, {0, 0}}, + {U64, S64, {0, 0xffffffffULL}, {1, 1}}, + {U64, S64, {0, 0xffffffffULL}, {0x7fffffff, 0x7fffffff}}, + + {U64, U32, {0, 0x100000000}, {0, 0}}, + {U64, U32, {0xfffffffe, 0x100000000}, {0x80000000, 0x80000000}}, + + {U64, S32, {0, 0xffffffff00000000ULL}, {0, 0}}, + /* these are tricky cases where lower 32 bits allow to tighten 64 + * bit boundaries based on tightened lower 32 bit boundaries + */ + {U64, S32, {0, 0x0ffffffffULL}, {0, 0}}, + {U64, S32, {0, 0x100000000ULL}, {0, 0}}, + {U64, S32, {0, 0x100000001ULL}, {0, 0}}, + {U64, S32, {0, 0x180000000ULL}, {0, 0}}, + {U64, S32, {0, 0x17fffffffULL}, {0, 0}}, + {U64, S32, {0, 0x180000001ULL}, {0, 0}}, + + /* verifier knows about [-1, 0] range for s32 for this case already */ + {S64, S64, {0xffffffffffffffffULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}}, + /* but didn't know about these cases initially */ + {U64, U64, {0xffffffff, 0x100000000ULL}, {0, 0}}, /* s32: [-1, 0] */ + {U64, U64, {0xffffffff, 0x100000001ULL}, {0, 0}}, /* s32: [-1, 1] */ + + /* longer convergence case: learning from u64 -> s64 -> u64 -> u32, + * arriving at u32: [1, U32_MAX] (instead of more pessimistic [0, U32_MAX]) + */ + {S64, U64, {0xffffffff00000001ULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}}, + + {U32, U32, {1, U32_MAX}, {0, 0}}, + + {U32, S32, {0, U32_MAX}, {U32_MAX, U32_MAX}}, + + {S32, U64, {(u32)S32_MIN, (u32)S32_MIN}, {(u32)(s32)-255, 0}}, + {S32, S64, {(u32)S32_MIN, (u32)(s32)-255}, {(u32)(s32)-2, 0}}, + {S32, S64, {0, 1}, {(u32)S32_MIN, (u32)S32_MIN}}, + {S32, U32, {(u32)S32_MIN, (u32)S32_MIN}, {(u32)S32_MIN, (u32)S32_MIN}}, + + /* edge overlap testings for BPF_NE */ + {U64, U64, {0, U64_MAX}, {U64_MAX, U64_MAX}}, + {U64, U64, {0, U64_MAX}, {0, 0}}, + {S64, U64, {S64_MIN, 0}, {S64_MIN, S64_MIN}}, + 
{S64, U64, {S64_MIN, 0}, {0, 0}}, + {S64, U64, {S64_MIN, S64_MAX}, {S64_MAX, S64_MAX}}, + {U32, U32, {0, U32_MAX}, {0, 0}}, + {U32, U32, {0, U32_MAX}, {U32_MAX, U32_MAX}}, + {S32, U32, {(u32)S32_MIN, 0}, {0, 0}}, + {S32, U32, {(u32)S32_MIN, 0}, {(u32)S32_MIN, (u32)S32_MIN}}, + {S32, U32, {(u32)S32_MIN, S32_MAX}, {S32_MAX, S32_MAX}}, +}; + +/* Go over crafted hard-coded cases. This is fast, so we do it as part of + * normal test_progs run. + */ +void test_reg_bounds_crafted(void) +{ + struct ctx ctx; + int i; + + memset(&ctx, 0, sizeof(ctx)); + + for (i = 0; i < ARRAY_SIZE(crafted_cases); i++) { + struct subtest_case *c = &crafted_cases[i]; + + verify_case(&ctx, c->init_t, c->cond_t, c->x, c->y); + verify_case(&ctx, c->init_t, c->cond_t, c->y, c->x); + } + + cleanup_ctx(&ctx); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c new file mode 100644 index 0000000000..0c365f36c7 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2024 Meta + +#include <test_progs.h> +#include "network_helpers.h" +#include "sock_iter_batch.skel.h" + +#define TEST_NS "sock_iter_batch_netns" + +static const int nr_soreuse = 4; + +static void do_test(int sock_type, bool onebyone) +{ + int err, i, nread, to_read, total_read, iter_fd = -1; + int first_idx, second_idx, indices[nr_soreuse]; + struct bpf_link *link = NULL; + struct sock_iter_batch *skel; + int *fds[2] = {}; + + skel = sock_iter_batch__open(); + if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open")) + return; + + /* Prepare 2 buckets of sockets in the kernel hashtable */ + for (i = 0; i < ARRAY_SIZE(fds); i++) { + int local_port; + + fds[i] = start_reuseport_server(AF_INET6, sock_type, "::1", 0, 0, + nr_soreuse); + if (!ASSERT_OK_PTR(fds[i], "start_reuseport_server")) + goto done; + local_port = get_socket_local_port(*fds[i]); + if (!ASSERT_GE(local_port, 
0, "get_socket_local_port")) + goto done; + skel->rodata->ports[i] = ntohs(local_port); + } + + err = sock_iter_batch__load(skel); + if (!ASSERT_OK(err, "sock_iter_batch__load")) + goto done; + + link = bpf_program__attach_iter(sock_type == SOCK_STREAM ? + skel->progs.iter_tcp_soreuse : + skel->progs.iter_udp_soreuse, + NULL); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter")) + goto done; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_GE(iter_fd, 0, "bpf_iter_create")) + goto done; + + /* Test reading a bucket (either from fds[0] or fds[1]). + * Only read "nr_soreuse - 1" number of sockets + * from a bucket and leave one socket out from + * that bucket on purpose. + */ + to_read = (nr_soreuse - 1) * sizeof(*indices); + total_read = 0; + first_idx = -1; + do { + nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read); + if (nread <= 0 || nread % sizeof(*indices)) + break; + total_read += nread; + + if (first_idx == -1) + first_idx = indices[0]; + for (i = 0; i < nread / sizeof(*indices); i++) + ASSERT_EQ(indices[i], first_idx, "first_idx"); + } while (total_read < to_read); + ASSERT_EQ(nread, onebyone ? sizeof(*indices) : to_read, "nread"); + ASSERT_EQ(total_read, to_read, "total_read"); + + free_fds(fds[first_idx], nr_soreuse); + fds[first_idx] = NULL; + + /* Read the "whole" second bucket */ + to_read = nr_soreuse * sizeof(*indices); + total_read = 0; + second_idx = !first_idx; + do { + nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read); + if (nread <= 0 || nread % sizeof(*indices)) + break; + total_read += nread; + + for (i = 0; i < nread / sizeof(*indices); i++) + ASSERT_EQ(indices[i], second_idx, "second_idx"); + } while (total_read <= to_read); + ASSERT_EQ(nread, 0, "nread"); + /* Both so_reuseport ports should be in different buckets, so + * total_read must equal to the expected to_read. + * + * For a very unlikely case, both ports collide at the same bucket, + * the bucket offset (i.e. 
3) will be skipped and it cannot + * expect the to_read number of bytes. + */ + if (skel->bss->bucket[0] != skel->bss->bucket[1]) + ASSERT_EQ(total_read, to_read, "total_read"); + +done: + for (i = 0; i < ARRAY_SIZE(fds); i++) + free_fds(fds[i], nr_soreuse); + if (iter_fd < 0) + close(iter_fd); + bpf_link__destroy(link); + sock_iter_batch__destroy(skel); +} + +void test_sock_iter_batch(void) +{ + struct nstoken *nstoken = NULL; + + SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null"); + SYS(done, "ip netns add %s", TEST_NS); + SYS(done, "ip -net %s link set dev lo up", TEST_NS); + + nstoken = open_netns(TEST_NS); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto done; + + if (test__start_subtest("tcp")) { + do_test(SOCK_STREAM, true); + do_test(SOCK_STREAM, false); + } + if (test__start_subtest("udp")) { + do_test(SOCK_DGRAM, true); + do_test(SOCK_DGRAM, false); + } + close_netns(nstoken); + +done: + SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null"); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 7c2241fae1..77e26ecffa 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -555,6 +555,213 @@ static void test_sockmap_unconnected_unix(void) close(dgram); } +static void test_sockmap_many_socket(void) +{ + struct test_sockmap_pass_prog *skel; + int stream[2], dgram, udp, tcp; + int i, err, map, entry = 0; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + map = bpf_map__fd(skel->maps.sock_map_rx); + + dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0); + if (dgram < 0) { + test_sockmap_pass_prog__destroy(skel); + return; + } + + tcp = connected_socket_v4(); + if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) { + close(dgram); + test_sockmap_pass_prog__destroy(skel); + return; + } + + udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0); + if (udp < 0) { + 
close(dgram); + close(tcp); + test_sockmap_pass_prog__destroy(skel); + return; + } + + err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream); + ASSERT_OK(err, "socketpair(af_unix, sock_stream)"); + if (err) + goto out; + + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map, &entry, &stream[0], BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(stream)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map, &entry, &dgram, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(dgram)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map, &entry, &udp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(udp)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map, &entry, &tcp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(tcp)"); + } + for (entry--; entry >= 0; entry--) { + err = bpf_map_delete_elem(map, &entry); + ASSERT_OK(err, "bpf_map_delete_elem(entry)"); + } + + close(stream[0]); + close(stream[1]); +out: + close(dgram); + close(tcp); + close(udp); + test_sockmap_pass_prog__destroy(skel); +} + +static void test_sockmap_many_maps(void) +{ + struct test_sockmap_pass_prog *skel; + int stream[2], dgram, udp, tcp; + int i, err, map[2], entry = 0; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + map[0] = bpf_map__fd(skel->maps.sock_map_rx); + map[1] = bpf_map__fd(skel->maps.sock_map_tx); + + dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0); + if (dgram < 0) { + test_sockmap_pass_prog__destroy(skel); + return; + } + + tcp = connected_socket_v4(); + if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) { + close(dgram); + test_sockmap_pass_prog__destroy(skel); + return; + } + + udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0); + if (udp < 0) { + close(dgram); + close(tcp); + test_sockmap_pass_prog__destroy(skel); + return; + } + + err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream); + ASSERT_OK(err, "socketpair(af_unix, sock_stream)"); + if (err) + goto out; 
+ + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map[i], &entry, &stream[0], BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(stream)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map[i], &entry, &dgram, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(dgram)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map[i], &entry, &udp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(udp)"); + } + for (i = 0; i < 2; i++, entry++) { + err = bpf_map_update_elem(map[i], &entry, &tcp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(tcp)"); + } + for (entry--; entry >= 0; entry--) { + err = bpf_map_delete_elem(map[1], &entry); + entry--; + ASSERT_OK(err, "bpf_map_delete_elem(entry)"); + err = bpf_map_delete_elem(map[0], &entry); + ASSERT_OK(err, "bpf_map_delete_elem(entry)"); + } + + close(stream[0]); + close(stream[1]); +out: + close(dgram); + close(tcp); + close(udp); + test_sockmap_pass_prog__destroy(skel); +} + +static void test_sockmap_same_sock(void) +{ + struct test_sockmap_pass_prog *skel; + int stream[2], dgram, udp, tcp; + int i, err, map, zero = 0; + + skel = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + map = bpf_map__fd(skel->maps.sock_map_rx); + + dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0); + if (dgram < 0) { + test_sockmap_pass_prog__destroy(skel); + return; + } + + tcp = connected_socket_v4(); + if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) { + close(dgram); + test_sockmap_pass_prog__destroy(skel); + return; + } + + udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0); + if (udp < 0) { + close(dgram); + close(tcp); + test_sockmap_pass_prog__destroy(skel); + return; + } + + err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream); + ASSERT_OK(err, "socketpair(af_unix, sock_stream)"); + if (err) + goto out; + + for (i = 0; i < 2; i++) { + err = bpf_map_update_elem(map, &zero, &stream[0], BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(stream)"); + } + for 
(i = 0; i < 2; i++) { + err = bpf_map_update_elem(map, &zero, &dgram, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(dgram)"); + } + for (i = 0; i < 2; i++) { + err = bpf_map_update_elem(map, &zero, &udp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(udp)"); + } + for (i = 0; i < 2; i++) { + err = bpf_map_update_elem(map, &zero, &tcp, BPF_ANY); + ASSERT_OK(err, "bpf_map_update_elem(tcp)"); + } + + err = bpf_map_delete_elem(map, &zero); + ASSERT_OK(err, "bpf_map_delete_elem(entry)"); + + close(stream[0]); + close(stream[1]); +out: + close(dgram); + close(tcp); + close(udp); + test_sockmap_pass_prog__destroy(skel); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -597,7 +804,12 @@ void test_sockmap_basic(void) test_sockmap_skb_verdict_fionread(false); if (test__start_subtest("sockmap skb_verdict msg_f_peek")) test_sockmap_skb_verdict_peek(); - if (test__start_subtest("sockmap unconnected af_unix")) test_sockmap_unconnected_unix(); + if (test__start_subtest("sockmap one socket to many map entries")) + test_sockmap_many_socket(); + if (test__start_subtest("sockmap one socket to many maps")) + test_sockmap_many_maps(); + if (test__start_subtest("sockmap same socket replace")) + test_sockmap_same_sock(); } diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c index f29c08d93b..18d451be57 100644 --- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -13,22 +13,22 @@ static struct { const char *err_msg; } spin_lock_fail_tests[] = { { "lock_id_kptr_preserve", - "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) " - "R1_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" + "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2) " + "R1_w=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n" "R1 type=ptr_ expected=percpu_ptr_" }, { 
"lock_id_global_zero", - "; R1_w=map_value(off=0,ks=4,vs=4,imm=0)\n2: (85) call bpf_this_cpu_ptr#154\n" + "; R1_w=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mapval_preserve", "[0-9]\\+: (bf) r1 = r0 ;" - " R0_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)" - " R1_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)\n" + " R0_w=map_value(id=1,map=array_map,ks=4,vs=8)" + " R1_w=map_value(id=1,map=array_map,ks=4,vs=8)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_innermapval_preserve", "[0-9]\\+: (bf) r1 = r0 ;" - " R0=map_value(id=2,off=0,ks=4,vs=8,imm=0)" - " R1_w=map_value(id=2,off=0,ks=4,vs=8,imm=0)\n" + " R0=map_value(id=2,ks=4,vs=8)" + " R1_w=map_value(id=2,ks=4,vs=8)\n" "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" }, diff --git a/tools/testing/selftests/bpf/prog_tests/syscall.c b/tools/testing/selftests/bpf/prog_tests/syscall.c index f4d4000115..0be8301c0f 100644 --- a/tools/testing/selftests/bpf/prog_tests/syscall.c +++ b/tools/testing/selftests/bpf/prog_tests/syscall.c @@ -12,7 +12,7 @@ struct args { int btf_fd; }; -void test_syscall(void) +static void test_syscall_load_prog(void) { static char verifier_log[8192]; struct args ctx = { @@ -32,7 +32,7 @@ void test_syscall(void) if (!ASSERT_OK_PTR(skel, "skel_load")) goto cleanup; - prog_fd = bpf_program__fd(skel->progs.bpf_prog); + prog_fd = bpf_program__fd(skel->progs.load_prog); err = bpf_prog_test_run_opts(prog_fd, &tattr); ASSERT_EQ(err, 0, "err"); ASSERT_EQ(tattr.retval, 1, "retval"); @@ -53,3 +53,29 @@ cleanup: if (ctx.btf_fd > 0) close(ctx.btf_fd); } + +static void test_syscall_update_outer_map(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct syscall *skel; + int err, prog_fd; + + skel = syscall__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; 
+ + prog_fd = bpf_program__fd(skel->progs.update_outer_map); + err = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_EQ(err, 0, "err"); + ASSERT_EQ(opts.retval, 1, "retval"); +cleanup: + syscall__destroy(skel); +} + +void test_syscall(void) +{ + if (test__start_subtest("load_prog")) + test_syscall_load_prog(); + if (test__start_subtest("update_outer_map")) + test_syscall_update_outer_map(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c index d3491a84b3..ccae0b31ac 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c +++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c @@ -14,7 +14,8 @@ static void do_bpf_ma_test(const char *name) struct test_bpf_ma *skel; struct bpf_program *prog; struct btf *btf; - int i, err; + int i, err, id; + char tname[32]; skel = test_bpf_ma__open(); if (!ASSERT_OK_PTR(skel, "open")) @@ -25,16 +26,21 @@ static void do_bpf_ma_test(const char *name) goto out; for (i = 0; i < ARRAY_SIZE(skel->rodata->data_sizes); i++) { - char name[32]; - int id; - - snprintf(name, sizeof(name), "bin_data_%u", skel->rodata->data_sizes[i]); - id = btf__find_by_name_kind(btf, name, BTF_KIND_STRUCT); - if (!ASSERT_GT(id, 0, "bin_data")) + snprintf(tname, sizeof(tname), "bin_data_%u", skel->rodata->data_sizes[i]); + id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); + if (!ASSERT_GT(id, 0, tname)) goto out; skel->rodata->data_btf_ids[i] = id; } + for (i = 0; i < ARRAY_SIZE(skel->rodata->percpu_data_sizes); i++) { + snprintf(tname, sizeof(tname), "percpu_bin_data_%u", skel->rodata->percpu_data_sizes[i]); + id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); + if (!ASSERT_GT(id, 0, tname)) + goto out; + skel->rodata->percpu_data_btf_ids[i] = id; + } + prog = bpf_object__find_program_by_name(skel->obj, name); if (!ASSERT_OK_PTR(prog, "invalid prog name")) goto out; diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c 
b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c index e0879df386..e905cbaf6b 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -20,6 +20,122 @@ #include "test_global_func17.skel.h" #include "test_global_func_ctx_args.skel.h" +#include "bpf/libbpf_internal.h" +#include "btf_helpers.h" + +static void check_ctx_arg_type(const struct btf *btf, const struct btf_param *p) +{ + const struct btf_type *t; + const char *s; + + t = btf__type_by_id(btf, p->type); + if (!ASSERT_EQ(btf_kind(t), BTF_KIND_PTR, "ptr_t")) + return; + + s = btf_type_raw_dump(btf, t->type); + if (!ASSERT_HAS_SUBSTR(s, "STRUCT 'bpf_perf_event_data' size=0 vlen=0", + "ctx_struct_t")) + return; +} + +static void subtest_ctx_arg_rewrite(void) +{ + struct test_global_func_ctx_args *skel = NULL; + struct bpf_prog_info info; + char func_info_buf[1024] __attribute__((aligned(8))); + struct bpf_func_info_min *rec; + struct btf *btf = NULL; + __u32 info_len = sizeof(info); + int err, fd, i; + struct btf *kern_btf = NULL; + + kern_btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK_PTR(kern_btf, "kern_btf_load")) + return; + + /* simple detection of kernel native arg:ctx tag support */ + if (btf__find_by_name_kind(kern_btf, "bpf_subprog_arg_info", BTF_KIND_STRUCT) > 0) { + test__skip(); + btf__free(kern_btf); + return; + } + btf__free(kern_btf); + + skel = test_global_func_ctx_args__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + bpf_program__set_autoload(skel->progs.arg_tag_ctx_perf, true); + + err = test_global_func_ctx_args__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto out; + + memset(&info, 0, sizeof(info)); + info.func_info = ptr_to_u64(&func_info_buf); + info.nr_func_info = 3; + info.func_info_rec_size = sizeof(struct bpf_func_info_min); + + fd = bpf_program__fd(skel->progs.arg_tag_ctx_perf); + err = bpf_prog_get_info_by_fd(fd, &info, &info_len); + if (!ASSERT_OK(err, "prog_info")) + 
goto out; + + if (!ASSERT_EQ(info.nr_func_info, 3, "nr_func_info")) + goto out; + + btf = btf__load_from_kernel_by_id(info.btf_id); + if (!ASSERT_OK_PTR(btf, "obj_kern_btf")) + goto out; + + rec = (struct bpf_func_info_min *)func_info_buf; + for (i = 0; i < info.nr_func_info; i++, rec = (void *)rec + info.func_info_rec_size) { + const struct btf_type *fn_t, *proto_t; + const char *name; + + if (rec->insn_off == 0) + continue; /* main prog, skip */ + + fn_t = btf__type_by_id(btf, rec->type_id); + if (!ASSERT_OK_PTR(fn_t, "fn_type")) + goto out; + if (!ASSERT_EQ(btf_kind(fn_t), BTF_KIND_FUNC, "fn_type_kind")) + goto out; + proto_t = btf__type_by_id(btf, fn_t->type); + if (!ASSERT_OK_PTR(proto_t, "proto_type")) + goto out; + + name = btf__name_by_offset(btf, fn_t->name_off); + if (strcmp(name, "subprog_ctx_tag") == 0) { + /* int subprog_ctx_tag(void *ctx __arg_ctx) */ + if (!ASSERT_EQ(btf_vlen(proto_t), 1, "arg_cnt")) + goto out; + + /* arg 0 is PTR -> STRUCT bpf_perf_event_data */ + check_ctx_arg_type(btf, &btf_params(proto_t)[0]); + } else if (strcmp(name, "subprog_multi_ctx_tags") == 0) { + /* int subprog_multi_ctx_tags(void *ctx1 __arg_ctx, + * struct my_struct *mem, + * void *ctx2 __arg_ctx) + */ + if (!ASSERT_EQ(btf_vlen(proto_t), 3, "arg_cnt")) + goto out; + + /* arg 0 is PTR -> STRUCT bpf_perf_event_data */ + check_ctx_arg_type(btf, &btf_params(proto_t)[0]); + /* arg 2 is PTR -> STRUCT bpf_perf_event_data */ + check_ctx_arg_type(btf, &btf_params(proto_t)[2]); + } else { + ASSERT_FAIL("unexpected subprog %s", name); + goto out; + } + } + +out: + btf__free(btf); + test_global_func_ctx_args__destroy(skel); +} + void test_test_global_funcs(void) { RUN_TESTS(test_global_func1); @@ -40,4 +156,7 @@ void test_test_global_funcs(void) RUN_TESTS(test_global_func16); RUN_TESTS(test_global_func17); RUN_TESTS(test_global_func_ctx_args); + + if (test__start_subtest("ctx_arg_rewrite")) + subtest_ctx_arg_rewrite(); } diff --git 
a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index d149ab9879..2b3c6dd662 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -50,6 +50,7 @@ */ #include <arpa/inet.h> +#include <linux/if_link.h> #include <linux/if_tun.h> #include <linux/limits.h> #include <linux/sysctl.h> @@ -92,6 +93,11 @@ #define IPIP_TUNL_DEV0 "ipip00" #define IPIP_TUNL_DEV1 "ipip11" +#define XFRM_AUTH "0x1111111111111111111111111111111111111111" +#define XFRM_ENC "0x22222222222222222222222222222222" +#define XFRM_SPI_IN_TO_OUT 0x1 +#define XFRM_SPI_OUT_TO_IN 0x2 + #define PING_ARGS "-i 0.01 -c 3 -w 10 -q" static int config_device(void) @@ -264,6 +270,92 @@ static void delete_ipip_tunnel(void) SYS_NOFAIL("ip fou del port 5555 2> /dev/null"); } +static int add_xfrm_tunnel(void) +{ + /* at_ns0 namespace + * at_ns0 -> root + */ + SYS(fail, + "ip netns exec at_ns0 " + "ip xfrm state add src %s dst %s proto esp " + "spi %d reqid 1 mode tunnel replay-window 42 " + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", + IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC); + SYS(fail, + "ip netns exec at_ns0 " + "ip xfrm policy add src %s/32 dst %s/32 dir out " + "tmpl src %s dst %s proto esp reqid 1 " + "mode tunnel", + IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1); + + /* root -> at_ns0 */ + SYS(fail, + "ip netns exec at_ns0 " + "ip xfrm state add src %s dst %s proto esp " + "spi %d reqid 2 mode tunnel " + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", + IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC); + SYS(fail, + "ip netns exec at_ns0 " + "ip xfrm policy add src %s/32 dst %s/32 dir in " + "tmpl src %s dst %s proto esp reqid 2 " + "mode tunnel", + IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0); + + /* address & route */ + SYS(fail, "ip netns exec at_ns0 ip addr 
add dev veth0 %s/32", + IP4_ADDR_TUNL_DEV0); + SYS(fail, "ip netns exec at_ns0 ip route add %s dev veth0 via %s src %s", + IP4_ADDR_TUNL_DEV1, IP4_ADDR1_VETH1, IP4_ADDR_TUNL_DEV0); + + /* root namespace + * at_ns0 -> root + */ + SYS(fail, + "ip xfrm state add src %s dst %s proto esp " + "spi %d reqid 1 mode tunnel replay-window 42 " + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", + IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC); + SYS(fail, + "ip xfrm policy add src %s/32 dst %s/32 dir in " + "tmpl src %s dst %s proto esp reqid 1 " + "mode tunnel", + IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1); + + /* root -> at_ns0 */ + SYS(fail, + "ip xfrm state add src %s dst %s proto esp " + "spi %d reqid 2 mode tunnel " + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", + IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC); + SYS(fail, + "ip xfrm policy add src %s/32 dst %s/32 dir out " + "tmpl src %s dst %s proto esp reqid 2 " + "mode tunnel", + IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0); + + /* address & route */ + SYS(fail, "ip addr add dev veth1 %s/32", IP4_ADDR_TUNL_DEV1); + SYS(fail, "ip route add %s dev veth1 via %s src %s", + IP4_ADDR_TUNL_DEV0, IP4_ADDR_VETH0, IP4_ADDR_TUNL_DEV1); + + return 0; +fail: + return -1; +} + +static void delete_xfrm_tunnel(void) +{ + SYS_NOFAIL("ip xfrm policy delete dir out src %s/32 dst %s/32 2> /dev/null", + IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0); + SYS_NOFAIL("ip xfrm policy delete dir in src %s/32 dst %s/32 2> /dev/null", + IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1); + SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null", + IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT); + SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null", + IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN); +} + static int test_ping(int family, const char *addr) { SYS(fail, "%s %s %s > 
/dev/null", ping_command(family), PING_ARGS, addr); @@ -532,25 +624,85 @@ done: test_tunnel_kern__destroy(skel); } +static void test_xfrm_tunnel(void) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, + .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_xdp_attach_opts, opts); + struct test_tunnel_kern *skel = NULL; + struct nstoken *nstoken; + int xdp_prog_fd; + int tc_prog_fd; + int ifindex; + int err; + + err = add_xfrm_tunnel(); + if (!ASSERT_OK(err, "add_xfrm_tunnel")) + return; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + goto done; + + ifindex = if_nametoindex("veth1"); + if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) + goto done; + + /* attach tc prog to tunnel dev */ + tc_hook.ifindex = ifindex; + tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state); + if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, tc_prog_fd, -1)) + goto done; + + /* attach xdp prog to tunnel dev */ + xdp_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state_xdp); + if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__fd")) + goto done; + err = bpf_xdp_attach(ifindex, xdp_prog_fd, XDP_FLAGS_REPLACE, &opts); + if (!ASSERT_OK(err, "bpf_xdp_attach")) + goto done; + + /* ping from at_ns0 namespace test */ + nstoken = open_netns("at_ns0"); + err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); + close_netns(nstoken); + if (!ASSERT_OK(err, "test_ping")) + goto done; + + if (!ASSERT_EQ(skel->bss->xfrm_reqid, 1, "req_id")) + goto done; + if (!ASSERT_EQ(skel->bss->xfrm_spi, XFRM_SPI_IN_TO_OUT, "spi")) + goto done; + if (!ASSERT_EQ(skel->bss->xfrm_remote_ip, 0xac100164, "remote_ip")) + goto done; + if (!ASSERT_EQ(skel->bss->xfrm_replay_window, 42, "replay_window")) + goto done; + +done: + delete_xfrm_tunnel(); + if (skel) + test_tunnel_kern__destroy(skel); +} + #define RUN_TEST(name, ...) 
\ ({ \ if (test__start_subtest(#name)) { \ + config_device(); \ test_ ## name(__VA_ARGS__); \ + cleanup(); \ } \ }) static void *test_tunnel_run_tests(void *arg) { - cleanup(); - config_device(); - RUN_TEST(vxlan_tunnel); RUN_TEST(ip6vxlan_tunnel); RUN_TEST(ipip_tunnel, NONE); RUN_TEST(ipip_tunnel, FOU); RUN_TEST(ipip_tunnel, GUE); - - cleanup(); + RUN_TEST(xfrm_tunnel); return NULL; } diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index 760ad96b4b..d66687f1ee 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -4,10 +4,29 @@ #include "timer.skel.h" #include "timer_failure.skel.h" +#define NUM_THR 8 + +static void *spin_lock_thread(void *arg) +{ + int i, err, prog_fd = *(int *)arg; + LIBBPF_OPTS(bpf_test_run_opts, topts); + + for (i = 0; i < 10000; i++) { + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err") || + !ASSERT_OK(topts.retval, "test_run_opts retval")) + break; + } + + pthread_exit(arg); +} + static int timer(struct timer *timer_skel) { - int err, prog_fd; + int i, err, prog_fd; LIBBPF_OPTS(bpf_test_run_opts, topts); + pthread_t thread_id[NUM_THR]; + void *ret; err = timer__attach(timer_skel); if (!ASSERT_OK(err, "timer_attach")) @@ -43,6 +62,20 @@ static int timer(struct timer *timer_skel) /* check that code paths completed */ ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok"); + prog_fd = bpf_program__fd(timer_skel->progs.race); + for (i = 0; i < NUM_THR; i++) { + err = pthread_create(&thread_id[i], NULL, + &spin_lock_thread, &prog_fd); + if (!ASSERT_OK(err, "pthread_create")) + break; + } + + while (i) { + err = pthread_join(thread_id[--i], &ret); + if (ASSERT_OK(err, "pthread_join")) + ASSERT_EQ(ret, (void *)&prog_fd, "pthread_join"); + } + return 0; } diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c index 
cd051d3901..8269cdee33 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c @@ -234,6 +234,177 @@ static void test_attach_api_syms(void) test_attach_api("/proc/self/exe", NULL, &opts); } +static void test_attach_api_fails(void) +{ + LIBBPF_OPTS(bpf_link_create_opts, opts); + const char *path = "/proc/self/exe"; + struct uprobe_multi *skel = NULL; + int prog_fd, link_fd = -1; + unsigned long offset = 0; + + skel = uprobe_multi__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.uprobe_extra); + + /* abnormal cnt */ + opts.uprobe_multi.path = path; + opts.uprobe_multi.offsets = &offset; + opts.uprobe_multi.cnt = INT_MAX; + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -E2BIG, "big cnt")) + goto cleanup; + + /* cnt is 0 */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.offsets = (unsigned long *) &offset, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EINVAL, "cnt_is_zero")) + goto cleanup; + + /* negative offset */ + offset = -1; + opts.uprobe_multi.path = path; + opts.uprobe_multi.offsets = (unsigned long *) &offset; + opts.uprobe_multi.cnt = 1; + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EINVAL, "offset_is_negative")) + goto cleanup; + + /* offsets is NULL */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EINVAL, "offsets_is_null")) + goto cleanup; + + /* wrong 
offsets pointer */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.offsets = (unsigned long *) 1, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EFAULT, "offsets_is_wrong")) + goto cleanup; + + /* path is NULL */ + offset = 1; + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EINVAL, "path_is_null")) + goto cleanup; + + /* wrong path pointer */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = (const char *) 1, + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EFAULT, "path_is_wrong")) + goto cleanup; + + /* wrong path type */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = "/", + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EBADF, "path_is_wrong_type")) + goto cleanup; + + /* wrong cookies pointer */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cookies = (__u64 *) 1ULL, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EFAULT, "cookies_is_wrong")) + goto cleanup; + + /* wrong ref_ctr_offsets pointer */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.offsets = (unsigned long *) &offset, + 
.uprobe_multi.cookies = (__u64 *) &offset, + .uprobe_multi.ref_ctr_offsets = (unsigned long *) 1, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EFAULT, "ref_ctr_offsets_is_wrong")) + goto cleanup; + + /* wrong flags */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.flags = 1 << 31, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EINVAL, "wrong_flags")) + goto cleanup; + + /* wrong pid */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cnt = 1, + .uprobe_multi.pid = -2, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + ASSERT_EQ(link_fd, -ESRCH, "pid_is_wrong"); + +cleanup: + if (link_fd >= 0) + close(link_fd); + uprobe_multi__destroy(skel); +} + static void __test_link_api(struct child *child) { int prog_fd, link1_fd = -1, link2_fd = -1, link3_fd = -1, link4_fd = -1; @@ -249,7 +420,7 @@ static void __test_link_api(struct child *child) int link_extra_fd = -1; int err; - err = elf_resolve_syms_offsets(path, 3, syms, (unsigned long **) &offsets); + err = elf_resolve_syms_offsets(path, 3, syms, (unsigned long **) &offsets, STT_FUNC); if (!ASSERT_OK(err, "elf_resolve_syms_offsets")) return; @@ -311,7 +482,7 @@ cleanup: free(offsets); } -void test_link_api(void) +static void test_link_api(void) { struct child *child; @@ -412,4 +583,6 @@ void test_uprobe_multi_test(void) test_bench_attach_uprobe(); if (test__start_subtest("bench_usdt")) test_bench_attach_usdt(); + if (test__start_subtest("attach_api_fails")) + test_attach_api_fails(); } diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 
5cfa7a6316..d62c5bf00e 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -6,6 +6,7 @@ #include "verifier_and.skel.h" #include "verifier_array_access.skel.h" #include "verifier_basic_stack.skel.h" +#include "verifier_bitfield_write.skel.h" #include "verifier_bounds.skel.h" #include "verifier_bounds_deduction.skel.h" #include "verifier_bounds_deduction_non_const.skel.h" @@ -13,6 +14,7 @@ #include "verifier_bpf_get_stack.skel.h" #include "verifier_bswap.skel.h" #include "verifier_btf_ctx_access.skel.h" +#include "verifier_btf_unreliable_prog.skel.h" #include "verifier_cfg.skel.h" #include "verifier_cgroup_inv_retcode.skel.h" #include "verifier_cgroup_skb.skel.h" @@ -25,6 +27,7 @@ #include "verifier_direct_stack_access_wraparound.skel.h" #include "verifier_div0.skel.h" #include "verifier_div_overflow.skel.h" +#include "verifier_global_subprogs.skel.h" #include "verifier_gotol.skel.h" #include "verifier_helper_access_var_len.skel.h" #include "verifier_helper_packet_access.skel.h" @@ -115,6 +118,7 @@ static void run_tests_aux(const char *skel_name, void test_verifier_and(void) { RUN(verifier_and); } void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); } +void test_verifier_bitfield_write(void) { RUN(verifier_bitfield_write); } void test_verifier_bounds(void) { RUN(verifier_bounds); } void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction); } void test_verifier_bounds_deduction_non_const(void) { RUN(verifier_bounds_deduction_non_const); } @@ -122,6 +126,7 @@ void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_u void test_verifier_bpf_get_stack(void) { RUN(verifier_bpf_get_stack); } void test_verifier_bswap(void) { RUN(verifier_bswap); } void test_verifier_btf_ctx_access(void) { RUN(verifier_btf_ctx_access); } +void test_verifier_btf_unreliable_prog(void) { RUN(verifier_btf_unreliable_prog); } void test_verifier_cfg(void) { RUN(verifier_cfg); } 
void test_verifier_cgroup_inv_retcode(void) { RUN(verifier_cgroup_inv_retcode); } void test_verifier_cgroup_skb(void) { RUN(verifier_cgroup_skb); } @@ -134,6 +139,7 @@ void test_verifier_direct_packet_access(void) { RUN(verifier_direct_packet_acces void test_verifier_direct_stack_access_wraparound(void) { RUN(verifier_direct_stack_access_wraparound); } void test_verifier_div0(void) { RUN(verifier_div0); } void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); } +void test_verifier_global_subprogs(void) { RUN(verifier_global_subprogs); } void test_verifier_gotol(void) { RUN(verifier_gotol); } void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_len); } void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_access); } diff --git a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c index dd7f2bc700..ab0f02faa8 100644 --- a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c +++ b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c @@ -16,9 +16,12 @@ #include <sys/wait.h> #include <sys/mman.h> #include <linux/keyctl.h> +#include <sys/xattr.h> +#include <linux/fsverity.h> #include <test_progs.h> #include "test_verify_pkcs7_sig.skel.h" +#include "test_sig_in_xattr.skel.h" #define MAX_DATA_SIZE (1024 * 1024) #define MAX_SIG_SIZE 1024 @@ -26,6 +29,10 @@ #define VERIFY_USE_SECONDARY_KEYRING (1UL) #define VERIFY_USE_PLATFORM_KEYRING (2UL) +#ifndef SHA256_DIGEST_SIZE +#define SHA256_DIGEST_SIZE 32 +#endif + /* In stripped ARM and x86-64 modules, ~ is surprisingly rare. 
*/ #define MODULE_SIG_STRING "~Module signature appended~\n" @@ -254,7 +261,7 @@ out: return ret; } -void test_verify_pkcs7_sig(void) +static void test_verify_pkcs7_sig_from_map(void) { libbpf_print_fn_t old_print_cb; char tmp_dir_template[] = "/tmp/verify_sigXXXXXX"; @@ -400,3 +407,159 @@ close_prog: skel->bss->monitored_pid = 0; test_verify_pkcs7_sig__destroy(skel); } + +static int get_signature_size(const char *sig_path) +{ + struct stat st; + + if (stat(sig_path, &st) == -1) + return -1; + + return st.st_size; +} + +static int add_signature_to_xattr(const char *data_path, const char *sig_path) +{ + char sig[MAX_SIG_SIZE] = {0}; + int fd, size, ret; + + if (sig_path) { + fd = open(sig_path, O_RDONLY); + if (fd < 0) + return -1; + + size = read(fd, sig, MAX_SIG_SIZE); + close(fd); + if (size <= 0) + return -1; + } else { + /* no sig_path, just write 32 bytes of zeros */ + size = 32; + } + ret = setxattr(data_path, "user.sig", sig, size, 0); + if (!ASSERT_OK(ret, "setxattr")) + return -1; + + return 0; +} + +static int test_open_file(struct test_sig_in_xattr *skel, char *data_path, + pid_t pid, bool should_success, char *name) +{ + int ret; + + skel->bss->monitored_pid = pid; + ret = open(data_path, O_RDONLY); + close(ret); + skel->bss->monitored_pid = 0; + + if (should_success) { + if (!ASSERT_GE(ret, 0, name)) + return -1; + } else { + if (!ASSERT_LT(ret, 0, name)) + return -1; + } + return 0; +} + +static void test_pkcs7_sig_fsverity(void) +{ + char data_path[PATH_MAX]; + char sig_path[PATH_MAX]; + char tmp_dir_template[] = "/tmp/verify_sigXXXXXX"; + char *tmp_dir; + struct test_sig_in_xattr *skel = NULL; + pid_t pid; + int ret; + + tmp_dir = mkdtemp(tmp_dir_template); + if (!ASSERT_OK_PTR(tmp_dir, "mkdtemp")) + return; + + snprintf(data_path, PATH_MAX, "%s/data-file", tmp_dir); + snprintf(sig_path, PATH_MAX, "%s/sig-file", tmp_dir); + + ret = _run_setup_process(tmp_dir, "setup"); + if (!ASSERT_OK(ret, "_run_setup_process")) + goto out; + + ret = 
_run_setup_process(tmp_dir, "fsverity-create-sign"); + + if (ret) { + printf("%s: SKIP: fsverity [sign|enable] doesn't work.\n" + "To run this test, try enable CONFIG_FS_VERITY and enable FSVerity for the filesystem.\n", + __func__); + test__skip(); + goto out; + } + + skel = test_sig_in_xattr__open(); + if (!ASSERT_OK_PTR(skel, "test_sig_in_xattr__open")) + goto out; + ret = get_signature_size(sig_path); + if (!ASSERT_GT(ret, 0, "get_signature_size")) + goto out; + skel->bss->sig_size = ret; + skel->bss->user_keyring_serial = syscall(__NR_request_key, "keyring", + "ebpf_testing_keyring", NULL, + KEY_SPEC_SESSION_KEYRING); + memcpy(skel->bss->digest, "FSVerity", 8); + + ret = test_sig_in_xattr__load(skel); + if (!ASSERT_OK(ret, "test_sig_in_xattr__load")) + goto out; + + ret = test_sig_in_xattr__attach(skel); + if (!ASSERT_OK(ret, "test_sig_in_xattr__attach")) + goto out; + + pid = getpid(); + + /* Case 1: fsverity is not enabled, open should succeed */ + if (test_open_file(skel, data_path, pid, true, "open_1")) + goto out; + + /* Case 2: fsverity is enabled, xattr is missing, open should + * fail + */ + ret = _run_setup_process(tmp_dir, "fsverity-enable"); + if (!ASSERT_OK(ret, "fsverity-enable")) + goto out; + if (test_open_file(skel, data_path, pid, false, "open_2")) + goto out; + + /* Case 3: fsverity is enabled, xattr has valid signature, open + * should succeed + */ + ret = add_signature_to_xattr(data_path, sig_path); + if (!ASSERT_OK(ret, "add_signature_to_xattr_1")) + goto out; + + if (test_open_file(skel, data_path, pid, true, "open_3")) + goto out; + + /* Case 4: fsverity is enabled, xattr has invalid signature, open + * should fail + */ + ret = add_signature_to_xattr(data_path, NULL); + if (!ASSERT_OK(ret, "add_signature_to_xattr_2")) + goto out; + test_open_file(skel, data_path, pid, false, "open_4"); + +out: + _run_setup_process(tmp_dir, "cleanup"); + if (!skel) + return; + + skel->bss->monitored_pid = 0; + test_sig_in_xattr__destroy(skel); +} + +void 
test_verify_pkcs7_sig(void) +{ + if (test__start_subtest("pkcs7_sig_from_map")) + test_verify_pkcs7_sig_from_map(); + if (test__start_subtest("pkcs7_sig_fsverity")) + test_pkcs7_sig_fsverity(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/vmlinux.c b/tools/testing/selftests/bpf/prog_tests/vmlinux.c index 72310cfc64..6fb2217d94 100644 --- a/tools/testing/selftests/bpf/prog_tests/vmlinux.c +++ b/tools/testing/selftests/bpf/prog_tests/vmlinux.c @@ -16,27 +16,27 @@ static void nsleep() void test_vmlinux(void) { - int duration = 0, err; + int err; struct test_vmlinux* skel; struct test_vmlinux__bss *bss; skel = test_vmlinux__open_and_load(); - if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + if (!ASSERT_OK_PTR(skel, "test_vmlinux__open_and_load")) return; bss = skel->bss; err = test_vmlinux__attach(skel); - if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + if (!ASSERT_OK(err, "test_vmlinux__attach")) goto cleanup; /* trigger everything */ nsleep(); - CHECK(!bss->tp_called, "tp", "not called\n"); - CHECK(!bss->raw_tp_called, "raw_tp", "not called\n"); - CHECK(!bss->tp_btf_called, "tp_btf", "not called\n"); - CHECK(!bss->kprobe_called, "kprobe", "not called\n"); - CHECK(!bss->fentry_called, "fentry", "not called\n"); + ASSERT_TRUE(bss->tp_called, "tp"); + ASSERT_TRUE(bss->raw_tp_called, "raw_tp"); + ASSERT_TRUE(bss->tp_btf_called, "tp_btf"); + ASSERT_TRUE(bss->kprobe_called, "kprobe"); + ASSERT_TRUE(bss->fentry_called, "fentry"); cleanup: test_vmlinux__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index ab4952b9fb..e6a783c7f5 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -77,8 +77,8 @@ void test_xdp_context_test_run(void) test_xdp_context_error(prog_fd, opts, 4, sizeof(__u32), sizeof(data), 0, 0, 0); - /* Meta data must be 32 
bytes or smaller */ - test_xdp_context_error(prog_fd, opts, 0, 36, sizeof(data), 0, 0, 0); + /* Meta data must be 255 bytes or smaller */ + test_xdp_context_error(prog_fd, opts, 0, 256, sizeof(data), 0, 0, 0); /* Total size of data must match data_end - data_meta */ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index 4439ba9392..05edcf32f5 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -20,7 +20,7 @@ #define UDP_PAYLOAD_BYTES 4 -#define AF_XDP_SOURCE_PORT 1234 +#define UDP_SOURCE_PORT 1234 #define AF_XDP_CONSUMER_PORT 8080 #define UMEM_NUM 16 @@ -33,6 +33,18 @@ #define RX_ADDR "10.0.0.2" #define PREFIX_LEN "8" #define FAMILY AF_INET +#define TX_NETNS_NAME "xdp_metadata_tx" +#define RX_NETNS_NAME "xdp_metadata_rx" +#define TX_MAC "00:00:00:00:00:01" +#define RX_MAC "00:00:00:00:00:02" + +#define VLAN_ID 59 +#define VLAN_PROTO "802.1Q" +#define VLAN_PID htons(ETH_P_8021Q) +#define TX_NAME_VLAN TX_NAME "." 
TO_STR(VLAN_ID) + +#define XDP_RSS_TYPE_L4 BIT(3) +#define VLAN_VID_MASK 0xfff struct xsk { void *umem_area; @@ -56,7 +68,8 @@ static int open_xsk(int ifindex, struct xsk *xsk) .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, - .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG, + .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG | XDP_UMEM_TX_SW_CSUM, + .tx_metadata_len = sizeof(struct xsk_tx_metadata), }; __u32 idx; u64 addr; @@ -138,6 +151,7 @@ static void ip_csum(struct iphdr *iph) static int generate_packet(struct xsk *xsk, __u16 dst_port) { + struct xsk_tx_metadata *meta; struct xdp_desc *tx_desc; struct udphdr *udph; struct ethhdr *eth; @@ -151,10 +165,14 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port) return -1; tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx); - tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE; + tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE + sizeof(struct xsk_tx_metadata); printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr); data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr); + meta = data - sizeof(struct xsk_tx_metadata); + memset(meta, 0, sizeof(*meta)); + meta->flags = XDP_TXMD_FLAGS_TIMESTAMP; + eth = data; iph = (void *)(eth + 1); udph = (void *)(iph + 1); @@ -175,14 +193,20 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port) ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)"); ip_csum(iph); - udph->source = htons(AF_XDP_SOURCE_PORT); + udph->source = htons(UDP_SOURCE_PORT); udph->dest = htons(dst_port); udph->len = htons(sizeof(*udph) + UDP_PAYLOAD_BYTES); - udph->check = 0; + udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, + ntohs(udph->len), IPPROTO_UDP, 0); memset(udph + 1, 0xAA, UDP_PAYLOAD_BYTES); + meta->flags |= XDP_TXMD_FLAGS_CHECKSUM; + meta->request.csum_start = sizeof(*eth) + sizeof(*iph); + meta->request.csum_offset = offsetof(struct udphdr, check); + tx_desc->len = 
sizeof(*eth) + sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES; + tx_desc->options |= XDP_TX_METADATA; xsk_ring_prod__submit(&xsk->tx, 1); ret = sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0); @@ -192,15 +216,47 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port) return 0; } +static int generate_packet_inet(void) +{ + char udp_payload[UDP_PAYLOAD_BYTES]; + struct sockaddr_in rx_addr; + int sock_fd, err = 0; + + /* Build a packet */ + memset(udp_payload, 0xAA, UDP_PAYLOAD_BYTES); + rx_addr.sin_addr.s_addr = inet_addr(RX_ADDR); + rx_addr.sin_family = AF_INET; + rx_addr.sin_port = htons(AF_XDP_CONSUMER_PORT); + + sock_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (!ASSERT_GE(sock_fd, 0, "socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)")) + return sock_fd; + + err = sendto(sock_fd, udp_payload, UDP_PAYLOAD_BYTES, MSG_DONTWAIT, + (void *)&rx_addr, sizeof(rx_addr)); + ASSERT_GE(err, 0, "sendto"); + + close(sock_fd); + return err; +} + static void complete_tx(struct xsk *xsk) { - __u32 idx; + struct xsk_tx_metadata *meta; __u64 addr; + void *data; + __u32 idx; if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) { addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx); printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr); + + data = xsk_umem__get_data(xsk->umem_area, addr); + meta = data - sizeof(struct xsk_tx_metadata); + + ASSERT_NEQ(meta->completion.tx_timestamp, 0, "tx_timestamp"); + xsk_ring_cons__release(&xsk->comp, 1); } } @@ -216,11 +272,12 @@ static void refill_rx(struct xsk *xsk, __u64 addr) } } -static int verify_xsk_metadata(struct xsk *xsk) +static int verify_xsk_metadata(struct xsk *xsk, bool sent_from_af_xdp) { const struct xdp_desc *rx_desc; struct pollfd fds = {}; struct xdp_meta *meta; + struct udphdr *udph; struct ethhdr *eth; struct iphdr *iph; __u64 comp_addr; @@ -257,6 +314,7 @@ static int verify_xsk_metadata(struct xsk *xsk) ASSERT_EQ(eth->h_proto, htons(ETH_P_IP), "eth->h_proto"); iph 
= (void *)(eth + 1); ASSERT_EQ((int)iph->version, 4, "iph->version"); + udph = (void *)(iph + 1); /* custom metadata */ @@ -268,14 +326,42 @@ static int verify_xsk_metadata(struct xsk *xsk) if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash")) return -1; + if (!sent_from_af_xdp) { + if (!ASSERT_NEQ(meta->rx_hash_type & XDP_RSS_TYPE_L4, 0, "rx_hash_type")) + return -1; + + if (!ASSERT_EQ(meta->rx_vlan_tci & VLAN_VID_MASK, VLAN_ID, "rx_vlan_tci")) + return -1; + + if (!ASSERT_EQ(meta->rx_vlan_proto, VLAN_PID, "rx_vlan_proto")) + return -1; + goto done; + } + ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type"); + /* checksum offload */ + ASSERT_EQ(udph->check, htons(0x721c), "csum"); + +done: xsk_ring_cons__release(&xsk->rx, 1); refill_rx(xsk, comp_addr); return 0; } +static void switch_ns_to_rx(struct nstoken **tok) +{ + close_netns(*tok); + *tok = open_netns(RX_NETNS_NAME); +} + +static void switch_ns_to_tx(struct nstoken **tok) +{ + close_netns(*tok); + *tok = open_netns(TX_NETNS_NAME); +} + void test_xdp_metadata(void) { struct xdp_metadata2 *bpf_obj2 = NULL; @@ -293,27 +379,35 @@ void test_xdp_metadata(void) int sock_fd; int ret; - /* Setup new networking namespace, with a veth pair. */ + /* Setup new networking namespaces, with a veth pair. 
*/ + SYS(out, "ip netns add " TX_NETNS_NAME); + SYS(out, "ip netns add " RX_NETNS_NAME); - SYS(out, "ip netns add xdp_metadata"); - tok = open_netns("xdp_metadata"); + tok = open_netns(TX_NETNS_NAME); SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME " type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1"); - SYS(out, "ip link set dev " TX_NAME " address 00:00:00:00:00:01"); - SYS(out, "ip link set dev " RX_NAME " address 00:00:00:00:00:02"); + SYS(out, "ip link set " RX_NAME " netns " RX_NETNS_NAME); + + SYS(out, "ip link set dev " TX_NAME " address " TX_MAC); SYS(out, "ip link set dev " TX_NAME " up"); + + SYS(out, "ip link add link " TX_NAME " " TX_NAME_VLAN + " type vlan proto " VLAN_PROTO " id " TO_STR(VLAN_ID)); + SYS(out, "ip link set dev " TX_NAME_VLAN " up"); + SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME_VLAN); + + /* Avoid ARP calls */ + SYS(out, "ip -4 neigh add " RX_ADDR " lladdr " RX_MAC " dev " TX_NAME_VLAN); + + switch_ns_to_rx(&tok); + + SYS(out, "ip link set dev " RX_NAME " address " RX_MAC); SYS(out, "ip link set dev " RX_NAME " up"); - SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME); SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME); rx_ifindex = if_nametoindex(RX_NAME); - tx_ifindex = if_nametoindex(TX_NAME); - /* Setup separate AF_XDP for TX and RX interfaces. */ - - ret = open_xsk(tx_ifindex, &tx_xsk); - if (!ASSERT_OK(ret, "open_xsk(TX_NAME)")) - goto out; + /* Setup separate AF_XDP for RX interface. */ ret = open_xsk(rx_ifindex, &rx_xsk); if (!ASSERT_OK(ret, "open_xsk(RX_NAME)")) @@ -354,18 +448,38 @@ void test_xdp_metadata(void) if (!ASSERT_GE(ret, 0, "bpf_map_update_elem")) goto out; - /* Send packet destined to RX AF_XDP socket. */ + switch_ns_to_tx(&tok); + + /* Setup separate AF_XDP for TX interface and send packet to the RX socket. 
*/ + tx_ifindex = if_nametoindex(TX_NAME); + ret = open_xsk(tx_ifindex, &tx_xsk); + if (!ASSERT_OK(ret, "open_xsk(TX_NAME)")) + goto out; + if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0, "generate AF_XDP_CONSUMER_PORT")) goto out; - /* Verify AF_XDP RX packet has proper metadata. */ - if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk), 0, + switch_ns_to_rx(&tok); + + /* Verify packet sent from AF_XDP has proper metadata. */ + if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, true), 0, "verify_xsk_metadata")) goto out; + switch_ns_to_tx(&tok); complete_tx(&tx_xsk); + /* Now check metadata of packet, generated with network stack */ + if (!ASSERT_GE(generate_packet_inet(), 0, "generate UDP packet")) + goto out; + + switch_ns_to_rx(&tok); + + if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, false), 0, + "verify_xsk_metadata")) + goto out; + /* Make sure freplace correctly picks up original bound device * and doesn't crash. */ @@ -383,11 +497,15 @@ void test_xdp_metadata(void) if (!ASSERT_OK(xdp_metadata2__attach(bpf_obj2), "attach freplace")) goto out; + switch_ns_to_tx(&tok); + /* Send packet to trigger . */ if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0, "generate freplace packet")) goto out; + switch_ns_to_rx(&tok); + while (!retries--) { if (bpf_obj2->bss->called) break; @@ -402,5 +520,6 @@ out: xdp_metadata__destroy(bpf_obj); if (tok) close_netns(tok); - SYS_NOFAIL("ip netns del xdp_metadata"); + SYS_NOFAIL("ip netns del " RX_NETNS_NAME); + SYS_NOFAIL("ip netns del " TX_NETNS_NAME); } diff --git a/tools/testing/selftests/bpf/progs/access_map_in_map.c b/tools/testing/selftests/bpf/progs/access_map_in_map.c new file mode 100644 index 0000000000..1126871c2e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/access_map_in_map.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. 
Huawei Technologies Co., Ltd */ +#include <linux/bpf.h> +#include <time.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_misc.h" + +struct inner_map_type { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, 4); + __uint(value_size, 4); + __uint(max_entries, 1); +} inner_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); + __array(values, struct inner_map_type); +} outer_array_map SEC(".maps") = { + .values = { + [0] = &inner_map, + }, +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); + __array(values, struct inner_map_type); +} outer_htab_map SEC(".maps") = { + .values = { + [0] = &inner_map, + }, +}; + +char _license[] SEC("license") = "GPL"; + +int tgid = 0; + +static int acc_map_in_map(void *outer_map) +{ + int i, key, value = 0xdeadbeef; + void *inner_map; + + if ((bpf_get_current_pid_tgid() >> 32) != tgid) + return 0; + + /* Find nonexistent inner map */ + key = 1; + inner_map = bpf_map_lookup_elem(outer_map, &key); + if (inner_map) + return 0; + + /* Find the old inner map */ + key = 0; + inner_map = bpf_map_lookup_elem(outer_map, &key); + if (!inner_map) + return 0; + + /* Wait for the old inner map to be replaced */ + for (i = 0; i < 2048; i++) + bpf_map_update_elem(inner_map, &key, &value, 0); + + return 0; +} + +SEC("?kprobe/" SYS_PREFIX "sys_getpgid") +int access_map_in_array(void *ctx) +{ + return acc_map_in_map(&outer_array_map); +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int sleepable_access_map_in_array(void *ctx) +{ + return acc_map_in_map(&outer_array_map); +} + +SEC("?kprobe/" SYS_PREFIX "sys_getpgid") +int access_map_in_htab(void *ctx) +{ + return acc_map_in_map(&outer_htab_map); +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int sleepable_access_map_in_htab(void *ctx) +{ + return acc_map_in_map(&outer_htab_map); +} diff --git 
a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c index feaaa2b89c..5014a17d6c 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c @@ -20,7 +20,7 @@ struct { } hashmap1 SEC(".maps"); /* will set before prog run */ -volatile const __u32 num_cpus = 0; +volatile const __s32 num_cpus = 0; /* will collect results during prog run */ __u32 key_sum_a = 0, key_sum_b = 0, key_sum_c = 0; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c index dd923dc637..423b39e60b 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c @@ -35,7 +35,7 @@ SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx) return 0; file = vma->vm_file; - if (task->tgid != pid) { + if (task->tgid != (pid_t)pid) { if (one_task) one_task_error = 1; return 0; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c index 96131b9a1c..6cbb3393f2 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c @@ -22,7 +22,7 @@ int dump_task(struct bpf_iter__task *ctx) return 0; } - if (task->pid != tid) + if (task->pid != (pid_t)tid) num_unknown_tid++; else num_known_tid++; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c index 400fdf8d62..dbf61c44ac 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c @@ -45,7 +45,7 @@ int dump_bpf_map(struct bpf_iter__bpf_map *ctx) } /* fill seq_file buffer */ - for (i = 0; i < print_len; i++) + for (i = 0; i < (int)print_len; i++) bpf_seq_write(seq, &seq_num, 
sizeof(seq_num)); return ret; diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 799fff4995..2fd59970c4 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -71,6 +71,7 @@ #define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="#val))) #define __auxiliary __attribute__((btf_decl_tag("comment:test_auxiliary"))) #define __auxiliary_unpriv __attribute__((btf_decl_tag("comment:test_auxiliary_unpriv"))) +#define __btf_path(path) __attribute__((btf_decl_tag("comment:test_btf_path=" path))) /* Convenience macro for use with 'asm volatile' blocks */ #define __naked __attribute__((naked)) diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index 0b793a1027..e8bd4b7b5e 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -26,6 +26,7 @@ #define IPV6_AUTOFLOWLABEL 70 #define TC_ACT_UNSPEC (-1) +#define TC_ACT_OK 0 #define TC_ACT_SHOT 2 #define SOL_TCP 6 @@ -71,6 +72,8 @@ #define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr #define inet_dport sk.__sk_common.skc_dport +#define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1] + #define ir_loc_addr req.__req_common.skc_rcv_saddr #define ir_num req.__req_common.skc_num #define ir_rmt_addr req.__req_common.skc_daddr @@ -84,6 +87,7 @@ #define sk_rmem_alloc sk_backlog.rmem_alloc #define sk_refcnt __sk_common.skc_refcnt #define sk_state __sk_common.skc_state +#define sk_net __sk_common.skc_net #define sk_v6_daddr __sk_common.skc_v6_daddr #define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr #define sk_flags __sk_common.skc_flags diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c index b7fa8804e1..45a0e9f492 100644 --- 
a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c +++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c @@ -11,7 +11,7 @@ __u32 invocations = 0; __u32 assertion_error = 0; __u32 retval_value = 0; -__u32 page_size = 0; +__s32 page_size = 0; SEC("cgroup/setsockopt") int get_retval(struct bpf_sockopt *ctx) diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c index 0fa564a5cc..9fe9c4a4e8 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c @@ -78,7 +78,7 @@ int BPF_PROG(cgrp_kfunc_acquire_fp, struct cgroup *cgrp, const char *path) } SEC("kretprobe/cgroup_destroy_locked") -__failure __msg("reg type unsupported for arg#0 function") +__failure __msg("calling kernel function bpf_cgroup_acquire is not allowed") int BPF_PROG(cgrp_kfunc_acquire_unsafe_kretprobe, struct cgroup *cgrp) { struct cgroup *acquired; diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c index a043d8fefd..610c2427fd 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c @@ -21,50 +21,100 @@ struct { __type(value, long); } map_b SEC(".maps"); +int target_hid = 0; +bool is_cgroup1 = 0; + +struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym; +void bpf_cgroup_release(struct cgroup *cgrp) __ksym; + +static void __on_lookup(struct cgroup *cgrp) +{ + bpf_cgrp_storage_delete(&map_a, cgrp); + bpf_cgrp_storage_delete(&map_b, cgrp); +} + SEC("fentry/bpf_local_storage_lookup") int BPF_PROG(on_lookup) { struct task_struct *task = bpf_get_current_task_btf(); + struct cgroup *cgrp; + + if (is_cgroup1) { + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; - bpf_cgrp_storage_delete(&map_a, task->cgroups->dfl_cgrp); - 
bpf_cgrp_storage_delete(&map_b, task->cgroups->dfl_cgrp); + __on_lookup(cgrp); + bpf_cgroup_release(cgrp); + return 0; + } + + __on_lookup(task->cgroups->dfl_cgrp); return 0; } -SEC("fentry/bpf_local_storage_update") -int BPF_PROG(on_update) +static void __on_update(struct cgroup *cgrp) { - struct task_struct *task = bpf_get_current_task_btf(); long *ptr; - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) *ptr += 1; - ptr = bpf_cgrp_storage_get(&map_b, task->cgroups->dfl_cgrp, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + ptr = bpf_cgrp_storage_get(&map_b, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) *ptr += 1; +} +SEC("fentry/bpf_local_storage_update") +int BPF_PROG(on_update) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct cgroup *cgrp; + + if (is_cgroup1) { + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + __on_update(cgrp); + bpf_cgroup_release(cgrp); + return 0; + } + + __on_update(task->cgroups->dfl_cgrp); return 0; } -SEC("tp_btf/sys_enter") -int BPF_PROG(on_enter, struct pt_regs *regs, long id) +static void __on_enter(struct pt_regs *regs, long id, struct cgroup *cgrp) { - struct task_struct *task; long *ptr; - task = bpf_get_current_task_btf(); - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) *ptr = 200; - ptr = bpf_cgrp_storage_get(&map_b, task->cgroups->dfl_cgrp, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + ptr = bpf_cgrp_storage_get(&map_b, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) *ptr = 100; +} + +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct cgroup *cgrp; + + if (is_cgroup1) { + cgrp = bpf_task_get_cgroup1(task, target_hid); + 
if (!cgrp) + return 0; + + __on_enter(regs, id, cgrp); + bpf_cgroup_release(cgrp); + return 0; + } + + __on_enter(regs, id, task->cgroups->dfl_cgrp); return 0; } diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c index 4c7844e1db..5e282c16ea 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c @@ -15,9 +15,13 @@ struct { __type(value, long); } map_a SEC(".maps"); -__u32 target_pid; +__s32 target_pid; __u64 cgroup_id; +int target_hid; +bool is_cgroup1; +struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym; +void bpf_cgroup_release(struct cgroup *cgrp) __ksym; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; @@ -37,23 +41,50 @@ int cgroup_iter(struct bpf_iter__cgroup *ctx) return 0; } +static void __no_rcu_lock(struct cgroup *cgrp) +{ + long *ptr; + + /* Note that trace rcu is held in sleepable prog, so we can use + * bpf_cgrp_storage_get() in sleepable prog. 
+ */ + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + cgroup_id = cgrp->kn->id; +} + SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") -int no_rcu_lock(void *ctx) +int cgrp1_no_rcu_lock(void *ctx) { struct task_struct *task; struct cgroup *cgrp; - long *ptr; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + /* bpf_task_get_cgroup1 can work in sleepable prog */ + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + __no_rcu_lock(cgrp); + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int no_rcu_lock(void *ctx) +{ + struct task_struct *task; task = bpf_get_current_task_btf(); if (task->pid != target_pid) return 0; /* task->cgroups is untrusted in sleepable prog outside of RCU CS */ - cgrp = task->cgroups->dfl_cgrp; - ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); - if (ptr) - cgroup_id = cgrp->kn->id; + __no_rcu_lock(task->cgroups->dfl_cgrp); return 0; } @@ -68,6 +99,22 @@ int yes_rcu_lock(void *ctx) if (task->pid != target_pid) return 0; + if (is_cgroup1) { + bpf_rcu_read_lock(); + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) { + bpf_rcu_read_unlock(); + return 0; + } + + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + cgroup_id = cgrp->kn->id; + bpf_cgroup_release(cgrp); + bpf_rcu_read_unlock(); + return 0; + } + bpf_rcu_read_lock(); cgrp = task->cgroups->dfl_cgrp; /* cgrp is trusted under RCU CS */ diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c index 9ebb8e2fe5..1c348f000f 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c @@ -27,62 +27,100 @@ pid_t target_pid = 0; int mismatch_cnt = 0; int enter_cnt = 0; int exit_cnt = 0; +int target_hid = 0; +bool is_cgroup1 = 0; -SEC("tp_btf/sys_enter") -int 
BPF_PROG(on_enter, struct pt_regs *regs, long id) +struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym; +void bpf_cgroup_release(struct cgroup *cgrp) __ksym; + +static void __on_enter(struct pt_regs *regs, long id, struct cgroup *cgrp) { - struct task_struct *task; long *ptr; int err; - task = bpf_get_current_task_btf(); - if (task->pid != target_pid) - return 0; - /* populate value 0 */ - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!ptr) - return 0; + return; /* delete value 0 */ - err = bpf_cgrp_storage_delete(&map_a, task->cgroups->dfl_cgrp); + err = bpf_cgrp_storage_delete(&map_a, cgrp); if (err) - return 0; + return; /* value is not available */ - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, 0); + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, 0); if (ptr) - return 0; + return; /* re-populate the value */ - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!ptr) - return 0; + return; __sync_fetch_and_add(&enter_cnt, 1); *ptr = MAGIC_VALUE + enter_cnt; - - return 0; } -SEC("tp_btf/sys_exit") -int BPF_PROG(on_exit, struct pt_regs *regs, long id) +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) { struct task_struct *task; - long *ptr; + struct cgroup *cgrp; task = bpf_get_current_task_btf(); if (task->pid != target_pid) return 0; - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + if (is_cgroup1) { + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + __on_enter(regs, id, cgrp); + bpf_cgroup_release(cgrp); + return 0; + } + + __on_enter(regs, id, task->cgroups->dfl_cgrp); + return 0; +} + +static void __on_exit(struct pt_regs *regs, long id, struct cgroup *cgrp) +{ + long *ptr; + + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, 
BPF_LOCAL_STORAGE_GET_F_CREATE); if (!ptr) - return 0; + return; __sync_fetch_and_add(&exit_cnt, 1); if (*ptr != MAGIC_VALUE + exit_cnt) __sync_fetch_and_add(&mismatch_cnt, 1); +} + +SEC("tp_btf/sys_exit") +int BPF_PROG(on_exit, struct pt_regs *regs, long id) +{ + struct task_struct *task; + struct cgroup *cgrp; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + if (is_cgroup1) { + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + __on_exit(regs, id, cgrp); + bpf_cgroup_release(cgrp); + return 0; + } + + __on_exit(regs, id, task->cgroups->dfl_cgrp); return 0; } diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h index b15c588ace..0cd4aebb97 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_common.h +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h @@ -54,6 +54,7 @@ bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym; void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym; u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym; u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, const struct cpumask *src2) __ksym; +u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index 674a63424d..7a1e64c6c0 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -332,7 +332,7 @@ SEC("tp_btf/task_newtask") int BPF_PROG(test_copy_any_anyand, struct task_struct *task, u64 clone_flags) { struct bpf_cpumask *mask1, *mask2, *dst1, *dst2; - u32 cpu; + int cpu; if (!is_test_task()) return 0; @@ -461,6 +461,49 @@ int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags) } SEC("tp_btf/task_newtask") +int 
BPF_PROG(test_cpumask_weight, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *local; + + if (!is_test_task()) + return 0; + + local = create_cpumask(); + if (!local) + return 0; + + if (bpf_cpumask_weight(cast(local)) != 0) { + err = 3; + goto out; + } + + bpf_cpumask_set_cpu(0, local); + if (bpf_cpumask_weight(cast(local)) != 1) { + err = 4; + goto out; + } + + /* + * Make sure that adding additional CPUs changes the weight. Test to + * see whether the CPU was set to account for running on UP machines. + */ + bpf_cpumask_set_cpu(1, local); + if (bpf_cpumask_test_cpu(1, cast(local)) && bpf_cpumask_weight(cast(local)) != 2) { + err = 5; + goto out; + } + + bpf_cpumask_clear(local); + if (bpf_cpumask_weight(cast(local)) != 0) { + err = 6; + goto out; + } +out: + bpf_cpumask_release(local); + return 0; +} + +SEC("tp_btf/task_newtask") __success int BPF_PROG(test_refcount_null_tracking, struct task_struct *task, u64 clone_flags) { diff --git a/tools/testing/selftests/bpf/progs/exceptions.c b/tools/testing/selftests/bpf/progs/exceptions.c index 2811ee842b..f09cd14d8e 100644 --- a/tools/testing/selftests/bpf/progs/exceptions.c +++ b/tools/testing/selftests/bpf/progs/exceptions.c @@ -210,7 +210,7 @@ __noinline int assert_zero_gfunc(u64 c) { volatile u64 cookie = c; - bpf_assert_eq(cookie, 0); + bpf_assert(bpf_cmp_unlikely(cookie, ==, 0)); return 0; } @@ -218,7 +218,7 @@ __noinline int assert_neg_gfunc(s64 c) { volatile s64 cookie = c; - bpf_assert_lt(cookie, 0); + bpf_assert(bpf_cmp_unlikely(cookie, <, 0)); return 0; } @@ -226,7 +226,7 @@ __noinline int assert_pos_gfunc(s64 c) { volatile s64 cookie = c; - bpf_assert_gt(cookie, 0); + bpf_assert(bpf_cmp_unlikely(cookie, >, 0)); return 0; } @@ -234,7 +234,7 @@ __noinline int assert_negeq_gfunc(s64 c) { volatile s64 cookie = c; - bpf_assert_le(cookie, -1); + bpf_assert(bpf_cmp_unlikely(cookie, <=, -1)); return 0; } @@ -242,7 +242,7 @@ __noinline int assert_poseq_gfunc(s64 c) { volatile s64 cookie = c; - 
bpf_assert_ge(cookie, 1); + bpf_assert(bpf_cmp_unlikely(cookie, >=, 1)); return 0; } @@ -258,7 +258,7 @@ __noinline int assert_zero_gfunc_with(u64 c) { volatile u64 cookie = c; - bpf_assert_eq_with(cookie, 0, cookie + 100); + bpf_assert_with(bpf_cmp_unlikely(cookie, ==, 0), cookie + 100); return 0; } @@ -266,7 +266,7 @@ __noinline int assert_neg_gfunc_with(s64 c) { volatile s64 cookie = c; - bpf_assert_lt_with(cookie, 0, cookie + 100); + bpf_assert_with(bpf_cmp_unlikely(cookie, <, 0), cookie + 100); return 0; } @@ -274,7 +274,7 @@ __noinline int assert_pos_gfunc_with(s64 c) { volatile s64 cookie = c; - bpf_assert_gt_with(cookie, 0, cookie + 100); + bpf_assert_with(bpf_cmp_unlikely(cookie, >, 0), cookie + 100); return 0; } @@ -282,7 +282,7 @@ __noinline int assert_negeq_gfunc_with(s64 c) { volatile s64 cookie = c; - bpf_assert_le_with(cookie, -1, cookie + 100); + bpf_assert_with(bpf_cmp_unlikely(cookie, <=, -1), cookie + 100); return 0; } @@ -290,7 +290,7 @@ __noinline int assert_poseq_gfunc_with(s64 c) { volatile s64 cookie = c; - bpf_assert_ge_with(cookie, 1, cookie + 100); + bpf_assert_with(bpf_cmp_unlikely(cookie, >=, 1), cookie + 100); return 0; } diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c index e1e5c54a6a..5e0a1ca96d 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_assert.c +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c @@ -11,55 +11,55 @@ #define check_assert(type, op, name, value) \ SEC("?tc") \ __log_level(2) __failure \ - int check_assert_##op##_##name(void *ctx) \ + int check_assert_##name(void *ctx) \ { \ type num = bpf_ktime_get_ns(); \ - bpf_assert_##op(num, value); \ + bpf_assert(bpf_cmp_unlikely(num, op, value)); \ return *(u64 *)num; \ } -__msg(": R0_w=-2147483648 R10=fp0") -check_assert(s64, eq, int_min, INT_MIN); -__msg(": R0_w=2147483647 R10=fp0") -check_assert(s64, eq, int_max, INT_MAX); -__msg(": R0_w=0 R10=fp0") -check_assert(s64, eq, zero, 
0); -__msg(": R0_w=-9223372036854775808 R1_w=-9223372036854775808 R10=fp0") -check_assert(s64, eq, llong_min, LLONG_MIN); -__msg(": R0_w=9223372036854775807 R1_w=9223372036854775807 R10=fp0") -check_assert(s64, eq, llong_max, LLONG_MAX); - -__msg(": R0_w=scalar(smax=2147483646) R10=fp0") -check_assert(s64, lt, pos, INT_MAX); -__msg(": R0_w=scalar(smax=-1,umin=9223372036854775808,var_off=(0x8000000000000000; 0x7fffffffffffffff))") -check_assert(s64, lt, zero, 0); -__msg(": R0_w=scalar(smax=-2147483649,umin=9223372036854775808,umax=18446744071562067967,var_off=(0x8000000000000000; 0x7fffffffffffffff))") -check_assert(s64, lt, neg, INT_MIN); - -__msg(": R0_w=scalar(smax=2147483647) R10=fp0") -check_assert(s64, le, pos, INT_MAX); -__msg(": R0_w=scalar(smax=0) R10=fp0") -check_assert(s64, le, zero, 0); -__msg(": R0_w=scalar(smax=-2147483648,umin=9223372036854775808,umax=18446744071562067968,var_off=(0x8000000000000000; 0x7fffffffffffffff))") -check_assert(s64, le, neg, INT_MIN); - -__msg(": R0_w=scalar(smin=umin=2147483648,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") -check_assert(s64, gt, pos, INT_MAX); -__msg(": R0_w=scalar(smin=umin=1,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") -check_assert(s64, gt, zero, 0); -__msg(": R0_w=scalar(smin=-2147483647) R10=fp0") -check_assert(s64, gt, neg, INT_MIN); - -__msg(": R0_w=scalar(smin=umin=2147483647,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))") -check_assert(s64, ge, pos, INT_MAX); -__msg(": R0_w=scalar(smin=0,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0") -check_assert(s64, ge, zero, 0); -__msg(": R0_w=scalar(smin=-2147483648) R10=fp0") -check_assert(s64, ge, neg, INT_MIN); +__msg(": R0_w=0xffffffff80000000") +check_assert(s64, ==, eq_int_min, INT_MIN); +__msg(": R0_w=0x7fffffff") +check_assert(s64, ==, eq_int_max, INT_MAX); +__msg(": R0_w=0") +check_assert(s64, ==, eq_zero, 0); +__msg(": R0_w=0x8000000000000000 R1_w=0x8000000000000000") 
+check_assert(s64, ==, eq_llong_min, LLONG_MIN); +__msg(": R0_w=0x7fffffffffffffff R1_w=0x7fffffffffffffff") +check_assert(s64, ==, eq_llong_max, LLONG_MAX); + +__msg(": R0_w=scalar(id=1,smax=0x7ffffffe)") +check_assert(s64, <, lt_pos, INT_MAX); +__msg(": R0_w=scalar(id=1,smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))") +check_assert(s64, <, lt_zero, 0); +__msg(": R0_w=scalar(id=1,smax=0xffffffff7fffffff") +check_assert(s64, <, lt_neg, INT_MIN); + +__msg(": R0_w=scalar(id=1,smax=0x7fffffff)") +check_assert(s64, <=, le_pos, INT_MAX); +__msg(": R0_w=scalar(id=1,smax=0)") +check_assert(s64, <=, le_zero, 0); +__msg(": R0_w=scalar(id=1,smax=0xffffffff80000000") +check_assert(s64, <=, le_neg, INT_MIN); + +__msg(": R0_w=scalar(id=1,smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +check_assert(s64, >, gt_pos, INT_MAX); +__msg(": R0_w=scalar(id=1,smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +check_assert(s64, >, gt_zero, 0); +__msg(": R0_w=scalar(id=1,smin=0xffffffff80000001") +check_assert(s64, >, gt_neg, INT_MIN); + +__msg(": R0_w=scalar(id=1,smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +check_assert(s64, >=, ge_pos, INT_MAX); +__msg(": R0_w=scalar(id=1,smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))") +check_assert(s64, >=, ge_zero, 0); +__msg(": R0_w=scalar(id=1,smin=0xffffffff80000000") +check_assert(s64, >=, ge_neg, INT_MIN); SEC("?tc") __log_level(2) __failure -__msg(": R0=0 R1=ctx(off=0,imm=0) R2=scalar(smin=smin32=-2147483646,smax=smax32=2147483645) R10=fp0") +__msg(": R0=0 R1=ctx() R2=scalar(smin=0xffffffff80000002,smax=smax32=0x7ffffffd,smin32=0x80000002) R10=fp0") int check_assert_range_s64(struct __sk_buff *ctx) { struct bpf_sock *sk = ctx->sk; @@ -75,7 +75,7 @@ int check_assert_range_s64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R1=ctx(off=0,imm=0) 
R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))") +__msg(": R1=ctx() R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))") int check_assert_range_u64(struct __sk_buff *ctx) { u64 num = ctx->len; @@ -86,7 +86,7 @@ int check_assert_range_u64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R0=0 R1=ctx(off=0,imm=0) R2=4096 R10=fp0") +__msg(": R0=0 R1=ctx() R2=4096 R10=fp0") int check_assert_single_range_s64(struct __sk_buff *ctx) { struct bpf_sock *sk = ctx->sk; @@ -103,7 +103,7 @@ int check_assert_single_range_s64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R1=ctx(off=0,imm=0) R2=4096 R10=fp0") +__msg(": R1=ctx() R2=4096 R10=fp0") int check_assert_single_range_u64(struct __sk_buff *ctx) { u64 num = ctx->len; @@ -114,7 +114,7 @@ int check_assert_single_range_u64(struct __sk_buff *ctx) SEC("?tc") __log_level(2) __failure -__msg(": R1=pkt(off=64,r=64,imm=0) R2=pkt_end(off=0,imm=0) R6=pkt(off=0,r=64,imm=0) R10=fp0") +__msg(": R1=pkt(off=64,r=64) R2=pkt_end() R6=pkt(r=64) R10=fp0") int check_assert_generic(struct __sk_buff *ctx) { u8 *data_end = (void *)(long)ctx->data_end; @@ -125,7 +125,7 @@ int check_assert_generic(struct __sk_buff *ctx) } SEC("?fentry/bpf_check") -__failure __msg("At program exit the register R0 has value (0x40; 0x0)") +__failure __msg("At program exit the register R1 has smin=64 smax=64") int check_assert_with_return(void *ctx) { bpf_assert_with(!ctx, 64); diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c index 8c0ef27422..9cceb65211 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_fail.c +++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c @@ -308,7 +308,7 @@ int reject_set_exception_cb_bad_ret1(void *ctx) } SEC("?fentry/bpf_check") -__failure __msg("At program exit the register R0 has value (0x40; 0x0) should") +__failure __msg("At program 
exit the register R1 has smin=64 smax=64 should") int reject_set_exception_cb_bad_ret2(void *ctx) { bpf_throw(64); diff --git a/tools/testing/selftests/bpf/progs/fentry_recursive.c b/tools/testing/selftests/bpf/progs/fentry_recursive.c new file mode 100644 index 0000000000..2c9fb5ac42 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fentry_recursive.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Red Hat, Inc. */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +/* Dummy fentry bpf prog for testing fentry attachment chains */ +SEC("fentry/XXX") +int BPF_PROG(recursive_attach, int a) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/fentry_recursive_target.c b/tools/testing/selftests/bpf/progs/fentry_recursive_target.c new file mode 100644 index 0000000000..267c876d0a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fentry_recursive_target.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Red Hat, Inc. */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +/* Dummy fentry bpf prog for testing fentry attachment chains. It's going to be + * a start of the chain. + */ +SEC("fentry/bpf_testmod_fentry_test1") +int BPF_PROG(test1, int a) +{ + return 0; +} + +/* Dummy bpf prog for testing attach_btf presence when attaching an fentry + * program. 
+ */ +SEC("raw_tp/sys_enter") +int BPF_PROG(fentry_target, struct pt_regs *regs, long id) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/freplace_dead_global_func.c b/tools/testing/selftests/bpf/progs/freplace_dead_global_func.c new file mode 100644 index 0000000000..e6a75f86ca --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_dead_global_func.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +SEC("freplace") +int freplace_prog(void) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c b/tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c new file mode 100644 index 0000000000..624078abf3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +SEC("freplace/btf_unreliable_kprobe") +/* context type is what BPF verifier expects for kprobe context, but target + * program has `struct whatever *ctx` argument, so freplace operation will be + * rejected with the following message: + * + * arg0 replace_btf_unreliable_kprobe(struct pt_regs *) doesn't match btf_unreliable_kprobe(struct whatever *) + */ +int replace_btf_unreliable_kprobe(bpf_user_pt_regs_t *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 844d968c27..fe971992e6 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -6,7 +6,7 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0])) static volatile int zero = 0; @@ -676,7 +676,7 @@ static __noinline int sum(struct 
bpf_iter_num *it, int *arr, __u32 n) while ((t = bpf_iter_num_next(it))) { i = *t; - if (i >= n) + if ((__u32)i >= n) break; sum += arr[i]; } @@ -1411,4 +1411,26 @@ __naked int checkpoint_states_deletion(void) ); } +struct { + int data[32]; + int n; +} loop_data; + +SEC("raw_tp") +__success +int iter_arr_with_actual_elem_count(const void *ctx) +{ + int i, n = loop_data.n, sum = 0; + + if (n > ARRAY_SIZE(loop_data.data)) + return 0; + + bpf_for(i, 0, n) { + /* no rechecking of i against ARRAY_SIZE(loop_data.n) */ + sum += loop_data.data[i]; + } + + return sum; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/iters_task.c b/tools/testing/selftests/bpf/progs/iters_task.c index c9b4055cd4..e4d53e40ff 100644 --- a/tools/testing/selftests/bpf/progs/iters_task.c +++ b/tools/testing/selftests/bpf/progs/iters_task.c @@ -10,7 +10,7 @@ char _license[] SEC("license") = "GPL"; pid_t target_pid; -int procs_cnt, threads_cnt, proc_threads_cnt; +int procs_cnt, threads_cnt, proc_threads_cnt, invalid_cnt; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; @@ -26,6 +26,16 @@ int iter_task_for_each_sleep(void *ctx) procs_cnt = threads_cnt = proc_threads_cnt = 0; bpf_rcu_read_lock(); + bpf_for_each(task, pos, NULL, ~0U) { + /* Below instructions shouldn't be executed for invalid flags */ + invalid_cnt++; + } + + bpf_for_each(task, pos, NULL, BPF_TASK_ITER_PROC_THREADS) { + /* Below instructions shouldn't be executed for invalid task__nullable */ + invalid_cnt++; + } + bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS) if (pos->pid == target_pid) procs_cnt++; diff --git a/tools/testing/selftests/bpf/progs/iters_task_vma.c b/tools/testing/selftests/bpf/progs/iters_task_vma.c index e085a51d15..dc0c3691dc 100644 --- a/tools/testing/selftests/bpf/progs/iters_task_vma.c +++ b/tools/testing/selftests/bpf/progs/iters_task_vma.c @@ -28,9 +28,8 @@ int iter_task_vma_for_each(const void *ctx) return 0; bpf_for_each(task_vma, 
vma, task, 0) { - if (seen >= 1000) + if (bpf_cmp_unlikely(seen, >=, 1000)) break; - barrier_var(seen); vm_ranges[seen].vm_start = vma->vm_start; vm_ranges[seen].vm_end = vma->vm_end; diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c index c4b49ceea9..cc79dddac1 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs1.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c @@ -8,7 +8,7 @@ #include "bpf_misc.h" /* weak and shared between two files */ -const volatile int my_tid __weak; +const volatile __u32 my_tid __weak; long syscall_id __weak; int output_val1; diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c index 013ff0645f..942cc5526d 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs2.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c @@ -68,7 +68,7 @@ int BPF_PROG(handler2, struct pt_regs *regs, long id) { static volatile int whatever; - if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id) + if (my_tid != (s32)bpf_get_current_pid_tgid() || id != syscall_id) return 0; /* make sure we have CO-RE relocations in main program */ diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c index 84d1777a9e..26205ca806 100644 --- a/tools/testing/selftests/bpf/progs/linked_list.c +++ b/tools/testing/selftests/bpf/progs/linked_list.c @@ -6,7 +6,7 @@ #include "bpf_experimental.h" #ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0])) #endif #include "linked_list.h" diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c index b567a666d2..75043ffc5d 100644 --- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c @@ -14,16 +14,41 @@ struct node_data { struct 
bpf_rb_node node; }; +struct refcounted_node { + long data; + struct bpf_rb_node rb_node; + struct bpf_refcount refcount; +}; + +struct stash { + struct bpf_spin_lock l; + struct refcounted_node __kptr *stashed; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct stash); + __uint(max_entries, 10); +} refcounted_node_stash SEC(".maps"); + struct plain_local { long key; long data; }; +struct local_with_root { + long key; + struct bpf_spin_lock l; + struct bpf_rb_root r __contains(node_data, node); +}; + struct map_value { struct prog_test_ref_kfunc *not_kptr; struct prog_test_ref_kfunc __kptr *val; struct node_data __kptr *node; struct plain_local __kptr *plain; + struct local_with_root __kptr *local_root; }; /* This is necessary so that LLVM generates BTF for node_data struct @@ -38,6 +63,7 @@ struct map_value { * Had to do the same w/ bpf_kfunc_call_test_release below */ struct node_data *just_here_because_btf_bug; +struct refcounted_node *just_here_because_btf_bug2; struct { __uint(type, BPF_MAP_TYPE_ARRAY); @@ -46,6 +72,17 @@ struct { __uint(max_entries, 2); } some_nodes SEC(".maps"); +static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct node_data *node_a; + struct node_data *node_b; + + node_a = container_of(a, struct node_data, node); + node_b = container_of(b, struct node_data, node); + + return node_a->key < node_b->key; +} + static int create_and_stash(int idx, int val) { struct map_value *mapval; @@ -95,6 +132,41 @@ long stash_plain(void *ctx) } SEC("tc") +long stash_local_with_root(void *ctx) +{ + struct local_with_root *res; + struct map_value *mapval; + struct node_data *n; + int idx = 0; + + mapval = bpf_map_lookup_elem(&some_nodes, &idx); + if (!mapval) + return 1; + + res = bpf_obj_new(typeof(*res)); + if (!res) + return 2; + res->key = 41; + + n = bpf_obj_new(typeof(*n)); + if (!n) { + bpf_obj_drop(res); + return 3; + } + + bpf_spin_lock(&res->l); + bpf_rbtree_add(&res->r, 
&n->node, less); + bpf_spin_unlock(&res->l); + + res = bpf_kptr_xchg(&mapval->local_root, res); + if (res) { + bpf_obj_drop(res); + return 4; + } + return 0; +} + +SEC("tc") long unstash_rb_node(void *ctx) { struct map_value *mapval; @@ -132,4 +204,56 @@ long stash_test_ref_kfunc(void *ctx) return 0; } +SEC("tc") +long refcount_acquire_without_unstash(void *ctx) +{ + struct refcounted_node *p; + struct stash *s; + int ret = 0; + + s = bpf_map_lookup_elem(&refcounted_node_stash, &ret); + if (!s) + return 1; + + if (!s->stashed) + /* refcount_acquire failure is expected when no refcounted_node + * has been stashed before this program executes + */ + return 2; + + p = bpf_refcount_acquire(s->stashed); + if (!p) + return 3; + + ret = s->stashed ? s->stashed->data : -1; + bpf_obj_drop(p); + return ret; +} + +/* Helper for refcount_acquire_without_unstash test */ +SEC("tc") +long stash_refcounted_node(void *ctx) +{ + struct refcounted_node *p; + struct stash *s; + int key = 0; + + s = bpf_map_lookup_elem(&refcounted_node_stash, &key); + if (!s) + return 1; + + p = bpf_obj_new(typeof(*p)); + if (!p) + return 2; + p->data = 42; + + p = bpf_kptr_xchg(&s->stashed, p); + if (p) { + bpf_obj_drop(p); + return 3; + } + + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c index bc8ea56671..e5e3a8b8dd 100644 --- a/tools/testing/selftests/bpf/progs/local_storage.c +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -13,7 +13,7 @@ char _license[] SEC("license") = "GPL"; #define DUMMY_STORAGE_VALUE 0xdeadbeef -int monitored_pid = 0; +__u32 monitored_pid = 0; int inode_storage_result = -1; int sk_storage_result = -1; int task_storage_result = -1; diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c index fadfdd9870..0c13b74099 100644 --- a/tools/testing/selftests/bpf/progs/lsm.c +++ 
b/tools/testing/selftests/bpf/progs/lsm.c @@ -92,7 +92,7 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma, if (ret != 0) return ret; - __u32 pid = bpf_get_current_pid_tgid() >> 32; + __s32 pid = bpf_get_current_pid_tgid() >> 32; int is_stack = 0; is_stack = (vma->vm_start <= vma->vm_mm->start_stack && diff --git a/tools/testing/selftests/bpf/progs/map_in_map_btf.c b/tools/testing/selftests/bpf/progs/map_in_map_btf.c new file mode 100644 index 0000000000..7a1336d7b1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/map_in_map_btf.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_misc.h" +#include "bpf_experimental.h" + +struct node_data { + __u64 data; + struct bpf_list_node node; +}; + +struct map_value { + struct bpf_list_head head __contains(node_data, node); + struct bpf_spin_lock lock; +}; + +struct inner_array_type { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} inner_array SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(key_size, 4); + __uint(value_size, 4); + __uint(max_entries, 1); + __array(values, struct inner_array_type); +} outer_array SEC(".maps") = { + .values = { + [0] = &inner_array, + }, +}; + +char _license[] SEC("license") = "GPL"; + +int pid = 0; +bool done = false; + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int add_to_list_in_inner_array(void *ctx) +{ + struct map_value *value; + struct node_data *new; + struct bpf_map *map; + int zero = 0; + + if (done || (u32)bpf_get_current_pid_tgid() != pid) + return 0; + + map = bpf_map_lookup_elem(&outer_array, &zero); + if (!map) + return 0; + + value = bpf_map_lookup_elem(map, &zero); + if (!value) + return 0; + + new = bpf_obj_new(typeof(*new)); + if (!new) + return 0; + + bpf_spin_lock(&value->lock); + 
bpf_list_push_back(&value->head, &new->node); + bpf_spin_unlock(&value->lock); + done = true; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/normal_map_btf.c b/tools/testing/selftests/bpf/progs/normal_map_btf.c new file mode 100644 index 0000000000..a45c929955 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/normal_map_btf.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_misc.h" +#include "bpf_experimental.h" + +struct node_data { + __u64 data; + struct bpf_list_node node; +}; + +struct map_value { + struct bpf_list_head head __contains(node_data, node); + struct bpf_spin_lock lock; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} array SEC(".maps"); + +char _license[] SEC("license") = "GPL"; + +int pid = 0; +bool done = false; + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int add_to_list_in_array(void *ctx) +{ + struct map_value *value; + struct node_data *new; + int zero = 0; + + if (done || (int)bpf_get_current_pid_tgid() != pid) + return 0; + + value = bpf_map_lookup_elem(&array, &zero); + if (!value) + return 0; + + new = bpf_obj_new(typeof(*new)); + if (!new) + return 0; + + bpf_spin_lock(&value->lock); + bpf_list_push_back(&value->head, &new->node); + bpf_spin_unlock(&value->lock); + done = true; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c index 1a891d30f1..f2b8eb2ff7 100644 --- a/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c +++ b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c @@ -17,6 +17,10 @@ struct val_with_rb_root_t { struct bpf_spin_lock lock; }; +struct val_600b_t { + char b[600]; +}; + struct elem { long sum; struct val_t __percpu_kptr *pc; @@ -161,4 +165,18 @@ int 
BPF_PROG(test_array_map_7) return 0; } +SEC("?fentry.s/bpf_fentry_test1") +__failure __msg("bpf_percpu_obj_new type size (600) is greater than 512") +int BPF_PROG(test_array_map_8) +{ + struct val_600b_t __percpu_kptr *p; + + p = bpf_percpu_obj_new(struct val_600b_t); + if (!p) + return 0; + + bpf_percpu_obj_drop(p); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index 897061930c..de3b6e4e4d 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -7,6 +7,7 @@ #include "profiler.h" #include "err.h" +#include "bpf_experimental.h" #ifndef NULL #define NULL 0 @@ -132,7 +133,7 @@ struct { } disallowed_exec_inodes SEC(".maps"); #ifndef ARRAY_SIZE -#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) +#define ARRAY_SIZE(arr) (int)(sizeof(arr) / sizeof(arr[0])) #endif static INLINE bool IS_ERR(const void* ptr) @@ -221,8 +222,7 @@ static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node, return payload; if (cgroup_node == cgroup_root_node) *root_pos = payload - payload_start; - if (filepart_length <= MAX_PATH) { - barrier_var(filepart_length); + if (bpf_cmp_likely(filepart_length, <=, MAX_PATH)) { payload += filepart_length; } cgroup_node = BPF_CORE_READ(cgroup_node, parent); @@ -305,9 +305,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, size_t cgroup_root_length = bpf_probe_read_kernel_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name)); - barrier_var(cgroup_root_length); - if (cgroup_root_length <= MAX_PATH) { - barrier_var(cgroup_root_length); + if (bpf_cmp_likely(cgroup_root_length, <=, MAX_PATH)) { cgroup_data->cgroup_root_length = cgroup_root_length; payload += cgroup_root_length; } @@ -315,9 +313,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, size_t cgroup_proc_length = 
bpf_probe_read_kernel_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name)); - barrier_var(cgroup_proc_length); - if (cgroup_proc_length <= MAX_PATH) { - barrier_var(cgroup_proc_length); + if (bpf_cmp_likely(cgroup_proc_length, <=, MAX_PATH)) { cgroup_data->cgroup_proc_length = cgroup_proc_length; payload += cgroup_proc_length; } @@ -347,9 +343,7 @@ static INLINE void* populate_var_metadata(struct var_metadata_t* metadata, metadata->comm_length = 0; size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm); - barrier_var(comm_length); - if (comm_length <= TASK_COMM_LEN) { - barrier_var(comm_length); + if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) { metadata->comm_length = comm_length; payload += comm_length; } @@ -494,10 +488,9 @@ read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload) filepart_length = bpf_probe_read_kernel_str(payload, MAX_PATH, BPF_CORE_READ(filp_dentry, d_name.name)); - barrier_var(filepart_length); - if (filepart_length > MAX_PATH) + bpf_nop_mov(filepart_length); + if (bpf_cmp_unlikely(filepart_length, >, MAX_PATH)) break; - barrier_var(filepart_length); payload += filepart_length; length += filepart_length; @@ -579,9 +572,7 @@ ssize_t BPF_KPROBE(kprobe__proc_sys_write, size_t sysctl_val_length = bpf_probe_read_kernel_str(payload, CTL_MAXNAME, buf); - barrier_var(sysctl_val_length); - if (sysctl_val_length <= CTL_MAXNAME) { - barrier_var(sysctl_val_length); + if (bpf_cmp_likely(sysctl_val_length, <=, CTL_MAXNAME)) { sysctl_data->sysctl_val_length = sysctl_val_length; payload += sysctl_val_length; } @@ -590,9 +581,7 @@ ssize_t BPF_KPROBE(kprobe__proc_sys_write, bpf_probe_read_kernel_str(payload, MAX_PATH, BPF_CORE_READ(filp, f_path.dentry, d_name.name)); - barrier_var(sysctl_path_length); - if (sysctl_path_length <= MAX_PATH) { - barrier_var(sysctl_path_length); + if (bpf_cmp_likely(sysctl_path_length, <=, MAX_PATH)) { sysctl_data->sysctl_path_length = sysctl_path_length; payload += 
sysctl_path_length; } @@ -645,7 +634,7 @@ int raw_tracepoint__sched_process_exit(void* ctx) for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) { struct var_kill_data_t* past_kill_data = &arr_struct->array[i]; - if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) { + if (past_kill_data != NULL && past_kill_data->kill_target_pid == (pid_t)tpid) { bpf_probe_read_kernel(kill_data, sizeof(*past_kill_data), past_kill_data); void* payload = kill_data->payload; @@ -658,9 +647,7 @@ int raw_tracepoint__sched_process_exit(void* ctx) kill_data->kill_target_cgroup_proc_length = 0; size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm); - barrier_var(comm_length); - if (comm_length <= TASK_COMM_LEN) { - barrier_var(comm_length); + if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) { kill_data->kill_target_name_length = comm_length; payload += comm_length; } @@ -669,9 +656,7 @@ int raw_tracepoint__sched_process_exit(void* ctx) bpf_probe_read_kernel_str(payload, KILL_TARGET_LEN, BPF_CORE_READ(proc_kernfs, name)); - barrier_var(cgroup_proc_length); - if (cgroup_proc_length <= KILL_TARGET_LEN) { - barrier_var(cgroup_proc_length); + if (bpf_cmp_likely(cgroup_proc_length, <=, KILL_TARGET_LEN)) { kill_data->kill_target_cgroup_proc_length = cgroup_proc_length; payload += cgroup_proc_length; } @@ -731,9 +716,7 @@ int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx) const char* filename = BPF_CORE_READ(bprm, filename); size_t bin_path_length = bpf_probe_read_kernel_str(payload, MAX_FILENAME_LEN, filename); - barrier_var(bin_path_length); - if (bin_path_length <= MAX_FILENAME_LEN) { - barrier_var(bin_path_length); + if (bpf_cmp_likely(bin_path_length, <=, MAX_FILENAME_LEN)) { proc_exec_data->bin_path_length = bin_path_length; payload += bin_path_length; } @@ -743,8 +726,7 @@ int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx) unsigned int cmdline_length = probe_read_lim(payload, arg_start, arg_end 
- arg_start, MAX_ARGS_LEN); - if (cmdline_length <= MAX_ARGS_LEN) { - barrier_var(cmdline_length); + if (bpf_cmp_likely(cmdline_length, <=, MAX_ARGS_LEN)) { proc_exec_data->cmdline_length = cmdline_length; payload += cmdline_length; } @@ -821,9 +803,7 @@ int kprobe_ret__do_filp_open(struct pt_regs* ctx) payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload); - barrier_var(len); - if (len <= MAX_FILEPATH_LENGTH) { - barrier_var(len); + if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { payload += len; filemod_data->dst_filepath_length = len; } @@ -876,17 +856,13 @@ int BPF_KPROBE(kprobe__vfs_link, payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); size_t len = read_absolute_file_path_from_dentry(old_dentry, payload); - barrier_var(len); - if (len <= MAX_FILEPATH_LENGTH) { - barrier_var(len); + if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { payload += len; filemod_data->src_filepath_length = len; } len = read_absolute_file_path_from_dentry(new_dentry, payload); - barrier_var(len); - if (len <= MAX_FILEPATH_LENGTH) { - barrier_var(len); + if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { payload += len; filemod_data->dst_filepath_length = len; } @@ -936,16 +912,12 @@ int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry, size_t len = bpf_probe_read_kernel_str(payload, MAX_FILEPATH_LENGTH, oldname); - barrier_var(len); - if (len <= MAX_FILEPATH_LENGTH) { - barrier_var(len); + if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { payload += len; filemod_data->src_filepath_length = len; } len = read_absolute_file_path_from_dentry(dentry, payload); - barrier_var(len); - if (len <= MAX_FILEPATH_LENGTH) { - barrier_var(len); + if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { payload += len; filemod_data->dst_filepath_length = len; } diff --git a/tools/testing/selftests/bpf/progs/read_vsyscall.c 
b/tools/testing/selftests/bpf/progs/read_vsyscall.c new file mode 100644 index 0000000000..986f96687a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/read_vsyscall.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2024. Huawei Technologies Co., Ltd */ +#include <linux/types.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_misc.h" + +int target_pid = 0; +void *user_ptr = 0; +int read_ret[8]; + +char _license[] SEC("license") = "GPL"; + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int do_probe_read(void *ctx) +{ + char buf[8]; + + if ((bpf_get_current_pid_tgid() >> 32) != target_pid) + return 0; + + read_ret[0] = bpf_probe_read_kernel(buf, sizeof(buf), user_ptr); + read_ret[1] = bpf_probe_read_kernel_str(buf, sizeof(buf), user_ptr); + read_ret[2] = bpf_probe_read(buf, sizeof(buf), user_ptr); + read_ret[3] = bpf_probe_read_str(buf, sizeof(buf), user_ptr); + read_ret[4] = bpf_probe_read_user(buf, sizeof(buf), user_ptr); + read_ret[5] = bpf_probe_read_user_str(buf, sizeof(buf), user_ptr); + + return 0; +} + +SEC("fentry.s/" SYS_PREFIX "sys_nanosleep") +int do_copy_from_user(void *ctx) +{ + char buf[8]; + + if ((bpf_get_current_pid_tgid() >> 32) != target_pid) + return 0; + + read_ret[6] = bpf_copy_from_user(buf, sizeof(buf), user_ptr); + read_ret[7] = bpf_copy_from_user_task(buf, sizeof(buf), user_ptr, + bpf_get_current_task_btf(), 0); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c index 1ef07f6ee5..1553b9c16a 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c @@ -54,6 +54,25 @@ long rbtree_refcounted_node_ref_escapes(void *ctx) } SEC("?tc") +__failure __msg("Possibly NULL pointer passed to trusted arg0") +long refcount_acquire_maybe_null(void *ctx) +{ + struct node_acquire *n, *m; + + n = bpf_obj_new(typeof(*n)); + /* Intentionally not testing !n + * it's 
MAYBE_NULL for refcount_acquire + */ + m = bpf_refcount_acquire(n); + if (m) + bpf_obj_drop(m); + if (n) + bpf_obj_drop(n); + + return 0; +} + +SEC("?tc") __failure __msg("Unreleased reference id=3 alloc_insn=9") long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx) { diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c new file mode 100644 index 0000000000..ffbbfe1fa1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2024 Meta + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_endian.h> +#include "bpf_tracing_net.h" +#include "bpf_kfuncs.h" + +#define ATTR __always_inline +#include "test_jhash.h" + +static bool ipv6_addr_loopback(const struct in6_addr *a) +{ + return (a->s6_addr32[0] | a->s6_addr32[1] | + a->s6_addr32[2] | (a->s6_addr32[3] ^ bpf_htonl(1))) == 0; +} + +volatile const __u16 ports[2]; +unsigned int bucket[2]; + +SEC("iter/tcp") +int iter_tcp_soreuse(struct bpf_iter__tcp *ctx) +{ + struct sock *sk = (struct sock *)ctx->sk_common; + struct inet_hashinfo *hinfo; + unsigned int hash; + struct net *net; + int idx; + + if (!sk) + return 0; + + sk = bpf_rdonly_cast(sk, bpf_core_type_id_kernel(struct sock)); + if (sk->sk_family != AF_INET6 || + sk->sk_state != TCP_LISTEN || + !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr)) + return 0; + + if (sk->sk_num == ports[0]) + idx = 0; + else if (sk->sk_num == ports[1]) + idx = 1; + else + return 0; + + /* bucket selection as in inet_lhash2_bucket_sk() */ + net = sk->sk_net.net; + hash = jhash2(sk->sk_v6_rcv_saddr.s6_addr32, 4, net->hash_mix); + hash ^= sk->sk_num; + hinfo = net->ipv4.tcp_death_row.hashinfo; + bucket[idx] = hash & hinfo->lhash2_mask; + bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx)); + + return 0; +} + +#define udp_sk(ptr) container_of(ptr, struct udp_sock, inet.sk) + +SEC("iter/udp") 
+int iter_udp_soreuse(struct bpf_iter__udp *ctx) +{ + struct sock *sk = (struct sock *)ctx->udp_sk; + struct udp_table *udptable; + int idx; + + if (!sk) + return 0; + + sk = bpf_rdonly_cast(sk, bpf_core_type_id_kernel(struct sock)); + if (sk->sk_family != AF_INET6 || + !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr)) + return 0; + + if (sk->sk_num == ports[0]) + idx = 0; + else if (sk->sk_num == ports[1]) + idx = 1; + else + return 0; + + /* bucket selection as in udp_hashslot2() */ + udptable = sk->sk_net.net->ipv4.udp_table; + bucket[idx] = udp_sk(sk)->udp_portaddr_hash & udptable->mask; + bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx)); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sockopt_inherit.c b/tools/testing/selftests/bpf/progs/sockopt_inherit.c index c8f59caa46..a3434b8409 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_inherit.c +++ b/tools/testing/selftests/bpf/progs/sockopt_inherit.c @@ -9,7 +9,7 @@ char _license[] SEC("license") = "GPL"; #define CUSTOM_INHERIT2 1 #define CUSTOM_LISTENER 2 -__u32 page_size = 0; +__s32 page_size = 0; struct sockopt_inherit { __u8 val; diff --git a/tools/testing/selftests/bpf/progs/sockopt_multi.c b/tools/testing/selftests/bpf/progs/sockopt_multi.c index 96f29fce05..db67278e12 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_multi.c +++ b/tools/testing/selftests/bpf/progs/sockopt_multi.c @@ -5,7 +5,7 @@ char _license[] SEC("license") = "GPL"; -__u32 page_size = 0; +__s32 page_size = 0; SEC("cgroup/getsockopt") int _getsockopt_child(struct bpf_sockopt *ctx) diff --git a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c index dbe235ede7..83753b00a5 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c +++ b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c @@ -9,7 +9,7 @@ char _license[] SEC("license") = "GPL"; -__u32 page_size = 0; +__s32 page_size = 0; 
SEC("cgroup/setsockopt") int sockopt_qos_to_cc(struct bpf_sockopt *ctx) diff --git a/tools/testing/selftests/bpf/progs/syscall.c b/tools/testing/selftests/bpf/progs/syscall.c index e550f72896..3d3cafdebe 100644 --- a/tools/testing/selftests/bpf/progs/syscall.c +++ b/tools/testing/selftests/bpf/progs/syscall.c @@ -6,9 +6,15 @@ #include <bpf/bpf_tracing.h> #include <../../../tools/include/linux/filter.h> #include <linux/btf.h> +#include <string.h> +#include <errno.h> char _license[] SEC("license") = "GPL"; +struct bpf_map { + int id; +} __attribute__((preserve_access_index)); + struct args { __u64 log_buf; __u32 log_size; @@ -27,6 +33,37 @@ struct args { BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ BTF_INT_ENC(encoding, bits_offset, bits) +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, union bpf_attr); + __uint(max_entries, 1); +} bpf_attr_array SEC(".maps"); + +struct inner_map_type { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, 4); + __uint(value_size, 4); + __uint(max_entries, 1); +} inner_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); + __array(values, struct inner_map_type); +} outer_array_map SEC(".maps") = { + .values = { + [0] = &inner_map, + }, +}; + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + static int btf_load(void) { struct btf_blob { @@ -58,7 +95,7 @@ static int btf_load(void) } SEC("syscall") -int bpf_prog(struct args *ctx) +int load_prog(struct args *ctx) { static char license[] = "GPL"; static struct bpf_insn insns[] = { @@ -94,8 +131,8 @@ int bpf_prog(struct args *ctx) map_create_attr.max_entries = ctx->max_entries; map_create_attr.btf_fd = ret; - prog_load_attr.license = (long) license; - prog_load_attr.insns = (long) insns; + prog_load_attr.license = ptr_to_u64(license); + prog_load_attr.insns = ptr_to_u64(insns); prog_load_attr.log_buf = 
ctx->log_buf; prog_load_attr.log_size = ctx->log_size; prog_load_attr.log_level = 1; @@ -107,8 +144,8 @@ int bpf_prog(struct args *ctx) insns[3].imm = ret; map_update_attr.map_fd = ret; - map_update_attr.key = (long) &key; - map_update_attr.value = (long) &value; + map_update_attr.key = ptr_to_u64(&key); + map_update_attr.value = ptr_to_u64(&value); ret = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, &map_update_attr, sizeof(map_update_attr)); if (ret < 0) return ret; @@ -119,3 +156,52 @@ int bpf_prog(struct args *ctx) ctx->prog_fd = ret; return 1; } + +SEC("syscall") +int update_outer_map(void *ctx) +{ + int zero = 0, ret = 0, outer_fd = -1, inner_fd = -1, err; + const int attr_sz = sizeof(union bpf_attr); + union bpf_attr *attr; + + attr = bpf_map_lookup_elem((struct bpf_map *)&bpf_attr_array, &zero); + if (!attr) + goto out; + + memset(attr, 0, attr_sz); + attr->map_id = ((struct bpf_map *)&outer_array_map)->id; + outer_fd = bpf_sys_bpf(BPF_MAP_GET_FD_BY_ID, attr, attr_sz); + if (outer_fd < 0) + goto out; + + memset(attr, 0, attr_sz); + attr->map_type = BPF_MAP_TYPE_ARRAY; + attr->key_size = 4; + attr->value_size = 4; + attr->max_entries = 1; + inner_fd = bpf_sys_bpf(BPF_MAP_CREATE, attr, attr_sz); + if (inner_fd < 0) + goto out; + + memset(attr, 0, attr_sz); + attr->map_fd = outer_fd; + attr->key = ptr_to_u64(&zero); + attr->value = ptr_to_u64(&inner_fd); + err = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, attr, attr_sz); + if (err) + goto out; + + memset(attr, 0, attr_sz); + attr->map_fd = outer_fd; + attr->key = ptr_to_u64(&zero); + err = bpf_sys_bpf(BPF_MAP_DELETE_ELEM, attr, attr_sz); + if (err) + goto out; + ret = 1; +out: + if (inner_fd >= 0) + bpf_sys_close(inner_fd); + if (outer_fd >= 0) + bpf_sys_close(outer_fd); + return ret; +} diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c index dcdea31270..ad88a3796d 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c +++ 
b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -248,7 +248,7 @@ int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 cl } SEC("lsm/task_free") -__failure __msg("reg type unsupported for arg#0 function") +__failure __msg("R1 must be a rcu pointer") int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task) { struct task_struct *acquired; diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c index b685a4aba6..3494ca30fa 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_ma.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c @@ -17,20 +17,23 @@ struct generic_map_value { char _license[] SEC("license") = "GPL"; -const unsigned int data_sizes[] = {8, 16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096}; +const unsigned int data_sizes[] = {16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096}; const volatile unsigned int data_btf_ids[ARRAY_SIZE(data_sizes)] = {}; +const unsigned int percpu_data_sizes[] = {8, 16, 32, 64, 96, 128, 192, 256, 512}; +const volatile unsigned int percpu_data_btf_ids[ARRAY_SIZE(data_sizes)] = {}; + int err = 0; -int pid = 0; +u32 pid = 0; #define DEFINE_ARRAY_WITH_KPTR(_size) \ struct bin_data_##_size { \ char data[_size - sizeof(void *)]; \ }; \ + /* See Commit 5d8d6634ccc, force btf generation for type bin_data_##_size */ \ + struct bin_data_##_size *__bin_data_##_size; \ struct map_value_##_size { \ struct bin_data_##_size __kptr * data; \ - /* To emit BTF info for bin_data_xx */ \ - struct bin_data_##_size not_used; \ }; \ struct { \ __uint(type, BPF_MAP_TYPE_ARRAY); \ @@ -40,8 +43,12 @@ int pid = 0; } array_##_size SEC(".maps") #define DEFINE_ARRAY_WITH_PERCPU_KPTR(_size) \ + struct percpu_bin_data_##_size { \ + char data[_size]; \ + }; \ + struct percpu_bin_data_##_size *__percpu_bin_data_##_size; \ struct map_value_percpu_##_size { \ - struct bin_data_##_size __percpu_kptr * data; \ + struct percpu_bin_data_##_size 
__percpu_kptr * data; \ }; \ struct { \ __uint(type, BPF_MAP_TYPE_ARRAY); \ @@ -114,7 +121,7 @@ static __always_inline void batch_percpu_alloc(struct bpf_map *map, unsigned int return; } /* per-cpu allocator may not be able to refill in time */ - new = bpf_percpu_obj_new_impl(data_btf_ids[idx], NULL); + new = bpf_percpu_obj_new_impl(percpu_data_btf_ids[idx], NULL); if (!new) continue; @@ -166,7 +173,7 @@ static __always_inline void batch_percpu_free(struct bpf_map *map, unsigned int batch_percpu_free((struct bpf_map *)(&array_percpu_##size), batch, idx); \ } while (0) -DEFINE_ARRAY_WITH_KPTR(8); +/* kptr doesn't support bin_data_8 which is a zero-sized array */ DEFINE_ARRAY_WITH_KPTR(16); DEFINE_ARRAY_WITH_KPTR(32); DEFINE_ARRAY_WITH_KPTR(64); @@ -179,7 +186,7 @@ DEFINE_ARRAY_WITH_KPTR(1024); DEFINE_ARRAY_WITH_KPTR(2048); DEFINE_ARRAY_WITH_KPTR(4096); -/* per-cpu kptr doesn't support bin_data_8 which is a zero-sized array */ +DEFINE_ARRAY_WITH_PERCPU_KPTR(8); DEFINE_ARRAY_WITH_PERCPU_KPTR(16); DEFINE_ARRAY_WITH_PERCPU_KPTR(32); DEFINE_ARRAY_WITH_PERCPU_KPTR(64); @@ -188,9 +195,6 @@ DEFINE_ARRAY_WITH_PERCPU_KPTR(128); DEFINE_ARRAY_WITH_PERCPU_KPTR(192); DEFINE_ARRAY_WITH_PERCPU_KPTR(256); DEFINE_ARRAY_WITH_PERCPU_KPTR(512); -DEFINE_ARRAY_WITH_PERCPU_KPTR(1024); -DEFINE_ARRAY_WITH_PERCPU_KPTR(2048); -DEFINE_ARRAY_WITH_PERCPU_KPTR(4096); SEC("?fentry/" SYS_PREFIX "sys_nanosleep") int test_batch_alloc_free(void *ctx) @@ -198,21 +202,20 @@ int test_batch_alloc_free(void *ctx) if ((u32)bpf_get_current_pid_tgid() != pid) return 0; - /* Alloc 128 8-bytes objects in batch to trigger refilling, - * then free 128 8-bytes objects in batch to trigger freeing. + /* Alloc 128 16-bytes objects in batch to trigger refilling, + * then free 128 16-bytes objects in batch to trigger freeing. 
*/ - CALL_BATCH_ALLOC_FREE(8, 128, 0); - CALL_BATCH_ALLOC_FREE(16, 128, 1); - CALL_BATCH_ALLOC_FREE(32, 128, 2); - CALL_BATCH_ALLOC_FREE(64, 128, 3); - CALL_BATCH_ALLOC_FREE(96, 128, 4); - CALL_BATCH_ALLOC_FREE(128, 128, 5); - CALL_BATCH_ALLOC_FREE(192, 128, 6); - CALL_BATCH_ALLOC_FREE(256, 128, 7); - CALL_BATCH_ALLOC_FREE(512, 64, 8); - CALL_BATCH_ALLOC_FREE(1024, 32, 9); - CALL_BATCH_ALLOC_FREE(2048, 16, 10); - CALL_BATCH_ALLOC_FREE(4096, 8, 11); + CALL_BATCH_ALLOC_FREE(16, 128, 0); + CALL_BATCH_ALLOC_FREE(32, 128, 1); + CALL_BATCH_ALLOC_FREE(64, 128, 2); + CALL_BATCH_ALLOC_FREE(96, 128, 3); + CALL_BATCH_ALLOC_FREE(128, 128, 4); + CALL_BATCH_ALLOC_FREE(192, 128, 5); + CALL_BATCH_ALLOC_FREE(256, 128, 6); + CALL_BATCH_ALLOC_FREE(512, 64, 7); + CALL_BATCH_ALLOC_FREE(1024, 32, 8); + CALL_BATCH_ALLOC_FREE(2048, 16, 9); + CALL_BATCH_ALLOC_FREE(4096, 8, 10); return 0; } @@ -223,21 +226,20 @@ int test_free_through_map_free(void *ctx) if ((u32)bpf_get_current_pid_tgid() != pid) return 0; - /* Alloc 128 8-bytes objects in batch to trigger refilling, + /* Alloc 128 16-bytes objects in batch to trigger refilling, * then free these objects through map free. 
*/ - CALL_BATCH_ALLOC(8, 128, 0); - CALL_BATCH_ALLOC(16, 128, 1); - CALL_BATCH_ALLOC(32, 128, 2); - CALL_BATCH_ALLOC(64, 128, 3); - CALL_BATCH_ALLOC(96, 128, 4); - CALL_BATCH_ALLOC(128, 128, 5); - CALL_BATCH_ALLOC(192, 128, 6); - CALL_BATCH_ALLOC(256, 128, 7); - CALL_BATCH_ALLOC(512, 64, 8); - CALL_BATCH_ALLOC(1024, 32, 9); - CALL_BATCH_ALLOC(2048, 16, 10); - CALL_BATCH_ALLOC(4096, 8, 11); + CALL_BATCH_ALLOC(16, 128, 0); + CALL_BATCH_ALLOC(32, 128, 1); + CALL_BATCH_ALLOC(64, 128, 2); + CALL_BATCH_ALLOC(96, 128, 3); + CALL_BATCH_ALLOC(128, 128, 4); + CALL_BATCH_ALLOC(192, 128, 5); + CALL_BATCH_ALLOC(256, 128, 6); + CALL_BATCH_ALLOC(512, 64, 7); + CALL_BATCH_ALLOC(1024, 32, 8); + CALL_BATCH_ALLOC(2048, 16, 9); + CALL_BATCH_ALLOC(4096, 8, 10); return 0; } @@ -248,9 +250,10 @@ int test_batch_percpu_alloc_free(void *ctx) if ((u32)bpf_get_current_pid_tgid() != pid) return 0; - /* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling, - * then free 128 16-bytes per-cpu objects in batch to trigger freeing. + /* Alloc 128 8-bytes per-cpu objects in batch to trigger refilling, + * then free 128 8-bytes per-cpu objects in batch to trigger freeing. */ + CALL_BATCH_PERCPU_ALLOC_FREE(8, 128, 0); CALL_BATCH_PERCPU_ALLOC_FREE(16, 128, 1); CALL_BATCH_PERCPU_ALLOC_FREE(32, 128, 2); CALL_BATCH_PERCPU_ALLOC_FREE(64, 128, 3); @@ -259,9 +262,6 @@ int test_batch_percpu_alloc_free(void *ctx) CALL_BATCH_PERCPU_ALLOC_FREE(192, 128, 6); CALL_BATCH_PERCPU_ALLOC_FREE(256, 128, 7); CALL_BATCH_PERCPU_ALLOC_FREE(512, 64, 8); - CALL_BATCH_PERCPU_ALLOC_FREE(1024, 32, 9); - CALL_BATCH_PERCPU_ALLOC_FREE(2048, 16, 10); - CALL_BATCH_PERCPU_ALLOC_FREE(4096, 8, 11); return 0; } @@ -272,9 +272,10 @@ int test_percpu_free_through_map_free(void *ctx) if ((u32)bpf_get_current_pid_tgid() != pid) return 0; - /* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling, + /* Alloc 128 8-bytes per-cpu objects in batch to trigger refilling, * then free these object through map free. 
*/ + CALL_BATCH_PERCPU_ALLOC(8, 128, 0); CALL_BATCH_PERCPU_ALLOC(16, 128, 1); CALL_BATCH_PERCPU_ALLOC(32, 128, 2); CALL_BATCH_PERCPU_ALLOC(64, 128, 3); @@ -283,9 +284,6 @@ int test_percpu_free_through_map_free(void *ctx) CALL_BATCH_PERCPU_ALLOC(192, 128, 6); CALL_BATCH_PERCPU_ALLOC(256, 128, 7); CALL_BATCH_PERCPU_ALLOC(512, 64, 8); - CALL_BATCH_PERCPU_ALLOC(1024, 32, 9); - CALL_BATCH_PERCPU_ALLOC(2048, 16, 10); - CALL_BATCH_PERCPU_ALLOC(4096, 8, 11); return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c new file mode 100644 index 0000000000..44628865fe --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +__u32 target_ancestor_level; +__u64 target_ancestor_cgid; +int target_pid, target_hid; + +struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym; +struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym; +void bpf_cgroup_release(struct cgroup *cgrp) __ksym; + +static int bpf_link_create_verify(int cmd) +{ + struct cgroup *cgrp, *ancestor; + struct task_struct *task; + int ret = 0; + + if (cmd != BPF_LINK_CREATE) + return 0; + + task = bpf_get_current_task_btf(); + + /* Then it can run in parallel with others */ + if (task->pid != target_pid) + return 0; + + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + /* Refuse it if its cgid or its ancestor's cgid is the target cgid */ + if (cgrp->kn->id == target_ancestor_cgid) + ret = -1; + + ancestor = bpf_cgroup_ancestor(cgrp, target_ancestor_level); + if (!ancestor) + goto out; + + if (ancestor->kn->id == target_ancestor_cgid) + ret = -1; + bpf_cgroup_release(ancestor); + +out: + 
bpf_cgroup_release(cgrp); + return ret; +} + +SEC("lsm/bpf") +int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size) +{ + return bpf_link_create_verify(cmd); +} + +SEC("lsm.s/bpf") +int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size) +{ + return bpf_link_create_verify(cmd); +} + +SEC("fentry") +int BPF_PROG(fentry_run) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c index a17dd83eae..ee4a601dcb 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c @@ -53,7 +53,7 @@ int test_core_kernel(void *ctx) struct task_struct *task = (void *)bpf_get_current_task(); struct core_reloc_kernel_output *out = (void *)&data.out; uint64_t pid_tgid = bpf_get_current_pid_tgid(); - uint32_t real_tgid = (uint32_t)pid_tgid; + int32_t real_tgid = (int32_t)pid_tgid; int pid, tgid; if (data.my_pid_tgid != pid_tgid) diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c index f59f175c7b..bcb31ff92d 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c @@ -43,8 +43,8 @@ int BPF_PROG(test_core_module_probed, #if __has_builtin(__builtin_preserve_enum_value) struct core_reloc_module_output *out = (void *)&data.out; __u64 pid_tgid = bpf_get_current_pid_tgid(); - __u32 real_tgid = (__u32)(pid_tgid >> 32); - __u32 real_pid = (__u32)pid_tgid; + __s32 real_tgid = (__s32)(pid_tgid >> 32); + __s32 real_pid = (__s32)pid_tgid; if (data.my_pid_tgid != pid_tgid) return 0; @@ -77,8 +77,8 @@ int BPF_PROG(test_core_module_direct, #if __has_builtin(__builtin_preserve_enum_value) struct core_reloc_module_output *out = (void *)&data.out; __u64 pid_tgid = bpf_get_current_pid_tgid(); - __u32 
real_tgid = (__u32)(pid_tgid >> 32); - __u32 real_pid = (__u32)pid_tgid; + __s32 real_tgid = (__s32)(pid_tgid >> 32); + __s32 real_pid = (__s32)pid_tgid; if (data.my_pid_tgid != pid_tgid) return 0; diff --git a/tools/testing/selftests/bpf/progs/test_fill_link_info.c b/tools/testing/selftests/bpf/progs/test_fill_link_info.c index 564f402d56..69509f8bb6 100644 --- a/tools/testing/selftests/bpf/progs/test_fill_link_info.c +++ b/tools/testing/selftests/bpf/progs/test_fill_link_info.c @@ -39,4 +39,10 @@ int BPF_PROG(kmulti_run) return 0; } +SEC("uprobe.multi") +int BPF_PROG(umulti_run) +{ + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_fsverity.c b/tools/testing/selftests/bpf/progs/test_fsverity.c new file mode 100644 index 0000000000..9e0f73e818 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_fsverity.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_kfuncs.h" + +char _license[] SEC("license") = "GPL"; + +#ifndef SHA256_DIGEST_SIZE +#define SHA256_DIGEST_SIZE 32 +#endif + +#define SIZEOF_STRUCT_FSVERITY_DIGEST 4 /* sizeof(struct fsverity_digest) */ + +char expected_digest[SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE]; +char digest[SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE]; +__u32 monitored_pid; +__u32 got_fsverity; +__u32 digest_matches; + +SEC("lsm.s/file_open") +int BPF_PROG(test_file_open, struct file *f) +{ + struct bpf_dynptr digest_ptr; + __u32 pid; + int ret; + int i; + + pid = bpf_get_current_pid_tgid() >> 32; + if (pid != monitored_pid) + return 0; + + bpf_dynptr_from_mem(digest, sizeof(digest), 0, &digest_ptr); + ret = bpf_get_fsverity_digest(f, &digest_ptr); + if (ret < 0) + return 0; + got_fsverity = 1; + + for (i = 0; i < (int)sizeof(digest); i++) { + if (digest[i] != expected_digest[i]) + return 0; + } + 
+ digest_matches = 1; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_get_xattr.c b/tools/testing/selftests/bpf/progs/test_get_xattr.c new file mode 100644 index 0000000000..7eb2a4e5a3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_get_xattr.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_kfuncs.h" + +char _license[] SEC("license") = "GPL"; + +__u32 monitored_pid; +__u32 found_xattr; + +static const char expected_value[] = "hello"; +char value[32]; + +SEC("lsm.s/file_open") +int BPF_PROG(test_file_open, struct file *f) +{ + struct bpf_dynptr value_ptr; + __u32 pid; + int ret; + + pid = bpf_get_current_pid_tgid() >> 32; + if (pid != monitored_pid) + return 0; + + bpf_dynptr_from_mem(value, sizeof(value), 0, &value_ptr); + + ret = bpf_get_file_xattr(f, "user.kfuncs", &value_ptr); + if (ret != sizeof(expected_value)) + return 0; + if (bpf_strncmp(value, ret, expected_value)) + return 0; + found_xattr = 1; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func12.c b/tools/testing/selftests/bpf/progs/test_global_func12.c index 7f159d83c6..6e03d42519 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func12.c +++ b/tools/testing/selftests/bpf/progs/test_global_func12.c @@ -19,5 +19,7 @@ int global_func12(struct __sk_buff *skb) { const struct S s = {.x = skb->len }; - return foo(&s); + foo(&s); + + return 1; } diff --git a/tools/testing/selftests/bpf/progs/test_global_func15.c b/tools/testing/selftests/bpf/progs/test_global_func15.c index b512d6a6c7..b4e089d698 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func15.c +++ b/tools/testing/selftests/bpf/progs/test_global_func15.c @@ -13,7 +13,7 @@ __noinline int foo(unsigned int *v) } SEC("cgroup_skb/ingress") -__failure __msg("At program exit the register R0 has value") +__failure 
__msg("At program exit the register R0 has ") int global_func15(struct __sk_buff *skb) { unsigned int v = 1; @@ -22,3 +22,35 @@ int global_func15(struct __sk_buff *skb) return v; } + +SEC("cgroup_skb/ingress") +__log_level(2) __flag(BPF_F_TEST_STATE_FREQ) +__failure +/* check that fallthrough code path marks r0 as precise */ +__msg("mark_precise: frame0: regs=r0 stack= before 2: (b7) r0 = 1") +/* check that branch code path marks r0 as precise */ +__msg("mark_precise: frame0: regs=r0 stack= before 0: (85) call bpf_get_prandom_u32#7") +__msg("At program exit the register R0 has ") +__naked int global_func15_tricky_pruning(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "if r0 s> 1000 goto 1f;" + "r0 = 1;" + "1:" + "goto +0;" /* checkpoint */ + /* cgroup_skb/ingress program is expected to return [0, 1] + * values, so branch above makes sure that in a fallthrough + * case we have a valid 1 stored in R0 register, but in + * a branch case we assign some random value to R0. So if + * there is something wrong with precision tracking for R0 at + * program exit, we might erroneously prune branch case, + * because R0 in fallthrough case is imprecise (and thus any + * value is valid from POV of verifier is_state_equal() logic) + */ + "exit;" + : + : __imm(bpf_get_prandom_u32) + : __clobber_common + ); +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func5.c b/tools/testing/selftests/bpf/progs/test_global_func5.c index cc55aedaf8..257c0569ff 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func5.c +++ b/tools/testing/selftests/bpf/progs/test_global_func5.c @@ -26,7 +26,7 @@ int f3(int val, struct __sk_buff *skb) } SEC("tc") -__failure __msg("expected pointer to ctx, but got PTR") +__failure __msg("expects pointer to ctx") int global_func5(struct __sk_buff *skb) { return f1(skb) + f2(2, skb) + f3(3, skb); diff --git a/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c 
index 7faa8eef05..9a06e5eb1f 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c +++ b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c @@ -102,3 +102,52 @@ int perf_event_ctx(void *ctx) { return perf_event_ctx_subprog(ctx); } + +/* this global subprog can be now called from many types of entry progs, each + * with different context type + */ +__weak int subprog_ctx_tag(void *ctx __arg_ctx) +{ + return bpf_get_stack(ctx, stack, sizeof(stack), 0); +} + +struct my_struct { int x; }; + +__weak int subprog_multi_ctx_tags(void *ctx1 __arg_ctx, + struct my_struct *mem, + void *ctx2 __arg_ctx) +{ + if (!mem) + return 0; + + return bpf_get_stack(ctx1, stack, sizeof(stack), 0) + + mem->x + + bpf_get_stack(ctx2, stack, sizeof(stack), 0); +} + +SEC("?raw_tp") +__success __log_level(2) +int arg_tag_ctx_raw_tp(void *ctx) +{ + struct my_struct x = { .x = 123 }; + + return subprog_ctx_tag(ctx) + subprog_multi_ctx_tags(ctx, &x, ctx); +} + +SEC("?perf_event") +__success __log_level(2) +int arg_tag_ctx_perf(void *ctx) +{ + struct my_struct x = { .x = 123 }; + + return subprog_ctx_tag(ctx) + subprog_multi_ctx_tags(ctx, &x, ctx); +} + +SEC("?kprobe") +__success __log_level(2) +int arg_tag_ctx_kprobe(void *ctx) +{ + struct my_struct x = { .x = 123 }; + + return subprog_ctx_tag(ctx) + subprog_multi_ctx_tags(ctx, &x, ctx); +} diff --git a/tools/testing/selftests/bpf/progs/test_jhash.h b/tools/testing/selftests/bpf/progs/test_jhash.h index c300734d26..ef53559bbb 100644 --- a/tools/testing/selftests/bpf/progs/test_jhash.h +++ b/tools/testing/selftests/bpf/progs/test_jhash.h @@ -69,3 +69,34 @@ u32 jhash(const void *key, u32 length, u32 initval) return c; } + +static __always_inline u32 jhash2(const u32 *k, u32 length, u32 initval) +{ + u32 a, b, c; + + /* Set up the internal state */ + a = b = c = JHASH_INITVAL + (length<<2) + initval; + + /* Handle most of the key */ + while (length > 3) { + a += k[0]; + b += k[1]; + c += k[2]; + __jhash_mix(a, b, c); + 
length -= 3; + k += 3; + } + + /* Handle the last 3 u32's */ + switch (length) { + case 3: c += k[2]; + case 2: b += k[1]; + case 1: a += k[0]; + __jhash_final(a, b, c); + break; + case 0: /* Nothing left to add */ + break; + } + + return c; +} diff --git a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c new file mode 100644 index 0000000000..2f0eb1334d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_kfuncs.h" + +char _license[] SEC("license") = "GPL"; + +#ifndef SHA256_DIGEST_SIZE +#define SHA256_DIGEST_SIZE 32 +#endif + +#define MAX_SIG_SIZE 1024 + +/* By default, "fsverity sign" signs a file with fsverity_formatted_digest + * of the file. fsverity_formatted_digest on the kernel side is only used + * with CONFIG_FS_VERITY_BUILTIN_SIGNATURES. However, BPF LSM does not + * require CONFIG_FS_VERITY_BUILTIN_SIGNATURES, so vmlinux.h may not have + * fsverity_formatted_digest. In this test, we intentionally avoid using + * fsverity_formatted_digest. + * + * Luckily, fsverity_formatted_digest is simply 8-byte magic followed by + * fsverity_digest. We use a char array of size fsverity_formatted_digest + * plus SHA256_DIGEST_SIZE. The magic part of it is filled by user space, + * and the rest of it is filled by bpf_get_fsverity_digest. + * + * Note that, generating signatures based on fsverity_formatted_digest is + * the design choice of this selftest (and "fsverity sign"). With BPF + * LSM, we have the flexibility to generate signature based on other data + * sets, for example, fsverity_digest or only the digest[] part of it. 
+ */ +#define MAGIC_SIZE 8 +#define SIZEOF_STRUCT_FSVERITY_DIGEST 4 /* sizeof(struct fsverity_digest) */ +char digest[MAGIC_SIZE + SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE]; + +__u32 monitored_pid; +char sig[MAX_SIG_SIZE]; +__u32 sig_size; +__u32 user_keyring_serial; + +SEC("lsm.s/file_open") +int BPF_PROG(test_file_open, struct file *f) +{ + struct bpf_dynptr digest_ptr, sig_ptr; + struct bpf_key *trusted_keyring; + __u32 pid; + int ret; + + pid = bpf_get_current_pid_tgid() >> 32; + if (pid != monitored_pid) + return 0; + + /* digest_ptr points to fsverity_digest */ + bpf_dynptr_from_mem(digest + MAGIC_SIZE, sizeof(digest) - MAGIC_SIZE, 0, &digest_ptr); + + ret = bpf_get_fsverity_digest(f, &digest_ptr); + /* No verity, allow access */ + if (ret < 0) + return 0; + + /* Move digest_ptr to fsverity_formatted_digest */ + bpf_dynptr_from_mem(digest, sizeof(digest), 0, &digest_ptr); + + /* Read signature from xattr */ + bpf_dynptr_from_mem(sig, sizeof(sig), 0, &sig_ptr); + ret = bpf_get_file_xattr(f, "user.sig", &sig_ptr); + /* No signature, reject access */ + if (ret < 0) + return -EPERM; + + trusted_keyring = bpf_lookup_user_key(user_keyring_serial, 0); + if (!trusted_keyring) + return -ENOENT; + + /* Verify signature */ + ret = bpf_verify_pkcs7_signature(&digest_ptr, &sig_ptr, trusted_keyring); + + bpf_key_put(trusted_keyring); + return ret; +} diff --git a/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c index eacda9fe07..4cfa42aa94 100644 --- a/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c +++ b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c @@ -29,7 +29,7 @@ int BPF_PROG(unix_listen, struct socket *sock, int backlog) len = unix_sk->addr->len - sizeof(short); path[0] = '@'; for (i = 1; i < len; i++) { - if (i >= sizeof(struct sockaddr_un)) + if (i >= (int)sizeof(struct sockaddr_un)) break; path[i] = unix_sk->addr->name->sun_path[i]; diff --git 
a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index f66af753bb..3e436e6f73 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -6,66 +6,34 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ -#include <stddef.h> -#include <string.h> -#include <arpa/inet.h> -#include <linux/bpf.h> -#include <linux/if_ether.h> -#include <linux/if_packet.h> -#include <linux/if_tunnel.h> -#include <linux/ip.h> -#include <linux/ipv6.h> -#include <linux/icmp.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/pkt_cls.h> -#include <linux/erspan.h> -#include <linux/udp.h> +#include "vmlinux.h" +#include <bpf/bpf_core_read.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> +#include "bpf_kfuncs.h" +#include "bpf_tracing_net.h" #define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret) -#define VXLAN_UDP_PORT 4789 +#define VXLAN_UDP_PORT 4789 +#define ETH_P_IP 0x0800 +#define PACKET_HOST 0 +#define TUNNEL_CSUM bpf_htons(0x01) +#define TUNNEL_KEY bpf_htons(0x04) /* Only IPv4 address assigned to veth1. 
* 172.16.1.200 */ #define ASSIGNED_ADDR_VETH1 0xac1001c8 -struct geneve_opt { - __be16 opt_class; - __u8 type; - __u8 length:5; - __u8 r3:1; - __u8 r2:1; - __u8 r1:1; - __u8 opt_data[8]; /* hard-coded to 8 byte */ -}; - -struct vxlanhdr { - __be32 vx_flags; - __be32 vx_vni; -} __attribute__((packed)); - -struct vxlan_metadata { - __u32 gbp; -}; - -struct bpf_fou_encap { - __be16 sport; - __be16 dport; -}; - -enum bpf_fou_encap_type { - FOU_BPF_ENCAP_FOU, - FOU_BPF_ENCAP_GUE, -}; - int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx, struct bpf_fou_encap *encap, int type) __ksym; int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, struct bpf_fou_encap *encap) __ksym; +struct xfrm_state * +bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts, + u32 opts__sz) __ksym; +void bpf_xdp_xfrm_state_release(struct xfrm_state *x) __ksym; struct { __uint(type, BPF_MAP_TYPE_ARRAY); @@ -205,9 +173,9 @@ int erspan_set_tunnel(struct __sk_buff *skb) __u8 hwid = 7; md.version = 2; - md.u.md2.dir = direction; - md.u.md2.hwid = hwid & 0xf; - md.u.md2.hwid_upper = (hwid >> 4) & 0x3; + BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction); + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf)); + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3); #endif ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); @@ -246,8 +214,9 @@ int erspan_get_tunnel(struct __sk_buff *skb) bpf_printk("\tindex %x\n", index); #else bpf_printk("\tdirection %d hwid %x timestamp %u\n", - md.u.md2.dir, - (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, + BPF_CORE_READ_BITFIELD(&md.u.md2, dir), + (BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) + + BPF_CORE_READ_BITFIELD(&md.u.md2, hwid), bpf_ntohl(md.u.md2.timestamp)); #endif @@ -284,9 +253,9 @@ int ip4ip6erspan_set_tunnel(struct __sk_buff *skb) __u8 hwid = 17; md.version = 2; - md.u.md2.dir = direction; - md.u.md2.hwid = hwid & 0xf; - md.u.md2.hwid_upper = (hwid >> 4) & 0x3; + BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, 
direction); + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf)); + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3); #endif ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); @@ -326,8 +295,9 @@ int ip4ip6erspan_get_tunnel(struct __sk_buff *skb) bpf_printk("\tindex %x\n", index); #else bpf_printk("\tdirection %d hwid %x timestamp %u\n", - md.u.md2.dir, - (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, + BPF_CORE_READ_BITFIELD(&md.u.md2, dir), + (BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) + + BPF_CORE_READ_BITFIELD(&md.u.md2, hwid), bpf_ntohl(md.u.md2.timestamp)); #endif @@ -963,6 +933,10 @@ int ip6ip6_get_tunnel(struct __sk_buff *skb) return TC_ACT_OK; } +volatile int xfrm_reqid = 0; +volatile int xfrm_spi = 0; +volatile int xfrm_remote_ip = 0; + SEC("tc") int xfrm_get_state(struct __sk_buff *skb) { @@ -973,10 +947,58 @@ int xfrm_get_state(struct __sk_buff *skb) if (ret < 0) return TC_ACT_OK; - bpf_printk("reqid %d spi 0x%x remote ip 0x%x\n", - x.reqid, bpf_ntohl(x.spi), - bpf_ntohl(x.remote_ipv4)); + xfrm_reqid = x.reqid; + xfrm_spi = bpf_ntohl(x.spi); + xfrm_remote_ip = bpf_ntohl(x.remote_ipv4); + return TC_ACT_OK; } +volatile int xfrm_replay_window = 0; + +SEC("xdp") +int xfrm_get_state_xdp(struct xdp_md *xdp) +{ + struct bpf_xfrm_state_opts opts = {}; + struct xfrm_state *x = NULL; + struct ip_esp_hdr *esph; + struct bpf_dynptr ptr; + u8 esph_buf[8] = {}; + u8 iph_buf[20] = {}; + struct iphdr *iph; + u32 off; + + if (bpf_dynptr_from_xdp(xdp, 0, &ptr)) + goto out; + + off = sizeof(struct ethhdr); + iph = bpf_dynptr_slice(&ptr, off, iph_buf, sizeof(iph_buf)); + if (!iph || iph->protocol != IPPROTO_ESP) + goto out; + + off += sizeof(struct iphdr); + esph = bpf_dynptr_slice(&ptr, off, esph_buf, sizeof(esph_buf)); + if (!esph) + goto out; + + opts.netns_id = BPF_F_CURRENT_NETNS; + opts.daddr.a4 = iph->daddr; + opts.spi = esph->spi; + opts.proto = IPPROTO_ESP; + opts.family = AF_INET; + + x = bpf_xdp_get_xfrm_state(xdp, &opts, 
sizeof(opts)); + if (!x) + goto out; + + if (!x->replay_esn) + goto out; + + xfrm_replay_window = x->replay_esn->replay_window; +out: + if (x) + bpf_xdp_xfrm_state_release(x); + return XDP_PASS; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c index 7748cc23de..f42e9f3831 100644 --- a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c +++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c @@ -10,17 +10,11 @@ #include <errno.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include "bpf_kfuncs.h" #define MAX_DATA_SIZE (1024 * 1024) #define MAX_SIG_SIZE 1024 -extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; -extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; -extern void bpf_key_put(struct bpf_key *key) __ksym; -extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr, - struct bpf_dynptr *sig_ptr, - struct bpf_key *trusted_keyring) __ksym; - __u32 monitored_pid; __u32 user_keyring_serial; __u64 system_keyring_id; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c index 5baaafed0d..3abf068b84 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c @@ -38,7 +38,7 @@ int xdp_redirect(struct xdp_md *xdp) if (payload + 1 > data_end) return XDP_ABORTED; - if (xdp->ingress_ifindex != ifindex_in) + if (xdp->ingress_ifindex != (__u32)ifindex_in) return XDP_ABORTED; if (metadata + 1 > data) diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c index 8b946c8188..f615da97df 100644 --- a/tools/testing/selftests/bpf/progs/timer.c +++ b/tools/testing/selftests/bpf/progs/timer.c @@ -51,7 +51,8 @@ struct { __uint(max_entries, 1); __type(key, int); __type(value, struct elem); -} 
abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps"); +} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps"), + race_array SEC(".maps"); __u64 bss_data; __u64 abs_data; @@ -390,3 +391,34 @@ int BPF_PROG2(test5, int, a) return 0; } + +static int race_timer_callback(void *race_array, int *race_key, struct bpf_timer *timer) +{ + bpf_timer_start(timer, 1000000, 0); + return 0; +} + +SEC("syscall") +int race(void *ctx) +{ + struct bpf_timer *timer; + int err, race_key = 0; + struct elem init; + + __builtin_memset(&init, 0, sizeof(struct elem)); + bpf_map_update_elem(&race_array, &race_key, &init, BPF_ANY); + + timer = bpf_map_lookup_elem(&race_array, &race_key); + if (!timer) + return 1; + + err = bpf_timer_init(timer, &race_array, CLOCK_MONOTONIC); + if (err && err != -EBUSY) + return 1; + + bpf_timer_set_callback(timer, race_timer_callback); + bpf_timer_start(timer, 0, 0); + bpf_timer_cancel(timer); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/timer_failure.c b/tools/testing/selftests/bpf/progs/timer_failure.c index 226d33b5a0..0996c2486f 100644 --- a/tools/testing/selftests/bpf/progs/timer_failure.c +++ b/tools/testing/selftests/bpf/progs/timer_failure.c @@ -21,17 +21,38 @@ struct { __type(value, struct elem); } timer_map SEC(".maps"); -static int timer_cb_ret1(void *map, int *key, struct bpf_timer *timer) +__naked __noinline __used +static unsigned long timer_cb_ret_bad() { - if (bpf_get_smp_processor_id() % 2) - return 1; - else - return 0; + asm volatile ( + "call %[bpf_get_prandom_u32];" + "if r0 s> 1000 goto 1f;" + "r0 = 0;" + "1:" + "goto +0;" /* checkpoint */ + /* async callback is expected to return 0, so branch above + * skipping r0 = 0; should lead to a failure, but if exit + * instruction doesn't enforce r0's precision, this callback + * will be successfully verified + */ + "exit;" + : + : __imm(bpf_get_prandom_u32) + : __clobber_common + ); } 
SEC("fentry/bpf_fentry_test1") -__failure __msg("should have been in (0x0; 0x0)") -int BPF_PROG2(test_ret_1, int, a) +__log_level(2) +__flag(BPF_F_TEST_STATE_FREQ) +__failure +/* check that fallthrough code path marks r0 as precise */ +__msg("mark_precise: frame0: regs=r0 stack= before") +__msg(": (85) call bpf_get_prandom_u32#7") /* anchor message */ +/* check that branch code path marks r0 as precise */ +__msg("mark_precise: frame0: regs=r0 stack= before ") __msg(": (85) call bpf_get_prandom_u32#7") +__msg("should have been in [0, 0]") +long BPF_PROG2(test_bad_ret, int, a) { int key = 0; struct bpf_timer *timer; @@ -39,7 +60,7 @@ int BPF_PROG2(test_ret_1, int, a) timer = bpf_map_lookup_elem(&timer_map, &key); if (timer) { bpf_timer_init(timer, &timer_map, CLOCK_BOOTTIME); - bpf_timer_set_callback(timer, timer_cb_ret1); + bpf_timer_set_callback(timer, timer_cb_ret_bad); bpf_timer_start(timer, 1000, 0); } diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c index 03ee946c6b..11ab25c42c 100644 --- a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c +++ b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c @@ -184,7 +184,7 @@ invalid_drain_callback_return(struct bpf_dynptr *dynptr, void *context) * not be able to write to that pointer. 
*/ SEC("?raw_tp") -__failure __msg("At callback return the register R0 has value") +__failure __msg("At callback return the register R0 has ") int user_ringbuf_callback_invalid_return(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, invalid_drain_callback_return, NULL, 0); diff --git a/tools/testing/selftests/bpf/progs/verifier_bitfield_write.c b/tools/testing/selftests/bpf/progs/verifier_bitfield_write.c new file mode 100644 index 0000000000..623f130a31 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_bitfield_write.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <stdint.h> + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +#include "bpf_misc.h" + +struct core_reloc_bitfields { + /* unsigned bitfields */ + uint8_t ub1: 1; + uint8_t ub2: 2; + uint32_t ub7: 7; + /* signed bitfields */ + int8_t sb4: 4; + int32_t sb20: 20; + /* non-bitfields */ + uint32_t u32; + int32_t s32; +} __attribute__((preserve_access_index)); + +SEC("tc") +__description("single CO-RE bitfield roundtrip") +__btf_path("btf__core_reloc_bitfields.bpf.o") +__success +__retval(3) +int single_field_roundtrip(struct __sk_buff *ctx) +{ + struct core_reloc_bitfields bitfields; + + __builtin_memset(&bitfields, 0, sizeof(bitfields)); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub2, 3); + return BPF_CORE_READ_BITFIELD(&bitfields, ub2); +} + +SEC("tc") +__description("multiple CO-RE bitfield roundtrip") +__btf_path("btf__core_reloc_bitfields.bpf.o") +__success +__retval(0x3FD) +int multiple_field_roundtrip(struct __sk_buff *ctx) +{ + struct core_reloc_bitfields bitfields; + uint8_t ub2; + int8_t sb4; + + __builtin_memset(&bitfields, 0, sizeof(bitfields)); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub2, 1); + BPF_CORE_WRITE_BITFIELD(&bitfields, sb4, -1); + + ub2 = BPF_CORE_READ_BITFIELD(&bitfields, ub2); + sb4 = BPF_CORE_READ_BITFIELD(&bitfields, sb4); + + return (((uint8_t)sb4) << 2) | ub2; +} + +SEC("tc") +__description("adjacent CO-RE 
bitfield roundtrip") +__btf_path("btf__core_reloc_bitfields.bpf.o") +__success +__retval(7) +int adjacent_field_roundtrip(struct __sk_buff *ctx) +{ + struct core_reloc_bitfields bitfields; + uint8_t ub1, ub2; + + __builtin_memset(&bitfields, 0, sizeof(bitfields)); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub1, 1); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub2, 3); + + ub1 = BPF_CORE_READ_BITFIELD(&bitfields, ub1); + ub2 = BPF_CORE_READ_BITFIELD(&bitfields, ub2); + + return (ub2 << 1) | ub1; +} + +SEC("tc") +__description("multibyte CO-RE bitfield roundtrip") +__btf_path("btf__core_reloc_bitfields.bpf.o") +__success +__retval(0x21) +int multibyte_field_roundtrip(struct __sk_buff *ctx) +{ + struct core_reloc_bitfields bitfields; + uint32_t ub7; + uint8_t ub1; + + __builtin_memset(&bitfields, 0, sizeof(bitfields)); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub1, 1); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub7, 16); + + ub1 = BPF_CORE_READ_BITFIELD(&bitfields, ub1); + ub7 = BPF_CORE_READ_BITFIELD(&bitfields, ub7); + + return (ub7 << 1) | ub1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index c5588a14fe..960998f163 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -965,6 +965,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("bound check with JMP_JSLT for crossing 64-bit signed boundary") __success __retval(0) +__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */ __naked void crossing_64_bit_signed_boundary_2(void) { asm volatile (" \ @@ -1046,6 +1047,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("bound check with JMP32_JSLT for crossing 32-bit signed boundary") __success __retval(0) +__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */ __naked void crossing_32_bit_signed_boundary_2(void) { asm volatile (" \ @@ -1073,4 +1075,66 @@ l0_%=: r0 = 0; \ : 
__clobber_all); } +SEC("tc") +__description("bounds check with JMP_NE for reg edge") +__success __retval(0) +__naked void reg_not_equal_const(void) +{ + asm volatile (" \ + r6 = r1; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + call %[bpf_get_prandom_u32]; \ + r4 = r0; \ + r4 &= 7; \ + if r4 != 0 goto l0_%=; \ + r0 = 0; \ + exit; \ +l0_%=: r1 = r6; \ + r2 = 0; \ + r3 = r10; \ + r3 += -8; \ + r5 = 0; \ + /* The 4th argument of bpf_skb_store_bytes is defined as \ + * ARG_CONST_SIZE, so 0 is not allowed. The 'r4 != 0' \ + * is providing us this exclusion of zero from initial \ + * [0, 7] range. \ + */ \ + call %[bpf_skb_store_bytes]; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32), + __imm(bpf_skb_store_bytes) + : __clobber_all); +} + +SEC("tc") +__description("bounds check with JMP_EQ for reg edge") +__success __retval(0) +__naked void reg_equal_const(void) +{ + asm volatile (" \ + r6 = r1; \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + call %[bpf_get_prandom_u32]; \ + r4 = r0; \ + r4 &= 7; \ + if r4 == 0 goto l0_%=; \ + r1 = r6; \ + r2 = 0; \ + r3 = r10; \ + r3 += -8; \ + r5 = 0; \ + /* Just the same as what we do in reg_not_equal_const() */ \ + call %[bpf_skb_store_bytes]; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32), + __imm(bpf_skb_store_bytes) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c b/tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c new file mode 100644 index 0000000000..36e033a2e0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2017 Facebook + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> +#include "bpf_misc.h" + +struct whatever {}; + +SEC("kprobe") +__success __log_level(2) +/* context type is wrong, making it impossible to freplace this 
program */ +int btf_unreliable_kprobe(struct whatever *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c b/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c index d6c4a7f3f7..6e0f349f8f 100644 --- a/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c +++ b/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c @@ -7,7 +7,7 @@ SEC("cgroup/sock") __description("bpf_exit with invalid return code. test1") -__failure __msg("R0 has value (0x0; 0xffffffff)") +__failure __msg("smin=0 smax=4294967295 should have been in [0, 1]") __naked void with_invalid_return_code_test1(void) { asm volatile (" \ @@ -30,7 +30,7 @@ __naked void with_invalid_return_code_test2(void) SEC("cgroup/sock") __description("bpf_exit with invalid return code. test3") -__failure __msg("R0 has value (0x0; 0x3)") +__failure __msg("smin=0 smax=3 should have been in [0, 1]") __naked void with_invalid_return_code_test3(void) { asm volatile (" \ @@ -53,7 +53,7 @@ __naked void with_invalid_return_code_test4(void) SEC("cgroup/sock") __description("bpf_exit with invalid return code. test5") -__failure __msg("R0 has value (0x2; 0x0)") +__failure __msg("smin=2 smax=2 should have been in [0, 1]") __naked void with_invalid_return_code_test5(void) { asm volatile (" \ @@ -75,7 +75,7 @@ __naked void with_invalid_return_code_test6(void) SEC("cgroup/sock") __description("bpf_exit with invalid return code. 
test7") -__failure __msg("R0 has unknown scalar value") +__failure __msg("R0 has unknown scalar value should have been in [0, 1]") __naked void with_invalid_return_code_test7(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c index 99a23dea82..be95570ab3 100644 --- a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c @@ -411,7 +411,7 @@ l0_%=: r0 = 0; \ SEC("tc") __description("direct packet access: test17 (pruning, alignment)") -__failure __msg("misaligned packet access off 2+(0x0; 0x0)+15+-4 size 4") +__failure __msg("misaligned packet access off 2+0+15+-4 size 4") __flag(BPF_F_STRICT_ALIGNMENT) __naked void packet_access_test17_pruning_alignment(void) { diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c new file mode 100644 index 0000000000..67dddd9418 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_misc.h" +#include "xdp_metadata.h" +#include "bpf_kfuncs.h" + +int arr[1]; +int unkn_idx; +const volatile bool call_dead_subprog = false; + +__noinline long global_bad(void) +{ + return arr[unkn_idx]; /* BOOM */ +} + +__noinline long global_good(void) +{ + return arr[0]; +} + +__noinline long global_calls_bad(void) +{ + return global_good() + global_bad() /* does BOOM indirectly */; +} + +__noinline long global_calls_good_only(void) +{ + return global_good(); +} + +__noinline long global_dead(void) +{ + return arr[0] * 2; +} + +SEC("?raw_tp") +__success __log_level(2) +/* main prog is validated completely first */ +__msg("('global_calls_good_only') is global and assumed valid.") +/* eventually global_good() is transitively validated as well */ +__msg("Validating global_good() func") +__msg("('global_good') is safe for any args that match its prototype") +int chained_global_func_calls_success(void) +{ + int sum = 0; + + if (call_dead_subprog) + sum += global_dead(); + return global_calls_good_only() + sum; +} + +SEC("?raw_tp") +__failure __log_level(2) +/* main prog validated successfully first */ +__msg("('global_calls_bad') is global and assumed valid.") +/* eventually we validate global_bad() and fail */ +__msg("Validating global_bad() func") +__msg("math between map_value pointer and register") /* BOOM */ +int chained_global_func_calls_bad(void) +{ + return global_calls_bad(); +} + +/* do out of bounds access forcing verifier to fail verification if this + * global func is called + */ +__noinline int global_unsupp(const int *mem) +{ + if (!mem) + return 0; + return mem[100]; /* BOOM */ +} + +const volatile bool skip_unsupp_global = true; + +SEC("?raw_tp") +__success +int guarded_unsupp_global_called(void) +{ + if (!skip_unsupp_global) + return global_unsupp(NULL); + return 0; +} + +SEC("?raw_tp") +__failure __log_level(2) +__msg("Func#1 ('global_unsupp') is 
global and assumed valid.") +__msg("Validating global_unsupp() func#1...") +__msg("value is outside of the allowed memory range") +int unguarded_unsupp_global_called(void) +{ + int x = 0; + + return global_unsupp(&x); +} + +long stack[128]; + +__weak int subprog_nullable_ptr_bad(int *p) +{ + return (*p) * 2; /* bad, missing null check */ +} + +SEC("?raw_tp") +__failure __log_level(2) +__msg("invalid mem access 'mem_or_null'") +int arg_tag_nullable_ptr_fail(void *ctx) +{ + int x = 42; + + return subprog_nullable_ptr_bad(&x); +} + +__noinline __weak int subprog_nonnull_ptr_good(int *p1 __arg_nonnull, int *p2 __arg_nonnull) +{ + return (*p1) * (*p2); /* good, no need for NULL checks */ +} + +int x = 47; + +SEC("?raw_tp") +__success __log_level(2) +int arg_tag_nonnull_ptr_good(void *ctx) +{ + int y = 74; + + return subprog_nonnull_ptr_good(&x, &y); +} + +/* this global subprog can be now called from many types of entry progs, each + * with different context type + */ +__weak int subprog_ctx_tag(void *ctx __arg_ctx) +{ + return bpf_get_stack(ctx, stack, sizeof(stack), 0); +} + +__weak int raw_tp_canonical(struct bpf_raw_tracepoint_args *ctx __arg_ctx) +{ + return 0; +} + +__weak int raw_tp_u64_array(u64 *ctx __arg_ctx) +{ + return 0; +} + +SEC("?raw_tp") +__success __log_level(2) +int arg_tag_ctx_raw_tp(void *ctx) +{ + return subprog_ctx_tag(ctx) + raw_tp_canonical(ctx) + raw_tp_u64_array(ctx); +} + +SEC("?raw_tp.w") +__success __log_level(2) +int arg_tag_ctx_raw_tp_writable(void *ctx) +{ + return subprog_ctx_tag(ctx) + raw_tp_canonical(ctx) + raw_tp_u64_array(ctx); +} + +SEC("?tp_btf/sys_enter") +__success __log_level(2) +int arg_tag_ctx_raw_tp_btf(void *ctx) +{ + return subprog_ctx_tag(ctx) + raw_tp_canonical(ctx) + raw_tp_u64_array(ctx); +} + +struct whatever { }; + +__weak int tp_whatever(struct whatever *ctx __arg_ctx) +{ + return 0; +} + +SEC("?tp") +__success __log_level(2) +int arg_tag_ctx_tp(void *ctx) +{ + return subprog_ctx_tag(ctx) + tp_whatever(ctx); +} + 
+__weak int kprobe_subprog_pt_regs(struct pt_regs *ctx __arg_ctx) +{ + return 0; +} + +__weak int kprobe_subprog_typedef(bpf_user_pt_regs_t *ctx __arg_ctx) +{ + return 0; +} + +SEC("?kprobe") +__success __log_level(2) +int arg_tag_ctx_kprobe(void *ctx) +{ + return subprog_ctx_tag(ctx) + + kprobe_subprog_pt_regs(ctx) + + kprobe_subprog_typedef(ctx); +} + +__weak int perf_subprog_regs( +#if defined(bpf_target_riscv) + struct user_regs_struct *ctx __arg_ctx +#elif defined(bpf_target_s390) + /* user_pt_regs typedef is anonymous struct, so only `void *` works */ + void *ctx __arg_ctx +#elif defined(bpf_target_loongarch) || defined(bpf_target_arm64) || defined(bpf_target_powerpc) + struct user_pt_regs *ctx __arg_ctx +#else + struct pt_regs *ctx __arg_ctx +#endif +) +{ + return 0; +} + +__weak int perf_subprog_typedef(bpf_user_pt_regs_t *ctx __arg_ctx) +{ + return 0; +} + +__weak int perf_subprog_canonical(struct bpf_perf_event_data *ctx __arg_ctx) +{ + return 0; +} + +SEC("?perf_event") +__success __log_level(2) +int arg_tag_ctx_perf(void *ctx) +{ + return subprog_ctx_tag(ctx) + + perf_subprog_regs(ctx) + + perf_subprog_typedef(ctx) + + perf_subprog_canonical(ctx); +} + +__weak int iter_subprog_void(void *ctx __arg_ctx) +{ + return 0; +} + +__weak int iter_subprog_typed(struct bpf_iter__task *ctx __arg_ctx) +{ + return 0; +} + +SEC("?iter/task") +__success __log_level(2) +int arg_tag_ctx_iter_task(struct bpf_iter__task *ctx) +{ + return (iter_subprog_void(ctx) + iter_subprog_typed(ctx)) & 1; +} + +__weak int tracing_subprog_void(void *ctx __arg_ctx) +{ + return 0; +} + +__weak int tracing_subprog_u64(u64 *ctx __arg_ctx) +{ + return 0; +} + +int acc; + +SEC("?fentry/" SYS_PREFIX "sys_nanosleep") +__success __log_level(2) +int BPF_PROG(arg_tag_ctx_fentry) +{ + acc += tracing_subprog_void(ctx) + tracing_subprog_u64(ctx); + return 0; +} + +SEC("?fexit/" SYS_PREFIX "sys_nanosleep") +__success __log_level(2) +int BPF_PROG(arg_tag_ctx_fexit) +{ + acc += 
tracing_subprog_void(ctx) + tracing_subprog_u64(ctx); + return 0; +} + +SEC("?fmod_ret/" SYS_PREFIX "sys_nanosleep") +__success __log_level(2) +int BPF_PROG(arg_tag_ctx_fmod_ret) +{ + return tracing_subprog_void(ctx) + tracing_subprog_u64(ctx); +} + +SEC("?lsm/bpf") +__success __log_level(2) +int BPF_PROG(arg_tag_ctx_lsm) +{ + return tracing_subprog_void(ctx) + tracing_subprog_u64(ctx); +} + +SEC("?struct_ops/test_1") +__success __log_level(2) +int BPF_PROG(arg_tag_ctx_struct_ops) +{ + return tracing_subprog_void(ctx) + tracing_subprog_u64(ctx); +} + +SEC(".struct_ops") +struct bpf_dummy_ops dummy_1 = { + .test_1 = (void *)arg_tag_ctx_struct_ops, +}; + +SEC("?syscall") +__success __log_level(2) +int arg_tag_ctx_syscall(void *ctx) +{ + return tracing_subprog_void(ctx) + tracing_subprog_u64(ctx) + tp_whatever(ctx); +} + +__weak int subprog_dynptr(struct bpf_dynptr *dptr) +{ + long *d, t, buf[1] = {}; + + d = bpf_dynptr_data(dptr, 0, sizeof(long)); + if (!d) + return 0; + + t = *d + 1; + + d = bpf_dynptr_slice(dptr, 0, &buf, sizeof(long)); + if (!d) + return t; + + t = *d + 2; + + return t; +} + +SEC("?xdp") +__success __log_level(2) +int arg_tag_dynptr(struct xdp_md *ctx) +{ + struct bpf_dynptr dptr; + + bpf_dynptr_from_xdp(ctx, 0, &dptr); + + return subprog_dynptr(&dptr); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c index d1edbcff9a..05a329ee45 100644 --- a/tools/testing/selftests/bpf/progs/verifier_gotol.c +++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c @@ -33,6 +33,25 @@ l3_%=: \ : __clobber_all); } +SEC("socket") +__description("gotol, large_imm") +__success __failure_unpriv __retval(40000) +__naked void gotol_large_imm(void) +{ + asm volatile (" \ + gotol 1f; \ +0: \ + r0 = 0; \ + .rept 40000; \ + r0 += 1; \ + .endr; \ + exit; \ +1: gotol 0b; \ +" : + : + : __clobber_all); +} + #else SEC("socket") diff --git 
a/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c index 692216c0ad..886498b5e6 100644 --- a/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c @@ -89,9 +89,14 @@ l0_%=: exit; \ : __clobber_all); } +/* Call a function taking a pointer and a size which doesn't allow the size to + * be zero (i.e. bpf_trace_printk() declares the second argument to be + * ARG_CONST_SIZE, not ARG_CONST_SIZE_OR_ZERO). We attempt to pass zero for the + * size and expect to fail. + */ SEC("tracepoint") __description("helper access to map: empty range") -__failure __msg("invalid access to map value, value_size=48 off=0 size=0") +__failure __msg("R2 invalid zero-sized read: u64=[0,0]") __naked void access_to_map_empty_range(void) { asm volatile (" \ @@ -113,6 +118,38 @@ l0_%=: exit; \ : __clobber_all); } +/* Like the test above, but this time the size register is not known to be zero; + * its lower-bound is zero though, which is still unacceptable. 
+ */ +SEC("tracepoint") +__description("helper access to map: possibly-empty ange") +__failure __msg("R2 invalid zero-sized read: u64=[0,4]") +__naked void access_to_map_possibly_empty_range(void) +{ + asm volatile (" \ + r2 = r10; \ + r2 += -8; \ + r1 = 0; \ + *(u64*)(r2 + 0) = r1; \ + r1 = %[map_hash_48b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + r1 = r0; \ + /* Read an unknown value */ \ + r7 = *(u64*)(r0 + 0); \ + /* Make it small and positive, to avoid other errors */ \ + r7 &= 4; \ + r2 = 0; \ + r2 += r7; \ + call %[bpf_trace_printk]; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm(bpf_trace_printk), + __imm_addr(map_hash_48b) + : __clobber_all); +} + SEC("tracepoint") __description("helper access to map: out-of-bound range") __failure __msg("invalid access to map value, value_size=48 off=0 size=56") @@ -221,7 +258,7 @@ l0_%=: exit; \ SEC("tracepoint") __description("helper access to adjusted map (via const imm): empty range") -__failure __msg("invalid access to map value, value_size=48 off=4 size=0") +__failure __msg("R2 invalid zero-sized read") __naked void via_const_imm_empty_range(void) { asm volatile (" \ @@ -386,7 +423,7 @@ l0_%=: exit; \ SEC("tracepoint") __description("helper access to adjusted map (via const reg): empty range") -__failure __msg("R1 min value is outside of the allowed memory range") +__failure __msg("R2 invalid zero-sized read") __naked void via_const_reg_empty_range(void) { asm volatile (" \ @@ -556,7 +593,7 @@ l0_%=: exit; \ SEC("tracepoint") __description("helper access to adjusted map (via variable): empty range") -__failure __msg("R1 min value is outside of the allowed memory range") +__failure __msg("R2 invalid zero-sized read") __naked void map_via_variable_empty_range(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c index 589e8270de..9fc3fae5cd 100644 --- 
a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c +++ b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c @@ -68,7 +68,7 @@ __naked void ptr_to_long_half_uninitialized(void) SEC("cgroup/sysctl") __description("ARG_PTR_TO_LONG misaligned") -__failure __msg("misaligned stack access off (0x0; 0x0)+-20+0 size 8") +__failure __msg("misaligned stack access off 0+-20+0 size 8") __naked void arg_ptr_to_long_misaligned(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c index 5905e036e0..a955a63582 100644 --- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c +++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c @@ -239,4 +239,74 @@ int bpf_loop_iter_limit_nested(void *unused) return 1000 * a + b + c; } +struct iter_limit_bug_ctx { + __u64 a; + __u64 b; + __u64 c; +}; + +static __naked void iter_limit_bug_cb(void) +{ + /* This is the same as C code below, but written + * in assembly to control which branches are fall-through. + * + * switch (bpf_get_prandom_u32()) { + * case 1: ctx->a = 42; break; + * case 2: ctx->b = 42; break; + * default: ctx->c = 42; break; + * } + */ + asm volatile ( + "r9 = r2;" + "call %[bpf_get_prandom_u32];" + "r1 = r0;" + "r2 = 42;" + "r0 = 0;" + "if r1 == 0x1 goto 1f;" + "if r1 == 0x2 goto 2f;" + "*(u64 *)(r9 + 16) = r2;" + "exit;" + "1: *(u64 *)(r9 + 0) = r2;" + "exit;" + "2: *(u64 *)(r9 + 8) = r2;" + "exit;" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +SEC("tc") +__failure +__flag(BPF_F_TEST_STATE_FREQ) +int iter_limit_bug(struct __sk_buff *skb) +{ + struct iter_limit_bug_ctx ctx = { 7, 7, 7 }; + + bpf_loop(2, iter_limit_bug_cb, &ctx, 0); + + /* This is the same as C code below, + * written in assembly to guarantee checks order. 
+ * + * if (ctx.a == 42 && ctx.b == 42 && ctx.c == 7) + * asm volatile("r1 /= 0;":::"r1"); + */ + asm volatile ( + "r1 = *(u64 *)%[ctx_a];" + "if r1 != 42 goto 1f;" + "r1 = *(u64 *)%[ctx_b];" + "if r1 != 42 goto 1f;" + "r1 = *(u64 *)%[ctx_c];" + "if r1 != 7 goto 1f;" + "r1 /= 0;" + "1:" + : + : [ctx_a]"m"(ctx.a), + [ctx_b]"m"(ctx.b), + [ctx_c]"m"(ctx.c) + : "r1" + ); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c index 353ae6da00..e1ffa5d32f 100644 --- a/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c +++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c @@ -39,7 +39,7 @@ __naked void with_valid_return_code_test3(void) SEC("netfilter") __description("bpf_exit with invalid return code. test4") -__failure __msg("R0 has value (0x2; 0x0)") +__failure __msg("R0 has smin=2 smax=2 should have been in [0, 1]") __naked void with_invalid_return_code_test4(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c index f67390224a..7cc83acac7 100644 --- a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c @@ -64,7 +64,7 @@ __naked void load_bytes_negative_len_2(void) SEC("tc") __description("raw_stack: skb_load_bytes, zero len") -__failure __msg("invalid zero-sized read") +__failure __msg("R4 invalid zero-sized read: u64=[0,0]") __naked void skb_load_bytes_zero_len(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 6115520154..39fe3372e0 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -4,6 +4,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> 
#include "bpf_misc.h" +#include <../../../tools/include/linux/filter.h> struct { __uint(type, BPF_MAP_TYPE_RINGBUF); @@ -450,4 +451,290 @@ l0_%=: r1 >>= 16; \ : __clobber_all); } +SEC("raw_tp") +__log_level(2) +__success +__msg("fp-8=0m??mmmm") +__msg("fp-16=00mm??mm") +__msg("fp-24=00mm???m") +__naked void spill_subregs_preserve_stack_zero(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + + /* 32-bit subreg spill with ZERO, MISC, and INVALID */ + ".8byte %[fp1_u8_st_zero];" /* ZERO, LLVM-18+: *(u8 *)(r10 -1) = 0; */ + "*(u8 *)(r10 -2) = r0;" /* MISC */ + /* fp-3 and fp-4 stay INVALID */ + "*(u32 *)(r10 -8) = r0;" + + /* 16-bit subreg spill with ZERO, MISC, and INVALID */ + ".8byte %[fp10_u16_st_zero];" /* ZERO, LLVM-18+: *(u16 *)(r10 -10) = 0; */ + "*(u16 *)(r10 -12) = r0;" /* MISC */ + /* fp-13 and fp-14 stay INVALID */ + "*(u16 *)(r10 -16) = r0;" + + /* 8-bit subreg spill with ZERO, MISC, and INVALID */ + ".8byte %[fp18_u16_st_zero];" /* ZERO, LLVM-18+: *(u16 *)(r10 -18) = 0; */ + "*(u16 *)(r10 -20) = r0;" /* MISC */ + /* fp-21, fp-22, and fp-23 stay INVALID */ + "*(u8 *)(r10 -24) = r0;" + + "r0 = 0;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm_insn(fp1_u8_st_zero, BPF_ST_MEM(BPF_B, BPF_REG_FP, -1, 0)), + __imm_insn(fp10_u16_st_zero, BPF_ST_MEM(BPF_H, BPF_REG_FP, -10, 0)), + __imm_insn(fp18_u16_st_zero, BPF_ST_MEM(BPF_H, BPF_REG_FP, -18, 0)) + : __clobber_all); +} + +char single_byte_buf[1] SEC(".data.single_byte_buf"); + +SEC("raw_tp") +__log_level(2) +__success +/* make sure fp-8 is all STACK_ZERO */ +__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=00000000") +/* but fp-16 is spilled IMPRECISE zero const reg */ +__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=0 R10=fp0 fp-16_w=0") +/* validate that assigning R2 from STACK_ZERO doesn't mark register + * precise immediately; if necessary, it will be marked precise later + */ +__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=00000000") +/* similarly, when R2 is assigned from 
spilled register, it is initially + * imprecise, but will be marked precise later once it is used in precise context + */ +__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2_w=0 R10=fp0 fp-16_w=0") +__msg("11: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 11 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 10: (71) r2 = *(u8 *)(r10 -9)") +__msg("mark_precise: frame0: regs= stack=-16 before 9: (bf) r1 = r6") +__msg("mark_precise: frame0: regs= stack=-16 before 8: (73) *(u8 *)(r1 +0) = r2") +__msg("mark_precise: frame0: regs= stack=-16 before 7: (0f) r1 += r2") +__msg("mark_precise: frame0: regs= stack=-16 before 6: (71) r2 = *(u8 *)(r10 -1)") +__msg("mark_precise: frame0: regs= stack=-16 before 5: (bf) r1 = r6") +__msg("mark_precise: frame0: regs= stack=-16 before 4: (7b) *(u64 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 3: (b7) r0 = 0") +__naked void partial_stack_load_preserves_zeros(void) +{ + asm volatile ( + /* fp-8 is all STACK_ZERO */ + ".8byte %[fp8_st_zero];" /* LLVM-18+: *(u64 *)(r10 -8) = 0; */ + + /* fp-16 is const zero register */ + "r0 = 0;" + "*(u64 *)(r10 -16) = r0;" + + /* load single U8 from non-aligned STACK_ZERO slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u8 *)(r10 -1);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U8 from non-aligned ZERO REG slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u8 *)(r10 -9);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U16 from non-aligned STACK_ZERO slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u16 *)(r10 -2);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U16 from non-aligned ZERO REG slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u16 *)(r10 -10);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U32 from non-aligned STACK_ZERO slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u32 *)(r10 -4);" + "r1 += 
r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U32 from non-aligned ZERO REG slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u32 *)(r10 -12);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* for completeness, load U64 from STACK_ZERO slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u64 *)(r10 -8);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* for completeness, load U64 from ZERO REG slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u64 *)(r10 -16);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + "r0 = 0;" + "exit;" + : + : __imm_ptr(single_byte_buf), + __imm_insn(fp8_st_zero, BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0)) + : __clobber_common); +} + +char two_byte_buf[2] SEC(".data.two_byte_buf"); + +SEC("raw_tp") +__log_level(2) __flag(BPF_F_TEST_STATE_FREQ) +__success +/* make sure fp-8 is IMPRECISE fake register spill */ +__msg("3: (7a) *(u64 *)(r10 -8) = 1 ; R10=fp0 fp-8_w=1") +/* and fp-16 is spilled IMPRECISE const reg */ +__msg("5: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16_w=1") +/* validate load from fp-8, which was initialized using BPF_ST_MEM */ +__msg("8: (79) r2 = *(u64 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=1") +__msg("9: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 8: (79) r2 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6") +/* note, fp-8 is precise, fp-16 is not yet precise, we'll get there */ +__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_w=1") +__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") +__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0") +__msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 
1") +__msg("mark_precise: frame0: regs= stack=-8 before 3: (7a) *(u64 *)(r10 -8) = 1") +__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +/* validate load from fp-16, which was initialized using BPF_STX_MEM */ +__msg("12: (79) r2 = *(u64 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=1") +__msg("13: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 12: (79) r2 = *(u64 *)(r10 -16)") +__msg("mark_precise: frame0: regs= stack=-16 before 11: (bf) r1 = r6") +__msg("mark_precise: frame0: regs= stack=-16 before 10: (73) *(u8 *)(r1 +0) = r2") +__msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2") +__msg("mark_precise: frame0: regs= stack=-16 before 8: (79) r2 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6") +/* now both fp-8 and fp-16 are precise, very good */ +__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_rw=P1") +__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") +__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0") +__msg("mark_precise: frame0: regs= stack=-16 before 5: (7b) *(u64 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1") +__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +__naked void stack_load_preserves_const_precision(void) +{ + asm volatile ( + /* establish checkpoint with state that has no stack slots; + * if we bubble up to this state without finding desired stack + * slot, then it's a bug and should be caught + */ + "goto +0;" + + /* fp-8 is const 1 *fake* register */ + ".8byte %[fp8_st_one];" /* LLVM-18+: *(u64 *)(r10 -8) = 1; */ + + /* fp-16 is const 1 register */ + "r0 = 1;" + "*(u64 *)(r10 -16) = r0;" + + /* force checkpoint to check precision marks preserved in parent states */ + "goto +0;" + + /* load 
single U64 from aligned FAKE_REG=1 slot */ + "r1 = %[two_byte_buf];" + "r2 = *(u64 *)(r10 -8);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U64 from aligned REG=1 slot */ + "r1 = %[two_byte_buf];" + "r2 = *(u64 *)(r10 -16);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + "r0 = 0;" + "exit;" + : + : __imm_ptr(two_byte_buf), + __imm_insn(fp8_st_one, BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 1)) + : __clobber_common); +} + +SEC("raw_tp") +__log_level(2) __flag(BPF_F_TEST_STATE_FREQ) +__success +/* make sure fp-8 is 32-bit FAKE subregister spill */ +__msg("3: (62) *(u32 *)(r10 -8) = 1 ; R10=fp0 fp-8=????1") +/* but fp-16 is spilled IMPRECISE const 1 subregister */ +__msg("5: (63) *(u32 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16=????1") +/* validate load from fp-8, which was initialized using BPF_ST_MEM */ +__msg("8: (61) r2 = *(u32 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=????1") +__msg("9: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 8: (61) r2 = *(u32 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6") +__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16=????1") +__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") +__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0") +__msg("mark_precise: frame0: regs= stack=-8 before 5: (63) *(u32 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1") +__msg("mark_precise: frame0: regs= stack=-8 before 3: (62) *(u32 *)(r10 -8) = 1") +__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +/* validate load from fp-16, which was initialized using BPF_STX_MEM */ +__msg("12: (61) r2 = *(u32 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=????1") +__msg("13: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 
13 first_idx 7 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 12: (61) r2 = *(u32 *)(r10 -16)") +__msg("mark_precise: frame0: regs= stack=-16 before 11: (bf) r1 = r6") +__msg("mark_precise: frame0: regs= stack=-16 before 10: (73) *(u8 *)(r1 +0) = r2") +__msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2") +__msg("mark_precise: frame0: regs= stack=-16 before 8: (61) r2 = *(u32 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6") +__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16_r=????P1") +__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") +__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0") +__msg("mark_precise: frame0: regs= stack=-16 before 5: (63) *(u32 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1") +__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +__naked void stack_load_preserves_const_precision_subreg(void) +{ + asm volatile ( + /* establish checkpoint with state that has no stack slots; + * if we bubble up to this state without finding desired stack + * slot, then it's a bug and should be caught + */ + "goto +0;" + + /* fp-8 is const 1 *fake* SUB-register */ + ".8byte %[fp8_st_one];" /* LLVM-18+: *(u32 *)(r10 -8) = 1; */ + + /* fp-16 is const 1 SUB-register */ + "r0 = 1;" + "*(u32 *)(r10 -16) = r0;" + + /* force checkpoint to check precision marks preserved in parent states */ + "goto +0;" + + /* load single U32 from aligned FAKE_REG=1 slot */ + "r1 = %[two_byte_buf];" + "r2 = *(u32 *)(r10 -8);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U32 from aligned REG=1 slot */ + "r1 = %[two_byte_buf];" + "r2 = *(u32 *)(r10 -16);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + "r0 = 0;" + "exit;" + : + : __imm_ptr(two_byte_buf), + 
__imm_insn(fp8_st_one, BPF_ST_MEM(BPF_W, BPF_REG_FP, -8, 1)) /* 32-bit spill */ + : __clobber_common); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c index e0f77e3e78..417c61cd4b 100644 --- a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c +++ b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c @@ -37,7 +37,7 @@ __naked void ptr_to_stack_store_load(void) SEC("socket") __description("PTR_TO_STACK store/load - bad alignment on off") -__failure __msg("misaligned stack access off (0x0; 0x0)+-8+2 size 8") +__failure __msg("misaligned stack access off 0+-8+2 size 8") __failure_unpriv __naked void load_bad_alignment_on_off(void) { @@ -53,7 +53,7 @@ __naked void load_bad_alignment_on_off(void) SEC("socket") __description("PTR_TO_STACK store/load - bad alignment on reg") -__failure __msg("misaligned stack access off (0x0; 0x0)+-10+8 size 8") +__failure __msg("misaligned stack access off 0+-10+8 size 8") __failure_unpriv __naked void load_bad_alignment_on_reg(void) { diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index f61d623b1c..6f5d19665c 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -117,6 +117,56 @@ __naked int global_subprog_result_precise(void) ); } +__naked __noinline __used +static unsigned long loop_callback_bad() +{ + /* bpf_loop() callback that can return values outside of [0, 1] range */ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "if r0 s> 1000 goto 1f;" + "r0 = 0;" + "1:" + "goto +0;" /* checkpoint */ + /* bpf_loop() expects [0, 1] values, so branch above skipping + * r0 = 0; should lead to a failure, but if exit instruction + * doesn't enforce r0's precision, this callback will be + * successfully verified + */ + "exit;" + : + 
: __imm(bpf_get_prandom_u32) + : __clobber_common + ); +} + +SEC("?raw_tp") +__failure __log_level(2) +__flag(BPF_F_TEST_STATE_FREQ) +/* check that fallthrough code path marks r0 as precise */ +__msg("mark_precise: frame1: regs=r0 stack= before 11: (b7) r0 = 0") +/* check that we have branch code path doing its own validation */ +__msg("from 10 to 12: frame1: R0=scalar(smin=umin=1001") +/* check that branch code path marks r0 as precise, before failing */ +__msg("mark_precise: frame1: regs=r0 stack= before 9: (85) call bpf_get_prandom_u32#7") +__msg("At callback return the register R0 has smin=1001 should have been in [0, 1]") +__naked int callback_precise_return_fail(void) +{ + asm volatile ( + "r1 = 1;" /* nr_loops */ + "r2 = %[loop_callback_bad];" /* callback_fn */ + "r3 = 0;" /* callback_ctx */ + "r4 = 0;" /* flags */ + "call %[bpf_loop];" + + "r0 = 0;" + "exit;" + : + : __imm_ptr(loop_callback_bad), + __imm(bpf_loop) + : __clobber_common + ); +} + SEC("?raw_tp") __success __log_level(2) /* First simulated path does not include callback body, @@ -370,12 +420,10 @@ __naked int parent_stack_slot_precise(void) SEC("?raw_tp") __success __log_level(2) __msg("9: (0f) r1 += r6") -__msg("mark_precise: frame0: last_idx 9 first_idx 6") +__msg("mark_precise: frame0: last_idx 9 first_idx 0") __msg("mark_precise: frame0: regs=r6 stack= before 8: (bf) r1 = r7") __msg("mark_precise: frame0: regs=r6 stack= before 7: (27) r6 *= 4") __msg("mark_precise: frame0: regs=r6 stack= before 6: (79) r6 = *(u64 *)(r10 -8)") -__msg("mark_precise: frame0: parent state regs= stack=-8:") -__msg("mark_precise: frame0: last_idx 5 first_idx 0") __msg("mark_precise: frame0: regs= stack=-8 before 5: (85) call pc+6") __msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r1 = 0") __msg("mark_precise: frame0: regs= stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r6") @@ -541,11 +589,24 @@ static __u64 subprog_spill_reg_precise(void) SEC("?raw_tp") __success __log_level(2) -/* precision backtracking 
can't currently handle stack access not through r10, - * so we won't be able to mark stack slot fp-8 as precise, and so will - * fallback to forcing all as precise - */ -__msg("mark_precise: frame0: falling back to forcing all scalars precise") +__msg("10: (0f) r1 += r7") +__msg("mark_precise: frame0: last_idx 10 first_idx 7 subseq_idx -1") +__msg("mark_precise: frame0: regs=r7 stack= before 9: (bf) r1 = r8") +__msg("mark_precise: frame0: regs=r7 stack= before 8: (27) r7 *= 4") +__msg("mark_precise: frame0: regs=r7 stack= before 7: (79) r7 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=2 R6_w=1 R8_rw=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8_rw=P1") +__msg("mark_precise: frame0: last_idx 18 first_idx 0 subseq_idx 7") +__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 17: (0f) r0 += r2") +__msg("mark_precise: frame1: regs= stack= before 16: (79) r2 = *(u64 *)(r1 +0)") +__msg("mark_precise: frame1: regs= stack= before 15: (79) r0 = *(u64 *)(r10 -16)") +__msg("mark_precise: frame1: regs= stack= before 14: (7b) *(u64 *)(r10 -16) = r2") +__msg("mark_precise: frame1: regs= stack= before 13: (7b) *(u64 *)(r1 +0) = r2") +__msg("mark_precise: frame1: regs=r2 stack= before 6: (85) call pc+6") +__msg("mark_precise: frame0: regs=r2 stack= before 5: (bf) r2 = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 4: (07) r1 += -8") +__msg("mark_precise: frame0: regs=r6 stack= before 3: (bf) r1 = r10") +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 1") __naked int subprog_spill_into_parent_stack_slot_precise(void) { asm volatile ( @@ -580,14 +641,68 @@ __naked int subprog_spill_into_parent_stack_slot_precise(void) ); } -__naked __noinline __used -static __u64 subprog_with_checkpoint(void) +SEC("?raw_tp") +__success __log_level(2) +__msg("17: (0f) r1 += r0") +__msg("mark_precise: frame0: last_idx 17 first_idx 0 subseq_idx -1") +__msg("mark_precise: 
frame0: regs=r0 stack= before 16: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r0 stack= before 15: (27) r0 *= 4") +__msg("mark_precise: frame0: regs=r0 stack= before 14: (79) r0 = *(u64 *)(r10 -16)") +__msg("mark_precise: frame0: regs= stack=-16 before 13: (7b) *(u64 *)(r7 -8) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 12: (79) r0 = *(u64 *)(r8 +16)") +__msg("mark_precise: frame0: regs= stack=-16 before 11: (7b) *(u64 *)(r8 +16) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 10: (79) r0 = *(u64 *)(r7 -8)") +__msg("mark_precise: frame0: regs= stack=-16 before 9: (7b) *(u64 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 8: (07) r8 += -32") +__msg("mark_precise: frame0: regs=r0 stack= before 7: (bf) r8 = r10") +__msg("mark_precise: frame0: regs=r0 stack= before 6: (07) r7 += -8") +__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r7 = r10") +__msg("mark_precise: frame0: regs=r0 stack= before 21: (95) exit") +__msg("mark_precise: frame1: regs=r0 stack= before 20: (bf) r0 = r1") +__msg("mark_precise: frame1: regs=r1 stack= before 4: (85) call pc+15") +__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r1 = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 1") +__naked int stack_slot_aliases_precision(void) { asm volatile ( - "r0 = 0;" - /* guaranteed checkpoint if BPF_F_TEST_STATE_FREQ is used */ - "goto +0;" + "r6 = 1;" + /* pass r6 through r1 into subprog to get it back as r0; + * this whole chain will have to be marked as precise later + */ + "r1 = r6;" + "call identity_subprog;" + /* let's setup two registers that are aliased to r10 */ + "r7 = r10;" + "r7 += -8;" /* r7 = r10 - 8 */ + "r8 = r10;" + "r8 += -32;" /* r8 = r10 - 32 */ + /* now spill subprog's return value (a r6 -> r1 -> r0 chain) + * a few times through different stack pointer regs, making + * sure to use r10, r7, and r8 both in LDX and STX insns, and + * *importantly* also using a combination of const var_off 
and + * insn->off to validate that we record final stack slot + * correctly, instead of relying on just insn->off derivation, + * which is only valid for r10-based stack offset + */ + "*(u64 *)(r10 - 16) = r0;" + "r0 = *(u64 *)(r7 - 8);" /* r7 - 8 == r10 - 16 */ + "*(u64 *)(r8 + 16) = r0;" /* r8 + 16 = r10 - 16 */ + "r0 = *(u64 *)(r8 + 16);" + "*(u64 *)(r7 - 8) = r0;" + "r0 = *(u64 *)(r10 - 16);" + /* get ready to use r0 as an index into array to force precision */ + "r0 *= 4;" + "r1 = %[vals];" + /* here r0->r1->r6 chain is forced to be precise and has to be + * propagated back to the beginning, including through the + * subprog call and all the stack spills and loads + */ + "r1 += r0;" + "r0 = *(u32 *)(r1 + 0);" "exit;" + : + : __imm_ptr(vals) + : __clobber_common, "r6" ); } diff --git a/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c b/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c index 71814a7532..a9ab37d3b9 100644 --- a/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c +++ b/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c @@ -146,4 +146,23 @@ l0_%=: exit; \ : __clobber_all); } +SEC("flow_dissector") +__description("flow_keys illegal alu op with variable offset") +__failure __msg("R7 pointer arithmetic on flow_keys prohibited") +__naked void flow_keys_illegal_variable_offset_alu(void) +{ + asm volatile(" \ + r6 = r1; \ + r7 = *(u64*)(r6 + %[flow_keys_off]); \ + r8 = 8; \ + r8 /= 1; \ + r8 &= 8; \ + r7 += r8; \ + r0 = *(u64*)(r7 + 0); \ + exit; \ +" : + : __imm_const(flow_keys_off, offsetof(struct __sk_buff, flow_keys)) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_var_off.c b/tools/testing/selftests/bpf/progs/verifier_var_off.c index d1f23c1a7c..c810f4f6f4 100644 --- a/tools/testing/selftests/bpf/progs/verifier_var_off.c +++ b/tools/testing/selftests/bpf/progs/verifier_var_off.c @@ -263,6 +263,35 @@ __naked void 
access_max_out_of_bound(void) : __clobber_all); } +/* Similar to the test above, but this time check the special case of a + * zero-sized stack access. We used to have a bug causing crashes for zero-sized + * out-of-bounds accesses. + */ +SEC("socket") +__description("indirect variable-offset stack access, zero-sized, max out of bound") +__failure __msg("invalid variable-offset indirect access to stack R1") +__naked void zero_sized_access_max_out_of_bound(void) +{ + asm volatile (" \ + r0 = 0; \ + /* Fill some stack */ \ + *(u64*)(r10 - 16) = r0; \ + *(u64*)(r10 - 8) = r0; \ + /* Get an unknown value */ \ + r1 = *(u32*)(r1 + 0); \ + r1 &= 63; \ + r1 += -16; \ + /* r1 is now anywhere in [-16,48) */ \ + r1 += r10; \ + r2 = 0; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ + exit; \ +" : + : __imm(bpf_probe_read_kernel) + : __clobber_all); +} + SEC("lwt_in") __description("indirect variable-offset stack access, min out of bound") __failure __msg("invalid variable-offset indirect access to stack R2") diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c index f6d1cc9ad8..330ece2eab 100644 --- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c @@ -20,21 +20,32 @@ extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, __u64 *timestamp) __ksym; extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash, enum xdp_rss_hash_type *rss_type) __ksym; +extern int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, + __be16 *vlan_proto, + __u16 *vlan_tci) __ksym; SEC("xdp.frags") int rx(struct xdp_md *ctx) { void *data, *data_meta, *data_end; struct ipv6hdr *ip6h = NULL; - struct ethhdr *eth = NULL; struct udphdr *udp = NULL; struct iphdr *iph = NULL; struct xdp_meta *meta; + struct ethhdr *eth; int err; data = (void *)(long)ctx->data; data_end = (void *)(long)ctx->data_end; eth = data; + + if (eth + 1 < data_end && 
(eth->h_proto == bpf_htons(ETH_P_8021AD) || + eth->h_proto == bpf_htons(ETH_P_8021Q))) + eth = (void *)eth + sizeof(struct vlan_hdr); + + if (eth + 1 < data_end && eth->h_proto == bpf_htons(ETH_P_8021Q)) + eth = (void *)eth + sizeof(struct vlan_hdr); + if (eth + 1 < data_end) { if (eth->h_proto == bpf_htons(ETH_P_IP)) { iph = (void *)(eth + 1); @@ -76,15 +87,28 @@ int rx(struct xdp_md *ctx) return XDP_PASS; } + meta->hint_valid = 0; + + meta->xdp_timestamp = bpf_ktime_get_tai_ns(); err = bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp); - if (!err) - meta->xdp_timestamp = bpf_ktime_get_tai_ns(); + if (err) + meta->rx_timestamp_err = err; else - meta->rx_timestamp = 0; /* Used by AF_XDP as not avail signal */ + meta->hint_valid |= XDP_META_FIELD_TS; - err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type); - if (err < 0) - meta->rx_hash_err = err; /* Used by AF_XDP as no hash signal */ + err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, + &meta->rx_hash_type); + if (err) + meta->rx_hash_err = err; + else + meta->hint_valid |= XDP_META_FIELD_RSS; + + err = bpf_xdp_metadata_rx_vlan_tag(ctx, &meta->rx_vlan_proto, + &meta->rx_vlan_tci); + if (err) + meta->rx_vlan_tag_err = err; + else + meta->hint_valid |= XDP_META_FIELD_VLAN_TAG; __sync_add_and_fetch(&pkts_redir, 1); return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS); diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c index d151d406a1..31ca229bb3 100644 --- a/tools/testing/selftests/bpf/progs/xdp_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c @@ -23,15 +23,47 @@ extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, __u64 *timestamp) __ksym; extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash, enum xdp_rss_hash_type *rss_type) __ksym; +extern int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, + __be16 *vlan_proto, + __u16 *vlan_tci) __ksym; SEC("xdp") int 
rx(struct xdp_md *ctx) { - void *data, *data_meta; + void *data, *data_meta, *data_end; + struct ipv6hdr *ip6h = NULL; + struct ethhdr *eth = NULL; + struct udphdr *udp = NULL; + struct iphdr *iph = NULL; struct xdp_meta *meta; u64 timestamp = -1; int ret; + data = (void *)(long)ctx->data; + data_end = (void *)(long)ctx->data_end; + eth = data; + if (eth + 1 < data_end) { + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + iph = (void *)(eth + 1); + if (iph + 1 < data_end && iph->protocol == IPPROTO_UDP) + udp = (void *)(iph + 1); + } + if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + ip6h = (void *)(eth + 1); + if (ip6h + 1 < data_end && ip6h->nexthdr == IPPROTO_UDP) + udp = (void *)(ip6h + 1); + } + if (udp && udp + 1 > data_end) + udp = NULL; + } + + if (!udp) + return XDP_PASS; + + /* Forwarding UDP:8080 to AF_XDP */ + if (udp->dest != bpf_htons(8080)) + return XDP_PASS; + /* Reserve enough for all custom metadata. */ ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta)); @@ -57,6 +89,8 @@ int rx(struct xdp_md *ctx) meta->rx_timestamp = 1; bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type); + bpf_xdp_metadata_rx_vlan_tag(ctx, &meta->rx_vlan_proto, + &meta->rx_vlan_tci); return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS); } diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 37ffa57f28..f013910212 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -12,7 +12,7 @@ #define str_has_pfx(str, pfx) \ (strncmp(str, pfx, __builtin_constant_p(pfx) ? 
sizeof(pfx) - 1 : strlen(pfx)) == 0) -#define TEST_LOADER_LOG_BUF_SZ 1048576 +#define TEST_LOADER_LOG_BUF_SZ 2097152 #define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure" #define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success" @@ -27,6 +27,7 @@ #define TEST_TAG_RETVAL_PFX_UNPRIV "comment:test_retval_unpriv=" #define TEST_TAG_AUXILIARY "comment:test_auxiliary" #define TEST_TAG_AUXILIARY_UNPRIV "comment:test_auxiliary_unpriv" +#define TEST_BTF_PATH "comment:test_btf_path=" /* Warning: duplicated in bpf_misc.h */ #define POINTER_VALUE 0xcafe4all @@ -58,6 +59,7 @@ struct test_spec { const char *prog_name; struct test_subspec priv; struct test_subspec unpriv; + const char *btf_custom_path; int log_level; int prog_flags; int mode_mask; @@ -153,6 +155,14 @@ static int parse_retval(const char *str, int *val, const char *name) return parse_int(str, val, name); } +static void update_flags(int *flags, int flag, bool clear) +{ + if (clear) + *flags &= ~flag; + else + *flags |= flag; +} + /* Uses btf_decl_tag attributes to describe the expected test * behavior, see bpf_misc.h for detailed description of each attribute * and attribute combinations. 
@@ -171,6 +181,7 @@ static int parse_test_spec(struct test_loader *tester, memset(spec, 0, sizeof(*spec)); spec->prog_name = bpf_program__name(prog); + spec->prog_flags = BPF_F_TEST_REG_INVARIANTS; /* by default be strict */ btf = bpf_object__btf(obj); if (!btf) { @@ -187,7 +198,8 @@ static int parse_test_spec(struct test_loader *tester, for (i = 1; i < btf__type_cnt(btf); i++) { const char *s, *val, *msg; const struct btf_type *t; - int tmp; + bool clear; + int flags; t = btf__type_by_id(btf, i); if (!btf_is_decl_tag(t)) @@ -253,24 +265,33 @@ static int parse_test_spec(struct test_loader *tester, goto cleanup; } else if (str_has_pfx(s, TEST_TAG_PROG_FLAGS_PFX)) { val = s + sizeof(TEST_TAG_PROG_FLAGS_PFX) - 1; + + clear = val[0] == '!'; + if (clear) + val++; + if (strcmp(val, "BPF_F_STRICT_ALIGNMENT") == 0) { - spec->prog_flags |= BPF_F_STRICT_ALIGNMENT; + update_flags(&spec->prog_flags, BPF_F_STRICT_ALIGNMENT, clear); } else if (strcmp(val, "BPF_F_ANY_ALIGNMENT") == 0) { - spec->prog_flags |= BPF_F_ANY_ALIGNMENT; + update_flags(&spec->prog_flags, BPF_F_ANY_ALIGNMENT, clear); } else if (strcmp(val, "BPF_F_TEST_RND_HI32") == 0) { - spec->prog_flags |= BPF_F_TEST_RND_HI32; + update_flags(&spec->prog_flags, BPF_F_TEST_RND_HI32, clear); } else if (strcmp(val, "BPF_F_TEST_STATE_FREQ") == 0) { - spec->prog_flags |= BPF_F_TEST_STATE_FREQ; + update_flags(&spec->prog_flags, BPF_F_TEST_STATE_FREQ, clear); } else if (strcmp(val, "BPF_F_SLEEPABLE") == 0) { - spec->prog_flags |= BPF_F_SLEEPABLE; + update_flags(&spec->prog_flags, BPF_F_SLEEPABLE, clear); } else if (strcmp(val, "BPF_F_XDP_HAS_FRAGS") == 0) { - spec->prog_flags |= BPF_F_XDP_HAS_FRAGS; + update_flags(&spec->prog_flags, BPF_F_XDP_HAS_FRAGS, clear); + } else if (strcmp(val, "BPF_F_TEST_REG_INVARIANTS") == 0) { + update_flags(&spec->prog_flags, BPF_F_TEST_REG_INVARIANTS, clear); } else /* assume numeric value */ { - err = parse_int(val, &tmp, "test prog flags"); + err = parse_int(val, &flags, "test prog flags"); if 
(err) goto cleanup; - spec->prog_flags |= tmp; + update_flags(&spec->prog_flags, flags, clear); } + } else if (str_has_pfx(s, TEST_BTF_PATH)) { + spec->btf_custom_path = s + sizeof(TEST_BTF_PATH) - 1; } } @@ -561,6 +582,9 @@ void run_subtest(struct test_loader *tester, } } + /* Implicitly reset to NULL if next test case doesn't specify */ + open_opts->btf_custom_path = spec->btf_custom_path; + tobj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, open_opts); if (!ASSERT_OK_PTR(tobj, "obj_open_mem")) /* shouldn't happen */ goto subtest_cleanup; diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index e0dd101c9f..dfbab214f4 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1400,13 +1400,18 @@ static void test_map_stress(void) #define MAX_DELAY_US 50000 #define MIN_DELAY_RANGE_US 5000 -static int map_update_retriable(int map_fd, const void *key, const void *value, - int flags, int attempts) +static bool retry_for_again_or_busy(int err) +{ + return (err == EAGAIN || err == EBUSY); +} + +int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts, + retry_for_error_fn need_retry) { int delay = rand() % MIN_DELAY_RANGE_US; while (bpf_map_update_elem(map_fd, key, value, flags)) { - if (!attempts || (errno != EAGAIN && errno != EBUSY)) + if (!attempts || !need_retry(errno)) return -errno; if (delay <= MAX_DELAY_US / 2) @@ -1449,11 +1454,13 @@ static void test_update_delete(unsigned int fn, void *data) key = value = i; if (do_update) { - err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES); + err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES, + retry_for_again_or_busy); if (err) printf("error %d %d\n", err, errno); assert(err == 0); - err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES); + err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES, + retry_for_again_or_busy); if 
(err) printf("error %d %d\n", err, errno); assert(err == 0); diff --git a/tools/testing/selftests/bpf/test_maps.h b/tools/testing/selftests/bpf/test_maps.h index f6fbca7617..e4ac704a53 100644 --- a/tools/testing/selftests/bpf/test_maps.h +++ b/tools/testing/selftests/bpf/test_maps.h @@ -4,6 +4,7 @@ #include <stdio.h> #include <stdlib.h> +#include <stdbool.h> #define CHECK(condition, tag, format...) ({ \ int __ret = !!(condition); \ @@ -16,4 +17,8 @@ extern int skips; +typedef bool (*retry_for_error_fn)(int err); +int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts, + retry_for_error_fn need_retry); + #endif diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 40cba8d368..6157f884d0 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -169,12 +169,14 @@ def bpftool(args, JSON=True, ns="", fail=True, include_stderr=False): return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns, fail=fail, include_stderr=include_stderr) -def bpftool_prog_list(expected=None, ns=""): +def bpftool_prog_list(expected=None, ns="", exclude_orphaned=True): _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True) # Remove the base progs for p in base_progs: if p in progs: progs.remove(p) + if exclude_orphaned: + progs = [ p for p in progs if not p['orphaned'] ] if expected is not None: if len(progs) != expected: fail(True, "%d BPF programs loaded, expected %d" % @@ -612,11 +614,9 @@ def pin_map(file_name, idx=0, expected=1): def check_dev_info_removed(prog_file=None, map_file=None): bpftool_prog_list(expected=0) + bpftool_prog_list(expected=1, exclude_orphaned=False) ret, err = bpftool("prog show pin %s" % (prog_file), fail=False) - fail(ret == 0, "Showing prog with removed device did not fail") - fail(err["error"].find("No such device") == -1, - "Showing prog with removed device expected ENODEV, error is %s" % - 
(err["error"])) + fail(ret != 0, "failed to show prog with removed device") bpftool_map_list(expected=0) ret, err = bpftool("map show pin %s" % (map_file), fail=False) @@ -1395,10 +1395,7 @@ try: start_test("Test multi-dev ASIC cross-dev destruction - orphaned...") ret, out = bpftool("prog show %s" % (progB), fail=False) - fail(ret == 0, "got information about orphaned program") - fail("error" not in out, "no error reported for get info on orphaned") - fail(out["error"] != "can't get prog info: No such device", - "wrong error for get info on orphaned") + fail(ret != 0, "couldn't get information about orphaned program") print("%s: OK" % (os.path.basename(__file__))) diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index 2c89674fc6..b0068a9d2c 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -679,7 +679,7 @@ static int load_path(const struct sock_addr_test *test, const char *path) bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR); bpf_program__set_expected_attach_type(prog, test->expected_attach_type); - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32); + bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS); err = bpf_object__load(obj); if (err) { diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index 2dec7dbf29..d9661b9988 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -517,90 +517,6 @@ test_ip6ip6() echo -e ${GREEN}"PASS: ip6$TYPE"${NC} } -setup_xfrm_tunnel() -{ - auth=0x$(printf '1%.0s' {1..40}) - enc=0x$(printf '2%.0s' {1..32}) - spi_in_to_out=0x1 - spi_out_to_in=0x2 - # at_ns0 namespace - # at_ns0 -> root - ip netns exec at_ns0 \ - ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ - spi $spi_in_to_out reqid 1 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip 
netns exec at_ns0 \ - ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \ - tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \ - mode tunnel - # root -> at_ns0 - ip netns exec at_ns0 \ - ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ - spi $spi_out_to_in reqid 2 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip netns exec at_ns0 \ - ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \ - tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ - mode tunnel - # address & route - ip netns exec at_ns0 \ - ip addr add dev veth0 10.1.1.100/32 - ip netns exec at_ns0 \ - ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \ - src 10.1.1.100 - - # root namespace - # at_ns0 -> root - ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ - spi $spi_in_to_out reqid 1 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \ - tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \ - mode tunnel - # root -> at_ns0 - ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ - spi $spi_out_to_in reqid 2 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \ - tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ - mode tunnel - # address & route - ip addr add dev veth1 10.1.1.200/32 - ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200 -} - -test_xfrm_tunnel() -{ - if [[ -e /sys/kernel/tracing/trace ]]; then - TRACE=/sys/kernel/tracing/trace - else - TRACE=/sys/kernel/debug/tracing/trace - fi - config_device - > ${TRACE} - setup_xfrm_tunnel - mkdir -p ${BPF_PIN_TUNNEL_DIR} - bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR} - tc qdisc add dev veth1 clsact - tc filter add dev veth1 proto ip ingress bpf da object-pinned \ - ${BPF_PIN_TUNNEL_DIR}/xfrm_get_state - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - 
sleep 1 - grep "reqid 1" ${TRACE} - check_err $? - grep "spi 0x1" ${TRACE} - check_err $? - grep "remote ip 0xac100164" ${TRACE} - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: xfrm tunnel"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: xfrm tunnel"${NC} -} - attach_bpf() { DEV=$1 @@ -630,10 +546,6 @@ cleanup() ip link del ip6geneve11 2> /dev/null ip link del erspan11 2> /dev/null ip link del ip6erspan11 2> /dev/null - ip xfrm policy delete dir out src 10.1.1.200/32 dst 10.1.1.100/32 2> /dev/null - ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null - ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null - ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null } cleanup_exit() @@ -716,10 +628,6 @@ bpf_tunnel_test() test_ip6ip6 errors=$(( $errors + $? )) - echo "Testing IPSec tunnel..." - test_xfrm_tunnel - errors=$(( $errors + $? )) - return $errors } diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 98107e0452..f36e41435b 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1588,7 +1588,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (fixup_skips != skips) return; - pflags = BPF_F_TEST_RND_HI32; + pflags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS; if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT) pflags |= BPF_F_STRICT_ALIGNMENT; if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 8d994884c7..d2458c1b16 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -276,7 +276,7 @@ int bpf_prog_test_load(const char *file, enum bpf_prog_type type, if (type != BPF_PROG_TYPE_UNSPEC && bpf_program__type(prog) != type) bpf_program__set_type(prog, 
type); - flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32; + flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS; bpf_program__set_flags(prog, flags); err = bpf_object__load(obj); @@ -299,7 +299,7 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, { LIBBPF_OPTS(bpf_prog_load_opts, opts, .kern_version = kern_version, - .prog_flags = BPF_F_TEST_RND_HI32, + .prog_flags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS, .log_level = extra_prog_load_log_flags, .log_buf = log_buf, .log_size = log_buf_sz, diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index 5b7a551367..35284faff4 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -9,6 +9,9 @@ #include <bpf/libbpf.h> #include <time.h> +#define __TO_STR(x) #x +#define TO_STR(x) __TO_STR(x) + int parse_num_list(const char *s, bool **set, int *set_len); __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info); int bpf_prog_test_load(const char *file, enum bpf_prog_type type, diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c index 0d84dd1f38..8a2ff81d83 100644 --- a/tools/testing/selftests/bpf/verifier/precise.c +++ b/tools/testing/selftests/bpf/verifier/precise.c @@ -140,10 +140,11 @@ .result = REJECT, }, { - "precise: ST insn causing spi > allocated_stack", + "precise: ST zero to stack insn is supported", .insns = { BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0), + /* not a register spill, so we stop precision propagation for R4 here */ BPF_ST_MEM(BPF_DW, BPF_REG_3, -8, 0), BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), BPF_MOV64_IMM(BPF_REG_0, -1), @@ -157,11 +158,11 @@ mark_precise: frame0: last_idx 4 first_idx 2\ mark_precise: frame0: regs=r4 stack= before 4\ mark_precise: frame0: regs=r4 stack= before 3\ - 
mark_precise: frame0: regs= stack=-8 before 2\ - mark_precise: frame0: falling back to forcing all scalars precise\ - force_precise: frame0: forcing r0 to be precise\ mark_precise: frame0: last_idx 5 first_idx 5\ - mark_precise: frame0: parent state regs= stack=:", + mark_precise: frame0: parent state regs=r0 stack=:\ + mark_precise: frame0: last_idx 4 first_idx 2\ + mark_precise: frame0: regs=r0 stack= before 4\ + 5: R0=-1 R4=0", .result = VERBOSE_ACCEPT, .retval = -1, }, @@ -169,6 +170,8 @@ "precise: STX insn causing spi > allocated_stack", .insns = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + /* make later reg spill more interesting by having somewhat known scalar */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff), BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0), BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, -8), @@ -179,18 +182,21 @@ }, .prog_type = BPF_PROG_TYPE_XDP, .flags = BPF_F_TEST_STATE_FREQ, - .errstr = "mark_precise: frame0: last_idx 6 first_idx 6\ + .errstr = "mark_precise: frame0: last_idx 7 first_idx 7\ mark_precise: frame0: parent state regs=r4 stack=:\ - mark_precise: frame0: last_idx 5 first_idx 3\ - mark_precise: frame0: regs=r4 stack= before 5\ - mark_precise: frame0: regs=r4 stack= before 4\ - mark_precise: frame0: regs= stack=-8 before 3\ - mark_precise: frame0: falling back to forcing all scalars precise\ - force_precise: frame0: forcing r0 to be precise\ - force_precise: frame0: forcing r0 to be precise\ - force_precise: frame0: forcing r0 to be precise\ - force_precise: frame0: forcing r0 to be precise\ - mark_precise: frame0: last_idx 6 first_idx 6\ + mark_precise: frame0: last_idx 6 first_idx 4\ + mark_precise: frame0: regs=r4 stack= before 6: (b7) r0 = -1\ + mark_precise: frame0: regs=r4 stack= before 5: (79) r4 = *(u64 *)(r10 -8)\ + mark_precise: frame0: regs= stack=-8 before 4: (7b) *(u64 *)(r3 -8) = r0\ + mark_precise: frame0: parent state regs=r0 stack=:\ + mark_precise: frame0: 
last_idx 3 first_idx 3\ + mark_precise: frame0: regs=r0 stack= before 3: (55) if r3 != 0x7b goto pc+0\ + mark_precise: frame0: regs=r0 stack= before 2: (bf) r3 = r10\ + mark_precise: frame0: regs=r0 stack= before 1: (57) r0 &= 255\ + mark_precise: frame0: parent state regs=r0 stack=:\ + mark_precise: frame0: last_idx 0 first_idx 0\ + mark_precise: frame0: regs=r0 stack= before 0: (85) call bpf_get_prandom_u32#7\ + mark_precise: frame0: last_idx 7 first_idx 7\ mark_precise: frame0: parent state regs= stack=:", .result = VERBOSE_ACCEPT, .retval = -1, diff --git a/tools/testing/selftests/bpf/verify_sig_setup.sh b/tools/testing/selftests/bpf/verify_sig_setup.sh index ba08922b4a..f2cac42298 100755 --- a/tools/testing/selftests/bpf/verify_sig_setup.sh +++ b/tools/testing/selftests/bpf/verify_sig_setup.sh @@ -60,6 +60,27 @@ cleanup() { rm -rf ${tmp_dir} } +fsverity_create_sign_file() { + local tmp_dir="$1" + + data_file=${tmp_dir}/data-file + sig_file=${tmp_dir}/sig-file + dd if=/dev/urandom of=$data_file bs=1 count=12345 2> /dev/null + fsverity sign --key ${tmp_dir}/signing_key.pem $data_file $sig_file + + # We do not want to enable fsverity on $data_file yet. Try whether + # the file system support fsverity on a different file. 
+ touch ${tmp_dir}/tmp-file + fsverity enable ${tmp_dir}/tmp-file +} + +fsverity_enable_file() { + local tmp_dir="$1" + + data_file=${tmp_dir}/data-file + fsverity enable $data_file +} + catch() { local exit_code="$1" @@ -86,6 +107,10 @@ main() setup "${tmp_dir}" elif [[ "${action}" == "cleanup" ]]; then cleanup "${tmp_dir}" + elif [[ "${action}" == "fsverity-create-sign" ]]; then + fsverity_create_sign_file "${tmp_dir}" + elif [[ "${action}" == "fsverity-enable" ]]; then + fsverity_enable_file "${tmp_dir}" else echo "Unknown action: ${action}" exit 1 diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 0ad98b6a8e..244d4996e0 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -18,6 +18,7 @@ #include <libelf.h> #include <gelf.h> #include <float.h> +#include <math.h> #ifndef ARRAY_SIZE #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) @@ -99,6 +100,7 @@ struct stat_specs { enum stat_id ids[ALL_STATS_CNT]; enum stat_variant variants[ALL_STATS_CNT]; bool asc[ALL_STATS_CNT]; + bool abs[ALL_STATS_CNT]; int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */ }; @@ -133,6 +135,7 @@ struct filter { int stat_id; enum stat_variant stat_var; long value; + bool abs; }; static struct env { @@ -142,10 +145,12 @@ static struct env { bool debug; bool quiet; bool force_checkpoints; + bool force_reg_invariants; enum resfmt out_fmt; bool show_version; bool comparison_mode; bool replay_mode; + int top_n; int log_level; int log_size; @@ -210,8 +215,7 @@ static const struct argp_option opts[] = { { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" }, { "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" }, { "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" }, - { "test-states", 't', NULL, 0, - "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" 
}, + { "top-n", 'n', "N", 0, "Emit only up to first N results." }, { "quiet", 'q', NULL, 0, "Quiet mode" }, { "emit", 'e', "SPEC", 0, "Specify stats to be emitted" }, { "sort", 's', "SPEC", 0, "Specify sort order" }, @@ -219,6 +223,10 @@ static const struct argp_option opts[] = { { "compare", 'C', NULL, 0, "Comparison mode" }, { "replay", 'R', NULL, 0, "Replay mode" }, { "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." }, + { "test-states", 't', NULL, 0, + "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, + { "test-reg-invariants", 'r', NULL, 0, + "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" }, {}, }; @@ -290,6 +298,16 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) case 't': env.force_checkpoints = true; break; + case 'r': + env.force_reg_invariants = true; + break; + case 'n': + errno = 0; + env.top_n = strtol(arg, NULL, 10); + if (errno) { + fprintf(stderr, "invalid top N specifier: %s\n", arg); + argp_usage(state); + } case 'C': env.comparison_mode = true; break; @@ -455,7 +473,8 @@ static struct { { OP_EQ, "=" }, }; -static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var); +static bool parse_stat_id_var(const char *name, size_t len, int *id, + enum stat_variant *var, bool *is_abs); static int append_filter(struct filter **filters, int *cnt, const char *str) { @@ -488,13 +507,14 @@ static int append_filter(struct filter **filters, int *cnt, const char *str) long val; const char *end = str; const char *op_str; + bool is_abs; op_str = operators[i].op_str; p = strstr(str, op_str); if (!p) continue; - if (!parse_stat_id_var(str, p - str, &id, &var)) { + if (!parse_stat_id_var(str, p - str, &id, &var, &is_abs)) { fprintf(stderr, "Unrecognized stat name in '%s'!\n", str); return -EINVAL; } @@ -533,6 +553,7 @@ static int append_filter(struct filter **filters, int 
*cnt, const char *str) f->stat_id = id; f->stat_var = var; f->op = operators[i].op_kind; + f->abs = true; f->value = val; *cnt += 1; @@ -657,7 +678,8 @@ static struct stat_def { [MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, }, }; -static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var) +static bool parse_stat_id_var(const char *name, size_t len, int *id, + enum stat_variant *var, bool *is_abs) { static const char *var_sfxs[] = { [VARIANT_A] = "_a", @@ -667,6 +689,14 @@ static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_v }; int i, j, k; + /* |<stat>| means we take absolute value of given stat */ + *is_abs = false; + if (len > 2 && name[0] == '|' && name[len - 1] == '|') { + *is_abs = true; + name += 1; + len -= 2; + } + for (i = 0; i < ARRAY_SIZE(stat_defs); i++) { struct stat_def *def = &stat_defs[i]; size_t alias_len, sfx_len; @@ -722,7 +752,7 @@ static bool is_desc_sym(char c) static int parse_stat(const char *stat_name, struct stat_specs *specs) { int id; - bool has_order = false, is_asc = false; + bool has_order = false, is_asc = false, is_abs = false; size_t len = strlen(stat_name); enum stat_variant var; @@ -737,7 +767,7 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs) len -= 1; } - if (!parse_stat_id_var(stat_name, len, &id, &var)) { + if (!parse_stat_id_var(stat_name, len, &id, &var, &is_abs)) { fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name); return -ESRCH; } @@ -745,6 +775,7 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs) specs->ids[specs->spec_cnt] = id; specs->variants[specs->spec_cnt] = var; specs->asc[specs->spec_cnt] = has_order ? 
is_asc : stat_defs[id].asc_by_default; + specs->abs[specs->spec_cnt] = is_abs; specs->spec_cnt++; return 0; @@ -997,6 +1028,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf if (env.force_checkpoints) bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ); + if (env.force_reg_invariants) + bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS); err = bpf_object__load(obj); env.progs_processed++; @@ -1103,7 +1136,7 @@ cleanup: } static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, - enum stat_id id, bool asc) + enum stat_id id, bool asc, bool abs) { int cmp = 0; @@ -1124,6 +1157,11 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, long v1 = s1->stats[id]; long v2 = s2->stats[id]; + if (abs) { + v1 = v1 < 0 ? -v1 : v1; + v2 = v2 < 0 ? -v2 : v2; + } + if (v1 != v2) cmp = v1 < v2 ? -1 : 1; break; @@ -1142,7 +1180,8 @@ static int cmp_prog_stats(const void *v1, const void *v2) int i, cmp; for (i = 0; i < env.sort_spec.spec_cnt; i++) { - cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.asc[i]); + cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], + env.sort_spec.asc[i], env.sort_spec.abs[i]); if (cmp != 0) return cmp; } @@ -1211,7 +1250,8 @@ static void fetch_join_stat_value(const struct verif_stats_join *s, static int cmp_join_stat(const struct verif_stats_join *s1, const struct verif_stats_join *s2, - enum stat_id id, enum stat_variant var, bool asc) + enum stat_id id, enum stat_variant var, + bool asc, bool abs) { const char *str1 = NULL, *str2 = NULL; double v1 = 0.0, v2 = 0.0; @@ -1220,6 +1260,11 @@ static int cmp_join_stat(const struct verif_stats_join *s1, fetch_join_stat_value(s1, id, var, &str1, &v1); fetch_join_stat_value(s2, id, var, &str2, &v2); + if (abs) { + v1 = fabs(v1); + v2 = fabs(v2); + } + if (str1) cmp = strcmp(str1, str2); else if (v1 != v2) @@ -1237,7 +1282,8 @@ static int cmp_join_stats(const void 
*v1, const void *v2) cmp = cmp_join_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.variants[i], - env.sort_spec.asc[i]); + env.sort_spec.asc[i], + env.sort_spec.abs[i]); if (cmp != 0) return cmp; } @@ -1720,6 +1766,9 @@ static bool is_join_stat_filter_matched(struct filter *f, const struct verif_sta fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value); + if (f->abs) + value = fabs(value); + switch (f->op) { case OP_EQ: return value > f->value - eps && value < f->value + eps; case OP_NEQ: return value < f->value - eps || value > f->value + eps; @@ -1766,7 +1815,7 @@ static int handle_comparison_mode(void) struct stat_specs base_specs = {}, comp_specs = {}; struct stat_specs tmp_sort_spec; enum resfmt cur_fmt; - int err, i, j, last_idx; + int err, i, j, last_idx, cnt; if (env.filename_cnt != 2) { fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n"); @@ -1879,7 +1928,7 @@ static int handle_comparison_mode(void) env.join_stat_cnt += 1; } - /* now sort joined results accorsing to sort spec */ + /* now sort joined results according to sort spec */ qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats); /* for human-readable table output we need to do extra pass to @@ -1896,16 +1945,22 @@ one_more_time: output_comp_headers(cur_fmt); last_idx = -1; + cnt = 0; for (i = 0; i < env.join_stat_cnt; i++) { const struct verif_stats_join *join = &env.join_stats[i]; if (!should_output_join_stats(join)) continue; + if (env.top_n && cnt >= env.top_n) + break; + if (cur_fmt == RESFMT_TABLE_CALCLEN) last_idx = i; output_comp_stats(join, cur_fmt, i == last_idx); + + cnt++; } if (cur_fmt == RESFMT_TABLE_CALCLEN) { @@ -1920,6 +1975,9 @@ static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *s { long value = stats->stats[f->stat_id]; + if (f->abs) + value = value < 0 ? 
-value : value; + switch (f->op) { case OP_EQ: return value == f->value; case OP_NEQ: return value != f->value; @@ -1964,7 +2022,7 @@ static bool should_output_stats(const struct verif_stats *stats) static void output_prog_stats(void) { const struct verif_stats *stats; - int i, last_stat_idx = 0; + int i, last_stat_idx = 0, cnt = 0; if (env.out_fmt == RESFMT_TABLE) { /* calculate column widths */ @@ -1984,7 +2042,10 @@ static void output_prog_stats(void) stats = &env.prog_stats[i]; if (!should_output_stats(stats)) continue; + if (env.top_n && cnt >= env.top_n) + break; output_stats(stats, env.out_fmt, i == last_stat_idx); + cnt++; } } diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index 6850345280..65d14f3bbe 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -36,7 +36,9 @@ DEFAULT_COMMAND="./test_progs" MOUNT_DIR="mnt" ROOTFS_IMAGE="root.img" OUTPUT_DIR="$HOME/.bpf_selftests" -KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config" "tools/testing/selftests/bpf/config.${ARCH}") +KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config" + "tools/testing/selftests/bpf/config.vm" + "tools/testing/selftests/bpf/config.${ARCH}") INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX" NUM_COMPILE_JOBS="$(nproc)" LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")" diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index c5e7937d7f..878d68db03 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -10,7 +10,9 @@ * - rx_hash * * TX: - * - TBD + * - UDP 9091 packets trigger TX reply + * - TX HW timestamp is requested and reported back upon completion + * - TX checksum is requested */ #include <test_progs.h> @@ -19,20 +21,26 @@ #include "xsk.h" #include <error.h> +#include <linux/kernel.h> +#include <linux/bits.h> +#include <linux/bitfield.h> #include 
<linux/errqueue.h> #include <linux/if_link.h> #include <linux/net_tstamp.h> #include <linux/udp.h> #include <linux/sockios.h> +#include <linux/if_xdp.h> #include <sys/mman.h> #include <net/if.h> #include <ctype.h> #include <poll.h> #include <time.h> +#include <unistd.h> +#include <libgen.h> #include "xdp_metadata.h" -#define UMEM_NUM 16 +#define UMEM_NUM 256 #define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE #define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM) #define XDP_FLAGS (XDP_FLAGS_DRV_MODE | XDP_FLAGS_REPLACE) @@ -48,11 +56,14 @@ struct xsk { }; struct xdp_hw_metadata *bpf_obj; -__u16 bind_flags = XDP_COPY; +__u16 bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY; struct xsk *rx_xsk; const char *ifname; int ifindex; int rxq; +bool skip_tx; +__u64 last_hw_rx_timestamp; +__u64 last_xdp_rx_timestamp; void test__fail(void) { /* for network_helpers.c */ } @@ -68,7 +79,8 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id) .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, - .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG, + .flags = XSK_UMEM__DEFAULT_FLAGS, + .tx_metadata_len = sizeof(struct xsk_tx_metadata), }; __u32 idx = 0; u64 addr; @@ -110,7 +122,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id) for (i = 0; i < UMEM_NUM / 2; i++) { addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE; printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr); - *xsk_ring_prod__fill_addr(&xsk->fill, i) = addr; + *xsk_ring_prod__fill_addr(&xsk->fill, idx + i) = addr; } xsk_ring_prod__submit(&xsk->fill, ret); @@ -131,12 +143,22 @@ static void refill_rx(struct xsk *xsk, __u64 addr) __u32 idx; if (xsk_ring_prod__reserve(&xsk->fill, 1, &idx) == 1) { - printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr); + printf("%p: complete rx idx=%u addr=%llx\n", xsk, idx, addr); *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr; xsk_ring_prod__submit(&xsk->fill, 1); } } +static int kick_tx(struct 
xsk *xsk) +{ + return sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0); +} + +static int kick_rx(struct xsk *xsk) +{ + return recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL); +} + #define NANOSEC_PER_SEC 1000000000 /* 10^9 */ static __u64 gettime(clockid_t clock_id) { @@ -152,37 +174,64 @@ static __u64 gettime(clockid_t clock_id) return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; } +static void print_tstamp_delta(const char *name, const char *refname, + __u64 tstamp, __u64 reference) +{ + __s64 delta = (__s64)reference - (__s64)tstamp; + + printf("%s: %llu (sec:%0.4f) delta to %s sec:%0.4f (%0.3f usec)\n", + name, tstamp, (double)tstamp / NANOSEC_PER_SEC, refname, + (double)delta / NANOSEC_PER_SEC, + (double)delta / 1000); +} + +#define VLAN_PRIO_MASK GENMASK(15, 13) /* Priority Code Point */ +#define VLAN_DEI_MASK GENMASK(12, 12) /* Drop Eligible Indicator */ +#define VLAN_VID_MASK GENMASK(11, 0) /* VLAN Identifier */ +static void print_vlan_tci(__u16 tag) +{ + __u16 vlan_id = FIELD_GET(VLAN_VID_MASK, tag); + __u8 pcp = FIELD_GET(VLAN_PRIO_MASK, tag); + bool dei = FIELD_GET(VLAN_DEI_MASK, tag); + + printf("PCP=%u, DEI=%d, VID=0x%X\n", pcp, dei, vlan_id); +} + static void verify_xdp_metadata(void *data, clockid_t clock_id) { struct xdp_meta *meta; meta = data - sizeof(*meta); - if (meta->rx_hash_err < 0) - printf("No rx_hash err=%d\n", meta->rx_hash_err); - else + if (meta->hint_valid & XDP_META_FIELD_RSS) printf("rx_hash: 0x%X with RSS type:0x%X\n", meta->rx_hash, meta->rx_hash_type); + else + printf("No rx_hash, err=%d\n", meta->rx_hash_err); + + if (meta->hint_valid & XDP_META_FIELD_TS) { + __u64 ref_tstamp = gettime(clock_id); - printf("rx_timestamp: %llu (sec:%0.4f)\n", meta->rx_timestamp, - (double)meta->rx_timestamp / NANOSEC_PER_SEC); - if (meta->rx_timestamp) { - __u64 usr_clock = gettime(clock_id); - __u64 xdp_clock = meta->xdp_timestamp; - __s64 delta_X = xdp_clock - meta->rx_timestamp; - __s64 delta_X2U = 
usr_clock - xdp_clock; - - printf("XDP RX-time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n", - xdp_clock, (double)xdp_clock / NANOSEC_PER_SEC, - (double)delta_X / NANOSEC_PER_SEC, - (double)delta_X / 1000); - - printf("AF_XDP time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n", - usr_clock, (double)usr_clock / NANOSEC_PER_SEC, - (double)delta_X2U / NANOSEC_PER_SEC, - (double)delta_X2U / 1000); + /* store received timestamps to calculate a delta at tx */ + last_hw_rx_timestamp = meta->rx_timestamp; + last_xdp_rx_timestamp = meta->xdp_timestamp; + + print_tstamp_delta("HW RX-time", "User RX-time", + meta->rx_timestamp, ref_tstamp); + print_tstamp_delta("XDP RX-time", "User RX-time", + meta->xdp_timestamp, ref_tstamp); + } else { + printf("No rx_timestamp, err=%d\n", meta->rx_timestamp_err); } + if (meta->hint_valid & XDP_META_FIELD_VLAN_TAG) { + printf("rx_vlan_proto: 0x%X\n", ntohs(meta->rx_vlan_proto)); + printf("rx_vlan_tci: "); + print_vlan_tci(meta->rx_vlan_tci); + } else { + printf("No rx_vlan_tci or rx_vlan_proto, err=%d\n", + meta->rx_vlan_tag_err); + } } static void verify_skb_metadata(int fd) @@ -230,6 +279,129 @@ static void verify_skb_metadata(int fd) printf("skb hwtstamp is not found!\n"); } +static bool complete_tx(struct xsk *xsk, clockid_t clock_id) +{ + struct xsk_tx_metadata *meta; + __u64 addr; + void *data; + __u32 idx; + + if (!xsk_ring_cons__peek(&xsk->comp, 1, &idx)) + return false; + + addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx); + data = xsk_umem__get_data(xsk->umem_area, addr); + meta = data - sizeof(struct xsk_tx_metadata); + + printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr); + + if (meta->completion.tx_timestamp) { + __u64 ref_tstamp = gettime(clock_id); + + print_tstamp_delta("HW TX-complete-time", "User TX-complete-time", + meta->completion.tx_timestamp, ref_tstamp); + print_tstamp_delta("XDP RX-time", "User TX-complete-time", + last_xdp_rx_timestamp, ref_tstamp); + print_tstamp_delta("HW RX-time", "HW 
TX-complete-time", + last_hw_rx_timestamp, meta->completion.tx_timestamp); + } else { + printf("No tx_timestamp\n"); + } + + xsk_ring_cons__release(&xsk->comp, 1); + + return true; +} + +#define swap(a, b, len) do { \ + for (int i = 0; i < len; i++) { \ + __u8 tmp = ((__u8 *)a)[i]; \ + ((__u8 *)a)[i] = ((__u8 *)b)[i]; \ + ((__u8 *)b)[i] = tmp; \ + } \ +} while (0) + +static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id) +{ + struct xsk_tx_metadata *meta; + struct ipv6hdr *ip6h = NULL; + struct iphdr *iph = NULL; + struct xdp_desc *tx_desc; + struct udphdr *udph; + struct ethhdr *eth; + __sum16 want_csum; + void *data; + __u32 idx; + int ret; + int len; + + ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx); + if (ret != 1) { + printf("%p: failed to reserve tx slot\n", xsk); + return; + } + + tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx); + tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE + sizeof(struct xsk_tx_metadata); + data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr); + + meta = data - sizeof(struct xsk_tx_metadata); + memset(meta, 0, sizeof(*meta)); + meta->flags = XDP_TXMD_FLAGS_TIMESTAMP; + + eth = rx_packet; + + if (eth->h_proto == htons(ETH_P_IP)) { + iph = (void *)(eth + 1); + udph = (void *)(iph + 1); + } else if (eth->h_proto == htons(ETH_P_IPV6)) { + ip6h = (void *)(eth + 1); + udph = (void *)(ip6h + 1); + } else { + printf("%p: failed to detect IP version for ping pong %04x\n", xsk, eth->h_proto); + xsk_ring_prod__cancel(&xsk->tx, 1); + return; + } + + len = ETH_HLEN; + if (ip6h) + len += sizeof(*ip6h) + ntohs(ip6h->payload_len); + if (iph) + len += ntohs(iph->tot_len); + + swap(eth->h_dest, eth->h_source, ETH_ALEN); + if (iph) + swap(&iph->saddr, &iph->daddr, 4); + else + swap(&ip6h->saddr, &ip6h->daddr, 16); + swap(&udph->source, &udph->dest, 2); + + want_csum = udph->check; + if (ip6h) + udph->check = ~csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + ntohs(udph->len), IPPROTO_UDP, 0); + else + udph->check = 
~csum_tcpudp_magic(iph->saddr, iph->daddr, + ntohs(udph->len), IPPROTO_UDP, 0); + + meta->flags |= XDP_TXMD_FLAGS_CHECKSUM; + if (iph) + meta->request.csum_start = sizeof(*eth) + sizeof(*iph); + else + meta->request.csum_start = sizeof(*eth) + sizeof(*ip6h); + meta->request.csum_offset = offsetof(struct udphdr, check); + + printf("%p: ping-pong with csum=%04x (want %04x) csum_start=%d csum_offset=%d\n", + xsk, ntohs(udph->check), ntohs(want_csum), + meta->request.csum_start, meta->request.csum_offset); + + memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */ + tx_desc->options |= XDP_TX_METADATA; + tx_desc->len = len; + + xsk_ring_prod__submit(&xsk->tx, 1); +} + static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id) { const struct xdp_desc *rx_desc; @@ -252,6 +424,13 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t while (true) { errno = 0; + + for (i = 0; i < rxq; i++) { + ret = kick_rx(&rx_xsk[i]); + if (ret) + printf("kick_rx ret=%d\n", ret); + } + ret = poll(fds, rxq + 1, 1000); printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n", ret, errno, bpf_obj->bss->pkts_skip, @@ -288,6 +467,22 @@ peek: verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr), clock_id); first_seg = false; + + if (!skip_tx) { + /* mirror first chunk back */ + ping_pong(xsk, xsk_umem__get_data(xsk->umem_area, addr), + clock_id); + + ret = kick_tx(xsk); + if (ret) + printf("kick_tx ret=%d\n", ret); + + for (int j = 0; j < 500; j++) { + if (complete_tx(xsk, clock_id)) + break; + usleep(10*1000); + } + } } xsk_ring_cons__release(&xsk->rx, 1); @@ -420,8 +615,10 @@ static void print_usage(void) { const char *usage = "Usage: xdp_hw_metadata [OPTIONS] [IFNAME]\n" - " -m Enable multi-buffer XDP for larger MTU\n" + " -c Run in copy mode (zerocopy is default)\n" " -h Display this help and exit\n\n" + " -m Enable multi-buffer XDP for larger MTU\n" + " -r Don't generate AF_XDP reply (rx metadata 
only)\n" "Generate test packets on the other machine with:\n" " echo -n xdp | nc -u -q1 <dst_ip> 9091\n"; @@ -432,14 +629,22 @@ static void read_args(int argc, char *argv[]) { int opt; - while ((opt = getopt(argc, argv, "mh")) != -1) { + while ((opt = getopt(argc, argv, "chmr")) != -1) { switch (opt) { - case 'm': - bind_flags |= XDP_USE_SG; + case 'c': + bind_flags &= ~XDP_USE_NEED_WAKEUP; + bind_flags &= ~XDP_ZEROCOPY; + bind_flags |= XDP_COPY; break; case 'h': print_usage(); exit(0); + case 'm': + bind_flags |= XDP_USE_SG; + break; + case 'r': + skip_tx = true; + break; case '?': if (isprint(optopt)) fprintf(stderr, "Unknown option: -%c\n", optopt); diff --git a/tools/testing/selftests/bpf/xdp_metadata.h b/tools/testing/selftests/bpf/xdp_metadata.h index 938a729bd3..87318ad111 100644 --- a/tools/testing/selftests/bpf/xdp_metadata.h +++ b/tools/testing/selftests/bpf/xdp_metadata.h @@ -9,12 +9,44 @@ #define ETH_P_IPV6 0x86DD #endif +#ifndef ETH_P_8021Q +#define ETH_P_8021Q 0x8100 +#endif + +#ifndef ETH_P_8021AD +#define ETH_P_8021AD 0x88A8 +#endif + +#ifndef BIT +#define BIT(nr) (1 << (nr)) +#endif + +/* Non-existent checksum status */ +#define XDP_CHECKSUM_MAGIC BIT(2) + +enum xdp_meta_field { + XDP_META_FIELD_TS = BIT(0), + XDP_META_FIELD_RSS = BIT(1), + XDP_META_FIELD_VLAN_TAG = BIT(2), +}; + struct xdp_meta { - __u64 rx_timestamp; + union { + __u64 rx_timestamp; + __s32 rx_timestamp_err; + }; __u64 xdp_timestamp; __u32 rx_hash; union { __u32 rx_hash_type; __s32 rx_hash_err; }; + union { + struct { + __be16 rx_vlan_proto; + __u16 rx_vlan_tci; + }; + __s32 rx_vlan_tag_err; + }; + enum xdp_meta_field hint_valid; }; diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c index e574711eeb..25d568abf0 100644 --- a/tools/testing/selftests/bpf/xsk.c +++ b/tools/testing/selftests/bpf/xsk.c @@ -115,6 +115,7 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg, cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; cfg->frame_headroom = 
XSK_UMEM__DEFAULT_FRAME_HEADROOM; cfg->flags = XSK_UMEM__DEFAULT_FLAGS; + cfg->tx_metadata_len = 0; return; } @@ -123,6 +124,7 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg, cfg->frame_size = usr_cfg->frame_size; cfg->frame_headroom = usr_cfg->frame_headroom; cfg->flags = usr_cfg->flags; + cfg->tx_metadata_len = usr_cfg->tx_metadata_len; } static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, @@ -252,6 +254,7 @@ int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, mr.chunk_size = umem->config.frame_size; mr.headroom = umem->config.frame_headroom; mr.flags = umem->config.flags; + mr.tx_metadata_len = umem->config.tx_metadata_len; err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)); if (err) { diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h index 771570bc37..93c2cc413c 100644 --- a/tools/testing/selftests/bpf/xsk.h +++ b/tools/testing/selftests/bpf/xsk.h @@ -200,6 +200,7 @@ struct xsk_umem_config { __u32 frame_size; __u32 frame_headroom; __u32 flags; + __u32 tx_metadata_len; }; int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags); diff --git a/tools/testing/selftests/breakpoints/breakpoint_test.c b/tools/testing/selftests/breakpoints/breakpoint_test.c index 3266cc9293..d46962a247 100644 --- a/tools/testing/selftests/breakpoints/breakpoint_test.c +++ b/tools/testing/selftests/breakpoints/breakpoint_test.c @@ -284,9 +284,9 @@ static void check_success(const char *msg) nr_tests++; if (ret) - ksft_test_result_pass(msg); + ksft_test_result_pass("%s", msg); else - ksft_test_result_fail(msg); + ksft_test_result_fail("%s", msg); } static void launch_instruction_breakpoints(char *buf, int local, int global) diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c index 2cf6f10ab7..b8703c499d 100644 --- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c +++ 
b/tools/testing/selftests/breakpoints/step_after_suspend_test.c @@ -89,7 +89,7 @@ int run_test(int cpu) wpid = waitpid(pid, &status, __WALL); if (wpid != pid) { - ksft_print_msg("waitpid() failed: $s\n", strerror(errno)); + ksft_print_msg("waitpid() failed: %s\n", strerror(errno)); return KSFT_FAIL; } if (WIFEXITED(status)) { diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c index e3a352b020..7cde07a5df 100644 --- a/tools/testing/selftests/capabilities/test_execve.c +++ b/tools/testing/selftests/capabilities/test_execve.c @@ -88,11 +88,7 @@ static bool create_and_enter_ns(uid_t inner_uid) outer_uid = getuid(); outer_gid = getgid(); - /* - * TODO: If we're already root, we could skip creating the userns. - */ - - if (unshare(CLONE_NEWNS) == 0) { + if (outer_uid == 0 && unshare(CLONE_NEWNS) == 0) { ksft_print_msg("[NOTE]\tUsing global UIDs for tests\n"); if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) != 0) ksft_exit_fail_msg("PR_SET_KEEPCAPS - %s\n", diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index a6e9848189..b5eb1be224 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -147,71 +147,6 @@ test_add_proc() } # -# Testing the new "isolated" partition root type -# -test_isolated() -{ - cd $CGROUP2/test - echo 2-3 > cpuset.cpus - TYPE=$(cat cpuset.cpus.partition) - [[ $TYPE = member ]] || echo member > cpuset.cpus.partition - - console_msg "Change from member to root" - test_partition root - - console_msg "Change from root to isolated" - test_partition isolated - - console_msg "Change from isolated to member" - test_partition member - - console_msg "Change from member to isolated" - test_partition isolated - - console_msg "Change from isolated to root" - test_partition root - - console_msg "Change from root to member" - test_partition member - - # - # Testing partition root 
with no cpu - # - console_msg "Distribute all cpus to child partition" - echo +cpuset > cgroup.subtree_control - test_partition root - - mkdir A1 - cd A1 - echo 2-3 > cpuset.cpus - test_partition root - test_effective_cpus 2-3 - cd .. - test_effective_cpus "" - - console_msg "Moving task to partition test" - test_add_proc "No space left" - cd A1 - test_add_proc "" - cd .. - - console_msg "Shrink and expand child partition" - cd A1 - echo 2 > cpuset.cpus - cd .. - test_effective_cpus 3 - cd A1 - echo 2-3 > cpuset.cpus - cd .. - test_effective_cpus "" - - # Cleaning up - console_msg "Cleaning up" - echo $$ > $CGROUP2/cgroup.procs - [[ -d A1 ]] && rmdir A1 -} - -# # Cpuset controller state transition test matrix. # # Cgroup test hierarchy @@ -297,14 +232,14 @@ TEST_MATRIX=( " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4-5 \ A1:P0,A2:P1,A3:P2,B1:P1 2-3" " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-4" + A1:P0,A2:P1,A3:P2,B1:P1 2-4,2-3" " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:2,A3:3,B1:4 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-4" + A1:P0,A2:P1,A3:P2,B1:P1 2-4,3" " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1,A2:2-3,A3:4 \ - A1:P0,A2:P2,A3:P1 2-4" + A1:P0,A2:P2,A3:P1 2-4,2-3" " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ . . X5 . . 0 A1:0-4,A2:1-4,A3:2-4 \ - A1:P0,A2:P-2,A3:P-1 ." + A1:P0,A2:P-2,A3:P-1" " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ . . . X1 . 0 A1:0-1,A2:2-4,A3:2-4 \ A1:P0,A2:P2,A3:P-1 2-4" @@ -313,7 +248,7 @@ TEST_MATRIX=( " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1,A2:1,A3:3 A1:P0,A3:P2 2-3" " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2,A2:1-2,A3: A1:P0,A3:P2 3" - " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2,A2:1-2,A3:1-2 A1:P0,A3:P-2 3" + " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 
0 A1:0-2,A2:1-2,A3:1-2 A1:P0,A3:P-2 3," # An invalidated remote partition cannot self-recover from hotplug " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3,A2:1-3,A3:2 A1:P0,A3:P-2" @@ -347,10 +282,10 @@ TEST_MATRIX=( # cpus_allowed/exclusive_cpus update tests " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \ - A1:P0,A3:P-2 ." + A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . X1 . P2 . 0 A1:0-3,A2:1-3,XA1:1,XA2:,XA3:,A3:2-3 \ - A1:P0,A3:P-2 ." + A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ . . C3 P2 . 0 A1:0-2,A2:0-2,XA2:3,XA3:3,A3:3 \ A1:P0,A3:P2 3" @@ -359,13 +294,13 @@ TEST_MATRIX=( A1:P0,A3:P2 3" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ . . X3 . . 0 A1:0-3,A2:1-3,XA2:3,XA3:3,A3:2-3 \ - A1:P0,A3:P-2 ." + A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ . . C3 . . 0 A1:0-3,A2:3,XA2:3,XA3:3,A3:3 \ - A1:P0,A3:P-2 ." + A1:P0,A3:P-2" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ . C4 . . . 0 A1:4,A2:4,A3:4,XA1:,XA2:,XA3 \ - A1:P0,A3:P-2 ." 
+ A1:P0,A3:P-2" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- @@ -441,7 +376,7 @@ write_cpu_online() } fi echo $VAL > $CPUFILE - pause 0.01 + pause 0.05 } # @@ -573,12 +508,14 @@ dump_states() XECPUS=$DIR/cpuset.cpus.exclusive.effective PRS=$DIR/cpuset.cpus.partition PCPUS=$DIR/.__DEBUG__.cpuset.cpus.subpartitions + ISCPUS=$DIR/cpuset.cpus.isolated [[ -e $CPUS ]] && echo "$CPUS: $(cat $CPUS)" [[ -e $XCPUS ]] && echo "$XCPUS: $(cat $XCPUS)" [[ -e $ECPUS ]] && echo "$ECPUS: $(cat $ECPUS)" [[ -e $XECPUS ]] && echo "$XECPUS: $(cat $XECPUS)" [[ -e $PRS ]] && echo "$PRS: $(cat $PRS)" [[ -e $PCPUS ]] && echo "$PCPUS: $(cat $PCPUS)" + [[ -e $ISCPUS ]] && echo "$ISCPUS: $(cat $ISCPUS)" done } @@ -656,11 +593,17 @@ check_cgroup_states() # # Get isolated (including offline) CPUs by looking at -# /sys/kernel/debug/sched/domains and compare that with the expected value. +# /sys/kernel/debug/sched/domains and cpuset.cpus.isolated control file, +# if available, and compare that with the expected value. # -# Note that a sched domain of just 1 CPU will be considered isolated. +# Note that isolated CPUs from the sched/domains context include offline +# CPUs as well as CPUs in non-isolated 1-CPU partition. Those CPUs may +# not be included in the cpuset.cpus.isolated control file which contains +# only CPUs in isolated partitions. # -# $1 - expected isolated cpu list +# $1 - expected isolated cpu list(s) <isolcpus1>{,<isolcpus2>} +# <isolcpus1> - expected sched/domains value +# <isolcpus2> - cpuset.cpus.isolated value = <isolcpus1> if not defined # check_isolcpus() { @@ -668,8 +611,38 @@ check_isolcpus() ISOLCPUS= LASTISOLCPU= SCHED_DOMAINS=/sys/kernel/debug/sched/domains + ISCPUS=${CGROUP2}/cpuset.cpus.isolated + if [[ $EXPECT_VAL = . 
]] + then + EXPECT_VAL= + EXPECT_VAL2= + elif [[ $(expr $EXPECT_VAL : ".*,.*") > 0 ]] + then + set -- $(echo $EXPECT_VAL | sed -e "s/,/ /g") + EXPECT_VAL=$1 + EXPECT_VAL2=$2 + else + EXPECT_VAL2=$EXPECT_VAL + fi + + # + # Check the debug isolated cpumask, if present + # + [[ -f $ISCPUS ]] && { + ISOLCPUS=$(cat $ISCPUS) + [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && { + # Take a 50ms pause and try again + pause 0.05 + ISOLCPUS=$(cat $ISCPUS) + } + [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && return 1 + ISOLCPUS= + } + + # + # Use the sched domain in debugfs to check isolated CPUs, if available + # [[ -d $SCHED_DOMAINS ]] || return 0 - [[ $EXPECT_VAL = . ]] && EXPECT_VAL= for ((CPU=0; CPU < $NR_CPUS; CPU++)) do @@ -714,6 +687,26 @@ test_fail() } # +# Check to see if there are unexpected isolated CPUs left +# +null_isolcpus_check() +{ + [[ $VERBOSE -gt 0 ]] || return 0 + # Retry a few times before printing error + RETRY=0 + while [[ $RETRY -lt 5 ]] + do + pause 0.01 + check_isolcpus "." + [[ $? -eq 0 ]] && return 0 + ((RETRY++)) + done + echo "Unexpected isolated CPUs: $ISOLCPUS" + dump_states + exit 1 +} + +# # Run cpuset state transition test # $1 - test matrix name # @@ -787,7 +780,7 @@ run_state_test() # NEWLIST=$(cat cpuset.cpus.effective) RETRY=0 - while [[ $NEWLIST != $CPULIST && $RETRY -lt 5 ]] + while [[ $NEWLIST != $CPULIST && $RETRY -lt 8 ]] do # Wait a bit longer & recheck a few times pause 0.01 @@ -798,6 +791,7 @@ run_state_test() echo "Effective cpus changed to $NEWLIST after test $I!" exit 1 } + null_isolcpus_check [[ $VERBOSE -gt 0 ]] && echo "Test $I done." 
((I++)) done @@ -805,6 +799,72 @@ run_state_test() } # +# Testing the new "isolated" partition root type +# +test_isolated() +{ + cd $CGROUP2/test + echo 2-3 > cpuset.cpus + TYPE=$(cat cpuset.cpus.partition) + [[ $TYPE = member ]] || echo member > cpuset.cpus.partition + + console_msg "Change from member to root" + test_partition root + + console_msg "Change from root to isolated" + test_partition isolated + + console_msg "Change from isolated to member" + test_partition member + + console_msg "Change from member to isolated" + test_partition isolated + + console_msg "Change from isolated to root" + test_partition root + + console_msg "Change from root to member" + test_partition member + + # + # Testing partition root with no cpu + # + console_msg "Distribute all cpus to child partition" + echo +cpuset > cgroup.subtree_control + test_partition root + + mkdir A1 + cd A1 + echo 2-3 > cpuset.cpus + test_partition root + test_effective_cpus 2-3 + cd .. + test_effective_cpus "" + + console_msg "Moving task to partition test" + test_add_proc "No space left" + cd A1 + test_add_proc "" + cd .. + + console_msg "Shrink and expand child partition" + cd A1 + echo 2 > cpuset.cpus + cd .. + test_effective_cpus 3 + cd A1 + echo 2-3 > cpuset.cpus + cd .. + test_effective_cpus "" + + # Cleaning up + console_msg "Cleaning up" + echo $$ > $CGROUP2/cgroup.procs + [[ -d A1 ]] && rmdir A1 + null_isolcpus_check +} + +# # Wait for inotify event for the given file and read it # $1: cgroup file to wait for # $2: file to store the read result diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c index ff519029f6..8845353aca 100644 --- a/tools/testing/selftests/cgroup/test_freezer.c +++ b/tools/testing/selftests/cgroup/test_freezer.c @@ -740,7 +740,7 @@ static int test_cgfreezer_ptraced(const char *root) /* * cg_check_frozen(cgroup, true) will fail here, - * because the task in in the TRACEd state. 
+ * because the task is in the TRACEd state. */ if (cg_freeze_wait(cgroup, false)) goto cleanup; diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c index c99d2adaca..47fdaa1464 100644 --- a/tools/testing/selftests/cgroup/test_zswap.c +++ b/tools/testing/selftests/cgroup/test_zswap.c @@ -50,9 +50,9 @@ static int get_zswap_stored_pages(size_t *value) return read_int("/sys/kernel/debug/zswap/stored_pages", value); } -static int get_zswap_written_back_pages(size_t *value) +static int get_cg_wb_count(const char *cg) { - return read_int("/sys/kernel/debug/zswap/written_back_pages", value); + return cg_read_key_long(cg, "memory.stat", "zswp_wb"); } static long get_zswpout(const char *cgroup) @@ -73,6 +73,24 @@ static int allocate_bytes(const char *cgroup, void *arg) return 0; } +static char *setup_test_group_1M(const char *root, const char *name) +{ + char *group_name = cg_name(root, name); + + if (!group_name) + return NULL; + if (cg_create(group_name)) + goto fail; + if (cg_write(group_name, "memory.max", "1M")) { + cg_destroy(group_name); + goto fail; + } + return group_name; +fail: + free(group_name); + return NULL; +} + /* * Sanity test to check that pages are written into zswap. */ @@ -117,43 +135,51 @@ out: /* * When trying to store a memcg page in zswap, if the memcg hits its memory - * limit in zswap, writeback should not be triggered. - * - * This was fixed with commit 0bdf0efa180a("zswap: do not shrink if cgroup may - * not zswap"). Needs to be revised when a per memcg writeback mechanism is - * implemented. + * limit in zswap, writeback should affect only the zswapped pages of that + * memcg. 
*/ static int test_no_invasive_cgroup_shrink(const char *root) { - size_t written_back_before, written_back_after; int ret = KSFT_FAIL; - char *test_group; + size_t control_allocation_size = MB(10); + char *control_allocation, *wb_group = NULL, *control_group = NULL; /* Set up */ - test_group = cg_name(root, "no_shrink_test"); - if (!test_group) - goto out; - if (cg_create(test_group)) + wb_group = setup_test_group_1M(root, "per_memcg_wb_test1"); + if (!wb_group) + return KSFT_FAIL; + if (cg_write(wb_group, "memory.zswap.max", "10K")) goto out; - if (cg_write(test_group, "memory.max", "1M")) + control_group = setup_test_group_1M(root, "per_memcg_wb_test2"); + if (!control_group) goto out; - if (cg_write(test_group, "memory.zswap.max", "10K")) + + /* Push some test_group2 memory into zswap */ + if (cg_enter_current(control_group)) goto out; - if (get_zswap_written_back_pages(&written_back_before)) + control_allocation = malloc(control_allocation_size); + for (int i = 0; i < control_allocation_size; i += 4095) + control_allocation[i] = 'a'; + if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1) goto out; - /* Allocate 10x memory.max to push memory into zswap */ - if (cg_run(test_group, allocate_bytes, (void *)MB(10))) + /* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */ + if (cg_run(wb_group, allocate_bytes, (void *)MB(10))) goto out; - /* Verify that no writeback happened because of the memcg allocation */ - if (get_zswap_written_back_pages(&written_back_after)) - goto out; - if (written_back_after == written_back_before) + /* Verify that only zswapped memory from gwb_group has been written back */ + if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0) ret = KSFT_PASS; out: - cg_destroy(test_group); - free(test_group); + cg_enter_current(root); + if (control_group) { + cg_destroy(control_group); + free(control_group); + } + cg_destroy(wb_group); + free(wb_group); + if (control_allocation) + 
free(control_allocation); return ret; } diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index b71247ba71..8a1cc2bf18 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -2,6 +2,7 @@ # Makefile for damon selftests TEST_GEN_FILES += huge_count_read_write +TEST_GEN_FILES += access_memory TEST_FILES = _chk_dependency.sh _debugfs_common.sh TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh @@ -9,6 +10,8 @@ TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh TEST_PROGS += debugfs_duplicate_context_creation.sh TEST_PROGS += debugfs_rm_non_contexts.sh TEST_PROGS += sysfs.sh sysfs_update_removed_scheme_dir.sh +TEST_PROGS += sysfs_update_schemes_tried_regions_hang.py +TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py TEST_PROGS += reclaim.sh lru_sort.sh include ../lib.mk diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py new file mode 100644 index 0000000000..e98cf4b6a4 --- /dev/null +++ b/tools/testing/selftests/damon/_damon_sysfs.py @@ -0,0 +1,322 @@ +# SPDX-License-Identifier: GPL-2.0 + +import os + +sysfs_root = '/sys/kernel/mm/damon/admin' + +def write_file(path, string): + "Returns error string if failed, or None otherwise" + string = '%s' % string + try: + with open(path, 'w') as f: + f.write(string) + except Exception as e: + return '%s' % e + return None + +def read_file(path): + '''Returns the read content and error string. 
The read content is None if + the reading failed''' + try: + with open(path, 'r') as f: + return f.read(), None + except Exception as e: + return None, '%s' % e + +class DamosAccessPattern: + size = None + nr_accesses = None + age = None + scheme = None + + def __init__(self, size=None, nr_accesses=None, age=None): + self.size = size + self.nr_accesses = nr_accesses + self.age = age + + if self.size == None: + self.size = [0, 2**64 - 1] + if self.nr_accesses == None: + self.nr_accesses = [0, 2**64 - 1] + if self.age == None: + self.age = [0, 2**64 - 1] + + def sysfs_dir(self): + return os.path.join(self.scheme.sysfs_dir(), 'access_pattern') + + def stage(self): + err = write_file( + os.path.join(self.sysfs_dir(), 'sz', 'min'), self.size[0]) + if err != None: + return err + err = write_file( + os.path.join(self.sysfs_dir(), 'sz', 'max'), self.size[1]) + if err != None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'nr_accesses', 'min'), + self.nr_accesses[0]) + if err != None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'nr_accesses', 'max'), + self.nr_accesses[1]) + if err != None: + return err + err = write_file( + os.path.join(self.sysfs_dir(), 'age', 'min'), self.age[0]) + if err != None: + return err + err = write_file( + os.path.join(self.sysfs_dir(), 'age', 'max'), self.age[1]) + if err != None: + return err + +class Damos: + action = None + access_pattern = None + # todo: Support quotas, watermarks, stats, tried_regions + idx = None + context = None + tried_bytes = None + + def __init__(self, action='stat', access_pattern=DamosAccessPattern()): + self.action = action + self.access_pattern = access_pattern + self.access_pattern.scheme = self + + def sysfs_dir(self): + return os.path.join( + self.context.sysfs_dir(), 'schemes', '%d' % self.idx) + + def stage(self): + err = write_file(os.path.join(self.sysfs_dir(), 'action'), self.action) + if err != None: + return err + err = self.access_pattern.stage() + if err != None: + 
return err + + # disable quotas + err = write_file(os.path.join(self.sysfs_dir(), 'quotas', 'ms'), '0') + if err != None: + return err + err = write_file( + os.path.join(self.sysfs_dir(), 'quotas', 'bytes'), '0') + if err != None: + return err + + # disable watermarks + err = write_file( + os.path.join(self.sysfs_dir(), 'watermarks', 'metric'), 'none') + if err != None: + return err + + # disable filters + err = write_file( + os.path.join(self.sysfs_dir(), 'filters', 'nr_filters'), '0') + if err != None: + return err + +class DamonTarget: + pid = None + # todo: Support target regions if test is made + idx = None + context = None + + def __init__(self, pid): + self.pid = pid + + def sysfs_dir(self): + return os.path.join( + self.context.sysfs_dir(), 'targets', '%d' % self.idx) + + def stage(self): + err = write_file( + os.path.join(self.sysfs_dir(), 'regions', 'nr_regions'), '0') + if err != None: + return err + return write_file( + os.path.join(self.sysfs_dir(), 'pid_target'), self.pid) + +class DamonAttrs: + sample_us = None + aggr_us = None + update_us = None + min_nr_regions = None + max_nr_regions = None + context = None + + def __init__(self, sample_us=5000, aggr_us=100000, update_us=1000000, + min_nr_regions=10, max_nr_regions=1000): + self.sample_us = sample_us + self.aggr_us = aggr_us + self.update_us = update_us + self.min_nr_regions = min_nr_regions + self.max_nr_regions = max_nr_regions + + def interval_sysfs_dir(self): + return os.path.join(self.context.sysfs_dir(), 'monitoring_attrs', + 'intervals') + + def nr_regions_range_sysfs_dir(self): + return os.path.join(self.context.sysfs_dir(), 'monitoring_attrs', + 'nr_regions') + + def stage(self): + err = write_file(os.path.join(self.interval_sysfs_dir(), 'sample_us'), + self.sample_us) + if err != None: + return err + err = write_file(os.path.join(self.interval_sysfs_dir(), 'aggr_us'), + self.aggr_us) + if err != None: + return err + err = write_file(os.path.join(self.interval_sysfs_dir(), 'update_us'), + 
self.update_us) + if err != None: + return err + + err = write_file( + os.path.join(self.nr_regions_range_sysfs_dir(), 'min'), + self.min_nr_regions) + if err != None: + return err + + err = write_file( + os.path.join(self.nr_regions_range_sysfs_dir(), 'max'), + self.max_nr_regions) + if err != None: + return err + +class DamonCtx: + ops = None + monitoring_attrs = None + targets = None + schemes = None + kdamond = None + idx = None + + def __init__(self, ops='paddr', monitoring_attrs=DamonAttrs(), targets=[], + schemes=[]): + self.ops = ops + self.monitoring_attrs = monitoring_attrs + self.monitoring_attrs.context = self + + self.targets = targets + for idx, target in enumerate(self.targets): + target.idx = idx + target.context = self + + self.schemes = schemes + for idx, scheme in enumerate(self.schemes): + scheme.idx = idx + scheme.context = self + + def sysfs_dir(self): + return os.path.join(self.kdamond.sysfs_dir(), 'contexts', + '%d' % self.idx) + + def stage(self): + err = write_file( + os.path.join(self.sysfs_dir(), 'operations'), self.ops) + if err != None: + return err + err = self.monitoring_attrs.stage() + if err != None: + return err + + nr_targets_file = os.path.join( + self.sysfs_dir(), 'targets', 'nr_targets') + content, err = read_file(nr_targets_file) + if err != None: + return err + if int(content) != len(self.targets): + err = write_file(nr_targets_file, '%d' % len(self.targets)) + if err != None: + return err + for target in self.targets: + err = target.stage() + if err != None: + return err + + nr_schemes_file = os.path.join( + self.sysfs_dir(), 'schemes', 'nr_schemes') + content, err = read_file(nr_schemes_file) + if int(content) != len(self.schemes): + err = write_file(nr_schemes_file, '%d' % len(self.schemes)) + if err != None: + return err + for scheme in self.schemes: + err = scheme.stage() + if err != None: + return err + return None + +class Kdamond: + state = None + pid = None + contexts = None + idx = None # index of this kdamond 
between siblings + kdamonds = None # parent + + def __init__(self, contexts=[]): + self.contexts = contexts + for idx, context in enumerate(self.contexts): + context.idx = idx + context.kdamond = self + + def sysfs_dir(self): + return os.path.join(self.kdamonds.sysfs_dir(), '%d' % self.idx) + + def start(self): + nr_contexts_file = os.path.join(self.sysfs_dir(), + 'contexts', 'nr_contexts') + content, err = read_file(nr_contexts_file) + if err != None: + return err + if int(content) != len(self.contexts): + err = write_file(nr_contexts_file, '%d' % len(self.contexts)) + if err != None: + return err + + for context in self.contexts: + err = context.stage() + if err != None: + return err + err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'on') + return err + + def update_schemes_tried_bytes(self): + err = write_file(os.path.join(self.sysfs_dir(), 'state'), + 'update_schemes_tried_bytes') + if err != None: + return err + for context in self.contexts: + for scheme in context.schemes: + content, err = read_file(os.path.join(scheme.sysfs_dir(), + 'tried_regions', 'total_bytes')) + if err != None: + return err + scheme.tried_bytes = int(content) + +class Kdamonds: + kdamonds = [] + + def __init__(self, kdamonds=[]): + self.kdamonds = kdamonds + for idx, kdamond in enumerate(self.kdamonds): + kdamond.idx = idx + kdamond.kdamonds = self + + def sysfs_dir(self): + return os.path.join(sysfs_root, 'kdamonds') + + def start(self): + err = write_file(os.path.join(self.sysfs_dir(), 'nr_kdamonds'), + '%s' % len(self.kdamonds)) + if err != None: + return err + for kdamond in self.kdamonds: + err = kdamond.start() + if err != None: + return err + return None diff --git a/tools/testing/selftests/damon/access_memory.c b/tools/testing/selftests/damon/access_memory.c new file mode 100644 index 0000000000..585a2fa543 --- /dev/null +++ b/tools/testing/selftests/damon/access_memory.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Artificial memory access program for 
testing DAMON. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +int main(int argc, char *argv[]) +{ + char **regions; + clock_t start_clock; + int nr_regions; + int sz_region; + int access_time_ms; + int i; + + if (argc != 4) { + printf("Usage: %s <number> <size (bytes)> <time (ms)>\n", + argv[0]); + return -1; + } + + nr_regions = atoi(argv[1]); + sz_region = atoi(argv[2]); + access_time_ms = atoi(argv[3]); + + regions = malloc(sizeof(*regions) * nr_regions); + for (i = 0; i < nr_regions; i++) + regions[i] = malloc(sz_region); + + for (i = 0; i < nr_regions; i++) { + start_clock = clock(); + while ((clock() - start_clock) * 1000 / CLOCKS_PER_SEC < + access_time_ms) + memset(regions[i], i, 1024 * 1024 * 10); + } + return 0; +} diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh index 56f0230a8b..e9a976d296 100755 --- a/tools/testing/selftests/damon/sysfs.sh +++ b/tools/testing/selftests/damon/sysfs.sh @@ -150,6 +150,32 @@ test_weights() ensure_file "$weights_dir/age_permil" "exist" "600" } +test_goal() +{ + goal_dir=$1 + ensure_dir "$goal_dir" "exist" + ensure_file "$goal_dir/target_value" "exist" "600" + ensure_file "$goal_dir/current_value" "exist" "600" +} + +test_goals() +{ + goals_dir=$1 + ensure_dir "$goals_dir" "exist" + ensure_file "$goals_dir/nr_goals" "exist" "600" + + ensure_write_succ "$goals_dir/nr_goals" "1" "valid input" + test_goal "$goals_dir/0" + + ensure_write_succ "$goals_dir/nr_goals" "2" "valid input" + test_goal "$goals_dir/0" + test_goal "$goals_dir/1" + + ensure_write_succ "$goals_dir/nr_goals" "0" "valid input" + ensure_dir "$goals_dir/0" "not_exist" + ensure_dir "$goals_dir/1" "not_exist" +} + test_quotas() { quotas_dir=$1 @@ -158,6 +184,7 @@ test_quotas() ensure_file "$quotas_dir/bytes" "exist" 600 ensure_file "$quotas_dir/reset_interval_ms" "exist" 600 test_weights "$quotas_dir/weights" + test_goals "$quotas_dir/goals" } test_access_pattern() diff --git 
a/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py new file mode 100644 index 0000000000..8c690ba1a5 --- /dev/null +++ b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import subprocess +import time + +import _damon_sysfs + +def main(): + proc = subprocess.Popen(['sleep', '2']) + kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond( + contexts=[_damon_sysfs.DamonCtx( + ops='vaddr', + targets=[_damon_sysfs.DamonTarget(pid=proc.pid)], + schemes=[_damon_sysfs.Damos( + access_pattern=_damon_sysfs.DamosAccessPattern( + nr_accesses=[200, 200]))] # schemes + )] # contexts + )]) # kdamonds + + err = kdamonds.start() + if err != None: + print('kdmaond start failed: %s' % err) + exit(1) + + while proc.poll() == None: + err = kdamonds.kdamonds[0].update_schemes_tried_bytes() + if err != None: + print('tried bytes update failed: %s' % err) + exit(1) + +if __name__ == '__main__': + main() diff --git a/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py new file mode 100644 index 0000000000..cdbf19b442 --- /dev/null +++ b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import subprocess +import time + +import _damon_sysfs + +def main(): + # access two 10 MiB memory regions, 2 second per each + sz_region = 10 * 1024 * 1024 + proc = subprocess.Popen(['./access_memory', '2', '%d' % sz_region, '2000']) + kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond( + contexts=[_damon_sysfs.DamonCtx( + ops='vaddr', + targets=[_damon_sysfs.DamonTarget(pid=proc.pid)], + schemes=[_damon_sysfs.Damos( + access_pattern=_damon_sysfs.DamosAccessPattern( + # >= 25% 
access rate, >= 200ms age + nr_accesses=[5, 20], age=[2, 2**64 - 1]))] # schemes + )] # contexts + )]) # kdamonds + + err = kdamonds.start() + if err != None: + print('kdmaond start failed: %s' % err) + exit(1) + + wss_collected = [] + while proc.poll() == None: + time.sleep(0.1) + err = kdamonds.kdamonds[0].update_schemes_tried_bytes() + if err != None: + print('tried bytes update failed: %s' % err) + exit(1) + + wss_collected.append( + kdamonds.kdamonds[0].contexts[0].schemes[0].tried_bytes) + + wss_collected.sort() + acceptable_error_rate = 0.2 + for percentile in [50, 75]: + sample = wss_collected[int(len(wss_collected) * percentile / 100)] + error_rate = abs(sample - sz_region) / sz_region + print('%d-th percentile (%d) error %f' % + (percentile, sample, error_rate)) + if error_rate > acceptable_error_rate: + print('the error rate is not acceptable (> %f)' % + acceptable_error_rate) + print('samples are as below') + print('\n'.join(['%d' % wss for wss in wss_collected])) + exit(1) + +if __name__ == '__main__': + main() diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config index 70638fa50b..899d7fb6ea 100644 --- a/tools/testing/selftests/drivers/net/bonding/config +++ b/tools/testing/selftests/drivers/net/bonding/config @@ -1,2 +1,10 @@ CONFIG_BONDING=y +CONFIG_BRIDGE=y +CONFIG_DUMMY=y +CONFIG_IPV6=y CONFIG_MACVLAN=y +CONFIG_NET_ACT_GACT=y +CONFIG_NET_CLS_FLOWER=y +CONFIG_NET_SCH_INGRESS=y +CONFIG_NLMON=y +CONFIG_VETH=y diff --git a/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh new file mode 100755 index 0000000000..fe0343b95e --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test that PCI reset works correctly by verifying that only the expected reset +# methods are supported and that after issuing the reset the ifindex of the +# port 
changes. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + pci_reset_test +" +NUM_NETIFS=1 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +pci_reset_test() +{ + RET=0 + + local bus=$(echo $DEVLINK_DEV | cut -d '/' -f 1) + local bdf=$(echo $DEVLINK_DEV | cut -d '/' -f 2) + + if [ $bus != "pci" ]; then + check_err 1 "devlink device is not a PCI device" + log_test "pci reset" + return + fi + + if [ ! -f /sys/bus/pci/devices/$bdf/reset_method ]; then + check_err 1 "reset is not supported" + log_test "pci reset" + return + fi + + [[ $(cat /sys/bus/pci/devices/$bdf/reset_method) == "bus" ]] + check_err $? "only \"bus\" reset method should be supported" + + local ifindex_pre=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]') + + echo 1 > /sys/bus/pci/devices/$bdf/reset + check_err $? "reset failed" + + # Wait for udev to rename newly created netdev. + udevadm settle + + local ifindex_post=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]') + + [[ $ifindex_pre != $ifindex_post ]] + check_err $? 
"reset not performed" + + log_test "pci reset" +} + +swp1=${NETIFS[p1]} +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh index e71d811656..0f0f4f0580 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh @@ -40,7 +40,6 @@ # | + $swp1 $swp3 + + $swp4 | # | | iPOOL1 iPOOL0 | | iPOOL2 | # | | ePOOL4 ePOOL5 | | ePOOL4 | -# | | 1Gbps | | 1Gbps | # | | PFC:enabled=1 | | PFC:enabled=1 | # | +-|----------------------|-+ +-|------------------------+ | # | | + $swp1.111 $swp3.111 + | | + $swp4.111 | | diff --git a/tools/testing/selftests/drivers/net/netdevsim/config b/tools/testing/selftests/drivers/net/netdevsim/config new file mode 100644 index 0000000000..adf45a3a78 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/config @@ -0,0 +1,10 @@ +CONFIG_DUMMY=y +CONFIG_GENEVE=m +CONFIG_IPV6=y +CONFIG_NETDEVSIM=m +CONFIG_NET_SCH_MQPRIO=y +CONFIG_NET_SCH_MULTIQ=y +CONFIG_NET_SCH_PRIO=y +CONFIG_PSAMPLE=y +CONFIG_PTP_1588_CLOCK_MOCK=y +CONFIG_VXLAN=m diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh index 922744059a..80160579e0 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh @@ -51,6 +51,7 @@ function make_netdev { fi echo $NSIM_ID $@ > /sys/bus/netdevsim/new_device + udevadm settle # get new device name ls /sys/bus/netdevsim/devices/netdevsim${NSIM_ID}/net/ } diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh index 0c56746e9c..7d7829f575 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh @@ -8,16 +8,20 @@ NSIM_NETDEV=$(make_netdev) 
set -o pipefail +# Since commit 2b3ddcb35357 ("ethtool: fec: Change the prompt ...") +# in ethtool CLI the Configured lines start with Supported/Configured. +configured=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2 | head -1 | cut -d' ' -f1) + # netdevsim starts out with None/None s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) -check $? "$s" "Configured FEC encodings: None +check $? "$s" "$configured FEC encodings: None Active FEC encoding: None" # Test Auto $ETHTOOL --set-fec $NSIM_NETDEV encoding auto check $? s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) -check $? "$s" "Configured FEC encodings: Auto +check $? "$s" "$configured FEC encodings: Auto Active FEC encoding: Off" # Test case in-sensitivity @@ -25,7 +29,7 @@ for o in off Off OFF; do $ETHTOOL --set-fec $NSIM_NETDEV encoding $o check $? s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) - check $? "$s" "Configured FEC encodings: Off + check $? "$s" "$configured FEC encodings: Off Active FEC encoding: Off" done @@ -33,7 +37,7 @@ for o in BaseR baser BAser; do $ETHTOOL --set-fec $NSIM_NETDEV encoding $o check $? s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) - check $? "$s" "Configured FEC encodings: BaseR + check $? "$s" "$configured FEC encodings: BaseR Active FEC encoding: BaseR" done @@ -41,7 +45,7 @@ for o in llrs rs; do $ETHTOOL --set-fec $NSIM_NETDEV encoding $o check $? s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) - check $? "$s" "Configured FEC encodings: ${o^^} + check $? "$s" "$configured FEC encodings: ${o^^} Active FEC encoding: ${o^^}" done @@ -49,13 +53,13 @@ done $ETHTOOL --set-fec $NSIM_NETDEV encoding rs llrs check $? s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) -check $? "$s" "Configured FEC encodings: RS LLRS +check $? "$s" "$configured FEC encodings: RS LLRS Active FEC encoding: LLRS" $ETHTOOL --set-fec $NSIM_NETDEV encoding rs off auto check $? s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2) -check $? "$s" "Configured FEC encodings: Auto Off RS +check $? 
"$s" "$configured FEC encodings: Auto Off RS Active FEC encoding: RS" # Make sure other link modes are rejected diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh index 185b02d2d4..f98435c502 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -233,6 +233,7 @@ function print_tables { function get_netdev_name { local -n old=$1 + udevadm settle new=$(ls /sys/class/net) for netdev in $new; do diff --git a/tools/testing/selftests/filesystems/overlayfs/.gitignore b/tools/testing/selftests/filesystems/overlayfs/.gitignore new file mode 100644 index 0000000000..52ae618fdd --- /dev/null +++ b/tools/testing/selftests/filesystems/overlayfs/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +dev_in_maps diff --git a/tools/testing/selftests/filesystems/overlayfs/Makefile b/tools/testing/selftests/filesystems/overlayfs/Makefile new file mode 100644 index 0000000000..56b2b48a76 --- /dev/null +++ b/tools/testing/selftests/filesystems/overlayfs/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_GEN_PROGS := dev_in_maps + +CFLAGS := -Wall -Werror + +include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c new file mode 100644 index 0000000000..e19ab0e857 --- /dev/null +++ b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include <inttypes.h> +#include <unistd.h> +#include <stdio.h> + +#include <linux/unistd.h> +#include <linux/types.h> +#include <linux/mount.h> +#include <sys/syscall.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <sys/mman.h> +#include <sched.h> +#include <fcntl.h> + +#include "../../kselftest.h" +#include "log.h" + +static int sys_fsopen(const 
char *fsname, unsigned int flags) +{ + return syscall(__NR_fsopen, fsname, flags); +} + +static int sys_fsconfig(int fd, unsigned int cmd, const char *key, const char *value, int aux) +{ + return syscall(__NR_fsconfig, fd, cmd, key, value, aux); +} + +static int sys_fsmount(int fd, unsigned int flags, unsigned int attr_flags) +{ + return syscall(__NR_fsmount, fd, flags, attr_flags); +} + +static int sys_move_mount(int from_dfd, const char *from_pathname, + int to_dfd, const char *to_pathname, + unsigned int flags) +{ + return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, to_pathname, flags); +} + +static long get_file_dev_and_inode(void *addr, struct statx *stx) +{ + char buf[4096]; + FILE *mapf; + + mapf = fopen("/proc/self/maps", "r"); + if (mapf == NULL) + return pr_perror("fopen(/proc/self/maps)"); + + while (fgets(buf, sizeof(buf), mapf)) { + unsigned long start, end; + uint32_t maj, min; + __u64 ino; + + if (sscanf(buf, "%lx-%lx %*s %*s %x:%x %llu", + &start, &end, &maj, &min, &ino) != 5) + return pr_perror("unable to parse: %s", buf); + if (start == (unsigned long)addr) { + stx->stx_dev_major = maj; + stx->stx_dev_minor = min; + stx->stx_ino = ino; + return 0; + } + } + + return pr_err("unable to find the mapping"); +} + +static int ovl_mount(void) +{ + int tmpfs, fsfd, ovl; + + fsfd = sys_fsopen("tmpfs", 0); + if (fsfd == -1) + return pr_perror("fsopen(tmpfs)"); + + if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) == -1) + return pr_perror("FSCONFIG_CMD_CREATE"); + + tmpfs = sys_fsmount(fsfd, 0, 0); + if (tmpfs == -1) + return pr_perror("fsmount"); + + close(fsfd); + + /* overlayfs can't be constructed on top of a detached mount. 
*/ + if (sys_move_mount(tmpfs, "", AT_FDCWD, "/tmp", MOVE_MOUNT_F_EMPTY_PATH)) + return pr_perror("move_mount"); + close(tmpfs); + + if (mkdir("/tmp/w", 0755) == -1 || + mkdir("/tmp/u", 0755) == -1 || + mkdir("/tmp/l", 0755) == -1) + return pr_perror("mkdir"); + + fsfd = sys_fsopen("overlay", 0); + if (fsfd == -1) + return pr_perror("fsopen(overlay)"); + if (sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "source", "test", 0) == -1 || + sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "lowerdir", "/tmp/l", 0) == -1 || + sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "upperdir", "/tmp/u", 0) == -1 || + sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "workdir", "/tmp/w", 0) == -1) + return pr_perror("fsconfig"); + if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) == -1) + return pr_perror("fsconfig"); + ovl = sys_fsmount(fsfd, 0, 0); + if (ovl == -1) + return pr_perror("fsmount"); + + return ovl; +} + +/* + * Check that the file device and inode shown in /proc/pid/maps match values + * returned by stat(2). + */ +static int test(void) +{ + struct statx stx, mstx; + int ovl, fd; + void *addr; + + ovl = ovl_mount(); + if (ovl == -1) + return -1; + + fd = openat(ovl, "test", O_RDWR | O_CREAT, 0644); + if (fd == -1) + return pr_perror("openat"); + + addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 0); + if (addr == MAP_FAILED) + return pr_perror("mmap"); + + if (get_file_dev_and_inode(addr, &mstx)) + return -1; + if (statx(fd, "", AT_EMPTY_PATH | AT_STATX_SYNC_AS_STAT, STATX_INO, &stx)) + return pr_perror("statx"); + + if (stx.stx_dev_major != mstx.stx_dev_major || + stx.stx_dev_minor != mstx.stx_dev_minor || + stx.stx_ino != mstx.stx_ino) + return pr_fail("unmatched dev:ino %x:%x:%llx (expected %x:%x:%llx)\n", + mstx.stx_dev_major, mstx.stx_dev_minor, mstx.stx_ino, + stx.stx_dev_major, stx.stx_dev_minor, stx.stx_ino); + + ksft_test_result_pass("devices are matched\n"); + return 0; +} + +int main(int argc, char **argv) +{ + int fsfd; + + fsfd = sys_fsopen("overlay", 
0); + if (fsfd == -1) { + ksft_test_result_skip("unable to create overlay mount\n"); + return 1; + } + close(fsfd); + + /* Create a new mount namespace to not care about cleaning test mounts. */ + if (unshare(CLONE_NEWNS) == -1) { + ksft_test_result_skip("unable to create a new mount namespace\n"); + return 1; + } + + if (mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) == -1) { + pr_perror("mount"); + return 1; + } + + ksft_set_plan(1); + + if (test()) + return 1; + + ksft_exit_pass(); + return 0; +} diff --git a/tools/testing/selftests/filesystems/overlayfs/log.h b/tools/testing/selftests/filesystems/overlayfs/log.h new file mode 100644 index 0000000000..db64df2a84 --- /dev/null +++ b/tools/testing/selftests/filesystems/overlayfs/log.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __SELFTEST_TIMENS_LOG_H__ +#define __SELFTEST_TIMENS_LOG_H__ + +#define pr_msg(fmt, lvl, ...) \ + ksft_print_msg("[%s] (%s:%d)\t" fmt "\n", \ + lvl, __FILE__, __LINE__, ##__VA_ARGS__) + +#define pr_p(func, fmt, ...) func(fmt ": %m", ##__VA_ARGS__) + +#define pr_err(fmt, ...) \ + ({ \ + ksft_test_result_error(fmt "\n", ##__VA_ARGS__); \ + -1; \ + }) + +#define pr_fail(fmt, ...) \ + ({ \ + ksft_test_result_fail(fmt, ##__VA_ARGS__); \ + -1; \ + }) + +#define pr_perror(fmt, ...) 
pr_p(pr_err, fmt, ##__VA_ARGS__) + +#endif diff --git a/tools/testing/selftests/filesystems/statmount/.gitignore b/tools/testing/selftests/filesystems/statmount/.gitignore new file mode 100644 index 0000000000..82a4846cbc --- /dev/null +++ b/tools/testing/selftests/filesystems/statmount/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +/*_test diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile new file mode 100644 index 0000000000..07a0d5b545 --- /dev/null +++ b/tools/testing/selftests/filesystems/statmount/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) +TEST_GEN_PROGS := statmount_test + +include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c new file mode 100644 index 0000000000..3eafd7da58 --- /dev/null +++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c @@ -0,0 +1,612 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#define _GNU_SOURCE + +#include <assert.h> +#include <stdint.h> +#include <sched.h> +#include <fcntl.h> +#include <sys/param.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/statfs.h> +#include <linux/mount.h> +#include <linux/stat.h> +#include <asm/unistd.h> + +#include "../../kselftest.h" + +static const char *const known_fs[] = { + "9p", "adfs", "affs", "afs", "aio", "anon_inodefs", "apparmorfs", + "autofs", "bcachefs", "bdev", "befs", "bfs", "binder", "binfmt_misc", + "bpf", "btrfs", "btrfs_test_fs", "ceph", "cgroup", "cgroup2", "cifs", + "coda", "configfs", "cpuset", "cramfs", "cxl", "dax", "debugfs", + "devpts", "devtmpfs", "dmabuf", "drm", "ecryptfs", "efivarfs", "efs", + "erofs", "exfat", "ext2", "ext3", "ext4", "f2fs", "functionfs", + "fuse", "fuseblk", "fusectl", "gadgetfs", "gfs2", "gfs2meta", "hfs", + "hfsplus", "hostfs", "hpfs", 
"hugetlbfs", "ibmasmfs", "iomem", + "ipathfs", "iso9660", "jffs2", "jfs", "minix", "mqueue", "msdos", + "nfs", "nfs4", "nfsd", "nilfs2", "nsfs", "ntfs", "ntfs3", "ocfs2", + "ocfs2_dlmfs", "ocxlflash", "omfs", "openpromfs", "overlay", "pipefs", + "proc", "pstore", "pvfs2", "qnx4", "qnx6", "ramfs", "reiserfs", + "resctrl", "romfs", "rootfs", "rpc_pipefs", "s390_hypfs", "secretmem", + "securityfs", "selinuxfs", "smackfs", "smb3", "sockfs", "spufs", + "squashfs", "sysfs", "sysv", "tmpfs", "tracefs", "ubifs", "udf", + "ufs", "v7", "vboxsf", "vfat", "virtiofs", "vxfs", "xenfs", "xfs", + "zonefs", NULL }; + +static int statmount(uint64_t mnt_id, uint64_t mask, struct statmount *buf, + size_t bufsize, unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER0, + .mnt_id = mnt_id, + .param = mask, + }; + + return syscall(__NR_statmount, &req, buf, bufsize, flags); +} + +static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags) +{ + size_t bufsize = 1 << 15; + struct statmount *buf = NULL, *tmp = alloca(bufsize); + int tofree = 0; + int ret; + + for (;;) { + ret = statmount(mnt_id, mask, tmp, bufsize, flags); + if (ret != -1) + break; + if (tofree) + free(tmp); + if (errno != EOVERFLOW) + return NULL; + bufsize <<= 1; + tofree = 1; + tmp = malloc(bufsize); + if (!tmp) + return NULL; + } + buf = malloc(tmp->size); + if (buf) + memcpy(buf, tmp, tmp->size); + if (tofree) + free(tmp); + + return buf; +} + +static void write_file(const char *path, const char *val) +{ + int fd = open(path, O_WRONLY); + size_t len = strlen(val); + int ret; + + if (fd == -1) + ksft_exit_fail_msg("opening %s for write: %s\n", path, strerror(errno)); + + ret = write(fd, val, len); + if (ret == -1) + ksft_exit_fail_msg("writing to %s: %s\n", path, strerror(errno)); + if (ret != len) + ksft_exit_fail_msg("short write to %s\n", path); + + ret = close(fd); + if (ret == -1) + ksft_exit_fail_msg("closing %s\n", path); +} + +static uint64_t 
get_mnt_id(const char *name, const char *path, uint64_t mask) +{ + struct statx sx; + int ret; + + ret = statx(AT_FDCWD, path, 0, mask, &sx); + if (ret == -1) + ksft_exit_fail_msg("retrieving %s mount ID for %s: %s\n", + mask & STATX_MNT_ID_UNIQUE ? "unique" : "old", + name, strerror(errno)); + if (!(sx.stx_mask & mask)) + ksft_exit_fail_msg("no %s mount ID available for %s\n", + mask & STATX_MNT_ID_UNIQUE ? "unique" : "old", + name); + + return sx.stx_mnt_id; +} + + +static char root_mntpoint[] = "/tmp/statmount_test_root.XXXXXX"; +static int orig_root; +static uint64_t root_id, parent_id; +static uint32_t old_root_id, old_parent_id; + + +static void cleanup_namespace(void) +{ + fchdir(orig_root); + chroot("."); + umount2(root_mntpoint, MNT_DETACH); + rmdir(root_mntpoint); +} + +static void setup_namespace(void) +{ + int ret; + char buf[32]; + uid_t uid = getuid(); + gid_t gid = getgid(); + + ret = unshare(CLONE_NEWNS|CLONE_NEWUSER); + if (ret == -1) + ksft_exit_fail_msg("unsharing mountns and userns: %s\n", + strerror(errno)); + + sprintf(buf, "0 %d 1", uid); + write_file("/proc/self/uid_map", buf); + write_file("/proc/self/setgroups", "deny"); + sprintf(buf, "0 %d 1", gid); + write_file("/proc/self/gid_map", buf); + + ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); + if (ret == -1) + ksft_exit_fail_msg("making mount tree private: %s\n", + strerror(errno)); + + if (!mkdtemp(root_mntpoint)) + ksft_exit_fail_msg("creating temporary directory %s: %s\n", + root_mntpoint, strerror(errno)); + + old_parent_id = get_mnt_id("parent", root_mntpoint, STATX_MNT_ID); + parent_id = get_mnt_id("parent", root_mntpoint, STATX_MNT_ID_UNIQUE); + + orig_root = open("/", O_PATH); + if (orig_root == -1) + ksft_exit_fail_msg("opening root directory: %s", + strerror(errno)); + + atexit(cleanup_namespace); + + ret = mount(root_mntpoint, root_mntpoint, NULL, MS_BIND, NULL); + if (ret == -1) + ksft_exit_fail_msg("mounting temp root %s: %s\n", + root_mntpoint, strerror(errno)); + + ret 
= chroot(root_mntpoint); + if (ret == -1) + ksft_exit_fail_msg("chroot to temp root %s: %s\n", + root_mntpoint, strerror(errno)); + + ret = chdir("/"); + if (ret == -1) + ksft_exit_fail_msg("chdir to root: %s\n", strerror(errno)); + + old_root_id = get_mnt_id("root", "/", STATX_MNT_ID); + root_id = get_mnt_id("root", "/", STATX_MNT_ID_UNIQUE); +} + +static int setup_mount_tree(int log2_num) +{ + int ret, i; + + ret = mount("", "/", NULL, MS_REC|MS_SHARED, NULL); + if (ret == -1) { + ksft_test_result_fail("making mount tree shared: %s\n", + strerror(errno)); + return -1; + } + + for (i = 0; i < log2_num; i++) { + ret = mount("/", "/", NULL, MS_BIND, NULL); + if (ret == -1) { + ksft_test_result_fail("mounting submount %s: %s\n", + root_mntpoint, strerror(errno)); + return -1; + } + } + return 0; +} + +static ssize_t listmount(uint64_t mnt_id, uint64_t last_mnt_id, + uint64_t list[], size_t num, unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER0, + .mnt_id = mnt_id, + .param = last_mnt_id, + }; + + return syscall(__NR_listmount, &req, list, num, flags); +} + +static void test_listmount_empty_root(void) +{ + ssize_t res; + const unsigned int size = 32; + uint64_t list[size]; + + res = listmount(LSMT_ROOT, 0, list, size, 0); + if (res == -1) { + ksft_test_result_fail("listmount: %s\n", strerror(errno)); + return; + } + if (res != 1) { + ksft_test_result_fail("listmount result is %zi != 1\n", res); + return; + } + + if (list[0] != root_id) { + ksft_test_result_fail("listmount ID doesn't match 0x%llx != 0x%llx\n", + (unsigned long long) list[0], + (unsigned long long) root_id); + return; + } + + ksft_test_result_pass("listmount empty root\n"); +} + +static void test_statmount_zero_mask(void) +{ + struct statmount sm; + int ret; + + ret = statmount(root_id, 0, &sm, sizeof(sm), 0); + if (ret == -1) { + ksft_test_result_fail("statmount zero mask: %s\n", + strerror(errno)); + return; + } + if (sm.size != sizeof(sm)) { + 
ksft_test_result_fail("unexpected size: %u != %u\n", + sm.size, (uint32_t) sizeof(sm)); + return; + } + if (sm.mask != 0) { + ksft_test_result_fail("unexpected mask: 0x%llx != 0x0\n", + (unsigned long long) sm.mask); + return; + } + + ksft_test_result_pass("statmount zero mask\n"); +} + +static void test_statmount_mnt_basic(void) +{ + struct statmount sm; + int ret; + uint64_t mask = STATMOUNT_MNT_BASIC; + + ret = statmount(root_id, mask, &sm, sizeof(sm), 0); + if (ret == -1) { + ksft_test_result_fail("statmount mnt basic: %s\n", + strerror(errno)); + return; + } + if (sm.size != sizeof(sm)) { + ksft_test_result_fail("unexpected size: %u != %u\n", + sm.size, (uint32_t) sizeof(sm)); + return; + } + if (sm.mask != mask) { + ksft_test_result_skip("statmount mnt basic unavailable\n"); + return; + } + + if (sm.mnt_id != root_id) { + ksft_test_result_fail("unexpected root ID: 0x%llx != 0x%llx\n", + (unsigned long long) sm.mnt_id, + (unsigned long long) root_id); + return; + } + + if (sm.mnt_id_old != old_root_id) { + ksft_test_result_fail("unexpected old root ID: %u != %u\n", + sm.mnt_id_old, old_root_id); + return; + } + + if (sm.mnt_parent_id != parent_id) { + ksft_test_result_fail("unexpected parent ID: 0x%llx != 0x%llx\n", + (unsigned long long) sm.mnt_parent_id, + (unsigned long long) parent_id); + return; + } + + if (sm.mnt_parent_id_old != old_parent_id) { + ksft_test_result_fail("unexpected old parent ID: %u != %u\n", + sm.mnt_parent_id_old, old_parent_id); + return; + } + + if (sm.mnt_propagation != MS_PRIVATE) { + ksft_test_result_fail("unexpected propagation: 0x%llx\n", + (unsigned long long) sm.mnt_propagation); + return; + } + + ksft_test_result_pass("statmount mnt basic\n"); +} + + +static void test_statmount_sb_basic(void) +{ + struct statmount sm; + int ret; + uint64_t mask = STATMOUNT_SB_BASIC; + struct statx sx; + struct statfs sf; + + ret = statmount(root_id, mask, &sm, sizeof(sm), 0); + if (ret == -1) { + ksft_test_result_fail("statmount sb basic: 
%s\n", + strerror(errno)); + return; + } + if (sm.size != sizeof(sm)) { + ksft_test_result_fail("unexpected size: %u != %u\n", + sm.size, (uint32_t) sizeof(sm)); + return; + } + if (sm.mask != mask) { + ksft_test_result_skip("statmount sb basic unavailable\n"); + return; + } + + ret = statx(AT_FDCWD, "/", 0, 0, &sx); + if (ret == -1) { + ksft_test_result_fail("stat root failed: %s\n", + strerror(errno)); + return; + } + + if (sm.sb_dev_major != sx.stx_dev_major || + sm.sb_dev_minor != sx.stx_dev_minor) { + ksft_test_result_fail("unexpected sb dev %u:%u != %u:%u\n", + sm.sb_dev_major, sm.sb_dev_minor, + sx.stx_dev_major, sx.stx_dev_minor); + return; + } + + ret = statfs("/", &sf); + if (ret == -1) { + ksft_test_result_fail("statfs root failed: %s\n", + strerror(errno)); + return; + } + + if (sm.sb_magic != sf.f_type) { + ksft_test_result_fail("unexpected sb magic: 0x%llx != 0x%lx\n", + (unsigned long long) sm.sb_magic, + sf.f_type); + return; + } + + ksft_test_result_pass("statmount sb basic\n"); +} + +static void test_statmount_mnt_point(void) +{ + struct statmount *sm; + + sm = statmount_alloc(root_id, STATMOUNT_MNT_POINT, 0); + if (!sm) { + ksft_test_result_fail("statmount mount point: %s\n", + strerror(errno)); + return; + } + + if (strcmp(sm->str + sm->mnt_point, "/") != 0) { + ksft_test_result_fail("unexpected mount point: '%s' != '/'\n", + sm->str + sm->mnt_point); + goto out; + } + ksft_test_result_pass("statmount mount point\n"); +out: + free(sm); +} + +static void test_statmount_mnt_root(void) +{ + struct statmount *sm; + const char *mnt_root, *last_dir, *last_root; + + last_dir = strrchr(root_mntpoint, '/'); + assert(last_dir); + last_dir++; + + sm = statmount_alloc(root_id, STATMOUNT_MNT_ROOT, 0); + if (!sm) { + ksft_test_result_fail("statmount mount root: %s\n", + strerror(errno)); + return; + } + mnt_root = sm->str + sm->mnt_root; + last_root = strrchr(mnt_root, '/'); + if (last_root) + last_root++; + else + last_root = mnt_root; + + if 
(strcmp(last_dir, last_root) != 0) { + ksft_test_result_fail("unexpected mount root last component: '%s' != '%s'\n", + last_root, last_dir); + goto out; + } + ksft_test_result_pass("statmount mount root\n"); +out: + free(sm); +} + +static void test_statmount_fs_type(void) +{ + struct statmount *sm; + const char *fs_type; + const char *const *s; + + sm = statmount_alloc(root_id, STATMOUNT_FS_TYPE, 0); + if (!sm) { + ksft_test_result_fail("statmount fs type: %s\n", + strerror(errno)); + return; + } + fs_type = sm->str + sm->fs_type; + for (s = known_fs; s != NULL; s++) { + if (strcmp(fs_type, *s) == 0) + break; + } + if (!s) + ksft_print_msg("unknown filesystem type: %s\n", fs_type); + + ksft_test_result_pass("statmount fs type\n"); + free(sm); +} + +static void test_statmount_string(uint64_t mask, size_t off, const char *name) +{ + struct statmount *sm; + size_t len, shortsize, exactsize; + uint32_t start, i; + int ret; + + sm = statmount_alloc(root_id, mask, 0); + if (!sm) { + ksft_test_result_fail("statmount %s: %s\n", name, + strerror(errno)); + goto out; + } + if (sm->size < sizeof(*sm)) { + ksft_test_result_fail("unexpected size: %u < %u\n", + sm->size, (uint32_t) sizeof(*sm)); + goto out; + } + if (sm->mask != mask) { + ksft_test_result_skip("statmount %s unavailable\n", name); + goto out; + } + len = sm->size - sizeof(*sm); + start = ((uint32_t *) sm)[off]; + + for (i = start;; i++) { + if (i >= len) { + ksft_test_result_fail("string out of bounds\n"); + goto out; + } + if (!sm->str[i]) + break; + } + exactsize = sm->size; + shortsize = sizeof(*sm) + i; + + ret = statmount(root_id, mask, sm, exactsize, 0); + if (ret == -1) { + ksft_test_result_fail("statmount exact size: %s\n", + strerror(errno)); + goto out; + } + errno = 0; + ret = statmount(root_id, mask, sm, shortsize, 0); + if (ret != -1 || errno != EOVERFLOW) { + ksft_test_result_fail("should have failed with EOVERFLOW: %s\n", + strerror(errno)); + goto out; + } + + ksft_test_result_pass("statmount 
string %s\n", name); +out: + free(sm); +} + +static void test_listmount_tree(void) +{ + ssize_t res; + const unsigned int log2_num = 4; + const unsigned int step = 3; + const unsigned int size = (1 << log2_num) + step + 1; + size_t num, expect = 1 << log2_num; + uint64_t list[size]; + uint64_t list2[size]; + size_t i; + + + res = setup_mount_tree(log2_num); + if (res == -1) + return; + + num = res = listmount(LSMT_ROOT, 0, list, size, 0); + if (res == -1) { + ksft_test_result_fail("listmount: %s\n", strerror(errno)); + return; + } + if (num != expect) { + ksft_test_result_fail("listmount result is %zi != %zi\n", + res, expect); + return; + } + + for (i = 0; i < size - step;) { + res = listmount(LSMT_ROOT, i ? list2[i - 1] : 0, list2 + i, step, 0); + if (res == -1) + ksft_test_result_fail("short listmount: %s\n", + strerror(errno)); + i += res; + if (res < step) + break; + } + if (i != num) { + ksft_test_result_fail("different number of entries: %zu != %zu\n", + i, num); + return; + } + for (i = 0; i < num; i++) { + if (list2[i] != list[i]) { + ksft_test_result_fail("different value for entry %zu: 0x%llx != 0x%llx\n", + i, + (unsigned long long) list2[i], + (unsigned long long) list[i]); + } + } + + ksft_test_result_pass("listmount tree\n"); +} + +#define str_off(memb) (offsetof(struct statmount, memb) / sizeof(uint32_t)) + +int main(void) +{ + int ret; + uint64_t all_mask = STATMOUNT_SB_BASIC | STATMOUNT_MNT_BASIC | + STATMOUNT_PROPAGATE_FROM | STATMOUNT_MNT_ROOT | + STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE; + + ksft_print_header(); + + ret = statmount(0, 0, NULL, 0, 0); + assert(ret == -1); + if (errno == ENOSYS) + ksft_exit_skip("statmount() syscall not supported\n"); + + setup_namespace(); + + ksft_set_plan(14); + test_listmount_empty_root(); + test_statmount_zero_mask(); + test_statmount_mnt_basic(); + test_statmount_sb_basic(); + test_statmount_mnt_root(); + test_statmount_mnt_point(); + test_statmount_fs_type(); + test_statmount_string(STATMOUNT_MNT_ROOT, 
# Print the size in bytes of the ring buffer event header.
# The bit widths of the type_len, time_delta and array fields are read
# from events/header_event, summed, and rounded up to whole bytes.
get_event_header_size() {
	type_len=$(sed -ne 's/^.*type_len.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event)
	time_len=$(sed -ne 's/^.*time_delta.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event)
	array_len=$(sed -ne 's/^.*array.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event)
	# +7 so the integer division below rounds up
	total_bits=$((type_len + time_len + array_len + 7))
	echo $((total_bits / 8))
}
# Check one sub-buffer size.
#   $1 - sub-buffer size in KiB that was just written to buffer_subbuf_size_kb
# Fails the test case (exit_fail) when the reported data size is too small,
# or when a marker string of that size does not come back split across
# events yet complete.
test_buffer() {
	size_kb=$1
	page_size=$((size_kb*1024))

	size=`get_buffer_data_size`

	# the size must be greater than or equal to page_size - data_offset
	page_size=$((page_size-data_offset))
	if [ $size -lt $page_size ]; then
		exit_fail
	fi

	# Write a string as large as the data area; with the event meta data
	# on top it cannot fit in a single event.

	str=`write_buffer $size`

	# Make sure the line was broken: the first event alone must not hold
	# the whole string
	new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; exit}' trace`

	if [ "$new_str" = "$str" ]; then
		exit_fail
	fi

	# Make sure the entire line can be found when all events are joined
	new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; }' trace`

	if [ "$new_str" != "$str" ]; then
		exit_fail
	fi
}
# Check ownership of one tracefs entry.
#   $1 - file or directory to check
#   $2 - group id the entry is expected to have
# The owner must be $original_owner and the group must be $2.  The owner is
# then changed to $other_owner and back, verifying each change took effect.
# Any mismatch ends the test case through exit_fail.
test() {
	file=$1
	test_group=$2

	owner=$(stat -c "%u" $file)
	group=$(stat -c "%g" $file)

	echo "testing $file $owner=$original_owner and $group=$test_group"
	if [ $owner -ne $original_owner ] || [ $group -ne $test_group ]; then
		exit_fail
	fi

	# Note, the remount does not update ownership so test going to and from owner
	echo "test owner $file to $other_owner"
	chown $other_owner $file
	owner=$(stat -c "%u" $file)
	if [ $owner -ne $other_owner ]; then
		exit_fail
	fi

	chown $original_owner $file
	owner=$(stat -c "%u" $file)
	if [ $owner -ne $original_owner ]; then
		exit_fail
	fi

}
# Print a run of 'X' characters for a marker write of $1 bytes.
make_str() {
	# the marker adds \n\0 itself, so emit two characters fewer
	cnt=$(($1 - 2))
	printf -- 'X%.0s' $(seq $cnt)
}
# Write a marker string as large as the sub-buffer data area and check that
# it comes back split across events yet complete.  Fails the test case
# (exit_fail) otherwise.
test_buffer() {

	size=`get_buffer_data_size`
	# largest payload that still fits in one event (informational only)
	oneline_size=$((size-marker_meta))
	echo size = $size
	echo meta size = $marker_meta

	# Write a string as large as the data area; with the event meta data
	# on top it cannot fit in a single event.

	str=`write_buffer $size`

	# Make sure the line was broken: the first event alone must not hold
	# the whole string
	new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; exit}' trace`

	if [ "$new_str" = "$str" ]; then
		exit_fail
	fi

	# Make sure the entire line can be found when all events are joined
	new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; }' trace`

	if [ "$new_str" != "$str" ]; then
		exit_fail
	fi
}
b/tools/testing/selftests/hid/config index 4f425178b5..1758b055f2 100644 --- a/tools/testing/selftests/hid/config +++ b/tools/testing/selftests/hid/config @@ -1,5 +1,4 @@ CONFIG_BPF_EVENTS=y -CONFIG_BPFILTER=y CONFIG_BPF_JIT_ALWAYS_ON=y CONFIG_BPF_JIT=y CONFIG_BPF_KPROBE_OVERRIDE=y diff --git a/tools/testing/selftests/hid/tests/base.py b/tools/testing/selftests/hid/tests/base.py index 1305cfc964..51433063b2 100644 --- a/tools/testing/selftests/hid/tests/base.py +++ b/tools/testing/selftests/hid/tests/base.py @@ -14,7 +14,7 @@ import logging from hidtools.device.base_device import BaseDevice, EvdevMatch, SysfsFile from pathlib import Path -from typing import Final +from typing import Final, List, Tuple logger = logging.getLogger("hidtools.test.base") @@ -155,7 +155,7 @@ class BaseTestCase: # if any module is not available (not compiled), the test will skip. # Each element is a tuple '(kernel driver name, kernel module)', # for example ("playstation", "hid-playstation") - kernel_modules = [] + kernel_modules: List[Tuple[str, str]] = [] def assertInputEventsIn(self, expected_events, effective_events): effective_events = effective_events.copy() @@ -238,8 +238,7 @@ class BaseTestCase: try: with HIDTestUdevRule.instance(): with new_uhdev as self.uhdev: - skip_cond = request.node.get_closest_marker("skip_if_uhdev") - if skip_cond: + for skip_cond in request.node.iter_markers("skip_if_uhdev"): test, message, *rest = skip_cond.args if test(self.uhdev): diff --git a/tools/testing/selftests/hid/tests/test_mouse.py b/tools/testing/selftests/hid/tests/test_mouse.py index fd2ba62e78..66daf7e597 100644 --- a/tools/testing/selftests/hid/tests/test_mouse.py +++ b/tools/testing/selftests/hid/tests/test_mouse.py @@ -52,13 +52,13 @@ class BaseMouse(base.UHIDTestDevice): :param reportID: the numeric report ID for this report, if needed """ if buttons is not None: - l, r, m = buttons - if l is not None: - self.left = l - if r is not None: - self.right = r - if m is not None: - 
self.middle = m + left, right, middle = buttons + if left is not None: + self.left = left + if right is not None: + self.right = right + if middle is not None: + self.middle = middle left = self.left right = self.right middle = self.middle diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py index 303ffff9ee..903f19f7cb 100644 --- a/tools/testing/selftests/hid/tests/test_tablet.py +++ b/tools/testing/selftests/hid/tests/test_tablet.py @@ -13,62 +13,133 @@ from hidtools.util import BusType import libevdev import logging import pytest -from typing import Dict, Tuple +from typing import Dict, List, Optional, Tuple logger = logging.getLogger("hidtools.test.tablet") +class BtnTouch(Enum): + """Represents whether the BTN_TOUCH event is set to True or False""" + + DOWN = True + UP = False + + +class ToolType(Enum): + PEN = libevdev.EV_KEY.BTN_TOOL_PEN + RUBBER = libevdev.EV_KEY.BTN_TOOL_RUBBER + + +class BtnPressed(Enum): + """Represents whether a button is pressed on the stylus""" + + PRIMARY_PRESSED = libevdev.EV_KEY.BTN_STYLUS + SECONDARY_PRESSED = libevdev.EV_KEY.BTN_STYLUS2 + + class PenState(Enum): """Pen states according to Microsoft reference: https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states - """ - PEN_IS_OUT_OF_RANGE = (False, None) - PEN_IS_IN_RANGE = (False, libevdev.EV_KEY.BTN_TOOL_PEN) - PEN_IS_IN_CONTACT = (True, libevdev.EV_KEY.BTN_TOOL_PEN) - PEN_IS_IN_RANGE_WITH_ERASING_INTENT = (False, libevdev.EV_KEY.BTN_TOOL_RUBBER) - PEN_IS_ERASING = (True, libevdev.EV_KEY.BTN_TOOL_RUBBER) + We extend it with the various buttons when we need to check them. 
+ """ - def __init__(self, touch, tool): - self.touch = touch - self.tool = tool + PEN_IS_OUT_OF_RANGE = BtnTouch.UP, None, None + PEN_IS_IN_RANGE = BtnTouch.UP, ToolType.PEN, None + PEN_IS_IN_RANGE_WITH_BUTTON = BtnTouch.UP, ToolType.PEN, BtnPressed.PRIMARY_PRESSED + PEN_IS_IN_RANGE_WITH_SECOND_BUTTON = ( + BtnTouch.UP, + ToolType.PEN, + BtnPressed.SECONDARY_PRESSED, + ) + PEN_IS_IN_CONTACT = BtnTouch.DOWN, ToolType.PEN, None + PEN_IS_IN_CONTACT_WITH_BUTTON = ( + BtnTouch.DOWN, + ToolType.PEN, + BtnPressed.PRIMARY_PRESSED, + ) + PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON = ( + BtnTouch.DOWN, + ToolType.PEN, + BtnPressed.SECONDARY_PRESSED, + ) + PEN_IS_IN_RANGE_WITH_ERASING_INTENT = BtnTouch.UP, ToolType.RUBBER, None + PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_BUTTON = ( + BtnTouch.UP, + ToolType.RUBBER, + BtnPressed.PRIMARY_PRESSED, + ) + PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_SECOND_BUTTON = ( + BtnTouch.UP, + ToolType.RUBBER, + BtnPressed.SECONDARY_PRESSED, + ) + PEN_IS_ERASING = BtnTouch.DOWN, ToolType.RUBBER, None + PEN_IS_ERASING_WITH_BUTTON = ( + BtnTouch.DOWN, + ToolType.RUBBER, + BtnPressed.PRIMARY_PRESSED, + ) + PEN_IS_ERASING_WITH_SECOND_BUTTON = ( + BtnTouch.DOWN, + ToolType.RUBBER, + BtnPressed.SECONDARY_PRESSED, + ) + + def __init__(self, touch: BtnTouch, tool: Optional[ToolType], button: Optional[BtnPressed]): + self.touch = touch # type: ignore + self.tool = tool # type: ignore + self.button = button # type: ignore @classmethod def from_evdev(cls, evdev) -> "PenState": - touch = bool(evdev.value[libevdev.EV_KEY.BTN_TOUCH]) + touch = BtnTouch(evdev.value[libevdev.EV_KEY.BTN_TOUCH]) tool = None + button = None if ( evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER] and not evdev.value[libevdev.EV_KEY.BTN_TOOL_PEN] ): - tool = libevdev.EV_KEY.BTN_TOOL_RUBBER + tool = ToolType(libevdev.EV_KEY.BTN_TOOL_RUBBER) elif ( evdev.value[libevdev.EV_KEY.BTN_TOOL_PEN] and not evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER] ): - tool = libevdev.EV_KEY.BTN_TOOL_PEN + tool = 
ToolType(libevdev.EV_KEY.BTN_TOOL_PEN) elif ( evdev.value[libevdev.EV_KEY.BTN_TOOL_PEN] or evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER] ): raise ValueError("2 tools are not allowed") - return cls((touch, tool)) + # we take only the highest button in account + for b in [libevdev.EV_KEY.BTN_STYLUS, libevdev.EV_KEY.BTN_STYLUS2]: + if bool(evdev.value[b]): + button = BtnPressed(b) - def apply(self, events) -> "PenState": + # the kernel tends to insert an EV_SYN once removing the tool, so + # the button will be released after + if tool is None: + button = None + + return cls((touch, tool, button)) # type: ignore + + def apply(self, events: List[libevdev.InputEvent], strict: bool) -> "PenState": if libevdev.EV_SYN.SYN_REPORT in events: raise ValueError("EV_SYN is in the event sequence") touch = self.touch touch_found = False tool = self.tool tool_found = False + button = self.button + button_found = False for ev in events: if ev == libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH): if touch_found: raise ValueError(f"duplicated BTN_TOUCH in {events}") touch_found = True - touch = bool(ev.value) + touch = BtnTouch(ev.value) elif ev in ( libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_PEN), libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_RUBBER), @@ -76,35 +147,55 @@ class PenState(Enum): if tool_found: raise ValueError(f"duplicated BTN_TOOL_* in {events}") tool_found = True - if ev.value: - tool = ev.code - else: - tool = None - - new_state = PenState((touch, tool)) - assert ( - new_state in self.valid_transitions() - ), f"moving from {self} to {new_state} is forbidden" + tool = ToolType(ev.code) if ev.value else None + elif ev in ( + libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS), + libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS2), + ): + if button_found: + raise ValueError(f"duplicated BTN_STYLUS* in {events}") + button_found = True + button = BtnPressed(ev.code) if ev.value else None + + # the kernel tends to insert an EV_SYN once removing the tool, so + # the button will be 
released after + if tool is None: + button = None + + new_state = PenState((touch, tool, button)) # type: ignore + if strict: + assert ( + new_state in self.valid_transitions() + ), f"moving from {self} to {new_state} is forbidden" + else: + assert ( + new_state in self.historically_tolerated_transitions() + ), f"moving from {self} to {new_state} is forbidden" return new_state def valid_transitions(self) -> Tuple["PenState", ...]: - """Following the state machine in the URL above, with a couple of addition - for skipping the in-range state, due to historical reasons. + """Following the state machine in the URL above. Note that those transitions are from the evdev point of view, not HID""" if self == PenState.PEN_IS_OUT_OF_RANGE: return ( PenState.PEN_IS_OUT_OF_RANGE, PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT, PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, PenState.PEN_IS_ERASING, ) if self == PenState.PEN_IS_IN_RANGE: return ( PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, PenState.PEN_IS_OUT_OF_RANGE, PenState.PEN_IS_IN_CONTACT, ) @@ -112,8 +203,9 @@ class PenState(Enum): if self == PenState.PEN_IS_IN_CONTACT: return ( PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, PenState.PEN_IS_IN_RANGE, - PenState.PEN_IS_OUT_OF_RANGE, ) if self == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT: @@ -127,113 +219,126 @@ class PenState(Enum): return ( PenState.PEN_IS_ERASING, PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT, + ) + + if self == PenState.PEN_IS_IN_RANGE_WITH_BUTTON: + return ( + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE, PenState.PEN_IS_OUT_OF_RANGE, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, ) - return tuple() + if self == 
PenState.PEN_IS_IN_CONTACT_WITH_BUTTON: + return ( + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + ) + if self == PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON: + return ( + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_OUT_OF_RANGE, + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, + ) -class Data(object): - pass + if self == PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON: + return ( + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + ) + return tuple() -class Pen(object): - def __init__(self, x, y): - self.x = x - self.y = y - self.tipswitch = False - self.tippressure = 15 - self.azimuth = 0 - self.inrange = False - self.width = 10 - self.height = 10 - self.barrelswitch = False - self.invert = False - self.eraser = False - self.x_tilt = 0 - self.y_tilt = 0 - self.twist = 0 - self._old_values = None - self.current_state = None + def historically_tolerated_transitions(self) -> Tuple["PenState", ...]: + """Following the state machine in the URL above, with a couple of addition + for skipping the in-range state, due to historical reasons. 
- def _restore(self): - if self._old_values is not None: - for i in [ - "x", - "y", - "tippressure", - "azimuth", - "width", - "height", - "twist", - "x_tilt", - "y_tilt", - ]: - setattr(self, i, getattr(self._old_values, i)) + Note that those transitions are from the evdev point of view, not HID""" + if self == PenState.PEN_IS_OUT_OF_RANGE: + return ( + PenState.PEN_IS_OUT_OF_RANGE, + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, + PenState.PEN_IS_ERASING, + ) - def move_to(self, state): - # fill in the previous values - if self.current_state == PenState.PEN_IS_OUT_OF_RANGE: - self._restore() + if self == PenState.PEN_IS_IN_RANGE: + return ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + PenState.PEN_IS_OUT_OF_RANGE, + PenState.PEN_IS_IN_CONTACT, + ) - print(f"\n *** pen is moving to {state} ***") + if self == PenState.PEN_IS_IN_CONTACT: + return ( + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_OUT_OF_RANGE, + ) - if state == PenState.PEN_IS_OUT_OF_RANGE: - self._old_values = copy.copy(self) - self.x = 0 - self.y = 0 - self.tipswitch = False - self.tippressure = 0 - self.azimuth = 0 - self.inrange = False - self.width = 0 - self.height = 0 - self.invert = False - self.eraser = False - self.x_tilt = 0 - self.y_tilt = 0 - self.twist = 0 - elif state == PenState.PEN_IS_IN_RANGE: - self.tipswitch = False - self.inrange = True - self.invert = False - self.eraser = False - elif state == PenState.PEN_IS_IN_CONTACT: - self.tipswitch = True - self.inrange = True - self.invert = False - self.eraser = False - elif state == 
PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT: - self.tipswitch = False - self.inrange = True - self.invert = True - self.eraser = False - elif state == PenState.PEN_IS_ERASING: - self.tipswitch = False - self.inrange = True - self.invert = True - self.eraser = True + if self == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT: + return ( + PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT, + PenState.PEN_IS_OUT_OF_RANGE, + PenState.PEN_IS_ERASING, + ) - self.current_state = state + if self == PenState.PEN_IS_ERASING: + return ( + PenState.PEN_IS_ERASING, + PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT, + PenState.PEN_IS_OUT_OF_RANGE, + ) - def __assert_axis(self, evdev, axis, value): - if ( - axis == libevdev.EV_KEY.BTN_TOOL_RUBBER - and evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER] is None - ): - return + if self == PenState.PEN_IS_IN_RANGE_WITH_BUTTON: + return ( + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_OUT_OF_RANGE, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + ) - assert ( - evdev.value[axis] == value - ), f"assert evdev.value[{axis}] ({evdev.value[axis]}) != {value}" + if self == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON: + return ( + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_OUT_OF_RANGE, + ) - def assert_expected_input_events(self, evdev): - assert evdev.value[libevdev.EV_ABS.ABS_X] == self.x - assert evdev.value[libevdev.EV_ABS.ABS_Y] == self.y - assert self.current_state == PenState.from_evdev(evdev) + if self == PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON: + return ( + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_OUT_OF_RANGE, + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, + ) + + if self == PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON: + return ( + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + 
PenState.PEN_IS_OUT_OF_RANGE, + ) + + return tuple() @staticmethod - def legal_transitions() -> Dict[str, Tuple[PenState, ...]]: + def legal_transitions() -> Dict[str, Tuple["PenState", ...]]: """This is the first half of the Windows Pen Implementation state machine: we don't have Invert nor Erase bits, so just move in/out-of-range or proximity. https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states @@ -259,7 +364,7 @@ class Pen(object): } @staticmethod - def legal_transitions_with_invert() -> Dict[str, Tuple[PenState, ...]]: + def legal_transitions_with_invert() -> Dict[str, Tuple["PenState", ...]]: """This is the second half of the Windows Pen Implementation state machine: we now have Invert and Erase bits, so move in/out or proximity with the intend to erase. @@ -297,7 +402,106 @@ class Pen(object): } @staticmethod - def tolerated_transitions() -> Dict[str, Tuple[PenState, ...]]: + def legal_transitions_with_primary_button() -> Dict[str, Tuple["PenState", ...]]: + """We revisit the Windows Pen Implementation state machine: + we now have a primary button. 
+ """ + return { + "hover-button": (PenState.PEN_IS_IN_RANGE_WITH_BUTTON,), + "hover-button -> out-of-range": ( + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_OUT_OF_RANGE, + ), + "in-range -> button-press": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + ), + "in-range -> button-press -> button-release": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE, + ), + "in-range -> touch -> button-press -> button-release": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT, + ), + "in-range -> touch -> button-press -> release -> button-release": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE, + ), + "in-range -> button-press -> touch -> release -> button-release": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_RANGE, + ), + "in-range -> button-press -> touch -> button-release -> release": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT_WITH_BUTTON, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_RANGE, + ), + } + + @staticmethod + def legal_transitions_with_secondary_button() -> Dict[str, Tuple["PenState", ...]]: + """We revisit the Windows Pen Implementation state machine: + we now have a secondary button. + Note: we don't looks for 2 buttons interactions. 
+ """ + return { + "hover-button": (PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,), + "hover-button -> out-of-range": ( + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + PenState.PEN_IS_OUT_OF_RANGE, + ), + "in-range -> button-press": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + ), + "in-range -> button-press -> button-release": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_RANGE, + ), + "in-range -> touch -> button-press -> button-release": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_CONTACT, + ), + "in-range -> touch -> button-press -> release -> button-release": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_RANGE, + ), + "in-range -> button-press -> touch -> release -> button-release": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_RANGE, + ), + "in-range -> button-press -> touch -> button-release -> release": ( + PenState.PEN_IS_IN_RANGE, + PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON, + PenState.PEN_IS_IN_CONTACT, + PenState.PEN_IS_IN_RANGE, + ), + } + + @staticmethod + def tolerated_transitions() -> Dict[str, Tuple["PenState", ...]]: """This is not adhering to the Windows Pen Implementation state machine but we should expect the kernel to behave properly, mostly for historical reasons.""" @@ -310,7 +514,7 @@ class Pen(object): } @staticmethod - def tolerated_transitions_with_invert() -> Dict[str, Tuple[PenState, ...]]: + def tolerated_transitions_with_invert() -> Dict[str, Tuple["PenState", ...]]: """This is the second half of the Windows Pen Implementation state machine: 
we now have Invert and Erase bits, so move in/out or proximity with the intend to erase. @@ -325,7 +529,7 @@ class Pen(object): } @staticmethod - def broken_transitions() -> Dict[str, Tuple[PenState, ...]]: + def broken_transitions() -> Dict[str, Tuple["PenState", ...]]: """Those tests are definitely not part of the Windows specification. However, a half broken device might export those transitions. For example, a pen that has the eraser button might wobble between @@ -363,6 +567,61 @@ class Pen(object): } +class Pen(object): + def __init__(self, x, y): + self.x = x + self.y = y + self.tipswitch = False + self.tippressure = 15 + self.azimuth = 0 + self.inrange = False + self.width = 10 + self.height = 10 + self.barrelswitch = False + self.secondarybarrelswitch = False + self.invert = False + self.eraser = False + self.xtilt = 1 + self.ytilt = 1 + self.twist = 1 + self._old_values = None + self.current_state = None + + def restore(self): + if self._old_values is not None: + for i in [ + "x", + "y", + "tippressure", + "azimuth", + "width", + "height", + "twist", + "xtilt", + "ytilt", + ]: + setattr(self, i, getattr(self._old_values, i)) + + def backup(self): + self._old_values = copy.copy(self) + + def __assert_axis(self, evdev, axis, value): + if ( + axis == libevdev.EV_KEY.BTN_TOOL_RUBBER + and evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER] is None + ): + return + + assert ( + evdev.value[axis] == value + ), f"assert evdev.value[{axis}] ({evdev.value[axis]}) != {value}" + + def assert_expected_input_events(self, evdev): + assert evdev.value[libevdev.EV_ABS.ABS_X] == self.x + assert evdev.value[libevdev.EV_ABS.ABS_Y] == self.y + assert self.current_state == PenState.from_evdev(evdev) + + class PenDigitizer(base.UHIDTestDevice): def __init__( self, @@ -388,6 +647,89 @@ class PenDigitizer(base.UHIDTestDevice): continue self.fields = [f.usage_name for f in r] + def move_to(self, pen, state): + # fill in the previous values + if pen.current_state == 
PenState.PEN_IS_OUT_OF_RANGE: + pen.restore() + + print(f"\n *** pen is moving to {state} ***") + + if state == PenState.PEN_IS_OUT_OF_RANGE: + pen.backup() + pen.x = 0 + pen.y = 0 + pen.tipswitch = False + pen.tippressure = 0 + pen.azimuth = 0 + pen.inrange = False + pen.width = 0 + pen.height = 0 + pen.invert = False + pen.eraser = False + pen.xtilt = 0 + pen.ytilt = 0 + pen.twist = 0 + pen.barrelswitch = False + pen.secondarybarrelswitch = False + elif state == PenState.PEN_IS_IN_RANGE: + pen.tipswitch = False + pen.inrange = True + pen.invert = False + pen.eraser = False + pen.barrelswitch = False + pen.secondarybarrelswitch = False + elif state == PenState.PEN_IS_IN_CONTACT: + pen.tipswitch = True + pen.inrange = True + pen.invert = False + pen.eraser = False + pen.barrelswitch = False + pen.secondarybarrelswitch = False + elif state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON: + pen.tipswitch = False + pen.inrange = True + pen.invert = False + pen.eraser = False + pen.barrelswitch = True + pen.secondarybarrelswitch = False + elif state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON: + pen.tipswitch = True + pen.inrange = True + pen.invert = False + pen.eraser = False + pen.barrelswitch = True + pen.secondarybarrelswitch = False + elif state == PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON: + pen.tipswitch = False + pen.inrange = True + pen.invert = False + pen.eraser = False + pen.barrelswitch = False + pen.secondarybarrelswitch = True + elif state == PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON: + pen.tipswitch = True + pen.inrange = True + pen.invert = False + pen.eraser = False + pen.barrelswitch = False + pen.secondarybarrelswitch = True + elif state == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT: + pen.tipswitch = False + pen.inrange = True + pen.invert = True + pen.eraser = False + pen.barrelswitch = False + pen.secondarybarrelswitch = False + elif state == PenState.PEN_IS_ERASING: + pen.tipswitch = False + pen.inrange = True + pen.invert = False + pen.eraser = 
True + pen.barrelswitch = False + pen.secondarybarrelswitch = False + + pen.current_state = state + def event(self, pen): rs = [] r = self.create_report(application=self.cur_application, data=pen) @@ -435,10 +777,14 @@ class BaseTest: self.debug_reports(r, uhdev, events) return events - def validate_transitions(self, from_state, pen, evdev, events): + def validate_transitions( + self, from_state, pen, evdev, events, allow_intermediate_states + ): # check that the final state is correct pen.assert_expected_input_events(evdev) + state = from_state + # check that the transitions are valid sync_events = [] while libevdev.InputEvent(libevdev.EV_SYN.SYN_REPORT) in events: @@ -448,12 +794,12 @@ class BaseTest: events = events[idx + 1 :] # now check for a valid transition - from_state = from_state.apply(sync_events) + state = state.apply(sync_events, not allow_intermediate_states) if events: - from_state = from_state.apply(sync_events) + state = state.apply(sync_events, not allow_intermediate_states) - def _test_states(self, state_list, scribble): + def _test_states(self, state_list, scribble, allow_intermediate_states): """Internal method to test against a list of transition between states. 
state_list is a list of PenState objects @@ -466,9 +812,11 @@ class BaseTest: cur_state = PenState.PEN_IS_OUT_OF_RANGE p = Pen(50, 60) - p.move_to(PenState.PEN_IS_OUT_OF_RANGE) + uhdev.move_to(p, PenState.PEN_IS_OUT_OF_RANGE) events = self.post(uhdev, p) - self.validate_transitions(cur_state, p, evdev, events) + self.validate_transitions( + cur_state, p, evdev, events, allow_intermediate_states + ) cur_state = p.current_state @@ -477,38 +825,77 @@ class BaseTest: p.x += 1 p.y -= 1 events = self.post(uhdev, p) - self.validate_transitions(cur_state, p, evdev, events) + self.validate_transitions( + cur_state, p, evdev, events, allow_intermediate_states + ) assert len(events) >= 3 # X, Y, SYN - p.move_to(state) + uhdev.move_to(p, state) if scribble and state != PenState.PEN_IS_OUT_OF_RANGE: p.x += 1 p.y -= 1 events = self.post(uhdev, p) - self.validate_transitions(cur_state, p, evdev, events) + self.validate_transitions( + cur_state, p, evdev, events, allow_intermediate_states + ) cur_state = p.current_state @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"]) @pytest.mark.parametrize( "state_list", - [pytest.param(v, id=k) for k, v in Pen.legal_transitions().items()], + [pytest.param(v, id=k) for k, v in PenState.legal_transitions().items()], ) def test_valid_pen_states(self, state_list, scribble): """This is the first half of the Windows Pen Implementation state machine: we don't have Invert nor Erase bits, so just move in/out-of-range or proximity. 
https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states """ - self._test_states(state_list, scribble) + self._test_states(state_list, scribble, allow_intermediate_states=False) @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"]) @pytest.mark.parametrize( "state_list", - [pytest.param(v, id=k) for k, v in Pen.tolerated_transitions().items()], + [ + pytest.param(v, id=k) + for k, v in PenState.tolerated_transitions().items() + ], ) def test_tolerated_pen_states(self, state_list, scribble): """This is not adhering to the Windows Pen Implementation state machine but we should expect the kernel to behave properly, mostly for historical reasons.""" - self._test_states(state_list, scribble) + self._test_states(state_list, scribble, allow_intermediate_states=True) + + @pytest.mark.skip_if_uhdev( + lambda uhdev: "Barrel Switch" not in uhdev.fields, + "Device not compatible, missing Barrel Switch usage", + ) + @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"]) + @pytest.mark.parametrize( + "state_list", + [ + pytest.param(v, id=k) + for k, v in PenState.legal_transitions_with_primary_button().items() + ], + ) + def test_valid_primary_button_pen_states(self, state_list, scribble): + """Rework the transition state machine by adding the primary button.""" + self._test_states(state_list, scribble, allow_intermediate_states=False) + + @pytest.mark.skip_if_uhdev( + lambda uhdev: "Secondary Barrel Switch" not in uhdev.fields, + "Device not compatible, missing Secondary Barrel Switch usage", + ) + @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"]) + @pytest.mark.parametrize( + "state_list", + [ + pytest.param(v, id=k) + for k, v in PenState.legal_transitions_with_secondary_button().items() + ], + ) + def test_valid_secondary_button_pen_states(self, state_list, scribble): + """Rework the transition state machine by adding the secondary button.""" + 
self._test_states(state_list, scribble, allow_intermediate_states=False) @pytest.mark.skip_if_uhdev( lambda uhdev: "Invert" not in uhdev.fields, @@ -519,7 +906,7 @@ class BaseTest: "state_list", [ pytest.param(v, id=k) - for k, v in Pen.legal_transitions_with_invert().items() + for k, v in PenState.legal_transitions_with_invert().items() ], ) def test_valid_invert_pen_states(self, state_list, scribble): @@ -528,7 +915,7 @@ class BaseTest: to erase. https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states """ - self._test_states(state_list, scribble) + self._test_states(state_list, scribble, allow_intermediate_states=False) @pytest.mark.skip_if_uhdev( lambda uhdev: "Invert" not in uhdev.fields, @@ -539,7 +926,7 @@ class BaseTest: "state_list", [ pytest.param(v, id=k) - for k, v in Pen.tolerated_transitions_with_invert().items() + for k, v in PenState.tolerated_transitions_with_invert().items() ], ) def test_tolerated_invert_pen_states(self, state_list, scribble): @@ -548,7 +935,7 @@ class BaseTest: to erase. https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states """ - self._test_states(state_list, scribble) + self._test_states(state_list, scribble, allow_intermediate_states=True) @pytest.mark.skip_if_uhdev( lambda uhdev: "Invert" not in uhdev.fields, @@ -557,7 +944,7 @@ class BaseTest: @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"]) @pytest.mark.parametrize( "state_list", - [pytest.param(v, id=k) for k, v in Pen.broken_transitions().items()], + [pytest.param(v, id=k) for k, v in PenState.broken_transitions().items()], ) def test_tolerated_broken_pen_states(self, state_list, scribble): """Those tests are definitely not part of the Windows specification. 
@@ -565,102 +952,7 @@ class BaseTest: For example, a pen that has the eraser button might wobble between touching and erasing if the tablet doesn't enforce the Windows state machine.""" - self._test_states(state_list, scribble) - - @pytest.mark.skip_if_uhdev( - lambda uhdev: "Barrel Switch" not in uhdev.fields, - "Device not compatible, missing Barrel Switch usage", - ) - def test_primary_button(self): - """Primary button (stylus) pressed, reports as pressed even while hovering. - Actual reporting from the device: hid=TIPSWITCH,BARRELSWITCH,INRANGE (code=TOUCH,STYLUS,PEN): - { 0, 0, 1 } <- hover - { 0, 1, 1 } <- primary button pressed - { 0, 1, 1 } <- liftoff - { 0, 0, 0 } <- leaves - """ - - uhdev = self.uhdev - evdev = uhdev.get_evdev() - - p = Pen(50, 60) - p.inrange = True - events = self.post(uhdev, p) - assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_PEN, 1) in events - assert evdev.value[libevdev.EV_ABS.ABS_X] == 50 - assert evdev.value[libevdev.EV_ABS.ABS_Y] == 60 - assert not evdev.value[libevdev.EV_KEY.BTN_STYLUS] - - p.barrelswitch = True - events = self.post(uhdev, p) - assert libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS, 1) in events - - p.x += 1 - p.y -= 1 - events = self.post(uhdev, p) - assert len(events) == 3 # X, Y, SYN - assert libevdev.InputEvent(libevdev.EV_ABS.ABS_X, 51) in events - assert libevdev.InputEvent(libevdev.EV_ABS.ABS_Y, 59) in events - - p.barrelswitch = False - events = self.post(uhdev, p) - assert libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS, 0) in events - - p.inrange = False - events = self.post(uhdev, p) - assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_PEN, 0) in events - - @pytest.mark.skip_if_uhdev( - lambda uhdev: "Barrel Switch" not in uhdev.fields, - "Device not compatible, missing Barrel Switch usage", - ) - def test_contact_primary_button(self): - """Primary button (stylus) pressed, reports as pressed even while hovering. 
- Actual reporting from the device: hid=TIPSWITCH,BARRELSWITCH,INRANGE (code=TOUCH,STYLUS,PEN): - { 0, 0, 1 } <- hover - { 0, 1, 1 } <- primary button pressed - { 1, 1, 1 } <- touch-down - { 1, 1, 1 } <- still touch, scribble on the screen - { 0, 1, 1 } <- liftoff - { 0, 0, 0 } <- leaves - """ - - uhdev = self.uhdev - evdev = uhdev.get_evdev() - - p = Pen(50, 60) - p.inrange = True - events = self.post(uhdev, p) - assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_PEN, 1) in events - assert evdev.value[libevdev.EV_ABS.ABS_X] == 50 - assert evdev.value[libevdev.EV_ABS.ABS_Y] == 60 - assert not evdev.value[libevdev.EV_KEY.BTN_STYLUS] - - p.barrelswitch = True - events = self.post(uhdev, p) - assert libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS, 1) in events - - p.tipswitch = True - events = self.post(uhdev, p) - assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events - assert evdev.value[libevdev.EV_KEY.BTN_STYLUS] - - p.x += 1 - p.y -= 1 - events = self.post(uhdev, p) - assert len(events) == 3 # X, Y, SYN - assert libevdev.InputEvent(libevdev.EV_ABS.ABS_X, 51) in events - assert libevdev.InputEvent(libevdev.EV_ABS.ABS_Y, 59) in events - - p.tipswitch = False - events = self.post(uhdev, p) - assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events - - p.barrelswitch = False - p.inrange = False - events = self.post(uhdev, p) - assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_PEN, 0) in events - assert libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS, 0) in events + self._test_states(state_list, scribble, allow_intermediate_states=True) class GXTP_pen(PenDigitizer): diff --git a/tools/testing/selftests/hid/tests/test_wacom_generic.py b/tools/testing/selftests/hid/tests/test_wacom_generic.py index f92fe8e02c..b62c7dba67 100644 --- a/tools/testing/selftests/hid/tests/test_wacom_generic.py +++ b/tools/testing/selftests/hid/tests/test_wacom_generic.py @@ -27,6 +27,7 @@ from .descriptors_wacom import ( ) import attr +from collections import 
namedtuple from enum import Enum from hidtools.hut import HUT from hidtools.hid import HidUnit @@ -862,6 +863,8 @@ class TestPTHX60_Pen(TestOpaqueCTLTablet): class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest): + ContactIds = namedtuple("ContactIds", "contact_id, tracking_id, slot_num") + def create_device(self): return test_multitouch.Digitizer( "DTH 2452", @@ -869,6 +872,57 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest input_info=(0x3, 0x056A, 0x0383), ) + def make_contact(self, contact_id=0, t=0): + """ + Make a single touch contact that can move over time. + + Creates a touch object that has a well-known position in space that + does not overlap with other contacts. The value of `t` may be + incremented over time to move the point along a linear path. + """ + x = 50 + 10 * contact_id + t * 11 + y = 100 + 100 * contact_id + t * 11 + return test_multitouch.Touch(contact_id, x, y) + + def make_contacts(self, n, t=0): + """ + Make multiple touch contacts that can move over time. + + Returns a list of `n` touch objects that are positioned at well-known + locations. The value of `t` may be incremented over time to move the + points along a linear path. + """ + return [ self.make_contact(id, t) for id in range(0, n) ] + + def assert_contact(self, uhdev, evdev, contact_ids, t=0): + """ + Assert properties of a contact generated by make_contact. + """ + contact_id = contact_ids.contact_id + tracking_id = contact_ids.tracking_id + slot_num = contact_ids.slot_num + + x = 50 + 10 * contact_id + t * 11 + y = 100 + 100 * contact_id + t * 11 + + # If the data isn't supposed to be stored in any slots, there is + # nothing we can check for in the evdev stream. 
+ if slot_num is None: + assert tracking_id == -1 + return + + assert evdev.slots[slot_num][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == tracking_id + if tracking_id != -1: + assert evdev.slots[slot_num][libevdev.EV_ABS.ABS_MT_POSITION_X] == x + assert evdev.slots[slot_num][libevdev.EV_ABS.ABS_MT_POSITION_Y] == y + + def assert_contacts(self, uhdev, evdev, data, t=0): + """ + Assert properties of a list of contacts generated by make_contacts. + """ + for contact_ids in data: + self.assert_contact(uhdev, evdev, contact_ids, t) + def test_contact_id_0(self): """ Bring a finger in contact with the tablet, then hold it down and remove it. @@ -909,7 +963,7 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest Ensure that the confidence bit being set to false should not result in a touch event. """ uhdev = self.uhdev - evdev = uhdev.get_evdev() + _evdev = uhdev.get_evdev() t0 = test_multitouch.Touch(1, 50, 100) t0.confidence = False @@ -917,6 +971,228 @@ class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest events = uhdev.next_sync_events() self.debug_reports(r, uhdev, events) - slot = self.get_slot(uhdev, t0, 0) + _slot = self.get_slot(uhdev, t0, 0) + + assert not events + + def test_confidence_multitouch(self): + """ + Bring multiple fingers in contact with the tablet, some with the + confidence bit set, and some without. + + Ensure that all confident touches are reported and that all non- + confident touches are ignored. 
+ """ + uhdev = self.uhdev + evdev = uhdev.get_evdev() + + touches = self.make_contacts(5) + touches[0].confidence = False + touches[2].confidence = False + touches[4].confidence = False + + r = uhdev.event(touches) + events = uhdev.next_sync_events() + self.debug_reports(r, uhdev, events) + + assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events + + self.assert_contacts(uhdev, evdev, + [ self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = None), + self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), + self.ContactIds(contact_id = 2, tracking_id = -1, slot_num = None), + self.ContactIds(contact_id = 3, tracking_id = 1, slot_num = 1), + self.ContactIds(contact_id = 4, tracking_id = -1, slot_num = None) ]) + + def confidence_change_assert_playback(self, uhdev, evdev, timeline): + """ + Assert proper behavior of contacts that move and change tipswitch / + confidence status over time. + + Given a `timeline` list of touch states to iterate over, verify + that the contacts move and are reported as up/down as expected + by the state of the tipswitch and confidence bits. + """ + t = 0 + + for state in timeline: + touches = self.make_contacts(len(state), t) + + for item in zip(touches, state): + item[0].tipswitch = item[1][1] + item[0].confidence = item[1][2] + + r = uhdev.event(touches) + events = uhdev.next_sync_events() + self.debug_reports(r, uhdev, events) + + ids = [ x[0] for x in state ] + self.assert_contacts(uhdev, evdev, ids, t) + + t += 1 + + def test_confidence_loss_a(self): + """ + Transition a confident contact to a non-confident contact by + first clearing the tipswitch. + + Ensure that the driver reports the transitioned contact as + being removed and that other contacts continue to report + normally. This mode of confidence loss is used by the + DTH-2452. 
+ """ + uhdev = self.uhdev + evdev = uhdev.get_evdev() + + self.confidence_change_assert_playback(uhdev, evdev, [ + # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident + # Both fingers confidently in contact + [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=1: Contact 0 == !Down + confident; Contact 1 == Down + confident + # First finger looses confidence and clears only the tipswitch flag + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, True), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)] + ]) + + def test_confidence_loss_b(self): + """ + Transition a confident contact to a non-confident contact by + cleraing both tipswitch and confidence bits simultaneously. + + Ensure that the driver reports the transitioned contact as + being removed and that other contacts continue to report + normally. This mode of confidence loss is used by some + AES devices. 
+ """ + uhdev = self.uhdev + evdev = uhdev.get_evdev() + + self.confidence_change_assert_playback(uhdev, evdev, [ + # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident + # Both fingers confidently in contact + [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=1: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger looses confidence and has both flags cleared simultaneously + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)] + ]) + + def test_confidence_loss_c(self): + """ + Transition a confident contact to a non-confident contact by + clearing only the confidence bit. + + Ensure that the driver reports the transitioned contact as + being removed and that other contacts continue to report + normally. 
+ """ + uhdev = self.uhdev + evdev = uhdev.get_evdev() + + self.confidence_change_assert_playback(uhdev, evdev, [ + # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident + # Both fingers confidently in contact + [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident + # First finger looses confidence and clears only the confidence flag + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), True, False), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident + # First finger has lost confidence and has both flags cleared + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)] + ]) + + def test_confidence_gain_a(self): + """ + Transition a contact that was always non-confident to confident. + + Ensure that the confident contact is reported normally. 
+ """ + uhdev = self.uhdev + evdev = uhdev.get_evdev() + + self.confidence_change_assert_playback(uhdev, evdev, [ + # t=0: Contact 0 == Down + !confident; Contact 1 == Down + confident + # Only second finger is confidently in contact + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = None), True, False), + (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)], + + # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident + # First finger gains confidence + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = None), True, False), + (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)], + + # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident + # First finger remains confident + [(self.ContactIds(contact_id = 0, tracking_id = 1, slot_num = 1), True, True), + (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)], + + # t=3: Contact 0 == Down + confident; Contact 1 == Down + confident + # First finger remains confident + [(self.ContactIds(contact_id = 0, tracking_id = 1, slot_num = 1), True, True), + (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)] + ]) + + def test_confidence_gain_b(self): + """ + Transition a contact from non-confident to confident. + + Ensure that the confident contact is reported normally. + """ + uhdev = self.uhdev + evdev = uhdev.get_evdev() - assert not events
\ No newline at end of file + self.confidence_change_assert_playback(uhdev, evdev, [ + # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident + # First and second finger confidently in contact + [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident + # Firtst finger looses confidence + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), True, False), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident + # First finger gains confidence + [(self.ContactIds(contact_id = 0, tracking_id = 2, slot_num = 0), True, True), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)], + + # t=3: Contact 0 == !Down + confident; Contact 1 == Down + confident + # First finger goes up + [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, True), + (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)] + ]) diff --git a/tools/testing/selftests/hid/vmtest.sh b/tools/testing/selftests/hid/vmtest.sh index 4da48bf6b3..db534e9099 100755 --- a/tools/testing/selftests/hid/vmtest.sh +++ b/tools/testing/selftests/hid/vmtest.sh @@ -19,12 +19,12 @@ esac SCRIPT_DIR="$(dirname $(realpath $0))" OUTPUT_DIR="$SCRIPT_DIR/results" KCONFIG_REL_PATHS=("${SCRIPT_DIR}/config" "${SCRIPT_DIR}/config.common" "${SCRIPT_DIR}/config.${ARCH}") -B2C_URL="https://gitlab.freedesktop.org/mupuf/boot2container/-/raw/master/vm2c.py" +B2C_URL="https://gitlab.freedesktop.org/gfx-ci/boot2container/-/raw/main/vm2c.py" NUM_COMPILE_JOBS="$(nproc)" LOG_FILE_BASE="$(date +"hid_selftests.%Y-%m-%d_%H-%M-%S")" LOG_FILE="${LOG_FILE_BASE}.log" EXIT_STATUS_FILE="${LOG_FILE_BASE}.exit_status" 
-CONTAINER_IMAGE="registry.freedesktop.org/libevdev/hid-tools/fedora/37:2023-02-17.1" +CONTAINER_IMAGE="registry.freedesktop.org/bentiss/hid/fedora/39:2023-11-22.1" TARGETS="${TARGETS:=$(basename ${SCRIPT_DIR})}" DEFAULT_COMMAND="pip3 install hid-tools; make -C tools/testing/selftests TARGETS=${TARGETS} run_tests" @@ -32,7 +32,7 @@ DEFAULT_COMMAND="pip3 install hid-tools; make -C tools/testing/selftests TARGETS usage() { cat <<EOF -Usage: $0 [-i] [-s] [-d <output_dir>] -- [<command>] +Usage: $0 [-j N] [-s] [-b] [-d <output_dir>] -- [<command>] <command> is the command you would normally run when you are in the source kernel direcory. e.g: @@ -55,6 +55,7 @@ Options: -u) Update the boot2container script to a newer version. -d) Update the output directory (default: ${OUTPUT_DIR}) + -b) Run only the build steps for the kernel and the selftests -j) Number of jobs for compilation, similar to -j in make (default: ${NUM_COMPILE_JOBS}) -s) Instead of powering off the VM, start an interactive @@ -191,8 +192,9 @@ main() local command="${DEFAULT_COMMAND}" local update_b2c="no" local debug_shell="no" + local build_only="no" - while getopts ':hsud:j:' opt; do + while getopts ':hsud:j:b' opt; do case ${opt} in u) update_b2c="yes" @@ -207,6 +209,9 @@ main() command="/bin/sh" debug_shell="yes" ;; + b) + build_only="yes" + ;; h) usage exit 0 @@ -226,8 +231,7 @@ main() shift $((OPTIND -1)) # trap 'catch "$?"' EXIT - - if [[ "${debug_shell}" == "no" ]]; then + if [[ "${build_only}" == "no" && "${debug_shell}" == "no" ]]; then if [[ $# -eq 0 ]]; then echo "No command specified, will run ${DEFAULT_COMMAND} in the vm" else @@ -267,24 +271,26 @@ main() update_kconfig "${kernel_checkout}" "${kconfig_file}" recompile_kernel "${kernel_checkout}" "${make_command}" + update_selftests "${kernel_checkout}" "${make_command}" - if [[ "${update_b2c}" == "no" && ! 
-f "${b2c}" ]]; then - echo "vm2c script not found in ${b2c}" - update_b2c="yes" - fi + if [[ "${build_only}" == "no" ]]; then + if [[ "${update_b2c}" == "no" && ! -f "${b2c}" ]]; then + echo "vm2c script not found in ${b2c}" + update_b2c="yes" + fi - if [[ "${update_b2c}" == "yes" ]]; then - download $B2C_URL $b2c - chmod +x $b2c - fi + if [[ "${update_b2c}" == "yes" ]]; then + download $B2C_URL $b2c + chmod +x $b2c + fi - update_selftests "${kernel_checkout}" "${make_command}" - run_vm "${kernel_checkout}" $b2c "${kernel_bzimage}" "${command}" - if [[ "${debug_shell}" != "yes" ]]; then - echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}" - fi + run_vm "${kernel_checkout}" $b2c "${kernel_bzimage}" "${command}" + if [[ "${debug_shell}" != "yes" ]]; then + echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}" + fi - exit $(cat ${OUTPUT_DIR}/${EXIT_STATUS_FILE}) + exit $(cat ${OUTPUT_DIR}/${EXIT_STATUS_FILE}) + fi } main "$@" diff --git a/tools/testing/selftests/iommu/config b/tools/testing/selftests/iommu/config index 110d739176..02a2a1b267 100644 --- a/tools/testing/selftests/iommu/config +++ b/tools/testing/selftests/iommu/config @@ -1,3 +1,5 @@ CONFIG_IOMMUFD=y +CONFIG_FAULT_INJECTION_DEBUG_FS=y CONFIG_FAULT_INJECTION=y CONFIG_IOMMUFD_TEST=y +CONFIG_FAILSLAB=y diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 6ed328c863..edf1c99c99 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -12,6 +12,7 @@ static unsigned long HUGEPAGE_SIZE; #define MOCK_PAGE_SIZE (PAGE_SIZE / 2) +#define MOCK_HUGE_PAGE_SIZE (512 * MOCK_PAGE_SIZE) static unsigned long get_huge_page_size(void) { @@ -116,6 +117,7 @@ TEST_F(iommufd, cmd_length) TEST_LENGTH(iommu_destroy, IOMMU_DESTROY, id); TEST_LENGTH(iommu_hw_info, IOMMU_GET_HW_INFO, __reserved); TEST_LENGTH(iommu_hwpt_alloc, IOMMU_HWPT_ALLOC, __reserved); + TEST_LENGTH(iommu_hwpt_invalidate, IOMMU_HWPT_INVALIDATE, __reserved); 
TEST_LENGTH(iommu_ioas_alloc, IOMMU_IOAS_ALLOC, out_ioas_id); TEST_LENGTH(iommu_ioas_iova_ranges, IOMMU_IOAS_IOVA_RANGES, out_iova_alignment); @@ -271,7 +273,9 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) struct iommu_hwpt_selftest data = { .iotlb = IOMMU_TEST_IOTLB_DEFAULT, }; + struct iommu_hwpt_invalidate_selftest inv_reqs[2] = {}; uint32_t nested_hwpt_id[2] = {}; + uint32_t num_inv; uint32_t parent_hwpt_id = 0; uint32_t parent_hwpt_id_not_work = 0; uint32_t test_hwpt_id = 0; @@ -330,6 +334,10 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) &nested_hwpt_id[1], IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); + test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[0], + IOMMU_TEST_IOTLB_DEFAULT); + test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[1], + IOMMU_TEST_IOTLB_DEFAULT); /* Negative test: a nested hwpt on top of a nested hwpt */ test_err_hwpt_alloc_nested(EINVAL, self->device_id, @@ -340,6 +348,151 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, parent_hwpt_id)); + /* hwpt_invalidate only supports a user-managed hwpt (nested) */ + num_inv = 1; + test_err_hwpt_invalidate(ENOENT, parent_hwpt_id, inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Check data_type by passing zero-length array */ + num_inv = 0; + test_cmd_hwpt_invalidate(nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: Invalid data_type */ + num_inv = 1; + test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST_INVALID, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: structure size sanity */ + num_inv = 1; + test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs) + 1, &num_inv); + assert(!num_inv); + + num_inv = 1; + test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs, + 
IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + 1, &num_inv); + assert(!num_inv); + + /* Negative test: invalid flag is passed */ + num_inv = 1; + inv_reqs[0].flags = 0xffffffff; + test_err_hwpt_invalidate(EOPNOTSUPP, nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid data_uptr when array is not empty */ + num_inv = 1; + inv_reqs[0].flags = 0; + test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], NULL, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid entry_len when array is not empty */ + num_inv = 1; + inv_reqs[0].flags = 0; + test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + 0, &num_inv); + assert(!num_inv); + + /* Negative test: invalid iotlb_id */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].iotlb_id = MOCK_NESTED_DOMAIN_IOTLB_ID_MAX + 1; + test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* + * Invalidate the 1st iotlb entry but fail the 2nd request + * due to invalid flags configuration in the 2nd request. + */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].iotlb_id = 0; + inv_reqs[1].flags = 0xffffffff; + inv_reqs[1].iotlb_id = 1; + test_err_hwpt_invalidate(EOPNOTSUPP, nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 0, 0); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 1, + IOMMU_TEST_IOTLB_DEFAULT); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 2, + IOMMU_TEST_IOTLB_DEFAULT); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 3, + IOMMU_TEST_IOTLB_DEFAULT); + + /* + * Invalidate the 1st iotlb entry but fail the 2nd request + * due to invalid iotlb_id configuration in the 2nd request. 
+ */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].iotlb_id = 0; + inv_reqs[1].flags = 0; + inv_reqs[1].iotlb_id = MOCK_NESTED_DOMAIN_IOTLB_ID_MAX + 1; + test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 0, 0); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 1, + IOMMU_TEST_IOTLB_DEFAULT); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 2, + IOMMU_TEST_IOTLB_DEFAULT); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 3, + IOMMU_TEST_IOTLB_DEFAULT); + + /* Invalidate the 2nd iotlb entry and verify */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].iotlb_id = 1; + test_cmd_hwpt_invalidate(nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 0, 0); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 1, 0); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 2, + IOMMU_TEST_IOTLB_DEFAULT); + test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 3, + IOMMU_TEST_IOTLB_DEFAULT); + + /* Invalidate the 3rd and 4th iotlb entries and verify */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].iotlb_id = 2; + inv_reqs[1].flags = 0; + inv_reqs[1].iotlb_id = 3; + test_cmd_hwpt_invalidate(nested_hwpt_id[0], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 2); + test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[0], 0); + + /* Invalidate all iotlb entries for nested_hwpt_id[1] and verify */ + num_inv = 1; + inv_reqs[0].flags = IOMMU_TEST_INVALIDATE_FLAG_ALL; + test_cmd_hwpt_invalidate(nested_hwpt_id[1], inv_reqs, + IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[1], 0); + /* Attach device to nested_hwpt_id[0] that then will be busy */ test_cmd_mock_domain_replace(self->stdev_id, 
nested_hwpt_id[0]); EXPECT_ERRNO(EBUSY, @@ -1564,10 +1717,12 @@ FIXTURE(iommufd_dirty_tracking) FIXTURE_VARIANT(iommufd_dirty_tracking) { unsigned long buffer_size; + bool hugepages; }; FIXTURE_SETUP(iommufd_dirty_tracking) { + int mmap_flags; void *vrc; int rc; @@ -1580,25 +1735,41 @@ FIXTURE_SETUP(iommufd_dirty_tracking) variant->buffer_size, rc); } + mmap_flags = MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED; + if (variant->hugepages) { + /* + * MAP_POPULATE will cause the kernel to fail mmap if THPs are + * not available. + */ + mmap_flags |= MAP_HUGETLB | MAP_POPULATE; + } assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0); vrc = mmap(self->buffer, variant->buffer_size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0); + mmap_flags, -1, 0); assert(vrc == self->buffer); self->page_size = MOCK_PAGE_SIZE; self->bitmap_size = variant->buffer_size / self->page_size / BITS_PER_BYTE; - /* Provision with an extra (MOCK_PAGE_SIZE) for the unaligned case */ + /* Provision with an extra (PAGE_SIZE) for the unaligned case */ rc = posix_memalign(&self->bitmap, PAGE_SIZE, - self->bitmap_size + MOCK_PAGE_SIZE); + self->bitmap_size + PAGE_SIZE); assert(!rc); assert(self->bitmap); assert((uintptr_t)self->bitmap % PAGE_SIZE == 0); test_ioctl_ioas_alloc(&self->ioas_id); - test_cmd_mock_domain(self->ioas_id, &self->stdev_id, &self->hwpt_id, - &self->idev_id); + /* Enable 1M mock IOMMU hugepages */ + if (variant->hugepages) { + test_cmd_mock_domain_flags(self->ioas_id, + MOCK_FLAGS_DEVICE_HUGE_IOVA, + &self->stdev_id, &self->hwpt_id, + &self->idev_id); + } else { + test_cmd_mock_domain(self->ioas_id, &self->stdev_id, + &self->hwpt_id, &self->idev_id); + } } FIXTURE_TEARDOWN(iommufd_dirty_tracking) @@ -1632,12 +1803,26 @@ FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M) .buffer_size = 128UL * 1024UL * 1024UL, }; +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M_huge) +{ + /* 4K bitmap (128M IOVA range) */ + .buffer_size = 128UL * 1024UL * 
1024UL, + .hugepages = true, +}; + FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M) { /* 8K bitmap (256M IOVA range) */ .buffer_size = 256UL * 1024UL * 1024UL, }; +FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M_huge) +{ + /* 8K bitmap (256M IOVA range) */ + .buffer_size = 256UL * 1024UL * 1024UL, + .hugepages = true, +}; + TEST_F(iommufd_dirty_tracking, enforce_dirty) { uint32_t ioas_id, stddev_id, idev_id; @@ -1697,65 +1882,80 @@ TEST_F(iommufd_dirty_tracking, device_dirty_capability) TEST_F(iommufd_dirty_tracking, get_dirty_bitmap) { - uint32_t stddev_id; + uint32_t page_size = MOCK_PAGE_SIZE; uint32_t hwpt_id; uint32_t ioas_id; + if (variant->hugepages) + page_size = MOCK_HUGE_PAGE_SIZE; + test_ioctl_ioas_alloc(&ioas_id); test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer, variant->buffer_size, MOCK_APERTURE_START); test_cmd_hwpt_alloc(self->idev_id, ioas_id, IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id); - test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); test_cmd_set_dirty_tracking(hwpt_id, true); test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, - MOCK_APERTURE_START, self->page_size, + MOCK_APERTURE_START, self->page_size, page_size, self->bitmap, self->bitmap_size, 0, _metadata); /* PAGE_SIZE unaligned bitmap */ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, - MOCK_APERTURE_START, self->page_size, + MOCK_APERTURE_START, self->page_size, page_size, self->bitmap + MOCK_PAGE_SIZE, self->bitmap_size, 0, _metadata); - test_ioctl_destroy(stddev_id); + /* u64 unaligned bitmap */ + test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, + MOCK_APERTURE_START, self->page_size, page_size, + self->bitmap + 0xff1, self->bitmap_size, 0, + _metadata); + test_ioctl_destroy(hwpt_id); } TEST_F(iommufd_dirty_tracking, get_dirty_bitmap_no_clear) { - uint32_t stddev_id; + uint32_t page_size = MOCK_PAGE_SIZE; uint32_t hwpt_id; uint32_t ioas_id; + if (variant->hugepages) + page_size = MOCK_HUGE_PAGE_SIZE; + 
test_ioctl_ioas_alloc(&ioas_id); test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer, variant->buffer_size, MOCK_APERTURE_START); test_cmd_hwpt_alloc(self->idev_id, ioas_id, IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id); - test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); test_cmd_set_dirty_tracking(hwpt_id, true); test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, - MOCK_APERTURE_START, self->page_size, + MOCK_APERTURE_START, self->page_size, page_size, self->bitmap, self->bitmap_size, IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR, _metadata); /* Unaligned bitmap */ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, - MOCK_APERTURE_START, self->page_size, + MOCK_APERTURE_START, self->page_size, page_size, self->bitmap + MOCK_PAGE_SIZE, self->bitmap_size, IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR, _metadata); - test_ioctl_destroy(stddev_id); + /* u64 unaligned bitmap */ + test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size, + MOCK_APERTURE_START, self->page_size, page_size, + self->bitmap + 0xff1, self->bitmap_size, + IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR, + _metadata); + test_ioctl_destroy(hwpt_id); } diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index ad92023356..8d2b46b211 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -195,6 +195,61 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \ hwpt_id, data_type, data, data_len)) +#define test_cmd_hwpt_check_iotlb(hwpt_id, iotlb_id, expected) \ + ({ \ + struct iommu_test_cmd test_cmd = { \ + .size = sizeof(test_cmd), \ + .op = IOMMU_TEST_OP_MD_CHECK_IOTLB, \ + .id = hwpt_id, \ + .check_iotlb = { \ + .id = iotlb_id, \ + .iotlb = expected, \ + }, \ + }; \ + ASSERT_EQ(0, \ + ioctl(self->fd, \ + _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_IOTLB), \ + &test_cmd)); \ + }) + +#define test_cmd_hwpt_check_iotlb_all(hwpt_id, expected) \ 
+ ({ \ + int i; \ + for (i = 0; i < MOCK_NESTED_DOMAIN_IOTLB_NUM; i++) \ + test_cmd_hwpt_check_iotlb(hwpt_id, i, expected); \ + }) + +static int _test_cmd_hwpt_invalidate(int fd, __u32 hwpt_id, void *reqs, + uint32_t data_type, uint32_t lreq, + uint32_t *nreqs) +{ + struct iommu_hwpt_invalidate cmd = { + .size = sizeof(cmd), + .hwpt_id = hwpt_id, + .data_type = data_type, + .data_uptr = (uint64_t)reqs, + .entry_len = lreq, + .entry_num = *nreqs, + }; + int rc = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cmd); + *nreqs = cmd.entry_num; + return rc; +} + +#define test_cmd_hwpt_invalidate(hwpt_id, reqs, data_type, lreq, nreqs) \ + ({ \ + ASSERT_EQ(0, \ + _test_cmd_hwpt_invalidate(self->fd, hwpt_id, reqs, \ + data_type, lreq, nreqs)); \ + }) +#define test_err_hwpt_invalidate(_errno, hwpt_id, reqs, data_type, lreq, \ + nreqs) \ + ({ \ + EXPECT_ERRNO(_errno, _test_cmd_hwpt_invalidate( \ + self->fd, hwpt_id, reqs, \ + data_type, lreq, nreqs)); \ + }) + static int _test_cmd_access_replace_ioas(int fd, __u32 access_id, unsigned int ioas_id) { @@ -289,16 +344,19 @@ static int _test_cmd_mock_domain_set_dirty(int fd, __u32 hwpt_id, size_t length, page_size, bitmap, nr)) static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, - __u64 iova, size_t page_size, __u64 *bitmap, + __u64 iova, size_t page_size, + size_t pte_page_size, __u64 *bitmap, __u64 bitmap_size, __u32 flags, struct __test_metadata *_metadata) { - unsigned long i, nbits = bitmap_size * BITS_PER_BYTE; - unsigned long nr = nbits / 2; + unsigned long npte = pte_page_size / page_size, pteset = 2 * npte; + unsigned long nbits = bitmap_size * BITS_PER_BYTE; + unsigned long j, i, nr = nbits / pteset ?: 1; __u64 out_dirty = 0; /* Mark all even bits as dirty in the mock domain */ - for (i = 0; i < nbits; i += 2) + memset(bitmap, 0, bitmap_size); + for (i = 0; i < nbits; i += pteset) set_bit(i, (unsigned long *)bitmap); test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size, @@ -310,8 +368,12 @@ static 
int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap, flags); /* Beware ASSERT_EQ() is two statements -- braces are not redundant! */ - for (i = 0; i < nbits; i++) { - ASSERT_EQ(!(i % 2), test_bit(i, (unsigned long *)bitmap)); + for (i = 0; i < nbits; i += pteset) { + for (j = 0; j < pteset; j++) { + ASSERT_EQ(j < npte, + test_bit(i + j, (unsigned long *)bitmap)); + } + ASSERT_EQ(!(i % pteset), test_bit(i, (unsigned long *)bitmap)); } memset(bitmap, 0, bitmap_size); @@ -319,19 +381,23 @@ static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length, flags); /* It as read already -- expect all zeroes */ - for (i = 0; i < nbits; i++) { - ASSERT_EQ(!(i % 2) && (flags & - IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR), - test_bit(i, (unsigned long *)bitmap)); + for (i = 0; i < nbits; i += pteset) { + for (j = 0; j < pteset; j++) { + ASSERT_EQ( + (j < npte) && + (flags & + IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR), + test_bit(i + j, (unsigned long *)bitmap)); + } } return 0; } -#define test_mock_dirty_bitmaps(hwpt_id, length, iova, page_size, bitmap, \ - bitmap_size, flags, _metadata) \ +#define test_mock_dirty_bitmaps(hwpt_id, length, iova, page_size, pte_size,\ + bitmap, bitmap_size, flags, _metadata) \ ASSERT_EQ(0, _test_mock_dirty_bitmaps(self->fd, hwpt_id, length, iova, \ - page_size, bitmap, bitmap_size, \ - flags, _metadata)) + page_size, pte_size, bitmap, \ + bitmap_size, flags, _metadata)) static int _test_cmd_create_access(int fd, unsigned int ioas_id, __u32 *access_id, unsigned int flags) diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index a781e63118..e3e260fe00 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -50,6 +50,7 @@ #include <stdarg.h> #include <string.h> #include <stdio.h> +#include <sys/utsname.h> #endif #ifndef ARRAY_SIZE @@ -78,6 +79,9 @@ #define KSFT_XPASS 3 #define KSFT_SKIP 4 
+#ifndef __noreturn +#define __noreturn __attribute__((__noreturn__)) +#endif #define __printf(a, b) __attribute__((format(printf, a, b))) /* counters */ @@ -254,13 +258,13 @@ static inline __printf(1, 2) void ksft_test_result_error(const char *msg, ...) va_end(args); } -static inline int ksft_exit_pass(void) +static inline __noreturn int ksft_exit_pass(void) { ksft_print_cnts(); exit(KSFT_PASS); } -static inline int ksft_exit_fail(void) +static inline __noreturn int ksft_exit_fail(void) { ksft_print_cnts(); exit(KSFT_FAIL); @@ -287,7 +291,7 @@ static inline int ksft_exit_fail(void) ksft_cnt.ksft_xfail + \ ksft_cnt.ksft_xskip) -static inline __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...) +static inline __noreturn __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...) { int saved_errno = errno; va_list args; @@ -302,19 +306,19 @@ static inline __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...) exit(KSFT_FAIL); } -static inline int ksft_exit_xfail(void) +static inline __noreturn int ksft_exit_xfail(void) { ksft_print_cnts(); exit(KSFT_XFAIL); } -static inline int ksft_exit_xpass(void) +static inline __noreturn int ksft_exit_xpass(void) { ksft_print_cnts(); exit(KSFT_XPASS); } -static inline __printf(1, 2) int ksft_exit_skip(const char *msg, ...) +static inline __noreturn __printf(1, 2) int ksft_exit_skip(const char *msg, ...) { int saved_errno = errno; va_list args; @@ -343,4 +347,21 @@ static inline __printf(1, 2) int ksft_exit_skip(const char *msg, ...) 
exit(KSFT_SKIP); } +static inline int ksft_min_kernel_version(unsigned int min_major, + unsigned int min_minor) +{ +#ifdef NOLIBC + ksft_print_msg("NOLIBC: Can't check kernel version: Function not implemented\n"); + return 0; +#else + unsigned int major, minor; + struct utsname info; + + if (uname(&info) || sscanf(info.release, "%u.%u.", &major, &minor) != 2) + ksft_exit_fail_msg("Can't parse kernel version\n"); + + return major > min_major || (major == min_major && minor >= min_minor); +#endif +} + #endif /* __KSELFTEST_H */ diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index cd2fb43eea..74954f6a8f 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -6,6 +6,7 @@ export skip_rc=4 export timeout_rc=124 export logfile=/dev/stdout export per_test_logging= +export RUN_IN_NETNS= # Defaults for "settings" file fields: # "timeout" how many seconds to let each test run before running @@ -47,7 +48,7 @@ run_one() { DIR="$1" TEST="$2" - NUM="$3" + local test_num="$3" BASENAME_TEST=$(basename $TEST) @@ -141,6 +142,33 @@ run_one() fi } +in_netns() +{ + local name=$1 + ip netns exec $name bash <<-EOF + BASE_DIR=$BASE_DIR + source $BASE_DIR/kselftest/runner.sh + logfile=$logfile + run_one $DIR $TEST $test_num + EOF +} + +run_in_netns() +{ + local netns=$(mktemp -u ${BASENAME_TEST}-XXXXXX) + local tmplog="/tmp/$(mktemp -u ${BASENAME_TEST}-XXXXXX)" + ip netns add $netns + if [ $? 
-ne 0 ]; then + echo "# Warning: Create namespace failed for $BASENAME_TEST" + echo "not ok $test_num selftests: $DIR: $BASENAME_TEST # Create NS failed" + fi + ip -n $netns link set lo up + in_netns $netns &> $tmplog + ip netns del $netns &> /dev/null + cat $tmplog + rm -f $tmplog +} + run_many() { echo "TAP version 13" @@ -155,6 +183,12 @@ run_many() logfile="/tmp/$BASENAME_TEST" cat /dev/null > "$logfile" fi - run_one "$DIR" "$TEST" "$test_num" + if [ -n "$RUN_IN_NETNS" ]; then + run_in_netns & + else + run_one "$DIR" "$TEST" "$test_num" + fi done + + wait } diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 3e0c36b8dd..492e937fab 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -77,11 +77,12 @@ TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test -TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test +TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test +TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test @@ -124,6 +125,7 @@ TEST_GEN_PROGS_x86_64 += access_tracking_perf_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test +TEST_GEN_PROGS_x86_64 += guest_memfd_test TEST_GEN_PROGS_x86_64 += guest_print_test TEST_GEN_PROGS_x86_64 += hardware_disable_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus @@ -184,12 +186,13 @@ TEST_GEN_PROGS_s390x += kvm_binary_stats_test TEST_GEN_PROGS_riscv 
+= demand_paging_test TEST_GEN_PROGS_riscv += dirty_log_test -TEST_GEN_PROGS_riscv += guest_print_test TEST_GEN_PROGS_riscv += get-reg-list +TEST_GEN_PROGS_riscv += guest_print_test +TEST_GEN_PROGS_riscv += kvm_binary_stats_test TEST_GEN_PROGS_riscv += kvm_create_max_vcpus TEST_GEN_PROGS_riscv += kvm_page_table_test TEST_GEN_PROGS_riscv += set_memory_region_test -TEST_GEN_PROGS_riscv += kvm_binary_stats_test +TEST_GEN_PROGS_riscv += steal_time SPLIT_TESTS += get-reg-list diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index 274b8465b4..2cb8dd1f82 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -248,7 +248,7 @@ static void *test_vcpu_run(void *arg) REPORT_GUEST_ASSERT(uc); break; default: - TEST_FAIL("Unexpected guest exit\n"); + TEST_FAIL("Unexpected guest exit"); } return NULL; @@ -287,7 +287,7 @@ static int test_migrate_vcpu(unsigned int vcpu_idx) /* Allow the error where the vCPU thread is already finished */ TEST_ASSERT(ret == 0 || ret == ESRCH, - "Failed to migrate the vCPU:%u to pCPU: %u; ret: %d\n", + "Failed to migrate the vCPU:%u to pCPU: %u; ret: %d", vcpu_idx, new_pcpu, ret); return ret; @@ -326,12 +326,12 @@ static void test_run(struct kvm_vm *vm) pthread_mutex_init(&vcpu_done_map_lock, NULL); vcpu_done_map = bitmap_zalloc(test_args.nr_vcpus); - TEST_ASSERT(vcpu_done_map, "Failed to allocate vcpu done bitmap\n"); + TEST_ASSERT(vcpu_done_map, "Failed to allocate vcpu done bitmap"); for (i = 0; i < (unsigned long)test_args.nr_vcpus; i++) { ret = pthread_create(&pt_vcpu_run[i], NULL, test_vcpu_run, (void *)(unsigned long)i); - TEST_ASSERT(!ret, "Failed to create vCPU-%d pthread\n", i); + TEST_ASSERT(!ret, "Failed to create vCPU-%d pthread", i); } /* Spawn a thread to control the vCPU migrations */ @@ -340,7 +340,7 @@ static void test_run(struct kvm_vm *vm) ret = pthread_create(&pt_vcpu_migration, NULL, 
test_vcpu_migration, NULL); - TEST_ASSERT(!ret, "Failed to create the migration pthread\n"); + TEST_ASSERT(!ret, "Failed to create the migration pthread"); } @@ -384,7 +384,7 @@ static struct kvm_vm *test_vm_create(void) if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET)) vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &test_args.offset); else - TEST_FAIL("no support for global offset\n"); + TEST_FAIL("no support for global offset"); } for (i = 0; i < nr_vcpus; i++) diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c index 31f66ba972..27c10e7a7e 100644 --- a/tools/testing/selftests/kvm/aarch64/hypercalls.c +++ b/tools/testing/selftests/kvm/aarch64/hypercalls.c @@ -175,18 +175,18 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) /* First 'read' should be an upper limit of the features supported */ vcpu_get_reg(vcpu, reg_info->reg, &val); TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), - "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx\n", + "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx", reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val); /* Test a 'write' by disabling all the features of the register map */ ret = __vcpu_set_reg(vcpu, reg_info->reg, 0); TEST_ASSERT(ret == 0, - "Failed to clear all the features of reg: 0x%lx; ret: %d\n", + "Failed to clear all the features of reg: 0x%lx; ret: %d", reg_info->reg, errno); vcpu_get_reg(vcpu, reg_info->reg, &val); TEST_ASSERT(val == 0, - "Expected all the features to be cleared for reg: 0x%lx\n", reg_info->reg); + "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg); /* * Test enabling a feature that's not supported. 
@@ -195,7 +195,7 @@ static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) if (reg_info->max_feat_bit < 63) { ret = __vcpu_set_reg(vcpu, reg_info->reg, BIT(reg_info->max_feat_bit + 1)); TEST_ASSERT(ret != 0 && errno == EINVAL, - "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx\n", + "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx", errno, reg_info->reg); } } @@ -216,7 +216,7 @@ static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu) */ vcpu_get_reg(vcpu, reg_info->reg, &val); TEST_ASSERT(val == 0, - "Expected all the features to be cleared for reg: 0x%lx\n", + "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg); /* @@ -226,7 +226,7 @@ static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu) */ ret = __vcpu_set_reg(vcpu, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit)); TEST_ASSERT(ret != 0 && errno == EBUSY, - "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx\n", + "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx", errno, reg_info->reg); } } @@ -265,7 +265,7 @@ static void test_guest_stage(struct kvm_vm **vm, struct kvm_vcpu **vcpu) case TEST_STAGE_HVC_IFACE_FALSE_INFO: break; default: - TEST_FAIL("Unknown test stage: %d\n", prev_stage); + TEST_FAIL("Unknown test stage: %d", prev_stage); } } @@ -294,7 +294,7 @@ static void test_run(void) REPORT_GUEST_ASSERT(uc); break; default: - TEST_FAIL("Unexpected guest exit\n"); + TEST_FAIL("Unexpected guest exit"); } } diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index eb4217b7c7..53fddad57c 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -414,10 +414,10 @@ static bool punch_hole_in_backing_store(struct kvm_vm *vm, if (fd != 
-1) { ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, paging_size); - TEST_ASSERT(ret == 0, "fallocate failed\n"); + TEST_ASSERT(ret == 0, "fallocate failed"); } else { ret = madvise(hva, paging_size, MADV_DONTNEED); - TEST_ASSERT(ret == 0, "madvise failed\n"); + TEST_ASSERT(ret == 0, "madvise failed"); } return true; @@ -501,7 +501,7 @@ static bool handle_cmd(struct kvm_vm *vm, int cmd) void fail_vcpu_run_no_handler(int ret) { - TEST_FAIL("Unexpected vcpu run failure\n"); + TEST_FAIL("Unexpected vcpu run failure"); } void fail_vcpu_run_mmio_no_syndrome_handler(int ret) @@ -705,7 +705,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) print_test_banner(mode, p); - vm = ____vm_create(mode); + vm = ____vm_create(VM_SHAPE(mode)); setup_memslots(vm, p); kvm_vm_elf_load(vm, program_invocation_name); setup_ucall(vm); diff --git a/tools/testing/selftests/kvm/aarch64/smccc_filter.c b/tools/testing/selftests/kvm/aarch64/smccc_filter.c index f4ceae9c89..2d189f3da2 100644 --- a/tools/testing/selftests/kvm/aarch64/smccc_filter.c +++ b/tools/testing/selftests/kvm/aarch64/smccc_filter.c @@ -178,7 +178,7 @@ static void expect_call_denied(struct kvm_vcpu *vcpu) struct ucall uc; if (get_ucall(vcpu, &uc) != UCALL_SYNC) - TEST_FAIL("Unexpected ucall: %lu\n", uc.cmd); + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); TEST_ASSERT(uc.args[1] == SMCCC_RET_NOT_SUPPORTED, "Unexpected SMCCC return code: %lu", uc.args[1]); diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c index 5ea78986e6..5f97133646 100644 --- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c @@ -42,13 +42,12 @@ struct pmreg_sets { static uint64_t get_pmcr_n(uint64_t pmcr) { - return (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK; + return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr); } static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n) { - 
*pmcr = *pmcr & ~(ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT); - *pmcr |= (pmcr_n << ARMV8_PMU_PMCR_N_SHIFT); + u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N); } static uint64_t get_counters_mask(uint64_t n) @@ -518,11 +517,11 @@ static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail) if (expect_fail) TEST_ASSERT(pmcr_orig == pmcr, - "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx\n", + "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx", pmcr, pmcr_n); else TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr), - "Failed to update PMCR.N to %lu (received: %lu)\n", + "Failed to update PMCR.N to %lu (received: %lu)", pmcr_n, get_pmcr_n(pmcr)); } @@ -595,12 +594,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n) */ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), ®_val); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, - "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n", + "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(set_reg_id), reg_val); vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), ®_val); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, - "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n", + "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(clr_reg_id), reg_val); /* @@ -612,12 +611,12 @@ static void run_pmregs_validity_test(uint64_t pmcr_n) vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), ®_val); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, - "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n", + "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(set_reg_id), reg_val); vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), ®_val); TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0, - "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n", + "Read 
of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx", KVM_ARM64_SYS_REG(clr_reg_id), reg_val); } diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 09c116a82a..bf3609f718 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -45,10 +45,10 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) /* Let the guest access its memory */ ret = _vcpu_run(vcpu); - TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret); if (get_ucall(vcpu, NULL) != UCALL_SYNC) { TEST_ASSERT(false, - "Invalid guest sync status: exit_reason=%s\n", + "Invalid guest sync status: exit_reason=%s", exit_reason_str(run->exit_reason)); } diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index d374dbcf9a..504f6fe980 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -88,9 +88,9 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) ret = _vcpu_run(vcpu); ts_diff = timespec_elapsed(start); - TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret); TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, - "Invalid guest sync status: exit_reason=%s\n", + "Invalid guest sync status: exit_reason=%s", exit_reason_str(run->exit_reason)); pr_debug("Got sync event from vCPU %d\n", vcpu_idx); diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 936f3a8d1b..eaad5b2085 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -262,7 +262,7 @@ static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) "vcpu run failed: errno=%d", err); TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, - "Invalid guest 
sync status: exit_reason=%s\n", + "Invalid guest sync status: exit_reason=%s", exit_reason_str(run->exit_reason)); vcpu_handle_sync_stop(); @@ -376,7 +376,10 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, cleared = kvm_vm_reset_dirty_ring(vcpu->vm); - /* Cleared pages should be the same as collected */ + /* + * Cleared pages should be the same as collected, as KVM is supposed to + * clear only the entries that have been harvested. + */ TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch " "with collected (%u)", cleared, count); @@ -410,17 +413,11 @@ static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) pr_info("vcpu continues now.\n"); } else { TEST_ASSERT(false, "Invalid guest sync status: " - "exit_reason=%s\n", + "exit_reason=%s", exit_reason_str(run->exit_reason)); } } -static void dirty_ring_before_vcpu_join(void) -{ - /* Kick another round of vcpu just to make sure it will quit */ - sem_post(&sem_vcpu_cont); -} - struct log_mode { const char *name; /* Return true if this mode is supported, otherwise false */ @@ -433,7 +430,6 @@ struct log_mode { uint32_t *ring_buf_idx); /* Hook to call when after each vcpu run */ void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err); - void (*before_vcpu_join) (void); } log_modes[LOG_MODE_NUM] = { { .name = "dirty-log", @@ -452,7 +448,6 @@ struct log_mode { .supported = dirty_ring_supported, .create_vm_done = dirty_ring_create_vm_done, .collect_dirty_pages = dirty_ring_collect_dirty_pages, - .before_vcpu_join = dirty_ring_before_vcpu_join, .after_vcpu_run = dirty_ring_after_vcpu_run, }, }; @@ -513,14 +508,6 @@ static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) mode->after_vcpu_run(vcpu, ret, err); } -static void log_mode_before_vcpu_join(void) -{ - struct log_mode *mode = &log_modes[host_log_mode]; - - if (mode->before_vcpu_join) - mode->before_vcpu_join(); -} - static void generate_random_array(uint64_t *guest_array, 
uint64_t size) { uint64_t i; @@ -699,7 +686,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu, pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - vm = __vm_create(mode, 1, extra_mem_pages); + vm = __vm_create(VM_SHAPE(mode), 1, extra_mem_pages); log_mode_create_vm_done(vm); *vcpu = vm_vcpu_add(vm, 0, guest_code); @@ -719,6 +706,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct kvm_vm *vm; unsigned long *bmap; uint32_t ring_buf_idx = 0; + int sem_val; if (!log_mode_supported()) { print_skip("Log mode '%s' not supported", @@ -788,12 +776,22 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Start the iterations */ iteration = 1; sync_global_to_guest(vm, iteration); - host_quit = false; + WRITE_ONCE(host_quit, false); host_dirty_count = 0; host_clear_count = 0; host_track_next_count = 0; WRITE_ONCE(dirty_ring_vcpu_ring_full, false); + /* + * Ensure the previous iteration didn't leave a dangling semaphore, i.e. + * that the main task and vCPU worker were synchronized and completed + * verification of all iterations. + */ + sem_getvalue(&sem_vcpu_stop, &sem_val); + TEST_ASSERT_EQ(sem_val, 0); + sem_getvalue(&sem_vcpu_cont, &sem_val); + TEST_ASSERT_EQ(sem_val, 0); + pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu); while (iteration < p->iterations) { @@ -819,15 +817,21 @@ static void run_test(enum vm_guest_mode mode, void *arg) assert(host_log_mode == LOG_MODE_DIRTY_RING || atomic_read(&vcpu_sync_stop_requested) == false); vm_dirty_log_verify(mode, bmap); - sem_post(&sem_vcpu_cont); - iteration++; + /* + * Set host_quit before sem_vcpu_cont in the final iteration to + * ensure that the vCPU worker doesn't resume the guest. As + * above, the dirty ring test may stop and wait even when not + * explicitly request to do so, i.e. would hang waiting for a + * "continue" if it's allowed to resume the guest. 
+ */ + if (++iteration == p->iterations) + WRITE_ONCE(host_quit, true); + + sem_post(&sem_vcpu_cont); sync_global_to_guest(vm, iteration); } - /* Tell the vcpu thread to quit */ - host_quit = true; - log_mode_before_vcpu_join(); pthread_join(vcpu_thread, NULL); pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c index 8274ef0430..91f05f78e8 100644 --- a/tools/testing/selftests/kvm/get-reg-list.c +++ b/tools/testing/selftests/kvm/get-reg-list.c @@ -152,7 +152,7 @@ static void check_supported(struct vcpu_reg_list *c) continue; __TEST_REQUIRE(kvm_has_cap(s->capability), - "%s: %s not available, skipping tests\n", + "%s: %s not available, skipping tests", config_name(c), s->name); } } diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c new file mode 100644 index 0000000000..c78a98c1a9 --- /dev/null +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright Intel Corporation, 2023 + * + * Author: Chao Peng <chao.p.peng@linux.intel.com> + */ + +#define _GNU_SOURCE +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <stdio.h> +#include <fcntl.h> + +#include <linux/bitmap.h> +#include <linux/falloc.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include "test_util.h" +#include "kvm_util_base.h" + +static void test_file_read_write(int fd) +{ + char buf[64]; + + TEST_ASSERT(read(fd, buf, sizeof(buf)) < 0, + "read on a guest_mem fd should fail"); + TEST_ASSERT(write(fd, buf, sizeof(buf)) < 0, + "write on a guest_mem fd should fail"); + TEST_ASSERT(pread(fd, buf, sizeof(buf), 0) < 0, + "pread on a guest_mem fd should fail"); + TEST_ASSERT(pwrite(fd, buf, sizeof(buf), 0) < 0, + "pwrite on a guest_mem fd should fail"); +} + +static void test_mmap(int fd, size_t page_size) 
+{ + char *mem; + + mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + TEST_ASSERT_EQ(mem, MAP_FAILED); +} + +static void test_file_size(int fd, size_t page_size, size_t total_size) +{ + struct stat sb; + int ret; + + ret = fstat(fd, &sb); + TEST_ASSERT(!ret, "fstat should succeed"); + TEST_ASSERT_EQ(sb.st_size, total_size); + TEST_ASSERT_EQ(sb.st_blksize, page_size); +} + +static void test_fallocate(int fd, size_t page_size, size_t total_size) +{ + int ret; + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, total_size); + TEST_ASSERT(!ret, "fallocate with aligned offset and size should succeed"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + page_size - 1, page_size); + TEST_ASSERT(ret, "fallocate with unaligned offset should fail"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size, page_size); + TEST_ASSERT(ret, "fallocate beginning at total_size should fail"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size + page_size, page_size); + TEST_ASSERT(ret, "fallocate beginning after total_size should fail"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + total_size, page_size); + TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) at total_size should succeed"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + total_size + page_size, page_size); + TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) after total_size should succeed"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + page_size, page_size - 1); + TEST_ASSERT(ret, "fallocate with unaligned size should fail"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + page_size, page_size); + TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) with aligned offset and size should succeed"); + + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, page_size, page_size); + TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed"); +} + +static void test_invalid_punch_hole(int fd, size_t page_size, size_t 
total_size) +{ + struct { + off_t offset; + off_t len; + } testcases[] = { + {0, 1}, + {0, page_size - 1}, + {0, page_size + 1}, + + {1, 1}, + {1, page_size - 1}, + {1, page_size}, + {1, page_size + 1}, + + {page_size, 1}, + {page_size, page_size - 1}, + {page_size, page_size + 1}, + }; + int ret, i; + + for (i = 0; i < ARRAY_SIZE(testcases); i++) { + ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + testcases[i].offset, testcases[i].len); + TEST_ASSERT(ret == -1 && errno == EINVAL, + "PUNCH_HOLE with !PAGE_SIZE offset (%lx) and/or length (%lx) should fail", + testcases[i].offset, testcases[i].len); + } +} + +static void test_create_guest_memfd_invalid(struct kvm_vm *vm) +{ + size_t page_size = getpagesize(); + uint64_t flag; + size_t size; + int fd; + + for (size = 1; size < page_size; size++) { + fd = __vm_create_guest_memfd(vm, size, 0); + TEST_ASSERT(fd == -1 && errno == EINVAL, + "guest_memfd() with non-page-aligned page size '0x%lx' should fail with EINVAL", + size); + } + + for (flag = 1; flag; flag <<= 1) { /* start at bit 0: starting at 0 would skip the loop entirely */ + fd = __vm_create_guest_memfd(vm, page_size, flag); + TEST_ASSERT(fd == -1 && errno == EINVAL, + "guest_memfd() with flag '0x%lx' should fail with EINVAL", + flag); + } +} + +static void test_create_guest_memfd_multiple(struct kvm_vm *vm) +{ + int fd1, fd2, ret; + struct stat st1, st2; + + fd1 = __vm_create_guest_memfd(vm, 4096, 0); + TEST_ASSERT(fd1 != -1, "memfd creation should succeed"); + + ret = fstat(fd1, &st1); + TEST_ASSERT(ret != -1, "memfd fstat should succeed"); + TEST_ASSERT(st1.st_size == 4096, "memfd st_size should match requested size"); + + fd2 = __vm_create_guest_memfd(vm, 8192, 0); + TEST_ASSERT(fd2 != -1, "memfd creation should succeed"); + + ret = fstat(fd2, &st2); + TEST_ASSERT(ret != -1, "memfd fstat should succeed"); + TEST_ASSERT(st2.st_size == 8192, "second memfd st_size should match requested size"); + + ret = fstat(fd1, &st1); + TEST_ASSERT(ret != -1, "memfd fstat should succeed"); + TEST_ASSERT(st1.st_size ==
4096, "first memfd st_size should still match requested size"); + TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers"); +} + +int main(int argc, char *argv[]) +{ + size_t page_size; + size_t total_size; + int fd; + struct kvm_vm *vm; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD)); + + page_size = getpagesize(); + total_size = page_size * 4; + + vm = vm_create_barebones(); + + test_create_guest_memfd_invalid(vm); + test_create_guest_memfd_multiple(vm); + + fd = vm_create_guest_memfd(vm, total_size, 0); + + test_file_read_write(fd); + test_mmap(fd, page_size); + test_file_size(fd, page_size, total_size); + test_fallocate(fd, page_size, total_size); + test_invalid_punch_hole(fd, page_size, total_size); + + close(fd); +} diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c index 41230b7461..3502caa359 100644 --- a/tools/testing/selftests/kvm/guest_print_test.c +++ b/tools/testing/selftests/kvm/guest_print_test.c @@ -98,7 +98,7 @@ static void ucall_abort(const char *assert_msg, const char *expected_assert_msg) int offset = len_str - len_substr; TEST_ASSERT(len_substr <= len_str, - "Expected '%s' to be a substring of '%s'\n", + "Expected '%s' to be a substring of '%s'", assert_msg, expected_assert_msg); TEST_ASSERT(strcmp(&assert_msg[offset], expected_assert_msg) == 0, @@ -116,7 +116,7 @@ static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf, vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, - "Unexpected exit reason: %u (%s),\n", + "Unexpected exit reason: %u (%s),", run->exit_reason, exit_reason_str(run->exit_reason)); switch (get_ucall(vcpu, &uc)) { @@ -161,11 +161,11 @@ static void test_limits(void) vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, - "Unexpected exit reason: %u (%s),\n", + "Unexpected exit reason: %u (%s),", run->exit_reason, exit_reason_str(run->exit_reason)); TEST_ASSERT(get_ucall(vcpu, &uc) == 
UCALL_ABORT, - "Unexpected ucall command: %lu, Expected: %u (UCALL_ABORT)\n", + "Unexpected ucall command: %lu, Expected: %u (UCALL_ABORT)", uc.cmd, UCALL_ABORT); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c index f5d59b9934..decc521fc7 100644 --- a/tools/testing/selftests/kvm/hardware_disable_test.c +++ b/tools/testing/selftests/kvm/hardware_disable_test.c @@ -41,7 +41,7 @@ static void *run_vcpu(void *arg) vcpu_run(vcpu); - TEST_ASSERT(false, "%s: exited with reason %d: %s\n", + TEST_ASSERT(false, "%s: exited with reason %d: %s", __func__, run->exit_reason, exit_reason_str(run->exit_reason)); pthread_exit(NULL); @@ -55,7 +55,7 @@ static void *sleeping_thread(void *arg) fd = open("/dev/null", O_RDWR); close(fd); } - TEST_ASSERT(false, "%s: exited\n", __func__); + TEST_ASSERT(false, "%s: exited", __func__); pthread_exit(NULL); } @@ -118,7 +118,7 @@ static void run_test(uint32_t run) for (i = 0; i < VCPU_NUM; ++i) check_join(threads[i], &b); /* Should not be reached */ - TEST_ASSERT(false, "%s: [%d] child escaped the ninja\n", __func__, run); + TEST_ASSERT(false, "%s: [%d] child escaped the ninja", __func__, run); } void wait_for_child_setup(pid_t pid) diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index c42d683102..cf20e44e86 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -119,8 +119,8 @@ enum { /* Access flag update enable/disable */ #define TCR_EL1_HA (1ULL << 39) -void aarch64_get_supported_page_sizes(uint32_t ipa, - bool *ps4k, bool *ps16k, bool *ps64k); +void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, + uint32_t *ipa16k, uint32_t *ipa64k); void vm_init_descriptor_tables(struct kvm_vm *vm); void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu); diff --git 
a/tools/testing/selftests/kvm/include/guest_modes.h b/tools/testing/selftests/kvm/include/guest_modes.h index b691df33e6..63f5167397 100644 --- a/tools/testing/selftests/kvm/include/guest_modes.h +++ b/tools/testing/selftests/kvm/include/guest_modes.h @@ -11,8 +11,8 @@ struct guest_mode { extern struct guest_mode guest_modes[NUM_VM_MODES]; -#define guest_mode_append(mode, supported, enabled) ({ \ - guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ +#define guest_mode_append(mode, enabled) ({ \ + guest_modes[mode] = (struct guest_mode){ (enabled), (enabled) }; \ }) void guest_modes_append_default(void); diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index a18db6a7b3..9e5afc472c 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -44,7 +44,7 @@ typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ struct userspace_mem_region { - struct kvm_userspace_memory_region region; + struct kvm_userspace_memory_region2 region; struct sparsebit *unused_phy_pages; int fd; off_t offset; @@ -129,6 +129,7 @@ struct vcpu_reg_sublist { const char *name; long capability; int feature; + int feature_type; bool finalize; __u64 *regs; __u64 regs_n; @@ -171,6 +172,7 @@ static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm, enum vm_guest_mode { VM_MODE_P52V48_4K, + VM_MODE_P52V48_16K, VM_MODE_P52V48_64K, VM_MODE_P48V48_4K, VM_MODE_P48V48_16K, @@ -188,6 +190,23 @@ enum vm_guest_mode { NUM_VM_MODES, }; +struct vm_shape { + enum vm_guest_mode mode; + unsigned int type; +}; + +#define VM_TYPE_DEFAULT 0 + +#define VM_SHAPE(__mode) \ +({ \ + struct vm_shape shape = { \ + .mode = (__mode), \ + .type = VM_TYPE_DEFAULT \ + }; \ + \ + shape; \ +}) + #if defined(__aarch64__) extern enum vm_guest_mode vm_mode_default; @@ 
-220,6 +239,8 @@ extern enum vm_guest_mode vm_mode_default; #endif +#define VM_SHAPE_DEFAULT VM_SHAPE(VM_MODE_DEFAULT) + #define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT) #define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE) @@ -248,6 +269,13 @@ static inline bool kvm_has_cap(long cap) #define __KVM_SYSCALL_ERROR(_name, _ret) \ "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno) +/* + * Use the "inner", double-underscore macro when reporting errors from within + * other macros so that the name of ioctl() and not its literal numeric value + * is printed on error. The "outer" macro is strongly preferred when reporting + * errors "directly", i.e. without an additional layer of macros, as it reduces + * the probability of passing in the wrong string. + */ #define __KVM_IOCTL_ERROR(_name, _ret) __KVM_SYSCALL_ERROR(_name, _ret) #define KVM_IOCTL_ERROR(_ioctl, _ret) __KVM_IOCTL_ERROR(#_ioctl, _ret) @@ -260,17 +288,13 @@ static inline bool kvm_has_cap(long cap) #define __kvm_ioctl(kvm_fd, cmd, arg) \ kvm_do_ioctl(kvm_fd, cmd, arg) - -#define _kvm_ioctl(kvm_fd, cmd, name, arg) \ +#define kvm_ioctl(kvm_fd, cmd, arg) \ ({ \ int ret = __kvm_ioctl(kvm_fd, cmd, arg); \ \ - TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(name, ret)); \ + TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(#cmd, ret)); \ }) -#define kvm_ioctl(kvm_fd, cmd, arg) \ - _kvm_ioctl(kvm_fd, cmd, #cmd, arg) - static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { } #define __vm_ioctl(vm, cmd, arg) \ @@ -279,17 +303,42 @@ static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { } kvm_do_ioctl((vm)->fd, cmd, arg); \ }) -#define _vm_ioctl(vm, cmd, name, arg) \ +/* + * Assert that a VM or vCPU ioctl() succeeded, with extra magic to detect if + * the ioctl() failed because KVM killed/bugged the VM. To detect a dead VM, + * probe KVM_CAP_USER_MEMORY, which (a) has been supported by KVM since before + * selftests existed and (b) should never outright fail, i.e. is supposed to + * return 0 or 1. 
If KVM kills a VM, KVM returns -EIO for all ioctl()s for the + * VM and its vCPUs, including KVM_CHECK_EXTENSION. + */ +#define __TEST_ASSERT_VM_VCPU_IOCTL(cond, name, ret, vm) \ +do { \ + int __errno = errno; \ + \ + static_assert_is_vm(vm); \ + \ + if (cond) \ + break; \ + \ + if (errno == EIO && \ + __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)KVM_CAP_USER_MEMORY) < 0) { \ + TEST_ASSERT(errno == EIO, "KVM killed the VM, should return -EIO"); \ + TEST_FAIL("KVM killed/bugged the VM, check the kernel log for clues"); \ + } \ + errno = __errno; \ + TEST_ASSERT(cond, __KVM_IOCTL_ERROR(name, ret)); \ +} while (0) + +#define TEST_ASSERT_VM_VCPU_IOCTL(cond, cmd, ret, vm) \ + __TEST_ASSERT_VM_VCPU_IOCTL(cond, #cmd, ret, vm) + +#define vm_ioctl(vm, cmd, arg) \ ({ \ int ret = __vm_ioctl(vm, cmd, arg); \ \ - TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(name, ret)); \ + __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \ }) -#define vm_ioctl(vm, cmd, arg) \ - _vm_ioctl(vm, cmd, #cmd, arg) - - static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { } #define __vcpu_ioctl(vcpu, cmd, arg) \ @@ -298,16 +347,13 @@ static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { } kvm_do_ioctl((vcpu)->fd, cmd, arg); \ }) -#define _vcpu_ioctl(vcpu, cmd, name, arg) \ +#define vcpu_ioctl(vcpu, cmd, arg) \ ({ \ int ret = __vcpu_ioctl(vcpu, cmd, arg); \ \ - TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(name, ret)); \ + __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, (vcpu)->vm); \ }) -#define vcpu_ioctl(vcpu, cmd, arg) \ - _vcpu_ioctl(vcpu, cmd, #cmd, arg) - /* * Looks up and returns the value corresponding to the capability * (KVM_CAP_*) given by cap. 
@@ -316,7 +362,7 @@ static inline int vm_check_cap(struct kvm_vm *vm, long cap) { int ret = __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)cap); - TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret)); + TEST_ASSERT_VM_VCPU_IOCTL(ret >= 0, KVM_CHECK_EXTENSION, ret, vm); return ret; } @@ -333,6 +379,54 @@ static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0) vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); } +static inline void vm_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa, + uint64_t size, uint64_t attributes) +{ + struct kvm_memory_attributes attr = { + .attributes = attributes, + .address = gpa, + .size = size, + .flags = 0, + }; + + /* + * KVM_SET_MEMORY_ATTRIBUTES overwrites _all_ attributes. These flows + * need significant enhancements to support multiple attributes. + */ + TEST_ASSERT(!attributes || attributes == KVM_MEMORY_ATTRIBUTE_PRIVATE, + "Update me to support multiple attributes!"); + + vm_ioctl(vm, KVM_SET_MEMORY_ATTRIBUTES, &attr); +} + + +static inline void vm_mem_set_private(struct kvm_vm *vm, uint64_t gpa, + uint64_t size) +{ + vm_set_memory_attributes(vm, gpa, size, KVM_MEMORY_ATTRIBUTE_PRIVATE); +} + +static inline void vm_mem_set_shared(struct kvm_vm *vm, uint64_t gpa, + uint64_t size) +{ + vm_set_memory_attributes(vm, gpa, size, 0); +} + +void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t gpa, uint64_t size, + bool punch_hole); + +static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, uint64_t gpa, + uint64_t size) +{ + vm_guest_mem_fallocate(vm, gpa, size, true); +} + +static inline void vm_guest_mem_allocate(struct kvm_vm *vm, uint64_t gpa, + uint64_t size) +{ + vm_guest_mem_fallocate(vm, gpa, size, false); +} + void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); const char *vm_guest_mode_string(uint32_t i); @@ -375,7 +469,7 @@ static inline int vm_get_stats_fd(struct kvm_vm *vm) { int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL); - TEST_ASSERT(fd >= 0, 
KVM_IOCTL_ERROR(KVM_GET_STATS_FD, fd)); + TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_GET_STATS_FD, fd, vm); return fd; } @@ -431,14 +525,44 @@ static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name) void vm_create_irqchip(struct kvm_vm *vm); +static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, + uint64_t flags) +{ + struct kvm_create_guest_memfd guest_memfd = { + .size = size, + .flags = flags, + }; + + return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd); +} + +static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size, + uint64_t flags) +{ + int fd = __vm_create_guest_memfd(vm, size, flags); + + TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_GUEST_MEMFD, fd)); + return fd; +} + void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, uint64_t gpa, uint64_t size, void *hva); int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, uint64_t gpa, uint64_t size, void *hva); +void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva, + uint32_t guest_memfd, uint64_t guest_memfd_offset); +int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva, + uint32_t guest_memfd, uint64_t guest_memfd_offset); + void vm_userspace_mem_region_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, uint64_t guest_paddr, uint32_t slot, uint64_t npages, uint32_t flags); +void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, uint64_t npages, + uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset); void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); @@ -587,7 +711,7 @@ static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu) { int fd = __vcpu_ioctl(vcpu, 
KVM_GET_STATS_FD, NULL); - TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_GET_STATS_FD, fd)); + TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_CHECK_EXTENSION, fd, vcpu->vm); return fd; } @@ -713,21 +837,33 @@ vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm); * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to * calculate the amount of memory needed for per-vCPU data, e.g. stacks. */ -struct kvm_vm *____vm_create(enum vm_guest_mode mode); -struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, +struct kvm_vm *____vm_create(struct vm_shape shape); +struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, uint64_t nr_extra_pages); static inline struct kvm_vm *vm_create_barebones(void) { - return ____vm_create(VM_MODE_DEFAULT); + return ____vm_create(VM_SHAPE_DEFAULT); } +#ifdef __x86_64__ +static inline struct kvm_vm *vm_create_barebones_protected_vm(void) +{ + const struct vm_shape shape = { + .mode = VM_MODE_DEFAULT, + .type = KVM_X86_SW_PROTECTED_VM, + }; + + return ____vm_create(shape); +} +#endif + static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus) { - return __vm_create(VM_MODE_DEFAULT, nr_runnable_vcpus, 0); + return __vm_create(VM_SHAPE_DEFAULT, nr_runnable_vcpus, 0); } -struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, +struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, uint64_t extra_mem_pages, void *guest_code, struct kvm_vcpu *vcpus[]); @@ -735,17 +871,27 @@ static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus, void *guest_code, struct kvm_vcpu *vcpus[]) { - return __vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, 0, + return __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus, 0, guest_code, vcpus); } + +struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape, + struct kvm_vcpu **vcpu, + uint64_t extra_mem_pages, + void *guest_code); + /* * Create a VM with a single vCPU with reasonable defaults 
and @extra_mem_pages * additional pages of guest memory. Returns the VM and vCPU (via out param). */ -struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, - uint64_t extra_mem_pages, - void *guest_code); +static inline struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, + uint64_t extra_mem_pages, + void *guest_code) +{ + return __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, vcpu, + extra_mem_pages, guest_code); +} static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, void *guest_code) @@ -753,6 +899,13 @@ static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, return __vm_create_with_one_vcpu(vcpu, 0, guest_code); } +static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape, + struct kvm_vcpu **vcpu, + void *guest_code) +{ + return __vm_create_shape_with_one_vcpu(shape, vcpu, 0, guest_code); +} + struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); void kvm_pin_this_task_to_pcpu(uint32_t pcpu); @@ -776,10 +929,6 @@ vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages) return n; } -struct kvm_userspace_memory_region * -kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, - uint64_t end); - #define sync_global_to_guest(vm, g) ({ \ typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ memcpy(_p, &(g), sizeof(g)); \ diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h index 5b62a3d2aa..a0f9efe5a2 100644 --- a/tools/testing/selftests/kvm/include/riscv/processor.h +++ b/tools/testing/selftests/kvm/include/riscv/processor.h @@ -10,10 +10,10 @@ #include "kvm_util.h" #include <linux/stringify.h> -static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t idx, - uint64_t size) +static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype, + uint64_t idx, uint64_t size) { - return KVM_REG_RISCV | type | idx | size; + return KVM_REG_RISCV | type | subtype 
| idx | size; } #if __riscv_xlen == 64 @@ -22,24 +22,30 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t idx, #define KVM_REG_SIZE_ULONG KVM_REG_SIZE_U32 #endif -#define RISCV_CONFIG_REG(name) __kvm_reg_id(KVM_REG_RISCV_CONFIG, \ - KVM_REG_RISCV_CONFIG_REG(name), \ - KVM_REG_SIZE_ULONG) +#define RISCV_CONFIG_REG(name) __kvm_reg_id(KVM_REG_RISCV_CONFIG, 0, \ + KVM_REG_RISCV_CONFIG_REG(name), \ + KVM_REG_SIZE_ULONG) -#define RISCV_CORE_REG(name) __kvm_reg_id(KVM_REG_RISCV_CORE, \ - KVM_REG_RISCV_CORE_REG(name), \ - KVM_REG_SIZE_ULONG) +#define RISCV_CORE_REG(name) __kvm_reg_id(KVM_REG_RISCV_CORE, 0, \ + KVM_REG_RISCV_CORE_REG(name), \ + KVM_REG_SIZE_ULONG) -#define RISCV_CSR_REG(name) __kvm_reg_id(KVM_REG_RISCV_CSR, \ - KVM_REG_RISCV_CSR_REG(name), \ - KVM_REG_SIZE_ULONG) +#define RISCV_GENERAL_CSR_REG(name) __kvm_reg_id(KVM_REG_RISCV_CSR, \ + KVM_REG_RISCV_CSR_GENERAL, \ + KVM_REG_RISCV_CSR_REG(name), \ + KVM_REG_SIZE_ULONG) -#define RISCV_TIMER_REG(name) __kvm_reg_id(KVM_REG_RISCV_TIMER, \ - KVM_REG_RISCV_TIMER_REG(name), \ - KVM_REG_SIZE_U64) +#define RISCV_TIMER_REG(name) __kvm_reg_id(KVM_REG_RISCV_TIMER, 0, \ + KVM_REG_RISCV_TIMER_REG(name), \ + KVM_REG_SIZE_U64) -#define RISCV_ISA_EXT_REG(idx) __kvm_reg_id(KVM_REG_RISCV_ISA_EXT, \ - idx, KVM_REG_SIZE_ULONG) +#define RISCV_ISA_EXT_REG(idx) __kvm_reg_id(KVM_REG_RISCV_ISA_EXT, \ + KVM_REG_RISCV_ISA_SINGLE, \ + idx, KVM_REG_SIZE_ULONG) + +#define RISCV_SBI_EXT_REG(idx) __kvm_reg_id(KVM_REG_RISCV_SBI_EXT, \ + KVM_REG_RISCV_SBI_SINGLE, \ + idx, KVM_REG_SIZE_ULONG) /* L3 index Bit[47:39] */ #define PGTBL_L3_INDEX_MASK 0x0000FF8000000000ULL @@ -102,6 +108,17 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t idx, #define SATP_ASID_SHIFT 44 #define SATP_ASID_MASK _AC(0xFFFF, UL) +/* SBI return error codes */ +#define SBI_SUCCESS 0 +#define SBI_ERR_FAILURE -1 +#define SBI_ERR_NOT_SUPPORTED -2 +#define SBI_ERR_INVALID_PARAM -3 +#define SBI_ERR_DENIED -4 +#define SBI_ERR_INVALID_ADDRESS -5 
+#define SBI_ERR_ALREADY_AVAILABLE -6 +#define SBI_ERR_ALREADY_STARTED -7 +#define SBI_ERR_ALREADY_STOPPED -8 + #define SBI_EXT_EXPERIMENTAL_START 0x08000000 #define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF @@ -109,6 +126,15 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t idx, #define KVM_RISCV_SELFTESTS_SBI_UCALL 0 #define KVM_RISCV_SELFTESTS_SBI_UNEXP 1 +enum sbi_ext_id { + SBI_EXT_BASE = 0x10, + SBI_EXT_STA = 0x535441, +}; + +enum sbi_ext_base_fid { + SBI_EXT_BASE_PROBE_EXT = 3, +}; + struct sbiret { long error; long value; @@ -119,4 +145,6 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, unsigned long arg3, unsigned long arg4, unsigned long arg5); +bool guest_sbi_probe_extension(int extid, long *out_val); + #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 8e5f413a59..50a5e31ba8 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -142,6 +142,11 @@ static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t) return vm_mem_backing_src_alias(t)->flag & MAP_SHARED; } +static inline bool backing_src_can_be_huge(enum vm_mem_backing_src_type t) +{ + return t != VM_MEM_SRC_ANONYMOUS && t != VM_MEM_SRC_SHMEM; +} + /* Aligns x up to the next multiple of size. Size must be a power of 2. */ static inline uint64_t align_up(uint64_t x, uint64_t size) { @@ -186,8 +191,10 @@ static inline uint32_t atoi_non_negative(const char *name, const char *num_str) } int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args); -int guest_snprintf(char *buf, int n, const char *fmt, ...); +__printf(3, 4) int guest_snprintf(char *buf, int n, const char *fmt, ...); char *strdup_printf(const char *fmt, ...) 
__attribute__((format(printf, 1, 2), nonnull(1))); +char *sys_get_cur_clocksource(void); + #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index ce33d306c2..d9d6581b8d 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -34,9 +34,10 @@ void ucall_arch_do_ucall(vm_vaddr_t uc); void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu); void ucall(uint64_t cmd, int nargs, ...); -void ucall_fmt(uint64_t cmd, const char *fmt, ...); -void ucall_assert(uint64_t cmd, const char *exp, const char *file, - unsigned int line, const char *fmt, ...); +__printf(2, 3) void ucall_fmt(uint64_t cmd, const char *fmt, ...); +__printf(5, 6) void ucall_assert(uint64_t cmd, const char *exp, + const char *file, unsigned int line, + const char *fmt, ...); uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc); void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); int ucall_nr_pages_required(uint64_t page_size); @@ -52,6 +53,17 @@ int ucall_nr_pages_required(uint64_t page_size); #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) #define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage) +#define GUEST_SYNC1(arg0) ucall(UCALL_SYNC, 1, arg0) +#define GUEST_SYNC2(arg0, arg1) ucall(UCALL_SYNC, 2, arg0, arg1) +#define GUEST_SYNC3(arg0, arg1, arg2) \ + ucall(UCALL_SYNC, 3, arg0, arg1, arg2) +#define GUEST_SYNC4(arg0, arg1, arg2, arg3) \ + ucall(UCALL_SYNC, 4, arg0, arg1, arg2, arg3) +#define GUEST_SYNC5(arg0, arg1, arg2, arg3, arg4) \ + ucall(UCALL_SYNC, 5, arg0, arg1, arg2, arg3, arg4) +#define GUEST_SYNC6(arg0, arg1, arg2, arg3, arg4, arg5) \ + ucall(UCALL_SYNC, 6, arg0, arg1, arg2, arg3, arg4, arg5) + #define GUEST_PRINTF(_fmt, _args...) 
ucall_fmt(UCALL_PRINTF, _fmt, ##_args) #define GUEST_DONE() ucall(UCALL_DONE, 0) diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 25bc61dac5..5bca8c947c 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -15,6 +15,7 @@ #include <asm/msr-index.h> #include <asm/prctl.h> +#include <linux/kvm_para.h> #include <linux/stringify.h> #include "../kvm_util.h" @@ -1194,6 +1195,20 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1); void xen_hypercall(uint64_t nr, uint64_t a0, void *a1); +static inline uint64_t __kvm_hypercall_map_gpa_range(uint64_t gpa, + uint64_t size, uint64_t flags) +{ + return kvm_hypercall(KVM_HC_MAP_GPA_RANGE, gpa, size >> PAGE_SHIFT, flags, 0); +} + +static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size, + uint64_t flags) +{ + uint64_t ret = __kvm_hypercall_map_gpa_range(gpa, size, flags); + + GUEST_ASSERT(!ret); +} + void __vm_xsave_require_permission(uint64_t xfeature, const char *name); #define vm_xsave_require_permission(xfeature) \ @@ -1256,4 +1271,6 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, #define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT) #define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT) +bool sys_clocksource_is_based_on_tsc(void); + #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index 31b3cb24b9..b9e23265e4 100644 --- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -65,7 +65,7 @@ int main(int argc, char *argv[]) int r = setrlimit(RLIMIT_NOFILE, &rl); __TEST_REQUIRE(r >= 0, - "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n", + "RLIMIT_NOFILE hard 
limit is too low (%d, wanted %d)", old_rlim_max, nr_fds_wanted); } else { TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index 69f26d80c8..e0ba97ac1c 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -204,9 +204,9 @@ static void *vcpu_worker(void *data) ret = _vcpu_run(vcpu); ts_diff = timespec_elapsed(start); - TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret); TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, - "Invalid guest sync status: exit_reason=%s\n", + "Invalid guest sync status: exit_reason=%s", exit_reason_str(vcpu->run->exit_reason)); pr_debug("Got sync event from vCPU %d\n", vcpu->id); @@ -254,7 +254,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) /* Create a VM with enough guest pages */ guest_num_pages = test_mem_size / guest_page_size; - vm = __vm_create_with_vcpus(mode, nr_vcpus, guest_num_pages, + vm = __vm_create_with_vcpus(VM_SHAPE(mode), nr_vcpus, guest_num_pages, guest_code, test_args.vcpus); /* Align down GPA of the testing memslot */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 6fe12e985b..43b9a72833 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -12,6 +12,7 @@ #include "kvm_util.h" #include "processor.h" #include <linux/bitfield.h> +#include <linux/sizes.h> #define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000 @@ -58,13 +59,25 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva) return (gva >> vm->page_shift) & mask; } +static inline bool use_lpa2_pte_format(struct kvm_vm *vm) +{ + return (vm->page_size == SZ_4K || vm->page_size == SZ_16K) && + (vm->pa_bits > 48 || vm->va_bits > 
48); +} + static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs) { uint64_t pte; - pte = pa & GENMASK(47, vm->page_shift); - if (vm->page_shift == 16) - pte |= FIELD_GET(GENMASK(51, 48), pa) << 12; + if (use_lpa2_pte_format(vm)) { + pte = pa & GENMASK(49, vm->page_shift); + pte |= FIELD_GET(GENMASK(51, 50), pa) << 8; + attrs &= ~GENMASK(9, 8); + } else { + pte = pa & GENMASK(47, vm->page_shift); + if (vm->page_shift == 16) + pte |= FIELD_GET(GENMASK(51, 48), pa) << 12; + } pte |= attrs; return pte; @@ -74,9 +87,14 @@ static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte) { uint64_t pa; - pa = pte & GENMASK(47, vm->page_shift); - if (vm->page_shift == 16) - pa |= FIELD_GET(GENMASK(15, 12), pte) << 48; + if (use_lpa2_pte_format(vm)) { + pa = pte & GENMASK(49, vm->page_shift); + pa |= FIELD_GET(GENMASK(9, 8), pte) << 50; + } else { + pa = pte & GENMASK(47, vm->page_shift); + if (vm->page_shift == 16) + pa |= FIELD_GET(GENMASK(15, 12), pte) << 48; + } return pa; } @@ -266,9 +284,6 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) /* Configure base granule size */ switch (vm->mode) { - case VM_MODE_P52V48_4K: - TEST_FAIL("AArch64 does not support 4K sized pages " - "with 52-bit physical address ranges"); case VM_MODE_PXXV48_4K: TEST_FAIL("AArch64 does not support 4K sized pages " "with ANY-bit physical address ranges"); @@ -278,12 +293,14 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) case VM_MODE_P36V48_64K: tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ break; + case VM_MODE_P52V48_16K: case VM_MODE_P48V48_16K: case VM_MODE_P40V48_16K: case VM_MODE_P36V48_16K: case VM_MODE_P36V47_16K: tcr_el1 |= 2ul << 14; /* TG0 = 16KB */ break; + case VM_MODE_P52V48_4K: case VM_MODE_P48V48_4K: case VM_MODE_P40V48_4K: case VM_MODE_P36V48_4K: @@ -297,6 +314,8 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) /* Configure output size */ switch (vm->mode) { + case VM_MODE_P52V48_4K: + 
case VM_MODE_P52V48_16K: case VM_MODE_P52V48_64K: tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2; @@ -325,6 +344,8 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */; tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12); tcr_el1 |= (64 - vm->va_bits) /* T0SZ */; + if (use_lpa2_pte_format(vm)) + tcr_el1 |= (1ul << 59) /* DS */; vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1); vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1); @@ -377,7 +398,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) int i; TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" - " num: %u\n", num); + " num: %u", num); va_start(ap, num); @@ -492,12 +513,24 @@ uint32_t guest_get_vcpuid(void) return read_sysreg(tpidr_el1); } -void aarch64_get_supported_page_sizes(uint32_t ipa, - bool *ps4k, bool *ps16k, bool *ps64k) +static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran, + uint32_t not_sup_val, uint32_t ipa52_min_val) +{ + if (gran == not_sup_val) + return 0; + else if (gran >= ipa52_min_val && vm_ipa >= 52) + return 52; + else + return min(vm_ipa, 48U); +} + +void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, + uint32_t *ipa16k, uint32_t *ipa64k) { struct kvm_vcpu_init preferred_init; int kvm_fd, vm_fd, vcpu_fd, err; uint64_t val; + uint32_t gran; struct kvm_one_reg reg = { .id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1), .addr = (uint64_t)&val, @@ -518,9 +551,17 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg); TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd)); - *ps4k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val) != 0xf; - *ps64k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val) == 0; - *ps16k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val) != 0; + gran =
FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val); + *ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI, + ID_AA64MMFR0_EL1_TGRAN4_52_BIT); + + gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val); + *ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI, + ID_AA64MMFR0_EL1_TGRAN64_IMP); + + gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val); + *ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI, + ID_AA64MMFR0_EL1_TGRAN16_52_BIT); close(vcpu_fd); close(vm_fd); diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c index b5f28d21a9..184378d593 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c +++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c @@ -38,7 +38,7 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs, struct list_head *iter; unsigned int nr_gic_pages, nr_vcpus_created = 0; - TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty\n"); + TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty"); /* * Make sure that the caller is infact calling this @@ -47,7 +47,7 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs, list_for_each(iter, &vm->vcpus) nr_vcpus_created++; TEST_ASSERT(nr_vcpus == nr_vcpus_created, - "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)\n", + "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)", nr_vcpus, nr_vcpus_created); /* Distributor setup */ diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index 266f3876e1..f34d926d97 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -184,7 +184,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename) "Seek to program segment offset failed,\n" " program header idx: %u errno: %i\n" " offset_rv: 0x%jx\n" - " expected: 
0x%jx\n", + " expected: 0x%jx", n1, errno, (intmax_t) offset_rv, (intmax_t) phdr.p_offset); test_read(fd, addr_gva2hva(vm, phdr.p_vaddr), diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c index 1df3ce4b16..b04901e551 100644 --- a/tools/testing/selftests/kvm/lib/guest_modes.c +++ b/tools/testing/selftests/kvm/lib/guest_modes.c @@ -14,37 +14,33 @@ struct guest_mode guest_modes[NUM_VM_MODES]; void guest_modes_append_default(void) { #ifndef __aarch64__ - guest_mode_append(VM_MODE_DEFAULT, true, true); + guest_mode_append(VM_MODE_DEFAULT, true); #else { unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); - bool ps4k, ps16k, ps64k; + uint32_t ipa4k, ipa16k, ipa64k; int i; - aarch64_get_supported_page_sizes(limit, &ps4k, &ps16k, &ps64k); + aarch64_get_supported_page_sizes(limit, &ipa4k, &ipa16k, &ipa64k); - vm_mode_default = NUM_VM_MODES; + guest_mode_append(VM_MODE_P52V48_4K, ipa4k >= 52); + guest_mode_append(VM_MODE_P52V48_16K, ipa16k >= 52); + guest_mode_append(VM_MODE_P52V48_64K, ipa64k >= 52); - if (limit >= 52) - guest_mode_append(VM_MODE_P52V48_64K, ps64k, ps64k); - if (limit >= 48) { - guest_mode_append(VM_MODE_P48V48_4K, ps4k, ps4k); - guest_mode_append(VM_MODE_P48V48_16K, ps16k, ps16k); - guest_mode_append(VM_MODE_P48V48_64K, ps64k, ps64k); - } - if (limit >= 40) { - guest_mode_append(VM_MODE_P40V48_4K, ps4k, ps4k); - guest_mode_append(VM_MODE_P40V48_16K, ps16k, ps16k); - guest_mode_append(VM_MODE_P40V48_64K, ps64k, ps64k); - if (ps4k) - vm_mode_default = VM_MODE_P40V48_4K; - } - if (limit >= 36) { - guest_mode_append(VM_MODE_P36V48_4K, ps4k, ps4k); - guest_mode_append(VM_MODE_P36V48_16K, ps16k, ps16k); - guest_mode_append(VM_MODE_P36V48_64K, ps64k, ps64k); - guest_mode_append(VM_MODE_P36V47_16K, ps16k, ps16k); - } + guest_mode_append(VM_MODE_P48V48_4K, ipa4k >= 48); + guest_mode_append(VM_MODE_P48V48_16K, ipa16k >= 48); + guest_mode_append(VM_MODE_P48V48_64K, ipa64k >= 48); + + 
guest_mode_append(VM_MODE_P40V48_4K, ipa4k >= 40); + guest_mode_append(VM_MODE_P40V48_16K, ipa16k >= 40); + guest_mode_append(VM_MODE_P40V48_64K, ipa64k >= 40); + + guest_mode_append(VM_MODE_P36V48_4K, ipa4k >= 36); + guest_mode_append(VM_MODE_P36V48_16K, ipa16k >= 36); + guest_mode_append(VM_MODE_P36V48_64K, ipa64k >= 36); + guest_mode_append(VM_MODE_P36V47_16K, ipa16k >= 36); + + vm_mode_default = ipa4k >= 40 ? VM_MODE_P40V48_4K : NUM_VM_MODES; /* * Pick the first supported IPA size if the default @@ -72,7 +68,7 @@ void guest_modes_append_default(void) close(kvm_fd); /* Starting with z13 we have 47bits of physical address */ if (info.ibc >= 0x30) - guest_mode_append(VM_MODE_P47V64_4K, true, true); + guest_mode_append(VM_MODE_P47V64_4K, true); } #endif #ifdef __riscv @@ -80,9 +76,9 @@ void guest_modes_append_default(void) unsigned int sz = kvm_check_cap(KVM_CAP_VM_GPA_BITS); if (sz >= 52) - guest_mode_append(VM_MODE_P52V48_4K, true, true); + guest_mode_append(VM_MODE_P52V48_4K, true); if (sz >= 48) - guest_mode_append(VM_MODE_P48V48_4K, true, true); + guest_mode_append(VM_MODE_P48V48_4K, true); } #endif } diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 7a8af1821f..1b197426f2 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -27,7 +27,8 @@ int open_path_or_exit(const char *path, int flags) int fd; fd = open(path, flags); - __TEST_REQUIRE(fd >= 0, "%s not available (errno: %d)", path, errno); + __TEST_REQUIRE(fd >= 0 || errno != ENOENT, "Cannot open %s: %s", path, strerror(errno)); + TEST_ASSERT(fd >= 0, "Failed to open '%s'", path); return fd; } @@ -148,6 +149,7 @@ const char *vm_guest_mode_string(uint32_t i) { static const char * const strings[] = { [VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages", + [VM_MODE_P52V48_16K] = "PA-bits:52, VA-bits:48, 16K pages", [VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages", [VM_MODE_P48V48_4K] = 
"PA-bits:48, VA-bits:48, 4K pages", [VM_MODE_P48V48_16K] = "PA-bits:48, VA-bits:48, 16K pages", @@ -173,6 +175,7 @@ const char *vm_guest_mode_string(uint32_t i) const struct vm_guest_mode_params vm_guest_mode_params[] = { [VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 }, + [VM_MODE_P52V48_16K] = { 52, 48, 0x4000, 14 }, [VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 }, [VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 }, [VM_MODE_P48V48_16K] = { 48, 48, 0x4000, 14 }, @@ -209,7 +212,7 @@ __weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm) (1ULL << (vm->va_bits - 1)) >> vm->page_shift); } -struct kvm_vm *____vm_create(enum vm_guest_mode mode) +struct kvm_vm *____vm_create(struct vm_shape shape) { struct kvm_vm *vm; @@ -221,13 +224,13 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode) vm->regions.hva_tree = RB_ROOT; hash_init(vm->regions.slot_hash); - vm->mode = mode; - vm->type = 0; + vm->mode = shape.mode; + vm->type = shape.type; - vm->pa_bits = vm_guest_mode_params[mode].pa_bits; - vm->va_bits = vm_guest_mode_params[mode].va_bits; - vm->page_size = vm_guest_mode_params[mode].page_size; - vm->page_shift = vm_guest_mode_params[mode].page_shift; + vm->pa_bits = vm_guest_mode_params[vm->mode].pa_bits; + vm->va_bits = vm_guest_mode_params[vm->mode].va_bits; + vm->page_size = vm_guest_mode_params[vm->mode].page_size; + vm->page_shift = vm_guest_mode_params[vm->mode].page_shift; /* Setup mode specific traits. */ switch (vm->mode) { @@ -251,6 +254,7 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode) case VM_MODE_P36V48_64K: vm->pgtable_levels = 3; break; + case VM_MODE_P52V48_16K: case VM_MODE_P48V48_16K: case VM_MODE_P40V48_16K: case VM_MODE_P36V48_16K: @@ -265,7 +269,7 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode) /* * Ignore KVM support for 5-level paging (vm->va_bits == 57), * it doesn't take effect unless a CR4.LA57 is set, which it - * isn't for this VM_MODE. + * isn't for this mode (48-bit virtual address space). 
*/ TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57, "Linear address width (%d bits) not supported", @@ -285,10 +289,11 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode) vm->pgtable_levels = 5; break; default: - TEST_FAIL("Unknown guest mode, mode: 0x%x", mode); + TEST_FAIL("Unknown guest mode: 0x%x", vm->mode); } #ifdef __aarch64__ + TEST_ASSERT(!vm->type, "ARM doesn't support test-provided types"); if (vm->pa_bits != 40) vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits); #endif @@ -316,7 +321,7 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, uint64_t nr_pages; TEST_ASSERT(nr_runnable_vcpus, - "Use vm_create_barebones() for VMs that _never_ have vCPUs\n"); + "Use vm_create_barebones() for VMs that _never_ have vCPUs"); TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS), "nr_vcpus = %d too large for host, max-vcpus = %d", @@ -347,19 +352,19 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, return vm_adjust_num_guest_pages(mode, nr_pages); } -struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, +struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus, uint64_t nr_extra_pages) { - uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus, + uint64_t nr_pages = vm_nr_pages_required(shape.mode, nr_runnable_vcpus, nr_extra_pages); struct userspace_mem_region *slot0; struct kvm_vm *vm; int i; - pr_debug("%s: mode='%s' pages='%ld'\n", __func__, - vm_guest_mode_string(mode), nr_pages); + pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__, + vm_guest_mode_string(shape.mode), shape.type, nr_pages); - vm = ____vm_create(mode); + vm = ____vm_create(shape); vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0); for (i = 0; i < NR_MEM_REGIONS; i++) @@ -400,7 +405,7 @@ struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, * extra_mem_pages is only used to calculate the maximum page table size, * no real memory 
allocation for non-slot0 memory in this function. */ -struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, +struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus, uint64_t extra_mem_pages, void *guest_code, struct kvm_vcpu *vcpus[]) { @@ -409,7 +414,7 @@ struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array"); - vm = __vm_create(mode, nr_vcpus, extra_mem_pages); + vm = __vm_create(shape, nr_vcpus, extra_mem_pages); for (i = 0; i < nr_vcpus; ++i) vcpus[i] = vm_vcpu_add(vm, i, guest_code); @@ -417,15 +422,15 @@ struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus return vm; } -struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, - uint64_t extra_mem_pages, - void *guest_code) +struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape, + struct kvm_vcpu **vcpu, + uint64_t extra_mem_pages, + void *guest_code) { struct kvm_vcpu *vcpus[1]; struct kvm_vm *vm; - vm = __vm_create_with_vcpus(VM_MODE_DEFAULT, 1, extra_mem_pages, - guest_code, vcpus); + vm = __vm_create_with_vcpus(shape, 1, extra_mem_pages, guest_code, vcpus); *vcpu = vcpus[0]; return vm; @@ -453,8 +458,9 @@ void kvm_vm_restart(struct kvm_vm *vmp) vm_create_irqchip(vmp); hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) { - int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region); - TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" + int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION2, &region->region); + + TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" " rc: %i errno: %i\n" " slot: %u flags: 0x%x\n" " guest_phys_addr: 0x%llx size: 0x%llx", @@ -486,7 +492,7 @@ void kvm_pin_this_task_to_pcpu(uint32_t pcpu) CPU_ZERO(&mask); CPU_SET(pcpu, &mask); r = sched_setaffinity(0, sizeof(mask), &mask); - TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.\n", pcpu); +
TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.", pcpu); } static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) @@ -494,7 +500,7 @@ static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) uint32_t pcpu = atoi_non_negative("CPU number", cpu_str); TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask), - "Not allowed to run on pCPU '%d', check cgroups?\n", pcpu); + "Not allowed to run on pCPU '%d', check cgroups?", pcpu); return pcpu; } @@ -524,7 +530,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], int i, r; cpu_list = strdup(pcpus_string); - TEST_ASSERT(cpu_list, "strdup() allocation failed.\n"); + TEST_ASSERT(cpu_list, "strdup() allocation failed."); r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask); TEST_ASSERT(!r, "sched_getaffinity() failed"); @@ -533,7 +539,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], /* 1. Get all pcpus for vcpus. */ for (i = 0; i < nr_vcpus; i++) { - TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'\n", i); + TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'", i); vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask); cpu = strtok(NULL, delim); } @@ -590,35 +596,6 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end) return NULL; } -/* - * KVM Userspace Memory Region Find - * - * Input Args: - * vm - Virtual Machine - * start - Starting VM physical address - * end - Ending VM physical address, inclusive. - * - * Output Args: None - * - * Return: - * Pointer to overlapping region, NULL if no such region. - * - * Public interface to userspace_mem_region_find. Allows tests to look up - * the memslot datastructure for a given range of guest physical memory. 
- */ -struct kvm_userspace_memory_region * -kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, - uint64_t end) -{ - struct userspace_mem_region *region; - - region = userspace_mem_region_find(vm, start, end); - if (!region) - return NULL; - - return &region->region; -} - __weak void vcpu_arch_free(struct kvm_vcpu *vcpu) { @@ -686,7 +663,7 @@ static void __vm_mem_region_delete(struct kvm_vm *vm, } region->region.memory_size = 0; - vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region); + vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region); sparsebit_free(&region->unused_phy_pages); ret = munmap(region->mmap_start, region->mmap_size); @@ -697,6 +674,8 @@ static void __vm_mem_region_delete(struct kvm_vm *vm, TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); close(region->fd); } + if (region->region.guest_memfd >= 0) + close(region->region.guest_memfd); free(region); } @@ -898,36 +877,44 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags, errno, strerror(errno)); } -/* - * VM Userspace Memory Region Add - * - * Input Args: - * vm - Virtual Machine - * src_type - Storage source for this region. - * NULL to use anonymous memory. - * guest_paddr - Starting guest physical address - * slot - KVM region slot - * npages - Number of physical pages - * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES) - * - * Output Args: None - * - * Return: None - * - * Allocates a memory area of the number of pages specified by npages - * and maps it to the VM specified by vm, at a starting physical address - * given by guest_paddr. The region is created with a KVM region slot - * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The - * region is created with the flags given by flags. 
- */ -void vm_userspace_mem_region_add(struct kvm_vm *vm, - enum vm_mem_backing_src_type src_type, - uint64_t guest_paddr, uint32_t slot, uint64_t npages, - uint32_t flags) +int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva, + uint32_t guest_memfd, uint64_t guest_memfd_offset) +{ + struct kvm_userspace_memory_region2 region = { + .slot = slot, + .flags = flags, + .guest_phys_addr = gpa, + .memory_size = size, + .userspace_addr = (uintptr_t)hva, + .guest_memfd = guest_memfd, + .guest_memfd_offset = guest_memfd_offset, + }; + + return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION2, &region); +} + +void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags, + uint64_t gpa, uint64_t size, void *hva, + uint32_t guest_memfd, uint64_t guest_memfd_offset) +{ + int ret = __vm_set_user_memory_region2(vm, slot, flags, gpa, size, hva, + guest_memfd, guest_memfd_offset); + + TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION2 failed, errno = %d (%s)", + errno, strerror(errno)); +} + + +/* FIXME: This thing needs to be ripped apart and rewritten. */ +void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, uint64_t npages, + uint32_t flags, int guest_memfd, uint64_t guest_memfd_offset) { int ret; struct userspace_mem_region *region; size_t backing_src_pagesz = get_backing_src_pagesz(src_type); + size_t mem_size = npages * vm->page_size; size_t alignment; TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages, @@ -980,7 +967,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, /* Allocate and initialize new mem region structure. 
*/ region = calloc(1, sizeof(*region)); TEST_ASSERT(region != NULL, "Insufficient Memory"); - region->mmap_size = npages * vm->page_size; + region->mmap_size = mem_size; #ifdef __s390x__ /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */ @@ -1027,14 +1014,38 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, /* As needed perform madvise */ if ((src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) { - ret = madvise(region->host_mem, npages * vm->page_size, + ret = madvise(region->host_mem, mem_size, src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s", - region->host_mem, npages * vm->page_size, + region->host_mem, mem_size, vm_mem_backing_src_alias(src_type)->name); } region->backing_src_type = src_type; + + if (flags & KVM_MEM_GUEST_MEMFD) { + if (guest_memfd < 0) { + uint32_t guest_memfd_flags = 0; + TEST_ASSERT(!guest_memfd_offset, + "Offset must be zero when creating new guest_memfd"); + guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags); + } else { + /* + * Install a unique fd for each memslot so that the fd + * can be closed when the region is deleted without + * needing to track if the fd is owned by the framework + * or by the caller. 
+ */ + guest_memfd = dup(guest_memfd); + TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd)); + } + + region->region.guest_memfd = guest_memfd; + region->region.guest_memfd_offset = guest_memfd_offset; + } else { + region->region.guest_memfd = -1; + } + region->unused_phy_pages = sparsebit_alloc(); sparsebit_set_num(region->unused_phy_pages, guest_paddr >> vm->page_shift, npages); @@ -1043,13 +1054,14 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, region->region.guest_phys_addr = guest_paddr; region->region.memory_size = npages * vm->page_size; region->region.userspace_addr = (uintptr_t) region->host_mem; - ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region); - TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" + ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region); + TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" " rc: %i errno: %i\n" " slot: %u flags: 0x%x\n" - " guest_phys_addr: 0x%lx size: 0x%lx", + " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d", ret, errno, slot, flags, - guest_paddr, (uint64_t) region->region.memory_size); + guest_paddr, (uint64_t) region->region.memory_size, + region->region.guest_memfd); /* Add to quick lookup data structures */ vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region); @@ -1070,6 +1082,14 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, } } +void vm_userspace_mem_region_add(struct kvm_vm *vm, + enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, + uint64_t npages, uint32_t flags) +{ + vm_mem_add(vm, src_type, guest_paddr, slot, npages, flags, -1, 0); +} + /* * Memslot to region * @@ -1126,9 +1146,9 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) region->region.flags = flags; - ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region); + ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region); - TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION 
IOCTL failed,\n" + TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n" " rc: %i errno: %i slot: %u flags: 0x%x", ret, errno, slot, flags); } @@ -1156,9 +1176,9 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) region->region.guest_phys_addr = new_gpa; - ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region); + ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region); - TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n" + TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION2 failed\n" "ret: %i errno: %i slot: %u new_gpa: 0x%lx", ret, errno, slot, new_gpa); } @@ -1181,6 +1201,34 @@ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) __vm_mem_region_delete(vm, memslot2region(vm, slot), true); } +void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, + bool punch_hole) +{ + const int mode = FALLOC_FL_KEEP_SIZE | (punch_hole ? FALLOC_FL_PUNCH_HOLE : 0); + struct userspace_mem_region *region; + uint64_t end = base + size; + uint64_t gpa, len; + off_t fd_offset; + int ret; + + for (gpa = base; gpa < end; gpa += len) { + uint64_t offset; + + region = userspace_mem_region_find(vm, gpa, gpa); + TEST_ASSERT(region && region->region.flags & KVM_MEM_GUEST_MEMFD, + "Private memory region not found for GPA 0x%lx", gpa); + + offset = gpa - region->region.guest_phys_addr; + fd_offset = region->region.guest_memfd_offset + offset; + len = min_t(uint64_t, end - gpa, region->region.memory_size - offset); + + ret = fallocate(region->region.guest_memfd, mode, fd_offset, len); + TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx", + punch_hole ? "punch hole" : "allocate", gpa, len, + region->region.guest_memfd, mode, fd_offset); + } +} + /* Returns the size of a vCPU's kvm_run structure. 
*/ static int vcpu_mmap_sz(void) { @@ -1218,7 +1266,7 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) struct kvm_vcpu *vcpu; /* Confirm a vcpu with the specified id doesn't already exist. */ - TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists\n", vcpu_id); + TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists", vcpu_id); /* Allocate and initialize new vcpu structure. */ vcpu = calloc(1, sizeof(*vcpu)); @@ -1227,7 +1275,7 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) vcpu->vm = vm; vcpu->id = vcpu_id; vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id); - TEST_ASSERT(vcpu->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu->fd)); + TEST_ASSERT_VM_VCPU_IOCTL(vcpu->fd >= 0, KVM_CREATE_VCPU, vcpu->fd, vm); TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size " "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index df457452d1..cf2c739713 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -168,7 +168,8 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, * The memory is also added to memslot 0, but that's a benign side * effect as KVM allows aliasing HVAs in meslots. 
*/ - vm = __vm_create_with_vcpus(mode, nr_vcpus, slot0_pages + guest_num_pages, + vm = __vm_create_with_vcpus(VM_SHAPE(mode), nr_vcpus, + slot0_pages + guest_num_pages, memstress_guest_code, vcpus); args->vm = vm; @@ -191,7 +192,7 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus, TEST_ASSERT(guest_num_pages < region_end_gfn, "Requested more guest memory than address space allows.\n" " guest pages: %" PRIx64 " max gfn: %" PRIx64 - " nr_vcpus: %d wss: %" PRIx64 "]\n", + " nr_vcpus: %d wss: %" PRIx64 "]", guest_num_pages, region_end_gfn - 1, nr_vcpus, vcpu_memory_bytes); args->gpa = (region_end_gfn - guest_num_pages - 1) * args->guest_page_size; diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index d146ca71e0..2bb33a8ac0 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -201,7 +201,7 @@ void riscv_vcpu_mmu_setup(struct kvm_vcpu *vcpu) satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN; satp |= SATP_MODE_48; - vcpu_set_reg(vcpu, RISCV_CSR_REG(satp), satp); + vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(satp), satp); } void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) @@ -315,7 +315,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code); /* Setup default exception vector of guest */ - vcpu_set_reg(vcpu, RISCV_CSR_REG(stvec), (unsigned long)guest_unexp_trap); + vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)guest_unexp_trap); return vcpu; } @@ -327,7 +327,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) int i; TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n" - " num: %u\n", num); + " num: %u", num); va_start(ap, num); @@ -367,3 +367,48 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) 
void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) { } + +struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, + unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4, + unsigned long arg5) +{ + register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); + register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); + register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); + register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3); + register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4); + register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5); + register uintptr_t a6 asm ("a6") = (uintptr_t)(fid); + register uintptr_t a7 asm ("a7") = (uintptr_t)(ext); + struct sbiret ret; + + asm volatile ( + "ecall" + : "+r" (a0), "+r" (a1) + : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7) + : "memory"); + ret.error = a0; + ret.value = a1; + + return ret; +} + +bool guest_sbi_probe_extension(int extid, long *out_val) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_PROBE_EXT, extid, + 0, 0, 0, 0, 0); + + __GUEST_ASSERT(!ret.error || ret.error == SBI_ERR_NOT_SUPPORTED, + "ret.error=%ld, ret.value=%ld\n", ret.error, ret.value); + + if (ret.error == SBI_ERR_NOT_SUPPORTED) + return false; + + if (out_val) + *out_val = ret.value; + + return true; +} diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c index fe6d1004f0..14ee17151a 100644 --- a/tools/testing/selftests/kvm/lib/riscv/ucall.c +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c @@ -10,32 +10,6 @@ #include "kvm_util.h" #include "processor.h" -struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, - unsigned long arg1, unsigned long arg2, - unsigned long arg3, unsigned long arg4, - unsigned long arg5) -{ - register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); - register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); - register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); - register uintptr_t a3 asm ("a3") = 
(uintptr_t)(arg3); - register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4); - register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5); - register uintptr_t a6 asm ("a6") = (uintptr_t)(fid); - register uintptr_t a7 asm ("a7") = (uintptr_t)(ext); - struct sbiret ret; - - asm volatile ( - "ecall" - : "+r" (a0), "+r" (a1) - : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7) - : "memory"); - ret.error = a0; - ret.value = a1; - - return ret; -} - void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index 15945121da..f6d227892c 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -198,7 +198,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) int i; TEST_ASSERT(num >= 1 && num <= 5, "Unsupported number of args,\n" - " num: %u\n", + " num: %u", num); va_start(ap, num); diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 5d7f28b02d..5a8f8becb1 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -392,3 +392,28 @@ char *strdup_printf(const char *fmt, ...) 
return str; } + +#define CLOCKSOURCE_PATH "/sys/devices/system/clocksource/clocksource0/current_clocksource" + +char *sys_get_cur_clocksource(void) +{ + char *clk_name; + struct stat st; + FILE *fp; + + fp = fopen(CLOCKSOURCE_PATH, "r"); + TEST_ASSERT(fp, "failed to open clocksource file, errno: %d", errno); + + TEST_ASSERT(!fstat(fileno(fp), &st), "failed to stat clocksource file, errno: %d", + errno); + + clk_name = malloc(st.st_size); + TEST_ASSERT(clk_name, "failed to allocate buffer to read file"); + + TEST_ASSERT(fgets(clk_name, st.st_size, fp), "failed to read clocksource file: %d", + ferror(fp)); + + fclose(fp); + + return clk_name; +} diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c index 271f638915..f4eef6eb2d 100644 --- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c +++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c @@ -69,7 +69,7 @@ static void *uffd_handler_thread_fn(void *arg) if (pollfd[1].revents & POLLIN) { r = read(pollfd[1].fd, &tmp_chr, 1); TEST_ASSERT(r == 1, - "Error reading pipefd in UFFD thread\n"); + "Error reading pipefd in UFFD thread"); break; } diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index d828837407..f639b3e062 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -170,10 +170,10 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, * this level. 
*/ TEST_ASSERT(current_level != target_level, - "Cannot create hugepage at level: %u, vaddr: 0x%lx\n", + "Cannot create hugepage at level: %u, vaddr: 0x%lx", current_level, vaddr); TEST_ASSERT(!(*pte & PTE_LARGE_MASK), - "Cannot create page table at level: %u, vaddr: 0x%lx\n", + "Cannot create page table at level: %u, vaddr: 0x%lx", current_level, vaddr); } return pte; @@ -220,7 +220,7 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) /* Fill in page table entry. */ pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K); TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), - "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr); + "PTE already present for 4k page at vaddr: 0x%lx", vaddr); *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); } @@ -253,7 +253,7 @@ static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level) if (*pte & PTE_LARGE_MASK) { TEST_ASSERT(*level == PG_LEVEL_NONE || *level == current_level, - "Unexpected hugepage at level %d\n", current_level); + "Unexpected hugepage at level %d", current_level); *level = current_level; } @@ -825,7 +825,7 @@ void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) 
struct kvm_regs regs; TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n" - " num: %u\n", + " num: %u", num); va_start(ap, num); @@ -1299,3 +1299,14 @@ void kvm_selftest_arch_init(void) host_cpu_is_intel = this_cpu_is_intel(); host_cpu_is_amd = this_cpu_is_amd(); } + +bool sys_clocksource_is_based_on_tsc(void) +{ + char *clk_name = sys_get_cur_clocksource(); + bool ret = !strcmp(clk_name, "tsc\n") || + !strcmp(clk_name, "hyperv_clocksource_tsc_page\n"); + + free(clk_name); + + return ret; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index 59d97531c9..089b8925b6 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -54,7 +54,7 @@ int vcpu_enable_evmcs(struct kvm_vcpu *vcpu) /* KVM should return supported EVMCS version range */ TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) && (evmcs_ver & 0xff) > 0, - "Incorrect EVMCS version range: %x:%x\n", + "Incorrect EVMCS version range: %x:%x", evmcs_ver & 0xff, evmcs_ver >> 8); return evmcs_ver; @@ -387,10 +387,10 @@ static void nested_create_pte(struct kvm_vm *vm, * this level. 
*/ TEST_ASSERT(current_level != target_level, - "Cannot create hugepage at level: %u, nested_paddr: 0x%lx\n", + "Cannot create hugepage at level: %u, nested_paddr: 0x%lx", current_level, nested_paddr); TEST_ASSERT(!pte->page_size, - "Cannot create page table at level: %u, nested_paddr: 0x%lx\n", + "Cannot create page table at level: %u, nested_paddr: 0x%lx", current_level, nested_paddr); } } diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 9855c41ca8..1563619666 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -45,7 +45,7 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) /* Let the guest access its memory until a stop signal is received */ while (!READ_ONCE(memstress_args.stop_vcpus)) { ret = _vcpu_run(vcpu); - TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret); if (get_ucall(vcpu, NULL) == UCALL_SYNC) continue; diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 8698d1ab60..579a64f973 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -175,11 +175,11 @@ static void wait_for_vcpu(void) struct timespec ts; TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts), - "clock_gettime() failed: %d\n", errno); + "clock_gettime() failed: %d", errno); ts.tv_sec += 2; TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts), - "sem_timedwait() failed: %d\n", errno); + "sem_timedwait() failed: %d", errno); } static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages) @@ -336,7 +336,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, slot); TEST_ASSERT(gpa == guest_addr, - "vm_phy_pages_alloc() 
failed\n"); + "vm_phy_pages_alloc() failed"); data->hva_slots[slot - 1] = addr_gpa2hva(data->vm, guest_addr); memset(data->hva_slots[slot - 1], 0, npages * guest_page_size); diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 25de4b8bc3..6435e7a656 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -12,6 +12,11 @@ #define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK) +enum { + VCPU_FEATURE_ISA_EXT = 0, + VCPU_FEATURE_SBI_EXT, +}; + static bool isa_ext_cant_disable[KVM_RISCV_ISA_EXT_MAX]; bool filter_reg(__u64 reg) @@ -28,31 +33,74 @@ bool filter_reg(__u64 reg) * * Note: The below list is alphabetically sorted. */ - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_A: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_C: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_D: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_F: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_H: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_I: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_M: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_V: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SMSTATEEN: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSTC: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVINVAL: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVNAPOT: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVPBMT: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBA: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBS: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOM: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOZ: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICOND: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICSR: - case KVM_REG_RISCV_ISA_EXT | 
KVM_RISCV_ISA_EXT_ZIFENCEI: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE: - case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHPM: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_A: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_C: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_D: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_F: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_H: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_I: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_M: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMSTATEEN: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSAIA: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSTC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKB: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKX: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBS: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFA: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | 
KVM_RISCV_ISA_EXT_ZFH: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICNTR: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICOND: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICSR: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIFENCEI: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTNTL: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTPAUSE: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHPM: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKND: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNE: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNH: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKR: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKSED: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKSH: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKT: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBB: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFH: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFHMIN: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKB: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKG: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | 
KVM_RISCV_ISA_EXT_ZVKNED: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKNHA: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKNHB: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKSED: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKSH: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKT: + /* + * Like ISA_EXT registers, SBI_EXT registers are only visible when the + * host supports them and disabling them does not affect the visibility + * of the SBI_EXT register itself. + */ + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_DBCN: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL: + case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR: return true; /* AIA registers are always available when Ssaia can't be disabled */ case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siselect): @@ -75,12 +123,12 @@ bool check_reject_set(int err) return err == EINVAL; } -static inline bool vcpu_has_ext(struct kvm_vcpu *vcpu, int ext) +static bool vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext_id) { int ret; unsigned long value; - ret = __vcpu_get_reg(vcpu, 
RISCV_ISA_EXT_REG(ext), &value); + ret = __vcpu_get_reg(vcpu, ext_id, &value); return (ret) ? false : !!value; } @@ -88,6 +136,7 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) { unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 }; struct vcpu_reg_sublist *s; + uint64_t feature; int rc; for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) @@ -103,16 +152,32 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) isa_ext_cant_disable[i] = true; } + for (int i = 0; i < KVM_RISCV_SBI_EXT_MAX; i++) { + rc = __vcpu_set_reg(vcpu, RISCV_SBI_EXT_REG(i), 0); + TEST_ASSERT(!rc || (rc == -1 && errno == ENOENT), "Unexpected error"); + } + for_each_sublist(c, s) { if (!s->feature) continue; + switch (s->feature_type) { + case VCPU_FEATURE_ISA_EXT: + feature = RISCV_ISA_EXT_REG(s->feature); + break; + case VCPU_FEATURE_SBI_EXT: + feature = RISCV_SBI_EXT_REG(s->feature); + break; + default: + TEST_FAIL("Unknown feature type"); + } + /* Try to enable the desired extension */ - __vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(s->feature), 1); + __vcpu_set_reg(vcpu, feature, 1); /* Double check whether the desired extension was enabled */ - __TEST_REQUIRE(vcpu_has_ext(vcpu, s->feature), - "%s not available, skipping tests\n", s->name); + __TEST_REQUIRE(vcpu_has_ext(vcpu, feature), + "%s not available, skipping tests", s->name); } } @@ -335,15 +400,10 @@ static const char *fp_d_id_to_str(const char *prefix, __u64 id) } #define KVM_ISA_EXT_ARR(ext) \ -[KVM_RISCV_ISA_EXT_##ext] = "KVM_RISCV_ISA_EXT_" #ext +[KVM_RISCV_ISA_EXT_##ext] = "KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_" #ext -static const char *isa_ext_id_to_str(const char *prefix, __u64 id) +static const char *isa_ext_single_id_to_str(__u64 reg_off) { - /* reg_off is the offset into unsigned long kvm_isa_ext_arr[] */ - __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_ISA_EXT); - - assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT); - static const char * const kvm_isa_ext_reg_name[] = 
{ KVM_ISA_EXT_ARR(A), KVM_ISA_EXT_ARR(C), @@ -361,23 +421,87 @@ static const char *isa_ext_id_to_str(const char *prefix, __u64 id) KVM_ISA_EXT_ARR(SVPBMT), KVM_ISA_EXT_ARR(ZBA), KVM_ISA_EXT_ARR(ZBB), + KVM_ISA_EXT_ARR(ZBC), + KVM_ISA_EXT_ARR(ZBKB), + KVM_ISA_EXT_ARR(ZBKC), + KVM_ISA_EXT_ARR(ZBKX), KVM_ISA_EXT_ARR(ZBS), + KVM_ISA_EXT_ARR(ZFA), + KVM_ISA_EXT_ARR(ZFH), + KVM_ISA_EXT_ARR(ZFHMIN), KVM_ISA_EXT_ARR(ZICBOM), KVM_ISA_EXT_ARR(ZICBOZ), KVM_ISA_EXT_ARR(ZICNTR), KVM_ISA_EXT_ARR(ZICOND), KVM_ISA_EXT_ARR(ZICSR), KVM_ISA_EXT_ARR(ZIFENCEI), + KVM_ISA_EXT_ARR(ZIHINTNTL), KVM_ISA_EXT_ARR(ZIHINTPAUSE), KVM_ISA_EXT_ARR(ZIHPM), + KVM_ISA_EXT_ARR(ZKND), + KVM_ISA_EXT_ARR(ZKNE), + KVM_ISA_EXT_ARR(ZKNH), + KVM_ISA_EXT_ARR(ZKR), + KVM_ISA_EXT_ARR(ZKSED), + KVM_ISA_EXT_ARR(ZKSH), + KVM_ISA_EXT_ARR(ZKT), + KVM_ISA_EXT_ARR(ZVBB), + KVM_ISA_EXT_ARR(ZVBC), + KVM_ISA_EXT_ARR(ZVFH), + KVM_ISA_EXT_ARR(ZVFHMIN), + KVM_ISA_EXT_ARR(ZVKB), + KVM_ISA_EXT_ARR(ZVKG), + KVM_ISA_EXT_ARR(ZVKNED), + KVM_ISA_EXT_ARR(ZVKNHA), + KVM_ISA_EXT_ARR(ZVKNHB), + KVM_ISA_EXT_ARR(ZVKSED), + KVM_ISA_EXT_ARR(ZVKSH), + KVM_ISA_EXT_ARR(ZVKT), }; if (reg_off >= ARRAY_SIZE(kvm_isa_ext_reg_name)) - return strdup_printf("%lld /* UNKNOWN */", reg_off); + return strdup_printf("KVM_REG_RISCV_ISA_SINGLE | %lld /* UNKNOWN */", reg_off); return kvm_isa_ext_reg_name[reg_off]; } +static const char *isa_ext_multi_id_to_str(__u64 reg_subtype, __u64 reg_off) +{ + const char *unknown = ""; + + if (reg_off > KVM_REG_RISCV_ISA_MULTI_REG_LAST) + unknown = " /* UNKNOWN */"; + + switch (reg_subtype) { + case KVM_REG_RISCV_ISA_MULTI_EN: + return strdup_printf("KVM_REG_RISCV_ISA_MULTI_EN | %lld%s", reg_off, unknown); + case KVM_REG_RISCV_ISA_MULTI_DIS: + return strdup_printf("KVM_REG_RISCV_ISA_MULTI_DIS | %lld%s", reg_off, unknown); + } + + return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off); +} + +static const char *isa_ext_id_to_str(const char *prefix, __u64 id) +{ + __u64 reg_off = id & ~(REG_MASK | 
KVM_REG_RISCV_ISA_EXT); + __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK; + + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT); + + reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK; + + switch (reg_subtype) { + case KVM_REG_RISCV_ISA_SINGLE: + return isa_ext_single_id_to_str(reg_off); + case KVM_REG_RISCV_ISA_MULTI_EN: + case KVM_REG_RISCV_ISA_MULTI_DIS: + return isa_ext_multi_id_to_str(reg_subtype, reg_off); + } + + return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off); +} + #define KVM_SBI_EXT_ARR(ext) \ [ext] = "KVM_REG_RISCV_SBI_SINGLE | " #ext @@ -392,6 +516,7 @@ static const char *sbi_ext_single_id_to_str(__u64 reg_off) KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SRST), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_HSM), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_PMU), + KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_STA), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR), KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN), @@ -440,6 +565,32 @@ static const char *sbi_ext_id_to_str(const char *prefix, __u64 id) return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off); } +static const char *sbi_sta_id_to_str(__u64 reg_off) +{ + switch (reg_off) { + case 0: return "KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_lo)"; + case 1: return "KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_hi)"; + } + return strdup_printf("KVM_REG_RISCV_SBI_STA | %lld /* UNKNOWN */", reg_off); +} + +static const char *sbi_id_to_str(const char *prefix, __u64 id) +{ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_SBI_STATE); + __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK; + + assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_SBI_STATE); + + reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK; + + switch (reg_subtype) { + case KVM_REG_RISCV_SBI_STA: + return sbi_sta_id_to_str(reg_off); + } + + return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off); +} + void print_reg(const char *prefix, __u64 id) { const char 
*reg_size = NULL; @@ -496,6 +647,10 @@ void print_reg(const char *prefix, __u64 id) printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_SBI_EXT | %s,\n", reg_size, sbi_ext_id_to_str(prefix, id)); break; + case KVM_REG_RISCV_SBI_STATE: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_SBI_STATE | %s,\n", + reg_size, sbi_id_to_str(prefix, id)); + break; default: printf("\tKVM_REG_RISCV | %s | 0x%llx /* UNKNOWN */,\n", reg_size, id & ~REG_MASK); @@ -561,18 +716,6 @@ static __u64 base_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(time), KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(compare), KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state), - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01, - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME, - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI, - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE, - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST, - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM, - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU, - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL, - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR, - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_DBCN, - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_MULTI_EN | 0, - 
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_MULTI_DIS | 0, }; /* @@ -583,66 +726,31 @@ static __u64 base_skips_set[] = { KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state), }; -static __u64 h_regs[] = { - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_H, +static __u64 sbi_base_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR, +}; + +static __u64 sbi_sta_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_lo), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_hi), }; static __u64 zicbom_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size), - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOM, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | 
KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM, }; static __u64 zicboz_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size), - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOZ, -}; - -static __u64 svpbmt_regs[] = { - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVPBMT, -}; - -static __u64 sstc_regs[] = { - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSTC, -}; - -static __u64 svinval_regs[] = { - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVINVAL, -}; - -static __u64 zihintpause_regs[] = { - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE, -}; - -static __u64 zba_regs[] = { - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBA, -}; - -static __u64 zbb_regs[] = { - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB, -}; - -static __u64 zbs_regs[] = { - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBS, -}; - -static __u64 zicntr_regs[] = { - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR, -}; - -static __u64 zicond_regs[] = { - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICOND, -}; - -static __u64 zicsr_regs[] = { - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICSR, -}; - -static __u64 zifencei_regs[] = { - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIFENCEI, -}; - -static __u64 zihpm_regs[] = { - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHPM, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ, }; static __u64 aia_regs[] = { @@ -653,12 +761,12 @@ static 
__u64 aia_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siph), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1h), KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2h), - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSAIA, }; static __u64 smstateen_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_SMSTATEEN | KVM_REG_RISCV_CSR_SMSTATEEN_REG(sstateen0), - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SMSTATEEN, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMSTATEEN, }; static __u64 fp_f_regs[] = { @@ -695,7 +803,7 @@ static __u64 fp_f_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[30]), KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[31]), KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(fcsr), - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_F, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_F, }; static __u64 fp_d_regs[] = { @@ -732,224 +840,196 @@ static __u64 fp_d_regs[] = { KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[30]), KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[31]), KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(fcsr), - KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_D, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | 
KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_D, }; -#define BASE_SUBLIST \ +#define SUBLIST_BASE \ {"base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), \ .skips_set = base_skips_set, .skips_set_n = ARRAY_SIZE(base_skips_set),} -#define H_REGS_SUBLIST \ - {"h", .feature = KVM_RISCV_ISA_EXT_H, .regs = h_regs, .regs_n = ARRAY_SIZE(h_regs),} -#define ZICBOM_REGS_SUBLIST \ +#define SUBLIST_SBI_BASE \ + {"sbi-base", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_V01, \ + .regs = sbi_base_regs, .regs_n = ARRAY_SIZE(sbi_base_regs),} +#define SUBLIST_SBI_STA \ + {"sbi-sta", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_STA, \ + .regs = sbi_sta_regs, .regs_n = ARRAY_SIZE(sbi_sta_regs),} +#define SUBLIST_ZICBOM \ {"zicbom", .feature = KVM_RISCV_ISA_EXT_ZICBOM, .regs = zicbom_regs, .regs_n = ARRAY_SIZE(zicbom_regs),} -#define ZICBOZ_REGS_SUBLIST \ +#define SUBLIST_ZICBOZ \ {"zicboz", .feature = KVM_RISCV_ISA_EXT_ZICBOZ, .regs = zicboz_regs, .regs_n = ARRAY_SIZE(zicboz_regs),} -#define SVPBMT_REGS_SUBLIST \ - {"svpbmt", .feature = KVM_RISCV_ISA_EXT_SVPBMT, .regs = svpbmt_regs, .regs_n = ARRAY_SIZE(svpbmt_regs),} -#define SSTC_REGS_SUBLIST \ - {"sstc", .feature = KVM_RISCV_ISA_EXT_SSTC, .regs = sstc_regs, .regs_n = ARRAY_SIZE(sstc_regs),} -#define SVINVAL_REGS_SUBLIST \ - {"svinval", .feature = KVM_RISCV_ISA_EXT_SVINVAL, .regs = svinval_regs, .regs_n = ARRAY_SIZE(svinval_regs),} -#define ZIHINTPAUSE_REGS_SUBLIST \ - {"zihintpause", .feature = KVM_RISCV_ISA_EXT_ZIHINTPAUSE, .regs = zihintpause_regs, .regs_n = ARRAY_SIZE(zihintpause_regs),} -#define ZBA_REGS_SUBLIST \ - {"zba", .feature = KVM_RISCV_ISA_EXT_ZBA, .regs = zba_regs, .regs_n = ARRAY_SIZE(zba_regs),} -#define ZBB_REGS_SUBLIST \ - {"zbb", .feature = KVM_RISCV_ISA_EXT_ZBB, .regs = zbb_regs, .regs_n = ARRAY_SIZE(zbb_regs),} -#define ZBS_REGS_SUBLIST \ - {"zbs", .feature = KVM_RISCV_ISA_EXT_ZBS, .regs = zbs_regs, .regs_n = ARRAY_SIZE(zbs_regs),} -#define ZICNTR_REGS_SUBLIST \ 
- {"zicntr", .feature = KVM_RISCV_ISA_EXT_ZICNTR, .regs = zicntr_regs, .regs_n = ARRAY_SIZE(zicntr_regs),} -#define ZICOND_REGS_SUBLIST \ - {"zicond", .feature = KVM_RISCV_ISA_EXT_ZICOND, .regs = zicond_regs, .regs_n = ARRAY_SIZE(zicond_regs),} -#define ZICSR_REGS_SUBLIST \ - {"zicsr", .feature = KVM_RISCV_ISA_EXT_ZICSR, .regs = zicsr_regs, .regs_n = ARRAY_SIZE(zicsr_regs),} -#define ZIFENCEI_REGS_SUBLIST \ - {"zifencei", .feature = KVM_RISCV_ISA_EXT_ZIFENCEI, .regs = zifencei_regs, .regs_n = ARRAY_SIZE(zifencei_regs),} -#define ZIHPM_REGS_SUBLIST \ - {"zihpm", .feature = KVM_RISCV_ISA_EXT_ZIHPM, .regs = zihpm_regs, .regs_n = ARRAY_SIZE(zihpm_regs),} -#define AIA_REGS_SUBLIST \ +#define SUBLIST_AIA \ {"aia", .feature = KVM_RISCV_ISA_EXT_SSAIA, .regs = aia_regs, .regs_n = ARRAY_SIZE(aia_regs),} -#define SMSTATEEN_REGS_SUBLIST \ +#define SUBLIST_SMSTATEEN \ {"smstateen", .feature = KVM_RISCV_ISA_EXT_SMSTATEEN, .regs = smstateen_regs, .regs_n = ARRAY_SIZE(smstateen_regs),} -#define FP_F_REGS_SUBLIST \ +#define SUBLIST_FP_F \ {"fp_f", .feature = KVM_RISCV_ISA_EXT_F, .regs = fp_f_regs, \ .regs_n = ARRAY_SIZE(fp_f_regs),} -#define FP_D_REGS_SUBLIST \ +#define SUBLIST_FP_D \ {"fp_d", .feature = KVM_RISCV_ISA_EXT_D, .regs = fp_d_regs, \ .regs_n = ARRAY_SIZE(fp_d_regs),} -static struct vcpu_reg_list h_config = { - .sublists = { - BASE_SUBLIST, - H_REGS_SUBLIST, - {0}, - }, -}; - -static struct vcpu_reg_list zicbom_config = { - .sublists = { - BASE_SUBLIST, - ZICBOM_REGS_SUBLIST, - {0}, - }, -}; - -static struct vcpu_reg_list zicboz_config = { - .sublists = { - BASE_SUBLIST, - ZICBOZ_REGS_SUBLIST, - {0}, - }, -}; - -static struct vcpu_reg_list svpbmt_config = { - .sublists = { - BASE_SUBLIST, - SVPBMT_REGS_SUBLIST, - {0}, - }, -}; - -static struct vcpu_reg_list sstc_config = { - .sublists = { - BASE_SUBLIST, - SSTC_REGS_SUBLIST, - {0}, - }, -}; - -static struct vcpu_reg_list svinval_config = { - .sublists = { - BASE_SUBLIST, - SVINVAL_REGS_SUBLIST, - {0}, - }, -}; - -static 
struct vcpu_reg_list zihintpause_config = { - .sublists = { - BASE_SUBLIST, - ZIHINTPAUSE_REGS_SUBLIST, - {0}, - }, -}; - -static struct vcpu_reg_list zba_config = { - .sublists = { - BASE_SUBLIST, - ZBA_REGS_SUBLIST, - {0}, - }, -}; - -static struct vcpu_reg_list zbb_config = { - .sublists = { - BASE_SUBLIST, - ZBB_REGS_SUBLIST, - {0}, - }, -}; - -static struct vcpu_reg_list zbs_config = { - .sublists = { - BASE_SUBLIST, - ZBS_REGS_SUBLIST, - {0}, - }, -}; - -static struct vcpu_reg_list zicntr_config = { - .sublists = { - BASE_SUBLIST, - ZICNTR_REGS_SUBLIST, - {0}, - }, -}; - -static struct vcpu_reg_list zicond_config = { - .sublists = { - BASE_SUBLIST, - ZICOND_REGS_SUBLIST, - {0}, - }, -}; - -static struct vcpu_reg_list zicsr_config = { - .sublists = { - BASE_SUBLIST, - ZICSR_REGS_SUBLIST, - {0}, - }, -}; - -static struct vcpu_reg_list zifencei_config = { - .sublists = { - BASE_SUBLIST, - ZIFENCEI_REGS_SUBLIST, - {0}, - }, -}; - -static struct vcpu_reg_list zihpm_config = { - .sublists = { - BASE_SUBLIST, - ZIHPM_REGS_SUBLIST, - {0}, - }, -}; - -static struct vcpu_reg_list aia_config = { - .sublists = { - BASE_SUBLIST, - AIA_REGS_SUBLIST, - {0}, - }, -}; - -static struct vcpu_reg_list smstateen_config = { - .sublists = { - BASE_SUBLIST, - SMSTATEEN_REGS_SUBLIST, - {0}, - }, -}; - -static struct vcpu_reg_list fp_f_config = { - .sublists = { - BASE_SUBLIST, - FP_F_REGS_SUBLIST, - {0}, - }, -}; - -static struct vcpu_reg_list fp_d_config = { - .sublists = { - BASE_SUBLIST, - FP_D_REGS_SUBLIST, - {0}, - }, -}; +#define KVM_ISA_EXT_SIMPLE_CONFIG(ext, extu) \ +static __u64 regs_##ext[] = { \ + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | \ + KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | \ + KVM_RISCV_ISA_EXT_##extu, \ +}; \ +static struct vcpu_reg_list config_##ext = { \ + .sublists = { \ + SUBLIST_BASE, \ + { \ + .name = #ext, \ + .feature = KVM_RISCV_ISA_EXT_##extu, \ + .regs = regs_##ext, \ + .regs_n = ARRAY_SIZE(regs_##ext), \ + }, \ + {0}, \ + }, \ +} \ + +#define 
KVM_SBI_EXT_SIMPLE_CONFIG(ext, extu) \ +static __u64 regs_sbi_##ext[] = { \ + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | \ + KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | \ + KVM_RISCV_SBI_EXT_##extu, \ +}; \ +static struct vcpu_reg_list config_sbi_##ext = { \ + .sublists = { \ + SUBLIST_BASE, \ + { \ + .name = "sbi-"#ext, \ + .feature_type = VCPU_FEATURE_SBI_EXT, \ + .feature = KVM_RISCV_SBI_EXT_##extu, \ + .regs = regs_sbi_##ext, \ + .regs_n = ARRAY_SIZE(regs_sbi_##ext), \ + }, \ + {0}, \ + }, \ +} \ + +#define KVM_ISA_EXT_SUBLIST_CONFIG(ext, extu) \ +static struct vcpu_reg_list config_##ext = { \ + .sublists = { \ + SUBLIST_BASE, \ + SUBLIST_##extu, \ + {0}, \ + }, \ +} \ + +#define KVM_SBI_EXT_SUBLIST_CONFIG(ext, extu) \ +static struct vcpu_reg_list config_sbi_##ext = { \ + .sublists = { \ + SUBLIST_BASE, \ + SUBLIST_SBI_##extu, \ + {0}, \ + }, \ +} \ + +/* Note: The below list is alphabetically sorted. */ + +KVM_SBI_EXT_SUBLIST_CONFIG(base, BASE); +KVM_SBI_EXT_SUBLIST_CONFIG(sta, STA); +KVM_SBI_EXT_SIMPLE_CONFIG(pmu, PMU); +KVM_SBI_EXT_SIMPLE_CONFIG(dbcn, DBCN); + +KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA); +KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F); +KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D); +KVM_ISA_EXT_SIMPLE_CONFIG(h, H); +KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN); +KVM_ISA_EXT_SIMPLE_CONFIG(sstc, SSTC); +KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL); +KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT); +KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT); +KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA); +KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB); +KVM_ISA_EXT_SIMPLE_CONFIG(zbc, ZBC); +KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB); +KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC); +KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX); +KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS); +KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA); +KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH); +KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN); +KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM); +KVM_ISA_EXT_SUBLIST_CONFIG(zicboz, ZICBOZ); +KVM_ISA_EXT_SIMPLE_CONFIG(zicntr, ZICNTR); 
+KVM_ISA_EXT_SIMPLE_CONFIG(zicond, ZICOND); +KVM_ISA_EXT_SIMPLE_CONFIG(zicsr, ZICSR); +KVM_ISA_EXT_SIMPLE_CONFIG(zifencei, ZIFENCEI); +KVM_ISA_EXT_SIMPLE_CONFIG(zihintntl, ZIHINTNTL); +KVM_ISA_EXT_SIMPLE_CONFIG(zihintpause, ZIHINTPAUSE); +KVM_ISA_EXT_SIMPLE_CONFIG(zihpm, ZIHPM); +KVM_ISA_EXT_SIMPLE_CONFIG(zknd, ZKND); +KVM_ISA_EXT_SIMPLE_CONFIG(zkne, ZKNE); +KVM_ISA_EXT_SIMPLE_CONFIG(zknh, ZKNH); +KVM_ISA_EXT_SIMPLE_CONFIG(zkr, ZKR); +KVM_ISA_EXT_SIMPLE_CONFIG(zksed, ZKSED); +KVM_ISA_EXT_SIMPLE_CONFIG(zksh, ZKSH); +KVM_ISA_EXT_SIMPLE_CONFIG(zkt, ZKT); +KVM_ISA_EXT_SIMPLE_CONFIG(zvbb, ZVBB); +KVM_ISA_EXT_SIMPLE_CONFIG(zvbc, ZVBC); +KVM_ISA_EXT_SIMPLE_CONFIG(zvfh, ZVFH); +KVM_ISA_EXT_SIMPLE_CONFIG(zvfhmin, ZVFHMIN); +KVM_ISA_EXT_SIMPLE_CONFIG(zvkb, ZVKB); +KVM_ISA_EXT_SIMPLE_CONFIG(zvkg, ZVKG); +KVM_ISA_EXT_SIMPLE_CONFIG(zvkned, ZVKNED); +KVM_ISA_EXT_SIMPLE_CONFIG(zvknha, ZVKNHA); +KVM_ISA_EXT_SIMPLE_CONFIG(zvknhb, ZVKNHB); +KVM_ISA_EXT_SIMPLE_CONFIG(zvksed, ZVKSED); +KVM_ISA_EXT_SIMPLE_CONFIG(zvksh, ZVKSH); +KVM_ISA_EXT_SIMPLE_CONFIG(zvkt, ZVKT); struct vcpu_reg_list *vcpu_configs[] = { - &h_config, - &zicbom_config, - &zicboz_config, - &svpbmt_config, - &sstc_config, - &svinval_config, - &zihintpause_config, - &zba_config, - &zbb_config, - &zbs_config, - &zicntr_config, - &zicond_config, - &zicsr_config, - &zifencei_config, - &zihpm_config, - &aia_config, - &smstateen_config, - &fp_f_config, - &fp_d_config, + &config_sbi_base, + &config_sbi_sta, + &config_sbi_pmu, + &config_sbi_dbcn, + &config_aia, + &config_fp_f, + &config_fp_d, + &config_h, + &config_smstateen, + &config_sstc, + &config_svinval, + &config_svnapot, + &config_svpbmt, + &config_zba, + &config_zbb, + &config_zbc, + &config_zbkb, + &config_zbkc, + &config_zbkx, + &config_zbs, + &config_zfa, + &config_zfh, + &config_zfhmin, + &config_zicbom, + &config_zicboz, + &config_zicntr, + &config_zicond, + &config_zicsr, + &config_zifencei, + &config_zihintntl, + &config_zihintpause, + &config_zihpm, + 
&config_zknd, + &config_zkne, + &config_zknh, + &config_zkr, + &config_zksed, + &config_zksh, + &config_zkt, + &config_zvbb, + &config_zvbc, + &config_zvfh, + &config_zvfhmin, + &config_zvkb, + &config_zvkg, + &config_zvkned, + &config_zvknha, + &config_zvknhb, + &config_zvksed, + &config_zvksh, + &config_zvkt, }; int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index f74e76d03b..28f97fb520 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -245,7 +245,7 @@ int main(int argc, char *argv[]) } while (snapshot != atomic_read(&seq_cnt)); TEST_ASSERT(rseq_cpu == cpu, - "rseq CPU = %d, sched CPU = %d\n", rseq_cpu, cpu); + "rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu); } /* @@ -256,7 +256,7 @@ int main(int argc, char *argv[]) * migrations given the 1us+ delay in the migration task. */ TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2), - "Only performed %d KVM_RUNs, task stalled too much?\n", i); + "Only performed %d KVM_RUNs, task stalled too much?", i); pthread_join(migration_thread, NULL); diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390x/cmma_test.c index c8e0a6495a..626a2b8a20 100644 --- a/tools/testing/selftests/kvm/s390x/cmma_test.c +++ b/tools/testing/selftests/kvm/s390x/cmma_test.c @@ -94,11 +94,6 @@ static void guest_dirty_test_data(void) ); } -static struct kvm_vm *create_vm(void) -{ - return ____vm_create(VM_MODE_DEFAULT); -} - static void create_main_memslot(struct kvm_vm *vm) { int i; @@ -157,7 +152,7 @@ static struct kvm_vm *create_vm_two_memslots(void) { struct kvm_vm *vm; - vm = create_vm(); + vm = vm_create_barebones(); create_memslots(vm); @@ -276,7 +271,7 @@ static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu) static void test_migration_mode(void) { - struct kvm_vm *vm = create_vm(); + struct kvm_vm *vm = vm_create_barebones(); struct kvm_vcpu *vcpu; u64 orig_psw; 
int rc; @@ -670,7 +665,7 @@ struct testdef { */ static int machine_has_cmma(void) { - struct kvm_vm *vm = create_vm(); + struct kvm_vm *vm = vm_create_barebones(); int r; r = !__kvm_has_device_attr(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA); diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c index e41e2cb8ff..357943f2be 100644 --- a/tools/testing/selftests/kvm/s390x/resets.c +++ b/tools/testing/selftests/kvm/s390x/resets.c @@ -78,7 +78,7 @@ static void assert_noirq(struct kvm_vcpu *vcpu) * (notably, the emergency call interrupt we have injected) should * be cleared by the resets, so this should be 0. */ - TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d\n", errno); + TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d", errno); TEST_ASSERT(!irqs, "IRQ pending"); } @@ -199,7 +199,7 @@ static void inject_irq(struct kvm_vcpu *vcpu) irq->type = KVM_S390_INT_EMERGENCY; irq->u.emerg.code = vcpu->id; irqs = __vcpu_ioctl(vcpu, KVM_S390_SET_IRQ_STATE, &irq_state); - TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d\n", errno); + TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d", errno); } static struct kvm_vm *create_vm(struct kvm_vcpu **vcpu) diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c index 636a70ddac..43fb25ddc3 100644 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c @@ -39,13 +39,13 @@ static void guest_code(void) #define REG_COMPARE(reg) \ TEST_ASSERT(left->reg == right->reg, \ "Register " #reg \ - " values did not match: 0x%llx, 0x%llx\n", \ + " values did not match: 0x%llx, 0x%llx", \ left->reg, right->reg) #define REG_COMPARE32(reg) \ TEST_ASSERT(left->reg == right->reg, \ "Register " #reg \ - " values did not match: 0x%x, 0x%x\n", \ + " values did not match: 0x%x, 0x%x", \ left->reg, right->reg) @@ -82,14 +82,14 @@ void 
test_read_invalid(struct kvm_vcpu *vcpu) run->kvm_valid_regs = INVALID_SYNC_FIELD; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_valid_regs = 0; run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_valid_regs = 0; } @@ -103,14 +103,14 @@ void test_set_invalid(struct kvm_vcpu *vcpu) run->kvm_dirty_regs = INVALID_SYNC_FIELD; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_dirty_regs = 0; run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_dirty_regs = 0; } @@ -125,12 +125,12 @@ void test_req_and_verify_all_valid_regs(struct kvm_vcpu *vcpu) /* Request and verify all valid register sets. 
*/ run->kvm_valid_regs = TEST_SYNC_FIELDS; rv = _vcpu_run(vcpu); - TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); + TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); TEST_ASSERT(run->s390_sieic.icptcode == 4 && (run->s390_sieic.ipa >> 8) == 0x83 && (run->s390_sieic.ipb >> 16) == 0x501, - "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x\n", + "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x", run->s390_sieic.icptcode, run->s390_sieic.ipa, run->s390_sieic.ipb); @@ -161,7 +161,7 @@ void test_set_and_verify_various_reg_values(struct kvm_vcpu *vcpu) } rv = _vcpu_run(vcpu); - TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); + TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1, "r11 sync regs value incorrect 0x%llx.", @@ -193,7 +193,7 @@ void test_clear_kvm_dirty_regs_bits(struct kvm_vcpu *vcpu) run->s.regs.gprs[11] = 0xDEADBEEF; run->s.regs.diag318 = 0x4B1D; rv = _vcpu_run(vcpu); - TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); + TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF, "r11 sync regs value incorrect 0x%llx.", diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index b32960189f..06b43ed235 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -98,11 +98,11 @@ static void wait_for_vcpu(void) struct timespec ts; TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts), - "clock_gettime() failed: %d\n", errno); + "clock_gettime() failed: %d", errno); ts.tv_sec += 2; TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts), - "sem_timedwait() failed: %d\n", errno); + "sem_timedwait() failed: %d", errno); /* Wait for the vCPU thread to reenter the guest. 
*/ usleep(100000); @@ -157,17 +157,17 @@ static void guest_code_move_memory_region(void) */ val = guest_spin_on_val(0); __GUEST_ASSERT(val == 1 || val == MMIO_VAL, - "Expected '1' or MMIO ('%llx'), got '%llx'", MMIO_VAL, val); + "Expected '1' or MMIO ('%lx'), got '%lx'", MMIO_VAL, val); /* Spin until the misaligning memory region move completes. */ val = guest_spin_on_val(MMIO_VAL); __GUEST_ASSERT(val == 1 || val == 0, - "Expected '0' or '1' (no MMIO), got '%llx'", val); + "Expected '0' or '1' (no MMIO), got '%lx'", val); /* Spin until the memory region starts to get re-aligned. */ val = guest_spin_on_val(0); __GUEST_ASSERT(val == 1 || val == MMIO_VAL, - "Expected '1' or MMIO ('%llx'), got '%llx'", MMIO_VAL, val); + "Expected '1' or MMIO ('%lx'), got '%lx'", MMIO_VAL, val); /* Spin until the re-aligning memory region move completes. */ val = guest_spin_on_val(MMIO_VAL); @@ -302,7 +302,7 @@ static void test_delete_memory_region(void) if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR) TEST_ASSERT(regs.rip >= final_rip_start && regs.rip < final_rip_end, - "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx\n", + "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx", final_rip_start, final_rip_end, regs.rip); kvm_vm_free(vm); @@ -326,6 +326,65 @@ static void test_zero_memory_regions(void) } #endif /* __x86_64__ */ +static void test_invalid_memory_region_flags(void) +{ + uint32_t supported_flags = KVM_MEM_LOG_DIRTY_PAGES; + const uint32_t v2_only_flags = KVM_MEM_GUEST_MEMFD; + struct kvm_vm *vm; + int r, i; + +#if defined __aarch64__ || defined __x86_64__ + supported_flags |= KVM_MEM_READONLY; +#endif + +#ifdef __x86_64__ + if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)) + vm = vm_create_barebones_protected_vm(); + else +#endif + vm = vm_create_barebones(); + + if (kvm_check_cap(KVM_CAP_MEMORY_ATTRIBUTES) & KVM_MEMORY_ATTRIBUTE_PRIVATE) + supported_flags |= KVM_MEM_GUEST_MEMFD; + + for (i = 0; i < 32; i++) { + if ((supported_flags & BIT(i)) && !(v2_only_flags & 
BIT(i))) + continue; + + r = __vm_set_user_memory_region(vm, 0, BIT(i), + 0, MEM_REGION_SIZE, NULL); + + TEST_ASSERT(r && errno == EINVAL, + "KVM_SET_USER_MEMORY_REGION should have failed on v2 only flag 0x%lx", BIT(i)); + + if (supported_flags & BIT(i)) + continue; + + r = __vm_set_user_memory_region2(vm, 0, BIT(i), + 0, MEM_REGION_SIZE, NULL, 0, 0); + TEST_ASSERT(r && errno == EINVAL, + "KVM_SET_USER_MEMORY_REGION2 should have failed on unsupported flag 0x%lx", BIT(i)); + } + + if (supported_flags & KVM_MEM_GUEST_MEMFD) { + int guest_memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE, 0); + + r = __vm_set_user_memory_region2(vm, 0, + KVM_MEM_LOG_DIRTY_PAGES | KVM_MEM_GUEST_MEMFD, + 0, MEM_REGION_SIZE, NULL, guest_memfd, 0); + TEST_ASSERT(r && errno == EINVAL, + "KVM_SET_USER_MEMORY_REGION2 should have failed, dirty logging private memory is unsupported"); + + r = __vm_set_user_memory_region2(vm, 0, + KVM_MEM_READONLY | KVM_MEM_GUEST_MEMFD, + 0, MEM_REGION_SIZE, NULL, guest_memfd, 0); + TEST_ASSERT(r && errno == EINVAL, + "KVM_SET_USER_MEMORY_REGION2 should have failed, read-only GUEST_MEMFD memslots are unsupported"); + + close(guest_memfd); + } +} + /* * Test it can be added memory slots up to KVM_CAP_NR_MEMSLOTS, then any * tentative to add further slots should fail. 
@@ -385,13 +444,105 @@ static void test_add_max_memory_regions(void) kvm_vm_free(vm); } + +#ifdef __x86_64__ +static void test_invalid_guest_memfd(struct kvm_vm *vm, int memfd, + size_t offset, const char *msg) +{ + int r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA, MEM_REGION_SIZE, + 0, memfd, offset); + TEST_ASSERT(r == -1 && errno == EINVAL, "%s", msg); +} + +static void test_add_private_memory_region(void) +{ + struct kvm_vm *vm, *vm2; + int memfd, i; + + pr_info("Testing ADD of KVM_MEM_GUEST_MEMFD memory regions\n"); + + vm = vm_create_barebones_protected_vm(); + + test_invalid_guest_memfd(vm, vm->kvm_fd, 0, "KVM fd should fail"); + test_invalid_guest_memfd(vm, vm->fd, 0, "VM's fd should fail"); + + memfd = kvm_memfd_alloc(MEM_REGION_SIZE, false); + test_invalid_guest_memfd(vm, memfd, 0, "Regular memfd() should fail"); + close(memfd); + + vm2 = vm_create_barebones_protected_vm(); + memfd = vm_create_guest_memfd(vm2, MEM_REGION_SIZE, 0); + test_invalid_guest_memfd(vm, memfd, 0, "Other VM's guest_memfd() should fail"); + + vm_set_user_memory_region2(vm2, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA, MEM_REGION_SIZE, 0, memfd, 0); + close(memfd); + kvm_vm_free(vm2); + + memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE, 0); + for (i = 1; i < PAGE_SIZE; i++) + test_invalid_guest_memfd(vm, memfd, i, "Unaligned offset should fail"); + + vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA, MEM_REGION_SIZE, 0, memfd, 0); + close(memfd); + + kvm_vm_free(vm); +} + +static void test_add_overlapping_private_memory_regions(void) +{ + struct kvm_vm *vm; + int memfd; + int r; + + pr_info("Testing ADD of overlapping KVM_MEM_GUEST_MEMFD memory regions\n"); + + vm = vm_create_barebones_protected_vm(); + + memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE * 4, 0); + + vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA, MEM_REGION_SIZE * 2, 0, memfd, 0); + 
+ vm_set_user_memory_region2(vm, MEM_REGION_SLOT + 1, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA * 2, MEM_REGION_SIZE * 2, + 0, memfd, MEM_REGION_SIZE * 2); + + /* + * Delete the first memslot, and then attempt to recreate it except + * with a "bad" offset that results in overlap in the guest_memfd(). + */ + vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA, 0, NULL, -1, 0); + + /* Overlap the front half of the other slot. */ + r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA * 2 - MEM_REGION_SIZE, + MEM_REGION_SIZE * 2, + 0, memfd, 0); + TEST_ASSERT(r == -1 && errno == EEXIST, "%s", + "Overlapping guest_memfd() bindings should fail with EEXIST"); + + /* And now the back half of the other slot. */ + r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD, + MEM_REGION_GPA * 2 + MEM_REGION_SIZE, + MEM_REGION_SIZE * 2, + 0, memfd, 0); + TEST_ASSERT(r == -1 && errno == EEXIST, "%s", + "Overlapping guest_memfd() bindings should fail with EEXIST"); + + close(memfd); + kvm_vm_free(vm); +} +#endif + int main(int argc, char *argv[]) { #ifdef __x86_64__ int i, loops; -#endif -#ifdef __x86_64__ /* * FIXME: the zero-memslot test fails on aarch64 and s390x because * KVM_RUN fails with ENOEXEC or EFAULT. 
@@ -399,9 +550,19 @@ int main(int argc, char *argv[]) test_zero_memory_regions(); #endif + test_invalid_memory_region_flags(); + test_add_max_memory_regions(); #ifdef __x86_64__ + if (kvm_has_cap(KVM_CAP_GUEST_MEMFD) && + (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))) { + test_add_private_memory_region(); + test_add_overlapping_private_memory_regions(); + } else { + pr_info("Skipping tests for KVM_MEM_GUEST_MEMFD memory regions\n"); + } + if (argc > 1) loops = atoi_positive("Number of iterations", argv[1]); else diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index 171adfb2a6..bae0c5026f 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -11,7 +11,9 @@ #include <pthread.h> #include <linux/kernel.h> #include <asm/kvm.h> +#ifndef __riscv #include <asm/kvm_para.h> +#endif #include "test_util.h" #include "kvm_util.h" @@ -203,6 +205,103 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) pr_info(" st_time: %ld\n", st->st_time); } +#elif defined(__riscv) + +/* SBI STA shmem must have 64-byte alignment */ +#define STEAL_TIME_SIZE ((sizeof(struct sta_struct) + 63) & ~63) + +static vm_paddr_t st_gpa[NR_VCPUS]; + +struct sta_struct { + uint32_t sequence; + uint32_t flags; + uint64_t steal; + uint8_t preempted; + uint8_t pad[47]; +} __packed; + +static void sta_set_shmem(vm_paddr_t gpa, unsigned long flags) +{ + unsigned long lo = (unsigned long)gpa; +#if __riscv_xlen == 32 + unsigned long hi = (unsigned long)(gpa >> 32); +#else + unsigned long hi = gpa == -1 ? 
-1 : 0; +#endif + struct sbiret ret = sbi_ecall(SBI_EXT_STA, 0, lo, hi, flags, 0, 0, 0); + + GUEST_ASSERT(ret.value == 0 && ret.error == 0); +} + +static void check_status(struct sta_struct *st) +{ + GUEST_ASSERT(!(READ_ONCE(st->sequence) & 1)); + GUEST_ASSERT(READ_ONCE(st->flags) == 0); + GUEST_ASSERT(READ_ONCE(st->preempted) == 0); +} + +static void guest_code(int cpu) +{ + struct sta_struct *st = st_gva[cpu]; + uint32_t sequence; + long out_val = 0; + bool probe; + + probe = guest_sbi_probe_extension(SBI_EXT_STA, &out_val); + GUEST_ASSERT(probe && out_val == 1); + + sta_set_shmem(st_gpa[cpu], 0); + GUEST_SYNC(0); + + check_status(st); + WRITE_ONCE(guest_stolen_time[cpu], st->steal); + sequence = READ_ONCE(st->sequence); + check_status(st); + GUEST_SYNC(1); + + check_status(st); + GUEST_ASSERT(sequence < READ_ONCE(st->sequence)); + WRITE_ONCE(guest_stolen_time[cpu], st->steal); + check_status(st); + GUEST_DONE(); +} + +static bool is_steal_time_supported(struct kvm_vcpu *vcpu) +{ + uint64_t id = RISCV_SBI_EXT_REG(KVM_RISCV_SBI_EXT_STA); + unsigned long enabled; + + vcpu_get_reg(vcpu, id, &enabled); + TEST_ASSERT(enabled == 0 || enabled == 1, "Expected boolean result"); + + return enabled; +} + +static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) +{ + /* ST_GPA_BASE is identity mapped */ + st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE); + st_gpa[i] = addr_gva2gpa(vcpu->vm, (vm_vaddr_t)st_gva[i]); + sync_global_to_guest(vcpu->vm, st_gva[i]); + sync_global_to_guest(vcpu->vm, st_gpa[i]); +} + +static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) +{ + struct sta_struct *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]); + int i; + + pr_info("VCPU%d:\n", vcpu_idx); + pr_info(" sequence: %d\n", st->sequence); + pr_info(" flags: %d\n", st->flags); + pr_info(" steal: %"PRIu64"\n", st->steal); + pr_info(" preempted: %d\n", st->preempted); + pr_info(" pad: "); + for (i = 0; i < 47; ++i) + pr_info("%d", st->pad[i]); + pr_info("\n"); +} + #endif 
static void *do_steal_time(void *arg) diff --git a/tools/testing/selftests/kvm/system_counter_offset_test.c b/tools/testing/selftests/kvm/system_counter_offset_test.c index 7f5b330b6a..513d421a9b 100644 --- a/tools/testing/selftests/kvm/system_counter_offset_test.c +++ b/tools/testing/selftests/kvm/system_counter_offset_test.c @@ -108,7 +108,7 @@ static void enter_guest(struct kvm_vcpu *vcpu) handle_abort(&uc); return; default: - TEST_ASSERT(0, "unhandled ucall %ld\n", + TEST_ASSERT(0, "unhandled ucall %ld", get_ucall(vcpu, &uc)); } } diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index 11329e5ff9..eae521f050 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -221,7 +221,7 @@ int main(int argc, char *argv[]) vm_vaddr_t amx_cfg, tiledata, xstate; struct ucall uc; u32 amx_offset; - int stage, ret; + int ret; /* * Note, all off-by-default features must be enabled before anything @@ -263,7 +263,7 @@ int main(int argc, char *argv[]) memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE)); vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate); - for (stage = 1; ; stage++) { + for (;;) { vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); @@ -296,7 +296,7 @@ int main(int argc, char *argv[]) void *tiles_data = (void *)addr_gva2hva(vm, tiledata); /* Only check TMM0 register, 1 tile */ ret = memcmp(amx_start, tiles_data, TILE_SIZE); - TEST_ASSERT(ret == 0, "memcmp failed, ret=%d\n", ret); + TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret); kvm_x86_state_cleanup(state); break; case 9: diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c index 3b34d8156d..8c579ce714 100644 --- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c @@ -84,7 +84,7 @@ static void compare_cpuids(const struct kvm_cpuid2 *cpuid1, 
TEST_ASSERT(e1->function == e2->function && e1->index == e2->index && e1->flags == e2->flags, - "CPUID entries[%d] mismtach: 0x%x.%d.%x vs. 0x%x.%d.%x\n", + "CPUID entries[%d] mismtach: 0x%x.%d.%x vs. 0x%x.%d.%x", i, e1->function, e1->index, e1->flags, e2->function, e2->index, e2->flags); @@ -170,7 +170,7 @@ static void test_get_cpuid2(struct kvm_vcpu *vcpu) vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid); TEST_ASSERT(cpuid->nent == vcpu->cpuid->nent, - "KVM didn't update nent on success, wanted %u, got %u\n", + "KVM didn't update nent on success, wanted %u, got %u", vcpu->cpuid->nent, cpuid->nent); for (i = 0; i < vcpu->cpuid->nent; i++) { diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c index 634c6bfcd5..ee3b384b99 100644 --- a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c +++ b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c @@ -92,7 +92,6 @@ static void run_test(enum vm_guest_mode mode, void *unused) uint64_t host_num_pages; uint64_t pages_per_slot; int i; - uint64_t total_4k_pages; struct kvm_page_stats stats_populated; struct kvm_page_stats stats_dirty_logging_enabled; struct kvm_page_stats stats_dirty_pass[ITERATIONS]; @@ -107,6 +106,9 @@ static void run_test(enum vm_guest_mode mode, void *unused) guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); host_num_pages = vm_num_host_pages(mode, guest_num_pages); pages_per_slot = host_num_pages / SLOTS; + TEST_ASSERT_EQ(host_num_pages, pages_per_slot * SLOTS); + TEST_ASSERT(!(host_num_pages % 512), + "Number of pages, '%lu' not a multiple of 2MiB", host_num_pages); bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot); @@ -165,10 +167,8 @@ static void run_test(enum vm_guest_mode mode, void *unused) memstress_free_bitmaps(bitmaps, SLOTS); memstress_destroy_vm(vm); - /* Make assertions about the page counts. 
*/ - total_4k_pages = stats_populated.pages_4k; - total_4k_pages += stats_populated.pages_2m * 512; - total_4k_pages += stats_populated.pages_1g * 512 * 512; + TEST_ASSERT_EQ((stats_populated.pages_2m * 512 + + stats_populated.pages_1g * 512 * 512), host_num_pages); /* * Check that all huge pages were split. Since large pages can only @@ -180,19 +180,22 @@ static void run_test(enum vm_guest_mode mode, void *unused) */ if (dirty_log_manual_caps) { TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0); - TEST_ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages); + TEST_ASSERT(stats_clear_pass[0].pages_4k >= host_num_pages, + "Expected at least '%lu' 4KiB pages, found only '%lu'", + host_num_pages, stats_clear_pass[0].pages_4k); TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages); } else { TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0); - TEST_ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages); + TEST_ASSERT(stats_dirty_logging_enabled.pages_4k >= host_num_pages, + "Expected at least '%lu' 4KiB pages, found only '%lu'", + host_num_pages, stats_dirty_logging_enabled.pages_4k); } /* * Once dirty logging is disabled and the vCPUs have touched all their - * memory again, the page counts should be the same as they were + * memory again, the hugepage counts should be the same as they were * right after initial population of memory. 
*/ - TEST_ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k); TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m); TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g); } diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86_64/flds_emulation.h index 0a1573d528..37b1a9f528 100644 --- a/tools/testing/selftests/kvm/x86_64/flds_emulation.h +++ b/tools/testing/selftests/kvm/x86_64/flds_emulation.h @@ -41,7 +41,7 @@ static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu) insn_bytes = run->emulation_failure.insn_bytes; TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0, - "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x\n", + "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x", insn_bytes[0], insn_bytes[1]); vcpu_regs_get(vcpu, &regs); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c index f25749eaa6..e058bc676c 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c @@ -211,6 +211,9 @@ int main(void) vm_vaddr_t tsc_page_gva; int stage; + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME)); + TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); + vm = vm_create_with_one_vcpu(&vcpu, guest_main); vcpu_set_hv_cpuid(vcpu); @@ -218,7 +221,7 @@ int main(void) tsc_page_gva = vm_vaddr_alloc_page(vm); memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize()); TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0, - "TSC page has to be page aligned\n"); + "TSC page has to be page aligned"); vcpu_args_set(vcpu, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva)); host_check_tsc_msr_rdtsc(vcpu); @@ -235,7 +238,7 @@ int main(void) break; case UCALL_DONE: /* Keep in sync with guest_main() */ - TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d\n", + TEST_ASSERT(stage == 11, "Testing ended
prematurely, stage %d", stage); goto out; default: diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c index 7bde0c4dfd..4c7257ecd2 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c @@ -240,11 +240,12 @@ int main(int argc, char *argv[]) struct ucall uc; int stage; - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_DIRECT_TLBFLUSH)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); hcall_page = vm_vaddr_alloc_pages(vm, 1); memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize()); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c index e036db1f32..949e08e98f 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c @@ -43,6 +43,8 @@ int main(void) uint64_t *outval; struct ucall uc; + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID)); + /* Verify if extended hypercalls are supported */ if (!kvm_cpuid_has(kvm_get_supported_hv_cpuid(), HV_ENABLE_EXTENDED_HYPERCALLS)) { diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 9f28aa276c..b923a285e9 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -55,18 +55,18 @@ static void guest_msr(struct msr_data *msr) if (msr->fault_expected) __GUEST_ASSERT(vector == GP_VECTOR, "Expected #GP on %sMSR(0x%x), got vector '0x%x'", - msr->idx, msr->write ? "WR" : "RD", vector); + msr->write ? 
"WR" : "RD", msr->idx, vector); else __GUEST_ASSERT(!vector, "Expected success on %sMSR(0x%x), got vector '0x%x'", - msr->idx, msr->write ? "WR" : "RD", vector); + msr->write ? "WR" : "RD", msr->idx, vector); if (vector || is_write_only_msr(msr->idx)) goto done; if (msr->write) __GUEST_ASSERT(!vector, - "WRMSR(0x%x) to '0x%llx', RDMSR read '0x%llx'", + "WRMSR(0x%x) to '0x%lx', RDMSR read '0x%lx'", msr->idx, msr->write_val, msr_val); /* Invariant TSC bit appears when TSC invariant control MSR is written to */ @@ -102,11 +102,11 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) vector = __hyperv_hypercall(hcall->control, input, output, &res); if (hcall->ud_expected) { __GUEST_ASSERT(vector == UD_VECTOR, - "Expected #UD for control '%u', got vector '0x%x'", + "Expected #UD for control '%lu', got vector '0x%x'", hcall->control, vector); } else { __GUEST_ASSERT(!vector, - "Expected no exception for control '%u', got vector '0x%x'", + "Expected no exception for control '%lu', got vector '0x%x'", hcall->control, vector); GUEST_ASSERT_EQ(res, hcall->expect); } @@ -454,7 +454,7 @@ static void guest_test_msrs_access(void) case 44: /* MSR is not available when CPUID feature bit is unset */ if (!has_invtsc) - continue; + goto next_stage; msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; msr->write = false; msr->fault_expected = true; @@ -462,7 +462,7 @@ static void guest_test_msrs_access(void) case 45: /* MSR is vailable when CPUID feature bit is set */ if (!has_invtsc) - continue; + goto next_stage; vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT); msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; msr->write = false; @@ -471,7 +471,7 @@ static void guest_test_msrs_access(void) case 46: /* Writing bits other than 0 is forbidden */ if (!has_invtsc) - continue; + goto next_stage; msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; msr->write = true; msr->write_val = 0xdeadbeef; @@ -480,7 +480,7 @@ static void guest_test_msrs_access(void) case 47: /* Setting bit 0 
enables the feature */ if (!has_invtsc) - continue; + goto next_stage; msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL; msr->write = true; msr->write_val = 1; @@ -513,6 +513,7 @@ static void guest_test_msrs_access(void) return; } +next_stage: stage++; kvm_vm_free(vm); } @@ -690,6 +691,8 @@ static void guest_test_hcalls_access(void) int main(void) { + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENFORCE_CPUID)); + pr_info("Testing access to Hyper-V specific MSRs\n"); guest_test_msrs_access(); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c index 6feb5ddb03..f1617762c2 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c @@ -248,6 +248,8 @@ int main(int argc, char *argv[]) int stage = 1, r; struct ucall uc; + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_SEND_IPI)); + vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code); /* Hypercall input/output */ @@ -287,7 +289,7 @@ int main(int argc, char *argv[]) switch (get_ucall(vcpu[0], &uc)) { case UCALL_SYNC: TEST_ASSERT(uc.args[1] == stage, - "Unexpected stage: %ld (%d expected)\n", + "Unexpected stage: %ld (%d expected)", uc.args[1], stage); break; case UCALL_DONE: diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index 6c12785620..c9b18707ed 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -158,6 +158,7 @@ int main(int argc, char *argv[]) int stage; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_DIRECT_TLBFLUSH)); /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c index 4758b6ef56..05b56095cf 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c +++ 
b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c @@ -590,6 +590,8 @@ int main(int argc, char *argv[]) struct ucall uc; int stage = 1, r, i; + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TLBFLUSH)); + vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code); /* Test data page */ @@ -656,7 +658,7 @@ int main(int argc, char *argv[]) switch (get_ucall(vcpu[0], &uc)) { case UCALL_SYNC: TEST_ASSERT(uc.args[1] == stage, - "Unexpected stage: %ld (%d expected)\n", + "Unexpected stage: %ld (%d expected)", uc.args[1], stage); break; case UCALL_ABORT: diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c index 1778704360..5bc12222d8 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c @@ -92,7 +92,7 @@ static void setup_clock(struct kvm_vm *vm, struct test_case *test_case) break; } while (errno == EINTR); - TEST_ASSERT(!r, "clock_gettime() failed: %d\n", r); + TEST_ASSERT(!r, "clock_gettime() failed: %d", r); data.realtime = ts.tv_sec * NSEC_PER_SEC; data.realtime += ts.tv_nsec; @@ -127,47 +127,11 @@ static void enter_guest(struct kvm_vcpu *vcpu) handle_abort(&uc); return; default: - TEST_ASSERT(0, "unhandled ucall: %ld\n", uc.cmd); + TEST_ASSERT(0, "unhandled ucall: %ld", uc.cmd); } } } -#define CLOCKSOURCE_PATH "/sys/devices/system/clocksource/clocksource0/current_clocksource" - -static void check_clocksource(void) -{ - char *clk_name; - struct stat st; - FILE *fp; - - fp = fopen(CLOCKSOURCE_PATH, "r"); - if (!fp) { - pr_info("failed to open clocksource file: %d; assuming TSC.\n", - errno); - return; - } - - if (fstat(fileno(fp), &st)) { - pr_info("failed to stat clocksource file: %d; assuming TSC.\n", - errno); - goto out; - } - - clk_name = malloc(st.st_size); - TEST_ASSERT(clk_name, "failed to allocate buffer to read file\n"); - - if (!fgets(clk_name, st.st_size, fp)) { - pr_info("failed to read clocksource file: %d; assuming TSC.\n", 
- ferror(fp)); - goto out; - } - - TEST_ASSERT(!strncmp(clk_name, "tsc\n", st.st_size), - "clocksource not supported: %s", clk_name); -out: - fclose(fp); -} - int main(void) { struct kvm_vcpu *vcpu; @@ -179,7 +143,7 @@ int main(void) flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK); TEST_REQUIRE(flags & KVM_CLOCK_REALTIME); - check_clocksource(); + TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); vm = vm_create_with_one_vcpu(&vcpu, guest_main); diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c deleted file mode 100644 index ce1ccc4c15..0000000000 --- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * mmio_warning_test - * - * Copyright (C) 2019, Google LLC. - * - * This work is licensed under the terms of the GNU GPL, version 2. - * - * Test that we don't get a kernel warning when we call KVM_RUN after a - * triple fault occurs. To get the triple fault to occur we call KVM_RUN - * on a VCPU that hasn't been properly setup. 
- * - */ - -#define _GNU_SOURCE -#include <fcntl.h> -#include <kvm_util.h> -#include <linux/kvm.h> -#include <processor.h> -#include <pthread.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/ioctl.h> -#include <sys/mman.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <sys/wait.h> -#include <test_util.h> -#include <unistd.h> - -#define NTHREAD 4 -#define NPROCESS 5 - -struct thread_context { - int kvmcpu; - struct kvm_run *run; -}; - -void *thr(void *arg) -{ - struct thread_context *tc = (struct thread_context *)arg; - int res; - int kvmcpu = tc->kvmcpu; - struct kvm_run *run = tc->run; - - res = ioctl(kvmcpu, KVM_RUN, 0); - pr_info("ret1=%d exit_reason=%d suberror=%d\n", - res, run->exit_reason, run->internal.suberror); - - return 0; -} - -void test(void) -{ - int i, kvm, kvmvm, kvmcpu; - pthread_t th[NTHREAD]; - struct kvm_run *run; - struct thread_context tc; - - kvm = open("/dev/kvm", O_RDWR); - TEST_ASSERT(kvm != -1, "failed to open /dev/kvm"); - kvmvm = __kvm_ioctl(kvm, KVM_CREATE_VM, NULL); - TEST_ASSERT(kvmvm > 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, kvmvm)); - kvmcpu = ioctl(kvmvm, KVM_CREATE_VCPU, 0); - TEST_ASSERT(kvmcpu != -1, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, kvmcpu)); - run = (struct kvm_run *)mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, - kvmcpu, 0); - tc.kvmcpu = kvmcpu; - tc.run = run; - srand(getpid()); - for (i = 0; i < NTHREAD; i++) { - pthread_create(&th[i], NULL, thr, (void *)(uintptr_t)&tc); - usleep(rand() % 10000); - } - for (i = 0; i < NTHREAD; i++) - pthread_join(th[i], NULL); -} - -int get_warnings_count(void) -{ - int warnings; - FILE *f; - - f = popen("dmesg | grep \"WARNING:\" | wc -l", "r"); - if (fscanf(f, "%d", &warnings) < 1) - warnings = 0; - pclose(f); - - return warnings; -} - -int main(void) -{ - int warnings_before, warnings_after; - - TEST_REQUIRE(host_cpu_is_intel); - - TEST_REQUIRE(!vm_is_unrestricted_guest(NULL)); - - warnings_before = get_warnings_count(); - - for (int i = 0; i < 
NPROCESS; ++i) { - int status; - int pid = fork(); - - if (pid < 0) - exit(1); - if (pid == 0) { - test(); - exit(0); - } - while (waitpid(pid, &status, __WALL) != pid) - ; - } - - warnings_after = get_warnings_count(); - TEST_ASSERT(warnings_before == warnings_after, - "Warnings found in kernel. Run 'dmesg' to inspect them."); - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c index 80aa3d8b18..853802641e 100644 --- a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c @@ -27,10 +27,12 @@ do { \ \ if (fault_wanted) \ __GUEST_ASSERT((vector) == UD_VECTOR, \ - "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", vector); \ + "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", \ + testcase, vector); \ else \ __GUEST_ASSERT(!(vector), \ - "Expected success on " insn " for testcase '0x%x', got '0x%x'", vector); \ + "Expected success on " insn " for testcase '0x%x', got '0x%x'", \ + testcase, vector); \ } while (0) static void guest_monitor_wait(int testcase) diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index 83e25bccc1..17bbb96fc4 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -257,9 +257,9 @@ int main(int argc, char **argv) TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES)); __TEST_REQUIRE(token == MAGIC_TOKEN, - "This test must be run with the magic token %d.\n" - "This is done by nx_huge_pages_test.sh, which\n" - "also handles environment setup for the test.", MAGIC_TOKEN); + "This test must be run with the magic token via '-t %d'.\n" + "Running via nx_huge_pages_test.sh, which also handles " + "environment setup, is strongly recommended.", MAGIC_TOKEN); run_test(reclaim_period_ms, false, reboot_permissions); 
run_test(reclaim_period_ms, true, reboot_permissions); diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c index c9a07963d6..87011965dc 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -44,7 +44,7 @@ static void test_msr_platform_info_enabled(struct kvm_vcpu *vcpu) get_ucall(vcpu, &uc); TEST_ASSERT(uc.cmd == UCALL_SYNC, - "Received ucall other than UCALL_SYNC: %lu\n", uc.cmd); + "Received ucall other than UCALL_SYNC: %lu", uc.cmd); TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) == MSR_PLATFORM_INFO_MAX_TURBO_RATIO, "Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.", diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index 283cc55597..a3bd54b925 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -866,7 +866,7 @@ static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx, * userspace doesn't set any pmu filter. */ count = run_vcpu_to_sync(vcpu); - TEST_ASSERT(count, "Unexpected count value: %ld\n", count); + TEST_ASSERT(count, "Unexpected count value: %ld", count); for (i = 0; i < BIT(nr_fixed_counters); i++) { bitmap = BIT(i); diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c new file mode 100644 index 0000000000..65ad38b6be --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c @@ -0,0 +1,482 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022, Google LLC. 
+ */ +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <fcntl.h> +#include <limits.h> +#include <pthread.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/kvm_para.h> +#include <linux/memfd.h> +#include <linux/sizes.h> + +#include <test_util.h> +#include <kvm_util.h> +#include <processor.h> + +#define BASE_DATA_SLOT 10 +#define BASE_DATA_GPA ((uint64_t)(1ull << 32)) +#define PER_CPU_DATA_SIZE ((uint64_t)(SZ_2M + PAGE_SIZE)) + +/* Horrific macro so that the line info is captured accurately :-( */ +#define memcmp_g(gpa, pattern, size) \ +do { \ + uint8_t *mem = (uint8_t *)gpa; \ + size_t i; \ + \ + for (i = 0; i < size; i++) \ + __GUEST_ASSERT(mem[i] == pattern, \ + "Guest expected 0x%x at offset %lu (gpa 0x%lx), got 0x%x", \ + pattern, i, gpa + i, mem[i]); \ +} while (0) + +static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size) +{ + size_t i; + + for (i = 0; i < size; i++) + TEST_ASSERT(mem[i] == pattern, + "Host expected 0x%x at gpa 0x%lx, got 0x%x", + pattern, gpa + i, mem[i]); +} + +/* + * Run memory conversion tests with explicit conversion: + * Execute KVM hypercall to map/unmap gpa range which will cause userspace exit + * to back/unback private memory. Subsequent accesses by guest to the gpa range + * will not cause exit to userspace. + * + * Test memory conversion scenarios with following steps: + * 1) Access private memory using private access and verify that memory contents + * are not visible to userspace. + * 2) Convert memory to shared using explicit conversions and ensure that + * userspace is able to access the shared regions. + * 3) Convert memory back to private using explicit conversions and ensure that + * userspace is again not able to access converted private regions. 
+ */ + +#define GUEST_STAGE(o, s) { .offset = o, .size = s } + +enum ucall_syncs { + SYNC_SHARED, + SYNC_PRIVATE, +}; + +static void guest_sync_shared(uint64_t gpa, uint64_t size, + uint8_t current_pattern, uint8_t new_pattern) +{ + GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern); +} + +static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern) +{ + GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern); +} + +/* Arbitrary values, KVM doesn't care about the attribute flags. */ +#define MAP_GPA_SET_ATTRIBUTES BIT(0) +#define MAP_GPA_SHARED BIT(1) +#define MAP_GPA_DO_FALLOCATE BIT(2) + +static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared, + bool do_fallocate) +{ + uint64_t flags = MAP_GPA_SET_ATTRIBUTES; + + if (map_shared) + flags |= MAP_GPA_SHARED; + if (do_fallocate) + flags |= MAP_GPA_DO_FALLOCATE; + kvm_hypercall_map_gpa_range(gpa, size, flags); +} + +static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate) +{ + guest_map_mem(gpa, size, true, do_fallocate); +} + +static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate) +{ + guest_map_mem(gpa, size, false, do_fallocate); +} + +struct { + uint64_t offset; + uint64_t size; +} static const test_ranges[] = { + GUEST_STAGE(0, PAGE_SIZE), + GUEST_STAGE(0, SZ_2M), + GUEST_STAGE(PAGE_SIZE, PAGE_SIZE), + GUEST_STAGE(PAGE_SIZE, SZ_2M), + GUEST_STAGE(SZ_2M, PAGE_SIZE), +}; + +static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate) +{ + const uint8_t def_p = 0xaa; + const uint8_t init_p = 0xcc; + uint64_t j; + int i; + + /* Memory should be shared by default. 
*/ + memset((void *)base_gpa, def_p, PER_CPU_DATA_SIZE); + memcmp_g(base_gpa, def_p, PER_CPU_DATA_SIZE); + guest_sync_shared(base_gpa, PER_CPU_DATA_SIZE, def_p, init_p); + + memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE); + + for (i = 0; i < ARRAY_SIZE(test_ranges); i++) { + uint64_t gpa = base_gpa + test_ranges[i].offset; + uint64_t size = test_ranges[i].size; + uint8_t p1 = 0x11; + uint8_t p2 = 0x22; + uint8_t p3 = 0x33; + uint8_t p4 = 0x44; + + /* + * Set the test region to pattern one to differentiate it from + * the data range as a whole (contains the initial pattern). + */ + memset((void *)gpa, p1, size); + + /* + * Convert to private, set and verify the private data, and + * then verify that the rest of the data (map shared) still + * holds the initial pattern, and that the host always sees the + * shared memory (initial pattern). Unlike shared memory, + * punching a hole in private memory is destructive, i.e. + * previous values aren't guaranteed to be preserved. + */ + guest_map_private(gpa, size, do_fallocate); + + if (size > PAGE_SIZE) { + memset((void *)gpa, p2, PAGE_SIZE); + goto skip; + } + + memset((void *)gpa, p2, size); + guest_sync_private(gpa, size, p1); + + /* + * Verify that the private memory was set to pattern two, and + * that shared memory still holds the initial pattern. + */ + memcmp_g(gpa, p2, size); + if (gpa > base_gpa) + memcmp_g(base_gpa, init_p, gpa - base_gpa); + if (gpa + size < base_gpa + PER_CPU_DATA_SIZE) + memcmp_g(gpa + size, init_p, + (base_gpa + PER_CPU_DATA_SIZE) - (gpa + size)); + + /* + * Convert odd-number page frames back to shared to verify KVM + * also correctly handles holes in private ranges. 
+ */ + for (j = 0; j < size; j += PAGE_SIZE) { + if ((j >> PAGE_SHIFT) & 1) { + guest_map_shared(gpa + j, PAGE_SIZE, do_fallocate); + guest_sync_shared(gpa + j, PAGE_SIZE, p1, p3); + + memcmp_g(gpa + j, p3, PAGE_SIZE); + } else { + guest_sync_private(gpa + j, PAGE_SIZE, p1); + } + } + +skip: + /* + * Convert the entire region back to shared, explicitly write + * pattern three to fill in the even-number frames before + * asking the host to verify (and write pattern four). + */ + guest_map_shared(gpa, size, do_fallocate); + memset((void *)gpa, p3, size); + guest_sync_shared(gpa, size, p3, p4); + memcmp_g(gpa, p4, size); + + /* Reset the shared memory back to the initial pattern. */ + memset((void *)gpa, init_p, size); + + /* + * Free (via PUNCH_HOLE) *all* private memory so that the next + * iteration starts from a clean slate, e.g. with respect to + * whether or not there are pages/folios in guest_mem. + */ + guest_map_shared(base_gpa, PER_CPU_DATA_SIZE, true); + } +} + +static void guest_punch_hole(uint64_t gpa, uint64_t size) +{ + /* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */ + uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE; + + kvm_hypercall_map_gpa_range(gpa, size, flags); +} + +/* + * Test that PUNCH_HOLE actually frees memory by punching holes without doing a + * proper conversion. Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating + * (subsequent fault) should zero memory. + */ +static void guest_test_punch_hole(uint64_t base_gpa, bool precise) +{ + const uint8_t init_p = 0xcc; + int i; + + /* + * Convert the entire range to private, this testcase is all about + * punching holes in guest_memfd, i.e. shared mappings aren't needed. 
+ */ + guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false); + + for (i = 0; i < ARRAY_SIZE(test_ranges); i++) { + uint64_t gpa = base_gpa + test_ranges[i].offset; + uint64_t size = test_ranges[i].size; + + /* + * Free all memory before each iteration, even for the !precise + * case where the memory will be faulted back in. Freeing and + * reallocating should obviously work, and freeing all memory + * minimizes the probability of cross-testcase influence. + */ + guest_punch_hole(base_gpa, PER_CPU_DATA_SIZE); + + /* Fault-in and initialize memory, and verify the pattern. */ + if (precise) { + memset((void *)gpa, init_p, size); + memcmp_g(gpa, init_p, size); + } else { + memset((void *)base_gpa, init_p, PER_CPU_DATA_SIZE); + memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE); + } + + /* + * Punch a hole at the target range and verify that reads from + * the guest succeed and return zeroes. + */ + guest_punch_hole(gpa, size); + memcmp_g(gpa, 0, size); + } +} + +static void guest_code(uint64_t base_gpa) +{ + /* + * Run the conversion test twice, with and without doing fallocate() on + * the guest_memfd backing when converting between shared and private. + */ + guest_test_explicit_conversion(base_gpa, false); + guest_test_explicit_conversion(base_gpa, true); + + /* + * Run the PUNCH_HOLE test twice too, once with the entire guest_memfd + * faulted in, once with only the target range faulted in. 
+ */ + guest_test_punch_hole(base_gpa, false); + guest_test_punch_hole(base_gpa, true); + GUEST_DONE(); +} + +static void handle_exit_hypercall(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + uint64_t gpa = run->hypercall.args[0]; + uint64_t size = run->hypercall.args[1] * PAGE_SIZE; + bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES; + bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED; + bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE; + struct kvm_vm *vm = vcpu->vm; + + TEST_ASSERT(run->hypercall.nr == KVM_HC_MAP_GPA_RANGE, + "Wanted MAP_GPA_RANGE (%u), got '%llu'", + KVM_HC_MAP_GPA_RANGE, run->hypercall.nr); + + if (do_fallocate) + vm_guest_mem_fallocate(vm, gpa, size, map_shared); + + if (set_attributes) + vm_set_memory_attributes(vm, gpa, size, + map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE); + run->hypercall.ret = 0; +} + +static bool run_vcpus; + +static void *__test_mem_conversions(void *__vcpu) +{ + struct kvm_vcpu *vcpu = __vcpu; + struct kvm_run *run = vcpu->run; + struct kvm_vm *vm = vcpu->vm; + struct ucall uc; + + while (!READ_ONCE(run_vcpus)) + ; + + for ( ;; ) { + vcpu_run(vcpu); + + if (run->exit_reason == KVM_EXIT_HYPERCALL) { + handle_exit_hypercall(vcpu); + continue; + } + + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Wanted KVM_EXIT_IO, got exit reason: %u (%s)", + run->exit_reason, exit_reason_str(run->exit_reason)); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_SYNC: { + uint64_t gpa = uc.args[1]; + size_t size = uc.args[2]; + size_t i; + + TEST_ASSERT(uc.args[0] == SYNC_SHARED || + uc.args[0] == SYNC_PRIVATE, + "Unknown sync command '%ld'", uc.args[0]); + + for (i = 0; i < size; i += vm->page_size) { + size_t nr_bytes = min_t(size_t, vm->page_size, size - i); + uint8_t *hva = addr_gpa2hva(vm, gpa + i); + + /* In all cases, the host should observe the shared data. 
*/ + memcmp_h(hva, gpa + i, uc.args[3], nr_bytes); + + /* For shared, write the new pattern to guest memory. */ + if (uc.args[0] == SYNC_SHARED) + memset(hva, uc.args[4], nr_bytes); + } + break; + } + case UCALL_DONE: + return NULL; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + } + } +} + +static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus, + uint32_t nr_memslots) +{ + /* + * Allocate enough memory so that each vCPU's chunk of memory can be + * naturally aligned with respect to the size of the backing store. + */ + const size_t alignment = max_t(size_t, SZ_2M, get_backing_src_pagesz(src_type)); + const size_t per_cpu_size = align_up(PER_CPU_DATA_SIZE, alignment); + const size_t memfd_size = per_cpu_size * nr_vcpus; + const size_t slot_size = memfd_size / nr_memslots; + struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; + pthread_t threads[KVM_MAX_VCPUS]; + struct kvm_vm *vm; + int memfd, i, r; + + const struct vm_shape shape = { + .mode = VM_MODE_DEFAULT, + .type = KVM_X86_SW_PROTECTED_VM, + }; + + TEST_ASSERT(slot_size * nr_memslots == memfd_size, + "The memfd size (0x%lx) needs to be cleanly divisible by the number of memslots (%u)", + memfd_size, nr_memslots); + vm = __vm_create_with_vcpus(shape, nr_vcpus, 0, guest_code, vcpus); + + vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE)); + + memfd = vm_create_guest_memfd(vm, memfd_size, 0); + + for (i = 0; i < nr_memslots; i++) + vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i, + BASE_DATA_SLOT + i, slot_size / vm->page_size, + KVM_MEM_GUEST_MEMFD, memfd, slot_size * i); + + for (i = 0; i < nr_vcpus; i++) { + uint64_t gpa = BASE_DATA_GPA + i * per_cpu_size; + + vcpu_args_set(vcpus[i], 1, gpa); + + /* + * Map only what is needed so that an out-of-bounds access + * results #PF => SHUTDOWN instead of data corruption. 
+ */ + virt_map(vm, gpa, gpa, PER_CPU_DATA_SIZE / vm->page_size); + + pthread_create(&threads[i], NULL, __test_mem_conversions, vcpus[i]); + } + + WRITE_ONCE(run_vcpus, true); + + for (i = 0; i < nr_vcpus; i++) + pthread_join(threads[i], NULL); + + kvm_vm_free(vm); + + /* + * Allocate and free memory from the guest_memfd after closing the VM + * fd. The guest_memfd is gifted a reference to its owning VM, i.e. + * should prevent the VM from being fully destroyed until the last + * reference to the guest_memfd is also put. + */ + r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size); + TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); + + r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size); + TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); +} + +static void usage(const char *cmd) +{ + puts(""); + printf("usage: %s [-h] [-m nr_memslots] [-s mem_type] [-n nr_vcpus]\n", cmd); + puts(""); + backing_src_help("-s"); + puts(""); + puts(" -n: specify the number of vcpus (default: 1)"); + puts(""); + puts(" -m: specify the number of memslots (default: 1)"); + puts(""); +} + +int main(int argc, char *argv[]) +{ + enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC; + uint32_t nr_memslots = 1; + uint32_t nr_vcpus = 1; + int opt; + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)); + + while ((opt = getopt(argc, argv, "hm:s:n:")) != -1) { + switch (opt) { + case 's': + src_type = parse_backing_src_type(optarg); + break; + case 'n': + nr_vcpus = atoi_positive("nr_vcpus", optarg); + break; + case 'm': + nr_memslots = atoi_positive("nr_memslots", optarg); + break; + case 'h': + default: + usage(argv[0]); + exit(0); + } + } + + test_mem_conversions(src_type, nr_vcpus, nr_memslots); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c new file mode 100644 index 0000000000..13e72fcec8 --- /dev/null 
+++ b/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023, Google LLC. + */ +#include <linux/kvm.h> +#include <pthread.h> +#include <stdint.h> + +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" + +/* Arbitrarily selected to avoid overlaps with anything else */ +#define EXITS_TEST_GVA 0xc0000000 +#define EXITS_TEST_GPA EXITS_TEST_GVA +#define EXITS_TEST_NPAGES 1 +#define EXITS_TEST_SIZE (EXITS_TEST_NPAGES * PAGE_SIZE) +#define EXITS_TEST_SLOT 10 + +static uint64_t guest_repeatedly_read(void) +{ + volatile uint64_t value; + + while (true) + value = *((uint64_t *) EXITS_TEST_GVA); + + return value; +} + +static uint32_t run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu) +{ + int r; + + r = _vcpu_run(vcpu); + if (r) { + TEST_ASSERT(errno == EFAULT, KVM_IOCTL_ERROR(KVM_RUN, r)); + TEST_ASSERT_EQ(vcpu->run->exit_reason, KVM_EXIT_MEMORY_FAULT); + } + return vcpu->run->exit_reason; +} + +const struct vm_shape protected_vm_shape = { + .mode = VM_MODE_DEFAULT, + .type = KVM_X86_SW_PROTECTED_VM, +}; + +static void test_private_access_memslot_deleted(void) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + pthread_t vm_thread; + void *thread_return; + uint32_t exit_reason; + + vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu, + guest_repeatedly_read); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + EXITS_TEST_GPA, EXITS_TEST_SLOT, + EXITS_TEST_NPAGES, + KVM_MEM_GUEST_MEMFD); + + virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES); + + /* Request to access page privately */ + vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE); + + pthread_create(&vm_thread, NULL, + (void *(*)(void *))run_vcpu_get_exit_reason, + (void *)vcpu); + + vm_mem_region_delete(vm, EXITS_TEST_SLOT); + + pthread_join(vm_thread, &thread_return); + exit_reason = (uint32_t)(uint64_t)thread_return; + + TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT); 
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE); + TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA); + TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE); + + kvm_vm_free(vm); +} + +static void test_private_access_memslot_not_private(void) +{ + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + uint32_t exit_reason; + + vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu, + guest_repeatedly_read); + + /* Add a non-private memslot (flags = 0) */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + EXITS_TEST_GPA, EXITS_TEST_SLOT, + EXITS_TEST_NPAGES, 0); + + virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES); + + /* Request to access page privately */ + vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE); + + exit_reason = run_vcpu_get_exit_reason(vcpu); + + TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT); + TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE); + TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA); + TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM)); + + test_private_access_memslot_deleted(); + test_private_access_memslot_not_private(); +} diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c index c7ef975610..a49828adf2 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c +++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c @@ -91,7 +91,7 @@ static void sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src) int ret; ret = __sev_migrate_from(dst, src); - TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d\n", ret, errno); + TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d", ret, errno); } static void test_sev_migrate_from(bool es) @@ -113,7 +113,7 @@ static void 
test_sev_migrate_from(bool es) /* Migrate the guest back to the original VM. */ ret = __sev_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]); TEST_ASSERT(ret == -1 && errno == EIO, - "VM that was migrated from should be dead. ret %d, errno: %d\n", ret, + "VM that was migrated from should be dead. ret %d, errno: %d", ret, errno); kvm_vm_free(src_vm); @@ -172,7 +172,7 @@ static void test_sev_migrate_parameters(void) vm_no_sev = aux_vm_create(true); ret = __sev_migrate_from(vm_no_vcpu, vm_no_sev); TEST_ASSERT(ret == -1 && errno == EINVAL, - "Migrations require SEV enabled. ret %d, errno: %d\n", ret, + "Migrations require SEV enabled. ret %d, errno: %d", ret, errno); if (!have_sev_es) @@ -187,25 +187,25 @@ static void test_sev_migrate_parameters(void) ret = __sev_migrate_from(sev_vm, sev_es_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able migrate to SEV enabled VM. ret: %d, errno: %d\n", + "Should not be able migrate to SEV enabled VM. ret: %d, errno: %d", ret, errno); ret = __sev_migrate_from(sev_es_vm, sev_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d\n", + "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d", ret, errno); ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d\n", + "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d", ret, errno); ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa); TEST_ASSERT( ret == -1 && errno == EINVAL, - "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d\n", + "SEV-ES migrations require UPDATE_VMSA. 
ret %d, errno: %d", ret, errno); kvm_vm_free(sev_vm); @@ -227,7 +227,7 @@ static void sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src) int ret; ret = __sev_mirror_create(dst, src); - TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d\n", ret, errno); + TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d", ret, errno); } static void verify_mirror_allowed_cmds(int vm_fd) @@ -259,7 +259,7 @@ static void verify_mirror_allowed_cmds(int vm_fd) ret = __sev_ioctl(vm_fd, cmd_id, NULL, &fw_error); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able call command: %d. ret: %d, errno: %d\n", + "Should not be able call command: %d. ret: %d, errno: %d", cmd_id, ret, errno); } @@ -301,18 +301,18 @@ static void test_sev_mirror_parameters(void) ret = __sev_mirror_create(sev_vm, sev_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able copy context to self. ret: %d, errno: %d\n", + "Should not be able copy context to self. ret: %d, errno: %d", ret, errno); ret = __sev_mirror_create(vm_no_vcpu, vm_with_vcpu); TEST_ASSERT(ret == -1 && errno == EINVAL, - "Copy context requires SEV enabled. ret %d, errno: %d\n", ret, + "Copy context requires SEV enabled. ret %d, errno: %d", ret, errno); ret = __sev_mirror_create(vm_with_vcpu, sev_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d\n", + "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d", ret, errno); if (!have_sev_es) @@ -322,13 +322,13 @@ static void test_sev_mirror_parameters(void) ret = __sev_mirror_create(sev_vm, sev_es_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able copy context to SEV enabled VM. ret: %d, errno: %d\n", + "Should not be able copy context to SEV enabled VM. ret: %d, errno: %d", ret, errno); ret = __sev_mirror_create(sev_es_vm, sev_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, - "Should not be able copy context to SEV-ES enabled VM. 
ret: %d, errno: %d\n", + "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d", ret, errno); kvm_vm_free(sev_es_vm); diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c index 06edf00a97..1a46dd7bb3 100644 --- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c +++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c @@ -74,7 +74,7 @@ int main(int argc, char *argv[]) MEM_REGION_SIZE / PAGE_SIZE, 0); gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE, MEM_REGION_GPA, MEM_REGION_SLOT); - TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n"); + TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc"); virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1); hva = addr_gpa2hva(vm, MEM_REGION_GPA); memset(hva, 0, PAGE_SIZE); @@ -102,7 +102,7 @@ int main(int argc, char *argv[]) case UCALL_DONE: break; default: - TEST_FAIL("Unrecognized ucall: %lu\n", uc.cmd); + TEST_FAIL("Unrecognized ucall: %lu", uc.cmd); } kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c index 7ee44496cf..0c7ce3d4e8 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c @@ -103,7 +103,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i run_guest(vmcb, svm->vmcb_gpa); __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL, - "Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%llx, info2 = '0x%llx'", + "Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'", vmcb->control.exit_code, vmcb->control.exit_info_1, vmcb->control.exit_info_2); @@ -133,7 +133,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i run_guest(vmcb, svm->vmcb_gpa); 
__GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT, - "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%llx, info2 = '0x%llx'", + "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'", vmcb->control.exit_code, vmcb->control.exit_info_1, vmcb->control.exit_info_2); diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index 00965ba33f..a91b5b145f 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -46,7 +46,7 @@ static void compare_regs(struct kvm_regs *left, struct kvm_regs *right) #define REG_COMPARE(reg) \ TEST_ASSERT(left->reg == right->reg, \ "Register " #reg \ - " values did not match: 0x%llx, 0x%llx\n", \ + " values did not match: 0x%llx, 0x%llx", \ left->reg, right->reg) REG_COMPARE(rax); REG_COMPARE(rbx); @@ -230,14 +230,14 @@ int main(int argc, char *argv[]) run->kvm_valid_regs = INVALID_SYNC_FIELD; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_valid_regs = 0; run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_valid_regs = 0; @@ -245,14 +245,14 @@ int main(int argc, char *argv[]) run->kvm_dirty_regs = INVALID_SYNC_FIELD; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_dirty_regs = 0; run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, - "Invalid kvm_dirty_regs did not cause expected 
KVM_RUN error: %d\n", + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d", rv); run->kvm_dirty_regs = 0; diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c index 85f34ca7e4..dcbb3c29fb 100644 --- a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c +++ b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c @@ -143,7 +143,7 @@ static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu) TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC, - "Expect UCALL_SYNC\n"); + "Expect UCALL_SYNC"); TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected."); printf("vCPU received GP in guest.\n"); } @@ -188,7 +188,7 @@ static void *run_ucna_injection(void *arg) TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO); TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC, - "Expect UCALL_SYNC\n"); + "Expect UCALL_SYNC"); TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA."); printf("Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR); @@ -198,7 +198,7 @@ static void *run_ucna_injection(void *arg) TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO); TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC, - "Expect UCALL_SYNC\n"); + "Expect UCALL_SYNC"); TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA."); printf("Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR); @@ -208,7 +208,7 @@ static void *run_ucna_injection(void *arg) TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO); if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) { - TEST_ASSERT(false, "vCPU assertion failure: %s.\n", + TEST_ASSERT(false, "vCPU assertion failure: %s.", (const char *)uc.args[0]); } @@ -271,7 +271,7 @@ int main(int argc, char *argv[]) kvm_check_cap(KVM_CAP_MCE); - vm = __vm_create(VM_MODE_DEFAULT, 3, 0); + vm = __vm_create(VM_SHAPE_DEFAULT, 3, 0); kvm_ioctl(vm->kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED, &supported_mcg_caps); diff --git 
a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c index 255c50b0dc..9481cbcf28 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c @@ -71,7 +71,7 @@ int main(int argc, char *argv[]) break; TEST_ASSERT(run->io.port == 0x80, - "Expected I/O at port 0x80, got port 0x%x\n", run->io.port); + "Expected I/O at port 0x80, got port 0x%x", run->io.port); /* * Modify the rep string count in RCX: 2 => 1 and 3 => 8192. diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c index 2bed5fb3a0..a81a24761a 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c @@ -99,7 +99,7 @@ int main(int argc, char *argv[]) TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); TEST_ASSERT(run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION, - "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u\n", + "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u", run->internal.suberror); break; } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c index e4ad5fef52..7f6f5f23fb 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -128,17 +128,17 @@ int main(int argc, char *argv[]) */ kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); if (uc.args[1]) { - TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean\n"); - TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest\n"); + TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean"); + TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest"); } else { - TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty\n"); - 
TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest\n"); + TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty"); + TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest"); } - TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty\n"); - TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest\n"); - TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty\n"); - TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest\n"); + TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty"); + TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest"); + TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty"); + TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest"); break; case UCALL_DONE: done = true; diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c index a9b827c69f..fad3634fd9 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c @@ -28,7 +28,7 @@ static void __run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu) TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR); TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION, - "Expected emulation failure, got %d\n", + "Expected emulation failure, got %d", run->emulation_failure.suberror); } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c index e710b6e7fb..1759fa5cb3 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c @@ -116,23 +116,6 @@ 
static void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_DONE(); } -static bool system_has_stable_tsc(void) -{ - bool tsc_is_stable; - FILE *fp; - char buf[4]; - - fp = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r"); - if (fp == NULL) - return false; - - tsc_is_stable = fgets(buf, sizeof(buf), fp) && - !strncmp(buf, "tsc", sizeof(buf)); - - fclose(fp); - return tsc_is_stable; -} - int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; @@ -148,7 +131,7 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL)); - TEST_REQUIRE(system_has_stable_tsc()); + TEST_REQUIRE(sys_clocksource_is_based_on_tsc()); /* * We set L1's scale factor to be a random number from 2 to 10. diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c index ebbcb0a3f7..2a8d4ac2f0 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c @@ -56,7 +56,7 @@ static void guest_test_perf_capabilities_gp(uint64_t val) uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val); __GUEST_ASSERT(vector == GP_VECTOR, - "Expected #GP for value '0x%llx', got vector '0x%x'", + "Expected #GP for value '0x%lx', got vector '0x%x'", val, vector); } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c index 41ea7028a1..67a62a5a88 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c @@ -125,21 +125,25 @@ void test_vmx_nested_state(struct kvm_vcpu *vcpu) /* * Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without - * setting the nested state but flags other than eVMCS must be clear. - * The eVMCS flag can be set if the enlightened VMCS capability has - * been enabled. 
+ * setting the nested state. When the eVMCS flag is not set, the + * expected return value is '0'. */ set_default_vmx_state(state, state_sz); + state->flags = 0; state->hdr.vmx.vmxon_pa = -1ull; state->hdr.vmx.vmcs12_pa = -1ull; - test_nested_state_expect_einval(vcpu, state); + test_nested_state(vcpu, state); - state->flags &= KVM_STATE_NESTED_EVMCS; + /* + * When eVMCS is supported, the eVMCS flag can only be set if the + * enlightened VMCS capability has been enabled. + */ if (have_evmcs) { + state->flags = KVM_STATE_NESTED_EVMCS; test_nested_state_expect_einval(vcpu, state); vcpu_enable_evmcs(vcpu); + test_nested_state(vcpu, state); } - test_nested_state(vcpu, state); /* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */ state->hdr.vmx.smm.flags = 1; diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c index 67ac2a3292..725c206ba0 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c @@ -216,7 +216,7 @@ static void *vcpu_thread(void *arg) "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n" "Halter TPR=%#x PPR=%#x LVR=%#x\n" "Migrations attempted: %lu\n" - "Migrations completed: %lu\n", + "Migrations completed: %lu", vcpu->id, (const char *)uc.args[0], params->data->ipis_sent, params->data->hlt_count, params->data->wake_count, @@ -288,7 +288,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, } TEST_ASSERT(nodes > 1, - "Did not find at least 2 numa nodes. Can't do migration\n"); + "Did not find at least 2 numa nodes. Can't do migration"); fprintf(stderr, "Migrating amongst %d nodes found\n", nodes); @@ -347,7 +347,7 @@ void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs, wake_count != data->wake_count, "IPI, HLT and wake count have not increased " "in the last %lu seconds. 
" - "HLTer is likely hung.\n", interval_secs); + "HLTer is likely hung.", interval_secs); ipis_sent = data->ipis_sent; hlt_count = data->hlt_count; @@ -381,7 +381,7 @@ void get_cmdline_args(int argc, char *argv[], int *run_secs, "-m adds calls to migrate_pages while vCPUs are running." " Default is no migrations.\n" "-d <delay microseconds> - delay between migrate_pages() calls." - " Default is %d microseconds.\n", + " Default is %d microseconds.", DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS); } } diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c index 77d04a7bda..25a0b0db5c 100644 --- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c @@ -25,7 +25,7 @@ do { \ \ __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) || \ __supported == ((xfeatures) | (dependencies)), \ - "supported = 0x%llx, xfeatures = 0x%llx, dependencies = 0x%llx", \ + "supported = 0x%lx, xfeatures = 0x%llx, dependencies = 0x%llx", \ __supported, (xfeatures), (dependencies)); \ } while (0) @@ -42,7 +42,7 @@ do { \ uint64_t __supported = (supported_xcr0) & (xfeatures); \ \ __GUEST_ASSERT(!__supported || __supported == (xfeatures), \ - "supported = 0x%llx, xfeatures = 0x%llx", \ + "supported = 0x%lx, xfeatures = 0x%llx", \ __supported, (xfeatures)); \ } while (0) @@ -81,7 +81,7 @@ static void guest_code(void) vector = xsetbv_safe(0, supported_xcr0); __GUEST_ASSERT(!vector, - "Expected success on XSETBV(0x%llx), got vector '0x%x'", + "Expected success on XSETBV(0x%lx), got vector '0x%x'", supported_xcr0, vector); for (i = 0; i < 64; i++) { @@ -90,7 +90,7 @@ static void guest_code(void) vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i)); __GUEST_ASSERT(vector == GP_VECTOR, - "Expected #GP on XSETBV(0x%llx), supported XCR0 = %llx, got vector '0x%x'", + "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got vector '0x%x'", BIT_ULL(i), supported_xcr0, vector); } @@ 
-116,7 +116,7 @@ int main(int argc, char *argv[]) vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, - "Unexpected exit reason: %u (%s),\n", + "Unexpected exit reason: %u (%s),", run->exit_reason, exit_reason_str(run->exit_reason)); diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c index e0ddf47362..167c97abff 100644 --- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c +++ b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c @@ -29,7 +29,7 @@ int main(int argc, char *argv[]) xss_val = vcpu_get_msr(vcpu, MSR_IA32_XSS); TEST_ASSERT(xss_val == 0, - "MSR_IA32_XSS should be initialized to zero\n"); + "MSR_IA32_XSS should be initialized to zero"); vcpu_set_msr(vcpu, MSR_IA32_XSS, xss_val); diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index fde1a96ef9..2d6d9b43d9 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -591,7 +591,7 @@ TEST_F_FORK(layout1, file_and_dir_access_rights) ASSERT_EQ(0, close(ruleset_fd)); } -TEST_F_FORK(layout0, unknown_access_rights) +TEST_F_FORK(layout0, ruleset_with_unknown_access) { __u64 access_mask; @@ -607,6 +607,67 @@ TEST_F_FORK(layout0, unknown_access_rights) } } +TEST_F_FORK(layout0, rule_with_unknown_access) +{ + __u64 access; + struct landlock_path_beneath_attr path_beneath = {}; + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = ACCESS_ALL, + }; + const int ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + + ASSERT_LE(0, ruleset_fd); + + path_beneath.parent_fd = + open(TMP_DIR, O_PATH | O_DIRECTORY | O_CLOEXEC); + ASSERT_LE(0, path_beneath.parent_fd); + + for (access = 1ULL << 63; access != ACCESS_LAST; access >>= 1) { + path_beneath.allowed_access = access; + EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, + LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0)); + EXPECT_EQ(EINVAL, errno); + } + 
ASSERT_EQ(0, close(path_beneath.parent_fd)); + ASSERT_EQ(0, close(ruleset_fd)); +} + +TEST_F_FORK(layout1, rule_with_unhandled_access) +{ + struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE, + }; + struct landlock_path_beneath_attr path_beneath = {}; + int ruleset_fd; + __u64 access; + + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + path_beneath.parent_fd = open(file1_s1d2, O_PATH | O_CLOEXEC); + ASSERT_LE(0, path_beneath.parent_fd); + + for (access = 1; access > 0; access <<= 1) { + int err; + + path_beneath.allowed_access = access; + err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, + &path_beneath, 0); + if (access == ruleset_attr.handled_access_fs) { + EXPECT_EQ(0, err); + } else { + EXPECT_EQ(-1, err); + EXPECT_EQ(EINVAL, errno); + } + } + + EXPECT_EQ(0, close(path_beneath.parent_fd)); + EXPECT_EQ(0, close(ruleset_fd)); +} + static void add_path_beneath(struct __test_metadata *const _metadata, const int ruleset_fd, const __u64 allowed_access, const char *const path) @@ -3629,7 +3690,7 @@ FIXTURE_TEARDOWN(ftruncate) FIXTURE_VARIANT(ftruncate) { const __u64 handled; - const __u64 permitted; + const __u64 allowed; const int expected_open_result; const int expected_ftruncate_result; }; @@ -3638,7 +3699,7 @@ FIXTURE_VARIANT(ftruncate) FIXTURE_VARIANT_ADD(ftruncate, w_w) { /* clang-format on */ .handled = LANDLOCK_ACCESS_FS_WRITE_FILE, - .permitted = LANDLOCK_ACCESS_FS_WRITE_FILE, + .allowed = LANDLOCK_ACCESS_FS_WRITE_FILE, .expected_open_result = 0, .expected_ftruncate_result = 0, }; @@ -3647,7 +3708,7 @@ FIXTURE_VARIANT_ADD(ftruncate, w_w) { FIXTURE_VARIANT_ADD(ftruncate, t_t) { /* clang-format on */ .handled = LANDLOCK_ACCESS_FS_TRUNCATE, - .permitted = LANDLOCK_ACCESS_FS_TRUNCATE, + .allowed = LANDLOCK_ACCESS_FS_TRUNCATE, .expected_open_result = 0, .expected_ftruncate_result = 0, }; @@ -3656,7 +3717,7 @@ FIXTURE_VARIANT_ADD(ftruncate, 
t_t) { FIXTURE_VARIANT_ADD(ftruncate, wt_w) { /* clang-format on */ .handled = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE, - .permitted = LANDLOCK_ACCESS_FS_WRITE_FILE, + .allowed = LANDLOCK_ACCESS_FS_WRITE_FILE, .expected_open_result = 0, .expected_ftruncate_result = EACCES, }; @@ -3665,8 +3726,7 @@ FIXTURE_VARIANT_ADD(ftruncate, wt_w) { FIXTURE_VARIANT_ADD(ftruncate, wt_wt) { /* clang-format on */ .handled = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE, - .permitted = LANDLOCK_ACCESS_FS_WRITE_FILE | - LANDLOCK_ACCESS_FS_TRUNCATE, + .allowed = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE, .expected_open_result = 0, .expected_ftruncate_result = 0, }; @@ -3675,7 +3735,7 @@ FIXTURE_VARIANT_ADD(ftruncate, wt_wt) { FIXTURE_VARIANT_ADD(ftruncate, wt_t) { /* clang-format on */ .handled = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE, - .permitted = LANDLOCK_ACCESS_FS_TRUNCATE, + .allowed = LANDLOCK_ACCESS_FS_TRUNCATE, .expected_open_result = EACCES, }; @@ -3685,7 +3745,7 @@ TEST_F_FORK(ftruncate, open_and_ftruncate) const struct rule rules[] = { { .path = path, - .access = variant->permitted, + .access = variant->allowed, }, {}, }; @@ -3726,7 +3786,7 @@ TEST_F_FORK(ftruncate, open_and_ftruncate_in_different_processes) const struct rule rules[] = { { .path = path, - .access = variant->permitted, + .access = variant->allowed, }, {}, }; diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c index 4499b2736e..936cfc879f 100644 --- a/tools/testing/selftests/landlock/net_test.c +++ b/tools/testing/selftests/landlock/net_test.c @@ -1269,7 +1269,7 @@ TEST_F(mini, network_access_rights) } /* Checks invalid attribute, out of landlock network access range. 
*/ -TEST_F(mini, unknown_access_rights) +TEST_F(mini, ruleset_with_unknown_access) { __u64 access_mask; @@ -1285,6 +1285,63 @@ TEST_F(mini, unknown_access_rights) } } +TEST_F(mini, rule_with_unknown_access) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = ACCESS_ALL, + }; + struct landlock_net_port_attr net_port = { + .port = sock_port_start, + }; + int ruleset_fd; + __u64 access; + + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + for (access = 1ULL << 63; access != ACCESS_LAST; access >>= 1) { + net_port.allowed_access = access; + EXPECT_EQ(-1, + landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &net_port, 0)); + EXPECT_EQ(EINVAL, errno); + } + EXPECT_EQ(0, close(ruleset_fd)); +} + +TEST_F(mini, rule_with_unhandled_access) +{ + struct landlock_ruleset_attr ruleset_attr = { + .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP, + }; + struct landlock_net_port_attr net_port = { + .port = sock_port_start, + }; + int ruleset_fd; + __u64 access; + + ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, ruleset_fd); + + for (access = 1; access > 0; access <<= 1) { + int err; + + net_port.allowed_access = access; + err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &net_port, 0); + if (access == ruleset_attr.handled_access_net) { + EXPECT_EQ(0, err); + } else { + EXPECT_EQ(-1, err); + EXPECT_EQ(EINVAL, errno); + } + } + + EXPECT_EQ(0, close(ruleset_fd)); +} + TEST_F(mini, inval) { const struct landlock_ruleset_attr ruleset_attr = { diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh index c8416c54b4..b1fd7362c2 100644 --- a/tools/testing/selftests/livepatch/functions.sh +++ b/tools/testing/selftests/livepatch/functions.sh @@ -42,17 +42,6 @@ function die() { exit 1 } -# save existing dmesg so we can detect new content -function save_dmesg() { - SAVED_DMESG=$(mktemp 
--tmpdir -t klp-dmesg-XXXXXX) - dmesg > "$SAVED_DMESG" -} - -# cleanup temporary dmesg file from save_dmesg() -function cleanup_dmesg_file() { - rm -f "$SAVED_DMESG" -} - function push_config() { DYNAMIC_DEBUG=$(grep '^kernel/livepatch' /sys/kernel/debug/dynamic_debug/control | \ awk -F'[: ]' '{print "file " $1 " line " $2 " " $4}') @@ -99,7 +88,6 @@ function set_ftrace_enabled() { function cleanup() { pop_config - cleanup_dmesg_file } # setup_config - save the current config and set a script exit trap that @@ -280,7 +268,15 @@ function set_pre_patch_ret { function start_test { local test="$1" - save_dmesg + # Dump something unique into the dmesg log, then stash the entry + # in LAST_DMESG. The check_result() function will use it to + # find new kernel messages since the test started. + local last_dmesg_msg="livepatch kselftest timestamp: $(date --rfc-3339=ns)" + log "$last_dmesg_msg" + loop_until 'dmesg | grep -q "$last_dmesg_msg"' || + die "buffer busy? can't find canary dmesg message: $last_dmesg_msg" + LAST_DMESG=$(dmesg | grep "$last_dmesg_msg") + echo -n "TEST: $test ... " log "===== TEST: $test =====" } @@ -291,23 +287,24 @@ function check_result { local expect="$*" local result - # Note: when comparing dmesg output, the kernel log timestamps - # help differentiate repeated testing runs. Remove them with a - # post-comparison sed filter. - - result=$(dmesg | comm --nocheck-order -13 "$SAVED_DMESG" - | \ + # Test results include any new dmesg entry since LAST_DMESG, then: + # - include lines matching keywords + # - exclude lines matching keywords + # - filter out dmesg timestamp prefixes + result=$(dmesg | awk -v last_dmesg="$LAST_DMESG" 'p; $0 == last_dmesg { p=1 }' | \ grep -e 'livepatch:' -e 'test_klp' | \ grep -v '\(tainting\|taints\) kernel' | \ sed 's/^\[[ 0-9.]*\] //') if [[ "$expect" == "$result" ]] ; then echo "ok" + elif [[ "$result" == "" ]] ; then + echo -e "not ok\n\nbuffer overrun? 
can't find canary dmesg entry: $LAST_DMESG\n" + die "livepatch kselftest(s) failed" else echo -e "not ok\n\n$(diff -upr --label expected --label result <(echo "$expect") <(echo "$result"))\n" die "livepatch kselftest(s) failed" fi - - cleanup_dmesg_file } # check_sysfs_rights(modname, rel_path, expected_rights) - check sysfs diff --git a/tools/testing/selftests/lsm/.gitignore b/tools/testing/selftests/lsm/.gitignore new file mode 100644 index 0000000000..bd68f6c3fd --- /dev/null +++ b/tools/testing/selftests/lsm/.gitignore @@ -0,0 +1 @@ +/*_test diff --git a/tools/testing/selftests/lsm/Makefile b/tools/testing/selftests/lsm/Makefile new file mode 100644 index 0000000000..3f80c0bc09 --- /dev/null +++ b/tools/testing/selftests/lsm/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# First run: make -C ../../../.. headers_install + +CFLAGS += -Wall -O2 $(KHDR_INCLUDES) +LOCAL_HDRS += common.h + +TEST_GEN_PROGS := lsm_get_self_attr_test lsm_list_modules_test \ + lsm_set_self_attr_test + +include ../lib.mk + +$(OUTPUT)/lsm_get_self_attr_test: lsm_get_self_attr_test.c common.c +$(OUTPUT)/lsm_set_self_attr_test: lsm_set_self_attr_test.c common.c +$(OUTPUT)/lsm_list_modules_test: lsm_list_modules_test.c common.c + +EXTRA_CLEAN = $(OUTPUT)/common.o diff --git a/tools/testing/selftests/lsm/common.c b/tools/testing/selftests/lsm/common.c new file mode 100644 index 0000000000..9ad2589126 --- /dev/null +++ b/tools/testing/selftests/lsm/common.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Linux Security Module infrastructure tests + * + * Copyright © 2023 Casey Schaufler <casey@schaufler-ca.com> + */ + +#define _GNU_SOURCE +#include <linux/lsm.h> +#include <fcntl.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include "common.h" + +#define PROCATTR "/proc/self/attr/" + +int read_proc_attr(const char *attr, char *value, size_t size) +{ + int fd; + int len; + char *path; + + len = 
strlen(PROCATTR) + strlen(attr) + 1; + path = calloc(len, 1); + if (path == NULL) + return -1; + sprintf(path, "%s%s", PROCATTR, attr); + + fd = open(path, O_RDONLY); + free(path); + + if (fd < 0) + return -1; + len = read(fd, value, size); + + close(fd); + + /* Ensure value is terminated */ + if (len <= 0 || len == size) + return -1; + value[len] = '\0'; + + path = strchr(value, '\n'); + if (path) + *path = '\0'; + + return 0; +} + +int read_sysfs_lsms(char *lsms, size_t size) +{ + FILE *fp; + size_t red; + + fp = fopen("/sys/kernel/security/lsm", "r"); + if (fp == NULL) + return -1; + red = fread(lsms, 1, size, fp); + fclose(fp); + + if (red <= 0 || red == size) + return -1; + lsms[red] = '\0'; + return 0; +} + +int attr_lsm_count(void) +{ + char *names = calloc(sysconf(_SC_PAGESIZE), 1); + int count = 0; + + if (!names) + return 0; + + if (read_sysfs_lsms(names, sysconf(_SC_PAGESIZE))) + return 0; + + if (strstr(names, "selinux")) + count++; + if (strstr(names, "smack")) + count++; + if (strstr(names, "apparmor")) + count++; + + return count; +} diff --git a/tools/testing/selftests/lsm/common.h b/tools/testing/selftests/lsm/common.h new file mode 100644 index 0000000000..06d12110d2 --- /dev/null +++ b/tools/testing/selftests/lsm/common.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Linux Security Module infrastructure tests + * + * Copyright © 2023 Casey Schaufler <casey@schaufler-ca.com> + */ + +#ifndef lsm_get_self_attr +static inline int lsm_get_self_attr(unsigned int attr, struct lsm_ctx *ctx, + __u32 *size, __u32 flags) +{ + return syscall(__NR_lsm_get_self_attr, attr, ctx, size, flags); +} +#endif + +#ifndef lsm_set_self_attr +static inline int lsm_set_self_attr(unsigned int attr, struct lsm_ctx *ctx, + __u32 size, __u32 flags) +{ + return syscall(__NR_lsm_set_self_attr, attr, ctx, size, flags); +} +#endif + +#ifndef lsm_list_modules +static inline int lsm_list_modules(__u64 *ids, __u32 *size, __u32 flags) +{ + return 
syscall(__NR_lsm_list_modules, ids, size, flags); +} +#endif + +extern int read_proc_attr(const char *attr, char *value, size_t size); +extern int read_sysfs_lsms(char *lsms, size_t size); +int attr_lsm_count(void); diff --git a/tools/testing/selftests/lsm/config b/tools/testing/selftests/lsm/config new file mode 100644 index 0000000000..1c0c4c020f --- /dev/null +++ b/tools/testing/selftests/lsm/config @@ -0,0 +1,3 @@ +CONFIG_SYSFS=y +CONFIG_SECURITY=y +CONFIG_SECURITYFS=y diff --git a/tools/testing/selftests/lsm/lsm_get_self_attr_test.c b/tools/testing/selftests/lsm/lsm_get_self_attr_test.c new file mode 100644 index 0000000000..df215e4aa6 --- /dev/null +++ b/tools/testing/selftests/lsm/lsm_get_self_attr_test.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Linux Security Module infrastructure tests + * Tests for the lsm_get_self_attr system call + * + * Copyright © 2022 Casey Schaufler <casey@schaufler-ca.com> + */ + +#define _GNU_SOURCE +#include <linux/lsm.h> +#include <fcntl.h> +#include <string.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include "../kselftest_harness.h" +#include "common.h" + +static struct lsm_ctx *next_ctx(struct lsm_ctx *ctxp) +{ + void *vp; + + vp = (void *)ctxp + sizeof(*ctxp) + ctxp->ctx_len; + return (struct lsm_ctx *)vp; +} + +TEST(size_null_lsm_get_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + struct lsm_ctx *ctx = calloc(page_size, 1); + + ASSERT_NE(NULL, ctx); + errno = 0; + ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, NULL, 0)); + ASSERT_EQ(EINVAL, errno); + + free(ctx); +} + +TEST(ctx_null_lsm_get_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + __u32 size = page_size; + int rc; + + rc = lsm_get_self_attr(LSM_ATTR_CURRENT, NULL, &size, 0); + + if (attr_lsm_count()) { + ASSERT_NE(-1, rc); + ASSERT_NE(1, size); + } else { + ASSERT_EQ(-1, rc); + } +} + +TEST(size_too_small_lsm_get_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + 
struct lsm_ctx *ctx = calloc(page_size, 1); + __u32 size = 1; + + ASSERT_NE(NULL, ctx); + errno = 0; + ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, 0)); + if (attr_lsm_count()) { + ASSERT_EQ(E2BIG, errno); + } else { + ASSERT_EQ(EOPNOTSUPP, errno); + } + ASSERT_NE(1, size); + + free(ctx); +} + +TEST(flags_zero_lsm_get_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + struct lsm_ctx *ctx = calloc(page_size, 1); + __u64 *syscall_lsms = calloc(page_size, 1); + __u32 size; + int lsmcount; + int i; + + ASSERT_NE(NULL, ctx); + errno = 0; + size = page_size; + ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, + LSM_FLAG_SINGLE)); + ASSERT_EQ(EINVAL, errno); + ASSERT_EQ(page_size, size); + + lsmcount = syscall(__NR_lsm_list_modules, syscall_lsms, &size, 0); + ASSERT_LE(1, lsmcount); + ASSERT_NE(NULL, syscall_lsms); + + for (i = 0; i < lsmcount; i++) { + errno = 0; + size = page_size; + ctx->id = syscall_lsms[i]; + + if (syscall_lsms[i] == LSM_ID_SELINUX || + syscall_lsms[i] == LSM_ID_SMACK || + syscall_lsms[i] == LSM_ID_APPARMOR) { + ASSERT_EQ(1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, + &size, LSM_FLAG_SINGLE)); + } else { + ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, + &size, + LSM_FLAG_SINGLE)); + } + } + + free(ctx); +} + +TEST(flags_overset_lsm_get_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + struct lsm_ctx *ctx = calloc(page_size, 1); + __u32 size; + + ASSERT_NE(NULL, ctx); + + errno = 0; + size = page_size; + ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT | LSM_ATTR_PREV, ctx, + &size, 0)); + ASSERT_EQ(EOPNOTSUPP, errno); + + errno = 0; + size = page_size; + ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, + LSM_FLAG_SINGLE | + (LSM_FLAG_SINGLE << 1))); + ASSERT_EQ(EINVAL, errno); + + free(ctx); +} + +TEST(basic_lsm_get_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + __u32 size = page_size; + struct lsm_ctx *ctx = calloc(page_size, 1); + struct lsm_ctx *tctx = 
NULL; + __u64 *syscall_lsms = calloc(page_size, 1); + char *attr = calloc(page_size, 1); + int cnt_current = 0; + int cnt_exec = 0; + int cnt_fscreate = 0; + int cnt_keycreate = 0; + int cnt_prev = 0; + int cnt_sockcreate = 0; + int lsmcount; + int count; + int i; + + ASSERT_NE(NULL, ctx); + ASSERT_NE(NULL, syscall_lsms); + + lsmcount = syscall(__NR_lsm_list_modules, syscall_lsms, &size, 0); + ASSERT_LE(1, lsmcount); + + for (i = 0; i < lsmcount; i++) { + switch (syscall_lsms[i]) { + case LSM_ID_SELINUX: + cnt_current++; + cnt_exec++; + cnt_fscreate++; + cnt_keycreate++; + cnt_prev++; + cnt_sockcreate++; + break; + case LSM_ID_SMACK: + cnt_current++; + break; + case LSM_ID_APPARMOR: + cnt_current++; + cnt_exec++; + cnt_prev++; + break; + default: + break; + } + } + + if (cnt_current) { + size = page_size; + count = lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, 0); + ASSERT_EQ(cnt_current, count); + tctx = ctx; + ASSERT_EQ(0, read_proc_attr("current", attr, page_size)); + ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr)); + for (i = 1; i < count; i++) { + tctx = next_ctx(tctx); + ASSERT_NE(0, strcmp((char *)tctx->ctx, attr)); + } + } + if (cnt_exec) { + size = page_size; + count = lsm_get_self_attr(LSM_ATTR_EXEC, ctx, &size, 0); + ASSERT_GE(cnt_exec, count); + if (count > 0) { + tctx = ctx; + if (read_proc_attr("exec", attr, page_size) == 0) + ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr)); + } + for (i = 1; i < count; i++) { + tctx = next_ctx(tctx); + ASSERT_NE(0, strcmp((char *)tctx->ctx, attr)); + } + } + if (cnt_fscreate) { + size = page_size; + count = lsm_get_self_attr(LSM_ATTR_FSCREATE, ctx, &size, 0); + ASSERT_GE(cnt_fscreate, count); + if (count > 0) { + tctx = ctx; + if (read_proc_attr("fscreate", attr, page_size) == 0) + ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr)); + } + for (i = 1; i < count; i++) { + tctx = next_ctx(tctx); + ASSERT_NE(0, strcmp((char *)tctx->ctx, attr)); + } + } + if (cnt_keycreate) { + size = page_size; + count = 
lsm_get_self_attr(LSM_ATTR_KEYCREATE, ctx, &size, 0); + ASSERT_GE(cnt_keycreate, count); + if (count > 0) { + tctx = ctx; + if (read_proc_attr("keycreate", attr, page_size) == 0) + ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr)); + } + for (i = 1; i < count; i++) { + tctx = next_ctx(tctx); + ASSERT_NE(0, strcmp((char *)tctx->ctx, attr)); + } + } + if (cnt_prev) { + size = page_size; + count = lsm_get_self_attr(LSM_ATTR_PREV, ctx, &size, 0); + ASSERT_GE(cnt_prev, count); + if (count > 0) { + tctx = ctx; + ASSERT_EQ(0, read_proc_attr("prev", attr, page_size)); + ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr)); + for (i = 1; i < count; i++) { + tctx = next_ctx(tctx); + ASSERT_NE(0, strcmp((char *)tctx->ctx, attr)); + } + } + } + if (cnt_sockcreate) { + size = page_size; + count = lsm_get_self_attr(LSM_ATTR_SOCKCREATE, ctx, &size, 0); + ASSERT_GE(cnt_sockcreate, count); + if (count > 0) { + tctx = ctx; + if (read_proc_attr("sockcreate", attr, page_size) == 0) + ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr)); + } + for (i = 1; i < count; i++) { + tctx = next_ctx(tctx); + ASSERT_NE(0, strcmp((char *)tctx->ctx, attr)); + } + } + + free(ctx); + free(attr); + free(syscall_lsms); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/lsm/lsm_list_modules_test.c b/tools/testing/selftests/lsm/lsm_list_modules_test.c new file mode 100644 index 0000000000..868641dbb3 --- /dev/null +++ b/tools/testing/selftests/lsm/lsm_list_modules_test.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Linux Security Module infrastructure tests + * Tests for the lsm_list_modules system call + * + * Copyright © 2022 Casey Schaufler <casey@schaufler-ca.com> + */ + +#define _GNU_SOURCE +#include <linux/lsm.h> +#include <string.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include "../kselftest_harness.h" +#include "common.h" + +TEST(size_null_lsm_list_modules) +{ + const long page_size = sysconf(_SC_PAGESIZE); + __u64 *syscall_lsms = calloc(page_size, 1); + + 
ASSERT_NE(NULL, syscall_lsms); + errno = 0; + ASSERT_EQ(-1, lsm_list_modules(syscall_lsms, NULL, 0)); + ASSERT_EQ(EFAULT, errno); + + free(syscall_lsms); +} + +TEST(ids_null_lsm_list_modules) +{ + const long page_size = sysconf(_SC_PAGESIZE); + __u32 size = page_size; + + errno = 0; + ASSERT_EQ(-1, lsm_list_modules(NULL, &size, 0)); + ASSERT_EQ(EFAULT, errno); + ASSERT_NE(1, size); +} + +TEST(size_too_small_lsm_list_modules) +{ + const long page_size = sysconf(_SC_PAGESIZE); + __u64 *syscall_lsms = calloc(page_size, 1); + __u32 size = 1; + + ASSERT_NE(NULL, syscall_lsms); + errno = 0; + ASSERT_EQ(-1, lsm_list_modules(syscall_lsms, &size, 0)); + ASSERT_EQ(E2BIG, errno); + ASSERT_NE(1, size); + + free(syscall_lsms); +} + +TEST(flags_set_lsm_list_modules) +{ + const long page_size = sysconf(_SC_PAGESIZE); + __u64 *syscall_lsms = calloc(page_size, 1); + __u32 size = page_size; + + ASSERT_NE(NULL, syscall_lsms); + errno = 0; + ASSERT_EQ(-1, lsm_list_modules(syscall_lsms, &size, 7)); + ASSERT_EQ(EINVAL, errno); + ASSERT_EQ(page_size, size); + + free(syscall_lsms); +} + +TEST(correct_lsm_list_modules) +{ + const long page_size = sysconf(_SC_PAGESIZE); + __u32 size = page_size; + __u64 *syscall_lsms = calloc(page_size, 1); + char *sysfs_lsms = calloc(page_size, 1); + char *name; + char *cp; + int count; + int i; + + ASSERT_NE(NULL, sysfs_lsms); + ASSERT_NE(NULL, syscall_lsms); + ASSERT_EQ(0, read_sysfs_lsms(sysfs_lsms, page_size)); + + count = lsm_list_modules(syscall_lsms, &size, 0); + ASSERT_LE(1, count); + cp = sysfs_lsms; + for (i = 0; i < count; i++) { + switch (syscall_lsms[i]) { + case LSM_ID_CAPABILITY: + name = "capability"; + break; + case LSM_ID_SELINUX: + name = "selinux"; + break; + case LSM_ID_SMACK: + name = "smack"; + break; + case LSM_ID_TOMOYO: + name = "tomoyo"; + break; + case LSM_ID_APPARMOR: + name = "apparmor"; + break; + case LSM_ID_YAMA: + name = "yama"; + break; + case LSM_ID_LOADPIN: + name = "loadpin"; + break; + case LSM_ID_SAFESETID: + name = 
"safesetid"; + break; + case LSM_ID_LOCKDOWN: + name = "lockdown"; + break; + case LSM_ID_BPF: + name = "bpf"; + break; + case LSM_ID_LANDLOCK: + name = "landlock"; + break; + default: + name = "INVALID"; + break; + } + ASSERT_EQ(0, strncmp(cp, name, strlen(name))); + cp += strlen(name) + 1; + } + + free(sysfs_lsms); + free(syscall_lsms); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/lsm/lsm_set_self_attr_test.c b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c new file mode 100644 index 0000000000..66dec47e3c --- /dev/null +++ b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Linux Security Module infrastructure tests + * Tests for the lsm_set_self_attr system call + * + * Copyright © 2022 Casey Schaufler <casey@schaufler-ca.com> + */ + +#define _GNU_SOURCE +#include <linux/lsm.h> +#include <string.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include "../kselftest_harness.h" +#include "common.h" + +TEST(ctx_null_lsm_set_self_attr) +{ + ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT, NULL, + sizeof(struct lsm_ctx), 0)); +} + +TEST(size_too_small_lsm_set_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + struct lsm_ctx *ctx = calloc(page_size, 1); + __u32 size = page_size; + + ASSERT_NE(NULL, ctx); + if (attr_lsm_count()) { + ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, + 0)); + } + ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT, ctx, 1, 0)); + + free(ctx); +} + +TEST(flags_zero_lsm_set_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + struct lsm_ctx *ctx = calloc(page_size, 1); + __u32 size = page_size; + + ASSERT_NE(NULL, ctx); + if (attr_lsm_count()) { + ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, + 0)); + } + ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT, ctx, size, 1)); + + free(ctx); +} + +TEST(flags_overset_lsm_set_self_attr) +{ + const long page_size = sysconf(_SC_PAGESIZE); + char 
*ctx = calloc(page_size, 1); + __u32 size = page_size; + struct lsm_ctx *tctx = (struct lsm_ctx *)ctx; + + ASSERT_NE(NULL, ctx); + if (attr_lsm_count()) { + ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, tctx, &size, + 0)); + } + ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT | LSM_ATTR_PREV, tctx, + size, 0)); + + free(ctx); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index dede0bcf97..2453add65d 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -117,8 +117,8 @@ TEST_FILES += va_high_addr_switch.sh include ../lib.mk -$(TEST_GEN_PROGS): vm_util.c -$(TEST_GEN_FILES): vm_util.c +$(TEST_GEN_PROGS): vm_util.c thp_settings.c +$(TEST_GEN_FILES): vm_util.c thp_settings.c $(OUTPUT)/uffd-stress: uffd-common.c $(OUTPUT)/uffd-unit-tests: uffd-common.c diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c index 9b420140ba..656afba02d 100644 --- a/tools/testing/selftests/mm/compaction_test.c +++ b/tools/testing/selftests/mm/compaction_test.c @@ -33,7 +33,7 @@ int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize) FILE *cmdfile = popen(cmd, "r"); if (!(fgets(buffer, sizeof(buffer), cmdfile))) { - perror("Failed to read meminfo\n"); + ksft_print_msg("Failed to read meminfo: %s\n", strerror(errno)); return -1; } @@ -44,7 +44,7 @@ int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize) cmdfile = popen(cmd, "r"); if (!(fgets(buffer, sizeof(buffer), cmdfile))) { - perror("Failed to read meminfo\n"); + ksft_print_msg("Failed to read meminfo: %s\n", strerror(errno)); return -1; } @@ -62,14 +62,14 @@ int prereq(void) fd = open("/proc/sys/vm/compact_unevictable_allowed", O_RDONLY | O_NONBLOCK); if (fd < 0) { - perror("Failed to open\n" - "/proc/sys/vm/compact_unevictable_allowed\n"); + ksft_print_msg("Failed to open /proc/sys/vm/compact_unevictable_allowed: %s\n", + strerror(errno)); 
return -1; } if (read(fd, &allowed, sizeof(char)) != sizeof(char)) { - perror("Failed to read from\n" - "/proc/sys/vm/compact_unevictable_allowed\n"); + ksft_print_msg("Failed to read from /proc/sys/vm/compact_unevictable_allowed: %s\n", + strerror(errno)); close(fd); return -1; } @@ -78,12 +78,13 @@ int prereq(void) if (allowed == '1') return 0; + ksft_print_msg("Compaction isn't allowed\n"); return -1; } int check_compaction(unsigned long mem_free, unsigned int hugepage_size) { - int fd; + int fd, ret = -1; int compaction_index = 0; char initial_nr_hugepages[10] = {0}; char nr_hugepages[10] = {0}; @@ -94,18 +95,21 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK); if (fd < 0) { - perror("Failed to open /proc/sys/vm/nr_hugepages"); + ksft_test_result_fail("Failed to open /proc/sys/vm/nr_hugepages: %s\n", + strerror(errno)); return -1; } if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) { - perror("Failed to read from /proc/sys/vm/nr_hugepages"); + ksft_test_result_fail("Failed to read from /proc/sys/vm/nr_hugepages: %s\n", + strerror(errno)); goto close_fd; } /* Start with the initial condition of 0 huge pages*/ if (write(fd, "0", sizeof(char)) != sizeof(char)) { - perror("Failed to write 0 to /proc/sys/vm/nr_hugepages\n"); + ksft_test_result_fail("Failed to write 0 to /proc/sys/vm/nr_hugepages: %s\n", + strerror(errno)); goto close_fd; } @@ -114,14 +118,16 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) /* Request a large number of huge pages. 
The Kernel will allocate as much as it can */ if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) { - perror("Failed to write 100000 to /proc/sys/vm/nr_hugepages\n"); + ksft_test_result_fail("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n", + strerror(errno)); goto close_fd; } lseek(fd, 0, SEEK_SET); if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) { - perror("Failed to re-read from /proc/sys/vm/nr_hugepages\n"); + ksft_test_result_fail("Failed to re-read from /proc/sys/vm/nr_hugepages: %s\n", + strerror(errno)); goto close_fd; } @@ -129,67 +135,58 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) huge pages */ compaction_index = mem_free/(atoi(nr_hugepages) * hugepage_size); - if (compaction_index > 3) { - printf("No of huge pages allocated = %d\n", - (atoi(nr_hugepages))); - fprintf(stderr, "ERROR: Less that 1/%d of memory is available\n" - "as huge pages\n", compaction_index); - goto close_fd; - } - - printf("No of huge pages allocated = %d\n", - (atoi(nr_hugepages))); - lseek(fd, 0, SEEK_SET); if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages)) != strlen(initial_nr_hugepages)) { - perror("Failed to write value to /proc/sys/vm/nr_hugepages\n"); + ksft_test_result_fail("Failed to write value to /proc/sys/vm/nr_hugepages: %s\n", + strerror(errno)); goto close_fd; } - close(fd); - return 0; + if (compaction_index > 3) { + ksft_print_msg("ERROR: Less that 1/%d of memory is available\n" + "as huge pages\n", compaction_index); + ksft_test_result_fail("No of huge pages allocated = %d\n", (atoi(nr_hugepages))); + goto close_fd; + } + + ksft_test_result_pass("Memory compaction succeeded. No of huge pages allocated = %d\n", + (atoi(nr_hugepages))); + ret = 0; close_fd: close(fd); - printf("Not OK. 
Compaction test failed."); - return -1; + return ret; } int main(int argc, char **argv) { struct rlimit lim; - struct map_list *list, *entry; + struct map_list *list = NULL, *entry; size_t page_size, i; void *map = NULL; unsigned long mem_free = 0; unsigned long hugepage_size = 0; long mem_fragmentable_MB = 0; - if (prereq() != 0) { - printf("Either the sysctl compact_unevictable_allowed is not\n" - "set to 1 or couldn't read the proc file.\n" - "Skipping the test\n"); - return KSFT_SKIP; - } + ksft_print_header(); + + if (prereq() || geteuid()) + return ksft_exit_pass(); + + ksft_set_plan(1); lim.rlim_cur = RLIM_INFINITY; lim.rlim_max = RLIM_INFINITY; - if (setrlimit(RLIMIT_MEMLOCK, &lim)) { - perror("Failed to set rlimit:\n"); - return -1; - } + if (setrlimit(RLIMIT_MEMLOCK, &lim)) + ksft_exit_fail_msg("Failed to set rlimit: %s\n", strerror(errno)); page_size = getpagesize(); - list = NULL; - - if (read_memory_info(&mem_free, &hugepage_size) != 0) { - printf("ERROR: Cannot read meminfo\n"); - return -1; - } + if (read_memory_info(&mem_free, &hugepage_size) != 0) + ksft_exit_fail_msg("Failed to get meminfo\n"); mem_fragmentable_MB = mem_free * 0.8 / 1024; @@ -225,7 +222,7 @@ int main(int argc, char **argv) } if (check_compaction(mem_free, hugepage_size) == 0) - return 0; + return ksft_exit_pass(); - return -1; + return ksft_exit_fail(); } diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 6f2f839904..363bf5f801 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -29,15 +29,49 @@ #include "../../../../mm/gup_test.h" #include "../kselftest.h" #include "vm_util.h" +#include "thp_settings.h" static size_t pagesize; static int pagemap_fd; -static size_t thpsize; +static size_t pmdsize; +static int nr_thpsizes; +static size_t thpsizes[20]; static int nr_hugetlbsizes; static size_t hugetlbsizes[10]; static int gup_fd; static bool has_huge_zeropage; +static int sz2ord(size_t size) +{ + return 
__builtin_ctzll(size / pagesize); +} + +static int detect_thp_sizes(size_t sizes[], int max) +{ + int count = 0; + unsigned long orders; + size_t kb; + int i; + + /* thp not supported at all. */ + if (!pmdsize) + return 0; + + orders = 1UL << sz2ord(pmdsize); + orders |= thp_supported_orders(); + + for (i = 0; orders && count < max; i++) { + if (!(orders & (1UL << i))) + continue; + orders &= ~(1UL << i); + kb = (pagesize >> 10) << i; + sizes[count++] = kb * 1024; + ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb); + } + + return count; +} + static void detect_huge_zeropage(void) { int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page", @@ -734,7 +768,7 @@ enum thp_run { THP_RUN_PARTIAL_SHARED, }; -static void do_run_with_thp(test_fn fn, enum thp_run thp_run) +static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) { char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED; size_t size, mmap_size, mremap_size; @@ -759,11 +793,11 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run) } /* - * Try to populate a THP. Touch the first sub-page and test if we get - * another sub-page populated automatically. + * Try to populate a THP. Touch the first sub-page and test if + * we get the last sub-page populated automatically. */ mem[0] = 0; - if (!pagemap_is_populated(pagemap_fd, mem + pagesize)) { + if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) { ksft_test_result_skip("Did not get a THP populated\n"); goto munmap; } @@ -773,12 +807,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run) switch (thp_run) { case THP_RUN_PMD: case THP_RUN_PMD_SWAPOUT: + assert(thpsize == pmdsize); break; case THP_RUN_PTE: case THP_RUN_PTE_SWAPOUT: /* * Trigger PTE-mapping the THP by temporarily mapping a single - * subpage R/O. + * subpage R/O. This is a noop if the THP is not pmdsize (and + * therefore already PTE-mapped). 
*/ ret = mprotect(mem + pagesize, pagesize, PROT_READ); if (ret) { @@ -875,52 +911,60 @@ munmap: munmap(mremap_mem, mremap_size); } -static void run_with_thp(test_fn fn, const char *desc) +static void run_with_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with THP\n", desc); - do_run_with_thp(fn, THP_RUN_PMD); + ksft_print_msg("[RUN] %s ... with THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PMD, size); } -static void run_with_thp_swap(test_fn fn, const char *desc) +static void run_with_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with swapped-out THP\n", desc); - do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT); + ksft_print_msg("[RUN] %s ... with swapped-out THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size); } -static void run_with_pte_mapped_thp(test_fn fn, const char *desc) +static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with PTE-mapped THP\n", desc); - do_run_with_thp(fn, THP_RUN_PTE); + ksft_print_msg("[RUN] %s ... with PTE-mapped THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PTE, size); } -static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc) +static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP\n", desc); - do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT); + ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size); } -static void run_with_single_pte_of_thp(test_fn fn, const char *desc) +static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with single PTE of THP\n", desc); - do_run_with_thp(fn, THP_RUN_SINGLE_PTE); + ksft_print_msg("[RUN] %s ... 
with single PTE of THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size); } -static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc) +static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP\n", desc); - do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT); + ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size); } -static void run_with_partial_mremap_thp(test_fn fn, const char *desc) +static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP\n", desc); - do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP); + ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size); } -static void run_with_partial_shared_thp(test_fn fn, const char *desc) +static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with partially shared THP\n", desc); - do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED); + ksft_print_msg("[RUN] %s ... 
with partially shared THP (%zu kB)\n", + desc, size / 1024); + do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size); } static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) @@ -1091,15 +1135,27 @@ static void run_anon_test_case(struct test_case const *test_case) run_with_base_page(test_case->fn, test_case->desc); run_with_base_page_swap(test_case->fn, test_case->desc); - if (thpsize) { - run_with_thp(test_case->fn, test_case->desc); - run_with_thp_swap(test_case->fn, test_case->desc); - run_with_pte_mapped_thp(test_case->fn, test_case->desc); - run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc); - run_with_single_pte_of_thp(test_case->fn, test_case->desc); - run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc); - run_with_partial_mremap_thp(test_case->fn, test_case->desc); - run_with_partial_shared_thp(test_case->fn, test_case->desc); + for (i = 0; i < nr_thpsizes; i++) { + size_t size = thpsizes[i]; + struct thp_settings settings = *thp_current_settings(); + + settings.hugepages[sz2ord(pmdsize)].enabled = THP_NEVER; + settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS; + thp_push_settings(&settings); + + if (size == pmdsize) { + run_with_thp(test_case->fn, test_case->desc, size); + run_with_thp_swap(test_case->fn, test_case->desc, size); + } + + run_with_pte_mapped_thp(test_case->fn, test_case->desc, size); + run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, size); + run_with_single_pte_of_thp(test_case->fn, test_case->desc, size); + run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, size); + run_with_partial_mremap_thp(test_case->fn, test_case->desc, size); + run_with_partial_shared_thp(test_case->fn, test_case->desc, size); + + thp_pop_settings(); } for (i = 0; i < nr_hugetlbsizes; i++) run_with_hugetlb(test_case->fn, test_case->desc, @@ -1120,8 +1176,9 @@ static int tests_per_anon_test_case(void) { int tests = 2 + nr_hugetlbsizes; - if (thpsize) - tests += 8; + tests += 6 * nr_thpsizes; + if 
(pmdsize) + tests += 2; return tests; } @@ -1329,7 +1386,7 @@ static void run_anon_thp_test_cases(void) { int i; - if (!thpsize) + if (!pmdsize) return; ksft_print_msg("[INFO] Anonymous THP tests\n"); @@ -1338,13 +1395,13 @@ static void run_anon_thp_test_cases(void) struct test_case const *test_case = &anon_thp_test_cases[i]; ksft_print_msg("[RUN] %s\n", test_case->desc); - do_run_with_thp(test_case->fn, THP_RUN_PMD); + do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize); } } static int tests_per_anon_thp_test_case(void) { - return thpsize ? 1 : 0; + return pmdsize ? 1 : 0; } typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size); @@ -1419,7 +1476,7 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) } /* For alignment purposes, we need twice the thp size. */ - mmap_size = 2 * thpsize; + mmap_size = 2 * pmdsize; mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mmap_mem == MAP_FAILED) { @@ -1434,11 +1491,11 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) } /* We need a THP-aligned memory area. 
*/ - mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1)); - smem = (char *)(((uintptr_t)mmap_smem + thpsize) & ~(thpsize - 1)); + mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1)); + smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1)); - ret = madvise(mem, thpsize, MADV_HUGEPAGE); - ret |= madvise(smem, thpsize, MADV_HUGEPAGE); + ret = madvise(mem, pmdsize, MADV_HUGEPAGE); + ret |= madvise(smem, pmdsize, MADV_HUGEPAGE); if (ret) { ksft_test_result_fail("MADV_HUGEPAGE failed\n"); goto munmap; @@ -1457,7 +1514,7 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) goto munmap; } - fn(mem, smem, thpsize); + fn(mem, smem, pmdsize); munmap: munmap(mmap_mem, mmap_size); if (mmap_smem != MAP_FAILED) @@ -1650,7 +1707,7 @@ static void run_non_anon_test_case(struct non_anon_test_case const *test_case) run_with_zeropage(test_case->fn, test_case->desc); run_with_memfd(test_case->fn, test_case->desc); run_with_tmpfile(test_case->fn, test_case->desc); - if (thpsize) + if (pmdsize) run_with_huge_zeropage(test_case->fn, test_case->desc); for (i = 0; i < nr_hugetlbsizes; i++) run_with_memfd_hugetlb(test_case->fn, test_case->desc, @@ -1671,7 +1728,7 @@ static int tests_per_non_anon_test_case(void) { int tests = 3 + nr_hugetlbsizes; - if (thpsize) + if (pmdsize) tests += 1; return tests; } @@ -1679,14 +1736,23 @@ static int tests_per_non_anon_test_case(void) int main(int argc, char **argv) { int err; + struct thp_settings default_settings; ksft_print_header(); pagesize = getpagesize(); - thpsize = read_pmd_pagesize(); - if (thpsize) - ksft_print_msg("[INFO] detected THP size: %zu KiB\n", - thpsize / 1024); + pmdsize = read_pmd_pagesize(); + if (pmdsize) { + /* Only if THP is supported. 
*/ + thp_read_settings(&default_settings); + default_settings.hugepages[sz2ord(pmdsize)].enabled = THP_INHERIT; + thp_save_settings(); + thp_push_settings(&default_settings); + + ksft_print_msg("[INFO] detected PMD size: %zu KiB\n", + pmdsize / 1024); + nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes)); + } nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, ARRAY_SIZE(hugetlbsizes)); detect_huge_zeropage(); @@ -1704,6 +1770,11 @@ int main(int argc, char **argv) run_anon_thp_test_cases(); run_non_anon_test_cases(); + if (pmdsize) { + /* Only if THP is supported. */ + thp_restore_settings(); + } + err = ksft_get_fail_cnt(); if (err) ksft_exit_fail_msg("%d out of %d tests failed\n", diff --git a/tools/testing/selftests/mm/hugepage-mmap.c b/tools/testing/selftests/mm/hugepage-mmap.c index 955ef87f38..267eea2e0e 100644 --- a/tools/testing/selftests/mm/hugepage-mmap.c +++ b/tools/testing/selftests/mm/hugepage-mmap.c @@ -22,6 +22,7 @@ #include <unistd.h> #include <sys/mman.h> #include <fcntl.h> +#include "../kselftest.h" #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) @@ -37,7 +38,7 @@ static void check_bytes(char *addr) { - printf("First hex is %x\n", *((unsigned int *)addr)); + ksft_print_msg("First hex is %x\n", *((unsigned int *)addr)); } static void write_bytes(char *addr) @@ -55,7 +56,7 @@ static int read_bytes(char *addr) check_bytes(addr); for (i = 0; i < LENGTH; i++) if (*(addr + i) != (char)i) { - printf("Mismatch at %lu\n", i); + ksft_print_msg("Error: Mismatch at %lu\n", i); return 1; } return 0; @@ -66,20 +67,20 @@ int main(void) void *addr; int fd, ret; + ksft_print_header(); + ksft_set_plan(1); + fd = memfd_create("hugepage-mmap", MFD_HUGETLB); - if (fd < 0) { - perror("memfd_create() failed"); - exit(1); - } + if (fd < 0) + ksft_exit_fail_msg("memfd_create() failed: %s\n", strerror(errno)); addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0); if (addr == MAP_FAILED) { - perror("mmap"); close(fd); - exit(1); 
+ ksft_exit_fail_msg("mmap(): %s\n", strerror(errno)); } - printf("Returned address is %p\n", addr); + ksft_print_msg("Returned address is %p\n", addr); check_bytes(addr); write_bytes(addr); ret = read_bytes(addr); @@ -87,5 +88,7 @@ int main(void) munmap(addr, LENGTH); close(fd); - return ret; + ksft_test_result(!ret, "Read same data\n"); + + ksft_exit(!ret); } diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c index cabd0084f5..c463d1c09c 100644 --- a/tools/testing/selftests/mm/hugepage-mremap.c +++ b/tools/testing/selftests/mm/hugepage-mremap.c @@ -24,6 +24,7 @@ #include <sys/ioctl.h> #include <string.h> #include <stdbool.h> +#include "../kselftest.h" #include "vm_util.h" #define DEFAULT_LENGTH_MB 10UL @@ -34,7 +35,7 @@ static void check_bytes(char *addr) { - printf("First hex is %x\n", *((unsigned int *)addr)); + ksft_print_msg("First hex is %x\n", *((unsigned int *)addr)); } static void write_bytes(char *addr, size_t len) @@ -52,7 +53,7 @@ static int read_bytes(char *addr, size_t len) check_bytes(addr); for (i = 0; i < len; i++) if (*(addr + i) != (char)i) { - printf("Mismatch at %lu\n", i); + ksft_print_msg("Mismatch at %lu\n", i); return 1; } return 0; @@ -66,17 +67,13 @@ static void register_region_with_uffd(char *addr, size_t len) /* Create and enable userfaultfd object. */ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); - if (uffd == -1) { - perror("userfaultfd"); - exit(1); - } + if (uffd == -1) + ksft_exit_fail_msg("userfaultfd: %s\n", strerror(errno)); uffdio_api.api = UFFD_API; uffdio_api.features = 0; - if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) { - perror("ioctl-UFFDIO_API"); - exit(1); - } + if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) + ksft_exit_fail_msg("ioctl-UFFDIO_API: %s\n", strerror(errno)); /* Create a private anonymous mapping. The memory will be * demand-zero paged--that is, not yet allocated. 
When we @@ -86,21 +83,17 @@ static void register_region_with_uffd(char *addr, size_t len) addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (addr == MAP_FAILED) { - perror("mmap"); - exit(1); - } + if (addr == MAP_FAILED) + ksft_exit_fail_msg("mmap: %s\n", strerror(errno)); - printf("Address returned by mmap() = %p\n", addr); + ksft_print_msg("Address returned by mmap() = %p\n", addr); /* Register the memory range of the mapping we just created for * handling by the userfaultfd object. In mode, we request to track * missing pages (i.e., pages that have not yet been faulted in). */ - if (uffd_register(uffd, addr, len, true, false, false)) { - perror("ioctl-UFFDIO_REGISTER"); - exit(1); - } + if (uffd_register(uffd, addr, len, true, false, false)) + ksft_exit_fail_msg("ioctl-UFFDIO_REGISTER: %s\n", strerror(errno)); } int main(int argc, char *argv[]) @@ -108,10 +101,11 @@ int main(int argc, char *argv[]) size_t length = 0; int ret = 0, fd; - if (argc >= 2 && !strcmp(argv[1], "-h")) { - printf("Usage: %s [length_in_MB]\n", argv[0]); - exit(1); - } + ksft_print_header(); + ksft_set_plan(1); + + if (argc >= 2 && !strcmp(argv[1], "-h")) + ksft_exit_fail_msg("Usage: %s [length_in_MB]\n", argv[0]); /* Read memory length as the first arg if valid, otherwise fallback to * the default length. @@ -123,50 +117,40 @@ int main(int argc, char *argv[]) length = MB_TO_BYTES(length); fd = memfd_create(argv[0], MFD_HUGETLB); - if (fd < 0) { - perror("Open failed"); - exit(1); - } + if (fd < 0) + ksft_exit_fail_msg("Open failed: %s\n", strerror(errno)); /* mmap to a PUD aligned address to hopefully trigger pmd sharing. 
*/ unsigned long suggested_addr = 0x7eaa40000000; void *haddr = mmap((void *)suggested_addr, length, PROTECTION, MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0); - printf("Map haddr: Returned address is %p\n", haddr); - if (haddr == MAP_FAILED) { - perror("mmap1"); - exit(1); - } + ksft_print_msg("Map haddr: Returned address is %p\n", haddr); + if (haddr == MAP_FAILED) + ksft_exit_fail_msg("mmap1: %s\n", strerror(errno)); /* mmap again to a dummy address to hopefully trigger pmd sharing. */ suggested_addr = 0x7daa40000000; void *daddr = mmap((void *)suggested_addr, length, PROTECTION, MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0); - printf("Map daddr: Returned address is %p\n", daddr); - if (daddr == MAP_FAILED) { - perror("mmap3"); - exit(1); - } + ksft_print_msg("Map daddr: Returned address is %p\n", daddr); + if (daddr == MAP_FAILED) + ksft_exit_fail_msg("mmap3: %s\n", strerror(errno)); suggested_addr = 0x7faa40000000; void *vaddr = mmap((void *)suggested_addr, length, PROTECTION, FLAGS, -1, 0); - printf("Map vaddr: Returned address is %p\n", vaddr); - if (vaddr == MAP_FAILED) { - perror("mmap2"); - exit(1); - } + ksft_print_msg("Map vaddr: Returned address is %p\n", vaddr); + if (vaddr == MAP_FAILED) + ksft_exit_fail_msg("mmap2: %s\n", strerror(errno)); register_region_with_uffd(haddr, length); void *addr = mremap(haddr, length, length, MREMAP_MAYMOVE | MREMAP_FIXED, vaddr); - if (addr == MAP_FAILED) { - perror("mremap"); - exit(1); - } + if (addr == MAP_FAILED) + ksft_exit_fail_msg("mremap: %s\n", strerror(errno)); - printf("Mremap: Returned address is %p\n", addr); + ksft_print_msg("Mremap: Returned address is %p\n", addr); check_bytes(addr); write_bytes(addr, length); ret = read_bytes(addr, length); @@ -174,12 +158,11 @@ int main(int argc, char *argv[]) munmap(addr, length); addr = mremap(addr, length, length, 0); - if (addr != MAP_FAILED) { - printf("mremap: Expected failure, but call succeeded\n"); - exit(1); - } + if (addr != MAP_FAILED) + 
ksft_exit_fail_msg("mremap: Expected failure, but call succeeded\n"); close(fd); - return ret; + ksft_test_result(!ret, "Read same data\n"); + ksft_exit(!ret); } diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index 030667cb55..829320a519 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -22,13 +22,14 @@ #include "linux/magic.h" #include "vm_util.h" +#include "thp_settings.h" #define BASE_ADDR ((void *)(1UL << 30)) static unsigned long hpage_pmd_size; static unsigned long page_size; static int hpage_pmd_nr; +static int anon_order; -#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/" #define PID_SMAPS "/proc/self/smaps" #define TEST_FILE "collapse_test_file" @@ -71,78 +72,7 @@ struct file_info { }; static struct file_info finfo; - -enum thp_enabled { - THP_ALWAYS, - THP_MADVISE, - THP_NEVER, -}; - -static const char *thp_enabled_strings[] = { - "always", - "madvise", - "never", - NULL -}; - -enum thp_defrag { - THP_DEFRAG_ALWAYS, - THP_DEFRAG_DEFER, - THP_DEFRAG_DEFER_MADVISE, - THP_DEFRAG_MADVISE, - THP_DEFRAG_NEVER, -}; - -static const char *thp_defrag_strings[] = { - "always", - "defer", - "defer+madvise", - "madvise", - "never", - NULL -}; - -enum shmem_enabled { - SHMEM_ALWAYS, - SHMEM_WITHIN_SIZE, - SHMEM_ADVISE, - SHMEM_NEVER, - SHMEM_DENY, - SHMEM_FORCE, -}; - -static const char *shmem_enabled_strings[] = { - "always", - "within_size", - "advise", - "never", - "deny", - "force", - NULL -}; - -struct khugepaged_settings { - bool defrag; - unsigned int alloc_sleep_millisecs; - unsigned int scan_sleep_millisecs; - unsigned int max_ptes_none; - unsigned int max_ptes_swap; - unsigned int max_ptes_shared; - unsigned long pages_to_scan; -}; - -struct settings { - enum thp_enabled thp_enabled; - enum thp_defrag thp_defrag; - enum shmem_enabled shmem_enabled; - bool use_zero_page; - struct khugepaged_settings khugepaged; - unsigned long read_ahead_kb; -}; - -static 
struct settings saved_settings; static bool skip_settings_restore; - static int exit_status; static void success(const char *msg) @@ -161,260 +91,34 @@ static void skip(const char *msg) printf(" \e[33m%s\e[0m\n", msg); } -static int read_file(const char *path, char *buf, size_t buflen) -{ - int fd; - ssize_t numread; - - fd = open(path, O_RDONLY); - if (fd == -1) - return 0; - - numread = read(fd, buf, buflen - 1); - if (numread < 1) { - close(fd); - return 0; - } - - buf[numread] = '\0'; - close(fd); - - return (unsigned int) numread; -} - -static int write_file(const char *path, const char *buf, size_t buflen) -{ - int fd; - ssize_t numwritten; - - fd = open(path, O_WRONLY); - if (fd == -1) { - printf("open(%s)\n", path); - exit(EXIT_FAILURE); - return 0; - } - - numwritten = write(fd, buf, buflen - 1); - close(fd); - if (numwritten < 1) { - printf("write(%s)\n", buf); - exit(EXIT_FAILURE); - return 0; - } - - return (unsigned int) numwritten; -} - -static int read_string(const char *name, const char *strings[]) +static void restore_settings_atexit(void) { - char path[PATH_MAX]; - char buf[256]; - char *c; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - - if (!read_file(path, buf, sizeof(buf))) { - perror(path); - exit(EXIT_FAILURE); - } - - c = strchr(buf, '['); - if (!c) { - printf("%s: Parse failure\n", __func__); - exit(EXIT_FAILURE); - } - - c++; - memmove(buf, c, sizeof(buf) - (c - buf)); - - c = strchr(buf, ']'); - if (!c) { - printf("%s: Parse failure\n", __func__); - exit(EXIT_FAILURE); - } - *c = '\0'; - - ret = 0; - while (strings[ret]) { - if (!strcmp(strings[ret], buf)) - return ret; - ret++; - } - - printf("Failed to parse %s\n", name); - exit(EXIT_FAILURE); -} - -static void write_string(const char *name, const char *val) -{ - char path[PATH_MAX]; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret 
>= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - - if (!write_file(path, val, strlen(val) + 1)) { - perror(path); - exit(EXIT_FAILURE); - } -} - -static const unsigned long _read_num(const char *path) -{ - char buf[21]; - - if (read_file(path, buf, sizeof(buf)) < 0) { - perror("read_file(read_num)"); - exit(EXIT_FAILURE); - } - - return strtoul(buf, NULL, 10); -} - -static const unsigned long read_num(const char *name) -{ - char path[PATH_MAX]; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - return _read_num(path); -} - -static void _write_num(const char *path, unsigned long num) -{ - char buf[21]; - - sprintf(buf, "%ld", num); - if (!write_file(path, buf, strlen(buf) + 1)) { - perror(path); - exit(EXIT_FAILURE); - } -} - -static void write_num(const char *name, unsigned long num) -{ - char path[PATH_MAX]; - int ret; - - ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); - if (ret >= PATH_MAX) { - printf("%s: Pathname is too long\n", __func__); - exit(EXIT_FAILURE); - } - _write_num(path, num); -} - -static void write_settings(struct settings *settings) -{ - struct khugepaged_settings *khugepaged = &settings->khugepaged; - - write_string("enabled", thp_enabled_strings[settings->thp_enabled]); - write_string("defrag", thp_defrag_strings[settings->thp_defrag]); - write_string("shmem_enabled", - shmem_enabled_strings[settings->shmem_enabled]); - write_num("use_zero_page", settings->use_zero_page); - - write_num("khugepaged/defrag", khugepaged->defrag); - write_num("khugepaged/alloc_sleep_millisecs", - khugepaged->alloc_sleep_millisecs); - write_num("khugepaged/scan_sleep_millisecs", - khugepaged->scan_sleep_millisecs); - write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none); - write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap); - write_num("khugepaged/max_ptes_shared", 
khugepaged->max_ptes_shared); - write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan); - - if (file_ops && finfo.type == VMA_FILE) - _write_num(finfo.dev_queue_read_ahead_path, - settings->read_ahead_kb); -} - -#define MAX_SETTINGS_DEPTH 4 -static struct settings settings_stack[MAX_SETTINGS_DEPTH]; -static int settings_index; - -static struct settings *current_settings(void) -{ - if (!settings_index) { - printf("Fail: No settings set"); - exit(EXIT_FAILURE); - } - return settings_stack + settings_index - 1; -} + if (skip_settings_restore) + return; -static void push_settings(struct settings *settings) -{ - if (settings_index >= MAX_SETTINGS_DEPTH) { - printf("Fail: Settings stack exceeded"); - exit(EXIT_FAILURE); - } - settings_stack[settings_index++] = *settings; - write_settings(current_settings()); -} + printf("Restore THP and khugepaged settings..."); + thp_restore_settings(); + success("OK"); -static void pop_settings(void) -{ - if (settings_index <= 0) { - printf("Fail: Settings stack empty"); - exit(EXIT_FAILURE); - } - --settings_index; - write_settings(current_settings()); + skip_settings_restore = true; } static void restore_settings(int sig) { - if (skip_settings_restore) - goto out; - - printf("Restore THP and khugepaged settings..."); - write_settings(&saved_settings); - success("OK"); - if (sig) - exit(EXIT_FAILURE); -out: - exit(exit_status); + /* exit() will invoke the restore_settings_atexit handler. */ + exit(sig ? 
EXIT_FAILURE : exit_status); } static void save_settings(void) { printf("Save THP and khugepaged settings..."); - saved_settings = (struct settings) { - .thp_enabled = read_string("enabled", thp_enabled_strings), - .thp_defrag = read_string("defrag", thp_defrag_strings), - .shmem_enabled = - read_string("shmem_enabled", shmem_enabled_strings), - .use_zero_page = read_num("use_zero_page"), - }; - saved_settings.khugepaged = (struct khugepaged_settings) { - .defrag = read_num("khugepaged/defrag"), - .alloc_sleep_millisecs = - read_num("khugepaged/alloc_sleep_millisecs"), - .scan_sleep_millisecs = - read_num("khugepaged/scan_sleep_millisecs"), - .max_ptes_none = read_num("khugepaged/max_ptes_none"), - .max_ptes_swap = read_num("khugepaged/max_ptes_swap"), - .max_ptes_shared = read_num("khugepaged/max_ptes_shared"), - .pages_to_scan = read_num("khugepaged/pages_to_scan"), - }; if (file_ops && finfo.type == VMA_FILE) - saved_settings.read_ahead_kb = - _read_num(finfo.dev_queue_read_ahead_path); + thp_set_read_ahead_path(finfo.dev_queue_read_ahead_path); + thp_save_settings(); success("OK"); + atexit(restore_settings_atexit); signal(SIGTERM, restore_settings); signal(SIGINT, restore_settings); signal(SIGHUP, restore_settings); @@ -793,7 +497,7 @@ static void __madvise_collapse(const char *msg, char *p, int nr_hpages, struct mem_ops *ops, bool expect) { int ret; - struct settings settings = *current_settings(); + struct thp_settings settings = *thp_current_settings(); printf("%s...", msg); @@ -803,7 +507,7 @@ static void __madvise_collapse(const char *msg, char *p, int nr_hpages, */ settings.thp_enabled = THP_NEVER; settings.shmem_enabled = SHMEM_NEVER; - push_settings(&settings); + thp_push_settings(&settings); /* Clear VM_NOHUGEPAGE */ madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE); @@ -815,7 +519,7 @@ static void __madvise_collapse(const char *msg, char *p, int nr_hpages, else success("OK"); - pop_settings(); + thp_pop_settings(); } static void 
madvise_collapse(const char *msg, char *p, int nr_hpages, @@ -845,13 +549,13 @@ static bool wait_for_scan(const char *msg, char *p, int nr_hpages, madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE); /* Wait until the second full_scan completed */ - full_scans = read_num("khugepaged/full_scans") + 2; + full_scans = thp_read_num("khugepaged/full_scans") + 2; printf("%s...", msg); while (timeout--) { if (ops->check_huge(p, nr_hpages)) break; - if (read_num("khugepaged/full_scans") >= full_scans) + if (thp_read_num("khugepaged/full_scans") >= full_scans) break; printf("."); usleep(TICK); @@ -904,13 +608,18 @@ static bool is_tmpfs(struct mem_ops *ops) return ops == &__file_ops && finfo.type == VMA_SHMEM; } +static bool is_anon(struct mem_ops *ops) +{ + return ops == &__anon_ops; +} + static void alloc_at_fault(void) { - struct settings settings = *current_settings(); + struct thp_settings settings = *thp_current_settings(); char *p; settings.thp_enabled = THP_ALWAYS; - push_settings(&settings); + thp_push_settings(&settings); p = alloc_mapping(1); *p = 1; @@ -920,7 +629,7 @@ static void alloc_at_fault(void) else fail("Fail"); - pop_settings(); + thp_pop_settings(); madvise(p, page_size, MADV_DONTNEED); printf("Split huge PMD on MADV_DONTNEED..."); @@ -968,11 +677,12 @@ static void collapse_single_pte_entry(struct collapse_context *c, struct mem_ops static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *ops) { int max_ptes_none = hpage_pmd_nr / 2; - struct settings settings = *current_settings(); + struct thp_settings settings = *thp_current_settings(); void *p; + int fault_nr_pages = is_anon(ops) ? 
1 << anon_order : 1; settings.khugepaged.max_ptes_none = max_ptes_none; - push_settings(&settings); + thp_push_settings(&settings); p = ops->setup_area(1); @@ -983,10 +693,10 @@ static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *o goto skip; } - ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); + ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - fault_nr_pages) * page_size); c->collapse("Maybe collapse with max_ptes_none exceeded", p, 1, ops, !c->enforce_pte_scan_limits); - validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size); + validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - fault_nr_pages) * page_size); if (c->enforce_pte_scan_limits) { ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size); @@ -997,7 +707,7 @@ static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *o } skip: ops->cleanup_area(p, hpage_pmd_size); - pop_settings(); + thp_pop_settings(); } static void collapse_swapin_single_pte(struct collapse_context *c, struct mem_ops *ops) @@ -1028,7 +738,7 @@ out: static void collapse_max_ptes_swap(struct collapse_context *c, struct mem_ops *ops) { - int max_ptes_swap = read_num("khugepaged/max_ptes_swap"); + int max_ptes_swap = thp_read_num("khugepaged/max_ptes_swap"); void *p; p = ops->setup_area(1); @@ -1245,11 +955,11 @@ static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *o fail("Fail"); ops->fault(p, 0, page_size); - write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1); + thp_write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1); c->collapse("Collapse PTE table full of compound pages in child", p, 1, ops, true); - write_num("khugepaged/max_ptes_shared", - current_settings()->khugepaged.max_ptes_shared); + thp_write_num("khugepaged/max_ptes_shared", + thp_current_settings()->khugepaged.max_ptes_shared); validate_memory(p, 0, hpage_pmd_size); ops->cleanup_area(p, hpage_pmd_size); @@ -1270,7 +980,7 @@ static void 
collapse_fork_compound(struct collapse_context *c, struct mem_ops *o static void collapse_max_ptes_shared(struct collapse_context *c, struct mem_ops *ops) { - int max_ptes_shared = read_num("khugepaged/max_ptes_shared"); + int max_ptes_shared = thp_read_num("khugepaged/max_ptes_shared"); int wstatus; void *p; @@ -1373,7 +1083,7 @@ static void madvise_retracted_page_tables(struct collapse_context *c, static void usage(void) { - fprintf(stderr, "\nUsage: ./khugepaged <test type> [dir]\n\n"); + fprintf(stderr, "\nUsage: ./khugepaged [OPTIONS] <test type> [dir]\n\n"); fprintf(stderr, "\t<test type>\t: <context>:<mem_type>\n"); fprintf(stderr, "\t<context>\t: [all|khugepaged|madvise]\n"); fprintf(stderr, "\t<mem_type>\t: [all|anon|file|shmem]\n"); @@ -1382,15 +1092,34 @@ static void usage(void) fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n"); fprintf(stderr, "\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n"); fprintf(stderr, "\tmounted with huge=madvise option for khugepaged tests to work\n"); + fprintf(stderr, "\n\tSupported Options:\n"); + fprintf(stderr, "\t\t-h: This help message.\n"); + fprintf(stderr, "\t\t-s: mTHP size, expressed as page order.\n"); + fprintf(stderr, "\t\t Defaults to 0. 
Use this size for anon allocations.\n"); exit(1); } -static void parse_test_type(int argc, const char **argv) +static void parse_test_type(int argc, char **argv) { + int opt; char *buf; const char *token; - if (argc == 1) { + while ((opt = getopt(argc, argv, "s:h")) != -1) { + switch (opt) { + case 's': + anon_order = atoi(optarg); + break; + case 'h': + default: + usage(); + } + } + + argv += optind; + argc -= optind; + + if (argc == 0) { /* Backwards compatibility */ khugepaged_context = &__khugepaged_context; madvise_context = &__madvise_context; @@ -1398,7 +1127,7 @@ static void parse_test_type(int argc, const char **argv) return; } - buf = strdup(argv[1]); + buf = strdup(argv[0]); token = strsep(&buf, ":"); if (!strcmp(token, "all")) { @@ -1432,13 +1161,16 @@ static void parse_test_type(int argc, const char **argv) if (!file_ops) return; - if (argc != 3) + if (argc != 2) usage(); + + get_finfo(argv[1]); } -int main(int argc, const char **argv) +int main(int argc, char **argv) { - struct settings default_settings = { + int hpage_pmd_order; + struct thp_settings default_settings = { .thp_enabled = THP_MADVISE, .thp_defrag = THP_DEFRAG_ALWAYS, .shmem_enabled = SHMEM_ADVISE, @@ -1460,9 +1192,6 @@ int main(int argc, const char **argv) parse_test_type(argc, argv); - if (file_ops) - get_finfo(argv[2]); - setbuf(stdout, NULL); page_size = getpagesize(); @@ -1472,14 +1201,17 @@ int main(int argc, const char **argv) exit(EXIT_FAILURE); } hpage_pmd_nr = hpage_pmd_size / page_size; + hpage_pmd_order = __builtin_ctz(hpage_pmd_nr); default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1; default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8; default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2; default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8; + default_settings.hugepages[hpage_pmd_order].enabled = THP_INHERIT; + default_settings.hugepages[anon_order].enabled = THP_ALWAYS; save_settings(); - push_settings(&default_settings); + 
thp_push_settings(&default_settings); alloc_at_fault(); diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index 1d4c1589c3..2f8b991f78 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -360,7 +360,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb, char pattern_seed) { void *addr, *src_addr, *dest_addr, *dest_preamble_addr; - unsigned long long i; + int d; + unsigned long long t; struct timespec t_start = {0, 0}, t_end = {0, 0}; long long start_ns, end_ns, align_mask, ret, offset; unsigned long long threshold; @@ -378,8 +379,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb, /* Set byte pattern for source block. */ srand(pattern_seed); - for (i = 0; i < threshold; i++) - memset((char *) src_addr + i, (char) rand(), 1); + for (t = 0; t < threshold; t++) + memset((char *) src_addr + t, (char) rand(), 1); /* Mask to zero out lower bits of address for alignment */ align_mask = ~(c.dest_alignment - 1); @@ -420,8 +421,8 @@ static long long remap_region(struct config c, unsigned int threshold_mb, /* Set byte pattern for the dest preamble block. 
*/ srand(pattern_seed); - for (i = 0; i < c.dest_preamble_size; i++) - memset((char *) dest_preamble_addr + i, (char) rand(), 1); + for (d = 0; d < c.dest_preamble_size; d++) + memset((char *) dest_preamble_addr + d, (char) rand(), 1); } clock_gettime(CLOCK_MONOTONIC, &t_start); @@ -437,14 +438,14 @@ static long long remap_region(struct config c, unsigned int threshold_mb, /* Verify byte pattern after remapping */ srand(pattern_seed); - for (i = 0; i < threshold; i++) { + for (t = 0; t < threshold; t++) { char c = (char) rand(); - if (((char *) dest_addr)[i] != c) { + if (((char *) dest_addr)[t] != c) { ksft_print_msg("Data after remap doesn't match at offset %llu\n", - i); + t); ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, - ((char *) dest_addr)[i] & 0xff); + ((char *) dest_addr)[t] & 0xff); ret = -1; goto clean_up_dest; } @@ -453,14 +454,14 @@ static long long remap_region(struct config c, unsigned int threshold_mb, /* Verify the dest preamble byte pattern after remapping */ if (c.dest_preamble_size) { srand(pattern_seed); - for (i = 0; i < c.dest_preamble_size; i++) { + for (d = 0; d < c.dest_preamble_size; d++) { char c = (char) rand(); - if (((char *) dest_preamble_addr)[i] != c) { + if (((char *) dest_preamble_addr)[d] != c) { ksft_print_msg("Preamble data after remap doesn't match at offset %d\n", - i); + d); ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff, - ((char *) dest_preamble_addr)[i] & 0xff); + ((char *) dest_preamble_addr)[d] & 0xff); ret = -1; goto clean_up_dest; } diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 0075744527..246d53a5d7 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -5,6 +5,7 @@ # Kselftest framework requirement - SKIP code is 4. 
ksft_skip=4 +count_total=0 count_pass=0 count_fail=0 count_skip=0 @@ -17,6 +18,7 @@ usage: ${BASH_SOURCE[0]:-$0} [ options ] -a: run all tests, including extra ones -t: specify specific categories to tests to run -h: display this message + -n: disable TAP output The default behavior is to run required tests only. If -a is specified, will run all tests. @@ -77,12 +79,14 @@ EOF } RUN_ALL=false +TAP_PREFIX="# " -while getopts "aht:" OPT; do +while getopts "aht:n" OPT; do case ${OPT} in "a") RUN_ALL=true ;; "h") usage ;; "t") VM_SELFTEST_ITEMS=${OPTARG} ;; + "n") TAP_PREFIX= ;; esac done shift $((OPTIND -1)) @@ -184,30 +188,52 @@ fi VADDR64=0 echo "$ARCH64STR" | grep "$ARCH" &>/dev/null && VADDR64=1 +tap_prefix() { + sed -e "s/^/${TAP_PREFIX}/" +} + +tap_output() { + if [[ ! -z "$TAP_PREFIX" ]]; then + read str + echo $str + fi +} + +pretty_name() { + echo "$*" | sed -e 's/^\(bash \)\?\.\///' +} + # Usage: run_test [test binary] [arbitrary test arguments...] run_test() { if test_selected ${CATEGORY}; then + local test=$(pretty_name "$*") local title="running $*" local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -) - printf "%s\n%s\n%s\n" "$sep" "$title" "$sep" + printf "%s\n%s\n%s\n" "$sep" "$title" "$sep" | tap_prefix - "$@" - local ret=$? 
+ ("$@" 2>&1) | tap_prefix + local ret=${PIPESTATUS[0]} + count_total=$(( count_total + 1 )) if [ $ret -eq 0 ]; then count_pass=$(( count_pass + 1 )) - echo "[PASS]" + echo "[PASS]" | tap_prefix + echo "ok ${count_total} ${test}" | tap_output elif [ $ret -eq $ksft_skip ]; then count_skip=$(( count_skip + 1 )) - echo "[SKIP]" + echo "[SKIP]" | tap_prefix + echo "ok ${count_total} ${test} # SKIP" | tap_output exitcode=$ksft_skip else count_fail=$(( count_fail + 1 )) - echo "[FAIL]" + echo "[FAIL]" | tap_prefix + echo "not ok ${count_total} ${test} # exit=$ret" | tap_output exitcode=1 fi fi # test_selected } +echo "TAP version 13" | tap_output + CATEGORY="hugetlb" run_test ./hugepage-mmap shmmax=$(cat /proc/sys/kernel/shmmax) @@ -231,9 +257,9 @@ CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages if test_selected "hugetlb"; then - echo "NOTE: These hugetlb tests provide minimal coverage. Use" - echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" - echo " hugetlb regression testing." + echo "NOTE: These hugetlb tests provide minimal coverage. Use" | tap_prefix + echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" | tap_prefix + echo " hugetlb regression testing." 
| tap_prefix fi CATEGORY="mmap" run_test ./map_fixed_noreplace @@ -312,7 +338,7 @@ CATEGORY="hmm" run_test bash ./test_hmm.sh smoke # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate -echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope +(echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix CATEGORY="memfd_secret" run_test ./memfd_secret # KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100 @@ -334,8 +360,6 @@ CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 0 CATEGORY="ksm" run_test ./ksm_functional_tests -run_test ./ksm_functional_tests - # protection_keys tests if [ -x ./protection_keys_32 ] then @@ -359,6 +383,8 @@ CATEGORY="cow" run_test ./cow CATEGORY="thp" run_test ./khugepaged +CATEGORY="thp" run_test ./khugepaged -s 2 + CATEGORY="thp" run_test ./transhuge-stress -d 20 CATEGORY="thp" run_test ./split_huge_page_test @@ -369,6 +395,7 @@ CATEGORY="mkdirty" run_test ./mkdirty CATEGORY="mdwe" run_test ./mdwe_test -echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}" +echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}" | tap_prefix +echo "1..${count_total}" | tap_output exit $exitcode diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c new file mode 100644 index 0000000000..a416343810 --- /dev/null +++ b/tools/testing/selftests/mm/thp_settings.c @@ -0,0 +1,349 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <fcntl.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "thp_settings.h" + +#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/" +#define MAX_SETTINGS_DEPTH 4 +static struct thp_settings settings_stack[MAX_SETTINGS_DEPTH]; +static int settings_index; +static struct thp_settings saved_settings; +static char dev_queue_read_ahead_path[PATH_MAX]; + +static const char * const thp_enabled_strings[] = { + "never", + "always", + "inherit", + 
"madvise", + NULL +}; + +static const char * const thp_defrag_strings[] = { + "always", + "defer", + "defer+madvise", + "madvise", + "never", + NULL +}; + +static const char * const shmem_enabled_strings[] = { + "always", + "within_size", + "advise", + "never", + "deny", + "force", + NULL +}; + +int read_file(const char *path, char *buf, size_t buflen) +{ + int fd; + ssize_t numread; + + fd = open(path, O_RDONLY); + if (fd == -1) + return 0; + + numread = read(fd, buf, buflen - 1); + if (numread < 1) { + close(fd); + return 0; + } + + buf[numread] = '\0'; + close(fd); + + return (unsigned int) numread; +} + +int write_file(const char *path, const char *buf, size_t buflen) +{ + int fd; + ssize_t numwritten; + + fd = open(path, O_WRONLY); + if (fd == -1) { + printf("open(%s)\n", path); + exit(EXIT_FAILURE); + return 0; + } + + numwritten = write(fd, buf, buflen - 1); + close(fd); + if (numwritten < 1) { + printf("write(%s)\n", buf); + exit(EXIT_FAILURE); + return 0; + } + + return (unsigned int) numwritten; +} + +const unsigned long read_num(const char *path) +{ + char buf[21]; + + if (read_file(path, buf, sizeof(buf)) < 0) { + perror("read_file()"); + exit(EXIT_FAILURE); + } + + return strtoul(buf, NULL, 10); +} + +void write_num(const char *path, unsigned long num) +{ + char buf[21]; + + sprintf(buf, "%ld", num); + if (!write_file(path, buf, strlen(buf) + 1)) { + perror(path); + exit(EXIT_FAILURE); + } +} + +int thp_read_string(const char *name, const char * const strings[]) +{ + char path[PATH_MAX]; + char buf[256]; + char *c; + int ret; + + ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + + if (!read_file(path, buf, sizeof(buf))) { + perror(path); + exit(EXIT_FAILURE); + } + + c = strchr(buf, '['); + if (!c) { + printf("%s: Parse failure\n", __func__); + exit(EXIT_FAILURE); + } + + c++; + memmove(buf, c, sizeof(buf) - (c - buf)); + + c = strchr(buf, ']'); + 
if (!c) { + printf("%s: Parse failure\n", __func__); + exit(EXIT_FAILURE); + } + *c = '\0'; + + ret = 0; + while (strings[ret]) { + if (!strcmp(strings[ret], buf)) + return ret; + ret++; + } + + printf("Failed to parse %s\n", name); + exit(EXIT_FAILURE); +} + +void thp_write_string(const char *name, const char *val) +{ + char path[PATH_MAX]; + int ret; + + ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + + if (!write_file(path, val, strlen(val) + 1)) { + perror(path); + exit(EXIT_FAILURE); + } +} + +const unsigned long thp_read_num(const char *name) +{ + char path[PATH_MAX]; + int ret; + + ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + return read_num(path); +} + +void thp_write_num(const char *name, unsigned long num) +{ + char path[PATH_MAX]; + int ret; + + ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + write_num(path, num); +} + +void thp_read_settings(struct thp_settings *settings) +{ + unsigned long orders = thp_supported_orders(); + char path[PATH_MAX]; + int i; + + *settings = (struct thp_settings) { + .thp_enabled = thp_read_string("enabled", thp_enabled_strings), + .thp_defrag = thp_read_string("defrag", thp_defrag_strings), + .shmem_enabled = + thp_read_string("shmem_enabled", shmem_enabled_strings), + .use_zero_page = thp_read_num("use_zero_page"), + }; + settings->khugepaged = (struct khugepaged_settings) { + .defrag = thp_read_num("khugepaged/defrag"), + .alloc_sleep_millisecs = + thp_read_num("khugepaged/alloc_sleep_millisecs"), + .scan_sleep_millisecs = + thp_read_num("khugepaged/scan_sleep_millisecs"), + .max_ptes_none = thp_read_num("khugepaged/max_ptes_none"), + .max_ptes_swap = thp_read_num("khugepaged/max_ptes_swap"), + 
.max_ptes_shared = thp_read_num("khugepaged/max_ptes_shared"), + .pages_to_scan = thp_read_num("khugepaged/pages_to_scan"), + }; + if (dev_queue_read_ahead_path[0]) + settings->read_ahead_kb = read_num(dev_queue_read_ahead_path); + + for (i = 0; i < NR_ORDERS; i++) { + if (!((1 << i) & orders)) { + settings->hugepages[i].enabled = THP_NEVER; + continue; + } + snprintf(path, PATH_MAX, "hugepages-%ukB/enabled", + (getpagesize() >> 10) << i); + settings->hugepages[i].enabled = + thp_read_string(path, thp_enabled_strings); + } +} + +void thp_write_settings(struct thp_settings *settings) +{ + struct khugepaged_settings *khugepaged = &settings->khugepaged; + unsigned long orders = thp_supported_orders(); + char path[PATH_MAX]; + int enabled; + int i; + + thp_write_string("enabled", thp_enabled_strings[settings->thp_enabled]); + thp_write_string("defrag", thp_defrag_strings[settings->thp_defrag]); + thp_write_string("shmem_enabled", + shmem_enabled_strings[settings->shmem_enabled]); + thp_write_num("use_zero_page", settings->use_zero_page); + + thp_write_num("khugepaged/defrag", khugepaged->defrag); + thp_write_num("khugepaged/alloc_sleep_millisecs", + khugepaged->alloc_sleep_millisecs); + thp_write_num("khugepaged/scan_sleep_millisecs", + khugepaged->scan_sleep_millisecs); + thp_write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none); + thp_write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap); + thp_write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared); + thp_write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan); + + if (dev_queue_read_ahead_path[0]) + write_num(dev_queue_read_ahead_path, settings->read_ahead_kb); + + for (i = 0; i < NR_ORDERS; i++) { + if (!((1 << i) & orders)) + continue; + snprintf(path, PATH_MAX, "hugepages-%ukB/enabled", + (getpagesize() >> 10) << i); + enabled = settings->hugepages[i].enabled; + thp_write_string(path, thp_enabled_strings[enabled]); + } +} + +struct thp_settings 
*thp_current_settings(void) +{ + if (!settings_index) { + printf("Fail: No settings set"); + exit(EXIT_FAILURE); + } + return settings_stack + settings_index - 1; +} + +void thp_push_settings(struct thp_settings *settings) +{ + if (settings_index >= MAX_SETTINGS_DEPTH) { + printf("Fail: Settings stack exceeded"); + exit(EXIT_FAILURE); + } + settings_stack[settings_index++] = *settings; + thp_write_settings(thp_current_settings()); +} + +void thp_pop_settings(void) +{ + if (settings_index <= 0) { + printf("Fail: Settings stack empty"); + exit(EXIT_FAILURE); + } + --settings_index; + thp_write_settings(thp_current_settings()); +} + +void thp_restore_settings(void) +{ + thp_write_settings(&saved_settings); +} + +void thp_save_settings(void) +{ + thp_read_settings(&saved_settings); +} + +void thp_set_read_ahead_path(char *path) +{ + if (!path) { + dev_queue_read_ahead_path[0] = '\0'; + return; + } + + strncpy(dev_queue_read_ahead_path, path, + sizeof(dev_queue_read_ahead_path)); + dev_queue_read_ahead_path[sizeof(dev_queue_read_ahead_path) - 1] = '\0'; +} + +unsigned long thp_supported_orders(void) +{ + unsigned long orders = 0; + char path[PATH_MAX]; + char buf[256]; + int ret; + int i; + + for (i = 0; i < NR_ORDERS; i++) { + ret = snprintf(path, PATH_MAX, THP_SYSFS "hugepages-%ukB/enabled", + (getpagesize() >> 10) << i); + if (ret >= PATH_MAX) { + printf("%s: Pathname is too long\n", __func__); + exit(EXIT_FAILURE); + } + + ret = read_file(path, buf, sizeof(buf)); + if (ret) + orders |= 1UL << i; + } + + return orders; +} diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h new file mode 100644 index 0000000000..71cbff05f4 --- /dev/null +++ b/tools/testing/selftests/mm/thp_settings.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __THP_SETTINGS_H__ +#define __THP_SETTINGS_H__ + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +enum thp_enabled { + THP_NEVER, + THP_ALWAYS, + THP_INHERIT, 
+ THP_MADVISE, +}; + +enum thp_defrag { + THP_DEFRAG_ALWAYS, + THP_DEFRAG_DEFER, + THP_DEFRAG_DEFER_MADVISE, + THP_DEFRAG_MADVISE, + THP_DEFRAG_NEVER, +}; + +enum shmem_enabled { + SHMEM_ALWAYS, + SHMEM_WITHIN_SIZE, + SHMEM_ADVISE, + SHMEM_NEVER, + SHMEM_DENY, + SHMEM_FORCE, +}; + +#define NR_ORDERS 20 + +struct hugepages_settings { + enum thp_enabled enabled; +}; + +struct khugepaged_settings { + bool defrag; + unsigned int alloc_sleep_millisecs; + unsigned int scan_sleep_millisecs; + unsigned int max_ptes_none; + unsigned int max_ptes_swap; + unsigned int max_ptes_shared; + unsigned long pages_to_scan; +}; + +struct thp_settings { + enum thp_enabled thp_enabled; + enum thp_defrag thp_defrag; + enum shmem_enabled shmem_enabled; + bool use_zero_page; + struct khugepaged_settings khugepaged; + unsigned long read_ahead_kb; + struct hugepages_settings hugepages[NR_ORDERS]; +}; + +int read_file(const char *path, char *buf, size_t buflen); +int write_file(const char *path, const char *buf, size_t buflen); +const unsigned long read_num(const char *path); +void write_num(const char *path, unsigned long num); + +int thp_read_string(const char *name, const char * const strings[]); +void thp_write_string(const char *name, const char *val); +const unsigned long thp_read_num(const char *name); +void thp_write_num(const char *name, unsigned long num); + +void thp_write_settings(struct thp_settings *settings); +void thp_read_settings(struct thp_settings *settings); +struct thp_settings *thp_current_settings(void); +void thp_push_settings(struct thp_settings *settings); +void thp_pop_settings(void); +void thp_restore_settings(void); +void thp_save_settings(void); + +void thp_set_read_ahead_path(char *path); +unsigned long thp_supported_orders(void); + +#endif /* __THP_SETTINGS_H__ */ diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index 16ed4dfa73..622987f12c 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ 
b/tools/testing/selftests/mm/thuge-gen.c @@ -3,7 +3,8 @@ Before running this huge pages for each huge page size must have been reserved. - For large pages beyond MAX_ORDER (like 1GB on x86) boot options must be used. + For large pages beyond MAX_PAGE_ORDER (like 1GB on x86) boot options must + be used. Also shmmax must be increased. And you need to run as root to work around some weird permissions in shm. And nothing using huge pages should run in parallel. diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index ba6777cdf4..7ad6ba660c 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -17,6 +17,7 @@ bool map_shared; bool test_uffdio_wp = true; unsigned long long *count_verify; uffd_test_ops_t *uffd_test_ops; +uffd_test_case_ops_t *uffd_test_case_ops; atomic_bool ready_for_fork; static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) @@ -263,7 +264,7 @@ static inline void munmap_area(void **area) *area = NULL; } -static void uffd_test_ctx_clear(void) +void uffd_test_ctx_clear(void) { size_t i; @@ -299,7 +300,11 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) unsigned long nr, cpu; int ret; - uffd_test_ctx_clear(); + if (uffd_test_case_ops && uffd_test_case_ops->pre_alloc) { + ret = uffd_test_case_ops->pre_alloc(errmsg); + if (ret) + return ret; + } ret = uffd_test_ops->allocate_area((void **)&area_src, true); ret |= uffd_test_ops->allocate_area((void **)&area_dst, false); @@ -309,6 +314,12 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) return ret; } + if (uffd_test_case_ops && uffd_test_case_ops->post_alloc) { + ret = uffd_test_case_ops->post_alloc(errmsg); + if (ret) + return ret; + } + ret = userfaultfd_open(&features); if (ret) { if (errmsg) @@ -623,6 +634,30 @@ int copy_page(int ufd, unsigned long offset, bool wp) return __copy_page(ufd, offset, false, wp); } +int move_page(int ufd, unsigned long offset, unsigned long len) 
+{ + struct uffdio_move uffdio_move; + + if (offset + len > nr_pages * page_size) + err("unexpected offset %lu and length %lu\n", offset, len); + uffdio_move.dst = (unsigned long) area_dst + offset; + uffdio_move.src = (unsigned long) area_src + offset; + uffdio_move.len = len; + uffdio_move.mode = UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES; + uffdio_move.move = 0; + if (ioctl(ufd, UFFDIO_MOVE, &uffdio_move)) { + /* real retval in uffdio_move.move */ + if (uffdio_move.move != -EEXIST) + err("UFFDIO_MOVE error: %"PRId64, + (int64_t)uffdio_move.move); + wake_range(ufd, uffdio_move.dst, len); + } else if (uffdio_move.move != len) { + err("UFFDIO_MOVE error: %"PRId64, (int64_t)uffdio_move.move); + } else + return 1; + return 0; +} + int uffd_open_dev(unsigned int flags) { int fd, uffd; diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h index 1f0d573f30..cc5629c3d2 100644 --- a/tools/testing/selftests/mm/uffd-common.h +++ b/tools/testing/selftests/mm/uffd-common.h @@ -91,6 +91,12 @@ struct uffd_test_ops { }; typedef struct uffd_test_ops uffd_test_ops_t; +struct uffd_test_case_ops { + int (*pre_alloc)(const char **errmsg); + int (*post_alloc)(const char **errmsg); +}; +typedef struct uffd_test_case_ops uffd_test_case_ops_t; + extern unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; extern char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; extern int uffd, uffd_flags, finished, *pipefd, test_type; @@ -104,15 +110,18 @@ extern uffd_test_ops_t anon_uffd_test_ops; extern uffd_test_ops_t shmem_uffd_test_ops; extern uffd_test_ops_t hugetlb_uffd_test_ops; extern uffd_test_ops_t *uffd_test_ops; +extern uffd_test_case_ops_t *uffd_test_case_ops; void uffd_stats_report(struct uffd_args *args, int n_cpus); int uffd_test_ctx_init(uint64_t features, const char **errmsg); +void uffd_test_ctx_clear(void); int userfaultfd_open(uint64_t *features); int uffd_read_msg(int ufd, struct uffd_msg *msg); void wp_range(int 
ufd, __u64 start, __u64 len, bool wp); void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args); int __copy_page(int ufd, unsigned long offset, bool retry, bool wp); int copy_page(int ufd, unsigned long offset, bool wp); +int move_page(int ufd, unsigned long offset, unsigned long len); void *uffd_poll_thread(void *arg); int uffd_open_dev(unsigned int flags); diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index 469e0476af..7e83829bbb 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -323,8 +323,10 @@ static int userfaultfd_stress(void) uffd_stats_reset(args, nr_cpus); /* bounce pass */ - if (stress(args)) + if (stress(args)) { + uffd_test_ctx_clear(); return 1; + } /* Clear all the write protections if there is any */ if (test_uffdio_wp) @@ -354,6 +356,7 @@ static int userfaultfd_stress(void) uffd_stats_report(args, nr_cpus); } + uffd_test_ctx_clear(); return 0; } diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 92d51768b7..21ec23206a 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -23,6 +23,9 @@ #define MEM_ALL (MEM_ANON | MEM_SHMEM | MEM_SHMEM_PRIVATE | \ MEM_HUGETLB | MEM_HUGETLB_PRIVATE) +#define ALIGN_UP(x, align_to) \ + ((__typeof__(x))((((unsigned long)(x)) + ((align_to)-1)) & ~((align_to)-1))) + struct mem_type { const char *name; unsigned int mem_flag; @@ -78,6 +81,7 @@ typedef struct { uffd_test_fn uffd_fn; unsigned int mem_targets; uint64_t uffd_feature_required; + uffd_test_case_ops_t *test_case_ops; } uffd_test_case_t; static void uffd_test_report(void) @@ -185,6 +189,7 @@ uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test, { map_shared = mem_type->shared; uffd_test_ops = mem_type->mem_ops; + uffd_test_case_ops = test->test_case_ops; if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | 
MEM_HUGETLB)) page_size = default_huge_page_size(); @@ -1072,6 +1077,188 @@ static void uffd_poison_test(uffd_test_args_t *targs) uffd_test_pass(); } +static void +uffd_move_handle_fault_common(struct uffd_msg *msg, struct uffd_args *args, + unsigned long len) +{ + unsigned long offset; + + if (msg->event != UFFD_EVENT_PAGEFAULT) + err("unexpected msg event %u", msg->event); + + if (msg->arg.pagefault.flags & + (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR | UFFD_PAGEFAULT_FLAG_WRITE)) + err("unexpected fault type %llu", msg->arg.pagefault.flags); + + offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; + offset &= ~(len-1); + + if (move_page(uffd, offset, len)) + args->missing_faults++; +} + +static void uffd_move_handle_fault(struct uffd_msg *msg, + struct uffd_args *args) +{ + uffd_move_handle_fault_common(msg, args, page_size); +} + +static void uffd_move_pmd_handle_fault(struct uffd_msg *msg, + struct uffd_args *args) +{ + uffd_move_handle_fault_common(msg, args, read_pmd_pagesize()); +} + +static void +uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, + void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args)) +{ + unsigned long nr; + pthread_t uffd_mon; + char c; + unsigned long long count; + struct uffd_args args = { 0 }; + char *orig_area_src, *orig_area_dst; + unsigned long step_size, step_count; + unsigned long src_offs = 0; + unsigned long dst_offs = 0; + + /* Prevent source pages from being mapped more than once */ + if (madvise(area_src, nr_pages * page_size, MADV_DONTFORK)) + err("madvise(MADV_DONTFORK) failure"); + + if (uffd_register(uffd, area_dst, nr_pages * page_size, + true, false, false)) + err("register failure"); + + args.handle_fault = handle_fault; + if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) + err("uffd_poll_thread create"); + + step_size = chunk_size / page_size; + step_count = nr_pages / step_size; + + if (chunk_size > page_size) { + char *aligned_src = 
ALIGN_UP(area_src, chunk_size); + char *aligned_dst = ALIGN_UP(area_dst, chunk_size); + + if (aligned_src != area_src || aligned_dst != area_dst) { + src_offs = (aligned_src - area_src) / page_size; + dst_offs = (aligned_dst - area_dst) / page_size; + step_count--; + } + orig_area_src = area_src; + orig_area_dst = area_dst; + area_src = aligned_src; + area_dst = aligned_dst; + } + + /* + * Read each of the pages back using the UFFD-registered mapping. We + * expect that the first time we touch a page, it will result in a missing + * fault. uffd_poll_thread will resolve the fault by moving source + * page to destination. + */ + for (nr = 0; nr < step_count * step_size; nr += step_size) { + unsigned long i; + + /* Check area_src content */ + for (i = 0; i < step_size; i++) { + count = *area_count(area_src, nr + i); + if (count != count_verify[src_offs + nr + i]) + err("nr %lu source memory invalid %llu %llu\n", + nr + i, count, count_verify[src_offs + nr + i]); + } + + /* Faulting into area_dst should move the page or the huge page */ + for (i = 0; i < step_size; i++) { + count = *area_count(area_dst, nr + i); + if (count != count_verify[dst_offs + nr + i]) + err("nr %lu memory corruption %llu %llu\n", + nr, count, count_verify[dst_offs + nr + i]); + } + + /* Re-check area_src content which should be empty */ + for (i = 0; i < step_size; i++) { + count = *area_count(area_src, nr + i); + if (count != 0) + err("nr %lu move failed %llu %llu\n", + nr, count, count_verify[src_offs + nr + i]); + } + } + if (step_size > page_size) { + area_src = orig_area_src; + area_dst = orig_area_dst; + } + + if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + err("pipe write"); + if (pthread_join(uffd_mon, NULL)) + err("join() failed"); + + if (args.missing_faults != step_count || args.minor_faults != 0) + uffd_test_fail("stats check error"); + else + uffd_test_pass(); +} + +static void uffd_move_test(uffd_test_args_t *targs) +{ + uffd_move_test_common(targs, page_size, 
uffd_move_handle_fault); +} + +static void uffd_move_pmd_test(uffd_test_args_t *targs) +{ + if (madvise(area_dst, nr_pages * page_size, MADV_HUGEPAGE)) + err("madvise(MADV_HUGEPAGE) failure"); + uffd_move_test_common(targs, read_pmd_pagesize(), + uffd_move_pmd_handle_fault); +} + +static void uffd_move_pmd_split_test(uffd_test_args_t *targs) +{ + if (madvise(area_dst, nr_pages * page_size, MADV_NOHUGEPAGE)) + err("madvise(MADV_NOHUGEPAGE) failure"); + uffd_move_test_common(targs, read_pmd_pagesize(), + uffd_move_pmd_handle_fault); +} + +static int prevent_hugepages(const char **errmsg) +{ + /* This should be done before source area is populated */ + if (madvise(area_src, nr_pages * page_size, MADV_NOHUGEPAGE)) { + /* Ignore only if CONFIG_TRANSPARENT_HUGEPAGE=n */ + if (errno != EINVAL) { + if (errmsg) + *errmsg = "madvise(MADV_NOHUGEPAGE) failed"; + return -errno; + } + } + return 0; +} + +static int request_hugepages(const char **errmsg) +{ + /* This should be done before source area is populated */ + if (madvise(area_src, nr_pages * page_size, MADV_HUGEPAGE)) { + if (errmsg) { + *errmsg = (errno == EINVAL) ? + "CONFIG_TRANSPARENT_HUGEPAGE is not set" : + "madvise(MADV_HUGEPAGE) failed"; + } + return -errno; + } + return 0; +} + +struct uffd_test_case_ops uffd_move_test_case_ops = { + .post_alloc = prevent_hugepages, +}; + +struct uffd_test_case_ops uffd_move_test_pmd_case_ops = { + .post_alloc = request_hugepages, +}; + /* * Test the returned uffdio_register.ioctls with different register modes. * Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test. 
@@ -1150,6 +1337,27 @@ uffd_test_case_t uffd_tests[] = { .uffd_feature_required = 0, }, { + .name = "move", + .uffd_fn = uffd_move_test, + .mem_targets = MEM_ANON, + .uffd_feature_required = UFFD_FEATURE_MOVE, + .test_case_ops = &uffd_move_test_case_ops, + }, + { + .name = "move-pmd", + .uffd_fn = uffd_move_pmd_test, + .mem_targets = MEM_ANON, + .uffd_feature_required = UFFD_FEATURE_MOVE, + .test_case_ops = &uffd_move_test_pmd_case_ops, + }, + { + .name = "move-pmd-split", + .uffd_fn = uffd_move_pmd_split_test, + .mem_targets = MEM_ANON, + .uffd_feature_required = UFFD_FEATURE_MOVE, + .test_case_ops = &uffd_move_test_pmd_case_ops, + }, + { .name = "wp-fork", .uffd_fn = uffd_wp_fork_test, .mem_targets = MEM_ALL, @@ -1336,6 +1544,7 @@ int main(int argc, char *argv[]) continue; } test->uffd_fn(&args); + uffd_test_ctx_clear(); } } diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 3082b40492..05736c6157 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -4,6 +4,7 @@ #include <dirent.h> #include <sys/ioctl.h> #include <linux/userfaultfd.h> +#include <linux/fs.h> #include <sys/syscall.h> #include <unistd.h> #include "../kselftest.h" @@ -28,19 +29,92 @@ uint64_t pagemap_get_entry(int fd, char *start) return entry; } +static uint64_t __pagemap_scan_get_categories(int fd, char *start, struct page_region *r) +{ + struct pm_scan_arg arg; + + arg.start = (uintptr_t)start; + arg.end = (uintptr_t)(start + psize()); + arg.vec = (uintptr_t)r; + arg.vec_len = 1; + arg.flags = 0; + arg.size = sizeof(struct pm_scan_arg); + arg.max_pages = 0; + arg.category_inverted = 0; + arg.category_mask = 0; + arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE | + PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | + PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY; + arg.return_mask = arg.category_anyof_mask; + + return ioctl(fd, PAGEMAP_SCAN, &arg); +} + +static uint64_t 
pagemap_scan_get_categories(int fd, char *start) +{ + struct page_region r; + long ret; + + ret = __pagemap_scan_get_categories(fd, start, &r); + if (ret < 0) + ksft_exit_fail_msg("PAGEMAP_SCAN failed: %s\n", strerror(errno)); + if (ret == 0) + return 0; + return r.categories; +} + +/* `start` is any valid address. */ +static bool pagemap_scan_supported(int fd, char *start) +{ + static int supported = -1; + int ret; + + if (supported != -1) + return supported; + + /* Provide an invalid address in order to trigger EFAULT. */ + ret = __pagemap_scan_get_categories(fd, start, (struct page_region *) ~0UL); + if (ret == 0) + ksft_exit_fail_msg("PAGEMAP_SCAN succeeded unexpectedly\n"); + + supported = errno == EFAULT; + + return supported; +} + +static bool page_entry_is(int fd, char *start, char *desc, + uint64_t pagemap_flags, uint64_t pagescan_flags) +{ + bool m = pagemap_get_entry(fd, start) & pagemap_flags; + + if (pagemap_scan_supported(fd, start)) { + bool s = pagemap_scan_get_categories(fd, start) & pagescan_flags; + + if (m == s) + return m; + + ksft_exit_fail_msg( + "read and ioctl return unmatched results for %s: %d %d", desc, m, s); + } + return m; +} + bool pagemap_is_softdirty(int fd, char *start) { - return pagemap_get_entry(fd, start) & PM_SOFT_DIRTY; + return page_entry_is(fd, start, "soft-dirty", + PM_SOFT_DIRTY, PAGE_IS_SOFT_DIRTY); } bool pagemap_is_swapped(int fd, char *start) { - return pagemap_get_entry(fd, start) & PM_SWAP; + return page_entry_is(fd, start, "swap", PM_SWAP, PAGE_IS_SWAPPED); } bool pagemap_is_populated(int fd, char *start) { - return pagemap_get_entry(fd, start) & (PM_PRESENT | PM_SWAP); + return page_entry_is(fd, start, "populated", + PM_PRESENT | PM_SWAP, + PAGE_IS_PRESENT | PAGE_IS_SWAPPED); } unsigned long pagemap_get_pfn(int fd, char *start) diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index c02990bbd5..9007c420d5 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ 
b/tools/testing/selftests/mm/vm_util.h @@ -3,7 +3,7 @@ #include <stdbool.h> #include <sys/mman.h> #include <err.h> -#include <string.h> /* ffsl() */ +#include <strings.h> /* ffsl() */ #include <unistd.h> /* _SC_PAGESIZE */ #define BIT_ULL(nr) (1ULL << (nr)) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index c1ae90c785..211753756b 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -53,8 +53,7 @@ TEST_PROGS += bind_bhash.sh TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh -TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh -TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh +TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite @@ -92,9 +91,11 @@ TEST_PROGS += test_bridge_neigh_suppress.sh TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh TEST_PROGS += fdb_flush.sh +TEST_PROGS += fq_band_pktlimit.sh TEST_PROGS += vlan_hw_filter.sh TEST_FILES := settings +TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh include ../lib.mk diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh index 4a110bb01e..92eb880c52 100755 --- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh +++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh @@ -12,7 +12,8 @@ # {arp,ndisc}_evict_nocarrer=0 should still contain the single ARP/ND entry # -readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" +source lib.sh + readonly V4_ADDR0=10.0.10.1 readonly V4_ADDR1=10.0.10.2 readonly V6_ADDR0=2001:db8:91::1 @@ -22,43 +23,29 @@ ret=0 cleanup_v6() { - ip netns del me - ip netns del peer + cleanup_ns ${me} ${peer} sysctl -w 
net.ipv6.conf.veth1.ndisc_evict_nocarrier=1 >/dev/null 2>&1 sysctl -w net.ipv6.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1 } -create_ns() -{ - local n=${1} - - ip netns del ${n} 2>/dev/null - - ip netns add ${n} - ip netns set ${n} $((nsid++)) - ip -netns ${n} link set lo up -} - - setup_v6() { - create_ns me - create_ns peer + setup_ns me peer - IP="ip -netns me" + IP="ip -netns ${me}" $IP li add veth1 type veth peer name veth2 $IP li set veth1 up $IP -6 addr add $V6_ADDR0/64 dev veth1 nodad - $IP li set veth2 netns peer up - ip -netns peer -6 addr add $V6_ADDR1/64 dev veth2 nodad + $IP li set veth2 netns ${peer} up + ip -netns ${peer} -6 addr add $V6_ADDR1/64 dev veth2 nodad - ip netns exec me sysctl -w $1 >/dev/null 2>&1 + ip netns exec ${me} sysctl -w $1 >/dev/null 2>&1 # Establish an ND cache entry - ip netns exec me ping -6 -c1 -Iveth1 $V6_ADDR1 >/dev/null 2>&1 + ip netns exec ${me} ping -6 -c1 -Iveth1 $V6_ADDR1 >/dev/null 2>&1 # Should have the veth1 entry in ND table - ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 if [ $? 
-ne 0 ]; then cleanup_v6 echo "failed" @@ -66,11 +53,11 @@ setup_v6() { fi # Set veth2 down, which will put veth1 in NOCARRIER state - ip netns exec peer ip link set veth2 down + ip netns exec ${peer} ip link set veth2 down } setup_v4() { - ip netns add "${PEER_NS}" + setup_ns PEER_NS ip link add name veth0 type veth peer name veth1 ip link set dev veth0 up ip link set dev veth1 netns "${PEER_NS}" @@ -99,8 +86,7 @@ setup_v4() { cleanup_v4() { ip neigh flush dev veth0 ip link del veth0 - local -r ns="$(ip netns list|grep $PEER_NS)" - [ -n "$ns" ] && ip netns del $ns 2>/dev/null + cleanup_ns $PEER_NS sysctl -w net.ipv4.conf.veth0.arp_evict_nocarrier=1 >/dev/null 2>&1 sysctl -w net.ipv4.conf.all.arp_evict_nocarrier=1 >/dev/null 2>&1 @@ -163,7 +149,7 @@ run_ndisc_evict_nocarrier_enabled() { setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=1" - ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 if [ $? -eq 0 ];then echo "failed" @@ -180,7 +166,7 @@ run_ndisc_evict_nocarrier_disabled() { setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=0" - ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 if [ $? -eq 0 ];then echo "ok" @@ -197,7 +183,7 @@ run_ndisc_evict_nocarrier_disabled_all() { setup_v6 "net.ipv6.conf.all.ndisc_evict_nocarrier=0" - ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 + ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 if [ $? -eq 0 ];then echo "ok" diff --git a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh index 327427ec10..a40c0e9bd0 100755 --- a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh +++ b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh @@ -5,16 +5,14 @@ # garp to the router. 
Router accepts or ignores based on its arp_accept # or accept_untracked_na configuration. +source lib.sh + TESTS="arp ndisc" -ROUTER_NS="ns-router" -ROUTER_NS_V6="ns-router-v6" ROUTER_INTF="veth-router" ROUTER_ADDR="10.0.10.1" ROUTER_ADDR_V6="2001:db8:abcd:0012::1" -HOST_NS="ns-host" -HOST_NS_V6="ns-host-v6" HOST_INTF="veth-host" HOST_ADDR="10.0.10.2" HOST_ADDR_V6="2001:db8:abcd:0012::2" @@ -23,13 +21,11 @@ SUBNET_WIDTH=24 PREFIX_WIDTH_V6=64 cleanup() { - ip netns del ${HOST_NS} - ip netns del ${ROUTER_NS} + cleanup_ns ${HOST_NS} ${ROUTER_NS} } cleanup_v6() { - ip netns del ${HOST_NS_V6} - ip netns del ${ROUTER_NS_V6} + cleanup_ns ${HOST_NS_V6} ${ROUTER_NS_V6} } setup() { @@ -37,8 +33,7 @@ setup() { local arp_accept=$1 # Set up two namespaces - ip netns add ${ROUTER_NS} - ip netns add ${HOST_NS} + setup_ns HOST_NS ROUTER_NS # Set up interfaces veth0 and veth1, which are pairs in separate # namespaces. veth0 is veth-router, veth1 is veth-host. @@ -72,8 +67,7 @@ setup_v6() { local accept_untracked_na=$1 # Set up two namespaces - ip netns add ${ROUTER_NS_V6} - ip netns add ${HOST_NS_V6} + setup_ns HOST_NS_V6 ROUTER_NS_V6 # Set up interfaces veth0 and veth1, which are pairs in separate # namespaces. veth0 is veth-router, veth1 is veth-host. diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh index c921750ca1..8bc23fb4c8 100755 --- a/tools/testing/selftests/net/cmsg_ipv6.sh +++ b/tools/testing/selftests/net/cmsg_ipv6.sh @@ -1,9 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ksft_skip=4 +source lib.sh -NS=ns IP6=2001:db8:1::1/64 TGT6=2001:db8:1::2 TMPF=$(mktemp --suffix ".pcap") @@ -11,13 +10,11 @@ TMPF=$(mktemp --suffix ".pcap") cleanup() { rm -f $TMPF - ip netns del $NS + cleanup_ns $NS } trap cleanup EXIT -NSEXE="ip netns exec $NS" - tcpdump -h | grep immediate-mode >> /dev/null if [ $? -ne 0 ]; then echo "SKIP - tcpdump with --immediate-mode option required" @@ -25,7 +22,8 @@ if [ $? 
-ne 0 ]; then fi # Namespaces -ip netns add $NS +setup_ns NS +NSEXE="ip netns exec $NS" $NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index 6ff3e732f4..c79e65581d 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -45,11 +45,13 @@ struct options { const char *host; const char *service; unsigned int size; + unsigned int num_pkt; struct { unsigned int mark; unsigned int dontfrag; unsigned int tclass; unsigned int hlimit; + unsigned int priority; } sockopt; struct { unsigned int family; @@ -72,6 +74,7 @@ struct options { } v6; } opt = { .size = 13, + .num_pkt = 1, .sock = { .family = AF_UNSPEC, .type = SOCK_DGRAM, @@ -112,7 +115,7 @@ static void cs_parse_args(int argc, char *argv[]) { int o; - while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:c:C:l:L:H:")) != -1) { + while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) { switch (o) { case 's': opt.silent_send = true; @@ -138,7 +141,9 @@ static void cs_parse_args(int argc, char *argv[]) cs_usage(argv[0]); } break; - + case 'P': + opt.sockopt.priority = atoi(optarg); + break; case 'm': opt.mark.ena = true; opt.mark.val = atoi(optarg); @@ -146,6 +151,9 @@ static void cs_parse_args(int argc, char *argv[]) case 'M': opt.sockopt.mark = atoi(optarg); break; + case 'n': + opt.num_pkt = atoi(optarg); + break; case 'd': opt.txtime.ena = true; opt.txtime.delay = atoi(optarg); @@ -410,6 +418,10 @@ static void ca_set_sockopts(int fd) setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS, &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit))) error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT"); + if (opt.sockopt.priority && + setsockopt(fd, SOL_SOCKET, SO_PRIORITY, + &opt.sockopt.priority, sizeof(opt.sockopt.priority))) + error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY"); } int main(int argc, char *argv[]) @@ -421,6 +433,7 @@ int main(int argc, 
char *argv[]) char cbuf[1024]; int err; int fd; + int i; cs_parse_args(argc, argv); @@ -480,24 +493,27 @@ int main(int argc, char *argv[]) cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf)); - err = sendmsg(fd, &msg, 0); - if (err < 0) { - if (!opt.silent_send) - fprintf(stderr, "send failed: %s\n", strerror(errno)); - err = ERN_SEND; - goto err_out; - } else if (err != (int)opt.size) { - fprintf(stderr, "short send\n"); - err = ERN_SEND_SHORT; - goto err_out; - } else { - err = ERN_SUCCESS; + for (i = 0; i < opt.num_pkt; i++) { + err = sendmsg(fd, &msg, 0); + if (err < 0) { + if (!opt.silent_send) + fprintf(stderr, "send failed: %s\n", strerror(errno)); + err = ERN_SEND; + goto err_out; + } else if (err != (int)opt.size) { + fprintf(stderr, "short send\n"); + err = ERN_SEND_SHORT; + goto err_out; + } } + err = ERN_SUCCESS; - /* Make sure all timestamps have time to loop back */ - usleep(opt.txtime.delay); + if (opt.ts.ena) { + /* Make sure all timestamps have time to loop back */ + usleep(opt.txtime.delay); - cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf)); + cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf)); + } err_out: close(fd); diff --git a/tools/testing/selftests/net/cmsg_so_mark.sh b/tools/testing/selftests/net/cmsg_so_mark.sh index 1650b8622f..772ad0cc26 100755 --- a/tools/testing/selftests/net/cmsg_so_mark.sh +++ b/tools/testing/selftests/net/cmsg_so_mark.sh @@ -1,7 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -NS=ns +source lib.sh + IP4=172.16.0.1/24 TGT4=172.16.0.2 IP6=2001:db8:1::1/64 @@ -10,13 +11,13 @@ MARK=1000 cleanup() { - ip netns del $NS + cleanup_ns $NS } trap cleanup EXIT # Namespaces -ip netns add $NS +setup_ns NS ip netns exec $NS sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null diff --git a/tools/testing/selftests/net/cmsg_time.sh b/tools/testing/selftests/net/cmsg_time.sh index 91161e1da7..af85267ad1 100755 --- a/tools/testing/selftests/net/cmsg_time.sh +++ b/tools/testing/selftests/net/cmsg_time.sh @@ -1,7 +1,8 @@ #!/bin/bash # 
SPDX-License-Identifier: GPL-2.0 -NS=ns +source lib.sh + IP4=172.16.0.1/24 TGT4=172.16.0.2 IP6=2001:db8:1::1/64 @@ -9,13 +10,13 @@ TGT6=2001:db8:1::2 cleanup() { - ip netns del $NS + cleanup_ns $NS } trap cleanup EXIT # Namespaces -ip netns add $NS +setup_ns NS ip netns exec $NS sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 3b749addd3..5e4390cac1 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -24,10 +24,14 @@ CONFIG_IFB=y CONFIG_INET_DIAG=y CONFIG_INET_ESP=y CONFIG_INET_ESP_OFFLOAD=y +CONFIG_NET_FOU=y +CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_IP_GRE=m CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NF_CONNTRACK=m +CONFIG_IPV6_SIT=y +CONFIG_IP_DCCP=m CONFIG_NF_NAT=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP_NF_IPTABLES=m @@ -62,6 +66,7 @@ CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_U32=m CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPGRE=m +CONFIG_NET_IPIP=y CONFIG_NET_SCH_FQ_CODEL=m CONFIG_NET_SCH_HTB=m CONFIG_NET_SCH_FQ=m @@ -78,7 +83,6 @@ CONFIG_TLS=m CONFIG_TRACEPOINTS=y CONFIG_NET_DROP_MONITOR=m CONFIG_NETDEVSIM=m -CONFIG_NET_FOU=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_SCH_INGRESS=m diff --git a/tools/testing/selftests/net/drop_monitor_tests.sh b/tools/testing/selftests/net/drop_monitor_tests.sh index b7650e30d1..7c4818c971 100755 --- a/tools/testing/selftests/net/drop_monitor_tests.sh +++ b/tools/testing/selftests/net/drop_monitor_tests.sh @@ -2,10 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 # This test is for checking drop monitor functionality. - +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 # all tests in this script. 
Can be overridden with -t option TESTS=" @@ -13,10 +11,6 @@ TESTS=" hw_drops " -IP="ip -netns ns1" -TC="tc -netns ns1" -DEVLINK="devlink -N ns1" -NS_EXEC="ip netns exec ns1" NETDEVSIM_PATH=/sys/bus/netdevsim/ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} @@ -43,7 +37,7 @@ setup() modprobe netdevsim &> /dev/null set -e - ip netns add ns1 + setup_ns NS1 $IP link add dummy10 up type dummy $NS_EXEC echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device @@ -57,7 +51,7 @@ setup() cleanup() { $NS_EXEC echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device - ip netns del ns1 + cleanup_ns ${NS1} } sw_drops_test() @@ -194,8 +188,15 @@ if [ $? -ne 0 ]; then exit $ksft_skip fi -# start clean +# create netns first so we can get the namespace name +setup_ns NS1 cleanup &> /dev/null +trap cleanup EXIT + +IP="ip -netns ${NS1}" +TC="tc -netns ${NS1}" +DEVLINK="devlink -N ${NS1}" +NS_EXEC="ip netns exec ${NS1}" for t in $TESTS do diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index d32a14ba06..0d4f252427 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -37,9 +37,7 @@ # # server / client nomenclature relative to ns-A -# Kselftest framework requirement - SKIP code is 4. 
-ksft_skip=4 - +source lib.sh VERBOSE=0 NSA_DEV=eth1 @@ -82,14 +80,6 @@ MCAST=ff02::1 NSA_LINKIP6= NSB_LINKIP6= -NSA=ns-A -NSB=ns-B -NSC=ns-C - -NSA_CMD="ip netns exec ${NSA}" -NSB_CMD="ip netns exec ${NSB}" -NSC_CMD="ip netns exec ${NSC}" - which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) # Check if FIPS mode is enabled @@ -406,9 +396,6 @@ create_ns() local addr=$2 local addr6=$3 - ip netns add ${ns} - - ip -netns ${ns} link set lo up if [ "${addr}" != "-" ]; then ip -netns ${ns} addr add dev lo ${addr} fi @@ -467,13 +454,12 @@ cleanup() ip -netns ${NSA} link del dev ${NSA_DEV} ip netns pids ${NSA} | xargs kill 2>/dev/null - ip netns del ${NSA} + cleanup_ns ${NSA} fi ip netns pids ${NSB} | xargs kill 2>/dev/null - ip netns del ${NSB} ip netns pids ${NSC} | xargs kill 2>/dev/null - ip netns del ${NSC} >/dev/null 2>&1 + cleanup_ns ${NSB} ${NSC} } cleanup_vrf_dup() @@ -487,6 +473,8 @@ setup_vrf_dup() { # some VRF tests use ns-C which has the same config as # ns-B but for a device NOT in the VRF + setup_ns NSC + NSC_CMD="ip netns exec ${NSC}" create_ns ${NSC} "-" "-" connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \ ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64 @@ -503,6 +491,10 @@ setup() log_debug "Configuring network namespaces" set -e + setup_ns NSA NSB + NSA_CMD="ip netns exec ${NSA}" + NSB_CMD="ip netns exec ${NSB}" + create_ns ${NSA} ${NSA_LO_IP}/32 ${NSA_LO_IP6}/128 create_ns ${NSB} ${NSB_LO_IP}/32 ${NSB_LO_IP6}/128 connect_ns ${NSA} ${NSA_DEV} ${NSA_IP}/24 ${NSA_IP6}/64 \ @@ -545,6 +537,10 @@ setup_lla_only() log_debug "Configuring network namespaces" set -e + setup_ns NSA NSB NSC + NSA_CMD="ip netns exec ${NSA}" + NSB_CMD="ip netns exec ${NSB}" + NSC_CMD="ip netns exec ${NSC}" create_ns ${NSA} "-" "-" create_ns ${NSB} "-" "-" create_ns ${NSC} "-" "-" diff --git a/tools/testing/selftests/net/fdb_flush.sh b/tools/testing/selftests/net/fdb_flush.sh index 90e7a29e04..d5e3abb865 100755 --- 
a/tools/testing/selftests/net/fdb_flush.sh +++ b/tools/testing/selftests/net/fdb_flush.sh @@ -5,6 +5,8 @@ # Check that flush works as expected with all the supported arguments and verify # some combinations of arguments. +source lib.sh + FLUSH_BY_STATE_TESTS=" vxlan_test_flush_by_permanent vxlan_test_flush_by_nopermanent @@ -739,10 +741,9 @@ bridge_vxlan_test_flush() setup() { - IP="ip -netns ns1" - BRIDGE="bridge -netns ns1" - - ip netns add ns1 + setup_ns NS + IP="ip -netns ${NS}" + BRIDGE="bridge -netns ${NS}" $IP link add name vx10 type vxlan id 1000 dstport "$VXPORT" $IP link add name vx20 type vxlan id 2000 dstport "$VXPORT" @@ -759,7 +760,7 @@ cleanup() $IP link del dev vx20 $IP link del dev vx10 - ip netns del ns1 + cleanup_ns ${NS} } ################################################################################ diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh index c287b90b8a..ec2d6ceb1f 100755 --- a/tools/testing/selftests/net/fib-onlink-tests.sh +++ b/tools/testing/selftests/net/fib-onlink-tests.sh @@ -3,6 +3,7 @@ # IPv4 and IPv6 onlink tests +source lib.sh PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} VERBOSE=0 @@ -74,9 +75,6 @@ TEST_NET4IN6[2]=10.2.1.254 # mcast address MCAST6=ff02::1 - -PEER_NS=bart -PEER_CMD="ip netns exec ${PEER_NS}" VRF=lisa VRF_TABLE=1101 PBR_TABLE=101 @@ -176,8 +174,7 @@ setup() set -e # create namespace - ip netns add ${PEER_NS} - ip -netns ${PEER_NS} li set lo up + setup_ns PEER_NS # add vrf table ip li add ${VRF} type vrf table ${VRF_TABLE} @@ -219,7 +216,7 @@ setup() cleanup() { # make sure we start from a clean slate - ip netns del ${PEER_NS} 2>/dev/null + cleanup_ns ${PEER_NS} 2>/dev/null for n in 1 3 5 7; do ip link del ${NETIFS[p${n}]} 2>/dev/null done diff --git a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh index b52d59547f..e85248609a 100755 --- a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh 
+++ b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh @@ -12,6 +12,7 @@ # # routing in h0 to hN is done with nexthop objects. +source lib.sh PAUSE_ON_FAIL=no VERBOSE=0 @@ -72,12 +73,6 @@ create_ns() { local ns=${1} - ip netns del ${ns} 2>/dev/null - - ip netns add ${ns} - ip -netns ${ns} addr add 127.0.0.1/8 dev lo - ip -netns ${ns} link set lo up - ip netns exec ${ns} sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 case ${ns} in h*) @@ -97,7 +92,13 @@ setup() #set -e - for ns in h0 r1 h1 h2 h3 + setup_ns h0 r1 h1 h2 h3 + h[0]=$h0 + h[1]=$h1 + h[2]=$h2 + h[3]=$h3 + r[1]=$r1 + for ns in ${h[0]} ${r[1]} ${h[1]} ${h[2]} ${h[3]} do create_ns ${ns} done @@ -108,35 +109,35 @@ setup() for i in 0 1 2 3 do - ip -netns h${i} li add eth0 type veth peer name r1h${i} - ip -netns h${i} li set eth0 up - ip -netns h${i} li set r1h${i} netns r1 name eth${i} up - - ip -netns h${i} addr add dev eth0 172.16.10${i}.1/24 - ip -netns h${i} -6 addr add dev eth0 2001:db8:10${i}::1/64 - ip -netns r1 addr add dev eth${i} 172.16.10${i}.254/24 - ip -netns r1 -6 addr add dev eth${i} 2001:db8:10${i}::64/64 + ip -netns ${h[$i]} li add eth0 type veth peer name r1h${i} + ip -netns ${h[$i]} li set eth0 up + ip -netns ${h[$i]} li set r1h${i} netns ${r[1]} name eth${i} up + + ip -netns ${h[$i]} addr add dev eth0 172.16.10${i}.1/24 + ip -netns ${h[$i]} -6 addr add dev eth0 2001:db8:10${i}::1/64 + ip -netns ${r[1]} addr add dev eth${i} 172.16.10${i}.254/24 + ip -netns ${r[1]} -6 addr add dev eth${i} 2001:db8:10${i}::64/64 done - ip -netns h0 nexthop add id 4 via 172.16.100.254 dev eth0 - ip -netns h0 nexthop add id 6 via 2001:db8:100::64 dev eth0 + ip -netns ${h[0]} nexthop add id 4 via 172.16.100.254 dev eth0 + ip -netns ${h[0]} nexthop add id 6 via 2001:db8:100::64 dev eth0 - # routing from h0 to h1-h3 and back + # routing from ${h[0]} to h1-h3 and back for i in 1 2 3 do - ip -netns h0 ro add 172.16.10${i}.0/24 nhid 4 - ip -netns h${i} ro add 172.16.100.0/24 via 172.16.10${i}.254 + ip -netns 
${h[0]} ro add 172.16.10${i}.0/24 nhid 4 + ip -netns ${h[$i]} ro add 172.16.100.0/24 via 172.16.10${i}.254 - ip -netns h0 -6 ro add 2001:db8:10${i}::/64 nhid 6 - ip -netns h${i} -6 ro add 2001:db8:100::/64 via 2001:db8:10${i}::64 + ip -netns ${h[0]} -6 ro add 2001:db8:10${i}::/64 nhid 6 + ip -netns ${h[$i]} -6 ro add 2001:db8:100::/64 via 2001:db8:10${i}::64 done if [ "$VERBOSE" = "1" ]; then echo echo "host 1 config" - ip -netns h0 li sh - ip -netns h0 ro sh - ip -netns h0 -6 ro sh + ip -netns ${h[0]} li sh + ip -netns ${h[0]} ro sh + ip -netns ${h[0]} -6 ro sh fi #set +e @@ -144,10 +145,7 @@ setup() cleanup() { - for n in h0 r1 h1 h2 h3 - do - ip netns del ${n} 2>/dev/null - done + cleanup_all_ns } change_mtu() @@ -156,7 +154,7 @@ change_mtu() local mtu=$2 run_cmd ip -netns h${hostid} li set eth0 mtu ${mtu} - run_cmd ip -netns r1 li set eth${hostid} mtu ${mtu} + run_cmd ip -netns ${r1} li set eth${hostid} mtu ${mtu} } ################################################################################ @@ -168,23 +166,23 @@ validate_v4_exception() local mtu=$2 local ping_sz=$3 local dst="172.16.10${i}.1" - local h0=172.16.100.1 - local r1=172.16.100.254 + local h0_ip=172.16.100.1 + local r1_ip=172.16.100.254 local rc if [ ${ping_sz} != "0" ]; then - run_cmd ip netns exec h0 ping -s ${ping_sz} -c5 -w5 ${dst} + run_cmd ip netns exec ${h0} ping -s ${ping_sz} -c5 -w5 ${dst} fi if [ "$VERBOSE" = "1" ]; then echo "Route get" - ip -netns h0 ro get ${dst} + ip -netns ${h0} ro get ${dst} echo "Searching for:" echo " cache .* mtu ${mtu}" echo fi - ip -netns h0 ro get ${dst} | \ + ip -netns ${h0} ro get ${dst} | \ grep -q "cache .* mtu ${mtu}" rc=$? 
@@ -197,24 +195,24 @@ validate_v6_exception() local mtu=$2 local ping_sz=$3 local dst="2001:db8:10${i}::1" - local h0=2001:db8:100::1 - local r1=2001:db8:100::64 + local h0_ip=2001:db8:100::1 + local r1_ip=2001:db8:100::64 local rc if [ ${ping_sz} != "0" ]; then - run_cmd ip netns exec h0 ${ping6} -s ${ping_sz} -c5 -w5 ${dst} + run_cmd ip netns exec ${h0} ${ping6} -s ${ping_sz} -c5 -w5 ${dst} fi if [ "$VERBOSE" = "1" ]; then echo "Route get" - ip -netns h0 -6 ro get ${dst} + ip -netns ${h0} -6 ro get ${dst} echo "Searching for:" - echo " ${dst}.* via ${r1} dev eth0 src ${h0} .* mtu ${mtu}" + echo " ${dst}.* via ${r1_ip} dev eth0 src ${h0_ip} .* mtu ${mtu}" echo fi - ip -netns h0 -6 ro get ${dst} | \ - grep -q "${dst}.* via ${r1} dev eth0 src ${h0} .* mtu ${mtu}" + ip -netns ${h0} -6 ro get ${dst} | \ + grep -q "${dst}.* via ${r1_ip} dev eth0 src ${h0_ip} .* mtu ${mtu}" rc=$? log_test $rc 0 "IPv6: host 0 to host ${i}, mtu ${mtu}" @@ -242,11 +240,11 @@ for i in 1 2 3 do # generate a cached route per-cpu for c in ${cpus}; do - run_cmd taskset -c ${c} ip netns exec h0 ping -c1 -w1 172.16.10${i}.1 - [ $? -ne 0 ] && printf "\nERROR: ping to h${i} failed\n" && ret=1 + run_cmd taskset -c ${c} ip netns exec ${h0} ping -c1 -w1 172.16.10${i}.1 + [ $? -ne 0 ] && printf "\nERROR: ping to ${h[$i]} failed\n" && ret=1 - run_cmd taskset -c ${c} ip netns exec h0 ${ping6} -c1 -w1 2001:db8:10${i}::1 - [ $? -ne 0 ] && printf "\nERROR: ping6 to h${i} failed\n" && ret=1 + run_cmd taskset -c ${c} ip netns exec ${h0} ${ping6} -c1 -w1 2001:db8:10${i}::1 + [ $? 
-ne 0 ] && printf "\nERROR: ping6 to ${h[$i]} failed\n" && ret=1 [ $ret -ne 0 ] && break done @@ -282,11 +280,11 @@ if [ $ret -eq 0 ]; then validate_v6_exception 3 1400 0 # targeted deletes to trigger cleanup paths in kernel - ip -netns h0 ro del 172.16.102.0/24 nhid 4 - ip -netns h0 -6 ro del 2001:db8:102::/64 nhid 6 + ip -netns ${h0} ro del 172.16.102.0/24 nhid 4 + ip -netns ${h0} -6 ro del 2001:db8:102::/64 nhid 6 - ip -netns h0 nexthop del id 4 - ip -netns h0 nexthop del id 6 + ip -netns ${h0} nexthop del id 4 + ip -netns ${h0} nexthop del id 6 fi cleanup diff --git a/tools/testing/selftests/net/fib_nexthop_nongw.sh b/tools/testing/selftests/net/fib_nexthop_nongw.sh index b7b928b38c..1ccf56f101 100755 --- a/tools/testing/selftests/net/fib_nexthop_nongw.sh +++ b/tools/testing/selftests/net/fib_nexthop_nongw.sh @@ -8,6 +8,7 @@ # veth0 <---|---> veth1 # Validate source address selection for route without gateway +source lib.sh PAUSE_ON_FAIL=no VERBOSE=0 ret=0 @@ -64,35 +65,31 @@ run_cmd() # config setup() { - ip netns add h1 - ip -n h1 link set lo up - ip netns add h2 - ip -n h2 link set lo up + setup_ns h1 h2 # Add a fake eth0 to support an ip address - ip -n h1 link add name eth0 type dummy - ip -n h1 link set eth0 up - ip -n h1 address add 192.168.0.1/24 dev eth0 + ip -n $h1 link add name eth0 type dummy + ip -n $h1 link set eth0 up + ip -n $h1 address add 192.168.0.1/24 dev eth0 # Configure veths (same @mac, arp off) - ip -n h1 link add name veth0 type veth peer name veth1 netns h2 - ip -n h1 link set veth0 up + ip -n $h1 link add name veth0 type veth peer name veth1 netns $h2 + ip -n $h1 link set veth0 up - ip -n h2 link set veth1 up + ip -n $h2 link set veth1 up # Configure @IP in the peer netns - ip -n h2 address add 192.168.1.1/32 dev veth1 - ip -n h2 route add default dev veth1 + ip -n $h2 address add 192.168.1.1/32 dev veth1 + ip -n $h2 route add default dev veth1 # Add a nexthop without @gw and use it in a route - ip -n h1 nexthop add id 1 dev veth0 - 
ip -n h1 route add 192.168.1.1 nhid 1 + ip -n $h1 nexthop add id 1 dev veth0 + ip -n $h1 route add 192.168.1.1 nhid 1 } cleanup() { - ip netns del h1 2>/dev/null - ip netns del h2 2>/dev/null + cleanup_ns $h1 $h2 } trap cleanup EXIT @@ -108,12 +105,11 @@ do esac done -cleanup setup -run_cmd ip -netns h1 route get 192.168.1.1 +run_cmd ip -netns $h1 route get 192.168.1.1 log_test $? 0 "nexthop: get route with nexthop without gw" -run_cmd ip netns exec h1 ping -c1 192.168.1.1 +run_cmd ip netns exec $h1 ping -c1 192.168.1.1 log_test $? 0 "nexthop: ping through nexthop without gw" exit $ret diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index a6f2c0b955..d5a281aadb 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -14,6 +14,7 @@ # objects. Device reference counts and network namespace cleanup tested # by use of network namespace for peer. +source lib.sh ret=0 # Kselftest framework requirement - SKIP code is 4. 
ksft_skip=4 @@ -148,13 +149,7 @@ create_ns() { local n=${1} - ip netns del ${n} 2>/dev/null - set -e - ip netns add ${n} - ip netns set ${n} $((nsid++)) - ip -netns ${n} addr add 127.0.0.1/8 dev lo - ip -netns ${n} link set lo up ip netns exec ${n} sysctl -qw net.ipv4.ip_forward=1 ip netns exec ${n} sysctl -qw net.ipv4.fib_multipath_use_neigh=1 @@ -173,12 +168,13 @@ setup() { cleanup - create_ns me - create_ns peer - create_ns remote + setup_ns me peer remote + create_ns $me + create_ns $peer + create_ns $remote - IP="ip -netns me" - BRIDGE="bridge -netns me" + IP="ip -netns $me" + BRIDGE="bridge -netns $me" set -e $IP li add veth1 type veth peer name veth2 $IP li set veth1 up @@ -190,24 +186,24 @@ setup() $IP addr add 172.16.2.1/24 dev veth3 $IP -6 addr add 2001:db8:92::1/64 dev veth3 nodad - $IP li set veth2 netns peer up - ip -netns peer addr add 172.16.1.2/24 dev veth2 - ip -netns peer -6 addr add 2001:db8:91::2/64 dev veth2 nodad + $IP li set veth2 netns $peer up + ip -netns $peer addr add 172.16.1.2/24 dev veth2 + ip -netns $peer -6 addr add 2001:db8:91::2/64 dev veth2 nodad - $IP li set veth4 netns peer up - ip -netns peer addr add 172.16.2.2/24 dev veth4 - ip -netns peer -6 addr add 2001:db8:92::2/64 dev veth4 nodad + $IP li set veth4 netns $peer up + ip -netns $peer addr add 172.16.2.2/24 dev veth4 + ip -netns $peer -6 addr add 2001:db8:92::2/64 dev veth4 nodad - ip -netns remote li add veth5 type veth peer name veth6 - ip -netns remote li set veth5 up - ip -netns remote addr add dev veth5 172.16.101.1/24 - ip -netns remote -6 addr add dev veth5 2001:db8:101::1/64 nodad - ip -netns remote ro add 172.16.0.0/22 via 172.16.101.2 - ip -netns remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2 + ip -netns $remote li add veth5 type veth peer name veth6 + ip -netns $remote li set veth5 up + ip -netns $remote addr add dev veth5 172.16.101.1/24 + ip -netns $remote -6 addr add dev veth5 2001:db8:101::1/64 nodad + ip -netns $remote ro add 172.16.0.0/22 via 
172.16.101.2 + ip -netns $remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2 - ip -netns remote li set veth6 netns peer up - ip -netns peer addr add dev veth6 172.16.101.2/24 - ip -netns peer -6 addr add dev veth6 2001:db8:101::2/64 nodad + ip -netns $remote li set veth6 netns $peer up + ip -netns $peer addr add dev veth6 172.16.101.2/24 + ip -netns $peer -6 addr add dev veth6 2001:db8:101::2/64 nodad set +e } @@ -215,7 +211,7 @@ cleanup() { local ns - for ns in me peer remote; do + for ns in $me $peer $remote; do ip netns del ${ns} 2>/dev/null done } @@ -779,7 +775,7 @@ ipv6_grp_refs() run_cmd "$IP route add 2001:db8:101::1/128 nhid 102" # create per-cpu dsts through nh 100 - run_cmd "ip netns exec me mausezahn -6 veth1.10 -B 2001:db8:101::1 -A 2001:db8:91::1 -c 5 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1" + run_cmd "ip netns exec $me mausezahn -6 veth1.10 -B 2001:db8:101::1 -A 2001:db8:91::1 -c 5 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1" # remove nh 100 from the group to delete the route potentially leaving # a stale per-cpu dst which holds a reference to the nexthop's net @@ -805,7 +801,7 @@ ipv6_grp_refs() # if a reference was lost this command will hang because the net device # cannot be removed - timeout -s KILL 5 ip netns exec me ip link del veth1.10 >/dev/null 2>&1 + timeout -s KILL 5 ip netns exec $me ip link del veth1.10 >/dev/null 2>&1 # we can't cleanup if the command is hung trying to delete the netdev if [ $? -eq 137 ]; then @@ -1012,13 +1008,13 @@ ipv6_fcnal_runtime() log_test $? 0 "Route delete" run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 
0 "Ping with nexthop" run_cmd "$IP nexthop add id 82 via 2001:db8:92::2 dev veth3" run_cmd "$IP nexthop add id 122 group 81/82" run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 0 "Ping - multipath" # @@ -1026,26 +1022,26 @@ ipv6_fcnal_runtime() # run_cmd "$IP -6 nexthop add id 83 blackhole" run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 83" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 2 "Ping - blackhole" run_cmd "$IP nexthop replace id 83 via 2001:db8:91::2 dev veth1" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 0 "Ping - blackhole replaced with gateway" run_cmd "$IP -6 nexthop replace id 83 blackhole" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 2 "Ping - gateway replaced by blackhole" run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" if [ $? -eq 0 ]; then run_cmd "$IP nexthop replace id 122 group 83" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 2 "Ping - group with blackhole" run_cmd "$IP nexthop replace id 122 group 81/82" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 
0 "Ping - group blackhole replaced with gateways" else log_test 2 0 "Ping - multipath failed" @@ -1128,15 +1124,15 @@ ipv6_fcnal_runtime() # rpfilter and default route $IP nexthop flush >/dev/null 2>&1 - run_cmd "ip netns exec me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP" + run_cmd "ip netns exec $me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP" run_cmd "$IP nexthop add id 91 via 2001:db8:91::2 dev veth1" run_cmd "$IP nexthop add id 92 via 2001:db8:92::2 dev veth3" run_cmd "$IP nexthop add id 93 group 91/92" run_cmd "$IP -6 ro add default nhid 91" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 0 "Nexthop with default route and rpfilter" run_cmd "$IP -6 ro replace default nhid 93" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1" log_test $? 0 "Nexthop with multipath default route and rpfilter" # TO-DO: @@ -1216,11 +1212,11 @@ ipv6_torture() pid1=$! ipv6_grp_replace_loop & pid2=$! - ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 & + ip netns exec $me ping -f 2001:db8:101::1 >/dev/null 2>&1 & pid3=$! - ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 & + ip netns exec $me ping -f 2001:db8:101::2 >/dev/null 2>&1 & pid4=$! - ip netns exec me mausezahn -6 veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + ip netns exec $me mausezahn -6 veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & pid5=$! sleep 300 @@ -1270,11 +1266,11 @@ ipv6_res_torture() pid1=$! ipv6_res_grp_replace_loop & pid2=$! - ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 & + ip netns exec $me ping -f 2001:db8:101::1 >/dev/null 2>&1 & pid3=$! 
- ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 & + ip netns exec $me ping -f 2001:db8:101::2 >/dev/null 2>&1 & pid4=$! - ip netns exec me mausezahn -6 veth1 \ + ip netns exec $me mausezahn -6 veth1 \ -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 \ -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & pid5=$! @@ -1544,7 +1540,7 @@ ipv4_withv6_fcnal() local lladdr set -e - lladdr=$(get_linklocal veth2 peer) + lladdr=$(get_linklocal veth2 $peer) run_cmd "$IP nexthop add id 11 via ${lladdr} dev veth1" set +e run_cmd "$IP ro add 172.16.101.1/32 nhid 11" @@ -1606,13 +1602,13 @@ ipv4_fcnal_runtime() # run_cmd "$IP nexthop replace id 21 via 172.16.1.2 dev veth1" run_cmd "$IP ro replace 172.16.101.1/32 nhid 21" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "Basic ping" run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3" run_cmd "$IP nexthop add id 122 group 21/22" run_cmd "$IP ro replace 172.16.101.1/32 nhid 122" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "Ping - multipath" run_cmd "$IP ro delete 172.16.101.1/32 nhid 122" @@ -1623,7 +1619,7 @@ ipv4_fcnal_runtime() run_cmd "$IP nexthop add id 501 via 172.16.1.2 dev veth1" run_cmd "$IP ro add default nhid 501" run_cmd "$IP ro add default via 172.16.1.3 dev veth1 metric 20" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 
0 "Ping - multiple default routes, nh first" # flip the order @@ -1632,7 +1628,7 @@ ipv4_fcnal_runtime() run_cmd "$IP ro add default via 172.16.1.2 dev veth1 metric 20" run_cmd "$IP nexthop replace id 501 via 172.16.1.3 dev veth1" run_cmd "$IP ro add default nhid 501 metric 20" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "Ping - multiple default routes, nh second" run_cmd "$IP nexthop delete nhid 501" @@ -1643,26 +1639,26 @@ ipv4_fcnal_runtime() # run_cmd "$IP nexthop add id 23 blackhole" run_cmd "$IP ro replace 172.16.101.1/32 nhid 23" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 2 "Ping - blackhole" run_cmd "$IP nexthop replace id 23 via 172.16.1.2 dev veth1" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "Ping - blackhole replaced with gateway" run_cmd "$IP nexthop replace id 23 blackhole" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 2 "Ping - gateway replaced by blackhole" run_cmd "$IP ro replace 172.16.101.1/32 nhid 122" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" if [ $? -eq 0 ]; then run_cmd "$IP nexthop replace id 122 group 23" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 2 "Ping - group with blackhole" run_cmd "$IP nexthop replace id 122 group 21/22" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 
0 "Ping - group blackhole replaced with gateways" else log_test 2 0 "Ping - multipath failed" @@ -1685,11 +1681,11 @@ ipv4_fcnal_runtime() # IPv4 with IPv6 # set -e - lladdr=$(get_linklocal veth2 peer) + lladdr=$(get_linklocal veth2 $peer) run_cmd "$IP nexthop add id 24 via ${lladdr} dev veth1" set +e run_cmd "$IP ro replace 172.16.101.1/32 nhid 24" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "IPv6 nexthop with IPv4 route" $IP neigh sh | grep -q "${lladdr} dev veth1" @@ -1713,11 +1709,11 @@ ipv4_fcnal_runtime() check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via inet6 ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "IPv6 nexthop with IPv4 route" run_cmd "$IP ro replace 172.16.101.1/32 via inet6 ${lladdr} dev veth1" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "IPv4 route with IPv6 gateway" $IP neigh sh | grep -q "${lladdr} dev veth1" @@ -1734,7 +1730,7 @@ ipv4_fcnal_runtime() run_cmd "$IP ro del 172.16.101.1/32 via inet6 ${lladdr} dev veth1" run_cmd "$IP -4 ro add default via inet6 ${lladdr} dev veth1" - run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1" + run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1" log_test $? 0 "IPv4 default route with IPv6 gateway" # @@ -1785,7 +1781,7 @@ sysctl_nexthop_compat_mode_check() local sysctlname="net.ipv4.nexthop_compat_mode" local lprefix=$1 - IPE="ip netns exec me" + IPE="ip netns exec $me" $IPE sysctl -q $sysctlname 2>&1 >/dev/null if [ $? 
-ne 0 ]; then @@ -1804,7 +1800,7 @@ sysctl_nexthop_compat_mode_set() local mode=$1 local lprefix=$2 - IPE="ip netns exec me" + IPE="ip netns exec $me" out=$($IPE sysctl -w $sysctlname=$mode) log_test $? 0 "$lprefix set compat mode - $mode" @@ -1988,11 +1984,11 @@ ipv4_torture() pid1=$! ipv4_grp_replace_loop & pid2=$! - ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 & + ip netns exec $me ping -f 172.16.101.1 >/dev/null 2>&1 & pid3=$! - ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 & + ip netns exec $me ping -f 172.16.101.2 >/dev/null 2>&1 & pid4=$! - ip netns exec me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & + ip netns exec $me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & pid5=$! sleep 300 @@ -2042,11 +2038,11 @@ ipv4_res_torture() pid1=$! ipv4_res_grp_replace_loop & pid2=$! - ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 & + ip netns exec $me ping -f 172.16.101.1 >/dev/null 2>&1 & pid3=$! - ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 & + ip netns exec $me ping -f 172.16.101.2 >/dev/null 2>&1 & pid4=$! - ip netns exec me mausezahn veth1 \ + ip netns exec $me mausezahn veth1 \ -B 172.16.101.2 -A 172.16.1.1 -c 0 \ -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 & pid5=$! @@ -2081,10 +2077,10 @@ basic() # create nh with linkdown device - fails $IP li set veth1 up - ip -netns peer li set veth2 down + ip -netns $peer li set veth2 down run_cmd "$IP nexthop add id 1 dev veth1" log_test $? 
2 "Nexthop with device that is linkdown" - ip -netns peer li set veth2 up + ip -netns $peer li set veth2 up # device only run_cmd "$IP nexthop add id 1 dev veth1" @@ -2465,7 +2461,7 @@ fi for t in $TESTS do case $t in - none) IP="ip -netns peer"; setup; exit 0;; + none) IP="ip -netns $peer"; setup; exit 0;; *) setup; $t; cleanup;; esac done diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 63c3eaec8d..51157a5559 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -3,14 +3,9 @@ # This test is for checking IPv4 and IPv6 FIB rules API -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - +source lib.sh ret=0 - PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} -IP="ip -netns testns" -IP_PEER="ip -netns peerns" RTABLE=100 RTABLE_PEER=101 @@ -84,8 +79,8 @@ check_nettest() setup() { set -e - ip netns add testns - $IP link set dev lo up + setup_ns testns + IP="ip -netns $testns" $IP link add dummy0 type dummy $IP link set dev dummy0 up @@ -98,18 +93,19 @@ setup() cleanup() { $IP link del dev dummy0 &> /dev/null - ip netns del testns + cleanup_ns $testns } setup_peer() { set -e - ip netns add peerns + setup_ns peerns + IP_PEER="ip -netns $peerns" $IP_PEER link set dev lo up - ip link add name veth0 netns testns type veth \ - peer name veth1 netns peerns + ip link add name veth0 netns $testns type veth \ + peer name veth1 netns $peerns $IP link set dev veth0 up $IP_PEER link set dev veth1 up @@ -131,7 +127,7 @@ setup_peer() cleanup_peer() { $IP link del dev veth0 - ip netns del peerns + ip netns del $peerns } fib_check_iproute_support() @@ -270,11 +266,11 @@ fib_rule6_connect_test() # (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3). # The ECN bits shouldn't influence the result of the test. 
for dsfield in 0x04 0x05 0x06 0x07; do - nettest -q -6 -B -t 5 -N testns -O peerns -U -D \ + nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D \ -Q "${dsfield}" -l 2001:db8::1:11 -r 2001:db8::1:11 log_test $? 0 "rule6 dsfield udp connect (dsfield ${dsfield})" - nettest -q -6 -B -t 5 -N testns -O peerns -Q "${dsfield}" \ + nettest -q -6 -B -t 5 -N $testns -O $peerns -Q "${dsfield}" \ -l 2001:db8::1:11 -r 2001:db8::1:11 log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})" done @@ -337,11 +333,11 @@ fib_rule4_test() # need enable forwarding and disable rp_filter temporarily as all the # addresses are in the same subnet and egress device == ingress device. - ip netns exec testns sysctl -qw net.ipv4.ip_forward=1 - ip netns exec testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0 + ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1 + ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0 match="from $SRC_IP iif $DEV" fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table" - ip netns exec testns sysctl -qw net.ipv4.ip_forward=0 + ip netns exec $testns sysctl -qw net.ipv4.ip_forward=0 # Reject dsfield (tos) options which have ECN bits set for cnt in $(seq 1 3); do @@ -407,11 +403,11 @@ fib_rule4_connect_test() # (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3). # The ECN bits shouldn't influence the result of the test. for dsfield in 0x04 0x05 0x06 0x07; do - nettest -q -B -t 5 -N testns -O peerns -D -U -Q "${dsfield}" \ + nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q "${dsfield}" \ -l 198.51.100.11 -r 198.51.100.11 log_test $? 0 "rule4 dsfield udp connect (dsfield ${dsfield})" - nettest -q -B -t 5 -N testns -O peerns -Q "${dsfield}" \ + nettest -q -B -t 5 -N $testns -O $peerns -Q "${dsfield}" \ -l 198.51.100.11 -r 198.51.100.11 log_test $? 
0 "rule4 dsfield tcp connect (dsfield ${dsfield})" done diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 66d0db7a26..b3ecccbbfc 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -3,10 +3,8 @@ # This test is for checking IPv4 and IPv6 FIB behavior in response to # different events. - +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 # all tests in this script. Can be overridden with -t option TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \ @@ -18,8 +16,6 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \ VERBOSE=0 PAUSE_ON_FAIL=no PAUSE=no -IP="$(which ip) -netns ns1" -NS_EXEC="$(which ip) netns exec ns1" which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) @@ -55,11 +51,11 @@ log_test() setup() { set -e - ip netns add ns1 - ip netns set ns1 auto - $IP link set dev lo up - ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1 - ip netns exec ns1 sysctl -qw net.ipv6.conf.all.forwarding=1 + setup_ns ns1 + IP="$(which ip) -netns $ns1" + NS_EXEC="$(which ip) netns exec $ns1" + ip netns exec $ns1 sysctl -qw net.ipv4.ip_forward=1 + ip netns exec $ns1 sysctl -qw net.ipv6.conf.all.forwarding=1 $IP link add dummy0 type dummy $IP link set dev dummy0 up @@ -72,8 +68,7 @@ setup() cleanup() { $IP link del dev dummy0 &> /dev/null - ip netns del ns1 &> /dev/null - ip netns del ns2 &> /dev/null + cleanup_ns $ns1 $ns2 } get_linklocal() @@ -448,28 +443,25 @@ fib_rp_filter_test() setup set -e - ip netns add ns2 - ip netns set ns2 auto - - ip -netns ns2 link set dev lo up + setup_ns ns2 $IP link add name veth1 type veth peer name veth2 - $IP link set dev veth2 netns ns2 + $IP link set dev veth2 netns $ns2 $IP address add 192.0.2.1/24 dev veth1 - ip -netns ns2 address add 192.0.2.1/24 dev veth2 + ip -netns $ns2 address add 192.0.2.1/24 dev veth2 $IP link set dev veth1 up - 
ip -netns ns2 link set dev veth2 up + ip -netns $ns2 link set dev veth2 up $IP link set dev lo address 52:54:00:6a:c7:5e $IP link set dev veth1 address 52:54:00:6a:c7:5e - ip -netns ns2 link set dev lo address 52:54:00:6a:c7:5e - ip -netns ns2 link set dev veth2 address 52:54:00:6a:c7:5e + ip -netns $ns2 link set dev lo address 52:54:00:6a:c7:5e + ip -netns $ns2 link set dev veth2 address 52:54:00:6a:c7:5e # 1. (ns2) redirect lo's egress to veth2's egress - ip netns exec ns2 tc qdisc add dev lo parent root handle 1: fq_codel - ip netns exec ns2 tc filter add dev lo parent 1: protocol arp basic \ + ip netns exec $ns2 tc qdisc add dev lo parent root handle 1: fq_codel + ip netns exec $ns2 tc filter add dev lo parent 1: protocol arp basic \ action mirred egress redirect dev veth2 - ip netns exec ns2 tc filter add dev lo parent 1: protocol ip basic \ + ip netns exec $ns2 tc filter add dev lo parent 1: protocol ip basic \ action mirred egress redirect dev veth2 # 2. (ns1) redirect veth1's ingress to lo's ingress @@ -487,24 +479,24 @@ fib_rp_filter_test() action mirred egress redirect dev veth1 # 4. 
(ns2) redirect veth2's ingress to lo's ingress - ip netns exec ns2 tc qdisc add dev veth2 ingress - ip netns exec ns2 tc filter add dev veth2 ingress protocol arp basic \ + ip netns exec $ns2 tc qdisc add dev veth2 ingress + ip netns exec $ns2 tc filter add dev veth2 ingress protocol arp basic \ action mirred ingress redirect dev lo - ip netns exec ns2 tc filter add dev veth2 ingress protocol ip basic \ + ip netns exec $ns2 tc filter add dev veth2 ingress protocol ip basic \ action mirred ingress redirect dev lo $NS_EXEC sysctl -qw net.ipv4.conf.all.rp_filter=1 $NS_EXEC sysctl -qw net.ipv4.conf.all.accept_local=1 $NS_EXEC sysctl -qw net.ipv4.conf.all.route_localnet=1 - ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=1 - ip netns exec ns2 sysctl -qw net.ipv4.conf.all.accept_local=1 - ip netns exec ns2 sysctl -qw net.ipv4.conf.all.route_localnet=1 + ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=1 + ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.accept_local=1 + ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.route_localnet=1 set +e - run_cmd "ip netns exec ns2 ping -w1 -c1 192.0.2.1" + run_cmd "ip netns exec $ns2 ping -w1 -c1 192.0.2.1" log_test $? 0 "rp_filter passes local packets" - run_cmd "ip netns exec ns2 ping -w1 -c1 127.0.0.1" + run_cmd "ip netns exec $ns2 ping -w1 -c1 127.0.0.1" log_test $? 
0 "rp_filter passes loopback packets" cleanup @@ -959,34 +951,32 @@ route_setup() [ "${VERBOSE}" = "1" ] && set -x set -e - ip netns add ns2 - ip netns set ns2 auto - ip -netns ns2 link set dev lo up - ip netns exec ns2 sysctl -qw net.ipv4.ip_forward=1 - ip netns exec ns2 sysctl -qw net.ipv6.conf.all.forwarding=1 + setup_ns ns2 + ip netns exec $ns2 sysctl -qw net.ipv4.ip_forward=1 + ip netns exec $ns2 sysctl -qw net.ipv6.conf.all.forwarding=1 $IP li add veth1 type veth peer name veth2 $IP li add veth3 type veth peer name veth4 $IP li set veth1 up $IP li set veth3 up - $IP li set veth2 netns ns2 up - $IP li set veth4 netns ns2 up - ip -netns ns2 li add dummy1 type dummy - ip -netns ns2 li set dummy1 up + $IP li set veth2 netns $ns2 up + $IP li set veth4 netns $ns2 up + ip -netns $ns2 li add dummy1 type dummy + ip -netns $ns2 li set dummy1 up $IP -6 addr add 2001:db8:101::1/64 dev veth1 nodad $IP -6 addr add 2001:db8:103::1/64 dev veth3 nodad $IP addr add 172.16.101.1/24 dev veth1 $IP addr add 172.16.103.1/24 dev veth3 - ip -netns ns2 -6 addr add 2001:db8:101::2/64 dev veth2 nodad - ip -netns ns2 -6 addr add 2001:db8:103::2/64 dev veth4 nodad - ip -netns ns2 -6 addr add 2001:db8:104::1/64 dev dummy1 nodad + ip -netns $ns2 -6 addr add 2001:db8:101::2/64 dev veth2 nodad + ip -netns $ns2 -6 addr add 2001:db8:103::2/64 dev veth4 nodad + ip -netns $ns2 -6 addr add 2001:db8:104::1/64 dev dummy1 nodad - ip -netns ns2 addr add 172.16.101.2/24 dev veth2 - ip -netns ns2 addr add 172.16.103.2/24 dev veth4 - ip -netns ns2 addr add 172.16.104.1/24 dev dummy1 + ip -netns $ns2 addr add 172.16.101.2/24 dev veth2 + ip -netns $ns2 addr add 172.16.103.2/24 dev veth4 + ip -netns $ns2 addr add 172.16.104.1/24 dev dummy1 set +e } @@ -1238,7 +1228,7 @@ ipv6_addr_metric_test() log_test $rc 0 "Modify metric of address" # verify prefix route removed on down - run_cmd "ip netns exec ns1 sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1" + run_cmd "ip netns exec $ns1 sysctl -qw 
net.ipv6.conf.all.keep_addr_on_down=1" run_cmd "$IP li set dev dummy2 down" rc=$? if [ $rc -eq 0 ]; then @@ -1344,7 +1334,7 @@ ipv6_route_metrics_test() log_test $rc 0 "Multipath route with mtu metric" $IP -6 ro add 2001:db8:104::/64 via 2001:db8:101::2 mtu 1300 - run_cmd "ip netns exec ns1 ${ping6} -w1 -c1 -s 1500 2001:db8:104::1" + run_cmd "ip netns exec $ns1 ${ping6} -w1 -c1 -s 1500 2001:db8:104::1" log_test $? 0 "Using route with mtu metric" run_cmd "$IP -6 ro add 2001:db8:114::/64 via 2001:db8:101::2 congctl lock foo" @@ -1599,19 +1589,19 @@ ipv4_rt_replace() ipv4_local_rt_cache() { run_cmd "ip addr add 10.0.0.1/32 dev lo" - run_cmd "ip netns add test-ns" + run_cmd "setup_ns test-ns" run_cmd "ip link add veth-outside type veth peer name veth-inside" run_cmd "ip link add vrf-100 type vrf table 1100" run_cmd "ip link set veth-outside master vrf-100" - run_cmd "ip link set veth-inside netns test-ns" + run_cmd "ip link set veth-inside netns $test-ns" run_cmd "ip link set veth-outside up" run_cmd "ip link set vrf-100 up" run_cmd "ip route add 10.1.1.1/32 dev veth-outside table 1100" - run_cmd "ip netns exec test-ns ip link set veth-inside up" - run_cmd "ip netns exec test-ns ip addr add 10.1.1.1/32 dev veth-inside" - run_cmd "ip netns exec test-ns ip route add 10.0.0.1/32 dev veth-inside" - run_cmd "ip netns exec test-ns ip route add default via 10.0.0.1" - run_cmd "ip netns exec test-ns ping 10.0.0.1 -c 1 -i 1" + run_cmd "ip netns exec $test-ns ip link set veth-inside up" + run_cmd "ip netns exec $test-ns ip addr add 10.1.1.1/32 dev veth-inside" + run_cmd "ip netns exec $test-ns ip route add 10.0.0.1/32 dev veth-inside" + run_cmd "ip netns exec $test-ns ip route add default via 10.0.0.1" + run_cmd "ip netns exec $test-ns ping 10.0.0.1 -c 1 -i 1" run_cmd "ip link delete vrf-100" # if we do not hang test is a success @@ -1841,7 +1831,7 @@ ipv4_route_metrics_test() log_test $rc 0 "Multipath route with mtu metric" $IP ro add 172.16.104.0/24 via 172.16.101.2 mtu 1300 - 
run_cmd "ip netns exec ns1 ping -w1 -c1 -s 1500 172.16.104.1" + run_cmd "ip netns exec $ns1 ping -w1 -c1 -s 1500 172.16.104.1" log_test $? 0 "Using route with mtu metric" run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 congctl lock foo" @@ -2105,7 +2095,7 @@ ipv4_route_v6_gw_test() check_route "172.16.104.0/24 via inet6 2001:db8:101::2 dev veth1" fi - run_cmd "ip netns exec ns1 ping -w1 -c1 172.16.104.1" + run_cmd "ip netns exec $ns1 ping -w1 -c1 172.16.104.1" log_test $rc 0 "Single path route with IPv6 gateway - ping" run_cmd "$IP ro del 172.16.104.0/24 via inet6 2001:db8:101::2" @@ -2196,7 +2186,7 @@ ipv4_mangle_test() sleep 2 local tmp_file=$(mktemp) - ip netns exec ns2 socat UDP4-LISTEN:54321,fork $tmp_file & + ip netns exec $ns2 socat UDP4-LISTEN:54321,fork $tmp_file & # Add a FIB rule and a route that will direct our connection to the # listening server. @@ -2254,7 +2244,7 @@ ipv6_mangle_test() sleep 2 local tmp_file=$(mktemp) - ip netns exec ns2 socat UDP6-LISTEN:54321,fork $tmp_file & + ip netns exec $ns2 socat UDP6-LISTEN:54321,fork $tmp_file & # Add a FIB rule and a route that will direct our connection to the # listening server. 
@@ -2423,37 +2413,37 @@ ipv4_mpath_list_test() route_setup set -e - run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off" - - run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\"" - run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\"" - run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on" - run_cmd "ip -n ns2 link add name nh1 up type dummy" - run_cmd "ip -n ns2 link add name nh2 up type dummy" - run_cmd "ip -n ns2 address add 172.16.201.1/24 dev nh1" - run_cmd "ip -n ns2 address add 172.16.202.1/24 dev nh2" - run_cmd "ip -n ns2 neigh add 172.16.201.2 lladdr 00:11:22:33:44:55 nud perm dev nh1" - run_cmd "ip -n ns2 neigh add 172.16.202.2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2" - run_cmd "ip -n ns2 route add 203.0.113.0/24 + run_cmd "ip netns exec $ns1 ethtool -K veth1 tcp-segmentation-offload off" + + run_cmd "ip netns exec $ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\"" + run_cmd "ip netns exec $ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\"" + run_cmd "ip netns exec $ns2 ethtool -K veth2 generic-receive-offload on" + run_cmd "ip -n $ns2 link add name nh1 up type dummy" + run_cmd "ip -n $ns2 link add name nh2 up type dummy" + run_cmd "ip -n $ns2 address add 172.16.201.1/24 dev nh1" + run_cmd "ip -n $ns2 address add 172.16.202.1/24 dev nh2" + run_cmd "ip -n $ns2 neigh add 172.16.201.2 lladdr 00:11:22:33:44:55 nud perm dev nh1" + run_cmd "ip -n $ns2 neigh add 172.16.202.2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2" + run_cmd "ip -n $ns2 route add 203.0.113.0/24 nexthop via 172.16.201.2 nexthop via 172.16.202.2" - run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1" - run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0" - run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0" - run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0" + 
run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1" + run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0" + run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0" + run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0" set +e - local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]') + local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]') local tmp_file=$(mktemp) - local cmd="ip netns exec ns1 mausezahn veth1 -a own -b $dmac + local cmd="ip netns exec $ns1 mausezahn veth1 -a own -b $dmac -A 172.16.101.1 -B 203.0.113.1 -t udp 'sp=12345,dp=0-65535' -q" # Packets forwarded in a list using a multipath route must not reuse a # cached result so that a flow always hits the same nexthop. In other # words, the FIB lookup tracepoint needs to be triggered for every # packet. - local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets) + local t0_rx_pkts=$(link_stats_get $ns2 veth2 rx packets) run_cmd "perf stat -a -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" - local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets) + local t1_rx_pkts=$(link_stats_get $ns2 veth2 rx packets) local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l) list_rcv_eval $tmp_file $diff @@ -2471,34 +2461,34 @@ ipv6_mpath_list_test() route_setup set -e - run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off" - - run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\"" - run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\"" - run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on" - run_cmd "ip -n ns2 link add name nh1 up type dummy" - run_cmd "ip -n ns2 link add name nh2 up type dummy" - run_cmd "ip -n ns2 -6 address add 2001:db8:201::1/64 dev nh1" - run_cmd "ip -n ns2 -6 address add 2001:db8:202::1/64 dev nh2" - run_cmd "ip -n ns2 -6 neigh add 2001:db8:201::2 lladdr 
00:11:22:33:44:55 nud perm dev nh1" - run_cmd "ip -n ns2 -6 neigh add 2001:db8:202::2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2" - run_cmd "ip -n ns2 -6 route add 2001:db8:301::/64 + run_cmd "ip netns exec $ns1 ethtool -K veth1 tcp-segmentation-offload off" + + run_cmd "ip netns exec $ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\"" + run_cmd "ip netns exec $ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\"" + run_cmd "ip netns exec $ns2 ethtool -K veth2 generic-receive-offload on" + run_cmd "ip -n $ns2 link add name nh1 up type dummy" + run_cmd "ip -n $ns2 link add name nh2 up type dummy" + run_cmd "ip -n $ns2 -6 address add 2001:db8:201::1/64 dev nh1" + run_cmd "ip -n $ns2 -6 address add 2001:db8:202::1/64 dev nh2" + run_cmd "ip -n $ns2 -6 neigh add 2001:db8:201::2 lladdr 00:11:22:33:44:55 nud perm dev nh1" + run_cmd "ip -n $ns2 -6 neigh add 2001:db8:202::2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2" + run_cmd "ip -n $ns2 -6 route add 2001:db8:301::/64 nexthop via 2001:db8:201::2 nexthop via 2001:db8:202::2" - run_cmd "ip netns exec ns2 sysctl -qw net.ipv6.fib_multipath_hash_policy=1" + run_cmd "ip netns exec $ns2 sysctl -qw net.ipv6.fib_multipath_hash_policy=1" set +e - local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]') + local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]') local tmp_file=$(mktemp) - local cmd="ip netns exec ns1 mausezahn -6 veth1 -a own -b $dmac + local cmd="ip netns exec $ns1 mausezahn -6 veth1 -a own -b $dmac -A 2001:db8:101::1 -B 2001:db8:301::1 -t udp 'sp=12345,dp=0-65535' -q" # Packets forwarded in a list using a multipath route must not reuse a # cached result so that a flow always hits the same nexthop. In other # words, the FIB lookup tracepoint needs to be triggered for every # packet. 
- local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets) + local t0_rx_pkts=$(link_stats_get $ns2 veth2 rx packets) run_cmd "perf stat -a -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" - local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets) + local t1_rx_pkts=$(link_stats_get $ns2 veth2 rx packets) local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l) list_rcv_eval $tmp_file $diff diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index df593b7b3e..4de92632f4 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -17,6 +17,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ dual_vxlan_bridge.sh \ ethtool_extended_state.sh \ ethtool_mm.sh \ + ethtool_rmon.sh \ ethtool.sh \ gre_custom_multipath_hash.sh \ gre_inner_v4_multipath.sh \ @@ -111,7 +112,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ vxlan_symmetric_ipv6.sh \ vxlan_symmetric.sh -TEST_PROGS_EXTENDED := devlink_lib.sh \ +TEST_FILES := devlink_lib.sh \ ethtool_lib.sh \ fib_offload_lib.sh \ forwarding.config.sample \ diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh index a3678dfe58..d9d587454d 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -803,11 +803,198 @@ cfg_test_dump() cfg_test_dump_common "L2" l2_grps_get } +# Check flush functionality with different parameters. +cfg_test_flush() +{ + local num_entries + + # Add entries with different attributes and check that they are all + # flushed when the flush command is given with no parameters. + + # Different port. + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.2 vid 10 + + # Different VLAN ID. 
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.3 vid 10 + bridge mdb add dev br0 port $swp1 grp 239.1.1.4 vid 20 + + # Different routing protocol. + bridge mdb add dev br0 port $swp1 grp 239.1.1.5 vid 10 proto bgp + bridge mdb add dev br0 port $swp1 grp 239.1.1.6 vid 10 proto zebra + + # Different state. + bridge mdb add dev br0 port $swp1 grp 239.1.1.7 vid 10 permanent + bridge mdb add dev br0 port $swp1 grp 239.1.1.8 vid 10 temp + + bridge mdb flush dev br0 + num_entries=$(bridge mdb show dev br0 | wc -l) + [[ $num_entries -eq 0 ]] + check_err $? 0 "Not all entries flushed after flush all" + + # Check that when flushing by port only entries programmed with the + # specified port are flushed and the rest are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port br0 grp 239.1.1.1 vid 10 + + bridge mdb flush dev br0 port $swp1 + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1" + check_fail $? "Entry not flushed by specified port" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2" + check_err $? "Entry flushed by wrong port" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0" + check_err $? "Host entry flushed by wrong port" + + bridge mdb flush dev br0 port br0 + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0" + check_fail $? "Host entry not flushed by specified port" + + bridge mdb flush dev br0 + + # Check that when flushing by VLAN ID only entries programmed with the + # specified VLAN ID are flushed and the rest are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10 + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 20 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 20 + + bridge mdb flush dev br0 vid 10 + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null + check_fail $? 
"Entry not flushed by specified VLAN ID" + bridge mdb get dev br0 grp 239.1.1.1 vid 20 &> /dev/null + check_err $? "Entry flushed by wrong VLAN ID" + + bridge mdb flush dev br0 + + # Check that all permanent entries are flushed when "permanent" is + # specified and that temporary entries are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 temp vid 10 + + bridge mdb flush dev br0 permanent + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1" + check_fail $? "Entry not flushed by \"permanent\" state" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2" + check_err $? "Entry flushed by wrong state (\"permanent\")" + + bridge mdb flush dev br0 + + # Check that all temporary entries are flushed when "nopermanent" is + # specified and that permanent entries are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10 + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 temp vid 10 + + bridge mdb flush dev br0 nopermanent + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1" + check_err $? "Entry flushed by wrong state (\"nopermanent\")" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2" + check_fail $? "Entry not flushed by \"nopermanent\" state" + + bridge mdb flush dev br0 + + # Check that L2 host entries are not flushed when "nopermanent" is + # specified, but flushed when "permanent" is specified. + + bridge mdb add dev br0 port br0 grp 01:02:03:04:05:06 permanent vid 10 + + bridge mdb flush dev br0 nopermanent + + bridge mdb get dev br0 grp 01:02:03:04:05:06 vid 10 &> /dev/null + check_err $? "L2 host entry flushed by wrong state (\"nopermanent\")" + + bridge mdb flush dev br0 permanent + + bridge mdb get dev br0 grp 01:02:03:04:05:06 vid 10 &> /dev/null + check_fail $? 
"L2 host entry not flushed by \"permanent\" state" + + bridge mdb flush dev br0 + + # Check that IPv4 host entries are not flushed when "permanent" is + # specified, but flushed when "nopermanent" is specified. + + bridge mdb add dev br0 port br0 grp 239.1.1.1 temp vid 10 + + bridge mdb flush dev br0 permanent + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null + check_err $? "IPv4 host entry flushed by wrong state (\"permanent\")" + + bridge mdb flush dev br0 nopermanent + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null + check_fail $? "IPv4 host entry not flushed by \"nopermanent\" state" + + bridge mdb flush dev br0 + + # Check that IPv6 host entries are not flushed when "permanent" is + # specified, but flushed when "nopermanent" is specified. + + bridge mdb add dev br0 port br0 grp ff0e::1 temp vid 10 + + bridge mdb flush dev br0 permanent + + bridge mdb get dev br0 grp ff0e::1 vid 10 &> /dev/null + check_err $? "IPv6 host entry flushed by wrong state (\"permanent\")" + + bridge mdb flush dev br0 nopermanent + + bridge mdb get dev br0 grp ff0e::1 vid 10 &> /dev/null + check_fail $? "IPv6 host entry not flushed by \"nopermanent\" state" + + bridge mdb flush dev br0 + + # Check that when flushing by routing protocol only entries programmed + # with the specified routing protocol are flushed and the rest are not. + + bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 proto bgp + bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10 proto zebra + bridge mdb add dev br0 port br0 grp 239.1.1.1 vid 10 + + bridge mdb flush dev br0 proto bgp + + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1" + check_fail $? "Entry not flushed by specified routing protocol" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2" + check_err $? "Entry flushed by wrong routing protocol" + bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0" + check_err $? 
"Host entry flushed by wrong routing protocol" + + bridge mdb flush dev br0 + + # Test that an error is returned when trying to flush using unsupported + # parameters. + + bridge mdb flush dev br0 src_vni 10 &> /dev/null + check_fail $? "Managed to flush by source VNI" + + bridge mdb flush dev br0 dst 198.51.100.1 &> /dev/null + check_fail $? "Managed to flush by destination IP" + + bridge mdb flush dev br0 dst_port 4789 &> /dev/null + check_fail $? "Managed to flush by UDP destination port" + + bridge mdb flush dev br0 vni 10 &> /dev/null + check_fail $? "Managed to flush by destination VNI" + + log_test "Flush tests" +} + cfg_test() { cfg_test_host cfg_test_port cfg_test_dump + cfg_test_flush } __fwd_test_host_ip() @@ -1170,8 +1357,8 @@ ctrl_test() ctrl_mldv2_is_in_test } -if ! bridge mdb help 2>&1 | grep -q "get"; then - echo "SKIP: iproute2 too old, missing bridge mdb get support" +if ! bridge mdb help 2>&1 | grep -q "flush"; then + echo "SKIP: iproute2 too old, missing bridge mdb flush support" exit $ksft_skip fi diff --git a/tools/testing/selftests/net/forwarding/ethtool_mm.sh b/tools/testing/selftests/net/forwarding/ethtool_mm.sh index 39e736f303..50d5bfb17e 100755 --- a/tools/testing/selftests/net/forwarding/ethtool_mm.sh +++ b/tools/testing/selftests/net/forwarding/ethtool_mm.sh @@ -25,6 +25,10 @@ traffic_test() local after= local delta= + if [ ${has_pmac_stats[$if]} = false ]; then + src="aggregate" + fi + before=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src) $MZ $if -q -c $num_pkts -p 64 -b bcast -t ip -R $PREEMPTIBLE_PRIO @@ -155,15 +159,48 @@ manual_failed_verification_h2_to_h1() manual_failed_verification $h2 $h1 } +smallest_supported_add_frag_size() +{ + local iface=$1 + local rx_min_frag_size= + + rx_min_frag_size=$(ethtool --json --show-mm $iface | \ + jq '.[]."rx-min-frag-size"') + + if [ $rx_min_frag_size -le 60 ]; then + echo 0 + elif [ $rx_min_frag_size -le 124 ]; then + echo 1 + elif [ $rx_min_frag_size -le 188 ]; then + 
echo 2 + elif [ $rx_min_frag_size -le 252 ]; then + echo 3 + else + echo "$iface: RX min frag size $rx_min_frag_size cannot be advertised over LLDP" + exit 1 + fi +} + +expected_add_frag_size() +{ + local iface=$1 + local requested=$2 + local min=$(smallest_supported_add_frag_size $iface) + + [ $requested -le $min ] && echo $min || echo $requested +} + lldp_change_add_frag_size() { local add_frag_size=$1 + local pattern= lldptool -T -i $h1 -V addEthCaps addFragSize=$add_frag_size >/dev/null # Wait for TLVs to be received sleep 2 - lldptool -i $h2 -t -n -V addEthCaps | \ - grep -q "Additional fragment size: $add_frag_size" + pattern=$(printf "Additional fragment size: %d" \ + $(expected_add_frag_size $h1 $add_frag_size)) + lldptool -i $h2 -t -n -V addEthCaps | grep -q "$pattern" } lldp() @@ -284,6 +321,13 @@ for netif in ${NETIFS[@]}; do echo "SKIP: $netif does not support MAC Merge" exit $ksft_skip fi + + if check_ethtool_pmac_std_stats_support $netif eth-mac; then + has_pmac_stats[$netif]=true + else + has_pmac_stats[$netif]=false + echo "$netif does not report pMAC statistics, falling back to aggregate" + fi done trap cleanup EXIT diff --git a/tools/testing/selftests/net/forwarding/ethtool_rmon.sh b/tools/testing/selftests/net/forwarding/ethtool_rmon.sh new file mode 100755 index 0000000000..41a34a61f7 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ethtool_rmon.sh @@ -0,0 +1,143 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + rmon_rx_histogram + rmon_tx_histogram +" + +NUM_NETIFS=2 +source lib.sh + +ETH_FCS_LEN=4 +ETH_HLEN=$((6+6+2)) + +declare -A netif_mtu + +ensure_mtu() +{ + local iface=$1; shift + local len=$1; shift + local current=$(ip -j link show dev $iface | jq -r '.[0].mtu') + local required=$((len - ETH_HLEN - ETH_FCS_LEN)) + + if [ $current -lt $required ]; then + ip link set dev $iface mtu $required || return 1 + fi +} + +bucket_test() +{ + local iface=$1; shift + local neigh=$1; shift + local set=$1; shift + local 
bucket=$1; shift + local len=$1; shift + local num_rx=10000 + local num_tx=20000 + local expected= + local before= + local after= + local delta= + + # Mausezahn does not include FCS bytes in its length - but the + # histogram counters do + len=$((len - ETH_FCS_LEN)) + + before=$(ethtool --json -S $iface --groups rmon | \ + jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val") + + # Send 10k one way and 20k in the other, to detect counters + # mapped to the wrong direction + $MZ $neigh -q -c $num_rx -p $len -a own -b bcast -d 10us + $MZ $iface -q -c $num_tx -p $len -a own -b bcast -d 10us + + after=$(ethtool --json -S $iface --groups rmon | \ + jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val") + + delta=$((after - before)) + + expected=$([ $set = rx ] && echo $num_rx || echo $num_tx) + + # Allow some extra tolerance for other packets sent by the stack + [ $delta -ge $expected ] && [ $delta -le $((expected + 100)) ] +} + +rmon_histogram() +{ + local iface=$1; shift + local neigh=$1; shift + local set=$1; shift + local nbuckets=0 + local step= + + RET=0 + + while read -r -a bucket; do + step="$set-pkts${bucket[0]}to${bucket[1]} on $iface" + + for if in $iface $neigh; do + if ! ensure_mtu $if ${bucket[0]}; then + log_test_skip "$if does not support the required MTU for $step" + return + fi + done + + if ! 
bucket_test $iface $neigh $set $nbuckets ${bucket[0]}; then + check_err 1 "$step failed" + return 1 + fi + log_test "$step" + nbuckets=$((nbuckets + 1)) + done < <(ethtool --json -S $iface --groups rmon | \ + jq -r ".[0].rmon[\"${set}-pktsNtoM\"][]|[.low, .high]|@tsv" 2>/dev/null) + + if [ $nbuckets -eq 0 ]; then + log_test_skip "$iface does not support $set histogram counters" + return + fi +} + +rmon_rx_histogram() +{ + rmon_histogram $h1 $h2 rx + rmon_histogram $h2 $h1 rx +} + +rmon_tx_histogram() +{ + rmon_histogram $h1 $h2 tx + rmon_histogram $h2 $h1 tx +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + for iface in $h1 $h2; do + netif_mtu[$iface]=$(ip -j link show dev $iface | jq -r '.[0].mtu') + ip link set dev $iface up + done +} + +cleanup() +{ + pre_cleanup + + for iface in $h2 $h1; do + ip link set dev $iface \ + mtu ${netif_mtu[$iface]} \ + down + done +} + +check_ethtool_counter_group_support +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index e37a15eda6..8a61464ab6 100755..100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -4,9 +4,6 @@ ############################################################################## # Defines -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - # Can be overridden by the configuration file. PING=${PING:=ping} PING6=${PING6:=ping6} @@ -41,6 +38,32 @@ if [[ -f $relative_path/forwarding.config ]]; then source "$relative_path/forwarding.config" fi +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +busywait() +{ + local timeout=$1; shift + + local start_time="$(date -u +%s%3N)" + while true + do + local out + out=$("$@") + local ret=$? 
+ if ((!ret)); then + echo -n "$out" + return 0 + fi + + local current_time="$(date -u +%s%3N)" + if ((current_time - start_time > timeout)); then + echo -n "$out" + return 1 + fi + done +} + ############################################################################## # Sanity checks @@ -148,6 +171,24 @@ check_ethtool_mm_support() fi } +check_ethtool_counter_group_support() +{ + ethtool --help 2>&1| grep -- '--all-groups' &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: ethtool too old; it is missing standard counter group support" + exit $ksft_skip + fi +} + +check_ethtool_pmac_std_stats_support() +{ + local dev=$1; shift + local grp=$1; shift + + [ 0 -ne $(ethtool --json -S $dev --all-groups --src pmac 2>/dev/null \ + | jq ".[].\"$grp\" | length") ] +} + check_locked_port_support() { if ! bridge -d link show | grep -q " locked"; then @@ -395,29 +436,6 @@ log_info() echo "INFO: $msg" } -busywait() -{ - local timeout=$1; shift - - local start_time="$(date -u +%s%3N)" - while true - do - local out - out=$("$@") - local ret=$? - if ((!ret)); then - echo -n "$out" - return 0 - fi - - local current_time="$(date -u +%s%3N)" - if ((current_time - start_time > timeout)); then - echo -n "$out" - return 1 - fi - done -} - not() { "$@" diff --git a/tools/testing/selftests/net/fq_band_pktlimit.sh b/tools/testing/selftests/net/fq_band_pktlimit.sh new file mode 100755 index 0000000000..24b77bdf41 --- /dev/null +++ b/tools/testing/selftests/net/fq_band_pktlimit.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Verify that FQ has a packet limit per band: +# +# 1. set the limit to 10 per band +# 2. send 20 pkts on band A: verify that 10 are queued, 10 dropped +# 3. send 20 pkts on band A: verify that 0 are queued, 20 dropped +# 4. send 20 pkts on band B: verify that 10 are queued, 10 dropped +# +# Send packets with a 100ms delay to ensure that previously sent +# packets are still queued when later ones are sent. +# Use SO_TXTIME for this. 
+ +die() { + echo "$1" + exit 1 +} + +# run inside private netns +if [[ $# -eq 0 ]]; then + ./in_netns.sh "$0" __subprocess + exit +fi + +ip link add type dummy +ip link set dev dummy0 up +ip -6 addr add fdaa::1/128 dev dummy0 +ip -6 route add fdaa::/64 dev dummy0 +tc qdisc replace dev dummy0 root handle 1: fq quantum 1514 initial_quantum 1514 limit 10 + +./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000 +OUT1="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" + +./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000 +OUT2="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" + +./cmsg_sender -6 -p u -d 100000 -n 20 -P 7 fdaa::2 8000 +OUT3="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" + +# Initial stats will report zero sent, as all packets are still +# queued in FQ. Sleep for the delay period (100ms) and see that +# twenty are now sent. +sleep 0.1 +OUT4="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" + +# Log the output after the test +echo "${OUT1}" +echo "${OUT2}" +echo "${OUT3}" +echo "${OUT4}" + +# Test the output for expected values +echo "${OUT1}" | grep -q '0\ pkt\ (dropped\ 10' || die "unexpected drop count at 1" +echo "${OUT2}" | grep -q '0\ pkt\ (dropped\ 30' || die "unexpected drop count at 2" +echo "${OUT3}" | grep -q '0\ pkt\ (dropped\ 40' || die "unexpected drop count at 3" +echo "${OUT4}" | grep -q '20\ pkt\ (dropped\ 40' || die "unexpected accept count at 4" diff --git a/tools/testing/selftests/net/gre_gso.sh b/tools/testing/selftests/net/gre_gso.sh index 3224651db9..5100d90f92 100755 --- a/tools/testing/selftests/net/gre_gso.sh +++ b/tools/testing/selftests/net/gre_gso.sh @@ -2,10 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 # This test is for checking GRE GSO. - +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 # all tests in this script. 
Can be overridden with -t option TESTS="gre_gso" @@ -13,8 +11,6 @@ TESTS="gre_gso" VERBOSE=0 PAUSE_ON_FAIL=no PAUSE=no -IP="ip -netns ns1" -NS_EXEC="ip netns exec ns1" TMPFILE=`mktemp` PID= @@ -50,13 +46,13 @@ log_test() setup() { set -e - ip netns add ns1 - ip netns set ns1 auto - $IP link set dev lo up + setup_ns ns1 + IP="ip -netns $ns1" + NS_EXEC="ip netns exec $ns1" ip link add veth0 type veth peer name veth1 ip link set veth0 up - ip link set veth1 netns ns1 + ip link set veth1 netns $ns1 $IP link set veth1 name veth0 $IP link set veth0 up @@ -70,7 +66,7 @@ cleanup() [ -n "$PID" ] && kill $PID ip link del dev gre1 &> /dev/null ip link del dev veth0 &> /dev/null - ip netns del ns1 + cleanup_ns $ns1 } get_linklocal() @@ -145,7 +141,7 @@ gre6_gso_test() setup a1=$(get_linklocal veth0) - a2=$(get_linklocal veth0 ns1) + a2=$(get_linklocal veth0 $ns1) gre_create_tun $a1 $a2 diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c index 30024d0ed3..353e1e867f 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/gro.c @@ -71,6 +71,12 @@ #define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) #define NUM_LARGE_PKT (MAX_PAYLOAD / MSS) #define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) +#define MIN_EXTHDR_SIZE 8 +#define EXT_PAYLOAD_1 "\x00\x00\x00\x00\x00\x00" +#define EXT_PAYLOAD_2 "\x11\x11\x11\x11\x11\x11" + +#define ipv6_optlen(p) (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */ +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) static const char *addr6_src = "fdaa::2"; static const char *addr6_dst = "fdaa::1"; @@ -104,7 +110,7 @@ static void setup_sock_filter(int fd) const int dport_off = tcp_offset + offsetof(struct tcphdr, dest); const int ethproto_off = offsetof(struct ethhdr, h_proto); int optlen = 0; - int ipproto_off; + int ipproto_off, opt_ipproto_off; int next_off; if (proto == PF_INET) @@ -116,14 +122,30 @@ 
static void setup_sock_filter(int fd) if (strcmp(testname, "ip") == 0) { if (proto == PF_INET) optlen = sizeof(struct ip_timestamp); - else - optlen = sizeof(struct ip6_frag); + else { + BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE); + BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE); + BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE); + + /* same size for HBH and Fragment extension header types */ + optlen = MIN_EXTHDR_SIZE; + opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr) + + offsetof(struct ip6_ext, ip6e_nxt); + } } + /* this filter validates the following: + * - packet is IPv4/IPv6 according to the running test. + * - packet is TCP. Also handles the case of one extension header and then TCP. + * - checks the packet tcp dport equals to DPORT. Also handles the case of one + * extension header and then TCP. + */ struct sock_filter filter[] = { BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 7), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 9), BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 2, 0), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, opt_ipproto_off), BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5), BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off), BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0), @@ -576,6 +598,39 @@ static void add_ipv4_ts_option(void *buf, void *optpkt) iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0); } +static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type, char *ext_payload) +{ + struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(optpkt + tcp_offset); + struct ipv6hdr *iph = (struct ipv6hdr *)(optpkt + ETH_HLEN); + char *exthdr_payload_start = (char *)(exthdr + 1); + + exthdr->hdrlen = 0; + exthdr->nexthdr = IPPROTO_TCP; + + memcpy(exthdr_payload_start, ext_payload, MIN_EXTHDR_SIZE - sizeof(*exthdr)); + + memcpy(optpkt, buf, tcp_offset); + 
memcpy(optpkt + tcp_offset + MIN_EXTHDR_SIZE, buf + tcp_offset, + sizeof(struct tcphdr) + PAYLOAD_LEN); + + iph->nexthdr = exthdr_type; + iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE); +} + +static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE]; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data1); + write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); + + create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0); + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data2); + write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); +} + /* IPv4 options shouldn't coalesce */ static void send_ip_options(int fd, struct sockaddr_ll *daddr) { @@ -697,7 +752,7 @@ static void send_fragment6(int fd, struct sockaddr_ll *daddr) create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0); write_packet(fd, buf, bufpkt_len, daddr); } - + sleep(1); create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); memset(extpkt, 0, extpkt_len); @@ -760,6 +815,7 @@ static void check_recv_pkts(int fd, int *correct_payload, vlog("}, Total %d packets\nReceived {", correct_num_pkts); while (1) { + ip_ext_len = 0; pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0); if (pkt_size < 0) error(1, errno, "could not receive"); @@ -767,7 +823,7 @@ static void check_recv_pkts(int fd, int *correct_payload, if (iph->version == 4) ip_ext_len = (iph->ihl - 5) * 4; else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP) - ip_ext_len = sizeof(struct ip6_frag); + ip_ext_len = MIN_EXTHDR_SIZE; tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len); @@ -880,7 +936,21 @@ static void gro_sender(void) sleep(1); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } else if (proto == PF_INET6) { + sleep(1); 
send_fragment6(txfd, &daddr); + sleep(1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + sleep(1); + /* send IPv6 packets with ext header with same payload */ + send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1); + sleep(1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + sleep(1); + /* send IPv6 packets with ext header with different payload */ + send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2); + sleep(1); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } } else if (strcmp(testname, "large") == 0) { @@ -997,6 +1067,17 @@ static void gro_receiver(void) */ printf("fragmented ip6 doesn't coalesce: "); correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = PAYLOAD_LEN; + correct_payload[2] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 3); + + printf("ipv6 with ext header does coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("ipv6 with ext header with different payloads doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; check_recv_pkts(rxfd, correct_payload, 2); } } else if (strcmp(testname, "large") == 0) { diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh index 342ad27f63..02c21ff4ca 100755 --- a/tools/testing/selftests/net/gro.sh +++ b/tools/testing/selftests/net/gro.sh @@ -23,14 +23,19 @@ run_test() { # on every try. for tries in {1..3}; do # Actual test starts here - ip netns exec server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \ + ip netns exec $server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \ 1>>log.txt & server_pid=$! sleep 0.5 # to allow for socket init - ip netns exec client_ns ./gro "${ARGS[@]}" "--iface" "client" \ + ip netns exec $client_ns ./gro "${ARGS[@]}" "--iface" "client" \ 1>>log.txt wait "${server_pid}" exit_code=$? 
+ if [[ ${test} == "large" && -n "${KSFT_MACHINE_SLOW}" && \ + ${exit_code} -ne 0 ]]; then + echo "Ignoring errors due to slow environment" 1>&2 + exit_code=0 + fi if [[ "${exit_code}" -eq 0 ]]; then break; fi diff --git a/tools/testing/selftests/net/icmp.sh b/tools/testing/selftests/net/icmp.sh index e4b04cd164..824cb0e35e 100755 --- a/tools/testing/selftests/net/icmp.sh +++ b/tools/testing/selftests/net/icmp.sh @@ -18,8 +18,8 @@ # that address space, so the kernel should substitute the dummy address # 192.0.0.8 defined in RFC7600. -NS1=ns1 -NS2=ns2 +source lib.sh + H1_IP=172.16.0.1/32 H1_IP6=2001:db8:1::1 RT1=172.16.1.0/24 @@ -32,15 +32,13 @@ TMPFILE=$(mktemp) cleanup() { rm -f "$TMPFILE" - ip netns del $NS1 - ip netns del $NS2 + cleanup_ns $NS1 $NS2 } trap cleanup EXIT # Namespaces -ip netns add $NS1 -ip netns add $NS2 +setup_ns NS1 NS2 # Connectivity ip -netns $NS1 link add veth0 type veth peer name veth0 netns $NS2 diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh index 7b9d6e31b8..d6f0e449c0 100755 --- a/tools/testing/selftests/net/icmp_redirect.sh +++ b/tools/testing/selftests/net/icmp_redirect.sh @@ -19,6 +19,7 @@ # Route on r1 changed to go to r2 via eth0. This causes a redirect to be sent # from r1 to h1 telling h1 to use r2 when talking to h2. 
+source lib.sh VERBOSE=0 PAUSE_ON_FAIL=no @@ -140,11 +141,7 @@ get_linklocal() cleanup() { - local ns - - for ns in h1 h2 r1 r2; do - ip netns del $ns 2>/dev/null - done + cleanup_ns $h1 $h2 $r1 $r2 } create_vrf() @@ -171,102 +168,99 @@ setup() # # create nodes as namespaces - # - for ns in h1 h2 r1 r2; do - ip netns add $ns - ip -netns $ns li set lo up - - case "${ns}" in - h[12]) ip netns exec $ns sysctl -q -w net.ipv4.conf.all.accept_redirects=1 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.accept_redirects=1 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 - ;; - r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 - ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1 - ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0 - ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0 - - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 - ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10 - esac + setup_ns h1 h2 r1 r2 + for ns in $h1 $h2 $r1 $r2; do + if echo $ns | grep -q h[12]-; then + ip netns exec $ns sysctl -q -w net.ipv4.conf.all.accept_redirects=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.accept_redirects=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 + else + ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1 + ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0 + ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0 + + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 + ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10 + fi done # # create interconnects # - ip -netns h1 li add eth0 type veth peer name r1h1 - ip -netns h1 li set r1h1 netns r1 name eth0 up + ip -netns $h1 li add eth0 type veth peer 
name r1h1 + ip -netns $h1 li set r1h1 netns $r1 name eth0 up - ip -netns h1 li add eth1 type veth peer name r2h1 - ip -netns h1 li set r2h1 netns r2 name eth0 up + ip -netns $h1 li add eth1 type veth peer name r2h1 + ip -netns $h1 li set r2h1 netns $r2 name eth0 up - ip -netns h2 li add eth0 type veth peer name r2h2 - ip -netns h2 li set eth0 up - ip -netns h2 li set r2h2 netns r2 name eth2 up + ip -netns $h2 li add eth0 type veth peer name r2h2 + ip -netns $h2 li set eth0 up + ip -netns $h2 li set r2h2 netns $r2 name eth2 up - ip -netns r1 li add eth1 type veth peer name r2r1 - ip -netns r1 li set eth1 up - ip -netns r1 li set r2r1 netns r2 name eth1 up + ip -netns $r1 li add eth1 type veth peer name r2r1 + ip -netns $r1 li set eth1 up + ip -netns $r1 li set r2r1 netns $r2 name eth1 up # # h1 # if [ "${WITH_VRF}" = "yes" ]; then - create_vrf "h1" + create_vrf "$h1" H1_VRF_ARG="vrf ${VRF}" H1_PING_ARG="-I ${VRF}" else H1_VRF_ARG= H1_PING_ARG= fi - ip -netns h1 li add br0 type bridge + ip -netns $h1 li add br0 type bridge if [ "${WITH_VRF}" = "yes" ]; then - ip -netns h1 li set br0 vrf ${VRF} up + ip -netns $h1 li set br0 vrf ${VRF} up else - ip -netns h1 li set br0 up + ip -netns $h1 li set br0 up fi - ip -netns h1 addr add dev br0 ${H1_N1_IP}/24 - ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad - ip -netns h1 li set eth0 master br0 up - ip -netns h1 li set eth1 master br0 up + ip -netns $h1 addr add dev br0 ${H1_N1_IP}/24 + ip -netns $h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad + ip -netns $h1 li set eth0 master br0 up + ip -netns $h1 li set eth1 master br0 up # # h2 # - ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24 - ip -netns h2 ro add default via ${R2_N2_IP} dev eth0 - ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad - ip -netns h2 -6 ro add default via ${R2_N2_IP6} dev eth0 + ip -netns $h2 addr add dev eth0 ${H2_N2_IP}/24 + ip -netns $h2 ro add default via ${R2_N2_IP} dev eth0 + ip -netns $h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad + ip 
-netns $h2 -6 ro add default via ${R2_N2_IP6} dev eth0 # # r1 # - ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24 - ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad - ip -netns r1 addr add dev eth1 ${R1_R2_N1_IP}/30 - ip -netns r1 -6 addr add dev eth1 ${R1_R2_N1_IP6}/126 nodad + ip -netns $r1 addr add dev eth0 ${R1_N1_IP}/24 + ip -netns $r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad + ip -netns $r1 addr add dev eth1 ${R1_R2_N1_IP}/30 + ip -netns $r1 -6 addr add dev eth1 ${R1_R2_N1_IP6}/126 nodad # # r2 # - ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24 - ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad - ip -netns r2 addr add dev eth1 ${R2_R1_N1_IP}/30 - ip -netns r2 -6 addr add dev eth1 ${R2_R1_N1_IP6}/126 nodad - ip -netns r2 addr add dev eth2 ${R2_N2_IP}/24 - ip -netns r2 -6 addr add dev eth2 ${R2_N2_IP6}/64 nodad + ip -netns $r2 addr add dev eth0 ${R2_N1_IP}/24 + ip -netns $r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad + ip -netns $r2 addr add dev eth1 ${R2_R1_N1_IP}/30 + ip -netns $r2 -6 addr add dev eth1 ${R2_R1_N1_IP6}/126 nodad + ip -netns $r2 addr add dev eth2 ${R2_N2_IP}/24 + ip -netns $r2 -6 addr add dev eth2 ${R2_N2_IP6}/64 nodad sleep 2 - R1_LLADDR=$(get_linklocal r1 eth0) + R1_LLADDR=$(get_linklocal $r1 eth0) if [ $? -ne 0 ]; then echo "Error: Failed to get link-local address of r1's eth0" exit 1 fi log_debug "initial gateway is R1's lladdr = ${R1_LLADDR}" - R2_LLADDR=$(get_linklocal r2 eth0) + R2_LLADDR=$(get_linklocal $r2 eth0) if [ $? 
-ne 0 ]; then echo "Error: Failed to get link-local address of r2's eth0" exit 1 @@ -278,8 +272,8 @@ change_h2_mtu() { local mtu=$1 - run_cmd ip -netns h2 li set eth0 mtu ${mtu} - run_cmd ip -netns r2 li set eth2 mtu ${mtu} + run_cmd ip -netns $h2 li set eth0 mtu ${mtu} + run_cmd ip -netns $r2 li set eth2 mtu ${mtu} } check_exception() @@ -291,40 +285,40 @@ check_exception() # From 172.16.1.101: icmp_seq=1 Redirect Host(New nexthop: 172.16.1.102) if [ "$VERBOSE" = "1" ]; then echo "Commands to check for exception:" - run_cmd ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} - run_cmd ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} + run_cmd ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} + run_cmd ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} fi if [ -n "${mtu}" ]; then mtu=" mtu ${mtu}" fi if [ "$with_redirect" = "yes" ]; then - ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ + ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ grep -q "cache <redirected> expires [0-9]*sec${mtu}" elif [ -n "${mtu}" ]; then - ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ + ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ grep -q "cache expires [0-9]*sec${mtu}" else # want to verify that neither mtu nor redirected appears in # the route get output. The -v will wipe out the cache line # if either are set so the last grep -q will not find a match - ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ + ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \ grep -E -v 'mtu|redirected' | grep -q "cache" fi log_test $? 
0 "IPv4: ${desc}" 0 # No PMTU info for test "redirect" and "mtu exception plus redirect" if [ "$with_redirect" = "yes" ] && [ "$desc" != "redirect exception plus mtu" ]; then - ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ + ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ grep -v "mtu" | grep -q "${H2_N2_IP6} .*via ${R2_LLADDR} dev br0" elif [ -n "${mtu}" ]; then - ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ + ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ grep -q "${mtu}" else # IPv6 is a bit harder. First strip out the match if it # contains an mtu exception and then look for the first # gateway - R1's lladdr - ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ + ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \ grep -v "mtu" | grep -q "${R1_LLADDR}" fi log_test $? 0 "IPv6: ${desc}" 1 @@ -334,21 +328,21 @@ run_ping() { local sz=$1 - run_cmd ip netns exec h1 ping -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP} - run_cmd ip netns exec h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6} + run_cmd ip netns exec $h1 ping -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP} + run_cmd ip netns exec $h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6} } replace_route_new() { # r1 to h2 via r2 and eth0 - run_cmd ip -netns r1 nexthop replace id 1 via ${R2_N1_IP} dev eth0 - run_cmd ip -netns r1 nexthop replace id 2 via ${R2_LLADDR} dev eth0 + run_cmd ip -netns $r1 nexthop replace id 1 via ${R2_N1_IP} dev eth0 + run_cmd ip -netns $r1 nexthop replace id 2 via ${R2_LLADDR} dev eth0 } reset_route_new() { - run_cmd ip -netns r1 nexthop flush - run_cmd ip -netns h1 nexthop flush + run_cmd ip -netns $r1 nexthop flush + run_cmd ip -netns $h1 nexthop flush initial_route_new } @@ -356,34 +350,34 @@ reset_route_new() initial_route_new() { # r1 to h2 via r2 and eth1 - run_cmd ip -netns r1 nexthop add id 1 via ${R2_R1_N1_IP} dev eth1 - run_cmd ip -netns r1 ro add 
${H2_N2} nhid 1 + run_cmd ip -netns $r1 nexthop add id 1 via ${R2_R1_N1_IP} dev eth1 + run_cmd ip -netns $r1 ro add ${H2_N2} nhid 1 - run_cmd ip -netns r1 nexthop add id 2 via ${R2_R1_N1_IP6} dev eth1 - run_cmd ip -netns r1 -6 ro add ${H2_N2_6} nhid 2 + run_cmd ip -netns $r1 nexthop add id 2 via ${R2_R1_N1_IP6} dev eth1 + run_cmd ip -netns $r1 -6 ro add ${H2_N2_6} nhid 2 # h1 to h2 via r1 - run_cmd ip -netns h1 nexthop add id 1 via ${R1_N1_IP} dev br0 - run_cmd ip -netns h1 ro add ${H1_VRF_ARG} ${H2_N2} nhid 1 + run_cmd ip -netns $h1 nexthop add id 1 via ${R1_N1_IP} dev br0 + run_cmd ip -netns $h1 ro add ${H1_VRF_ARG} ${H2_N2} nhid 1 - run_cmd ip -netns h1 nexthop add id 2 via ${R1_LLADDR} dev br0 - run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} nhid 2 + run_cmd ip -netns $h1 nexthop add id 2 via ${R1_LLADDR} dev br0 + run_cmd ip -netns $h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} nhid 2 } replace_route_legacy() { # r1 to h2 via r2 and eth0 - run_cmd ip -netns r1 ro replace ${H2_N2} via ${R2_N1_IP} dev eth0 - run_cmd ip -netns r1 -6 ro replace ${H2_N2_6} via ${R2_LLADDR} dev eth0 + run_cmd ip -netns $r1 ro replace ${H2_N2} via ${R2_N1_IP} dev eth0 + run_cmd ip -netns $r1 -6 ro replace ${H2_N2_6} via ${R2_LLADDR} dev eth0 } reset_route_legacy() { - run_cmd ip -netns r1 ro del ${H2_N2} - run_cmd ip -netns r1 -6 ro del ${H2_N2_6} + run_cmd ip -netns $r1 ro del ${H2_N2} + run_cmd ip -netns $r1 -6 ro del ${H2_N2_6} - run_cmd ip -netns h1 ro del ${H1_VRF_ARG} ${H2_N2} - run_cmd ip -netns h1 -6 ro del ${H1_VRF_ARG} ${H2_N2_6} + run_cmd ip -netns $h1 ro del ${H1_VRF_ARG} ${H2_N2} + run_cmd ip -netns $h1 -6 ro del ${H1_VRF_ARG} ${H2_N2_6} initial_route_legacy } @@ -391,22 +385,22 @@ reset_route_legacy() initial_route_legacy() { # r1 to h2 via r2 and eth1 - run_cmd ip -netns r1 ro add ${H2_N2} via ${R2_R1_N1_IP} dev eth1 - run_cmd ip -netns r1 -6 ro add ${H2_N2_6} via ${R2_R1_N1_IP6} dev eth1 + run_cmd ip -netns $r1 ro add ${H2_N2} via ${R2_R1_N1_IP} dev eth1 + run_cmd ip 
-netns $r1 -6 ro add ${H2_N2_6} via ${R2_R1_N1_IP6} dev eth1 # h1 to h2 via r1 # - IPv6 redirect only works if gateway is the LLA - run_cmd ip -netns h1 ro add ${H1_VRF_ARG} ${H2_N2} via ${R1_N1_IP} dev br0 - run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} via ${R1_LLADDR} dev br0 + run_cmd ip -netns $h1 ro add ${H1_VRF_ARG} ${H2_N2} via ${R1_N1_IP} dev br0 + run_cmd ip -netns $h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} via ${R1_LLADDR} dev br0 } check_connectivity() { local rc - run_cmd ip netns exec h1 ping -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP} + run_cmd ip netns exec $h1 ping -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP} rc=$? - run_cmd ip netns exec h1 ${ping6} -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP6} + run_cmd ip netns exec $h1 ${ping6} -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP6} [ $? -ne 0 ] && rc=$? return $rc diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh index 9ac4456d48..1234395450 100755 --- a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh +++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh @@ -76,23 +76,22 @@ case "${TXMODE}" in esac # Start of state changes: install cleanup handler -save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})" cleanup() { ip netns del "${NS2}" ip netns del "${NS1}" - sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}" } trap cleanup EXIT -# Configure system settings -sysctl -w -q "${path_sysctl_mem}=1000000" - # Create virtual ethernet pair between network namespaces ip netns add "${NS1}" ip netns add "${NS2}" +# Configure system settings +ip netns exec "${NS1}" sysctl -w -q "${path_sysctl_mem}=1000000" +ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000" + ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \ peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}" diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index 4ceb401da1..12491850ae 100755 --- a/tools/testing/selftests/net/ioam6.sh +++ 
b/tools/testing/selftests/net/ioam6.sh @@ -117,8 +117,7 @@ # | Schema Data | | # +-----------------------------------------------------------+ -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source lib.sh ################################################################################ # # @@ -195,32 +194,32 @@ TESTS_GLOBAL=" check_kernel_compatibility() { - ip netns add ioam-tmp-node - ip link add name veth0 netns ioam-tmp-node type veth \ - peer name veth1 netns ioam-tmp-node + setup_ns ioam_tmp_node + ip link add name veth0 netns $ioam_tmp_node type veth \ + peer name veth1 netns $ioam_tmp_node - ip -netns ioam-tmp-node link set veth0 up - ip -netns ioam-tmp-node link set veth1 up + ip -netns $ioam_tmp_node link set veth0 up + ip -netns $ioam_tmp_node link set veth1 up - ip -netns ioam-tmp-node ioam namespace add 0 + ip -netns $ioam_tmp_node ioam namespace add 0 ns_ad=$? - ip -netns ioam-tmp-node ioam namespace show | grep -q "namespace 0" + ip -netns $ioam_tmp_node ioam namespace show | grep -q "namespace 0" ns_sh=$? if [[ $ns_ad != 0 || $ns_sh != 0 ]] then echo "SKIP: kernel version probably too old, missing ioam support" ip link del veth0 2>/dev/null || true - ip netns del ioam-tmp-node || true + cleanup_ns $ioam_tmp_node || true exit $ksft_skip fi - ip -netns ioam-tmp-node route add db02::/64 encap ioam6 mode inline \ + ip -netns $ioam_tmp_node route add db02::/64 encap ioam6 mode inline \ trace prealloc type 0x800000 ns 0 size 4 dev veth0 tr_ad=$? - ip -netns ioam-tmp-node -6 route | grep -q "encap ioam6" + ip -netns $ioam_tmp_node -6 route | grep -q "encap ioam6" tr_sh=$? if [[ $tr_ad != 0 || $tr_sh != 0 ]] @@ -228,12 +227,12 @@ check_kernel_compatibility() echo "SKIP: cannot attach an ioam trace to a route, did you compile" \ "without CONFIG_IPV6_IOAM6_LWTUNNEL?" 
ip link del veth0 2>/dev/null || true - ip netns del ioam-tmp-node || true + cleanup_ns $ioam_tmp_node || true exit $ksft_skip fi ip link del veth0 2>/dev/null || true - ip netns del ioam-tmp-node || true + cleanup_ns $ioam_tmp_node || true lsmod | grep -q "ip6_tunnel" ip6tnl_loaded=$? @@ -265,9 +264,7 @@ cleanup() ip link del ioam-veth-alpha 2>/dev/null || true ip link del ioam-veth-gamma 2>/dev/null || true - ip netns del ioam-node-alpha || true - ip netns del ioam-node-beta || true - ip netns del ioam-node-gamma || true + cleanup_ns $ioam_node_alpha $ioam_node_beta $ioam_node_gamma || true if [ $ip6tnl_loaded != 0 ] then @@ -277,69 +274,67 @@ cleanup() setup() { - ip netns add ioam-node-alpha - ip netns add ioam-node-beta - ip netns add ioam-node-gamma - - ip link add name ioam-veth-alpha netns ioam-node-alpha type veth \ - peer name ioam-veth-betaL netns ioam-node-beta - ip link add name ioam-veth-betaR netns ioam-node-beta type veth \ - peer name ioam-veth-gamma netns ioam-node-gamma - - ip -netns ioam-node-alpha link set ioam-veth-alpha name veth0 - ip -netns ioam-node-beta link set ioam-veth-betaL name veth0 - ip -netns ioam-node-beta link set ioam-veth-betaR name veth1 - ip -netns ioam-node-gamma link set ioam-veth-gamma name veth0 - - ip -netns ioam-node-alpha addr add db01::2/64 dev veth0 - ip -netns ioam-node-alpha link set veth0 up - ip -netns ioam-node-alpha link set lo up - ip -netns ioam-node-alpha route add db02::/64 via db01::1 dev veth0 - ip -netns ioam-node-alpha route del db01::/64 - ip -netns ioam-node-alpha route add db01::/64 dev veth0 - - ip -netns ioam-node-beta addr add db01::1/64 dev veth0 - ip -netns ioam-node-beta addr add db02::1/64 dev veth1 - ip -netns ioam-node-beta link set veth0 up - ip -netns ioam-node-beta link set veth1 up - ip -netns ioam-node-beta link set lo up - - ip -netns ioam-node-gamma addr add db02::2/64 dev veth0 - ip -netns ioam-node-gamma link set veth0 up - ip -netns ioam-node-gamma link set lo up - ip -netns 
ioam-node-gamma route add db01::/64 via db02::1 dev veth0 + setup_ns ioam_node_alpha ioam_node_beta ioam_node_gamma + + ip link add name ioam-veth-alpha netns $ioam_node_alpha type veth \ + peer name ioam-veth-betaL netns $ioam_node_beta + ip link add name ioam-veth-betaR netns $ioam_node_beta type veth \ + peer name ioam-veth-gamma netns $ioam_node_gamma + + ip -netns $ioam_node_alpha link set ioam-veth-alpha name veth0 + ip -netns $ioam_node_beta link set ioam-veth-betaL name veth0 + ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1 + ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0 + + ip -netns $ioam_node_alpha addr add db01::2/64 dev veth0 + ip -netns $ioam_node_alpha link set veth0 up + ip -netns $ioam_node_alpha link set lo up + ip -netns $ioam_node_alpha route add db02::/64 via db01::1 dev veth0 + ip -netns $ioam_node_alpha route del db01::/64 + ip -netns $ioam_node_alpha route add db01::/64 dev veth0 + + ip -netns $ioam_node_beta addr add db01::1/64 dev veth0 + ip -netns $ioam_node_beta addr add db02::1/64 dev veth1 + ip -netns $ioam_node_beta link set veth0 up + ip -netns $ioam_node_beta link set veth1 up + ip -netns $ioam_node_beta link set lo up + + ip -netns $ioam_node_gamma addr add db02::2/64 dev veth0 + ip -netns $ioam_node_gamma link set veth0 up + ip -netns $ioam_node_gamma link set lo up + ip -netns $ioam_node_gamma route add db01::/64 via db02::1 dev veth0 # - IOAM config - - ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]} - ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]} - ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]} - ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]} - ip -netns ioam-node-alpha ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]} - ip -netns ioam-node-alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}" - ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]} - - ip 
netns exec ioam-node-beta sysctl -wq net.ipv6.conf.all.forwarding=1 - ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]} - ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]} - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]} - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]} - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]} - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]} - ip -netns ioam-node-beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]} - ip -netns ioam-node-beta ioam schema add ${BETA[8]} "${BETA[9]}" - ip -netns ioam-node-beta ioam namespace set 123 schema ${BETA[8]} - - ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]} - ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]} - ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 - ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]} - ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]} - ip -netns ioam-node-gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]} + ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]} + ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]} + ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]} + ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]} + ip -netns $ioam_node_alpha ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]} + ip -netns $ioam_node_alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}" + ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]} + + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.all.forwarding=1 + ip netns exec 
$ioam_node_beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]} + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]} + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]} + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]} + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]} + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]} + ip -netns $ioam_node_beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]} + ip -netns $ioam_node_beta ioam schema add ${BETA[8]} "${BETA[9]}" + ip -netns $ioam_node_beta ioam namespace set 123 schema ${BETA[8]} + + ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]} + ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]} + ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 + ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]} + ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]} + ip -netns $ioam_node_gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]} sleep 1 - ip netns exec ioam-node-alpha ping6 -c 5 -W 1 db02::2 &>/dev/null + ip netns exec $ioam_node_alpha ping6 -c 5 -W 1 db02::2 &>/dev/null if [ $? != 0 ] then echo "Setup FAILED" @@ -372,14 +367,12 @@ run_test() local desc=$2 local node_src=$3 local node_dst=$4 - local ip6_src=$5 - local ip6_dst=$6 - local if_dst=$7 - local trace_type=$8 - local ioam_ns=$9 - - ip netns exec $node_dst ./ioam6_parser $if_dst $name $ip6_src $ip6_dst \ - $trace_type $ioam_ns & + local ip6_dst=$5 + local trace_type=$6 + local ioam_ns=$7 + local type=$8 + + ip netns exec $node_dst ./ioam6_parser $name $trace_type $ioam_ns $type & local spid=$! 
sleep 0.1 @@ -412,7 +405,7 @@ run() echo # set OUTPUT settings - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 for t in $TESTS_OUTPUT do @@ -421,8 +414,8 @@ run() done # clean OUTPUT settings - ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 - ip -netns ioam-node-alpha route change db01::/64 dev veth0 + ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 + ip -netns $ioam_node_alpha route change db01::/64 dev veth0 echo @@ -433,7 +426,7 @@ run() echo # set INPUT settings - ip -netns ioam-node-alpha ioam namespace del 123 + ip -netns $ioam_node_alpha ioam namespace del 123 for t in $TESTS_INPUT do @@ -442,10 +435,10 @@ run() done # clean INPUT settings - ip -netns ioam-node-alpha ioam namespace add 123 \ + ip -netns $ioam_node_alpha ioam namespace add 123 \ data ${ALPHA[6]} wide ${ALPHA[7]} - ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]} - ip -netns ioam-node-alpha route change db01::/64 dev veth0 + ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]} + ip -netns $ioam_node_alpha route change db01::/64 dev veth0 echo printf "%0.s-" {1..74} @@ -488,15 +481,15 @@ out_undef_ns() local desc="Unknown IOAM namespace" [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type 0x800000 ns 0 size 4 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ - db01::2 db01::1 veth0 0x800000 0 + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ + db01::1 0x800000 0 $1 - [ "$1" = "encap" ] && ip -netns ioam-node-beta 
link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } out_no_room() @@ -508,15 +501,15 @@ out_no_room() local desc="Missing trace room" [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type 0xc00000 ns 123 size 4 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ - db01::2 db01::1 veth0 0xc00000 123 + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ + db01::1 0xc00000 123 $1 - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } out_bits() @@ -532,11 +525,11 @@ out_bits() bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) )) [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up for i in {0..22} do - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ dev veth0 &>/dev/null @@ -548,18 +541,18 @@ out_bits() if [ $cmd_res != 0 ] then npassed=$((npassed+1)) - log_test_passed "$descr" + log_test_passed "$descr ($1 mode)" else nfailed=$((nfailed+1)) - log_test_failed "$descr" + log_test_failed "$descr ($1 mode)" fi else - run_test "out_bit$i" "$descr ($1 mode)" ioam-node-alpha \ - ioam-node-beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 + run_test "out_bit$i" "$descr ($1 mode)" $ioam_node_alpha \ + $ioam_node_beta db01::1 ${bit2type[$i]} 123 
$1 fi done - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down bit2size[22]=$tmp } @@ -573,15 +566,15 @@ out_full_supp_trace() local desc="Full supported trace" [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type 0xfff002 ns 123 size 100 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ - db01::2 db01::1 veth0 0xfff002 123 + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ + db01::1 0xfff002 123 $1 - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -603,15 +596,15 @@ in_undef_ns() local desc="Unknown IOAM namespace" [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type 0x800000 ns 0 size 4 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ - db01::2 db01::1 veth0 0x800000 0 + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ + db01::1 0x800000 0 $1 - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } in_no_room() @@ -623,15 +616,15 @@ in_no_room() local desc="Missing trace room" [ "$1" = "encap" ] && mode="$1 tundst db01::1" || 
mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type 0xc00000 ns 123 size 4 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ - db01::2 db01::1 veth0 0xc00000 123 + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ + db01::1 0xc00000 123 $1 - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } in_bits() @@ -647,19 +640,19 @@ in_bits() bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) )) [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up for i in {0..11} {22..22} do - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ dev veth0 - run_test "in_bit$i" "${desc/<n>/$i} ($1 mode)" ioam-node-alpha \ - ioam-node-beta db01::2 db01::1 veth0 ${bit2type[$i]} 123 + run_test "in_bit$i" "${desc/<n>/$i} ($1 mode)" $ioam_node_alpha \ + $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1 done - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down bit2size[22]=$tmp } @@ -675,22 +668,22 @@ in_oflag() # Exception: # Here, we need the sender to set the Overflow flag. For that, we will add # back the IOAM namespace that was previously configured on the sender. 
- ip -netns ioam-node-alpha ioam namespace add 123 + ip -netns $ioam_node_alpha ioam namespace add 123 [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type 0xc00000 ns 123 size 4 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ - db01::2 db01::1 veth0 0xc00000 123 + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ + db01::1 0xc00000 123 $1 - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down # And we clean the exception for this test to get things back to normal for # other INPUT tests - ip -netns ioam-node-alpha ioam namespace del 123 + ip -netns $ioam_node_alpha ioam namespace del 123 } in_full_supp_trace() @@ -702,15 +695,15 @@ in_full_supp_trace() local desc="Full supported trace" [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up - ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ trace prealloc type 0xfff002 ns 123 size 80 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \ - db01::2 db01::1 veth0 0xfff002 123 + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ + db01::1 0xfff002 123 $1 - [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down } @@ -730,15 +723,15 @@ fwd_full_supp_trace() local 
desc="Forward - Full supported trace" [ "$1" = "encap" ] && mode="$1 tundst db02::2" || mode="$1" - [ "$1" = "encap" ] && ip -netns ioam-node-gamma link set ip6tnl0 up + [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 up - ip -netns ioam-node-alpha route change db02::/64 encap ioam6 mode $mode \ + ip -netns $ioam_node_alpha route change db02::/64 encap ioam6 mode $mode \ trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0 - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-gamma \ - db01::2 db02::2 veth0 0xfff002 123 + run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_gamma \ + db02::2 0xfff002 123 $1 - [ "$1" = "encap" ] && ip -netns ioam-node-gamma link set ip6tnl0 down + [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 down } diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c index d9d1d41901..895e5bb504 100644 --- a/tools/testing/selftests/net/ioam6_parser.c +++ b/tools/testing/selftests/net/ioam6_parser.c @@ -8,7 +8,6 @@ #include <errno.h> #include <limits.h> #include <linux/const.h> -#include <linux/if_ether.h> #include <linux/ioam6.h> #include <linux/ipv6.h> #include <stdlib.h> @@ -512,14 +511,6 @@ static int str2id(const char *tname) return -1; } -static int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2) -{ - return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | - (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | - (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | - (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0; -} - static int get_u32(__u32 *val, const char *arg, int base) { unsigned long res; @@ -603,70 +594,80 @@ static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = { int main(int argc, char **argv) { - int fd, size, hoplen, tid, ret = 1; - struct in6_addr src, dst; + int fd, size, hoplen, tid, ret = 1, on = 1; struct ioam6_hdr *opt; - struct ipv6hdr *ip6h; - __u8 buffer[400], *p; - __u16 
ioam_ns; + struct cmsghdr *cmsg; + struct msghdr msg; + struct iovec iov; + __u8 buffer[512]; __u32 tr_type; + __u16 ioam_ns; + __u8 *ptr; - if (argc != 7) + if (argc != 5) goto out; - tid = str2id(argv[2]); + tid = str2id(argv[1]); if (tid < 0 || !func[tid]) goto out; - if (inet_pton(AF_INET6, argv[3], &src) != 1 || - inet_pton(AF_INET6, argv[4], &dst) != 1) + if (get_u32(&tr_type, argv[2], 16) || + get_u16(&ioam_ns, argv[3], 0)) goto out; - if (get_u32(&tr_type, argv[5], 16) || - get_u16(&ioam_ns, argv[6], 0)) + fd = socket(PF_INET6, SOCK_RAW, + !strcmp(argv[4], "encap") ? IPPROTO_IPV6 : IPPROTO_ICMPV6); + if (fd < 0) goto out; - fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6)); - if (!fd) - goto out; + setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPOPTS, &on, sizeof(on)); - if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, - argv[1], strlen(argv[1]))) + iov.iov_len = 1; + iov.iov_base = malloc(CMSG_SPACE(sizeof(buffer))); + if (!iov.iov_base) goto close; - recv: - size = recv(fd, buffer, sizeof(buffer), 0); + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = buffer; + msg.msg_controllen = CMSG_SPACE(sizeof(buffer)); + + size = recvmsg(fd, &msg, 0); if (size <= 0) goto close; - ip6h = (struct ipv6hdr *)buffer; + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level != IPPROTO_IPV6 || + cmsg->cmsg_type != IPV6_HOPOPTS || + cmsg->cmsg_len < sizeof(struct ipv6_hopopt_hdr)) + continue; - if (!ipv6_addr_equal(&ip6h->saddr, &src) || - !ipv6_addr_equal(&ip6h->daddr, &dst)) - goto recv; + ptr = (__u8 *)CMSG_DATA(cmsg); - if (ip6h->nexthdr != IPPROTO_HOPOPTS) - goto close; + hoplen = (ptr[1] + 1) << 3; + ptr += sizeof(struct ipv6_hopopt_hdr); - p = buffer + sizeof(*ip6h); - hoplen = (p[1] + 1) << 3; - p += sizeof(struct ipv6_hopopt_hdr); + while (hoplen > 0) { + opt = (struct ioam6_hdr *)ptr; - while (hoplen > 0) { - opt = (struct ioam6_hdr *)p; + if (opt->opt_type == IPV6_TLV_IOAM 
&& + opt->type == IOAM6_TYPE_PREALLOC) { + ptr += sizeof(*opt); + ret = func[tid](tid, + (struct ioam6_trace_hdr *)ptr, + tr_type, ioam_ns); + goto close; + } - if (opt->opt_type == IPV6_TLV_IOAM && - opt->type == IOAM6_TYPE_PREALLOC) { - p += sizeof(*opt); - ret = func[tid](tid, (struct ioam6_trace_hdr *)p, - tr_type, ioam_ns); - break; + ptr += opt->opt_len + 2; + hoplen -= opt->opt_len + 2; } - - p += opt->opt_len + 2; - hoplen -= opt->opt_len + 2; } + + goto recv; close: + free(iov.iov_base); close(fd); out: return ret; diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c index 75e3fdacdf..6ebd58869a 100644 --- a/tools/testing/selftests/net/ip_local_port_range.c +++ b/tools/testing/selftests/net/ip_local_port_range.c @@ -16,6 +16,10 @@ #define IP_LOCAL_PORT_RANGE 51 #endif +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif + static __u32 pack_port_range(__u16 lo, __u16 hi) { return (hi << 16) | (lo << 0); @@ -146,6 +150,12 @@ FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_stcp) { .so_protocol = IPPROTO_SCTP, }; +FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_mptcp) { + .so_domain = AF_INET, + .so_type = SOCK_STREAM, + .so_protocol = IPPROTO_MPTCP, +}; + FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_tcp) { .so_domain = AF_INET6, .so_type = SOCK_STREAM, @@ -164,6 +174,12 @@ FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_stcp) { .so_protocol = IPPROTO_SCTP, }; +FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_mptcp) { + .so_domain = AF_INET6, + .so_type = SOCK_STREAM, + .so_protocol = IPPROTO_MPTCP, +}; + TEST_F(ip_local_port_range, invalid_option_value) { __u16 val16; diff --git a/tools/testing/selftests/net/l2tp.sh b/tools/testing/selftests/net/l2tp.sh index 5782433886..88de7166c8 100755 --- a/tools/testing/selftests/net/l2tp.sh +++ b/tools/testing/selftests/net/l2tp.sh @@ -13,6 +13,7 @@ # 10.1.1.1 | | 10.1.2.1 # 2001:db8:1::1 | | 2001:db8:2::1 +source lib.sh VERBOSE=0 PAUSE_ON_FAIL=no @@ -80,9 +81,6 @@ 
create_ns() [ -z "${addr}" ] && addr="-" [ -z "${addr6}" ] && addr6="-" - ip netns add ${ns} - - ip -netns ${ns} link set lo up if [ "${addr}" != "-" ]; then ip -netns ${ns} addr add dev lo ${addr} fi @@ -133,12 +131,7 @@ connect_ns() cleanup() { - local ns - - for ns in host-1 host-2 router - do - ip netns del ${ns} 2>/dev/null - done + cleanup_ns $host_1 $host_2 $router } setup_l2tp_ipv4() @@ -146,28 +139,28 @@ setup_l2tp_ipv4() # # configure l2tpv3 tunnel on host-1 # - ip -netns host-1 l2tp add tunnel tunnel_id 1041 peer_tunnel_id 1042 \ + ip -netns $host_1 l2tp add tunnel tunnel_id 1041 peer_tunnel_id 1042 \ encap ip local 10.1.1.1 remote 10.1.2.1 - ip -netns host-1 l2tp add session name l2tp4 tunnel_id 1041 \ + ip -netns $host_1 l2tp add session name l2tp4 tunnel_id 1041 \ session_id 1041 peer_session_id 1042 - ip -netns host-1 link set dev l2tp4 up - ip -netns host-1 addr add dev l2tp4 172.16.1.1 peer 172.16.1.2 + ip -netns $host_1 link set dev l2tp4 up + ip -netns $host_1 addr add dev l2tp4 172.16.1.1 peer 172.16.1.2 # # configure l2tpv3 tunnel on host-2 # - ip -netns host-2 l2tp add tunnel tunnel_id 1042 peer_tunnel_id 1041 \ + ip -netns $host_2 l2tp add tunnel tunnel_id 1042 peer_tunnel_id 1041 \ encap ip local 10.1.2.1 remote 10.1.1.1 - ip -netns host-2 l2tp add session name l2tp4 tunnel_id 1042 \ + ip -netns $host_2 l2tp add session name l2tp4 tunnel_id 1042 \ session_id 1042 peer_session_id 1041 - ip -netns host-2 link set dev l2tp4 up - ip -netns host-2 addr add dev l2tp4 172.16.1.2 peer 172.16.1.1 + ip -netns $host_2 link set dev l2tp4 up + ip -netns $host_2 addr add dev l2tp4 172.16.1.2 peer 172.16.1.1 # # add routes to loopback addresses # - ip -netns host-1 ro add 172.16.101.2/32 via 172.16.1.2 - ip -netns host-2 ro add 172.16.101.1/32 via 172.16.1.1 + ip -netns $host_1 ro add 172.16.101.2/32 via 172.16.1.2 + ip -netns $host_2 ro add 172.16.101.1/32 via 172.16.1.1 } setup_l2tp_ipv6() @@ -175,28 +168,28 @@ setup_l2tp_ipv6() # # configure l2tpv3 
tunnel on host-1 # - ip -netns host-1 l2tp add tunnel tunnel_id 1061 peer_tunnel_id 1062 \ + ip -netns $host_1 l2tp add tunnel tunnel_id 1061 peer_tunnel_id 1062 \ encap ip local 2001:db8:1::1 remote 2001:db8:2::1 - ip -netns host-1 l2tp add session name l2tp6 tunnel_id 1061 \ + ip -netns $host_1 l2tp add session name l2tp6 tunnel_id 1061 \ session_id 1061 peer_session_id 1062 - ip -netns host-1 link set dev l2tp6 up - ip -netns host-1 addr add dev l2tp6 fc00:1::1 peer fc00:1::2 + ip -netns $host_1 link set dev l2tp6 up + ip -netns $host_1 addr add dev l2tp6 fc00:1::1 peer fc00:1::2 # # configure l2tpv3 tunnel on host-2 # - ip -netns host-2 l2tp add tunnel tunnel_id 1062 peer_tunnel_id 1061 \ + ip -netns $host_2 l2tp add tunnel tunnel_id 1062 peer_tunnel_id 1061 \ encap ip local 2001:db8:2::1 remote 2001:db8:1::1 - ip -netns host-2 l2tp add session name l2tp6 tunnel_id 1062 \ + ip -netns $host_2 l2tp add session name l2tp6 tunnel_id 1062 \ session_id 1062 peer_session_id 1061 - ip -netns host-2 link set dev l2tp6 up - ip -netns host-2 addr add dev l2tp6 fc00:1::2 peer fc00:1::1 + ip -netns $host_2 link set dev l2tp6 up + ip -netns $host_2 addr add dev l2tp6 fc00:1::2 peer fc00:1::1 # # add routes to loopback addresses # - ip -netns host-1 -6 ro add fc00:101::2/128 via fc00:1::2 - ip -netns host-2 -6 ro add fc00:101::1/128 via fc00:1::1 + ip -netns $host_1 -6 ro add fc00:101::2/128 via fc00:1::2 + ip -netns $host_2 -6 ro add fc00:101::1/128 via fc00:1::1 } setup() @@ -205,21 +198,22 @@ setup() cleanup set -e - create_ns host-1 172.16.101.1/32 fc00:101::1/128 - create_ns host-2 172.16.101.2/32 fc00:101::2/128 - create_ns router + setup_ns host_1 host_2 router + create_ns $host_1 172.16.101.1/32 fc00:101::1/128 + create_ns $host_2 172.16.101.2/32 fc00:101::2/128 + create_ns $router - connect_ns host-1 eth0 10.1.1.1/24 2001:db8:1::1/64 \ - router eth1 10.1.1.2/24 2001:db8:1::2/64 + connect_ns $host_1 eth0 10.1.1.1/24 2001:db8:1::1/64 \ + $router eth1 10.1.1.2/24 
2001:db8:1::2/64 - connect_ns host-2 eth0 10.1.2.1/24 2001:db8:2::1/64 \ - router eth2 10.1.2.2/24 2001:db8:2::2/64 + connect_ns $host_2 eth0 10.1.2.1/24 2001:db8:2::1/64 \ + $router eth2 10.1.2.2/24 2001:db8:2::2/64 - ip -netns host-1 ro add 10.1.2.0/24 via 10.1.1.2 - ip -netns host-1 -6 ro add 2001:db8:2::/64 via 2001:db8:1::2 + ip -netns $host_1 ro add 10.1.2.0/24 via 10.1.1.2 + ip -netns $host_1 -6 ro add 2001:db8:2::/64 via 2001:db8:1::2 - ip -netns host-2 ro add 10.1.1.0/24 via 10.1.2.2 - ip -netns host-2 -6 ro add 2001:db8:1::/64 via 2001:db8:2::2 + ip -netns $host_2 ro add 10.1.1.0/24 via 10.1.2.2 + ip -netns $host_2 -6 ro add 2001:db8:1::/64 via 2001:db8:2::2 setup_l2tp_ipv4 setup_l2tp_ipv6 @@ -231,38 +225,38 @@ setup_ipsec() # # IPv4 # - run_cmd host-1 ip xfrm policy add \ + run_cmd $host_1 ip xfrm policy add \ src 10.1.1.1 dst 10.1.2.1 dir out \ tmpl proto esp mode transport - run_cmd host-1 ip xfrm policy add \ + run_cmd $host_1 ip xfrm policy add \ src 10.1.2.1 dst 10.1.1.1 dir in \ tmpl proto esp mode transport - run_cmd host-2 ip xfrm policy add \ + run_cmd $host_2 ip xfrm policy add \ src 10.1.1.1 dst 10.1.2.1 dir in \ tmpl proto esp mode transport - run_cmd host-2 ip xfrm policy add \ + run_cmd $host_2 ip xfrm policy add \ src 10.1.2.1 dst 10.1.1.1 dir out \ tmpl proto esp mode transport - ip -netns host-1 xfrm state add \ + ip -netns $host_1 xfrm state add \ src 10.1.1.1 dst 10.1.2.1 \ spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-1 xfrm state add \ + ip -netns $host_1 xfrm state add \ src 10.1.2.1 dst 10.1.1.1 \ spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-2 xfrm state add \ + ip -netns $host_2 xfrm state add \ src 10.1.1.1 dst 10.1.2.1 \ spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-2 xfrm state add \ + ip 
-netns $host_2 xfrm state add \ src 10.1.2.1 dst 10.1.1.1 \ spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport @@ -270,38 +264,38 @@ setup_ipsec() # # IPV6 # - run_cmd host-1 ip -6 xfrm policy add \ + run_cmd $host_1 ip -6 xfrm policy add \ src 2001:db8:1::1 dst 2001:db8:2::1 dir out \ tmpl proto esp mode transport - run_cmd host-1 ip -6 xfrm policy add \ + run_cmd $host_1 ip -6 xfrm policy add \ src 2001:db8:2::1 dst 2001:db8:1::1 dir in \ tmpl proto esp mode transport - run_cmd host-2 ip -6 xfrm policy add \ + run_cmd $host_2 ip -6 xfrm policy add \ src 2001:db8:1::1 dst 2001:db8:2::1 dir in \ tmpl proto esp mode transport - run_cmd host-2 ip -6 xfrm policy add \ + run_cmd $host_2 ip -6 xfrm policy add \ src 2001:db8:2::1 dst 2001:db8:1::1 dir out \ tmpl proto esp mode transport - ip -netns host-1 -6 xfrm state add \ + ip -netns $host_1 -6 xfrm state add \ src 2001:db8:1::1 dst 2001:db8:2::1 \ spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-1 -6 xfrm state add \ + ip -netns $host_1 -6 xfrm state add \ src 2001:db8:2::1 dst 2001:db8:1::1 \ spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-2 -6 xfrm state add \ + ip -netns $host_2 -6 xfrm state add \ src 2001:db8:1::1 dst 2001:db8:2::1 \ spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport - ip -netns host-2 -6 xfrm state add \ + ip -netns $host_2 -6 xfrm state add \ src 2001:db8:2::1 dst 2001:db8:1::1 \ spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \ 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport @@ -309,10 +303,10 @@ setup_ipsec() teardown_ipsec() { - run_cmd host-1 ip xfrm state flush - run_cmd host-1 ip xfrm policy flush - run_cmd host-2 ip xfrm state flush - run_cmd host-2 ip xfrm policy flush + run_cmd $host_1 ip xfrm state 
flush + run_cmd $host_1 ip xfrm policy flush + run_cmd $host_2 ip xfrm state flush + run_cmd $host_2 ip xfrm policy flush } ################################################################################ @@ -322,16 +316,16 @@ run_ping() { local desc="$1" - run_cmd host-1 ping -c1 -w1 172.16.1.2 + run_cmd $host_1 ping -c1 -w1 172.16.1.2 log_test $? 0 "IPv4 basic L2TP tunnel ${desc}" - run_cmd host-1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2 + run_cmd $host_1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2 log_test $? 0 "IPv4 route through L2TP tunnel ${desc}" - run_cmd host-1 ${ping6} -c1 -w1 fc00:1::2 + run_cmd $host_1 ${ping6} -c1 -w1 fc00:1::2 log_test $? 0 "IPv6 basic L2TP tunnel ${desc}" - run_cmd host-1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2 + run_cmd $host_1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2 log_test $? 0 "IPv6 route through L2TP tunnel ${desc}" } @@ -344,16 +338,16 @@ run_tests() setup_ipsec run_ping "- with IPsec" - run_cmd host-1 ping -c1 -w1 172.16.1.2 + run_cmd $host_1 ping -c1 -w1 172.16.1.2 log_test $? 0 "IPv4 basic L2TP tunnel ${desc}" - run_cmd host-1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2 + run_cmd $host_1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2 log_test $? 0 "IPv4 route through L2TP tunnel ${desc}" - run_cmd host-1 ${ping6} -c1 -w1 fc00:1::2 + run_cmd $host_1 ${ping6} -c1 -w1 fc00:1::2 log_test $? 0 "IPv6 basic L2TP tunnel - with IPsec" - run_cmd host-1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2 + run_cmd $host_1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2 log_test $? 
0 "IPv6 route through L2TP tunnel - with IPsec" teardown_ipsec diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh new file mode 100644 index 0000000000..f9fe182dfb --- /dev/null +++ b/tools/testing/selftests/net/lib.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +############################################################################## +# Defines + +WAIT_TIMEOUT=${WAIT_TIMEOUT:=20} +BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +# namespace list created by setup_ns +NS_LIST="" + +############################################################################## +# Helpers +busywait() +{ + local timeout=$1; shift + + local start_time="$(date -u +%s%3N)" + while true + do + local out + out=$("$@") + local ret=$? + if ((!ret)); then + echo -n "$out" + return 0 + fi + + local current_time="$(date -u +%s%3N)" + if ((current_time - start_time > timeout)); then + echo -n "$out" + return 1 + fi + done +} + +cleanup_ns() +{ + local ns="" + local errexit=0 + local ret=0 + + # disable errexit temporary + if [[ $- =~ "e" ]]; then + errexit=1 + set +e + fi + + for ns in "$@"; do + ip netns delete "${ns}" &> /dev/null + if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then + echo "Warn: Failed to remove namespace $ns" + ret=1 + fi + done + + [ $errexit -eq 1 ] && set -e + return $ret +} + +cleanup_all_ns() +{ + cleanup_ns $NS_LIST +} + +# setup netns with given names as prefix. e.g +# setup_ns local remote +setup_ns() +{ + local ns="" + local ns_name="" + local ns_list="" + for ns_name in "$@"; do + # Some test may setup/remove same netns multi times + if unset ${ns_name} 2> /dev/null; then + ns="${ns_name,,}-$(mktemp -u XXXXXX)" + eval readonly ${ns_name}="$ns" + else + eval ns='$'${ns_name} + cleanup_ns "$ns" + + fi + + if ! 
ip netns add "$ns"; then + echo "Failed to create namespace $ns_name" + cleanup_ns "$ns_list" + return $ksft_skip + fi + ip -n "$ns" link set lo up + ns_list="$ns_list $ns" + done + NS_LIST="$NS_LIST $ns_list" +} diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 7f89623f10..75fc95675e 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -20,7 +20,7 @@ flush_pids() ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null - for _ in $(seq 10); do + for _ in $(seq $((timeout_poll * 10))); do [ -z "$(ip netns pids "${ns}")" ] && break sleep 0.1 done @@ -91,6 +91,15 @@ chk_msk_nr() __chk_msk_nr "grep -c token:" "$@" } +chk_listener_nr() +{ + local expected=$1 + local msg="$2" + + __chk_nr "ss -nlHMON $ns | wc -l" "$expected" "$msg - mptcp" 0 + __chk_nr "ss -nlHtON $ns | wc -l" "$expected" "$msg - subflows" +} + wait_msk_nr() { local condition="grep -c token:" @@ -186,23 +195,6 @@ chk_msk_inuse() __chk_nr get_msk_inuse $expected "${msg}" 0 } -# $1: ns, $2: port -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - - local port_hex i - - port_hex="$(printf "%04X" "${port}")" - for i in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done -} - # $1: cestab nr chk_msk_cestab() { @@ -240,7 +232,7 @@ echo "a" | \ ip netns exec $ns \ ./mptcp_connect -p 10000 -l -t ${timeout_poll} -w 20 \ 0.0.0.0 >/dev/null & -wait_local_port_listen $ns 10000 +mptcp_lib_wait_local_port_listen $ns 10000 chk_msk_nr 0 "no msk on netns creation" chk_msk_listen 10000 @@ -265,7 +257,7 @@ echo "a" | \ ip netns exec $ns \ ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} -w 20 \ 0.0.0.0 >/dev/null & -wait_local_port_listen $ns 10001 +mptcp_lib_wait_local_port_listen $ns 10001 echo "b" | \ timeout ${timeout_test} \ ip 
netns exec $ns \ @@ -288,7 +280,7 @@ for I in `seq 1 $NR_CLIENTS`; do ./mptcp_connect -p $((I+10001)) -l -w 20 \ -t ${timeout_poll} 0.0.0.0 >/dev/null & done -wait_local_port_listen $ns $((NR_CLIENTS + 10001)) +mptcp_lib_wait_local_port_listen $ns $((NR_CLIENTS + 10001)) for I in `seq 1 $NR_CLIENTS`; do echo "b" | \ @@ -306,5 +298,21 @@ flush_pids chk_msk_inuse 0 "many->0" chk_msk_cestab 0 "many->0" +chk_listener_nr 0 "no listener sockets" +NR_SERVERS=100 +for I in $(seq 1 $NR_SERVERS); do + ip netns exec $ns ./mptcp_connect -p $((I + 20001)) \ + -t ${timeout_poll} -l 0.0.0.0 >/dev/null 2>&1 & +done +mptcp_lib_wait_local_port_listen $ns $((NR_SERVERS + 20001)) + +chk_listener_nr $NR_SERVERS "many listener sockets" + +# graceful termination +for I in $(seq 1 $NR_SERVERS); do + echo a | ip netns exec $ns ./mptcp_connect -p $((I + 20001)) 127.0.0.1 >/dev/null 2>&1 & +done +flush_pids + mptcp_lib_result_print_all_tap exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 3b971d1617..713de81822 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -1,6 +1,11 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# Double quotes to prevent globbing and word splitting is recommended in new +# code but we accept it, especially because there were too many before having +# address all other issues detected by shellcheck. +#shellcheck disable=SC2086 + . 
"$(dirname "${0}")/mptcp_lib.sh" time_start=$(date +%s) @@ -13,7 +18,6 @@ sout="" cin_disconnect="" cin="" cout="" -ksft_skip=4 capture=false timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) @@ -131,6 +135,8 @@ ns4="ns4-$rndh" TEST_COUNT=0 TEST_GROUP="" +# This function is used in the cleanup trap +#shellcheck disable=SC2317 cleanup() { rm -f "$cin_disconnect" "$cout_disconnect" @@ -225,8 +231,9 @@ set_ethtool_flags() { local dev="$2" local flags="$3" - ip netns exec $ns ethtool -K $dev $flags 2>/dev/null - [ $? -eq 0 ] && echo "INFO: set $ns dev $dev: ethtool -K $flags" + if ip netns exec $ns ethtool -K $dev $flags 2>/dev/null; then + echo "INFO: set $ns dev $dev: ethtool -K $flags" + fi } set_random_ethtool_flags() { @@ -254,31 +261,6 @@ else set_ethtool_flags "$ns4" ns4eth3 "$ethtool_args" fi -print_file_err() -{ - ls -l "$1" 1>&2 - echo "Trailing bytes are: " - tail -c 27 "$1" -} - -check_transfer() -{ - local in=$1 - local out=$2 - local what=$3 - - cmp "$in" "$out" > /dev/null 2>&1 - if [ $? 
-ne 0 ] ;then - echo "[ FAIL ] $what does not match (in, out):" - print_file_err "$in" - print_file_err "$out" - - return 1 - fi - - return 0 -} - check_mptcp_disabled() { local disabled_ns="ns_disabled-$rndh" @@ -335,23 +317,6 @@ do_ping() return 0 } -# $1: ns, $2: port -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - - local port_hex i - - port_hex="$(printf "%04X" "${port}")" - for i in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done -} - do_transfer() { local listener_ns="$1" @@ -363,19 +328,19 @@ do_transfer() local extra_args="$7" local port - port=$((10000+$TEST_COUNT)) + port=$((10000+TEST_COUNT)) TEST_COUNT=$((TEST_COUNT+1)) if [ "$rcvbuf" -gt 0 ]; then - extra_args="$extra_args -R $rcvbuf" + extra_args+=" -R $rcvbuf" fi if [ "$sndbuf" -gt 0 ]; then - extra_args="$extra_args -S $sndbuf" + extra_args+=" -S $sndbuf" fi if [ -n "$testmode" ]; then - extra_args="$extra_args -m $testmode" + extra_args+=" -m $testmode" fi if [ -n "$extra_args" ] && $options_log; then @@ -420,12 +385,20 @@ do_transfer() nstat -n fi - local stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") - local stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") - local stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") - local stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") - local stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") - local stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") + local stat_synrx_last_l + local stat_ackrx_last_l + local stat_cookietx_last + local stat_cookierx_last + local stat_csum_err_s + local stat_csum_err_c + local stat_tcpfb_last_l + stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" 
"MPTcpExtMPCapableSYNRX") + stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") + stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") + stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") + stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") + stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") timeout ${timeout_test} \ ip netns exec ${listener_ns} \ @@ -433,7 +406,7 @@ do_transfer() $extra_args $local_addr < "$sin" > "$sout" & local spid=$! - wait_local_port_listen "${listener_ns}" "${port}" + mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" local start start=$(date +%s%3N) @@ -483,16 +456,24 @@ do_transfer() return 1 fi - check_transfer $sin $cout "file received by client" + mptcp_lib_check_transfer $sin $cout "file received by client" retc=$? - check_transfer $cin $sout "file received by server" + mptcp_lib_check_transfer $cin $sout "file received by server" rets=$? 
- local stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") - local stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") - local stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") - local stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") - local stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") + local extra="" + local stat_synrx_now_l + local stat_ackrx_now_l + local stat_cookietx_now + local stat_cookierx_now + local stat_ooo_now + local stat_tcpfb_now_l + stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") + stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") + stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") + stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") expect_synrx=$((stat_synrx_last_l)) expect_ackrx=$((stat_ackrx_last_l)) @@ -501,8 +482,8 @@ do_transfer() cookies=${cookies##*=} if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then - expect_synrx=$((stat_synrx_last_l+$connect_per_transfer)) - expect_ackrx=$((stat_ackrx_last_l+$connect_per_transfer)) + expect_synrx=$((stat_synrx_last_l+connect_per_transfer)) + expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer)) fi if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then @@ -510,66 +491,75 @@ do_transfer() "${stat_synrx_now_l}" "${expect_synrx}" 1>&2 retc=1 fi - if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} -a ${stat_ooo_now} -eq 0 ]; then + if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then if [ ${stat_ooo_now} -eq 0 ]; then printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \ "${stat_ackrx_now_l}" 
"${expect_ackrx}" 1>&2 rets=1 else - printf "[ Note ] fallback due to TCP OoO" + extra+=" [ Note ] fallback due to TCP OoO" fi fi if $checksum; then - local csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") - local csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") + local csum_err_s + local csum_err_c + csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") + csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") local csum_err_s_nr=$((csum_err_s - stat_csum_err_s)) if [ $csum_err_s_nr -gt 0 ]; then - printf "[ FAIL ]\nserver got $csum_err_s_nr data checksum error[s]" + printf "[ FAIL ]\nserver got %d data checksum error[s]" ${csum_err_s_nr} rets=1 fi local csum_err_c_nr=$((csum_err_c - stat_csum_err_c)) if [ $csum_err_c_nr -gt 0 ]; then - printf "[ FAIL ]\nclient got $csum_err_c_nr data checksum error[s]" + printf "[ FAIL ]\nclient got %d data checksum error[s]" ${csum_err_c_nr} retc=1 fi fi - if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then - printf "[ OK ]" - mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}" - else - mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" + if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then + mptcp_lib_pr_fail "unexpected fallback to TCP" + rets=1 fi if [ $cookies -eq 2 ];then if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then - printf " WARN: CookieSent: did not advance" + extra+=" WARN: CookieSent: did not advance" fi if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then - printf " WARN: CookieRecv: did not advance" + extra+=" WARN: CookieRecv: did not advance" fi else if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then - printf " WARN: CookieSent: changed" + extra+=" WARN: CookieSent: changed" fi if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then - printf " WARN: CookieRecv: changed" + extra+=" WARN: CookieRecv: changed" fi fi if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then - printf " 
WARN: SYNRX: expect %d, got %d (probably retransmissions)" \ - "${expect_synrx}" "${stat_synrx_now_l}" + extra+=" WARN: SYNRX: expect ${expect_synrx}," + extra+=" got ${stat_synrx_now_l} (probably retransmissions)" fi if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then - printf " WARN: ACKRX: expect %d, got %d (probably retransmissions)" \ - "${expect_ackrx}" "${stat_ackrx_now_l}" + extra+=" WARN: ACKRX: expect ${expect_ackrx}," + extra+=" got ${stat_ackrx_now_l} (probably retransmissions)" + fi + + if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then + printf "[ OK ]%s\n" "${extra:1}" + mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}" + else + if [ -n "${extra}" ]; then + printf "%s\n" "${extra:1}" + fi + mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" fi - echo cat "$capout" [ $retc -eq 0 ] && [ $rets -eq 0 ] } @@ -592,9 +582,8 @@ make_file() ksize=$((SIZE / 1024)) rem=$((SIZE - (ksize * 1024))) - dd if=/dev/urandom of="$name" bs=1024 count=$ksize 2> /dev/null - dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$rem 2> /dev/null - echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" + mptcp_lib_make_file $name 1024 $ksize + dd if=/dev/urandom conv=notrunc of="$name" oflag=append bs=1 count=$rem 2> /dev/null echo "Created $name (size $(du -b "$name")) containing data sent by $who" } @@ -701,7 +690,7 @@ run_test_transparent() return fi -ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF" + if ! ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF" flush ruleset table inet mangle { chain divert { @@ -712,7 +701,7 @@ table inet mangle { } } EOF - if [ $? -ne 0 ]; then + then echo "SKIP: $msg, could not load nft ruleset" mptcp_lib_fail_if_expected_feature "nft rules" mptcp_lib_result_skip "${TEST_GROUP}" @@ -727,8 +716,7 @@ EOF local_addr="0.0.0.0" fi - ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100 - if [ $? -ne 0 ]; then + if ! 
ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100; then ip netns exec "$listener_ns" nft flush ruleset echo "SKIP: $msg, ip $r6flag rule failed" mptcp_lib_fail_if_expected_feature "ip rule" @@ -736,8 +724,7 @@ EOF return fi - ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100 - if [ $? -ne 0 ]; then + if ! ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100; then ip netns exec "$listener_ns" nft flush ruleset ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100 echo "SKIP: $msg, ip route add local $local_addr failed" @@ -897,10 +884,10 @@ mptcp_lib_result_code "${ret}" "ping tests" stop_if_error "Could not even run ping tests" [ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms -echo -n "INFO: Using loss of $tc_loss " -test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms " +tc_info="loss of $tc_loss " +test "$tc_delay" -gt 0 && tc_info+="delay $tc_delay ms " -reorder_delay=$(($tc_delay / 4)) +reorder_delay=$((tc_delay / 4)) if [ -z "${tc_reorder}" ]; then reorder1=$((RANDOM%10)) @@ -909,17 +896,17 @@ if [ -z "${tc_reorder}" ]; then if [ $reorder_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then tc_reorder="reorder ${reorder1}% ${reorder2}%" - echo -n "$tc_reorder with delay ${reorder_delay}ms " + tc_info+="$tc_reorder with delay ${reorder_delay}ms " fi elif [ "$tc_reorder" = "0" ];then tc_reorder="" elif [ "$reorder_delay" -gt 0 ];then # reordering requires some delay tc_reorder="reorder $tc_reorder" - echo -n "$tc_reorder with delay ${reorder_delay}ms " + tc_info+="$tc_reorder with delay ${reorder_delay}ms " fi -echo "on ns3eth4" +echo "INFO: Using ${tc_info}on ns3eth4" tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index e6b778a9a9..24be952b4d 100755 --- 
a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -56,6 +56,8 @@ unset FAILING_LINKS unset test_linkfail unset addr_nr_ns1 unset addr_nr_ns2 +unset cestab_ns1 +unset cestab_ns2 unset sflags unset fastclose unset fullmesh @@ -516,13 +518,6 @@ get_failed_tests_ids() done | sort -n } -print_file_err() -{ - ls -l "$1" 1>&2 - echo -n "Trailing bytes are: " - tail -c 27 "$1" -} - check_transfer() { local in=$1 @@ -553,8 +548,8 @@ check_transfer() local sum=$((0${a} + 0${b})) if [ $check_invert -eq 0 ] || [ $sum -ne $((0xff)) ]; then fail_test "$what does not match (in, out):" - print_file_err "$in" - print_file_err "$out" + mptcp_lib_print_file_err "$in" + mptcp_lib_print_file_err "$out" return 1 else @@ -592,24 +587,6 @@ link_failure() done } -# $1: ns, $2: port -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - - local port_hex - port_hex="$(printf "%04X" "${port}")" - - local i - for i in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done -} - rm_addr_count() { mptcp_lib_get_counter "${1}" "MPTcpExtRmAddr" @@ -796,7 +773,7 @@ pm_nl_check_endpoint() [ -n "$_flags" ]; flags="flags $_flags" shift elif [ $1 = "dev" ]; then - [ -n "$2" ]; dev="dev $1" + [ -n "$2" ]; dev="dev $2" shift elif [ $1 = "id" ]; then _id=$2 @@ -822,18 +799,18 @@ pm_nl_check_endpoint() line="${line% }" # the dump order is: address id flags port dev [ -n "$addr" ] && expected_line="$addr" - expected_line="$expected_line $id" - [ -n "$_flags" ] && expected_line="$expected_line ${_flags//","/" "}" - [ -n "$dev" ] && expected_line="$expected_line $dev" - [ -n "$port" ] && expected_line="$expected_line $port" + expected_line+=" $id" + [ -n "$_flags" ] && expected_line+=" ${_flags//","/" "}" + [ -n "$dev" ] && expected_line+=" $dev" + [ -n "$port" ] && expected_line+=" $port" else 
line=$(ip netns exec $ns ./pm_nl_ctl get $_id) # the dump order is: id flags dev address port expected_line="$id" - [ -n "$flags" ] && expected_line="$expected_line $flags" - [ -n "$dev" ] && expected_line="$expected_line $dev" - [ -n "$addr" ] && expected_line="$expected_line $addr" - [ -n "$_port" ] && expected_line="$expected_line $_port" + [ -n "$flags" ] && expected_line+=" $flags" + [ -n "$dev" ] && expected_line+=" $dev" + [ -n "$addr" ] && expected_line+=" $addr" + [ -n "$_port" ] && expected_line+=" $_port" fi if [ "$line" = "$expected_line" ]; then print_ok @@ -999,6 +976,34 @@ pm_nl_set_endpoint() fi } +chk_cestab_nr() +{ + local ns=$1 + local cestab=$2 + local count + + print_check "cestab $cestab" + count=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPCurrEstab") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$cestab" ]; then + fail_test "got $count current establish[s] expected $cestab" + else + print_ok + fi +} + +# $1 namespace 1, $2 namespace 2 +check_cestab() +{ + if [ -n "${cestab_ns1}" ]; then + chk_cestab_nr ${1} ${cestab_ns1} + fi + if [ -n "${cestab_ns2}" ]; then + chk_cestab_nr ${2} ${cestab_ns2} + fi +} + do_transfer() { local listener_ns="$1" @@ -1087,7 +1092,7 @@ do_transfer() fi local spid=$! - wait_local_port_listen "${listener_ns}" "${port}" + mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" extra_cl_args="$extra_args $extra_cl_args" if [ "$test_linkfail" -eq 0 ];then @@ -1112,6 +1117,7 @@ do_transfer() local cpid=$! pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr + check_cestab $listener_ns $connector_ns wait $cpid local retc=$? 
@@ -1169,8 +1175,7 @@ make_file() local who=$2 local size=$3 - dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null - echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" + mptcp_lib_make_file $name 1024 $size print_info "Test file (size $size KB) for $who" } @@ -1256,7 +1261,7 @@ chk_csum_nr() print_check "sum" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr") if [ "$count" != "$csum_ns1" ]; then - extra_msg="$extra_msg ns1=$count" + extra_msg+=" ns1=$count" fi if [ -z "$count" ]; then print_skip @@ -1269,7 +1274,7 @@ chk_csum_nr() print_check "csum" count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr") if [ "$count" != "$csum_ns2" ]; then - extra_msg="$extra_msg ns2=$count" + extra_msg+=" ns2=$count" fi if [ -z "$count" ]; then print_skip @@ -1313,7 +1318,7 @@ chk_fail_nr() print_check "ftx" count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx") if [ "$count" != "$fail_tx" ]; then - extra_msg="$extra_msg,tx=$count" + extra_msg+=",tx=$count" fi if [ -z "$count" ]; then print_skip @@ -1327,7 +1332,7 @@ chk_fail_nr() print_check "failrx" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx") if [ "$count" != "$fail_rx" ]; then - extra_msg="$extra_msg,rx=$count" + extra_msg+=",rx=$count" fi if [ -z "$count" ]; then print_skip @@ -1362,7 +1367,7 @@ chk_fclose_nr() if [ -z "$count" ]; then print_skip elif [ "$count" != "$fclose_tx" ]; then - extra_msg="$extra_msg,tx=$count" + extra_msg+=",tx=$count" fail_test "got $count MP_FASTCLOSE[s] TX expected $fclose_tx" else print_ok @@ -1373,7 +1378,7 @@ chk_fclose_nr() if [ -z "$count" ]; then print_skip elif [ "$count" != "$fclose_rx" ]; then - extra_msg="$extra_msg,rx=$count" + extra_msg+=",rx=$count" fail_test "got $count MP_FASTCLOSE[s] RX expected $fclose_rx" else print_ok @@ -1742,7 +1747,7 @@ chk_rm_nr() count=$((count + cnt)) if [ "$count" != "$rm_subflow_nr" ]; then suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]" - extra_msg="$extra_msg simult" + extra_msg+=" simult" fi if [ 
$count -ge "$rm_subflow_nr" ] && \ [ "$count" -le "$((rm_subflow_nr *2 ))" ]; then @@ -2501,47 +2506,52 @@ add_tests() if reset "add single subflow"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 - addr_nr_ns2=1 speed=slow \ + addr_nr_ns2=1 speed=slow cestab_ns2=1 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 + chk_cestab_nr $ns2 0 fi # add signal address if reset "add signal address"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 1 1 - addr_nr_ns1=1 speed=slow \ + addr_nr_ns1=1 speed=slow cestab_ns1=1 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 + chk_cestab_nr $ns1 0 fi # add multiple subflows if reset "add multiple subflows"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 - addr_nr_ns2=2 speed=slow \ + addr_nr_ns2=2 speed=slow cestab_ns2=1 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 + chk_cestab_nr $ns2 0 fi # add multiple subflows IPv6 if reset "add multiple subflows IPv6"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 - addr_nr_ns2=2 speed=slow \ + addr_nr_ns2=2 speed=slow cestab_ns2=1 \ run_tests $ns1 $ns2 dead:beef:1::1 chk_join_nr 2 2 2 + chk_cestab_nr $ns2 0 fi # add multiple addresses IPv6 if reset "add multiple addresses IPv6"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 2 2 - addr_nr_ns1=2 speed=slow \ + addr_nr_ns1=2 speed=slow cestab_ns1=1 \ run_tests $ns1 $ns2 dead:beef:1::1 chk_join_nr 2 2 2 chk_add_nr 2 2 + chk_cestab_nr $ns1 0 fi } @@ -3469,6 +3479,75 @@ userspace_tests() kill_events_pids mptcp_lib_kill_wait $tests_pid fi + + # userspace pm create id 0 subflow + if reset_with_events "userspace pm create id 0 subflow" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 1 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! 
+ wait_mpj $ns2 + chk_mptcp_info subflows 0 subflows 0 + chk_subflows_total 1 1 + userspace_pm_add_sf $ns2 10.0.3.2 0 + chk_join_nr 1 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 + kill_events_pids + mptcp_lib_kill_wait $tests_pid + fi + + # userspace pm remove initial subflow + if reset_with_events "userspace pm remove initial subflow" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 1 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + wait_mpj $ns2 + userspace_pm_add_sf $ns2 10.0.3.2 20 + chk_join_nr 1 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 + userspace_pm_rm_sf $ns2 10.0.1.2 + # we don't look at the counter linked to the RM_ADDR but + # to the one linked to the subflows that have been removed + chk_rm_nr 0 1 + chk_rst_nr 0 0 invert + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 1 1 + kill_events_pids + mptcp_lib_kill_wait $tests_pid + fi + + # userspace pm send RM_ADDR for ID 0 + if reset_with_events "userspace pm send RM_ADDR for ID 0" && + continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then + set_userspace_pm $ns1 + pm_nl_set_limits $ns2 1 1 + speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + wait_mpj $ns1 + userspace_pm_add_addr $ns1 10.0.2.1 10 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 2 2 + chk_mptcp_info add_addr_signal 1 add_addr_accepted 1 + userspace_pm_rm_addr $ns1 0 + # we don't look at the counter linked to the subflows that + # have been removed but to the one linked to the RM_ADDR + chk_rm_nr 1 0 invert + chk_rst_nr 0 0 invert + chk_mptcp_info subflows 1 subflows 1 + chk_subflows_total 1 1 + kill_events_pids + mptcp_lib_kill_wait $tests_pid + fi } endpoint_tests() @@ -3510,6 +3589,8 @@ endpoint_tests() local tests_pid=$! 
wait_mpj $ns2 + pm_nl_check_endpoint "creation" \ + $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2 chk_subflow_nr "before delete" 2 chk_mptcp_info subflows 1 subflows 1 diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 8939d5c135..3777d66fc5 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -247,3 +247,54 @@ mptcp_lib_get_counter() { echo "${count}" } + +mptcp_lib_make_file() { + local name="${1}" + local bs="${2}" + local size="${3}" + + dd if=/dev/urandom of="${name}" bs="${bs}" count="${size}" 2> /dev/null + echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "${name}" +} + +# $1: file +mptcp_lib_print_file_err() { + ls -l "${1}" 1>&2 + echo "Trailing bytes are: " + tail -c 27 "${1}" +} + +# $1: input file ; $2: output file ; $3: what kind of file +mptcp_lib_check_transfer() { + local in="${1}" + local out="${2}" + local what="${3}" + + if ! cmp "$in" "$out" > /dev/null 2>&1; then + echo "[ FAIL ] $what does not match (in, out):" + mptcp_lib_print_file_err "$in" + mptcp_lib_print_file_err "$out" + + return 1 + fi + + return 0 +} + +# $1: ns, $2: port +mptcp_lib_wait_local_port_listen() { + local listener_ns="${1}" + local port="${2}" + + local port_hex + port_hex="$(printf "%04X" "${port}")" + + local _ + for _ in $(seq 10); do + ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ + awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) \ + {rc=0; exit}} END {exit rc}" && + break + sleep 0.1 + done +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index bfa744e350..c643872ddf 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -135,32 +135,6 @@ check_mark() return 0 } -print_file_err() -{ - ls -l "$1" 1>&2 - echo "Trailing bytes are: " - tail -c 27 "$1" -} - -check_transfer() -{ - local in=$1 - 
local out=$2 - local what=$3 - - cmp "$in" "$out" > /dev/null 2>&1 - if [ $? -ne 0 ] ;then - echo "[ FAIL ] $what does not match (in, out):" - print_file_err "$in" - print_file_err "$out" - ret=1 - - return 1 - fi - - return 0 -} - do_transfer() { local listener_ns="$1" @@ -232,7 +206,7 @@ do_transfer() check_mark $connector_ns 4 || retc=1 fi - check_transfer $cin $sout "file received by server" + mptcp_lib_check_transfer $cin $sout "file received by server" rets=$? mptcp_lib_result_code "${retc}" "mark ${ip}" @@ -251,8 +225,7 @@ make_file() local who=$2 local size=$3 - dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null - echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" + mptcp_lib_make_file $name 1024 $size echo "Created $name (size $size KB) containing data sent by $who" } diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 25693b37f8..8f9ddb3ad4 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -123,23 +123,6 @@ setup() grep -q ' kmemleak_init$\| lockdep_init$\| kasan_init$\| prove_locking$' /proc/kallsyms && slack=$((slack+550)) } -# $1: ns, $2: port -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - - local port_hex i - - port_hex="$(printf "%04X" "${port}")" - for i in $(seq 10); do - ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ - awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && - break - sleep 0.1 - done -} - do_transfer() { local cin=$1 @@ -179,7 +162,7 @@ do_transfer() 0.0.0.0 < "$sin" > "$sout" & local spid=$! 
- wait_local_port_listen "${ns3}" "${port}" + mptcp_lib_wait_local_port_listen "${ns3}" "${port}" timeout ${timeout_test} \ ip netns exec ${ns1} \ diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 4c62114de0..1b94a75604 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -164,22 +164,12 @@ print_title "Init" print_test "Created network namespaces ns1, ns2" test_pass -make_file() -{ - # Store a chunk of data in a file to transmit over an MPTCP connection - local name=$1 - local ksize=1 - - dd if=/dev/urandom of="$name" bs=2 count=$ksize 2> /dev/null - echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" -} - make_connection() { if [ -z "$file" ]; then file=$(mktemp) fi - make_file "$file" "client" + mptcp_lib_make_file "$file" 2 1 local is_v6=$1 local app_port=$app4_port diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh index 825ffec85c..89c22f5320 100755 --- a/tools/testing/selftests/net/msg_zerocopy.sh +++ b/tools/testing/selftests/net/msg_zerocopy.sh @@ -70,23 +70,22 @@ case "${TXMODE}" in esac # Start of state changes: install cleanup handler -save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})" cleanup() { ip netns del "${NS2}" ip netns del "${NS1}" - sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}" } trap cleanup EXIT -# Configure system settings -sysctl -w -q "${path_sysctl_mem}=1000000" - # Create virtual ethernet pair between network namespaces ip netns add "${NS1}" ip netns add "${NS2}" +# Configure system settings +ip netns exec "${NS1}" sysctl -w -q "${path_sysctl_mem}=1000000" +ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000" + ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \ peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}" diff --git a/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh 
b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh index 86e621b7b9..5db69dad0c 100755 --- a/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh +++ b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh @@ -10,16 +10,12 @@ # 0 1 0 Don't update NC # 0 1 1 Add a STALE NC entry +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 PAUSE_ON_FAIL=no PAUSE=no -HOST_NS="ns-host" -ROUTER_NS="ns-router" - HOST_INTF="veth-host" ROUTER_INTF="veth-router" @@ -29,11 +25,6 @@ SUBNET_WIDTH=64 ROUTER_ADDR_WITH_MASK="${ROUTER_ADDR}/${SUBNET_WIDTH}" HOST_ADDR_WITH_MASK="${HOST_ADDR}/${SUBNET_WIDTH}" -IP_HOST="ip -6 -netns ${HOST_NS}" -IP_HOST_EXEC="ip netns exec ${HOST_NS}" -IP_ROUTER="ip -6 -netns ${ROUTER_NS}" -IP_ROUTER_EXEC="ip netns exec ${ROUTER_NS}" - tcpdump_stdout= tcpdump_stderr= @@ -76,8 +67,12 @@ setup() # Setup two namespaces and a veth tunnel across them. # On end of the tunnel is a router and the other end is a host. - ip netns add ${HOST_NS} - ip netns add ${ROUTER_NS} + setup_ns HOST_NS ROUTER_NS + IP_HOST="ip -6 -netns ${HOST_NS}" + IP_HOST_EXEC="ip netns exec ${HOST_NS}" + IP_ROUTER="ip -6 -netns ${ROUTER_NS}" + IP_ROUTER_EXEC="ip netns exec ${ROUTER_NS}" + ${IP_ROUTER} link add ${ROUTER_INTF} type veth \ peer name ${HOST_INTF} netns ${HOST_NS} diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh new file mode 100644 index 0000000000..6596fe03c7 --- /dev/null +++ b/tools/testing/selftests/net/net_helper.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Helper functions + +wait_local_port_listen() +{ + local listener_ns="${1}" + local port="${2}" + local protocol="${3}" + local pattern + local i + + pattern=":$(printf "%04X" "${port}") " + + # for tcp protocol additionally check the socket state + [ ${protocol} = "tcp" ] && pattern="${pattern}0A" + for i in $(seq 10); do + if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \ + 
/proc/net/"${protocol}"* | grep -q "${pattern}"; then + break + fi + sleep 0.1 + done +} diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh index 7d3d3fc994..6974474c26 100755 --- a/tools/testing/selftests/net/netns-name.sh +++ b/tools/testing/selftests/net/netns-name.sh @@ -1,9 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +source lib.sh set -o pipefail -NS=netns-name-test DEV=dummy-dev0 DEV2=dummy-dev1 ALT_NAME=some-alt-name @@ -11,7 +11,7 @@ ALT_NAME=some-alt-name RET_CODE=0 cleanup() { - ip netns del $NS + cleanup_ns $NS $test_ns } trap cleanup EXIT @@ -21,50 +21,50 @@ fail() { RET_CODE=1 } -ip netns add $NS +setup_ns NS test_ns # # Test basic move without a rename # ip -netns $NS link add name $DEV type dummy || fail -ip -netns $NS link set dev $DEV netns 1 || +ip -netns $NS link set dev $DEV netns $test_ns || fail "Can't perform a netns move" -ip link show dev $DEV >> /dev/null || fail "Device not found after move" -ip link del $DEV || fail +ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move" +ip -netns $test_ns link del $DEV || fail # # Test move with a conflict # -ip link add name $DEV type dummy +ip -netns $test_ns link add name $DEV type dummy ip -netns $NS link add name $DEV type dummy || fail -ip -netns $NS link set dev $DEV netns 1 2> /dev/null && +ip -netns $NS link set dev $DEV netns $test_ns 2> /dev/null && fail "Performed a netns move with a name conflict" -ip link show dev $DEV >> /dev/null || fail "Device not found after move" +ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move" ip -netns $NS link del $DEV || fail -ip link del $DEV || fail +ip -netns $test_ns link del $DEV || fail # # Test move with a conflict and rename # -ip link add name $DEV type dummy +ip -netns $test_ns link add name $DEV type dummy ip -netns $NS link add name $DEV type dummy || fail -ip -netns $NS link set dev $DEV netns 1 name $DEV2 || +ip 
-netns $NS link set dev $DEV netns $test_ns name $DEV2 || fail "Can't perform a netns move with rename" -ip link del $DEV2 || fail -ip link del $DEV || fail +ip -netns $test_ns link del $DEV2 || fail +ip -netns $test_ns link del $DEV || fail # # Test dup alt-name with netns move # -ip link add name $DEV type dummy || fail -ip link property add dev $DEV altname $ALT_NAME || fail +ip -netns $test_ns link add name $DEV type dummy || fail +ip -netns $test_ns link property add dev $DEV altname $ALT_NAME || fail ip -netns $NS link add name $DEV2 type dummy || fail ip -netns $NS link property add dev $DEV2 altname $ALT_NAME || fail -ip -netns $NS link set dev $DEV2 netns 1 2> /dev/null && +ip -netns $NS link set dev $DEV2 netns $test_ns 2> /dev/null && fail "Moved with alt-name dup" -ip link del $DEV || fail +ip -netns $test_ns link del $DEV || fail ip -netns $NS link del $DEV2 || fail # @@ -72,11 +72,11 @@ ip -netns $NS link del $DEV2 || fail # ip -netns $NS link add name $DEV type dummy || fail ip -netns $NS link property add dev $DEV altname $ALT_NAME || fail -ip -netns $NS link set dev $DEV netns 1 || fail -ip link show dev $ALT_NAME >> /dev/null || fail "Can't find alt-name after move" -ip -netns $NS link show dev $ALT_NAME 2> /dev/null && +ip -netns $NS link set dev $DEV netns $test_ns || fail +ip -netns $test_ns link show dev $ALT_NAME >> /dev/null || fail "Can't find alt-name after move" +ip -netns $NS link show dev $ALT_NAME 2> /dev/null && fail "Can still find alt-name after move" -ip link del $DEV || fail +ip -netns $test_ns link del $DEV || fail echo -ne "$(basename $0) \t\t\t\t" if [ $RET_CODE -eq 0 ]; then diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index d65fdd407d..cfc8495802 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -1336,16 +1336,16 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() { else TCPDST="TCP:[${dst}]:50000" fi - ${ns_b} socat -T 3 -u -6 
TCP-LISTEN:50000 STDOUT > $tmpoutfile & + ${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000,reuseaddr STDOUT > $tmpoutfile & local socat_pid=$! wait_local_port_listen ${NS_B} 50000 tcp dd if=/dev/zero status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3 + wait ${socat_pid} size=$(du -sb $tmpoutfile) size=${size%%/tmp/*} - wait ${socat_pid} [ $size -ne 1048576 ] && err "File size $size mismatches exepcted value in locally bridged vxlan test" && return 1 done diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c index 7c5b12664b..bfb07dc495 100644 --- a/tools/testing/selftests/net/reuseaddr_conflict.c +++ b/tools/testing/selftests/net/reuseaddr_conflict.c @@ -109,6 +109,6 @@ int main(void) fd1 = open_port(0, 1); if (fd1 >= 0) error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6"); - fprintf(stderr, "Success"); + fprintf(stderr, "Success\n"); return 0; } diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 26827ea4e3..874a2952aa 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -28,6 +28,7 @@ ALL_TESTS=" kci_test_neigh_get kci_test_bridge_parent_id kci_test_address_proto + kci_test_enslave_bonding " devdummy="test-dummy0" @@ -35,8 +36,7 @@ VERBOSE=0 PAUSE=no PAUSE_ON_FAIL=no -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source lib.sh # set global exit status, but never reset nonzero one. check_err() @@ -440,7 +440,6 @@ kci_test_encap_vxlan() local ret=0 vxlan="test-vxlan0" vlan="test-vlan0" - testns="$1" run_cmd ip -netns "$testns" link add "$vxlan" type vxlan id 42 group 239.1.1.1 \ dev "$devdummy" dstport 4789 if [ $? -ne 0 ]; then @@ -485,7 +484,6 @@ kci_test_encap_fou() { local ret=0 name="test-fou" - testns="$1" run_cmd_grep 'Usage: ip fou' ip fou help if [ $? 
-ne 0 ];then end_test "SKIP: fou: iproute2 too old" @@ -517,9 +515,8 @@ kci_test_encap_fou() # test various encap methods, use netns to avoid unwanted interference kci_test_encap() { - testns="testns" local ret=0 - run_cmd ip netns add "$testns" + setup_ns testns if [ $? -ne 0 ]; then end_test "SKIP encap tests: cannot add net namespace $testns" return $ksft_skip @@ -527,8 +524,8 @@ kci_test_encap() run_cmd ip -netns "$testns" link set lo up run_cmd ip -netns "$testns" link add name "$devdummy" type dummy run_cmd ip -netns "$testns" link set "$devdummy" up - run_cmd kci_test_encap_vxlan "$testns" - run_cmd kci_test_encap_fou "$testns" + run_cmd kci_test_encap_vxlan + run_cmd kci_test_encap_fou ip netns del "$testns" return $ret @@ -574,6 +571,10 @@ kci_test_macsec_offload() return $ksft_skip fi + if ! mount | grep -q debugfs; then + mount -t debugfs none /sys/kernel/debug/ &> /dev/null + fi + # setup netdevsim since dummydev doesn't have offload support if [ ! -w /sys/bus/netdevsim/new_device ] ; then run_cmd modprobe -q netdevsim @@ -738,6 +739,10 @@ kci_test_ipsec_offload() sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/ probed=false + if ! mount | grep -q debugfs; then + mount -t debugfs none /sys/kernel/debug/ &> /dev/null + fi + # setup netdevsim since dummydev doesn't have offload support if [ ! -w /sys/bus/netdevsim/new_device ] ; then run_cmd modprobe -q netdevsim @@ -836,11 +841,10 @@ EOF kci_test_gretap() { - testns="testns" DEV_NS=gretap00 local ret=0 - run_cmd ip netns add "$testns" + setup_ns testns if [ $? -ne 0 ]; then end_test "SKIP gretap tests: cannot add net namespace $testns" return $ksft_skip @@ -878,11 +882,10 @@ kci_test_gretap() kci_test_ip6gretap() { - testns="testns" DEV_NS=ip6gretap00 local ret=0 - run_cmd ip netns add "$testns" + setup_ns testns if [ $? 
-ne 0 ]; then end_test "SKIP ip6gretap tests: cannot add net namespace $testns" return $ksft_skip @@ -920,7 +923,6 @@ kci_test_ip6gretap() kci_test_erspan() { - testns="testns" DEV_NS=erspan00 local ret=0 run_cmd_grep "^Usage:" ip link help erspan @@ -928,7 +930,7 @@ kci_test_erspan() end_test "SKIP: erspan: iproute2 too old" return $ksft_skip fi - run_cmd ip netns add "$testns" + setup_ns testns if [ $? -ne 0 ]; then end_test "SKIP erspan tests: cannot add net namespace $testns" return $ksft_skip @@ -970,7 +972,6 @@ kci_test_erspan() kci_test_ip6erspan() { - testns="testns" DEV_NS=ip6erspan00 local ret=0 run_cmd_grep "^Usage:" ip link help ip6erspan @@ -978,7 +979,7 @@ kci_test_ip6erspan() end_test "SKIP: ip6erspan: iproute2 too old" return $ksft_skip fi - run_cmd ip netns add "$testns" + setup_ns testns if [ $? -ne 0 ]; then end_test "SKIP ip6erspan tests: cannot add net namespace $testns" return $ksft_skip @@ -1022,8 +1023,6 @@ kci_test_ip6erspan() kci_test_fdb_get() { - IP="ip -netns testns" - BRIDGE="bridge -netns testns" brdev="test-br0" vxlandev="vxlan10" test_mac=de:ad:be:ef:13:37 @@ -1037,11 +1036,13 @@ kci_test_fdb_get() return $ksft_skip fi - run_cmd ip netns add testns + setup_ns testns if [ $? 
-ne 0 ]; then end_test "SKIP fdb get tests: cannot add net namespace $testns" return $ksft_skip fi + IP="ip -netns $testns" + BRIDGE="bridge -netns $testns" run_cmd $IP link add "$vxlandev" type vxlan id 10 local $localip \ dstport 4789 run_cmd $IP link add name "$brdev" type bridge @@ -1052,7 +1053,7 @@ kci_test_fdb_get() run_cmd_grep "dev $vxlandev master $brdev" $BRIDGE fdb get $test_mac br "$brdev" run_cmd_grep "dev $vxlandev dst $dstip" $BRIDGE fdb get $test_mac dev "$vxlandev" self - ip netns del testns &>/dev/null + ip netns del $testns &>/dev/null if [ $ret -ne 0 ]; then end_test "FAIL: bridge fdb get" @@ -1239,6 +1240,31 @@ kci_test_address_proto() return $ret } +kci_test_enslave_bonding() +{ + local bond="bond123" + local ret=0 + + setup_ns testns + if [ $? -ne 0 ]; then + end_test "SKIP bonding tests: cannot add net namespace $testns" + return $ksft_skip + fi + + run_cmd ip -netns $testns link add dev $bond type bond mode balance-rr + run_cmd ip -netns $testns link add dev $devdummy type dummy + run_cmd ip -netns $testns link set dev $devdummy up + run_cmd ip -netns $testns link set dev $devdummy master $bond down + if [ $ret -ne 0 ]; then + end_test "FAIL: initially up interface added to a bond and set down" + ip netns del "$testns" + return 1 + fi + + end_test "PASS: enslave interface in a bond" + ip netns del "$testns" +} + kci_test_rtnl() { local current_test diff --git a/tools/testing/selftests/net/sctp_vrf.sh b/tools/testing/selftests/net/sctp_vrf.sh index c721e952e5..c854034b6a 100755 --- a/tools/testing/selftests/net/sctp_vrf.sh +++ b/tools/testing/selftests/net/sctp_vrf.sh @@ -6,13 +6,11 @@ # SERVER_NS # CLIENT_NS2 (veth1) <---> (veth2) -> vrf_s2 -CLIENT_NS1="client-ns1" -CLIENT_NS2="client-ns2" +source lib.sh CLIENT_IP4="10.0.0.1" CLIENT_IP6="2000::1" CLIENT_PORT=1234 -SERVER_NS="server-ns" SERVER_IP4="10.0.0.2" SERVER_IP6="2000::2" SERVER_PORT=1234 @@ -20,9 +18,7 @@ SERVER_PORT=1234 setup() { modprobe sctp modprobe sctp_diag - ip netns add 
$CLIENT_NS1 - ip netns add $CLIENT_NS2 - ip netns add $SERVER_NS + setup_ns CLIENT_NS1 CLIENT_NS2 SERVER_NS ip net exec $CLIENT_NS1 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null ip net exec $CLIENT_NS2 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null @@ -67,9 +63,7 @@ setup() { cleanup() { ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null - ip netns del "$CLIENT_NS1" - ip netns del "$CLIENT_NS2" - ip netns del "$SERVER_NS" + cleanup_ns $CLIENT_NS1 $CLIENT_NS2 $SERVER_NS } wait_server() { diff --git a/tools/testing/selftests/net/settings b/tools/testing/selftests/net/settings index dfc27cdc6c..ed8418e821 100644 --- a/tools/testing/selftests/net/settings +++ b/tools/testing/selftests/net/settings @@ -1 +1 @@ -timeout=1500 +timeout=3600 diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh index e57bbfbc52..2070b57849 100755..100644 --- a/tools/testing/selftests/net/setup_loopback.sh +++ b/tools/testing/selftests/net/setup_loopback.sh @@ -5,6 +5,8 @@ readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout" readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs" readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})" readonly HARD_IRQS="$(< ${IRQ_PATH})" +readonly server_ns=$(mktemp -u server-XXXXXXXX) +readonly client_ns=$(mktemp -u client-XXXXXXXX) netdev_check_for_carrier() { local -r dev="$1" @@ -97,12 +99,12 @@ setup_interrupt() { setup_ns() { # Set up server_ns namespace and client_ns namespace - setup_macvlan_ns "${dev}" server_ns server "${SERVER_MAC}" - setup_macvlan_ns "${dev}" client_ns client "${CLIENT_MAC}" + setup_macvlan_ns "${dev}" ${server_ns} server "${SERVER_MAC}" + setup_macvlan_ns "${dev}" ${client_ns} client "${CLIENT_MAC}" } cleanup_ns() { - cleanup_macvlan_ns server_ns server client_ns client + cleanup_macvlan_ns ${server_ns} server ${client_ns} client } setup() { diff --git a/tools/testing/selftests/net/setup_veth.sh 
b/tools/testing/selftests/net/setup_veth.sh index 227fd1076f..1f78a87f6f 100644 --- a/tools/testing/selftests/net/setup_veth.sh +++ b/tools/testing/selftests/net/setup_veth.sh @@ -1,6 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +readonly server_ns=$(mktemp -u server-XXXXXXXX) +readonly client_ns=$(mktemp -u client-XXXXXXXX) + setup_veth_ns() { local -r link_dev="$1" local -r ns_name="$2" @@ -19,14 +22,14 @@ setup_ns() { # Set up server_ns namespace and client_ns namespace ip link add name server type veth peer name client - setup_veth_ns "${dev}" server_ns server "${SERVER_MAC}" - setup_veth_ns "${dev}" client_ns client "${CLIENT_MAC}" + setup_veth_ns "${dev}" ${server_ns} server "${SERVER_MAC}" + setup_veth_ns "${dev}" ${client_ns} client "${CLIENT_MAC}" } cleanup_ns() { local ns_name - for ns_name in client_ns server_ns; do + for ns_name in ${client_ns} ${server_ns}; do [[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}" done } diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh index 3f06f4d286..5e861ad32a 100755 --- a/tools/testing/selftests/net/so_txtime.sh +++ b/tools/testing/selftests/net/so_txtime.sh @@ -5,6 +5,7 @@ set -e +readonly ksft_skip=4 readonly DEV="veth0" readonly BIN="./so_txtime" @@ -46,7 +47,7 @@ ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}" ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad -do_test() { +run_test() { local readonly IP="$1" local readonly CLOCK="$2" local readonly TXARGS="$3" @@ -64,12 +65,25 @@ do_test() { fi local readonly START="$(date +%s%N --date="+ 0.1 seconds")" + ip netns exec "${NS2}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${RXARGS}" -r & ip netns exec "${NS1}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${TXARGS}" wait "$!" } +do_test() { + run_test $@ + [ $? 
-ne 0 ] && ret=1 +} + +do_fail_test() { + run_test $@ + [ $? -eq 0 ] && ret=1 +} + ip netns exec "${NS1}" tc qdisc add dev "${DEV}" root fq +set +e +ret=0 do_test 4 mono a,-1 a,-1 do_test 6 mono a,0 a,0 do_test 6 mono a,10 a,10 @@ -77,13 +91,20 @@ do_test 4 mono a,10,b,20 a,10,b,20 do_test 6 mono a,20,b,10 b,20,a,20 if ip netns exec "${NS1}" tc qdisc replace dev "${DEV}" root etf clockid CLOCK_TAI delta 400000; then - ! do_test 4 tai a,-1 a,-1 - ! do_test 6 tai a,0 a,0 + do_fail_test 4 tai a,-1 a,-1 + do_fail_test 6 tai a,0 a,0 do_test 6 tai a,10 a,10 do_test 4 tai a,10,b,20 a,10,b,20 do_test 6 tai a,20,b,10 b,10,a,20 else echo "tc ($(tc -V)) does not support qdisc etf. skipping" + [ $ret -eq 0 ] && ret=$ksft_skip fi -echo OK. All tests passed +if [ $ret -eq 0 ]; then + echo OK. All tests passed +elif [[ $ret -ne $ksft_skip && -n "$KSFT_MACHINE_SLOW" ]]; then + echo "Ignoring errors due to slow environment" 1>&2 + ret=0 +fi +exit $ret diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh index 441eededa0..02d6170407 100755 --- a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh @@ -193,8 +193,7 @@ # +---------------------------------------------------+ # -# Kselftest framework requirement - SKIP code is 4. 
-ksft_skip=4 +source lib.sh readonly LOCALSID_TABLE_ID=90 readonly IPv6_RT_NETWORK=fd00 @@ -250,26 +249,22 @@ cleanup() ip link del veth-rt-1 2>/dev/null || true ip link del veth-rt-2 2>/dev/null || true - # destroy routers rt-* and hosts hs-* - for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do - ip netns del ${ns} || true - done + cleanup_all_ns } # Setup the basic networking for the routers setup_rt_networking() { - local rt=$1 - local nsname=rt-${rt} + local id=$1 + eval local nsname=\${rt_${id}} - ip netns add ${nsname} - ip link set veth-rt-${rt} netns ${nsname} - ip -netns ${nsname} link set veth-rt-${rt} name veth0 + ip link set veth-rt-${id} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${id} name veth0 ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 - ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad ip -netns ${nsname} link set veth0 up ip -netns ${nsname} link set lo up @@ -279,16 +274,14 @@ setup_rt_networking() setup_hs() { - local hs=$1 - local rt=$2 + local hid=$1 + local rid=$2 local tid=$3 - local hsname=hs-t${tid}-${hs} - local rtname=rt-${rt} + eval local hsname=\${hs_t${tid}_${hid}} + eval local rtname=\${rt_${rid}} local rtveth=veth-t${tid} # set the networking for the host - ip netns add ${hsname} - ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -299,8 +292,8 @@ setup_hs() ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} ip -netns ${hsname} link set ${rtveth} netns ${rtname} - ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad - ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0 + ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad + ip -netns ${hsname} addr add 
${IPv4_HS_NETWORK}.${hid}/24 dev veth0 ip -netns ${hsname} link set veth0 up ip -netns ${hsname} link set lo up @@ -332,10 +325,8 @@ setup_vpn_config() local rtdst=$4 local tid=$5 - local hssrc_name=hs-t${tid}-${hssrc} - local hsdst_name=hs-t${tid}-${hsdst} - local rtsrc_name=rt-${rtsrc} - local rtdst_name=rt-${rtdst} + eval local rtsrc_name=\${rt_${rtsrc}} + eval local rtdst_name=\${rt_${rtdst}} local rtveth=veth-t${tid} local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6046 @@ -379,18 +370,21 @@ setup() { ip link add veth-rt-1 type veth peer name veth-rt-2 # setup the networking for router rt-1 and router rt-2 + setup_ns rt_1 rt_2 setup_rt_networking 1 setup_rt_networking 2 # setup two hosts for the tenant 100. # - host hs-1 is directly connected to the router rt-1; # - host hs-2 is directly connected to the router rt-2. + setup_ns hs_t100_1 hs_t100_2 setup_hs 1 1 100 #args: host router tenant setup_hs 2 2 100 # setup two hosts for the tenant 200 # - host hs-3 is directly connected to the router rt-1; # - host hs-4 is directly connected to the router rt-2. 
+ setup_ns hs_t200_3 hs_t200_4 setup_hs 3 1 200 setup_hs 4 2 200 @@ -409,8 +403,9 @@ check_rt_connectivity() { local rtsrc=$1 local rtdst=$2 + eval local nsname=\${rt_${rtsrc}} - ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ + ip netns exec ${nsname} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ >/dev/null 2>&1 } @@ -428,8 +423,9 @@ check_hs_ipv6_connectivity() local hssrc=$1 local hsdst=$2 local tid=$3 + eval local nsname=\${hs_t${tid}_${hssrc}} - ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \ ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1 } @@ -438,8 +434,9 @@ check_hs_ipv4_connectivity() local hssrc=$1 local hsdst=$2 local tid=$3 + eval local nsname=\${hs_t${tid}_${hssrc}} - ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \ ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1 } diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh index f962823628..79fb81e63c 100755 --- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh @@ -163,8 +163,7 @@ # +---------------------------------------------------+ # -# Kselftest framework requirement - SKIP code is 4. 
-ksft_skip=4 +source lib.sh readonly LOCALSID_TABLE_ID=90 readonly IPv6_RT_NETWORK=fd00 @@ -219,27 +218,22 @@ cleanup() ip link del veth-rt-1 2>/dev/null || true ip link del veth-rt-2 2>/dev/null || true - # destroy routers rt-* and hosts hs-* - for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do - ip netns del ${ns} || true - done + cleanup_all_ns } # Setup the basic networking for the routers setup_rt_networking() { - local rt=$1 - local nsname=rt-${rt} - - ip netns add ${nsname} + local id=$1 + eval local nsname=\${rt_${id}} ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 - ip link set veth-rt-${rt} netns ${nsname} - ip -netns ${nsname} link set veth-rt-${rt} name veth0 + ip link set veth-rt-${id} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${id} name veth0 - ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad ip -netns ${nsname} link set veth0 up ip -netns ${nsname} link set lo up @@ -249,16 +243,13 @@ setup_rt_networking() setup_hs() { - local hs=$1 - local rt=$2 + local hid=$1 + local rid=$2 local tid=$3 - local hsname=hs-t${tid}-${hs} - local rtname=rt-${rt} + eval local hsname=\${hs_t${tid}_${hid}} + eval local rtname=\${rt_${rid}} local rtveth=veth-t${tid} - # set the networking for the host - ip netns add ${hsname} - # disable the rp_filter otherwise the kernel gets confused about how # to route decap ipv4 packets. 
ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 @@ -266,7 +257,7 @@ setup_hs() ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} ip -netns ${hsname} link set ${rtveth} netns ${rtname} - ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0 + ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0 ip -netns ${hsname} link set veth0 up ip -netns ${hsname} link set lo up @@ -293,10 +284,8 @@ setup_vpn_config() local rtdst=$4 local tid=$5 - local hssrc_name=hs-t${tid}-${hssrc} - local hsdst_name=hs-t${tid}-${hsdst} - local rtsrc_name=rt-${rtsrc} - local rtdst_name=rt-${rtdst} + eval local rtsrc_name=\${rt_${rtsrc}} + eval local rtdst_name=\${rt_${rtdst}} local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6004 # set the encap route for encapsulating packets which arrive from the @@ -328,18 +317,21 @@ setup() { ip link add veth-rt-1 type veth peer name veth-rt-2 # setup the networking for router rt-1 and router rt-2 + setup_ns rt_1 rt_2 setup_rt_networking 1 setup_rt_networking 2 # setup two hosts for the tenant 100. # - host hs-1 is directly connected to the router rt-1; # - host hs-2 is directly connected to the router rt-2. + setup_ns hs_t100_1 hs_t100_2 setup_hs 1 1 100 #args: host router tenant setup_hs 2 2 100 # setup two hosts for the tenant 200 # - host hs-3 is directly connected to the router rt-1; # - host hs-4 is directly connected to the router rt-2. 
+ setup_ns hs_t200_3 hs_t200_4 setup_hs 3 1 200 setup_hs 4 2 200 @@ -358,8 +350,9 @@ check_rt_connectivity() { local rtsrc=$1 local rtdst=$2 + eval local nsname=\${rt_${rtsrc}} - ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ + ip netns exec ${nsname} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ >/dev/null 2>&1 } @@ -377,8 +370,9 @@ check_hs_connectivity() local hssrc=$1 local hsdst=$2 local tid=$3 + eval local nsname=\${hs_t${tid}_${hssrc}} - ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \ ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1 } diff --git a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh index b9b06ef80d..e408406d84 100755 --- a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh @@ -164,8 +164,7 @@ # +---------------------------------------------------+ # -# Kselftest framework requirement - SKIP code is 4. 
-ksft_skip=4 +source lib.sh readonly LOCALSID_TABLE_ID=90 readonly IPv6_RT_NETWORK=fd00 @@ -220,26 +219,22 @@ cleanup() ip link del veth-rt-1 2>/dev/null || true ip link del veth-rt-2 2>/dev/null || true - # destroy routers rt-* and hosts hs-* - for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do - ip netns del ${ns} || true - done + cleanup_all_ns } # Setup the basic networking for the routers setup_rt_networking() { - local rt=$1 - local nsname=rt-${rt} + local id=$1 + eval local nsname=\${rt_${id}} - ip netns add ${nsname} - ip link set veth-rt-${rt} netns ${nsname} - ip -netns ${nsname} link set veth-rt-${rt} name veth0 + ip link set veth-rt-${id} netns ${nsname} + ip -netns ${nsname} link set veth-rt-${id} name veth0 ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 - ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad + ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad ip -netns ${nsname} link set veth0 up ip -netns ${nsname} link set lo up @@ -248,22 +243,20 @@ setup_rt_networking() setup_hs() { - local hs=$1 - local rt=$2 + local hid=$1 + local rid=$2 local tid=$3 - local hsname=hs-t${tid}-${hs} - local rtname=rt-${rt} + eval local hsname=\${hs_t${tid}_${hid}} + eval local rtname=\${rt_${rid}} local rtveth=veth-t${tid} # set the networking for the host - ip netns add ${hsname} - ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} ip -netns ${hsname} link set ${rtveth} netns ${rtname} - ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad + ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad ip -netns ${hsname} link set veth0 up ip -netns ${hsname} link set lo up @@ -293,10 +286,8 @@ setup_vpn_config() local rtdst=$4 local tid=$5 - local 
hssrc_name=hs-t${tid}-${hssrc} - local hsdst_name=hs-t${tid}-${hsdst} - local rtsrc_name=rt-${rtsrc} - local rtdst_name=rt-${rtdst} + eval local rtsrc_name=\${rt_${rtsrc}} + eval local rtdst_name=\${rt_${rtdst}} local rtveth=veth-t${tid} local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6006 @@ -331,18 +322,21 @@ setup() { ip link add veth-rt-1 type veth peer name veth-rt-2 # setup the networking for router rt-1 and router rt-2 + setup_ns rt_1 rt_2 setup_rt_networking 1 setup_rt_networking 2 # setup two hosts for the tenant 100. # - host hs-1 is directly connected to the router rt-1; # - host hs-2 is directly connected to the router rt-2. + setup_ns hs_t100_1 hs_t100_2 setup_hs 1 1 100 #args: host router tenant setup_hs 2 2 100 # setup two hosts for the tenant 200 # - host hs-3 is directly connected to the router rt-1; # - host hs-4 is directly connected to the router rt-2. + setup_ns hs_t200_3 hs_t200_4 setup_hs 3 1 200 setup_hs 4 2 200 @@ -361,8 +355,9 @@ check_rt_connectivity() { local rtsrc=$1 local rtdst=$2 + eval local nsname=\${rt_${rtsrc}} - ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ + ip netns exec ${nsname} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \ >/dev/null 2>&1 } @@ -380,8 +375,9 @@ check_hs_connectivity() local hssrc=$1 local hsdst=$2 local tid=$3 + eval local nsname=\${hs_t${tid}_${hssrc}} - ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \ + ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \ ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1 } diff --git a/tools/testing/selftests/net/stress_reuseport_listen.sh b/tools/testing/selftests/net/stress_reuseport_listen.sh index 4de11da409..94d5d1a1c9 100755 --- a/tools/testing/selftests/net/stress_reuseport_listen.sh +++ b/tools/testing/selftests/net/stress_reuseport_listen.sh @@ -2,18 +2,18 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright (c) 2022 Meta Platforms, Inc. and affiliates. 
-NS='stress_reuseport_listen_ns' +source lib.sh NR_FILES=24100 SAVED_NR_FILES=$(ulimit -n) setup() { - ip netns add $NS + setup_ns NS ip netns exec $NS sysctl -q -w net.ipv6.ip_nonlocal_bind=1 ulimit -n $NR_FILES } cleanup() { - ip netns del $NS + cleanup_ns $NS ulimit -n $SAVED_NR_FILES } diff --git a/tools/testing/selftests/net/tcp_ao/.gitignore b/tools/testing/selftests/net/tcp_ao/.gitignore new file mode 100644 index 0000000000..e8bb81b715 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/.gitignore @@ -0,0 +1,2 @@ +*_ipv4 +*_ipv6 diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile new file mode 100644 index 0000000000..522d991e31 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: GPL-2.0 +TEST_BOTH_AF := bench-lookups +TEST_BOTH_AF += connect +TEST_BOTH_AF += connect-deny +TEST_BOTH_AF += icmps-accept icmps-discard +TEST_BOTH_AF += key-management +TEST_BOTH_AF += restore +TEST_BOTH_AF += rst +TEST_BOTH_AF += self-connect +TEST_BOTH_AF += seq-ext +TEST_BOTH_AF += setsockopt-closed +TEST_BOTH_AF += unsigned-md5 + +TEST_IPV4_PROGS := $(TEST_BOTH_AF:%=%_ipv4) +TEST_IPV6_PROGS := $(TEST_BOTH_AF:%=%_ipv6) + +TEST_GEN_PROGS := $(TEST_IPV4_PROGS) $(TEST_IPV6_PROGS) + +top_srcdir := ../../../../.. 
+include ../../lib.mk + +HOSTAR ?= ar + +LIBDIR := $(OUTPUT)/lib +LIB := $(LIBDIR)/libaotst.a +LDLIBS += $(LIB) -pthread +LIBDEPS := lib/aolib.h Makefile + +CFLAGS := -Wall -O2 -g -D_GNU_SOURCE -fno-strict-aliasing +CFLAGS += $(KHDR_INCLUDES) +CFLAGS += -iquote ./lib/ -I ../../../../include/ + +# Library +LIBSRC := kconfig.c netlink.c proc.c repair.c setup.c sock.c utils.c +LIBOBJ := $(LIBSRC:%.c=$(LIBDIR)/%.o) +EXTRA_CLEAN += $(LIBOBJ) $(LIB) + +$(LIB): $(LIBOBJ) + $(HOSTAR) rcs $@ $^ + +$(LIBDIR)/%.o: ./lib/%.c $(LIBDEPS) + mkdir -p $(LIBDIR) + $(CC) $< $(CFLAGS) $(CPPFLAGS) -o $@ -c + +$(TEST_GEN_PROGS): $(LIB) + +$(OUTPUT)/%_ipv4: %.c + $(LINK.c) $^ $(LDLIBS) -o $@ + +$(OUTPUT)/%_ipv6: %.c + $(LINK.c) -DIPV6_TEST $^ $(LDLIBS) -o $@ + +$(OUTPUT)/icmps-accept_ipv4: CFLAGS+= -DTEST_ICMPS_ACCEPT +$(OUTPUT)/icmps-accept_ipv6: CFLAGS+= -DTEST_ICMPS_ACCEPT +$(OUTPUT)/bench-lookups_ipv4: LDLIBS+= -lm +$(OUTPUT)/bench-lookups_ipv6: LDLIBS+= -lm diff --git a/tools/testing/selftests/net/tcp_ao/bench-lookups.c b/tools/testing/selftests/net/tcp_ao/bench-lookups.c new file mode 100644 index 0000000000..a1e6e007c2 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/bench-lookups.c @@ -0,0 +1,360 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov <dima@arista.com> */ +#include <arpa/inet.h> +#include <inttypes.h> +#include <math.h> +#include <stdlib.h> +#include <stdio.h> +#include <time.h> + +#include "../../../../include/linux/bits.h" +#include "../../../../include/linux/kernel.h" +#include "aolib.h" + +#define BENCH_NR_ITERS 100 /* number of times to run gathering statistics */ + +static void gen_test_ips(union tcp_addr *ips, size_t ips_nr, bool use_rand) +{ + union tcp_addr net = {}; + size_t i, j; + + if (inet_pton(TEST_FAMILY, TEST_NETWORK, &net) != 1) + test_error("Can't convert ip address %s", TEST_NETWORK); + + if (!use_rand) { + for (i = 0; i < ips_nr; i++) + ips[i] = gen_tcp_addr(net, 2 * i + 1); + return; + } + for (i = 0; i < ips_nr; i++) { + 
size_t r = (size_t)random() | 0x1; + + ips[i] = gen_tcp_addr(net, r); + + for (j = i - 1; j > 0 && i > 0; j--) { + if (!memcmp(&ips[i], &ips[j], sizeof(union tcp_addr))) { + i--; /* collision */ + break; + } + } + } +} + +static void test_add_routes(union tcp_addr *ips, size_t ips_nr) +{ + size_t i; + + for (i = 0; i < ips_nr; i++) { + union tcp_addr *p = (union tcp_addr *)&ips[i]; + int err; + + err = ip_route_add(veth_name, TEST_FAMILY, this_ip_addr, *p); + if (err && err != -EEXIST) + test_error("Failed to add route"); + } +} + +static void server_apply_keys(int lsk, union tcp_addr *ips, size_t ips_nr) +{ + size_t i; + + for (i = 0; i < ips_nr; i++) { + union tcp_addr *p = (union tcp_addr *)&ips[i]; + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, *p, -1, 100, 100)) + test_error("setsockopt(TCP_AO)"); + } +} + +static const size_t nr_keys[] = { 512, 1024, 2048, 4096, 8192 }; +static union tcp_addr *test_ips; + +struct bench_stats { + uint64_t min; + uint64_t max; + uint64_t nr; + double mean; + double s2; +}; + +static struct bench_tests { + struct bench_stats delete_last_key; + struct bench_stats add_key; + struct bench_stats delete_rand_key; + struct bench_stats connect_last_key; + struct bench_stats connect_rand_key; + struct bench_stats delete_async; +} bench_results[ARRAY_SIZE(nr_keys)]; + +#define NSEC_PER_SEC 1000000000ULL + +static void measure_call(struct bench_stats *st, + void (*f)(int, void *), int sk, void *arg) +{ + struct timespec start = {}, end = {}; + double delta; + uint64_t nsec; + + if (clock_gettime(CLOCK_MONOTONIC, &start)) + test_error("clock_gettime()"); + + f(sk, arg); + + if (clock_gettime(CLOCK_MONOTONIC, &end)) + test_error("clock_gettime()"); + + nsec = (end.tv_sec - start.tv_sec) * NSEC_PER_SEC; + if (end.tv_nsec >= start.tv_nsec) + nsec += end.tv_nsec - start.tv_nsec; + else + nsec -= start.tv_nsec - end.tv_nsec; + + if (st->nr == 0) { + st->min = st->max = nsec; + } else { + if (st->min > nsec) + st->min = nsec; + if (st->max < 
nsec) + st->max = nsec; + } + + /* Welford-Knuth algorithm */ + st->nr++; + delta = (double)nsec - st->mean; + st->mean += delta / st->nr; + st->s2 += delta * ((double)nsec - st->mean); +} + +static void delete_mkt(int sk, void *arg) +{ + struct tcp_ao_del *ao = arg; + + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_DEL_KEY, ao, sizeof(*ao))) + test_error("setsockopt(TCP_AO_DEL_KEY)"); +} + +static void add_back_mkt(int sk, void *arg) +{ + union tcp_addr *p = arg; + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, *p, -1, 100, 100)) + test_error("setsockopt(TCP_AO)"); +} + +static void bench_delete(int lsk, struct bench_stats *add, + struct bench_stats *del, + union tcp_addr *ips, size_t ips_nr, + bool rand_order, bool async) +{ + struct tcp_ao_del ao_del = {}; + union tcp_addr *p; + size_t i; + + ao_del.sndid = 100; + ao_del.rcvid = 100; + ao_del.del_async = !!async; + ao_del.prefix = DEFAULT_TEST_PREFIX; + + /* Remove the first added */ + p = (union tcp_addr *)&ips[0]; + tcp_addr_to_sockaddr_in(&ao_del.addr, p, 0); + + for (i = 0; i < BENCH_NR_ITERS; i++) { + measure_call(del, delete_mkt, lsk, (void *)&ao_del); + + /* Restore it back */ + measure_call(add, add_back_mkt, lsk, (void *)p); + + /* + * Slowest for FILO-linked-list: + * on (i) iteration removing ips[i] element. When it gets + * added to the list back - it becomes first to fetch, so + * on (i + 1) iteration go to ips[i + 1] element. 
+ */ + if (rand_order) + p = (union tcp_addr *)&ips[rand() % ips_nr]; + else + p = (union tcp_addr *)&ips[i % ips_nr]; + tcp_addr_to_sockaddr_in(&ao_del.addr, p, 0); + } +} + +static void bench_connect_srv(int lsk, union tcp_addr *ips, size_t ips_nr) +{ + size_t i; + + for (i = 0; i < BENCH_NR_ITERS; i++) { + int sk; + + synchronize_threads(); + + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + close(sk); + } +} + +static void test_print_stats(const char *desc, size_t nr, struct bench_stats *bs) +{ + test_ok("%-20s\t%zu keys: min=%" PRIu64 "ms max=%" PRIu64 "ms mean=%gms stddev=%g", + desc, nr, bs->min / 1000000, bs->max / 1000000, + bs->mean / 1000000, sqrt((bs->mean / 1000000) / bs->nr)); +} + +static void *server_fn(void *arg) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(nr_keys); i++) { + struct bench_tests *bt = &bench_results[i]; + int lsk; + + test_ips = malloc(nr_keys[i] * sizeof(union tcp_addr)); + if (!test_ips) + test_error("malloc()"); + + lsk = test_listen_socket(this_ip_addr, test_server_port + i, 1); + + gen_test_ips(test_ips, nr_keys[i], false); + test_add_routes(test_ips, nr_keys[i]); + test_set_optmem(KERNEL_TCP_AO_KEY_SZ_ROUND_UP * nr_keys[i]); + server_apply_keys(lsk, test_ips, nr_keys[i]); + + synchronize_threads(); + bench_connect_srv(lsk, test_ips, nr_keys[i]); + bench_connect_srv(lsk, test_ips, nr_keys[i]); + + /* The worst case for FILO-list */ + bench_delete(lsk, &bt->add_key, &bt->delete_last_key, + test_ips, nr_keys[i], false, false); + test_print_stats("Add a new key", + nr_keys[i], &bt->add_key); + test_print_stats("Delete: worst case", + nr_keys[i], &bt->delete_last_key); + + bench_delete(lsk, &bt->add_key, &bt->delete_rand_key, + test_ips, nr_keys[i], true, false); + test_print_stats("Delete: random-search", + nr_keys[i], &bt->delete_rand_key); + + bench_delete(lsk, &bt->add_key, &bt->delete_async, + test_ips, nr_keys[i], false, 
true); + test_print_stats("Delete: async", nr_keys[i], &bt->delete_async); + + free(test_ips); + close(lsk); + } + + return NULL; +} + +static void connect_client(int sk, void *arg) +{ + size_t *p = arg; + + if (test_connect_socket(sk, this_ip_dest, test_server_port + *p) <= 0) + test_error("failed to connect()"); +} + +static void client_addr_setup(int sk, union tcp_addr taddr) +{ +#ifdef IPV6_TEST + struct sockaddr_in6 addr = { + .sin6_family = AF_INET6, + .sin6_port = 0, + .sin6_addr = taddr.a6, + }; +#else + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_port = 0, + .sin_addr = taddr.a4, + }; +#endif + int ret; + + ret = ip_addr_add(veth_name, TEST_FAMILY, taddr, TEST_PREFIX); + if (ret && ret != -EEXIST) + test_error("Failed to add ip address"); + ret = ip_route_add(veth_name, TEST_FAMILY, taddr, this_ip_dest); + if (ret && ret != -EEXIST) + test_error("Failed to add route"); + + if (bind(sk, &addr, sizeof(addr))) + test_error("bind()"); +} + +static void bench_connect_client(size_t port_off, struct bench_tests *bt, + union tcp_addr *ips, size_t ips_nr, bool rand_order) +{ + struct bench_stats *con; + union tcp_addr *p; + size_t i; + + if (rand_order) + con = &bt->connect_rand_key; + else + con = &bt->connect_last_key; + + p = (union tcp_addr *)&ips[0]; + + for (i = 0; i < BENCH_NR_ITERS; i++) { + int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + + if (sk < 0) + test_error("socket()"); + + client_addr_setup(sk, *p); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, + -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); + + measure_call(con, connect_client, sk, (void *)&port_off); + + close(sk); + + /* + * Slowest for FILO-linked-list: + * on (i) iteration removing ips[i] element. When it gets + * added to the list back - it becomes first to fetch, so + * on (i + 1) iteration go to ips[i + 1] element. 
+ */ + if (rand_order) + p = (union tcp_addr *)&ips[rand() % ips_nr]; + else + p = (union tcp_addr *)&ips[i % ips_nr]; + } +} + +static void *client_fn(void *arg) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(nr_keys); i++) { + struct bench_tests *bt = &bench_results[i]; + + synchronize_threads(); + bench_connect_client(i, bt, test_ips, nr_keys[i], false); + test_print_stats("Connect: worst case", + nr_keys[i], &bt->connect_last_key); + + bench_connect_client(i, bt, test_ips, nr_keys[i], false); + test_print_stats("Connect: random-search", + nr_keys[i], &bt->connect_last_key); + } + synchronize_threads(); + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(30, server_fn, client_fn); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config new file mode 100644 index 0000000000..d3277a9de9 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/config @@ -0,0 +1,10 @@ +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_RMD160=y +CONFIG_CRYPTO_SHA1=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_VRF=y +CONFIG_TCP_AO=y +CONFIG_TCP_MD5SIG=y +CONFIG_VETH=m diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c new file mode 100644 index 0000000000..185a2f6e5f --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c @@ -0,0 +1,264 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov <dima@arista.com> */ +#include <inttypes.h> +#include "aolib.h" + +#define fault(type) (inj == FAULT_ ## type) + +static inline int test_add_key_maclen(int sk, const char *key, uint8_t maclen, + union tcp_addr in_addr, uint8_t prefix, + uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_add tmp = {}; + int err; + + if (prefix > DEFAULT_TEST_PREFIX) + prefix = DEFAULT_TEST_PREFIX; + + err = test_prepare_key(&tmp, DEFAULT_TEST_ALGO, in_addr, false, false, + prefix, 0, sndid, rcvid, maclen, + 0, strlen(key), key); + 
if (err) + return err; + + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)); + if (err < 0) + return -errno; + + return test_verify_socket_key(sk, &tmp); +} + +static void try_accept(const char *tst_name, unsigned int port, const char *pwd, + union tcp_addr addr, uint8_t prefix, + uint8_t sndid, uint8_t rcvid, uint8_t maclen, + const char *cnt_name, test_cnt cnt_expected, + fault_t inj) +{ + struct tcp_ao_counters ao_cnt1, ao_cnt2; + uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */ + int lsk, err, sk = 0; + time_t timeout; + + lsk = test_listen_socket(this_ip_addr, port, 1); + + if (pwd && test_add_key_maclen(lsk, pwd, maclen, addr, prefix, sndid, rcvid)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + if (cnt_name) + before_cnt = netstat_get_one(cnt_name, NULL); + if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt1)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* preparations done */ + + timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + err = test_wait_fd(lsk, timeout, 0); + if (err == -ETIMEDOUT) { + if (!fault(TIMEOUT)) + test_fail("timed out for accept()"); + } else if (err < 0) { + test_error("test_wait_fd()"); + } else { + if (fault(TIMEOUT)) + test_fail("ready to accept"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) { + test_error("accept()"); + } else { + if (fault(TIMEOUT)) + test_fail("%s: accepted", tst_name); + } + } + + if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt2)) + test_error("test_get_tcp_ao_counters()"); + + close(lsk); + if (pwd) + test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + + if (!cnt_name) + goto out; + + after_cnt = netstat_get_one(cnt_name, NULL); + + if (after_cnt <= before_cnt) { + test_fail("%s: %s counter did not increase: %zu <= %zu", + tst_name, cnt_name, after_cnt, before_cnt); + } else { + test_ok("%s: counter %s increased %zu => %zu", + tst_name, cnt_name, before_cnt, after_cnt); + } + +out: + synchronize_threads(); /* 
close() */ + if (sk > 0) + close(sk); +} + +static void *server_fn(void *arg) +{ + union tcp_addr wrong_addr, network_addr; + unsigned int port = test_server_port; + + if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1) + test_error("Can't convert ip address %s", TEST_WRONG_IP); + + try_accept("Non-AO server + AO client", port++, NULL, + this_ip_dest, -1, 100, 100, 0, + "TCPAOKeyNotFound", 0, FAULT_TIMEOUT); + + try_accept("AO server + Non-AO client", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, + "TCPAORequired", TEST_CNT_AO_REQUIRED, FAULT_TIMEOUT); + + try_accept("Wrong password", port++, "something that is not DEFAULT_TEST_PASSWORD", + this_ip_dest, -1, 100, 100, 0, + "TCPAOBad", TEST_CNT_BAD, FAULT_TIMEOUT); + + try_accept("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 101, 0, + "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT); + + try_accept("Wrong snd id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 101, 100, 0, + "TCPAOGood", TEST_CNT_GOOD, FAULT_TIMEOUT); + + try_accept("Different maclen", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 8, + "TCPAOBad", TEST_CNT_BAD, FAULT_TIMEOUT); + + try_accept("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD, + wrong_addr, -1, 100, 100, 0, + "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT); + + try_accept("Client: Wrong addr", port++, NULL, + this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_TIMEOUT); + + try_accept("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 200, 100, 0, + "TCPAOGood", TEST_CNT_GOOD, 0); + + if (inet_pton(TEST_FAMILY, TEST_NETWORK, &network_addr) != 1) + test_error("Can't convert ip address %s", TEST_NETWORK); + + try_accept("Server: prefix match", port++, DEFAULT_TEST_PASSWORD, + network_addr, 16, 100, 100, 0, + "TCPAOGood", TEST_CNT_GOOD, 0); + + try_accept("Client: prefix match", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, + "TCPAOGood", TEST_CNT_GOOD, 0); 
+ + /* client exits */ + synchronize_threads(); + return NULL; +} + +static void try_connect(const char *tst_name, unsigned int port, + const char *pwd, union tcp_addr addr, uint8_t prefix, + uint8_t sndid, uint8_t rcvid, + test_cnt cnt_expected, fault_t inj) +{ + struct tcp_ao_counters ao_cnt1, ao_cnt2; + time_t timeout; + int sk, ret; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (pwd && test_add_key(sk, pwd, addr, prefix, sndid, rcvid)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + if (pwd && test_get_tcp_ao_counters(sk, &ao_cnt1)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* preparations done */ + + timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + + if (ret < 0) { + if (fault(KEYREJECT) && ret == -EKEYREJECTED) { + test_ok("%s: connect() was prevented", tst_name); + } else if (ret == -ETIMEDOUT && fault(TIMEOUT)) { + test_ok("%s", tst_name); + } else if (ret == -ECONNREFUSED && + (fault(TIMEOUT) || fault(KEYREJECT))) { + test_ok("%s: refused to connect", tst_name); + } else { + test_error("%s: connect() returned %d", tst_name, ret); + } + goto out; + } + + if (fault(TIMEOUT) || fault(KEYREJECT)) + test_fail("%s: connected", tst_name); + else + test_ok("%s: connected", tst_name); + if (pwd && ret > 0) { + if (test_get_tcp_ao_counters(sk, &ao_cnt2)) + test_error("test_get_tcp_ao_counters()"); + test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + } +out: + synchronize_threads(); /* close() */ + + if (ret > 0) + close(sk); +} + +static void *client_fn(void *arg) +{ + union tcp_addr wrong_addr, network_addr; + unsigned int port = test_server_port; + + if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1) + test_error("Can't convert ip address %s", TEST_WRONG_IP); + + try_connect("Non-AO server + AO client", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, 
FAULT_TIMEOUT); + + try_connect("AO server + Non-AO client", port++, NULL, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Wrong password", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Wrong snd id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Different maclen", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + + try_connect("Client: Wrong addr", port++, DEFAULT_TEST_PASSWORD, + wrong_addr, -1, 100, 100, 0, FAULT_KEYREJECT); + + try_connect("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 200, TEST_CNT_GOOD, 0); + + if (inet_pton(TEST_FAMILY, TEST_NETWORK, &network_addr) != 1) + test_error("Can't convert ip address %s", TEST_NETWORK); + + try_connect("Server: prefix match", port++, DEFAULT_TEST_PASSWORD, + this_ip_dest, -1, 100, 100, TEST_CNT_GOOD, 0); + + try_connect("Client: prefix match", port++, DEFAULT_TEST_PASSWORD, + network_addr, 16, 100, 100, TEST_CNT_GOOD, 0); + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(21, server_fn, client_fn); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/connect.c b/tools/testing/selftests/net/tcp_ao/connect.c new file mode 100644 index 0000000000..81653b47f3 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/connect.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov <dima@arista.com> */ +#include <inttypes.h> +#include "aolib.h" + +static void *server_fn(void *arg) +{ + int sk, lsk; + ssize_t bytes; + + lsk = test_listen_socket(this_ip_addr, test_server_port, 1); + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + 
test_error("setsockopt(TCP_AO_ADD_KEY)"); + synchronize_threads(); + + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + synchronize_threads(); + + bytes = test_server_run(sk, 0, 0); + + test_fail("server served: %zd", bytes); + return NULL; +} + +static void *client_fn(void *arg) +{ + int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + uint64_t before_aogood, after_aogood; + const size_t nr_packets = 20; + struct netstat *ns_before, *ns_after; + struct tcp_ao_counters ao1, ao2; + + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); + if (test_connect_socket(sk, this_ip_dest, test_server_port) <= 0) + test_error("failed to connect()"); + synchronize_threads(); + + ns_before = netstat_read(); + before_aogood = netstat_get(ns_before, "TCPAOGood", NULL); + if (test_get_tcp_ao_counters(sk, &ao1)) + test_error("test_get_tcp_ao_counters()"); + + if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + test_fail("verify failed"); + return NULL; + } + + ns_after = netstat_read(); + after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + netstat_print_diff(ns_before, ns_after); + netstat_free(ns_before); + netstat_free(ns_after); + + if (nr_packets > (after_aogood - before_aogood)) { + test_fail("TCPAOGood counter mismatch: %zu > (%zu - %zu)", + nr_packets, after_aogood, before_aogood); + return NULL; + } + if (test_tcp_ao_counters_cmp("connect", &ao1, &ao2, TEST_CNT_GOOD)) + return NULL; + + test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %" PRIu64, + before_aogood, ao1.ao_info_pkt_good, + ao1.key_cnts[0].pkt_good, + after_aogood, ao2.ao_info_pkt_good, + 
ao2.key_cnts[0].pkt_good, + nr_packets); + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(1, server_fn, client_fn); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/icmps-accept.c b/tools/testing/selftests/net/tcp_ao/icmps-accept.c new file mode 120000 index 0000000000..0a5bb85eb2 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/icmps-accept.c @@ -0,0 +1 @@ +icmps-discard.c
\ No newline at end of file diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c new file mode 100644 index 0000000000..d69bcba3c9 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c @@ -0,0 +1,449 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Selftest that verifies that incoming ICMPs are ignored, + * the TCP connection stays alive, no hard or soft errors get reported + * to the userspace and the counter for ignored ICMPs is updated. + * + * RFC5925, 7.8: + * >> A TCP-AO implementation MUST default to ignore incoming ICMPv4 + * messages of Type 3 (destination unreachable), Codes 2-4 (protocol + * unreachable, port unreachable, and fragmentation needed -- ’hard + * errors’), and ICMPv6 Type 1 (destination unreachable), Code 1 + * (administratively prohibited) and Code 4 (port unreachable) intended + * for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN- + * WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs. 
+ * + * Author: Dmitry Safonov <dima@arista.com> + */ +#include <inttypes.h> +#include <linux/icmp.h> +#include <linux/icmpv6.h> +#include <linux/ipv6.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <sys/socket.h> +#include "aolib.h" +#include "../../../../include/linux/compiler.h" + +const size_t packets_nr = 20; +const size_t packet_size = 100; +const char *tcpao_icmps = "TCPAODroppedIcmps"; + +#ifdef IPV6_TEST +const char *dst_unreach = "Icmp6InDestUnreachs"; +const int sk_ip_level = SOL_IPV6; +const int sk_recverr = IPV6_RECVERR; +#else +const char *dst_unreach = "InDestUnreachs"; +const int sk_ip_level = SOL_IP; +const int sk_recverr = IP_RECVERR; +#endif + +/* Server is expected to fail with hard error if ::accept_icmp is set */ +#ifdef TEST_ICMPS_ACCEPT +# define test_icmps_fail test_ok +# define test_icmps_ok test_fail +#else +# define test_icmps_fail test_fail +# define test_icmps_ok test_ok +#endif + +static void serve_interfered(int sk) +{ + ssize_t test_quota = packet_size * packets_nr * 10; + uint64_t dest_unreach_a, dest_unreach_b; + uint64_t icmp_ignored_a, icmp_ignored_b; + struct tcp_ao_counters ao_cnt1, ao_cnt2; + bool counter_not_found; + struct netstat *ns_after, *ns_before; + ssize_t bytes; + + ns_before = netstat_read(); + dest_unreach_a = netstat_get(ns_before, dst_unreach, NULL); + icmp_ignored_a = netstat_get(ns_before, tcpao_icmps, NULL); + if (test_get_tcp_ao_counters(sk, &ao_cnt1)) + test_error("test_get_tcp_ao_counters()"); + bytes = test_server_run(sk, test_quota, 0); + ns_after = netstat_read(); + netstat_print_diff(ns_before, ns_after); + dest_unreach_b = netstat_get(ns_after, dst_unreach, NULL); + icmp_ignored_b = netstat_get(ns_after, tcpao_icmps, + &counter_not_found); + if (test_get_tcp_ao_counters(sk, &ao_cnt2)) + test_error("test_get_tcp_ao_counters()"); + + netstat_free(ns_before); + netstat_free(ns_after); + + if (dest_unreach_a >= dest_unreach_b) { + test_fail("%s counter didn't change: %" PRIu64 " >= %" 
PRIu64, + dst_unreach, dest_unreach_a, dest_unreach_b); + return; + } + test_ok("%s delivered %" PRIu64, + dst_unreach, dest_unreach_b - dest_unreach_a); + if (bytes < 0) + test_icmps_fail("Server failed with %zd: %s", bytes, strerrordesc_np(-bytes)); + else + test_icmps_ok("Server survived %zd bytes of traffic", test_quota); + if (counter_not_found) { + test_fail("Not found %s counter", tcpao_icmps); + return; + } +#ifdef TEST_ICMPS_ACCEPT + test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD); +#else + test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP); +#endif + if (icmp_ignored_a >= icmp_ignored_b) { + test_icmps_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64, + tcpao_icmps, icmp_ignored_a, icmp_ignored_b); + return; + } + test_icmps_ok("ICMPs ignored %" PRIu64, icmp_ignored_b - icmp_ignored_a); +} + +static void *server_fn(void *arg) +{ + int val, sk, lsk; + bool accept_icmps = false; + + lsk = test_listen_socket(this_ip_addr, test_server_port, 1); + +#ifdef TEST_ICMPS_ACCEPT + accept_icmps = true; +#endif + + if (test_set_ao_flags(lsk, false, accept_icmps)) + test_error("setsockopt(TCP_AO_INFO)"); + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + synchronize_threads(); + + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + /* Fail on hard ip errors, such as dest unreachable (RFC1122) */ + val = 1; + if (setsockopt(sk, sk_ip_level, sk_recverr, &val, sizeof(val))) + test_error("setsockopt()"); + + synchronize_threads(); + + serve_interfered(sk); + return NULL; +} + +static size_t packets_sent; +static size_t icmps_sent; + +static uint32_t checksum4_nofold(void *data, size_t len, uint32_t sum) +{ + uint16_t *words = data; + size_t i; + + for (i = 0; i < len / sizeof(uint16_t); i++) + sum += words[i]; + if (len & 1) + sum += 
((char *)data)[len - 1]; + return sum; +} + +static uint16_t checksum4_fold(void *data, size_t len, uint32_t sum) +{ + sum = checksum4_nofold(data, len, sum); + while (sum > 0xFFFF) + sum = (sum & 0xFFFF) + (sum >> 16); + return ~sum; +} + +static void set_ip4hdr(struct iphdr *iph, size_t packet_len, int proto, + struct sockaddr_in *src, struct sockaddr_in *dst) +{ + iph->version = 4; + iph->ihl = 5; + iph->tos = 0; + iph->tot_len = htons(packet_len); + iph->ttl = 2; + iph->protocol = proto; + iph->saddr = src->sin_addr.s_addr; + iph->daddr = dst->sin_addr.s_addr; + iph->check = checksum4_fold((void *)iph, iph->ihl << 1, 0); +} + +static void icmp_interfere4(uint8_t type, uint8_t code, uint32_t rcv_nxt, + struct sockaddr_in *src, struct sockaddr_in *dst) +{ + int sk = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); + struct { + struct iphdr iph; + struct icmphdr icmph; + struct iphdr iphe; + struct { + uint16_t sport; + uint16_t dport; + uint32_t seq; + } tcph; + } packet = {}; + size_t packet_len; + ssize_t bytes; + + if (sk < 0) + test_error("socket(AF_INET, SOCK_RAW, IPPROTO_RAW)"); + + packet_len = sizeof(packet); + set_ip4hdr(&packet.iph, packet_len, IPPROTO_ICMP, src, dst); + + packet.icmph.type = type; + packet.icmph.code = code; + if (code == ICMP_FRAG_NEEDED) { + randomize_buffer(&packet.icmph.un.frag.mtu, + sizeof(packet.icmph.un.frag.mtu)); + } + + packet_len = sizeof(packet.iphe) + sizeof(packet.tcph); + set_ip4hdr(&packet.iphe, packet_len, IPPROTO_TCP, dst, src); + + packet.tcph.sport = dst->sin_port; + packet.tcph.dport = src->sin_port; + packet.tcph.seq = htonl(rcv_nxt); + + packet_len = sizeof(packet) - sizeof(packet.iph); + packet.icmph.checksum = checksum4_fold((void *)&packet.icmph, + packet_len, 0); + + bytes = sendto(sk, &packet, sizeof(packet), 0, + (struct sockaddr *)dst, sizeof(*dst)); + if (bytes != sizeof(packet)) + test_error("send(): %zd", bytes); + icmps_sent++; + + close(sk); +} + +static void set_ip6hdr(struct ipv6hdr *iph, size_t packet_len, 
int proto, + struct sockaddr_in6 *src, struct sockaddr_in6 *dst) +{ + iph->version = 6; + iph->payload_len = htons(packet_len); + iph->nexthdr = proto; + iph->hop_limit = 2; + iph->saddr = src->sin6_addr; + iph->daddr = dst->sin6_addr; +} + +/* Fold a 32-bit running sum into 16 bits and return its one's complement. */ +static inline uint16_t csum_fold(uint32_t csum) +{ + uint32_t sum = csum; + + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return (uint16_t)~sum; +} + +/* 32-bit addition with end-around carry. */ +static inline uint32_t csum_add(uint32_t csum, uint32_t addend) +{ + uint32_t res = csum; + + res += addend; + return res + (res < addend); +} + +/* Add @len bytes of @data to @sum as 16-bit words, plus the odd trailing byte if any. */ +noinline uint32_t checksum6_nofold(void *data, size_t len, uint32_t sum) +{ + uint16_t *words = data; + size_t i; + + for (i = 0; i < len / sizeof(uint16_t); i++) + sum = csum_add(sum, words[i]); + if (len & 1) + /* unsigned read: a trailing byte >= 0x80 must not be sign-extended */ + sum = csum_add(sum, ((uint8_t *)data)[len - 1]); + return sum; +} + +noinline uint16_t icmp6_checksum(struct sockaddr_in6 *src, + struct sockaddr_in6 *dst, + void *ptr, size_t len, uint8_t proto) +{ + struct { + struct in6_addr saddr; + struct in6_addr daddr; + uint32_t payload_len; + uint8_t zero[3]; + uint8_t nexthdr; + } pseudo_header = {}; + uint32_t sum; + + pseudo_header.saddr = src->sin6_addr; + pseudo_header.daddr = dst->sin6_addr; + pseudo_header.payload_len = htonl(len); + pseudo_header.nexthdr = proto; + + sum = checksum6_nofold(&pseudo_header, sizeof(pseudo_header), 0); + sum = checksum6_nofold(ptr, len, sum); + + return csum_fold(sum); +} + +static void icmp6_interfere(int type, int code, uint32_t rcv_nxt, + struct sockaddr_in6 *src, struct sockaddr_in6 *dst) +{ + int sk = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW); + struct sockaddr_in6 dst_raw = *dst; + struct { + struct ipv6hdr iph; + struct icmp6hdr icmph; + struct ipv6hdr iphe; + struct { + uint16_t sport; + uint16_t dport; + uint32_t seq; + } tcph; + } packet = {}; + size_t packet_len; + ssize_t bytes; + + + if (sk < 0) + test_error("socket(AF_INET6, SOCK_RAW, IPPROTO_RAW)"); + + packet_len = sizeof(packet) - sizeof(packet.iph); + 
set_ip6hdr(&packet.iph, packet_len, IPPROTO_ICMPV6, src, dst); + + packet.icmph.icmp6_type = type; + packet.icmph.icmp6_code = code; + + packet_len = sizeof(packet.iphe) + sizeof(packet.tcph); + set_ip6hdr(&packet.iphe, packet_len, IPPROTO_TCP, dst, src); + + packet.tcph.sport = dst->sin6_port; + packet.tcph.dport = src->sin6_port; + packet.tcph.seq = htonl(rcv_nxt); + + packet_len = sizeof(packet) - sizeof(packet.iph); + + packet.icmph.icmp6_cksum = icmp6_checksum(src, dst, + (void *)&packet.icmph, packet_len, IPPROTO_ICMPV6); + + dst_raw.sin6_port = htons(IPPROTO_RAW); + bytes = sendto(sk, &packet, sizeof(packet), 0, + (struct sockaddr *)&dst_raw, sizeof(dst_raw)); + if (bytes != sizeof(packet)) + test_error("send(): %zd", bytes); + icmps_sent++; + + close(sk); +} + +static uint32_t get_rcv_nxt(int sk) +{ + int val = TCP_REPAIR_ON; + uint32_t ret; + socklen_t sz = sizeof(ret); + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val))) + test_error("setsockopt(TCP_REPAIR)"); + val = TCP_RECV_QUEUE; + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &val, sizeof(val))) + test_error("setsockopt(TCP_REPAIR_QUEUE)"); + if (getsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &ret, &sz)) + test_error("getsockopt(TCP_QUEUE_SEQ)"); + val = TCP_REPAIR_OFF_NO_WP; + if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val))) + test_error("setsockopt(TCP_REPAIR)"); + return ret; +} + +static void icmp_interfere(const size_t nr, uint32_t rcv_nxt, void *src, void *dst) +{ + struct sockaddr_in *saddr4 = src; + struct sockaddr_in *daddr4 = dst; + struct sockaddr_in6 *saddr6 = src; + struct sockaddr_in6 *daddr6 = dst; + size_t i; + + if (saddr4->sin_family != daddr4->sin_family) + test_error("Different address families"); + + for (i = 0; i < nr; i++) { + if (saddr4->sin_family == AF_INET) { + icmp_interfere4(ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, + rcv_nxt, saddr4, daddr4); + icmp_interfere4(ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, + rcv_nxt, saddr4, daddr4); + icmp_interfere4(ICMP_DEST_UNREACH, 
ICMP_FRAG_NEEDED, + rcv_nxt, saddr4, daddr4); + icmps_sent += 3; + } else if (saddr4->sin_family == AF_INET6) { + icmp6_interfere(ICMPV6_DEST_UNREACH, + ICMPV6_ADM_PROHIBITED, + rcv_nxt, saddr6, daddr6); + icmp6_interfere(ICMPV6_DEST_UNREACH, + ICMPV6_PORT_UNREACH, + rcv_nxt, saddr6, daddr6); + icmps_sent += 2; + } else { + test_error("Not ip address family"); + } + } +} + +static void send_interfered(int sk) +{ + const unsigned int timeout = TEST_TIMEOUT_SEC; + struct sockaddr_in6 src, dst; + socklen_t addr_sz; + + addr_sz = sizeof(src); + if (getsockname(sk, &src, &addr_sz)) + test_error("getsockname()"); + addr_sz = sizeof(dst); + if (getpeername(sk, &dst, &addr_sz)) + test_error("getpeername()"); + + while (1) { + uint32_t rcv_nxt; + + if (test_client_verify(sk, packet_size, packets_nr, timeout)) { + test_fail("client: connection is broken"); + return; + } + packets_sent += packets_nr; + rcv_nxt = get_rcv_nxt(sk); + icmp_interfere(packets_nr, rcv_nxt, (void *)&src, (void *)&dst); + } +} + +static void *client_fn(void *arg) +{ + int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); + if (test_connect_socket(sk, this_ip_dest, test_server_port) <= 0) + test_error("failed to connect()"); + synchronize_threads(); + + send_interfered(sk); + + /* Not expecting client to quit */ + test_fail("client disconnected"); + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(3, server_fn, client_fn); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c new file mode 100644 index 0000000000..24e62120b7 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/key-management.c @@ -0,0 +1,1186 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov <dima@arista.com> */ +#include 
<inttypes.h> +#include "../../../../include/linux/kernel.h" +#include "aolib.h" + +const size_t nr_packets = 20; +const size_t msg_len = 100; +const size_t quota = nr_packets * msg_len; +union tcp_addr wrong_addr; +#define SECOND_PASSWORD "at all times sincere friends of freedom have been rare" +#define fault(type) (inj == FAULT_ ## type) + +static const int test_vrf_ifindex = 200; +static const uint8_t test_vrf_tabid = 42; +static void setup_vrfs(void) +{ + int err; + + if (!kernel_config_has(KCONFIG_NET_VRF)) + return; + + err = add_vrf("ksft-vrf", test_vrf_tabid, test_vrf_ifindex, -1); + if (err) + test_error("Failed to add a VRF: %d", err); + + err = link_set_up("ksft-vrf"); + if (err) + test_error("Failed to bring up a VRF"); + + err = ip_route_add_vrf(veth_name, TEST_FAMILY, + this_ip_addr, this_ip_dest, test_vrf_tabid); + if (err) + test_error("Failed to add a route to VRF"); +} + + +static int prepare_sk(union tcp_addr *addr, uint8_t sndid, uint8_t rcvid) +{ + int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, + DEFAULT_TEST_PREFIX, 100, 100)) + test_error("test_add_key()"); + + if (addr && test_add_key(sk, SECOND_PASSWORD, *addr, + DEFAULT_TEST_PREFIX, sndid, rcvid)) + test_error("test_add_key()"); + + return sk; +} + +static int prepare_lsk(union tcp_addr *addr, uint8_t sndid, uint8_t rcvid) +{ + int sk = prepare_sk(addr, sndid, rcvid); + + if (listen(sk, 10)) + test_error("listen()"); + + return sk; +} + +static int test_del_key(int sk, uint8_t sndid, uint8_t rcvid, bool async, + int current_key, int rnext_key) +{ + struct tcp_ao_info_opt ao_info = {}; + struct tcp_ao_getsockopt key = {}; + struct tcp_ao_del del = {}; + sockaddr_af sockaddr; + int err; + + tcp_addr_to_sockaddr_in(&del.addr, &this_ip_dest, 0); + del.prefix = DEFAULT_TEST_PREFIX; + del.sndid = sndid; + del.rcvid = rcvid; + + if (current_key >= 0) { + del.set_current = 1; + 
del.current_key = (uint8_t)current_key; + } + if (rnext_key >= 0) { + del.set_rnext = 1; + del.rnext = (uint8_t)rnext_key; + } + + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_DEL_KEY, &del, sizeof(del)); + if (err < 0) + return -errno; + + if (async) + return 0; + + tcp_addr_to_sockaddr_in(&sockaddr, &this_ip_dest, 0); + err = test_get_one_ao(sk, &key, &sockaddr, sizeof(sockaddr), + DEFAULT_TEST_PREFIX, sndid, rcvid); + if (!err) + return -EEXIST; + if (err != -E2BIG) + test_error("getsockopt()"); + if (current_key < 0 && rnext_key < 0) + return 0; + if (test_get_ao_info(sk, &ao_info)) + test_error("getsockopt(TCP_AO_INFO) failed"); + if (current_key >= 0 && ao_info.current_key != (uint8_t)current_key) + return -ENOTRECOVERABLE; + if (rnext_key >= 0 && ao_info.rnext != (uint8_t)rnext_key) + return -ENOTRECOVERABLE; + return 0; +} + +static void try_delete_key(char *tst_name, int sk, uint8_t sndid, uint8_t rcvid, + bool async, int current_key, int rnext_key, + fault_t inj) +{ + int err; + + err = test_del_key(sk, sndid, rcvid, async, current_key, rnext_key); + if ((err == -EBUSY && fault(BUSY)) || (err == -EINVAL && fault(CURRNEXT))) { + test_ok("%s: key deletion was prevented", tst_name); + return; + } + if (err && fault(FIXME)) { + test_xfail("%s: failed to delete the key %u:%u %d", + tst_name, sndid, rcvid, err); + return; + } + if (!err) { + if (fault(BUSY) || fault(CURRNEXT)) { + test_fail("%s: the key was deleted %u:%u %d", tst_name, + sndid, rcvid, err); + } else { + test_ok("%s: the key was deleted", tst_name); + } + return; + } + test_fail("%s: can't delete the key %u:%u %d", tst_name, sndid, rcvid, err); +} + +static int test_set_key(int sk, int current_keyid, int rnext_keyid) +{ + struct tcp_ao_info_opt ao_info = {}; + int err; + + if (current_keyid >= 0) { + ao_info.set_current = 1; + ao_info.current_key = (uint8_t)current_keyid; + } + if (rnext_keyid >= 0) { + ao_info.set_rnext = 1; + ao_info.rnext = (uint8_t)rnext_keyid; + } + + err = test_set_ao_info(sk, 
&ao_info); + if (err) + return err; + if (test_get_ao_info(sk, &ao_info)) + test_error("getsockopt(TCP_AO_INFO) failed"); + if (current_keyid >= 0 && ao_info.current_key != (uint8_t)current_keyid) + return -ENOTRECOVERABLE; + if (rnext_keyid >= 0 && ao_info.rnext != (uint8_t)rnext_keyid) + return -ENOTRECOVERABLE; + return 0; +} + +static int test_add_current_rnext_key(int sk, const char *key, uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, + bool set_current, bool set_rnext, + uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_add tmp = {}; + int err; + + err = test_prepare_key(&tmp, DEFAULT_TEST_ALGO, in_addr, + set_current, set_rnext, + prefix, 0, sndid, rcvid, 0, keyflags, + strlen(key), key); + if (err) + return err; + + + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)); + if (err < 0) + return -errno; + + return test_verify_socket_key(sk, &tmp); +} + +static int __try_add_current_rnext_key(int sk, const char *key, uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, + bool set_current, bool set_rnext, + uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_info_opt ao_info = {}; + int err; + + err = test_add_current_rnext_key(sk, key, keyflags, in_addr, prefix, + set_current, set_rnext, sndid, rcvid); + if (err) + return err; + + if (test_get_ao_info(sk, &ao_info)) + test_error("getsockopt(TCP_AO_INFO) failed"); + if (set_current && ao_info.current_key != sndid) + return -ENOTRECOVERABLE; + if (set_rnext && ao_info.rnext != rcvid) + return -ENOTRECOVERABLE; + return 0; +} + +static void try_add_current_rnext_key(char *tst_name, int sk, const char *key, + uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, + bool set_current, bool set_rnext, + uint8_t sndid, uint8_t rcvid, fault_t inj) +{ + int err; + + err = __try_add_current_rnext_key(sk, key, keyflags, in_addr, prefix, + set_current, set_rnext, sndid, rcvid); + if (!err && !fault(CURRNEXT)) { + test_ok("%s", tst_name); + return; + } + if (err == -EINVAL && 
fault(CURRNEXT)) { + test_ok("%s", tst_name); + return; + } + test_fail("%s", tst_name); +} + +static void check_closed_socket(void) +{ + int sk; + + sk = prepare_sk(&this_ip_dest, 200, 200); + try_delete_key("closed socket, delete a key", sk, 200, 200, 0, -1, -1, 0); + try_delete_key("closed socket, delete all keys", sk, 100, 100, 0, -1, -1, 0); + close(sk); + + sk = prepare_sk(&this_ip_dest, 200, 200); + if (test_set_key(sk, 100, 200)) + test_error("failed to set current/rnext keys"); + try_delete_key("closed socket, delete current key", sk, 100, 100, 0, -1, -1, FAULT_BUSY); + try_delete_key("closed socket, delete rnext key", sk, 200, 200, 0, -1, -1, FAULT_BUSY); + close(sk); + + sk = prepare_sk(&this_ip_dest, 200, 200); + if (test_add_key(sk, "Glory to heros!", this_ip_dest, + DEFAULT_TEST_PREFIX, 10, 11)) + test_error("test_add_key()"); + if (test_add_key(sk, "Glory to Ukraine!", this_ip_dest, + DEFAULT_TEST_PREFIX, 12, 13)) + test_error("test_add_key()"); + try_delete_key("closed socket, delete a key + set current/rnext", sk, 100, 100, 0, 10, 13, 0); + try_delete_key("closed socket, force-delete current key", sk, 10, 11, 0, 200, -1, 0); + try_delete_key("closed socket, force-delete rnext key", sk, 12, 13, 0, -1, 200, 0); + try_delete_key("closed socket, delete current+rnext key", sk, 200, 200, 0, -1, -1, FAULT_BUSY); + close(sk); + + sk = prepare_sk(&this_ip_dest, 200, 200); + if (test_set_key(sk, 100, 200)) + test_error("failed to set current/rnext keys"); + try_add_current_rnext_key("closed socket, add + change current key", + sk, "Laaaa! Lalala-la-la-lalala...", 0, + this_ip_dest, DEFAULT_TEST_PREFIX, + true, false, 10, 20, 0); + try_add_current_rnext_key("closed socket, add + change rnext key", + sk, "Laaaa! 
Lalala-la-la-lalala...", 0, + this_ip_dest, DEFAULT_TEST_PREFIX, + false, true, 20, 10, 0); + close(sk); +} + +static void assert_no_current_rnext(const char *tst_msg, int sk) +{ + struct tcp_ao_info_opt ao_info = {}; + + if (test_get_ao_info(sk, &ao_info)) + test_error("getsockopt(TCP_AO_INFO) failed"); + + errno = 0; + if (ao_info.set_current || ao_info.set_rnext) { + test_xfail("%s: the socket has current/rnext keys: %d:%d", + tst_msg, + (ao_info.set_current) ? ao_info.current_key : -1, + (ao_info.set_rnext) ? ao_info.rnext : -1); + } else { + test_ok("%s: the socket has no current/rnext keys", tst_msg); + } +} + +static void assert_no_tcp_repair(void) +{ + struct tcp_ao_repair ao_img = {}; + socklen_t len = sizeof(ao_img); + int sk, err; + + sk = prepare_sk(&this_ip_dest, 200, 200); + test_enable_repair(sk); + if (listen(sk, 10)) + test_error("listen()"); + errno = 0; + err = getsockopt(sk, SOL_TCP, TCP_AO_REPAIR, &ao_img, &len); + if (err && errno == EPERM) + test_ok("listen socket, getsockopt(TCP_AO_REPAIR) is restricted"); + else + test_fail("listen socket, getsockopt(TCP_AO_REPAIR) works"); + errno = 0; + err = setsockopt(sk, SOL_TCP, TCP_AO_REPAIR, &ao_img, sizeof(ao_img)); + if (err && errno == EPERM) + test_ok("listen socket, setsockopt(TCP_AO_REPAIR) is restricted"); + else + test_fail("listen socket, setsockopt(TCP_AO_REPAIR) works"); + close(sk); +} + +static void check_listen_socket(void) +{ + int sk, err; + + sk = prepare_lsk(&this_ip_dest, 200, 200); + try_delete_key("listen socket, delete a key", sk, 200, 200, 0, -1, -1, 0); + try_delete_key("listen socket, delete all keys", sk, 100, 100, 0, -1, -1, 0); + close(sk); + + sk = prepare_lsk(&this_ip_dest, 200, 200); + err = test_set_key(sk, 100, -1); + if (err == -EINVAL) + test_ok("listen socket, setting current key not allowed"); + else + test_fail("listen socket, set current key"); + err = test_set_key(sk, -1, 200); + if (err == -EINVAL) + test_ok("listen socket, setting rnext key not allowed"); + 
else + test_fail("listen socket, set rnext key"); + close(sk); + + sk = prepare_sk(&this_ip_dest, 200, 200); + if (test_set_key(sk, 100, 200)) + test_error("failed to set current/rnext keys"); + if (listen(sk, 10)) + test_error("listen()"); + assert_no_current_rnext("listen() after current/rnext keys set", sk); + try_delete_key("listen socket, delete current key from before listen()", sk, 100, 100, 0, -1, -1, FAULT_FIXME); + try_delete_key("listen socket, delete rnext key from before listen()", sk, 200, 200, 0, -1, -1, FAULT_FIXME); + close(sk); + + assert_no_tcp_repair(); + + sk = prepare_lsk(&this_ip_dest, 200, 200); + if (test_add_key(sk, "Glory to heros!", this_ip_dest, + DEFAULT_TEST_PREFIX, 10, 11)) + test_error("test_add_key()"); + if (test_add_key(sk, "Glory to Ukraine!", this_ip_dest, + DEFAULT_TEST_PREFIX, 12, 13)) + test_error("test_add_key()"); + try_delete_key("listen socket, delete a key + set current/rnext", sk, + 100, 100, 0, 10, 13, FAULT_CURRNEXT); + try_delete_key("listen socket, force-delete current key", sk, + 10, 11, 0, 200, -1, FAULT_CURRNEXT); + try_delete_key("listen socket, force-delete rnext key", sk, + 12, 13, 0, -1, 200, FAULT_CURRNEXT); + try_delete_key("listen socket, delete a key", sk, + 200, 200, 0, -1, -1, 0); + close(sk); + + sk = prepare_lsk(&this_ip_dest, 200, 200); + try_add_current_rnext_key("listen socket, add + change current key", + sk, "Laaaa! Lalala-la-la-lalala...", 0, + this_ip_dest, DEFAULT_TEST_PREFIX, + true, false, 10, 20, FAULT_CURRNEXT); + try_add_current_rnext_key("listen socket, add + change rnext key", + sk, "Laaaa! 
Lalala-la-la-lalala...", 0, + this_ip_dest, DEFAULT_TEST_PREFIX, + false, true, 20, 10, FAULT_CURRNEXT); + close(sk); +} + +static const char *fips_fpath = "/proc/sys/crypto/fips_enabled"; +static bool is_fips_enabled(void) +{ + static int fips_checked = -1; + FILE *fenabled; + int enabled; + + if (fips_checked >= 0) + return !!fips_checked; + if (access(fips_fpath, R_OK)) { + if (errno != ENOENT) + test_error("Can't open %s", fips_fpath); + fips_checked = 0; + return false; + } + fenabled = fopen(fips_fpath, "r"); + if (!fenabled) + test_error("Can't open %s", fips_fpath); + if (fscanf(fenabled, "%d", &enabled) != 1) + test_error("Can't read from %s", fips_fpath); + fclose(fenabled); + fips_checked = !!enabled; + return !!fips_checked; +} + +struct test_key { + char password[TCP_AO_MAXKEYLEN]; + const char *alg; + unsigned int len; + uint8_t client_keyid; + uint8_t server_keyid; + uint8_t maclen; + uint8_t matches_client : 1, + matches_server : 1, + matches_vrf : 1, + is_current : 1, + is_rnext : 1, + used_on_server_tx : 1, + used_on_client_tx : 1, + skip_counters_checks : 1; +}; + +struct key_collection { + unsigned int nr_keys; + struct test_key *keys; +}; + +static struct key_collection collection; + +#define TEST_MAX_MACLEN 16 +const char *test_algos[] = { + "cmac(aes128)", + "hmac(sha1)", "hmac(sha512)", "hmac(sha384)", "hmac(sha256)", + "hmac(sha224)", "hmac(sha3-512)", + /* only if !CONFIG_FIPS */ +#define TEST_NON_FIPS_ALGOS 2 + "hmac(rmd160)", "hmac(md5)" +}; +const unsigned int test_maclens[] = { 1, 4, 12, 16 }; +#define MACLEN_SHIFT 2 +#define ALGOS_SHIFT 4 + +static unsigned int make_mask(unsigned int shift, unsigned int prev_shift) +{ + unsigned int ret = BIT(shift) - 1; + + return ret << prev_shift; +} + +static void init_key_in_collection(unsigned int index, bool randomized) +{ + struct test_key *key = &collection.keys[index]; + unsigned int algos_nr, algos_index; + + /* Same for randomized and non-randomized test flows */ + key->client_keyid = 
index; + key->server_keyid = 127 + index; + key->matches_client = 1; + key->matches_server = 1; + key->matches_vrf = 1; + /* not really even random, but good enough for a test */ + key->len = rand() % (TCP_AO_MAXKEYLEN - TEST_TCP_AO_MINKEYLEN); + key->len += TEST_TCP_AO_MINKEYLEN; + randomize_buffer(key->password, key->len); + + if (randomized) { + key->maclen = (rand() % TEST_MAX_MACLEN) + 1; + algos_index = rand(); + } else { + unsigned int shift = MACLEN_SHIFT; + + key->maclen = test_maclens[index & make_mask(shift, 0)]; + algos_index = index & make_mask(ALGOS_SHIFT, shift); + } + algos_nr = ARRAY_SIZE(test_algos); + if (is_fips_enabled()) + algos_nr -= TEST_NON_FIPS_ALGOS; + key->alg = test_algos[algos_index % algos_nr]; +} + +static int init_default_key_collection(unsigned int nr_keys, bool randomized) +{ + size_t key_sz = sizeof(collection.keys[0]); + + if (!nr_keys) { + free(collection.keys); + collection.keys = NULL; + return 0; + } + + /* + * All keys have uniq sndid/rcvid and sndid != rcvid in order to + * check for any bugs/issues for different keyids, visible to both + * peers. Keyid == 254 is unused. 
+ */ + if (nr_keys > 127) + test_error("Test requires too many keys, correct the source"); + + collection.keys = reallocarray(collection.keys, nr_keys, key_sz); + if (!collection.keys) + return -ENOMEM; + + memset(collection.keys, 0, nr_keys * key_sz); + collection.nr_keys = nr_keys; + while (nr_keys--) + init_key_in_collection(nr_keys, randomized); + + return 0; +} + +static void test_key_error(const char *msg, struct test_key *key) +{ + test_error("%s: key: { %s, %u:%u, %u, %u:%u:%u:%u:%u (%u)}", + msg, key->alg, key->client_keyid, key->server_keyid, + key->maclen, key->matches_client, key->matches_server, + key->matches_vrf, key->is_current, key->is_rnext, key->len); +} + +static int test_add_key_cr(int sk, const char *pwd, unsigned int pwd_len, + union tcp_addr addr, uint8_t vrf, + uint8_t sndid, uint8_t rcvid, + uint8_t maclen, const char *alg, + bool set_current, bool set_rnext) +{ + struct tcp_ao_add tmp = {}; + uint8_t keyflags = 0; + int err; + + if (!alg) + alg = DEFAULT_TEST_ALGO; + + if (vrf) + keyflags |= TCP_AO_KEYF_IFINDEX; + err = test_prepare_key(&tmp, alg, addr, set_current, set_rnext, + DEFAULT_TEST_PREFIX, vrf, sndid, rcvid, maclen, + keyflags, pwd_len, pwd); + if (err) + return err; + + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)); + if (err < 0) + return -errno; + + return test_verify_socket_key(sk, &tmp); +} + +static void verify_current_rnext(const char *tst, int sk, + int current_keyid, int rnext_keyid) +{ + struct tcp_ao_info_opt ao_info = {}; + + if (test_get_ao_info(sk, &ao_info)) + test_error("getsockopt(TCP_AO_INFO) failed"); + + errno = 0; + if (current_keyid >= 0) { + if (!ao_info.set_current) + test_fail("%s: the socket doesn't have current key", tst); + else if (ao_info.current_key != current_keyid) + test_fail("%s: current key is not the expected one %d != %u", + tst, current_keyid, ao_info.current_key); + else + test_ok("%s: current key %u as expected", + tst, ao_info.current_key); + } + if (rnext_keyid >= 
0) { + if (!ao_info.set_rnext) + test_fail("%s: the socket doesn't have rnext key", tst); + else if (ao_info.rnext != rnext_keyid) + test_fail("%s: rnext key is not the expected one %d != %u", + tst, rnext_keyid, ao_info.rnext); + else + test_ok("%s: rnext key %u as expected", tst, ao_info.rnext); + } +} + + +static int key_collection_socket(bool server, unsigned int port) +{ + unsigned int i; + int sk; + + if (server) + sk = test_listen_socket(this_ip_addr, port, 1); + else + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + for (i = 0; i < collection.nr_keys; i++) { + struct test_key *key = &collection.keys[i]; + union tcp_addr *addr = &wrong_addr; + uint8_t sndid, rcvid, vrf; + bool set_current = false, set_rnext = false; + + if (key->matches_vrf) + vrf = 0; + else + vrf = test_vrf_ifindex; + if (server) { + if (key->matches_client) + addr = &this_ip_dest; + sndid = key->server_keyid; + rcvid = key->client_keyid; + } else { + if (key->matches_server) + addr = &this_ip_dest; + sndid = key->client_keyid; + rcvid = key->server_keyid; + key->used_on_client_tx = set_current = key->is_current; + key->used_on_server_tx = set_rnext = key->is_rnext; + } + + if (test_add_key_cr(sk, key->password, key->len, + *addr, vrf, sndid, rcvid, key->maclen, + key->alg, set_current, set_rnext)) + test_key_error("setsockopt(TCP_AO_ADD_KEY)", key); +#ifdef DEBUG + test_print("%s [%u/%u] key: { %s, %u:%u, %u, %u:%u:%u:%u (%u)}", + server ? 
"server" : "client", i, collection.nr_keys, + key->alg, rcvid, sndid, key->maclen, + key->matches_client, key->matches_server, + key->is_current, key->is_rnext, key->len); +#endif + } + return sk; +} + +static void verify_counters(const char *tst_name, bool is_listen_sk, bool server, + struct tcp_ao_counters *a, struct tcp_ao_counters *b) +{ + unsigned int i; + + __test_tcp_ao_counters_cmp(tst_name, a, b, TEST_CNT_GOOD); + + for (i = 0; i < collection.nr_keys; i++) { + struct test_key *key = &collection.keys[i]; + uint8_t sndid, rcvid; + bool rx_cnt_expected; + + if (key->skip_counters_checks) + continue; + if (server) { + sndid = key->server_keyid; + rcvid = key->client_keyid; + rx_cnt_expected = key->used_on_client_tx; + } else { + sndid = key->client_keyid; + rcvid = key->server_keyid; + rx_cnt_expected = key->used_on_server_tx; + } + + test_tcp_ao_key_counters_cmp(tst_name, a, b, + rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0, + sndid, rcvid); + } + test_tcp_ao_counters_free(a); + test_tcp_ao_counters_free(b); + test_ok("%s: passed counters checks", tst_name); +} + +static struct tcp_ao_getsockopt *lookup_key(struct tcp_ao_getsockopt *buf, + size_t len, int sndid, int rcvid) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (sndid >= 0 && buf[i].sndid != sndid) + continue; + if (rcvid >= 0 && buf[i].rcvid != rcvid) + continue; + return &buf[i]; + } + return NULL; +} + +static void verify_keys(const char *tst_name, int sk, + bool is_listen_sk, bool server) +{ + socklen_t len = sizeof(struct tcp_ao_getsockopt); + struct tcp_ao_getsockopt *keys; + bool passed_test = true; + unsigned int i; + + keys = calloc(collection.nr_keys, len); + if (!keys) + test_error("calloc()"); + + keys->nkeys = collection.nr_keys; + keys->get_all = 1; + + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, keys, &len)) { + free(keys); + test_error("getsockopt(TCP_AO_GET_KEYS)"); + } + + for (i = 0; i < collection.nr_keys; i++) { + struct test_key *key = &collection.keys[i]; + struct 
tcp_ao_getsockopt *dump_key; + bool is_kdf_aes_128_cmac = false; + bool is_cmac_aes = false; + uint8_t sndid, rcvid; + bool matches = false; + + if (server) { + if (key->matches_client) + matches = true; + sndid = key->server_keyid; + rcvid = key->client_keyid; + } else { + if (key->matches_server) + matches = true; + sndid = key->client_keyid; + rcvid = key->server_keyid; + } + if (!key->matches_vrf) + matches = false; + /* no keys get removed on the original listener socket */ + if (is_listen_sk) + matches = true; + + dump_key = lookup_key(keys, keys->nkeys, sndid, rcvid); + if (matches != !!dump_key) { + test_fail("%s: key %u:%u %s%s on the socket", + tst_name, sndid, rcvid, + key->matches_vrf ? "" : "[vrf] ", + matches ? "disappeared" : "yet present"); + passed_test = false; + goto out; + } + if (!dump_key) + continue; + + if (!strcmp("cmac(aes128)", key->alg)) { + is_kdf_aes_128_cmac = (key->len != 16); + is_cmac_aes = true; + } + + if (is_cmac_aes) { + if (strcmp(dump_key->alg_name, "cmac(aes)")) { + test_fail("%s: key %u:%u cmac(aes) has unexpected alg %s", + tst_name, sndid, rcvid, + dump_key->alg_name); + passed_test = false; + continue; + } + } else if (strcmp(dump_key->alg_name, key->alg)) { + test_fail("%s: key %u:%u has unexpected alg %s != %s", + tst_name, sndid, rcvid, + dump_key->alg_name, key->alg); + passed_test = false; + continue; + } + if (is_kdf_aes_128_cmac) { + if (dump_key->keylen != 16) { + test_fail("%s: key %u:%u cmac(aes128) has unexpected len %u", + tst_name, sndid, rcvid, + dump_key->keylen); + continue; + } + } else if (dump_key->keylen != key->len) { + test_fail("%s: key %u:%u changed password len %u != %u", + tst_name, sndid, rcvid, + dump_key->keylen, key->len); + passed_test = false; + continue; + } + if (!is_kdf_aes_128_cmac && + memcmp(dump_key->key, key->password, key->len)) { + test_fail("%s: key %u:%u has different password", + tst_name, sndid, rcvid); + passed_test = false; + continue; + } + if (dump_key->maclen != 
key->maclen) { + test_fail("%s: key %u:%u changed maclen %u != %u", + tst_name, sndid, rcvid, + dump_key->maclen, key->maclen); + passed_test = false; + continue; + } + } + + if (passed_test) + test_ok("%s: The socket keys are consistent with the expectations", + tst_name); +out: + free(keys); +} + +static int start_server(const char *tst_name, unsigned int port, size_t quota, + struct tcp_ao_counters *begin, + unsigned int current_index, unsigned int rnext_index) +{ + struct tcp_ao_counters lsk_c1, lsk_c2; + ssize_t bytes; + int sk, lsk; + + synchronize_threads(); /* 1: key collection initialized */ + lsk = key_collection_socket(true, port); + if (test_get_tcp_ao_counters(lsk, &lsk_c1)) + test_error("test_get_tcp_ao_counters()"); + synchronize_threads(); /* 2: MKTs added => connect() */ + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + if (test_get_tcp_ao_counters(sk, begin)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 3: accepted => send data */ + if (test_get_tcp_ao_counters(lsk, &lsk_c2)) + test_error("test_get_tcp_ao_counters()"); + verify_keys(tst_name, lsk, true, true); + close(lsk); + + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) + test_fail("%s: server served: %zd", tst_name, bytes); + else + test_ok("%s: server alive", tst_name); + + verify_counters(tst_name, true, true, &lsk_c1, &lsk_c2); + + return sk; +} + +static void end_server(const char *tst_name, int sk, + struct tcp_ao_counters *begin) +{ + struct tcp_ao_counters end; + + if (test_get_tcp_ao_counters(sk, &end)) + test_error("test_get_tcp_ao_counters()"); + verify_keys(tst_name, sk, false, true); + + synchronize_threads(); /* 4: verified => closed */ + close(sk); + + verify_counters(tst_name, false, true, begin, &end); + synchronize_threads(); /* 5: counters */ +} + +static void try_server_run(const char *tst_name, unsigned int port, 
size_t quota, + unsigned int current_index, unsigned int rnext_index) +{ + struct tcp_ao_counters tmp; + int sk; + + sk = start_server(tst_name, port, quota, &tmp, + current_index, rnext_index); + end_server(tst_name, sk, &tmp); +} + +static void server_rotations(const char *tst_name, unsigned int port, + size_t quota, unsigned int rotations, + unsigned int current_index, unsigned int rnext_index) +{ + struct tcp_ao_counters tmp; + unsigned int i; + int sk; + + sk = start_server(tst_name, port, quota, &tmp, + current_index, rnext_index); + + for (i = current_index + 1; rotations > 0; i++, rotations--) { + ssize_t bytes; + + if (i >= collection.nr_keys) + i = 0; + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) { + test_fail("%s: server served: %zd", tst_name, bytes); + return; + } + verify_current_rnext(tst_name, sk, + collection.keys[i].server_keyid, -1); + synchronize_threads(); /* verify current/rnext */ + } + end_server(tst_name, sk, &tmp); +} + +static int run_client(const char *tst_name, unsigned int port, + unsigned int nr_keys, int current_index, int rnext_index, + struct tcp_ao_counters *before, + const size_t msg_sz, const size_t msg_nr) +{ + int sk; + + synchronize_threads(); /* 1: key collection initialized */ + sk = key_collection_socket(false, port); + + if (current_index >= 0 || rnext_index >= 0) { + int sndid = -1, rcvid = -1; + + if (current_index >= 0) + sndid = collection.keys[current_index].client_keyid; + if (rnext_index >= 0) + rcvid = collection.keys[rnext_index].server_keyid; + if (test_set_key(sk, sndid, rcvid)) + test_error("failed to set current/rnext keys"); + } + if (before && test_get_tcp_ao_counters(sk, before)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 2: MKTs added => connect() */ + if (test_connect_socket(sk, this_ip_dest, port++) <= 0) + test_error("failed to connect()"); + if (current_index < 0) + current_index = nr_keys - 1; + if (rnext_index < 0) + rnext_index = 
nr_keys - 1; + collection.keys[current_index].used_on_client_tx = 1; + collection.keys[rnext_index].used_on_server_tx = 1; + + synchronize_threads(); /* 3: accepted => send data */ + if (test_client_verify(sk, msg_sz, msg_nr, TEST_TIMEOUT_SEC)) { + test_fail("verify failed"); + close(sk); + if (before) + test_tcp_ao_counters_free(before); + return -1; + } + + return sk; +} + +static int start_client(const char *tst_name, unsigned int port, + unsigned int nr_keys, int current_index, int rnext_index, + struct tcp_ao_counters *before, + const size_t msg_sz, const size_t msg_nr) +{ + if (init_default_key_collection(nr_keys, true)) + test_error("Failed to init the key collection"); + + return run_client(tst_name, port, nr_keys, current_index, + rnext_index, before, msg_sz, msg_nr); +} + +static void end_client(const char *tst_name, int sk, unsigned int nr_keys, + int current_index, int rnext_index, + struct tcp_ao_counters *start) +{ + struct tcp_ao_counters end; + + /* Some application may become dependent on this kernel choice */ + if (current_index < 0) + current_index = nr_keys - 1; + if (rnext_index < 0) + rnext_index = nr_keys - 1; + verify_current_rnext(tst_name, sk, + collection.keys[current_index].client_keyid, + collection.keys[rnext_index].server_keyid); + if (start && test_get_tcp_ao_counters(sk, &end)) + test_error("test_get_tcp_ao_counters()"); + verify_keys(tst_name, sk, false, false); + synchronize_threads(); /* 4: verify => closed */ + close(sk); + if (start) + verify_counters(tst_name, false, false, start, &end); + synchronize_threads(); /* 5: counters */ +} + +static void try_unmatched_keys(int sk, int *rnext_index) +{ + struct test_key *key; + unsigned int i = 0; + int err; + + do { + key = &collection.keys[i]; + if (!key->matches_server) + break; + } while (++i < collection.nr_keys); + if (key->matches_server) + test_error("all keys on client match the server"); + + err = test_add_key_cr(sk, key->password, key->len, wrong_addr, + 0, 
key->client_keyid, key->server_keyid, + key->maclen, key->alg, 0, 0); + if (!err) { + test_fail("Added a key with non-matching ip-address for established sk"); + return; + } + if (err == -EINVAL) + test_ok("Can't add a key with non-matching ip-address for established sk"); + else + test_error("Failed to add a key"); + + err = test_add_key_cr(sk, key->password, key->len, this_ip_dest, + test_vrf_ifindex, + key->client_keyid, key->server_keyid, + key->maclen, key->alg, 0, 0); + if (!err) { + test_fail("Added a key with non-matching VRF for established sk"); + return; + } + if (err == -EINVAL) + test_ok("Can't add a key with non-matching VRF for established sk"); + else + test_error("Failed to add a key"); + + for (i = 0; i < collection.nr_keys; i++) { + key = &collection.keys[i]; + if (!key->matches_client) + break; + } + if (key->matches_client) + test_error("all keys on server match the client"); + if (test_set_key(sk, -1, key->server_keyid)) + test_error("Can't change the current key"); + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + test_fail("verify failed"); + *rnext_index = i; +} + +static int client_non_matching(const char *tst_name, unsigned int port, + unsigned int nr_keys, + int current_index, int rnext_index, + const size_t msg_sz, const size_t msg_nr) +{ + unsigned int i; + + if (init_default_key_collection(nr_keys, true)) + test_error("Failed to init the key collection"); + + for (i = 0; i < nr_keys; i++) { + /* key (0, 0) matches */ + collection.keys[i].matches_client = !!((i + 3) % 4); + collection.keys[i].matches_server = !!((i + 2) % 4); + if (kernel_config_has(KCONFIG_NET_VRF)) + collection.keys[i].matches_vrf = !!((i + 1) % 4); + } + + return run_client(tst_name, port, nr_keys, current_index, + rnext_index, NULL, msg_sz, msg_nr); +} + +static void check_current_back(const char *tst_name, unsigned int port, + unsigned int nr_keys, + unsigned int current_index, unsigned int rnext_index, + unsigned int rotate_to_index) +{ + 
struct tcp_ao_counters tmp; + int sk; + + sk = start_client(tst_name, port, nr_keys, current_index, rnext_index, + &tmp, msg_len, nr_packets); + if (sk < 0) + return; + if (test_set_key(sk, collection.keys[rotate_to_index].client_keyid, -1)) + test_error("Can't change the current key"); + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + test_fail("verify failed"); + /* There is a race here: between setting the current_key with + * setsockopt(TCP_AO_INFO) and starting to send some data - there + * might have been a segment received with the desired + * RNext_key set. In turn that would mean that the first outgoing + * segment will have the desired current_key (flipped back). + * Which is what the user/test wants. As it's racy, skip checking + * the counters, yet check what are the resulting current/rnext + * keys on both sides. + */ + collection.keys[rotate_to_index].skip_counters_checks = 1; + + end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp); +} + +static void roll_over_keys(const char *tst_name, unsigned int port, + unsigned int nr_keys, unsigned int rotations, + unsigned int current_index, unsigned int rnext_index) +{ + struct tcp_ao_counters tmp; + unsigned int i; + int sk; + + sk = start_client(tst_name, port, nr_keys, current_index, rnext_index, + &tmp, msg_len, nr_packets); + if (sk < 0) + return; + for (i = rnext_index + 1; rotations > 0; i++, rotations--) { + if (i >= collection.nr_keys) + i = 0; + if (test_set_key(sk, -1, collection.keys[i].server_keyid)) + test_error("Can't change the Rnext key"); + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) { + test_fail("verify failed"); + close(sk); + test_tcp_ao_counters_free(&tmp); + return; + } + verify_current_rnext(tst_name, sk, -1, + collection.keys[i].server_keyid); + collection.keys[i].used_on_server_tx = 1; + synchronize_threads(); /* verify current/rnext */ + } + end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp); +} + +static 
void try_client_run(const char *tst_name, unsigned int port, + unsigned int nr_keys, int current_index, int rnext_index) +{ + struct tcp_ao_counters tmp; + int sk; + + sk = start_client(tst_name, port, nr_keys, current_index, rnext_index, + &tmp, msg_len, nr_packets); + if (sk < 0) + return; + end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp); +} + +static void try_client_match(const char *tst_name, unsigned int port, + unsigned int nr_keys, + int current_index, int rnext_index) +{ + int sk; + + sk = client_non_matching(tst_name, port, nr_keys, current_index, + rnext_index, msg_len, nr_packets); + if (sk < 0) + return; + try_unmatched_keys(sk, &rnext_index); + end_client(tst_name, sk, nr_keys, current_index, rnext_index, NULL); +} + +static void *server_fn(void *arg) +{ + unsigned int port = test_server_port; + + setup_vrfs(); + try_server_run("server: Check current/rnext keys unset before connect()", + port++, quota, 19, 19); + try_server_run("server: Check current/rnext keys set before connect()", + port++, quota, 10, 10); + try_server_run("server: Check current != rnext keys set before connect()", + port++, quota, 5, 10); + try_server_run("server: Check current flapping back on peer's RnextKey request", + port++, quota * 2, 5, 10); + server_rotations("server: Rotate over all different keys", port++, + quota, 20, 0, 0); + try_server_run("server: Check accept() => established key matching", + port++, quota * 2, 0, 0); + + synchronize_threads(); /* don't race to exit: client exits */ + return NULL; +} + +static void check_established_socket(void) +{ + unsigned int port = test_server_port; + + setup_vrfs(); + try_client_run("client: Check current/rnext keys unset before connect()", + port++, 20, -1, -1); + try_client_run("client: Check current/rnext keys set before connect()", + port++, 20, 10, 10); + try_client_run("client: Check current != rnext keys set before connect()", + port++, 20, 10, 5); + check_current_back("client: Check current flapping 
back on peer's RnextKey request", + port++, 20, 10, 5, 2); + roll_over_keys("client: Rotate over all different keys", port++, + 20, 20, 0, 0); + try_client_match("client: Check connect() => established key matching", + port++, 20, 0, 0); +} + +static void *client_fn(void *arg) +{ + if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1) + test_error("Can't convert ip address %s", TEST_WRONG_IP); + check_closed_socket(); + check_listen_socket(); + check_established_socket(); + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(120, server_fn, client_fn); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h new file mode 100644 index 0000000000..fbc7f61118 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h @@ -0,0 +1,605 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * TCP-AO selftest library. Provides helpers to unshare network + * namespaces, create veth, assign ip addresses, set routes, + * manipulate socket options, read network counter and etc. 
+ * Author: Dmitry Safonov <dima@arista.com> + */ +#ifndef _AOLIB_H_ +#define _AOLIB_H_ + +#include <arpa/inet.h> +#include <errno.h> +#include <linux/snmp.h> +#include <linux/tcp.h> +#include <netinet/in.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include "../../../../../include/linux/stringify.h" +#include "../../../../../include/linux/bits.h" + +#ifndef SOL_TCP +/* can't include <netinet/tcp.h> as including <linux/tcp.h> */ +# define SOL_TCP 6 /* TCP level */ +#endif + +/* Working around ksft, see the comment in lib/setup.c */ +extern void __test_msg(const char *buf); +extern void __test_ok(const char *buf); +extern void __test_fail(const char *buf); +extern void __test_xfail(const char *buf); +extern void __test_error(const char *buf); +extern void __test_skip(const char *buf); + +__attribute__((__format__(__printf__, 2, 3))) +static inline void __test_print(void (*fn)(const char *), const char *fmt, ...) +{ +#define TEST_MSG_BUFFER_SIZE 4096 + char buf[TEST_MSG_BUFFER_SIZE]; + va_list arg; + + va_start(arg, fmt); + vsnprintf(buf, sizeof(buf), fmt, arg); + va_end(arg); + fn(buf); +} + +#define test_print(fmt, ...) \ + __test_print(__test_msg, "%ld[%s:%u] " fmt "\n", \ + syscall(SYS_gettid), \ + __FILE__, __LINE__, ##__VA_ARGS__) + +#define test_ok(fmt, ...) \ + __test_print(__test_ok, fmt "\n", ##__VA_ARGS__) +#define test_skip(fmt, ...) \ + __test_print(__test_skip, fmt "\n", ##__VA_ARGS__) +#define test_xfail(fmt, ...) \ + __test_print(__test_xfail, fmt "\n", ##__VA_ARGS__) + +#define test_fail(fmt, ...) \ +do { \ + if (errno) \ + __test_print(__test_fail, fmt ": %m\n", ##__VA_ARGS__); \ + else \ + __test_print(__test_fail, fmt "\n", ##__VA_ARGS__); \ + test_failed(); \ +} while (0) + +#define KSFT_FAIL 1 +#define test_error(fmt, ...) 
\ +do { \ + if (errno) \ + __test_print(__test_error, "%ld[%s:%u] " fmt ": %m\n", \ + syscall(SYS_gettid), __FILE__, __LINE__, \ + ##__VA_ARGS__); \ + else \ + __test_print(__test_error, "%ld[%s:%u] " fmt "\n", \ + syscall(SYS_gettid), __FILE__, __LINE__, \ + ##__VA_ARGS__); \ + exit(KSFT_FAIL); \ +} while (0) + +enum test_fault { + FAULT_TIMEOUT = 1, + FAULT_KEYREJECT, + FAULT_PREINSTALL_AO, + FAULT_PREINSTALL_MD5, + FAULT_POSTINSTALL, + FAULT_BUSY, + FAULT_CURRNEXT, + FAULT_FIXME, +}; +typedef enum test_fault fault_t; + +enum test_needs_kconfig { + KCONFIG_NET_NS = 0, /* required */ + KCONFIG_VETH, /* required */ + KCONFIG_TCP_AO, /* required */ + KCONFIG_TCP_MD5, /* optional, for TCP-MD5 features */ + KCONFIG_NET_VRF, /* optional, for L3/VRF testing */ + __KCONFIG_LAST__ +}; +extern bool kernel_config_has(enum test_needs_kconfig k); +extern const char *tests_skip_reason[__KCONFIG_LAST__]; +static inline bool should_skip_test(const char *tst_name, + enum test_needs_kconfig k) +{ + if (kernel_config_has(k)) + return false; + test_skip("%s: %s", tst_name, tests_skip_reason[k]); + return true; +} + +union tcp_addr { + struct in_addr a4; + struct in6_addr a6; +}; + +typedef void *(*thread_fn)(void *); +extern void test_failed(void); +extern void __test_init(unsigned int ntests, int family, unsigned int prefix, + union tcp_addr addr1, union tcp_addr addr2, + thread_fn peer1, thread_fn peer2); + +static inline void test_init2(unsigned int ntests, + thread_fn peer1, thread_fn peer2, + int family, unsigned int prefix, + const char *addr1, const char *addr2) +{ + union tcp_addr taddr1, taddr2; + + if (inet_pton(family, addr1, &taddr1) != 1) + test_error("Can't convert ip address %s", addr1); + if (inet_pton(family, addr2, &taddr2) != 1) + test_error("Can't convert ip address %s", addr2); + + __test_init(ntests, family, prefix, taddr1, taddr2, peer1, peer2); +} +extern void test_add_destructor(void (*d)(void)); + +/* To adjust optmem socket limit, approximately estimate a 
number, + * that is bigger than sizeof(struct tcp_ao_key). + */ +#define KERNEL_TCP_AO_KEY_SZ_ROUND_UP 300 + +extern void test_set_optmem(size_t value); +extern size_t test_get_optmem(void); + +extern const struct sockaddr_in6 addr_any6; +extern const struct sockaddr_in addr_any4; + +#ifdef IPV6_TEST +# define __TEST_CLIENT_IP(n) ("2001:db8:" __stringify(n) "::1") +# define TEST_CLIENT_IP __TEST_CLIENT_IP(1) +# define TEST_WRONG_IP "2001:db8:253::1" +# define TEST_SERVER_IP "2001:db8:254::1" +# define TEST_NETWORK "2001::" +# define TEST_PREFIX 128 +# define TEST_FAMILY AF_INET6 +# define SOCKADDR_ANY addr_any6 +# define sockaddr_af struct sockaddr_in6 +#else +# define __TEST_CLIENT_IP(n) ("10.0." __stringify(n) ".1") +# define TEST_CLIENT_IP __TEST_CLIENT_IP(1) +# define TEST_WRONG_IP "10.0.253.1" +# define TEST_SERVER_IP "10.0.254.1" +# define TEST_NETWORK "10.0.0.0" +# define TEST_PREFIX 32 +# define TEST_FAMILY AF_INET +# define SOCKADDR_ANY addr_any4 +# define sockaddr_af struct sockaddr_in +#endif + +static inline union tcp_addr gen_tcp_addr(union tcp_addr net, size_t n) +{ + union tcp_addr ret = net; + +#ifdef IPV6_TEST + ret.a6.s6_addr32[3] = htonl(n & (BIT(32) - 1)); + ret.a6.s6_addr32[2] = htonl((n >> 32) & (BIT(32) - 1)); +#else + ret.a4.s_addr = htonl(ntohl(net.a4.s_addr) + n); +#endif + + return ret; +} + +static inline void tcp_addr_to_sockaddr_in(void *dest, + const union tcp_addr *src, + unsigned int port) +{ + sockaddr_af *out = dest; + + memset(out, 0, sizeof(*out)); +#ifdef IPV6_TEST + out->sin6_family = AF_INET6; + out->sin6_port = port; + out->sin6_addr = src->a6; +#else + out->sin_family = AF_INET; + out->sin_port = port; + out->sin_addr = src->a4; +#endif +} + +static inline void test_init(unsigned int ntests, + thread_fn peer1, thread_fn peer2) +{ + test_init2(ntests, peer1, peer2, TEST_FAMILY, TEST_PREFIX, + TEST_SERVER_IP, TEST_CLIENT_IP); +} +extern void synchronize_threads(void); +extern void switch_ns(int fd); + +extern __thread union 
tcp_addr this_ip_addr; +extern __thread union tcp_addr this_ip_dest; +extern int test_family; + +extern void randomize_buffer(void *buf, size_t buflen); +extern int open_netns(void); +extern int unshare_open_netns(void); +extern const char veth_name[]; +extern int add_veth(const char *name, int nsfda, int nsfdb); +extern int add_vrf(const char *name, uint32_t tabid, int ifindex, int nsfd); +extern int ip_addr_add(const char *intf, int family, + union tcp_addr addr, uint8_t prefix); +extern int ip_route_add(const char *intf, int family, + union tcp_addr src, union tcp_addr dst); +extern int ip_route_add_vrf(const char *intf, int family, + union tcp_addr src, union tcp_addr dst, + uint8_t vrf); +extern int link_set_up(const char *intf); + +extern const unsigned int test_server_port; +extern int test_wait_fd(int sk, time_t sec, bool write); +extern int __test_connect_socket(int sk, const char *device, + void *addr, size_t addr_sz, time_t timeout); +extern int __test_listen_socket(int backlog, void *addr, size_t addr_sz); + +static inline int test_listen_socket(const union tcp_addr taddr, + unsigned int port, int backlog) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port)); + return __test_listen_socket(backlog, (void *)&addr, sizeof(addr)); +} + +/* + * In order for selftests to work under CONFIG_CRYPTO_FIPS=y, + * the password should be loger than 14 bytes, see hmac_setkey() + */ +#define TEST_TCP_AO_MINKEYLEN 14 +#define DEFAULT_TEST_PASSWORD "In this hour, I do not believe that any darkness will endure." + +#ifndef DEFAULT_TEST_ALGO +#define DEFAULT_TEST_ALGO "cmac(aes128)" +#endif + +#ifdef IPV6_TEST +#define DEFAULT_TEST_PREFIX 128 +#else +#define DEFAULT_TEST_PREFIX 32 +#endif + +/* + * Timeout on syscalls where failure is not expected. + * You may want to rise it if the test machine is very busy. + */ +#ifndef TEST_TIMEOUT_SEC +#define TEST_TIMEOUT_SEC 5 +#endif + +/* + * Timeout on connect() where a failure is expected. 
+ * If set to 0 - kernel will try to retransmit SYN number of times, set in + * /proc/sys/net/ipv4/tcp_syn_retries + * By default set to 1 to make tests pass faster on non-busy machine. + */ +#ifndef TEST_RETRANSMIT_SEC +#define TEST_RETRANSMIT_SEC 1 +#endif + +static inline int _test_connect_socket(int sk, const union tcp_addr taddr, + unsigned int port, time_t timeout) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port)); + return __test_connect_socket(sk, veth_name, + (void *)&addr, sizeof(addr), timeout); +} + +static inline int test_connect_socket(int sk, const union tcp_addr taddr, + unsigned int port) +{ + return _test_connect_socket(sk, taddr, port, TEST_TIMEOUT_SEC); +} + +extern int __test_set_md5(int sk, void *addr, size_t addr_sz, + uint8_t prefix, int vrf, const char *password); +static inline int test_set_md5(int sk, const union tcp_addr in_addr, + uint8_t prefix, int vrf, const char *password) +{ + sockaddr_af addr; + + if (prefix > DEFAULT_TEST_PREFIX) + prefix = DEFAULT_TEST_PREFIX; + + tcp_addr_to_sockaddr_in(&addr, &in_addr, 0); + return __test_set_md5(sk, (void *)&addr, sizeof(addr), + prefix, vrf, password); +} + +extern int test_prepare_key_sockaddr(struct tcp_ao_add *ao, const char *alg, + void *addr, size_t addr_sz, bool set_current, bool set_rnext, + uint8_t prefix, uint8_t vrf, + uint8_t sndid, uint8_t rcvid, uint8_t maclen, + uint8_t keyflags, uint8_t keylen, const char *key); + +static inline int test_prepare_key(struct tcp_ao_add *ao, + const char *alg, union tcp_addr taddr, + bool set_current, bool set_rnext, + uint8_t prefix, uint8_t vrf, + uint8_t sndid, uint8_t rcvid, uint8_t maclen, + uint8_t keyflags, uint8_t keylen, const char *key) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &taddr, 0); + return test_prepare_key_sockaddr(ao, alg, (void *)&addr, sizeof(addr), + set_current, set_rnext, prefix, vrf, sndid, rcvid, + maclen, keyflags, keylen, key); +} + +static inline int 
test_prepare_def_key(struct tcp_ao_add *ao, + const char *key, uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, uint8_t vrf, + uint8_t sndid, uint8_t rcvid) +{ + if (prefix > DEFAULT_TEST_PREFIX) + prefix = DEFAULT_TEST_PREFIX; + + return test_prepare_key(ao, DEFAULT_TEST_ALGO, in_addr, false, false, + prefix, vrf, sndid, rcvid, 0, keyflags, + strlen(key), key); +} + +extern int test_get_one_ao(int sk, struct tcp_ao_getsockopt *out, + void *addr, size_t addr_sz, + uint8_t prefix, uint8_t sndid, uint8_t rcvid); +extern int test_get_ao_info(int sk, struct tcp_ao_info_opt *out); +extern int test_set_ao_info(int sk, struct tcp_ao_info_opt *in); +extern int test_cmp_getsockopt_setsockopt(const struct tcp_ao_add *a, + const struct tcp_ao_getsockopt *b); +extern int test_cmp_getsockopt_setsockopt_ao(const struct tcp_ao_info_opt *a, + const struct tcp_ao_info_opt *b); + +static inline int test_verify_socket_key(int sk, struct tcp_ao_add *key) +{ + struct tcp_ao_getsockopt key2 = {}; + int err; + + err = test_get_one_ao(sk, &key2, &key->addr, sizeof(key->addr), + key->prefix, key->sndid, key->rcvid); + if (err) + return err; + + return test_cmp_getsockopt_setsockopt(key, &key2); +} + +static inline int test_add_key_vrf(int sk, + const char *key, uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, + uint8_t vrf, uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_add tmp = {}; + int err; + + err = test_prepare_def_key(&tmp, key, keyflags, in_addr, prefix, + vrf, sndid, rcvid); + if (err) + return err; + + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)); + if (err < 0) + return -errno; + + return test_verify_socket_key(sk, &tmp); +} + +static inline int test_add_key(int sk, const char *key, + union tcp_addr in_addr, uint8_t prefix, + uint8_t sndid, uint8_t rcvid) +{ + return test_add_key_vrf(sk, key, 0, in_addr, prefix, 0, sndid, rcvid); +} + +static inline int test_verify_socket_ao(int sk, struct tcp_ao_info_opt *ao) +{ + struct 
tcp_ao_info_opt ao2 = {}; + int err; + + err = test_get_ao_info(sk, &ao2); + if (err) + return err; + + return test_cmp_getsockopt_setsockopt_ao(ao, &ao2); +} + +static inline int test_set_ao_flags(int sk, bool ao_required, bool accept_icmps) +{ + struct tcp_ao_info_opt ao = {}; + int err; + + err = test_get_ao_info(sk, &ao); + /* Maybe ao_info wasn't allocated yet */ + if (err && err != -ENOENT) + return err; + + ao.ao_required = !!ao_required; + ao.accept_icmps = !!accept_icmps; + err = test_set_ao_info(sk, &ao); + if (err) + return err; + + return test_verify_socket_ao(sk, &ao); +} + +extern ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec); +extern ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, + const size_t msg_len, time_t timeout_sec); +extern int test_client_verify(int sk, const size_t msg_len, const size_t nr, + time_t timeout_sec); + +struct tcp_ao_key_counters { + uint8_t sndid; + uint8_t rcvid; + uint64_t pkt_good; + uint64_t pkt_bad; +}; + +struct tcp_ao_counters { + /* per-netns */ + uint64_t netns_ao_good; + uint64_t netns_ao_bad; + uint64_t netns_ao_key_not_found; + uint64_t netns_ao_required; + uint64_t netns_ao_dropped_icmp; + /* per-socket */ + uint64_t ao_info_pkt_good; + uint64_t ao_info_pkt_bad; + uint64_t ao_info_pkt_key_not_found; + uint64_t ao_info_pkt_ao_required; + uint64_t ao_info_pkt_dropped_icmp; + /* per-key */ + size_t nr_keys; + struct tcp_ao_key_counters *key_cnts; +}; +extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out); + +#define TEST_CNT_KEY_GOOD BIT(0) +#define TEST_CNT_KEY_BAD BIT(1) +#define TEST_CNT_SOCK_GOOD BIT(2) +#define TEST_CNT_SOCK_BAD BIT(3) +#define TEST_CNT_SOCK_KEY_NOT_FOUND BIT(4) +#define TEST_CNT_SOCK_AO_REQUIRED BIT(5) +#define TEST_CNT_SOCK_DROPPED_ICMP BIT(6) +#define TEST_CNT_NS_GOOD BIT(7) +#define TEST_CNT_NS_BAD BIT(8) +#define TEST_CNT_NS_KEY_NOT_FOUND BIT(9) +#define TEST_CNT_NS_AO_REQUIRED BIT(10) +#define TEST_CNT_NS_DROPPED_ICMP BIT(11) +typedef 
uint16_t test_cnt; + +#define TEST_CNT_AO_GOOD (TEST_CNT_SOCK_GOOD | TEST_CNT_NS_GOOD) +#define TEST_CNT_AO_BAD (TEST_CNT_SOCK_BAD | TEST_CNT_NS_BAD) +#define TEST_CNT_AO_KEY_NOT_FOUND (TEST_CNT_SOCK_KEY_NOT_FOUND | \ + TEST_CNT_NS_KEY_NOT_FOUND) +#define TEST_CNT_AO_REQUIRED (TEST_CNT_SOCK_AO_REQUIRED | \ + TEST_CNT_NS_AO_REQUIRED) +#define TEST_CNT_AO_DROPPED_ICMP (TEST_CNT_SOCK_DROPPED_ICMP | \ + TEST_CNT_NS_DROPPED_ICMP) +#define TEST_CNT_GOOD (TEST_CNT_KEY_GOOD | TEST_CNT_AO_GOOD) +#define TEST_CNT_BAD (TEST_CNT_KEY_BAD | TEST_CNT_AO_BAD) + +extern int __test_tcp_ao_counters_cmp(const char *tst_name, + struct tcp_ao_counters *before, struct tcp_ao_counters *after, + test_cnt expected); +extern int test_tcp_ao_key_counters_cmp(const char *tst_name, + struct tcp_ao_counters *before, struct tcp_ao_counters *after, + test_cnt expected, int sndid, int rcvid); +extern void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts); +/* + * Frees buffers allocated in test_get_tcp_ao_counters(). + * The function doesn't expect new keys or keys removed between calls + * to test_get_tcp_ao_counters(). Check key counters manually if they + * may change. 
+ */ +static inline int test_tcp_ao_counters_cmp(const char *tst_name, + struct tcp_ao_counters *before, + struct tcp_ao_counters *after, + test_cnt expected) +{ + int ret; + + ret = __test_tcp_ao_counters_cmp(tst_name, before, after, expected); + if (ret) + goto out; + ret = test_tcp_ao_key_counters_cmp(tst_name, before, after, + expected, -1, -1); +out: + test_tcp_ao_counters_free(before); + test_tcp_ao_counters_free(after); + return ret; +} + +struct netstat; +extern struct netstat *netstat_read(void); +extern void netstat_free(struct netstat *ns); +extern void netstat_print_diff(struct netstat *nsa, struct netstat *nsb); +extern uint64_t netstat_get(struct netstat *ns, + const char *name, bool *not_found); + +static inline uint64_t netstat_get_one(const char *name, bool *not_found) +{ + struct netstat *ns = netstat_read(); + uint64_t ret; + + ret = netstat_get(ns, name, not_found); + + netstat_free(ns); + return ret; +} + +struct tcp_sock_queue { + uint32_t seq; + void *buf; +}; + +struct tcp_sock_state { + struct tcp_info info; + struct tcp_repair_window trw; + struct tcp_sock_queue out; + int outq_len; /* output queue size (not sent + not acked) */ + int outq_nsd_len; /* output queue size (not sent only) */ + struct tcp_sock_queue in; + int inq_len; + int mss; + int timestamp; +}; + +extern void __test_sock_checkpoint(int sk, struct tcp_sock_state *state, + void *addr, size_t addr_size); +static inline void test_sock_checkpoint(int sk, struct tcp_sock_state *state, + sockaddr_af *saddr) +{ + __test_sock_checkpoint(sk, state, saddr, sizeof(*saddr)); +} +extern void test_ao_checkpoint(int sk, struct tcp_ao_repair *state); +extern void __test_sock_restore(int sk, const char *device, + struct tcp_sock_state *state, + void *saddr, void *daddr, size_t addr_size); +static inline void test_sock_restore(int sk, struct tcp_sock_state *state, + sockaddr_af *saddr, + const union tcp_addr daddr, + unsigned int dport) +{ + sockaddr_af addr; + + 
tcp_addr_to_sockaddr_in(&addr, &daddr, htons(dport)); + __test_sock_restore(sk, veth_name, state, saddr, &addr, sizeof(addr)); +} +extern void test_ao_restore(int sk, struct tcp_ao_repair *state); +extern void test_sock_state_free(struct tcp_sock_state *state); +extern void test_enable_repair(int sk); +extern void test_disable_repair(int sk); +extern void test_kill_sk(int sk); +static inline int test_add_repaired_key(int sk, + const char *key, uint8_t keyflags, + union tcp_addr in_addr, uint8_t prefix, + uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_add tmp = {}; + int err; + + err = test_prepare_def_key(&tmp, key, keyflags, in_addr, prefix, + 0, sndid, rcvid); + if (err) + return err; + + tmp.set_current = 1; + tmp.set_rnext = 1; + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0) + return -errno; + + return test_verify_socket_key(sk, &tmp); +} + +#endif /* _AOLIB_H_ */ diff --git a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c new file mode 100644 index 0000000000..f279ffc384 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Check what features does the kernel support (where the selftest is running). + * Somewhat inspired by CRIU kerndat/kdat kernel features detector. 
+ */ +#include <pthread.h> +#include "aolib.h" + +struct kconfig_t { + int _errno; /* the returned error if not supported */ + int (*check_kconfig)(int *error); +}; + +static int has_net_ns(int *err) +{ + if (access("/proc/self/ns/net", F_OK) < 0) { + *err = errno; + if (errno == ENOENT) + return 0; + test_print("Unable to access /proc/self/ns/net: %m"); + return -errno; + } + return *err = errno = 0; +} + +static int has_veth(int *err) +{ + int orig_netns, ns_a, ns_b; + + orig_netns = open_netns(); + ns_a = unshare_open_netns(); + ns_b = unshare_open_netns(); + + *err = add_veth("check_veth", ns_a, ns_b); + + switch_ns(orig_netns); + close(orig_netns); + close(ns_a); + close(ns_b); + return 0; +} + +static int has_tcp_ao(int *err) +{ + struct sockaddr_in addr = { + .sin_family = test_family, + }; + struct tcp_ao_add tmp = {}; + const char *password = DEFAULT_TEST_PASSWORD; + int sk, ret = 0; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) { + test_print("socket(): %m"); + return -errno; + } + + tmp.sndid = 100; + tmp.rcvid = 100; + tmp.keylen = strlen(password); + memcpy(tmp.key, password, strlen(password)); + strcpy(tmp.alg_name, "hmac(sha1)"); + memcpy(&tmp.addr, &addr, sizeof(addr)); + *err = 0; + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0) { + *err = errno; + if (errno != ENOPROTOOPT) + ret = -errno; + } + close(sk); + return ret; +} + +static int has_tcp_md5(int *err) +{ + union tcp_addr addr_any = {}; + int sk, ret = 0; + + sk = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) { + test_print("socket(): %m"); + return -errno; + } + + /* + * Under CONFIG_CRYPTO_FIPS=y it fails with ENOMEM, rather with + * anything more descriptive. Oh well. 
+ */ + *err = 0; + if (test_set_md5(sk, addr_any, 0, -1, DEFAULT_TEST_PASSWORD)) { + *err = errno; + /* ENOPROTOOPT and ENOMEM (see above) mean "not supported"; anything else is a real failure */ + if (errno != ENOPROTOOPT && errno != ENOMEM) { + test_print("setsockopt(TCP_MD5SIG_EXT): %m"); + ret = -*err; /* use the saved value: test_print() may clobber errno */ + } + } + close(sk); + return ret; +} + +static int has_vrfs(int *err) +{ + int orig_netns, ns_test, ret = 0; + + orig_netns = open_netns(); + ns_test = unshare_open_netns(); + + *err = add_vrf("ksft-check", 55, 101, ns_test); + if (*err && *err != -EOPNOTSUPP) { + test_print("Failed to add a VRF: %d", *err); + ret = *err; + } + + switch_ns(orig_netns); + close(orig_netns); + close(ns_test); + return ret; +} + +static pthread_mutex_t kconfig_lock = PTHREAD_MUTEX_INITIALIZER; +static struct kconfig_t kconfig[__KCONFIG_LAST__] = { + { -1, has_net_ns }, + { -1, has_veth }, + { -1, has_tcp_ao }, + { -1, has_tcp_md5 }, + { -1, has_vrfs }, +}; + +const char *tests_skip_reason[__KCONFIG_LAST__] = { + "Tests require network namespaces support (CONFIG_NET_NS)", + "Tests require veth support (CONFIG_VETH)", + "Tests require TCP-AO support (CONFIG_TCP_AO)", + "setsockopt(TCP_MD5SIG_EXT) is not supported (CONFIG_TCP_MD5)", + "VRFs are not supported (CONFIG_NET_VRF)", +}; + +bool kernel_config_has(enum test_needs_kconfig k) +{ + bool ret; + + pthread_mutex_lock(&kconfig_lock); + if (kconfig[k]._errno == -1) { + if (kconfig[k].check_kconfig(&kconfig[k]._errno)) + test_error("Failed to initialize kconfig %u", k); + } + ret = kconfig[k]._errno == 0; + pthread_mutex_unlock(&kconfig_lock); + return ret; +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/netlink.c b/tools/testing/selftests/net/tcp_ao/lib/netlink.c new file mode 100644 index 0000000000..7f108493a2 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/netlink.c @@ -0,0 +1,413 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Original from tools/testing/selftests/net/ipsec.c */ +#include <linux/netlink.h> +#include <linux/random.h> +#include <linux/rtnetlink.h> +#include <linux/veth.h> +#include <net/if.h> 
+#include <stdint.h> +#include <string.h> +#include <sys/socket.h> + +#include "aolib.h" + +#define MAX_PAYLOAD 2048 + +static int netlink_sock(int *sock, uint32_t *seq_nr, int proto) +{ + if (*sock > 0) { + seq_nr++; + return 0; + } + + *sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto); + if (*sock < 0) { + test_print("socket(AF_NETLINK)"); + return -1; + } + + randomize_buffer(seq_nr, sizeof(*seq_nr)); + + return 0; +} + +static int netlink_check_answer(int sock, bool quite) +{ + struct nlmsgerror { + struct nlmsghdr hdr; + int error; + struct nlmsghdr orig_msg; + } answer; + + if (recv(sock, &answer, sizeof(answer), 0) < 0) { + test_print("recv()"); + return -1; + } else if (answer.hdr.nlmsg_type != NLMSG_ERROR) { + test_print("expected NLMSG_ERROR, got %d", + (int)answer.hdr.nlmsg_type); + return -1; + } else if (answer.error) { + if (!quite) { + test_print("NLMSG_ERROR: %d: %s", + answer.error, strerror(-answer.error)); + } + return answer.error; + } + + return 0; +} + +static inline struct rtattr *rtattr_hdr(struct nlmsghdr *nh) +{ + return (struct rtattr *)((char *)(nh) + RTA_ALIGN((nh)->nlmsg_len)); +} + +static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type, const void *payload, size_t size) +{ + /* NLMSG_ALIGNTO == RTA_ALIGNTO, nlmsg_len already aligned */ + struct rtattr *attr = rtattr_hdr(nh); + size_t nl_size = RTA_ALIGN(nh->nlmsg_len) + RTA_LENGTH(size); + + if (req_sz < nl_size) { + test_print("req buf is too small: %zu < %zu", req_sz, nl_size); + return -1; + } + nh->nlmsg_len = nl_size; + + attr->rta_len = RTA_LENGTH(size); + attr->rta_type = rta_type; + memcpy(RTA_DATA(attr), payload, size); + + return 0; +} + +static struct rtattr *_rtattr_begin(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type, const void *payload, size_t size) +{ + struct rtattr *ret = rtattr_hdr(nh); + + if (rtattr_pack(nh, req_sz, rta_type, payload, size)) + return 0; + + return ret; +} + +static inline struct rtattr 
*rtattr_begin(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type) +{ + return _rtattr_begin(nh, req_sz, rta_type, 0, 0); +} + +static inline void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr) +{ + char *nlmsg_end = (char *)nh + nh->nlmsg_len; + + attr->rta_len = nlmsg_end - (char *)attr; +} + +static int veth_pack_peerb(struct nlmsghdr *nh, size_t req_sz, + const char *peer, int ns) +{ + struct ifinfomsg pi; + struct rtattr *peer_attr; + + memset(&pi, 0, sizeof(pi)); + pi.ifi_family = AF_UNSPEC; + pi.ifi_change = 0xFFFFFFFF; + + peer_attr = _rtattr_begin(nh, req_sz, VETH_INFO_PEER, &pi, sizeof(pi)); + if (!peer_attr) + return -1; + + if (rtattr_pack(nh, req_sz, IFLA_IFNAME, peer, strlen(peer))) + return -1; + + if (rtattr_pack(nh, req_sz, IFLA_NET_NS_FD, &ns, sizeof(ns))) + return -1; + + rtattr_end(nh, peer_attr); + + return 0; +} + +static int __add_veth(int sock, uint32_t seq, const char *name, + int ns_a, int ns_b) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + static const char veth_type[] = "veth"; + struct rtattr *link_info, *info_data; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = flags; + req.nh.nlmsg_seq = seq; + req.info.ifi_family = AF_UNSPEC; + req.info.ifi_change = 0xFFFFFFFF; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, name, strlen(name))) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD, &ns_a, sizeof(ns_a))) + return -1; + + link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO); + if (!link_info) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, veth_type, sizeof(veth_type))) + return -1; + + info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA); + if (!info_data) + return -1; + + if (veth_pack_peerb(&req.nh, sizeof(req), name, ns_b)) + 
return -1; + + rtattr_end(&req.nh, info_data); + rtattr_end(&req.nh, link_info); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + test_print("send()"); + return -1; + } + return netlink_check_answer(sock, false); +} + +int add_veth(const char *name, int nsfda, int nsfdb) +{ + int route_sock = -1, ret; + uint32_t route_seq; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + test_error("Failed to open netlink route socket\n"); + + ret = __add_veth(route_sock, route_seq++, name, nsfda, nsfdb); + close(route_sock); + return ret; +} + +static int __ip_addr_add(int sock, uint32_t seq, const char *intf, + int family, union tcp_addr addr, uint8_t prefix) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + struct { + struct nlmsghdr nh; + struct ifaddrmsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + size_t addr_len = (family == AF_INET) ? sizeof(struct in_addr) : + sizeof(struct in6_addr); + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWADDR; + req.nh.nlmsg_flags = flags; + req.nh.nlmsg_seq = seq; + req.info.ifa_family = family; + req.info.ifa_prefixlen = prefix; + req.info.ifa_index = if_nametoindex(intf); + req.info.ifa_flags = IFA_F_NODAD; + + if (rtattr_pack(&req.nh, sizeof(req), IFA_LOCAL, &addr, addr_len)) + return -1; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + test_print("send()"); + return -1; + } + return netlink_check_answer(sock, true); +} + +int ip_addr_add(const char *intf, int family, + union tcp_addr addr, uint8_t prefix) +{ + int route_sock = -1, ret; + uint32_t route_seq; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + test_error("Failed to open netlink route socket\n"); + + ret = __ip_addr_add(route_sock, route_seq++, intf, + family, addr, prefix); + + close(route_sock); + return ret; +} + +static int __ip_route_add(int sock, uint32_t seq, const char *intf, int family, + union tcp_addr src, union tcp_addr dst, 
uint8_t vrf) +{ + struct { + struct nlmsghdr nh; + struct rtmsg rt; + char attrbuf[MAX_PAYLOAD]; + } req; + unsigned int index = if_nametoindex(intf); + size_t addr_len = (family == AF_INET) ? sizeof(struct in_addr) : + sizeof(struct in6_addr); + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rt)); + req.nh.nlmsg_type = RTM_NEWROUTE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE; + req.nh.nlmsg_seq = seq; + req.rt.rtm_family = family; + req.rt.rtm_dst_len = (family == AF_INET) ? 32 : 128; + req.rt.rtm_table = vrf; + req.rt.rtm_protocol = RTPROT_BOOT; + req.rt.rtm_scope = RT_SCOPE_UNIVERSE; + req.rt.rtm_type = RTN_UNICAST; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_DST, &dst, addr_len)) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_PREFSRC, &src, addr_len)) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_OIF, &index, sizeof(index))) + return -1; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + test_print("send()"); + return -1; + } + + return netlink_check_answer(sock, true); +} + +int ip_route_add_vrf(const char *intf, int family, + union tcp_addr src, union tcp_addr dst, uint8_t vrf) +{ + int route_sock = -1, ret; + uint32_t route_seq; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + test_error("Failed to open netlink route socket\n"); + + ret = __ip_route_add(route_sock, route_seq++, intf, + family, src, dst, vrf); + + close(route_sock); + return ret; +} + +int ip_route_add(const char *intf, int family, + union tcp_addr src, union tcp_addr dst) +{ + return ip_route_add_vrf(intf, family, src, dst, RT_TABLE_MAIN); +} + +static int __link_set_up(int sock, uint32_t seq, const char *intf) +{ + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq 
= seq; + req.info.ifi_family = AF_UNSPEC; + req.info.ifi_change = 0xFFFFFFFF; + req.info.ifi_index = if_nametoindex(intf); + req.info.ifi_flags = IFF_UP; + req.info.ifi_change = IFF_UP; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + test_print("send()"); + return -1; + } + return netlink_check_answer(sock, false); +} + +int link_set_up(const char *intf) +{ + int route_sock = -1, ret; + uint32_t route_seq; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + test_error("Failed to open netlink route socket\n"); + + ret = __link_set_up(route_sock, route_seq++, intf); + + close(route_sock); + return ret; +} + +static int __add_vrf(int sock, uint32_t seq, const char *name, + uint32_t tabid, int ifindex, int nsfd) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + static const char vrf_type[] = "vrf"; + struct rtattr *link_info, *info_data; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = flags; + req.nh.nlmsg_seq = seq; + req.info.ifi_family = AF_UNSPEC; + req.info.ifi_change = 0xFFFFFFFF; + req.info.ifi_index = ifindex; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, name, strlen(name))) + return -1; + + if (nsfd >= 0) + if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD, + &nsfd, sizeof(nsfd))) + return -1; + + link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO); + if (!link_info) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, vrf_type, sizeof(vrf_type))) + return -1; + + info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA); + if (!info_data) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_VRF_TABLE, + &tabid, sizeof(tabid))) + return -1; + + rtattr_end(&req.nh, info_data); + rtattr_end(&req.nh, link_info); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + 
test_print("send()"); + return -1; + } + return netlink_check_answer(sock, true); +} + +int add_vrf(const char *name, uint32_t tabid, int ifindex, int nsfd) +{ + int route_sock = -1, ret; + uint32_t route_seq; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + test_error("Failed to open netlink route socket\n"); + + ret = __add_vrf(route_sock, route_seq++, name, tabid, ifindex, nsfd); + close(route_sock); + return ret; +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/proc.c b/tools/testing/selftests/net/tcp_ao/lib/proc.c new file mode 100644 index 0000000000..8b984fa042 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/proc.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <inttypes.h> +#include <pthread.h> +#include <stdio.h> +#include "../../../../../include/linux/compiler.h" +#include "../../../../../include/linux/kernel.h" +#include "aolib.h" + +struct netstat_counter { + uint64_t val; + char *name; +}; + +struct netstat { + char *header_name; + struct netstat *next; + size_t counters_nr; + struct netstat_counter *counters; +}; + +static struct netstat *lookup_type(struct netstat *ns, + const char *type, size_t len) +{ + while (ns != NULL) { + size_t cmp = max(len, strlen(ns->header_name)); + + if (!strncmp(ns->header_name, type, cmp)) + return ns; + ns = ns->next; + } + return NULL; +} + +static struct netstat *lookup_get(struct netstat *ns, + const char *type, const size_t len) +{ + struct netstat *ret; + + ret = lookup_type(ns, type, len); + if (ret != NULL) + return ret; + + ret = malloc(sizeof(struct netstat)); + if (!ret) + test_error("malloc()"); + + ret->header_name = strndup(type, len); + if (ret->header_name == NULL) + test_error("strndup()"); + ret->next = ns; + ret->counters_nr = 0; + ret->counters = NULL; + + return ret; +} + +static struct netstat *lookup_get_column(struct netstat *ns, const char *line) +{ + char *column; + + column = strchr(line, ':'); + if (!column) + test_error("can't parse netstat 
file"); + + return lookup_get(ns, line, column - line); +} + +static void netstat_read_type(FILE *fnetstat, struct netstat **dest, char *line) +{ + struct netstat *type = lookup_get_column(*dest, line); + const char *pos = line; + size_t i, nr_elems = 0; + char tmp; + + while ((pos = strchr(pos, ' '))) { + nr_elems++; + pos++; + } + + *dest = type; + type->counters = reallocarray(type->counters, + type->counters_nr + nr_elems, + sizeof(struct netstat_counter)); + if (!type->counters) + test_error("reallocarray()"); + + pos = strchr(line, ' ') + 1; + + if (fscanf(fnetstat, "%[^ :]", type->header_name) == EOF) + test_error("fscanf(%s)", type->header_name); + if (fread(&tmp, 1, 1, fnetstat) != 1 || tmp != ':') + test_error("Unexpected netstat format (%c)", tmp); + + for (i = type->counters_nr; i < type->counters_nr + nr_elems; i++) { + struct netstat_counter *nc = &type->counters[i]; + const char *new_pos = strchr(pos, ' '); + const char *fmt = " %" PRIu64; + + if (new_pos == NULL) + new_pos = strchr(pos, '\n'); + + nc->name = strndup(pos, new_pos - pos); + if (nc->name == NULL) + test_error("strndup()"); + + if (unlikely(!strcmp(nc->name, "MaxConn"))) + fmt = " %" PRId64; /* MaxConn is signed, RFC 2012 */ + if (fscanf(fnetstat, fmt, &nc->val) != 1) + test_error("fscanf(%s)", nc->name); + pos = new_pos + 1; + } + type->counters_nr += nr_elems; + + if (fread(&tmp, 1, 1, fnetstat) != 1 || tmp != '\n') + test_error("Unexpected netstat format"); +} + +static const char *snmp6_name = "Snmp6"; +static void snmp6_read(FILE *fnetstat, struct netstat **dest) +{ + struct netstat *type = lookup_get(*dest, snmp6_name, strlen(snmp6_name)); + char *counter_name; + size_t i; + + for (i = type->counters_nr;; i++) { + struct netstat_counter *nc; + uint64_t counter; + + if (fscanf(fnetstat, "%ms", &counter_name) == EOF) + break; + if (fscanf(fnetstat, "%" PRIu64, &counter) == EOF) + test_error("Unexpected snmp6 format"); + type->counters = reallocarray(type->counters, i + 1, + 
sizeof(struct netstat_counter)); + if (!type->counters) + test_error("reallocarray()"); + nc = &type->counters[i]; + nc->name = counter_name; + nc->val = counter; + } + type->counters_nr = i; + *dest = type; +} + +struct netstat *netstat_read(void) +{ + struct netstat *ret = 0; + size_t line_sz = 0; + char *line = NULL; + FILE *fnetstat; + + /* + * Opening thread-self instead of /proc/net/... as the latter + * points to /proc/self/net/ which instantiates thread-leader's + * net-ns, see: + * commit 155134fef2b6 ("Revert "proc: Point /proc/{mounts,net} at..") + */ + errno = 0; + fnetstat = fopen("/proc/thread-self/net/netstat", "r"); + if (fnetstat == NULL) + test_error("failed to open /proc/net/netstat"); + + while (getline(&line, &line_sz, fnetstat) != -1) + netstat_read_type(fnetstat, &ret, line); + fclose(fnetstat); + + errno = 0; + fnetstat = fopen("/proc/thread-self/net/snmp", "r"); + if (fnetstat == NULL) + test_error("failed to open /proc/net/snmp"); + + while (getline(&line, &line_sz, fnetstat) != -1) + netstat_read_type(fnetstat, &ret, line); + fclose(fnetstat); + + errno = 0; + fnetstat = fopen("/proc/thread-self/net/snmp6", "r"); + if (fnetstat == NULL) + test_error("failed to open /proc/net/snmp6"); + + snmp6_read(fnetstat, &ret); + fclose(fnetstat); + + free(line); + return ret; +} + +void netstat_free(struct netstat *ns) +{ + while (ns != NULL) { + struct netstat *prev = ns; + size_t i; + + free(ns->header_name); + for (i = 0; i < ns->counters_nr; i++) + free(ns->counters[i].name); + free(ns->counters); + ns = ns->next; + free(prev); + } +} + +/* Print counter @i of @nsb as "old => new", where @a is the old value */ +static inline void +__netstat_print_diff(uint64_t a, struct netstat *nsb, size_t i) +{ + /* MaxConn is a counter name, not a header name; it is signed (RFC 2012) */ + if (unlikely(!strcmp(nsb->counters[i].name, "MaxConn"))) { + test_print("%8s %25s: %" PRId64 " => %" PRId64, + nsb->header_name, nsb->counters[i].name, + a, nsb->counters[i].val); + return; + } + + test_print("%8s %25s: %" PRIu64 " => %" PRIu64, nsb->header_name, + nsb->counters[i].name, a, nsb->counters[i].val); +} + +void 
netstat_print_diff(struct netstat *nsa, struct netstat *nsb) +{ + size_t i, j; + + while (nsb != NULL) { + if (unlikely(strcmp(nsb->header_name, nsa->header_name))) { + for (i = 0; i < nsb->counters_nr; i++) + __netstat_print_diff(0, nsb, i); + nsb = nsb->next; + continue; + } + + if (nsb->counters_nr < nsa->counters_nr) + test_error("Unexpected: some counters disappeared!"); + + for (j = 0, i = 0; i < nsb->counters_nr; i++) { + if (strcmp(nsb->counters[i].name, nsa->counters[j].name)) { + __netstat_print_diff(0, nsb, i); + continue; + } + + if (nsa->counters[j].val == nsb->counters[i].val) { + j++; + continue; + } + + __netstat_print_diff(nsa->counters[j].val, nsb, i); + j++; + } + if (j != nsa->counters_nr) + test_error("Unexpected: some counters disappeared!"); + + nsb = nsb->next; + nsa = nsa->next; + } +} + +uint64_t netstat_get(struct netstat *ns, const char *name, bool *not_found) +{ + if (not_found) + *not_found = false; + + while (ns != NULL) { + size_t i; + + for (i = 0; i < ns->counters_nr; i++) { + if (!strcmp(name, ns->counters[i].name)) + return ns->counters[i].val; + } + + ns = ns->next; + } + + if (not_found) + *not_found = true; + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/repair.c b/tools/testing/selftests/net/tcp_ao/lib/repair.c new file mode 100644 index 0000000000..9893b3ba69 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/repair.c @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: GPL-2.0 +/* This is over-simplified TCP_REPAIR for TCP_ESTABLISHED sockets + * It tests that TCP-AO enabled connection can be restored. 
+ * For the proper socket repair see: + * https://github.com/checkpoint-restore/criu/blob/criu-dev/soccr/soccr.h + */ +#include <fcntl.h> +#include <linux/sockios.h> +#include <sys/ioctl.h> +#include "aolib.h" + +#ifndef TCPOPT_MAXSEG +# define TCPOPT_MAXSEG 2 +#endif +#ifndef TCPOPT_WINDOW +# define TCPOPT_WINDOW 3 +#endif +#ifndef TCPOPT_SACK_PERMITTED +# define TCPOPT_SACK_PERMITTED 4 +#endif +#ifndef TCPOPT_TIMESTAMP +# define TCPOPT_TIMESTAMP 8 +#endif + +enum { + TCP_ESTABLISHED = 1, + TCP_SYN_SENT, + TCP_SYN_RECV, + TCP_FIN_WAIT1, + TCP_FIN_WAIT2, + TCP_TIME_WAIT, + TCP_CLOSE, + TCP_CLOSE_WAIT, + TCP_LAST_ACK, + TCP_LISTEN, + TCP_CLOSING, /* Now a valid state */ + TCP_NEW_SYN_RECV, + + TCP_MAX_STATES /* Leave at the end! */ +}; + +static void test_sock_checkpoint_queue(int sk, int queue, int qlen, + struct tcp_sock_queue *q) +{ + socklen_t len; + int ret; + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue))) + test_error("setsockopt(TCP_REPAIR_QUEUE)"); + + len = sizeof(q->seq); + ret = getsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &q->seq, &len); + if (ret || len != sizeof(q->seq)) + test_error("getsockopt(TCP_QUEUE_SEQ): %d", (int)len); + + if (!qlen) { + q->buf = NULL; + return; + } + + q->buf = malloc(qlen); + if (q->buf == NULL) + test_error("malloc()"); + ret = recv(sk, q->buf, qlen, MSG_PEEK | MSG_DONTWAIT); + if (ret != qlen) + test_error("recv(%d): %d", qlen, ret); +} + +void __test_sock_checkpoint(int sk, struct tcp_sock_state *state, + void *addr, size_t addr_size) +{ + socklen_t len = sizeof(state->info); + int ret; + + memset(state, 0, sizeof(*state)); + + ret = getsockopt(sk, SOL_TCP, TCP_INFO, &state->info, &len); + if (ret || len != sizeof(state->info)) + test_error("getsockopt(TCP_INFO): %d", (int)len); + + len = addr_size; + if (getsockname(sk, addr, &len) || len != addr_size) + test_error("getsockname(): %d", (int)len); + + len = sizeof(state->trw); + ret = getsockopt(sk, SOL_TCP, TCP_REPAIR_WINDOW, &state->trw, &len); + if 
(ret || len != sizeof(state->trw)) + test_error("getsockopt(TCP_REPAIR_WINDOW): %d", (int)len); + + if (ioctl(sk, SIOCOUTQ, &state->outq_len)) + test_error("ioctl(SIOCOUTQ)"); + + if (ioctl(sk, SIOCOUTQNSD, &state->outq_nsd_len)) + test_error("ioctl(SIOCOUTQNSD)"); + test_sock_checkpoint_queue(sk, TCP_SEND_QUEUE, state->outq_len, &state->out); + + if (ioctl(sk, SIOCINQ, &state->inq_len)) + test_error("ioctl(SIOCINQ)"); + test_sock_checkpoint_queue(sk, TCP_RECV_QUEUE, state->inq_len, &state->in); + + if (state->info.tcpi_state == TCP_CLOSE) + state->outq_len = state->outq_nsd_len = 0; + + len = sizeof(state->mss); + ret = getsockopt(sk, SOL_TCP, TCP_MAXSEG, &state->mss, &len); + if (ret || len != sizeof(state->mss)) + test_error("getsockopt(TCP_MAXSEG): %d", (int)len); + + len = sizeof(state->timestamp); + ret = getsockopt(sk, SOL_TCP, TCP_TIMESTAMP, &state->timestamp, &len); + if (ret || len != sizeof(state->timestamp)) + test_error("getsockopt(TCP_TIMESTAMP): %d", (int)len); +} + +void test_ao_checkpoint(int sk, struct tcp_ao_repair *state) +{ + socklen_t len = sizeof(*state); + int ret; + + memset(state, 0, sizeof(*state)); + + ret = getsockopt(sk, SOL_TCP, TCP_AO_REPAIR, state, &len); + if (ret || len != sizeof(*state)) + test_error("getsockopt(TCP_AO_REPAIR): %d", (int)len); +} + +static void test_sock_restore_seq(int sk, int queue, uint32_t seq) +{ + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue))) + test_error("setsockopt(TCP_REPAIR_QUEUE)"); + + if (setsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &seq, sizeof(seq))) + test_error("setsockopt(TCP_QUEUE_SEQ)"); +} + +static void test_sock_restore_queue(int sk, int queue, void *buf, int len) +{ + int chunk = len; + size_t off = 0; + + if (len == 0) + return; + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue))) + test_error("setsockopt(TCP_REPAIR_QUEUE)"); + + do { + int ret; + + ret = send(sk, buf + off, chunk, 0); + if (ret <= 0) { + if (chunk > 1024) { + chunk >>= 1; + 
continue; + } + test_error("send()"); + } + off += ret; + len -= ret; + /* never send more than what is left, or send() reads past the end of buf */ + if (chunk > len) + chunk = len; + } while (len > 0); +} + +/* Rebuild a connection on @sk from @state: bind, set queue sequence numbers, connect, then restore options, queue contents and window */ +void __test_sock_restore(int sk, const char *device, + struct tcp_sock_state *state, + void *saddr, void *daddr, size_t addr_size) +{ + struct tcp_repair_opt opts[4]; + unsigned int opt_nr = 0; + long flags; + + if (bind(sk, saddr, addr_size)) + test_error("bind()"); + + flags = fcntl(sk, F_GETFL); + if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0)) + test_error("fcntl()"); + + test_sock_restore_seq(sk, TCP_RECV_QUEUE, state->in.seq - state->inq_len); + test_sock_restore_seq(sk, TCP_SEND_QUEUE, state->out.seq - state->outq_len); + + if (device != NULL && setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE, + device, strlen(device) + 1)) + test_error("setsockopt(SO_BINDTODEVICE, %s)", device); + + if (connect(sk, daddr, addr_size)) + test_error("connect()"); + + if (state->info.tcpi_options & TCPI_OPT_SACK) { + opts[opt_nr].opt_code = TCPOPT_SACK_PERMITTED; + opts[opt_nr].opt_val = 0; + opt_nr++; + } + if (state->info.tcpi_options & TCPI_OPT_WSCALE) { + opts[opt_nr].opt_code = TCPOPT_WINDOW; + opts[opt_nr].opt_val = state->info.tcpi_snd_wscale + + (state->info.tcpi_rcv_wscale << 16); + opt_nr++; + } + if (state->info.tcpi_options & TCPI_OPT_TIMESTAMPS) { + opts[opt_nr].opt_code = TCPOPT_TIMESTAMP; + opts[opt_nr].opt_val = 0; + opt_nr++; + } + opts[opt_nr].opt_code = TCPOPT_MAXSEG; + opts[opt_nr].opt_val = state->mss; + opt_nr++; + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_OPTIONS, opts, opt_nr * sizeof(opts[0]))) + test_error("setsockopt(TCP_REPAIR_OPTIONS)"); + + if (state->info.tcpi_options & TCPI_OPT_TIMESTAMPS) { + /* optlen must describe the int being passed, not the opts[] array */ + if (setsockopt(sk, SOL_TCP, TCP_TIMESTAMP, + &state->timestamp, sizeof(state->timestamp))) + test_error("setsockopt(TCP_TIMESTAMP)"); + } + test_sock_restore_queue(sk, TCP_RECV_QUEUE, state->in.buf, state->inq_len); + test_sock_restore_queue(sk, TCP_SEND_QUEUE, state->out.buf, state->outq_len); + if (setsockopt(sk, SOL_TCP, TCP_REPAIR_WINDOW, 
&state->trw, sizeof(state->trw))) + test_error("setsockopt(TCP_REPAIR_WINDOW)"); +} + +void test_ao_restore(int sk, struct tcp_ao_repair *state) +{ + if (setsockopt(sk, SOL_TCP, TCP_AO_REPAIR, state, sizeof(*state))) + test_error("setsockopt(TCP_AO_REPAIR)"); +} + +void test_sock_state_free(struct tcp_sock_state *state) +{ + free(state->out.buf); + free(state->in.buf); +} + +void test_enable_repair(int sk) +{ + int val = TCP_REPAIR_ON; + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val))) + test_error("setsockopt(TCP_REPAIR)"); +} + +void test_disable_repair(int sk) +{ + int val = TCP_REPAIR_OFF_NO_WP; + + if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val))) + test_error("setsockopt(TCP_REPAIR)"); +} + +void test_kill_sk(int sk) +{ + test_enable_repair(sk); + close(sk); +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c new file mode 100644 index 0000000000..e408b9243b --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <fcntl.h> +#include <pthread.h> +#include <sched.h> +#include <signal.h> +#include "aolib.h" + +/* + * Can't be included in the header: it defines static variables which + * will be unique to every object. Let's include it only once here. 
+ */ +#include "../../../kselftest.h" + +/* Prevent overriding of one thread's output by another */ +static pthread_mutex_t ksft_print_lock = PTHREAD_MUTEX_INITIALIZER; + +void __test_msg(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_print_msg("%s", buf); + pthread_mutex_unlock(&ksft_print_lock); +} +void __test_ok(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_test_result_pass("%s", buf); + pthread_mutex_unlock(&ksft_print_lock); +} +void __test_fail(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_test_result_fail("%s", buf); + pthread_mutex_unlock(&ksft_print_lock); +} +void __test_xfail(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_test_result_xfail("%s", buf); + pthread_mutex_unlock(&ksft_print_lock); +} +void __test_error(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_test_result_error("%s", buf); + pthread_mutex_unlock(&ksft_print_lock); +} +void __test_skip(const char *buf) +{ + pthread_mutex_lock(&ksft_print_lock); + ksft_test_result_skip("%s", buf); + pthread_mutex_unlock(&ksft_print_lock); +} + +static volatile int failed; +static volatile int skipped; + +void test_failed(void) +{ + failed = 1; +} + +static void test_exit(void) +{ + if (failed) { + ksft_exit_fail(); + } else if (skipped) { + /* ksft_exit_skip() is different from ksft_exit_*() */ + ksft_print_cnts(); + exit(KSFT_SKIP); + } else { + ksft_exit_pass(); + } +} + +struct dlist_t { + void (*destruct)(void); + struct dlist_t *next; +}; +static struct dlist_t *destructors_list; + +void test_add_destructor(void (*d)(void)) +{ + struct dlist_t *p; + + p = malloc(sizeof(struct dlist_t)); + if (p == NULL) + test_error("malloc() failed"); + + p->next = destructors_list; + p->destruct = d; + destructors_list = p; +} + +static void test_destructor(void) __attribute__((destructor)); +static void test_destructor(void) +{ + while (destructors_list) { + struct dlist_t *p = destructors_list->next; + + 
destructors_list->destruct(); + free(destructors_list); + destructors_list = p; + } + test_exit(); +} + +static void sig_int(int signo) +{ + test_error("Caught SIGINT - exiting"); +} + +int open_netns(void) +{ + const char *netns_path = "/proc/self/ns/net"; + int fd; + + fd = open(netns_path, O_RDONLY); + if (fd < 0) + test_error("open(%s)", netns_path); + return fd; +} + +int unshare_open_netns(void) +{ + if (unshare(CLONE_NEWNET) != 0) + test_error("unshare()"); + + return open_netns(); +} + +void switch_ns(int fd) +{ + if (setns(fd, CLONE_NEWNET)) + test_error("setns()"); +} + +int switch_save_ns(int new_ns) +{ + int ret = open_netns(); + + switch_ns(new_ns); + return ret; +} + +static int nsfd_outside = -1; +static int nsfd_parent = -1; +static int nsfd_child = -1; +const char veth_name[] = "ktst-veth"; + +static void init_namespaces(void) +{ + nsfd_outside = open_netns(); + nsfd_parent = unshare_open_netns(); + nsfd_child = unshare_open_netns(); +} + +static void link_init(const char *veth, int family, uint8_t prefix, + union tcp_addr addr, union tcp_addr dest) +{ + if (link_set_up(veth)) + test_error("Failed to set link up"); + if (ip_addr_add(veth, family, addr, prefix)) + test_error("Failed to add ip address"); + if (ip_route_add(veth, family, addr, dest)) + test_error("Failed to add route"); +} + +static unsigned int nr_threads = 1; + +static pthread_mutex_t sync_lock = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t sync_cond = PTHREAD_COND_INITIALIZER; +static volatile unsigned int stage_threads[2]; +static volatile unsigned int stage_nr; + +/* synchronize all threads in the same stage */ +void synchronize_threads(void) +{ + unsigned int q = stage_nr; + + pthread_mutex_lock(&sync_lock); + stage_threads[q]++; + if (stage_threads[q] == nr_threads) { + stage_nr ^= 1; + stage_threads[stage_nr] = 0; + pthread_cond_signal(&sync_cond); + } + while (stage_threads[q] < nr_threads) + pthread_cond_wait(&sync_cond, &sync_lock); + pthread_mutex_unlock(&sync_lock); 
+} + +__thread union tcp_addr this_ip_addr; +__thread union tcp_addr this_ip_dest; +int test_family; + +struct new_pthread_arg { + thread_fn func; + union tcp_addr my_ip; + union tcp_addr dest_ip; +}; +static void *new_pthread_entry(void *arg) +{ + struct new_pthread_arg *p = arg; + + this_ip_addr = p->my_ip; + this_ip_dest = p->dest_ip; + p->func(NULL); /* shouldn't return */ + exit(KSFT_FAIL); +} + +static void __test_skip_all(const char *msg) +{ + ksft_set_plan(1); + ksft_print_header(); + skipped = 1; + test_skip("%s", msg); + exit(KSFT_SKIP); +} + +void __test_init(unsigned int ntests, int family, unsigned int prefix, + union tcp_addr addr1, union tcp_addr addr2, + thread_fn peer1, thread_fn peer2) +{ + struct sigaction sa = { + .sa_handler = sig_int, + .sa_flags = SA_RESTART, + }; + time_t seed = time(NULL); + + sigemptyset(&sa.sa_mask); + if (sigaction(SIGINT, &sa, NULL)) + test_error("Can't set SIGINT handler"); + + test_family = family; + if (!kernel_config_has(KCONFIG_NET_NS)) + __test_skip_all(tests_skip_reason[KCONFIG_NET_NS]); + if (!kernel_config_has(KCONFIG_VETH)) + __test_skip_all(tests_skip_reason[KCONFIG_VETH]); + if (!kernel_config_has(KCONFIG_TCP_AO)) + __test_skip_all(tests_skip_reason[KCONFIG_TCP_AO]); + + ksft_set_plan(ntests); + test_print("rand seed %u", (unsigned int)seed); + srand(seed); + + + ksft_print_header(); + init_namespaces(); + + if (add_veth(veth_name, nsfd_parent, nsfd_child)) + test_error("Failed to add veth"); + + switch_ns(nsfd_child); + link_init(veth_name, family, prefix, addr2, addr1); + if (peer2) { + struct new_pthread_arg targ; + pthread_t t; + + targ.my_ip = addr2; + targ.dest_ip = addr1; + targ.func = peer2; + nr_threads++; + if (pthread_create(&t, NULL, new_pthread_entry, &targ)) + test_error("Failed to create pthread"); + } + switch_ns(nsfd_parent); + link_init(veth_name, family, prefix, addr1, addr2); + + this_ip_addr = addr1; + this_ip_dest = addr2; + peer1(NULL); + if (failed) + exit(KSFT_FAIL); + else + 
exit(KSFT_PASS); +} + +/* /proc/sys/net/core/optmem_max artificially limits the amount of memory + * that can be allocated with sock_kmalloc() on each socket in the system. + * It is not virtualized in v6.7, so it has to be written outside test + * namespaces. To be nice a test will revert optmem back to the old value. + * Keeping it simple without any file lock, which means the tests that + * need to set/increase optmem value shouldn't run in parallel. + * Also, not re-entrant. + * Since commit f5769faeec36 ("net: Namespace-ify sysctl_optmem_max") + * it is per-namespace, keeping logic for non-virtualized optmem_max + * for v6.7, which supports TCP-AO. + */ +static const char *optmem_file = "/proc/sys/net/core/optmem_max"; +static size_t saved_optmem; +static int optmem_ns = -1; + +/* Probe once, from inside the child netns, whether optmem_max exists there; the answer is cached in optmem_ns. */ +static bool is_optmem_namespaced(void) +{ + if (optmem_ns == -1) { + int old_ns = switch_save_ns(nsfd_child); + + optmem_ns = !access(optmem_file, F_OK); + switch_ns(old_ns); + /* switch_save_ns() opened a new fd for the previous netns: don't leak it */ + close(old_ns); + } + return !!optmem_ns; +} + +/* Read optmem_max; when it is not per-namespace, read it from the outside netns. */ +size_t test_get_optmem(void) +{ + int old_ns = 0; + FILE *foptmem; + size_t ret; + + if (!is_optmem_namespaced()) + old_ns = switch_save_ns(nsfd_outside); + foptmem = fopen(optmem_file, "r"); + if (!foptmem) + test_error("failed to open %s", optmem_file); + + if (fscanf(foptmem, "%zu", &ret) != 1) + test_error("can't read from %s", optmem_file); + fclose(foptmem); + if (!is_optmem_namespaced()) { + switch_ns(old_ns); + close(old_ns); /* fd came from switch_save_ns() */ + } + return ret; +} + +/* Write @new to optmem_max; when @old is not NULL the previous value is stored there first. */ +static void __test_set_optmem(size_t new, size_t *old) +{ + int old_ns = 0; + FILE *foptmem; + + if (old != NULL) + *old = test_get_optmem(); + + if (!is_optmem_namespaced()) + old_ns = switch_save_ns(nsfd_outside); + foptmem = fopen(optmem_file, "w"); + if (!foptmem) + test_error("failed to open %s", optmem_file); + + if (fprintf(foptmem, "%zu", new) <= 0) + test_error("can't write %zu to %s", new, optmem_file); + fclose(foptmem); + if (!is_optmem_namespaced()) { + switch_ns(old_ns); + close(old_ns); /* fd came from switch_save_ns() */ + } +} + +static void test_revert_optmem(void) +{ + if (saved_optmem == 0) + 
return; + + __test_set_optmem(saved_optmem, NULL); +} + +void test_set_optmem(size_t value) +{ + if (saved_optmem == 0) { + __test_set_optmem(value, &saved_optmem); + test_add_destructor(test_revert_optmem); + } else { + __test_set_optmem(value, NULL); + } +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c new file mode 100644 index 0000000000..15aeb09630 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c @@ -0,0 +1,596 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <alloca.h> +#include <fcntl.h> +#include <inttypes.h> +#include <string.h> +#include "../../../../../include/linux/kernel.h" +#include "../../../../../include/linux/stringify.h" +#include "aolib.h" + +const unsigned int test_server_port = 7010; +int __test_listen_socket(int backlog, void *addr, size_t addr_sz) +{ + int err, sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + long flags; + + if (sk < 0) + test_error("socket()"); + + err = setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE, veth_name, + strlen(veth_name) + 1); + if (err < 0) + test_error("setsockopt(SO_BINDTODEVICE)"); + + if (bind(sk, (struct sockaddr *)addr, addr_sz) < 0) + test_error("bind()"); + + flags = fcntl(sk, F_GETFL); + if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0)) + test_error("fcntl()"); + + if (listen(sk, backlog)) + test_error("listen()"); + + return sk; +} + +int test_wait_fd(int sk, time_t sec, bool write) +{ + struct timeval tv = { .tv_sec = sec }; + struct timeval *ptv = NULL; + fd_set fds, efds; + int ret; + socklen_t slen = sizeof(ret); + + FD_ZERO(&fds); + FD_SET(sk, &fds); + FD_ZERO(&efds); + FD_SET(sk, &efds); + + if (sec) + ptv = &tv; + + errno = 0; + if (write) + ret = select(sk + 1, NULL, &fds, &efds, ptv); + else + ret = select(sk + 1, &fds, NULL, &efds, ptv); + if (ret < 0) + return -errno; + if (ret == 0) { + errno = ETIMEDOUT; + return -ETIMEDOUT; + } + + if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &ret, &slen)) + return 
-errno; + if (ret) + return -ret; + return 0; +} + +int __test_connect_socket(int sk, const char *device, + void *addr, size_t addr_sz, time_t timeout) +{ + long flags; + int err; + + if (device != NULL) { + err = setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE, device, + strlen(device) + 1); + if (err < 0) + test_error("setsockopt(SO_BINDTODEVICE, %s)", device); + } + + if (!timeout) { + err = connect(sk, addr, addr_sz); + if (err) { + err = -errno; + goto out; + } + return 0; + } + + flags = fcntl(sk, F_GETFL); + if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0)) + test_error("fcntl()"); + + if (connect(sk, addr, addr_sz) < 0) { + if (errno != EINPROGRESS) { + err = -errno; + goto out; + } + if (timeout < 0) + return sk; + err = test_wait_fd(sk, timeout, 1); + if (err) + goto out; + } + return sk; + +out: + close(sk); + return err; +} + +/* Install a TCP-MD5 key for @addr/@prefix on @sk with setsockopt(TCP_MD5SIG_EXT); + * a non-negative @vrf is passed as the ifindex. Returns the setsockopt() result, + * or -1 with errno = EINVAL when @password or @addr_sz don't fit struct tcp_md5sig. + */ +int __test_set_md5(int sk, void *addr, size_t addr_sz, uint8_t prefix, + int vrf, const char *password) +{ + size_t pwd_len = strlen(password); + struct tcp_md5sig md5sig = {}; + + /* Refuse anything that would overflow the fixed-size fields copied into below */ + if (pwd_len > sizeof(md5sig.tcpm_key) || + addr_sz > sizeof(md5sig.tcpm_addr)) { + errno = EINVAL; + return -1; + } + + md5sig.tcpm_keylen = pwd_len; + memcpy(md5sig.tcpm_key, password, pwd_len); + md5sig.tcpm_flags = TCP_MD5SIG_FLAG_PREFIX; + md5sig.tcpm_prefixlen = prefix; + if (vrf >= 0) { + md5sig.tcpm_flags |= TCP_MD5SIG_FLAG_IFINDEX; + /* no narrowing cast: an ifindex above 255 must not be truncated */ + md5sig.tcpm_ifindex = vrf; + } + memcpy(&md5sig.tcpm_addr, addr, addr_sz); + + errno = 0; + return setsockopt(sk, IPPROTO_TCP, TCP_MD5SIG_EXT, + &md5sig, sizeof(md5sig)); +} + + +int test_prepare_key_sockaddr(struct tcp_ao_add *ao, const char *alg, + void *addr, size_t addr_sz, bool set_current, bool set_rnext, + uint8_t prefix, uint8_t vrf, uint8_t sndid, uint8_t rcvid, + uint8_t maclen, uint8_t keyflags, + uint8_t keylen, const char *key) +{ + memset(ao, 0, sizeof(struct tcp_ao_add)); + + ao->set_current = !!set_current; + ao->set_rnext = !!set_rnext; + ao->prefix = prefix; + ao->sndid = sndid; + ao->rcvid = rcvid; + ao->maclen = maclen; + ao->keyflags = keyflags; + ao->keylen = keylen; + ao->ifindex = vrf; + + 
memcpy(&ao->addr, addr, addr_sz); + + if (strlen(alg) > 64) + return -ENOBUFS; + strncpy(ao->alg_name, alg, 64); + + memcpy(ao->key, key, + (keylen > TCP_AO_MAXKEYLEN) ? TCP_AO_MAXKEYLEN : keylen); + return 0; +} + +static int test_get_ao_keys_nr(int sk) +{ + struct tcp_ao_getsockopt tmp = {}; + socklen_t tmp_sz = sizeof(tmp); + int ret; + + tmp.nkeys = 1; + tmp.get_all = 1; + + ret = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &tmp, &tmp_sz); + if (ret) + return -errno; + return (int)tmp.nkeys; +} + +int test_get_one_ao(int sk, struct tcp_ao_getsockopt *out, + void *addr, size_t addr_sz, uint8_t prefix, + uint8_t sndid, uint8_t rcvid) +{ + struct tcp_ao_getsockopt tmp = {}; + socklen_t tmp_sz = sizeof(tmp); + int ret; + + memcpy(&tmp.addr, addr, addr_sz); + tmp.prefix = prefix; + tmp.sndid = sndid; + tmp.rcvid = rcvid; + tmp.nkeys = 1; + + ret = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &tmp, &tmp_sz); + if (ret) + return ret; + if (tmp.nkeys != 1) + return -E2BIG; + *out = tmp; + return 0; +} + +int test_get_ao_info(int sk, struct tcp_ao_info_opt *out) +{ + socklen_t sz = sizeof(*out); + + out->reserved = 0; + out->reserved2 = 0; + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, out, &sz)) + return -errno; + if (sz != sizeof(*out)) + return -EMSGSIZE; + return 0; +} + +int test_set_ao_info(int sk, struct tcp_ao_info_opt *in) +{ + socklen_t sz = sizeof(*in); + + in->reserved = 0; + in->reserved2 = 0; + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, in, sz)) + return -errno; + return 0; +} + +int test_cmp_getsockopt_setsockopt(const struct tcp_ao_add *a, + const struct tcp_ao_getsockopt *b) +{ + bool is_kdf_aes_128_cmac = false; + bool is_cmac_aes = false; + + if (!strcmp("cmac(aes128)", a->alg_name)) { + is_kdf_aes_128_cmac = (a->keylen != 16); + is_cmac_aes = true; + } + +#define __cmp_ao(member) \ +do { \ + if (b->member != a->member) { \ + test_fail("getsockopt(): " __stringify(member) " %u != %u", \ + b->member, a->member); \ + return -1; \ + } \ +} while(0) + 
__cmp_ao(sndid); + __cmp_ao(rcvid); + __cmp_ao(prefix); + __cmp_ao(keyflags); + __cmp_ao(ifindex); + if (a->maclen) { + __cmp_ao(maclen); + } else if (b->maclen != 12) { + test_fail("getsockopt(): expected default maclen 12, but it's %u", + b->maclen); + return -1; + } + if (!is_kdf_aes_128_cmac) { + __cmp_ao(keylen); + } else if (b->keylen != 16) { + test_fail("getsockopt(): expected keylen 16 for cmac(aes128), but it's %u", + b->keylen); + return -1; + } +#undef __cmp_ao + if (!is_kdf_aes_128_cmac && memcmp(b->key, a->key, a->keylen)) { + test_fail("getsockopt(): returned key is different `%s' != `%s'", + b->key, a->key); + return -1; + } + if (memcmp(&b->addr, &a->addr, sizeof(b->addr))) { + test_fail("getsockopt(): returned address is different"); + return -1; + } + if (!is_cmac_aes && strcmp(b->alg_name, a->alg_name)) { + test_fail("getsockopt(): returned algorithm %s is different than %s", b->alg_name, a->alg_name); + return -1; + } + if (is_cmac_aes && strcmp(b->alg_name, "cmac(aes)")) { + test_fail("getsockopt(): returned algorithm %s is different than cmac(aes)", b->alg_name); + return -1; + } + /* For a established key rotation test don't add a key with + * set_current = 1, as it's likely to change by peer's request; + * rather use setsockopt(TCP_AO_INFO) + */ + if (a->set_current != b->is_current) { + test_fail("getsockopt(): returned key is not Current_key"); + return -1; + } + if (a->set_rnext != b->is_rnext) { + test_fail("getsockopt(): returned key is not RNext_key"); + return -1; + } + + return 0; +} + +int test_cmp_getsockopt_setsockopt_ao(const struct tcp_ao_info_opt *a, + const struct tcp_ao_info_opt *b) +{ + /* No check for ::current_key, as it may change by the peer */ + if (a->ao_required != b->ao_required) { + test_fail("getsockopt(): returned ao doesn't have ao_required"); + return -1; + } + if (a->accept_icmps != b->accept_icmps) { + test_fail("getsockopt(): returned ao doesn't accept ICMPs"); + return -1; + } + if (a->set_rnext && a->rnext 
!= b->rnext) { + test_fail("getsockopt(): RNext KeyID has changed"); + return -1; + } +#define __cmp_cnt(member) \ +do { \ + if (b->member != a->member) { \ + test_fail("getsockopt(): " __stringify(member) " %llu != %llu", \ + b->member, a->member); \ + return -1; \ + } \ +} while(0) + if (a->set_counters) { + __cmp_cnt(pkt_good); + __cmp_cnt(pkt_bad); + __cmp_cnt(pkt_key_not_found); + __cmp_cnt(pkt_ao_required); + __cmp_cnt(pkt_dropped_icmp); + } +#undef __cmp_cnt + return 0; +} + +int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) +{ + struct tcp_ao_getsockopt *key_dump; + socklen_t key_dump_sz = sizeof(*key_dump); + struct tcp_ao_info_opt info = {}; + bool c1, c2, c3, c4, c5; + struct netstat *ns; + int err, nr_keys; + + memset(out, 0, sizeof(*out)); + + /* per-netns */ + ns = netstat_read(); + out->netns_ao_good = netstat_get(ns, "TCPAOGood", &c1); + out->netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2); + out->netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3); + out->netns_ao_required = netstat_get(ns, "TCPAORequired", &c4); + out->netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5); + netstat_free(ns); + if (c1 || c2 || c3 || c4 || c5) + return -EOPNOTSUPP; + + err = test_get_ao_info(sk, &info); + if (err) + return err; + + /* per-socket */ + out->ao_info_pkt_good = info.pkt_good; + out->ao_info_pkt_bad = info.pkt_bad; + out->ao_info_pkt_key_not_found = info.pkt_key_not_found; + out->ao_info_pkt_ao_required = info.pkt_ao_required; + out->ao_info_pkt_dropped_icmp = info.pkt_dropped_icmp; + + /* per-key */ + nr_keys = test_get_ao_keys_nr(sk); + if (nr_keys < 0) + return nr_keys; + if (nr_keys == 0) + test_error("test_get_ao_keys_nr() == 0"); + out->nr_keys = (size_t)nr_keys; + key_dump = calloc(nr_keys, key_dump_sz); + if (!key_dump) + return -errno; + + key_dump[0].nkeys = nr_keys; + key_dump[0].get_all = 1; + key_dump[0].get_all = 1; + err = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, + key_dump, &key_dump_sz); + 
if (err) { + free(key_dump); + return -errno; + } + + out->key_cnts = calloc(nr_keys, sizeof(out->key_cnts[0])); + if (!out->key_cnts) { + free(key_dump); + return -errno; + } + + while (nr_keys--) { + out->key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid; + out->key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid; + out->key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good; + out->key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad; + } + free(key_dump); + + return 0; +} + +int __test_tcp_ao_counters_cmp(const char *tst_name, + struct tcp_ao_counters *before, + struct tcp_ao_counters *after, + test_cnt expected) +{ +#define __cmp_ao(cnt, expecting_inc) \ +do { \ + if (before->cnt > after->cnt) { \ + test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64, \ + tst_name ?: "", before->cnt, after->cnt); \ + return -1; \ + } \ + if ((before->cnt != after->cnt) != (expecting_inc)) { \ + test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64, \ + tst_name ?: "", (expecting_inc) ? 
"" : "not ", \ + before->cnt, after->cnt); \ + return -1; \ + } \ +} while(0) + + errno = 0; + /* per-netns */ + __cmp_ao(netns_ao_good, !!(expected & TEST_CNT_NS_GOOD)); + __cmp_ao(netns_ao_bad, !!(expected & TEST_CNT_NS_BAD)); + __cmp_ao(netns_ao_key_not_found, + !!(expected & TEST_CNT_NS_KEY_NOT_FOUND)); + __cmp_ao(netns_ao_required, !!(expected & TEST_CNT_NS_AO_REQUIRED)); + __cmp_ao(netns_ao_dropped_icmp, + !!(expected & TEST_CNT_NS_DROPPED_ICMP)); + /* per-socket */ + __cmp_ao(ao_info_pkt_good, !!(expected & TEST_CNT_SOCK_GOOD)); + __cmp_ao(ao_info_pkt_bad, !!(expected & TEST_CNT_SOCK_BAD)); + __cmp_ao(ao_info_pkt_key_not_found, + !!(expected & TEST_CNT_SOCK_KEY_NOT_FOUND)); + __cmp_ao(ao_info_pkt_ao_required, !!(expected & TEST_CNT_SOCK_AO_REQUIRED)); + __cmp_ao(ao_info_pkt_dropped_icmp, + !!(expected & TEST_CNT_SOCK_DROPPED_ICMP)); + return 0; +#undef __cmp_ao +} + +int test_tcp_ao_key_counters_cmp(const char *tst_name, + struct tcp_ao_counters *before, + struct tcp_ao_counters *after, + test_cnt expected, + int sndid, int rcvid) +{ + size_t i; +#define __cmp_ao(i, cnt, expecting_inc) \ +do { \ + if (before->key_cnts[i].cnt > after->key_cnts[i].cnt) { \ + test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64 " for key %u:%u", \ + tst_name ?: "", before->key_cnts[i].cnt, \ + after->key_cnts[i].cnt, \ + before->key_cnts[i].sndid, \ + before->key_cnts[i].rcvid); \ + return -1; \ + } \ + if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != (expecting_inc)) { \ + test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64 " for key %u:%u", \ + tst_name ?: "", (expecting_inc) ? 
"" : "not ",\ + before->key_cnts[i].cnt, \ + after->key_cnts[i].cnt, \ + before->key_cnts[i].sndid, \ + before->key_cnts[i].rcvid); \ + return -1; \ + } \ +} while(0) + + if (before->nr_keys != after->nr_keys) { + test_fail("%s: Keys changed on the socket %zu != %zu", + tst_name, before->nr_keys, after->nr_keys); + return -1; + } + + /* per-key */ + i = before->nr_keys; + while (i--) { + if (sndid >= 0 && before->key_cnts[i].sndid != sndid) + continue; + if (rcvid >= 0 && before->key_cnts[i].rcvid != rcvid) + continue; + __cmp_ao(i, pkt_good, !!(expected & TEST_CNT_KEY_GOOD)); + __cmp_ao(i, pkt_bad, !!(expected & TEST_CNT_KEY_BAD)); + } + return 0; +#undef __cmp_ao +} + +void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts) +{ + free(cnts->key_cnts); +} + +#define TEST_BUF_SIZE 4096 +ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) +{ + ssize_t total = 0; + + do { + char buf[TEST_BUF_SIZE]; + ssize_t bytes, sent; + int ret; + + ret = test_wait_fd(sk, timeout_sec, 0); + if (ret) + return ret; + + bytes = recv(sk, buf, sizeof(buf), 0); + + if (bytes < 0) + test_error("recv(): %zd", bytes); + if (bytes == 0) + break; + + ret = test_wait_fd(sk, timeout_sec, 1); + if (ret) + return ret; + + sent = send(sk, buf, bytes, 0); + if (sent == 0) + break; + if (sent != bytes) + test_error("send()"); + total += bytes; + } while (!quota || total < quota); + + return total; +} + +ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, + const size_t msg_len, time_t timeout_sec) +{ + char msg[msg_len]; + int nodelay = 1; + size_t i; + + if (setsockopt(sk, IPPROTO_TCP, TCP_NODELAY, &nodelay, sizeof(nodelay))) + test_error("setsockopt(TCP_NODELAY)"); + + for (i = 0; i < buf_sz; i += min(msg_len, buf_sz - i)) { + size_t sent, bytes = min(msg_len, buf_sz - i); + int ret; + + ret = test_wait_fd(sk, timeout_sec, 1); + if (ret) + return ret; + + sent = send(sk, buf + i, bytes, 0); + if (sent == 0) + break; + if (sent != bytes) + test_error("send()"); + + bytes 
= 0; + do { + ssize_t got; + + ret = test_wait_fd(sk, timeout_sec, 0); + if (ret) + return ret; + + got = recv(sk, msg + bytes, sizeof(msg) - bytes, 0); + if (got <= 0) + return i; + bytes += got; + } while (bytes < sent); + if (bytes > sent) + test_error("recv(): %zd > %zd", bytes, sent); + if (memcmp(buf + i, msg, bytes) != 0) { + test_fail("received message differs"); + return -1; + } + } + return i; +} + +int test_client_verify(int sk, const size_t msg_len, const size_t nr, + time_t timeout_sec) +{ + size_t buf_sz = msg_len * nr; + char *buf = alloca(buf_sz); + ssize_t ret; + + randomize_buffer(buf, buf_sz); + ret = test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec); + if (ret < 0) + return (int)ret; + return ret != buf_sz ? -1 : 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/utils.c b/tools/testing/selftests/net/tcp_ao/lib/utils.c new file mode 100644 index 0000000000..372daca525 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/utils.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "aolib.h" +#include <string.h> + +void randomize_buffer(void *buf, size_t buflen) +{ + int *p = (int *)buf; + size_t words = buflen / sizeof(int); + size_t leftover = buflen % sizeof(int); + + if (!buflen) + return; + + while (words--) + *p++ = rand(); + + if (leftover) { + int tmp = rand(); + + memcpy(buf + buflen - leftover, &tmp, leftover); + } +} + +const struct sockaddr_in6 addr_any6 = { + .sin6_family = AF_INET6, +}; + +const struct sockaddr_in addr_any4 = { + .sin_family = AF_INET, +}; diff --git a/tools/testing/selftests/net/tcp_ao/restore.c b/tools/testing/selftests/net/tcp_ao/restore.c new file mode 100644 index 0000000000..8fdc808df3 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/restore.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov <dima@arista.com> */ +/* This is over-simplified TCP_REPAIR for TCP_ESTABLISHED sockets + * It tests that TCP-AO enabled connection can be restored. 
+ * For the proper socket repair see: + * https://github.com/checkpoint-restore/criu/blob/criu-dev/soccr/soccr.h + */ +#include <inttypes.h> +#include "aolib.h" + +const size_t nr_packets = 20; +const size_t msg_len = 100; +const size_t quota = nr_packets * msg_len; +#define fault(type) (inj == FAULT_ ## type) + +static void try_server_run(const char *tst_name, unsigned int port, + fault_t inj, test_cnt cnt_expected) +{ + const char *cnt_name = "TCPAOGood"; + struct tcp_ao_counters ao1, ao2; + uint64_t before_cnt, after_cnt; + int sk, lsk; + time_t timeout; + ssize_t bytes; + + if (fault(TIMEOUT)) + cnt_name = "TCPAOBad"; + lsk = test_listen_socket(this_ip_addr, port, 1); + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + synchronize_threads(); /* 1: MKT added => connect() */ + + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + synchronize_threads(); /* 2: accepted => send data */ + close(lsk); + + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) { + test_fail("%s: server served: %zd", tst_name, bytes); + goto out; + } + + before_cnt = netstat_get_one(cnt_name, NULL); + if (test_get_tcp_ao_counters(sk, &ao1)) + test_error("test_get_tcp_ao_counters()"); + + timeout = fault(TIMEOUT) ? 
TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + bytes = test_server_run(sk, quota, timeout); + if (fault(TIMEOUT)) { + if (bytes > 0) + test_fail("%s: server served: %zd", tst_name, bytes); + else + test_ok("%s: server couldn't serve", tst_name); + } else { + if (bytes != quota) + test_fail("%s: server served: %zd", tst_name, bytes); + else + test_ok("%s: server alive", tst_name); + } + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + after_cnt = netstat_get_one(cnt_name, NULL); + + test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected); + + /* before_cnt/after_cnt are uint64_t, so PRIu64 rather than %zu */ + if (after_cnt <= before_cnt) { + test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64, + tst_name, cnt_name, after_cnt, before_cnt); + } else { + test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64, + tst_name, cnt_name, before_cnt, after_cnt); + } + + /* + * Before close() as that will send FIN and move the peer in TCP_CLOSE + * and that will prevent reading AO counters from the peer's socket. + */ + synchronize_threads(); /* 3: verified => closed */ +out: + close(sk); +} + +static void *server_fn(void *arg) +{ + unsigned int port = test_server_port; + + try_server_run("TCP-AO migrate to another socket", port++, + 0, TEST_CNT_GOOD); + try_server_run("TCP-AO with wrong send ISN", port++, + FAULT_TIMEOUT, TEST_CNT_BAD); + try_server_run("TCP-AO with wrong receive ISN", port++, + FAULT_TIMEOUT, TEST_CNT_BAD); + try_server_run("TCP-AO with wrong send SEQ ext number", port++, + FAULT_TIMEOUT, TEST_CNT_BAD); + try_server_run("TCP-AO with wrong receive SEQ ext number", port++, + FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD); + + synchronize_threads(); /* don't race to exit: client exits */ + return NULL; +} + +static void test_get_sk_checkpoint(unsigned int server_port, sockaddr_af *saddr, + struct tcp_sock_state *img, + struct tcp_ao_repair *ao_img) +{ + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, 
DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* 1: MKT added => connect() */ + if (test_connect_socket(sk, this_ip_dest, server_port) <= 0) + test_error("failed to connect()"); + + synchronize_threads(); /* 2: accepted => send data */ + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + test_fail("pre-migrate verify failed"); + + test_enable_repair(sk); + test_sock_checkpoint(sk, img, saddr); + test_ao_checkpoint(sk, ao_img); + test_kill_sk(sk); +} + +static void test_sk_restore(const char *tst_name, unsigned int server_port, + sockaddr_af *saddr, struct tcp_sock_state *img, + struct tcp_ao_repair *ao_img, + fault_t inj, test_cnt cnt_expected) +{ + const char *cnt_name = "TCPAOGood"; + struct tcp_ao_counters ao1, ao2; + uint64_t before_cnt, after_cnt; + time_t timeout; + int sk; + + if (fault(TIMEOUT)) + cnt_name = "TCPAOBad"; + + before_cnt = netstat_get_one(cnt_name, NULL); + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + test_enable_repair(sk); + test_sock_restore(sk, img, saddr, this_ip_dest, server_port); + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + test_ao_restore(sk, ao_img); + + if (test_get_tcp_ao_counters(sk, &ao1)) + test_error("test_get_tcp_ao_counters()"); + + test_disable_repair(sk); + test_sock_state_free(img); + + timeout = fault(TIMEOUT) ? 
TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + /* a non-zero test_client_verify() result means the data exchange failed */ + if (test_client_verify(sk, msg_len, nr_packets, timeout)) { + if (fault(TIMEOUT)) + test_ok("%s: post-migrate connection is broken", tst_name); + else + test_fail("%s: post-migrate connection is broken", tst_name); + } else { + if (fault(TIMEOUT)) + test_fail("%s: post-migrate connection still working", tst_name); + else + test_ok("%s: post-migrate connection is alive", tst_name); + } + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + after_cnt = netstat_get_one(cnt_name, NULL); + + test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected); + + /* before_cnt/after_cnt are uint64_t, so PRIu64 rather than %zu */ + if (after_cnt <= before_cnt) { + test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64, + tst_name, cnt_name, after_cnt, before_cnt); + } else { + test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64, + tst_name, cnt_name, before_cnt, after_cnt); + } + synchronize_threads(); /* 3: verified => closed */ + close(sk); +} + +static void *client_fn(void *arg) +{ + unsigned int port = test_server_port; + struct tcp_sock_state tcp_img; + struct tcp_ao_repair ao_img; + sockaddr_af saddr; + + test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); + test_sk_restore("TCP-AO migrate to another socket", port++, + &saddr, &tcp_img, &ao_img, 0, TEST_CNT_GOOD); + + test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); + ao_img.snt_isn += 1; + test_sk_restore("TCP-AO with wrong send ISN", port++, + &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD); + + test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); + ao_img.rcv_isn += 1; + test_sk_restore("TCP-AO with wrong receive ISN", port++, + &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD); + + test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); + ao_img.snd_sne += 1; + test_sk_restore("TCP-AO with wrong send SEQ ext number", port++, + &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, + TEST_CNT_NS_BAD | TEST_CNT_GOOD); + + test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); + ao_img.rcv_sne += 
1; + test_sk_restore("TCP-AO with wrong receive SEQ ext number", port++, + &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, + TEST_CNT_NS_GOOD | TEST_CNT_BAD); + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(20, server_fn, client_fn); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c new file mode 100644 index 0000000000..a2fe88d35a --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/rst.c @@ -0,0 +1,460 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * The test checks that both active and passive reset have correct TCP-AO + * signature. An "active" reset (abort) here is procured from closing + * listen() socket with non-accepted connections in the queue: + * inet_csk_listen_stop() => inet_child_forget() => + * => tcp_disconnect() => tcp_send_active_reset() + * + * The passive reset is quite hard to get on established TCP connections. + * It could be procured from non-established states, but the synchronization + * part from userspace in order to reliably get RST seems uneasy. + * So, instead it's procured by corrupting SEQ number on TIMED-WAIT state. + * + * It's important to test both passive and active RST as they go through + * different code-paths: + * - tcp_send_active_reset() makes no-data skb, sends it with tcp_transmit_skb() + * - tcp_v*_send_reset() create their reply skbs and send them with + * ip_send_unicast_reply() + * + * In both cases TCP-AO signatures have to be correct, which is verified by + * (1) checking that the TCP-AO connection was reset and (2) TCP-AO counters. 
+ * + * Author: Dmitry Safonov <dima@arista.com> + */ +#include <inttypes.h> +#include "../../../../include/linux/kernel.h" +#include "aolib.h" + +const size_t quota = 1000; +const size_t packet_sz = 100; +/* + * Backlog == 0 means 1 connection in queue, see: + * commit 64a146513f8f ("[NET]: Revert incorrect accept queue...") + */ +const unsigned int backlog; + +static void netstats_check(struct netstat *before, struct netstat *after, + char *msg) +{ + uint64_t before_cnt, after_cnt; + + before_cnt = netstat_get(before, "TCPAORequired", NULL); + after_cnt = netstat_get(after, "TCPAORequired", NULL); + if (after_cnt > before_cnt) + test_fail("Segments without AO sign (%s): %" PRIu64 " => %" PRIu64, + msg, before_cnt, after_cnt); + else + test_ok("No segments without AO sign (%s)", msg); + + before_cnt = netstat_get(before, "TCPAOGood", NULL); + after_cnt = netstat_get(after, "TCPAOGood", NULL); + if (after_cnt <= before_cnt) + test_fail("Signed AO segments (%s): %" PRIu64 " => %" PRIu64, + msg, before_cnt, after_cnt); + else + test_ok("Signed AO segments (%s): %" PRIu64 " => %" PRIu64, + msg, before_cnt, after_cnt); + + before_cnt = netstat_get(before, "TCPAOBad", NULL); + after_cnt = netstat_get(after, "TCPAOBad", NULL); + if (after_cnt > before_cnt) + test_fail("Segments with bad AO sign (%s): %" PRIu64 " => %" PRIu64, + msg, before_cnt, after_cnt); + else + test_ok("No segments with bad AO sign (%s)", msg); +} + +/* + * Another way to send RST, but not through tcp_v{4,6}_send_reset() + * is tcp_send_active_reset(), that is not in reply to inbound segment, + * but rather active send. It uses tcp_transmit_skb(), so that should + * work, but as it also sends RST - nice that it can be covered as well. 
+ */ +static void close_forced(int sk) +{ + struct linger sl; + + sl.l_onoff = 1; + sl.l_linger = 0; + if (setsockopt(sk, SOL_SOCKET, SO_LINGER, &sl, sizeof(sl))) + test_error("setsockopt(SO_LINGER)"); + close(sk); +} + +static void test_server_active_rst(unsigned int port) +{ + struct tcp_ao_counters cnt1, cnt2; + ssize_t bytes; + int sk, lsk; + + lsk = test_listen_socket(this_ip_addr, port, backlog); + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + if (test_get_tcp_ao_counters(lsk, &cnt1)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 1: MKT added */ + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + synchronize_threads(); /* 2: connection accept()ed, another queued */ + if (test_get_tcp_ao_counters(lsk, &cnt2)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 3: close listen socket */ + close(lsk); + bytes = test_server_run(sk, quota, 0); + if (bytes != quota) + test_error("servered only %zd bytes", bytes); + else + test_ok("servered %zd bytes", bytes); + + synchronize_threads(); /* 4: finishing up */ + close_forced(sk); + + synchronize_threads(); /* 5: closed active sk */ + + synchronize_threads(); /* 6: counters checks */ + if (test_tcp_ao_counters_cmp("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD)) + test_fail("MKT counters (server) have not only good packets"); + else + test_ok("MKT counters are good on server"); +} + +static void test_server_passive_rst(unsigned int port) +{ + struct tcp_ao_counters ao1, ao2; + int sk, lsk; + ssize_t bytes; + + lsk = test_listen_socket(this_ip_addr, port, 1); + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* 1: MKT added => connect() */ + if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + 
/*
 * Wait until at least @wait_for of the @nr descriptors in @sk[] become
 * writable or report an exceptional condition.
 *
 * @sk:          descriptors to watch
 * @nr:          number of entries in @sk (and in @is_writable, if given)
 * @is_writable: optional; entry i is set to true once sk[i] was reported
 *               writable, and is reset to false on entry
 * @wait_for:    how many descriptors have to fire before returning
 * @sec:         select() timeout in seconds, 0 means wait forever
 *
 * Returns 0 on success or a negative errno:
 *   -EBADF     a descriptor is negative or >= FD_SETSIZE
 *   -ENOENT    nothing is left to wait for, but @wait_for was not reached
 *   -ETIMEDOUT select() timed out
 *   -errno     select() itself failed
 */
static int test_wait_fds(int sk[], size_t nr, bool is_writable[],
			 ssize_t wait_for, time_t sec)
{
	struct timeval tv = { .tv_sec = sec };
	struct timeval *ptv = NULL;
	fd_set left;
	size_t i;
	int ret;

	FD_ZERO(&left);
	for (i = 0; i < nr; i++) {
		/*
		 * FD_SET()/FD_ISSET() are undefined for descriptors outside
		 * [0, FD_SETSIZE): reject them instead of corrupting the stack.
		 */
		if (sk[i] < 0 || sk[i] >= FD_SETSIZE)
			return -EBADF;
		FD_SET(sk[i], &left);
		if (is_writable)
			is_writable[i] = false;
	}

	if (sec)
		ptv = &tv;

	do {
		bool is_empty = true;
		fd_set fds, efds;
		int nfd = 0;

		/* Rebuild the sets from the descriptors that have not fired yet */
		FD_ZERO(&fds);
		FD_ZERO(&efds);
		for (i = 0; i < nr; i++) {
			if (!FD_ISSET(sk[i], &left))
				continue;

			if (sk[i] > nfd)
				nfd = sk[i];

			FD_SET(sk[i], &fds);
			FD_SET(sk[i], &efds);
			is_empty = false;
		}
		if (is_empty)
			return -ENOENT;

		errno = 0;
		ret = select(nfd + 1, NULL, &fds, &efds, ptv);
		if (ret < 0)
			return -errno;
		if (!ret)
			return -ETIMEDOUT;

		for (i = 0; i < nr; i++) {
			if (FD_ISSET(sk[i], &fds)) {
				if (is_writable)
					is_writable[i] = true;
				FD_CLR(sk[i], &left);
				wait_for--;
				continue;
			}
			/* Exceptional condition: counted, but not marked writable */
			if (FD_ISSET(sk[i], &efds)) {
				FD_CLR(sk[i], &left);
				wait_for--;
			}
		}
	} while (wait_for > 0);

	return 0;
}
TEST_TIMEOUT_SEC : -1); + if (err < 0) + test_error("failed to connect()"); + } + + synchronize_threads(); /* 2: two connections: one accept()ed, another queued */ + err = test_wait_fds(sk, last, is_writable, last, TEST_TIMEOUT_SEC); + if (err < 0) + test_error("test_wait_fds(): %d", err); + + /* async connect() with third sk to get into request_sock_queue */ + err = _test_connect_socket(sk[last], this_ip_dest, port, -1); + if (err < 0) + test_error("failed to connect()"); + + synchronize_threads(); /* 3: close listen socket */ + if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) + test_fail("Failed to send data on connected socket"); + else + test_ok("Verified established tcp connection"); + + synchronize_threads(); /* 4: finishing up */ + + synchronize_threads(); /* 5: closed active sk */ + /* + * Wait for 2 connections: one accepted, another in the accept queue, + * the one in request_sock_queue won't get fully established, so + * doesn't receive an active RST, see inet_csk_listen_stop(). + */ + err = test_wait_fds(sk, last, NULL, last, TEST_TIMEOUT_SEC); + if (err < 0) + test_error("select(): %d", err); + + for (i = 0; i < ARRAY_SIZE(sk); i++) { + socklen_t slen = sizeof(err); + + if (getsockopt(sk[i], SOL_SOCKET, SO_ERROR, &err, &slen)) + test_error("getsockopt()"); + if (is_writable[i] && err != ECONNRESET) { + test_fail("sk[%d] = %d, err = %d, connection wasn't reset", + i, sk[i], err); + } else { + test_ok("sk[%d] = %d%s", i, sk[i], + is_writable[i] ? 
", connection was reset" : ""); + } + } + synchronize_threads(); /* 6: counters checks */ +} + +static void test_client_passive_rst(unsigned int port) +{ + struct tcp_ao_counters ao1, ao2; + struct tcp_ao_repair ao_img; + struct tcp_sock_state img; + sockaddr_af saddr; + int sk, err; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* 1: MKT added => connect() */ + if (test_connect_socket(sk, this_ip_dest, port) <= 0) + test_error("failed to connect()"); + + synchronize_threads(); /* 2: accepted => send data */ + if (test_client_verify(sk, packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) + test_fail("Failed to send data on connected socket"); + else + test_ok("Verified established tcp connection"); + + synchronize_threads(); /* 3: checkpoint the client */ + test_enable_repair(sk); + test_sock_checkpoint(sk, &img, &saddr); + test_ao_checkpoint(sk, &ao_img); + test_disable_repair(sk); + + synchronize_threads(); /* 4: close the server, creating twsk */ + + /* + * The "corruption" in SEQ has to be small enough to fit into TCP + * window, see tcp_timewait_state_process() for out-of-window + * segments. + */ + img.out.seq += 5; /* 5 is more noticeable in tcpdump than 1 */ + + /* + * FIXME: This is kind-of ugly and dirty, but it works. + * + * At this moment, the server has close'ed(sk). + * The passive RST that is being targeted here is new data after + * half-duplex close, see tcp_timewait_state_process() => TCP_TW_RST + * + * What is needed here is: + * (1) wait for FIN from the server + * (2) make sure that the ACK from the client went out + * (3) make sure that the ACK was received and processed by the server + * + * Otherwise, the data that will be sent from "repaired" socket + * post SEQ corruption may get to the server before it's in + * TCP_FIN_WAIT2. 
+ * + * (1) is easy with select()/poll() + * (2) is possible by polling tcpi_state from TCP_INFO + * (3) is quite complex: as server's socket was already closed, + * probably the way to do it would be tcp-diag. + */ + sleep(TEST_RETRANSMIT_SEC); + + synchronize_threads(); /* 5: restore the socket, send more data */ + test_kill_sk(sk); + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + test_enable_repair(sk); + test_sock_restore(sk, &img, &saddr, this_ip_dest, port); + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + test_ao_restore(sk, &ao_img); + + if (test_get_tcp_ao_counters(sk, &ao1)) + test_error("test_get_tcp_ao_counters()"); + + test_disable_repair(sk); + test_sock_state_free(&img); + + /* + * This is how "passive reset" is acquired in this test from TCP_TW_RST: + * + * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [P.], seq 901:1001, ack 1001, win 249, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x10217d6c36a22379086ef3b1], length 100 + * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [F.], seq 1001, ack 1001, win 249, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x104ffc99b98c10a5298cc268], length 0 + * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [.], ack 1002, win 251, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0xe496dd4f7f5a8a66873c6f93,nop,nop,sack 1 {1001:1002}], length 0 + * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [P.], seq 1006:1106, ack 1001, win 251, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x1b5f3330fb23fbcd0c77d0ca], length 100 + * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [R], seq 3215596252, win 0, + * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x0bcfbbf497bce844312304b2], length 0 + */ + err = test_client_verify(sk, packet_sz, quota / packet_sz, 2 * TEST_TIMEOUT_SEC); + /* Make sure that the connection was reset, not timeouted */ + if (err && err == -ECONNRESET) + test_ok("client sock was 
passively reset post-seq-adjust"); + else if (err) + test_fail("client sock was not reset post-seq-adjust: %d", err); + else + test_fail("client sock is yet connected post-seq-adjust"); + + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 6: server exits */ + close(sk); + test_tcp_ao_counters_cmp("client passive RST", &ao1, &ao2, TEST_CNT_GOOD); +} + +static void *client_fn(void *arg) +{ + struct netstat *ns_before, *ns_after; + unsigned int port = test_server_port; + + ns_before = netstat_read(); + + test_client_active_rst(port++); + test_client_passive_rst(port++); + + ns_after = netstat_read(); + netstats_check(ns_before, ns_after, "client"); + netstat_free(ns_after); + netstat_free(ns_before); + + synchronize_threads(); /* exit */ + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(14, server_fn, client_fn); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c new file mode 100644 index 0000000000..e154d9e198 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/self-connect.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov <dima@arista.com> */ +#include <inttypes.h> +#include "aolib.h" + +static union tcp_addr local_addr; + +static void __setup_lo_intf(const char *lo_intf, + const char *addr_str, uint8_t prefix) +{ + if (inet_pton(TEST_FAMILY, addr_str, &local_addr) != 1) + test_error("Can't convert local ip address"); + + if (ip_addr_add(lo_intf, TEST_FAMILY, local_addr, prefix)) + test_error("Failed to add %s ip address", lo_intf); + + if (link_set_up(lo_intf)) + test_error("Failed to bring %s up", lo_intf); +} + +static void setup_lo_intf(const char *lo_intf) +{ +#ifdef IPV6_TEST + __setup_lo_intf(lo_intf, "::1", 128); +#else + __setup_lo_intf(lo_intf, "127.0.0.1", 8); +#endif +} + +static void tcp_self_connect(const char *tst, unsigned int port, + bool 
different_keyids, bool check_restore) +{ + uint64_t before_challenge_ack, after_challenge_ack; + uint64_t before_syn_challenge, after_syn_challenge; + struct tcp_ao_counters before_ao, after_ao; + uint64_t before_aogood, after_aogood; + struct netstat *ns_before, *ns_after; + const size_t nr_packets = 20; + struct tcp_ao_repair ao_img; + struct tcp_sock_state img; + sockaddr_af addr; + int sk; + + tcp_addr_to_sockaddr_in(&addr, &local_addr, htons(port)); + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (different_keyids) { + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, local_addr, -1, 5, 7)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, local_addr, -1, 7, 5)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } else { + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, local_addr, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } + + if (bind(sk, (struct sockaddr *)&addr, sizeof(addr)) < 0) + test_error("bind()"); + + ns_before = netstat_read(); + before_aogood = netstat_get(ns_before, "TCPAOGood", NULL); + before_challenge_ack = netstat_get(ns_before, "TCPChallengeACK", NULL); + before_syn_challenge = netstat_get(ns_before, "TCPSYNChallenge", NULL); + if (test_get_tcp_ao_counters(sk, &before_ao)) + test_error("test_get_tcp_ao_counters()"); + + if (__test_connect_socket(sk, "lo", (struct sockaddr *)&addr, + sizeof(addr), TEST_TIMEOUT_SEC) < 0) { + ns_after = netstat_read(); + netstat_print_diff(ns_before, ns_after); + test_error("failed to connect()"); + } + + if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + test_fail("%s: tcp connection verify failed", tst); + close(sk); + return; + } + + ns_after = netstat_read(); + after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); + after_challenge_ack = netstat_get(ns_after, "TCPChallengeACK", NULL); + after_syn_challenge = netstat_get(ns_after, "TCPSYNChallenge", NULL); + if (test_get_tcp_ao_counters(sk, 
&after_ao)) + test_error("test_get_tcp_ao_counters()"); + if (!check_restore) { + /* to debug: netstat_print_diff(ns_before, ns_after); */ + netstat_free(ns_before); + } + netstat_free(ns_after); + + if (after_aogood <= before_aogood) { + test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu", + tst, after_aogood, before_aogood); + close(sk); + return; + } + if (after_challenge_ack <= before_challenge_ack || + after_syn_challenge <= before_syn_challenge) { + /* + * It's also meant to test simultaneous open, so check + * these counters as well. + */ + test_fail("%s: Didn't challenge SYN or ACK: %zu <= %zu OR %zu <= %zu", + tst, after_challenge_ack, before_challenge_ack, + after_syn_challenge, before_syn_challenge); + close(sk); + return; + } + + if (test_tcp_ao_counters_cmp(tst, &before_ao, &after_ao, TEST_CNT_GOOD)) { + close(sk); + return; + } + + if (!check_restore) { + test_ok("%s: connect TCPAOGood %" PRIu64 " => %" PRIu64, + tst, before_aogood, after_aogood); + close(sk); + return; + } + + test_enable_repair(sk); + test_sock_checkpoint(sk, &img, &addr); +#ifdef IPV6_TEST + addr.sin6_port = htons(port + 1); +#else + addr.sin_port = htons(port + 1); +#endif + test_ao_checkpoint(sk, &ao_img); + test_kill_sk(sk); + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + test_enable_repair(sk); + __test_sock_restore(sk, "lo", &img, &addr, &addr, sizeof(addr)); + if (different_keyids) { + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, + local_addr, -1, 7, 5)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, + local_addr, -1, 5, 7)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } else { + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, + local_addr, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } + test_ao_restore(sk, &ao_img); + test_disable_repair(sk); + test_sock_state_free(&img); + if (test_client_verify(sk, 100, nr_packets, 
TEST_TIMEOUT_SEC)) { + test_fail("%s: tcp connection verify failed", tst); + close(sk); + return; + } + ns_after = netstat_read(); + after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); + /* to debug: netstat_print_diff(ns_before, ns_after); */ + netstat_free(ns_before); + netstat_free(ns_after); + close(sk); + if (after_aogood <= before_aogood) { + test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu", + tst, after_aogood, before_aogood); + return; + } + test_ok("%s: connect TCPAOGood %" PRIu64 " => %" PRIu64, + tst, before_aogood, after_aogood); +} + +static void *client_fn(void *arg) +{ + unsigned int port = test_server_port; + + setup_lo_intf("lo"); + + tcp_self_connect("self-connect(same keyids)", port++, false, false); + tcp_self_connect("self-connect(different keyids)", port++, true, false); + tcp_self_connect("self-connect(restore)", port, false, true); + port += 2; + tcp_self_connect("self-connect(restore, different keyids)", port, true, true); + port += 2; + + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(4, client_fn, NULL); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c new file mode 100644 index 0000000000..ad4e77d682 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Check that after SEQ number wrap-around: + * 1. SEQ-extension has upper bytes set + * 2. TCP conneciton is alive and no TCPAOBad segments + * In order to test (2), the test doesn't just adjust seq number for a queue + * on a connected socket, but migrates it to another sk+port number, so + * that there won't be any delayed packets that will fail to verify + * with the new SEQ numbers. 
+ */ +#include <inttypes.h> +#include "aolib.h" + +const unsigned int nr_packets = 1000; +const unsigned int msg_len = 1000; +const unsigned int quota = nr_packets * msg_len; +unsigned int client_new_port; + +/* Move them closer to roll-over */ +static void test_adjust_seqs(struct tcp_sock_state *img, + struct tcp_ao_repair *ao_img, + bool server) +{ + uint32_t new_seq1, new_seq2; + + /* make them roll-over during quota, but on different segments */ + if (server) { + new_seq1 = ((uint32_t)-1) - msg_len; + new_seq2 = ((uint32_t)-1) - (quota - 2 * msg_len); + } else { + new_seq1 = ((uint32_t)-1) - (quota - 2 * msg_len); + new_seq2 = ((uint32_t)-1) - msg_len; + } + + img->in.seq = new_seq1; + img->trw.snd_wl1 = img->in.seq - msg_len; + img->out.seq = new_seq2; + img->trw.rcv_wup = img->in.seq; +} + +static int test_sk_restore(struct tcp_sock_state *img, + struct tcp_ao_repair *ao_img, sockaddr_af *saddr, + const union tcp_addr daddr, unsigned int dport, + struct tcp_ao_counters *cnt) +{ + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + test_enable_repair(sk); + test_sock_restore(sk, img, saddr, daddr, dport); + if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, daddr, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + test_ao_restore(sk, ao_img); + + if (test_get_tcp_ao_counters(sk, cnt)) + test_error("test_get_tcp_ao_counters()"); + + test_disable_repair(sk); + test_sock_state_free(img); + return sk; +} + +static void *server_fn(void *arg) +{ + uint64_t before_good, after_good, after_bad; + struct tcp_ao_counters ao1, ao2; + struct tcp_sock_state img; + struct tcp_ao_repair ao_img; + sockaddr_af saddr; + ssize_t bytes; + int sk, lsk; + + lsk = test_listen_socket(this_ip_addr, test_server_port, 1); + + if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* 1: MKT added => connect() */ + + if 
(test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) + test_error("test_wait_fd()"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) + test_error("accept()"); + + synchronize_threads(); /* 2: accepted => send data */ + close(lsk); + + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) { + if (bytes > 0) + test_fail("server served: %zd", bytes); + else + test_fail("server returned: %zd", bytes); + goto out; + } + + before_good = netstat_get_one("TCPAOGood", NULL); + + synchronize_threads(); /* 3: restore the connection on another port */ + + test_enable_repair(sk); + test_sock_checkpoint(sk, &img, &saddr); + test_ao_checkpoint(sk, &ao_img); + test_kill_sk(sk); +#ifdef IPV6_TEST + saddr.sin6_port = htons(ntohs(saddr.sin6_port) + 1); +#else + saddr.sin_port = htons(ntohs(saddr.sin_port) + 1); +#endif + test_adjust_seqs(&img, &ao_img, true); + synchronize_threads(); /* 4: dump finished */ + sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest, + client_new_port, &ao1); + + synchronize_threads(); /* 5: verify counters during SEQ-number rollover */ + bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); + if (bytes != quota) { + if (bytes > 0) + test_fail("server served: %zd", bytes); + else + test_fail("server returned: %zd", bytes); + } else { + test_ok("server alive"); + } + + if (test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + after_good = netstat_get_one("TCPAOGood", NULL); + + test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD); + + if (after_good <= before_good) { + test_fail("TCPAOGood counter did not increase: %zu <= %zu", + after_good, before_good); + } else { + test_ok("TCPAOGood counter increased %zu => %zu", + before_good, after_good); + } + after_bad = netstat_get_one("TCPAOBad", NULL); + if (after_bad) + test_fail("TCPAOBad counter is non-zero: %zu", after_bad); + else + test_ok("TCPAOBad counter didn't increase"); + test_enable_repair(sk); + test_ao_checkpoint(sk, &ao_img); + if (ao_img.snd_sne && 
ao_img.rcv_sne) { + test_ok("SEQ extension incremented: %u/%u", + ao_img.snd_sne, ao_img.rcv_sne); + } else { + test_fail("SEQ extension was not incremented: %u/%u", + ao_img.snd_sne, ao_img.rcv_sne); + } + + synchronize_threads(); /* 6: verified => closed */ +out: + close(sk); + return NULL; +} + +static void *client_fn(void *arg) +{ + uint64_t before_good, after_good, after_bad; + struct tcp_ao_counters ao1, ao2; + struct tcp_sock_state img; + struct tcp_ao_repair ao_img; + sockaddr_af saddr; + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* 1: MKT added => connect() */ + if (test_connect_socket(sk, this_ip_dest, test_server_port) <= 0) + test_error("failed to connect()"); + + synchronize_threads(); /* 2: accepted => send data */ + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) { + test_fail("pre-migrate verify failed"); + return NULL; + } + + before_good = netstat_get_one("TCPAOGood", NULL); + + synchronize_threads(); /* 3: restore the connection on another port */ + test_enable_repair(sk); + test_sock_checkpoint(sk, &img, &saddr); + test_ao_checkpoint(sk, &ao_img); + test_kill_sk(sk); +#ifdef IPV6_TEST + client_new_port = ntohs(saddr.sin6_port) + 1; + saddr.sin6_port = htons(ntohs(saddr.sin6_port) + 1); +#else + client_new_port = ntohs(saddr.sin_port) + 1; + saddr.sin_port = htons(ntohs(saddr.sin_port) + 1); +#endif + test_adjust_seqs(&img, &ao_img, false); + synchronize_threads(); /* 4: dump finished */ + sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest, + test_server_port + 1, &ao1); + + synchronize_threads(); /* 5: verify counters during SEQ-number rollover */ + if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + test_fail("post-migrate verify failed"); + else + test_ok("post-migrate connection alive"); + + if 
(test_get_tcp_ao_counters(sk, &ao2)) + test_error("test_get_tcp_ao_counters()"); + after_good = netstat_get_one("TCPAOGood", NULL); + + test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD); + + if (after_good <= before_good) { + test_fail("TCPAOGood counter did not increase: %zu <= %zu", + after_good, before_good); + } else { + test_ok("TCPAOGood counter increased %zu => %zu", + before_good, after_good); + } + after_bad = netstat_get_one("TCPAOBad", NULL); + if (after_bad) + test_fail("TCPAOBad counter is non-zero: %zu", after_bad); + else + test_ok("TCPAOBad counter didn't increase"); + + synchronize_threads(); /* 6: verified => closed */ + close(sk); + + synchronize_threads(); /* don't race to exit: let server exit() */ + return NULL; +} + +int main(int argc, char *argv[]) +{ + test_init(7, server_fn, client_fn); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c new file mode 100644 index 0000000000..517930f972 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c @@ -0,0 +1,835 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov <dima@arista.com> */ +#include <inttypes.h> +#include "../../../../include/linux/kernel.h" +#include "aolib.h" + +static union tcp_addr tcp_md5_client; + +static int test_port = 7788; +static void make_listen(int sk) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &this_ip_addr, htons(test_port++)); + if (bind(sk, (struct sockaddr *)&addr, sizeof(addr)) < 0) + test_error("bind()"); + if (listen(sk, 1)) + test_error("listen()"); +} + +static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info, + const char *tst) +{ + struct tcp_ao_info_opt tmp = {}; + socklen_t len = sizeof(tmp); + + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, &tmp, &len)) + test_error("getsockopt(TCP_AO_INFO) failed"); + +#define __cmp_ao(member) \ +do { \ + if (info->member != tmp.member) { \ + test_fail("%s: 
getsockopt(): " __stringify(member) " %zu != %zu", \ + tst, (size_t)info->member, (size_t)tmp.member); \ + return; \ + } \ +} while(0) + if (info->set_current) + __cmp_ao(current_key); + if (info->set_rnext) + __cmp_ao(rnext); + if (info->set_counters) { + __cmp_ao(pkt_good); + __cmp_ao(pkt_bad); + __cmp_ao(pkt_key_not_found); + __cmp_ao(pkt_ao_required); + __cmp_ao(pkt_dropped_icmp); + } + __cmp_ao(ao_required); + __cmp_ao(accept_icmps); + + test_ok("AO info get: %s", tst); +#undef __cmp_ao +} + +static void __setsockopt_checked(int sk, int optname, bool get, + void *optval, socklen_t *len, + int err, const char *tst, const char *tst2) +{ + int ret; + + if (!tst) + tst = ""; + if (!tst2) + tst2 = ""; + + errno = 0; + if (get) + ret = getsockopt(sk, IPPROTO_TCP, optname, optval, len); + else + ret = setsockopt(sk, IPPROTO_TCP, optname, optval, *len); + if (ret == -1) { + if (errno == err) + test_ok("%s%s", tst ?: "", tst2 ?: ""); + else + test_fail("%s%s: %setsockopt() failed", + tst, tst2, get ? "g" : "s"); + close(sk); + return; + } + + if (err) { + test_fail("%s%s: %setsockopt() was expected to fail with %d", + tst, tst2, get ? 
"g" : "s", err); + } else { + test_ok("%s%s", tst ?: "", tst2 ?: ""); + if (optname == TCP_AO_ADD_KEY) { + test_verify_socket_key(sk, optval); + } else if (optname == TCP_AO_INFO && !get) { + test_vefify_ao_info(sk, optval, tst2); + } else if (optname == TCP_AO_GET_KEYS) { + if (*len != sizeof(struct tcp_ao_getsockopt)) + test_fail("%s%s: get keys returned wrong tcp_ao_getsockopt size", + tst, tst2); + } + } + close(sk); +} + +static void setsockopt_checked(int sk, int optname, void *optval, + int err, const char *tst) +{ + const char *cmd = NULL; + socklen_t len; + + switch (optname) { + case TCP_AO_ADD_KEY: + cmd = "key add: "; + len = sizeof(struct tcp_ao_add); + break; + case TCP_AO_DEL_KEY: + cmd = "key del: "; + len = sizeof(struct tcp_ao_del); + break; + case TCP_AO_INFO: + cmd = "AO info set: "; + len = sizeof(struct tcp_ao_info_opt); + break; + default: + break; + } + + __setsockopt_checked(sk, optname, false, optval, &len, err, cmd, tst); +} + +static int prepare_defs(int cmd, void *optval) +{ + int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + + if (sk < 0) + test_error("socket()"); + + switch (cmd) { + case TCP_AO_ADD_KEY: { + struct tcp_ao_add *add = optval; + + if (test_prepare_def_key(add, DEFAULT_TEST_PASSWORD, 0, this_ip_dest, + -1, 0, 100, 100)) + test_error("prepare default tcp_ao_add"); + break; + } + case TCP_AO_DEL_KEY: { + struct tcp_ao_del *del = optval; + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, + DEFAULT_TEST_PREFIX, 100, 100)) + test_error("add default key"); + memset(del, 0, sizeof(struct tcp_ao_del)); + del->sndid = 100; + del->rcvid = 100; + del->prefix = DEFAULT_TEST_PREFIX; + tcp_addr_to_sockaddr_in(&del->addr, &this_ip_dest, 0); + break; + } + case TCP_AO_INFO: { + struct tcp_ao_info_opt *info = optval; + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, + DEFAULT_TEST_PREFIX, 100, 100)) + test_error("add default key"); + memset(info, 0, sizeof(struct tcp_ao_info_opt)); + break; + } + case 
TCP_AO_GET_KEYS: { + struct tcp_ao_getsockopt *get = optval; + + if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, + DEFAULT_TEST_PREFIX, 100, 100)) + test_error("add default key"); + memset(get, 0, sizeof(struct tcp_ao_getsockopt)); + get->nkeys = 1; + get->get_all = 1; + break; + } + default: + test_error("unknown cmd"); + } + + return sk; +} + +static void test_extend(int cmd, bool get, const char *tst, socklen_t under_size) +{ + struct { + union { + struct tcp_ao_add add; + struct tcp_ao_del del; + struct tcp_ao_getsockopt get; + struct tcp_ao_info_opt info; + }; + char *extend[100]; + } tmp_opt; + socklen_t extended_size = sizeof(tmp_opt); + int sk; + + memset(&tmp_opt, 0, sizeof(tmp_opt)); + sk = prepare_defs(cmd, &tmp_opt); + __setsockopt_checked(sk, cmd, get, &tmp_opt, &under_size, + EINVAL, tst, ": minimum size"); + + memset(&tmp_opt, 0, sizeof(tmp_opt)); + sk = prepare_defs(cmd, &tmp_opt); + __setsockopt_checked(sk, cmd, get, &tmp_opt, &extended_size, + 0, tst, ": extended size"); + + memset(&tmp_opt, 0, sizeof(tmp_opt)); + sk = prepare_defs(cmd, &tmp_opt); + __setsockopt_checked(sk, cmd, get, NULL, &extended_size, + EFAULT, tst, ": null optval"); + + if (get) { + memset(&tmp_opt, 0, sizeof(tmp_opt)); + sk = prepare_defs(cmd, &tmp_opt); + __setsockopt_checked(sk, cmd, get, &tmp_opt, NULL, + EFAULT, tst, ": null optlen"); + } +} + +static void extend_tests(void) +{ + test_extend(TCP_AO_ADD_KEY, false, "AO add", + offsetof(struct tcp_ao_add, key)); + test_extend(TCP_AO_DEL_KEY, false, "AO del", + offsetof(struct tcp_ao_del, keyflags)); + test_extend(TCP_AO_INFO, false, "AO set info", + offsetof(struct tcp_ao_info_opt, pkt_dropped_icmp)); + test_extend(TCP_AO_INFO, true, "AO get info", -1); + test_extend(TCP_AO_GET_KEYS, true, "AO get keys", -1); +} + +static void test_optmem_limit(void) +{ + size_t i, keys_limit, current_optmem = test_get_optmem(); + struct tcp_ao_add ao; + union tcp_addr net = {}; + int sk; + + if (inet_pton(TEST_FAMILY, 
TEST_NETWORK, &net) != 1) + test_error("Can't convert ip address %s", TEST_NETWORK); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + keys_limit = current_optmem / KERNEL_TCP_AO_KEY_SZ_ROUND_UP; + for (i = 0;; i++) { + union tcp_addr key_peer; + int err; + + key_peer = gen_tcp_addr(net, i + 1); + tcp_addr_to_sockaddr_in(&ao.addr, &key_peer, 0); + err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, + &ao, sizeof(ao)); + if (!err) { + /* + * TCP_AO_ADD_KEY should be the same order as the real + * sizeof(struct tcp_ao_key) in kernel. + */ + if (i <= keys_limit * 10) + continue; + test_fail("optmem limit test failed: added %zu key", i); + break; + } + if (i < keys_limit) { + test_fail("optmem limit test failed: couldn't add %zu key", i); + break; + } + test_ok("optmem limit was hit on adding %zu key", i); + break; + } + close(sk); +} + +static void test_einval_add_key(void) +{ + struct tcp_ao_add ao; + int sk; + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.keylen = TCP_AO_MAXKEYLEN + 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "too big keylen"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.reserved = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "using reserved padding"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.reserved2 = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "using reserved2 padding"); + + /* tcp_ao_verify_ipv{4,6}() checks */ + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.addr.ss_family = AF_UNIX; + memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "wrong address family"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + tcp_addr_to_sockaddr_in(&ao.addr, &this_ip_dest, 1234); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "port (unsupported)"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.prefix = 0; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "no prefix, addr"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.prefix = 0; + 
memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "no prefix, any addr"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.prefix = 32; + memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY)); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "prefix, any addr"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.prefix = 129; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "too big prefix"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.prefix = 2; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "too short prefix"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.keyflags = (uint8_t)(-1); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "bad key flags"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + make_listen(sk); + ao.set_current = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "add current key on a listen socket"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + make_listen(sk); + ao.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "add rnext key on a listen socket"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + make_listen(sk); + ao.set_current = 1; + ao.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "add current+rnext key on a listen socket"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.set_current = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "add key and set as current"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "add key and set as rnext"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.set_current = 1; + ao.set_rnext = 1; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "add key and set as current+rnext"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.ifindex = 42; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, + "ifindex without TCP_AO_KEYF_IFNINDEX"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.keyflags |= TCP_AO_KEYF_IFINDEX; 
+ ao.ifindex = 42; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "non-existent VRF"); + /* + * tcp_md5_do_lookup{,_any_l3index}() are checked in unsigned-md5 + * see client_vrf_tests(). + */ + + test_optmem_limit(); + + /* tcp_ao_parse_crypto() */ + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + ao.maclen = 100; + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EMSGSIZE, "maclen bigger than TCP hdr"); + + sk = prepare_defs(TCP_AO_ADD_KEY, &ao); + strcpy(ao.alg_name, "imaginary hash algo"); + setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, ENOENT, "bad algo"); +} +
/*
 * Argument validation of setsockopt(TCP_AO_DEL_KEY).
 *
 * Every case starts from the defaults filled in by prepare_defs(), changes
 * only the field(s) under test and passes the socket to
 * setsockopt_checked() together with the errno value named by the case
 * (0 for the cases that are expected to succeed).
 */
static void test_einval_del_key(void)
{
	struct tcp_ao_del del;
	int sk;

	/* Reserved padding must stay zero */
	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	del.reserved = 1;
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "using reserved padding");

	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	del.reserved2 = 1;
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "using reserved2 padding");

	/* current/rnext selection on a listen socket */
	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	make_listen(sk);
	if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
		test_error("add key");
	del.set_current = 1;
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "del and set current key on a listen socket");

	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	make_listen(sk);
	if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
		test_error("add key");
	del.set_rnext = 1;
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "del and set rnext key on a listen socket");

	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	make_listen(sk);
	if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
		test_error("add key");
	del.set_current = 1;
	del.set_rnext = 1;
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "del and set current+rnext key on a listen socket");

	/* Key flags and ifindex */
	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	del.keyflags = (uint8_t)(-1);
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "bad key flags");

	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	del.ifindex = 42;
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL,
			   "ifindex without TCP_AO_KEYF_IFNINDEX");

	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	del.keyflags |= TCP_AO_KEYF_IFINDEX;
	del.ifindex = 42;
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "non-existent VRF");

	/* Selecting a current/rnext key while no extra key was added */
	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	del.set_current = 1;
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-existing current key");

	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	del.set_rnext = 1;
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-existing rnext key");

	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	del.set_current = 1;
	del.set_rnext = 1;
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-existing current+rnext key");

	/* Same selections after a second key (sndid = rcvid = 0) was added */
	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
		test_error("add key");
	del.set_current = 1;
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "set current key");

	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
		test_error("add key");
	del.set_rnext = 1;
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "set rnext key");

	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
		test_error("add key");
	del.set_current = 1;
	del.set_rnext = 1;
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "set current+rnext key");

	/* Selecting key id 100 as current/rnext in the same request */
	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	del.set_current = 1;
	del.current_key = 100;
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set as current key to be removed");

	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	del.set_rnext = 1;
	del.rnext = 100;
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set as rnext key to be removed");

	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	del.set_current = 1;
	del.current_key = 100;
	del.set_rnext = 1;
	del.rnext = 100;
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set as current+rnext key to be removed");

	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	del.del_async = 1;
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "async on non-listen");

	/* Key lookup mismatches */
	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	del.sndid = 101;
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "non-existing sndid");

	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	del.rcvid = 101;
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "non-existing rcvid");

	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	tcp_addr_to_sockaddr_in(&del.addr, &this_ip_addr, 0);
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "incorrect addr");

	/* Unmodified defaults */
	sk = prepare_defs(TCP_AO_DEL_KEY, &del);
	setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "correct key delete");
}

/*
 * Argument validation of setsockopt(TCP_AO_INFO).
 *
 * Same pattern as the other test_einval_*() functions: prepare_defs(),
 * modify the field(s) under test, then setsockopt_checked() with the errno
 * value named by the case.
 */
static void test_einval_ao_info(void)
{
	struct tcp_ao_info_opt info;
	int sk;

	/* current/rnext selection on a listen socket */
	sk = prepare_defs(TCP_AO_INFO, &info);
	make_listen(sk);
	info.set_current = 1;
	setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "set current key on a listen socket");

	sk = prepare_defs(TCP_AO_INFO, &info);
	make_listen(sk);
	info.set_rnext = 1;
	setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "set rnext key on a listen socket");

	sk = prepare_defs(TCP_AO_INFO, &info);
	make_listen(sk);
	info.set_current = 1;
	info.set_rnext = 1;
	setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "set current+rnext key on a listen socket");

	/* Reserved padding must stay zero */
	sk = prepare_defs(TCP_AO_INFO, &info);
	info.reserved = 1;
	setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "using reserved padding");

	sk = prepare_defs(TCP_AO_INFO, &info);
	info.reserved2 = 1;
	setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "using reserved2 padding");

	/* Per-socket flags */
	sk = prepare_defs(TCP_AO_INFO, &info);
	info.accept_icmps = 1;
	setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "accept_icmps");

	sk = prepare_defs(TCP_AO_INFO, &info);
	info.ao_required = 1;
	setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "ao required");

	/* This case installs a TCP-MD5 key first, so it is gated on KCONFIG_TCP_MD5 */
	if (!should_skip_test("ao required with MD5 key", KCONFIG_TCP_MD5)) {
		sk = prepare_defs(TCP_AO_INFO, &info);
		info.ao_required = 1;
		if (test_set_md5(sk, tcp_md5_client, TEST_PREFIX, -1,
				 "long long secret")) {
			test_error("setsockopt(TCP_MD5SIG_EXT)");
			close(sk);
		} else {
			setsockopt_checked(sk, TCP_AO_INFO, &info, EKEYREJECTED,
					   "ao required with MD5 key");
		}
	}

	/* current/rnext selection with and without an explicit key id */
	sk = prepare_defs(TCP_AO_INFO, &info);
	info.set_current = 1;
	setsockopt_checked(sk, TCP_AO_INFO, &info, ENOENT, "set non-existing current key");

	sk = prepare_defs(TCP_AO_INFO, &info);
	info.set_rnext = 1;
	setsockopt_checked(sk, TCP_AO_INFO, &info, ENOENT, "set non-existing rnext key");

	sk = prepare_defs(TCP_AO_INFO, &info);
	info.set_current = 1;
	info.set_rnext = 1;
	setsockopt_checked(sk, TCP_AO_INFO, &info, ENOENT, "set non-existing current+rnext key");

	sk = prepare_defs(TCP_AO_INFO, &info);
	info.set_current = 1;
	info.current_key = 100;
	setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set current key");

	sk = prepare_defs(TCP_AO_INFO, &info);
	info.set_rnext = 1;
	info.rnext = 100;
	setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set rnext key");

	sk = prepare_defs(TCP_AO_INFO, &info);
	info.set_current = 1;
	info.set_rnext = 1;
	info.current_key = 100;
	info.rnext = 100;
	setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set current+rnext key");

	/* Counters may be written when set_counters is requested */
	sk = prepare_defs(TCP_AO_INFO, &info);
	info.set_counters = 1;
	info.pkt_good = 321;
	info.pkt_bad = 888;
	info.pkt_key_not_found = 654;
	info.pkt_ao_required = 987654;
	info.pkt_dropped_icmp = 10000;
	setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set counters");

	sk = prepare_defs(TCP_AO_INFO, &info);
	setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "no-op");
}

/*
 * getsockopt(TCP_AO_GET_KEYS) flavour of the checked helper: forwards to
 * __setsockopt_checked() with the "get" flag set, a full-size optlen and
 * the "get keys: " prefix for the test name.
 */
static void getsockopt_checked(int sk, struct tcp_ao_getsockopt *optval,
			       int err, const char *tst)
{
	socklen_t len = sizeof(struct tcp_ao_getsockopt);

	__setsockopt_checked(sk, TCP_AO_GET_KEYS, true, optval, &len, err,
			     "get keys: ", tst);
}

/*
 * Argument validation of getsockopt(TCP_AO_GET_KEYS): which filter
 * combinations (get_all, is_current, is_rnext, prefix/addr, sndid/rcvid)
 * are accepted and which are rejected.
 */
static void test_einval_get_keys(void)
{
	struct tcp_ao_getsockopt out;
	int sk;

	/*
	 * The first case uses a plain socket and never goes through
	 * prepare_defs(), so make sure the request is not stack garbage.
	 */
	memset(&out, 0, sizeof(out));

	sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
	if (sk < 0)
		test_error("socket()");
	getsockopt_checked(sk, &out, ENOENT, "no ao_info");

	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	getsockopt_checked(sk, &out, 0, "proper tcp_ao_get_mkts()");

	/* Output-only and reserved fields must not be set by the caller */
	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.pkt_good = 643;
	getsockopt_checked(sk, &out, EINVAL, "set out-only pkt_good counter");

	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.pkt_bad = 94;
	getsockopt_checked(sk, &out, EINVAL, "set out-only pkt_bad counter");

	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.keyflags = (uint8_t)(-1);
	getsockopt_checked(sk, &out, EINVAL, "bad keyflags");

	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.ifindex = 42;
	getsockopt_checked(sk, &out, EINVAL,
			   "ifindex without TCP_AO_KEYF_IFNINDEX");

	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.reserved = 1;
	getsockopt_checked(sk, &out, EINVAL, "using reserved field");

	/* Address/prefix filter */
	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.get_all = 0;
	out.prefix = 0;
	tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
	getsockopt_checked(sk, &out, EINVAL, "no prefix, addr");

	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.get_all = 0;
	out.prefix = 0;
	memcpy(&out.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
	getsockopt_checked(sk, &out, 0, "no prefix, any addr");

	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.get_all = 0;
	out.prefix = 32;
	memcpy(&out.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
	getsockopt_checked(sk, &out, EINVAL, "prefix, any addr");

	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.get_all = 0;
	out.prefix = 129;
	tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
	getsockopt_checked(sk, &out, EINVAL, "too big prefix");

	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.get_all = 0;
	out.prefix = 2;
	tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
	getsockopt_checked(sk, &out, EINVAL, "too short prefix");

	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.get_all = 0;
	out.prefix = DEFAULT_TEST_PREFIX;
	tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
	getsockopt_checked(sk, &out, 0, "prefix + addr");

	/* get_all combined with any other filter */
	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.get_all = 1;
	out.prefix = DEFAULT_TEST_PREFIX;
	getsockopt_checked(sk, &out, EINVAL, "get_all + prefix");

	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.get_all = 1;
	tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
	getsockopt_checked(sk, &out, EINVAL, "get_all + addr");

	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.get_all = 1;
	out.sndid = 1;
	getsockopt_checked(sk, &out, EINVAL, "get_all + sndid");

	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.get_all = 1;
	out.rcvid = 1;
	getsockopt_checked(sk, &out, EINVAL, "get_all + rcvid");

	/* is_current combined with a key filter */
	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.get_all = 0;
	out.is_current = 1;
	out.prefix = DEFAULT_TEST_PREFIX;
	getsockopt_checked(sk, &out, EINVAL, "current + prefix");

	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.get_all = 0;
	out.is_current = 1;
	tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
	getsockopt_checked(sk, &out, EINVAL, "current + addr");

	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.get_all = 0;
	out.is_current = 1;
	out.sndid = 1;
	getsockopt_checked(sk, &out, EINVAL, "current + sndid");

	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.get_all = 0;
	out.is_current = 1;
	out.rcvid = 1;
	getsockopt_checked(sk, &out, EINVAL, "current + rcvid");

	/* is_rnext combined with a key filter */
	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.get_all = 0;
	out.is_rnext = 1;
	out.prefix = DEFAULT_TEST_PREFIX;
	getsockopt_checked(sk, &out, EINVAL, "rnext + prefix");

	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.get_all = 0;
	out.is_rnext = 1;
	tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
	getsockopt_checked(sk, &out, EINVAL, "rnext + addr");

	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.get_all = 0;
	out.is_rnext = 1;
	out.sndid = 1;
	getsockopt_checked(sk, &out, EINVAL, "rnext + sndid");

	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.get_all = 0;
	out.is_rnext = 1;
	out.rcvid = 1;
	getsockopt_checked(sk, &out, EINVAL, "rnext + rcvid");

	/* get_all combined with current/rnext, and current+rnext alone */
	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.get_all = 1;
	out.is_current = 1;
	getsockopt_checked(sk, &out, EINVAL, "get_all + current");

	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.get_all = 1;
	out.is_rnext = 1;
	getsockopt_checked(sk, &out, EINVAL, "get_all + rnext");

	sk = prepare_defs(TCP_AO_GET_KEYS, &out);
	out.get_all = 0;
	out.is_current = 1;
	out.is_rnext = 1;
	getsockopt_checked(sk, &out, 0, "current + rnext");
}

/* Run all argument-validation groups, one per socket option. */
static void einval_tests(void)
{
	test_einval_add_key();
	test_einval_del_key();
	test_einval_ao_info();
	test_einval_get_keys();
}

/*
 * Adding a key that collides with one already on the socket must fail with
 * EEXIST. Each case installs a first key with a raw setsockopt() and then
 * tries a second TCP_AO_ADD_KEY through setsockopt_checked().
 */
static void duplicate_tests(void)
{
	union tcp_addr network_dup;
	struct tcp_ao_add ao, ao2;
	int sk;

	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
	if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
		test_error("setsockopt()");
	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: full copy");

	/* An any-address key is installed first, then the default key */
	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
	ao2 = ao;
	memcpy(&ao2.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
	ao2.prefix = 0;
	if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao2, sizeof(ao2)))
		test_error("setsockopt()");
	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: any addr key on the socket");

	/* The default key is installed first, then an any-address key */
	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
	if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
		test_error("setsockopt()");
	memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
	ao.prefix = 0;
	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: add any addr key");

	/* Second key is for TEST_NETWORK with a 16-bit prefix */
	if (inet_pton(TEST_FAMILY, TEST_NETWORK, &network_dup) != 1)
		test_error("Can't convert ip address %s", TEST_NETWORK);
	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
	if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
		test_error("setsockopt()");
	if (test_prepare_def_key(&ao, "password", 0, network_dup,
				 16, 0, 100, 100))
		test_error("prepare default tcp_ao_add");
	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: add any addr for the same subnet");

	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
	if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
		test_error("setsockopt()");
	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: full copy of a key");

	/* Only one of the two key ids differs from the installed key */
	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
	if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
		test_error("setsockopt()");
	ao.rcvid = 101;
	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: RecvID differs");

	sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
	if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
		test_error("setsockopt()");
	ao.sndid = 101;
	setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: SendID differs");
}

/*
 * Test thread body: resolves the address stored in tcp_md5_client, then
 * runs the extend, einval and duplicate test groups in that order.
 */
static void *client_fn(void *arg)
{
	if (inet_pton(TEST_FAMILY, __TEST_CLIENT_IP(2), &tcp_md5_client) != 1)
		test_error("Can't convert ip address");
	extend_tests();
	einval_tests();
	duplicate_tests();
	/*
	 * TODO: check getsockopt(TCP_AO_GET_KEYS) with different filters
	 * returning proper nr & keys;
	 */

	return NULL;
}

/* Only client_fn is handed to test_init(); the second callback is NULL. */
int main(int argc, char *argv[])
{
	test_init(120, client_fn, NULL);
	return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/settings b/tools/testing/selftests/net/tcp_ao/settings new file mode 100644 index 0000000000..6091b45d22 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/settings @@ -0,0 +1 @@ +timeout=120 diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c 
b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c new file mode 100644 index 0000000000..6b59a65215 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c @@ -0,0 +1,741 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Author: Dmitry Safonov <dima@arista.com> */ +#include <inttypes.h> +#include "aolib.h" + +#define fault(type) (inj == FAULT_ ## type) +static const char *md5_password = "Some evil genius, enemy to mankind, must have been the first contriver."; +static const char *ao_password = DEFAULT_TEST_PASSWORD; + +static union tcp_addr client2; +static union tcp_addr client3; + +static const int test_vrf_ifindex = 200; +static const uint8_t test_vrf_tabid = 42; +static void setup_vrfs(void) +{ + int err; + + if (!kernel_config_has(KCONFIG_NET_VRF)) + return; + + err = add_vrf("ksft-vrf", test_vrf_tabid, test_vrf_ifindex, -1); + if (err) + test_error("Failed to add a VRF: %d", err); + + err = link_set_up("ksft-vrf"); + if (err) + test_error("Failed to bring up a VRF"); + + err = ip_route_add_vrf(veth_name, TEST_FAMILY, + this_ip_addr, this_ip_dest, test_vrf_tabid); + if (err) + test_error("Failed to add a route to VRF: %d", err); +} + +static void try_accept(const char *tst_name, unsigned int port, + union tcp_addr *md5_addr, uint8_t md5_prefix, + union tcp_addr *ao_addr, uint8_t ao_prefix, + bool set_ao_required, + uint8_t sndid, uint8_t rcvid, uint8_t vrf, + const char *cnt_name, test_cnt cnt_expected, + int needs_tcp_md5, fault_t inj) +{ + struct tcp_ao_counters ao_cnt1, ao_cnt2; + uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */ + int lsk, err, sk = 0; + time_t timeout; + + if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) + return; + + lsk = test_listen_socket(this_ip_addr, port, 1); + + if (md5_addr && test_set_md5(lsk, *md5_addr, md5_prefix, -1, md5_password)) + test_error("setsockopt(TCP_MD5SIG_EXT)"); + + if (ao_addr && test_add_key(lsk, ao_password, + *ao_addr, ao_prefix, sndid, rcvid)) + 
test_error("setsockopt(TCP_AO_ADD_KEY)"); + + if (set_ao_required && test_set_ao_flags(lsk, true, false)) + test_error("setsockopt(TCP_AO_INFO)"); + + if (cnt_name) + before_cnt = netstat_get_one(cnt_name, NULL); + if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt1)) + test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* preparations done */ + + timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + err = test_wait_fd(lsk, timeout, 0); + if (err == -ETIMEDOUT) { + if (!fault(TIMEOUT)) + test_fail("timed out for accept()"); + } else if (err < 0) { + test_error("test_wait_fd()"); + } else { + if (fault(TIMEOUT)) + test_fail("ready to accept"); + + sk = accept(lsk, NULL, NULL); + if (sk < 0) { + test_error("accept()"); + } else { + if (fault(TIMEOUT)) + test_fail("%s: accepted", tst_name); + } + } + + if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt2)) + test_error("test_get_tcp_ao_counters()"); + close(lsk); + + if (!cnt_name) { + test_ok("%s: no counter checks", tst_name); + goto out; + } + + after_cnt = netstat_get_one(cnt_name, NULL); + + if (after_cnt <= before_cnt) { + test_fail("%s: %s counter did not increase: %zu <= %zu", + tst_name, cnt_name, after_cnt, before_cnt); + } else { + test_ok("%s: counter %s increased %zu => %zu", + tst_name, cnt_name, before_cnt, after_cnt); + } + if (ao_addr) + test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + +out: + synchronize_threads(); /* test_kill_sk() */ + if (sk > 0) + test_kill_sk(sk); +} + +static void server_add_routes(void) +{ + int family = TEST_FAMILY; + + synchronize_threads(); /* client_add_ips() */ + + if (ip_route_add(veth_name, family, this_ip_addr, client2)) + test_error("Failed to add route"); + if (ip_route_add(veth_name, family, this_ip_addr, client3)) + test_error("Failed to add route"); +} + +static void server_add_fail_tests(unsigned int *port) +{ + union tcp_addr addr_any = {}; + + try_accept("TCP-AO established: add TCP-MD5 key", (*port)++, 
NULL, 0, + &addr_any, 0, 0, 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, + 1, 0); + try_accept("TCP-MD5 established: add TCP-AO key", (*port)++, &addr_any, + 0, NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0); + try_accept("non-signed established: add TCP-AO key", (*port)++, NULL, 0, + NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0); +} + +static void server_vrf_tests(unsigned int *port) +{ + setup_vrfs(); +} + +static void *server_fn(void *arg) +{ + unsigned int port = test_server_port; + union tcp_addr addr_any = {}; + + server_add_routes(); + + try_accept("AO server (INADDR_ANY): AO client", port++, NULL, 0, + &addr_any, 0, 0, 100, 100, 0, "TCPAOGood", + TEST_CNT_GOOD, 0, 0); + try_accept("AO server (INADDR_ANY): MD5 client", port++, NULL, 0, + &addr_any, 0, 0, 100, 100, 0, "TCPMD5Unexpected", + 0, 1, FAULT_TIMEOUT); + try_accept("AO server (INADDR_ANY): no sign client", port++, NULL, 0, + &addr_any, 0, 0, 100, 100, 0, "TCPAORequired", + TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT); + try_accept("AO server (AO_REQUIRED): AO client", port++, NULL, 0, + &this_ip_dest, TEST_PREFIX, true, + 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 0, 0); + try_accept("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0, + &this_ip_dest, TEST_PREFIX, true, + 100, 100, 0, "TCPAORequired", + TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT); + + try_accept("MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0, + NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound", + 0, 1, FAULT_TIMEOUT); + try_accept("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0, + NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0); + try_accept("MD5 server (INADDR_ANY): no sign client", port++, &addr_any, + 0, NULL, 0, 0, 0, 0, 0, "TCPMD5NotFound", + 0, 1, FAULT_TIMEOUT); + + try_accept("no sign server: AO client", port++, NULL, 0, + NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound", + TEST_CNT_AO_KEY_NOT_FOUND, 0, FAULT_TIMEOUT); + try_accept("no sign server: MD5 client", port++, NULL, 0, + NULL, 0, 0, 0, 0, 0, "TCPMD5Unexpected", + 0, 1, FAULT_TIMEOUT); + 
try_accept("no sign server: no sign client", port++, NULL, 0, + NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0); + + try_accept("AO+MD5 server: AO client (matching)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 1, 0); + try_accept("AO+MD5 server: AO client (misconfig, matching MD5)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, + 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: AO client (misconfig, non-matching)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, + 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: MD5 client (matching)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, NULL, 0, 1, 0); + try_accept("AO+MD5 server: MD5 client (misconfig, matching AO)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: MD5 client (misconfig, non-matching)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: no sign client (unmatched)", port++, + &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "CurrEstab", 0, 1, 0); + try_accept("AO+MD5 server: no sign client (misconfig, matching AO)", + port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPAORequired", + TEST_CNT_AO_REQUIRED, 1, FAULT_TIMEOUT); + try_accept("AO+MD5 server: no sign client (misconfig, matching MD5)", + port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, "TCPMD5NotFound", 0, 1, FAULT_TIMEOUT); + + try_accept("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys", + port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT); + try_accept("AO+MD5 
server: client with both TCP-MD5 and [TCP-AO] keys", + port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, + 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT); + + server_add_fail_tests(&port); + + server_vrf_tests(&port); + + /* client exits */ + synchronize_threads(); + return NULL; +} + +static int client_bind(int sk, union tcp_addr bind_addr) +{ +#ifdef IPV6_TEST + struct sockaddr_in6 addr = { + .sin6_family = AF_INET6, + .sin6_port = 0, + .sin6_addr = bind_addr.a6, + }; +#else + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_port = 0, + .sin_addr = bind_addr.a4, + }; +#endif + return bind(sk, &addr, sizeof(addr)); +} + +static void try_connect(const char *tst_name, unsigned int port, + union tcp_addr *md5_addr, uint8_t md5_prefix, + union tcp_addr *ao_addr, uint8_t ao_prefix, + uint8_t sndid, uint8_t rcvid, uint8_t vrf, + fault_t inj, int needs_tcp_md5, union tcp_addr *bind_addr) +{ + time_t timeout; + int sk, ret; + + if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) + return; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (bind_addr && client_bind(sk, *bind_addr)) + test_error("bind()"); + + if (md5_addr && test_set_md5(sk, *md5_addr, md5_prefix, -1, md5_password)) + test_error("setsockopt(TCP_MD5SIG_EXT)"); + + if (ao_addr && test_add_key(sk, ao_password, *ao_addr, + ao_prefix, sndid, rcvid)) + test_error("setsockopt(TCP_AO_ADD_KEY)"); + + synchronize_threads(); /* preparations done */ + + timeout = fault(TIMEOUT) ? 
TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + + if (ret < 0) { + if (fault(KEYREJECT) && ret == -EKEYREJECTED) + test_ok("%s: connect() was prevented", tst_name); + else if (ret == -ETIMEDOUT && fault(TIMEOUT)) + test_ok("%s", tst_name); + else if (ret == -ECONNREFUSED && + (fault(TIMEOUT) || fault(KEYREJECT))) + test_ok("%s: refused to connect", tst_name); + else + test_error("%s: connect() returned %d", tst_name, ret); + goto out; + } + + if (fault(TIMEOUT) || fault(KEYREJECT)) + test_fail("%s: connected", tst_name); + else + test_ok("%s: connected", tst_name); + +out: + synchronize_threads(); /* test_kill_sk() */ + /* _test_connect_socket() cleans up on failure */ + if (ret > 0) + test_kill_sk(sk); +} + +#define PREINSTALL_MD5_FIRST BIT(0) +#define PREINSTALL_AO BIT(1) +#define POSTINSTALL_AO BIT(2) +#define PREINSTALL_MD5 BIT(3) +#define POSTINSTALL_MD5 BIT(4) + +static int try_add_key_vrf(int sk, union tcp_addr in_addr, uint8_t prefix, + int vrf, uint8_t sndid, uint8_t rcvid, + bool set_ao_required) +{ + uint8_t keyflags = 0; + + if (vrf >= 0) + keyflags |= TCP_AO_KEYF_IFINDEX; + else + vrf = 0; + if (set_ao_required) { + int err = test_set_ao_flags(sk, true, 0); + + if (err) + return err; + } + return test_add_key_vrf(sk, ao_password, keyflags, in_addr, prefix, + (uint8_t)vrf, sndid, rcvid); +} + +static bool test_continue(const char *tst_name, int err, + fault_t inj, bool added_ao) +{ + bool expected_to_fail; + + expected_to_fail = fault(PREINSTALL_AO) && added_ao; + expected_to_fail |= fault(PREINSTALL_MD5) && !added_ao; + + if (!err) { + if (!expected_to_fail) + return true; + test_fail("%s: setsockopt()s were expected to fail", tst_name); + return false; + } + if (err != -EKEYREJECTED || !expected_to_fail) { + test_error("%s: setsockopt(%s) = %d", tst_name, + added_ao ? 
"TCP_AO_ADD_KEY" : "TCP_MD5SIG_EXT", err); + return false; + } + test_ok("%s: prefailed as expected: %m", tst_name); + return false; +} + +static int open_add(const char *tst_name, unsigned int port, + unsigned int strategy, + union tcp_addr md5_addr, uint8_t md5_prefix, int md5_vrf, + union tcp_addr ao_addr, uint8_t ao_prefix, + int ao_vrf, bool set_ao_required, + uint8_t sndid, uint8_t rcvid, + fault_t inj) +{ + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + if (client_bind(sk, this_ip_addr)) + test_error("bind()"); + + if (strategy & PREINSTALL_MD5_FIRST) { + if (test_set_md5(sk, md5_addr, md5_prefix, md5_vrf, md5_password)) + test_error("setsockopt(TCP_MD5SIG_EXT)"); + } + + if (strategy & PREINSTALL_AO) { + int err = try_add_key_vrf(sk, ao_addr, ao_prefix, ao_vrf, + sndid, rcvid, set_ao_required); + + if (!test_continue(tst_name, err, inj, true)) { + close(sk); + return -1; + } + } + + if (strategy & PREINSTALL_MD5) { + errno = 0; + test_set_md5(sk, md5_addr, md5_prefix, md5_vrf, md5_password); + if (!test_continue(tst_name, -errno, inj, false)) { + close(sk); + return -1; + } + } + + return sk; +} + +static void try_to_preadd(const char *tst_name, unsigned int port, + unsigned int strategy, + union tcp_addr md5_addr, uint8_t md5_prefix, + int md5_vrf, + union tcp_addr ao_addr, uint8_t ao_prefix, + int ao_vrf, bool set_ao_required, + uint8_t sndid, uint8_t rcvid, + int needs_tcp_md5, int needs_vrf, fault_t inj) +{ + int sk; + + if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) + return; + if (needs_vrf && should_skip_test(tst_name, KCONFIG_NET_VRF)) + return; + + sk = open_add(tst_name, port, strategy, md5_addr, md5_prefix, md5_vrf, + ao_addr, ao_prefix, ao_vrf, set_ao_required, + sndid, rcvid, inj); + if (sk < 0) + return; + + test_ok("%s", tst_name); + close(sk); +} + +static void try_to_add(const char *tst_name, unsigned int port, + unsigned int strategy, + union tcp_addr md5_addr, 
uint8_t md5_prefix, + int md5_vrf, + union tcp_addr ao_addr, uint8_t ao_prefix, + int ao_vrf, uint8_t sndid, uint8_t rcvid, + int needs_tcp_md5, fault_t inj) +{ + time_t timeout; + int sk, ret; + + if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) + return; + + sk = open_add(tst_name, port, strategy, md5_addr, md5_prefix, md5_vrf, + ao_addr, ao_prefix, ao_vrf, 0, sndid, rcvid, inj); + if (sk < 0) + return; + + synchronize_threads(); /* preparations done */ + + timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; + ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + + if (ret <= 0) { + test_error("%s: connect() returned %d", tst_name, ret); + goto out; + } + + if (strategy & POSTINSTALL_MD5) { + if (test_set_md5(sk, md5_addr, md5_prefix, md5_vrf, md5_password)) { + if (fault(POSTINSTALL)) { + test_ok("%s: postfailed as expected", tst_name); + goto out; + } else { + test_error("setsockopt(TCP_MD5SIG_EXT)"); + } + } else if (fault(POSTINSTALL)) { + test_fail("%s: post setsockopt() was expected to fail", tst_name); + goto out; + } + } + + if (strategy & POSTINSTALL_AO) { + if (try_add_key_vrf(sk, ao_addr, ao_prefix, ao_vrf, + sndid, rcvid, 0)) { + if (fault(POSTINSTALL)) { + test_ok("%s: postfailed as expected", tst_name); + goto out; + } else { + test_error("setsockopt(TCP_AO_ADD_KEY)"); + } + } else if (fault(POSTINSTALL)) { + test_fail("%s: post setsockopt() was expected to fail", tst_name); + goto out; + } + } + +out: + synchronize_threads(); /* test_kill_sk() */ + /* _test_connect_socket() cleans up on failure */ + if (ret > 0) + test_kill_sk(sk); +} + +static void client_add_ip(union tcp_addr *client, const char *ip) +{ + int err, family = TEST_FAMILY; + + if (inet_pton(family, ip, client) != 1) + test_error("Can't convert ip address %s", ip); + + err = ip_addr_add(veth_name, family, *client, TEST_PREFIX); + if (err) + test_error("Failed to add ip address: %d", err); +} + +static void client_add_ips(void) +{ + 
client_add_ip(&client2, __TEST_CLIENT_IP(2)); + client_add_ip(&client3, __TEST_CLIENT_IP(3)); + synchronize_threads(); /* server_add_routes() */ +} + +static void client_add_fail_tests(unsigned int *port) +{ + try_to_add("TCP-AO established: add TCP-MD5 key", + (*port)++, POSTINSTALL_MD5 | PREINSTALL_AO, + this_ip_dest, TEST_PREFIX, -1, this_ip_dest, TEST_PREFIX, 0, + 100, 100, 1, FAULT_POSTINSTALL); + try_to_add("TCP-MD5 established: add TCP-AO key", + (*port)++, PREINSTALL_MD5 | POSTINSTALL_AO, + this_ip_dest, TEST_PREFIX, -1, this_ip_dest, TEST_PREFIX, 0, + 100, 100, 1, FAULT_POSTINSTALL); + try_to_add("non-signed established: add TCP-AO key", + (*port)++, POSTINSTALL_AO, + this_ip_dest, TEST_PREFIX, -1, this_ip_dest, TEST_PREFIX, 0, + 100, 100, 0, FAULT_POSTINSTALL); + + try_to_add("TCP-AO key intersects with existing TCP-MD5 key", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, this_ip_addr, TEST_PREFIX, -1, + 100, 100, 1, FAULT_PREINSTALL_AO); + try_to_add("TCP-MD5 key intersects with existing TCP-AO key", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, this_ip_addr, TEST_PREFIX, -1, + 100, 100, 1, FAULT_PREINSTALL_MD5); + + try_to_preadd("TCP-MD5 key + TCP-AO required", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, -1, true, + 100, 100, 1, 0, FAULT_PREINSTALL_AO); + try_to_preadd("TCP-AO required on socket + TCP-MD5 key", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, -1, true, + 100, 100, 1, 0, FAULT_PREINSTALL_MD5); +} + +static void client_vrf_tests(unsigned int *port) +{ + setup_vrfs(); + + /* The following restrictions for setsockopt()s are expected: + * + * |--------------|-----------------|-------------|-------------| + * | | MD5 key without | MD5 key | MD5 key | + * | | l3index | l3index=0 | l3index=N | + * 
|--------------|-----------------|-------------|-------------| + * | TCP-AO key | | | | + * | without | reject | reject | reject | + * | l3index | | | | + * |--------------|-----------------|-------------|-------------| + * | TCP-AO key | | | | + * | l3index=0 | reject | reject | allow | + * |--------------|-----------------|-------------|-------------| + * | TCP-AO key | | | | + * | l3index=N | reject | allow | reject | + * |--------------|-----------------|-------------|-------------| + */ + try_to_preadd("VRF: TCP-AO key (no l3index) + TCP-MD5 key (no l3index)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (no l3index) + TCP-AO key (no l3index)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + try_to_preadd("VRF: TCP-AO key (no l3index) + TCP-MD5 key (l3index=0)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (l3index=0) + TCP-AO key (no l3index)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + try_to_preadd("VRF: TCP-AO key (no l3index) + TCP-MD5 key (l3index=N)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (l3index=N) + TCP-AO key (no l3index)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + + try_to_preadd("VRF: TCP-AO key (l3index=0) + TCP-MD5 key (no l3index)", + 
(*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (no l3index) + TCP-AO key (l3index=0)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + try_to_preadd("VRF: TCP-AO key (l3index=0) + TCP-MD5 key (l3index=0)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (l3index=0) + TCP-AO key (l3index=0)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + try_to_preadd("VRF: TCP-AO key (l3index=0) + TCP-MD5 key (l3index=N)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, 0); + try_to_preadd("VRF: TCP-MD5 key (l3index=N) + TCP-AO key (l3index=0)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, 0, 0, 100, 100, + 1, 1, 0); + + try_to_preadd("VRF: TCP-AO key (l3index=N) + TCP-MD5 key (no l3index)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, -1, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (no l3index) + TCP-AO key (l3index=N)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, -1, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); + try_to_preadd("VRF: TCP-AO key (l3index=N) + TCP-MD5 key (l3index=0)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100, + 1, 
1, 0); + try_to_preadd("VRF: TCP-MD5 key (l3index=0) + TCP-AO key (l3index=N)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, 0, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100, + 1, 1, 0); + try_to_preadd("VRF: TCP-AO key (l3index=N) + TCP-MD5 key (l3index=N)", + (*port)++, PREINSTALL_MD5 | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_MD5); + try_to_preadd("VRF: TCP-MD5 key (l3index=N) + TCP-AO key (l3index=N)", + (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, + this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100, + 1, 1, FAULT_PREINSTALL_AO); +} + +static void *client_fn(void *arg) +{ + unsigned int port = test_server_port; + union tcp_addr addr_any = {}; + + client_add_ips(); + + try_connect("AO server (INADDR_ANY): AO client", port++, NULL, 0, + &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr); + try_connect("AO server (INADDR_ANY): MD5 client", port++, &addr_any, 0, + NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + try_connect("AO server (INADDR_ANY): unsigned client", port++, NULL, 0, + NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr); + try_connect("AO server (AO_REQUIRED): AO client", port++, NULL, 0, + &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr); + try_connect("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0, + NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &client2); + + try_connect("MD5 server (INADDR_ANY): AO client", port++, NULL, 0, + &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + try_connect("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0, + NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr); + try_connect("MD5 server (INADDR_ANY): no sign client", port++, NULL, 0, + NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + + try_connect("no sign server: AO client", port++, NULL, 0, + &addr_any, 0, 100, 100, 0, 
FAULT_TIMEOUT, 0, &this_ip_addr); + try_connect("no sign server: MD5 client", port++, &addr_any, 0, + NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + try_connect("no sign server: no sign client", port++, NULL, 0, + NULL, 0, 100, 100, 0, 0, 0, &this_ip_addr); + + try_connect("AO+MD5 server: AO client (matching)", port++, NULL, 0, + &addr_any, 0, 100, 100, 0, 0, 1, &client2); + try_connect("AO+MD5 server: AO client (misconfig, matching MD5)", + port++, NULL, 0, &addr_any, 0, 100, 100, 0, + FAULT_TIMEOUT, 1, &this_ip_addr); + try_connect("AO+MD5 server: AO client (misconfig, non-matching)", + port++, NULL, 0, &addr_any, 0, 100, 100, 0, + FAULT_TIMEOUT, 1, &client3); + try_connect("AO+MD5 server: MD5 client (matching)", port++, &addr_any, 0, + NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr); + try_connect("AO+MD5 server: MD5 client (misconfig, matching AO)", + port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, + 1, &client2); + try_connect("AO+MD5 server: MD5 client (misconfig, non-matching)", + port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, + 1, &client3); + try_connect("AO+MD5 server: no sign client (unmatched)", + port++, NULL, 0, NULL, 0, 100, 100, 0, 0, 1, &client3); + try_connect("AO+MD5 server: no sign client (misconfig, matching AO)", + port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, + 1, &client2); + try_connect("AO+MD5 server: no sign client (misconfig, matching MD5)", + port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, + 1, &this_ip_addr); + + try_connect("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys", + port++, &this_ip_addr, TEST_PREFIX, + &client2, TEST_PREFIX, 100, 100, 0, FAULT_KEYREJECT, + 1, &this_ip_addr); + try_connect("AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys", + port++, &this_ip_addr, TEST_PREFIX, + &client2, TEST_PREFIX, 100, 100, 0, FAULT_KEYREJECT, + 1, &client2); + + client_add_fail_tests(&port); + client_vrf_tests(&port); + + return NULL; +} + +int main(int argc, char *argv[]) 
+{ + test_init(72, server_fn, client_fn); + return 0; +} diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh index d80f2cd876..8533393a4f 100755 --- a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh +++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh @@ -45,9 +45,8 @@ # | sw1 | | sw2 | # +------------------------------------+ +------------------------------------+ +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 # All tests in this script. Can be overridden with -t option. TESTS=" @@ -140,9 +139,6 @@ setup_topo_ns() { local ns=$1; shift - ip netns add $ns - ip -n $ns link set dev lo up - ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1 ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0 @@ -153,21 +149,22 @@ setup_topo() { local ns - for ns in h1 h2 sw1 sw2; do + setup_ns h1 h2 sw1 sw2 + for ns in $h1 $h2 $sw1 $sw2; do setup_topo_ns $ns done ip link add name veth0 type veth peer name veth1 - ip link set dev veth0 netns h1 name eth0 - ip link set dev veth1 netns sw1 name swp1 + ip link set dev veth0 netns $h1 name eth0 + ip link set dev veth1 netns $sw1 name swp1 ip link add name veth0 type veth peer name veth1 - ip link set dev veth0 netns sw1 name veth0 - ip link set dev veth1 netns sw2 name veth0 + ip link set dev veth0 netns $sw1 name veth0 + ip link set dev veth1 netns $sw2 name veth0 ip link add name veth0 type veth peer name veth1 - ip link set dev veth0 netns h2 name eth0 - ip link set dev veth1 netns sw2 name swp1 + ip link set dev veth0 netns $h2 name eth0 + ip link set dev veth1 netns $sw2 name swp1 } setup_host_common() @@ -190,7 +187,7 @@ setup_host_common() setup_h1() { - local ns=h1 + local ns=$h1 local v4addr1=192.0.2.1/28 local v4addr2=192.0.2.17/28 local v6addr1=2001:db8:1::1/64 @@ -201,7 +198,7 @@ 
setup_h1() setup_h2() { - local ns=h2 + local ns=$h2 local v4addr1=192.0.2.2/28 local v4addr2=192.0.2.18/28 local v6addr1=2001:db8:1::2/64 @@ -254,7 +251,7 @@ setup_sw_common() setup_sw1() { - local ns=sw1 + local ns=$sw1 local local_addr=192.0.2.33 local remote_addr=192.0.2.34 local veth_addr=192.0.2.49 @@ -265,7 +262,7 @@ setup_sw1() setup_sw2() { - local ns=sw2 + local ns=$sw2 local local_addr=192.0.2.34 local remote_addr=192.0.2.33 local veth_addr=192.0.2.50 @@ -291,11 +288,7 @@ setup() cleanup() { - local ns - - for ns in h1 h2 sw1 sw2; do - ip netns del $ns &> /dev/null - done + cleanup_ns $h1 $h2 $sw1 $sw2 } ################################################################################ @@ -312,80 +305,80 @@ neigh_suppress_arp_common() echo "Per-port ARP suppression - VLAN $vid" echo "----------------------------------" - run_cmd "tc -n sw1 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip arp_sip $sip arp_op request action pass" + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip arp_sip $sip arp_op request action pass" # Initial state - check that ARP requests are not suppressed and that # ARP replies are received. - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" log_test $? 0 "arping" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "ARP suppression" # Enable neighbor suppression and check that nothing changes compared # to the initial state. 
- run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" log_test $? 0 "arping" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "ARP suppression" # Install an FDB entry for the remote host and check that nothing # changes compared to the initial state. - h2_mac=$(ip -n h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') - run_cmd "bridge -n sw1 fdb replace $h2_mac dev vx0 master static vlan $vid" + h2_mac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $h2_mac dev vx0 master static vlan $vid" log_test $? 0 "FDB entry installation" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" log_test $? 0 "arping" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 0 "ARP suppression" # Install a neighbor on the matching SVI interface and check that ARP # requests are suppressed. - run_cmd "ip -n sw1 neigh replace $tip lladdr $h2_mac nud permanent dev br0.$vid" + run_cmd "ip -n $sw1 neigh replace $tip lladdr $h2_mac nud permanent dev br0.$vid" log_test $? 0 "Neighbor entry installation" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" log_test $? 0 "arping" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 
0 "ARP suppression" # Take the second host down and check that ARP requests are suppressed # and that ARP replies are received. - run_cmd "ip -n h2 link set dev eth0.$vid down" + run_cmd "ip -n $h2 link set dev eth0.$vid down" log_test $? 0 "H2 down" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" log_test $? 0 "arping" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 0 "ARP suppression" - run_cmd "ip -n h2 link set dev eth0.$vid up" + run_cmd "ip -n $h2 link set dev eth0.$vid up" log_test $? 0 "H2 up" # Disable neighbor suppression and check that ARP requests are no # longer suppressed. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress off" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" log_test $? 0 "arping" - tc_check_packets sw1 "dev vx0 egress" 101 4 + tc_check_packets $sw1 "dev vx0 egress" 101 4 log_test $? 0 "ARP suppression" # Take the second host down and check that ARP requests are not # suppressed and that ARP replies are not received. - run_cmd "ip -n h2 link set dev eth0.$vid down" + run_cmd "ip -n $h2 link set dev eth0.$vid down" log_test $? 0 "H2 down" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip" log_test $? 1 "arping" - tc_check_packets sw1 "dev vx0 egress" 101 5 + tc_check_packets $sw1 "dev vx0 egress" 101 5 log_test $? 
0 "ARP suppression" } @@ -415,80 +408,80 @@ neigh_suppress_ns_common() echo "Per-port NS suppression - VLAN $vid" echo "---------------------------------" - run_cmd "tc -n sw1 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr type 135 code 0 action pass" + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr type 135 code 0 action pass" # Initial state - check that NS messages are not suppressed and that ND # messages are received. - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" log_test $? 0 "ndisc6" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "NS suppression" # Enable neighbor suppression and check that nothing changes compared # to the initial state. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" log_test $? 0 "ndisc6" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "NS suppression" # Install an FDB entry for the remote host and check that nothing # changes compared to the initial state. 
- h2_mac=$(ip -n h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') - run_cmd "bridge -n sw1 fdb replace $h2_mac dev vx0 master static vlan $vid" + h2_mac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $h2_mac dev vx0 master static vlan $vid" log_test $? 0 "FDB entry installation" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" log_test $? 0 "ndisc6" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 0 "NS suppression" # Install a neighbor on the matching SVI interface and check that NS # messages are suppressed. - run_cmd "ip -n sw1 neigh replace $daddr lladdr $h2_mac nud permanent dev br0.$vid" + run_cmd "ip -n $sw1 neigh replace $daddr lladdr $h2_mac nud permanent dev br0.$vid" log_test $? 0 "Neighbor entry installation" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" log_test $? 0 "ndisc6" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 0 "NS suppression" # Take the second host down and check that NS messages are suppressed # and that ND messages are received. - run_cmd "ip -n h2 link set dev eth0.$vid down" + run_cmd "ip -n $h2 link set dev eth0.$vid down" log_test $? 0 "H2 down" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" log_test $? 0 "ndisc6" - tc_check_packets sw1 "dev vx0 egress" 101 3 + tc_check_packets $sw1 "dev vx0 egress" 101 3 log_test $? 0 "NS suppression" - run_cmd "ip -n h2 link set dev eth0.$vid up" + run_cmd "ip -n $h2 link set dev eth0.$vid up" log_test $? 
0 "H2 up" # Disable neighbor suppression and check that NS messages are no longer # suppressed. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress off" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" log_test $? 0 "ndisc6" - tc_check_packets sw1 "dev vx0 egress" 101 4 + tc_check_packets $sw1 "dev vx0 egress" 101 4 log_test $? 0 "NS suppression" # Take the second host down and check that NS messages are not # suppressed and that ND messages are not received. - run_cmd "ip -n h2 link set dev eth0.$vid down" + run_cmd "ip -n $h2 link set dev eth0.$vid down" log_test $? 0 "H2 down" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid" log_test $? 2 "ndisc6" - tc_check_packets sw1 "dev vx0 egress" 101 5 + tc_check_packets $sw1 "dev vx0 egress" 101 5 log_test $? 
0 "NS suppression" } @@ -524,118 +517,118 @@ neigh_vlan_suppress_arp() echo "Per-{Port, VLAN} ARP suppression" echo "--------------------------------" - run_cmd "tc -n sw1 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip1 arp_sip $sip1 arp_op request action pass" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 102 proto 0x0806 flower indev swp1 arp_tip $tip2 arp_sip $sip2 arp_op request action pass" + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip1 arp_sip $sip1 arp_op request action pass" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 102 proto 0x0806 flower indev swp1 arp_tip $tip2 arp_sip $sip2 arp_op request action pass" - h2_mac1=$(ip -n h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]') - h2_mac2=$(ip -n h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]') - run_cmd "bridge -n sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1" - run_cmd "bridge -n sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2" - run_cmd "ip -n sw1 neigh replace $tip1 lladdr $h2_mac1 nud permanent dev br0.$vid1" - run_cmd "ip -n sw1 neigh replace $tip2 lladdr $h2_mac2 nud permanent dev br0.$vid2" + h2_mac1=$(ip -n $h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]') + h2_mac2=$(ip -n $h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1" + run_cmd "bridge -n $sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2" + run_cmd "ip -n $sw1 neigh replace $tip1 lladdr $h2_mac1 nud permanent dev br0.$vid1" + run_cmd "ip -n $sw1 neigh replace $tip2 lladdr $h2_mac2 nud permanent dev br0.$vid2" # Enable per-{Port, VLAN} neighbor suppression and check that ARP # requests are not suppressed and that ARP replies are received. 
- run_cmd "bridge -n sw1 link set dev vx0 neigh_vlan_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\"" log_test $? 0 "\"neigh_vlan_suppress\" is on" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" log_test $? 0 "arping (VLAN $vid1)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" log_test $? 0 "arping (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "ARP suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 1 + tc_check_packets $sw1 "dev vx0 egress" 102 1 log_test $? 0 "ARP suppression (VLAN $vid2)" # Enable neighbor suppression on VLAN 10 and check that only on this # VLAN ARP requests are suppressed. - run_cmd "bridge -n sw1 vlan set vid $vid1 dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on (VLAN $vid1)" - run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid2)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" log_test $? 
0 "arping (VLAN $vid1)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" log_test $? 0 "arping (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "ARP suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 2 + tc_check_packets $sw1 "dev vx0 egress" 102 2 log_test $? 0 "ARP suppression (VLAN $vid2)" # Enable neighbor suppression on the port and check that it has no # effect compared to previous state. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" log_test $? 0 "arping (VLAN $vid1)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" log_test $? 0 "arping (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "ARP suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 3 + tc_check_packets $sw1 "dev vx0 egress" 102 3 log_test $? 0 "ARP suppression (VLAN $vid2)" # Disable neighbor suppression on the port and check that it has no # effect compared to previous state. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress off" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" log_test $? 
0 "\"neigh_suppress\" is off" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" log_test $? 0 "arping (VLAN $vid1)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" log_test $? 0 "arping (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "ARP suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 4 + tc_check_packets $sw1 "dev vx0 egress" 102 4 log_test $? 0 "ARP suppression (VLAN $vid2)" # Disable neighbor suppression on VLAN 10 and check that ARP requests # are no longer suppressed on this VLAN. - run_cmd "bridge -n sw1 vlan set vid $vid1 dev vx0 neigh_suppress off" - run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid1)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" log_test $? 0 "arping (VLAN $vid1)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" log_test $? 0 "arping (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "ARP suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 5 + tc_check_packets $sw1 "dev vx0 egress" 102 5 log_test $? 
0 "ARP suppression (VLAN $vid2)" # Disable per-{Port, VLAN} neighbor suppression, enable neighbor # suppression on the port and check that on both VLANs ARP requests are # suppressed. - run_cmd "bridge -n sw1 link set dev vx0 neigh_vlan_suppress off" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\"" log_test $? 0 "\"neigh_vlan_suppress\" is off" - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1" log_test $? 0 "arping (VLAN $vid1)" - run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" + run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2" log_test $? 0 "arping (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "ARP suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 5 + tc_check_packets $sw1 "dev vx0 egress" 102 5 log_test $? 
0 "ARP suppression (VLAN $vid2)" } @@ -655,118 +648,118 @@ neigh_vlan_suppress_ns() echo "Per-{Port, VLAN} NS suppression" echo "-------------------------------" - run_cmd "tc -n sw1 qdisc replace dev vx0 clsact" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr1 type 135 code 0 action pass" - run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 102 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr2 type 135 code 0 action pass" + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr1 type 135 code 0 action pass" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 102 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr2 type 135 code 0 action pass" - h2_mac1=$(ip -n h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]') - h2_mac2=$(ip -n h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]') - run_cmd "bridge -n sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1" - run_cmd "bridge -n sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2" - run_cmd "ip -n sw1 neigh replace $daddr1 lladdr $h2_mac1 nud permanent dev br0.$vid1" - run_cmd "ip -n sw1 neigh replace $daddr2 lladdr $h2_mac2 nud permanent dev br0.$vid2" + h2_mac1=$(ip -n $h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]') + h2_mac2=$(ip -n $h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1" + run_cmd "bridge -n $sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2" + run_cmd "ip -n $sw1 neigh replace $daddr1 lladdr $h2_mac1 nud permanent dev br0.$vid1" + run_cmd "ip -n $sw1 neigh replace $daddr2 lladdr $h2_mac2 nud permanent dev br0.$vid2" # Enable per-{Port, VLAN} neighbor suppression and check that NS # 
messages are not suppressed and that ND messages are received. - run_cmd "bridge -n sw1 link set dev vx0 neigh_vlan_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\"" log_test $? 0 "\"neigh_vlan_suppress\" is on" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" log_test $? 0 "ndisc6 (VLAN $vid1)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" log_test $? 0 "ndisc6 (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "NS suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 1 + tc_check_packets $sw1 "dev vx0 egress" 102 1 log_test $? 0 "NS suppression (VLAN $vid2)" # Enable neighbor suppression on VLAN 10 and check that only on this # VLAN NS messages are suppressed. - run_cmd "bridge -n sw1 vlan set vid $vid1 dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on (VLAN $vid1)" - run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid2)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" log_test $? 
0 "ndisc6 (VLAN $vid1)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" log_test $? 0 "ndisc6 (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "NS suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 2 + tc_check_packets $sw1 "dev vx0 egress" 102 2 log_test $? 0 "NS suppression (VLAN $vid2)" # Enable neighbor suppression on the port and check that it has no # effect compared to previous state. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" log_test $? 0 "ndisc6 (VLAN $vid1)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" log_test $? 0 "ndisc6 (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "NS suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 3 + tc_check_packets $sw1 "dev vx0 egress" 102 3 log_test $? 0 "NS suppression (VLAN $vid2)" # Disable neighbor suppression on the port and check that it has no # effect compared to previous state. - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress off" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" log_test $? 
0 "\"neigh_suppress\" is off" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" log_test $? 0 "ndisc6 (VLAN $vid1)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" log_test $? 0 "ndisc6 (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "NS suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 4 + tc_check_packets $sw1 "dev vx0 egress" 102 4 log_test $? 0 "NS suppression (VLAN $vid2)" # Disable neighbor suppression on VLAN 10 and check that NS messages # are no longer suppressed on this VLAN. - run_cmd "bridge -n sw1 vlan set vid $vid1 dev vx0 neigh_suppress off" - run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\"" + run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\"" log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid1)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" log_test $? 0 "ndisc6 (VLAN $vid1)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" log_test $? 0 "ndisc6 (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "NS suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 5 + tc_check_packets $sw1 "dev vx0 egress" 102 5 log_test $? 
0 "NS suppression (VLAN $vid2)" # Disable per-{Port, VLAN} neighbor suppression, enable neighbor # suppression on the port and check that on both VLANs NS messages are # suppressed. - run_cmd "bridge -n sw1 link set dev vx0 neigh_vlan_suppress off" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\"" log_test $? 0 "\"neigh_vlan_suppress\" is off" - run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on" - run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" log_test $? 0 "\"neigh_suppress\" is on" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1" log_test $? 0 "ndisc6 (VLAN $vid1)" - run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" + run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2" log_test $? 0 "ndisc6 (VLAN $vid2)" - tc_check_packets sw1 "dev vx0 egress" 101 2 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "NS suppression (VLAN $vid1)" - tc_check_packets sw1 "dev vx0 egress" 102 5 + tc_check_packets $sw1 "dev vx0 egress" 102 5 log_test $? 0 "NS suppression (VLAN $vid2)" } diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh index 6e996f8063..04fb17a92e 100755 --- a/tools/testing/selftests/net/test_vxlan_mdb.sh +++ b/tools/testing/selftests/net/test_vxlan_mdb.sh @@ -55,9 +55,8 @@ # | ns2_v4 | | ns2_v6 | # +------------------------------------+ +------------------------------------+ +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. 
-ksft_skip=4 CONTROL_PATH_TESTS=" basic_star_g_ipv4_ipv4 @@ -80,6 +79,7 @@ CONTROL_PATH_TESTS=" dump_ipv6_ipv4 dump_ipv4_ipv6 dump_ipv6_ipv6 + flush " DATA_PATH_TESTS=" @@ -260,9 +260,6 @@ setup_common() local local_addr1=$1; shift local local_addr2=$1; shift - ip netns add $ns1 - ip netns add $ns2 - ip link add name veth0 type veth peer name veth1 ip link set dev veth0 netns $ns1 name veth0 ip link set dev veth1 netns $ns2 name veth0 @@ -273,36 +270,36 @@ setup_common() setup_v4() { - setup_common ns1_v4 ns2_v4 192.0.2.1 192.0.2.2 + setup_ns ns1_v4 ns2_v4 + setup_common $ns1_v4 $ns2_v4 192.0.2.1 192.0.2.2 - ip -n ns1_v4 address add 192.0.2.17/28 dev veth0 - ip -n ns2_v4 address add 192.0.2.18/28 dev veth0 + ip -n $ns1_v4 address add 192.0.2.17/28 dev veth0 + ip -n $ns2_v4 address add 192.0.2.18/28 dev veth0 - ip -n ns1_v4 route add default via 192.0.2.18 - ip -n ns2_v4 route add default via 192.0.2.17 + ip -n $ns1_v4 route add default via 192.0.2.18 + ip -n $ns2_v4 route add default via 192.0.2.17 } cleanup_v4() { - ip netns del ns2_v4 - ip netns del ns1_v4 + cleanup_ns $ns2_v4 $ns1_v4 } setup_v6() { - setup_common ns1_v6 ns2_v6 2001:db8:1::1 2001:db8:1::2 + setup_ns ns1_v6 ns2_v6 + setup_common $ns1_v6 $ns2_v6 2001:db8:1::1 2001:db8:1::2 - ip -n ns1_v6 address add 2001:db8:2::1/64 dev veth0 nodad - ip -n ns2_v6 address add 2001:db8:2::2/64 dev veth0 nodad + ip -n $ns1_v6 address add 2001:db8:2::1/64 dev veth0 nodad + ip -n $ns2_v6 address add 2001:db8:2::2/64 dev veth0 nodad - ip -n ns1_v6 route add default via 2001:db8:2::2 - ip -n ns2_v6 route add default via 2001:db8:2::1 + ip -n $ns1_v6 route add default via 2001:db8:2::2 + ip -n $ns2_v6 route add default via 2001:db8:2::1 } cleanup_v6() { - ip netns del ns2_v6 - ip netns del ns1_v6 + cleanup_ns $ns2_v6 $ns1_v6 } setup() @@ -433,7 +430,7 @@ basic_common() basic_star_g_ipv4_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp_key="grp 239.1.1.1" local vtep_ip=198.51.100.100 @@ -446,7 +443,7 @@ 
basic_star_g_ipv4_ipv4() basic_star_g_ipv6_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp_key="grp ff0e::1" local vtep_ip=198.51.100.100 @@ -459,7 +456,7 @@ basic_star_g_ipv6_ipv4() basic_star_g_ipv4_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp_key="grp 239.1.1.1" local vtep_ip=2001:db8:1000::1 @@ -472,7 +469,7 @@ basic_star_g_ipv4_ipv6() basic_star_g_ipv6_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp_key="grp ff0e::1" local vtep_ip=2001:db8:1000::1 @@ -485,7 +482,7 @@ basic_star_g_ipv6_ipv6() basic_sg_ipv4_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp_key="grp 239.1.1.1 src 192.0.2.129" local vtep_ip=198.51.100.100 @@ -498,7 +495,7 @@ basic_sg_ipv4_ipv4() basic_sg_ipv6_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp_key="grp ff0e::1 src 2001:db8:100::1" local vtep_ip=198.51.100.100 @@ -511,7 +508,7 @@ basic_sg_ipv6_ipv4() basic_sg_ipv4_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp_key="grp 239.1.1.1 src 192.0.2.129" local vtep_ip=2001:db8:1000::1 @@ -524,7 +521,7 @@ basic_sg_ipv4_ipv6() basic_sg_ipv6_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp_key="grp ff0e::1 src 2001:db8:100::1" local vtep_ip=2001:db8:1000::1 @@ -694,7 +691,7 @@ star_g_common() star_g_ipv4_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp=239.1.1.1 local src1=192.0.2.129 local src2=192.0.2.130 @@ -711,7 +708,7 @@ star_g_ipv4_ipv4() star_g_ipv6_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp=ff0e::1 local src1=2001:db8:100::1 local src2=2001:db8:100::2 @@ -728,7 +725,7 @@ star_g_ipv6_ipv4() star_g_ipv4_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp=239.1.1.1 local src1=192.0.2.129 local src2=192.0.2.130 @@ -745,7 +742,7 @@ star_g_ipv4_ipv6() star_g_ipv6_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp=ff0e::1 local src1=2001:db8:100::1 local src2=2001:db8:100::2 @@ -793,7 +790,7 @@ sg_common() sg_ipv4_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp=239.1.1.1 local 
src=192.0.2.129 local vtep_ip=198.51.100.100 @@ -808,7 +805,7 @@ sg_ipv4_ipv4() sg_ipv6_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local grp=ff0e::1 local src=2001:db8:100::1 local vtep_ip=198.51.100.100 @@ -823,7 +820,7 @@ sg_ipv6_ipv4() sg_ipv4_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp=239.1.1.1 local src=192.0.2.129 local vtep_ip=2001:db8:1000::1 @@ -838,7 +835,7 @@ sg_ipv4_ipv6() sg_ipv6_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local grp=ff0e::1 local src=2001:db8:100::1 local vtep_ip=2001:db8:1000::1 @@ -918,7 +915,7 @@ dump_common() dump_ipv4_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local local_addr=192.0.2.1 local remote_prefix=198.51.100. local fn=ipv4_grps_get @@ -932,7 +929,7 @@ dump_ipv4_ipv4() dump_ipv6_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local local_addr=192.0.2.1 local remote_prefix=198.51.100. local fn=ipv6_grps_get @@ -946,7 +943,7 @@ dump_ipv6_ipv4() dump_ipv4_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local local_addr=2001:db8:1::1 local remote_prefix=2001:db8:1000:: local fn=ipv4_grps_get @@ -960,7 +957,7 @@ dump_ipv4_ipv6() dump_ipv6_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local local_addr=2001:db8:1::1 local remote_prefix=2001:db8:1000:: local fn=ipv6_grps_get @@ -972,6 +969,202 @@ dump_ipv6_ipv6() dump_common $ns1 $local_addr $remote_prefix $fn } +flush() +{ + local num_entries + + echo + echo "Control path: Flush" + echo "-------------------" + + # Add entries with different attributes and check that they are all + # flushed when the flush command is given with no parameters. + + # Different source VNI. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.2 permanent dst 198.51.100.1 src_vni 10011" + + # Different routing protocol. 
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.3 permanent proto bgp dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.4 permanent proto zebra dst 198.51.100.1 src_vni 10010" + + # Different destination IP. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.5 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.6 permanent dst 198.51.100.2 src_vni 10010" + + # Different destination port. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.7 permanent dst 198.51.100.1 dst_port 11111 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.8 permanent dst 198.51.100.1 dst_port 22222 src_vni 10010" + + # Different VNI. + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.9 permanent dst 198.51.100.1 vni 10010 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.10 permanent dst 198.51.100.1 vni 10020 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + num_entries=$(bridge -n $ns1_v4 mdb show dev vx0 | wc -l) + [[ $num_entries -eq 0 ]] + log_test $? 0 "Flush all" + + # Check that entries are flushed when port is specified as the VXLAN + # device and that an error is returned when port is specified as a + # different net device. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port vx0" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010" + log_test $? 254 "Flush by port" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port veth0" + log_test $? 
255 "Flush by wrong port" + + # Check that when flushing by source VNI only entries programmed with + # the specified source VNI are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10011" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10011" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010" + log_test $? 254 "Flush by specified source VNI" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10011" + log_test $? 0 "Flush by unspecified source VNI" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Check that all entries are flushed when "permanent" is specified and + # that an error is returned when "nopermanent" is specified. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 permanent" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010" + log_test $? 254 "Flush by \"permanent\" state" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 nopermanent" + log_test $? 255 "Flush by \"nopermanent\" state" + + # Check that when flushing by routing protocol only entries programmed + # with the specified routing protocol are flushed and the rest are not. 
+ + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent proto bgp dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent proto zebra dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 proto bgp" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto bgp\"" + log_test $? 1 "Flush by specified routing protocol" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto zebra\"" + log_test $? 0 "Flush by unspecified routing protocol" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Check that when flushing by destination IP only entries programmed + # with the specified destination IP are flushed and the rest are not. + + # IPv4. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 198.51.100.2" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2" + log_test $? 1 "Flush by specified destination IP - IPv4" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1" + log_test $? 0 "Flush by unspecified destination IP - IPv4" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # IPv6. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 2001:db8:1000::1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 2001:db8:1000::2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 2001:db8:1000::2" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::2" + log_test $? 
1 "Flush by specified destination IP - IPv6" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::1" + log_test $? 0 "Flush by unspecified destination IP - IPv6" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Check that when flushing by UDP destination port only entries + # programmed with the specified port are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 11111 dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 22222 dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 11111" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 11111\"" + log_test $? 1 "Flush by specified UDP destination port" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 22222\"" + log_test $? 0 "Flush by unspecified UDP destination port" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # When not specifying a UDP destination port for an entry, traffic is + # encapsulated with the device's UDP destination port. Check that when + # flushing by the device's UDP destination port only entries programmed + # with this port are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 22222 dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 4789" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1" + log_test $? 1 "Flush by device's UDP destination port" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2" + log_test $? 
0 "Flush by unspecified UDP destination port" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Check that when flushing by destination VNI only entries programmed + # with the specified destination VNI are flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20010 dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20011 dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 20010" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20010\"" + log_test $? 1 "Flush by specified destination VNI" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20011\"" + log_test $? 0 "Flush by unspecified destination VNI" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # When not specifying a destination VNI for an entry, traffic is + # encapsulated with the source VNI. Check that when flushing by a + # destination VNI that is equal to the source VNI only such entries are + # flushed and the rest are not. + + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010" + run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20010 dst 198.51.100.2 src_vni 10010" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 10010" + + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1" + log_test $? 1 "Flush by destination VNI equal to source VNI" + run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2" + log_test $? 0 "Flush by unspecified destination VNI" + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0" + + # Test that an error is returned when trying to flush using VLAN ID. + + run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vid 10" + log_test $? 
255 "Flush by VLAN ID" +} + ################################################################################ # Tests - Data path @@ -984,6 +1177,7 @@ encap_params_common() local plen=$1; shift local enc_ethtype=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local src=$1; shift local mz=$1; shift @@ -1002,11 +1196,11 @@ encap_params_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep2_ip src_vni 10020" run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Destination IP - match" - run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Destination IP - no match" @@ -1019,20 +1213,20 @@ encap_params_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip dst_port 1111 src_vni 10020" run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 4789 action pass" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev veth0 ingress" 101 1 log_test $? 
0 "Default destination port - match" - run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev veth0 ingress" 101 1 log_test $? 0 "Default destination port - no match" run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 1111 action pass" - run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev veth0 ingress" 101 1 log_test $? 0 "Non-default destination port - match" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev veth0 ingress" 101 1 log_test $? 0 "Non-default destination port - no match" @@ -1045,11 +1239,11 @@ encap_params_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10020" run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10010 action pass" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Default destination VNI - match" - run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 
0 "Default destination VNI - no match" @@ -1057,11 +1251,11 @@ encap_params_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10010 src_vni 10020" run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10020 action pass" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Non-default destination VNI - match" - run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Non-default destination VNI - no match" @@ -1072,13 +1266,14 @@ encap_params_common() encap_params_ipv4_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 local enc_ethtype="ip" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo @@ -1086,18 +1281,19 @@ encap_params_ipv4_ipv4() echo "------------------------------------------------------------------" encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ - $grp $src "mausezahn" + $grp $grp_dmac $src "mausezahn" } encap_params_ipv6_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 local enc_ethtype="ip" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo @@ -1105,18 +1301,19 @@ encap_params_ipv6_ipv4() echo "------------------------------------------------------------------" encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen 
$enc_ethtype \ - $grp $src "mausezahn -6" + $grp $grp_dmac $src "mausezahn -6" } encap_params_ipv4_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 local enc_ethtype="ipv6" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo @@ -1124,18 +1321,19 @@ encap_params_ipv4_ipv6() echo "------------------------------------------------------------------" encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ - $grp $src "mausezahn" + $grp $grp_dmac $src "mausezahn" } encap_params_ipv6_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 local enc_ethtype="ipv6" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo @@ -1143,7 +1341,7 @@ encap_params_ipv6_ipv6() echo "------------------------------------------------------------------" encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \ - $grp $src "mausezahn -6" + $grp $grp_dmac $src "mausezahn -6" } starg_exclude_ir_common() @@ -1154,6 +1352,7 @@ starg_exclude_ir_common() local vtep2_ip=$1; shift local plen=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local valid_src=$1; shift local invalid_src=$1; shift local mz=$1; shift @@ -1175,14 +1374,14 @@ starg_exclude_ir_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep2_ip src_vni 10010" # Check that invalid source is not forwarded to any VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 0 log_test $? 
0 "Block excluded source - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 0 log_test $? 0 "Block excluded source - second VTEP" # Check that valid source is forwarded to both VTEPs. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Forward valid source - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -1192,14 +1391,14 @@ starg_exclude_ir_common() run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010" # Check that invalid source is not forwarded to any VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Block excluded source after removal - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 log_test $? 0 "Block excluded source after removal - second VTEP" # Check that valid source is forwarded to the remaining VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 2 log_test $? 
0 "Forward valid source after removal - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -1208,12 +1407,13 @@ starg_exclude_ir_common() starg_exclude_ir_ipv4_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1222,17 +1422,18 @@ starg_exclude_ir_ipv4_ipv4() echo "-------------------------------------------------------------" starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn" + $grp_dmac $valid_src $invalid_src "mausezahn" } starg_exclude_ir_ipv6_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1241,17 +1442,18 @@ starg_exclude_ir_ipv6_ipv4() echo "-------------------------------------------------------------" starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn -6" + $grp_dmac $valid_src $invalid_src "mausezahn -6" } starg_exclude_ir_ipv4_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1260,17 +1462,18 @@ starg_exclude_ir_ipv4_ipv6() echo "-------------------------------------------------------------" starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn" + $grp_dmac $valid_src $invalid_src "mausezahn" } starg_exclude_ir_ipv6_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 
local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1279,7 +1482,7 @@ starg_exclude_ir_ipv6_ipv6() echo "-------------------------------------------------------------" starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn -6" + $grp_dmac $valid_src $invalid_src "mausezahn -6" } starg_include_ir_common() @@ -1290,6 +1493,7 @@ starg_include_ir_common() local vtep2_ip=$1; shift local plen=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local valid_src=$1; shift local invalid_src=$1; shift local mz=$1; shift @@ -1311,14 +1515,14 @@ starg_include_ir_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep2_ip src_vni 10010" # Check that invalid source is not forwarded to any VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 0 log_test $? 0 "Block excluded source - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 0 log_test $? 0 "Block excluded source - second VTEP" # Check that valid source is forwarded to both VTEPs. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 
0 "Forward valid source - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -1328,14 +1532,14 @@ starg_include_ir_common() run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010" # Check that invalid source is not forwarded to any VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Block excluded source after removal - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 log_test $? 0 "Block excluded source after removal - second VTEP" # Check that valid source is forwarded to the remaining VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 2 log_test $? 
0 "Forward valid source after removal - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -1344,12 +1548,13 @@ starg_include_ir_common() starg_include_ir_ipv4_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1358,17 +1563,18 @@ starg_include_ir_ipv4_ipv4() echo "-------------------------------------------------------------" starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn" + $grp_dmac $valid_src $invalid_src "mausezahn" } starg_include_ir_ipv6_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1377,17 +1583,18 @@ starg_include_ir_ipv6_ipv4() echo "-------------------------------------------------------------" starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn -6" + $grp_dmac $valid_src $invalid_src "mausezahn -6" } starg_include_ir_ipv4_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1396,17 +1603,18 @@ starg_include_ir_ipv4_ipv6() echo "-------------------------------------------------------------" starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn" + $grp_dmac $valid_src $invalid_src "mausezahn" } starg_include_ir_ipv6_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 
local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1415,7 +1623,7 @@ starg_include_ir_ipv6_ipv6() echo "-------------------------------------------------------------" starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \ - $valid_src $invalid_src "mausezahn -6" + $grp_dmac $valid_src $invalid_src "mausezahn -6" } starg_exclude_p2mp_common() @@ -1425,6 +1633,7 @@ starg_exclude_p2mp_common() local mcast_grp=$1; shift local plen=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local valid_src=$1; shift local invalid_src=$1; shift local mz=$1; shift @@ -1442,12 +1651,12 @@ starg_exclude_p2mp_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $mcast_grp src_vni 10010 via veth0" # Check that invalid source is not forwarded. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 0 log_test $? 0 "Block excluded source" # Check that valid source is forwarded. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Forward valid source" @@ -1455,18 +1664,19 @@ starg_exclude_p2mp_common() run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0" # Check that valid source is not received anymore. 
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Receive of valid source after removal from group" } starg_exclude_p2mp_ipv4_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local mcast_grp=238.1.1.1 local plen=32 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1474,17 +1684,18 @@ starg_exclude_p2mp_ipv4_ipv4() echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv4 underlay" echo "---------------------------------------------------------------" - starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn" } starg_exclude_p2mp_ipv6_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local mcast_grp=238.1.1.1 local plen=32 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1492,17 +1703,18 @@ starg_exclude_p2mp_ipv6_ipv4() echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv4 underlay" echo "---------------------------------------------------------------" - starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn -6" } starg_exclude_p2mp_ipv4_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local mcast_grp=ff0e::2 local plen=128 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1510,17 +1722,18 @@ starg_exclude_p2mp_ipv4_ipv6() echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv6 
underlay" echo "---------------------------------------------------------------" - starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn" } starg_exclude_p2mp_ipv6_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local mcast_grp=ff0e::2 local plen=128 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1528,7 +1741,7 @@ starg_exclude_p2mp_ipv6_ipv6() echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv6 underlay" echo "---------------------------------------------------------------" - starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn -6" } @@ -1539,6 +1752,7 @@ starg_include_p2mp_common() local mcast_grp=$1; shift local plen=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local valid_src=$1; shift local invalid_src=$1; shift local mz=$1; shift @@ -1556,12 +1770,12 @@ starg_include_p2mp_common() run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $mcast_grp src_vni 10010 via veth0" # Check that invalid source is not forwarded. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 0 log_test $? 0 "Block excluded source" # Check that valid source is forwarded. 
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Forward valid source" @@ -1569,18 +1783,19 @@ starg_include_p2mp_common() run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0" # Check that valid source is not received anymore. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Receive of valid source after removal from group" } starg_include_p2mp_ipv4_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local mcast_grp=238.1.1.1 local plen=32 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1588,17 +1803,18 @@ starg_include_p2mp_ipv4_ipv4() echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv4 underlay" echo "---------------------------------------------------------------" - starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn" } starg_include_p2mp_ipv6_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local mcast_grp=238.1.1.1 local plen=32 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1606,17 +1822,18 @@ starg_include_p2mp_ipv6_ipv4() echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv4 underlay" echo "---------------------------------------------------------------" - starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + 
starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn -6" } starg_include_p2mp_ipv4_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local mcast_grp=ff0e::2 local plen=128 local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local valid_src=192.0.2.129 local invalid_src=192.0.2.145 @@ -1624,17 +1841,18 @@ starg_include_p2mp_ipv4_ipv6() echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv6 underlay" echo "---------------------------------------------------------------" - starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn" } starg_include_p2mp_ipv6_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local mcast_grp=ff0e::2 local plen=128 local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local valid_src=2001:db8:100::1 local invalid_src=2001:db8:200::1 @@ -1642,7 +1860,7 @@ starg_include_p2mp_ipv6_ipv6() echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv6 underlay" echo "---------------------------------------------------------------" - starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \ + starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \ $valid_src $invalid_src "mausezahn -6" } @@ -1654,6 +1872,7 @@ egress_vni_translation_common() local plen=$1; shift local proto=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local src=$1; shift local mz=$1; shift @@ -1689,32 +1908,33 @@ egress_vni_translation_common() # Make sure that packets sent from the first VTEP over VLAN 10 are # received by the SVI corresponding to the L3VNI (14000 / VLAN 4000) on # the second VTEP, since it is configured as PVID. 
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1 log_test $? 0 "Egress VNI translation - PVID configured" # Remove PVID flag from VLAN 4000 on the second VTEP and make sure # packets are no longer received by the SVI interface. run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1 log_test $? 0 "Egress VNI translation - no PVID configured" # Reconfigure the PVID and make sure packets are received again. run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0 pvid" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev br0.4000 ingress" 101 2 log_test $? 
0 "Egress VNI translation - PVID reconfigured" } egress_vni_translation_ipv4_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local mcast_grp=238.1.1.1 local plen=32 local proto="ipv4" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo @@ -1722,17 +1942,18 @@ egress_vni_translation_ipv4_ipv4() echo "----------------------------------------------------------------" egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ - $src "mausezahn" + $grp_dmac $src "mausezahn" } egress_vni_translation_ipv6_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local mcast_grp=238.1.1.1 local plen=32 local proto="ipv6" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo @@ -1740,17 +1961,18 @@ egress_vni_translation_ipv6_ipv4() echo "----------------------------------------------------------------" egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ - $src "mausezahn -6" + $grp_dmac $src "mausezahn -6" } egress_vni_translation_ipv4_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local mcast_grp=ff0e::2 local plen=128 local proto="ipv4" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo @@ -1758,17 +1980,18 @@ egress_vni_translation_ipv4_ipv6() echo "----------------------------------------------------------------" egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \ - $src "mausezahn" + $grp_dmac $src "mausezahn" } egress_vni_translation_ipv6_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local mcast_grp=ff0e::2 local plen=128 local proto="ipv6" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo @@ -1776,7 +1999,7 @@ egress_vni_translation_ipv6_ipv6() echo "----------------------------------------------------------------" egress_vni_translation_common 
$ns1 $ns2 $mcast_grp $plen $proto $grp \ - $src "mausezahn -6" + $grp_dmac $src "mausezahn -6" } all_zeros_mdb_common() @@ -1789,12 +2012,18 @@ all_zeros_mdb_common() local vtep4_ip=$1; shift local plen=$1; shift local ipv4_grp=239.1.1.1 + local ipv4_grp_dmac=01:00:5e:01:01:01 local ipv4_unreg_grp=239.2.2.2 + local ipv4_unreg_grp_dmac=01:00:5e:02:02:02 local ipv4_ll_grp=224.0.0.100 + local ipv4_ll_grp_dmac=01:00:5e:00:00:64 local ipv4_src=192.0.2.129 local ipv6_grp=ff0e::1 + local ipv6_grp_dmac=33:33:00:00:00:01 local ipv6_unreg_grp=ff0e::2 + local ipv6_unreg_grp_dmac=33:33:00:00:00:02 local ipv6_ll_grp=ff02::1 + local ipv6_ll_grp_dmac=33:33:00:00:00:01 local ipv6_src=2001:db8:100::1 # Install all-zeros (catchall) MDB entries for IPv4 and IPv6 traffic @@ -1830,7 +2059,7 @@ all_zeros_mdb_common() # Send registered IPv4 multicast and make sure it only arrives to the # first VTEP. - run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_grp_dmac -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "Registered IPv4 multicast - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 0 @@ -1838,7 +2067,7 @@ all_zeros_mdb_common() # Send unregistered IPv4 multicast that is not link-local and make sure # it arrives to the first and second VTEPs. - run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_unreg_grp_dmac -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 2 log_test $? 
0 "Unregistered IPv4 multicast - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -1846,7 +2075,7 @@ all_zeros_mdb_common() # Send IPv4 link-local multicast traffic and make sure it does not # arrive to any VTEP. - run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_ll_grp_dmac -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 2 log_test $? 0 "Link-local IPv4 multicast - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 1 @@ -1881,7 +2110,7 @@ all_zeros_mdb_common() # Send registered IPv6 multicast and make sure it only arrives to the # third VTEP. - run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_grp_dmac -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 103 1 log_test $? 0 "Registered IPv6 multicast - third VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 104 0 @@ -1889,7 +2118,7 @@ all_zeros_mdb_common() # Send unregistered IPv6 multicast that is not link-local and make sure # it arrives to the third and fourth VTEPs. - run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_unreg_grp_dmac -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 103 2 log_test $? 0 "Unregistered IPv6 multicast - third VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 104 1 @@ -1897,7 +2126,7 @@ all_zeros_mdb_common() # Send IPv6 link-local multicast traffic and make sure it does not # arrive to any VTEP. 
- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_ll_grp_dmac -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 103 2 log_test $? 0 "Link-local IPv6 multicast - third VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 104 1 @@ -1929,8 +2158,8 @@ all_zeros_mdb_common() all_zeros_mdb_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.101 local vtep2_ip=198.51.100.102 local vtep3_ip=198.51.100.103 @@ -1947,8 +2176,8 @@ all_zeros_mdb_ipv4() all_zeros_mdb_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local vtep3_ip=2001:db8:3000::1 @@ -1972,6 +2201,7 @@ mdb_fdb_common() local plen=$1; shift local proto=$1; shift local grp=$1; shift + local grp_dmac=$1; shift local src=$1; shift local mz=$1; shift @@ -1995,7 +2225,7 @@ mdb_fdb_common() # Send IP multicast traffic and make sure it is forwarded by the MDB # and only arrives to the first VTEP. - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "IP multicast - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 0 @@ -2012,7 +2242,7 @@ mdb_fdb_common() # Remove the MDB entry and make sure that IP multicast is now forwarded # by the FDB to the second VTEP. 
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010" - run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" + run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q" tc_check_packets "$ns2" "dev vx0 ingress" 101 1 log_test $? 0 "IP multicast after removal - first VTEP" tc_check_packets "$ns2" "dev vx0 ingress" 102 2 @@ -2021,78 +2251,82 @@ mdb_fdb_common() mdb_fdb_ipv4_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 local proto="ipv4" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo echo "Data path: MDB with FDB - IPv4 overlay / IPv4 underlay" echo "------------------------------------------------------" - mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ - "mausezahn" + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ + $grp_dmac $src "mausezahn" } mdb_fdb_ipv6_ipv4() { - local ns1=ns1_v4 - local ns2=ns2_v4 + local ns1=$ns1_v4 + local ns2=$ns2_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local plen=32 local proto="ipv6" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo echo "Data path: MDB with FDB - IPv6 overlay / IPv4 underlay" echo "------------------------------------------------------" - mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ - "mausezahn -6" + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ + $grp_dmac $src "mausezahn -6" } mdb_fdb_ipv4_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 local proto="ipv4" local grp=239.1.1.1 + local grp_dmac=01:00:5e:01:01:01 local src=192.0.2.129 echo echo "Data path: MDB with FDB - IPv4 overlay / 
IPv6 underlay" echo "------------------------------------------------------" - mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ - "mausezahn" + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ + $grp_dmac $src "mausezahn" } mdb_fdb_ipv6_ipv6() { - local ns1=ns1_v6 - local ns2=ns2_v6 + local ns1=$ns1_v6 + local ns2=$ns2_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local plen=128 local proto="ipv6" local grp=ff0e::1 + local grp_dmac=33:33:00:00:00:01 local src=2001:db8:100::1 echo echo "Data path: MDB with FDB - IPv6 overlay / IPv6 underlay" echo "------------------------------------------------------" - mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \ - "mausezahn -6" + mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \ + $grp_dmac $src "mausezahn -6" } mdb_grp1_loop() @@ -2127,7 +2361,9 @@ mdb_torture_common() local vtep1_ip=$1; shift local vtep2_ip=$1; shift local grp1=$1; shift + local grp1_dmac=$1; shift local grp2=$1; shift + local grp2_dmac=$1; shift local src=$1; shift local mz=$1; shift local pid1 @@ -2152,9 +2388,9 @@ mdb_torture_common() pid1=$! mdb_grp2_loop $ns1 $vtep1_ip $vtep2_ip $grp2 & pid2=$! - ip netns exec $ns1 $mz br0.10 -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & + ip netns exec $ns1 $mz br0.10 -a own -b $grp1_dmac -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & pid3=$! - ip netns exec $ns1 $mz br0.10 -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & + ip netns exec $ns1 $mz br0.10 -a own -b $grp2_dmac -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q & pid4=$! 
sleep 30 @@ -2166,70 +2402,78 @@ mdb_torture_common() mdb_torture_ipv4_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local grp1=239.1.1.1 + local grp1_dmac=01:00:5e:01:01:01 local grp2=239.2.2.2 + local grp2_dmac=01:00:5e:02:02:02 local src=192.0.2.129 echo echo "Data path: MDB torture test - IPv4 overlay / IPv4 underlay" echo "----------------------------------------------------------" - mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ - "mausezahn" + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ + $grp2_dmac $src "mausezahn" } mdb_torture_ipv6_ipv4() { - local ns1=ns1_v4 + local ns1=$ns1_v4 local vtep1_ip=198.51.100.100 local vtep2_ip=198.51.100.200 local grp1=ff0e::1 + local grp1_dmac=33:33:00:00:00:01 local grp2=ff0e::2 + local grp2_dmac=33:33:00:00:00:02 local src=2001:db8:100::1 echo echo "Data path: MDB torture test - IPv6 overlay / IPv4 underlay" echo "----------------------------------------------------------" - mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ - "mausezahn -6" + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ + $grp2_dmac $src "mausezahn -6" } mdb_torture_ipv4_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local grp1=239.1.1.1 + local grp1_dmac=01:00:5e:01:01:01 local grp2=239.2.2.2 + local grp2_dmac=01:00:5e:02:02:02 local src=192.0.2.129 echo echo "Data path: MDB torture test - IPv4 overlay / IPv6 underlay" echo "----------------------------------------------------------" - mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ - "mausezahn" + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ + $grp2_dmac $src "mausezahn" } mdb_torture_ipv6_ipv6() { - local ns1=ns1_v6 + local ns1=$ns1_v6 local vtep1_ip=2001:db8:1000::1 local vtep2_ip=2001:db8:2000::1 local grp1=ff0e::1 + local grp1_dmac=33:33:00:00:00:01 local 
grp2=ff0e::2 + local grp2_dmac=33:33:00:00:00:02 local src=2001:db8:100::1 echo echo "Data path: MDB torture test - IPv6 overlay / IPv6 underlay" echo "----------------------------------------------------------" - mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \ - "mausezahn -6" + mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \ + $grp2_dmac $src "mausezahn -6" } ################################################################################ @@ -2296,9 +2540,9 @@ if [ ! -x "$(command -v jq)" ]; then exit $ksft_skip fi -bridge mdb help 2>&1 | grep -q "get" +bridge mdb help 2>&1 | grep -q "flush" if [ $? -ne 0 ]; then - echo "SKIP: iproute2 bridge too old, missing VXLAN MDB get support" + echo "SKIP: iproute2 bridge too old, missing VXLAN MDB flush support" exit $ksft_skip fi diff --git a/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh b/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh index f75212bf14..b8805983b7 100755 --- a/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh +++ b/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh @@ -9,9 +9,8 @@ # option and verifies that packets are no longer received by the second VXLAN # device. +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. 
-ksft_skip=4 TESTS=" nolocalbypass @@ -98,20 +97,19 @@ tc_check_packets() setup() { - ip netns add ns1 + setup_ns ns1 - ip -n ns1 link set dev lo up - ip -n ns1 address add 192.0.2.1/32 dev lo - ip -n ns1 address add 198.51.100.1/32 dev lo + ip -n $ns1 address add 192.0.2.1/32 dev lo + ip -n $ns1 address add 198.51.100.1/32 dev lo - ip -n ns1 link add name vx0 up type vxlan id 100 local 198.51.100.1 \ + ip -n $ns1 link add name vx0 up type vxlan id 100 local 198.51.100.1 \ dstport 4789 nolearning - ip -n ns1 link add name vx1 up type vxlan id 100 dstport 4790 + ip -n $ns1 link add name vx1 up type vxlan id 100 dstport 4790 } cleanup() { - ip netns del ns1 &> /dev/null + cleanup_ns $ns1 } ################################################################################ @@ -122,40 +120,40 @@ nolocalbypass() local smac=00:01:02:03:04:05 local dmac=00:0a:0b:0c:0d:0e - run_cmd "bridge -n ns1 fdb add $dmac dev vx0 self static dst 192.0.2.1 port 4790" + run_cmd "bridge -n $ns1 fdb add $dmac dev vx0 self static dst 192.0.2.1 port 4790" - run_cmd "tc -n ns1 qdisc add dev vx1 clsact" - run_cmd "tc -n ns1 filter add dev vx1 ingress pref 1 handle 101 proto all flower src_mac $smac dst_mac $dmac action pass" + run_cmd "tc -n $ns1 qdisc add dev vx1 clsact" + run_cmd "tc -n $ns1 filter add dev vx1 ingress pref 1 handle 101 proto all flower src_mac $smac dst_mac $dmac action pass" - run_cmd "tc -n ns1 qdisc add dev lo clsact" - run_cmd "tc -n ns1 filter add dev lo ingress pref 1 handle 101 proto ip flower ip_proto udp dst_port 4790 action drop" + run_cmd "tc -n $ns1 qdisc add dev lo clsact" + run_cmd "tc -n $ns1 filter add dev lo ingress pref 1 handle 101 proto ip flower ip_proto udp dst_port 4790 action drop" - run_cmd "ip -n ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'" + run_cmd "ip -n $ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'" log_test $? 
0 "localbypass enabled" - run_cmd "ip netns exec ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" + run_cmd "ip netns exec $ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" - tc_check_packets "ns1" "dev vx1 ingress" 101 1 + tc_check_packets "$ns1" "dev vx1 ingress" 101 1 log_test $? 0 "Packet received by local VXLAN device - localbypass" - run_cmd "ip -n ns1 link set dev vx0 type vxlan nolocalbypass" + run_cmd "ip -n $ns1 link set dev vx0 type vxlan nolocalbypass" - run_cmd "ip -n ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == false'" + run_cmd "ip -n $ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == false'" log_test $? 0 "localbypass disabled" - run_cmd "ip netns exec ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" + run_cmd "ip netns exec $ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" - tc_check_packets "ns1" "dev vx1 ingress" 101 1 + tc_check_packets "$ns1" "dev vx1 ingress" 101 1 log_test $? 0 "Packet not received by local VXLAN device - nolocalbypass" - run_cmd "ip -n ns1 link set dev vx0 type vxlan localbypass" + run_cmd "ip -n $ns1 link set dev vx0 type vxlan localbypass" - run_cmd "ip -n ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'" + run_cmd "ip -n $ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'" log_test $? 0 "localbypass enabled" - run_cmd "ip netns exec ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" + run_cmd "ip netns exec $ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" - tc_check_packets "ns1" "dev vx1 ingress" 101 2 + tc_check_packets "$ns1" "dev vx1 ingress" 101 2 log_test $? 
0 "Packet received by local VXLAN device - localbypass" } diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh index 1fd1250ebc..ae8fbe3f07 100755 --- a/tools/testing/selftests/net/test_vxlan_under_vrf.sh +++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh @@ -43,15 +43,14 @@ # This tests both the connectivity between vm-1 and vm-2, and that the underlay # can be moved in and out of the vrf by unsetting and setting veth0's master. +source lib.sh set -e cleanup() { ip link del veth-hv-1 2>/dev/null || true ip link del veth-tap 2>/dev/null || true - for ns in hv-1 hv-2 vm-1 vm-2; do - ip netns del $ns 2>/dev/null || true - done + cleanup_ns $hv_1 $hv_2 $vm_1 $vm_2 } # Clean start @@ -60,72 +59,75 @@ cleanup &> /dev/null [[ $1 == "clean" ]] && exit 0 trap cleanup EXIT +setup_ns hv_1 hv_2 vm_1 vm_2 +hv[1]=$hv_1 +hv[2]=$hv_2 +vm[1]=$vm_1 +vm[2]=$vm_2 # Setup "Hypervisors" simulated with netns ip link add veth-hv-1 type veth peer name veth-hv-2 setup-hv-networking() { - hv=$1 + id=$1 - ip netns add hv-$hv - ip link set veth-hv-$hv netns hv-$hv - ip -netns hv-$hv link set veth-hv-$hv name veth0 + ip link set veth-hv-$id netns ${hv[$id]} + ip -netns ${hv[$id]} link set veth-hv-$id name veth0 - ip -netns hv-$hv link add vrf-underlay type vrf table 1 - ip -netns hv-$hv link set vrf-underlay up - ip -netns hv-$hv addr add 172.16.0.$hv/24 dev veth0 - ip -netns hv-$hv link set veth0 up + ip -netns ${hv[$id]} link add vrf-underlay type vrf table 1 + ip -netns ${hv[$id]} link set vrf-underlay up + ip -netns ${hv[$id]} addr add 172.16.0.$id/24 dev veth0 + ip -netns ${hv[$id]} link set veth0 up - ip -netns hv-$hv link add br0 type bridge - ip -netns hv-$hv link set br0 up + ip -netns ${hv[$id]} link add br0 type bridge + ip -netns ${hv[$id]} link set br0 up - ip -netns hv-$hv link add vxlan0 type vxlan id 10 local 172.16.0.$hv dev veth0 dstport 4789 - ip -netns hv-$hv link set vxlan0 master br0 - ip -netns 
hv-$hv link set vxlan0 up + ip -netns ${hv[$id]} link add vxlan0 type vxlan id 10 local 172.16.0.$id dev veth0 dstport 4789 + ip -netns ${hv[$id]} link set vxlan0 master br0 + ip -netns ${hv[$id]} link set vxlan0 up } setup-hv-networking 1 setup-hv-networking 2 # Check connectivity between HVs by pinging hv-2 from hv-1 echo -n "Checking HV connectivity " -ip netns exec hv-1 ping -c 1 -W 1 172.16.0.2 &> /dev/null || (echo "[FAIL]"; false) +ip netns exec $hv_1 ping -c 1 -W 1 172.16.0.2 &> /dev/null || (echo "[FAIL]"; false) echo "[ OK ]" # Setups a "VM" simulated by a netns an a veth pair setup-vm() { id=$1 - ip netns add vm-$id ip link add veth-tap type veth peer name veth-hv - ip link set veth-tap netns hv-$id - ip -netns hv-$id link set veth-tap master br0 - ip -netns hv-$id link set veth-tap up + ip link set veth-tap netns ${hv[$id]} + ip -netns ${hv[$id]} link set veth-tap master br0 + ip -netns ${hv[$id]} link set veth-tap up ip link set veth-hv address 02:1d:8d:dd:0c:6$id - ip link set veth-hv netns vm-$id - ip -netns vm-$id addr add 10.0.0.$id/24 dev veth-hv - ip -netns vm-$id link set veth-hv up + ip link set veth-hv netns ${vm[$id]} + ip -netns ${vm[$id]} addr add 10.0.0.$id/24 dev veth-hv + ip -netns ${vm[$id]} link set veth-hv up } setup-vm 1 setup-vm 2 # Setup VTEP routes to make ARP work -bridge -netns hv-1 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.2 self permanent -bridge -netns hv-2 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.1 self permanent +bridge -netns $hv_1 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.2 self permanent +bridge -netns $hv_2 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.1 self permanent echo -n "Check VM connectivity through VXLAN (underlay in the default VRF) " -ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) +ip netns exec $vm_1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) echo "[ OK ]" # Move the underlay to a non-default VRF -ip -netns hv-1 link set 
veth0 vrf vrf-underlay -ip -netns hv-1 link set vxlan0 down -ip -netns hv-1 link set vxlan0 up -ip -netns hv-2 link set veth0 vrf vrf-underlay -ip -netns hv-2 link set vxlan0 down -ip -netns hv-2 link set vxlan0 up +ip -netns $hv_1 link set veth0 vrf vrf-underlay +ip -netns $hv_1 link set vxlan0 down +ip -netns $hv_1 link set vxlan0 up +ip -netns $hv_2 link set veth0 vrf vrf-underlay +ip -netns $hv_2 link set vxlan0 down +ip -netns $hv_2 link set vxlan0 up echo -n "Check VM connectivity through VXLAN (underlay in a VRF) " -ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) +ip netns exec $vm_1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false) echo "[ OK ]" diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh index 8c3ac0a725..6127a78ee9 100755 --- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh +++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh @@ -78,10 +78,8 @@ # # # This test tests the new vxlan vnifiltering api - +source lib.sh ret=0 -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 # all tests in this script. Can be overridden with -t option TESTS=" @@ -148,18 +146,18 @@ run_cmd() } check_hv_connectivity() { - ip netns exec hv-1 ping -c 1 -W 1 $1 &>/dev/null + ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null sleep 1 - ip netns exec hv-1 ping -c 1 -W 1 $2 &>/dev/null + ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null return $? } check_vm_connectivity() { - run_cmd "ip netns exec vm-11 ping -c 1 -W 1 10.0.10.12" + run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12" log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)" - run_cmd "ip netns exec vm-21 ping -c 1 -W 1 10.0.10.22" + run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22" log_test $? 
0 "VM connectivity over $1 (ipv6 default rdst)" } @@ -167,26 +165,23 @@ cleanup() { ip link del veth-hv-1 2>/dev/null || true ip link del vethhv-11 vethhv-12 vethhv-21 vethhv-22 2>/dev/null || true - for ns in hv-1 hv-2 vm-11 vm-21 vm-12 vm-22 vm-31 vm-32; do - ip netns del $ns 2>/dev/null || true - done + cleanup_ns $hv_1 $hv_2 $vm_11 $vm_21 $vm_12 $vm_22 $vm_31 $vm_32 } trap cleanup EXIT setup-hv-networking() { - hv=$1 + id=$1 local1=$2 mask1=$3 local2=$4 mask2=$5 - ip netns add hv-$hv - ip link set veth-hv-$hv netns hv-$hv - ip -netns hv-$hv link set veth-hv-$hv name veth0 - ip -netns hv-$hv addr add $local1/$mask1 dev veth0 - ip -netns hv-$hv addr add $local2/$mask2 dev veth0 - ip -netns hv-$hv link set veth0 up + ip link set veth-hv-$id netns ${hv[$id]} + ip -netns ${hv[$id]} link set veth-hv-$id name veth0 + ip -netns ${hv[$id]} addr add $local1/$mask1 dev veth0 + ip -netns ${hv[$id]} addr add $local2/$mask2 dev veth0 + ip -netns ${hv[$id]} link set veth0 up } # Setups a "VM" simulated by a netns an a veth pair @@ -208,21 +203,20 @@ setup-vm() { lastvxlandev="" # create bridge - ip -netns hv-$hvid link add br$brid type bridge vlan_filtering 1 vlan_default_pvid 0 \ + ip -netns ${hv[$hvid]} link add br$brid type bridge vlan_filtering 1 vlan_default_pvid 0 \ mcast_snooping 0 - ip -netns hv-$hvid link set br$brid up + ip -netns ${hv[$hvid]} link set br$brid up # create vm namespace and interfaces and connect to hypervisor # namespace - ip netns add vm-$vmid hvvethif="vethhv-$vmid" vmvethif="veth-$vmid" ip link add $hvvethif type veth peer name $vmvethif - ip link set $hvvethif netns hv-$hvid - ip link set $vmvethif netns vm-$vmid - ip -netns hv-$hvid link set $hvvethif up - ip -netns vm-$vmid link set $vmvethif up - ip -netns hv-$hvid link set $hvvethif master br$brid + ip link set $hvvethif netns ${hv[$hvid]} + ip link set $vmvethif netns ${vm[$vmid]} + ip -netns ${hv[$hvid]} link set $hvvethif up + ip -netns ${vm[$vmid]} link set $vmvethif up + ip -netns 
${hv[$hvid]} link set $hvvethif master br$brid # configure VM vlan/vni filtering on hypervisor for vmap in $(echo $vattrs | cut -d "," -f1- --output-delimiter=' ') @@ -234,9 +228,9 @@ setup-vm() { local vtype=$(echo $vmap | awk -F'-' '{print ($5)}') local port=$(echo $vmap | awk -F'-' '{print ($6)}') - ip -netns vm-$vmid link add name $vmvethif.$vid link $vmvethif type vlan id $vid - ip -netns vm-$vmid addr add 10.0.$vid.$vmid/24 dev $vmvethif.$vid - ip -netns vm-$vmid link set $vmvethif.$vid up + ip -netns ${vm[$vmid]} link add name $vmvethif.$vid link $vmvethif type vlan id $vid + ip -netns ${vm[$vmid]} addr add 10.0.$vid.$vmid/24 dev $vmvethif.$vid + ip -netns ${vm[$vmid]} link set $vmvethif.$vid up tid=$vid vxlandev="vxlan$brid" @@ -268,35 +262,35 @@ setup-vm() { # create vxlan device if [ "$vxlandev" != "$lastvxlandev" ]; then - ip -netns hv-$hvid link add $vxlandev type vxlan local $localip $vxlandevflags dev veth0 2>/dev/null - ip -netns hv-$hvid link set $vxlandev master br$brid - ip -netns hv-$hvid link set $vxlandev up + ip -netns ${hv[$hvid]} link add $vxlandev type vxlan local $localip $vxlandevflags dev veth0 2>/dev/null + ip -netns ${hv[$hvid]} link set $vxlandev master br$brid + ip -netns ${hv[$hvid]} link set $vxlandev up lastvxlandev=$vxlandev fi # add vlan - bridge -netns hv-$hvid vlan add vid $vid dev $hvvethif - bridge -netns hv-$hvid vlan add vid $vid pvid dev $vxlandev + bridge -netns ${hv[$hvid]} vlan add vid $vid dev $hvvethif + bridge -netns ${hv[$hvid]} vlan add vid $vid pvid dev $vxlandev # Add bridge vni filter for tx if [[ -n $vtype && $vtype == "metadata" || $vtype == "vnifilter" || $vtype == "vnifilterg" ]]; then - bridge -netns hv-$hvid link set dev $vxlandev vlan_tunnel on - bridge -netns hv-$hvid vlan add dev $vxlandev vid $vid tunnel_info id $tid + bridge -netns ${hv[$hvid]} link set dev $vxlandev vlan_tunnel on + bridge -netns ${hv[$hvid]} vlan add dev $vxlandev vid $vid tunnel_info id $tid fi if [[ -n $vtype && $vtype == 
"metadata" ]]; then - bridge -netns hv-$hvid fdb add 00:00:00:00:00:00 dev $vxlandev \ + bridge -netns ${hv[$hvid]} fdb add 00:00:00:00:00:00 dev $vxlandev \ src_vni $tid vni $tid dst $group self elif [[ -n $vtype && $vtype == "vnifilter" ]]; then # Add per vni rx filter with 'bridge vni' api - bridge -netns hv-$hvid vni add dev $vxlandev vni $tid + bridge -netns ${hv[$hvid]} vni add dev $vxlandev vni $tid elif [[ -n $vtype && $vtype == "vnifilterg" ]]; then # Add per vni group config with 'bridge vni' api if [ -n "$group" ]; then if [ $mcast -eq 1 ]; then - bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group $group + bridge -netns ${hv[$hvid]} vni add dev $vxlandev vni $tid group $group else - bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote $group + bridge -netns ${hv[$hvid]} vni add dev $vxlandev vni $tid remote $group fi fi fi @@ -306,14 +300,14 @@ setup-vm() { setup_vnifilter_api() { ip link add veth-host type veth peer name veth-testns - ip netns add testns - ip link set veth-testns netns testns + setup_ns testns + ip link set veth-testns netns $testns } cleanup_vnifilter_api() { ip link del veth-host 2>/dev/null || true - ip netns del testns 2>/dev/null || true + ip netns del $testns 2>/dev/null || true } # tests vxlan filtering api @@ -331,52 +325,52 @@ vxlan_vnifilter_api() # Duplicate vni test # create non-vnifiltering traditional vni device - run_cmd "ip -netns testns link add vxlan100 type vxlan id 100 local $localip dev veth-testns dstport 4789" + run_cmd "ip -netns $testns link add vxlan100 type vxlan id 100 local $localip dev veth-testns dstport 4789" log_test $? 0 "Create traditional vxlan device" # create vni filtering device - run_cmd "ip -netns testns link add vxlan-ext1 type vxlan vnifilter local $localip dev veth-testns dstport 4789" + run_cmd "ip -netns $testns link add vxlan-ext1 type vxlan vnifilter local $localip dev veth-testns dstport 4789" log_test $? 
1 "Cannot create vnifilter device without external flag" - run_cmd "ip -netns testns link add vxlan-ext1 type vxlan external vnifilter local $localip dev veth-testns dstport 4789" + run_cmd "ip -netns $testns link add vxlan-ext1 type vxlan external vnifilter local $localip dev veth-testns dstport 4789" log_test $? 0 "Creating external vxlan device with vnifilter flag" - run_cmd "bridge -netns testns vni add dev vxlan-ext1 vni 100" + run_cmd "bridge -netns $testns vni add dev vxlan-ext1 vni 100" log_test $? 0 "Cannot set in-use vni id on vnifiltering device" - run_cmd "bridge -netns testns vni add dev vxlan-ext1 vni 200" + run_cmd "bridge -netns $testns vni add dev vxlan-ext1 vni 200" log_test $? 0 "Set new vni id on vnifiltering device" - run_cmd "ip -netns testns link add vxlan-ext2 type vxlan external vnifilter local $localip dev veth-testns dstport 4789" + run_cmd "ip -netns $testns link add vxlan-ext2 type vxlan external vnifilter local $localip dev veth-testns dstport 4789" log_test $? 0 "Create second external vxlan device with vnifilter flag" - run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 200" + run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 200" log_test $? 255 "Cannot set in-use vni id on vnifiltering device" - run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 300" + run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 300" log_test $? 0 "Set new vni id on vnifiltering device" # check in bridge vni show - run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 300" + run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 300" log_test $? 0 "Update vni id on vnifiltering device" - run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 400" + run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 400" log_test $? 
0 "Add new vni id on vnifiltering device" # add multicast group per vni - run_cmd "bridge -netns testns vni add dev vxlan-ext1 vni 200 group $group" + run_cmd "bridge -netns $testns vni add dev vxlan-ext1 vni 200 group $group" log_test $? 0 "Set multicast group on existing vni" # add multicast group per vni - run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 300 group $group" + run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 300 group $group" log_test $? 0 "Set multicast group on existing vni" # set vnifilter on an existing external vxlan device - run_cmd "ip -netns testns link set dev vxlan-ext1 type vxlan external vnifilter" + run_cmd "ip -netns $testns link set dev vxlan-ext1 type vxlan external vnifilter" log_test $? 2 "Cannot set vnifilter flag on a device" # change vxlan vnifilter flag - run_cmd "ip -netns testns link set dev vxlan-ext1 type vxlan external novnifilter" + run_cmd "ip -netns $testns link set dev vxlan-ext1 type vxlan external novnifilter" log_test $? 
2 "Cannot unset vnifilter flag on a device" } @@ -390,12 +384,20 @@ vxlan_vnifilter_datapath() hv1addr2="2002:fee1::1" hv2addr2="2002:fee1::2" + setup_ns hv_1 hv_2 + hv[1]=$hv_1 + hv[2]=$hv_2 ip link add veth-hv-1 type veth peer name veth-hv-2 setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 $hv2addr1 $hv2addr2 setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 $hv1addr1 $hv1addr2 check_hv_connectivity hv2addr1 hv2addr2 + setup_ns vm_11 vm_21 vm_12 vm_22 + vm[11]=$vm_11 + vm[21]=$vm_21 + vm[12]=$vm_12 + vm[22]=$vm_22 setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilter,20-v4-$hv1addr1-$hv2addr1-vnifilter 0 setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilter,20-v6-$hv1addr2-$hv2addr2-vnifilter 0 @@ -415,12 +417,20 @@ vxlan_vnifilter_datapath_pervni() hv1addr2="2002:fee1::1" hv2addr2="2002:fee1::2" + setup_ns hv_1 hv_2 + hv[1]=$hv_1 + hv[2]=$hv_2 ip link add veth-hv-1 type veth peer name veth-hv-2 setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 check_hv_connectivity hv2addr1 hv2addr2 + setup_ns vm_11 vm_21 vm_12 vm_22 + vm[11]=$vm_11 + vm[21]=$vm_21 + vm[12]=$vm_12 + vm[22]=$vm_22 setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilterg,20-v4-$hv1addr1-$hv2addr1-vnifilterg 0 setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilterg,20-v6-$hv1addr2-$hv2addr2-vnifilterg 0 @@ -440,12 +450,20 @@ vxlan_vnifilter_datapath_mgroup() group="239.1.1.100" group6="ff07::1" + setup_ns hv_1 hv_2 + hv[1]=$hv_1 + hv[2]=$hv_2 ip link add veth-hv-1 type veth peer name veth-hv-2 setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 check_hv_connectivity hv2addr1 hv2addr2 + setup_ns vm_11 vm_21 vm_12 vm_22 + vm[11]=$vm_11 + vm[21]=$vm_21 + vm[12]=$vm_12 + vm[22]=$vm_22 setup-vm 1 11 1 10-v4-$hv1addr1-$group-vnifilter,20-v4-$hv1addr1-$group-vnifilter 1 setup-vm 1 21 2 "10-v6-$hv1addr2-$group6-vnifilter,20-v6-$hv1addr2-$group6-vnifilter" 1 @@ -464,12 +482,20 @@ vxlan_vnifilter_datapath_mgroup_pervni() 
group="239.1.1.100" group6="ff07::1" + setup_ns hv_1 hv_2 + hv[1]=$hv_1 + hv[2]=$hv_2 ip link add veth-hv-1 type veth peer name veth-hv-2 setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 check_hv_connectivity hv2addr1 hv2addr2 + setup_ns vm_11 vm_21 vm_12 vm_22 + vm[11]=$vm_11 + vm[21]=$vm_21 + vm[12]=$vm_12 + vm[22]=$vm_22 setup-vm 1 11 1 10-v4-$hv1addr1-$group-vnifilterg,20-v4-$hv1addr1-$group-vnifilterg 1 setup-vm 1 21 2 10-v6-$hv1addr2-$group6-vnifilterg,20-v6-$hv1addr2-$group6-vnifilterg 1 @@ -486,12 +512,22 @@ vxlan_vnifilter_metadata_and_traditional_mix() hv1addr2="2002:fee1::1" hv2addr2="2002:fee1::2" + setup_ns hv_1 hv_2 + hv[1]=$hv_1 + hv[2]=$hv_2 ip link add veth-hv-1 type veth peer name veth-hv-2 setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 check_hv_connectivity hv2addr1 hv2addr2 + setup_ns vm_11 vm_21 vm_31 vm_12 vm_22 vm_32 + vm[11]=$vm_11 + vm[21]=$vm_21 + vm[31]=$vm_31 + vm[12]=$vm_12 + vm[22]=$vm_22 + vm[32]=$vm_32 setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilter,20-v4-$hv1addr1-$hv2addr1-vnifilter 0 setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilter,20-v6-$hv1addr2-$hv2addr2-vnifilter 0 setup-vm 1 31 3 30-v4-$hv1addr1-$hv2addr1-default-4790,40-v6-$hv1addr2-$hv2addr2-default-4790,50-v4-$hv1addr1-$hv2addr1-metadata-4791 0 @@ -504,13 +540,13 @@ vxlan_vnifilter_metadata_and_traditional_mix() check_vm_connectivity "vnifiltering vxlan pervni remote mix" # check VM connectivity over traditional/non-vxlan filtering vxlan devices - run_cmd "ip netns exec vm-31 ping -c 1 -W 1 10.0.30.32" + run_cmd "ip netns exec $vm_31 ping -c 1 -W 1 10.0.30.32" log_test $? 0 "VM connectivity over traditional vxlan (ipv4 default rdst)" - run_cmd "ip netns exec vm-31 ping -c 1 -W 1 10.0.40.32" + run_cmd "ip netns exec $vm_31 ping -c 1 -W 1 10.0.40.32" log_test $? 
0 "VM connectivity over traditional vxlan (ipv6 default rdst)" - run_cmd "ip netns exec vm-31 ping -c 1 -W 1 10.0.50.32" + run_cmd "ip netns exec $vm_31 ping -c 1 -W 1 10.0.50.32" log_test $? 0 "VM connectivity over metadata nonfiltering vxlan (ipv4 default rdst)" } diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index ad993ab3ac..b95c249f81 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -707,6 +707,20 @@ TEST_F(tls, splice_from_pipe) EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } +TEST_F(tls, splice_more) +{ + unsigned int f = SPLICE_F_NONBLOCK | SPLICE_F_MORE | SPLICE_F_GIFT; + int send_len = TLS_PAYLOAD_MAX_LEN; + char mem_send[TLS_PAYLOAD_MAX_LEN]; + int i, send_pipe = 1; + int p[2]; + + ASSERT_GE(pipe(p), 0); + EXPECT_GE(write(p[1], mem_send, send_len), 0); + for (i = 0; i < 32; i++) + EXPECT_EQ(splice(p[0], NULL, self->fd, NULL, send_pipe, f), 1); +} + TEST_F(tls, splice_from_pipe2) { int send_len = 16000; @@ -1471,6 +1485,51 @@ TEST_F(tls, control_msg) EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } +TEST_F(tls, control_msg_nomerge) +{ + char *rec1 = "1111"; + char *rec2 = "2222"; + int send_len = 5; + char buf[15]; + + if (self->notls) + SKIP(return, "no TLS support"); + + EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec1, send_len, 0), send_len); + EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, rec1, send_len), 0); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, rec1, send_len), 0); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len); + EXPECT_EQ(memcmp(buf, rec1, send_len), 0); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len); + EXPECT_EQ(memcmp(buf, rec2, send_len), 0); +} + +TEST_F(tls, 
data_control_data) +{ + char *rec1 = "1111"; + char *rec2 = "2222"; + char *rec3 = "3333"; + int send_len = 5; + char buf[15]; + + if (self->notls) + SKIP(return, "no TLS support"); + + EXPECT_EQ(send(self->fd, rec1, send_len, 0), send_len); + EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len); + EXPECT_EQ(send(self->fd, rec3, send_len, 0), send_len); + + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); +} + TEST_F(tls, shutdown) { char const *test_str = "test_read"; @@ -1860,13 +1919,13 @@ TEST_F(tls_err, poll_partial_rec_async) /* Child should sleep in poll(), never get a wake */ pfd.fd = self->cfd2; pfd.events = POLLIN; - EXPECT_EQ(poll(&pfd, 1, 5), 0); + EXPECT_EQ(poll(&pfd, 1, 20), 0); EXPECT_EQ(write(p[1], &token, 1), 1); /* Barrier #1 */ pfd.fd = self->cfd2; pfd.events = POLLIN; - EXPECT_EQ(poll(&pfd, 1, 5), 1); + EXPECT_EQ(poll(&pfd, 1, 20), 1); exit(!_metadata->passed); } diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh index da5bfd834e..8ff172f7bb 100755 --- a/tools/testing/selftests/net/toeplitz.sh +++ b/tools/testing/selftests/net/toeplitz.sh @@ -147,14 +147,14 @@ setup() { setup_loopback_environment "${DEV}" # Set up server_ns namespace and client_ns namespace - setup_macvlan_ns "${DEV}" server_ns server \ + setup_macvlan_ns "${DEV}" $server_ns server \ "${SERVER_MAC}" "${SERVER_IP}" - setup_macvlan_ns "${DEV}" client_ns client \ + setup_macvlan_ns "${DEV}" $client_ns client \ "${CLIENT_MAC}" "${CLIENT_IP}" } cleanup() { - cleanup_macvlan_ns server_ns server client_ns client + cleanup_macvlan_ns $server_ns server $client_ns client cleanup_loopback "${DEV}" } @@ -170,22 +170,22 @@ if [[ "${TEST_RSS}" = true ]]; then # RPS/RFS must be disabled because they move packets between cpus, # which breaks the PACKET_FANOUT_CPU identification of RSS decisions. 
eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \ - ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ + ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ -C "$(get_rx_irq_cpus)" -s -v & elif [[ ! -z "${RPS_MAP}" ]]; then eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \ - ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ + ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ -r "0x${RPS_MAP}" -s -v & else - ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ + ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v & fi server_pid=$! -ip netns exec client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \ +ip netns exec $client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \ "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" & client_pid=$! diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh index de9ca97abc..282f147609 100755 --- a/tools/testing/selftests/net/traceroute.sh +++ b/tools/testing/selftests/net/traceroute.sh @@ -4,6 +4,7 @@ # Run traceroute/traceroute6 tests # +source lib.sh VERBOSE=0 PAUSE_ON_FAIL=no @@ -69,9 +70,6 @@ create_ns() [ -z "${addr}" ] && addr="-" [ -z "${addr6}" ] && addr6="-" - ip netns add ${ns} - - ip netns exec ${ns} ip link set lo up if [ "${addr}" != "-" ]; then ip netns exec ${ns} ip addr add dev lo ${addr} fi @@ -160,12 +158,7 @@ connect_ns() cleanup_traceroute6() { - local ns - - for ns in host-1 host-2 router-1 router-2 - do - ip netns del ${ns} 2>/dev/null - done + cleanup_ns $h1 $h2 $r1 $r2 } setup_traceroute6() @@ -176,33 +169,34 @@ setup_traceroute6() cleanup_traceroute6 set -e - create_ns host-1 - create_ns host-2 - create_ns router-1 - create_ns router-2 + setup_ns h1 h2 r1 r2 + create_ns $h1 + create_ns $h2 + create_ns $r1 + create_ns $r2 # Setup 
N3 - connect_ns router-2 eth3 - 2000:103::2/64 host-2 eth3 - 2000:103::4/64 - ip netns exec host-2 ip route add default via 2000:103::2 + connect_ns $r2 eth3 - 2000:103::2/64 $h2 eth3 - 2000:103::4/64 + ip netns exec $h2 ip route add default via 2000:103::2 # Setup N2 - connect_ns router-1 eth2 - 2000:102::1/64 router-2 eth2 - 2000:102::2/64 - ip netns exec router-1 ip route add default via 2000:102::2 + connect_ns $r1 eth2 - 2000:102::1/64 $r2 eth2 - 2000:102::2/64 + ip netns exec $r1 ip route add default via 2000:102::2 # Setup N1. host-1 and router-2 connect to a bridge in router-1. - ip netns exec router-1 ip link add name ${brdev} type bridge - ip netns exec router-1 ip link set ${brdev} up - ip netns exec router-1 ip addr add 2000:101::1/64 dev ${brdev} + ip netns exec $r1 ip link add name ${brdev} type bridge + ip netns exec $r1 ip link set ${brdev} up + ip netns exec $r1 ip addr add 2000:101::1/64 dev ${brdev} - connect_ns host-1 eth0 - 2000:101::3/64 router-1 eth0 - - - ip netns exec router-1 ip link set dev eth0 master ${brdev} - ip netns exec host-1 ip route add default via 2000:101::1 + connect_ns $h1 eth0 - 2000:101::3/64 $r1 eth0 - - + ip netns exec $r1 ip link set dev eth0 master ${brdev} + ip netns exec $h1 ip route add default via 2000:101::1 - connect_ns router-2 eth1 - 2000:101::2/64 router-1 eth1 - - - ip netns exec router-1 ip link set dev eth1 master ${brdev} + connect_ns $r2 eth1 - 2000:101::2/64 $r1 eth1 - - + ip netns exec $r1 ip link set dev eth1 master ${brdev} # Prime the network - ip netns exec host-1 ping6 -c5 2000:103::4 >/dev/null 2>&1 + ip netns exec $h1 ping6 -c5 2000:103::4 >/dev/null 2>&1 set +e } @@ -217,7 +211,7 @@ run_traceroute6() setup_traceroute6 # traceroute6 host-2 from host-1 (expects 2000:102::2) - run_cmd host-1 "traceroute6 2000:103::4 | grep -q 2000:102::2" + run_cmd $h1 "traceroute6 2000:103::4 | grep -q 2000:102::2" log_test $? 
0 "IPV6 traceroute" cleanup_traceroute6 @@ -240,12 +234,7 @@ run_traceroute6() cleanup_traceroute() { - local ns - - for ns in host-1 host-2 router - do - ip netns del ${ns} 2>/dev/null - done + cleanup_ns $h1 $h2 $router } setup_traceroute() @@ -254,24 +243,25 @@ setup_traceroute() cleanup_traceroute set -e - create_ns host-1 - create_ns host-2 - create_ns router + setup_ns h1 h2 router + create_ns $h1 + create_ns $h2 + create_ns $router - connect_ns host-1 eth0 1.0.1.3/24 - \ - router eth1 1.0.3.1/24 - - ip netns exec host-1 ip route add default via 1.0.1.1 + connect_ns $h1 eth0 1.0.1.3/24 - \ + $router eth1 1.0.3.1/24 - + ip netns exec $h1 ip route add default via 1.0.1.1 - ip netns exec router ip addr add 1.0.1.1/24 dev eth1 - ip netns exec router sysctl -qw \ + ip netns exec $router ip addr add 1.0.1.1/24 dev eth1 + ip netns exec $router sysctl -qw \ net.ipv4.icmp_errors_use_inbound_ifaddr=1 - connect_ns host-2 eth0 1.0.2.4/24 - \ - router eth2 1.0.2.1/24 - - ip netns exec host-2 ip route add default via 1.0.2.1 + connect_ns $h2 eth0 1.0.2.4/24 - \ + $router eth2 1.0.2.1/24 - + ip netns exec $h2 ip route add default via 1.0.2.1 # Prime the network - ip netns exec host-1 ping -c5 1.0.2.4 >/dev/null 2>&1 + ip netns exec $h1 ping -c5 1.0.2.4 >/dev/null 2>&1 set +e } @@ -286,7 +276,7 @@ run_traceroute() setup_traceroute # traceroute host-2 from host-1 (expects 1.0.1.1). Takes a while. - run_cmd host-1 "traceroute 1.0.2.4 | grep -q 1.0.1.1" + run_cmd $h1 "traceroute 1.0.2.4 | grep -q 1.0.1.1" log_test $? 0 "IPV4 traceroute" cleanup_traceroute diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 3f09ac78f4..8802604148 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -3,6 +3,8 @@ # # Run a series of udpgro functional tests. 
+source net_helper.sh + readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" BPF_FILE="xdp_dummy.o" @@ -51,8 +53,7 @@ run_one() { echo "ok" || \ echo "failed" & - # Hack: let bg programs complete the startup - sleep 0.2 + wait_local_port_listen ${PEER_NS} 8000 udp ./udpgso_bench_tx ${tx_args} ret=$? wait $(jobs -p) @@ -97,7 +98,7 @@ run_one_nat() { echo "ok" || \ echo "failed"& - sleep 0.1 + wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} ret=$? kill -INT $pid @@ -118,11 +119,9 @@ run_one_2sock() { echo "ok" || \ echo "failed" & - # Hack: let bg programs complete the startup - sleep 0.2 + wait_local_port_listen "${PEER_NS}" 12345 udp ./udpgso_bench_tx ${tx_args} -p 12345 - sleep 0.1 - # first UDP GSO socket should be closed at this point + wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} ret=$? wait $(jobs -p) diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index 65ff1d4240..7080eae531 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -3,6 +3,8 @@ # # Run a series of udpgro benchmarks +source net_helper.sh + readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" BPF_FILE="xdp_dummy.o" @@ -40,8 +42,7 @@ run_one() { ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r & - # Hack: let bg programs complete the startup - sleep 0.2 + wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} } diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index bd51d386b5..e1ff645bd3 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -3,6 +3,8 @@ # # Run a series of udpgro benchmarks +source net_helper.sh + readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" BPF_FILE="xdp_dummy.o" @@ -45,8 +47,7 @@ run_one() { echo ${rx_args} ip netns 
exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & - # Hack: let bg programs complete the startup - sleep 0.2 + wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} } diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 9cd5e885e9..f4549e6894 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -241,7 +241,7 @@ for family in 4 6; do create_vxlan_pair ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on - run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1 + run_test "GRO frag list over UDP tunnel" $OL_NET$DST 10 10 cleanup # use NAT to circumvent GRO FWD check @@ -254,13 +254,7 @@ for family in 4 6; do # load arp cache before running the test to reduce the amount of # stray traffic on top of the UDP tunnel ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null - run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST - cleanup - - create_vxlan_pair - run_bench "UDP tunnel fwd perf" $OL_NET$DST - ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on - run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST + run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST cleanup done diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 7badaf215d..b02080d09f 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -34,7 +34,7 @@ #endif #ifndef UDP_MAX_SEGMENTS -#define UDP_MAX_SEGMENTS (1 << 6UL) +#define UDP_MAX_SEGMENTS (1 << 7UL) #endif #define CONST_MTU_TEST 1500 diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 27574bbf2d..5ae85def07 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -247,6 +247,20 @@ chk_gro " - aggregation with TSO off" 1 cleanup create_ns +ip -n $NS_DST link set dev veth$DST up +ip -n $NS_DST link set dev veth$DST 
xdp object ${BPF_FILE} section xdp +chk_gro_flag "gro vs xdp while down - gro flag on" $DST on +ip -n $NS_DST link set dev veth$DST down +chk_gro_flag " - after down" $DST on +ip -n $NS_DST link set dev veth$DST xdp off +chk_gro_flag " - after xdp off" $DST off +ip -n $NS_DST link set dev veth$DST up +chk_gro_flag " - after up" $DST off +ip -n $NS_SRC link set dev veth$SRC xdp object ${BPF_FILE} section xdp +chk_gro_flag " - after peer xdp" $DST off +cleanup + +create_ns chk_channels "default channels" $DST 1 1 ip -n $NS_DST link set dev veth$DST down diff --git a/tools/testing/selftests/net/vrf-xfrm-tests.sh b/tools/testing/selftests/net/vrf-xfrm-tests.sh index 452638ae8a..b64dd89169 100755 --- a/tools/testing/selftests/net/vrf-xfrm-tests.sh +++ b/tools/testing/selftests/net/vrf-xfrm-tests.sh @@ -3,9 +3,7 @@ # # Various combinations of VRF with xfrms and qdisc. -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - +source lib.sh PAUSE_ON_FAIL=no VERBOSE=0 ret=0 @@ -67,7 +65,7 @@ run_cmd_host1() printf " COMMAND: $cmd\n" fi - out=$(eval ip netns exec host1 $cmd 2>&1) + out=$(eval ip netns exec $host1 $cmd 2>&1) rc=$? 
if [ "$VERBOSE" = "1" ]; then if [ -n "$out" ]; then @@ -116,9 +114,6 @@ create_ns() [ -z "${addr}" ] && addr="-" [ -z "${addr6}" ] && addr6="-" - ip netns add ${ns} - - ip -netns ${ns} link set lo up if [ "${addr}" != "-" ]; then ip -netns ${ns} addr add dev lo ${addr} fi @@ -177,25 +172,25 @@ connect_ns() cleanup() { - ip netns del host1 - ip netns del host2 + cleanup_ns $host1 $host2 } setup() { - create_ns "host1" - create_ns "host2" + setup_ns host1 host2 + create_ns "$host1" + create_ns "$host2" - connect_ns "host1" eth0 ${HOST1_4}/24 ${HOST1_6}/64 \ - "host2" eth0 ${HOST2_4}/24 ${HOST2_6}/64 + connect_ns "$host1" eth0 ${HOST1_4}/24 ${HOST1_6}/64 \ + "$host2" eth0 ${HOST2_4}/24 ${HOST2_6}/64 - create_vrf "host1" ${VRF} ${TABLE} - ip -netns host1 link set dev eth0 master ${VRF} + create_vrf "$host1" ${VRF} ${TABLE} + ip -netns $host1 link set dev eth0 master ${VRF} } cleanup_xfrm() { - for ns in host1 host2 + for ns in $host1 $host2 do for x in state policy do @@ -218,57 +213,57 @@ setup_xfrm() # # host1 - IPv4 out - ip -netns host1 xfrm policy add \ + ip -netns $host1 xfrm policy add \ src ${h1_4} dst ${h2_4} ${devarg} dir out \ tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel # host2 - IPv4 in - ip -netns host2 xfrm policy add \ + ip -netns $host2 xfrm policy add \ src ${h1_4} dst ${h2_4} dir in \ tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel # host1 - IPv4 in - ip -netns host1 xfrm policy add \ + ip -netns $host1 xfrm policy add \ src ${h2_4} dst ${h1_4} ${devarg} dir in \ tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel # host2 - IPv4 out - ip -netns host2 xfrm policy add \ + ip -netns $host2 xfrm policy add \ src ${h2_4} dst ${h1_4} dir out \ tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel # host1 - IPv6 out - ip -6 -netns host1 xfrm policy add \ + ip -6 -netns $host1 xfrm policy add \ src ${h1_6} dst ${h2_6} ${devarg} dir out \ tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel # host2 - IPv6 in - ip -6 -netns 
host2 xfrm policy add \ + ip -6 -netns $host2 xfrm policy add \ src ${h1_6} dst ${h2_6} dir in \ tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel # host1 - IPv6 in - ip -6 -netns host1 xfrm policy add \ + ip -6 -netns $host1 xfrm policy add \ src ${h2_6} dst ${h1_6} ${devarg} dir in \ tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel # host2 - IPv6 out - ip -6 -netns host2 xfrm policy add \ + ip -6 -netns $host2 xfrm policy add \ src ${h2_6} dst ${h1_6} dir out \ tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel # # state # - ip -netns host1 xfrm state add src ${HOST1_4} dst ${HOST2_4} \ + ip -netns $host1 xfrm state add src ${HOST1_4} dst ${HOST2_4} \ proto esp spi ${SPI_1} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \ enc 'cbc(aes)' ${ENC_1} \ sel src ${h1_4} dst ${h2_4} ${devarg} - ip -netns host2 xfrm state add src ${HOST1_4} dst ${HOST2_4} \ + ip -netns $host2 xfrm state add src ${HOST1_4} dst ${HOST2_4} \ proto esp spi ${SPI_1} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \ @@ -276,14 +271,14 @@ setup_xfrm() sel src ${h1_4} dst ${h2_4} - ip -netns host1 xfrm state add src ${HOST2_4} dst ${HOST1_4} \ + ip -netns $host1 xfrm state add src ${HOST2_4} dst ${HOST1_4} \ proto esp spi ${SPI_2} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \ enc 'cbc(aes)' ${ENC_2} \ sel src ${h2_4} dst ${h1_4} ${devarg} - ip -netns host2 xfrm state add src ${HOST2_4} dst ${HOST1_4} \ + ip -netns $host2 xfrm state add src ${HOST2_4} dst ${HOST1_4} \ proto esp spi ${SPI_2} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \ @@ -291,14 +286,14 @@ setup_xfrm() sel src ${h2_4} dst ${h1_4} - ip -6 -netns host1 xfrm state add src ${HOST1_6} dst ${HOST2_6} \ + ip -6 -netns $host1 xfrm state add src ${HOST1_6} dst ${HOST2_6} \ proto esp spi ${SPI_1} reqid 0 mode tunnel \ replay-window 4 
replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \ enc 'cbc(aes)' ${ENC_1} \ sel src ${h1_6} dst ${h2_6} ${devarg} - ip -6 -netns host2 xfrm state add src ${HOST1_6} dst ${HOST2_6} \ + ip -6 -netns $host2 xfrm state add src ${HOST1_6} dst ${HOST2_6} \ proto esp spi ${SPI_1} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \ @@ -306,14 +301,14 @@ setup_xfrm() sel src ${h1_6} dst ${h2_6} - ip -6 -netns host1 xfrm state add src ${HOST2_6} dst ${HOST1_6} \ + ip -6 -netns $host1 xfrm state add src ${HOST2_6} dst ${HOST1_6} \ proto esp spi ${SPI_2} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \ enc 'cbc(aes)' ${ENC_2} \ sel src ${h2_6} dst ${h1_6} ${devarg} - ip -6 -netns host2 xfrm state add src ${HOST2_6} dst ${HOST1_6} \ + ip -6 -netns $host2 xfrm state add src ${HOST2_6} dst ${HOST1_6} \ proto esp spi ${SPI_2} reqid 0 mode tunnel \ replay-window 4 replay-oseq 0x4 \ auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \ @@ -323,22 +318,22 @@ setup_xfrm() cleanup_xfrm_dev() { - ip -netns host1 li del xfrm0 - ip -netns host2 addr del ${XFRM2_4}/24 dev eth0 - ip -netns host2 addr del ${XFRM2_6}/64 dev eth0 + ip -netns $host1 li del xfrm0 + ip -netns $host2 addr del ${XFRM2_4}/24 dev eth0 + ip -netns $host2 addr del ${XFRM2_6}/64 dev eth0 } setup_xfrm_dev() { local vrfarg="vrf ${VRF}" - ip -netns host1 li add type xfrm dev eth0 if_id ${IF_ID} - ip -netns host1 li set xfrm0 ${vrfarg} up - ip -netns host1 addr add ${XFRM1_4}/24 dev xfrm0 - ip -netns host1 addr add ${XFRM1_6}/64 dev xfrm0 + ip -netns $host1 li add type xfrm dev eth0 if_id ${IF_ID} + ip -netns $host1 li set xfrm0 ${vrfarg} up + ip -netns $host1 addr add ${XFRM1_4}/24 dev xfrm0 + ip -netns $host1 addr add ${XFRM1_6}/64 dev xfrm0 - ip -netns host2 addr add ${XFRM2_4}/24 dev eth0 - ip -netns host2 addr add ${XFRM2_6}/64 dev eth0 + ip -netns $host2 addr add ${XFRM2_4}/24 dev eth0 + ip -netns $host2 addr add ${XFRM2_6}/64 dev eth0 
setup_xfrm ${XFRM1_4} ${XFRM2_4} ${XFRM1_6} ${XFRM2_6} "if_id ${IF_ID}" } diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh index dedc52562b..2da32f4c47 100755 --- a/tools/testing/selftests/net/vrf_route_leaking.sh +++ b/tools/testing/selftests/net/vrf_route_leaking.sh @@ -58,6 +58,7 @@ # to send an ICMP error back to the source when the ttl of a packet reaches 1 # while it is forwarded between different vrfs. +source lib.sh VERBOSE=0 PAUSE_ON_FAIL=no DEFAULT_TTYPE=sym @@ -171,11 +172,7 @@ run_cmd_grep() cleanup() { - local ns - - for ns in h1 h2 r1 r2; do - ip netns del $ns 2>/dev/null - done + cleanup_ns $h1 $h2 $r1 $r2 } setup_vrf() @@ -212,72 +209,69 @@ setup_sym() # # create nodes as namespaces - # - for ns in h1 h2 r1; do - ip netns add $ns - ip -netns $ns link set lo up - - case "${ns}" in - h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 - ;; - r1) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 - esac + setup_ns h1 h2 r1 + for ns in $h1 $h2 $r1; do + if echo $ns | grep -q h[12]-; then + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 + else + ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 + fi done # # create interconnects # - ip -netns h1 link add eth0 type veth peer name r1h1 - ip -netns h1 link set r1h1 netns r1 name eth0 up + ip -netns $h1 link add eth0 type veth peer name r1h1 + ip -netns $h1 link set r1h1 netns $r1 name eth0 up - ip -netns h2 link add eth0 type veth peer name r1h2 - ip -netns h2 link set r1h2 netns r1 name eth1 up + ip -netns $h2 link add eth0 type veth peer name r1h2 + ip -netns $h2 link set r1h2 netns $r1 name eth1 up # # h1 # - ip -netns h1 addr add dev eth0 
${H1_N1_IP}/24 - ip -netns h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad - ip -netns h1 link set eth0 up + ip -netns $h1 addr add dev eth0 ${H1_N1_IP}/24 + ip -netns $h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad + ip -netns $h1 link set eth0 up # h1 to h2 via r1 - ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev eth0 - ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0 + ip -netns $h1 route add ${H2_N2} via ${R1_N1_IP} dev eth0 + ip -netns $h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0 # # h2 # - ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24 - ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad - ip -netns h2 link set eth0 up + ip -netns $h2 addr add dev eth0 ${H2_N2_IP}/24 + ip -netns $h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad + ip -netns $h2 link set eth0 up # h2 to h1 via r1 - ip -netns h2 route add default via ${R1_N2_IP} dev eth0 - ip -netns h2 -6 route add default via ${R1_N2_IP6} dev eth0 + ip -netns $h2 route add default via ${R1_N2_IP} dev eth0 + ip -netns $h2 -6 route add default via ${R1_N2_IP6} dev eth0 # # r1 # - setup_vrf r1 - create_vrf r1 blue 1101 - create_vrf r1 red 1102 - ip -netns r1 link set mtu 1400 dev eth1 - ip -netns r1 link set eth0 vrf blue up - ip -netns r1 link set eth1 vrf red up - ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24 - ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad - ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24 - ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad + setup_vrf $r1 + create_vrf $r1 blue 1101 + create_vrf $r1 red 1102 + ip -netns $r1 link set mtu 1400 dev eth1 + ip -netns $r1 link set eth0 vrf blue up + ip -netns $r1 link set eth1 vrf red up + ip -netns $r1 addr add dev eth0 ${R1_N1_IP}/24 + ip -netns $r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad + ip -netns $r1 addr add dev eth1 ${R1_N2_IP}/24 + ip -netns $r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad # Route leak from blue to red - ip -netns r1 route add vrf blue ${H2_N2} dev red - ip -netns r1 -6 route add vrf 
blue ${H2_N2_6} dev red + ip -netns $r1 route add vrf blue ${H2_N2} dev red + ip -netns $r1 -6 route add vrf blue ${H2_N2_6} dev red # Route leak from red to blue - ip -netns r1 route add vrf red ${H1_N1} dev blue - ip -netns r1 -6 route add vrf red ${H1_N1_6} dev blue + ip -netns $r1 route add vrf red ${H1_N1} dev blue + ip -netns $r1 -6 route add vrf red ${H1_N1_6} dev blue # Wait for ip config to settle @@ -293,90 +287,87 @@ setup_asym() # # create nodes as namespaces - # - for ns in h1 h2 r1 r2; do - ip netns add $ns - ip -netns $ns link set lo up - - case "${ns}" in - h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 - ;; - r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 - ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 - esac + setup_ns h1 h2 r1 r2 + for ns in $h1 $h2 $r1 $r2; do + if echo $ns | grep -q h[12]-; then + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 + else + ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 + fi done # # create interconnects # - ip -netns h1 link add eth0 type veth peer name r1h1 - ip -netns h1 link set r1h1 netns r1 name eth0 up + ip -netns $h1 link add eth0 type veth peer name r1h1 + ip -netns $h1 link set r1h1 netns $r1 name eth0 up - ip -netns h1 link add eth1 type veth peer name r2h1 - ip -netns h1 link set r2h1 netns r2 name eth0 up + ip -netns $h1 link add eth1 type veth peer name r2h1 + ip -netns $h1 link set r2h1 netns $r2 name eth0 up - ip -netns h2 link add eth0 type veth peer name r1h2 - ip -netns h2 link set r1h2 netns r1 name eth1 up + ip -netns $h2 link add eth0 type veth peer name r1h2 + ip -netns $h2 link set r1h2 netns $r1 name eth1 up - ip -netns h2 link add eth1 type veth peer name r2h2 - ip -netns h2 link set r2h2 netns r2 name eth1 up + ip 
-netns $h2 link add eth1 type veth peer name r2h2 + ip -netns $h2 link set r2h2 netns $r2 name eth1 up # # h1 # - ip -netns h1 link add br0 type bridge - ip -netns h1 link set br0 up - ip -netns h1 addr add dev br0 ${H1_N1_IP}/24 - ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad - ip -netns h1 link set eth0 master br0 up - ip -netns h1 link set eth1 master br0 up + ip -netns $h1 link add br0 type bridge + ip -netns $h1 link set br0 up + ip -netns $h1 addr add dev br0 ${H1_N1_IP}/24 + ip -netns $h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad + ip -netns $h1 link set eth0 master br0 up + ip -netns $h1 link set eth1 master br0 up # h1 to h2 via r1 - ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev br0 - ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0 + ip -netns $h1 route add ${H2_N2} via ${R1_N1_IP} dev br0 + ip -netns $h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0 # # h2 # - ip -netns h2 link add br0 type bridge - ip -netns h2 link set br0 up - ip -netns h2 addr add dev br0 ${H2_N2_IP}/24 - ip -netns h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad - ip -netns h2 link set eth0 master br0 up - ip -netns h2 link set eth1 master br0 up + ip -netns $h2 link add br0 type bridge + ip -netns $h2 link set br0 up + ip -netns $h2 addr add dev br0 ${H2_N2_IP}/24 + ip -netns $h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad + ip -netns $h2 link set eth0 master br0 up + ip -netns $h2 link set eth1 master br0 up # h2 to h1 via r2 - ip -netns h2 route add default via ${R2_N2_IP} dev br0 - ip -netns h2 -6 route add default via ${R2_N2_IP6} dev br0 + ip -netns $h2 route add default via ${R2_N2_IP} dev br0 + ip -netns $h2 -6 route add default via ${R2_N2_IP6} dev br0 # # r1 # - setup_vrf r1 - create_vrf r1 blue 1101 - create_vrf r1 red 1102 - ip -netns r1 link set mtu 1400 dev eth1 - ip -netns r1 link set eth0 vrf blue up - ip -netns r1 link set eth1 vrf red up - ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24 - ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 
nodad - ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24 - ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad + setup_vrf $r1 + create_vrf $r1 blue 1101 + create_vrf $r1 red 1102 + ip -netns $r1 link set mtu 1400 dev eth1 + ip -netns $r1 link set eth0 vrf blue up + ip -netns $r1 link set eth1 vrf red up + ip -netns $r1 addr add dev eth0 ${R1_N1_IP}/24 + ip -netns $r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad + ip -netns $r1 addr add dev eth1 ${R1_N2_IP}/24 + ip -netns $r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad # Route leak from blue to red - ip -netns r1 route add vrf blue ${H2_N2} dev red - ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red + ip -netns $r1 route add vrf blue ${H2_N2} dev red + ip -netns $r1 -6 route add vrf blue ${H2_N2_6} dev red # No route leak from red to blue # # r2 # - ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24 - ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad - ip -netns r2 addr add dev eth1 ${R2_N2_IP}/24 - ip -netns r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad + ip -netns $r2 addr add dev eth0 ${R2_N1_IP}/24 + ip -netns $r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad + ip -netns $r2 addr add dev eth1 ${R2_N2_IP}/24 + ip -netns $r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad # Wait for ip config to settle sleep 2 @@ -384,14 +375,14 @@ setup_asym() check_connectivity() { - ip netns exec h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1 + ip netns exec $h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1 log_test $? 0 "Basic IPv4 connectivity" return $? } check_connectivity6() { - ip netns exec h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 + ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 log_test $? 0 "Basic IPv6 connectivity" return $? } @@ -426,7 +417,7 @@ ipv4_traceroute() check_connectivity || return - run_cmd_grep "${R1_N1_IP}" ip netns exec h1 traceroute ${H2_N2_IP} + run_cmd_grep "${R1_N1_IP}" ip netns exec $h1 traceroute ${H2_N2_IP} log_test $? 
0 "Traceroute reports a hop on r1" } @@ -449,7 +440,7 @@ ipv6_traceroute() check_connectivity6 || return - run_cmd_grep "${R1_N1_IP6}" ip netns exec h1 traceroute6 ${H2_N2_IP6} + run_cmd_grep "${R1_N1_IP6}" ip netns exec $h1 traceroute6 ${H2_N2_IP6} log_test $? 0 "Traceroute6 reports a hop on r1" } @@ -470,7 +461,7 @@ ipv4_ping_ttl() check_connectivity || return - run_cmd_grep "Time to live exceeded" ip netns exec h1 ping -t1 -c1 -W2 ${H2_N2_IP} + run_cmd_grep "Time to live exceeded" ip netns exec $h1 ping -t1 -c1 -W2 ${H2_N2_IP} log_test $? 0 "Ping received ICMP ttl exceeded" } @@ -491,7 +482,7 @@ ipv4_ping_frag() check_connectivity || return - run_cmd_grep "Frag needed" ip netns exec h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP} + run_cmd_grep "Frag needed" ip netns exec $h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP} log_test $? 0 "Ping received ICMP Frag needed" } @@ -512,7 +503,7 @@ ipv6_ping_ttl() check_connectivity6 || return - run_cmd_grep "Time exceeded: Hop limit" ip netns exec h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6} + run_cmd_grep "Time exceeded: Hop limit" ip netns exec $h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6} log_test $? 0 "Ping received ICMP Hop limit" } @@ -533,7 +524,7 @@ ipv6_ping_frag() check_connectivity6 || return - run_cmd_grep "Packet too big" ip netns exec h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6} + run_cmd_grep "Packet too big" ip netns exec $h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6} log_test $? 0 "Ping received ICMP Packet too big" } diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh index 417d214264..01552b5425 100755 --- a/tools/testing/selftests/net/vrf_strict_mode_test.sh +++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh @@ -3,9 +3,7 @@ # This test is designed for testing the new VRF strict_mode functionality. -# Kselftest framework requirement - SKIP code is 4. 
-ksft_skip=4 - +source lib.sh ret=0 # identifies the "init" network namespace which is often called root network @@ -247,13 +245,12 @@ setup() { modprobe vrf - ip netns add testns - ip netns exec testns ip link set lo up + setup_ns testns } cleanup() { - ip netns del testns 2>/dev/null + ip netns del $testns 2>/dev/null ip link del vrf100 2>/dev/null ip link del vrf101 2>/dev/null @@ -298,28 +295,28 @@ vrf_strict_mode_tests_testns() { log_section "VRF strict_mode test on testns network namespace" - vrf_strict_mode_check_support testns + vrf_strict_mode_check_support $testns - strict_mode_check_default testns + strict_mode_check_default $testns - enable_strict_mode_and_check testns + enable_strict_mode_and_check $testns - add_vrf_and_check testns vrf100 100 - config_vrf_and_check testns 10.0.100.1/24 vrf100 + add_vrf_and_check $testns vrf100 100 + config_vrf_and_check $testns 10.0.100.1/24 vrf100 - add_vrf_and_check_fail testns vrf101 100 + add_vrf_and_check_fail $testns vrf101 100 - add_vrf_and_check_fail testns vrf102 100 + add_vrf_and_check_fail $testns vrf102 100 - add_vrf_and_check testns vrf200 200 + add_vrf_and_check $testns vrf200 200 - disable_strict_mode_and_check testns + disable_strict_mode_and_check $testns - add_vrf_and_check testns vrf101 100 + add_vrf_and_check $testns vrf101 100 - add_vrf_and_check testns vrf102 100 + add_vrf_and_check $testns vrf102 100 - #the strict_mode is disabled in the testns + #the strict_mode is disabled in the $testns } vrf_strict_mode_tests_mix() @@ -328,25 +325,25 @@ vrf_strict_mode_tests_mix() read_strict_mode_compare_and_check init 1 - read_strict_mode_compare_and_check testns 0 + read_strict_mode_compare_and_check $testns 0 - del_vrf_and_check testns vrf101 + del_vrf_and_check $testns vrf101 - del_vrf_and_check testns vrf102 + del_vrf_and_check $testns vrf102 disable_strict_mode_and_check init - enable_strict_mode_and_check testns + enable_strict_mode_and_check $testns enable_strict_mode_and_check init 
enable_strict_mode_and_check init - disable_strict_mode_and_check testns - disable_strict_mode_and_check testns + disable_strict_mode_and_check $testns + disable_strict_mode_and_check $testns read_strict_mode_compare_and_check init 1 - read_strict_mode_compare_and_check testns 0 + read_strict_mode_compare_and_check $testns 0 } ################################################################################ diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh index bdf450eaf6..4577895306 100755 --- a/tools/testing/selftests/net/xfrm_policy.sh +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -18,8 +18,7 @@ # ns1: ping 10.0.2.254: does NOT pass via ipsec tunnel (exception) # ns2: ping 10.0.1.254: does NOT pass via ipsec tunnel (exception) -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source lib.sh ret=0 policy_checks_ok=1 @@ -204,24 +203,24 @@ check_xfrm() { ip=$2 local lret=0 - ip netns exec ns1 ping -q -c 1 10.0.2.$ip > /dev/null + ip netns exec ${ns[1]} ping -q -c 1 10.0.2.$ip > /dev/null - check_ipt_policy_count ns3 + check_ipt_policy_count ${ns[3]} if [ $? -ne $rval ] ; then lret=1 fi - check_ipt_policy_count ns4 + check_ipt_policy_count ${ns[4]} if [ $? -ne $rval ] ; then lret=1 fi - ip netns exec ns2 ping -q -c 1 10.0.1.$ip > /dev/null + ip netns exec ${ns[2]} ping -q -c 1 10.0.1.$ip > /dev/null - check_ipt_policy_count ns3 + check_ipt_policy_count ${ns[3]} if [ $? -ne $rval ] ; then lret=1 fi - check_ipt_policy_count ns4 + check_ipt_policy_count ${ns[4]} if [ $? 
-ne $rval ] ; then lret=1 fi @@ -270,11 +269,11 @@ check_hthresh_repeat() i=0 for i in $(seq 1 10);do - ip -net ns1 xfrm policy update src e000:0001::0000 dst ff01::0014:0000:0001 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break - ip -net ns1 xfrm policy set hthresh6 0 28 || break + ip -net ${ns[1]} xfrm policy update src e000:0001::0000 dst ff01::0014:0000:0001 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break + ip -net ${ns[1]} xfrm policy set hthresh6 0 28 || break - ip -net ns1 xfrm policy update src e000:0001::0000 dst ff01::01 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break - ip -net ns1 xfrm policy set hthresh6 0 28 || break + ip -net ${ns[1]} xfrm policy update src e000:0001::0000 dst ff01::01 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break + ip -net ${ns[1]} xfrm policy set hthresh6 0 28 || break done if [ $i -ne 10 ] ;then @@ -347,79 +346,80 @@ if [ $? 
-ne 0 ];then exit $ksft_skip fi -for i in 1 2 3 4; do - ip netns add ns$i - ip -net ns$i link set lo up -done +setup_ns ns1 ns2 ns3 ns4 +ns[1]=$ns1 +ns[2]=$ns2 +ns[3]=$ns3 +ns[4]=$ns4 DEV=veth0 -ip link add $DEV netns ns1 type veth peer name eth1 netns ns3 -ip link add $DEV netns ns2 type veth peer name eth1 netns ns4 +ip link add $DEV netns ${ns[1]} type veth peer name eth1 netns ${ns[3]} +ip link add $DEV netns ${ns[2]} type veth peer name eth1 netns ${ns[4]} -ip link add $DEV netns ns3 type veth peer name veth0 netns ns4 +ip link add $DEV netns ${ns[3]} type veth peer name veth0 netns ${ns[4]} DEV=veth0 for i in 1 2; do - ip -net ns$i link set $DEV up - ip -net ns$i addr add 10.0.$i.2/24 dev $DEV - ip -net ns$i addr add dead:$i::2/64 dev $DEV - - ip -net ns$i addr add 10.0.$i.253 dev $DEV - ip -net ns$i addr add 10.0.$i.254 dev $DEV - ip -net ns$i addr add dead:$i::fd dev $DEV - ip -net ns$i addr add dead:$i::fe dev $DEV + ip -net ${ns[$i]} link set $DEV up + ip -net ${ns[$i]} addr add 10.0.$i.2/24 dev $DEV + ip -net ${ns[$i]} addr add dead:$i::2/64 dev $DEV + + ip -net ${ns[$i]} addr add 10.0.$i.253 dev $DEV + ip -net ${ns[$i]} addr add 10.0.$i.254 dev $DEV + ip -net ${ns[$i]} addr add dead:$i::fd dev $DEV + ip -net ${ns[$i]} addr add dead:$i::fe dev $DEV done for i in 3 4; do -ip -net ns$i link set eth1 up -ip -net ns$i link set veth0 up + ip -net ${ns[$i]} link set eth1 up + ip -net ${ns[$i]} link set veth0 up done -ip -net ns1 route add default via 10.0.1.1 -ip -net ns2 route add default via 10.0.2.1 +ip -net ${ns[1]} route add default via 10.0.1.1 +ip -net ${ns[2]} route add default via 10.0.2.1 -ip -net ns3 addr add 10.0.1.1/24 dev eth1 -ip -net ns3 addr add 10.0.3.1/24 dev veth0 -ip -net ns3 addr add 2001:1::1/64 dev eth1 -ip -net ns3 addr add 2001:3::1/64 dev veth0 +ip -net ${ns[3]} addr add 10.0.1.1/24 dev eth1 +ip -net ${ns[3]} addr add 10.0.3.1/24 dev veth0 +ip -net ${ns[3]} addr add 2001:1::1/64 dev eth1 +ip -net ${ns[3]} addr add 2001:3::1/64 dev 
veth0 -ip -net ns3 route add default via 10.0.3.10 +ip -net ${ns[3]} route add default via 10.0.3.10 -ip -net ns4 addr add 10.0.2.1/24 dev eth1 -ip -net ns4 addr add 10.0.3.10/24 dev veth0 -ip -net ns4 addr add 2001:2::1/64 dev eth1 -ip -net ns4 addr add 2001:3::10/64 dev veth0 -ip -net ns4 route add default via 10.0.3.1 +ip -net ${ns[4]} addr add 10.0.2.1/24 dev eth1 +ip -net ${ns[4]} addr add 10.0.3.10/24 dev veth0 +ip -net ${ns[4]} addr add 2001:2::1/64 dev eth1 +ip -net ${ns[4]} addr add 2001:3::10/64 dev veth0 +ip -net ${ns[4]} route add default via 10.0.3.1 for j in 4 6; do for i in 3 4;do - ip netns exec ns$i sysctl net.ipv$j.conf.eth1.forwarding=1 > /dev/null - ip netns exec ns$i sysctl net.ipv$j.conf.veth0.forwarding=1 > /dev/null + ip netns exec ${ns[$i]} sysctl net.ipv$j.conf.eth1.forwarding=1 > /dev/null + ip netns exec ${ns[$i]} sysctl net.ipv$j.conf.veth0.forwarding=1 > /dev/null done done # abuse iptables rule counter to check if ping matches a policy -ip netns exec ns3 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec -ip netns exec ns4 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec +ip netns exec ${ns[3]} iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec +ip netns exec ${ns[4]} iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec if [ $? 
-ne 0 ];then echo "SKIP: Could not insert iptables rule" - for i in 1 2 3 4;do ip netns del ns$i;done + cleanup_ns $ns1 $ns2 $ns3 $ns4 exit $ksft_skip fi # localip remoteip localnet remotenet -do_esp ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 -do_esp ns3 dead:3::1 dead:3::10 dead:1::/64 dead:2::/64 $SPI1 $SPI2 -do_esp ns4 10.0.3.10 10.0.3.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 -do_esp ns4 dead:3::10 dead:3::1 dead:2::/64 dead:1::/64 $SPI2 $SPI1 +do_esp ${ns[3]} 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 +do_esp ${ns[3]} dead:3::1 dead:3::10 dead:1::/64 dead:2::/64 $SPI1 $SPI2 +do_esp ${ns[4]} 10.0.3.10 10.0.3.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 +do_esp ${ns[4]} dead:3::10 dead:3::1 dead:2::/64 dead:1::/64 $SPI2 $SPI1 -do_dummies4 ns3 -do_dummies6 ns4 +do_dummies4 ${ns[3]} +do_dummies6 ${ns[4]} -do_esp_policy_get_check ns3 10.0.1.0/24 10.0.2.0/24 -do_esp_policy_get_check ns4 10.0.2.0/24 10.0.1.0/24 -do_esp_policy_get_check ns3 dead:1::/64 dead:2::/64 -do_esp_policy_get_check ns4 dead:2::/64 dead:1::/64 +do_esp_policy_get_check ${ns[3]} 10.0.1.0/24 10.0.2.0/24 +do_esp_policy_get_check ${ns[4]} 10.0.2.0/24 10.0.1.0/24 +do_esp_policy_get_check ${ns[3]} dead:1::/64 dead:2::/64 +do_esp_policy_get_check ${ns[4]} dead:2::/64 dead:1::/64 # ping to .254 should use ipsec, exception is not installed. 
check_xfrm 1 254 @@ -432,11 +432,11 @@ fi # installs exceptions # localip remoteip encryptdst plaindst -do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 -do_exception ns4 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28 +do_exception ${ns[3]} 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 +do_exception ${ns[4]} 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28 -do_exception ns3 dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96 -do_exception ns4 dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96 +do_exception ${ns[3]} dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96 +do_exception ${ns[4]} dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96 check_exceptions "exceptions" if [ $? -ne 0 ]; then @@ -444,14 +444,14 @@ if [ $? -ne 0 ]; then fi # insert block policies with adjacent/overlapping netmasks -do_overlap ns3 +do_overlap ${ns[3]} check_exceptions "exceptions and block policies" if [ $? -ne 0 ]; then ret=1 fi -for n in ns3 ns4;do +for n in ${ns[3]} ${ns[4]};do ip -net $n xfrm policy set hthresh4 28 24 hthresh6 126 125 sleep $((RANDOM%5)) done @@ -459,19 +459,19 @@ done check_exceptions "exceptions and block policies after hresh changes" # full flush of policy db, check everything gets freed incl. 
internal meta data -ip -net ns3 xfrm policy flush +ip -net ${ns[3]} xfrm policy flush -do_esp_policy ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 -do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 +do_esp_policy ${ns[3]} 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 +do_exception ${ns[3]} 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 # move inexact policies to hash table -ip -net ns3 xfrm policy set hthresh4 16 16 +ip -net ${ns[3]} xfrm policy set hthresh4 16 16 sleep $((RANDOM%5)) check_exceptions "exceptions and block policies after hthresh change in ns3" # restore original hthresh settings -- move policies back to tables -for n in ns3 ns4;do +for n in ${ns[3]} ${ns[4]};do ip -net $n xfrm policy set hthresh4 32 32 hthresh6 128 128 sleep $((RANDOM%5)) done @@ -479,8 +479,8 @@ check_exceptions "exceptions and block policies after htresh change to normal" check_hthresh_repeat "policies with repeated htresh change" -check_random_order ns3 "policies inserted in random order" +check_random_order ${ns[3]} "policies inserted in random order" -for i in 1 2 3 4;do ip netns del ns$i;done +cleanup_ns $ns1 $ns2 $ns3 $ns4 exit $ret diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore index 4b2928e1c1..c2229b3e40 100644 --- a/tools/testing/selftests/netfilter/.gitignore +++ b/tools/testing/selftests/netfilter/.gitignore @@ -2,3 +2,5 @@ nf-queue connect_close audit_logread +conntrack_dump_flush +sctp_collision diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index bced422b78..936c3085bb 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -7,13 +7,15 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \ - conntrack_sctp_collision.sh 
xt_string.sh + conntrack_sctp_collision.sh xt_string.sh \ + bridge_netfilter.sh HOSTPKG_CONFIG := pkg-config CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) -TEST_GEN_FILES = nf-queue connect_close audit_logread sctp_collision +TEST_GEN_FILES = nf-queue connect_close audit_logread sctp_collision \ + conntrack_dump_flush include ../lib.mk diff --git a/tools/testing/selftests/netfilter/bridge_netfilter.sh b/tools/testing/selftests/netfilter/bridge_netfilter.sh new file mode 100644 index 0000000000..659b3ab02c --- /dev/null +++ b/tools/testing/selftests/netfilter/bridge_netfilter.sh @@ -0,0 +1,188 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test bridge netfilter + conntrack, a combination that doesn't really work, +# with multicast/broadcast packets racing for hash table insertion. + +# eth0 br0 eth0 +# setup is: ns1 <->,ns0 <-> ns3 +# ns2 <-' `'-> ns4 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +sfx=$(mktemp -u "XXXXXXXX") +ns0="ns0-$sfx" +ns1="ns1-$sfx" +ns2="ns2-$sfx" +ns3="ns3-$sfx" +ns4="ns4-$sfx" + +ebtables -V > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ebtables" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +for i in $(seq 0 4); do + eval ip netns add \$ns$i +done + +cleanup() { + for i in $(seq 0 4); do eval ip netns del \$ns$i;done +} + +trap cleanup EXIT + +do_ping() +{ + fromns="$1" + dstip="$2" + + ip netns exec $fromns ping -c 1 -q $dstip > /dev/null + if [ $? -ne 0 ]; then + echo "ERROR: ping from $fromns to $dstip" + ip netns exec ${ns0} nft list ruleset + ret=1 + fi +} + +bcast_ping() +{ + fromns="$1" + dstip="$2" + + for i in $(seq 1 1000); do + ip netns exec $fromns ping -q -f -b -c 1 -q $dstip > /dev/null 2>&1 + if [ $? 
-ne 0 ]; then + echo "ERROR: ping -b from $fromns to $dstip" + ip netns exec ${ns0} nft list ruleset + fi + done +} + +ip link add veth1 netns ${ns0} type veth peer name eth0 netns ${ns1} +if [ $? -ne 0 ]; then + echo "SKIP: Can't create veth device" + exit $ksft_skip +fi + +ip link add veth2 netns ${ns0} type veth peer name eth0 netns $ns2 +ip link add veth3 netns ${ns0} type veth peer name eth0 netns $ns3 +ip link add veth4 netns ${ns0} type veth peer name eth0 netns $ns4 + +ip -net ${ns0} link set lo up + +for i in $(seq 1 4); do + ip -net ${ns0} link set veth$i up +done + +ip -net ${ns0} link add br0 type bridge stp_state 0 forward_delay 0 nf_call_iptables 1 nf_call_ip6tables 1 nf_call_arptables 1 +if [ $? -ne 0 ]; then + echo "SKIP: Can't create bridge br0" + exit $ksft_skip +fi + +# make veth0,1,2 part of bridge. +for i in $(seq 1 3); do + ip -net ${ns0} link set veth$i master br0 +done + +# add a macvlan on top of the bridge. +MACVLAN_ADDR=ba:f3:13:37:42:23 +ip -net ${ns0} link add link br0 name macvlan0 type macvlan mode private +ip -net ${ns0} link set macvlan0 address ${MACVLAN_ADDR} +ip -net ${ns0} link set macvlan0 up +ip -net ${ns0} addr add 10.23.0.1/24 dev macvlan0 + +# add a macvlan on top of veth4. +MACVLAN_ADDR=ba:f3:13:37:42:24 +ip -net ${ns0} link add link veth4 name macvlan4 type macvlan mode vepa +ip -net ${ns0} link set macvlan4 address ${MACVLAN_ADDR} +ip -net ${ns0} link set macvlan4 up + +# make the macvlan part of the bridge. +# veth4 is not a bridge port, only the macvlan on top of it. +ip -net ${ns0} link set macvlan4 master br0 + +ip -net ${ns0} link set br0 up +ip -net ${ns0} addr add 10.0.0.1/24 dev br0 +ip netns exec ${ns0} sysctl -q net.bridge.bridge-nf-call-iptables=1 +ret=$? +if [ $ret -ne 0 ] ; then + echo "SKIP: bridge netfilter not available" + ret=$ksft_skip +fi + +# for testing, so namespaces will reply to ping -b probes. 
+ip netns exec ${ns0} sysctl -q net.ipv4.icmp_echo_ignore_broadcasts=0 + +# enable conntrack in ns0 and drop broadcast packets in forward to +# avoid them from getting confirmed in the postrouting hook before +# the cloned skb is passed up the stack. +ip netns exec ${ns0} nft -f - <<EOF +table ip filter { + chain input { + type filter hook input priority 1; policy accept + iifname br0 counter + ct state new accept + } +} + +table bridge filter { + chain forward { + type filter hook forward priority 0; policy accept + meta pkttype broadcast ip protocol icmp counter drop + } +} +EOF + +# place 1, 2 & 3 in same subnet, connected via ns0:br0. +# ns4 is placed in same subnet as well, but its not +# part of the bridge: the corresponding veth4 is not +# part of the bridge, only its macvlan interface. +for i in $(seq 1 4); do + eval ip -net \$ns$i link set lo up + eval ip -net \$ns$i link set eth0 up +done +for i in $(seq 1 2); do + eval ip -net \$ns$i addr add 10.0.0.1$i/24 dev eth0 +done + +ip -net ${ns3} addr add 10.23.0.13/24 dev eth0 +ip -net ${ns4} addr add 10.23.0.14/24 dev eth0 + +# test basic connectivity +do_ping ${ns1} 10.0.0.12 +do_ping ${ns3} 10.23.0.1 +do_ping ${ns4} 10.23.0.1 + +if [ $ret -eq 0 ];then + echo "PASS: netns connectivity: ns1 can reach ns2, ns3 and ns4 can reach ns0" +fi + +bcast_ping ${ns1} 10.0.0.255 + +# This should deliver broadcast to macvlan0, which is on top of ns0:br0. +bcast_ping ${ns3} 10.23.0.255 + +# same, this time via veth4:macvlan4. 
+bcast_ping ${ns4} 10.23.0.255 + +read t < /proc/sys/kernel/tainted + +if [ $t -eq 0 ];then + echo PASS: kernel not tainted +else + echo ERROR: kernel is tainted + ret=1 +fi + +exit $ret diff --git a/tools/testing/selftests/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/netfilter/conntrack_dump_flush.c new file mode 100644 index 0000000000..b11ea8ee67 --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_dump_flush.c @@ -0,0 +1,471 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <time.h> +#include <libmnl/libmnl.h> +#include <netinet/ip.h> + +#include <linux/netlink.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> +#include <linux/netfilter/nf_conntrack_tcp.h> +#include "../kselftest_harness.h" + +#define TEST_ZONE_ID 123 +#define NF_CT_DEFAULT_ZONE_ID 0 + +static int reply_counter; + +static int build_cta_tuple_v4(struct nlmsghdr *nlh, int type, + uint32_t src_ip, uint32_t dst_ip, + uint16_t src_port, uint16_t dst_port) +{ + struct nlattr *nest, *nest_ip, *nest_proto; + + nest = mnl_attr_nest_start(nlh, type); + if (!nest) + return -1; + + nest_ip = mnl_attr_nest_start(nlh, CTA_TUPLE_IP); + if (!nest_ip) + return -1; + mnl_attr_put_u32(nlh, CTA_IP_V4_SRC, src_ip); + mnl_attr_put_u32(nlh, CTA_IP_V4_DST, dst_ip); + mnl_attr_nest_end(nlh, nest_ip); + + nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO); + if (!nest_proto) + return -1; + mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6); + mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port)); + mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port)); + mnl_attr_nest_end(nlh, nest_proto); + + mnl_attr_nest_end(nlh, nest); +} + +static int build_cta_tuple_v6(struct nlmsghdr *nlh, int type, + struct in6_addr src_ip, struct in6_addr dst_ip, + uint16_t src_port, uint16_t dst_port) +{ + struct nlattr *nest, *nest_ip, *nest_proto; + + nest = mnl_attr_nest_start(nlh, type); + if (!nest) + return -1; + + nest_ip = 
mnl_attr_nest_start(nlh, CTA_TUPLE_IP); + if (!nest_ip) + return -1; + mnl_attr_put(nlh, CTA_IP_V6_SRC, sizeof(struct in6_addr), &src_ip); + mnl_attr_put(nlh, CTA_IP_V6_DST, sizeof(struct in6_addr), &dst_ip); + mnl_attr_nest_end(nlh, nest_ip); + + nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO); + if (!nest_proto) + return -1; + mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6); + mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port)); + mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port)); + mnl_attr_nest_end(nlh, nest_proto); + + mnl_attr_nest_end(nlh, nest); +} + +static int build_cta_proto(struct nlmsghdr *nlh) +{ + struct nlattr *nest, *nest_proto; + + nest = mnl_attr_nest_start(nlh, CTA_PROTOINFO); + if (!nest) + return -1; + + nest_proto = mnl_attr_nest_start(nlh, CTA_PROTOINFO_TCP); + if (!nest_proto) + return -1; + mnl_attr_put_u8(nlh, CTA_PROTOINFO_TCP_STATE, TCP_CONNTRACK_ESTABLISHED); + mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, 0x0a0a); + mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_REPLY, 0x0a0a); + mnl_attr_nest_end(nlh, nest_proto); + + mnl_attr_nest_end(nlh, nest); +} + +static int conntrack_data_insert(struct mnl_socket *sock, struct nlmsghdr *nlh, + uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *rplnlh; + unsigned int portid; + int err, ret; + + portid = mnl_socket_get_portid(sock); + + ret = build_cta_proto(nlh); + if (ret < 0) { + perror("build_cta_proto"); + return -1; + } + mnl_attr_put_u32(nlh, CTA_TIMEOUT, htonl(20000)); + mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone)); + + if (mnl_socket_sendto(sock, nlh, nlh->nlmsg_len) < 0) { + perror("mnl_socket_sendto"); + return -1; + } + + ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE); + if (ret < 0) { + perror("mnl_socket_recvfrom"); + return ret; + } + + ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL); + if (ret < 0) { + if (errno == EEXIST) { + /* The entries are probably still there from a previous + * run. 
So we are good + */ + return 0; + } + perror("mnl_cb_run"); + return ret; + } + + return 0; +} + +static int conntrack_data_generate_v4(struct mnl_socket *sock, uint32_t src_ip, + uint32_t dst_ip, uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh; + struct nfgenmsg *nfh; + int ret; + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | + NLM_F_ACK | NLM_F_EXCL; + nlh->nlmsg_seq = time(NULL); + + nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg)); + nfh->nfgen_family = AF_INET; + nfh->version = NFNETLINK_V0; + nfh->res_id = 0; + + ret = build_cta_tuple_v4(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip, 12345, 443); + if (ret < 0) { + perror("build_cta_tuple_v4"); + return ret; + } + ret = build_cta_tuple_v4(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip, 443, 12345); + if (ret < 0) { + perror("build_cta_tuple_v4"); + return ret; + } + return conntrack_data_insert(sock, nlh, zone); +} + +static int conntrack_data_generate_v6(struct mnl_socket *sock, + struct in6_addr src_ip, + struct in6_addr dst_ip, + uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh; + struct nfgenmsg *nfh; + int ret; + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | + NLM_F_ACK | NLM_F_EXCL; + nlh->nlmsg_seq = time(NULL); + + nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg)); + nfh->nfgen_family = AF_INET6; + nfh->version = NFNETLINK_V0; + nfh->res_id = 0; + + ret = build_cta_tuple_v6(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip, + 12345, 443); + if (ret < 0) { + perror("build_cta_tuple_v6"); + return ret; + } + ret = build_cta_tuple_v6(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip, + 12345, 443); + if (ret < 0) { + perror("build_cta_tuple_v6"); + return ret; + } + return conntrack_data_insert(sock, nlh, zone); +} + +static int count_entries(const 
struct nlmsghdr *nlh, void *data) +{ + reply_counter++; +} + +static int conntracK_count_zone(struct mnl_socket *sock, uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh, *rplnlh; + struct nfgenmsg *nfh; + struct nlattr *nest; + unsigned int portid; + int err, ret; + + portid = mnl_socket_get_portid(sock); + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_GET; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + nlh->nlmsg_seq = time(NULL); + + nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg)); + nfh->nfgen_family = AF_UNSPEC; + nfh->version = NFNETLINK_V0; + nfh->res_id = 0; + + mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone)); + + ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len); + if (ret < 0) { + perror("mnl_socket_sendto"); + return ret; + } + + reply_counter = 0; + ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE); + while (ret > 0) { + ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, + count_entries, NULL); + if (ret <= MNL_CB_STOP) + break; + + ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE); + } + if (ret < 0) { + perror("mnl_socket_recvfrom"); + return ret; + } + + return reply_counter; +} + +static int conntrack_flush_zone(struct mnl_socket *sock, uint16_t zone) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlmsghdr *nlh, *rplnlh; + struct nfgenmsg *nfh; + struct nlattr *nest; + unsigned int portid; + int err, ret; + + portid = mnl_socket_get_portid(sock); + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_DELETE; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + nlh->nlmsg_seq = time(NULL); + + nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg)); + nfh->nfgen_family = AF_UNSPEC; + nfh->version = NFNETLINK_V0; + nfh->res_id = 0; + + mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone)); + + ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len); + if (ret < 0) { + perror("mnl_socket_sendto"); 
+ return ret; + } + + ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE); + if (ret < 0) { + perror("mnl_socket_recvfrom"); + return ret; + } + + ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL); + if (ret < 0) { + perror("mnl_cb_run"); + return ret; + } + + return 0; +} + +FIXTURE(conntrack_dump_flush) +{ + struct mnl_socket *sock; +}; + +FIXTURE_SETUP(conntrack_dump_flush) +{ + struct in6_addr src, dst; + int ret; + + self->sock = mnl_socket_open(NETLINK_NETFILTER); + if (!self->sock) { + perror("mnl_socket_open"); + exit(EXIT_FAILURE); + } + + if (mnl_socket_bind(self->sock, 0, MNL_SOCKET_AUTOPID) < 0) { + perror("mnl_socket_bind"); + exit(EXIT_FAILURE); + } + + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + if (ret < 0 && errno == EPERM) + SKIP(return, "Needs to be run as root"); + else if (ret < 0 && errno == EOPNOTSUPP) + SKIP(return, "Kernel does not seem to support conntrack zones"); + + ret = conntrack_data_generate_v4(self->sock, 0xf0f0f0f0, 0xf1f1f1f1, + TEST_ZONE_ID); + EXPECT_EQ(ret, 0); + ret = conntrack_data_generate_v4(self->sock, 0xf2f2f2f2, 0xf3f3f3f3, + TEST_ZONE_ID + 1); + EXPECT_EQ(ret, 0); + ret = conntrack_data_generate_v4(self->sock, 0xf4f4f4f4, 0xf5f5f5f5, + TEST_ZONE_ID + 2); + EXPECT_EQ(ret, 0); + ret = conntrack_data_generate_v4(self->sock, 0xf6f6f6f6, 0xf7f7f7f7, + NF_CT_DEFAULT_ZONE_ID); + EXPECT_EQ(ret, 0); + + src = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x01000000 + } + }}; + dst = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x02000000 + } + }}; + ret = conntrack_data_generate_v6(self->sock, src, dst, + TEST_ZONE_ID); + EXPECT_EQ(ret, 0); + src = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x03000000 + } + }}; + dst = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x04000000 + } + }}; + ret = conntrack_data_generate_v6(self->sock, src, dst, 
+ TEST_ZONE_ID + 1); + EXPECT_EQ(ret, 0); + src = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x05000000 + } + }}; + dst = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x06000000 + } + }}; + ret = conntrack_data_generate_v6(self->sock, src, dst, + TEST_ZONE_ID + 2); + EXPECT_EQ(ret, 0); + + src = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x07000000 + } + }}; + dst = (struct in6_addr) {{ + .__u6_addr32 = { + 0xb80d0120, + 0x00000000, + 0x00000000, + 0x08000000 + } + }}; + ret = conntrack_data_generate_v6(self->sock, src, dst, + NF_CT_DEFAULT_ZONE_ID); + EXPECT_EQ(ret, 0); + + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + EXPECT_GE(ret, 2); + if (ret > 2) + SKIP(return, "kernel does not support filtering by zone"); +} + +FIXTURE_TEARDOWN(conntrack_dump_flush) +{ +} + +TEST_F(conntrack_dump_flush, test_dump_by_zone) +{ + int ret; + + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + EXPECT_EQ(ret, 2); +} + +TEST_F(conntrack_dump_flush, test_flush_by_zone) +{ + int ret; + + ret = conntrack_flush_zone(self->sock, TEST_ZONE_ID); + EXPECT_EQ(ret, 0); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + EXPECT_EQ(ret, 0); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1); + EXPECT_EQ(ret, 2); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2); + EXPECT_EQ(ret, 2); + ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID); + EXPECT_EQ(ret, 2); +} + +TEST_F(conntrack_dump_flush, test_flush_by_zone_default) +{ + int ret; + + ret = conntrack_flush_zone(self->sock, NF_CT_DEFAULT_ZONE_ID); + EXPECT_EQ(ret, 0); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID); + EXPECT_EQ(ret, 2); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1); + EXPECT_EQ(ret, 2); + ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2); + EXPECT_EQ(ret, 2); + ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID); + 
EXPECT_EQ(ret, 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/nolibc/.gitignore b/tools/testing/selftests/nolibc/.gitignore index 5119f9f7af..35d247a0d5 100644 --- a/tools/testing/selftests/nolibc/.gitignore +++ b/tools/testing/selftests/nolibc/.gitignore @@ -3,4 +3,5 @@ /libc-test /nolibc-test /run.out +/run.out.* /sysroot/ diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index a0fc07253b..40dd952280 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -1,9 +1,16 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for nolibc tests include ../../../scripts/Makefile.include +include ../../../scripts/utilities.mak # We need this for the "cc-option" macro. include ../../../build/Build.include +ifneq ($(O),) +ifneq ($(call is-absolute,$(O)),y) +$(error Only absolute O= parameters are supported) +endif +endif + # we're in ".../tools/testing/selftests/nolibc" ifeq ($(srctree),) srctree := $(patsubst %/tools/testing/selftests/,%,$(dir $(CURDIR))) @@ -14,6 +21,8 @@ include $(srctree)/scripts/subarch.include ARCH = $(SUBARCH) endif +objtree ?= $(srctree) + # XARCH extends the kernel's ARCH with a few variants of the same # architecture that only differ by the configuration, the toolchain # and the Qemu program used. 
It is copied as-is into ARCH except for @@ -31,12 +40,15 @@ endif # configure default variants for target kernel supported architectures XARCH_powerpc = ppc +XARCH_mips = mips32le XARCH = $(or $(XARCH_$(ARCH)),$(ARCH)) # map from user input variants to their kernel supported architectures ARCH_ppc = powerpc ARCH_ppc64 = powerpc ARCH_ppc64le = powerpc +ARCH_mips32le = mips +ARCH_mips32be = mips ARCH := $(or $(ARCH_$(XARCH)),$(XARCH)) # kernel image names by architecture @@ -45,14 +57,15 @@ IMAGE_x86_64 = arch/x86/boot/bzImage IMAGE_x86 = arch/x86/boot/bzImage IMAGE_arm64 = arch/arm64/boot/Image IMAGE_arm = arch/arm/boot/zImage -IMAGE_mips = vmlinuz +IMAGE_mips32le = vmlinuz +IMAGE_mips32be = vmlinuz IMAGE_ppc = vmlinux IMAGE_ppc64 = vmlinux IMAGE_ppc64le = arch/powerpc/boot/zImage IMAGE_riscv = arch/riscv/boot/Image IMAGE_s390 = arch/s390/boot/bzImage IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi -IMAGE = $(IMAGE_$(XARCH)) +IMAGE = $(objtree)/$(IMAGE_$(XARCH)) IMAGE_NAME = $(notdir $(IMAGE)) # default kernel configurations that appear to be usable @@ -61,7 +74,8 @@ DEFCONFIG_x86_64 = defconfig DEFCONFIG_x86 = defconfig DEFCONFIG_arm64 = defconfig DEFCONFIG_arm = multi_v7_defconfig -DEFCONFIG_mips = malta_defconfig +DEFCONFIG_mips32le = malta_defconfig +DEFCONFIG_mips32be = malta_defconfig DEFCONFIG_ppc = pmac32_defconfig DEFCONFIG_ppc64 = powernv_be_defconfig DEFCONFIG_ppc64le = powernv_defconfig @@ -70,6 +84,9 @@ DEFCONFIG_s390 = defconfig DEFCONFIG_loongarch = defconfig DEFCONFIG = $(DEFCONFIG_$(XARCH)) +EXTRACONFIG_mips32be = -d CONFIG_CPU_LITTLE_ENDIAN -e CONFIG_CPU_BIG_ENDIAN +EXTRACONFIG = $(EXTRACONFIG_$(XARCH)) + # optional tests to run (default = all) TEST = @@ -79,7 +96,8 @@ QEMU_ARCH_x86_64 = x86_64 QEMU_ARCH_x86 = x86_64 QEMU_ARCH_arm64 = aarch64 QEMU_ARCH_arm = arm -QEMU_ARCH_mips = mipsel # works with malta_defconfig +QEMU_ARCH_mips32le = mipsel # works with malta_defconfig +QEMU_ARCH_mips32be = mips QEMU_ARCH_ppc = ppc QEMU_ARCH_ppc64 = ppc64 
QEMU_ARCH_ppc64le = ppc64 @@ -88,20 +106,31 @@ QEMU_ARCH_s390 = s390x QEMU_ARCH_loongarch = loongarch64 QEMU_ARCH = $(QEMU_ARCH_$(XARCH)) +QEMU_ARCH_USER_ppc64le = ppc64le +QEMU_ARCH_USER = $(or $(QEMU_ARCH_USER_$(XARCH)),$(QEMU_ARCH_$(XARCH))) + +QEMU_BIOS_DIR = /usr/share/edk2/ +QEMU_BIOS_loongarch = $(QEMU_BIOS_DIR)/loongarch64/OVMF_CODE.fd + +ifneq ($(QEMU_BIOS_$(XARCH)),) +QEMU_ARGS_BIOS = -bios $(QEMU_BIOS_$(XARCH)) +endif + # QEMU_ARGS : some arch-specific args to pass to qemu QEMU_ARGS_i386 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_x86_64 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_mips = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mips32le = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mips32be = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc64 = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_s390 = -M s390-ccw-virtio -m 1G -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS = $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_EXTRA) +QEMU_ARGS = $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA) # OUTPUT is only set when run from the main makefile, otherwise # it defaults to this nolibc directory. 
@@ -118,7 +147,8 @@ CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2) CFLAGS_s390 = -m64 -CFLAGS_mips = -EL +CFLAGS_mips32le = -EL -mabi=32 -fPIC +CFLAGS_mips32be = -EB -mabi=32 CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all)) CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \ $(call cc-option,-fno-stack-protector) \ @@ -167,7 +197,8 @@ sysroot: sysroot/$(ARCH)/include sysroot/$(ARCH)/include: $(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot $(QUIET_MKDIR)mkdir -p sysroot - $(Q)$(MAKE) -C ../../../include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone + $(Q)$(MAKE) -C $(srctree) outputmakefile + $(Q)$(MAKE) -C $(srctree)/tools/include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone $(Q)mv sysroot/sysroot sysroot/$(ARCH) ifneq ($(NOLIBC_SYSROOT),0) @@ -177,7 +208,7 @@ nolibc-test: nolibc-test.c nolibc-test-linkage.c sysroot/$(ARCH)/include else nolibc-test: nolibc-test.c nolibc-test-linkage.c $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \ - -nostdlib -static -include ../../../include/nolibc/nolibc.h nolibc-test.c nolibc-test-linkage.c -lgcc + -nostdlib -static -include $(srctree)/tools/include/nolibc/nolibc.h nolibc-test.c nolibc-test-linkage.c -lgcc endif libc-test: nolibc-test.c nolibc-test-linkage.c @@ -195,11 +226,11 @@ run-nolibc-test: nolibc-test # qemu user-land test run-user: nolibc-test - $(Q)qemu-$(QEMU_ARCH) ./nolibc-test > "$(CURDIR)/run.out" || : + $(Q)qemu-$(QEMU_ARCH_USER) ./nolibc-test > "$(CURDIR)/run.out" || : $(Q)$(REPORT) $(CURDIR)/run.out initramfs.cpio: kernel nolibc-test - $(QUIET_GEN)echo 'file /init nolibc-test 755 0 0' | $(srctree)/usr/gen_init_cpio - > initramfs.cpio + $(QUIET_GEN)echo 'file /init nolibc-test 755 0 0' | $(objtree)/usr/gen_init_cpio - 
> initramfs.cpio initramfs: nolibc-test $(QUIET_MKDIR)mkdir -p initramfs @@ -208,21 +239,25 @@ initramfs: nolibc-test defconfig: $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) mrproper $(DEFCONFIG) prepare + $(Q)if [ -n "$(EXTRACONFIG)" ]; then \ + $(srctree)/scripts/config --file $(objtree)/.config $(EXTRACONFIG); \ + $(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) olddefconfig < /dev/null; \ + fi kernel: - $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) + $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) < /dev/null kernel-standalone: initramfs - $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs + $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null # run the tests after building the kernel run: kernel initramfs.cpio - $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" + $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" $(Q)$(REPORT) $(CURDIR)/run.out # re-run the tests from an existing kernel rerun: - $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" + $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" $(Q)$(REPORT) $(CURDIR)/run.out # report with existing test log diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index e173014f6b..6ba4f8275a 100644 --- 
a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -22,6 +22,7 @@ #include <sys/mount.h> #include <sys/prctl.h> #include <sys/reboot.h> +#include <sys/resource.h> #include <sys/stat.h> #include <sys/syscall.h> #include <sys/sysmacros.h> @@ -130,11 +131,17 @@ static const char *errorname(int err) } } -static void putcharn(char c, size_t n) +static void align_result(size_t llen) { - char buf[64]; + const size_t align = 64; + char buf[align]; + size_t n; - memset(buf, c, n); + if (llen >= align) + return; + + n = align - llen; + memset(buf, ' ', n); buf[n] = '\0'; fputs(buf, stdout); } @@ -156,8 +163,7 @@ static void result(int llen, enum RESULT r) else msg = " [FAIL]"; - if (llen < 64) - putcharn(' ', 64 - llen); + align_result(llen); puts(msg); } @@ -834,6 +840,33 @@ int test_pipe(void) return !!memcmp(buf, msg, len); } +int test_rlimit(void) +{ + struct rlimit rlim = { + .rlim_cur = 1 << 20, + .rlim_max = 1 << 21, + }; + int ret; + + ret = setrlimit(RLIMIT_CORE, &rlim); + if (ret) + return -1; + + rlim.rlim_cur = 0; + rlim.rlim_max = 0; + + ret = getrlimit(RLIMIT_CORE, &rlim); + if (ret) + return -1; + + if (rlim.rlim_cur != 1 << 20) + return -1; + if (rlim.rlim_max != 1 << 21) + return -1; + + return 0; +} + /* Run syscall tests between IDs <min> and <max>. * Return 0 on success, non-zero on failure. 
@@ -905,7 +938,6 @@ int run_syscall(int min, int max) CASE_TEST(gettimeofday_tv_tz);EXPECT_SYSZR(1, gettimeofday(&tv, &tz)); break; CASE_TEST(getpagesize); EXPECT_SYSZR(1, test_getpagesize()); break; CASE_TEST(ioctl_tiocinq); EXPECT_SYSZR(1, ioctl(0, TIOCINQ, &tmp)); break; - CASE_TEST(ioctl_tiocinq); EXPECT_SYSZR(1, ioctl(0, TIOCINQ, &tmp)); break; CASE_TEST(link_root1); EXPECT_SYSER(1, link("/", "/"), -1, EEXIST); break; CASE_TEST(link_blah); EXPECT_SYSER(1, link("/proc/self/blah", "/blah"), -1, ENOENT); break; CASE_TEST(link_dir); EXPECT_SYSER(euid0, link("/", "/blah"), -1, EPERM); break; @@ -924,6 +956,7 @@ int run_syscall(int min, int max) CASE_TEST(poll_fault); EXPECT_SYSER(1, poll(NULL, 1, 0), -1, EFAULT); break; CASE_TEST(prctl); EXPECT_SYSER(1, prctl(PR_SET_NAME, (unsigned long)NULL, 0, 0, 0), -1, EFAULT); break; CASE_TEST(read_badf); EXPECT_SYSER(1, read(-1, &tmp, 1), -1, EBADF); break; + CASE_TEST(rlimit); EXPECT_SYSZR(1, test_rlimit()); break; CASE_TEST(rmdir_blah); EXPECT_SYSER(1, rmdir("/blah"), -1, ENOENT); break; CASE_TEST(sched_yield); EXPECT_SYSZR(1, sched_yield()); break; CASE_TEST(select_null); EXPECT_SYSZR(1, ({ struct timeval tv = { 0 }; select(0, NULL, NULL, NULL, &tv); })); break; @@ -1133,6 +1166,7 @@ static int run_protection(int min __attribute__((unused)), { pid_t pid; int llen = 0, status; + struct rlimit rlimit = { 0, 0 }; llen += printf("0 -fstackprotector "); @@ -1164,6 +1198,7 @@ static int run_protection(int min __attribute__((unused)), close(STDERR_FILENO); prctl(PR_SET_DUMPABLE, 0, 0, 0, 0); + setrlimit(RLIMIT_CORE, &rlimit); smash_stack(); return 1; diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh new file mode 100755 index 0000000000..c0a5a7cea9 --- /dev/null +++ b/tools/testing/selftests/nolibc/run-tests.sh @@ -0,0 +1,169 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test runner for nolibc tests + +set -e + +trap 'echo Aborting...' 
'ERR' + +crosstool_version=13.2.0 +hostarch=x86_64 +nproc=$(( $(nproc) + 2)) +cache_dir="${XDG_CACHE_HOME:-"$HOME"/.cache}" +download_location="${cache_dir}/crosstools/" +build_location="$(realpath "${cache_dir}"/nolibc-tests/)" +perform_download=0 +test_mode=system +archs="i386 x86_64 arm64 arm mips32le mips32be ppc ppc64 ppc64le riscv s390 loongarch" + +TEMP=$(getopt -o 'j:d:c:b:a:m:ph' -n "$0" -- "$@") + +eval set -- "$TEMP" +unset TEMP + +print_usage() { + cat <<EOF +Run nolibc testsuite for multiple architectures with crosstools + +Usage: + $0 [options] <architectures> + +Known architectures: + ${archs} + +Options: + -j [N] Allow N jobs at once (default: ${nproc}) + -p Allow download of toolchains + -d [DIR] Download location for toolchains (default: ${download_location}) + -c [VERSION] Version of toolchains to use (default: ${crosstool_version}) + -a [ARCH] Host architecture of toolchains to use (default: ${hostarch}) + -b [DIR] Build location (default: ${build_location}) + -m [MODE] Test mode user/system (default: ${test_mode}) +EOF +} + +while true; do + case "$1" in + '-j') + nproc="$2" + shift 2; continue ;; + '-p') + perform_download=1 + shift; continue ;; + '-d') + download_location="$2" + shift 2; continue ;; + '-c') + crosstool_version="$2" + shift 2; continue ;; + '-a') + hostarch="$2" + shift 2; continue ;; + '-b') + build_location="$(realpath "$2")" + shift 2; continue ;; + '-m') + test_mode="$2" + shift 2; continue ;; + '-h') + print_usage + exit 0 + ;; + '--') + shift; break ;; + *) + echo 'Internal error!' 
>&2; exit 1 ;; + esac +done + +if [[ -n "$*" ]]; then + archs="$*" +fi + +crosstool_arch() { + case "$1" in + arm64) echo aarch64;; + ppc) echo powerpc;; + ppc64) echo powerpc64;; + ppc64le) echo powerpc64;; + riscv) echo riscv64;; + loongarch) echo loongarch64;; + mips*) echo mips;; + *) echo "$1";; + esac +} + +crosstool_abi() { + case "$1" in + arm) echo linux-gnueabi;; + *) echo linux;; + esac +} + +download_crosstool() { + arch="$(crosstool_arch "$1")" + abi="$(crosstool_abi "$1")" + + archive_name="${hostarch}-gcc-${crosstool_version}-nolibc-${arch}-${abi}.tar.gz" + url="https://mirrors.edge.kernel.org/pub/tools/crosstool/files/bin/${hostarch}/${crosstool_version}/${archive_name}" + archive="${download_location}${archive_name}" + stamp="${archive}.stamp" + + [ -f "${stamp}" ] && return + + echo "Downloading crosstools ${arch} ${crosstool_version}" + mkdir -p "${download_location}" + curl -o "${archive}" --fail --continue-at - "${url}" + tar -C "${download_location}" -xf "${archive}" + touch "${stamp}" +} + +# capture command output, print it on failure +# mimics chronic(1) from moreutils +function swallow_output() { + if ! OUTPUT="$("$@" 2>&1)"; then + echo "$OUTPUT" + return 1 + fi + return 0 +} + +test_arch() { + arch=$1 + ct_arch=$(crosstool_arch "$arch") + ct_abi=$(crosstool_abi "$1") + cross_compile=$(realpath "${download_location}gcc-${crosstool_version}-nolibc/${ct_arch}-${ct_abi}/bin/${ct_arch}-${ct_abi}-") + build_dir="${build_location}/${arch}" + MAKE=(make -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" O="${build_dir}") + + mkdir -p "$build_dir" + if [ "$test_mode" = "system" ] && [ ! 
-f "${build_dir}/.config" ]; then + swallow_output "${MAKE[@]}" defconfig + fi + case "$test_mode" in + 'system') + test_target=run + ;; + 'user') + test_target=run-user + ;; + *) + echo "Unknown mode $test_mode" + exit 1 + esac + printf '%-15s' "$arch:" + swallow_output "${MAKE[@]}" "$test_target" V=1 + cp run.out run.out."${arch}" + "${MAKE[@]}" report | grep passed +} + +if [ "$perform_download" -ne 0 ]; then + for arch in $archs; do + download_crosstool "$arch" + done +fi + +for arch in $archs; do + test_arch "$arch" +done diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 7ea42fa02e..c376151982 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -32,6 +32,8 @@ SUB_DIRS = alignment \ vphn \ math \ papr_attributes \ + papr_vpd \ + papr_sysparm \ ptrace \ security \ mce diff --git a/tools/testing/selftests/powerpc/math/fpu.h b/tools/testing/selftests/powerpc/math/fpu.h new file mode 100644 index 0000000000..a8ad0d4260 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/fpu.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2023, Michael Ellerman, IBM Corporation. + */ + +#ifndef _SELFTESTS_POWERPC_FPU_H +#define _SELFTESTS_POWERPC_FPU_H + +static inline void randomise_darray(double *darray, int num) +{ + long val; + + for (int i = 0; i < num; i++) { + val = random(); + if (val & 1) + val *= -1; + + if (val & 2) + darray[i] = 1.0 / val; + else + darray[i] = val * val; + } +} + +#endif /* _SELFTESTS_POWERPC_FPU_H */ diff --git a/tools/testing/selftests/powerpc/math/fpu_asm.S b/tools/testing/selftests/powerpc/math/fpu_asm.S index 9dc0c158f8..efe1e1be46 100644 --- a/tools/testing/selftests/powerpc/math/fpu_asm.S +++ b/tools/testing/selftests/powerpc/math/fpu_asm.S @@ -66,6 +66,40 @@ FUNC_START(check_fpu) li r3,0 # Success!!! 
1: blr + +// int check_all_fprs(double darray[32]) +FUNC_START(check_all_fprs) + PUSH_BASIC_STACK(8) + mr r4, r3 // r4 = darray + li r3, 1 // prepare for failure + + stfd f31, STACK_FRAME_LOCAL(0, 0)(sp) // backup f31 + + // Check regs f0-f30, using f31 as scratch + .set i, 0 + .rept 31 + lfd f31, (8 * i)(r4) // load expected value + fcmpu cr0, i, f31 // compare + bne cr0, 1f // bail if mismatch + .set i, i + 1 + .endr + + lfd f31, STACK_FRAME_LOCAL(0, 0)(sp) // reload f31 + stfd f30, STACK_FRAME_LOCAL(0, 0)(sp) // backup f30 + + lfd f30, (8 * 31)(r4) // load expected value of f31 + fcmpu cr0, f30, f31 // compare + bne cr0, 1f // bail if mismatch + + lfd f30, STACK_FRAME_LOCAL(0, 0)(sp) // reload f30 + + // Success + li r3, 0 + +1: POP_BASIC_STACK(8) + blr +FUNC_END(check_all_fprs) + FUNC_START(test_fpu) # r3 holds pointer to where to put the result of fork # r4 holds pointer to the pid @@ -75,8 +109,9 @@ FUNC_START(test_fpu) std r3,STACK_FRAME_PARAM(0)(sp) # Address of darray std r4,STACK_FRAME_PARAM(1)(sp) # Address of pid - bl load_fpu - nop + // Load FPRs with expected values + OP_REGS lfd, 8, 0, 31, r3 + li r0,__NR_fork sc @@ -85,7 +120,7 @@ FUNC_START(test_fpu) std r3,0(r9) ld r3,STACK_FRAME_PARAM(0)(sp) - bl check_fpu + bl check_all_fprs nop POP_FPU(256) @@ -104,8 +139,8 @@ FUNC_START(preempt_fpu) std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting std r5,STACK_FRAME_PARAM(2)(sp) # int *running - bl load_fpu - nop + // Load FPRs with expected values + OP_REGS lfd, 8, 0, 31, r3 sync # Atomic DEC @@ -116,8 +151,7 @@ FUNC_START(preempt_fpu) bne- 1b 2: ld r3,STACK_FRAME_PARAM(0)(sp) - bl check_fpu - nop + bl check_all_fprs cmpdi r3,0 bne 3f ld r4,STACK_FRAME_PARAM(2)(sp) diff --git a/tools/testing/selftests/powerpc/math/fpu_preempt.c b/tools/testing/selftests/powerpc/math/fpu_preempt.c index 3e5b5663d2..9ddede0770 100644 --- a/tools/testing/selftests/powerpc/math/fpu_preempt.c +++ b/tools/testing/selftests/powerpc/math/fpu_preempt.c @@ -1,13 +1,12 @@ // 
SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright 2015, Cyril Bur, IBM Corp. + * Copyright 2023, Michael Ellerman, IBM Corp. * * This test attempts to see if the FPU registers change across preemption. - * Two things should be noted here a) The check_fpu function in asm only checks - * the non volatile registers as it is reused from the syscall test b) There is - * no way to be sure preemption happened so this test just uses many threads - * and a long wait. As such, a successful test doesn't mean much but a failure - * is bad. + * There is no way to be sure preemption happened so this test just uses many + * threads and a long wait. As such, a successful test doesn't mean much but + * a failure is bad. */ #include <stdio.h> @@ -20,9 +19,10 @@ #include <pthread.h> #include "utils.h" +#include "fpu.h" /* Time to wait for workers to get preempted (seconds) */ -#define PREEMPT_TIME 20 +#define PREEMPT_TIME 60 /* * Factor by which to multiply number of online CPUs for total number of * worker threads @@ -30,9 +30,7 @@ #define THREAD_FACTOR 8 -__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, - 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, - 2.1}; +__thread double darray[32]; int threads_starting; int running; @@ -42,12 +40,9 @@ extern int preempt_fpu(double *darray, int *threads_starting, int *running); void *preempt_fpu_c(void *p) { long rc; - int i; srand(pthread_self()); - for (i = 0; i < 21; i++) - darray[i] = rand(); - + randomise_darray(darray, ARRAY_SIZE(darray)); rc = preempt_fpu(darray, &threads_starting, &running); return (void *)rc; diff --git a/tools/testing/selftests/powerpc/math/fpu_signal.c b/tools/testing/selftests/powerpc/math/fpu_signal.c index 7b1addd504..8a64f63e37 100644 --- a/tools/testing/selftests/powerpc/math/fpu_signal.c +++ b/tools/testing/selftests/powerpc/math/fpu_signal.c @@ -18,6 +18,7 @@ #include <pthread.h> #include "utils.h" +#include "fpu.h" /* Number of times each thread should receive the 
signal */ #define ITERATIONS 10 @@ -27,9 +28,7 @@ */ #define THREAD_FACTOR 8 -__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, - 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, - 2.1}; +__thread double darray[32]; bool bad_context; int threads_starting; @@ -43,9 +42,9 @@ void signal_fpu_sig(int sig, siginfo_t *info, void *context) ucontext_t *uc = context; mcontext_t *mc = &uc->uc_mcontext; - /* Only the non volatiles were loaded up */ - for (i = 14; i < 32; i++) { - if (mc->fp_regs[i] != darray[i - 14]) { + // Don't check f30/f31, they're used as scratches in check_all_fprs() + for (i = 0; i < 30; i++) { + if (mc->fp_regs[i] != darray[i]) { bad_context = true; break; } @@ -54,7 +53,6 @@ void signal_fpu_sig(int sig, siginfo_t *info, void *context) void *signal_fpu_c(void *p) { - int i; long rc; struct sigaction act; act.sa_sigaction = signal_fpu_sig; @@ -64,9 +62,7 @@ void *signal_fpu_c(void *p) return p; srand(pthread_self()); - for (i = 0; i < 21; i++) - darray[i] = rand(); - + randomise_darray(darray, ARRAY_SIZE(darray)); rc = preempt_fpu(darray, &threads_starting, &running); return (void *) rc; diff --git a/tools/testing/selftests/powerpc/math/fpu_syscall.c b/tools/testing/selftests/powerpc/math/fpu_syscall.c index 694f225c7e..751d46b133 100644 --- a/tools/testing/selftests/powerpc/math/fpu_syscall.c +++ b/tools/testing/selftests/powerpc/math/fpu_syscall.c @@ -14,12 +14,11 @@ #include <stdlib.h> #include "utils.h" +#include "fpu.h" extern int test_fpu(double *darray, pid_t *pid); -double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, - 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, - 2.1}; +double darray[32]; int syscall_fpu(void) { @@ -27,6 +26,9 @@ int syscall_fpu(void) int i; int ret; int child_ret; + + randomise_darray(darray, ARRAY_SIZE(darray)); + for (i = 0; i < 1000; i++) { /* test_fpu will fork() */ ret = test_fpu(darray, &fork_pid); diff --git a/tools/testing/selftests/powerpc/papr_sysparm/.gitignore 
b/tools/testing/selftests/powerpc/papr_sysparm/.gitignore new file mode 100644 index 0000000000..f2a69bf59d --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_sysparm/.gitignore @@ -0,0 +1 @@ +/papr_sysparm diff --git a/tools/testing/selftests/powerpc/papr_sysparm/Makefile b/tools/testing/selftests/powerpc/papr_sysparm/Makefile new file mode 100644 index 0000000000..7f79e43763 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_sysparm/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +noarg: + $(MAKE) -C ../ + +TEST_GEN_PROGS := papr_sysparm + +top_srcdir = ../../../../.. +include ../../lib.mk + +$(TEST_GEN_PROGS): ../harness.c ../utils.c + +$(OUTPUT)/papr_sysparm: CFLAGS += $(KHDR_INCLUDES) diff --git a/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c b/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c new file mode 100644 index 0000000000..f56c15a11e --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <sys/ioctl.h> +#include <unistd.h> +#include <asm/papr-sysparm.h> + +#include "utils.h" + +#define DEVPATH "/dev/papr-sysparm" + +static int open_close(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + FAIL_IF(close(devfd) != 0); + + return 0; +} + +static int get_splpar(void) +{ + struct papr_sysparm_io_block sp = { + .parameter = 20, // SPLPAR characteristics + }; + const int devfd = open(DEVPATH, O_RDONLY); + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_GET, &sp) != 0); + FAIL_IF(sp.length == 0); + FAIL_IF(sp.length > sizeof(sp.data)); + FAIL_IF(close(devfd) != 0); + + return 0; +} + +static int get_bad_parameter(void) +{ + struct papr_sysparm_io_block sp = { + .parameter = 
UINT32_MAX, // there are only ~60 specified parameters + }; + const int devfd = open(DEVPATH, O_RDONLY); + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + // Ensure expected error + FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_GET, &sp) != -1); + FAIL_IF(errno != EOPNOTSUPP); + + // Ensure the buffer is unchanged + FAIL_IF(sp.length != 0); + for (size_t i = 0; i < ARRAY_SIZE(sp.data); ++i) + FAIL_IF(sp.data[i] != 0); + + FAIL_IF(close(devfd) != 0); + + return 0; +} + +static int check_efault_common(unsigned long cmd) +{ + const int devfd = open(DEVPATH, O_RDWR); + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + // Ensure expected error + FAIL_IF(ioctl(devfd, cmd, NULL) != -1); + FAIL_IF(errno != EFAULT); + + FAIL_IF(close(devfd) != 0); + + return 0; +} + +static int check_efault_get(void) +{ + return check_efault_common(PAPR_SYSPARM_IOC_GET); +} + +static int check_efault_set(void) +{ + return check_efault_common(PAPR_SYSPARM_IOC_SET); +} + +static int set_hmc0(void) +{ + struct papr_sysparm_io_block sp = { + .parameter = 0, // HMC0, not a settable parameter + }; + const int devfd = open(DEVPATH, O_RDWR); + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + // Ensure expected error + FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_SET, &sp) != -1); + SKIP_IF_MSG(errno == EOPNOTSUPP, "operation not supported"); + FAIL_IF(errno != EPERM); + + FAIL_IF(close(devfd) != 0); + + return 0; +} + +static int set_with_ro_fd(void) +{ + struct papr_sysparm_io_block sp = { + .parameter = 0, // HMC0, not a settable parameter. 
+ }; + const int devfd = open(DEVPATH, O_RDONLY); + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + // Ensure expected error + FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_SET, &sp) != -1); + SKIP_IF_MSG(errno == EOPNOTSUPP, "operation not supported"); + + // HMC0 isn't a settable parameter and we would normally + // expect to get EPERM on attempts to modify it. However, when + // the file is open read-only, we expect the driver to prevent + // the attempt with a distinct error. + FAIL_IF(errno != EBADF); + + FAIL_IF(close(devfd) != 0); + + return 0; +} + +struct sysparm_test { + int (*function)(void); + const char *description; +}; + +static const struct sysparm_test sysparm_tests[] = { + { + .function = open_close, + .description = "open and close " DEVPATH " without issuing commands", + }, + { + .function = get_splpar, + .description = "retrieve SPLPAR characteristics", + }, + { + .function = get_bad_parameter, + .description = "verify EOPNOTSUPP for known-bad parameter", + }, + { + .function = check_efault_get, + .description = "PAPR_SYSPARM_IOC_GET returns EFAULT on bad address", + }, + { + .function = check_efault_set, + .description = "PAPR_SYSPARM_IOC_SET returns EFAULT on bad address", + }, + { + .function = set_hmc0, + .description = "ensure EPERM on attempt to update HMC0", + }, + { + .function = set_with_ro_fd, + .description = "PAPR_IOC_SYSPARM_SET returns EACCES on read-only fd", + }, +}; + +int main(void) +{ + size_t fails = 0; + + for (size_t i = 0; i < ARRAY_SIZE(sysparm_tests); ++i) { + const struct sysparm_test *t = &sysparm_tests[i]; + + if (test_harness(t->function, t->description)) + ++fails; + } + + return fails == 0 ? 
EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tools/testing/selftests/powerpc/papr_vpd/.gitignore b/tools/testing/selftests/powerpc/papr_vpd/.gitignore new file mode 100644 index 0000000000..49285031a6 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_vpd/.gitignore @@ -0,0 +1 @@ +/papr_vpd diff --git a/tools/testing/selftests/powerpc/papr_vpd/Makefile b/tools/testing/selftests/powerpc/papr_vpd/Makefile new file mode 100644 index 0000000000..06b719703b --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_vpd/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +noarg: + $(MAKE) -C ../ + +TEST_GEN_PROGS := papr_vpd + +top_srcdir = ../../../../.. +include ../../lib.mk + +$(TEST_GEN_PROGS): ../harness.c ../utils.c + +$(OUTPUT)/papr_vpd: CFLAGS += $(KHDR_INCLUDES) diff --git a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c new file mode 100644 index 0000000000..d6f99eb9be --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <unistd.h> + +#include <asm/papr-vpd.h> + +#include "utils.h" + +#define DEVPATH "/dev/papr-vpd" + +static int dev_papr_vpd_open_close(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + FAIL_IF(close(devfd) != 0); + + return 0; +} + +static int dev_papr_vpd_get_handle_all(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + struct papr_location_code lc = { .str = "", }; + off_t size; + int fd; + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + errno = 0; + fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc); + FAIL_IF(errno != 0); + FAIL_IF(fd < 0); + + FAIL_IF(close(devfd) != 0); + + size = lseek(fd, 0, 
SEEK_END); + FAIL_IF(size <= 0); + + void *buf = malloc((size_t)size); + FAIL_IF(!buf); + + ssize_t consumed = pread(fd, buf, size, 0); + FAIL_IF(consumed != size); + + /* Ensure EOF */ + FAIL_IF(read(fd, buf, size) != 0); + FAIL_IF(close(fd)); + + /* Verify that the buffer looks like VPD */ + static const char needle[] = "System VPD"; + FAIL_IF(!memmem(buf, size, needle, strlen(needle))); + + return 0; +} + +static int dev_papr_vpd_get_handle_byte_at_a_time(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + struct papr_location_code lc = { .str = "", }; + int fd; + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + errno = 0; + fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc); + FAIL_IF(errno != 0); + FAIL_IF(fd < 0); + + FAIL_IF(close(devfd) != 0); + + size_t consumed = 0; + while (1) { + ssize_t res; + char c; + + errno = 0; + res = read(fd, &c, sizeof(c)); + FAIL_IF(res > sizeof(c)); + FAIL_IF(res < 0); + FAIL_IF(errno != 0); + consumed += res; + if (res == 0) + break; + } + + FAIL_IF(consumed != lseek(fd, 0, SEEK_END)); + + FAIL_IF(close(fd)); + + return 0; +} + + +static int dev_papr_vpd_unterm_loc_code(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + struct papr_location_code lc = {}; + int fd; + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + /* + * Place a non-null byte in every element of loc_code; the + * driver should reject this input. 
+ */ + memset(lc.str, 'x', ARRAY_SIZE(lc.str)); + + errno = 0; + fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc); + FAIL_IF(fd != -1); + FAIL_IF(errno != EINVAL); + + FAIL_IF(close(devfd) != 0); + return 0; +} + +static int dev_papr_vpd_null_handle(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + int rc; + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + errno = 0; + rc = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, NULL); + FAIL_IF(rc != -1); + FAIL_IF(errno != EFAULT); + + FAIL_IF(close(devfd) != 0); + return 0; +} + +static int papr_vpd_close_handle_without_reading(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + struct papr_location_code lc = { .str = "", }; + int fd; + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + errno = 0; + fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc); + FAIL_IF(errno != 0); + FAIL_IF(fd < 0); + + /* close the handle without reading it */ + FAIL_IF(close(fd) != 0); + + FAIL_IF(close(devfd) != 0); + return 0; +} + +static int papr_vpd_reread(void) +{ + const int devfd = open(DEVPATH, O_RDONLY); + struct papr_location_code lc = { .str = "", }; + int fd; + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + + FAIL_IF(devfd < 0); + + errno = 0; + fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc); + FAIL_IF(errno != 0); + FAIL_IF(fd < 0); + + FAIL_IF(close(devfd) != 0); + + const off_t size = lseek(fd, 0, SEEK_END); + FAIL_IF(size <= 0); + + char *bufs[2]; + + for (size_t i = 0; i < ARRAY_SIZE(bufs); ++i) { + bufs[i] = malloc(size); + FAIL_IF(!bufs[i]); + ssize_t consumed = pread(fd, bufs[i], size, 0); + FAIL_IF(consumed != size); + } + + FAIL_IF(memcmp(bufs[0], bufs[1], size)); + + FAIL_IF(close(fd) != 0); + + return 0; +} + +static int get_system_loc_code(struct papr_location_code *lc) +{ + static const char system_id_path[] = "/sys/firmware/devicetree/base/system-id"; + static const char model_path[] 
= "/sys/firmware/devicetree/base/model"; + char *system_id; + char *model; + int err = -1; + + if (read_file_alloc(model_path, &model, NULL)) + return err; + + if (read_file_alloc(system_id_path, &system_id, NULL)) + goto free_model; + + char *mtm; + int sscanf_ret = sscanf(model, "IBM,%ms", &mtm); + if (sscanf_ret != 1) + goto free_system_id; + + char *plant_and_seq; + if (sscanf(system_id, "IBM,%*c%*c%ms", &plant_and_seq) != 1) + goto free_mtm; + /* + * Replace - with . to build location code. + */ + char *sep = strchr(mtm, '-'); + if (!sep) + goto free_mtm; + else + *sep = '.'; + + snprintf(lc->str, sizeof(lc->str), + "U%s.%s", mtm, plant_and_seq); + err = 0; + + free(plant_and_seq); +free_mtm: + free(mtm); +free_system_id: + free(system_id); +free_model: + free(model); + return err; +} + +static int papr_vpd_system_loc_code(void) +{ + struct papr_location_code lc; + const int devfd = open(DEVPATH, O_RDONLY); + off_t size; + int fd; + + SKIP_IF_MSG(devfd < 0 && errno == ENOENT, + DEVPATH " not present"); + SKIP_IF_MSG(get_system_loc_code(&lc), + "Cannot determine system location code"); + + FAIL_IF(devfd < 0); + + errno = 0; + fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc); + FAIL_IF(errno != 0); + FAIL_IF(fd < 0); + + FAIL_IF(close(devfd) != 0); + + size = lseek(fd, 0, SEEK_END); + FAIL_IF(size <= 0); + + void *buf = malloc((size_t)size); + FAIL_IF(!buf); + + ssize_t consumed = pread(fd, buf, size, 0); + FAIL_IF(consumed != size); + + /* Ensure EOF */ + FAIL_IF(read(fd, buf, size) != 0); + FAIL_IF(close(fd)); + + /* Verify that the buffer looks like VPD */ + static const char needle[] = "System VPD"; + FAIL_IF(!memmem(buf, size, needle, strlen(needle))); + + return 0; +} + +struct vpd_test { + int (*function)(void); + const char *description; +}; + +static const struct vpd_test vpd_tests[] = { + { + .function = dev_papr_vpd_open_close, + .description = "open/close " DEVPATH, + }, + { + .function = dev_papr_vpd_unterm_loc_code, + .description = "ensure 
EINVAL on unterminated location code", + }, + { + .function = dev_papr_vpd_null_handle, + .description = "ensure EFAULT on bad handle addr", + }, + { + .function = dev_papr_vpd_get_handle_all, + .description = "get handle for all VPD" + }, + { + .function = papr_vpd_close_handle_without_reading, + .description = "close handle without consuming VPD" + }, + { + .function = dev_papr_vpd_get_handle_byte_at_a_time, + .description = "read all VPD one byte at a time" + }, + { + .function = papr_vpd_reread, + .description = "ensure re-read yields same results" + }, + { + .function = papr_vpd_system_loc_code, + .description = "get handle for system VPD" + }, +}; + +int main(void) +{ + size_t fails = 0; + + for (size_t i = 0; i < ARRAY_SIZE(vpd_tests); ++i) { + const struct vpd_test *t = &vpd_tests[i]; + + if (test_harness(t->function, t->description)) + ++fails; + } + + return fails == 0 ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tools/testing/selftests/prctl/set-process-name.c b/tools/testing/selftests/prctl/set-process-name.c index 3bc5e0e09e..562f707ba7 100644 --- a/tools/testing/selftests/prctl/set-process-name.c +++ b/tools/testing/selftests/prctl/set-process-name.c @@ -12,6 +12,7 @@ #define CHANGE_NAME "changename" #define EMPTY_NAME "" #define TASK_COMM_LEN 16 +#define MAX_PATH_LEN 50 int set_name(char *name) { @@ -47,6 +48,35 @@ int check_null_pointer(char *check_name) return res; } +int check_name(void) +{ + + int pid; + + pid = getpid(); + FILE *fptr = NULL; + char path[MAX_PATH_LEN] = {}; + char name[TASK_COMM_LEN] = {}; + char output[TASK_COMM_LEN] = {}; + int j; + + j = snprintf(path, MAX_PATH_LEN, "/proc/self/task/%d/comm", pid); + fptr = fopen(path, "r"); + if (!fptr) + return -EIO; + + fscanf(fptr, "%s", output); + if (ferror(fptr)) + return -EIO; + + int res = prctl(PR_GET_NAME, name, NULL, NULL, NULL); + + if (res < 0) + return -errno; + + return !strcmp(output, name); +} + TEST(rename_process) { EXPECT_GE(set_name(CHANGE_NAME), 0); @@ -57,6 +87,8 @@ 
TEST(rename_process) { EXPECT_GE(set_name(CHANGE_NAME), 0); EXPECT_LT(check_null_pointer(CHANGE_NAME), 0); + + EXPECT_TRUE(check_name()); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh index 212c52ca90..f3f8671295 100755 --- a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh +++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh @@ -67,7 +67,10 @@ ___EOF___ # build using nolibc on supported archs (smaller executable) and fall # back to regular glibc on other ones. if echo -e "#if __x86_64__||__i386__||__i486__||__i586__||__i686__" \ - "||__ARM_EABI__||__aarch64__||__s390x__||__loongarch__\nyes\n#endif" \ + "||__ARM_EABI__||__aarch64__||(__mips__ && _ABIO32)" \ + "||__powerpc__||(__riscv && __riscv_xlen == 64)" \ + "||__s390x__||__loongarch__" \ + "\nyes\n#endif" \ | ${CROSS_COMPILE}gcc -E -nostdlib -xc - \ | grep -q '^yes'; then # architecture supported by nolibc diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot index d446099375..979edbf4c8 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot @@ -1 +1,4 @@ nohz_full=2-9 +rcutorture.stall_cpu=14 +rcutorture.stall_cpu_holdoff=90 +rcutorture.fwd_progress=0 diff --git a/tools/testing/selftests/riscv/hwprobe/Makefile b/tools/testing/selftests/riscv/hwprobe/Makefile index f224b84591..cec81610a5 100644 --- a/tools/testing/selftests/riscv/hwprobe/Makefile +++ b/tools/testing/selftests/riscv/hwprobe/Makefile @@ -4,7 +4,7 @@ CFLAGS += -I$(top_srcdir)/tools/include -TEST_GEN_PROGS := hwprobe cbo +TEST_GEN_PROGS := hwprobe cbo which-cpus include ../../lib.mk @@ -13,3 +13,6 @@ $(OUTPUT)/hwprobe: hwprobe.c sys_hwprobe.S $(OUTPUT)/cbo: cbo.c sys_hwprobe.S $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^ + +$(OUTPUT)/which-cpus: which-cpus.c sys_hwprobe.S + $(CC) -static -o$@ 
$(CFLAGS) $(LDFLAGS) $^ diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c index c537d52faf..a40541bb7c 100644 --- a/tools/testing/selftests/riscv/hwprobe/cbo.c +++ b/tools/testing/selftests/riscv/hwprobe/cbo.c @@ -19,7 +19,7 @@ #include "hwprobe.h" #include "../../kselftest.h" -#define MK_CBO(fn) cpu_to_le32((fn) << 20 | 10 << 15 | 2 << 12 | 0 << 7 | 15) +#define MK_CBO(fn) le32_bswap((uint32_t)(fn) << 20 | 10 << 15 | 2 << 12 | 0 << 7 | 15) static char mem[4096] __aligned(4096) = { [0 ... 4095] = 0xa5 }; diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.c b/tools/testing/selftests/riscv/hwprobe/hwprobe.c index abb825811c..fd73c87804 100644 --- a/tools/testing/selftests/riscv/hwprobe/hwprobe.c +++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.c @@ -47,7 +47,7 @@ int main(int argc, char **argv) ksft_test_result(out != 0, "Bad CPU set\n"); out = riscv_hwprobe(pairs, 8, 1, 0, 0); - ksft_test_result(out != 0, "NULL CPU set with non-zero count\n"); + ksft_test_result(out != 0, "NULL CPU set with non-zero size\n"); pairs[0].key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR; out = riscv_hwprobe(pairs, 1, 1, &cpus, 0); diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.h b/tools/testing/selftests/riscv/hwprobe/hwprobe.h index 721b0ce73a..f3de970c32 100644 --- a/tools/testing/selftests/riscv/hwprobe/hwprobe.h +++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.h @@ -4,12 +4,22 @@ #include <stddef.h> #include <asm/hwprobe.h> +#if __BYTE_ORDER == __BIG_ENDIAN +# define le32_bswap(_x) \ + ((((_x) & 0x000000ffU) << 24) | \ + (((_x) & 0x0000ff00U) << 8) | \ + (((_x) & 0x00ff0000U) >> 8) | \ + (((_x) & 0xff000000U) >> 24)) +#else +# define le32_bswap(_x) (_x) +#endif + /* * Rather than relying on having a new enough libc to define this, just do it * ourselves. This way we don't need to be coupled to a new-enough libc to * contain the call. 
*/ long riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, - size_t cpu_count, unsigned long *cpus, unsigned int flags); + size_t cpusetsize, unsigned long *cpus, unsigned int flags); #endif diff --git a/tools/testing/selftests/riscv/hwprobe/which-cpus.c b/tools/testing/selftests/riscv/hwprobe/which-cpus.c new file mode 100644 index 0000000000..82c121412d --- /dev/null +++ b/tools/testing/selftests/riscv/hwprobe/which-cpus.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2023 Ventana Micro Systems Inc. + * + * Test the RISCV_HWPROBE_WHICH_CPUS flag of hwprobe. Also provides a command + * line interface to get the cpu list for arbitrary hwprobe pairs. + */ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sched.h> +#include <unistd.h> +#include <assert.h> + +#include "hwprobe.h" +#include "../../kselftest.h" + +static void help(void) +{ + printf("\n" + "which-cpus: [-h] [<key=value> [<key=value> ...]]\n\n" + " Without parameters, tests the RISCV_HWPROBE_WHICH_CPUS flag of hwprobe.\n" + " With parameters, where each parameter is a hwprobe pair written as\n" + " <key=value>, outputs the cpulist for cpus which all match the given set\n" + " of pairs. 'key' and 'value' should be in numeric form, e.g. 4=0x3b\n"); +} + +static void print_cpulist(cpu_set_t *cpus) +{ + int start = 0, end = 0; + + if (!CPU_COUNT(cpus)) { + printf("cpus: None\n"); + return; + } + + printf("cpus:"); + for (int i = 0, c = 0; i < CPU_COUNT(cpus); i++, c++) { + if (start != end && !CPU_ISSET(c, cpus)) + printf("-%d", end); + + while (!CPU_ISSET(c, cpus)) + ++c; + + if (i != 0 && c == end + 1) { + end = c; + continue; + } + + printf("%c%d", i == 0 ? 
' ' : ',', c); + start = end = c; + } + if (start != end) + printf("-%d", end); + printf("\n"); +} + +static void do_which_cpus(int argc, char **argv, cpu_set_t *cpus) +{ + struct riscv_hwprobe *pairs; + int nr_pairs = argc - 1; + char *start, *end; + int rc; + + pairs = malloc(nr_pairs * sizeof(struct riscv_hwprobe)); + assert(pairs); + + for (int i = 0; i < nr_pairs; i++) { + start = argv[i + 1]; + pairs[i].key = strtol(start, &end, 0); + assert(end != start && *end == '='); + start = end + 1; + pairs[i].value = strtoul(start, &end, 0); + assert(end != start && *end == '\0'); + } + + rc = riscv_hwprobe(pairs, nr_pairs, sizeof(cpu_set_t), (unsigned long *)cpus, RISCV_HWPROBE_WHICH_CPUS); + assert(rc == 0); + print_cpulist(cpus); + free(pairs); +} + +int main(int argc, char **argv) +{ + struct riscv_hwprobe pairs[2]; + cpu_set_t cpus_aff, cpus; + __u64 ext0_all; + long rc; + + rc = sched_getaffinity(0, sizeof(cpu_set_t), &cpus_aff); + assert(rc == 0); + + if (argc > 1) { + if (!strcmp(argv[1], "-h")) + help(); + else + do_which_cpus(argc, argv, &cpus_aff); + return 0; + } + + ksft_print_header(); + ksft_set_plan(7); + + pairs[0] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR, }; + rc = riscv_hwprobe(pairs, 1, 0, NULL, 0); + assert(rc == 0 && pairs[0].key == RISCV_HWPROBE_KEY_BASE_BEHAVIOR && + pairs[0].value == RISCV_HWPROBE_BASE_BEHAVIOR_IMA); + + pairs[0] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_IMA_EXT_0, }; + rc = riscv_hwprobe(pairs, 1, 0, NULL, 0); + assert(rc == 0 && pairs[0].key == RISCV_HWPROBE_KEY_IMA_EXT_0); + ext0_all = pairs[0].value; + + pairs[0] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR, .value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA, }; + CPU_ZERO(&cpus); + rc = riscv_hwprobe(pairs, 1, 0, (unsigned long *)&cpus, RISCV_HWPROBE_WHICH_CPUS); + ksft_test_result(rc == -EINVAL, "no cpusetsize\n"); + + pairs[0] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR, .value = 
RISCV_HWPROBE_BASE_BEHAVIOR_IMA, }; + rc = riscv_hwprobe(pairs, 1, sizeof(cpu_set_t), NULL, RISCV_HWPROBE_WHICH_CPUS); + ksft_test_result(rc == -EINVAL, "NULL cpus\n"); + + pairs[0] = (struct riscv_hwprobe){ .key = 0xbadc0de, }; + CPU_ZERO(&cpus); + rc = riscv_hwprobe(pairs, 1, sizeof(cpu_set_t), (unsigned long *)&cpus, RISCV_HWPROBE_WHICH_CPUS); + ksft_test_result(rc == 0 && CPU_COUNT(&cpus) == 0, "unknown key\n"); + + pairs[0] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR, .value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA, }; + pairs[1] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR, .value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA, }; + CPU_ZERO(&cpus); + rc = riscv_hwprobe(pairs, 2, sizeof(cpu_set_t), (unsigned long *)&cpus, RISCV_HWPROBE_WHICH_CPUS); + ksft_test_result(rc == 0, "duplicate keys\n"); + + pairs[0] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR, .value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA, }; + pairs[1] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_IMA_EXT_0, .value = ext0_all, }; + CPU_ZERO(&cpus); + rc = riscv_hwprobe(pairs, 2, sizeof(cpu_set_t), (unsigned long *)&cpus, RISCV_HWPROBE_WHICH_CPUS); + ksft_test_result(rc == 0 && CPU_COUNT(&cpus) == sysconf(_SC_NPROCESSORS_ONLN), "set all cpus\n"); + + pairs[0] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR, .value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA, }; + pairs[1] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_IMA_EXT_0, .value = ext0_all, }; + memcpy(&cpus, &cpus_aff, sizeof(cpu_set_t)); + rc = riscv_hwprobe(pairs, 2, sizeof(cpu_set_t), (unsigned long *)&cpus, RISCV_HWPROBE_WHICH_CPUS); + ksft_test_result(rc == 0 && CPU_EQUAL(&cpus, &cpus_aff), "set all affinity cpus\n"); + + pairs[0] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR, .value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA, }; + pairs[1] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_IMA_EXT_0, .value = ~ext0_all, }; + memcpy(&cpus, &cpus_aff, sizeof(cpu_set_t)); + rc 
= riscv_hwprobe(pairs, 2, sizeof(cpu_set_t), (unsigned long *)&cpus, RISCV_HWPROBE_WHICH_CPUS); + ksft_test_result(rc == 0 && CPU_COUNT(&cpus) == 0, "clear all cpus\n"); + + ksft_finished(); +} diff --git a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c index 2c0d2b1126..1f9969bed2 100644 --- a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c +++ b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c @@ -1,4 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only + +#include <linux/wait.h> + #define THIS_PROGRAM "./vstate_exec_nolibc" int main(int argc, char **argv) diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c index 8ad94e08ff..27668fb3b6 100644 --- a/tools/testing/selftests/riscv/vector/vstate_prctl.c +++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c @@ -1,20 +1,12 @@ // SPDX-License-Identifier: GPL-2.0-only #include <sys/prctl.h> #include <unistd.h> -#include <asm/hwprobe.h> #include <errno.h> #include <sys/wait.h> +#include "../hwprobe/hwprobe.h" #include "../../kselftest.h" -/* - * Rather than relying on having a new enough libc to define this, just do it - * ourselves. This way we don't need to be coupled to a new-enough libc to - * contain the call. 
- */ -long riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, - size_t cpu_count, unsigned long *cpus, unsigned int flags); - #define NEXT_PROGRAM "./vstate_exec_nolibc" static int launch_test(int test_inherit) { diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c index 8875429619..2348d2c20d 100644 --- a/tools/testing/selftests/rseq/basic_percpu_ops_test.c +++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c @@ -24,6 +24,11 @@ bool rseq_validate_cpu_id(void) { return rseq_mm_cid_available(); } +static +bool rseq_use_cpu_index(void) +{ + return false; /* Use mm_cid */ +} #else # define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID static @@ -36,6 +41,11 @@ bool rseq_validate_cpu_id(void) { return rseq_current_cpu_raw() >= 0; } +static +bool rseq_use_cpu_index(void) +{ + return true; /* Use cpu_id as index. */ +} #endif struct percpu_lock_entry { @@ -274,7 +284,7 @@ void test_percpu_list(void) /* Generate list entries for every usable cpu. 
*/ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; for (j = 1; j <= 100; j++) { struct percpu_list_node *node; @@ -299,7 +309,7 @@ void test_percpu_list(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_list_node *node; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; while ((node = __percpu_list_pop(&list, i))) { diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 20403d5834..2f37961240 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -288,6 +288,11 @@ bool rseq_validate_cpu_id(void) { return rseq_mm_cid_available(); } +static +bool rseq_use_cpu_index(void) +{ + return false; /* Use mm_cid */ +} # ifdef TEST_MEMBARRIER /* * Membarrier does not currently support targeting a mm_cid, so @@ -312,6 +317,11 @@ bool rseq_validate_cpu_id(void) { return rseq_current_cpu_raw() >= 0; } +static +bool rseq_use_cpu_index(void) +{ + return true; /* Use cpu_id as index. */ +} # ifdef TEST_MEMBARRIER static int rseq_membarrier_expedited(int cpu) @@ -715,7 +725,7 @@ void test_percpu_list(void) /* Generate list entries for every usable cpu. */ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; for (j = 1; j <= 100; j++) { struct percpu_list_node *node; @@ -752,7 +762,7 @@ void test_percpu_list(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_list_node *node; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; while ((node = __percpu_list_pop(&list, i))) { @@ -902,7 +912,7 @@ void test_percpu_buffer(void) /* Generate list entries for every usable cpu. 
*/ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; /* Worse-case is every item in same CPU. */ buffer.c[i].array = @@ -952,7 +962,7 @@ void test_percpu_buffer(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_buffer_node *node; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; while ((node = __percpu_buffer_pop(&buffer, i))) { @@ -1113,7 +1123,7 @@ void test_percpu_memcpy_buffer(void) /* Generate list entries for every usable cpu. */ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; /* Worse-case is every item in same CPU. */ buffer.c[i].array = @@ -1160,7 +1170,7 @@ void test_percpu_memcpy_buffer(void) for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_memcpy_buffer_node item; - if (!CPU_ISSET(i, &allowed_cpus)) + if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) { diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh index 92743980e5..a28c1416cb 100755 --- a/tools/testing/selftests/run_kselftest.sh +++ b/tools/testing/selftests/run_kselftest.sh @@ -20,11 +20,13 @@ usage() { cat <<EOF Usage: $0 [OPTIONS] - -s | --summary Print summary with detailed log in output.log + -s | --summary Print summary with detailed log in output.log (conflict with -p) + -p | --per_test_log Print test log in /tmp with each test name (conflict with -s) -t | --test COLLECTION:TEST Run TEST from COLLECTION -c | --collection COLLECTION Run all tests from COLLECTION -l | --list List the available collection:test entries -d | --dry-run Don't actually run any tests + -n | --netns Run each test in namespace -h | --help Show 
this usage info -o | --override-timeout Number of seconds after which we timeout EOF @@ -41,6 +43,9 @@ while true; do logfile="$BASE_DIR"/output.log cat /dev/null > $logfile shift ;; + -p | --per-test-log) + per_test_logging=1 + shift ;; -t | --test) TESTS="$TESTS $2" shift 2 ;; @@ -53,6 +58,9 @@ while true; do -d | --dry-run) dryrun="echo" shift ;; + -n | --netns) + RUN_IN_NETNS=1 + shift ;; -o | --override-timeout) kselftest_override_timeout="$2" shift 2 ;; diff --git a/tools/testing/selftests/sched/cs_prctl_test.c b/tools/testing/selftests/sched/cs_prctl_test.c index 3e1619b6bf..7ba0571543 100644 --- a/tools/testing/selftests/sched/cs_prctl_test.c +++ b/tools/testing/selftests/sched/cs_prctl_test.c @@ -72,7 +72,7 @@ struct child_args { static struct child_args procs[MAX_PROCESSES]; static int num_processes = 2; -static int need_cleanup = 0; +static int need_cleanup; static int _prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index 5b5c9d558d..97b86980b7 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -38,10 +38,10 @@ unsigned long long timing(clockid_t clk_id, unsigned long long samples) i *= 1000000000ULL; i += finish.tv_nsec - start.tv_nsec; - printf("%lu.%09lu - %lu.%09lu = %llu (%.1fs)\n", - finish.tv_sec, finish.tv_nsec, - start.tv_sec, start.tv_nsec, - i, (double)i / 1000000000.0); + ksft_print_msg("%lu.%09lu - %lu.%09lu = %llu (%.1fs)\n", + finish.tv_sec, finish.tv_nsec, + start.tv_sec, start.tv_nsec, + i, (double)i / 1000000000.0); return i; } @@ -53,7 +53,7 @@ unsigned long long calibrate(void) pid_t pid, ret; int seconds = 15; - printf("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds); + ksft_print_msg("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds); samples 
= 0; pid = getpid(); @@ -98,24 +98,36 @@ bool le(int i_one, int i_two) } long compare(const char *name_one, const char *name_eval, const char *name_two, - unsigned long long one, bool (*eval)(int, int), unsigned long long two) + unsigned long long one, bool (*eval)(int, int), unsigned long long two, + bool skip) { bool good; - printf("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two, - (long long)one, name_eval, (long long)two); + if (skip) { + ksft_test_result_skip("%s %s %s\n", name_one, name_eval, + name_two); + return 0; + } + + ksft_print_msg("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two, + (long long)one, name_eval, (long long)two); if (one > INT_MAX) { - printf("Miscalculation! Measurement went negative: %lld\n", (long long)one); - return 1; + ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)one); + good = false; + goto out; } if (two > INT_MAX) { - printf("Miscalculation! Measurement went negative: %lld\n", (long long)two); - return 1; + ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)two); + good = false; + goto out; } good = eval(one, two); printf("%s\n", good ? "✔️" : "❌"); +out: + ksft_test_result(good, "%s %s %s\n", name_one, name_eval, name_two); + return good ? 0 : 1; } @@ -142,15 +154,22 @@ int main(int argc, char *argv[]) unsigned long long samples, calc; unsigned long long native, filter1, filter2, bitmap1, bitmap2; unsigned long long entry, per_filter1, per_filter2; + bool skip = false; setbuf(stdout, NULL); - printf("Running on:\n"); + ksft_print_header(); + ksft_set_plan(7); + + ksft_print_msg("Running on:\n"); + ksft_print_msg(""); system("uname -a"); - printf("Current BPF sysctl settings:\n"); + ksft_print_msg("Current BPF sysctl settings:\n"); /* Avoid using "sysctl" which may not be installed. */ + ksft_print_msg(""); system("grep -H . /proc/sys/net/core/bpf_jit_enable"); + ksft_print_msg(""); system("grep -H . 
/proc/sys/net/core/bpf_jit_harden"); if (argc > 1) @@ -158,11 +177,11 @@ int main(int argc, char *argv[]) else samples = calibrate(); - printf("Benchmarking %llu syscalls...\n", samples); + ksft_print_msg("Benchmarking %llu syscalls...\n", samples); /* Native call */ native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid native: %llu ns\n", native); + ksft_print_msg("getpid native: %llu ns\n", native); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); assert(ret == 0); @@ -172,35 +191,37 @@ int main(int argc, char *argv[]) assert(ret == 0); bitmap1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 1 filter (bitmap): %llu ns\n", bitmap1); + ksft_print_msg("getpid RET_ALLOW 1 filter (bitmap): %llu ns\n", bitmap1); /* Second filter resulting in a bitmap */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog); assert(ret == 0); bitmap2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 2 filters (bitmap): %llu ns\n", bitmap2); + ksft_print_msg("getpid RET_ALLOW 2 filters (bitmap): %llu ns\n", bitmap2); /* Third filter, can no longer be converted to bitmap */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); assert(ret == 0); filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 3 filters (full): %llu ns\n", filter1); + ksft_print_msg("getpid RET_ALLOW 3 filters (full): %llu ns\n", filter1); /* Fourth filter, can not be converted to bitmap because of filter 3 */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog); assert(ret == 0); filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW 4 filters (full): %llu ns\n", filter2); + ksft_print_msg("getpid RET_ALLOW 4 filters (full): %llu ns\n", filter2); /* Estimations */ #define ESTIMATE(fmt, var, what) do { \ var = (what); \ - printf("Estimated " fmt ": %llu ns\n", var); \ - if (var > INT_MAX) \ - goto more_samples; \ + ksft_print_msg("Estimated 
" fmt ": %llu ns\n", var); \ + if (var > INT_MAX) { \ + skip = true; \ + ret |= 1; \ + } \ } while (0) ESTIMATE("total seccomp overhead for 1 bitmapped filter", calc, @@ -218,31 +239,34 @@ int main(int argc, char *argv[]) ESTIMATE("seccomp per-filter overhead (filters / 4)", per_filter2, (filter2 - native - entry) / 4); - printf("Expectations:\n"); - ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1); - bits = compare("native", "≤", "1 filter", native, le, filter1); + ksft_print_msg("Expectations:\n"); + ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1, + skip); + bits = compare("native", "≤", "1 filter", native, le, filter1, + skip); if (bits) - goto more_samples; + skip = true; ret |= compare("per-filter (last 2 diff)", "≈", "per-filter (filters / 4)", - per_filter1, approx, per_filter2); + per_filter1, approx, per_filter2, skip); bits = compare("1 bitmapped", "≈", "2 bitmapped", - bitmap1 - native, approx, bitmap2 - native); + bitmap1 - native, approx, bitmap2 - native, skip); if (bits) { - printf("Skipping constant action bitmap expectations: they appear unsupported.\n"); - goto out; + ksft_print_msg("Skipping constant action bitmap expectations: they appear unsupported.\n"); + skip = true; } - ret |= compare("entry", "≈", "1 bitmapped", entry, approx, bitmap1 - native); - ret |= compare("entry", "≈", "2 bitmapped", entry, approx, bitmap2 - native); + ret |= compare("entry", "≈", "1 bitmapped", entry, approx, + bitmap1 - native, skip); + ret |= compare("entry", "≈", "2 bitmapped", entry, approx, + bitmap2 - native, skip); ret |= compare("native + entry + (per filter * 4)", "≈", "4 filters total", - entry + (per_filter1 * 4) + native, approx, filter2); - if (ret == 0) - goto out; + entry + (per_filter1 * 4) + native, approx, filter2, + skip); -more_samples: - printf("Saw unexpected benchmark result. Try running again with more samples?\n"); -out: - return 0; + if (ret) + ksft_print_msg("Saw unexpected benchmark result. 
Try running again with more samples?\n"); + + ksft_finished(); } diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 38f6514699..cacf6507f6 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -784,7 +784,7 @@ void *kill_thread(void *data) bool die = (bool)data; if (die) { - prctl(PR_GET_SECCOMP, 0, 0, 0, 0); + syscall(__NR_getpid); return (void *)SIBLING_EXIT_FAILURE; } @@ -803,11 +803,11 @@ void kill_thread_or_group(struct __test_metadata *_metadata, { pthread_t thread; void *status; - /* Kill only when calling __NR_prctl. */ + /* Kill only when calling __NR_getpid. */ struct sock_filter filter_thread[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; @@ -819,7 +819,7 @@ void kill_thread_or_group(struct __test_metadata *_metadata, struct sock_filter filter_process[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), BPF_STMT(BPF_RET|BPF_K, kill), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; @@ -3709,7 +3709,12 @@ TEST(user_notification_sibling_pid_ns) ASSERT_GE(pid, 0); if (pid == 0) { - ASSERT_EQ(unshare(CLONE_NEWPID), 0); + ASSERT_EQ(unshare(CLONE_NEWPID), 0) { + if (errno == EPERM) + SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN"); + else if (errno == EINVAL) + SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)"); + } pid2 = fork(); ASSERT_GE(pid2, 0); @@ -3727,6 +3732,8 @@ TEST(user_notification_sibling_pid_ns) ASSERT_EQ(unshare(CLONE_NEWPID), 0) { if (errno == EPERM) SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN"); + else if (errno == EINVAL) + SKIP(return, "CLONE_NEWPID 
is invalid (missing CONFIG_PID_NS?)"); } ASSERT_EQ(errno, 0); @@ -4037,6 +4044,16 @@ TEST(user_notification_filter_empty_threaded) EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); } + +int get_next_fd(int prev_fd) +{ + for (int i = prev_fd + 1; i < FD_SETSIZE; ++i) { + if (fcntl(i, F_GETFD) == -1) + return i; + } + _exit(EXIT_FAILURE); +} + TEST(user_notification_addfd) { pid_t pid; @@ -4053,7 +4070,7 @@ TEST(user_notification_addfd) /* There may be arbitrary already-open fds at test start. */ memfd = memfd_create("test", 0); ASSERT_GE(memfd, 0); - nextfd = memfd + 1; + nextfd = get_next_fd(memfd); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret) { @@ -4064,7 +4081,8 @@ TEST(user_notification_addfd) /* Check that the basic notification machinery works */ listener = user_notif_syscall(__NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER); - ASSERT_EQ(listener, nextfd++); + ASSERT_EQ(listener, nextfd); + nextfd = get_next_fd(nextfd); pid = fork(); ASSERT_GE(pid, 0); @@ -4119,14 +4137,16 @@ TEST(user_notification_addfd) /* Verify we can set an arbitrary remote fd */ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); - EXPECT_EQ(fd, nextfd++); + EXPECT_EQ(fd, nextfd); + nextfd = get_next_fd(nextfd); EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); /* Verify we can set an arbitrary remote fd with large size */ memset(&big, 0x0, sizeof(big)); big.addfd = addfd; fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big); - EXPECT_EQ(fd, nextfd++); + EXPECT_EQ(fd, nextfd); + nextfd = get_next_fd(nextfd); /* Verify we can set a specific remote fd */ addfd.newfd = 42; @@ -4164,7 +4184,8 @@ TEST(user_notification_addfd) * Child has earlier "low" fds and now 42, so we expect the next * lowest available fd to be assigned here. 
*/ - EXPECT_EQ(fd, nextfd++); + EXPECT_EQ(fd, nextfd); + nextfd = get_next_fd(nextfd); ASSERT_EQ(filecmp(getpid(), pid, memfd, fd), 0); /* diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile index 01abe4969b..867f88ce25 100644 --- a/tools/testing/selftests/sgx/Makefile +++ b/tools/testing/selftests/sgx/Makefile @@ -12,9 +12,11 @@ OBJCOPY := $(CROSS_COMPILE)objcopy endif INCLUDES := -I$(top_srcdir)/tools/include -HOST_CFLAGS := -Wall -Werror -g $(INCLUDES) -fPIC -z noexecstack -ENCL_CFLAGS := -Wall -Werror -static -nostdlib -nostartfiles -fPIC \ +HOST_CFLAGS := -Wall -Werror -g $(INCLUDES) -fPIC +HOST_LDFLAGS := -z noexecstack -lcrypto +ENCL_CFLAGS += -Wall -Werror -static-pie -nostdlib -ffreestanding -fPIE \ -fno-stack-protector -mrdrnd $(INCLUDES) +ENCL_LDFLAGS := -Wl,-T,test_encl.lds,--build-id=none ifeq ($(CAN_BUILD_X86_64), 1) TEST_CUSTOM_PROGS := $(OUTPUT)/test_sgx @@ -28,7 +30,7 @@ $(OUTPUT)/test_sgx: $(OUTPUT)/main.o \ $(OUTPUT)/sigstruct.o \ $(OUTPUT)/call.o \ $(OUTPUT)/sign_key.o - $(CC) $(HOST_CFLAGS) -o $@ $^ -lcrypto + $(CC) $(HOST_CFLAGS) -o $@ $^ $(HOST_LDFLAGS) $(OUTPUT)/main.o: main.c $(CC) $(HOST_CFLAGS) -c $< -o $@ @@ -45,8 +47,8 @@ $(OUTPUT)/call.o: call.S $(OUTPUT)/sign_key.o: sign_key.S $(CC) $(HOST_CFLAGS) -c $< -o $@ -$(OUTPUT)/test_encl.elf: test_encl.lds test_encl.c test_encl_bootstrap.S - $(CC) $(ENCL_CFLAGS) -T $^ -o $@ -Wl,--build-id=none +$(OUTPUT)/test_encl.elf: test_encl.c test_encl_bootstrap.S + $(CC) $(ENCL_CFLAGS) $^ -o $@ $(ENCL_LDFLAGS) EXTRA_CLEAN := \ $(OUTPUT)/test_encl.elf \ diff --git a/tools/testing/selftests/sgx/defines.h b/tools/testing/selftests/sgx/defines.h index d8587c9719..402f8787a7 100644 --- a/tools/testing/selftests/sgx/defines.h +++ b/tools/testing/selftests/sgx/defines.h @@ -13,6 +13,8 @@ #define __aligned(x) __attribute__((__aligned__(x))) #define __packed __attribute__((packed)) +#define __used __attribute__((used)) +#define __section(x)__attribute__((__section__(x))) 
#include "../../../../arch/x86/include/asm/sgx.h" #include "../../../../arch/x86/include/asm/enclu.h" diff --git a/tools/testing/selftests/sgx/test_encl.c b/tools/testing/selftests/sgx/test_encl.c index ae791df3e5..2c4d709cce 100644 --- a/tools/testing/selftests/sgx/test_encl.c +++ b/tools/testing/selftests/sgx/test_encl.c @@ -5,11 +5,12 @@ #include "defines.h" /* - * Data buffer spanning two pages that will be placed first in .data - * segment. Even if not used internally the second page is needed by - * external test manipulating page permissions. + * Data buffer spanning two pages that will be placed first in the .data + * segment via the linker script. Even if not used internally the second page + * is needed by external test manipulating page permissions, so mark + * encl_buffer as "used" to make sure it is entirely preserved by the compiler. */ -static uint8_t encl_buffer[8192] = { 1 }; +static uint8_t __used __section(".data.encl_buffer") encl_buffer[8192] = { 1 }; enum sgx_enclu_function { EACCEPT = 0x5, @@ -121,21 +122,41 @@ static void do_encl_op_nop(void *_op) } +/* + * Symbol placed at the start of the enclave image by the linker script. + * Declare this extern symbol with visibility "hidden" to ensure the compiler + * does not access it through the GOT and generates position-independent + * addressing as __encl_base(%rip), so we can get the actual enclave base + * during runtime. 
+ */ +extern const uint8_t __attribute__((visibility("hidden"))) __encl_base; + +typedef void (*encl_op_t)(void *); +static const encl_op_t encl_op_array[ENCL_OP_MAX] = { + do_encl_op_put_to_buf, + do_encl_op_get_from_buf, + do_encl_op_put_to_addr, + do_encl_op_get_from_addr, + do_encl_op_nop, + do_encl_eaccept, + do_encl_emodpe, + do_encl_init_tcs_page, +}; + void encl_body(void *rdi, void *rsi) { - const void (*encl_op_array[ENCL_OP_MAX])(void *) = { - do_encl_op_put_to_buf, - do_encl_op_get_from_buf, - do_encl_op_put_to_addr, - do_encl_op_get_from_addr, - do_encl_op_nop, - do_encl_eaccept, - do_encl_emodpe, - do_encl_init_tcs_page, - }; - - struct encl_op_header *op = (struct encl_op_header *)rdi; - - if (op->type < ENCL_OP_MAX) - (*encl_op_array[op->type])(op); + struct encl_op_header *header = (struct encl_op_header *)rdi; + encl_op_t op; + + if (header->type >= ENCL_OP_MAX) + return; + + /* + * The enclave base address needs to be added, as this call site + * *cannot be* made rip-relative by the compiler, or fixed up by + * any other possible means. + */ + op = ((uint64_t)&__encl_base) + encl_op_array[header->type]; + + (*op)(header); } diff --git a/tools/testing/selftests/sgx/test_encl.lds b/tools/testing/selftests/sgx/test_encl.lds index 108bc11d1d..ffe851a1ca 100644 --- a/tools/testing/selftests/sgx/test_encl.lds +++ b/tools/testing/selftests/sgx/test_encl.lds @@ -10,6 +10,7 @@ PHDRS SECTIONS { . 
= 0; + __encl_base = .; .tcs : { *(.tcs*) } : tcs @@ -23,6 +24,7 @@ SECTIONS } : text .data : { + *(.data.encl_buffer) *(.data*) } : data @@ -31,6 +33,8 @@ SECTIONS *(.note*) *(.debug*) *(.eh_frame*) + *(.dyn*) + *(.gnu.hash) } } diff --git a/tools/testing/selftests/sgx/test_encl_bootstrap.S b/tools/testing/selftests/sgx/test_encl_bootstrap.S index 03ae0f57e2..d8c4ac94e0 100644 --- a/tools/testing/selftests/sgx/test_encl_bootstrap.S +++ b/tools/testing/selftests/sgx/test_encl_bootstrap.S @@ -42,9 +42,12 @@ encl_entry: # RBX contains the base address for TCS, which is the first address # inside the enclave for TCS #1 and one page into the enclave for - # TCS #2. By adding the value of encl_stack to it, we get - # the absolute address for the stack. - lea (encl_stack)(%rbx), %rax + # TCS #2. First make it relative by subtracting __encl_base and + # then add the address of encl_stack to get the address for the stack. + lea __encl_base(%rip), %rax + sub %rax, %rbx + lea encl_stack(%rip), %rax + add %rbx, %rax jmp encl_entry_core encl_dyn_entry: # Entry point for dynamically created TCS page expected to follow @@ -55,25 +58,12 @@ encl_entry_core: push %rax push %rcx # push the address after EENTER - push %rbx # push the enclave base address + # NOTE: as the selftest enclave is *not* intended for production, + # simplify the code by not initializing ABI registers on entry or + # cleansing caller-save registers on exit. call encl_body - pop %rbx # pop the enclave base address - - /* Clear volatile GPRs, except RAX (EEXIT function). */ - xor %rcx, %rcx - xor %rdx, %rdx - xor %rdi, %rdi - xor %rsi, %rsi - xor %r8, %r8 - xor %r9, %r9 - xor %r10, %r10 - xor %r11, %r11 - - # Reset status flags. - add %rdx, %rdx # OF = SF = AF = CF = 0; ZF = PF = 1 - # Prepare EEXIT target by popping the address of the instruction after # EENTER to RBX. 
pop %rbx diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh index 444b2befda..84472b436c 100755 --- a/tools/testing/selftests/sysctl/sysctl.sh +++ b/tools/testing/selftests/sysctl/sysctl.sh @@ -35,6 +35,7 @@ ALL_TESTS="$ALL_TESTS 0007:1:1:boot_int:1" ALL_TESTS="$ALL_TESTS 0008:1:1:match_int:1" ALL_TESTS="$ALL_TESTS 0009:1:1:unregister_error:0" ALL_TESTS="$ALL_TESTS 0010:1:1:mnt/mnt_error:0" +ALL_TESTS="$ALL_TESTS 0011:1:1:empty_add:0" function allow_user_defaults() { @@ -63,7 +64,7 @@ function check_production_sysctl_writes_strict() else old_strict=$(cat ${WRITES_STRICT}) if [ "$old_strict" = "1" ]; then - echo "ok" + echo "OK" else echo "FAIL, strict value is 0 but force to 1 to continue" >&2 echo "1" > ${WRITES_STRICT} @@ -225,7 +226,7 @@ run_numerictests() echo "FAIL" >&2 exit 1 else - echo "ok" + echo "OK" fi echo -n "Checking sysctl is not set to test value ... " @@ -233,7 +234,7 @@ run_numerictests() echo "FAIL" >&2 exit 1 else - echo "ok" + echo "OK" fi echo -n "Writing sysctl from shell ... " @@ -242,7 +243,7 @@ run_numerictests() echo "FAIL" >&2 exit 1 else - echo "ok" + echo "OK" fi echo -n "Resetting sysctl to original value ... " @@ -251,7 +252,7 @@ run_numerictests() echo "FAIL" >&2 exit 1 else - echo "ok" + echo "OK" fi # Now that we've validated the sanity of "set_test" and "set_orig", @@ -265,7 +266,7 @@ run_numerictests() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi echo -n "Writing middle of sysctl after synchronized seek ... " @@ -275,7 +276,7 @@ run_numerictests() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi echo -n "Writing beyond end of sysctl ... " @@ -285,7 +286,7 @@ run_numerictests() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi echo -n "Writing sysctl with multiple long writes ... " @@ -296,14 +297,14 @@ run_numerictests() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi test_rc } check_failure() { - echo -n "Testing that $1 fails as expected..." 
+ echo -n "Testing that $1 fails as expected ... " reset_vals TEST_STR="$1" orig="$(cat $TARGET)" @@ -314,7 +315,7 @@ check_failure() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi test_rc } @@ -356,7 +357,7 @@ run_wideint_tests() # Your test must accept digits 3 and 4 to use this run_limit_digit() { - echo -n "Checking ignoring spaces up to PAGE_SIZE works on write ..." + echo -n "Checking ignoring spaces up to PAGE_SIZE works on write ... " reset_vals LIMIT=$((MAX_DIGITS -1)) @@ -368,11 +369,11 @@ run_limit_digit() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi test_rc - echo -n "Checking passing PAGE_SIZE of spaces fails on write ..." + echo -n "Checking passing PAGE_SIZE of spaces fails on write ... " reset_vals LIMIT=$((MAX_DIGITS)) @@ -384,7 +385,7 @@ run_limit_digit() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi test_rc } @@ -392,7 +393,7 @@ run_limit_digit() # You are using an int run_limit_digit_int() { - echo -n "Testing INT_MAX works ..." + echo -n "Testing INT_MAX works ... " reset_vals TEST_STR="$INT_MAX" echo -n $TEST_STR > $TARGET @@ -401,11 +402,11 @@ run_limit_digit_int() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi test_rc - echo -n "Testing INT_MAX + 1 will fail as expected..." + echo -n "Testing INT_MAX + 1 will fail as expected ... " reset_vals let TEST_STR=$INT_MAX+1 echo -n $TEST_STR > $TARGET 2> /dev/null @@ -414,11 +415,11 @@ run_limit_digit_int() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi test_rc - echo -n "Testing negative values will work as expected..." + echo -n "Testing negative values will work as expected ... 
" reset_vals TEST_STR="-3" echo -n $TEST_STR > $TARGET 2> /dev/null @@ -426,7 +427,7 @@ run_limit_digit_int() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi test_rc } @@ -442,7 +443,7 @@ run_limit_digit_int_array() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi test_rc @@ -459,7 +460,7 @@ run_limit_digit_int_array() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi test_rc @@ -477,7 +478,7 @@ run_limit_digit_int_array() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi test_rc @@ -494,7 +495,7 @@ run_limit_digit_int_array() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi test_rc } @@ -502,7 +503,7 @@ run_limit_digit_int_array() # You are using an unsigned int run_limit_digit_uint() { - echo -n "Testing UINT_MAX works ..." + echo -n "Testing UINT_MAX works ... " reset_vals TEST_STR="$UINT_MAX" echo -n $TEST_STR > $TARGET @@ -511,11 +512,11 @@ run_limit_digit_uint() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi test_rc - echo -n "Testing UINT_MAX + 1 will fail as expected..." + echo -n "Testing UINT_MAX + 1 will fail as expected ... " reset_vals TEST_STR=$(($UINT_MAX+1)) echo -n $TEST_STR > $TARGET 2> /dev/null @@ -524,11 +525,11 @@ run_limit_digit_uint() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi test_rc - echo -n "Testing negative values will not work as expected ..." + echo -n "Testing negative values will not work as expected ... " reset_vals TEST_STR="-3" echo -n $TEST_STR > $TARGET 2> /dev/null @@ -537,7 +538,7 @@ run_limit_digit_uint() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi test_rc } @@ -551,7 +552,7 @@ run_stringtests() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi echo -n "Writing middle of sysctl after unsynchronized seek ... " @@ -561,7 +562,7 @@ run_stringtests() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi echo -n "Checking sysctl maxlen is at least $MAXLEN ... 
" @@ -572,7 +573,7 @@ run_stringtests() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi echo -n "Checking sysctl keeps original string on overflow append ... " @@ -583,7 +584,7 @@ run_stringtests() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi echo -n "Checking sysctl stays NULL terminated on write ... " @@ -594,7 +595,7 @@ run_stringtests() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi echo -n "Checking sysctl stays NULL terminated on overwrite ... " @@ -605,7 +606,7 @@ run_stringtests() echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" fi test_rc @@ -650,7 +651,7 @@ run_bitmaptest() { fi done - echo -n "Checking bitmap handler... " + echo -n "Checking bitmap handler ... " TEST_FILE=$(mktemp) echo -n "$TEST_STR" > $TEST_FILE @@ -665,7 +666,7 @@ run_bitmaptest() { echo "FAIL" >&2 rc=1 else - echo "ok" + echo "OK" rc=0 fi test_rc @@ -742,89 +743,111 @@ sysctl_test_0006() sysctl_test_0007() { TARGET="${SYSCTL}/$(get_test_target 0007)" + echo -n "Testing if $TARGET is set to 1 ... " + if [ ! -f $TARGET ]; then - echo "Skipping test for $TARGET as it is not present ..." + echo -e "SKIPPING\n$TARGET is not present" return $ksft_skip fi if [ -d $DIR ]; then - echo "Boot param test only possible sysctl_test is built-in, not module:" + echo -e "SKIPPING\nTest only possible if sysctl_test is built-in, not module:" cat $TEST_DIR/config >&2 return $ksft_skip fi - echo -n "Testing if $TARGET is set to 1 ..." ORIG=$(cat "${TARGET}") if [ x$ORIG = "x1" ]; then - echo "ok" + echo "OK" return 0 fi - echo "FAIL" - echo "Checking if /proc/cmdline contains setting of the expected parameter ..." + if [ ! 
-f /proc/cmdline ]; then - echo "/proc/cmdline does not exist, test inconclusive" - return 0 + echo -e "SKIPPING\nThere is no /proc/cmdline to check for paramter" + return $ksft_skip fi FOUND=$(grep -c "sysctl[./]debug[./]test_sysctl[./]boot_int=1" /proc/cmdline) if [ $FOUND = "1" ]; then - echo "Kernel param found but $TARGET is not 1, TEST FAILED" + echo -e "FAIL\nKernel param found but $TARGET is not 1." >&2 rc=1 test_rc fi - echo "Skipping test, expected kernel parameter missing." - echo "To perform this test, make sure kernel is booted with parameter: sysctl.debug.test_sysctl.boot_int=1" + echo -e "SKIPPING\nExpected kernel parameter missing." + echo "Kernel must be booted with parameter: sysctl.debug.test_sysctl.boot_int=1" return $ksft_skip } sysctl_test_0008() { TARGET="${SYSCTL}/$(get_test_target 0008)" + echo -n "Testing if $TARGET is matched in kernel ... " + if [ ! -f $TARGET ]; then - echo "Skipping test for $TARGET as it is not present ..." + echo -e "SKIPPING\n$TARGET is not present" return $ksft_skip fi - echo -n "Testing if $TARGET is matched in kernel" ORIG_VALUE=$(cat "${TARGET}") if [ $ORIG_VALUE -ne 1 ]; then - echo "TEST FAILED" + echo "FAIL" >&2 rc=1 test_rc fi - echo "ok" + echo "OK" return 0 } sysctl_test_0009() { TARGET="${SYSCTL}/$(get_test_target 0009)" - echo -n "Testing if $TARGET unregistered correctly ..." + echo -n "Testing if $TARGET unregistered correctly ... " if [ -d $TARGET ]; then - echo "TEST FAILED" + echo "FAIL" >&2 rc=1 test_rc fi - echo "ok" + echo "OK" return 0 } sysctl_test_0010() { TARGET="${SYSCTL}/$(get_test_target 0010)" - echo -n "Testing that $TARGET was not created ..." + echo -n "Testing that $TARGET was not created ... " if [ -d $TARGET ]; then - echo "TEST FAILED" + echo "FAIL" >&2 + rc=1 + test_rc + fi + + echo "OK" + return 0 +} + +sysctl_test_0011() +{ + TARGET="${SYSCTL}/$(get_test_target 0011)" + echo -n "Testing empty dir handling in ${TARGET} ... " + if [ ! 
-d ${TARGET} ]; then + echo -e "FAIL\nCould not create ${TARGET}" >&2 + rc=1 + test_rc + fi + + TARGET2="${TARGET}/empty" + if [ ! -d ${TARGET2} ]; then + echo -e "FAIL\nCould not create ${TARGET2}" >&2 rc=1 test_rc fi - echo "ok" + echo "OK" return 0 } @@ -846,6 +869,7 @@ list_tests() echo "0008 x $(get_test_count 0008) - tests sysctl macro values match" echo "0009 x $(get_test_count 0009) - tests sysct unregister" echo "0010 x $(get_test_count 0010) - tests sysct mount point" + echo "0011 x $(get_test_count 0011) - tests empty directories" } usage() @@ -934,7 +958,7 @@ function skip_test() if target_exists $TEST_TARGET $TEST_ID; then TEST_SKIP=$(get_test_skip_no_target $TEST_ID) if [[ $TEST_SKIP -eq "1" ]]; then - echo "Target for test $TEST_ID: $TEST_TARGET not exist, skipping test ..." + echo "Target $TEST_TARGET for test $TEST_ID does not exist ... SKIPPING" return 0 fi fi diff --git a/tools/testing/selftests/tc-testing/Makefile b/tools/testing/selftests/tc-testing/Makefile index b1fa2e177e..9153e3428a 100644 --- a/tools/testing/selftests/tc-testing/Makefile +++ b/tools/testing/selftests/tc-testing/Makefile @@ -1,31 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -include ../../../scripts/Makefile.include -top_srcdir = $(abspath ../../../..) -APIDIR := $(top_scrdir)/include/uapi -TEST_GEN_FILES = action.o +TEST_PROGS += tdc.sh +TEST_FILES := action-ebpf tdc*.py Tdc*.py plugins plugin-lib tc-tests scripts include ../lib.mk - -PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) - -ifeq ($(PROBE),) - CPU ?= probe -else - CPU ?= generic -endif - -CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \ - | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') - -CLANG_FLAGS = -I. 
-I$(APIDIR) \ - $(CLANG_SYS_INCLUDES) \ - -Wno-compare-distinct-pointer-types - -$(OUTPUT)/%.o: %.c - $(CLANG) $(CLANG_FLAGS) \ - -O2 --target=bpf -emit-llvm -c $< -o - | \ - $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ - -TEST_PROGS += ./tdc.sh -TEST_FILES := tdc*.py Tdc*.py plugins plugin-lib tc-tests scripts diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README index be7b00799b..fc8e858ff1 100644 --- a/tools/testing/selftests/tc-testing/README +++ b/tools/testing/selftests/tc-testing/README @@ -195,8 +195,6 @@ directory: and the other is a test whether the command leaked memory or not. (This one is a preliminary version, it may not work quite right yet, but the overall template is there and it should only need tweaks.) - - buildebpfPlugin.py: - builds all programs in $EBPFDIR. ACKNOWLEDGEMENTS diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index 012aa33b34..c60acba951 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -82,7 +82,6 @@ CONFIG_NET_ACT_GACT=m CONFIG_GACT_PROB=y CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_SAMPLE=m -CONFIG_NET_ACT_IPT=m CONFIG_NET_ACT_NAT=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_ACT_SIMP=m diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py deleted file mode 100644 index d34fe06268..0000000000 --- a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py +++ /dev/null @@ -1,67 +0,0 @@ -''' -build ebpf program -''' - -import os -import signal -from string import Template -import subprocess -import time -from TdcPlugin import TdcPlugin -from tdc_config import * - -class SubPlugin(TdcPlugin): - def __init__(self): - self.sub_class = 'buildebpf/SubPlugin' - self.tap = '' - super().__init__() - - def pre_suite(self, testcount, testidlist): - super().pre_suite(testcount, testidlist) 
- - if self.args.buildebpf: - self._ebpf_makeall() - - def post_suite(self, index): - super().post_suite(index) - - self._ebpf_makeclean() - - def add_args(self, parser): - super().add_args(parser) - - self.argparser_group = self.argparser.add_argument_group( - 'buildebpf', - 'options for buildebpfPlugin') - self.argparser_group.add_argument( - '--nobuildebpf', action='store_false', default=True, - dest='buildebpf', - help='Don\'t build eBPF programs') - - return self.argparser - - def _ebpf_makeall(self): - if self.args.buildebpf: - self._make('all') - - def _ebpf_makeclean(self): - if self.args.buildebpf: - self._make('clean') - - def _make(self, target): - command = 'make -C {} {}'.format(self.args.NAMES['EBPFDIR'], target) - proc = subprocess.Popen(command, - shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env=os.environ.copy()) - (rawout, serr) = proc.communicate() - - if proc.returncode != 0 and len(serr) > 0: - foutput = serr.decode("utf-8") - else: - foutput = rawout.decode("utf-8") - - proc.stdout.close() - proc.stderr.close() - return proc, foutput diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index b62429b0fc..bb19b8b76d 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -9,43 +9,13 @@ from TdcPlugin import TdcPlugin from tdc_config import * -def prepare_suite(obj, test): - original = obj.args.NAMES - - if 'skip' in test and test['skip'] == 'yes': - return - - if 'nsPlugin' not in test['plugins']: - return - - shadow = {} - shadow['IP'] = original['IP'] - shadow['TC'] = original['TC'] - shadow['NS'] = '{}-{}'.format(original['NS'], test['random']) - shadow['DEV0'] = '{}id{}'.format(original['DEV0'], test['id']) - shadow['DEV1'] = '{}id{}'.format(original['DEV1'], test['id']) - shadow['DUMMY'] = '{}id{}'.format(original['DUMMY'], test['id']) - shadow['DEV2'] = 
original['DEV2'] - obj.args.NAMES = shadow - - if obj.args.namespace: - obj._ns_create() - else: - obj._ports_create() - - # Make sure the netns is visible in the fs - while True: - obj._proc_check() - try: - ns = obj.args.NAMES['NS'] - f = open('/run/netns/{}'.format(ns)) - f.close() - break - except: - time.sleep(0.1) - continue - - obj.args.NAMES = original +try: + from pyroute2 import netns + from pyroute2 import IPRoute + netlink = True +except ImportError: + netlink = False + print("!!! Consider installing pyroute2 !!!") class SubPlugin(TdcPlugin): def __init__(self): @@ -53,64 +23,71 @@ class SubPlugin(TdcPlugin): super().__init__() def pre_suite(self, testcount, testlist): - from itertools import cycle - super().pre_suite(testcount, testlist) - print("Setting up namespaces and devices...") + def prepare_test(self, test): + if 'skip' in test and test['skip'] == 'yes': + return - with Pool(self.args.mp) as p: - it = zip(cycle([self]), testlist) - p.starmap(prepare_suite, it) + if 'nsPlugin' not in test['plugins']: + return - def pre_case(self, caseinfo, test_skip): + if netlink == True: + self._nl_ns_create() + else: + self._ipr2_ns_create() + + # Make sure the netns is visible in the fs + ticks = 20 + while True: + if ticks == 0: + raise TimeoutError + self._proc_check() + try: + ns = self.args.NAMES['NS'] + f = open('/run/netns/{}'.format(ns)) + f.close() + break + except: + time.sleep(0.1) + ticks -= 1 + continue + + def pre_case(self, test, test_skip): if self.args.verbose: print('{}.pre_case'.format(self.sub_class)) if test_skip: return + self.prepare_test(test) def post_case(self): if self.args.verbose: print('{}.post_case'.format(self.sub_class)) - if self.args.namespace: - self._ns_destroy() + if netlink == True: + self._nl_ns_destroy() else: - self._ports_destroy() + self._ipr2_ns_destroy() def post_suite(self, index): if self.args.verbose: print('{}.post_suite'.format(self.sub_class)) # Make sure we don't leak resources - for f in 
os.listdir('/run/netns/'): - cmd = self._replace_keywords("$IP netns del {}".format(f)) + cmd = self._replace_keywords("$IP -a netns del") - if self.args.verbose > 3: - print('_exec_cmd: command "{}"'.format(cmd)) - - subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + if self.args.verbose > 3: + print('_exec_cmd: command "{}"'.format(cmd)) - def add_args(self, parser): - super().add_args(parser) - self.argparser_group = self.argparser.add_argument_group( - 'netns', - 'options for nsPlugin(run commands in net namespace)') - self.argparser_group.add_argument( - '-N', '--no-namespace', action='store_false', default=True, - dest='namespace', help='Don\'t run commands in namespace') - return self.argparser + subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) def adjust_command(self, stage, command): super().adjust_command(stage, command) cmdform = 'list' cmdlist = list() - if not self.args.namespace: - return command - if self.args.verbose: print('{}.adjust_command'.format(self.sub_class)) @@ -138,63 +115,90 @@ class SubPlugin(TdcPlugin): print('adjust_command: return command [{}]'.format(command)) return command - def _ports_create_cmds(self): - cmds = [] - - cmds.append(self._replace_keywords('link add $DEV0 type veth peer name $DEV1')) - cmds.append(self._replace_keywords('link set $DEV0 up')) - cmds.append(self._replace_keywords('link add $DUMMY type dummy')) - if not self.args.namespace: - cmds.append(self._replace_keywords('link set $DEV1 up')) - - return cmds - - def _ports_create(self): - self._exec_cmd_batched('pre', self._ports_create_cmds()) + def _nl_ns_create(self): + ns = self.args.NAMES["NS"]; + dev0 = self.args.NAMES["DEV0"]; + dev1 = self.args.NAMES["DEV1"]; + dummy = self.args.NAMES["DUMMY"]; - def _ports_destroy_cmd(self): - return self._replace_keywords('link del $DEV0') - - def _ports_destroy(self): - self._exec_cmd('post', self._ports_destroy_cmd()) - - def 
_ns_create_cmds(self): + if self.args.verbose: + print('{}._nl_ns_create'.format(self.sub_class)) + + netns.create(ns) + netns.pushns(newns=ns) + with IPRoute() as ip: + ip.link('add', ifname=dev1, kind='veth', peer={'ifname': dev0, 'net_ns_fd':'/proc/1/ns/net'}) + ip.link('add', ifname=dummy, kind='dummy') + ticks = 20 + while True: + if ticks == 0: + raise TimeoutError + try: + dev1_idx = ip.link_lookup(ifname=dev1)[0] + dummy_idx = ip.link_lookup(ifname=dummy)[0] + ip.link('set', index=dev1_idx, state='up') + ip.link('set', index=dummy_idx, state='up') + break + except: + time.sleep(0.1) + ticks -= 1 + continue + netns.popns() + + with IPRoute() as ip: + ticks = 20 + while True: + if ticks == 0: + raise TimeoutError + try: + dev0_idx = ip.link_lookup(ifname=dev0)[0] + ip.link('set', index=dev0_idx, state='up') + break + except: + time.sleep(0.1) + ticks -= 1 + continue + + def _ipr2_ns_create_cmds(self): cmds = [] - if self.args.namespace: - ns = self.args.NAMES['NS'] + ns = self.args.NAMES['NS'] - cmds.append(self._replace_keywords('netns add {}'.format(ns))) - cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns))) - cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns))) - cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns))) - cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns))) + cmds.append(self._replace_keywords('netns add {}'.format(ns))) + cmds.append(self._replace_keywords('link add $DEV1 type veth peer name $DEV0')) + cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns))) + cmds.append(self._replace_keywords('link add $DUMMY type dummy'.format(ns))) + cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns))) + cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns))) + cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns))) + 
cmds.append(self._replace_keywords('link set $DEV0 up'.format(ns))) - if self.args.device: - cmds.append(self._replace_keywords('link set $DEV2 netns {}'.format(ns))) - cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV2 up'.format(ns))) + if self.args.device: + cmds.append(self._replace_keywords('link set $DEV2 netns {}'.format(ns))) + cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV2 up'.format(ns))) return cmds - def _ns_create(self): + def _ipr2_ns_create(self): ''' Create the network namespace in which the tests will be run and set up the required network devices for it. ''' - self._ports_create() - self._exec_cmd_batched('pre', self._ns_create_cmds()) + self._exec_cmd_batched('pre', self._ipr2_ns_create_cmds()) + + def _nl_ns_destroy(self): + ns = self.args.NAMES['NS'] + netns.remove(ns) - def _ns_destroy_cmd(self): + def _ipr2_ns_destroy_cmd(self): return self._replace_keywords('netns delete {}'.format(self.args.NAMES['NS'])) - def _ns_destroy(self): + def _ipr2_ns_destroy(self): ''' Destroy the network namespace for testing (and any associated network devices as well) ''' - if self.args.namespace: - self._exec_cmd('post', self._ns_destroy_cmd()) - self._ports_destroy() + self._exec_cmd('post', self._ipr2_ns_destroy_cmd()) @cached_property def _proc(self): diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json index 91832400dd..6e00bf32ef 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json @@ -54,9 +54,6 @@ "actions", "bpf" ], - "plugins": { - "requires": "buildebpfPlugin" - }, "setup": [ [ "$TC action flush action bpf", @@ -65,10 +62,10 @@ 255 ] ], - "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667", + "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action-ebpf section action-ok index 
667", "expExitCode": "0", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9].* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref", + "matchPattern": "action order [0-9]*: bpf action-ebpf:\\[action-ok\\] id [0-9].* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref", "matchCount": "1", "teardown": [ "$TC action flush action bpf" @@ -81,9 +78,6 @@ "actions", "bpf" ], - "plugins": { - "requires": "buildebpfPlugin" - }, "setup": [ [ "$TC action flush action bpf", @@ -92,10 +86,10 @@ 255 ] ], - "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ko index 667", + "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action-ebpf section action-ko index 667", "expExitCode": "255", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ko\\] id [0-9].*index 667 ref", + "matchPattern": "action order [0-9]*: bpf action-ebpf:\\[action-ko\\] id [0-9].*index 667 ref", "matchCount": "0", "teardown": [ [ diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/xt.json b/tools/testing/selftests/tc-testing/tc-tests/actions/xt.json deleted file mode 100644 index 1a92e8898f..0000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/xt.json +++ /dev/null @@ -1,243 +0,0 @@ -[ - { - "id": "2029", - "name": "Add xt action with log-prefix", - "category": [ - "actions", - "xt" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - [ - "$TC actions flush action xt", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC action add action xt -j LOG --log-prefix PONG index 100", - "expExitCode": "0", - "verifyCmd": "$TC action ls action xt", - "matchPattern": "action order [0-9]*:.*target LOG level warning prefix \"PONG\".*index 100 ref", - "matchCount": "1", - "teardown": [ - "$TC actions flush action xt" - ] - }, - { - "id": "3562", - "name": "Replace xt action 
log-prefix", - "category": [ - "actions", - "xt" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - [ - "$TC actions flush action xt", - 0, - 1, - 255 - ], - [ - "$TC action add action xt -j LOG --log-prefix PONG index 1", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC action replace action xt -j LOG --log-prefix WIN index 1", - "expExitCode": "0", - "verifyCmd": "$TC action get action xt index 1", - "matchPattern": "action order [0-9]*:.*target LOG level warning prefix \"WIN\".*index 1 ref", - "matchCount": "1", - "teardown": [ - "$TC action flush action xt" - ] - }, - { - "id": "8291", - "name": "Delete xt action with valid index", - "category": [ - "actions", - "xt" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - [ - "$TC actions flush action xt", - 0, - 1, - 255 - ], - [ - "$TC action add action xt -j LOG --log-prefix PONG index 1000", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC action delete action xt index 1000", - "expExitCode": "0", - "verifyCmd": "$TC action get action xt index 1000", - "matchPattern": "action order [0-9]*:.*target LOG level warning prefix \"PONG\".*index 1000 ref", - "matchCount": "0", - "teardown": [ - "$TC action flush action xt" - ] - }, - { - "id": "5169", - "name": "Delete xt action with invalid index", - "category": [ - "actions", - "xt" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - [ - "$TC actions flush action xt", - 0, - 1, - 255 - ], - [ - "$TC action add action xt -j LOG --log-prefix PONG index 1000", - 0, - 1, - 255 - ] - ], - "cmdUnderTest": "$TC action delete action xt index 333", - "expExitCode": "255", - "verifyCmd": "$TC action get action xt index 1000", - "matchPattern": "action order [0-9]*:.*target LOG level warning prefix \"PONG\".*index 1000 ref", - "matchCount": "1", - "teardown": [ - "$TC action flush action xt" - ] - }, - { - "id": "7284", - "name": "List xt actions", - "category": [ - "actions", - "xt" - ], - "plugins": { - "requires": "nsPlugin" - }, - 
"setup": [ - [ - "$TC action flush action xt", - 0, - 1, - 255 - ], - "$TC action add action xt -j LOG --log-prefix PONG index 1001", - "$TC action add action xt -j LOG --log-prefix WIN index 1002", - "$TC action add action xt -j LOG --log-prefix LOSE index 1003" - ], - "cmdUnderTest": "$TC action list action xt", - "expExitCode": "0", - "verifyCmd": "$TC action list action xt", - "matchPattern": "action order [0-9]*: tablename:", - "matchCount": "3", - "teardown": [ - "$TC actions flush action xt" - ] - }, - { - "id": "5010", - "name": "Flush xt actions", - "category": [ - "actions", - "xt" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - [ - "$TC actions flush action xt", - 0, - 1, - 255 - ], - "$TC action add action xt -j LOG --log-prefix PONG index 1001", - "$TC action add action xt -j LOG --log-prefix WIN index 1002", - "$TC action add action xt -j LOG --log-prefix LOSE index 1003" - ], - "cmdUnderTest": "$TC action flush action xt", - "expExitCode": "0", - "verifyCmd": "$TC action list action xt", - "matchPattern": "action order [0-9]*: tablename:", - "matchCount": "0", - "teardown": [ - "$TC actions flush action xt" - ] - }, - { - "id": "8437", - "name": "Add xt action with duplicate index", - "category": [ - "actions", - "xt" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - [ - "$TC actions flush action xt", - 0, - 1, - 255 - ], - "$TC action add action xt -j LOG --log-prefix PONG index 101" - ], - "cmdUnderTest": "$TC action add action xt -j LOG --log-prefix WIN index 101", - "expExitCode": "255", - "verifyCmd": "$TC action get action xt index 101", - "matchPattern": "action order [0-9]*:.*target LOG level warning prefix \"PONG\".*index 101", - "matchCount": "1", - "teardown": [ - "$TC action flush action xt" - ] - }, - { - "id": "2837", - "name": "Add xt action with invalid index", - "category": [ - "actions", - "xt" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - [ - "$TC actions flush action xt", - 0, - 
1, - 255 - ] - ], - "cmdUnderTest": "$TC action add action xt -j LOG --log-prefix WIN index 4294967296", - "expExitCode": "255", - "verifyCmd": "$TC action ls action xt", - "matchPattern": "action order [0-9]*:*target LOG level warning prefix \"WIN\"", - "matchCount": "0", - "teardown": [ - "$TC action flush action xt" - ] - } -] diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json index 013fb983bc..725d406a30 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json @@ -52,17 +52,16 @@ ], "plugins": { "requires": [ - "buildebpfPlugin", "nsPlugin" ] }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action.o section action-ok", + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action-ebpf section action-ok", "expExitCode": "0", "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf", - "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action.o:\\[action-ok\\].*tag [0-9a-f]{16}( jited)?", + "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action-ebpf:\\[action-ok\\].*tag [0-9a-f]{16}( jited)?", "matchCount": "1", "teardown": [ "$TC qdisc del dev $DEV1 ingress" @@ -77,17 +76,16 @@ ], "plugins": { "requires": [ - "buildebpfPlugin", "nsPlugin" ] }, "setup": [ "$TC qdisc add dev $DEV1 ingress" ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action.o section action-ko", + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action-ebpf section action-ko", "expExitCode": "1", "verifyCmd": "$TC filter get dev $DEV1 
parent ffff: handle 1 protocol ip prio 100 bpf", - "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action.o:\\[action-ko\\].*tag [0-9a-f]{16}( jited)?", + "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action-ebpf:\\[action-ko\\].*tag [0-9a-f]{16}( jited)?", "matchCount": "0", "teardown": [ "$TC qdisc del dev $DEV1 ingress" diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json b/tools/testing/selftests/tc-testing/tc-tests/filters/flower.json index c2a433a473..6b08c06420 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/flower.json @@ -173,5 +173,103 @@ "$TC qdisc del dev $DEV2 ingress", "/bin/rm -rf $BATCH_DIR" ] + }, + { + "id": "2ff3", + "name": "Add flower with max handle and then dump it", + "category": [ + "filter", + "flower" + ], + "setup": [ + "$TC qdisc add dev $DEV2 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress handle 0xffffffff flower action ok", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower.*handle 0xffffffff", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress" + ] + }, + { + "id": "d052", + "name": "Add 1M filters with the same action", + "category": [ + "filter", + "flower" + ], + "setup": [ + "$TC qdisc add dev $DEV2 ingress", + "./tdc_batch.py $DEV2 $BATCH_FILE --share_action -n 1000000" + ], + "cmdUnderTest": "$TC -b $BATCH_FILE", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order 0: gact action drop.*index 1 ref 1000000 bind 1000000", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress", + "/bin/rm $BATCH_FILE" + ] + }, + { + "id": "4cbd", + "name": "Try to add filter with duplicate key", + "category": [ + "filter", + "flower" + ], + "setup": [ + 
"$TC qdisc add dev $DEV2 ingress", + "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop" + ], + "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop", + "expExitCode": "2", + "verifyCmd": "$TC -s filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress" + ] + }, + { + "id": "7c65", + "name": "Add flower filter and then terse dump it", + "category": [ + "filter", + "flower" + ], + "setup": [ + "$TC qdisc add dev $DEV2 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", + "expExitCode": "0", + "verifyCmd": "$TC -br filter show dev $DEV2 ingress", + "matchPattern": "filter protocol ip pref 1 flower.*handle", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress" + ] + }, + { + "id": "d45e", + "name": "Add flower filter and verify that terse dump doesn't output filter key", + "category": [ + "filter", + "flower" + ], + "setup": [ + "$TC qdisc add dev $DEV2 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", + "expExitCode": "0", + "verifyCmd": "$TC -br filter show dev $DEV2 ingress", + "matchPattern": " dst_mac e4:11:22:11:4a:51", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV2 ingress" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json b/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json index afa1b9b0c8..f8d28c415b 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json @@ 
-480,5 +480,28 @@ "$TC qdisc del dev $DUMMY ingress", "$TC actions del action police index 199" ] + }, + { + "id": "2638", + "name": "Add matchall and try to get it", + "category": [ + "filter", + "matchall" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall action ok" + ], + "cmdUnderTest": "$TC filter get dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1 ingress", + "matchPattern": "filter protocol all pref 1 matchall chain 0 handle 0x1234", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 clsact" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json deleted file mode 100644 index 361235ad57..0000000000 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json +++ /dev/null @@ -1,129 +0,0 @@ -[ - { - "id": "2638", - "name": "Add matchall and try to get it", - "category": [ - "filter", - "matchall" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 clsact", - "$TC filter add dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall action ok" - ], - "cmdUnderTest": "$TC filter get dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall", - "expExitCode": "0", - "verifyCmd": "$TC filter show dev $DEV1 ingress", - "matchPattern": "filter protocol all pref 1 matchall chain 0 handle 0x1234", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV1 clsact" - ] - }, - { - "id": "2ff3", - "name": "Add flower with max handle and then dump it", - "category": [ - "filter", - "flower" - ], - "setup": [ - "$TC qdisc add dev $DEV2 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress handle 0xffffffff flower action ok", - "expExitCode": "0", - "verifyCmd": "$TC filter show 
dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower.*handle 0xffffffff", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress" - ] - }, - { - "id": "d052", - "name": "Add 1M filters with the same action", - "category": [ - "filter", - "flower" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV2 ingress", - "./tdc_batch.py $DEV2 $BATCH_FILE --share_action -n 1000000" - ], - "cmdUnderTest": "$TC -b $BATCH_FILE", - "expExitCode": "0", - "verifyCmd": "$TC actions list action gact", - "matchPattern": "action order 0: gact action drop.*index 1 ref 1000000 bind 1000000", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress", - "/bin/rm $BATCH_FILE" - ] - }, - { - "id": "4cbd", - "name": "Try to add filter with duplicate key", - "category": [ - "filter", - "flower" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV2 ingress", - "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop" - ], - "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop", - "expExitCode": "2", - "verifyCmd": "$TC -s filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", - "matchCount": "1", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress" - ] - }, - { - "id": "7c65", - "name": "Add flower filter and then terse dump it", - "category": [ - "filter", - "flower" - ], - "setup": [ - "$TC qdisc add dev $DEV2 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", - "expExitCode": "0", - "verifyCmd": "$TC -br filter show dev $DEV2 ingress", - "matchPattern": "filter protocol ip pref 1 flower.*handle", - "matchCount": 
"1", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress" - ] - }, - { - "id": "d45e", - "name": "Add flower filter and verify that terse dump doesn't output filter key", - "category": [ - "filter", - "flower" - ], - "setup": [ - "$TC qdisc add dev $DEV2 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", - "expExitCode": "0", - "verifyCmd": "$TC -br filter show dev $DEV2 ingress", - "matchPattern": " dst_mac e4:11:22:11:4a:51", - "matchCount": "0", - "teardown": [ - "$TC qdisc del dev $DEV2 ingress" - ] - } -] diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json index ddc7c355be..24bd0c2a30 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json @@ -272,5 +272,62 @@ "teardown": [ "$TC qdisc del dev $DEV1 parent root drr" ] + }, + { + "id": "bd32", + "name": "Try to delete hashtable referenced by another u32 filter", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 parent root handle 10: drr", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 1:" + ], + "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10: prio 2 handle 1: u32", + "expExitCode": "2", + "verifyCmd": "$TC filter show dev $DEV1", + "matchPattern": "protocol ip pref 2 u32 chain 0 fh 1:", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 parent root drr" + ] + }, + { + "id": "4585", + "name": "Delete small tree of u32 hashtables and filters", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 parent root handle 10: drr", + "$TC filter add dev $DEV1 parent 10:0 protocol 
ip prio 2 handle 1: u32 divisor 1", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 2: u32 divisor 1", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 3: u32 divisor 2", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 4: u32 divisor 1", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 1: match ip src any action drop", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 2: match ip src any action drop", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 2:", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 1:", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 4: match ip src any action drop", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 3:", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 4:" + ], + "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10:", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1", + "matchPattern": "protocol ip pref 2 u32", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 parent root drr" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index a6718192af..caeacc6915 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -497,11 +497,6 @@ def prepare_run(pm, args, testlist): pm.call_post_suite(1) return emergency_exit_message - if args.verbose: - print('give test rig 2 seconds to stabilize') - - time.sleep(2) - def purge_run(pm, index): pm.call_post_suite(index) @@ -616,7 +611,7 @@ def test_runner_mp(pm, args, alltests): batches.insert(0, serial) print("Executing {} tests in parallel and {} in serial".format(len(parallel), len(serial))) - print("Using {} batches".format(len(batches))) + print("Using {} batches and {} 
workers".format(len(batches), args.mp)) # We can't pickle these objects so workaround them global mp_pm @@ -1017,12 +1012,17 @@ def main(): parser = pm.call_add_args(parser) (args, remaining) = parser.parse_known_args() args.NAMES = NAMES + args.mp = min(args.mp, 4) pm.set_args(args) check_default_settings(args, remaining, pm) if args.verbose > 2: print('args is {}'.format(args)) - set_operation_mode(pm, parser, args, remaining) + try: + set_operation_mode(pm, parser, args, remaining) + except KeyboardInterrupt: + # Cleanup on Ctrl-C + pm.call_post_suite(None) if __name__ == "__main__": main() diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index eb357bd792..c53ede8b73 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -1,7 +1,67 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -modprobe netdevsim -modprobe sch_teql -./tdc.py -c actions --nobuildebpf -./tdc.py -c qdisc +# If a module is required and was not compiled +# the test that requires it will fail anyways +try_modprobe() { + modprobe -q -R "$1" + if [ $? -ne 0 ]; then + echo "Module $1 not found... skipping." 
+ else + modprobe "$1" + fi +} + +try_modprobe netdevsim +try_modprobe act_bpf +try_modprobe act_connmark +try_modprobe act_csum +try_modprobe act_ct +try_modprobe act_ctinfo +try_modprobe act_gact +try_modprobe act_gate +try_modprobe act_mirred +try_modprobe act_mpls +try_modprobe act_nat +try_modprobe act_pedit +try_modprobe act_police +try_modprobe act_sample +try_modprobe act_simple +try_modprobe act_skbedit +try_modprobe act_skbmod +try_modprobe act_tunnel_key +try_modprobe act_vlan +try_modprobe cls_basic +try_modprobe cls_bpf +try_modprobe cls_cgroup +try_modprobe cls_flow +try_modprobe cls_flower +try_modprobe cls_fw +try_modprobe cls_matchall +try_modprobe cls_route +try_modprobe cls_u32 +try_modprobe em_canid +try_modprobe em_cmp +try_modprobe em_ipset +try_modprobe em_ipt +try_modprobe em_meta +try_modprobe em_nbyte +try_modprobe em_text +try_modprobe em_u32 +try_modprobe sch_cake +try_modprobe sch_cbs +try_modprobe sch_choke +try_modprobe sch_codel +try_modprobe sch_drr +try_modprobe sch_etf +try_modprobe sch_ets +try_modprobe sch_fq +try_modprobe sch_fq_codel +try_modprobe sch_fq_pie +try_modprobe sch_gred +try_modprobe sch_hfsc +try_modprobe sch_hhf +try_modprobe sch_htb +try_modprobe sch_teql +./tdc.py -J`nproc` -c actions +./tdc.py -J`nproc` -c qdisc diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index d49dd3ffd0..c001dd7917 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -66,7 +66,7 @@ static int check_diff(struct timeval start, struct timeval end) diff = end.tv_usec - start.tv_usec; diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC; - if (abs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) { + if (llabs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) { printf("Diff too high: %lld..", diff); return -1; } @@ -184,80 +184,71 @@ static int check_timer_create(int which) return 0; } -int remain; -__thread int got_signal; 
+static pthread_t ctd_thread; +static volatile int ctd_count, ctd_failed; -static void *distribution_thread(void *arg) +static void ctd_sighandler(int sig) { - while (__atomic_load_n(&remain, __ATOMIC_RELAXED)); - return NULL; + if (pthread_self() != ctd_thread) + ctd_failed = 1; + ctd_count--; } -static void distribution_handler(int nr) +static void *ctd_thread_func(void *arg) { - if (!__atomic_exchange_n(&got_signal, 1, __ATOMIC_RELAXED)) - __atomic_fetch_sub(&remain, 1, __ATOMIC_RELAXED); -} - -/* - * Test that all running threads _eventually_ receive CLOCK_PROCESS_CPUTIME_ID - * timer signals. This primarily tests that the kernel does not favour any one. - */ -static int check_timer_distribution(void) -{ - int err, i; - timer_t id; - const int nthreads = 10; - pthread_t threads[nthreads]; struct itimerspec val = { .it_value.tv_sec = 0, .it_value.tv_nsec = 1000 * 1000, .it_interval.tv_sec = 0, .it_interval.tv_nsec = 1000 * 1000, }; + timer_t id; - remain = nthreads + 1; /* worker threads + this thread */ - signal(SIGALRM, distribution_handler); - err = timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id); - if (err < 0) { - ksft_perror("Can't create timer"); - return -1; - } - err = timer_settime(id, 0, &val, NULL); - if (err < 0) { - ksft_perror("Can't set timer"); - return -1; - } + /* 1/10 seconds to ensure the leader sleeps */ + usleep(10000); - for (i = 0; i < nthreads; i++) { - err = pthread_create(&threads[i], NULL, distribution_thread, - NULL); - if (err) { - ksft_print_msg("Can't create thread: %s (%d)\n", - strerror(errno), errno); - return -1; - } - } + ctd_count = 100; + if (timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id)) + return "Can't create timer\n"; + if (timer_settime(id, 0, &val, NULL)) + return "Can't set timer\n"; - /* Wait for all threads to receive the signal. 
*/ - while (__atomic_load_n(&remain, __ATOMIC_RELAXED)); + while (ctd_count > 0 && !ctd_failed) + ; - for (i = 0; i < nthreads; i++) { - err = pthread_join(threads[i], NULL); - if (err) { - ksft_print_msg("Can't join thread: %s (%d)\n", - strerror(errno), errno); - return -1; - } - } + if (timer_delete(id)) + return "Can't delete timer\n"; - if (timer_delete(id)) { - ksft_perror("Can't delete timer"); - return -1; - } + return NULL; +} + +/* + * Test that only the running thread receives the timer signal. + */ +static int check_timer_distribution(void) +{ + const char *errmsg; - ksft_test_result_pass("check_timer_distribution\n"); + signal(SIGALRM, ctd_sighandler); + + errmsg = "Can't create thread\n"; + if (pthread_create(&ctd_thread, NULL, ctd_thread_func, NULL)) + goto err; + + errmsg = "Can't join thread\n"; + if (pthread_join(ctd_thread, (void **)&errmsg) || errmsg) + goto err; + + if (!ctd_failed) + ksft_test_result_pass("check signal distribution\n"); + else if (ksft_min_kernel_version(6, 3)) + ksft_test_result_fail("check signal distribution\n"); + else + ksft_test_result_skip("check signal distribution (old kernel)\n"); return 0; +err: + ksft_print_msg("%s", errmsg); + return -1; } int main(int argc, char **argv) diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c index 883ca85424..96d32fd65b 100644 --- a/tools/testing/selftests/vDSO/vdso_test_abi.c +++ b/tools/testing/selftests/vDSO/vdso_test_abi.c @@ -33,9 +33,20 @@ typedef long (*vdso_clock_gettime_t)(clockid_t clk_id, struct timespec *ts); typedef long (*vdso_clock_getres_t)(clockid_t clk_id, struct timespec *ts); typedef time_t (*vdso_time_t)(time_t *t); -#define VDSO_TEST_PASS_MSG() "\n%s(): PASS\n", __func__ -#define VDSO_TEST_FAIL_MSG(x) "\n%s(): %s FAIL\n", __func__, x -#define VDSO_TEST_SKIP_MSG(x) "\n%s(): SKIP: Could not find %s\n", __func__, x +const char *vdso_clock_name[12] = { + "CLOCK_REALTIME", + "CLOCK_MONOTONIC", + 
"CLOCK_PROCESS_CPUTIME_ID", + "CLOCK_THREAD_CPUTIME_ID", + "CLOCK_MONOTONIC_RAW", + "CLOCK_REALTIME_COARSE", + "CLOCK_MONOTONIC_COARSE", + "CLOCK_BOOTTIME", + "CLOCK_REALTIME_ALARM", + "CLOCK_BOOTTIME_ALARM", + "CLOCK_SGI_CYCLE", + "CLOCK_TAI", +}; static void vdso_test_gettimeofday(void) { @@ -44,7 +55,8 @@ static void vdso_test_gettimeofday(void) (vdso_gettimeofday_t)vdso_sym(version, name[0]); if (!vdso_gettimeofday) { - ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[0])); + ksft_print_msg("Couldn't find %s\n", name[0]); + ksft_test_result_skip("%s\n", name[0]); return; } @@ -54,9 +66,9 @@ static void vdso_test_gettimeofday(void) if (ret == 0) { ksft_print_msg("The time is %lld.%06lld\n", (long long)tv.tv_sec, (long long)tv.tv_usec); - ksft_test_result_pass(VDSO_TEST_PASS_MSG()); + ksft_test_result_pass("%s\n", name[0]); } else { - ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[0])); + ksft_test_result_fail("%s\n", name[0]); } } @@ -67,7 +79,9 @@ static void vdso_test_clock_gettime(clockid_t clk_id) (vdso_clock_gettime_t)vdso_sym(version, name[1]); if (!vdso_clock_gettime) { - ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[1])); + ksft_print_msg("Couldn't find %s\n", name[1]); + ksft_test_result_skip("%s %s\n", name[1], + vdso_clock_name[clk_id]); return; } @@ -77,9 +91,11 @@ static void vdso_test_clock_gettime(clockid_t clk_id) if (ret == 0) { ksft_print_msg("The time is %lld.%06lld\n", (long long)ts.tv_sec, (long long)ts.tv_nsec); - ksft_test_result_pass(VDSO_TEST_PASS_MSG()); + ksft_test_result_pass("%s %s\n", name[1], + vdso_clock_name[clk_id]); } else { - ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[1])); + ksft_test_result_fail("%s %s\n", name[1], + vdso_clock_name[clk_id]); } } @@ -90,7 +106,8 @@ static void vdso_test_time(void) (vdso_time_t)vdso_sym(version, name[2]); if (!vdso_time) { - ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[2])); + ksft_print_msg("Couldn't find %s\n", name[2]); + ksft_test_result_skip("%s\n", name[2]); return; } @@ -99,9 +116,9 @@ 
static void vdso_test_time(void) if (ret > 0) { ksft_print_msg("The time in hours since January 1, 1970 is %lld\n", (long long)(ret / 3600)); - ksft_test_result_pass(VDSO_TEST_PASS_MSG()); + ksft_test_result_pass("%s\n", name[2]); } else { - ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[2])); + ksft_test_result_fail("%s\n", name[2]); } } @@ -114,7 +131,9 @@ static void vdso_test_clock_getres(clockid_t clk_id) (vdso_clock_getres_t)vdso_sym(version, name[3]); if (!vdso_clock_getres) { - ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[3])); + ksft_print_msg("Couldn't find %s\n", name[3]); + ksft_test_result_skip("%s %s\n", name[3], + vdso_clock_name[clk_id]); return; } @@ -137,34 +156,21 @@ static void vdso_test_clock_getres(clockid_t clk_id) clock_getres_fail++; if (clock_getres_fail > 0) { - ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[3])); + ksft_test_result_fail("%s %s\n", name[3], + vdso_clock_name[clk_id]); } else { - ksft_test_result_pass(VDSO_TEST_PASS_MSG()); + ksft_test_result_pass("%s %s\n", name[3], + vdso_clock_name[clk_id]); } } -const char *vdso_clock_name[12] = { - "CLOCK_REALTIME", - "CLOCK_MONOTONIC", - "CLOCK_PROCESS_CPUTIME_ID", - "CLOCK_THREAD_CPUTIME_ID", - "CLOCK_MONOTONIC_RAW", - "CLOCK_REALTIME_COARSE", - "CLOCK_MONOTONIC_COARSE", - "CLOCK_BOOTTIME", - "CLOCK_REALTIME_ALARM", - "CLOCK_BOOTTIME_ALARM", - "CLOCK_SGI_CYCLE", - "CLOCK_TAI", -}; - /* * This function calls vdso_test_clock_gettime and vdso_test_clock_getres * with different values for clock_id. 
*/ static inline void vdso_test_clock(clockid_t clock_id) { - ksft_print_msg("\nclock_id: %s\n", vdso_clock_name[clock_id]); + ksft_print_msg("clock_id: %s\n", vdso_clock_name[clock_id]); vdso_test_clock_gettime(clock_id); @@ -181,14 +187,14 @@ int main(int argc, char **argv) ksft_set_plan(VDSO_TEST_PLAN); if (!sysinfo_ehdr) { - printf("AT_SYSINFO_EHDR is not present!\n"); + ksft_print_msg("AT_SYSINFO_EHDR is not present!\n"); return KSFT_SKIP; } version = versions[VDSO_VERSION]; name = (const char **)&names[VDSO_NAMES]; - printf("[vDSO kselftest] VDSO_VERSION: %s\n", version); + ksft_print_msg("[vDSO kselftest] VDSO_VERSION: %s\n", version); vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR)); diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c index 8f9b06d9ce..215b8150b7 100644 --- a/tools/testing/selftests/x86/lam.c +++ b/tools/testing/selftests/x86/lam.c @@ -817,7 +817,7 @@ static void run_test(struct testcases *test, int count) /* return 3 is not support LA57, the case should be skipped */ if (ret == 3) { - ksft_test_result_skip(t->msg); + ksft_test_result_skip("%s", t->msg); continue; } @@ -826,7 +826,7 @@ static void run_test(struct testcases *test, int count) else ret = !(t->expected); - ksft_test_result(ret, t->msg); + ksft_test_result(ret, "%s", t->msg); } } |