diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
commit | 2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch) | |
tree | 848558de17fb3008cdf4d861b01ac7781903ce39 /tools/testing/selftests/arm64/fp | |
parent | Initial commit. (diff) | |
download | linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.tar.xz linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.zip |
Adding upstream version 6.1.76.upstream/6.1.76
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tools/testing/selftests/arm64/fp')
28 files changed, 4897 insertions, 0 deletions
diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore new file mode 100644 index 000000000..df79d2966 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/.gitignore @@ -0,0 +1,14 @@ +fp-pidbench +fp-stress +fpsimd-test +rdvl-sme +rdvl-sve +sve-probe-vls +sve-ptrace +sve-test +ssve-test +vec-syscfg +vlset +za-fork +za-ptrace +za-test diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile new file mode 100644 index 000000000..932ec8792 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/Makefile @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: GPL-2.0 + +# A proper top_srcdir is needed by KSFT(lib.mk) +top_srcdir = $(realpath ../../../../../) + +CFLAGS += $(KHDR_INCLUDES) + +TEST_GEN_PROGS := fp-stress \ + sve-ptrace sve-probe-vls \ + vec-syscfg \ + za-fork za-ptrace +TEST_GEN_PROGS_EXTENDED := fp-pidbench fpsimd-test \ + rdvl-sme rdvl-sve \ + sve-test \ + ssve-test \ + za-test \ + vlset +TEST_PROGS_EXTENDED := fpsimd-stress sve-stress ssve-stress za-stress + +EXTRA_CLEAN += $(OUTPUT)/asm-utils.o $(OUTPUT)/rdvl.o $(OUTPUT)/za-fork-asm.o + +# Build with nolibc to avoid effects due to libc's clone() support +$(OUTPUT)/fp-pidbench: fp-pidbench.S $(OUTPUT)/asm-utils.o + $(CC) -nostdlib $^ -o $@ +$(OUTPUT)/fpsimd-test: fpsimd-test.S $(OUTPUT)/asm-utils.o + $(CC) -nostdlib $^ -o $@ +$(OUTPUT)/rdvl-sve: rdvl-sve.c $(OUTPUT)/rdvl.o +$(OUTPUT)/rdvl-sme: rdvl-sme.c $(OUTPUT)/rdvl.o +$(OUTPUT)/sve-ptrace: sve-ptrace.c +$(OUTPUT)/sve-probe-vls: sve-probe-vls.c $(OUTPUT)/rdvl.o +$(OUTPUT)/sve-test: sve-test.S $(OUTPUT)/asm-utils.o + $(CC) -nostdlib $^ -o $@ +$(OUTPUT)/ssve-test: sve-test.S $(OUTPUT)/asm-utils.o + $(CC) -DSSVE -nostdlib $^ -o $@ +$(OUTPUT)/vec-syscfg: vec-syscfg.c $(OUTPUT)/rdvl.o +$(OUTPUT)/vlset: vlset.c +$(OUTPUT)/za-fork: za-fork.c $(OUTPUT)/za-fork-asm.o + $(CC) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ + -include ../../../../include/nolibc/nolibc.h \ + -static -ffreestanding -Wall $^ -o $@ +$(OUTPUT)/za-ptrace: za-ptrace.c +$(OUTPUT)/za-test: za-test.S $(OUTPUT)/asm-utils.o + $(CC) -nostdlib $^ -o $@ + +include ../../lib.mk diff --git a/tools/testing/selftests/arm64/fp/README b/tools/testing/selftests/arm64/fp/README new file mode 100644 index 000000000..03e3dad86 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/README @@ -0,0 +1,100 @@ +This directory contains a mix of tests integrated with kselftest and +standalone stress tests. + +kselftest tests +=============== + +sve-probe-vls - Checks the SVE vector length enumeration interface +sve-ptrace - Checks the SVE ptrace interface + +Running the non-kselftest tests +=============================== + +sve-stress performs an SVE context switch stress test, as described +below. + +(The fpsimd-stress test works the same way; just substitute "fpsimd" for +"sve" in the following commands.) + + +The test runs until killed by the user. + +If no context switch error was detected, you will see output such as +the following: + +$ ./sve-stress +(wait for some time) +^C +Vector length: 512 bits +PID: 1573 +Terminated by signal 15, no error, iterations=9467, signals=1014 +Vector length: 512 bits +PID: 1575 +Terminated by signal 15, no error, iterations=9448, signals=1028 +Vector length: 512 bits +PID: 1577 +Terminated by signal 15, no error, iterations=9436, signals=1039 +Vector length: 512 bits +PID: 1579 +Terminated by signal 15, no error, iterations=9421, signals=1039 +Vector length: 512 bits +PID: 1581 +Terminated by signal 15, no error, iterations=9403, signals=1039 +Vector length: 512 bits +PID: 1583 +Terminated by signal 15, no error, iterations=9385, signals=1036 +Vector length: 512 bits +PID: 1585 +Terminated by signal 15, no error, iterations=9376, signals=1039 +Vector length: 512 bits +PID: 1587 +Terminated by signal 15, no error, iterations=9361, signals=1039 +Vector length: 512 bits +PID: 1589 +Terminated by signal 15, no error, iterations=9350, signals=1039 + + +If an error was detected, details of the mismatch will be printed +instead of "no error". + +Ideally, the test should be allowed to run for many minutes or hours +to maximise test coverage. + + +KVM stress testing +================== + +To try to reproduce the bugs that we have been observing, sve-stress +should be run in parallel in two KVM guests, while simultaneously +running on the host. + +1) Start 2 guests, using the following command for each: + +$ lkvm run --console=virtio -pconsole=hvc0 --sve Image + +(Depending on the hardware GIC implementation, you may also need +--irqchip=gicv3. New kvmtool defaults to that if appropriate, but I +can't remember whether my branch is new enough for that. Try without +the option first.) + +Kvmtool occupies the terminal until you kill it (Ctrl+A x), +or until the guest terminates. It is therefore recommended to run +each instance in separate terminal (use screen or ssh etc.) This +allows multiple guests to be run in parallel while running other +commands on the host. + +Within the guest, the host filesystem is accessible, mounted on /host. + +2) Run the sve-stress on *each* guest with the Vector-Length set to 32: +guest$ ./vlset --inherit 32 ./sve-stress + +3) Run the sve-stress on the host with the maximum Vector-Length: +host$ ./vlset --inherit --max ./sve-stress + + +Again, the test should be allowed to run for many minutes or hours to +maximise test coverage. + +If no error is detected, you will see output from each sve-stress +instance similar to that illustrated above; otherwise details of the +observed mismatches will be printed. diff --git a/tools/testing/selftests/arm64/fp/TODO b/tools/testing/selftests/arm64/fp/TODO new file mode 100644 index 000000000..44004e53d --- /dev/null +++ b/tools/testing/selftests/arm64/fp/TODO @@ -0,0 +1,7 @@ +- Test unsupported values in the ABIs. +- More coverage for ptrace: + - Get/set of FFR. + - Ensure ptraced processes actually see the register state visible through + the ptrace interface. + - Big endian. +- Test PR_SVE_VL_INHERIT after a double fork. diff --git a/tools/testing/selftests/arm64/fp/asm-offsets.h b/tools/testing/selftests/arm64/fp/asm-offsets.h new file mode 100644 index 000000000..757b2fd75 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/asm-offsets.h @@ -0,0 +1,12 @@ +#define sa_sz 32 +#define sa_flags 8 +#define sa_handler 0 +#define sa_mask_sz 8 +#define SIGUSR1 10 +#define SIGUSR2 12 +#define SIGTERM 15 +#define SIGINT 2 +#define SIGABRT 6 +#define SA_NODEFER 1073741824 +#define SA_SIGINFO 4 +#define ucontext_regs 184 diff --git a/tools/testing/selftests/arm64/fp/asm-utils.S b/tools/testing/selftests/arm64/fp/asm-utils.S new file mode 100644 index 000000000..4b9728efc --- /dev/null +++ b/tools/testing/selftests/arm64/fp/asm-utils.S @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2021 ARM Limited. +// Original author: Dave Martin <Dave.Martin@arm.com> +// +// Utility functions for assembly code. + +#include <asm/unistd.h> +#include "assembler.h" + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction +.globl putc + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction +.globl puts + +// Print an unsigned decimal number x0 to stdout +// Clobbers x0-x4,x8 +function putdec + mov x1, sp + str x30, [sp, #-32]! // Result can't be > 20 digits + + mov x2, #0 + strb w2, [x1, #-1]! // Write the NUL terminator + + mov x2, #10 +0: udiv x3, x0, x2 // div-mod loop to generate the digits + msub x0, x3, x2, x0 + add w0, w0, #'0' + strb w0, [x1, #-1]! + mov x0, x3 + cbnz x3, 0b + + ldrb w0, [x1] + cbnz w0, 1f + mov w0, #'0' // Print "0" for 0, not "" + strb w0, [x1, #-1]! + +1: mov x0, x1 + bl puts + + ldr x30, [sp], #32 + ret +endfunction +.globl putdec + +// Print an unsigned decimal number x0 to stdout, followed by a newline +// Clobbers x0-x5,x8 +function putdecn + mov x5, x30 + + bl putdec + mov x0, #'\n' + bl putc + + ret x5 +endfunction +.globl putdecn + +// Clobbers x0-x3,x8 +function puthexb + str x30, [sp, #-0x10]! + + mov w3, w0 + lsr w0, w0, #4 + bl puthexnibble + mov w0, w3 + + ldr x30, [sp], #0x10 + // fall through to puthexnibble +endfunction +.globl puthexb + +// Clobbers x0-x2,x8 +function puthexnibble + and w0, w0, #0xf + cmp w0, #10 + blo 1f + add w0, w0, #'a' - ('9' + 1) +1: add w0, w0, #'0' + b putc +endfunction +.globl puthexnibble + +// x0=data in, x1=size in, clobbers x0-x5,x8 +function dumphex + str x30, [sp, #-0x10]! + + mov x4, x0 + mov x5, x1 + +0: subs x5, x5, #1 + b.lo 1f + ldrb w0, [x4], #1 + bl puthexb + b 0b + +1: ldr x30, [sp], #0x10 + ret +endfunction +.globl dumphex + + // Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. +// Clobbers x0-x3 +function memcpy + cmp x2, #0 + b.eq 1f +0: ldrb w3, [x1], #1 + strb w3, [x0], #1 + subs x2, x2, #1 + b.ne 0b +1: ret +endfunction +.globl memcpy + +// Fill x1 bytes starting at x0 with 0xae (for canary purposes) +// Clobbers x1, x2. +function memfill_ae + mov w2, #0xae + b memfill +endfunction +.globl memfill_ae + +// Fill x1 bytes starting at x0 with 0. +// Clobbers x1, x2. +function memclr + mov w2, #0 +endfunction +.globl memclr + // fall through to memfill + +// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 +// Clobbers x1 +function memfill + cmp x1, #0 + b.eq 1f + +0: strb w2, [x0], #1 + subs x1, x1, #1 + b.ne 0b + +1: ret +endfunction +.globl memfill diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h new file mode 100644 index 000000000..90bd433d2 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/assembler.h @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. +// Original author: Dave Martin <Dave.Martin@arm.com> + +#ifndef ASSEMBLER_H +#define ASSEMBLER_H + +.macro __for from:req, to:req + .if (\from) == (\to) + _for__body %\from + .else + __for \from, %(\from) + ((\to) - (\from)) / 2 + __for %(\from) + ((\to) - (\from)) / 2 + 1, \to + .endif +.endm + +.macro _for var:req, from:req, to:req, insn:vararg + .macro _for__body \var:req + .noaltmacro + \insn + .altmacro + .endm + + .altmacro + __for \from, \to + .noaltmacro + + .purgem _for__body +.endm + +.macro function name + .macro endfunction + .type \name, @function + .purgem endfunction + .endm +\name: +.endm + +.macro define_accessor name, num, insn + .macro \name\()_entry n + \insn \n, 1 + ret + .endm + +function \name + adr x2, .L__accessor_tbl\@ + add x2, x2, x0, lsl #3 + br x2 + +.L__accessor_tbl\@: + _for x, 0, (\num) - 1, \name\()_entry \x +endfunction + + .purgem \name\()_entry +.endm + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +#endif /* ! ASSEMBLER_H */ diff --git a/tools/testing/selftests/arm64/fp/fp-pidbench.S b/tools/testing/selftests/arm64/fp/fp-pidbench.S new file mode 100644 index 000000000..16a436389 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/fp-pidbench.S @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2021 ARM Limited. +// Original author: Mark Brown <broonie@kernel.org> +// +// Trivial syscall overhead benchmark. +// +// This is implemented in asm to ensure that we don't have any issues with +// system libraries using instructions that disrupt the test. + +#include <asm/unistd.h> +#include "assembler.h" + +.arch_extension sve + +.macro test_loop per_loop + mov x10, x20 + mov x8, #__NR_getpid + mrs x11, CNTVCT_EL0 +1: + \per_loop + svc #0 + sub x10, x10, #1 + cbnz x10, 1b + + mrs x12, CNTVCT_EL0 + sub x0, x12, x11 + bl putdec + puts "\n" +.endm + +// Main program entry point +.globl _start +function _start +_start: + puts "Iterations per test: " + mov x20, #10000 + lsl x20, x20, #8 + mov x0, x20 + bl putdec + puts "\n" + + // Test having never used SVE + puts "No SVE: " + test_loop + + // Check for SVE support - should use hwcap but that's hard in asm + mrs x0, ID_AA64PFR0_EL1 + ubfx x0, x0, #32, #4 + cbnz x0, 1f + puts "System does not support SVE\n" + b out +1: + + // Execute a SVE instruction + puts "SVE VL: " + rdvl x0, #8 + bl putdec + puts "\n" + + puts "SVE used once: " + test_loop + + // Use SVE per syscall + puts "SVE used per syscall: " + test_loop "rdvl x0, #8" + + // And we're done +out: + mov x0, #0 + mov x8, #__NR_exit + svc #0 diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c new file mode 100644 index 000000000..4e62a9199 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/fp-stress.c @@ -0,0 +1,555 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022 ARM Limited. + */ + +#define _GNU_SOURCE +#define _POSIX_C_SOURCE 199309L + +#include <errno.h> +#include <getopt.h> +#include <poll.h> +#include <signal.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/auxv.h> +#include <sys/epoll.h> +#include <sys/prctl.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/wait.h> +#include <asm/hwcap.h> + +#include "../../kselftest.h" + +#define MAX_VLS 16 + +struct child_data { + char *name, *output; + pid_t pid; + int stdout; + bool output_seen; + bool exited; + int exit_status; +}; + +static int epoll_fd; +static struct child_data *children; +static int num_children; +static bool terminate; + +static void drain_output(bool flush); + +static int num_processors(void) +{ + long nproc = sysconf(_SC_NPROCESSORS_CONF); + if (nproc < 0) { + perror("Unable to read number of processors\n"); + exit(EXIT_FAILURE); + } + + return nproc; +} + +static void child_start(struct child_data *child, const char *program) +{ + int ret, pipefd[2], i; + struct epoll_event ev; + + ret = pipe(pipefd); + if (ret != 0) + ksft_exit_fail_msg("Failed to create stdout pipe: %s (%d)\n", + strerror(errno), errno); + + child->pid = fork(); + if (child->pid == -1) + ksft_exit_fail_msg("fork() failed: %s (%d)\n", + strerror(errno), errno); + + if (!child->pid) { + /* + * In child, replace stdout with the pipe, errors to + * stderr from here as kselftest prints to stdout. + */ + ret = dup2(pipefd[1], 1); + if (ret == -1) { + fprintf(stderr, "dup2() %d\n", errno); + exit(EXIT_FAILURE); + } + + /* + * Very dumb mechanism to clean open FDs other than + * stdio. We don't want O_CLOEXEC for the pipes... + */ + for (i = 3; i < 8192; i++) + close(i); + + ret = execl(program, program, NULL); + fprintf(stderr, "execl(%s) failed: %d (%s)\n", + program, errno, strerror(errno)); + + exit(EXIT_FAILURE); + } else { + /* + * In parent, remember the child and close our copy of the + * write side of stdout. + */ + close(pipefd[1]); + child->stdout = pipefd[0]; + child->output = NULL; + child->exited = false; + child->output_seen = false; + + ev.events = EPOLLIN | EPOLLHUP; + ev.data.ptr = child; + + ret = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, child->stdout, &ev); + if (ret < 0) { + ksft_exit_fail_msg("%s EPOLL_CTL_ADD failed: %s (%d)\n", + child->name, strerror(errno), errno); + } + + /* + * Keep output flowing during child startup so logs + * are more timely, can help debugging. + */ + drain_output(false); + } +} + +static bool child_output_read(struct child_data *child) +{ + char read_data[1024]; + char work[1024]; + int ret, len, cur_work, cur_read; + + ret = read(child->stdout, read_data, sizeof(read_data)); + if (ret < 0) { + if (errno == EINTR) + return true; + + ksft_print_msg("%s: read() failed: %s (%d)\n", + child->name, strerror(errno), + errno); + return false; + } + len = ret; + + child->output_seen = true; + + /* Pick up any partial read */ + if (child->output) { + strncpy(work, child->output, sizeof(work) - 1); + cur_work = strnlen(work, sizeof(work)); + free(child->output); + child->output = NULL; + } else { + cur_work = 0; + } + + cur_read = 0; + while (cur_read < len) { + work[cur_work] = read_data[cur_read++]; + + if (work[cur_work] == '\n') { + work[cur_work] = '\0'; + ksft_print_msg("%s: %s\n", child->name, work); + cur_work = 0; + } else { + cur_work++; + } + } + + if (cur_work) { + work[cur_work] = '\0'; + ret = asprintf(&child->output, "%s", work); + if (ret == -1) + ksft_exit_fail_msg("Out of memory\n"); + } + + return false; +} + +static void child_output(struct child_data *child, uint32_t events, + bool flush) +{ + bool read_more; + + if (events & EPOLLIN) { + do { + read_more = child_output_read(child); + } while (read_more); + } + + if (events & EPOLLHUP) { + close(child->stdout); + child->stdout = -1; + flush = true; + } + + if (flush && child->output) { + ksft_print_msg("%s: %s<EOF>\n", child->name, child->output); + free(child->output); + child->output = NULL; + } +} + +static void child_tickle(struct child_data *child) +{ + if (child->output_seen && !child->exited) + kill(child->pid, SIGUSR2); +} + +static void child_stop(struct child_data *child) +{ + if (!child->exited) + kill(child->pid, SIGTERM); +} + +static void child_cleanup(struct child_data *child) +{ + pid_t ret; + int status; + bool fail = false; + + if (!child->exited) { + do { + ret = waitpid(child->pid, &status, 0); + if (ret == -1 && errno == EINTR) + continue; + + if (ret == -1) { + ksft_print_msg("waitpid(%d) failed: %s (%d)\n", + child->pid, strerror(errno), + errno); + fail = true; + break; + } + } while (!WIFEXITED(status)); + child->exit_status = WEXITSTATUS(status); + } + + if (!child->output_seen) { + ksft_print_msg("%s no output seen\n", child->name); + fail = true; + } + + if (child->exit_status != 0) { + ksft_print_msg("%s exited with error code %d\n", + child->name, child->exit_status); + fail = true; + } + + ksft_test_result(!fail, "%s\n", child->name); +} + +static void handle_child_signal(int sig, siginfo_t *info, void *context) +{ + int i; + bool found = false; + + for (i = 0; i < num_children; i++) { + if (children[i].pid == info->si_pid) { + children[i].exited = true; + children[i].exit_status = info->si_status; + found = true; + break; + } + } + + if (!found) + ksft_print_msg("SIGCHLD for unknown PID %d with status %d\n", + info->si_pid, info->si_status); +} + +static void handle_exit_signal(int sig, siginfo_t *info, void *context) +{ + int i; + + /* If we're already exiting then don't signal again */ + if (terminate) + return; + + ksft_print_msg("Got signal, exiting...\n"); + + terminate = true; + + /* + * This should be redundant, the main loop should clean up + * after us, but for safety stop everything we can here. + */ + for (i = 0; i < num_children; i++) + child_stop(&children[i]); +} + +static void start_fpsimd(struct child_data *child, int cpu, int copy) +{ + int ret; + + child_start(child, "./fpsimd-test"); + + ret = asprintf(&child->name, "FPSIMD-%d-%d", cpu, copy); + if (ret == -1) + ksft_exit_fail_msg("asprintf() failed\n"); + + ksft_print_msg("Started %s\n", child->name); +} + +static void start_sve(struct child_data *child, int vl, int cpu) +{ + int ret; + + ret = prctl(PR_SVE_SET_VL, vl | PR_SVE_VL_INHERIT); + if (ret < 0) + ksft_exit_fail_msg("Failed to set SVE VL %d\n", vl); + + child_start(child, "./sve-test"); + + ret = asprintf(&child->name, "SVE-VL-%d-%d", vl, cpu); + if (ret == -1) + ksft_exit_fail_msg("asprintf() failed\n"); + + ksft_print_msg("Started %s\n", child->name); +} + +static void start_ssve(struct child_data *child, int vl, int cpu) +{ + int ret; + + ret = prctl(PR_SME_SET_VL, vl | PR_SME_VL_INHERIT); + if (ret < 0) + ksft_exit_fail_msg("Failed to set SME VL %d\n", ret); + + child_start(child, "./ssve-test"); + + ret = asprintf(&child->name, "SSVE-VL-%d-%d", vl, cpu); + if (ret == -1) + ksft_exit_fail_msg("asprintf() failed\n"); + + ksft_print_msg("Started %s\n", child->name); +} + +static void start_za(struct child_data *child, int vl, int cpu) +{ + int ret; + + ret = prctl(PR_SME_SET_VL, vl | PR_SVE_VL_INHERIT); + if (ret < 0) + ksft_exit_fail_msg("Failed to set SME VL %d\n", ret); + + child_start(child, "./za-test"); + + ret = asprintf(&child->name, "ZA-VL-%d-%d", vl, cpu); + if (ret == -1) + ksft_exit_fail_msg("asprintf() failed\n"); + + ksft_print_msg("Started %s\n", child->name); +} + +static void probe_vls(int vls[], int *vl_count, int set_vl) +{ + unsigned int vq; + int vl; + + *vl_count = 0; + + for (vq = SVE_VQ_MAX; vq > 0; --vq) { + vl = prctl(set_vl, vq * 16); + if (vl == -1) + ksft_exit_fail_msg("SET_VL failed: %s (%d)\n", + strerror(errno), errno); + + vl &= PR_SVE_VL_LEN_MASK; + + vq = sve_vq_from_vl(vl); + + vls[*vl_count] = vl; + *vl_count += 1; + } +} + +/* Handle any pending output without blocking */ +static void drain_output(bool flush) +{ + struct epoll_event ev; + int ret = 1; + + while (ret > 0) { + ret = epoll_wait(epoll_fd, &ev, 1, 0); + if (ret < 0) { + if (errno == EINTR) + continue; + ksft_print_msg("epoll_wait() failed: %s (%d)\n", + strerror(errno), errno); + } + + if (ret == 1) + child_output(ev.data.ptr, ev.events, flush); + } +} + +static const struct option options[] = { + { "timeout", required_argument, NULL, 't' }, + { } +}; + +int main(int argc, char **argv) +{ + int ret; + int timeout = 10; + int cpus, tests, i, j, c; + int sve_vl_count, sme_vl_count, fpsimd_per_cpu; + int sve_vls[MAX_VLS], sme_vls[MAX_VLS]; + struct epoll_event ev; + struct sigaction sa; + + while ((c = getopt_long(argc, argv, "t:", options, NULL)) != -1) { + switch (c) { + case 't': + ret = sscanf(optarg, "%d", &timeout); + if (ret != 1) + ksft_exit_fail_msg("Failed to parse timeout %s\n", + optarg); + break; + default: + ksft_exit_fail_msg("Unknown argument\n"); + } + } + + cpus = num_processors(); + tests = 0; + + if (getauxval(AT_HWCAP) & HWCAP_SVE) { + probe_vls(sve_vls, &sve_vl_count, PR_SVE_SET_VL); + tests += sve_vl_count * cpus; + } else { + sve_vl_count = 0; + } + + if (getauxval(AT_HWCAP2) & HWCAP2_SME) { + probe_vls(sme_vls, &sme_vl_count, PR_SME_SET_VL); + tests += sme_vl_count * cpus * 2; + } else { + sme_vl_count = 0; + } + + /* Force context switching if we only have FPSIMD */ + if (!sve_vl_count && !sme_vl_count) + fpsimd_per_cpu = 2; + else + fpsimd_per_cpu = 1; + tests += cpus * fpsimd_per_cpu; + + ksft_print_header(); + ksft_set_plan(tests); + + ksft_print_msg("%d CPUs, %d SVE VLs, %d SME VLs\n", + cpus, sve_vl_count, sme_vl_count); + + if (timeout > 0) + ksft_print_msg("Will run for %ds\n", timeout); + else + ksft_print_msg("Will run until terminated\n"); + + children = calloc(sizeof(*children), tests); + if (!children) + ksft_exit_fail_msg("Unable to allocate child data\n"); + + ret = epoll_create1(EPOLL_CLOEXEC); + if (ret < 0) + ksft_exit_fail_msg("epoll_create1() failed: %s (%d)\n", + strerror(errno), ret); + epoll_fd = ret; + + /* Get signal handers ready before we start any children */ + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handle_exit_signal; + sa.sa_flags = SA_RESTART | SA_SIGINFO; + sigemptyset(&sa.sa_mask); + ret = sigaction(SIGINT, &sa, NULL); + if (ret < 0) + ksft_print_msg("Failed to install SIGINT handler: %s (%d)\n", + strerror(errno), errno); + ret = sigaction(SIGTERM, &sa, NULL); + if (ret < 0) + ksft_print_msg("Failed to install SIGTERM handler: %s (%d)\n", + strerror(errno), errno); + sa.sa_sigaction = handle_child_signal; + ret = sigaction(SIGCHLD, &sa, NULL); + if (ret < 0) + ksft_print_msg("Failed to install SIGCHLD handler: %s (%d)\n", + strerror(errno), errno); + + for (i = 0; i < cpus; i++) { + for (j = 0; j < fpsimd_per_cpu; j++) + start_fpsimd(&children[num_children++], i, j); + + for (j = 0; j < sve_vl_count; j++) + start_sve(&children[num_children++], sve_vls[j], i); + + for (j = 0; j < sme_vl_count; j++) { + start_ssve(&children[num_children++], sme_vls[j], i); + start_za(&children[num_children++], sme_vls[j], i); + } + } + + for (;;) { + /* Did we get a signal asking us to exit? */ + if (terminate) + break; + + /* + * Timeout is counted in seconds with no output, the + * tests print during startup then are silent when + * running so this should ensure they all ran enough + * to install the signal handler, this is especially + * useful in emulation where we will both be slow and + * likely to have a large set of VLs. + */ + ret = epoll_wait(epoll_fd, &ev, 1, 1000); + if (ret < 0) { + if (errno == EINTR) + continue; + ksft_exit_fail_msg("epoll_wait() failed: %s (%d)\n", + strerror(errno), errno); + } + + /* Output? */ + if (ret == 1) { + child_output(ev.data.ptr, ev.events, false); + continue; + } + + /* Otherwise epoll_wait() timed out */ + + for (i = 0; i < num_children; i++) + child_tickle(&children[i]); + + /* Negative timeout means run indefinitely */ + if (timeout < 0) + continue; + if (--timeout == 0) + break; + } + + ksft_print_msg("Finishing up...\n"); + terminate = true; + + for (i = 0; i < tests; i++) + child_stop(&children[i]); + + drain_output(false); + + for (i = 0; i < tests; i++) + child_cleanup(&children[i]); + + drain_output(true); + + ksft_print_cnts(); + + return 0; +} diff --git a/tools/testing/selftests/arm64/fp/fpsimd-stress b/tools/testing/selftests/arm64/fp/fpsimd-stress new file mode 100755 index 000000000..781b5b022 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/fpsimd-stress @@ -0,0 +1,60 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2015-2019 ARM Limited. +# Original author: Dave Martin <Dave.Martin@arm.com> + +set -ue + +NR_CPUS=`nproc` + +pids= +logs= + +cleanup () { + trap - INT TERM CHLD + set +e + + if [ -n "$pids" ]; then + kill $pids + wait $pids + pids= + fi + + if [ -n "$logs" ]; then + cat $logs + rm $logs + logs= + fi +} + +interrupt () { + cleanup + exit 0 +} + +child_died () { + cleanup + exit 1 +} + +trap interrupt INT TERM EXIT +trap child_died CHLD + +for x in `seq 0 $((NR_CPUS * 4))`; do + log=`mktemp` + logs=$logs\ $log + ./fpsimd-test >$log & + pids=$pids\ $! +done + +# Wait for all child processes to be created: +sleep 10 + +while :; do + kill -USR1 $pids +done & +pids=$pids\ $! + +wait + +exit 1 diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S new file mode 100644 index 000000000..918d04885 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S @@ -0,0 +1,333 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. +// Original author: Dave Martin <Dave.Martin@arm.com> +// +// Simple FPSIMD context switch test +// Repeatedly writes unique test patterns into each FPSIMD register +// and reads them back to verify integrity. +// +// for x in `seq 1 NR_CPUS`; do fpsimd-test & pids=$pids\ $! ; done +// (leave it running for as long as you want...) +// kill $pids + +#include <asm/unistd.h> +#include "assembler.h" +#include "asm-offsets.h" + +#define NVR 32 +#define MAXVL_B (128 / 8) + +.macro _vldr Vn:req, Xt:req + ld1 {v\Vn\().2d}, [x\Xt] +.endm + +.macro _vstr Vn:req, Xt:req + st1 {v\Vn\().2d}, [x\Xt] +.endm + +// Generate accessor functions to read/write programmatically selected +// FPSIMD registers. +// x0 is the register index to access +// x1 is the memory address to read from (getv,setp) or store to (setv,setp) +// All clobber x0-x2 +define_accessor setv, NVR, _vldr +define_accessor getv, NVR, _vstr + +// Declare some storate space to shadow the SVE register contents: +.pushsection .text +.data +.align 4 +vref: + .space MAXVL_B * NVR +scratch: + .space MAXVL_B +.popsection + +// Generate a test pattern for storage in SVE registers +// x0: pid (16 bits) +// x1: register number (6 bits) +// x2: generation (4 bits) +function pattern + orr w1, w0, w1, lsl #16 + orr w2, w1, w2, lsl #28 + + ldr x0, =scratch + mov w1, #MAXVL_B / 4 + +0: str w2, [x0], #4 + add w2, w2, #(1 << 22) + subs w1, w1, #1 + bne 0b + + ret +endfunction + +// Get the address of shadow data for FPSIMD V-register V<xn> +.macro _adrv xd, xn, nrtmp + ldr \xd, =vref + mov x\nrtmp, #16 + madd \xd, x\nrtmp, \xn, \xd +.endm + +// Set up test pattern in a FPSIMD V-register +// x0: pid +// x1: register number +// x2: generation +function setup_vreg + mov x4, x30 + + mov x6, x1 + bl pattern + _adrv x0, x6, 2 + mov x5, x0 + ldr x1, =scratch + bl memcpy + + mov x0, x6 + mov x1, x5 + bl setv + + ret x4 +endfunction + +// Trivial memory compare: compare x2 bytes starting at address x0 with +// bytes starting at address x1. +// Returns only if all bytes match; otherwise, the program is aborted. +// Clobbers x0-x5. +function memcmp + cbz x2, 1f + + mov x5, #0 +0: ldrb w3, [x0, x5] + ldrb w4, [x1, x5] + add x5, x5, #1 + cmp w3, w4 + b.ne barf + subs x2, x2, #1 + b.ne 0b + +1: ret +endfunction + +// Verify that a FPSIMD V-register matches its shadow in memory, else abort +// x0: reg number +// Clobbers x0-x5. +function check_vreg + mov x3, x30 + + _adrv x5, x0, 6 + mov x4, x0 + ldr x7, =scratch + + mov x0, x7 + mov x1, x6 + bl memfill_ae + + mov x0, x4 + mov x1, x7 + bl getv + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Any SVE register modified here can cause corruption in the main +// thread -- but *only* the registers modified here. +function irritator_handler + // Increment the irritation signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + // Corrupt some random V-regs + adr x0, .text + (irritator_handler - .text) / 16 * 16 + movi v0.8b, #7 + movi v9.16b, #9 + movi v31.8b, #31 + + ret +endfunction + +function tickle_handler + // Increment the signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + ret +endfunction + +function terminate_handler + mov w21, w0 + mov x20, x2 + + puts "Terminated by signal " + mov w0, w21 + bl putdec + puts ", no error, iterations=" + ldr x0, [x20, #ucontext_regs + 8 * 22] + bl putdec + puts ", signals=" + ldr x0, [x20, #ucontext_regs + 8 * 23] + bl putdecn + + mov x0, #0 + mov x8, #__NR_exit + svc #0 +endfunction + +// w0: signal number +// x1: sa_action +// w2: sa_flags +// Clobbers x0-x6,x8 +function setsignal + str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! + + mov w4, w0 + mov x5, x1 + mov w6, w2 + + add x0, sp, #16 + mov x1, #sa_sz + bl memclr + + mov w0, w4 + add x1, sp, #16 + str w6, [x1, #sa_flags] + str x5, [x1, #sa_handler] + mov x2, #0 + mov x3, #sa_mask_sz + mov x8, #__NR_rt_sigaction + svc #0 + + cbz w0, 1f + + puts "sigaction failure\n" + b .Labort + +1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) + ret +endfunction + +// Main program entry point +.globl _start +function _start +_start: + mov x23, #0 // signal count + + mov w0, #SIGINT + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, irritator_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov w0, #SIGUSR2 + adr x1, tickle_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + // Sanity-check and report the vector length + + mov x19, #128 + cmp x19, #128 + b.lo 1f + cmp x19, #2048 + b.hi 1f + tst x19, #(8 - 1) + b.eq 2f + +1: puts "Bad vector length: " + mov x0, x19 + bl putdecn + b .Labort + +2: puts "Vector length:\t" + mov x0, x19 + bl putdec + puts " bits\n" + + // Obtain our PID, to ensure test pattern uniqueness between processes + + mov x8, #__NR_getpid + svc #0 + mov x20, x0 + + puts "PID:\t" + mov x0, x20 + bl putdecn + + mov x22, #0 // generation number, increments per iteration +.Ltest_loop: + + mov x21, #0 // Set up V-regs & shadow with test pattern +0: mov x0, x20 + mov x1, x21 + and x2, x22, #0xf + bl setup_vreg + add x21, x21, #1 + cmp x21, #NVR + b.lo 0b + +// Can't do this when SVE state is volatile across SVC: + mov x8, #__NR_sched_yield // Encourage preemption + svc #0 + + mov x21, #0 +0: mov x0, x21 + bl check_vreg + add x21, x21, #1 + cmp x21, #NVR + b.lo 0b + + add x22, x22, #1 + b .Ltest_loop + +.Labort: + mov x0, #0 + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +endfunction + +function barf + mov x10, x0 // expected data + mov x11, x1 // actual data + mov x12, x2 // data size + + puts "Mismatch: PID=" + mov x0, x20 + bl putdec + puts ", iteration=" + mov x0, x22 + bl putdec + puts ", reg=" + mov x0, x21 + bl putdecn + puts "\tExpected [" + mov x0, x10 + mov x1, x12 + bl dumphex + puts "]\n\tGot [" + mov x0, x11 + mov x1, x12 + bl dumphex + puts "]\n" + + mov x8, #__NR_exit + mov x1, #1 + svc #0 +endfunction diff --git a/tools/testing/selftests/arm64/fp/rdvl-sme.c b/tools/testing/selftests/arm64/fp/rdvl-sme.c new file mode 100644 index 000000000..49b0b2e08 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/rdvl-sme.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <stdio.h> + +#include "rdvl.h" + +int main(void) +{ + int vl = rdvl_sme(); + + printf("%d\n", vl); + + return 0; +} diff --git a/tools/testing/selftests/arm64/fp/rdvl-sve.c b/tools/testing/selftests/arm64/fp/rdvl-sve.c new file mode 100644 index 000000000..7f8a13a18 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/rdvl-sve.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <stdio.h> + +#include "rdvl.h" + +int main(void) +{ + int vl = rdvl_sve(); + + printf("%d\n", vl); + + return 0; +} diff --git a/tools/testing/selftests/arm64/fp/rdvl.S b/tools/testing/selftests/arm64/fp/rdvl.S new file mode 100644 index 000000000..20dc29996 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/rdvl.S @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2021 ARM Limited. + +#include "sme-inst.h" + +.arch_extension sve + +.globl rdvl_sve +rdvl_sve: + hint 34 // BTI C + rdvl x0, #1 + ret + +.globl rdvl_sme +rdvl_sme: + hint 34 // BTI C + + rdsvl 0, 1 + + ret diff --git a/tools/testing/selftests/arm64/fp/rdvl.h b/tools/testing/selftests/arm64/fp/rdvl.h new file mode 100644 index 000000000..5d323679f --- /dev/null +++ b/tools/testing/selftests/arm64/fp/rdvl.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef RDVL_H +#define RDVL_H + +int rdvl_sme(void); +int rdvl_sve(void); + +#endif diff --git a/tools/testing/selftests/arm64/fp/sme-inst.h b/tools/testing/selftests/arm64/fp/sme-inst.h new file mode 100644 index 000000000..7191e53ca --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sme-inst.h @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2021-2 ARM Limited. +// Original author: Mark Brown <broonie@kernel.org> + +#ifndef SME_INST_H +#define SME_INST_H + +/* + * RDSVL X\nx, #\imm + */ +.macro rdsvl nx, imm + .inst 0x4bf5800 \ + | (\imm << 5) \ + | (\nx) +.endm + +.macro smstop + msr S0_3_C4_C6_3, xzr +.endm + +.macro smstart_za + msr S0_3_C4_C5_3, xzr +.endm + +.macro smstart_sm + msr S0_3_C4_C3_3, xzr +.endm + +/* + * LDR (vector to ZA array): + * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] + */ +.macro _ldr_za nw, nxbase, offset=0 + .inst 0xe1000000 \ + | (((\nw) & 3) << 13) \ + | ((\nxbase) << 5) \ + | ((\offset) & 7) +.endm + +/* + * STR (vector from ZA array): + * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL] + */ +.macro _str_za nw, nxbase, offset=0 + .inst 0xe1200000 \ + | (((\nw) & 3) << 13) \ + | ((\nxbase) << 5) \ + | ((\offset) & 7) +.endm + +#endif diff --git a/tools/testing/selftests/arm64/fp/ssve-stress b/tools/testing/selftests/arm64/fp/ssve-stress new file mode 100644 index 000000000..e2bd2cc18 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/ssve-stress @@ -0,0 +1,59 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2015-2019 ARM Limited. +# Original author: Dave Martin <Dave.Martin@arm.com> + +set -ue + +NR_CPUS=`nproc` + +pids= +logs= + +cleanup () { + trap - INT TERM CHLD + set +e + + if [ -n "$pids" ]; then + kill $pids + wait $pids + pids= + fi + + if [ -n "$logs" ]; then + cat $logs + rm $logs + logs= + fi +} + +interrupt () { + cleanup + exit 0 +} + +child_died () { + cleanup + exit 1 +} + +trap interrupt INT TERM EXIT + +for x in `seq 0 $((NR_CPUS * 4))`; do + log=`mktemp` + logs=$logs\ $log + ./ssve-test >$log & + pids=$pids\ $! +done + +# Wait for all child processes to be created: +sleep 10 + +while :; do + kill -USR1 $pids +done & +pids=$pids\ $! + +wait + +exit 1 diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c new file mode 100644 index 000000000..a24eca7a4 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015-2020 ARM Limited. + * Original author: Dave Martin <Dave.Martin@arm.com> + */ +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <asm/sigcontext.h> + +#include "../../kselftest.h" +#include "rdvl.h" + +int main(int argc, char **argv) +{ + unsigned int vq; + int vl; + static unsigned int vqs[SVE_VQ_MAX]; + unsigned int nvqs = 0; + + ksft_print_header(); + ksft_set_plan(2); + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) + ksft_exit_skip("SVE not available\n"); + + /* + * Enumerate up to SVE_VQ_MAX vector lengths + */ + for (vq = SVE_VQ_MAX; vq > 0; --vq) { + vl = prctl(PR_SVE_SET_VL, vq * 16); + if (vl == -1) + ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n", + strerror(errno), errno); + + vl &= PR_SVE_VL_LEN_MASK; + + if (rdvl_sve() != vl) + ksft_exit_fail_msg("PR_SVE_SET_VL reports %d, RDVL %d\n", + vl, rdvl_sve()); + + if (!sve_vl_valid(vl)) + ksft_exit_fail_msg("VL %d invalid\n", vl); + vq = sve_vq_from_vl(vl); + + if (!(nvqs < SVE_VQ_MAX)) + ksft_exit_fail_msg("Too many VLs %u >= SVE_VQ_MAX\n", + nvqs); + vqs[nvqs++] = vq; + } + ksft_test_result_pass("Enumerated %d vector lengths\n", nvqs); + ksft_test_result_pass("All vector lengths valid\n"); + + /* Print out the vector lengths in ascending order: */ + while (nvqs--) + ksft_print_msg("%u\n", 16 * vqs[nvqs]); + + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c new file mode 100644 index 000000000..8c4847977 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -0,0 +1,754 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015-2021 ARM Limited. + * Original author: Dave Martin <Dave.Martin@arm.com> + */ +#include <errno.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/wait.h> +#include <asm/sigcontext.h> +#include <asm/ptrace.h> + +#include "../../kselftest.h" + +/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */ +#ifndef NT_ARM_SVE +#define NT_ARM_SVE 0x405 +#endif + +#ifndef NT_ARM_SSVE +#define NT_ARM_SSVE 0x40b +#endif + +struct vec_type { + const char *name; + unsigned long hwcap_type; + unsigned long hwcap; + int regset; + int prctl_set; +}; + +static const struct vec_type vec_types[] = { + { + .name = "SVE", + .hwcap_type = AT_HWCAP, + .hwcap = HWCAP_SVE, + .regset = NT_ARM_SVE, + .prctl_set = PR_SVE_SET_VL, + }, + { + .name = "Streaming SVE", + .hwcap_type = AT_HWCAP2, + .hwcap = HWCAP2_SME, + .regset = NT_ARM_SSVE, + .prctl_set = PR_SME_SET_VL, + }, +}; + +#define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 4) +#define FLAG_TESTS 2 +#define FPSIMD_TESTS 2 + +#define EXPECTED_TESTS ((VL_TESTS + FLAG_TESTS + FPSIMD_TESTS) * ARRAY_SIZE(vec_types)) + +static void fill_buf(char *buf, size_t size) +{ + int i; + + for (i = 0; i < size; i++) + buf[i] = random(); +} + +static int do_child(void) +{ + if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) + ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + + if (raise(SIGSTOP)) + ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + + return EXIT_SUCCESS; +} + +static int get_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd) +{ + struct iovec iov; + + iov.iov_base = fpsimd; + iov.iov_len = sizeof(*fpsimd); + return ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov); +} + +static int set_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd) +{ + struct iovec iov; + + iov.iov_base = fpsimd; + iov.iov_len = sizeof(*fpsimd); + return ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov); +} + +static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type, + void **buf, size_t *size) +{ + struct user_sve_header *sve; + void *p; + size_t sz = sizeof *sve; + struct iovec iov; + + while (1) { + if (*size < sz) { + p = realloc(*buf, sz); + if (!p) { + errno = ENOMEM; + goto error; + } + + *buf = p; + *size = sz; + } + + iov.iov_base = *buf; + iov.iov_len = sz; + if (ptrace(PTRACE_GETREGSET, pid, type->regset, &iov)) + goto error; + + sve = *buf; + if (sve->size <= sz) + break; + + sz = sve->size; + } + + return sve; + +error: + return NULL; +} + +static int set_sve(pid_t pid, const struct vec_type *type, + const struct user_sve_header *sve) +{ + struct iovec iov; + + iov.iov_base = (void *)sve; + iov.iov_len = sve->size; + return ptrace(PTRACE_SETREGSET, pid, type->regset, &iov); +} + +/* Validate setting and getting the inherit flag */ +static void ptrace_set_get_inherit(pid_t child, const struct vec_type *type) +{ + struct user_sve_header sve; + struct user_sve_header *new_sve = NULL; + size_t new_sve_size = 0; + int ret; + + /* First set the flag */ + memset(&sve, 0, sizeof(sve)); + sve.size = sizeof(sve); + sve.vl = sve_vl_from_vq(SVE_VQ_MIN); + sve.flags = SVE_PT_VL_INHERIT; + ret = set_sve(child, type, &sve); + if (ret != 0) { + ksft_test_result_fail("Failed to set %s SVE_PT_VL_INHERIT\n", + type->name); + return; + } + + /* + * Read back the new register state and verify that we have + * set the flags we expected. + */ + if (!get_sve(child, type, (void **)&new_sve, &new_sve_size)) { + ksft_test_result_fail("Failed to read %s SVE flags\n", + type->name); + return; + } + + ksft_test_result(new_sve->flags & SVE_PT_VL_INHERIT, + "%s SVE_PT_VL_INHERIT set\n", type->name); + + /* Now clear */ + sve.flags &= ~SVE_PT_VL_INHERIT; + ret = set_sve(child, type, &sve); + if (ret != 0) { + ksft_test_result_fail("Failed to clear %s SVE_PT_VL_INHERIT\n", + type->name); + return; + } + + if (!get_sve(child, type, (void **)&new_sve, &new_sve_size)) { + ksft_test_result_fail("Failed to read %s SVE flags\n", + type->name); + return; + } + + ksft_test_result(!(new_sve->flags & SVE_PT_VL_INHERIT), + "%s SVE_PT_VL_INHERIT cleared\n", type->name); + + free(new_sve); +} + +/* Validate attempting to set the specfied VL via ptrace */ +static void ptrace_set_get_vl(pid_t child, const struct vec_type *type, + unsigned int vl, bool *supported) +{ + struct user_sve_header sve; + struct user_sve_header *new_sve = NULL; + size_t new_sve_size = 0; + int ret, prctl_vl; + + *supported = false; + + /* Check if the VL is supported in this process */ + prctl_vl = prctl(type->prctl_set, vl); + if (prctl_vl == -1) + ksft_exit_fail_msg("prctl(PR_%s_SET_VL) failed: %s (%d)\n", + type->name, strerror(errno), errno); + + /* If the VL is not supported then a supported VL will be returned */ + *supported = (prctl_vl == vl); + + /* Set the VL by doing a set with no register payload */ + memset(&sve, 0, sizeof(sve)); + sve.size = sizeof(sve); + sve.vl = vl; + ret = set_sve(child, type, &sve); + if (ret != 0) { + ksft_test_result_fail("Failed to set %s VL %u\n", + type->name, vl); + return; + } + + /* + * Read back the new register state and verify that we have the + * same VL that we got from prctl() on ourselves. + */ + if (!get_sve(child, type, (void **)&new_sve, &new_sve_size)) { + ksft_test_result_fail("Failed to read %s VL %u\n", + type->name, vl); + return; + } + + ksft_test_result(new_sve->vl = prctl_vl, "Set %s VL %u\n", + type->name, vl); + + free(new_sve); +} + +static void check_u32(unsigned int vl, const char *reg, + uint32_t *in, uint32_t *out, int *errors) +{ + if (*in != *out) { + printf("# VL %d %s wrote %x read %x\n", + vl, reg, *in, *out); + (*errors)++; + } +} + +/* Access the FPSIMD registers via the SVE regset */ +static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type) +{ + void *svebuf; + struct user_sve_header *sve; + struct user_fpsimd_state *fpsimd, new_fpsimd; + unsigned int i, j; + unsigned char *p; + int ret; + + svebuf = malloc(SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD)); + if (!svebuf) { + ksft_test_result_fail("Failed to allocate FPSIMD buffer\n"); + return; + } + + memset(svebuf, 0, SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD)); + sve = svebuf; + sve->flags = SVE_PT_REGS_FPSIMD; + sve->size = SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD); + sve->vl = 16; /* We don't care what the VL is */ + + /* Try to set a known FPSIMD state via PT_REGS_SVE */ + fpsimd = (struct user_fpsimd_state *)((char *)sve + + SVE_PT_FPSIMD_OFFSET); + for (i = 0; i < 32; ++i) { + p = (unsigned char *)&fpsimd->vregs[i]; + + for (j = 0; j < sizeof(fpsimd->vregs[i]); ++j) + p[j] = j; + } + + ret = set_sve(child, type, sve); + ksft_test_result(ret == 0, "%s FPSIMD set via SVE: %d\n", + type->name, ret); + if (ret) + goto out; + + /* Verify via the FPSIMD regset */ + if (get_fpsimd(child, &new_fpsimd)) { + ksft_test_result_fail("get_fpsimd(): %s\n", + strerror(errno)); + goto out; + } + if (memcmp(fpsimd, &new_fpsimd, sizeof(*fpsimd)) == 0) + ksft_test_result_pass("%s get_fpsimd() gave same state\n", + type->name); + else + ksft_test_result_fail("%s get_fpsimd() gave different state\n", + type->name); + +out: + free(svebuf); +} + +/* Validate attempting to set SVE data and read SVE data */ +static void ptrace_set_sve_get_sve_data(pid_t child, + const struct vec_type *type, + unsigned int vl) +{ + void *write_buf; + void *read_buf = NULL; + struct user_sve_header *write_sve; + struct user_sve_header *read_sve; + size_t read_sve_size = 0; + unsigned int vq = sve_vq_from_vl(vl); + int ret, i; + size_t data_size; + int errors = 0; + + data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); + write_buf = malloc(data_size); + if (!write_buf) { + ksft_test_result_fail("Error allocating %d byte buffer for %s VL %u\n", + data_size, type->name, vl); + return; + } + write_sve = write_buf; + + /* Set up some data and write it out */ + memset(write_sve, 0, data_size); + write_sve->size = data_size; + write_sve->vl = vl; + write_sve->flags = SVE_PT_REGS_SVE; + + for (i = 0; i < __SVE_NUM_ZREGS; i++) + fill_buf(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), + SVE_PT_SVE_ZREG_SIZE(vq)); + + for (i = 0; i < __SVE_NUM_PREGS; i++) + fill_buf(write_buf + SVE_PT_SVE_PREG_OFFSET(vq, i), + SVE_PT_SVE_PREG_SIZE(vq)); + + fill_buf(write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), SVE_PT_SVE_FPSR_SIZE); + fill_buf(write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), SVE_PT_SVE_FPCR_SIZE); + + /* TODO: Generate a valid FFR pattern */ + + ret = set_sve(child, type, write_sve); + if (ret != 0) { + ksft_test_result_fail("Failed to set %s VL %u data\n", + type->name, vl); + goto out; + } + + /* Read the data back */ + if (!get_sve(child, type, (void **)&read_buf, &read_sve_size)) { + ksft_test_result_fail("Failed to read %s VL %u data\n", + type->name, vl); + goto out; + } + read_sve = read_buf; + + /* We might read more data if there's extensions we don't know */ + if (read_sve->size < write_sve->size) { + ksft_test_result_fail("%s wrote %d bytes, only read %d\n", + type->name, write_sve->size, + read_sve->size); + goto out_read; + } + + for (i = 0; i < __SVE_NUM_ZREGS; i++) { + if (memcmp(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), + read_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), + SVE_PT_SVE_ZREG_SIZE(vq)) != 0) { + printf("# Mismatch in %u Z%d\n", vl, i); + errors++; + } + } + + for (i = 0; i < __SVE_NUM_PREGS; i++) { + if (memcmp(write_buf + SVE_PT_SVE_PREG_OFFSET(vq, i), + read_buf + SVE_PT_SVE_PREG_OFFSET(vq, i), + SVE_PT_SVE_PREG_SIZE(vq)) != 0) { + printf("# Mismatch in %u P%d\n", vl, i); + errors++; + } + } + + check_u32(vl, "FPSR", write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), + read_buf + SVE_PT_SVE_FPSR_OFFSET(vq), &errors); + check_u32(vl, "FPCR", write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), + read_buf + SVE_PT_SVE_FPCR_OFFSET(vq), &errors); + + ksft_test_result(errors == 0, "Set and get %s data for VL %u\n", + type->name, vl); + +out_read: + free(read_buf); +out: + free(write_buf); +} + +/* Validate attempting to set SVE data and read it via the FPSIMD regset */ +static void ptrace_set_sve_get_fpsimd_data(pid_t child, + const struct vec_type *type, + unsigned int vl) +{ + void *write_buf; + struct user_sve_header *write_sve; + unsigned int vq = sve_vq_from_vl(vl); + struct user_fpsimd_state fpsimd_state; + int ret, i; + size_t data_size; + int errors = 0; + + if (__BYTE_ORDER == __BIG_ENDIAN) { + ksft_test_result_skip("Big endian not supported\n"); + return; + } + + data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); + write_buf = malloc(data_size); + if (!write_buf) { + ksft_test_result_fail("Error allocating %d byte buffer for %s VL %u\n", + data_size, type->name, vl); + return; + } + write_sve = write_buf; + + /* Set up some data and write it out */ + memset(write_sve, 0, data_size); + write_sve->size = data_size; + write_sve->vl = vl; + write_sve->flags = SVE_PT_REGS_SVE; + + for (i = 0; i < __SVE_NUM_ZREGS; i++) + fill_buf(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), + SVE_PT_SVE_ZREG_SIZE(vq)); + + fill_buf(write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), SVE_PT_SVE_FPSR_SIZE); + fill_buf(write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), SVE_PT_SVE_FPCR_SIZE); + + ret = set_sve(child, type, write_sve); + if (ret != 0) { + ksft_test_result_fail("Failed to set %s VL %u data\n", + type->name, vl); + goto out; + } + + /* Read the data back */ + if (get_fpsimd(child, &fpsimd_state)) { + ksft_test_result_fail("Failed to read %s VL %u FPSIMD data\n", + type->name, vl); + goto out; + } + + for (i = 0; i < __SVE_NUM_ZREGS; i++) { + __uint128_t tmp = 0; + + /* + * Z regs are stored endianness invariant, this won't + * work for big endian + */ + memcpy(&tmp, write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), + sizeof(tmp)); + + if (tmp != fpsimd_state.vregs[i]) { + printf("# Mismatch in FPSIMD for %s VL %u Z%d\n", + type->name, vl, i); + errors++; + } + } + + check_u32(vl, "FPSR", write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), + &fpsimd_state.fpsr, &errors); + check_u32(vl, "FPCR", write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), + &fpsimd_state.fpcr, &errors); + + ksft_test_result(errors == 0, "Set and get FPSIMD data for %s VL %u\n", + type->name, vl); + +out: + free(write_buf); +} + +/* Validate attempting to set FPSIMD data and read it via the SVE regset */ +static void ptrace_set_fpsimd_get_sve_data(pid_t child, + const struct vec_type *type, + unsigned int vl) +{ + void *read_buf = NULL; + unsigned char *p; + struct user_sve_header *read_sve; + unsigned int vq = sve_vq_from_vl(vl); + struct user_fpsimd_state write_fpsimd; + int ret, i, j; + size_t read_sve_size = 0; + size_t expected_size; + int errors = 0; + + if (__BYTE_ORDER == __BIG_ENDIAN) { + ksft_test_result_skip("Big endian not supported\n"); + return; + } + + for (i = 0; i < 32; ++i) { + p = (unsigned char *)&write_fpsimd.vregs[i]; + + for (j = 0; j < sizeof(write_fpsimd.vregs[i]); ++j) + p[j] = j; + } + + ret = set_fpsimd(child, &write_fpsimd); + if (ret != 0) { + ksft_test_result_fail("Failed to set FPSIMD state: %d\n)", + ret); + return; + } + + if (!get_sve(child, type, (void **)&read_buf, &read_sve_size)) { + ksft_test_result_fail("Failed to read %s VL %u data\n", + type->name, vl); + return; + } + read_sve = read_buf; + + if (read_sve->vl != vl) { + ksft_test_result_fail("Child VL != expected VL %d\n", + read_sve->vl, vl); + goto out; + } + + /* The kernel may return either SVE or FPSIMD format */ + switch (read_sve->flags & SVE_PT_REGS_MASK) { + case SVE_PT_REGS_FPSIMD: + expected_size = SVE_PT_FPSIMD_SIZE(vq, SVE_PT_REGS_FPSIMD); + if (read_sve_size < expected_size) { + ksft_test_result_fail("Read %d bytes, expected %d\n", + read_sve_size, expected_size); + goto out; + } + + ret = memcmp(&write_fpsimd, read_buf + SVE_PT_FPSIMD_OFFSET, + sizeof(write_fpsimd)); + if (ret != 0) { + ksft_print_msg("Read FPSIMD data mismatch\n"); + errors++; + } + break; + + case SVE_PT_REGS_SVE: + expected_size = SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); + if (read_sve_size < expected_size) { + ksft_test_result_fail("Read %d bytes, expected %d\n", + read_sve_size, expected_size); + goto out; + } + + for (i = 0; i < __SVE_NUM_ZREGS; i++) { + __uint128_t tmp = 0; + + /* + * Z regs are stored endianness invariant, this won't + * work for big endian + */ + memcpy(&tmp, read_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i), + sizeof(tmp)); + + if (tmp != write_fpsimd.vregs[i]) { + ksft_print_msg("Mismatch in FPSIMD for %s VL %u Z%d/V%d\n", + type->name, vl, i, i); + errors++; + } + } + + check_u32(vl, "FPSR", &write_fpsimd.fpsr, + read_buf + SVE_PT_SVE_FPSR_OFFSET(vq), &errors); + check_u32(vl, "FPCR", &write_fpsimd.fpcr, + read_buf + SVE_PT_SVE_FPCR_OFFSET(vq), &errors); + break; + default: + ksft_print_msg("Unexpected regs type %d\n", + read_sve->flags & SVE_PT_REGS_MASK); + errors++; + break; + } + + ksft_test_result(errors == 0, "Set FPSIMD, read via SVE for %s VL %u\n", + type->name, vl); + +out: + free(read_buf); +} + +static int do_parent(pid_t child) +{ + int ret = EXIT_FAILURE; + pid_t pid; + int status, i; + siginfo_t si; + unsigned int vq, vl; + bool vl_supported; + + ksft_print_msg("Parent is %d, child is %d\n", getpid(), child); + + /* Attach to the child */ + while (1) { + int sig; + + pid = wait(&status); + if (pid == -1) { + perror("wait"); + goto error; + } + + /* + * This should never happen but it's hard to flag in + * the framework. + */ + if (pid != child) + continue; + + if (WIFEXITED(status) || WIFSIGNALED(status)) + ksft_exit_fail_msg("Child died unexpectedly\n"); + + if (!WIFSTOPPED(status)) + goto error; + + sig = WSTOPSIG(status); + + if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) { + if (errno == ESRCH) + goto disappeared; + + if (errno == EINVAL) { + sig = 0; /* bust group-stop */ + goto cont; + } + + ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n", + strerror(errno)); + goto error; + } + + if (sig == SIGSTOP && si.si_code == SI_TKILL && + si.si_pid == pid) + break; + + cont: + if (ptrace(PTRACE_CONT, pid, NULL, sig)) { + if (errno == ESRCH) + goto disappeared; + + ksft_test_result_fail("PTRACE_CONT: %s\n", + strerror(errno)); + goto error; + } + } + + for (i = 0; i < ARRAY_SIZE(vec_types); i++) { + /* FPSIMD via SVE regset */ + if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) { + ptrace_sve_fpsimd(child, &vec_types[i]); + } else { + ksft_test_result_skip("%s FPSIMD set via SVE\n", + vec_types[i].name); + ksft_test_result_skip("%s FPSIMD read\n", + vec_types[i].name); + } + + /* prctl() flags */ + if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) { + ptrace_set_get_inherit(child, &vec_types[i]); + } else { + ksft_test_result_skip("%s SVE_PT_VL_INHERIT set\n", + vec_types[i].name); + ksft_test_result_skip("%s SVE_PT_VL_INHERIT cleared\n", + vec_types[i].name); + } + + /* Step through every possible VQ */ + for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { + vl = sve_vl_from_vq(vq); + + /* First, try to set this vector length */ + if (getauxval(vec_types[i].hwcap_type) & + vec_types[i].hwcap) { + ptrace_set_get_vl(child, &vec_types[i], vl, + &vl_supported); + } else { + ksft_test_result_skip("%s get/set VL %d\n", + vec_types[i].name, vl); + vl_supported = false; + } + + /* If the VL is supported validate data set/get */ + if (vl_supported) { + ptrace_set_sve_get_sve_data(child, &vec_types[i], vl); + ptrace_set_sve_get_fpsimd_data(child, &vec_types[i], vl); + ptrace_set_fpsimd_get_sve_data(child, &vec_types[i], vl); + } else { + ksft_test_result_skip("%s set SVE get SVE for VL %d\n", + vec_types[i].name, vl); + ksft_test_result_skip("%s set SVE get FPSIMD for VL %d\n", + vec_types[i].name, vl); + ksft_test_result_skip("%s set FPSIMD get SVE for VL %d\n", + vec_types[i].name, vl); + } + } + } + + ret = EXIT_SUCCESS; + +error: + kill(child, SIGKILL); + +disappeared: + return ret; +} + +int main(void) +{ + int ret = EXIT_SUCCESS; + pid_t child; + + srandom(getpid()); + + ksft_print_header(); + ksft_set_plan(EXPECTED_TESTS); + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) + ksft_exit_skip("SVE not available\n"); + + child = fork(); + if (!child) + return do_child(); + + if (do_parent(child)) + ret = EXIT_FAILURE; + + ksft_print_cnts(); + + return ret; +} diff --git a/tools/testing/selftests/arm64/fp/sve-stress b/tools/testing/selftests/arm64/fp/sve-stress new file mode 100755 index 000000000..24dd0922c --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-stress @@ -0,0 +1,59 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2015-2019 ARM Limited. +# Original author: Dave Martin <Dave.Martin@arm.com> + +set -ue + +NR_CPUS=`nproc` + +pids= +logs= + +cleanup () { + trap - INT TERM CHLD + set +e + + if [ -n "$pids" ]; then + kill $pids + wait $pids + pids= + fi + + if [ -n "$logs" ]; then + cat $logs + rm $logs + logs= + fi +} + +interrupt () { + cleanup + exit 0 +} + +child_died () { + cleanup + exit 1 +} + +trap interrupt INT TERM EXIT + +for x in `seq 0 $((NR_CPUS * 4))`; do + log=`mktemp` + logs=$logs\ $log + ./sve-test >$log & + pids=$pids\ $! +done + +# Wait for all child processes to be created: +sleep 10 + +while :; do + kill -USR1 $pids +done & +pids=$pids\ $! + +wait + +exit 1 diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S new file mode 100644 index 000000000..2a18cb4c5 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -0,0 +1,556 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. +// Original author: Dave Martin <Dave.Martin@arm.com> +// +// Simple Scalable Vector Extension context switch test +// Repeatedly writes unique test patterns into each SVE register +// and reads them back to verify integrity. +// +// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done +// (leave it running for as long as you want...) +// kill $pids + +#include <asm/unistd.h> +#include "assembler.h" +#include "asm-offsets.h" +#include "sme-inst.h" + +#define NZR 32 +#define NPR 16 +#define MAXVL_B (2048 / 8) + +.arch_extension sve + +.macro _sve_ldr_v zt, xn + ldr z\zt, [x\xn] +.endm + +.macro _sve_str_v zt, xn + str z\zt, [x\xn] +.endm + +.macro _sve_ldr_p pt, xn + ldr p\pt, [x\xn] +.endm + +.macro _sve_str_p pt, xn + str p\pt, [x\xn] +.endm + +// Generate accessor functions to read/write programmatically selected +// SVE registers. +// x0 is the register index to access +// x1 is the memory address to read from (getz,setp) or store to (setz,setp) +// All clobber x0-x2 +define_accessor setz, NZR, _sve_ldr_v +define_accessor getz, NZR, _sve_str_v +define_accessor setp, NPR, _sve_ldr_p +define_accessor getp, NPR, _sve_str_p + +// Declare some storate space to shadow the SVE register contents: +.pushsection .text +.data +.align 4 +zref: + .space MAXVL_B * NZR +pref: + .space MAXVL_B / 8 * NPR +ffrref: + .space MAXVL_B / 8 +scratch: + .space MAXVL_B +.popsection + +// Generate a test pattern for storage in SVE registers +// x0: pid (16 bits) +// x1: register number (6 bits) +// x2: generation (4 bits) + +// These values are used to constuct a 32-bit pattern that is repeated in the +// scratch buffer as many times as will fit: +// bits 31:28 generation number (increments once per test_loop) +// bits 27:22 32-bit lane index +// bits 21:16 register number +// bits 15: 0 pid + +function pattern + orr w1, w0, w1, lsl #16 + orr w2, w1, w2, lsl #28 + + ldr x0, =scratch + mov w1, #MAXVL_B / 4 + +0: str w2, [x0], #4 + add w2, w2, #(1 << 22) + subs w1, w1, #1 + bne 0b + + ret +endfunction + +// Get the address of shadow data for SVE Z-register Z<xn> +.macro _adrz xd, xn, nrtmp + ldr \xd, =zref + rdvl x\nrtmp, #1 + madd \xd, x\nrtmp, \xn, \xd +.endm + +// Get the address of shadow data for SVE P-register P<xn - NZR> +.macro _adrp xd, xn, nrtmp + ldr \xd, =pref + rdvl x\nrtmp, #1 + lsr x\nrtmp, x\nrtmp, #3 + sub \xn, \xn, #NZR + madd \xd, x\nrtmp, \xn, \xd +.endm + +// Set up test pattern in a SVE Z-register +// x0: pid +// x1: register number +// x2: generation +function setup_zreg + mov x4, x30 + + mov x6, x1 + bl pattern + _adrz x0, x6, 2 + mov x5, x0 + ldr x1, =scratch + bl memcpy + + mov x0, x6 + mov x1, x5 + bl setz + + ret x4 +endfunction + +// Set up test pattern in a SVE P-register +// x0: pid +// x1: register number +// x2: generation +function setup_preg + mov x4, x30 + + mov x6, x1 + bl pattern + _adrp x0, x6, 2 + mov x5, x0 + ldr x1, =scratch + bl memcpy + + mov x0, x6 + mov x1, x5 + bl setp + + ret x4 +endfunction + +// Set up test pattern in the FFR +// x0: pid +// x2: generation +// +// We need to generate a canonical FFR value, which consists of a number of +// low "1" bits, followed by a number of zeros. This gives us 17 unique values +// per 16 bits of FFR, so we create a 4 bit signature out of the PID and +// generation, and use that as the initial number of ones in the pattern. +// We fill the upper lanes of FFR with zeros. +// Beware: corrupts P0. +function setup_ffr +#ifndef SSVE + mov x4, x30 + + and w0, w0, #0x3 + bfi w0, w2, #2, #2 + mov w1, #1 + lsl w1, w1, w0 + sub w1, w1, #1 + + ldr x0, =ffrref + strh w1, [x0], 2 + rdvl x1, #1 + lsr x1, x1, #3 + sub x1, x1, #2 + bl memclr + + mov x0, #0 + ldr x1, =ffrref + bl setp + + wrffr p0.b + + ret x4 +#else + ret +#endif +endfunction + +// Trivial memory compare: compare x2 bytes starting at address x0 with +// bytes starting at address x1. +// Returns only if all bytes match; otherwise, the program is aborted. +// Clobbers x0-x5. +function memcmp + cbz x2, 2f + + stp x0, x1, [sp, #-0x20]! + str x2, [sp, #0x10] + + mov x5, #0 +0: ldrb w3, [x0, x5] + ldrb w4, [x1, x5] + add x5, x5, #1 + cmp w3, w4 + b.ne 1f + subs x2, x2, #1 + b.ne 0b + +1: ldr x2, [sp, #0x10] + ldp x0, x1, [sp], #0x20 + b.ne barf + +2: ret +endfunction + +// Verify that a SVE Z-register matches its shadow in memory, else abort +// x0: reg number +// Clobbers x0-x7. +function check_zreg + mov x3, x30 + + _adrz x5, x0, 6 + mov x4, x0 + ldr x7, =scratch + + mov x0, x7 + mov x1, x6 + bl memfill_ae + + mov x0, x4 + mov x1, x7 + bl getz + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Verify that a SVE P-register matches its shadow in memory, else abort +// x0: reg number +// Clobbers x0-x7. +function check_preg + mov x3, x30 + + _adrp x5, x0, 6 + mov x4, x0 + ldr x7, =scratch + + mov x0, x7 + mov x1, x6 + bl memfill_ae + + mov x0, x4 + mov x1, x7 + bl getp + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Verify that the FFR matches its shadow in memory, else abort +// Beware -- corrupts P0. +// Clobbers x0-x5. +function check_ffr +#ifndef SSVE + mov x3, x30 + + ldr x4, =scratch + rdvl x5, #1 + lsr x5, x5, #3 + + mov x0, x4 + mov x1, x5 + bl memfill_ae + + rdffr p0.b + mov x0, #0 + mov x1, x4 + bl getp + + ldr x0, =ffrref + mov x1, x4 + mov x2, x5 + mov x30, x3 + b memcmp +#else + ret +#endif +endfunction + +// Any SVE register modified here can cause corruption in the main +// thread -- but *only* the registers modified here. +function irritator_handler + // Increment the irritation signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + // Corrupt some random Z-regs + adr x0, .text + (irritator_handler - .text) / 16 * 16 + movi v0.8b, #1 + movi v9.16b, #2 + movi v31.8b, #3 +#ifndef SSVE + // And P0 + rdffr p0.b + // And FFR + wrffr p15.b +#endif + + ret +endfunction + +function tickle_handler + // Increment the signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + ret +endfunction + +function terminate_handler + mov w21, w0 + mov x20, x2 + + puts "Terminated by signal " + mov w0, w21 + bl putdec + puts ", no error, iterations=" + ldr x0, [x20, #ucontext_regs + 8 * 22] + bl putdec + puts ", signals=" + ldr x0, [x20, #ucontext_regs + 8 * 23] + bl putdecn + + mov x0, #0 + mov x8, #__NR_exit + svc #0 +endfunction + +// w0: signal number +// x1: sa_action +// w2: sa_flags +// Clobbers x0-x6,x8 +function setsignal + str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! + + mov w4, w0 + mov x5, x1 + mov w6, w2 + + add x0, sp, #16 + mov x1, #sa_sz + bl memclr + + mov w0, w4 + add x1, sp, #16 + str w6, [x1, #sa_flags] + str x5, [x1, #sa_handler] + mov x2, #0 + mov x3, #sa_mask_sz + mov x8, #__NR_rt_sigaction + svc #0 + + cbz w0, 1f + + puts "sigaction failure\n" + b .Labort + +1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) + ret +endfunction + +// Main program entry point +.globl _start +function _start +_start: + mov x23, #0 // Irritation signal count + + mov w0, #SIGINT + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, irritator_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov w0, #SIGUSR2 + adr x1, tickle_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + +#ifdef SSVE + puts "Streaming mode " + smstart_sm +#endif + + // Sanity-check and report the vector length + + rdvl x19, #8 + cmp x19, #128 + b.lo 1f + cmp x19, #2048 + b.hi 1f + tst x19, #(8 - 1) + b.eq 2f + +1: puts "Bad vector length: " + mov x0, x19 + bl putdecn + b .Labort + +2: puts "Vector length:\t" + mov x0, x19 + bl putdec + puts " bits\n" + + // Obtain our PID, to ensure test pattern uniqueness between processes + + mov x8, #__NR_getpid + svc #0 + mov x20, x0 + + puts "PID:\t" + mov x0, x20 + bl putdecn + +#ifdef SSVE + smstart_sm // syscalls will have exited streaming mode +#endif + + mov x22, #0 // generation number, increments per iteration +.Ltest_loop: + rdvl x0, #8 + cmp x0, x19 + b.ne vl_barf + + mov x21, #0 // Set up Z-regs & shadow with test pattern +0: mov x0, x20 + mov x1, x21 + and x2, x22, #0xf + bl setup_zreg + add x21, x21, #1 + cmp x21, #NZR + b.lo 0b + + mov x0, x20 // Set up FFR & shadow with test pattern + mov x1, #NZR + NPR + and x2, x22, #0xf + bl setup_ffr + +0: mov x0, x20 // Set up P-regs & shadow with test pattern + mov x1, x21 + and x2, x22, #0xf + bl setup_preg + add x21, x21, #1 + cmp x21, #NZR + NPR + b.lo 0b + +// Can't do this when SVE state is volatile across SVC: +// mov x8, #__NR_sched_yield // Encourage preemption +// svc #0 + + mov x21, #0 +0: mov x0, x21 + bl check_zreg + add x21, x21, #1 + cmp x21, #NZR + b.lo 0b + +0: mov x0, x21 + bl check_preg + add x21, x21, #1 + cmp x21, #NZR + NPR + b.lo 0b + + bl check_ffr + + add x22, x22, #1 + b .Ltest_loop + +.Labort: + mov x0, #0 + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +endfunction + +function barf +// fpsimd.c acitivty log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// end hack + mov x10, x0 // expected data + mov x11, x1 // actual data + mov x12, x2 // data size + + puts "Mismatch: PID=" + mov x0, x20 + bl putdec + puts ", iteration=" + mov x0, x22 + bl putdec + puts ", reg=" + mov x0, x21 + bl putdecn + puts "\tExpected [" + mov x0, x10 + mov x1, x12 + bl dumphex + puts "]\n\tGot [" + mov x0, x11 + mov x1, x12 + bl dumphex + puts "]\n" + + mov x8, #__NR_getpid + svc #0 +// fpsimd.c acitivty log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// ^ end of hack + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +// mov x8, #__NR_exit +// mov x1, #1 +// svc #0 +endfunction + +function vl_barf + mov x10, x0 + + puts "Bad active VL: " + mov x0, x10 + bl putdecn + + mov x8, #__NR_exit + mov x1, #1 + svc #0 +endfunction diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c new file mode 100644 index 000000000..9bcfcdc34 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c @@ -0,0 +1,670 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 ARM Limited. + * Original author: Mark Brown <broonie@kernel.org> + */ +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <asm/sigcontext.h> +#include <asm/hwcap.h> + +#include "../../kselftest.h" +#include "rdvl.h" + +#define ARCH_MIN_VL SVE_VL_MIN + +struct vec_data { + const char *name; + unsigned long hwcap_type; + unsigned long hwcap; + const char *rdvl_binary; + int (*rdvl)(void); + + int prctl_get; + int prctl_set; + const char *default_vl_file; + + int default_vl; + int min_vl; + int max_vl; +}; + + +static struct vec_data vec_data[] = { + { + .name = "SVE", + .hwcap_type = AT_HWCAP, + .hwcap = HWCAP_SVE, + .rdvl = rdvl_sve, + .rdvl_binary = "./rdvl-sve", + .prctl_get = PR_SVE_GET_VL, + .prctl_set = PR_SVE_SET_VL, + .default_vl_file = "/proc/sys/abi/sve_default_vector_length", + }, + { + .name = "SME", + .hwcap_type = AT_HWCAP2, + .hwcap = HWCAP2_SME, + .rdvl = rdvl_sme, + .rdvl_binary = "./rdvl-sme", + .prctl_get = PR_SME_GET_VL, + .prctl_set = PR_SME_SET_VL, + .default_vl_file = "/proc/sys/abi/sme_default_vector_length", + }, +}; + +static int stdio_read_integer(FILE *f, const char *what, int *val) +{ + int n = 0; + int ret; + + ret = fscanf(f, "%d%*1[\n]%n", val, &n); + if (ret < 1 || n < 1) { + ksft_print_msg("failed to parse integer from %s\n", what); + return -1; + } + + return 0; +} + +/* Start a new process and return the vector length it sees */ +static int get_child_rdvl(struct vec_data *data) +{ + FILE *out; + int pipefd[2]; + pid_t pid, child; + int read_vl, ret; + + ret = pipe(pipefd); + if (ret == -1) { + ksft_print_msg("pipe() failed: %d (%s)\n", + errno, strerror(errno)); + return -1; + } + + fflush(stdout); + + child = fork(); + if (child == -1) { + ksft_print_msg("fork() failed: %d (%s)\n", + errno, strerror(errno)); + close(pipefd[0]); + close(pipefd[1]); + return -1; + } + + /* Child: put vector length on the pipe */ + if (child == 0) { + /* + * Replace stdout with the pipe, errors to stderr from + * here as kselftest prints to stdout. + */ + ret = dup2(pipefd[1], 1); + if (ret == -1) { + fprintf(stderr, "dup2() %d\n", errno); + exit(EXIT_FAILURE); + } + + /* exec() a new binary which puts the VL on stdout */ + ret = execl(data->rdvl_binary, data->rdvl_binary, NULL); + fprintf(stderr, "execl(%s) failed: %d (%s)\n", + data->rdvl_binary, errno, strerror(errno)); + + exit(EXIT_FAILURE); + } + + close(pipefd[1]); + + /* Parent; wait for the exit status from the child & verify it */ + do { + pid = wait(&ret); + if (pid == -1) { + ksft_print_msg("wait() failed: %d (%s)\n", + errno, strerror(errno)); + close(pipefd[0]); + return -1; + } + } while (pid != child); + + assert(pid == child); + + if (!WIFEXITED(ret)) { + ksft_print_msg("child exited abnormally\n"); + close(pipefd[0]); + return -1; + } + + if (WEXITSTATUS(ret) != 0) { + ksft_print_msg("child returned error %d\n", + WEXITSTATUS(ret)); + close(pipefd[0]); + return -1; + } + + out = fdopen(pipefd[0], "r"); + if (!out) { + ksft_print_msg("failed to open child stdout\n"); + close(pipefd[0]); + return -1; + } + + ret = stdio_read_integer(out, "child", &read_vl); + fclose(out); + if (ret != 0) + return ret; + + return read_vl; +} + +static int file_read_integer(const char *name, int *val) +{ + FILE *f; + int ret; + + f = fopen(name, "r"); + if (!f) { + ksft_test_result_fail("Unable to open %s: %d (%s)\n", + name, errno, + strerror(errno)); + return -1; + } + + ret = stdio_read_integer(f, name, val); + fclose(f); + + return ret; +} + +static int file_write_integer(const char *name, int val) +{ + FILE *f; + + f = fopen(name, "w"); + if (!f) { + ksft_test_result_fail("Unable to open %s: %d (%s)\n", + name, errno, + strerror(errno)); + return -1; + } + + fprintf(f, "%d", val); + fclose(f); + + return 0; +} + +/* + * Verify that we can read the default VL via proc, checking that it + * is set in a freshly spawned child. + */ +static void proc_read_default(struct vec_data *data) +{ + int default_vl, child_vl, ret; + + ret = file_read_integer(data->default_vl_file, &default_vl); + if (ret != 0) + return; + + /* Is this the actual default seen by new processes? */ + child_vl = get_child_rdvl(data); + if (child_vl != default_vl) { + ksft_test_result_fail("%s is %d but child VL is %d\n", + data->default_vl_file, + default_vl, child_vl); + return; + } + + ksft_test_result_pass("%s default vector length %d\n", data->name, + default_vl); + data->default_vl = default_vl; +} + +/* Verify that we can write a minimum value and have it take effect */ +static void proc_write_min(struct vec_data *data) +{ + int ret, new_default, child_vl; + + if (geteuid() != 0) { + ksft_test_result_skip("Need to be root to write to /proc\n"); + return; + } + + ret = file_write_integer(data->default_vl_file, ARCH_MIN_VL); + if (ret != 0) + return; + + /* What was the new value? */ + ret = file_read_integer(data->default_vl_file, &new_default); + if (ret != 0) + return; + + /* Did it take effect in a new process? */ + child_vl = get_child_rdvl(data); + if (child_vl != new_default) { + ksft_test_result_fail("%s is %d but child VL is %d\n", + data->default_vl_file, + new_default, child_vl); + return; + } + + ksft_test_result_pass("%s minimum vector length %d\n", data->name, + new_default); + data->min_vl = new_default; + + file_write_integer(data->default_vl_file, data->default_vl); +} + +/* Verify that we can write a maximum value and have it take effect */ +static void proc_write_max(struct vec_data *data) +{ + int ret, new_default, child_vl; + + if (geteuid() != 0) { + ksft_test_result_skip("Need to be root to write to /proc\n"); + return; + } + + /* -1 is accepted by the /proc interface as the maximum VL */ + ret = file_write_integer(data->default_vl_file, -1); + if (ret != 0) + return; + + /* What was the new value? */ + ret = file_read_integer(data->default_vl_file, &new_default); + if (ret != 0) + return; + + /* Did it take effect in a new process? */ + child_vl = get_child_rdvl(data); + if (child_vl != new_default) { + ksft_test_result_fail("%s is %d but child VL is %d\n", + data->default_vl_file, + new_default, child_vl); + return; + } + + ksft_test_result_pass("%s maximum vector length %d\n", data->name, + new_default); + data->max_vl = new_default; + + file_write_integer(data->default_vl_file, data->default_vl); +} + +/* Can we read back a VL from prctl? */ +static void prctl_get(struct vec_data *data) +{ + int ret; + + ret = prctl(data->prctl_get); + if (ret == -1) { + ksft_test_result_fail("%s prctl() read failed: %d (%s)\n", + data->name, errno, strerror(errno)); + return; + } + + /* Mask out any flags */ + ret &= PR_SVE_VL_LEN_MASK; + + /* Is that what we can read back directly? */ + if (ret == data->rdvl()) + ksft_test_result_pass("%s current VL is %d\n", + data->name, ret); + else + ksft_test_result_fail("%s prctl() VL %d but RDVL is %d\n", + data->name, ret, data->rdvl()); +} + +/* Does the prctl let us set the VL we already have? */ +static void prctl_set_same(struct vec_data *data) +{ + int cur_vl = data->rdvl(); + int ret; + + ret = prctl(data->prctl_set, cur_vl); + if (ret < 0) { + ksft_test_result_fail("%s prctl set failed: %d (%s)\n", + data->name, errno, strerror(errno)); + return; + } + + ksft_test_result(cur_vl == data->rdvl(), + "%s set VL %d and have VL %d\n", + data->name, cur_vl, data->rdvl()); +} + +/* Can we set a new VL for this process? */ +static void prctl_set(struct vec_data *data) +{ + int ret; + + if (data->min_vl == data->max_vl) { + ksft_test_result_skip("%s only one VL supported\n", + data->name); + return; + } + + /* Try to set the minimum VL */ + ret = prctl(data->prctl_set, data->min_vl); + if (ret < 0) { + ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n", + data->name, data->min_vl, + errno, strerror(errno)); + return; + } + + if ((ret & PR_SVE_VL_LEN_MASK) != data->min_vl) { + ksft_test_result_fail("%s prctl set %d but return value is %d\n", + data->name, data->min_vl, data->rdvl()); + return; + } + + if (data->rdvl() != data->min_vl) { + ksft_test_result_fail("%s set %d but RDVL is %d\n", + data->name, data->min_vl, data->rdvl()); + return; + } + + /* Try to set the maximum VL */ + ret = prctl(data->prctl_set, data->max_vl); + if (ret < 0) { + ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n", + data->name, data->max_vl, + errno, strerror(errno)); + return; + } + + if ((ret & PR_SVE_VL_LEN_MASK) != data->max_vl) { + ksft_test_result_fail("%s prctl() set %d but return value is %d\n", + data->name, data->max_vl, data->rdvl()); + return; + } + + /* The _INHERIT flag should not be present when we read the VL */ + ret = prctl(data->prctl_get); + if (ret == -1) { + ksft_test_result_fail("%s prctl() read failed: %d (%s)\n", + data->name, errno, strerror(errno)); + return; + } + + if (ret & PR_SVE_VL_INHERIT) { + ksft_test_result_fail("%s prctl() reports _INHERIT\n", + data->name); + return; + } + + ksft_test_result_pass("%s prctl() set min/max\n", data->name); +} + +/* If we didn't request it a new VL shouldn't affect the child */ +static void prctl_set_no_child(struct vec_data *data) +{ + int ret, child_vl; + + if (data->min_vl == data->max_vl) { + ksft_test_result_skip("%s only one VL supported\n", + data->name); + return; + } + + ret = prctl(data->prctl_set, data->min_vl); + if (ret < 0) { + ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n", + data->name, data->min_vl, + errno, strerror(errno)); + return; + } + + /* Ensure the default VL is different */ + ret = file_write_integer(data->default_vl_file, data->max_vl); + if (ret != 0) + return; + + /* Check that the child has the default we just set */ + child_vl = get_child_rdvl(data); + if (child_vl != data->max_vl) { + ksft_test_result_fail("%s is %d but child VL is %d\n", + data->default_vl_file, + data->max_vl, child_vl); + return; + } + + ksft_test_result_pass("%s vector length used default\n", data->name); + + file_write_integer(data->default_vl_file, data->default_vl); +} + +/* If we didn't request it a new VL shouldn't affect the child */ +static void prctl_set_for_child(struct vec_data *data) +{ + int ret, child_vl; + + if (data->min_vl == data->max_vl) { + ksft_test_result_skip("%s only one VL supported\n", + data->name); + return; + } + + ret = prctl(data->prctl_set, data->min_vl | PR_SVE_VL_INHERIT); + if (ret < 0) { + ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n", + data->name, data->min_vl, + errno, strerror(errno)); + return; + } + + /* The _INHERIT flag should be present when we read the VL */ + ret = prctl(data->prctl_get); + if (ret == -1) { + ksft_test_result_fail("%s prctl() read failed: %d (%s)\n", + data->name, errno, strerror(errno)); + return; + } + if (!(ret & PR_SVE_VL_INHERIT)) { + ksft_test_result_fail("%s prctl() does not report _INHERIT\n", + data->name); + return; + } + + /* Ensure the default VL is different */ + ret = file_write_integer(data->default_vl_file, data->max_vl); + if (ret != 0) + return; + + /* Check that the child inherited our VL */ + child_vl = get_child_rdvl(data); + if (child_vl != data->min_vl) { + ksft_test_result_fail("%s is %d but child VL is %d\n", + data->default_vl_file, + data->min_vl, child_vl); + return; + } + + ksft_test_result_pass("%s vector length was inherited\n", data->name); + + file_write_integer(data->default_vl_file, data->default_vl); +} + +/* _ONEXEC takes effect only in the child process */ +static void prctl_set_onexec(struct vec_data *data) +{ + int ret, child_vl; + + if (data->min_vl == data->max_vl) { + ksft_test_result_skip("%s only one VL supported\n", + data->name); + return; + } + + /* Set a known value for the default and our current VL */ + ret = file_write_integer(data->default_vl_file, data->max_vl); + if (ret != 0) + return; + + ret = prctl(data->prctl_set, data->max_vl); + if (ret < 0) { + ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n", + data->name, data->min_vl, + errno, strerror(errno)); + return; + } + + /* Set a different value for the child to have on exec */ + ret = prctl(data->prctl_set, data->min_vl | PR_SVE_SET_VL_ONEXEC); + if (ret < 0) { + ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n", + data->name, data->min_vl, + errno, strerror(errno)); + return; + } + + /* Our current VL should stay the same */ + if (data->rdvl() != data->max_vl) { + ksft_test_result_fail("%s VL changed by _ONEXEC prctl()\n", + data->name); + return; + } + + /* Check that the child inherited our VL */ + child_vl = get_child_rdvl(data); + if (child_vl != data->min_vl) { + ksft_test_result_fail("Set %d _ONEXEC but child VL is %d\n", + data->min_vl, child_vl); + return; + } + + ksft_test_result_pass("%s vector length set on exec\n", data->name); + + file_write_integer(data->default_vl_file, data->default_vl); +} + +/* For each VQ verify that setting via prctl() does the right thing */ +static void prctl_set_all_vqs(struct vec_data *data) +{ + int ret, vq, vl, new_vl; + int errors = 0; + + if (!data->min_vl || !data->max_vl) { + ksft_test_result_skip("%s Failed to enumerate VLs, not testing VL setting\n", + data->name); + return; + } + + for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { + vl = sve_vl_from_vq(vq); + + /* Attempt to set the VL */ + ret = prctl(data->prctl_set, vl); + if (ret < 0) { + errors++; + ksft_print_msg("%s prctl set failed for %d: %d (%s)\n", + data->name, vl, + errno, strerror(errno)); + continue; + } + + new_vl = ret & PR_SVE_VL_LEN_MASK; + + /* Check that we actually have the reported new VL */ + if (data->rdvl() != new_vl) { + ksft_print_msg("Set %s VL %d but RDVL reports %d\n", + data->name, new_vl, data->rdvl()); + errors++; + } + + /* Was that the VL we asked for? */ + if (new_vl == vl) + continue; + + /* Should round up to the minimum VL if below it */ + if (vl < data->min_vl) { + if (new_vl != data->min_vl) { + ksft_print_msg("%s VL %d returned %d not minimum %d\n", + data->name, vl, new_vl, + data->min_vl); + errors++; + } + + continue; + } + + /* Should round down to maximum VL if above it */ + if (vl > data->max_vl) { + if (new_vl != data->max_vl) { + ksft_print_msg("%s VL %d returned %d not maximum %d\n", + data->name, vl, new_vl, + data->max_vl); + errors++; + } + + continue; + } + + /* Otherwise we should've rounded down */ + if (!(new_vl < vl)) { + ksft_print_msg("%s VL %d returned %d, did not round down\n", + data->name, vl, new_vl); + errors++; + + continue; + } + } + + ksft_test_result(errors == 0, "%s prctl() set all VLs, %d errors\n", + data->name, errors); +} + +typedef void (*test_type)(struct vec_data *); + +static const test_type tests[] = { + /* + * The default/min/max tests must be first and in this order + * to provide data for other tests. + */ + proc_read_default, + proc_write_min, + proc_write_max, + + prctl_get, + prctl_set_same, + prctl_set, + prctl_set_no_child, + prctl_set_for_child, + prctl_set_onexec, + prctl_set_all_vqs, +}; + +int main(void) +{ + int i, j; + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(tests) * ARRAY_SIZE(vec_data)); + + for (i = 0; i < ARRAY_SIZE(vec_data); i++) { + struct vec_data *data = &vec_data[i]; + unsigned long supported; + + supported = getauxval(data->hwcap_type) & data->hwcap; + + for (j = 0; j < ARRAY_SIZE(tests); j++) { + if (supported) + tests[j](data); + else + ksft_test_result_skip("%s not supported\n", + data->name); + } + } + + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/arm64/fp/vlset.c b/tools/testing/selftests/arm64/fp/vlset.c new file mode 100644 index 000000000..76912a581 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/vlset.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015-2019 ARM Limited. + * Original author: Dave Martin <Dave.Martin@arm.com> + */ +#define _GNU_SOURCE +#include <assert.h> +#include <errno.h> +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <unistd.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <asm/hwcap.h> +#include <asm/sigcontext.h> + +static int inherit = 0; +static int no_inherit = 0; +static int force = 0; +static unsigned long vl; +static int set_ctl = PR_SVE_SET_VL; +static int get_ctl = PR_SVE_GET_VL; + +static const struct option options[] = { + { "force", no_argument, NULL, 'f' }, + { "inherit", no_argument, NULL, 'i' }, + { "max", no_argument, NULL, 'M' }, + { "no-inherit", no_argument, &no_inherit, 1 }, + { "sme", no_argument, NULL, 's' }, + { "help", no_argument, NULL, '?' }, + {} +}; + +static char const *program_name; + +static int parse_options(int argc, char **argv) +{ + int c; + char *rest; + + program_name = strrchr(argv[0], '/'); + if (program_name) + ++program_name; + else + program_name = argv[0]; + + while ((c = getopt_long(argc, argv, "Mfhi", options, NULL)) != -1) + switch (c) { + case 'M': vl = SVE_VL_MAX; break; + case 'f': force = 1; break; + case 'i': inherit = 1; break; + case 's': set_ctl = PR_SME_SET_VL; + get_ctl = PR_SME_GET_VL; + break; + case 0: break; + default: goto error; + } + + if (inherit && no_inherit) + goto error; + + if (!vl) { + /* vector length */ + if (optind >= argc) + goto error; + + errno = 0; + vl = strtoul(argv[optind], &rest, 0); + if (*rest) { + vl = ULONG_MAX; + errno = EINVAL; + } + if (vl == ULONG_MAX && errno) { + fprintf(stderr, "%s: %s: %s\n", + program_name, argv[optind], strerror(errno)); + goto error; + } + + ++optind; + } + + /* command */ + if (optind >= argc) + goto error; + + return 0; + +error: + fprintf(stderr, + "Usage: %s [-f | --force] " + "[-i | --inherit | --no-inherit] " + "{-M | --max | <vector length>} " + "<command> [<arguments> ...]\n", + program_name); + return -1; +} + +int main(int argc, char **argv) +{ + int ret = 126; /* same as sh(1) command-not-executable error */ + long flags; + char *path; + int t, e; + + if (parse_options(argc, argv)) + return 2; /* same as sh(1) builtin incorrect-usage */ + + if (vl & ~(vl & PR_SVE_VL_LEN_MASK)) { + fprintf(stderr, "%s: Invalid vector length %lu\n", + program_name, vl); + return 2; /* same as sh(1) builtin incorrect-usage */ + } + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) { + fprintf(stderr, "%s: Scalable Vector Extension not present\n", + program_name); + + if (!force) + goto error; + + fputs("Going ahead anyway (--force): " + "This is a debug option. Don't rely on it.\n", + stderr); + } + + flags = PR_SVE_SET_VL_ONEXEC; + if (inherit) + flags |= PR_SVE_VL_INHERIT; + + t = prctl(set_ctl, vl | flags); + if (t < 0) { + fprintf(stderr, "%s: PR_SVE_SET_VL: %s\n", + program_name, strerror(errno)); + goto error; + } + + t = prctl(get_ctl); + if (t == -1) { + fprintf(stderr, "%s: PR_SVE_GET_VL: %s\n", + program_name, strerror(errno)); + goto error; + } + flags = PR_SVE_VL_LEN_MASK; + flags = t & ~flags; + + assert(optind < argc); + path = argv[optind]; + + execvp(path, &argv[optind]); + e = errno; + if (errno == ENOENT) + ret = 127; /* same as sh(1) not-found error */ + fprintf(stderr, "%s: %s: %s\n", program_name, path, strerror(e)); + +error: + return ret; /* same as sh(1) not-executable error */ +} diff --git a/tools/testing/selftests/arm64/fp/za-fork-asm.S b/tools/testing/selftests/arm64/fp/za-fork-asm.S new file mode 100644 index 000000000..2fafadd49 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/za-fork-asm.S @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2021 ARM Limited. + +#include "sme-inst.h" + +.arch_extension sve + +#define MAGIC 42 + +#define MAXVL 2048 +#define MAXVL_B (MAXVL / 8) + +.pushsection .text +.data +.align 4 +scratch: + .space MAXVL_B +.popsection + +.globl fork_test +fork_test: + smstart_za + + // For simplicity just set one word in one vector, other tests + // cover general data corruption issues. + ldr x0, =scratch + mov x1, #MAGIC + str x1, [x0] + mov w12, wzr + _ldr_za 12, 0 // ZA.H[W12] loaded from [X0] + + // Tail call into the C portion that does the fork & verify + b fork_test_c + +.globl verify_fork +verify_fork: + // SVCR should have ZA=1, SM=0 + mrs x0, S3_3_C4_C2_2 + and x1, x0, #3 + cmp x1, #2 + beq 1f + mov x0, xzr + b 100f +1: + + // ZA should still have the value we loaded + ldr x0, =scratch + mov w12, wzr + _str_za 12, 0 // ZA.H[W12] stored to [X0] + ldr x1, [x0] + cmp x1, #MAGIC + beq 2f + mov x0, xzr + b 100f + +2: + // All tests passed + mov x0, #1 +100: + ret + diff --git a/tools/testing/selftests/arm64/fp/za-fork.c b/tools/testing/selftests/arm64/fp/za-fork.c new file mode 100644 index 000000000..ff475c649 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/za-fork.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022 ARM Limited. + * Original author: Mark Brown <broonie@kernel.org> + */ + +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/sched.h> +#include <linux/wait.h> + +#define EXPECTED_TESTS 1 + +static void putstr(const char *str) +{ + write(1, str, strlen(str)); +} + +static void putnum(unsigned int num) +{ + char c; + + if (num / 10) + putnum(num / 10); + + c = '0' + (num % 10); + write(1, &c, 1); +} + +static int tests_run; +static int tests_passed; +static int tests_failed; +static int tests_skipped; + +static void print_summary(void) +{ + if (tests_passed + tests_failed + tests_skipped != EXPECTED_TESTS) + putstr("# UNEXPECTED TEST COUNT: "); + + putstr("# Totals: pass:"); + putnum(tests_passed); + putstr(" fail:"); + putnum(tests_failed); + putstr(" xfail:0 xpass:0 skip:"); + putnum(tests_skipped); + putstr(" error:0\n"); +} + +int fork_test(void); +int verify_fork(void); + +/* + * If we fork the value in the parent should be unchanged and the + * child should start with the same value. This is called from the + * fork_test() asm function. + */ +int fork_test_c(void) +{ + pid_t newpid, waiting; + int child_status, parent_result; + + newpid = fork(); + if (newpid == 0) { + /* In child */ + if (!verify_fork()) { + putstr("# ZA state invalid in child\n"); + exit(0); + } else { + exit(1); + } + } + if (newpid < 0) { + putstr("# fork() failed: -"); + putnum(-newpid); + putstr("\n"); + return 0; + } + + parent_result = verify_fork(); + if (!parent_result) + putstr("# ZA state invalid in parent\n"); + + for (;;) { + waiting = waitpid(newpid, &child_status, 0); + + if (waiting < 0) { + if (errno == EINTR) + continue; + putstr("# waitpid() failed: "); + putnum(errno); + putstr("\n"); + return 0; + } + if (waiting != newpid) { + putstr("# waitpid() returned wrong PID\n"); + return 0; + } + + if (!WIFEXITED(child_status)) { + putstr("# child did not exit\n"); + return 0; + } + + return WEXITSTATUS(child_status) && parent_result; + } +} + +#define run_test(name) \ + if (name()) { \ + tests_passed++; \ + } else { \ + tests_failed++; \ + putstr("not "); \ + } \ + putstr("ok "); \ + putnum(++tests_run); \ + putstr(" " #name "\n"); + +int main(int argc, char **argv) +{ + int ret, i; + + putstr("TAP version 13\n"); + putstr("1.."); + putnum(EXPECTED_TESTS); + putstr("\n"); + + putstr("# PID: "); + putnum(getpid()); + putstr("\n"); + + /* + * This test is run with nolibc which doesn't support hwcap and + * it's probably disproportionate to implement so instead check + * for the default vector length configuration in /proc. + */ + ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0); + if (ret >= 0) { + run_test(fork_test); + + } else { + putstr("# SME support not present\n"); + + for (i = 0; i < EXPECTED_TESTS; i++) { + putstr("ok "); + putnum(i); + putstr(" skipped\n"); + } + + tests_skipped += EXPECTED_TESTS; + } + + print_summary(); + + return 0; +} diff --git a/tools/testing/selftests/arm64/fp/za-ptrace.c b/tools/testing/selftests/arm64/fp/za-ptrace.c new file mode 100644 index 000000000..bf6158654 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/za-ptrace.c @@ -0,0 +1,356 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 ARM Limited. + */ +#include <errno.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/wait.h> +#include <asm/sigcontext.h> +#include <asm/ptrace.h> + +#include "../../kselftest.h" + +/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */ +#ifndef NT_ARM_ZA +#define NT_ARM_ZA 0x40c +#endif + +#define EXPECTED_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 3) + +static void fill_buf(char *buf, size_t size) +{ + int i; + + for (i = 0; i < size; i++) + buf[i] = random(); +} + +static int do_child(void) +{ + if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) + ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + + if (raise(SIGSTOP)) + ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + + return EXIT_SUCCESS; +} + +static struct user_za_header *get_za(pid_t pid, void **buf, size_t *size) +{ + struct user_za_header *za; + void *p; + size_t sz = sizeof(*za); + struct iovec iov; + + while (1) { + if (*size < sz) { + p = realloc(*buf, sz); + if (!p) { + errno = ENOMEM; + goto error; + } + + *buf = p; + *size = sz; + } + + iov.iov_base = *buf; + iov.iov_len = sz; + if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZA, &iov)) + goto error; + + za = *buf; + if (za->size <= sz) + break; + + sz = za->size; + } + + return za; + +error: + return NULL; +} + +static int set_za(pid_t pid, const struct user_za_header *za) +{ + struct iovec iov; + + iov.iov_base = (void *)za; + iov.iov_len = za->size; + return ptrace(PTRACE_SETREGSET, pid, NT_ARM_ZA, &iov); +} + +/* Validate attempting to set the specfied VL via ptrace */ +static void ptrace_set_get_vl(pid_t child, unsigned int vl, bool *supported) +{ + struct user_za_header za; + struct user_za_header *new_za = NULL; + size_t new_za_size = 0; + int ret, prctl_vl; + + *supported = false; + + /* Check if the VL is supported in this process */ + prctl_vl = prctl(PR_SME_SET_VL, vl); + if (prctl_vl == -1) + ksft_exit_fail_msg("prctl(PR_SME_SET_VL) failed: %s (%d)\n", + strerror(errno), errno); + + /* If the VL is not supported then a supported VL will be returned */ + *supported = (prctl_vl == vl); + + /* Set the VL by doing a set with no register payload */ + memset(&za, 0, sizeof(za)); + za.size = sizeof(za); + za.vl = vl; + ret = set_za(child, &za); + if (ret != 0) { + ksft_test_result_fail("Failed to set VL %u\n", vl); + return; + } + + /* + * Read back the new register state and verify that we have the + * same VL that we got from prctl() on ourselves. + */ + if (!get_za(child, (void **)&new_za, &new_za_size)) { + ksft_test_result_fail("Failed to read VL %u\n", vl); + return; + } + + ksft_test_result(new_za->vl = prctl_vl, "Set VL %u\n", vl); + + free(new_za); +} + +/* Validate attempting to set no ZA data and read it back */ +static void ptrace_set_no_data(pid_t child, unsigned int vl) +{ + void *read_buf = NULL; + struct user_za_header write_za; + struct user_za_header *read_za; + size_t read_za_size = 0; + int ret; + + /* Set up some data and write it out */ + memset(&write_za, 0, sizeof(write_za)); + write_za.size = ZA_PT_ZA_OFFSET; + write_za.vl = vl; + + ret = set_za(child, &write_za); + if (ret != 0) { + ksft_test_result_fail("Failed to set VL %u no data\n", vl); + return; + } + + /* Read the data back */ + if (!get_za(child, (void **)&read_buf, &read_za_size)) { + ksft_test_result_fail("Failed to read VL %u no data\n", vl); + return; + } + read_za = read_buf; + + /* We might read more data if there's extensions we don't know */ + if (read_za->size < write_za.size) { + ksft_test_result_fail("VL %u wrote %d bytes, only read %d\n", + vl, write_za.size, read_za->size); + goto out_read; + } + + ksft_test_result(read_za->size == write_za.size, + "Disabled ZA for VL %u\n", vl); + +out_read: + free(read_buf); +} + +/* Validate attempting to set data and read it back */ +static void ptrace_set_get_data(pid_t child, unsigned int vl) +{ + void *write_buf; + void *read_buf = NULL; + struct user_za_header *write_za; + struct user_za_header *read_za; + size_t read_za_size = 0; + unsigned int vq = sve_vq_from_vl(vl); + int ret; + size_t data_size; + + data_size = ZA_PT_SIZE(vq); + write_buf = malloc(data_size); + if (!write_buf) { + ksft_test_result_fail("Error allocating %d byte buffer for VL %u\n", + data_size, vl); + return; + } + write_za = write_buf; + + /* Set up some data and write it out */ + memset(write_za, 0, data_size); + write_za->size = data_size; + write_za->vl = vl; + + fill_buf(write_buf + ZA_PT_ZA_OFFSET, ZA_PT_ZA_SIZE(vq)); + + ret = set_za(child, write_za); + if (ret != 0) { + ksft_test_result_fail("Failed to set VL %u data\n", vl); + goto out; + } + + /* Read the data back */ + if (!get_za(child, (void **)&read_buf, &read_za_size)) { + ksft_test_result_fail("Failed to read VL %u data\n", vl); + goto out; + } + read_za = read_buf; + + /* We might read more data if there's extensions we don't know */ + if (read_za->size < write_za->size) { + ksft_test_result_fail("VL %u wrote %d bytes, only read %d\n", + vl, write_za->size, read_za->size); + goto out_read; + } + + ksft_test_result(memcmp(write_buf + ZA_PT_ZA_OFFSET, + read_buf + ZA_PT_ZA_OFFSET, + ZA_PT_ZA_SIZE(vq)) == 0, + "Data match for VL %u\n", vl); + +out_read: + free(read_buf); +out: + free(write_buf); +} + +static int do_parent(pid_t child) +{ + int ret = EXIT_FAILURE; + pid_t pid; + int status; + siginfo_t si; + unsigned int vq, vl; + bool vl_supported; + + /* Attach to the child */ + while (1) { + int sig; + + pid = wait(&status); + if (pid == -1) { + perror("wait"); + goto error; + } + + /* + * This should never happen but it's hard to flag in + * the framework. + */ + if (pid != child) + continue; + + if (WIFEXITED(status) || WIFSIGNALED(status)) + ksft_exit_fail_msg("Child died unexpectedly\n"); + + if (!WIFSTOPPED(status)) + goto error; + + sig = WSTOPSIG(status); + + if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) { + if (errno == ESRCH) + goto disappeared; + + if (errno == EINVAL) { + sig = 0; /* bust group-stop */ + goto cont; + } + + ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n", + strerror(errno)); + goto error; + } + + if (sig == SIGSTOP && si.si_code == SI_TKILL && + si.si_pid == pid) + break; + + cont: + if (ptrace(PTRACE_CONT, pid, NULL, sig)) { + if (errno == ESRCH) + goto disappeared; + + ksft_test_result_fail("PTRACE_CONT: %s\n", + strerror(errno)); + goto error; + } + } + + ksft_print_msg("Parent is %d, child is %d\n", getpid(), child); + + /* Step through every possible VQ */ + for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { + vl = sve_vl_from_vq(vq); + + /* First, try to set this vector length */ + ptrace_set_get_vl(child, vl, &vl_supported); + + /* If the VL is supported validate data set/get */ + if (vl_supported) { + ptrace_set_no_data(child, vl); + ptrace_set_get_data(child, vl); + } else { + ksft_test_result_skip("Disabled ZA for VL %u\n", vl); + ksft_test_result_skip("Get and set data for VL %u\n", + vl); + } + } + + ret = EXIT_SUCCESS; + +error: + kill(child, SIGKILL); + +disappeared: + return ret; +} + +int main(void) +{ + int ret = EXIT_SUCCESS; + pid_t child; + + srandom(getpid()); + + ksft_print_header(); + + if (!(getauxval(AT_HWCAP2) & HWCAP2_SME)) { + ksft_set_plan(1); + ksft_exit_skip("SME not available\n"); + } + + ksft_set_plan(EXPECTED_TESTS); + + child = fork(); + if (!child) + return do_child(); + + if (do_parent(child)) + ret = EXIT_FAILURE; + + ksft_print_cnts(); + + return ret; +} diff --git a/tools/testing/selftests/arm64/fp/za-stress b/tools/testing/selftests/arm64/fp/za-stress new file mode 100644 index 000000000..5ac386b55 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/za-stress @@ -0,0 +1,59 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2015-2019 ARM Limited. +# Original author: Dave Martin <Dave.Martin@arm.com> + +set -ue + +NR_CPUS=`nproc` + +pids= +logs= + +cleanup () { + trap - INT TERM CHLD + set +e + + if [ -n "$pids" ]; then + kill $pids + wait $pids + pids= + fi + + if [ -n "$logs" ]; then + cat $logs + rm $logs + logs= + fi +} + +interrupt () { + cleanup + exit 0 +} + +child_died () { + cleanup + exit 1 +} + +trap interrupt INT TERM EXIT + +for x in `seq 0 $((NR_CPUS * 4))`; do + log=`mktemp` + logs=$logs\ $log + ./za-test >$log & + pids=$pids\ $! +done + +# Wait for all child processes to be created: +sleep 10 + +while :; do + kill -USR1 $pids +done & +pids=$pids\ $! + +wait + +exit 1 diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S new file mode 100644 index 000000000..53c54af65 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/za-test.S @@ -0,0 +1,398 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2021 ARM Limited. +// Original author: Mark Brown <broonie@kernel.org> +// +// Scalable Matrix Extension ZA context switch test +// Repeatedly writes unique test patterns into each ZA tile +// and reads them back to verify integrity. +// +// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done +// (leave it running for as long as you want...) +// kill $pids + +#include <asm/unistd.h> +#include "assembler.h" +#include "asm-offsets.h" +#include "sme-inst.h" + +.arch_extension sve + +#define MAXVL 2048 +#define MAXVL_B (MAXVL / 8) + +// Declare some storage space to shadow ZA register contents and a +// scratch buffer for a vector. +.pushsection .text +.data +.align 4 +zaref: + .space MAXVL_B * MAXVL_B +scratch: + .space MAXVL_B +.popsection + +// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. +// Clobbers x0-x3 +function memcpy + cmp x2, #0 + b.eq 1f +0: ldrb w3, [x1], #1 + strb w3, [x0], #1 + subs x2, x2, #1 + b.ne 0b +1: ret +endfunction + +// Generate a test pattern for storage in ZA +// x0: pid +// x1: row in ZA +// x2: generation + +// These values are used to constuct a 32-bit pattern that is repeated in the +// scratch buffer as many times as will fit: +// bits 31:28 generation number (increments once per test_loop) +// bits 27:16 pid +// bits 15: 8 row number +// bits 7: 0 32-bit lane index + +function pattern + mov w3, wzr + bfi w3, w0, #16, #12 // PID + bfi w3, w1, #8, #8 // Row + bfi w3, w2, #28, #4 // Generation + + ldr x0, =scratch + mov w1, #MAXVL_B / 4 + +0: str w3, [x0], #4 + add w3, w3, #1 // Lane + subs w1, w1, #1 + b.ne 0b + + ret +endfunction + +// Get the address of shadow data for ZA horizontal vector xn +.macro _adrza xd, xn, nrtmp + ldr \xd, =zaref + rdsvl \nrtmp, 1 + madd \xd, x\nrtmp, \xn, \xd +.endm + +// Set up test pattern in a ZA horizontal vector +// x0: pid +// x1: row number +// x2: generation +function setup_za + mov x4, x30 + mov x12, x1 // Use x12 for vector select + + bl pattern // Get pattern in scratch buffer + _adrza x0, x12, 2 // Shadow buffer pointer to x0 and x5 + mov x5, x0 + ldr x1, =scratch + bl memcpy // length set up in x2 by _adrza + + _ldr_za 12, 5 // load vector w12 from pointer x5 + + ret x4 +endfunction + +// Trivial memory compare: compare x2 bytes starting at address x0 with +// bytes starting at address x1. +// Returns only if all bytes match; otherwise, the program is aborted. +// Clobbers x0-x5. +function memcmp + cbz x2, 2f + + stp x0, x1, [sp, #-0x20]! + str x2, [sp, #0x10] + + mov x5, #0 +0: ldrb w3, [x0, x5] + ldrb w4, [x1, x5] + add x5, x5, #1 + cmp w3, w4 + b.ne 1f + subs x2, x2, #1 + b.ne 0b + +1: ldr x2, [sp, #0x10] + ldp x0, x1, [sp], #0x20 + b.ne barf + +2: ret +endfunction + +// Verify that a ZA vector matches its shadow in memory, else abort +// x0: row number +// Clobbers x0-x7 and x12. +function check_za + mov x3, x30 + + mov x12, x0 + _adrza x5, x0, 6 // pointer to expected value in x5 + mov x4, x0 + ldr x7, =scratch // x7 is scratch + + mov x0, x7 // Poison scratch + mov x1, x6 + bl memfill_ae + + _str_za 12, 7 // save vector w12 to pointer x7 + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Any SME register modified here can cause corruption in the main +// thread -- but *only* the locations modified here. +function irritator_handler + // Increment the irritation signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + // Corrupt some random ZA data +#if 0 + adr x0, .text + (irritator_handler - .text) / 16 * 16 + movi v0.8b, #1 + movi v9.16b, #2 + movi v31.8b, #3 +#endif + + ret +endfunction + +function tickle_handler + // Increment the signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + ret +endfunction + +function terminate_handler + mov w21, w0 + mov x20, x2 + + puts "Terminated by signal " + mov w0, w21 + bl putdec + puts ", no error, iterations=" + ldr x0, [x20, #ucontext_regs + 8 * 22] + bl putdec + puts ", signals=" + ldr x0, [x20, #ucontext_regs + 8 * 23] + bl putdecn + + mov x0, #0 + mov x8, #__NR_exit + svc #0 +endfunction + +// w0: signal number +// x1: sa_action +// w2: sa_flags +// Clobbers x0-x6,x8 +function setsignal + str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! + + mov w4, w0 + mov x5, x1 + mov w6, w2 + + add x0, sp, #16 + mov x1, #sa_sz + bl memclr + + mov w0, w4 + add x1, sp, #16 + str w6, [x1, #sa_flags] + str x5, [x1, #sa_handler] + mov x2, #0 + mov x3, #sa_mask_sz + mov x8, #__NR_rt_sigaction + svc #0 + + cbz w0, 1f + + puts "sigaction failure\n" + b .Labort + +1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) + ret +endfunction + +// Main program entry point +.globl _start +function _start +_start: + mov x23, #0 // signal count + + mov w0, #SIGINT + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, irritator_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov w0, #SIGUSR2 + adr x1, tickle_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + puts "Streaming mode " + smstart_za + + // Sanity-check and report the vector length + + rdsvl 19, 8 + cmp x19, #128 + b.lo 1f + cmp x19, #2048 + b.hi 1f + tst x19, #(8 - 1) + b.eq 2f + +1: puts "bad vector length: " + mov x0, x19 + bl putdecn + b .Labort + +2: puts "vector length:\t" + mov x0, x19 + bl putdec + puts " bits\n" + + // Obtain our PID, to ensure test pattern uniqueness between processes + mov x8, #__NR_getpid + svc #0 + mov x20, x0 + + puts "PID:\t" + mov x0, x20 + bl putdecn + + mov x22, #0 // generation number, increments per iteration +.Ltest_loop: + rdsvl 0, 8 + cmp x0, x19 + b.ne vl_barf + + rdsvl 21, 1 // Set up ZA & shadow with test pattern +0: mov x0, x20 + sub x1, x21, #1 + mov x2, x22 + bl setup_za + subs x21, x21, #1 + b.ne 0b + + mov x8, #__NR_sched_yield // encourage preemption +1: + svc #0 + + mrs x0, S3_3_C4_C2_2 // SVCR should have ZA=1,SM=0 + and x1, x0, #3 + cmp x1, #2 + b.ne svcr_barf + + rdsvl 21, 1 // Verify that the data made it through + rdsvl 24, 1 // Verify that the data made it through +0: sub x0, x24, x21 + bl check_za + subs x21, x21, #1 + bne 0b + + add x22, x22, #1 // Everything still working + b .Ltest_loop + +.Labort: + mov x0, #0 + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +endfunction + +function barf +// fpsimd.c acitivty log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// end hack + smstop + mov x10, x0 // expected data + mov x11, x1 // actual data + mov x12, x2 // data size + + puts "Mismatch: PID=" + mov x0, x20 + bl putdec + puts ", iteration=" + mov x0, x22 + bl putdec + puts ", row=" + mov x0, x21 + bl putdecn + puts "\tExpected [" + mov x0, x10 + mov x1, x12 + bl dumphex + puts "]\n\tGot [" + mov x0, x11 + mov x1, x12 + bl dumphex + puts "]\n" + + mov x8, #__NR_getpid + svc #0 +// fpsimd.c acitivty log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// ^ end of hack + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +// mov x8, #__NR_exit +// mov x1, #1 +// svc #0 +endfunction + +function vl_barf + mov x10, x0 + + puts "Bad active VL: " + mov x0, x10 + bl putdecn + + mov x8, #__NR_exit + mov x1, #1 + svc #0 +endfunction + +function svcr_barf + mov x10, x0 + + puts "Bad SVCR: " + mov x0, x10 + bl putdecn + + mov x8, #__NR_exit + mov x1, #1 + svc #0 +endfunction |