From 9f0fc191371843c4fc000a226b0a26b6c059aacd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 18 May 2024 19:40:19 +0200 Subject: Merging upstream version 6.7.7. Signed-off-by: Daniel Baumann --- samples/bpf/Makefile | 19 +- samples/bpf/asm_goto_workaround.h | 8 +- samples/bpf/syscall_tp_kern.c | 15 +- samples/kprobes/kretprobe_example.c | 2 +- samples/landlock/sandboxer.c | 115 ++++++- samples/vfio-mdev/mbochs.c | 2 +- samples/vfio-mdev/mdpy.c | 2 +- samples/vfio-mdev/mtty.c | 590 ++++++++++++++++++++++++++++++++++++ 8 files changed, 725 insertions(+), 28 deletions(-) (limited to 'samples') diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 4ccf423603..933f6c3fe6 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -150,6 +150,9 @@ always-y += ibumad_kern.o always-y += hbm_out_kern.o always-y += hbm_edt_kern.o +TPROGS_CFLAGS = $(TPROGS_USER_CFLAGS) +TPROGS_LDFLAGS = $(TPROGS_USER_LDFLAGS) + ifeq ($(ARCH), arm) # Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux # headers when arm instruction set identification is requested. @@ -169,12 +172,16 @@ endif TPROGS_CFLAGS += -Wall -O2 TPROGS_CFLAGS += -Wmissing-prototypes TPROGS_CFLAGS += -Wstrict-prototypes +TPROGS_CFLAGS += $(call try-run,\ + printf "int main() { return 0; }" |\ + $(CC) -Werror -fsanitize=bounds -x c - -o "$$TMP",-fsanitize=bounds,) TPROGS_CFLAGS += -I$(objtree)/usr/include TPROGS_CFLAGS += -I$(srctree)/tools/testing/selftests/bpf/ TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE) TPROGS_CFLAGS += -I$(srctree)/tools/include TPROGS_CFLAGS += -I$(srctree)/tools/perf +TPROGS_CFLAGS += -I$(srctree)/tools/lib TPROGS_CFLAGS += -DHAVE_ATTR_TEST=0 ifdef SYSROOT @@ -247,14 +254,15 @@ clean: $(LIBBPF): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT) # Fix up variables inherited from Kbuild that tools/ build system won't like $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \ - LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(BPF_SAMPLES_PATH)/../../ \ + LDFLAGS="$(TPROGS_LDFLAGS)" srctree=$(BPF_SAMPLES_PATH)/../../ \ O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \ $@ install_headers BPFTOOLDIR := $(TOOLS_PATH)/bpf/bpftool BPFTOOL_OUTPUT := $(abspath $(BPF_SAMPLES_PATH))/bpftool -BPFTOOL := $(BPFTOOL_OUTPUT)/bootstrap/bpftool -$(BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) | $(BPFTOOL_OUTPUT) +DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)/bootstrap/bpftool +BPFTOOL ?= $(DEFAULT_BPFTOOL) +$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) | $(BPFTOOL_OUTPUT) $(MAKE) -C $(BPFTOOLDIR) srctree=$(BPF_SAMPLES_PATH)/../../ \ OUTPUT=$(BPFTOOL_OUTPUT)/ bootstrap @@ -312,8 +320,11 @@ XDP_SAMPLE_CFLAGS += -Wall -O2 \ -I$(LIBBPF_INCLUDE) \ -I$(src)/../../tools/testing/selftests/bpf -$(obj)/$(XDP_SAMPLE): TPROGS_CFLAGS = $(XDP_SAMPLE_CFLAGS) +$(obj)/$(XDP_SAMPLE): TPROGS_CFLAGS = $(XDP_SAMPLE_CFLAGS) $(TPROGS_USER_CFLAGS) $(obj)/$(XDP_SAMPLE): $(src)/xdp_sample_user.h $(src)/xdp_sample_shared.h +# Override includes for trace_helpers.o because __must_check won't be defined +# in our include path. +$(obj)/$(TRACE_HELPERS): TPROGS_CFLAGS := $(TPROGS_CFLAGS) -D__must_check= -include $(BPF_SAMPLES_PATH)/Makefile.target diff --git a/samples/bpf/asm_goto_workaround.h b/samples/bpf/asm_goto_workaround.h index 7048bb3594..634e81d83e 100644 --- a/samples/bpf/asm_goto_workaround.h +++ b/samples/bpf/asm_goto_workaround.h @@ -4,14 +4,14 @@ #define __ASM_GOTO_WORKAROUND_H /* - * This will bring in asm_volatile_goto and asm_inline macro definitions + * This will bring in asm_goto_output and asm_inline macro definitions * if enabled by compiler and config options. */ #include -#ifdef asm_volatile_goto -#undef asm_volatile_goto -#define asm_volatile_goto(x...) asm volatile("invalid use of asm_volatile_goto") +#ifdef asm_goto_output +#undef asm_goto_output +#define asm_goto_output(x...) asm volatile("invalid use of asm_goto_output") #endif /* diff --git a/samples/bpf/syscall_tp_kern.c b/samples/bpf/syscall_tp_kern.c index 090fecfe64..58fef969a6 100644 --- a/samples/bpf/syscall_tp_kern.c +++ b/samples/bpf/syscall_tp_kern.c @@ -4,6 +4,7 @@ #include #include +#if !defined(__aarch64__) struct syscalls_enter_open_args { unsigned long long unused; long syscall_nr; @@ -11,6 +12,7 @@ struct syscalls_enter_open_args { long flags; long mode; }; +#endif struct syscalls_exit_open_args { unsigned long long unused; @@ -18,6 +20,15 @@ struct syscalls_exit_open_args { long ret; }; +struct syscalls_enter_open_at_args { + unsigned long long unused; + long syscall_nr; + long long dfd; + long filename_ptr; + long flags; + long mode; +}; + struct { __uint(type, BPF_MAP_TYPE_ARRAY); __type(key, u32); @@ -54,14 +65,14 @@ int trace_enter_open(struct syscalls_enter_open_args *ctx) #endif SEC("tracepoint/syscalls/sys_enter_openat") -int trace_enter_open_at(struct syscalls_enter_open_args *ctx) +int trace_enter_open_at(struct syscalls_enter_open_at_args *ctx) { count(&enter_open_map); return 0; } SEC("tracepoint/syscalls/sys_enter_openat2") -int trace_enter_open_at2(struct syscalls_enter_open_args *ctx) +int trace_enter_open_at2(struct syscalls_enter_open_at_args *ctx) { count(&enter_open_map); return 0; diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c index cbf16542d8..ed79fd3d48 100644 --- a/samples/kprobes/kretprobe_example.c +++ b/samples/kprobes/kretprobe_example.c @@ -35,7 +35,7 @@ struct my_data { ktime_t entry_stamp; }; -/* Here we use the entry_hanlder to timestamp function entry */ +/* Here we use the entry_handler to timestamp function entry */ static int entry_handler(struct kretprobe_instance *ri, struct pt_regs *regs) { struct my_data *data; diff --git a/samples/landlock/sandboxer.c b/samples/landlock/sandboxer.c index e2056c8b90..08596c0ef0 100644 --- a/samples/landlock/sandboxer.c +++ b/samples/landlock/sandboxer.c @@ -8,6 +8,8 @@ */ #define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ +#include #include #include #include @@ -51,7 +53,9 @@ static inline int landlock_restrict_self(const int ruleset_fd, #define ENV_FS_RO_NAME "LL_FS_RO" #define ENV_FS_RW_NAME "LL_FS_RW" -#define ENV_PATH_TOKEN ":" +#define ENV_TCP_BIND_NAME "LL_TCP_BIND" +#define ENV_TCP_CONNECT_NAME "LL_TCP_CONNECT" +#define ENV_DELIMITER ":" static int parse_path(char *env_path, const char ***const path_list) { @@ -60,13 +64,13 @@ static int parse_path(char *env_path, const char ***const path_list) if (env_path) { num_paths++; for (i = 0; env_path[i]; i++) { - if (env_path[i] == ENV_PATH_TOKEN[0]) + if (env_path[i] == ENV_DELIMITER[0]) num_paths++; } } *path_list = malloc(num_paths * sizeof(**path_list)); for (i = 0; i < num_paths; i++) - (*path_list)[i] = strsep(&env_path, ENV_PATH_TOKEN); + (*path_list)[i] = strsep(&env_path, ENV_DELIMITER); return num_paths; } @@ -81,8 +85,8 @@ static int parse_path(char *env_path, const char ***const path_list) /* clang-format on */ -static int populate_ruleset(const char *const env_var, const int ruleset_fd, - const __u64 allowed_access) +static int populate_ruleset_fs(const char *const env_var, const int ruleset_fd, + const __u64 allowed_access) { int num_paths, i, ret = 1; char *env_path_name; @@ -143,6 +147,39 @@ out_free_name: return ret; } +static int populate_ruleset_net(const char *const env_var, const int ruleset_fd, + const __u64 allowed_access) +{ + int ret = 1; + char *env_port_name, *strport; + struct landlock_net_port_attr net_port = { + .allowed_access = allowed_access, + .port = 0, + }; + + env_port_name = getenv(env_var); + if (!env_port_name) + return 0; + env_port_name = strdup(env_port_name); + unsetenv(env_var); + + while ((strport = strsep(&env_port_name, ENV_DELIMITER))) { + net_port.port = atoi(strport); + if (landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, + &net_port, 0)) { + fprintf(stderr, + "Failed to update the ruleset with port \"%llu\": %s\n", + net_port.port, strerror(errno)); + goto out_free_name; + } + } + ret = 0; + +out_free_name: + free(env_port_name); + return ret; +} + /* clang-format off */ #define ACCESS_FS_ROUGHLY_READ ( \ @@ -166,39 +203,58 @@ out_free_name: /* clang-format on */ -#define LANDLOCK_ABI_LAST 3 +#define LANDLOCK_ABI_LAST 4 int main(const int argc, char *const argv[], char *const *const envp) { const char *cmd_path; char *const *cmd_argv; int ruleset_fd, abi; + char *env_port_name; __u64 access_fs_ro = ACCESS_FS_ROUGHLY_READ, access_fs_rw = ACCESS_FS_ROUGHLY_READ | ACCESS_FS_ROUGHLY_WRITE; + struct landlock_ruleset_attr ruleset_attr = { .handled_access_fs = access_fs_rw, + .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP, }; if (argc < 2) { fprintf(stderr, - "usage: %s=\"...\" %s=\"...\" %s [args]...\n\n", - ENV_FS_RO_NAME, ENV_FS_RW_NAME, argv[0]); + "usage: %s=\"...\" %s=\"...\" %s=\"...\" %s=\"...\"%s " + " [args]...\n\n", + ENV_FS_RO_NAME, ENV_FS_RW_NAME, ENV_TCP_BIND_NAME, + ENV_TCP_CONNECT_NAME, argv[0]); fprintf(stderr, "Launch a command in a restricted environment.\n\n"); - fprintf(stderr, "Environment variables containing paths, " - "each separated by a colon:\n"); + fprintf(stderr, + "Environment variables containing paths and ports " + "each separated by a colon:\n"); fprintf(stderr, "* %s: list of paths allowed to be used in a read-only way.\n", ENV_FS_RO_NAME); fprintf(stderr, - "* %s: list of paths allowed to be used in a read-write way.\n", + "* %s: list of paths allowed to be used in a read-write way.\n\n", ENV_FS_RW_NAME); + fprintf(stderr, + "Environment variables containing ports are optional " + "and could be skipped.\n"); + fprintf(stderr, + "* %s: list of ports allowed to bind (server).\n", + ENV_TCP_BIND_NAME); + fprintf(stderr, + "* %s: list of ports allowed to connect (client).\n", + ENV_TCP_CONNECT_NAME); fprintf(stderr, "\nexample:\n" "%s=\"/bin:/lib:/usr:/proc:/etc:/dev/urandom\" " "%s=\"/dev/null:/dev/full:/dev/zero:/dev/pts:/tmp\" " + "%s=\"9418\" " + "%s=\"80:443\" " "%s bash -i\n\n", - ENV_FS_RO_NAME, ENV_FS_RW_NAME, argv[0]); + ENV_FS_RO_NAME, ENV_FS_RW_NAME, ENV_TCP_BIND_NAME, + ENV_TCP_CONNECT_NAME, argv[0]); fprintf(stderr, "This sandboxer can use Landlock features " "up to ABI version %d.\n", @@ -255,7 +311,12 @@ int main(const int argc, char *const argv[], char *const *const envp) case 2: /* Removes LANDLOCK_ACCESS_FS_TRUNCATE for ABI < 3 */ ruleset_attr.handled_access_fs &= ~LANDLOCK_ACCESS_FS_TRUNCATE; - + __attribute__((fallthrough)); + case 3: + /* Removes network support for ABI < 4 */ + ruleset_attr.handled_access_net &= + ~(LANDLOCK_ACCESS_NET_BIND_TCP | + LANDLOCK_ACCESS_NET_CONNECT_TCP); fprintf(stderr, "Hint: You should update the running kernel " "to leverage Landlock features " @@ -274,18 +335,42 @@ int main(const int argc, char *const argv[], char *const *const envp) access_fs_ro &= ruleset_attr.handled_access_fs; access_fs_rw &= ruleset_attr.handled_access_fs; + /* Removes bind access attribute if not supported by a user. */ + env_port_name = getenv(ENV_TCP_BIND_NAME); + if (!env_port_name) { + ruleset_attr.handled_access_net &= + ~LANDLOCK_ACCESS_NET_BIND_TCP; + } + /* Removes connect access attribute if not supported by a user. */ + env_port_name = getenv(ENV_TCP_CONNECT_NAME); + if (!env_port_name) { + ruleset_attr.handled_access_net &= + ~LANDLOCK_ACCESS_NET_CONNECT_TCP; + } + ruleset_fd = landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); if (ruleset_fd < 0) { perror("Failed to create a ruleset"); return 1; } - if (populate_ruleset(ENV_FS_RO_NAME, ruleset_fd, access_fs_ro)) { + + if (populate_ruleset_fs(ENV_FS_RO_NAME, ruleset_fd, access_fs_ro)) { + goto err_close_ruleset; + } + if (populate_ruleset_fs(ENV_FS_RW_NAME, ruleset_fd, access_fs_rw)) { goto err_close_ruleset; } - if (populate_ruleset(ENV_FS_RW_NAME, ruleset_fd, access_fs_rw)) { + + if (populate_ruleset_net(ENV_TCP_BIND_NAME, ruleset_fd, + LANDLOCK_ACCESS_NET_BIND_TCP)) { + goto err_close_ruleset; + } + if (populate_ruleset_net(ENV_TCP_CONNECT_NAME, ruleset_fd, + LANDLOCK_ACCESS_NET_CONNECT_TCP)) { goto err_close_ruleset; } + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { perror("Failed to restrict privileges"); goto err_close_ruleset; diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c index 3764d1911b..93405264ff 100644 --- a/samples/vfio-mdev/mbochs.c +++ b/samples/vfio-mdev/mbochs.c @@ -1262,7 +1262,7 @@ static long mbochs_ioctl(struct vfio_device *vdev, unsigned int cmd, case VFIO_DEVICE_QUERY_GFX_PLANE: { - struct vfio_device_gfx_plane_info plane; + struct vfio_device_gfx_plane_info plane = {}; minsz = offsetofend(struct vfio_device_gfx_plane_info, region_index); diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c index 064e1c0a7a..72ea5832c9 100644 --- a/samples/vfio-mdev/mdpy.c +++ b/samples/vfio-mdev/mdpy.c @@ -591,7 +591,7 @@ static long mdpy_ioctl(struct vfio_device *vdev, unsigned int cmd, case VFIO_DEVICE_QUERY_GFX_PLANE: { - struct vfio_device_gfx_plane_info plane; + struct vfio_device_gfx_plane_info plane = {}; minsz = offsetofend(struct vfio_device_gfx_plane_info, region_index); diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index 245db52bed..69ba0281f9 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -29,6 +29,8 @@ #include #include #include +#include + /* * #defines */ @@ -124,6 +126,29 @@ struct serial_port { u8 intr_trigger_level; /* interrupt trigger level */ }; +struct mtty_data { + u64 magic; +#define MTTY_MAGIC 0x7e9d09898c3e2c4e /* Nothing clever, just random */ + u32 major_ver; +#define MTTY_MAJOR_VER 1 + u32 minor_ver; +#define MTTY_MINOR_VER 0 + u32 nr_ports; + u32 flags; + struct serial_port ports[2]; +}; + +struct mdev_state; + +struct mtty_migration_file { + struct file *filp; + struct mutex lock; + struct mdev_state *mdev_state; + struct mtty_data data; + ssize_t filled_size; + u8 disabled:1; +}; + /* State of each mdev device */ struct mdev_state { struct vfio_device vdev; @@ -140,6 +165,12 @@ struct mdev_state { struct mutex rxtx_lock; struct vfio_device_info dev_info; int nr_ports; + enum vfio_device_mig_state state; + struct mutex state_mutex; + struct mutex reset_mutex; + struct mtty_migration_file *saving_migf; + struct mtty_migration_file *resuming_migf; + u8 deferred_reset:1; u8 intx_mask:1; }; @@ -743,6 +774,543 @@ accessfailed: return ret; } +static size_t mtty_data_size(struct mdev_state *mdev_state) +{ + return offsetof(struct mtty_data, ports) + + (mdev_state->nr_ports * sizeof(struct serial_port)); +} + +static void mtty_disable_file(struct mtty_migration_file *migf) +{ + mutex_lock(&migf->lock); + migf->disabled = true; + migf->filled_size = 0; + migf->filp->f_pos = 0; + mutex_unlock(&migf->lock); +} + +static void mtty_disable_files(struct mdev_state *mdev_state) +{ + if (mdev_state->saving_migf) { + mtty_disable_file(mdev_state->saving_migf); + fput(mdev_state->saving_migf->filp); + mdev_state->saving_migf = NULL; + } + + if (mdev_state->resuming_migf) { + mtty_disable_file(mdev_state->resuming_migf); + fput(mdev_state->resuming_migf->filp); + mdev_state->resuming_migf = NULL; + } +} + +static void mtty_state_mutex_unlock(struct mdev_state *mdev_state) +{ +again: + mutex_lock(&mdev_state->reset_mutex); + if (mdev_state->deferred_reset) { + mdev_state->deferred_reset = false; + mutex_unlock(&mdev_state->reset_mutex); + mdev_state->state = VFIO_DEVICE_STATE_RUNNING; + mtty_disable_files(mdev_state); + goto again; + } + mutex_unlock(&mdev_state->state_mutex); + mutex_unlock(&mdev_state->reset_mutex); +} + +static int mtty_release_migf(struct inode *inode, struct file *filp) +{ + struct mtty_migration_file *migf = filp->private_data; + + mtty_disable_file(migf); + mutex_destroy(&migf->lock); + kfree(migf); + + return 0; +} + +static long mtty_precopy_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct mtty_migration_file *migf = filp->private_data; + struct mdev_state *mdev_state = migf->mdev_state; + loff_t *pos = &filp->f_pos; + struct vfio_precopy_info info = {}; + unsigned long minsz; + int ret; + + if (cmd != VFIO_MIG_GET_PRECOPY_INFO) + return -ENOTTY; + + minsz = offsetofend(struct vfio_precopy_info, dirty_bytes); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + if (info.argsz < minsz) + return -EINVAL; + + mutex_lock(&mdev_state->state_mutex); + if (mdev_state->state != VFIO_DEVICE_STATE_PRE_COPY && + mdev_state->state != VFIO_DEVICE_STATE_PRE_COPY_P2P) { + ret = -EINVAL; + goto unlock; + } + + mutex_lock(&migf->lock); + + if (migf->disabled) { + mutex_unlock(&migf->lock); + ret = -ENODEV; + goto unlock; + } + + if (*pos > migf->filled_size) { + mutex_unlock(&migf->lock); + ret = -EINVAL; + goto unlock; + } + + info.dirty_bytes = 0; + info.initial_bytes = migf->filled_size - *pos; + mutex_unlock(&migf->lock); + + ret = copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0; +unlock: + mtty_state_mutex_unlock(mdev_state); + return ret; +} + +static ssize_t mtty_save_read(struct file *filp, char __user *buf, + size_t len, loff_t *pos) +{ + struct mtty_migration_file *migf = filp->private_data; + ssize_t ret = 0; + + if (pos) + return -ESPIPE; + + pos = &filp->f_pos; + + mutex_lock(&migf->lock); + + dev_dbg(migf->mdev_state->vdev.dev, "%s ask %zu\n", __func__, len); + + if (migf->disabled) { + ret = -ENODEV; + goto out_unlock; + } + + if (*pos > migf->filled_size) { + ret = -EINVAL; + goto out_unlock; + } + + len = min_t(size_t, migf->filled_size - *pos, len); + if (len) { + if (copy_to_user(buf, (void *)&migf->data + *pos, len)) { + ret = -EFAULT; + goto out_unlock; + } + *pos += len; + ret = len; + } +out_unlock: + dev_dbg(migf->mdev_state->vdev.dev, "%s read %zu\n", __func__, ret); + mutex_unlock(&migf->lock); + return ret; +} + +static const struct file_operations mtty_save_fops = { + .owner = THIS_MODULE, + .read = mtty_save_read, + .unlocked_ioctl = mtty_precopy_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .release = mtty_release_migf, + .llseek = no_llseek, +}; + +static void mtty_save_state(struct mdev_state *mdev_state) +{ + struct mtty_migration_file *migf = mdev_state->saving_migf; + int i; + + mutex_lock(&migf->lock); + for (i = 0; i < mdev_state->nr_ports; i++) { + memcpy(&migf->data.ports[i], + &mdev_state->s[i], sizeof(struct serial_port)); + migf->filled_size += sizeof(struct serial_port); + } + dev_dbg(mdev_state->vdev.dev, + "%s filled to %zu\n", __func__, migf->filled_size); + mutex_unlock(&migf->lock); +} + +static int mtty_load_state(struct mdev_state *mdev_state) +{ + struct mtty_migration_file *migf = mdev_state->resuming_migf; + int i; + + mutex_lock(&migf->lock); + /* magic and version already tested by resume write fn */ + if (migf->filled_size < mtty_data_size(mdev_state)) { + dev_dbg(mdev_state->vdev.dev, "%s expected %zu, got %zu\n", + __func__, mtty_data_size(mdev_state), + migf->filled_size); + mutex_unlock(&migf->lock); + return -EINVAL; + } + + for (i = 0; i < mdev_state->nr_ports; i++) + memcpy(&mdev_state->s[i], + &migf->data.ports[i], sizeof(struct serial_port)); + + mutex_unlock(&migf->lock); + return 0; +} + +static struct mtty_migration_file * +mtty_save_device_data(struct mdev_state *mdev_state, + enum vfio_device_mig_state state) +{ + struct mtty_migration_file *migf = mdev_state->saving_migf; + struct mtty_migration_file *ret = NULL; + + if (migf) { + if (state == VFIO_DEVICE_STATE_STOP_COPY) + goto fill_data; + return ret; + } + + migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT); + if (!migf) + return ERR_PTR(-ENOMEM); + + migf->filp = anon_inode_getfile("mtty_mig", &mtty_save_fops, + migf, O_RDONLY); + if (IS_ERR(migf->filp)) { + int rc = PTR_ERR(migf->filp); + + kfree(migf); + return ERR_PTR(rc); + } + + stream_open(migf->filp->f_inode, migf->filp); + mutex_init(&migf->lock); + migf->mdev_state = mdev_state; + + migf->data.magic = MTTY_MAGIC; + migf->data.major_ver = MTTY_MAJOR_VER; + migf->data.minor_ver = MTTY_MINOR_VER; + migf->data.nr_ports = mdev_state->nr_ports; + + migf->filled_size = offsetof(struct mtty_data, ports); + + dev_dbg(mdev_state->vdev.dev, "%s filled header to %zu\n", + __func__, migf->filled_size); + + ret = mdev_state->saving_migf = migf; + +fill_data: + if (state == VFIO_DEVICE_STATE_STOP_COPY) + mtty_save_state(mdev_state); + + return ret; +} + +static ssize_t mtty_resume_write(struct file *filp, const char __user *buf, + size_t len, loff_t *pos) +{ + struct mtty_migration_file *migf = filp->private_data; + struct mdev_state *mdev_state = migf->mdev_state; + loff_t requested_length; + ssize_t ret = 0; + + if (pos) + return -ESPIPE; + + pos = &filp->f_pos; + + if (*pos < 0 || + check_add_overflow((loff_t)len, *pos, &requested_length)) + return -EINVAL; + + if (requested_length > mtty_data_size(mdev_state)) + return -ENOMEM; + + mutex_lock(&migf->lock); + + if (migf->disabled) { + ret = -ENODEV; + goto out_unlock; + } + + if (copy_from_user((void *)&migf->data + *pos, buf, len)) { + ret = -EFAULT; + goto out_unlock; + } + + *pos += len; + ret = len; + + dev_dbg(migf->mdev_state->vdev.dev, "%s received %zu, total %zu\n", + __func__, len, migf->filled_size + len); + + if (migf->filled_size < offsetof(struct mtty_data, ports) && + migf->filled_size + len >= offsetof(struct mtty_data, ports)) { + if (migf->data.magic != MTTY_MAGIC || migf->data.flags || + migf->data.major_ver != MTTY_MAJOR_VER || + migf->data.minor_ver != MTTY_MINOR_VER || + migf->data.nr_ports != mdev_state->nr_ports) { + dev_dbg(migf->mdev_state->vdev.dev, + "%s failed validation\n", __func__); + ret = -EFAULT; + } else { + dev_dbg(migf->mdev_state->vdev.dev, + "%s header validated\n", __func__); + } + } + + migf->filled_size += len; + +out_unlock: + mutex_unlock(&migf->lock); + return ret; +} + +static const struct file_operations mtty_resume_fops = { + .owner = THIS_MODULE, + .write = mtty_resume_write, + .release = mtty_release_migf, + .llseek = no_llseek, +}; + +static struct mtty_migration_file * +mtty_resume_device_data(struct mdev_state *mdev_state) +{ + struct mtty_migration_file *migf; + int ret; + + migf = kzalloc(sizeof(*migf), GFP_KERNEL_ACCOUNT); + if (!migf) + return ERR_PTR(-ENOMEM); + + migf->filp = anon_inode_getfile("mtty_mig", &mtty_resume_fops, + migf, O_WRONLY); + if (IS_ERR(migf->filp)) { + ret = PTR_ERR(migf->filp); + kfree(migf); + return ERR_PTR(ret); + } + + stream_open(migf->filp->f_inode, migf->filp); + mutex_init(&migf->lock); + migf->mdev_state = mdev_state; + + mdev_state->resuming_migf = migf; + + return migf; +} + +static struct file *mtty_step_state(struct mdev_state *mdev_state, + enum vfio_device_mig_state new) +{ + enum vfio_device_mig_state cur = mdev_state->state; + + dev_dbg(mdev_state->vdev.dev, "%s: %d -> %d\n", __func__, cur, new); + + /* + * The following state transitions are no-op considering + * mtty does not do DMA nor require any explicit start/stop. + * + * RUNNING -> RUNNING_P2P + * RUNNING_P2P -> RUNNING + * RUNNING_P2P -> STOP + * PRE_COPY -> PRE_COPY_P2P + * PRE_COPY_P2P -> PRE_COPY + * STOP -> RUNNING_P2P + */ + if ((cur == VFIO_DEVICE_STATE_RUNNING && + new == VFIO_DEVICE_STATE_RUNNING_P2P) || + (cur == VFIO_DEVICE_STATE_RUNNING_P2P && + (new == VFIO_DEVICE_STATE_RUNNING || + new == VFIO_DEVICE_STATE_STOP)) || + (cur == VFIO_DEVICE_STATE_PRE_COPY && + new == VFIO_DEVICE_STATE_PRE_COPY_P2P) || + (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && + new == VFIO_DEVICE_STATE_PRE_COPY) || + (cur == VFIO_DEVICE_STATE_STOP && + new == VFIO_DEVICE_STATE_RUNNING_P2P)) + return NULL; + + /* + * The following state transitions simply close migration files, + * with the exception of RESUMING -> STOP, which needs to load + * the state first. + * + * RESUMING -> STOP + * PRE_COPY -> RUNNING + * PRE_COPY_P2P -> RUNNING_P2P + * STOP_COPY -> STOP + */ + if (cur == VFIO_DEVICE_STATE_RESUMING && + new == VFIO_DEVICE_STATE_STOP) { + int ret; + + ret = mtty_load_state(mdev_state); + if (ret) + return ERR_PTR(ret); + mtty_disable_files(mdev_state); + return NULL; + } + + if ((cur == VFIO_DEVICE_STATE_PRE_COPY && + new == VFIO_DEVICE_STATE_RUNNING) || + (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && + new == VFIO_DEVICE_STATE_RUNNING_P2P) || + (cur == VFIO_DEVICE_STATE_STOP_COPY && + new == VFIO_DEVICE_STATE_STOP)) { + mtty_disable_files(mdev_state); + return NULL; + } + + /* + * The following state transitions return migration files. + * + * RUNNING -> PRE_COPY + * RUNNING_P2P -> PRE_COPY_P2P + * STOP -> STOP_COPY + * STOP -> RESUMING + * PRE_COPY_P2P -> STOP_COPY + */ + if ((cur == VFIO_DEVICE_STATE_RUNNING && + new == VFIO_DEVICE_STATE_PRE_COPY) || + (cur == VFIO_DEVICE_STATE_RUNNING_P2P && + new == VFIO_DEVICE_STATE_PRE_COPY_P2P) || + (cur == VFIO_DEVICE_STATE_STOP && + new == VFIO_DEVICE_STATE_STOP_COPY) || + (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && + new == VFIO_DEVICE_STATE_STOP_COPY)) { + struct mtty_migration_file *migf; + + migf = mtty_save_device_data(mdev_state, new); + if (IS_ERR(migf)) + return ERR_CAST(migf); + + if (migf) { + get_file(migf->filp); + + return migf->filp; + } + return NULL; + } + + if (cur == VFIO_DEVICE_STATE_STOP && + new == VFIO_DEVICE_STATE_RESUMING) { + struct mtty_migration_file *migf; + + migf = mtty_resume_device_data(mdev_state); + if (IS_ERR(migf)) + return ERR_CAST(migf); + + get_file(migf->filp); + + return migf->filp; + } + + /* vfio_mig_get_next_state() does not use arcs other than the above */ + WARN_ON(true); + return ERR_PTR(-EINVAL); +} + +static struct file *mtty_set_state(struct vfio_device *vdev, + enum vfio_device_mig_state new_state) +{ + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); + struct file *ret = NULL; + + dev_dbg(vdev->dev, "%s -> %d\n", __func__, new_state); + + mutex_lock(&mdev_state->state_mutex); + while (mdev_state->state != new_state) { + enum vfio_device_mig_state next_state; + int rc = vfio_mig_get_next_state(vdev, mdev_state->state, + new_state, &next_state); + if (rc) { + ret = ERR_PTR(rc); + break; + } + + ret = mtty_step_state(mdev_state, next_state); + if (IS_ERR(ret)) + break; + + mdev_state->state = next_state; + + if (WARN_ON(ret && new_state != next_state)) { + fput(ret); + ret = ERR_PTR(-EINVAL); + break; + } + } + mtty_state_mutex_unlock(mdev_state); + return ret; +} + +static int mtty_get_state(struct vfio_device *vdev, + enum vfio_device_mig_state *current_state) +{ + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); + + mutex_lock(&mdev_state->state_mutex); + *current_state = mdev_state->state; + mtty_state_mutex_unlock(mdev_state); + return 0; +} + +static int mtty_get_data_size(struct vfio_device *vdev, + unsigned long *stop_copy_length) +{ + struct mdev_state *mdev_state = + container_of(vdev, struct mdev_state, vdev); + + *stop_copy_length = mtty_data_size(mdev_state); + return 0; +} + +static const struct vfio_migration_ops mtty_migration_ops = { + .migration_set_state = mtty_set_state, + .migration_get_state = mtty_get_state, + .migration_get_data_size = mtty_get_data_size, +}; + +static int mtty_log_start(struct vfio_device *vdev, + struct rb_root_cached *ranges, + u32 nnodes, u64 *page_size) +{ + return 0; +} + +static int mtty_log_stop(struct vfio_device *vdev) +{ + return 0; +} + +static int mtty_log_read_and_clear(struct vfio_device *vdev, + unsigned long iova, unsigned long length, + struct iova_bitmap *dirty) +{ + return 0; +} + +static const struct vfio_log_ops mtty_log_ops = { + .log_start = mtty_log_start, + .log_stop = mtty_log_stop, + .log_read_and_clear = mtty_log_read_and_clear, +}; + static int mtty_init_dev(struct vfio_device *vdev) { struct mdev_state *mdev_state = @@ -775,6 +1343,16 @@ static int mtty_init_dev(struct vfio_device *vdev) mutex_init(&mdev_state->ops_lock); mdev_state->mdev = mdev; mtty_create_config_space(mdev_state); + + mutex_init(&mdev_state->state_mutex); + mutex_init(&mdev_state->reset_mutex); + vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | + VFIO_MIGRATION_P2P | + VFIO_MIGRATION_PRE_COPY; + vdev->mig_ops = &mtty_migration_ops; + vdev->log_ops = &mtty_log_ops; + mdev_state->state = VFIO_DEVICE_STATE_RUNNING; + return 0; err_nr_ports: @@ -808,6 +1386,8 @@ static void mtty_release_dev(struct vfio_device *vdev) struct mdev_state *mdev_state = container_of(vdev, struct mdev_state, vdev); + mutex_destroy(&mdev_state->reset_mutex); + mutex_destroy(&mdev_state->state_mutex); atomic_add(mdev_state->nr_ports, &mdev_avail_ports); kfree(mdev_state->vconfig); } @@ -824,6 +1404,15 @@ static int mtty_reset(struct mdev_state *mdev_state) { pr_info("%s: called\n", __func__); + mutex_lock(&mdev_state->reset_mutex); + mdev_state->deferred_reset = true; + if (!mutex_trylock(&mdev_state->state_mutex)) { + mutex_unlock(&mdev_state->reset_mutex); + return 0; + } + mutex_unlock(&mdev_state->reset_mutex); + mtty_state_mutex_unlock(mdev_state); + return 0; } @@ -1350,6 +1939,7 @@ static void mtty_close(struct vfio_device *vdev) struct mdev_state *mdev_state = container_of(vdev, struct mdev_state, vdev); + mtty_disable_files(mdev_state); mtty_disable_intx(mdev_state); mtty_disable_msi(mdev_state); } -- cgit v1.2.3