diff options
Diffstat (limited to 'drivers/vfio/pci')
-rw-r--r-- | drivers/vfio/pci/Kconfig | 2 | ||||
-rw-r--r-- | drivers/vfio/pci/Makefile | 2 | ||||
-rw-r--r-- | drivers/vfio/pci/pds/dirty.c | 1 | ||||
-rw-r--r-- | drivers/vfio/pci/qat/Kconfig | 12 | ||||
-rw-r--r-- | drivers/vfio/pci/qat/Makefile | 3 | ||||
-rw-r--r-- | drivers/vfio/pci/qat/main.c | 702 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_core.c | 352 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_intrs.c | 61 |
8 files changed, 867 insertions, 268 deletions
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index 15821a2d77..bf50ffa10b 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -69,4 +69,6 @@ source "drivers/vfio/pci/virtio/Kconfig" source "drivers/vfio/pci/nvgrace-gpu/Kconfig" +source "drivers/vfio/pci/qat/Kconfig" + endmenu diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index ce7a61f1d9..cf00c0a7e5 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile @@ -17,3 +17,5 @@ obj-$(CONFIG_PDS_VFIO_PCI) += pds/ obj-$(CONFIG_VIRTIO_VFIO_PCI) += virtio/ obj-$(CONFIG_NVGRACE_GPU_VFIO_PCI) += nvgrace-gpu/ + +obj-$(CONFIG_QAT_VFIO_PCI) += qat/ diff --git a/drivers/vfio/pci/pds/dirty.c b/drivers/vfio/pci/pds/dirty.c index 68e8f006df..c51f5e4c3d 100644 --- a/drivers/vfio/pci/pds/dirty.c +++ b/drivers/vfio/pci/pds/dirty.c @@ -3,6 +3,7 @@ #include <linux/interval_tree.h> #include <linux/vfio.h> +#include <linux/vmalloc.h> #include <linux/pds/pds_common.h> #include <linux/pds/pds_core_if.h> diff --git a/drivers/vfio/pci/qat/Kconfig b/drivers/vfio/pci/qat/Kconfig new file mode 100644 index 0000000000..bf52cfa4b5 --- /dev/null +++ b/drivers/vfio/pci/qat/Kconfig @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0-only +config QAT_VFIO_PCI + tristate "VFIO support for QAT VF PCI devices" + select VFIO_PCI_CORE + depends on CRYPTO_DEV_QAT_4XXX + help + This provides migration support for Intel(R) QAT Virtual Function + using the VFIO framework. + + To compile this as a module, choose M here: the module + will be called qat_vfio_pci. If you don't know what to do here, + say N. 
diff --git a/drivers/vfio/pci/qat/Makefile b/drivers/vfio/pci/qat/Makefile new file mode 100644 index 0000000000..5fe5c4ec19 --- /dev/null +++ b/drivers/vfio/pci/qat/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_QAT_VFIO_PCI) += qat_vfio_pci.o +qat_vfio_pci-y := main.o diff --git a/drivers/vfio/pci/qat/main.c b/drivers/vfio/pci/qat/main.c new file mode 100644 index 0000000000..e36740a282 --- /dev/null +++ b/drivers/vfio/pci/qat/main.c @@ -0,0 +1,702 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024 Intel Corporation */ + +#include <linux/anon_inodes.h> +#include <linux/container_of.h> +#include <linux/device.h> +#include <linux/file.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/sizes.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <linux/vfio_pci_core.h> +#include <linux/qat/qat_mig_dev.h> + +/* + * The migration data of each Intel QAT VF device is encapsulated into a + * 4096 bytes block. The data consists of two parts. + * The first is a pre-configured set of attributes of the VF being migrated, + * which are only set when it is created. This can be migrated during pre-copy + * stage and used for a device compatibility check. + * The second is the VF state. This includes the required MMIO regions and + * the shadow states maintained by the QAT PF driver. This part can only be + * saved when the VF is fully quiesced and be migrated during stop-copy stage. + * Both these 2 parts of data are saved in hierarchical structures including + * a preamble section and several raw state sections. + * When the pre-configured part of the migration data is fully retrieved from + * user space, the preamble section is used to validate the correctness of + * the data blocks and check the version compatibility. The raw state sections + * are then used to do a device compatibility check. 
+ * When the device transits from RESUMING state, the VF states are extracted + * from the raw state sections of the VF state part of the migration data and + * then loaded into the device. + */ + +struct qat_vf_migration_file { + struct file *filp; + /* protects migration region context */ + struct mutex lock; + bool disabled; + struct qat_vf_core_device *qat_vdev; + ssize_t filled_size; +}; + +struct qat_vf_core_device { + struct vfio_pci_core_device core_device; + struct qat_mig_dev *mdev; + /* protects migration state */ + struct mutex state_mutex; + enum vfio_device_mig_state mig_state; + struct qat_vf_migration_file *resuming_migf; + struct qat_vf_migration_file *saving_migf; +}; + +static int qat_vf_pci_open_device(struct vfio_device *core_vdev) +{ + struct qat_vf_core_device *qat_vdev = + container_of(core_vdev, struct qat_vf_core_device, + core_device.vdev); + struct vfio_pci_core_device *vdev = &qat_vdev->core_device; + int ret; + + ret = vfio_pci_core_enable(vdev); + if (ret) + return ret; + + ret = qat_vfmig_open(qat_vdev->mdev); + if (ret) { + vfio_pci_core_disable(vdev); + return ret; + } + qat_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING; + + vfio_pci_core_finish_enable(vdev); + + return 0; +} + +static void qat_vf_disable_fd(struct qat_vf_migration_file *migf) +{ + mutex_lock(&migf->lock); + migf->disabled = true; + migf->filp->f_pos = 0; + migf->filled_size = 0; + mutex_unlock(&migf->lock); +} + +static void qat_vf_disable_fds(struct qat_vf_core_device *qat_vdev) +{ + if (qat_vdev->resuming_migf) { + qat_vf_disable_fd(qat_vdev->resuming_migf); + fput(qat_vdev->resuming_migf->filp); + qat_vdev->resuming_migf = NULL; + } + + if (qat_vdev->saving_migf) { + qat_vf_disable_fd(qat_vdev->saving_migf); + fput(qat_vdev->saving_migf->filp); + qat_vdev->saving_migf = NULL; + } +} + +static void qat_vf_pci_close_device(struct vfio_device *core_vdev) +{ + struct qat_vf_core_device *qat_vdev = container_of(core_vdev, + struct qat_vf_core_device, 
core_device.vdev); + + qat_vfmig_close(qat_vdev->mdev); + qat_vf_disable_fds(qat_vdev); + vfio_pci_core_close_device(core_vdev); +} + +static long qat_vf_precopy_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct qat_vf_migration_file *migf = filp->private_data; + struct qat_vf_core_device *qat_vdev = migf->qat_vdev; + struct qat_mig_dev *mig_dev = qat_vdev->mdev; + struct vfio_precopy_info info; + loff_t *pos = &filp->f_pos; + unsigned long minsz; + int ret = 0; + + if (cmd != VFIO_MIG_GET_PRECOPY_INFO) + return -ENOTTY; + + minsz = offsetofend(struct vfio_precopy_info, dirty_bytes); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + if (info.argsz < minsz) + return -EINVAL; + + mutex_lock(&qat_vdev->state_mutex); + if (qat_vdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY && + qat_vdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY_P2P) { + mutex_unlock(&qat_vdev->state_mutex); + return -EINVAL; + } + + mutex_lock(&migf->lock); + if (migf->disabled) { + ret = -ENODEV; + goto out; + } + + if (*pos > mig_dev->setup_size) { + ret = -EINVAL; + goto out; + } + + info.dirty_bytes = 0; + info.initial_bytes = mig_dev->setup_size - *pos; + +out: + mutex_unlock(&migf->lock); + mutex_unlock(&qat_vdev->state_mutex); + if (ret) + return ret; + return copy_to_user((void __user *)arg, &info, minsz) ? 
-EFAULT : 0; +} + +static ssize_t qat_vf_save_read(struct file *filp, char __user *buf, + size_t len, loff_t *pos) +{ + struct qat_vf_migration_file *migf = filp->private_data; + struct qat_mig_dev *mig_dev = migf->qat_vdev->mdev; + ssize_t done = 0; + loff_t *offs; + int ret; + + if (pos) + return -ESPIPE; + offs = &filp->f_pos; + + mutex_lock(&migf->lock); + if (*offs > migf->filled_size || *offs < 0) { + done = -EINVAL; + goto out_unlock; + } + + if (migf->disabled) { + done = -ENODEV; + goto out_unlock; + } + + len = min_t(size_t, migf->filled_size - *offs, len); + if (len) { + ret = copy_to_user(buf, mig_dev->state + *offs, len); + if (ret) { + done = -EFAULT; + goto out_unlock; + } + *offs += len; + done = len; + } + +out_unlock: + mutex_unlock(&migf->lock); + return done; +} + +static int qat_vf_release_file(struct inode *inode, struct file *filp) +{ + struct qat_vf_migration_file *migf = filp->private_data; + + qat_vf_disable_fd(migf); + mutex_destroy(&migf->lock); + kfree(migf); + + return 0; +} + +static const struct file_operations qat_vf_save_fops = { + .owner = THIS_MODULE, + .read = qat_vf_save_read, + .unlocked_ioctl = qat_vf_precopy_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .release = qat_vf_release_file, + .llseek = no_llseek, +}; + +static int qat_vf_save_state(struct qat_vf_core_device *qat_vdev, + struct qat_vf_migration_file *migf) +{ + int ret; + + ret = qat_vfmig_save_state(qat_vdev->mdev); + if (ret) + return ret; + migf->filled_size = qat_vdev->mdev->state_size; + + return 0; +} + +static int qat_vf_save_setup(struct qat_vf_core_device *qat_vdev, + struct qat_vf_migration_file *migf) +{ + int ret; + + ret = qat_vfmig_save_setup(qat_vdev->mdev); + if (ret) + return ret; + migf->filled_size = qat_vdev->mdev->setup_size; + + return 0; +} + +/* + * Allocate a file handler for user space and then save the migration data for + * the device being migrated. If this is called in the pre-copy stage, save the + * pre-configured device data. 
Otherwise, if this is called in the stop-copy + * stage, save the device state. In both cases, update the data size which can + * then be read from user space. + */ +static struct qat_vf_migration_file * +qat_vf_save_device_data(struct qat_vf_core_device *qat_vdev, bool pre_copy) +{ + struct qat_vf_migration_file *migf; + int ret; + + migf = kzalloc(sizeof(*migf), GFP_KERNEL); + if (!migf) + return ERR_PTR(-ENOMEM); + + migf->filp = anon_inode_getfile("qat_vf_mig", &qat_vf_save_fops, + migf, O_RDONLY); + ret = PTR_ERR_OR_ZERO(migf->filp); + if (ret) { + kfree(migf); + return ERR_PTR(ret); + } + + stream_open(migf->filp->f_inode, migf->filp); + mutex_init(&migf->lock); + + if (pre_copy) + ret = qat_vf_save_setup(qat_vdev, migf); + else + ret = qat_vf_save_state(qat_vdev, migf); + if (ret) { + fput(migf->filp); + return ERR_PTR(ret); + } + + migf->qat_vdev = qat_vdev; + + return migf; +} + +static ssize_t qat_vf_resume_write(struct file *filp, const char __user *buf, + size_t len, loff_t *pos) +{ + struct qat_vf_migration_file *migf = filp->private_data; + struct qat_mig_dev *mig_dev = migf->qat_vdev->mdev; + loff_t end, *offs; + ssize_t done = 0; + int ret; + + if (pos) + return -ESPIPE; + offs = &filp->f_pos; + + if (*offs < 0 || + check_add_overflow((loff_t)len, *offs, &end)) + return -EOVERFLOW; + + if (end > mig_dev->state_size) + return -ENOMEM; + + mutex_lock(&migf->lock); + if (migf->disabled) { + done = -ENODEV; + goto out_unlock; + } + + ret = copy_from_user(mig_dev->state + *offs, buf, len); + if (ret) { + done = -EFAULT; + goto out_unlock; + } + *offs += len; + migf->filled_size += len; + + /* + * Load the pre-configured device data first to check if the target + * device is compatible with the source device. 
+ */ + ret = qat_vfmig_load_setup(mig_dev, migf->filled_size); + if (ret && ret != -EAGAIN) { + done = ret; + goto out_unlock; + } + done = len; + +out_unlock: + mutex_unlock(&migf->lock); + return done; +} + +static const struct file_operations qat_vf_resume_fops = { + .owner = THIS_MODULE, + .write = qat_vf_resume_write, + .release = qat_vf_release_file, + .llseek = no_llseek, +}; + +static struct qat_vf_migration_file * +qat_vf_resume_device_data(struct qat_vf_core_device *qat_vdev) +{ + struct qat_vf_migration_file *migf; + int ret; + + migf = kzalloc(sizeof(*migf), GFP_KERNEL); + if (!migf) + return ERR_PTR(-ENOMEM); + + migf->filp = anon_inode_getfile("qat_vf_mig", &qat_vf_resume_fops, migf, O_WRONLY); + ret = PTR_ERR_OR_ZERO(migf->filp); + if (ret) { + kfree(migf); + return ERR_PTR(ret); + } + + migf->qat_vdev = qat_vdev; + migf->filled_size = 0; + stream_open(migf->filp->f_inode, migf->filp); + mutex_init(&migf->lock); + + return migf; +} + +static int qat_vf_load_device_data(struct qat_vf_core_device *qat_vdev) +{ + return qat_vfmig_load_state(qat_vdev->mdev); +} + +static struct file *qat_vf_pci_step_device_state(struct qat_vf_core_device *qat_vdev, u32 new) +{ + u32 cur = qat_vdev->mig_state; + int ret; + + /* + * As the device is not capable of just stopping P2P DMAs, suspend the + * device completely once any of the P2P states are reached. + * When it is suspended, all its MMIO registers can still be operated + * correctly, jobs submitted through ring are queued while no jobs are + * processed by the device. The MMIO states can be safely migrated to + * the target VF during stop-copy stage and restored correctly in the + * target VF. All queued jobs can be resumed then. 
+ */ + if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) || + (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) { + ret = qat_vfmig_suspend(qat_vdev->mdev); + if (ret) + return ERR_PTR(ret); + return NULL; + } + + if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) || + (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_PRE_COPY)) { + qat_vfmig_resume(qat_vdev->mdev); + return NULL; + } + + if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_STOP) || + (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING_P2P)) + return NULL; + + if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) { + struct qat_vf_migration_file *migf; + + migf = qat_vf_save_device_data(qat_vdev, false); + if (IS_ERR(migf)) + return ERR_CAST(migf); + get_file(migf->filp); + qat_vdev->saving_migf = migf; + return migf->filp; + } + + if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RESUMING) { + struct qat_vf_migration_file *migf; + + migf = qat_vf_resume_device_data(qat_vdev); + if (IS_ERR(migf)) + return ERR_CAST(migf); + get_file(migf->filp); + qat_vdev->resuming_migf = migf; + return migf->filp; + } + + if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) || + (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) || + (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_RUNNING_P2P)) { + qat_vf_disable_fds(qat_vdev); + return NULL; + } + + if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) || + (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) { + struct qat_vf_migration_file *migf; + + migf = qat_vf_save_device_data(qat_vdev, true); + if (IS_ERR(migf)) + return ERR_CAST(migf); + get_file(migf->filp); + qat_vdev->saving_migf = migf; + return migf->filp; + } + + if (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == 
VFIO_DEVICE_STATE_STOP_COPY) { + struct qat_vf_migration_file *migf = qat_vdev->saving_migf; + + if (!migf) + return ERR_PTR(-EINVAL); + ret = qat_vf_save_state(qat_vdev, migf); + if (ret) + return ERR_PTR(ret); + return NULL; + } + + if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) { + ret = qat_vf_load_device_data(qat_vdev); + if (ret) + return ERR_PTR(ret); + + qat_vf_disable_fds(qat_vdev); + return NULL; + } + + /* vfio_mig_get_next_state() does not use arcs other than the above */ + WARN_ON(true); + return ERR_PTR(-EINVAL); +} + +static void qat_vf_reset_done(struct qat_vf_core_device *qat_vdev) +{ + qat_vdev->mig_state = VFIO_DEVICE_STATE_RUNNING; + qat_vfmig_reset(qat_vdev->mdev); + qat_vf_disable_fds(qat_vdev); +} + +static struct file *qat_vf_pci_set_device_state(struct vfio_device *vdev, + enum vfio_device_mig_state new_state) +{ + struct qat_vf_core_device *qat_vdev = container_of(vdev, + struct qat_vf_core_device, core_device.vdev); + enum vfio_device_mig_state next_state; + struct file *res = NULL; + int ret; + + mutex_lock(&qat_vdev->state_mutex); + while (new_state != qat_vdev->mig_state) { + ret = vfio_mig_get_next_state(vdev, qat_vdev->mig_state, + new_state, &next_state); + if (ret) { + res = ERR_PTR(ret); + break; + } + res = qat_vf_pci_step_device_state(qat_vdev, next_state); + if (IS_ERR(res)) + break; + qat_vdev->mig_state = next_state; + if (WARN_ON(res && new_state != qat_vdev->mig_state)) { + fput(res); + res = ERR_PTR(-EINVAL); + break; + } + } + mutex_unlock(&qat_vdev->state_mutex); + + return res; +} + +static int qat_vf_pci_get_device_state(struct vfio_device *vdev, + enum vfio_device_mig_state *curr_state) +{ + struct qat_vf_core_device *qat_vdev = container_of(vdev, + struct qat_vf_core_device, core_device.vdev); + + mutex_lock(&qat_vdev->state_mutex); + *curr_state = qat_vdev->mig_state; + mutex_unlock(&qat_vdev->state_mutex); + + return 0; +} + +static int qat_vf_pci_get_data_size(struct vfio_device *vdev, + 
unsigned long *stop_copy_length) +{ + struct qat_vf_core_device *qat_vdev = container_of(vdev, + struct qat_vf_core_device, core_device.vdev); + + mutex_lock(&qat_vdev->state_mutex); + *stop_copy_length = qat_vdev->mdev->state_size; + mutex_unlock(&qat_vdev->state_mutex); + + return 0; +} + +static const struct vfio_migration_ops qat_vf_pci_mig_ops = { + .migration_set_state = qat_vf_pci_set_device_state, + .migration_get_state = qat_vf_pci_get_device_state, + .migration_get_data_size = qat_vf_pci_get_data_size, +}; + +static void qat_vf_pci_release_dev(struct vfio_device *core_vdev) +{ + struct qat_vf_core_device *qat_vdev = container_of(core_vdev, + struct qat_vf_core_device, core_device.vdev); + + qat_vfmig_cleanup(qat_vdev->mdev); + qat_vfmig_destroy(qat_vdev->mdev); + mutex_destroy(&qat_vdev->state_mutex); + vfio_pci_core_release_dev(core_vdev); +} + +static int qat_vf_pci_init_dev(struct vfio_device *core_vdev) +{ + struct qat_vf_core_device *qat_vdev = container_of(core_vdev, + struct qat_vf_core_device, core_device.vdev); + struct qat_mig_dev *mdev; + struct pci_dev *parent; + int ret, vf_id; + + core_vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P | + VFIO_MIGRATION_PRE_COPY; + core_vdev->mig_ops = &qat_vf_pci_mig_ops; + + ret = vfio_pci_core_init_dev(core_vdev); + if (ret) + return ret; + + mutex_init(&qat_vdev->state_mutex); + + parent = pci_physfn(qat_vdev->core_device.pdev); + vf_id = pci_iov_vf_id(qat_vdev->core_device.pdev); + if (vf_id < 0) { + ret = -ENODEV; + goto err_rel; + } + + mdev = qat_vfmig_create(parent, vf_id); + if (IS_ERR(mdev)) { + ret = PTR_ERR(mdev); + goto err_rel; + } + + ret = qat_vfmig_init(mdev); + if (ret) + goto err_destroy; + + qat_vdev->mdev = mdev; + + return 0; + +err_destroy: + qat_vfmig_destroy(mdev); +err_rel: + vfio_pci_core_release_dev(core_vdev); + return ret; +} + +static const struct vfio_device_ops qat_vf_pci_ops = { + .name = "qat-vf-vfio-pci", + .init = qat_vf_pci_init_dev, + .release = 
qat_vf_pci_release_dev, + .open_device = qat_vf_pci_open_device, + .close_device = qat_vf_pci_close_device, + .ioctl = vfio_pci_core_ioctl, + .read = vfio_pci_core_read, + .write = vfio_pci_core_write, + .mmap = vfio_pci_core_mmap, + .request = vfio_pci_core_request, + .match = vfio_pci_core_match, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, + .detach_ioas = vfio_iommufd_physical_detach_ioas, +}; + +static struct qat_vf_core_device *qat_vf_drvdata(struct pci_dev *pdev) +{ + struct vfio_pci_core_device *core_device = pci_get_drvdata(pdev); + + return container_of(core_device, struct qat_vf_core_device, core_device); +} + +static void qat_vf_pci_aer_reset_done(struct pci_dev *pdev) +{ + struct qat_vf_core_device *qat_vdev = qat_vf_drvdata(pdev); + + if (!qat_vdev->mdev) + return; + + mutex_lock(&qat_vdev->state_mutex); + qat_vf_reset_done(qat_vdev); + mutex_unlock(&qat_vdev->state_mutex); +} + +static int +qat_vf_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct device *dev = &pdev->dev; + struct qat_vf_core_device *qat_vdev; + int ret; + + qat_vdev = vfio_alloc_device(qat_vf_core_device, core_device.vdev, dev, &qat_vf_pci_ops); + if (IS_ERR(qat_vdev)) + return PTR_ERR(qat_vdev); + + pci_set_drvdata(pdev, &qat_vdev->core_device); + ret = vfio_pci_core_register_device(&qat_vdev->core_device); + if (ret) + goto out_put_device; + + return 0; + +out_put_device: + vfio_put_device(&qat_vdev->core_device.vdev); + return ret; +} + +static void qat_vf_vfio_pci_remove(struct pci_dev *pdev) +{ + struct qat_vf_core_device *qat_vdev = qat_vf_drvdata(pdev); + + vfio_pci_core_unregister_device(&qat_vdev->core_device); + vfio_put_device(&qat_vdev->core_device.vdev); +} + +static const struct pci_device_id qat_vf_vfio_pci_table[] = { + /* Intel QAT GEN4 4xxx VF device */ + { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_INTEL, 0x4941) }, + { 
PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_INTEL, 0x4943) }, + { PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_INTEL, 0x4945) }, + {} +}; +MODULE_DEVICE_TABLE(pci, qat_vf_vfio_pci_table); + +static const struct pci_error_handlers qat_vf_err_handlers = { + .reset_done = qat_vf_pci_aer_reset_done, + .error_detected = vfio_pci_core_aer_err_detected, +}; + +static struct pci_driver qat_vf_vfio_pci_driver = { + .name = "qat_vfio_pci", + .id_table = qat_vf_vfio_pci_table, + .probe = qat_vf_vfio_pci_probe, + .remove = qat_vf_vfio_pci_remove, + .err_handler = &qat_vf_err_handlers, + .driver_managed_dma = true, +}; +module_pci_driver(qat_vf_vfio_pci_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Xin Zeng <xin.zeng@intel.com>"); +MODULE_DESCRIPTION("QAT VFIO PCI - VFIO PCI driver with live migration support for Intel(R) QAT GEN4 device family"); +MODULE_IMPORT_NS(CRYPTO_QAT); diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index d94d61b92c..ba0ce0075b 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -778,25 +778,26 @@ static int vfio_pci_count_devs(struct pci_dev *pdev, void *data) } struct vfio_pci_fill_info { - struct vfio_pci_dependent_device __user *devices; - struct vfio_pci_dependent_device __user *devices_end; struct vfio_device *vdev; + struct vfio_pci_dependent_device *devices; + int nr_devices; u32 count; u32 flags; }; static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data) { - struct vfio_pci_dependent_device info = { - .segment = pci_domain_nr(pdev->bus), - .bus = pdev->bus->number, - .devfn = pdev->devfn, - }; + struct vfio_pci_dependent_device *info; struct vfio_pci_fill_info *fill = data; - fill->count++; - if (fill->devices >= fill->devices_end) - return 0; + /* The topology changed since we counted devices */ + if (fill->count >= fill->nr_devices) + return -EAGAIN; + + info = &fill->devices[fill->count++]; + info->segment = pci_domain_nr(pdev->bus); + info->bus = 
pdev->bus->number; + info->devfn = pdev->devfn; if (fill->flags & VFIO_PCI_HOT_RESET_FLAG_DEV_ID) { struct iommufd_ctx *iommufd = vfio_iommufd_device_ictx(fill->vdev); @@ -809,19 +810,19 @@ static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data) */ vdev = vfio_find_device_in_devset(dev_set, &pdev->dev); if (!vdev) { - info.devid = VFIO_PCI_DEVID_NOT_OWNED; + info->devid = VFIO_PCI_DEVID_NOT_OWNED; } else { int id = vfio_iommufd_get_dev_id(vdev, iommufd); if (id > 0) - info.devid = id; + info->devid = id; else if (id == -ENOENT) - info.devid = VFIO_PCI_DEVID_OWNED; + info->devid = VFIO_PCI_DEVID_OWNED; else - info.devid = VFIO_PCI_DEVID_NOT_OWNED; + info->devid = VFIO_PCI_DEVID_NOT_OWNED; } /* If devid is VFIO_PCI_DEVID_NOT_OWNED, clear owned flag. */ - if (info.devid == VFIO_PCI_DEVID_NOT_OWNED) + if (info->devid == VFIO_PCI_DEVID_NOT_OWNED) fill->flags &= ~VFIO_PCI_HOT_RESET_FLAG_DEV_ID_OWNED; } else { struct iommu_group *iommu_group; @@ -830,13 +831,10 @@ static int vfio_pci_fill_devs(struct pci_dev *pdev, void *data) if (!iommu_group) return -EPERM; /* Cannot reset non-isolated devices */ - info.group_id = iommu_group_id(iommu_group); + info->group_id = iommu_group_id(iommu_group); iommu_group_put(iommu_group); } - if (copy_to_user(fill->devices, &info, sizeof(info))) - return -EFAULT; - fill->devices++; return 0; } @@ -1258,10 +1256,11 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info( { unsigned long minsz = offsetofend(struct vfio_pci_hot_reset_info, count); + struct vfio_pci_dependent_device *devices = NULL; struct vfio_pci_hot_reset_info hdr; struct vfio_pci_fill_info fill = {}; bool slot = false; - int ret = 0; + int ret, count = 0; if (copy_from_user(&hdr, arg, minsz)) return -EFAULT; @@ -1277,9 +1276,26 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info( else if (pci_probe_reset_bus(vdev->pdev->bus)) return -ENODEV; - fill.devices = arg->devices; - fill.devices_end = arg->devices + - (hdr.argsz - sizeof(hdr)) / sizeof(arg->devices[0]); + ret = 
vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs, + &count, slot); + if (ret) + return ret; + + if (WARN_ON(!count)) /* Should always be at least one */ + return -ERANGE; + + if (count > (hdr.argsz - sizeof(hdr)) / sizeof(*devices)) { + hdr.count = count; + ret = -ENOSPC; + goto header; + } + + devices = kcalloc(count, sizeof(*devices), GFP_KERNEL); + if (!devices) + return -ENOMEM; + + fill.devices = devices; + fill.nr_devices = count; fill.vdev = &vdev->vdev; if (vfio_device_cdev_opened(&vdev->vdev)) @@ -1291,16 +1307,23 @@ static int vfio_pci_ioctl_get_pci_hot_reset_info( &fill, slot); mutex_unlock(&vdev->vdev.dev_set->lock); if (ret) - return ret; + goto out; + + if (copy_to_user(arg->devices, devices, + sizeof(*devices) * fill.count)) { + ret = -EFAULT; + goto out; + } hdr.count = fill.count; hdr.flags = fill.flags; - if (copy_to_user(arg, &hdr, minsz)) - return -EFAULT; - if (fill.count > fill.devices - arg->devices) - return -ENOSPC; - return 0; +header: + if (copy_to_user(arg, &hdr, minsz)) + ret = -EFAULT; +out: + kfree(devices); + return ret; } static int @@ -1587,100 +1610,20 @@ ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *bu } EXPORT_SYMBOL_GPL(vfio_pci_core_write); -/* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */ -static int vfio_pci_zap_and_vma_lock(struct vfio_pci_core_device *vdev, bool try) +static void vfio_pci_zap_bars(struct vfio_pci_core_device *vdev) { - struct vfio_pci_mmap_vma *mmap_vma, *tmp; - - /* - * Lock ordering: - * vma_lock is nested under mmap_lock for vm_ops callback paths. - * The memory_lock semaphore is used by both code paths calling - * into this function to zap vmas and the vm_ops.fault callback - * to protect the memory enable state of the device. 
- * - * When zapping vmas we need to maintain the mmap_lock => vma_lock - * ordering, which requires using vma_lock to walk vma_list to - * acquire an mm, then dropping vma_lock to get the mmap_lock and - * reacquiring vma_lock. This logic is derived from similar - * requirements in uverbs_user_mmap_disassociate(). - * - * mmap_lock must always be the top-level lock when it is taken. - * Therefore we can only hold the memory_lock write lock when - * vma_list is empty, as we'd need to take mmap_lock to clear - * entries. vma_list can only be guaranteed empty when holding - * vma_lock, thus memory_lock is nested under vma_lock. - * - * This enables the vm_ops.fault callback to acquire vma_lock, - * followed by memory_lock read lock, while already holding - * mmap_lock without risk of deadlock. - */ - while (1) { - struct mm_struct *mm = NULL; + struct vfio_device *core_vdev = &vdev->vdev; + loff_t start = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_BAR0_REGION_INDEX); + loff_t end = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_ROM_REGION_INDEX); + loff_t len = end - start; - if (try) { - if (!mutex_trylock(&vdev->vma_lock)) - return 0; - } else { - mutex_lock(&vdev->vma_lock); - } - while (!list_empty(&vdev->vma_list)) { - mmap_vma = list_first_entry(&vdev->vma_list, - struct vfio_pci_mmap_vma, - vma_next); - mm = mmap_vma->vma->vm_mm; - if (mmget_not_zero(mm)) - break; - - list_del(&mmap_vma->vma_next); - kfree(mmap_vma); - mm = NULL; - } - if (!mm) - return 1; - mutex_unlock(&vdev->vma_lock); - - if (try) { - if (!mmap_read_trylock(mm)) { - mmput(mm); - return 0; - } - } else { - mmap_read_lock(mm); - } - if (try) { - if (!mutex_trylock(&vdev->vma_lock)) { - mmap_read_unlock(mm); - mmput(mm); - return 0; - } - } else { - mutex_lock(&vdev->vma_lock); - } - list_for_each_entry_safe(mmap_vma, tmp, - &vdev->vma_list, vma_next) { - struct vm_area_struct *vma = mmap_vma->vma; - - if (vma->vm_mm != mm) - continue; - - list_del(&mmap_vma->vma_next); - kfree(mmap_vma); - - zap_vma_ptes(vma, 
vma->vm_start, - vma->vm_end - vma->vm_start); - } - mutex_unlock(&vdev->vma_lock); - mmap_read_unlock(mm); - mmput(mm); - } + unmap_mapping_range(core_vdev->inode->i_mapping, start, len, true); } void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev) { - vfio_pci_zap_and_vma_lock(vdev, false); down_write(&vdev->memory_lock); - mutex_unlock(&vdev->vma_lock); + vfio_pci_zap_bars(vdev); } u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev) @@ -1702,99 +1645,56 @@ void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev, u16 c up_write(&vdev->memory_lock); } -/* Caller holds vma_lock */ -static int __vfio_pci_add_vma(struct vfio_pci_core_device *vdev, - struct vm_area_struct *vma) -{ - struct vfio_pci_mmap_vma *mmap_vma; - - mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL_ACCOUNT); - if (!mmap_vma) - return -ENOMEM; - - mmap_vma->vma = vma; - list_add(&mmap_vma->vma_next, &vdev->vma_list); - - return 0; -} - -/* - * Zap mmaps on open so that we can fault them in on access and therefore - * our vma_list only tracks mappings accessed since last zap. 
- */ -static void vfio_pci_mmap_open(struct vm_area_struct *vma) -{ - zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); -} - -static void vfio_pci_mmap_close(struct vm_area_struct *vma) +static unsigned long vma_to_pfn(struct vm_area_struct *vma) { struct vfio_pci_core_device *vdev = vma->vm_private_data; - struct vfio_pci_mmap_vma *mmap_vma; + int index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT); + u64 pgoff; - mutex_lock(&vdev->vma_lock); - list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) { - if (mmap_vma->vma == vma) { - list_del(&mmap_vma->vma_next); - kfree(mmap_vma); - break; - } - } - mutex_unlock(&vdev->vma_lock); + pgoff = vma->vm_pgoff & + ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); + + return (pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT) + pgoff; } static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct vfio_pci_core_device *vdev = vma->vm_private_data; - struct vfio_pci_mmap_vma *mmap_vma; - vm_fault_t ret = VM_FAULT_NOPAGE; + unsigned long pfn, pgoff = vmf->pgoff - vma->vm_pgoff; + unsigned long addr = vma->vm_start; + vm_fault_t ret = VM_FAULT_SIGBUS; + + pfn = vma_to_pfn(vma); - mutex_lock(&vdev->vma_lock); down_read(&vdev->memory_lock); - /* - * Memory region cannot be accessed if the low power feature is engaged - * or memory access is disabled. - */ - if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev)) { - ret = VM_FAULT_SIGBUS; - goto up_out; - } + if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev)) + goto out_unlock; + + ret = vmf_insert_pfn(vma, vmf->address, pfn + pgoff); + if (ret & VM_FAULT_ERROR) + goto out_unlock; /* - * We populate the whole vma on fault, so we need to test whether - * the vma has already been mapped, such as for concurrent faults - * to the same vma. io_remap_pfn_range() will trigger a BUG_ON if - * we ask it to fill the same range again. 
+ * Pre-fault the remainder of the vma, abort further insertions and + * suppress error if fault is encountered during pre-fault. */ - list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) { - if (mmap_vma->vma == vma) - goto up_out; - } - - if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - vma->vm_end - vma->vm_start, - vma->vm_page_prot)) { - ret = VM_FAULT_SIGBUS; - zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); - goto up_out; - } + for (; addr < vma->vm_end; addr += PAGE_SIZE, pfn++) { + if (addr == vmf->address) + continue; - if (__vfio_pci_add_vma(vdev, vma)) { - ret = VM_FAULT_OOM; - zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); + if (vmf_insert_pfn(vma, addr, pfn) & VM_FAULT_ERROR) + break; } -up_out: +out_unlock: up_read(&vdev->memory_lock); - mutex_unlock(&vdev->vma_lock); + return ret; } static const struct vm_operations_struct vfio_pci_mmap_ops = { - .open = vfio_pci_mmap_open, - .close = vfio_pci_mmap_close, .fault = vfio_pci_mmap_fault, }; @@ -1857,11 +1757,12 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma vma->vm_private_data = vdev; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff; + vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); /* - * See remap_pfn_range(), called from vfio_pci_fault() but we can't - * change vm_flags within the fault handler. Set them now. + * Set vm_flags now, they should not be changed in the fault handler. + * We want the same flags and page protection (decrypted above) as + * io_remap_pfn_range() would set. 
* * VM_ALLOW_ANY_UNCACHED: The VMA flag is implemented for ARM64, * allowing KVM stage 2 device mapping attributes to use Normal-NC @@ -2179,8 +2080,6 @@ int vfio_pci_core_init_dev(struct vfio_device *core_vdev) mutex_init(&vdev->ioeventfds_lock); INIT_LIST_HEAD(&vdev->dummy_resources_list); INIT_LIST_HEAD(&vdev->ioeventfds_list); - mutex_init(&vdev->vma_lock); - INIT_LIST_HEAD(&vdev->vma_list); INIT_LIST_HEAD(&vdev->sriov_pfs_item); init_rwsem(&vdev->memory_lock); xa_init(&vdev->ctx); @@ -2196,7 +2095,6 @@ void vfio_pci_core_release_dev(struct vfio_device *core_vdev) mutex_destroy(&vdev->igate); mutex_destroy(&vdev->ioeventfds_lock); - mutex_destroy(&vdev->vma_lock); kfree(vdev->region); kfree(vdev->pm_save); } @@ -2474,26 +2372,15 @@ unwind: return ret; } -/* - * We need to get memory_lock for each device, but devices can share mmap_lock, - * therefore we need to zap and hold the vma_lock for each device, and only then - * get each memory_lock. - */ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, struct vfio_pci_group_info *groups, struct iommufd_ctx *iommufd_ctx) { - struct vfio_pci_core_device *cur_mem; - struct vfio_pci_core_device *cur_vma; - struct vfio_pci_core_device *cur; + struct vfio_pci_core_device *vdev; struct pci_dev *pdev; - bool is_mem = true; int ret; mutex_lock(&dev_set->lock); - cur_mem = list_first_entry(&dev_set->device_list, - struct vfio_pci_core_device, - vdev.dev_set_list); pdev = vfio_pci_dev_set_resettable(dev_set); if (!pdev) { @@ -2510,7 +2397,7 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, if (ret) goto err_unlock; - list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) { + list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list) { bool owned; /* @@ -2534,38 +2421,38 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, * Otherwise, reset is not allowed. 
*/ if (iommufd_ctx) { - int devid = vfio_iommufd_get_dev_id(&cur_vma->vdev, + int devid = vfio_iommufd_get_dev_id(&vdev->vdev, iommufd_ctx); owned = (devid > 0 || devid == -ENOENT); } else { - owned = vfio_dev_in_groups(&cur_vma->vdev, groups); + owned = vfio_dev_in_groups(&vdev->vdev, groups); } if (!owned) { ret = -EINVAL; - goto err_undo; + break; } /* - * Locking multiple devices is prone to deadlock, runaway and - * unwind if we hit contention. + * Take the memory write lock for each device and zap BAR + * mappings to prevent the user accessing the device while in + * reset. Locking multiple devices is prone to deadlock, + * runaway and unwind if we hit contention. */ - if (!vfio_pci_zap_and_vma_lock(cur_vma, true)) { + if (!down_write_trylock(&vdev->memory_lock)) { ret = -EBUSY; - goto err_undo; + break; } + + vfio_pci_zap_bars(vdev); } - cur_vma = NULL; - list_for_each_entry(cur_mem, &dev_set->device_list, vdev.dev_set_list) { - if (!down_write_trylock(&cur_mem->memory_lock)) { - ret = -EBUSY; - goto err_undo; - } - mutex_unlock(&cur_mem->vma_lock); + if (!list_entry_is_head(vdev, + &dev_set->device_list, vdev.dev_set_list)) { + vdev = list_prev_entry(vdev, vdev.dev_set_list); + goto err_undo; } - cur_mem = NULL; /* * The pci_reset_bus() will reset all the devices in the bus. @@ -2576,25 +2463,22 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, * cause the PCI config space reset without restoring the original * state (saved locally in 'vdev->pm_save'). 
*/ - list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) - vfio_pci_set_power_state(cur, PCI_D0); + list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list) + vfio_pci_set_power_state(vdev, PCI_D0); ret = pci_reset_bus(pdev); + vdev = list_last_entry(&dev_set->device_list, + struct vfio_pci_core_device, vdev.dev_set_list); + err_undo: - list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) { - if (cur == cur_mem) - is_mem = false; - if (cur == cur_vma) - break; - if (is_mem) - up_write(&cur->memory_lock); - else - mutex_unlock(&cur->vma_lock); - } + list_for_each_entry_from_reverse(vdev, &dev_set->device_list, + vdev.dev_set_list) + up_write(&vdev->memory_lock); + + list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list) + pm_runtime_put(&vdev->pdev->dev); - list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) - pm_runtime_put(&cur->pdev->dev); err_unlock: mutex_unlock(&dev_set->lock); return ret; diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index fb5392b749..8382c58343 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -23,11 +23,12 @@ #include "vfio_pci_priv.h" struct vfio_pci_irq_ctx { - struct eventfd_ctx *trigger; - struct virqfd *unmask; - struct virqfd *mask; - char *name; - bool masked; + struct vfio_pci_core_device *vdev; + struct eventfd_ctx *trigger; + struct virqfd *unmask; + struct virqfd *mask; + char *name; + bool masked; struct irq_bypass_producer producer; }; @@ -84,19 +85,14 @@ vfio_irq_ctx_alloc(struct vfio_pci_core_device *vdev, unsigned long index) /* * INTx */ -static void vfio_send_intx_eventfd(void *opaque, void *unused) +static void vfio_send_intx_eventfd(void *opaque, void *data) { struct vfio_pci_core_device *vdev = opaque; if (likely(is_intx(vdev) && !vdev->virq_disabled)) { - struct vfio_pci_irq_ctx *ctx; - struct eventfd_ctx *trigger; + struct vfio_pci_irq_ctx *ctx = data; + struct eventfd_ctx 
*trigger = READ_ONCE(ctx->trigger); - ctx = vfio_irq_ctx_get(vdev, 0); - if (WARN_ON_ONCE(!ctx)) - return; - - trigger = READ_ONCE(ctx->trigger); if (likely(trigger)) eventfd_signal(trigger); } @@ -166,11 +162,11 @@ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev) * a signal is necessary, which can then be handled via a work queue * or directly depending on the caller. */ -static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) +static int vfio_pci_intx_unmask_handler(void *opaque, void *data) { struct vfio_pci_core_device *vdev = opaque; struct pci_dev *pdev = vdev->pdev; - struct vfio_pci_irq_ctx *ctx; + struct vfio_pci_irq_ctx *ctx = data; unsigned long flags; int ret = 0; @@ -186,10 +182,6 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) goto out_unlock; } - ctx = vfio_irq_ctx_get(vdev, 0); - if (WARN_ON_ONCE(!ctx)) - goto out_unlock; - if (ctx->masked && !vdev->virq_disabled) { /* * A pending interrupt here would immediately trigger, @@ -213,10 +205,12 @@ out_unlock: static void __vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev) { + struct vfio_pci_irq_ctx *ctx = vfio_irq_ctx_get(vdev, 0); + lockdep_assert_held(&vdev->igate); - if (vfio_pci_intx_unmask_handler(vdev, NULL) > 0) - vfio_send_intx_eventfd(vdev, NULL); + if (vfio_pci_intx_unmask_handler(vdev, ctx) > 0) + vfio_send_intx_eventfd(vdev, ctx); } void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev) @@ -228,15 +222,11 @@ void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev) static irqreturn_t vfio_intx_handler(int irq, void *dev_id) { - struct vfio_pci_core_device *vdev = dev_id; - struct vfio_pci_irq_ctx *ctx; + struct vfio_pci_irq_ctx *ctx = dev_id; + struct vfio_pci_core_device *vdev = ctx->vdev; unsigned long flags; int ret = IRQ_NONE; - ctx = vfio_irq_ctx_get(vdev, 0); - if (WARN_ON_ONCE(!ctx)) - return ret; - spin_lock_irqsave(&vdev->irqlock, flags); if (!vdev->pci_2_3) { @@ -252,7 +242,7 @@ static irqreturn_t vfio_intx_handler(int 
irq, void *dev_id) spin_unlock_irqrestore(&vdev->irqlock, flags); if (ret == IRQ_HANDLED) - vfio_send_intx_eventfd(vdev, NULL); + vfio_send_intx_eventfd(vdev, ctx); return ret; } @@ -277,11 +267,14 @@ static int vfio_intx_enable(struct vfio_pci_core_device *vdev, return -ENOMEM; ctx = vfio_irq_ctx_alloc(vdev, 0); - if (!ctx) + if (!ctx) { + kfree(name); return -ENOMEM; + } ctx->name = name; ctx->trigger = trigger; + ctx->vdev = vdev; /* * Fill the initial masked state based on virq_disabled. After @@ -312,7 +305,7 @@ static int vfio_intx_enable(struct vfio_pci_core_device *vdev, vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX; ret = request_irq(pdev->irq, vfio_intx_handler, - irqflags, ctx->name, vdev); + irqflags, ctx->name, ctx); if (ret) { vdev->irq_type = VFIO_PCI_NUM_IRQS; kfree(name); @@ -358,7 +351,7 @@ static void vfio_intx_disable(struct vfio_pci_core_device *vdev) if (ctx) { vfio_virqfd_disable(&ctx->unmask); vfio_virqfd_disable(&ctx->mask); - free_irq(pdev->irq, vdev); + free_irq(pdev->irq, ctx); if (ctx->trigger) eventfd_ctx_put(ctx->trigger); kfree(ctx->name); @@ -606,7 +599,7 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev, if (fd >= 0) return vfio_virqfd_enable((void *) vdev, vfio_pci_intx_unmask_handler, - vfio_send_intx_eventfd, NULL, + vfio_send_intx_eventfd, ctx, &ctx->unmask, fd); vfio_virqfd_disable(&ctx->unmask); @@ -673,11 +666,11 @@ static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev, return -EINVAL; if (flags & VFIO_IRQ_SET_DATA_NONE) { - vfio_send_intx_eventfd(vdev, NULL); + vfio_send_intx_eventfd(vdev, vfio_irq_ctx_get(vdev, 0)); } else if (flags & VFIO_IRQ_SET_DATA_BOOL) { uint8_t trigger = *(uint8_t *)data; if (trigger) - vfio_send_intx_eventfd(vdev, NULL); + vfio_send_intx_eventfd(vdev, vfio_irq_ctx_get(vdev, 0)); } return 0; } |