summaryrefslogtreecommitdiffstats
path: root/drivers/vfio/container.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-11 08:27:49 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-11 08:27:49 +0000
commitace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
treeb2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/vfio/container.c
parentInitial commit. (diff)
downloadlinux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz
linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip
Adding upstream version 6.6.15.upstream/6.6.15
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/vfio/container.c')
-rw-r--r--drivers/vfio/container.c607
1 files changed, 607 insertions, 0 deletions
diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c
new file mode 100644
index 0000000000..d53d08f169
--- /dev/null
+++ b/drivers/vfio/container.c
@@ -0,0 +1,607 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
+ *
+ * VFIO container (/dev/vfio/vfio)
+ */
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/capability.h>
+#include <linux/iommu.h>
+#include <linux/miscdevice.h>
+#include <linux/vfio.h>
+#include <uapi/linux/vfio.h>
+
+#include "vfio.h"
+
+struct vfio_container {
+ struct kref kref;
+ struct list_head group_list;
+ struct rw_semaphore group_lock;
+ struct vfio_iommu_driver *iommu_driver;
+ void *iommu_data;
+ bool noiommu;
+};
+
+static struct vfio {
+ struct list_head iommu_drivers_list;
+ struct mutex iommu_drivers_lock;
+} vfio;
+
+static void *vfio_noiommu_open(unsigned long arg)
+{
+ if (arg != VFIO_NOIOMMU_IOMMU)
+ return ERR_PTR(-EINVAL);
+ if (!capable(CAP_SYS_RAWIO))
+ return ERR_PTR(-EPERM);
+
+ return NULL;
+}
+
+static void vfio_noiommu_release(void *iommu_data)
+{
+}
+
+static long vfio_noiommu_ioctl(void *iommu_data,
+ unsigned int cmd, unsigned long arg)
+{
+ if (cmd == VFIO_CHECK_EXTENSION)
+ return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
+
+ return -ENOTTY;
+}
+
+static int vfio_noiommu_attach_group(void *iommu_data,
+ struct iommu_group *iommu_group, enum vfio_group_type type)
+{
+ return 0;
+}
+
+static void vfio_noiommu_detach_group(void *iommu_data,
+ struct iommu_group *iommu_group)
+{
+}
+
+static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
+ .name = "vfio-noiommu",
+ .owner = THIS_MODULE,
+ .open = vfio_noiommu_open,
+ .release = vfio_noiommu_release,
+ .ioctl = vfio_noiommu_ioctl,
+ .attach_group = vfio_noiommu_attach_group,
+ .detach_group = vfio_noiommu_detach_group,
+};
+
+/*
+ * Only noiommu containers can use vfio-noiommu and noiommu containers can only
+ * use vfio-noiommu.
+ */
+static bool vfio_iommu_driver_allowed(struct vfio_container *container,
+ const struct vfio_iommu_driver *driver)
+{
+ if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU))
+ return true;
+ return container->noiommu == (driver->ops == &vfio_noiommu_ops);
+}
+
+/*
+ * IOMMU driver registration
+ */
+int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
+{
+ struct vfio_iommu_driver *driver, *tmp;
+
+ if (WARN_ON(!ops->register_device != !ops->unregister_device))
+ return -EINVAL;
+
+ driver = kzalloc(sizeof(*driver), GFP_KERNEL);
+ if (!driver)
+ return -ENOMEM;
+
+ driver->ops = ops;
+
+ mutex_lock(&vfio.iommu_drivers_lock);
+
+ /* Check for duplicates */
+ list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
+ if (tmp->ops == ops) {
+ mutex_unlock(&vfio.iommu_drivers_lock);
+ kfree(driver);
+ return -EINVAL;
+ }
+ }
+
+ list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
+
+ mutex_unlock(&vfio.iommu_drivers_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
+
+void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
+{
+ struct vfio_iommu_driver *driver;
+
+ mutex_lock(&vfio.iommu_drivers_lock);
+ list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
+ if (driver->ops == ops) {
+ list_del(&driver->vfio_next);
+ mutex_unlock(&vfio.iommu_drivers_lock);
+ kfree(driver);
+ return;
+ }
+ }
+ mutex_unlock(&vfio.iommu_drivers_lock);
+}
+EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
+
+/*
+ * Container objects - containers are created when /dev/vfio/vfio is
+ * opened, but their lifecycle extends until the last user is done, so
+ * it's freed via kref. Must support container/group/device being
+ * closed in any order.
+ */
+static void vfio_container_release(struct kref *kref)
+{
+ struct vfio_container *container;
+ container = container_of(kref, struct vfio_container, kref);
+
+ kfree(container);
+}
+
+static void vfio_container_get(struct vfio_container *container)
+{
+ kref_get(&container->kref);
+}
+
+static void vfio_container_put(struct vfio_container *container)
+{
+ kref_put(&container->kref, vfio_container_release);
+}
+
+void vfio_device_container_register(struct vfio_device *device)
+{
+ struct vfio_iommu_driver *iommu_driver =
+ device->group->container->iommu_driver;
+
+ if (iommu_driver && iommu_driver->ops->register_device)
+ iommu_driver->ops->register_device(
+ device->group->container->iommu_data, device);
+}
+
+void vfio_device_container_unregister(struct vfio_device *device)
+{
+ struct vfio_iommu_driver *iommu_driver =
+ device->group->container->iommu_driver;
+
+ if (iommu_driver && iommu_driver->ops->unregister_device)
+ iommu_driver->ops->unregister_device(
+ device->group->container->iommu_data, device);
+}
+
+static long
+vfio_container_ioctl_check_extension(struct vfio_container *container,
+ unsigned long arg)
+{
+ struct vfio_iommu_driver *driver;
+ long ret = 0;
+
+ down_read(&container->group_lock);
+
+ driver = container->iommu_driver;
+
+ switch (arg) {
+ /* No base extensions yet */
+ default:
+ /*
+ * If no driver is set, poll all registered drivers for
+ * extensions and return the first positive result. If
+ * a driver is already set, further queries will be passed
+ * only to that driver.
+ */
+ if (!driver) {
+ mutex_lock(&vfio.iommu_drivers_lock);
+ list_for_each_entry(driver, &vfio.iommu_drivers_list,
+ vfio_next) {
+
+ if (!list_empty(&container->group_list) &&
+ !vfio_iommu_driver_allowed(container,
+ driver))
+ continue;
+ if (!try_module_get(driver->ops->owner))
+ continue;
+
+ ret = driver->ops->ioctl(NULL,
+ VFIO_CHECK_EXTENSION,
+ arg);
+ module_put(driver->ops->owner);
+ if (ret > 0)
+ break;
+ }
+ mutex_unlock(&vfio.iommu_drivers_lock);
+ } else
+ ret = driver->ops->ioctl(container->iommu_data,
+ VFIO_CHECK_EXTENSION, arg);
+ }
+
+ up_read(&container->group_lock);
+
+ return ret;
+}
+
+/* hold write lock on container->group_lock */
+static int __vfio_container_attach_groups(struct vfio_container *container,
+ struct vfio_iommu_driver *driver,
+ void *data)
+{
+ struct vfio_group *group;
+ int ret = -ENODEV;
+
+ list_for_each_entry(group, &container->group_list, container_next) {
+ ret = driver->ops->attach_group(data, group->iommu_group,
+ group->type);
+ if (ret)
+ goto unwind;
+ }
+
+ return ret;
+
+unwind:
+ list_for_each_entry_continue_reverse(group, &container->group_list,
+ container_next) {
+ driver->ops->detach_group(data, group->iommu_group);
+ }
+
+ return ret;
+}
+
+static long vfio_ioctl_set_iommu(struct vfio_container *container,
+ unsigned long arg)
+{
+ struct vfio_iommu_driver *driver;
+ long ret = -ENODEV;
+
+ down_write(&container->group_lock);
+
+ /*
+ * The container is designed to be an unprivileged interface while
+ * the group can be assigned to specific users. Therefore, only by
+ * adding a group to a container does the user get the privilege of
+ * enabling the iommu, which may allocate finite resources. There
+ * is no unset_iommu, but by removing all the groups from a container,
+ * the container is deprivileged and returns to an unset state.
+ */
+ if (list_empty(&container->group_list) || container->iommu_driver) {
+ up_write(&container->group_lock);
+ return -EINVAL;
+ }
+
+ mutex_lock(&vfio.iommu_drivers_lock);
+ list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
+ void *data;
+
+ if (!vfio_iommu_driver_allowed(container, driver))
+ continue;
+ if (!try_module_get(driver->ops->owner))
+ continue;
+
+ /*
+ * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
+ * so test which iommu driver reported support for this
+ * extension and call open on them. We also pass them the
+ * magic, allowing a single driver to support multiple
+ * interfaces if they'd like.
+ */
+ if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
+ module_put(driver->ops->owner);
+ continue;
+ }
+
+ data = driver->ops->open(arg);
+ if (IS_ERR(data)) {
+ ret = PTR_ERR(data);
+ module_put(driver->ops->owner);
+ continue;
+ }
+
+ ret = __vfio_container_attach_groups(container, driver, data);
+ if (ret) {
+ driver->ops->release(data);
+ module_put(driver->ops->owner);
+ continue;
+ }
+
+ container->iommu_driver = driver;
+ container->iommu_data = data;
+ break;
+ }
+
+ mutex_unlock(&vfio.iommu_drivers_lock);
+ up_write(&container->group_lock);
+
+ return ret;
+}
+
+static long vfio_fops_unl_ioctl(struct file *filep,
+ unsigned int cmd, unsigned long arg)
+{
+ struct vfio_container *container = filep->private_data;
+ struct vfio_iommu_driver *driver;
+ void *data;
+ long ret = -EINVAL;
+
+ if (!container)
+ return ret;
+
+ switch (cmd) {
+ case VFIO_GET_API_VERSION:
+ ret = VFIO_API_VERSION;
+ break;
+ case VFIO_CHECK_EXTENSION:
+ ret = vfio_container_ioctl_check_extension(container, arg);
+ break;
+ case VFIO_SET_IOMMU:
+ ret = vfio_ioctl_set_iommu(container, arg);
+ break;
+ default:
+ driver = container->iommu_driver;
+ data = container->iommu_data;
+
+ if (driver) /* passthrough all unrecognized ioctls */
+ ret = driver->ops->ioctl(data, cmd, arg);
+ }
+
+ return ret;
+}
+
+static int vfio_fops_open(struct inode *inode, struct file *filep)
+{
+ struct vfio_container *container;
+
+ container = kzalloc(sizeof(*container), GFP_KERNEL_ACCOUNT);
+ if (!container)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&container->group_list);
+ init_rwsem(&container->group_lock);
+ kref_init(&container->kref);
+
+ filep->private_data = container;
+
+ return 0;
+}
+
+static int vfio_fops_release(struct inode *inode, struct file *filep)
+{
+ struct vfio_container *container = filep->private_data;
+
+ filep->private_data = NULL;
+
+ vfio_container_put(container);
+
+ return 0;
+}
+
+static const struct file_operations vfio_fops = {
+ .owner = THIS_MODULE,
+ .open = vfio_fops_open,
+ .release = vfio_fops_release,
+ .unlocked_ioctl = vfio_fops_unl_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
+};
+
+struct vfio_container *vfio_container_from_file(struct file *file)
+{
+ struct vfio_container *container;
+
+ /* Sanity check, is this really our fd? */
+ if (file->f_op != &vfio_fops)
+ return NULL;
+
+ container = file->private_data;
+ WARN_ON(!container); /* fget ensures we don't race vfio_release */
+ return container;
+}
+
+static struct miscdevice vfio_dev = {
+ .minor = VFIO_MINOR,
+ .name = "vfio",
+ .fops = &vfio_fops,
+ .nodename = "vfio/vfio",
+ .mode = S_IRUGO | S_IWUGO,
+};
+
+int vfio_container_attach_group(struct vfio_container *container,
+ struct vfio_group *group)
+{
+ struct vfio_iommu_driver *driver;
+ int ret = 0;
+
+ lockdep_assert_held(&group->group_lock);
+
+ if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ down_write(&container->group_lock);
+
+ /* Real groups and fake groups cannot mix */
+ if (!list_empty(&container->group_list) &&
+ container->noiommu != (group->type == VFIO_NO_IOMMU)) {
+ ret = -EPERM;
+ goto out_unlock_container;
+ }
+
+ if (group->type == VFIO_IOMMU) {
+ ret = iommu_group_claim_dma_owner(group->iommu_group, group);
+ if (ret)
+ goto out_unlock_container;
+ }
+
+ driver = container->iommu_driver;
+ if (driver) {
+ ret = driver->ops->attach_group(container->iommu_data,
+ group->iommu_group,
+ group->type);
+ if (ret) {
+ if (group->type == VFIO_IOMMU)
+ iommu_group_release_dma_owner(
+ group->iommu_group);
+ goto out_unlock_container;
+ }
+ }
+
+ group->container = container;
+ group->container_users = 1;
+ container->noiommu = (group->type == VFIO_NO_IOMMU);
+ list_add(&group->container_next, &container->group_list);
+
+ /* Get a reference on the container and mark a user within the group */
+ vfio_container_get(container);
+
+out_unlock_container:
+ up_write(&container->group_lock);
+ return ret;
+}
+
+void vfio_group_detach_container(struct vfio_group *group)
+{
+ struct vfio_container *container = group->container;
+ struct vfio_iommu_driver *driver;
+
+ lockdep_assert_held(&group->group_lock);
+ WARN_ON(group->container_users != 1);
+
+ down_write(&container->group_lock);
+
+ driver = container->iommu_driver;
+ if (driver)
+ driver->ops->detach_group(container->iommu_data,
+ group->iommu_group);
+
+ if (group->type == VFIO_IOMMU)
+ iommu_group_release_dma_owner(group->iommu_group);
+
+ group->container = NULL;
+ group->container_users = 0;
+ list_del(&group->container_next);
+
+ /* Detaching the last group deprivileges a container, remove iommu */
+ if (driver && list_empty(&container->group_list)) {
+ driver->ops->release(container->iommu_data);
+ module_put(driver->ops->owner);
+ container->iommu_driver = NULL;
+ container->iommu_data = NULL;
+ }
+
+ up_write(&container->group_lock);
+
+ vfio_container_put(container);
+}
+
+int vfio_group_use_container(struct vfio_group *group)
+{
+ lockdep_assert_held(&group->group_lock);
+
+ /*
+ * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
+ * VFIO_SET_IOMMU hasn't been done yet.
+ */
+ if (!group->container->iommu_driver)
+ return -EINVAL;
+
+ if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ get_file(group->opened_file);
+ group->container_users++;
+ return 0;
+}
+
+void vfio_group_unuse_container(struct vfio_group *group)
+{
+ lockdep_assert_held(&group->group_lock);
+
+ WARN_ON(group->container_users <= 1);
+ group->container_users--;
+ fput(group->opened_file);
+}
+
+int vfio_device_container_pin_pages(struct vfio_device *device,
+ dma_addr_t iova, int npage,
+ int prot, struct page **pages)
+{
+ struct vfio_container *container = device->group->container;
+ struct iommu_group *iommu_group = device->group->iommu_group;
+ struct vfio_iommu_driver *driver = container->iommu_driver;
+
+ if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
+ return -E2BIG;
+
+ if (unlikely(!driver || !driver->ops->pin_pages))
+ return -ENOTTY;
+ return driver->ops->pin_pages(container->iommu_data, iommu_group, iova,
+ npage, prot, pages);
+}
+
+void vfio_device_container_unpin_pages(struct vfio_device *device,
+ dma_addr_t iova, int npage)
+{
+ struct vfio_container *container = device->group->container;
+
+ if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
+ return;
+
+ container->iommu_driver->ops->unpin_pages(container->iommu_data, iova,
+ npage);
+}
+
+int vfio_device_container_dma_rw(struct vfio_device *device,
+ dma_addr_t iova, void *data,
+ size_t len, bool write)
+{
+ struct vfio_container *container = device->group->container;
+ struct vfio_iommu_driver *driver = container->iommu_driver;
+
+ if (unlikely(!driver || !driver->ops->dma_rw))
+ return -ENOTTY;
+ return driver->ops->dma_rw(container->iommu_data, iova, data, len,
+ write);
+}
+
+int __init vfio_container_init(void)
+{
+ int ret;
+
+ mutex_init(&vfio.iommu_drivers_lock);
+ INIT_LIST_HEAD(&vfio.iommu_drivers_list);
+
+ ret = misc_register(&vfio_dev);
+ if (ret) {
+ pr_err("vfio: misc device register failed\n");
+ return ret;
+ }
+
+ if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) {
+ ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
+ if (ret)
+ goto err_misc;
+ }
+ return 0;
+
+err_misc:
+ misc_deregister(&vfio_dev);
+ return ret;
+}
+
+void vfio_container_cleanup(void)
+{
+ if (IS_ENABLED(CONFIG_VFIO_NOIOMMU))
+ vfio_unregister_iommu_driver(&vfio_noiommu_ops);
+ misc_deregister(&vfio_dev);
+ mutex_destroy(&vfio.iommu_drivers_lock);
+}
+
+MODULE_ALIAS_MISCDEV(VFIO_MINOR);
+MODULE_ALIAS("devname:vfio/vfio");