diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 17:35:05 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 17:39:31 +0000 |
commit | 85c675d0d09a45a135bddd15d7b385f8758c32fb (patch) | |
tree | 76267dbc9b9a130337be3640948fe397b04ac629 /drivers/iommu/iommufd | |
parent | Adding upstream version 6.6.15. (diff) | |
download | linux-85c675d0d09a45a135bddd15d7b385f8758c32fb.tar.xz linux-85c675d0d09a45a135bddd15d7b385f8758c32fb.zip |
Adding upstream version 6.7.7.upstream/6.7.7
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/iommu/iommufd')
-rw-r--r-- | drivers/iommu/iommufd/Makefile | 1 | ||||
-rw-r--r-- | drivers/iommu/iommufd/device.c | 188 | ||||
-rw-r--r-- | drivers/iommu/iommufd/hw_pagetable.c | 307 | ||||
-rw-r--r-- | drivers/iommu/iommufd/io_pagetable.c | 172 | ||||
-rw-r--r-- | drivers/iommu/iommufd/ioas.c | 14 | ||||
-rw-r--r-- | drivers/iommu/iommufd/iommufd_private.h | 152 | ||||
-rw-r--r-- | drivers/iommu/iommufd/iommufd_test.h | 39 | ||||
-rw-r--r-- | drivers/iommu/iommufd/iova_bitmap.c | 474 | ||||
-rw-r--r-- | drivers/iommu/iommufd/main.c | 163 | ||||
-rw-r--r-- | drivers/iommu/iommufd/selftest.c | 347 | ||||
-rw-r--r-- | drivers/iommu/iommufd/vfio_compat.c | 24 |
11 files changed, 1604 insertions, 277 deletions
diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile index 8aeba81800..34b4461469 100644 --- a/drivers/iommu/iommufd/Makefile +++ b/drivers/iommu/iommufd/Makefile @@ -11,3 +11,4 @@ iommufd-y := \ iommufd-$(CONFIG_IOMMUFD_TEST) += selftest.o obj-$(CONFIG_IOMMUFD) += iommufd.o +obj-$(CONFIG_IOMMUFD_DRIVER) += iova_bitmap.o diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index ce78c36715..873630c111 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -293,7 +293,7 @@ u32 iommufd_device_to_id(struct iommufd_device *idev) EXPORT_SYMBOL_NS_GPL(iommufd_device_to_id, IOMMUFD); static int iommufd_group_setup_msi(struct iommufd_group *igroup, - struct iommufd_hw_pagetable *hwpt) + struct iommufd_hwpt_paging *hwpt_paging) { phys_addr_t sw_msi_start = igroup->sw_msi_start; int rc; @@ -311,8 +311,9 @@ static int iommufd_group_setup_msi(struct iommufd_group *igroup, * matches what the IRQ layer actually expects in a newly created * domain. */ - if (sw_msi_start != PHYS_ADDR_MAX && !hwpt->msi_cookie) { - rc = iommu_get_msi_cookie(hwpt->domain, sw_msi_start); + if (sw_msi_start != PHYS_ADDR_MAX && !hwpt_paging->msi_cookie) { + rc = iommu_get_msi_cookie(hwpt_paging->common.domain, + sw_msi_start); if (rc) return rc; @@ -320,7 +321,31 @@ static int iommufd_group_setup_msi(struct iommufd_group *igroup, * iommu_get_msi_cookie() can only be called once per domain, * it returns -EBUSY on later calls. */ - hwpt->msi_cookie = true; + hwpt_paging->msi_cookie = true; + } + return 0; +} + +static int iommufd_hwpt_paging_attach(struct iommufd_hwpt_paging *hwpt_paging, + struct iommufd_device *idev) +{ + int rc; + + lockdep_assert_held(&idev->igroup->lock); + + rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt, + idev->dev, + &idev->igroup->sw_msi_start); + if (rc) + return rc; + + if (list_empty(&idev->igroup->device_list)) { + rc = iommufd_group_setup_msi(idev->igroup, hwpt_paging); + if (rc) { + iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, + idev->dev); + return rc; + } } return 0; } @@ -337,18 +362,12 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, goto err_unlock; } - /* Try to upgrade the domain we have */ - if (idev->enforce_cache_coherency) { - rc = iommufd_hw_pagetable_enforce_cc(hwpt); + if (hwpt_is_paging(hwpt)) { + rc = iommufd_hwpt_paging_attach(to_hwpt_paging(hwpt), idev); if (rc) goto err_unlock; } - rc = iopt_table_enforce_dev_resv_regions(&hwpt->ioas->iopt, idev->dev, - &idev->igroup->sw_msi_start); - if (rc) - goto err_unlock; - /* * Only attach to the group once for the first device that is in the * group. All the other devices will follow this attachment. The user @@ -357,10 +376,6 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, * attachment. */ if (list_empty(&idev->igroup->device_list)) { - rc = iommufd_group_setup_msi(idev->igroup, hwpt); - if (rc) - goto err_unresv; - rc = iommu_attach_group(hwpt->domain, idev->igroup->group); if (rc) goto err_unresv; @@ -371,7 +386,9 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, mutex_unlock(&idev->igroup->lock); return 0; err_unresv: - iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev); + if (hwpt_is_paging(hwpt)) + iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt, + idev->dev); err_unlock: mutex_unlock(&idev->igroup->lock); return rc; @@ -388,7 +405,9 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev) iommu_detach_group(hwpt->domain, idev->igroup->group); idev->igroup->hwpt = NULL; } - iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev); + if (hwpt_is_paging(hwpt)) + iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt, + idev->dev); mutex_unlock(&idev->igroup->lock); /* Caller must destroy hwpt */ @@ -407,14 +426,55 @@ iommufd_device_do_attach(struct iommufd_device *idev, return NULL; } +static void +iommufd_group_remove_reserved_iova(struct iommufd_group *igroup, + struct iommufd_hwpt_paging *hwpt_paging) +{ + struct iommufd_device *cur; + + lockdep_assert_held(&igroup->lock); + + list_for_each_entry(cur, &igroup->device_list, group_item) + iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev); +} + +static int +iommufd_group_do_replace_paging(struct iommufd_group *igroup, + struct iommufd_hwpt_paging *hwpt_paging) +{ + struct iommufd_hw_pagetable *old_hwpt = igroup->hwpt; + struct iommufd_device *cur; + int rc; + + lockdep_assert_held(&igroup->lock); + + if (!hwpt_is_paging(old_hwpt) || + hwpt_paging->ioas != to_hwpt_paging(old_hwpt)->ioas) { + list_for_each_entry(cur, &igroup->device_list, group_item) { + rc = iopt_table_enforce_dev_resv_regions( + &hwpt_paging->ioas->iopt, cur->dev, NULL); + if (rc) + goto err_unresv; + } + } + + rc = iommufd_group_setup_msi(igroup, hwpt_paging); + if (rc) + goto err_unresv; + return 0; + +err_unresv: + iommufd_group_remove_reserved_iova(igroup, hwpt_paging); + return rc; +} + static struct iommufd_hw_pagetable * iommufd_device_do_replace(struct iommufd_device *idev, struct iommufd_hw_pagetable *hwpt) { struct iommufd_group *igroup = idev->igroup; struct iommufd_hw_pagetable *old_hwpt; - unsigned int num_devices = 0; - struct iommufd_device *cur; + unsigned int num_devices; int rc; mutex_lock(&idev->igroup->lock); @@ -429,42 +489,27 @@ iommufd_device_do_replace(struct iommufd_device *idev, return NULL; } - /* Try to upgrade the domain we have */ - list_for_each_entry(cur, &igroup->device_list, group_item) { - num_devices++; - if (cur->enforce_cache_coherency) { - rc = iommufd_hw_pagetable_enforce_cc(hwpt); - if (rc) - goto err_unlock; - } - } - old_hwpt = igroup->hwpt; - if (hwpt->ioas != old_hwpt->ioas) { - list_for_each_entry(cur, &igroup->device_list, group_item) { - rc = iopt_table_enforce_dev_resv_regions( - &hwpt->ioas->iopt, cur->dev, NULL); - if (rc) - goto err_unresv; - } + if (hwpt_is_paging(hwpt)) { + rc = iommufd_group_do_replace_paging(igroup, + to_hwpt_paging(hwpt)); + if (rc) + goto err_unlock; } - rc = iommufd_group_setup_msi(idev->igroup, hwpt); - if (rc) - goto err_unresv; - rc = iommu_group_replace_domain(igroup->group, hwpt->domain); if (rc) goto err_unresv; - if (hwpt->ioas != old_hwpt->ioas) { - list_for_each_entry(cur, &igroup->device_list, group_item) - iopt_remove_reserved_iova(&old_hwpt->ioas->iopt, - cur->dev); - } + if (hwpt_is_paging(old_hwpt) && + (!hwpt_is_paging(hwpt) || + to_hwpt_paging(hwpt)->ioas != to_hwpt_paging(old_hwpt)->ioas)) + iommufd_group_remove_reserved_iova(igroup, + to_hwpt_paging(old_hwpt)); igroup->hwpt = hwpt; + num_devices = list_count_nodes(&igroup->device_list); /* * Move the refcounts held by the device_list to the new hwpt. Retain a * refcount for this thread as the caller will free it. @@ -478,8 +523,9 @@ iommufd_device_do_replace(struct iommufd_device *idev, /* Caller must destroy old_hwpt */ return old_hwpt; err_unresv: - list_for_each_entry(cur, &igroup->device_list, group_item) - iopt_remove_reserved_iova(&hwpt->ioas->iopt, cur->dev); + if (hwpt_is_paging(hwpt)) + iommufd_group_remove_reserved_iova(igroup, + to_hwpt_paging(old_hwpt)); err_unlock: mutex_unlock(&idev->igroup->lock); return ERR_PTR(rc); @@ -507,6 +553,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, */ bool immediate_attach = do_attach == iommufd_device_do_attach; struct iommufd_hw_pagetable *destroy_hwpt; + struct iommufd_hwpt_paging *hwpt_paging; struct iommufd_hw_pagetable *hwpt; /* @@ -515,15 +562,16 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, * other. */ mutex_lock(&ioas->mutex); - list_for_each_entry(hwpt, &ioas->hwpt_list, hwpt_item) { - if (!hwpt->auto_domain) + list_for_each_entry(hwpt_paging, &ioas->hwpt_list, hwpt_item) { + if (!hwpt_paging->auto_domain) continue; + hwpt = &hwpt_paging->common; if (!iommufd_lock_obj(&hwpt->obj)) continue; destroy_hwpt = (*do_attach)(idev, hwpt); if (IS_ERR(destroy_hwpt)) { - iommufd_put_object(&hwpt->obj); + iommufd_put_object(idev->ictx, &hwpt->obj); /* * -EINVAL means the domain is incompatible with the * device. Other error codes should propagate to @@ -535,16 +583,17 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, goto out_unlock; } *pt_id = hwpt->obj.id; - iommufd_put_object(&hwpt->obj); + iommufd_put_object(idev->ictx, &hwpt->obj); goto out_unlock; } - hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev, - immediate_attach); - if (IS_ERR(hwpt)) { - destroy_hwpt = ERR_CAST(hwpt); + hwpt_paging = iommufd_hwpt_paging_alloc(idev->ictx, ioas, idev, 0, + immediate_attach, NULL); + if (IS_ERR(hwpt_paging)) { + destroy_hwpt = ERR_CAST(hwpt_paging); goto out_unlock; } + hwpt = &hwpt_paging->common; if (!immediate_attach) { destroy_hwpt = (*do_attach)(idev, hwpt); @@ -554,7 +603,7 @@ iommufd_device_auto_get_domain(struct iommufd_device *idev, destroy_hwpt = NULL; } - hwpt->auto_domain = true; + hwpt_paging->auto_domain = true; *pt_id = hwpt->obj.id; iommufd_object_finalize(idev->ictx, &hwpt->obj); @@ -579,7 +628,8 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, return PTR_ERR(pt_obj); switch (pt_obj->type) { - case IOMMUFD_OBJ_HW_PAGETABLE: { + case IOMMUFD_OBJ_HWPT_NESTED: + case IOMMUFD_OBJ_HWPT_PAGING: { struct iommufd_hw_pagetable *hwpt = container_of(pt_obj, struct iommufd_hw_pagetable, obj); @@ -602,7 +652,7 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, destroy_hwpt = ERR_PTR(-EINVAL); goto out_put_pt_obj; } - iommufd_put_object(pt_obj); + iommufd_put_object(idev->ictx, pt_obj); /* This destruction has to be after we unlock everything */ if (destroy_hwpt) @@ -610,15 +660,15 @@ static int iommufd_device_change_pt(struct iommufd_device *idev, u32 *pt_id, return 0; out_put_pt_obj: - iommufd_put_object(pt_obj); + iommufd_put_object(idev->ictx, pt_obj); return PTR_ERR(destroy_hwpt); } /** * iommufd_device_attach - Connect a device to an iommu_domain * @idev: device to attach - * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE - * Output the IOMMUFD_OBJ_HW_PAGETABLE ID + * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING + * Output the IOMMUFD_OBJ_HWPT_PAGING ID * * This connects the device to an iommu_domain, either automatically or manually * selected. Once this completes the device could do DMA. @@ -646,8 +696,8 @@ EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD); /** * iommufd_device_replace - Change the device's iommu_domain * @idev: device to change - * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE - * Output the IOMMUFD_OBJ_HW_PAGETABLE ID + * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HWPT_PAGING + * Output the IOMMUFD_OBJ_HWPT_PAGING ID * * This is the same as:: * @@ -742,7 +792,7 @@ static int iommufd_access_change_ioas_id(struct iommufd_access *access, u32 id) if (IS_ERR(ioas)) return PTR_ERR(ioas); rc = iommufd_access_change_ioas(access, ioas); - iommufd_put_object(&ioas->obj); + iommufd_put_object(access->ictx, &ioas->obj); return rc; } @@ -891,7 +941,7 @@ void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova, access->ops->unmap(access->data, iova, length); - iommufd_put_object(&access->obj); + iommufd_put_object(access->ictx, &access->obj); xa_lock(&ioas->iopt.access_list); } xa_unlock(&ioas->iopt.access_list); @@ -1185,10 +1235,14 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd) */ cmd->data_len = data_len; + cmd->out_capabilities = 0; + if (device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING)) + cmd->out_capabilities |= IOMMU_HW_CAP_DIRTY_TRACKING; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_free: kfree(data); out_put: - iommufd_put_object(&idev->obj); + iommufd_put_object(ucmd->ictx, &idev->obj); return rc; } diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index cf2c1504e2..6f680959b2 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -5,62 +5,87 @@ #include <linux/iommu.h> #include <uapi/linux/iommufd.h> +#include "../iommu-priv.h" #include "iommufd_private.h" -void iommufd_hw_pagetable_destroy(struct iommufd_object *obj) +void iommufd_hwpt_paging_destroy(struct iommufd_object *obj) { - struct iommufd_hw_pagetable *hwpt = - container_of(obj, struct iommufd_hw_pagetable, obj); + struct iommufd_hwpt_paging *hwpt_paging = + container_of(obj, struct iommufd_hwpt_paging, common.obj); - if (!list_empty(&hwpt->hwpt_item)) { - mutex_lock(&hwpt->ioas->mutex); - list_del(&hwpt->hwpt_item); - mutex_unlock(&hwpt->ioas->mutex); + if (!list_empty(&hwpt_paging->hwpt_item)) { + mutex_lock(&hwpt_paging->ioas->mutex); + list_del(&hwpt_paging->hwpt_item); + mutex_unlock(&hwpt_paging->ioas->mutex); - iopt_table_remove_domain(&hwpt->ioas->iopt, hwpt->domain); + iopt_table_remove_domain(&hwpt_paging->ioas->iopt, + hwpt_paging->common.domain); } - if (hwpt->domain) - iommu_domain_free(hwpt->domain); + if (hwpt_paging->common.domain) + iommu_domain_free(hwpt_paging->common.domain); - refcount_dec(&hwpt->ioas->obj.users); + refcount_dec(&hwpt_paging->ioas->obj.users); } -void iommufd_hw_pagetable_abort(struct iommufd_object *obj) +void iommufd_hwpt_paging_abort(struct iommufd_object *obj) { - struct iommufd_hw_pagetable *hwpt = - container_of(obj, struct iommufd_hw_pagetable, obj); + struct iommufd_hwpt_paging *hwpt_paging = + container_of(obj, struct iommufd_hwpt_paging, common.obj); /* The ioas->mutex must be held until finalize is called. */ - lockdep_assert_held(&hwpt->ioas->mutex); + lockdep_assert_held(&hwpt_paging->ioas->mutex); - if (!list_empty(&hwpt->hwpt_item)) { - list_del_init(&hwpt->hwpt_item); - iopt_table_remove_domain(&hwpt->ioas->iopt, hwpt->domain); + if (!list_empty(&hwpt_paging->hwpt_item)) { + list_del_init(&hwpt_paging->hwpt_item); + iopt_table_remove_domain(&hwpt_paging->ioas->iopt, + hwpt_paging->common.domain); } - iommufd_hw_pagetable_destroy(obj); + iommufd_hwpt_paging_destroy(obj); } -int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt) +void iommufd_hwpt_nested_destroy(struct iommufd_object *obj) { - if (hwpt->enforce_cache_coherency) + struct iommufd_hwpt_nested *hwpt_nested = + container_of(obj, struct iommufd_hwpt_nested, common.obj); + + if (hwpt_nested->common.domain) + iommu_domain_free(hwpt_nested->common.domain); + + refcount_dec(&hwpt_nested->parent->common.obj.users); +} + +void iommufd_hwpt_nested_abort(struct iommufd_object *obj) +{ + iommufd_hwpt_nested_destroy(obj); +} + +static int +iommufd_hwpt_paging_enforce_cc(struct iommufd_hwpt_paging *hwpt_paging) +{ + struct iommu_domain *paging_domain = hwpt_paging->common.domain; + + if (hwpt_paging->enforce_cache_coherency) return 0; - if (hwpt->domain->ops->enforce_cache_coherency) - hwpt->enforce_cache_coherency = - hwpt->domain->ops->enforce_cache_coherency( - hwpt->domain); - if (!hwpt->enforce_cache_coherency) + if (paging_domain->ops->enforce_cache_coherency) + hwpt_paging->enforce_cache_coherency = + paging_domain->ops->enforce_cache_coherency( + paging_domain); + if (!hwpt_paging->enforce_cache_coherency) return -EINVAL; return 0; } /** - * iommufd_hw_pagetable_alloc() - Get an iommu_domain for a device + * iommufd_hwpt_paging_alloc() - Get a PAGING iommu_domain for a device * @ictx: iommufd context * @ioas: IOAS to associate the domain with * @idev: Device to get an iommu_domain for + * @flags: Flags from userspace * @immediate_attach: True if idev should be attached to the hwpt + * @user_data: The user provided driver specific data describing the domain to + * create * * Allocate a new iommu_domain and return it as a hw_pagetable. The HWPT * will be linked to the given ioas and upon return the underlying iommu_domain @@ -70,28 +95,52 @@ int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt) * iommufd_object_abort_and_destroy() or iommufd_object_finalize() is called on * the returned hwpt. */ -struct iommufd_hw_pagetable * -iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, - struct iommufd_device *idev, bool immediate_attach) +struct iommufd_hwpt_paging * +iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, + struct iommufd_device *idev, u32 flags, + bool immediate_attach, + const struct iommu_user_data *user_data) { + const u32 valid_flags = IOMMU_HWPT_ALLOC_NEST_PARENT | + IOMMU_HWPT_ALLOC_DIRTY_TRACKING; + const struct iommu_ops *ops = dev_iommu_ops(idev->dev); + struct iommufd_hwpt_paging *hwpt_paging; struct iommufd_hw_pagetable *hwpt; int rc; lockdep_assert_held(&ioas->mutex); - hwpt = iommufd_object_alloc(ictx, hwpt, IOMMUFD_OBJ_HW_PAGETABLE); - if (IS_ERR(hwpt)) - return hwpt; + if ((flags || user_data) && !ops->domain_alloc_user) + return ERR_PTR(-EOPNOTSUPP); + if (flags & ~valid_flags) + return ERR_PTR(-EOPNOTSUPP); + + hwpt_paging = __iommufd_object_alloc( + ictx, hwpt_paging, IOMMUFD_OBJ_HWPT_PAGING, common.obj); + if (IS_ERR(hwpt_paging)) + return ERR_CAST(hwpt_paging); + hwpt = &hwpt_paging->common; - INIT_LIST_HEAD(&hwpt->hwpt_item); + INIT_LIST_HEAD(&hwpt_paging->hwpt_item); /* Pairs with iommufd_hw_pagetable_destroy() */ refcount_inc(&ioas->obj.users); - hwpt->ioas = ioas; + hwpt_paging->ioas = ioas; + hwpt_paging->nest_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT; - hwpt->domain = iommu_domain_alloc(idev->dev->bus); - if (!hwpt->domain) { - rc = -ENOMEM; - goto out_abort; + if (ops->domain_alloc_user) { + hwpt->domain = ops->domain_alloc_user(idev->dev, flags, NULL, + user_data); + if (IS_ERR(hwpt->domain)) { + rc = PTR_ERR(hwpt->domain); + hwpt->domain = NULL; + goto out_abort; + } + } else { + hwpt->domain = iommu_domain_alloc(idev->dev->bus); + if (!hwpt->domain) { + rc = -ENOMEM; + goto out_abort; + } } /* @@ -100,9 +149,16 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, * doing any maps. It is an iommu driver bug to report * IOMMU_CAP_ENFORCE_CACHE_COHERENCY but fail enforce_cache_coherency on * a new domain. + * + * The cache coherency mode must be configured here and unchanged later. + * Note that a HWPT (non-CC) created for a device (non-CC) can be later + * reused by another device (either non-CC or CC). However, A HWPT (CC) + * created for a device (CC) cannot be reused by another device (non-CC) + * but only devices (CC). Instead user space in this case would need to + * allocate a separate HWPT (non-CC). */ if (idev->enforce_cache_coherency) { - rc = iommufd_hw_pagetable_enforce_cc(hwpt); + rc = iommufd_hwpt_paging_enforce_cc(hwpt_paging); if (WARN_ON(rc)) goto out_abort; } @@ -119,11 +175,11 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, goto out_abort; } - rc = iopt_table_add_domain(&hwpt->ioas->iopt, hwpt->domain); + rc = iopt_table_add_domain(&ioas->iopt, hwpt->domain); if (rc) goto out_detach; - list_add_tail(&hwpt->hwpt_item, &hwpt->ioas->hwpt_list); - return hwpt; + list_add_tail(&hwpt_paging->hwpt_item, &ioas->hwpt_list); + return hwpt_paging; out_detach: if (immediate_attach) @@ -133,32 +189,121 @@ out_abort: return ERR_PTR(rc); } +/** + * iommufd_hwpt_nested_alloc() - Get a NESTED iommu_domain for a device + * @ictx: iommufd context + * @parent: Parent PAGING-type hwpt to associate the domain with + * @idev: Device to get an iommu_domain for + * @flags: Flags from userspace + * @user_data: user_data pointer. Must be valid + * + * Allocate a new iommu_domain (must be IOMMU_DOMAIN_NESTED) and return it as + * a NESTED hw_pagetable. The given parent PAGING-type hwpt must be capable of + * being a parent. + */ +static struct iommufd_hwpt_nested * +iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx, + struct iommufd_hwpt_paging *parent, + struct iommufd_device *idev, u32 flags, + const struct iommu_user_data *user_data) +{ + const struct iommu_ops *ops = dev_iommu_ops(idev->dev); + struct iommufd_hwpt_nested *hwpt_nested; + struct iommufd_hw_pagetable *hwpt; + int rc; + + if (flags || !user_data->len || !ops->domain_alloc_user) + return ERR_PTR(-EOPNOTSUPP); + if (parent->auto_domain || !parent->nest_parent) + return ERR_PTR(-EINVAL); + + hwpt_nested = __iommufd_object_alloc( + ictx, hwpt_nested, IOMMUFD_OBJ_HWPT_NESTED, common.obj); + if (IS_ERR(hwpt_nested)) + return ERR_CAST(hwpt_nested); + hwpt = &hwpt_nested->common; + + refcount_inc(&parent->common.obj.users); + hwpt_nested->parent = parent; + + hwpt->domain = ops->domain_alloc_user(idev->dev, flags, + parent->common.domain, user_data); + if (IS_ERR(hwpt->domain)) { + rc = PTR_ERR(hwpt->domain); + hwpt->domain = NULL; + goto out_abort; + } + + if (WARN_ON_ONCE(hwpt->domain->type != IOMMU_DOMAIN_NESTED)) { + rc = -EINVAL; + goto out_abort; + } + return hwpt_nested; + +out_abort: + iommufd_object_abort_and_destroy(ictx, &hwpt->obj); + return ERR_PTR(rc); +} + int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) { struct iommu_hwpt_alloc *cmd = ucmd->cmd; + const struct iommu_user_data user_data = { + .type = cmd->data_type, + .uptr = u64_to_user_ptr(cmd->data_uptr), + .len = cmd->data_len, + }; struct iommufd_hw_pagetable *hwpt; + struct iommufd_ioas *ioas = NULL; + struct iommufd_object *pt_obj; struct iommufd_device *idev; - struct iommufd_ioas *ioas; int rc; - if (cmd->flags || cmd->__reserved) + if (cmd->__reserved) return -EOPNOTSUPP; + if ((cmd->data_type == IOMMU_HWPT_DATA_NONE && cmd->data_len) || + (cmd->data_type != IOMMU_HWPT_DATA_NONE && !cmd->data_len)) + return -EINVAL; idev = iommufd_get_device(ucmd, cmd->dev_id); if (IS_ERR(idev)) return PTR_ERR(idev); - ioas = iommufd_get_ioas(ucmd->ictx, cmd->pt_id); - if (IS_ERR(ioas)) { - rc = PTR_ERR(ioas); + pt_obj = iommufd_get_object(ucmd->ictx, cmd->pt_id, IOMMUFD_OBJ_ANY); + if (IS_ERR(pt_obj)) { + rc = -EINVAL; goto out_put_idev; } - mutex_lock(&ioas->mutex); - hwpt = iommufd_hw_pagetable_alloc(ucmd->ictx, ioas, idev, false); - if (IS_ERR(hwpt)) { - rc = PTR_ERR(hwpt); - goto out_unlock; + if (pt_obj->type == IOMMUFD_OBJ_IOAS) { + struct iommufd_hwpt_paging *hwpt_paging; + + ioas = container_of(pt_obj, struct iommufd_ioas, obj); + mutex_lock(&ioas->mutex); + hwpt_paging = iommufd_hwpt_paging_alloc( + ucmd->ictx, ioas, idev, cmd->flags, false, + user_data.len ? &user_data : NULL); + if (IS_ERR(hwpt_paging)) { + rc = PTR_ERR(hwpt_paging); + goto out_unlock; + } + hwpt = &hwpt_paging->common; + } else if (pt_obj->type == IOMMUFD_OBJ_HWPT_PAGING) { + struct iommufd_hwpt_nested *hwpt_nested; + + hwpt_nested = iommufd_hwpt_nested_alloc( + ucmd->ictx, + container_of(pt_obj, struct iommufd_hwpt_paging, + common.obj), + idev, cmd->flags, &user_data); + if (IS_ERR(hwpt_nested)) { + rc = PTR_ERR(hwpt_nested); + goto out_unlock; + } + hwpt = &hwpt_nested->common; + } else { + rc = -EINVAL; + goto out_put_pt; } cmd->out_hwpt_id = hwpt->obj.id; @@ -171,9 +316,59 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) out_hwpt: iommufd_object_abort_and_destroy(ucmd->ictx, &hwpt->obj); out_unlock: - mutex_unlock(&ioas->mutex); - iommufd_put_object(&ioas->obj); + if (ioas) + mutex_unlock(&ioas->mutex); +out_put_pt: + iommufd_put_object(ucmd->ictx, pt_obj); out_put_idev: - iommufd_put_object(&idev->obj); + iommufd_put_object(ucmd->ictx, &idev->obj); + return rc; +} + +int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd) +{ + struct iommu_hwpt_set_dirty_tracking *cmd = ucmd->cmd; + struct iommufd_hwpt_paging *hwpt_paging; + struct iommufd_ioas *ioas; + int rc = -EOPNOTSUPP; + bool enable; + + if (cmd->flags & ~IOMMU_HWPT_DIRTY_TRACKING_ENABLE) + return rc; + + hwpt_paging = iommufd_get_hwpt_paging(ucmd, cmd->hwpt_id); + if (IS_ERR(hwpt_paging)) + return PTR_ERR(hwpt_paging); + + ioas = hwpt_paging->ioas; + enable = cmd->flags & IOMMU_HWPT_DIRTY_TRACKING_ENABLE; + + rc = iopt_set_dirty_tracking(&ioas->iopt, hwpt_paging->common.domain, + enable); + + iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj); + return rc; +} + +int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd) +{ + struct iommu_hwpt_get_dirty_bitmap *cmd = ucmd->cmd; + struct iommufd_hwpt_paging *hwpt_paging; + struct iommufd_ioas *ioas; + int rc = -EOPNOTSUPP; + + if ((cmd->flags & ~(IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR)) || + cmd->__reserved) + return -EOPNOTSUPP; + + hwpt_paging = iommufd_get_hwpt_paging(ucmd, cmd->hwpt_id); + if (IS_ERR(hwpt_paging)) + return PTR_ERR(hwpt_paging); + + ioas = hwpt_paging->ioas; + rc = iopt_read_and_clear_dirty_data( + &ioas->iopt, hwpt_paging->common.domain, cmd->flags, cmd); + + iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj); return rc; } diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 117a39ae2e..504ac1b01b 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -15,6 +15,7 @@ #include <linux/err.h> #include <linux/slab.h> #include <linux/errno.h> +#include <uapi/linux/iommufd.h> #include "io_pagetable.h" #include "double_span.h" @@ -424,6 +425,177 @@ int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, return 0; } +struct iova_bitmap_fn_arg { + unsigned long flags; + struct io_pagetable *iopt; + struct iommu_domain *domain; + struct iommu_dirty_bitmap *dirty; +}; + +static int __iommu_read_and_clear_dirty(struct iova_bitmap *bitmap, + unsigned long iova, size_t length, + void *opaque) +{ + struct iopt_area *area; + struct iopt_area_contig_iter iter; + struct iova_bitmap_fn_arg *arg = opaque; + struct iommu_domain *domain = arg->domain; + struct iommu_dirty_bitmap *dirty = arg->dirty; + const struct iommu_dirty_ops *ops = domain->dirty_ops; + unsigned long last_iova = iova + length - 1; + unsigned long flags = arg->flags; + int ret; + + iopt_for_each_contig_area(&iter, area, arg->iopt, iova, last_iova) { + unsigned long last = min(last_iova, iopt_area_last_iova(area)); + + ret = ops->read_and_clear_dirty(domain, iter.cur_iova, + last - iter.cur_iova + 1, flags, + dirty); + if (ret) + return ret; + } + + if (!iopt_area_contig_done(&iter)) + return -EINVAL; + return 0; +} + +static int +iommu_read_and_clear_dirty(struct iommu_domain *domain, + struct io_pagetable *iopt, unsigned long flags, + struct iommu_hwpt_get_dirty_bitmap *bitmap) +{ + const struct iommu_dirty_ops *ops = domain->dirty_ops; + struct iommu_iotlb_gather gather; + struct iommu_dirty_bitmap dirty; + struct iova_bitmap_fn_arg arg; + struct iova_bitmap *iter; + int ret = 0; + + if (!ops || !ops->read_and_clear_dirty) + return -EOPNOTSUPP; + + iter = iova_bitmap_alloc(bitmap->iova, bitmap->length, + bitmap->page_size, + u64_to_user_ptr(bitmap->data)); + if (IS_ERR(iter)) + return -ENOMEM; + + iommu_dirty_bitmap_init(&dirty, iter, &gather); + + arg.flags = flags; + arg.iopt = iopt; + arg.domain = domain; + arg.dirty = &dirty; + iova_bitmap_for_each(iter, &arg, __iommu_read_and_clear_dirty); + + if (!(flags & IOMMU_DIRTY_NO_CLEAR)) + iommu_iotlb_sync(domain, &gather); + + iova_bitmap_free(iter); + + return ret; +} + +int iommufd_check_iova_range(struct io_pagetable *iopt, + struct iommu_hwpt_get_dirty_bitmap *bitmap) +{ + size_t iommu_pgsize = iopt->iova_alignment; + u64 last_iova; + + if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova)) + return -EOVERFLOW; + + if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX) + return -EOVERFLOW; + + if ((bitmap->iova & (iommu_pgsize - 1)) || + ((last_iova + 1) & (iommu_pgsize - 1))) + return -EINVAL; + + if (!bitmap->page_size) + return -EINVAL; + + if ((bitmap->iova & (bitmap->page_size - 1)) || + ((last_iova + 1) & (bitmap->page_size - 1))) + return -EINVAL; + + return 0; +} + +int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt, + struct iommu_domain *domain, + unsigned long flags, + struct iommu_hwpt_get_dirty_bitmap *bitmap) +{ + int ret; + + ret = iommufd_check_iova_range(iopt, bitmap); + if (ret) + return ret; + + down_read(&iopt->iova_rwsem); + ret = iommu_read_and_clear_dirty(domain, iopt, flags, bitmap); + up_read(&iopt->iova_rwsem); + + return ret; +} + +static int iopt_clear_dirty_data(struct io_pagetable *iopt, + struct iommu_domain *domain) +{ + const struct iommu_dirty_ops *ops = domain->dirty_ops; + struct iommu_iotlb_gather gather; + struct iommu_dirty_bitmap dirty; + struct iopt_area *area; + int ret = 0; + + lockdep_assert_held_read(&iopt->iova_rwsem); + + iommu_dirty_bitmap_init(&dirty, NULL, &gather); + + for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; + area = iopt_area_iter_next(area, 0, ULONG_MAX)) { + if (!area->pages) + continue; + + ret = ops->read_and_clear_dirty(domain, iopt_area_iova(area), + iopt_area_length(area), 0, + &dirty); + if (ret) + break; + } + + iommu_iotlb_sync(domain, &gather); + return ret; +} + +int iopt_set_dirty_tracking(struct io_pagetable *iopt, + struct iommu_domain *domain, bool enable) +{ + const struct iommu_dirty_ops *ops = domain->dirty_ops; + int ret = 0; + + if (!ops) + return -EOPNOTSUPP; + + down_read(&iopt->iova_rwsem); + + /* Clear dirty bits from PTEs to ensure a clean snapshot */ + if (enable) { + ret = iopt_clear_dirty_data(iopt, domain); + if (ret) + goto out_unlock; + } + + ret = ops->set_dirty_tracking(domain, enable); + +out_unlock: + up_read(&iopt->iova_rwsem); + return ret; +} + int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova, unsigned long length, struct list_head *pages_list) { diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c index d5624577f7..7422482765 100644 --- a/drivers/iommu/iommufd/ioas.c +++ b/drivers/iommu/iommufd/ioas.c @@ -105,7 +105,7 @@ int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd) rc = -EMSGSIZE; out_put: up_read(&ioas->iopt.iova_rwsem); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return rc; } @@ -175,7 +175,7 @@ out_free: interval_tree_remove(node, &allowed_iova); kfree(container_of(node, struct iopt_allowed, node)); } - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return rc; } @@ -228,7 +228,7 @@ int iommufd_ioas_map(struct iommufd_ucmd *ucmd) cmd->iova = iova; rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_put: - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return rc; } @@ -258,7 +258,7 @@ int iommufd_ioas_copy(struct iommufd_ucmd *ucmd) return PTR_ERR(src_ioas); rc = iopt_get_pages(&src_ioas->iopt, cmd->src_iova, cmd->length, &pages_list); - iommufd_put_object(&src_ioas->obj); + iommufd_put_object(ucmd->ictx, &src_ioas->obj); if (rc) return rc; @@ -279,7 +279,7 @@ int iommufd_ioas_copy(struct iommufd_ucmd *ucmd) cmd->dst_iova = iova; rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_put_dst: - iommufd_put_object(&dst_ioas->obj); + iommufd_put_object(ucmd->ictx, &dst_ioas->obj); out_pages: iopt_free_pages_list(&pages_list); return rc; @@ -315,7 +315,7 @@ int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd) rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_put: - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return rc; } @@ -393,6 +393,6 @@ int iommufd_ioas_option(struct iommufd_ucmd *ucmd) rc = -EOPNOTSUPP; } - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return rc; } diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 2c58670011..abae041e25 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -8,6 +8,9 @@ #include <linux/xarray.h> #include <linux/refcount.h> #include <linux/uaccess.h> +#include <linux/iommu.h> +#include <linux/iova_bitmap.h> +#include <uapi/linux/iommufd.h> struct iommu_domain; struct iommu_group; @@ -18,6 +21,7 @@ struct iommufd_ctx { struct file *file; struct xarray objects; struct xarray groups; + wait_queue_head_t destroy_wait; u8 account_mode; /* Compatibility with VFIO no iommu */ @@ -70,6 +74,13 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, unsigned long length, unsigned long *unmapped); int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped); +int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt, + struct iommu_domain *domain, + unsigned long flags, + struct iommu_hwpt_get_dirty_bitmap *bitmap); +int iopt_set_dirty_tracking(struct io_pagetable *iopt, + struct iommu_domain *domain, bool enable); + void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova, unsigned long length); int iopt_table_add_domain(struct io_pagetable *iopt, @@ -113,7 +124,8 @@ enum iommufd_object_type { IOMMUFD_OBJ_NONE, IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE, IOMMUFD_OBJ_DEVICE, - IOMMUFD_OBJ_HW_PAGETABLE, + IOMMUFD_OBJ_HWPT_PAGING, + IOMMUFD_OBJ_HWPT_NESTED, IOMMUFD_OBJ_IOAS, IOMMUFD_OBJ_ACCESS, #ifdef CONFIG_IOMMUFD_TEST @@ -124,7 +136,7 @@ enum iommufd_object_type { /* Base struct for all objects with a userspace ID handle. */ struct iommufd_object { - struct rw_semaphore destroy_rwsem; + refcount_t shortterm_users; refcount_t users; enum iommufd_object_type type; unsigned int id; @@ -132,10 +144,15 @@ struct iommufd_object { static inline bool iommufd_lock_obj(struct iommufd_object *obj) { - if (!down_read_trylock(&obj->destroy_rwsem)) + if (!refcount_inc_not_zero(&obj->users)) return false; - if (!refcount_inc_not_zero(&obj->users)) { - up_read(&obj->destroy_rwsem); + if (!refcount_inc_not_zero(&obj->shortterm_users)) { + /* + * If the caller doesn't already have a ref on obj this must be + * called under the xa_lock. Otherwise the caller is holding a + * ref on users. Thus it cannot be one before this decrement. + */ + refcount_dec(&obj->users); return false; } return true; @@ -143,10 +160,16 @@ static inline bool iommufd_lock_obj(struct iommufd_object *obj) struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id, enum iommufd_object_type type); -static inline void iommufd_put_object(struct iommufd_object *obj) +static inline void iommufd_put_object(struct iommufd_ctx *ictx, + struct iommufd_object *obj) { + /* + * Users first, then shortterm so that REMOVE_WAIT_SHORTTERM never sees + * a spurious !0 users with a 0 shortterm_users. + */ refcount_dec(&obj->users); - up_read(&obj->destroy_rwsem); + if (refcount_dec_and_test(&obj->shortterm_users)) + wake_up_interruptible_all(&ictx->destroy_wait); } void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj); @@ -154,24 +177,56 @@ void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx, struct iommufd_object *obj); void iommufd_object_finalize(struct iommufd_ctx *ictx, struct iommufd_object *obj); -void __iommufd_object_destroy_user(struct iommufd_ctx *ictx, - struct iommufd_object *obj, bool allow_fail); + +enum { + REMOVE_WAIT_SHORTTERM = 1, +}; +int iommufd_object_remove(struct iommufd_ctx *ictx, + struct iommufd_object *to_destroy, u32 id, + unsigned int flags); + +/* + * The caller holds a users refcount and wants to destroy the object. At this + * point the caller has no shortterm_users reference and at least the xarray + * will be holding one. + */ static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx, struct iommufd_object *obj) { - __iommufd_object_destroy_user(ictx, obj, false); + int ret; + + ret = iommufd_object_remove(ictx, obj, obj->id, REMOVE_WAIT_SHORTTERM); + + /* + * If there is a bug and we couldn't destroy the object then we did put + * back the caller's users refcount and will eventually try to free it + * again during close. + */ + WARN_ON(ret); } -static inline void iommufd_object_deref_user(struct iommufd_ctx *ictx, - struct iommufd_object *obj) + +/* + * The HWPT allocated by autodomains is used in possibly many devices and + * is automatically destroyed when its refcount reaches zero. + * + * If userspace uses the HWPT manually, even for a short term, then it will + * disrupt this refcounting and the auto-free in the kernel will not work. + * Userspace that tries to use the automatically allocated HWPT must be careful + * to ensure that it is consistently destroyed, eg by not racing accesses + * and by not attaching an automatic HWPT to a device manually. + */ +static inline void +iommufd_object_put_and_try_destroy(struct iommufd_ctx *ictx, + struct iommufd_object *obj) { - __iommufd_object_destroy_user(ictx, obj, true); + iommufd_object_remove(ictx, obj, obj->id, 0); } struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, enum iommufd_object_type type); -#define iommufd_object_alloc(ictx, ptr, type) \ +#define __iommufd_object_alloc(ictx, ptr, type, obj) \ container_of(_iommufd_object_alloc( \ ictx, \ sizeof(*(ptr)) + BUILD_BUG_ON_ZERO( \ @@ -180,6 +235,9 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, type), \ typeof(*(ptr)), obj) +#define iommufd_object_alloc(ictx, ptr, type) \ + __iommufd_object_alloc(ictx, ptr, type, obj) + /* * The IO Address Space (IOAS) pagetable is a virtual page table backed by the * io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The @@ -222,6 +280,8 @@ int iommufd_option_rlimit_mode(struct iommu_option *cmd, struct iommufd_ctx *ictx); int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd); +int iommufd_check_iova_range(struct io_pagetable *iopt, + struct iommu_hwpt_get_dirty_bitmap *bitmap); /* * A HW pagetable is called an iommu_domain inside the kernel. This user object @@ -231,35 +291,75 @@ int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd); */ struct iommufd_hw_pagetable { struct iommufd_object obj; - struct iommufd_ioas *ioas; struct iommu_domain *domain; +}; + +struct iommufd_hwpt_paging { + struct iommufd_hw_pagetable common; + struct iommufd_ioas *ioas; bool auto_domain : 1; bool enforce_cache_coherency : 1; bool msi_cookie : 1; + bool nest_parent : 1; /* Head at iommufd_ioas::hwpt_list */ struct list_head hwpt_item; }; -struct iommufd_hw_pagetable * -iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, - struct iommufd_device *idev, bool immediate_attach); -int iommufd_hw_pagetable_enforce_cc(struct iommufd_hw_pagetable *hwpt); +struct iommufd_hwpt_nested { + struct iommufd_hw_pagetable common; + struct iommufd_hwpt_paging *parent; +}; + +static inline bool hwpt_is_paging(struct iommufd_hw_pagetable *hwpt) +{ + return hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING; +} + +static inline struct iommufd_hwpt_paging * +to_hwpt_paging(struct iommufd_hw_pagetable *hwpt) +{ + return container_of(hwpt, struct iommufd_hwpt_paging, common); +} + +static inline struct iommufd_hwpt_paging * +iommufd_get_hwpt_paging(struct iommufd_ucmd *ucmd, u32 id) +{ + return container_of(iommufd_get_object(ucmd->ictx, id, + IOMMUFD_OBJ_HWPT_PAGING), + struct iommufd_hwpt_paging, common.obj); +} +int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd); +int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd); + +struct iommufd_hwpt_paging * +iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, + struct iommufd_device *idev, u32 flags, + bool immediate_attach, + const struct iommu_user_data *user_data); int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt, struct iommufd_device *idev); struct iommufd_hw_pagetable * iommufd_hw_pagetable_detach(struct iommufd_device *idev); -void iommufd_hw_pagetable_destroy(struct iommufd_object *obj); -void iommufd_hw_pagetable_abort(struct iommufd_object *obj); +void iommufd_hwpt_paging_destroy(struct iommufd_object *obj); +void iommufd_hwpt_paging_abort(struct iommufd_object *obj); +void iommufd_hwpt_nested_destroy(struct iommufd_object *obj); +void iommufd_hwpt_nested_abort(struct iommufd_object *obj); int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd); static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx, struct iommufd_hw_pagetable *hwpt) { - lockdep_assert_not_held(&hwpt->ioas->mutex); - if (hwpt->auto_domain) - iommufd_object_deref_user(ictx, &hwpt->obj); - else - refcount_dec(&hwpt->obj.users); + if (hwpt->obj.type == IOMMUFD_OBJ_HWPT_PAGING) { + struct iommufd_hwpt_paging *hwpt_paging = to_hwpt_paging(hwpt); + + lockdep_assert_not_held(&hwpt_paging->ioas->mutex); + + if (hwpt_paging->auto_domain) { + iommufd_object_put_and_try_destroy(ictx, &hwpt->obj); + return; + } + } + refcount_dec(&hwpt->obj.users); } struct iommufd_group { diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h index 3f3644375b..7910fbe196 100644 --- a/drivers/iommu/iommufd/iommufd_test.h +++ b/drivers/iommu/iommufd/iommufd_test.h @@ -19,6 +19,8 @@ enum { IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT, IOMMU_TEST_OP_MOCK_DOMAIN_REPLACE, IOMMU_TEST_OP_ACCESS_REPLACE_IOAS, + IOMMU_TEST_OP_MOCK_DOMAIN_FLAGS, + IOMMU_TEST_OP_DIRTY, }; enum { @@ -40,6 +42,15 @@ enum { MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES = 1 << 0, }; +enum { + MOCK_FLAGS_DEVICE_NO_DIRTY = 1 << 0, +}; + +enum { + MOCK_NESTED_DOMAIN_IOTLB_ID_MAX = 3, + MOCK_NESTED_DOMAIN_IOTLB_NUM = 4, +}; + struct iommu_test_cmd { __u32 size; __u32 op; @@ -57,6 +68,13 @@ struct iommu_test_cmd { __u32 out_idev_id; } mock_domain; struct { + __u32 out_stdev_id; + __u32 out_hwpt_id; + __u32 out_idev_id; + /* Expand mock_domain to set mock device flags */ + __u32 dev_flags; + } mock_domain_flags; + struct { __u32 pt_id; } mock_domain_replace; struct { @@ -95,6 +113,14 @@ struct iommu_test_cmd { struct { __u32 ioas_id; } access_replace_ioas; + struct { + __u32 flags; + __aligned_u64 iova; + __aligned_u64 length; + __aligned_u64 page_size; + __aligned_u64 uptr; + __aligned_u64 out_nr_dirty; + } dirty; }; __u32 last; }; @@ -109,4 +135,17 @@ struct iommu_test_hw_info { __u32 test_reg; }; +/* Should not be equal to any defined value in enum iommu_hwpt_data_type */ +#define IOMMU_HWPT_DATA_SELFTEST 0xdead +#define IOMMU_TEST_IOTLB_DEFAULT 0xbadbeef + +/** + * struct iommu_hwpt_selftest + * + * @iotlb: default mock iotlb value, IOMMU_TEST_IOTLB_DEFAULT + */ +struct iommu_hwpt_selftest { + __u32 iotlb; +}; + #endif diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c new file mode 100644 index 0000000000..db8c46bee1 --- /dev/null +++ b/drivers/iommu/iommufd/iova_bitmap.c @@ -0,0 +1,474 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022, Oracle and/or its affiliates. + * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved + */ +#include <linux/iova_bitmap.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/highmem.h> + +#define BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) + +/* + * struct iova_bitmap_map - A bitmap representing an IOVA range + * + * Main data structure for tracking mapped user pages of bitmap data. + * + * For example, for something recording dirty IOVAs, it will be provided a + * struct iova_bitmap structure, as a general structure for iterating the + * total IOVA range. The struct iova_bitmap_map, though, represents the + * subset of said IOVA space that is pinned by its parent structure (struct + * iova_bitmap). + * + * The user does not need to exact location of the bits in the bitmap. + * From user perspective the only API available is iova_bitmap_set() which + * records the IOVA *range* in the bitmap by setting the corresponding + * bits. + * + * The bitmap is an array of u64 whereas each bit represents an IOVA of + * range of (1 << pgshift). Thus formula for the bitmap data to be set is: + * + * data[(iova / page_size) / 64] & (1ULL << (iova % 64)) + */ +struct iova_bitmap_map { + /* base IOVA representing bit 0 of the first page */ + unsigned long iova; + + /* page size order that each bit granules to */ + unsigned long pgshift; + + /* page offset of the first user page pinned */ + unsigned long pgoff; + + /* number of pages pinned */ + unsigned long npages; + + /* pinned pages representing the bitmap data */ + struct page **pages; +}; + +/* + * struct iova_bitmap - The IOVA bitmap object + * + * Main data structure for iterating over the bitmap data. + * + * Abstracts the pinning work and iterates in IOVA ranges. + * It uses a windowing scheme and pins the bitmap in relatively + * big ranges e.g. + * + * The bitmap object uses one base page to store all the pinned pages + * pointers related to the bitmap. For sizeof(struct page*) == 8 it stores + * 512 struct page pointers which, if the base page size is 4K, it means + * 2M of bitmap data is pinned at a time. If the iova_bitmap page size is + * also 4K then the range window to iterate is 64G. + * + * For example iterating on a total IOVA range of 4G..128G, it will walk + * through this set of ranges: + * + * 4G - 68G-1 (64G) + * 68G - 128G-1 (64G) + * + * An example of the APIs on how to use/iterate over the IOVA bitmap: + * + * bitmap = iova_bitmap_alloc(iova, length, page_size, data); + * if (IS_ERR(bitmap)) + * return PTR_ERR(bitmap); + * + * ret = iova_bitmap_for_each(bitmap, arg, dirty_reporter_fn); + * + * iova_bitmap_free(bitmap); + * + * Each iteration of the @dirty_reporter_fn is called with a unique @iova + * and @length argument, indicating the current range available through the + * iova_bitmap. The @dirty_reporter_fn uses iova_bitmap_set() to mark dirty + * areas (@iova_length) within that provided range, as following: + * + * iova_bitmap_set(bitmap, iova, iova_length); + * + * The internals of the object uses an index @mapped_base_index that indexes + * which u64 word of the bitmap is mapped, up to @mapped_total_index. + * Those keep being incremented until @mapped_total_index is reached while + * mapping up to PAGE_SIZE / sizeof(struct page*) maximum of pages. + * + * The IOVA bitmap is usually located on what tracks DMA mapped ranges or + * some form of IOVA range tracking that co-relates to the user passed + * bitmap. + */ +struct iova_bitmap { + /* IOVA range representing the currently mapped bitmap data */ + struct iova_bitmap_map mapped; + + /* userspace address of the bitmap */ + u8 __user *bitmap; + + /* u64 index that @mapped points to */ + unsigned long mapped_base_index; + + /* how many u64 can we walk in total */ + unsigned long mapped_total_index; + + /* base IOVA of the whole bitmap */ + unsigned long iova; + + /* length of the IOVA range for the whole bitmap */ + size_t length; + + /* length of the IOVA range set ahead the pinned pages */ + unsigned long set_ahead_length; +}; + +/* + * Converts a relative IOVA to a bitmap index. + * This function provides the index into the u64 array (bitmap::bitmap) + * for a given IOVA offset. + * Relative IOVA means relative to the bitmap::mapped base IOVA + * (stored in mapped::iova). All computations in this file are done using + * relative IOVAs and thus avoid an extra subtraction against mapped::iova. + * The user API iova_bitmap_set() always uses a regular absolute IOVAs. + */ +static unsigned long iova_bitmap_offset_to_index(struct iova_bitmap *bitmap, + unsigned long iova) +{ + unsigned long pgsize = 1 << bitmap->mapped.pgshift; + + return iova / (BITS_PER_TYPE(*bitmap->bitmap) * pgsize); +} + +/* + * Converts a bitmap index to a *relative* IOVA. + */ +static unsigned long iova_bitmap_index_to_offset(struct iova_bitmap *bitmap, + unsigned long index) +{ + unsigned long pgshift = bitmap->mapped.pgshift; + + return (index * BITS_PER_TYPE(*bitmap->bitmap)) << pgshift; +} + +/* + * Returns the base IOVA of the mapped range. + */ +static unsigned long iova_bitmap_mapped_iova(struct iova_bitmap *bitmap) +{ + unsigned long skip = bitmap->mapped_base_index; + + return bitmap->iova + iova_bitmap_index_to_offset(bitmap, skip); +} + +/* + * Pins the bitmap user pages for the current range window. + * This is internal to IOVA bitmap and called when advancing the + * index (@mapped_base_index) or allocating the bitmap. + */ +static int iova_bitmap_get(struct iova_bitmap *bitmap) +{ + struct iova_bitmap_map *mapped = &bitmap->mapped; + unsigned long npages; + u8 __user *addr; + long ret; + + /* + * @mapped_base_index is the index of the currently mapped u64 words + * that we have access. Anything before @mapped_base_index is not + * mapped. The range @mapped_base_index .. @mapped_total_index-1 is + * mapped but capped at a maximum number of pages. + */ + npages = DIV_ROUND_UP((bitmap->mapped_total_index - + bitmap->mapped_base_index) * + sizeof(*bitmap->bitmap), PAGE_SIZE); + + /* + * Bitmap address to be pinned is calculated via pointer arithmetic + * with bitmap u64 word index. + */ + addr = bitmap->bitmap + bitmap->mapped_base_index; + + /* + * We always cap at max number of 'struct page' a base page can fit. + * This is, for example, on x86 means 2M of bitmap data max. + */ + npages = min(npages + !!offset_in_page(addr), + PAGE_SIZE / sizeof(struct page *)); + + ret = pin_user_pages_fast((unsigned long)addr, npages, + FOLL_WRITE, mapped->pages); + if (ret <= 0) + return -EFAULT; + + mapped->npages = (unsigned long)ret; + /* Base IOVA where @pages point to i.e. bit 0 of the first page */ + mapped->iova = iova_bitmap_mapped_iova(bitmap); + + /* + * offset of the page where pinned pages bit 0 is located. + * This handles the case where the bitmap is not PAGE_SIZE + * aligned. + */ + mapped->pgoff = offset_in_page(addr); + return 0; +} + +/* + * Unpins the bitmap user pages and clears @npages + * (un)pinning is abstracted from API user and it's done when advancing + * the index or freeing the bitmap. + */ +static void iova_bitmap_put(struct iova_bitmap *bitmap) +{ + struct iova_bitmap_map *mapped = &bitmap->mapped; + + if (mapped->npages) { + unpin_user_pages(mapped->pages, mapped->npages); + mapped->npages = 0; + } +} + +/** + * iova_bitmap_alloc() - Allocates an IOVA bitmap object + * @iova: Start address of the IOVA range + * @length: Length of the IOVA range + * @page_size: Page size of the IOVA bitmap. It defines what each bit + * granularity represents + * @data: Userspace address of the bitmap + * + * Allocates an IOVA object and initializes all its fields including the + * first user pages of @data. + * + * Return: A pointer to a newly allocated struct iova_bitmap + * or ERR_PTR() on error. + */ +struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length, + unsigned long page_size, u64 __user *data) +{ + struct iova_bitmap_map *mapped; + struct iova_bitmap *bitmap; + int rc; + + bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); + if (!bitmap) + return ERR_PTR(-ENOMEM); + + mapped = &bitmap->mapped; + mapped->pgshift = __ffs(page_size); + bitmap->bitmap = (u8 __user *)data; + bitmap->mapped_total_index = + iova_bitmap_offset_to_index(bitmap, length - 1) + 1; + bitmap->iova = iova; + bitmap->length = length; + mapped->iova = iova; + mapped->pages = (struct page **)__get_free_page(GFP_KERNEL); + if (!mapped->pages) { + rc = -ENOMEM; + goto err; + } + + rc = iova_bitmap_get(bitmap); + if (rc) + goto err; + return bitmap; + +err: + iova_bitmap_free(bitmap); + return ERR_PTR(rc); +} +EXPORT_SYMBOL_NS_GPL(iova_bitmap_alloc, IOMMUFD); + +/** + * iova_bitmap_free() - Frees an IOVA bitmap object + * @bitmap: IOVA bitmap to free + * + * It unpins and releases pages array memory and clears any leftover + * state. + */ +void iova_bitmap_free(struct iova_bitmap *bitmap) +{ + struct iova_bitmap_map *mapped = &bitmap->mapped; + + iova_bitmap_put(bitmap); + + if (mapped->pages) { + free_page((unsigned long)mapped->pages); + mapped->pages = NULL; + } + + kfree(bitmap); +} +EXPORT_SYMBOL_NS_GPL(iova_bitmap_free, IOMMUFD); + +/* + * Returns the remaining bitmap indexes from mapped_total_index to process for + * the currently pinned bitmap pages. + */ +static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap) +{ + unsigned long remaining, bytes; + + bytes = (bitmap->mapped.npages << PAGE_SHIFT) - bitmap->mapped.pgoff; + + remaining = bitmap->mapped_total_index - bitmap->mapped_base_index; + remaining = min_t(unsigned long, remaining, + DIV_ROUND_UP(bytes, sizeof(*bitmap->bitmap))); + + return remaining; +} + +/* + * Returns the length of the mapped IOVA range. + */ +static unsigned long iova_bitmap_mapped_length(struct iova_bitmap *bitmap) +{ + unsigned long max_iova = bitmap->iova + bitmap->length - 1; + unsigned long iova = iova_bitmap_mapped_iova(bitmap); + unsigned long remaining; + + /* + * iova_bitmap_mapped_remaining() returns a number of indexes which + * when converted to IOVA gives us a max length that the bitmap + * pinned data can cover. Afterwards, that is capped to + * only cover the IOVA range in @bitmap::iova .. @bitmap::length. + */ + remaining = iova_bitmap_index_to_offset(bitmap, + iova_bitmap_mapped_remaining(bitmap)); + + if (iova + remaining - 1 > max_iova) + remaining -= ((iova + remaining - 1) - max_iova); + + return remaining; +} + +/* + * Returns true if there's not more data to iterate. + */ +static bool iova_bitmap_done(struct iova_bitmap *bitmap) +{ + return bitmap->mapped_base_index >= bitmap->mapped_total_index; +} + +static int iova_bitmap_set_ahead(struct iova_bitmap *bitmap, + size_t set_ahead_length) +{ + int ret = 0; + + while (set_ahead_length > 0 && !iova_bitmap_done(bitmap)) { + unsigned long length = iova_bitmap_mapped_length(bitmap); + unsigned long iova = iova_bitmap_mapped_iova(bitmap); + + ret = iova_bitmap_get(bitmap); + if (ret) + break; + + length = min(length, set_ahead_length); + iova_bitmap_set(bitmap, iova, length); + + set_ahead_length -= length; + bitmap->mapped_base_index += + iova_bitmap_offset_to_index(bitmap, length - 1) + 1; + iova_bitmap_put(bitmap); + } + + bitmap->set_ahead_length = 0; + return ret; +} + +/* + * Advances to the next range, releases the current pinned + * pages and pins the next set of bitmap pages. + * Returns 0 on success or otherwise errno. + */ +static int iova_bitmap_advance(struct iova_bitmap *bitmap) +{ + unsigned long iova = iova_bitmap_mapped_length(bitmap) - 1; + unsigned long count = iova_bitmap_offset_to_index(bitmap, iova) + 1; + + bitmap->mapped_base_index += count; + + iova_bitmap_put(bitmap); + if (iova_bitmap_done(bitmap)) + return 0; + + /* Iterate, set and skip any bits requested for next iteration */ + if (bitmap->set_ahead_length) { + int ret; + + ret = iova_bitmap_set_ahead(bitmap, bitmap->set_ahead_length); + if (ret) + return ret; + } + + /* When advancing the index we pin the next set of bitmap pages */ + return iova_bitmap_get(bitmap); +} + +/** + * iova_bitmap_for_each() - Iterates over the bitmap + * @bitmap: IOVA bitmap to iterate + * @opaque: Additional argument to pass to the callback + * @fn: Function that gets called for each IOVA range + * + * Helper function to iterate over bitmap data representing a portion of IOVA + * space. It hides the complexity of iterating bitmaps and translating the + * mapped bitmap user pages into IOVA ranges to process. + * + * Return: 0 on success, and an error on failure either upon + * iteration or when the callback returns an error. + */ +int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, + iova_bitmap_fn_t fn) +{ + int ret = 0; + + for (; !iova_bitmap_done(bitmap) && !ret; + ret = iova_bitmap_advance(bitmap)) { + ret = fn(bitmap, iova_bitmap_mapped_iova(bitmap), + iova_bitmap_mapped_length(bitmap), opaque); + if (ret) + break; + } + + return ret; +} +EXPORT_SYMBOL_NS_GPL(iova_bitmap_for_each, IOMMUFD); + +/** + * iova_bitmap_set() - Records an IOVA range in bitmap + * @bitmap: IOVA bitmap + * @iova: IOVA to start + * @length: IOVA range length + * + * Set the bits corresponding to the range [iova .. iova+length-1] in + * the user bitmap. + * + */ +void iova_bitmap_set(struct iova_bitmap *bitmap, + unsigned long iova, size_t length) +{ + struct iova_bitmap_map *mapped = &bitmap->mapped; + unsigned long cur_bit = ((iova - mapped->iova) >> + mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; + unsigned long last_bit = (((iova + length - 1) - mapped->iova) >> + mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; + unsigned long last_page_idx = mapped->npages - 1; + + do { + unsigned int page_idx = cur_bit / BITS_PER_PAGE; + unsigned int offset = cur_bit % BITS_PER_PAGE; + unsigned int nbits = min(BITS_PER_PAGE - offset, + last_bit - cur_bit + 1); + void *kaddr; + + if (unlikely(page_idx > last_page_idx)) + break; + + kaddr = kmap_local_page(mapped->pages[page_idx]); + bitmap_set(kaddr, offset, nbits); + kunmap_local(kaddr); + cur_bit += nbits; + } while (cur_bit <= last_bit); + + if (unlikely(cur_bit <= last_bit)) { + bitmap->set_ahead_length = + ((last_bit - cur_bit + 1) << bitmap->mapped.pgshift); + } +} +EXPORT_SYMBOL_NS_GPL(iova_bitmap_set, IOMMUFD); diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index e71523cbd0..c9091e46d2 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -33,7 +33,6 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, enum iommufd_object_type type) { - static struct lock_class_key obj_keys[IOMMUFD_OBJ_MAX]; struct iommufd_object *obj; int rc; @@ -41,15 +40,8 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, if (!obj) return ERR_PTR(-ENOMEM); obj->type = type; - /* - * In most cases the destroy_rwsem is obtained with try so it doesn't - * interact with lockdep, however on destroy we have to sleep. This - * means if we have to destroy an object while holding a get on another - * object it triggers lockdep. Using one locking class per object type - * is a simple and reasonable way to avoid this. - */ - __init_rwsem(&obj->destroy_rwsem, "iommufd_object::destroy_rwsem", - &obj_keys[type]); + /* Starts out bias'd by 1 until it is removed from the xarray */ + refcount_set(&obj->shortterm_users, 1); refcount_set(&obj->users, 1); /* @@ -129,92 +121,113 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id, return obj; } +static int iommufd_object_dec_wait_shortterm(struct iommufd_ctx *ictx, + struct iommufd_object *to_destroy) +{ + if (refcount_dec_and_test(&to_destroy->shortterm_users)) + return 0; + + if (wait_event_timeout(ictx->destroy_wait, + refcount_read(&to_destroy->shortterm_users) == + 0, + msecs_to_jiffies(10000))) + return 0; + + pr_crit("Time out waiting for iommufd object to become free\n"); + refcount_inc(&to_destroy->shortterm_users); + return -EBUSY; +} + /* * Remove the given object id from the xarray if the only reference to the - * object is held by the xarray. The caller must call ops destroy(). + * object is held by the xarray. */ -static struct iommufd_object *iommufd_object_remove(struct iommufd_ctx *ictx, - u32 id, bool extra_put) +int iommufd_object_remove(struct iommufd_ctx *ictx, + struct iommufd_object *to_destroy, u32 id, + unsigned int flags) { struct iommufd_object *obj; XA_STATE(xas, &ictx->objects, id); - - xa_lock(&ictx->objects); - obj = xas_load(&xas); - if (xa_is_zero(obj) || !obj) { - obj = ERR_PTR(-ENOENT); - goto out_xa; - } + bool zerod_shortterm = false; + int ret; /* - * If the caller is holding a ref on obj we put it here under the - * spinlock. + * The purpose of the shortterm_users is to ensure deterministic + * destruction of objects used by external drivers and destroyed by this + * function. Any temporary increment of the refcount must increment + * shortterm_users, such as during ioctl execution. */ - if (extra_put) + if (flags & REMOVE_WAIT_SHORTTERM) { + ret = iommufd_object_dec_wait_shortterm(ictx, to_destroy); + if (ret) { + /* + * We have a bug. Put back the callers reference and + * defer cleaning this object until close. + */ + refcount_dec(&to_destroy->users); + return ret; + } + zerod_shortterm = true; + } + + xa_lock(&ictx->objects); + obj = xas_load(&xas); + if (to_destroy) { + /* + * If the caller is holding a ref on obj we put it here under + * the spinlock. + */ refcount_dec(&obj->users); + if (WARN_ON(obj != to_destroy)) { + ret = -ENOENT; + goto err_xa; + } + } else if (xa_is_zero(obj) || !obj) { + ret = -ENOENT; + goto err_xa; + } + if (!refcount_dec_if_one(&obj->users)) { - obj = ERR_PTR(-EBUSY); - goto out_xa; + ret = -EBUSY; + goto err_xa; } xas_store(&xas, NULL); if (ictx->vfio_ioas == container_of(obj, struct iommufd_ioas, obj)) ictx->vfio_ioas = NULL; - -out_xa: xa_unlock(&ictx->objects); - /* The returned object reference count is zero */ - return obj; -} - -/* - * The caller holds a users refcount and wants to destroy the object. Returns - * true if the object was destroyed. In all cases the caller no longer has a - * reference on obj. - */ -void __iommufd_object_destroy_user(struct iommufd_ctx *ictx, - struct iommufd_object *obj, bool allow_fail) -{ - struct iommufd_object *ret; - - /* - * The purpose of the destroy_rwsem is to ensure deterministic - * destruction of objects used by external drivers and destroyed by this - * function. Any temporary increment of the refcount must hold the read - * side of this, such as during ioctl execution. - */ - down_write(&obj->destroy_rwsem); - ret = iommufd_object_remove(ictx, obj->id, true); - up_write(&obj->destroy_rwsem); - - if (allow_fail && IS_ERR(ret)) - return; - /* - * If there is a bug and we couldn't destroy the object then we did put - * back the caller's refcount and will eventually try to free it again - * during close. + * Since users is zero any positive users_shortterm must be racing + * iommufd_put_object(), or we have a bug. */ - if (WARN_ON(IS_ERR(ret))) - return; + if (!zerod_shortterm) { + ret = iommufd_object_dec_wait_shortterm(ictx, obj); + if (WARN_ON(ret)) + return ret; + } iommufd_object_ops[obj->type].destroy(obj); kfree(obj); + return 0; + +err_xa: + if (zerod_shortterm) { + /* Restore the xarray owned reference */ + refcount_set(&obj->shortterm_users, 1); + } + xa_unlock(&ictx->objects); + + /* The returned object reference count is zero */ + return ret; } static int iommufd_destroy(struct iommufd_ucmd *ucmd) { struct iommu_destroy *cmd = ucmd->cmd; - struct iommufd_object *obj; - obj = iommufd_object_remove(ucmd->ictx, cmd->id, false); - if (IS_ERR(obj)) - return PTR_ERR(obj); - iommufd_object_ops[obj->type].destroy(obj); - kfree(obj); - return 0; + return iommufd_object_remove(ucmd->ictx, NULL, cmd->id, 0); } static int iommufd_fops_open(struct inode *inode, struct file *filp) @@ -238,6 +251,7 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp) xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT); xa_init(&ictx->groups); ictx->file = filp; + init_waitqueue_head(&ictx->destroy_wait); filp->private_data = ictx; return 0; } @@ -307,6 +321,8 @@ union ucmd_buffer { struct iommu_destroy destroy; struct iommu_hw_info info; struct iommu_hwpt_alloc hwpt; + struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap; + struct iommu_hwpt_set_dirty_tracking set_dirty_tracking; struct iommu_ioas_alloc alloc; struct iommu_ioas_allow_iovas allow_iovas; struct iommu_ioas_copy ioas_copy; @@ -342,6 +358,10 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { __reserved), IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc, __reserved), + IOCTL_OP(IOMMU_HWPT_GET_DIRTY_BITMAP, iommufd_hwpt_get_dirty_bitmap, + struct iommu_hwpt_get_dirty_bitmap, data), + IOCTL_OP(IOMMU_HWPT_SET_DIRTY_TRACKING, iommufd_hwpt_set_dirty_tracking, + struct iommu_hwpt_set_dirty_tracking, __reserved), IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl, struct iommu_ioas_alloc, out_ioas_id), IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas, @@ -482,9 +502,13 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { [IOMMUFD_OBJ_IOAS] = { .destroy = iommufd_ioas_destroy, }, - [IOMMUFD_OBJ_HW_PAGETABLE] = { - .destroy = iommufd_hw_pagetable_destroy, - .abort = iommufd_hw_pagetable_abort, + [IOMMUFD_OBJ_HWPT_PAGING] = { + .destroy = iommufd_hwpt_paging_destroy, + .abort = iommufd_hwpt_paging_abort, + }, + [IOMMUFD_OBJ_HWPT_NESTED] = { + .destroy = iommufd_hwpt_nested_destroy, + .abort = iommufd_hwpt_nested_abort, }, #ifdef CONFIG_IOMMUFD_TEST [IOMMUFD_OBJ_SELFTEST] = { @@ -552,5 +576,6 @@ MODULE_ALIAS_MISCDEV(VFIO_MINOR); MODULE_ALIAS("devname:vfio/vfio"); #endif MODULE_IMPORT_NS(IOMMUFD_INTERNAL); +MODULE_IMPORT_NS(IOMMUFD); MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices"); MODULE_LICENSE("GPL"); diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 56506d5753..022ef8f550 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -20,10 +20,13 @@ static DECLARE_FAULT_ATTR(fail_iommufd); static struct dentry *dbgfs_root; static struct platform_device *selftest_iommu_dev; +static const struct iommu_ops mock_ops; +static struct iommu_domain_ops domain_nested_ops; size_t iommufd_test_memory_limit = 65536; enum { + MOCK_DIRTY_TRACK = 1, MOCK_IO_PAGE_SIZE = PAGE_SIZE / 2, /* @@ -36,6 +39,7 @@ enum { _MOCK_PFN_START = MOCK_PFN_MASK + 1, MOCK_PFN_START_IOVA = _MOCK_PFN_START, MOCK_PFN_LAST_IOVA = _MOCK_PFN_START, + MOCK_PFN_DIRTY_IOVA = _MOCK_PFN_START << 1, }; /* @@ -82,20 +86,28 @@ void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, if (IS_ERR(ioas)) return; *iova = iommufd_test_syz_conv_iova(&ioas->iopt, iova); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); } struct mock_iommu_domain { + unsigned long flags; struct iommu_domain domain; struct xarray pfns; }; +struct mock_iommu_domain_nested { + struct iommu_domain domain; + struct mock_iommu_domain *parent; + u32 iotlb[MOCK_NESTED_DOMAIN_IOTLB_NUM]; +}; + enum selftest_obj_type { TYPE_IDEV, }; struct mock_dev { struct device dev; + unsigned long flags; }; struct selftest_obj { @@ -111,18 +123,18 @@ struct selftest_obj { }; }; -static void mock_domain_blocking_free(struct iommu_domain *domain) -{ -} - static int mock_domain_nop_attach(struct iommu_domain *domain, struct device *dev) { + struct mock_dev *mdev = container_of(dev, struct mock_dev, dev); + + if (domain->dirty_ops && (mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY)) + return -EINVAL; + return 0; } static const struct iommu_domain_ops mock_blocking_ops = { - .free = mock_domain_blocking_free, .attach_dev = mock_domain_nop_attach, }; @@ -146,15 +158,70 @@ static void *mock_domain_hw_info(struct device *dev, u32 *length, u32 *type) return info; } -static struct iommu_domain *mock_domain_alloc(unsigned int iommu_domain_type) +static int mock_domain_set_dirty_tracking(struct iommu_domain *domain, + bool enable) { - struct mock_iommu_domain *mock; + struct mock_iommu_domain *mock = + container_of(domain, struct mock_iommu_domain, domain); + unsigned long flags = mock->flags; - if (iommu_domain_type == IOMMU_DOMAIN_BLOCKED) - return &mock_blocking_domain; + if (enable && !domain->dirty_ops) + return -EINVAL; - if (iommu_domain_type != IOMMU_DOMAIN_UNMANAGED) - return NULL; + /* No change? */ + if (!(enable ^ !!(flags & MOCK_DIRTY_TRACK))) + return 0; + + flags = (enable ? flags | MOCK_DIRTY_TRACK : flags & ~MOCK_DIRTY_TRACK); + + mock->flags = flags; + return 0; +} + +static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty) +{ + struct mock_iommu_domain *mock = + container_of(domain, struct mock_iommu_domain, domain); + unsigned long i, max = size / MOCK_IO_PAGE_SIZE; + void *ent, *old; + + if (!(mock->flags & MOCK_DIRTY_TRACK) && dirty->bitmap) + return -EINVAL; + + for (i = 0; i < max; i++) { + unsigned long cur = iova + i * MOCK_IO_PAGE_SIZE; + + ent = xa_load(&mock->pfns, cur / MOCK_IO_PAGE_SIZE); + if (ent && (xa_to_value(ent) & MOCK_PFN_DIRTY_IOVA)) { + /* Clear dirty */ + if (!(flags & IOMMU_DIRTY_NO_CLEAR)) { + unsigned long val; + + val = xa_to_value(ent) & ~MOCK_PFN_DIRTY_IOVA; + old = xa_store(&mock->pfns, + cur / MOCK_IO_PAGE_SIZE, + xa_mk_value(val), GFP_KERNEL); + WARN_ON_ONCE(ent != old); + } + iommu_dirty_bitmap_record(dirty, cur, + MOCK_IO_PAGE_SIZE); + } + } + + return 0; +} + +const struct iommu_dirty_ops dirty_ops = { + .set_dirty_tracking = mock_domain_set_dirty_tracking, + .read_and_clear_dirty = mock_domain_read_and_clear_dirty, +}; + +static struct iommu_domain *mock_domain_alloc_paging(struct device *dev) +{ + struct mock_iommu_domain *mock; mock = kzalloc(sizeof(*mock), GFP_KERNEL); if (!mock) @@ -162,10 +229,78 @@ static struct iommu_domain *mock_domain_alloc(unsigned int iommu_domain_type) mock->domain.geometry.aperture_start = MOCK_APERTURE_START; mock->domain.geometry.aperture_end = MOCK_APERTURE_LAST; mock->domain.pgsize_bitmap = MOCK_IO_PAGE_SIZE; + mock->domain.ops = mock_ops.default_domain_ops; + mock->domain.type = IOMMU_DOMAIN_UNMANAGED; xa_init(&mock->pfns); return &mock->domain; } +static struct iommu_domain * +__mock_domain_alloc_nested(struct mock_iommu_domain *mock_parent, + const struct iommu_hwpt_selftest *user_cfg) +{ + struct mock_iommu_domain_nested *mock_nested; + int i; + + mock_nested = kzalloc(sizeof(*mock_nested), GFP_KERNEL); + if (!mock_nested) + return ERR_PTR(-ENOMEM); + mock_nested->parent = mock_parent; + mock_nested->domain.ops = &domain_nested_ops; + mock_nested->domain.type = IOMMU_DOMAIN_NESTED; + for (i = 0; i < MOCK_NESTED_DOMAIN_IOTLB_NUM; i++) + mock_nested->iotlb[i] = user_cfg->iotlb; + return &mock_nested->domain; +} + +static struct iommu_domain * +mock_domain_alloc_user(struct device *dev, u32 flags, + struct iommu_domain *parent, + const struct iommu_user_data *user_data) +{ + struct mock_iommu_domain *mock_parent; + struct iommu_hwpt_selftest user_cfg; + int rc; + + /* must be mock_domain */ + if (!parent) { + struct mock_dev *mdev = container_of(dev, struct mock_dev, dev); + bool has_dirty_flag = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; + bool no_dirty_ops = mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY; + struct iommu_domain *domain; + + if (flags & (~(IOMMU_HWPT_ALLOC_NEST_PARENT | + IOMMU_HWPT_ALLOC_DIRTY_TRACKING))) + return ERR_PTR(-EOPNOTSUPP); + if (user_data || (has_dirty_flag && no_dirty_ops)) + return ERR_PTR(-EOPNOTSUPP); + domain = mock_domain_alloc_paging(NULL); + if (!domain) + return ERR_PTR(-ENOMEM); + if (has_dirty_flag) + container_of(domain, struct mock_iommu_domain, domain) + ->domain.dirty_ops = &dirty_ops; + return domain; + } + + /* must be mock_domain_nested */ + if (user_data->type != IOMMU_HWPT_DATA_SELFTEST || flags) + return ERR_PTR(-EOPNOTSUPP); + if (!parent || parent->ops != mock_ops.default_domain_ops) + return ERR_PTR(-EINVAL); + + mock_parent = container_of(parent, struct mock_iommu_domain, domain); + if (!mock_parent) + return ERR_PTR(-EINVAL); + + rc = iommu_copy_struct_from_user(&user_cfg, user_data, + IOMMU_HWPT_DATA_SELFTEST, iotlb); + if (rc) + return ERR_PTR(rc); + + return __mock_domain_alloc_nested(mock_parent, &user_cfg); +} + static void mock_domain_free(struct iommu_domain *domain) { struct mock_iommu_domain *mock = @@ -243,7 +378,7 @@ static size_t mock_domain_unmap_pages(struct iommu_domain *domain, for (cur = 0; cur != pgsize; cur += MOCK_IO_PAGE_SIZE) { ent = xa_erase(&mock->pfns, iova / MOCK_IO_PAGE_SIZE); - WARN_ON(!ent); + /* * iommufd generates unmaps that must be a strict * superset of the map's performend So every starting @@ -253,13 +388,13 @@ static size_t mock_domain_unmap_pages(struct iommu_domain *domain, * passed to map_pages */ if (first) { - WARN_ON(!(xa_to_value(ent) & - MOCK_PFN_START_IOVA)); + WARN_ON(ent && !(xa_to_value(ent) & + MOCK_PFN_START_IOVA)); first = false; } if (pgcount == 1 && cur + MOCK_IO_PAGE_SIZE == pgsize) - WARN_ON(!(xa_to_value(ent) & - MOCK_PFN_LAST_IOVA)); + WARN_ON(ent && !(xa_to_value(ent) & + MOCK_PFN_LAST_IOVA)); iova += MOCK_IO_PAGE_SIZE; ret += MOCK_IO_PAGE_SIZE; @@ -283,15 +418,18 @@ static phys_addr_t mock_domain_iova_to_phys(struct iommu_domain *domain, static bool mock_domain_capable(struct device *dev, enum iommu_cap cap) { - return cap == IOMMU_CAP_CACHE_COHERENCY; -} + struct mock_dev *mdev = container_of(dev, struct mock_dev, dev); -static void mock_domain_set_plaform_dma_ops(struct device *dev) -{ - /* - * mock doesn't setup default domains because we can't hook into the - * normal probe path - */ + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + return true; + case IOMMU_CAP_DIRTY_TRACKING: + return !(mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY); + default: + break; + } + + return false; } static struct iommu_device mock_iommu_device = { @@ -303,12 +441,18 @@ static struct iommu_device *mock_probe_device(struct device *dev) } static const struct iommu_ops mock_ops = { + /* + * IOMMU_DOMAIN_BLOCKED cannot be returned from def_domain_type() + * because it is zero. + */ + .default_domain = &mock_blocking_domain, + .blocked_domain = &mock_blocking_domain, .owner = THIS_MODULE, .pgsize_bitmap = MOCK_IO_PAGE_SIZE, .hw_info = mock_domain_hw_info, - .domain_alloc = mock_domain_alloc, + .domain_alloc_paging = mock_domain_alloc_paging, + .domain_alloc_user = mock_domain_alloc_user, .capable = mock_domain_capable, - .set_platform_dma_ops = mock_domain_set_plaform_dma_ops, .device_group = generic_device_group, .probe_device = mock_probe_device, .default_domain_ops = @@ -321,26 +465,67 @@ static const struct iommu_ops mock_ops = { }, }; +static void mock_domain_free_nested(struct iommu_domain *domain) +{ + struct mock_iommu_domain_nested *mock_nested = + container_of(domain, struct mock_iommu_domain_nested, domain); + + kfree(mock_nested); +} + +static struct iommu_domain_ops domain_nested_ops = { + .free = mock_domain_free_nested, + .attach_dev = mock_domain_nop_attach, +}; + static inline struct iommufd_hw_pagetable * -get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id, - struct mock_iommu_domain **mock) +__get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id, u32 hwpt_type) { - struct iommufd_hw_pagetable *hwpt; struct iommufd_object *obj; - obj = iommufd_get_object(ucmd->ictx, mockpt_id, - IOMMUFD_OBJ_HW_PAGETABLE); + obj = iommufd_get_object(ucmd->ictx, mockpt_id, hwpt_type); if (IS_ERR(obj)) return ERR_CAST(obj); - hwpt = container_of(obj, struct iommufd_hw_pagetable, obj); - if (hwpt->domain->ops != mock_ops.default_domain_ops) { - iommufd_put_object(&hwpt->obj); + return container_of(obj, struct iommufd_hw_pagetable, obj); +} + +static inline struct iommufd_hw_pagetable * +get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id, + struct mock_iommu_domain **mock) +{ + struct iommufd_hw_pagetable *hwpt; + + hwpt = __get_md_pagetable(ucmd, mockpt_id, IOMMUFD_OBJ_HWPT_PAGING); + if (IS_ERR(hwpt)) + return hwpt; + if (hwpt->domain->type != IOMMU_DOMAIN_UNMANAGED || + hwpt->domain->ops != mock_ops.default_domain_ops) { + iommufd_put_object(ucmd->ictx, &hwpt->obj); return ERR_PTR(-EINVAL); } *mock = container_of(hwpt->domain, struct mock_iommu_domain, domain); return hwpt; } +static inline struct iommufd_hw_pagetable * +get_md_pagetable_nested(struct iommufd_ucmd *ucmd, u32 mockpt_id, + struct mock_iommu_domain_nested **mock_nested) +{ + struct iommufd_hw_pagetable *hwpt; + + hwpt = __get_md_pagetable(ucmd, mockpt_id, IOMMUFD_OBJ_HWPT_NESTED); + if (IS_ERR(hwpt)) + return hwpt; + if (hwpt->domain->type != IOMMU_DOMAIN_NESTED || + hwpt->domain->ops != &domain_nested_ops) { + iommufd_put_object(ucmd->ictx, &hwpt->obj); + return ERR_PTR(-EINVAL); + } + *mock_nested = container_of(hwpt->domain, + struct mock_iommu_domain_nested, domain); + return hwpt; +} + struct mock_bus_type { struct bus_type bus; struct notifier_block nb; @@ -362,16 +547,20 @@ static void mock_dev_release(struct device *dev) kfree(mdev); } -static struct mock_dev *mock_dev_create(void) +static struct mock_dev *mock_dev_create(unsigned long dev_flags) { struct mock_dev *mdev; int rc; + if (dev_flags & ~(MOCK_FLAGS_DEVICE_NO_DIRTY)) + return ERR_PTR(-EINVAL); + mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); if (!mdev) return ERR_PTR(-ENOMEM); device_initialize(&mdev->dev); + mdev->flags = dev_flags; mdev->dev.release = mock_dev_release; mdev->dev.bus = &iommufd_mock_bus_type.bus; @@ -407,6 +596,7 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd, struct iommufd_device *idev; struct selftest_obj *sobj; u32 pt_id = cmd->id; + u32 dev_flags = 0; u32 idev_id; int rc; @@ -417,7 +607,10 @@ static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd, sobj->idev.ictx = ucmd->ictx; sobj->type = TYPE_IDEV; - sobj->idev.mock_dev = mock_dev_create(); + if (cmd->op == IOMMU_TEST_OP_MOCK_DOMAIN_FLAGS) + dev_flags = cmd->mock_domain_flags.dev_flags; + + sobj->idev.mock_dev = mock_dev_create(dev_flags); if (IS_ERR(sobj->idev.mock_dev)) { rc = PTR_ERR(sobj->idev.mock_dev); goto out_sobj; @@ -488,7 +681,7 @@ static int iommufd_test_mock_domain_replace(struct iommufd_ucmd *ucmd, rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); out_dev_obj: - iommufd_put_object(dev_obj); + iommufd_put_object(ucmd->ictx, dev_obj); return rc; } @@ -506,7 +699,7 @@ static int iommufd_test_add_reserved(struct iommufd_ucmd *ucmd, down_write(&ioas->iopt.iova_rwsem); rc = iopt_reserve_iova(&ioas->iopt, start, start + length - 1, NULL); up_write(&ioas->iopt.iova_rwsem); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return rc; } @@ -561,7 +754,7 @@ static int iommufd_test_md_check_pa(struct iommufd_ucmd *ucmd, rc = 0; out_put: - iommufd_put_object(&hwpt->obj); + iommufd_put_object(ucmd->ictx, &hwpt->obj); return rc; } @@ -977,6 +1170,73 @@ static_assert((unsigned int)MOCK_ACCESS_RW_WRITE == IOMMUFD_ACCESS_RW_WRITE); static_assert((unsigned int)MOCK_ACCESS_RW_SLOW_PATH == __IOMMUFD_ACCESS_RW_SLOW_PATH); +static int iommufd_test_dirty(struct iommufd_ucmd *ucmd, unsigned int mockpt_id, + unsigned long iova, size_t length, + unsigned long page_size, void __user *uptr, + u32 flags) +{ + unsigned long bitmap_size, i, max; + struct iommu_test_cmd *cmd = ucmd->cmd; + struct iommufd_hw_pagetable *hwpt; + struct mock_iommu_domain *mock; + int rc, count = 0; + void *tmp; + + if (!page_size || !length || iova % page_size || length % page_size || + !uptr) + return -EINVAL; + + hwpt = get_md_pagetable(ucmd, mockpt_id, &mock); + if (IS_ERR(hwpt)) + return PTR_ERR(hwpt); + + if (!(mock->flags & MOCK_DIRTY_TRACK)) { + rc = -EINVAL; + goto out_put; + } + + max = length / page_size; + bitmap_size = max / BITS_PER_BYTE; + + tmp = kvzalloc(bitmap_size, GFP_KERNEL_ACCOUNT); + if (!tmp) { + rc = -ENOMEM; + goto out_put; + } + + if (copy_from_user(tmp, uptr, bitmap_size)) { + rc = -EFAULT; + goto out_free; + } + + for (i = 0; i < max; i++) { + unsigned long cur = iova + i * page_size; + void *ent, *old; + + if (!test_bit(i, (unsigned long *)tmp)) + continue; + + ent = xa_load(&mock->pfns, cur / page_size); + if (ent) { + unsigned long val; + + val = xa_to_value(ent) | MOCK_PFN_DIRTY_IOVA; + old = xa_store(&mock->pfns, cur / page_size, + xa_mk_value(val), GFP_KERNEL); + WARN_ON_ONCE(ent != old); + count++; + } + } + + cmd->dirty.out_nr_dirty = count; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); +out_free: + kvfree(tmp); +out_put: + iommufd_put_object(ucmd->ictx, &hwpt->obj); + return rc; +} + void iommufd_selftest_destroy(struct iommufd_object *obj) { struct selftest_obj *sobj = container_of(obj, struct selftest_obj, obj); @@ -1000,6 +1260,7 @@ int iommufd_test(struct iommufd_ucmd *ucmd) cmd->add_reserved.start, cmd->add_reserved.length); case IOMMU_TEST_OP_MOCK_DOMAIN: + case IOMMU_TEST_OP_MOCK_DOMAIN_FLAGS: return iommufd_test_mock_domain(ucmd, cmd); case IOMMU_TEST_OP_MOCK_DOMAIN_REPLACE: return iommufd_test_mock_domain_replace( @@ -1041,6 +1302,12 @@ int iommufd_test(struct iommufd_ucmd *ucmd) return -EINVAL; iommufd_test_memory_limit = cmd->memory_limit.limit; return 0; + case IOMMU_TEST_OP_DIRTY: + return iommufd_test_dirty(ucmd, cmd->id, cmd->dirty.iova, + cmd->dirty.length, + cmd->dirty.page_size, + u64_to_user_ptr(cmd->dirty.uptr), + cmd->dirty.flags); default: return -EOPNOTSUPP; } diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c index 6c810bf80f..a3ad5f0b6c 100644 --- a/drivers/iommu/iommufd/vfio_compat.c +++ b/drivers/iommu/iommufd/vfio_compat.c @@ -41,7 +41,7 @@ int iommufd_vfio_compat_ioas_get_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) if (IS_ERR(ioas)) return PTR_ERR(ioas); *out_ioas_id = ioas->obj.id; - iommufd_put_object(&ioas->obj); + iommufd_put_object(ictx, &ioas->obj); return 0; } EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_get_id, IOMMUFD_VFIO); @@ -98,7 +98,7 @@ int iommufd_vfio_compat_ioas_create(struct iommufd_ctx *ictx) if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj)) { ret = 0; - iommufd_put_object(&ictx->vfio_ioas->obj); + iommufd_put_object(ictx, &ictx->vfio_ioas->obj); goto out_abort; } ictx->vfio_ioas = ioas; @@ -133,7 +133,7 @@ int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd) if (IS_ERR(ioas)) return PTR_ERR(ioas); cmd->ioas_id = ioas->obj.id; - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return iommufd_ucmd_respond(ucmd, sizeof(*cmd)); case IOMMU_VFIO_IOAS_SET: @@ -143,7 +143,7 @@ int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd) xa_lock(&ucmd->ictx->objects); ucmd->ictx->vfio_ioas = ioas; xa_unlock(&ucmd->ictx->objects); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ucmd->ictx, &ioas->obj); return 0; case IOMMU_VFIO_IOAS_CLEAR: @@ -190,7 +190,7 @@ static int iommufd_vfio_map_dma(struct iommufd_ctx *ictx, unsigned int cmd, iova = map.iova; rc = iopt_map_user_pages(ictx, &ioas->iopt, &iova, u64_to_user_ptr(map.vaddr), map.size, iommu_prot, 0); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ictx, &ioas->obj); return rc; } @@ -249,13 +249,13 @@ static int iommufd_vfio_unmap_dma(struct iommufd_ctx *ictx, unsigned int cmd, rc = -EFAULT; err_put: - iommufd_put_object(&ioas->obj); + iommufd_put_object(ictx, &ioas->obj); return rc; } static int iommufd_vfio_cc_iommu(struct iommufd_ctx *ictx) { - struct iommufd_hw_pagetable *hwpt; + struct iommufd_hwpt_paging *hwpt_paging; struct iommufd_ioas *ioas; int rc = 1; @@ -264,15 +264,15 @@ static int iommufd_vfio_cc_iommu(struct iommufd_ctx *ictx) return PTR_ERR(ioas); mutex_lock(&ioas->mutex); - list_for_each_entry(hwpt, &ioas->hwpt_list, hwpt_item) { - if (!hwpt->enforce_cache_coherency) { + list_for_each_entry(hwpt_paging, &ioas->hwpt_list, hwpt_item) { + if (!hwpt_paging->enforce_cache_coherency) { rc = 0; break; } } mutex_unlock(&ioas->mutex); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ictx, &ioas->obj); return rc; } @@ -349,7 +349,7 @@ static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type) */ if (type == VFIO_TYPE1_IOMMU) rc = iopt_disable_large_pages(&ioas->iopt); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ictx, &ioas->obj); return rc; } @@ -511,7 +511,7 @@ static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx, out_put: up_read(&ioas->iopt.iova_rwsem); - iommufd_put_object(&ioas->obj); + iommufd_put_object(ictx, &ioas->obj); return rc; } |