diff options
Diffstat (limited to 'drivers/gpu/drm/ttm')
-rw-r--r-- | drivers/gpu/drm/ttm/Makefile | 10 | ||||
-rw-r--r-- | drivers/gpu/drm/ttm/ttm_agp_backend.c | 144 | ||||
-rw-r--r-- | drivers/gpu/drm/ttm/ttm_bo.c | 1219 | ||||
-rw-r--r-- | drivers/gpu/drm/ttm/ttm_bo_util.c | 677 | ||||
-rw-r--r-- | drivers/gpu/drm/ttm/ttm_bo_vm.c | 475 | ||||
-rw-r--r-- | drivers/gpu/drm/ttm/ttm_device.c | 307 | ||||
-rw-r--r-- | drivers/gpu/drm/ttm/ttm_execbuf_util.c | 165 | ||||
-rw-r--r-- | drivers/gpu/drm/ttm/ttm_module.c | 91 | ||||
-rw-r--r-- | drivers/gpu/drm/ttm/ttm_module.h | 43 | ||||
-rw-r--r-- | drivers/gpu/drm/ttm/ttm_pool.c | 802 | ||||
-rw-r--r-- | drivers/gpu/drm/ttm/ttm_range_manager.c | 241 | ||||
-rw-r--r-- | drivers/gpu/drm/ttm/ttm_resource.c | 763 | ||||
-rw-r--r-- | drivers/gpu/drm/ttm/ttm_sys_manager.c | 49 | ||||
-rw-r--r-- | drivers/gpu/drm/ttm/ttm_tt.c | 451 |
14 files changed, 5437 insertions, 0 deletions
diff --git a/drivers/gpu/drm/ttm/Makefile b/drivers/gpu/drm/ttm/Makefile new file mode 100644 index 000000000..f906b2295 --- /dev/null +++ b/drivers/gpu/drm/ttm/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the drm device driver. This driver provides support for the + +ttm-y := ttm_tt.o ttm_bo.o ttm_bo_util.o ttm_bo_vm.o ttm_module.o \ + ttm_execbuf_util.o ttm_range_manager.o ttm_resource.o ttm_pool.o \ + ttm_device.o ttm_sys_manager.o +ttm-$(CONFIG_AGP) += ttm_agp_backend.o + +obj-$(CONFIG_DRM_TTM) += ttm.o diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c new file mode 100644 index 000000000..d27691f2e --- /dev/null +++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c @@ -0,0 +1,144 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/************************************************************************** + * + * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +/* + * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com> + * Keith Packard. + */ + +#define pr_fmt(fmt) "[TTM] " fmt + +#include <drm/ttm/ttm_device.h> +#include <drm/ttm/ttm_tt.h> +#include <drm/ttm/ttm_resource.h> +#include <linux/agp_backend.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <asm/agp.h> + +struct ttm_agp_backend { + struct ttm_tt ttm; + struct agp_memory *mem; + struct agp_bridge_data *bridge; +}; + +int ttm_agp_bind(struct ttm_tt *ttm, struct ttm_resource *bo_mem) +{ + struct ttm_agp_backend *agp_be = container_of(ttm, struct ttm_agp_backend, ttm); + struct page *dummy_read_page = ttm_glob.dummy_read_page; + struct agp_memory *mem; + int ret, cached = ttm->caching == ttm_cached; + unsigned i; + + if (agp_be->mem) + return 0; + + mem = agp_allocate_memory(agp_be->bridge, ttm->num_pages, AGP_USER_MEMORY); + if (unlikely(mem == NULL)) + return -ENOMEM; + + mem->page_count = 0; + for (i = 0; i < ttm->num_pages; i++) { + struct page *page = ttm->pages[i]; + + if (!page) + page = dummy_read_page; + + mem->pages[mem->page_count++] = page; + } + agp_be->mem = mem; + + mem->is_flushed = 1; + mem->type = (cached) ? AGP_USER_CACHED_MEMORY : AGP_USER_MEMORY; + + ret = agp_bind_memory(mem, bo_mem->start); + if (ret) + pr_err("AGP Bind memory failed\n"); + + return ret; +} +EXPORT_SYMBOL(ttm_agp_bind); + +void ttm_agp_unbind(struct ttm_tt *ttm) +{ + struct ttm_agp_backend *agp_be = container_of(ttm, struct ttm_agp_backend, ttm); + + if (agp_be->mem) { + if (agp_be->mem->is_bound) { + agp_unbind_memory(agp_be->mem); + return; + } + agp_free_memory(agp_be->mem); + agp_be->mem = NULL; + } +} +EXPORT_SYMBOL(ttm_agp_unbind); + +bool ttm_agp_is_bound(struct ttm_tt *ttm) +{ + struct ttm_agp_backend *agp_be = container_of(ttm, struct ttm_agp_backend, ttm); + + if (!ttm) + return false; + + return (agp_be->mem != NULL); +} +EXPORT_SYMBOL(ttm_agp_is_bound); + +void ttm_agp_destroy(struct ttm_tt *ttm) +{ + struct ttm_agp_backend *agp_be = container_of(ttm, struct ttm_agp_backend, ttm); + + if (agp_be->mem) + ttm_agp_unbind(ttm); + ttm_tt_fini(ttm); + kfree(agp_be); +} +EXPORT_SYMBOL(ttm_agp_destroy); + +struct ttm_tt *ttm_agp_tt_create(struct ttm_buffer_object *bo, + struct agp_bridge_data *bridge, + uint32_t page_flags) +{ + struct ttm_agp_backend *agp_be; + + agp_be = kmalloc(sizeof(*agp_be), GFP_KERNEL); + if (!agp_be) + return NULL; + + agp_be->mem = NULL; + agp_be->bridge = bridge; + + if (ttm_tt_init(&agp_be->ttm, bo, page_flags, ttm_write_combined, 0)) { + kfree(agp_be); + return NULL; + } + + return &agp_be->ttm; +} +EXPORT_SYMBOL(ttm_agp_tt_create); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c new file mode 100644 index 000000000..db332de13 --- /dev/null +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -0,0 +1,1219 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/************************************************************************** + * + * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +/* + * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com> + */ + +#define pr_fmt(fmt) "[TTM] " fmt + +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> +#include <linux/jiffies.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/file.h> +#include <linux/module.h> +#include <linux/atomic.h> +#include <linux/dma-resv.h> + +#include "ttm_module.h" + +static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo, + struct ttm_placement *placement) +{ + struct drm_printer p = drm_debug_printer(TTM_PFX); + struct ttm_resource_manager *man; + int i, mem_type; + + drm_printf(&p, "No space for %p (%lu pages, %zuK, %zuM)\n", + bo, bo->resource->num_pages, bo->base.size >> 10, + bo->base.size >> 20); + for (i = 0; i < placement->num_placement; i++) { + mem_type = placement->placement[i].mem_type; + drm_printf(&p, " placement[%d]=0x%08X (%d)\n", + i, placement->placement[i].flags, mem_type); + man = ttm_manager_type(bo->bdev, mem_type); + ttm_resource_manager_debug(man, &p); + } +} + +/** + * ttm_bo_move_to_lru_tail + * + * @bo: The buffer object. + * + * Move this BO to the tail of all lru lists used to lookup and reserve an + * object. This function must be called with struct ttm_global::lru_lock + * held, and is used to make a BO less likely to be considered for eviction. + */ +void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo) +{ + dma_resv_assert_held(bo->base.resv); + + if (bo->resource) + ttm_resource_move_to_lru_tail(bo->resource); +} +EXPORT_SYMBOL(ttm_bo_move_to_lru_tail); + +/** + * ttm_bo_set_bulk_move - update BOs bulk move object + * + * @bo: The buffer object. + * + * Update the BOs bulk move object, making sure that resources are added/removed + * as well. A bulk move allows to move many resource on the LRU at once, + * resulting in much less overhead of maintaining the LRU. + * The only requirement is that the resources stay together on the LRU and are + * never separated. This is enforces by setting the bulk_move structure on a BO. + * ttm_lru_bulk_move_tail() should be used to move all resources to the tail of + * their LRU list. + */ +void ttm_bo_set_bulk_move(struct ttm_buffer_object *bo, + struct ttm_lru_bulk_move *bulk) +{ + dma_resv_assert_held(bo->base.resv); + + if (bo->bulk_move == bulk) + return; + + spin_lock(&bo->bdev->lru_lock); + if (bo->resource) + ttm_resource_del_bulk_move(bo->resource, bo); + bo->bulk_move = bulk; + if (bo->resource) + ttm_resource_add_bulk_move(bo->resource, bo); + spin_unlock(&bo->bdev->lru_lock); +} +EXPORT_SYMBOL(ttm_bo_set_bulk_move); + +static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, + struct ttm_resource *mem, bool evict, + struct ttm_operation_ctx *ctx, + struct ttm_place *hop) +{ + struct ttm_device *bdev = bo->bdev; + bool old_use_tt, new_use_tt; + int ret; + + old_use_tt = bo->resource && + ttm_manager_type(bdev, bo->resource->mem_type)->use_tt; + new_use_tt = ttm_manager_type(bdev, mem->mem_type)->use_tt; + + ttm_bo_unmap_virtual(bo); + + /* + * Create and bind a ttm if required. + */ + + if (new_use_tt) { + /* Zero init the new TTM structure if the old location should + * have used one as well. + */ + ret = ttm_tt_create(bo, old_use_tt); + if (ret) + goto out_err; + + if (mem->mem_type != TTM_PL_SYSTEM) { + ret = ttm_tt_populate(bo->bdev, bo->ttm, ctx); + if (ret) + goto out_err; + } + } + + ret = dma_resv_reserve_fences(bo->base.resv, 1); + if (ret) + goto out_err; + + ret = bdev->funcs->move(bo, evict, ctx, mem, hop); + if (ret) { + if (ret == -EMULTIHOP) + return ret; + goto out_err; + } + + ctx->bytes_moved += bo->base.size; + return 0; + +out_err: + if (!old_use_tt) + ttm_bo_tt_destroy(bo); + + return ret; +} + +/* + * Call bo::reserved. + * Will release GPU memory type usage on destruction. + * This is the place to put in driver specific hooks to release + * driver private resources. + * Will release the bo::reserved lock. + */ + +static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo) +{ + if (bo->bdev->funcs->delete_mem_notify) + bo->bdev->funcs->delete_mem_notify(bo); + + ttm_bo_tt_destroy(bo); + ttm_resource_free(bo, &bo->resource); +} + +static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo) +{ + int r; + + if (bo->base.resv == &bo->base._resv) + return 0; + + BUG_ON(!dma_resv_trylock(&bo->base._resv)); + + r = dma_resv_copy_fences(&bo->base._resv, bo->base.resv); + dma_resv_unlock(&bo->base._resv); + if (r) + return r; + + if (bo->type != ttm_bo_type_sg) { + /* This works because the BO is about to be destroyed and nobody + * reference it any more. The only tricky case is the trylock on + * the resv object while holding the lru_lock. + */ + spin_lock(&bo->bdev->lru_lock); + bo->base.resv = &bo->base._resv; + spin_unlock(&bo->bdev->lru_lock); + } + + return r; +} + +static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo) +{ + struct dma_resv *resv = &bo->base._resv; + struct dma_resv_iter cursor; + struct dma_fence *fence; + + dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP); + dma_resv_for_each_fence_unlocked(&cursor, fence) { + if (!fence->ops->signaled) + dma_fence_enable_sw_signaling(fence); + } + dma_resv_iter_end(&cursor); +} + +/** + * ttm_bo_cleanup_refs + * If bo idle, remove from lru lists, and unref. + * If not idle, block if possible. + * + * Must be called with lru_lock and reservation held, this function + * will drop the lru lock and optionally the reservation lock before returning. + * + * @bo: The buffer object to clean-up + * @interruptible: Any sleeps should occur interruptibly. + * @no_wait_gpu: Never wait for gpu. Return -EBUSY instead. + * @unlock_resv: Unlock the reservation lock as well. + */ + +static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, + bool interruptible, bool no_wait_gpu, + bool unlock_resv) +{ + struct dma_resv *resv = &bo->base._resv; + int ret; + + if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP)) + ret = 0; + else + ret = -EBUSY; + + if (ret && !no_wait_gpu) { + long lret; + + if (unlock_resv) + dma_resv_unlock(bo->base.resv); + spin_unlock(&bo->bdev->lru_lock); + + lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP, + interruptible, + 30 * HZ); + + if (lret < 0) + return lret; + else if (lret == 0) + return -EBUSY; + + spin_lock(&bo->bdev->lru_lock); + if (unlock_resv && !dma_resv_trylock(bo->base.resv)) { + /* + * We raced, and lost, someone else holds the reservation now, + * and is probably busy in ttm_bo_cleanup_memtype_use. + * + * Even if it's not the case, because we finished waiting any + * delayed destruction would succeed, so just return success + * here. + */ + spin_unlock(&bo->bdev->lru_lock); + return 0; + } + ret = 0; + } + + if (ret || unlikely(list_empty(&bo->ddestroy))) { + if (unlock_resv) + dma_resv_unlock(bo->base.resv); + spin_unlock(&bo->bdev->lru_lock); + return ret; + } + + list_del_init(&bo->ddestroy); + spin_unlock(&bo->bdev->lru_lock); + ttm_bo_cleanup_memtype_use(bo); + + if (unlock_resv) + dma_resv_unlock(bo->base.resv); + + ttm_bo_put(bo); + + return 0; +} + +/* + * Traverse the delayed list, and call ttm_bo_cleanup_refs on all + * encountered buffers. + */ +bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all) +{ + struct list_head removed; + bool empty; + + INIT_LIST_HEAD(&removed); + + spin_lock(&bdev->lru_lock); + while (!list_empty(&bdev->ddestroy)) { + struct ttm_buffer_object *bo; + + bo = list_first_entry(&bdev->ddestroy, struct ttm_buffer_object, + ddestroy); + list_move_tail(&bo->ddestroy, &removed); + if (!ttm_bo_get_unless_zero(bo)) + continue; + + if (remove_all || bo->base.resv != &bo->base._resv) { + spin_unlock(&bdev->lru_lock); + dma_resv_lock(bo->base.resv, NULL); + + spin_lock(&bdev->lru_lock); + ttm_bo_cleanup_refs(bo, false, !remove_all, true); + + } else if (dma_resv_trylock(bo->base.resv)) { + ttm_bo_cleanup_refs(bo, false, !remove_all, true); + } else { + spin_unlock(&bdev->lru_lock); + } + + ttm_bo_put(bo); + spin_lock(&bdev->lru_lock); + } + list_splice_tail(&removed, &bdev->ddestroy); + empty = list_empty(&bdev->ddestroy); + spin_unlock(&bdev->lru_lock); + + return empty; +} + +static void ttm_bo_release(struct kref *kref) +{ + struct ttm_buffer_object *bo = + container_of(kref, struct ttm_buffer_object, kref); + struct ttm_device *bdev = bo->bdev; + int ret; + + WARN_ON_ONCE(bo->pin_count); + WARN_ON_ONCE(bo->bulk_move); + + if (!bo->deleted) { + ret = ttm_bo_individualize_resv(bo); + if (ret) { + /* Last resort, if we fail to allocate memory for the + * fences block for the BO to become idle + */ + dma_resv_wait_timeout(bo->base.resv, + DMA_RESV_USAGE_BOOKKEEP, false, + 30 * HZ); + } + + if (bo->bdev->funcs->release_notify) + bo->bdev->funcs->release_notify(bo); + + drm_vma_offset_remove(bdev->vma_manager, &bo->base.vma_node); + ttm_mem_io_free(bdev, bo->resource); + } + + if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP) || + !dma_resv_trylock(bo->base.resv)) { + /* The BO is not idle, resurrect it for delayed destroy */ + ttm_bo_flush_all_fences(bo); + bo->deleted = true; + + spin_lock(&bo->bdev->lru_lock); + + /* + * Make pinned bos immediately available to + * shrinkers, now that they are queued for + * destruction. + * + * FIXME: QXL is triggering this. Can be removed when the + * driver is fixed. + */ + if (bo->pin_count) { + bo->pin_count = 0; + ttm_resource_move_to_lru_tail(bo->resource); + } + + kref_init(&bo->kref); + list_add_tail(&bo->ddestroy, &bdev->ddestroy); + spin_unlock(&bo->bdev->lru_lock); + + schedule_delayed_work(&bdev->wq, + ((HZ / 100) < 1) ? 1 : HZ / 100); + return; + } + + spin_lock(&bo->bdev->lru_lock); + list_del(&bo->ddestroy); + spin_unlock(&bo->bdev->lru_lock); + + ttm_bo_cleanup_memtype_use(bo); + dma_resv_unlock(bo->base.resv); + + atomic_dec(&ttm_glob.bo_count); + bo->destroy(bo); +} + +void ttm_bo_put(struct ttm_buffer_object *bo) +{ + kref_put(&bo->kref, ttm_bo_release); +} +EXPORT_SYMBOL(ttm_bo_put); + +int ttm_bo_lock_delayed_workqueue(struct ttm_device *bdev) +{ + return cancel_delayed_work_sync(&bdev->wq); +} +EXPORT_SYMBOL(ttm_bo_lock_delayed_workqueue); + +void ttm_bo_unlock_delayed_workqueue(struct ttm_device *bdev, int resched) +{ + if (resched) + schedule_delayed_work(&bdev->wq, + ((HZ / 100) < 1) ? 1 : HZ / 100); +} +EXPORT_SYMBOL(ttm_bo_unlock_delayed_workqueue); + +static int ttm_bo_bounce_temp_buffer(struct ttm_buffer_object *bo, + struct ttm_resource **mem, + struct ttm_operation_ctx *ctx, + struct ttm_place *hop) +{ + struct ttm_placement hop_placement; + struct ttm_resource *hop_mem; + int ret; + + hop_placement.num_placement = hop_placement.num_busy_placement = 1; + hop_placement.placement = hop_placement.busy_placement = hop; + + /* find space in the bounce domain */ + ret = ttm_bo_mem_space(bo, &hop_placement, &hop_mem, ctx); + if (ret) + return ret; + /* move to the bounce domain */ + ret = ttm_bo_handle_move_mem(bo, hop_mem, false, ctx, NULL); + if (ret) { + ttm_resource_free(bo, &hop_mem); + return ret; + } + return 0; +} + +static int ttm_bo_evict(struct ttm_buffer_object *bo, + struct ttm_operation_ctx *ctx) +{ + struct ttm_device *bdev = bo->bdev; + struct ttm_resource *evict_mem; + struct ttm_placement placement; + struct ttm_place hop; + int ret = 0; + + memset(&hop, 0, sizeof(hop)); + + dma_resv_assert_held(bo->base.resv); + + placement.num_placement = 0; + placement.num_busy_placement = 0; + bdev->funcs->evict_flags(bo, &placement); + + if (!placement.num_placement && !placement.num_busy_placement) { + ret = ttm_bo_wait(bo, true, false); + if (ret) + return ret; + + /* + * Since we've already synced, this frees backing store + * immediately. + */ + return ttm_bo_pipeline_gutting(bo); + } + + ret = ttm_bo_mem_space(bo, &placement, &evict_mem, ctx); + if (ret) { + if (ret != -ERESTARTSYS) { + pr_err("Failed to find memory space for buffer 0x%p eviction\n", + bo); + ttm_bo_mem_space_debug(bo, &placement); + } + goto out; + } + + do { + ret = ttm_bo_handle_move_mem(bo, evict_mem, true, ctx, &hop); + if (ret != -EMULTIHOP) + break; + + ret = ttm_bo_bounce_temp_buffer(bo, &evict_mem, ctx, &hop); + } while (!ret); + + if (ret) { + ttm_resource_free(bo, &evict_mem); + if (ret != -ERESTARTSYS && ret != -EINTR) + pr_err("Buffer eviction failed\n"); + } +out: + return ret; +} + +bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, + const struct ttm_place *place) +{ + struct ttm_resource *res = bo->resource; + struct ttm_device *bdev = bo->bdev; + + dma_resv_assert_held(bo->base.resv); + if (bo->resource->mem_type == TTM_PL_SYSTEM) + return true; + + /* Don't evict this BO if it's outside of the + * requested placement range + */ + return ttm_resource_intersects(bdev, res, place, bo->base.size); +} +EXPORT_SYMBOL(ttm_bo_eviction_valuable); + +/* + * Check the target bo is allowable to be evicted or swapout, including cases: + * + * a. if share same reservation object with ctx->resv, have assumption + * reservation objects should already be locked, so not lock again and + * return true directly when either the opreation allow_reserved_eviction + * or the target bo already is in delayed free list; + * + * b. Otherwise, trylock it. + */ +static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo, + struct ttm_operation_ctx *ctx, + const struct ttm_place *place, + bool *locked, bool *busy) +{ + bool ret = false; + + if (bo->pin_count) { + *locked = false; + if (busy) + *busy = false; + return false; + } + + if (bo->base.resv == ctx->resv) { + dma_resv_assert_held(bo->base.resv); + if (ctx->allow_res_evict) + ret = true; + *locked = false; + if (busy) + *busy = false; + } else { + ret = dma_resv_trylock(bo->base.resv); + *locked = ret; + if (busy) + *busy = !ret; + } + + if (ret && place && (bo->resource->mem_type != place->mem_type || + !bo->bdev->funcs->eviction_valuable(bo, place))) { + ret = false; + if (*locked) { + dma_resv_unlock(bo->base.resv); + *locked = false; + } + } + + return ret; +} + +/** + * ttm_mem_evict_wait_busy - wait for a busy BO to become available + * + * @busy_bo: BO which couldn't be locked with trylock + * @ctx: operation context + * @ticket: acquire ticket + * + * Try to lock a busy buffer object to avoid failing eviction. + */ +static int ttm_mem_evict_wait_busy(struct ttm_buffer_object *busy_bo, + struct ttm_operation_ctx *ctx, + struct ww_acquire_ctx *ticket) +{ + int r; + + if (!busy_bo || !ticket) + return -EBUSY; + + if (ctx->interruptible) + r = dma_resv_lock_interruptible(busy_bo->base.resv, + ticket); + else + r = dma_resv_lock(busy_bo->base.resv, ticket); + + /* + * TODO: It would be better to keep the BO locked until allocation is at + * least tried one more time, but that would mean a much larger rework + * of TTM. + */ + if (!r) + dma_resv_unlock(busy_bo->base.resv); + + return r == -EDEADLK ? -EBUSY : r; +} + +int ttm_mem_evict_first(struct ttm_device *bdev, + struct ttm_resource_manager *man, + const struct ttm_place *place, + struct ttm_operation_ctx *ctx, + struct ww_acquire_ctx *ticket) +{ + struct ttm_buffer_object *bo = NULL, *busy_bo = NULL; + struct ttm_resource_cursor cursor; + struct ttm_resource *res; + bool locked = false; + int ret; + + spin_lock(&bdev->lru_lock); + ttm_resource_manager_for_each_res(man, &cursor, res) { + bool busy; + + if (!ttm_bo_evict_swapout_allowable(res->bo, ctx, place, + &locked, &busy)) { + if (busy && !busy_bo && ticket != + dma_resv_locking_ctx(res->bo->base.resv)) + busy_bo = res->bo; + continue; + } + + if (ttm_bo_get_unless_zero(res->bo)) { + bo = res->bo; + break; + } + if (locked) + dma_resv_unlock(res->bo->base.resv); + } + + if (!bo) { + if (busy_bo && !ttm_bo_get_unless_zero(busy_bo)) + busy_bo = NULL; + spin_unlock(&bdev->lru_lock); + ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket); + if (busy_bo) + ttm_bo_put(busy_bo); + return ret; + } + + if (bo->deleted) { + ret = ttm_bo_cleanup_refs(bo, ctx->interruptible, + ctx->no_wait_gpu, locked); + ttm_bo_put(bo); + return ret; + } + + spin_unlock(&bdev->lru_lock); + + ret = ttm_bo_evict(bo, ctx); + if (locked) + ttm_bo_unreserve(bo); + else + ttm_bo_move_to_lru_tail_unlocked(bo); + + ttm_bo_put(bo); + return ret; +} + +/** + * ttm_bo_pin - Pin the buffer object. + * @bo: The buffer object to pin + * + * Make sure the buffer is not evicted any more during memory pressure. + * @bo must be unpinned again by calling ttm_bo_unpin(). + */ +void ttm_bo_pin(struct ttm_buffer_object *bo) +{ + dma_resv_assert_held(bo->base.resv); + WARN_ON_ONCE(!kref_read(&bo->kref)); + spin_lock(&bo->bdev->lru_lock); + if (bo->resource) + ttm_resource_del_bulk_move(bo->resource, bo); + ++bo->pin_count; + spin_unlock(&bo->bdev->lru_lock); +} +EXPORT_SYMBOL(ttm_bo_pin); + +/** + * ttm_bo_unpin - Unpin the buffer object. + * @bo: The buffer object to unpin + * + * Allows the buffer object to be evicted again during memory pressure. + */ +void ttm_bo_unpin(struct ttm_buffer_object *bo) +{ + dma_resv_assert_held(bo->base.resv); + WARN_ON_ONCE(!kref_read(&bo->kref)); + if (WARN_ON_ONCE(!bo->pin_count)) + return; + + spin_lock(&bo->bdev->lru_lock); + --bo->pin_count; + if (bo->resource) + ttm_resource_add_bulk_move(bo->resource, bo); + spin_unlock(&bo->bdev->lru_lock); +} +EXPORT_SYMBOL(ttm_bo_unpin); + +/* + * Add the last move fence to the BO as kernel dependency and reserve a new + * fence slot. + */ +static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo, + struct ttm_resource_manager *man, + struct ttm_resource *mem, + bool no_wait_gpu) +{ + struct dma_fence *fence; + int ret; + + spin_lock(&man->move_lock); + fence = dma_fence_get(man->move); + spin_unlock(&man->move_lock); + + if (!fence) + return 0; + + if (no_wait_gpu) { + ret = dma_fence_is_signaled(fence) ? 0 : -EBUSY; + dma_fence_put(fence); + return ret; + } + + dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL); + + ret = dma_resv_reserve_fences(bo->base.resv, 1); + dma_fence_put(fence); + return ret; +} + +/* + * Repeatedly evict memory from the LRU for @mem_type until we create enough + * space, or we've evicted everything and there isn't enough space. + */ +static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo, + const struct ttm_place *place, + struct ttm_resource **mem, + struct ttm_operation_ctx *ctx) +{ + struct ttm_device *bdev = bo->bdev; + struct ttm_resource_manager *man; + struct ww_acquire_ctx *ticket; + int ret; + + man = ttm_manager_type(bdev, place->mem_type); + ticket = dma_resv_locking_ctx(bo->base.resv); + do { + ret = ttm_resource_alloc(bo, place, mem); + if (likely(!ret)) + break; + if (unlikely(ret != -ENOSPC)) + return ret; + ret = ttm_mem_evict_first(bdev, man, place, ctx, + ticket); + if (unlikely(ret != 0)) + return ret; + } while (1); + + return ttm_bo_add_move_fence(bo, man, *mem, ctx->no_wait_gpu); +} + +/* + * Creates space for memory region @mem according to its type. + * + * This function first searches for free space in compatible memory types in + * the priority order defined by the driver. If free space isn't found, then + * ttm_bo_mem_force_space is attempted in priority order to evict and find + * space. + */ +int ttm_bo_mem_space(struct ttm_buffer_object *bo, + struct ttm_placement *placement, + struct ttm_resource **mem, + struct ttm_operation_ctx *ctx) +{ + struct ttm_device *bdev = bo->bdev; + bool type_found = false; + int i, ret; + + ret = dma_resv_reserve_fences(bo->base.resv, 1); + if (unlikely(ret)) + return ret; + + for (i = 0; i < placement->num_placement; ++i) { + const struct ttm_place *place = &placement->placement[i]; + struct ttm_resource_manager *man; + + man = ttm_manager_type(bdev, place->mem_type); + if (!man || !ttm_resource_manager_used(man)) + continue; + + type_found = true; + ret = ttm_resource_alloc(bo, place, mem); + if (ret == -ENOSPC) + continue; + if (unlikely(ret)) + goto error; + + ret = ttm_bo_add_move_fence(bo, man, *mem, ctx->no_wait_gpu); + if (unlikely(ret)) { + ttm_resource_free(bo, mem); + if (ret == -EBUSY) + continue; + + goto error; + } + return 0; + } + + for (i = 0; i < placement->num_busy_placement; ++i) { + const struct ttm_place *place = &placement->busy_placement[i]; + struct ttm_resource_manager *man; + + man = ttm_manager_type(bdev, place->mem_type); + if (!man || !ttm_resource_manager_used(man)) + continue; + + type_found = true; + ret = ttm_bo_mem_force_space(bo, place, mem, ctx); + if (likely(!ret)) + return 0; + + if (ret && ret != -EBUSY) + goto error; + } + + ret = -ENOMEM; + if (!type_found) { + pr_err(TTM_PFX "No compatible memory type found\n"); + ret = -EINVAL; + } + +error: + return ret; +} +EXPORT_SYMBOL(ttm_bo_mem_space); + +static int ttm_bo_move_buffer(struct ttm_buffer_object *bo, + struct ttm_placement *placement, + struct ttm_operation_ctx *ctx) +{ + struct ttm_resource *mem; + struct ttm_place hop; + int ret; + + dma_resv_assert_held(bo->base.resv); + + /* + * Determine where to move the buffer. + * + * If driver determines move is going to need + * an extra step then it will return -EMULTIHOP + * and the buffer will be moved to the temporary + * stop and the driver will be called to make + * the second hop. + */ + ret = ttm_bo_mem_space(bo, placement, &mem, ctx); + if (ret) + return ret; +bounce: + ret = ttm_bo_handle_move_mem(bo, mem, false, ctx, &hop); + if (ret == -EMULTIHOP) { + ret = ttm_bo_bounce_temp_buffer(bo, &mem, ctx, &hop); + if (ret) + goto out; + /* try and move to final place now. */ + goto bounce; + } +out: + if (ret) + ttm_resource_free(bo, &mem); + return ret; +} + +int ttm_bo_validate(struct ttm_buffer_object *bo, + struct ttm_placement *placement, + struct ttm_operation_ctx *ctx) +{ + int ret; + + dma_resv_assert_held(bo->base.resv); + + /* + * Remove the backing store if no placement is given. + */ + if (!placement->num_placement && !placement->num_busy_placement) + return ttm_bo_pipeline_gutting(bo); + + /* + * Check whether we need to move buffer. + */ + if (!bo->resource || !ttm_resource_compat(bo->resource, placement)) { + ret = ttm_bo_move_buffer(bo, placement, ctx); + if (ret) + return ret; + } + /* + * We might need to add a TTM. + */ + if (!bo->resource || bo->resource->mem_type == TTM_PL_SYSTEM) { + ret = ttm_tt_create(bo, true); + if (ret) + return ret; + } + return 0; +} +EXPORT_SYMBOL(ttm_bo_validate); + +/** + * ttm_bo_init_reserved + * + * @bdev: Pointer to a ttm_device struct. + * @bo: Pointer to a ttm_buffer_object to be initialized. + * @type: Requested type of buffer object. + * @placement: Initial placement for buffer object. + * @alignment: Data alignment in pages. + * @ctx: TTM operation context for memory allocation. + * @sg: Scatter-gather table. + * @resv: Pointer to a dma_resv, or NULL to let ttm allocate one. + * @destroy: Destroy function. Use NULL for kfree(). + * + * This function initializes a pre-allocated struct ttm_buffer_object. + * As this object may be part of a larger structure, this function, + * together with the @destroy function, enables driver-specific objects + * derived from a ttm_buffer_object. + * + * On successful return, the caller owns an object kref to @bo. The kref and + * list_kref are usually set to 1, but note that in some situations, other + * tasks may already be holding references to @bo as well. + * Furthermore, if resv == NULL, the buffer's reservation lock will be held, + * and it is the caller's responsibility to call ttm_bo_unreserve. + * + * If a failure occurs, the function will call the @destroy function. Thus, + * after a failure, dereferencing @bo is illegal and will likely cause memory + * corruption. + * + * Returns + * -ENOMEM: Out of memory. + * -EINVAL: Invalid placement flags. + * -ERESTARTSYS: Interrupted by signal while sleeping waiting for resources. + */ +int ttm_bo_init_reserved(struct ttm_device *bdev, struct ttm_buffer_object *bo, + enum ttm_bo_type type, struct ttm_placement *placement, + uint32_t alignment, struct ttm_operation_ctx *ctx, + struct sg_table *sg, struct dma_resv *resv, + void (*destroy) (struct ttm_buffer_object *)) +{ + static const struct ttm_place sys_mem = { .mem_type = TTM_PL_SYSTEM }; + int ret; + + kref_init(&bo->kref); + INIT_LIST_HEAD(&bo->ddestroy); + bo->bdev = bdev; + bo->type = type; + bo->page_alignment = alignment; + bo->destroy = destroy; + bo->pin_count = 0; + bo->sg = sg; + bo->bulk_move = NULL; + if (resv) + bo->base.resv = resv; + else + bo->base.resv = &bo->base._resv; + atomic_inc(&ttm_glob.bo_count); + + ret = ttm_resource_alloc(bo, &sys_mem, &bo->resource); + if (unlikely(ret)) { + ttm_bo_put(bo); + return ret; + } + + /* + * For ttm_bo_type_device buffers, allocate + * address space from the device. + */ + if (bo->type == ttm_bo_type_device || bo->type == ttm_bo_type_sg) { + ret = drm_vma_offset_add(bdev->vma_manager, &bo->base.vma_node, + PFN_UP(bo->base.size)); + if (ret) + goto err_put; + } + + /* passed reservation objects should already be locked, + * since otherwise lockdep will be angered in radeon. + */ + if (!resv) + WARN_ON(!dma_resv_trylock(bo->base.resv)); + else + dma_resv_assert_held(resv); + + ret = ttm_bo_validate(bo, placement, ctx); + if (unlikely(ret)) + goto err_unlock; + + return 0; + +err_unlock: + if (!resv) + dma_resv_unlock(bo->base.resv); + +err_put: + ttm_bo_put(bo); + return ret; +} +EXPORT_SYMBOL(ttm_bo_init_reserved); + +/** + * ttm_bo_init_validate + * + * @bdev: Pointer to a ttm_device struct. + * @bo: Pointer to a ttm_buffer_object to be initialized. + * @type: Requested type of buffer object. + * @placement: Initial placement for buffer object. + * @alignment: Data alignment in pages. + * @interruptible: If needing to sleep to wait for GPU resources, + * sleep interruptible. + * pinned in physical memory. If this behaviour is not desired, this member + * holds a pointer to a persistent shmem object. Typically, this would + * point to the shmem object backing a GEM object if TTM is used to back a + * GEM user interface. + * @sg: Scatter-gather table. + * @resv: Pointer to a dma_resv, or NULL to let ttm allocate one. + * @destroy: Destroy function. Use NULL for kfree(). + * + * This function initializes a pre-allocated struct ttm_buffer_object. + * As this object may be part of a larger structure, this function, + * together with the @destroy function, + * enables driver-specific objects derived from a ttm_buffer_object. + * + * On successful return, the caller owns an object kref to @bo. The kref and + * list_kref are usually set to 1, but note that in some situations, other + * tasks may already be holding references to @bo as well. + * + * If a failure occurs, the function will call the @destroy function, Thus, + * after a failure, dereferencing @bo is illegal and will likely cause memory + * corruption. + * + * Returns + * -ENOMEM: Out of memory. + * -EINVAL: Invalid placement flags. + * -ERESTARTSYS: Interrupted by signal while sleeping waiting for resources. + */ +int ttm_bo_init_validate(struct ttm_device *bdev, struct ttm_buffer_object *bo, + enum ttm_bo_type type, struct ttm_placement *placement, + uint32_t alignment, bool interruptible, + struct sg_table *sg, struct dma_resv *resv, + void (*destroy) (struct ttm_buffer_object *)) +{ + struct ttm_operation_ctx ctx = { interruptible, false }; + int ret; + + ret = ttm_bo_init_reserved(bdev, bo, type, placement, alignment, &ctx, + sg, resv, destroy); + if (ret) + return ret; + + if (!resv) + ttm_bo_unreserve(bo); + + return 0; +} +EXPORT_SYMBOL(ttm_bo_init_validate); + +/* + * buffer object vm functions. + */ + +void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo) +{ + struct ttm_device *bdev = bo->bdev; + + drm_vma_node_unmap(&bo->base.vma_node, bdev->dev_mapping); + ttm_mem_io_free(bdev, bo->resource); +} +EXPORT_SYMBOL(ttm_bo_unmap_virtual); + +int ttm_bo_wait(struct ttm_buffer_object *bo, + bool interruptible, bool no_wait) +{ + long timeout = 15 * HZ; + + if (no_wait) { + if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP)) + return 0; + else + return -EBUSY; + } + + timeout = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP, + interruptible, timeout); + if (timeout < 0) + return timeout; + + if (timeout == 0) + return -EBUSY; + + return 0; +} +EXPORT_SYMBOL(ttm_bo_wait); + +int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, + gfp_t gfp_flags) +{ + struct ttm_place place; + bool locked; + int ret; + + /* + * While the bo may already reside in SYSTEM placement, set + * SYSTEM as new placement to cover also the move further below. + * The driver may use the fact that we're moving from SYSTEM + * as an indication that we're about to swap out. + */ + memset(&place, 0, sizeof(place)); + place.mem_type = bo->resource->mem_type; + if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL)) + return -EBUSY; + + if (!bo->ttm || !ttm_tt_is_populated(bo->ttm) || + bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL || + bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED || + !ttm_bo_get_unless_zero(bo)) { + if (locked) + dma_resv_unlock(bo->base.resv); + return -EBUSY; + } + + if (bo->deleted) { + ret = ttm_bo_cleanup_refs(bo, false, false, locked); + ttm_bo_put(bo); + return ret == -EBUSY ? -ENOSPC : ret; + } + + /* TODO: Cleanup the locking */ + spin_unlock(&bo->bdev->lru_lock); + + /* + * Move to system cached + */ + if (bo->resource->mem_type != TTM_PL_SYSTEM) { + struct ttm_operation_ctx ctx = { false, false }; + struct ttm_resource *evict_mem; + struct ttm_place hop; + + memset(&hop, 0, sizeof(hop)); + place.mem_type = TTM_PL_SYSTEM; + ret = ttm_resource_alloc(bo, &place, &evict_mem); + if (unlikely(ret)) + goto out; + + ret = ttm_bo_handle_move_mem(bo, evict_mem, true, &ctx, &hop); + if (unlikely(ret != 0)) { + WARN(ret == -EMULTIHOP, "Unexpected multihop in swaput - likely driver bug.\n"); + ttm_resource_free(bo, &evict_mem); + goto out; + } + } + + /* + * Make sure BO is idle. + */ + ret = ttm_bo_wait(bo, false, false); + if (unlikely(ret != 0)) + goto out; + + ttm_bo_unmap_virtual(bo); + + /* + * Swap out. Buffer will be swapped in again as soon as + * anyone tries to access a ttm page. + */ + if (bo->bdev->funcs->swap_notify) + bo->bdev->funcs->swap_notify(bo); + + if (ttm_tt_is_populated(bo->ttm)) + ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags); +out: + + /* + * Unreserve without putting on LRU to avoid swapping out an + * already swapped buffer. + */ + if (locked) + dma_resv_unlock(bo->base.resv); + ttm_bo_put(bo); + return ret == -EBUSY ? -ENOSPC : ret; +} + +void ttm_bo_tt_destroy(struct ttm_buffer_object *bo) +{ + if (bo->ttm == NULL) + return; + + ttm_tt_unpopulate(bo->bdev, bo->ttm); + ttm_tt_destroy(bo->bdev, bo->ttm); + bo->ttm = NULL; +} diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c new file mode 100644 index 000000000..fa04e6220 --- /dev/null +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -0,0 +1,677 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/************************************************************************** + * + * Copyright (c) 2007-2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +/* + * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com> + */ + +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> +#include <drm/drm_cache.h> +#include <drm/drm_vma_manager.h> +#include <linux/iosys-map.h> +#include <linux/io.h> +#include <linux/highmem.h> +#include <linux/wait.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <linux/module.h> +#include <linux/dma-resv.h> + +struct ttm_transfer_obj { + struct ttm_buffer_object base; + struct ttm_buffer_object *bo; +}; + +int ttm_mem_io_reserve(struct ttm_device *bdev, + struct ttm_resource *mem) +{ + if (mem->bus.offset || mem->bus.addr) + return 0; + + mem->bus.is_iomem = false; + if (!bdev->funcs->io_mem_reserve) + return 0; + + return bdev->funcs->io_mem_reserve(bdev, mem); +} + +void ttm_mem_io_free(struct ttm_device *bdev, + struct ttm_resource *mem) +{ + if (!mem) + return; + + if (!mem->bus.offset && !mem->bus.addr) + return; + + if (bdev->funcs->io_mem_free) + bdev->funcs->io_mem_free(bdev, mem); + + mem->bus.offset = 0; + mem->bus.addr = NULL; +} + +/** + * ttm_move_memcpy - Helper to perform a memcpy ttm move operation. + * @clear: Whether to clear rather than copy. + * @num_pages: Number of pages of the operation. + * @dst_iter: A struct ttm_kmap_iter representing the destination resource. + * @src_iter: A struct ttm_kmap_iter representing the source resource. + * + * This function is intended to be able to move out async under a + * dma-fence if desired. + */ +void ttm_move_memcpy(bool clear, + u32 num_pages, + struct ttm_kmap_iter *dst_iter, + struct ttm_kmap_iter *src_iter) +{ + const struct ttm_kmap_iter_ops *dst_ops = dst_iter->ops; + const struct ttm_kmap_iter_ops *src_ops = src_iter->ops; + struct iosys_map src_map, dst_map; + pgoff_t i; + + /* Single TTM move. NOP */ + if (dst_ops->maps_tt && src_ops->maps_tt) + return; + + /* Don't move nonexistent data. Clear destination instead. */ + if (clear) { + for (i = 0; i < num_pages; ++i) { + dst_ops->map_local(dst_iter, &dst_map, i); + if (dst_map.is_iomem) + memset_io(dst_map.vaddr_iomem, 0, PAGE_SIZE); + else + memset(dst_map.vaddr, 0, PAGE_SIZE); + if (dst_ops->unmap_local) + dst_ops->unmap_local(dst_iter, &dst_map); + } + return; + } + + for (i = 0; i < num_pages; ++i) { + dst_ops->map_local(dst_iter, &dst_map, i); + src_ops->map_local(src_iter, &src_map, i); + + drm_memcpy_from_wc(&dst_map, &src_map, PAGE_SIZE); + + if (src_ops->unmap_local) + src_ops->unmap_local(src_iter, &src_map); + if (dst_ops->unmap_local) + dst_ops->unmap_local(dst_iter, &dst_map); + } +} +EXPORT_SYMBOL(ttm_move_memcpy); + +int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, + struct ttm_operation_ctx *ctx, + struct ttm_resource *dst_mem) +{ + struct ttm_device *bdev = bo->bdev; + struct ttm_resource_manager *dst_man = + ttm_manager_type(bo->bdev, dst_mem->mem_type); + struct ttm_tt *ttm = bo->ttm; + struct ttm_resource *src_mem = bo->resource; + struct ttm_resource_manager *src_man; + union { + struct ttm_kmap_iter_tt tt; + struct ttm_kmap_iter_linear_io io; + } _dst_iter, _src_iter; + struct ttm_kmap_iter *dst_iter, *src_iter; + bool clear; + int ret = 0; + + if (!src_mem) + return 0; + + src_man = ttm_manager_type(bdev, src_mem->mem_type); + if (ttm && ((ttm->page_flags & TTM_TT_FLAG_SWAPPED) || + dst_man->use_tt)) { + ret = ttm_tt_populate(bdev, ttm, ctx); + if (ret) + return ret; + } + + dst_iter = ttm_kmap_iter_linear_io_init(&_dst_iter.io, bdev, dst_mem); + if (PTR_ERR(dst_iter) == -EINVAL && dst_man->use_tt) + dst_iter = ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm); + if (IS_ERR(dst_iter)) + return PTR_ERR(dst_iter); + + src_iter = ttm_kmap_iter_linear_io_init(&_src_iter.io, bdev, src_mem); + if (PTR_ERR(src_iter) == -EINVAL && src_man->use_tt) + src_iter = ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm); + if (IS_ERR(src_iter)) { + ret = PTR_ERR(src_iter); + goto out_src_iter; + } + + clear = src_iter->ops->maps_tt && (!ttm || !ttm_tt_is_populated(ttm)); + if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) + ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter); + + if (!src_iter->ops->maps_tt) + ttm_kmap_iter_linear_io_fini(&_src_iter.io, bdev, src_mem); + ttm_bo_move_sync_cleanup(bo, dst_mem); + +out_src_iter: + if (!dst_iter->ops->maps_tt) + ttm_kmap_iter_linear_io_fini(&_dst_iter.io, bdev, dst_mem); + + return ret; +} +EXPORT_SYMBOL(ttm_bo_move_memcpy); + +static void ttm_transfered_destroy(struct ttm_buffer_object *bo) +{ + struct ttm_transfer_obj *fbo; + + fbo = container_of(bo, struct ttm_transfer_obj, base); + dma_resv_fini(&fbo->base.base._resv); + ttm_bo_put(fbo->bo); + kfree(fbo); +} + +/** + * ttm_buffer_object_transfer + * + * @bo: A pointer to a struct ttm_buffer_object. + * @new_obj: A pointer to a pointer to a newly created ttm_buffer_object, + * holding the data of @bo with the old placement. + * + * This is a utility function that may be called after an accelerated move + * has been scheduled. A new buffer object is created as a placeholder for + * the old data while it's being copied. When that buffer object is idle, + * it can be destroyed, releasing the space of the old placement. + * Returns: + * !0: Failure. + */ + +static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, + struct ttm_buffer_object **new_obj) +{ + struct ttm_transfer_obj *fbo; + int ret; + + fbo = kmalloc(sizeof(*fbo), GFP_KERNEL); + if (!fbo) + return -ENOMEM; + + fbo->base = *bo; + + /** + * Fix up members that we shouldn't copy directly: + * TODO: Explicit member copy would probably be better here. + */ + + atomic_inc(&ttm_glob.bo_count); + INIT_LIST_HEAD(&fbo->base.ddestroy); + drm_vma_node_reset(&fbo->base.base.vma_node); + + kref_init(&fbo->base.kref); + fbo->base.destroy = &ttm_transfered_destroy; + fbo->base.pin_count = 0; + if (bo->type != ttm_bo_type_sg) + fbo->base.base.resv = &fbo->base.base._resv; + + dma_resv_init(&fbo->base.base._resv); + fbo->base.base.dev = NULL; + ret = dma_resv_trylock(&fbo->base.base._resv); + WARN_ON(!ret); + + if (fbo->base.resource) { + ttm_resource_set_bo(fbo->base.resource, &fbo->base); + bo->resource = NULL; + ttm_bo_set_bulk_move(&fbo->base, NULL); + } else { + fbo->base.bulk_move = NULL; + } + + ret = dma_resv_reserve_fences(&fbo->base.base._resv, 1); + if (ret) { + kfree(fbo); + return ret; + } + + ttm_bo_get(bo); + fbo->bo = bo; + + ttm_bo_move_to_lru_tail_unlocked(&fbo->base); + + *new_obj = &fbo->base; + return 0; +} + +pgprot_t ttm_io_prot(struct ttm_buffer_object *bo, struct ttm_resource *res, + pgprot_t tmp) +{ + struct ttm_resource_manager *man; + enum ttm_caching caching; + + man = ttm_manager_type(bo->bdev, res->mem_type); + caching = man->use_tt ? bo->ttm->caching : res->bus.caching; + + return ttm_prot_from_caching(caching, tmp); +} +EXPORT_SYMBOL(ttm_io_prot); + +static int ttm_bo_ioremap(struct ttm_buffer_object *bo, + unsigned long offset, + unsigned long size, + struct ttm_bo_kmap_obj *map) +{ + struct ttm_resource *mem = bo->resource; + + if (bo->resource->bus.addr) { + map->bo_kmap_type = ttm_bo_map_premapped; + map->virtual = ((u8 *)bo->resource->bus.addr) + offset; + } else { + resource_size_t res = bo->resource->bus.offset + offset; + + map->bo_kmap_type = ttm_bo_map_iomap; + if (mem->bus.caching == ttm_write_combined) + map->virtual = ioremap_wc(res, size); +#ifdef CONFIG_X86 + else if (mem->bus.caching == ttm_cached) + map->virtual = ioremap_cache(res, size); +#endif + else + map->virtual = ioremap(res, size); + } + return (!map->virtual) ? -ENOMEM : 0; +} + +static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo, + unsigned long start_page, + unsigned long num_pages, + struct ttm_bo_kmap_obj *map) +{ + struct ttm_resource *mem = bo->resource; + struct ttm_operation_ctx ctx = { + .interruptible = false, + .no_wait_gpu = false + }; + struct ttm_tt *ttm = bo->ttm; + pgprot_t prot; + int ret; + + BUG_ON(!ttm); + + ret = ttm_tt_populate(bo->bdev, ttm, &ctx); + if (ret) + return ret; + + if (num_pages == 1 && ttm->caching == ttm_cached) { + /* + * We're mapping a single page, and the desired + * page protection is consistent with the bo. + */ + + map->bo_kmap_type = ttm_bo_map_kmap; + map->page = ttm->pages[start_page]; + map->virtual = kmap(map->page); + } else { + /* + * We need to use vmap to get the desired page protection + * or to make the buffer object look contiguous. + */ + prot = ttm_io_prot(bo, mem, PAGE_KERNEL); + map->bo_kmap_type = ttm_bo_map_vmap; + map->virtual = vmap(ttm->pages + start_page, num_pages, + 0, prot); + } + return (!map->virtual) ? -ENOMEM : 0; +} + +int ttm_bo_kmap(struct ttm_buffer_object *bo, + unsigned long start_page, unsigned long num_pages, + struct ttm_bo_kmap_obj *map) +{ + unsigned long offset, size; + int ret; + + map->virtual = NULL; + map->bo = bo; + if (num_pages > bo->resource->num_pages) + return -EINVAL; + if ((start_page + num_pages) > bo->resource->num_pages) + return -EINVAL; + + ret = ttm_mem_io_reserve(bo->bdev, bo->resource); + if (ret) + return ret; + if (!bo->resource->bus.is_iomem) { + return ttm_bo_kmap_ttm(bo, start_page, num_pages, map); + } else { + offset = start_page << PAGE_SHIFT; + size = num_pages << PAGE_SHIFT; + return ttm_bo_ioremap(bo, offset, size, map); + } +} +EXPORT_SYMBOL(ttm_bo_kmap); + +void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map) +{ + if (!map->virtual) + return; + switch (map->bo_kmap_type) { + case ttm_bo_map_iomap: + iounmap(map->virtual); + break; + case ttm_bo_map_vmap: + vunmap(map->virtual); + break; + case ttm_bo_map_kmap: + kunmap(map->page); + break; + case ttm_bo_map_premapped: + break; + default: + BUG(); + } + ttm_mem_io_free(map->bo->bdev, map->bo->resource); + map->virtual = NULL; + map->page = NULL; +} +EXPORT_SYMBOL(ttm_bo_kunmap); + +int ttm_bo_vmap(struct ttm_buffer_object *bo, struct iosys_map *map) +{ + struct ttm_resource *mem = bo->resource; + int ret; + + dma_resv_assert_held(bo->base.resv); + + ret = ttm_mem_io_reserve(bo->bdev, mem); + if (ret) + return ret; + + if (mem->bus.is_iomem) { + void __iomem *vaddr_iomem; + + if (mem->bus.addr) + vaddr_iomem = (void __iomem *)mem->bus.addr; + else if (mem->bus.caching == ttm_write_combined) + vaddr_iomem = ioremap_wc(mem->bus.offset, + bo->base.size); +#ifdef CONFIG_X86 + else if (mem->bus.caching == ttm_cached) + vaddr_iomem = ioremap_cache(mem->bus.offset, + bo->base.size); +#endif + else + vaddr_iomem = ioremap(mem->bus.offset, bo->base.size); + + if (!vaddr_iomem) + return -ENOMEM; + + iosys_map_set_vaddr_iomem(map, vaddr_iomem); + + } else { + struct ttm_operation_ctx ctx = { + .interruptible = false, + .no_wait_gpu = false + }; + struct ttm_tt *ttm = bo->ttm; + pgprot_t prot; + void *vaddr; + + ret = ttm_tt_populate(bo->bdev, ttm, &ctx); + if (ret) + return ret; + + /* + * We need to use vmap to get the desired page protection + * or to make the buffer object look contiguous. + */ + prot = ttm_io_prot(bo, mem, PAGE_KERNEL); + vaddr = vmap(ttm->pages, ttm->num_pages, 0, prot); + if (!vaddr) + return -ENOMEM; + + iosys_map_set_vaddr(map, vaddr); + } + + return 0; +} +EXPORT_SYMBOL(ttm_bo_vmap); + +void ttm_bo_vunmap(struct ttm_buffer_object *bo, struct iosys_map *map) +{ + struct ttm_resource *mem = bo->resource; + + dma_resv_assert_held(bo->base.resv); + + if (iosys_map_is_null(map)) + return; + + if (!map->is_iomem) + vunmap(map->vaddr); + else if (!mem->bus.addr) + iounmap(map->vaddr_iomem); + iosys_map_clear(map); + + ttm_mem_io_free(bo->bdev, bo->resource); +} +EXPORT_SYMBOL(ttm_bo_vunmap); + +static int ttm_bo_wait_free_node(struct ttm_buffer_object *bo, + bool dst_use_tt) +{ + int ret; + ret = ttm_bo_wait(bo, false, false); + if (ret) + return ret; + + if (!dst_use_tt) + ttm_bo_tt_destroy(bo); + ttm_resource_free(bo, &bo->resource); + return 0; +} + +static int ttm_bo_move_to_ghost(struct ttm_buffer_object *bo, + struct dma_fence *fence, + bool dst_use_tt) +{ + struct ttm_buffer_object *ghost_obj; + int ret; + + /** + * This should help pipeline ordinary buffer moves. + * + * Hang old buffer memory on a new buffer object, + * and leave it to be released when the GPU + * operation has completed. + */ + + ret = ttm_buffer_object_transfer(bo, &ghost_obj); + if (ret) + return ret; + + dma_resv_add_fence(&ghost_obj->base._resv, fence, + DMA_RESV_USAGE_KERNEL); + + /** + * If we're not moving to fixed memory, the TTM object + * needs to stay alive. Otherwhise hang it on the ghost + * bo to be unbound and destroyed. + */ + + if (dst_use_tt) + ghost_obj->ttm = NULL; + else + bo->ttm = NULL; + + dma_resv_unlock(&ghost_obj->base._resv); + ttm_bo_put(ghost_obj); + return 0; +} + +static void ttm_bo_move_pipeline_evict(struct ttm_buffer_object *bo, + struct dma_fence *fence) +{ + struct ttm_device *bdev = bo->bdev; + struct ttm_resource_manager *from; + + from = ttm_manager_type(bdev, bo->resource->mem_type); + + /** + * BO doesn't have a TTM we need to bind/unbind. Just remember + * this eviction and free up the allocation + */ + spin_lock(&from->move_lock); + if (!from->move || dma_fence_is_later(fence, from->move)) { + dma_fence_put(from->move); + from->move = dma_fence_get(fence); + } + spin_unlock(&from->move_lock); + + ttm_resource_free(bo, &bo->resource); +} + +int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, + struct dma_fence *fence, + bool evict, + bool pipeline, + struct ttm_resource *new_mem) +{ + struct ttm_device *bdev = bo->bdev; + struct ttm_resource_manager *from = ttm_manager_type(bdev, bo->resource->mem_type); + struct ttm_resource_manager *man = ttm_manager_type(bdev, new_mem->mem_type); + int ret = 0; + + dma_resv_add_fence(bo->base.resv, fence, DMA_RESV_USAGE_KERNEL); + if (!evict) + ret = ttm_bo_move_to_ghost(bo, fence, man->use_tt); + else if (!from->use_tt && pipeline) + ttm_bo_move_pipeline_evict(bo, fence); + else + ret = ttm_bo_wait_free_node(bo, man->use_tt); + + if (ret) + return ret; + + ttm_bo_assign_mem(bo, new_mem); + + return 0; +} +EXPORT_SYMBOL(ttm_bo_move_accel_cleanup); + +void ttm_bo_move_sync_cleanup(struct ttm_buffer_object *bo, + struct ttm_resource *new_mem) +{ + struct ttm_device *bdev = bo->bdev; + struct ttm_resource_manager *man = ttm_manager_type(bdev, new_mem->mem_type); + int ret; + + ret = ttm_bo_wait_free_node(bo, man->use_tt); + if (WARN_ON(ret)) + return; + + ttm_bo_assign_mem(bo, new_mem); +} +EXPORT_SYMBOL(ttm_bo_move_sync_cleanup); + +/** + * ttm_bo_pipeline_gutting - purge the contents of a bo + * @bo: The buffer object + * + * Purge the contents of a bo, async if the bo is not idle. + * After a successful call, the bo is left unpopulated in + * system placement. The function may wait uninterruptible + * for idle on OOM. + * + * Return: 0 if successful, negative error code on failure. + */ +int ttm_bo_pipeline_gutting(struct ttm_buffer_object *bo) +{ + static const struct ttm_place sys_mem = { .mem_type = TTM_PL_SYSTEM }; + struct ttm_buffer_object *ghost; + struct ttm_resource *sys_res; + struct ttm_tt *ttm; + int ret; + + ret = ttm_resource_alloc(bo, &sys_mem, &sys_res); + if (ret) + return ret; + + /* If already idle, no need for ghost object dance. */ + ret = ttm_bo_wait(bo, false, true); + if (ret != -EBUSY) { + if (!bo->ttm) { + /* See comment below about clearing. */ + ret = ttm_tt_create(bo, true); + if (ret) + goto error_free_sys_mem; + } else { + ttm_tt_unpopulate(bo->bdev, bo->ttm); + if (bo->type == ttm_bo_type_device) + ttm_tt_mark_for_clear(bo->ttm); + } + ttm_resource_free(bo, &bo->resource); + ttm_bo_assign_mem(bo, sys_res); + return 0; + } + + /* + * We need an unpopulated ttm_tt after giving our current one, + * if any, to the ghost object. And we can't afford to fail + * creating one *after* the operation. If the bo subsequently gets + * resurrected, make sure it's cleared (if ttm_bo_type_device) + * to avoid leaking sensitive information to user-space. + */ + + ttm = bo->ttm; + bo->ttm = NULL; + ret = ttm_tt_create(bo, true); + swap(bo->ttm, ttm); + if (ret) + goto error_free_sys_mem; + + ret = ttm_buffer_object_transfer(bo, &ghost); + if (ret) + goto error_destroy_tt; + + ret = dma_resv_copy_fences(&ghost->base._resv, bo->base.resv); + /* Last resort, wait for the BO to be idle when we are OOM */ + if (ret) + ttm_bo_wait(bo, false, false); + + dma_resv_unlock(&ghost->base._resv); + ttm_bo_put(ghost); + bo->ttm = ttm; + ttm_bo_assign_mem(bo, sys_res); + return 0; + +error_destroy_tt: + ttm_tt_destroy(bo->bdev, ttm); + +error_free_sys_mem: + ttm_resource_free(bo, &sys_res); + return ret; +} diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c new file mode 100644 index 000000000..381193112 --- /dev/null +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -0,0 +1,475 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/************************************************************************** + * + * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +/* + * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com> + */ + +#define pr_fmt(fmt) "[TTM] " fmt + +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> +#include <drm/drm_vma_manager.h> +#include <drm/drm_drv.h> +#include <drm/drm_managed.h> +#include <linux/mm.h> +#include <linux/pfn_t.h> +#include <linux/rbtree.h> +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/mem_encrypt.h> + +static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, + struct vm_fault *vmf) +{ + long err = 0; + + /* + * Quick non-stalling check for idle. + */ + if (dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_KERNEL)) + return 0; + + /* + * If possible, avoid waiting for GPU with mmap_lock + * held. We only do this if the fault allows retry and this + * is the first attempt. + */ + if (fault_flag_allow_retry_first(vmf->flags)) { + if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) + return VM_FAULT_RETRY; + + ttm_bo_get(bo); + mmap_read_unlock(vmf->vma->vm_mm); + (void)dma_resv_wait_timeout(bo->base.resv, + DMA_RESV_USAGE_KERNEL, true, + MAX_SCHEDULE_TIMEOUT); + dma_resv_unlock(bo->base.resv); + ttm_bo_put(bo); + return VM_FAULT_RETRY; + } + + /* + * Ordinary wait. + */ + err = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_KERNEL, true, + MAX_SCHEDULE_TIMEOUT); + if (unlikely(err < 0)) { + return (err != -ERESTARTSYS) ? VM_FAULT_SIGBUS : + VM_FAULT_NOPAGE; + } + + return 0; +} + +static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo, + unsigned long page_offset) +{ + struct ttm_device *bdev = bo->bdev; + + if (bdev->funcs->io_mem_pfn) + return bdev->funcs->io_mem_pfn(bo, page_offset); + + return (bo->resource->bus.offset >> PAGE_SHIFT) + page_offset; +} + +/** + * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback + * @bo: The buffer object + * @vmf: The fault structure handed to the callback + * + * vm callbacks like fault() and *_mkwrite() allow for the mmap_lock to be dropped + * during long waits, and after the wait the callback will be restarted. This + * is to allow other threads using the same virtual memory space concurrent + * access to map(), unmap() completely unrelated buffer objects. TTM buffer + * object reservations sometimes wait for GPU and should therefore be + * considered long waits. This function reserves the buffer object interruptibly + * taking this into account. Starvation is avoided by the vm system not + * allowing too many repeated restarts. + * This function is intended to be used in customized fault() and _mkwrite() + * handlers. + * + * Return: + * 0 on success and the bo was reserved. + * VM_FAULT_RETRY if blocking wait. + * VM_FAULT_NOPAGE if blocking wait and retrying was not allowed. + */ +vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, + struct vm_fault *vmf) +{ + /* + * Work around locking order reversal in fault / nopfn + * between mmap_lock and bo_reserve: Perform a trylock operation + * for reserve, and if it fails, retry the fault after waiting + * for the buffer to become unreserved. + */ + if (unlikely(!dma_resv_trylock(bo->base.resv))) { + /* + * If the fault allows retry and this is the first + * fault attempt, we try to release the mmap_lock + * before waiting + */ + if (fault_flag_allow_retry_first(vmf->flags)) { + if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { + ttm_bo_get(bo); + mmap_read_unlock(vmf->vma->vm_mm); + if (!dma_resv_lock_interruptible(bo->base.resv, + NULL)) + dma_resv_unlock(bo->base.resv); + ttm_bo_put(bo); + } + + return VM_FAULT_RETRY; + } + + if (dma_resv_lock_interruptible(bo->base.resv, NULL)) + return VM_FAULT_NOPAGE; + } + + /* + * Refuse to fault imported pages. This should be handled + * (if at all) by redirecting mmap to the exporter. + */ + if (bo->ttm && (bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL)) { + if (!(bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE)) { + dma_resv_unlock(bo->base.resv); + return VM_FAULT_SIGBUS; + } + } + + return 0; +} +EXPORT_SYMBOL(ttm_bo_vm_reserve); + +/** + * ttm_bo_vm_fault_reserved - TTM fault helper + * @vmf: The struct vm_fault given as argument to the fault callback + * @prot: The page protection to be used for this memory area. + * @num_prefault: Maximum number of prefault pages. The caller may want to + * specify this based on madvice settings and the size of the GPU object + * backed by the memory. + * + * This function inserts one or more page table entries pointing to the + * memory backing the buffer object, and then returns a return code + * instructing the caller to retry the page access. + * + * Return: + * VM_FAULT_NOPAGE on success or pending signal + * VM_FAULT_SIGBUS on unspecified error + * VM_FAULT_OOM on out-of-memory + * VM_FAULT_RETRY if retryable wait + */ +vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, + pgprot_t prot, + pgoff_t num_prefault) +{ + struct vm_area_struct *vma = vmf->vma; + struct ttm_buffer_object *bo = vma->vm_private_data; + struct ttm_device *bdev = bo->bdev; + unsigned long page_offset; + unsigned long page_last; + unsigned long pfn; + struct ttm_tt *ttm = NULL; + struct page *page; + int err; + pgoff_t i; + vm_fault_t ret = VM_FAULT_NOPAGE; + unsigned long address = vmf->address; + + /* + * Wait for buffer data in transit, due to a pipelined + * move. + */ + ret = ttm_bo_vm_fault_idle(bo, vmf); + if (unlikely(ret != 0)) + return ret; + + err = ttm_mem_io_reserve(bdev, bo->resource); + if (unlikely(err != 0)) + return VM_FAULT_SIGBUS; + + page_offset = ((address - vma->vm_start) >> PAGE_SHIFT) + + vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node); + page_last = vma_pages(vma) + vma->vm_pgoff - + drm_vma_node_start(&bo->base.vma_node); + + if (unlikely(page_offset >= bo->resource->num_pages)) + return VM_FAULT_SIGBUS; + + prot = ttm_io_prot(bo, bo->resource, prot); + if (!bo->resource->bus.is_iomem) { + struct ttm_operation_ctx ctx = { + .interruptible = false, + .no_wait_gpu = false, + .force_alloc = true + }; + + ttm = bo->ttm; + if (ttm_tt_populate(bdev, bo->ttm, &ctx)) + return VM_FAULT_OOM; + } else { + /* Iomem should not be marked encrypted */ + prot = pgprot_decrypted(prot); + } + + /* + * Speculatively prefault a number of pages. Only error on + * first page. + */ + for (i = 0; i < num_prefault; ++i) { + if (bo->resource->bus.is_iomem) { + pfn = ttm_bo_io_mem_pfn(bo, page_offset); + } else { + page = ttm->pages[page_offset]; + if (unlikely(!page && i == 0)) { + return VM_FAULT_OOM; + } else if (unlikely(!page)) { + break; + } + pfn = page_to_pfn(page); + } + + /* + * Note that the value of @prot at this point may differ from + * the value of @vma->vm_page_prot in the caching- and + * encryption bits. This is because the exact location of the + * data may not be known at mmap() time and may also change + * at arbitrary times while the data is mmap'ed. + * See vmf_insert_mixed_prot() for a discussion. + */ + ret = vmf_insert_pfn_prot(vma, address, pfn, prot); + + /* Never error on prefaulted PTEs */ + if (unlikely((ret & VM_FAULT_ERROR))) { + if (i == 0) + return VM_FAULT_NOPAGE; + else + break; + } + + address += PAGE_SIZE; + if (unlikely(++page_offset >= page_last)) + break; + } + return ret; +} +EXPORT_SYMBOL(ttm_bo_vm_fault_reserved); + +static void ttm_bo_release_dummy_page(struct drm_device *dev, void *res) +{ + struct page *dummy_page = (struct page *)res; + + __free_page(dummy_page); +} + +vm_fault_t ttm_bo_vm_dummy_page(struct vm_fault *vmf, pgprot_t prot) +{ + struct vm_area_struct *vma = vmf->vma; + struct ttm_buffer_object *bo = vma->vm_private_data; + struct drm_device *ddev = bo->base.dev; + vm_fault_t ret = VM_FAULT_NOPAGE; + unsigned long address; + unsigned long pfn; + struct page *page; + + /* Allocate new dummy page to map all the VA range in this VMA to it*/ + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + return VM_FAULT_OOM; + + /* Set the page to be freed using drmm release action */ + if (drmm_add_action_or_reset(ddev, ttm_bo_release_dummy_page, page)) + return VM_FAULT_OOM; + + pfn = page_to_pfn(page); + + /* Prefault the entire VMA range right away to avoid further faults */ + for (address = vma->vm_start; address < vma->vm_end; + address += PAGE_SIZE) + ret = vmf_insert_pfn_prot(vma, address, pfn, prot); + + return ret; +} +EXPORT_SYMBOL(ttm_bo_vm_dummy_page); + +vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + pgprot_t prot; + struct ttm_buffer_object *bo = vma->vm_private_data; + struct drm_device *ddev = bo->base.dev; + vm_fault_t ret; + int idx; + + ret = ttm_bo_vm_reserve(bo, vmf); + if (ret) + return ret; + + prot = vma->vm_page_prot; + if (drm_dev_enter(ddev, &idx)) { + ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT); + drm_dev_exit(idx); + } else { + ret = ttm_bo_vm_dummy_page(vmf, prot); + } + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) + return ret; + + dma_resv_unlock(bo->base.resv); + + return ret; +} +EXPORT_SYMBOL(ttm_bo_vm_fault); + +void ttm_bo_vm_open(struct vm_area_struct *vma) +{ + struct ttm_buffer_object *bo = vma->vm_private_data; + + WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping); + + ttm_bo_get(bo); +} +EXPORT_SYMBOL(ttm_bo_vm_open); + +void ttm_bo_vm_close(struct vm_area_struct *vma) +{ + struct ttm_buffer_object *bo = vma->vm_private_data; + + ttm_bo_put(bo); + vma->vm_private_data = NULL; +} +EXPORT_SYMBOL(ttm_bo_vm_close); + +static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo, + unsigned long offset, + uint8_t *buf, int len, int write) +{ + unsigned long page = offset >> PAGE_SHIFT; + unsigned long bytes_left = len; + int ret; + + /* Copy a page at a time, that way no extra virtual address + * mapping is needed + */ + offset -= page << PAGE_SHIFT; + do { + unsigned long bytes = min(bytes_left, PAGE_SIZE - offset); + struct ttm_bo_kmap_obj map; + void *ptr; + bool is_iomem; + + ret = ttm_bo_kmap(bo, page, 1, &map); + if (ret) + return ret; + + ptr = (uint8_t *)ttm_kmap_obj_virtual(&map, &is_iomem) + offset; + WARN_ON_ONCE(is_iomem); + if (write) + memcpy(ptr, buf, bytes); + else + memcpy(buf, ptr, bytes); + ttm_bo_kunmap(&map); + + page++; + buf += bytes; + bytes_left -= bytes; + offset = 0; + } while (bytes_left); + + return len; +} + +int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write) +{ + struct ttm_buffer_object *bo = vma->vm_private_data; + unsigned long offset = (addr) - vma->vm_start + + ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node)) + << PAGE_SHIFT); + int ret; + + if (len < 1 || (offset + len) >> PAGE_SHIFT > bo->resource->num_pages) + return -EIO; + + ret = ttm_bo_reserve(bo, true, false, NULL); + if (ret) + return ret; + + switch (bo->resource->mem_type) { + case TTM_PL_SYSTEM: + fallthrough; + case TTM_PL_TT: + ret = ttm_bo_vm_access_kmap(bo, offset, buf, len, write); + break; + default: + if (bo->bdev->funcs->access_memory) + ret = bo->bdev->funcs->access_memory( + bo, offset, buf, len, write); + else + ret = -EIO; + } + + ttm_bo_unreserve(bo); + + return ret; +} +EXPORT_SYMBOL(ttm_bo_vm_access); + +static const struct vm_operations_struct ttm_bo_vm_ops = { + .fault = ttm_bo_vm_fault, + .open = ttm_bo_vm_open, + .close = ttm_bo_vm_close, + .access = ttm_bo_vm_access, +}; + +int ttm_bo_mmap_obj(struct vm_area_struct *vma, struct ttm_buffer_object *bo) +{ + /* Enforce no COW since would have really strange behavior with it. */ + if (is_cow_mapping(vma->vm_flags)) + return -EINVAL; + + ttm_bo_get(bo); + + /* + * Drivers may want to override the vm_ops field. Otherwise we + * use TTM's default callbacks. + */ + if (!vma->vm_ops) + vma->vm_ops = &ttm_bo_vm_ops; + + /* + * Note: We're transferring the bo reference to + * vma->vm_private_data here. + */ + + vma->vm_private_data = bo; + + vma->vm_flags |= VM_PFNMAP; + vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP; + return 0; +} +EXPORT_SYMBOL(ttm_bo_mmap_obj); diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c new file mode 100644 index 000000000..ec9ddaad5 --- /dev/null +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -0,0 +1,307 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +/* + * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Christian König + */ + +#define pr_fmt(fmt) "[TTM DEVICE] " fmt + +#include <linux/mm.h> + +#include <drm/ttm/ttm_device.h> +#include <drm/ttm/ttm_tt.h> +#include <drm/ttm/ttm_placement.h> +#include <drm/ttm/ttm_bo_api.h> + +#include "ttm_module.h" + +/* + * ttm_global_mutex - protecting the global state + */ +static DEFINE_MUTEX(ttm_global_mutex); +static unsigned ttm_glob_use_count; +struct ttm_global ttm_glob; +EXPORT_SYMBOL(ttm_glob); + +struct dentry *ttm_debugfs_root; + +static void ttm_global_release(void) +{ + struct ttm_global *glob = &ttm_glob; + + mutex_lock(&ttm_global_mutex); + if (--ttm_glob_use_count > 0) + goto out; + + ttm_pool_mgr_fini(); + debugfs_remove(ttm_debugfs_root); + + __free_page(glob->dummy_read_page); + memset(glob, 0, sizeof(*glob)); +out: + mutex_unlock(&ttm_global_mutex); +} + +static int ttm_global_init(void) +{ + struct ttm_global *glob = &ttm_glob; + unsigned long num_pages, num_dma32; + struct sysinfo si; + int ret = 0; + + mutex_lock(&ttm_global_mutex); + if (++ttm_glob_use_count > 1) + goto out; + + si_meminfo(&si); + + ttm_debugfs_root = debugfs_create_dir("ttm", NULL); + if (IS_ERR(ttm_debugfs_root)) { + ttm_debugfs_root = NULL; + } + + /* Limit the number of pages in the pool to about 50% of the total + * system memory. + */ + num_pages = ((u64)si.totalram * si.mem_unit) >> PAGE_SHIFT; + num_pages /= 2; + + /* But for DMA32 we limit ourself to only use 2GiB maximum. */ + num_dma32 = (u64)(si.totalram - si.totalhigh) * si.mem_unit + >> PAGE_SHIFT; + num_dma32 = min(num_dma32, 2UL << (30 - PAGE_SHIFT)); + + ttm_pool_mgr_init(num_pages); + ttm_tt_mgr_init(num_pages, num_dma32); + + glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32); + + if (unlikely(glob->dummy_read_page == NULL)) { + ret = -ENOMEM; + goto out; + } + + INIT_LIST_HEAD(&glob->device_list); + atomic_set(&glob->bo_count, 0); + + debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_root, + &glob->bo_count); +out: + if (ret && ttm_debugfs_root) + debugfs_remove(ttm_debugfs_root); + if (ret) + --ttm_glob_use_count; + mutex_unlock(&ttm_global_mutex); + return ret; +} + +/* + * A buffer object shrink method that tries to swap out the first + * buffer object on the global::swap_lru list. + */ +int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) +{ + struct ttm_global *glob = &ttm_glob; + struct ttm_device *bdev; + int ret = 0; + + mutex_lock(&ttm_global_mutex); + list_for_each_entry(bdev, &glob->device_list, device_list) { + ret = ttm_device_swapout(bdev, ctx, gfp_flags); + if (ret > 0) { + list_move_tail(&bdev->device_list, &glob->device_list); + break; + } + } + mutex_unlock(&ttm_global_mutex); + return ret; +} +EXPORT_SYMBOL(ttm_global_swapout); + +int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, + gfp_t gfp_flags) +{ + struct ttm_resource_cursor cursor; + struct ttm_resource_manager *man; + struct ttm_resource *res; + unsigned i; + int ret; + + spin_lock(&bdev->lru_lock); + for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { + man = ttm_manager_type(bdev, i); + if (!man || !man->use_tt) + continue; + + ttm_resource_manager_for_each_res(man, &cursor, res) { + struct ttm_buffer_object *bo = res->bo; + uint32_t num_pages; + + if (!bo || bo->resource != res) + continue; + + num_pages = PFN_UP(bo->base.size); + ret = ttm_bo_swapout(bo, ctx, gfp_flags); + /* ttm_bo_swapout has dropped the lru_lock */ + if (!ret) + return num_pages; + if (ret != -EBUSY) + return ret; + } + } + spin_unlock(&bdev->lru_lock); + return 0; +} +EXPORT_SYMBOL(ttm_device_swapout); + +static void ttm_device_delayed_workqueue(struct work_struct *work) +{ + struct ttm_device *bdev = + container_of(work, struct ttm_device, wq.work); + + if (!ttm_bo_delayed_delete(bdev, false)) + schedule_delayed_work(&bdev->wq, + ((HZ / 100) < 1) ? 1 : HZ / 100); +} + +/** + * ttm_device_init + * + * @bdev: A pointer to a struct ttm_device to initialize. + * @funcs: Function table for the device. + * @dev: The core kernel device pointer for DMA mappings and allocations. + * @mapping: The address space to use for this bo. + * @vma_manager: A pointer to a vma manager. + * @use_dma_alloc: If coherent DMA allocation API should be used. + * @use_dma32: If we should use GFP_DMA32 for device memory allocations. + * + * Initializes a struct ttm_device: + * Returns: + * !0: Failure. + */ +int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs, + struct device *dev, struct address_space *mapping, + struct drm_vma_offset_manager *vma_manager, + bool use_dma_alloc, bool use_dma32) +{ + struct ttm_global *glob = &ttm_glob; + int ret; + + if (WARN_ON(vma_manager == NULL)) + return -EINVAL; + + ret = ttm_global_init(); + if (ret) + return ret; + + bdev->funcs = funcs; + + ttm_sys_man_init(bdev); + ttm_pool_init(&bdev->pool, dev, use_dma_alloc, use_dma32); + + bdev->vma_manager = vma_manager; + INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue); + spin_lock_init(&bdev->lru_lock); + INIT_LIST_HEAD(&bdev->ddestroy); + INIT_LIST_HEAD(&bdev->pinned); + bdev->dev_mapping = mapping; + mutex_lock(&ttm_global_mutex); + list_add_tail(&bdev->device_list, &glob->device_list); + mutex_unlock(&ttm_global_mutex); + + return 0; +} +EXPORT_SYMBOL(ttm_device_init); + +void ttm_device_fini(struct ttm_device *bdev) +{ + struct ttm_resource_manager *man; + unsigned i; + + mutex_lock(&ttm_global_mutex); + list_del(&bdev->device_list); + mutex_unlock(&ttm_global_mutex); + + cancel_delayed_work_sync(&bdev->wq); + + if (ttm_bo_delayed_delete(bdev, true)) + pr_debug("Delayed destroy list was clean\n"); + + man = ttm_manager_type(bdev, TTM_PL_SYSTEM); + ttm_resource_manager_set_used(man, false); + ttm_set_driver_manager(bdev, TTM_PL_SYSTEM, NULL); + + spin_lock(&bdev->lru_lock); + for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) + if (list_empty(&man->lru[0])) + pr_debug("Swap list %d was clean\n", i); + spin_unlock(&bdev->lru_lock); + + ttm_pool_fini(&bdev->pool); + ttm_global_release(); +} +EXPORT_SYMBOL(ttm_device_fini); + +static void ttm_device_clear_lru_dma_mappings(struct ttm_device *bdev, + struct list_head *list) +{ + struct ttm_resource *res; + + spin_lock(&bdev->lru_lock); + while ((res = list_first_entry_or_null(list, typeof(*res), lru))) { + struct ttm_buffer_object *bo = res->bo; + + /* Take ref against racing releases once lru_lock is unlocked */ + if (!ttm_bo_get_unless_zero(bo)) + continue; + + list_del_init(&res->lru); + spin_unlock(&bdev->lru_lock); + + if (bo->ttm) + ttm_tt_unpopulate(bo->bdev, bo->ttm); + + ttm_bo_put(bo); + spin_lock(&bdev->lru_lock); + } + spin_unlock(&bdev->lru_lock); +} + +void ttm_device_clear_dma_mappings(struct ttm_device *bdev) +{ + struct ttm_resource_manager *man; + unsigned int i, j; + + ttm_device_clear_lru_dma_mappings(bdev, &bdev->pinned); + + for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { + man = ttm_manager_type(bdev, i); + if (!man || !man->use_tt) + continue; + + for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) + ttm_device_clear_lru_dma_mappings(bdev, &man->lru[j]); + } +} +EXPORT_SYMBOL(ttm_device_clear_dma_mappings); diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c new file mode 100644 index 000000000..dbee34a05 --- /dev/null +++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c @@ -0,0 +1,165 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/************************************************************************** + * + * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <drm/ttm/ttm_execbuf_util.h> +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_placement.h> +#include <linux/wait.h> +#include <linux/sched.h> +#include <linux/module.h> + +static void ttm_eu_backoff_reservation_reverse(struct list_head *list, + struct ttm_validate_buffer *entry) +{ + list_for_each_entry_continue_reverse(entry, list, head) { + struct ttm_buffer_object *bo = entry->bo; + + dma_resv_unlock(bo->base.resv); + } +} + +void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket, + struct list_head *list) +{ + struct ttm_validate_buffer *entry; + + if (list_empty(list)) + return; + + list_for_each_entry(entry, list, head) { + struct ttm_buffer_object *bo = entry->bo; + + ttm_bo_move_to_lru_tail_unlocked(bo); + dma_resv_unlock(bo->base.resv); + } + + if (ticket) + ww_acquire_fini(ticket); +} +EXPORT_SYMBOL(ttm_eu_backoff_reservation); + +/* + * Reserve buffers for validation. + * + * If a buffer in the list is marked for CPU access, we back off and + * wait for that buffer to become free for GPU access. + * + * If a buffer is reserved for another validation, the validator with + * the highest validation sequence backs off and waits for that buffer + * to become unreserved. This prevents deadlocks when validating multiple + * buffers in different orders. + */ + +int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket, + struct list_head *list, bool intr, + struct list_head *dups) +{ + struct ttm_validate_buffer *entry; + int ret; + + if (list_empty(list)) + return 0; + + if (ticket) + ww_acquire_init(ticket, &reservation_ww_class); + + list_for_each_entry(entry, list, head) { + struct ttm_buffer_object *bo = entry->bo; + unsigned int num_fences; + + ret = ttm_bo_reserve(bo, intr, (ticket == NULL), ticket); + if (ret == -EALREADY && dups) { + struct ttm_validate_buffer *safe = entry; + entry = list_prev_entry(entry, head); + list_del(&safe->head); + list_add(&safe->head, dups); + continue; + } + + num_fences = max(entry->num_shared, 1u); + if (!ret) { + ret = dma_resv_reserve_fences(bo->base.resv, + num_fences); + if (!ret) + continue; + } + + /* uh oh, we lost out, drop every reservation and try + * to only reserve this buffer, then start over if + * this succeeds. + */ + ttm_eu_backoff_reservation_reverse(list, entry); + + if (ret == -EDEADLK) { + ret = ttm_bo_reserve_slowpath(bo, intr, ticket); + } + + if (!ret) + ret = dma_resv_reserve_fences(bo->base.resv, + num_fences); + + if (unlikely(ret != 0)) { + if (ticket) { + ww_acquire_done(ticket); + ww_acquire_fini(ticket); + } + return ret; + } + + /* move this item to the front of the list, + * forces correct iteration of the loop without keeping track + */ + list_del(&entry->head); + list_add(&entry->head, list); + } + + return 0; +} +EXPORT_SYMBOL(ttm_eu_reserve_buffers); + +void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket, + struct list_head *list, + struct dma_fence *fence) +{ + struct ttm_validate_buffer *entry; + + if (list_empty(list)) + return; + + list_for_each_entry(entry, list, head) { + struct ttm_buffer_object *bo = entry->bo; + + dma_resv_add_fence(bo->base.resv, fence, entry->num_shared ? + DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE); + ttm_bo_move_to_lru_tail_unlocked(bo); + dma_resv_unlock(bo->base.resv); + } + if (ticket) + ww_acquire_fini(ticket); +} +EXPORT_SYMBOL(ttm_eu_fence_buffer_objects); diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c new file mode 100644 index 000000000..b3fffe7b5 --- /dev/null +++ b/drivers/gpu/drm/ttm/ttm_module.c @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/************************************************************************** + * + * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +/* + * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com> + * Jerome Glisse + */ +#include <linux/module.h> +#include <linux/device.h> +#include <linux/pgtable.h> +#include <linux/sched.h> +#include <linux/debugfs.h> +#include <drm/drm_sysfs.h> +#include <drm/ttm/ttm_caching.h> + +#include "ttm_module.h" + +/** + * DOC: TTM + * + * TTM is a memory manager for accelerator devices with dedicated memory. + * + * The basic idea is that resources are grouped together in buffer objects of + * certain size and TTM handles lifetime, movement and CPU mappings of those + * objects. + * + * TODO: Add more design background and information here. + */ + +/** + * ttm_prot_from_caching - Modify the page protection according to the + * ttm cacing mode + * @caching: The ttm caching mode + * @tmp: The original page protection + * + * Return: The modified page protection + */ +pgprot_t ttm_prot_from_caching(enum ttm_caching caching, pgprot_t tmp) +{ + /* Cached mappings need no adjustment */ + if (caching == ttm_cached) + return tmp; + +#if defined(__i386__) || defined(__x86_64__) + if (caching == ttm_write_combined) + tmp = pgprot_writecombine(tmp); +#ifndef CONFIG_UML + else if (boot_cpu_data.x86 > 3) + tmp = pgprot_noncached(tmp); +#endif /* CONFIG_UML */ +#endif /* __i386__ || __x86_64__ */ +#if defined(__ia64__) || defined(__arm__) || defined(__aarch64__) || \ + defined(__powerpc__) || defined(__mips__) || defined(__loongarch__) + if (caching == ttm_write_combined) + tmp = pgprot_writecombine(tmp); + else + tmp = pgprot_noncached(tmp); +#endif +#if defined(__sparc__) + tmp = pgprot_noncached(tmp); +#endif + return tmp; +} + +MODULE_AUTHOR("Thomas Hellstrom, Jerome Glisse"); +MODULE_DESCRIPTION("TTM memory manager subsystem (for DRM device)"); +MODULE_LICENSE("GPL and additional rights"); diff --git a/drivers/gpu/drm/ttm/ttm_module.h b/drivers/gpu/drm/ttm/ttm_module.h new file mode 100644 index 000000000..767fe22ae --- /dev/null +++ b/drivers/gpu/drm/ttm/ttm_module.h @@ -0,0 +1,43 @@ +/************************************************************************** + * + * Copyright 2008-2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +/* + * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com> + */ + +#ifndef _TTM_MODULE_H_ +#define _TTM_MODULE_H_ + +#define TTM_PFX "[TTM] " + +struct dentry; +struct ttm_device; + +extern struct dentry *ttm_debugfs_root; + +void ttm_sys_man_init(struct ttm_device *bdev); + +#endif /* _TTM_MODULE_H_ */ diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c new file mode 100644 index 000000000..86affe987 --- /dev/null +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -0,0 +1,802 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Christian König + */ + +/* Pooling of allocated pages is necessary because changing the caching + * attributes on x86 of the linear mapping requires a costly cross CPU TLB + * invalidate for those addresses. + * + * Additional to that allocations from the DMA coherent API are pooled as well + * cause they are rather slow compared to alloc_pages+map. + */ + +#include <linux/module.h> +#include <linux/dma-mapping.h> +#include <linux/highmem.h> +#include <linux/sched/mm.h> + +#ifdef CONFIG_X86 +#include <asm/set_memory.h> +#endif + +#include <drm/ttm/ttm_pool.h> +#include <drm/ttm/ttm_bo_driver.h> +#include <drm/ttm/ttm_tt.h> + +#include "ttm_module.h" + +/** + * struct ttm_pool_dma - Helper object for coherent DMA mappings + * + * @addr: original DMA address returned for the mapping + * @vaddr: original vaddr return for the mapping and order in the lower bits + */ +struct ttm_pool_dma { + dma_addr_t addr; + unsigned long vaddr; +}; + +static unsigned long page_pool_size; + +MODULE_PARM_DESC(page_pool_size, "Number of pages in the WC/UC/DMA pool"); +module_param(page_pool_size, ulong, 0644); + +static atomic_long_t allocated_pages; + +static struct ttm_pool_type global_write_combined[MAX_ORDER]; +static struct ttm_pool_type global_uncached[MAX_ORDER]; + +static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER]; +static struct ttm_pool_type global_dma32_uncached[MAX_ORDER]; + +static spinlock_t shrinker_lock; +static struct list_head shrinker_list; +static struct shrinker mm_shrinker; + +/* Allocate pages of size 1 << order with the given gfp_flags */ +static struct page *ttm_pool_alloc_page(struct ttm_pool *pool, gfp_t gfp_flags, + unsigned int order) +{ + unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS; + struct ttm_pool_dma *dma; + struct page *p; + void *vaddr; + + /* Don't set the __GFP_COMP flag for higher order allocations. + * Mapping pages directly into an userspace process and calling + * put_page() on a TTM allocated page is illegal. + */ + if (order) + gfp_flags |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | + __GFP_KSWAPD_RECLAIM; + + if (!pool->use_dma_alloc) { + p = alloc_pages(gfp_flags, order); + if (p) + p->private = order; + return p; + } + + dma = kmalloc(sizeof(*dma), GFP_KERNEL); + if (!dma) + return NULL; + + if (order) + attr |= DMA_ATTR_NO_WARN; + + vaddr = dma_alloc_attrs(pool->dev, (1ULL << order) * PAGE_SIZE, + &dma->addr, gfp_flags, attr); + if (!vaddr) + goto error_free; + + /* TODO: This is an illegal abuse of the DMA API, but we need to rework + * TTM page fault handling and extend the DMA API to clean this up. + */ + if (is_vmalloc_addr(vaddr)) + p = vmalloc_to_page(vaddr); + else + p = virt_to_page(vaddr); + + dma->vaddr = (unsigned long)vaddr | order; + p->private = (unsigned long)dma; + return p; + +error_free: + kfree(dma); + return NULL; +} + +/* Reset the caching and pages of size 1 << order */ +static void ttm_pool_free_page(struct ttm_pool *pool, enum ttm_caching caching, + unsigned int order, struct page *p) +{ + unsigned long attr = DMA_ATTR_FORCE_CONTIGUOUS; + struct ttm_pool_dma *dma; + void *vaddr; + +#ifdef CONFIG_X86 + /* We don't care that set_pages_wb is inefficient here. This is only + * used when we have to shrink and CPU overhead is irrelevant then. + */ + if (caching != ttm_cached && !PageHighMem(p)) + set_pages_wb(p, 1 << order); +#endif + + if (!pool || !pool->use_dma_alloc) { + __free_pages(p, order); + return; + } + + if (order) + attr |= DMA_ATTR_NO_WARN; + + dma = (void *)p->private; + vaddr = (void *)(dma->vaddr & PAGE_MASK); + dma_free_attrs(pool->dev, (1UL << order) * PAGE_SIZE, vaddr, dma->addr, + attr); + kfree(dma); +} + +/* Apply a new caching to an array of pages */ +static int ttm_pool_apply_caching(struct page **first, struct page **last, + enum ttm_caching caching) +{ +#ifdef CONFIG_X86 + unsigned int num_pages = last - first; + + if (!num_pages) + return 0; + + switch (caching) { + case ttm_cached: + break; + case ttm_write_combined: + return set_pages_array_wc(first, num_pages); + case ttm_uncached: + return set_pages_array_uc(first, num_pages); + } +#endif + return 0; +} + +/* Map pages of 1 << order size and fill the DMA address array */ +static int ttm_pool_map(struct ttm_pool *pool, unsigned int order, + struct page *p, dma_addr_t **dma_addr) +{ + dma_addr_t addr; + unsigned int i; + + if (pool->use_dma_alloc) { + struct ttm_pool_dma *dma = (void *)p->private; + + addr = dma->addr; + } else { + size_t size = (1ULL << order) * PAGE_SIZE; + + addr = dma_map_page(pool->dev, p, 0, size, DMA_BIDIRECTIONAL); + if (dma_mapping_error(pool->dev, addr)) + return -EFAULT; + } + + for (i = 1 << order; i ; --i) { + *(*dma_addr)++ = addr; + addr += PAGE_SIZE; + } + + return 0; +} + +/* Unmap pages of 1 << order size */ +static void ttm_pool_unmap(struct ttm_pool *pool, dma_addr_t dma_addr, + unsigned int num_pages) +{ + /* Unmapped while freeing the page */ + if (pool->use_dma_alloc) + return; + + dma_unmap_page(pool->dev, dma_addr, (long)num_pages << PAGE_SHIFT, + DMA_BIDIRECTIONAL); +} + +/* Give pages into a specific pool_type */ +static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p) +{ + unsigned int i, num_pages = 1 << pt->order; + + for (i = 0; i < num_pages; ++i) { + if (PageHighMem(p)) + clear_highpage(p + i); + else + clear_page(page_address(p + i)); + } + + spin_lock(&pt->lock); + list_add(&p->lru, &pt->pages); + spin_unlock(&pt->lock); + atomic_long_add(1 << pt->order, &allocated_pages); +} + +/* Take pages from a specific pool_type, return NULL when nothing available */ +static struct page *ttm_pool_type_take(struct ttm_pool_type *pt) +{ + struct page *p; + + spin_lock(&pt->lock); + p = list_first_entry_or_null(&pt->pages, typeof(*p), lru); + if (p) { + atomic_long_sub(1 << pt->order, &allocated_pages); + list_del(&p->lru); + } + spin_unlock(&pt->lock); + + return p; +} + +/* Initialize and add a pool type to the global shrinker list */ +static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool, + enum ttm_caching caching, unsigned int order) +{ + pt->pool = pool; + pt->caching = caching; + pt->order = order; + spin_lock_init(&pt->lock); + INIT_LIST_HEAD(&pt->pages); + + spin_lock(&shrinker_lock); + list_add_tail(&pt->shrinker_list, &shrinker_list); + spin_unlock(&shrinker_lock); +} + +/* Remove a pool_type from the global shrinker list and free all pages */ +static void ttm_pool_type_fini(struct ttm_pool_type *pt) +{ + struct page *p; + + spin_lock(&shrinker_lock); + list_del(&pt->shrinker_list); + spin_unlock(&shrinker_lock); + + while ((p = ttm_pool_type_take(pt))) + ttm_pool_free_page(pt->pool, pt->caching, pt->order, p); +} + +/* Return the pool_type to use for the given caching and order */ +static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool, + enum ttm_caching caching, + unsigned int order) +{ + if (pool->use_dma_alloc) + return &pool->caching[caching].orders[order]; + +#ifdef CONFIG_X86 + switch (caching) { + case ttm_write_combined: + if (pool->use_dma32) + return &global_dma32_write_combined[order]; + + return &global_write_combined[order]; + case ttm_uncached: + if (pool->use_dma32) + return &global_dma32_uncached[order]; + + return &global_uncached[order]; + default: + break; + } +#endif + + return NULL; +} + +/* Free pages using the global shrinker list */ +static unsigned int ttm_pool_shrink(void) +{ + struct ttm_pool_type *pt; + unsigned int num_pages; + struct page *p; + + spin_lock(&shrinker_lock); + pt = list_first_entry(&shrinker_list, typeof(*pt), shrinker_list); + list_move_tail(&pt->shrinker_list, &shrinker_list); + spin_unlock(&shrinker_lock); + + p = ttm_pool_type_take(pt); + if (p) { + ttm_pool_free_page(pt->pool, pt->caching, pt->order, p); + num_pages = 1 << pt->order; + } else { + num_pages = 0; + } + + return num_pages; +} + +/* Return the allocation order based for a page */ +static unsigned int ttm_pool_page_order(struct ttm_pool *pool, struct page *p) +{ + if (pool->use_dma_alloc) { + struct ttm_pool_dma *dma = (void *)p->private; + + return dma->vaddr & ~PAGE_MASK; + } + + return p->private; +} + +/* Called when we got a page, either from a pool or newly allocated */ +static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order, + struct page *p, dma_addr_t **dma_addr, + unsigned long *num_pages, + struct page ***pages) +{ + unsigned int i; + int r; + + if (*dma_addr) { + r = ttm_pool_map(pool, order, p, dma_addr); + if (r) + return r; + } + + *num_pages -= 1 << order; + for (i = 1 << order; i; --i, ++(*pages), ++p) + **pages = p; + + return 0; +} + +/** + * ttm_pool_free_range() - Free a range of TTM pages + * @pool: The pool used for allocating. + * @tt: The struct ttm_tt holding the page pointers. + * @caching: The page caching mode used by the range. + * @start_page: index for first page to free. + * @end_page: index for last page to free + 1. + * + * During allocation the ttm_tt page-vector may be populated with ranges of + * pages with different attributes if allocation hit an error without being + * able to completely fulfill the allocation. This function can be used + * to free these individual ranges. + */ +static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, + enum ttm_caching caching, + pgoff_t start_page, pgoff_t end_page) +{ + struct page **pages = tt->pages; + unsigned int order; + pgoff_t i, nr; + + for (i = start_page; i < end_page; i += nr, pages += nr) { + struct ttm_pool_type *pt = NULL; + + order = ttm_pool_page_order(pool, *pages); + nr = (1UL << order); + if (tt->dma_address) + ttm_pool_unmap(pool, tt->dma_address[i], nr); + + pt = ttm_pool_select_type(pool, caching, order); + if (pt) + ttm_pool_type_give(pt, *pages); + else + ttm_pool_free_page(pool, caching, order, *pages); + } +} + +/** + * ttm_pool_alloc - Fill a ttm_tt object + * + * @pool: ttm_pool to use + * @tt: ttm_tt object to fill + * @ctx: operation context + * + * Fill the ttm_tt object with pages and also make sure to DMA map them when + * necessary. + * + * Returns: 0 on successe, negative error code otherwise. + */ +int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt, + struct ttm_operation_ctx *ctx) +{ + pgoff_t num_pages = tt->num_pages; + dma_addr_t *dma_addr = tt->dma_address; + struct page **caching = tt->pages; + struct page **pages = tt->pages; + enum ttm_caching page_caching; + gfp_t gfp_flags = GFP_USER; + pgoff_t caching_divide; + unsigned int order; + struct page *p; + int r; + + WARN_ON(!num_pages || ttm_tt_is_populated(tt)); + WARN_ON(dma_addr && !pool->dev); + + if (tt->page_flags & TTM_TT_FLAG_ZERO_ALLOC) + gfp_flags |= __GFP_ZERO; + + if (ctx->gfp_retry_mayfail) + gfp_flags |= __GFP_RETRY_MAYFAIL; + + if (pool->use_dma32) + gfp_flags |= GFP_DMA32; + else + gfp_flags |= GFP_HIGHUSER; + + for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages)); + num_pages; + order = min_t(unsigned int, order, __fls(num_pages))) { + struct ttm_pool_type *pt; + + page_caching = tt->caching; + pt = ttm_pool_select_type(pool, tt->caching, order); + p = pt ? ttm_pool_type_take(pt) : NULL; + if (p) { + r = ttm_pool_apply_caching(caching, pages, + tt->caching); + if (r) + goto error_free_page; + + caching = pages; + do { + r = ttm_pool_page_allocated(pool, order, p, + &dma_addr, + &num_pages, + &pages); + if (r) + goto error_free_page; + + caching = pages; + if (num_pages < (1 << order)) + break; + + p = ttm_pool_type_take(pt); + } while (p); + } + + page_caching = ttm_cached; + while (num_pages >= (1 << order) && + (p = ttm_pool_alloc_page(pool, gfp_flags, order))) { + + if (PageHighMem(p)) { + r = ttm_pool_apply_caching(caching, pages, + tt->caching); + if (r) + goto error_free_page; + caching = pages; + } + r = ttm_pool_page_allocated(pool, order, p, &dma_addr, + &num_pages, &pages); + if (r) + goto error_free_page; + if (PageHighMem(p)) + caching = pages; + } + + if (!p) { + if (order) { + --order; + continue; + } + r = -ENOMEM; + goto error_free_all; + } + } + + r = ttm_pool_apply_caching(caching, pages, tt->caching); + if (r) + goto error_free_all; + + return 0; + +error_free_page: + ttm_pool_free_page(pool, page_caching, order, p); + +error_free_all: + num_pages = tt->num_pages - num_pages; + caching_divide = caching - tt->pages; + ttm_pool_free_range(pool, tt, tt->caching, 0, caching_divide); + ttm_pool_free_range(pool, tt, ttm_cached, caching_divide, num_pages); + + return r; +} +EXPORT_SYMBOL(ttm_pool_alloc); + +/** + * ttm_pool_free - Free the backing pages from a ttm_tt object + * + * @pool: Pool to give pages back to. + * @tt: ttm_tt object to unpopulate + * + * Give the packing pages back to a pool or free them + */ +void ttm_pool_free(struct ttm_pool *pool, struct ttm_tt *tt) +{ + ttm_pool_free_range(pool, tt, tt->caching, 0, tt->num_pages); + + while (atomic_long_read(&allocated_pages) > page_pool_size) + ttm_pool_shrink(); +} +EXPORT_SYMBOL(ttm_pool_free); + +/** + * ttm_pool_init - Initialize a pool + * + * @pool: the pool to initialize + * @dev: device for DMA allocations and mappings + * @use_dma_alloc: true if coherent DMA alloc should be used + * @use_dma32: true if GFP_DMA32 should be used + * + * Initialize the pool and its pool types. + */ +void ttm_pool_init(struct ttm_pool *pool, struct device *dev, + bool use_dma_alloc, bool use_dma32) +{ + unsigned int i, j; + + WARN_ON(!dev && use_dma_alloc); + + pool->dev = dev; + pool->use_dma_alloc = use_dma_alloc; + pool->use_dma32 = use_dma32; + + if (use_dma_alloc) { + for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) + for (j = 0; j < MAX_ORDER; ++j) + ttm_pool_type_init(&pool->caching[i].orders[j], + pool, i, j); + } +} + +/** + * ttm_pool_fini - Cleanup a pool + * + * @pool: the pool to clean up + * + * Free all pages in the pool and unregister the types from the global + * shrinker. + */ +void ttm_pool_fini(struct ttm_pool *pool) +{ + unsigned int i, j; + + if (pool->use_dma_alloc) { + for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) + for (j = 0; j < MAX_ORDER; ++j) + ttm_pool_type_fini(&pool->caching[i].orders[j]); + } + + /* We removed the pool types from the LRU, but we need to also make sure + * that no shrinker is concurrently freeing pages from the pool. + */ + synchronize_shrinkers(); +} + +/* As long as pages are available make sure to release at least one */ +static unsigned long ttm_pool_shrinker_scan(struct shrinker *shrink, + struct shrink_control *sc) +{ + unsigned long num_freed = 0; + + do + num_freed += ttm_pool_shrink(); + while (!num_freed && atomic_long_read(&allocated_pages)); + + return num_freed; +} + +/* Return the number of pages available or SHRINK_EMPTY if we have none */ +static unsigned long ttm_pool_shrinker_count(struct shrinker *shrink, + struct shrink_control *sc) +{ + unsigned long num_pages = atomic_long_read(&allocated_pages); + + return num_pages ? num_pages : SHRINK_EMPTY; +} + +#ifdef CONFIG_DEBUG_FS +/* Count the number of pages available in a pool_type */ +static unsigned int ttm_pool_type_count(struct ttm_pool_type *pt) +{ + unsigned int count = 0; + struct page *p; + + spin_lock(&pt->lock); + /* Only used for debugfs, the overhead doesn't matter */ + list_for_each_entry(p, &pt->pages, lru) + ++count; + spin_unlock(&pt->lock); + + return count; +} + +/* Print a nice header for the order */ +static void ttm_pool_debugfs_header(struct seq_file *m) +{ + unsigned int i; + + seq_puts(m, "\t "); + for (i = 0; i < MAX_ORDER; ++i) + seq_printf(m, " ---%2u---", i); + seq_puts(m, "\n"); +} + +/* Dump information about the different pool types */ +static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt, + struct seq_file *m) +{ + unsigned int i; + + for (i = 0; i < MAX_ORDER; ++i) + seq_printf(m, " %8u", ttm_pool_type_count(&pt[i])); + seq_puts(m, "\n"); +} + +/* Dump the total amount of allocated pages */ +static void ttm_pool_debugfs_footer(struct seq_file *m) +{ + seq_printf(m, "\ntotal\t: %8lu of %8lu\n", + atomic_long_read(&allocated_pages), page_pool_size); +} + +/* Dump the information for the global pools */ +static int ttm_pool_debugfs_globals_show(struct seq_file *m, void *data) +{ + ttm_pool_debugfs_header(m); + + spin_lock(&shrinker_lock); + seq_puts(m, "wc\t:"); + ttm_pool_debugfs_orders(global_write_combined, m); + seq_puts(m, "uc\t:"); + ttm_pool_debugfs_orders(global_uncached, m); + seq_puts(m, "wc 32\t:"); + ttm_pool_debugfs_orders(global_dma32_write_combined, m); + seq_puts(m, "uc 32\t:"); + ttm_pool_debugfs_orders(global_dma32_uncached, m); + spin_unlock(&shrinker_lock); + + ttm_pool_debugfs_footer(m); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(ttm_pool_debugfs_globals); + +/** + * ttm_pool_debugfs - Debugfs dump function for a pool + * + * @pool: the pool to dump the information for + * @m: seq_file to dump to + * + * Make a debugfs dump with the per pool and global information. + */ +int ttm_pool_debugfs(struct ttm_pool *pool, struct seq_file *m) +{ + unsigned int i; + + if (!pool->use_dma_alloc) { + seq_puts(m, "unused\n"); + return 0; + } + + ttm_pool_debugfs_header(m); + + spin_lock(&shrinker_lock); + for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) { + seq_puts(m, "DMA "); + switch (i) { + case ttm_cached: + seq_puts(m, "\t:"); + break; + case ttm_write_combined: + seq_puts(m, "wc\t:"); + break; + case ttm_uncached: + seq_puts(m, "uc\t:"); + break; + } + ttm_pool_debugfs_orders(pool->caching[i].orders, m); + } + spin_unlock(&shrinker_lock); + + ttm_pool_debugfs_footer(m); + return 0; +} +EXPORT_SYMBOL(ttm_pool_debugfs); + +/* Test the shrinker functions and dump the result */ +static int ttm_pool_debugfs_shrink_show(struct seq_file *m, void *data) +{ + struct shrink_control sc = { .gfp_mask = GFP_NOFS }; + + fs_reclaim_acquire(GFP_KERNEL); + seq_printf(m, "%lu/%lu\n", ttm_pool_shrinker_count(&mm_shrinker, &sc), + ttm_pool_shrinker_scan(&mm_shrinker, &sc)); + fs_reclaim_release(GFP_KERNEL); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(ttm_pool_debugfs_shrink); + +#endif + +/** + * ttm_pool_mgr_init - Initialize globals + * + * @num_pages: default number of pages + * + * Initialize the global locks and lists for the MM shrinker. + */ +int ttm_pool_mgr_init(unsigned long num_pages) +{ + unsigned int i; + + if (!page_pool_size) + page_pool_size = num_pages; + + spin_lock_init(&shrinker_lock); + INIT_LIST_HEAD(&shrinker_list); + + for (i = 0; i < MAX_ORDER; ++i) { + ttm_pool_type_init(&global_write_combined[i], NULL, + ttm_write_combined, i); + ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i); + + ttm_pool_type_init(&global_dma32_write_combined[i], NULL, + ttm_write_combined, i); + ttm_pool_type_init(&global_dma32_uncached[i], NULL, + ttm_uncached, i); + } + +#ifdef CONFIG_DEBUG_FS + debugfs_create_file("page_pool", 0444, ttm_debugfs_root, NULL, + &ttm_pool_debugfs_globals_fops); + debugfs_create_file("page_pool_shrink", 0400, ttm_debugfs_root, NULL, + &ttm_pool_debugfs_shrink_fops); +#endif + + mm_shrinker.count_objects = ttm_pool_shrinker_count; + mm_shrinker.scan_objects = ttm_pool_shrinker_scan; + mm_shrinker.seeks = 1; + return register_shrinker(&mm_shrinker, "drm-ttm_pool"); +} + +/** + * ttm_pool_mgr_fini - Finalize globals + * + * Cleanup the global pools and unregister the MM shrinker. + */ +void ttm_pool_mgr_fini(void) +{ + unsigned int i; + + for (i = 0; i < MAX_ORDER; ++i) { + ttm_pool_type_fini(&global_write_combined[i]); + ttm_pool_type_fini(&global_uncached[i]); + + ttm_pool_type_fini(&global_dma32_write_combined[i]); + ttm_pool_type_fini(&global_dma32_uncached[i]); + } + + unregister_shrinker(&mm_shrinker); + WARN_ON(!list_empty(&shrinker_list)); +} diff --git a/drivers/gpu/drm/ttm/ttm_range_manager.c b/drivers/gpu/drm/ttm/ttm_range_manager.c new file mode 100644 index 000000000..4cfef2b35 --- /dev/null +++ b/drivers/gpu/drm/ttm/ttm_range_manager.c @@ -0,0 +1,241 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/************************************************************************** + * + * Copyright (c) 2007-2010 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +/* + * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com> + */ + +#include <drm/ttm/ttm_device.h> +#include <drm/ttm/ttm_placement.h> +#include <drm/ttm/ttm_range_manager.h> +#include <drm/ttm/ttm_bo_api.h> +#include <drm/drm_mm.h> +#include <linux/slab.h> +#include <linux/spinlock.h> + +/* + * Currently we use a spinlock for the lock, but a mutex *may* be + * more appropriate to reduce scheduling latency if the range manager + * ends up with very fragmented allocation patterns. + */ + +struct ttm_range_manager { + struct ttm_resource_manager manager; + struct drm_mm mm; + spinlock_t lock; +}; + +static inline struct ttm_range_manager * +to_range_manager(struct ttm_resource_manager *man) +{ + return container_of(man, struct ttm_range_manager, manager); +} + +static int ttm_range_man_alloc(struct ttm_resource_manager *man, + struct ttm_buffer_object *bo, + const struct ttm_place *place, + struct ttm_resource **res) +{ + struct ttm_range_manager *rman = to_range_manager(man); + struct ttm_range_mgr_node *node; + struct drm_mm *mm = &rman->mm; + enum drm_mm_insert_mode mode; + unsigned long lpfn; + int ret; + + lpfn = place->lpfn; + if (!lpfn) + lpfn = man->size; + + node = kzalloc(struct_size(node, mm_nodes, 1), GFP_KERNEL); + if (!node) + return -ENOMEM; + + mode = DRM_MM_INSERT_BEST; + if (place->flags & TTM_PL_FLAG_TOPDOWN) + mode = DRM_MM_INSERT_HIGH; + + ttm_resource_init(bo, place, &node->base); + + spin_lock(&rman->lock); + ret = drm_mm_insert_node_in_range(mm, &node->mm_nodes[0], + node->base.num_pages, + bo->page_alignment, 0, + place->fpfn, lpfn, mode); + spin_unlock(&rman->lock); + + if (unlikely(ret)) { + ttm_resource_fini(man, &node->base); + kfree(node); + return ret; + } + + node->base.start = node->mm_nodes[0].start; + *res = &node->base; + return 0; +} + +static void ttm_range_man_free(struct ttm_resource_manager *man, + struct ttm_resource *res) +{ + struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res); + struct ttm_range_manager *rman = to_range_manager(man); + + spin_lock(&rman->lock); + drm_mm_remove_node(&node->mm_nodes[0]); + spin_unlock(&rman->lock); + + ttm_resource_fini(man, res); + kfree(node); +} + +static bool ttm_range_man_intersects(struct ttm_resource_manager *man, + struct ttm_resource *res, + const struct ttm_place *place, + size_t size) +{ + struct drm_mm_node *node = &to_ttm_range_mgr_node(res)->mm_nodes[0]; + u32 num_pages = PFN_UP(size); + + /* Don't evict BOs outside of the requested placement range */ + if (place->fpfn >= (node->start + num_pages) || + (place->lpfn && place->lpfn <= node->start)) + return false; + + return true; +} + +static bool ttm_range_man_compatible(struct ttm_resource_manager *man, + struct ttm_resource *res, + const struct ttm_place *place, + size_t size) +{ + struct drm_mm_node *node = &to_ttm_range_mgr_node(res)->mm_nodes[0]; + u32 num_pages = PFN_UP(size); + + if (node->start < place->fpfn || + (place->lpfn && (node->start + num_pages) > place->lpfn)) + return false; + + return true; +} + +static void ttm_range_man_debug(struct ttm_resource_manager *man, + struct drm_printer *printer) +{ + struct ttm_range_manager *rman = to_range_manager(man); + + spin_lock(&rman->lock); + drm_mm_print(&rman->mm, printer); + spin_unlock(&rman->lock); +} + +static const struct ttm_resource_manager_func ttm_range_manager_func = { + .alloc = ttm_range_man_alloc, + .free = ttm_range_man_free, + .intersects = ttm_range_man_intersects, + .compatible = ttm_range_man_compatible, + .debug = ttm_range_man_debug +}; + +/** + * ttm_range_man_init_nocheck - Initialise a generic range manager for the + * selected memory type. + * + * @bdev: ttm device + * @type: memory manager type + * @use_tt: if the memory manager uses tt + * @p_size: size of area to be managed in pages. + * + * The range manager is installed for this device in the type slot. + * + * Return: %0 on success or a negative error code on failure + */ +int ttm_range_man_init_nocheck(struct ttm_device *bdev, + unsigned type, bool use_tt, + unsigned long p_size) +{ + struct ttm_resource_manager *man; + struct ttm_range_manager *rman; + + rman = kzalloc(sizeof(*rman), GFP_KERNEL); + if (!rman) + return -ENOMEM; + + man = &rman->manager; + man->use_tt = use_tt; + + man->func = &ttm_range_manager_func; + + ttm_resource_manager_init(man, bdev, p_size); + + drm_mm_init(&rman->mm, 0, p_size); + spin_lock_init(&rman->lock); + + ttm_set_driver_manager(bdev, type, &rman->manager); + ttm_resource_manager_set_used(man, true); + return 0; +} +EXPORT_SYMBOL(ttm_range_man_init_nocheck); + +/** + * ttm_range_man_fini_nocheck - Remove the generic range manager from a slot + * and tear it down. + * + * @bdev: ttm device + * @type: memory manager type + * + * Return: %0 on success or a negative error code on failure + */ +int ttm_range_man_fini_nocheck(struct ttm_device *bdev, + unsigned type) +{ + struct ttm_resource_manager *man = ttm_manager_type(bdev, type); + struct ttm_range_manager *rman = to_range_manager(man); + struct drm_mm *mm = &rman->mm; + int ret; + + if (!man) + return 0; + + ttm_resource_manager_set_used(man, false); + + ret = ttm_resource_manager_evict_all(bdev, man); + if (ret) + return ret; + + spin_lock(&rman->lock); + drm_mm_clean(mm); + drm_mm_takedown(mm); + spin_unlock(&rman->lock); + + ttm_resource_manager_cleanup(man); + ttm_set_driver_manager(bdev, type, NULL); + kfree(rman); + return 0; +} +EXPORT_SYMBOL(ttm_range_man_fini_nocheck); diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c new file mode 100644 index 000000000..3287032a2 --- /dev/null +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -0,0 +1,763 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Christian König + */ + +#include <linux/iosys-map.h> +#include <linux/io-mapping.h> +#include <linux/scatterlist.h> + +#include <drm/ttm/ttm_resource.h> +#include <drm/ttm/ttm_bo_driver.h> + +/** + * ttm_lru_bulk_move_init - initialize a bulk move structure + * @bulk: the structure to init + * + * For now just memset the structure to zero. + */ +void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk) +{ + memset(bulk, 0, sizeof(*bulk)); +} +EXPORT_SYMBOL(ttm_lru_bulk_move_init); + +/** + * ttm_lru_bulk_move_tail - bulk move range of resources to the LRU tail. + * + * @bulk: bulk move structure + * + * Bulk move BOs to the LRU tail, only valid to use when driver makes sure that + * resource order never changes. Should be called with &ttm_device.lru_lock held. + */ +void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk) +{ + unsigned i, j; + + for (i = 0; i < TTM_NUM_MEM_TYPES; ++i) { + for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) { + struct ttm_lru_bulk_move_pos *pos = &bulk->pos[i][j]; + struct ttm_resource_manager *man; + + if (!pos->first) + continue; + + lockdep_assert_held(&pos->first->bo->bdev->lru_lock); + dma_resv_assert_held(pos->first->bo->base.resv); + dma_resv_assert_held(pos->last->bo->base.resv); + + man = ttm_manager_type(pos->first->bo->bdev, i); + list_bulk_move_tail(&man->lru[j], &pos->first->lru, + &pos->last->lru); + } + } +} +EXPORT_SYMBOL(ttm_lru_bulk_move_tail); + +/* Return the bulk move pos object for this resource */ +static struct ttm_lru_bulk_move_pos * +ttm_lru_bulk_move_pos(struct ttm_lru_bulk_move *bulk, struct ttm_resource *res) +{ + return &bulk->pos[res->mem_type][res->bo->priority]; +} + +/* Move the resource to the tail of the bulk move range */ +static void ttm_lru_bulk_move_pos_tail(struct ttm_lru_bulk_move_pos *pos, + struct ttm_resource *res) +{ + if (pos->last != res) { + if (pos->first == res) + pos->first = list_next_entry(res, lru); + list_move(&res->lru, &pos->last->lru); + pos->last = res; + } +} + +/* Add the resource to a bulk_move cursor */ +static void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk, + struct ttm_resource *res) +{ + struct ttm_lru_bulk_move_pos *pos = ttm_lru_bulk_move_pos(bulk, res); + + if (!pos->first) { + pos->first = res; + pos->last = res; + } else { + ttm_lru_bulk_move_pos_tail(pos, res); + } +} + +/* Remove the resource from a bulk_move range */ +static void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk, + struct ttm_resource *res) +{ + struct ttm_lru_bulk_move_pos *pos = ttm_lru_bulk_move_pos(bulk, res); + + if (unlikely(WARN_ON(!pos->first || !pos->last) || + (pos->first == res && pos->last == res))) { + pos->first = NULL; + pos->last = NULL; + } else if (pos->first == res) { + pos->first = list_next_entry(res, lru); + } else if (pos->last == res) { + pos->last = list_prev_entry(res, lru); + } else { + list_move(&res->lru, &pos->last->lru); + } +} + +/* Add the resource to a bulk move if the BO is configured for it */ +void ttm_resource_add_bulk_move(struct ttm_resource *res, + struct ttm_buffer_object *bo) +{ + if (bo->bulk_move && !bo->pin_count) + ttm_lru_bulk_move_add(bo->bulk_move, res); +} + +/* Remove the resource from a bulk move if the BO is configured for it */ +void ttm_resource_del_bulk_move(struct ttm_resource *res, + struct ttm_buffer_object *bo) +{ + if (bo->bulk_move && !bo->pin_count) + ttm_lru_bulk_move_del(bo->bulk_move, res); +} + +/* Move a resource to the LRU or bulk tail */ +void ttm_resource_move_to_lru_tail(struct ttm_resource *res) +{ + struct ttm_buffer_object *bo = res->bo; + struct ttm_device *bdev = bo->bdev; + + lockdep_assert_held(&bo->bdev->lru_lock); + + if (bo->pin_count) { + list_move_tail(&res->lru, &bdev->pinned); + + } else if (bo->bulk_move) { + struct ttm_lru_bulk_move_pos *pos = + ttm_lru_bulk_move_pos(bo->bulk_move, res); + + ttm_lru_bulk_move_pos_tail(pos, res); + } else { + struct ttm_resource_manager *man; + + man = ttm_manager_type(bdev, res->mem_type); + list_move_tail(&res->lru, &man->lru[bo->priority]); + } +} + +/** + * ttm_resource_init - resource object constructure + * @bo: buffer object this resources is allocated for + * @place: placement of the resource + * @res: the resource object to inistilize + * + * Initialize a new resource object. Counterpart of ttm_resource_fini(). + */ +void ttm_resource_init(struct ttm_buffer_object *bo, + const struct ttm_place *place, + struct ttm_resource *res) +{ + struct ttm_resource_manager *man; + + res->start = 0; + res->num_pages = PFN_UP(bo->base.size); + res->mem_type = place->mem_type; + res->placement = place->flags; + res->bus.addr = NULL; + res->bus.offset = 0; + res->bus.is_iomem = false; + res->bus.caching = ttm_cached; + res->bo = bo; + + man = ttm_manager_type(bo->bdev, place->mem_type); + spin_lock(&bo->bdev->lru_lock); + if (bo->pin_count) + list_add_tail(&res->lru, &bo->bdev->pinned); + else + list_add_tail(&res->lru, &man->lru[bo->priority]); + man->usage += res->num_pages << PAGE_SHIFT; + spin_unlock(&bo->bdev->lru_lock); +} +EXPORT_SYMBOL(ttm_resource_init); + +/** + * ttm_resource_fini - resource destructor + * @man: the resource manager this resource belongs to + * @res: the resource to clean up + * + * Should be used by resource manager backends to clean up the TTM resource + * objects before freeing the underlying structure. Makes sure the resource is + * removed from the LRU before destruction. + * Counterpart of ttm_resource_init(). + */ +void ttm_resource_fini(struct ttm_resource_manager *man, + struct ttm_resource *res) +{ + struct ttm_device *bdev = man->bdev; + + spin_lock(&bdev->lru_lock); + list_del_init(&res->lru); + man->usage -= res->num_pages << PAGE_SHIFT; + spin_unlock(&bdev->lru_lock); +} +EXPORT_SYMBOL(ttm_resource_fini); + +int ttm_resource_alloc(struct ttm_buffer_object *bo, + const struct ttm_place *place, + struct ttm_resource **res_ptr) +{ + struct ttm_resource_manager *man = + ttm_manager_type(bo->bdev, place->mem_type); + int ret; + + ret = man->func->alloc(man, bo, place, res_ptr); + if (ret) + return ret; + + spin_lock(&bo->bdev->lru_lock); + ttm_resource_add_bulk_move(*res_ptr, bo); + spin_unlock(&bo->bdev->lru_lock); + return 0; +} + +void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res) +{ + struct ttm_resource_manager *man; + + if (!*res) + return; + + spin_lock(&bo->bdev->lru_lock); + ttm_resource_del_bulk_move(*res, bo); + spin_unlock(&bo->bdev->lru_lock); + man = ttm_manager_type(bo->bdev, (*res)->mem_type); + man->func->free(man, *res); + *res = NULL; +} +EXPORT_SYMBOL(ttm_resource_free); + +/** + * ttm_resource_intersects - test for intersection + * + * @bdev: TTM device structure + * @res: The resource to test + * @place: The placement to test + * @size: How many bytes the new allocation needs. + * + * Test if @res intersects with @place and @size. Used for testing if evictions + * are valueable or not. + * + * Returns true if the res placement intersects with @place and @size. + */ +bool ttm_resource_intersects(struct ttm_device *bdev, + struct ttm_resource *res, + const struct ttm_place *place, + size_t size) +{ + struct ttm_resource_manager *man; + + if (!res) + return false; + + man = ttm_manager_type(bdev, res->mem_type); + if (!place || !man->func->intersects) + return true; + + return man->func->intersects(man, res, place, size); +} + +/** + * ttm_resource_compatible - test for compatibility + * + * @bdev: TTM device structure + * @res: The resource to test + * @place: The placement to test + * @size: How many bytes the new allocation needs. + * + * Test if @res compatible with @place and @size. + * + * Returns true if the res placement compatible with @place and @size. + */ +bool ttm_resource_compatible(struct ttm_device *bdev, + struct ttm_resource *res, + const struct ttm_place *place, + size_t size) +{ + struct ttm_resource_manager *man; + + if (!res || !place) + return false; + + man = ttm_manager_type(bdev, res->mem_type); + if (!man->func->compatible) + return true; + + return man->func->compatible(man, res, place, size); +} + +static bool ttm_resource_places_compat(struct ttm_resource *res, + const struct ttm_place *places, + unsigned num_placement) +{ + struct ttm_buffer_object *bo = res->bo; + struct ttm_device *bdev = bo->bdev; + unsigned i; + + if (res->placement & TTM_PL_FLAG_TEMPORARY) + return false; + + for (i = 0; i < num_placement; i++) { + const struct ttm_place *heap = &places[i]; + + if (!ttm_resource_compatible(bdev, res, heap, bo->base.size)) + continue; + + if ((res->mem_type == heap->mem_type) && + (!(heap->flags & TTM_PL_FLAG_CONTIGUOUS) || + (res->placement & TTM_PL_FLAG_CONTIGUOUS))) + return true; + } + return false; +} + +/** + * ttm_resource_compat - check if resource is compatible with placement + * + * @res: the resource to check + * @placement: the placement to check against + * + * Returns true if the placement is compatible. + */ +bool ttm_resource_compat(struct ttm_resource *res, + struct ttm_placement *placement) +{ + if (ttm_resource_places_compat(res, placement->placement, + placement->num_placement)) + return true; + + if ((placement->busy_placement != placement->placement || + placement->num_busy_placement > placement->num_placement) && + ttm_resource_places_compat(res, placement->busy_placement, + placement->num_busy_placement)) + return true; + + return false; +} +EXPORT_SYMBOL(ttm_resource_compat); + +void ttm_resource_set_bo(struct ttm_resource *res, + struct ttm_buffer_object *bo) +{ + spin_lock(&bo->bdev->lru_lock); + res->bo = bo; + spin_unlock(&bo->bdev->lru_lock); +} + +/** + * ttm_resource_manager_init + * + * @man: memory manager object to init + * @bdev: ttm device this manager belongs to + * @size: size of managed resources in arbitrary units + * + * Initialise core parts of a manager object. + */ +void ttm_resource_manager_init(struct ttm_resource_manager *man, + struct ttm_device *bdev, + uint64_t size) +{ + unsigned i; + + spin_lock_init(&man->move_lock); + man->bdev = bdev; + man->size = size; + man->usage = 0; + + for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) + INIT_LIST_HEAD(&man->lru[i]); + man->move = NULL; +} +EXPORT_SYMBOL(ttm_resource_manager_init); + +/* + * ttm_resource_manager_evict_all + * + * @bdev - device to use + * @man - manager to use + * + * Evict all the objects out of a memory manager until it is empty. + * Part of memory manager cleanup sequence. + */ +int ttm_resource_manager_evict_all(struct ttm_device *bdev, + struct ttm_resource_manager *man) +{ + struct ttm_operation_ctx ctx = { + .interruptible = false, + .no_wait_gpu = false, + .force_alloc = true + }; + struct dma_fence *fence; + int ret; + unsigned i; + + /* + * Can't use standard list traversal since we're unlocking. + */ + + spin_lock(&bdev->lru_lock); + for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { + while (!list_empty(&man->lru[i])) { + spin_unlock(&bdev->lru_lock); + ret = ttm_mem_evict_first(bdev, man, NULL, &ctx, + NULL); + if (ret) + return ret; + spin_lock(&bdev->lru_lock); + } + } + spin_unlock(&bdev->lru_lock); + + spin_lock(&man->move_lock); + fence = dma_fence_get(man->move); + spin_unlock(&man->move_lock); + + if (fence) { + ret = dma_fence_wait(fence, false); + dma_fence_put(fence); + if (ret) + return ret; + } + + return 0; +} +EXPORT_SYMBOL(ttm_resource_manager_evict_all); + +/** + * ttm_resource_manager_usage + * + * @man: A memory manager object. + * + * Return how many resources are currently used. + */ +uint64_t ttm_resource_manager_usage(struct ttm_resource_manager *man) +{ + uint64_t usage; + + spin_lock(&man->bdev->lru_lock); + usage = man->usage; + spin_unlock(&man->bdev->lru_lock); + return usage; +} +EXPORT_SYMBOL(ttm_resource_manager_usage); + +/** + * ttm_resource_manager_debug + * + * @man: manager type to dump. + * @p: printer to use for debug. + */ +void ttm_resource_manager_debug(struct ttm_resource_manager *man, + struct drm_printer *p) +{ + drm_printf(p, " use_type: %d\n", man->use_type); + drm_printf(p, " use_tt: %d\n", man->use_tt); + drm_printf(p, " size: %llu\n", man->size); + drm_printf(p, " usage: %llu\n", ttm_resource_manager_usage(man)); + if (man->func->debug) + man->func->debug(man, p); +} +EXPORT_SYMBOL(ttm_resource_manager_debug); + +/** + * ttm_resource_manager_first + * + * @man: resource manager to iterate over + * @cursor: cursor to record the position + * + * Returns the first resource from the resource manager. + */ +struct ttm_resource * +ttm_resource_manager_first(struct ttm_resource_manager *man, + struct ttm_resource_cursor *cursor) +{ + struct ttm_resource *res; + + lockdep_assert_held(&man->bdev->lru_lock); + + for (cursor->priority = 0; cursor->priority < TTM_MAX_BO_PRIORITY; + ++cursor->priority) + list_for_each_entry(res, &man->lru[cursor->priority], lru) + return res; + + return NULL; +} + +/** + * ttm_resource_manager_next + * + * @man: resource manager to iterate over + * @cursor: cursor to record the position + * @res: the current resource pointer + * + * Returns the next resource from the resource manager. + */ +struct ttm_resource * +ttm_resource_manager_next(struct ttm_resource_manager *man, + struct ttm_resource_cursor *cursor, + struct ttm_resource *res) +{ + lockdep_assert_held(&man->bdev->lru_lock); + + list_for_each_entry_continue(res, &man->lru[cursor->priority], lru) + return res; + + for (++cursor->priority; cursor->priority < TTM_MAX_BO_PRIORITY; + ++cursor->priority) + list_for_each_entry(res, &man->lru[cursor->priority], lru) + return res; + + return NULL; +} + +static void ttm_kmap_iter_iomap_map_local(struct ttm_kmap_iter *iter, + struct iosys_map *dmap, + pgoff_t i) +{ + struct ttm_kmap_iter_iomap *iter_io = + container_of(iter, typeof(*iter_io), base); + void __iomem *addr; + +retry: + while (i >= iter_io->cache.end) { + iter_io->cache.sg = iter_io->cache.sg ? + sg_next(iter_io->cache.sg) : iter_io->st->sgl; + iter_io->cache.i = iter_io->cache.end; + iter_io->cache.end += sg_dma_len(iter_io->cache.sg) >> + PAGE_SHIFT; + iter_io->cache.offs = sg_dma_address(iter_io->cache.sg) - + iter_io->start; + } + + if (i < iter_io->cache.i) { + iter_io->cache.end = 0; + iter_io->cache.sg = NULL; + goto retry; + } + + addr = io_mapping_map_local_wc(iter_io->iomap, iter_io->cache.offs + + (((resource_size_t)i - iter_io->cache.i) + << PAGE_SHIFT)); + iosys_map_set_vaddr_iomem(dmap, addr); +} + +static void ttm_kmap_iter_iomap_unmap_local(struct ttm_kmap_iter *iter, + struct iosys_map *map) +{ + io_mapping_unmap_local(map->vaddr_iomem); +} + +static const struct ttm_kmap_iter_ops ttm_kmap_iter_io_ops = { + .map_local = ttm_kmap_iter_iomap_map_local, + .unmap_local = ttm_kmap_iter_iomap_unmap_local, + .maps_tt = false, +}; + +/** + * ttm_kmap_iter_iomap_init - Initialize a struct ttm_kmap_iter_iomap + * @iter_io: The struct ttm_kmap_iter_iomap to initialize. + * @iomap: The struct io_mapping representing the underlying linear io_memory. + * @st: sg_table into @iomap, representing the memory of the struct + * ttm_resource. + * @start: Offset that needs to be subtracted from @st to make + * sg_dma_address(st->sgl) - @start == 0 for @iomap start. + * + * Return: Pointer to the embedded struct ttm_kmap_iter. + */ +struct ttm_kmap_iter * +ttm_kmap_iter_iomap_init(struct ttm_kmap_iter_iomap *iter_io, + struct io_mapping *iomap, + struct sg_table *st, + resource_size_t start) +{ + iter_io->base.ops = &ttm_kmap_iter_io_ops; + iter_io->iomap = iomap; + iter_io->st = st; + iter_io->start = start; + memset(&iter_io->cache, 0, sizeof(iter_io->cache)); + + return &iter_io->base; +} +EXPORT_SYMBOL(ttm_kmap_iter_iomap_init); + +/** + * DOC: Linear io iterator + * + * This code should die in the not too near future. Best would be if we could + * make io-mapping use memremap for all io memory, and have memremap + * implement a kmap_local functionality. We could then strip a huge amount of + * code. These linear io iterators are implemented to mimic old functionality, + * and they don't use kmap_local semantics at all internally. Rather ioremap or + * friends, and at least on 32-bit they add global TLB flushes and points + * of failure. + */ + +static void ttm_kmap_iter_linear_io_map_local(struct ttm_kmap_iter *iter, + struct iosys_map *dmap, + pgoff_t i) +{ + struct ttm_kmap_iter_linear_io *iter_io = + container_of(iter, typeof(*iter_io), base); + + *dmap = iter_io->dmap; + iosys_map_incr(dmap, i * PAGE_SIZE); +} + +static const struct ttm_kmap_iter_ops ttm_kmap_iter_linear_io_ops = { + .map_local = ttm_kmap_iter_linear_io_map_local, + .maps_tt = false, +}; + +/** + * ttm_kmap_iter_linear_io_init - Initialize an iterator for linear io memory + * @iter_io: The iterator to initialize + * @bdev: The TTM device + * @mem: The ttm resource representing the iomap. + * + * This function is for internal TTM use only. It sets up a memcpy kmap iterator + * pointing at a linear chunk of io memory. + * + * Return: A pointer to the embedded struct ttm_kmap_iter or error pointer on + * failure. + */ +struct ttm_kmap_iter * +ttm_kmap_iter_linear_io_init(struct ttm_kmap_iter_linear_io *iter_io, + struct ttm_device *bdev, + struct ttm_resource *mem) +{ + int ret; + + ret = ttm_mem_io_reserve(bdev, mem); + if (ret) + goto out_err; + if (!mem->bus.is_iomem) { + ret = -EINVAL; + goto out_io_free; + } + + if (mem->bus.addr) { + iosys_map_set_vaddr(&iter_io->dmap, mem->bus.addr); + iter_io->needs_unmap = false; + } else { + size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT; + + iter_io->needs_unmap = true; + memset(&iter_io->dmap, 0, sizeof(iter_io->dmap)); + if (mem->bus.caching == ttm_write_combined) + iosys_map_set_vaddr_iomem(&iter_io->dmap, + ioremap_wc(mem->bus.offset, + bus_size)); + else if (mem->bus.caching == ttm_cached) + iosys_map_set_vaddr(&iter_io->dmap, + memremap(mem->bus.offset, bus_size, + MEMREMAP_WB | + MEMREMAP_WT | + MEMREMAP_WC)); + + /* If uncached requested or if mapping cached or wc failed */ + if (iosys_map_is_null(&iter_io->dmap)) + iosys_map_set_vaddr_iomem(&iter_io->dmap, + ioremap(mem->bus.offset, + bus_size)); + + if (iosys_map_is_null(&iter_io->dmap)) { + ret = -ENOMEM; + goto out_io_free; + } + } + + iter_io->base.ops = &ttm_kmap_iter_linear_io_ops; + return &iter_io->base; + +out_io_free: + ttm_mem_io_free(bdev, mem); +out_err: + return ERR_PTR(ret); +} + +/** + * ttm_kmap_iter_linear_io_fini - Clean up an iterator for linear io memory + * @iter_io: The iterator to initialize + * @bdev: The TTM device + * @mem: The ttm resource representing the iomap. + * + * This function is for internal TTM use only. It cleans up a memcpy kmap + * iterator initialized by ttm_kmap_iter_linear_io_init. + */ +void +ttm_kmap_iter_linear_io_fini(struct ttm_kmap_iter_linear_io *iter_io, + struct ttm_device *bdev, + struct ttm_resource *mem) +{ + if (iter_io->needs_unmap && iosys_map_is_set(&iter_io->dmap)) { + if (iter_io->dmap.is_iomem) + iounmap(iter_io->dmap.vaddr_iomem); + else + memunmap(iter_io->dmap.vaddr); + } + + ttm_mem_io_free(bdev, mem); +} + +#if defined(CONFIG_DEBUG_FS) + +static int ttm_resource_manager_show(struct seq_file *m, void *unused) +{ + struct ttm_resource_manager *man = + (struct ttm_resource_manager *)m->private; + struct drm_printer p = drm_seq_file_printer(m); + ttm_resource_manager_debug(man, &p); + return 0; +} +DEFINE_SHOW_ATTRIBUTE(ttm_resource_manager); + +#endif + +/** + * ttm_resource_manager_create_debugfs - Create debugfs entry for specified + * resource manager. + * @man: The TTM resource manager for which the debugfs stats file be creates + * @parent: debugfs directory in which the file will reside + * @name: The filename to create. + * + * This function setups up a debugfs file that can be used to look + * at debug statistics of the specified ttm_resource_manager. + */ +void ttm_resource_manager_create_debugfs(struct ttm_resource_manager *man, + struct dentry * parent, + const char *name) +{ +#if defined(CONFIG_DEBUG_FS) + debugfs_create_file(name, 0444, parent, man, &ttm_resource_manager_fops); +#endif +} +EXPORT_SYMBOL(ttm_resource_manager_create_debugfs); diff --git a/drivers/gpu/drm/ttm/ttm_sys_manager.c b/drivers/gpu/drm/ttm/ttm_sys_manager.c new file mode 100644 index 000000000..2ced16951 --- /dev/null +++ b/drivers/gpu/drm/ttm/ttm_sys_manager.c @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +#include <drm/ttm/ttm_resource.h> +#include <drm/ttm/ttm_device.h> +#include <drm/ttm/ttm_placement.h> +#include <linux/slab.h> + +#include "ttm_module.h" + +static int ttm_sys_man_alloc(struct ttm_resource_manager *man, + struct ttm_buffer_object *bo, + const struct ttm_place *place, + struct ttm_resource **res) +{ + *res = kzalloc(sizeof(**res), GFP_KERNEL); + if (!*res) + return -ENOMEM; + + ttm_resource_init(bo, place, *res); + return 0; +} + +static void ttm_sys_man_free(struct ttm_resource_manager *man, + struct ttm_resource *res) +{ + ttm_resource_fini(man, res); + kfree(res); +} + +static const struct ttm_resource_manager_func ttm_sys_manager_func = { + .alloc = ttm_sys_man_alloc, + .free = ttm_sys_man_free, +}; + +void ttm_sys_man_init(struct ttm_device *bdev) +{ + struct ttm_resource_manager *man = &bdev->sysman; + + /* + * Initialize the system memory buffer type. + * Other types need to be driver / IOCTL initialized. + */ + man->use_tt = true; + man->func = &ttm_sys_manager_func; + + ttm_resource_manager_init(man, bdev, 0); + ttm_set_driver_manager(bdev, TTM_PL_SYSTEM, man); + ttm_resource_manager_set_used(man, true); +} diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c new file mode 100644 index 000000000..d50560393 --- /dev/null +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -0,0 +1,451 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/************************************************************************** + * + * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +/* + * Authors: Thomas Hellstrom <thellstrom-at-vmware-dot-com> + */ + +#define pr_fmt(fmt) "[TTM] " fmt + +#include <linux/sched.h> +#include <linux/shmem_fs.h> +#include <linux/file.h> +#include <linux/module.h> +#include <drm/drm_cache.h> +#include <drm/ttm/ttm_bo_driver.h> + +#include "ttm_module.h" + +static unsigned long ttm_pages_limit; + +MODULE_PARM_DESC(pages_limit, "Limit for the allocated pages"); +module_param_named(pages_limit, ttm_pages_limit, ulong, 0644); + +static unsigned long ttm_dma32_pages_limit; + +MODULE_PARM_DESC(dma32_pages_limit, "Limit for the allocated DMA32 pages"); +module_param_named(dma32_pages_limit, ttm_dma32_pages_limit, ulong, 0644); + +static atomic_long_t ttm_pages_allocated; +static atomic_long_t ttm_dma32_pages_allocated; + +/* + * Allocates a ttm structure for the given BO. + */ +int ttm_tt_create(struct ttm_buffer_object *bo, bool zero_alloc) +{ + struct ttm_device *bdev = bo->bdev; + uint32_t page_flags = 0; + + dma_resv_assert_held(bo->base.resv); + + if (bo->ttm) + return 0; + + switch (bo->type) { + case ttm_bo_type_device: + if (zero_alloc) + page_flags |= TTM_TT_FLAG_ZERO_ALLOC; + break; + case ttm_bo_type_kernel: + break; + case ttm_bo_type_sg: + page_flags |= TTM_TT_FLAG_EXTERNAL; + break; + default: + pr_err("Illegal buffer object type\n"); + return -EINVAL; + } + + bo->ttm = bdev->funcs->ttm_tt_create(bo, page_flags); + if (unlikely(bo->ttm == NULL)) + return -ENOMEM; + + WARN_ON(bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE && + !(bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL)); + + return 0; +} + +/* + * Allocates storage for pointers to the pages that back the ttm. + */ +static int ttm_tt_alloc_page_directory(struct ttm_tt *ttm) +{ + ttm->pages = kvcalloc(ttm->num_pages, sizeof(void*), GFP_KERNEL); + if (!ttm->pages) + return -ENOMEM; + + return 0; +} + +static int ttm_dma_tt_alloc_page_directory(struct ttm_tt *ttm) +{ + ttm->pages = kvcalloc(ttm->num_pages, sizeof(*ttm->pages) + + sizeof(*ttm->dma_address), GFP_KERNEL); + if (!ttm->pages) + return -ENOMEM; + + ttm->dma_address = (void *)(ttm->pages + ttm->num_pages); + return 0; +} + +static int ttm_sg_tt_alloc_page_directory(struct ttm_tt *ttm) +{ + ttm->dma_address = kvcalloc(ttm->num_pages, sizeof(*ttm->dma_address), + GFP_KERNEL); + if (!ttm->dma_address) + return -ENOMEM; + + return 0; +} + +void ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *ttm) +{ + bdev->funcs->ttm_tt_destroy(bdev, ttm); +} + +static void ttm_tt_init_fields(struct ttm_tt *ttm, + struct ttm_buffer_object *bo, + uint32_t page_flags, + enum ttm_caching caching, + unsigned long extra_pages) +{ + ttm->num_pages = (PAGE_ALIGN(bo->base.size) >> PAGE_SHIFT) + extra_pages; + ttm->caching = ttm_cached; + ttm->page_flags = page_flags; + ttm->dma_address = NULL; + ttm->swap_storage = NULL; + ttm->sg = bo->sg; + ttm->caching = caching; +} + +int ttm_tt_init(struct ttm_tt *ttm, struct ttm_buffer_object *bo, + uint32_t page_flags, enum ttm_caching caching, + unsigned long extra_pages) +{ + ttm_tt_init_fields(ttm, bo, page_flags, caching, extra_pages); + + if (ttm_tt_alloc_page_directory(ttm)) { + pr_err("Failed allocating page table\n"); + return -ENOMEM; + } + return 0; +} +EXPORT_SYMBOL(ttm_tt_init); + +void ttm_tt_fini(struct ttm_tt *ttm) +{ + WARN_ON(ttm->page_flags & TTM_TT_FLAG_PRIV_POPULATED); + + if (ttm->swap_storage) + fput(ttm->swap_storage); + ttm->swap_storage = NULL; + + if (ttm->pages) + kvfree(ttm->pages); + else + kvfree(ttm->dma_address); + ttm->pages = NULL; + ttm->dma_address = NULL; +} +EXPORT_SYMBOL(ttm_tt_fini); + +int ttm_sg_tt_init(struct ttm_tt *ttm, struct ttm_buffer_object *bo, + uint32_t page_flags, enum ttm_caching caching) +{ + int ret; + + ttm_tt_init_fields(ttm, bo, page_flags, caching, 0); + + if (page_flags & TTM_TT_FLAG_EXTERNAL) + ret = ttm_sg_tt_alloc_page_directory(ttm); + else + ret = ttm_dma_tt_alloc_page_directory(ttm); + if (ret) { + pr_err("Failed allocating page table\n"); + return -ENOMEM; + } + return 0; +} +EXPORT_SYMBOL(ttm_sg_tt_init); + +int ttm_tt_swapin(struct ttm_tt *ttm) +{ + struct address_space *swap_space; + struct file *swap_storage; + struct page *from_page; + struct page *to_page; + gfp_t gfp_mask; + int i, ret; + + swap_storage = ttm->swap_storage; + BUG_ON(swap_storage == NULL); + + swap_space = swap_storage->f_mapping; + gfp_mask = mapping_gfp_mask(swap_space); + + for (i = 0; i < ttm->num_pages; ++i) { + from_page = shmem_read_mapping_page_gfp(swap_space, i, + gfp_mask); + if (IS_ERR(from_page)) { + ret = PTR_ERR(from_page); + goto out_err; + } + to_page = ttm->pages[i]; + if (unlikely(to_page == NULL)) { + ret = -ENOMEM; + goto out_err; + } + + copy_highpage(to_page, from_page); + put_page(from_page); + } + + fput(swap_storage); + ttm->swap_storage = NULL; + ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED; + + return 0; + +out_err: + return ret; +} + +/** + * ttm_tt_swapout - swap out tt object + * + * @bdev: TTM device structure. + * @ttm: The struct ttm_tt. + * @gfp_flags: Flags to use for memory allocation. + * + * Swapout a TT object to a shmem_file, return number of pages swapped out or + * negative error code. + */ +int ttm_tt_swapout(struct ttm_device *bdev, struct ttm_tt *ttm, + gfp_t gfp_flags) +{ + loff_t size = (loff_t)ttm->num_pages << PAGE_SHIFT; + struct address_space *swap_space; + struct file *swap_storage; + struct page *from_page; + struct page *to_page; + int i, ret; + + swap_storage = shmem_file_setup("ttm swap", size, 0); + if (IS_ERR(swap_storage)) { + pr_err("Failed allocating swap storage\n"); + return PTR_ERR(swap_storage); + } + + swap_space = swap_storage->f_mapping; + gfp_flags &= mapping_gfp_mask(swap_space); + + for (i = 0; i < ttm->num_pages; ++i) { + from_page = ttm->pages[i]; + if (unlikely(from_page == NULL)) + continue; + + to_page = shmem_read_mapping_page_gfp(swap_space, i, gfp_flags); + if (IS_ERR(to_page)) { + ret = PTR_ERR(to_page); + goto out_err; + } + copy_highpage(to_page, from_page); + set_page_dirty(to_page); + mark_page_accessed(to_page); + put_page(to_page); + } + + ttm_tt_unpopulate(bdev, ttm); + ttm->swap_storage = swap_storage; + ttm->page_flags |= TTM_TT_FLAG_SWAPPED; + + return ttm->num_pages; + +out_err: + fput(swap_storage); + + return ret; +} + +int ttm_tt_populate(struct ttm_device *bdev, + struct ttm_tt *ttm, struct ttm_operation_ctx *ctx) +{ + int ret; + + if (!ttm) + return -EINVAL; + + if (ttm_tt_is_populated(ttm)) + return 0; + + if (!(ttm->page_flags & TTM_TT_FLAG_EXTERNAL)) { + atomic_long_add(ttm->num_pages, &ttm_pages_allocated); + if (bdev->pool.use_dma32) + atomic_long_add(ttm->num_pages, + &ttm_dma32_pages_allocated); + } + + while (atomic_long_read(&ttm_pages_allocated) > ttm_pages_limit || + atomic_long_read(&ttm_dma32_pages_allocated) > + ttm_dma32_pages_limit) { + + ret = ttm_global_swapout(ctx, GFP_KERNEL); + if (ret == 0) + break; + if (ret < 0) + goto error; + } + + if (bdev->funcs->ttm_tt_populate) + ret = bdev->funcs->ttm_tt_populate(bdev, ttm, ctx); + else + ret = ttm_pool_alloc(&bdev->pool, ttm, ctx); + if (ret) + goto error; + + ttm->page_flags |= TTM_TT_FLAG_PRIV_POPULATED; + if (unlikely(ttm->page_flags & TTM_TT_FLAG_SWAPPED)) { + ret = ttm_tt_swapin(ttm); + if (unlikely(ret != 0)) { + ttm_tt_unpopulate(bdev, ttm); + return ret; + } + } + + return 0; + +error: + if (!(ttm->page_flags & TTM_TT_FLAG_EXTERNAL)) { + atomic_long_sub(ttm->num_pages, &ttm_pages_allocated); + if (bdev->pool.use_dma32) + atomic_long_sub(ttm->num_pages, + &ttm_dma32_pages_allocated); + } + return ret; +} +EXPORT_SYMBOL(ttm_tt_populate); + +void ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm) +{ + if (!ttm_tt_is_populated(ttm)) + return; + + if (bdev->funcs->ttm_tt_unpopulate) + bdev->funcs->ttm_tt_unpopulate(bdev, ttm); + else + ttm_pool_free(&bdev->pool, ttm); + + if (!(ttm->page_flags & TTM_TT_FLAG_EXTERNAL)) { + atomic_long_sub(ttm->num_pages, &ttm_pages_allocated); + if (bdev->pool.use_dma32) + atomic_long_sub(ttm->num_pages, + &ttm_dma32_pages_allocated); + } + + ttm->page_flags &= ~TTM_TT_FLAG_PRIV_POPULATED; +} + +#ifdef CONFIG_DEBUG_FS + +/* Test the shrinker functions and dump the result */ +static int ttm_tt_debugfs_shrink_show(struct seq_file *m, void *data) +{ + struct ttm_operation_ctx ctx = { false, false }; + + seq_printf(m, "%d\n", ttm_global_swapout(&ctx, GFP_KERNEL)); + return 0; +} +DEFINE_SHOW_ATTRIBUTE(ttm_tt_debugfs_shrink); + +#endif + + +/* + * ttm_tt_mgr_init - register with the MM shrinker + * + * Register with the MM shrinker for swapping out BOs. + */ +void ttm_tt_mgr_init(unsigned long num_pages, unsigned long num_dma32_pages) +{ +#ifdef CONFIG_DEBUG_FS + debugfs_create_file("tt_shrink", 0400, ttm_debugfs_root, NULL, + &ttm_tt_debugfs_shrink_fops); +#endif + + if (!ttm_pages_limit) + ttm_pages_limit = num_pages; + + if (!ttm_dma32_pages_limit) + ttm_dma32_pages_limit = num_dma32_pages; +} + +static void ttm_kmap_iter_tt_map_local(struct ttm_kmap_iter *iter, + struct iosys_map *dmap, + pgoff_t i) +{ + struct ttm_kmap_iter_tt *iter_tt = + container_of(iter, typeof(*iter_tt), base); + + iosys_map_set_vaddr(dmap, kmap_local_page_prot(iter_tt->tt->pages[i], + iter_tt->prot)); +} + +static void ttm_kmap_iter_tt_unmap_local(struct ttm_kmap_iter *iter, + struct iosys_map *map) +{ + kunmap_local(map->vaddr); +} + +static const struct ttm_kmap_iter_ops ttm_kmap_iter_tt_ops = { + .map_local = ttm_kmap_iter_tt_map_local, + .unmap_local = ttm_kmap_iter_tt_unmap_local, + .maps_tt = true, +}; + +/** + * ttm_kmap_iter_tt_init - Initialize a struct ttm_kmap_iter_tt + * @iter_tt: The struct ttm_kmap_iter_tt to initialize. + * @tt: Struct ttm_tt holding page pointers of the struct ttm_resource. + * + * Return: Pointer to the embedded struct ttm_kmap_iter. + */ +struct ttm_kmap_iter * +ttm_kmap_iter_tt_init(struct ttm_kmap_iter_tt *iter_tt, + struct ttm_tt *tt) +{ + iter_tt->base.ops = &ttm_kmap_iter_tt_ops; + iter_tt->tt = tt; + if (tt) + iter_tt->prot = ttm_prot_from_caching(tt->caching, PAGE_KERNEL); + else + iter_tt->prot = PAGE_KERNEL; + + return &iter_tt->base; +} +EXPORT_SYMBOL(ttm_kmap_iter_tt_init); |