Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_timeline.c')
-rw-r--r-- | drivers/gpu/drm/i915/gt/intel_timeline.c | 493
1 file changed, 493 insertions, 0 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c
new file mode 100644
index 000000000..b9640212d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -0,0 +1,493 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2016-2018 Intel Corporation
+ */
+
+#include <drm/drm_cache.h>
+
+#include "gem/i915_gem_internal.h"
+
+#include "i915_active.h"
+#include "i915_drv.h"
+#include "i915_syncmap.h"
+#include "intel_gt.h"
+#include "intel_ring.h"
+#include "intel_timeline.h"
+
+#define TIMELINE_SEQNO_BYTES 8
+
+static struct i915_vma *hwsp_alloc(struct intel_gt *gt)
+{
+        struct drm_i915_private *i915 = gt->i915;
+        struct drm_i915_gem_object *obj;
+        struct i915_vma *vma;
+
+        obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+        if (IS_ERR(obj))
+                return ERR_CAST(obj);
+
+        i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
+
+        vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
+        if (IS_ERR(vma))
+                i915_gem_object_put(obj);
+
+        return vma;
+}
+
+static void __timeline_retire(struct i915_active *active)
+{
+        struct intel_timeline *tl =
+                container_of(active, typeof(*tl), active);
+
+        i915_vma_unpin(tl->hwsp_ggtt);
+        intel_timeline_put(tl);
+}
+
+static int __timeline_active(struct i915_active *active)
+{
+        struct intel_timeline *tl =
+                container_of(active, typeof(*tl), active);
+
+        __i915_vma_pin(tl->hwsp_ggtt);
+        intel_timeline_get(tl);
+        return 0;
+}
+
+I915_SELFTEST_EXPORT int
+intel_timeline_pin_map(struct intel_timeline *timeline)
+{
+        struct drm_i915_gem_object *obj = timeline->hwsp_ggtt->obj;
+        u32 ofs = offset_in_page(timeline->hwsp_offset);
+        void *vaddr;
+
+        vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+        if (IS_ERR(vaddr))
+                return PTR_ERR(vaddr);
+
+        timeline->hwsp_map = vaddr;
+        timeline->hwsp_seqno = memset(vaddr + ofs, 0, TIMELINE_SEQNO_BYTES);
+        drm_clflush_virt_range(vaddr + ofs, TIMELINE_SEQNO_BYTES);
+
+        return 0;
+}
+
+static int intel_timeline_init(struct intel_timeline *timeline,
+                               struct intel_gt *gt,
+                               struct i915_vma *hwsp,
+                               unsigned int offset)
+{
+        kref_init(&timeline->kref);
+        atomic_set(&timeline->pin_count, 0);
+
+        timeline->gt = gt;
+
+        if (hwsp) {
+                timeline->hwsp_offset = offset;
+                timeline->hwsp_ggtt = i915_vma_get(hwsp);
+        } else {
+                timeline->has_initial_breadcrumb = true;
+                hwsp = hwsp_alloc(gt);
+                if (IS_ERR(hwsp))
+                        return PTR_ERR(hwsp);
+                timeline->hwsp_ggtt = hwsp;
+        }
+
+        timeline->hwsp_map = NULL;
+        timeline->hwsp_seqno = (void *)(long)timeline->hwsp_offset;
+
+        GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size);
+
+        timeline->fence_context = dma_fence_context_alloc(1);
+
+        mutex_init(&timeline->mutex);
+
+        INIT_ACTIVE_FENCE(&timeline->last_request);
+        INIT_LIST_HEAD(&timeline->requests);
+
+        i915_syncmap_init(&timeline->sync);
+        i915_active_init(&timeline->active, __timeline_active,
+                         __timeline_retire, 0);
+
+        return 0;
+}
+
+void intel_gt_init_timelines(struct intel_gt *gt)
+{
+        struct intel_gt_timelines *timelines = &gt->timelines;
+
+        spin_lock_init(&timelines->lock);
+        INIT_LIST_HEAD(&timelines->active_list);
+}
+
+static void intel_timeline_fini(struct rcu_head *rcu)
+{
+        struct intel_timeline *timeline =
+                container_of(rcu, struct intel_timeline, rcu);
+
+        if (timeline->hwsp_map)
+                i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj);
+
+        i915_vma_put(timeline->hwsp_ggtt);
+        i915_active_fini(&timeline->active);
+
+        /*
+         * A small race exists between intel_gt_retire_requests_timeout and
+         * intel_timeline_exit which could result in the syncmap not getting
+         * free'd. Rather than work too hard to seal this race, simply clean
+         * up the syncmap on fini.
+         */
+        i915_syncmap_free(&timeline->sync);
+
+        kfree(timeline);
+}
+
+struct intel_timeline *
+__intel_timeline_create(struct intel_gt *gt,
+                        struct i915_vma *global_hwsp,
+                        unsigned int offset)
+{
+        struct intel_timeline *timeline;
+        int err;
+
+        timeline = kzalloc(sizeof(*timeline), GFP_KERNEL);
+        if (!timeline)
+                return ERR_PTR(-ENOMEM);
+
+        err = intel_timeline_init(timeline, gt, global_hwsp, offset);
+        if (err) {
+                kfree(timeline);
+                return ERR_PTR(err);
+        }
+
+        return timeline;
+}
+
+struct intel_timeline *
+intel_timeline_create_from_engine(struct intel_engine_cs *engine,
+                                  unsigned int offset)
+{
+        struct i915_vma *hwsp = engine->status_page.vma;
+        struct intel_timeline *tl;
+
+        tl = __intel_timeline_create(engine->gt, hwsp, offset);
+        if (IS_ERR(tl))
+                return tl;
+
+        /* Borrow a nearby lock; we only create these timelines during init */
+        mutex_lock(&hwsp->vm->mutex);
+        list_add_tail(&tl->engine_link, &engine->status_page.timelines);
+        mutex_unlock(&hwsp->vm->mutex);
+
+        return tl;
+}
+
+void __intel_timeline_pin(struct intel_timeline *tl)
+{
+        GEM_BUG_ON(!atomic_read(&tl->pin_count));
+        atomic_inc(&tl->pin_count);
+}
+
+int intel_timeline_pin(struct intel_timeline *tl, struct i915_gem_ww_ctx *ww)
+{
+        int err;
+
+        if (atomic_add_unless(&tl->pin_count, 1, 0))
+                return 0;
+
+        if (!tl->hwsp_map) {
+                err = intel_timeline_pin_map(tl);
+                if (err)
+                        return err;
+        }
+
+        err = i915_ggtt_pin(tl->hwsp_ggtt, ww, 0, PIN_HIGH);
+        if (err)
+                return err;
+
+        tl->hwsp_offset =
+                i915_ggtt_offset(tl->hwsp_ggtt) +
+                offset_in_page(tl->hwsp_offset);
+        GT_TRACE(tl->gt, "timeline:%llx using HWSP offset:%x\n",
+                 tl->fence_context, tl->hwsp_offset);
+
+        i915_active_acquire(&tl->active);
+        if (atomic_fetch_inc(&tl->pin_count)) {
+                i915_active_release(&tl->active);
+                __i915_vma_unpin(tl->hwsp_ggtt);
+        }
+
+        return 0;
+}
+
+void intel_timeline_reset_seqno(const struct intel_timeline *tl)
+{
+        u32 *hwsp_seqno = (u32 *)tl->hwsp_seqno;
+        /* Must be pinned to be writable, and no requests in flight. */
+        GEM_BUG_ON(!atomic_read(&tl->pin_count));
+
+        memset(hwsp_seqno + 1, 0, TIMELINE_SEQNO_BYTES - sizeof(*hwsp_seqno));
+        WRITE_ONCE(*hwsp_seqno, tl->seqno);
+        drm_clflush_virt_range(hwsp_seqno, TIMELINE_SEQNO_BYTES);
+}
+
+void intel_timeline_enter(struct intel_timeline *tl)
+{
+        struct intel_gt_timelines *timelines = &tl->gt->timelines;
+
+        /*
+         * Pretend we are serialised by the timeline->mutex.
+         *
+         * While generally true, there are a few exceptions to the rule
+         * for the engine->kernel_context being used to manage power
+         * transitions. As the engine_park may be called from under any
+         * timeline, it uses the power mutex as a global serialisation
+         * lock to prevent any other request entering its timeline.
+         *
+         * The rule is generally tl->mutex, otherwise engine->wakeref.mutex.
+         *
+         * However, intel_gt_retire_request() does not know which engine
+         * it is retiring along and so cannot partake in the engine-pm
+         * barrier, and there we use the tl->active_count as a means to
+         * pin the timeline in the active_list while the locks are dropped.
+         * Ergo, as that is outside of the engine-pm barrier, we need to
+         * use atomic to manipulate tl->active_count.
+         */
+        lockdep_assert_held(&tl->mutex);
+
+        if (atomic_add_unless(&tl->active_count, 1, 0))
+                return;
+
+        spin_lock(&timelines->lock);
+        if (!atomic_fetch_inc(&tl->active_count)) {
+                /*
+                 * The HWSP is volatile, and may have been lost while inactive,
+                 * e.g. across suspend/resume. Be paranoid, and ensure that
+                 * the HWSP value matches our seqno so we don't proclaim
+                 * the next request as already complete.
+                 */
+                intel_timeline_reset_seqno(tl);
+                list_add_tail(&tl->link, &timelines->active_list);
+        }
+        spin_unlock(&timelines->lock);
+}
+
+void intel_timeline_exit(struct intel_timeline *tl)
+{
+        struct intel_gt_timelines *timelines = &tl->gt->timelines;
+
+        /* See intel_timeline_enter() */
+        lockdep_assert_held(&tl->mutex);
+
+        GEM_BUG_ON(!atomic_read(&tl->active_count));
+        if (atomic_add_unless(&tl->active_count, -1, 1))
+                return;
+
+        spin_lock(&timelines->lock);
+        if (atomic_dec_and_test(&tl->active_count))
+                list_del(&tl->link);
+        spin_unlock(&timelines->lock);
+
+        /*
+         * Since this timeline is idle, all barriers upon which we were waiting
+         * must also be complete and so we can discard the last used barriers
+         * without loss of information.
+         */
+        i915_syncmap_free(&tl->sync);
+}
+
+static u32 timeline_advance(struct intel_timeline *tl)
+{
+        GEM_BUG_ON(!atomic_read(&tl->pin_count));
+        GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb);
+
+        return tl->seqno += 1 + tl->has_initial_breadcrumb;
+}
+
+static noinline int
+__intel_timeline_get_seqno(struct intel_timeline *tl,
+                           u32 *seqno)
+{
+        u32 next_ofs = offset_in_page(tl->hwsp_offset + TIMELINE_SEQNO_BYTES);
+
+        /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
+        if (TIMELINE_SEQNO_BYTES <= BIT(5) && (next_ofs & BIT(5)))
+                next_ofs = offset_in_page(next_ofs + BIT(5));
+
+        tl->hwsp_offset = i915_ggtt_offset(tl->hwsp_ggtt) + next_ofs;
+        tl->hwsp_seqno = tl->hwsp_map + next_ofs;
+        intel_timeline_reset_seqno(tl);
+
+        *seqno = timeline_advance(tl);
+        GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno));
+        return 0;
+}
+
+int intel_timeline_get_seqno(struct intel_timeline *tl,
+                             struct i915_request *rq,
+                             u32 *seqno)
+{
+        *seqno = timeline_advance(tl);
+
+        /* Replace the HWSP on wraparound for HW semaphores */
+        if (unlikely(!*seqno && tl->has_initial_breadcrumb))
+                return __intel_timeline_get_seqno(tl, seqno);
+
+        return 0;
+}
+
+int intel_timeline_read_hwsp(struct i915_request *from,
+                             struct i915_request *to,
+                             u32 *hwsp)
+{
+        struct intel_timeline *tl;
+        int err;
+
+        rcu_read_lock();
+        tl = rcu_dereference(from->timeline);
+        if (i915_request_signaled(from) ||
+            !i915_active_acquire_if_busy(&tl->active))
+                tl = NULL;
+
+        if (tl) {
+                /* hwsp_offset may wraparound, so use from->hwsp_seqno */
+                *hwsp = i915_ggtt_offset(tl->hwsp_ggtt) +
+                        offset_in_page(from->hwsp_seqno);
+        }
+
+        /* ensure we wait on the right request, if not, we completed */
+        if (tl && __i915_request_is_complete(from)) {
+                i915_active_release(&tl->active);
+                tl = NULL;
+        }
+        rcu_read_unlock();
+
+        if (!tl)
+                return 1;
+
+        /* Can't do semaphore waits on kernel context */
+        if (!tl->has_initial_breadcrumb) {
+                err = -EINVAL;
+                goto out;
+        }
+
+        err = i915_active_add_request(&tl->active, to);
+
+out:
+        i915_active_release(&tl->active);
+        return err;
+}
+
+void intel_timeline_unpin(struct intel_timeline *tl)
+{
+        GEM_BUG_ON(!atomic_read(&tl->pin_count));
+        if (!atomic_dec_and_test(&tl->pin_count))
+                return;
+
+        i915_active_release(&tl->active);
+        __i915_vma_unpin(tl->hwsp_ggtt);
+}
+
+void __intel_timeline_free(struct kref *kref)
+{
+        struct intel_timeline *timeline =
+                container_of(kref, typeof(*timeline), kref);
+
+        GEM_BUG_ON(atomic_read(&timeline->pin_count));
+        GEM_BUG_ON(!list_empty(&timeline->requests));
+        GEM_BUG_ON(timeline->retire);
+
+        call_rcu(&timeline->rcu, intel_timeline_fini);
+}
+
+void intel_gt_fini_timelines(struct intel_gt *gt)
+{
+        struct intel_gt_timelines *timelines = &gt->timelines;
+
+        GEM_BUG_ON(!list_empty(&timelines->active_list));
+}
+
+void intel_gt_show_timelines(struct intel_gt *gt,
+                             struct drm_printer *m,
+                             void (*show_request)(struct drm_printer *m,
+                                                  const struct i915_request *rq,
+                                                  const char *prefix,
+                                                  int indent))
+{
+        struct intel_gt_timelines *timelines = &gt->timelines;
+        struct intel_timeline *tl, *tn;
+        LIST_HEAD(free);
+
+        spin_lock(&timelines->lock);
+        list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
+                unsigned long count, ready, inflight;
+                struct i915_request *rq, *rn;
+                struct dma_fence *fence;
+
+                if (!mutex_trylock(&tl->mutex)) {
+                        drm_printf(m, "Timeline %llx: busy; skipping\n",
+                                   tl->fence_context);
+                        continue;
+                }
+
+                intel_timeline_get(tl);
+                GEM_BUG_ON(!atomic_read(&tl->active_count));
+                atomic_inc(&tl->active_count); /* pin the list element */
+                spin_unlock(&timelines->lock);
+
+                count = 0;
+                ready = 0;
+                inflight = 0;
+                list_for_each_entry_safe(rq, rn, &tl->requests, link) {
+                        if (i915_request_completed(rq))
+                                continue;
+
+                        count++;
+                        if (i915_request_is_ready(rq))
+                                ready++;
+                        if (i915_request_is_active(rq))
+                                inflight++;
+                }
+
+                drm_printf(m, "Timeline %llx: { ", tl->fence_context);
+                drm_printf(m, "count: %lu, ready: %lu, inflight: %lu",
+                           count, ready, inflight);
+                drm_printf(m, ", seqno: { current: %d, last: %d }",
+                           *tl->hwsp_seqno, tl->seqno);
+                fence = i915_active_fence_get(&tl->last_request);
+                if (fence) {
+                        drm_printf(m, ", engine: %s",
+                                   to_request(fence)->engine->name);
+                        dma_fence_put(fence);
+                }
+                drm_printf(m, " }\n");
+
+                if (show_request) {
+                        list_for_each_entry_safe(rq, rn, &tl->requests, link)
+                                show_request(m, rq, "", 2);
+                }
+
+                mutex_unlock(&tl->mutex);
+                spin_lock(&timelines->lock);
+
+                /* Resume list iteration after reacquiring spinlock */
+                list_safe_reset_next(tl, tn, link);
+                if (atomic_dec_and_test(&tl->active_count))
+                        list_del(&tl->link);
+
+                /* Defer the final release to after the spinlock */
+                if (refcount_dec_and_test(&tl->kref.refcount)) {
+                        GEM_BUG_ON(atomic_read(&tl->active_count));
+                        list_add(&tl->link, &free);
+                }
+        }
+        spin_unlock(&timelines->lock);
+
+        list_for_each_entry_safe(tl, tn, &free, link)
+                __intel_timeline_free(&tl->kref);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "gt/selftests/mock_timeline.c"
+#include "gt/selftest_timeline.c"
+#endif
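
For context, a minimal usage sketch of the lifecycle this file implements: a timeline is created against a GT, pinned before use (which maps its HWSP seqno page and binds it into the GGTT), and unpinned and released afterwards. The sketch is hypothetical and not part of the commit: the caller function and the drm_info() reporting are illustrative, and passing a NULL i915_gem_ww_ctx to intel_timeline_pin() mirrors what the selftests do; intel_timeline_put() is the kref-drop helper declared in intel_timeline.h.

/*
 * Hypothetical caller (illustrative only, not part of the commit):
 * create a standalone timeline on a GT, pin it so its HWSP page is
 * mapped and GGTT-bound, read the current breadcrumb, then unpin and
 * drop the reference. Error handling is kept to the minimum.
 */
#include "intel_timeline.h"

static int example_timeline_use(struct intel_gt *gt)
{
        struct intel_timeline *tl;
        int err;

        /* NULL hwsp: intel_timeline_init() allocates a private HWSP page */
        tl = __intel_timeline_create(gt, NULL, 0);
        if (IS_ERR(tl))
                return PTR_ERR(tl);

        /* NULL ww context (assumption); maps the HWSP via intel_timeline_pin_map() */
        err = intel_timeline_pin(tl, NULL);
        if (err) {
                intel_timeline_put(tl);
                return err;
        }

        /* *tl->hwsp_seqno is the last seqno the GPU wrote for this timeline */
        drm_info(&gt->i915->drm, "timeline %llx: current seqno %x\n",
                 tl->fence_context, *tl->hwsp_seqno);

        intel_timeline_unpin(tl);
        intel_timeline_put(tl);
        return 0;
}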