From 01a69402cf9d38ff180345d55c2ee51c7e89fbc7 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 18 May 2024 20:50:03 +0200 Subject: Adding upstream version 6.8.9. Signed-off-by: Daniel Baumann --- drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 4 +- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 46 +++++++++ drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 13 ++- drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h | 3 +- drivers/gpu/drm/i915/gt/intel_context.c | 14 +++ drivers/gpu/drm/i915/gt/intel_context.h | 4 +- drivers/gpu/drm/i915/gt/intel_context_types.h | 2 + drivers/gpu/drm/i915/gt/intel_engine_cs.c | 19 +++- drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 7 +- drivers/gpu/drm/i915/gt/intel_engine_pm.h | 1 + drivers/gpu/drm/i915/gt/intel_engine_regs.h | 8 ++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 2 + .../gpu/drm/i915/gt/intel_execlists_submission.c | 2 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 23 +++-- drivers/gpu/drm/i915/gt/intel_gsc.h | 7 +- drivers/gpu/drm/i915/gt/intel_gt.c | 8 +- drivers/gpu/drm/i915/gt/intel_gt.h | 24 +++++ drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c | 39 +++++++ drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h | 13 +++ drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c | 2 +- drivers/gpu/drm/i915/gt/intel_gt_mcr.c | 3 +- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 14 +-- drivers/gpu/drm/i915/gt/intel_gt_pm.h | 38 +++++-- drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 4 +- drivers/gpu/drm/i915/gt/intel_gt_regs.h | 12 +++ drivers/gpu/drm/i915/gt/intel_gtt.c | 26 +++++ drivers/gpu/drm/i915/gt/intel_gtt.h | 5 + drivers/gpu/drm/i915/gt/intel_lrc.c | 100 +++++++++++++++++- drivers/gpu/drm/i915/gt/intel_sseu.c | 7 +- drivers/gpu/drm/i915/gt/intel_workarounds.c | 96 ++++++++++++----- drivers/gpu/drm/i915/gt/selftest_engine_cs.c | 20 ++-- .../gpu/drm/i915/gt/selftest_engine_heartbeat.c | 2 +- drivers/gpu/drm/i915/gt/selftest_gt_pm.c | 5 +- drivers/gpu/drm/i915/gt/selftest_lrc.c | 65 ++++++++---- drivers/gpu/drm/i915/gt/selftest_reset.c | 10 +- drivers/gpu/drm/i915/gt/selftest_rps.c | 17 +-- drivers/gpu/drm/i915/gt/selftest_slpc.c | 5 +- drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c | 2 + drivers/gpu/drm/i915/gt/uc/intel_guc.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 79 ++++++++------ drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 11 +- drivers/gpu/drm/i915/gt/uc/intel_guc_log.c | 10 +- drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 18 ++-- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 5 - drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 5 +- drivers/gpu/drm/i915/gt/uc/selftest_guc.c | 115 +++++++++++++++++++++ .../gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c | 2 +- 51 files changed, 744 insertions(+), 183 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c create mode 100644 drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h (limited to 'drivers/gpu/drm/i915/gt') diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 86a04afff..e1bf13e3d 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -226,7 +226,7 @@ u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs) static int mtl_dummy_pipe_control(struct i915_request *rq) { /* Wa_14016712196 */ - if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 71)) || + if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(rq->i915)) { u32 *cs; @@ -822,7 +822,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) flags |= PIPE_CONTROL_FLUSH_L3; /* Wa_14016712196 */ - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915)) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915)) /* dummy PIPE_CONTROL + depth flush */ cs = gen12_emit_pipe_control(cs, 0, PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0); diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 9895e18df..81bf22163 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -5,6 +5,7 @@ #include +#include "gem/i915_gem_internal.h" #include "gem/i915_gem_lmem.h" #include "gen8_ppgtt.h" @@ -222,6 +223,9 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) { struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + if (vm->rsvd.obj) + i915_gem_object_put(vm->rsvd.obj); + if (intel_vgpu_active(vm->i915)) gen8_ppgtt_notify_vgt(ppgtt, false); @@ -950,6 +954,44 @@ err_pd: return ERR_PTR(err); } +static int gen8_init_rsvd(struct i915_address_space *vm) +{ + struct drm_i915_private *i915 = vm->i915; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int ret; + + if (!intel_gt_needs_wa_16018031267(vm->gt)) + return 0; + + /* The memory will be used only by GPU. */ + obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, + I915_BO_ALLOC_VOLATILE | + I915_BO_ALLOC_GPU_ONLY); + if (IS_ERR(obj)) + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto unref; + } + + ret = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH); + if (ret) + goto unref; + + vm->rsvd.vma = i915_vma_make_unshrinkable(vma); + vm->rsvd.obj = obj; + vm->total -= vma->node.size; + return 0; +unref: + i915_gem_object_put(obj); + return ret; +} + /* * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers * with a net effect resembling a 2-level page table in normal x86 terms. Each @@ -1031,6 +1073,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt, if (intel_vgpu_active(gt->i915)) gen8_ppgtt_notify_vgt(ppgtt, true); + err = gen8_init_rsvd(&ppgtt->vm); + if (err) + goto err_put; + return ppgtt; err_put: diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index ecc990ec1..d650beb8e 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -28,11 +28,14 @@ static void irq_disable(struct intel_breadcrumbs *b) static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) { + intel_wakeref_t wakeref; + /* * Since we are waiting on a request, the GPU should be busy * and should have its own rpm reference. */ - if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt))) + wakeref = intel_gt_pm_get_if_awake(b->irq_engine->gt); + if (GEM_WARN_ON(!wakeref)) return; /* @@ -41,7 +44,7 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) * which we can add a new waiter and avoid the cost of re-enabling * the irq. */ - WRITE_ONCE(b->irq_armed, true); + WRITE_ONCE(b->irq_armed, wakeref); /* Requests may have completed before we could enable the interrupt. */ if (!b->irq_enabled++ && b->irq_enable(b)) @@ -61,12 +64,14 @@ static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b) static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) { + intel_wakeref_t wakeref = b->irq_armed; + GEM_BUG_ON(!b->irq_enabled); if (!--b->irq_enabled) b->irq_disable(b); - WRITE_ONCE(b->irq_armed, false); - intel_gt_pm_put_async(b->irq_engine->gt); + WRITE_ONCE(b->irq_armed, 0); + intel_gt_pm_put_async(b->irq_engine->gt, wakeref); } static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b) diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h index 72dfd3748..bdf09fd67 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h @@ -13,6 +13,7 @@ #include #include "intel_engine_types.h" +#include "intel_wakeref.h" /* * Rather than have every client wait upon all user interrupts, @@ -43,7 +44,7 @@ struct intel_breadcrumbs { spinlock_t irq_lock; /* protects the interrupt from hardirq context */ struct irq_work irq_work; /* for use from inside irq_lock */ unsigned int irq_enabled; - bool irq_armed; + intel_wakeref_t irq_armed; /* Not all breadcrumbs are attached to physical HW */ intel_engine_mask_t engine_mask; diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index a53b26178..a2f124574 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -6,6 +6,7 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_pm.h" +#include "i915_drm_client.h" #include "i915_drv.h" #include "i915_trace.h" @@ -50,6 +51,7 @@ intel_context_create(struct intel_engine_cs *engine) int intel_context_alloc_state(struct intel_context *ce) { + struct i915_gem_context *ctx; int err = 0; if (mutex_lock_interruptible(&ce->pin_mutex)) @@ -66,6 +68,18 @@ int intel_context_alloc_state(struct intel_context *ce) goto unlock; set_bit(CONTEXT_ALLOC_BIT, &ce->flags); + + rcu_read_lock(); + ctx = rcu_dereference(ce->gem_context); + if (ctx && !kref_get_unless_zero(&ctx->ref)) + ctx = NULL; + rcu_read_unlock(); + if (ctx) { + if (ctx->client) + i915_drm_client_add_context_objects(ctx->client, + ce); + i915_gem_context_put(ctx); + } } unlock: diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index a80e3b7c2..25564c015 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -212,7 +212,7 @@ static inline void intel_context_enter(struct intel_context *ce) return; ce->ops->enter(ce); - intel_gt_pm_get(ce->vm->gt); + ce->wakeref = intel_gt_pm_get(ce->vm->gt); } static inline void intel_context_mark_active(struct intel_context *ce) @@ -229,7 +229,7 @@ static inline void intel_context_exit(struct intel_context *ce) if (--ce->active_count) return; - intel_gt_pm_put_async(ce->vm->gt); + intel_gt_pm_put_async(ce->vm->gt, ce->wakeref); ce->ops->exit(ce); } diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index aceaac28a..7eccbd70d 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -17,6 +17,7 @@ #include "i915_utils.h" #include "intel_engine_types.h" #include "intel_sseu.h" +#include "intel_wakeref.h" #include "uc/intel_guc_fwif.h" @@ -112,6 +113,7 @@ struct intel_context { u32 ring_size; struct intel_ring *ring; struct intel_timeline *timeline; + intel_wakeref_t wakeref; unsigned long flags; #define CONTEXT_BARRIER_BIT 0 diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 4a11219e5..84be97f95 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -47,7 +47,7 @@ #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) #define GEN11_LR_CONTEXT_RENDER_SIZE (14 * PAGE_SIZE) -#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) +#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE) #define MAX_MMIO_BASES 3 struct engine_info { @@ -908,6 +908,23 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) info->engine_mask &= ~BIT(GSC0); } + /* + * Do not create the command streamer for CCS slices beyond the first. + * All the workload submitted to the first engine will be shared among + * all the slices. + * + * Once the user will be allowed to customize the CCS mode, then this + * check needs to be removed. + */ + if (IS_DG2(gt->i915)) { + u8 first_ccs = __ffs(CCS_MASK(gt)); + + /* Mask off all the CCS engine */ + info->engine_mask &= ~GENMASK(CCS3, CCS0); + /* Put back in the first CCS engine */ + info->engine_mask |= BIT(_CCS(first_ccs)); + } + return info->engine_mask; } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 9a527e1f5..1a8e2b7db 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -188,7 +188,7 @@ static void heartbeat(struct work_struct *wrk) * low latency and no jitter] the chance to naturally * complete before being preempted. */ - attr.priority = 0; + attr.priority = I915_PRIORITY_NORMAL; if (rq->sched.attr.priority >= attr.priority) attr.priority = I915_PRIORITY_HEARTBEAT; if (rq->sched.attr.priority >= attr.priority) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 5a3a5b29d..fb7bff27b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -63,7 +63,7 @@ static int __engine_unpark(struct intel_wakeref *wf) ENGINE_TRACE(engine, "\n"); - intel_gt_pm_get(engine->gt); + engine->wakeref_track = intel_gt_pm_get(engine->gt); /* Discard stale context state from across idling */ ce = engine->kernel_context; @@ -122,6 +122,7 @@ __queue_and_release_pm(struct i915_request *rq, */ GEM_BUG_ON(rq->context->active_count != 1); __intel_gt_pm_get(engine->gt); + rq->context->wakeref = intel_wakeref_track(&engine->gt->wakeref); /* * We have to serialise all potential retirement paths with our @@ -282,7 +283,7 @@ static int __engine_park(struct intel_wakeref *wf) engine->park(engine); /* While gt calls i915_vma_parked(), we have to break the lock cycle */ - intel_gt_pm_put_async(engine->gt); + intel_gt_pm_put_async(engine->gt, engine->wakeref_track); return 0; } @@ -293,7 +294,7 @@ static const struct intel_wakeref_ops wf_ops = { void intel_engine_init__pm(struct intel_engine_cs *engine) { - intel_wakeref_init(&engine->wakeref, engine->i915, &wf_ops); + intel_wakeref_init(&engine->wakeref, engine->i915, &wf_ops, engine->name); intel_engine_init_heartbeat(engine); intel_gsc_idle_msg_enable(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h index d68675925..1d97c435a 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h @@ -10,6 +10,7 @@ #include "i915_request.h" #include "intel_engine_types.h" #include "intel_wakeref.h" +#include "intel_gt.h" #include "intel_gt_pm.h" static inline bool diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index fdd4ddd3a..a8eac59e3 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -118,9 +118,15 @@ #define CCID_EXTENDED_STATE_RESTORE BIT(2) #define CCID_EXTENDED_STATE_SAVE BIT(3) #define RING_BB_PER_CTX_PTR(base) _MMIO((base) + 0x1c0) /* gen8+ */ +#define PER_CTX_BB_FORCE BIT(2) +#define PER_CTX_BB_VALID BIT(0) + #define RING_INDIRECT_CTX(base) _MMIO((base) + 0x1c4) /* gen8+ */ #define RING_INDIRECT_CTX_OFFSET(base) _MMIO((base) + 0x1c8) /* gen8+ */ #define ECOSKPD(base) _MMIO((base) + 0x1d0) +#define XEHP_BLITTER_SCHEDULING_MODE_MASK REG_GENMASK(12, 11) +#define XEHP_BLITTER_ROUND_ROBIN_MODE \ + REG_FIELD_PREP(XEHP_BLITTER_SCHEDULING_MODE_MASK, 1) #define ECO_CONSTANT_BUFFER_SR_DISABLE REG_BIT(4) #define ECO_GATING_CX_ONLY REG_BIT(3) #define GEN6_BLITTER_FBC_NOTIFY REG_BIT(3) @@ -257,5 +263,7 @@ #define VDBOX_CGCTL3F18(base) _MMIO((base) + 0x3f18) #define ALNUNIT_CLKGATE_DIS REG_BIT(13) +#define VDBOX_CGCTL3F1C(base) _MMIO((base) + 0x3f1c) +#define MFXPIPE_CLKGATE_DIS REG_BIT(3) #endif /* __INTEL_ENGINE_REGS__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 876976025..960e6be20 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -446,7 +446,9 @@ struct intel_engine_cs { unsigned long serial; unsigned long wakeref_serial; + intel_wakeref_t wakeref_track; struct intel_wakeref wakeref; + struct file *default_state; struct { diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 42e09f158..b061a0a0d 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -630,7 +630,7 @@ static void __execlists_schedule_out(struct i915_request * const rq, execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); if (engine->fw_domain && !--engine->fw_active) intel_uncore_forcewake_put(engine->uncore, engine->fw_domain); - intel_gt_pm_put_async(engine->gt); + intel_gt_pm_put_async_untracked(engine->gt); /* * If this is part of a virtual engine, its next request may diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 15fc8e470..21a7e3191 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -245,16 +245,15 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) gen8_ggtt_invalidate(ggtt); list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) { - if (intel_guc_tlb_invalidation_is_available(>->uc.guc)) { + if (intel_guc_tlb_invalidation_is_available(>->uc.guc)) guc_ggtt_ct_invalidate(gt); - } else if (GRAPHICS_VER(i915) >= 12) { + else if (GRAPHICS_VER(i915) >= 12) intel_uncore_write_fw(gt->uncore, GEN12_GUC_TLB_INV_CR, GEN12_GUC_TLB_INV_CR_INVALIDATE); - } else { + else intel_uncore_write_fw(gt->uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE); - } } } @@ -297,7 +296,7 @@ static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt) return intel_gt_is_bind_context_ready(gt); } -static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt) +static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt, intel_wakeref_t *wakeref) { struct intel_context *ce; struct intel_gt *gt = ggtt->vm.gt; @@ -314,7 +313,8 @@ static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt) * would conflict with fs_reclaim trying to allocate memory while * doing rpm_resume(). */ - if (!intel_gt_pm_get_if_awake(gt)) + *wakeref = intel_gt_pm_get_if_awake(gt); + if (!*wakeref) return NULL; intel_engine_pm_get(ce->engine); @@ -322,10 +322,10 @@ static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt) return ce; } -static void gen8_ggtt_bind_put_ce(struct intel_context *ce) +static void gen8_ggtt_bind_put_ce(struct intel_context *ce, intel_wakeref_t wakeref) { intel_engine_pm_put(ce->engine); - intel_gt_pm_put(ce->engine->gt); + intel_gt_pm_put(ce->engine->gt, wakeref); } static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset, @@ -338,12 +338,13 @@ static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset, struct sgt_iter iter; struct i915_request *rq; struct intel_context *ce; + intel_wakeref_t wakeref; u32 *cs; if (!num_entries) return true; - ce = gen8_ggtt_bind_get_ce(ggtt); + ce = gen8_ggtt_bind_get_ce(ggtt, &wakeref); if (!ce) return false; @@ -419,13 +420,13 @@ queue_err_rq: offset += n_ptes; } - gen8_ggtt_bind_put_ce(ce); + gen8_ggtt_bind_put_ce(ce, wakeref); return true; err_rq: i915_request_put(rq); put_ce: - gen8_ggtt_bind_put_ce(ce); + gen8_ggtt_bind_put_ce(ce, wakeref); return false; } diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.h b/drivers/gpu/drm/i915/gt/intel_gsc.h index 7ab3ca0f9..013c64251 100644 --- a/drivers/gpu/drm/i915/gt/intel_gsc.h +++ b/drivers/gpu/drm/i915/gt/intel_gsc.h @@ -21,8 +21,11 @@ struct mei_aux_device; /** * struct intel_gsc - graphics security controller * - * @gem_obj: scratch memory GSC operations - * @intf : gsc interface + * @intf: gsc interface + * @intf.adev: MEI aux. device for this @intf + * @intf.gem_obj: scratch memory GSC operations + * @intf.irq: IRQ for this device (%-1 for no IRQ) + * @intf.id: this interface's id number/index */ struct intel_gsc { struct intel_gsc_intf { diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index ba1186fc5..6a2c2718b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -451,7 +451,7 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt) spin_lock_irqsave(&uncore->lock, flags); intel_uncore_posting_read_fw(uncore, - RING_HEAD(RENDER_RING_BASE)); + RING_TAIL(RENDER_RING_BASE)); spin_unlock_irqrestore(&uncore->lock, flags); } } @@ -1024,6 +1024,12 @@ enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt, return I915_MAP_WC; } +bool intel_gt_needs_wa_16018031267(struct intel_gt *gt) +{ + /* Wa_16018031267, Wa_16018063123 */ + return IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 55), IP_VER(12, 71)); +} + bool intel_gt_needs_wa_22016122933(struct intel_gt *gt) { return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == GT_MEDIA; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 970bedf6b..003eb93b8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -87,8 +87,13 @@ static inline bool gt_is_root(struct intel_gt *gt) return !gt->info.id; } +bool intel_gt_needs_wa_16018031267(struct intel_gt *gt); bool intel_gt_needs_wa_22016122933(struct intel_gt *gt); +#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \ + intel_gt_needs_wa_16018031267(engine->gt) && \ + engine->class == COPY_ENGINE_CLASS && engine->instance == 0) + static inline struct intel_gt *uc_to_gt(struct intel_uc *uc) { return container_of(uc, struct intel_gt, uc); @@ -114,6 +119,11 @@ static inline struct intel_gt *gsc_to_gt(struct intel_gsc *gsc) return container_of(gsc, struct intel_gt, gsc); } +static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc) +{ + return guc_to_gt(guc)->i915; +} + void intel_gt_common_init_early(struct intel_gt *gt); int intel_root_gt_init_early(struct drm_i915_private *i915); int intel_gt_assign_ggtt(struct intel_gt *gt); @@ -167,6 +177,20 @@ void intel_gt_release_all(struct drm_i915_private *i915); (id__)++) \ for_each_if(((gt__) = (i915__)->gt[(id__)])) +/* Simple iterator over all initialised engines */ +#define for_each_engine(engine__, gt__, id__) \ + for ((id__) = 0; \ + (id__) < I915_NUM_ENGINES; \ + (id__)++) \ + for_each_if ((engine__) = (gt__)->engine[(id__)]) + +/* Iterator over subset of engines selected by mask */ +#define for_each_engine_masked(engine__, gt__, mask__, tmp__) \ + for ((tmp__) = (mask__) & (gt__)->info.engine_mask; \ + (tmp__) ? \ + ((engine__) = (gt__)->engine[__mask_next_bit(tmp__)]), 1 : \ + 0;) + void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c new file mode 100644 index 000000000..044219c59 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2024 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_gt.h" +#include "intel_gt_ccs_mode.h" +#include "intel_gt_regs.h" + +void intel_gt_apply_ccs_mode(struct intel_gt *gt) +{ + int cslice; + u32 mode = 0; + int first_ccs = __ffs(CCS_MASK(gt)); + + if (!IS_DG2(gt->i915)) + return; + + /* Build the value for the fixed CCS load balancing */ + for (cslice = 0; cslice < I915_MAX_CCS; cslice++) { + if (CCS_MASK(gt) & BIT(cslice)) + /* + * If available, assign the cslice + * to the first available engine... + */ + mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs); + + else + /* + * ... otherwise, mark the cslice as + * unavailable if no CCS dispatches here + */ + mode |= XEHP_CCS_MODE_CSLICE(cslice, + XEHP_CCS_MODE_CSLICE_MASK); + } + + intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h new file mode 100644 index 000000000..9e5549cae --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef __INTEL_GT_CCS_MODE_H__ +#define __INTEL_GT_CCS_MODE_H__ + +struct intel_gt; + +void intel_gt_apply_ccs_mode(struct intel_gt *gt); + +#endif /* __INTEL_GT_CCS_MODE_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c index 8f9b874fd..3aa1d014c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c @@ -6,8 +6,8 @@ #include -#include "i915_drv.h" /* for_each_engine! */ #include "intel_engine.h" +#include "intel_gt.h" #include "intel_gt_debugfs.h" #include "intel_gt_engines_debugfs.h" diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 34913912d..e253750a5 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -388,8 +388,7 @@ void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags) * registers. This wakeref will be released in the unlock * routine. * - * This is expected to become a formally documented/numbered - * workaround soon. + * Wa_22018931422 */ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_GT); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index f5899d503..220ac4f92 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -28,19 +28,20 @@ static void user_forcewake(struct intel_gt *gt, bool suspend) { int count = atomic_read(>->user_wakeref); + intel_wakeref_t wakeref; /* Inside suspend/resume so single threaded, no races to worry about. */ if (likely(!count)) return; - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); if (suspend) { GEM_BUG_ON(count > atomic_read(>->wakeref.count)); atomic_sub(count, >->wakeref.count); } else { atomic_add(count, >->wakeref.count); } - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); } static void runtime_begin(struct intel_gt *gt) @@ -138,7 +139,7 @@ void intel_gt_pm_init_early(struct intel_gt *gt) * runtime_pm is per-device rather than per-tile, so this is still the * correct structure. */ - intel_wakeref_init(>->wakeref, gt->i915, &wf_ops); + intel_wakeref_init(>->wakeref, gt->i915, &wf_ops, "GT"); seqcount_mutex_init(>->stats.lock, >->wakeref.mutex); } @@ -167,7 +168,7 @@ static void gt_sanitize(struct intel_gt *gt, bool force) enum intel_engine_id id; intel_wakeref_t wakeref; - GT_TRACE(gt, "force:%s", str_yes_no(force)); + GT_TRACE(gt, "force:%s\n", str_yes_no(force)); /* Use a raw wakeref to avoid calling intel_display_power_get early */ wakeref = intel_runtime_pm_get(gt->uncore->rpm); @@ -236,6 +237,7 @@ int intel_gt_resume(struct intel_gt *gt) { struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int err; err = intel_gt_has_unrecoverable_error(gt); @@ -252,7 +254,7 @@ int intel_gt_resume(struct intel_gt *gt) */ gt_sanitize(gt, true); - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); intel_rc6_sanitize(>->rc6); @@ -295,7 +297,7 @@ int intel_gt_resume(struct intel_gt *gt) out_fw: intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); intel_gt_bind_context_set_ready(gt); return err; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index b1eeb5b33..911fd0160 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -16,19 +16,28 @@ static inline bool intel_gt_pm_is_awake(const struct intel_gt *gt) return intel_wakeref_is_active(>->wakeref); } -static inline void intel_gt_pm_get(struct intel_gt *gt) +static inline void intel_gt_pm_get_untracked(struct intel_gt *gt) { intel_wakeref_get(>->wakeref); } +static inline intel_wakeref_t intel_gt_pm_get(struct intel_gt *gt) +{ + intel_gt_pm_get_untracked(gt); + return intel_wakeref_track(>->wakeref); +} + static inline void __intel_gt_pm_get(struct intel_gt *gt) { __intel_wakeref_get(>->wakeref); } -static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt) +static inline intel_wakeref_t intel_gt_pm_get_if_awake(struct intel_gt *gt) { - return intel_wakeref_get_if_active(>->wakeref); + if (!intel_wakeref_get_if_active(>->wakeref)) + return 0; + + return intel_wakeref_track(>->wakeref); } static inline void intel_gt_pm_might_get(struct intel_gt *gt) @@ -36,12 +45,18 @@ static inline void intel_gt_pm_might_get(struct intel_gt *gt) intel_wakeref_might_get(>->wakeref); } -static inline void intel_gt_pm_put(struct intel_gt *gt) +static inline void intel_gt_pm_put_untracked(struct intel_gt *gt) { intel_wakeref_put(>->wakeref); } -static inline void intel_gt_pm_put_async(struct intel_gt *gt) +static inline void intel_gt_pm_put(struct intel_gt *gt, intel_wakeref_t handle) +{ + intel_wakeref_untrack(>->wakeref, handle); + intel_gt_pm_put_untracked(gt); +} + +static inline void intel_gt_pm_put_async_untracked(struct intel_gt *gt) { intel_wakeref_put_async(>->wakeref); } @@ -51,9 +66,14 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt) intel_wakeref_might_put(>->wakeref); } -#define with_intel_gt_pm(gt, tmp) \ - for (tmp = 1, intel_gt_pm_get(gt); tmp; \ - intel_gt_pm_put(gt), tmp = 0) +static inline void intel_gt_pm_put_async(struct intel_gt *gt, intel_wakeref_t handle) +{ + intel_wakeref_untrack(>->wakeref, handle); + intel_gt_pm_put_async_untracked(gt); +} + +#define with_intel_gt_pm(gt, wf) \ + for (wf = intel_gt_pm_get(gt); wf; intel_gt_pm_put(gt, wf), wf = 0) /** * with_intel_gt_pm_if_awake - if GT is PM awake, get a reference to prevent @@ -64,7 +84,7 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt) * @wf: pointer to a temporary wakeref. */ #define with_intel_gt_pm_if_awake(gt, wf) \ - for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0) + for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt, wf), wf = 0) static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt) { diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index f900cc68d..7114c116e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -27,7 +27,7 @@ void intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt) { atomic_inc(>->user_wakeref); - intel_gt_pm_get(gt); + intel_gt_pm_get_untracked(gt); if (GRAPHICS_VER(gt->i915) >= 6) intel_uncore_forcewake_user_get(gt->uncore); } @@ -36,7 +36,7 @@ void intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt) { if (GRAPHICS_VER(gt->i915) >= 6) intel_uncore_forcewake_user_put(gt->uncore); - intel_gt_pm_put(gt); + intel_gt_pm_put_untracked(gt); atomic_dec(>->user_wakeref); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index eecd0a87a..743fe3566 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -469,6 +469,9 @@ #define XEHP_PSS_MODE2 MCR_REG(0x703c) #define SCOREBOARD_STALL_FLUSH_CONTROL REG_BIT(5) +#define XEHP_PSS_CHICKEN MCR_REG(0x7044) +#define FD_END_COLLECT REG_BIT(5) + #define GEN7_SC_INSTDONE _MMIO(0x7100) #define GEN12_SC_INSTDONE_EXTRA _MMIO(0x7104) #define GEN12_SC_INSTDONE_EXTRA2 _MMIO(0x7108) @@ -537,6 +540,9 @@ #define XEHP_SQCM MCR_REG(0x8724) #define EN_32B_ACCESS REG_BIT(30) +#define MTL_GSCPSMI_BASEADDR_LSB _MMIO(0x880c) +#define MTL_GSCPSMI_BASEADDR_MSB _MMIO(0x8810) + #define HSW_IDICR _MMIO(0x9008) #define IDIHASHMSK(x) (((x) & 0x3f) << 16) @@ -1471,8 +1477,14 @@ #define ECOBITS_PPGTT_CACHE4B (0 << 8) #define GEN12_RCU_MODE _MMIO(0x14800) +#define XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1) #define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0) +#define XEHP_CCS_MODE _MMIO(0x14804) +#define XEHP_CCS_MODE_CSLICE_MASK REG_GENMASK(2, 0) /* CCS0-3 + rsvd */ +#define XEHP_CCS_MODE_CSLICE_WIDTH ilog2(XEHP_CCS_MODE_CSLICE_MASK + 1) +#define XEHP_CCS_MODE_CSLICE(cslice, ccs) (ccs << (cslice * XEHP_CCS_MODE_CSLICE_WIDTH)) + #define CHV_FUSE_GT _MMIO(VLV_GUNIT_BASE + 0x2168) #define CHV_FGT_DISABLE_SS0 (1 << 10) #define CHV_FGT_DISABLE_SS1 (1 << 11) diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 4fbed27ef..86f73fe55 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -63,6 +63,9 @@ struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz) if (!IS_ERR(obj)) { obj->base.resv = i915_vm_resv_get(vm); obj->shares_resv_from = vm; + + if (vm->fpriv) + i915_drm_client_add_object(vm->fpriv->client, obj); } return obj; @@ -84,6 +87,9 @@ struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz) if (!IS_ERR(obj)) { obj->base.resv = i915_vm_resv_get(vm); obj->shares_resv_from = vm; + + if (vm->fpriv) + i915_drm_client_add_object(vm->fpriv->client, obj); } return obj; @@ -95,6 +101,16 @@ int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj) void *vaddr; type = intel_gt_coherent_map_type(vm->gt, obj, true); + /* + * FIXME: It is suspected that some Address Translation Service (ATS) + * issue on IOMMU is causing CAT errors to occur on some MTL workloads. + * Applying a write barrier to the ppgtt set entry functions appeared + * to have no effect, so we must temporarily use I915_MAP_WC here on + * MTL until a proper ATS solution is found. + */ + if (IS_METEORLAKE(vm->i915)) + type = I915_MAP_WC; + vaddr = i915_gem_object_pin_map_unlocked(obj, type); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); @@ -109,6 +125,16 @@ int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object void *vaddr; type = intel_gt_coherent_map_type(vm->gt, obj, true); + /* + * FIXME: It is suspected that some Address Translation Service (ATS) + * issue on IOMMU is causing CAT errors to occur on some MTL workloads. + * Applying a write barrier to the ppgtt set entry functions appeared + * to have no effect, so we must temporarily use I915_MAP_WC here on + * MTL until a proper ATS solution is found. + */ + if (IS_METEORLAKE(vm->i915)) + type = I915_MAP_WC; + vaddr = i915_gem_object_pin_map(obj, type); if (IS_ERR(vaddr)) return PTR_ERR(vaddr); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index b471edac2..6b85222ee 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -249,8 +249,13 @@ struct i915_address_space { struct work_struct release_work; struct drm_mm mm; + struct { + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + } rsvd; struct intel_gt *gt; struct drm_i915_private *i915; + struct drm_i915_file_private *fpriv; struct device *dma; u64 total; /* size addr space maps (ex. 2GB for ggtt) */ u64 reserved; /* size addr space reserved */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index eaf66d903..7c367ba8d 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -828,6 +828,18 @@ lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine) return 0; } +static void +lrc_setup_bb_per_ctx(u32 *regs, + const struct intel_engine_cs *engine, + u32 ctx_bb_ggtt_addr) +{ + GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1); + regs[lrc_ring_wa_bb_per_ctx(engine) + 1] = + ctx_bb_ggtt_addr | + PER_CTX_BB_FORCE | + PER_CTX_BB_VALID; +} + static void lrc_setup_indirect_ctx(u32 *regs, const struct intel_engine_cs *engine, @@ -1020,7 +1032,13 @@ static u32 context_wa_bb_offset(const struct intel_context *ce) return PAGE_SIZE * ce->wa_bb_page; } -static u32 *context_indirect_bb(const struct intel_context *ce) +/* + * per_ctx below determines which WABB section is used. + * When true, the function returns the location of the + * PER_CTX_BB. When false, the function returns the + * location of the INDIRECT_CTX. + */ +static u32 *context_wabb(const struct intel_context *ce, bool per_ctx) { void *ptr; @@ -1029,6 +1047,7 @@ static u32 *context_indirect_bb(const struct intel_context *ce) ptr = ce->lrc_reg_state; ptr -= LRC_STATE_OFFSET; /* back to start of context image */ ptr += context_wa_bb_offset(ce); + ptr += per_ctx ? PAGE_SIZE : 0; return ptr; } @@ -1105,7 +1124,8 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) if (GRAPHICS_VER(engine->i915) >= 12) { ce->wa_bb_page = context_size / PAGE_SIZE; - context_size += PAGE_SIZE; + /* INDIRECT_CTX and PER_CTX_BB need separate pages. */ + context_size += PAGE_SIZE * 2; } if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) { @@ -1407,12 +1427,85 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) return gen12_emit_aux_table_inv(ce->engine, cs); } +static u32 *xehp_emit_fastcolor_blt_wabb(const struct intel_context *ce, u32 *cs) +{ + struct intel_gt *gt = ce->engine->gt; + int mocs = gt->mocs.uc_index << 1; + + /** + * Wa_16018031267 / Wa_16018063123 requires that SW forces the + * main copy engine arbitration into round robin mode. We + * additionally need to submit the following WABB blt command + * to produce 4 subblits with each subblit generating 0 byte + * write requests as WABB: + * + * XY_FASTCOLOR_BLT + * BG0 -> 5100000E + * BG1 -> 0000003F (Dest pitch) + * BG2 -> 00000000 (X1, Y1) = (0, 0) + * BG3 -> 00040001 (X2, Y2) = (1, 4) + * BG4 -> scratch + * BG5 -> scratch + * BG6-12 -> 00000000 + * BG13 -> 20004004 (Surf. Width= 2,Surf. Height = 5 ) + * BG14 -> 00000010 (Qpitch = 4) + * BG15 -> 00000000 + */ + *cs++ = XY_FAST_COLOR_BLT_CMD | (16 - 2); + *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) | 0x3f; + *cs++ = 0; + *cs++ = 4 << 16 | 1; + *cs++ = lower_32_bits(i915_vma_offset(ce->vm->rsvd.vma)); + *cs++ = upper_32_bits(i915_vma_offset(ce->vm->rsvd.vma)); + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0x20004004; + *cs++ = 0x10; + *cs++ = 0; + + return cs; +} + +static u32 * +xehp_emit_per_ctx_bb(const struct intel_context *ce, u32 *cs) +{ + /* Wa_16018031267, Wa_16018063123 */ + if (NEEDS_FASTCOLOR_BLT_WABB(ce->engine)) + cs = xehp_emit_fastcolor_blt_wabb(ce, cs); + + return cs; +} + +static void +setup_per_ctx_bb(const struct intel_context *ce, + const struct intel_engine_cs *engine, + u32 *(*emit)(const struct intel_context *, u32 *)) +{ + /* Place PER_CTX_BB on next page after INDIRECT_CTX */ + u32 * const start = context_wabb(ce, true); + u32 *cs; + + cs = emit(ce, start); + + /* PER_CTX_BB must manually terminate */ + *cs++ = MI_BATCH_BUFFER_END; + + GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs)); + lrc_setup_bb_per_ctx(ce->lrc_reg_state, engine, + lrc_indirect_bb(ce) + PAGE_SIZE); +} + static void setup_indirect_ctx_bb(const struct intel_context *ce, const struct intel_engine_cs *engine, u32 *(*emit)(const struct intel_context *, u32 *)) { - u32 * const start = context_indirect_bb(ce); + u32 * const start = context_wabb(ce, false); u32 *cs; cs = emit(ce, start); @@ -1511,6 +1604,7 @@ u32 lrc_update_regs(const struct intel_context *ce, /* Mutually exclusive wrt to global indirect bb */ GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size); setup_indirect_ctx_bb(ce, engine, fn); + setup_per_ctx_bb(ce, engine, xehp_emit_per_ctx_bb); } return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE; diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index f602895f6..6a3246240 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -849,13 +849,12 @@ void intel_sseu_print_topology(struct drm_i915_private *i915, const struct sseu_dev_info *sseu, struct drm_printer *p) { - if (sseu->max_slices == 0) { + if (sseu->max_slices == 0) drm_printf(p, "Unavailable\n"); - } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) sseu_print_xehp_topology(sseu, p); - } else { + else sseu_print_hsw_topology(sseu, p); - } } void intel_sseu_print_ss_info(const char *type, diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 192ac0e59..59816dd6f 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -10,6 +10,7 @@ #include "intel_engine_regs.h" #include "intel_gpu_commands.h" #include "intel_gt.h" +#include "intel_gt_ccs_mode.h" #include "intel_gt_mcr.h" #include "intel_gt_print.h" #include "intel_gt_regs.h" @@ -51,7 +52,8 @@ * registers belonging to BCS, VCS or VECS should be implemented in * xcs_engine_wa_init(). Workarounds for registers not belonging to a specific * engine's MMIO range but that are part of of the common RCS/CCS reset domain - * should be implemented in general_render_compute_wa_init(). + * should be implemented in general_render_compute_wa_init(). The settings + * about the CCS load balancing should be added in ccs_engine_wa_mode(). * * - GT workarounds: the list of these WAs is applied whenever these registers * revert to their default values: on GPU reset, suspend/resume [1]_, etc. @@ -777,6 +779,9 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, /* Wa_18019271663:dg2 */ wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE); + + /* Wa_14019877138:dg2 */ + wa_mcr_masked_en(wal, XEHP_PSS_CHICKEN, FD_END_COLLECT); } static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine, @@ -786,8 +791,13 @@ static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine, dg2_ctx_gt_tuning_init(engine, wal); - if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) || - IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER)) + /* + * Due to Wa_16014892111, the DRAW_WATERMARK tuning must be done in + * gen12_emit_indirect_ctx_rcs() rather than here on some early + * steppings. + */ + if (!(IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))) wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false); } @@ -905,7 +915,7 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, if (engine->class != RENDER_CLASS) goto done; - if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71))) + if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74))) xelpg_ctx_workarounds_init(engine, wal); else if (IS_PONTEVECCHIO(i915)) ; /* noop; none at this time */ @@ -1640,7 +1650,8 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) static void xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - /* Wa_14018778641 / Wa_18018781329 */ + /* Wa_14018575942 / Wa_18018781329 */ + wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB); /* Wa_22016670082 */ @@ -1662,9 +1673,23 @@ xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) debug_dump_steering(gt); } +static void +wa_16021867713(struct intel_gt *gt, struct i915_wa_list *wal) +{ + struct intel_engine_cs *engine; + int id; + + for_each_engine(engine, gt, id) + if (engine->class == VIDEO_DECODE_CLASS) + wa_write_or(wal, VDBOX_CGCTL3F1C(engine->mmio_base), + MFXPIPE_CLKGATE_DIS); +} + static void xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { + wa_16021867713(gt, wal); + /* * Wa_14018778641 * Wa_18018781329 @@ -1674,6 +1699,9 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) */ wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB); + /* Wa_22016670082 */ + wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE); + debug_dump_steering(gt); } @@ -1690,7 +1718,7 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) */ static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal) { - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) { + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) { wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS); } @@ -1723,7 +1751,7 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) return; } - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) xelpg_gt_workarounds_init(gt, wal); else if (IS_PONTEVECCHIO(i915)) pvc_gt_workarounds_init(gt, wal); @@ -2196,7 +2224,7 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) if (engine->gt->type == GT_MEDIA) ; /* none yet */ - else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71))) + else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74))) xelpg_whitelist_build(engine); else if (IS_PONTEVECCHIO(i915)) pvc_whitelist_build(engine); @@ -2340,14 +2368,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) 0, true); } - if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) { - /* Wa_22014600077:dg2 */ - wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH), - 0 /* Wa_14012342262 write-only reg, so skip verification */, - true); - } - if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* @@ -2782,6 +2802,11 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) RING_SEMA_WAIT_POLL(engine->mmio_base), 1); } + /* Wa_16018031267, Wa_16018063123 */ + if (NEEDS_FASTCOLOR_BLT_WABB(engine)) + wa_masked_field_set(wal, ECOSKPD(engine->mmio_base), + XEHP_BLITTER_SCHEDULING_MODE_MASK, + XEHP_BLITTER_ROUND_ROBIN_MODE); } static void @@ -2811,7 +2836,7 @@ add_render_compute_tuning_settings(struct intel_gt *gt, { struct drm_i915_private *i915 = gt->i915; - if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915)) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915)) wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); /* @@ -2827,6 +2852,28 @@ add_render_compute_tuning_settings(struct intel_gt *gt, wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC); } +static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal) +{ + struct intel_gt *gt = engine->gt; + + if (!IS_DG2(gt->i915)) + return; + + /* + * Wa_14019159160: This workaround, along with others, leads to + * significant challenges in utilizing load balancing among the + * CCS slices. Consequently, an architectural decision has been + * made to completely disable automatic CCS load balancing. + */ + wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE); + + /* + * After having disabled automatic load balancing we need to + * assign all slices to a single CCS. We will call it CCS mode 1 + */ + intel_gt_apply_ccs_mode(gt); +} + /* * The workarounds in this function apply to shared registers in * the general render reset domain that aren't tied to a @@ -2864,7 +2911,8 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li } if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) || - IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER)) + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER) || + IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 74), IP_VER(12, 74))) /* Wa_14017856879 */ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH); @@ -2915,6 +2963,9 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li * Wa_22015475538:dg2 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); + + /* Wa_18028616096 */ + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3); } if (IS_DG2_G11(i915)) { @@ -2943,11 +2994,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li true); } - if (IS_DG2_G10(i915) || IS_DG2_G12(i915)) { - /* Wa_18028616096 */ - wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3); - } - if (IS_XEHPSDV(i915)) { /* Wa_1409954639 */ wa_mcr_masked_en(wal, @@ -2978,8 +3024,10 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal * to a single RCS/CCS engine's workaround list since * they're reset as part of the general render domain reset. */ - if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) + if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) { general_render_compute_wa_init(engine, wal); + ccs_engine_wa_mode(engine, wal); + } if (engine->class == COMPUTE_CLASS) ccs_engine_wa_init(engine, wal); diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c index 86cecf7a1..5ffa5e30f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -21,20 +21,22 @@ static int cmp_u32(const void *A, const void *B) return *a - *b; } -static void perf_begin(struct intel_gt *gt) +static intel_wakeref_t perf_begin(struct intel_gt *gt) { - intel_gt_pm_get(gt); + intel_wakeref_t wakeref = intel_gt_pm_get(gt); /* Boost gpufreq to max [waitboost] and keep it fixed */ atomic_inc(>->rps.num_waiters); queue_work(gt->i915->unordered_wq, >->rps.work); flush_work(>->rps.work); + + return wakeref; } -static int perf_end(struct intel_gt *gt) +static int perf_end(struct intel_gt *gt, intel_wakeref_t wakeref) { atomic_dec(>->rps.num_waiters); - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); return igt_flush_test(gt->i915); } @@ -133,12 +135,13 @@ static int perf_mi_bb_start(void *arg) struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int err = 0; if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */ return 0; - perf_begin(gt); + wakeref = perf_begin(gt); for_each_engine(engine, gt, id) { struct intel_context *ce = engine->kernel_context; struct i915_vma *batch; @@ -207,7 +210,7 @@ out: pr_info("%s: MI_BB_START cycles: %u\n", engine->name, trifilter(cycles)); } - if (perf_end(gt)) + if (perf_end(gt, wakeref)) err = -EIO; return err; @@ -260,12 +263,13 @@ static int perf_mi_noop(void *arg) struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int err = 0; if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */ return 0; - perf_begin(gt); + wakeref = perf_begin(gt); for_each_engine(engine, gt, id) { struct intel_context *ce = engine->kernel_context; struct i915_vma *base, *nop; @@ -364,7 +368,7 @@ out: pr_info("%s: 16K MI_NOOP cycles: %u\n", engine->name, trifilter(cycles)); } - if (perf_end(gt)) + if (perf_end(gt, wakeref)) err = -EIO; return err; diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index 273d440a5..bc441ce7b 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -84,7 +84,7 @@ static struct pulse *pulse_create(void) static void pulse_unlock_wait(struct pulse *p) { - i915_active_unlock_wait(&p->active); + wait_var_event_timeout(&p->active, i915_active_is_idle(&p->active), HZ); } static int __live_idle_pulse(struct intel_engine_cs *engine, diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c index 097124170..33351deee 100644 --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -81,6 +81,7 @@ static int live_gt_clocks(void *arg) struct intel_gt *gt = arg; struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int err = 0; if (!gt->clock_frequency) { /* unknown */ @@ -91,7 +92,7 @@ static int live_gt_clocks(void *arg) if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */ return 0; - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); for_each_engine(engine, gt, id) { @@ -128,7 +129,7 @@ static int live_gt_clocks(void *arg) } intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 5f826b6dc..e17b8777d 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1555,7 +1555,7 @@ static int live_lrc_isolation(void *arg) return err; } -static int indirect_ctx_submit_req(struct intel_context *ce) +static int wabb_ctx_submit_req(struct intel_context *ce) { struct i915_request *rq; int err = 0; @@ -1579,7 +1579,8 @@ static int indirect_ctx_submit_req(struct intel_context *ce) #define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32)) static u32 * -emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs) +emit_wabb_ctx_canary(const struct intel_context *ce, + u32 *cs, bool per_ctx) { *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT | @@ -1587,26 +1588,43 @@ emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs) *cs++ = i915_mmio_reg_offset(RING_START(0)); *cs++ = i915_ggtt_offset(ce->state) + context_wa_bb_offset(ce) + - CTX_BB_CANARY_OFFSET; + CTX_BB_CANARY_OFFSET + + (per_ctx ? PAGE_SIZE : 0); *cs++ = 0; return cs; } +static u32 * +emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs) +{ + return emit_wabb_ctx_canary(ce, cs, false); +} + +static u32 * +emit_per_ctx_bb_canary(const struct intel_context *ce, u32 *cs) +{ + return emit_wabb_ctx_canary(ce, cs, true); +} + static void -indirect_ctx_bb_setup(struct intel_context *ce) +wabb_ctx_setup(struct intel_context *ce, bool per_ctx) { - u32 *cs = context_indirect_bb(ce); + u32 *cs = context_wabb(ce, per_ctx); cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d; - setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary); + if (per_ctx) + setup_per_ctx_bb(ce, ce->engine, emit_per_ctx_bb_canary); + else + setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary); } -static bool check_ring_start(struct intel_context *ce) +static bool check_ring_start(struct intel_context *ce, bool per_ctx) { const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) - - LRC_STATE_OFFSET + context_wa_bb_offset(ce); + LRC_STATE_OFFSET + context_wa_bb_offset(ce) + + (per_ctx ? PAGE_SIZE : 0); if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START]) return true; @@ -1618,21 +1636,21 @@ static bool check_ring_start(struct intel_context *ce) return false; } -static int indirect_ctx_bb_check(struct intel_context *ce) +static int wabb_ctx_check(struct intel_context *ce, bool per_ctx) { int err; - err = indirect_ctx_submit_req(ce); + err = wabb_ctx_submit_req(ce); if (err) return err; - if (!check_ring_start(ce)) + if (!check_ring_start(ce, per_ctx)) return -EINVAL; return 0; } -static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine) +static int __lrc_wabb_ctx(struct intel_engine_cs *engine, bool per_ctx) { struct intel_context *a, *b; int err; @@ -1667,14 +1685,14 @@ static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine) * As ring start is restored apriori of starting the indirect ctx bb and * as it will be different for each context, it fits to this purpose. */ - indirect_ctx_bb_setup(a); - indirect_ctx_bb_setup(b); + wabb_ctx_setup(a, per_ctx); + wabb_ctx_setup(b, per_ctx); - err = indirect_ctx_bb_check(a); + err = wabb_ctx_check(a, per_ctx); if (err) goto unpin_b; - err = indirect_ctx_bb_check(b); + err = wabb_ctx_check(b, per_ctx); unpin_b: intel_context_unpin(b); @@ -1688,7 +1706,7 @@ put_a: return err; } -static int live_lrc_indirect_ctx_bb(void *arg) +static int lrc_wabb_ctx(void *arg, bool per_ctx) { struct intel_gt *gt = arg; struct intel_engine_cs *engine; @@ -1697,7 +1715,7 @@ static int live_lrc_indirect_ctx_bb(void *arg) for_each_engine(engine, gt, id) { intel_engine_pm_get(engine); - err = __live_lrc_indirect_ctx_bb(engine); + err = __lrc_wabb_ctx(engine, per_ctx); intel_engine_pm_put(engine); if (igt_flush_test(gt->i915)) @@ -1710,6 +1728,16 @@ static int live_lrc_indirect_ctx_bb(void *arg) return err; } +static int live_lrc_indirect_ctx_bb(void *arg) +{ + return lrc_wabb_ctx(arg, false); +} + +static int live_lrc_per_ctx_bb(void *arg) +{ + return lrc_wabb_ctx(arg, true); +} + static void garbage_reset(struct intel_engine_cs *engine, struct i915_request *rq) { @@ -1947,6 +1975,7 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915) SUBTEST(live_lrc_garbage), SUBTEST(live_pphwsp_runtime), SUBTEST(live_lrc_indirect_ctx_bb), + SUBTEST(live_lrc_per_ctx_bb), }; if (!HAS_LOGICAL_RING_CONTEXTS(i915)) diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index 79aa6ac66..f40de408c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -261,11 +261,12 @@ static int igt_atomic_reset(void *arg) { struct intel_gt *gt = arg; const typeof(*igt_atomic_phases) *p; + intel_wakeref_t wakeref; int err = 0; /* Check that the resets are usable from atomic context */ - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); igt_global_reset_lock(gt); /* Flush any requests before we get started and check basics */ @@ -296,7 +297,7 @@ static int igt_atomic_reset(void *arg) unlock: igt_global_reset_unlock(gt); - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); return err; } @@ -307,6 +308,7 @@ static int igt_atomic_engine_reset(void *arg) const typeof(*igt_atomic_phases) *p; struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; int err = 0; /* Check that the resets are usable from atomic context */ @@ -317,7 +319,7 @@ static int igt_atomic_engine_reset(void *arg) if (intel_uc_uses_guc_submission(>->uc)) return 0; - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); igt_global_reset_lock(gt); /* Flush any requests before we get started and check basics */ @@ -365,7 +367,7 @@ static int igt_atomic_engine_reset(void *arg) out_unlock: igt_global_reset_unlock(gt); - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); return err; } diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index fb30f733b..dcef8d498 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -224,6 +224,7 @@ int live_rps_clock_interval(void *arg) struct intel_engine_cs *engine; enum intel_engine_id id; struct igt_spinner spin; + intel_wakeref_t wakeref; int err = 0; if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6) @@ -236,7 +237,7 @@ int live_rps_clock_interval(void *arg) saved_work = rps->work.func; rps->work.func = dummy_rps_work; - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); intel_rps_disable(>->rps); intel_gt_check_clock_frequency(gt); @@ -355,7 +356,7 @@ int live_rps_clock_interval(void *arg) } intel_rps_enable(>->rps); - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); igt_spinner_fini(&spin); @@ -376,6 +377,7 @@ int live_rps_control(void *arg) struct intel_engine_cs *engine; enum intel_engine_id id; struct igt_spinner spin; + intel_wakeref_t wakeref; int err = 0; /* @@ -398,7 +400,7 @@ int live_rps_control(void *arg) saved_work = rps->work.func; rps->work.func = dummy_rps_work; - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); for_each_engine(engine, gt, id) { struct i915_request *rq; ktime_t min_dt, max_dt; @@ -488,7 +490,7 @@ int live_rps_control(void *arg) break; } } - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); igt_spinner_fini(&spin); @@ -1023,6 +1025,7 @@ int live_rps_interrupt(void *arg) struct intel_engine_cs *engine; enum intel_engine_id id; struct igt_spinner spin; + intel_wakeref_t wakeref; u32 pm_events; int err = 0; @@ -1033,9 +1036,9 @@ int live_rps_interrupt(void *arg) if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6) return 0; - intel_gt_pm_get(gt); - pm_events = rps->pm_events; - intel_gt_pm_put(gt); + pm_events = 0; + with_intel_gt_pm(gt, wakeref) + pm_events = rps->pm_events; if (!pm_events) { pr_err("No RPS PM events registered, but RPS is enabled?\n"); return -ENODEV; diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c index 952c8d52d..302d05402 100644 --- a/drivers/gpu/drm/i915/gt/selftest_slpc.c +++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c @@ -266,6 +266,7 @@ static int run_test(struct intel_gt *gt, int test_type) struct intel_rps *rps = >->rps; struct intel_engine_cs *engine; enum intel_engine_id id; + intel_wakeref_t wakeref; struct igt_spinner spin; u32 slpc_min_freq, slpc_max_freq; int err = 0; @@ -311,7 +312,7 @@ static int run_test(struct intel_gt *gt, int test_type) } intel_gt_pm_wait_for_idle(gt); - intel_gt_pm_get(gt); + wakeref = intel_gt_pm_get(gt); for_each_engine(engine, gt, id) { struct i915_request *rq; u32 max_act_freq; @@ -397,7 +398,7 @@ static int run_test(struct intel_gt *gt, int test_type) if (igt_flush_test(gt->i915)) err = -EIO; - intel_gt_pm_put(gt); + intel_gt_pm_put(gt, wakeref); igt_spinner_fini(&spin); intel_gt_pm_wait_for_idle(gt); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c index 5f138de3c..40817ebcc 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c @@ -322,6 +322,7 @@ static int i915_gsc_proxy_component_bind(struct device *i915_kdev, gsc->proxy.component = data; gsc->proxy.component->mei_dev = mei_kdev; mutex_unlock(&gsc->proxy.mutex); + gt_dbg(gt, "GSC proxy mei component bound\n"); return 0; } @@ -342,6 +343,7 @@ static void i915_gsc_proxy_component_unbind(struct device *i915_kdev, with_intel_runtime_pm(&i915->runtime_pm, wakeref) intel_uncore_rmw(gt->uncore, HECI_H_CSR(MTL_GSC_HECI2_BASE), HECI_H_CSR_IE | HECI_H_CSR_RST, 0); + gt_dbg(gt, "GSC proxy mei component unbound\n"); } static const struct component_ops i915_gsc_proxy_component_ops = { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 3f3df1166..2b450c43b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -330,7 +330,7 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) static u32 guc_ctl_devid(struct intel_guc *guc) { - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); return (INTEL_DEVID(i915) << 16) | INTEL_REVID(i915); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 2b6dfe62c..813cc888e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -105,61 +105,67 @@ struct intel_guc { */ struct { /** - * @lock: protects everything in submission_state, - * ce->guc_id.id, and ce->guc_id.ref when transitioning in and - * out of zero + * @submission_state.lock: protects everything in + * submission_state, ce->guc_id.id, and ce->guc_id.ref + * when transitioning in and out of zero */ spinlock_t lock; /** - * @guc_ids: used to allocate new guc_ids, single-lrc + * @submission_state.guc_ids: used to allocate new + * guc_ids, single-lrc */ struct ida guc_ids; /** - * @num_guc_ids: Number of guc_ids, selftest feature to be able - * to reduce this number while testing. + * @submission_state.num_guc_ids: Number of guc_ids, selftest + * feature to be able to reduce this number while testing. */ int num_guc_ids; /** - * @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc + * @submission_state.guc_ids_bitmap: used to allocate + * new guc_ids, multi-lrc */ unsigned long *guc_ids_bitmap; /** - * @guc_id_list: list of intel_context with valid guc_ids but no - * refs + * @submission_state.guc_id_list: list of intel_context + * with valid guc_ids but no refs */ struct list_head guc_id_list; /** - * @guc_ids_in_use: Number single-lrc guc_ids in use + * @submission_state.guc_ids_in_use: Number single-lrc + * guc_ids in use */ unsigned int guc_ids_in_use; /** - * @destroyed_contexts: list of contexts waiting to be destroyed - * (deregistered with the GuC) + * @submission_state.destroyed_contexts: list of contexts + * waiting to be destroyed (deregistered with the GuC) */ struct list_head destroyed_contexts; /** - * @destroyed_worker: worker to deregister contexts, need as we - * need to take a GT PM reference and can't from destroy - * function as it might be in an atomic context (no sleeping) + * @submission_state.destroyed_worker: worker to deregister + * contexts, need as we need to take a GT PM reference and + * can't from destroy function as it might be in an atomic + * context (no sleeping) */ struct work_struct destroyed_worker; /** - * @reset_fail_worker: worker to trigger a GT reset after an - * engine reset fails + * @submission_state.reset_fail_worker: worker to trigger + * a GT reset after an engine reset fails */ struct work_struct reset_fail_worker; /** - * @reset_fail_mask: mask of engines that failed to reset + * @submission_state.reset_fail_mask: mask of engines that + * failed to reset */ intel_engine_mask_t reset_fail_mask; /** - * @sched_disable_delay_ms: schedule disable delay, in ms, for - * contexts + * @submission_state.sched_disable_delay_ms: schedule + * disable delay, in ms, for contexts */ unsigned int sched_disable_delay_ms; /** - * @sched_disable_gucid_threshold: threshold of min remaining available - * guc_ids before we start bypassing the schedule disable delay + * @submission_state.sched_disable_gucid_threshold: + * threshold of min remaining available guc_ids before + * we start bypassing the schedule disable delay */ unsigned int sched_disable_gucid_threshold; } submission_state; @@ -243,37 +249,40 @@ struct intel_guc { */ struct { /** - * @lock: Lock protecting the below fields and the engine stats. + * @timestamp.lock: Lock protecting the below fields and + * the engine stats. */ spinlock_t lock; /** - * @gt_stamp: 64 bit extended value of the GT timestamp. + * @timestamp.gt_stamp: 64-bit extended value of the GT + * timestamp. */ u64 gt_stamp; /** - * @ping_delay: Period for polling the GT timestamp for - * overflow. + * @timestamp.ping_delay: Period for polling the GT + * timestamp for overflow. */ unsigned long ping_delay; /** - * @work: Periodic work to adjust GT timestamp, engine and - * context usage for overflows. + * @timestamp.work: Periodic work to adjust GT timestamp, + * engine and context usage for overflows. */ struct delayed_work work; /** - * @shift: Right shift value for the gpm timestamp + * @timestamp.shift: Right shift value for the gpm timestamp */ u32 shift; /** - * @last_stat_jiffies: jiffies at last actual stats collection time - * We use this timestamp to ensure we don't oversample the - * stats because runtime power management events can trigger - * stats collection at much higher rates than required. + * @timestamp.last_stat_jiffies: jiffies at last actual + * stats collection time. We use this timestamp to ensure + * we don't oversample the stats because runtime power + * management events can trigger stats collection at much + * higher rates than required. */ unsigned long last_stat_jiffies; } timestamp; @@ -297,6 +306,10 @@ struct intel_guc { * @number_guc_id_stolen: The number of guc_ids that have been stolen */ int number_guc_id_stolen; + /** + * @fast_response_selftest: Backdoor to CT handler for fast response selftest + */ + u32 fast_response_selftest; #endif }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index a4da0208c..a1cd40d80 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -355,7 +355,7 @@ guc_capture_alloc_steered_lists(struct intel_guc *guc, static const struct __guc_mmio_reg_descr_group * guc_capture_get_device_reglist(struct intel_guc *guc) { - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); const struct __guc_mmio_reg_descr_group *lists; if (GRAPHICS_VER(i915) >= 12) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 89e314b37..0d5197c08 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -265,7 +265,7 @@ int intel_guc_ct_init(struct intel_guc_ct *ct) u32 *cmds; int err; - err = i915_inject_probe_error(guc_to_gt(guc)->i915, -ENXIO); + err = i915_inject_probe_error(guc_to_i915(guc), -ENXIO); if (err) return err; @@ -1076,6 +1076,15 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r found = true; break; } + +#ifdef CONFIG_DRM_I915_SELFTEST + if (!found && ct_to_guc(ct)->fast_response_selftest) { + CT_DEBUG(ct, "Assuming unsolicited response due to FAST_REQUEST selftest\n"); + ct_to_guc(ct)->fast_response_selftest++; + found = true; + } +#endif + if (!found) { CT_ERROR(ct, "Unsolicited response message: len %u, data %#x (fence %u, last %u)\n", len, hxg[0], fence, ct->requests.last_fence); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c index 55bc8b55f..bf16351c9 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c @@ -520,7 +520,7 @@ void intel_guc_log_init_early(struct intel_guc_log *log) static int guc_log_relay_create(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); struct rchan *guc_log_relay_chan; size_t n_subbufs, subbuf_size; int ret; @@ -573,7 +573,7 @@ static void guc_log_relay_destroy(struct intel_guc_log *log) static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); intel_wakeref_t wakeref; _guc_log_copy_debuglogs_for_relay(log); @@ -589,7 +589,7 @@ static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log) static u32 __get_default_log_level(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); /* A negative value means "use platform/config default" */ if (i915->params.guc_log_level < 0) { @@ -664,7 +664,7 @@ void intel_guc_log_destroy(struct intel_guc_log *log) int intel_guc_log_set_level(struct intel_guc_log *log, u32 level) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); intel_wakeref_t wakeref; int ret = 0; @@ -796,7 +796,7 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log) static void guc_log_relay_stop(struct intel_guc_log *log) { struct intel_guc *guc = log_to_guc(log); - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); if (!log->relay.started) return; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c index 1adec6de2..9df792730 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c @@ -14,7 +14,7 @@ static bool __guc_rc_supported(struct intel_guc *guc) { /* GuC RC is unavailable for pre-Gen12 */ return guc->submission_supported && - GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12; + GRAPHICS_VER(guc_to_i915(guc)) >= 12; } static bool __guc_rc_selected(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 2dfb07cc4..3e681ab6f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -34,7 +34,7 @@ static bool __detect_slpc_supported(struct intel_guc *guc) { /* GuC SLPC is unavailable for pre-Gen12 */ return guc->submission_supported && - GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12; + GRAPHICS_VER(guc_to_i915(guc)) >= 12; } static bool __guc_slpc_selected(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 17df71117..a259f1118 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1107,7 +1107,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) if (deregister) guc_signal_context_fence(ce); if (destroyed) { - intel_gt_pm_put_async(guc_to_gt(guc)); + intel_gt_pm_put_async_untracked(guc_to_gt(guc)); release_guc_id(guc, ce); __guc_context_destroy(ce); } @@ -1303,6 +1303,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) unsigned long flags; u32 reset_count; bool in_reset; + intel_wakeref_t wakeref; spin_lock_irqsave(&guc->timestamp.lock, flags); @@ -1325,7 +1326,8 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) * start_gt_clk is derived from GuC state. To get a consistent * view of activity, we query the GuC state only if gt is awake. */ - if (!in_reset && intel_gt_pm_get_if_awake(gt)) { + wakeref = in_reset ? 0 : intel_gt_pm_get_if_awake(gt); + if (wakeref) { stats_saved = *stats; gt_stamp_saved = guc->timestamp.gt_stamp; /* @@ -1334,7 +1336,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) */ guc_update_engine_gt_clks(engine); guc_update_pm_timestamp(guc, now); - intel_gt_pm_put_async(gt); + intel_gt_pm_put_async(gt, wakeref); if (i915_reset_count(gpu_error) != reset_count) { *stats = stats_saved; guc->timestamp.gt_stamp = gt_stamp_saved; @@ -3385,9 +3387,9 @@ static void destroyed_worker_func(struct work_struct *w) struct intel_guc *guc = container_of(w, struct intel_guc, submission_state.destroyed_worker); struct intel_gt *gt = guc_to_gt(guc); - int tmp; + intel_wakeref_t wakeref; - with_intel_gt_pm(gt, tmp) + with_intel_gt_pm(gt, wakeref) deregister_destroyed_contexts(guc); } @@ -4624,12 +4626,12 @@ static bool __guc_submission_supported(struct intel_guc *guc) { /* GuC submission is unavailable for pre-Gen11 */ return intel_guc_is_supported(guc) && - GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11; + GRAPHICS_VER(guc_to_i915(guc)) >= 11; } static bool __guc_submission_selected(struct intel_guc *guc) { - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct drm_i915_private *i915 = guc_to_i915(guc); if (!intel_guc_submission_is_supported(guc)) return false; @@ -4894,7 +4896,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, intel_context_put(ce); } else if (context_destroyed(ce)) { /* Context has been destroyed */ - intel_gt_pm_put_async(guc_to_gt(guc)); + intel_gt_pm_put_async_untracked(guc_to_gt(guc)); release_guc_id(guc, ce); __guc_context_destroy(ce); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 27f6561dd..3872d309e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -106,11 +106,6 @@ static void __confirm_options(struct intel_uc *uc) gt_info(gt, "Incompatible option enable_guc=%d - %s\n", i915->params.enable_guc, "GuC is not supported!"); - if (i915->params.enable_guc & ENABLE_GUC_LOAD_HUC && - !intel_uc_supports_huc(uc)) - gt_info(gt, "Incompatible option enable_guc=%d - %s\n", - i915->params.enable_guc, "HuC is not supported!"); - if (i915->params.enable_guc & ENABLE_GUC_SUBMISSION && !intel_uc_supports_guc_submission(uc)) gt_info(gt, "Incompatible option enable_guc=%d - %s\n", diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 362639162..756093eaf 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -1343,16 +1343,13 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len) for_each_sgt_page(page, iter, uc_fw->obj->mm.pages) { u32 len = min_t(u32, size, PAGE_SIZE - offset); - void *vaddr; if (idx > 0) { idx--; continue; } - vaddr = kmap_atomic(page); - memcpy(dst, vaddr + offset, len); - kunmap_atomic(vaddr); + memcpy_from_page(dst, page, offset, len); offset = 0; dst += len; diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c index bfb721435..c900aac85 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -286,11 +286,126 @@ err_wakeref: return ret; } +/* + * Send a context schedule H2G message with an invalid context id. + * This should generate a GUC_RESULT_INVALID_CONTEXT response. + */ +static int bad_h2g(struct intel_guc *guc) +{ + u32 action[] = { + INTEL_GUC_ACTION_SCHED_CONTEXT, + 0x12345678, + }; + + return intel_guc_send_nb(guc, action, ARRAY_SIZE(action), 0); +} + +/* + * Set a spinner running to make sure the system is alive and active, + * then send a bad but asynchronous H2G command and wait to see if an + * error response is returned. If no response is received or if the + * spinner dies then the test will fail. + */ +#define FAST_RESPONSE_TIMEOUT_MS 1000 +static int intel_guc_fast_request(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_context *ce; + struct igt_spinner spin; + struct i915_request *rq; + intel_wakeref_t wakeref; + struct intel_engine_cs *engine = intel_selftest_find_any_engine(gt); + bool spinning = false; + int ret = 0; + + if (!engine) + return 0; + + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); + gt_err(gt, "Failed to create spinner request: %pe\n", ce); + goto err_pm; + } + + ret = igt_spinner_init(&spin, engine->gt); + if (ret) { + gt_err(gt, "Failed to create spinner: %pe\n", ERR_PTR(ret)); + goto err_pm; + } + spinning = true; + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + intel_context_put(ce); + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + gt_err(gt, "Failed to create spinner request: %pe\n", rq); + goto err_spin; + } + + ret = request_add_spin(rq, &spin); + if (ret) { + gt_err(gt, "Failed to add Spinner request: %pe\n", ERR_PTR(ret)); + goto err_rq; + } + + gt->uc.guc.fast_response_selftest = 1; + + ret = bad_h2g(>->uc.guc); + if (ret) { + gt_err(gt, "Failed to send H2G: %pe\n", ERR_PTR(ret)); + goto err_rq; + } + + ret = wait_for(gt->uc.guc.fast_response_selftest != 1 || i915_request_completed(rq), + FAST_RESPONSE_TIMEOUT_MS); + if (ret) { + gt_err(gt, "Request wait failed: %pe\n", ERR_PTR(ret)); + goto err_rq; + } + + if (i915_request_completed(rq)) { + gt_err(gt, "Spinner died waiting for fast request error!\n"); + ret = -EIO; + goto err_rq; + } + + if (gt->uc.guc.fast_response_selftest != 2) { + gt_err(gt, "Unexpected fast response count: %d\n", + gt->uc.guc.fast_response_selftest); + goto err_rq; + } + + igt_spinner_end(&spin); + spinning = false; + + ret = intel_selftest_wait_for_rq(rq); + if (ret) { + gt_err(gt, "Request failed to complete: %pe\n", ERR_PTR(ret)); + goto err_rq; + } + +err_rq: + i915_request_put(rq); + +err_spin: + if (spinning) + igt_spinner_end(&spin); + igt_spinner_fini(&spin); + +err_pm: + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + return ret; +} + int intel_guc_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(intel_guc_scrub_ctbs), SUBTEST(intel_guc_steal_guc_ids), + SUBTEST(intel_guc_fast_request), }; struct intel_gt *gt = to_gt(i915); diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c index 34b5d952e..26fdc392f 100644 --- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c @@ -74,7 +74,7 @@ static int intel_hang_guc(void *arg) goto err; } - rq = igt_spinner_create_request(&spin, ce, MI_NOOP); + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); intel_context_put(ce); if (IS_ERR(rq)) { ret = PTR_ERR(rq); -- cgit v1.2.3