summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/gt
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 18:50:36 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 18:50:36 +0000
commit50ba0232fd5312410f1b65247e774244f89a628e (patch)
treefd8f2fc78e9e548af0ff9590276602ee6125be00 /drivers/gpu/drm/i915/gt
parentReleasing progress-linux version 6.7.12-1~progress7.99u1. (diff)
downloadlinux-50ba0232fd5312410f1b65247e774244f89a628e.tar.xz
linux-50ba0232fd5312410f1b65247e774244f89a628e.zip
Merging upstream version 6.8.9.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.c4
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_ppgtt.c46
-rw-r--r--drivers/gpu/drm/i915/gt/intel_breadcrumbs.c13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.c14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context.h4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_context_types.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_cs.c19
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.c7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_pm.h1
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_regs.h8
-rw-r--r--drivers/gpu/drm/i915/gt/intel_engine_types.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_execlists_submission.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_ggtt.c23
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gsc.h7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.c8
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt.h24
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c39
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h13
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_mcr.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.c14
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.h38
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c4
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_regs.h12
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.c26
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gtt.h5
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c100
-rw-r--r--drivers/gpu/drm/i915/gt/intel_sseu.c7
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c96
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_cs.c20
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c2
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_gt_pm.c5
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_lrc.c65
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_reset.c10
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_rps.c17
-rw-r--r--drivers/gpu/drm/i915/gt/selftest_slpc.c5
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h79
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c11
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_log.c10
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c18
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c5
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c5
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc.c115
-rw-r--r--drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c2
51 files changed, 744 insertions, 183 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 86a04afff6..e1bf13e3d3 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -226,7 +226,7 @@ u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
static int mtl_dummy_pipe_control(struct i915_request *rq)
{
/* Wa_14016712196 */
- if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 71)) ||
+ if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) ||
IS_DG2(rq->i915)) {
u32 *cs;
@@ -822,7 +822,7 @@ u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
flags |= PIPE_CONTROL_FLUSH_L3;
/* Wa_14016712196 */
- if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915))
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915))
/* dummy PIPE_CONTROL + depth flush */
cs = gen12_emit_pipe_control(cs, 0,
PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0);
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index 9895e18df0..81bf221637 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -5,6 +5,7 @@
#include <linux/log2.h>
+#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "gen8_ppgtt.h"
@@ -222,6 +223,9 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+ if (vm->rsvd.obj)
+ i915_gem_object_put(vm->rsvd.obj);
+
if (intel_vgpu_active(vm->i915))
gen8_ppgtt_notify_vgt(ppgtt, false);
@@ -950,6 +954,44 @@ err_pd:
return ERR_PTR(err);
}
+static int gen8_init_rsvd(struct i915_address_space *vm)
+{
+ struct drm_i915_private *i915 = vm->i915;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int ret;
+
+ if (!intel_gt_needs_wa_16018031267(vm->gt))
+ return 0;
+
+ /* The memory will be used only by GPU. */
+ obj = i915_gem_object_create_lmem(i915, PAGE_SIZE,
+ I915_BO_ALLOC_VOLATILE |
+ I915_BO_ALLOC_GPU_ONLY);
+ if (IS_ERR(obj))
+ obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto unref;
+ }
+
+ ret = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+ if (ret)
+ goto unref;
+
+ vm->rsvd.vma = i915_vma_make_unshrinkable(vma);
+ vm->rsvd.obj = obj;
+ vm->total -= vma->node.size;
+ return 0;
+unref:
+ i915_gem_object_put(obj);
+ return ret;
+}
+
/*
* GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
* with a net effect resembling a 2-level page table in normal x86 terms. Each
@@ -1031,6 +1073,10 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
if (intel_vgpu_active(gt->i915))
gen8_ppgtt_notify_vgt(ppgtt, true);
+ err = gen8_init_rsvd(&ppgtt->vm);
+ if (err)
+ goto err_put;
+
return ppgtt;
err_put:
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index ecc990ec1b..d650beb8ed 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -28,11 +28,14 @@ static void irq_disable(struct intel_breadcrumbs *b)
static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
+ intel_wakeref_t wakeref;
+
/*
* Since we are waiting on a request, the GPU should be busy
* and should have its own rpm reference.
*/
- if (GEM_WARN_ON(!intel_gt_pm_get_if_awake(b->irq_engine->gt)))
+ wakeref = intel_gt_pm_get_if_awake(b->irq_engine->gt);
+ if (GEM_WARN_ON(!wakeref))
return;
/*
@@ -41,7 +44,7 @@ static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
* which we can add a new waiter and avoid the cost of re-enabling
* the irq.
*/
- WRITE_ONCE(b->irq_armed, true);
+ WRITE_ONCE(b->irq_armed, wakeref);
/* Requests may have completed before we could enable the interrupt. */
if (!b->irq_enabled++ && b->irq_enable(b))
@@ -61,12 +64,14 @@ static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
+ intel_wakeref_t wakeref = b->irq_armed;
+
GEM_BUG_ON(!b->irq_enabled);
if (!--b->irq_enabled)
b->irq_disable(b);
- WRITE_ONCE(b->irq_armed, false);
- intel_gt_pm_put_async(b->irq_engine->gt);
+ WRITE_ONCE(b->irq_armed, 0);
+ intel_gt_pm_put_async(b->irq_engine->gt, wakeref);
}
static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
index 72dfd3748c..bdf09fd67b 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
@@ -13,6 +13,7 @@
#include <linux/types.h>
#include "intel_engine_types.h"
+#include "intel_wakeref.h"
/*
* Rather than have every client wait upon all user interrupts,
@@ -43,7 +44,7 @@ struct intel_breadcrumbs {
spinlock_t irq_lock; /* protects the interrupt from hardirq context */
struct irq_work irq_work; /* for use from inside irq_lock */
unsigned int irq_enabled;
- bool irq_armed;
+ intel_wakeref_t irq_armed;
/* Not all breadcrumbs are attached to physical HW */
intel_engine_mask_t engine_mask;
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index a53b26178f..a2f1245741 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -6,6 +6,7 @@
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_pm.h"
+#include "i915_drm_client.h"
#include "i915_drv.h"
#include "i915_trace.h"
@@ -50,6 +51,7 @@ intel_context_create(struct intel_engine_cs *engine)
int intel_context_alloc_state(struct intel_context *ce)
{
+ struct i915_gem_context *ctx;
int err = 0;
if (mutex_lock_interruptible(&ce->pin_mutex))
@@ -66,6 +68,18 @@ int intel_context_alloc_state(struct intel_context *ce)
goto unlock;
set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
+
+ rcu_read_lock();
+ ctx = rcu_dereference(ce->gem_context);
+ if (ctx && !kref_get_unless_zero(&ctx->ref))
+ ctx = NULL;
+ rcu_read_unlock();
+ if (ctx) {
+ if (ctx->client)
+ i915_drm_client_add_context_objects(ctx->client,
+ ce);
+ i915_gem_context_put(ctx);
+ }
}
unlock:
diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h
index a80e3b7c24..25564c0150 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.h
+++ b/drivers/gpu/drm/i915/gt/intel_context.h
@@ -212,7 +212,7 @@ static inline void intel_context_enter(struct intel_context *ce)
return;
ce->ops->enter(ce);
- intel_gt_pm_get(ce->vm->gt);
+ ce->wakeref = intel_gt_pm_get(ce->vm->gt);
}
static inline void intel_context_mark_active(struct intel_context *ce)
@@ -229,7 +229,7 @@ static inline void intel_context_exit(struct intel_context *ce)
if (--ce->active_count)
return;
- intel_gt_pm_put_async(ce->vm->gt);
+ intel_gt_pm_put_async(ce->vm->gt, ce->wakeref);
ce->ops->exit(ce);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index aceaac28a3..7eccbd70d8 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -17,6 +17,7 @@
#include "i915_utils.h"
#include "intel_engine_types.h"
#include "intel_sseu.h"
+#include "intel_wakeref.h"
#include "uc/intel_guc_fwif.h"
@@ -112,6 +113,7 @@ struct intel_context {
u32 ring_size;
struct intel_ring *ring;
struct intel_timeline *timeline;
+ intel_wakeref_t wakeref;
unsigned long flags;
#define CONTEXT_BARRIER_BIT 0
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 4a11219e56..84be97f959 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -47,7 +47,7 @@
#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
#define GEN11_LR_CONTEXT_RENDER_SIZE (14 * PAGE_SIZE)
-#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE)
+#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
#define MAX_MMIO_BASES 3
struct engine_info {
@@ -908,6 +908,23 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
info->engine_mask &= ~BIT(GSC0);
}
+ /*
+ * Do not create the command streamer for CCS slices beyond the first.
+ * All the workload submitted to the first engine will be shared among
+ * all the slices.
+ *
+ * Once the user will be allowed to customize the CCS mode, then this
+ * check needs to be removed.
+ */
+ if (IS_DG2(gt->i915)) {
+ u8 first_ccs = __ffs(CCS_MASK(gt));
+
+ /* Mask off all the CCS engine */
+ info->engine_mask &= ~GENMASK(CCS3, CCS0);
+ /* Put back in the first CCS engine */
+ info->engine_mask |= BIT(_CCS(first_ccs));
+ }
+
return info->engine_mask;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
index 9a527e1f5b..1a8e2b7db0 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c
@@ -188,7 +188,7 @@ static void heartbeat(struct work_struct *wrk)
* low latency and no jitter] the chance to naturally
* complete before being preempted.
*/
- attr.priority = 0;
+ attr.priority = I915_PRIORITY_NORMAL;
if (rq->sched.attr.priority >= attr.priority)
attr.priority = I915_PRIORITY_HEARTBEAT;
if (rq->sched.attr.priority >= attr.priority)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 5a3a5b29d1..fb7bff27b4 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -63,7 +63,7 @@ static int __engine_unpark(struct intel_wakeref *wf)
ENGINE_TRACE(engine, "\n");
- intel_gt_pm_get(engine->gt);
+ engine->wakeref_track = intel_gt_pm_get(engine->gt);
/* Discard stale context state from across idling */
ce = engine->kernel_context;
@@ -122,6 +122,7 @@ __queue_and_release_pm(struct i915_request *rq,
*/
GEM_BUG_ON(rq->context->active_count != 1);
__intel_gt_pm_get(engine->gt);
+ rq->context->wakeref = intel_wakeref_track(&engine->gt->wakeref);
/*
* We have to serialise all potential retirement paths with our
@@ -282,7 +283,7 @@ static int __engine_park(struct intel_wakeref *wf)
engine->park(engine);
/* While gt calls i915_vma_parked(), we have to break the lock cycle */
- intel_gt_pm_put_async(engine->gt);
+ intel_gt_pm_put_async(engine->gt, engine->wakeref_track);
return 0;
}
@@ -293,7 +294,7 @@ static const struct intel_wakeref_ops wf_ops = {
void intel_engine_init__pm(struct intel_engine_cs *engine)
{
- intel_wakeref_init(&engine->wakeref, engine->i915, &wf_ops);
+ intel_wakeref_init(&engine->wakeref, engine->i915, &wf_ops, engine->name);
intel_engine_init_heartbeat(engine);
intel_gsc_idle_msg_enable(engine);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index d68675925b..1d97c435a0 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -10,6 +10,7 @@
#include "i915_request.h"
#include "intel_engine_types.h"
#include "intel_wakeref.h"
+#include "intel_gt.h"
#include "intel_gt_pm.h"
static inline bool
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
index fdd4ddd3a9..a8eac59e37 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
@@ -118,9 +118,15 @@
#define CCID_EXTENDED_STATE_RESTORE BIT(2)
#define CCID_EXTENDED_STATE_SAVE BIT(3)
#define RING_BB_PER_CTX_PTR(base) _MMIO((base) + 0x1c0) /* gen8+ */
+#define PER_CTX_BB_FORCE BIT(2)
+#define PER_CTX_BB_VALID BIT(0)
+
#define RING_INDIRECT_CTX(base) _MMIO((base) + 0x1c4) /* gen8+ */
#define RING_INDIRECT_CTX_OFFSET(base) _MMIO((base) + 0x1c8) /* gen8+ */
#define ECOSKPD(base) _MMIO((base) + 0x1d0)
+#define XEHP_BLITTER_SCHEDULING_MODE_MASK REG_GENMASK(12, 11)
+#define XEHP_BLITTER_ROUND_ROBIN_MODE \
+ REG_FIELD_PREP(XEHP_BLITTER_SCHEDULING_MODE_MASK, 1)
#define ECO_CONSTANT_BUFFER_SR_DISABLE REG_BIT(4)
#define ECO_GATING_CX_ONLY REG_BIT(3)
#define GEN6_BLITTER_FBC_NOTIFY REG_BIT(3)
@@ -257,5 +263,7 @@
#define VDBOX_CGCTL3F18(base) _MMIO((base) + 0x3f18)
#define ALNUNIT_CLKGATE_DIS REG_BIT(13)
+#define VDBOX_CGCTL3F1C(base) _MMIO((base) + 0x3f1c)
+#define MFXPIPE_CLKGATE_DIS REG_BIT(3)
#endif /* __INTEL_ENGINE_REGS__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 8769760257..960e6be204 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -446,7 +446,9 @@ struct intel_engine_cs {
unsigned long serial;
unsigned long wakeref_serial;
+ intel_wakeref_t wakeref_track;
struct intel_wakeref wakeref;
+
struct file *default_state;
struct {
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index 42e09f1589..b061a0a0d6 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -630,7 +630,7 @@ static void __execlists_schedule_out(struct i915_request * const rq,
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
if (engine->fw_domain && !--engine->fw_active)
intel_uncore_forcewake_put(engine->uncore, engine->fw_domain);
- intel_gt_pm_put_async(engine->gt);
+ intel_gt_pm_put_async_untracked(engine->gt);
/*
* If this is part of a virtual engine, its next request may
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 15fc8e4703..21a7e3191c 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -245,16 +245,15 @@ static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
gen8_ggtt_invalidate(ggtt);
list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) {
- if (intel_guc_tlb_invalidation_is_available(&gt->uc.guc)) {
+ if (intel_guc_tlb_invalidation_is_available(&gt->uc.guc))
guc_ggtt_ct_invalidate(gt);
- } else if (GRAPHICS_VER(i915) >= 12) {
+ else if (GRAPHICS_VER(i915) >= 12)
intel_uncore_write_fw(gt->uncore,
GEN12_GUC_TLB_INV_CR,
GEN12_GUC_TLB_INV_CR_INVALIDATE);
- } else {
+ else
intel_uncore_write_fw(gt->uncore,
GEN8_GTCR, GEN8_GTCR_INVALIDATE);
- }
}
}
@@ -297,7 +296,7 @@ static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt)
return intel_gt_is_bind_context_ready(gt);
}
-static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt)
+static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt, intel_wakeref_t *wakeref)
{
struct intel_context *ce;
struct intel_gt *gt = ggtt->vm.gt;
@@ -314,7 +313,8 @@ static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt)
* would conflict with fs_reclaim trying to allocate memory while
* doing rpm_resume().
*/
- if (!intel_gt_pm_get_if_awake(gt))
+ *wakeref = intel_gt_pm_get_if_awake(gt);
+ if (!*wakeref)
return NULL;
intel_engine_pm_get(ce->engine);
@@ -322,10 +322,10 @@ static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt)
return ce;
}
-static void gen8_ggtt_bind_put_ce(struct intel_context *ce)
+static void gen8_ggtt_bind_put_ce(struct intel_context *ce, intel_wakeref_t wakeref)
{
intel_engine_pm_put(ce->engine);
- intel_gt_pm_put(ce->engine->gt);
+ intel_gt_pm_put(ce->engine->gt, wakeref);
}
static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
@@ -338,12 +338,13 @@ static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
struct sgt_iter iter;
struct i915_request *rq;
struct intel_context *ce;
+ intel_wakeref_t wakeref;
u32 *cs;
if (!num_entries)
return true;
- ce = gen8_ggtt_bind_get_ce(ggtt);
+ ce = gen8_ggtt_bind_get_ce(ggtt, &wakeref);
if (!ce)
return false;
@@ -419,13 +420,13 @@ queue_err_rq:
offset += n_ptes;
}
- gen8_ggtt_bind_put_ce(ce);
+ gen8_ggtt_bind_put_ce(ce, wakeref);
return true;
err_rq:
i915_request_put(rq);
put_ce:
- gen8_ggtt_bind_put_ce(ce);
+ gen8_ggtt_bind_put_ce(ce, wakeref);
return false;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.h b/drivers/gpu/drm/i915/gt/intel_gsc.h
index 7ab3ca0f9f..013c642514 100644
--- a/drivers/gpu/drm/i915/gt/intel_gsc.h
+++ b/drivers/gpu/drm/i915/gt/intel_gsc.h
@@ -21,8 +21,11 @@ struct mei_aux_device;
/**
* struct intel_gsc - graphics security controller
*
- * @gem_obj: scratch memory GSC operations
- * @intf : gsc interface
+ * @intf: gsc interface
+ * @intf.adev: MEI aux. device for this @intf
+ * @intf.gem_obj: scratch memory GSC operations
+ * @intf.irq: IRQ for this device (%-1 for no IRQ)
+ * @intf.id: this interface's id number/index
*/
struct intel_gsc {
struct intel_gsc_intf {
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index ba1186fc52..6a2c2718bc 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -451,7 +451,7 @@ void intel_gt_flush_ggtt_writes(struct intel_gt *gt)
spin_lock_irqsave(&uncore->lock, flags);
intel_uncore_posting_read_fw(uncore,
- RING_HEAD(RENDER_RING_BASE));
+ RING_TAIL(RENDER_RING_BASE));
spin_unlock_irqrestore(&uncore->lock, flags);
}
}
@@ -1024,6 +1024,12 @@ enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt,
return I915_MAP_WC;
}
+bool intel_gt_needs_wa_16018031267(struct intel_gt *gt)
+{
+ /* Wa_16018031267, Wa_16018063123 */
+ return IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 55), IP_VER(12, 71));
+}
+
bool intel_gt_needs_wa_22016122933(struct intel_gt *gt)
{
return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == GT_MEDIA;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 970bedf6b7..003eb93b82 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -87,8 +87,13 @@ static inline bool gt_is_root(struct intel_gt *gt)
return !gt->info.id;
}
+bool intel_gt_needs_wa_16018031267(struct intel_gt *gt);
bool intel_gt_needs_wa_22016122933(struct intel_gt *gt);
+#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \
+ intel_gt_needs_wa_16018031267(engine->gt) && \
+ engine->class == COPY_ENGINE_CLASS && engine->instance == 0)
+
static inline struct intel_gt *uc_to_gt(struct intel_uc *uc)
{
return container_of(uc, struct intel_gt, uc);
@@ -114,6 +119,11 @@ static inline struct intel_gt *gsc_to_gt(struct intel_gsc *gsc)
return container_of(gsc, struct intel_gt, gsc);
}
+static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc)
+{
+ return guc_to_gt(guc)->i915;
+}
+
void intel_gt_common_init_early(struct intel_gt *gt);
int intel_root_gt_init_early(struct drm_i915_private *i915);
int intel_gt_assign_ggtt(struct intel_gt *gt);
@@ -167,6 +177,20 @@ void intel_gt_release_all(struct drm_i915_private *i915);
(id__)++) \
for_each_if(((gt__) = (i915__)->gt[(id__)]))
+/* Simple iterator over all initialised engines */
+#define for_each_engine(engine__, gt__, id__) \
+ for ((id__) = 0; \
+ (id__) < I915_NUM_ENGINES; \
+ (id__)++) \
+ for_each_if ((engine__) = (gt__)->engine[(id__)])
+
+/* Iterator over subset of engines selected by mask */
+#define for_each_engine_masked(engine__, gt__, mask__, tmp__) \
+ for ((tmp__) = (mask__) & (gt__)->info.engine_mask; \
+ (tmp__) ? \
+ ((engine__) = (gt__)->engine[__mask_next_bit(tmp__)]), 1 : \
+ 0;)
+
void intel_gt_info_print(const struct intel_gt_info *info,
struct drm_printer *p);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
new file mode 100644
index 0000000000..044219c596
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
+#include "intel_gt_regs.h"
+
+void intel_gt_apply_ccs_mode(struct intel_gt *gt)
+{
+ int cslice;
+ u32 mode = 0;
+ int first_ccs = __ffs(CCS_MASK(gt));
+
+ if (!IS_DG2(gt->i915))
+ return;
+
+ /* Build the value for the fixed CCS load balancing */
+ for (cslice = 0; cslice < I915_MAX_CCS; cslice++) {
+ if (CCS_MASK(gt) & BIT(cslice))
+ /*
+ * If available, assign the cslice
+ * to the first available engine...
+ */
+ mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs);
+
+ else
+ /*
+ * ... otherwise, mark the cslice as
+ * unavailable if no CCS dispatches here
+ */
+ mode |= XEHP_CCS_MODE_CSLICE(cslice,
+ XEHP_CCS_MODE_CSLICE_MASK);
+ }
+
+ intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
new file mode 100644
index 0000000000..9e5549caeb
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_CCS_MODE_H__
+#define __INTEL_GT_CCS_MODE_H__
+
+struct intel_gt;
+
+void intel_gt_apply_ccs_mode(struct intel_gt *gt);
+
+#endif /* __INTEL_GT_CCS_MODE_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c
index 8f9b874fdc..3aa1d014c1 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_engines_debugfs.c
@@ -6,8 +6,8 @@
#include <drm/drm_print.h>
-#include "i915_drv.h" /* for_each_engine! */
#include "intel_engine.h"
+#include "intel_gt.h"
#include "intel_gt_debugfs.h"
#include "intel_gt_engines_debugfs.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
index 34913912d8..e253750a51 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c
@@ -388,8 +388,7 @@ void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags)
* registers. This wakeref will be released in the unlock
* routine.
*
- * This is expected to become a formally documented/numbered
- * workaround soon.
+ * Wa_22018931422
*/
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_GT);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index f5899d503e..220ac4f92e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -28,19 +28,20 @@
static void user_forcewake(struct intel_gt *gt, bool suspend)
{
int count = atomic_read(&gt->user_wakeref);
+ intel_wakeref_t wakeref;
/* Inside suspend/resume so single threaded, no races to worry about. */
if (likely(!count))
return;
- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
if (suspend) {
GEM_BUG_ON(count > atomic_read(&gt->wakeref.count));
atomic_sub(count, &gt->wakeref.count);
} else {
atomic_add(count, &gt->wakeref.count);
}
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);
}
static void runtime_begin(struct intel_gt *gt)
@@ -138,7 +139,7 @@ void intel_gt_pm_init_early(struct intel_gt *gt)
* runtime_pm is per-device rather than per-tile, so this is still the
* correct structure.
*/
- intel_wakeref_init(&gt->wakeref, gt->i915, &wf_ops);
+ intel_wakeref_init(&gt->wakeref, gt->i915, &wf_ops, "GT");
seqcount_mutex_init(&gt->stats.lock, &gt->wakeref.mutex);
}
@@ -167,7 +168,7 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
enum intel_engine_id id;
intel_wakeref_t wakeref;
- GT_TRACE(gt, "force:%s", str_yes_no(force));
+ GT_TRACE(gt, "force:%s\n", str_yes_no(force));
/* Use a raw wakeref to avoid calling intel_display_power_get early */
wakeref = intel_runtime_pm_get(gt->uncore->rpm);
@@ -236,6 +237,7 @@ int intel_gt_resume(struct intel_gt *gt)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ intel_wakeref_t wakeref;
int err;
err = intel_gt_has_unrecoverable_error(gt);
@@ -252,7 +254,7 @@ int intel_gt_resume(struct intel_gt *gt)
*/
gt_sanitize(gt, true);
- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
intel_rc6_sanitize(&gt->rc6);
@@ -295,7 +297,7 @@ int intel_gt_resume(struct intel_gt *gt)
out_fw:
intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);
intel_gt_bind_context_set_ready(gt);
return err;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index b1eeb5b339..911fd01602 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -16,19 +16,28 @@ static inline bool intel_gt_pm_is_awake(const struct intel_gt *gt)
return intel_wakeref_is_active(&gt->wakeref);
}
-static inline void intel_gt_pm_get(struct intel_gt *gt)
+static inline void intel_gt_pm_get_untracked(struct intel_gt *gt)
{
intel_wakeref_get(&gt->wakeref);
}
+static inline intel_wakeref_t intel_gt_pm_get(struct intel_gt *gt)
+{
+ intel_gt_pm_get_untracked(gt);
+ return intel_wakeref_track(&gt->wakeref);
+}
+
static inline void __intel_gt_pm_get(struct intel_gt *gt)
{
__intel_wakeref_get(&gt->wakeref);
}
-static inline bool intel_gt_pm_get_if_awake(struct intel_gt *gt)
+static inline intel_wakeref_t intel_gt_pm_get_if_awake(struct intel_gt *gt)
{
- return intel_wakeref_get_if_active(&gt->wakeref);
+ if (!intel_wakeref_get_if_active(&gt->wakeref))
+ return 0;
+
+ return intel_wakeref_track(&gt->wakeref);
}
static inline void intel_gt_pm_might_get(struct intel_gt *gt)
@@ -36,12 +45,18 @@ static inline void intel_gt_pm_might_get(struct intel_gt *gt)
intel_wakeref_might_get(&gt->wakeref);
}
-static inline void intel_gt_pm_put(struct intel_gt *gt)
+static inline void intel_gt_pm_put_untracked(struct intel_gt *gt)
{
intel_wakeref_put(&gt->wakeref);
}
-static inline void intel_gt_pm_put_async(struct intel_gt *gt)
+static inline void intel_gt_pm_put(struct intel_gt *gt, intel_wakeref_t handle)
+{
+ intel_wakeref_untrack(&gt->wakeref, handle);
+ intel_gt_pm_put_untracked(gt);
+}
+
+static inline void intel_gt_pm_put_async_untracked(struct intel_gt *gt)
{
intel_wakeref_put_async(&gt->wakeref);
}
@@ -51,9 +66,14 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt)
intel_wakeref_might_put(&gt->wakeref);
}
-#define with_intel_gt_pm(gt, tmp) \
- for (tmp = 1, intel_gt_pm_get(gt); tmp; \
- intel_gt_pm_put(gt), tmp = 0)
+static inline void intel_gt_pm_put_async(struct intel_gt *gt, intel_wakeref_t handle)
+{
+ intel_wakeref_untrack(&gt->wakeref, handle);
+ intel_gt_pm_put_async_untracked(gt);
+}
+
+#define with_intel_gt_pm(gt, wf) \
+ for (wf = intel_gt_pm_get(gt); wf; intel_gt_pm_put(gt, wf), wf = 0)
/**
* with_intel_gt_pm_if_awake - if GT is PM awake, get a reference to prevent
@@ -64,7 +84,7 @@ static inline void intel_gt_pm_might_put(struct intel_gt *gt)
* @wf: pointer to a temporary wakeref.
*/
#define with_intel_gt_pm_if_awake(gt, wf) \
- for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt), wf = 0)
+ for (wf = intel_gt_pm_get_if_awake(gt); wf; intel_gt_pm_put_async(gt, wf), wf = 0)
static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
{
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
index f900cc68d6..7114c116e9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
@@ -27,7 +27,7 @@
void intel_gt_pm_debugfs_forcewake_user_open(struct intel_gt *gt)
{
atomic_inc(&gt->user_wakeref);
- intel_gt_pm_get(gt);
+ intel_gt_pm_get_untracked(gt);
if (GRAPHICS_VER(gt->i915) >= 6)
intel_uncore_forcewake_user_get(gt->uncore);
}
@@ -36,7 +36,7 @@ void intel_gt_pm_debugfs_forcewake_user_release(struct intel_gt *gt)
{
if (GRAPHICS_VER(gt->i915) >= 6)
intel_uncore_forcewake_user_put(gt->uncore);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put_untracked(gt);
atomic_dec(&gt->user_wakeref);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index eecd0a87a6..743fe35667 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -469,6 +469,9 @@
#define XEHP_PSS_MODE2 MCR_REG(0x703c)
#define SCOREBOARD_STALL_FLUSH_CONTROL REG_BIT(5)
+#define XEHP_PSS_CHICKEN MCR_REG(0x7044)
+#define FD_END_COLLECT REG_BIT(5)
+
#define GEN7_SC_INSTDONE _MMIO(0x7100)
#define GEN12_SC_INSTDONE_EXTRA _MMIO(0x7104)
#define GEN12_SC_INSTDONE_EXTRA2 _MMIO(0x7108)
@@ -537,6 +540,9 @@
#define XEHP_SQCM MCR_REG(0x8724)
#define EN_32B_ACCESS REG_BIT(30)
+#define MTL_GSCPSMI_BASEADDR_LSB _MMIO(0x880c)
+#define MTL_GSCPSMI_BASEADDR_MSB _MMIO(0x8810)
+
#define HSW_IDICR _MMIO(0x9008)
#define IDIHASHMSK(x) (((x) & 0x3f) << 16)
@@ -1471,8 +1477,14 @@
#define ECOBITS_PPGTT_CACHE4B (0 << 8)
#define GEN12_RCU_MODE _MMIO(0x14800)
+#define XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1)
#define GEN12_RCU_MODE_CCS_ENABLE REG_BIT(0)
+#define XEHP_CCS_MODE _MMIO(0x14804)
+#define XEHP_CCS_MODE_CSLICE_MASK REG_GENMASK(2, 0) /* CCS0-3 + rsvd */
+#define XEHP_CCS_MODE_CSLICE_WIDTH ilog2(XEHP_CCS_MODE_CSLICE_MASK + 1)
+#define XEHP_CCS_MODE_CSLICE(cslice, ccs) (ccs << (cslice * XEHP_CCS_MODE_CSLICE_WIDTH))
+
#define CHV_FUSE_GT _MMIO(VLV_GUNIT_BASE + 0x2168)
#define CHV_FGT_DISABLE_SS0 (1 << 10)
#define CHV_FGT_DISABLE_SS1 (1 << 11)
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 4fbed27ef0..86f73fe558 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -63,6 +63,9 @@ struct drm_i915_gem_object *alloc_pt_lmem(struct i915_address_space *vm, int sz)
if (!IS_ERR(obj)) {
obj->base.resv = i915_vm_resv_get(vm);
obj->shares_resv_from = vm;
+
+ if (vm->fpriv)
+ i915_drm_client_add_object(vm->fpriv->client, obj);
}
return obj;
@@ -84,6 +87,9 @@ struct drm_i915_gem_object *alloc_pt_dma(struct i915_address_space *vm, int sz)
if (!IS_ERR(obj)) {
obj->base.resv = i915_vm_resv_get(vm);
obj->shares_resv_from = vm;
+
+ if (vm->fpriv)
+ i915_drm_client_add_object(vm->fpriv->client, obj);
}
return obj;
@@ -95,6 +101,16 @@ int map_pt_dma(struct i915_address_space *vm, struct drm_i915_gem_object *obj)
void *vaddr;
type = intel_gt_coherent_map_type(vm->gt, obj, true);
+ /*
+ * FIXME: It is suspected that some Address Translation Service (ATS)
+ * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
+ * Applying a write barrier to the ppgtt set entry functions appeared
+ * to have no effect, so we must temporarily use I915_MAP_WC here on
+ * MTL until a proper ATS solution is found.
+ */
+ if (IS_METEORLAKE(vm->i915))
+ type = I915_MAP_WC;
+
vaddr = i915_gem_object_pin_map_unlocked(obj, type);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
@@ -109,6 +125,16 @@ int map_pt_dma_locked(struct i915_address_space *vm, struct drm_i915_gem_object
void *vaddr;
type = intel_gt_coherent_map_type(vm->gt, obj, true);
+ /*
+ * FIXME: It is suspected that some Address Translation Service (ATS)
+ * issue on IOMMU is causing CAT errors to occur on some MTL workloads.
+ * Applying a write barrier to the ppgtt set entry functions appeared
+ * to have no effect, so we must temporarily use I915_MAP_WC here on
+ * MTL until a proper ATS solution is found.
+ */
+ if (IS_METEORLAKE(vm->i915))
+ type = I915_MAP_WC;
+
vaddr = i915_gem_object_pin_map(obj, type);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
index b471edac26..6b85222ee3 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
@@ -249,8 +249,13 @@ struct i915_address_space {
struct work_struct release_work;
struct drm_mm mm;
+ struct {
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ } rsvd;
struct intel_gt *gt;
struct drm_i915_private *i915;
+ struct drm_i915_file_private *fpriv;
struct device *dma;
u64 total; /* size addr space maps (ex. 2GB for ggtt) */
u64 reserved; /* size addr space reserved */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index eaf66d9031..7c367ba8d9 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -829,6 +829,18 @@ lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine)
}
static void
+lrc_setup_bb_per_ctx(u32 *regs,
+ const struct intel_engine_cs *engine,
+ u32 ctx_bb_ggtt_addr)
+{
+ GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1);
+ regs[lrc_ring_wa_bb_per_ctx(engine) + 1] =
+ ctx_bb_ggtt_addr |
+ PER_CTX_BB_FORCE |
+ PER_CTX_BB_VALID;
+}
+
+static void
lrc_setup_indirect_ctx(u32 *regs,
const struct intel_engine_cs *engine,
u32 ctx_bb_ggtt_addr,
@@ -1020,7 +1032,13 @@ static u32 context_wa_bb_offset(const struct intel_context *ce)
return PAGE_SIZE * ce->wa_bb_page;
}
-static u32 *context_indirect_bb(const struct intel_context *ce)
+/*
+ * per_ctx below determines which WABB section is used.
+ * When true, the function returns the location of the
+ * PER_CTX_BB. When false, the function returns the
+ * location of the INDIRECT_CTX.
+ */
+static u32 *context_wabb(const struct intel_context *ce, bool per_ctx)
{
void *ptr;
@@ -1029,6 +1047,7 @@ static u32 *context_indirect_bb(const struct intel_context *ce)
ptr = ce->lrc_reg_state;
ptr -= LRC_STATE_OFFSET; /* back to start of context image */
ptr += context_wa_bb_offset(ce);
+ ptr += per_ctx ? PAGE_SIZE : 0;
return ptr;
}
@@ -1105,7 +1124,8 @@ __lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine)
if (GRAPHICS_VER(engine->i915) >= 12) {
ce->wa_bb_page = context_size / PAGE_SIZE;
- context_size += PAGE_SIZE;
+ /* INDIRECT_CTX and PER_CTX_BB need separate pages. */
+ context_size += PAGE_SIZE * 2;
}
if (intel_context_is_parent(ce) && intel_engine_uses_guc(engine)) {
@@ -1407,12 +1427,85 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
return gen12_emit_aux_table_inv(ce->engine, cs);
}
+static u32 *xehp_emit_fastcolor_blt_wabb(const struct intel_context *ce, u32 *cs)
+{
+ struct intel_gt *gt = ce->engine->gt;
+ int mocs = gt->mocs.uc_index << 1;
+
+ /**
+ * Wa_16018031267 / Wa_16018063123 requires that SW forces the
+ * main copy engine arbitration into round robin mode. We
+ * additionally need to submit the following WABB blt command
+ * to produce 4 subblits with each subblit generating 0 byte
+ * write requests as WABB:
+ *
+ * XY_FASTCOLOR_BLT
+ * BG0 -> 5100000E
+ * BG1 -> 0000003F (Dest pitch)
+ * BG2 -> 00000000 (X1, Y1) = (0, 0)
+ * BG3 -> 00040001 (X2, Y2) = (1, 4)
+ * BG4 -> scratch
+ * BG5 -> scratch
+ * BG6-12 -> 00000000
+ * BG13 -> 20004004 (Surf. Width= 2,Surf. Height = 5 )
+ * BG14 -> 00000010 (Qpitch = 4)
+ * BG15 -> 00000000
+ */
+ *cs++ = XY_FAST_COLOR_BLT_CMD | (16 - 2);
+ *cs++ = FIELD_PREP(XY_FAST_COLOR_BLT_MOCS_MASK, mocs) | 0x3f;
+ *cs++ = 0;
+ *cs++ = 4 << 16 | 1;
+ *cs++ = lower_32_bits(i915_vma_offset(ce->vm->rsvd.vma));
+ *cs++ = upper_32_bits(i915_vma_offset(ce->vm->rsvd.vma));
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0x20004004;
+ *cs++ = 0x10;
+ *cs++ = 0;
+
+ return cs;
+}
+
+static u32 *
+xehp_emit_per_ctx_bb(const struct intel_context *ce, u32 *cs)
+{
+ /* Wa_16018031267, Wa_16018063123 */
+ if (NEEDS_FASTCOLOR_BLT_WABB(ce->engine))
+ cs = xehp_emit_fastcolor_blt_wabb(ce, cs);
+
+ return cs;
+}
+
+static void
+setup_per_ctx_bb(const struct intel_context *ce,
+ const struct intel_engine_cs *engine,
+ u32 *(*emit)(const struct intel_context *, u32 *))
+{
+ /* Place PER_CTX_BB on next page after INDIRECT_CTX */
+ u32 * const start = context_wabb(ce, true);
+ u32 *cs;
+
+ cs = emit(ce, start);
+
+ /* PER_CTX_BB must manually terminate */
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ GEM_BUG_ON(cs - start > I915_GTT_PAGE_SIZE / sizeof(*cs));
+ lrc_setup_bb_per_ctx(ce->lrc_reg_state, engine,
+ lrc_indirect_bb(ce) + PAGE_SIZE);
+}
+
static void
setup_indirect_ctx_bb(const struct intel_context *ce,
const struct intel_engine_cs *engine,
u32 *(*emit)(const struct intel_context *, u32 *))
{
- u32 * const start = context_indirect_bb(ce);
+ u32 * const start = context_wabb(ce, false);
u32 *cs;
cs = emit(ce, start);
@@ -1511,6 +1604,7 @@ u32 lrc_update_regs(const struct intel_context *ce,
/* Mutually exclusive wrt to global indirect bb */
GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size);
setup_indirect_ctx_bb(ce, engine, fn);
+ setup_per_ctx_bb(ce, engine, xehp_emit_per_ctx_bb);
}
return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE;
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index f602895f6d..6a3246240e 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -849,13 +849,12 @@ void intel_sseu_print_topology(struct drm_i915_private *i915,
const struct sseu_dev_info *sseu,
struct drm_printer *p)
{
- if (sseu->max_slices == 0) {
+ if (sseu->max_slices == 0)
drm_printf(p, "Unavailable\n");
- } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
+ else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50))
sseu_print_xehp_topology(sseu, p);
- } else {
+ else
sseu_print_hsw_topology(sseu, p);
- }
}
void intel_sseu_print_ss_info(const char *type,
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 192ac0e59a..59816dd6fb 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -10,6 +10,7 @@
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
#include "intel_gt_mcr.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
@@ -51,7 +52,8 @@
* registers belonging to BCS, VCS or VECS should be implemented in
* xcs_engine_wa_init(). Workarounds for registers not belonging to a specific
* engine's MMIO range but that are part of of the common RCS/CCS reset domain
- * should be implemented in general_render_compute_wa_init().
+ * should be implemented in general_render_compute_wa_init(). The settings
+ * about the CCS load balancing should be added in ccs_engine_wa_mode().
*
* - GT workarounds: the list of these WAs is applied whenever these registers
* revert to their default values: on GPU reset, suspend/resume [1]_, etc.
@@ -777,6 +779,9 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
/* Wa_18019271663:dg2 */
wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);
+
+ /* Wa_14019877138:dg2 */
+ wa_mcr_masked_en(wal, XEHP_PSS_CHICKEN, FD_END_COLLECT);
}
static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine,
@@ -786,8 +791,13 @@ static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine,
dg2_ctx_gt_tuning_init(engine, wal);
- if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) ||
- IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER))
+ /*
+ * Due to Wa_16014892111, the DRAW_WATERMARK tuning must be done in
+ * gen12_emit_indirect_ctx_rcs() rather than here on some early
+ * steppings.
+ */
+ if (!(IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)))
wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false);
}
@@ -905,7 +915,7 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
if (engine->class != RENDER_CLASS)
goto done;
- if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71)))
+ if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
xelpg_ctx_workarounds_init(engine, wal);
else if (IS_PONTEVECCHIO(i915))
; /* noop; none at this time */
@@ -1640,7 +1650,8 @@ pvc_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
static void
xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
- /* Wa_14018778641 / Wa_18018781329 */
+ /* Wa_14018575942 / Wa_18018781329 */
+ wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
/* Wa_22016670082 */
@@ -1663,8 +1674,22 @@ xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
}
static void
+wa_16021867713(struct intel_gt *gt, struct i915_wa_list *wal)
+{
+ struct intel_engine_cs *engine;
+ int id;
+
+ for_each_engine(engine, gt, id)
+ if (engine->class == VIDEO_DECODE_CLASS)
+ wa_write_or(wal, VDBOX_CGCTL3F1C(engine->mmio_base),
+ MFXPIPE_CLKGATE_DIS);
+}
+
+static void
xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
+ wa_16021867713(gt, wal);
+
/*
* Wa_14018778641
* Wa_18018781329
@@ -1674,6 +1699,9 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
*/
wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB);
+ /* Wa_22016670082 */
+ wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE);
+
debug_dump_steering(gt);
}
@@ -1690,7 +1718,7 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
*/
static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal)
{
- if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) {
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) {
wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
}
@@ -1723,7 +1751,7 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
return;
}
- if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)))
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)))
xelpg_gt_workarounds_init(gt, wal);
else if (IS_PONTEVECCHIO(i915))
pvc_gt_workarounds_init(gt, wal);
@@ -2196,7 +2224,7 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)
if (engine->gt->type == GT_MEDIA)
; /* none yet */
- else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71)))
+ else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
xelpg_whitelist_build(engine);
else if (IS_PONTEVECCHIO(i915))
pvc_whitelist_build(engine);
@@ -2340,14 +2368,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
0, true);
}
- if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) {
- /* Wa_22014600077:dg2 */
- wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
- _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
- 0 /* Wa_14012342262 write-only reg, so skip verification */,
- true);
- }
-
if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
/*
@@ -2782,6 +2802,11 @@ xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
RING_SEMA_WAIT_POLL(engine->mmio_base),
1);
}
+ /* Wa_16018031267, Wa_16018063123 */
+ if (NEEDS_FASTCOLOR_BLT_WABB(engine))
+ wa_masked_field_set(wal, ECOSKPD(engine->mmio_base),
+ XEHP_BLITTER_SCHEDULING_MODE_MASK,
+ XEHP_BLITTER_ROUND_ROBIN_MODE);
}
static void
@@ -2811,7 +2836,7 @@ add_render_compute_tuning_settings(struct intel_gt *gt,
{
struct drm_i915_private *i915 = gt->i915;
- if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915))
+ if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915))
wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);
/*
@@ -2827,6 +2852,28 @@ add_render_compute_tuning_settings(struct intel_gt *gt,
wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
}
+static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+{
+ struct intel_gt *gt = engine->gt;
+
+ if (!IS_DG2(gt->i915))
+ return;
+
+ /*
+ * Wa_14019159160: This workaround, along with others, leads to
+ * significant challenges in utilizing load balancing among the
+ * CCS slices. Consequently, an architectural decision has been
+ * made to completely disable automatic CCS load balancing.
+ */
+ wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE);
+
+ /*
+ * After having disabled automatic load balancing we need to
+ * assign all slices to a single CCS. We will call it CCS mode 1
+ */
+ intel_gt_apply_ccs_mode(gt);
+}
+
/*
* The workarounds in this function apply to shared registers in
* the general render reset domain that aren't tied to a
@@ -2864,7 +2911,8 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
}
if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) ||
- IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER))
+ IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER) ||
+ IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 74), IP_VER(12, 74)))
/* Wa_14017856879 */
wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH);
@@ -2915,6 +2963,9 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
* Wa_22015475538:dg2
*/
wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);
+
+ /* Wa_18028616096 */
+ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3);
}
if (IS_DG2_G11(i915)) {
@@ -2943,11 +2994,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li
true);
}
- if (IS_DG2_G10(i915) || IS_DG2_G12(i915)) {
- /* Wa_18028616096 */
- wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3);
- }
-
if (IS_XEHPSDV(i915)) {
/* Wa_1409954639 */
wa_mcr_masked_en(wal,
@@ -2978,8 +3024,10 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal
* to a single RCS/CCS engine's workaround list since
* they're reset as part of the general render domain reset.
*/
- if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
+ if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) {
general_render_compute_wa_init(engine, wal);
+ ccs_engine_wa_mode(engine, wal);
+ }
if (engine->class == COMPUTE_CLASS)
ccs_engine_wa_init(engine, wal);
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
index 86cecf7a11..5ffa5e30f4 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c
@@ -21,20 +21,22 @@ static int cmp_u32(const void *A, const void *B)
return *a - *b;
}
-static void perf_begin(struct intel_gt *gt)
+static intel_wakeref_t perf_begin(struct intel_gt *gt)
{
- intel_gt_pm_get(gt);
+ intel_wakeref_t wakeref = intel_gt_pm_get(gt);
/* Boost gpufreq to max [waitboost] and keep it fixed */
atomic_inc(&gt->rps.num_waiters);
queue_work(gt->i915->unordered_wq, &gt->rps.work);
flush_work(&gt->rps.work);
+
+ return wakeref;
}
-static int perf_end(struct intel_gt *gt)
+static int perf_end(struct intel_gt *gt, intel_wakeref_t wakeref)
{
atomic_dec(&gt->rps.num_waiters);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);
return igt_flush_test(gt->i915);
}
@@ -133,12 +135,13 @@ static int perf_mi_bb_start(void *arg)
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ intel_wakeref_t wakeref;
int err = 0;
if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
return 0;
- perf_begin(gt);
+ wakeref = perf_begin(gt);
for_each_engine(engine, gt, id) {
struct intel_context *ce = engine->kernel_context;
struct i915_vma *batch;
@@ -207,7 +210,7 @@ out:
pr_info("%s: MI_BB_START cycles: %u\n",
engine->name, trifilter(cycles));
}
- if (perf_end(gt))
+ if (perf_end(gt, wakeref))
err = -EIO;
return err;
@@ -260,12 +263,13 @@ static int perf_mi_noop(void *arg)
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ intel_wakeref_t wakeref;
int err = 0;
if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
return 0;
- perf_begin(gt);
+ wakeref = perf_begin(gt);
for_each_engine(engine, gt, id) {
struct intel_context *ce = engine->kernel_context;
struct i915_vma *base, *nop;
@@ -364,7 +368,7 @@ out:
pr_info("%s: 16K MI_NOOP cycles: %u\n",
engine->name, trifilter(cycles));
}
- if (perf_end(gt))
+ if (perf_end(gt, wakeref))
err = -EIO;
return err;
diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
index 273d440a53..bc441ce7b3 100644
--- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
+++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c
@@ -84,7 +84,7 @@ static struct pulse *pulse_create(void)
static void pulse_unlock_wait(struct pulse *p)
{
- i915_active_unlock_wait(&p->active);
+ wait_var_event_timeout(&p->active, i915_active_is_idle(&p->active), HZ);
}
static int __live_idle_pulse(struct intel_engine_cs *engine,
diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
index 0971241707..33351deeea 100644
--- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c
@@ -81,6 +81,7 @@ static int live_gt_clocks(void *arg)
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ intel_wakeref_t wakeref;
int err = 0;
if (!gt->clock_frequency) { /* unknown */
@@ -91,7 +92,7 @@ static int live_gt_clocks(void *arg)
if (GRAPHICS_VER(gt->i915) < 4) /* Any CS_TIMESTAMP? */
return 0;
- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
for_each_engine(engine, gt, id) {
@@ -128,7 +129,7 @@ static int live_gt_clocks(void *arg)
}
intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 5f826b6dcf..e17b8777d2 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1555,7 +1555,7 @@ static int live_lrc_isolation(void *arg)
return err;
}
-static int indirect_ctx_submit_req(struct intel_context *ce)
+static int wabb_ctx_submit_req(struct intel_context *ce)
{
struct i915_request *rq;
int err = 0;
@@ -1579,7 +1579,8 @@ static int indirect_ctx_submit_req(struct intel_context *ce)
#define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32))
static u32 *
-emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
+emit_wabb_ctx_canary(const struct intel_context *ce,
+ u32 *cs, bool per_ctx)
{
*cs++ = MI_STORE_REGISTER_MEM_GEN8 |
MI_SRM_LRM_GLOBAL_GTT |
@@ -1587,26 +1588,43 @@ emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
*cs++ = i915_mmio_reg_offset(RING_START(0));
*cs++ = i915_ggtt_offset(ce->state) +
context_wa_bb_offset(ce) +
- CTX_BB_CANARY_OFFSET;
+ CTX_BB_CANARY_OFFSET +
+ (per_ctx ? PAGE_SIZE : 0);
*cs++ = 0;
return cs;
}
+static u32 *
+emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
+{
+ return emit_wabb_ctx_canary(ce, cs, false);
+}
+
+static u32 *
+emit_per_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
+{
+ return emit_wabb_ctx_canary(ce, cs, true);
+}
+
static void
-indirect_ctx_bb_setup(struct intel_context *ce)
+wabb_ctx_setup(struct intel_context *ce, bool per_ctx)
{
- u32 *cs = context_indirect_bb(ce);
+ u32 *cs = context_wabb(ce, per_ctx);
cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
- setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
+ if (per_ctx)
+ setup_per_ctx_bb(ce, ce->engine, emit_per_ctx_bb_canary);
+ else
+ setup_indirect_ctx_bb(ce, ce->engine, emit_indirect_ctx_bb_canary);
}
-static bool check_ring_start(struct intel_context *ce)
+static bool check_ring_start(struct intel_context *ce, bool per_ctx)
{
const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
- LRC_STATE_OFFSET + context_wa_bb_offset(ce);
+ LRC_STATE_OFFSET + context_wa_bb_offset(ce) +
+ (per_ctx ? PAGE_SIZE : 0);
if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
return true;
@@ -1618,21 +1636,21 @@ static bool check_ring_start(struct intel_context *ce)
return false;
}
-static int indirect_ctx_bb_check(struct intel_context *ce)
+static int wabb_ctx_check(struct intel_context *ce, bool per_ctx)
{
int err;
- err = indirect_ctx_submit_req(ce);
+ err = wabb_ctx_submit_req(ce);
if (err)
return err;
- if (!check_ring_start(ce))
+ if (!check_ring_start(ce, per_ctx))
return -EINVAL;
return 0;
}
-static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
+static int __lrc_wabb_ctx(struct intel_engine_cs *engine, bool per_ctx)
{
struct intel_context *a, *b;
int err;
@@ -1667,14 +1685,14 @@ static int __live_lrc_indirect_ctx_bb(struct intel_engine_cs *engine)
* As ring start is restored apriori of starting the indirect ctx bb and
* as it will be different for each context, it fits to this purpose.
*/
- indirect_ctx_bb_setup(a);
- indirect_ctx_bb_setup(b);
+ wabb_ctx_setup(a, per_ctx);
+ wabb_ctx_setup(b, per_ctx);
- err = indirect_ctx_bb_check(a);
+ err = wabb_ctx_check(a, per_ctx);
if (err)
goto unpin_b;
- err = indirect_ctx_bb_check(b);
+ err = wabb_ctx_check(b, per_ctx);
unpin_b:
intel_context_unpin(b);
@@ -1688,7 +1706,7 @@ put_a:
return err;
}
-static int live_lrc_indirect_ctx_bb(void *arg)
+static int lrc_wabb_ctx(void *arg, bool per_ctx)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
@@ -1697,7 +1715,7 @@ static int live_lrc_indirect_ctx_bb(void *arg)
for_each_engine(engine, gt, id) {
intel_engine_pm_get(engine);
- err = __live_lrc_indirect_ctx_bb(engine);
+ err = __lrc_wabb_ctx(engine, per_ctx);
intel_engine_pm_put(engine);
if (igt_flush_test(gt->i915))
@@ -1710,6 +1728,16 @@ static int live_lrc_indirect_ctx_bb(void *arg)
return err;
}
+static int live_lrc_indirect_ctx_bb(void *arg)
+{
+ return lrc_wabb_ctx(arg, false);
+}
+
+static int live_lrc_per_ctx_bb(void *arg)
+{
+ return lrc_wabb_ctx(arg, true);
+}
+
static void garbage_reset(struct intel_engine_cs *engine,
struct i915_request *rq)
{
@@ -1947,6 +1975,7 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915)
SUBTEST(live_lrc_garbage),
SUBTEST(live_pphwsp_runtime),
SUBTEST(live_lrc_indirect_ctx_bb),
+ SUBTEST(live_lrc_per_ctx_bb),
};
if (!HAS_LOGICAL_RING_CONTEXTS(i915))
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 79aa6ac66a..f40de408cd 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -261,11 +261,12 @@ static int igt_atomic_reset(void *arg)
{
struct intel_gt *gt = arg;
const typeof(*igt_atomic_phases) *p;
+ intel_wakeref_t wakeref;
int err = 0;
/* Check that the resets are usable from atomic context */
- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
igt_global_reset_lock(gt);
/* Flush any requests before we get started and check basics */
@@ -296,7 +297,7 @@ static int igt_atomic_reset(void *arg)
unlock:
igt_global_reset_unlock(gt);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);
return err;
}
@@ -307,6 +308,7 @@ static int igt_atomic_engine_reset(void *arg)
const typeof(*igt_atomic_phases) *p;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ intel_wakeref_t wakeref;
int err = 0;
/* Check that the resets are usable from atomic context */
@@ -317,7 +319,7 @@ static int igt_atomic_engine_reset(void *arg)
if (intel_uc_uses_guc_submission(&gt->uc))
return 0;
- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
igt_global_reset_lock(gt);
/* Flush any requests before we get started and check basics */
@@ -365,7 +367,7 @@ static int igt_atomic_engine_reset(void *arg)
out_unlock:
igt_global_reset_unlock(gt);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);
return err;
}
diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c
index fb30f733b0..dcef8d4989 100644
--- a/drivers/gpu/drm/i915/gt/selftest_rps.c
+++ b/drivers/gpu/drm/i915/gt/selftest_rps.c
@@ -224,6 +224,7 @@ int live_rps_clock_interval(void *arg)
struct intel_engine_cs *engine;
enum intel_engine_id id;
struct igt_spinner spin;
+ intel_wakeref_t wakeref;
int err = 0;
if (!intel_rps_is_enabled(rps) || GRAPHICS_VER(gt->i915) < 6)
@@ -236,7 +237,7 @@ int live_rps_clock_interval(void *arg)
saved_work = rps->work.func;
rps->work.func = dummy_rps_work;
- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
intel_rps_disable(&gt->rps);
intel_gt_check_clock_frequency(gt);
@@ -355,7 +356,7 @@ int live_rps_clock_interval(void *arg)
}
intel_rps_enable(&gt->rps);
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);
igt_spinner_fini(&spin);
@@ -376,6 +377,7 @@ int live_rps_control(void *arg)
struct intel_engine_cs *engine;
enum intel_engine_id id;
struct igt_spinner spin;
+ intel_wakeref_t wakeref;
int err = 0;
/*
@@ -398,7 +400,7 @@ int live_rps_control(void *arg)
saved_work = rps->work.func;
rps->work.func = dummy_rps_work;
- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
for_each_engine(engine, gt, id) {
struct i915_request *rq;
ktime_t min_dt, max_dt;
@@ -488,7 +490,7 @@ int live_rps_control(void *arg)
break;
}
}
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);
igt_spinner_fini(&spin);
@@ -1023,6 +1025,7 @@ int live_rps_interrupt(void *arg)
struct intel_engine_cs *engine;
enum intel_engine_id id;
struct igt_spinner spin;
+ intel_wakeref_t wakeref;
u32 pm_events;
int err = 0;
@@ -1033,9 +1036,9 @@ int live_rps_interrupt(void *arg)
if (!intel_rps_has_interrupts(rps) || GRAPHICS_VER(gt->i915) < 6)
return 0;
- intel_gt_pm_get(gt);
- pm_events = rps->pm_events;
- intel_gt_pm_put(gt);
+ pm_events = 0;
+ with_intel_gt_pm(gt, wakeref)
+ pm_events = rps->pm_events;
if (!pm_events) {
pr_err("No RPS PM events registered, but RPS is enabled?\n");
return -ENODEV;
diff --git a/drivers/gpu/drm/i915/gt/selftest_slpc.c b/drivers/gpu/drm/i915/gt/selftest_slpc.c
index 952c8d52d6..302d054029 100644
--- a/drivers/gpu/drm/i915/gt/selftest_slpc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_slpc.c
@@ -266,6 +266,7 @@ static int run_test(struct intel_gt *gt, int test_type)
struct intel_rps *rps = &gt->rps;
struct intel_engine_cs *engine;
enum intel_engine_id id;
+ intel_wakeref_t wakeref;
struct igt_spinner spin;
u32 slpc_min_freq, slpc_max_freq;
int err = 0;
@@ -311,7 +312,7 @@ static int run_test(struct intel_gt *gt, int test_type)
}
intel_gt_pm_wait_for_idle(gt);
- intel_gt_pm_get(gt);
+ wakeref = intel_gt_pm_get(gt);
for_each_engine(engine, gt, id) {
struct i915_request *rq;
u32 max_act_freq;
@@ -397,7 +398,7 @@ static int run_test(struct intel_gt *gt, int test_type)
if (igt_flush_test(gt->i915))
err = -EIO;
- intel_gt_pm_put(gt);
+ intel_gt_pm_put(gt, wakeref);
igt_spinner_fini(&spin);
intel_gt_pm_wait_for_idle(gt);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
index 5f138de3c1..40817ebcca 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_proxy.c
@@ -322,6 +322,7 @@ static int i915_gsc_proxy_component_bind(struct device *i915_kdev,
gsc->proxy.component = data;
gsc->proxy.component->mei_dev = mei_kdev;
mutex_unlock(&gsc->proxy.mutex);
+ gt_dbg(gt, "GSC proxy mei component bound\n");
return 0;
}
@@ -342,6 +343,7 @@ static void i915_gsc_proxy_component_unbind(struct device *i915_kdev,
with_intel_runtime_pm(&i915->runtime_pm, wakeref)
intel_uncore_rmw(gt->uncore, HECI_H_CSR(MTL_GSC_HECI2_BASE),
HECI_H_CSR_IE | HECI_H_CSR_RST, 0);
+ gt_dbg(gt, "GSC proxy mei component unbound\n");
}
static const struct component_ops i915_gsc_proxy_component_ops = {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 3f3df1166b..2b450c43bb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -330,7 +330,7 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc)
static u32 guc_ctl_devid(struct intel_guc *guc)
{
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ struct drm_i915_private *i915 = guc_to_i915(guc);
return (INTEL_DEVID(i915) << 16) | INTEL_REVID(i915);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 2b6dfe62c8..813cc888e6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -105,61 +105,67 @@ struct intel_guc {
*/
struct {
/**
- * @lock: protects everything in submission_state,
- * ce->guc_id.id, and ce->guc_id.ref when transitioning in and
- * out of zero
+ * @submission_state.lock: protects everything in
+ * submission_state, ce->guc_id.id, and ce->guc_id.ref
+ * when transitioning in and out of zero
*/
spinlock_t lock;
/**
- * @guc_ids: used to allocate new guc_ids, single-lrc
+ * @submission_state.guc_ids: used to allocate new
+ * guc_ids, single-lrc
*/
struct ida guc_ids;
/**
- * @num_guc_ids: Number of guc_ids, selftest feature to be able
- * to reduce this number while testing.
+ * @submission_state.num_guc_ids: Number of guc_ids, selftest
+ * feature to be able to reduce this number while testing.
*/
int num_guc_ids;
/**
- * @guc_ids_bitmap: used to allocate new guc_ids, multi-lrc
+ * @submission_state.guc_ids_bitmap: used to allocate
+ * new guc_ids, multi-lrc
*/
unsigned long *guc_ids_bitmap;
/**
- * @guc_id_list: list of intel_context with valid guc_ids but no
- * refs
+ * @submission_state.guc_id_list: list of intel_context
+ * with valid guc_ids but no refs
*/
struct list_head guc_id_list;
/**
- * @guc_ids_in_use: Number single-lrc guc_ids in use
+ * @submission_state.guc_ids_in_use: Number single-lrc
+ * guc_ids in use
*/
unsigned int guc_ids_in_use;
/**
- * @destroyed_contexts: list of contexts waiting to be destroyed
- * (deregistered with the GuC)
+ * @submission_state.destroyed_contexts: list of contexts
+ * waiting to be destroyed (deregistered with the GuC)
*/
struct list_head destroyed_contexts;
/**
- * @destroyed_worker: worker to deregister contexts, need as we
- * need to take a GT PM reference and can't from destroy
- * function as it might be in an atomic context (no sleeping)
+ * @submission_state.destroyed_worker: worker to deregister
+ * contexts, need as we need to take a GT PM reference and
+ * can't from destroy function as it might be in an atomic
+ * context (no sleeping)
*/
struct work_struct destroyed_worker;
/**
- * @reset_fail_worker: worker to trigger a GT reset after an
- * engine reset fails
+ * @submission_state.reset_fail_worker: worker to trigger
+ * a GT reset after an engine reset fails
*/
struct work_struct reset_fail_worker;
/**
- * @reset_fail_mask: mask of engines that failed to reset
+ * @submission_state.reset_fail_mask: mask of engines that
+ * failed to reset
*/
intel_engine_mask_t reset_fail_mask;
/**
- * @sched_disable_delay_ms: schedule disable delay, in ms, for
- * contexts
+ * @submission_state.sched_disable_delay_ms: schedule
+ * disable delay, in ms, for contexts
*/
unsigned int sched_disable_delay_ms;
/**
- * @sched_disable_gucid_threshold: threshold of min remaining available
- * guc_ids before we start bypassing the schedule disable delay
+ * @submission_state.sched_disable_gucid_threshold:
+ * threshold of min remaining available guc_ids before
+ * we start bypassing the schedule disable delay
*/
unsigned int sched_disable_gucid_threshold;
} submission_state;
@@ -243,37 +249,40 @@ struct intel_guc {
*/
struct {
/**
- * @lock: Lock protecting the below fields and the engine stats.
+ * @timestamp.lock: Lock protecting the below fields and
+ * the engine stats.
*/
spinlock_t lock;
/**
- * @gt_stamp: 64 bit extended value of the GT timestamp.
+ * @timestamp.gt_stamp: 64-bit extended value of the GT
+ * timestamp.
*/
u64 gt_stamp;
/**
- * @ping_delay: Period for polling the GT timestamp for
- * overflow.
+ * @timestamp.ping_delay: Period for polling the GT
+ * timestamp for overflow.
*/
unsigned long ping_delay;
/**
- * @work: Periodic work to adjust GT timestamp, engine and
- * context usage for overflows.
+ * @timestamp.work: Periodic work to adjust GT timestamp,
+ * engine and context usage for overflows.
*/
struct delayed_work work;
/**
- * @shift: Right shift value for the gpm timestamp
+ * @timestamp.shift: Right shift value for the gpm timestamp
*/
u32 shift;
/**
- * @last_stat_jiffies: jiffies at last actual stats collection time
- * We use this timestamp to ensure we don't oversample the
- * stats because runtime power management events can trigger
- * stats collection at much higher rates than required.
+ * @timestamp.last_stat_jiffies: jiffies at last actual
+ * stats collection time. We use this timestamp to ensure
+ * we don't oversample the stats because runtime power
+ * management events can trigger stats collection at much
+ * higher rates than required.
*/
unsigned long last_stat_jiffies;
} timestamp;
@@ -297,6 +306,10 @@ struct intel_guc {
* @number_guc_id_stolen: The number of guc_ids that have been stolen
*/
int number_guc_id_stolen;
+ /**
+ * @fast_response_selftest: Backdoor to CT handler for fast response selftest
+ */
+ u32 fast_response_selftest;
#endif
};
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
index a4da0208c8..a1cd40d805 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c
@@ -355,7 +355,7 @@ guc_capture_alloc_steered_lists(struct intel_guc *guc,
static const struct __guc_mmio_reg_descr_group *
guc_capture_get_device_reglist(struct intel_guc *guc)
{
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ struct drm_i915_private *i915 = guc_to_i915(guc);
const struct __guc_mmio_reg_descr_group *lists;
if (GRAPHICS_VER(i915) >= 12)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
index 89e314b375..0d5197c082 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c
@@ -265,7 +265,7 @@ int intel_guc_ct_init(struct intel_guc_ct *ct)
u32 *cmds;
int err;
- err = i915_inject_probe_error(guc_to_gt(guc)->i915, -ENXIO);
+ err = i915_inject_probe_error(guc_to_i915(guc), -ENXIO);
if (err)
return err;
@@ -1076,6 +1076,15 @@ static int ct_handle_response(struct intel_guc_ct *ct, struct ct_incoming_msg *r
found = true;
break;
}
+
+#ifdef CONFIG_DRM_I915_SELFTEST
+ if (!found && ct_to_guc(ct)->fast_response_selftest) {
+ CT_DEBUG(ct, "Assuming unsolicited response due to FAST_REQUEST selftest\n");
+ ct_to_guc(ct)->fast_response_selftest++;
+ found = true;
+ }
+#endif
+
if (!found) {
CT_ERROR(ct, "Unsolicited response message: len %u, data %#x (fence %u, last %u)\n",
len, hxg[0], fence, ct->requests.last_fence);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index 55bc8b55fb..bf16351c93 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -520,7 +520,7 @@ void intel_guc_log_init_early(struct intel_guc_log *log)
static int guc_log_relay_create(struct intel_guc_log *log)
{
struct intel_guc *guc = log_to_guc(log);
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ struct drm_i915_private *i915 = guc_to_i915(guc);
struct rchan *guc_log_relay_chan;
size_t n_subbufs, subbuf_size;
int ret;
@@ -573,7 +573,7 @@ static void guc_log_relay_destroy(struct intel_guc_log *log)
static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log)
{
struct intel_guc *guc = log_to_guc(log);
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ struct drm_i915_private *i915 = guc_to_i915(guc);
intel_wakeref_t wakeref;
_guc_log_copy_debuglogs_for_relay(log);
@@ -589,7 +589,7 @@ static void guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log)
static u32 __get_default_log_level(struct intel_guc_log *log)
{
struct intel_guc *guc = log_to_guc(log);
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ struct drm_i915_private *i915 = guc_to_i915(guc);
/* A negative value means "use platform/config default" */
if (i915->params.guc_log_level < 0) {
@@ -664,7 +664,7 @@ void intel_guc_log_destroy(struct intel_guc_log *log)
int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
{
struct intel_guc *guc = log_to_guc(log);
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ struct drm_i915_private *i915 = guc_to_i915(guc);
intel_wakeref_t wakeref;
int ret = 0;
@@ -796,7 +796,7 @@ void intel_guc_log_relay_flush(struct intel_guc_log *log)
static void guc_log_relay_stop(struct intel_guc_log *log)
{
struct intel_guc *guc = log_to_guc(log);
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ struct drm_i915_private *i915 = guc_to_i915(guc);
if (!log->relay.started)
return;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
index 1adec6de22..9df7927304 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_rc.c
@@ -14,7 +14,7 @@ static bool __guc_rc_supported(struct intel_guc *guc)
{
/* GuC RC is unavailable for pre-Gen12 */
return guc->submission_supported &&
- GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12;
+ GRAPHICS_VER(guc_to_i915(guc)) >= 12;
}
static bool __guc_rc_selected(struct intel_guc *guc)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index 2dfb07cc4b..3e681ab6fb 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -34,7 +34,7 @@ static bool __detect_slpc_supported(struct intel_guc *guc)
{
/* GuC SLPC is unavailable for pre-Gen12 */
return guc->submission_supported &&
- GRAPHICS_VER(guc_to_gt(guc)->i915) >= 12;
+ GRAPHICS_VER(guc_to_i915(guc)) >= 12;
}
static bool __guc_slpc_selected(struct intel_guc *guc)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 17df71117c..a259f1118c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1107,7 +1107,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc)
if (deregister)
guc_signal_context_fence(ce);
if (destroyed) {
- intel_gt_pm_put_async(guc_to_gt(guc));
+ intel_gt_pm_put_async_untracked(guc_to_gt(guc));
release_guc_id(guc, ce);
__guc_context_destroy(ce);
}
@@ -1303,6 +1303,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
unsigned long flags;
u32 reset_count;
bool in_reset;
+ intel_wakeref_t wakeref;
spin_lock_irqsave(&guc->timestamp.lock, flags);
@@ -1325,7 +1326,8 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
* start_gt_clk is derived from GuC state. To get a consistent
* view of activity, we query the GuC state only if gt is awake.
*/
- if (!in_reset && intel_gt_pm_get_if_awake(gt)) {
+ wakeref = in_reset ? 0 : intel_gt_pm_get_if_awake(gt);
+ if (wakeref) {
stats_saved = *stats;
gt_stamp_saved = guc->timestamp.gt_stamp;
/*
@@ -1334,7 +1336,7 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now)
*/
guc_update_engine_gt_clks(engine);
guc_update_pm_timestamp(guc, now);
- intel_gt_pm_put_async(gt);
+ intel_gt_pm_put_async(gt, wakeref);
if (i915_reset_count(gpu_error) != reset_count) {
*stats = stats_saved;
guc->timestamp.gt_stamp = gt_stamp_saved;
@@ -3385,9 +3387,9 @@ static void destroyed_worker_func(struct work_struct *w)
struct intel_guc *guc = container_of(w, struct intel_guc,
submission_state.destroyed_worker);
struct intel_gt *gt = guc_to_gt(guc);
- int tmp;
+ intel_wakeref_t wakeref;
- with_intel_gt_pm(gt, tmp)
+ with_intel_gt_pm(gt, wakeref)
deregister_destroyed_contexts(guc);
}
@@ -4624,12 +4626,12 @@ static bool __guc_submission_supported(struct intel_guc *guc)
{
/* GuC submission is unavailable for pre-Gen11 */
return intel_guc_is_supported(guc) &&
- GRAPHICS_VER(guc_to_gt(guc)->i915) >= 11;
+ GRAPHICS_VER(guc_to_i915(guc)) >= 11;
}
static bool __guc_submission_selected(struct intel_guc *guc)
{
- struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
+ struct drm_i915_private *i915 = guc_to_i915(guc);
if (!intel_guc_submission_is_supported(guc))
return false;
@@ -4894,7 +4896,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
intel_context_put(ce);
} else if (context_destroyed(ce)) {
/* Context has been destroyed */
- intel_gt_pm_put_async(guc_to_gt(guc));
+ intel_gt_pm_put_async_untracked(guc_to_gt(guc));
release_guc_id(guc, ce);
__guc_context_destroy(ce);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 27f6561dd7..3872d309ed 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -106,11 +106,6 @@ static void __confirm_options(struct intel_uc *uc)
gt_info(gt, "Incompatible option enable_guc=%d - %s\n",
i915->params.enable_guc, "GuC is not supported!");
- if (i915->params.enable_guc & ENABLE_GUC_LOAD_HUC &&
- !intel_uc_supports_huc(uc))
- gt_info(gt, "Incompatible option enable_guc=%d - %s\n",
- i915->params.enable_guc, "HuC is not supported!");
-
if (i915->params.enable_guc & ENABLE_GUC_SUBMISSION &&
!intel_uc_supports_guc_submission(uc))
gt_info(gt, "Incompatible option enable_guc=%d - %s\n",
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 362639162e..756093eaf2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -1343,16 +1343,13 @@ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len)
for_each_sgt_page(page, iter, uc_fw->obj->mm.pages) {
u32 len = min_t(u32, size, PAGE_SIZE - offset);
- void *vaddr;
if (idx > 0) {
idx--;
continue;
}
- vaddr = kmap_atomic(page);
- memcpy(dst, vaddr + offset, len);
- kunmap_atomic(vaddr);
+ memcpy_from_page(dst, page, offset, len);
offset = 0;
dst += len;
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
index bfb7214356..c900aac85a 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c
@@ -286,11 +286,126 @@ err_wakeref:
return ret;
}
+/*
+ * Send a context schedule H2G message with an invalid context id.
+ * This should generate a GUC_RESULT_INVALID_CONTEXT response.
+ */
+static int bad_h2g(struct intel_guc *guc)
+{
+ u32 action[] = {
+ INTEL_GUC_ACTION_SCHED_CONTEXT,
+ 0x12345678,
+ };
+
+ return intel_guc_send_nb(guc, action, ARRAY_SIZE(action), 0);
+}
+
+/*
+ * Set a spinner running to make sure the system is alive and active,
+ * then send a bad but asynchronous H2G command and wait to see if an
+ * error response is returned. If no response is received or if the
+ * spinner dies then the test will fail.
+ */
+#define FAST_RESPONSE_TIMEOUT_MS 1000
+static int intel_guc_fast_request(void *arg)
+{
+ struct intel_gt *gt = arg;
+ struct intel_context *ce;
+ struct igt_spinner spin;
+ struct i915_request *rq;
+ intel_wakeref_t wakeref;
+ struct intel_engine_cs *engine = intel_selftest_find_any_engine(gt);
+ bool spinning = false;
+ int ret = 0;
+
+ if (!engine)
+ return 0;
+
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ ret = PTR_ERR(ce);
+ gt_err(gt, "Failed to create spinner request: %pe\n", ce);
+ goto err_pm;
+ }
+
+ ret = igt_spinner_init(&spin, engine->gt);
+ if (ret) {
+ gt_err(gt, "Failed to create spinner: %pe\n", ERR_PTR(ret));
+ goto err_pm;
+ }
+ spinning = true;
+
+ rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+ intel_context_put(ce);
+ if (IS_ERR(rq)) {
+ ret = PTR_ERR(rq);
+ gt_err(gt, "Failed to create spinner request: %pe\n", rq);
+ goto err_spin;
+ }
+
+ ret = request_add_spin(rq, &spin);
+ if (ret) {
+ gt_err(gt, "Failed to add Spinner request: %pe\n", ERR_PTR(ret));
+ goto err_rq;
+ }
+
+ gt->uc.guc.fast_response_selftest = 1;
+
+ ret = bad_h2g(&gt->uc.guc);
+ if (ret) {
+ gt_err(gt, "Failed to send H2G: %pe\n", ERR_PTR(ret));
+ goto err_rq;
+ }
+
+ ret = wait_for(gt->uc.guc.fast_response_selftest != 1 || i915_request_completed(rq),
+ FAST_RESPONSE_TIMEOUT_MS);
+ if (ret) {
+ gt_err(gt, "Request wait failed: %pe\n", ERR_PTR(ret));
+ goto err_rq;
+ }
+
+ if (i915_request_completed(rq)) {
+ gt_err(gt, "Spinner died waiting for fast request error!\n");
+ ret = -EIO;
+ goto err_rq;
+ }
+
+ if (gt->uc.guc.fast_response_selftest != 2) {
+ gt_err(gt, "Unexpected fast response count: %d\n",
+ gt->uc.guc.fast_response_selftest);
+ goto err_rq;
+ }
+
+ igt_spinner_end(&spin);
+ spinning = false;
+
+ ret = intel_selftest_wait_for_rq(rq);
+ if (ret) {
+ gt_err(gt, "Request failed to complete: %pe\n", ERR_PTR(ret));
+ goto err_rq;
+ }
+
+err_rq:
+ i915_request_put(rq);
+
+err_spin:
+ if (spinning)
+ igt_spinner_end(&spin);
+ igt_spinner_fini(&spin);
+
+err_pm:
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+ return ret;
+}
+
int intel_guc_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(intel_guc_scrub_ctbs),
SUBTEST(intel_guc_steal_guc_ids),
+ SUBTEST(intel_guc_fast_request),
};
struct intel_gt *gt = to_gt(i915);
diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
index 34b5d952e2..26fdc392fc 100644
--- a/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc_hangcheck.c
@@ -74,7 +74,7 @@ static int intel_hang_guc(void *arg)
goto err;
}
- rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
+ rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
intel_context_put(ce);
if (IS_ERR(rq)) {
ret = PTR_ERR(rq);