diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 17:39:57 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 17:39:57 +0000 |
commit | dc50eab76b709d68175a358d6e23a5a3890764d3 (patch) | |
tree | c754d0390db060af0213ff994f0ac310e4cfd6e9 /drivers/gpu/drm/i915/gt | |
parent | Adding debian version 6.6.15-2. (diff) | |
download | linux-dc50eab76b709d68175a358d6e23a5a3890764d3.tar.xz linux-dc50eab76b709d68175a358d6e23a5a3890764d3.zip |
Merging upstream version 6.7.7.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/gpu/drm/i915/gt')
47 files changed, 1276 insertions, 610 deletions
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c index 7ad36198aa..86a04afff6 100644 --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -4,9 +4,9 @@ */ #include "gen8_engine_cs.h" -#include "i915_drv.h" #include "intel_engine_regs.h" #include "intel_gpu_commands.h" +#include "intel_gt.h" #include "intel_lrc.h" #include "intel_ring.h" @@ -226,8 +226,8 @@ u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs) static int mtl_dummy_pipe_control(struct i915_request *rq) { /* Wa_14016712196 */ - if (IS_MTL_GRAPHICS_STEP(rq->i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(rq->i915, P, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 71)) || + IS_DG2(rq->i915)) { u32 *cs; /* dummy PIPE_CONTROL + depth flush */ @@ -278,7 +278,8 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) * deals with Protected Memory which is not needed for * AUX CCS invalidation and lead to unwanted side effects. */ - if (mode & EMIT_FLUSH) + if ((mode & EMIT_FLUSH) && + GRAPHICS_VER_FULL(rq->i915) < IP_VER(12, 70)) bit_group_1 |= PIPE_CONTROL_FLUSH_L3; bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH; @@ -808,18 +809,20 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs) u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) { struct drm_i915_private *i915 = rq->i915; + struct intel_gt *gt = rq->engine->gt; u32 flags = (PIPE_CONTROL_CS_STALL | PIPE_CONTROL_TLB_INVALIDATE | PIPE_CONTROL_TILE_CACHE_FLUSH | - PIPE_CONTROL_FLUSH_L3 | PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_DC_FLUSH_ENABLE | PIPE_CONTROL_FLUSH_ENABLE); + if (GRAPHICS_VER_FULL(rq->i915) < IP_VER(12, 70)) + flags |= PIPE_CONTROL_FLUSH_L3; + /* Wa_14016712196 */ - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915)) /* dummy PIPE_CONTROL + depth flush */ cs = gen12_emit_pipe_control(cs, 0, PIPE_CONTROL_DEPTH_CACHE_FLUSH, 0); diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index c8568e5d11..9895e18df0 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -242,9 +242,9 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); len = gen8_pd_range(start, end, lvl--, &idx); - DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n", - __func__, vm, lvl + 1, start, end, - idx, len, atomic_read(px_used(pd))); + GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n", + __func__, vm, lvl + 1, start, end, + idx, len, atomic_read(px_used(pd))); GEM_BUG_ON(!len || len >= atomic_read(px_used(pd))); do { @@ -252,8 +252,8 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) && gen8_pd_contains(start, end, lvl)) { - DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n", - __func__, vm, lvl + 1, idx, start, end); + GTT_TRACE("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n", + __func__, vm, lvl + 1, idx, start, end); clear_pd_entry(pd, idx, scratch); __gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl); start += (u64)I915_PDES << gen8_pd_shift(lvl); @@ -270,10 +270,10 @@ static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, u64 *vaddr; count = gen8_pt_count(start, end); - DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n", - __func__, vm, lvl, start, end, - gen8_pd_index(start, 0), count, - atomic_read(&pt->used)); + GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n", + __func__, vm, lvl, start, end, + gen8_pd_index(start, 0), count, + atomic_read(&pt->used)); GEM_BUG_ON(!count || count >= atomic_read(&pt->used)); num_ptes = count; @@ -325,9 +325,9 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm, GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT); len = gen8_pd_range(*start, end, lvl--, &idx); - DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n", - __func__, vm, lvl + 1, *start, end, - idx, len, atomic_read(px_used(pd))); + GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n", + __func__, vm, lvl + 1, *start, end, + idx, len, atomic_read(px_used(pd))); GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1)); spin_lock(&pd->lock); @@ -338,8 +338,8 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm, if (!pt) { spin_unlock(&pd->lock); - DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n", - __func__, vm, lvl + 1, idx); + GTT_TRACE("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n", + __func__, vm, lvl + 1, idx); pt = stash->pt[!!lvl]; __i915_gem_object_pin_pages(pt->base); @@ -369,10 +369,10 @@ static void __gen8_ppgtt_alloc(struct i915_address_space * const vm, } else { unsigned int count = gen8_pt_count(*start, end); - DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n", - __func__, vm, lvl, *start, end, - gen8_pd_index(*start, 0), count, - atomic_read(&pt->used)); + GTT_TRACE("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } inserting pte\n", + __func__, vm, lvl, *start, end, + gen8_pd_index(*start, 0), count, + atomic_read(&pt->used)); atomic_add(count, &pt->used); /* All other pdes may be simultaneously removed */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index b58c30ac8e..40269e4c1e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -170,6 +170,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define I915_GEM_HWS_SEQNO 0x40 #define I915_GEM_HWS_SEQNO_ADDR (I915_GEM_HWS_SEQNO * sizeof(u32)) #define I915_GEM_HWS_MIGRATE (0x42 * sizeof(u32)) +#define I915_GEM_HWS_GGTT_BIND 0x46 +#define I915_GEM_HWS_GGTT_BIND_ADDR (I915_GEM_HWS_GGTT_BIND * sizeof(u32)) #define I915_GEM_HWS_PXP 0x60 #define I915_GEM_HWS_PXP_ADDR (I915_GEM_HWS_PXP * sizeof(u32)) #define I915_GEM_HWS_GSC 0x62 diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index e85d70a621..4a11219e56 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -316,10 +316,9 @@ u32 intel_engine_context_size(struct intel_gt *gt, u8 class) * out in the wash. */ cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1; - drm_dbg(>->i915->drm, - "graphics_ver = %d CXT_SIZE = %d bytes [0x%08x]\n", - GRAPHICS_VER(gt->i915), cxt_size * 64, - cxt_size - 1); + gt_dbg(gt, "graphics_ver = %d CXT_SIZE = %d bytes [0x%08x]\n", + GRAPHICS_VER(gt->i915), cxt_size * 64, + cxt_size - 1); return round_up(cxt_size * 64, PAGE_SIZE); case 3: case 2: @@ -788,7 +787,7 @@ static void engine_mask_apply_media_fuses(struct intel_gt *gt) if (!(BIT(i) & vdbox_mask)) { gt->info.engine_mask &= ~BIT(_VCS(i)); - drm_dbg(&i915->drm, "vcs%u fused off\n", i); + gt_dbg(gt, "vcs%u fused off\n", i); continue; } @@ -796,8 +795,7 @@ static void engine_mask_apply_media_fuses(struct intel_gt *gt) gt->info.vdbox_sfc_access |= BIT(i); logical_vdbox++; } - drm_dbg(&i915->drm, "vdbox enable: %04x, instances: %04lx\n", - vdbox_mask, VDBOX_MASK(gt)); + gt_dbg(gt, "vdbox enable: %04x, instances: %04lx\n", vdbox_mask, VDBOX_MASK(gt)); GEM_BUG_ON(vdbox_mask != VDBOX_MASK(gt)); for (i = 0; i < I915_MAX_VECS; i++) { @@ -808,11 +806,10 @@ static void engine_mask_apply_media_fuses(struct intel_gt *gt) if (!(BIT(i) & vebox_mask)) { gt->info.engine_mask &= ~BIT(_VECS(i)); - drm_dbg(&i915->drm, "vecs%u fused off\n", i); + gt_dbg(gt, "vecs%u fused off\n", i); } } - drm_dbg(&i915->drm, "vebox enable: %04x, instances: %04lx\n", - vebox_mask, VEBOX_MASK(gt)); + gt_dbg(gt, "vebox enable: %04x, instances: %04lx\n", vebox_mask, VEBOX_MASK(gt)); GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt)); } @@ -838,7 +835,7 @@ static void engine_mask_apply_compute_fuses(struct intel_gt *gt) */ for_each_clear_bit(i, &ccs_mask, I915_MAX_CCS) { info->engine_mask &= ~BIT(_CCS(i)); - drm_dbg(&i915->drm, "ccs%u fused off\n", i); + gt_dbg(gt, "ccs%u fused off\n", i); } } @@ -866,8 +863,8 @@ static void engine_mask_apply_copy_fuses(struct intel_gt *gt) _BCS(instance)); if (mask & info->engine_mask) { - drm_dbg(&i915->drm, "bcs%u fused off\n", instance); - drm_dbg(&i915->drm, "bcs%u fused off\n", instance + 1); + gt_dbg(gt, "bcs%u fused off\n", instance); + gt_dbg(gt, "bcs%u fused off\n", instance + 1); info->engine_mask &= ~mask; } @@ -907,8 +904,7 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt) * submission, which will wake up the GSC power well. */ if (__HAS_ENGINE(info->engine_mask, GSC0) && !intel_uc_wants_gsc_uc(>->uc)) { - drm_notice(>->i915->drm, - "No GSC FW selected, disabling GSC CS and media C6\n"); + gt_notice(gt, "No GSC FW selected, disabling GSC CS and media C6\n"); info->engine_mask &= ~BIT(GSC0); } @@ -1097,8 +1093,7 @@ static int init_status_page(struct intel_engine_cs *engine) */ obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); if (IS_ERR(obj)) { - drm_err(&engine->i915->drm, - "Failed to allocate status page\n"); + gt_err(engine->gt, "Failed to allocate status page\n"); return PTR_ERR(obj); } @@ -1419,6 +1414,20 @@ void intel_engine_destroy_pinned_context(struct intel_context *ce) } static struct intel_context * +create_ggtt_bind_context(struct intel_engine_cs *engine) +{ + static struct lock_class_key kernel; + + /* + * MI_UPDATE_GTT can insert up to 511 PTE entries and there could be multiple + * bind requets at a time so get a bigger ring. + */ + return intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_512K, + I915_GEM_HWS_GGTT_BIND_ADDR, + &kernel, "ggtt_bind_context"); +} + +static struct intel_context * create_kernel_context(struct intel_engine_cs *engine) { static struct lock_class_key kernel; @@ -1441,7 +1450,7 @@ create_kernel_context(struct intel_engine_cs *engine) */ static int engine_init_common(struct intel_engine_cs *engine) { - struct intel_context *ce; + struct intel_context *ce, *bce = NULL; int ret; engine->set_default_submission(engine); @@ -1457,17 +1466,34 @@ static int engine_init_common(struct intel_engine_cs *engine) ce = create_kernel_context(engine); if (IS_ERR(ce)) return PTR_ERR(ce); + /* + * Create a separate pinned context for GGTT update with blitter engine + * if a platform require such service. MI_UPDATE_GTT works on other + * engines as well but BCS should be less busy engine so pick that for + * GGTT updates. + */ + if (i915_ggtt_require_binder(engine->i915) && engine->id == BCS0) { + bce = create_ggtt_bind_context(engine); + if (IS_ERR(bce)) { + ret = PTR_ERR(bce); + goto err_ce_context; + } + } ret = measure_breadcrumb_dw(ce); if (ret < 0) - goto err_context; + goto err_bce_context; engine->emit_fini_breadcrumb_dw = ret; engine->kernel_context = ce; + engine->bind_context = bce; return 0; -err_context: +err_bce_context: + if (bce) + intel_engine_destroy_pinned_context(bce); +err_ce_context: intel_engine_destroy_pinned_context(ce); return ret; } @@ -1537,6 +1563,10 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) if (engine->kernel_context) intel_engine_destroy_pinned_context(engine->kernel_context); + if (engine->bind_context) + intel_engine_destroy_pinned_context(engine->bind_context); + + GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); cleanup_status_page(engine); @@ -1616,9 +1646,7 @@ static int __intel_engine_stop_cs(struct intel_engine_cs *engine, * Wa_22011802037: Prior to doing a reset, ensure CS is * stopped, set ring stop bit and prefetch disable bit to halt CS */ - if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || - (GRAPHICS_VER(engine->i915) >= 11 && - GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) + if (intel_engine_reset_needs_wa_22011802037(engine->gt)) intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base), _MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE)); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index b538b5c049..e91fc881db 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -21,7 +21,7 @@ static void intel_gsc_idle_msg_enable(struct intel_engine_cs *engine) { struct drm_i915_private *i915 = engine->i915; - if (IS_METEORLAKE(i915) && engine->id == GSC0) { + if (MEDIA_VER(i915) >= 13 && engine->id == GSC0) { intel_uncore_write(engine->gt->uncore, RC_PSMI_CTRL_GSCCS, _MASKED_BIT_DISABLE(IDLE_MSG_DISABLE)); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h b/drivers/gpu/drm/i915/gt/intel_engine_regs.h index 6b9d9f8376..fdd4ddd3a9 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h @@ -177,6 +177,7 @@ #define CTX_CTRL_RS_CTX_ENABLE REG_BIT(1) #define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT REG_BIT(2) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3) +#define GEN12_CTX_CTRL_RUNALONE_MODE REG_BIT(7) #define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE REG_BIT(8) #define RING_CTX_SR_CTL(base) _MMIO((base) + 0x244) #define RING_SEMA_WAIT_POLL(base) _MMIO((base) + 0x24c) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index a7e6775980..8769760257 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -402,7 +402,15 @@ struct intel_engine_cs { unsigned long context_tag; - struct rb_node uabi_node; + /* + * The type evolves during initialization, see related comment for + * struct drm_i915_private's uabi_engines member. + */ + union { + struct llist_node uabi_llist; + struct list_head uabi_list; + struct rb_node uabi_node; + }; struct intel_sseu sseu; @@ -416,6 +424,9 @@ struct intel_engine_cs { struct llist_head barrier_tasks; struct intel_context *kernel_context; /* pinned */ + struct intel_context *bind_context; /* pinned, only for BCS0 */ + /* mark the bind context's availability status */ + bool bind_context_ready; /** * pinned_contexts_list: List of pinned contexts. This list is only diff --git a/drivers/gpu/drm/i915/gt/intel_engine_user.c b/drivers/gpu/drm/i915/gt/intel_engine_user.c index d304e0a948..833987015b 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_user.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_user.c @@ -38,8 +38,7 @@ intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance) void intel_engine_add_user(struct intel_engine_cs *engine) { - llist_add((struct llist_node *)&engine->uabi_node, - (struct llist_head *)&engine->i915->uabi_engines); + llist_add(&engine->uabi_llist, &engine->i915->uabi_engines_llist); } #define I915_NO_UABI_CLASS ((u16)(-1)) @@ -57,9 +56,9 @@ static int engine_cmp(void *priv, const struct list_head *A, const struct list_head *B) { const struct intel_engine_cs *a = - container_of((struct rb_node *)A, typeof(*a), uabi_node); + container_of(A, typeof(*a), uabi_list); const struct intel_engine_cs *b = - container_of((struct rb_node *)B, typeof(*b), uabi_node); + container_of(B, typeof(*b), uabi_list); if (uabi_classes[a->class] < uabi_classes[b->class]) return -1; @@ -76,7 +75,7 @@ static int engine_cmp(void *priv, const struct list_head *A, static struct llist_node *get_engines(struct drm_i915_private *i915) { - return llist_del_all((struct llist_head *)&i915->uabi_engines); + return llist_del_all(&i915->uabi_engines_llist); } static void sort_engines(struct drm_i915_private *i915, @@ -86,9 +85,8 @@ static void sort_engines(struct drm_i915_private *i915, llist_for_each_safe(pos, next, get_engines(i915)) { struct intel_engine_cs *engine = - container_of((struct rb_node *)pos, typeof(*engine), - uabi_node); - list_add((struct list_head *)&engine->uabi_node, engines); + container_of(pos, typeof(*engine), uabi_llist); + list_add(&engine->uabi_list, engines); } list_sort(NULL, engines, engine_cmp); } @@ -217,8 +215,7 @@ void intel_engines_driver_register(struct drm_i915_private *i915) p = &i915->uabi_engines.rb_node; list_for_each_safe(it, next, &engines) { struct intel_engine_cs *engine = - container_of((struct rb_node *)it, typeof(*engine), - uabi_node); + container_of(it, typeof(*engine), uabi_list); if (intel_gt_has_unrecoverable_error(engine->gt)) continue; /* ignore incomplete engines */ diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 3292524469..e8f42ec6b1 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -3001,9 +3001,7 @@ static void execlists_reset_prepare(struct intel_engine_cs *engine) * Wa_22011802037: In addition to stopping the cs, we need * to wait for any pending mi force wakeups */ - if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || - (GRAPHICS_VER(engine->i915) >= 11 && - GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) + if (intel_engine_reset_needs_wa_22011802037(engine->gt)) intel_engine_wait_for_pending_mi_fw(engine); engine->execlists.reset_ccid = active_ccid(engine); diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index b20d8fe8aa..15fc8e4703 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -15,18 +15,23 @@ #include "display/intel_display.h" #include "gem/i915_gem_lmem.h" +#include "intel_context.h" #include "intel_ggtt_gmch.h" +#include "intel_gpu_commands.h" #include "intel_gt.h" #include "intel_gt_regs.h" #include "intel_pci_config.h" +#include "intel_ring.h" #include "i915_drv.h" #include "i915_pci.h" +#include "i915_request.h" #include "i915_scatterlist.h" #include "i915_utils.h" #include "i915_vgpu.h" #include "intel_gtt.h" #include "gen8_ppgtt.h" +#include "intel_engine_pm.h" static void i915_ggtt_color_adjust(const struct drm_mm_node *node, unsigned long color, @@ -220,22 +225,36 @@ static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) GFX_FLSH_CNTL_EN); } +static void guc_ggtt_ct_invalidate(struct intel_gt *gt) +{ + struct intel_uncore *uncore = gt->uncore; + intel_wakeref_t wakeref; + + with_intel_runtime_pm_if_active(uncore->rpm, wakeref) { + struct intel_guc *guc = >->uc.guc; + + intel_guc_invalidate_tlb_guc(guc); + } +} + static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) { struct drm_i915_private *i915 = ggtt->vm.i915; + struct intel_gt *gt; gen8_ggtt_invalidate(ggtt); - if (GRAPHICS_VER(i915) >= 12) { - struct intel_gt *gt; - - list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) + list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) { + if (intel_guc_tlb_invalidation_is_available(>->uc.guc)) { + guc_ggtt_ct_invalidate(gt); + } else if (GRAPHICS_VER(i915) >= 12) { intel_uncore_write_fw(gt->uncore, GEN12_GUC_TLB_INV_CR, GEN12_GUC_TLB_INV_CR_INVALIDATE); - } else { - intel_uncore_write_fw(ggtt->vm.gt->uncore, - GEN8_GTCR, GEN8_GTCR_INVALIDATE); + } else { + intel_uncore_write_fw(gt->uncore, + GEN8_GTCR, GEN8_GTCR_INVALIDATE); + } } } @@ -271,6 +290,145 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr, return pte; } +static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt) +{ + struct intel_gt *gt = ggtt->vm.gt; + + return intel_gt_is_bind_context_ready(gt); +} + +static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt) +{ + struct intel_context *ce; + struct intel_gt *gt = ggtt->vm.gt; + + if (intel_gt_is_wedged(gt)) + return NULL; + + ce = gt->engine[BCS0]->bind_context; + GEM_BUG_ON(!ce); + + /* + * If the GT is not awake already at this stage then fallback + * to pci based GGTT update otherwise __intel_wakeref_get_first() + * would conflict with fs_reclaim trying to allocate memory while + * doing rpm_resume(). + */ + if (!intel_gt_pm_get_if_awake(gt)) + return NULL; + + intel_engine_pm_get(ce->engine); + + return ce; +} + +static void gen8_ggtt_bind_put_ce(struct intel_context *ce) +{ + intel_engine_pm_put(ce->engine); + intel_gt_pm_put(ce->engine->gt); +} + +static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset, + struct sg_table *pages, u32 num_entries, + const gen8_pte_t pte) +{ + struct i915_sched_attr attr = {}; + struct intel_gt *gt = ggtt->vm.gt; + const gen8_pte_t scratch_pte = ggtt->vm.scratch[0]->encode; + struct sgt_iter iter; + struct i915_request *rq; + struct intel_context *ce; + u32 *cs; + + if (!num_entries) + return true; + + ce = gen8_ggtt_bind_get_ce(ggtt); + if (!ce) + return false; + + if (pages) + iter = __sgt_iter(pages->sgl, true); + + while (num_entries) { + int count = 0; + dma_addr_t addr; + /* + * MI_UPDATE_GTT can update 512 entries in a single command but + * that end up with engine reset, 511 works. + */ + u32 n_ptes = min_t(u32, 511, num_entries); + + if (mutex_lock_interruptible(&ce->timeline->mutex)) + goto put_ce; + + intel_context_enter(ce); + rq = __i915_request_create(ce, GFP_NOWAIT | GFP_ATOMIC); + intel_context_exit(ce); + if (IS_ERR(rq)) { + GT_TRACE(gt, "Failed to get bind request\n"); + mutex_unlock(&ce->timeline->mutex); + goto put_ce; + } + + cs = intel_ring_begin(rq, 2 * n_ptes + 2); + if (IS_ERR(cs)) { + GT_TRACE(gt, "Failed to ring space for GGTT bind\n"); + i915_request_set_error_once(rq, PTR_ERR(cs)); + /* once a request is created, it must be queued */ + goto queue_err_rq; + } + + *cs++ = MI_UPDATE_GTT | (2 * n_ptes); + *cs++ = offset << 12; + + if (pages) { + for_each_sgt_daddr_next(addr, iter) { + if (count == n_ptes) + break; + *cs++ = lower_32_bits(pte | addr); + *cs++ = upper_32_bits(pte | addr); + count++; + } + /* fill remaining with scratch pte, if any */ + if (count < n_ptes) { + memset64((u64 *)cs, scratch_pte, + n_ptes - count); + cs += (n_ptes - count) * 2; + } + } else { + memset64((u64 *)cs, pte, n_ptes); + cs += n_ptes * 2; + } + + intel_ring_advance(rq, cs); +queue_err_rq: + i915_request_get(rq); + __i915_request_commit(rq); + __i915_request_queue(rq, &attr); + + mutex_unlock(&ce->timeline->mutex); + /* This will break if the request is complete or after engine reset */ + i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); + if (rq->fence.error) + goto err_rq; + + i915_request_put(rq); + + num_entries -= n_ptes; + offset += n_ptes; + } + + gen8_ggtt_bind_put_ce(ce); + return true; + +err_rq: + i915_request_put(rq); +put_ce: + gen8_ggtt_bind_put_ce(ce); + return false; +} + static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) { writeq(pte, addr); @@ -291,6 +449,21 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm, ggtt->invalidate(ggtt); } +static void gen8_ggtt_insert_page_bind(struct i915_address_space *vm, + dma_addr_t addr, u64 offset, + unsigned int pat_index, u32 flags) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen8_pte_t pte; + + pte = ggtt->vm.pte_encode(addr, pat_index, flags); + if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) && + gen8_ggtt_bind_ptes(ggtt, offset, NULL, 1, pte)) + return ggtt->invalidate(ggtt); + + gen8_ggtt_insert_page(vm, addr, offset, pat_index, flags); +} + static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct i915_vma_resource *vma_res, unsigned int pat_index, @@ -330,6 +503,50 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, ggtt->invalidate(ggtt); } +static bool __gen8_ggtt_insert_entries_bind(struct i915_address_space *vm, + struct i915_vma_resource *vma_res, + unsigned int pat_index, u32 flags) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + gen8_pte_t scratch_pte = vm->scratch[0]->encode; + gen8_pte_t pte_encode; + u64 start, end; + + pte_encode = ggtt->vm.pte_encode(0, pat_index, flags); + start = (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE; + end = start + vma_res->guard / I915_GTT_PAGE_SIZE; + if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte)) + goto err; + + start = end; + end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE; + if (!gen8_ggtt_bind_ptes(ggtt, start, vma_res->bi.pages, + vma_res->node_size / I915_GTT_PAGE_SIZE, pte_encode)) + goto err; + + start += vma_res->node_size / I915_GTT_PAGE_SIZE; + if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte)) + goto err; + + return true; + +err: + return false; +} + +static void gen8_ggtt_insert_entries_bind(struct i915_address_space *vm, + struct i915_vma_resource *vma_res, + unsigned int pat_index, u32 flags) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + + if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) && + __gen8_ggtt_insert_entries_bind(vm, vma_res, pat_index, flags)) + return ggtt->invalidate(ggtt); + + gen8_ggtt_insert_entries(vm, vma_res, pat_index, flags); +} + static void gen8_ggtt_clear_range(struct i915_address_space *vm, u64 start, u64 length) { @@ -351,6 +568,27 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, gen8_set_pte(>t_base[i], scratch_pte); } +static void gen8_ggtt_scratch_range_bind(struct i915_address_space *vm, + u64 start, u64 length) +{ + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); + unsigned int first_entry = start / I915_GTT_PAGE_SIZE; + unsigned int num_entries = length / I915_GTT_PAGE_SIZE; + const gen8_pte_t scratch_pte = vm->scratch[0]->encode; + const int max_entries = ggtt_total_entries(ggtt) - first_entry; + + if (WARN(num_entries > max_entries, + "First entry = %d; Num entries = %d (max=%d)\n", + first_entry, num_entries, max_entries)) + num_entries = max_entries; + + if (should_update_ggtt_with_bind(ggtt) && gen8_ggtt_bind_ptes(ggtt, first_entry, + NULL, num_entries, scratch_pte)) + return ggtt->invalidate(ggtt); + + gen8_ggtt_clear_range(vm, start, length); +} + static void gen6_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, @@ -1021,7 +1259,18 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; } - if (intel_uc_wants_guc(&ggtt->vm.gt->uc)) + if (i915_ggtt_require_binder(i915)) { + ggtt->vm.scratch_range = gen8_ggtt_scratch_range_bind; + ggtt->vm.insert_page = gen8_ggtt_insert_page_bind; + ggtt->vm.insert_entries = gen8_ggtt_insert_entries_bind; + /* + * On GPU is hung, we might bind VMAs for error capture. + * Fallback to CPU GGTT updates in that case. + */ + ggtt->vm.raw_insert_page = gen8_ggtt_insert_page; + } + + if (intel_uc_wants_guc_submission(&ggtt->vm.gt->uc)) ggtt->invalidate = guc_ggtt_invalidate; else ggtt->invalidate = gen8_ggtt_invalidate; diff --git a/drivers/gpu/drm/i915/gt/intel_gsc.c b/drivers/gpu/drm/i915/gt/intel_gsc.c index bcc3605158..6d440de8ba 100644 --- a/drivers/gpu/drm/i915/gt/intel_gsc.c +++ b/drivers/gpu/drm/i915/gt/intel_gsc.c @@ -11,6 +11,7 @@ #include "gem/i915_gem_region.h" #include "gt/intel_gsc.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_print.h" #define GSC_BAR_LENGTH 0x00000FFC @@ -49,13 +50,13 @@ gsc_ext_om_alloc(struct intel_gsc *gsc, struct intel_gsc_intf *intf, size_t size I915_BO_ALLOC_CONTIGUOUS | I915_BO_ALLOC_CPU_CLEAR); if (IS_ERR(obj)) { - drm_err(>->i915->drm, "Failed to allocate gsc memory\n"); + gt_err(gt, "Failed to allocate gsc memory\n"); return PTR_ERR(obj); } err = i915_gem_object_pin_pages_unlocked(obj); if (err) { - drm_err(>->i915->drm, "Failed to pin pages for gsc memory\n"); + gt_err(gt, "Failed to pin pages for gsc memory\n"); goto out_put; } @@ -286,12 +287,12 @@ static void gsc_irq_handler(struct intel_gt *gt, unsigned int intf_id) int ret; if (intf_id >= INTEL_GSC_NUM_INTERFACES) { - drm_warn_once(>->i915->drm, "GSC irq: intf_id %d is out of range", intf_id); + gt_warn_once(gt, "GSC irq: intf_id %d is out of range", intf_id); return; } if (!HAS_HECI_GSC(gt->i915)) { - drm_warn_once(>->i915->drm, "GSC irq: not supported"); + gt_warn_once(gt, "GSC irq: not supported"); return; } @@ -300,7 +301,7 @@ static void gsc_irq_handler(struct intel_gt *gt, unsigned int intf_id) ret = generic_handle_irq(gt->gsc.intf[intf_id].irq); if (ret) - drm_err_ratelimited(>->i915->drm, "error handling GSC irq: %d\n", ret); + gt_err_ratelimited(gt, "error handling GSC irq: %d\n", ret); } void intel_gsc_irq_handler(struct intel_gt *gt, u32 iir) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 95631e8f39..ba1186fc52 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -62,7 +62,13 @@ void intel_gt_common_init_early(struct intel_gt *gt) /* Preliminary initialization of Tile 0 */ int intel_root_gt_init_early(struct drm_i915_private *i915) { - struct intel_gt *gt = to_gt(i915); + struct intel_gt *gt; + + gt = drmm_kzalloc(&i915->drm, sizeof(*gt), GFP_KERNEL); + if (!gt) + return -ENOMEM; + + i915->gt[0] = gt; gt->i915 = i915; gt->uncore = &i915->uncore; @@ -262,10 +268,21 @@ intel_gt_clear_error_registers(struct intel_gt *gt, I915_MASTER_ERROR_INTERRUPT); } - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + /* + * For the media GT, this ring fault register is not replicated, + * so don't do multicast/replicated register read/write operation on it. + */ + if (MEDIA_VER(i915) >= 13 && gt->type == GT_MEDIA) { + intel_uncore_rmw(uncore, XELPMP_RING_FAULT_REG, + RING_FAULT_VALID, 0); + intel_uncore_posting_read(uncore, + XELPMP_RING_FAULT_REG); + + } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { intel_gt_mcr_multicast_rmw(gt, XEHP_RING_FAULT_REG, RING_FAULT_VALID, 0); intel_gt_mcr_read_any(gt, XEHP_RING_FAULT_REG); + } else if (GRAPHICS_VER(i915) >= 12) { intel_uncore_rmw(uncore, GEN12_RING_FAULT_REG, RING_FAULT_VALID, 0); intel_uncore_posting_read(uncore, GEN12_RING_FAULT_REG); @@ -911,8 +928,6 @@ int intel_gt_probe_all(struct drm_i915_private *i915) if (ret) return ret; - i915->gt[0] = gt; - if (!HAS_EXTRA_GT_LIST(i915)) return 0; @@ -1008,3 +1023,57 @@ enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt, else return I915_MAP_WC; } + +bool intel_gt_needs_wa_22016122933(struct intel_gt *gt) +{ + return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == GT_MEDIA; +} + +static void __intel_gt_bind_context_set_ready(struct intel_gt *gt, bool ready) +{ + struct intel_engine_cs *engine = gt->engine[BCS0]; + + if (engine && engine->bind_context) + engine->bind_context_ready = ready; +} + +/** + * intel_gt_bind_context_set_ready - Set the context binding as ready + * + * @gt: GT structure + * + * This function marks the binder context as ready. + */ +void intel_gt_bind_context_set_ready(struct intel_gt *gt) +{ + __intel_gt_bind_context_set_ready(gt, true); +} + +/** + * intel_gt_bind_context_set_unready - Set the context binding as ready + * @gt: GT structure + * + * This function marks the binder context as not ready. + */ + +void intel_gt_bind_context_set_unready(struct intel_gt *gt) +{ + __intel_gt_bind_context_set_ready(gt, false); +} + +/** + * intel_gt_is_bind_context_ready - Check if context binding is ready + * + * @gt: GT structure + * + * This function returns binder context's ready status. + */ +bool intel_gt_is_bind_context_ready(struct intel_gt *gt) +{ + struct intel_engine_cs *engine = gt->engine[BCS0]; + + if (engine) + return engine->bind_context_ready; + + return false; +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 6c34547b58..970bedf6b7 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -6,7 +6,6 @@ #ifndef __INTEL_GT__ #define __INTEL_GT__ -#include "i915_drv.h" #include "intel_engine_types.h" #include "intel_gt_types.h" #include "intel_reset.h" @@ -14,6 +13,69 @@ struct drm_i915_private; struct drm_printer; +/* + * Check that the GT is a graphics GT and has an IP version within the + * specified range (inclusive). + */ +#define IS_GFX_GT_IP_RANGE(gt, from, until) ( \ + BUILD_BUG_ON_ZERO((from) < IP_VER(2, 0)) + \ + BUILD_BUG_ON_ZERO((until) < (from)) + \ + ((gt)->type != GT_MEDIA && \ + GRAPHICS_VER_FULL((gt)->i915) >= (from) && \ + GRAPHICS_VER_FULL((gt)->i915) <= (until))) + +/* + * Check that the GT is a media GT and has an IP version within the + * specified range (inclusive). + * + * Only usable on platforms with a standalone media design (i.e., IP version 13 + * and higher). + */ +#define IS_MEDIA_GT_IP_RANGE(gt, from, until) ( \ + BUILD_BUG_ON_ZERO((from) < IP_VER(13, 0)) + \ + BUILD_BUG_ON_ZERO((until) < (from)) + \ + ((gt) && (gt)->type == GT_MEDIA && \ + MEDIA_VER_FULL((gt)->i915) >= (from) && \ + MEDIA_VER_FULL((gt)->i915) <= (until))) + +/* + * Check that the GT is a graphics GT with a specific IP version and has + * a stepping in the range [from, until). The lower stepping bound is + * inclusive, the upper bound is exclusive. The most common use-case of this + * macro is for checking bounds for workarounds, which usually have a stepping + * ("from") at which the hardware issue is first present and another stepping + * ("until") at which a hardware fix is present and the software workaround is + * no longer necessary. E.g., + * + * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) + * IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B1, STEP_FOREVER) + * + * "STEP_FOREVER" can be passed as "until" for workarounds that have no upper + * stepping bound for the specified IP version. + */ +#define IS_GFX_GT_IP_STEP(gt, ipver, from, until) ( \ + BUILD_BUG_ON_ZERO((until) <= (from)) + \ + (IS_GFX_GT_IP_RANGE((gt), (ipver), (ipver)) && \ + IS_GRAPHICS_STEP((gt)->i915, (from), (until)))) + +/* + * Check that the GT is a media GT with a specific IP version and has + * a stepping in the range [from, until). The lower stepping bound is + * inclusive, the upper bound is exclusive. The most common use-case of this + * macro is for checking bounds for workarounds, which usually have a stepping + * ("from") at which the hardware issue is first present and another stepping + * ("until") at which a hardware fix is present and the software workaround is + * no longer necessary. "STEP_FOREVER" can be passed as "until" for + * workarounds that have no upper stepping bound for the specified IP version. + * + * This macro may only be used to match on platforms that have a standalone + * media design (i.e., media version 13 or higher). + */ +#define IS_MEDIA_GT_IP_STEP(gt, ipver, from, until) ( \ + BUILD_BUG_ON_ZERO((until) <= (from)) + \ + (IS_MEDIA_GT_IP_RANGE((gt), (ipver), (ipver)) && \ + IS_MEDIA_STEP((gt)->i915, (from), (until)))) + #define GT_TRACE(gt, fmt, ...) do { \ const struct intel_gt *gt__ __maybe_unused = (gt); \ GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev), \ @@ -25,10 +87,7 @@ static inline bool gt_is_root(struct intel_gt *gt) return !gt->info.id; } -static inline bool intel_gt_needs_wa_22016122933(struct intel_gt *gt) -{ - return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == GT_MEDIA; -} +bool intel_gt_needs_wa_22016122933(struct intel_gt *gt); static inline struct intel_gt *uc_to_gt(struct intel_uc *uc) { @@ -117,4 +176,7 @@ enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt, struct drm_i915_gem_object *obj, bool always_coherent); +void intel_gt_bind_context_set_ready(struct intel_gt *gt); +void intel_gt_bind_context_set_unready(struct intel_gt *gt); +bool intel_gt_is_bind_context_ready(struct intel_gt *gt); #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c index 2c0f1f3e28..34913912d8 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.c @@ -4,7 +4,7 @@ */ #include "i915_drv.h" - +#include "intel_gt.h" #include "intel_gt_mcr.h" #include "intel_gt_print.h" #include "intel_gt_regs.h" @@ -166,8 +166,8 @@ void intel_gt_mcr_init(struct intel_gt *gt) gt->steering_table[OADDRM] = xelpmp_oaddrm_steering_table; } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) { /* Wa_14016747170 */ - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) fuse = REG_FIELD_GET(MTL_GT_L3_EXC_MASK, intel_uncore_read(gt->uncore, MTL_GT_ACTIVITY_FACTOR)); @@ -440,6 +440,28 @@ void intel_gt_mcr_unlock(struct intel_gt *gt, unsigned long flags) } /** + * intel_gt_mcr_lock_sanitize - Sanitize MCR steering lock + * @gt: GT structure + * + * This will be used to sanitize the initial status of the hardware lock + * during driver load and resume since there won't be any concurrent access + * from other agents at those times, but it's possible that boot firmware + * may have left the lock in a bad state. + * + */ +void intel_gt_mcr_lock_sanitize(struct intel_gt *gt) +{ + /* + * This gets called at load/resume time, so we shouldn't be + * racing with other driver threads grabbing the mcr lock. + */ + lockdep_assert_not_held(>->mcr_lock); + + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) + intel_uncore_write_fw(gt->uncore, MTL_STEER_SEMAPHORE, 0x1); +} + +/** * intel_gt_mcr_read - read a specific instance of an MCR register * @gt: GT structure * @reg: the MCR register to read diff --git a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h index 41684495b7..01ac565a56 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_mcr.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_mcr.h @@ -11,6 +11,7 @@ void intel_gt_mcr_init(struct intel_gt *gt); void intel_gt_mcr_lock(struct intel_gt *gt, unsigned long *flags); void intel_gt_mcr_unlock(struct intel_gt *gt, unsigned long flags); +void intel_gt_mcr_lock_sanitize(struct intel_gt *gt); u32 intel_gt_mcr_read(struct intel_gt *gt, i915_mcr_reg_t reg, diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 5a942af0a1..f5899d503e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -13,6 +13,7 @@ #include "intel_engine_pm.h" #include "intel_gt.h" #include "intel_gt_clock_utils.h" +#include "intel_gt_mcr.h" #include "intel_gt_pm.h" #include "intel_gt_print.h" #include "intel_gt_requests.h" @@ -216,6 +217,21 @@ void intel_gt_pm_fini(struct intel_gt *gt) intel_rc6_fini(>->rc6); } +void intel_gt_resume_early(struct intel_gt *gt) +{ + /* + * Sanitize steer semaphores during driver resume. This is necessary + * to address observed cases of steer semaphores being + * held after a suspend operation. Confirmation from the hardware team + * assures the safety of this operation, as no lock acquisitions + * by other agents occur during driver load/resume process. + */ + intel_gt_mcr_lock_sanitize(gt); + + intel_uncore_resume_early(gt->uncore); + intel_gt_check_and_clear_faults(gt); +} + int intel_gt_resume(struct intel_gt *gt) { struct intel_engine_cs *engine; @@ -280,6 +296,7 @@ int intel_gt_resume(struct intel_gt *gt) out_fw: intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); intel_gt_pm_put(gt); + intel_gt_bind_context_set_ready(gt); return err; err_wedged: @@ -306,6 +323,7 @@ static void wait_for_suspend(struct intel_gt *gt) void intel_gt_suspend_prepare(struct intel_gt *gt) { + intel_gt_bind_context_set_unready(gt); user_forcewake(gt, true); wait_for_suspend(gt); } @@ -359,6 +377,7 @@ void intel_gt_suspend_late(struct intel_gt *gt) void intel_gt_runtime_suspend(struct intel_gt *gt) { + intel_gt_bind_context_set_unready(gt); intel_uc_runtime_suspend(>->uc); GT_TRACE(gt, "\n"); @@ -376,6 +395,7 @@ int intel_gt_runtime_resume(struct intel_gt *gt) if (ret) return ret; + intel_gt_bind_context_set_ready(gt); return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index 6c9a464523..b1eeb5b339 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -78,6 +78,7 @@ void intel_gt_pm_fini(struct intel_gt *gt); void intel_gt_suspend_prepare(struct intel_gt *gt); void intel_gt_suspend_late(struct intel_gt *gt); int intel_gt_resume(struct intel_gt *gt); +void intel_gt_resume_early(struct intel_gt *gt); void intel_gt_runtime_suspend(struct intel_gt *gt); int intel_gt_runtime_resume(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c index 357e2f8657..f900cc68d6 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c @@ -290,7 +290,6 @@ static int mtl_drpc(struct seq_file *m) seq_puts(m, "RC6\n"); break; default: - MISSING_CASE(REG_FIELD_GET(MTL_CC_MASK, gt_core_status)); seq_puts(m, "Unknown\n"); break; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_print.h b/drivers/gpu/drm/i915/gt/intel_gt_print.h index 55a336a9ff..7fdc78c792 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_print.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_print.h @@ -16,6 +16,9 @@ #define gt_warn(_gt, _fmt, ...) \ drm_warn(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) +#define gt_warn_once(_gt, _fmt, ...) \ + drm_warn_once(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) + #define gt_notice(_gt, _fmt, ...) \ drm_notice(&(_gt)->i915->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h index 2cdfb2f713..eecd0a87a6 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h @@ -26,7 +26,7 @@ #define MTL_CAGF_MASK REG_GENMASK(8, 0) #define MTL_CC0 0x0 #define MTL_CC6 0x3 -#define MTL_CC_MASK REG_GENMASK(12, 9) +#define MTL_CC_MASK REG_GENMASK(10, 9) /* RPM unit config (Gen8+) */ #define RPM_CONFIG0 _MMIO(0xd00) @@ -164,6 +164,8 @@ #define GEN9_CSFE_CHICKEN1_RCS _MMIO(0x20d4) #define GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE (1 << 2) #define GEN11_ENABLE_32_PLANE_MODE (1 << 7) +#define GEN12_CS_DEBUG_MODE2 _MMIO(0x20d8) +#define INSTRUCTION_STATE_CACHE_INVALIDATE REG_BIT(6) #define GEN7_FF_SLICE_CS_CHICKEN1 _MMIO(0x20e0) #define GEN9_FFSC_PERCTX_PREEMPT_CTRL (1 << 14) @@ -412,9 +414,6 @@ #define XEHP_CULLBIT1 MCR_REG(0x6100) -#define CHICKEN_RASTER_1 MCR_REG(0x6204) -#define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8) - #define CHICKEN_RASTER_2 MCR_REG(0x6208) #define TBIMR_FAST_CLIP REG_BIT(5) @@ -1085,6 +1084,7 @@ #define GEN12_RING_FAULT_REG _MMIO(0xcec4) #define XEHP_RING_FAULT_REG MCR_REG(0xcec4) +#define XELPMP_RING_FAULT_REG _MMIO(0xcec4) #define GEN8_RING_FAULT_ENGINE_ID(x) (((x) >> 12) & 0x7) #define RING_FAULT_GTTSEL_MASK (1 << 11) #define RING_FAULT_SRCID(x) (((x) >> 3) & 0xff) @@ -1221,6 +1221,8 @@ #define XEHP_HDC_CHICKEN0 MCR_REG(0xe5f0) #define LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK REG_GENMASK(13, 11) +#define DIS_ATOMIC_CHAINING_TYPED_WRITES REG_BIT(3) + #define ICL_HDC_MODE MCR_REG(0xe5f4) #define EU_PERF_CNTL2 PERF_REG(0xe658) @@ -1231,6 +1233,7 @@ #define DISABLE_D8_D16_COASLESCE REG_BIT(30) #define FORCE_1_SUB_MESSAGE_PER_FRAGMENT REG_BIT(15) #define LSC_CHICKEN_BIT_0_UDW MCR_REG(0xe7c8 + 4) +#define UGM_FRAGMENT_THRESHOLD_TO_3 REG_BIT(58 - 32) #define DIS_CHAIN_2XSIMD8 REG_BIT(55 - 32) #define FORCE_SLM_FENCE_SCOPE_TO_TILE REG_BIT(42 - 32) #define FORCE_UGM_FENCE_SCOPE_TO_TILE REG_BIT(41 - 32) diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 13944a14ea..4fbed27ef0 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -21,6 +21,11 @@ #include "intel_gt_regs.h" #include "intel_gtt.h" +bool i915_ggtt_require_binder(struct drm_i915_private *i915) +{ + /* Wa_13010847436 & Wa_14019519902 */ + return MEDIA_VER_FULL(i915) == IP_VER(13, 0); +} static bool intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *i915) { diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 4d6296cdbc..b471edac26 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -35,9 +35,9 @@ #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) #if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT) -#define DBG(...) trace_printk(__VA_ARGS__) +#define GTT_TRACE(...) trace_printk(__VA_ARGS__) #else -#define DBG(...) +#define GTT_TRACE(...) #endif #define NALLOC 3 /* 1 normal, 1 for concurrent threads, 1 for preallocation */ @@ -171,6 +171,9 @@ struct intel_gt; #define for_each_sgt_daddr(__dp, __iter, __sgt) \ __for_each_sgt_daddr(__dp, __iter, __sgt, I915_GTT_PAGE_SIZE) +#define for_each_sgt_daddr_next(__dp, __iter) \ + __for_each_daddr_next(__dp, __iter, I915_GTT_PAGE_SIZE) + struct i915_page_table { struct drm_i915_gem_object *base; union { @@ -688,4 +691,6 @@ static inline struct sgt_dma { return (struct sgt_dma){ sg, addr, addr + sg_dma_len(sg) }; } +bool i915_ggtt_require_binder(struct drm_i915_private *i915); + #endif diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index c378cc7c95..eaf66d9031 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -845,6 +845,27 @@ lrc_setup_indirect_ctx(u32 *regs, lrc_ring_indirect_offset_default(engine) << 6; } +static bool ctx_needs_runalone(const struct intel_context *ce) +{ + struct i915_gem_context *gem_ctx; + bool ctx_is_protected = false; + + /* + * On MTL and newer platforms, protected contexts require setting + * the LRC run-alone bit or else the encryption will not happen. + */ + if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 70) && + (ce->engine->class == COMPUTE_CLASS || ce->engine->class == RENDER_CLASS)) { + rcu_read_lock(); + gem_ctx = rcu_dereference(ce->gem_context); + if (gem_ctx) + ctx_is_protected = gem_ctx->uses_protected_content; + rcu_read_unlock(); + } + + return ctx_is_protected; +} + static void init_common_regs(u32 * const regs, const struct intel_context *ce, const struct intel_engine_cs *engine, @@ -860,6 +881,8 @@ static void init_common_regs(u32 * const regs, if (GRAPHICS_VER(engine->i915) < 11) ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | CTX_CTRL_RS_CTX_ENABLE); + if (ctx_needs_runalone(ce)) + ctl |= _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_RUNALONE_MODE); regs[CTX_CONTEXT_CONTROL] = ctl; regs[CTX_TIMESTAMP] = ce->stats.runtime.last; @@ -1317,29 +1340,6 @@ gen12_emit_cmd_buf_wa(const struct intel_context *ce, u32 *cs) } /* - * On DG2 during context restore of a preempted context in GPGPU mode, - * RCS restore hang is detected. This is extremely timing dependent. - * To address this below sw wabb is implemented for DG2 A steppings. - */ -static u32 * -dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs) -{ - *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG(ce->engine->mmio_base)); - *cs++ = 0x21; - - *cs++ = MI_LOAD_REGISTER_REG; - *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base)); - *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT1); - - *cs++ = MI_LOAD_REGISTER_REG; - *cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base)); - *cs++ = i915_mmio_reg_offset(XEHP_CULLBIT2); - - return cs; -} - -/* * The bspec's tuning guide asks us to program a vertical watermark value of * 0x3FF. However this register is not saved/restored properly by the * hardware, so we're required to apply the desired value via INDIRECT_CTX @@ -1357,27 +1357,34 @@ dg2_emit_draw_watermark_setting(u32 *cs) } static u32 * +gen12_invalidate_state_cache(u32 *cs) +{ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GEN12_CS_DEBUG_MODE2); + *cs++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE); + return cs; +} + +static u32 * gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs) { cs = gen12_emit_timestamp_wa(ce, cs); cs = gen12_emit_cmd_buf_wa(ce, cs); cs = gen12_emit_restore_scratch(ce, cs); - /* Wa_22011450934:dg2 */ - if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(ce->engine->i915, G11, STEP_A0, STEP_B0)) - cs = dg2_emit_rcs_hang_wabb(ce, cs); - /* Wa_16013000631:dg2 */ - if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || - IS_DG2_G11(ce->engine->i915)) + if (IS_DG2_G11(ce->engine->i915)) cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0); cs = gen12_emit_aux_table_inv(ce->engine, cs); + /* Wa_18022495364 */ + if (IS_GFX_GT_IP_RANGE(ce->engine->gt, IP_VER(12, 0), IP_VER(12, 10))) + cs = gen12_invalidate_state_cache(cs); + /* Wa_16014892111 */ - if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(ce->engine->i915, P, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(ce->engine->gt, IP_VER(12, 71), STEP_A0, STEP_B0) || IS_DG2(ce->engine->i915)) cs = dg2_emit_draw_watermark_setting(cs); @@ -1391,8 +1398,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs) cs = gen12_emit_restore_scratch(ce, cs); /* Wa_16013000631:dg2 */ - if (IS_DG2_GRAPHICS_STEP(ce->engine->i915, G10, STEP_B0, STEP_C0) || - IS_DG2_G11(ce->engine->i915)) + if (IS_DG2_G11(ce->engine->i915)) if (ce->engine->class == COMPUTE_CLASS) cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 2c01440722..353f93baac 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -404,18 +404,6 @@ static const struct drm_i915_mocs_entry dg2_mocs_table[] = { MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), }; -static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = { - /* Wa_14011441408: Set Go to Memory for MOCS#0 */ - MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), - /* UC - Coherent; GO:Memory */ - MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), - /* UC - Non-Coherent; GO:Memory */ - MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), - - /* WB - LC */ - MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), -}; - static const struct drm_i915_mocs_entry pvc_mocs_table[] = { /* Error */ MOCS_ENTRY(0, 0, L3_3_WB), @@ -499,7 +487,7 @@ static bool has_mocs(const struct drm_i915_private *i915) return !IS_DGFX(i915); } -static unsigned int get_mocs_settings(const struct drm_i915_private *i915, +static unsigned int get_mocs_settings(struct drm_i915_private *i915, struct drm_i915_mocs_table *table) { unsigned int flags; @@ -507,7 +495,7 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, memset(table, 0, sizeof(struct drm_i915_mocs_table)); table->unused_entries_index = I915_MOCS_PTE; - if (IS_METEORLAKE(i915)) { + if (IS_GFX_GT_IP_RANGE(to_gt(i915), IP_VER(12, 70), IP_VER(12, 71))) { table->size = ARRAY_SIZE(mtl_mocs_table); table->table = mtl_mocs_table; table->n_entries = MTL_NUM_MOCS_ENTRIES; @@ -521,13 +509,8 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, table->wb_index = 2; table->unused_entries_index = 2; } else if (IS_DG2(i915)) { - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { - table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax); - table->table = dg2_mocs_table_g10_ax; - } else { - table->size = ARRAY_SIZE(dg2_mocs_table); - table->table = dg2_mocs_table; - } + table->size = ARRAY_SIZE(dg2_mocs_table); + table->table = dg2_mocs_table; table->uc_index = 1; table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->unused_entries_index = 3; diff --git a/drivers/gpu/drm/i915/gt/intel_rc6.c b/drivers/gpu/drm/i915/gt/intel_rc6.c index ccdc1afbf1..7090e4be29 100644 --- a/drivers/gpu/drm/i915/gt/intel_rc6.c +++ b/drivers/gpu/drm/i915/gt/intel_rc6.c @@ -118,14 +118,12 @@ static void gen11_rc6_enable(struct intel_rc6 *rc6) GEN6_RC_CTL_EI_MODE(1); /* - * Wa_16011777198 and BSpec 52698 - Render powergating must be off. + * BSpec 52698 - Render powergating must be off. * FIXME BSpec is outdated, disabling powergating for MTL is just * temporary wa and should be removed after fixing real cause * of forcewake timeouts. */ - if (IS_METEORLAKE(gt->i915) || - IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) pg_enable = GEN9_MEDIA_PG_ENABLE | GEN11_MEDIA_SAMPLER_PG_ENABLE; @@ -526,8 +524,7 @@ static bool rc6_supported(struct intel_rc6 *rc6) return false; } - if (IS_MTL_MEDIA_STEP(gt->i915, STEP_A0, STEP_B0) && - gt->type == GT_MEDIA) { + if (IS_MEDIA_GT_IP_STEP(gt, IP_VER(13, 0), STEP_A0, STEP_B0)) { drm_notice(&i915->drm, "Media RC6 disabled on A step\n"); return false; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 5fa57a34cf..6801f8b95c 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -26,6 +26,7 @@ #include "intel_engine_regs.h" #include "intel_gt.h" #include "intel_gt_pm.h" +#include "intel_gt_print.h" #include "intel_gt_requests.h" #include "intel_mchbar_regs.h" #include "intel_pci_config.h" @@ -161,16 +162,16 @@ static int i915_do_reset(struct intel_gt *gt, struct pci_dev *pdev = to_pci_dev(gt->i915->drm.dev); int err; - /* Assert reset for at least 20 usec, and wait for acknowledgement. */ + /* Assert reset for at least 50 usec, and wait for acknowledgement. */ pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); udelay(50); - err = wait_for_atomic(i915_in_reset(pdev), 50); + err = _wait_for_atomic(i915_in_reset(pdev), 50000, 0); /* Clear the reset request. */ pci_write_config_byte(pdev, I915_GDRST, 0); udelay(50); if (!err) - err = wait_for_atomic(!i915_in_reset(pdev), 50); + err = _wait_for_atomic(!i915_in_reset(pdev), 50000, 0); return err; } @@ -190,7 +191,7 @@ static int g33_do_reset(struct intel_gt *gt, struct pci_dev *pdev = to_pci_dev(gt->i915->drm.dev); pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); - return wait_for_atomic(g4x_reset_complete(pdev), 50); + return _wait_for_atomic(g4x_reset_complete(pdev), 50000, 0); } static int g4x_do_reset(struct intel_gt *gt, @@ -207,7 +208,7 @@ static int g4x_do_reset(struct intel_gt *gt, pci_write_config_byte(pdev, I915_GDRST, GRDOM_MEDIA | GRDOM_RESET_ENABLE); - ret = wait_for_atomic(g4x_reset_complete(pdev), 50); + ret = _wait_for_atomic(g4x_reset_complete(pdev), 50000, 0); if (ret) { GT_TRACE(gt, "Wait for media reset failed\n"); goto out; @@ -215,7 +216,7 @@ static int g4x_do_reset(struct intel_gt *gt, pci_write_config_byte(pdev, I915_GDRST, GRDOM_RENDER | GRDOM_RESET_ENABLE); - ret = wait_for_atomic(g4x_reset_complete(pdev), 50); + ret = _wait_for_atomic(g4x_reset_complete(pdev), 50000, 0); if (ret) { GT_TRACE(gt, "Wait for render reset failed\n"); goto out; @@ -592,10 +593,10 @@ static int gen8_engine_reset_prepare(struct intel_engine_cs *engine) ret = __intel_wait_for_register_fw(uncore, reg, mask, ack, 700, 0, NULL); if (ret) - drm_err(&engine->i915->drm, - "%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n", - engine->name, request, - intel_uncore_read_fw(uncore, reg)); + gt_err(engine->gt, + "%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n", + engine->name, request, + intel_uncore_read_fw(uncore, reg)); return ret; } @@ -705,7 +706,7 @@ static int __reset_guc(struct intel_gt *gt) static bool needs_wa_14015076503(struct intel_gt *gt, intel_engine_mask_t engine_mask) { - if (!IS_METEORLAKE(gt->i915) || !HAS_ENGINE(gt, GSC0)) + if (MEDIA_VER_FULL(gt->i915) != IP_VER(13, 0) || !HAS_ENGINE(gt, GSC0)) return false; if (!__HAS_ENGINE(engine_mask, GSC0)) @@ -785,9 +786,7 @@ int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) reset_mask = wa_14015076503_start(gt, engine_mask, !retry); GT_TRACE(gt, "engine_mask=%x\n", reset_mask); - preempt_disable(); ret = reset(gt, reset_mask, retry); - preempt_enable(); wa_14015076503_end(gt, reset_mask); } @@ -1201,17 +1200,16 @@ void intel_gt_reset(struct intel_gt *gt, goto unlock; if (reason) - drm_notice(>->i915->drm, - "Resetting chip for %s\n", reason); + gt_notice(gt, "Resetting chip for %s\n", reason); atomic_inc(>->i915->gpu_error.reset_count); awake = reset_prepare(gt); if (!intel_has_gpu_reset(gt)) { if (gt->i915->params.reset) - drm_err(>->i915->drm, "GPU reset not supported\n"); + gt_err(gt, "GPU reset not supported\n"); else - drm_dbg(>->i915->drm, "GPU reset disabled\n"); + gt_dbg(gt, "GPU reset disabled\n"); goto error; } @@ -1219,7 +1217,7 @@ void intel_gt_reset(struct intel_gt *gt, intel_runtime_pm_disable_interrupts(gt->i915); if (do_reset(gt, stalled_mask)) { - drm_err(>->i915->drm, "Failed to reset chip\n"); + gt_err(gt, "Failed to reset chip\n"); goto taint; } @@ -1238,9 +1236,7 @@ void intel_gt_reset(struct intel_gt *gt, */ ret = intel_gt_init_hw(gt); if (ret) { - drm_err(>->i915->drm, - "Failed to initialise HW following reset (%d)\n", - ret); + gt_err(gt, "Failed to initialise HW following reset (%d)\n", ret); goto taint; } @@ -1607,9 +1603,7 @@ static void intel_wedge_me(struct work_struct *work) { struct intel_wedge_me *w = container_of(work, typeof(*w), work.work); - drm_err(&w->gt->i915->drm, - "%s timed out, cancelling all in-flight rendering.\n", - w->name); + gt_err(w->gt, "%s timed out, cancelling all in-flight rendering.\n", w->name); intel_gt_set_wedged(w->gt); } @@ -1632,6 +1626,24 @@ void __intel_fini_wedge(struct intel_wedge_me *w) w->gt = NULL; } +/* + * Wa_22011802037 requires that we (or the GuC) ensure that no command + * streamers are executing MI_FORCE_WAKE while an engine reset is initiated. + */ +bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt) +{ + if (GRAPHICS_VER(gt->i915) < 11) + return false; + + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0)) + return true; + + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 70)) + return false; + + return true; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_reset.c" #include "selftest_hangcheck.c" diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index 25c975b6e8..f615b30b81 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -78,4 +78,6 @@ void __intel_fini_wedge(struct intel_wedge_me *w); bool intel_has_gpu_reset(const struct intel_gt *gt); bool intel_has_reset_engine(const struct intel_gt *gt); +bool intel_engine_reset_needs_wa_22011802037(struct intel_gt *gt); + #endif /* I915_RESET_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 092542f53a..4feef874e6 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1161,7 +1161,7 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *c { struct drm_i915_private *i915 = rps_to_i915(rps); - if (IS_METEORLAKE(i915)) + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) return mtl_get_freq_caps(rps, caps); else return __gen6_rps_get_freq_caps(rps, caps); diff --git a/drivers/gpu/drm/i915/gt/intel_tlb.c b/drivers/gpu/drm/i915/gt/intel_tlb.c index 139608c30d..4bb13d1890 100644 --- a/drivers/gpu/drm/i915/gt/intel_tlb.c +++ b/drivers/gpu/drm/i915/gt/intel_tlb.c @@ -12,6 +12,7 @@ #include "intel_gt_print.h" #include "intel_gt_regs.h" #include "intel_tlb.h" +#include "uc/intel_guc.h" /* * HW architecture suggest typical invalidation time at 40us, @@ -131,11 +132,24 @@ void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno) return; with_intel_gt_pm_if_awake(gt, wakeref) { + struct intel_guc *guc = >->uc.guc; + mutex_lock(>->tlb.invalidate_lock); if (tlb_seqno_passed(gt, seqno)) goto unlock; - mmio_invalidate_full(gt); + if (HAS_GUC_TLB_INVALIDATION(gt->i915)) { + /* + * Only perform GuC TLB invalidation if GuC is ready. + * The only time GuC could not be ready is on GT reset, + * which would clobber all the TLBs anyways, making + * any TLB invalidation path here unnecessary. + */ + if (intel_guc_is_ready(guc)) + intel_guc_invalidate_tlb_engines(guc); + } else { + mmio_invalidate_full(gt); + } write_seqcount_invalidate(>->tlb.seqno); unlock: diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 3ae0dbd39e..192ac0e59a 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -11,6 +11,7 @@ #include "intel_gpu_commands.h" #include "intel_gt.h" #include "intel_gt_mcr.h" +#include "intel_gt_print.h" #include "intel_gt_regs.h" #include "intel_ring.h" #include "intel_workarounds.h" @@ -119,8 +120,8 @@ static void wa_init_finish(struct i915_wa_list *wal) if (!wal->count) return; - drm_dbg(&wal->gt->i915->drm, "Initialized %u %s workarounds on %s\n", - wal->wa_count, wal->name, wal->engine_name); + gt_dbg(wal->gt, "Initialized %u %s workarounds on %s\n", + wal->wa_count, wal->name, wal->engine_name); } static enum forcewake_domains @@ -764,68 +765,41 @@ static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine, { dg2_ctx_gt_tuning_init(engine, wal); - /* Wa_16011186671:dg2_g11 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) { - wa_mcr_masked_dis(wal, VFLSKPD, DIS_MULT_MISS_RD_SQUASH); - wa_mcr_masked_en(wal, VFLSKPD, DIS_OVER_FETCH_CACHE); - } - - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) { - /* Wa_14010469329:dg2_g10 */ - wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, - XEHP_DUAL_SIMD8_SEQ_MERGE_DISABLE); - - /* - * Wa_22010465075:dg2_g10 - * Wa_22010613112:dg2_g10 - * Wa_14010698770:dg2_g10 - */ - wa_mcr_masked_en(wal, XEHP_COMMON_SLICE_CHICKEN3, - GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); - } - /* Wa_16013271637:dg2 */ wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1, MSC_MSAA_REODER_BUF_BYPASS_DISABLE); /* Wa_14014947963:dg2 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) - wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); + wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); /* Wa_18018764978:dg2 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_C0, STEP_FOREVER) || - IS_DG2_G11(engine->i915) || IS_DG2_G12(engine->i915)) - wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL); - - /* Wa_15010599737:dg2 */ - wa_mcr_masked_en(wal, CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN); + wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL); /* Wa_18019271663:dg2 */ wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE); } -static void mtl_ctx_gt_tuning_init(struct intel_engine_cs *engine, - struct i915_wa_list *wal) +static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { - struct drm_i915_private *i915 = engine->i915; + struct intel_gt *gt = engine->gt; dg2_ctx_gt_tuning_init(engine, wal); - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER)) wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false); } -static void mtl_ctx_workarounds_init(struct intel_engine_cs *engine, - struct i915_wa_list *wal) +static void xelpg_ctx_workarounds_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) { - struct drm_i915_private *i915 = engine->i915; + struct intel_gt *gt = engine->gt; - mtl_ctx_gt_tuning_init(engine, wal); + xelpg_ctx_gt_tuning_init(engine, wal); - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { /* Wa_14014947963 */ wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000); @@ -931,8 +905,8 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, if (engine->class != RENDER_CLASS) goto done; - if (IS_METEORLAKE(i915)) - mtl_ctx_workarounds_init(engine, wal); + if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71))) + xelpg_ctx_workarounds_init(engine, wal); else if (IS_PONTEVECCHIO(i915)) ; /* noop; none at this time */ else if (IS_DG2(i915)) @@ -1606,31 +1580,11 @@ xehpsdv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) static void dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) { - struct intel_engine_cs *engine; - int id; - xehp_init_mcr(gt, wal); /* Wa_14011060649:dg2 */ wa_14011060649(gt, wal); - /* - * Although there are per-engine instances of these registers, - * they technically exist outside the engine itself and are not - * impacted by engine resets. Furthermore, they're part of the - * GuC blacklist so trying to treat them as engine workarounds - * will result in GuC initialization failure and a wedged GPU. - */ - for_each_engine(engine, gt, id) { - if (engine->class != VIDEO_DECODE_CLASS) - continue; - - /* Wa_16010515920:dg2_g10 */ - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) - wa_write_or(wal, VDBOX_CGCTL3F18(engine->mmio_base), - ALNUNIT_CLKGATE_DIS); - } - if (IS_DG2_G10(gt->i915)) { /* Wa_22010523718:dg2 */ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, @@ -1641,70 +1595,15 @@ dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) DSS_ROUTER_CLKGATE_DIS); } - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) { - /* Wa_14012362059:dg2 */ - wa_mcr_write_or(wal, XEHP_MERT_MOD_CTRL, FORCE_MISS_FTLB); - } - - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) { - /* Wa_14010948348:dg2_g10 */ - wa_write_or(wal, UNSLCGCTL9430, MSQDUNIT_CLKGATE_DIS); - - /* Wa_14011037102:dg2_g10 */ - wa_write_or(wal, UNSLCGCTL9444, LTCDD_CLKGATE_DIS); - - /* Wa_14011371254:dg2_g10 */ - wa_mcr_write_or(wal, XEHP_SLICE_UNIT_LEVEL_CLKGATE, NODEDSS_CLKGATE_DIS); - - /* Wa_14011431319:dg2_g10 */ - wa_write_or(wal, UNSLCGCTL9440, GAMTLBOACS_CLKGATE_DIS | - GAMTLBVDBOX7_CLKGATE_DIS | - GAMTLBVDBOX6_CLKGATE_DIS | - GAMTLBVDBOX5_CLKGATE_DIS | - GAMTLBVDBOX4_CLKGATE_DIS | - GAMTLBVDBOX3_CLKGATE_DIS | - GAMTLBVDBOX2_CLKGATE_DIS | - GAMTLBVDBOX1_CLKGATE_DIS | - GAMTLBVDBOX0_CLKGATE_DIS | - GAMTLBKCR_CLKGATE_DIS | - GAMTLBGUC_CLKGATE_DIS | - GAMTLBBLT_CLKGATE_DIS); - wa_write_or(wal, UNSLCGCTL9444, GAMTLBGFXA0_CLKGATE_DIS | - GAMTLBGFXA1_CLKGATE_DIS | - GAMTLBCOMPA0_CLKGATE_DIS | - GAMTLBCOMPA1_CLKGATE_DIS | - GAMTLBCOMPB0_CLKGATE_DIS | - GAMTLBCOMPB1_CLKGATE_DIS | - GAMTLBCOMPC0_CLKGATE_DIS | - GAMTLBCOMPC1_CLKGATE_DIS | - GAMTLBCOMPD0_CLKGATE_DIS | - GAMTLBCOMPD1_CLKGATE_DIS | - GAMTLBMERT_CLKGATE_DIS | - GAMTLBVEBOX3_CLKGATE_DIS | - GAMTLBVEBOX2_CLKGATE_DIS | - GAMTLBVEBOX1_CLKGATE_DIS | - GAMTLBVEBOX0_CLKGATE_DIS); - - /* Wa_14010569222:dg2_g10 */ - wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, - GAMEDIA_CLKGATE_DIS); - - /* Wa_14011028019:dg2_g10 */ - wa_mcr_write_or(wal, SSMCGCTL9530, RTFUNIT_CLKGATE_DIS); - - /* Wa_14010680813:dg2_g10 */ - wa_mcr_write_or(wal, XEHP_GAMSTLB_CTRL, - CONTROL_BLOCK_CLKGATE_DIS | - EGRESS_BLOCK_CLKGATE_DIS | - TAG_BLOCK_CLKGATE_DIS); - } - /* Wa_14014830051:dg2 */ wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); - /* Wa_14015795083 */ - wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE); + /* + * Wa_14015795083 + * Skip verification for possibly locked register. + */ + wa_add(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE, + 0, 0, false); /* Wa_18018781329 */ wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB); @@ -1747,8 +1646,8 @@ xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) /* Wa_22016670082 */ wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE); - if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(gt->i915, P, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { /* Wa_14014830051 */ wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN); @@ -1791,10 +1690,8 @@ xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) */ static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal) { - if (IS_METEORLAKE(gt->i915)) { - if (gt->type != GT_MEDIA) - wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); - + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) { + wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS); wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS); } @@ -1818,15 +1715,15 @@ gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal) gt_tuning_settings(gt, wal); if (gt->type == GT_MEDIA) { - if (MEDIA_VER(i915) >= 13) + if (MEDIA_VER_FULL(i915) == IP_VER(13, 0)) xelpmp_gt_workarounds_init(gt, wal); else - MISSING_CASE(MEDIA_VER(i915)); + MISSING_CASE(MEDIA_VER_FULL(i915)); return; } - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71))) xelpg_gt_workarounds_init(gt, wal); else if (IS_PONTEVECCHIO(i915)) pvc_gt_workarounds_init(gt, wal); @@ -1884,10 +1781,10 @@ wa_verify(struct intel_gt *gt, const struct i915_wa *wa, u32 cur, const char *name, const char *from) { if ((cur ^ wa->set) & wa->read) { - drm_err(>->i915->drm, - "%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n", - name, from, i915_mmio_reg_offset(wa->reg), - cur, cur & wa->read, wa->set & wa->read); + gt_err(gt, + "%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n", + name, from, i915_mmio_reg_offset(wa->reg), + cur, cur & wa->read, wa->set & wa->read); return false; } @@ -2242,29 +2139,10 @@ static void dg2_whitelist_build(struct intel_engine_cs *engine) switch (engine->class) { case RENDER_CLASS: - /* - * Wa_1507100340:dg2_g10 - * - * This covers 4 registers which are next to one another : - * - PS_INVOCATION_COUNT - * - PS_INVOCATION_COUNT_UDW - * - PS_DEPTH_COUNT - * - PS_DEPTH_COUNT_UDW - */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) - whitelist_reg_ext(w, PS_INVOCATION_COUNT, - RING_FORCE_TO_NONPRIV_ACCESS_RD | - RING_FORCE_TO_NONPRIV_RANGE_4); - /* Required by recommended tuning setting (not a workaround) */ whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3); break; - case COMPUTE_CLASS: - /* Wa_16011157294:dg2_g10 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_B0)) - whitelist_reg(w, GEN9_CTX_PREEMPT_REG); - break; default: break; } @@ -2294,7 +2172,7 @@ static void pvc_whitelist_build(struct intel_engine_cs *engine) blacklist_trtt(engine); } -static void mtl_whitelist_build(struct intel_engine_cs *engine) +static void xelpg_whitelist_build(struct intel_engine_cs *engine) { struct i915_wa_list *w = &engine->whitelist; @@ -2316,8 +2194,10 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine) wa_init_start(w, engine->gt, "whitelist", engine->name); - if (IS_METEORLAKE(i915)) - mtl_whitelist_build(engine); + if (engine->gt->type == GT_MEDIA) + ; /* none yet */ + else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71))) + xelpg_whitelist_build(engine); else if (IS_PONTEVECCHIO(i915)) pvc_whitelist_build(engine); else if (IS_DG2(i915)) @@ -2415,62 +2295,35 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) } } -static bool needs_wa_1308578152(struct intel_engine_cs *engine) -{ - return intel_sseu_find_first_xehp_dss(&engine->gt->info.sseu, 0, 0) >= - GEN_DSS_PER_GSLICE; -} - static void rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; + struct intel_gt *gt = engine->gt; - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) { /* Wa_22014600077 */ wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, ENABLE_EU_COUNT_FOR_TDL_FLUSH); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(i915) || IS_DG2_G12(i915)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || + IS_DG2(i915)) { /* Wa_1509727124 */ wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, SC_DISABLE_POWER_OPTIMIZATION_EBB); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(i915) || IS_DG2_G12(i915) || - IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_DG2(i915)) { /* Wa_22012856258 */ wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_READ_SUPPRESSION); } - if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { - /* Wa_14013392000:dg2_g11 */ - wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_ENABLE_LARGE_GRF_MODE); - } - - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) { - /* Wa_14012419201:dg2 */ - wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, - GEN12_DISABLE_HDR_PAST_PAYLOAD_HOLD_FIX); - } - - /* Wa_1308578152:dg2_g10 when first gslice is fused off */ - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) && - needs_wa_1308578152(engine)) { - wa_masked_dis(wal, GEN12_CS_DEBUG_MODE1_CCCSUNIT_BE_COMMON, - GEN12_REPLAY_MODE_GRANULARITY); - } - - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(i915) || IS_DG2_G12(i915)) { + if (IS_DG2(i915)) { /* * Wa_22010960976:dg2 * Wa_14013347512:dg2 @@ -2479,34 +2332,15 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { - /* - * Wa_1608949956:dg2_g10 - * Wa_14010198302:dg2_g10 - */ - wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, - MDQ_ARBITRATION_MODE | UGM_BACKUP_MODE); + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || + IS_DG2(i915)) { + /* Wa_14015150844 */ + wa_mcr_add(wal, XEHP_HDC_CHICKEN0, 0, + _MASKED_BIT_ENABLE(DIS_ATOMIC_CHAINING_TYPED_WRITES), + 0, true); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) - /* Wa_22010430635:dg2 */ - wa_mcr_masked_en(wal, - GEN9_ROW_CHICKEN4, - GEN12_DISABLE_GRF_CLEAR); - - /* Wa_14013202645:dg2 */ - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_B0)) - wa_mcr_write_or(wal, RT_CTRL, DIS_NULL_QUERY); - - /* Wa_22012532006:dg2 */ - if (IS_DG2_GRAPHICS_STEP(engine->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(engine->i915, G11, STEP_A0, STEP_B0)) - wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, - DG2_DISABLE_ROUND_ENABLE_ALLOW_FOR_SSLA); - - if (IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) || - IS_DG2_G10(i915)) { + if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) { /* Wa_22014600077:dg2 */ wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH), @@ -2514,6 +2348,19 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) true); } + if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || + IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { + /* + * Wa_1606700617:tgl,dg1,adl-p + * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p + * Wa_14010826681:tgl,dg1,rkl,adl-p + * Wa_18019627453:dg2 + */ + wa_masked_en(wal, + GEN9_CS_DEBUG_MODE1, + FF_DOP_CLOCK_GATE_DISABLE); + } + if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { /* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */ @@ -2527,19 +2374,11 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) */ wa_write_or(wal, GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE); - } - if (IS_ALDERLAKE_P(i915) || IS_DG2(i915) || IS_ALDERLAKE_S(i915) || - IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { - /* - * Wa_1606700617:tgl,dg1,adl-p - * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p - * Wa_14010826681:tgl,dg1,rkl,adl-p - * Wa_18019627453:dg2 - */ - wa_masked_en(wal, - GEN9_CS_DEBUG_MODE1, - FF_DOP_CLOCK_GATE_DISABLE); + /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ + wa_mcr_masked_en(wal, + GEN10_SAMPLER_MODE, + ENABLE_SMALLPL); } if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || @@ -2566,14 +2405,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN8_RC_SEMA_IDLE_MSG_DISABLE); } - if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || - IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) { - /* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */ - wa_mcr_masked_en(wal, - GEN10_SAMPLER_MODE, - ENABLE_SMALLPL); - } - if (GRAPHICS_VER(i915) == 11) { /* This is not an Wa. Enable for better image quality */ wa_masked_en(wal, @@ -2975,10 +2806,12 @@ ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * function invoked by __intel_engine_init_ctx_wa(). */ static void -add_render_compute_tuning_settings(struct drm_i915_private *i915, +add_render_compute_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal) { - if (IS_METEORLAKE(i915) || IS_DG2(i915)) + struct drm_i915_private *i915 = gt->i915; + + if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) || IS_DG2(i915)) wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512); /* @@ -3007,8 +2840,9 @@ static void general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { struct drm_i915_private *i915 = engine->i915; + struct intel_gt *gt = engine->gt; - add_render_compute_tuning_settings(i915, wal); + add_render_compute_tuning_settings(gt, wal); if (GRAPHICS_VER(i915) >= 11) { /* This is not a Wa (although referred to as @@ -3029,13 +2863,13 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li GEN11_INDIRECT_STATE_BASE_ADDR_OVERRIDE); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_B0, STEP_FOREVER) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_B0, STEP_FOREVER)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER)) /* Wa_14017856879 */ wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH); - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) /* * Wa_14017066071 * Wa_14017654203 @@ -3043,37 +2877,47 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE, MTL_DISABLE_SAMPLER_SC_OOO); - if (IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0)) + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) /* Wa_22015279794 */ wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS, DISABLE_PREFETCH_INTO_IC); - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || - IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_FOREVER) || - IS_DG2_G11(i915) || IS_DG2_G12(i915)) { + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || + IS_DG2(i915)) { /* Wa_22013037850 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DISABLE_128B_EVICTION_COMMAND_UDW); + + /* Wa_18017747507 */ + wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || + IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) || IS_PONTEVECCHIO(i915) || IS_DG2(i915)) { /* Wa_22014226127 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE); } - if (IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_B0) || - IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_B0) || - IS_DG2(i915)) { - /* Wa_18017747507 */ - wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE); + if (IS_PONTEVECCHIO(i915) || IS_DG2(i915)) { + /* Wa_14015227452:dg2,pvc */ + wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); + + /* Wa_16015675438:dg2,pvc */ + wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_B0, STEP_C0) || - IS_DG2_G11(i915)) { + if (IS_DG2(i915)) { + /* + * Wa_16011620976:dg2_g11 + * Wa_22015475538:dg2 + */ + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); + } + + if (IS_DG2_G11(i915)) { /* * Wa_22012826095:dg2 * Wa_22013059131:dg2 @@ -3085,18 +2929,23 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li /* Wa_22013059131:dg2 */ wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, FORCE_1_SUB_MESSAGE_PER_FRAGMENT); - } - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { /* - * Wa_14010918519:dg2_g10 + * Wa_22012654132 * - * LSC_CHICKEN_BIT_0 always reads back as 0 is this stepping, - * so ignoring verification. + * Note that register 0xE420 is write-only and cannot be read + * back for verification on DG2 (due to Wa_14012342262), so + * we need to explicitly skip the readback. */ - wa_mcr_add(wal, LSC_CHICKEN_BIT_0_UDW, 0, - FORCE_SLM_FENCE_SCOPE_TO_TILE | FORCE_UGM_FENCE_SCOPE_TO_TILE, - 0, false); + wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, + _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), + 0 /* write-only, so skip validation */, + true); + } + + if (IS_DG2_G10(i915) || IS_DG2_G12(i915)) { + /* Wa_18028616096 */ + wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3); } if (IS_XEHPSDV(i915)) { @@ -3114,35 +2963,6 @@ general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_li wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1, GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); } - - if (IS_DG2(i915) || IS_PONTEVECCHIO(i915)) { - /* Wa_14015227452:dg2,pvc */ - wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE); - - /* Wa_16015675438:dg2,pvc */ - wa_masked_en(wal, FF_SLICE_CS_CHICKEN2, GEN12_PERF_FIX_BALANCING_CFE_DISABLE); - } - - if (IS_DG2(i915)) { - /* - * Wa_16011620976:dg2_g11 - * Wa_22015475538:dg2 - */ - wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8); - } - - if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_C0) || IS_DG2_G11(i915)) - /* - * Wa_22012654132 - * - * Note that register 0xE420 is write-only and cannot be read - * back for verification on DG2 (due to Wa_14012342262), so - * we need to explicitly skip the readback. - */ - wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0, - _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC), - 0 /* write-only, so skip validation */, - true); } static void diff --git a/drivers/gpu/drm/i915/gt/selftest_migrate.c b/drivers/gpu/drm/i915/gt/selftest_migrate.c index 3def5ca72d..3eff364ccf 100644 --- a/drivers/gpu/drm/i915/gt/selftest_migrate.c +++ b/drivers/gpu/drm/i915/gt/selftest_migrate.c @@ -710,7 +710,7 @@ static int threaded_migrate(struct intel_migrate *migrate, thread[i].tsk = tsk; } - msleep(10); /* start all threads before we kthread_stop() */ + msleep(10 * n_cpus); /* start all threads before we kthread_stop() */ for (i = 0; i < n_cpus; ++i) { struct task_struct *tsk = thread[i].tsk; @@ -719,11 +719,9 @@ static int threaded_migrate(struct intel_migrate *migrate, if (IS_ERR_OR_NULL(tsk)) continue; - status = kthread_stop(tsk); + status = kthread_stop_put(tsk); if (status && !err) err = status; - - put_task_struct(tsk); } kfree(thread); diff --git a/drivers/gpu/drm/i915/gt/selftest_tlb.c b/drivers/gpu/drm/i915/gt/selftest_tlb.c index 7e41f69fc8..00b872b638 100644 --- a/drivers/gpu/drm/i915/gt/selftest_tlb.c +++ b/drivers/gpu/drm/i915/gt/selftest_tlb.c @@ -136,8 +136,15 @@ pte_tlbinv(struct intel_context *ce, i915_request_get(rq); i915_request_add(rq); - /* Short sleep to sanitycheck the batch is spinning before we begin */ - msleep(10); + /* + * Short sleep to sanitycheck the batch is spinning before we begin. + * FIXME: Why is GSC so slow? + */ + if (ce->engine->class == OTHER_CLASS) + msleep(200); + else + msleep(10); + if (va == vb) { if (!i915_request_completed(rq)) { pr_err("%s(%s): Semaphore sanitycheck failed %llx, with alignment %llx, using PTE size %x (phys %x, sg %x)\n", diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h index f359bef046..33f253410d 100644 --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h @@ -138,6 +138,8 @@ enum intel_guc_action { INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601, INTEL_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507, INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A, + INTEL_GUC_ACTION_TLB_INVALIDATION = 0x7000, + INTEL_GUC_ACTION_TLB_INVALIDATION_DONE = 0x7001, INTEL_GUC_ACTION_STATE_CAPTURE_NOTIFICATION = 0x8002, INTEL_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003, INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004, @@ -181,4 +183,35 @@ enum intel_guc_state_capture_event_status { #define INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK 0x000000FF +#define INTEL_GUC_TLB_INVAL_TYPE_MASK REG_GENMASK(7, 0) +#define INTEL_GUC_TLB_INVAL_MODE_MASK REG_GENMASK(11, 8) +#define INTEL_GUC_TLB_INVAL_FLUSH_CACHE REG_BIT(31) + +enum intel_guc_tlb_invalidation_type { + INTEL_GUC_TLB_INVAL_ENGINES = 0x0, + INTEL_GUC_TLB_INVAL_GUC = 0x3, +}; + +/* + * 0: Heavy mode of Invalidation: + * The pipeline of the engine(s) for which the invalidation is targeted to is + * blocked, and all the in-flight transactions are guaranteed to be Globally + * Observed before completing the TLB invalidation + * 1: Lite mode of Invalidation: + * TLBs of the targeted engine(s) are immediately invalidated. + * In-flight transactions are NOT guaranteed to be Globally Observed before + * completing TLB invalidation. + * Light Invalidation Mode is to be used only when + * it can be guaranteed (by SW) that the address translations remain invariant + * for the in-flight transactions across the TLB invalidation. In other words, + * this mode can be used when the TLB invalidation is intended to clear out the + * stale cached translations that are no longer in use. Light Invalidation Mode + * is much faster than the Heavy Invalidation Mode, as it does not wait for the + * in-flight transactions to be GOd. + */ +enum intel_guc_tlb_inval_mode { + INTEL_GUC_TLB_INVAL_MODE_HEAVY = 0x0, + INTEL_GUC_TLB_INVAL_MODE_LITE = 0x1, +}; + #endif /* _ABI_GUC_ACTIONS_ABI_H */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c index 0d3b22a743..453d855dd1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c @@ -68,8 +68,7 @@ static void gsc_work(struct work_struct *work) * A proxy failure right after firmware load means the proxy-init * step has failed so mark GSC as not usable after this */ - drm_err(>->i915->drm, - "GSC proxy handler failed to init\n"); + gt_err(gt, "GSC proxy handler failed to init\n"); intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL); } goto out_put; @@ -83,11 +82,10 @@ static void gsc_work(struct work_struct *work) * status register to check if the proxy init was actually successful */ if (intel_gsc_uc_fw_proxy_init_done(gsc, false)) { - drm_dbg(>->i915->drm, "GSC Proxy initialized\n"); + gt_dbg(gt, "GSC Proxy initialized\n"); intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_RUNNING); } else { - drm_err(>->i915->drm, - "GSC status reports proxy init not complete\n"); + gt_err(gt, "GSC status reports proxy init not complete\n"); intel_uc_fw_change_status(&gsc->fw, INTEL_UC_FIRMWARE_LOAD_FAIL); } } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c index 89ed5ee9cd..2fde5c360c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.c @@ -81,8 +81,17 @@ out_rq: i915_request_add(rq); - if (!err && i915_request_wait(rq, 0, msecs_to_jiffies(500)) < 0) - err = -ETIME; + if (!err) { + /* + * Start timeout for i915_request_wait only after considering one possible + * pending GSC-HECI submission cycle on the other (non-privileged) path. + */ + if (wait_for(i915_request_started(rq), GSC_HECI_REPLY_LATENCY_MS)) + drm_dbg(&gsc_uc_to_gt(gsc)->i915->drm, + "Delay in gsc-heci-priv submission to gsccs-hw"); + if (i915_request_wait(rq, 0, msecs_to_jiffies(GSC_HECI_REPLY_LATENCY_MS)) < 0) + err = -ETIME; + } i915_request_put(rq); @@ -186,6 +195,13 @@ out_rq: i915_request_add(rq); if (!err) { + /* + * Start timeout for i915_request_wait only after considering one possible + * pending GSC-HECI submission cycle on the other (privileged) path. + */ + if (wait_for(i915_request_started(rq), GSC_HECI_REPLY_LATENCY_MS)) + drm_dbg(&gsc_uc_to_gt(gsc)->i915->drm, + "Delay in gsc-heci-non-priv submission to gsccs-hw"); if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, msecs_to_jiffies(timeout_ms)) < 0) err = -ETIME; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h index 09d3fbdad0..c4308291c0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc_heci_cmd_submit.h @@ -12,6 +12,12 @@ struct i915_vma; struct intel_context; struct intel_gsc_uc; +#define GSC_HECI_REPLY_LATENCY_MS 500 +/* + * Max FW response time is 500ms, but this should be counted from the time the + * command has hit the GSC-CS hardware, not the preceding handoff to GuC CTB. + */ + struct intel_gsc_mtl_header { u32 validity_marker; #define GSC_HECI_VALIDITY_MARKER 0xA578875A diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 569b5fe94c..3f3df1166b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -159,6 +159,21 @@ static void gen11_disable_guc_interrupts(struct intel_guc *guc) gen11_reset_guc_interrupts(guc); } +static void guc_dead_worker_func(struct work_struct *w) +{ + struct intel_guc *guc = container_of(w, struct intel_guc, dead_guc_worker); + struct intel_gt *gt = guc_to_gt(guc); + unsigned long last = guc->last_dead_guc_jiffies; + unsigned long delta = jiffies_to_msecs(jiffies - last); + + if (delta < 500) { + intel_gt_set_wedged(gt); + } else { + intel_gt_handle_error(gt, ALL_ENGINES, I915_ERROR_CAPTURE, "dead GuC"); + guc->last_dead_guc_jiffies = jiffies; + } +} + void intel_guc_init_early(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -171,6 +186,8 @@ void intel_guc_init_early(struct intel_guc *guc) intel_guc_slpc_init_early(&guc->slpc); intel_guc_rc_init_early(guc); + INIT_WORK(&guc->dead_guc_worker, guc_dead_worker_func); + mutex_init(&guc->send_mutex); spin_lock_init(&guc->irq_lock); if (GRAPHICS_VER(i915) >= 11) { @@ -272,18 +289,14 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 50)) flags |= GUC_WA_POLLCS; - /* Wa_16011759253:dg2_g10:a0 */ - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_B0)) - flags |= GUC_WA_GAM_CREDITS; - /* Wa_14014475959 */ - if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) || IS_DG2(gt->i915)) flags |= GUC_WA_HOLD_CCS_SWITCHOUT; /* - * Wa_14012197797:dg2_g10:a0,dg2_g11:a0 - * Wa_22011391025:dg2_g10,dg2_g11,dg2_g12 + * Wa_14012197797 + * Wa_22011391025 * * The same WA bit is used for both and 22011391025 is applicable to * all DG2. @@ -292,28 +305,26 @@ static u32 guc_ctl_wa_flags(struct intel_guc *guc) flags |= GUC_WA_DUAL_QUEUE; /* Wa_22011802037: graphics version 11/12 */ - if (IS_MTL_GRAPHICS_STEP(gt->i915, M, STEP_A0, STEP_B0) || - (GRAPHICS_VER(gt->i915) >= 11 && - GRAPHICS_VER_FULL(gt->i915) < IP_VER(12, 70))) + if (intel_engine_reset_needs_wa_22011802037(gt)) flags |= GUC_WA_PRE_PARSER; - /* Wa_16011777198:dg2 */ - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)) - flags |= GUC_WA_RCS_RESET_BEFORE_RC6; - /* - * Wa_22012727170:dg2_g10[a0-c0), dg2_g11[a0..) - * Wa_22012727685:dg2_g11[a0..) + * Wa_22012727170 + * Wa_22012727685 */ - if (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) || - IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_FOREVER)) + if (IS_DG2_G11(gt->i915)) flags |= GUC_WA_CONTEXT_ISOLATION; /* Wa_16015675438 */ if (!RCS_MASK(gt)) flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST; + /* Wa_14018913170 */ + if (GUC_FIRMWARE_VER(guc) >= MAKE_GUC_VER(70, 7, 0)) { + if (IS_DG2(gt->i915) || IS_METEORLAKE(gt->i915) || IS_PONTEVECCHIO(gt->i915)) + flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6; + } + return flags; } @@ -461,6 +472,8 @@ void intel_guc_fini(struct intel_guc *guc) if (!intel_uc_fw_is_loadable(&guc->fw)) return; + flush_work(&guc->dead_guc_worker); + if (intel_guc_slpc_is_used(guc)) intel_guc_slpc_fini(&guc->slpc); @@ -585,6 +598,20 @@ out: return ret; } +int intel_guc_crash_process_msg(struct intel_guc *guc, u32 action) +{ + if (action == INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED) + guc_err(guc, "Crash dump notification\n"); + else if (action == INTEL_GUC_ACTION_NOTIFY_EXCEPTION) + guc_err(guc, "Exception notification\n"); + else + guc_err(guc, "Unknown crash notification: 0x%04X\n", action); + + queue_work(system_unbound_wq, &guc->dead_guc_worker); + + return 0; +} + int intel_guc_to_host_process_recv_msg(struct intel_guc *guc, const u32 *payload, u32 len) { @@ -601,6 +628,9 @@ int intel_guc_to_host_process_recv_msg(struct intel_guc *guc, if (msg & INTEL_GUC_RECV_MSG_EXCEPTION) guc_err(guc, "Received early exception notification!\n"); + if (msg & (INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED | INTEL_GUC_RECV_MSG_EXCEPTION)) + queue_work(system_unbound_wq, &guc->dead_guc_worker); + return 0; } @@ -640,6 +670,8 @@ int intel_guc_suspend(struct intel_guc *guc) return 0; if (intel_guc_submission_is_used(guc)) { + flush_work(&guc->dead_guc_worker); + /* * This H2G MMIO command tears down the GuC in two steps. First it will * generate a G2H CTB for every active context indicating a reset. In diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 8dc291ff00..2b6dfe62c8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -79,6 +79,18 @@ struct intel_guc { */ atomic_t outstanding_submission_g2h; + /** @tlb_lookup: xarray to store all pending TLB invalidation requests */ + struct xarray tlb_lookup; + + /** + * @serial_slot: id to the initial waiter created in tlb_lookup, + * which is used only when failed to allocate new waiter. + */ + u32 serial_slot; + + /** @next_seqno: the next id (sequence number) to allocate. */ + u32 next_seqno; + /** @interrupts: pointers to GuC interrupt-managing functions. */ struct { bool enabled; @@ -266,6 +278,20 @@ struct intel_guc { unsigned long last_stat_jiffies; } timestamp; + /** + * @dead_guc_worker: Asynchronous worker thread for forcing a GuC reset. + * Specifically used when the G2H handler wants to issue a reset. Resets + * require flushing the G2H queue. So, the G2H processing itself must not + * trigger a reset directly. Instead, go via this worker. + */ + struct work_struct dead_guc_worker; + /** + * @last_dead_guc_jiffies: timestamp of previous 'dead guc' occurrance + * used to prevent a fundamentally broken system from continuously + * reloading the GuC. + */ + unsigned long last_dead_guc_jiffies; + #ifdef CONFIG_DRM_I915_SELFTEST /** * @number_guc_id_stolen: The number of guc_ids that have been stolen @@ -274,6 +300,11 @@ struct intel_guc { #endif }; +struct intel_guc_tlb_wait { + struct wait_queue_head wq; + bool busy; +}; + /* * GuC version number components are only 8-bit, so converting to a 32bit 8.8.8 * integer works. @@ -281,6 +312,7 @@ struct intel_guc { #define MAKE_GUC_VER(maj, min, pat) (((maj) << 16) | ((min) << 8) | (pat)) #define MAKE_GUC_VER_STRUCT(ver) MAKE_GUC_VER((ver).major, (ver).minor, (ver).patch) #define GUC_SUBMIT_VER(guc) MAKE_GUC_VER_STRUCT((guc)->submission_version) +#define GUC_FIRMWARE_VER(guc) MAKE_GUC_VER_STRUCT((guc)->fw.file_selected.ver) static inline struct intel_guc *log_to_guc(struct intel_guc_log *log) { @@ -476,6 +508,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); int intel_guc_error_capture_process_msg(struct intel_guc *guc, const u32 *msg, u32 len); +int intel_guc_crash_process_msg(struct intel_guc *guc, u32 action); struct intel_engine_cs * intel_guc_lookup_engine(struct intel_guc *guc, u8 guc_class, u8 instance); @@ -499,4 +532,10 @@ void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p); int intel_guc_sched_disable_gucid_threshold_max(struct intel_guc *guc); +bool intel_guc_tlb_invalidation_is_available(struct intel_guc *guc); +int intel_guc_invalidate_tlb_engines(struct intel_guc *guc); +int intel_guc_invalidate_tlb_guc(struct intel_guc *guc); +int intel_guc_tlb_invalidation_done(struct intel_guc *guc, + const u32 *payload, u32 len); +void wake_up_all_tlb_invalidate(struct intel_guc *guc); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c index 331cec07c1..a4da0208c8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_capture.c @@ -1101,8 +1101,8 @@ guc_capture_create_prealloc_nodes(struct intel_guc *guc) static int guc_capture_extract_reglists(struct intel_guc *guc, struct __guc_capture_bufstate *buf) { - struct guc_state_capture_group_header_t ghdr = {0}; - struct guc_state_capture_header_t hdr = {0}; + struct guc_state_capture_group_header_t ghdr = {}; + struct guc_state_capture_header_t hdr = {}; struct __guc_capture_parsed_output *node = NULL; struct guc_mmio_reg *regs = NULL; int i, numlists, numregs, ret = 0; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 97eadd0818..89e314b375 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -96,13 +96,40 @@ struct ct_request { struct ct_incoming_msg { struct list_head link; u32 size; - u32 msg[]; + u32 msg[] __counted_by(size); }; enum { CTB_SEND = 0, CTB_RECV = 1 }; enum { CTB_OWNER_HOST = 0 }; +/* + * Some H2G commands involve a synchronous response that the driver needs + * to wait for. In such cases, a timeout is required to prevent the driver + * from waiting forever in the case of an error (either no error response + * is defined in the protocol or something has died and requires a reset). + * The specific command may be defined as having a time bound response but + * the CT is a queue and that time guarantee only starts from the point + * when the command reaches the head of the queue and is processed by GuC. + * + * Ideally there would be a helper to report the progress of a given + * command through the CT. However, that would require a significant + * amount of work in the CT layer. In the meantime, provide a reasonable + * estimation of the worst case latency it should take for the entire + * queue to drain. And therefore, how long a caller should wait before + * giving up on their request. The current estimate is based on empirical + * measurement of a test that fills the buffer with context creation and + * destruction requests as they seem to be the slowest operation. + */ +long intel_guc_ct_max_queue_time_jiffies(void) +{ + /* + * A 4KB buffer full of context destroy commands takes a little + * over a second to process so bump that to 2s to be super safe. + */ + return (CTB_H2G_BUFFER_SIZE * HZ) / SZ_2K; +} + static void ct_receive_tasklet_func(struct tasklet_struct *t); static void ct_incoming_request_worker_func(struct work_struct *w); @@ -1112,12 +1139,11 @@ static int ct_process_request(struct intel_guc_ct *ct, struct ct_incoming_msg *r ret = 0; break; case INTEL_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED: - CT_ERROR(ct, "Received GuC crash dump notification!\n"); - ret = 0; - break; case INTEL_GUC_ACTION_NOTIFY_EXCEPTION: - CT_ERROR(ct, "Received GuC exception notification!\n"); - ret = 0; + ret = intel_guc_crash_process_msg(guc, action); + break; + case INTEL_GUC_ACTION_TLB_INVALIDATION_DONE: + ret = intel_guc_tlb_invalidation_done(guc, payload, len); break; default: ret = -EOPNOTSUPP; @@ -1190,9 +1216,17 @@ static int ct_handle_event(struct intel_guc_ct *ct, struct ct_incoming_msg *requ switch (action) { case INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE: case INTEL_GUC_ACTION_DEREGISTER_CONTEXT_DONE: + case INTEL_GUC_ACTION_TLB_INVALIDATION_DONE: g2h_release_space(ct, request->size); } + /* + * TLB invalidation responses must be handled immediately as processing + * of other G2H notifications may be blocked by an invalidation request. + */ + if (action == INTEL_GUC_ACTION_TLB_INVALIDATION_DONE) + return ct_process_request(ct, request); + spin_lock_irqsave(&ct->requests.lock, flags); list_add_tail(&request->link, &ct->requests.incoming); spin_unlock_irqrestore(&ct->requests.lock, flags); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h index 58e42901ff..2c4bb9a941 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.h @@ -104,6 +104,8 @@ struct intel_guc_ct { #endif }; +long intel_guc_ct_max_queue_time_jiffies(void); + void intel_guc_ct_init_early(struct intel_guc_ct *ct); int intel_guc_ct_init(struct intel_guc_ct *ct); void intel_guc_ct_fini(struct intel_guc_ct *ct); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h index b4d56eccfb..8ae1846431 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h @@ -22,6 +22,7 @@ /* Payload length only i.e. don't include G2H header length */ #define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 2 #define G2H_LEN_DW_DEREGISTER_CONTEXT 1 +#define G2H_LEN_DW_INVALIDATE_TLB 1 #define GUC_CONTEXT_DISABLE 0 #define GUC_CONTEXT_ENABLE 1 @@ -100,6 +101,7 @@ #define GUC_WA_HOLD_CCS_SWITCHOUT BIT(17) #define GUC_WA_POLLCS BIT(18) #define GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21) +#define GUC_WA_ENABLE_TSC_CHECK_ON_RC6 BIT(22) #define GUC_CTL_FEATURE 2 #define GUC_CTL_ENABLE_SLPC BIT(2) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index 477df260ae..2dfb07cc4b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -138,17 +138,6 @@ static int guc_action_slpc_set_param(struct intel_guc *guc, u8 id, u32 value) return ret > 0 ? -EPROTO : ret; } -static int guc_action_slpc_unset_param(struct intel_guc *guc, u8 id) -{ - u32 request[] = { - GUC_ACTION_HOST2GUC_PC_SLPC_REQUEST, - SLPC_EVENT(SLPC_EVENT_PARAMETER_UNSET, 1), - id, - }; - - return intel_guc_send(guc, request, ARRAY_SIZE(request)); -} - static bool slpc_is_running(struct intel_guc_slpc *slpc) { return slpc_get_state(slpc) == SLPC_GLOBAL_STATE_RUNNING; @@ -199,15 +188,6 @@ static int slpc_set_param(struct intel_guc_slpc *slpc, u8 id, u32 value) return ret; } -static int slpc_unset_param(struct intel_guc_slpc *slpc, u8 id) -{ - struct intel_guc *guc = slpc_to_guc(slpc); - - GEM_BUG_ON(id >= SLPC_MAX_PARAM); - - return guc_action_slpc_unset_param(guc, id); -} - static int slpc_force_min_freq(struct intel_guc_slpc *slpc, u32 freq) { struct intel_guc *guc = slpc_to_guc(slpc); @@ -672,49 +652,6 @@ static void slpc_get_rp_values(struct intel_guc_slpc *slpc) slpc->boost_freq = slpc->rp0_freq; } -/** - * intel_guc_slpc_override_gucrc_mode() - override GUCRC mode - * @slpc: pointer to intel_guc_slpc. - * @mode: new value of the mode. - * - * This function will override the GUCRC mode. - * - * Return: 0 on success, non-zero error code on failure. - */ -int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode) -{ - int ret; - struct drm_i915_private *i915 = slpc_to_i915(slpc); - intel_wakeref_t wakeref; - - if (mode >= SLPC_GUCRC_MODE_MAX) - return -EINVAL; - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - ret = slpc_set_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE, mode); - if (ret) - guc_err(slpc_to_guc(slpc), "Override RC mode %d failed: %pe\n", - mode, ERR_PTR(ret)); - } - - return ret; -} - -int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc) -{ - struct drm_i915_private *i915 = slpc_to_i915(slpc); - intel_wakeref_t wakeref; - int ret = 0; - - with_intel_runtime_pm(&i915->runtime_pm, wakeref) { - ret = slpc_unset_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE); - if (ret) - guc_err(slpc_to_guc(slpc), "Unsetting RC mode failed: %pe\n", ERR_PTR(ret)); - } - - return ret; -} - /* * intel_guc_slpc_enable() - Start SLPC * @slpc: pointer to intel_guc_slpc. diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h index 597eb5413d..6ac6503c39 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h @@ -44,8 +44,6 @@ int intel_guc_slpc_set_media_ratio_mode(struct intel_guc_slpc *slpc, u32 val); void intel_guc_pm_intrmsk_enable(struct intel_gt *gt); void intel_guc_slpc_boost(struct intel_guc_slpc *slpc); void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc); -int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc); -int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode); int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val); #endif diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 836e4d9d65..17df71117c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -32,6 +32,7 @@ #include "i915_drv.h" #include "i915_reg.h" +#include "i915_irq.h" #include "i915_trace.h" /** @@ -1690,9 +1691,7 @@ static void guc_engine_reset_prepare(struct intel_engine_cs *engine) * Wa_22011802037: In addition to stopping the cs, we need * to wait for any pending mi force wakeups */ - if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || - (GRAPHICS_VER(engine->i915) >= 11 && - GRAPHICS_VER_FULL(engine->i915) < IP_VER(12, 70))) { + if (intel_engine_reset_needs_wa_22011802037(engine->gt)) { intel_engine_stop_cs(engine); intel_engine_wait_for_pending_mi_fw(engine); } @@ -1798,6 +1797,20 @@ next_context: intel_context_put(parent); } +void wake_up_all_tlb_invalidate(struct intel_guc *guc) +{ + struct intel_guc_tlb_wait *wait; + unsigned long i; + + if (!intel_guc_tlb_invalidation_is_available(guc)) + return; + + xa_lock_irq(&guc->tlb_lookup); + xa_for_each(&guc->tlb_lookup, i, wait) + wake_up(&wait->wq); + xa_unlock_irq(&guc->tlb_lookup); +} + void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled) { struct intel_context *ce; @@ -1923,6 +1936,12 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc) /* GuC is blown away, drop all references to contexts */ xa_destroy(&guc->context_lookup); + + /* + * Wedged GT won't respond to any TLB invalidation request. Simply + * release all the blocked waiters. + */ + wake_up_all_tlb_invalidate(guc); } void intel_guc_submission_reset_finish(struct intel_guc *guc) @@ -1945,11 +1964,65 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc) intel_guc_global_policies_update(guc); enable_submission(guc); intel_gt_unpark_heartbeats(guc_to_gt(guc)); + + /* + * The full GT reset will have cleared the TLB caches and flushed the + * G2H message queue; we can release all the blocked waiters. + */ + wake_up_all_tlb_invalidate(guc); } static void destroyed_worker_func(struct work_struct *w); static void reset_fail_worker_func(struct work_struct *w); +bool intel_guc_tlb_invalidation_is_available(struct intel_guc *guc) +{ + return HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915) && + intel_guc_is_ready(guc); +} + +static int init_tlb_lookup(struct intel_guc *guc) +{ + struct intel_guc_tlb_wait *wait; + int err; + + if (!HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915)) + return 0; + + xa_init_flags(&guc->tlb_lookup, XA_FLAGS_ALLOC); + + wait = kzalloc(sizeof(*wait), GFP_KERNEL); + if (!wait) + return -ENOMEM; + + init_waitqueue_head(&wait->wq); + + /* Preallocate a shared id for use under memory pressure. */ + err = xa_alloc_cyclic_irq(&guc->tlb_lookup, &guc->serial_slot, wait, + xa_limit_32b, &guc->next_seqno, GFP_KERNEL); + if (err < 0) { + kfree(wait); + return err; + } + + return 0; +} + +static void fini_tlb_lookup(struct intel_guc *guc) +{ + struct intel_guc_tlb_wait *wait; + + if (!HAS_GUC_TLB_INVALIDATION(guc_to_gt(guc)->i915)) + return; + + wait = xa_load(&guc->tlb_lookup, guc->serial_slot); + if (wait && wait->busy) + guc_err(guc, "Unexpected busy item in tlb_lookup on fini\n"); + kfree(wait); + + xa_destroy(&guc->tlb_lookup); +} + /* * Set up the memory resources to be shared with the GuC (via the GGTT) * at firmware loading time. @@ -1968,11 +2041,15 @@ int intel_guc_submission_init(struct intel_guc *guc) return ret; } + ret = init_tlb_lookup(guc); + if (ret) + goto destroy_pool; + guc->submission_state.guc_ids_bitmap = bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); if (!guc->submission_state.guc_ids_bitmap) { ret = -ENOMEM; - goto destroy_pool; + goto destroy_tlb; } guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; @@ -1981,9 +2058,10 @@ int intel_guc_submission_init(struct intel_guc *guc) return 0; +destroy_tlb: + fini_tlb_lookup(guc); destroy_pool: guc_lrc_desc_pool_destroy_v69(guc); - return ret; } @@ -1996,6 +2074,7 @@ void intel_guc_submission_fini(struct intel_guc *guc) guc_lrc_desc_pool_destroy_v69(guc); i915_sched_engine_put(guc->sched_engine); bitmap_free(guc->submission_state.guc_ids_bitmap); + fini_tlb_lookup(guc); guc->submission_initialized = false; } @@ -4299,7 +4378,7 @@ static void guc_default_vfuncs(struct intel_engine_cs *engine) /* Wa_14014475959:dg2 */ if (engine->class == COMPUTE_CLASS) - if (IS_MTL_GRAPHICS_STEP(engine->i915, M, STEP_A0, STEP_B0) || + if (IS_GFX_GT_IP_STEP(engine->gt, IP_VER(12, 70), STEP_A0, STEP_B0) || IS_DG2(engine->i915)) engine->flags |= I915_ENGINE_USES_WA_HOLD_CCS_SWITCHOUT; @@ -4626,6 +4705,154 @@ g2h_context_lookup(struct intel_guc *guc, u32 ctx_id) return ce; } +static void wait_wake_outstanding_tlb_g2h(struct intel_guc *guc, u32 seqno) +{ + struct intel_guc_tlb_wait *wait; + unsigned long flags; + + xa_lock_irqsave(&guc->tlb_lookup, flags); + wait = xa_load(&guc->tlb_lookup, seqno); + + if (wait) + wake_up(&wait->wq); + else + guc_dbg(guc, + "Stale TLB invalidation response with seqno %d\n", seqno); + + xa_unlock_irqrestore(&guc->tlb_lookup, flags); +} + +int intel_guc_tlb_invalidation_done(struct intel_guc *guc, + const u32 *payload, u32 len) +{ + if (len < 1) + return -EPROTO; + + wait_wake_outstanding_tlb_g2h(guc, payload[0]); + return 0; +} + +static long must_wait_woken(struct wait_queue_entry *wq_entry, long timeout) +{ + /* + * This is equivalent to wait_woken() with the exception that + * we do not wake up early if the kthread task has been completed. + * As we are called from page reclaim in any task context, + * we may be invoked from stopped kthreads, but we *must* + * complete the wait from the HW. + */ + do { + set_current_state(TASK_UNINTERRUPTIBLE); + if (wq_entry->flags & WQ_FLAG_WOKEN) + break; + + timeout = schedule_timeout(timeout); + } while (timeout); + + /* See wait_woken() and woken_wake_function() */ + __set_current_state(TASK_RUNNING); + smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); + + return timeout; +} + +static bool intel_gt_is_enabled(const struct intel_gt *gt) +{ + /* Check if GT is wedged or suspended */ + if (intel_gt_is_wedged(gt) || !intel_irqs_enabled(gt->i915)) + return false; + return true; +} + +static int guc_send_invalidate_tlb(struct intel_guc *guc, + enum intel_guc_tlb_invalidation_type type) +{ + struct intel_guc_tlb_wait _wq, *wq = &_wq; + struct intel_gt *gt = guc_to_gt(guc); + DEFINE_WAIT_FUNC(wait, woken_wake_function); + int err; + u32 seqno; + u32 action[] = { + INTEL_GUC_ACTION_TLB_INVALIDATION, + 0, + REG_FIELD_PREP(INTEL_GUC_TLB_INVAL_TYPE_MASK, type) | + REG_FIELD_PREP(INTEL_GUC_TLB_INVAL_MODE_MASK, + INTEL_GUC_TLB_INVAL_MODE_HEAVY) | + INTEL_GUC_TLB_INVAL_FLUSH_CACHE, + }; + u32 size = ARRAY_SIZE(action); + + /* + * Early guard against GT enablement. TLB invalidation should not be + * attempted if the GT is disabled due to suspend/wedge. + */ + if (!intel_gt_is_enabled(gt)) + return -EINVAL; + + init_waitqueue_head(&_wq.wq); + + if (xa_alloc_cyclic_irq(&guc->tlb_lookup, &seqno, wq, + xa_limit_32b, &guc->next_seqno, + GFP_ATOMIC | __GFP_NOWARN) < 0) { + /* Under severe memory pressure? Serialise TLB allocations */ + xa_lock_irq(&guc->tlb_lookup); + wq = xa_load(&guc->tlb_lookup, guc->serial_slot); + wait_event_lock_irq(wq->wq, + !READ_ONCE(wq->busy), + guc->tlb_lookup.xa_lock); + /* + * Update wq->busy under lock to ensure only one waiter can + * issue the TLB invalidation command using the serial slot at a + * time. The condition is set to true before releasing the lock + * so that other caller continue to wait until woken up again. + */ + wq->busy = true; + xa_unlock_irq(&guc->tlb_lookup); + + seqno = guc->serial_slot; + } + + action[1] = seqno; + + add_wait_queue(&wq->wq, &wait); + + /* This is a critical reclaim path and thus we must loop here. */ + err = intel_guc_send_busy_loop(guc, action, size, G2H_LEN_DW_INVALIDATE_TLB, true); + if (err) + goto out; + + /* + * Late guard against GT enablement. It is not an error for the TLB + * invalidation to time out if the GT is disabled during the process + * due to suspend/wedge. In fact, the TLB invalidation is cancelled + * in this case. + */ + if (!must_wait_woken(&wait, intel_guc_ct_max_queue_time_jiffies()) && + intel_gt_is_enabled(gt)) { + guc_err(guc, + "TLB invalidation response timed out for seqno %u\n", seqno); + err = -ETIME; + } +out: + remove_wait_queue(&wq->wq, &wait); + if (seqno != guc->serial_slot) + xa_erase_irq(&guc->tlb_lookup, seqno); + + return err; +} + +/* Send a H2G command to invalidate the TLBs at engine level and beyond. */ +int intel_guc_invalidate_tlb_engines(struct intel_guc *guc) +{ + return guc_send_invalidate_tlb(guc, INTEL_GUC_TLB_INVAL_ENGINES); +} + +/* Send a H2G command to invalidate the GuC's internal TLB. */ +int intel_guc_invalidate_tlb_guc(struct intel_guc *guc) +{ + return guc_send_invalidate_tlb(guc, INTEL_GUC_TLB_INVAL_GUC); +} + int intel_guc_deregister_done_process_msg(struct intel_guc *guc, const u32 *msg, u32 len) @@ -4805,19 +5032,19 @@ static void guc_context_replay(struct intel_context *ce) static void guc_handle_context_reset(struct intel_guc *guc, struct intel_context *ce) { + bool capture = intel_context_is_schedulable(ce); + trace_intel_context_reset(ce); - guc_dbg(guc, "Got context reset notification: 0x%04X on %s, exiting = %s, banned = %s\n", + guc_dbg(guc, "%s context reset notification: 0x%04X on %s, exiting = %s, banned = %s\n", + capture ? "Got" : "Ignoring", ce->guc_id.id, ce->engine->name, str_yes_no(intel_context_is_exiting(ce)), str_yes_no(intel_context_is_banned(ce))); - if (likely(intel_context_is_schedulable(ce))) { + if (capture) { capture_error_state(guc, ce); guc_context_replay(ce); - } else { - guc_info(guc, "Ignoring context reset notification of exiting context 0x%04X on %s", - ce->guc_id.id, ce->engine->name); } } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 98b103375b..27f6561dd7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -688,6 +688,8 @@ void intel_uc_suspend(struct intel_uc *uc) /* flush the GSC worker */ intel_gsc_uc_flush_work(&uc->gsc); + wake_up_all_tlb_invalidate(guc); + if (!intel_guc_is_ready(guc)) { guc->interrupts.enabled = false; return; @@ -736,6 +738,11 @@ static int __uc_resume(struct intel_uc *uc, bool enable_communication) intel_gsc_uc_resume(&uc->gsc); + if (intel_guc_tlb_invalidation_is_available(guc)) { + intel_guc_invalidate_tlb_engines(guc); + intel_guc_invalidate_tlb_guc(guc); + } + return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 8be005de1d..362639162e 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -88,12 +88,12 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, * security fixes, etc. to be enabled. */ #define INTEL_GUC_FIRMWARE_DEFS(fw_def, guc_maj, guc_mmp) \ - fw_def(METEORLAKE, 0, guc_maj(mtl, 70, 6, 6)) \ - fw_def(DG2, 0, guc_maj(dg2, 70, 5, 1)) \ - fw_def(ALDERLAKE_P, 0, guc_maj(adlp, 70, 5, 1)) \ + fw_def(METEORLAKE, 0, guc_maj(mtl, 70, 12, 1)) \ + fw_def(DG2, 0, guc_maj(dg2, 70, 12, 1)) \ + fw_def(ALDERLAKE_P, 0, guc_maj(adlp, 70, 12, 1)) \ fw_def(ALDERLAKE_P, 0, guc_mmp(adlp, 70, 1, 1)) \ fw_def(ALDERLAKE_P, 0, guc_mmp(adlp, 69, 0, 3)) \ - fw_def(ALDERLAKE_S, 0, guc_maj(tgl, 70, 5, 1)) \ + fw_def(ALDERLAKE_S, 0, guc_maj(tgl, 70, 12, 1)) \ fw_def(ALDERLAKE_S, 0, guc_mmp(tgl, 70, 1, 1)) \ fw_def(ALDERLAKE_S, 0, guc_mmp(tgl, 69, 0, 3)) \ fw_def(DG1, 0, guc_maj(dg1, 70, 5, 1)) \ @@ -132,6 +132,17 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, fw_def(SKYLAKE, 0, huc_mmp(skl, 2, 0, 0)) /* + * The GSC FW has multiple version (see intel_gsc_uc.h for details); since what + * we care about is the interface, we use the compatibility version in the + * binary names. + * Same as with the GuC, a major version bump indicate a + * backward-incompatible change, while a minor version bump indicates a + * backward-compatible one, so we use only the former in the file name. + */ +#define INTEL_GSC_FIRMWARE_DEFS(fw_def, gsc_def) \ + fw_def(METEORLAKE, 0, gsc_def(mtl, 1, 0)) + +/* * Set of macros for producing a list of filenames from the above table. */ #define __MAKE_UC_FW_PATH_BLANK(prefix_, name_) \ @@ -166,6 +177,9 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, #define MAKE_HUC_FW_PATH_MMP(prefix_, major_, minor_, patch_) \ __MAKE_UC_FW_PATH_MMP(prefix_, "huc", major_, minor_, patch_) +#define MAKE_GSC_FW_PATH(prefix_, major_, minor_) \ + __MAKE_UC_FW_PATH_MAJOR(prefix_, "gsc", major_) + /* * All blobs need to be declared via MODULE_FIRMWARE(). * This first expansion of the table macros is solely to provide @@ -176,6 +190,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH_MAJOR, MAKE_GUC_FW_PATH_MMP) INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH_BLANK, MAKE_HUC_FW_PATH_MMP, MAKE_HUC_FW_PATH_GSC) +INTEL_GSC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GSC_FW_PATH) /* * The next expansion of the table macros (in __uc_fw_auto_select below) provides @@ -225,6 +240,10 @@ struct __packed uc_fw_blob { #define HUC_FW_BLOB_GSC(prefix_) \ UC_FW_BLOB_NEW(0, 0, 0, true, MAKE_HUC_FW_PATH_GSC(prefix_)) +#define GSC_FW_BLOB(prefix_, major_, minor_) \ + UC_FW_BLOB_NEW(major_, minor_, 0, true, \ + MAKE_GSC_FW_PATH(prefix_, major_, minor_)) + struct __packed uc_fw_platform_requirement { enum intel_platform p; u8 rev; /* first platform rev using this FW */ @@ -251,9 +270,14 @@ static const struct uc_fw_platform_requirement blobs_huc[] = { INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB, HUC_FW_BLOB_MMP, HUC_FW_BLOB_GSC) }; +static const struct uc_fw_platform_requirement blobs_gsc[] = { + INTEL_GSC_FIRMWARE_DEFS(MAKE_FW_LIST, GSC_FW_BLOB) +}; + static const struct fw_blobs_by_type blobs_all[INTEL_UC_FW_NUM_TYPES] = { [INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) }, [INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) }, + [INTEL_UC_FW_TYPE_GSC] = { blobs_gsc, ARRAY_SIZE(blobs_gsc) }, }; static void @@ -267,14 +291,6 @@ __uc_fw_auto_select(struct drm_i915_private *i915, struct intel_uc_fw *uc_fw) bool found; /* - * GSC FW support is still not fully in place, so we're not defining - * the FW blob yet because we don't want the driver to attempt to load - * it until we're ready for it. - */ - if (uc_fw->type == INTEL_UC_FW_TYPE_GSC) - return; - - /* * The only difference between the ADL GuC FWs is the HWConfig support. * ADL-N does not support HWConfig, so we should use the same binary as * ADL-S, otherwise the GuC might attempt to fetch a config table that |