summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/i915/gt/uc
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt/uc')
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c21
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c10
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c125
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h2
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.c64
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_uc.c8
7 files changed, 202 insertions, 30 deletions
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
index 813cc888e6..be70c46604 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -206,8 +206,6 @@ struct intel_guc {
u32 ads_golden_ctxt_size;
/** @ads_capture_size: size of register lists in the ADS used for error capture */
u32 ads_capture_size;
- /** @ads_engine_usage_size: size of engine usage in the ADS */
- u32 ads_engine_usage_size;
/** @lrc_desc_pool_v69: object allocated to hold the GuC LRC descriptor pool */
struct i915_vma *lrc_desc_pool_v69;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 63724e1782..f7372f736a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -377,8 +377,13 @@ static int guc_mmio_regset_init(struct temp_regset *regset,
CCS_MASK(engine->gt))
ret |= GUC_MMIO_REG_ADD(gt, regset, GEN12_RCU_MODE, true);
+ /*
+ * some of the WA registers are MCR registers. As it is safe to
+ * use MCR form for non-MCR registers, for code simplicity, all
+ * WA registers are added with MCR form.
+ */
for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
- ret |= GUC_MMIO_REG_ADD(gt, regset, wa->reg, wa->masked_reg);
+ ret |= GUC_MCR_REG_ADD(gt, regset, wa->mcr_reg, wa->masked_reg);
/* Be extra paranoid and include all whitelist registers. */
for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++)
@@ -394,13 +399,13 @@ static int guc_mmio_regset_init(struct temp_regset *regset,
ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false);
if (GRAPHICS_VER(engine->i915) >= 12) {
- ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL0, false);
- ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL1, false);
- ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL2, false);
- ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL3, false);
- ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL4, false);
- ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL5, false);
- ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL6, false);
+ ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL0)), false);
+ ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL1)), false);
+ ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL2)), false);
+ ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL3)), false);
+ ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL4)), false);
+ ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL5)), false);
+ ret |= GUC_MCR_REG_ADD(gt, regset, MCR_REG(i915_mmio_reg_offset(EU_PERF_CNTL6)), false);
}
return ret ? -1 : 0;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
index 0f79cb6585..52332bb143 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
@@ -184,7 +184,7 @@ static int guc_wait_ucode(struct intel_guc *guc)
* in the seconds range. However, there is a limit on how long an
* individual wait_for() can wait. So wrap it in a loop.
*/
- before_freq = intel_rps_read_actual_frequency(&uncore->gt->rps);
+ before_freq = intel_rps_read_actual_frequency(&gt->rps);
before = ktime_get();
for (count = 0; count < GUC_LOAD_RETRY_LIMIT; count++) {
ret = wait_for(guc_load_done(uncore, &status, &success), 1000);
@@ -192,7 +192,7 @@ static int guc_wait_ucode(struct intel_guc *guc)
break;
guc_dbg(guc, "load still in progress, count = %d, freq = %dMHz, status = 0x%08X [0x%02X/%02X]\n",
- count, intel_rps_read_actual_frequency(&uncore->gt->rps), status,
+ count, intel_rps_read_actual_frequency(&gt->rps), status,
REG_FIELD_GET(GS_BOOTROM_MASK, status),
REG_FIELD_GET(GS_UKERNEL_MASK, status));
}
@@ -204,7 +204,7 @@ static int guc_wait_ucode(struct intel_guc *guc)
u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status);
guc_info(guc, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz, ret = %d\n",
- status, delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps), ret);
+ status, delta_ms, intel_rps_read_actual_frequency(&gt->rps), ret);
guc_info(guc, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n",
REG_FIELD_GET(GS_MIA_IN_RESET, status),
bootrom, ukernel,
@@ -254,11 +254,11 @@ static int guc_wait_ucode(struct intel_guc *guc)
guc_warn(guc, "excessive init time: %lldms! [status = 0x%08X, count = %d, ret = %d]\n",
delta_ms, status, count, ret);
guc_warn(guc, "excessive init time: [freq = %dMHz, before = %dMHz, perf_limit_reasons = 0x%08X]\n",
- intel_rps_read_actual_frequency(&uncore->gt->rps), before_freq,
+ intel_rps_read_actual_frequency(&gt->rps), before_freq,
intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt)));
} else {
guc_dbg(guc, "init took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n",
- delta_ms, intel_rps_read_actual_frequency(&uncore->gt->rps),
+ delta_ms, intel_rps_read_actual_frequency(&gt->rps),
before_freq, status, count, ret);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index a259f1118c..0f83c6d437 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -236,6 +236,13 @@ set_context_destroyed(struct intel_context *ce)
ce->guc_state.sched_state |= SCHED_STATE_DESTROYED;
}
+static inline void
+clr_context_destroyed(struct intel_context *ce)
+{
+ lockdep_assert_held(&ce->guc_state.lock);
+ ce->guc_state.sched_state &= ~SCHED_STATE_DESTROYED;
+}
+
static inline bool context_pending_disable(struct intel_context *ce)
{
return ce->guc_state.sched_state & SCHED_STATE_PENDING_DISABLE;
@@ -613,6 +620,8 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc,
u32 g2h_len_dw,
bool loop)
{
+ int ret;
+
/*
* We always loop when a send requires a reply (i.e. g2h_len_dw > 0),
* so we don't handle the case where we don't get a reply because we
@@ -623,7 +632,11 @@ static int guc_submission_send_busy_loop(struct intel_guc *guc,
if (g2h_len_dw)
atomic_inc(&guc->outstanding_submission_g2h);
- return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
+ ret = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop);
+ if (ret)
+ atomic_dec(&guc->outstanding_submission_g2h);
+
+ return ret;
}
int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
@@ -1362,7 +1375,48 @@ static void guc_enable_busyness_worker(struct intel_guc *guc)
static void guc_cancel_busyness_worker(struct intel_guc *guc)
{
- cancel_delayed_work_sync(&guc->timestamp.work);
+ /*
+ * There are many different call stacks that can get here. Some of them
+ * hold the reset mutex. The busyness worker also attempts to acquire the
+ * reset mutex. Synchronously flushing a worker thread requires acquiring
+ * the worker mutex. Lockdep sees this as a conflict. It thinks that the
+ * flush can deadlock because it holds the worker mutex while waiting for
+ * the reset mutex, but another thread is holding the reset mutex and might
+ * attempt to use other worker functions.
+ *
+ * In practice, this scenario does not exist because the busyness worker
+ * does not block waiting for the reset mutex. It does a try-lock on it and
+ * immediately exits if the lock is already held. Unfortunately, the mutex
+ * in question (I915_RESET_BACKOFF) is an i915 implementation which has lockdep
+ * annotation but not to the extent of explaining the 'might lock' is also a
+ * 'does not need to lock'. So one option would be to add more complex lockdep
+ * annotations to ignore the issue (if at all possible). A simpler option is to
+ * just not flush synchronously when a rest in progress. Given that the worker
+ * will just early exit and re-schedule itself anyway, there is no advantage
+ * to running it immediately.
+ *
+ * If a reset is not in progress, then the synchronous flush may be required.
+ * As noted many call stacks lead here, some during suspend and driver unload
+ * which do require a synchronous flush to make sure the worker is stopped
+ * before memory is freed.
+ *
+ * Trying to pass a 'need_sync' or 'in_reset' flag all the way down through
+ * every possible call stack is unfeasible. It would be too intrusive to many
+ * areas that really don't care about the GuC backend. However, there is the
+ * I915_RESET_BACKOFF flag and the gt->reset.mutex can be tested for is_locked.
+ * So just use those. Note that testing both is required due to the hideously
+ * complex nature of the i915 driver's reset code paths.
+ *
+ * And note that in the case of a reset occurring during driver unload
+ * (wedged_on_fini), skipping the cancel in reset_prepare/reset_fini (when the
+ * reset flag/mutex are set) is fine because there is another explicit cancel in
+ * intel_guc_submission_fini (when the reset flag/mutex are not).
+ */
+ if (mutex_is_locked(&guc_to_gt(guc)->reset.mutex) ||
+ test_bit(I915_RESET_BACKOFF, &guc_to_gt(guc)->reset.flags))
+ cancel_delayed_work(&guc->timestamp.work);
+ else
+ cancel_delayed_work_sync(&guc->timestamp.work);
}
static void __reset_guc_busyness_stats(struct intel_guc *guc)
@@ -1373,8 +1427,6 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc)
unsigned long flags;
ktime_t unused;
- guc_cancel_busyness_worker(guc);
-
spin_lock_irqsave(&guc->timestamp.lock, flags);
guc_update_pm_timestamp(guc, &unused);
@@ -1613,6 +1665,11 @@ static void guc_flush_submissions(struct intel_guc *guc)
spin_unlock_irqrestore(&sched_engine->lock, flags);
}
+void intel_guc_submission_flush_work(struct intel_guc *guc)
+{
+ flush_work(&guc->submission_state.destroyed_worker);
+}
+
static void guc_flush_destroyed_contexts(struct intel_guc *guc);
void intel_guc_submission_reset_prepare(struct intel_guc *guc)
@@ -1950,6 +2007,7 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc)
{
/* Reset called during driver load or during wedge? */
if (unlikely(!guc_submission_initialized(guc) ||
+ !intel_guc_is_fw_running(guc) ||
intel_gt_is_wedged(guc_to_gt(guc)))) {
return;
}
@@ -2072,6 +2130,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
if (!guc->submission_initialized)
return;
+ guc_fini_engine_stats(guc);
guc_flush_destroyed_contexts(guc);
guc_lrc_desc_pool_destroy_v69(guc);
i915_sched_engine_put(guc->sched_engine);
@@ -3283,12 +3342,13 @@ static void guc_context_close(struct intel_context *ce)
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
}
-static inline void guc_lrc_desc_unpin(struct intel_context *ce)
+static inline int guc_lrc_desc_unpin(struct intel_context *ce)
{
struct intel_guc *guc = ce_to_guc(ce);
struct intel_gt *gt = guc_to_gt(guc);
unsigned long flags;
bool disabled;
+ int ret;
GEM_BUG_ON(!intel_gt_pm_is_awake(gt));
GEM_BUG_ON(!ctx_id_mapped(guc, ce->guc_id.id));
@@ -3299,18 +3359,41 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce)
spin_lock_irqsave(&ce->guc_state.lock, flags);
disabled = submission_disabled(guc);
if (likely(!disabled)) {
+ /*
+ * Take a gt-pm ref and change context state to be destroyed.
+ * NOTE: a G2H IRQ that comes after will put this gt-pm ref back
+ */
__intel_gt_pm_get(gt);
set_context_destroyed(ce);
clr_context_registered(ce);
}
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
+
if (unlikely(disabled)) {
release_guc_id(guc, ce);
__guc_context_destroy(ce);
- return;
+ return 0;
}
- deregister_context(ce, ce->guc_id.id);
+ /*
+ * GuC is active, lets destroy this context, but at this point we can still be racing
+ * with suspend, so we undo everything if the H2G fails in deregister_context so
+ * that GuC reset will find this context during clean up.
+ */
+ ret = deregister_context(ce, ce->guc_id.id);
+ if (ret) {
+ spin_lock(&ce->guc_state.lock);
+ set_context_registered(ce);
+ clr_context_destroyed(ce);
+ spin_unlock(&ce->guc_state.lock);
+ /*
+ * As gt-pm is awake at function entry, intel_wakeref_put_async merely decrements
+ * the wakeref immediately but per function spec usage call this after unlock.
+ */
+ intel_wakeref_put_async(&gt->wakeref);
+ }
+
+ return ret;
}
static void __guc_context_destroy(struct intel_context *ce)
@@ -3378,7 +3461,22 @@ static void deregister_destroyed_contexts(struct intel_guc *guc)
if (!ce)
break;
- guc_lrc_desc_unpin(ce);
+ if (guc_lrc_desc_unpin(ce)) {
+ /*
+ * This means GuC's CT link severed mid-way which could happen
+ * in suspend-resume corner cases. In this case, put the
+ * context back into the destroyed_contexts list which will
+ * get picked up on the next context deregistration event or
+ * purged in a GuC sanitization event (reset/unload/wedged/...).
+ */
+ spin_lock_irqsave(&guc->submission_state.lock, flags);
+ list_add_tail(&ce->destroyed_link,
+ &guc->submission_state.destroyed_contexts);
+ spin_unlock_irqrestore(&guc->submission_state.lock, flags);
+ /* Bail now since the list might never be emptied if h2gs fail */
+ break;
+ }
+
}
}
@@ -3389,6 +3487,17 @@ static void destroyed_worker_func(struct work_struct *w)
struct intel_gt *gt = guc_to_gt(guc);
intel_wakeref_t wakeref;
+ /*
+ * In rare cases we can get here via async context-free fence-signals that
+ * come very late in suspend flow or very early in resume flows. In these
+ * cases, GuC won't be ready but just skipping it here is fine as these
+ * pending-destroy-contexts get destroyed totally at GuC reset time at the
+ * end of suspend.. OR.. this worker can be picked up later on the next
+ * context destruction trigger after resume-completes
+ */
+ if (!intel_guc_is_ready(guc))
+ return;
+
with_intel_gt_pm(gt, wakeref)
deregister_destroyed_contexts(guc);
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
index c57b29cdb1..b6df75622d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
@@ -38,6 +38,8 @@ int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
bool interruptible,
long timeout);
+void intel_guc_submission_flush_work(struct intel_guc *guc);
+
static inline bool intel_guc_submission_is_supported(struct intel_guc *guc)
{
return guc->submission_supported;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index ba9e07fc2b..0945b177d5 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -6,6 +6,7 @@
#include <linux/types.h>
#include "gt/intel_gt.h"
+#include "gt/intel_rps.h"
#include "intel_guc_reg.h"
#include "intel_huc.h"
#include "intel_huc_print.h"
@@ -447,17 +448,68 @@ static const char *auth_mode_string(struct intel_huc *huc,
return partial ? "clear media" : "all workloads";
}
+/*
+ * Use a longer timeout for debug builds so that problems can be detected
+ * and analysed. But a shorter timeout for releases so that user's don't
+ * wait forever to find out there is a problem. Note that the only reason
+ * an end user should hit the timeout is in case of extreme thermal throttling.
+ * And a system that is that hot during boot is probably dead anyway!
+ */
+#if defined(CONFIG_DRM_I915_DEBUG_GEM)
+#define HUC_LOAD_RETRY_LIMIT 20
+#else
+#define HUC_LOAD_RETRY_LIMIT 3
+#endif
+
int intel_huc_wait_for_auth_complete(struct intel_huc *huc,
enum intel_huc_authentication_type type)
{
struct intel_gt *gt = huc_to_gt(huc);
- int ret;
+ struct intel_uncore *uncore = gt->uncore;
+ ktime_t before, after, delta;
+ int ret, count;
+ u64 delta_ms;
+ u32 before_freq;
- ret = __intel_wait_for_register(gt->uncore,
- huc->status[type].reg,
- huc->status[type].mask,
- huc->status[type].value,
- 2, 50, NULL);
+ /*
+ * The KMD requests maximum frequency during driver load, however thermal
+ * throttling can force the frequency down to minimum (although the board
+ * really should never get that hot in real life!). IFWI issues have been
+ * seen to cause sporadic failures to grant the higher frequency. And at
+ * minimum frequency, the authentication time can be in the seconds range.
+ * Note that there is a limit on how long an individual wait_for() can wait.
+ * So wrap it in a loop.
+ */
+ before_freq = intel_rps_read_actual_frequency(&gt->rps);
+ before = ktime_get();
+ for (count = 0; count < HUC_LOAD_RETRY_LIMIT; count++) {
+ ret = __intel_wait_for_register(gt->uncore,
+ huc->status[type].reg,
+ huc->status[type].mask,
+ huc->status[type].value,
+ 2, 1000, NULL);
+ if (!ret)
+ break;
+
+ huc_dbg(huc, "auth still in progress, count = %d, freq = %dMHz, status = 0x%08X\n",
+ count, intel_rps_read_actual_frequency(&gt->rps),
+ huc->status[type].reg.reg);
+ }
+ after = ktime_get();
+ delta = ktime_sub(after, before);
+ delta_ms = ktime_to_ms(delta);
+
+ if (delta_ms > 50) {
+ huc_warn(huc, "excessive auth time: %lldms! [status = 0x%08X, count = %d, ret = %d]\n",
+ delta_ms, huc->status[type].reg.reg, count, ret);
+ huc_warn(huc, "excessive auth time: [freq = %dMHz, before = %dMHz, perf_limit_reasons = 0x%08X]\n",
+ intel_rps_read_actual_frequency(&gt->rps), before_freq,
+ intel_uncore_read(uncore, intel_gt_perf_limit_reasons_reg(gt)));
+ } else {
+ huc_dbg(huc, "auth took %lldms, freq = %dMHz, before = %dMHz, status = 0x%08X, count = %d, ret = %d\n",
+ delta_ms, intel_rps_read_actual_frequency(&gt->rps),
+ before_freq, huc->status[type].reg.reg, count, ret);
+ }
/* mark the load process as complete even if the wait failed */
delayed_huc_load_complete(huc);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 3872d309ed..399bc31918 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -637,10 +637,14 @@ void intel_uc_reset_finish(struct intel_uc *uc)
{
struct intel_guc *guc = &uc->guc;
+ /*
+ * NB: The wedge code path results in prepare -> prepare -> finish -> finish.
+ * So this function is sometimes called with the in-progress flag not set.
+ */
uc->reset_in_progress = false;
/* Firmware expected to be running when this function is called */
- if (intel_guc_is_fw_running(guc) && intel_uc_uses_guc_submission(uc))
+ if (intel_uc_uses_guc_submission(uc))
intel_guc_submission_reset_finish(guc);
}
@@ -690,6 +694,8 @@ void intel_uc_suspend(struct intel_uc *uc)
return;
}
+ intel_guc_submission_flush_work(guc);
+
with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref) {
err = intel_guc_suspend(guc);
if (err)