drivers/gpu/drm/i915/gt/intel_tlb.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include "i915_drv.h"
#include "i915_perf_oa_regs.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_pm.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_tlb.h"
#include "uc/intel_guc.h"

/*
 * HW architecture suggest typical invalidation time at 40us,
 * with pessimistic cases up to 100us and a recommendation to
 * cap at 1ms. We go a bit higher just in case.
 */
#define TLB_INVAL_TIMEOUT_US 100
#define TLB_INVAL_TIMEOUT_MS 4

/*
 * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
 * but are now considered MCR registers.  Since they exist within a GAM range,
 * the primary instance of the register rolls up the status from each unit.
 */
static int wait_for_invalidate(struct intel_engine_cs *engine)
{
	if (engine->tlb_inv.mcr)
		return intel_gt_mcr_wait_for_reg(engine->gt,
						 engine->tlb_inv.reg.mcr_reg,
						 engine->tlb_inv.done,
						 0,
						 TLB_INVAL_TIMEOUT_US,
						 TLB_INVAL_TIMEOUT_MS);
	else
		return __intel_wait_for_register_fw(engine->gt->uncore,
						    engine->tlb_inv.reg.reg,
						    engine->tlb_inv.done,
						    0,
						    TLB_INVAL_TIMEOUT_US,
						    TLB_INVAL_TIMEOUT_MS,
						    NULL);
}

static void mmio_invalidate_full(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct intel_engine_cs *engine;
	intel_engine_mask_t awake, tmp;
	enum intel_engine_id id;
	unsigned long flags;

	if (GRAPHICS_VER(i915) < 8)
		return;

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	intel_gt_mcr_lock(gt, &flags);
	spin_lock(&uncore->lock); /* serialise invalidate with GT reset */

	awake = 0;
	for_each_engine(engine, gt, id) {
		if (!intel_engine_pm_is_awake(engine))
			continue;

		if (engine->tlb_inv.mcr)
			intel_gt_mcr_multicast_write_fw(gt,
							engine->tlb_inv.reg.mcr_reg,
							engine->tlb_inv.request);
		else
			intel_uncore_write_fw(uncore,
					      engine->tlb_inv.reg.reg,
					      engine->tlb_inv.request);

		awake |= engine->mask;
	}

	GT_TRACE(gt, "invalidated engines %08x\n", awake);

	/* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
	if (awake &&
	    (IS_TIGERLAKE(i915) ||
	     IS_DG1(i915) ||
	     IS_ROCKETLAKE(i915) ||
	     IS_ALDERLAKE_S(i915) ||
	     IS_ALDERLAKE_P(i915)))
		intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);

	spin_unlock(&uncore->lock);
	intel_gt_mcr_unlock(gt, flags);

	for_each_engine_masked(engine, gt, awake, tmp) {
		if (wait_for_invalidate(engine))
			gt_err_ratelimited(gt,
					   "%s TLB invalidation did not complete in %ums!\n",
					   engine->name, TLB_INVAL_TIMEOUT_MS);
	}

	/*
	 * Use delayed put since a) we mostly expect a flurry of TLB
	 * invalidations so it is good to avoid paying the forcewake cost and
	 * b) it works around a bug in Icelake which cannot cope with too rapid
	 * transitions.
	 */
	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
}

static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
{
	u32 cur = intel_gt_tlb_seqno(gt);

	/* Only skip if a *full* TLB invalidate barrier has passed */
	return (s32)(cur - ALIGN(seqno, 2)) > 0;
}

void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
{
	intel_wakeref_t wakeref;

	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
		return;

	if (intel_gt_is_wedged(gt))
		return;

	if (tlb_seqno_passed(gt, seqno))
		return;

	with_intel_gt_pm_if_awake(gt, wakeref) {
		struct intel_guc *guc = gt_to_guc(gt);

		mutex_lock(&gt->tlb.invalidate_lock);
		if (tlb_seqno_passed(gt, seqno))
			goto unlock;

		if (HAS_GUC_TLB_INVALIDATION(gt->i915)) {
			/*
			 * Only perform GuC TLB invalidation if GuC is ready.
			 * The only time GuC could not be ready is on GT reset,
			 * which would clobber all the TLBs anyways, making
			 * any TLB invalidation path here unnecessary.
			 */
			if (intel_guc_is_ready(guc))
				intel_guc_invalidate_tlb_engines(guc);
		} else {
			mmio_invalidate_full(gt);
		}

		write_seqcount_invalidate(&gt->tlb.seqno);
unlock:
		mutex_unlock(&gt->tlb.invalidate_lock);
	}
}

void intel_gt_init_tlb(struct intel_gt *gt)
{
	mutex_init(&gt->tlb.invalidate_lock);
	seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
}

void intel_gt_fini_tlb(struct intel_gt *gt)
{
	mutex_destroy(&gt->tlb.invalidate_lock);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_tlb.c"
#endif