summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/xe/xe_migrate.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/xe/xe_migrate.c')
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c39
1 files changed, 24 insertions, 15 deletions
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 7a6ad3469d..aca519f5b8 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -12,7 +12,8 @@
#include <drm/ttm/ttm_tt.h>
#include <drm/xe_drm.h>
-#include "generated/xe_wa_oob.h"
+#include <generated/xe_wa_oob.h>
+
#include "instructions/xe_mi_commands.h"
#include "regs/xe_gpu_commands.h"
#include "tests/xe_test.h"
@@ -32,7 +33,6 @@
#include "xe_sync.h"
#include "xe_trace.h"
#include "xe_vm.h"
-#include "xe_wa.h"
/**
* struct xe_migrate - migrate context.
@@ -71,6 +71,16 @@ struct xe_migrate {
#define NUM_KERNEL_PDE 17
#define NUM_PT_SLOTS 32
#define LEVEL0_PAGE_TABLE_ENCODE_SIZE SZ_2M
+#define MAX_NUM_PTE 512
+
+/*
+ * Although MI_STORE_DATA_IMM's "length" field is 10-bits, 0x3FE is the largest
+ * legal value accepted. Since that instruction field is always stored in
+ * (val-2) format, this translates to 0x400 dwords for the true maximum length
+ * of the instruction. Subtracting the instruction header (1 dword) and
+ * address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values.
+ */
+#define MAX_PTE_PER_SDI 0x1FE
/**
* xe_tile_migrate_engine() - Get this tile's migrate engine.
@@ -288,10 +298,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
}
/*
- * Due to workaround 16017236439, odd instance hardware copy engines are
- * faster than even instance ones.
- * This function returns the mask involving all fast copy engines and the
- * reserved copy engine to be used as logical mask for migrate engine.
* Including the reserved copy engine is required to avoid deadlocks due to
* migrate jobs servicing the faults gets stuck behind the job that faulted.
*/
@@ -305,8 +311,7 @@ static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt)
if (hwe->class != XE_ENGINE_CLASS_COPY)
continue;
- if (!XE_WA(gt, 16017236439) ||
- xe_gt_is_usm_hwe(gt, hwe) || hwe->instance & 1)
+ if (xe_gt_is_usm_hwe(gt, hwe))
logical_mask |= BIT(hwe->logical_instance);
}
@@ -357,10 +362,14 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
if (!hwe || !logical_mask)
return ERR_PTR(-EINVAL);
+ /*
+ * XXX: Currently only reserving 1 (likely slow) BCS instance on
+ * PVC, may want to revisit if performance is needed.
+ */
m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe,
EXEC_QUEUE_FLAG_KERNEL |
EXEC_QUEUE_FLAG_PERMANENT |
- EXEC_QUEUE_FLAG_HIGH_PRIORITY);
+ EXEC_QUEUE_FLAG_HIGH_PRIORITY, 0);
} else {
m->q = xe_exec_queue_create_class(xe, primary_gt, vm,
XE_ENGINE_CLASS_COPY,
@@ -451,13 +460,13 @@ static u32 pte_update_size(struct xe_migrate *m,
} else {
/* Clip L0 to available size */
u64 size = min(*L0, (u64)avail_pts * SZ_2M);
- u64 num_4k_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+ u32 num_4k_pages = (size + XE_PAGE_SIZE - 1) >> XE_PTE_SHIFT;
*L0 = size;
*L0_ofs = xe_migrate_vm_addr(pt_ofs, 0);
/* MI_STORE_DATA_IMM */
- cmds += 3 * DIV_ROUND_UP(num_4k_pages, 0x1ff);
+ cmds += 3 * DIV_ROUND_UP(num_4k_pages, MAX_PTE_PER_SDI);
/* PDE qwords */
cmds += num_4k_pages * 2;
@@ -492,7 +501,7 @@ static void emit_pte(struct xe_migrate *m,
ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);
while (ptes) {
- u32 chunk = min(0x1ffU, ptes);
+ u32 chunk = min(MAX_PTE_PER_SDI, ptes);
bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
bb->cs[bb->len++] = ofs;
@@ -1111,7 +1120,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
* This shouldn't be possible in practice.. might change when 16K
* pages are used. Hence the assert.
*/
- xe_tile_assert(tile, update->qwords <= 0x1ff);
+ xe_tile_assert(tile, update->qwords < MAX_NUM_PTE);
if (!ppgtt_ofs)
ppgtt_ofs = xe_migrate_vram_ofs(tile_to_xe(tile),
xe_bo_addr(update->pt_bo, 0,
@@ -1120,7 +1129,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs,
do {
u64 addr = ppgtt_ofs + ofs * 8;
- chunk = min(update->qwords, 0x1ffU);
+ chunk = min(size, MAX_PTE_PER_SDI);
/* Ensure populatefn can do memset64 by aligning bb->cs */
if (!(bb->len & 1))
@@ -1299,7 +1308,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
batch_size = 6 + num_updates * 2;
for (i = 0; i < num_updates; i++) {
- u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, 0x1ff);
+ u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, MAX_PTE_PER_SDI);
/* align noop + MI_STORE_DATA_IMM cmd prefix */
batch_size += 4 * num_cmds + updates[i].qwords * 2;