diff options
Diffstat (limited to 'drivers/gpu/drm/xe/xe_migrate.c')
-rw-r--r-- | drivers/gpu/drm/xe/xe_migrate.c | 39 |
1 files changed, 24 insertions, 15 deletions
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 7a6ad3469d..aca519f5b8 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -12,7 +12,8 @@ #include <drm/ttm/ttm_tt.h> #include <drm/xe_drm.h> -#include "generated/xe_wa_oob.h" +#include <generated/xe_wa_oob.h> + #include "instructions/xe_mi_commands.h" #include "regs/xe_gpu_commands.h" #include "tests/xe_test.h" @@ -32,7 +33,6 @@ #include "xe_sync.h" #include "xe_trace.h" #include "xe_vm.h" -#include "xe_wa.h" /** * struct xe_migrate - migrate context. @@ -71,6 +71,16 @@ struct xe_migrate { #define NUM_KERNEL_PDE 17 #define NUM_PT_SLOTS 32 #define LEVEL0_PAGE_TABLE_ENCODE_SIZE SZ_2M +#define MAX_NUM_PTE 512 + +/* + * Although MI_STORE_DATA_IMM's "length" field is 10-bits, 0x3FE is the largest + * legal value accepted. Since that instruction field is always stored in + * (val-2) format, this translates to 0x400 dwords for the true maximum length + * of the instruction. Subtracting the instruction header (1 dword) and + * address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values. + */ +#define MAX_PTE_PER_SDI 0x1FE /** * xe_tile_migrate_engine() - Get this tile's migrate engine. @@ -288,10 +298,6 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, } /* - * Due to workaround 16017236439, odd instance hardware copy engines are - * faster than even instance ones. - * This function returns the mask involving all fast copy engines and the - * reserved copy engine to be used as logical mask for migrate engine. * Including the reserved copy engine is required to avoid deadlocks due to * migrate jobs servicing the faults gets stuck behind the job that faulted. */ @@ -305,8 +311,7 @@ static u32 xe_migrate_usm_logical_mask(struct xe_gt *gt) if (hwe->class != XE_ENGINE_CLASS_COPY) continue; - if (!XE_WA(gt, 16017236439) || - xe_gt_is_usm_hwe(gt, hwe) || hwe->instance & 1) + if (xe_gt_is_usm_hwe(gt, hwe)) logical_mask |= BIT(hwe->logical_instance); } @@ -357,10 +362,14 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile) if (!hwe || !logical_mask) return ERR_PTR(-EINVAL); + /* + * XXX: Currently only reserving 1 (likely slow) BCS instance on + * PVC, may want to revisit if performance is needed. + */ m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe, EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_PERMANENT | - EXEC_QUEUE_FLAG_HIGH_PRIORITY); + EXEC_QUEUE_FLAG_HIGH_PRIORITY, 0); } else { m->q = xe_exec_queue_create_class(xe, primary_gt, vm, XE_ENGINE_CLASS_COPY, @@ -451,13 +460,13 @@ static u32 pte_update_size(struct xe_migrate *m, } else { /* Clip L0 to available size */ u64 size = min(*L0, (u64)avail_pts * SZ_2M); - u64 num_4k_pages = DIV_ROUND_UP(size, XE_PAGE_SIZE); + u32 num_4k_pages = (size + XE_PAGE_SIZE - 1) >> XE_PTE_SHIFT; *L0 = size; *L0_ofs = xe_migrate_vm_addr(pt_ofs, 0); /* MI_STORE_DATA_IMM */ - cmds += 3 * DIV_ROUND_UP(num_4k_pages, 0x1ff); + cmds += 3 * DIV_ROUND_UP(num_4k_pages, MAX_PTE_PER_SDI); /* PDE qwords */ cmds += num_4k_pages * 2; @@ -492,7 +501,7 @@ static void emit_pte(struct xe_migrate *m, ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE); while (ptes) { - u32 chunk = min(0x1ffU, ptes); + u32 chunk = min(MAX_PTE_PER_SDI, ptes); bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); bb->cs[bb->len++] = ofs; @@ -1111,7 +1120,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs, * This shouldn't be possible in practice.. might change when 16K * pages are used. Hence the assert. */ - xe_tile_assert(tile, update->qwords <= 0x1ff); + xe_tile_assert(tile, update->qwords < MAX_NUM_PTE); if (!ppgtt_ofs) ppgtt_ofs = xe_migrate_vram_ofs(tile_to_xe(tile), xe_bo_addr(update->pt_bo, 0, @@ -1120,7 +1129,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs, do { u64 addr = ppgtt_ofs + ofs * 8; - chunk = min(update->qwords, 0x1ffU); + chunk = min(size, MAX_PTE_PER_SDI); /* Ensure populatefn can do memset64 by aligning bb->cs */ if (!(bb->len & 1)) @@ -1299,7 +1308,7 @@ xe_migrate_update_pgtables(struct xe_migrate *m, batch_size = 6 + num_updates * 2; for (i = 0; i < num_updates; i++) { - u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, 0x1ff); + u32 num_cmds = DIV_ROUND_UP(updates[i].qwords, MAX_PTE_PER_SDI); /* align noop + MI_STORE_DATA_IMM cmd prefix */ batch_size += 4 * num_cmds + updates[i].qwords * 2; |