diff options
Diffstat (limited to 'drivers/gpu/drm/xe/xe_hw_engine.c')
-rw-r--r-- | drivers/gpu/drm/xe/xe_hw_engine.c | 144 |
1 files changed, 94 insertions, 50 deletions
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 1fa5cf5eea..b5e83ea172 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -25,6 +25,7 @@ #include "xe_reg_sr.h" #include "xe_rtp.h" #include "xe_sched_job.h" +#include "xe_sriov.h" #include "xe_tuning.h" #include "xe_uc_fw.h" #include "xe_wa.h" @@ -34,6 +35,7 @@ struct engine_info { const char *name; unsigned int class : 8; unsigned int instance : 8; + unsigned int irq_offset : 8; enum xe_force_wake_domains domain; u32 mmio_base; }; @@ -43,6 +45,7 @@ static const struct engine_info engine_infos[] = { .name = "rcs0", .class = XE_ENGINE_CLASS_RENDER, .instance = 0, + .irq_offset = ilog2(INTR_RCS0), .domain = XE_FW_RENDER, .mmio_base = RENDER_RING_BASE, }, @@ -50,6 +53,7 @@ static const struct engine_info engine_infos[] = { .name = "bcs0", .class = XE_ENGINE_CLASS_COPY, .instance = 0, + .irq_offset = ilog2(INTR_BCS(0)), .domain = XE_FW_RENDER, .mmio_base = BLT_RING_BASE, }, @@ -57,6 +61,7 @@ static const struct engine_info engine_infos[] = { .name = "bcs1", .class = XE_ENGINE_CLASS_COPY, .instance = 1, + .irq_offset = ilog2(INTR_BCS(1)), .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS1_RING_BASE, }, @@ -64,6 +69,7 @@ static const struct engine_info engine_infos[] = { .name = "bcs2", .class = XE_ENGINE_CLASS_COPY, .instance = 2, + .irq_offset = ilog2(INTR_BCS(2)), .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS2_RING_BASE, }, @@ -71,6 +77,7 @@ static const struct engine_info engine_infos[] = { .name = "bcs3", .class = XE_ENGINE_CLASS_COPY, .instance = 3, + .irq_offset = ilog2(INTR_BCS(3)), .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS3_RING_BASE, }, @@ -78,6 +85,7 @@ static const struct engine_info engine_infos[] = { .name = "bcs4", .class = XE_ENGINE_CLASS_COPY, .instance = 4, + .irq_offset = ilog2(INTR_BCS(4)), .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS4_RING_BASE, }, @@ -85,6 +93,7 @@ static const struct engine_info engine_infos[] = { .name = "bcs5", .class = XE_ENGINE_CLASS_COPY, .instance = 5, + .irq_offset = ilog2(INTR_BCS(5)), .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS5_RING_BASE, }, @@ -92,12 +101,14 @@ static const struct engine_info engine_infos[] = { .name = "bcs6", .class = XE_ENGINE_CLASS_COPY, .instance = 6, + .irq_offset = ilog2(INTR_BCS(6)), .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS6_RING_BASE, }, [XE_HW_ENGINE_BCS7] = { .name = "bcs7", .class = XE_ENGINE_CLASS_COPY, + .irq_offset = ilog2(INTR_BCS(7)), .instance = 7, .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS7_RING_BASE, @@ -106,6 +117,7 @@ static const struct engine_info engine_infos[] = { .name = "bcs8", .class = XE_ENGINE_CLASS_COPY, .instance = 8, + .irq_offset = ilog2(INTR_BCS8), .domain = XE_FW_RENDER, .mmio_base = XEHPC_BCS8_RING_BASE, }, @@ -114,6 +126,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs0", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 0, + .irq_offset = 32 + ilog2(INTR_VCS(0)), .domain = XE_FW_MEDIA_VDBOX0, .mmio_base = BSD_RING_BASE, }, @@ -121,6 +134,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs1", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 1, + .irq_offset = 32 + ilog2(INTR_VCS(1)), .domain = XE_FW_MEDIA_VDBOX1, .mmio_base = BSD2_RING_BASE, }, @@ -128,6 +142,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs2", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 2, + .irq_offset = 32 + ilog2(INTR_VCS(2)), .domain = XE_FW_MEDIA_VDBOX2, .mmio_base = BSD3_RING_BASE, }, @@ -135,6 +150,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs3", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 3, + .irq_offset = 32 + ilog2(INTR_VCS(3)), .domain = XE_FW_MEDIA_VDBOX3, .mmio_base = BSD4_RING_BASE, }, @@ -142,6 +158,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs4", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 4, + .irq_offset = 32 + ilog2(INTR_VCS(4)), .domain = XE_FW_MEDIA_VDBOX4, .mmio_base = XEHP_BSD5_RING_BASE, }, @@ -149,6 +166,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs5", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 5, + .irq_offset = 32 + ilog2(INTR_VCS(5)), .domain = XE_FW_MEDIA_VDBOX5, .mmio_base = XEHP_BSD6_RING_BASE, }, @@ -156,6 +174,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs6", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 6, + .irq_offset = 32 + ilog2(INTR_VCS(6)), .domain = XE_FW_MEDIA_VDBOX6, .mmio_base = XEHP_BSD7_RING_BASE, }, @@ -163,6 +182,7 @@ static const struct engine_info engine_infos[] = { .name = "vcs7", .class = XE_ENGINE_CLASS_VIDEO_DECODE, .instance = 7, + .irq_offset = 32 + ilog2(INTR_VCS(7)), .domain = XE_FW_MEDIA_VDBOX7, .mmio_base = XEHP_BSD8_RING_BASE, }, @@ -170,6 +190,7 @@ static const struct engine_info engine_infos[] = { .name = "vecs0", .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, .instance = 0, + .irq_offset = 32 + ilog2(INTR_VECS(0)), .domain = XE_FW_MEDIA_VEBOX0, .mmio_base = VEBOX_RING_BASE, }, @@ -177,6 +198,7 @@ static const struct engine_info engine_infos[] = { .name = "vecs1", .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, .instance = 1, + .irq_offset = 32 + ilog2(INTR_VECS(1)), .domain = XE_FW_MEDIA_VEBOX1, .mmio_base = VEBOX2_RING_BASE, }, @@ -184,6 +206,7 @@ static const struct engine_info engine_infos[] = { .name = "vecs2", .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, .instance = 2, + .irq_offset = 32 + ilog2(INTR_VECS(2)), .domain = XE_FW_MEDIA_VEBOX2, .mmio_base = XEHP_VEBOX3_RING_BASE, }, @@ -191,6 +214,7 @@ static const struct engine_info engine_infos[] = { .name = "vecs3", .class = XE_ENGINE_CLASS_VIDEO_ENHANCE, .instance = 3, + .irq_offset = 32 + ilog2(INTR_VECS(3)), .domain = XE_FW_MEDIA_VEBOX3, .mmio_base = XEHP_VEBOX4_RING_BASE, }, @@ -198,6 +222,7 @@ static const struct engine_info engine_infos[] = { .name = "ccs0", .class = XE_ENGINE_CLASS_COMPUTE, .instance = 0, + .irq_offset = ilog2(INTR_CCS(0)), .domain = XE_FW_RENDER, .mmio_base = COMPUTE0_RING_BASE, }, @@ -205,6 +230,7 @@ static const struct engine_info engine_infos[] = { .name = "ccs1", .class = XE_ENGINE_CLASS_COMPUTE, .instance = 1, + .irq_offset = ilog2(INTR_CCS(1)), .domain = XE_FW_RENDER, .mmio_base = COMPUTE1_RING_BASE, }, @@ -212,6 +238,7 @@ static const struct engine_info engine_infos[] = { .name = "ccs2", .class = XE_ENGINE_CLASS_COMPUTE, .instance = 2, + .irq_offset = ilog2(INTR_CCS(2)), .domain = XE_FW_RENDER, .mmio_base = COMPUTE2_RING_BASE, }, @@ -219,6 +246,7 @@ static const struct engine_info engine_infos[] = { .name = "ccs3", .class = XE_ENGINE_CLASS_COMPUTE, .instance = 3, + .irq_offset = ilog2(INTR_CCS(3)), .domain = XE_FW_RENDER, .mmio_base = COMPUTE3_RING_BASE, }, @@ -289,6 +317,19 @@ static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt, xe_rtp_match_first_render_or_compute(gt, hwe); } +static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt, + const struct xe_hw_engine *hwe) +{ + if (GRAPHICS_VER(gt_to_xe(gt)) < 20) + return false; + + if (hwe->class != XE_ENGINE_CLASS_COMPUTE && + hwe->class != XE_ENGINE_CLASS_RENDER) + return false; + + return xe_mmio_read32(hwe->gt, XEHP_FUSE4) & CFEG_WMTP_DISABLE; +} + void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) { @@ -319,6 +360,14 @@ xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe) XE_RTP_ACTIONS(FIELD_SET(RCU_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE, RCU_MODE_FIXED_SLICE_CCS_MODE)) }, + /* Disable WMTP if HW doesn't support it */ + { XE_RTP_NAME("DISABLE_WMTP_ON_UNSUPPORTED_HW"), + XE_RTP_RULES(FUNC(xe_rtp_cfeg_wmtp_disabled)), + XE_RTP_ACTIONS(FIELD_SET(CS_CHICKEN1(0), + PREEMPT_GPGPU_LEVEL_MASK, + PREEMPT_GPGPU_THREAD_GROUP_LEVEL)), + XE_RTP_ENTRY_FLAG(FOREACH_ENGINE) + }, {} }; @@ -397,6 +446,7 @@ static void hw_engine_init_early(struct xe_gt *gt, struct xe_hw_engine *hwe, hwe->class = info->class; hwe->instance = info->instance; hwe->mmio_base = info->mmio_base; + hwe->irq_offset = info->irq_offset; hwe->domain = info->domain; hwe->name = info->name; hwe->fence_irq = >->fence_irq[info->class]; @@ -700,7 +750,7 @@ struct xe_hw_engine_snapshot * xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) { struct xe_hw_engine_snapshot *snapshot; - int len; + u64 val; if (!xe_hw_engine_is_valid(hwe)) return NULL; @@ -710,11 +760,7 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) if (!snapshot) return NULL; - len = strlen(hwe->name) + 1; - snapshot->name = kzalloc(len, GFP_ATOMIC); - if (snapshot->name) - strscpy(snapshot->name, hwe->name, len); - + snapshot->name = kstrdup(hwe->name, GFP_ATOMIC); snapshot->class = hwe->class; snapshot->logical_instance = hwe->logical_instance; snapshot->forcewake.domain = hwe->domain; @@ -722,19 +768,35 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) hwe->domain); snapshot->mmio_base = hwe->mmio_base; - snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); - snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, - RING_HWS_PGA(0)); - snapshot->reg.ring_execlist_status_lo = + /* no more VF accessible data below this point */ + if (IS_SRIOV_VF(gt_to_xe(hwe->gt))) + return snapshot; + + snapshot->reg.ring_execlist_status = hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_LO(0)); - snapshot->reg.ring_execlist_status_hi = - hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)); - snapshot->reg.ring_execlist_sq_contents_lo = - hw_engine_mmio_read32(hwe, - RING_EXECLIST_SQ_CONTENTS_LO(0)); - snapshot->reg.ring_execlist_sq_contents_hi = - hw_engine_mmio_read32(hwe, - RING_EXECLIST_SQ_CONTENTS_HI(0)); + val = hw_engine_mmio_read32(hwe, RING_EXECLIST_STATUS_HI(0)); + snapshot->reg.ring_execlist_status |= val << 32; + + snapshot->reg.ring_execlist_sq_contents = + hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_LO(0)); + val = hw_engine_mmio_read32(hwe, RING_EXECLIST_SQ_CONTENTS_HI(0)); + snapshot->reg.ring_execlist_sq_contents |= val << 32; + + snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0)); + val = hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)); + snapshot->reg.ring_acthd |= val << 32; + + snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0)); + val = hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)); + snapshot->reg.ring_bbaddr |= val << 32; + + snapshot->reg.ring_dma_fadd = + hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); + val = hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); + snapshot->reg.ring_dma_fadd |= val << 32; + + snapshot->reg.ring_hwstam = hw_engine_mmio_read32(hwe, RING_HWSTAM(0)); + snapshot->reg.ring_hws_pga = hw_engine_mmio_read32(hwe, RING_HWS_PGA(0)); snapshot->reg.ring_start = hw_engine_mmio_read32(hwe, RING_START(0)); snapshot->reg.ring_head = hw_engine_mmio_read32(hwe, RING_HEAD(0)) & HEAD_ADDR; @@ -748,16 +810,6 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe) snapshot->reg.ring_esr = hw_engine_mmio_read32(hwe, RING_ESR(0)); snapshot->reg.ring_emr = hw_engine_mmio_read32(hwe, RING_EMR(0)); snapshot->reg.ring_eir = hw_engine_mmio_read32(hwe, RING_EIR(0)); - snapshot->reg.ring_acthd_udw = - hw_engine_mmio_read32(hwe, RING_ACTHD_UDW(0)); - snapshot->reg.ring_acthd = hw_engine_mmio_read32(hwe, RING_ACTHD(0)); - snapshot->reg.ring_bbaddr_udw = - hw_engine_mmio_read32(hwe, RING_BBADDR_UDW(0)); - snapshot->reg.ring_bbaddr = hw_engine_mmio_read32(hwe, RING_BBADDR(0)); - snapshot->reg.ring_dma_fadd_udw = - hw_engine_mmio_read32(hwe, RING_DMA_FADD_UDW(0)); - snapshot->reg.ring_dma_fadd = - hw_engine_mmio_read32(hwe, RING_DMA_FADD(0)); snapshot->reg.ipehr = hw_engine_mmio_read32(hwe, RING_IPEHR(0)); if (snapshot->class == XE_ENGINE_CLASS_COMPUTE) @@ -786,33 +838,25 @@ void xe_hw_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, snapshot->forcewake.domain, snapshot->forcewake.ref); drm_printf(p, "\tHWSTAM: 0x%08x\n", snapshot->reg.ring_hwstam); drm_printf(p, "\tRING_HWS_PGA: 0x%08x\n", snapshot->reg.ring_hws_pga); - drm_printf(p, "\tRING_EXECLIST_STATUS_LO: 0x%08x\n", - snapshot->reg.ring_execlist_status_lo); - drm_printf(p, "\tRING_EXECLIST_STATUS_HI: 0x%08x\n", - snapshot->reg.ring_execlist_status_hi); - drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_LO: 0x%08x\n", - snapshot->reg.ring_execlist_sq_contents_lo); - drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS_HI: 0x%08x\n", - snapshot->reg.ring_execlist_sq_contents_hi); + drm_printf(p, "\tRING_EXECLIST_STATUS: 0x%016llx\n", + snapshot->reg.ring_execlist_status); + drm_printf(p, "\tRING_EXECLIST_SQ_CONTENTS: 0x%016llx\n", + snapshot->reg.ring_execlist_sq_contents); drm_printf(p, "\tRING_START: 0x%08x\n", snapshot->reg.ring_start); - drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head); - drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail); + drm_printf(p, "\tRING_HEAD: 0x%08x\n", snapshot->reg.ring_head); + drm_printf(p, "\tRING_TAIL: 0x%08x\n", snapshot->reg.ring_tail); drm_printf(p, "\tRING_CTL: 0x%08x\n", snapshot->reg.ring_ctl); drm_printf(p, "\tRING_MI_MODE: 0x%08x\n", snapshot->reg.ring_mi_mode); drm_printf(p, "\tRING_MODE: 0x%08x\n", snapshot->reg.ring_mode); - drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr); - drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr); - drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr); - drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir); - drm_printf(p, "\tACTHD: 0x%08x_%08x\n", snapshot->reg.ring_acthd_udw, - snapshot->reg.ring_acthd); - drm_printf(p, "\tBBADDR: 0x%08x_%08x\n", snapshot->reg.ring_bbaddr_udw, - snapshot->reg.ring_bbaddr); - drm_printf(p, "\tDMA_FADDR: 0x%08x_%08x\n", - snapshot->reg.ring_dma_fadd_udw, - snapshot->reg.ring_dma_fadd); - drm_printf(p, "\tIPEHR: 0x%08x\n\n", snapshot->reg.ipehr); + drm_printf(p, "\tRING_IMR: 0x%08x\n", snapshot->reg.ring_imr); + drm_printf(p, "\tRING_ESR: 0x%08x\n", snapshot->reg.ring_esr); + drm_printf(p, "\tRING_EMR: 0x%08x\n", snapshot->reg.ring_emr); + drm_printf(p, "\tRING_EIR: 0x%08x\n", snapshot->reg.ring_eir); + drm_printf(p, "\tACTHD: 0x%016llx\n", snapshot->reg.ring_acthd); + drm_printf(p, "\tBBADDR: 0x%016llx\n", snapshot->reg.ring_bbaddr); + drm_printf(p, "\tDMA_FADDR: 0x%016llx\n", snapshot->reg.ring_dma_fadd); + drm_printf(p, "\tIPEHR: 0x%08x\n", snapshot->reg.ipehr); if (snapshot->class == XE_ENGINE_CLASS_COMPUTE) drm_printf(p, "\tRCU_MODE: 0x%08x\n", snapshot->reg.rcu_mode); |