diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 275 |
1 files changed, 90 insertions, 185 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index fa87a85e10..6c51856088 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -51,8 +51,6 @@ #include "athub_v2_0.h" #include "athub_v2_1.h" -#include "amdgpu_reset.h" - static int gmc_v10_0_ecc_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned int type, @@ -145,11 +143,15 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, * the new fast GRBM interface. */ if ((entry->vmid_src == AMDGPU_GFXHUB(0)) && - (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0))) + (amdgpu_ip_version(adev, GC_HWIP, 0) < + IP_VERSION(10, 3, 0))) RREG32(hub->vm_l2_pro_fault_status); status = RREG32(hub->vm_l2_pro_fault_status); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + + amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, + entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0)); } if (!printk_ratelimit()) @@ -230,20 +232,47 @@ static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info( * by the amdgpu vm/hsa code. */ -static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, - unsigned int vmhub, uint32_t flush_type) +/** + * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback + * + * @adev: amdgpu_device pointer + * @vmid: vm instance to flush + * @vmhub: vmhub type + * @flush_type: the flush type + * + * Flush the TLB for the requested page table. + */ +static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub); struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type); - u32 tmp; /* Use register 17 for GART */ const unsigned int eng = 17; - unsigned int i; unsigned char hub_ip = 0; + u32 sem, req, ack; + unsigned int i; + u32 tmp; + + sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng; + req = hub->vm_inv_eng0_req + hub->eng_distance * eng; + ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; - hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? - GC_HWIP : MMHUB_HWIP; + /* flush hdp cache */ + adev->hdp.funcs->flush_hdp(adev, NULL); + + /* For SRIOV run time, driver shouldn't access the register through MMIO + * Directly use kiq to do the vm invalidation instead + */ + if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes && + (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { + amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, + 1 << vmid, GET_INST(GC, 0)); + return; + } + + hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP; spin_lock(&adev->gmc.invalidate_lock); /* @@ -257,9 +286,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, if (use_semaphore) { for (i = 0; i < adev->usec_timeout; i++) { /* a read return value of 1 means semaphore acuqire */ - tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem + - hub->eng_distance * eng, hub_ip); - + tmp = RREG32_RLC_NO_KIQ(sem, hub_ip); if (tmp & 0x1) break; udelay(1); @@ -269,24 +296,19 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); } - WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + - hub->eng_distance * eng, - inv_req, hub_ip); + WREG32_RLC_NO_KIQ(req, inv_req, hub_ip); /* * Issue a dummy read to wait for the ACK register to be cleared * to avoid a false ACK due to the new fast GRBM interface. */ if ((vmhub == AMDGPU_GFXHUB(0)) && - (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0))) - RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + - hub->eng_distance * eng, hub_ip); + (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 3, 0))) + RREG32_RLC_NO_KIQ(req, hub_ip); /* Wait for ACK with a delay.*/ for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack + - hub->eng_distance * eng, hub_ip); - + tmp = RREG32_RLC_NO_KIQ(ack, hub_ip); tmp &= 1 << vmid; if (tmp) break; @@ -296,109 +318,13 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ if (use_semaphore) - /* - * add semaphore release after invalidation, - * write with 0 means semaphore release - */ - WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem + - hub->eng_distance * eng, 0, hub_ip); + WREG32_RLC_NO_KIQ(sem, 0, hub_ip); spin_unlock(&adev->gmc.invalidate_lock); - if (i < adev->usec_timeout) - return; - - DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub); -} - -/** - * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback - * - * @adev: amdgpu_device pointer - * @vmid: vm instance to flush - * @vmhub: vmhub type - * @flush_type: the flush type - * - * Flush the TLB for the requested page table. - */ -static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, - uint32_t vmhub, uint32_t flush_type) -{ - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; - struct dma_fence *fence; - struct amdgpu_job *job; - - int r; - - /* flush hdp cache */ - adev->hdp.funcs->flush_hdp(adev, NULL); - - /* For SRIOV run time, driver shouldn't access the register through MMIO - * Directly use kiq to do the vm invalidation instead - */ - if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes && - (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && - down_read_trylock(&adev->reset_domain->sem)) { - struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; - const unsigned int eng = 17; - u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type); - u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng; - u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; - - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid); - - up_read(&adev->reset_domain->sem); - return; - } - - mutex_lock(&adev->mman.gtt_window_lock); - - if (vmhub == AMDGPU_MMHUB0(0)) { - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB0(0), 0); - mutex_unlock(&adev->mman.gtt_window_lock); - return; - } - - BUG_ON(vmhub != AMDGPU_GFXHUB(0)); - - if (!adev->mman.buffer_funcs_enabled || - !adev->ib_pool_ready || - amdgpu_in_reset(adev) || - ring->sched.ready == false) { - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB(0), 0); - mutex_unlock(&adev->mman.gtt_window_lock); - return; - } - - /* The SDMA on Navi has a bug which can theoretically result in memory - * corruption if an invalidation happens at the same time as an VA - * translation. Avoid this by doing the invalidation from the SDMA - * itself. - */ - r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr, - AMDGPU_FENCE_OWNER_UNDEFINED, - 16 * 4, AMDGPU_IB_POOL_IMMEDIATE, - &job); - if (r) - goto error_alloc; - - job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo); - job->vm_needs_flush = true; - job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop; - amdgpu_ring_pad_ib(ring, &job->ibs[0]); - fence = amdgpu_job_submit(job); - - mutex_unlock(&adev->mman.gtt_window_lock); - - dma_fence_wait(fence, false); - dma_fence_put(fence); - - return; - -error_alloc: - mutex_unlock(&adev->mman.gtt_window_lock); - DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r); + if (i >= adev->usec_timeout) + dev_err(adev->dev, "Timeout waiting for VM flush hub: %d!\n", + vmhub); } /** @@ -412,62 +338,31 @@ error_alloc: * * Flush the TLB for the requested pasid. */ -static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, - uint16_t pasid, uint32_t flush_type, - bool all_hub, uint32_t inst) +static void gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub, uint32_t inst) { + uint16_t queried; int vmid, i; - signed long r; - uint32_t seq; - uint16_t queried_pasid; - bool ret; - u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout; - struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; - - if (amdgpu_emu_mode == 0 && ring->sched.ready) { - spin_lock(&adev->gfx.kiq[0].ring_lock); - /* 2 dwords flush + 8 dwords fence */ - amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8); - kiq->pmf->kiq_invalidate_tlbs(ring, - pasid, flush_type, all_hub); - r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); - if (r) { - amdgpu_ring_undo(ring); - spin_unlock(&adev->gfx.kiq[0].ring_lock); - return -ETIME; - } - - amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq[0].ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); - if (r < 1) { - dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); - return -ETIME; - } - - return 0; - } for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { - - ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid, - &queried_pasid); - if (ret && queried_pasid == pasid) { - if (all_hub) { - for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) - gmc_v10_0_flush_gpu_tlb(adev, vmid, - i, flush_type); - } else { - gmc_v10_0_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB(0), flush_type); - } - if (!adev->enable_mes) - break; + bool valid; + + valid = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid, + &queried); + if (!valid || queried != pasid) + continue; + + if (all_hub) { + for_each_set_bit(i, adev->vmhubs_mask, + AMDGPU_MAX_VMHUBS) + gmc_v10_0_flush_gpu_tlb(adev, vmid, i, + flush_type); + } else { + gmc_v10_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), + flush_type); } } - - return 0; } static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, @@ -634,6 +529,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev, } if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT | + AMDGPU_GEM_CREATE_EXT_COHERENT | AMDGPU_GEM_CREATE_UNCACHED)) *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) | AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); @@ -680,7 +576,7 @@ static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev) static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev) { - switch (adev->ip_versions[UMC_HWIP][0]) { + switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) { case IP_VERSION(8, 7, 0): adev->umc.max_ras_err_cnt_per_query = UMC_V8_7_TOTAL_CHANNEL_NUM; adev->umc.channel_inst_num = UMC_V8_7_CHANNEL_INSTANCE_NUM; @@ -697,7 +593,7 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev) static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev) { - switch (adev->ip_versions[MMHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(2, 3, 0): case IP_VERSION(2, 4, 0): case IP_VERSION(2, 4, 1): @@ -711,7 +607,7 @@ static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev) static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 3, 0): case IP_VERSION(10, 3, 2): case IP_VERSION(10, 3, 1): @@ -776,9 +672,11 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, &adev->gmc, base); - amdgpu_gmc_gart_location(adev, mc); - amdgpu_gmc_agp_location(adev, mc); + amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); + if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1)) + amdgpu_gmc_agp_location(adev, mc); /* base offset of vram pages */ adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev); @@ -825,7 +723,7 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) /* set the gart size */ if (amdgpu_gart_size == -1) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { default: adev->gmc.gart_size = 512ULL << 20; break; @@ -892,7 +790,7 @@ static int gmc_v10_0_sw_init(void *handle) adev->gmc.vram_vendor = vram_vendor; } - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 3, 0): adev->gmc.mall_size = 128 * 1024 * 1024; break; @@ -910,7 +808,7 @@ static int gmc_v10_0_sw_init(void *handle) break; } - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 1, 10): case IP_VERSION(10, 1, 1): case IP_VERSION(10, 1, 2): @@ -1084,8 +982,10 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) static int gmc_v10_0_hw_init(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode; /* The sequence of these two function calls matters.*/ gmc_v10_0_init_golden_registers(adev); @@ -1141,6 +1041,10 @@ static int gmc_v10_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); + if (adev->gmc.ecc_irq.funcs && + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); + return 0; } @@ -1195,7 +1099,8 @@ static int gmc_v10_0_set_clockgating_state(void *handle, * is a new problem observed at DF 3.0.3, however with the same suspend sequence not * seen any issue on the DF 3.0.2 series platform. */ - if (adev->in_s0ix && adev->ip_versions[DF_HWIP][0] > IP_VERSION(3, 0, 2)) { + if (adev->in_s0ix && + amdgpu_ip_version(adev, DF_HWIP, 0) > IP_VERSION(3, 0, 2)) { dev_dbg(adev->dev, "keep mmhub clock gating being enabled for s0ix\n"); return 0; } @@ -1204,7 +1109,7 @@ static int gmc_v10_0_set_clockgating_state(void *handle, if (r) return r; - if (adev->ip_versions[ATHUB_HWIP][0] >= IP_VERSION(2, 1, 0)) + if (amdgpu_ip_version(adev, ATHUB_HWIP, 0) >= IP_VERSION(2, 1, 0)) return athub_v2_1_set_clockgating(adev, state); else return athub_v2_0_set_clockgating(adev, state); @@ -1214,13 +1119,13 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 3) || - adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 4)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 4)) return; adev->mmhub.funcs->get_clockgating(adev, flags); - if (adev->ip_versions[ATHUB_HWIP][0] >= IP_VERSION(2, 1, 0)) + if (amdgpu_ip_version(adev, ATHUB_HWIP, 0) >= IP_VERSION(2, 1, 0)) athub_v2_1_get_clockgating(adev, flags); else athub_v2_0_get_clockgating(adev, flags); |