From 9f0fc191371843c4fc000a226b0a26b6c059aacd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 18 May 2024 19:40:19 +0200 Subject: Merging upstream version 6.7.7. Signed-off-by: Daniel Baumann --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 58 ++++++++++++++---------------- 1 file changed, 27 insertions(+), 31 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 25d5fda5b2..3d126f2967 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -28,6 +28,7 @@ #include "amdgpu.h" #include "amdgpu_gfx.h" #include "amdgpu_dma_buf.h" +#include #include #include #include "amdgpu_xgmi.h" @@ -164,7 +165,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) */ bitmap_complement(gpu_resources.cp_queue_bitmap, adev->gfx.mec_bitmap[0].queue_bitmap, - KGD_MAX_QUEUES); + AMDGPU_MAX_QUEUES); /* According to linux/bitmap.h we shouldn't use bitmap_clear if * nbits is not compile time constant @@ -172,7 +173,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) last_valid_bit = 1 /* only first MEC can have compute queues */ * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; - for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i) + for (i = last_valid_bit; i < AMDGPU_MAX_QUEUES; ++i) clear_bit(i, gpu_resources.cp_queue_bitmap); amdgpu_doorbell_get_kfd_info(adev, @@ -467,28 +468,6 @@ uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev) return 100; } -void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *cu_info) -{ - struct amdgpu_cu_info acu_info = adev->gfx.cu_info; - - memset(cu_info, 0, sizeof(*cu_info)); - if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap)) - return; - - cu_info->cu_active_number = acu_info.number; - cu_info->cu_ao_mask = acu_info.ao_cu_mask; - memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0], - sizeof(cu_info->cu_bitmap)); - cu_info->num_shader_engines = adev->gfx.config.max_shader_engines; - cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se; - cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh; - cu_info->simd_per_cu = acu_info.simd_per_cu; - cu_info->max_waves_per_simd = acu_info.max_waves_per_simd; - cu_info->wave_front_size = acu_info.wave_front_size; - cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu; - cu_info->lds_size = acu_info.lds_size; -} - int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, struct amdgpu_device **dmabuf_adev, uint64_t *bo_size, void *metadata_buffer, @@ -568,7 +547,7 @@ int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst, struct amdgpu_device *adev = dst, *peer_adev; int num_links; - if (adev->asic_type != CHIP_ALDEBARAN) + if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)) return 0; if (src) @@ -704,12 +683,17 @@ err: void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle) { - /* Temporary workaround to fix issues observed in some - * compute applications when GFXOFF is enabled on GFX11. - */ - if (IP_VERSION_MAJ(adev->ip_versions[GC_HWIP][0]) == 11) { + enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE; + if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 && + ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) { pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled"); amdgpu_gfx_off_ctrl(adev, idle); + } else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) && + (adev->flags & AMD_IS_APU)) { + /* Disable GFXOFF and PG. Temporary workaround + * to fix some compute applications issue on GFX9. + */ + adev->ip_blocks[AMD_IP_BLOCK_TYPE_GFX].version->funcs->set_powergating_state((void *)adev, state); } amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_COMPUTE, @@ -805,11 +789,23 @@ void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev) u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id) { - u64 tmp; s8 mem_id = KFD_XCP_MEM_ID(adev, xcp_id); + u64 tmp; if (adev->gmc.num_mem_partitions && xcp_id >= 0 && mem_id >= 0) { - tmp = adev->gmc.mem_partitions[mem_id].size; + if (adev->gmc.is_app_apu && adev->gmc.num_mem_partitions == 1) { + /* In NPS1 mode, we should restrict the vram reporting + * tied to the ttm_pages_limit which is 1/2 of the system + * memory. For other partition modes, the HBM is uniformly + * divided already per numa node reported. If user wants to + * go beyond the default ttm limit and maximize the ROCm + * allocations, they can go up to max ttm and sysmem limits. + */ + + tmp = (ttm_tt_pages_limit() << PAGE_SHIFT) / num_online_nodes(); + } else { + tmp = adev->gmc.mem_partitions[mem_id].size; + } do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition); return ALIGN_DOWN(tmp, PAGE_SIZE); } else { -- cgit v1.2.3