Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_crat.c')
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 97
1 file changed, 81 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index f76b7aee5c..cd8e459201 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1404,6 +1404,66 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
return i;
}
+static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev,
+ struct kfd_gpu_cache_info *pcache_info)
+{
+ struct amdgpu_device *adev = kdev->adev;
+ int i = 0;
+
+ /* TCP L1 Cache per CU */
+ if (adev->gfx.config.gc_tcp_size_per_cu) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_tcp_size_per_cu;
+ pcache_info[i].cache_level = 1;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = 1;
+ i++;
+ }
+ /* Scalar L1 Instruction Cache per SQC */
+ if (adev->gfx.config.gc_l1_instruction_cache_size_per_sqc) {
+ pcache_info[i].cache_size =
+ adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
+ pcache_info[i].cache_level = 1;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc;
+ i++;
+ }
+ /* Scalar L1 Data Cache per SQC */
+ if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
+ pcache_info[i].cache_level = 1;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_cu_per_sqc;
+ i++;
+ }
+ /* L2 Data Cache per GPU (Total Tex Cache) */
+ if (adev->gfx.config.gc_tcc_size) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_tcc_size;
+ pcache_info[i].cache_level = 2;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ i++;
+ }
+ /* L3 Data Cache per GPU */
+ if (adev->gmc.mall_size) {
+ pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
+ pcache_info[i].cache_level = 3;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+ pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ i++;
+ }
+ return i;
+}
+
int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
{
int num_of_cache_types = 0;
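
The helper added above mirrors kfd_fill_gpu_cache_info_from_gfx_config(), but takes the cache geometry from the fields IP discovery stores in adev->gfx.config (plus the MALL size in adev->gmc) instead of a hand-maintained table such as aldebaran_cache_info; note that mall_size is kept in bytes and is divided by 1024 before being reported. Below is a minimal stand-alone sketch of that fill pattern, not kernel code: the struct, flag values and sizes are made up for illustration, and only the field names and the bytes-to-KiB conversion follow the hunk above.

	/* Sketch only: simplified mirror of the fill pattern above, with made-up sizes. */
	#include <stdio.h>

	#define CACHE_FLAGS_ENABLED    (1u << 0)  /* stand-ins for the CRAT_CACHE_FLAGS_* bits */
	#define CACHE_FLAGS_DATA_CACHE (1u << 1)
	#define CACHE_FLAGS_INST_CACHE (1u << 2)
	#define CACHE_FLAGS_SIMD_CACHE (1u << 3)

	struct cache_info {               /* simplified stand-in for struct kfd_gpu_cache_info */
		unsigned int cache_size;  /* KiB */
		unsigned int cache_level;
		unsigned int flags;
		unsigned int num_cu_shared;
	};

	int main(void)
	{
		/* Hypothetical per-ASIC values; the kernel reads these from adev->gfx.config / adev->gmc. */
		unsigned int tcp_size_per_cu = 16, l1i_per_sqc = 32, l1d_per_sqc = 16;
		unsigned int cu_per_sqc = 2, tcc_size = 8192, max_cu_per_sh = 16;
		unsigned long long mall_size_bytes = 256ULL << 20;

		struct cache_info info[8];
		int i = 0;

		info[i++] = (struct cache_info){ tcp_size_per_cu, 1,
			CACHE_FLAGS_ENABLED | CACHE_FLAGS_DATA_CACHE | CACHE_FLAGS_SIMD_CACHE, 1 };
		info[i++] = (struct cache_info){ l1i_per_sqc, 1,
			CACHE_FLAGS_ENABLED | CACHE_FLAGS_INST_CACHE | CACHE_FLAGS_SIMD_CACHE, cu_per_sqc };
		info[i++] = (struct cache_info){ l1d_per_sqc, 1,
			CACHE_FLAGS_ENABLED | CACHE_FLAGS_DATA_CACHE | CACHE_FLAGS_SIMD_CACHE, cu_per_sqc };
		info[i++] = (struct cache_info){ tcc_size, 2,
			CACHE_FLAGS_ENABLED | CACHE_FLAGS_DATA_CACHE | CACHE_FLAGS_SIMD_CACHE, max_cu_per_sh };
		/* mall_size is tracked in bytes, so convert to KiB like the L3 branch above. */
		info[i++] = (struct cache_info){ (unsigned int)(mall_size_bytes / 1024), 3,
			CACHE_FLAGS_ENABLED | CACHE_FLAGS_DATA_CACHE | CACHE_FLAGS_SIMD_CACHE, max_cu_per_sh };

		for (int j = 0; j < i; j++)
			printf("L%u cache: %u KiB, shared by %u CU(s)\n",
			       info[j].cache_level, info[j].cache_size, info[j].num_cu_shared);
		return 0;
	}
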
@@ -1461,10 +1521,14 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc
num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
break;
case IP_VERSION(9, 4, 2):
- case IP_VERSION(9, 4, 3):
*pcache_info = aldebaran_cache_info;
num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
break;
+ case IP_VERSION(9, 4, 3):
+ num_of_cache_types =
+ kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd,
+ *pcache_info);
+ break;
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 2):
*pcache_info = raven_cache_info;
@@ -1522,6 +1586,7 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
case IP_VERSION(11, 0, 4):
+ case IP_VERSION(11, 5, 0):
num_of_cache_types =
kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info);
break;
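
With the two hunks above, GC 9.4.3 stops sharing the static aldebaran_cache_info table and gets its cache list generated at runtime by the new v2 helper, while GC 11.5.0 is routed to the existing kfd_fill_gpu_cache_info_from_gfx_config() path. The stand-alone sketch below only illustrates the dispatch pattern; the IP_VERSION() packing used here (major << 16 | minor << 8 | revision) is an assumption for illustration and is not taken from this file.

	/* Sketch: per-GC-IP selection of static table vs. runtime fill, outside the kernel. */
	#include <stdio.h>

	#define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv))  /* assumed packing */

	static const char *cache_info_source(unsigned int gc_ver)
	{
		switch (gc_ver) {
		case IP_VERSION(9, 4, 2):
			return "static aldebaran_cache_info table";
		case IP_VERSION(9, 4, 3):
			return "runtime fill from gfx config (v2 helper)";
		case IP_VERSION(11, 5, 0):
			return "runtime fill from gfx config";
		default:
			return "other";
		}
	}

	int main(void)
	{
		printf("GC 9.4.3  -> %s\n", cache_info_source(IP_VERSION(9, 4, 3)));
		printf("GC 11.5.0 -> %s\n", cache_info_source(IP_VERSION(11, 5, 0)));
		return 0;
	}
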
@@ -2037,11 +2102,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
uint32_t proximity_domain)
{
struct crat_header *crat_table = (struct crat_header *)pcrat_image;
+ struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config;
+ struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info;
struct crat_subtype_generic *sub_type_hdr;
struct kfd_local_mem_info local_mem_info;
struct kfd_topology_device *peer_dev;
struct crat_subtype_computeunit *cu;
- struct kfd_cu_info cu_info;
int avail_size = *size;
uint32_t total_num_of_cu;
uint32_t nid = 0;
@@ -2085,21 +2151,20 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
cu->proximity_domain = proximity_domain;
- amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
- cu->num_simd_per_cu = cu_info.simd_per_cu;
- cu->num_simd_cores = cu_info.simd_per_cu *
- (cu_info.cu_active_number / kdev->kfd->num_nodes);
- cu->max_waves_simd = cu_info.max_waves_per_simd;
+ cu->num_simd_per_cu = cu_info->simd_per_cu;
+ cu->num_simd_cores = cu_info->simd_per_cu *
+ (cu_info->number / kdev->kfd->num_nodes);
+ cu->max_waves_simd = cu_info->max_waves_per_simd;
- cu->wave_front_size = cu_info.wave_front_size;
- cu->array_count = cu_info.num_shader_arrays_per_engine *
- cu_info.num_shader_engines;
- total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh);
+ cu->wave_front_size = cu_info->wave_front_size;
+ cu->array_count = gfx_info->max_sh_per_se *
+ gfx_info->max_shader_engines;
+ total_num_of_cu = (cu->array_count * gfx_info->max_cu_per_sh);
cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
- cu->num_cu_per_array = cu_info.num_cu_per_sh;
- cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu;
- cu->num_banks = cu_info.num_shader_engines;
- cu->lds_size_in_kb = cu_info.lds_size;
+ cu->num_cu_per_array = gfx_info->max_cu_per_sh;
+ cu->max_slots_scatch_cu = cu_info->max_scratch_slots_per_cu;
+ cu->num_banks = gfx_info->max_shader_engines;
+ cu->lds_size_in_kb = cu_info->lds_size;
cu->hsa_capability = 0;
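
The vCRAT hunks above drop the kfd_cu_info snapshot obtained through amdgpu_amdkfd_get_cu_info() and read the CU properties straight from adev->gfx.cu_info and adev->gfx.config; the CU count is now derived from the shader-engine geometry (array_count = max_sh_per_se * max_shader_engines, total_num_of_cu = array_count * max_cu_per_sh). A small worked sketch of that arithmetic with hypothetical geometry values (4 shader engines, 1 shader array per engine, 16 CUs per array):

	/* Sketch of the CU-count arithmetic above, with made-up geometry. */
	#include <stdio.h>

	int main(void)
	{
		/* Hypothetical values; the kernel reads these from adev->gfx.config. */
		unsigned int max_shader_engines = 4;   /* gfx_info->max_shader_engines */
		unsigned int max_sh_per_se = 1;        /* gfx_info->max_sh_per_se */
		unsigned int max_cu_per_sh = 16;       /* gfx_info->max_cu_per_sh */

		unsigned int array_count = max_sh_per_se * max_shader_engines;
		unsigned int total_num_of_cu = array_count * max_cu_per_sh;

		printf("array_count = %u, total_num_of_cu = %u\n",
		       array_count, total_num_of_cu);  /* 4 arrays, 64 CUs */
		return 0;
	}
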
@@ -2115,7 +2180,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
sub_type_hdr->length);
- if (debug_largebar)
+ if (kdev->adev->debug_largebar)
local_mem_info.local_mem_size_private = 0;
if (local_mem_info.local_mem_size_private == 0)