From 9f0fc191371843c4fc000a226b0a26b6c059aacd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 18 May 2024 19:40:19 +0200 Subject: Merging upstream version 6.7.7. Signed-off-by: Daniel Baumann --- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 35 +++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index db0d94ca4f..d65e21914d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -28,7 +28,7 @@ static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst) { - switch (adev->ip_versions[UMC_HWIP][0]) { + switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) { case IP_VERSION(6, 7, 0): umc_v6_7_convert_error_address(adev, err_data, err_addr, ch_inst, umc_inst); @@ -45,8 +45,12 @@ static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev, int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst) { - struct ras_err_data err_data = {0, 0, 0, NULL}; - int ret = AMDGPU_RAS_FAIL; + struct ras_err_data err_data; + int ret; + + ret = amdgpu_ras_error_data_init(&err_data); + if (ret) + return ret; err_data.err_addr = kcalloc(adev->umc.max_ras_err_cnt_per_query, @@ -54,7 +58,8 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, if (!err_data.err_addr) { dev_warn(adev->dev, "Failed to alloc memory for umc error record in MCA notifier!\n"); - return AMDGPU_RAS_FAIL; + ret = AMDGPU_RAS_FAIL; + goto out_fini_err_data; } /* @@ -63,7 +68,7 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, ret = amdgpu_umc_convert_error_address(adev, &err_data, err_addr, ch_inst, umc_inst); if (ret) - goto out; + goto out_free_err_addr; if (amdgpu_bad_page_threshold != 0) { amdgpu_ras_add_bad_pages(adev, err_data.err_addr, @@ -71,8 +76,12 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, amdgpu_ras_save_bad_pages(adev, NULL); } -out: +out_free_err_addr: kfree(err_data.err_addr); + +out_fini_err_data: + amdgpu_ras_error_data_fini(&err_data); + return ret; } @@ -157,8 +166,12 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, } } - if (reset) + if (reset) { + /* use mode-2 reset for poison consumption */ + if (!entry) + con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET; amdgpu_ras_reset_gpu(adev); + } } kfree(err_data->err_addr); @@ -182,18 +195,24 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset) } if (!amdgpu_sriov_vf(adev)) { - struct ras_err_data err_data = {0, 0, 0, NULL}; + struct ras_err_data err_data; struct ras_common_if head = { .block = AMDGPU_RAS_BLOCK__UMC, }; struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); + ret = amdgpu_ras_error_data_init(&err_data); + if (ret) + return ret; + ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset); if (ret == AMDGPU_RAS_SUCCESS && obj) { obj->err_data.ue_count += err_data.ue_count; obj->err_data.ce_count += err_data.ce_count; } + + amdgpu_ras_error_data_fini(&err_data); } else { if (adev->virt.ops && adev->virt.ops->ras_poison_handler) adev->virt.ops->ras_poison_handler(adev); -- cgit v1.2.3