author    Daniel Baumann <daniel.baumann@progress-linux.org> 2024-08-07 13:18:06 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-08-07 13:18:06 +0000
commit    638a9e433ecd61e64761352dbec1fa4f5874c941 (patch)
tree      fdbff74a238d7a5a7d1cef071b7230bc064b9f25 /drivers/gpu/drm/amd/amdgpu
parent    Releasing progress-linux version 6.9.12-1~progress7.99u1. (diff)
download  linux-638a9e433ecd61e64761352dbec1fa4f5874c941.tar.xz
          linux-638a9e433ecd61e64761352dbec1fa4f5874c941.zip
Merging upstream version 6.10.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/Makefile | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 14
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 169
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h | 33
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 29
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c | 71
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 16
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c | 360
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h | 47
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 161
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 23
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 34
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c | 47
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 33
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c | 32
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 46
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 24
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 25
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 51
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 506
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 77
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 22
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h | 25
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 105
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 17
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 29
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 88
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 132
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 25
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 41
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 82
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 17
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 76
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c | 112
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 32
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 32
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 13
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/atom.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik_ih.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cz_ih.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 151
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 19
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 58
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 17
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 15
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/ih_v6_0.c | 28
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/ih_v6_1.c | 28
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/ih_v7_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mes_v10_1.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 188
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 57
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/nv.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v14_0.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 46
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 21
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 16
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_dma.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_ih.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c | 62
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h | 30
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15.c | 16
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc21.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/ta_ras_if.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/tonga_ih.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 416
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/umc_v12_0.h | 77
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/umc_v8_10.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c | 23
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vi.c | 10
145 files changed, 3259 insertions, 1025 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 4536c8ad0e..078f588e99 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -23,7 +23,7 @@
# Makefile for the drm device driver. This driver provides support for the
# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
-FULL_AMD_PATH=$(srctree)/$(src)/..
+FULL_AMD_PATH=$(src)/..
DISPLAY_FOLDER_NAME=display
FULL_AMD_DISPLAY_PATH = $(FULL_AMD_PATH)/$(DISPLAY_FOLDER_NAME)
@@ -70,7 +70,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
- amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \
+ amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \
+ amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
@@ -80,7 +81,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
- amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o
+ amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o
amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
@@ -247,7 +248,8 @@ amdgpu-y += \
smuio_v11_0_6.o \
smuio_v13_0.o \
smuio_v13_0_3.o \
- smuio_v13_0_6.o
+ smuio_v13_0_6.o \
+ smuio_v14_0_2.o
# add reset block
amdgpu-y += \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b3b8464720..f87d53e183 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -139,6 +139,14 @@ enum amdgpu_ss {
AMDGPU_SS_DRV_UNLOAD
};
+struct amdgpu_hwip_reg_entry {
+ u32 hwip;
+ u32 inst;
+ u32 seg;
+ u32 reg_offset;
+ const char *reg_name;
+};
+
struct amdgpu_watchdog_timer {
bool timeout_fatal_disable;
uint32_t period; /* maxCycles = (1 << period), the number of cycles before a timeout */
@@ -494,6 +502,7 @@ struct amdgpu_wb {
uint64_t gpu_addr;
u32 num_wb; /* Number of wb slots actually reserved for amdgpu. */
unsigned long used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)];
+ spinlock_t lock;
};
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
@@ -606,7 +615,7 @@ struct amdgpu_asic_funcs {
/* PCIe replay counter */
uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
/* device supports BACO */
- bool (*supports_baco)(struct amdgpu_device *adev);
+ int (*supports_baco)(struct amdgpu_device *adev);
/* pre asic_init quirks */
void (*pre_asic_init)(struct amdgpu_device *adev);
/* enter/exit umd stable pstate */
@@ -1408,7 +1417,8 @@ bool amdgpu_device_supports_atpx(struct drm_device *dev);
bool amdgpu_device_supports_px(struct drm_device *dev);
bool amdgpu_device_supports_boco(struct drm_device *dev);
bool amdgpu_device_supports_smart_shift(struct drm_device *dev);
-bool amdgpu_device_supports_baco(struct drm_device *dev);
+int amdgpu_device_supports_baco(struct drm_device *dev);
+void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev);
bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
struct amdgpu_device *peer_adev);
int amdgpu_device_baco_enter(struct drm_device *dev);
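
Note: the supports_baco callback above now returns an int bitmask instead of a bool, so one query can distinguish plain BACO from BAMACO (BACO + MACO). A minimal sketch of the decode pattern, mirroring the checks in amdgpu_device_detect_runtime_pm_mode() further down; BACO_SUPPORT and MACO_SUPPORT are assumed here to be distinct bit flags defined elsewhere in the driver headers:

	/* sketch: decoding the bitmask returned by the reworked helper */
	int bamaco_support = amdgpu_device_supports_baco(dev);

	if (bamaco_support & MACO_SUPPORT)	/* MACO implies BACO works */
		adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
	else if (bamaco_support & BACO_SUPPORT)
		adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
	else
		adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
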
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
index 493982f946..c50202215f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -28,7 +28,7 @@
#define ACA_BANK_HWID(type, hwid, mcatype) [ACA_HWIP_TYPE_##type] = {hwid, mcatype}
-typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type, void *data);
+typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data);
struct aca_banks {
int nr_banks;
@@ -86,7 +86,7 @@ static void aca_banks_release(struct aca_banks *banks)
}
}
-static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, enum aca_error_type type, u32 *count)
+static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count)
{
struct amdgpu_aca *aca = &adev->aca;
const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
@@ -116,20 +116,22 @@ static struct aca_regs_dump {
{"CONTROL_MASK", ACA_REG_IDX_CTL_MASK},
};
-static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank)
+static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank,
+ struct ras_query_context *qctx)
{
+ u64 event_id = qctx ? qctx->event_id : 0ULL;
int i;
- dev_info(adev->dev, HW_ERR "Accelerator Check Architecture events logged\n");
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n");
/* plus 1 for output format, e.g: ACA[08/08]: xxxx */
for (i = 0; i < ARRAY_SIZE(aca_regs); i++)
- dev_info(adev->dev, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n",
- idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]);
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n",
+ idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]);
}
-static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_error_type type,
+static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type,
int start, int count,
- struct aca_banks *banks)
+ struct aca_banks *banks, struct ras_query_context *qctx)
{
struct amdgpu_aca *aca = &adev->aca;
const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
@@ -143,13 +145,12 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_erro
return -EOPNOTSUPP;
switch (type) {
- case ACA_ERROR_TYPE_UE:
+ case ACA_SMU_TYPE_UE:
max_count = smu_funcs->max_ue_bank_count;
break;
- case ACA_ERROR_TYPE_CE:
+ case ACA_SMU_TYPE_CE:
max_count = smu_funcs->max_ce_bank_count;
break;
- case ACA_ERROR_TYPE_DEFERRED:
default:
return -EINVAL;
}
@@ -164,7 +165,9 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_erro
if (ret)
return ret;
- aca_smu_bank_dump(adev, i, count, &bank);
+ bank.type = type;
+
+ aca_smu_bank_dump(adev, i, count, &bank, qctx);
ret = aca_banks_add_bank(banks, &bank);
if (ret)
@@ -195,7 +198,7 @@ static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type t
return hwip->hwid == hwid && hwip->mcatype == mcatype;
}
-static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type)
+static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type)
{
const struct aca_bank_ops *bank_ops = handle->bank_ops;
@@ -273,59 +276,49 @@ static struct aca_bank_error *get_bank_error(struct aca_error *aerr, struct aca_
return new_bank_error(aerr, info);
}
-static int aca_log_errors(struct aca_handle *handle, enum aca_error_type type,
- struct aca_bank_report *report)
+int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info,
+ enum aca_error_type type, u64 count)
{
struct aca_error_cache *error_cache = &handle->error_cache;
struct aca_bank_error *bank_error;
struct aca_error *aerr;
- if (!handle || !report)
+ if (!handle || !info || type >= ACA_ERROR_TYPE_COUNT)
return -EINVAL;
- if (!report->count[type])
+ if (!count)
return 0;
aerr = &error_cache->errors[type];
- bank_error = get_bank_error(aerr, &report->info);
+ bank_error = get_bank_error(aerr, info);
if (!bank_error)
return -ENOMEM;
- bank_error->count[type] += report->count[type];
+ bank_error->count += count;
return 0;
}
-static int aca_generate_bank_report(struct aca_handle *handle, struct aca_bank *bank,
- enum aca_error_type type, struct aca_bank_report *report)
+static int aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type)
{
const struct aca_bank_ops *bank_ops = handle->bank_ops;
- if (!bank || !report)
+ if (!bank)
return -EINVAL;
- if (!bank_ops->aca_bank_generate_report)
+ if (!bank_ops->aca_bank_parser)
return -EOPNOTSUPP;
- memset(report, 0, sizeof(*report));
- return bank_ops->aca_bank_generate_report(handle, bank, type,
- report, handle->data);
+ return bank_ops->aca_bank_parser(handle, bank, type,
+ handle->data);
}
static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank *bank,
- enum aca_error_type type, void *data)
+ enum aca_smu_type type, void *data)
{
- struct aca_bank_report report;
int ret;
- ret = aca_generate_bank_report(handle, bank, type, &report);
- if (ret)
- return ret;
-
- if (!report.count[type])
- return 0;
-
- ret = aca_log_errors(handle, type, &report);
+ ret = aca_bank_parser(handle, bank, type);
if (ret)
return ret;
@@ -333,7 +326,7 @@ static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank
}
static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank *bank,
- enum aca_error_type type, bank_handler_t handler, void *data)
+ enum aca_smu_type type, bank_handler_t handler, void *data)
{
struct aca_handle *handle;
int ret;
@@ -354,7 +347,7 @@ static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank *ba
}
static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks *banks,
- enum aca_error_type type, bank_handler_t handler, void *data)
+ enum aca_smu_type type, bank_handler_t handler, void *data)
{
struct aca_bank_node *node;
struct aca_bank *bank;
@@ -378,8 +371,28 @@ static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks *
return 0;
}
-static int aca_banks_update(struct amdgpu_device *adev, enum aca_error_type type,
- bank_handler_t handler, void *data)
+static bool aca_bank_should_update(struct amdgpu_device *adev, enum aca_smu_type type)
+{
+ struct amdgpu_aca *aca = &adev->aca;
+ bool ret = true;
+
+ /*
+ * Because the UE valid MCA count is only cleared after a reset,
+ * the ACA banks are read just once during the GPU recovery stage
+ * to avoid counting the same errors repeatedly.
+ */
+ if (type == ACA_SMU_TYPE_UE) {
+ if (amdgpu_ras_intr_triggered())
+ ret = atomic_cmpxchg(&aca->ue_update_flag, 0, 1) == 0;
+ else
+ atomic_set(&aca->ue_update_flag, 0);
+ }
+
+ return ret;
+}
+
+static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type,
+ bank_handler_t handler, struct ras_query_context *qctx, void *data)
{
struct amdgpu_aca *aca = &adev->aca;
struct aca_banks banks;
@@ -389,9 +402,8 @@ static int aca_banks_update(struct amdgpu_device *adev, enum aca_error_type type
if (list_empty(&aca->mgr.list))
return 0;
- /* NOTE: pmfw is only support UE and CE */
- if (type == ACA_ERROR_TYPE_DEFERRED)
- type = ACA_ERROR_TYPE_CE;
+ if (!aca_bank_should_update(adev, type))
+ return 0;
ret = aca_smu_get_valid_aca_count(adev, type, &count);
if (ret)
@@ -402,7 +414,7 @@ static int aca_banks_update(struct amdgpu_device *adev, enum aca_error_type type
aca_banks_init(&banks);
- ret = aca_smu_get_valid_aca_banks(adev, type, 0, count, &banks);
+ ret = aca_smu_get_valid_aca_banks(adev, type, 0, count, &banks, qctx);
if (ret)
goto err_release_banks;
@@ -431,7 +443,7 @@ static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_er
if (type >= ACA_ERROR_TYPE_COUNT)
return -EINVAL;
- count = bank_error->count[type];
+ count = bank_error->count;
if (!count)
return 0;
@@ -447,6 +459,8 @@ static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_er
amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, count);
break;
case ACA_ERROR_TYPE_DEFERRED:
+ amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, NULL, count);
+ break;
default:
break;
}
@@ -477,12 +491,25 @@ out_unlock:
}
static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, enum aca_error_type type,
- struct ras_err_data *err_data)
+ struct ras_err_data *err_data, struct ras_query_context *qctx)
{
+ enum aca_smu_type smu_type;
int ret;
+ switch (type) {
+ case ACA_ERROR_TYPE_UE:
+ smu_type = ACA_SMU_TYPE_UE;
+ break;
+ case ACA_ERROR_TYPE_CE:
+ case ACA_ERROR_TYPE_DEFERRED:
+ smu_type = ACA_SMU_TYPE_CE;
+ break;
+ default:
+ return -EINVAL;
+ }
+
/* update aca bank to aca source error_cache first */
- ret = aca_banks_update(adev, type, handler_aca_log_bank_error, NULL);
+ ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL);
if (ret)
return ret;
@@ -498,10 +525,9 @@ static bool aca_handle_is_valid(struct aca_handle *handle)
}
int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
- enum aca_error_type type, void *data)
+ enum aca_error_type type, struct ras_err_data *err_data,
+ struct ras_query_context *qctx)
{
- struct ras_err_data *err_data = (struct ras_err_data *)data;
-
if (!handle || !err_data)
return -EINVAL;
@@ -511,7 +537,7 @@ int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *han
if (!(BIT(type) & handle->mask))
return 0;
- return __aca_get_error_data(adev, handle, type, err_data);
+ return __aca_get_error_data(adev, handle, type, err_data, qctx);
}
static void aca_error_init(struct aca_error *aerr, enum aca_error_type type)
@@ -668,6 +694,8 @@ int amdgpu_aca_init(struct amdgpu_device *adev)
struct amdgpu_aca *aca = &adev->aca;
int ret;
+ atomic_set(&aca->ue_update_flag, 0);
+
ret = aca_manager_init(&aca->mgr);
if (ret)
return ret;
@@ -680,6 +708,8 @@ void amdgpu_aca_fini(struct amdgpu_device *adev)
struct amdgpu_aca *aca = &adev->aca;
aca_manager_fini(&aca->mgr);
+
+ atomic_set(&aca->ue_update_flag, 0);
}
int amdgpu_aca_reset(struct amdgpu_device *adev)
@@ -723,23 +753,13 @@ int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info)
static int aca_bank_get_error_code(struct amdgpu_device *adev, struct aca_bank *bank)
{
- int error_code;
-
- switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
- case IP_VERSION(13, 0, 6):
- if (!(adev->flags & AMD_IS_APU) && adev->pm.fw_version >= 0x00555600) {
- error_code = ACA_REG__SYND__ERRORINFORMATION(bank->regs[ACA_REG_IDX_SYND]);
- return error_code & 0xff;
- }
- break;
- default:
- break;
- }
+ struct amdgpu_aca *aca = &adev->aca;
+ const struct aca_smu_funcs *smu_funcs = aca->smu_funcs;
- /* NOTE: the true error code is encoded in status.errorcode[0:7] */
- error_code = ACA_REG__STATUS__ERRORCODE(bank->regs[ACA_REG_IDX_STATUS]);
+ if (!smu_funcs || !smu_funcs->parse_error_code)
+ return -EOPNOTSUPP;
- return error_code & 0xff;
+ return smu_funcs->parse_error_code(adev, bank);
}
int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size)
@@ -750,6 +770,9 @@ int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank
return -EINVAL;
error_code = aca_bank_get_error_code(adev, bank);
+ if (error_code < 0)
+ return error_code;
+
for (i = 0; i < size; i++) {
if (err_codes[i] == error_code)
return 0;
@@ -784,7 +807,7 @@ static int amdgpu_aca_smu_debug_mode_set(void *data, u64 val)
return 0;
}
-static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_error_type type, int idx)
+static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_smu_type type, int idx)
{
struct aca_bank_info info;
int i, ret;
@@ -793,7 +816,7 @@ static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_e
if (ret)
return;
- seq_printf(m, "aca entry[%d].type: %s\n", idx, type == ACA_ERROR_TYPE_UE ? "UE" : "CE");
+ seq_printf(m, "aca entry[%d].type: %s\n", idx, type == ACA_SMU_TYPE_UE ? "UE" : "CE");
seq_printf(m, "aca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n",
idx, info.socket_id, info.die_id, info.hwid, info.mcatype);
@@ -807,7 +830,7 @@ struct aca_dump_context {
};
static int handler_aca_bank_dump(struct aca_handle *handle, struct aca_bank *bank,
- enum aca_error_type type, void *data)
+ enum aca_smu_type type, void *data)
{
struct aca_dump_context *ctx = (struct aca_dump_context *)data;
@@ -816,7 +839,7 @@ static int handler_aca_bank_dump(struct aca_handle *handle, struct aca_bank *ban
return handler_aca_log_bank_error(handle, bank, type, NULL);
}
-static int aca_dump_show(struct seq_file *m, enum aca_error_type type)
+static int aca_dump_show(struct seq_file *m, enum aca_smu_type type)
{
struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
struct aca_dump_context context = {
@@ -824,12 +847,12 @@ static int aca_dump_show(struct seq_file *m, enum aca_error_type type)
.idx = 0,
};
- return aca_banks_update(adev, type, handler_aca_bank_dump, (void *)&context);
+ return aca_banks_update(adev, type, handler_aca_bank_dump, NULL, (void *)&context);
}
static int aca_dump_ce_show(struct seq_file *m, void *unused)
{
- return aca_dump_show(m, ACA_ERROR_TYPE_CE);
+ return aca_dump_show(m, ACA_SMU_TYPE_CE);
}
static int aca_dump_ce_open(struct inode *inode, struct file *file)
@@ -847,7 +870,7 @@ static const struct file_operations aca_ce_dump_debug_fops = {
static int aca_dump_ue_show(struct seq_file *m, void *unused)
{
- return aca_dump_show(m, ACA_ERROR_TYPE_UE);
+ return aca_dump_show(m, ACA_SMU_TYPE_UE);
}
static int aca_dump_ue_open(struct inode *inode, struct file *file)
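
The new ue_update_flag turns the UE bank read above into a claim-once operation: UE valid counts are only cleared by a reset, so during recovery exactly one caller may harvest the banks. The atomic_cmpxchg() idiom used by aca_bank_should_update(), shown in isolation as a self-contained sketch:

	#include <linux/atomic.h>

	static atomic_t ue_update_flag = ATOMIC_INIT(0);

	static bool claim_ue_update(bool in_recovery)
	{
		if (!in_recovery) {
			/* re-arm the flag outside of GPU recovery */
			atomic_set(&ue_update_flag, 0);
			return true;
		}
		/* the first caller sees 0 and flips it to 1; later callers back off */
		return atomic_cmpxchg(&ue_update_flag, 0, 1) == 0;
	}
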
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
index 2da50e0958..5ef6b745f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
@@ -26,6 +26,9 @@
#include <linux/list.h>
+struct ras_err_data;
+struct ras_query_context;
+
#define ACA_MAX_REGS_COUNT (16)
#define ACA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> l)
@@ -99,7 +102,14 @@ enum aca_error_type {
ACA_ERROR_TYPE_COUNT
};
+enum aca_smu_type {
+ ACA_SMU_TYPE_UE = 0,
+ ACA_SMU_TYPE_CE,
+ ACA_SMU_TYPE_COUNT,
+};
+
struct aca_bank {
+ enum aca_smu_type type;
u64 regs[ACA_MAX_REGS_COUNT];
};
@@ -115,15 +125,10 @@ struct aca_bank_info {
int mcatype;
};
-struct aca_bank_report {
- struct aca_bank_info info;
- u64 count[ACA_ERROR_TYPE_COUNT];
-};
-
struct aca_bank_error {
struct list_head node;
struct aca_bank_info info;
- u64 count[ACA_ERROR_TYPE_COUNT];
+ u64 count;
};
struct aca_error {
@@ -157,9 +162,8 @@ struct aca_handle {
};
struct aca_bank_ops {
- int (*aca_bank_generate_report)(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type,
- struct aca_bank_report *report, void *data);
- bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type,
+ int (*aca_bank_parser)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data);
+ bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type,
void *data);
};
@@ -167,13 +171,15 @@ struct aca_smu_funcs {
int max_ue_bank_count;
int max_ce_bank_count;
int (*set_debug_mode)(struct amdgpu_device *adev, bool enable);
- int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_error_type type, u32 *count);
- int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_error_type type, int idx, struct aca_bank *bank);
+ int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count);
+ int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_smu_type type, int idx, struct aca_bank *bank);
+ int (*parse_error_code)(struct amdgpu_device *adev, struct aca_bank *bank);
};
struct amdgpu_aca {
struct aca_handle_manager mgr;
const struct aca_smu_funcs *smu_funcs;
+ atomic_t ue_update_flag;
bool is_enabled;
};
@@ -196,7 +202,10 @@ int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle,
const char *name, const struct aca_info *aca_info, void *data);
void amdgpu_aca_remove_handle(struct aca_handle *handle);
int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle,
- enum aca_error_type type, void *data);
+ enum aca_error_type type, struct ras_err_data *err_data,
+ struct ras_query_context *qctx);
int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en);
void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root);
+int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info,
+ enum aca_error_type type, u64 count);
#endif
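
With aca_bank_generate_report gone, IP blocks implement the new aca_bank_parser callback and push counts into the cache themselves via the newly exported aca_error_cache_log_bank_error(). A hedged sketch of a minimal parser (my_bank_parser and the one-to-one UE/CE mapping are illustrative; real parsers also decode the bank registers to classify and count the error):

	/* sketch: a minimal (hypothetical) aca_bank_parser implementation */
	static int my_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
				  enum aca_smu_type type, void *data)
	{
		struct aca_bank_info info;
		int ret;

		ret = aca_bank_info_decode(bank, &info);
		if (ret)
			return ret;

		/* log one error of the matching class into the handle's cache */
		return aca_error_cache_log_bank_error(handle, &info,
						      type == ACA_SMU_TYPE_UE ?
						      ACA_ERROR_TYPE_UE :
						      ACA_ERROR_TYPE_CE, 1ULL);
	}
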
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index 6d72355ac4..bf6c4a0d05 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -637,6 +637,8 @@ static const struct amd_ip_funcs acp_ip_funcs = {
.soft_reset = acp_soft_reset,
.set_clockgating_state = acp_set_clockgating_state,
.set_powergating_state = acp_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
const struct amdgpu_ip_block_version acp_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 7291c3fd8c..e3738d4172 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -750,10 +750,17 @@ bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev)
return amdgpu_ras_get_fed_status(adev);
}
+void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset)
+{
+ amdgpu_umc_pasid_poison_handler(adev, block, pasid, pasid_fn, data, reset);
+}
+
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
- enum amdgpu_ras_block block, bool reset)
+ enum amdgpu_ras_block block, uint32_t reset)
{
- amdgpu_umc_poison_handler(adev, block, reset);
+ amdgpu_umc_pasid_poison_handler(adev, block, 0, NULL, NULL, reset);
}
int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
@@ -772,12 +779,20 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
return 0;
}
-bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
+bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev,
+ int hub_inst, int hub_type)
{
- if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status)
- return adev->gfx.ras->query_utcl2_poison_status(adev);
- else
- return false;
+ if (!hub_type) {
+ if (adev->gfxhub.funcs->query_utcl2_poison_status)
+ return adev->gfxhub.funcs->query_utcl2_poison_status(adev, hub_inst);
+ else
+ return false;
+ } else {
+ if (adev->mmhub.funcs->query_utcl2_poison_status)
+ return adev->mmhub.funcs->query_utcl2_poison_status(adev, hub_inst);
+ else
+ return false;
+ }
}
int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 0ef223c2af..1de021ebdd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -336,12 +336,18 @@ void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev);
int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev,
struct tile_config *config);
void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
- enum amdgpu_ras_block block, bool reset);
+ enum amdgpu_ras_block block, uint32_t reset);
+
+void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset);
+
bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev);
bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem);
void amdgpu_amdkfd_block_mmu_notifications(void *p);
int amdgpu_amdkfd_criu_resume(void *p);
-bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev);
+bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev,
+ int hub_inst, int hub_type);
int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 alloc_flag, int8_t xcp_id);
void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index 69810b3f1c..3ab6c3aa0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -881,6 +881,7 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev,
}
#define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H)
+#define SQ_WATCH_STRIDE (mmSQ_WATCH1_ADDR_H - mmSQ_WATCH0_ADDR_H)
uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
uint64_t watch_address,
uint32_t watch_address_mask,
@@ -889,55 +890,93 @@ uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev,
uint32_t debug_vmid,
uint32_t inst)
{
+ /* SQ_WATCH?_ADDR_* and TCP_WATCH?_ADDR_* are programmed with the
+ * same values.
+ */
uint32_t watch_address_high;
uint32_t watch_address_low;
- uint32_t watch_address_cntl;
-
- watch_address_cntl = 0;
+ uint32_t tcp_watch_address_cntl;
+ uint32_t sq_watch_address_cntl;
watch_address_low = lower_32_bits(watch_address);
watch_address_high = upper_32_bits(watch_address) & 0xffff;
- watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ tcp_watch_address_cntl = 0;
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
TCP_WATCH0_CNTL,
VMID,
debug_vmid);
- watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
TCP_WATCH0_CNTL,
MODE,
watch_mode);
- watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
TCP_WATCH0_CNTL,
MASK,
watch_address_mask >> 7);
+ sq_watch_address_cntl = 0;
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ VMID,
+ debug_vmid);
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ MODE,
+ watch_mode);
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ MASK,
+ watch_address_mask >> 6);
+
/* Turning off this watch point until we set all the registers */
- watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
TCP_WATCH0_CNTL,
VALID,
0);
-
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
(watch_id * TCP_WATCH_STRIDE)),
- watch_address_cntl);
+ tcp_watch_address_cntl);
+
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ VALID,
+ 0);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ sq_watch_address_cntl);
+ /* Program {TCP,SQ}_WATCH?_ADDR* */
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) +
(watch_id * TCP_WATCH_STRIDE)),
watch_address_high);
-
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) +
(watch_id * TCP_WATCH_STRIDE)),
watch_address_low);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_H) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ watch_address_high);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_L) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ watch_address_low);
+
/* Enable the watch point */
- watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
+ tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl,
TCP_WATCH0_CNTL,
VALID,
1);
-
WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) +
(watch_id * TCP_WATCH_STRIDE)),
- watch_address_cntl);
+ tcp_watch_address_cntl);
+
+ sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl,
+ SQ_WATCH0_CNTL,
+ VALID,
+ 1);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ sq_watch_address_cntl);
return 0;
}
@@ -953,8 +992,14 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev,
(watch_id * TCP_WATCH_STRIDE)),
watch_address_cntl);
+ WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) +
+ (watch_id * SQ_WATCH_STRIDE)),
+ watch_address_cntl);
+
return 0;
}
+#undef TCP_WATCH_STRIDE
+#undef SQ_WATCH_STRIDE
/* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
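
The watch programming above follows a disarm/program/arm order so the hardware never matches against a half-written address; note also that the SQ mask is shifted by 6 while the TCP mask is shifted by 7, matching the different field widths of the two register families. Reduced to its skeleton (register names from the hunk above; WREG32/REG_SET_FIELD as used throughout this file):

	/* sketch: the safe watch-point programming order used by the patch */
	static void program_watch_point(struct amdgpu_device *adev, u32 cntl_reg,
					u32 addr_h_reg, u32 addr_l_reg,
					u32 cntl, u32 addr_hi, u32 addr_lo)
	{
		cntl = REG_SET_FIELD(cntl, TCP_WATCH0_CNTL, VALID, 0);
		WREG32(cntl_reg, cntl);		/* 1. disarm the watch point */
		WREG32(addr_h_reg, addr_hi);	/* 2. program address while off */
		WREG32(addr_l_reg, addr_lo);
		cntl = REG_SET_FIELD(cntl, TCP_WATCH0_CNTL, VALID, 1);
		WREG32(cntl_reg, cntl);		/* 3. arm with the final config */
	}
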
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 8975cf41a9..48ad0c04aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -196,7 +196,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
return -EINVAL;
vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id);
- if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) {
+ if (adev->flags & AMD_IS_APU) {
system_mem_needed = size;
ttm_mem_needed = size;
}
@@ -233,7 +233,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
if (adev && xcp_id >= 0) {
adev->kfd.vram_used[xcp_id] += vram_needed;
adev->kfd.vram_used_aligned[xcp_id] +=
- (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) ?
+ (adev->flags & AMD_IS_APU) ?
vram_needed :
ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN);
}
@@ -261,7 +261,7 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
if (adev) {
adev->kfd.vram_used[xcp_id] -= size;
- if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) {
+ if (adev->flags & AMD_IS_APU) {
adev->kfd.vram_used_aligned[xcp_id] -= size;
kfd_mem_limit.system_mem_used -= size;
kfd_mem_limit.ttm_mem_used -= size;
@@ -890,7 +890,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
* if peer device has large BAR. In contrast, access over xGMI is
* allowed for both small and large BAR configurations of peer device
*/
- if ((adev != bo_adev && !(adev->gmc.is_app_apu || adev->flags & AMD_IS_APU)) &&
+ if ((adev != bo_adev && !(adev->flags & AMD_IS_APU)) &&
((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) ||
(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) ||
(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
@@ -1658,7 +1658,7 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
- atomic64_read(&adev->vram_pin_size)
- reserved_for_pt;
- if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) {
+ if (adev->flags & AMD_IS_APU) {
system_mem_available = no_system_mem_limit ?
kfd_mem_limit.max_system_mem_limit :
kfd_mem_limit.max_system_mem_limit -
@@ -1706,7 +1706,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
- if (adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) {
+ if (adev->flags & AMD_IS_APU) {
domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_flags = 0;
@@ -1953,7 +1953,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
if (size) {
if (!is_imported &&
(mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM ||
- ((adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) &&
+ ((adev->flags & AMD_IS_APU) &&
mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT)))
*size = bo_size;
else
@@ -2376,7 +2376,7 @@ static int import_obj_create(struct amdgpu_device *adev,
(*mem)->bo = bo;
(*mem)->va = va;
(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) &&
- !(adev->gmc.is_app_apu || adev->flags & AMD_IS_APU) ?
+ !(adev->flags & AMD_IS_APU) ?
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
(*mem)->mapped_to_gpu_memory = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 12b4885182..2e13c7c4b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -34,6 +34,7 @@ union firmware_info {
struct atom_firmware_info_v3_2 v32;
struct atom_firmware_info_v3_3 v33;
struct atom_firmware_info_v3_4 v34;
+ struct atom_firmware_info_v3_5 v35;
};
/*
@@ -887,6 +888,10 @@ int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev)
fw_reserved_fb_size =
(firmware_info->v34.fw_reserved_size_in_kb << 10);
break;
+ case 5:
+ fw_reserved_fb_size =
+ (firmware_info->v35.fw_reserved_size_in_kb << 10);
+ break;
default:
fw_reserved_fb_size = 0;
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index edc6377ec5..199693369c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -39,7 +39,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
for (i = 0; i < n; i++) {
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
- false, false, false);
+ false, false, 0);
if (r)
goto exit_do_move;
r = dma_fence_wait(fence, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
new file mode 100644
index 0000000000..c1cb626836
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
@@ -0,0 +1,360 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <generated/utsrelease.h>
+#include <linux/devcoredump.h>
+#include "amdgpu_dev_coredump.h"
+#include "atom.h"
+
+#ifndef CONFIG_DEV_COREDUMP
+void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
+ struct amdgpu_reset_context *reset_context)
+{
+}
+#else
+
+const char *hw_ip_names[MAX_HWIP] = {
+ [GC_HWIP] = "GC",
+ [HDP_HWIP] = "HDP",
+ [SDMA0_HWIP] = "SDMA0",
+ [SDMA1_HWIP] = "SDMA1",
+ [SDMA2_HWIP] = "SDMA2",
+ [SDMA3_HWIP] = "SDMA3",
+ [SDMA4_HWIP] = "SDMA4",
+ [SDMA5_HWIP] = "SDMA5",
+ [SDMA6_HWIP] = "SDMA6",
+ [SDMA7_HWIP] = "SDMA7",
+ [LSDMA_HWIP] = "LSDMA",
+ [MMHUB_HWIP] = "MMHUB",
+ [ATHUB_HWIP] = "ATHUB",
+ [NBIO_HWIP] = "NBIO",
+ [MP0_HWIP] = "MP0",
+ [MP1_HWIP] = "MP1",
+ [UVD_HWIP] = "UVD/JPEG/VCN",
+ [VCN1_HWIP] = "VCN1",
+ [VCE_HWIP] = "VCE",
+ [VPE_HWIP] = "VPE",
+ [DF_HWIP] = "DF",
+ [DCE_HWIP] = "DCE",
+ [OSSSYS_HWIP] = "OSSSYS",
+ [SMUIO_HWIP] = "SMUIO",
+ [PWR_HWIP] = "PWR",
+ [NBIF_HWIP] = "NBIF",
+ [THM_HWIP] = "THM",
+ [CLK_HWIP] = "CLK",
+ [UMC_HWIP] = "UMC",
+ [RSMU_HWIP] = "RSMU",
+ [XGMI_HWIP] = "XGMI",
+ [DCI_HWIP] = "DCI",
+ [PCIE_HWIP] = "PCIE",
+};
+
+static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev,
+ struct drm_printer *p)
+{
+ uint32_t version;
+ uint32_t feature;
+ uint8_t smu_program, smu_major, smu_minor, smu_debug;
+ struct atom_context *ctx = adev->mode_info.atom_context;
+
+ drm_printf(p, "VCE feature version: %u, fw version: 0x%08x\n",
+ adev->vce.fb_version, adev->vce.fw_version);
+ drm_printf(p, "UVD feature version: %u, fw version: 0x%08x\n", 0,
+ adev->uvd.fw_version);
+ drm_printf(p, "GMC feature version: %u, fw version: 0x%08x\n", 0,
+ adev->gmc.fw_version);
+ drm_printf(p, "ME feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.me_feature_version, adev->gfx.me_fw_version);
+ drm_printf(p, "PFP feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.pfp_feature_version, adev->gfx.pfp_fw_version);
+ drm_printf(p, "CE feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.ce_feature_version, adev->gfx.ce_fw_version);
+ drm_printf(p, "RLC feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlc_feature_version, adev->gfx.rlc_fw_version);
+
+ drm_printf(p, "RLC SRLC feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlc_srlc_feature_version,
+ adev->gfx.rlc_srlc_fw_version);
+ drm_printf(p, "RLC SRLG feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlc_srlg_feature_version,
+ adev->gfx.rlc_srlg_fw_version);
+ drm_printf(p, "RLC SRLS feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlc_srls_feature_version,
+ adev->gfx.rlc_srls_fw_version);
+ drm_printf(p, "RLCP feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlcp_ucode_feature_version,
+ adev->gfx.rlcp_ucode_version);
+ drm_printf(p, "RLCV feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.rlcv_ucode_feature_version,
+ adev->gfx.rlcv_ucode_version);
+ drm_printf(p, "MEC feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.mec_feature_version, adev->gfx.mec_fw_version);
+
+ if (adev->gfx.mec2_fw)
+ drm_printf(p, "MEC2 feature version: %u, fw version: 0x%08x\n",
+ adev->gfx.mec2_feature_version,
+ adev->gfx.mec2_fw_version);
+
+ drm_printf(p, "IMU feature version: %u, fw version: 0x%08x\n", 0,
+ adev->gfx.imu_fw_version);
+ drm_printf(p, "PSP SOS feature version: %u, fw version: 0x%08x\n",
+ adev->psp.sos.feature_version, adev->psp.sos.fw_version);
+ drm_printf(p, "PSP ASD feature version: %u, fw version: 0x%08x\n",
+ adev->psp.asd_context.bin_desc.feature_version,
+ adev->psp.asd_context.bin_desc.fw_version);
+
+ drm_printf(p, "TA XGMI feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.xgmi_context.context.bin_desc.feature_version,
+ adev->psp.xgmi_context.context.bin_desc.fw_version);
+ drm_printf(p, "TA RAS feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.ras_context.context.bin_desc.feature_version,
+ adev->psp.ras_context.context.bin_desc.fw_version);
+ drm_printf(p, "TA HDCP feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.hdcp_context.context.bin_desc.feature_version,
+ adev->psp.hdcp_context.context.bin_desc.fw_version);
+ drm_printf(p, "TA DTM feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.dtm_context.context.bin_desc.feature_version,
+ adev->psp.dtm_context.context.bin_desc.fw_version);
+ drm_printf(p, "TA RAP feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.rap_context.context.bin_desc.feature_version,
+ adev->psp.rap_context.context.bin_desc.fw_version);
+ drm_printf(p,
+ "TA SECURE DISPLAY feature version: 0x%08x, fw version: 0x%08x\n",
+ adev->psp.securedisplay_context.context.bin_desc.feature_version,
+ adev->psp.securedisplay_context.context.bin_desc.fw_version);
+
+ /* SMC firmware */
+ version = adev->pm.fw_version;
+
+ smu_program = (version >> 24) & 0xff;
+ smu_major = (version >> 16) & 0xff;
+ smu_minor = (version >> 8) & 0xff;
+ smu_debug = (version >> 0) & 0xff;
+ drm_printf(p,
+ "SMC feature version: %u, program: %d, fw version: 0x%08x (%d.%d.%d)\n",
+ 0, smu_program, version, smu_major, smu_minor, smu_debug);
+
+ /* SDMA firmware */
+ for (int i = 0; i < adev->sdma.num_instances; i++) {
+ drm_printf(p,
+ "SDMA%d feature version: %u, firmware version: 0x%08x\n",
+ i, adev->sdma.instance[i].feature_version,
+ adev->sdma.instance[i].fw_version);
+ }
+
+ drm_printf(p, "VCN feature version: %u, fw version: 0x%08x\n", 0,
+ adev->vcn.fw_version);
+ drm_printf(p, "DMCU feature version: %u, fw version: 0x%08x\n", 0,
+ adev->dm.dmcu_fw_version);
+ drm_printf(p, "DMCUB feature version: %u, fw version: 0x%08x\n", 0,
+ adev->dm.dmcub_fw_version);
+ drm_printf(p, "PSP TOC feature version: %u, fw version: 0x%08x\n",
+ adev->psp.toc.feature_version, adev->psp.toc.fw_version);
+
+ version = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK;
+ feature = (adev->mes.kiq_version & AMDGPU_MES_FEAT_VERSION_MASK) >>
+ AMDGPU_MES_FEAT_VERSION_SHIFT;
+ drm_printf(p, "MES_KIQ feature version: %u, fw version: 0x%08x\n",
+ feature, version);
+
+ version = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
+ feature = (adev->mes.sched_version & AMDGPU_MES_FEAT_VERSION_MASK) >>
+ AMDGPU_MES_FEAT_VERSION_SHIFT;
+ drm_printf(p, "MES feature version: %u, fw version: 0x%08x\n", feature,
+ version);
+
+ drm_printf(p, "VPE feature version: %u, fw version: 0x%08x\n",
+ adev->vpe.feature_version, adev->vpe.fw_version);
+
+ drm_printf(p, "\nVBIOS Information\n");
+ drm_printf(p, "vbios name : %s\n", ctx->name);
+ drm_printf(p, "vbios pn : %s\n", ctx->vbios_pn);
+ drm_printf(p, "vbios version : %d\n", ctx->version);
+ drm_printf(p, "vbios ver_str : %s\n", ctx->vbios_ver_str);
+ drm_printf(p, "vbios date : %s\n", ctx->date);
+}
+
+static ssize_t
+amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
+ void *data, size_t datalen)
+{
+ struct drm_printer p;
+ struct amdgpu_coredump_info *coredump = data;
+ struct drm_print_iterator iter;
+ struct amdgpu_vm_fault_info *fault_info;
+ int i, ver;
+
+ iter.data = buffer;
+ iter.offset = 0;
+ iter.start = offset;
+ iter.remain = count;
+
+ p = drm_coredump_printer(&iter);
+
+ drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
+ drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n");
+ drm_printf(&p, "kernel: " UTS_RELEASE "\n");
+ drm_printf(&p, "module: " KBUILD_MODNAME "\n");
+ drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec,
+ coredump->reset_time.tv_nsec);
+
+ if (coredump->reset_task_info.pid)
+ drm_printf(&p, "process_name: %s PID: %d\n",
+ coredump->reset_task_info.process_name,
+ coredump->reset_task_info.pid);
+
+ /* GPU IP's information of the SOC */
+ drm_printf(&p, "\nIP Information\n");
+ drm_printf(&p, "SOC Family: %d\n", coredump->adev->family);
+ drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id);
+ drm_printf(&p, "SOC External Revision id: %d\n", coredump->adev->external_rev_id);
+
+ for (int i = 1; i < MAX_HWIP; i++) {
+ for (int j = 0; j < HWIP_MAX_INSTANCE; j++) {
+ ver = coredump->adev->ip_versions[i][j];
+ if (ver)
+ drm_printf(&p, "HWIP: %s[%d][%d]: v%d.%d.%d.%d.%d\n",
+ hw_ip_names[i], i, j,
+ IP_VERSION_MAJ(ver),
+ IP_VERSION_MIN(ver),
+ IP_VERSION_REV(ver),
+ IP_VERSION_VARIANT(ver),
+ IP_VERSION_SUBREV(ver));
+ }
+ }
+
+ /* IP firmware information */
+ drm_printf(&p, "\nIP Firmwares\n");
+ amdgpu_devcoredump_fw_info(coredump->adev, &p);
+
+ if (coredump->ring) {
+ drm_printf(&p, "\nRing timed out details\n");
+ drm_printf(&p, "IP Type: %d Ring Name: %s\n",
+ coredump->ring->funcs->type,
+ coredump->ring->name);
+ }
+
+ /* Add page fault information */
+ fault_info = &coredump->adev->vm_manager.fault_info;
+ drm_printf(&p, "\n[%s] Page fault observed\n",
+ fault_info->vmhub ? "mmhub" : "gfxhub");
+ drm_printf(&p, "Faulty page starting at address: 0x%016llx\n", fault_info->addr);
+ drm_printf(&p, "Protection fault status register: 0x%x\n\n", fault_info->status);
+
+ /* dump the ip state for each ip */
+ drm_printf(&p, "IP Dump\n");
+ for (int i = 0; i < coredump->adev->num_ip_blocks; i++) {
+ if (coredump->adev->ip_blocks[i].version->funcs->print_ip_state) {
+ drm_printf(&p, "IP: %s\n",
+ coredump->adev->ip_blocks[i]
+ .version->funcs->name);
+ coredump->adev->ip_blocks[i]
+ .version->funcs->print_ip_state(
+ (void *)coredump->adev, &p);
+ drm_printf(&p, "\n");
+ }
+ }
+
+ /* Add ring buffer information */
+ drm_printf(&p, "Ring buffer information\n");
+ for (int i = 0; i < coredump->adev->num_rings; i++) {
+ int j = 0;
+ struct amdgpu_ring *ring = coredump->adev->rings[i];
+
+ drm_printf(&p, "ring name: %s\n", ring->name);
+ drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: %x\n",
+ amdgpu_ring_get_rptr(ring),
+ amdgpu_ring_get_wptr(ring),
+ ring->buf_mask);
+ drm_printf(&p, "Ring size in dwords: %d\n",
+ ring->ring_size / 4);
+ drm_printf(&p, "Ring contents\n");
+ drm_printf(&p, "Offset \t Value\n");
+
+ while (j < ring->ring_size) {
+ drm_printf(&p, "0x%x \t 0x%x\n", j, ring->ring[j / 4]);
+ j += 4;
+ }
+ }
+
+ if (coredump->reset_vram_lost)
+ drm_printf(&p, "VRAM is lost due to GPU reset!\n");
+ if (coredump->adev->reset_info.num_regs) {
+ drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
+
+ for (i = 0; i < coredump->adev->reset_info.num_regs; i++)
+ drm_printf(&p, "0x%08x: 0x%08x\n",
+ coredump->adev->reset_info.reset_dump_reg_list[i],
+ coredump->adev->reset_info.reset_dump_reg_value[i]);
+ }
+
+ return count - iter.remain;
+}
+
+static void amdgpu_devcoredump_free(void *data)
+{
+ kfree(data);
+}
+
+void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
+ struct amdgpu_reset_context *reset_context)
+{
+ struct amdgpu_coredump_info *coredump;
+ struct drm_device *dev = adev_to_drm(adev);
+ struct amdgpu_job *job = reset_context->job;
+ struct drm_sched_job *s_job;
+
+ coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT);
+
+ if (!coredump) {
+ DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__);
+ return;
+ }
+
+ coredump->reset_vram_lost = vram_lost;
+
+ if (reset_context->job && reset_context->job->vm) {
+ struct amdgpu_task_info *ti;
+ struct amdgpu_vm *vm = reset_context->job->vm;
+
+ ti = amdgpu_vm_get_task_info_vm(vm);
+ if (ti) {
+ coredump->reset_task_info = *ti;
+ amdgpu_vm_put_task_info(ti);
+ }
+ }
+
+ if (job) {
+ s_job = &job->base;
+ coredump->ring = to_amdgpu_ring(s_job->sched);
+ }
+
+ coredump->adev = adev;
+
+ ktime_get_ts64(&coredump->reset_time);
+
+ dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
+ amdgpu_devcoredump_read, amdgpu_devcoredump_free);
+}
+#endif
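
The new file is built around the devcoredump callback pair: dev_coredumpm() hands ownership of the data blob to the devcoredump core, which calls the read callback with a sliding (offset, count) window and the free callback once userspace is done. A stripped-down sketch of the same wiring (my_read, my_free, and the payload string are illustrative, not part of the patch):

	/* sketch: minimal devcoredump wiring (requires CONFIG_DEV_COREDUMP) */
	static ssize_t my_read(char *buffer, loff_t offset, size_t count,
			       void *data, size_t datalen)
	{
		struct drm_print_iterator iter = {
			.data = buffer, .offset = 0, .start = offset, .remain = count,
		};
		struct drm_printer p = drm_coredump_printer(&iter);

		drm_printf(&p, "**** Example Coredump ****\n");
		return count - iter.remain;	/* bytes produced in this window */
	}

	static void my_free(void *data)
	{
		kfree(data);
	}

	static void my_coredump(struct device *dev, void *data)
	{
		/* the core takes ownership of 'data' and frees it via my_free() */
		dev_coredumpm(dev, THIS_MODULE, data, 0, GFP_NOWAIT,
			      my_read, my_free);
	}
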
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
new file mode 100644
index 0000000000..52459512cb
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_DEV_COREDUMP_H__
+#define __AMDGPU_DEV_COREDUMP_H__
+
+#include "amdgpu.h"
+#include "amdgpu_reset.h"
+
+#ifdef CONFIG_DEV_COREDUMP
+
+#define AMDGPU_COREDUMP_VERSION "1"
+
+struct amdgpu_coredump_info {
+ struct amdgpu_device *adev;
+ struct amdgpu_task_info reset_task_info;
+ struct timespec64 reset_time;
+ bool reset_vram_lost;
+ struct amdgpu_ring *ring;
+};
+#endif
+
+void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
+ struct amdgpu_reset_context *reset_context);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index eb8af02332..ee7df1d84e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -74,6 +74,7 @@
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"
#include "amdgpu_virt.h"
+#include "amdgpu_dev_coredump.h"
#include <linux/suspend.h>
#include <drm/task_barrier.h>
@@ -143,6 +144,8 @@ const char *amdgpu_asic_name[] = {
"LAST",
};
+static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
+
/**
* DOC: pcie_replay_count
*
@@ -335,16 +338,93 @@ bool amdgpu_device_supports_boco(struct drm_device *dev)
*
* @dev: drm_device pointer
*
- * Returns true if the device supporte BACO,
- * otherwise return false.
+ * Return:
+ * 1 if the device supports BACO;
+ * 3 if the device supports MACO (only works if BACO is supported)
+ * otherwise return 0.
*/
-bool amdgpu_device_supports_baco(struct drm_device *dev)
+int amdgpu_device_supports_baco(struct drm_device *dev)
{
struct amdgpu_device *adev = drm_to_adev(dev);
return amdgpu_asic_supports_baco(adev);
}
+void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
+{
+ struct drm_device *dev;
+ int bamaco_support;
+
+ dev = adev_to_drm(adev);
+
+ adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
+ bamaco_support = amdgpu_device_supports_baco(dev);
+
+ switch (amdgpu_runtime_pm) {
+ case 2:
+ if (bamaco_support & MACO_SUPPORT) {
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
+ dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
+ } else if (bamaco_support == BACO_SUPPORT) {
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n");
+ }
+ break;
+ case 1:
+ if (bamaco_support & BACO_SUPPORT) {
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ dev_info(adev->dev, "Forcing BACO for runtime pm\n");
+ }
+ break;
+ case -1:
+ case -2:
+ if (amdgpu_device_supports_px(dev)) { /* enable PX as runtime mode */
+ adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
+ dev_info(adev->dev, "Using ATPX for runtime pm\n");
+ } else if (amdgpu_device_supports_boco(dev)) { /* enable boco as runtime mode */
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
+ dev_info(adev->dev, "Using BOCO for runtime pm\n");
+ } else {
+ if (!bamaco_support)
+ goto no_runtime_pm;
+
+ switch (adev->asic_type) {
+ case CHIP_VEGA20:
+ case CHIP_ARCTURUS:
+ /* BACO is not supported on vega20 and arcturus */
+ break;
+ case CHIP_VEGA10:
+ /* enable BACO as runpm mode if noretry=0 */
+ if (!adev->gmc.noretry)
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ break;
+ default:
+ /* enable BACO as runpm mode on CI+ */
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
+ break;
+ }
+
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
+ if (bamaco_support & MACO_SUPPORT) {
+ adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
+ dev_info(adev->dev, "Using BAMACO for runtime pm\n");
+ } else {
+ dev_info(adev->dev, "Using BACO for runtime pm\n");
+ }
+ }
+ }
+ break;
+ case 0:
+ dev_info(adev->dev, "runtime pm is manually disabled\n");
+ break;
+ default:
+ break;
+ }
+
+no_runtime_pm:
+ if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
+ dev_info(adev->dev, "Runtime PM not available\n");
+}
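
Note that amdgpu_device_supports_baco() now returns a bitmask rather than a bool. The sketch below decodes it under the assumption, consistent with the switch above, that BACO_SUPPORT is bit 0 and MACO_SUPPORT is bit 1, so a return of 3 means BAMACO:

/* Assumed flag values, mirroring their use in the switch above */
#define BACO_SUPPORT (1 << 0)
#define MACO_SUPPORT (1 << 1)

static const char *my_runpm_mode_name(int bamaco_support)
{
    if (bamaco_support & MACO_SUPPORT)
        return "BAMACO";    /* MACO only exists on top of BACO */
    if (bamaco_support & BACO_SUPPORT)
        return "BACO";
    return "none";
}
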
/**
* amdgpu_device_supports_smart_shift - Is the device dGPU with
* smart shift support
@@ -1402,13 +1482,17 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev)
*/
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
{
- unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
+ unsigned long flags, offset;
+ spin_lock_irqsave(&adev->wb.lock, flags);
+ offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
if (offset < adev->wb.num_wb) {
__set_bit(offset, adev->wb.used);
+ spin_unlock_irqrestore(&adev->wb.lock, flags);
*wb = offset << 3; /* convert to dw offset */
return 0;
} else {
+ spin_unlock_irqrestore(&adev->wb.lock, flags);
return -EINVAL;
}
}
@@ -1423,9 +1507,13 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
*/
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
+ unsigned long flags;
+
wb >>= 3;
+ spin_lock_irqsave(&adev->wb.lock, flags);
if (wb < adev->wb.num_wb)
__clear_bit(wb, adev->wb.used);
+ spin_unlock_irqrestore(&adev->wb.lock, flags);
}
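
The two writeback helpers above form the classic find-first-zero-bit allocator; the new adev->wb.lock closes the race where two concurrent callers could observe the same free bit and be handed the same slot. The same pattern reduced to a standalone sketch (all names hypothetical):

#include <linux/bitmap.h>
#include <linux/spinlock.h>

#define MY_NUM_SLOTS 256

static DEFINE_SPINLOCK(my_lock);
static DECLARE_BITMAP(my_used, MY_NUM_SLOTS);

static int my_slot_get(u32 *slot)
{
    unsigned long flags, offset;

    spin_lock_irqsave(&my_lock, flags);
    offset = find_first_zero_bit(my_used, MY_NUM_SLOTS);
    if (offset < MY_NUM_SLOTS)
        __set_bit(offset, my_used);
    spin_unlock_irqrestore(&my_lock, flags);

    if (offset >= MY_NUM_SLOTS)
        return -EINVAL;
    *slot = offset;
    return 0;
}

static void my_slot_free(u32 slot)
{
    unsigned long flags;

    spin_lock_irqsave(&my_lock, flags);
    if (slot < MY_NUM_SLOTS)
        __clear_bit(slot, my_used);
    spin_unlock_irqrestore(&my_lock, flags);
}
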
/**
@@ -1455,7 +1543,7 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
/* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */
if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
- DRM_WARN("System can't access extended configuration space,please check!!\n");
+ DRM_WARN("System can't access extended configuration space, please check!!\n");
/* skip if the bios has already enabled large BAR */
if (adev->gmc.real_vram_size &&
@@ -3981,6 +4069,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
spin_lock_init(&adev->se_cac_idx_lock);
spin_lock_init(&adev->audio_endpt_idx_lock);
spin_lock_init(&adev->mm_stats.lock);
+ spin_lock_init(&adev->wb.lock);
INIT_LIST_HEAD(&adev->shadow_list);
mutex_init(&adev->shadow_list_lock);
@@ -4069,6 +4158,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* Enable TMZ based on IP_VERSION */
amdgpu_gmc_tmz_set(adev);
+ if (amdgpu_sriov_vf(adev) &&
+ amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0))
+ /* VF MMIO access (except mailbox range) from CPU
+ * will be blocked during sriov runtime
+ */
+ adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
+
amdgpu_gmc_noretry_set(adev);
/* Need to get xgmi info early to decide the reset behavior*/
if (adev->gmc.xgmi.supported) {
@@ -4974,12 +5070,15 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
retry:
amdgpu_amdkfd_pre_reset(adev);
+ amdgpu_device_stop_pending_resets(adev);
+
if (from_hypervisor)
r = amdgpu_virt_request_full_gpu(adev, true);
else
r = amdgpu_virt_reset_gpu(adev);
if (r)
return r;
+ amdgpu_ras_set_fed(adev, false);
amdgpu_irq_gpu_reset_resume_helper(adev);
/* some sw clean up VF needs to do before recover */
@@ -5266,11 +5365,21 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
struct amdgpu_device *tmp_adev = NULL;
bool need_full_reset, skip_hw_reset, vram_lost = false;
int r = 0;
+ uint32_t i;
/* Try reset handler method first */
tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
reset_list);
- amdgpu_reset_reg_dumps(tmp_adev);
+
+ if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
+ amdgpu_reset_reg_dumps(tmp_adev);
+
+ /* Trigger ip dump before we reset the asic */
+ for (i = 0; i < tmp_adev->num_ip_blocks; i++)
+ if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
+ tmp_adev->ip_blocks[i].version->funcs
+ ->dump_ip_state((void *)tmp_adev);
+ }
reset_context->reset_device_list = device_list_handle;
r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
@@ -5343,7 +5452,8 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
- amdgpu_coredump(tmp_adev, vram_lost, reset_context);
+ if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
+ amdgpu_coredump(tmp_adev, vram_lost, reset_context);
if (vram_lost) {
DRM_INFO("VRAM is lost due to GPU reset!\n");
@@ -5541,6 +5651,23 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
}
+static int amdgpu_device_health_check(struct list_head *device_list_handle)
+{
+ struct amdgpu_device *tmp_adev;
+ int ret = 0;
+ u32 status;
+
+ list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
+ pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status);
+ if (PCI_POSSIBLE_ERROR(status)) {
+ dev_err(tmp_adev->dev, "device lost from bus!");
+ ret = -ENODEV;
+ }
+ }
+
+ return ret;
+}
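
PCI_POSSIBLE_ERROR() matches the all-ones value that PCI config reads return once a device has dropped off the bus, so reading any register (PCI_COMMAND here) doubles as a liveness probe before a reset is attempted. A minimal standalone version of the check:

#include <linux/pci.h>

/* Returns -ENODEV if config reads come back as all ones (device gone) */
static int my_pci_alive(struct pci_dev *pdev)
{
    u32 val;

    pci_read_config_dword(pdev, PCI_COMMAND, &val);
    return PCI_POSSIBLE_ERROR(val) ? -ENODEV : 0;
}
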
+
/**
* amdgpu_device_gpu_recover - reset the asic and recover scheduler
*
@@ -5612,6 +5739,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
device_list_handle = &device_list;
}
+ if (!amdgpu_sriov_vf(adev)) {
+ r = amdgpu_device_health_check(device_list_handle);
+ if (r)
+ goto end_reset;
+ }
+
/* We need to lock reset domain only once both for XGMI and single device */
tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
reset_list);
@@ -5694,11 +5827,12 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
tmp_adev->asic_reset_res = r;
}
- /*
- * Drop all pending non scheduler resets. Scheduler resets
- * were already dropped during drm_sched_stop
- */
- amdgpu_device_stop_pending_resets(tmp_adev);
+ if (!amdgpu_sriov_vf(tmp_adev))
+ /*
+ * Drop all pending non scheduler resets. Scheduler resets
+ * were already dropped during drm_sched_stop
+ */
+ amdgpu_device_stop_pending_resets(tmp_adev);
}
/* Actual ASIC resets if needed.*/
@@ -5777,6 +5911,7 @@ skip_sched_resume:
reset_list);
amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
+end_reset:
if (hive) {
mutex_unlock(&hive->hive_lock);
amdgpu_put_xgmi_hive(hive);
@@ -6038,7 +6173,7 @@ int amdgpu_device_baco_exit(struct drm_device *dev)
adev->nbio.funcs->enable_doorbell_interrupt)
adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
- if (amdgpu_passthrough(adev) &&
+ if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
adev->nbio.funcs->clear_doorbell_interrupt)
adev->nbio.funcs->clear_doorbell_interrupt(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index ac5bf01fe8..0e31bdb4b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -97,6 +97,7 @@
#include "smuio_v13_0.h"
#include "smuio_v13_0_3.h"
#include "smuio_v13_0_6.h"
+#include "smuio_v14_0_2.h"
#include "vcn_v5_0_0.h"
#include "jpeg_v5_0_0.h"
@@ -245,6 +246,9 @@ static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev,
return -ENOENT;
}
+#define IP_DISCOVERY_V2 2
+#define IP_DISCOVERY_V4 4
+
static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
uint8_t *binary)
{
@@ -259,14 +263,14 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
* wait for this to complete. Once the C2PMSG is updated, we can
* continue.
*/
- if (dev_is_removable(&adev->pdev->dev)) {
- for (i = 0; i < 1000; i++) {
- msg = RREG32(mmMP0_SMN_C2PMSG_33);
- if (msg & 0x80000000)
- break;
- msleep(1);
- }
+
+ for (i = 0; i < 1000; i++) {
+ msg = RREG32(mmMP0_SMN_C2PMSG_33);
+ if (msg & 0x80000000)
+ break;
+ usleep_range(1000, 1100);
}
+
vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
if (vram_size) {
@@ -1897,6 +1901,8 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
break;
case IP_VERSION(14, 0, 0):
case IP_VERSION(14, 0, 1):
+ case IP_VERSION(14, 0, 2):
+ case IP_VERSION(14, 0, 3):
amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block);
break;
default:
@@ -2678,6 +2684,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(14, 0, 1):
adev->smuio.funcs = &smuio_v13_0_6_funcs;
break;
+ case IP_VERSION(14, 0, 2):
+ adev->smuio.funcs = &smuio_v14_0_2_funcs;
+ break;
default:
break;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 055ba2ea4c..662d0f28f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -41,8 +41,6 @@
#include <linux/dma-buf.h>
#include <linux/dma-fence-array.h>
#include <linux/pci-p2pdma.h>
-#include <linux/pm_runtime.h>
-#include "amdgpu_trace.h"
/**
* amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
@@ -58,42 +56,11 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
struct drm_gem_object *obj = dmabuf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- int r;
if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0)
attach->peer2peer = false;
- r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
- trace_amdgpu_runpm_reference_dumps(1, __func__);
- if (r < 0)
- goto out;
-
return 0;
-
-out:
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- trace_amdgpu_runpm_reference_dumps(0, __func__);
- return r;
-}
-
-/**
- * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation
- *
- * @dmabuf: DMA-buf where we remove the attachment from
- * @attach: the attachment to remove
- *
- * Called when an attachment is removed from the DMA-buf.
- */
-static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
- struct dma_buf_attachment *attach)
-{
- struct drm_gem_object *obj = dmabuf->priv;
- struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-
- pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
- pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- trace_amdgpu_runpm_reference_dumps(0, __func__);
}
/**
@@ -267,7 +234,6 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
const struct dma_buf_ops amdgpu_dmabuf_ops = {
.attach = amdgpu_dma_buf_attach,
- .detach = amdgpu_dma_buf_detach,
.pin = amdgpu_dma_buf_pin,
.unpin = amdgpu_dma_buf_unpin,
.map_dma_buf = amdgpu_dma_buf_map,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index e4277298cf..ea14f1c8f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -925,7 +925,7 @@ module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444);
* GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)
*/
MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco)");
-module_param_named(reset_method, amdgpu_reset_method, int, 0444);
+module_param_named(reset_method, amdgpu_reset_method, int, 0644);
/**
 * DOC: bad_page_threshold (int) Bad page threshold specifies the
@@ -2481,6 +2481,7 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work)
/* Use a common context, just need to make sure full reset is done */
set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
+ set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);
r = amdgpu_do_asic_reset(&device_list, &reset_context);
if (r) {
@@ -2744,7 +2745,8 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
} else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) {
/* nothing to do */
- } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
+ } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) ||
+ (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) {
amdgpu_device_baco_enter(drm_dev);
}
@@ -2784,7 +2786,8 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
* PCI core handles it for _PR3.
*/
pci_set_master(pdev);
- } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
+ } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) ||
+ (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) {
amdgpu_device_baco_exit(drm_dev);
}
ret = amdgpu_device_resume(drm_dev, false);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 10832b4704..bc3ac73b6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -181,7 +181,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
seq, flags | AMDGPU_FENCE_FLAG_INT);
pm_runtime_get_noresume(adev_to_drm(adev)->dev);
- trace_amdgpu_runpm_reference_dumps(1, __func__);
ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
if (unlikely(rcu_dereference_protected(*ptr, 1))) {
struct dma_fence *old;
@@ -309,7 +308,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
dma_fence_put(fence);
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
- trace_amdgpu_runpm_reference_dumps(0, __func__);
} while (last_seq != seq);
return true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 67c234bcf8..3adaa46701 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -108,6 +108,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
memset(&bp, 0, sizeof(bp));
*obj = NULL;
+ flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
bp.size = size;
bp.byte_align = alignment;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 1d955652f3..e92bdc9a39 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -329,8 +329,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id)
ring->eop_gpu_addr = kiq->eop_gpu_addr;
ring->no_scheduler = true;
- snprintf(ring->name, sizeof(ring->name), "kiq_%d.%d.%d.%d",
- xcc_id, ring->me, ring->pipe, ring->queue);
+ snprintf(ring->name, sizeof(ring->name), "kiq_%hhu.%hhu.%hhu.%hhu",
+ (unsigned char)xcc_id, (unsigned char)ring->me,
+ (unsigned char)ring->pipe, (unsigned char)ring->queue);
r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 8fcf889ddc..64f197bbc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -259,7 +259,6 @@ struct amdgpu_cu_info {
struct amdgpu_gfx_ras {
struct amdgpu_ras_block_object ras_block;
void (*enable_watchdog_timer)(struct amdgpu_device *adev);
- bool (*query_utcl2_poison_status)(struct amdgpu_device *adev);
int (*rlc_gc_fed_irq)(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);
@@ -434,6 +433,10 @@ struct amdgpu_gfx {
uint32_t num_xcc_per_xcp;
struct mutex partition_mutex;
bool mcbp; /* mid command buffer preemption */
+
+ /* IP reg dump */
+ uint32_t *ip_dump;
+ uint32_t reg_count;
};
struct amdgpu_gfx_ras_reg_entry {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h
index c7b44aeb67..103a837ccc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h
@@ -38,6 +38,8 @@ struct amdgpu_gfxhub_funcs {
void (*mode2_save_regs)(struct amdgpu_device *adev);
void (*mode2_restore_regs)(struct amdgpu_device *adev);
void (*halt)(struct amdgpu_device *adev);
+ bool (*query_utcl2_poison_status)(struct amdgpu_device *adev,
+ int xcc_id);
};
struct amdgpu_gfxhub {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 08b9dfb653..86b096ad03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -878,7 +878,6 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
struct amdgpu_gmc *gmc = &adev->gmc;
uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
bool noretry_default = (gc_ver == IP_VERSION(9, 0, 1) ||
- gc_ver == IP_VERSION(9, 3, 0) ||
gc_ver == IP_VERSION(9, 4, 0) ||
gc_ver == IP_VERSION(9, 4, 1) ||
gc_ver == IP_VERSION(9, 4, 2) ||
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
index d79cb13e1a..00d6211e0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c
@@ -279,7 +279,7 @@ amdgpu_i2c_lookup(struct amdgpu_device *adev,
return NULL;
}
-static void amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus,
+static int amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus,
u8 slave_addr,
u8 addr,
u8 *val)
@@ -304,16 +304,18 @@ static void amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus,
out_buf[0] = addr;
out_buf[1] = 0;
- if (i2c_transfer(&i2c_bus->adapter, msgs, 2) == 2) {
- *val = in_buf[0];
- DRM_DEBUG("val = 0x%02x\n", *val);
- } else {
- DRM_DEBUG("i2c 0x%02x 0x%02x read failed\n",
- addr, *val);
+ if (i2c_transfer(&i2c_bus->adapter, msgs, 2) != 2) {
+ DRM_DEBUG("i2c 0x%02x read failed\n", addr);
+ return -EIO;
}
+
+ *val = in_buf[0];
+ DRM_DEBUG("val = 0x%02x\n", *val);
+
+ return 0;
}
-static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus,
+static int amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus,
u8 slave_addr,
u8 addr,
u8 val)
@@ -329,9 +331,12 @@ static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus,
out_buf[0] = addr;
out_buf[1] = val;
- if (i2c_transfer(&i2c_bus->adapter, &msg, 1) != 1)
- DRM_DEBUG("i2c 0x%02x 0x%02x write failed\n",
- addr, val);
+ if (i2c_transfer(&i2c_bus->adapter, &msg, 1) != 1) {
+ DRM_DEBUG("i2c 0x%02x 0x%02x write failed\n", addr, val);
+ return -EIO;
+ }
+
+ return 0;
}
/* ddc router switching */
@@ -346,16 +351,18 @@ amdgpu_i2c_router_select_ddc_port(const struct amdgpu_connector *amdgpu_connecto
if (!amdgpu_connector->router_bus)
return;
- amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
- 0x3, &val);
+ 0x3, &val))
+ return;
val &= ~amdgpu_connector->router.ddc_mux_control_pin;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
0x3, val);
- amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
- 0x1, &val);
+ 0x1, &val))
+ return;
val &= ~amdgpu_connector->router.ddc_mux_control_pin;
val |= amdgpu_connector->router.ddc_mux_state;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
@@ -375,16 +382,18 @@ amdgpu_i2c_router_select_cd_port(const struct amdgpu_connector *amdgpu_connector
if (!amdgpu_connector->router_bus)
return;
- amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
- 0x3, &val);
+ 0x3, &val))
+ return;
val &= ~amdgpu_connector->router.cd_mux_control_pin;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
0x3, val);
- amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
+ if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus,
amdgpu_connector->router.i2c_addr,
- 0x1, &val);
+ 0x1, &val))
+ return;
val &= ~amdgpu_connector->router.cd_mux_control_pin;
val |= amdgpu_connector->router.cd_mux_state;
amdgpu_i2c_put_byte(amdgpu_connector->router_bus,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 665c63f552..013ff373e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -279,7 +279,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
adev->irq.msi_enabled = false;
if (!amdgpu_msi_ok(adev))
- flags = PCI_IRQ_LEGACY;
+ flags = PCI_IRQ_INTX;
else
flags = PCI_IRQ_ALL_TYPES;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index a2df3025a7..a0ea6fe8d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -149,38 +149,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
goto out;
}
- adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
- if (amdgpu_device_supports_px(dev) &&
- (amdgpu_runtime_pm != 0)) { /* enable PX as runtime mode */
- adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
- dev_info(adev->dev, "Using ATPX for runtime pm\n");
- } else if (amdgpu_device_supports_boco(dev) &&
- (amdgpu_runtime_pm != 0)) { /* enable boco as runtime mode */
- adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
- dev_info(adev->dev, "Using BOCO for runtime pm\n");
- } else if (amdgpu_device_supports_baco(dev) &&
- (amdgpu_runtime_pm != 0)) {
- switch (adev->asic_type) {
- case CHIP_VEGA20:
- case CHIP_ARCTURUS:
- /* enable BACO as runpm mode if runpm=1 */
- if (amdgpu_runtime_pm > 0)
- adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
- break;
- case CHIP_VEGA10:
- /* enable BACO as runpm mode if noretry=0 */
- if (!adev->gmc.noretry)
- adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
- break;
- default:
- /* enable BACO as runpm mode on CI+ */
- adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
- break;
- }
-
- if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO)
- dev_info(adev->dev, "Using BACO for runtime pm\n");
- }
+ amdgpu_device_detect_runtime_pm_mode(adev);
/* Call ACPI methods: require modeset init
* but failure is not fatal
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
index 24ad4b9717..0734490347 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
@@ -210,22 +210,26 @@ int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
return -EOPNOTSUPP;
}
-static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry)
+static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry,
+ struct ras_query_context *qctx)
{
- dev_info(adev->dev, HW_ERR "Accelerator Check Architecture events logged\n");
- dev_info(adev->dev, HW_ERR "aca entry[%02d].STATUS=0x%016llx\n",
- idx, entry->regs[MCA_REG_IDX_STATUS]);
- dev_info(adev->dev, HW_ERR "aca entry[%02d].ADDR=0x%016llx\n",
- idx, entry->regs[MCA_REG_IDX_ADDR]);
- dev_info(adev->dev, HW_ERR "aca entry[%02d].MISC0=0x%016llx\n",
- idx, entry->regs[MCA_REG_IDX_MISC0]);
- dev_info(adev->dev, HW_ERR "aca entry[%02d].IPID=0x%016llx\n",
- idx, entry->regs[MCA_REG_IDX_IPID]);
- dev_info(adev->dev, HW_ERR "aca entry[%02d].SYND=0x%016llx\n",
- idx, entry->regs[MCA_REG_IDX_SYND]);
+ u64 event_id = qctx->event_id;
+
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n");
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].STATUS=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_STATUS]);
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].ADDR=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_ADDR]);
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].MISC0=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_MISC0]);
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].IPID=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_IPID]);
+ RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].SYND=0x%016llx\n",
+ idx, entry->regs[MCA_REG_IDX_SYND]);
}
-int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data)
+int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
+ struct ras_err_data *err_data, struct ras_query_context *qctx)
{
struct amdgpu_smuio_mcm_config_info mcm_info;
struct ras_err_addr err_addr = {0};
@@ -244,7 +248,7 @@ int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_blo
list_for_each_entry(node, &mca_set.list, node) {
entry = &node->entry;
- amdgpu_mca_smu_mca_bank_dump(adev, i++, entry);
+ amdgpu_mca_smu_mca_bank_dump(adev, i++, entry, qctx);
count = 0;
ret = amdgpu_mca_smu_parse_mca_error_count(adev, blk, type, entry, &count);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
index b964110ed1..e5bf07ce34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h
@@ -169,6 +169,7 @@ void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root
void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set);
int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry);
void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set);
-int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data);
+int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
+ struct ras_err_data *err_data, struct ras_query_context *qctx);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 1569bef030..5ca5c47ab5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -32,6 +32,18 @@
#define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
#define AMDGPU_ONE_DOORBELL_SIZE 8
+signed long amdgpu_mes_fence_wait_polling(u64 *fence,
+ u64 wait_seq,
+ signed long timeout)
+{
+ while ((s64)(wait_seq - *fence) > 0 && timeout > 0) {
+ udelay(2);
+ timeout -= 2;
+ }
+ return timeout > 0 ? timeout : 0;
+}
+
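amdgpu_mes_fence_wait_polling() busy-waits on a fence sequence number in memory; the (s64) cast of the difference keeps the comparison correct across 64-bit seqno wraparound, and the timeout is counted in roughly microseconds (2 us per udelay step). A hedged usage sketch, where fence_cpu_addr is a hypothetical CPU mapping of the fence location:

/* Wait up to ~500 us for the fence to reach seq; returns true on success */
static bool my_wait_fence(u64 *fence_cpu_addr, u64 seq)
{
    signed long left;

    left = amdgpu_mes_fence_wait_polling(fence_cpu_addr, seq, 500);
    return left > 0;    /* 0 means the poll timed out */
}
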
int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
{
return roundup(AMDGPU_ONE_DOORBELL_SIZE *
@@ -40,7 +52,6 @@ int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
}
static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev,
- struct amdgpu_mes_process *process,
int ip_type, uint64_t *doorbell_index)
{
unsigned int offset, found;
@@ -65,7 +76,6 @@ static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev,
}
static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev,
- struct amdgpu_mes_process *process,
uint32_t doorbell_index)
{
unsigned int old, rel_index;
@@ -656,7 +666,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
*queue_id = queue->queue_id = r;
/* allocate a doorbell index for the queue */
- r = amdgpu_mes_kernel_doorbell_get(adev, gang->process,
+ r = amdgpu_mes_kernel_doorbell_get(adev,
qprops->queue_type,
&qprops->doorbell_off);
if (r)
@@ -714,8 +724,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
return 0;
clean_up_doorbell:
- amdgpu_mes_kernel_doorbell_free(adev, gang->process,
- qprops->doorbell_off);
+ amdgpu_mes_kernel_doorbell_free(adev, qprops->doorbell_off);
clean_up_queue_id:
spin_lock_irqsave(&adev->mes.queue_id_lock, flags);
idr_remove(&adev->mes.queue_id_idr, queue->queue_id);
@@ -769,8 +778,7 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id)
queue_id);
list_del(&queue->list);
- amdgpu_mes_kernel_doorbell_free(adev, gang->process,
- queue->doorbell_off);
+ amdgpu_mes_kernel_doorbell_free(adev, queue->doorbell_off);
amdgpu_mes_unlock(&adev->mes);
amdgpu_mes_queue_free_mqd(queue);
@@ -778,6 +786,28 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id)
return 0;
}
+int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ struct mes_map_legacy_queue_input queue_input;
+ int r;
+
+ memset(&queue_input, 0, sizeof(queue_input));
+
+ queue_input.queue_type = ring->funcs->type;
+ queue_input.doorbell_offset = ring->doorbell_index;
+ queue_input.pipe_id = ring->pipe;
+ queue_input.queue_id = ring->queue;
+ queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
+ queue_input.wptr_addr = ring->wptr_gpu_addr;
+
+ r = adev->mes.funcs->map_legacy_queue(&adev->mes, &queue_input);
+ if (r)
+ DRM_ERROR("failed to map legacy queue\n");
+
+ return r;
+}
+
int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
enum amdgpu_unmap_queues_action action,
@@ -1475,7 +1505,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
const struct mes_firmware_header_v1_0 *mes_hdr;
struct amdgpu_firmware_info *info;
char ucode_prefix[30];
- char fw_name[40];
+ char fw_name[50];
bool need_retry = false;
int r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 4c8fc3117e..df9f0404d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -141,6 +141,12 @@ struct amdgpu_mes {
/* ip specific functions */
const struct amdgpu_mes_funcs *funcs;
+
+ /* mes resource_1 bo */
+ struct amdgpu_bo *resource_1;
+ uint64_t resource_1_gpu_addr;
+ void *resource_1_addr;
+
};
struct amdgpu_mes_process {
@@ -242,6 +248,15 @@ struct mes_remove_queue_input {
uint64_t gang_context_addr;
};
+struct mes_map_legacy_queue_input {
+ uint32_t queue_type;
+ uint32_t doorbell_offset;
+ uint32_t pipe_id;
+ uint32_t queue_id;
+ uint64_t mqd_addr;
+ uint64_t wptr_addr;
+};
+
struct mes_unmap_legacy_queue_input {
enum amdgpu_unmap_queues_action action;
uint32_t queue_type;
@@ -318,6 +333,9 @@ struct amdgpu_mes_funcs {
int (*remove_hw_queue)(struct amdgpu_mes *mes,
struct mes_remove_queue_input *input);
+ int (*map_legacy_queue)(struct amdgpu_mes *mes,
+ struct mes_map_legacy_queue_input *input);
+
int (*unmap_legacy_queue)(struct amdgpu_mes *mes,
struct mes_unmap_legacy_queue_input *input);
@@ -334,6 +352,10 @@ struct amdgpu_mes_funcs {
#define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev))
#define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev))
+signed long amdgpu_mes_fence_wait_polling(u64 *fence,
+ u64 wait_seq,
+ signed long timeout);
+
int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs);
int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe);
@@ -357,6 +379,8 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
int *queue_id);
int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id);
+int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
enum amdgpu_unmap_queues_action action,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
index 1ca9d4ed80..95d676ee20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
@@ -63,6 +63,8 @@ struct amdgpu_mmhub_funcs {
uint64_t page_table_base);
void (*update_power_gating)(struct amdgpu_device *adev,
bool enable);
+ bool (*query_utcl2_poison_status)(struct amdgpu_device *adev,
+ int hub_inst);
};
struct amdgpu_mmhub {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index f6d503432a..c556c8b653 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -39,6 +39,7 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_vram_mgr.h"
/**
* DOC: amdgpu_object
@@ -153,8 +154,10 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
else
places[c].flags |= TTM_PL_FLAG_TOPDOWN;
- if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+ if (abo->tbo.type == ttm_bo_type_kernel &&
+ flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
+
c++;
}
@@ -173,6 +176,12 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
abo->flags & AMDGPU_GEM_CREATE_PREEMPTIBLE ?
AMDGPU_PL_PREEMPT : TTM_PL_TT;
places[c].flags = 0;
+ /*
+ * When GTT is just an alternative to VRAM, make sure that we
+ * only use it as a fallback and still try to fill up VRAM first.
+ */
+ if (domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM)
+ places[c].flags |= TTM_PL_FLAG_FALLBACK;
c++;
}
@@ -595,9 +604,6 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (!amdgpu_bo_support_uswc(bo->flags))
bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
- if (adev->ras_enabled)
- bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
-
bo->tbo.bdev = &adev->mman.bdev;
if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA |
AMDGPU_GEM_DOMAIN_GDS))
@@ -629,7 +635,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
bo->tbo.resource->mem_type == TTM_PL_VRAM) {
struct dma_fence *fence;
- r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true);
+ r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence);
if (unlikely(r))
goto fail_unreserve;
@@ -759,7 +765,7 @@ int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence)
return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
amdgpu_bo_size(shadow), NULL, fence,
- true, false, false);
+ true, false, 0);
}
/**
@@ -961,6 +967,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
if (!bo->placements[i].lpfn ||
(lpfn && lpfn < bo->placements[i].lpfn))
bo->placements[i].lpfn = lpfn;
+
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS &&
+ bo->placements[i].mem_type == TTM_PL_VRAM)
+ bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS;
}
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
@@ -1366,8 +1376,9 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv)))
return;
- r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true);
+ r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true);
if (!WARN_ON(r)) {
+ amdgpu_vram_mgr_set_cleared(bo->resource);
amdgpu_bo_fence(abo, fence, false);
dma_fence_put(fence);
}
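
Two placement-policy changes above are worth connecting: TTM_PL_FLAG_CONTIGUOUS is no longer set up front for user BOs (amdgpu_bo_pin_restricted() now applies it on demand), and a GTT placement that merely shadows a preferred-VRAM domain is tagged TTM_PL_FLAG_FALLBACK so TTM tries VRAM first. A sketch of building such a two-level placement, assuming the 6.10-era struct ttm_place layout:

#include <drm/ttm/ttm_placement.h>

/* VRAM first; GTT is only considered once VRAM allocation fails */
static void my_fill_places(struct ttm_place *places, unsigned int *num)
{
    places[0] = (struct ttm_place){ .mem_type = TTM_PL_VRAM };
    places[1] = (struct ttm_place){
        .mem_type = TTM_PL_TT,
        .flags = TTM_PL_FLAG_FALLBACK,
    };
    *num = 2;
}
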
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 94b310fdb7..cef9dd0a01 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -640,6 +640,20 @@ static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id)
}
}
+static bool psp_err_warn(struct psp_context *psp)
+{
+ struct psp_gfx_cmd_resp *cmd = psp->cmd_buf_mem;
+
+ /* This response indicates reg list is already loaded */
+ if (amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 2) &&
+ cmd->cmd_id == GFX_CMD_ID_LOAD_IP_FW &&
+ cmd->cmd.cmd_load_ip_fw.fw_type == GFX_FW_TYPE_REG_LIST &&
+ cmd->resp.status == TEE_ERROR_CANCEL)
+ return false;
+
+ return true;
+}
+
static int
psp_cmd_submit_buf(struct psp_context *psp,
struct amdgpu_firmware_info *ucode,
@@ -699,10 +713,13 @@ psp_cmd_submit_buf(struct psp_context *psp,
dev_warn(psp->adev->dev,
"failed to load ucode %s(0x%X) ",
amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id);
- dev_warn(psp->adev->dev,
- "psp gfx command %s(0x%X) failed and response status is (0x%X)\n",
- psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), psp->cmd_buf_mem->cmd_id,
- psp->cmd_buf_mem->resp.status);
+ if (psp_err_warn(psp))
+ dev_warn(
+ psp->adev->dev,
+ "psp gfx command %s(0x%X) failed and response status is (0x%X)\n",
+ psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id),
+ psp->cmd_buf_mem->cmd_id,
+ psp->cmd_buf_mem->resp.status);
/* If any firmware (including CAP) load fails under SRIOV, it should
* return failure to stop the VF from initializing.
* Also return failure in case of timeout
@@ -1053,6 +1070,11 @@ static int psp_asd_initialize(struct psp_context *psp)
if (amdgpu_sriov_vf(psp->adev) || !psp->asd_context.bin_desc.size_bytes)
return 0;
+ /* bypass asd if display hardware is not available */
+ if (!amdgpu_device_has_display_hardware(psp->adev) &&
+ amdgpu_ip_version(psp->adev, MP0_HWIP, 0) >= IP_VERSION(13, 0, 10))
+ return 0;
+
psp->asd_context.mem_context.shared_mc_addr = 0;
psp->asd_context.mem_context.shared_mem_size = PSP_ASD_SHARED_MEM_SIZE;
psp->asd_context.ta_load_type = GFX_CMD_ID_LOAD_ASD;
@@ -2260,6 +2282,15 @@ static int psp_hw_start(struct psp_context *psp)
}
}
+ if ((is_psp_fw_valid(psp->ipkeymgr_drv)) &&
+ (psp->funcs->bootloader_load_ipkeymgr_drv != NULL)) {
+ ret = psp_bootloader_load_ipkeymgr_drv(psp);
+ if (ret) {
+ dev_err(adev->dev, "PSP load ipkeymgr_drv failed!\n");
+ return ret;
+ }
+ }
+
if ((is_psp_fw_valid(psp->sos)) &&
(psp->funcs->bootloader_load_sos != NULL)) {
ret = psp_bootloader_load_sos(psp);
@@ -2617,7 +2648,8 @@ static int psp_load_p2s_table(struct psp_context *psp)
struct amdgpu_firmware_info *ucode =
&adev->firmware.ucode[AMDGPU_UCODE_ID_P2S_TABLE];
- if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO))
+ if (adev->in_runpm && ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) ||
+ (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)))
return 0;
if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) {
@@ -2647,7 +2679,8 @@ static int psp_load_smu_fw(struct psp_context *psp)
* Skip SMU FW reloading in case of using BACO for runpm only,
* as SMU is always alive.
*/
- if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO))
+ if (adev->in_runpm && ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) ||
+ (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)))
return 0;
if (!ucode->fw || amdgpu_sriov_vf(psp->adev))
@@ -3273,6 +3306,12 @@ static int parse_sos_bin_descriptor(struct psp_context *psp,
psp->ras_drv.size_bytes = le32_to_cpu(desc->size_bytes);
psp->ras_drv.start_addr = ucode_start_addr;
break;
+ case PSP_FW_TYPE_PSP_IPKEYMGR_DRV:
+ psp->ipkeymgr_drv.fw_version = le32_to_cpu(desc->fw_version);
+ psp->ipkeymgr_drv.feature_version = le32_to_cpu(desc->fw_version);
+ psp->ipkeymgr_drv.size_bytes = le32_to_cpu(desc->size_bytes);
+ psp->ipkeymgr_drv.start_addr = ucode_start_addr;
+ break;
default:
dev_warn(psp->adev->dev, "Unsupported PSP FW type: %d\n", desc->fw_type);
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index ee16f134ae..3635303e65 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -73,8 +73,10 @@ enum psp_bootloader_cmd {
PSP_BL__LOAD_KEY_DATABASE = 0x80000,
PSP_BL__LOAD_SOCDRV = 0xB0000,
PSP_BL__LOAD_DBGDRV = 0xC0000,
+ PSP_BL__LOAD_HADDRV = PSP_BL__LOAD_DBGDRV,
PSP_BL__LOAD_INTFDRV = 0xD0000,
- PSP_BL__LOAD_RASDRV = 0xE0000,
+ PSP_BL__LOAD_RASDRV = 0xE0000,
+ PSP_BL__LOAD_IPKEYMGRDRV = 0xF0000,
PSP_BL__DRAM_LONG_TRAIN = 0x100000,
PSP_BL__DRAM_SHORT_TRAIN = 0x200000,
PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000,
@@ -117,6 +119,7 @@ struct psp_funcs {
int (*bootloader_load_intf_drv)(struct psp_context *psp);
int (*bootloader_load_dbg_drv)(struct psp_context *psp);
int (*bootloader_load_ras_drv)(struct psp_context *psp);
+ int (*bootloader_load_ipkeymgr_drv)(struct psp_context *psp);
int (*bootloader_load_sos)(struct psp_context *psp);
int (*ring_create)(struct psp_context *psp,
enum psp_ring_type ring_type);
@@ -336,6 +339,7 @@ struct psp_context {
struct psp_bin_desc intf_drv;
struct psp_bin_desc dbg_drv;
struct psp_bin_desc ras_drv;
+ struct psp_bin_desc ipkeymgr_drv;
/* tmr buffer */
struct amdgpu_bo *tmr_bo;
@@ -424,6 +428,9 @@ struct amdgpu_psp_funcs {
#define psp_bootloader_load_ras_drv(psp) \
((psp)->funcs->bootloader_load_ras_drv ? \
(psp)->funcs->bootloader_load_ras_drv((psp)) : 0)
+#define psp_bootloader_load_ipkeymgr_drv(psp) \
+ ((psp)->funcs->bootloader_load_ipkeymgr_drv ? \
+ (psp)->funcs->bootloader_load_ipkeymgr_drv((psp)) : 0)
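
The new psp_bootloader_load_ipkeymgr_drv() wrapper follows the file's convention that every bootloader stage is optional: a missing callback counts as success, which lets psp_hw_start() iterate over stages without per-ASIC special cases. The same guard expressed as a plain function, purely for illustration:

/* Illustrative only; the real code uses the macros defined above */
static int my_load_stage(struct psp_context *psp,
                         int (*stage)(struct psp_context *psp))
{
    return stage ? stage(psp) : 0;    /* absent stage is not an error */
}
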
#define psp_bootloader_load_sos(psp) \
((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0)
#define psp_smu_reload_quirk(psp) \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 8ebab6f22e..1adc81a557 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -122,6 +122,8 @@ const char *get_ras_block_str(struct ras_common_if *ras_block)
#define MAX_UMC_POISON_POLLING_TIME_ASYNC 100 //ms
+#define AMDGPU_RAS_RETIRE_PAGE_INTERVAL 100 //ms
+
enum amdgpu_ras_retire_page_reservation {
AMDGPU_RAS_RETIRE_PAGE_RESERVED,
AMDGPU_RAS_RETIRE_PAGE_PENDING,
@@ -1045,6 +1047,7 @@ static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_d
static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
struct ras_manager *ras_mgr,
struct ras_err_data *err_data,
+ struct ras_query_context *qctx,
const char *blk_name,
bool is_ue,
bool is_de)
@@ -1052,27 +1055,28 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
struct amdgpu_smuio_mcm_config_info *mcm_info;
struct ras_err_node *err_node;
struct ras_err_info *err_info;
+ u64 event_id = qctx->event_id;
if (is_ue) {
for_each_ras_error(err_node, err_data) {
err_info = &err_node->err_info;
mcm_info = &err_info->mcm_info;
if (err_info->ue_count) {
- dev_info(adev->dev, "socket: %d, die: %d, "
- "%lld new uncorrectable hardware errors detected in %s block\n",
- mcm_info->socket_id,
- mcm_info->die_id,
- err_info->ue_count,
- blk_name);
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld new uncorrectable hardware errors detected in %s block\n",
+ mcm_info->socket_id,
+ mcm_info->die_id,
+ err_info->ue_count,
+ blk_name);
}
}
for_each_ras_error(err_node, &ras_mgr->err_data) {
err_info = &err_node->err_info;
mcm_info = &err_info->mcm_info;
- dev_info(adev->dev, "socket: %d, die: %d, "
- "%lld uncorrectable hardware errors detected in total in %s block\n",
- mcm_info->socket_id, mcm_info->die_id, err_info->ue_count, blk_name);
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld uncorrectable hardware errors detected in total in %s block\n",
+ mcm_info->socket_id, mcm_info->die_id, err_info->ue_count, blk_name);
}
} else {
@@ -1081,44 +1085,44 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
err_info = &err_node->err_info;
mcm_info = &err_info->mcm_info;
if (err_info->de_count) {
- dev_info(adev->dev, "socket: %d, die: %d, "
- "%lld new deferred hardware errors detected in %s block\n",
- mcm_info->socket_id,
- mcm_info->die_id,
- err_info->de_count,
- blk_name);
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld new deferred hardware errors detected in %s block\n",
+ mcm_info->socket_id,
+ mcm_info->die_id,
+ err_info->de_count,
+ blk_name);
}
}
for_each_ras_error(err_node, &ras_mgr->err_data) {
err_info = &err_node->err_info;
mcm_info = &err_info->mcm_info;
- dev_info(adev->dev, "socket: %d, die: %d, "
- "%lld deferred hardware errors detected in total in %s block\n",
- mcm_info->socket_id, mcm_info->die_id,
- err_info->de_count, blk_name);
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld deferred hardware errors detected in total in %s block\n",
+ mcm_info->socket_id, mcm_info->die_id,
+ err_info->de_count, blk_name);
}
} else {
for_each_ras_error(err_node, err_data) {
err_info = &err_node->err_info;
mcm_info = &err_info->mcm_info;
if (err_info->ce_count) {
- dev_info(adev->dev, "socket: %d, die: %d, "
- "%lld new correctable hardware errors detected in %s block\n",
- mcm_info->socket_id,
- mcm_info->die_id,
- err_info->ce_count,
- blk_name);
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld new correctable hardware errors detected in %s block\n",
+ mcm_info->socket_id,
+ mcm_info->die_id,
+ err_info->ce_count,
+ blk_name);
}
}
for_each_ras_error(err_node, &ras_mgr->err_data) {
err_info = &err_node->err_info;
mcm_info = &err_info->mcm_info;
- dev_info(adev->dev, "socket: %d, die: %d, "
- "%lld correctable hardware errors detected in total in %s block\n",
- mcm_info->socket_id, mcm_info->die_id,
- err_info->ce_count, blk_name);
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, "
+ "%lld correctable hardware errors detected in total in %s block\n",
+ mcm_info->socket_id, mcm_info->die_id,
+ err_info->ce_count, blk_name);
}
}
}
@@ -1131,77 +1135,79 @@ static inline bool err_data_has_source_info(struct ras_err_data *data)
static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev,
struct ras_query_if *query_if,
- struct ras_err_data *err_data)
+ struct ras_err_data *err_data,
+ struct ras_query_context *qctx)
{
struct ras_manager *ras_mgr = amdgpu_ras_find_obj(adev, &query_if->head);
const char *blk_name = get_ras_block_str(&query_if->head);
+ u64 event_id = qctx->event_id;
if (err_data->ce_count) {
if (err_data_has_source_info(err_data)) {
- amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data,
+ amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx,
blk_name, false, false);
} else if (!adev->aid_mask &&
adev->smuio.funcs &&
adev->smuio.funcs->get_socket_id &&
adev->smuio.funcs->get_die_id) {
- dev_info(adev->dev, "socket: %d, die: %d "
- "%ld correctable hardware errors "
- "detected in %s block\n",
- adev->smuio.funcs->get_socket_id(adev),
- adev->smuio.funcs->get_die_id(adev),
- ras_mgr->err_data.ce_count,
- blk_name);
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d "
+ "%ld correctable hardware errors "
+ "detected in %s block\n",
+ adev->smuio.funcs->get_socket_id(adev),
+ adev->smuio.funcs->get_die_id(adev),
+ ras_mgr->err_data.ce_count,
+ blk_name);
} else {
- dev_info(adev->dev, "%ld correctable hardware errors "
- "detected in %s block\n",
- ras_mgr->err_data.ce_count,
- blk_name);
+ RAS_EVENT_LOG(adev, event_id, "%ld correctable hardware errors "
+ "detected in %s block\n",
+ ras_mgr->err_data.ce_count,
+ blk_name);
}
}
if (err_data->ue_count) {
if (err_data_has_source_info(err_data)) {
- amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data,
+ amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx,
blk_name, true, false);
} else if (!adev->aid_mask &&
adev->smuio.funcs &&
adev->smuio.funcs->get_socket_id &&
adev->smuio.funcs->get_die_id) {
- dev_info(adev->dev, "socket: %d, die: %d "
- "%ld uncorrectable hardware errors "
- "detected in %s block\n",
- adev->smuio.funcs->get_socket_id(adev),
- adev->smuio.funcs->get_die_id(adev),
- ras_mgr->err_data.ue_count,
- blk_name);
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d "
+ "%ld uncorrectable hardware errors "
+ "detected in %s block\n",
+ adev->smuio.funcs->get_socket_id(adev),
+ adev->smuio.funcs->get_die_id(adev),
+ ras_mgr->err_data.ue_count,
+ blk_name);
} else {
- dev_info(adev->dev, "%ld uncorrectable hardware errors "
- "detected in %s block\n",
- ras_mgr->err_data.ue_count,
- blk_name);
+ RAS_EVENT_LOG(adev, event_id, "%ld uncorrectable hardware errors "
+ "detected in %s block\n",
+ ras_mgr->err_data.ue_count,
+ blk_name);
}
}
if (err_data->de_count) {
if (err_data_has_source_info(err_data)) {
- amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data,
+ amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx,
blk_name, false, true);
} else if (!adev->aid_mask &&
adev->smuio.funcs &&
adev->smuio.funcs->get_socket_id &&
adev->smuio.funcs->get_die_id) {
- dev_info(adev->dev, "socket: %d, die: %d "
- "%ld deferred hardware errors "
- "detected in %s block\n",
- adev->smuio.funcs->get_socket_id(adev),
- adev->smuio.funcs->get_die_id(adev),
- ras_mgr->err_data.de_count,
- blk_name);
+ RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d "
+ "%ld deferred hardware errors "
+ "detected in %s block\n",
+ adev->smuio.funcs->get_socket_id(adev),
+ adev->smuio.funcs->get_die_id(adev),
+ ras_mgr->err_data.de_count,
+ blk_name);
} else {
- dev_info(adev->dev, "%ld deferred hardware errors "
- "detected in %s block\n",
- ras_mgr->err_data.de_count,
- blk_name);
+ RAS_EVENT_LOG(adev, event_id, "%ld deferred hardware errors "
+ "detected in %s block\n",
+ ras_mgr->err_data.de_count,
+ blk_name);
}
}
}
@@ -1244,6 +1250,10 @@ int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
{
struct ras_manager *obj;
+ /* in resume phase, no need to create aca fs node */
+ if (adev->in_suspend || amdgpu_in_reset(adev))
+ return 0;
+
obj = get_ras_manager(adev, blk);
if (!obj)
return -EINVAL;
@@ -1265,7 +1275,8 @@ int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk)
}
static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
- enum aca_error_type type, struct ras_err_data *err_data)
+ enum aca_error_type type, struct ras_err_data *err_data,
+ struct ras_query_context *qctx)
{
struct ras_manager *obj;
@@ -1273,7 +1284,7 @@ static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu
if (!obj)
return -EINVAL;
- return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data);
+ return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data, qctx);
}
ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr,
@@ -1287,13 +1298,14 @@ ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *a
if (amdgpu_ras_query_error_status(obj->adev, &info))
return -EINVAL;
- return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count,
- "ce", info.ce_count);
+ return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n", "ue", info.ue_count,
+ "ce", info.ce_count, "de", info.ue_count);
}
static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
struct ras_query_if *info,
struct ras_err_data *err_data,
+ struct ras_query_context *qctx,
unsigned int error_query_mode)
{
enum amdgpu_ras_block blk = info ? info->head.block : AMDGPU_RAS_BLOCK_COUNT;
@@ -1329,17 +1341,21 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
}
} else {
if (amdgpu_aca_is_enabled(adev)) {
- ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_UE, err_data);
+ ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_UE, err_data, qctx);
+ if (ret)
+ return ret;
+
+ ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data, qctx);
if (ret)
return ret;
- ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data);
+ ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_DEFERRED, err_data, qctx);
if (ret)
return ret;
} else {
/* FIXME: add code to check return value later */
- amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data);
- amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data);
+ amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data, qctx);
+ amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data, qctx);
}
}
@@ -1351,6 +1367,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_i
{
struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
struct ras_err_data err_data;
+ struct ras_query_context qctx;
unsigned int error_query_mode;
int ret;
@@ -1364,8 +1381,12 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_i
if (!amdgpu_ras_get_error_query_mode(adev, &error_query_mode))
return -EINVAL;
+ memset(&qctx, 0, sizeof(qctx));
+ qctx.event_id = amdgpu_ras_acquire_event_id(adev, amdgpu_ras_intr_triggered() ?
+ RAS_EVENT_TYPE_ISR : RAS_EVENT_TYPE_INVALID);
ret = amdgpu_ras_query_error_status_helper(adev, info,
&err_data,
+ &qctx,
error_query_mode);
if (ret)
goto out_fini_err_data;
@@ -1376,7 +1397,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_i
info->ce_count = obj->err_data.ce_count;
info->de_count = obj->err_data.de_count;
- amdgpu_ras_error_generate_report(adev, info, &err_data);
+ amdgpu_ras_error_generate_report(adev, info, &err_data, &qctx);
out_fini_err_data:
amdgpu_ras_error_data_fini(&err_data);
@@ -2041,7 +2062,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *
}
}
- amdgpu_umc_poison_handler(adev, obj->head.block, false);
+ amdgpu_umc_poison_handler(adev, obj->head.block, 0);
if (block_obj->hw_ops && block_obj->hw_ops->handle_poison_consumption)
poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
@@ -2061,6 +2082,17 @@ static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj
{
dev_info(obj->adev->dev,
"Poison is created\n");
+
+ if (amdgpu_ip_version(obj->adev, UMC_HWIP, 0) >= IP_VERSION(12, 0, 0)) {
+ struct amdgpu_ras *con = amdgpu_ras_get_context(obj->adev);
+
+ amdgpu_ras_put_poison_req(obj->adev,
+ AMDGPU_RAS_BLOCK__UMC, 0, NULL, NULL, false);
+
+ atomic_inc(&con->page_retirement_req_cnt);
+
+ wake_up(&con->page_retirement_wq);
+ }
}
static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj,
@@ -2371,7 +2403,7 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,
.flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED,
};
status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr,
- data->bps[i].retired_page);
+ data->bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT);
if (status == -EBUSY)
(*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING;
else if (status == -ENOENT)
@@ -2384,6 +2416,19 @@ out:
return ret;
}
+static void amdgpu_ras_set_fed_all(struct amdgpu_device *adev,
+ struct amdgpu_hive_info *hive, bool status)
+{
+ struct amdgpu_device *tmp_adev;
+
+ if (hive) {
+ list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
+ amdgpu_ras_set_fed(tmp_adev, status);
+ } else {
+ amdgpu_ras_set_fed(adev, status);
+ }
+}
+
static void amdgpu_ras_do_recovery(struct work_struct *work)
{
struct amdgpu_ras *ras =
@@ -2393,8 +2438,21 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
struct list_head device_list, *device_list_handle = NULL;
struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
- if (hive)
+ if (hive) {
atomic_set(&hive->ras_recovery, 1);
+
+ /* If any device that is part of the hive received a RAS fatal
+ * error interrupt, set the fatal error status on all of them.
+ * This condition requires a recovery, and the flag will be
+ * cleared as part of that recovery.
+ */
+ list_for_each_entry(remote_adev, &hive->device_list,
+ gmc.xgmi.head)
+ if (amdgpu_ras_get_fed_status(remote_adev)) {
+ amdgpu_ras_set_fed_all(adev, hive, true);
+ break;
+ }
+ }
if (!ras->disable_ras_err_cnt_harvest) {
/* Build list of devices to query RAS related errors */
@@ -2439,18 +2497,6 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET;
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
- /* For any RAS error that needs a full reset to
- * recover, set the fatal error status
- */
- if (hive) {
- list_for_each_entry(remote_adev,
- &hive->device_list,
- gmc.xgmi.head)
- amdgpu_ras_set_fed(remote_adev,
- true);
- } else {
- amdgpu_ras_set_fed(adev, true);
- }
psp_fatal_error_recovery_quirk(&adev->psp);
}
}
@@ -2516,9 +2562,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev,
goto out;
}
- amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr,
- bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT,
- AMDGPU_GPU_PAGE_SIZE);
+ amdgpu_ras_reserve_page(adev, bps[i].retired_page);
memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps));
data->count++;
@@ -2674,10 +2718,167 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
}
}
+int amdgpu_ras_put_poison_req(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset)
+{
+ int ret = 0;
+ struct ras_poison_msg poison_msg;
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ memset(&poison_msg, 0, sizeof(poison_msg));
+ poison_msg.block = block;
+ poison_msg.pasid = pasid;
+ poison_msg.reset = reset;
+ poison_msg.pasid_fn = pasid_fn;
+ poison_msg.data = data;
+
+ ret = kfifo_put(&con->poison_fifo, poison_msg);
+ if (!ret) {
+ dev_err(adev->dev, "Poison message fifo is full!\n");
+ return -ENOSPC;
+ }
+
+ return 0;
+}
+
+static int amdgpu_ras_get_poison_req(struct amdgpu_device *adev,
+ struct ras_poison_msg *poison_msg)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+
+ return kfifo_get(&con->poison_fifo, poison_msg);
+}
+
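+/* Set up the deferred-error log: a radix tree of bad-page records keyed
+ * by a siphash of the page list, protected by ecc_log->lock.
+ */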
+static void amdgpu_ras_ecc_log_init(struct ras_ecc_log_info *ecc_log)
+{
+ mutex_init(&ecc_log->lock);
+
+	/* Any fixed value works as the siphash key */
+ memset(&ecc_log->ecc_key, 0xad, sizeof(ecc_log->ecc_key));
+
+ INIT_RADIX_TREE(&ecc_log->de_page_tree, GFP_KERNEL);
+ ecc_log->de_updated = false;
+}
+
+static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log)
+{
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+ struct ras_ecc_err *ecc_err;
+
+ mutex_lock(&ecc_log->lock);
+ radix_tree_for_each_slot(slot, &ecc_log->de_page_tree, &iter, 0) {
+ ecc_err = radix_tree_deref_slot(slot);
+ kfree(ecc_err->err_pages.pfn);
+ kfree(ecc_err);
+ radix_tree_iter_delete(&ecc_log->de_page_tree, &iter, slot);
+ }
+ mutex_unlock(&ecc_log->lock);
+
+ mutex_destroy(&ecc_log->lock);
+ ecc_log->de_updated = false;
+}
+
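+/* Delayed work that retires bad pages and re-arms itself while entries
+ * in the deferred-error tree are still tagged as newly detected.
+ */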
+static void amdgpu_ras_do_page_retirement(struct work_struct *work)
+{
+ struct amdgpu_ras *con = container_of(work, struct amdgpu_ras,
+ page_retirement_dwork.work);
+ struct amdgpu_device *adev = con->adev;
+ struct ras_err_data err_data;
+
+ if (amdgpu_in_reset(adev) || atomic_read(&con->in_recovery))
+ return;
+
+ amdgpu_ras_error_data_init(&err_data);
+
+ amdgpu_umc_handle_bad_pages(adev, &err_data);
+
+ amdgpu_ras_error_data_fini(&err_data);
+
+ mutex_lock(&con->umc_ecc_log.lock);
+ if (radix_tree_tagged(&con->umc_ecc_log.de_page_tree,
+ UMC_ECC_NEW_DETECTED_TAG))
+ schedule_delayed_work(&con->page_retirement_dwork,
+ msecs_to_jiffies(AMDGPU_RAS_RETIRE_PAGE_INTERVAL));
+ mutex_unlock(&con->umc_ecc_log.lock);
+}
+
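+/* Poll the RAS error status until the deferred-error log is updated or
+ * @timeout_ms milliseconds have elapsed.
+ */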
+static int amdgpu_ras_query_ecc_status(struct amdgpu_device *adev,
+ enum amdgpu_ras_block ras_block, uint32_t timeout_ms)
+{
+ int ret = 0;
+ struct ras_ecc_log_info *ecc_log;
+ struct ras_query_if info;
+ uint32_t timeout = timeout_ms;
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+ memset(&info, 0, sizeof(info));
+ info.head.block = ras_block;
+
+ ecc_log = &ras->umc_ecc_log;
+ ecc_log->de_updated = false;
+ do {
+ ret = amdgpu_ras_query_error_status(adev, &info);
+ if (ret) {
+ dev_err(adev->dev, "Failed to query ras error! ret:%d\n", ret);
+ return ret;
+ }
+
+ if (timeout && !ecc_log->de_updated) {
+ msleep(1);
+ timeout--;
+ }
+ } while (timeout && !ecc_log->de_updated);
+
+ if (timeout_ms && !timeout) {
+ dev_warn(adev->dev, "Can't find deferred error\n");
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
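+/* Handle a poison creation interrupt: wait for the deferred error to be
+ * logged, then kick off the page retirement work immediately.
+ */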
+static void amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
+ uint32_t timeout)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ int ret;
+
+ ret = amdgpu_ras_query_ecc_status(adev, AMDGPU_RAS_BLOCK__UMC, timeout);
+ if (!ret)
+ schedule_delayed_work(&con->page_retirement_dwork, 0);
+}
+
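+/* Handle a poison consumption event: flag the SRAM ECC error to KFD,
+ * run the pasid callback if one was given, and trigger the requested
+ * GPU reset mode.
+ */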
+static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
+ struct ras_poison_msg *poison_msg)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint32_t reset = poison_msg->reset;
+ uint16_t pasid = poison_msg->pasid;
+
+ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+
+ if (poison_msg->pasid_fn)
+ poison_msg->pasid_fn(adev, pasid, poison_msg->data);
+
+ if (reset) {
+ flush_delayed_work(&con->page_retirement_dwork);
+
+ con->gpu_reset_flags |= reset;
+ amdgpu_ras_reset_gpu(adev);
+ }
+
+ return 0;
+}
+
static int amdgpu_ras_page_retirement_thread(void *param)
{
struct amdgpu_device *adev = (struct amdgpu_device *)param;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_poison_msg poison_msg;
+ enum amdgpu_ras_block ras_block;
+ bool poison_creation_is_handled = false;
while (!kthread_should_stop()) {
@@ -2688,13 +2889,34 @@ static int amdgpu_ras_page_retirement_thread(void *param)
if (kthread_should_stop())
break;
- dev_info(adev->dev, "Start processing page retirement. request:%d\n",
- atomic_read(&con->page_retirement_req_cnt));
-
atomic_dec(&con->page_retirement_req_cnt);
- amdgpu_umc_bad_page_polling_timeout(adev,
- false, MAX_UMC_POISON_POLLING_TIME_ASYNC);
+ if (!amdgpu_ras_get_poison_req(adev, &poison_msg))
+ continue;
+
+ ras_block = poison_msg.block;
+
+ dev_info(adev->dev, "Start processing ras block %s(%d)\n",
+ ras_block_str(ras_block), ras_block);
+
+ if (ras_block == AMDGPU_RAS_BLOCK__UMC) {
+ amdgpu_ras_poison_creation_handler(adev,
+ MAX_UMC_POISON_POLLING_TIME_ASYNC);
+ poison_creation_is_handled = true;
+ } else {
+			/* poison_creation_is_handled:
+			 * false: this consumption interrupt arrived without a
+			 * preceding poison creation interrupt, so poll for the
+			 * deferred error with a timeout.
+			 * true: a poison creation interrupt was already handled
+			 * earlier and none has arrived since, so query once
+			 * without waiting.
+			 */
+ amdgpu_ras_poison_creation_handler(adev,
+ poison_creation_is_handled ?
+ 0 : MAX_UMC_POISON_POLLING_TIME_ASYNC);
+
+ amdgpu_ras_poison_consumption_handler(adev, &poison_msg);
+ poison_creation_is_handled = false;
+ }
}
return 0;
@@ -2763,6 +2985,8 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
}
}
+ mutex_init(&con->page_rsv_lock);
+ INIT_KFIFO(con->poison_fifo);
mutex_init(&con->page_retirement_lock);
init_waitqueue_head(&con->page_retirement_wq);
atomic_set(&con->page_retirement_req_cnt, 0);
@@ -2773,6 +2997,8 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
dev_warn(adev->dev, "Failed to create umc_page_retirement thread!!!\n");
}
+ INIT_DELAYED_WORK(&con->page_retirement_dwork, amdgpu_ras_do_page_retirement);
+ amdgpu_ras_ecc_log_init(&con->umc_ecc_log);
#ifdef CONFIG_X86_MCE_AMD
if ((adev->asic_type == CHIP_ALDEBARAN) &&
(adev->gmc.xgmi.connected_to_cpu))
@@ -2813,8 +3039,14 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
atomic_set(&con->page_retirement_req_cnt, 0);
+ mutex_destroy(&con->page_rsv_lock);
+
cancel_work_sync(&con->recovery_work);
+ cancel_delayed_work_sync(&con->page_retirement_dwork);
+
+ amdgpu_ras_ecc_log_fini(&con->umc_ecc_log);
+
mutex_lock(&con->recovery_lock);
con->eh_data = NULL;
kfree(data->bps);
@@ -3036,6 +3268,35 @@ static int amdgpu_get_ras_schema(struct amdgpu_device *adev)
AMDGPU_RAS_ERROR__PARITY;
}
+static void ras_event_mgr_init(struct ras_event_manager *mgr)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(mgr->seqnos); i++)
+ atomic64_set(&mgr->seqnos[i], 0);
+}
+
+static void amdgpu_ras_event_mgr_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ struct amdgpu_hive_info *hive;
+
+ if (!ras)
+ return;
+
+ hive = amdgpu_get_xgmi_hive(adev);
+ ras->event_mgr = hive ? &hive->event_mgr : &ras->__event_mgr;
+
+ /* init event manager with node 0 on xgmi system */
+ if (!amdgpu_in_reset(adev)) {
+ if (!hive || adev->gmc.xgmi.node_id == 0)
+ ras_event_mgr_init(ras->event_mgr);
+ }
+
+ if (hive)
+ amdgpu_put_xgmi_hive(hive);
+}
+
int amdgpu_ras_init(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -3356,6 +3617,8 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
return 0;
+ amdgpu_ras_event_mgr_init(adev);
+
if (amdgpu_aca_is_enabled(adev)) {
if (amdgpu_in_reset(adev))
r = amdgpu_aca_reset(adev);
@@ -3472,14 +3735,39 @@ void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status)
atomic_set(&ras->fed, !!status);
}
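+/* Event ids are sequence numbers; an id with bit 63 set is invalid. */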
+bool amdgpu_ras_event_id_is_valid(struct amdgpu_device *adev, u64 id)
+{
+ return !(id & BIT_ULL(63));
+}
+
+u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type)
+{
+ struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ u64 id;
+
+ switch (type) {
+ case RAS_EVENT_TYPE_ISR:
+ id = (u64)atomic64_read(&ras->event_mgr->seqnos[type]);
+ break;
+ case RAS_EVENT_TYPE_INVALID:
+ default:
+ id = BIT_ULL(63) | 0ULL;
+ break;
+ }
+
+ return id;
+}
+
void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
{
if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ u64 event_id = (u64)atomic64_inc_return(&ras->event_mgr->seqnos[RAS_EVENT_TYPE_ISR]);
- dev_info(adev->dev, "uncorrectable hardware error"
- "(ERREVENT_ATHUB_INTERRUPT) detected!\n");
+		RAS_EVENT_LOG(adev, event_id, "uncorrectable hardware error "
+			      "(ERREVENT_ATHUB_INTERRUPT) detected!\n");
+ amdgpu_ras_set_fed(adev, true);
ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET;
amdgpu_ras_reset_gpu(adev);
}
@@ -3998,6 +4286,8 @@ void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, struct ras_err_a
{
struct ras_err_addr *mca_err_addr;
+ /* This function will be retired. */
+ return;
mca_err_addr = kzalloc(sizeof(*mca_err_addr), GFP_KERNEL);
if (!mca_err_addr)
return;
@@ -4195,3 +4485,19 @@ void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances)
amdgpu_ras_boot_time_error_reporting(adev, i, boot_error);
}
}
+
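+/* Reserve a VRAM page for retirement exactly once; page_rsv_lock keeps
+ * concurrent callers from reserving the same range twice.
+ */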
+int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+ uint64_t start = pfn << AMDGPU_GPU_PAGE_SHIFT;
+ int ret = 0;
+
+ mutex_lock(&con->page_rsv_lock);
+ ret = amdgpu_vram_mgr_query_page_status(mgr, start);
+ if (ret == -ENOENT)
+ ret = amdgpu_vram_mgr_reserve_range(mgr, start, AMDGPU_GPU_PAGE_SIZE);
+ mutex_unlock(&con->page_rsv_lock);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index db9cb2b4e9..7021c4a66f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -26,6 +26,9 @@
#include <linux/debugfs.h>
#include <linux/list.h>
+#include <linux/kfifo.h>
+#include <linux/radix-tree.h>
+#include <linux/siphash.h>
#include "ta_ras_if.h"
#include "amdgpu_ras_eeprom.h"
#include "amdgpu_smuio.h"
@@ -64,6 +67,14 @@ struct amdgpu_iv_entry;
/* The high three bits indicate the socket id */
#define AMDGPU_RAS_GET_FEATURES(val) ((val) & ~AMDGPU_RAS_FEATURES_SOCKETID_MASK)
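+/* Prefix RAS messages with the event id when it is valid, otherwise log
+ * the plain message.
+ */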
+#define RAS_EVENT_LOG(_adev, _id, _fmt, ...) \
+do { \
+ if (amdgpu_ras_event_id_is_valid((_adev), (_id))) \
+ dev_info((_adev)->dev, "{%llu}" _fmt, (_id), ##__VA_ARGS__); \
+ else \
+ dev_info((_adev)->dev, _fmt, ##__VA_ARGS__); \
+} while (0)
+
enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__UMC = 0,
AMDGPU_RAS_BLOCK__SDMA,
@@ -419,6 +430,52 @@ struct umc_ecc_info {
int record_ce_addr_supported;
};
+enum ras_event_type {
+ RAS_EVENT_TYPE_INVALID = -1,
+ RAS_EVENT_TYPE_ISR = 0,
+ RAS_EVENT_TYPE_COUNT,
+};
+
+struct ras_event_manager {
+ atomic64_t seqnos[RAS_EVENT_TYPE_COUNT];
+};
+
+struct ras_query_context {
+ enum ras_event_type type;
+ u64 event_id;
+};
+
+typedef int (*pasid_notify)(struct amdgpu_device *adev,
+ uint16_t pasid, void *data);
+
+struct ras_poison_msg {
+ enum amdgpu_ras_block block;
+ uint16_t pasid;
+ uint32_t reset;
+ pasid_notify pasid_fn;
+ void *data;
+};
+
+struct ras_err_pages {
+ uint32_t count;
+ uint64_t *pfn;
+};
+
+struct ras_ecc_err {
+ u64 hash_index;
+ uint64_t status;
+ uint64_t ipid;
+ uint64_t addr;
+ struct ras_err_pages err_pages;
+};
+
+struct ras_ecc_log_info {
+ struct mutex lock;
+ siphash_key_t ecc_key;
+ struct radix_tree_root de_page_tree;
+ bool de_updated;
+};
+
struct amdgpu_ras {
/* ras infrastructure */
/* for ras itself. */
@@ -477,8 +534,18 @@ struct amdgpu_ras {
wait_queue_head_t page_retirement_wq;
struct mutex page_retirement_lock;
atomic_t page_retirement_req_cnt;
+ struct mutex page_rsv_lock;
+ DECLARE_KFIFO(poison_fifo, struct ras_poison_msg, 128);
+ struct ras_ecc_log_info umc_ecc_log;
+ struct delayed_work page_retirement_dwork;
+
/* Fatal error detected flag */
atomic_t fed;
+
+ /* RAS event manager */
+ struct ras_event_manager __event_mgr;
+ struct ras_event_manager *event_mgr;
+
};
struct ras_fs_data {
@@ -512,6 +579,7 @@ struct ras_err_data {
unsigned long de_count;
unsigned long err_addr_cnt;
struct eeprom_table_record *err_addr;
+ unsigned long err_addr_len;
u32 err_list_count;
struct list_head err_node_list;
};
@@ -879,4 +947,13 @@ void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info,
void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status);
bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev);
+bool amdgpu_ras_event_id_is_valid(struct amdgpu_device *adev, u64 id);
+u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type);
+
+int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn);
+
+int amdgpu_ras_put_poison_req(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index b12808c0c3..06a62a8a99 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -404,6 +404,22 @@ static int amdgpu_ras_eeprom_correct_header_tag(
return res;
}
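+/* Pick the bad-page table version from the UMC IP version instead of a
+ * per-asic callback.
+ */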
+static void amdgpu_ras_set_eeprom_table_version(struct amdgpu_ras_eeprom_control *control)
+{
+ struct amdgpu_device *adev = to_amdgpu_device(control);
+ struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+
+ switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) {
+ case IP_VERSION(8, 10, 0):
+ case IP_VERSION(12, 0, 0):
+ hdr->version = RAS_TABLE_VER_V2_1;
+ return;
+ default:
+ hdr->version = RAS_TABLE_VER_V1;
+ return;
+ }
+}
+
/**
* amdgpu_ras_eeprom_reset_table -- Reset the RAS EEPROM table
* @control: pointer to control structure
@@ -423,11 +439,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control)
mutex_lock(&control->ras_tbl_mutex);
hdr->header = RAS_TABLE_HDR_VAL;
- if (adev->umc.ras &&
- adev->umc.ras->set_eeprom_table_version)
- adev->umc.ras->set_eeprom_table_version(hdr);
- else
- hdr->version = RAS_TABLE_VER_V1;
+ amdgpu_ras_set_eeprom_table_version(control);
if (hdr->version == RAS_TABLE_VER_V2_1) {
hdr->first_rec_offset = RAS_RECORD_START_V2_1;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index 381101d2bf..50fcd86e10 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
}
}
+/**
+ * amdgpu_res_cleared - check if blocks are cleared
+ *
+ * @cur: the cursor to extract the block
+ *
+ * Check if the @cur block is cleared
+ */
+static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur)
+{
+ struct drm_buddy_block *block;
+
+ switch (cur->mem_type) {
+ case TTM_PL_VRAM:
+ block = cur->node;
+
+ if (!amdgpu_vram_mgr_is_cleared(block))
+ return false;
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index 147100c27c..ea4873f6cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -21,9 +21,6 @@
*
*/
-#include <linux/devcoredump.h>
-#include <generated/utsrelease.h>
-
#include "amdgpu_reset.h"
#include "aldebaran.h"
#include "sienna_cichlid.h"
@@ -161,105 +158,3 @@ void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain)
atomic_set(&reset_domain->in_gpu_reset, 0);
up_write(&reset_domain->sem);
}
-
-#ifndef CONFIG_DEV_COREDUMP
-void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
- struct amdgpu_reset_context *reset_context)
-{
-}
-#else
-static ssize_t
-amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
- void *data, size_t datalen)
-{
- struct drm_printer p;
- struct amdgpu_coredump_info *coredump = data;
- struct drm_print_iterator iter;
- int i;
-
- iter.data = buffer;
- iter.offset = 0;
- iter.start = offset;
- iter.remain = count;
-
- p = drm_coredump_printer(&iter);
-
- drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
- drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n");
- drm_printf(&p, "kernel: " UTS_RELEASE "\n");
- drm_printf(&p, "module: " KBUILD_MODNAME "\n");
- drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec,
- coredump->reset_time.tv_nsec);
-
- if (coredump->reset_task_info.pid)
- drm_printf(&p, "process_name: %s PID: %d\n",
- coredump->reset_task_info.process_name,
- coredump->reset_task_info.pid);
-
- if (coredump->ring) {
- drm_printf(&p, "\nRing timed out details\n");
- drm_printf(&p, "IP Type: %d Ring Name: %s\n",
- coredump->ring->funcs->type,
- coredump->ring->name);
- }
-
- if (coredump->reset_vram_lost)
- drm_printf(&p, "VRAM is lost due to GPU reset!\n");
- if (coredump->adev->reset_info.num_regs) {
- drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n");
-
- for (i = 0; i < coredump->adev->reset_info.num_regs; i++)
- drm_printf(&p, "0x%08x: 0x%08x\n",
- coredump->adev->reset_info.reset_dump_reg_list[i],
- coredump->adev->reset_info.reset_dump_reg_value[i]);
- }
-
- return count - iter.remain;
-}
-
-static void amdgpu_devcoredump_free(void *data)
-{
- kfree(data);
-}
-
-void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
- struct amdgpu_reset_context *reset_context)
-{
- struct amdgpu_coredump_info *coredump;
- struct drm_device *dev = adev_to_drm(adev);
- struct amdgpu_job *job = reset_context->job;
- struct drm_sched_job *s_job;
-
- coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT);
-
- if (!coredump) {
- DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__);
- return;
- }
-
- coredump->reset_vram_lost = vram_lost;
-
- if (reset_context->job && reset_context->job->vm) {
- struct amdgpu_task_info *ti;
- struct amdgpu_vm *vm = reset_context->job->vm;
-
- ti = amdgpu_vm_get_task_info_vm(vm);
- if (ti) {
- coredump->reset_task_info = *ti;
- amdgpu_vm_put_task_info(ti);
- }
- }
-
- if (job) {
- s_job = &job->base;
- coredump->ring = to_amdgpu_ring(s_job->sched);
- }
-
- coredump->adev = adev;
-
- ktime_get_ts64(&coredump->reset_time);
-
- dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
- amdgpu_devcoredump_read, amdgpu_devcoredump_free);
-}
-#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 60522963aa..b11d190ece 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -32,6 +32,7 @@ enum AMDGPU_RESET_FLAGS {
AMDGPU_NEED_FULL_RESET = 0,
AMDGPU_SKIP_HW_RESET = 1,
+ AMDGPU_SKIP_COREDUMP = 2,
};
struct amdgpu_reset_context {
@@ -88,19 +89,6 @@ struct amdgpu_reset_domain {
atomic_t reset_res;
};
-#ifdef CONFIG_DEV_COREDUMP
-
-#define AMDGPU_COREDUMP_VERSION "1"
-
-struct amdgpu_coredump_info {
- struct amdgpu_device *adev;
- struct amdgpu_task_info reset_task_info;
- struct timespec64 reset_time;
- bool reset_vram_lost;
- struct amdgpu_ring *ring;
-};
-#endif
-
int amdgpu_reset_init(struct amdgpu_device *adev);
int amdgpu_reset_fini(struct amdgpu_device *adev);
@@ -141,9 +129,6 @@ void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);
void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);
-void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost,
- struct amdgpu_reset_context *reset_context);
-
#define for_each_handler(i, handler, reset_ctl) \
for (i = 0; (i < AMDGPU_RESET_MAX_HANDLERS) && \
(handler = (*reset_ctl->reset_handlers)[i]); \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 173a2a3080..b51a82e711 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -132,7 +132,7 @@ struct amdgpu_buffer_funcs {
uint64_t dst_offset,
/* number of byte to transfer */
uint32_t byte_count,
- bool tmz);
+ uint32_t copy_flags);
/* maximum bytes in a single operation */
uint32_t fill_max_bytes;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
index ff44351810..ec9d12f85f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
@@ -44,6 +44,7 @@ struct amdgpu_smuio_funcs {
u32 (*get_socket_id)(struct amdgpu_device *adev);
enum amdgpu_pkg_type (*get_pkg_type)(struct amdgpu_device *adev);
bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev);
+ u64 (*get_gpu_clock_counter)(struct amdgpu_device *adev);
};
struct amdgpu_smuio {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index f539b1d002..383fce40d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -178,10 +178,10 @@ TRACE_EVENT(amdgpu_cs_ioctl,
TP_fast_assign(
__entry->sched_job_id = job->base.id;
- __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job));
+ __assign_str(timeline);
__entry->context = job->base.s_fence->finished.context;
__entry->seqno = job->base.s_fence->finished.seqno;
- __assign_str(ring, to_amdgpu_ring(job->base.sched)->name);
+ __assign_str(ring);
__entry->num_ibs = job->num_ibs;
),
TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
@@ -203,10 +203,10 @@ TRACE_EVENT(amdgpu_sched_run_job,
TP_fast_assign(
__entry->sched_job_id = job->base.id;
- __assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job));
+ __assign_str(timeline);
__entry->context = job->base.s_fence->finished.context;
__entry->seqno = job->base.s_fence->finished.seqno;
- __assign_str(ring, to_amdgpu_ring(job->base.sched)->name);
+ __assign_str(ring);
__entry->num_ibs = job->num_ibs;
),
TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
@@ -231,7 +231,7 @@ TRACE_EVENT(amdgpu_vm_grab_id,
TP_fast_assign(
__entry->pasid = vm->pasid;
- __assign_str(ring, ring->name);
+ __assign_str(ring);
__entry->vmid = job->vmid;
__entry->vm_hub = ring->vm_hub,
__entry->pd_addr = job->vm_pd_addr;
@@ -425,7 +425,7 @@ TRACE_EVENT(amdgpu_vm_flush,
),
TP_fast_assign(
- __assign_str(ring, ring->name);
+ __assign_str(ring);
__entry->vmid = vmid;
__entry->vm_hub = ring->vm_hub;
__entry->pd_addr = pd_addr;
@@ -526,7 +526,7 @@ TRACE_EVENT(amdgpu_ib_pipe_sync,
),
TP_fast_assign(
- __assign_str(ring, sched_job->base.sched->name);
+ __assign_str(ring);
__entry->id = sched_job->base.id;
__entry->fence = fence;
__entry->ctx = fence->context;
@@ -554,21 +554,6 @@ TRACE_EVENT(amdgpu_reset_reg_dumps,
__entry->value)
);
-TRACE_EVENT(amdgpu_runpm_reference_dumps,
- TP_PROTO(uint32_t index, const char *func),
- TP_ARGS(index, func),
- TP_STRUCT__entry(
- __field(uint32_t, index)
- __string(func, func)
- ),
- TP_fast_assign(
- __entry->index = index;
- __assign_str(func, func);
- ),
- TP_printk("amdgpu runpm reference dump 0x%x: 0x%s\n",
- __entry->index,
- __get_str(func))
-);
#undef AMDGPU_JOB_GET_TIMELINE_NAME
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 29c197c000..e785f12841 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -236,7 +236,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
- dst_addr, num_bytes, false);
+ dst_addr, num_bytes, 0);
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
@@ -296,6 +296,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
struct dma_fence *fence = NULL;
int r = 0;
+ uint32_t copy_flags = 0;
+
if (!adev->mman.buffer_funcs_enabled) {
DRM_ERROR("Trying to move memory with ring turned off.\n");
return -EINVAL;
@@ -323,8 +325,11 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
if (r)
goto error;
- r = amdgpu_copy_buffer(ring, from, to, cur_size,
- resv, &next, false, true, tmz);
+ if (tmz)
+ copy_flags |= AMDGPU_COPY_FLAGS_TMZ;
+
+ r = amdgpu_copy_buffer(ring, from, to, cur_size, resv,
+ &next, false, true, copy_flags);
if (r)
goto error;
@@ -378,11 +383,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
struct dma_fence *wipe_fence = NULL;
- r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence,
- false);
+ r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence,
+ false);
if (r) {
goto error;
} else if (wipe_fence) {
+ amdgpu_vram_mgr_set_cleared(bo->resource);
dma_fence_put(fence);
fence = wipe_fence;
}
@@ -1492,7 +1498,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
swap(src_addr, dst_addr);
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
- PAGE_SIZE, false);
+ PAGE_SIZE, 0);
amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
@@ -2143,7 +2149,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence, bool direct_submit,
- bool vm_needs_flush, bool tmz)
+ bool vm_needs_flush, uint32_t copy_flags)
{
struct amdgpu_device *adev = ring->adev;
unsigned int num_loops, num_dw;
@@ -2169,8 +2175,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
- dst_offset, cur_size_in_bytes, tmz);
-
+ dst_offset, cur_size_in_bytes, copy_flags);
src_offset += cur_size_in_bytes;
dst_offset += cur_size_in_bytes;
byte_count -= cur_size_in_bytes;
@@ -2230,6 +2235,71 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
return 0;
}
+/**
+ * amdgpu_ttm_clear_buffer - clear memory buffers
+ * @bo: amdgpu buffer object
+ * @resv: reservation object
+ * @fence: dma_fence associated with the operation
+ *
+ * Clear the memory buffer resource.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+ struct dma_resv *resv,
+ struct dma_fence **fence)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct amdgpu_res_cursor cursor;
+ u64 addr;
+ int r;
+
+ if (!adev->mman.buffer_funcs_enabled)
+ return -EINVAL;
+
+ if (!fence)
+ return -EINVAL;
+
+ *fence = dma_fence_get_stub();
+
+ amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
+
+ mutex_lock(&adev->mman.gtt_window_lock);
+ while (cursor.remaining) {
+ struct dma_fence *next = NULL;
+ u64 size;
+
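+		/* Skip blocks the buddy allocator already reports as cleared */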
+ if (amdgpu_res_cleared(&cursor)) {
+ amdgpu_res_next(&cursor, cursor.size);
+ continue;
+ }
+
+ /* Never clear more than 256MiB at once to avoid timeouts */
+ size = min(cursor.size, 256ULL << 20);
+
+ r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor,
+ 1, ring, false, &size, &addr);
+ if (r)
+ goto err;
+
+ r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv,
+ &next, true, true);
+ if (r)
+ goto err;
+
+ dma_fence_put(*fence);
+ *fence = next;
+
+ amdgpu_res_next(&cursor, size);
+ }
+err:
+ mutex_unlock(&adev->mman.gtt_window_lock);
+
+ return r;
+}
+
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data,
struct dma_resv *resv,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 32cf6b6f6e..b6f53129de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -38,8 +38,6 @@
#define AMDGPU_GTT_MAX_TRANSFER_SIZE 512
#define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2
-#define AMDGPU_POISON 0xd0bed0be
-
extern const struct attribute_group amdgpu_vram_mgr_attr_group;
extern const struct attribute_group amdgpu_gtt_mgr_attr_group;
@@ -111,6 +109,8 @@ struct amdgpu_copy_mem {
unsigned long offset;
};
+#define AMDGPU_COPY_FLAGS_TMZ (1 << 0)
+
int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size);
void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev);
int amdgpu_preempt_mgr_init(struct amdgpu_device *adev);
@@ -151,13 +151,16 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence, bool direct_submit,
- bool vm_needs_flush, bool tmz);
+ bool vm_needs_flush, uint32_t copy_flags);
int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
const struct amdgpu_copy_mem *src,
const struct amdgpu_copy_mem *dst,
uint64_t size, bool tmz,
struct dma_resv *resv,
struct dma_fence **f);
+int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo,
+ struct dma_resv *resv,
+ struct dma_fence **fence);
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t src_data,
struct dma_resv *resv,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 6194457600..105d4de061 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -125,6 +125,7 @@ enum psp_fw_type {
PSP_FW_TYPE_PSP_INTF_DRV,
PSP_FW_TYPE_PSP_DBG_DRV,
PSP_FW_TYPE_PSP_RAS_DRV,
+ PSP_FW_TYPE_PSP_IPKEYMGR_DRV,
PSP_FW_TYPE_MAX_INDEX,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 6f7451e3ee..540e0f066b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -21,10 +21,13 @@
*
*/
+#include <linux/sort.h>
#include "amdgpu.h"
#include "umc_v6_7.h"
#define MAX_UMC_POISON_POLLING_TIME_SYNC 20 //ms
+#define MAX_UMC_HASH_STRING_SIZE 256
+
static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev,
struct ras_err_data *err_data, uint64_t err_addr,
uint32_t ch_inst, uint32_t umc_inst)
@@ -63,6 +66,8 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
goto out_fini_err_data;
}
+ err_data.err_addr_len = adev->umc.max_ras_err_cnt_per_query;
+
/*
* Translate UMC channel address to Physical address
*/
@@ -86,7 +91,7 @@ out_fini_err_data:
return ret;
}
-static void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
+void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
void *ras_error_status)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
@@ -118,6 +123,8 @@ static void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
if(!err_data->err_addr)
dev_warn(adev->dev, "Failed to alloc memory for "
"umc error address record!\n");
+ else
+ err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query;
/* umc query_ras_error_address is also responsible for clearing
* error status
@@ -143,6 +150,8 @@ static void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
if(!err_data->err_addr)
dev_warn(adev->dev, "Failed to alloc memory for "
"umc error address record!\n");
+ else
+ err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query;
/* umc query_ras_error_address is also responsible for clearing
* error status
@@ -178,7 +187,7 @@ static void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
void *ras_error_status,
struct amdgpu_iv_entry *entry,
- bool reset)
+ uint32_t reset)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -187,9 +196,7 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
amdgpu_umc_handle_bad_pages(adev, ras_error_status);
if (err_data->ue_count && reset) {
- /* use mode-2 reset for poison consumption */
- if (!entry)
- con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ con->gpu_reset_flags |= reset;
amdgpu_ras_reset_gpu(adev);
}
@@ -197,7 +204,7 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
}
int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev,
- bool reset, uint32_t timeout_ms)
+ uint32_t reset, uint32_t timeout_ms)
{
struct ras_err_data err_data;
struct ras_common_if head = {
@@ -239,16 +246,16 @@ int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev,
if (reset) {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- /* use mode-2 reset for poison consumption */
- con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ con->gpu_reset_flags |= reset;
amdgpu_ras_reset_gpu(adev);
}
return 0;
}
-int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
- enum amdgpu_ras_block block, bool reset)
+int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset)
{
int ret = AMDGPU_RAS_SUCCESS;
@@ -286,16 +293,14 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
amdgpu_ras_error_data_fini(&err_data);
} else {
- if (reset) {
- amdgpu_umc_bad_page_polling_timeout(adev,
- reset, MAX_UMC_POISON_POLLING_TIME_SYNC);
- } else {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ amdgpu_ras_put_poison_req(adev,
+ block, pasid, pasid_fn, data, reset);
+
atomic_inc(&con->page_retirement_req_cnt);
wake_up(&con->page_retirement_wq);
- }
}
} else {
if (adev->virt.ops && adev->virt.ops->ras_poison_handler)
@@ -308,11 +313,19 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
return ret;
}
+int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint32_t reset)
+{
+ return amdgpu_umc_pasid_poison_handler(adev,
+ block, 0, NULL, NULL, reset);
+}
+
int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
void *ras_error_status,
struct amdgpu_iv_entry *entry)
{
- return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true);
+ return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry,
+ AMDGPU_RAS_GPU_RESET_MODE1_RESET);
}
int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev)
@@ -389,14 +402,20 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
return 0;
}
-void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
+int amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
uint64_t err_addr,
uint64_t retired_page,
uint32_t channel_index,
uint32_t umc_inst)
{
- struct eeprom_table_record *err_rec =
- &err_data->err_addr[err_data->err_addr_cnt];
+ struct eeprom_table_record *err_rec;
+
+ if (!err_data ||
+ !err_data->err_addr ||
+ (err_data->err_addr_cnt >= err_data->err_addr_len))
+ return -EINVAL;
+
+ err_rec = &err_data->err_addr[err_data->err_addr_cnt];
err_rec->address = err_addr;
/* page frame address is saved */
@@ -408,6 +427,8 @@ void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
err_rec->mcumc_id = umc_inst;
err_data->err_addr_cnt++;
+
+ return 0;
}
int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
@@ -440,3 +461,76 @@ int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
return 0;
}
+
+int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr)
+{
+ if (adev->umc.ras->update_ecc_status)
+ return adev->umc.ras->update_ecc_status(adev,
+ status, ipid, addr);
+ return 0;
+}
+
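+/* Comparator for sort(): order pfns ascending so that the same set of
+ * pages always hashes to the same value.
+ */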
+static int amdgpu_umc_uint64_cmp(const void *a, const void *b)
+{
+ uint64_t *addr_a = (uint64_t *)a;
+ uint64_t *addr_b = (uint64_t *)b;
+
+ if (*addr_a > *addr_b)
+ return 1;
+ else if (*addr_a < *addr_b)
+ return -1;
+ else
+ return 0;
+}
+
+/* Use string hash to avoid logging the same bad pages repeatedly */
+int amdgpu_umc_build_pages_hash(struct amdgpu_device *adev,
+ uint64_t *pfns, int len, uint64_t *val)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ char buf[MAX_UMC_HASH_STRING_SIZE] = {0};
+ int offset = 0, i = 0;
+ uint64_t hash_val;
+
+ if (!pfns || !len)
+ return -EINVAL;
+
+ sort(pfns, len, sizeof(uint64_t), amdgpu_umc_uint64_cmp, NULL);
+
+ for (i = 0; i < len; i++)
+ offset += snprintf(&buf[offset], sizeof(buf) - offset, "%llx", pfns[i]);
+
+ hash_val = siphash(buf, offset, &con->umc_ecc_log.ecc_key);
+
+ *val = hash_val;
+
+ return 0;
+}
+
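+/* Insert an ECC error into the dedup tree; on success, reserve its bad
+ * pages and tag the entry as newly detected for the retirement work.
+ */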
+int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev,
+ struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_ecc_log_info *ecc_log;
+ int ret;
+
+ ecc_log = &con->umc_ecc_log;
+
+ mutex_lock(&ecc_log->lock);
+ ret = radix_tree_insert(ecc_tree, ecc_err->hash_index, ecc_err);
+ if (!ret) {
+ struct ras_err_pages *err_pages = &ecc_err->err_pages;
+ int i;
+
+		/* Reserve the bad pages so they cannot be allocated again */
+ for (i = 0; i < err_pages->count; i++)
+ amdgpu_ras_reserve_page(adev, err_pages->pfn[i]);
+
+ radix_tree_tag_set(ecc_tree,
+ ecc_err->hash_index, UMC_ECC_NEW_DETECTED_TAG);
+ }
+ mutex_unlock(&ecc_log->lock);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index 26d2ae498d..5f50c69c3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -52,6 +52,8 @@
#define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \
LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst))
+/* Page retirement tag */
+#define UMC_ECC_NEW_DETECTED_TAG 0x1
typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst,
uint32_t umc_inst, uint32_t ch_inst, void *data);
@@ -66,8 +68,8 @@ struct amdgpu_umc_ras {
void *ras_error_status);
bool (*check_ecc_err_status)(struct amdgpu_device *adev,
enum amdgpu_mca_error_type type, void *ras_error_status);
- /* support different eeprom table version for different asic */
- void (*set_eeprom_table_version)(struct amdgpu_ras_eeprom_table_header *hdr);
+ int (*update_ecc_status)(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr);
};
struct amdgpu_umc_funcs {
@@ -103,11 +105,14 @@ struct amdgpu_umc {
int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
- enum amdgpu_ras_block block, bool reset);
+ enum amdgpu_ras_block block, uint32_t reset);
+int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
+ enum amdgpu_ras_block block, uint16_t pasid,
+ pasid_notify pasid_fn, void *data, uint32_t reset);
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);
-void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
+int amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
uint64_t err_addr,
uint64_t retired_page,
uint32_t channel_index,
@@ -123,5 +128,15 @@ int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
umc_func func, void *data);
int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev,
- bool reset, uint32_t timeout_ms);
+ uint32_t reset, uint32_t timeout_ms);
+
+int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr);
+int amdgpu_umc_build_pages_hash(struct amdgpu_device *adev,
+ uint64_t *pfns, int len, uint64_t *val);
+int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev,
+ struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err);
+
+void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
+ void *ras_error_status);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
index f7c73533e3..e01c1c8e64 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c
@@ -878,6 +878,8 @@ static const struct amd_ip_funcs umsch_mm_v4_0_ip_funcs = {
.hw_fini = umsch_mm_hw_fini,
.suspend = umsch_mm_suspend,
.resume = umsch_mm_resume,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 9c514a606a..677eb14155 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -93,7 +93,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
int amdgpu_vcn_early_init(struct amdgpu_device *adev)
{
- char ucode_prefix[30];
+ char ucode_prefix[25];
char fw_name[40];
int r, i;
@@ -185,7 +185,10 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
- if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) {
+ if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(5, 0, 0)) {
+ fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared));
+ log_offset = offsetof(struct amdgpu_vcn5_fw_shared, fw_log);
+ } else if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) {
fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared));
log_offset = offsetof(struct amdgpu_vcn4_fw_shared, fw_log);
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index a418393d89..9f06def236 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -454,6 +454,16 @@ struct amdgpu_vcn_rb_metadata {
uint8_t pad[26];
};
+struct amdgpu_vcn5_fw_shared {
+ uint32_t present_flag_0;
+ uint8_t pad[12];
+ struct amdgpu_fw_shared_unified_queue_struct sq;
+ uint8_t pad1[8];
+ struct amdgpu_fw_shared_fw_logging fw_log;
+ struct amdgpu_fw_shared_rb_setup rb_setup;
+ uint8_t pad2[4];
+};
+
#define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80
#define VCN_BLOCK_DECODE_DISABLE_MASK 0x40
#define VCN_BLOCK_QUEUE_DISABLE_MASK 0xC0
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 7a4eae3677..54ab51a4ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -32,6 +32,7 @@
#include "amdgpu.h"
#include "amdgpu_ras.h"
+#include "amdgpu_reset.h"
#include "vi.h"
#include "soc15.h"
#include "nv.h"
@@ -424,7 +425,7 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
return -EINVAL;
if (pf2vf_info->size > 1024) {
- DRM_ERROR("invalid pf2vf message size\n");
+ dev_err(adev->dev, "invalid pf2vf message size: 0x%x\n", pf2vf_info->size);
return -EINVAL;
}
@@ -435,7 +436,9 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size,
adev->virt.fw_reserve.checksum_key, checksum);
if (checksum != checkval) {
- DRM_ERROR("invalid pf2vf message\n");
+ dev_err(adev->dev,
+ "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n",
+ checksum, checkval);
return -EINVAL;
}
@@ -449,7 +452,9 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size,
0, checksum);
if (checksum != checkval) {
- DRM_ERROR("invalid pf2vf message\n");
+ dev_err(adev->dev,
+ "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n",
+ checksum, checkval);
return -EINVAL;
}
@@ -485,7 +490,7 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->uuid;
break;
default:
- DRM_ERROR("invalid pf2vf version\n");
+ dev_err(adev->dev, "invalid pf2vf version: 0x%x\n", pf2vf_info->version);
return -EINVAL;
}
@@ -571,6 +576,11 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
vf2pf_info->decode_usage = 0;
vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr;
+ vf2pf_info->mes_info_addr = (uint64_t)adev->mes.resource_1_gpu_addr;
+
+	if (adev->mes.resource_1)
+		vf2pf_info->mes_info_size = adev->mes.resource_1->tbo.base.size;
vf2pf_info->checksum =
amd_sriov_msg_checksum(
vf2pf_info, vf2pf_info->header.size, 0, 0);
@@ -584,8 +594,22 @@ static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work)
int ret;
ret = amdgpu_virt_read_pf2vf_data(adev);
- if (ret)
+ if (ret) {
+ adev->virt.vf2pf_update_retry_cnt++;
+ if ((adev->virt.vf2pf_update_retry_cnt >= AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT) &&
+ amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev)) {
+ amdgpu_ras_set_fed(adev, true);
+ if (amdgpu_reset_domain_schedule(adev->reset_domain,
+ &adev->virt.flr_work))
+ return;
+ else
+				dev_err(adev->dev, "Failed to queue work at %s!\n", __func__);
+ }
+
goto out;
+ }
+
+ adev->virt.vf2pf_update_retry_cnt = 0;
amdgpu_virt_write_vf2pf_data(adev);
out:
@@ -606,6 +630,7 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
adev->virt.fw_reserve.p_pf2vf = NULL;
adev->virt.fw_reserve.p_vf2pf = NULL;
adev->virt.vf2pf_update_interval_ms = 0;
+ adev->virt.vf2pf_update_retry_cnt = 0;
if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!");
@@ -705,12 +730,6 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
}
- if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
- /* VF MMIO access (except mailbox range) from CPU
- * will be blocked during sriov runtime
- */
- adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT;
-
/* we have the ability to check now */
if (amdgpu_sriov_vf(adev)) {
switch (adev->asic_type) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 3f59b7b552..642f1fd287 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -52,6 +52,8 @@
/* tonga/fiji use this offset */
#define mmBIF_IOV_FUNC_IDENTIFIER 0x1503
+#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 5
+
enum amdgpu_sriov_vf_mode {
SRIOV_VF_MODE_BARE_METAL = 0,
SRIOV_VF_MODE_ONE_VF,
@@ -130,6 +132,8 @@ enum AMDGIM_FEATURE_FLAG {
AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6),
/* VCN RB decouple */
AMDGIM_FEATURE_VCN_RB_DECOUPLE = (1 << 7),
+ /* MES info */
+ AMDGIM_FEATURE_MES_INFO_ENABLE = (1 << 8),
};
enum AMDGIM_REG_ACCESS_FLAG {
@@ -257,6 +261,7 @@ struct amdgpu_virt {
/* vf2pf message */
struct delayed_work vf2pf_work;
uint32_t vf2pf_update_interval_ms;
+ int vf2pf_update_retry_cnt;
/* multimedia bandwidth config */
bool is_mm_bw_enabled;
@@ -332,6 +337,8 @@ static inline bool is_virtual_machine(void)
((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT)
#define amdgpu_sriov_is_vcn_rb_decouple(adev) \
((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE)
+#define amdgpu_sriov_is_mes_info_enable(adev) \
+ ((adev)->virt.gim_feature & AMDGIM_FEATURE_MES_INFO_ENABLE)
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
void amdgpu_virt_init_setting(struct amdgpu_device *adev);
int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index ba6d1876ce..fde66225c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -672,6 +672,8 @@ static const struct amd_ip_funcs amdgpu_vkms_ip_funcs = {
.soft_reset = amdgpu_vkms_soft_reset,
.set_clockgating_state = amdgpu_vkms_set_clockgating_state,
.set_powergating_state = amdgpu_vkms_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
const struct amdgpu_ip_block_version amdgpu_vkms_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 94089069c9..0f71060664 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -434,7 +434,7 @@ uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm)
if (!vm)
return result;
- result += vm->generation;
+ result += lower_32_bits(vm->generation);
/* Add one if the page tables will be re-generated on next CS */
if (drm_sched_entity_error(&vm->delayed))
++result;
@@ -463,13 +463,14 @@ int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int (*validate)(void *p, struct amdgpu_bo *bo),
void *param)
{
+ uint64_t new_vm_generation = amdgpu_vm_generation(adev, vm);
struct amdgpu_vm_bo_base *bo_base;
struct amdgpu_bo *shadow;
struct amdgpu_bo *bo;
int r;
- if (drm_sched_entity_error(&vm->delayed)) {
- ++vm->generation;
+ if (vm->generation != new_vm_generation) {
+ vm->generation = new_vm_generation;
amdgpu_vm_bo_reset_state_machine(vm);
amdgpu_vm_fini_entities(vm);
r = amdgpu_vm_init_entities(adev, vm);
@@ -886,6 +887,44 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
}
/**
+ * amdgpu_vm_tlb_flush - prepare TLB flush
+ *
+ * @params: parameters for update
+ * @fence: input fence to sync TLB flush with
+ * @tlb_cb: the callback structure
+ *
+ * Increments the tlb sequence to make sure that future CS execute a VM flush.
+ */
+static void
+amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
+ struct dma_fence **fence,
+ struct amdgpu_vm_tlb_seq_struct *tlb_cb)
+{
+ struct amdgpu_vm *vm = params->vm;
+
+ if (!fence || !*fence)
+ return;
+
+ tlb_cb->vm = vm;
+ if (!dma_fence_add_callback(*fence, &tlb_cb->cb,
+ amdgpu_vm_tlb_seq_cb)) {
+ dma_fence_put(vm->last_tlb_flush);
+ vm->last_tlb_flush = dma_fence_get(*fence);
+ } else {
+ amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
+ }
+
+ /* Prepare a TLB flush fence to be attached to PTs */
+ if (!params->unlocked && vm->is_compute_context) {
+ amdgpu_vm_tlb_fence_create(params->adev, vm, fence);
+
+ /* Makes sure no PD/PT is freed before the flush */
+ dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
+ DMA_RESV_USAGE_BOOKKEEP);
+ }
+}
+
+/**
* amdgpu_vm_update_range - update a range in the vm page table
*
* @adev: amdgpu_device pointer to use for commands
@@ -916,8 +955,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct ttm_resource *res, dma_addr_t *pages_addr,
struct dma_fence **fence)
{
- struct amdgpu_vm_update_params params;
struct amdgpu_vm_tlb_seq_struct *tlb_cb;
+ struct amdgpu_vm_update_params params;
struct amdgpu_res_cursor cursor;
enum amdgpu_sync_mode sync_mode;
int r, idx;
@@ -927,8 +966,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);
if (!tlb_cb) {
- r = -ENOMEM;
- goto error_unlock;
+ drm_dev_exit(idx);
+ return -ENOMEM;
}
/* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache,
@@ -948,7 +987,9 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
params.immediate = immediate;
params.pages_addr = pages_addr;
params.unlocked = unlocked;
+ params.needs_flush = flush_tlb;
params.allow_override = allow_override;
+ INIT_LIST_HEAD(&params.tlb_flush_waitlist);
/* Implicitly sync to command submissions in the same VM before
* unmapping. Sync to moving fences before mapping.
@@ -1031,24 +1072,18 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
}
r = vm->update_funcs->commit(&params, fence);
+ if (r)
+ goto error_free;
- if (flush_tlb || params.table_freed) {
- tlb_cb->vm = vm;
- if (fence && *fence &&
- !dma_fence_add_callback(*fence, &tlb_cb->cb,
- amdgpu_vm_tlb_seq_cb)) {
- dma_fence_put(vm->last_tlb_flush);
- vm->last_tlb_flush = dma_fence_get(*fence);
- } else {
- amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
- }
+ if (params.needs_flush) {
+ amdgpu_vm_tlb_flush(&params, fence, tlb_cb);
tlb_cb = NULL;
}
+ amdgpu_vm_pt_free_list(adev, &params);
+
error_free:
kfree(tlb_cb);
-
-error_unlock:
amdgpu_vm_eviction_unlock(vm);
drm_dev_exit(idx);
return r;
@@ -2407,10 +2442,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
vm->last_update = dma_fence_get_stub();
vm->last_unlocked = dma_fence_get_stub();
vm->last_tlb_flush = dma_fence_get_stub();
- vm->generation = 0;
+ vm->generation = amdgpu_vm_generation(adev, NULL);
mutex_init(&vm->eviction_lock);
vm->evicting = false;
+ vm->tlb_fence_context = dma_fence_context_alloc(1);
r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
false, &root, xcp_id);
@@ -2944,6 +2980,14 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
if (vm && status) {
vm->fault_info.addr = addr;
vm->fault_info.status = status;
+		/*
+		 * Also cache the fault information globally so that it is
+		 * still available later, when the vm may be stale or freed.
+		 */
+ adev->vm_manager.fault_info.addr = addr;
+ adev->vm_manager.fault_info.vmhub = vmhub;
+ adev->vm_manager.fault_info.status = status;
+
if (AMDGPU_IS_GFXHUB(vmhub)) {
vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_GFX;
vm->fault_info.vmhub |=
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 047ec1930d..54d7da396d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -257,15 +257,20 @@ struct amdgpu_vm_update_params {
unsigned int num_dw_left;
/**
- * @table_freed: return true if page table is freed when updating
+ * @needs_flush: true whenever we need to invalidate the TLB
*/
- bool table_freed;
+ bool needs_flush;
/**
* @allow_override: true for memory that is not uncached: allows MTYPE
* to be overridden for NUMA local memory.
*/
bool allow_override;
+
+ /**
+ * @tlb_flush_waitlist: temporary storage for BOs until tlb_flush
+ */
+ struct list_head tlb_flush_waitlist;
};
struct amdgpu_vm_update_funcs {
@@ -342,6 +347,7 @@ struct amdgpu_vm {
atomic64_t tlb_seq;
struct dma_fence *last_tlb_flush;
atomic64_t kfd_last_flushed_seq;
+ uint64_t tlb_fence_context;
/* How many times we had to re-generate the page tables */
uint64_t generation;
@@ -422,6 +428,8 @@ struct amdgpu_vm_manager {
* look up VM of a page fault
*/
struct xarray pasids;
+ /* Global registration of recent page fault information */
+ struct amdgpu_vm_fault_info fault_info;
};
struct amdgpu_bo_va_mapping;
@@ -544,6 +552,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
uint64_t start, uint64_t end,
uint64_t dst, uint64_t flags);
void amdgpu_vm_pt_free_work(struct work_struct *work);
+void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
+ struct amdgpu_vm_update_params *params);
#if defined(CONFIG_DEBUG_FS)
void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
@@ -609,5 +619,8 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
uint64_t addr,
uint32_t status,
unsigned int vmhub);
+void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct dma_fence **fence);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
index 6e31621452..3895bd7d17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -108,7 +108,9 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p,
static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p,
struct dma_fence **fence)
{
- /* Flush HDP */
+ if (p->needs_flush)
+ atomic64_inc(&p->vm->tlb_seq);
+
mb();
amdgpu_device_flush_hdp(p->adev, NULL);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 124389a6bf..f07647a9a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -622,40 +622,58 @@ void amdgpu_vm_pt_free_work(struct work_struct *work)
}
/**
- * amdgpu_vm_pt_free_dfs - free PD/PT levels
+ * amdgpu_vm_pt_free_list - free PD/PT levels
*
* @adev: amdgpu device structure
- * @vm: amdgpu vm structure
- * @start: optional cursor where to start freeing PDs/PTs
- * @unlocked: vm resv unlock status
+ * @params: see amdgpu_vm_update_params definition
*
- * Free the page directory or page table level and all sub levels.
+ * Free the page directory objects saved in the flush list
*/
-static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_vm_pt_cursor *start,
- bool unlocked)
+void amdgpu_vm_pt_free_list(struct amdgpu_device *adev,
+ struct amdgpu_vm_update_params *params)
{
- struct amdgpu_vm_pt_cursor cursor;
- struct amdgpu_vm_bo_base *entry;
+ struct amdgpu_vm_bo_base *entry, *next;
+ struct amdgpu_vm *vm = params->vm;
+ bool unlocked = params->unlocked;
+
+ if (list_empty(&params->tlb_flush_waitlist))
+ return;
if (unlocked) {
spin_lock(&vm->status_lock);
- for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
- list_move(&entry->vm_status, &vm->pt_freed);
-
- if (start)
- list_move(&start->entry->vm_status, &vm->pt_freed);
+ list_splice_init(&params->tlb_flush_waitlist, &vm->pt_freed);
spin_unlock(&vm->status_lock);
schedule_work(&vm->pt_free_work);
return;
}
- for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
+ list_for_each_entry_safe(entry, next, &params->tlb_flush_waitlist, vm_status)
amdgpu_vm_pt_free(entry);
+}
- if (start)
- amdgpu_vm_pt_free(start->entry);
+/**
+ * amdgpu_vm_pt_add_list - add PD/PT level to the flush list
+ *
+ * @params: parameters for the update
+ * @cursor: first PT entry to start the depth-first search from, must not be NULL
+ *
+ * The entries moved to the flush list are freed after the TLB flush.
+ */
+static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ struct amdgpu_vm_pt_cursor seek;
+ struct amdgpu_vm_bo_base *entry;
+
+ spin_lock(&params->vm->status_lock);
+ for_each_amdgpu_vm_pt_dfs_safe(params->adev, params->vm, cursor, seek, entry) {
+ if (entry && entry->bo)
+ list_move(&entry->vm_status, &params->tlb_flush_waitlist);
+ }
+
+ /* now queue the start entry itself */
+ list_move(&cursor->entry->vm_status, &params->tlb_flush_waitlist);
+ spin_unlock(&params->vm->status_lock);
}
/**
@@ -667,7 +685,13 @@ static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
*/
void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
- amdgpu_vm_pt_free_dfs(adev, vm, NULL, false);
+ struct amdgpu_vm_pt_cursor cursor;
+ struct amdgpu_vm_bo_base *entry;
+
+ for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) {
+ if (entry)
+ amdgpu_vm_pt_free(entry);
+ }
}
/**
@@ -682,11 +706,15 @@ int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
struct amdgpu_vm_bo_base *entry)
{
struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry);
- struct amdgpu_bo *bo = parent->bo, *pbo;
+ struct amdgpu_bo *bo, *pbo;
struct amdgpu_vm *vm = params->vm;
uint64_t pde, pt, flags;
unsigned int level;
+ if (WARN_ON(!parent))
+ return -EINVAL;
+
+ bo = parent->bo;
for (level = 0, pbo = bo->parent; pbo; ++level)
pbo = pbo->parent;
@@ -972,10 +1000,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
while (cursor.pfn < frag_start) {
/* Make sure previous mapping is freed */
if (cursor.entry->bo) {
- params->table_freed = true;
- amdgpu_vm_pt_free_dfs(adev, params->vm,
- &cursor,
- params->unlocked);
+ params->needs_flush = true;
+ amdgpu_vm_pt_add_list(params, &cursor);
}
amdgpu_vm_pt_next(adev, &cursor);
}
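
The tlb_flush_waitlist handling above is a standard deferred-free idiom: producers move entries onto a private list under the status lock, and a later point (here, after the TLB flush) walks the list with no lock held. A self-contained sketch of the same shape, all names hypothetical:

    struct my_obj {
            struct list_head node;
    };

    struct deferred_free {
            spinlock_t lock;
            struct list_head pending;       /* objects queued for freeing */
    };

    static void deferred_free_drain(struct deferred_free *df)
    {
            struct my_obj *obj, *tmp;
            LIST_HEAD(local);

            spin_lock(&df->lock);
            list_splice_init(&df->pending, &local); /* detach under the lock */
            spin_unlock(&df->lock);

            /* free with no lock held, mirroring amdgpu_vm_pt_free_list() */
            list_for_each_entry_safe(obj, tmp, &local, node)
                    kfree(obj);
    }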
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index 349416e176..66e8a01612 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -126,6 +126,10 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
WARN_ON(ib->length_dw == 0);
amdgpu_ring_pad_ib(ring, ib);
+
+ if (p->needs_flush)
+ atomic64_inc(&p->vm->tlb_seq);
+
WARN_ON(ib->length_dw > p->num_dw_left);
f = amdgpu_job_submit(p->job);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
new file mode 100644
index 0000000000..51cddfa3f1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/workqueue.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_gmc.h"
+
+struct amdgpu_tlb_fence {
+ struct dma_fence base;
+ struct amdgpu_device *adev;
+ struct dma_fence *dependency;
+ struct work_struct work;
+ spinlock_t lock;
+ uint16_t pasid;
+};
+
+static const char *amdgpu_tlb_fence_get_driver_name(struct dma_fence *fence)
+{
+ return "amdgpu tlb fence";
+}
+
+static const char *amdgpu_tlb_fence_get_timeline_name(struct dma_fence *f)
+{
+ return "amdgpu tlb timeline";
+}
+
+static void amdgpu_tlb_fence_work(struct work_struct *work)
+{
+ struct amdgpu_tlb_fence *f = container_of(work, typeof(*f), work);
+ int r;
+
+ if (f->dependency) {
+ dma_fence_wait(f->dependency, false);
+ dma_fence_put(f->dependency);
+ f->dependency = NULL;
+ }
+
+ r = amdgpu_gmc_flush_gpu_tlb_pasid(f->adev, f->pasid, 2, true, 0);
+ if (r) {
+ dev_err(f->adev->dev, "TLB flush failed for PASID %d.\n",
+ f->pasid);
+ dma_fence_set_error(&f->base, r);
+ }
+
+ dma_fence_signal(&f->base);
+ dma_fence_put(&f->base);
+}
+
+static const struct dma_fence_ops amdgpu_tlb_fence_ops = {
+ .use_64bit_seqno = true,
+ .get_driver_name = amdgpu_tlb_fence_get_driver_name,
+ .get_timeline_name = amdgpu_tlb_fence_get_timeline_name
+};
+
+void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct dma_fence **fence)
+{
+ struct amdgpu_tlb_fence *f;
+
+ f = kmalloc(sizeof(*f), GFP_KERNEL);
+ if (!f) {
+ /*
+ * We can't fail since the PDEs and PTEs are already updated, so
+ * just block for the dependency and execute the TLB flush
+ */
+ if (*fence)
+ dma_fence_wait(*fence, false);
+
+ amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, 2, true, 0);
+ *fence = dma_fence_get_stub();
+ return;
+ }
+
+ f->adev = adev;
+ f->dependency = *fence;
+ f->pasid = vm->pasid;
+ INIT_WORK(&f->work, amdgpu_tlb_fence_work);
+ spin_lock_init(&f->lock);
+
+ dma_fence_init(&f->base, &amdgpu_tlb_fence_ops, &f->lock,
+ vm->tlb_fence_context, atomic64_read(&vm->tlb_seq));
+
+ /* TODO: We probably need a separate wq here */
+ dma_fence_get(&f->base);
+ schedule_work(&f->work);
+
+ *fence = &f->base;
+}
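
Callers pass in the fence from the page-table update; the function replaces it with the TLB-flush fence, which signals only after the dependency has signaled and the PASID flush has run. A hedged caller-side sketch (the real call site lives in the amdgpu_vm update path; pt_update_fence is assumed):

    struct dma_fence *fence = pt_update_fence;   /* from the PT update */

    amdgpu_vm_tlb_fence_create(adev, vm, &fence);

    /* fence now represents "PT update done AND TLB flushed" */
    dma_fence_wait(fence, false);
    dma_fence_put(fence);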
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 8db8802443..6c30eceec8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -450,6 +450,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
u64 vis_usage = 0, max_bytes, min_block_size;
struct amdgpu_vram_mgr_resource *vres;
u64 size, remaining_size, lpfn, fpfn;
@@ -468,7 +469,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
if (tbo->type != ttm_bo_type_kernel)
max_bytes -= AMDGPU_VM_RESERVED_VRAM;
- if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) {
pages_per_block = ~0ul;
} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -477,7 +478,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
/* default to 2MB */
pages_per_block = 2UL << (20UL - PAGE_SHIFT);
#endif
- pages_per_block = max_t(uint32_t, pages_per_block,
+ pages_per_block = max_t(u32, pages_per_block,
tbo->page_alignment);
}
@@ -498,9 +499,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
if (place->flags & TTM_PL_FLAG_TOPDOWN)
vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
- if (place->flags & TTM_PL_FLAG_CONTIGUOUS)
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+ if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
+ vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION;
+
if (fpfn || lpfn != mgr->mm.size)
/* Allocate blocks in desired range */
vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
@@ -514,21 +518,31 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
else
min_block_size = mgr->default_page_size;
- BUG_ON(min_block_size < mm->chunk_size);
-
/* Limit maximum size to 2GiB due to SG table limitations */
size = min(remaining_size, 2ULL << 30);
if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
- !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
+ !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
min_block_size = (u64)pages_per_block << PAGE_SHIFT;
+ BUG_ON(min_block_size < mm->chunk_size);
+
r = drm_buddy_alloc_blocks(mm, fpfn,
lpfn,
size,
min_block_size,
&vres->blocks,
vres->flags);
+
+ if (unlikely(r == -ENOSPC) && pages_per_block == ~0ul &&
+ !(place->flags & TTM_PL_FLAG_CONTIGUOUS)) {
+ vres->flags &= ~DRM_BUDDY_CONTIGUOUS_ALLOCATION;
+ pages_per_block = max_t(u32, 2UL << (20UL - PAGE_SHIFT),
+ tbo->page_alignment);
+
+ continue;
+ }
+
if (unlikely(r))
goto error_free_blocks;
@@ -571,7 +585,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
return 0;
error_free_blocks:
- drm_buddy_free_list(mm, &vres->blocks);
+ drm_buddy_free_list(mm, &vres->blocks, 0);
mutex_unlock(&mgr->lock);
error_fini:
ttm_resource_fini(man, &vres->base);
@@ -604,7 +618,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
amdgpu_vram_mgr_do_reserve(man);
- drm_buddy_free_list(mm, &vres->blocks);
+ drm_buddy_free_list(mm, &vres->blocks, vres->flags);
mutex_unlock(&mgr->lock);
atomic64_sub(vis_usage, &mgr->vis_usage);
@@ -912,7 +926,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
kfree(rsv);
list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
- drm_buddy_free_list(&mgr->mm, &rsv->allocated);
+ drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0);
kfree(rsv);
}
if (!adev->gmc.is_app_apu)
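
The -ENOSPC branch above implements a soft fallback: contiguity is now requested via the BO's AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS flag, and when such a best-effort request cannot be satisfied the allocation is retried as a regular (possibly fragmented) one, unless the TTM placement explicitly demanded contiguous memory. A condensed sketch of the retry, with the surrounding locals assumed:

    r = drm_buddy_alloc_blocks(mm, fpfn, lpfn, size, min_block_size,
                               &vres->blocks, vres->flags);
    if (r == -ENOSPC &&
        (vres->flags & DRM_BUDDY_CONTIGUOUS_ALLOCATION) &&
        !(place->flags & TTM_PL_FLAG_CONTIGUOUS)) {
            /* drop the contiguous requirement and try again */
            vres->flags &= ~DRM_BUDDY_CONTIGUOUS_ALLOCATION;
            r = drm_buddy_alloc_blocks(mm, fpfn, lpfn, size, min_block_size,
                                       &vres->blocks, vres->flags);
    }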
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
index 0e04e42cf8..b256cbc2bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
@@ -53,10 +53,20 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
return (u64)PAGE_SIZE << drm_buddy_block_order(block);
}
+static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block)
+{
+ return drm_buddy_block_is_clear(block);
+}
+
static inline struct amdgpu_vram_mgr_resource *
to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
{
return container_of(res, struct amdgpu_vram_mgr_resource, base);
}
+static inline void amdgpu_vram_mgr_set_cleared(struct ttm_resource *res)
+{
+ to_amdgpu_vram_mgr_resource(res)->flags |= DRM_BUDDY_CLEARED;
+}
+
#endif
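
These helpers plumb drm_buddy's cleared-block tracking into the VRAM manager: amdgpu_vram_mgr_is_cleared() asks whether a block still holds zeroed memory, and amdgpu_vram_mgr_set_cleared() marks a freed resource so its blocks return to the clean pool. A small usage sketch (hypothetical helper; drm_buddy blocks chain through their 'link' member):

    /* Sketch: does every block backing this resource still hold zeroes? */
    static bool my_vram_res_is_cleared(struct ttm_resource *res)
    {
            struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
            struct drm_buddy_block *block;

            list_for_each_entry(block, &vres->blocks, link)
                    if (!amdgpu_vram_mgr_is_cleared(block))
                            return false;
            return true;
    }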
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 20d51f6c9b..dd2ec48cf5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -1035,15 +1035,16 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
return 0;
}
-static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type,
- struct aca_bank_report *report, void *data)
+static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
{
struct amdgpu_device *adev = handle->adev;
+ struct aca_bank_info info;
const char *error_str;
- u64 status;
+ u64 status, count;
int ret, ext_error_code;
- ret = aca_bank_info_decode(bank, &report->info);
+ ret = aca_bank_info_decode(bank, &info);
if (ret)
return ret;
@@ -1055,15 +1056,28 @@ static int xgmi_v6_4_0_aca_bank_generate_report(struct aca_handle *handle, struc
if (error_str)
dev_info(adev->dev, "%s detected\n", error_str);
- if ((type == ACA_ERROR_TYPE_UE && ext_error_code == 0) ||
- (type == ACA_ERROR_TYPE_CE && ext_error_code == 6))
- report->count[type] = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]);
+ count = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]);
- return 0;
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ if (ext_error_code != 0 && ext_error_code != 9)
+ count = 0ULL;
+
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, count);
+ break;
+ case ACA_SMU_TYPE_CE:
+ count = ext_error_code == 6 ? count : 0ULL;
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, count);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return ret;
}
static const struct aca_bank_ops xgmi_v6_4_0_aca_bank_ops = {
- .aca_bank_generate_report = xgmi_v6_4_0_aca_bank_generate_report,
+ .aca_bank_parser = xgmi_v6_4_0_aca_bank_parser,
};
static const struct aca_info xgmi_v6_4_0_aca_info = {
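
The parser now logs counts into the ACA error cache instead of filling a per-bank report, gating the count on the extended error code: UEs count only for ext codes 0 and 9, CEs only for ext code 6. The rule, restated as a hypothetical helper:

    static u64 my_xgmi_gated_count(enum aca_smu_type type, int ext_code, u64 count)
    {
            switch (type) {
            case ACA_SMU_TYPE_UE:
                    return (ext_code == 0 || ext_code == 9) ? count : 0ULL;
            case ACA_SMU_TYPE_CE:
                    return ext_code == 6 ? count : 0ULL;
            default:
                    return 0ULL;
            }
    }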
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 1592c63b30..a3bfc16de6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -44,6 +44,7 @@ struct amdgpu_hive_info {
struct amdgpu_reset_domain *reset_domain;
atomic_t ras_recovery;
+ struct ras_event_manager event_mgr;
};
struct amdgpu_pcs_ras_field {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 51a14f6d93..fb2b394bb9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -94,7 +94,8 @@ union amd_sriov_msg_feature_flags {
uint32_t reg_indirect_acc : 1;
uint32_t av1_support : 1;
uint32_t vcn_rb_decouple : 1;
- uint32_t reserved : 24;
+ uint32_t mes_info_enable : 1;
+ uint32_t reserved : 23;
} flags;
uint32_t all;
};
@@ -157,7 +158,7 @@ struct amd_sriov_msg_pf2vf_info_header {
uint32_t reserved[2];
};
-#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (48)
+#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (49)
struct amd_sriov_msg_pf2vf_info {
/* header contains size and version */
struct amd_sriov_msg_pf2vf_info_header header;
@@ -208,6 +209,8 @@ struct amd_sriov_msg_pf2vf_info {
struct amd_sriov_msg_uuid_info uuid_info;
/* PCIE atomic ops support flag */
uint32_t pcie_atomic_ops_support_flags;
+ /* Portion of GPU memory occupied by the VF. MAX value is 65535, but kept as uint32_t to maintain alignment with the reserved size */
+ uint32_t gpu_capacity;
/* reserved */
uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE];
};
@@ -221,7 +224,7 @@ struct amd_sriov_msg_vf2pf_info_header {
uint32_t reserved[2];
};
-#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (70)
+#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (73)
struct amd_sriov_msg_vf2pf_info {
/* header contains size and version */
struct amd_sriov_msg_vf2pf_info_header header;
@@ -265,7 +268,9 @@ struct amd_sriov_msg_vf2pf_info {
uint32_t version;
} ucode_info[AMD_SRIOV_MSG_RESERVE_UCODE];
uint64_t dummy_page_addr;
-
+ /* FB allocated for guest MES to record UQ info */
+ uint64_t mes_info_addr;
+ uint32_t mes_info_size;
/* reserved */
uint32_t reserved[256 - AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE];
};
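
The FILLED_SIZE constants count the 32-bit words used so far, so the reserved[] tail always pads each struct to exactly 256 dwords. The bumps check out against the new fields: pf2vf gains one uint32_t (gpu_capacity), 48 + 1 = 49; vf2pf gains one uint64_t (mes_info_addr, 2 dwords) plus one uint32_t (mes_info_size), 70 + 2 + 1 = 73.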
diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
index fbb43ae762..d4e2aed2ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
@@ -422,7 +422,7 @@ __aqua_vanjaram_get_auto_mode(struct amdgpu_xcp_mgr *xcp_mgr)
if (adev->gmc.num_mem_partitions == num_xcc / 2)
return (adev->flags & AMD_IS_APU) ? AMDGPU_TPX_PARTITION_MODE :
- AMDGPU_QPX_PARTITION_MODE;
+ AMDGPU_CPX_PARTITION_MODE;
if (adev->gmc.num_mem_partitions == 2 && !(adev->flags & AMD_IS_APU))
return AMDGPU_DPX_PARTITION_MODE;
@@ -630,7 +630,7 @@ static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev)
int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev)
{
- u32 mask, inst_mask = adev->sdma.sdma_mask;
+ u32 mask, avail_inst, inst_mask = adev->sdma.sdma_mask;
int ret, i;
/* generally 1 AID supports 4 instances */
@@ -642,7 +642,9 @@ int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev)
for (mask = (1 << adev->sdma.num_inst_per_aid) - 1; inst_mask;
inst_mask >>= adev->sdma.num_inst_per_aid, ++i) {
- if ((inst_mask & mask) == mask)
+ avail_inst = inst_mask & mask;
+ if (avail_inst == mask || avail_inst == 0x3 ||
+ avail_inst == 0xc)
adev->aid_mask |= (1 << i);
}
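
The relaxed check now also accepts partially populated AIDs whose low pair (0x3) or high pair (0xc) of SDMA instances is present, not just fully populated groups. Worked example, assuming num_inst_per_aid = 4 (so mask = 0xf): an sdma_mask of 0xf3 yields avail_inst = 0x3 for AID0 (accepted as a low pair) and 0xf for AID1 (fully populated), so aid_mask ends up 0x3.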
diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
index 72362df352..d552e01335 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -1243,6 +1243,7 @@ static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index,
ectx.ps_size = params_size;
ectx.abort = false;
ectx.last_jump = 0;
+ ectx.last_jump_jiffies = 0;
if (ws) {
ectx.ws = kcalloc(4, ws, GFP_KERNEL);
ectx.ws_size = ws;
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index a3a643254d..cf1d5d462b 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1375,14 +1375,14 @@ static int cik_asic_pci_config_reset(struct amdgpu_device *adev)
return r;
}
-static bool cik_asic_supports_baco(struct amdgpu_device *adev)
+static int cik_asic_supports_baco(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_BONAIRE:
case CHIP_HAWAII:
return amdgpu_dpm_is_baco_supported(adev);
default:
- return false;
+ return 0;
}
}
@@ -2210,6 +2210,8 @@ static const struct amd_ip_funcs cik_common_ip_funcs = {
.soft_reset = cik_common_soft_reset,
.set_clockgating_state = cik_common_set_clockgating_state,
.set_powergating_state = cik_common_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ip_block_version cik_common_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index f24e34dc33..576baa9dbb 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -435,6 +435,8 @@ static const struct amd_ip_funcs cik_ih_ip_funcs = {
.soft_reset = cik_ih_soft_reset,
.set_clockgating_state = cik_ih_set_clockgating_state,
.set_powergating_state = cik_ih_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ih_funcs cik_ih_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index a3fccc4c1f..6948ebda0f 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -1228,6 +1228,8 @@ static const struct amd_ip_funcs cik_sdma_ip_funcs = {
.soft_reset = cik_sdma_soft_reset,
.set_clockgating_state = cik_sdma_set_clockgating_state,
.set_powergating_state = cik_sdma_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
@@ -1290,7 +1292,7 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: is this a secure operation
+ * @copy_flags: unused
*
* Copy GPU buffers using the DMA engine (CIK).
* Used by the amdgpu ttm implementation to move pages if
@@ -1300,7 +1302,7 @@ static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
ib->ptr[ib->length_dw++] = byte_count;
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index c19681492e..0726437873 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -433,6 +433,8 @@ static const struct amd_ip_funcs cz_ih_ip_funcs = {
.soft_reset = cz_ih_soft_reset,
.set_clockgating_state = cz_ih_set_clockgating_state,
.set_powergating_state = cz_ih_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ih_funcs cz_ih_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 221af054d8..b44fce44c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -3333,6 +3333,8 @@ static const struct amd_ip_funcs dce_v10_0_ip_funcs = {
.soft_reset = dce_v10_0_soft_reset,
.set_clockgating_state = dce_v10_0_set_clockgating_state,
.set_powergating_state = dce_v10_0_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static void
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 69e8b0db6c..80b2e7f79a 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -3464,6 +3464,8 @@ static const struct amd_ip_funcs dce_v11_0_ip_funcs = {
.soft_reset = dce_v11_0_soft_reset,
.set_clockgating_state = dce_v11_0_set_clockgating_state,
.set_powergating_state = dce_v11_0_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static void
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 60d40201fd..db20012600 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -3154,6 +3154,8 @@ static const struct amd_ip_funcs dce_v6_0_ip_funcs = {
.soft_reset = dce_v6_0_soft_reset,
.set_clockgating_state = dce_v6_0_set_clockgating_state,
.set_powergating_state = dce_v6_0_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static void
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 5a5fcc45e4..5b56100ec9 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -3242,6 +3242,8 @@ static const struct amd_ip_funcs dce_v8_0_ip_funcs = {
.soft_reset = dce_v8_0_soft_reset,
.set_clockgating_state = dce_v8_0_set_clockgating_state,
.set_powergating_state = dce_v8_0_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static void
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 701146d649..536287ddd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -276,6 +276,99 @@ MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec2.bin");
MODULE_FIRMWARE("amdgpu/gc_10_3_7_rlc.bin");
+static const struct amdgpu_hwip_reg_entry gc_reg_list_10_1[] = {
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS3),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HPD_STATUS0),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_RPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSQG_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL0_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_CNTL),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_3),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_4),
+ SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_GPM_STAT_2),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SPP_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_BOOTLOAD_STATUS),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_A),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_B),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_ADDR),
+ SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST)
+};
+
static const struct soc15_reg_golden golden_settings_gc_10_1[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100),
@@ -3964,7 +4057,7 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
{
- char fw_name[40];
+ char fw_name[53];
char ucode_prefix[30];
const char *wks = "";
int err;
@@ -4490,6 +4583,22 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
hw_prio, NULL);
}
+static void gfx_v10_0_alloc_dump_mem(struct amdgpu_device *adev)
+{
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
+ uint32_t *ptr;
+
+ ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
+ if (ptr == NULL) {
+ DRM_ERROR("Failed to allocate memory for IP Dump\n");
+ adev->gfx.ip_dump = NULL;
+ adev->gfx.reg_count = 0;
+ } else {
+ adev->gfx.ip_dump = ptr;
+ adev->gfx.reg_count = reg_count;
+ }
+}
+
static int gfx_v10_0_sw_init(void *handle)
{
int i, j, k, r, ring_id = 0;
@@ -4518,7 +4627,7 @@ static int gfx_v10_0_sw_init(void *handle)
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 7):
adev->gfx.me.num_me = 1;
- adev->gfx.me.num_pipe_per_me = 1;
+ adev->gfx.me.num_pipe_per_me = 2;
adev->gfx.me.num_queue_per_pipe = 1;
adev->gfx.mec.num_mec = 2;
adev->gfx.mec.num_pipe_per_mec = 4;
@@ -4642,6 +4751,8 @@ static int gfx_v10_0_sw_init(void *handle)
gfx_v10_0_gpu_early_init(adev);
+ gfx_v10_0_alloc_dump_mem(adev);
+
return 0;
}
@@ -4694,6 +4805,8 @@ static int gfx_v10_0_sw_fini(void *handle)
gfx_v10_0_free_microcode(adev);
+ kfree(adev->gfx.ip_dump);
+
return 0;
}
@@ -8317,7 +8430,7 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
}
reg_mem_engine = 0;
} else {
- ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
+ ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe;
reg_mem_engine = 1; /* pfp */
}
@@ -9154,6 +9267,36 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
}
+static void gfx_v10_ip_print(void *handle, struct drm_printer *p)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ uint32_t i;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
+
+ if (!adev->gfx.ip_dump)
+ return;
+
+ for (i = 0; i < reg_count; i++)
+ drm_printf(p, "%-50s \t 0x%08x\n",
+ gc_reg_list_10_1[i].reg_name,
+ adev->gfx.ip_dump[i]);
+}
+
+static void gfx_v10_ip_dump(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ uint32_t i;
+ uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
+
+ if (!adev->gfx.ip_dump)
+ return;
+
+ amdgpu_gfx_off_ctrl(adev, false);
+ for (i = 0; i < reg_count; i++)
+ adev->gfx.ip_dump[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_10_1[i]));
+ amdgpu_gfx_off_ctrl(adev, true);
+}
+
static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
.name = "gfx_v10_0",
.early_init = gfx_v10_0_early_init,
@@ -9170,6 +9313,8 @@ static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
.set_clockgating_state = gfx_v10_0_set_clockgating_state,
.set_powergating_state = gfx_v10_0_set_powergating_state,
.get_clockgating_state = gfx_v10_0_get_clockgating_state,
+ .dump_ip_state = gfx_v10_ip_dump,
+ .print_ip_state = gfx_v10_ip_print,
};
static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
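
gfx v10 is the first IP to wire up the new dump/print pair: dump_ip_state snapshots the gc_reg_list_10_1 registers (with GFXOFF temporarily disabled so the reads are valid), and print_ip_state renders the snapshot through a drm_printer. A hypothetical direct caller, for illustration only (in the driver these static hooks are reached via amd_ip_funcs):

    void my_dump_gfx10_state(struct amdgpu_device *adev)
    {
            struct drm_printer p = drm_info_printer(adev->dev);

            gfx_v10_ip_dump(adev);          /* latch registers into ip_dump */
            gfx_v10_ip_print(adev, &p);     /* pretty-print the latched values */
    }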
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index f00e05aba4..ad6431013c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -510,7 +510,7 @@ static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
{
char fw_name[40];
- char ucode_prefix[30];
+ char ucode_prefix[25];
int err;
const struct rlc_firmware_header_v2_0 *rlc_hdr;
uint16_t version_major;
@@ -4506,14 +4506,11 @@ static int gfx_v11_0_soft_reset(void *handle)
gfx_v11_0_set_safe_mode(adev, 0);
+ mutex_lock(&adev->srbm_mutex);
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
- tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
- tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
- tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
- tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
- WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
+ soc21_grbm_select(adev, i, k, j, 0);
WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
@@ -4523,16 +4520,14 @@ static int gfx_v11_0_soft_reset(void *handle)
for (i = 0; i < adev->gfx.me.num_me; ++i) {
for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
- tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
- tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
- tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
- tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
- WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
+ soc21_grbm_select(adev, i, k, j, 0);
WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
}
}
}
+ soc21_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
/* Try to acquire the gfx mutex before access to CP_VMID_RESET */
r = gfx_v11_0_request_gfx_index_mutex(adev, 1);
@@ -6174,6 +6169,8 @@ static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
.set_clockgating_state = gfx_v11_0_set_clockgating_state,
.set_powergating_state = gfx_v11_0_set_powergating_state,
.get_clockgating_state = gfx_v11_0_get_clockgating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 34f9211b26..d0992ce9fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -3457,6 +3457,8 @@ static const struct amd_ip_funcs gfx_v6_0_ip_funcs = {
.soft_reset = gfx_v6_0_soft_reset,
.set_clockgating_state = gfx_v6_0_set_clockgating_state,
.set_powergating_state = gfx_v6_0_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 86a4865b1a..541dbd70d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4977,6 +4977,8 @@ static const struct amd_ip_funcs gfx_v7_0_ip_funcs = {
.soft_reset = gfx_v7_0_soft_reset,
.set_clockgating_state = gfx_v7_0_set_clockgating_state,
.set_powergating_state = gfx_v7_0_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 202ddda57f..2f0e72caee 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6878,6 +6878,8 @@ static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
.set_clockgating_state = gfx_v8_0_set_clockgating_state,
.set_powergating_state = gfx_v8_0_set_powergating_state,
.get_clockgating_state = gfx_v8_0_get_clockgating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 99dbd23411..3c8c5abf35 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1249,7 +1249,7 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
char *chip_name)
{
- char fw_name[30];
+ char fw_name[50];
int err;
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
@@ -1282,7 +1282,7 @@ out:
static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
char *chip_name)
{
- char fw_name[30];
+ char fw_name[53];
int err;
const struct rlc_firmware_header_v2_0 *rlc_hdr;
uint16_t version_major;
@@ -1337,7 +1337,7 @@ static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
char *chip_name)
{
- char fw_name[30];
+ char fw_name[50];
int err;
if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN))
@@ -6856,6 +6856,8 @@ static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
.set_clockgating_state = gfx_v9_0_set_clockgating_state,
.set_powergating_state = gfx_v9_0_set_powergating_state,
.get_clockgating_state = gfx_v9_0_get_clockgating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
index 065b2bd5f5..3f4fd2f081 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -1909,18 +1909,7 @@ static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev)
mutex_unlock(&adev->grbm_idx_mutex);
}
-static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev)
-{
- u32 status = 0;
- struct amdgpu_vmhub *hub;
-
- hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
- status = RREG32(hub->vm_l2_pro_fault_status);
- /* reset page fault status */
- WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
- return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED);
-}
struct amdgpu_ras_block_hw_ops gfx_v9_4_2_ras_ops = {
.query_ras_error_count = &gfx_v9_4_2_query_ras_error_count,
@@ -1934,5 +1923,4 @@ struct amdgpu_gfx_ras gfx_v9_4_2_ras = {
.hw_ops = &gfx_v9_4_2_ras_ops,
},
.enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer,
- .query_utcl2_poison_status = gfx_v9_4_2_query_uctl2_poison_status,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index b10fdd8b54..f5b9f443cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -680,38 +680,44 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = {
.ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst,
};
-static int gfx_v9_4_3_aca_bank_generate_report(struct aca_handle *handle,
- struct aca_bank *bank, enum aca_error_type type,
- struct aca_bank_report *report, void *data)
+static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle,
+ struct aca_bank *bank, enum aca_smu_type type,
+ void *data)
{
- u64 status, misc0;
+ struct aca_bank_info info;
+ u64 misc0;
u32 instlo;
int ret;
- status = bank->regs[ACA_REG_IDX_STATUS];
- if ((type == ACA_ERROR_TYPE_UE &&
- ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_FAULT) ||
- (type == ACA_ERROR_TYPE_CE &&
- ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_CE)) {
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
- ret = aca_bank_info_decode(bank, &report->info);
- if (ret)
- return ret;
+ /* NOTE: overwrite info.die_id with xcd id for gfx */
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1);
+ info.die_id = instlo == mmSMNAID_XCD0_MCA_SMU ? 0 : 1;
- /* NOTE: overwrite info.die_id with xcd id for gfx */
- instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
- instlo &= GENMASK(31, 1);
- report->info.die_id = instlo == mmSMNAID_XCD0_MCA_SMU ? 0 : 1;
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
- misc0 = bank->regs[ACA_REG_IDX_MISC0];
- report->count[type] = ACA_REG__MISC0__ERRCNT(misc0);
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ ret = aca_error_cache_log_bank_error(handle, &info,
+ ACA_ERROR_TYPE_UE, 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ ret = aca_error_cache_log_bank_error(handle, &info,
+ ACA_ERROR_TYPE_CE, ACA_REG__MISC0__ERRCNT(misc0));
+ break;
+ default:
+ return -EINVAL;
}
- return 0;
+ return ret;
}
static bool gfx_v9_4_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
- enum aca_error_type type, void *data)
+ enum aca_smu_type type, void *data)
{
u32 instlo;
@@ -730,7 +736,7 @@ static bool gfx_v9_4_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_b
}
static const struct aca_bank_ops gfx_v9_4_3_aca_bank_ops = {
- .aca_bank_generate_report = gfx_v9_4_3_aca_bank_generate_report,
+ .aca_bank_parser = gfx_v9_4_3_aca_bank_parser,
.aca_bank_is_valid = gfx_v9_4_3_aca_bank_is_valid,
};
@@ -2398,10 +2404,10 @@ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
if (def != data)
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
- /* enable cgcg FSM(0x0000363F) */
+ /* CGCG Hysteresis: 400us */
def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL);
- data = (0x36
+ data = (0x2710
<< RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
@@ -2410,10 +2416,10 @@ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
if (def != data)
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data);
- /* set IDLE_POLL_COUNT(0x00900100) */
+ /* set IDLE_POLL_COUNT(0x33450100) */
def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL);
data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
- (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
+ (0x3345 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
if (def != data)
WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL, data);
} else {
@@ -4010,6 +4016,8 @@ static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = {
.set_clockgating_state = gfx_v9_4_3_set_clockgating_state,
.set_powergating_state = gfx_v9_4_3_set_powergating_state,
.get_clockgating_state = gfx_v9_4_3_get_clockgating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 22175da0e1..d200310d17 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -443,6 +443,22 @@ static void gfxhub_v1_0_init(struct amdgpu_device *adev)
mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
}
+static bool gfxhub_v1_0_query_utcl2_poison_status(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ u32 status = 0;
+ struct amdgpu_vmhub *hub;
+
+ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))
+ return false;
+
+ hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
+ status = RREG32(hub->vm_l2_pro_fault_status);
+ /* reset page fault status */
+ WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+ return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED);
+}
const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = {
.get_mc_fb_offset = gfxhub_v1_0_get_mc_fb_offset,
@@ -452,4 +468,5 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = {
.set_fault_enable_default = gfxhub_v1_0_set_fault_enable_default,
.init = gfxhub_v1_0_init,
.get_xgmi_info = gfxhub_v1_1_get_xgmi_info,
+ .query_utcl2_poison_status = gfxhub_v1_0_query_utcl2_poison_status,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
index 49aecdcee0..77df8c9cba 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -620,6 +620,20 @@ static int gfxhub_v1_2_get_xgmi_info(struct amdgpu_device *adev)
return 0;
}
+static bool gfxhub_v1_2_query_utcl2_poison_status(struct amdgpu_device *adev,
+ int xcc_id)
+{
+ u32 fed, status;
+
+ status = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regVM_L2_PROTECTION_FAULT_STATUS);
+ fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED);
+ /* reset page fault status */
+ WREG32_P(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id),
+ regVM_L2_PROTECTION_FAULT_STATUS), 1, ~1);
+
+ return fed;
+}
+
const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = {
.get_mc_fb_offset = gfxhub_v1_2_get_mc_fb_offset,
.setup_vm_pt_regs = gfxhub_v1_2_setup_vm_pt_regs,
@@ -628,6 +642,7 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = {
.set_fault_enable_default = gfxhub_v1_2_set_fault_enable_default,
.init = gfxhub_v1_2_init,
.get_xgmi_info = gfxhub_v1_2_get_xgmi_info,
+ .query_utcl2_poison_status = gfxhub_v1_2_query_utcl2_poison_status,
};
static int gfxhub_v1_2_xcp_resume(void *handle, uint32_t inst_mask)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 23b4786399..3e38d8bfcb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -1115,6 +1115,8 @@ static const struct amd_ip_funcs gmc_v6_0_ip_funcs = {
.soft_reset = gmc_v6_0_soft_reset,
.set_clockgating_state = gmc_v6_0_set_clockgating_state,
.set_powergating_state = gmc_v6_0_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 3da7b6a2b0..85df8fc810 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -1354,6 +1354,8 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
.soft_reset = gmc_v7_0_soft_reset,
.set_clockgating_state = gmc_v7_0_set_clockgating_state,
.set_powergating_state = gmc_v7_0_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index d20e5f20ee..fc97757e33 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1717,6 +1717,8 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
.set_clockgating_state = gmc_v8_0_set_clockgating_state,
.set_powergating_state = gmc_v8_0_set_powergating_state,
.get_clockgating_state = gmc_v8_0_get_clockgating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 47b63a4ce6..f7f4924751 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -548,7 +548,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
{
bool retry_fault = !!(entry->src_data[1] & 0x80);
bool write_fault = !!(entry->src_data[1] & 0x20);
- uint32_t status = 0, cid = 0, rw = 0;
+ uint32_t status = 0, cid = 0, rw = 0, fed = 0;
struct amdgpu_task_info *task_info;
struct amdgpu_vmhub *hub;
const char *mmhub_cid;
@@ -664,6 +664,13 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
status = RREG32(hub->vm_l2_pro_fault_status);
cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID);
rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW);
+ fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED);
+
+ /* for FED errors, KFD will handle them, return directly */
+ if (fed && amdgpu_ras_is_poison_mode_supported(adev) &&
+ (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2)))
+ return 0;
+
WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub);
@@ -1450,7 +1457,6 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev)
adev->umc.channel_offs = UMC_V12_0_PER_CHANNEL_OFFSET;
adev->umc.active_mask = adev->aid_mask;
adev->umc.retire_unit = UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL;
- adev->umc.channel_idx_tbl = &umc_v12_0_channel_idx_tbl[0][0][0];
if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu)
adev->umc.ras = &umc_v12_0_ras;
break;
@@ -1904,7 +1910,7 @@ gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev,
break;
}
- size = adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT;
+ size = (adev->gmc.real_vram_size + SZ_16M) >> AMDGPU_GPU_PAGE_SHIFT;
size /= adev->gmc.num_mem_partitions;
for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index 2c02ae6988..07984f7c3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -425,6 +425,8 @@ static const struct amd_ip_funcs iceland_ih_ip_funcs = {
.soft_reset = iceland_ih_soft_reset,
.set_clockgating_state = iceland_ih_set_clockgating_state,
.set_powergating_state = iceland_ih_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ih_funcs iceland_ih_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
index ad4ad39f12..3cb64c8f71 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c
@@ -346,6 +346,21 @@ static int ih_v6_0_irq_init(struct amdgpu_device *adev)
DELAY, 3);
WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp);
+ /* Redirect the interrupts to IH RB1 for dGPU */
+ if (adev->irq.ih1.ring_size) {
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0);
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA,
+ SOURCE_ID_MATCH_ENABLE, 0x1);
+
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp);
+ }
+
pci_set_master(adev->pdev);
/* enable interrupts */
@@ -549,8 +564,15 @@ static int ih_v6_0_sw_init(void *handle)
adev->irq.ih.use_doorbell = true;
adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
- adev->irq.ih1.ring_size = 0;
- adev->irq.ih2.ring_size = 0;
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, IH_RING_SIZE,
+ use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih1.use_doorbell = true;
+ adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1;
+ }
/* initialize ih control register offset */
ih_v6_0_init_register_offset(adev);
@@ -748,6 +770,8 @@ static const struct amd_ip_funcs ih_v6_0_ip_funcs = {
.set_clockgating_state = ih_v6_0_set_clockgating_state,
.set_powergating_state = ih_v6_0_set_powergating_state,
.get_clockgating_state = ih_v6_0_get_clockgating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ih_funcs ih_v6_0_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
index b8da0fc293..0fbf5fa7b0 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c
@@ -346,6 +346,21 @@ static int ih_v6_1_irq_init(struct amdgpu_device *adev)
DELAY, 3);
WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp);
+ /* Redirect the interrupts to IH RB1 for dGPU */
+ if (adev->irq.ih1.ring_size) {
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0);
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp);
+
+ tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0);
+ tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA,
+ SOURCE_ID_MATCH_ENABLE, 0x1);
+
+ WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp);
+ }
+
pci_set_master(adev->pdev);
/* enable interrupts */
@@ -550,8 +565,15 @@ static int ih_v6_1_sw_init(void *handle)
adev->irq.ih.use_doorbell = true;
adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
- adev->irq.ih1.ring_size = 0;
- adev->irq.ih2.ring_size = 0;
+ if (!(adev->flags & AMD_IS_APU)) {
+ r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, IH_RING_SIZE,
+ use_bus_addr);
+ if (r)
+ return r;
+
+ adev->irq.ih1.use_doorbell = true;
+ adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1;
+ }
/* initialize ih control register offset */
ih_v6_1_init_register_offset(adev);
@@ -753,6 +775,8 @@ static const struct amd_ip_funcs ih_v6_1_ip_funcs = {
.set_clockgating_state = ih_v6_1_set_clockgating_state,
.set_powergating_state = ih_v6_1_set_powergating_state,
.get_clockgating_state = ih_v6_1_get_clockgating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ih_funcs ih_v6_1_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
index 7aed96fa10..aa6235dd4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
@@ -749,6 +749,8 @@ static const struct amd_ip_funcs ih_v7_0_ip_funcs = {
.set_clockgating_state = ih_v7_0_set_clockgating_state,
.set_powergating_state = ih_v7_0_set_powergating_state,
.get_clockgating_state = ih_v7_0_get_clockgating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ih_funcs ih_v7_0_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index 1c8116d75f..ef3e42f6b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -759,6 +759,8 @@ static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = {
.post_soft_reset = NULL,
.set_clockgating_state = jpeg_v2_0_set_clockgating_state,
.set_powergating_state = jpeg_v2_0_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index 99cd49ee8e..afeaf3c64e 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -632,6 +632,8 @@ static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = {
.post_soft_reset = NULL,
.set_clockgating_state = jpeg_v2_5_set_clockgating_state,
.set_powergating_state = jpeg_v2_5_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = {
@@ -652,6 +654,8 @@ static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = {
.post_soft_reset = NULL,
.set_clockgating_state = jpeg_v2_5_set_clockgating_state,
.set_powergating_state = jpeg_v2_5_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
index a92481da60..1c7cf4800b 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
@@ -557,6 +557,8 @@ static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = {
.post_soft_reset = NULL,
.set_clockgating_state = jpeg_v3_0_set_clockgating_state,
.set_powergating_state = jpeg_v3_0_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
index 88ea58d5c4..237fe5df5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
@@ -719,6 +719,8 @@ static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = {
.post_soft_reset = NULL,
.set_clockgating_state = jpeg_v4_0_set_clockgating_state,
.set_powergating_state = jpeg_v4_0_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 32caeb37ce..d66af11aa6 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -1053,6 +1053,8 @@ static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = {
.post_soft_reset = NULL,
.set_clockgating_state = jpeg_v4_0_3_set_clockgating_state,
.set_powergating_state = jpeg_v4_0_3_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
index edf5bcdd2b..da6bb9022b 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
@@ -762,6 +762,8 @@ static const struct amd_ip_funcs jpeg_v4_0_5_ip_funcs = {
.post_soft_reset = NULL,
.set_clockgating_state = jpeg_v4_0_5_set_clockgating_state,
.set_powergating_state = jpeg_v4_0_5_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
index e70200f975..64c856bfe0 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
@@ -513,6 +513,8 @@ static const struct amd_ip_funcs jpeg_v5_0_0_ip_funcs = {
.post_soft_reset = NULL,
.set_clockgating_state = jpeg_v5_0_0_set_clockgating_state,
.set_powergating_state = jpeg_v5_0_0_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
index 1e5ad1e08d..a626bf9049 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
@@ -1176,6 +1176,8 @@ static const struct amd_ip_funcs mes_v10_1_ip_funcs = {
.hw_fini = mes_v10_1_hw_fini,
.suspend = mes_v10_1_suspend,
.resume = mes_v10_1_resume,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
const struct amdgpu_ip_block_version mes_v10_1_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 63f281a998..32d4519541 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -100,18 +100,76 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
.insert_nop = amdgpu_ring_insert_nop,
};
+static const char *mes_v11_0_opcodes[] = {
+ "SET_HW_RSRC",
+ "SET_SCHEDULING_CONFIG",
+ "ADD_QUEUE",
+ "REMOVE_QUEUE",
+ "PERFORM_YIELD",
+ "SET_GANG_PRIORITY_LEVEL",
+ "SUSPEND",
+ "RESUME",
+ "RESET",
+ "SET_LOG_BUFFER",
+ "CHANGE_GANG_PRORITY",
+ "QUERY_SCHEDULER_STATUS",
+ "PROGRAM_GDS",
+ "SET_DEBUG_VMID",
+ "MISC",
+ "UPDATE_ROOT_PAGE_TABLE",
+ "AMD_LOG",
+};
+
+static const char *mes_v11_0_misc_opcodes[] = {
+ "WRITE_REG",
+ "INV_GART",
+ "QUERY_STATUS",
+ "READ_REG",
+ "WAIT_REG_MEM",
+ "SET_SHADER_DEBUGGER",
+};
+
+static const char *mes_v11_0_get_op_string(union MESAPI__MISC *x_pkt)
+{
+ const char *op_str = NULL;
+
+ if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes))
+ op_str = mes_v11_0_opcodes[x_pkt->header.opcode];
+
+ return op_str;
+}
+
+static const char *mes_v11_0_get_misc_op_string(union MESAPI__MISC *x_pkt)
+{
+ const char *op_str = NULL;
+
+ if ((x_pkt->header.opcode == MES_SCH_API_MISC) &&
+ (x_pkt->opcode < ARRAY_SIZE(mes_v11_0_misc_opcodes)))
+ op_str = mes_v11_0_misc_opcodes[x_pkt->opcode];
+
+ return op_str;
+}
+
static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
void *pkt, int size,
int api_status_off)
{
- int ndw = size / 4;
- signed long r;
- union MESAPI__ADD_QUEUE *x_pkt = pkt;
- struct MES_API_STATUS *api_status;
+ union MESAPI__QUERY_MES_STATUS mes_status_pkt;
+ signed long timeout = 3000000; /* 3000 ms */
struct amdgpu_device *adev = mes->adev;
struct amdgpu_ring *ring = &mes->ring;
+ struct MES_API_STATUS *api_status;
+ union MESAPI__MISC *x_pkt = pkt;
+ const char *op_str, *misc_op_str;
unsigned long flags;
- signed long timeout = adev->usec_timeout;
+ u64 status_gpu_addr;
+ u32 status_offset;
+ u64 *status_ptr;
+ signed long r;
+ int ret;
+
+ if (x_pkt->header.opcode >= MES_SCH_API_MAX)
+ return -EINVAL;
if (amdgpu_emu_mode) {
timeout *= 100;
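/*
 * A minimal, self-contained sketch of the bounds-checked table lookup
 * used by mes_v11_0_get_op_string() above: unknown opcodes yield NULL so
 * the caller can fall back to the raw numeric value. The table below is
 * illustrative, not the real MES API definition.
 */
#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const char *demo_opcodes[] = {
	"SET_HW_RSRC",
	"ADD_QUEUE",
	"REMOVE_QUEUE",
};

static const char *demo_get_op_string(unsigned int opcode)
{
	return opcode < ARRAY_SIZE(demo_opcodes) ? demo_opcodes[opcode] : NULL;
}

int main(void)
{
	const char *op_str = demo_get_op_string(5);

	if (op_str)
		printf("msg=%s\n", op_str);
	else
		printf("msg=%u\n", 5);	/* unknown opcode: print the number */
	return 0;
}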
@@ -119,37 +177,82 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
/* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
timeout = 15 * 600 * 1000;
}
- BUG_ON(size % 4 != 0);
+
+ ret = amdgpu_device_wb_get(adev, &status_offset);
+ if (ret)
+ return ret;
+
+ status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4);
+ status_ptr = (u64 *)&adev->wb.wb[status_offset];
+ *status_ptr = 0;
spin_lock_irqsave(&mes->ring_lock, flags);
- if (amdgpu_ring_alloc(ring, ndw)) {
- spin_unlock_irqrestore(&mes->ring_lock, flags);
- return -ENOMEM;
- }
+ r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
+ if (r)
+ goto error_unlock_free;
api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
- api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr;
- api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq;
+ api_status->api_completion_fence_addr = status_gpu_addr;
+ api_status->api_completion_fence_value = 1;
+
+ amdgpu_ring_write_multiple(ring, pkt, size / 4);
+
+ memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
+ mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
+ mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+ mes_status_pkt.api_status.api_completion_fence_addr =
+ ring->fence_drv.gpu_addr;
+ mes_status_pkt.api_status.api_completion_fence_value =
+ ++ring->fence_drv.sync_seq;
+
+ amdgpu_ring_write_multiple(ring, &mes_status_pkt,
+ sizeof(mes_status_pkt) / 4);
- amdgpu_ring_write_multiple(ring, pkt, ndw);
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&mes->ring_lock, flags);
- DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
+ op_str = mes_v11_0_get_op_string(x_pkt);
+ misc_op_str = mes_v11_0_get_misc_op_string(x_pkt);
- r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
- timeout);
- if (r < 1) {
- DRM_ERROR("MES failed to response msg=%d\n",
- x_pkt->header.opcode);
+ if (misc_op_str)
+ dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str,
+ misc_op_str);
+ else if (op_str)
+ dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str);
+ else
+ dev_dbg(adev->dev, "MES msg=%d was emitted\n",
+ x_pkt->header.opcode);
+
+ r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout);
+ if (r < 1 || !*status_ptr) {
+
+ if (misc_op_str)
+ dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n",
+ op_str, misc_op_str);
+ else if (op_str)
+ dev_err(adev->dev, "MES failed to respond to msg=%s\n",
+ op_str);
+ else
+ dev_err(adev->dev, "MES failed to respond to msg=%d\n",
+ x_pkt->header.opcode);
while (halt_if_hws_hang)
schedule();
- return -ETIMEDOUT;
+ r = -ETIMEDOUT;
+ goto error_wb_free;
}
+ amdgpu_device_wb_free(adev, status_offset);
return 0;
+
+error_unlock_free:
+ spin_unlock_irqrestore(&mes->ring_lock, flags);
+
+error_wb_free:
+ amdgpu_device_wb_free(adev, status_offset);
+ return r;
}
static int convert_to_mes_queue_type(int queue_type)
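/*
 * The rework above gives every submission its own completion marker: the
 * packet's api_status fence is pointed at a freshly allocated write-back
 * slot, and success requires both the trailing QUERY_SCHEDULER_STATUS ring
 * fence and a non-zero slot. A simplified userspace model of polling such
 * a slot (the hardware write is simulated; names are illustrative):
 */
#include <stdint.h>
#include <stdio.h>

static volatile uint64_t wb_slot;	/* stands in for adev->wb.wb[offset] */

static void simulated_hw_write(void)
{
	wb_slot = 1;	/* what the firmware's api_status fence write does */
}

static int poll_completion(long spins)
{
	while (spins-- > 0) {
		if (wb_slot)
			return 0;
	}
	return -1;	/* the driver returns -ETIMEDOUT here */
}

int main(void)
{
	wb_slot = 0;
	simulated_hw_write();
	printf("completion: %d\n", poll_completion(1000));
	return 0;
}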
@@ -422,6 +525,36 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
}
+static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes)
+{
+ int size = 128 * PAGE_SIZE;
+ int ret = 0;
+ struct amdgpu_device *adev = mes->adev;
+ union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_pkt;
+ memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt));
+
+ mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER;
+ mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1;
+ mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
+ mes_set_hw_res_pkt.enable_mes_info_ctx = 1;
+
+ ret = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &mes->resource_1,
+ &mes->resource_1_gpu_addr,
+ &mes->resource_1_addr);
+ if (ret) {
+ dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", ret);
+ return ret;
+ }
+
+ mes_set_hw_res_pkt.mes_info_ctx_mc_addr = mes->resource_1_gpu_addr;
+ mes_set_hw_res_pkt.mes_info_ctx_size = mes->resource_1->tbo.base.size;
+ return mes_v11_0_submit_pkt_and_poll_completion(mes,
+ &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
+ offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status));
+}
+
static const struct amdgpu_mes_funcs mes_v11_0_funcs = {
.add_hw_queue = mes_v11_0_add_hw_queue,
.remove_hw_queue = mes_v11_0_remove_hw_queue,
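/*
 * mes_v11_0_set_hw_resources_1() reuses the generic submit path by passing
 * offsetof(..., api_status), so one helper can patch the fence field of any
 * packet union. A standalone sketch of that idiom, with illustrative types
 * rather than the real MES structures:
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct demo_api_status {
	uint64_t fence_addr;
	uint64_t fence_value;
};

union demo_set_hw_res {
	struct {
		uint32_t header;
		uint32_t enable_info_ctx;
		struct demo_api_status api_status;
	};
	uint32_t max_dwords[64];	/* fixed API frame size */
};

static void patch_status(void *pkt, size_t status_off, uint64_t addr)
{
	struct demo_api_status *s = (void *)((char *)pkt + status_off);

	s->fence_addr = addr;
	s->fence_value = 1;
}

int main(void)
{
	union demo_set_hw_res pkt = { 0 };

	patch_status(&pkt, offsetof(union demo_set_hw_res, api_status), 0x1000);
	printf("fence_addr=0x%llx\n",
	       (unsigned long long)pkt.api_status.fence_addr);
	return 0;
}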
@@ -1203,6 +1336,14 @@ static int mes_v11_0_hw_init(void *handle)
if (r)
goto failure;
+ if (amdgpu_sriov_is_mes_info_enable(adev)) {
+ r = mes_v11_0_set_hw_resources_1(&adev->mes);
+ if (r) {
+ DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r);
+ goto failure;
+ }
+ }
+
r = mes_v11_0_query_sched_status(&adev->mes);
if (r) {
DRM_ERROR("MES is busy\n");
@@ -1226,6 +1367,11 @@ failure:
static int mes_v11_0_hw_fini(void *handle)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (amdgpu_sriov_is_mes_info_enable(adev)) {
+ amdgpu_bo_free_kernel(&adev->mes.resource_1, &adev->mes.resource_1_gpu_addr,
+ &adev->mes.resource_1_addr);
+ }
return 0;
}
@@ -1291,6 +1437,8 @@ static const struct amd_ip_funcs mes_v11_0_ip_funcs = {
.hw_fini = mes_v11_0_hw_fini,
.suspend = mes_v11_0_suspend,
.resume = mes_v11_0_resume,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
const struct amdgpu_ip_block_version mes_v11_0_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
index c0fc44cdd6..7a1ff29841 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
@@ -559,6 +559,20 @@ static void mmhub_v1_8_get_clockgating(struct amdgpu_device *adev, u64 *flags)
}
+static bool mmhub_v1_8_query_utcl2_poison_status(struct amdgpu_device *adev,
+ int hub_inst)
+{
+ u32 fed, status;
+
+ status = RREG32_SOC15(MMHUB, hub_inst, regVM_L2_PROTECTION_FAULT_STATUS);
+ fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED);
+ /* reset page fault status */
+ WREG32_P(SOC15_REG_OFFSET(MMHUB, hub_inst,
+ regVM_L2_PROTECTION_FAULT_STATUS), 1, ~1);
+
+ return fed;
+}
+
const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = {
.get_fb_location = mmhub_v1_8_get_fb_location,
.init = mmhub_v1_8_init,
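/*
 * The status clear above uses WREG32_P(reg, 1, ~1): a read-modify-write
 * where the mask selects the bits to preserve and the value is OR-ed in.
 * A userspace model of that macro's semantics, with the register emulated
 * by a plain variable:
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t fake_reg = 0xa5a5a5a4;

static uint32_t rreg32(void)
{
	return fake_reg;
}

static void wreg32(uint32_t v)
{
	fake_reg = v;
}

static void wreg32_p(uint32_t val, uint32_t mask)
{
	wreg32((rreg32() & mask) | val);	/* keep 'mask' bits, OR in 'val' */
}

int main(void)
{
	wreg32_p(1, ~1u);	/* set bit 0, preserve everything else */
	printf("reg=0x%08x\n", fake_reg);	/* prints 0xa5a5a5a5 */
	return 0;
}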
@@ -568,6 +582,7 @@ const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = {
.setup_vm_pt_regs = mmhub_v1_8_setup_vm_pt_regs,
.set_clockgating = mmhub_v1_8_set_clockgating,
.get_clockgating = mmhub_v1_8_get_clockgating,
+ .query_utcl2_poison_status = mmhub_v1_8_query_utcl2_poison_status,
};
static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ce_reg_list[] = {
@@ -706,28 +721,32 @@ static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = {
.reset_ras_error_count = mmhub_v1_8_reset_ras_error_count,
};
-static int mmhub_v1_8_aca_bank_generate_report(struct aca_handle *handle,
- struct aca_bank *bank, enum aca_error_type type,
- struct aca_bank_report *report, void *data)
+static int mmhub_v1_8_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
{
- u64 status, misc0;
+ struct aca_bank_info info;
+ u64 misc0;
int ret;
- status = bank->regs[ACA_REG_IDX_STATUS];
- if ((type == ACA_ERROR_TYPE_UE &&
- ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_FAULT) ||
- (type == ACA_ERROR_TYPE_CE &&
- ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_CE)) {
-
- ret = aca_bank_info_decode(bank, &report->info);
- if (ret)
- return ret;
-
- misc0 = bank->regs[ACA_REG_IDX_MISC0];
- report->count[type] = ACA_REG__MISC0__ERRCNT(misc0);
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
+ 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE,
+ ACA_REG__MISC0__ERRCNT(misc0));
+ break;
+ default:
+ return -EINVAL;
}
- return 0;
+ return ret;
}
/* reference to the smu driver interface header file */
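/*
 * The parser above logs UE banks with a fixed count of one and CE banks
 * with the error count decoded from the MISC0 bank register. A small
 * sketch of that field extraction; the shift and width here are
 * illustrative, not the real ACA register layout:
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_ERRCNT_SHIFT 32
#define DEMO_ERRCNT_MASK  0xfffULL

static uint64_t demo_misc0_errcnt(uint64_t misc0)
{
	return (misc0 >> DEMO_ERRCNT_SHIFT) & DEMO_ERRCNT_MASK;
}

int main(void)
{
	uint64_t misc0 = (7ULL << DEMO_ERRCNT_SHIFT) | 0xdead;

	/* a CE bank would be logged with this count; a UE bank with 1 */
	printf("ce count=%llu\n", (unsigned long long)demo_misc0_errcnt(misc0));
	return 0;
}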
@@ -741,7 +760,7 @@ static int mmhub_v1_8_err_codes[] = {
};
static bool mmhub_v1_8_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
- enum aca_error_type type, void *data)
+ enum aca_smu_type type, void *data)
{
u32 instlo;
@@ -760,7 +779,7 @@ static bool mmhub_v1_8_aca_bank_is_valid(struct aca_handle *handle, struct aca_b
}
static const struct aca_bank_ops mmhub_v1_8_aca_bank_ops = {
- .aca_bank_generate_report = mmhub_v1_8_aca_bank_generate_report,
+ .aca_bank_parser = mmhub_v1_8_aca_bank_parser,
.aca_bank_is_valid = mmhub_v1_8_aca_bank_is_valid,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index a2bd2c3b1e..0c7275bca8 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -276,6 +276,8 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
timeout -= 10;
} while (timeout > 1);
+ dev_warn(adev->dev, "waiting IDH_FLR_NOTIFICATION_CMPL timeout\n");
+
flr_done:
atomic_set(&adev->reset_domain->in_gpu_reset, 0);
up_write(&adev->reset_domain->sem);
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
index 77f5b55dec..aba00d9616 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
@@ -309,6 +309,8 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
timeout -= 10;
} while (timeout > 1);
+ dev_warn(adev->dev, "waiting IDH_FLR_NOTIFICATION_CMPL timeout\n");
+
flr_done:
atomic_set(&adev->reset_domain->in_gpu_reset, 0);
up_write(&adev->reset_domain->sem);
@@ -444,7 +446,6 @@ static void xgpu_nv_ras_poison_handler(struct amdgpu_device *adev,
amdgpu_virt_fini_data_exchange(adev);
xgpu_nv_send_access_requests_with_param(adev,
IDH_RAS_POISON, block, 0, 0);
- amdgpu_virt_init_data_exchange(adev);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index 4178f4e5da..b281462093 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -713,6 +713,8 @@ static const struct amd_ip_funcs navi10_ih_ip_funcs = {
.set_clockgating_state = navi10_ih_set_clockgating_state,
.set_powergating_state = navi10_ih_set_powergating_state,
.get_clockgating_state = navi10_ih_get_clockgating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ih_funcs navi10_ih_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 4d7976b777..12e54047bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -110,7 +110,7 @@ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[]
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
@@ -121,7 +121,7 @@ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn1[]
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};
@@ -199,7 +199,7 @@ static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
@@ -1131,4 +1131,6 @@ static const struct amd_ip_funcs nv_common_ip_funcs = {
.set_clockgating_state = nv_common_set_clockgating_state,
.set_powergating_state = nv_common_set_powergating_state,
.get_clockgating_state = nv_common_get_clockgating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 7566973ed8..37b5ddd6f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -464,8 +464,9 @@ struct psp_gfx_rb_frame
#define PSP_ERR_UNKNOWN_COMMAND 0x00000100
enum tee_error_code {
- TEE_SUCCESS = 0x00000000,
- TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A,
+ TEE_SUCCESS = 0x00000000,
+ TEE_ERROR_CANCEL = 0xFFFF0002,
+ TEE_ERROR_NOT_SUPPORTED = 0xFFFF000A,
};
#endif /* _PSP_TEE_GFX_IF_H_ */
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
index 238abd9807..40b28298af 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c
@@ -174,7 +174,8 @@ static int psp_v14_0_bootloader_load_intf_drv(struct psp_context *psp)
static int psp_v14_0_bootloader_load_dbg_drv(struct psp_context *psp)
{
- return psp_v14_0_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_DBGDRV);
+ /* dbg_drv was renamed to had_drv in psp v14 */
+ return psp_v14_0_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_HADDRV);
}
static int psp_v14_0_bootloader_load_ras_drv(struct psp_context *psp)
@@ -182,6 +183,10 @@ static int psp_v14_0_bootloader_load_ras_drv(struct psp_context *psp)
return psp_v14_0_bootloader_load_component(psp, &psp->ras_drv, PSP_BL__LOAD_RASDRV);
}
+static int psp_v14_0_bootloader_load_ipkeymgr_drv(struct psp_context *psp)
+{
+ return psp_v14_0_bootloader_load_component(psp, &psp->ipkeymgr_drv, PSP_BL__LOAD_IPKEYMGRDRV);
+}
static int psp_v14_0_bootloader_load_sos(struct psp_context *psp)
{
@@ -658,6 +663,7 @@ static const struct psp_funcs psp_v14_0_funcs = {
.bootloader_load_intf_drv = psp_v14_0_bootloader_load_intf_drv,
.bootloader_load_dbg_drv = psp_v14_0_bootloader_load_dbg_drv,
.bootloader_load_ras_drv = psp_v14_0_bootloader_load_ras_drv,
+ .bootloader_load_ipkeymgr_drv = psp_v14_0_bootloader_load_ipkeymgr_drv,
.bootloader_load_sos = psp_v14_0_bootloader_load_sos,
.ring_create = psp_v14_0_ring_create,
.ring_stop = psp_v14_0_ring_stop,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 07e19caf2b..ac8a9b9b3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -1113,6 +1113,8 @@ static const struct amd_ip_funcs sdma_v2_4_ip_funcs = {
.soft_reset = sdma_v2_4_soft_reset,
.set_clockgating_state = sdma_v2_4_set_clockgating_state,
.set_powergating_state = sdma_v2_4_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
@@ -1176,7 +1178,7 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: unused
+ * @copy_flags: unused
*
* Copy GPU buffers using the DMA engine (VI).
* Used by the amdgpu ttm implementation to move pages if
@@ -1186,7 +1188,7 @@ static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 2ad615be4b..b8ebdc4ae6 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -1553,6 +1553,8 @@ static const struct amd_ip_funcs sdma_v3_0_ip_funcs = {
.set_clockgating_state = sdma_v3_0_set_clockgating_state,
.set_powergating_state = sdma_v3_0_set_powergating_state,
.get_clockgating_state = sdma_v3_0_get_clockgating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
@@ -1616,7 +1618,7 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: unused
+ * @copy_flags: unused
*
* Copy GPU buffers using the DMA engine (VI).
* Used by the amdgpu ttm implementation to move pages if
@@ -1626,7 +1628,7 @@ static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 3fac292334..772604feb6 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -2451,7 +2451,7 @@ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: if a secure copy should be used
+ * @copy_flags: copy flags for the buffers
*
* Copy GPU buffers using the DMA engine (VEGA10/12).
* Used by the amdgpu ttm implementation to move pages if
@@ -2461,11 +2461,11 @@ static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
- SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
ib->ptr[ib->length_dw++] = byte_count - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
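/*
 * The bool-to-u32 migration above turns the single 'tmz' argument into a
 * flags word, so further copy attributes can be added without touching the
 * emit_copy_buffer() signature again. A sketch of the pattern; the flag
 * value stands in for AMDGPU_COPY_FLAGS_TMZ and is illustrative:
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_COPY_FLAGS_TMZ (1u << 0)

static uint32_t demo_header_tmz_bit(uint32_t copy_flags)
{
	/* equivalent to the old 'tmz ? 1 : 0', driven by a flag bit */
	return (copy_flags & DEMO_COPY_FLAGS_TMZ) ? 1u : 0u;
}

int main(void)
{
	printf("tmz=%u\n", demo_header_tmz_bit(DEMO_COPY_FLAGS_TMZ));
	printf("tmz=%u\n", demo_header_tmz_bit(0));
	return 0;
}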
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index e708468ac5..341b24d832 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -1945,7 +1945,7 @@ static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: if a secure copy should be used
+ * @copy_flags: copy flags for the buffers
*
* Copy GPU buffers using the DMA engine.
* Used by the amdgpu ttm implementation to move pages if
@@ -1955,11 +1955,11 @@ static void sdma_v4_4_2_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
- SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
ib->ptr[ib->length_dw++] = byte_count - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
@@ -2180,35 +2180,39 @@ static const struct amdgpu_ras_block_hw_ops sdma_v4_4_2_ras_hw_ops = {
.reset_ras_error_count = sdma_v4_4_2_reset_ras_error_count,
};
-static int sdma_v4_4_2_aca_bank_generate_report(struct aca_handle *handle,
- struct aca_bank *bank, enum aca_error_type type,
- struct aca_bank_report *report, void *data)
+static int sdma_v4_4_2_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
{
- u64 status, misc0;
+ struct aca_bank_info info;
+ u64 misc0;
int ret;
- status = bank->regs[ACA_REG_IDX_STATUS];
- if ((type == ACA_ERROR_TYPE_UE &&
- ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_FAULT) ||
- (type == ACA_ERROR_TYPE_CE &&
- ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_CE)) {
+ ret = aca_bank_info_decode(bank, &info);
+ if (ret)
+ return ret;
- ret = aca_bank_info_decode(bank, &report->info);
- if (ret)
- return ret;
-
- misc0 = bank->regs[ACA_REG_IDX_MISC0];
- report->count[type] = ACA_REG__MISC0__ERRCNT(misc0);
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+ switch (type) {
+ case ACA_SMU_TYPE_UE:
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE,
+ 1ULL);
+ break;
+ case ACA_SMU_TYPE_CE:
+ ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE,
+ ACA_REG__MISC0__ERRCNT(misc0));
+ break;
+ default:
+ return -EINVAL;
}
- return 0;
+ return ret;
}
/* CODE_SDMA0 - CODE_SDMA4, reference to the smu driver interface header file */
static int sdma_v4_4_2_err_codes[] = { 33, 34, 35, 36 };
static bool sdma_v4_4_2_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
- enum aca_error_type type, void *data)
+ enum aca_smu_type type, void *data)
{
u32 instlo;
@@ -2227,7 +2231,7 @@ static bool sdma_v4_4_2_aca_bank_is_valid(struct aca_handle *handle, struct aca_
}
static const struct aca_bank_ops sdma_v4_4_2_aca_bank_ops = {
- .aca_bank_generate_report = sdma_v4_4_2_aca_bank_generate_report,
+ .aca_bank_parser = sdma_v4_4_2_aca_bank_parser,
.aca_bank_is_valid = sdma_v4_4_2_aca_bank_is_valid,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 883e8a1b8a..b7d33d78bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -999,7 +999,8 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
r = amdgpu_ring_alloc(ring, 20);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
- amdgpu_device_wb_free(adev, index);
+ if (!ring->is_mes_queue)
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -1805,7 +1806,7 @@ static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: if a secure copy should be used
+ * @copy_flags: copy flags for the buffers
*
* Copy GPU buffers using the DMA engine (NAVI10).
* Used by the amdgpu ttm implementation to move pages if
@@ -1815,11 +1816,11 @@ static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
- SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
ib->ptr[ib->length_dw++] = byte_count - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index da01b524b9..af1e90159c 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -176,6 +176,14 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring)
DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
ring->doorbell_index, ring->wptr << 2);
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
+ /* SDMA seems to miss doorbells sometimes when powergating kicks in.
+ * Updating the wptr directly will wake it. This is only safe because
+ * we disallow gfxoff in begin_use() and then allow it again in end_use().
+ */
+ WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR),
+ lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI),
+ upper_32_bits(ring->wptr << 2));
} else {
DRM_DEBUG("Not using doorbell -- "
"mmSDMA%i_GFX_RB_WPTR == 0x%08x "
@@ -839,7 +847,8 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)
r = amdgpu_ring_alloc(ring, 20);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
- amdgpu_device_wb_free(adev, index);
+ if (!ring->is_mes_queue)
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -1646,6 +1655,10 @@ static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring)
* but it shouldn't hurt for other parts since
* this GFXOFF will be disallowed anyway when SDMA is
* active; this just makes it explicit.
+ * sdma_v5_2_ring_set_wptr() takes advantage of this
+ * to update the wptr because sometimes SDMA seems to miss
+ * doorbells when entering PG. If you remove this, update
+ * sdma_v5_2_ring_set_wptr() as well!
*/
amdgpu_gfx_off_ctrl(adev, false);
}
@@ -1751,7 +1764,7 @@ static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: if a secure copy should be used
+ * @copy_flags: copy flags for the buffers
*
* Copy GPU buffers using the DMA engine.
* Used by the amdgpu ttm implementation to move pages if
@@ -1761,11 +1774,11 @@ static void sdma_v5_2_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
- SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
ib->ptr[ib->length_dw++] = byte_count - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index 361835a61f..c833b6b837 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -507,6 +507,13 @@ static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev)
/* set minor_ptr_update to 0 after wptr programmed */
WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0);
+ /* Set up sdma hang watchdog */
+ temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL));
+ /* 100ms per unit */
+ temp = REG_SET_FIELD(temp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT,
+ max(adev->usec_timeout/100000, 1));
+ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), temp);
+
/* Set up RESP_MODE to non-copy addresses */
temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL));
temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3);
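/*
 * The watchdog setup above converts the driver's microsecond timeout into
 * the register's 100 ms units and clamps it to at least one unit so a
 * short timeout still arms the watchdog. The same arithmetic as a
 * standalone helper:
 */
#include <stdio.h>

static int usec_to_watchdog_units(int usec_timeout)
{
	int units = usec_timeout / 100000;	/* 100 ms per unit */

	return units > 0 ? units : 1;
}

int main(void)
{
	printf("%d\n", usec_to_watchdog_units(10000000));	/* 10 s  -> 100 */
	printf("%d\n", usec_to_watchdog_units(50000));		/* 50 ms -> 1 */
	return 0;
}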
@@ -854,7 +861,8 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring)
r = amdgpu_ring_alloc(ring, 5);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
- amdgpu_device_wb_free(adev, index);
+ if (!ring->is_mes_queue)
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -1567,7 +1575,7 @@ static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: if a secure copy should be used
+ * @copy_flags: copy flags for the buffers
*
* Copy GPU buffers using the DMA engine.
* Used by the amdgpu ttm implementation to move pages if
@@ -1577,11 +1585,11 @@ static void sdma_v6_0_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
- SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
+ SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
ib->ptr[ib->length_dw++] = byte_count - 1;
ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 23e4ef4fff..85235470e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1409,9 +1409,9 @@ static int si_gpu_pci_config_reset(struct amdgpu_device *adev)
return r;
}
-static bool si_asic_supports_baco(struct amdgpu_device *adev)
+static int si_asic_supports_baco(struct amdgpu_device *adev)
{
- return false;
+ return 0;
}
static enum amd_reset_method
@@ -2706,6 +2706,8 @@ static const struct amd_ip_funcs si_common_ip_funcs = {
.soft_reset = si_common_soft_reset,
.set_clockgating_state = si_common_set_clockgating_state,
.set_powergating_state = si_common_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ip_block_version si_common_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 9aa0e11ee6..11db5b7558 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -708,6 +708,8 @@ static const struct amd_ip_funcs si_dma_ip_funcs = {
.soft_reset = si_dma_soft_reset,
.set_clockgating_state = si_dma_set_clockgating_state,
.set_powergating_state = si_dma_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs si_dma_ring_funcs = {
@@ -761,7 +763,7 @@ static void si_dma_set_irq_funcs(struct amdgpu_device *adev)
* @src_offset: src GPU address
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
- * @tmz: is this a secure operation
+ * @copy_flags: unused
*
* Copy GPU buffers using the DMA engine (SI).
* Used by the amdgpu ttm implementation to move pages if
@@ -771,7 +773,7 @@ static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib,
uint64_t src_offset,
uint64_t dst_offset,
uint32_t byte_count,
- bool tmz)
+ uint32_t copy_flags)
{
ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY,
1, 0, 0, byte_count);
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index cada9f300a..5237395e4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -296,6 +296,8 @@ static const struct amd_ip_funcs si_ih_ip_funcs = {
.soft_reset = si_ih_soft_reset,
.set_clockgating_state = si_ih_set_clockgating_state,
.set_powergating_state = si_ih_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ih_funcs si_ih_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
index 04c797d545..0af648931d 100644
--- a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
+++ b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c
@@ -91,7 +91,7 @@ static int smu_v13_0_10_mode2_suspend_ip(struct amdgpu_device *adev)
adev->ip_blocks[i].status.hw = false;
}
- return r;
+ return 0;
}
static int
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c
new file mode 100644
index 0000000000..2a51a70d48
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "smuio_v14_0_2.h"
+#include "smuio/smuio_14_0_2_offset.h"
+#include "smuio/smuio_14_0_2_sh_mask.h"
+#include <linux/preempt.h>
+
+static u32 smuio_v14_0_2_get_rom_index_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, regROM_INDEX);
+}
+
+static u32 smuio_v14_0_2_get_rom_data_offset(struct amdgpu_device *adev)
+{
+ return SOC15_REG_OFFSET(SMUIO, 0, regROM_DATA);
+}
+
+static u64 smuio_v14_0_2_get_gpu_clock_counter(struct amdgpu_device *adev)
+{
+ u64 clock;
+ u64 clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after;
+
+ preempt_disable();
+ clock_counter_hi_pre = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
+ clock_counter_lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
+ /* the clock counter may be updated while polling the counters */
+ clock_counter_hi_after = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER);
+ if (clock_counter_hi_pre != clock_counter_hi_after)
+ clock_counter_lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER);
+ preempt_enable();
+
+ clock = clock_counter_lo | (clock_counter_hi_after << 32ULL);
+
+ return clock;
+}
+
+const struct amdgpu_smuio_funcs smuio_v14_0_2_funcs = {
+ .get_rom_index_offset = smuio_v14_0_2_get_rom_index_offset,
+ .get_rom_data_offset = smuio_v14_0_2_get_rom_data_offset,
+ .get_gpu_clock_counter = smuio_v14_0_2_get_gpu_clock_counter,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h
new file mode 100644
index 0000000000..6e617f832d
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SMUIO_V14_0_2_H__
+#define __SMUIO_V14_0_2_H__
+
+#include "soc15_common.h"
+
+extern const struct amdgpu_smuio_funcs smuio_v14_0_2_funcs;
+
+#endif /* __SMUIO_V14_0_2_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index dec81ccf62..170f02e967 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -143,7 +143,7 @@ static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] =
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};
@@ -156,7 +156,7 @@ static const struct amdgpu_video_codecs rn_video_codecs_decode =
static const struct amdgpu_video_codec_info vcn_4_0_3_video_codecs_decode_array[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
@@ -502,7 +502,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev)
static enum amd_reset_method
soc15_asic_reset_method(struct amdgpu_device *adev)
{
- bool baco_reset = false;
+ int baco_reset = 0;
bool connected_to_cpu = false;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
@@ -540,7 +540,7 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
*/
if (ras && adev->ras_enabled &&
adev->pm.fw_version <= 0x283400)
- baco_reset = false;
+ baco_reset = 0;
} else {
baco_reset = amdgpu_dpm_is_baco_supported(adev);
}
@@ -620,7 +620,7 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
}
}
-static bool soc15_supports_baco(struct amdgpu_device *adev)
+static int soc15_supports_baco(struct amdgpu_device *adev)
{
switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(9, 0, 0):
@@ -628,13 +628,13 @@ static bool soc15_supports_baco(struct amdgpu_device *adev)
if (adev->asic_type == CHIP_VEGA20) {
if (adev->psp.sos.fw_version >= 0x80067)
return amdgpu_dpm_is_baco_supported(adev);
- return false;
+ return 0;
} else {
return amdgpu_dpm_is_baco_supported(adev);
}
break;
default:
- return false;
+ return 0;
}
}
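/*
 * A note on the bool-to-int conversion of the supports_baco callbacks here
 * and in si.c above: returning int lets a negative errno from the
 * underlying query propagate instead of being collapsed into "false".
 * That rationale is inferred from the signature change; a sketch of the
 * resulting calling convention:
 */
#include <errno.h>
#include <stdio.h>

static int demo_supports_baco(int fw_ready)
{
	if (!fw_ready)
		return -EINVAL;	/* query failed: distinguishable from "no" */
	return 1;		/* supported; 0 would mean unsupported */
}

int main(void)
{
	int r = demo_supports_baco(1);

	if (r < 0)
		printf("query error %d\n", r);
	else
		printf("baco %ssupported\n", r ? "" : "not ");
	return 0;
}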
@@ -1501,4 +1501,6 @@ static const struct amd_ip_funcs soc15_common_ip_funcs = {
.set_clockgating_state = soc15_common_set_clockgating_state,
.set_powergating_state = soc15_common_set_powergating_state,
.get_clockgating_state = soc15_common_get_clockgating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index 1444b7765e..282584a48b 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -88,6 +88,8 @@ struct soc15_ras_field_entry {
};
#define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg
+#define SOC15_REG_ENTRY_STR(ip, inst, reg) \
+ { ip##_HWIP, inst, reg##_BASE_IDX, reg, #reg }
#define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset)
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 43ca63fe85..fb67974675 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -72,7 +72,7 @@ static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn1 = {
static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn0[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
@@ -80,7 +80,7 @@ static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_
static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn1[] = {
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
- {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
};
@@ -985,4 +985,6 @@ static const struct amd_ip_funcs soc21_common_ip_funcs = {
.set_clockgating_state = soc21_common_set_clockgating_state,
.set_powergating_state = soc21_common_set_powergating_state,
.get_clockgating_state = soc21_common_get_clockgating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
index 056d4df8fa..3ac56a9645 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
@@ -146,6 +146,7 @@ struct ta_ras_mca_addr {
uint32_t ch_inst;
uint32_t umc_inst;
uint32_t node_inst;
+ uint32_t socket_id;
};
struct ta_ras_phy_addr {
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index 450b6e8315..24d49d8136 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -486,6 +486,8 @@ static const struct amd_ip_funcs tonga_ih_ip_funcs = {
.post_soft_reset = tonga_ih_post_soft_reset,
.set_clockgating_state = tonga_ih_set_clockgating_state,
.set_powergating_state = tonga_ih_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ih_funcs tonga_ih_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index 77af4e25ff..bfe61d86ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -28,27 +28,7 @@
#include "umc/umc_12_0_0_sh_mask.h"
#include "mp/mp_13_0_6_sh_mask.h"
-const uint32_t
- umc_v12_0_channel_idx_tbl[]
- [UMC_V12_0_UMC_INSTANCE_NUM]
- [UMC_V12_0_CHANNEL_INSTANCE_NUM] = {
- {{3, 7, 11, 15, 2, 6, 10, 14}, {1, 5, 9, 13, 0, 4, 8, 12},
- {19, 23, 27, 31, 18, 22, 26, 30}, {17, 21, 25, 29, 16, 20, 24, 28}},
- {{47, 43, 39, 35, 46, 42, 38, 34}, {45, 41, 37, 33, 44, 40, 36, 32},
- {63, 59, 55, 51, 62, 58, 54, 50}, {61, 57, 53, 49, 60, 56, 52, 48}},
- {{79, 75, 71, 67, 78, 74, 70, 66}, {77, 73, 69, 65, 76, 72, 68, 64},
- {95, 91, 87, 83, 94, 90, 86, 82}, {93, 89, 85, 81, 92, 88, 84, 80}},
- {{99, 103, 107, 111, 98, 102, 106, 110}, {97, 101, 105, 109, 96, 100, 104, 108},
- {115, 119, 123, 127, 114, 118, 122, 126}, {113, 117, 121, 125, 112, 116, 120, 124}}
- };
-
-/* mapping of MCA error address to normalized address */
-static const uint32_t umc_v12_0_ma2na_mapping[] = {
- 0, 5, 6, 8, 9, 14, 12, 13,
- 10, 11, 15, 16, 17, 18, 19, 20,
- 21, 22, 23, 24, 25, 26, 27, 28,
- 24, 7, 29, 30,
-};
+#define MAX_ECC_NUM_PER_RETIREMENT 32
static inline uint64_t get_umc_v12_0_reg_offset(struct amdgpu_device *adev,
uint32_t node_inst,
@@ -192,99 +172,74 @@ static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev,
umc_v12_0_reset_error_count(adev);
}
-static bool umc_v12_0_bit_wise_xor(uint32_t val)
+static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
+ struct ta_ras_query_address_input *addr_in)
{
- bool result = 0;
- int i;
+ uint32_t col, row, row_xor, bank, channel_index;
+ uint64_t soc_pa, retired_page, column, err_addr;
+ struct ta_ras_query_address_output addr_out;
- for (i = 0; i < 32; i++)
- result = result ^ ((val >> i) & 0x1);
+ err_addr = addr_in->ma.err_addr;
+ addr_in->addr_type = TA_RAS_MCA_TO_PA;
+ if (psp_ras_query_address(&adev->psp, addr_in, &addr_out)) {
+ dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx",
+ err_addr);
- return result;
-}
+ return;
+ }
+
+ soc_pa = addr_out.pa.pa;
+ bank = addr_out.pa.bank;
+ channel_index = addr_out.pa.channel_idx;
-static void umc_v12_0_mca_addr_to_pa(struct amdgpu_device *adev,
- uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst,
- uint32_t node_inst,
- struct ta_ras_query_address_output *addr_out)
-{
- uint32_t channel_index, i;
- uint64_t na, soc_pa;
- uint32_t bank_hash0, bank_hash1, bank_hash2, bank_hash3, col, row;
- uint32_t bank0, bank1, bank2, bank3, bank;
-
- bank_hash0 = (err_addr >> UMC_V12_0_MCA_B0_BIT) & 0x1ULL;
- bank_hash1 = (err_addr >> UMC_V12_0_MCA_B1_BIT) & 0x1ULL;
- bank_hash2 = (err_addr >> UMC_V12_0_MCA_B2_BIT) & 0x1ULL;
- bank_hash3 = (err_addr >> UMC_V12_0_MCA_B3_BIT) & 0x1ULL;
col = (err_addr >> 1) & 0x1fULL;
row = (err_addr >> 10) & 0x3fffULL;
+ row_xor = row ^ (0x1ULL << 13);
+ /* clear [C3 C2] in soc physical address */
+ soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT);
+ /* clear [C4] in soc physical address */
+ soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT);
+
+ /* loop for all possibilities of [C4 C3 C2] */
+ for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) {
+ retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT);
+ retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT);
+ /* include column bit 0 and 1 */
+ col &= 0x3;
+ col |= (column << 2);
+ dev_info(adev->dev,
+ "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
+ retired_page, row, col, bank, channel_index);
+ amdgpu_umc_fill_error_record(err_data, err_addr,
+ retired_page, channel_index, addr_in->ma.umc_inst);
- /* apply bank hash algorithm */
- bank0 =
- bank_hash0 ^ (UMC_V12_0_XOR_EN0 &
- (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR0) ^
- (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR0))));
- bank1 =
- bank_hash1 ^ (UMC_V12_0_XOR_EN1 &
- (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR1) ^
- (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR1))));
- bank2 =
- bank_hash2 ^ (UMC_V12_0_XOR_EN2 &
- (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR2) ^
- (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR2))));
- bank3 =
- bank_hash3 ^ (UMC_V12_0_XOR_EN3 &
- (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR3) ^
- (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR3))));
-
- bank = bank0 | (bank1 << 1) | (bank2 << 2) | (bank3 << 3);
- err_addr &= ~0x3c0ULL;
- err_addr |= (bank << UMC_V12_0_MCA_B0_BIT);
-
- na = 0x0;
- /* convert mca error address to normalized address */
- for (i = 1; i < ARRAY_SIZE(umc_v12_0_ma2na_mapping); i++)
- na |= ((err_addr >> i) & 0x1ULL) << umc_v12_0_ma2na_mapping[i];
-
- channel_index =
- adev->umc.channel_idx_tbl[node_inst * adev->umc.umc_inst_num *
- adev->umc.channel_inst_num +
- umc_inst * adev->umc.channel_inst_num +
- ch_inst];
- /* translate umc channel address to soc pa, 3 parts are included */
- soc_pa = ADDR_OF_32KB_BLOCK(na) |
- ADDR_OF_256B_BLOCK(channel_index) |
- OFFSET_IN_256B_BLOCK(na);
-
- /* the umc channel bits are not original values, they are hashed */
- UMC_V12_0_SET_CHANNEL_HASH(channel_index, soc_pa);
-
- addr_out->pa.pa = soc_pa;
- addr_out->pa.bank = bank;
- addr_out->pa.channel_idx = channel_index;
+ /* shift R13 bit */
+ retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT);
+ dev_info(adev->dev,
+ "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
+ retired_page, row_xor, col, bank, channel_index);
+ amdgpu_umc_fill_error_record(err_data, err_addr,
+ retired_page, channel_index, addr_in->ma.umc_inst);
+ }
}
-static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
- struct ras_err_data *err_data, uint64_t err_addr,
- uint32_t ch_inst, uint32_t umc_inst,
- uint32_t node_inst)
+static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev,
+ struct ta_ras_query_address_input *addr_in,
+ uint64_t *pfns, int len)
{
uint32_t col, row, row_xor, bank, channel_index;
- uint64_t soc_pa, retired_page, column;
- struct ta_ras_query_address_input addr_in;
+ uint64_t soc_pa, retired_page, column, err_addr;
struct ta_ras_query_address_output addr_out;
+ uint32_t pos = 0;
- addr_in.addr_type = TA_RAS_MCA_TO_PA;
- addr_in.ma.err_addr = err_addr;
- addr_in.ma.ch_inst = ch_inst;
- addr_in.ma.umc_inst = umc_inst;
- addr_in.ma.node_inst = node_inst;
-
- if (psp_ras_query_address(&adev->psp, &addr_in, &addr_out))
- /* fallback to old path if fail to get pa from psp */
- umc_v12_0_mca_addr_to_pa(adev, err_addr, ch_inst, umc_inst,
- node_inst, &addr_out);
+ err_addr = addr_in->ma.err_addr;
+ addr_in->addr_type = TA_RAS_MCA_TO_PA;
+ if (psp_ras_query_address(&adev->psp, addr_in, &addr_out)) {
+ dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx",
+ err_addr);
+ return 0;
+ }
soc_pa = addr_out.pa.pa;
bank = addr_out.pa.bank;
@@ -302,33 +257,42 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev,
for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) {
retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT);
retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT);
+
+ if (pos >= len)
+ return 0;
+ pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
+
/* include column bit 0 and 1 */
col &= 0x3;
col |= (column << 2);
dev_info(adev->dev,
"Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
retired_page, row, col, bank, channel_index);
- amdgpu_umc_fill_error_record(err_data, err_addr,
- retired_page, channel_index, umc_inst);
/* shift R13 bit */
retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT);
+
+ if (pos >= len)
+ return 0;
+ pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
+
dev_info(adev->dev,
"Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n",
retired_page, row_xor, col, bank, channel_index);
- amdgpu_umc_fill_error_record(err_data, err_addr,
- retired_page, channel_index, umc_inst);
}
+
+ return pos;
}
static int umc_v12_0_query_error_address(struct amdgpu_device *adev,
uint32_t node_inst, uint32_t umc_inst,
uint32_t ch_inst, void *data)
{
+ struct ras_err_data *err_data = (struct ras_err_data *)data;
+ struct ta_ras_query_address_input addr_in;
uint64_t mc_umc_status_addr;
uint64_t mc_umc_status, err_addr;
uint64_t mc_umc_addrt0;
- struct ras_err_data *err_data = (struct ras_err_data *)data;
uint64_t umc_reg_offset =
get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst);
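/*
 * A standalone model of the retirement expansion above: for one decoded
 * soc physical address the column bits [C4 C3 C2] are cleared, all eight
 * combinations are enumerated, and each page is emitted twice, once with
 * row bit R13 flipped, giving up to 16 candidate pages. Bit positions
 * below are illustrative, not the real UMC v12 layout:
 */
#include <stdint.h>
#include <stdio.h>

#define DEMO_PA_C2_BIT   8
#define DEMO_PA_C4_BIT   12
#define DEMO_PA_R13_BIT  20
#define DEMO_PAGE_SHIFT  12

static int expand_pages(uint64_t soc_pa, uint64_t *pfns, int len)
{
	uint64_t column, page;
	int pos = 0;

	soc_pa &= ~(0x3ULL << DEMO_PA_C2_BIT);	/* clear [C3 C2] */
	soc_pa &= ~(0x1ULL << DEMO_PA_C4_BIT);	/* clear [C4] */

	for (column = 0; column < 8; column++) {
		page = soc_pa |
		       ((column & 0x3) << DEMO_PA_C2_BIT) |
		       (((column & 0x4) >> 2) << DEMO_PA_C4_BIT);
		if (pos + 2 > len)
			return pos;	/* mirror the driver's bounds check */
		pfns[pos++] = page >> DEMO_PAGE_SHIFT;
		pfns[pos++] = (page ^ (1ULL << DEMO_PA_R13_BIT)) >> DEMO_PAGE_SHIFT;
	}
	return pos;
}

int main(void)
{
	uint64_t pfns[16];
	int n = expand_pages(0x123456789ULL, pfns, 16);

	printf("%d pages, first pfn=0x%llx\n", n, (unsigned long long)pfns[0]);
	return 0;
}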
@@ -357,8 +321,19 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev,
err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
- umc_v12_0_convert_error_address(adev, err_data, err_addr,
- ch_inst, umc_inst, node_inst);
+ if (!adev->aid_mask &&
+ adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id)
+ addr_in.ma.socket_id = adev->smuio.funcs->get_socket_id(adev);
+ else
+ addr_in.ma.socket_id = 0;
+
+ addr_in.ma.err_addr = err_addr;
+ addr_in.ma.ch_inst = ch_inst;
+ addr_in.ma.umc_inst = umc_inst;
+ addr_in.ma.node_inst = node_inst;
+
+ umc_v12_0_convert_error_address(adev, err_data, &addr_in);
}
/* clear umc status */
@@ -401,13 +376,20 @@ static int umc_v12_0_err_cnt_init_per_channel(struct amdgpu_device *adev,
return 0;
}
+#ifdef TO_BE_REMOVED
static void umc_v12_0_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status)
{
+ struct ras_query_context qctx;
+
+ memset(&qctx, 0, sizeof(qctx));
+ qctx.event_id = amdgpu_ras_acquire_event_id(adev, amdgpu_ras_intr_triggered() ?
+ RAS_EVENT_TYPE_ISR : RAS_EVENT_TYPE_INVALID);
+
amdgpu_mca_smu_log_ras_error(adev,
- AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_CE, ras_error_status);
+ AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_CE, ras_error_status, &qctx);
amdgpu_mca_smu_log_ras_error(adev,
- AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_UE, ras_error_status);
+ AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_UE, ras_error_status, &qctx);
}
static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *adev,
@@ -418,12 +400,16 @@ static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade
struct ras_err_info *err_info;
struct ras_err_addr *mca_err_addr, *tmp;
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ struct ta_ras_query_address_input addr_in;
for_each_ras_error(err_node, err_data) {
err_info = &err_node->err_info;
if (list_empty(&err_info->err_addr_list))
continue;
+ addr_in.ma.node_inst = err_info->mcm_info.die_id;
+ addr_in.ma.socket_id = err_info->mcm_info.socket_id;
+
list_for_each_entry_safe(mca_err_addr, tmp, &err_info->err_addr_list, node) {
mc_umc_status = mca_err_addr->err_status;
if (mc_umc_status &&
@@ -439,6 +425,10 @@ static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade
MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
InstanceIdLo = REG_GET_FIELD(mca_ipid, MCMP1_IPIDT0, InstanceIdLo);
+ addr_in.ma.err_addr = err_addr;
+ addr_in.ma.ch_inst = MCA_IPID_LO_2_UMC_CH(InstanceIdLo);
+ addr_in.ma.umc_inst = MCA_IPID_LO_2_UMC_INST(InstanceIdLo);
+
dev_info(adev->dev, "UMC:IPID:0x%llx, aid:%d, inst:%d, ch:%d, err_addr:0x%llx\n",
mca_ipid,
err_info->mcm_info.die_id,
@@ -447,10 +437,7 @@ static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade
err_addr);
umc_v12_0_convert_error_address(adev,
- err_data, err_addr,
- MCA_IPID_LO_2_UMC_CH(InstanceIdLo),
- MCA_IPID_LO_2_UMC_INST(InstanceIdLo),
- err_info->mcm_info.die_id);
+ err_data, &addr_in);
}
/* Delete error address node from list and free memory */
@@ -458,6 +445,7 @@ static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade
}
}
}
+#endif
static bool umc_v12_0_check_ecc_err_status(struct amdgpu_device *adev,
enum amdgpu_mca_error_type type, void *ras_error_status)
@@ -498,43 +486,49 @@ const struct amdgpu_ras_block_hw_ops umc_v12_0_ras_hw_ops = {
.query_ras_error_address = umc_v12_0_query_ras_error_address,
};
-static int umc_v12_0_aca_bank_generate_report(struct aca_handle *handle, struct aca_bank *bank, enum aca_error_type type,
- struct aca_bank_report *report, void *data)
+static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_smu_type type, void *data)
{
struct amdgpu_device *adev = handle->adev;
- u64 status;
+ struct aca_bank_info info;
+ enum aca_error_type err_type;
+ u64 status, count;
+ u32 ext_error_code;
int ret;
- ret = aca_bank_info_decode(bank, &report->info);
+ status = bank->regs[ACA_REG_IDX_STATUS];
+ if (umc_v12_0_is_deferred_error(adev, status))
+ err_type = ACA_ERROR_TYPE_DEFERRED;
+ else if (umc_v12_0_is_uncorrectable_error(adev, status))
+ err_type = ACA_ERROR_TYPE_UE;
+ else if (umc_v12_0_is_correctable_error(adev, status))
+ err_type = ACA_ERROR_TYPE_CE;
+ else
+ return 0;
+
+ ret = aca_bank_info_decode(bank, &info);
if (ret)
return ret;
- status = bank->regs[ACA_REG_IDX_STATUS];
- switch (type) {
- case ACA_ERROR_TYPE_UE:
- if (umc_v12_0_is_uncorrectable_error(adev, status)) {
- report->count[type] = 1;
- }
- break;
- case ACA_ERROR_TYPE_CE:
- if (umc_v12_0_is_correctable_error(adev, status)) {
- report->count[type] = 1;
- }
- break;
- default:
- return -EINVAL;
- }
+ amdgpu_umc_update_ecc_status(adev,
+ bank->regs[ACA_REG_IDX_STATUS],
+ bank->regs[ACA_REG_IDX_IPID],
+ bank->regs[ACA_REG_IDX_ADDR]);
- return 0;
+ ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status);
+ count = ext_error_code == 0 ?
+ ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]) : 1ULL;
+
+ return aca_error_cache_log_bank_error(handle, &info, err_type, count);
}
static const struct aca_bank_ops umc_v12_0_aca_bank_ops = {
- .aca_bank_generate_report = umc_v12_0_aca_bank_generate_report,
+ .aca_bank_parser = umc_v12_0_aca_bank_parser,
};
const struct aca_info umc_v12_0_aca_info = {
.hwip = ACA_HWIP_TYPE_UMC,
- .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK,
+ .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK | ACA_ERROR_DEFERRED_MASK,
.bank_ops = &umc_v12_0_aca_bank_ops,
};
@@ -554,6 +548,152 @@ static int umc_v12_0_ras_late_init(struct amdgpu_device *adev, struct ras_common
return 0;
}
+static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
+ uint64_t status, uint64_t ipid, uint64_t addr)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ uint16_t hwid, mcatype;
+ struct ta_ras_query_address_input addr_in;
+ uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL];
+ uint64_t err_addr, hash_val = 0;
+ struct ras_ecc_err *ecc_err;
+ int count;
+ int ret;
+
+ hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID);
+ mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType);
+
+ if ((hwid != MCA_UMC_HWID_V12_0) || (mcatype != MCA_UMC_MCATYPE_V12_0))
+ return 0;
+
+ if (!status)
+ return 0;
+
+ if (!umc_v12_0_is_deferred_error(adev, status))
+ return 0;
+
+ err_addr = REG_GET_FIELD(addr,
+ MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+
+ dev_info(adev->dev,
+ "UMC:IPID:0x%llx, socket:%llu, aid:%llu, inst:%llu, ch:%llu, err_addr:0x%llx\n",
+ ipid,
+ MCA_IPID_2_SOCKET_ID(ipid),
+ MCA_IPID_2_DIE_ID(ipid),
+ MCA_IPID_2_UMC_INST(ipid),
+ MCA_IPID_2_UMC_CH(ipid),
+ err_addr);
+
+ memset(page_pfn, 0, sizeof(page_pfn));
+
+ memset(&addr_in, 0, sizeof(addr_in));
+ addr_in.ma.err_addr = err_addr;
+ addr_in.ma.ch_inst = MCA_IPID_2_UMC_CH(ipid);
+ addr_in.ma.umc_inst = MCA_IPID_2_UMC_INST(ipid);
+ addr_in.ma.node_inst = MCA_IPID_2_DIE_ID(ipid);
+ addr_in.ma.socket_id = MCA_IPID_2_SOCKET_ID(ipid);
+
+ count = umc_v12_0_convert_err_addr(adev,
+ &addr_in, page_pfn, ARRAY_SIZE(page_pfn));
+ if (count <= 0) {
+ dev_warn(adev->dev, "Fail to convert error address! count:%d\n", count);
+ return 0;
+ }
+
+ ret = amdgpu_umc_build_pages_hash(adev,
+ page_pfn, count, &hash_val);
+ if (ret) {
+ dev_err(adev->dev, "Fail to build error pages hash\n");
+ return ret;
+ }
+
+ ecc_err = kzalloc(sizeof(*ecc_err), GFP_KERNEL);
+ if (!ecc_err)
+ return -ENOMEM;
+
+ ecc_err->err_pages.pfn = kcalloc(count, sizeof(*ecc_err->err_pages.pfn), GFP_KERNEL);
+ if (!ecc_err->err_pages.pfn) {
+ kfree(ecc_err);
+ return -ENOMEM;
+ }
+
+ memcpy(ecc_err->err_pages.pfn, page_pfn, count * sizeof(*ecc_err->err_pages.pfn));
+ ecc_err->err_pages.count = count;
+
+ ecc_err->hash_index = hash_val;
+ ecc_err->status = status;
+ ecc_err->ipid = ipid;
+ ecc_err->addr = addr;
+
+ ret = amdgpu_umc_logs_ecc_err(adev, &con->umc_ecc_log.de_page_tree, ecc_err);
+ if (ret) {
+ if (ret == -EEXIST)
+ con->umc_ecc_log.de_updated = true;
+ else
+ dev_err(adev->dev, "Fail to log ecc error! ret:%d\n", ret);
+
+ kfree(ecc_err->err_pages.pfn);
+ kfree(ecc_err);
+ return ret;
+ }
+
+ con->umc_ecc_log.de_updated = true;
+
+ return 0;
+}
+
+static int umc_v12_0_fill_error_record(struct amdgpu_device *adev,
+ struct ras_ecc_err *ecc_err, void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+ uint32_t i = 0;
+ int ret = 0;
+
+ if (!err_data || !ecc_err)
+ return -EINVAL;
+
+ for (i = 0; i < ecc_err->err_pages.count; i++) {
+ ret = amdgpu_umc_fill_error_record(err_data,
+ ecc_err->addr,
+ ecc_err->err_pages.pfn[i] << AMDGPU_GPU_PAGE_SHIFT,
+ MCA_IPID_2_UMC_CH(ecc_err->ipid),
+ MCA_IPID_2_UMC_INST(ecc_err->ipid));
+ if (ret)
+ break;
+ }
+
+ err_data->de_count++;
+
+ return ret;
+}
+
+static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
+ struct ras_ecc_err *entries[MAX_ECC_NUM_PER_RETIREMENT];
+ struct radix_tree_root *ecc_tree;
+ int new_detected, ret, i;
+
+ ecc_tree = &con->umc_ecc_log.de_page_tree;
+
+ mutex_lock(&con->umc_ecc_log.lock);
+ new_detected = radix_tree_gang_lookup_tag(ecc_tree, (void **)entries,
+ 0, ARRAY_SIZE(entries), UMC_ECC_NEW_DETECTED_TAG);
+ for (i = 0; i < new_detected; i++) {
+ if (!entries[i])
+ continue;
+
+ ret = umc_v12_0_fill_error_record(adev, entries[i], ras_error_status);
+ if (ret) {
+ dev_err(adev->dev, "Fail to fill umc error record, ret:%d\n", ret);
+ break;
+ }
+ radix_tree_tag_clear(ecc_tree, entries[i]->hash_index, UMC_ECC_NEW_DETECTED_TAG);
+ }
+ mutex_unlock(&con->umc_ecc_log.lock);
+}
+
struct amdgpu_umc_ras umc_v12_0_ras = {
.ras_block = {
.hw_ops = &umc_v12_0_ras_hw_ops,
@@ -561,8 +701,8 @@ struct amdgpu_umc_ras umc_v12_0_ras = {
},
.err_cnt_init = umc_v12_0_err_cnt_init,
.query_ras_poison_mode = umc_v12_0_query_ras_poison_mode,
- .ecc_info_query_ras_error_count = umc_v12_0_ecc_info_query_ras_error_count,
- .ecc_info_query_ras_error_address = umc_v12_0_ecc_info_query_ras_error_address,
+ .ecc_info_query_ras_error_address = umc_v12_0_query_ras_ecc_err_addr,
.check_ecc_err_status = umc_v12_0_check_ecc_err_status,
+ .update_ecc_status = umc_v12_0_update_ecc_status,
};
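The new deferred-error flow above has a producer and a consumer: umc_v12_0_update_ecc_status() converts the MCA address into page PFNs, hashes them, and logs a record into de_page_tree keyed by that hash (treating -EEXIST as already-logged), while umc_v12_0_query_ras_ecc_err_addr() later gang-looks-up entries tagged UMC_ECC_NEW_DETECTED_TAG and clears the tag once each record is retired. A toy userspace sketch of that produce/drain-by-tag shape, with a fixed-size table standing in for the radix tree:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for the tagged radix tree: each record carries a "new"
 * tag that the consumer clears after draining it. */
struct ecc_rec {
	bool used;
	bool tag_new;   /* models UMC_ECC_NEW_DETECTED_TAG */
	uint64_t hash;  /* models ecc_err->hash_index */
};

#define NRECS 8
static struct ecc_rec recs[NRECS];

/* Producer: insert keyed by hash; duplicates report -17 (EEXIST-like),
 * which the driver treats as "already logged", not a failure. */
static int log_ecc(uint64_t hash)
{
	for (int i = 0; i < NRECS; i++)
		if (recs[i].used && recs[i].hash == hash)
			return -17;
	for (int i = 0; i < NRECS; i++) {
		if (!recs[i].used) {
			recs[i] = (struct ecc_rec){ true, true, hash };
			return 0;
		}
	}
	return -12; /* ENOMEM-like: table full */
}

/* Consumer: process only newly tagged records, then clear the tag so a
 * later pass does not retire the same pages again. */
static int drain_new(void)
{
	int n = 0;
	for (int i = 0; i < NRECS; i++) {
		if (recs[i].used && recs[i].tag_new) {
			recs[i].tag_new = false;
			n++;
		}
	}
	return n;
}

int main(void)
{
	log_ecc(0xabc);
	log_ecc(0xabc);                       /* deduplicated by hash */
	printf("drained %d\n", drain_new());  /* 1 */
	printf("drained %d\n", drain_new());  /* 0: tag already cleared */
	return 0;
}
```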
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
index 5973bfb14f..b497479385 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h
@@ -55,83 +55,38 @@
#define UMC_V12_0_NA_MAP_PA_NUM 8
/* R13 bit shift should be considered, double the number */
#define UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL (UMC_V12_0_NA_MAP_PA_NUM * 2)
-/* bank bits in MCA error address */
-#define UMC_V12_0_MCA_B0_BIT 6
-#define UMC_V12_0_MCA_B1_BIT 7
-#define UMC_V12_0_MCA_B2_BIT 8
-#define UMC_V12_0_MCA_B3_BIT 9
+
/* column bits in SOC physical address */
#define UMC_V12_0_PA_C2_BIT 15
#define UMC_V12_0_PA_C4_BIT 21
/* row bits in SOC physical address */
#define UMC_V12_0_PA_R13_BIT 35
-/* channel index bits in SOC physical address */
-#define UMC_V12_0_PA_CH4_BIT 12
-#define UMC_V12_0_PA_CH5_BIT 13
-#define UMC_V12_0_PA_CH6_BIT 14
-
-/* bank hash settings */
-#define UMC_V12_0_XOR_EN0 1
-#define UMC_V12_0_XOR_EN1 1
-#define UMC_V12_0_XOR_EN2 1
-#define UMC_V12_0_XOR_EN3 1
-#define UMC_V12_0_COL_XOR0 0x0
-#define UMC_V12_0_COL_XOR1 0x0
-#define UMC_V12_0_COL_XOR2 0x800
-#define UMC_V12_0_COL_XOR3 0x1000
-#define UMC_V12_0_ROW_XOR0 0x11111
-#define UMC_V12_0_ROW_XOR1 0x22222
-#define UMC_V12_0_ROW_XOR2 0x4444
-#define UMC_V12_0_ROW_XOR3 0x8888
-
-/* channel hash settings */
-#define UMC_V12_0_HASH_4K 0
-#define UMC_V12_0_HASH_64K 1
-#define UMC_V12_0_HASH_2M 1
-#define UMC_V12_0_HASH_1G 1
-#define UMC_V12_0_HASH_1T 1
-
-/* XOR some bits of PA into CH4~CH6 bits (bits 12~14 of PA),
- * hash bit is only effective when related setting is enabled
- */
-#define UMC_V12_0_CHANNEL_HASH_CH4(channel_idx, pa) ((((channel_idx) >> 5) & 0x1) ^ \
- (((pa) >> 20) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \
- (((pa) >> 27) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \
- (((pa) >> 34) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \
- (((pa) >> 41) & 0x1ULL & UMC_V12_0_HASH_1T))
-#define UMC_V12_0_CHANNEL_HASH_CH5(channel_idx, pa) ((((channel_idx) >> 6) & 0x1) ^ \
- (((pa) >> 21) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \
- (((pa) >> 28) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \
- (((pa) >> 35) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \
- (((pa) >> 42) & 0x1ULL & UMC_V12_0_HASH_1T))
-#define UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) ((((channel_idx) >> 4) & 0x1) ^ \
- (((pa) >> 19) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \
- (((pa) >> 26) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \
- (((pa) >> 33) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \
- (((pa) >> 40) & 0x1ULL & UMC_V12_0_HASH_1T) ^ \
- (((pa) >> 47) & 0x1ULL & UMC_V12_0_HASH_4K))
-#define UMC_V12_0_SET_CHANNEL_HASH(channel_idx, pa) do { \
- (pa) &= ~(0x7ULL << UMC_V12_0_PA_CH4_BIT); \
- (pa) |= (UMC_V12_0_CHANNEL_HASH_CH4(channel_idx, pa) << UMC_V12_0_PA_CH4_BIT); \
- (pa) |= (UMC_V12_0_CHANNEL_HASH_CH5(channel_idx, pa) << UMC_V12_0_PA_CH5_BIT); \
- (pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \
- } while (0)
+
+#define MCA_UMC_HWID_V12_0 0x96
+#define MCA_UMC_MCATYPE_V12_0 0x0
#define MCA_IPID_LO_2_UMC_CH(_ipid_lo) (((((_ipid_lo) >> 20) & 0x1) * 4) + \
(((_ipid_lo) >> 12) & 0xF))
#define MCA_IPID_LO_2_UMC_INST(_ipid_lo) (((_ipid_lo) >> 21) & 0x7)
+#define MCA_IPID_2_DIE_ID(ipid) ((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) >> 2) & 0x03)
+
+#define MCA_IPID_2_UMC_CH(ipid) \
+ (MCA_IPID_LO_2_UMC_CH(REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo)))
+
+#define MCA_IPID_2_UMC_INST(ipid) \
+ (MCA_IPID_LO_2_UMC_INST(REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo)))
+
+#define MCA_IPID_2_SOCKET_ID(ipid) \
+ (((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo) & 0x1) << 2) | \
+ (REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) & 0x03))
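A worked restatement of the decode macros above, with InstanceIdLo and InstanceIdHi passed in as plain integers rather than extracted via REG_GET_FIELD(); the shifts and masks mirror the definitions exactly:

```c
#include <stdint.h>
#include <stdio.h>

/* channel  = lo[20] * 4 + lo[15:12]   (MCA_IPID_LO_2_UMC_CH)
 * instance = lo[23:21]                (MCA_IPID_LO_2_UMC_INST)
 * die      = hi[3:2]                  (MCA_IPID_2_DIE_ID)
 * socket   = lo[0] << 2 | hi[1:0]     (MCA_IPID_2_SOCKET_ID) */
static unsigned umc_ch(uint32_t lo)   { return (((lo >> 20) & 0x1) * 4) + ((lo >> 12) & 0xF); }
static unsigned umc_inst(uint32_t lo) { return (lo >> 21) & 0x7; }
static unsigned die_id(uint32_t hi)   { return (hi >> 2) & 0x3; }
static unsigned socket_id(uint32_t lo, uint32_t hi) { return ((lo & 0x1) << 2) | (hi & 0x3); }

int main(void)
{
	uint32_t lo = (1u << 20) | (0x3u << 12) | (0x5u << 21) | 0x1;
	uint32_t hi = 0xE;

	/* ch = 4 + 3 = 7, inst = 5, die = 3, socket = (1 << 2) | 2 = 6 */
	printf("ch=%u inst=%u die=%u socket=%u\n",
	       umc_ch(lo), umc_inst(lo), die_id(hi), socket_id(lo, hi));
	return 0;
}
```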
+
bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status);
typedef bool (*check_error_type_func)(struct amdgpu_device *adev, uint64_t mc_umc_status);
-extern const uint32_t
- umc_v12_0_channel_idx_tbl[]
- [UMC_V12_0_UMC_INSTANCE_NUM]
- [UMC_V12_0_CHANNEL_INSTANCE_NUM];
-
extern struct amdgpu_umc_ras umc_v12_0_ras;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
index c4c7725771..a32f87992f 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
@@ -442,11 +442,6 @@ static void umc_v8_10_ecc_info_query_ras_error_address(struct amdgpu_device *adev
umc_v8_10_ecc_info_query_error_address, ras_error_status);
}
-static void umc_v8_10_set_eeprom_table_version(struct amdgpu_ras_eeprom_table_header *hdr)
-{
- hdr->version = RAS_TABLE_VER_V2_1;
-}
-
const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = {
.query_ras_error_count = umc_v8_10_query_ras_error_count,
.query_ras_error_address = umc_v8_10_query_ras_error_address,
@@ -460,5 +455,4 @@ struct amdgpu_umc_ras umc_v8_10_ras = {
.query_ras_poison_mode = umc_v8_10_query_ras_poison_mode,
.ecc_info_query_ras_error_count = umc_v8_10_ecc_info_query_ras_error_count,
.ecc_info_query_ras_error_address = umc_v8_10_ecc_info_query_ras_error_address,
- .set_eeprom_table_version = umc_v8_10_set_eeprom_table_version,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
index a6006f231c..805d6662c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c
@@ -819,6 +819,8 @@ static const struct amd_ip_funcs uvd_v3_1_ip_funcs = {
.soft_reset = uvd_v3_1_soft_reset,
.set_clockgating_state = uvd_v3_1_set_clockgating_state,
.set_powergating_state = uvd_v3_1_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
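The .dump_ip_state = NULL and .print_ip_state = NULL entries added to this table (and to the analogous uvd, vce, vcn, and vi tables below) change nothing at runtime, since designated initializers zero any unnamed members; they only make the unimplemented hooks explicit. A minimal demonstration:

```c
#include <stdio.h>

struct ip_funcs {
	const char *name;
	void (*dump_ip_state)(void *);
	void (*print_ip_state)(void *);
};

/* Unnamed function-pointer members are zero-initialized either way;
 * spelling out NULL only records that the hooks are intentionally
 * unimplemented for this IP block. */
static const struct ip_funcs implicit = { .name = "implicit" };
static const struct ip_funcs explicit_null = {
	.name = "explicit",
	.dump_ip_state = NULL,
	.print_ip_state = NULL,
};

int main(void)
{
	printf("%d %d\n", implicit.dump_ip_state == NULL,
	       explicit_null.dump_ip_state == NULL); /* 1 1 */
	return 0;
}
```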
const struct amdgpu_ip_block_version uvd_v3_1_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 1aa09ad7bb..3f19c606f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -769,6 +769,8 @@ static const struct amd_ip_funcs uvd_v4_2_ip_funcs = {
.soft_reset = uvd_v4_2_soft_reset,
.set_clockgating_state = uvd_v4_2_set_clockgating_state,
.set_powergating_state = uvd_v4_2_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index f8b229b754..efd903c21d 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -877,6 +877,8 @@ static const struct amd_ip_funcs uvd_v5_0_ip_funcs = {
.set_clockgating_state = uvd_v5_0_set_clockgating_state,
.set_powergating_state = uvd_v5_0_set_powergating_state,
.get_clockgating_state = uvd_v5_0_get_clockgating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index a9a6880f44..495de50684 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -1545,6 +1545,8 @@ static const struct amd_ip_funcs uvd_v6_0_ip_funcs = {
.set_clockgating_state = uvd_v6_0_set_clockgating_state,
.set_powergating_state = uvd_v6_0_set_powergating_state,
.get_clockgating_state = uvd_v6_0_get_clockgating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index a08e7abca4..66fada199b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -626,6 +626,8 @@ static const struct amd_ip_funcs vce_v2_0_ip_funcs = {
.soft_reset = vce_v2_0_soft_reset,
.set_clockgating_state = vce_v2_0_set_clockgating_state,
.set_powergating_state = vce_v2_0_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index f4760748d3..32517c364c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -913,6 +913,8 @@ static const struct amd_ip_funcs vce_v3_0_ip_funcs = {
.set_clockgating_state = vce_v3_0_set_clockgating_state,
.set_powergating_state = vce_v3_0_set_powergating_state,
.get_clockgating_state = vce_v3_0_get_clockgating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index aaceecd558..cb253bd3a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -1902,6 +1902,8 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
.post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */,
.set_clockgating_state = vcn_v1_0_set_clockgating_state,
.set_powergating_state = vcn_v1_0_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
index e357d8cf0c..f18fd61c43 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c
@@ -2008,6 +2008,8 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = {
.post_soft_reset = NULL,
.set_clockgating_state = vcn_v2_0_set_clockgating_state,
.set_powergating_state = vcn_v2_0_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 1cd8a94b0f..baec14bde2 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -1901,6 +1901,8 @@ static const struct amd_ip_funcs vcn_v2_5_ip_funcs = {
.post_soft_reset = NULL,
.set_clockgating_state = vcn_v2_5_set_clockgating_state,
.set_powergating_state = vcn_v2_5_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amd_ip_funcs vcn_v2_6_ip_funcs = {
@@ -1921,6 +1923,8 @@ static const struct amd_ip_funcs vcn_v2_6_ip_funcs = {
.post_soft_reset = NULL,
.set_clockgating_state = vcn_v2_5_set_clockgating_state,
.set_powergating_state = vcn_v2_5_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
const struct amdgpu_ip_block_version vcn_v2_5_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 8f82fb887e..6b31cf4b8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -359,6 +359,7 @@ static int vcn_v3_0_hw_init(void *handle)
}
}
+ return 0;
done:
if (!r)
DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
@@ -2230,6 +2231,8 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = {
.post_soft_reset = NULL,
.set_clockgating_state = vcn_v3_0_set_clockgating_state,
.set_powergating_state = vcn_v3_0_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
const struct amdgpu_ip_block_version vcn_v3_0_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 832d15f7b5..9a33d3d000 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -288,6 +288,7 @@ static int vcn_v4_0_hw_init(void *handle)
}
}
+ return 0;
done:
if (!r)
DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
@@ -1052,6 +1053,9 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
amdgpu_dpm_enable_uvd(adev, true);
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
@@ -1505,6 +1509,9 @@ static int vcn_v4_0_stop(struct amdgpu_device *adev)
int i, r = 0;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
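The start/stop loops here, and the matching ones in vcn_v4_0_5 and vcn_v5_0_0 below, now test the per-instance bit in harvest_config before touching an instance's fw_shared area, so fused-off instances are never programmed. The idiom as a standalone sketch:

```c
#include <stdio.h>

#define NUM_INST 4

int main(void)
{
	/* Bit i set means instance i is harvested (fused off) and must
	 * not be programmed; here instance 1 is harvested. */
	unsigned harvest_config = 1u << 1;

	for (int i = 0; i < NUM_INST; i++) {
		if (harvest_config & (1u << i))
			continue;   /* skip: no fw_shared to touch */
		printf("programming instance %d\n", i);
	}
	return 0;
}
```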
@@ -2130,6 +2137,8 @@ static const struct amd_ip_funcs vcn_v4_0_ip_funcs = {
.post_soft_reset = NULL,
.set_clockgating_state = vcn_v4_0_set_clockgating_state,
.set_powergating_state = vcn_v4_0_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
const struct amdgpu_ip_block_version vcn_v4_0_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 203fa98832..2279d8fce0 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -1660,6 +1660,8 @@ static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = {
.post_soft_reset = NULL,
.set_clockgating_state = vcn_v4_0_3_set_clockgating_state,
.set_powergating_state = vcn_v4_0_3_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
index 501e53e69f..30e80c6f11 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
@@ -237,6 +237,7 @@ static int vcn_v4_0_5_hw_init(void *handle)
goto done;
}
+ return 0;
done:
if (!r)
DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
@@ -963,6 +964,9 @@ static int vcn_v4_0_5_start(struct amdgpu_device *adev)
amdgpu_dpm_enable_uvd(adev, true);
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
@@ -1167,6 +1171,9 @@ static int vcn_v4_0_5_stop(struct amdgpu_device *adev)
int i, r = 0;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
@@ -1752,6 +1759,8 @@ static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = {
.post_soft_reset = NULL,
.set_clockgating_state = vcn_v4_0_5_set_clockgating_state,
.set_powergating_state = vcn_v4_0_5_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
const struct amdgpu_ip_block_version vcn_v4_0_5_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
index bc60c554eb..fbd3f7a582 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c
@@ -95,7 +95,7 @@ static int vcn_v5_0_0_sw_init(void *handle)
return r;
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ volatile struct amdgpu_vcn5_fw_shared *fw_shared;
if (adev->vcn.harvest_config & (1 << i))
continue;
@@ -154,7 +154,7 @@ static int vcn_v5_0_0_sw_fini(void *handle)
if (drm_dev_enter(adev_to_drm(adev), &idx)) {
for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ volatile struct amdgpu_vcn5_fw_shared *fw_shared;
if (adev->vcn.harvest_config & (1 << i))
continue;
@@ -203,6 +203,7 @@ static int vcn_v5_0_0_hw_init(void *handle)
goto done;
}
+ return 0;
done:
if (!r)
DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
@@ -334,7 +335,7 @@ static void vcn_v5_0_0_mc_resume(struct amdgpu_device *adev, int inst)
upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_OFFSET0, 0);
WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_SIZE0,
- AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)));
}
/**
@@ -438,7 +439,7 @@ static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
VCN, inst_idx, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
VCN, inst_idx, regUVD_VCPU_NONCACHE_SIZE0),
- AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect);
+ AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)), 0, indirect);
/* VCN global tiling registers */
WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET(
@@ -615,7 +616,7 @@ static void vcn_v5_0_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
*/
static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
{
- volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
+ volatile struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
struct amdgpu_ring *ring;
uint32_t tmp;
@@ -712,7 +713,7 @@ static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
*/
static int vcn_v5_0_0_start(struct amdgpu_device *adev)
{
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ volatile struct amdgpu_vcn5_fw_shared *fw_shared;
struct amdgpu_ring *ring;
uint32_t tmp;
int i, j, k, r;
@@ -721,6 +722,9 @@ static int vcn_v5_0_0_start(struct amdgpu_device *adev)
amdgpu_dpm_enable_uvd(adev, true);
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
@@ -893,11 +897,14 @@ static void vcn_v5_0_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
*/
static int vcn_v5_0_0_stop(struct amdgpu_device *adev)
{
- volatile struct amdgpu_vcn4_fw_shared *fw_shared;
+ volatile struct amdgpu_vcn5_fw_shared *fw_shared;
uint32_t tmp;
int i, r = 0;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ if (adev->vcn.harvest_config & (1 << i))
+ continue;
+
fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF;
@@ -1328,6 +1335,8 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = {
.post_soft_reset = NULL,
.set_clockgating_state = vcn_v5_0_0_set_clockgating_state,
.set_powergating_state = vcn_v5_0_0_set_powergating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 1a98812981..d39c670f62 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -897,7 +897,7 @@ static int vi_asic_pci_config_reset(struct amdgpu_device *adev)
return r;
}
-static bool vi_asic_supports_baco(struct amdgpu_device *adev)
+static int vi_asic_supports_baco(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_FIJI:
@@ -908,14 +908,14 @@ static bool vi_asic_supports_baco(struct amdgpu_device *adev)
case CHIP_TOPAZ:
return amdgpu_dpm_is_baco_supported(adev);
default:
- return false;
+ return 0;
}
}
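Widening the supports_baco callback from bool to int allows a negative errno to be reported alongside the 0/1 capability answer, which a plain bool could not express. A sketch of that convention (the function and its failure condition are illustrative, not the driver's):

```c
#include <errno.h>
#include <stdio.h>

/* Convention after the change: negative means "query failed",
 * 0 means "not supported", positive means "supported". */
static int supports_baco(int chip_ok, int query_broken)
{
	if (query_broken)
		return -EINVAL; /* expressible now; bool could only say 0/1 */
	return chip_ok ? 1 : 0;
}

int main(void)
{
	printf("%d %d %d\n", supports_baco(1, 0), supports_baco(0, 0),
	       supports_baco(1, 1)); /* 1 0 -22 */
	return 0;
}
```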
static enum amd_reset_method
vi_asic_reset_method(struct amdgpu_device *adev)
{
- bool baco_reset;
+ int baco_reset;
if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY ||
amdgpu_reset_method == AMD_RESET_METHOD_BACO)
@@ -935,7 +935,7 @@ vi_asic_reset_method(struct amdgpu_device *adev)
baco_reset = amdgpu_dpm_is_baco_supported(adev);
break;
default:
- baco_reset = false;
+ baco_reset = 0;
break;
}
@@ -2058,6 +2058,8 @@ static const struct amd_ip_funcs vi_common_ip_funcs = {
.set_clockgating_state = vi_common_set_clockgating_state,
.set_powergating_state = vi_common_set_powergating_state,
.get_clockgating_state = vi_common_get_clockgating_state,
+ .dump_ip_state = NULL,
+ .print_ip_state = NULL,
};
static const struct amdgpu_ip_block_version vi_common_ip_block =